def clearRelatedDb(self):
        # Wipe every collection these tests touch so each run starts clean.
        # NOTE(review): remove() with no filter deletes ALL documents for ALL
        # users -- confirm this only ever runs against a test database.
        edb.get_timeseries_db().remove()
        edb.get_place_db().remove()
        edb.get_stop_db().remove()

        edb.get_trip_new_db().remove()
        edb.get_section_new_db().remove()
Esempio n. 2
0
    def setUp(self):
        """Load trip and section smoothing fixtures into the test database
        under a freshly generated UUID, and open a timeseries for it."""
        # We need to access the database directly sometimes in order to
        # forcibly insert entries for the tests to pass. But we put the import
        # in here to reduce the temptation to use the database directly elsewhere.
        import emission.core.get_database as edb
        import uuid

        self.testUUID = uuid.uuid4()

        # BUG FIX: use context managers so the fixture file handles are
        # closed promptly (the original leaked the objects from open()).
        with open("emission/tests/data/smoothing_data/trip_list.txt") as tf:
            self.trips = json.load(tf, object_hook=bju.object_hook)
        for trip in self.trips:
            trip["user_id"] = self.testUUID
            edb.get_trip_new_db().save(trip)

        self.trips = [ecwt.Trip(t) for t in self.trips]

        with open("emission/tests/data/smoothing_data/section_list.txt") as sf:
            self.sections = json.load(sf, object_hook=bju.object_hook)
        for section in self.sections:
            section["user_id"] = self.testUUID
            edb.get_section_new_db().save(section)

        self.sections = [ecws.Section(s) for s in self.sections]

        self.ts = esta.TimeSeries.get_time_series(self.testUUID)
Esempio n. 3
0
    def clearRelatedDb(self):
        """Remove every document from the collections these tests write to."""
        # NOTE(review): unfiltered remove() clears the collections for all
        # users -- assumed to run only against a test database.
        edb.get_timeseries_db().remove()
        edb.get_place_db().remove()
        edb.get_stop_db().remove()

        edb.get_trip_new_db().remove()
        edb.get_section_new_db().remove()
    def setUp(self):
        """Insert the smoothing-data trip and section fixtures for a fresh
        test UUID and create the corresponding TimeSeries accessor."""
        # We need to access the database directly sometimes in order to
        # forcibly insert entries for the tests to pass. But we put the import
        # in here to reduce the temptation to use the database directly elsewhere.
        import emission.core.get_database as edb
        import uuid

        self.testUUID = uuid.uuid4()

        # BUG FIX: the original passed open(...) directly to json.load and
        # never closed the files; `with` guarantees closure.
        with open("emission/tests/data/smoothing_data/trip_list.txt") as tf:
            self.trips = json.load(tf, object_hook=bju.object_hook)
        for trip in self.trips:
            trip["user_id"] = self.testUUID
            edb.get_trip_new_db().save(trip)

        self.trips = [ecwt.Trip(t) for t in self.trips]

        with open("emission/tests/data/smoothing_data/section_list.txt") as sf:
            self.sections = json.load(sf, object_hook=bju.object_hook)
        for section in self.sections:
            section["user_id"] = self.testUUID
            edb.get_section_new_db().save(section)

        self.sections = [ecws.Section(s) for s in self.sections]

        self.ts = esta.TimeSeries.get_time_series(self.testUUID)
    def clearRelatedDb(self):
        """Remove only this test user's documents from the related collections."""
        edb.get_timeseries_db().remove({'user_id': self.testUUID})
        edb.get_place_db().remove({'user_id': self.testUUID})
        edb.get_stop_db().remove({'user_id': self.testUUID})

        edb.get_trip_new_db().remove({'user_id': self.testUUID})
        edb.get_section_new_db().remove({'user_id': self.testUUID})
Esempio n. 6
0
 def tearDown(self):
     # Remove the two test users' entries from the timeseries and usercache,
     # then clear the place and new-style trip collections entirely.
     edb.get_timeseries_db().remove({"user_id": self.androidUUID})
     edb.get_timeseries_db().remove({"user_id": self.iosUUID})
     edb.get_usercache_db().remove({"user_id": self.androidUUID})
     edb.get_usercache_db().remove({"user_id": self.iosUUID})
     edb.get_place_db().remove()
     edb.get_trip_new_db().remove()
 def tearDown(self):
     """Clean up after a two-platform test: delete each test user's
     timeseries and usercache entries, then empty the place and
     new-style trip collections."""
     for platform_uuid in (self.androidUUID, self.iosUUID):
         edb.get_timeseries_db().remove({"user_id": platform_uuid})
         edb.get_usercache_db().remove({"user_id": platform_uuid})
     edb.get_place_db().remove()
     edb.get_trip_new_db().remove()
Esempio n. 8
0
def del_objects(args):
    """
    Delete trips, sections, places and stops matching the parsed CLI args.

    args.user_id: a user UUID string, or "all" to delete across every user.
    args.date: optional "YYYY-MM-DD"; if given, only objects after that day
    are deleted (trips/sections matched on start_ts, places/stops on exit_ts).
    """
    del_query = {}
    if args.user_id != "all":
        del_query['user_id'] = uuid.UUID(args.user_id)
    
    if args.date is None:
        # no date filter: both queries are the bare (possibly per-user) filter
        trip_query = del_query
        place_query = del_query
    else:
        day_dt = pydt.datetime.strptime(args.date, "%Y-%m-%d")
        logging.debug("day_dt is %s" % day_dt)
        # mktime interprets the timetuple in the server's local timezone
        day_ts = time.mktime(day_dt.timetuple())
        logging.debug("day_ts is %s" % day_ts)
        # copy so the two queries can carry different timestamp fields
        trip_query = copy.copy(del_query)
        trip_query.update({"start_ts": {"$gt": day_ts}})
        place_query = copy.copy(del_query)
        place_query.update({"exit_ts": {"$gt": day_ts}})

    print "trip_query = %s" % trip_query
    print "place_query = %s" % place_query

    # Since sections have the same basic structure as trips and stops have the
    # same basic structure as places, we can reuse the queries
    print "Deleting trips for %s after %s" % (args.user_id, args.date)
    print edb.get_trip_new_db().remove(trip_query)
    print "Deleting sections for %s after %s" % (args.user_id, args.date)
    print edb.get_section_new_db().remove(trip_query)
    print "Deleting places for %s after %s" % (args.user_id, args.date)
    print edb.get_place_db().remove(place_query)
    print "Deleting stops for %s after %s" % (args.user_id, args.date)
    print edb.get_stop_db().remove(place_query)
 def setUp(self):
     # Clear all related state, load a recorded real-world day, then run the
     # pipeline through trip and section segmentation so the test has data.
     self.clearRelatedDb()
     edb.get_trip_db().remove()
     edb.get_section_db().remove()
     edb.get_trip_new_db().remove()
     edb.get_section_new_db().remove()
     etc.setupRealExample(self, "emission/tests/data/real_examples/shankari_2015-aug-21")
     eaicf.filter_accuracy(self.testUUID)
     estfm.move_all_filters_to_data()
     eaist.segment_current_trips(self.testUUID)
     eaiss.segment_current_sections(self.testUUID)
def read_data(uuid=None, size=None, old=True):
    """
    Read trips from the database.

    :param uuid: restrict to this user id (for old-style reads, None = all users)
    :param size: for old-style reads, stop after this many valid trips
    :param old: True reads legacy 'move' trips, False reads new-style trips
    :return: list of Trip objects (old style) or ecwt.Trip wrappers (new style)
    """
    data = []
    trip_db = edb.get_trip_db()
    if not old:
        trip_db = edb.get_trip_new_db()
        trips = trip_db.find({"user_id": uuid})
    else:
        if uuid:
            trips = trip_db.find({'user_id': uuid, 'type': 'move'})
        else:
            trips = trip_db.find({'type': 'move'})
        for t in trips:
            try:
                trip = Trip.trip_from_json(t)
            except Exception:
                # BUG FIX: was a bare `except:`, which would also swallow
                # KeyboardInterrupt/SystemExit; skip only malformed documents
                continue
            # skip trips missing the fields the callers rely on
            if not (trip.trip_start_location and trip.trip_end_location
                    and trip.start_time):
                continue
            data.append(trip)
            if size:
                if len(data) == size:
                    break
        return data
    return [ecwt.Trip(trip) for trip in trips]
Esempio n. 11
0
def get_aggregate_trips(time_query, box=None):
    """Return trips across all users that match time_query, sorted by time.

    If box is provided, only trips whose start and end locations both fall
    within the bounding box are returned.
    """
    query = _get_ts_query(time_query)
    if box:
        query["start_loc"] = {"$geoWithin": {"$box": box}}
        query["end_loc"] = {"$geoWithin": {"$box": box}}
    cursor = edb.get_trip_new_db().find(query).sort(
        time_query.timeType, pymongo.ASCENDING)
    return [ecwt.Trip(trip_doc) for trip_doc in cursor]
Esempio n. 12
0
def get_trips(user_id, time_query):
    """Return the given user's trips matching time_query, ascending by time."""
    query = _get_ts_query(time_query)
    query["user_id"] = user_id
    sorted_cursor = edb.get_trip_new_db().find(query).sort(
        time_query.timeType, pymongo.ASCENDING)
    # TODO: Fix "TripIterator" and return it instead of this list
    return [ecwt.Trip(d) for d in sorted_cursor]
    def testReadDataNew(self):
        # Exercise cp.read_data(old=False) against whatever is currently in
        # the new-style trip collection.
        db = edb.get_trip_new_db()
        trips = db.find()
        uuids = set()
        for t in trips:
            # NOTE(review): this tests self.testUUID as a *key* of the trip
            # document, not as the 'user_id' value -- looks like a typo for
            # t['user_id']; confirm before relying on this branch.
            if self.testUUID in t:
                uuids.add(t[self.testUUID])
        if len(uuids) == 0:
            uuids.add(None)
        self.uuids = uuids

        data = cp.read_data(size=10, old=False)
        print 'there are ' + str(len(data))
        self.assertTrue(len(data) == 10 or len(data) == 0) #len(data)==0 if the test is run on an empty database
        # an unknown uuid must yield no trips
        uuid = 'baduuid'
        data = cp.read_data(uuid=uuid, old=False)
        self.assertTrue(not data)
        uuid = self.uuids.pop()
        data = cp.read_data(uuid=uuid, old=False)
        self.assertTrue(len(data) <= db.find({'user_id' : uuid}).count())
        self.uuids.add(uuid)
        # total across every uuid should equal the final single-uuid read below
        sum = 0
        for uuid in self.uuids:
            data = cp.read_data(uuid=uuid, old=False)
            sum += len(data)
        data = cp.read_data(uuid=uuid, old=False)
        self.assertTrue(0 <= len(data) <= db.find().count())
        print "len(data) = %s" % len(data)
        self.assertTrue(len(data) == sum)
def export_timeline(user_id_str, start_day_str, end_day_str, file_name):
    logging.info(
        "Extracting trips for user %s day %s -> %s and saving to file %s" %
        (user_id_str, start_day_str, end_day_str, file))

    # day_dt = pydt.datetime.strptime(day_str, "%Y-%m-%d").date()
    start_day_dt = pydt.datetime.strptime(start_day_str, "%Y-%m-%d")
    end_day_dt = pydt.datetime.strptime(end_day_str, "%Y-%m-%d")
    logging.debug("start_day_dt is %s, end_day_dt is %s" %
                  (start_day_dt, end_day_dt))
    # TODO: Convert to call to get_timeseries once we get that working
    # Or should we even do that?
    query = {
        'user_id': uuid.UUID(user_id_str),
        'start_local_dt': {
            '$gt': start_day_dt,
            "$lt": end_day_dt
        }
    }
    print "query = %s" % query
    entry_list = list(edb.get_trip_new_db().find(query))
    logging.info("Found %d entries" % len(entry_list))
    json.dump(entry_list,
              open(file_name, "w"),
              default=bju.default,
              allow_nan=False,
              indent=4)
Esempio n. 15
0
def get_aggregate_trips(time_query, box=None):
    """Fetch trips for every user that match time_query, sorted ascending.

    When box is given, restrict to trips whose start_loc and end_loc both
    lie within the bounding box.
    """
    find_filter = _get_ts_query(time_query)
    if box:
        for loc_field in ("start_loc", "end_loc"):
            find_filter[loc_field] = {"$geoWithin": {"$box": box}}
    docs = edb.get_trip_new_db().find(find_filter).sort(
        time_query.timeType, pymongo.ASCENDING)
    return [ecwt.Trip(d) for d in docs]
    def setUp(self):
        """Reset the common/new trip collections and create two linked common
        places (testStart -> testEnd) for the common-trip tests."""
        self.clearRelatedDb()
        edb.get_common_trip_db().drop()
        edb.get_section_new_db().drop()
        edb.get_trip_new_db().drop()
        self.testUserId = uuid.uuid4()
        self.testEnd = esdcpq.make_new_common_place(uuid.uuid4(), gj.Point((1,2.092)))
        esdcpq.save_common_place(self.testEnd)
        # re-read so testEnd carries the persisted _id
        self.testEnd = esdcpq.get_common_place_at_location(self.testEnd.location)
        self.testEnd.successors = ()

        self.testStart = esdcpq.make_new_common_place(uuid.uuid4(), gj.Point((1,2)))
        # the start place links forward to the end place
        self.testStart.successors = (self.testEnd.get_id(),)

        esdcpq.save_common_place(self.testEnd)
        esdcpq.save_common_place(self.testStart)
        self.time0 = datetime.datetime(1900, 1, 1, 1)
    def setUp(self):
        """Reset the common/new trip collections and build a two-place chain
        (testStart whose successor is testEnd) used by the tests below."""
        self.clearRelatedDb()
        edb.get_common_trip_db().drop()
        edb.get_section_new_db().drop()
        edb.get_trip_new_db().drop()
        self.testUserId = uuid.uuid4()
        self.testEnd = esdcpq.make_new_common_place(uuid.uuid4(),
                                                    gj.Point((1, 2.092)))
        esdcpq.save_common_place(self.testEnd)
        # reload so the wrapper has the saved _id
        self.testEnd = esdcpq.get_common_place_at_location(
            self.testEnd.location)
        self.testEnd.successors = ()

        self.testStart = esdcpq.make_new_common_place(uuid.uuid4(),
                                                      gj.Point((1, 2)))
        self.testStart.successors = (self.testEnd.get_id(), )

        esdcpq.save_common_place(self.testEnd)
        esdcpq.save_common_place(self.testStart)
        self.time0 = datetime.datetime(1900, 1, 1, 1)
def export_timeline(user_id_str, start_day_str, end_day_str, file_name):
    logging.info("Extracting trips for user %s day %s -> %s and saving to file %s" %
                 (user_id_str, start_day_str, end_day_str, file))

    # day_dt = pydt.datetime.strptime(day_str, "%Y-%m-%d").date()
    start_day_dt = pydt.datetime.strptime(start_day_str, "%Y-%m-%d")
    end_day_dt = pydt.datetime.strptime(end_day_str, "%Y-%m-%d")
    logging.debug("start_day_dt is %s, end_day_dt is %s" % (start_day_dt, end_day_dt))
    # TODO: Convert to call to get_timeseries once we get that working
    # Or should we even do that?
    query = {'user_id': uuid.UUID(user_id_str), 'start_local_dt': {'$gt': start_day_dt, "$lt": end_day_dt}}
    print "query = %s" % query
    entry_list = list(edb.get_trip_new_db().find(query))
    logging.info("Found %d entries" % len(entry_list))
    json.dump(entry_list, open(file_name, "w"), default=bju.default, allow_nan=False, indent=4)
def read_data(uuid=None, size=None, old=True):
    """
    Read trips from the database.

    :param uuid: restrict to this user id (for old-style reads, None = all users)
    :param size: for old-style reads, stop after this many valid trips
    :param old: True reads legacy 'move' trips, False reads new-style trips
    :return: list of Trip objects (old style) or ecwt.Trip wrappers (new style)
    """
    data = []
    trip_db = edb.get_trip_db()
    if not old:
        trip_db = edb.get_trip_new_db()
        trips = trip_db.find({"user_id" : uuid})
    else:
        if uuid:
            trips = trip_db.find({'user_id' : uuid, 'type' : 'move'})
        else:
            trips = trip_db.find({'type' : 'move'})
        for t in trips:
            try:
                trip = Trip.trip_from_json(t)
            except Exception:
                # BUG FIX: was a bare `except:`, which would also swallow
                # KeyboardInterrupt/SystemExit; skip only malformed documents
                continue
            # skip trips missing the fields the callers rely on
            if not (trip.trip_start_location and trip.trip_end_location and trip.start_time):
                continue
            data.append(trip)
            if size:
                if len(data) == size:
                    break
        return data
    return [ecwt.Trip(trip) for trip in trips]
            else:
                exit_tz = "America/Los_Angeles"
            logging.debug("exit metadata timezone = %s" % exit_tz)
            entry['exit_local_dt'] = get_local_date(entry['exit_fmt_time'], exit_tz)
        else:
            logging.warning("No exit timestamp found, skipping")

        collection.save(entry)

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("key",
        help="the key representing the stream that we want to fix")
    parser.add_argument("-f", "--filename",
        help="a saved timeline whose local_dt needs to be fixed. If this is specified, key is ignored")

    args = parser.parse_args()
    # Dispatch: a saved timeline file takes precedence over the key; otherwise
    # the key selects which collection (or generic timeseries stream) to fix.
    if args.filename is not None:
        fix_file(args.filename)
    elif args.key == "trips":
        fix_trips_or_sections(edb.get_trip_new_db())
    elif args.key == "sections":
        fix_trips_or_sections(edb.get_section_new_db())
    elif args.key == "places":
        fix_stops_or_places(edb.get_place_db())
    elif args.key == "stops":
        fix_stops_or_places(edb.get_stop_db())
    else:
        fix_timeseries(args.key)

# Removes all materialized views and the pipeline state.
# This will cause us to reprocess the pipeline from scratch
# As history begins to accumulate, we may want to specify a point to reset the
# pipeline to instead of deleting everything
import logging
logging.basicConfig(level=logging.DEBUG)

import emission.core.get_database as edb

if __name__ == '__main__':
    # Delete all materialized trip/section views plus the pipeline state, so
    # the pipeline re-runs from scratch (see the module comment above).
    print "Deleting all trips"
    print edb.get_trip_new_db().remove()
    print "Deleting all sections"
    print edb.get_section_new_db().remove()
    print "Deleting pipeline state"
    print edb.get_pipeline_state_db().remove()
def create_new_trip(user_id):
    """Persist an empty trip stub for user_id and return it as an ecwt.Trip."""
    new_id = edb.get_trip_new_db().save({"user_id": user_id})
    return ecwt.Trip({"user_id": user_id, "_id": new_id})
    logging.info("About to convert %s entries" % result_cursor.count())
    for i, wrapper in enumerate(result_cursor):
        entry = convert_wrapper_to_entry(key, wrapper)
        if entry.get_id() != wrapper["_id"]:
            logging.warn("entry.id = %s, wrapper.id = %s" % (entry.get_id(), wrapper["_id"]))
        if i % 10000 == 0:
            print "converted %s -> %s" % (wrapper, entry)
        edb.get_timeseries_db().insert(entry)
        collection.remove(wrapper)

def move_ts_entries(key):
    """Move every document with metadata.key == key from the raw timeseries
    collection into the analysis timeseries collection."""
    tdb = edb.get_timeseries_db()
    atdb = edb.get_analysis_timeseries_db()

    result_cursor = tdb.find({'metadata.key': key})
    logging.info("About to convert %s entries" % result_cursor.count())

    for i, entry_doc in enumerate(result_cursor):
        # progress marker every 10k documents
        if i % 10000 == 0:
            print "moved %s from one ts to the other" % (entry_doc)
        # insert before remove: a crash mid-move duplicates rather than loses data
        atdb.insert(entry_doc)
        tdb.remove(entry_doc)

if __name__ == '__main__':
    # No arguments - muahahahaha. Just going to copy known fields over.
    # Migrate each legacy collection into its keyed timeseries stream, then
    # move already-converted smoothing results into the analysis timeseries.
    convert_collection(edb.get_trip_new_db(), "segmentation/raw_trip")
    convert_collection(edb.get_place_db(), "segmentation/raw_place")
    convert_collection(edb.get_section_new_db(), "segmentation/raw_section")
    convert_collection(edb.get_stop_db(), "segmentation/raw_stop")
    move_ts_entries("analysis/smoothing")
def get_trip(trip_id):
    """
    Returns the trip for specified trip id.
    :rtype : emission.core.wrapper.Trip
    """
    trip_doc = edb.get_trip_new_db().find_one({"_id": trip_id})
    return ecwt.Trip(trip_doc)
 def tearDown(self):
     # Drop the collections this test populated
     edb.get_common_trip_db().drop()
     edb.get_section_new_db().drop()
     edb.get_trip_new_db().drop()
Esempio n. 26
0
def create_new_trip(user_id):
    """Insert a bare trip document for user_id; return the saved trip wrapper."""
    trip_doc = {"user_id": user_id}
    saved_id = edb.get_trip_new_db().save(trip_doc)
    return ecwt.Trip({"_id": saved_id, "user_id": user_id})
def save_trip(trip):
    # Persist (insert or update) the trip document in the new-style collection.
    edb.get_trip_new_db().save(trip)
    def tearDown(self):
        import emission.core.get_database as edb

        # Delete this user's timeseries entries, then clear the section and
        # trip collections (unfiltered -- assumed to be a test database).
        edb.get_timeseries_db().remove({"user_id": self.testUUID})
        edb.get_section_new_db().remove()
        edb.get_trip_new_db().remove()
Esempio n. 29
0
        else:
            logging.warning("No exit timestamp found, skipping")

        collection.save(entry)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "key", help="the key representing the stream that we want to fix")
    parser.add_argument(
        "-f",
        "--filename",
        help=
        "a saved timeline whose local_dt needs to be fixed. If this is specified, key is ignored"
    )

    args = parser.parse_args()
    if args.filename is not None:
        fix_file(args.filename)
    elif args.key == "trips":
        fix_trips_or_sections(edb.get_trip_new_db())
    elif args.key == "sections":
        fix_trips_or_sections(edb.get_section_new_db())
    elif args.key == "places":
        fix_stops_or_places(edb.get_place_db())
    elif args.key == "stops":
        fix_stops_or_places(edb.get_stop_db())
    else:
        fix_timeseries(args.key)
 def setUp(self):
     # Fresh user id per test; empty the new-style trip collection
     self.testUserId = uuid.uuid4()
     edb.get_trip_new_db().remove()
Esempio n. 31
0
def get_trip(trip_id):
    """
    Returns the trip for specified trip id.
    :rtype : emission.core.wrapper.Trip
    """
    found = edb.get_trip_new_db().find_one({"_id": trip_id})
    return ecwt.Trip(found)
 def setUp(self):
     # Start each test with empty common-trip, section and trip collections
     edb.get_common_trip_db().drop()
     edb.get_section_new_db().drop()
     edb.get_trip_new_db().drop()
 def tearDown(self):
     # Drop the collections populated during the test
     edb.get_common_trip_db().drop()
     edb.get_section_new_db().drop()
     edb.get_trip_new_db().drop()
 def testSaveTrip(self):
     # create_fake_trip presumably saves a trip with end_ts == 6 (TODO: confirm
     # in its definition); verify exactly one such document exists and that its
     # _id and user_id match what was returned.
     new_trip = self.create_fake_trip()
     self.assertEqual(edb.get_trip_new_db().find({"end_ts": 6}).count(), 1)
     self.assertEqual(edb.get_trip_new_db().find_one({"end_ts": 6})["_id"], new_trip.get_id())
     self.assertEqual(edb.get_trip_new_db().find_one({"end_ts": 6})["user_id"], self.testUserId)
Esempio n. 35
0
    def tearDown(self):
        import emission.core.get_database as edb

        # Delete this user's timeseries entries, then clear the section and
        # trip collections (unfiltered -- assumed to be a test database).
        edb.get_timeseries_db().remove({"user_id": self.testUUID})
        edb.get_section_new_db().remove()
        edb.get_trip_new_db().remove()
Esempio n. 36
0
def save_trip(trip):
    # Persist (insert or update) the trip document in the new-style collection.
    edb.get_trip_new_db().save(trip)
def get_trips(user_id, time_query):
    """Return trips for user_id matching time_query, sorted ascending by time."""
    find_filter = _get_ts_query(time_query)
    find_filter["user_id"] = user_id
    cursor = edb.get_trip_new_db().find(find_filter)
    cursor = cursor.sort(time_query.timeType, pymongo.ASCENDING)
    # TODO: Fix "TripIterator" and return it instead of this list
    return [ecwt.Trip(trip_doc) for trip_doc in cursor]