def clearRelatedDb(self):
    edb.get_timeseries_db().remove()
    edb.get_place_db().remove()
    edb.get_stop_db().remove()
    edb.get_trip_new_db().remove()
    edb.get_section_new_db().remove()
def setUp(self):
    # We need to access the database directly sometimes in order to
    # forcibly insert entries for the tests to pass. But we put the import
    # in here to reduce the temptation to use the database directly elsewhere.
    import emission.core.get_database as edb
    import uuid

    self.testUUID = uuid.uuid4()

    self.trips = json.load(open("emission/tests/data/smoothing_data/trip_list.txt"),
                           object_hook=bju.object_hook)
    for trip in self.trips:
        trip["user_id"] = self.testUUID
        edb.get_trip_new_db().save(trip)
    self.trips = [ecwt.Trip(t) for t in self.trips]

    self.sections = json.load(open("emission/tests/data/smoothing_data/section_list.txt"),
                              object_hook=bju.object_hook)
    for section in self.sections:
        section["user_id"] = self.testUUID
        edb.get_section_new_db().save(section)
    self.sections = [ecws.Section(s) for s in self.sections]

    self.ts = esta.TimeSeries.get_time_series(self.testUUID)
def clearRelatedDb(self):
    edb.get_timeseries_db().remove({'user_id': self.testUUID})
    edb.get_place_db().remove({'user_id': self.testUUID})
    edb.get_stop_db().remove({'user_id': self.testUUID})
    edb.get_trip_new_db().remove({'user_id': self.testUUID})
    edb.get_section_new_db().remove({'user_id': self.testUUID})
def tearDown(self):
    edb.get_timeseries_db().remove({"user_id": self.androidUUID})
    edb.get_timeseries_db().remove({"user_id": self.iosUUID})
    edb.get_usercache_db().remove({"user_id": self.androidUUID})
    edb.get_usercache_db().remove({"user_id": self.iosUUID})
    edb.get_place_db().remove()
    edb.get_trip_new_db().remove()
def del_objects(args):
    del_query = {}
    if args.user_id != "all":
        del_query['user_id'] = uuid.UUID(args.user_id)

    if args.date is None:
        trip_query = del_query
        place_query = del_query
    else:
        day_dt = pydt.datetime.strptime(args.date, "%Y-%m-%d")
        logging.debug("day_dt is %s" % day_dt)
        day_ts = time.mktime(day_dt.timetuple())
        logging.debug("day_ts is %s" % day_ts)
        trip_query = copy.copy(del_query)
        trip_query.update({"start_ts": {"$gt": day_ts}})
        place_query = copy.copy(del_query)
        place_query.update({"exit_ts": {"$gt": day_ts}})

    print "trip_query = %s" % trip_query
    print "place_query = %s" % place_query

    # Since sections have the same basic structure as trips and stops have the
    # same basic structure as places, we can reuse the queries
    print "Deleting trips for %s after %s" % (args.user_id, args.date)
    print edb.get_trip_new_db().remove(trip_query)
    print "Deleting sections for %s after %s" % (args.user_id, args.date)
    print edb.get_section_new_db().remove(trip_query)
    print "Deleting places for %s after %s" % (args.user_id, args.date)
    print edb.get_place_db().remove(place_query)
    print "Deleting stops for %s after %s" % (args.user_id, args.date)
    print edb.get_stop_db().remove(place_query)
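# A minimal sketch, not from the source, of how del_objects might be wired to
# argparse. The flag names are assumptions inferred from the attributes the
# function reads (args.user_id, args.date).
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("user_id",
        help="user to delete objects for, or 'all' for every user")
    parser.add_argument("-d", "--date",
        help="only delete objects after this day (YYYY-MM-DD); omit to delete everything")
    del_objects(parser.parse_args())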
def setUp(self):
    self.clearRelatedDb()
    edb.get_trip_db().remove()
    edb.get_section_db().remove()
    edb.get_trip_new_db().remove()
    edb.get_section_new_db().remove()

    etc.setupRealExample(self, "emission/tests/data/real_examples/shankari_2015-aug-21")
    eaicf.filter_accuracy(self.testUUID)
    estfm.move_all_filters_to_data()
    eaist.segment_current_trips(self.testUUID)
    eaiss.segment_current_sections(self.testUUID)
def read_data(uuid=None, size=None, old=True):
    data = []
    trip_db = edb.get_trip_db()
    if not old:
        trip_db = edb.get_trip_new_db()
        trips = trip_db.find({"user_id": uuid})
    else:
        if uuid:
            trips = trip_db.find({'user_id': uuid, 'type': 'move'})
        else:
            trips = trip_db.find({'type': 'move'})
        for t in trips:
            try:
                trip = Trip.trip_from_json(t)
            except:
                continue
            if not (trip.trip_start_location and trip.trip_end_location and trip.start_time):
                continue
            data.append(trip)
            if size:
                if len(data) == size:
                    break
        return data
    return [ecwt.Trip(trip) for trip in trips]
def get_aggregate_trips(time_query, box=None):
    curr_query = _get_ts_query(time_query)
    if box:
        curr_query.update({"start_loc": {"$geoWithin": {"$box": box}}})
        curr_query.update({"end_loc": {"$geoWithin": {"$box": box}}})
    trip_doc_cursor = edb.get_trip_new_db().find(curr_query).sort(
        time_query.timeType, pymongo.ASCENDING)
    return [ecwt.Trip(doc) for doc in trip_doc_cursor]
def get_trips(user_id, time_query):
    curr_query = _get_ts_query(time_query)
    curr_query.update({"user_id": user_id})
    trip_doc_cursor = edb.get_trip_new_db().find(curr_query).sort(
        time_query.timeType, pymongo.ASCENDING)
    # TODO: Fix "TripIterator" and return it instead of this list
    return [ecwt.Trip(doc) for doc in trip_doc_cursor]
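# A minimal usage sketch, not from the source: querying one user's trips for a
# fixed time window. The TimeQuery import path and constructor arguments are
# assumptions; all that get_trips() above actually needs is an object that
# _get_ts_query() understands and that exposes a timeType attribute to sort on.
import emission.storage.timeseries.timequery as estt

time_query = estt.TimeQuery("start_ts", 1440658800, 1440745200)  # one day, as epoch seconds
day_trips = get_trips(some_user_id, time_query)  # some_user_id: a uuid.UUID (placeholder)
print "found %d trips" % len(day_trips)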
def testReadDataNew(self):
    db = edb.get_trip_new_db()
    trips = db.find()
    uuids = set()
    for t in trips:
        if self.testUUID in t:
            uuids.add(t[self.testUUID])
    if len(uuids) == 0:
        uuids.add(None)
    self.uuids = uuids

    data = cp.read_data(size=10, old=False)
    print 'there are ' + str(len(data))
    # len(data) == 0 if the test is run on an empty database
    self.assertTrue(len(data) == 10 or len(data) == 0)

    uuid = 'baduuid'
    data = cp.read_data(uuid=uuid, old=False)
    self.assertTrue(not data)

    uuid = self.uuids.pop()
    data = cp.read_data(uuid=uuid, old=False)
    self.assertTrue(len(data) <= db.find({'user_id': uuid}).count())
    self.uuids.add(uuid)

    sum = 0
    for uuid in self.uuids:
        data = cp.read_data(uuid=uuid, old=False)
        sum += len(data)
    data = cp.read_data(uuid=uuid, old=False)
    self.assertTrue(0 <= len(data) <= db.find().count())
    print "len(data) = %s" % len(data)
    self.assertTrue(len(data) == sum)
def export_timeline(user_id_str, start_day_str, end_day_str, file_name):
    logging.info("Extracting trips for user %s day %s -> %s and saving to file %s" %
                 (user_id_str, start_day_str, end_day_str, file_name))

    # day_dt = pydt.datetime.strptime(day_str, "%Y-%m-%d").date()
    start_day_dt = pydt.datetime.strptime(start_day_str, "%Y-%m-%d")
    end_day_dt = pydt.datetime.strptime(end_day_str, "%Y-%m-%d")
    logging.debug("start_day_dt is %s, end_day_dt is %s" % (start_day_dt, end_day_dt))

    # TODO: Convert to call to get_timeseries once we get that working
    # Or should we even do that?
    query = {'user_id': uuid.UUID(user_id_str),
             'start_local_dt': {'$gt': start_day_dt, "$lt": end_day_dt}}
    print "query = %s" % query

    entry_list = list(edb.get_trip_new_db().find(query))
    logging.info("Found %d entries" % len(entry_list))
    json.dump(entry_list, open(file_name, "w"),
              default=bju.default, allow_nan=False, indent=4)
def setUp(self):
    self.clearRelatedDb()
    edb.get_common_trip_db().drop()
    edb.get_section_new_db().drop()
    edb.get_trip_new_db().drop()
    self.testUserId = uuid.uuid4()

    self.testEnd = esdcpq.make_new_common_place(uuid.uuid4(), gj.Point((1, 2.092)))
    esdcpq.save_common_place(self.testEnd)
    self.testEnd = esdcpq.get_common_place_at_location(self.testEnd.location)
    self.testEnd.successors = ()

    self.testStart = esdcpq.make_new_common_place(uuid.uuid4(), gj.Point((1, 2)))
    self.testStart.successors = (self.testEnd.get_id(),)
    esdcpq.save_common_place(self.testEnd)
    esdcpq.save_common_place(self.testStart)

    self.time0 = datetime.datetime(1900, 1, 1, 1)
        else:
            exit_tz = "America/Los_Angeles"
        logging.debug("exit metadata timezone = %s" % exit_tz)
        entry['exit_local_dt'] = get_local_date(entry['exit_fmt_time'], exit_tz)
    else:
        logging.warning("No exit timestamp found, skipping")
    collection.save(entry)

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("key",
        help="the key representing the stream that we want to fix")
    parser.add_argument("-f", "--filename",
        help="a saved timeline whose local_dt needs to be fixed. If this is specified, key is ignored")
    args = parser.parse_args()

    if args.filename is not None:
        fix_file(args.filename)
    elif args.key == "trips":
        fix_trips_or_sections(edb.get_trip_new_db())
    elif args.key == "sections":
        fix_trips_or_sections(edb.get_section_new_db())
    elif args.key == "places":
        fix_stops_or_places(edb.get_place_db())
    elif args.key == "stops":
        fix_stops_or_places(edb.get_stop_db())
    else:
        fix_timeseries(args.key)
# Removes all materialized views and the pipeline state.
# This will cause us to reprocess the pipeline from scratch.
# As history begins to accumulate, we may want to specify a point to reset the
# pipeline to instead of deleting everything.
import logging
logging.basicConfig(level=logging.DEBUG)

import emission.core.get_database as edb

if __name__ == '__main__':
    print "Deleting all trips"
    print edb.get_trip_new_db().remove()
    print "Deleting all sections"
    print edb.get_section_new_db().remove()
    print "Deleting pipeline state"
    print edb.get_pipeline_state_db().remove()
def create_new_trip(user_id):
    _id = edb.get_trip_new_db().save({"user_id": user_id})
    return ecwt.Trip({"_id": _id, "user_id": user_id})
logging.info("About to convert %s entries" % result_cursor.count()) for i, wrapper in enumerate(result_cursor): entry = convert_wrapper_to_entry(key, wrapper) if entry.get_id() != wrapper["_id"]: logging.warn("entry.id = %s, wrapper.id = %s" % (entry.get_id(), wrapper["_id"])) if i % 10000 == 0: print "converted %s -> %s" % (wrapper, entry) edb.get_timeseries_db().insert(entry) collection.remove(wrapper) def move_ts_entries(key): tdb = edb.get_timeseries_db() atdb = edb.get_analysis_timeseries_db() result_cursor = tdb.find({'metadata.key': key}) logging.info("About to convert %s entries" % result_cursor.count()) for i, entry_doc in enumerate(result_cursor): if i % 10000 == 0: print "moved %s from one ts to the other" % (entry_doc) atdb.insert(entry_doc) tdb.remove(entry_doc) if __name__ == '__main__': # No arguments - muahahahaha. Just going to copy known fields over. convert_collection(edb.get_trip_new_db(), "segmentation/raw_trip") convert_collection(edb.get_place_db(), "segmentation/raw_place") convert_collection(edb.get_section_new_db(), "segmentation/raw_section") convert_collection(edb.get_stop_db(), "segmentation/raw_stop") move_ts_entries("analysis/smoothing")
def get_trip(trip_id):
    """
    Returns the trip for the specified trip id.
    :rtype: emission.core.wrapper.Trip
    """
    return ecwt.Trip(edb.get_trip_new_db().find_one({"_id": trip_id}))
def tearDown(self):
    edb.get_common_trip_db().drop()
    edb.get_section_new_db().drop()
    edb.get_trip_new_db().drop()
def save_trip(trip):
    edb.get_trip_new_db().save(trip)
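# A minimal sketch, not from the source, of how the storage helpers above
# (create_new_trip, save_trip, get_trip) might compose into a round trip.
# The fields set on the wrapper (start_ts, end_ts) are illustrative only.
import uuid

user_id = uuid.uuid4()
trip = create_new_trip(user_id)      # inserts a stub document and wraps it
trip.start_ts = 5                    # assumed fields, mirroring the test fixtures
trip.end_ts = 6
save_trip(trip)                      # writes the updated wrapper back
round_tripped = get_trip(trip.get_id())
assert round_tripped.user_id == user_id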
def tearDown(self):
    import emission.core.get_database as edb
    edb.get_timeseries_db().remove({"user_id": self.testUUID})
    edb.get_section_new_db().remove()
    edb.get_trip_new_db().remove()
def setUp(self):
    self.testUserId = uuid.uuid4()
    edb.get_trip_new_db().remove()
def setUp(self):
    edb.get_common_trip_db().drop()
    edb.get_section_new_db().drop()
    edb.get_trip_new_db().drop()
def testSaveTrip(self):
    new_trip = self.create_fake_trip()
    self.assertEqual(edb.get_trip_new_db().find({"end_ts": 6}).count(), 1)
    self.assertEqual(edb.get_trip_new_db().find_one({"end_ts": 6})["_id"],
                     new_trip.get_id())
    self.assertEqual(edb.get_trip_new_db().find_one({"end_ts": 6})["user_id"],
                     self.testUserId)