def clearRelatedDb(self):
    """Clear the collections used by this test.

    Calls ``remove()`` with no filter argument on the timeseries, place,
    stop, trip and section collections, in that order.
    """
    collection_getters = (
        edb.get_timeseries_db,
        edb.get_place_db,
        edb.get_stop_db,
        edb.get_trip_new_db,
        edb.get_section_new_db,
    )
    for get_collection in collection_getters:
        get_collection().remove()
def setUp(self):
    """Insert the smoothing fixtures into the database for a new test user.

    Generates a random ``self.testUUID``, loads the trip and section
    fixture files, stamps every entry with that user id and saves it, and
    finally exposes the wrapped entries as ``self.trips`` /
    ``self.sections`` and the user's time series as ``self.ts``.
    """
    # We need to access the database directly sometimes in order to
    # forcibly insert entries for the tests to pass. But we put the import
    # in here to reduce the temptation to use the database directly elsewhere.
    import emission.core.get_database as edb
    import uuid

    self.testUUID = uuid.uuid4()

    # Read each fixture inside a `with` block so the file handle is closed
    # as soon as parsing is done instead of being left to the garbage
    # collector.
    with open("emission/tests/data/smoothing_data/trip_list.txt") as trip_file:
        self.trips = json.load(trip_file, object_hook=bju.object_hook)
    for trip in self.trips:
        trip["user_id"] = self.testUUID
        edb.get_trip_new_db().save(trip)
    self.trips = [ecwt.Trip(t) for t in self.trips]

    with open("emission/tests/data/smoothing_data/section_list.txt") as section_file:
        self.sections = json.load(section_file, object_hook=bju.object_hook)
    for section in self.sections:
        section["user_id"] = self.testUUID
        edb.get_section_new_db().save(section)
    self.sections = [ecws.Section(s) for s in self.sections]

    self.ts = esta.TimeSeries.get_time_series(self.testUUID)
def clearRelatedDb(self):
    """Clear this test user's entries from the collections used by the test.

    Calls ``remove`` with a ``{'user_id': self.testUUID}`` filter on the
    timeseries, place, stop, trip and section collections, in that order.
    """
    collection_getters = (
        edb.get_timeseries_db,
        edb.get_place_db,
        edb.get_stop_db,
        edb.get_trip_new_db,
        edb.get_section_new_db,
    )
    for get_collection in collection_getters:
        # A fresh filter dict per call, exactly as the unrolled version did
        get_collection().remove({'user_id': self.testUUID})
def setUp(self):
    """Insert the smoothing fixtures into the database for a new test user.

    Generates a random ``self.testUUID``, loads the trip and section
    fixture files, stamps every entry with that user id and saves it, and
    finally exposes the wrapped entries as ``self.trips`` /
    ``self.sections`` and the user's time series as ``self.ts``.
    """
    # We need to access the database directly sometimes in order to
    # forcibly insert entries for the tests to pass. But we put the import
    # in here to reduce the temptation to use the database directly elsewhere.
    import emission.core.get_database as edb
    import uuid

    def load_fixture(path):
        # The original passed a bare open() to json.load and never closed
        # it; the context manager releases the handle once parsing is done.
        with open(path) as fixture_file:
            return json.load(fixture_file, object_hook=bju.object_hook)

    self.testUUID = uuid.uuid4()

    self.trips = load_fixture("emission/tests/data/smoothing_data/trip_list.txt")
    for trip in self.trips:
        trip["user_id"] = self.testUUID
        edb.get_trip_new_db().save(trip)
    self.trips = [ecwt.Trip(t) for t in self.trips]

    self.sections = load_fixture("emission/tests/data/smoothing_data/section_list.txt")
    for section in self.sections:
        section["user_id"] = self.testUUID
        edb.get_section_new_db().save(section)
    self.sections = [ecws.Section(s) for s in self.sections]

    self.ts = esta.TimeSeries.get_time_series(self.testUUID)
def del_objects(args):
    """Delete trips, sections, places and stops selected by ``args``.

    ``args.user_id`` is either the string "all" (no user filter) or a
    string accepted by ``uuid.UUID``. ``args.date`` is either None (no
    date filter) or a "%Y-%m-%d" string; when given, trips/sections are
    matched on ``start_ts`` and places/stops on ``exit_ts`` being greater
    than that day's timestamp. The queries and the result of every remove
    call are printed.
    """
    base_query = {}
    if args.user_id != "all":
        base_query['user_id'] = uuid.UUID(args.user_id)

    if args.date is not None:
        day_dt = pydt.datetime.strptime(args.date, "%Y-%m-%d")
        logging.debug("day_dt is %s" % day_dt)
        day_ts = time.mktime(day_dt.timetuple())
        logging.debug("day_ts is %s" % day_ts)
        # Independent copies of the base query, each with its own time bound
        trip_query = dict(base_query, start_ts={"$gt": day_ts})
        place_query = dict(base_query, exit_ts={"$gt": day_ts})
    else:
        trip_query = base_query
        place_query = base_query

    print("trip_query = %s" % trip_query)
    print("place_query = %s" % place_query)

    # Since sections have the same basic structure as trips and stops have the
    # same basic structure as places, we can reuse the queries
    deletions = (
        ("Deleting trips for %s after %s", edb.get_trip_new_db, trip_query),
        ("Deleting sections for %s after %s", edb.get_section_new_db, trip_query),
        ("Deleting places for %s after %s", edb.get_place_db, place_query),
        ("Deleting stops for %s after %s", edb.get_stop_db, place_query),
    )
    for message, get_collection, query in deletions:
        print(message % (args.user_id, args.date))
        print(get_collection().remove(query))
def testSaveSection(self):
    """Saving a section stores exactly one matching document that keeps
    the section's id, user id and trip id."""
    section = esds.create_new_section(self.testUserId, self.test_trip_id)
    section.start_ts = 5
    section.end_ts = 6
    esds.save_section(section)

    section_db = edb.get_section_new_db()
    by_end_ts = {"end_ts": 6}
    self.assertEqual(section_db.find(by_end_ts).count(), 1)

    # Fetch the stored document once and check each field against it
    stored = section_db.find_one(by_end_ts)
    self.assertEqual(stored["_id"], section.get_id())
    self.assertEqual(stored["user_id"], self.testUserId)
    self.assertEqual(stored["trip_id"], self.test_trip_id)
def setUp(self):
    """Reset the trip/section collections, load the real example and run
    it through accuracy filtering, the filter move, trip segmentation and
    section segmentation."""
    self.clearRelatedDb()
    stale_collection_getters = (
        edb.get_trip_db,
        edb.get_section_db,
        edb.get_trip_new_db,
        edb.get_section_new_db,
    )
    for get_collection in stale_collection_getters:
        get_collection().remove()

    example_path = "emission/tests/data/real_examples/shankari_2015-aug-21"
    etc.setupRealExample(self, example_path)

    # self.testUUID is not assigned in this method; presumably
    # setupRealExample sets it -- confirm against that helper.
    eaicf.filter_accuracy(self.testUUID)
    estfm.move_all_filters_to_data()
    eaist.segment_current_trips(self.testUUID)
    eaiss.segment_current_sections(self.testUUID)
def setUp(self):
    """Drop the common trip, section and trip collections and create two
    linked common places.

    ``self.testEnd`` is saved, read back by location and given no
    successors; ``self.testStart`` lists ``self.testEnd`` as its only
    successor. Both are then saved. ``self.time0`` is a fixed reference
    datetime.
    """
    self.clearRelatedDb()
    for get_collection in (edb.get_common_trip_db,
                           edb.get_section_new_db,
                           edb.get_trip_new_db):
        get_collection().drop()

    self.testUserId = uuid.uuid4()

    # Save the end place first, then continue with the copy that the
    # location lookup returns.
    end_place = esdcpq.make_new_common_place(uuid.uuid4(), gj.Point((1, 2.092)))
    esdcpq.save_common_place(end_place)
    end_place = esdcpq.get_common_place_at_location(end_place.location)
    end_place.successors = ()
    self.testEnd = end_place

    start_place = esdcpq.make_new_common_place(uuid.uuid4(), gj.Point((1, 2)))
    start_place.successors = (end_place.get_id(),)
    self.testStart = start_place

    # The end place is saved again so the cleared successors are stored
    esdcpq.save_common_place(end_place)
    esdcpq.save_common_place(start_place)

    self.time0 = datetime.datetime(1900, 1, 1, 1)
def get_sections_for_trip(user_id, trip_id):
    """
    Return the sections that are children of this trip.

    Looks up section documents matching both ``user_id`` and ``trip_id``,
    sorted by ascending ``start_ts``, and returns them as a list of
    ``ecws.Section`` wrappers.
    """
    child_query = {"user_id": user_id, "trip_id": trip_id}
    matching_docs = edb.get_section_new_db().find(child_query)
    ordered_docs = matching_docs.sort("start_ts", pymongo.ASCENDING)
    return [ecws.Section(section_doc) for section_doc in ordered_docs]
def testSaveSection(self):
    """Saving a section stores exactly one matching document that keeps
    the section's id, user id and trip id."""
    new_section = esds.create_new_section(self.testUserId, self.test_trip_id)
    new_section.start_ts = 5
    new_section.end_ts = 6
    esds.save_section(new_section)

    def stored_section():
        # Re-read the saved document from the database on every call
        return edb.get_section_new_db().find_one({"end_ts": 6})

    match_count = edb.get_section_new_db().find({"end_ts": 6}).count()
    self.assertEqual(match_count, 1)
    self.assertEqual(stored_section()["_id"], new_section.get_id())
    self.assertEqual(stored_section()["user_id"], self.testUserId)
    self.assertEqual(stored_section()["trip_id"], self.test_trip_id)
def setUp(self):
    """Drop the common trip, section and trip collections and create two
    linked common places.

    ``self.testEnd`` is saved, read back by location and given no
    successors; ``self.testStart`` lists ``self.testEnd`` as its only
    successor. Both are then saved. ``self.time0`` is a fixed reference
    datetime.
    """
    self.clearRelatedDb()
    collections_to_drop = [
        edb.get_common_trip_db,
        edb.get_section_new_db,
        edb.get_trip_new_db,
    ]
    for getter in collections_to_drop:
        getter().drop()

    self.testUserId = uuid.uuid4()

    # End place: save, then replace with the copy found by location
    end_location = gj.Point((1, 2.092))
    self.testEnd = esdcpq.make_new_common_place(uuid.uuid4(), end_location)
    esdcpq.save_common_place(self.testEnd)
    self.testEnd = esdcpq.get_common_place_at_location(self.testEnd.location)
    self.testEnd.successors = ()

    # Start place: its only successor is the end place
    start_location = gj.Point((1, 2))
    self.testStart = esdcpq.make_new_common_place(uuid.uuid4(), start_location)
    self.testStart.successors = (self.testEnd.get_id(), )

    for place in (self.testEnd, self.testStart):
        esdcpq.save_common_place(place)

    self.time0 = datetime.datetime(1900, 1, 1, 1)
def setUp(self):
    """Drop the common trip, section and trip collections before the test."""
    for get_collection in (edb.get_common_trip_db,
                           edb.get_section_new_db,
                           edb.get_trip_new_db):
        get_collection().drop()
def create_new_section(user_id, trip_id):
    """Save a new section document and return it wrapped as a Section.

    The saved document holds only ``user_id`` and ``trip_id``; the
    returned ``ecws.Section`` additionally carries the ``_id`` returned by
    the save call.
    """
    owner_fields = {"user_id": user_id, "trip_id": trip_id}
    # The database and the wrapper each receive their own dict object, as
    # in the two-literal form this replaces.
    new_id = edb.get_section_new_db().save(dict(owner_fields))
    wrapped_doc = dict(owner_fields)
    wrapped_doc["_id"] = new_id
    return ecws.Section(wrapped_doc)
def _get_sections_for_query(section_query, sort_field):
    """Return the sections matching ``section_query`` as a list of
    ``ecws.Section`` wrappers, sorted ascending by ``sort_field``."""
    logging.debug("Returning sections for query %s" % section_query)
    matching_docs = edb.get_section_new_db().find(section_query)
    ordered_docs = matching_docs.sort(sort_field, pymongo.ASCENDING)
    # TODO: Fix "TripIterator" and return it instead of this list
    return [ecws.Section(section_doc) for section_doc in ordered_docs]
def get_section(section_id):
    """Look up the section document whose ``_id`` equals ``section_id``
    and return it wrapped in ``ecws.Section``."""
    id_query = {"_id": section_id}
    section_doc = edb.get_section_new_db().find_one(id_query)
    return ecws.Section(section_doc)
def save_section(section):
    """Save ``section`` to the section collection.

    The result of the underlying save call is discarded; this function
    returns None.
    """
    section_db = edb.get_section_new_db()
    section_db.save(section)
def setUp(self):
    """Create a random test user id and a fixed trip id, and clear the
    section collection (``remove()`` is called with no filter)."""
    self.testUserId = uuid.uuid4()
    self.test_trip_id = "test_trip_id"

    section_db = edb.get_section_new_db()
    section_db.remove()
def create_new_section(user_id, trip_id):
    """Save a new section document and return it wrapped as a Section.

    The saved document holds only ``user_id`` and ``trip_id``; the
    returned ``ecws.Section`` additionally carries the ``_id`` returned by
    the save call.
    """
    section_db = edb.get_section_new_db()
    _id = section_db.save(dict(user_id=user_id, trip_id=trip_id))
    return ecws.Section(dict(_id=_id, user_id=user_id, trip_id=trip_id))
def _get_sections_for_query(section_query, sort_field):
    """Return the sections matching ``section_query`` as a list of
    ``ecws.Section`` wrappers, sorted ascending by ``sort_field``."""
    logging.debug("Returning sections for query %s" % section_query)
    section_db = edb.get_section_new_db()
    sorted_cursor = section_db.find(section_query).sort(sort_field,
                                                        pymongo.ASCENDING)
    # TODO: Fix "TripIterator" and return it instead of this list
    return list(map(ecws.Section, sorted_cursor))
else: logging.warning("No exit timestamp found, skipping") collection.save(entry) if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument( "key", help="the key representing the stream that we want to fix") parser.add_argument( "-f", "--filename", help= "a saved timeline whose local_dt needs to be fixed. If this is specified, key is ignored" ) args = parser.parse_args() if args.filename is not None: fix_file(args.filename) elif args.key == "trips": fix_trips_or_sections(edb.get_trip_new_db()) elif args.key == "sections": fix_trips_or_sections(edb.get_section_new_db()) elif args.key == "places": fix_stops_or_places(edb.get_place_db()) elif args.key == "stops": fix_stops_or_places(edb.get_stop_db()) else: fix_timeseries(args.key)
# Removes all materialized views and the pipeline state.
# This will cause us to reprocess the pipeline from scratch
# As history begins to accumulate, we may want to specify a point to reset the
# pipeline to instead of deleting everything

import logging
logging.basicConfig(level=logging.DEBUG)

import emission.core.get_database as edb

if __name__ == '__main__':
    # Each step prints its message and then the result of an unfiltered
    # remove() on the corresponding collection, in this order.
    reset_steps = (
        ("Deleting all trips", edb.get_trip_new_db),
        ("Deleting all sections", edb.get_section_new_db),
        ("Deleting pipeline state", edb.get_pipeline_state_db),
    )
    for message, get_collection in reset_steps:
        print(message)
        print(get_collection().remove())
def tearDown(self):
    """Drop the common trip, section and trip collections after the test."""
    for get_collection in (edb.get_common_trip_db,
                           edb.get_section_new_db,
                           edb.get_trip_new_db):
        get_collection().drop()
def get_sections_for_trip(user_id, trip_id):
    """
    Return the sections that are children of this trip.

    Looks up section documents matching both ``user_id`` and ``trip_id``,
    sorted by ascending ``start_ts``, and returns them as a list of
    ``ecws.Section`` wrappers.
    """
    section_db = edb.get_section_new_db()
    sorted_cursor = section_db.find(
        {"user_id": user_id, "trip_id": trip_id}
    ).sort("start_ts", pymongo.ASCENDING)
    return list(map(ecws.Section, sorted_cursor))
def tearDown(self):
    """Clean up the database after a test.

    Timeseries entries are removed with a ``user_id`` filter for
    ``self.testUUID``; the section and trip collections are cleared with
    an unfiltered ``remove()``.
    """
    import emission.core.get_database as edb

    test_user_query = {"user_id": self.testUUID}
    edb.get_timeseries_db().remove(test_user_query)

    # NOTE(review): these two removes are not limited to the test user,
    # unlike the timeseries cleanup above -- confirm that is intended.
    for get_collection in (edb.get_section_new_db, edb.get_trip_new_db):
        get_collection().remove()
logging.info("About to convert %s entries" % result_cursor.count()) for i, wrapper in enumerate(result_cursor): entry = convert_wrapper_to_entry(key, wrapper) if entry.get_id() != wrapper["_id"]: logging.warn("entry.id = %s, wrapper.id = %s" % (entry.get_id(), wrapper["_id"])) if i % 10000 == 0: print "converted %s -> %s" % (wrapper, entry) edb.get_timeseries_db().insert(entry) collection.remove(wrapper) def move_ts_entries(key): tdb = edb.get_timeseries_db() atdb = edb.get_analysis_timeseries_db() result_cursor = tdb.find({'metadata.key': key}) logging.info("About to convert %s entries" % result_cursor.count()) for i, entry_doc in enumerate(result_cursor): if i % 10000 == 0: print "moved %s from one ts to the other" % (entry_doc) atdb.insert(entry_doc) tdb.remove(entry_doc) if __name__ == '__main__': # No arguments - muahahahaha. Just going to copy known fields over. convert_collection(edb.get_trip_new_db(), "segmentation/raw_trip") convert_collection(edb.get_place_db(), "segmentation/raw_place") convert_collection(edb.get_section_new_db(), "segmentation/raw_section") convert_collection(edb.get_stop_db(), "segmentation/raw_stop") move_ts_entries("analysis/smoothing")
else: exit_tz = "America/Los_Angeles" logging.debug("exit metadata timezone = %s" % exit_tz) entry['exit_local_dt'] = get_local_date(entry['exit_fmt_time'], exit_tz) else: logging.warning("No exit timestamp found, skipping") collection.save(entry) if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument("key", help="the key representing the stream that we want to fix") parser.add_argument("-f", "--filename", help="a saved timeline whose local_dt needs to be fixed. If this is specified, key is ignored") args = parser.parse_args() if args.filename is not None: fix_file(args.filename) elif args.key == "trips": fix_trips_or_sections(edb.get_trip_new_db()) elif args.key == "sections": fix_trips_or_sections(edb.get_section_new_db()) elif args.key == "places": fix_stops_or_places(edb.get_place_db()) elif args.key == "stops": fix_stops_or_places(edb.get_stop_db()) else: fix_timeseries(args.key)