def testLocalRangeRolloverQuery(self):
    """
    Search for all entries between 8:18 and 9:08 local time, both inclusive
    """
    start_local_dt = ecwl.LocalDate({'year': 2015, 'month': 8, 'hour': 8, 'minute': 18})
    end_local_dt = ecwl.LocalDate({'year': 2015, 'month': 8, 'hour': 9, 'minute': 8})
    final_query = {"user_id": self.testUUID}
    final_query.update(esdl.get_range_query("data.local_dt", start_local_dt, end_local_dt))
    entries = edb.get_timeseries_db().find(final_query).sort('data.ts', pymongo.ASCENDING)
    self.assertEqual(448, entries.count())
    entries_list = list(entries)

    # Note that since this is a set of filters, as opposed to a range, this
    # returns all entries between 18 and 8 in both hours.
    # so 8:18 is valid, but so is 9:57
    self.assertEqual(ecwe.Entry(entries_list[0]).data.local_dt.hour, 8)
    self.assertEqual(ecwe.Entry(entries_list[0]).data.local_dt.minute, 18)
    self.assertEqual(ecwe.Entry(entries_list[-1]).data.local_dt.hour, 9)
    self.assertEqual(ecwe.Entry(entries_list[-1]).data.local_dt.minute, 57)
def setUp(self):
    # We need to access the database directly sometimes in order to
    # forcibly insert entries for the tests to pass. But we put the import
    # in here to reduce the temptation to use the database directly elsewhere.
    import emission.core.get_database as edb
    import uuid

    self.testUUID = uuid.uuid4()
    self.ts = esta.TimeSeries.get_time_series(self.testUUID)

    with open("emission/tests/data/smoothing_data/trip_list.txt") as tfp:
        self.trip_entries = json.load(tfp, object_hook=bju.object_hook)
    for trip_entry in self.trip_entries:
        trip_entry["user_id"] = self.testUUID
        self.ts.insert(trip_entry)
    self.trip_entries = [ecwe.Entry(t) for t in self.trip_entries]

    with open("emission/tests/data/smoothing_data/section_list.txt") as sfp:
        self.section_entries = json.load(sfp, object_hook=bju.object_hook)
    for section_entry in self.section_entries:
        section_entry["user_id"] = self.testUUID
        self.ts.insert(section_entry)
    self.section_entries = [ecwe.Entry(s) for s in self.section_entries]
def section_to_geojson(section, tl):
    """
    This is the trickiest part of the visualization.
    The section is basically a collection of points with a line through them.
    So the representation is a feature collection in which one feature is the
    line, and one feature collection is the set of point features.
    :param section: the section to be converted
    :return: a feature collection which is the geojson version of the section
    """
    ts = esta.TimeSeries.get_time_series(section.user_id)
    entry_it = ts.find_entries(["analysis/recreated_location"],
                               esda.get_time_query_for_trip_like(
                                   "analysis/cleaned_section",
                                   section.get_id()))

    # TODO: Decide whether we want to rewrite this to use dataframes throughout
    # instead of python arrays. dataframes insert nans. We could use fillna to
    # fill with default values, but if we are not actually using dataframe
    # features here, it is unclear how much that would help.
    feature_array = []
    section_location_entries = [ecwe.Entry(entry) for entry in entry_it]
    if len(section_location_entries) != 0:
        logging.debug("first element in section_location_array = %s" %
                      section_location_entries[0])

        if not ecc.compare_rounded_arrays(section.data.end_loc.coordinates,
                                          section_location_entries[-1].data.loc.coordinates,
                                          digits=4):
            logging.info("section_location_array[-1].data.loc %s != section.data.end_loc %s even after df.ts fix, filling gap" %
                         (section_location_entries[-1].data.loc, section.data.end_loc))
            assert(False)
            last_loc_doc = ts.get_entry_at_ts("background/filtered_location",
                                              "data.ts", section.data.end_ts)
            if last_loc_doc is None:
                logging.warning("can't find entry to patch gap, leaving gap")
            else:
                last_loc_entry = ecwe.Entry(last_loc_doc)
                logging.debug("Adding new entry %s to fill the end point gap between %s and %s" %
                              (last_loc_entry.data.loc,
                               section_location_entries[-1].data.loc,
                               section.data.end_loc))
                section_location_entries.append(last_loc_entry)

    points_line_feature = point_array_to_line(section_location_entries)
    points_line_feature.id = str(section.get_id())
    points_line_feature.properties.update(copy.copy(section.data))
    # Update works on dicts, convert back to a section object to make the modes
    # work properly
    points_line_feature.properties = ecwcs.Cleanedsection(points_line_feature.properties)

    points_line_feature.properties["feature_type"] = "section"
    points_line_feature.properties["sensed_mode"] = str(points_line_feature.properties.sensed_mode)
    _del_non_derializable(points_line_feature.properties, ["start_loc", "end_loc"])

    # feature_array.append(gj.FeatureCollection(points_feature_array))
    feature_array.append(points_line_feature)

    return gj.FeatureCollection(feature_array)
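# Illustrative sketch (not project code): the rough shape of the FeatureCollection
# that section_to_geojson() returns, built directly with the `geojson` package.
# The coordinates, id, and property values below are made up; only the property
# names ("feature_type", "sensed_mode") mirror the ones set in the function above.
import geojson as gj

def _example_section_feature_collection():
    line = gj.Feature(geometry=gj.LineString([(-122.0879, 37.3885),
                                              (-122.0870, 37.3890)]),
                      id="example-section-id",
                      properties={"feature_type": "section",
                                  "sensed_mode": "MotionTypes.BICYCLING"})
    # The line feature is wrapped in a one-element FeatureCollection, matching
    # gj.FeatureCollection(feature_array) at the end of section_to_geojson().
    return gj.FeatureCollection([line])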
def testRemoveAllOutliers(self):
    etc.setupRealExample(self, "emission/tests/data/real_examples/shankari_2016-06-20")
    self.ts = esta.TimeSeries.get_time_series(self.testUUID)
    eaist.segment_current_trips(self.testUUID)
    eaiss.segment_current_sections(self.testUUID)
    eaicl.filter_current_sections(self.testUUID)
    # get all sections
    sections = [ecwe.Entry(s) for s in
                self.ts.find_entries([esda.RAW_SECTION_KEY], time_query=None)]
    for section in sections:
        filtered_points_entry_doc = self.ts.get_entry_at_ts("analysis/smoothing",
                                                            "data.section",
                                                            section.get_id())
        if filtered_points_entry_doc is not None:
            logging.debug("Found smoothing result for section %s" % section.get_id())
            # Setting the set of deleted points to everything
            loc_tq = esda.get_time_query_for_trip_like(esda.RAW_SECTION_KEY,
                                                       section.get_id())
            loc_df = self.ts.get_data_df("background/filtered_location", loc_tq)
            filtered_points_entry_doc["data"]["deleted_points"] = loc_df["_id"].tolist()
            self.ts.update(ecwe.Entry(filtered_points_entry_doc))

    # All we care about is that this should not crash.
    eaicr.clean_and_resample(self.testUUID)

    # Most of the trips have zero length, but apparently one has non-zero length
    # because the stop length is non zero!!
    # So there is only one cleaned trip left
    cleaned_trips_df = self.ts.get_data_df(esda.CLEANED_TRIP_KEY, time_query=None)
    self.assertEqual(len(cleaned_trips_df), 1)

    # We don't support squishing sections, but we only store stops and sections
    # for non-squished trips. And this non-squished trip happens to have
    # two sections and one stop
    cleaned_sections_df = self.ts.get_data_df(esda.CLEANED_SECTION_KEY, time_query=None)
    self.assertEqual(len(cleaned_sections_df), 2)
    self.assertEqual(cleaned_sections_df.distance.tolist(), [0, 0])

    cleaned_stops_df = self.ts.get_data_df(esda.CLEANED_STOP_KEY, time_query=None)
    self.assertEqual(len(cleaned_stops_df), 1)
    self.assertAlmostEqual(cleaned_stops_df.distance[0], 3252, places=0)
def testWrapActivity(self):
    testEntryJSON = {
        '_id': '55a4418c7d65cb39ee9737d2',
        'data': {
            'type': 5,
            'confidence': 100,
            'ts': 1436826360.493
        },
        'metadata': {
            'key': 'background/motion_activity',
            'platform': 'android',
            'read_ts': 0,
            'type': 'message',
            'write_ts': 1436826360.493,
            'write_fmt_time': '2015-07-13 15:26:00.493000-07:00'
        },
        'user_id': UUID('0763de67-f61e-3f5d-90e7-518e69793954')
    }
    entry = ecwe.Entry(testEntryJSON)
    self.assertEquals(entry.metadata.key, 'background/motion_activity')
    self.assertEquals(entry.metadata.type, 'message')
    self.assertEquals(entry.data.type, ecwm.MotionTypes.TILTING)
    self.assertEquals(entry.data.confidence, 100)
    logging.debug("activity time = %s, written at %s (%s)" %
                  (entry.data.ts, entry.metadata.write_ts, entry.metadata.write_fmt_time))
def get_last_place_entry(key, user_id):
    """
    There are many ways to find the last place. One would be to find the one
    with the max enter_ts. But that is not performant because we would need to
    retrieve all the enter_ts and find their max, which is expensive. Instead,
    we use the property that we process data in chunks of trips, so the last
    place would have been created and entered but not exited.
    :param key:
    """
    ts = esta.TimeSeries.get_time_series(user_id)
    ret_place_doc = ts.analysis_timeseries_db.find_one({'user_id': user_id,
                                                        'metadata.key': key,
                                                        'data.exit_ts': {'$exists': False}})
    logging.debug("last place doc = %s" % ret_place_doc)
    if ret_place_doc is None:
        return None
    ret_place = ecwe.Entry(ret_place_doc)
    assert('exit_ts' not in ret_place.data)
    assert('exit_fmt_time' not in ret_place.data)
    assert('starting_trip' not in ret_place.data)
    return ret_place
def testLocalDateReadWrite(self):
    ts = esta.TimeSeries.get_time_series(self.testUUID)
    start_ts = arrow.now().timestamp
    ma_ts = 1460586729
    local_dt = ecwl.LocalDate.get_local_date(ma_ts, "America/Los_Angeles")
    fmt_time = arrow.get(ma_ts).to("America/Los_Angeles").isoformat()
    ma = ecwm.Motionactivity({"ts": 1460586729, "local_dt": local_dt, "fmt_time": fmt_time})
    ma_entry = ecwe.Entry.create_entry(self.testUUID, "background/motion_activity", ma)
    ts.insert(ma_entry)

    ret_entry = ecwe.Entry(ts.get_entry_at_ts("background/motion_activity",
                                              "data.ts", 1460586729))
    self.assertGreaterEqual(ret_entry.metadata.write_ts, start_ts)
    metadata_dt = arrow.get(ret_entry.metadata.write_ts).to(
        ret_entry.metadata.time_zone).datetime
    self.assertEqual(metadata_dt.hour, ret_entry.metadata.write_local_dt.hour)
    self.assertEqual(metadata_dt.minute, ret_entry.metadata.write_local_dt.minute)
    self.assertEqual(metadata_dt.weekday(), ret_entry.metadata.write_local_dt.weekday)

    self.assertEqual(ret_entry.data.local_dt.hour, 15)
    self.assertEqual(ret_entry.data.local_dt.month, 4)
    self.assertEqual(ret_entry.data.local_dt.weekday, 2)
    self.assertEqual(ret_entry.data.fmt_time, "2016-04-13T15:32:09-07:00")
def testWrapLocation(self):
    testEntryJSON = {
        '_id': '55a4418c7d65cb39ee9737cf',
        'data': {
            'accuracy': 52.5,
            'altitude': 0,
            'bearing': 0,
            'elapsedRealtimeNanos': 100623898000000,
            'latitude': 37.3885529,
            'longitude': -122.0879696,
            'loc': {"coordinates": [-122.0879696, 37.3885529], "type": "Point"},
            'sensed_speed': 0,
            'ts': 1436826356.852
        },
        'metadata': {
            'key': 'background/location',
            'platform': 'android',
            'read_ts': 0,
            'type': 'message',
            'write_ts': 1436826357.115,
            'write_fmt_time': '2015-07-13 15:25:57.115000-07:00'
        },
        'user_id': UUID('0763de67-f61e-3f5d-90e7-518e69793954')
    }
    entry = ecwe.Entry(testEntryJSON)
    self.assertEqual(entry.metadata.key, 'background/location')
    self.assertEqual(entry.metadata.type, 'message')
    self.assertEqual(entry.data.latitude, 37.3885529)
    self.assertEqual(entry.data.longitude, -122.0879696)
    # self.assertEqual(entry.data.loc, gj.Point((-122.0879696, 37.3885529)))
    self.assertTrue(isinstance(entry.data.loc, gj.Point))
    logging.debug("location time = %s, written at %s (%s)" %
                  (entry.data.ts, entry.metadata.write_ts, entry.metadata.write_fmt_time))
def create_confirmed_trips(user_id, timerange):
    ts = esta.TimeSeries.get_time_series(user_id)
    toConfirmTrips = esda.get_entries(esda.CLEANED_TRIP_KEY, user_id, time_query=timerange)
    logging.debug("Converting %d cleaned trips to confirmed ones" % len(toConfirmTrips))
    lastTripProcessed = None
    if len(toConfirmTrips) == 0:
        logging.debug("len(toConfirmTrips) == 0, early return")
        return None
    input_key_list = eac.get_config()["userinput.keylist"]
    for tct in toConfirmTrips:
        # Copy the trip and fill in the new values
        confirmed_trip_dict = copy.copy(tct)
        del confirmed_trip_dict["_id"]
        confirmed_trip_dict["metadata"]["key"] = "analysis/confirmed_trip"
        confirmed_trip_dict["data"]["cleaned_trip"] = tct.get_id()
        confirmed_trip_dict["data"]["user_input"] = \
            get_user_input_dict(ts, tct, input_key_list)
        confirmed_trip_entry = ecwe.Entry(confirmed_trip_dict)
        # save the entry
        ts.insert(confirmed_trip_entry)
        # if everything is successful, then update the last successful trip
        lastTripProcessed = tct

    return lastTripProcessed
def _get_sections_for_query(section_query, sort_field):
    section_query.update({"metadata.key": "segmentation/raw_section"})
    logging.debug("Returning sections for query %s" % section_query)
    section_doc_cursor = edb.get_analysis_timeseries_db().find(section_query).sort(
        sort_field, pymongo.ASCENDING)
    logging.debug("result cursor length = %d" % section_doc_cursor.count())
    return [ecwe.Entry(doc) for doc in section_doc_cursor]
def savePredictionsStep(self):
    for i, currProb in enumerate(self.predictedProb):
        currSectionEntry = self.toPredictSections[i]
        currSection = currSectionEntry.data

        # Insert the prediction
        mp = ecwm.Modeprediction()
        mp.trip_id = currSection.trip_id
        mp.section_id = currSectionEntry.get_id()
        mp.algorithm_id = ecwm.AlgorithmTypes.SIMPLE_RULE_ENGINE
        mp.predicted_mode_map = currProb
        mp.start_ts = currSection.start_ts
        mp.end_ts = currSection.end_ts
        self.ts.insert_data(self.user_id, "inference/prediction", mp)

        # There are now two predictions, but we don't want to do a bunch of
        # refactoring, so just create the inferred section object right here
        is_dict = copy.copy(currSectionEntry)
        del is_dict["_id"]
        is_dict["metadata"]["key"] = "analysis/inferred_section"
        is_dict["data"]["sensed_mode"] = ecwm.PredictedModeTypes[
            easf.select_inferred_mode([mp])].value
        is_dict["data"]["cleaned_section"] = currSectionEntry.get_id()
        ise = ecwe.Entry(is_dict)
        logging.debug("Updating sensed mode for section = %s to %s" %
                      (currSectionEntry.get_id(), ise.data.sensed_mode))
        self.ts.insert(ise)

    # Set last_section_done after saving because otherwise if there is an error
    # during inference, we will not save results and never re-run
    self.last_section_done = self.toPredictSections[-1]
def _get_stops_for_query(stop_query, sort_key):
    logging.debug("Returning stops for query %s" % stop_query)
    stop_query.update({"metadata.key": "segmentation/raw_stop"})
    logging.debug("updated query = %s" % stop_query)
    stop_doc_cursor = edb.get_analysis_timeseries_db().find(stop_query).sort(
        sort_key, pymongo.ASCENDING)
    logging.debug("result count = %d" % stop_doc_cursor.count())
    return [ecwe.Entry(doc) for doc in stop_doc_cursor]
def geojson_incidents_in_range(user_id, start_ts, end_ts):
    MANUAL_INCIDENT_KEY = "manual/incident"
    ts = esta.TimeSeries.get_time_series(user_id)
    uc = enua.UserCache.getUserCache(user_id)
    tq = estt.TimeQuery("data.ts", start_ts, end_ts)
    incident_entry_docs = list(ts.find_entries([MANUAL_INCIDENT_KEY], time_query=tq)) \
        + list(uc.getMessage([MANUAL_INCIDENT_KEY], tq))
    incidents = [ecwe.Entry(doc) for doc in incident_entry_docs]
    return list(map(incident_to_geojson, incidents))
def moveToLongTerm(self):
    """
    In order to move to the long term, we need to do the following:
    a) determine the time range to be processed. We do this by checking the
       pipeline state. This does not leak information since the process will
       run whether there is data for it to work on or not. So the pipeline
       state is stored outside of the user cache.
    b) process the time range. Pass in a function that works on every entry
       to convert it to the appropriate format.
    c) delete the time range once it is processed (in usercache or here?)
    d) update the pipeline state to reflect the new range (here)
    """
    # Error handling: if any of the entries has an error in processing, we
    # move it to a separate "error_usercache" and process the rest. The
    # stage is still marked successful. This means that the stage can never
    # be unsuccessful. We could try to keep it, but then the delete query
    # below will get significantly more complicated.
    uc = enua.UserCache.getUserCache(self.user_id)
    messages = uc.getMessage()
    # Here, we assume that the user only has data from a single platform.
    # Since this is a temporary hack, this is fine
    if len(messages) == 0:
        logging.debug("No messages to process")
        # Since we didn't get the current time range, there is no current
        # state, so we don't need to mark it as done
        # esp.mark_usercache_done(None)
        return

    time_query = esp.get_time_range_for_usercache(self.user_id)

    ts = etsa.TimeSeries.get_time_series(self.user_id)

    curr_entry_it = uc.getMessage(None, time_query)
    last_ts_processed = None
    for entry_doc in curr_entry_it:
        unified_entry = None
        try:
            # We don't want to use our wrapper classes yet because they are
            # based on the standard long-term formats, and we don't yet know
            # whether the incoming entries are consistent with them. That's why
            # we have the convert_to_common_format step. So let's just wrap
            # this in a generic attrdict for now.
            entry = ad.AttrDict(entry_doc)
            unified_entry = enuf.convert_to_common_format(entry)
            ts.insert(unified_entry)
            last_ts_processed = ecwe.Entry(unified_entry).metadata.write_ts
            time_query.endTs = last_ts_processed
        except pymongo.errors.DuplicateKeyError as e:
            logging.info("document already present in timeseries, skipping since read-only")
        except Exception as e:
            logging.exception("Backtrace time")
            logging.warn("Got error %s while saving entry %s -> %s" %
                         (e, entry, unified_entry))
            ts.insert_error(entry_doc)
    logging.debug("Deleting all entries for query %s" % time_query)
    uc.clearProcessedMessages(time_query)
    esp.mark_usercache_done(self.user_id, last_ts_processed)
def get_all_points_for_range(user_id, key, start_ts, end_ts):
    import emission.storage.timeseries.timequery as estt
    # import emission.core.wrapper.location as ecwl

    tq = estt.TimeQuery("metadata.write_ts", start_ts, end_ts)
    ts = esta.TimeSeries.get_time_series(user_id)
    entry_it = ts.find_entries([key], tq)
    points_array = [ecwe.Entry(entry) for entry in entry_it]

    return get_feature_list_for_point_array(points_array)
def get_user_input_from_cache_series(user_id, trip_obj, user_input_key):
    tq = estt.TimeQuery("data.start_ts", trip_obj.data.start_ts, trip_obj.data.end_ts)
    potential_candidates = estsc.find_entries(user_id, [user_input_key], tq)
    if len(potential_candidates) == 0:
        return None

    sorted_pc = sorted(potential_candidates, key=lambda c: c["metadata"]["write_ts"])
    most_recent_entry = sorted_pc[-1]
    logging.debug("most recent entry has id %s" % most_recent_entry["_id"])
    logging.debug("and is mapped to entry %s" % most_recent_entry)
    return ecwe.Entry(most_recent_entry)
def get_stops_for_trip(key, user_id, trip_id):
    """
    Get the set of stops that are children of this trip.
    """
    query = {"user_id": user_id, "data.trip_id": trip_id, "metadata.key": key}
    logging.debug("About to execute query %s with sort_key %s" % (query, "data.enter_ts"))
    stop_doc_cursor = edb.get_analysis_timeseries_db().find(query).sort(
        "data.enter_ts", pymongo.ASCENDING)
    return [ecwe.Entry(doc) for doc in stop_doc_cursor]
def get_sections_for_trip(key, user_id, trip_id):
    # type: (UUID, object_id) -> list(sections)
    """
    Get the set of sections that are children of this trip.
    """
    query = {"user_id": user_id, "data.trip_id": trip_id, "metadata.key": key}
    section_doc_cursor = edb.get_analysis_timeseries_db().find(query).sort(
        "data.start_ts", pymongo.ASCENDING)
    logging.debug("About to execute query %s" % query)
    return [ecwe.Entry(doc) for doc in section_doc_cursor]
def compare_trip_result(self, result_dicts, expect_dicts):
    # This is basically a bunch of asserts to ensure that the timeline is as
    # expected. We are not using a recursive diff because things like the IDs
    # will change from run to run. Instead, I pick out a bunch of important
    # things that are highly user visible.
    # Since this is deterministic, we can also include things that are not that
    # user visible :)
    result = [ecwe.Entry(r) for r in result_dicts]
    expect = [ecwe.Entry(e) for e in expect_dicts]

    for rt, et in zip(result, expect):
        logging.debug("Comparing %s -> %s with %s -> %s" %
                      (rt.data.start_fmt_time, rt.data.end_fmt_time,
                       et.data.start_fmt_time, et.data.end_fmt_time))
    self.assertEqual(len(result), len(expect))
    for rt, et in zip(result, expect):
        logging.debug("======= Comparing trip =========")
        logging.debug(json.dumps(rt, indent=4, default=bju.default))
        logging.debug(json.dumps(et, indent=4, default=bju.default))
        # Highly user visible
        self.assertEqual(rt.data["user_input"], et.data["user_input"])
        # self.assertEqual(rt.data.inferred_primary_mode, et.data.inferred_primary_mode)
        logging.debug(20 * "=")
def get_entries(key, user_id, time_query, geo_query=None, extra_query_list=None):
    ts = get_timeseries_for_user(user_id)
    doc_cursor = ts.find_entries([key], time_query, geo_query, extra_query_list)
    # TODO: Fix "TripIterator" and return it instead of this list
    curr_entry_list = [ecwe.Entry(doc) for doc in doc_cursor]
    logging.debug("Returning entry with length %d result" % len(curr_entry_list))
    return curr_entry_list
def get_timeline_from_dt(user_id, start_dt, end_dt):
    import emission.core.get_database as edb
    import emission.core.wrapper.entry as ecwe

    logging.info("About to query for %s -> %s" % (start_dt, end_dt))
    result_cursor = edb.get_timeseries_db().find({
        "user_id": user_id,
        "data.local_dt": {"$gte": start_dt, "$lte": end_dt}
    }).sort("metadata.write_ts")
    logging.debug("result cursor has %d entries" % result_cursor.count())
    result_list = list(result_cursor)
    logging.debug("result list has %d entries" % len(result_list))
    if len(result_list) == 0:
        return get_timeline(user_id, 0, 0)

    start_ts = ecwe.Entry(result_list[0]).metadata.write_ts
    end_ts = ecwe.Entry(result_list[-1]).metadata.write_ts
    logging.debug("Converted datetime range %s -> %s to timestamp range %s -> %s" %
                  (start_dt, end_dt, start_ts, end_ts))
    return get_timeline(user_id, start_ts, end_ts)
def _to_df_entry(entry_dict):
    entry = ecwe.Entry(entry_dict)
    ret_val = entry.data
    for ld_key in ret_val.local_dates:
        if ld_key in ret_val:
            for field_key in ret_val[ld_key]:
                expanded_key = "%s_%s" % (ld_key, field_key)
                ret_val[expanded_key] = ret_val[ld_key][field_key]
            del ret_val[ld_key]
    ret_val["_id"] = entry["_id"]
    ret_val['user_id'] = entry['user_id']
    ret_val["metadata_write_ts"] = entry["metadata"]["write_ts"]
    # logging.debug("ret_val = %s " % ret_val)
    return ret_val
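# Illustrative sketch (assumed input, not project code): how the local date
# flattening in _to_df_entry() turns nested local_dt fields into prefixed
# dataframe columns, e.g. local_dt.hour -> local_dt_hour.
def _flatten_local_dates_example():
    row = {"ts": 1436826356.852,
           "local_dt": {"year": 2015, "month": 7, "hour": 15, "minute": 25}}
    for ld_key in ["local_dt"]:          # stands in for entry.data.local_dates
        if ld_key in row:
            for field_key in row[ld_key]:
                row["%s_%s" % (ld_key, field_key)] = row[ld_key][field_key]
            del row[ld_key]
    # row is now {'ts': ..., 'local_dt_year': 2015, 'local_dt_month': 7,
    #             'local_dt_hour': 15, 'local_dt_minute': 25}
    return row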
def get_filtered_points(section, filtered_section_data):
    logging.debug("Getting filtered points for section %s" % section)
    ts = esta.TimeSeries.get_time_series(section.user_id)
    loc_entry_it = ts.find_entries(["background/filtered_location"],
                                   esda.get_time_query_for_trip_like(
                                       esda.RAW_SECTION_KEY, section.get_id()))
    loc_entry_list = [ecwe.Entry(e) for e in loc_entry_it]

    # We know that the assertion fails in the geojson conversion code and we
    # handle it there, so we are just going to comment this out for now.
    # assert (loc_entry_list[-1].data.loc == section.data.end_loc,
    #         "section_location_array[-1].loc != section.end_loc even after df.ts fix",
    #         (loc_entry_list[-1].data.loc, section.data.end_loc))

    # Find the list of points to filter
    filtered_points_entry_doc = ts.get_entry_at_ts("analysis/smoothing",
                                                   "data.section",
                                                   section.get_id())

    if filtered_points_entry_doc is None:
        logging.debug("No filtered_points_entry, filtered_points_list is empty")
        filtered_point_id_list = []
    else:
        # TODO: Figure out how to make collections work for the wrappers and
        # then change this to an Entry
        filtered_points_entry = ad.AttrDict(filtered_points_entry_doc)
        filtered_point_id_list = list(filtered_points_entry.data.deleted_points)
        logging.debug("deleting %s points from section points" %
                      len(filtered_point_id_list))

    filtered_loc_list = remove_outliers(loc_entry_list, filtered_point_id_list)

    # filtered_loc_list has removed the outliers. Now, we resample the data at
    # 30 sec intervals
    resampled_loc_df = resample(filtered_loc_list, interval=30)

    # If this is the first section, we need to find the start place of the
    # parent trip and actually start from there. That will fix the distances
    # but not the duration because we haven't yet figured out how to get the
    # correct start time.
    # TODO: Fix this!!
    # For now, we will fudge this in the geojson converter, as always

    with_speeds_df = eaicl.add_dist_heading_speed(resampled_loc_df)
    with_speeds_df["idx"] = np.arange(0, len(with_speeds_df))
    with_speeds_df_nona = with_speeds_df.dropna()
    logging.info("removed %d entries containing n/a" %
                 (len(with_speeds_df) - len(with_speeds_df_nona)))
    return with_speeds_df_nona
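# Illustrative sketch only: the real resample() implementation is not shown in
# this file, so this is an assumed stand-in that linearly interpolates latitude,
# longitude and ts at a fixed interval with pandas/numpy, just to make the
# "resample at 30 sec intervals" step above concrete. The column names
# ("ts", "latitude", "longitude") are assumptions based on the location entries
# used elsewhere in this file.
import numpy as np
import pandas as pd

def resample_sketch(loc_df, interval=30):
    # loc_df is assumed to have monotonically increasing "ts" values
    start_ts, end_ts = loc_df.ts.iloc[0], loc_df.ts.iloc[-1]
    new_ts = np.arange(start_ts, end_ts, interval)
    return pd.DataFrame({
        "ts": new_ts,
        "latitude": np.interp(new_ts, loc_df.ts, loc_df.latitude),
        "longitude": np.interp(new_ts, loc_df.ts, loc_df.longitude),
    })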
def get_aggregate_timeline_from_dt(start_dt, end_dt, box=None):
    import emission.core.get_database as edb
    import emission.core.wrapper.entry as ecwe
    import emission.storage.decorations.place_queries as esdp
    import emission.storage.decorations.trip_queries as esdt

    if not box:
        logging.info("About to query for %s -> %s" % (start_dt, end_dt))
    else:
        logging.info("About to query for %s -> %s in %s" % (start_dt, end_dt, box))
    result_cursor = edb.get_timeseries_db().find({
        "data.local_dt": {"$gte": start_dt, "$lte": end_dt}
    }).sort("metadata.write_ts")

    logging.debug("about to call result_cursor.count()")
    result_cursor_count = result_cursor.count()
    logging.debug("result cursor has %d entries" % result_cursor_count)
    if result_cursor_count == 0:
        return Timeline([], [])

    logging.debug("About to query for time data in result cursor")
    start_ts = ecwe.Entry(result_cursor[0]).metadata.write_ts
    end_ts = ecwe.Entry(result_cursor[result_cursor_count - 1]).metadata.write_ts
    logging.debug("Converted datetime range %s -> %s to timestamp range %s -> %s" %
                  (start_dt, end_dt, start_ts, end_ts))
    places = esdp.get_aggregate_places(enua.UserCache.TimeQuery("enter_ts", start_ts, end_ts),
                                       box=box)
    trips = esdt.get_aggregate_trips(enua.UserCache.TimeQuery("start_ts", start_ts, end_ts),
                                     box=box)
    return Timeline(places, trips)
def savePredictionsStep(self):
    from emission.core.wrapper.user import User
    from emission.core.wrapper.client import Client

    uniqueModes = self.model.classes_

    for i in range(self.predictedProb.shape[0]):
        currSectionEntry = self.toPredictSections[i]
        currSection = currSectionEntry.data
        currProb = self.convertPredictedProbToMap(uniqueModes, self.predictedProb[i])

        # Special handling for the AIR mode
        # AIR is not a mode that is sensed from the phone, but it is inferred
        # through some heuristics in cleanAndResample instead of through the
        # decision tree. Ideally those heuristics should be replaced by the
        # inference through the decision tree, or through a separate heuristic
        # step. But we are out of time for a bigger refactor here.
        # So we say that if the sensed mode == AIR, we are going to use it
        # directly and ignore the inferred mode
        if currSection.sensed_mode == ecwma.MotionTypes.AIR_OR_HSR:
            currProb = {'AIR_OR_HSR': 1.0}

        # Insert the prediction
        mp = ecwm.Modeprediction()
        mp.trip_id = currSection.trip_id
        mp.section_id = currSectionEntry.get_id()
        mp.algorithm_id = ecwm.AlgorithmTypes.SEED_RANDOM_FOREST
        mp.predicted_mode_map = currProb
        mp.start_ts = currSection.start_ts
        mp.end_ts = currSection.end_ts
        self.ts.insert_data(self.user_id, "inference/prediction", mp)

        # Since there is currently only one prediction, create the inferred
        # section object right here
        is_dict = copy.copy(currSectionEntry)
        del is_dict["_id"]
        is_dict["metadata"]["key"] = "analysis/inferred_section"
        is_dict["data"]["sensed_mode"] = ecwm.PredictedModeTypes[
            easf.select_inferred_mode([mp])].value
        is_dict["data"]["cleaned_section"] = currSectionEntry.get_id()
        ise = ecwe.Entry(is_dict)
        logging.debug("Updating sensed mode for section = %s to %s" %
                      (currSectionEntry.get_id(), ise.data.sensed_mode))
        self.ts.insert(ise)

    # Set last_section_done after saving because otherwise if there is an error
    # during inference, we will not save results and never re-run
    self.last_section_done = self.toPredictSections[-1]
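# Illustrative sketch: convertPredictedProbToMap() is not shown in this file,
# so this is an assumed implementation, purely for illustration, that zips the
# classifier's class labels with one row of predict_proba() output and drops
# the zero-probability modes to build the predicted_mode_map used above.
def convert_predicted_prob_to_map_sketch(unique_modes, prob_row):
    return {str(mode): float(p)
            for mode, p in zip(unique_modes, prob_row)
            if p > 0}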
def get_entries(key, user_id, time_query, geo_query=None, extra_query_list=None):
    if user_id is not None:
        ts = esta.TimeSeries.get_time_series(user_id)
    else:
        ts = esta.TimeSeries.get_aggregate_time_series()

    doc_cursor = ts.find_entries([key], time_query, geo_query, extra_query_list)
    # TODO: Fix "TripIterator" and return it instead of this list
    curr_entry_list = [ecwe.Entry(doc) for doc in doc_cursor]
    logging.debug("Returning entry with length %d result" % len(curr_entry_list))
    return curr_entry_list
def testLocalMatchingQuery(self):
    """
    Search for all entries that occur at minute = 8 from any hour
    """
    start_local_dt = ecwl.LocalDate({'minute': 8})
    end_local_dt = ecwl.LocalDate({'minute': 8})
    final_query = {"user_id": self.testUUID}
    final_query.update(esdl.get_range_query("data.local_dt", start_local_dt, end_local_dt))
    entries_docs = edb.get_timeseries_db().find(final_query).sort("metadata.write_ts")
    self.assertEquals(20, entries_docs.count())
    entries = [ecwe.Entry(doc) for doc in entries_docs]
    logging.debug("entries bookends are %s and %s" % (entries[0], entries[-1]))
    first_entry = entries[0]
    self.assertEquals(first_entry.data.local_dt.hour, 9)
    last_entry = entries[19]
    self.assertEquals(last_entry.data.local_dt.hour, 17)
def get_last_entry(user_id, time_query, config_key):
    user_ts = esta.TimeSeries.get_time_series(user_id)

    # get the max write_ts for this stream, which corresponds to the last entry
    # We expect this to be small, unless users are continuously overriding values
    config_overrides = list(user_ts.find_entries([config_key], time_query))
    logging.debug("Found %d user overrides for user %s" % (len(config_overrides), user_id))
    if len(config_overrides) == 0:
        logging.warning("No user defined overrides for %s, early return" % user_id)
        return (None, None)
    else:
        # entries are sorted by the write_ts, we can take the last value
        coe = ecwe.Entry(config_overrides[-1])
        logging.debug("last entry is %s" % coe)
        return (coe.data, coe.metadata.write_ts)
def get_first_place_entry(key, user_id):
    """
    Similar to get_last_place_entry, only finding the one that has an exit_ts
    but no enter_ts.
    """
    ts = esta.TimeSeries.get_time_series(user_id)
    ret_place_doc = ts.analysis_timeseries_db.find_one({'user_id': user_id,
                                                        'metadata.key': key,
                                                        'data.enter_ts': {'$exists': False}})
    logging.debug("first place doc = %s" % ret_place_doc)
    if ret_place_doc is None:
        return None
    ret_place = ecwe.Entry(ret_place_doc)
    assert('enter_ts' not in ret_place.data)
    assert('enter_fmt_time' not in ret_place.data)
    assert('ending_trip' not in ret_place.data)
    return ret_place
def insert(self, entry):
    """
    Inserts the specified entry and returns the object ID
    """
    logging.debug("insert called")
    if type(entry) == dict:
        entry = ecwe.Entry(entry)
    if "user_id" not in entry or entry["user_id"] is None:
        entry["user_id"] = self.user_id
    if self.user_id is not None and entry["user_id"] != self.user_id:
        raise AttributeError("Saving entry %s for %s in timeseries for %s" %
                             (entry, entry["user_id"], self.user_id))
    else:
        logging.debug("entry was fine, no need to fix it")

    logging.debug("Inserting entry %s into timeseries" % entry)
    return self.get_timeseries_db(entry.metadata.key).insert(entry)
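# Illustrative usage sketch (assumes the e-mission package is importable and
# that these module paths are the right ones for Motionactivity): build a
# wrapped entry with Entry.create_entry(), as the tests above do, and hand it
# to TimeSeries.insert(), which fills in user_id if needed and routes the
# document to the collection for its metadata.key.
import uuid
import emission.core.wrapper.entry as ecwe
import emission.core.wrapper.motionactivity as ecwm
import emission.storage.timeseries.abstract_timeseries as esta

def insert_example():
    test_uuid = uuid.uuid4()
    ts = esta.TimeSeries.get_time_series(test_uuid)
    ma = ecwm.Motionactivity({"type": 5, "confidence": 100, "ts": 1436826360.493})
    entry = ecwe.Entry.create_entry(test_uuid, "background/motion_activity", ma)
    return ts.insert(entry)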