def export_timeline(user_id, start_day_str, end_day_str, file_name):
    logging.info("Extracting timeline for user %s day %s -> %s and saving to file %s" %
                 (user_id, start_day_str, end_day_str, file_name))

    # day_dt = pydt.datetime.strptime(day_str, "%Y-%m-%d").date()
    start_day_ts = arrow.get(start_day_str).timestamp
    end_day_ts = arrow.get(end_day_str).timestamp
    logging.debug("start_day_ts = %s (%s), end_day_ts = %s (%s)" %
                  (start_day_ts, arrow.get(start_day_ts),
                   end_day_ts, arrow.get(end_day_ts)))

    ts = esta.TimeSeries.get_time_series(user_id)
    loc_time_query = estt.TimeQuery("data.ts", start_day_ts, end_day_ts)
    loc_entry_list = list(ts.find_entries(key_list=None, time_query=loc_time_query))
    trip_time_query = estt.TimeQuery("data.start_ts", start_day_ts, end_day_ts)
    trip_entry_list = list(ts.find_entries(key_list=None, time_query=trip_time_query))
    place_time_query = estt.TimeQuery("data.enter_ts", start_day_ts, end_day_ts)
    place_entry_list = list(ts.find_entries(key_list=None, time_query=place_time_query))

    combined_list = loc_entry_list + trip_entry_list + place_entry_list
    logging.info("Found %d loc entries, %d trip-like entries, %d place-like entries = %d total entries" %
                 (len(loc_entry_list), len(trip_entry_list),
                  len(place_entry_list), len(combined_list)))

    validate_truncation(loc_entry_list, trip_entry_list, place_entry_list)

    unique_key_list = set([e["metadata"]["key"] for e in combined_list])
    logging.info("timeline has unique keys = %s" % unique_key_list)

    if len(combined_list) == 0 or unique_key_list == set(['stats/pipeline_time']):
        logging.info("No entries found in range for user %s, skipping save" % user_id)
    else:
        combined_filename = "%s_%s.gz" % (file_name, user_id)
        # json.dump writes str, so the gzip file must be opened in text mode
        with gzip.open(combined_filename, "wt") as gcfd:
            json.dump(combined_list, gcfd,
                      default=bju.default, allow_nan=False, indent=4)
def export_timeline(user_id, start_day_str, end_day_str, file_name):
    logging.info("Extracting timeline for user %s day %s -> %s and saving to file %s" %
                 (user_id, start_day_str, end_day_str, file_name))

    # day_dt = pydt.datetime.strptime(day_str, "%Y-%m-%d").date()
    start_day_ts = arrow.get(start_day_str).timestamp
    end_day_ts = arrow.get(end_day_str).timestamp
    logging.debug("start_day_ts = %s (%s), end_day_ts = %s (%s)" %
                  (start_day_ts, arrow.get(start_day_ts),
                   end_day_ts, arrow.get(end_day_ts)))

    ts = esta.TimeSeries.get_time_series(user_id)
    loc_time_query = estt.TimeQuery("data.ts", start_day_ts, end_day_ts)
    loc_entry_list = list(estcs.find_entries(user_id, key_list=None,
                                             time_query=loc_time_query))
    ma_time_query = estt.TimeQuery("metadata.write_ts", start_day_ts, end_day_ts)
    ma_entry_list = list(estcs.find_entries(user_id,
                                            key_list=["background/motion_activity"],
                                            time_query=ma_time_query))
    trip_time_query = estt.TimeQuery("data.start_ts", start_day_ts, end_day_ts)
    trip_entry_list = list(ts.find_entries(key_list=None, time_query=trip_time_query))
    place_time_query = estt.TimeQuery("data.enter_ts", start_day_ts, end_day_ts)
    place_entry_list = list(ts.find_entries(key_list=None, time_query=place_time_query))

    # Handle the case of the first place, which has no enter_ts and won't be
    # matched by the default query
    first_place_extra_query = {'$and': [{'data.enter_ts': {'$exists': False}},
                                        {'data.exit_ts': {'$exists': True}}]}
    first_place_entry_list = list(ts.find_entries(key_list=None, time_query=None,
                                                  extra_query_list=[first_place_extra_query]))
    logging.info("First place entry list = %s" % first_place_entry_list)

    combined_list = (loc_entry_list + ma_entry_list + trip_entry_list +
                     place_entry_list + first_place_entry_list)
    logging.info("Found %d loc entries, %d motion entries, %d trip-like entries, %d place-like entries = %d total entries" %
                 (len(loc_entry_list), len(ma_entry_list), len(trip_entry_list),
                  len(place_entry_list), len(combined_list)))

    validate_truncation(loc_entry_list, trip_entry_list, place_entry_list)

    unique_key_list = set([e["metadata"]["key"] for e in combined_list])
    logging.info("timeline has unique keys = %s" % unique_key_list)

    if len(combined_list) == 0 or unique_key_list == set(['stats/pipeline_time']):
        logging.info("No entries found in range for user %s, skipping save" % user_id)
    else:
        combined_filename = "%s_%s.gz" % (file_name, user_id)
        with gzip.open(combined_filename, "wt") as gcfd:
            json.dump(combined_list, gcfd,
                      default=bju.default, allow_nan=False, indent=4)

        # Also dump the pipeline state, since that's where we track how far the
        # analysis results have gotten. This allows us to copy data to a
        # different *live system*, not just duplicate it for analysis
        import emission.core.get_database as edb
        pipeline_state_list = list(edb.get_pipeline_state_db().find({"user_id": user_id}))
        logging.info("Found %d pipeline states %s" %
                     (len(pipeline_state_list),
                      [ps["pipeline_stage"] for ps in pipeline_state_list]))
        pipeline_filename = "%s_pipelinestate_%s.gz" % (file_name, user_id)
        with gzip.open(pipeline_filename, "wt") as gpfd:
            json.dump(pipeline_state_list, gpfd,
                      default=bju.default, allow_nan=False, indent=4)
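# Hedged usage sketch for export_timeline above; the UUID and output path are
# hypothetical placeholders, not values from the source.
import uuid

test_user_id = uuid.UUID("00000000-0000-0000-0000-000000000000")  # placeholder
# Writes /tmp/timeline_dump_<uuid>.gz and, in the newer version that also dumps
# pipeline state, /tmp/timeline_dump_pipelinestate_<uuid>.gz
export_timeline(test_user_id, "2016-01-01", "2016-01-08", "/tmp/timeline_dump")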
def get_maps_for_range_old(user_id, start_ts, end_ts):
    # First, get the timeline for that range.
    ts = esta.TimeSeries.get_time_series(user_id)
    trip_list = esdt.get_trips(user_id, estt.TimeQuery("data.start_ts", start_ts, end_ts))
    # TODO: Should the timeline support random access as well?
    # If it did, we wouldn't need this additional map
    # I think that it would be good to support a doubly linked list, i.e. prev and next in addition
    # to the iteration interface
    place_list = esdp.get_places(user_id, estt.TimeQuery("data.exit_ts", start_ts, end_ts))
    place_list = place_list + esdp.get_places(user_id, estt.TimeQuery("data.enter_ts", start_ts, end_ts))
    place_map = dict([(p.get_id(), p) for p in place_list])
    map_list = []

    def flipped_midpoint(p1, p2):
        return [old_div(p1.coordinates[1] + p2.coordinates[1], 2),
                old_div(p1.coordinates[0] + p2.coordinates[0], 2)]

    for i, trip in enumerate(trip_list):
        logging.debug("-" * 20 + trip.start_fmt_time + "=>" + trip.end_fmt_time +
                      "(" + str(trip.end_ts - trip.start_ts) + ")")
        if (len(esdt.get_raw_sections_for_trip(user_id, trip.get_id())) == 0 and
                len(esdt.get_raw_stops_for_trip(user_id, trip.get_id())) == 0):
            logging.debug("Skipping trip because it has no stops and no sections")
            continue

        start_point = gj.GeoJSON.to_instance(trip.start_loc)
        end_point = gj.GeoJSON.to_instance(trip.end_loc)
        curr_map = folium.Map(flipped_midpoint(start_point, end_point))
        map_list.append(curr_map)

        logging.debug("About to display places %s and %s" % (trip.start_place, trip.end_place))
        update_place(curr_map, trip.start_place, place_map, marker_color='green')
        update_place(curr_map, trip.end_place, place_map, marker_color='red')

        # TODO: Should get_timeline_for_trip work on a trip_id or on a trip object?
        # It seems stupid to convert trip object -> id -> trip object
        curr_trip_timeline = esdt.get_raw_timeline_for_trip(user_id, trip.get_id())
        for j, trip_element in enumerate(curr_trip_timeline):
            # logging.debug("Examining element %s of type %s" % (trip_element, type(trip_element)))
            if type(trip_element) == ecws.Stop:
                time_query = esds.get_time_query_for_stop(trip_element.get_id())
                logging.debug("time_query for stop %s = %s" % (trip_element, time_query))
                stop_points_df = ts.get_data_df("background/filtered_location", time_query)
                # logging.debug("stop_points_df.head() = %s" % stop_points_df.head())
                if len(stop_points_df) > 0:
                    update_line(curr_map, stop_points_df, line_color=sel_color_list[-1],
                                popup="%s -> %s" % (trip_element.enter_fmt_time,
                                                    trip_element.exit_fmt_time))
            else:
                assert type(trip_element) == ecwsc.Section
                time_query = esdsc.get_time_query_for_section(trip_element.get_id())
                logging.debug("time_query for section %s = %s" %
                              (trip_element, "[%s,%s,%s]" % (time_query.timeType,
                                                             time_query.startTs,
                                                             time_query.endTs)))
                section_points_df = ts.get_data_df("background/filtered_location", time_query)
                logging.debug("section_points_df.tail() = %s" % section_points_df.tail())
                if len(section_points_df) > 0:
                    update_line(curr_map, section_points_df,
                                line_color=sel_color_list[trip_element.sensed_mode.value],
                                popup="%s (%s -> %s)" % (trip_element.sensed_mode,
                                                         trip_element.start_fmt_time,
                                                         trip_element.end_fmt_time))
                else:
                    logging.warning("found no points for section %s" % trip_element)
    return map_list
def get_timeline(user_id, place_key, trip_key, untracked_key,
                 start_ts, end_ts, geojson=None, extra_query_list=None):
    """
    Return a timeline of the trips and places from this start timestamp to this
    end timestamp. Note that each place and each trip has *two* associated
    timestamps, so we need to define which trips need to be returned. Right
    now, we define this as all places that are entered and all trips that are
    started within the specified time frame. Note that this means that, by
    definition, this may not include the starting and ending places for all
    trips, which is something that we need for our visualization. But we don't
    want the timeline to be visualization-specific. Let's compromise by adding
    a method to fill in start and end places, which we will call if the
    timeline is used for visualization and not otherwise. This also means that
    we can use the id map to avoid duplicates in case the place does exist.

    :param user_id: the user whose timeline we are considering
    :param start_ts: the starting timestamp. we will include all places and trips that start after this.
    :param end_ts: the ending timestamp. we will include all places and trips that end after this.
    :return: a timeline object
    """
    logging.info("About to query for timestamps %s -> %s" % (start_ts, end_ts))
    (place_gq, trip_gq) = get_place_trip_geoquery(geojson)
    places_entries = esda.get_entries(place_key, user_id=user_id,
                                      time_query=estt.TimeQuery("data.enter_ts",
                                                                start_ts, end_ts),
                                      geo_query=place_gq,
                                      extra_query_list=extra_query_list)
    trips_entries = esda.get_entries(trip_key, user_id=user_id,
                                     untracked_key=untracked_key,
                                     time_query=estt.TimeQuery("data.start_ts",
                                                               start_ts, end_ts),
                                     geo_query=trip_gq,
                                     extra_query_list=extra_query_list)

    for place in places_entries:
        logging.debug("Considering place %s: %s -> %s" %
                      (place.get_id(), place.data.enter_fmt_time,
                       place.data.exit_fmt_time))
    for trip in trips_entries:
        logging.debug("Considering trip %s: %s -> %s" %
                      (trip.get_id(), trip.data.start_fmt_time,
                       trip.data.end_fmt_time))

    return Timeline(place_key, trip_key, places_entries, trips_entries)
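# Hedged usage sketch for get_timeline, assuming the standard raw keys exposed
# by emission.storage.decorations.analysis_timeseries_queries (imported as
# esda elsewhere in these snippets); the user id and timestamps are
# hypothetical placeholders.
raw_timeline = get_timeline(test_user_id,
                            esda.RAW_PLACE_KEY, esda.RAW_TRIP_KEY,
                            esda.RAW_UNTRACKED_KEY,
                            start_ts=1440658800, end_ts=1440745200)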
def group_by_timestamp(user_id, start_ts, end_ts, freq, summary_fn):
    """
    Get grouped dataframes for the specific time range and at the specified frequency
    :param user_id: The user for whom we are computing this information. None for all users.
    :param start_ts: The start timestamp
    :param end_ts: The end timestamp
    :param freq: The frequency as specified in a pandas date_range frequency string.
    We only support frequencies of a day or longer in order to return the data
    in a format that makes sense
    http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases
    The canonical list can be found at:
    > pandas.tseries.offsets.prefix_mapping
    :return: a list of ModeStatTimeSummary objects
    """
    time_query = estt.TimeQuery("data.start_ts", start_ts, end_ts)
    section_df = esda.get_data_df(esda.CLEANED_SECTION_KEY, user_id=user_id,
                                  time_query=time_query, geo_query=None)
    if len(section_df) == 0:
        logging.info("Found no entries for user %s, time_query %s" %
                     (user_id, time_query))
        return []
    logging.debug("first row is %s" % section_df.iloc[0])
    secs_to_nanos = lambda x: x * 10 ** 9
    section_df['start_dt'] = pd.to_datetime(secs_to_nanos(section_df.start_ts))
    time_grouped_df = section_df.groupby(pd.Grouper(freq=freq, key='start_dt'))
    return grouped_to_summary(time_grouped_df, timestamp_fill_times, summary_fn)
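# Hedged usage sketch for group_by_timestamp: 'D' (calendar day) is the
# shortest supported frequency. The user id and the summary function are
# placeholders; real callers pass metric helpers whose exact signature is
# defined by grouped_to_summary, which is not shown here.
summary_count_fn = lambda grouped_section_df: len(grouped_section_df)  # placeholder
daily_summaries = group_by_timestamp(test_user_id,
                                     start_ts=1440658800, end_ts=1441065600,
                                     freq='D', summary_fn=summary_count_fn)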
def testGetDataDf(self):
    ts = esta.TimeSeries.get_time_series(self.testUUID)
    tq = estt.TimeQuery("metadata.write_ts", 1440658800, 1440745200)
    df = ts.get_data_df("background/filtered_location", tq)
    self.assertEqual(len(df), 327)
    logging.debug("df.columns = %s" % df.columns)
    self.assertEqual(len(df.columns), 13)
def get_user_input_from_cache_series(user_id, trip_obj, user_input_key):
    tq = estt.TimeQuery("data.start_ts", trip_obj.data.start_ts,
                        trip_obj.data.end_ts)
    ts = esta.TimeSeries.get_time_series(user_id)
    potential_candidates = estsc.find_entries(user_id, [user_input_key], tq)
    return final_candidate(valid_user_input(ts, trip_obj), potential_candidates)
def get_time_query_for_trip_like(key, trip_like_id):
    """
    Returns the query that returns all the points associated with this
    trip-like (raw trip, cleaned trip, raw section)
    """
    trip = get_object(key, trip_like_id)
    return estt.TimeQuery("data.ts", trip.start_ts, trip.end_ts)
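# Hedged usage sketch for get_time_query_for_trip_like: fetch the location
# points covered by one cleaned trip. test_trip_id is a placeholder, and we
# assume `ts` is a TimeSeries handle as in the surrounding snippets.
trip_tq = get_time_query_for_trip_like(esda.CLEANED_TRIP_KEY, test_trip_id)
trip_loc_df = ts.get_data_df("background/filtered_location", trip_tq)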
def delete_data(all_users):
    new_data = False
    for user in all_users:
        ts = esta.TimeSeries.get_time_series(user["uuid"])
        start = arrow.get("2019-01-01").timestamp  # arrow.utcnow().float_timestamp - (3600 * 6)
        end = arrow.utcnow().float_timestamp
        tq = estt.TimeQuery("data.start_ts", start, end)
        is_df = ts.get_data_df("analysis/inferred_section", time_query=tq)
        if is_df.empty:
            continue
        new_data = True
        for index, row in is_df.iterrows():
            entity_id = "urn:" + str(row["cleaned_section"])
            r = requests.delete(
                "http://cema.nlehd.de:2042/ngsi-ld/v1/temporal/entities/" + entity_id,
                headers={"Content-Type": "application/ld+json"},
            )
            print(r)
            r = requests.delete(
                "http://cema.nlehd.de:2042/ngsi-ld/v1/entities/" + entity_id,
                headers={"Content-Type": "application/ld+json"},
            )
            print(r)
    if not new_data:
        print("Did not find any new data")
def count_query():
    edb.pm_address = request.json['pm_address']
    # Dummy id used as a placeholder. It must be consistent for each user but
    # otherwise doesn't matter. An optimization would remove all instances of user_uuid.
    user_uuid = request.json['uuid']
    query = request.json['query']
    query_obj = saq.AE(1)
    cost = query_obj.generate_diff_priv_cost(query['alpha'], query['offset'])
    # Try to deduct the cost from the available privacy budget
    available_budget = safmt.deduct_budget(edb.pm_address, cost)
    if not available_budget:
        # Query could not complete, no budget remaining
        return {"success": False}

    start_time = query['start_ts']
    end_time = query['end_ts']
    time_query = estt.TimeQuery("data.ts", start_time, end_time)
    region = query['sel_region']
    if region is None:
        geo_query = None
    else:
        geo_query = estg.GeoQuery(["data.loc"], region)
    loc_entry_list = esda.get_entries(esda.CLEANED_LOCATION_KEY, user_uuid,
                                      time_query=time_query, geo_query=geo_query)
    convert_objectid_to_string(loc_entry_list)
    ret_val = 1 if len(loc_entry_list) > 0 else 0
    return {"success": True, "results": ret_val}
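# Hedged example of the JSON payload count_query expects, reconstructed from
# the fields it reads; every value below is an illustrative placeholder.
example_count_request = {
    "pm_address": "dummy-privacy-market-address",
    "uuid": "dummy-user-id",
    "query": {
        "alpha": 0.05,           # differential-privacy cost inputs
        "offset": 1,
        "start_ts": 1440658800,
        "end_ts": 1440745200,
        "sel_region": None,      # or a GeoJSON region to add a geo query
    },
}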
def getTimeseriesEntries(time_type):
    if 'user' not in request.json:
        abort(401, "only a user can read his/her data")
    user_uuid = getUUID(request)
    key_list = request.json['key_list']
    if 'from_local_date' in request.json and 'to_local_date' in request.json:
        start_time = request.json['from_local_date']
        end_time = request.json['to_local_date']
        time_query = esttc.TimeComponentQuery("metadata.write_ts",
                                              start_time, end_time)
    else:
        start_time = request.json['start_time']
        end_time = request.json['end_time']
        time_query = estt.TimeQuery("metadata.write_ts",
                                    start_time, end_time)
    # Note that queries from usercache are limited to 100,000 entries
    # and entries from timeseries are limited to 250,000, so we will
    # return at most 350,000 entries. So this means that we don't need
    # additional filtering, but this should be documented in the API
    data_list = esdc.find_entries(user_uuid, key_list, time_query)
    return {'phone_data': data_list}
def getPublicData():
    ids = request.json['phone_ids']
    # Use lists rather than lazy map objects, since we iterate over these
    # multiple times and add the count lists together below
    all_uuids = [UUID(id) for id in ids]
    uuids = [uuid for uuid in all_uuids if uuid in estag.TEST_PHONE_IDS]

    from_ts = request.query.from_ts
    to_ts = request.query.to_ts

    time_range = estt.TimeQuery("metadata.write_ts", float(from_ts), float(to_ts))
    time_query = time_range.get_query()

    user_queries = [{'user_id': id} for id in uuids]
    for q in user_queries:
        q.update(time_query)

    num_entries_ts = [edb.get_timeseries_db().find(q).count() for q in user_queries]
    num_entries_uc = [edb.get_usercache_db().find(q).count() for q in user_queries]
    total_entries = sum(num_entries_ts + num_entries_uc)
    logging.debug("Total entries requested: %d" % total_entries)

    threshold = 200000
    if total_entries > threshold:
        data_list = None
    else:
        data_list = [esdc.find_entries(u, None, time_range) for u in all_uuids]

    return {'phone_data': data_list}
def get_ongoing_motion_in_range(start_ts, end_ts, timeseries):
    tq = estt.TimeQuery(timeType="data.ts", startTs=start_ts, endTs=end_ts)
    motion_list = list(timeseries.find_entries(["background/motion_activity"], tq))
    logging.debug("Found %s motion_activity entries in range %s -> %s" %
                  (len(motion_list), tq.startTs, tq.endTs))
    logging.debug("sample activities are %s" % motion_list[0:5])
    return motion_list
def testExtraQueries(self):
    ts = esta.TimeSeries.get_time_series(self.testUUID)
    # Query for all of Aug
    tq = estt.TimeQuery("metadata.write_ts", 1438387200, 1441065600)
    ignored_phones = {"user_id": {"$nin": [self.testUUID]}}
    # user_id is in both the extra query and the base query
    with self.assertRaises(AttributeError):
        list(ts.find_entries(time_query=tq, extra_query_list=[ignored_phones]))
def testSegmentationWrapperIOS(self):
    eaist.segment_current_trips(self.iosUUID)
    # The previous line should have created places and trips and stored
    # them into the database. Now, we want to query to ensure that they
    # were created correctly.
    tq_place = estt.TimeQuery("data.enter_ts", 1446796800, 1446847600)
    created_places_entries = esda.get_entries(esda.RAW_PLACE_KEY,
                                              self.iosUUID, tq_place)

    tq_trip = estt.TimeQuery("data.start_ts", 1446796800, 1446847600)
    created_trips_entries = esda.get_entries(esda.RAW_TRIP_KEY,
                                             self.iosUUID, tq_trip)

    for i, place in enumerate(created_places_entries):
        logging.debug("Retrieved places %s: %s -> %s" %
                      (i, place.data.enter_fmt_time, place.data.exit_fmt_time))
    for i, trip in enumerate(created_trips_entries):
        logging.debug("Retrieved trips %s: %s -> %s" %
                      (i, trip.data.start_fmt_time, trip.data.end_fmt_time))

    # The first place is the start of the chain, so it has an enter_ts of None
    # and it won't be retrieved by the query on enter_ts that we use here.
    self.assertEqual(len(created_places_entries), 2)
    self.assertEqual(len(created_trips_entries), 2)

    # Pick the two trips and the first place and ensure that they are all
    # linked correctly. Note that this is the first place retrieved, not the
    # true first place, because the true first place will not be retrieved by
    # the query, as shown above.
    trip0 = created_trips_entries[0]
    trip1 = created_trips_entries[1]
    place0 = created_places_entries[0]

    self.assertEqual(trip0.data.end_place, place0.get_id())
    self.assertEqual(trip1.data.start_place, place0.get_id())
    self.assertEqual(place0.data.ending_trip, trip0.get_id())
    self.assertEqual(place0.data.starting_trip, trip1.get_id())

    self.assertEqual(round(trip0.data.duration), 14 * 60 + 41)
    self.assertEqual(round(trip1.data.duration), 1 * 60 * 60 + 50 * 60 + 56)

    self.assertIsNotNone(place0.data.location)
def geojson_incidents_in_range(user_id, start_ts, end_ts):
    MANUAL_INCIDENT_KEY = "manual/incident"
    ts = esta.TimeSeries.get_time_series(user_id)
    uc = enua.UserCache.getUserCache(user_id)
    tq = estt.TimeQuery("data.ts", start_ts, end_ts)
    incident_entry_docs = list(ts.find_entries([MANUAL_INCIDENT_KEY], time_query=tq)) \
                          + list(uc.getMessage([MANUAL_INCIDENT_KEY], tq))
    incidents = [ecwe.Entry(doc) for doc in incident_entry_docs]
    return list(map(incident_to_geojson, incidents))
def get_user_input_from_cache_series(user_id, trip_obj, user_input_key):
    tq = estt.TimeQuery("data.start_ts", trip_obj.data.start_ts,
                        trip_obj.data.end_ts)
    potential_candidates = estsc.find_entries(user_id, [user_input_key], tq)
    if len(potential_candidates) == 0:
        return None
    sorted_pc = sorted(potential_candidates, key=lambda c: c["metadata"]["write_ts"])
    # take the last entry of the *sorted* list, so that we get the most
    # recently written candidate
    most_recent_entry = sorted_pc[-1]
    logging.debug("most recent entry has id %s" % most_recent_entry["_id"])
    logging.debug("and is mapped to entry %s" % most_recent_entry)
    return ecwe.Entry(most_recent_entry)
def get_all_points_for_range(user_id, key, start_ts, end_ts):
    import emission.storage.timeseries.timequery as estt
    # import emission.core.wrapper.location as ecwl

    tq = estt.TimeQuery("metadata.write_ts", start_ts, end_ts)
    ts = esta.TimeSeries.get_time_series(user_id)
    entry_it = ts.find_entries([key], tq)
    points_array = [ecwe.Entry(entry) for entry in entry_it]
    return get_feature_list_for_point_array(points_array)
def testNoOverrides(self):
    tq = estt.TimeQuery("metadata.write_ts", 1440658800, 1440745200)
    eacc.save_all_configs(self.androidUUID, tq)
    saved_entries = list(edb.get_usercache_db().find({
        'user_id': self.androidUUID,
        'metadata.key': 'config/sensor_config'
    }))
    self.assertEqual(len(saved_entries), 0)
def testOneOverride(self):
    cfg_1 = copy.copy(self.dummy_config)
    cfg_1['metadata']['write_ts'] = 1440700000
    edb.get_timeseries_db().insert(cfg_1)

    tq = estt.TimeQuery("metadata.write_ts", 1440658800, 1440745200)
    eacc.save_all_configs(self.androidUUID, tq)

    saved_entries = list(edb.get_usercache_db().find({
        'user_id': self.androidUUID,
        'metadata.key': 'config/sensor_config'
    }))
    self.assertEqual(len(saved_entries), 1)
    logging.debug(saved_entries[0])
    self.assertEqual(saved_entries[0]['data']['is_duty_cycling'],
                     cfg_1['data']['is_duty_cycling'])
def testQueryStops(self):
    new_stop = etsa.savePlaceLike(self, esda.RAW_STOP_KEY, ecws.Stop)
    new_stop["data"]["trip_id"] = self.test_trip_id
    estb.BuiltinTimeSeries.update(new_stop)

    ret_arr_one = esds.get_stops_for_trip(self.testUserId, self.test_trip_id)
    self.assertEqual(len(ret_arr_one), 1)
    self.assertEqual(ret_arr_one, [new_stop])

    ret_arr_list = esds.get_stops_for_trip_list(self.testUserId, [self.test_trip_id])
    self.assertEqual(ret_arr_one, ret_arr_list)

    ret_arr_time = esda.get_objects(esda.RAW_STOP_KEY, self.testUserId,
                                    estt.TimeQuery("data.enter_ts", 4, 6))
    self.assertEqual([entry.data for entry in ret_arr_list], ret_arr_time)
def testOldOverride(self):
    cfg_1 = copy.copy(self.dummy_config)
    cfg_1['metadata']['write_ts'] = 1440500000
    edb.get_timeseries_db().insert(cfg_1)

    cfg_2 = copy.copy(self.dummy_config)
    cfg_2['metadata']['write_ts'] = 1440610000
    edb.get_timeseries_db().insert(cfg_2)

    tq = estt.TimeQuery("metadata.write_ts", 1440658800, 1440745200)
    eacc.save_all_configs(self.androidUUID, tq)

    saved_entries = list(edb.get_usercache_db().find({
        'user_id': self.androidUUID,
        'metadata.key': 'config/sensor_config'
    }))
    self.assertEqual(len(saved_entries), 0)
def getTimeseriesEntries(time_type):
    if 'user' not in request.json:
        abort(401, "only a user can read his/her data")
    user_uuid = getUUID(request)
    key_list = request.json['key_list']
    if 'from_local_date' in request.json and 'to_local_date' in request.json:
        start_time = request.json['from_local_date']
        end_time = request.json['to_local_date']
        time_key = request.json.get('key_local_date', 'metadata.write_ts')
        time_query = esttc.TimeComponentQuery(time_key, start_time, end_time)
    else:
        start_time = request.json['start_time']
        end_time = request.json['end_time']
        time_key = request.json.get('key_time', 'metadata.write_ts')
        time_query = estt.TimeQuery(time_key, start_time, end_time)
    # Note that queries from usercache are limited to 100,000 entries
    # and entries from timeseries are limited to 250,000, so we will
    # return at most 350,000 entries. So this means that we don't need
    # additional filtering, but this should be documented in the API
    data_list = esdc.find_entries(user_uuid, key_list, time_query)
    if 'max_entries' in request.json:
        me = request.json['max_entries']
        if type(me) != int:
            logging.error("aborting: max entry count is %s, type %s, expected int" %
                          (me, type(me)))
            abort(500, "Invalid max_entries %s" % me)
        if len(data_list) > me:
            # the truncation branches must be a single if/elif chain so that
            # the final else only fires for an unknown trunc_method
            if request.json['trunc_method'] == 'first':
                logging.debug("first n entries is %s" % me)
                data_list = data_list[:me]
            elif request.json['trunc_method'] == 'last':
                logging.debug("last n entries is %s" % me)
                data_list = data_list[-me:]
            elif request.json["trunc_method"] == "sample":
                sample_rate = len(data_list) // me + 1
                logging.debug("sampling rate is %s" % sample_rate)
                data_list = data_list[::sample_rate]
            else:
                logging.error("aborting: unexpected sampling method %s" %
                              request.json["trunc_method"])
                abort(500, "sampling method not specified while retrieving limited data")
        else:
            logging.debug("Found %d entries < %s, no truncation" % (len(data_list), me))
    logging.debug("successfully returning list of size %s" % len(data_list))
    return {'phone_data': data_list}
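# Hedged example of a request body this endpoint would accept, reconstructed
# from the fields it reads; the key and timestamp values are illustrative
# placeholders, not from the source.
example_entries_request = {
    "key_list": ["background/filtered_location"],
    "start_time": 1440658800,
    "end_time": 1440745200,
    "key_time": "metadata.write_ts",
    "max_entries": 5000,
    "trunc_method": "sample",    # or "first" / "last"
}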
def testSegmentationPointsDwellSegmentationDistFilter(self):
    ts = esta.TimeSeries.get_time_series(self.iosUUID)
    tq = estt.TimeQuery("metadata.write_ts", 1446796800, 1446847600)
    dstdsm = dsdf.DwellSegmentationDistFilter(time_threshold=10 * 60,  # 10 mins
                                              point_threshold=10,
                                              distance_threshold=100)  # 100 m
    segmentation_points = dstdsm.segment_into_trips(ts, tq)
    for (start, end) in segmentation_points:
        logging.debug("trip is from %s (%f) -> %s (%f)" %
                      (start.fmt_time, start.ts, end.fmt_time, end.ts))
    self.assertIsNotNone(segmentation_points)
    self.assertEqual(len(segmentation_points), 3)
    self.assertEqual([start.ts for (start, end) in segmentation_points],
                     [1446797042.282652, 1446821561.559255, 1446825828.465837])
    self.assertEqual([end.ts for (start, end) in segmentation_points],
                     [1446797923.682973, 1446825092.302420, 1446828217.125328])
def testQuerySections(self):
    new_section = ecws.Section()
    new_section.start_ts = 5
    new_section.end_ts = 6
    new_section.trip_id = self.test_trip_id
    esta.TimeSeries.get_time_series(self.testUserId).insert_data(
        self.testUserId, esda.RAW_SECTION_KEY, new_section)

    ret_arr_one = esds.get_sections_for_trip(self.testUserId, self.test_trip_id)
    self.assertEqual(len(ret_arr_one), 1)
    self.assertEqual([entry.data for entry in ret_arr_one], [new_section])

    ret_arr_list = esds.get_sections_for_trip_list(self.testUserId, [self.test_trip_id])
    self.assertEqual(ret_arr_one, ret_arr_list)

    ret_arr_time = esda.get_objects(esda.RAW_SECTION_KEY, self.testUserId,
                                    estt.TimeQuery("data.start_ts", 4, 6))
    self.assertEqual([entry.data for entry in ret_arr_list], ret_arr_time)
def get_user_input_for_trip_object(ts, trip_obj, user_input_key):
    tq = estt.TimeQuery("data.start_ts", trip_obj.data.start_ts,
                        trip_obj.data.end_ts)
    # In general, all candidates will have the same start_ts, so there is no
    # point in sorting by it. The only exception to the general rule is when
    # the user first provides input before the pipeline is run, and then
    # overwrites it after the pipeline is run
    potential_candidates = ts.get_data_df(user_input_key, tq)
    if len(potential_candidates) == 0:
        return None

    sorted_pc = potential_candidates.sort_values(by="metadata_write_ts")
    # take the last row of the *sorted* dataframe, so that we get the most
    # recently written candidate
    most_recent_entry_id = sorted_pc._id.iloc[-1]
    logging.debug("most recent entry has id %s" % most_recent_entry_id)
    ret_val = ts.get_entry_from_id(user_input_key, most_recent_entry_id)
    logging.debug("and is mapped to entry %s" % ret_val)
    return ret_val
def testSegmentationPointsSmoothedHighConfidenceMotion(self):
    ts = esta.TimeSeries.get_time_series(self.androidUUID)
    tq = estt.TimeQuery("metadata.write_ts", 1440695152.989, 1440699266.669)
    shcmsm = shcm.SmoothedHighConfidenceMotion(60, 100,
                                               [ecwm.MotionTypes.TILTING,
                                                ecwm.MotionTypes.UNKNOWN,
                                                ecwm.MotionTypes.STILL])
    segmentation_points = shcmsm.segment_into_sections(ts, 0, tq)
    for (start, end, motion) in segmentation_points:
        logging.info("section is from %s (%f) -> %s (%f) using mode %s" %
                     (start.fmt_time, start.ts, end.fmt_time, end.ts, motion))
    self.assertIsNotNone(segmentation_points)
    self.assertEqual(len(segmentation_points), 2)
    self.assertEqual([start.ts for (start, end, motion) in segmentation_points],
                     [1440695873.453, 1440698306.892])
    self.assertEqual([end.ts for (start, end, motion) in segmentation_points],
                     [1440698066.704, 1440699234.834])
def testSegmentationPointsDwellSegmentationTimeFilter(self):
    ts = esta.TimeSeries.get_time_series(self.androidUUID)
    tq = estt.TimeQuery("metadata.write_ts", 1440658800, 1440745200)
    dstfsm = dstf.DwellSegmentationTimeFilter(time_threshold=5 * 60,  # 5 mins
                                              point_threshold=10,
                                              distance_threshold=100)  # 100 m
    segmentation_points = dstfsm.segment_into_trips(ts, tq)
    for (start, end) in segmentation_points:
        logging.debug("trip is from %s (%f) -> %s (%f)" %
                      (start.fmt_time, start.ts, end.fmt_time, end.ts))
    self.assertIsNotNone(segmentation_points)
    self.assertEqual(len(segmentation_points), 8)
    self.assertEqual([start.ts for (start, end) in segmentation_points],
                     [1440688739.672, 1440689662.943, 1440690718.768,
                      1440695152.989, 1440699933.687, 1440716367.376,
                      1440720239.012, 1440728519.971])
    self.assertEqual([end.ts for (start, end) in segmentation_points],
                     [1440689408.302, 1440690108.678, 1440694424.894,
                      1440699298.535, 1440700070.129, 1440719699.470,
                      1440723334.898, 1440729184.411])
def testRemoveOutliers(self):
    TS_START = 12345
    for i in range(0, 10):
        dummy_loc = ecwl.Location({
            "ts": TS_START + i,
            "lat": 50 + i,
            "lng": 180 + i
        })
        self.ts.insert(ecwe.Entry.create_entry(self.testUUID,
                                               "background/filtered_location",
                                               dummy_loc))

    tq = estt.TimeQuery("data.ts", TS_START - 10, TS_START + 10 + 10)
    loc_entries = list(self.ts.find_entries(["background/filtered_location"], tq))
    loc_df = self.ts.get_data_df("background/filtered_location", tq)
    filtered_loc_df = eaicc.remove_outliers(loc_entries, loc_df["_id"])
    self.assertEqual(len(loc_entries), len(loc_df))
    self.assertEqual(len(filtered_loc_df), 0)
def get_trip_for_user_input_obj(ts, ui_obj):
    # The match check that we have is:
    # - user input can start after trip start
    # - user input can end before trip end OR user input is within 5 mins of trip end
    # Given those considerations, there is no principled query for trip data
    # that fits into our query model:
    # the trip start is before the user input start, but that can go back
    # indefinitely, and the trip end can be either before or after the user
    # input end. We know that the trip end is after the user input start, but
    # again, that can go on until now.
    # As a workaround, let us assume that the trip start is no more than a day
    # before the start of the ui object, which seems like a fairly conservative
    # assumption
    ONE_DAY = 24 * 60 * 60
    tq = estt.TimeQuery("data.start_ts", ui_obj.data.start_ts - ONE_DAY,
                        ui_obj.data.start_ts + ONE_DAY)
    potential_candidates = ts.find_entries(["analysis/confirmed_trip"], tq)
    return final_candidate(valid_trip(ts, ui_obj), potential_candidates)
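# Hedged worked example of the one-day window above, for a hypothetical user
# input starting at ts = 1600000000; the bounds work out to:
ui_start_ts = 1600000000                        # placeholder timestamp
example_tq = estt.TimeQuery("data.start_ts",
                            ui_start_ts - 24 * 60 * 60,   # 1599913600
                            ui_start_ts + 24 * 60 * 60)   # 1600086400
# so any confirmed trip that started within a day on either side of the input
# is a potential candidate; final_candidate/valid_trip then narrow the set.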