Example #1
    def testLocalRangeRolloverQuery(self):
        """
        Search for all entries between 8:18 and 9:08 local time, both inclusive
        """
        start_local_dt = ecwl.LocalDate({
            'year': 2015,
            'month': 8,
            'hour': 8,
            'minute': 18
        })
        end_local_dt = ecwl.LocalDate({
            'year': 2015,
            'month': 8,
            'hour': 9,
            'minute': 8
        })
        final_query = {"user_id": self.testUUID}
        final_query.update(
            esdl.get_range_query("data.local_dt", start_local_dt,
                                 end_local_dt))
        entries = edb.get_timeseries_db().find(final_query).sort(
            'data.ts', pymongo.ASCENDING)
        self.assertEqual(448, entries.count())

        entries_list = list(entries)

        # Note that since this is a set of filters, as opposed to a range, this
        # returns all entries between 18 and 8 in both hours.
        # so 8:18 is valid, but so is 9:57
        self.assertEqual(ecwe.Entry(entries_list[0]).data.local_dt.hour, 8)
        self.assertEqual(ecwe.Entry(entries_list[0]).data.local_dt.minute, 18)
        self.assertEqual(ecwe.Entry(entries_list[-1]).data.local_dt.hour, 9)
        self.assertEqual(ecwe.Entry(entries_list[-1]).data.local_dt.minute, 57)
Example #2
    def setUp(self):
        # We need to access the database directly sometimes in order to
        # forcibly insert entries for the tests to pass. But we put the import
        # in here to reduce the temptation to use the database directly elsewhere.
        import emission.core.get_database as edb
        import uuid

        self.testUUID = uuid.uuid4()
        self.ts = esta.TimeSeries.get_time_series(self.testUUID)
        with open("emission/tests/data/smoothing_data/trip_list.txt") as tfp:
            self.trip_entries = json.load(tfp, object_hook=bju.object_hook)
        for trip_entry in self.trip_entries:
            trip_entry["user_id"] = self.testUUID
            self.ts.insert(trip_entry)

        self.trip_entries = [ecwe.Entry(t) for t in self.trip_entries]

        with open(
                "emission/tests/data/smoothing_data/section_list.txt") as sfp:
            self.section_entries = json.load(sfp, object_hook=bju.object_hook)
        for section_entry in self.section_entries:
            section_entry["user_id"] = self.testUUID
            self.ts.insert(section_entry)

        self.section_entries = [ecwe.Entry(s) for s in self.section_entries]
Example #3
def section_to_geojson(section, tl):
    """
    This is the trickiest part of the visualization.
    The section is basically a collection of points with a line through them.
    So the representation is a feature collection with one feature, which is the line, and one feature collection, which is the set of point features.
    :param section: the section to be converted
    :return: a feature collection which is the geojson version of the section
    """

    ts = esta.TimeSeries.get_time_series(section.user_id)
    entry_it = ts.find_entries(["analysis/recreated_location"],
                               esda.get_time_query_for_trip_like(
                                   "analysis/cleaned_section",
                                   section.get_id()))

    # TODO: Decide whether we want to rewrite this to use dataframes throughout instead of python arrays.
    # dataframes insert nans. We could use fillna to fill with default values, but if we are not actually
    # using dataframe features here, it is unclear how much that would help.
    feature_array = []
    section_location_entries = [ecwe.Entry(entry) for entry in entry_it]
    if len(section_location_entries) != 0:
        logging.debug("first element in section_location_array = %s" % section_location_entries[0])

        if not ecc.compare_rounded_arrays(section.data.end_loc.coordinates,
                                          section_location_entries[-1].data.loc.coordinates,
                                          digits=4):
            logging.info("section_location_array[-1].data.loc %s != section.data.end_loc %s even after df.ts fix, filling gap" % \
                    (section_location_entries[-1].data.loc, section.data.end_loc))
            last_loc_doc = ts.get_entry_at_ts("background/filtered_location", "data.ts", section.data.end_ts)
            if last_loc_doc is None:
                logging.warning("can't find entry to patch gap, leaving gap")
            else:
                last_loc_entry = ecwe.Entry(last_loc_doc)
                logging.debug("Adding new entry %s to fill the end point gap between %s and %s"
                   % (last_loc_entry.data.loc, section_location_entries[-1].data.loc,
                        section.data.end_loc))
                section_location_entries.append(last_loc_entry)

    points_line_feature = point_array_to_line(section_location_entries)
    points_line_feature.id = str(section.get_id())
    points_line_feature.properties.update(copy.copy(section.data))
    # Update works on dicts, convert back to a section object to make the modes
    # work properly
    points_line_feature.properties = ecwcs.Cleanedsection(points_line_feature.properties)
    points_line_feature.properties["feature_type"] = "section"
    points_line_feature.properties["sensed_mode"] = str(points_line_feature.properties.sensed_mode)

    _del_non_derializable(points_line_feature.properties, ["start_loc", "end_loc"])

    # feature_array.append(gj.FeatureCollection(points_feature_array))
    feature_array.append(points_line_feature)

    return gj.FeatureCollection(feature_array)
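
The point_array_to_line helper used above is not shown in this example. A minimal sketch of what such a helper might look like, assuming each location entry carries a GeoJSON point dict under data.loc (an illustration, not the project's actual implementation):

import geojson as gj

def point_array_to_line_sketch(location_entries):
    # Build a LineString whose coordinates follow the section's points in order.
    # Properties start empty; section_to_geojson fills them in afterwards.
    coords = [e.data.loc["coordinates"] for e in location_entries]
    return gj.Feature(geometry=gj.LineString(coords), properties={})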
Example #4
    def testRemoveAllOutliers(self):
        etc.setupRealExample(
            self, "emission/tests/data/real_examples/shankari_2016-06-20")
        self.ts = esta.TimeSeries.get_time_series(self.testUUID)
        eaist.segment_current_trips(self.testUUID)
        eaiss.segment_current_sections(self.testUUID)
        eaicl.filter_current_sections(self.testUUID)
        # get all sections
        sections = [
            ecwe.Entry(s) for s in self.ts.find_entries([esda.RAW_SECTION_KEY],
                                                        time_query=None)
        ]
        for section in sections:
            filtered_points_entry_doc = self.ts.get_entry_at_ts(
                "analysis/smoothing", "data.section", section.get_id())
            if filtered_points_entry_doc is not None:
                logging.debug("Found smoothing result for section %s" %
                              section.get_id())
                # Setting the set of deleted points to everything
                loc_tq = esda.get_time_query_for_trip_like(
                    esda.RAW_SECTION_KEY, section.get_id())
                loc_df = self.ts.get_data_df("background/filtered_location",
                                             loc_tq)
                filtered_points_entry_doc["data"]["deleted_points"] = loc_df[
                    "_id"].tolist()
                self.ts.update(ecwe.Entry(filtered_points_entry_doc))

        # All we care about is that this should not crash.
        eaicr.clean_and_resample(self.testUUID)

        # Most of the trips have zero length, but apparently one has non-zero length
        # because the stop length is non zero!!
        # So there is only one cleaned trip left
        cleaned_trips_df = self.ts.get_data_df(esda.CLEANED_TRIP_KEY,
                                               time_query=None)
        self.assertEqual(len(cleaned_trips_df), 1)

        # We don't support squishing sections, but we only store stops and sections
        # for non-squished trips. And this non-squished trip happens to have
        # two sections and one stop
        cleaned_sections_df = self.ts.get_data_df(esda.CLEANED_SECTION_KEY,
                                                  time_query=None)
        self.assertEqual(len(cleaned_sections_df), 2)
        self.assertEqual(cleaned_sections_df.distance.tolist(), [0, 0])

        cleaned_stops_df = self.ts.get_data_df(esda.CLEANED_STOP_KEY,
                                               time_query=None)
        self.assertEqual(len(cleaned_stops_df), 1)
        self.assertAlmostEqual(cleaned_stops_df.distance[0], 3252, places=0)
Example #5
    def testWrapActivity(self):
        testEntryJSON = {
            '_id': '55a4418c7d65cb39ee9737d2',
            'data': {
                'type': 5,
                'confidence': 100,
                'ts': 1436826360.493
            },
            'metadata': {
                'key': 'background/motion_activity',
                'platform': 'android',
                'read_ts': 0,
                'type': 'message',
                'write_ts': 1436826360.493,
                'write_fmt_time': '2015-07-13 15:26:00.493000-07:00'
            },
            'user_id': UUID('0763de67-f61e-3f5d-90e7-518e69793954')
        }
        entry = ecwe.Entry(testEntryJSON)
        self.assertEquals(entry.metadata.key, 'background/motion_activity')
        self.assertEquals(entry.metadata.type, 'message')
        self.assertEquals(entry.data.type, ecwm.MotionTypes.TILTING)
        self.assertEquals(entry.data.confidence, 100)
        logging.debug("activity time = %s, written at %s (%s)" %
                      (entry.data.ts, entry.metadata.write_ts,
                       entry.metadata.write_fmt_time))
Example #6
def get_last_place_entry(key, user_id):
    """
    There are many ways to find the last place.  One would be to find the one
    with the max enter_ts.  But that is not performant because we would need to
    retrieve all the enter_ts and find their max, which is expensive. Instead, we
    use the property that we process data in chunks of trips, so the last place
    would have been created and entered but not exited.
    :param key:
    """
    ts = esta.TimeSeries.get_time_series(user_id)
    ret_place_doc = ts.analysis_timeseries_db.find_one({
        'user_id': user_id,
        'metadata.key': key,
        'data.exit_ts': {
            '$exists': False
        }
    })
    logging.debug("last place doc = %s" % ret_place_doc)
    if ret_place_doc is None:
        return None
    ret_place = ecwe.Entry(ret_place_doc)
    assert ('exit_ts' not in ret_place.data)
    assert ('exit_fmt_time' not in ret_place.data)
    assert ('starting_trip' not in ret_place.data)
    return ret_place
Example #7
    def testLocalDateReadWrite(self):
        ts = esta.TimeSeries.get_time_series(self.testUUID)
        start_ts = arrow.now().timestamp
        ma_ts = 1460586729
        local_dt = ecwl.LocalDate.get_local_date(ma_ts, "America/Los_Angeles")
        fmt_time = arrow.get(ma_ts).to("America/Los_Angeles").isoformat()
        ma = ecwm.Motionactivity({
            "ts": 1460586729,
            "local_dt": local_dt,
            "fmt_time": fmt_time
        })
        ma_entry = ecwe.Entry.create_entry(self.testUUID,
                                           "background/motion_activity", ma)
        ts.insert(ma_entry)
        ret_entry = ecwe.Entry(
            ts.get_entry_at_ts("background/motion_activity", "data.ts",
                               1460586729))

        self.assertGreaterEqual(ret_entry.metadata.write_ts, start_ts)
        metadata_dt = arrow.get(ret_entry.metadata.write_ts).to(
            ret_entry.metadata.time_zone).datetime
        self.assertEqual(metadata_dt.hour,
                         ret_entry.metadata.write_local_dt.hour)
        self.assertEqual(metadata_dt.minute,
                         ret_entry.metadata.write_local_dt.minute)
        self.assertEqual(metadata_dt.weekday(),
                         ret_entry.metadata.write_local_dt.weekday)

        self.assertEqual(ret_entry.data.local_dt.hour, 15)
        self.assertEqual(ret_entry.data.local_dt.month, 4)
        self.assertEqual(ret_entry.data.local_dt.weekday, 2)
        self.assertEqual(ret_entry.data.fmt_time, "2016-04-13T15:32:09-07:00")
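
For reference, the assertions above match the wall-clock decomposition that arrow itself produces for this timestamp; a small standalone check (assuming LocalDate simply stores these components):

import arrow

dt = arrow.get(1460586729).to("America/Los_Angeles")
print(dt.isoformat())                   # 2016-04-13T15:32:09-07:00
print(dt.hour, dt.month, dt.weekday())  # 15 4 2 (i.e. 3pm, April, Wednesday)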
Example #8
    def testWrapLocation(self):
        testEntryJSON = {'_id': '55a4418c7d65cb39ee9737cf',
         'data': {'accuracy': 52.5,
          'altitude': 0,
          'bearing': 0,
          'elapsedRealtimeNanos': 100623898000000,
          'latitude': 37.3885529,
          'longitude': -122.0879696,
          'loc': {"coordinates": [-122.0879696, 37.3885529], "type": "Point"},
          'sensed_speed': 0,
          'ts': 1436826356.852},
         'metadata': {'key': 'background/location',
          'platform': 'android',
          'read_ts': 0,
          'type': 'message',
          'write_ts': 1436826357.115,
          'write_fmt_time': '2015-07-13 15:25:57.115000-07:00'
        },
        'user_id': UUID('0763de67-f61e-3f5d-90e7-518e69793954')}

        entry = ecwe.Entry(testEntryJSON)
        self.assertEqual(entry.metadata.key, 'background/location')
        self.assertEqual(entry.metadata.type, 'message')
        self.assertEqual(entry.data.latitude, 37.3885529)
        self.assertEqual(entry.data.longitude, -122.0879696)
        # self.assertEqual(entry.data.loc, gj.Point((-122.0879696, 37.3885529)))
        self.assertTrue(isinstance(entry.data.loc, gj.Point))
        logging.debug("location time = %s, written at %s (%s)" % 
            (entry.data.ts, entry.metadata.write_ts, entry.metadata.write_fmt_time))
Example #9
def create_confirmed_trips(user_id, timerange):
    ts = esta.TimeSeries.get_time_series(user_id)
    toConfirmTrips = esda.get_entries(esda.CLEANED_TRIP_KEY, user_id,
        time_query=timerange)
    logging.debug("Converting %d cleaned trips to confirmed ones" % len(toConfirmTrips))
    lastTripProcessed = None
    if len(toConfirmTrips) == 0:
        logging.debug("len(toConfirmTrips) == 0, early return")
        return None
    input_key_list = eac.get_config()["userinput.keylist"]
    for tct in toConfirmTrips:
        # Copy the trip and fill in the new values
        confirmed_trip_dict = copy.copy(tct)
        del confirmed_trip_dict["_id"]
        confirmed_trip_dict["metadata"]["key"] = "analysis/confirmed_trip"
        confirmed_trip_dict["data"]["cleaned_trip"] = tct.get_id()
        confirmed_trip_dict["data"]["user_input"] = \
            get_user_input_dict(ts, tct, input_key_list)
        confirmed_trip_entry = ecwe.Entry(confirmed_trip_dict)
        # save the entry
        ts.insert(confirmed_trip_entry)
        # if everything is successful, then update the last successful trip
        lastTripProcessed = tct

    return lastTripProcessed
Example #10
def _get_sections_for_query(section_query, sort_field):
    section_query.update({"metadata.key": "segmentation/raw_section"})
    logging.debug("Returning sections for query %s" % section_query)
    section_doc_cursor = edb.get_analysis_timeseries_db().find(
        section_query).sort(sort_field, pymongo.ASCENDING)
    logging.debug("result cursor length = %d" % section_doc_cursor.count())
    return [ecwe.Entry(doc) for doc in section_doc_cursor]
Example #11
    def savePredictionsStep(self):
        for i, currProb in enumerate(self.predictedProb):
            currSectionEntry = self.toPredictSections[i]
            currSection = currSectionEntry.data
            currProb = currProb

            # Insert the prediction
            mp = ecwm.Modeprediction()
            mp.trip_id = currSection.trip_id
            mp.section_id = currSectionEntry.get_id()
            mp.algorithm_id = ecwm.AlgorithmTypes.SIMPLE_RULE_ENGINE
            mp.predicted_mode_map = currProb
            mp.start_ts = currSection.start_ts
            mp.end_ts = currSection.end_ts
            self.ts.insert_data(self.user_id, "inference/prediction", mp)

            # There are now two predictions, but we don't want to do a bunch of
            # refactoring, so just create the inferred section object right here
            is_dict = copy.copy(currSectionEntry)
            del is_dict["_id"]
            is_dict["metadata"]["key"] = "analysis/inferred_section"
            is_dict["data"]["sensed_mode"] = ecwm.PredictedModeTypes[
                easf.select_inferred_mode([mp])].value
            is_dict["data"]["cleaned_section"] = currSectionEntry.get_id()
            ise = ecwe.Entry(is_dict)
            logging.debug("Updating sensed mode for section = %s to %s" %
                          (currSectionEntry.get_id(), ise.data.sensed_mode))
            self.ts.insert(ise)
        # Set last_section_done after saving because otherwise if there is an error
        # during inference, we will not save results and never re-run
        self.last_section_done = self.toPredictSections[-1]
Example #12
def _get_stops_for_query(stop_query, sort_key):
    logging.debug("Returning stops for query %s" % stop_query)
    stop_query.update({"metadata.key": "segmentation/raw_stop"})
    logging.debug("updated query = %s" % stop_query)
    stop_doc_cursor = edb.get_analysis_timeseries_db().find(stop_query).sort(
        sort_key, pymongo.ASCENDING)
    logging.debug("result count = %d" % stop_doc_cursor.count())
    return [ecwe.Entry(doc) for doc in stop_doc_cursor]
Example #13
def geojson_incidents_in_range(user_id, start_ts, end_ts):
    MANUAL_INCIDENT_KEY = "manual/incident"
    ts = esta.TimeSeries.get_time_series(user_id)
    uc = enua.UserCache.getUserCache(user_id)
    tq = estt.TimeQuery("data.ts", start_ts, end_ts)
    incident_entry_docs = list(ts.find_entries([MANUAL_INCIDENT_KEY], time_query=tq)) \
        + list(uc.getMessage([MANUAL_INCIDENT_KEY], tq))
    incidents = [ecwe.Entry(doc) for doc in incident_entry_docs]
    return list(map(incident_to_geojson, incidents))
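
incident_to_geojson is not shown here. A rough sketch of the kind of feature it might produce, assuming each incident entry carries a GeoJSON point in data.loc (the property names are illustrative only):

import geojson as gj

def incident_to_geojson_sketch(incident):
    # One point feature per incident, with the raw timestamp kept as a property.
    return gj.Feature(geometry=incident.data.loc,
                      properties={"feature_type": "incident", "ts": incident.data.ts})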
Example #14
    def moveToLongTerm(self):
        """
        In order to move to the long term, we need to do the following:
        a) determine the time range to be processed. We do this by checking the
            pipeline state. this does not leak information since the process
            will run whether there is data for it to work on or not. So the
            pipeline state is stored outside of the user cache.
        b) process the time range. pass in a function that works on every entry
            to convert it to the appropriate format.
        c) delete the time range once it is processed (in usercache or here?)
        d) update the pipeline state to reflect the new range (here)
        """
        # Error handling: if any of the entries has an error in processing, we
        # move it to a separate "error_usercache" and process the rest. The
        # stage is still marked successful. This means that the stage can never
        # be unsuccessful. We could try to keep it, but then the delete query
        # below will get significantly more complicated.
        uc = enua.UserCache.getUserCache(self.user_id)
        messages = uc.getMessage()
        # Here, we assume that the user only has data from a single platform.
        # Since this is a temporary hack, this is fine
        if len(messages) == 0:
            logging.debug("No messages to process")
            # Since we didn't get the current time range, there is no current 
            # state, so we don't need to mark it as done
            # esp.mark_usercache_done(None)
            return

        time_query = esp.get_time_range_for_usercache(self.user_id)

        ts = etsa.TimeSeries.get_time_series(self.user_id)

        curr_entry_it = uc.getMessage(None, time_query)
        last_ts_processed = None
        for entry_doc in curr_entry_it:
            unified_entry = None
            try:
                # We don't want to use our wrapper classes yet because they are based on the
                # standard long-term formats, and we don't yet know whether the
                # incoming entries are consistent with them. That's why we have the
                # convert_to_common_format step. So let's just wrap this in a
                # generic attrdict for now.
                entry = ad.AttrDict(entry_doc)
                unified_entry = enuf.convert_to_common_format(entry)
                ts.insert(unified_entry)
                last_ts_processed = ecwe.Entry(unified_entry).metadata.write_ts
                time_query.endTs = last_ts_processed
            except pymongo.errors.DuplicateKeyError as e:
                logging.info("document already present in timeseries, skipping since read-only")
            except Exception as e:
                logging.exception("Backtrace time")
                logging.warn("Got error %s while saving entry %s -> %s"% (e, entry, unified_entry))
                ts.insert_error(entry_doc)
        logging.debug("Deleting all entries for query %s" % time_query)
        uc.clearProcessedMessages(time_query)
        esp.mark_usercache_done(self.user_id, last_ts_processed)
Example #15
def get_all_points_for_range(user_id, key, start_ts, end_ts):
    import emission.storage.timeseries.timequery as estt
#     import emission.core.wrapper.location as ecwl 
    
    tq = estt.TimeQuery("metadata.write_ts", start_ts, end_ts)
    ts = esta.TimeSeries.get_time_series(user_id)
    entry_it = ts.find_entries([key], tq)
    points_array = [ecwe.Entry(entry) for entry in entry_it]

    return get_feature_list_for_point_array(points_array)
Example #16
def get_user_input_from_cache_series(user_id, trip_obj, user_input_key):
    tq = estt.TimeQuery("data.start_ts", trip_obj.data.start_ts, trip_obj.data.end_ts)
    potential_candidates = estsc.find_entries(user_id, [user_input_key], tq)
    if len(potential_candidates) == 0:
        return None
    sorted_pc = sorted(potential_candidates, key=lambda c:c["metadata"]["write_ts"])
    most_recent_entry = sorted_pc[-1]
    logging.debug("most recent entry has id %s" % most_recent_entry["_id"])
    logging.debug("and is mapped to entry %s" % most_recent_entry)
    return ecwe.Entry(most_recent_entry)
Example #17
def get_stops_for_trip(key, user_id, trip_id):
    """
    Get the set of stops that are children of this trip.
    """
    query = {"user_id": user_id, "data.trip_id": trip_id,
             "metadata.key": key}
    logging.debug("About to execute query %s with sort_key %s" % (query, "data.enter_ts"))
    stop_doc_cursor = edb.get_analysis_timeseries_db().find(query).sort(
        "data.enter_ts", pymongo.ASCENDING)
    return [ecwe.Entry(doc) for doc in stop_doc_cursor]
Example #18
def get_sections_for_trip(key, user_id, trip_id):
    # type: (UUID, object_id) -> list(sections)
    """
    Get the set of sections that are children of this trip.
    """
    query = {"user_id": user_id, "data.trip_id": trip_id,
             "metadata.key": key}
    section_doc_cursor = edb.get_analysis_timeseries_db().find(query).sort(
        "data.start_ts", pymongo.ASCENDING)
    logging.debug("About to execute query %s" % query)
    return [ecwe.Entry(doc) for doc in section_doc_cursor]
Example #19
    def compare_trip_result(self, result_dicts, expect_dicts):
        # This is basically a bunch of asserts to ensure that the timeline is as
        # expected. We are not using a recursive diff because things like the IDs
        # will change from run to run. Instead, I pick out a bunch of important
        # things that are highly user visible
        # Since this is deterministic, we can also include things that are not that user visible :)
        result = [ecwe.Entry(r) for r in result_dicts]
        expect = [ecwe.Entry(e) for e in expect_dicts]

        for rt, et in zip(result, expect):
            logging.debug("Comparing %s -> %s with %s -> %s" %
                          (rt.data.start_fmt_time, rt.data.end_fmt_time,
                           et.data.start_fmt_time, et.data.end_fmt_time))
        self.assertEqual(len(result), len(expect))
        for rt, et in zip(result, expect):
            logging.debug("======= Comparing trip =========")
            logging.debug(json.dumps(rt, indent=4, default=bju.default))
            logging.debug(json.dumps(et, indent=4, default=bju.default))
            # Highly user visible
            self.assertEqual(rt.data["user_input"], et.data["user_input"])
            # self.assertEqual(rt.data.inferred_primary_mode, et.data.inferred_primary_mode)
            logging.debug(20 * "=")
Example #20
def get_entries(key,
                user_id,
                time_query,
                geo_query=None,
                extra_query_list=None):
    ts = get_timeseries_for_user(user_id)
    doc_cursor = ts.find_entries([key], time_query, geo_query,
                                 extra_query_list)
    # TODO: Fix "TripIterator" and return it instead of this list
    curr_entry_list = [ecwe.Entry(doc) for doc in doc_cursor]
    logging.debug("Returning entry with length %d result" %
                  len(curr_entry_list))
    return curr_entry_list
Example #21
def get_timeline_from_dt(user_id, start_dt, end_dt):
    import emission.core.get_database as edb
    import emission.core.wrapper.entry as ecwe

    logging.info("About to query for %s -> %s" % (start_dt, end_dt))
    result_cursor = edb.get_timeseries_db().find({
        "user_id": user_id,
        "data.local_dt": {
            "$gte": start_dt,
            "$lte": end_dt
        }
    }).sort("metadata.write_ts")
    logging.debug("result cursor has %d entries" % result_cursor.count())
    result_list = list(result_cursor)
    logging.debug("result list has %d entries" % len(result_list))
    if len(result_list) == 0:
        return get_timeline(user_id, 0, 0)
    start_ts = ecwe.Entry(result_list[0]).metadata.write_ts
    end_ts = ecwe.Entry(result_list[-1]).metadata.write_ts
    logging.debug(
        "Converted datetime range %s -> %s to timestamp range %s -> %s" %
        (start_dt, end_dt, start_ts, end_ts))
    return get_timeline(user_id, start_ts, end_ts)
Example #22
    def _to_df_entry(entry_dict):
        entry = ecwe.Entry(entry_dict)
        ret_val = entry.data
        for ld_key in ret_val.local_dates:
            if ld_key in ret_val:
                for field_key in ret_val[ld_key]:
                    expanded_key = "%s_%s" % (ld_key, field_key)
                    ret_val[expanded_key] = ret_val[ld_key][field_key]
                del ret_val[ld_key]
        ret_val["_id"] = entry["_id"]
        ret_val['user_id'] = entry['user_id']
        ret_val["metadata_write_ts"] = entry["metadata"]["write_ts"]
        # logging.debug("ret_val = %s " % ret_val)
        return ret_val
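
To make the flattening above concrete, here is a small self-contained illustration; the key name "local_dt" stands in for whatever ret_val.local_dates lists for the wrapper in question:

ret_val = {"ts": 1436826356.852, "local_dt": {"hour": 15, "minute": 25}}
for ld_key in ["local_dt"]:  # stand-in for ret_val.local_dates
    for field_key in ret_val[ld_key]:
        ret_val["%s_%s" % (ld_key, field_key)] = ret_val[ld_key][field_key]
    del ret_val[ld_key]
print(ret_val)  # {'ts': 1436826356.852, 'local_dt_hour': 15, 'local_dt_minute': 25}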
Example #23
def get_filtered_points(section, filtered_section_data):
    logging.debug("Getting filtered points for section %s" % section)
    ts = esta.TimeSeries.get_time_series(section.user_id)
    loc_entry_it = ts.find_entries(["background/filtered_location"],
                                   esda.get_time_query_for_trip_like(
                                       esda.RAW_SECTION_KEY, section.get_id()))

    loc_entry_list = [ecwe.Entry(e) for e in loc_entry_it]

    # We know that the assertion fails in the geojson conversion code and we
    # handle it there, so we are just going to comment this out for now.
    # assert (loc_entry_list[-1].data.loc == section.data.end_loc,
    #         "section_location_array[-1].loc != section.end_loc even after df.ts fix",
    #         (loc_entry_list[-1].data.loc, section.data.end_loc))

    # Find the list of points to filter
    filtered_points_entry_doc = ts.get_entry_at_ts("analysis/smoothing",
                                                   "data.section",
                                                   section.get_id())

    if filtered_points_entry_doc is None:
        logging.debug(
            "No filtered_points_entry, filtered_points_list is empty")
        filtered_point_id_list = []
    else:
        # TODO: Figure out how to make collections work for the wrappers and then change this to an Entry
        filtered_points_entry = ad.AttrDict(filtered_points_entry_doc)
        filtered_point_id_list = list(
            filtered_points_entry.data.deleted_points)
        logging.debug("deleting %s points from section points" %
                      len(filtered_point_id_list))

    filtered_loc_list = remove_outliers(loc_entry_list, filtered_point_id_list)

    # filtered_loc_list has removed the outliers. Now, we resample the data at
    # 30 sec intervals
    resampled_loc_df = resample(filtered_loc_list, interval=30)
    # If this is the first section, we need to find the start place of the parent trip
    # and actually start from there. That will fix the distances but not the duration
    # because we haven't yet figured out how to get the correct start time.
    # TODO: Fix this!!
    # For now, we will fudge this in the geojson converter, as always

    with_speeds_df = eaicl.add_dist_heading_speed(resampled_loc_df)
    with_speeds_df["idx"] = np.arange(0, len(with_speeds_df))
    with_speeds_df_nona = with_speeds_df.dropna()
    logging.info("removed %d entries containing n/a" %
                 (len(with_speeds_df_nona) - len(with_speeds_df)))
    return with_speeds_df_nona
Example #24
def get_aggregate_timeline_from_dt(start_dt, end_dt, box=None):
    import emission.core.get_database as edb
    import emission.core.wrapper.entry as ecwe
    import emission.storage.decorations.place_queries as esdp
    import emission.storage.decorations.trip_queries as esdt

    if not box:
        logging.info("About to query for %s -> %s" % (start_dt, end_dt))
    else:
        logging.info("About to query for %s -> %s in %s" %
                     (start_dt, end_dt, box))
    result_cursor = edb.get_timeseries_db().find({
        "data.local_dt": {
            "$gte": start_dt,
            "$lte": end_dt
        }
    }).sort("metadata.write_ts")
    logging.debug("about to query result_cursor.count()")
    result_cursor_count = result_cursor.count()
    logging.debug("result cursor has %d entries" % result_cursor_count)
    if result_cursor_count == 0:
        return Timeline([], [])
    logging.debug("About to query for time data in result cursor")
    start_ts = ecwe.Entry(result_cursor[0]).metadata.write_ts
    end_ts = ecwe.Entry(result_cursor[result_cursor_count -
                                      1]).metadata.write_ts
    logging.debug(
        "Converted datetime range %s -> %s to timestamp range %s -> %s" %
        (start_dt, end_dt, start_ts, end_ts))
    places = esdp.get_aggregate_places(enua.UserCache.TimeQuery(
        "enter_ts", start_ts, end_ts),
                                       box=box)
    trips = esdt.get_aggregate_trips(enua.UserCache.TimeQuery(
        "start_ts", start_ts, end_ts),
                                     box=box)
    return Timeline(places, trips)
Example #25
    def savePredictionsStep(self):
        from emission.core.wrapper.user import User
        from emission.core.wrapper.client import Client

        uniqueModes = self.model.classes_

        for i in range(self.predictedProb.shape[0]):
            currSectionEntry = self.toPredictSections[i]
            currSection = currSectionEntry.data
            currProb = self.convertPredictedProbToMap(uniqueModes,
                                                      self.predictedProb[i])

            # Special handling for the AIR mode
            # AIR is not a mode that is sensed from the phone, but it is inferred
            # through some heuristics in cleanAndResample instead of through the
            # decision tree. Ideally those heuristics should be replaced by the
            # inference through the decision tree, or through a separate heuristic
            # step. But we are out of time for a bigger refactor here.
            # so we say that if the sensed mode == AIR, we are going to use it
            # directly and ignore the inferred mode
            if currSection.sensed_mode == ecwma.MotionTypes.AIR_OR_HSR:
                currProb = {'AIR_OR_HSR': 1.0}

            # Insert the prediction
            mp = ecwm.Modeprediction()
            mp.trip_id = currSection.trip_id
            mp.section_id = currSectionEntry.get_id()
            mp.algorithm_id = ecwm.AlgorithmTypes.SEED_RANDOM_FOREST
            mp.predicted_mode_map = currProb
            mp.start_ts = currSection.start_ts
            mp.end_ts = currSection.end_ts
            self.ts.insert_data(self.user_id, "inference/prediction", mp)

            # Since there is currently only one prediction, create the inferred
            # section object right here
            is_dict = copy.copy(currSectionEntry)
            del is_dict["_id"]
            is_dict["metadata"]["key"] = "analysis/inferred_section"
            is_dict["data"]["sensed_mode"] = ecwm.PredictedModeTypes[
                easf.select_inferred_mode([mp])].value
            is_dict["data"]["cleaned_section"] = currSectionEntry.get_id()
            ise = ecwe.Entry(is_dict)
            logging.debug("Updating sensed mode for section = %s to %s" %
                          (currSectionEntry.get_id(), ise.data.sensed_mode))
            self.ts.insert(ise)
        # Set last_section_done after saving because otherwise if there is an error
        # during inference, we will not save results and never re-run
        self.last_section_done = self.toPredictSections[-1]
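
convertPredictedProbToMap is not shown above. A minimal sketch of what it presumably does, given that its result is stored as predicted_mode_map and that the AIR override uses a plain {mode_name: probability} dict (illustrative only):

def convert_predicted_prob_to_map_sketch(unique_modes, prob_row):
    # Pair each class label from the classifier with its probability for this
    # section, dropping zero-probability modes to keep the map small.
    return {str(mode): float(p) for mode, p in zip(unique_modes, prob_row) if p > 0}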
Example #26
def get_entries(key,
                user_id,
                time_query,
                geo_query=None,
                extra_query_list=None):
    if user_id is not None:
        ts = esta.TimeSeries.get_time_series(user_id)
    else:
        ts = esta.TimeSeries.get_aggregate_time_series()
    doc_cursor = ts.find_entries([key], time_query, geo_query,
                                 extra_query_list)
    # TODO: Fix "TripIterator" and return it instead of this list
    curr_entry_list = [ecwe.Entry(doc) for doc in doc_cursor]
    logging.debug("Returning entry with length %d result" %
                  len(curr_entry_list))
    return curr_entry_list
Example #27
    def testLocalMatchingQuery(self):
        """
        Search for all entries that occur at minute = 8 in any hour
        """
        start_local_dt = ecwl.LocalDate({'minute': 8})
        end_local_dt = ecwl.LocalDate({'minute': 8})
        final_query = {"user_id": self.testUUID}
        final_query.update(esdl.get_range_query("data.local_dt", start_local_dt, end_local_dt))
        entries_docs = edb.get_timeseries_db().find(final_query).sort("metadata.write_ts")
        self.assertEquals(20, entries_docs.count())
        entries = [ecwe.Entry(doc) for doc in entries_docs]
        logging.debug("entries bookends are %s and %s" % (entries[0], entries[-1]))
        first_entry = entries[0]
        self.assertEquals(first_entry.data.local_dt.hour, 9)
        last_entry = entries[19]
        self.assertEquals(last_entry.data.local_dt.hour, 17)
Example #28
def get_last_entry(user_id, time_query, config_key):
    user_ts = esta.TimeSeries.get_time_series(user_id)

    # get the max write_ts for this stream, which corresponds to the last entry
    # We expect this to be small, unless users are continuously overriding values
    config_overrides = list(user_ts.find_entries([config_key], time_query))
    logging.debug("Found %d user overrides for user %s" %
                  (len(config_overrides), user_id))
    if len(config_overrides) == 0:
        logging.warning("No user defined overrides for %s, early return" %
                        user_id)
        return (None, None)
    else:
        # entries are sorted by the write_ts, we can take the last value
        coe = ecwe.Entry(config_overrides[-1])
        logging.debug("last entry is %s" % coe)
        return (coe.data, coe.metadata.write_ts)
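
The comment above relies on find_entries returning results ordered by write_ts. If that ordering were ever not guaranteed, an explicit sort before taking the last element would preserve the intent (a defensive sketch, not part of the original code):

config_overrides.sort(key=lambda e: e["metadata"]["write_ts"])
coe = ecwe.Entry(config_overrides[-1])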
Example #29
def get_first_place_entry(key, user_id):
    """
    Similar to get_last_place_entry, only finding one with only an exit_ts
    and no enter_ts.
    """
    ts = esta.TimeSeries.get_time_series(user_id)
    ret_place_doc = ts.analysis_timeseries_db.find_one({'user_id': user_id,
                                                        'metadata.key': key,
                                                        'data.enter_ts' : {'$exists': False}})
    logging.debug("first place doc = %s" % ret_place_doc)
    if ret_place_doc is None:
        return None
    ret_place = ecwe.Entry(ret_place_doc)
    assert('enter_ts' not in ret_place.data)
    assert('enter_fmt_time' not in ret_place.data)
    assert('ending_trip' not in ret_place.data)
    return ret_place
Example #30
    def insert(self, entry):
        """
        Inserts the specified entry and returns the object ID 
        """
        logging.debug("insert called")
        if type(entry) == dict:
            entry = ecwe.Entry(entry)
        if "user_id" not in entry or entry["user_id"] is None:
            entry["user_id"] = self.user_id
        if self.user_id is not None and entry["user_id"] != self.user_id:
            raise AttributeError("Saving entry %s for %s in timeseries for %s" %
                                 (entry, entry["user_id"], self.user_id))
        else:
            logging.debug("entry was fine, no need to fix it")

        logging.debug("Inserting entry %s into timeseries" % entry)
        return self.get_timeseries_db(entry.metadata.key).insert(entry)
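
A hypothetical usage sketch for insert(): it accepts either an Entry or a plain dict, and a missing user_id is filled in from the timeseries object. The literal values are copied from Example #8, and esta / user_id are assumed to be set up as in the other examples:

ts = esta.TimeSeries.get_time_series(user_id)
doc = {"metadata": {"key": "background/location", "type": "message",
                    "write_ts": 1436826357.115},
       "data": {"ts": 1436826356.852, "latitude": 37.3885529,
                "longitude": -122.0879696}}
ts.insert(doc)  # no user_id in doc, so insert() sets it to the timeseries' user_id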