Exemple #1
0
    def squish_stops(self, motion_changes, section_list):
        """
        Close the gap ("stop") between consecutive sections, in place.

        For every pair of adjacent sections whose facing end points are
        further apart than the larger of the configured start/end stop radii,
        the time gap between the first section's last point and the motion
        change is compared with the time gap between the motion change and
        the second section's first point. If one gap is at least 1.5 times
        the other, the section on the side of the larger gap is extended
        across the stop so that it shares the other section's boundary point.
        Otherwise nothing is changed for that pair.

        :param motion_changes: sequence indexed in parallel with
            ``section_list``; each item unpacks to a (start, end) pair whose
            members expose ``ts`` and ``fmt_time``
        :param section_list: list of 3-tuples (start point, end point, third
            value carried through unchanged); the points expose ``ts``,
            ``fmt_time`` and ``loc["loc"]["coordinates"]``. Modified in place.
        :return: None
        :raises ValueError: if an entry of ``section_list`` is not a 3-tuple
        :raises AssertionError: if the motion changes of a pair that needs
            squishing do not share a boundary timestamp
        """
        STOP_DISTANCE_THRESHOLD = max(
            eac.get_config()["section.startStopRadius"],
            eac.get_config()["section.endStopRadius"])

        # The first member of each pair is read from the live list, so an
        # update made to section_list[i + 1] is seen by the next iteration;
        # the second member comes from a slice copy taken up front.
        for i, x in enumerate(zip(section_list, section_list[1:])):
            try:
                ((ss1, se1, st1), (ss2, se2, st2)) = x
            except ValueError:
                # Continuing would use unbound (or stale) variables, so log
                # the diagnostics and propagate the error instead.
                logging.error(
                    "malformed section pair at index %d: lengths = %s, first = %s",
                    i, [len(xm) for xm in x], x[0])
                raise

            # i is the index of s1, i+1 is the index of s2
            stop_duration = ss2.ts - se1.ts
            stop_distance = ecc.calDistance(ss2.loc["loc"]["coordinates"],
                                            se1.loc["loc"]["coordinates"])
            logging.debug(
                "while squishing stop, %s (%d) -> %s (%d), duration = %d, dist = %d",
                se1.fmt_time, i, ss2.fmt_time, i + 1, stop_duration,
                stop_distance)
            if stop_distance <= STOP_DISTANCE_THRESHOLD:
                continue

            _, mce1 = motion_changes[i]
            mcs2, _ = motion_changes[i + 1]
            assert mce1.ts == mcs2.ts, \
                "curr motion change ends at %s, next motion change starts at %s" % \
                    (mce1.fmt_time, mcs2.fmt_time)

            # e.g. if the motion changed at 11:33, mce1.ts = mcs2.ts = 11:33
            # if the first section's last point (se1) was at 11:31 and the
            # second section's first point (ss2) was at 11:51, then we
            # should set the second section's first point to be the first
            # section's last point (e.g. ss2 = se1)
            stop_start_gap = mce1.ts - se1.ts
            stop_end_gap = ss2.ts - mcs2.ts
            smaller_gap = min(stop_start_gap, stop_end_gap)
            larger_gap = max(stop_start_gap, stop_end_gap)
            if smaller_gap == 0:
                # A boundary point that sits exactly on the motion change
                # would otherwise divide by zero. Treat it as an unbounded
                # ratio, unless both gaps are zero and nothing needs fixing.
                gap_ratio = float("inf") if larger_gap != 0 else 1.0
            else:
                gap_ratio = larger_gap / smaller_gap

            log_msg = (
                "need to squish, comparing start_gap = %d, end_gap = %d, ratio = %4f"
                % (stop_start_gap, stop_end_gap, gap_ratio))
            if gap_ratio < 1.5:
                logging.debug(log_msg +
                              ", no change, fixed in clean_and_resample")
            elif stop_start_gap < stop_end_gap:
                logging.debug(log_msg + ", setting ss2 <- se1")
                section_list[i + 1] = (se1, se2, st2)
            else:
                logging.debug(log_msg + ", setting se1 <- ss2")
                section_list[i] = (ss1, ss2, st1)
Exemple #2
0
def _get_transit_prediction(i, section_entry):
    """
    Predict a single transit mode for a section from the stops near its ends.

    Transit stops are looked up around the section's start and end locations,
    the modes predicted from those stops are fetched, and the result is
    collapsed into one mode by ``collapse_modes``.

    :param i: not referenced in this function's body
    :param section_entry: section whose ``data.start_loc`` and
        ``data.end_loc`` are used for the stop lookup
    :return: whatever ``collapse_modes`` returns for the predicted modes
    """
    config = eac.get_config()
    # The start and end radii are configured independently so that the start
    # can be made more forgiving than the end. The end lookup must therefore
    # read its own key instead of reusing the start radius.
    start_radius = config["section.startStopRadius"]
    end_radius = config["section.endStopRadius"]
    start_transit_stops = enetm.get_stops_near(section_entry.data.start_loc,
                                               start_radius)
    end_transit_stops = enetm.get_stops_near(section_entry.data.end_loc,
                                             end_radius)
    predicted_transit_modes = enetm.get_predicted_transit_mode(
        start_transit_stops, end_transit_stops)
    logging.debug("Got predicted transit mode %s" % predicted_transit_modes)
    return collapse_modes(section_entry, predicted_transit_modes)
Exemple #3
0
    def runPredictionPipeline(self, user_id, timerange):
        """
        Run all the mode prediction stages for one user and time range.

        The cleaned sections in the time range are loaded first. When there
        are none, the method returns early (after checking that no section
        was recorded as done). Otherwise the stages run in order: load the
        model, select the feature indices, build the feature matrix and ids,
        predict the mode probabilities and save the predictions.

        :param user_id: user whose sections are to be predicted
        :param timerange: time query passed to the section lookup
        :return: None
        """
        self.ts = esta.TimeSeries.get_time_series(user_id)
        self.toPredictSections = esda.get_entries(
            esda.CLEANED_SECTION_KEY, user_id, time_query=timerange)

        if len(self.toPredictSections) == 0:
            logging.debug("len(toPredictSections) == 0, early return")
            if self.last_section_done is None:
                return None
            # Nothing was processed, so nothing should be recorded as done
            logging.error("self.last_section_done == %s, expecting None" %
                          self.last_section_done)
            if eac.get_config()["classification.validityAssertions"]:
                assert False
            return None

        self.loadModelStage()
        logging.info("loadModelStage DONE")

        self.selFeatureIndices = self.selectFeatureIndicesStep()
        logging.info("selectFeatureIndicesStep DONE")

        matrix_and_ids = self.generateFeatureMatrixAndIDsStep(
            self.toPredictSections)
        (self.toPredictFeatureMatrix, self.tripIds,
         self.sectionIds) = matrix_and_ids
        logging.info("generateFeatureMatrixAndIDsStep DONE")

        # A matrix of the entries and their corresponding probabilities for
        # each classification
        self.predictedProb = self.predictModesStep()
        logging.info("predictModesStep DONE")

        self.savePredictionsStep()
        logging.info("savePredictionsStep DONE")
Exemple #4
0
def create_confirmed_trips(user_id, timerange):
    """
    Create one confirmed trip entry for every cleaned trip in the time range.

    Each cleaned trip is copied, re-keyed as "analysis/confirmed_trip",
    linked back to the cleaned trip through ``data.cleaned_trip``, given the
    user inputs returned by ``get_user_input_dict`` and inserted into the
    user's timeseries.

    :param user_id: user whose trips are to be converted
    :param timerange: time query passed to the cleaned trip lookup
    :return: the last cleaned trip that was processed, or None when there
        were no cleaned trips in the range
    """
    ts = esta.TimeSeries.get_time_series(user_id)
    cleaned_trips = esda.get_entries(esda.CLEANED_TRIP_KEY, user_id,
                                     time_query=timerange)
    logging.debug("Converting %d cleaned trips to confirmed ones" % len(cleaned_trips))
    if len(cleaned_trips) == 0:
        logging.debug("len(toConfirmTrips) == 0, early return")
        return None

    input_key_list = eac.get_config()["userinput.keylist"]
    last_processed = None
    for cleaned_trip in cleaned_trips:
        # Start from a copy of the cleaned trip and overwrite what differs.
        # NOTE(review): copy.copy is normally a shallow copy, so the nested
        # "metadata" and "data" values are presumably shared with the
        # cleaned trip - confirm that this is intended.
        new_entry_dict = copy.copy(cleaned_trip)
        del new_entry_dict["_id"]
        new_entry_dict["metadata"]["key"] = "analysis/confirmed_trip"
        new_entry_dict["data"]["cleaned_trip"] = cleaned_trip.get_id()
        new_entry_dict["data"]["user_input"] = get_user_input_dict(
            ts, cleaned_trip, input_key_list)
        ts.insert(ecwe.Entry(new_entry_dict))
        # only reached when the insert did not raise
        last_processed = cleaned_trip

    return last_processed
    def check_transition_validity(self, streak_start, streak_end):
        """
        Decide what to do with a single flip flop in the motion changes.

        For a single flip flop (e.g. WALKING -> BICYCLING -> WALKING) the
        only choice is whether to drop the middle transition. The rule is
        simple: WALKING is the only valid intermediate transition. Any other
        mode is expected to last long enough to produce at least a couple of
        activity points, so a shorter one is merged away.

        :param streak_start: index into ``self.motion_changes`` of the streak
        :param streak_end: expected to equal ``streak_start`` or
            ``streak_start + 1``; anything else is logged as an error
        :return: the result of ``self.get_merge_direction`` when the
            transition is not a walking type, ``MergeResult.NONE()`` otherwise
        """
        if streak_start != streak_end and streak_start + 1 != streak_end:
            logging.error("1 flip check called with streak %d -> %d" %
                          (streak_start, streak_end))
            assertions_key = "intake.segmentation.section_segmentation.sectionValidityAssertions"
            if eac.get_config()[assertions_key]:
                assert False

        first_motion = self.motion_changes[streak_start][0]
        if eaid.is_walking_type(first_motion.type):
            logging.debug("single transition %s, WALKING, not merging yet" %
                          first_motion.type)
            return MergeResult.NONE()

        logging.debug("single transition %s, not WALKING, merging" %
                      first_motion.type)
        return self.get_merge_direction(streak_start, streak_end)
Exemple #6
0
def collapse_modes(section_entry, modes):
    """
    Collapse the transit modes returned by the GIS lookup into a single mode.

    GIS supports a rich set of transit modes (e.g. train, subway,
    light_rail), but only a basic set is supported here, so the train-like
    ones are mapped to 'TRAIN' and everything else is upper-cased. There can
    also be several parallel modes (e.g. bus and train) at the start and end
    points; when more than one distinct mode remains, the median section
    speed is used to choose between bus and the non-bus candidates.

    :param section_entry: section whose ``data["speeds"]`` is consulted, but
        only when more than one distinct mode remains
    :param modes: list of mode strings, may be None or empty
    :return: None when ``modes`` is None or empty, otherwise one mode string
    :raises AssertionError: when an unsupported mode is present and the
        "classification.validityAssertions" config entry is truthy
    """
    # light_rail, subway and tram are deliberately not listed here: they are
    # kept as distinct modes and appear in supported_modes below.
    train_mode_list = (
        'funicular', 'miniature', 'rail', 'railway', 'monorail',
        'aerialway', 'tracks',
    )

    if modes is None or len(modes) == 0:
        return None

    # map all train-like modes to train
    train_mapped_modes = [
        'TRAIN' if m in train_mode_list else m.upper() for m in modes
    ]
    logging.debug("train_mapped_modes = %s" % train_mapped_modes)

    if len(train_mapped_modes) == 1:
        return train_mapped_modes[0]

    # now uniquify them. If there is only one possible mode (e.g. train),
    # there should be only one entry here
    unique_modes = set(train_mapped_modes)
    logging.debug("unique_modes = %s" % unique_modes)

    if len(unique_modes) == 1:
        return list(unique_modes)[0]

    supported_modes = {'BUS', 'TRAIN', 'LIGHT_RAIL', 'SUBWAY', 'TRAM'}
    if not unique_modes.issubset(supported_modes):
        logging.error("unique_modes = %s, but we support only %s" %
                      (unique_modes, supported_modes))
        if eac.get_config()["classification.validityAssertions"]:
            assert False

    # could be either bus or train. Let's use the speed to decide
    # local bus speeds are pretty close to bike, which is why it is hard to
    # distinguish between them
    if eaid.is_bicycling_speed(
            pd.Series(section_entry.data["speeds"]).median()):
        return 'BUS'

    # Only a non-bus mode may be returned here. Every BUS entry is dropped,
    # not just the first, because the lookup can report the same mode more
    # than once. At least two distinct modes exist at this point, so the
    # filtered list is never empty.
    non_bus_modes = [m for m in train_mapped_modes if m != 'BUS']
    logging.debug("Returning %s but could also be another train modes" %
                  non_bus_modes[0])
    return non_bus_modes[0]
 def selectFeatureIndicesStep(self):
   """
   Pick the feature column indices to be used for prediction.

   The generic features are always included. The advanced and bus/train
   features are appended, in that order, when their config switches are
   truthy. The location and time feature groups are only logged.

   :return: list of the selected feature indices
   """
   generic = list(range(2)) + list(range(4, 9))
   advanced = list(range(10, 13))
   location = list(range(13, 17))
   time_related = list(range(17, 19))
   bus_train = list(range(19, 22))

   logging.debug("generic features = %s" % generic)
   logging.debug("advanced features = %s" % advanced)
   logging.debug("location features = %s" % location)
   logging.debug("time features = %s" % time_related)
   logging.debug("bus train features = %s" % bus_train)

   selected = list(generic)
   if eac.get_config()["classification.inference.mode.useAdvancedFeatureIndices"]:
       selected.extend(advanced)
   if eac.get_config()["classification.inference.mode.useBusTrainFeatureIndices"]:
       selected.extend(bus_train)
   return selected
 def selectFeatureIndicesStep(self):
   """
   Choose which feature column indices are fed to the predictor.

   Generic features are always part of the result. Advanced features and
   then bus/train features are added when the corresponding config entry is
   truthy. Location and time feature indices are logged but never selected.

   :return: list of the selected feature indices
   """
   generic_idx = list(range(2)) + list(range(4, 9))
   advanced_idx = list(range(10, 13))
   location_idx = list(range(13, 17))
   time_idx = list(range(17, 19))
   bus_train_idx = list(range(19, 22))

   logging.debug("generic features = %s" % generic_idx)
   logging.debug("advanced features = %s" % advanced_idx)
   logging.debug("location features = %s" % location_idx)
   logging.debug("time features = %s" % time_idx)
   logging.debug("bus train features = %s" % bus_train_idx)

   chosen = list(generic_idx)
   if eac.get_config()["classification.inference.mode.useAdvancedFeatureIndices"]:
       chosen.extend(advanced_idx)
   if eac.get_config()["classification.inference.mode.useBusTrainFeatureIndices"]:
       chosen.extend(bus_train_idx)
   return chosen
Exemple #9
0
def filter_accuracy(user_id):
    """
    Copy the sufficiently accurate raw location points to the filtered stream.

    The "background/location" points in the time range that still needs
    accuracy filtering are read and those whose ``accuracy`` value is below
    200 are kept. Points flagged by ``check_prior_duplicate`` or by
    ``check_existing_filtered_location`` are skipped. For every remaining
    point, the raw entry with the same ``metadata.write_ts`` is converted
    with ``convert_to_filtered`` and inserted. The stage is then marked as
    done, or as failed if anything went wrong.

    :param user_id: user whose location points are to be filtered
    :return: None
    """
    # Points whose accuracy value is at or above this are dropped
    ACCURACY_THRESHOLD = 200

    time_query = epq.get_time_range_for_accuracy_filtering(user_id)
    timeseries = esta.TimeSeries.get_time_series(user_id)

    if not eac.get_config()["intake.cleaning.filter_accuracy.enable"]:
        logging.debug("filter_accuracy disabled, early return")
        epq.mark_accuracy_filtering_done(user_id, None)
        return

    try:
        unfiltered_points_df = timeseries.get_data_df("background/location",
                                                      time_query)
        if len(unfiltered_points_df) == 0:
            epq.mark_accuracy_filtering_done(user_id, None)
        else:
            filtered_from_unfiltered_df = unfiltered_points_df[
                unfiltered_points_df.accuracy < ACCURACY_THRESHOLD]
            logging.info(
                "filtered %d of %d points" %
                (len(filtered_from_unfiltered_df), len(unfiltered_points_df)))
            for idx, entry in filtered_from_unfiltered_df.iterrows():
                # First, we check to see if this is a duplicate of an existing entry.
                # If so, we will skip it since it is probably generated as a duplicate...
                if check_prior_duplicate(filtered_from_unfiltered_df, idx,
                                         entry):
                    logging.info(
                        "Found duplicate entry at index %s, id = %s, lat = %s, lng = %s, skipping"
                        % (idx, entry._id, entry.latitude, entry.longitude))
                    continue
                # Next, we check to see if there is an existing "background/filtered_location" point that corresponds
                # to this point. If there is, then we don't want to re-insert. This ensures that this step is idempotent
                if check_existing_filtered_location(timeseries, entry):
                    logging.info(
                        "Found existing filtered location for entry at index = %s, id = %s, ts = %s, fmt_time = %s, skipping"
                        % (idx, entry._id, entry.ts, entry.fmt_time))
                    continue
                entry_copy = convert_to_filtered(
                    timeseries.get_entry_at_ts("background/location",
                                               "metadata.write_ts",
                                               entry.metadata_write_ts))
                timeseries.insert(entry_copy)
            # The stage is marked done up to the last *unfiltered* point, so
            # points that were dropped or skipped are not considered again.
            last_entry_processed = unfiltered_points_df.iloc[
                -1].metadata_write_ts
            epq.mark_accuracy_filtering_done(user_id,
                                             float(last_entry_processed))
    except BaseException as e:
        # The failure is recorded for every kind of exception, exactly as
        # the previous bare except did ...
        logging.exception("Marking accuracy filtering as failed")
        epq.mark_accuracy_filtering_failed(user_id)
        # ... but KeyboardInterrupt, SystemExit and the like must not be
        # swallowed, so anything that is not an ordinary Exception is
        # re-raised.
        if not isinstance(e, Exception):
            raise
Exemple #10
0
    def runPredictionPipeline(self, user_id, timerange):
        """
        Predict and save the modes for one user's sections in a time range.

        The cleaned sections in the time range are loaded first. When there
        are none, the method returns early (after checking that no section
        was recorded as done). Otherwise the mode probabilities are
        predicted and the predictions are saved.

        :param user_id: user whose sections are to be predicted
        :param timerange: time query passed to the section lookup
        :return: None
        """
        self.ts = esta.TimeSeries.get_time_series(user_id)
        self.toPredictSections = esda.get_entries(
            esda.CLEANED_SECTION_KEY, user_id, time_query=timerange)

        if len(self.toPredictSections) == 0:
            logging.debug("len(toPredictSections) == 0, early return")
            if self.last_section_done is None:
                return None
            # Nothing was processed, so nothing should be recorded as done
            logging.error("self.last_section_done == %s, expecting None" %
                          self.last_section_done)
            if eac.get_config()["classification.validityAssertions"]:
                assert False
            return None

        self.predictedProb = self.predictModesStep()
        logging.info("predictModesStep DONE")

        self.savePredictionsStep()
        logging.info("savePredictionsStep DONE")
    def check_no_location_walk(self, streak_start, streak_end):
        """
        Merge a single-transition streak that has at most one location point.

        The location points that fall within the streak's motion change are
        looked up; when there is at most one of them, the streak is treated
        as bogus and merged in the direction given by
        ``self.get_merge_direction``.

        :param streak_start: index into ``self.motion_changes`` of the streak
        :param streak_end: expected to equal ``streak_start`` or
            ``streak_start + 1``; anything else is logged as an error
        :return: the result of ``self.get_merge_direction`` when the streak
            has at most one location point, ``MergeResult.NONE()`` otherwise
        """
        if streak_start != streak_end and streak_start + 1 != streak_end:
            logging.error("1 flip check called with streak %d -> %d" %
                          (streak_start, streak_end))
            assertions_key = "intake.segmentation.section_segmentation.sectionValidityAssertions"
            if eac.get_config()[assertions_key]:
                assert False

        motion_start, motion_end = self.motion_changes[streak_start]
        streak_locs = self.seg_method.filter_points_for_range(
            self.seg_method.location_points, motion_start, motion_end)
        # NOTE(review): the unfiltered points are looked up but the result is
        # never used; the check below only considers streak_locs. Confirm
        # whether the unfiltered count was meant to be part of the decision.
        streak_unfiltered_locs = self.seg_method.filter_points_for_range(
            self.seg_method.unfiltered_loc_df, motion_start, motion_end)

        if len(streak_locs) > 1:
            return MergeResult.NONE()

        # at most one location point in the streak, so this must be bogus
        return self.get_merge_direction(streak_start, streak_end)
Exemple #12
0
def match_incoming_inputs(user_id, timerange):
    """
    Attach newly received user inputs to the confirmed trips they belong to.

    Every user input entry in the time range is looked up against the
    confirmed trips. When a trip is found, the input's label is stored under
    ``data.user_input`` of that trip (keyed by the name derived from the
    input's metadata key) and the trip is updated in the timeseries. Inputs
    without a matching trip are logged and skipped.

    :param user_id: user whose inputs are to be matched
    :param timerange: time query passed to the input lookup
    :return: the last input that was processed, whether or not it matched a
        trip, or None when there were no inputs in the range
    """
    ts = esta.TimeSeries.get_time_series(user_id)
    input_key_list = eac.get_config()["userinput.keylist"]
    toMatchInputs = [ecwe.Entry(e) for e in ts.find_entries(input_key_list, time_query=timerange)]
    logging.debug("Matching %d inputs to trips" % len(toMatchInputs))
    if not toMatchInputs:
        logging.debug("len(toMatchInputs) == 0, early return")
        return None

    # Imported at function scope as in the original code (presumably to
    # avoid an import cycle - TODO confirm), but once rather than on every
    # loop iteration.
    import emission.storage.timeseries.builtin_timeseries as estbt

    lastInputProcessed = None
    for ui in toMatchInputs:
        confirmed_trip = esdt.get_trip_for_user_input_obj(ts, ui)
        if confirmed_trip is not None:
            input_name = obj_to_dict_key(ui.metadata.key)
            confirmed_trip["data"]["user_input"][input_name] = ui.data.label
            estbt.BuiltinTimeSeries.update(confirmed_trip)
        else:
            # logging.warn is a deprecated alias of logging.warning
            logging.warning("No match found for user input %s, moving forward anyway" % ui)
        lastInputProcessed = ui

    return lastInputProcessed
Exemple #13
0
def section_to_geojson(section, tl):
    """
    Convert a section into its geojson representation.

    The section's recreated locations are turned into one line feature. If
    the last recreated location does not match the section's end location
    (compared after rounding to 4 digits), the filtered location at the
    section's end timestamp is appended to fill the gap. The line feature
    takes the section's id and data as its id and properties, and its
    ``sensed_mode`` is replaced by the inferred section's mode when inferred
    sections are the configured analysis result and one exists.

    :param section: the section to be converted
    :param tl: not referenced in this function's body
    :return: a feature collection which is the geojson version of the section
    """
    ts = esta.TimeSeries.get_time_series(section.user_id)
    section_time_query = esda.get_time_query_for_trip_like(
        "analysis/cleaned_section", section.get_id())

    # TODO: Decide whether to rewrite this to use dataframes throughout
    # instead of python arrays. Dataframes insert nans; fillna could supply
    # default values, but since no dataframe features are actually used here
    # it is unclear how much that would help.
    location_entries = [
        ecwe.Entry(doc) for doc in
        ts.find_entries(["analysis/recreated_location"], section_time_query)
    ]

    if location_entries:
        logging.debug("first element in section_location_array = %s" % location_entries[0])

        end_matches = ecc.compare_rounded_arrays(
            section.data.end_loc.coordinates,
            location_entries[-1].data.loc.coordinates,
            digits=4)
        if not end_matches:
            logging.info("section_location_array[-1].data.loc %s != section.data.end_loc %s even after df.ts fix, filling gap" %
                         (location_entries[-1].data.loc, section.data.end_loc))
            if eac.get_config()["output.conversion.validityAssertions"]:
                assert False
            gap_filler_doc = ts.get_entry_at_ts(
                "background/filtered_location", "data.ts", section.data.end_ts)
            if gap_filler_doc is not None:
                gap_filler = ecwe.Entry(gap_filler_doc)
                logging.debug("Adding new entry %s to fill the end point gap between %s and %s"
                              % (gap_filler.data.loc,
                                 location_entries[-1].data.loc,
                                 section.data.end_loc))
                location_entries.append(gap_filler)
            else:
                logging.warning("can't find entry to patch gap, leaving gap")

    line_feature = point_array_to_line(location_entries)
    line_feature.id = str(section.get_id())
    line_feature.properties.update(copy.copy(section.data))
    # Update works on dicts, convert back to a section object to make the
    # modes work properly
    line_feature.properties = ecwcs.Cleanedsection(line_feature.properties)
    line_feature.properties["feature_type"] = "section"

    # Look up the inferred section only when inferred sections are the
    # configured analysis result; without one, the cleaned section's own
    # mode is kept.
    inferred_section = None
    if eac.get_section_key_for_analysis_results() == esda.INFERRED_SECTION_KEY:
        inferred_section = esds.cleaned2inferred_section(section.user_id,
                                                         section.get_id())

    if inferred_section is not None:
        logging.debug("mapped cleaned section %s -> inferred section %s" %
                      (section.get_id(), inferred_section.get_id()))
        logging.debug("changing mode from %s -> %s" %
                      (line_feature.properties.sensed_mode,
                       inferred_section.data.sensed_mode))
        mode_to_report = inferred_section.data.sensed_mode
    else:
        mode_to_report = line_feature.properties.sensed_mode
    line_feature.properties["sensed_mode"] = str(mode_to_report)

    _del_non_derializable(line_feature.properties, ["start_loc", "end_loc"])

    return gj.FeatureCollection([line_feature])