Example #1
0
 def testGetTimeRangeForTrip(self):
     """Check the time query generated for a freshly created fake raw trip.

     The query returned by ``esda.get_time_query_for_trip_like`` is expected
     to be of type ``data.ts`` with a start of 5 and an end of 6.
     """
     fake_trip = self.create_fake_trip()
     trip_id = fake_trip.get_id()
     time_query = esda.get_time_query_for_trip_like(esda.RAW_TRIP_KEY, trip_id)

     self.assertEqual(time_query.timeType, "data.ts")
     self.assertEqual(time_query.startTs, 5)
     self.assertEqual(time_query.endTs, 6)
Example #2
0
    def testPointFilteringRichmondJump(self):
        """Run the zigzag smoother over the sections of trip entry 6.

        The trip is expected to have exactly one section, and the smoother is
        expected to flag exactly one point of that section for deletion.
        """
        jump_trip = self.trip_entries[6]
        self.loadPointsForTrip(jump_trip.get_id())
        jump_sections = [s for s in self.section_entries
                         if s.data.trip_id == jump_trip.get_id()]
        outlier_algo = eaics.BoxplotOutlier()
        jump_algo = eaicj.SmoothZigzag(False, 100)

        for i, section_entry in enumerate(jump_sections):
            logging.debug("-" * 20 + "Considering section %s" % i + "-" * 20)

            section_df = self.ts.get_data_df("background/filtered_location",
                            esda.get_time_query_for_trip_like(esda.RAW_SECTION_KEY,
                                                              section_entry.get_id()))
            with_speeds_df = eaicl.add_dist_heading_speed(section_df)

            max_speed = outlier_algo.get_threshold(with_speeds_df)
            logging.debug("Max speed for section %s = %s" % (i, max_speed))

            jump_algo.filter(with_speeds_df)
            # Convert the masks to plain arrays before calling np.nonzero:
            # np.nonzero on a pandas Series depends on Series.nonzero, which
            # was deprecated and later removed from pandas. np.asarray is a
            # no-op if the mask is already an ndarray.
            logging.debug("Retaining points %s" %
                          np.nonzero(np.asarray(jump_algo.inlier_mask_)))

            # Keep the mask itself unconverted so that it can still be used
            # for boolean indexing into the dataframe below.
            to_delete_mask = np.logical_not(jump_algo.inlier_mask_)
            logging.debug("Deleting points %s" %
                          np.nonzero(np.asarray(to_delete_mask)))

            delete_ids = list(with_speeds_df[to_delete_mask]._id)
            logging.debug("Deleting ids %s" % delete_ids)

            # There is only one section
            self.assertEqual(i, 0)
            # The bad section should have exactly one point filtered
            self.assertEqual(np.count_nonzero(to_delete_mask), 1)
            self.assertEqual([str(point_id) for point_id in delete_ids],
                             ["55e86dbb7d65cb39ee987e09"])
def section_to_geojson(section, tl):
    """
    Convert a section into its geojson representation.

    The "analysis/recreated_location" entries of the section are turned into
    a single line feature (through point_array_to_line). The feature gets the
    section id as its id and a copy of the section data as its properties,
    and is returned wrapped in a feature collection.

    :param section: the section to be converted
    :param tl: object whose get_object is used to look up the parent trip and
        its start place when the section has no start stop
    :return: a feature collection which is the geojson version of the section
    """
    timeseries = esta.TimeSeries.get_time_series(section.user_id)
    section_query = esda.get_time_query_for_trip_like(
        "analysis/cleaned_section", section.get_id())
    location_docs = timeseries.find_entries(["analysis/recreated_location"],
                                            section_query)

    # TODO: Decide whether we want to rewrite this to use dataframes throughout
    # instead of python arrays. dataframes insert nans. We could use fillna to
    # fill with default values, but if we are not actually using dataframe
    # features here, it is unclear how much that would help.
    locations = [ecwe.Entry(doc) for doc in location_docs]
    if locations:
        logging.debug("first element in section_location_array = %s" % locations[0])

        # Fudge the end point so that we don't have a gap because of the
        # ts != write_ts mismatch
        # TODO: Fix this once we are able to query by the data timestamp
        # instead of the metadata ts
        last_loc = locations[-1].data.loc
        if last_loc != section.data.end_loc:
            logging.info("section_location_array[-1].data.loc %s != section.data.end_loc %s even after df.ts fix, filling gap" %
                         (last_loc, section.data.end_loc))
            gap_doc = timeseries.get_entry_at_ts("background/filtered_location",
                                                 "data.ts", section.data.end_ts)
            if gap_doc is not None:
                gap_entry = ecwe.Entry(gap_doc)
                logging.debug("Adding new entry %s to fill the end point gap between %s and %s"
                              % (gap_entry.data.loc, last_loc, section.data.end_loc))
                locations.append(gap_entry)
            else:
                logging.warning("can't find entry to patch gap, leaving gap")

    line_feature = point_array_to_line(locations)

    # If this is the first section, we already start from the trip start. But
    # we actually need to start from the prior place, so fudge this too.
    # TODO: Unclear how to handle this properly in the model without fudging.
    if section.data.start_stop is None:
        # First section: prepend the start place of the parent trip
        parent_trip = tl.get_object(section.data.trip_id)
        start_place = tl.get_object(parent_trip.data.start_place)
        line_feature.geometry.coordinates.insert(
            0, start_place.data.location.coordinates)

    line_feature.id = str(section.get_id())
    line_feature.properties = copy.copy(section.data)
    properties = line_feature.properties
    properties["feature_type"] = "section"
    properties["sensed_mode"] = str(properties.sensed_mode)

    _del_non_derializable(properties, ["start_loc", "end_loc"])

    return gj.FeatureCollection([line_feature])
def filter_jumps(user_id, section_id):
    """
    Find the points of a raw section that should be ignored and record them.

    Loads the "background/filtered_location" points of the section, asks
    get_points_to_filter which of them to ignore and, unless it reports that
    there is nothing to delete, inserts an "analysis/smoothing" entry that
    lists the ids of the ignored points.

    :param user_id: the user whose time series contains the section
    :param section_id: id of the raw section whose points are examined
    :return: None. The result, if any, is saved into the user's time series.
    """
    logging.debug("filter_jumps(%s, %s) called" % (user_id, section_id))
    outlier_algo = eaico.BoxplotOutlier()
    filtering_algo = eaicj.SmoothZigzag()

    section_query = esda.get_time_query_for_trip_like(esda.RAW_SECTION_KEY,
                                                      section_id)
    timeseries = esta.TimeSeries.get_time_series(user_id)
    points_df = timeseries.get_data_df("background/filtered_location",
                                       section_query)
    logging.debug("len(section_points_df) = %s" % len(points_df))

    ignored_df = get_points_to_filter(points_df, outlier_algo, filtering_algo)
    if ignored_df is None:
        # Nothing to delete, so there is no result to store
        return

    deleted_ids = list(ignored_df._id)
    logging.debug("deleted %s points" % len(deleted_ids))

    smoothing = ecws.Smoothresults()
    smoothing.section = section_id
    smoothing.deleted_points = deleted_ids
    smoothing.outlier_algo = "BoxplotOutlier"
    smoothing.filtering_algo = "SmoothZigzag"

    timeseries.insert(
        ecwe.Entry.create_entry(user_id, "analysis/smoothing", smoothing))
def filter_jumps(user_id, section_id):
    """
    Find the points of a raw section that should be ignored and record them.

    Loads the "background/filtered_location" points of the section, asks
    get_points_to_filter which of them to ignore and, unless it reports that
    there is nothing to delete, inserts an "analysis/smoothing" entry that
    lists the ids of the ignored points.

    :param user_id: the user whose time series contains the section
    :param section_id: id of the raw section whose points are examined
    :return: None. The result, if any, is saved into the user's time series.
    """
    logging.debug("filter_jumps(%s, %s) called" % (user_id, section_id))
    outlier_algo = eaico.BoxplotOutlier()
    filtering_algo = eaicj.SmoothZigzag()

    section_query = esda.get_time_query_for_trip_like(esda.RAW_SECTION_KEY,
                                                      section_id)
    timeseries = esta.TimeSeries.get_time_series(user_id)
    points_df = timeseries.get_data_df("background/filtered_location",
                                       section_query)
    logging.debug("len(section_points_df) = %s" % len(points_df))

    ignored_df = get_points_to_filter(points_df, outlier_algo, filtering_algo)
    if ignored_df is None:
        # Nothing to delete, so there is no result to store
        return

    deleted_ids = list(ignored_df._id)
    logging.debug("deleted %s points" % len(deleted_ids))

    smoothing = ecws.Smoothresults()
    smoothing.section = section_id
    smoothing.deleted_points = deleted_ids
    smoothing.outlier_algo = "BoxplotOutlier"
    smoothing.filtering_algo = "SmoothZigzag"

    timeseries.insert(
        ecwe.Entry.create_entry(user_id, "analysis/smoothing", smoothing))
    def testPointFilteringZigzag(self):
        """Run the zigzag smoother over the sections of trip entry 8.

        Section 0 is the zigzag section and is expected to lose a fixed set
        of points; every other section is expected to lose none.
        """
        zigzag_trip = self.trip_entries[8]
        self.loadPointsForTrip(zigzag_trip.get_id())
        trip_sections = [
            entry for entry in self.section_entries
            if entry.data.trip_id == zigzag_trip.get_id()
        ]
        outlier_algo = eaics.BoxplotOutlier()
        jump_algo = eaicj.SmoothZigzag(False, 100)

        for idx, section_entry in enumerate(trip_sections):
            logging.debug("-" * 20 + "Considering section %s" % idx + "-" * 20)

            section_tq = esda.get_time_query_for_trip_like(
                esda.RAW_SECTION_KEY, section_entry.get_id())
            section_df = self.ts.get_data_df("background/filtered_location",
                                             section_tq)
            with_speeds_df = eaicl.add_dist_heading_speed(section_df)

            max_speed = outlier_algo.get_threshold(with_speeds_df)
            logging.debug("Max speed for section %s = %s" % (idx, max_speed))

            jump_algo.filter(with_speeds_df)
            inlier_mask = jump_algo.inlier_mask_
            logging.debug("Retaining points %s" %
                          np.nonzero(inlier_mask.to_numpy()))

            to_delete_mask = np.logical_not(inlier_mask)
            # Tuple holding one array with the positions flagged for deletion
            deleted_positions = np.nonzero(to_delete_mask.to_numpy())
            logging.debug("Deleting points %s" % deleted_positions)

            delete_ids = list(with_speeds_df[to_delete_mask]._id)
            logging.debug("Deleting ids %s" % delete_ids)

            if idx != 0:
                # Not the zigzag section: nothing should be removed
                self.assertEqual(len(deleted_positions[0]), 0)
                self.assertEqual(len(delete_ids), 0)
                continue

            # this is the zigzag section
            self.assertEqual(
                deleted_positions[0].tolist(),
                [25, 64, 114, 115, 116, 117, 118, 119, 120, 123, 126])
            expected_ids = [
                boi.ObjectId(hex_id) for hex_id in (
                    '55edafe77d65cb39ee9882ff',
                    '55edcc157d65cb39ee98836e',
                    '55edcc1f7d65cb39ee988400',
                    '55edcc1f7d65cb39ee988403',
                    '55edcc1f7d65cb39ee988406',
                    '55edcc1f7d65cb39ee988409',
                    '55edcc1f7d65cb39ee98840c',
                    '55edcc207d65cb39ee988410',
                    '55edcc207d65cb39ee988412',
                    '55edcc217d65cb39ee98841f',
                    '55edcc217d65cb39ee988429',
                )
            ]
            self.assertEqual(delete_ids, expected_ids)
Example #7
0
def section_to_geojson(section, tl):
    """
    Convert a cleaned section into its geojson representation.

    The "analysis/recreated_location" entries of the section are turned into
    a single line feature (through point_array_to_line). The feature gets the
    section id as its id, has its properties updated with a copy of the
    section data, and is returned wrapped in a feature collection.

    :param section: the section to be converted
    :param tl: not used by this implementation
    :return: a feature collection which is the geojson version of the section
    """
    timeseries = esta.TimeSeries.get_time_series(section.user_id)
    section_query = esda.get_time_query_for_trip_like(
        "analysis/cleaned_section", section.get_id())
    location_docs = timeseries.find_entries(["analysis/recreated_location"],
                                            section_query)

    # TODO: Decide whether we want to rewrite this to use dataframes throughout
    # instead of python arrays. dataframes insert nans. We could use fillna to
    # fill with default values, but if we are not actually using dataframe
    # features here, it is unclear how much that would help.
    locations = [ecwe.Entry(doc) for doc in location_docs]
    if locations:
        logging.debug("first element in section_location_array = %s" % locations[0])

        # The last recreated location is expected to match the section end
        # location once both are rounded to 4 digits.
        end_matches = ecc.compare_rounded_arrays(
            section.data.end_loc.coordinates,
            locations[-1].data.loc.coordinates,
            digits=4)
        if not end_matches:
            last_loc = locations[-1].data.loc
            logging.info("section_location_array[-1].data.loc %s != section.data.end_loc %s even after df.ts fix, filling gap" %
                         (last_loc, section.data.end_loc))
            # NOTE(review): with assertions enabled this aborts here, so the
            # gap filling below only runs under `python -O` -- confirm that
            # this is intended.
            assert False
            gap_doc = timeseries.get_entry_at_ts("background/filtered_location",
                                                 "data.ts", section.data.end_ts)
            if gap_doc is not None:
                gap_entry = ecwe.Entry(gap_doc)
                logging.debug("Adding new entry %s to fill the end point gap between %s and %s"
                              % (gap_entry.data.loc, last_loc, section.data.end_loc))
                locations.append(gap_entry)
            else:
                logging.warning("can't find entry to patch gap, leaving gap")

    line_feature = point_array_to_line(locations)
    line_feature.id = str(section.get_id())
    line_feature.properties.update(copy.copy(section.data))
    # Update works on dicts, convert back to a section object to make the modes
    # work properly
    line_feature.properties = ecwcs.Cleanedsection(line_feature.properties)
    properties = line_feature.properties
    properties["feature_type"] = "section"
    properties["sensed_mode"] = str(properties.sensed_mode)

    _del_non_derializable(properties, ["start_loc", "end_loc"])

    return gj.FeatureCollection([line_feature])
Example #8
0
    def testRemoveAllOutliers(self):
        """Mark every point of each smoothed section as deleted, then clean.

        After running segmentation and filtering on a real example, every
        existing "analysis/smoothing" entry is rewritten so that all of the
        section's locations are listed as deleted. clean_and_resample must
        then run without crashing, and the resulting cleaned trips, sections
        and stops are checked.
        """
        etc.setupRealExample(
            self, "emission/tests/data/real_examples/shankari_2016-06-20")
        self.ts = esta.TimeSeries.get_time_series(self.testUUID)
        eaist.segment_current_trips(self.testUUID)
        eaiss.segment_current_sections(self.testUUID)
        eaicl.filter_current_sections(self.testUUID)

        # Materialize all raw sections up front, before any entry is updated
        raw_section_docs = self.ts.find_entries([esda.RAW_SECTION_KEY],
                                                time_query=None)
        raw_sections = [ecwe.Entry(doc) for doc in raw_section_docs]

        for raw_section in raw_sections:
            smoothing_doc = self.ts.get_entry_at_ts(
                "analysis/smoothing", "data.section", raw_section.get_id())
            if smoothing_doc is None:
                continue

            logging.debug("Found smoothing result for section %s" %
                          raw_section.get_id())
            # Setting the set of deleted points to everything
            section_tq = esda.get_time_query_for_trip_like(
                esda.RAW_SECTION_KEY, raw_section.get_id())
            section_loc_df = self.ts.get_data_df(
                "background/filtered_location", section_tq)
            all_point_ids = section_loc_df["_id"].tolist()
            smoothing_doc["data"]["deleted_points"] = all_point_ids
            self.ts.update(ecwe.Entry(smoothing_doc))

        # All we care is that this should not crash.
        eaicr.clean_and_resample(self.testUUID)

        # Most of the trips have zero length, but apparently one has non-zero
        # length because the stop length is non zero!!
        # So there is only one cleaned trip left
        trips_df = self.ts.get_data_df(esda.CLEANED_TRIP_KEY, time_query=None)
        self.assertEqual(len(trips_df), 1)

        # We don't support squishing sections, but we only store stops and
        # sections for non-squished trips. And this non-squished trip happens
        # to have two sections and one stop
        sections_df = self.ts.get_data_df(esda.CLEANED_SECTION_KEY,
                                          time_query=None)
        self.assertEqual(len(sections_df), 2)
        self.assertEqual(sections_df.distance.tolist(), [0, 0])

        stops_df = self.ts.get_data_df(esda.CLEANED_STOP_KEY, time_query=None)
        self.assertEqual(len(stops_df), 1)
        self.assertAlmostEqual(stops_df.distance[0], 3252, places=0)
Example #9
0
def get_filtered_points(section, filtered_section_data):
    """
    Build the outlier-free, resampled point dataframe for a raw section.

    The "background/filtered_location" entries of the section are loaded, the
    points listed in the section's "analysis/smoothing" entry (if there is
    one) are removed, and the remainder is resampled at 30 sec intervals.
    The result is passed through eaicl.add_dist_heading_speed, given an
    ``idx`` column numbering the rows from 0, and stripped of rows that
    contain n/a values.

    :param section: the raw section entry; its user_id and id select the data
    :param filtered_section_data: not used by this function
    :return: the resampled dataframe with the n/a rows dropped
    """
    logging.debug("Getting filtered points for section %s" % section)
    ts = esta.TimeSeries.get_time_series(section.user_id)
    loc_entry_it = ts.find_entries(["background/filtered_location"],
                                   esda.get_time_query_for_trip_like(
                                       esda.RAW_SECTION_KEY, section.get_id()))

    loc_entry_list = [ecwe.Entry(e) for e in loc_entry_it]

    # We deliberately do not assert that the last location matches
    # section.data.end_loc: that check is known to fail and the mismatch is
    # handled in the geojson conversion code.

    # Find the list of points to filter
    filtered_points_entry_doc = ts.get_entry_at_ts("analysis/smoothing",
                                                   "data.section",
                                                   section.get_id())

    if filtered_points_entry_doc is None:
        logging.debug(
            "No filtered_points_entry, filtered_points_list is empty")
        filtered_point_id_list = []
    else:
        # TODO: Figure out how to make collections work for the wrappers and then change this to an Entry
        filtered_points_entry = ad.AttrDict(filtered_points_entry_doc)
        filtered_point_id_list = list(
            filtered_points_entry.data.deleted_points)
        logging.debug("deleting %s points from section points" %
                      len(filtered_point_id_list))

    filtered_loc_list = remove_outliers(loc_entry_list, filtered_point_id_list)

    # filtered_loc_list has removed the outliers. Now, we resample the data at
    # 30 sec intervals
    resampled_loc_df = resample(filtered_loc_list, interval=30)
    # If this is the first section, we need to find the start place of the parent trip
    # and actually start from there. That will fix the distances but not the duration
    # because we haven't yet figured out how to get the correct start time.
    # TODO: Fix this!!
    # For now, we will fudge this in the geojson converter, as always

    with_speeds_df = eaicl.add_dist_heading_speed(resampled_loc_df)
    with_speeds_df["idx"] = np.arange(0, len(with_speeds_df))
    with_speeds_df_nona = with_speeds_df.dropna()
    # Rows before minus rows after, so the reported count is not negative
    logging.info("removed %d entries containing n/a" %
                 (len(with_speeds_df) - len(with_speeds_df_nona)))
    return with_speeds_df_nona
Example #10
0
def filter_jumps(user_id, section_id):
    """
    Find the points of a raw section that should be ignored and record them.

    Loads the "background/filtered_location" points of the section. When the
    only non-null value in their ``filter`` column is "distance", the section
    is treated as an iOS section and is first passed through
    _ios_fill_fake_data. get_points_to_filter then decides which points to
    ignore; unless it reports that there is nothing to delete, an
    "analysis/smoothing" entry listing the non-null ids of those points is
    inserted.

    :param user_id: the user whose time series contains the section
    :param section_id: id of the raw section whose points are examined
    :return: None. The result, if any, is saved into the user's time series.
    """
    logging.debug("filter_jumps(%s, %s) called" % (user_id, section_id))
    outlier_algo = eaico.BoxplotOutlier()

    section_query = esda.get_time_query_for_trip_like(esda.RAW_SECTION_KEY,
                                                      section_id)
    timeseries = esta.TimeSeries.get_time_series(user_id)
    points_df = timeseries.get_data_df("background/filtered_location",
                                       section_query)

    filter_values = points_df["filter"].dropna().unique().tolist()
    is_ios = filter_values == ["distance"]
    if is_ios:
        logging.debug("Found iOS section, filling in gaps with fake data")
        points_df = _ios_fill_fake_data(points_df)
    filtering_algo = eaicj.SmoothZigzag(is_ios, DEFAULT_SAME_POINT_DISTANCE)

    logging.debug("len(section_points_df) = %s" % len(points_df))
    ignored_df = get_points_to_filter(points_df, outlier_algo, filtering_algo)
    if ignored_df is None:
        # Nothing to delete, so there is no result to store
        return

    # Keep only the ignored points that have an id
    ignored_ids = ignored_df._id.dropna()
    logging.debug("after filtering ignored points, %s -> %s" %
                  (len(ignored_df), len(ignored_ids)))
    # We shouldn't really filter any fuzzed points because they represent
    # 100m in 60 secs, but this is intentionally not asserted here.
    deleted_ids = list(ignored_ids)
    logging.debug("deleted %s points" % len(deleted_ids))

    smoothing = ecws.Smoothresults()
    smoothing.section = section_id
    smoothing.deleted_points = deleted_ids
    smoothing.outlier_algo = "BoxplotOutlier"
    smoothing.filtering_algo = "SmoothZigzag"

    timeseries.insert(
        ecwe.Entry.create_entry(user_id, "analysis/smoothing", smoothing))
    def testPointFilteringZigzag(self):
        """Run the zigzag smoother over the sections of trip entry 8.

        Section 0 is the zigzag section and is expected to lose a fixed set
        of points; every other section is expected to lose none.
        """
        zigzag_trip = self.trip_entries[8]
        self.loadPointsForTrip(zigzag_trip.get_id())
        zigzag_sections = [s for s in self.section_entries
                           if s.data.trip_id == zigzag_trip.get_id()]
        outlier_algo = eaics.BoxplotOutlier()
        jump_algo = eaicj.SmoothZigzag(False, 100)

        for i, section_entry in enumerate(zigzag_sections):
            logging.debug("-" * 20 + "Considering section %s" % i + "-" * 20)

            section_df = self.ts.get_data_df("background/filtered_location",
                            esda.get_time_query_for_trip_like(esda.RAW_SECTION_KEY,
                                                              section_entry.get_id()))
            with_speeds_df = eaicl.add_dist_heading_speed(section_df)

            max_speed = outlier_algo.get_threshold(with_speeds_df)
            logging.debug("Max speed for section %s = %s" % (i, max_speed))

            jump_algo.filter(with_speeds_df)
            # Convert the masks to plain arrays before calling np.nonzero:
            # np.nonzero on a pandas Series depends on Series.nonzero, which
            # was deprecated and later removed from pandas. np.asarray is a
            # no-op if the mask is already an ndarray.
            logging.debug("Retaining points %s" %
                          np.nonzero(np.asarray(jump_algo.inlier_mask_)))

            # Keep the mask itself unconverted so that it can still be used
            # for boolean indexing into the dataframe below.
            to_delete_mask = np.logical_not(jump_algo.inlier_mask_)
            # Tuple holding one array with the positions flagged for deletion
            deleted_positions = np.nonzero(np.asarray(to_delete_mask))
            logging.debug("Deleting points %s" % deleted_positions)

            delete_ids = list(with_speeds_df[to_delete_mask]._id)
            logging.debug("Deleting ids %s" % delete_ids)

            if i == 0:
                # this is the zigzag section
                self.assertEqual(deleted_positions[0].tolist(),
                                 [25, 64, 114, 115, 116, 117, 118, 119, 120, 123, 126])
                self.assertEqual(delete_ids,
                                 [boi.ObjectId('55edafe77d65cb39ee9882ff'),
                                  boi.ObjectId('55edcc157d65cb39ee98836e'),
                                  boi.ObjectId('55edcc1f7d65cb39ee988400'),
                                  boi.ObjectId('55edcc1f7d65cb39ee988403'),
                                  boi.ObjectId('55edcc1f7d65cb39ee988406'),
                                  boi.ObjectId('55edcc1f7d65cb39ee988409'),
                                  boi.ObjectId('55edcc1f7d65cb39ee98840c'),
                                  boi.ObjectId('55edcc207d65cb39ee988410'),
                                  boi.ObjectId('55edcc207d65cb39ee988412'),
                                  boi.ObjectId('55edcc217d65cb39ee98841f'),
                                  boi.ObjectId('55edcc217d65cb39ee988429')])
            else:
                self.assertEqual(len(deleted_positions[0]), 0)
                self.assertEqual(len(delete_ids), 0)
def get_filtered_points(section, filtered_section_data):
    """
    Build the outlier-free, resampled point dataframe for a raw section.

    The "background/filtered_location" entries of the section are loaded, the
    points listed in the section's "analysis/smoothing" entry (if there is
    one) are removed, and the remainder is resampled at 30 sec intervals.
    The result is passed through eaicl.add_dist_heading_speed, given an
    ``idx`` column numbering the rows from 0, and stripped of rows that
    contain n/a values.

    :param section: the raw section entry; its user_id and id select the data
    :param filtered_section_data: not used by this function
    :return: the resampled dataframe with the n/a rows dropped
    """
    logging.debug("Getting filtered points for section %s" % section)
    ts = esta.TimeSeries.get_time_series(section.user_id)
    loc_entry_it = ts.find_entries(["background/filtered_location"],
                                   esda.get_time_query_for_trip_like(
                                       esda.RAW_SECTION_KEY, section.get_id()))

    loc_entry_list = [ecwe.Entry(e) for e in loc_entry_it]

    # We deliberately do not assert that the last location matches
    # section.data.end_loc: that check is known to fail and the mismatch is
    # handled in the geojson conversion code.

    # Find the list of points to filter
    filtered_points_entry_doc = ts.get_entry_at_ts("analysis/smoothing",
                                                   "data.section",
                                                   section.get_id())

    if filtered_points_entry_doc is None:
        logging.debug("No filtered_points_entry, filtered_points_list is empty")
        filtered_point_id_list = []
    else:
        # TODO: Figure out how to make collections work for the wrappers and then change this to an Entry
        filtered_points_entry = ad.AttrDict(filtered_points_entry_doc)
        filtered_point_id_list = list(filtered_points_entry.data.deleted_points)
        logging.debug("deleting %s points from section points" % len(
            filtered_point_id_list))

    filtered_loc_list = remove_outliers(loc_entry_list, filtered_point_id_list)

    # filtered_loc_list has removed the outliers. Now, we resample the data at
    # 30 sec intervals
    resampled_loc_df = resample(filtered_loc_list, interval=30)
    # If this is the first section, we need to find the start place of the parent trip
    # and actually start from there. That will fix the distances but not the duration
    # because we haven't yet figured out how to get the correct start time.
    # TODO: Fix this!!
    # For now, we will fudge this in the geojson converter, as always

    with_speeds_df = eaicl.add_dist_heading_speed(resampled_loc_df)
    with_speeds_df["idx"] = np.arange(0, len(with_speeds_df))
    with_speeds_df_nona = with_speeds_df.dropna()
    # Rows before minus rows after, so the reported count is not negative
    logging.info("removed %d entries containing n/a" %
                 (len(with_speeds_df) - len(with_speeds_df_nona)))
    return with_speeds_df_nona
    def testPointFilteringShanghaiJump(self):
        """Run the zigzag smoother over the sections of trip entry 0.

        Only the section at index 2 is expected to have a point flagged for
        deletion (exactly one); all other sections should be left untouched.
        """
        jump_trip = self.trip_entries[0]
        self.loadPointsForTrip(jump_trip.get_id())
        trip_sections = [
            entry for entry in self.section_entries
            if entry.data.trip_id == jump_trip.get_id()
        ]
        outlier_algo = eaics.BoxplotOutlier()
        jump_algo = eaicj.SmoothZigzag(False, 100)

        for idx, section_entry in enumerate(trip_sections):
            logging.debug("-" * 20 + "Considering section %s" % idx + "-" * 20)

            section_tq = esda.get_time_query_for_trip_like(
                esda.RAW_SECTION_KEY, section_entry.get_id())
            section_df = self.ts.get_data_df("background/filtered_location",
                                             section_tq)
            with_speeds_df = eaicl.add_dist_heading_speed(section_df)

            max_speed = outlier_algo.get_threshold(with_speeds_df)
            logging.debug("Max speed for section %s = %s" % (idx, max_speed))

            jump_algo.filter(with_speeds_df)
            inlier_mask = jump_algo.inlier_mask_
            logging.debug("Retaining points %s" %
                          np.nonzero(inlier_mask.to_numpy()))

            to_delete_mask = np.logical_not(inlier_mask)
            logging.debug("Deleting points %s" %
                          np.nonzero(to_delete_mask.to_numpy()))

            delete_ids = list(with_speeds_df[to_delete_mask]._id)
            logging.debug("Deleting ids %s" % delete_ids)

            # Automated checks. Might be able to remove logging statements later
            deleted_count = np.count_nonzero(to_delete_mask)
            if idx == 2:
                # The bad section: exactly one point should be filtered
                self.assertEqual(deleted_count, 1)
                self.assertEqual([str(point_id) for point_id in delete_ids],
                                 ["55d8c4837d65cb39ee983cb4"])
            else:
                # Not the bad section. Should not be filtered
                self.assertEqual(deleted_count, 0)
                self.assertEqual(len(delete_ids), 0)
def filter_jumps(user_id, section_id):
    """
    Find the jump points in a single raw section and persist the list of
    points that should be dropped from it.

    The section's "background/filtered_location" points are loaded and handed
    to get_points_to_filter together with a BoxplotOutlier and a SmoothZigzag
    instance. When that returns None nothing is stored; otherwise an
    "analysis/smoothing" entry naming the section, the deleted point ids and
    the two algorithms is inserted into the user's timeseries.

    :param user_id: the user whose timeseries is read and written
    :param section_id: id of the raw section whose points are checked
    :return: None. The result is saved to the database as a side effect.
    """
    logging.debug("filter_jumps(%s, %s) called" % (user_id, section_id))
    outlier_algo = eaico.BoxplotOutlier()

    section_tq = esda.get_time_query_for_trip_like(esda.RAW_SECTION_KEY,
                                                   section_id)
    ts = esta.TimeSeries.get_time_series(user_id)
    loc_df = ts.get_data_df("background/filtered_location", section_tq)

    # The section is treated as iOS when "distance" is the one and only
    # non-null value present in its "filter" column.
    observed_filters = loc_df["filter"].dropna().unique().tolist()
    is_ios = observed_filters == ["distance"]
    if is_ios:
        logging.debug("Found iOS section, filling in gaps with fake data")
        loc_df = _ios_fill_fake_data(loc_df)
    filtering_algo = eaicj.SmoothZigzag(is_ios, DEFAULT_SAME_POINT_DISTANCE)

    logging.debug("len(section_points_df) = %s" % len(loc_df))
    to_ignore_df = get_points_to_filter(loc_df, outlier_algo, filtering_algo)
    if to_ignore_df is None:
        # Nothing has to be deleted, so there is nothing to record
        return

    # Only rows that carry an _id can be recorded as deleted.
    # NOTE(review): rows without an _id are presumably the fake points
    # filled in for iOS above -- confirm against _ios_fill_fake_data.
    ids_to_delete = to_ignore_df._id.dropna()
    logging.debug("after filtering ignored points, %s -> %s" %
                  (len(to_ignore_df), len(ids_to_delete)))
    # Fuzzed points represent 100m in 60 secs, so we do not really expect any
    # of them to be filtered. The two lengths are only logged above; their
    # equality is not asserted.
    deleted_point_id_list = list(ids_to_delete)
    logging.debug("deleted %s points" % len(deleted_point_id_list))

    smoothing = ecws.Smoothresults()
    smoothing.section = section_id
    smoothing.deleted_points = deleted_point_id_list
    smoothing.outlier_algo = "BoxplotOutlier"
    smoothing.filtering_algo = "SmoothZigzag"

    smoothing_entry = ecwe.Entry.create_entry(user_id, "analysis/smoothing",
                                              smoothing)
    ts.insert(smoothing_entry)
    def testRemoveAllOutliers(self):
        """
        Mark every location of every smoothed section as deleted and check
        that clean_and_resample still completes and produces the expected
        number of cleaned trips, sections and stops.
        """
        etc.setupRealExample(self, "emission/tests/data/real_examples/shankari_2016-06-20")
        self.ts = esta.TimeSeries.get_time_series(self.testUUID)
        eaist.segment_current_trips(self.testUUID)
        eaiss.segment_current_sections(self.testUUID)
        eaicl.filter_current_sections(self.testUUID)

        # Read all the raw sections into a list before any entry is updated
        raw_section_docs = self.ts.find_entries([esda.RAW_SECTION_KEY], time_query=None)
        raw_sections = [ecwe.Entry(doc) for doc in raw_section_docs]
        for raw_section in raw_sections:
            section_id = raw_section.get_id()
            smoothing_doc = self.ts.get_entry_at_ts("analysis/smoothing",
                                                    "data.section",
                                                    section_id)
            if smoothing_doc is None:
                # No smoothing result stored for this section
                continue

            logging.debug("Found smoothing result for section %s" % section_id)
            # Overwrite the deleted points with every location in the section
            section_tq = esda.get_time_query_for_trip_like(esda.RAW_SECTION_KEY, section_id)
            section_loc_df = self.ts.get_data_df("background/filtered_location", section_tq)
            smoothing_doc["data"]["deleted_points"] = section_loc_df["_id"].tolist()
            self.ts.update(ecwe.Entry(smoothing_doc))

        # The main thing being checked: this must not crash.
        eaicr.clean_and_resample(self.testUUID)

        # Most of the trips have zero length, but apparently one has non-zero
        # length because the stop length is non zero!!
        # So exactly one cleaned trip is left.
        trips_df = self.ts.get_data_df(esda.CLEANED_TRIP_KEY, time_query=None)
        self.assertEqual(len(trips_df), 1)

        # Squishing sections is not supported, but stops and sections are only
        # stored for non-squished trips. The one non-squished trip happens to
        # have two sections and one stop.
        sections_df = self.ts.get_data_df(esda.CLEANED_SECTION_KEY, time_query=None)
        self.assertEqual(len(sections_df), 2)
        self.assertEqual(sections_df.distance.tolist(), [0,0])

        stops_df = self.ts.get_data_df(esda.CLEANED_STOP_KEY, time_query=None)
        self.assertEqual(len(stops_df), 1)
        self.assertAlmostEqual(stops_df.distance[0], 3252, places=0)
    def testPointFilteringShanghaiJump(self):
        """
        Run the zigzag jump filter over every section of the first trip and
        check which points it wants to delete.

        Only the section at index 2 is expected to lose a point (exactly one,
        with a known id); every other section must come out untouched.
        """
        classicJumpTrip1 = self.trip_entries[0]
        self.loadPointsForTrip(classicJumpTrip1.get_id())
        classicJumpSections1 = [s for s in self.section_entries
                                if s.data.trip_id == classicJumpTrip1.get_id()]
        outlier_algo = eaics.BoxplotOutlier()
        jump_algo = eaicj.SmoothZigzag()

        for i, section_entry in enumerate(classicJumpSections1):
            logging.debug("-" * 20 + "Considering section %s" % i + "-" * 20)

            section_df = self.ts.get_data_df("background/filtered_location",
                            esda.get_time_query_for_trip_like(esda.RAW_SECTION_KEY,
                                                              section_entry.get_id()))
            with_speeds_df = eaicl.add_dist_heading_speed(section_df)

            maxSpeed = outlier_algo.get_threshold(with_speeds_df)
            logging.debug("Max speed for section %s = %s" % (i, maxSpeed))

            jump_algo.filter(with_speeds_df)
            # The masks are converted to plain ndarrays before np.nonzero:
            # if inlier_mask_ is a pandas Series, np.nonzero on it is not
            # reliable across pandas versions (Series.nonzero was removed in
            # pandas 1.0). np.asarray is a no-op for an ndarray.
            logging.debug("Retaining points %s" %
                          np.nonzero(np.asarray(jump_algo.inlier_mask_)))

            to_delete_mask = np.logical_not(jump_algo.inlier_mask_)
            logging.debug("Deleting points %s" %
                          np.nonzero(np.asarray(to_delete_mask)))

            delete_ids = list(with_speeds_df[to_delete_mask]._id)
            logging.debug("Deleting ids %s" % delete_ids)

            # Automated checks. Might be able to remove logging statements later
            if i != 2:
                # Not the bad section. Should not be filtered
                self.assertEqual(np.count_nonzero(to_delete_mask), 0)
                self.assertEqual(len(delete_ids), 0)
            else:
                # The bad section, should have the third point filtered
                self.assertEqual(np.count_nonzero(to_delete_mask), 1)
                self.assertEqual([str(point_id) for point_id in delete_ids],
                                 ["55d8c4837d65cb39ee983cb4"])
Example #17
0
def segment_trip_into_sections(user_id, trip_entry, trip_source):
    """
    Segment a raw trip into raw sections and save them to the timeseries.

    The segmentation method is chosen from trip_source. For every
    (start, end, sensed_mode) triple it returns, a raw section is created and
    inserted; between two consecutive sections a raw stop is created,
    stitched to both neighbours and inserted as well. The first section is
    made to start at the trip's start location and the last one to end at
    the trip's end location.

    :param user_id: the user who owns the trip
    :param trip_entry: the raw trip entry to split; its id, data.start_ts and
        data.end_ts are read
    :param trip_source: "DwellSegmentationTimeFilter" or
        "DwellSegmentationDistFilter"; any other value fails the assert
    :return: None. Sections and stops are written to the timeseries.
    """
    # A single timeseries handle serves the segmentation method as well as
    # the location lookups and the writes below.
    ts = esta.TimeSeries.get_time_series(user_id)
    trip_id = trip_entry.get_id()
    time_query = esda.get_time_query_for_trip_like(esda.RAW_TRIP_KEY, trip_id)
    distance_from_place = _get_distance_from_start_place_to_end(trip_entry)

    if trip_source == "DwellSegmentationTimeFilter":
        import emission.analysis.intake.segmentation.section_segmentation_methods.smoothed_high_confidence_motion as shcm
        shcmsm = shcm.SmoothedHighConfidenceMotion(60, 100, [
            ecwm.MotionTypes.TILTING, ecwm.MotionTypes.UNKNOWN,
            ecwm.MotionTypes.STILL
        ])
    else:
        assert (trip_source == "DwellSegmentationDistFilter")
        import emission.analysis.intake.segmentation.section_segmentation_methods.smoothed_high_confidence_with_visit_transitions as shcmvt
        shcmsm = shcmvt.SmoothedHighConfidenceMotionWithVisitTransitions(
            49,
            50,
            [
                ecwm.MotionTypes.TILTING,
                ecwm.MotionTypes.UNKNOWN,
                ecwm.MotionTypes.STILL,
                ecwm.MotionTypes.NONE,  # iOS only
                ecwm.MotionTypes.STOPPED_WHILE_IN_VEHICLE  # iOS only
            ])

    segmentation_points = shcmsm.segment_into_sections(ts, distance_from_place,
                                                       time_query)

    # Since we are segmenting an existing trip into sections, we do not need to worry about linking with
    # a prior place, since it will be linked through the trip object.
    # So this is much simpler than the trip case.
    # Again, since this is segmenting a trip, we can just start with a section

    # TODO: Should we link the locations to the trips this way, or by using a foreign key?
    # If we want to use a foreign key, then we need to include the object id in the data df as well so that we can
    # set it properly.

    def loc_at_ts(time):
        # Location stored at exactly this data.ts
        loc_doc = ts.get_entry_at_ts("background/filtered_location",
                                     "data.ts", time)
        return ecwl.Location(loc_doc["data"])

    def loc_for_row(row):
        # Location data for one row of the segmentation output
        return ts.df_row_to_entry("background/filtered_location", row).data

    trip_start_loc = loc_at_ts(trip_entry.data.start_ts)
    trip_end_loc = loc_at_ts(trip_entry.data.end_ts)
    logging.debug("trip_start_loc = %s, trip_end_loc = %s" %
                  (trip_start_loc, trip_end_loc))

    prev_section_entry = None
    last_index = len(segmentation_points) - 1

    for i, (start_loc_doc, end_loc_doc, sensed_mode) in enumerate(segmentation_points):
        logging.debug("start_loc_doc = %s, end_loc_doc = %s" %
                      (start_loc_doc, end_loc_doc))
        start_loc = loc_for_row(start_loc_doc)
        end_loc = loc_for_row(end_loc_doc)
        logging.debug("start_loc = %s, end_loc = %s" % (start_loc, end_loc))

        section = ecwc.Section()
        section.trip_id = trip_id
        if prev_section_entry is None:
            # This is the first point, so we want to start from the start of the trip, not the start of this segment
            start_loc = trip_start_loc
        if i == last_index:
            # This is the last point, so we want to end at the end of the trip, not at the end of this segment
            # Particularly in this case, if we don't do this, then the trip end may overshoot the section end
            end_loc = trip_end_loc

        fill_section(section, start_loc, end_loc, sensed_mode)
        # We create the entry after filling in the section so that we know
        # that the data is included properly
        section_entry = ecwe.Entry.create_entry(user_id,
                                                esda.RAW_SECTION_KEY,
                                                section,
                                                create_id=True)

        if prev_section_entry is not None:
            # If this is not the first section, create a stop to link the two sections together
            # The expectation is prev_section -> stop -> curr_section
            stop = ecws.Stop()
            stop.trip_id = trip_id
            stop_entry = ecwe.Entry.create_entry(user_id,
                                                 esda.RAW_STOP_KEY,
                                                 stop,
                                                 create_id=True)
            logging.debug("stop = %s, stop_entry = %s" % (stop, stop_entry))
            stitch_together(prev_section_entry, stop_entry, section_entry)
            ts.insert(stop_entry)
            # The previous section was already inserted, so it is updated
            # after being stitched to the stop
            ts.update(prev_section_entry)

        # After we go through the loop, we will be left with the last section,
        # which does not have an ending stop. We insert that too.
        ts.insert(section_entry)
        prev_section_entry = section_entry
def segment_trip_into_sections(user_id, trip_id, trip_source):
    """
    Segment a raw trip into raw sections and save them to the timeseries.

    The trip is looked up by id and the segmentation method is chosen from
    trip_source. For every (start, end, sensed_mode) triple it returns, a raw
    section is created and inserted; between two consecutive sections a raw
    stop is created, stitched to both neighbours and inserted as well. The
    first section is made to start at the trip's start location and the last
    one to end at the trip's end location.

    :param user_id: the user who owns the trip
    :param trip_id: id of the raw trip to split
    :param trip_source: "DwellSegmentationTimeFilter" or
        "DwellSegmentationDistFilter"; any other value fails the assert
    :return: None. Sections and stops are written to the timeseries.
    """
    # A single timeseries handle serves the segmentation method as well as
    # the location lookups and the writes below.
    ts = esta.TimeSeries.get_time_series(user_id)
    trip = esda.get_object(esda.RAW_TRIP_KEY, trip_id)
    time_query = esda.get_time_query_for_trip_like(esda.RAW_TRIP_KEY, trip_id)

    if trip_source == "DwellSegmentationTimeFilter":
        import emission.analysis.intake.segmentation.section_segmentation_methods.smoothed_high_confidence_motion as shcm
        shcmsm = shcm.SmoothedHighConfidenceMotion(60, [ecwm.MotionTypes.TILTING,
                                                        ecwm.MotionTypes.UNKNOWN,
                                                        ecwm.MotionTypes.STILL])
    else:
        assert(trip_source == "DwellSegmentationDistFilter")
        import emission.analysis.intake.segmentation.section_segmentation_methods.smoothed_high_confidence_with_visit_transitions as shcmvt
        shcmsm = shcmvt.SmoothedHighConfidenceMotionWithVisitTransitions(
            49, [ecwm.MotionTypes.TILTING,
                 ecwm.MotionTypes.UNKNOWN,
                 ecwm.MotionTypes.STILL,
                 ecwm.MotionTypes.NONE,  # iOS only
                 ecwm.MotionTypes.STOPPED_WHILE_IN_VEHICLE])  # iOS only

    segmentation_points = shcmsm.segment_into_sections(ts, time_query)

    # Since we are segmenting an existing trip into sections, we do not need to worry about linking with
    # a prior place, since it will be linked through the trip object.
    # So this is much simpler than the trip case.
    # Again, since this is segmenting a trip, we can just start with a section

    # TODO: Should we link the locations to the trips this way, or by using a foreign key?
    # If we want to use a foreign key, then we need to include the object id in the data df as well so that we can
    # set it properly.

    def loc_at_ts(time):
        # Location stored at exactly this data.ts
        loc_doc = ts.get_entry_at_ts("background/filtered_location",
                                     "data.ts", time)
        return ecwl.Location(loc_doc["data"])

    def loc_for_row(row):
        # Location data for one row of the segmentation output
        return ts.df_row_to_entry("background/filtered_location", row).data

    trip_start_loc = loc_at_ts(trip.start_ts)
    trip_end_loc = loc_at_ts(trip.end_ts)
    logging.debug("trip_start_loc = %s, trip_end_loc = %s" % (trip_start_loc, trip_end_loc))

    prev_section_entry = None
    last_index = len(segmentation_points) - 1

    for i, (start_loc_doc, end_loc_doc, sensed_mode) in enumerate(segmentation_points):
        logging.debug("start_loc_doc = %s, end_loc_doc = %s" % (start_loc_doc, end_loc_doc))
        start_loc = loc_for_row(start_loc_doc)
        end_loc = loc_for_row(end_loc_doc)
        logging.debug("start_loc = %s, end_loc = %s" % (start_loc, end_loc))

        section = ecwc.Section()
        section.trip_id = trip_id
        if prev_section_entry is None:
            # This is the first point, so we want to start from the start of the trip, not the start of this segment
            start_loc = trip_start_loc
        if i == last_index:
            # This is the last point, so we want to end at the end of the trip, not at the end of this segment
            # Particularly in this case, if we don't do this, then the trip end may overshoot the section end
            end_loc = trip_end_loc

        fill_section(section, start_loc, end_loc, sensed_mode)
        # We create the entry after filling in the section so that we know
        # that the data is included properly
        section_entry = ecwe.Entry.create_entry(user_id, esda.RAW_SECTION_KEY,
                                                section, create_id=True)

        if prev_section_entry is not None:
            # If this is not the first section, create a stop to link the two sections together
            # The expectation is prev_section -> stop -> curr_section
            stop = ecws.Stop()
            stop.trip_id = trip_id
            stop_entry = ecwe.Entry.create_entry(user_id,
                                                 esda.RAW_STOP_KEY,
                                                 stop, create_id=True)
            logging.debug("stop = %s, stop_entry = %s" % (stop, stop_entry))
            stitch_together(prev_section_entry, stop_entry, section_entry)
            ts.insert(stop_entry)
            # The previous section was already inserted, so it is updated
            # after being stitched to the stop
            ts.update(prev_section_entry)

        # After we go through the loop, we will be left with the last section,
        # which does not have an ending stop. We insert that too.
        ts.insert(section_entry)
        prev_section_entry = section_entry
def section_to_geojson(section, tl):
    """
    Build the geojson representation of one section.

    This is the trickiest part of the visualization. The section is rendered
    as a single line feature through its "analysis/recreated_location"
    points, and that line is returned wrapped in a feature collection. For a
    section without a start_stop (the first one in its trip) the line is
    extended backwards to the location of the parent trip's start place.

    :param section: the section entry to be converted
    :param tl: object whose get_object(id) is used to look up the parent trip
        and that trip's start place
    :return: a feature collection which is the geojson version of the section
    """
    ts = esta.TimeSeries.get_time_series(section.user_id)
    section_tq = esda.get_time_query_for_trip_like("analysis/cleaned_section",
                                                   section.get_id())
    location_docs = ts.find_entries(["analysis/recreated_location"],
                                    section_tq)

    # TODO: Decide whether we want to use Rewrite to use dataframes throughout instead of python arrays.
    # dataframes insert nans. We could use fillna to fill with default values, but if we are not actually
    # using dataframe features here, it is unclear how much that would help.
    section_location_entries = [ecwe.Entry(doc) for doc in location_docs]
    if section_location_entries:
        logging.debug("first element in section_location_array = %s" %
                      section_location_entries[0])

        # The last recreated location has to coincide with the section end,
        # otherwise the drawn line would stop short of it.
        # TODO: Fix this once we are able to query by the data timestamp instead of the metadata ts
        last_loc = section_location_entries[-1].data.loc
        assert last_loc == section.data.end_loc, \
                "section_location_array[-1].data.loc %s != section.data.end_loc %s even after df.ts fix" % \
                    (last_loc, section.data.end_loc)

    line_feature = point_array_to_line(section_location_entries)
    # If this is the first section, we already start from the trip start. But we actually need to start from the
    # prior place. Fudge this too. Note also that we may want to figure out how to handle this properly in the model
    # without needing fudging. TODO: Unclear how exactly to do this
    if section.data.start_stop is None:
        # First section: prepend the start place of the parent trip
        parent_trip = tl.get_object(section.data.trip_id)
        start_place = tl.get_object(parent_trip.data.start_place)
        line_feature.geometry.coordinates.insert(
            0, start_place.data.location.coordinates)

    line_feature.id = str(section.get_id())
    # Shallow copy, so the keys added and removed below do not touch section.data
    line_feature.properties = copy.copy(section.data)
    line_feature.properties["feature_type"] = "section"
    line_feature.properties["sensed_mode"] = str(
        line_feature.properties.sensed_mode)

    _del_non_derializable(line_feature.properties, ["start_loc", "end_loc"])

    # Only the line is emitted; the individual points are not included
    return gj.FeatureCollection([line_feature])
 def testGetTimeRangeForTrip(self):
     """
     The time query built for a raw trip must be keyed on "data.ts" and
     run from 5 to 6.
     """
     # NOTE(review): 5 and 6 are presumably the start/end timestamps set by
     # create_fake_trip -- confirm against that helper.
     fake_trip = self.create_fake_trip()
     trip_tq = esda.get_time_query_for_trip_like(
         esda.RAW_TRIP_KEY,
         fake_trip.get_id(),
     )
     self.assertEqual(trip_tq.timeType, "data.ts")
     self.assertEqual(trip_tq.startTs, 5)
     self.assertEqual(trip_tq.endTs, 6)