Example #1
0
    def testPointFilteringRichmondJump(self):
        """
        Run zigzag smoothing over the sections of ``self.trip_entries[6]``.

        For every section of that trip, the location points are loaded, speeds
        are added and ``SmoothZigzag(False, 100)`` is applied. The test asserts
        that no section beyond index 0 is visited and that exactly one point,
        identified by its id, is flagged for deletion.
        """
        classicJumpTrip1 = self.trip_entries[6]
        self.loadPointsForTrip(classicJumpTrip1.get_id())
        classicJumpSections1 = [s for s in self.section_entries
                                if s.data.trip_id == classicJumpTrip1.get_id()]
        outlier_algo = eaics.BoxplotOutlier()
        jump_algo = eaicj.SmoothZigzag(False, 100)

        for i, section_entry in enumerate(classicJumpSections1):
            logging.debug("-" * 20 + "Considering section %s" % i + "-" * 20)

            section_df = self.ts.get_data_df("background/filtered_location",
                            esda.get_time_query_for_trip_like(esda.RAW_SECTION_KEY,
                                                              section_entry.get_id()))
            with_speeds_df = eaicl.add_dist_heading_speed(section_df)

            # The threshold is only logged; no assertion below depends on it.
            maxSpeed = outlier_algo.get_threshold(with_speeds_df)
            logging.debug("Max speed for section %s = %s" % (i, maxSpeed))

            jump_algo.filter(with_speeds_df)
            # Convert the mask to a plain array before np.nonzero: the sibling
            # tests treat inlier_mask_ as a pandas Series (they call
            # .to_numpy() on it), and np.nonzero on a Series is not reliable
            # across pandas versions. np.asarray also accepts an ndarray.
            logging.debug("Retaining points %s" %
                          np.nonzero(np.asarray(jump_algo.inlier_mask_)))

            to_delete_mask = np.logical_not(jump_algo.inlier_mask_)
            logging.debug("Deleting points %s" %
                          np.nonzero(np.asarray(to_delete_mask)))

            delete_ids = list(with_speeds_df[to_delete_mask]._id)
            logging.debug("Deleting ids %s" % delete_ids)

            # There is only one section
            self.assertEqual(i, 0)
            # That section should have exactly one point filtered out
            self.assertEqual(np.count_nonzero(to_delete_mask), 1)
            self.assertEqual([str(point_id) for point_id in delete_ids],
                             ["55e86dbb7d65cb39ee987e09"])
def filter_jumps(user_id, section_id):
    """
    Detect jump points in one section and record which points to ignore.

    The section's "background/filtered_location" points are read from the
    user's time series and handed to get_points_to_filter together with a
    BoxplotOutlier and a SmoothZigzag instance.

    :param user_id: the user whose time series is read from and written to
    :param section_id: the section whose points are examined
    :return: None. When get_points_to_filter returns a dataframe, an
        "analysis/smoothing" entry listing the ids of its rows is inserted
        into the user's time series; when it returns None nothing is stored.
    """

    logging.debug("filter_jumps(%s, %s) called" % (user_id, section_id))
    boxplot = eaico.BoxplotOutlier()
    zigzag = eaicj.SmoothZigzag()

    section_query = esds.get_time_query_for_section(section_id)
    ts = esta.TimeSeries.get_time_series(user_id)
    points_df = ts.get_data_df("background/filtered_location", section_query)
    logging.debug("len(section_points_df) = %s" % len(points_df))

    ignored_df = get_points_to_filter(points_df, boxplot, zigzag)
    if ignored_df is None:
        # Nothing was flagged, so there is no result to persist
        return

    ignored_ids = list(ignored_df._id)
    logging.debug("deleted %s points" % len(ignored_ids))

    # Describe the smoothing outcome for this section
    smoothing = ecws.Smoothresults()
    smoothing.section = section_id
    smoothing.deleted_points = ignored_ids
    smoothing.outlier_algo = "BoxplotOutlier"
    smoothing.filtering_algo = "SmoothZigzag"

    ts.insert(ecwe.Entry.create_entry(user_id, "analysis/smoothing", smoothing))
    def testPointFilteringZigzag(self):
        """
        Run zigzag smoothing over the sections of ``self.trip_entries[8]``.

        Section 0 must lose a specific set of points (checked both by
        position and by id); every other section must lose none.
        """
        trip = self.trip_entries[8]
        self.loadPointsForTrip(trip.get_id())
        trip_sections = [
            entry for entry in self.section_entries
            if entry.data.trip_id == trip.get_id()
        ]
        outlier_algo = eaics.BoxplotOutlier()
        jump_algo = eaicj.SmoothZigzag(False, 100)

        # Ids expected to be dropped from the zigzag section, in order
        expected_hex_ids = (
            '55edafe77d65cb39ee9882ff',
            '55edcc157d65cb39ee98836e',
            '55edcc1f7d65cb39ee988400',
            '55edcc1f7d65cb39ee988403',
            '55edcc1f7d65cb39ee988406',
            '55edcc1f7d65cb39ee988409',
            '55edcc1f7d65cb39ee98840c',
            '55edcc207d65cb39ee988410',
            '55edcc207d65cb39ee988412',
            '55edcc217d65cb39ee98841f',
            '55edcc217d65cb39ee988429',
        )
        separator = "-" * 20

        for i, section_entry in enumerate(trip_sections):
            logging.debug(separator + "Considering section %s" % i + separator)

            section_query = esda.get_time_query_for_trip_like(
                esda.RAW_SECTION_KEY, section_entry.get_id())
            section_df = self.ts.get_data_df("background/filtered_location",
                                             section_query)
            with_speeds_df = eaicl.add_dist_heading_speed(section_df)

            # Logged for debugging only; not asserted on
            maxSpeed = outlier_algo.get_threshold(with_speeds_df)
            logging.debug("Max speed for section %s = %s" % (i, maxSpeed))

            jump_algo.filter(with_speeds_df)
            retained_positions = np.nonzero(jump_algo.inlier_mask_.to_numpy())
            logging.debug("Retaining points %s" % retained_positions)

            to_delete_mask = np.logical_not(jump_algo.inlier_mask_)
            deleted_positions = np.nonzero(to_delete_mask.to_numpy())
            logging.debug("Deleting points %s" % deleted_positions)

            delete_ids = list(with_speeds_df[to_delete_mask]._id)
            logging.debug("Deleting ids %s" % delete_ids)

            if i != 0:
                # Sections other than the first must be left untouched
                self.assertEqual(len(deleted_positions[0]), 0)
                self.assertEqual(len(delete_ids), 0)
                continue

            # this is the zigzag section
            self.assertEqual(
                deleted_positions[0].tolist(),
                [25, 64, 114, 115, 116, 117, 118, 119, 120, 123, 126])
            self.assertEqual(
                delete_ids,
                [boi.ObjectId(hex_id) for hex_id in expected_hex_ids])
Example #4
0
def filter_jumps(user_id, section_id):
    """
    Detect jump points in one section and record which points to ignore.

    The section's "background/filtered_location" points are read from the
    user's time series. If the only non-null value in their "filter" column
    is "distance", the section is handled as an iOS section and is first
    passed through _ios_fill_fake_data. The points are then handed to
    get_points_to_filter with a BoxplotOutlier and a SmoothZigzag instance.

    :param user_id: the user whose time series is read from and written to
    :param section_id: the section whose points are examined
    :return: None. When get_points_to_filter returns a dataframe, an
        "analysis/smoothing" entry listing the non-null ids of its rows is
        inserted into the user's time series; when it returns None nothing
        is stored.
    """

    logging.debug("filter_jumps(%s, %s) called" % (user_id, section_id))
    boxplot = eaico.BoxplotOutlier()

    section_query = esda.get_time_query_for_trip_like(esda.RAW_SECTION_KEY,
                                                      section_id)
    ts = esta.TimeSeries.get_time_series(user_id)
    points_df = ts.get_data_df("background/filtered_location", section_query)

    distinct_filters = points_df["filter"].dropna().unique().tolist()
    is_ios = distinct_filters == ["distance"]
    if is_ios:
        logging.debug("Found iOS section, filling in gaps with fake data")
        points_df = _ios_fill_fake_data(points_df)
    zigzag = eaicj.SmoothZigzag(is_ios, DEFAULT_SAME_POINT_DISTANCE)

    logging.debug("len(section_points_df) = %s" % len(points_df))
    ignored_df = get_points_to_filter(points_df, boxplot, zigzag)
    if ignored_df is None:
        # Nothing was flagged, so there is no result to persist
        return

    # Rows without an _id are dropped before the ids are stored
    # (presumably the filled-in fake points carry no _id - TODO confirm).
    ignored_ids = ignored_df._id.dropna()
    logging.debug(
        "after filtering ignored points, %s -> %s" %
        (len(ignored_df), len(ignored_ids)))
    # Fuzzed points represent 100m in 60 secs, so none of them should really
    # end up being filtered. The equality of the two lengths logged above is
    # deliberately not enforced with an assert.
    deleted_point_id_list = list(ignored_ids)
    logging.debug("deleted %s points" % len(deleted_point_id_list))

    # Describe the smoothing outcome for this section
    smoothing = ecws.Smoothresults()
    smoothing.section = section_id
    smoothing.deleted_points = deleted_point_id_list
    smoothing.outlier_algo = "BoxplotOutlier"
    smoothing.filtering_algo = "SmoothZigzag"

    ts.insert(ecwe.Entry.create_entry(user_id, "analysis/smoothing", smoothing))
    def testPointFilteringShanghaiJump(self):
        """
        Run zigzag smoothing over the sections of ``self.trip_entries[0]``.

        Section 2 must lose exactly one point (checked by its id); every
        other section must lose none.
        """
        trip = self.trip_entries[0]
        self.loadPointsForTrip(trip.get_id())
        trip_sections = [
            entry for entry in self.section_entries
            if entry.data.trip_id == trip.get_id()
        ]
        outlier_algo = eaics.BoxplotOutlier()
        jump_algo = eaicj.SmoothZigzag(False, 100)
        separator = "-" * 20

        for i, section_entry in enumerate(trip_sections):
            logging.debug(separator + "Considering section %s" % i + separator)

            section_query = esda.get_time_query_for_trip_like(
                esda.RAW_SECTION_KEY, section_entry.get_id())
            section_df = self.ts.get_data_df("background/filtered_location",
                                             section_query)
            with_speeds_df = eaicl.add_dist_heading_speed(section_df)

            # Logged for debugging only; not asserted on
            maxSpeed = outlier_algo.get_threshold(with_speeds_df)
            logging.debug("Max speed for section %s = %s" % (i, maxSpeed))

            jump_algo.filter(with_speeds_df)
            retained_positions = np.nonzero(jump_algo.inlier_mask_.to_numpy())
            logging.debug("Retaining points %s" % retained_positions)

            to_delete_mask = np.logical_not(jump_algo.inlier_mask_)
            deleted_positions = np.nonzero(to_delete_mask.to_numpy())
            logging.debug("Deleting points %s" % deleted_positions)

            delete_ids = list(with_speeds_df[to_delete_mask]._id)
            logging.debug("Deleting ids %s" % delete_ids)

            # Automated checks. Might be able to remove logging statements later
            deleted_count = np.count_nonzero(to_delete_mask)
            if i == 2:
                # The bad section: exactly one point should be filtered out
                self.assertEqual(deleted_count, 1)
                self.assertEqual([str(point_id) for point_id in delete_ids],
                                 ["55d8c4837d65cb39ee983cb4"])
            else:
                # Not the bad section. Should not be filtered
                self.assertEqual(deleted_count, 0)
                self.assertEqual(len(delete_ids), 0)