Ejemplos de BoxplotOutlier en Python, ejemplos de emission.analysis.intake.cleaning.cleaning_methods.speed_outlier_detection.BoxplotOutlier en Python

Ejemplo n.º 1

0

Mostrar archivo

    def testPointFilteringRichmondJump(self):
        """Verify that the jump filter removes exactly one known point.

        Uses the trip at ``self.trip_entries[6]``: its points are loaded, each
        of its sections is run through ``SmoothZigzag`` and the test asserts
        that there is a single section from which exactly one point (with a
        known id) is marked for deletion.
        """
        classicJumpTrip1 = self.trip_entries[6]
        trip_id = classicJumpTrip1.get_id()
        self.loadPointsForTrip(trip_id)
        classicJumpSections1 = [s for s in self.section_entries
                                if s.data.trip_id == trip_id]
        outlier_algo = eaics.BoxplotOutlier()
        jump_algo = eaicj.SmoothZigzag(False, 100)

        for i, section_entry in enumerate(classicJumpSections1):
            logging.debug("-" * 20 + "Considering section %s" % i + "-" * 20)

            section_df = self.ts.get_data_df(
                "background/filtered_location",
                esda.get_time_query_for_trip_like(esda.RAW_SECTION_KEY,
                                                  section_entry.get_id()))
            with_speeds_df = eaicl.add_dist_heading_speed(section_df)

            # The threshold is only logged for debugging; none of the
            # assertions below depend on it.
            maxSpeed = outlier_algo.get_threshold(with_speeds_df)
            logging.debug("Max speed for section %s = %s" % (i, maxSpeed))

            jump_algo.filter(with_speeds_df)
            # Convert the mask to a plain ndarray before calling np.nonzero:
            # passing a pandas Series directly can fail on pandas versions
            # where Series no longer provides a .nonzero() method.
            logging.debug("Retaining points %s" %
                          np.nonzero(np.asarray(jump_algo.inlier_mask_)))

            # Kept in its original (Series/array) form so that it can still be
            # used as a boolean indexer on the dataframe below.
            to_delete_mask = np.logical_not(jump_algo.inlier_mask_)
            logging.debug("Deleting points %s" %
                          np.nonzero(np.asarray(to_delete_mask)))

            delete_ids = list(with_speeds_df[to_delete_mask]._id)
            logging.debug("Deleting ids %s" % delete_ids)

            # There is only one section
            self.assertEqual(i, 0)
            # Exactly one point, with a known id, should have been filtered
            self.assertEqual(np.count_nonzero(to_delete_mask), 1)
            self.assertEqual([str(point_id) for point_id in delete_ids],
                             ["55e86dbb7d65cb39ee987e09"])

Ejemplo n.º 2

0

Mostrar archivo

Archivo: location_smoothing.py Proyecto: joshzarrabi/e-mission-server

def filter_jumps(user_id, section_id):
    """
    Detect jump points in a section and persist the list of points to drop.

    The section's "background/filtered_location" points are read from the
    user's time series and handed to get_points_to_filter together with a
    BoxplotOutlier and a SmoothZigzag instance. When points are flagged, their
    ids are stored in an "analysis/smoothing" entry that is inserted into the
    same time series.

    :param user_id: the user whose time series is read and written
    :param section_id: the section whose points are examined
    :return: None in all cases; nothing is stored when get_points_to_filter
        returns None
    """
    logging.debug("filter_jumps(%s, %s) called" % (user_id, section_id))
    boxplot = eaico.BoxplotOutlier()
    zigzag = eaicj.SmoothZigzag()

    time_query = esds.get_time_query_for_section(section_id)
    time_series = esta.TimeSeries.get_time_series(user_id)
    location_df = time_series.get_data_df("background/filtered_location",
                                          time_query)
    logging.debug("len(section_points_df) = %s" % len(location_df))

    flagged_df = get_points_to_filter(location_df, boxplot, zigzag)
    if flagged_df is None:
        # Nothing was flagged, so there is no result to record
        return

    flagged_ids = list(flagged_df._id)
    logging.debug("deleted %s points" % len(flagged_ids))

    # Describe what was removed and which algorithms made the decision
    smoothing = ecws.Smoothresults()
    smoothing.section = section_id
    smoothing.deleted_points = flagged_ids
    smoothing.outlier_algo = "BoxplotOutlier"
    smoothing.filtering_algo = "SmoothZigzag"

    time_series.insert(
        ecwe.Entry.create_entry(user_id, "analysis/smoothing", smoothing))

Ejemplo n.º 3

0

Mostrar archivo

Archivo: TestLocationSmoothing.py Proyecto: fabmob/tracemob-server

    def testPointFilteringZigzag(self):
        """Check the points removed by SmoothZigzag for the zigzag trip.

        Works on the trip at ``self.trip_entries[8]``. The first section is
        expected to lose a fixed set of points (checked both by position and
        by id); every other section is expected to lose none.
        """
        zigzag_trip = self.trip_entries[8]
        self.loadPointsForTrip(zigzag_trip.get_id())
        trip_sections = []
        for entry in self.section_entries:
            if entry.data.trip_id == zigzag_trip.get_id():
                trip_sections.append(entry)
        boxplot = eaics.BoxplotOutlier()
        zigzag = eaicj.SmoothZigzag(False, 100)

        banner = "-" * 20
        for idx, section in enumerate(trip_sections):
            logging.debug(banner + "Considering section %s" % idx + banner)

            time_query = esda.get_time_query_for_trip_like(
                esda.RAW_SECTION_KEY, section.get_id())
            points_df = self.ts.get_data_df("background/filtered_location",
                                            time_query)
            speeds_df = eaicl.add_dist_heading_speed(points_df)

            # Logged for debugging only; not used by the assertions
            threshold = boxplot.get_threshold(speeds_df)
            logging.debug("Max speed for section %s = %s" % (idx, threshold))

            zigzag.filter(speeds_df)
            kept_positions = np.nonzero(zigzag.inlier_mask_.to_numpy())[0]
            logging.debug("Retaining points %s" % (kept_positions,))

            drop_mask = np.logical_not(zigzag.inlier_mask_)
            dropped_positions = np.nonzero(drop_mask.to_numpy())[0]
            logging.debug("Deleting points %s" % (dropped_positions,))

            dropped_ids = list(speeds_df[drop_mask]._id)
            logging.debug("Deleting ids %s" % dropped_ids)

            if idx != 0:
                # Every section after the first should be left untouched
                self.assertEqual(len(dropped_positions), 0)
                self.assertEqual(len(dropped_ids), 0)
                continue

            # idx == 0: this is the zigzag section
            self.assertEqual(
                dropped_positions.tolist(),
                [25, 64, 114, 115, 116, 117, 118, 119, 120, 123, 126])
            expected_ids = [
                boi.ObjectId(hex_id) for hex_id in (
                    '55edafe77d65cb39ee9882ff',
                    '55edcc157d65cb39ee98836e',
                    '55edcc1f7d65cb39ee988400',
                    '55edcc1f7d65cb39ee988403',
                    '55edcc1f7d65cb39ee988406',
                    '55edcc1f7d65cb39ee988409',
                    '55edcc1f7d65cb39ee98840c',
                    '55edcc207d65cb39ee988410',
                    '55edcc207d65cb39ee988412',
                    '55edcc217d65cb39ee98841f',
                    '55edcc217d65cb39ee988429',
                )
            ]
            self.assertEqual(dropped_ids, expected_ids)

Ejemplo n.º 4

0

Mostrar archivo

    def filter(self, with_speeds_df):
        """Flag the points that fall in bad segments as outliers.

        Resets ``self.inlier_mask_`` to an all-True ``pd.Series`` with one
        entry per row of ``with_speeds_df``, splits the points into segments
        via ``self.find_segments()`` and, when there is more than one segment,
        labels the segments outwards from ``self.find_start_segment(...)`` and
        clears the mask for every segment left in the ``BAD`` state.
        Afterwards the speeds of the surviving points are recomputed and any
        point still above a freshly computed ``BoxplotOutlier`` threshold is
        logged (not removed).

        :param with_speeds_df: dataframe of points to filter; it is stored on
            ``self.with_speeds_df`` for use by the helper methods
        :return: None. The result is left in ``self.inlier_mask_``.
        :raises AssertionError: if any segment is still ``UNKNOWN`` after both
            marking passes
        """
        self.inlier_mask_ = pd.Series([True] * with_speeds_df.shape[0])
        self.with_speeds_df = with_speeds_df
        self.find_segments()
        logging.debug("After splitting, segment list is %s with size %s" %
                      (self.segment_list, len(self.segment_list)))
        if len(self.segment_list) == 1:
            # there were no jumps, so there's nothing to do
            logging.info("No jumps, nothing to filter")
            return

        # Seed the labelling with one segment assumed to be good, then
        # propagate states in both directions from it.
        start_segment_idx = self.find_start_segment(self.segment_list)
        self.segment_list[start_segment_idx].state = Segment.State.GOOD
        self.mark_segment_states(start_segment_idx,
                                 SmoothZigzag.Direction.RIGHT)
        self.mark_segment_states(start_segment_idx,
                                 SmoothZigzag.Direction.LEFT)

        unknown_segments = [
            segment for segment in self.segment_list
            if segment.state == Segment.State.UNKNOWN
        ]
        logging.debug("unknown_segments = %s" % unknown_segments)
        assert len(unknown_segments) == 0, (
            "Found %s unknown segments - early termination of loop?" %
            len(unknown_segments))

        bad_segments = [
            segment for segment in self.segment_list
            if segment.state == Segment.State.BAD
        ]
        logging.debug("bad_segments = %s" % bad_segments)
        for segment in bad_segments:
            self.inlier_mask_[segment.start:segment.end] = False

        # Convert to a plain ndarray before np.nonzero: passing a pandas
        # Series directly can fail on pandas versions where Series no longer
        # provides a .nonzero() method. The elementwise `== False` comparison
        # is intentional (it is not an identity test).
        outlier_positions = np.nonzero(np.asarray(self.inlier_mask_ == False))
        logging.debug("after setting values, outlier_mask = %s" %
                      outlier_positions)

        # TODO: This is not the right place for this - adds too many dependencies
        # Should do this in the outer class in general so that we can do
        # multiple passes of any filtering algorithm
        import emission.analysis.intake.cleaning.cleaning_methods.speed_outlier_detection as cso
        import emission.analysis.intake.cleaning.location_smoothing as ls

        recomputed_speeds_df = ls.recalc_speed(
            self.with_speeds_df[self.inlier_mask_])
        recomputed_threshold = cso.BoxplotOutlier(
            ignore_zeros=True).get_threshold(recomputed_speeds_df)

        # Leftover outliers are only reported, not treated as an error (an
        # earlier assert on this condition is disabled in the original code).
        # Select them once and reuse the frame for both the check and the log.
        remaining_outliers_df = recomputed_speeds_df[
            recomputed_speeds_df.speed > recomputed_threshold]
        if remaining_outliers_df.shape[0] != 0:
            logging.info("After first round, still have outliers %s" %
                         remaining_outliers_df)

Ejemplo n.º 5

0

Mostrar archivo

def filter_jumps(user_id, section_id):
    """
    Detect jump points in a section and persist the list of points to drop.

    Reads the section's "background/filtered_location" points from the user's
    time series. When the only non-null value in the "filter" column is
    "distance", the section is treated as iOS data and is passed through
    _ios_fill_fake_data first. The points are then handed to
    get_points_to_filter; the non-null ids of the flagged points are stored in
    an "analysis/smoothing" entry inserted into the same time series.

    :param user_id: the user whose time series is read and written
    :param section_id: the section whose points are examined
    :return: None in all cases; nothing is stored when get_points_to_filter
        returns None
    """
    logging.debug("filter_jumps(%s, %s) called" % (user_id, section_id))
    boxplot = eaico.BoxplotOutlier()

    time_query = esda.get_time_query_for_trip_like(esda.RAW_SECTION_KEY,
                                                   section_id)
    time_series = esta.TimeSeries.get_time_series(user_id)
    location_df = time_series.get_data_df("background/filtered_location",
                                          time_query)

    filter_kinds = location_df["filter"].dropna().unique().tolist()
    is_ios = filter_kinds == ["distance"]
    if is_ios:
        logging.debug("Found iOS section, filling in gaps with fake data")
        location_df = _ios_fill_fake_data(location_df)
    zigzag = eaicj.SmoothZigzag(is_ios, DEFAULT_SAME_POINT_DISTANCE)

    logging.debug("len(section_points_df) = %s" % len(location_df))
    flagged_df = get_points_to_filter(location_df, boxplot, zigzag)
    if flagged_df is None:
        # Nothing was flagged, so there is no result to record
        return

    # Keep only flagged rows that actually carry an _id. The original author
    # notes that fuzzed points shouldn't really get filtered because they
    # represent 100m in 60 secs; the before/after counts are logged so that
    # this can be checked rather than asserted.
    flagged_ids = flagged_df._id.dropna()
    logging.debug("after filtering ignored points, %s -> %s" %
                  (len(flagged_df), len(flagged_ids)))
    deleted_ids = list(flagged_ids)
    logging.debug("deleted %s points" % len(deleted_ids))

    # Describe what was removed and which algorithms made the decision
    smoothing = ecws.Smoothresults()
    smoothing.section = section_id
    smoothing.deleted_points = deleted_ids
    smoothing.outlier_algo = "BoxplotOutlier"
    smoothing.filtering_algo = "SmoothZigzag"

    time_series.insert(
        ecwe.Entry.create_entry(user_id, "analysis/smoothing", smoothing))

Ejemplo n.º 6

0

Mostrar archivo

Archivo: TestLocationSmoothing.py Proyecto: fabmob/tracemob-server

    def testPointFilteringShanghaiJump(self):
        """Check the points removed by SmoothZigzag for the Shanghai trip.

        Works on the trip at ``self.trip_entries[0]``. Only the section at
        index 2 is expected to lose a point (exactly one, with a known id);
        all other sections are expected to be left untouched.
        """
        jump_trip = self.trip_entries[0]
        self.loadPointsForTrip(jump_trip.get_id())
        trip_sections = []
        for entry in self.section_entries:
            if entry.data.trip_id == jump_trip.get_id():
                trip_sections.append(entry)
        boxplot = eaics.BoxplotOutlier()
        zigzag = eaicj.SmoothZigzag(False, 100)

        banner = "-" * 20
        for idx, section in enumerate(trip_sections):
            logging.debug(banner + "Considering section %s" % idx + banner)

            time_query = esda.get_time_query_for_trip_like(
                esda.RAW_SECTION_KEY, section.get_id())
            points_df = self.ts.get_data_df("background/filtered_location",
                                            time_query)
            speeds_df = eaicl.add_dist_heading_speed(points_df)

            # Logged for debugging only; not used by the assertions
            threshold = boxplot.get_threshold(speeds_df)
            logging.debug("Max speed for section %s = %s" % (idx, threshold))

            zigzag.filter(speeds_df)
            kept_positions = np.nonzero(zigzag.inlier_mask_.to_numpy())[0]
            logging.debug("Retaining points %s" % (kept_positions,))

            drop_mask = np.logical_not(zigzag.inlier_mask_)
            dropped_positions = np.nonzero(drop_mask.to_numpy())[0]
            logging.debug("Deleting points %s" % (dropped_positions,))

            dropped_ids = list(speeds_df[drop_mask]._id)
            logging.debug("Deleting ids %s" % dropped_ids)

            # Automated checks. Might be able to remove logging statements later
            dropped_count = np.count_nonzero(drop_mask)
            if idx == 2:
                # The bad section: exactly one point, with a known id, goes
                self.assertEqual(dropped_count, 1)
                self.assertEqual(list(map(str, dropped_ids)),
                                 ["55d8c4837d65cb39ee983cb4"])
            else:
                # Not the bad section. Should not be filtered
                self.assertEqual(dropped_count, 0)
                self.assertEqual(len(dropped_ids), 0)