def split_segment(self, i, curr_seg, direction):
        """Cut ``curr_seg`` in two at the first point outside the cluster radius.

        The speeds/distances of ``curr_seg.segment_df`` are recomputed with
        ``location_smoothing.recalc_speed`` and the index label of the first
        row whose ``distance`` exceeds ``Segment.CLUSTER_RADIUS`` is used as
        the split point. ``self.segment_list`` is updated in place: slot ``i``
        is overwritten with one half and the other half is inserted next to it.

        * ``Direction.RIGHT``: rows are scanned in their stored order. The
          piece ``(curr_seg.start, split)`` replaces slot ``i`` and the piece
          ``(split, curr_seg.end)`` is inserted at ``i + 1``.
        * ``Direction.LEFT``: rows are reversed (``iloc[::-1]``) before the
          recomputation, so the scan starts from the last row. The boundary is
          ``split + 1``; ``(curr_seg.start, boundary)`` is inserted at ``i``
          and ``(boundary, curr_seg.end)`` is the replacement, which therefore
          ends up at ``i + 1``.

        :param i: position of ``curr_seg`` inside ``self.segment_list``
        :param curr_seg: segment to split; must expose ``segment_df``,
            ``start`` and ``end``
        :param direction: a ``SmoothZigzag.Direction`` value
        :return: the replacement segment (the one assigned to slot ``i``), or
            ``None`` when ``direction`` is neither RIGHT nor LEFT
        :raises IndexError: from ``.index[0]`` when no row lies beyond the
            cluster radius
        """
        import emission.analysis.intake.cleaning.location_smoothing as ls

        if direction == SmoothZigzag.Direction.RIGHT:
            speeds_df = ls.recalc_speed(curr_seg.segment_df)
            beyond_radius = speeds_df.distance > Segment.CLUSTER_RADIUS
            # Label of the first row that no longer belongs to the cluster
            cut = speeds_df[beyond_radius].index[0]
            tail_seg = Segment(cut, curr_seg.end, self)
            head_seg = Segment(curr_seg.start, cut, self)
            # The head takes over the current slot; the tail follows it
            self.segment_list[i] = head_seg
            self.segment_list.insert(i + 1, tail_seg)
            return head_seg

        if direction == SmoothZigzag.Direction.LEFT:
            # Walk the rows back to front so distances are measured starting
            # from the opposite edge of the segment
            reversed_rows = curr_seg.segment_df.iloc[::-1]
            speeds_df = ls.recalc_speed(reversed_rows)
            logging.debug("Recomputed_speed_df = %s", speeds_df.speed)
            beyond_radius = speeds_df.distance > Segment.CLUSTER_RADIUS
            # Label of the first row that no longer belongs to the cluster
            cut = speeds_df[beyond_radius].index[0]
            logging.debug("new split point = %s", cut)
            boundary = cut + 1
            head_seg = Segment(curr_seg.start, boundary, self)
            tail_seg = Segment(boundary, curr_seg.end, self)
            # Inserting the head at i pushes the replacement tail to i + 1
            self.segment_list[i] = tail_seg
            self.segment_list.insert(i, head_seg)
            return tail_seg
Example #2
0
    def split_segment(self, i, curr_seg, direction):
        """Split ``curr_seg`` where its points first leave the cluster radius.

        ``location_smoothing.recalc_speed`` is run over the rows of
        ``curr_seg.segment_df`` (reversed with ``iloc[::-1]`` for
        ``Direction.LEFT``) and the index label of the first row with
        ``distance > Segment.CLUSTER_RADIUS`` becomes the split point. Two new
        ``Segment`` objects are built and ``self.segment_list`` is mutated:

        * RIGHT: ``(curr_seg.start, split)`` overwrites slot ``i`` and
          ``(split, curr_seg.end)`` is inserted at ``i + 1``.
        * LEFT: with ``boundary = split + 1``, ``(boundary, curr_seg.end)``
          overwrites slot ``i`` and ``(curr_seg.start, boundary)`` is then
          inserted at ``i``, leaving the overwriting segment at ``i + 1``.

        :param i: position of ``curr_seg`` inside ``self.segment_list``
        :param curr_seg: segment to split; must expose ``segment_df``,
            ``start`` and ``end``
        :param direction: a ``SmoothZigzag.Direction`` value
        :return: the segment that overwrote slot ``i``; ``None`` when
            ``direction`` matches neither RIGHT nor LEFT
        :raises IndexError: from ``.index[0]`` when no row lies beyond the
            cluster radius
        """
        import emission.analysis.intake.cleaning.location_smoothing as ls

        if direction == SmoothZigzag.Direction.RIGHT:
            speeds_df = ls.recalc_speed(curr_seg.segment_df)
            outside_cluster = speeds_df.distance > Segment.CLUSTER_RADIUS
            # First row label that is no longer part of the cluster
            split_label = speeds_df[outside_cluster].index[0]
            inserted = Segment(split_label, curr_seg.end, self)
            replacement = Segment(curr_seg.start, split_label, self)
            self.segment_list[i] = replacement
            self.segment_list.insert(i + 1, inserted)
            return replacement

        if direction == SmoothZigzag.Direction.LEFT:
            # Rows are traversed back to front for this direction
            back_to_front = curr_seg.segment_df.iloc[::-1]
            speeds_df = ls.recalc_speed(back_to_front)
            logging.debug("Recomputed_speed_df = %s", speeds_df.speed)
            outside_cluster = speeds_df.distance > Segment.CLUSTER_RADIUS
            # First row label that is no longer part of the cluster
            split_label = speeds_df[outside_cluster].index[0]
            logging.debug("new split point = %s", split_label)
            boundary = split_label + 1
            inserted = Segment(curr_seg.start, boundary, self)
            replacement = Segment(boundary, curr_seg.end, self)
            # insert(i, ...) shifts the replacement one slot to the right
            self.segment_list[i] = replacement
            self.segment_list.insert(i, inserted)
            return replacement
Example #3
0
    def filter(self, with_speeds_df):
        """Flag the rows of ``with_speeds_df`` that fall inside BAD segments.

        Side effects: the input is stored on ``self.with_speeds_df`` and a
        boolean ``pd.Series`` is stored on ``self.inlier_mask_`` (default
        integer index, one entry per input row, ``True`` meaning "keep").
        Entries in ``segment.start:segment.end`` of every BAD segment are set
        to ``False``.

        Steps:
          1. ``self.find_segments()`` is called and ``self.segment_list`` is
             read afterwards (presumably populated by that call).
          2. With exactly one segment there is nothing to filter: return with
             an all-``True`` mask.
          3. The segment picked by ``find_start_segment`` is marked GOOD and
             ``mark_segment_states`` is run to the RIGHT and then to the LEFT
             of it.
          4. BAD segments are masked out, speeds are recomputed on the
             remaining rows, and rows still above the boxplot threshold are
             logged (they are not removed).

        :param with_speeds_df: dataframe of points; here only its row count
            and boolean-mask indexing are used directly
        :return: ``None``; the result is left on ``self.inlier_mask_``
        :raises AssertionError: if any segment is still UNKNOWN after both
            marking passes
        """
        # Built from a bool array so the mask keeps a bool dtype even when
        # the dataframe has no rows.
        self.inlier_mask_ = pd.Series(
            np.ones(with_speeds_df.shape[0], dtype=bool))
        self.with_speeds_df = with_speeds_df
        self.find_segments()
        # Logging arguments are passed lazily so the (potentially large)
        # reprs are only built when the level is enabled.
        logging.debug("After splitting, segment list is %s with size %s",
                      self.segment_list, len(self.segment_list))
        if len(self.segment_list) == 1:
            # there were no jumps, so there's nothing to do
            logging.info("No jumps, nothing to filter")
            return

        start_segment_idx = self.find_start_segment(self.segment_list)
        self.segment_list[start_segment_idx].state = Segment.State.GOOD
        self.mark_segment_states(start_segment_idx,
                                 SmoothZigzag.Direction.RIGHT)
        self.mark_segment_states(start_segment_idx,
                                 SmoothZigzag.Direction.LEFT)

        unknown_segments = [
            segment for segment in self.segment_list
            if segment.state == Segment.State.UNKNOWN
        ]
        logging.debug("unknown_segments = %s", unknown_segments)
        assert len(unknown_segments) == 0, (
            "Found %s unknown segments - early termination of loop?"
            % len(unknown_segments))

        bad_segments = [
            segment for segment in self.segment_list
            if segment.state == Segment.State.BAD
        ]
        logging.debug("bad_segments = %s", bad_segments)
        for segment in bad_segments:
            self.inlier_mask_[segment.start:segment.end] = False

        # Positions of the masked-out rows, computed on the underlying bool
        # array so the result does not depend on how NumPy dispatches on a
        # pandas Series.
        logging.debug("after setting values, outlier_mask = %s",
                      np.flatnonzero(~self.inlier_mask_.values))

        # TODO: This is not the right place for this - adds too many dependencies
        # Should do this in the outer class in general so that we can do
        # multiple passes of any filtering algorithm
        import emission.analysis.intake.cleaning.cleaning_methods.speed_outlier_detection as cso
        import emission.analysis.intake.cleaning.location_smoothing as ls

        # NOTE(review): the mask carries a default integer index, so this
        # selection assumes with_speeds_df is indexed 0..n-1 - confirm
        # against callers.
        recomputed_speeds_df = ls.recalc_speed(
            self.with_speeds_df[self.inlier_mask_])
        recomputed_threshold = cso.BoxplotOutlier(
            ignore_zeros=True).get_threshold(recomputed_speeds_df)
        # Remaining outliers are only reported, not treated as an error.
        remaining_outliers = recomputed_speeds_df[
            recomputed_speeds_df.speed > recomputed_threshold]
        if remaining_outliers.shape[0] != 0:
            logging.info("After first round, still have outliers %s",
                         remaining_outliers)
    def filter(self, with_speeds_df):
        """Flag the rows of ``with_speeds_df`` that fall inside BAD segments.

        Side effects: the input is stored on ``self.with_speeds_df`` and a
        boolean ``pd.Series`` is stored on ``self.inlier_mask_`` (default
        integer index, one entry per input row, ``True`` meaning "keep").
        Entries in ``segment.start:segment.end`` of every BAD segment are set
        to ``False``.

        Steps:
          1. ``self.find_segments()`` is called and ``self.segment_list`` is
             read afterwards (presumably populated by that call).
          2. With exactly one segment there is nothing to filter: return with
             an all-``True`` mask.
          3. The segment picked by ``find_start_segment`` is marked GOOD and
             ``mark_segment_states`` is run to the RIGHT and then to the LEFT
             of it.
          4. BAD segments are masked out, speeds are recomputed on the
             remaining rows, and rows still above the boxplot threshold are
             logged (they are not removed).

        :param with_speeds_df: dataframe of points; here only its row count
            and boolean-mask indexing are used directly
        :return: ``None``; the result is left on ``self.inlier_mask_``
        :raises AssertionError: if any segment is still UNKNOWN after both
            marking passes
        """
        # Built from a bool array so the mask keeps a bool dtype even when
        # the dataframe has no rows.
        self.inlier_mask_ = pd.Series(np.ones(with_speeds_df.shape[0], dtype=bool))
        self.with_speeds_df = with_speeds_df
        self.find_segments()
        # Logging arguments are passed lazily so the (potentially large)
        # reprs are only built when the level is enabled.
        logging.debug("After splitting, segment list is %s with size %s",
                self.segment_list, len(self.segment_list))
        if len(self.segment_list) == 1:
            # there were no jumps, so there's nothing to do
            logging.info("No jumps, nothing to filter")
            return

        start_segment_idx = self.find_start_segment(self.segment_list)
        self.segment_list[start_segment_idx].state = Segment.State.GOOD
        self.mark_segment_states(start_segment_idx, SmoothZigzag.Direction.RIGHT)
        self.mark_segment_states(start_segment_idx, SmoothZigzag.Direction.LEFT)

        unknown_segments = [segment for segment in self.segment_list if segment.state == Segment.State.UNKNOWN]
        logging.debug("unknown_segments = %s", unknown_segments)
        assert len(unknown_segments) == 0, "Found %s unknown segments - early termination of loop?" % len(unknown_segments)

        bad_segments = [segment for segment in self.segment_list if segment.state == Segment.State.BAD]
        logging.debug("bad_segments = %s", bad_segments)
        for segment in bad_segments:
            self.inlier_mask_[segment.start:segment.end] = False

        # Positions of the masked-out rows, computed on the underlying bool
        # array so the result does not depend on how NumPy dispatches on a
        # pandas Series.
        logging.debug("after setting values, outlier_mask = %s", np.flatnonzero(~self.inlier_mask_.values))

        # TODO: This is not the right place for this - adds too many dependencies
        # Should do this in the outer class in general so that we can do
        # multiple passes of any filtering algorithm
        import emission.analysis.intake.cleaning.cleaning_methods.speed_outlier_detection as cso
        import emission.analysis.intake.cleaning.location_smoothing as ls

        # NOTE(review): the mask carries a default integer index, so this
        # selection assumes with_speeds_df is indexed 0..n-1 - confirm
        # against callers.
        recomputed_speeds_df = ls.recalc_speed(self.with_speeds_df[self.inlier_mask_])
        recomputed_threshold = cso.BoxplotOutlier(ignore_zeros=True).get_threshold(recomputed_speeds_df)
        # Remaining outliers are only reported, not treated as an error.
        remaining_outliers = recomputed_speeds_df[recomputed_speeds_df.speed > recomputed_threshold]
        if remaining_outliers.shape[0] != 0:
            logging.info("After first round, still have outliers %s", remaining_outliers)