def test_8(self):
     """Same scenario as test_6, this time with collars."""
     frame_range = [0, 25]
     reference = np.array([[10, 18]])
     system_output = np.array([[5, 12], [16, 23]])
     collar_size = 1

     # Collar intervals are built from the reference intervals only.
     collar_zones = IC.compute_collars(
         reference, collar_size, crop_to_range=frame_range)

     confusion, _, _ = self.Scorer.compute_confusion_map(
         reference, system_output, frame_range, collars=collar_zones)
     # With collars supplied, the confusion result is a pair whose first
     # element is the masked vector checked below.
     masked_vector, _ = confusion

     expected = np.array([0, 2, -1, 3, 1, 3, -1, 2, 0])
     self.assertTrue(np.array_equal(expected, masked_vector))
 def test_14(self):
     """Zero-length intervals in the system output are expected to be dropped."""
     frame_range = [0, 25]
     reference = np.array([[10, 18]])
     # [14, 14] is the zero-length interval under test.
     system_output = np.array([[5, 12], [14, 14], [17, 20]])
     collar_size = 1

     collar_zones = IC.compute_collars(
         reference, collar_size, crop_to_range=frame_range)

     confusion, _, _ = self.Scorer.compute_confusion_map(
         reference, system_output, frame_range, collars=collar_zones)
     masked_vector, _ = confusion

     expected = np.array([0, 2, -1, 3, 1, -1, 2, 0])
     self.assertTrue(np.array_equal(expected, masked_vector))
 def test_13(self):
     """Overlapping system intervals are expected to be merged (union)."""
     frame_range = [0, 25]
     reference = np.array([[10, 18]])
     # The two system intervals overlap on [12, 16].
     system_output = np.array([[5, 16], [12, 20]])
     collar_size = 1

     collar_zones = IC.compute_collars(
         reference, collar_size, crop_to_range=frame_range)

     confusion, _, _ = self.Scorer.compute_confusion_map(
         reference, system_output, frame_range, collars=collar_zones)
     masked_vector, _ = confusion

     expected = np.array([0, 2, -1, 3, -1, 2, 0])
     self.assertTrue(np.array_equal(expected, masked_vector))
 def test_12(self):
     """Same expectation as test_8, with the system intervals shuffled and
     the bounds of one interval inverted."""
     frame_range = [0, 25]
     reference = np.array([[10, 18]])
     # [23, 16] is given end-first and listed before [5, 12].
     system_output = np.array([[23, 16], [5, 12]])
     collar_size = 1

     collar_zones = IC.compute_collars(
         reference, collar_size, crop_to_range=frame_range)

     confusion, _, _ = self.Scorer.compute_confusion_map(
         reference, system_output, frame_range, collars=collar_zones)
     masked_vector, _ = confusion

     expected = np.array([0, 2, -1, 3, 1, 3, -1, 2, 0])
     self.assertTrue(np.array_equal(expected, masked_vector))
 def test_15(self):
     """Collars plus two system no-score zones."""
     frame_range = [0, 25]
     reference = np.array([[10, 18]])
     system_output = np.array([[5, 12], [17, 20]])
     no_score_zones = np.array([[4, 9], [13, 16]])
     collar_size = 1

     collar_zones = IC.compute_collars(
         reference, collar_size, crop_to_range=frame_range)

     confusion, _, _ = self.Scorer.compute_confusion_map(
         reference,
         system_output,
         frame_range,
         collars=collar_zones,
         SNS=no_score_zones)
     masked_vector, _ = confusion

     expected = np.array([0, -2, -1, 3, 1, -2, 1, -1, 2, 0])
     self.assertTrue(np.array_equal(expected, masked_vector))
Esempio n. 6
0
    def compute_confusion_map(self,
                              ref_intervals,
                              sys_intervals,
                              global_interval,
                              collars=None,
                              SNS=None,
                              verbose=False):
        """Compute the confusion between reference and system intervals.

        This is a wrapper around ``IC.aggregate_intervals``: every provided
        interval set is passed through ``IC.compute_intervals_union``, its
        0-length sub-intervals are removed, and the sets are then aggregated
        over ``global_interval``.

        :param ref_intervals: numpy.array, the reference intervals
        :param sys_intervals: numpy.array, the system output intervals
        :param global_interval: the global range where the measure is performed
        :param collars: an interval array representing the collars (optional)
        :param SNS: an interval array representing a system no_score zone
            (optional)
        :param verbose: forwarded to ``IC.aggregate_intervals`` as
            ``print_results``
        :return: a 3-tuple ``(confusion, intervals, membership)``.

            * When both ``collars`` and ``SNS`` are None, the three values are
              the confusion vector, the intervals and the per-sequence
              membership array exactly as returned by
              ``IC.aggregate_intervals``.
            * Otherwise ``confusion`` is itself a pair
              ``(compressed_masked_vector, raw_confusion_vector)``. In the
              masked vector, collar intervals are set to -1 and SNS intervals
              to -2 (SNS is applied last, so it wins where both apply), and
              consecutive intervals carrying the same value are merged.
              ``intervals`` and ``membership`` are compressed accordingly
              (``membership`` keeps the column of the first interval of each
              merged run).
        """
        # Omitted (None) sets are left out of the sequence entirely, which is
        # why the SNS row index below depends on whether collars were given.
        intervals_sequence = [
            IC.compute_intervals_union([intervals])
            for intervals in (ref_intervals, sys_intervals, collars, SNS)
            if intervals is not None
        ]

        # Remove 0-length sub-intervals (start == end) from each non-empty set.
        # A set left with nothing is replaced by an empty array.
        filtered_intervals_sequence = []
        for intervals in intervals_sequence:
            if intervals.size != 0:
                intervals = intervals[intervals[:, 0] != intervals[:, 1]]
                if intervals.size == 0:
                    intervals = np.array([])
            filtered_intervals_sequence.append(intervals)

        # Compute the overlap between all intervals sets (the fourth returned
        # value, the weights, is not used here).
        confusion_vector, all_intervals, all_interval_in_seq_array, _ = IC.aggregate_intervals(
            filtered_intervals_sequence,
            global_interval,
            print_results=verbose)

        if collars is None and SNS is None:
            return confusion_vector, all_intervals, all_interval_in_seq_array

        # To put negative values in the confusion vector, it has to be signed.
        confusion_vector_masked = confusion_vector.astype(np.int64, copy=True)

        # Override the confusion value inside the no-score zones. Rows 0 and 1
        # of the membership array are taken to be ref and sys, so collars (when
        # given) sit on row 2 and SNS on the row after the last one present.
        if collars is not None:
            collars_mask = all_interval_in_seq_array[2].astype(bool)
            confusion_vector_masked[collars_mask] = -1

        if SNS is not None:
            SNS_idx = 3 if collars is not None else 2
            SNS_mask = all_interval_in_seq_array[SNS_idx].astype(bool)
            confusion_vector_masked[SNS_mask] = -2

        # Run-length encode the masked vector: consecutive intervals that carry
        # the same value are merged into a single interval.
        run_values, run_lengths = zip(
            *[(value, len(list(run)))
              for value, run in groupby(confusion_vector_masked)])

        run_stops = np.cumsum(run_lengths)
        run_first_idx = run_stops - run_lengths
        run_last_idx = run_stops - 1

        # A merged interval spans from the start of the first interval of the
        # run to the end of its last one (for a run of length one these are
        # the same interval).
        all_intervals_compressed = [
            [all_intervals[first][0], all_intervals[last][1]]
            for first, last in zip(run_first_idx, run_last_idx)
        ]

        return ((np.array(run_values), confusion_vector),
                np.array(all_intervals_compressed),
                all_interval_in_seq_array[:, run_first_idx])
Esempio n. 7
0
    # Demo fragment: scores one fixed reference interval against two fixed
    # system intervals, with collars and two system no-score (SNS) zones.
    max_range = 20
    # ref_seed, sys_seed = 42, 55
    # ref_intervals = gen_random_intervals(4, max_range, random_seed = ref_seed)
    # sys_intervals = gen_random_intervals(5, max_range, random_seed = sys_seed)
    # ref_intervals = gen_random_intervals(4, max_range)
    # sys_intervals = gen_random_intervals(5, max_range)
    collar = 1
    # global_interval = [0,max_range+10]
    global_interval = [0, 25]
    ref_intervals = np.array([[10, 18]])
    sys_intervals = np.array([[5, 12], [17, 20]])
    print("Ref : {}".format(ref_intervals.tolist()))
    print("Sys : {}".format(sys_intervals.tolist()))

    # Collar intervals are derived from the reference intervals only.
    collars_intervals = IC.compute_collars(ref_intervals,
                                           collar,
                                           crop_to_range=global_interval)
    # print(collars_intervals)
    SNS = np.array([[4, 9], [13, 16]])

    Scorer = VideoScoring()
    # NOTE(review): both collars and SNS are passed here. If
    # VideoScoring.compute_confusion_map returns a
    # (compressed_vector, raw_vector) pair as its first value in that case,
    # `confusion_vector` below is that pair rather than a flat vector - confirm.
    confusion_vector, all_intervals, all_interval_in_seq_array = Scorer.compute_confusion_map(
        ref_intervals,
        sys_intervals,
        global_interval,
        collars=collars_intervals,
        SNS=SNS)

    # Keep element [0] of the confusion_mapping entry of each confusion value.
    # NOTE(review): if `confusion_vector` is a pair (see above), this iterates
    # over its two arrays instead of the per-interval values - verify.
    confusion_vector_mapped = [
        Scorer.confusion_mapping[x][0] for x in confusion_vector
    ]
Esempio n. 8
0
    def compute_probes_MCC(self, df_ref, df_sys):
        """Compute the MCC score of every probe listed in the reference.

        For each distinct ``ProbeFileID`` of ``df_ref``, the system and
        reference frame intervals are parsed and merged, optional collars and
        system no-score (opt-out) zones are built, the confusion map is
        computed with ``self.Scorer`` and turned into TP/TN/FP/FN counts and
        an MCC value. When ``self.gen_timeline`` is set, an HTML timeline is
        also written for each scored probe.

        :param df_ref: reference DataFrame; the ``ProbeFileID`` and
            ``VideoFrame`` columns are read
        :param df_sys: system output DataFrame; the ``ProbeFileID``,
            ``ProbeStatus``, ``VideoFrameSegments`` and
            ``VideoFrameOptOutSegments`` columns are read
        :return: DataFrame indexed by ProbeFileID with an "MCC" column, plus a
            "Timeline" column when ``self.gen_timeline`` is set. Probes that
            are opted out (and not forced by ``self.no_opt_out``) are removed.
        :raises AssertionError: if a probe is missing from the index file or
            from the system output
        """

        def parse_segments(series):
            # Parse every interval-list string of the series; the empty list
            # literal "[]" is skipped.
            return [
                interval_list_string_fast_parsing(x, datatype=int)
                for x in series.values if x != "[]"
            ]

        probes_selection = df_ref.ProbeFileID.drop_duplicates()
        score_columns = ["MCC", "Timeline"] if self.gen_timeline else ["MCC"]
        probes_selection_scores_df = pd.DataFrame(
            np.zeros((len(probes_selection), len(score_columns))),
            index=probes_selection,
            columns=score_columns)

        for ProbeFileID in probes_selection:
            self.writelog("ProbeFileID = {}".format(ProbeFileID))
            collars = None

            # We get the total number of frame
            assert (
                ProbeFileID in self.df_index.ProbeFileID.values
            ), "ProbeFileID ({}) is missing in the index file ({})".format(
                ProbeFileID, self.path_index)
            # Rows are selected with a boolean mask rather than a formatted
            # query string: this matches the membership check above and does
            # not break on IDs containing quotes or non-string IDs.
            index_rows = self.df_index[self.df_index.ProbeFileID ==
                                       ProbeFileID]
            FrameCount = index_rows.FrameCount.values[0]
            global_range = [1, FrameCount]
            self.writelog("global_range = {}".format(global_range))

            # We first get the system intervals
            assert (
                ProbeFileID in df_sys.ProbeFileID.values
            ), "ProbeFileID ({}) is missing in the system output file".format(
                ProbeFileID)
            # System probe info
            sys_probe = df_sys[df_sys.ProbeFileID == ProbeFileID]

            # A probe may have several system rows (their segments are merged
            # below), so the status comparison yields an array. Using it
            # directly as a condition raises for more than one row; the probe
            # is considered processed only when every row says so.
            is_processed = np.all(sys_probe.ProbeStatus.values == "Processed")
            if not (is_processed or self.no_opt_out):
                self.writelog("This Probe is OptOut")
                probes_selection_scores_df.drop(ProbeFileID, inplace=True)
                continue

            sys_segments = parse_segments(sys_probe.VideoFrameSegments)
            if sys_segments:
                sys_intervals = IC.compute_intervals_union(sys_segments)
                if self.truncate:
                    sys_intervals = IC.truncate(sys_intervals, FrameCount)
            else:
                sys_intervals = np.array([[]])
            self.writelog("sys_intervals = {}".format(sys_intervals))

            # We get any OptOut video region; it is ignored when
            # self.no_video_opt_out is set.
            opt_out_segments = parse_segments(
                sys_probe.VideoFrameOptOutSegments)
            if opt_out_segments and not self.no_video_opt_out:
                SNS = IC.compute_intervals_union(opt_out_segments)
                if self.truncate:
                    SNS = IC.truncate(SNS, FrameCount)
            else:
                SNS = None
            self.writelog("SNS = {}".format(SNS))

            # We get the reference intervals
            ref_probe = df_ref[df_ref.ProbeFileID == ProbeFileID]
            ref_segments = parse_segments(ref_probe.VideoFrame)
            if ref_segments:
                ref_intervals = IC.compute_intervals_union(ref_segments)
            else:
                ref_intervals = np.array([[]])
            self.writelog("ref_intervals = {}".format(ref_intervals))

            if self.add_collars:
                collars = IC.compute_collars(ref_intervals,
                                             self.collars,
                                             crop_to_range=global_range)
            self.writelog("collars = {}".format(collars))

            # We compute the confusion metrics
            confusion_vector, all_intervals, all_interval_in_seq_array = self.Scorer.compute_confusion_map(
                ref_intervals,
                sys_intervals,
                global_range,
                collars=collars,
                SNS=SNS)

            # With collars or SNS the scorer returns a pair for the confusion
            # vector; its first element is the one used for counting. The
            # pair form is what the timeline display receives in both cases.
            if collars is not None or SNS is not None:
                confusion_vector_data = confusion_vector
                confusion_vector, _ = confusion_vector
            else:
                confusion_vector_data = (confusion_vector, None)

            self.writelog("confusion_vector = {}".format(confusion_vector))
            self.writelog("all_intervals = {}".format(all_intervals))
            Counts = self.Scorer.count_confusion_value(all_intervals,
                                                       confusion_vector)
            MCC = self.Scorer.compute_MCC(
                *[Counts[v] for v in ["TP", "TN", "FP", "FN"]])
            self.writelog("Counts = {}\nMCC = {}".format(Counts, MCC))

            if self.gen_timeline:
                confusion_data = [
                    all_intervals, confusion_vector_data,
                    self.Scorer.confusion_mapping
                ]

                p = IC.display_confusion_bokeh_2(ref_intervals,
                                                 sys_intervals,
                                                 global_range,
                                                 show_graph=False,
                                                 confusion_data=confusion_data,
                                                 c_mode=2)

                # The path stored in the score table is relative to
                # self.output_path; the file itself is written under it.
                html_file_path = os.path.join(
                    self.graph_path,
                    "timeline_{}_{}.html".format(ProbeFileID,
                                                 self.current_query_idx))
                with open(os.path.join(self.output_path, html_file_path),
                          'w') as f:
                    f.write(file_html(p, CDN, "Generated HTML Report"))
                probes_selection_scores_df.loc[
                    ProbeFileID] = MCC, "TimelinePlot@{}".format(
                        html_file_path)
            else:
                probes_selection_scores_df.loc[ProbeFileID] = MCC
            self.writelog("\n")

        return probes_selection_scores_df