def test_8(self):
    """test_6 with collars"""
    collar = 1
    global_interval = [0, 25]
    ref_intervals = np.array([[10, 18]])
    sys_intervals = np.array([[5, 12], [16, 23]])
    collars_intervals = IC.compute_collars(ref_intervals, collar, crop_to_range=global_interval)
    (cv_m, cv), all_intervals, _ = self.Scorer.compute_confusion_map(
        ref_intervals, sys_intervals, global_interval, collars=collars_intervals)
    self.assertTrue(
        np.array_equal(np.array([0, 2, -1, 3, 1, 3, -1, 2, 0]), cv_m))
def test_14(self):
    """System output has some intervals of zero length; they are deleted"""
    collar = 1
    global_interval = [0, 25]
    ref_intervals = np.array([[10, 18]])
    sys_intervals = np.array([[5, 12], [14, 14], [17, 20]])
    collars_intervals = IC.compute_collars(ref_intervals, collar, crop_to_range=global_interval)
    (cv_m, cv), all_intervals, _ = self.Scorer.compute_confusion_map(
        ref_intervals, sys_intervals, global_interval, collars=collars_intervals)
    self.assertTrue(
        np.array_equal(np.array([0, 2, -1, 3, 1, -1, 2, 0]), cv_m))
def test_13(self):
    """System output intervals overlap each other; their union should be taken"""
    collar = 1
    global_interval = [0, 25]
    ref_intervals = np.array([[10, 18]])
    sys_intervals = np.array([[5, 16], [12, 20]])
    collars_intervals = IC.compute_collars(ref_intervals, collar, crop_to_range=global_interval)
    (cv_m, cv), all_intervals, _ = self.Scorer.compute_confusion_map(
        ref_intervals, sys_intervals, global_interval, collars=collars_intervals)
    self.assertTrue(np.array_equal(np.array([0, 2, -1, 3, -1, 2, 0]), cv_m))
def test_12(self):
    """test_8 with intervals shuffled and boundaries inverted"""
    collar = 1
    global_interval = [0, 25]
    ref_intervals = np.array([[10, 18]])
    sys_intervals = np.array([[23, 16], [5, 12]])
    collars_intervals = IC.compute_collars(ref_intervals, collar, crop_to_range=global_interval)
    (cv_m, cv), all_intervals, _ = self.Scorer.compute_confusion_map(
        ref_intervals, sys_intervals, global_interval, collars=collars_intervals)
    self.assertTrue(
        np.array_equal(np.array([0, 2, -1, 3, 1, 3, -1, 2, 0]), cv_m))
def test_15(self):
    """Adding two system no-score zones"""
    collar = 1
    global_interval = [0, 25]
    SNS = np.array([[4, 9], [13, 16]])
    ref_intervals = np.array([[10, 18]])
    sys_intervals = np.array([[5, 12], [17, 20]])
    collars_intervals = IC.compute_collars(ref_intervals, collar, crop_to_range=global_interval)
    (cv_m, cv), all_intervals, _ = self.Scorer.compute_confusion_map(
        ref_intervals, sys_intervals, global_interval, collars=collars_intervals, SNS=SNS)
    self.assertTrue(
        np.array_equal(np.array([0, -2, -1, 3, 1, -2, 1, -1, 2, 0]), cv_m))
def compute_confusion_map(self, ref_intervals, sys_intervals, global_interval, collars=None, SNS=None, verbose=False):
    """This function is a wrapper around aggregate_intervals, used to compute a
    confusion measure between an interval list from the reference and an
    interval list from a system output.
    :param ref_intervals: numpy.array
    :param sys_intervals: numpy.array
    :param global_interval: the global range over which the measure is performed
    :param collars: an interval array representing the collars
    :param SNS: an interval array representing a system no-score zone
    :param verbose: if True, print the intermediate aggregation results
    """
    # Creation of the intervals_sequence
    interval_param_list = [ref_intervals, sys_intervals, collars, SNS]
    intervals_sequence = [
        IC.compute_intervals_union([i]) for i in interval_param_list
        if i is not None
    ]
    # print("interval sequence before = {}".format(intervals_sequence))

    # We remove all None and 0-length sub-intervals
    interval_filter = lambda x: x.size != 0
    filtered_intervals_sequence = []
    for intervals in intervals_sequence:
        if intervals.size == 0:
            filtered_intervals_sequence.append(intervals)
        else:
            # Removing 0-length sub-intervals
            filtered_sub_intervals = intervals[~(intervals[:, 0] == intervals[:, 1])]
            if filtered_sub_intervals.size == 0:
                filtered_intervals_sequence.append(np.array([]))
            else:
                filtered_intervals_sequence.append(filtered_sub_intervals)
    # print("interval sequence after = {}".format(filtered_intervals_sequence))

    # Compute the overlap between all intervals sets
    confusion_vector, all_intervals, all_interval_in_seq_array, weights = IC.aggregate_intervals(
        filtered_intervals_sequence, global_interval, print_results=verbose)

    if collars is not None or SNS is not None:
        # To store negative values in the confusion vector (np.uint64), we need to cast it to a signed type
        confusion_vector_masked = confusion_vector.astype(np.int64, copy=True)
        # Apply the override values for the no-score zones
        if collars is not None:
            collars_mask = all_interval_in_seq_array[2].astype(bool)
            confusion_vector_masked[collars_mask] = -1
        if SNS is not None:
            SNS_idx = 3 if collars is not None else 2
            SNS_mask = all_interval_in_seq_array[SNS_idx].astype(bool)
            confusion_vector_masked[SNS_mask] = -2

        # In the case of no-score zones, we compress runs of consecutive identical values
        confusion_vector_compressed, sizes_compression = zip(
            *[(k, len(list(g))) for k, g in groupby(confusion_vector_masked)])
        sizes_cs = np.cumsum(sizes_compression)
        start_indexes_compressions = sizes_cs - sizes_compression
        end_indexes_compression = sizes_cs - 1
        all_intervals_compressed = []
        for start, end in zip(start_indexes_compressions, end_indexes_compression):
            if start != end:
                start_first, _ = all_intervals[start]
                _, end_last = all_intervals[end]
                all_intervals_compressed.append([start_first, end_last])
            else:
                all_intervals_compressed.append(all_intervals[start])

        all_interval_in_seq_array_compressed = all_interval_in_seq_array[:, start_indexes_compressions]

        return ((np.array(confusion_vector_compressed), confusion_vector),
                np.array(all_intervals_compressed),
                all_interval_in_seq_array_compressed)

    return confusion_vector, all_intervals, all_interval_in_seq_array
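
# A minimal standalone sketch (hypothetical helper, not part of the scorer) of
# the run-length compression performed above in compute_confusion_map:
# consecutive identical confusion values are collapsed with itertools.groupby
# and the boundaries of the corresponding segments are merged. It relies only
# on numpy and groupby, which the code above already uses.
def _compress_confusion_runs_sketch(masked_vector, intervals):
    # masked_vector: sequence of confusion codes; intervals: matching [start, end] pairs
    compressed, run_lengths = zip(
        *[(k, len(list(g))) for k, g in groupby(masked_vector)])
    ends = np.cumsum(run_lengths)          # end index (exclusive) of each run
    starts = ends - np.array(run_lengths)  # start index of each run
    merged = [[intervals[s][0], intervals[e - 1][1]]
              for s, e in zip(starts, ends)]
    return np.array(compressed), merged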
max_range = 20
# ref_seed, sys_seed = 42, 55
# ref_intervals = gen_random_intervals(4, max_range, random_seed=ref_seed)
# sys_intervals = gen_random_intervals(5, max_range, random_seed=sys_seed)
# ref_intervals = gen_random_intervals(4, max_range)
# sys_intervals = gen_random_intervals(5, max_range)
collar = 1
# global_interval = [0, max_range + 10]
global_interval = [0, 25]
ref_intervals = np.array([[10, 18]])
sys_intervals = np.array([[5, 12], [17, 20]])
print("Ref : {}".format(ref_intervals.tolist()))
print("Sys : {}".format(sys_intervals.tolist()))
collars_intervals = IC.compute_collars(ref_intervals, collar, crop_to_range=global_interval)
# print(collars_intervals)
SNS = np.array([[4, 9], [13, 16]])
Scorer = VideoScoring()
# With collars or SNS provided, the first returned value is a
# (masked_compressed_vector, unmasked_vector) pair
(confusion_vector, confusion_vector_unmasked), all_intervals, all_interval_in_seq_array = Scorer.compute_confusion_map(
    ref_intervals, sys_intervals, global_interval, collars=collars_intervals, SNS=SNS)
confusion_vector_mapped = [
    Scorer.confusion_mapping[x][0] for x in confusion_vector
]
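
# The integer codes in the masked confusion vector appear, from the expected
# vectors in the tests above, to mean: 0 = neither ref nor sys, 1 = ref only,
# 2 = sys only, 3 = ref and sys overlap, -1 = collar no-score, -2 = system
# no-score (opt-out) zone. The exact content of Scorer.confusion_mapping is an
# assumption here; it is only used to translate those codes into labels.
print("Confusion vector (mapped) : {}".format(confusion_vector_mapped))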
def compute_probes_MCC(self, df_ref, df_sys):
    probes_selection = df_ref.ProbeFileID.drop_duplicates()
    if self.gen_timeline:
        probes_selection_scores_df = pd.DataFrame(
            np.zeros((len(probes_selection), 2)),
            index=probes_selection,
            columns=["MCC", "Timeline"])
    else:
        probes_selection_scores_df = pd.DataFrame(
            np.zeros(len(probes_selection)),
            index=probes_selection,
            columns=["MCC"])

    for ProbeFileID in probes_selection:
        self.writelog("ProbeFileID = {}".format(ProbeFileID))
        collars = None

        # We get the total number of frames
        assert (ProbeFileID in self.df_index.ProbeFileID.values
                ), "ProbeFileID ({}) is missing in the index file ({})".format(
                    ProbeFileID, self.path_index)
        FrameCount = self.df_index.query(
            "ProbeFileID == '{}'".format(ProbeFileID)).FrameCount.values[0]
        global_range = [1, FrameCount]
        self.writelog("global_range = {}".format(global_range))

        # We first get the system intervals
        assert (ProbeFileID in df_sys.ProbeFileID.values
                ), "ProbeFileID ({}) is missing in the system output file".format(
                    ProbeFileID)

        # System probe info
        sys_probe = df_sys.query("ProbeFileID == '{}'".format(ProbeFileID))

        if sys_probe.ProbeStatus.values == "Processed" or self.no_opt_out:
            SysVideoFramesSeries = sys_probe.VideoFrameSegments
            SysVideoFramesSeries_list = [
                interval_list_string_fast_parsing(x, datatype=int)
                for x in SysVideoFramesSeries.values if x != "[]"
            ]
            if SysVideoFramesSeries_list:
                sys_intervals = IC.compute_intervals_union(
                    SysVideoFramesSeries_list)
                if self.truncate:
                    sys_intervals = IC.truncate(sys_intervals, FrameCount)
            else:
                sys_intervals = np.array([[]])
            self.writelog("sys_intervals = {}".format(sys_intervals))
            # sys_intervals = IC.gen_random_intervals(3, 610)

            # We get any OptOut video region
            SysVideoFramesOptOutSeries = sys_probe.VideoFrameOptOutSegments
            SysVideoFramesOptOutSeries_list = [
                interval_list_string_fast_parsing(x, datatype=int)
                for x in SysVideoFramesOptOutSeries.values if x != "[]"
            ]
            if SysVideoFramesOptOutSeries_list and not self.no_video_opt_out:
                SNS = IC.compute_intervals_union(
                    SysVideoFramesOptOutSeries_list)
                if self.truncate:
                    SNS = IC.truncate(SNS, FrameCount)
            else:
                SNS = None
            self.writelog("SNS = {}".format(SNS))
        else:
            self.writelog("This Probe is OptOut")
            probes_selection_scores_df.drop(ProbeFileID, inplace=True)
            continue

        # We get the reference intervals
        RefVideoFramesSeries = df_ref.query(
            "ProbeFileID == '{}'".format(ProbeFileID)).VideoFrame
        RefVideoFramesSeries_list = [
            interval_list_string_fast_parsing(x, datatype=int)
            for x in RefVideoFramesSeries.values if x != "[]"
        ]
        if RefVideoFramesSeries_list:
            ref_intervals = IC.compute_intervals_union(
                RefVideoFramesSeries_list)
        else:
            ref_intervals = np.array([[]])
        self.writelog("ref_intervals = {}".format(ref_intervals))

        if self.add_collars:
            collars = IC.compute_collars(ref_intervals, self.collars,
                                         crop_to_range=global_range)
            self.writelog("collars = {}".format(collars))

        # We compute the confusion metrics
        confusion_vector, all_intervals, all_interval_in_seq_array = self.Scorer.compute_confusion_map(
            ref_intervals, sys_intervals, global_range, collars=collars, SNS=SNS)
        if collars is not None or SNS is not None:
            # With collars or SNS, the first returned value is a (masked, unmasked) pair
            confusion_vector_data = confusion_vector
            confusion_vector, confusion_vector_not_masked = confusion_vector
        self.writelog("confusion_vector = {}".format(confusion_vector))
        self.writelog("all_intervals = {}".format(all_intervals))

        Counts = self.Scorer.count_confusion_value(all_intervals,
                                                   confusion_vector)
        MCC = self.Scorer.compute_MCC(
            *[Counts[v] for v in ["TP", "TN", "FP", "FN"]])
        self.writelog("Counts = {}\nMCC = {}".format(Counts, MCC))
{}".format(Counts, MCC)) if self.gen_timeline: if collars is not None or SNS is not None: confusion_data = [ all_intervals, confusion_vector_data, self.Scorer.confusion_mapping ] else: confusion_data = [ all_intervals, (confusion_vector, None), self.Scorer.confusion_mapping ] p = IC.display_confusion_bokeh_2(ref_intervals, sys_intervals, global_range, show_graph=False, confusion_data=confusion_data, c_mode=2) html_file_path = os.path.join( self.graph_path, "timeline_{}_{}.html".format(ProbeFileID, self.current_query_idx)) with open(os.path.join(self.output_path, html_file_path), 'w') as f: f.write(file_html(p, CDN, "Generated HTML Report")) probes_selection_scores_df.loc[ ProbeFileID] = MCC, "TimelinePlot@{}".format( html_file_path) else: probes_selection_scores_df.loc[ProbeFileID] = MCC self.writelog("\n") return probes_selection_scores_df