def run(self):
    """Build per-combination histograms of the fused VAD decisions.

    For every element of ``self.vadpy.pipeline`` the labels attributes
    named in ``self.inputs`` are read frame by frame.  The 0/1 decisions
    of all inputs for one frame form a combination tuple, which is
    counted separately for frames the element's ``gt_labels`` mark as
    speech and for frames they mark as noise.

    Results published through ``self.add_result``:

    * ``'speech'`` -- per-combination count over ground-truth speech
      frames, divided by the number of ground-truth speech frames.
    * ``'noise'``  -- the same for ground-truth noise frames.
    * ``'p'``      -- per-combination count over all frames, divided by
      the total number of frames.
    * ``'lr'``     -- ``speech / noise`` ratio of the normalized values;
      0.0 when either normalized value is zero.

    Every counter starts at ``COMB_MIN_VALUE`` rather than at zero.
    When no speech (or no noise, or no frames at all) were observed, the
    corresponding normalized values are 0.0 instead of raising
    ``ZeroDivisionError``.
    """
    super(ModFusionHistogram, self).run()

    speech_frames_count = 0.0
    noise_frames_count = 0.0

    # Initialize the histograms for every possible 0/1 decision combination
    speech_histogram = {}
    noise_histogram = {}
    lr_histogram = {}
    probabilites = {}

    for comb_vec in product([0, 1], repeat=len(self.inputs)):
        speech_histogram[comb_vec] = COMB_MIN_VALUE
        noise_histogram[comb_vec] = COMB_MIN_VALUE
        lr_histogram[comb_vec] = COMB_MIN_VALUE
        probabilites[comb_vec] = COMB_MIN_VALUE

    for element in self.vadpy.pipeline:
        # labels objects aka "lo", one per configured input
        lo_list = [getattr(element, attr) for attr in self.inputs]
        gt_labels = element.gt_labels

        equalize_framelen(*(lo_list + [gt_labels]))

        # The ground truth is indexed alongside the inputs, so it has to
        # take part in the common frame count as well.
        frames_count = min(len(labels) for labels in lo_list + [gt_labels])

        for i in range(frames_count):
            # i-th frame is a (start, end, --> speech <--) tuple
            comb_vec = tuple(int(lo[i][2]) for lo in lo_list)
            probabilites[comb_vec] += 1

            if gt_labels[i][2]:
                speech_histogram[comb_vec] += 1
                speech_frames_count += 1
            else:
                noise_histogram[comb_vec] += 1
                noise_frames_count += 1

    def normalized(value, frames):
        # An empty class has no meaningful distribution; report 0.0
        # instead of dividing by zero.
        return value / frames if frames else 0.0

    total_frames_count = speech_frames_count + noise_frames_count

    # Normalize the histograms and derive the likelihood ratio
    for key in probabilites:
        speech_val = normalized(speech_histogram[key], speech_frames_count)
        noise_val = normalized(noise_histogram[key], noise_frames_count)

        speech_histogram[key] = speech_val
        noise_histogram[key] = noise_val
        probabilites[key] = normalized(probabilites[key], total_frames_count)

        if speech_val == 0 or noise_val == 0:
            lr_histogram[key] = 0.0
        else:
            lr_histogram[key] = speech_val / noise_val

    # Update pipeline with histogram data
    self.add_result('speech', speech_histogram)
    self.add_result('noise', noise_histogram)
    self.add_result('lr', lr_histogram)
    self.add_result('p', probabilites)
def run(self):
    """Build per-combination histograms of the fused VAD decisions.

    For every element of ``self.vadpy.pipeline`` the labels attributes
    named in ``self.inputs`` are read frame by frame.  The 0/1 decisions
    of all inputs for one frame form a combination tuple, which is
    counted separately for frames the element's ``gt_labels`` mark as
    speech and for frames they mark as noise.

    Results published through ``self.add_result``:

    * ``'speech'`` -- per-combination count over ground-truth speech
      frames, divided by the number of ground-truth speech frames.
    * ``'noise'``  -- the same for ground-truth noise frames.
    * ``'p'``      -- per-combination count over all frames, divided by
      the total number of frames.
    * ``'lr'``     -- ``speech / noise`` ratio of the normalized values;
      0.0 when either normalized value is zero.

    Every counter starts at ``COMB_MIN_VALUE`` rather than at zero.
    When no speech (or no noise, or no frames at all) were observed, the
    corresponding normalized values are 0.0 instead of raising
    ``ZeroDivisionError``.
    """
    super(ModFusionHistogram, self).run()

    speech_frames_count = 0.0
    noise_frames_count = 0.0

    # Initialize the histograms for every possible 0/1 decision combination
    speech_histogram = {}
    noise_histogram = {}
    lr_histogram = {}
    probabilites = {}

    for comb_vec in product([0, 1], repeat=len(self.inputs)):
        speech_histogram[comb_vec] = COMB_MIN_VALUE
        noise_histogram[comb_vec] = COMB_MIN_VALUE
        lr_histogram[comb_vec] = COMB_MIN_VALUE
        probabilites[comb_vec] = COMB_MIN_VALUE

    for element in self.vadpy.pipeline:
        # labels objects aka "lo", one per configured input
        lo_list = [getattr(element, attr) for attr in self.inputs]
        gt_labels = element.gt_labels

        equalize_framelen(*(lo_list + [gt_labels]))

        # The ground truth is indexed alongside the inputs, so it has to
        # take part in the common frame count as well.
        frames_count = min(len(labels) for labels in lo_list + [gt_labels])

        for i in range(frames_count):
            # i-th frame is a (start, end, --> speech <--) tuple
            comb_vec = tuple(int(lo[i][2]) for lo in lo_list)
            probabilites[comb_vec] += 1

            if gt_labels[i][2]:
                speech_histogram[comb_vec] += 1
                speech_frames_count += 1
            else:
                noise_histogram[comb_vec] += 1
                noise_frames_count += 1

    def normalized(value, frames):
        # An empty class has no meaningful distribution; report 0.0
        # instead of dividing by zero.
        return value / frames if frames else 0.0

    total_frames_count = speech_frames_count + noise_frames_count

    # Normalize the histograms and derive the likelihood ratio
    for key in probabilites:
        speech_val = normalized(speech_histogram[key], speech_frames_count)
        noise_val = normalized(noise_histogram[key], noise_frames_count)

        speech_histogram[key] = speech_val
        noise_histogram[key] = noise_val
        probabilites[key] = normalized(probabilites[key], total_frames_count)

        if speech_val == 0 or noise_val == 0:
            lr_histogram[key] = 0.0
        else:
            lr_histogram[key] = speech_val / noise_val

    # Update pipeline with histogram data
    self.add_result('speech', speech_histogram)
    self.add_result('noise', noise_histogram)
    self.add_result('lr', lr_histogram)
    self.add_result('p', probabilites)
def run(self):
    """Compute agreement statistics between two VADs against ground truth.

    ``self.inputs`` must name three different labels attributes, in the
    order (GT, VAD1, VAD2).  Over all elements of
    ``self.vadpy.pipeline`` every frame is sorted into one of four
    classes by comparing each VAD decision with the ground truth:

    * ``a`` -- both VAD1 and VAD2 agree with GT
    * ``b`` -- VAD1 disagrees, VAD2 agrees
    * ``c`` -- VAD1 agrees, VAD2 disagrees
    * ``d`` -- both disagree

    The class counts are turned into fractions of all frames and two
    results are published through ``self.add_result``:

    * ``'corrQ'`` = ``(a*d - b*c) / (a*d + b*c)``
    * ``'corrp'`` = ``(a*d - b*c) / sqrt((a+b)*(c+d)*(a+c)*(b+d))``

    Raises:
        AssertionError: if ``self.inputs`` does not hold three distinct
            names.
        ZeroDivisionError: if no frames were processed, or if one of the
            two denominators above evaluates to zero.
    """
    super(ModCorrelation, self).run()

    assert len(set(self.inputs)) == 3, (
        'Q-statistics module requires three different inputs (GT, VAD1, VAD2)')

    # Float accumulators keep the divisions below true divisions on
    # Python 2 as well.
    a_total = 0.0
    b_total = 0.0
    c_total = 0.0
    d_total = 0.0

    for element in self.vadpy.pipeline:
        gt_labels = getattr(element, self.inputs[0])
        vad1_labels = getattr(element, self.inputs[1])
        vad2_labels = getattr(element, self.inputs[2])

        if len(set([len(gt_labels), len(vad1_labels), len(vad2_labels)])) != 1:
            log.warning('Labels length mismatch: {0} / {1} / {2}, equlizing frame lengths.'.format(
                len(gt_labels), len(vad1_labels), len(vad2_labels)))
            # NOTE(review): equalization is done only on a mismatch, as
            # the log message suggests -- confirm this is the intended flow.
            equalize_framelen(gt_labels, vad1_labels, vad2_labels)

        # zip stops at the shortest input; iterating it directly (rather
        # than indexing it) works on both Python 2 and Python 3.
        speech_data = zip((int(speech) for start, stop, speech in gt_labels),
                          (int(speech) for start, stop, speech in vad1_labels),
                          (int(speech) for start, stop, speech in vad2_labels))

        for val_gt, val_v1, val_v2 in speech_data:
            vad1_agrees = val_v1 == val_gt
            vad2_agrees = val_v2 == val_gt

            if vad1_agrees and vad2_agrees:
                a_total += 1
            elif vad2_agrees:
                b_total += 1
            elif vad1_agrees:
                c_total += 1
            else:
                d_total += 1

    length = a_total + b_total + c_total + d_total

    a = a_total / length
    b = b_total / length
    c = c_total / length
    d = d_total / length

    corrQ = (a * d - b * c) / (a * d + b * c)
    corrp = (a * d - b * c) / math.sqrt((a + b) * (c + d) * (a + c) * (b + d))

    self.add_result('corrQ', corrQ)
    self.add_result('corrp', corrp)
def run(self):
    """Compute agreement statistics between two VADs against ground truth.

    ``self.inputs`` must name three different labels attributes, in the
    order (GT, VAD1, VAD2).  Over all elements of
    ``self.vadpy.pipeline`` every frame is sorted into one of four
    classes by comparing each VAD decision with the ground truth:

    * ``a`` -- both VAD1 and VAD2 agree with GT
    * ``b`` -- VAD1 disagrees, VAD2 agrees
    * ``c`` -- VAD1 agrees, VAD2 disagrees
    * ``d`` -- both disagree

    The class counts are turned into fractions of all frames and two
    results are published through ``self.add_result``:

    * ``'corrQ'`` = ``(a*d - b*c) / (a*d + b*c)``
    * ``'corrp'`` = ``(a*d - b*c) / sqrt((a+b)*(c+d)*(a+c)*(b+d))``

    Raises:
        AssertionError: if ``self.inputs`` does not hold three distinct
            names.
        ZeroDivisionError: if no frames were processed, or if one of the
            two denominators above evaluates to zero.
    """
    super(ModCorrelation, self).run()

    assert len(set(self.inputs)) == 3, (
        'Q-statistics module requires three different inputs (GT, VAD1, VAD2)')

    # Float accumulators keep the divisions below true divisions on
    # Python 2 as well.
    a_total = 0.0
    b_total = 0.0
    c_total = 0.0
    d_total = 0.0

    for element in self.vadpy.pipeline:
        gt_labels = getattr(element, self.inputs[0])
        vad1_labels = getattr(element, self.inputs[1])
        vad2_labels = getattr(element, self.inputs[2])

        if len(set([len(gt_labels), len(vad1_labels), len(vad2_labels)])) != 1:
            log.warning(
                'Labels length mismatch: {0} / {1} / {2}, equlizing frame lengths.'
                .format(len(gt_labels), len(vad1_labels), len(vad2_labels)))
            # NOTE(review): equalization is done only on a mismatch, as
            # the log message suggests -- confirm this is the intended flow.
            equalize_framelen(gt_labels, vad1_labels, vad2_labels)

        # zip stops at the shortest input; iterating it directly (rather
        # than indexing it) works on both Python 2 and Python 3.
        speech_data = zip(
            (int(speech) for start, stop, speech in gt_labels),
            (int(speech) for start, stop, speech in vad1_labels),
            (int(speech) for start, stop, speech in vad2_labels))

        for val_gt, val_v1, val_v2 in speech_data:
            vad1_agrees = val_v1 == val_gt
            vad2_agrees = val_v2 == val_gt

            if vad1_agrees and vad2_agrees:
                a_total += 1
            elif vad2_agrees:
                b_total += 1
            elif vad1_agrees:
                c_total += 1
            else:
                d_total += 1

    length = a_total + b_total + c_total + d_total

    a = a_total / length
    b = b_total / length
    c = c_total / length
    d = d_total / length

    corrQ = (a * d - b * c) / (a * d + b * c)
    corrp = (a * d - b * c) / math.sqrt(
        (a + b) * (c + d) * (a + c) * (b + d))

    self.add_result('corrQ', corrQ)
    self.add_result('corrp', corrp)
def run(self):
    """Compute the miss rate and false alarm rate of a VAD.

    ``self.inputs`` must name two different labels attributes, in the
    order (ground truth, VAD).  Over all elements of
    ``self.vadpy.pipeline`` every frame is counted as a true positive,
    true negative, false positive (false alarm) or false negative
    (miss).

    Before comparison, the VAD decision of a frame ``i`` with
    ``self.ctx_size < i < frames - self.ctx_size`` is replaced by the
    rounded mean of the VAD decisions in the window of ``self.ctx_size``
    frames on each side; the remaining frames keep their own decision.

    Results published through ``self.add_result``:

    * ``'mr'``  = ``fn / (tp + fn)``
    * ``'far'`` = ``fp / (tn + fp)``

    Raises:
        AssertionError: if ``self.inputs`` does not hold two distinct
            names.
        ZeroDivisionError: if the ground truth holds no speech frames
            (``'mr'``) or no noise frames (``'far'``).
    """
    super(ModConfusion, self).run()

    assert len(set(self.inputs)) == 2, (
        'Confusion module requires two different inputs')

    # Float accumulators keep the divisions below true divisions on
    # Python 2 as well.
    tp_total = 0.0
    tn_total = 0.0
    fp_total = 0.0
    fn_total = 0.0

    ctx_size = self.ctx_size
    window_size = float(ctx_size * 2 + 1)

    for element in self.vadpy.pipeline:
        gt_labels = getattr(element, self.inputs[0])
        vad_labels = getattr(element, self.inputs[1])

        if len(gt_labels) != len(vad_labels):
            log.warning('Labels length mismatch: {0} / {1}, equlizing frame lengths.'.format(
                len(gt_labels), len(vad_labels)))
            # NOTE(review): equalization is done only on a mismatch, as
            # the log message suggests -- confirm this is the intended flow.
            equalize_framelen(gt_labels, vad_labels)

        # (ground truth, VAD) decision pairs, truncated to the shorter
        # input.  Materialized as a list because it is measured and
        # sliced below, which a Python 3 zip iterator does not support.
        speechAB = list(zip((int(speech) for start, stop, speech in gt_labels),
                            (int(speech) for start, stop, speech in vad_labels)))
        frames_count = len(speechAB)

        for i, (valA, valB) in enumerate(speechAB):
            # NOTE(review): the lower bound is strict, so frame
            # i == ctx_size is not smoothed although its window would
            # fit -- confirm whether that is intended.
            if ctx_size < i < frames_count - ctx_size:
                votes = sum(vAB[1] for vAB in speechAB[i - ctx_size:i + ctx_size + 1])
                valB = int(round(votes / window_size))

            if valA:  # ground truth marks the frame as speech
                if valB:
                    tp_total += 1  # true positive
                else:
                    fn_total += 1  # false negative, miss
            else:
                if valB:
                    fp_total += 1  # false positive, false alarm
                else:
                    tn_total += 1  # true negative

    mr = fn_total / (tp_total + fn_total)
    far = fp_total / (tn_total + fp_total)

    self.add_result('mr', mr)
    self.add_result('far', far)
def run(self):
    """Compute the miss rate and false alarm rate of a VAD.

    ``self.inputs`` must name two different labels attributes, in the
    order (ground truth, VAD).  Over all elements of
    ``self.vadpy.pipeline`` every frame is counted as a true positive,
    true negative, false positive (false alarm) or false negative
    (miss).

    Before comparison, the VAD decision of a frame ``i`` with
    ``self.ctx_size < i < frames - self.ctx_size`` is replaced by the
    rounded mean of the VAD decisions in the window of ``self.ctx_size``
    frames on each side; the remaining frames keep their own decision.

    Results published through ``self.add_result``:

    * ``'mr'``  = ``fn / (tp + fn)``
    * ``'far'`` = ``fp / (tn + fp)``

    Raises:
        AssertionError: if ``self.inputs`` does not hold two distinct
            names.
        ZeroDivisionError: if the ground truth holds no speech frames
            (``'mr'``) or no noise frames (``'far'``).
    """
    super(ModConfusion, self).run()

    assert len(set(self.inputs)) == 2, (
        'Confusion module requires two different inputs')

    # Float accumulators keep the divisions below true divisions on
    # Python 2 as well.
    tp_total = 0.0
    tn_total = 0.0
    fp_total = 0.0
    fn_total = 0.0

    ctx_size = self.ctx_size
    window_size = float(ctx_size * 2 + 1)

    for element in self.vadpy.pipeline:
        gt_labels = getattr(element, self.inputs[0])
        vad_labels = getattr(element, self.inputs[1])

        if len(gt_labels) != len(vad_labels):
            log.warning(
                'Labels length mismatch: {0} / {1}, equlizing frame lengths.'
                .format(len(gt_labels), len(vad_labels)))
            # NOTE(review): equalization is done only on a mismatch, as
            # the log message suggests -- confirm this is the intended flow.
            equalize_framelen(gt_labels, vad_labels)

        # (ground truth, VAD) decision pairs, truncated to the shorter
        # input.  Materialized as a list because it is measured and
        # sliced below, which a Python 3 zip iterator does not support.
        speechAB = list(
            zip((int(speech) for start, stop, speech in gt_labels),
                (int(speech) for start, stop, speech in vad_labels)))
        frames_count = len(speechAB)

        for i, (valA, valB) in enumerate(speechAB):
            # NOTE(review): the lower bound is strict, so frame
            # i == ctx_size is not smoothed although its window would
            # fit -- confirm whether that is intended.
            if ctx_size < i < frames_count - ctx_size:
                votes = sum(
                    vAB[1]
                    for vAB in speechAB[i - ctx_size:i + ctx_size + 1])
                valB = int(round(votes / window_size))

            if valA:  # ground truth marks the frame as speech
                if valB:
                    tp_total += 1  # true positive
                else:
                    fn_total += 1  # false negative, miss
            else:
                if valB:
                    fp_total += 1  # false positive, false alarm
                else:
                    tn_total += 1  # true negative

    mr = fn_total / (tp_total + fn_total)
    far = fp_total / (tn_total + fp_total)

    self.add_result('mr', mr)
    self.add_result('far', far)