def acc_calculator(pred, gold, window_size=-1):
    """Score a predicted segmentation against a gold segmentation.

    :param pred: predicted segmentation masses (segeval format)
    :param gold: gold segmentation masses (segeval format)
    :param window_size: window for Pk / WindowDiff; -1 lets segeval
        derive its default window size from the reference
    :return: (pk_score, windiff_score, boundary_similarity_score)
    """
    # Build the keyword set once instead of duplicating both calls.
    extra = {} if window_size == -1 else {"window_size": window_size}
    pk_score = segeval.pk(pred, gold, **extra)
    windiff_score = segeval.window_diff(pred, gold, **extra)
    b_score = segeval.boundary_similarity(pred, gold)
    return pk_score, windiff_score, b_score
def seg_eval(algo_group_vec, real_group_vec, rev=True):
    """
    A function computing the Pk and win_diff value for 2 segmentations.
    Also gives random baselines.

    :param algo_group_vec: The algorithm result in the form of token group memberships
    :type algo_group_vec: Union[list, numpy.ndarray]
    :param real_group_vec: The real group memberships of tokens
    :type real_group_vec: Union[list, numpy.ndarray]
    :param rev: if True, pass the reference segmentation as the first
        argument to pk / window_diff; otherwise pass it second
    :type rev: bool
    :return: Pk value, Win_diff value, Pk random value, Win_diff random value
    :rtype: (float, float, float, float)
    """

    def _pk_windiff(seg_a, seg_b):
        # window_diff can raise on degenerate segmentations (e.g. a single
        # segment shorter than the window); fall back to the worst score 1.
        # Narrowed from the original bare `except:` which also swallowed
        # KeyboardInterrupt / SystemExit.
        pk_val = pk(seg_a, seg_b)
        try:
            wd_val = window_diff(seg_a, seg_b)
        except Exception:
            wd_val = 1
        return pk_val, wd_val

    # Transform into segmentation (mass) vectors
    real_segm_vec = convert_positions_to_masses(real_group_vec)
    algo_segm_vec = convert_positions_to_masses(algo_group_vec)

    # Make a shuffled group vec as a random baseline
    rdm_group_vec = real_group_vec.copy()
    rdm.shuffle(rdm_group_vec)
    rdm_segm_vec = convert_positions_to_masses(rdm_group_vec)

    if rev:
        pk_res, win_diff = _pk_windiff(real_segm_vec, algo_segm_vec)
        pk_rdm, win_diff_rdm = _pk_windiff(real_segm_vec, rdm_segm_vec)
    else:
        pk_res, win_diff = _pk_windiff(algo_segm_vec, real_segm_vec)
        pk_rdm, win_diff_rdm = _pk_windiff(rdm_segm_vec, real_segm_vec)

    # Return
    return pk_res, win_diff, pk_rdm, win_diff_rdm
def get_similarity(text_1, text_2):
    """Compare two span-annotated XML segmentations of the same text.

    text_1 and text_2 are XML documents that mark segment boundaries with
    spans, e.g.:
        BOSTON, MA ... <span class="highlighted" id="634541">Steven L.
        Davis pled guilty yesterday to federal charges that he stole and
        disclosed trade secrets of The Gillette Company</span>.

    :return: (segmentation_similarity, 1 - Pk, 1 - WindowDiff) as floats,
        or an error string when an input is empty or the underlying
        source texts differ.
    """
    # Guard clauses instead of the original if/else nesting.
    if text_1 == '' or text_2 == '':
        return 'Error Text Input Is Empty'

    soups = [remove_html_tags(BeautifulSoup(t)) for t in (text_1, text_2)]
    segements_1, segements_2 = (get_segements(s) for s in soups)

    if not check_segment_length(segements_1, segements_2):
        return 'Error Source Text Was Different'

    masses_1 = segeval.convert_positions_to_masses(segements_1)
    masses_2 = segeval.convert_positions_to_masses(segements_2)

    # Pk and WindowDiff are penalties; report them as 1 - value so that
    # higher is better, matching segmentation similarity.
    ss = float(segeval.segmentation_similarity(masses_1, masses_2))
    pk = 1 - float(segeval.pk(masses_1, masses_2))
    win_diff = 1 - float(segeval.window_diff(masses_1, masses_2))
    return ss, pk, win_diff
def window_diff(ref, pred, punish_zero_seg=False, boundary_label=1):
    """Compute WindowDiff between two boundary-label sequences.

    Parameter
    ---------
    ref : [int or str]
        ref boundary labels
    pred : [int or str]
        pred boundary labels
    punish_zero_seg : bool
        if True zero-segmentation will get 1.0
    boundary_label : int or str

    >>> window_diff([-1, -1, 1, -1, -1], [-1, -1, -1, 1, -1])
    0.5
    >>> window_diff([-1, -1, 1, -1, -1], [-1, -1, -1, -1, -1])
    0.5
    >>> window_diff([-1, -1, 1, -1, -1], [-1, -1, -1, -1, -1], punish_zero_seg=True)
    1.0
    """
    ref_masses, pred_masses = (
        boundary_labels_to_masses(labels, boundary_label=boundary_label)
        for labels in (ref, pred)
    )
    # A prediction with no boundaries collapses to a single mass.
    if punish_zero_seg and len(pred_masses) == 1:
        return 1.0
    return float(segeval.window_diff(pred_masses, ref_masses))
def metric_windiff(forest1, forest2):
    """WindowDiff between two forests, scaled to a percentage.

    Each tree is reduced to untyped masses; the per-tree mass lists are
    flattened into one segment sequence per forest before scoring.
    """
    def _flat_masses(forest):
        return [mass for tree in forest for mass in get_untyped_masses(tree)]

    return segeval.window_diff(_flat_masses(forest1), _flat_masses(forest2)) * 100
def evaluateSegments(reference, hypothesis):
    """Bundle segmentation metrics into a numpy vector.

    Element order: F_1 score (third value of __getscores), Pk, WindowDiff,
    boundary similarity, segmentation similarity.
    """
    ref, hyp = __initialization(reference, hypothesis)
    f1 = __getscores(reference, hypothesis)[2]
    metrics = [
        f1,
        float(segeval.pk(ref, hyp)),
        float(segeval.window_diff(ref, hyp)),
        float(segeval.boundary_similarity(ref, hyp)),
        float(segeval.segmentation_similarity(ref, hyp)),
    ]
    return np.array(metrics)
def test_window_diff(self):
    '''
    Test window_diff summary statistics on KAZANTSEVA2012_G5.
    '''
    mean, std, var, stderr, count = \
        summarize(window_diff(KAZANTSEVA2012_G5))
    # assertAlmostEquals / assertEquals are deprecated aliases that were
    # removed in Python 3.12; use the canonical method names.
    self.assertAlmostEqual(Decimal('0.42514977'), mean)
    self.assertAlmostEqual(Decimal('0.14960495'), std)
    self.assertAlmostEqual(Decimal('0.02238164'), var)
    self.assertAlmostEqual(Decimal('0.02159361'), stderr)
    self.assertEqual(48, count)
def win_diff(self, h, gold, window_size=-1):
    """
    :param gold: gold segmentation (item in the list contains the number of words in segment)
    :param h: hypothesis segmentation (each item in the list contains the number of words in segment)
    :param window_size: optional; -1 lets segeval pick its default window
    :return: (false boundary probability, total window count); the
        probability is -1 when there are no windows to compare
    """
    if window_size == -1:
        parts = seg.window_diff(h, gold, return_parts=True)
    else:
        parts = seg.window_diff(
            h, gold, window_size=window_size, return_parts=True)
    false_seg_count, total_count = parts

    # Guard against division by zero when no windows exist.
    if total_count == 0:
        return -1, total_count
    return float(false_seg_count) / float(total_count), total_count
def eval_tile_text(self, sample):
    '''
    Returns a tuple of metric scores (Pk, WinDiff, B).
    '''
    ### Record paragraph break points
    sent_bounds, normed_text = self.get_sb_nt(sample)

    ### Break up text into Pseudosentences
    # this list maps pseudosentence index to beginning token index
    ps_bounds = list(range(0, len(normed_text), self.w))
    pseudosents = [normed_text[i:i + self.w] for i in ps_bounds]
    # discard pseudosents of length < self.w
    if len(pseudosents[-1]) < self.w:
        del pseudosents[-1]

    ### Group into blocks and calculate sim scores
    # List[Tuple(sim score, pseudosent index)]
    # here, the index is of the first PS in block_b
    sims = self.calculate_sims(pseudosents)

    ### Find boundaries (valleys): strict local minima of the sim curve.
    # The original loop ran over range(0, len(sims)) with `j != 0 and
    # j != len(sims) - 1` guards and a dead `j += 1` (a no-op, since the
    # for statement rebinds j each iteration); both are folded into the
    # range below.
    pred = []
    for j in range(1, len(sims) - 1):
        if sims[j] < sims[j - 1] and sims[j] < sims[j + 1]:
            pred.append(j)
    pred = [j + self.k for j in pred]

    ### Evalute
    # map pseudosentence indices to beginning token index
    pred_btokis = [ps_bounds[i] for i in pred]
    # map beginning token index to closest sentence index
    # (this token is closest to the beginning of which sentence?)
    pred_sentis = [
        self.btoki_to_senti(t, sent_bounds) for t in pred_btokis
    ]
    # add last boundary (which we know is always there)
    pred_sentis += [len(sent_bounds)]
    gold_sentis = sample.get_sent_bound_idxs()

    pred = self.array_derivative(pred_sentis)
    gold = self.array_derivative(gold_sentis)

    pk = float(segeval.pk(pred, gold))
    wd = float(segeval.window_diff(pred, gold))
    bs = float(segeval.boundary_similarity(pred, gold, one_minus=True))

    return (pk, wd, bs)
# NOTE(review): this span reads like the body of a per-file loop (it uses a
# loop variable `f` and running accumulators avg_prec / avg_recall / avg_wd /
# avg_pk defined earlier), with the final "Average" print after the loop.
# The original indentation was lost in extraction — confirm the loop
# structure against the full script before relying on this layout.

# Run TextTiling on the file: predicted boundary indices, segment masses,
# and the end index.
[anno_pred, anno_seg, anno_end] = getTextTilingBoundaries(os.path.join(text_dir, f))
# Convert gold annotation indices into segment masses for segeval.
anno_idx2range = convertFromIndex2Range(anno_idx, anno_end)
print("-----")
print(anno_end)
print(anno_idx2range)
print(anno_seg)
print("----")
print(anno_pred)
print(anno_idx)

# Boundary-level precision/recall via set overlap.
# NOTE(review): precision divides by the union, not by |anno_pred| —
# presumably intentional (a Jaccard-style precision); verify.
anno_pred = set(anno_pred)
anno_idx = set(anno_idx)
union = len(anno_pred.union(anno_idx))
correct = len(anno_pred.intersection(anno_idx))
precision = 1.0 * correct / union
recall = 1.0 * correct / len(anno_idx)
avg_prec += precision
avg_recall += recall
print("%s %f %f" % (f, precision, recall))

# Window-based metrics on the mass vectors (segeval returns Decimal).
wd = segeval.window_diff(anno_seg, anno_idx2range)
pk = segeval.pk(anno_seg, anno_idx2range)
avg_wd += wd
avg_pk += pk
print("WD: %f P-k: %f" % (wd, pk))

# Aggregate report over all sel_files processed files.
print("Average: %f %f WD: %f Pk: %f (%d)" % (avg_prec / (sel_files), avg_recall / (sel_files), avg_wd / decimal.Decimal(sel_files), avg_pk / decimal.Decimal(sel_files), (sel_files)))
import sys  # FIX: sys.argv is read below but sys was never imported

from nltk.metrics.segmentation import pk, windowdiff
import segeval as se
import horae as ho
import codecs

if __name__ == '__main__':
    # Command line: <test name> <classifier> <type> <level>
    test = sys.argv[1]
    classifier = sys.argv[2]
    type_ = sys.argv[3]
    level = sys.argv[4]

    # Predicted segmentation and Choi-format reference paths.
    path_pred = "../data/test/seg/" + test + "_" + level + ".pred_" +\
        classifier
    path_ref = "../data/test/choiformat/" + type_ + "/" + test + "_" +\
        level + ".ref"

    ref, nbref1, refs = ho.load_text(path_ref)
    pred, nbpred1, preds = ho.load_text(path_pred)

    d = {"stargazer": {"1": refs, "2": preds}}
    seg1 = d['stargazer']['1']
    seg2 = d['stargazer']['2']
    # segeval expects segment masses, not per-token positions.
    segs1 = se.convert_positions_to_masses(seg1)
    segs2 = se.convert_positions_to_masses(seg2)

    print("pk\tWindowdiff: \n")
    print(str(round(se.pk(segs2, segs1), 4)) + "\t" +
          str(round(se.window_diff(segs2, segs1), 4)))
def evaluate(self, batch, preds, sent=True, word=True):
    """
    For a given batch and its corresponding preds, get metrics

    batch: Batch instance
    preds: list

    Usage:
        >> from loader import *
        >> from modules import *
        >>
        >> model = TextSeg(lstm_dim=200, score_dim=200, bidir=True, num_layers=2)
        >> trainer = Trainer(model=model, train_dir='../data/wiki_727/train', val_dir='../data/wiki_50/test', batch_size=10, lr=1e-3)
        >> evalu = Metrics()
        >>
        >> batch = sample_and_batch(trainer.train_dir, trainer.batch_size, TRAIN=True)
        >> preds = trainer.predict_batch(batch)
        >> evalu(batch, preds)
    """
    assert (sent or word), 'Missing: choose sent- and / or word-level evaluation.'

    def _level_metrics(prefix, true, pred):
        # One bundle of segeval metrics per granularity, keyed '<prefix>_<name>'.
        confusion = seg.boundary_confusion_matrix(pred, true)
        return {
            prefix + '_pk': seg.pk(pred, true),
            prefix + '_wd': seg.window_diff(pred, true,
                                            lamprier_et_al_2007_fix=True),
            prefix + '_ss': seg.segmentation_similarity(pred, true),
            prefix + '_bs': seg.boundary_similarity(pred, true),
            prefix + '_precision': seg.precision(confusion),
            prefix + '_recall': seg.recall(confusion),
            prefix + '_f1': seg.fmeasure(confusion),
        }

    metric_dict = {}
    # Word level
    if word:
        w_true, w_pred = self._word(batch, preds)
        metric_dict.update(_level_metrics('w', w_true, w_pred))
    # Sentence level
    if sent:
        s_true, s_pred = self._sent(batch, preds)
        metric_dict.update(_level_metrics('s', s_true, s_pred))
    return metric_dict
# NOTE(review): this chunk appears to be Python 2 (print statements below).
# The leading statements are the tail of a function whose `def` header falls
# outside this view (it mirrors retrieve_gold_set_vector); indentation was
# reconstructed accordingly — confirm against the full file.
    output_vector = []
    gold_set_files = []
    # Collect every matching gold-set CSV under each requested directory.
    for directory in parsed_arguments.directories:
        gold_set_files.extend(match(directory, parsed_arguments.gold_sets))
    # Flatten the segment_size column of each file into one vector.
    for file in gold_set_files:
        segment_sizes = read_csv(file).segment_size
        output_vector.extend(segment_sizes)
    return output_vector


def retrieve_result_set_vector(parsed_arguments):
    # Flatten the segment_size column of every matched result-set CSV
    # into a single list of segment masses.
    output_vector = []
    result_set_files = []
    for directory in parsed_arguments.directories:
        result_set_files.extend(match(directory, parsed_arguments.results))
    for file in result_set_files:
        segment_sizes = read_csv(file).segment_size
        output_vector.extend(segment_sizes)
    return output_vector


if __name__ == "__main__":
    # Compare gold vs. result segment-size vectors with Pk / WindowDiff.
    parsed_arguments = setup_argument_parser()
    gold_set_values = retrieve_gold_set_vector(parsed_arguments)
    result_set_values = retrieve_result_set_vector(parsed_arguments)
    print "Printing Comparison Statistics:"
    print "P_k value: {}".format(pk(gold_set_values, result_set_values))
    print "WindowDiff value: {}".format(
        window_diff(gold_set_values, result_set_values))
def get_Windiff_socre(reference, hypothesis):
    """Evaluate a hypothesis segmentation with the WindowDiff metric.

    NOTE(review): the function name misspells "score"; kept as-is because
    external callers depend on this name.
    """
    return segeval.window_diff(*__initialization(reference, hypothesis))
def wd(hyp_seg, ref_seg):
    """WindowDiff between hypothesis and reference segmentations, as a float."""
    converted = (segeval_converter(hyp_seg), segeval_converter(ref_seg))
    return float(window_diff(*converted))
# NOTE(review): this span uses names defined outside this view
# (rstr_*_group_vec, result_matrix, token_list, rdm, pk, window_diff, ...);
# it appears to be the interior of a larger evaluation routine whose header
# was lost — original indentation reconstructed at top level, confirm.

# Mean average precision across group labels (labels presumably start at 1).
ap_vector = [
    average_precision_score(rstr_best_real_group_vec == group_id,
                            rstr_algo_group_vec == group_id)
    for group_id in range(1, max(rstr_real_group_vec) + 1)
]
# NOTE(review): `map` shadows the builtin; left unchanged here.
map = np.mean(ap_vector)

# Segmentation evaluation
real_segm_vec = convert_positions_to_masses(rstr_real_group_vec)
algo_segm_vec = convert_positions_to_masses(rstr_algo_group_vec)
# Shuffled copy of the real groups serves as a random baseline.
rdm_group_vec = rstr_real_group_vec.copy()
rdm.shuffle(rdm_group_vec)
rdm_segm_vec = convert_positions_to_masses(rdm_group_vec)
pk_res = pk(algo_segm_vec, real_segm_vec)
win_diff = window_diff(algo_segm_vec, real_segm_vec)
pk_rdm = pk(rdm_segm_vec, real_segm_vec)
win_diff_rdm = window_diff(rdm_segm_vec, real_segm_vec)

# Compute the aggregate labels: average per-token results by token type.
df_results = pd.DataFrame(result_matrix)
df_results["Token"] = token_list
type_results = df_results.groupby("Token").mean()
type_list = list(type_results.index)
type_values = type_results.to_numpy()

# -------------------------------------
# --- Writing
# -------------------------------------

# Write html results
# NOTE(review): this chunk appears to be Python 2 (print statements below).
# The first `return` is the tail of a function whose `def` header falls
# outside this view; indentation was reconstructed — confirm against the
# full file.
    return output_files


def retrieve_gold_set_vector(parsed_arguments):
    # Flatten the segment_size column of every matched gold-set CSV
    # into a single list of segment masses.
    output_vector = []
    gold_set_files = []
    for directory in parsed_arguments.directories:
        gold_set_files.extend(match(directory, parsed_arguments.gold_sets))
    for file in gold_set_files:
        segment_sizes = read_csv(file).segment_size
        output_vector.extend(segment_sizes)
    return output_vector


def retrieve_result_set_vector(parsed_arguments):
    # Same flattening for the result-set CSVs.
    output_vector = []
    result_set_files = []
    for directory in parsed_arguments.directories:
        result_set_files.extend(match(directory, parsed_arguments.results))
    for file in result_set_files:
        segment_sizes = read_csv(file).segment_size
        output_vector.extend(segment_sizes)
    return output_vector


if __name__ == "__main__":
    # Compare gold vs. result segment-size vectors with Pk / WindowDiff.
    parsed_arguments = setup_argument_parser()
    gold_set_values = retrieve_gold_set_vector(parsed_arguments)
    result_set_values = retrieve_result_set_vector(parsed_arguments)
    print "Printing Comparison Statistics:"
    print "P_k value: {}".format(pk(gold_set_values, result_set_values))
    print "WindowDiff value: {}".format(window_diff(gold_set_values, result_set_values))