コード例 #1
0
ファイル: topic_tiling3.py プロジェクト: paulwcoles/msc_diss
def evaluate(predicted_boundaries, doc_name):
    """Score the predicted boundaries for *doc_name* with WindowDiff and log it.

    Uses window size 3 and "BREAK" as the boundary token, compares against
    the module-level gold set, records the score in the module-level
    ``evaluations`` dict and appends a line to the timestamped log file.

    Relies on module globals: windowdiff, gold_boundaries_set, evaluations,
    log_dir, timestamp.
    """
    evaluation = windowdiff(predicted_boundaries, gold_boundaries_set[doc_name], 3,
                            boundary="BREAK", weighted=False)
    # Single-argument print() call so the module is Python 2/3 compatible
    # (was a Python 2 print statement, a SyntaxError under Python 3).
    print("Window Diff Score:\t %f\n" % evaluation)
    evaluations[doc_name] = evaluation
    with open(log_dir + timestamp, 'a') as log:
        log.write("\n" + doc_name + "\t" * 3 + str(evaluation))
コード例 #2
0
def make_distances(matches):
    """Return a copy of *matches* extended with one column per distance metric.

    Adds geometric (euclidean), edit, generalized Hamming, WindowDiff and Pk
    distance columns comparing the manual annotation span to the automatic one.
    """
    result = matches.copy()

    def geometric(row):
        return sd.euclidean([row[MAN_START], row[MAN_END]],
                            [row[AUTO_START], row[AUTO_END]])

    def edit(row):
        return distance.edit_distance(row[MAN_TEXT], row[AUTO_TEXT])

    def hamming(row):
        return segmentation.ghd(*to_segmentation_metric_form(
            [row[MAN_START], row[MAN_END]], [row[AUTO_START], row[AUTO_END]]))

    def windiff(row):
        # windowdiff k will be 1/2 average segment length
        return segmentation.windowdiff(*to_segmentation_metric_form(
            [row[MAN_START], row[MAN_END]], [row[AUTO_START], row[AUTO_END]],
            return_window_size=True))

    def p_k(row):
        return segmentation.pk(*to_segmentation_metric_form(
            [row[MAN_START], row[MAN_END]], [row[AUTO_START], row[AUTO_END]]))

    result[DISTANCE_GEOMETRIC] = result.apply(geometric, axis=1)
    result[DISTANCE_EDIT] = result.apply(edit, axis=1)
    result[DISTANCE_HAMMING] = result.apply(hamming, axis=1)
    result[DISTANCE_Windowdiff] = result.apply(windiff, axis=1)
    result[DISTANCE_PK] = result.apply(p_k, axis=1)
    return result
コード例 #3
0
def get_standard_metrics(gt, pred, msn):
    """Compute Pk and WindowDiff between gold and predicted boundary sets.

    gt / pred: collections of boundary indexes; msn: total number of
    positions. Both are rendered as "0"/"1" strings of length *msn* before
    scoring. Returns a (pk, windowdiff) tuple.

    Raises ZeroDivisionError when *gt* contains no boundaries (unchanged
    behaviour — the metrics are undefined in that case).
    """
    gt_segs = ''.join(['1' if i in gt else '0' for i in range(msn)])
    pred_segs = ''.join(['1' if i in pred else '0' for i in range(msn)])
    # k is half the average gold segment length, scaled down by 4.
    # BUG FIX: the // 4 could drop k to 0 (whenever the average segment is
    # shorter than 8), which is an invalid window size for pk/windowdiff —
    # clamp to at least 1.
    k_val = int(round(len(gt_segs) / (gt_segs.count('1') * 2.0)))
    k_val = max(1, k_val // 4)
    return seg.pk(gt_segs, pred_segs, k=k_val), seg.windowdiff(gt_segs,
                                                               pred_segs,
                                                               k=k_val)
コード例 #4
0
def compute_segmentation_scores(reference, results, k):
    """
    Compute WindowDiff, Beeferman's Pk and Generalized Hamming Distance.

    WindowDiff and GHD are normalized by the reference length; boundaries
    are marked with the "T" token.
    """
    n = len(reference)
    window_diff = windowdiff(reference, results, k, boundary="T") / float(n)
    beeferman_pk = pk(reference, results, boundary="T")
    hamming = ghd(reference, results, boundary="T") / n
    return window_diff, beeferman_pk, hamming
コード例 #5
0
def compute_segmentation_scores(reference, results, k):
    """
    Compute WindowDiff, Beeferman's Pk and Generalized Hamming Distance
    (WindowDiff and GHD are normalized by the length of the reference).
    """
    length = len(reference)
    raw_wd = windowdiff(reference, results, k, boundary="T")
    scores = (
        float(raw_wd) / length,
        pk(reference, results, boundary="T"),
        ghd(reference, results, boundary="T") / length,
    )
    return scores
コード例 #6
0
ファイル: utils.py プロジェクト: laic/discourse
def get_seg_scores(y_test, pred_class, evalk=3):
    """Compute Pk and WindowDiff between gold labels and predictions.

    y_test / pred_class: iterables of 0/1 boundary indicators; both are
    flattened into "0"/"1" strings before scoring with window size *evalk*.
    Returns a (pk, windowdiff) tuple.
    """
    targetstr = "".join([str(int(x)) for x in y_test])
    predstr = "".join([str(int(x)) for x in pred_class])
    logger.debug(targetstr[0:50])
    logger.debug(predstr[0:50])

    wd = windowdiff(targetstr, predstr, k=evalk)
    pkval = pk(targetstr, predstr, k=evalk)
    # print() calls keep this Python 2/3 compatible (the original Python 2
    # print statements are a SyntaxError under Python 3).
    print("PK: %f" % pkval)
    print("WD: %f" % wd)

    return pkval, wd
コード例 #7
0
ファイル: get-kk.py プロジェクト: laic/discourse
def get_pk_wd(x, k=3, conv=None):
    """Compute Pk and WindowDiff for one conversation's predictions.

    x: DataFrame with 0/1 "target" and "pred" columns (and a "conv" id
       column, used when *conv* is not supplied).
    k: window size; both boundary strings are padded with k zeros on each
       side so windows near the edges are still comparable.
    Returns a one-row DataFrame with columns conv, PK and WD.
    """
    # BUG FIX: compare against None with `is`, not `==` (PEP 8 idiom;
    # `==` can misbehave with objects overriding equality).
    if conv is None:
        conv = x["conv"].iloc[0]
    targets = "0" * k + "".join(x["target"].astype(str)) + "0" * k
    preds = "0" * k + "".join(x["pred"].astype(str)) + "0" * k
    wd = windowdiff(targets, preds, k=k)
    pkval = pk(targets, preds, k=k)

    return pd.DataFrame({"conv": conv, "PK": pkval, "WD": wd}, index=[conv])
コード例 #8
0
def score(predicts, labels, windowsize=2, type=1):
    '''
    Evaluate predicted boundary sequences against gold labels.

    predicts / labels: lists of numpy arrays, sample_num * conversation_length.
    :param type: 1 -- origianl dataset, the windowsize is appropriate;
                 0 -- augmented datset, the windowsize may be wrong, so the
                 window-based metrics are skipped (left at 0)
    :return: dict with windowdiff, pk, F1-macro and accuracy, each averaged
             over the samples
    '''
    n_samples = len(predicts)
    acc_total = 0
    macro_total = 0
    micro_total = 0
    wd_total = 0
    pk_total = 0

    for idx, pred in enumerate(predicts):
        gold = labels[idx]
        pred_str = ''.join(str(v) for v in list(pred))
        gold_str = ''.join(str(v) for v in list(gold))
        acc_total += np.sum(np.equal(pred, gold)) / len(pred)
        macro_total += f1_score(gold, pred, average='macro')
        micro_total += f1_score(gold, pred, average='micro')

        if type:
            wd_total += windowdiff(gold_str, pred_str, windowsize)
            pk_total += pk(gold_str, pred_str, windowsize)

    acc_total = acc_total / n_samples
    macro_total = macro_total / n_samples
    micro_total = micro_total / n_samples
    if type:
        wd_total = wd_total / n_samples
        pk_total = pk_total / n_samples

    return {
        "windowdiff": wd_total,
        "pk": pk_total,
        "F1-macro": macro_total,
        "acc": acc_total
    }
コード例 #9
0
def evaluate(gold_idx, pred_idx, k):
    """
    gold_idx: gold standard segmentation, as a list of lists of indexes
    pred_idx: predicted segmentation of the text, same format
    k: window size (preferrably half of the document length divided by the
       number of gold segments)
    return: pk (Beeferman D., Berger A., Lafferty J. (1999)) and windowdiff
    (Pevzner, L., and Hearst, M (2002)) metrics for the prediction
    (less the better)
    """
    def boundary_string(segments):
        # One "0" per position, with the final position of every segment
        # flipped to "1" (an empty segment raises IndexError, as before).
        marks = []
        for segment in segments:
            chunk = ["0"] * len(segment)
            chunk[-1] = "1"
            marks.extend(chunk)
        return "".join(marks)

    gold = boundary_string(gold_idx)
    pred = boundary_string(pred_idx)
    return {'pk': pk(gold, pred, k), 'windowdiff': windowdiff(gold, pred, k)}
コード例 #10
0
def evaluate_text_tiling(data):
    """Segment every lecture in *data* with TextTiling and report the mean
    WindowDiff score against the ground-truth boundaries.

    data: (texts, boundary_sequences) pair; boundaries are 0/1 indicators.
    """
    tiler = TextTiling()
    texts, gold = data[0], data[1]
    scores = []
    for idx, lecture in enumerate(texts):
        predicted = ''.join(str(b) for b in tiler.segment_text(lecture))
        reference = ''.join(str(b) for b in gold[idx])
        # k is roughly half the average reference segment size; the +1 in
        # the denominator guards against division by zero.
        k = int(len(reference) / float(2.0 * reference.count('1') + 1.0))
        scores.append(windowdiff(predicted, reference, k))

    mean_score = np.mean(np.array(scores))
    print("Average window diff score:", mean_score)
    return mean_score
コード例 #11
0
def evaluate_segmentation(bc3=False, limit=0):
    """Evaluate the Wapiti segmentation result against the gold standard.

    Returns accuracy, precision, recall, F1, WindowDiff, Pk and GHD (all as
    percentages) plus the boundary counts of the gold and result strings.
    """
    g = data_to_string(WAPITI_GOLD_FILE, limit=limit)  # gold string
    r = data_to_string(WAPITI_RESULT_FILE, limit=limit)  # result string

    # text tiling baseline string (currently computed but unused below)
    if bc3:
        t = data_to_string(BC3_TEXT_TILING_FILE, limit=limit,
                           label_position=0)
    else:
        t = data_to_string(WAPITI_GOLD_FILE, limit=limit, label_position=-2)

    avg = float(len(g)) / (g.count("T") + 1)  # average segment size
    k = int(avg / 2)  # window size for WindowDiff

    # baseline string: boundaries evenly spaced at the average segment size
    b = ("T" + (int(math.floor(avg)) - 1) * ".") * int(
        math.ceil(float(len(g)) / int(math.floor(avg))))
    b = b[:len(g)]

    print(g[:150])
    print(r[:150])

    # WindowDiff
    wdi = (float(windowdiff(g, r, k, boundary="T")) / len(g)) * 100

    # Beeferman's Pk
    bpk = (pk(g, r, boundary="T")) * 100

    # Generalized Hamming Distance
    ghd = (GHD(g, r, boundary="T") / len(g)) * 100

    # accuracy
    acc = accuracy(list(g), list(r)) * 100

    # precision, recall, f-measure
    pre = metrics.precision_score(list(g), list(r)) * 100
    rec = metrics.recall_score(list(g), list(r)) * 100
    # BUG FIX: the F1 formula referenced undefined names rec_rs/pre_rs
    # (a NameError at runtime); use rec/pre and guard a zero denominator.
    f_1 = (2.0 * (rec * pre)) / (rec + pre) if (rec + pre) else 0.0

    return acc, pre, rec, f_1, wdi, bpk, ghd, g.count("T"), r.count("T")
コード例 #12
0
def evaluate_segmentation(bc3=False, limit=0):
    """Evaluate the Wapiti segmentation result against the gold standard.

    Returns accuracy, precision, recall, F1, WindowDiff, Pk and GHD (all as
    percentages) plus the boundary counts of the gold and result strings.
    """
    g = data_to_string(WAPITI_GOLD_FILE, limit=limit) # gold string
    r = data_to_string(WAPITI_RESULT_FILE, limit=limit) # result string

    if bc3:
        t = data_to_string(BC3_TEXT_TILING_FILE, limit=limit, label_position=0) # text tiling baseline string
    else:
        t = data_to_string(WAPITI_GOLD_FILE, limit=limit, label_position=-2)

    avg = float(len(g)) / (g.count("T") + 1) # average segment size
    k = int(avg / 2) # window size for WindowDiff

    b = ("T" + (int(math.floor(avg)) - 1) * ".") * int(math.ceil(float(len(g)) / int(math.floor(avg))))
    b = b[:len(g)] # baseline string

    print(g[:150])
    print(r[:150])

    # WindowDiff
    wdi = (float(windowdiff(g, r, k, boundary="T")) / len(g)) * 100

    # Beeferman's Pk
    bpk = (pk(g, r, boundary="T")) * 100

    # Generalized Hamming Distance
    ghd = (GHD(g, r, boundary="T") / len(g)) * 100

    # accuracy
    acc = accuracy(list(g), list(r)) * 100

    # precision, recall, f-measure
    pre = metrics.precision_score(list(g), list(r)) * 100
    rec = metrics.recall_score(list(g), list(r)) * 100
    # BUG FIX: the F1 formula referenced undefined names rec_rs/pre_rs
    # (a NameError at runtime); use rec/pre and guard a zero denominator.
    f_1 = (2.0 * (rec * pre)) / (rec + pre) if (rec + pre) else 0.0

    return acc, pre, rec, f_1, wdi, bpk, ghd, g.count("T"), r.count("T")
コード例 #13
0
ファイル: topic_tiling.py プロジェクト: paulwcoles/msc_diss
        # Fixed-threshold variant: when seg_method is an int, a boundary is
        # predicted wherever the depth score exceeds (mean - std) / 2 of the
        # non-zero depth scores.
        elif type(seg_method) is int:
            threshold = (np.mean(non_zero_depth_scores) - np.std(non_zero_depth_scores)) / 2
            for depth_score in depth_scores:
                if depth_score > threshold and depth_score != 0.0:
                    predicted_boundaries.append("BREAK")
                    boundary_count += 1
                else:
                    predicted_boundaries.append(None)
        # There is one more boundary than there are depth scores
        predicted_boundaries.append(None)
        reverse_parse(doc_name, predicted_boundaries)
        test_index += 1
        # print "%i boundaries predicted, %i gold boundaries." % (boundary_count, gold_count)

        if mode == 'evaluate':
            # Score the prediction with WindowDiff (k=3, "BREAK" as the
            # boundary token) against the gold set, record it, and append
            # it to the timestamped log file.
            evaluation = windowdiff(predicted_boundaries, gold_boundaries_set[doc_name], 3, boundary="BREAK",
                                    weighted=False)
            print "Window Diff Score:\t %f\n" % evaluation
            evaluations[doc_name] = evaluation
            with open(log_dir + timestamp, 'a') as log:
                log.write(doc_name + "\t"*3 + str(evaluation))

        elif mode == 'resolve':
            # Map each predicted topic boundary back to the start time of
            # the utterance at that position, keyed by the .wav file name.
            print "Resolving topic timings..."
            utt_start_times = get_utt_timing(doc_name)
            topic_start_times = [utt_start_times[0]]  # Initialise with start time of first utterance
            for index in xrange(len(predicted_boundaries)):
                if predicted_boundaries[index] is not None:
                    topic_start_times.append(utt_start_times[index])
            prg_name = doc_name.replace("_parsed","")
            prg_name = prg_name.replace(".txt",".wav")
            topic_timings[prg_name] = topic_start_times
コード例 #14
0
ファイル: analysis.py プロジェクト: hamedn/CrowdFeedback

		# Build the predicted boundary string: one character per row,
		# "1" where a topic change is detected, "0" otherwise.
		predicted_seg="";
		for row in d:
			# NOTE(review): this compares row[3] against the float 0.1 —
			# possibly intended to be the integer 1; confirm against the
			# code that builds `d`.
			if (row[3] == .1):
				predicted_seg += "1";
				print( "<---------------------TOPIC CHANGE HERE----------------------->");
			else:
				predicted_seg+="0"
			print(sentences[int(row[0])])

		# Score the predicted segmentation against the real one with
		# WindowDiff (window size W) and collect the score.
		s1 = real_segs.strip();
		s2 = predicted_seg.strip();
		print(s1);
		print(s2);
		difs.append(windowdiff(s1, s2, W))



	# Report the average WindowDiff over all documents processed above.
	print(difs)
	avg = float(sum(difs))/len(difs) if len(difs) > 0 else float('nan')
	print("AVERAGE WINDOW: " + str(avg));
	plt.show()
	#x(block1_lda[0].__class__.__name__);

	#for sentence in sentences:
	#	doc_lda = lda[reviewc.dictionary.doc2bow(reviewc.proc(sentence))];
	#	print(doc_lda)
	#	print "\n--------\n"
コード例 #15
0
    # Boundary indexes produced by the Bayesian segmenter vs. a reference
    # ("perfect") segmentation of the same transcript.
    bayes_boundaries = [
        1, 72, 102, 103, 104, 105, 130, 131, 144, 158, 234, 235, 248
    ]
    perfect_boundaries = [
        4, 21, 30, 49, 72, 104, 127, 131, 146, 169, 220, 225, 237
    ]
    # bayes_boundaries = [13, 14, 15, 16, 21, 69, 106, 170, 171, 172, 222, 233, 248]

    # Highest index across both boundary lists — the working document length.
    max_i = max(sum([bayes_boundaries, perfect_boundaries], []))

    # Convert boundary-index lists into boundary strings for windowdiff.
    bayes_seg = bound2seg(bayes_boundaries, max_i)
    perfect_seg = bound2seg(perfect_boundaries, max_i)

    k = int(max_i / (2 * (len(perfect_boundaries))))  # halved avg segment size

    print("wd bayes: ", windowdiff(bayes_seg, perfect_seg, k=k))

    tdf = pd.read_pickle(
        "../processed_transcripts/joe_rogan_elon_musk.pkl")[0:max_i]

    te = TopicExtractor()

    topic_ranges = get_topic_ranges(tdf)

    # Flatten {topic: [ranges]} into (topic, range) pairs and sort by range
    # length, longest first.
    tr_tuples = [(topic, tr) for topic, trs in topic_ranges.items()
                 for tr in trs]
    tr_tuples = sorted(tr_tuples,
                       key=lambda x: x[1][1] - x[1][0],
                       reverse=True)

    geek_bounds = get_geek_bounds(tr_tuples)
コード例 #16
0
ファイル: get-pk.py プロジェクト: laic/discourse
					#print i, sline, prevdoc, currdoc
					# New document started: record the finished document's
					# length (skipping the very first sentinel).
					if not prevdoc == None:
						doclens.append(currlen)
					prevdoc = currdoc  
					currlen = 1 
				else:
					currlen += 1
			
			#if i > 400:
			#	break 

	#print targets
	#print preds
	#print doclens
	# Window size = half the average document length, the usual choice
	# for Pk / WindowDiff.
	logger.debug("ndocs: %d" % len(doclens))
	evalk = int(round(numpy.average(doclens)/2))
	logger.debug("evalk %f: " % evalk)
	wd =  windowdiff(targets, preds, k=evalk)
	#logger.debug("WD: %f" % wd)
	pkval =  pk(targets, preds, k=evalk)
	#logger.debug("PK: %f" % pkval)

	# Write one tab-separated line per metric, prefixed with the input's
	# base file name.
	fstem = os.path.basename(options.input)
	with open(options.outfile, "w") as f:
		f.write(fstem + "\tPK\t" + str(pkval) + "\n") 
		f.write(fstem + "\tWD\t" + str(wd) + "\n") 

	print "PK: %f" % pkval
	print "WD: %f" % wd

コード例 #17
0
def evaluate_segmentation(bc3=False, limit=-1):
    d = "".join(data_to_list(WAPITI_TRAIN_FILE)) # training data
    g = "".join(data_to_list(WAPITI_GOLD_FILE, limit=limit)) # gold string
    temp_r = data_to_list(WAPITI_RESULT_FILE, limit=limit) # result string
    # n = data_to_list("var/union/ngrams_" + WAPITI_RESULT_FILE[-1], limit=limit)
    
    # scores = {}
    r = ""

    for i, col in enumerate(temp_r):
        # score = 0

        # if n[i][:n[i].index("/")] == "T":
        #     score = 1
        # elif col[:col.index("/")] == "T":
        #     score = float(col[col.index("/") + 1:])

        # scores[i] = 
        r += col[:col.index("/")]

    # sorted_indexes = sorted(scores, key=scores.get, reverse=True)
    # indexes = [index for index, score in scores.iteritems() if score > 0.99]

    # r = "." * len(g)

    # n_boundaries = int((float(g.count("T")) / len(g)) * len(g))

    # for i, index in enumerate(sorted_indexes):
    #     r = r[:index] + "T" + r[index + 1:]
    #     if i == n_boundaries:
    #         break
    
    # for index in indexes:
    #     r = r[:index] + "T" + r[index+1:]

    if bc3:
        t = data_to_list(BC3_TEXT_TILING_FILE, limit=limit, label_position=0) # text tiling baseline string
    else:
        t = data_to_list(WAPITI_GOLD_FILE, limit=limit, label_position=-2)

    avg_g = float(len(g)) / (g.count("T") + 1) # average segment size (reference)
    avg_d = float(len(d)) / (d.count("T") + 1) # average segment size (training)

    k = int(avg_g / 2) # window size for WindowDiff

    b = ("T" + (int(math.floor(avg_d)) - 1) * ".") * int(math.ceil(float(len(d)) / int(math.floor(avg_d))))
    b = b[:len(g)] # baseline string

    # WindowDiff
    wdi_rs = (float(windowdiff(g, r, k, boundary="T")) / len(g)) * 100
    wdi_bl = (float(windowdiff(g, b, k, boundary="T")) / len(g)) * 100
    wdi_tt = (float(windowdiff(g, t, k, boundary="T")) / len(g)) * 100

    # Beeferman's Pk
    bpk_rs = (pk(g, r, boundary="T")) * 100
    bpk_bl = (pk(g, b, boundary="T")) * 100
    bpk_tt = (pk(g, t, boundary="T")) * 100

    # Generalized Hamming Distance
    ghd_rs = (ghd(g, r, boundary="T") / len(g)) * 100
    ghd_bl = (ghd(g, b, boundary="T") / len(g)) * 100
    ghd_tt = (ghd(g, t, boundary="T") / len(g)) * 100

    # accuracy
    acc_rs = accuracy(list(g), list(r)) * 100
    acc_bl = accuracy(list(g), list(b)) * 100
    acc_tt = accuracy(list(g), list(t)) * 100

    # precision, recall, f-measure
    pre_rs = metrics.precision_score(list(g), list(r), pos_label="T") * 100
    rec_rs = metrics.recall_score(list(g), list(r), pos_label="T") * 100
    f_1_rs = (2.0 * (rec_rs * pre_rs)) / (rec_rs + pre_rs)

    pre_bl = metrics.precision_score(list(g), list(b), pos_label="T") * 100
    rec_bl = metrics.recall_score(list(g), list(b), pos_label="T") * 100
    f_1_bl = (2.0 * (rec_bl * pre_bl)) / (rec_bl + pre_bl)
    
    pre_tt = metrics.precision_score(list(g), list(t), pos_label="T") * 100
    rec_tt = metrics.recall_score(list(g), list(t), pos_label="T") * 100
    f_1_tt = (2.0 * (rec_tt * pre_tt)) / (rec_tt + pre_tt)

    return acc_rs, acc_bl, acc_tt, pre_rs, pre_bl, pre_tt, rec_rs, rec_bl, rec_tt, f_1_rs, f_1_bl, f_1_tt, wdi_rs, wdi_bl, wdi_tt, bpk_rs, bpk_bl, bpk_tt, ghd_rs, ghd_bl, ghd_tt, g.count("T"), b.count("T"), r.count("T"), t.count("T")
コード例 #18
0
ファイル: TopicSegmenter.py プロジェクト: laic/discourse
				logger.info(tt.startids)
				logger.info(tt.nsents)
				# Build the gold boundary string: a 0/1 vector over sentences
				# with 1s at the segment start indexes (position 0 dropped).
				goldseg = numpy.zeros(tt.nsents+1)
				goldseg[tt.startids] = 1
				goldstr = "".join([str(int(x)) for x in goldseg[1:]])

				logger.info(predstr)
				logger.info(goldstr)

				# Score only documents with more than one segment; the
				# window metrics are undefined otherwise.
				if len(tt.startids) > 1:
					curr_doc_sizes = numpy.array(tt.startids[1:]) - numpy.array(tt.startids[:-1])
					#evalk = int(round(numpy.average(curr_doc_sizes)/2))
					# Fixed window size (the data-driven k above is disabled).
					evalk = 3
					logger.debug("eval k: %d" % evalk)
					# windowdiff/pk raise ValueError for degenerate inputs
					# (e.g. sequences shorter than k); log and keep going.
					try:
						wd =  windowdiff(goldstr, predstr, k=evalk)
						logger.info("WD: %f", wd)
						wds.append(wd)
					except ValueError as e:
						logger.error("windowdiff value error")
						logger.error(e)

					try:
						pkval =  pk(goldstr, predstr, k=evalk)
						logger.info("PK: %f", pkval)
						pks.append(pkval)
					except ValueError as e:
						logger.error("pkval value error")
						logger.error(e)