Example no. 1
0
def evaluate_segmentation(result_file, gold_file, train_file, limit=-1, base_result_file=False, smart_combine=True):
    """
    Compute scores for the current fold
    """

    d = data_to_list(train_file) # training label list
    g = data_to_list(gold_file, limit=limit) # gold label list
    t = data_to_list(result_file, limit=limit, label_position=-3) # TextTiling label list

    result_data = data_to_list(result_file, limit=limit, label_position=-2)

    if base_result_file:
        base_result_data = data_to_list(base_result_file, limit=limit, label_position=-2)
        result_data = data_to_list(result_file, limit=limit, label_position=-1)

        max_boundaries = int(d.count("T") * (float(len(g)) / len(d))) if smart_combine else -1

        r = combine_results(result_data, base_result_data, max_boundaries=max_boundaries) # result label list
    else:
        r = data_to_list(result_file, limit=limit, label_position=-2) # result label list

    avg_g = float(len(g)) / (g.count("T") + 1) # average segment size (reference)
    avg_d = float(len(d)) / (d.count("T") + 1) # average segment size (training)

    k = int(avg_g / 2) # window size for WindowDiff

    b = list("T" + (int(ceil(avg_d) - 1) * "F")) * int(float(len(g)) / avg_d)
    b = b[:len(g)] # baseline label list

    ########################################

    # WindowDiff, Beeferman's Pk, Generalized Hamming Distance
    wdi_rs, bpk_rs, ghd_rs = compute_segmentation_scores(g, r, k)
    wdi_bl, bpk_bl, ghd_bl = compute_segmentation_scores(g, b, k)
    wdi_tt, bpk_tt, ghd_tt = compute_segmentation_scores(g, t, k)

    # accuracy
    acc_rs = accuracy(g, r)
    acc_bl = accuracy(g, b)
    acc_tt = accuracy(g, t)

    # precision, recall, f-measure
    pre_rs, rec_rs, f_1_rs = compute_ir_scores(g, r)
    pre_bl, rec_bl, f_1_bl = compute_ir_scores(g, b)
    pre_tt, rec_tt, f_1_tt = compute_ir_scores(g, t)

    ########################################

    return (
        acc_rs, acc_bl, acc_tt, 
        pre_rs, pre_bl, pre_tt, 
        rec_rs, rec_bl, rec_tt, 
        f_1_rs, f_1_bl, f_1_tt, 
        wdi_rs, wdi_bl, wdi_tt, 
        bpk_rs, bpk_bl, bpk_tt, 
        ghd_rs, ghd_bl, ghd_tt, 
        g.count("T"), b.count("T"), r.count("T"), t.count("T")
    )
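The helper functions used above (data_to_list, combine_results, compute_segmentation_scores, compute_ir_scores) are defined elsewhere in that project; below is a minimal sketch of the two scoring helpers, assuming the same normalization spelled out in Example no. 29 (WindowDiff, Pk and GHD as percentages, precision/recall with "T" as the positive label):

from nltk.metrics.segmentation import windowdiff, pk, ghd
from sklearn import metrics

def compute_segmentation_scores(gold, result, k):
    # hypothetical helper: WindowDiff, Beeferman's Pk and GHD as percentages
    g, r = "".join(gold), "".join(result)
    wdi = (float(windowdiff(g, r, k, boundary="T")) / len(g)) * 100
    bpk = pk(g, r, boundary="T") * 100
    ghd_score = (ghd(g, r, boundary="T") / len(g)) * 100
    return wdi, bpk, ghd_score

def compute_ir_scores(gold, result):
    # hypothetical helper: precision, recall and F1 with "T" as the positive label
    pre = metrics.precision_score(gold, result, pos_label="T") * 100
    rec = metrics.recall_score(gold, result, pos_label="T") * 100
    f_1 = (2.0 * rec * pre) / (rec + pre)
    return pre, rec, f_1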
Example no. 2
0
def run_test():
    real = []
    result = []
    vectorizer = TfidfVectorizer()

    start = time.time()
    for answer_id, question in test_cases.items():
        # for t in range(1, 31):
        #     print(f'Running test {t}')
        #     answer_id = str(t)
        #     question = test_cases[answer_id]
        print(f'Running test on id {answer_id}')
        corpus.append(question)
        real.append(answer_id)
        tfidf = vectorizer.fit_transform(corpus)
        question_array = tfidf[-1, :].toarray()[0]
        best_similarity = -1
        best_id = 0

        for i in range(tfidf.shape[0] - 1):
            comparing = tfidf[i, :].toarray()[0]
            similarity = 0
            for j in range(len(question_array)):
                similarity += question_array[j] * comparing[j]
                if similarity > best_similarity:
                    best_similarity = similarity
                    best_id = id_column[i]

        result.append(best_id)
        del corpus[-1]

    print("Accuracy:", accuracy(real, result))
    print(f'Duration: {time.time() - start}')
Example no. 3
0
def test_nltk_classifier():
	tagged_words = pos_tag(pure_tokens)
	nltk_unformatted_prediction = ne_chunk(tagged_words)
	# Convert prediction to multiline string and then to list (includes pos tags)
	multiline_string = chunk.tree2conllstr(nltk_unformatted_prediction)
	listed_pos_and_ne = multiline_string.split()

	# Delete pos tags and rename
	del listed_pos_and_ne[1::3]
	listed_ne = listed_pos_and_ne

	# Amend class annotations for consistency with reference_annotations
	for n,i in enumerate(listed_ne):
		if i == "B-PERSON":
			listed_ne[n] = "PERSON"
		if i == "I-PERSON":
			listed_ne[n] = "PERSON"
		if i == "B-ORGANIZATION":
			listed_ne[n] = "ORGANIZATION"
		if i == "I-ORGANIZATION":
			listed_ne[n] = "ORGANIZATION"
		if i == "B-LOCATION":
			listed_ne[n] = "LOCATION"
		if i == "I-LOCATION":
			listed_ne[n] = "LOCATION"
		if i == "B-GPE":
			listed_ne[n] = "LOCATION"
		if i == "I-GPE":
			listed_ne[n] = "LOCATION"

	# Group prediction into tuples
	nltk_formatted_prediction = list(group(listed_ne, 2))
	nltk_accuracy = accuracy(reference_annotations, nltk_formatted_prediction)
	print(nltk_accuracy)
	return nltk_accuracy
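The group helper used above is not shown in this snippet; a minimal sketch, assuming it simply pairs up the flat [token, tag, token, tag, ...] list:

def group(lst, n):
    # hypothetical helper: yield consecutive n-tuples from a flat list,
    # dropping any incomplete trailing chunk
    for i in range(0, len(lst), n):
        chunk = tuple(lst[i:i + n])
        if len(chunk) == n:
            yield chunk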
Example no. 4
0
def tfidf_cosine(data):
    q_train, a_train, q_test, a_test = data
    best_answers = []
    sentences = q_train[:]
    for test_q in q_test:
        best = 0.25
        a_id = '0'
        el_index = 0
        sentences.append(test_q)

        # Tf-idf-weighted document-term matrix:
        # one sentence per row, one term per column
        tfidf = TfidfVectorizer().fit_transform(sentences)

        # compute cosine similarity between the query and all other sentences
        vals = cosine_similarity(tfidf[-1], tfidf[:-1])[0]

        # get index of highest similarity
        a_id = vals.argmax()

        # check whether the best similarity clears the threshold;
        # otherwise the query is treated as unrecognized
        if (vals[a_id] < best):
            best_answers.append('0')
        else:
            best_answers.append(a_train[a_id])

        sentences = sentences[:-1]

    #print("Accuracy : ", accuracy(a_test, best_answers))
    return accuracy(a_test, best_answers)
Example no. 5
0
    def calculate_metrics(self):
        included_logs = 0
        metrics = {}
        cc = SmoothingFunction()
        for identifier in self._values:
            if self._values[identifier].get('target_text', None) is not None:
                included_logs += 1
                target_text = self._values[identifier]['target_text']
                output_text = self._values[identifier]['output_text']
                metrics['BLEU'] = metrics.get('BLEU', 0) + sentence_bleu(
                    [target_text], output_text, smoothing_function=cc.method4)
                metrics['accuracy'] = metrics.get('accuracy', 0) + accuracy(
                    target_text, output_text)
                target_text = set(target_text)
                output_text = set(output_text)
                metrics['precision'] = metrics.get('precision', 0) + precision(
                    target_text, output_text)
                metrics['recall'] = metrics.get('recall', 0) + recall(
                    target_text, output_text)
                metrics['f_measure'] = metrics.get('f_measure', 0) + f_measure(
                    target_text, output_text)

        if included_logs != 0:
            for metric in metrics:
                metrics[metric] /= included_logs

        return metrics, included_logs
Example no. 6
0
def test_stanford_classifier():

	st = StanfordNERTagger('../stanford_ner/classifiers/english.all.3class.distsim.crf.ser.gz',
							'../stanford_ner/stanford-ner.jar', encoding='utf-8')
	stanford_prediction = st.tag(pure_tokens)
	stanford_accuracy = accuracy(reference_annotations, stanford_prediction)
	print(stanford_accuracy)
	return stanford_accuracy
Example no. 7
0
def demo_eval(alignments, gold_file):
    """
    Compute and print the accuracy of the test alignments against the gold file.
    """
    test_values = alignment_util.get_test_values(alignments)
    
    reference_values = alignment_util.get_reference_values(gold_file)
         
    accuracy = scores.accuracy(reference_values, test_values)
    
    print "accuracy: %.2f" % accuracy   
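For reference, nltk.metrics.scores.accuracy is just the fraction of positions at which two equal-length sequences agree; a quick check:

from nltk.metrics.scores import accuracy

print(accuracy(["T", "F", "F"], ["T", "F", "T"]))  # 2 of 3 positions agree -> 0.666...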
Example no. 8
0
    def test(self, test_sequence, **kwargs):
        feature_detector = kwargs.get('feature_detector')

        def __tags(token):
            return token[-1]

        def __untag(token):
            return token[0]

        def __featurize(tagged_token):
            tag = tagged_token[-1]
            feats = feature_detector(tagged_token)
            return (feats, tag)

        count = sum([len(sent) for sent in test_sequence])
        correct_tags = LazyMap(__tags, test_sequence)
        untagged_sequence = LazyMap(__untag, LazyMap(__featurize, test_sequence))
        predicted_tags = LazyMap(self.classify, untagged_sequence)
        acc = accuracy(correct_tags, predicted_tags)
        print 'accuracy over %d tokens: %.2f' % (count, acc)
Example no. 9
0
def run_test():
    column_id = {}
    column = 0
    corpus = []

    for answer_id, questions in id_questions.items():
        for question in questions:
            corpus.append(question)
            column_id[column] = answer_id
            column += 1

    real = []
    result = []
    vectorizer = TfidfVectorizer()
    start = time.time()
    print(len(test_cases))
    for i in range(1, 542):
        # for answer_id, question in test_cases.items():
        answer_id = str(i)
        question = test_cases[answer_id]
        print(f'Running test on {answer_id}')
        real.append(answer_id)
        corpus.append(preprocess_sentence(question))
        tfidf_matrix = vectorizer.fit_transform(corpus)
        user_input = tfidf_matrix[-1, :].toarray()[0]
        best_similarity = -1
        best_id = 0

        # compare the query against every document row already in the corpus
        # (shape[0] is the number of documents; shape[1] would be the vocabulary size)
        for row in range(tfidf_matrix.shape[0] - 1):
            comparing = tfidf_matrix[row, :].toarray()[0]
            similarity = 0
            for j in range(len(user_input)):
                similarity += user_input[j] * comparing[j]
                if similarity > best_similarity:
                    best_similarity = similarity
                    best_id = column_id[row]

        result.append(best_id)
        del corpus[-1]

    print("Accuracy:", accuracy(real, result))
    print(f'Duration: {time.time() - start}')
Example no. 10
0
def main():

    # Split the development (desen) and training (treino) data into tags and sentences

    # Process desen
    listaTagsDesenvolvimento = extrai('Corpora/dist-desen-sem-acentos.txt', 1)
    listaFrasesDesenvolvimento = extrai('Corpora/dist-desen-sem-acentos.txt',
                                        2)

    # Process treino
    listaTagsTreino = extrai('Corpora/dist-treino.txt', 1)
    listaFrasesTreino = extrai('Corpora/dist-treino.txt', 2)

    #----- Pre-processing-----
    listaFrasesDesenvolvimento = preProc(listaFrasesDesenvolvimento)
    listaFrasesTreino = preProc(listaFrasesTreino)

    #----- Remove stopWords-----
    listaFrasesDesenvolvimento = removeStopWords(listaFrasesDesenvolvimento,
                                                 stopWords)
    listaFrasesTreino = removeStopWords(listaFrasesTreino, stopWords)

    #----- Stemming -----
    listaFrasesDesenvolvimento = tokStem(listaFrasesDesenvolvimento)
    listaFrasesTreino = tokStem(listaFrasesTreino)

    # Call the main function once; it returns the estimated tags and the
    # closest training sentence for each development sentence
    resultado = mainFunction(listaTagsTreino, listaFrasesTreino,
                             listaFrasesDesenvolvimento)
    listaTagsEstimada = resultado[0]
    fraseMaisProxima = resultado[1]

    # Show results
    for a, b, c, d in zip(listaFrasesDesenvolvimento, listaTagsEstimada,
                          listaTagsDesenvolvimento, fraseMaisProxima):
        print("Sentence to evaluate: ", a)
        print("Suggested Tag: ", b)
        print("Correct Tag: ", c)
        print("Closest sentence: ", d, "\n\n")

    # Find accuracy
    print("Accuracy:", accuracy(listaTagsDesenvolvimento, listaTagsEstimada))
Example no. 11
0
def jaccard_distance(data):
    q_train, a_train, q_test, a_test = data
    best_answers = []
    for test_q in q_test:
        best = 1
        a_id = 0
        el_index = 0

        for el_index in range(len(q_train)):

            # skip empty token sets (an empty union would make jaccard_distance divide by zero)
            if set(test_q.split()) and set(q_train[el_index].split()):
                res_aux = nltk.jaccard_distance(set(test_q.split()),
                                                set(q_train[el_index].split()))

                if res_aux < best:
                    best = res_aux
                    a_id = a_train[el_index]
        best_answers.append(a_id)

    return accuracy(a_test, best_answers)
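nltk.jaccard_distance(a, b) returns 1 - |a ∩ b| / |a ∪ b|, so lower values mean more similar token sets, which is why the loop above keeps the minimum; a quick check:

import nltk

a = set("what is your name".split())
b = set("what is your age".split())
print(nltk.jaccard_distance(a, b))  # union of 5 tokens, 3 shared -> (5 - 3) / 5 = 0.4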
Example no. 12
0
def get_measures(reference, test):
    tp = tn = fp = fn = 0

    for ((_, r), (_, t)) in zip(reference, test):
        if r == t == "O":
            tn += 1
        elif r == t == "ORG":
            tp += 1
        elif r == "O" and t == "ORG":
            fp += 1
        elif r == "ORG" and t == "O":
            fn += 1
    matrix = [tp, tn, fp, fn]
    acc = accuracy(reference, test)
    reference_set = set(reference)
    test_set = set(test)
    pre = precision(reference_set, test_set)
    rec = recall(reference_set, test_set)
    f = f_measure(reference_set, test_set)
    return acc, pre, rec, f, matrix
Example no. 13
0
def evaluate_segmentation(bc3=False, limit=0):
    g = data_to_string(WAPITI_GOLD_FILE, limit=limit)  # gold string
    r = data_to_string(WAPITI_RESULT_FILE, limit=limit)  # result string

    if bc3:
        t = data_to_string(BC3_TEXT_TILING_FILE, limit=limit,
                           label_position=0)  # text tiling baseline string
    else:
        t = data_to_string(WAPITI_GOLD_FILE, limit=limit, label_position=-2)

    avg = float(len(g)) / (g.count("T") + 1)  # average segment size
    k = int(avg / 2)  # window size for WindowDiff

    b = ("T" + (int(math.floor(avg)) - 1) * ".") * int(
        math.ceil(float(len(g)) / int(math.floor(avg))))
    b = b[:len(g)]  # baseline string

    print(g[:150])
    print(r[:150])

    # WindowDiff
    wdi = (float(windowdiff(g, r, k, boundary="T")) / len(g)) * 100

    # Beeferman's Pk
    bpk = (pk(g, r, boundary="T")) * 100

    # Generalized Hamming Distance
    ghd = (GHD(g, r, boundary="T") / len(g)) * 100

    # accuracy
    acc = accuracy(list(g), list(r)) * 100

    # precision, recall, f-measure
    pre = metrics.precision_score(list(g), list(r), pos_label="T") * 100
    rec = metrics.recall_score(list(g), list(r), pos_label="T") * 100
    f_1 = (2.0 * (rec * pre)) / (rec + pre)

    return acc, pre, rec, f_1, wdi, bpk, ghd, g.count("T"), r.count("T")
Example no. 14
0
def evaluate_segmentation(bc3=False, limit=0):
    g = data_to_string(WAPITI_GOLD_FILE, limit=limit) # gold string
    r = data_to_string(WAPITI_RESULT_FILE, limit=limit) # result string

    if bc3:
        t = data_to_string(BC3_TEXT_TILING_FILE, limit=limit, label_position=0) # text tiling baseline string
    else:
        t = data_to_string(WAPITI_GOLD_FILE, limit=limit, label_position=-2)

    avg = float(len(g)) / (g.count("T") + 1) # average segment size
    k = int(avg / 2) # window size for WindowDiff

    b = ("T" + (int(math.floor(avg)) - 1) * ".") * int(math.ceil(float(len(g)) / int(math.floor(avg))))
    b = b[:len(g)] # baseline string

    print(g[:150])
    print(r[:150])

    # WindowDiff
    wdi = (float(windowdiff(g, r, k, boundary="T")) / len(g)) * 100

    # Beeferman's Pk
    bpk = (pk(g, r, boundary="T")) * 100

    # Generalized Hamming Distance
    ghd = (GHD(g, r, boundary="T") / len(g)) * 100

    # accuracy
    acc = accuracy(list(g), list(r)) * 100

    # precision, recall, f-measure
    pre = metrics.precision_score(list(g), list(r), pos_label="T") * 100
    rec = metrics.recall_score(list(g), list(r), pos_label="T") * 100
    f_1 = (2.0 * (rec * pre)) / (rec + pre)

    return acc, pre, rec, f_1, wdi, bpk, ghd, g.count("T"), r.count("T")
Example no. 15
0
    print "Loading test data..."
    testset = np.load(path)

    # Load model
    print "Loading model..."
    with open(model, 'rb') as fmodel:
        cls = pickle.load(fmodel)

    # Run test
    sys.stdout.write("Testing:")
    pred = []
    idx = 0
    for i in testset[:, 0]:
        idx += 1
        if idx % 1000 == 0:
            sys.stdout.write(".")
            sys.stdout.flush()
        pred.append(str(cls.classify(i)))

    # Result
    #  * Convert Ref Label to ASCII
    ref = [str(label) for label in testset[:, 1]]
    accuracy = scores.accuracy(ref, pred)
    print "\nAccuracy: %.4f" % accuracy
    cm = ConfusionMatrix(ref, pred)
    print "Confusion Matrix: "
    print (cm.pretty_format(sort_by_count=True, show_percents=True, truncate=9))

    # Finished?
    print "DONE!!"
Example no. 16
0
# Features
top_entry = json_response[0]
true_matches = [bool(song['Match']) for song in json_response[1:]]

FEATURE = 'SongName'
NGRAMS = 2
top_entry_value = preproc(top_entry[FEATURE])
print 'Comparing song name to top match reference:', top_entry[FEATURE]
top_entry_word_bigrams = set(ngrams(word_tokenize(top_entry_value), NGRAMS))

matches = []
for song in json_response[1:]:

    this_value = preproc(song[FEATURE])
    print '\t%s' % song[FEATURE]

    this_word_bigrams = set(ngrams(word_tokenize(this_value), NGRAMS))
    wbg_distance = jaccard_distance(top_entry_word_bigrams, this_word_bigrams)
    print '\t\tWord bigrams + Jaccard:\t'+str(wbg_distance)

    is_this_match = is_match(wbg_distance)
    print '\t\tMatch?', is_this_match
    matches.append(is_this_match)

cm = ConfusionMatrix(true_matches, matches)
print 'Confusion matrix'
print cm

print 'Accuracy:', accuracy(true_matches, matches)
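preproc and is_match are not defined in this snippet; a minimal sketch of is_match, assuming it simply thresholds the Jaccard distance (the 0.5 cutoff is an assumed value, not taken from the original):

def is_match(distance, threshold=0.5):
    # hypothetical helper: treat sufficiently small word-bigram Jaccard distances as a match
    return distance <= threshold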
Example no. 17
0
# Amend class annotations for consistency with reference_annotations
for n, i in enumerate(listed_ne):
    if i == "B-PERSON":
        listed_ne[n] = "PERSON"
    if i == "I-PERSON":
        listed_ne[n] = "PERSON"
    if i == "B-ORGANIZATION":
        listed_ne[n] = "ORGANIZATION"
    if i == "I-ORGANIZATION":
        listed_ne[n] = "ORGANIZATION"
    if i == "B-LOCATION":
        listed_ne[n] = "LOCATION"
    if i == "I-LOCATION":
        listed_ne[n] = "LOCATION"
    if i == "B-GPE":
        listed_ne[n] = "LOCATION"
    if i == "I-GPE":
        listed_ne[n] = "LOCATION"

# Group prediction into tuples
nltk_formatted_prediction = list(group(listed_ne, 2))

nltk_accuracy = accuracy(reference_annotations, nltk_formatted_prediction)
print(nltk_accuracy)

st = StanfordNERTagger(
    '/usr/share/stanford-ner/classifiers/english.all.3class.distsim.crf.ser.gz',
    '/usr/share/stanford-ner/stanford-ner.jar',
    encoding='utf-8')
stanford_prediction = st.tag(pure_tokens)
stanford_accuracy = accuracy(reference_annotations, stanford_prediction)
print(stanford_accuracy)

style.use('fivethirtyeight')

N = 1
ind = np.arange(N)  # the x locations for the groups
width = 0.35  # the width of the bars
Example no. 18
0
def main():
    aparser = argparse.ArgumentParser(description='Daba disambiguator')
    aparser.add_argument('-v',
                         '--verbose',
                         help='Verbose output',
                         default=False,
                         action='store_true')
    aparser.add_argument(
        '-l',
        '--learn',
        help='Learn model from data (and save as F if provided)',
        default=None)
    aparser.add_argument('-p',
                         '--pos',
                         help='Prediction for POS',
                         default=False,
                         action='store_true')
    aparser.add_argument('-t',
                         '--tone',
                         help='Prediction for tones',
                         default=False,
                         action='store_true')
    aparser.add_argument('-r', '--root', help='Corpus root dir')
    aparser.add_argument('-f',
                         '--filelist',
                         help='Path to a list of files to learn from')
    # aparser.add_argument('-g', '--gloss', help='Prediction for gloses', default=False, action='store_true')
    aparser.add_argument(
        '-e',
        '--evalsize',
        type=int,
        default=10,
        help=
        'Percent of training data with respect to training and test one (default 10)'
    )
    aparser.add_argument(
        '-d',
        '--disambiguate',
        help=
        'Use model F to disambiguate data, the gloss list will be ordered by the probability growth order',
        default=None)
    aparser.add_argument(
        '--select',
        help=
        'Option that will be taken into account only with the use of -d, which specifies the disambiguation modality is to select only the most likely gloss in each list.',
        action='store_true')
    aparser.add_argument('-i',
                         '--infile',
                         help='Input file (.html)',
                         default=sys.stdin)
    aparser.add_argument('-o',
                         '--outfile',
                         help='Output file (.html)',
                         default=sys.stdout)
    aparser.add_argument(
        '-s',
        '--store',
        help=
        'Store tagged raw data in file (.csv) for further research purpose',
        default=None)

    args = aparser.parse_args()
    if args.verbose:
        print(args)

    if args.learn and (args.pos or args.tone or args.gloss):

        if not (args.pos or args.tone or args.gloss):
            print('Choose pos, tone, gloss or combination of them')
            exit(0)

        print('Make list of files')
        allfiles = []
        with codecs.open(args.filelist, 'r', encoding="utf-8") as filelist:
            for line in filelist:
                allfiles.append(line.strip())
        allsents = []

        # for debugging
        # allfiles = '../corbama/sisoko-daa_ka_kore.dis.html'

        if args.tone:
            try:
                enc = encoder_tones()
            except:
                enc = None
                print(("Error : unable to initialize the tone encoder !"))

        print('Open files and find features / supervision tags')
        for infile in allfiles:
            if (infile):
                print('-', infile)
                sent = []

                html_parser = FileParser()
                html_parser.read_file(os.path.join(args.root, infile))

                for snum, sentence in enumerate(html_parser.glosses):
                    for tnum, token in enumerate(sentence[2]):
                        tag = ''
                        if token.type == 'w' or token.type == 'c':
                            tags = ''
                            if args.pos:
                                tags = '/'.join(token.gloss.ps)
                                wordform = detone(token.gloss.form)
                                sent.append((wordform, tags))
                            elif args.tone:
                                # Why not learn tonal forms that contain a vertical bar?
                                # Because across the disambiguated corpora such forms occur
                                # fewer than 10 times; the case seems too rare to bring any
                                # real improvement to the tone-marking model. That said,
                                # nothing in the framework's design prevents including them
                                # in the training data and observing their contribution.
                                if '|' not in token.gloss.form:
                                    [codes, chunks] = enc.differential_encode(
                                        token.token, token.gloss.form)
                                    for chunk, code in zip(chunks, codes):
                                        try:
                                            sent.append((chunk, code))
                                        except LookupError:
                                            pass
                            """
                            elif args.gloss:
                                tags += token.gloss.gloss
                                sent.append((token.token, tags))
                            """

                    if len(sent) > 1:
                        allsents.append(sent)
                        sent = []

        if args.verbose and args.tone:
            enc.report()

        # Build the training and evaluation sets
        p = (1 - args.evalsize / 100.0)
        train_set, eval_set = sampling(allsents, p)
        print('Split the data in train (', len(train_set),
              ' sentences) / test (', len(eval_set), ' sentences)')

        print('Building classifier (CRF/NLTK)')
        # Initialization
        t1 = time.time()
        if args.tone:
            num_phases = len([False, True]) * len(mode_indicators)
            myzip = zipfile.ZipFile(args.learn + '.zip', 'w')
        else:
            num_phases = 1

        # Training
        for phase in range(num_phases):
            tagger = CRFTagger(verbose=args.verbose,
                               training_opt={'feature.minfreq': 10})
            trainer = pycrfsuite.Trainer(verbose=tagger._verbose)
            trainer.set_params(tagger._training_options)
            if num_phases > 1:
                model_name = args.learn + '.' + str(phase)
            else:
                model_name = args.learn

            # train_set : list(list((str,list(str))))
            for sent in train_set:
                tokens = unzip(sent)[0]
                labels = unzip(sent)[1]
                if num_phases > 1:
                    for lab in labels:
                        pass
                    labels = [
                        code_dispatcher(label)[phase] for label in labels
                    ]
                features = [
                    _get_features_customised_for_tones(tokens, i)
                    for i in range(len(tokens))
                ]
                trainer.append(features, labels)
            trainer.train(model=model_name)
            if num_phases > 1:
                myzip.write(model_name)
                os.remove(model_name)
        if num_phases > 1:
            myzip.close()

        print("... done in", get_duration(t1_secs=t1, t2_secs=time.time()))

        # Evaluation
        print('Evaluating classifier')
        # gold_set, predicted_set : list(list((str, str)))
        # input_set, output_gold_set : list(list(str))
        gold_set = eval_set
        input_set = [unzip(sent)[0] for sent in gold_set]
        predicted_set = [list() for sent in gold_set]
        if num_phases > 1:
            myzip = zipfile.ZipFile(args.learn + '.zip', 'r')
        for phase in range(num_phases):
            tagger = CRFTagger(verbose=args.verbose,
                               training_opt={'feature.minfreq': 10})
            trainer = pycrfsuite.Trainer(verbose=tagger._verbose)
            trainer.set_params(tagger._training_options)
            if num_phases > 1:
                model_name = args.learn + '.' + str(phase)
                myzip.extract(model_name)
            else:
                model_name = args.learn
            tagger.set_model_file(model_name)
            for i, sent in enumerate(input_set):
                features = [
                    _get_features_customised_for_tones(sent, j)
                    for j in range(len(sent))
                ]
                labels = tagger._tagger.tag(features)
                if num_phases > 1:
                    labels = [
                        code_dispatcher(label)[phase] for label in labels
                    ]
                tagged_sent = list(zip(sent, labels))
                if not predicted_set[i]:
                    predicted_set[i] = tagged_sent
                else:
                    sent_acc, labels_acc = unzip(predicted_set[i])
                    labels_acc = [
                        label_acc + label
                        for label_acc, label in zip(labels_acc, labels)
                    ]
                    predicted_set[i] = list(zip(sent_acc, labels_acc))
            if num_phases > 1:
                os.remove(model_name)
                myzip.close()

        # gold_tokens, predicted_tokens : list((str,str))
        predicted_tokens = list(itertools.chain(*predicted_set))
        if num_phases > 1:
            predicted_tokens = [
                tuple([pair[0], code_resort(pair[1])])
                for pair in predicted_tokens
            ]
        gold_tokens = list(itertools.chain(*gold_set))
        # gold_tokens_eval, predicted_tokens_eval : list(str)
        if args.tone:
            gold_tokens_eval = getTag(gold_tokens)
            predicted_tokens_eval = getTag(predicted_tokens)
        else:
            gold_tokens_eval = gold_tokens
            predicted_tokens_eval = predicted_tokens

        if args.store and args.tone:
            stored_filename = args.store
            csv_export(enc, stored_filename, gold_tokens, predicted_tokens)

        print("Accuracy : {:>5.3f}".format(
            accuracy(gold_tokens_eval, predicted_tokens_eval)))

        if args.verbose and args.store:
            print(("Tagged result is exported in {}".format(args.store)))

    elif args.disambiguate and args.infile and args.outfile:
        # Read the input .HTML text
        html_parser = FileParser()
        tagger = CRFTagger()

        if args.pos:
            try:
                tagger.set_model_file(args.disambiguate)
            except IOError:
                print("Error : unable to open the model {} !".format(
                    args.infile))
                exit(1)
            try:
                html_parser.read_file(args.infile)
            except IOError:
                print("Error : unable to open the input file {} !".format(
                    args.infile))
                exit(1)

            # Export the disambiguation result to .HTML
            for snum, sentence in enumerate(html_parser.glosses):
                tokens = [token.token for token in sentence[2]]
                features = [
                    _get_features_customised_for_tones(tokens, i)
                    for i in range(len(tokens))
                ]
                tagger._tagger.set(features)
                for tnum, token in enumerate(sentence[2]):
                    options = list()
                    if token.value and len(token.value) > 2:
                        for nopt, option in enumerate(token.value[2]):
                            try:
                                tag = option.ps[0]
                            except IndexError:
                                tag = ''
                            prob = tagger._tagger.marginal(tag, tnum)
                            options.append((prob, option))
                        reordered_probs, reordered_options = unzip(
                            sorted(options, reverse=True))
                        if args.select:
                            prob_max = reordered_probs[0]
                            reordered_options = tuple([
                                reordered_options[i]
                                for i, p in enumerate(reordered_probs)
                                if p >= prob_max
                            ])
                        html_parser.glosses[snum][1][tnum] = reordered_options

        elif args.tone:
            pass

        try:
            html_parser.write(args.outfile)
        except IOError:
            print("Error : unable to create the output file {}".format(
                args.outfile))

    else:
        aparser.print_help()
    exit(0)
Example no. 19
0
    def getAccuracy(self):
        return accuracy(self._classifiedResults, self._actualResults)
Example no. 20
0
def lindy_speed(gs, speeds, limit):
    acc = str(accuracy(gs, [i < limit for i in speeds]))
    print("lindy speed under " + str(limit) + ": " + acc)
Example no. 21
0
def bal_speed(gs, speeds, limit):
    acc = str(accuracy(gs, [i >= limit for i in speeds]))
    print("bal speed over " + str(limit) + ": " + acc)
Example no. 22
0

if __name__ == '__main__':
    bal_gold, lindy_gold = gold_standards()

    attributes = open('song_attributes.csv', 'r')
    attribute_rows = list(csv.reader(attributes))

    speeds = [int(row[0]) for row in attribute_rows]
    triplety = [row[1] == '1' for row in attribute_rows]
    backbeat = [row[2] == '1' for row in attribute_rows]
    accent_m = [row[3] == '1' for row in attribute_rows]
    big_band = [row[4] == '0' for row in attribute_rows]
    crashy_c = [row[5] == '1' for row in attribute_rows]

    print("bal triplety: " + str(accuracy(bal_gold, [not i for i in triplety])))
    print("bal backbeat: " + str(accuracy(bal_gold, [not i for i in backbeat])))
    print("bal accent_m: " + str(accuracy(bal_gold, [not i for i in accent_m])))
    print("bal big_band: " + str(accuracy(bal_gold, big_band)))

    print("lindy triplety: " + str(accuracy(lindy_gold, triplety)))
    print("lindy backbeat: " + str(accuracy(lindy_gold, backbeat)))
    print("lindy accent_m: " + str(accuracy(lindy_gold, accent_m)))
    print("lindy big_band: " + str(accuracy(lindy_gold, big_band)))


    print("bal tempos:")
    for song_index, is_bal in enumerate(bal_gold):
        if is_bal:
            print(speeds[song_index])
Example no. 23
0
def evaluate_segmentation(result_file,
                          gold_file,
                          train_file,
                          limit=-1,
                          base_result_file=False,
                          smart_combine=True):
    """
    Compute scores for the current fold
    """

    d = data_to_list(train_file)  # training label list
    g = data_to_list(gold_file, limit=limit)  # gold label list
    t = data_to_list(result_file, limit=limit,
                     label_position=-3)  # TextTiling label list

    result_data = data_to_list(result_file, limit=limit, label_position=-2)

    if base_result_file:
        base_result_data = data_to_list(base_result_file,
                                        limit=limit,
                                        label_position=-2)
        result_data = data_to_list(result_file, limit=limit, label_position=-1)

        max_boundaries = int(d.count("T") *
                             (float(len(g)) / len(d))) if smart_combine else -1

        r = combine_results(result_data,
                            base_result_data,
                            max_boundaries=max_boundaries)  # result label list
    else:
        r = data_to_list(result_file, limit=limit,
                         label_position=-2)  # result label list

    avg_g = float(len(g)) / (g.count("T") + 1)  # average segment size (reference)
    avg_d = float(len(d)) / (d.count("T") + 1)  # average segment size (training)

    k = int(avg_g / 2)  # window size for WindowDiff

    b = list("T" + (int(ceil(avg_d) - 1) * "F")) * int(float(len(g)) / avg_d)
    b = b[:len(g)]  # baseline label list

    ########################################

    # WindowDiff, Beeferman's Pk, Generalized Hamming Distance
    wdi_rs, bpk_rs, ghd_rs = compute_segmentation_scores(g, r, k)
    wdi_bl, bpk_bl, ghd_bl = compute_segmentation_scores(g, b, k)
    wdi_tt, bpk_tt, ghd_tt = compute_segmentation_scores(g, t, k)

    # accuracy
    acc_rs = accuracy(g, r)
    acc_bl = accuracy(g, b)
    acc_tt = accuracy(g, t)

    # precision, recall, f-measure
    pre_rs, rec_rs, f_1_rs = compute_ir_scores(g, r)
    pre_bl, rec_bl, f_1_bl = compute_ir_scores(g, b)
    pre_tt, rec_tt, f_1_tt = compute_ir_scores(g, t)

    ########################################

    return (acc_rs, acc_bl, acc_tt, pre_rs, pre_bl, pre_tt, rec_rs, rec_bl,
            rec_tt, f_1_rs, f_1_bl, f_1_tt, wdi_rs, wdi_bl, wdi_tt, bpk_rs,
            bpk_bl, bpk_tt, ghd_rs, ghd_bl, ghd_tt, g.count("T"), b.count("T"),
            r.count("T"), t.count("T"))
Example no. 24
0
    # labels() is [category, "no"] unless the first label is "no", in which case it is ["no", category]
    flip = classifier.labels()[0] == "no"
    categorized_proportion = len([words for (words, categories) in corpus if category in categories]) * 1.0 / len(corpus)
    if flip:
        model.class_prior = [1-categorized_proportion, categorized_proportion]
    else:
        model.class_prior = [categorized_proportion, 1-categorized_proportion]

    classifier.train(train_set)

    # test classifier
    test_results = classifier.classify_many([feat for (feat, label) in test_set])
    pos_test_set = set(i for i, result in enumerate(test_results) if result == category)
    reference_values = [label for (feat, label) in test_set]
    pos_ref_set = set(i for i, (feat, label) in enumerate(test_set) if label == category)
    accuracy = scores.accuracy(reference_values, test_results)
    accuracies.append(accuracy)
    precision = scores.precision(pos_ref_set, pos_test_set)
    recall = scores.recall(pos_ref_set, pos_test_set)
    f1 = scores.f_measure(pos_ref_set, pos_test_set)
    f1_scores.append(f1)

    print "%s: accuracy %s, precision %s, recall %s, F1 %s" % (colored(category, "blue"), colored(accuracy, "yellow"), colored(precision, "yellow"), colored(recall, "yellow"), colored(f1, "yellow"))
    ## print(nltk.classify.accuracy(classifier, test_set))
    # classifier.show_most_informative_features(5)
    # print ""

    # save trained classifier and word features to file
    dump_file = open("classifiers/%s.pickle" % category, "wb")
    pickle.dump({
        "classifier": classifier,
Example no. 25
0
def main():
    aparser = argparse.ArgumentParser(description='Daba disambiguator')
    aparser.add_argument('-v', '--verbose', help='Verbose output', default=False, action='store_true')
    aparser.add_argument('-l', '--learn', help='Learn model from data (and save as F if provided)', default=None)
    aparser.add_argument('-p', '--pos', help='Prediction for POS', default=False, action='store_true')
    aparser.add_argument('-t', '--tone', help='Prediction for tones', default=False, action='store_true')
    aparser.add_argument('-r', '--root', help='Corpus root dir')
    aparser.add_argument('-f', '--filelist', help='Path to a list of files to learn from')
    # aparser.add_argument('-g', '--gloss', help='Prediction for gloses', default=False, action='store_true')
    aparser.add_argument('-e', '--evalsize', type=int, default=10,
                         help='Percent of training data with respect to training and test one (default 10)')
    aparser.add_argument('-d', '--disambiguate', help='Use model F to disambiguate data, the gloss list will be ordered by the probability growth order', default=None)
    aparser.add_argument('--select', help = 'Option that will be taken into account only with the use of -d, which specifies the disambiguation modality is to select only the most likely gloss in each list.', action='store_true')
    aparser.add_argument('-i', '--infile' , help='Input file (.html)' , default=sys.stdin)
    aparser.add_argument('-o', '--outfile', help='Output file (.html)', default=sys.stdout)
    aparser.add_argument('-s', '--store', help='Store tagged raw data in file (.csv) for further research purpose', default=None)

    args = aparser.parse_args()
    if args.verbose:
        print args

    if args.learn and (args.pos or args.tone or args.gloss):

        if not (args.pos or args.tone or args.gloss):
            print 'Choose pos, tone, gloss or combination of them'
            exit(0)

        print 'Make list of files'
        allfiles = []
        with codecs.open(args.filelist, 'r', encoding="utf-8") as filelist:
            for line in filelist:
                allfiles.append(line.strip())
        allsents = []

        # for debugging
        # allfiles = '../corbama/sisoko-daa_ka_kore.dis.html'

        if args.tone:
            try:
                enc = encoder_tones()
            except:
                enc = None
                print ("Error : unable to initialize the tone encoder !")

        print 'Open files and find features / supervision tags'
        for infile in allfiles:
            if(infile):
                print '-', infile
                sent = []

                html_parser = FileParser()
                html_parser.read_file(os.path.join(args.root, infile))

                for snum, sentence in enumerate(html_parser.glosses):
                    for tnum, token in enumerate(sentence[2]):
                        tag = ''
                        if token.type == 'w' or token.type == 'c':
                            tags = ''
                            if args.pos:
                                tags = '/'.join(token.gloss.ps).encode('utf-8')
                                wordform = detone(token.gloss.form)
                                sent.append((wordform, tags))
                            elif args.tone:
                                # Why not learn tonal forms that contain a vertical bar?
                                # Because across the disambiguated corpora such forms occur
                                # fewer than 10 times; the case seems too rare to bring any
                                # real improvement to the tone-marking model. That said,
                                # nothing in the framework's design prevents including them
                                # in the training data and observing their contribution.
                                if '|' not in token.gloss.form :
                                    [codes, chunks] = enc.differential_encode(token.token, token.gloss.form)
                                    for chunk, code in zip(chunks, codes) :
                                        try : sent.append((chunk, code.encode('utf-8')))
                                        except LookupError: pass
                            """
                            elif args.gloss:
                                tags += token.gloss.gloss.encode('utf-8')
                                sent.append((token.token, tags))
                            """

                    if len(sent) > 1:
                        allsents.append(sent)
                        sent = []

        if args.verbose and args.tone:
            enc.report()

        # Build the training and evaluation sets
        p = (1 - args.evalsize / 100.0)
        train_set, eval_set = sampling(allsents, p)
        print 'Split the data in train (', len(train_set),' sentences) / test (', len(eval_set),' sentences)'

        print 'Building classifier (CRF/NLTK)'
        # Initialization
        t1 = time.time()
        if args.tone:
            num_phases = len([False, True]) * len(mode_indicators)
            myzip = zipfile.ZipFile(args.learn + '.zip', 'w')
        else:
            num_phases = 1

        # Training
        for phase in range(num_phases):
            tagger = CRFTagger(verbose = args.verbose, training_opt = {'feature.minfreq' : 10})
            trainer = pycrfsuite.Trainer(verbose = tagger._verbose)
            trainer.set_params(tagger._training_options)
            if num_phases > 1:
                model_name = args.learn + '.' + str(phase)
            else:
                model_name = args.learn

            # train_set : list(list((str,list(str))))
            for sent in train_set:
                tokens = unzip(sent)[0]
                labels = unzip(sent)[1]
                if num_phases > 1:
                    for lab in labels:
                        pass
                    labels = [code_dispatcher(label.decode('utf-8'))[phase].encode('utf-8') for label in labels]
                features = [_get_features_customised_for_tones(tokens, i) for i in range(len(tokens))]
                trainer.append(features, labels)
            trainer.train(model = model_name)
            if num_phases > 1:
                myzip.write(model_name)
                os.remove(model_name)
        if num_phases > 1:
            myzip.close()

        print "... done in", get_duration(t1_secs=t1, t2_secs=time.time())

        # Evaluation
        print 'Evaluating classifier'
        # gold_set, predicted_set : list(list((str, str)))
        # input_set, output_gold_set : list(list(str))
        gold_set = eval_set
        input_set = [unzip(sent)[0] for sent in gold_set]
        predicted_set = [list() for sent in gold_set]
        if num_phases > 1:
            myzip = zipfile.ZipFile(args.learn + '.zip', 'r')
        for phase in range(num_phases):
            tagger = CRFTagger(verbose=args.verbose, training_opt={'feature.minfreq' : 10})
            trainer = pycrfsuite.Trainer(verbose=tagger._verbose)
            trainer.set_params(tagger._training_options)
            if num_phases > 1:
                model_name = args.learn + '.' + str(phase)
                myzip.extract(model_name)
            else:
                model_name = args.learn
            tagger.set_model_file(model_name)
            for i, sent in enumerate(input_set):
                features = [_get_features_customised_for_tones(sent,j) for j in range(len(sent))]
                labels = tagger._tagger.tag(features)
                if num_phases > 1:
                    labels = [code_dispatcher(label.decode('utf-8'))[phase].encode('utf-8') for label in labels]
                tagged_sent = list(zip(sent, labels))
                if not predicted_set[i]:
                    predicted_set[i] = tagged_sent
                else:
                    sent_acc, labels_acc = unzip(predicted_set[i])
                    labels_acc = [label_acc + label for label_acc, label in zip(labels_acc, labels)]
                    predicted_set[i] = list(zip(sent_acc, labels_acc))
            if num_phases > 1:
                os.remove(model_name)
                myzip.close()

        # gold_tokens, predicted_tokens : list((str,str))
        predicted_tokens = list(itertools.chain(*predicted_set))
        if num_phases > 1:
            predicted_tokens = [
                tuple([pair[0], code_resort(pair[1].decode('utf-8')).encode('utf-8')])
                for pair in predicted_tokens]
        gold_tokens = list(itertools.chain(*gold_set))
        # gold_tokens_eval, predicted_tokens_eval : list(str)
        if args.tone:
            gold_tokens_eval = getTag(gold_tokens)
            predicted_tokens_eval = getTag(predicted_tokens)
        else:
            gold_tokens_eval = gold_tokens
            predicted_tokens_eval = predicted_tokens

        if args.store and args.tone:
            stored_filename = args.store
            csv_export(enc, stored_filename, gold_tokens, predicted_tokens)

        print "Accuracy : {:>5.3f}".format(accuracy(gold_tokens_eval, predicted_tokens_eval))

        if args.verbose and args.store:
            print ("Tagged result is exported in {}".format(args.store))

    elif args.disambiguate and args.infile and args.outfile:
        # Read the input .HTML text
        html_parser = FileParser()
        tagger = CRFTagger()

        if args.pos:
            try:
                tagger.set_model_file(args.disambiguate)
            except IOError:
                print "Error : unable to open the model {} !".format(args.infile)
                exit(1)
            try:
                html_parser.read_file(args.infile)
            except IOError:
                print "Error : unable to open the input file {} !".format(args.infile)
                exit(1)

            # Export the disambiguation result to .HTML
            for snum, sentence in enumerate(html_parser.glosses):
                tokens = [token.token for token in sentence[2]]
                features = [_get_features_customised_for_tones(tokens, i) for i in range(len(tokens))]
                tagger._tagger.set(features)
                for tnum, token in enumerate(sentence[2]):
                    options = list()
                    if token.value and len(token.value) > 2:
                        for nopt, option in enumerate(token.value[2]):
                            try:
                                tag = option.ps[0]
                            except IndexError:
                                tag = ''
                            prob = tagger._tagger.marginal(tag, tnum)
                            options.append((prob, option))
                        reordered_probs, reordered_options = unzip(sorted(options, reverse = True))
                        if args.select:
                            prob_max = reordered_probs[0]
                            reordered_options = tuple([
                                reordered_options[i]
                                for i, p in enumerate(reordered_probs)
                                if p >= prob_max])
                        html_parser.glosses[snum][1][tnum] = reordered_options

        elif args.tone:
            pass

        try:
            html_parser.write(args.outfile)
        except IOError: print "Error : unable to create the output file {}".format(args.outfile)

    else:
        aparser.print_help()
    exit(0)
Example no. 26
0

#Printing with more measures, example below
# In[41]:


train_set, test_set = Final_Data[0:747], Final_Data[747:]

import nltk
import collections
from nltk.metrics.scores import (accuracy, precision, recall, f_measure)
nb_classifier = nltk.NaiveBayesClassifier.train(train_set)
nb_classifier.show_most_informative_features(10)

from nltk.classify.util import accuracy
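# note: nltk.classify.util.accuracy(classifier, gold) takes a trained classifier and a
# labelled test set; it shadows the pairwise accuracy imported from nltk.metrics.scores above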
print(accuracy(nb_classifier, test_set))

refsets = collections.defaultdict(set)
testsets = collections.defaultdict(set)
    
for i, (Final_Data, label) in enumerate(test_set):
    refsets[label].add(i)
    observed = nb_classifier.classify(Final_Data)
    testsets[observed].add(i)
    
print('bullying precision:', precision(refsets['Bullying'], testsets['Bullying']))
print('bullying recall:', recall(refsets['Bullying'], testsets['Bullying']))
print('bullying F-measure:', f_measure(refsets['Bullying'], testsets['Bullying']))
print('not-bullying precision:', precision(refsets['Non-Bullying'], testsets['Non-Bullying']))
print('not-bullying recall:', recall(refsets['Non-Bullying'], testsets['Non-Bullying']))
print('not-bullying F-measure:', f_measure(refsets['Non-Bullying'], testsets['Non-Bullying']))
Example no. 27
0
    # Loading the model
    print "Loading the CRF model..."
    tagger = pycrfsuite.Tagger()
    tagger.open(model)

    # Testing progress
    #sys.stdout.write("Testing: ")
    #sys.stdout.flush()
    #pred = []
    #idx = 0
    #for i in featset.items():
    #    idx += 1
    #    if idx % 1000 == 0:
    #        sys.stdout.write('.')
    #        sys.stdout.flush()
    #    pred.append(str(tagger.tag(i)))
    print "Testing..."
    pred = tagger.tag(featset)
    tagger.close()
    pred = [str(p) for p in pred]

    # Show result
    accuracy = scores.accuracy(ref, pred)
    print "\nAccuracy: %.4f" % accuracy
    cm = ConfusionMatrix(ref, pred)
    print "Confusion Matrix:"
    print(cm.pretty_format(sort_by_count=True, show_percents=True, truncate=9))

    # Finished?
    print "DONE!!"
Example no. 28
0
test_data_new = []
test_data_tags = []
for i in range(len(test_set)):
    for j in range(len(test_set[i])):
        test_data_new.append(test_set[i][j][0])
        test_data_tags.append(test_set[i][j][1])
gold_sentences = test_data_new
# print ct.evaluate(gold_sentences)

# print test_data_new
pred_tags = []
refsets = collections.defaultdict(set)
testsets = collections.defaultdict(set)

pred = ct.tag(gold_sentences)
for i in range(len(pred)):
    pred_tags.append(pred[i][1])

for i in range(len(test_data_tags)):
    refsets[test_data_tags[i]].add(i)
    testsets[pred_tags[i]].add(i)

print "CRF language model"
print 'Accuracy:', accuracy(pred_tags, test_data_tags)
print "\n"
print 'Precision of en:', precision(refsets['en'], testsets['en'])
print 'Precision of hi:', precision(refsets['hi'], testsets['hi'])
print "\n"
print 'Recall of en:', recall(refsets['en'], testsets['en'])
print 'Recall of hi:', recall(refsets['hi'], testsets['hi'])
print "\n"
print 'f_measure of en:', f_measure(refsets['en'], testsets['en'])
print 'f_measure of hi:', f_measure(refsets['hi'], testsets['hi'])
print "\n"
Example no. 29
0
def evaluate_segmentation(bc3=False, limit=-1):
    d = "".join(data_to_list(WAPITI_TRAIN_FILE)) # training data
    g = "".join(data_to_list(WAPITI_GOLD_FILE, limit=limit)) # gold string
    temp_r = data_to_list(WAPITI_RESULT_FILE, limit=limit) # result string
    # n = data_to_list("var/union/ngrams_" + WAPITI_RESULT_FILE[-1], limit=limit)
    
    # scores = {}
    r = ""

    for i, col in enumerate(temp_r):
        # score = 0

        # if n[i][:n[i].index("/")] == "T":
        #     score = 1
        # elif col[:col.index("/")] == "T":
        #     score = float(col[col.index("/") + 1:])

        # scores[i] = 
        r += col[:col.index("/")]

    # sorted_indexes = sorted(scores, key=scores.get, reverse=True)
    # indexes = [index for index, score in scores.iteritems() if score > 0.99]

    # r = "." * len(g)

    # n_boundaries = int((float(g.count("T")) / len(g)) * len(g))

    # for i, index in enumerate(sorted_indexes):
    #     r = r[:index] + "T" + r[index + 1:]
    #     if i == n_boundaries:
    #         break
    
    # for index in indexes:
    #     r = r[:index] + "T" + r[index+1:]

    if bc3:
        t = data_to_list(BC3_TEXT_TILING_FILE, limit=limit, label_position=0) # text tiling baseline string
    else:
        t = data_to_list(WAPITI_GOLD_FILE, limit=limit, label_position=-2)

    avg_g = float(len(g)) / (g.count("T") + 1) # average segment size (reference)
    avg_d = float(len(d)) / (d.count("T") + 1) # average segment size (training)

    k = int(avg_g / 2) # window size for WindowDiff

    b = ("T" + (int(math.floor(avg_d)) - 1) * ".") * int(math.ceil(float(len(d)) / int(math.floor(avg_d))))
    b = b[:len(g)] # baseline string

    # WindowDiff
    wdi_rs = (float(windowdiff(g, r, k, boundary="T")) / len(g)) * 100
    wdi_bl = (float(windowdiff(g, b, k, boundary="T")) / len(g)) * 100
    wdi_tt = (float(windowdiff(g, t, k, boundary="T")) / len(g)) * 100

    # Beeferman's Pk
    bpk_rs = (pk(g, r, boundary="T")) * 100
    bpk_bl = (pk(g, b, boundary="T")) * 100
    bpk_tt = (pk(g, t, boundary="T")) * 100

    # Generalized Hamming Distance
    ghd_rs = (ghd(g, r, boundary="T") / len(g)) * 100
    ghd_bl = (ghd(g, b, boundary="T") / len(g)) * 100
    ghd_tt = (ghd(g, t, boundary="T") / len(g)) * 100

    # accuracy
    acc_rs = accuracy(list(g), list(r)) * 100
    acc_bl = accuracy(list(g), list(b)) * 100
    acc_tt = accuracy(list(g), list(t)) * 100

    # precision, recall, f-measure
    pre_rs = metrics.precision_score(list(g), list(r), pos_label="T") * 100
    rec_rs = metrics.recall_score(list(g), list(r), pos_label="T") * 100
    f_1_rs = (2.0 * (rec_rs * pre_rs)) / (rec_rs + pre_rs)

    pre_bl = metrics.precision_score(list(g), list(b), pos_label="T") * 100
    rec_bl = metrics.recall_score(list(g), list(b), pos_label="T") * 100
    f_1_bl = (2.0 * (rec_bl * pre_bl)) / (rec_bl + pre_bl)
    
    pre_tt = metrics.precision_score(list(g), list(t), pos_label="T") * 100
    rec_tt = metrics.recall_score(list(g), list(t), pos_label="T") * 100
    f_1_tt = (2.0 * (rec_tt * pre_tt)) / (rec_tt + pre_tt)

    return acc_rs, acc_bl, acc_tt, pre_rs, pre_bl, pre_tt, rec_rs, rec_bl, rec_tt, f_1_rs, f_1_bl, f_1_tt, wdi_rs, wdi_bl, wdi_tt, bpk_rs, bpk_bl, bpk_tt, ghd_rs, ghd_bl, ghd_tt, g.count("T"), b.count("T"), r.count("T"), t.count("T")