def evaluate_segmentation(result_file, gold_file, train_file, limit=-1,
                          base_result_file=False, smart_combine=True):
    """ Compute scores for the current fold """

    d = data_to_list(train_file)  # training label list
    g = data_to_list(gold_file, limit=limit)  # gold label list
    t = data_to_list(result_file, limit=limit, label_position=-3)  # TextTiling label list

    result_data = data_to_list(result_file, limit=limit, label_position=-2)

    if base_result_file:
        base_result_data = data_to_list(base_result_file, limit=limit, label_position=-2)
        result_data = data_to_list(result_file, limit=limit, label_position=-1)

        max_boundaries = int(d.count("T") * (float(len(g)) / len(d))) if smart_combine else -1
        r = combine_results(result_data, base_result_data,
                            max_boundaries=max_boundaries)  # result label list
    else:
        r = data_to_list(result_file, limit=limit, label_position=-2)  # result label list

    avg_g = float(len(g)) / (g.count("T") + 1)  # average segment size (reference)
    avg_d = float(len(d)) / (d.count("T") + 1)  # average segment size (training)

    k = int(avg_g / 2)  # window size for WindowDiff

    b = list("T" + (int(ceil(avg_d) - 1) * "F")) * int(float(len(g)) / avg_d)
    b = b[:len(g)]  # baseline label list

    ########################################

    # WindowDiff, Beeferman's Pk, Generalized Hamming Distance
    wdi_rs, bpk_rs, ghd_rs = compute_segmentation_scores(g, r, k)
    wdi_bl, bpk_bl, ghd_bl = compute_segmentation_scores(g, b, k)
    wdi_tt, bpk_tt, ghd_tt = compute_segmentation_scores(g, t, k)

    # accuracy
    acc_rs = accuracy(g, r)
    acc_bl = accuracy(g, b)
    acc_tt = accuracy(g, t)

    # precision, recall, f-measure
    pre_rs, rec_rs, f_1_rs = compute_ir_scores(g, r)
    pre_bl, rec_bl, f_1_bl = compute_ir_scores(g, b)
    pre_tt, rec_tt, f_1_tt = compute_ir_scores(g, t)

    ########################################

    return (acc_rs, acc_bl, acc_tt,
            pre_rs, pre_bl, pre_tt,
            rec_rs, rec_bl, rec_tt,
            f_1_rs, f_1_bl, f_1_tt,
            wdi_rs, wdi_bl, wdi_tt,
            bpk_rs, bpk_bl, bpk_tt,
            ghd_rs, ghd_bl, ghd_tt,
            g.count("T"), b.count("T"), r.count("T"), t.count("T"))
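# A minimal usage sketch for evaluate_segmentation, assuming the helpers it
# calls (data_to_list, combine_results, compute_segmentation_scores, ...) are
# in scope; the file names below are placeholders, not real paths:
#
#   scores = evaluate_segmentation("fold0.result", "fold0.gold", "fold0.train",
#                                  base_result_file="fold0.base")
#   acc_rs, acc_bl, acc_tt = scores[:3]  # system vs. baseline vs. TextTiling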
def run_test():
    real = []
    result = []
    vectorizer = TfidfVectorizer()
    start = time.time()
    for answer_id, question in test_cases.items():
        # for t in range(1, 31):
        #     print(f'Running test {t}')
        #     answer_id = str(t)
        #     question = test_cases[answer_id]
        print(f'Running test on id {answer_id}')
        corpus.append(question)
        real.append(answer_id)
        tfidf = vectorizer.fit_transform(corpus)
        question_array = tfidf[-1, :].toarray()[0]
        best_similarity = -1
        best_id = 0
        for i in range(tfidf.shape[0] - 1):
            comparing = tfidf[i, :].toarray()[0]
            similarity = 0
            for j in range(len(question_array)):
                similarity += question_array[j] * comparing[j]
            if similarity > best_similarity:
                best_similarity = similarity
                best_id = id_column[i]
        result.append(best_id)
        del corpus[-1]
    print("Accuracy:", accuracy(real, result))
    print(f'Duration: {time.time() - start}')
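# The manual dot product above behaves as a cosine similarity because
# TfidfVectorizer L2-normalises each row by default (norm='l2'). A vectorised
# sketch of the same lookup, assuming corpus, id_column and tfidf as above:
#
#   import numpy as np
#   sims = (tfidf[:-1] @ tfidf[-1].T).toarray().ravel()
#   best_id = id_column[int(np.argmax(sims))]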
def test_nltk_classifier():
    tagged_words = pos_tag(pure_tokens)
    nltk_unformatted_prediction = ne_chunk(tagged_words)

    # Convert prediction to multiline string and then to list (includes pos tags)
    multiline_string = chunk.tree2conllstr(nltk_unformatted_prediction)
    listed_pos_and_ne = multiline_string.split()

    # Delete pos tags and rename
    del listed_pos_and_ne[1::3]
    listed_ne = listed_pos_and_ne

    # Amend class annotations for consistency with reference_annotations
    for n, i in enumerate(listed_ne):
        if i == "B-PERSON" or i == "I-PERSON":
            listed_ne[n] = "PERSON"
        if i == "B-ORGANIZATION" or i == "I-ORGANIZATION":
            listed_ne[n] = "ORGANIZATION"
        if i == "B-LOCATION" or i == "I-LOCATION":
            listed_ne[n] = "LOCATION"
        if i == "B-GPE" or i == "I-GPE":
            listed_ne[n] = "LOCATION"

    # Group prediction into tuples
    nltk_formatted_prediction = list(group(listed_ne, 2))
    nltk_accuracy = accuracy(reference_annotations, nltk_formatted_prediction)
    print(nltk_accuracy)
    return nltk_accuracy
def tfidf_cosine(data):
    q_train, a_train, q_test, a_test = data
    best_answers = []
    sentences = q_train[:]
    for test_q in q_test:
        best = 0.25
        sentences.append(test_q)

        # Tf-idf-weighted document-term matrix:
        # one sentence per row, one term per column.
        tfidf = TfidfVectorizer().fit_transform(sentences)

        # compute cosine similarity between the query and all other sentences
        vals = cosine_similarity(tfidf[-1], tfidf[:-1])[0]

        # get index of highest similarity
        a_id = vals.argmax()

        # check whether the similarity is significant;
        # otherwise the query is not recognized
        if vals[a_id] < best:
            best_answers.append('0')
        else:
            best_answers.append(a_train[a_id])
        sentences = sentences[:-1]

    # print("Accuracy : ", accuracy(a_test, best_answers))
    return accuracy(a_test, best_answers)
def calculate_metrics(self):
    included_logs = 0
    metrics = {}
    cc = SmoothingFunction()
    for identifier in self._values:
        if self._values[identifier].get('target_text', None) is not None:
            included_logs += 1
            target_text = self._values[identifier]['target_text']
            output_text = self._values[identifier]['output_text']
            metrics['BLEU'] = metrics.get('BLEU', 0) + sentence_bleu(
                [target_text], output_text, smoothing_function=cc.method4)
            metrics['accuracy'] = metrics.get('accuracy', 0) + accuracy(
                target_text, output_text)
            target_text = set(target_text)
            output_text = set(output_text)
            metrics['precision'] = metrics.get('precision', 0) + precision(
                target_text, output_text)
            metrics['recall'] = metrics.get('recall', 0) + recall(
                target_text, output_text)
            metrics['f_measure'] = metrics.get('f_measure', 0) + f_measure(
                target_text, output_text)
    if included_logs != 0:
        for metric in metrics:
            metrics[metric] /= included_logs
    return metrics, included_logs
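# A small self-contained sketch of the metrics combined above; the token
# lists are made-up examples, not data from this project:
#
#   from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
#   from nltk.metrics.scores import accuracy, precision, recall, f_measure
#
#   ref = ['the', 'cat', 'sat', 'on', 'the', 'mat']
#   hyp = ['the', 'cat', 'sat', 'on', 'a', 'mat']
#   cc = SmoothingFunction()
#   print(sentence_bleu([ref], hyp, smoothing_function=cc.method4))
#   print(accuracy(ref, hyp))             # position-wise match rate (5/6 here)
#   print(precision(set(ref), set(hyp)))  # set-based, order ignored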
def test_stanford_classifier():
    st = StanfordNERTagger('../stanford_ner/classifiers/english.all.3class.distsim.crf.ser.gz',
                           '../stanford_ner/stanford-ner.jar',
                           encoding='utf-8')
    stanford_prediction = st.tag(pure_tokens)
    stanford_accuracy = accuracy(reference_annotations, stanford_prediction)
    print(stanford_accuracy)
    return stanford_accuracy
def demo_eval(alignments, gold_file):
    """Evaluate alignments against the reference values in gold_file."""
    test_values = alignment_util.get_test_values(alignments)
    reference_values = alignment_util.get_reference_values(gold_file)

    accuracy = scores.accuracy(reference_values, test_values)
    print("accuracy: %.2f" % accuracy)
def test(self, test_sequence, **kwargs):
    feature_detector = kwargs.get('feature_detector')

    def __tags(token):
        return token[-1]

    def __untag(token):
        return token[0]

    def __featurize(tagged_token):
        tag = tagged_token[-1]
        feats = feature_detector(tagged_token)
        return (feats, tag)

    count = sum(len(sent) for sent in test_sequence)
    correct_tags = LazyMap(__tags, test_sequence)
    untagged_sequence = LazyMap(__untag, LazyMap(__featurize, test_sequence))
    predicted_tags = LazyMap(self.classify, untagged_sequence)

    acc = accuracy(correct_tags, predicted_tags)
    print('accuracy over %d tokens: %.2f' % (count, acc))
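# LazyMap (from nltk.util) applies a function on demand, so the method above
# never materialises the whole feature list at once. A tiny illustration:
#
#   from nltk.util import LazyMap
#   squares = LazyMap(lambda x: x * x, range(5))
#   print(squares[3])     # 9 -- computed only when accessed
#   print(list(squares))  # [0, 1, 4, 9, 16]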
def run_test():
    column_id = {}
    column = 0
    corpus = []
    for answer_id, questions in id_questions.items():
        for question in questions:
            corpus.append(question)
            column_id[column] = answer_id
            column += 1

    real = []
    result = []
    vectorizer = TfidfVectorizer()
    start = time.time()
    print(len(test_cases))
    for i in range(1, 542):
        # for answer_id, question in test_cases.items():
        answer_id = str(i)
        question = test_cases[answer_id]
        print(f'Running test on {answer_id}')
        real.append(answer_id)
        corpus.append(preprocess_sentence(question))
        tfidf_matrix = vectorizer.fit_transform(corpus)
        user_input = tfidf_matrix[-1, :].toarray()[0]
        best_similarity = -1
        best_id = 0
        # iterate over the document rows (shape[0]), excluding the query row;
        # the original indexed shape[1], the vocabulary size, by mistake
        for row in range(tfidf_matrix.shape[0] - 1):
            comparing = tfidf_matrix[row, :].toarray()[0]
            similarity = 0
            for j in range(len(user_input)):
                similarity += user_input[j] * comparing[j]
            if similarity > best_similarity:
                best_similarity = similarity
                best_id = column_id[row]
        result.append(best_id)
        del corpus[-1]
    print("Accuracy:", accuracy(real, result))
    print(f'Duration: {time.time() - start}')
def main():
    # Split the development and training corpora in two: tags and sentences

    # Process the development set
    listaTagsDesenvolvimento = extrai('Corpora/dist-desen-sem-acentos.txt', 1)
    listaFrasesDesenvolvimento = extrai('Corpora/dist-desen-sem-acentos.txt', 2)

    # Process the training set
    listaTagsTreino = extrai('Corpora/dist-treino.txt', 1)
    listaFrasesTreino = extrai('Corpora/dist-treino.txt', 2)

    # ----- Pre-processing -----
    listaFrasesDesenvolvimento = preProc(listaFrasesDesenvolvimento)
    listaFrasesTreino = preProc(listaFrasesTreino)

    # ----- Remove stop words -----
    listaFrasesDesenvolvimento = removeStopWords(listaFrasesDesenvolvimento, stopWords)
    listaFrasesTreino = removeStopWords(listaFrasesTreino, stopWords)

    # ----- Stemming -----
    listaFrasesDesenvolvimento = tokStem(listaFrasesDesenvolvimento)
    listaFrasesTreino = tokStem(listaFrasesTreino)

    # Call the main function once and unpack both results
    # (the original called it twice, doing the work twice)
    listaTagsEstimada, fraseMaisProxima = mainFunction(
        listaTagsTreino, listaFrasesTreino, listaFrasesDesenvolvimento)[:2]

    # Show results
    for a, b, c, d in zip(listaFrasesDesenvolvimento, listaTagsEstimada,
                          listaTagsDesenvolvimento, fraseMaisProxima):
        print("Sentence to evaluate: ", a)
        print("Suggested Tag: ", b)
        print("Correct Tag: ", c)
        print("Closest sentence: ", d, "\n\n")

    # Find accuracy
    print("Accuracy:", accuracy(listaTagsDesenvolvimento, listaTagsEstimada))
def jaccard_distance(data):
    q_train, a_train, q_test, a_test = data
    best_answers = []
    for test_q in q_test:
        best = 1
        a_id = 0
        for el_index in range(len(q_train)):
            # only compare non-empty token sets (the original tested
            # `set(...) != 0`, which is always true)
            if set(test_q.split()) and set(q_train[el_index].split()):
                res_aux = nltk.jaccard_distance(set(test_q.split()),
                                                set(q_train[el_index].split()))
                if res_aux < best:
                    best = res_aux
                    a_id = a_train[el_index]
        best_answers.append(a_id)
    return accuracy(a_test, best_answers)
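# Jaccard distance = 1 - |A ∩ B| / |A ∪ B|; a quick sanity check with nltk,
# using made-up sentences:
#
#   import nltk
#   a = set("how do i reset my password".split())
#   b = set("reset my password".split())
#   print(nltk.jaccard_distance(a, b))  # 0.5 -> 3 shared / 6 total tokens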
def get_measures(reference, test):
    tp = tn = fp = fn = 0
    for ((_, r), (_, t)) in zip(reference, test):
        if r == t == "O":
            tn += 1
        elif r == t == "ORG":
            tp += 1
        elif r == "O" and t == "ORG":
            fp += 1
        elif r == "ORG" and t == "O":
            fn += 1
    matrix = [tp, tn, fp, fn]

    acc = accuracy(reference, test)
    reference_set = set(reference)
    test_set = set(test)
    pre = precision(reference_set, test_set)
    rec = recall(reference_set, test_set)
    f = f_measure(reference_set, test_set)
    return acc, pre, rec, f, matrix
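# nltk.metrics.scores precision/recall are set-based: precision is the share
# of the test set that also appears in the reference set, recall the reverse.
# A tiny made-up example over (token, label) pairs:
#
#   from nltk.metrics.scores import precision, recall
#   ref = {('Acme', 'ORG'), ('Bob', 'O'), ('Paris', 'O')}
#   hyp = {('Acme', 'ORG'), ('Bob', 'ORG'), ('Paris', 'O')}
#   print(precision(ref, hyp))  # 2/3 of predictions are in the reference
#   print(recall(ref, hyp))     # 2/3 of the reference was recovered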
def evaluate_segmentation(bc3=False, limit=0):
    g = data_to_string(WAPITI_GOLD_FILE, limit=limit)  # gold string
    r = data_to_string(WAPITI_RESULT_FILE, limit=limit)  # result string

    if bc3:
        t = data_to_string(BC3_TEXT_TILING_FILE, limit=limit,
                           label_position=0)  # text tiling baseline string
    else:
        t = data_to_string(WAPITI_GOLD_FILE, limit=limit, label_position=-2)

    avg = float(len(g)) / (g.count("T") + 1)  # average segment size
    k = int(avg / 2)  # window size for WindowDiff

    b = ("T" + (int(math.floor(avg)) - 1) * ".") * int(
        math.ceil(float(len(g)) / int(math.floor(avg))))
    b = b[:len(g)]  # baseline string

    print(g[:150])
    print(r[:150])

    # WindowDiff
    wdi = (float(windowdiff(g, r, k, boundary="T")) / len(g)) * 100

    # Beeferman's Pk
    bpk = (pk(g, r, boundary="T")) * 100

    # Generalized Hamming Distance
    ghd = (GHD(g, r, boundary="T") / len(g)) * 100

    # accuracy
    acc = accuracy(list(g), list(r)) * 100

    # precision, recall, f-measure ("T" marks a boundary)
    pre = metrics.precision_score(list(g), list(r), pos_label="T") * 100
    rec = metrics.recall_score(list(g), list(r), pos_label="T") * 100
    f_1 = (2.0 * (rec * pre)) / (rec + pre)  # fixed: used undefined rec_rs/pre_rs

    return acc, pre, rec, f_1, wdi, bpk, ghd, g.count("T"), r.count("T")
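# windowdiff and pk (from nltk.metrics.segmentation) slide a window of size k
# over both label strings and count disagreements about boundary placement.
# A small made-up example with the same "T"-marks-a-boundary convention:
#
#   from nltk.metrics.segmentation import windowdiff, pk
#   gold = "T..T...T.."
#   hyp  = "T...T..T.."
#   k = 3
#   print(windowdiff(gold, hyp, k, boundary="T"))
#   print(pk(gold, hyp, boundary="T"))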
print "Loading test data..." testset = np.load(path) # Load model print "Loading model..." with open(model, 'rb') as fmodel: cls = pickle.load(fmodel) # Run test sys.stdout.write("Testing:") pred = [] idx = 0 for i in testset[:, 0]: idx += 1 if idx % 1000 == 0: sys.stdout.write(".") sys.stdout.flush() pred.append(str(cls.classify(i))) # Result # * Convert Ref Label to ASCII ref = [str(label) for label in testset[:, 1]] accuracy = scores.accuracy(ref, pred) print "\nAccuracy: %.4f" % accuracy cm = ConfusionMatrix(ref, pred) print "Confusion Matrix: " print (cm.pretty_format(sort_by_count=True, show_percents=True, truncate=9)) # Finished? print "DONE!!"
# Features
top_entry = json_response[0]
true_matches = [bool(song['Match']) for song in json_response[1:]]

FEATURE = 'SongName'
NGRAMS = 2

top_entry_value = preproc(top_entry[FEATURE])
print('Comparing song name to top match reference:', top_entry[FEATURE])
top_entry_word_bigrams = set(ngrams(word_tokenize(top_entry_value), NGRAMS))

matches = []
for song in json_response[1:]:
    this_value = preproc(song[FEATURE])
    print('\t%s' % song[FEATURE])
    this_word_bigrams = set(ngrams(word_tokenize(this_value), NGRAMS))
    wbg_distance = jaccard_distance(top_entry_word_bigrams, this_word_bigrams)
    print('\t\tWord bigrams + Jaccard:\t' + str(wbg_distance))
    is_this_match = is_match(wbg_distance)
    print('\t\tMatch?', is_this_match)
    matches.append(is_this_match)

cm = ConfusionMatrix(true_matches, matches)
print('Confusion matrix')
print(cm)
print('Accuracy:', accuracy(true_matches, matches))
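# Word bigrams plus Jaccard distance give a cheap fuzzy-matching score for
# titles. A small sketch with made-up song names:
#
#   from nltk import ngrams, word_tokenize, jaccard_distance
#   a = set(ngrams(word_tokenize("shiny stockings"), 2))
#   b = set(ngrams(word_tokenize("shiny stockings (live)"), 2))
#   print(jaccard_distance(a, b))  # 0.0 would mean identical bigram sets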
listed_ne[n] = "ORGANIZATION" if i == "I-ORGANIZATION": listed_ne[n] = "ORGANIZATION" if i == "B-LOCATION": listed_ne[n] = "LOCATION" if i == "I-LOCATION": listed_ne[n] = "LOCATION" if i == "B-GPE": listed_ne[n] = "LOCATION" if i == "I-GPE": listed_ne[n] = "LOCATION" # Group prediction into tuples nltk_formatted_prediction = list(group(listed_ne, 2)) nltk_accuracy = accuracy(reference_annotations, nltk_formatted_prediction) print(nltk_accuracy) st = StanfordNERTagger( '/usr/share/stanford-ner/classifiers/english.all.3class.distsim.crf.ser.gz', '/usr/share/stanford-ner/stanford-ner.jar', encoding='utf-8') stanford_prediction = st.tag(pure_tokens) stanford_accuracy = accuracy(reference_annotations, stanford_prediction) print(stanford_accuracy) style.use('fivethirtyeight') N = 1 ind = np.arange(N) # the x locations for the groups width = 0.35 # the width of the bars
def main():
    aparser = argparse.ArgumentParser(description='Daba disambiguator')
    aparser.add_argument('-v', '--verbose', help='Verbose output',
                         default=False, action='store_true')
    aparser.add_argument('-l', '--learn',
                         help='Learn model from data (and save as F if provided)',
                         default=None)
    aparser.add_argument('-p', '--pos', help='Prediction for POS',
                         default=False, action='store_true')
    aparser.add_argument('-t', '--tone', help='Prediction for tones',
                         default=False, action='store_true')
    aparser.add_argument('-r', '--root', help='Corpus root dir')
    aparser.add_argument('-f', '--filelist',
                         help='Path to a list of files to learn from')
    # aparser.add_argument('-g', '--gloss', help='Prediction for glosses',
    #                      default=False, action='store_true')
    aparser.add_argument('-e', '--evalsize', type=int, default=10,
                         help='Percent of training data with respect to '
                              'training and test one (default 10)')
    aparser.add_argument('-d', '--disambiguate',
                         help='Use model F to disambiguate data, the gloss list '
                              'will be ordered by the probability growth order',
                         default=None)
    aparser.add_argument('--select',
                         help='Only taken into account together with -d: select '
                              'only the most likely gloss in each list.',
                         action='store_true')
    aparser.add_argument('-i', '--infile', help='Input file (.html)',
                         default=sys.stdin)
    aparser.add_argument('-o', '--outfile', help='Output file (.html)',
                         default=sys.stdout)
    aparser.add_argument('-s', '--store',
                         help='Store tagged raw data in file (.csv) for further '
                              'research purpose',
                         default=None)
    args = aparser.parse_args()
    if args.verbose:
        print(args)

    # NB: the original condition also tested args.gloss, but the --gloss
    # option is commented out above, so that attribute does not exist.
    if args.learn:
        if not (args.pos or args.tone):
            print('Choose pos, tone or a combination of them')
            exit(0)

        print('Make list of files')
        allfiles = []
        with codecs.open(args.filelist, 'r', encoding="utf-8") as filelist:
            for line in filelist:
                allfiles.append(line.strip())
        allsents = []

        # for debugging:
        # allfiles = '../corbama/sisoko-daa_ka_kore.dis.html'

        if args.tone:
            try:
                enc = encoder_tones()
            except Exception:
                enc = None
                print("Error : unable to initialize the tone encoder !")

        print('Open files and find features / supervision tags')
        for infile in allfiles:
            if infile:
                print('-', infile)
                sent = []
                html_parser = FileParser()
                html_parser.read_file(os.path.join(args.root, infile))

                for snum, sentence in enumerate(html_parser.glosses):
                    for tnum, token in enumerate(sentence[2]):
                        tag = ''
                        if token.type == 'w' or token.type == 'c':
                            tags = ''
                            if args.pos:
                                tags = '/'.join(token.gloss.ps)
                                wordform = detone(token.gloss.form)
                                sent.append((wordform, tags))
                            elif args.tone:
                                # Why not learn tonal forms that contain a
                                # vertical bar? Across the disambiguated corpora
                                # they occur fewer than 10 times, too rarely to
                                # bring any real improvement to the tonalization
                                # model. Nothing in the design of the framework,
                                # however, forbids including them in the training
                                # data and observing their contribution.
                                if '|' not in token.gloss.form:
                                    [codes, chunks] = enc.differential_encode(
                                        token.token, token.gloss.form)
                                    for chunk, code in zip(chunks, codes):
                                        try:
                                            sent.append((chunk, code))
                                        except LookupError:
                                            pass
                            """
                            elif args.gloss:
                                tags += token.gloss.gloss
                                sent.append((token.token, tags))
                            """

                    if len(sent) > 1:
                        allsents.append(sent)
                        sent = []

        if args.verbose and args.tone:
            enc.report()

        # Build the training and evaluation sets
        p = (1 - args.evalsize / 100.0)
        train_set, eval_set = sampling(allsents, p)
        print('Split the data in train (', len(train_set),
              ' sentences) / test (', len(eval_set), ' sentences)')

        print('Building classifier (CRF/NLTK)')
        # Initialization
        t1 = time.time()
        if args.tone:
            num_phases = len([False, True]) * len(mode_indicators)
            myzip = zipfile.ZipFile(args.learn + '.zip', 'w')
        else:
            num_phases = 1

        # Training
        for phase in range(num_phases):
            tagger = CRFTagger(verbose=args.verbose,
                               training_opt={'feature.minfreq': 10})
            trainer = pycrfsuite.Trainer(verbose=tagger._verbose)
            trainer.set_params(tagger._training_options)
            if num_phases > 1:
                model_name = args.learn + '.' + str(phase)
            else:
                model_name = args.learn

            # train_set : list(list((str, list(str))))
            for sent in train_set:
                tokens = unzip(sent)[0]
                labels = unzip(sent)[1]
                if num_phases > 1:
                    labels = [code_dispatcher(label)[phase] for label in labels]
                features = [_get_features_customised_for_tones(tokens, i)
                            for i in range(len(tokens))]
                trainer.append(features, labels)

            trainer.train(model=model_name)
            if num_phases > 1:
                myzip.write(model_name)
                os.remove(model_name)
        if num_phases > 1:
            myzip.close()

        print("... done in", get_duration(t1_secs=t1, t2_secs=time.time()))

        # Evaluation
        print('Evaluating classifier')
        # gold_set, predicted_set : list(list((str, str)))
        # input_set, output_gold_set : list(list(str))
        gold_set = eval_set
        input_set = [unzip(sent)[0] for sent in gold_set]
        predicted_set = [list() for sent in gold_set]
        if num_phases > 1:
            myzip = zipfile.ZipFile(args.learn + '.zip', 'r')
        for phase in range(num_phases):
            tagger = CRFTagger(verbose=args.verbose,
                               training_opt={'feature.minfreq': 10})
            trainer = pycrfsuite.Trainer(verbose=tagger._verbose)
            trainer.set_params(tagger._training_options)
            if num_phases > 1:
                model_name = args.learn + '.' + str(phase)
                myzip.extract(model_name)
            else:
                model_name = args.learn
            tagger.set_model_file(model_name)
            for i, sent in enumerate(input_set):
                features = [_get_features_customised_for_tones(sent, j)
                            for j in range(len(sent))]
                labels = tagger._tagger.tag(features)
                if num_phases > 1:
                    labels = [code_dispatcher(label)[phase] for label in labels]
                tagged_sent = list(zip(sent, labels))
                if not predicted_set[i]:
                    predicted_set[i] = tagged_sent
                else:
                    sent_acc, labels_acc = unzip(predicted_set[i])
                    labels_acc = [label_acc + label
                                  for label_acc, label in zip(labels_acc, labels)]
                    predicted_set[i] = list(zip(sent_acc, labels_acc))
            if num_phases > 1:
                os.remove(model_name)
        if num_phases > 1:
            myzip.close()

        # gold_tokens, predicted_tokens : list((str, str))
        predicted_tokens = list(itertools.chain(*predicted_set))
        if num_phases > 1:
            predicted_tokens = [tuple([pair[0], code_resort(pair[1])])
                                for pair in predicted_tokens]
        gold_tokens = list(itertools.chain(*gold_set))

        # gold_tokens_eval, predicted_tokens_eval : list(str)
        if args.tone:
            gold_tokens_eval = getTag(gold_tokens)
            predicted_tokens_eval = getTag(predicted_tokens)
        else:
            gold_tokens_eval = gold_tokens
            predicted_tokens_eval = predicted_tokens

        if args.store and args.tone:
            stored_filename = args.store
            csv_export(enc, stored_filename, gold_tokens, predicted_tokens)

        print("Accuracy : {:>5.3f}".format(
            accuracy(gold_tokens_eval, predicted_tokens_eval)))

        if args.verbose and args.store:
            print("Tagged result is exported in {}".format(args.store))

    elif args.disambiguate and args.infile and args.outfile:
        # Read the input .html text
        html_parser = FileParser()
        tagger = CRFTagger()

        if args.pos:
            try:
                tagger.set_model_file(args.disambiguate)
            except IOError:
                print("Error : unable to open the model {} !".format(args.infile))
                exit(1)
            try:
                html_parser.read_file(args.infile)
            except IOError:
                print("Error : unable to open the input file {} !".format(args.infile))
                exit(1)

            # Export the disambiguation result as .html
            for snum, sentence in enumerate(html_parser.glosses):
                tokens = [token.token for token in sentence[2]]
                features = [_get_features_customised_for_tones(tokens, i)
                            for i in range(len(tokens))]
                tagger._tagger.set(features)
                for tnum, token in enumerate(sentence[2]):
                    options = list()
                    if token.value and len(token.value) > 2:
                        for nopt, option in enumerate(token.value[2]):
                            try:
                                tag = option.ps[0]
                            except IndexError:
                                tag = ''
                            prob = tagger._tagger.marginal(tag, tnum)
                            options.append((prob, option))
                        reordered_probs, reordered_options = unzip(
                            sorted(options, reverse=True))
                        if args.select:
                            prob_max = reordered_probs[0]
                            reordered_options = tuple(
                                [reordered_options[i]
                                 for i, p in enumerate(reordered_probs)
                                 if p >= prob_max])
                        html_parser.glosses[snum][1][tnum] = reordered_options

        elif args.tone:
            pass

        try:
            html_parser.write(args.outfile)
        except IOError:
            print("Error : unable to create the output file {}".format(args.outfile))

    else:
        aparser.print_help()
        exit(0)
def getAccuracy(self):
    return accuracy(self._classifiedResults, self._actualResults)
def lindy_speed(gs, speeds, limit):
    acc = str(accuracy(gs, [i < limit for i in speeds]))
    print("lindy speed under " + str(limit) + ": " + acc)
def bal_speed(gs, speeds, limit):
    acc = str(accuracy(gs, [i >= limit for i in speeds]))
    print("bal speed over " + str(limit) + ": " + acc)
if __name__ == '__main__':
    bal_gold, lindy_gold = gold_standards()

    attributes = open('song_attributes.csv', 'r')
    attribute_rows = list(csv.reader(attributes))
    speeds = [int(row[0]) for row in attribute_rows]
    triplety = [row[1] == '1' for row in attribute_rows]
    backbeat = [row[2] == '1' for row in attribute_rows]
    accent_m = [row[3] == '1' for row in attribute_rows]
    big_band = [row[4] == '0' for row in attribute_rows]
    crashy_c = [row[5] == '1' for row in attribute_rows]

    print("bal triplety: " + str(accuracy(bal_gold, [not i for i in triplety])))
    print("bal backbeat: " + str(accuracy(bal_gold, [not i for i in backbeat])))
    print("bal accent_m: " + str(accuracy(bal_gold, [not i for i in accent_m])))
    print("bal big_band: " + str(accuracy(bal_gold, big_band)))

    print("lindy triplety: " + str(accuracy(lindy_gold, triplety)))
    print("lindy backbeat: " + str(accuracy(lindy_gold, backbeat)))
    print("lindy accent_m: " + str(accuracy(lindy_gold, accent_m)))
    print("lindy big_band: " + str(accuracy(lindy_gold, big_band)))

    print("bal tempos:")
    for song_index, is_bal in enumerate(bal_gold):
        if is_bal:
            print(speeds[song_index])
# [category, "no"] unless this is true then ["no", category] flip = classifier.labels()[0] == "no" categorized_proportion = len([words for (words, categories) in corpus if category in categories]) * 1.0 / len(corpus) if flip: model.class_prior = [1-categorized_proportion, categorized_proportion] else: model.class_prior = [categorized_proportion, 1-categorized_proportion] classifier.train(train_set) # test classifier test_results = classifier.classify_many([feat for (feat, label) in test_set]) pos_test_set = set(i for i, result in enumerate(test_results) if result == category) reference_values = [label for (feat, label) in test_set] pos_ref_set = set(i for i, (feat, label) in enumerate(test_set) if label == category) accuracy = scores.accuracy(reference_values, test_results) accuracies.append(accuracy) precision = scores.precision(pos_ref_set, pos_test_set) recall = scores.recall(pos_ref_set, pos_test_set) f1 = scores.f_measure(pos_ref_set, pos_test_set) f1_scores.append(f1) print "%s: accuracy %s, precision %s, recall %s, F1 %s" % (colored(category, "blue"), colored(accuracy, "yellow"), colored(precision, "yellow"), colored(recall, "yellow"), colored(f1, "yellow")) ## print(nltk.classify.accuracy(classifier, test_set)) # classifier.show_most_informative_features(5) # print "" # save trained classifier and word features to file dump_file = open("classifiers/%s.pickle" % category, "wb") pickle.dump({ "classifier": classifier,
def main():
    aparser = argparse.ArgumentParser(description='Daba disambiguator')
    aparser.add_argument('-v', '--verbose', help='Verbose output', default=False, action='store_true')
    aparser.add_argument('-l', '--learn', help='Learn model from data (and save as F if provided)', default=None)
    aparser.add_argument('-p', '--pos', help='Prediction for POS', default=False, action='store_true')
    aparser.add_argument('-t', '--tone', help='Prediction for tones', default=False, action='store_true')
    aparser.add_argument('-r', '--root', help='Corpus root dir')
    aparser.add_argument('-f', '--filelist', help='Path to a list of files to learn from')
    # aparser.add_argument('-g', '--gloss', help='Prediction for glosses', default=False, action='store_true')
    aparser.add_argument('-e', '--evalsize', type=int, default=10,
                         help='Percent of training data with respect to training and test one (default 10)')
    aparser.add_argument('-d', '--disambiguate',
                         help='Use model F to disambiguate data, the gloss list will be ordered by the probability growth order',
                         default=None)
    aparser.add_argument('--select',
                         help='Only taken into account together with -d: select only the most likely gloss in each list.',
                         action='store_true')
    aparser.add_argument('-i', '--infile', help='Input file (.html)', default=sys.stdin)
    aparser.add_argument('-o', '--outfile', help='Output file (.html)', default=sys.stdout)
    aparser.add_argument('-s', '--store', help='Store tagged raw data in file (.csv) for further research purpose', default=None)
    args = aparser.parse_args()
    if args.verbose:
        print args

    # NB: the original condition also tested args.gloss, but the --gloss
    # option is commented out above, so that attribute does not exist.
    if args.learn:
        if not (args.pos or args.tone):
            print 'Choose pos, tone or a combination of them'
            exit(0)

        print 'Make list of files'
        allfiles = []
        with codecs.open(args.filelist, 'r', encoding="utf-8") as filelist:
            for line in filelist:
                allfiles.append(line.strip())
        allsents = []

        # for debugging:
        # allfiles = '../corbama/sisoko-daa_ka_kore.dis.html'

        if args.tone:
            try:
                enc = encoder_tones()
            except Exception:
                enc = None
                print ("Error : unable to initialize the tone encoder !")

        print 'Open files and find features / supervision tags'
        for infile in allfiles:
            if infile:
                print '-', infile
                sent = []
                html_parser = FileParser()
                html_parser.read_file(os.path.join(args.root, infile))

                for snum, sentence in enumerate(html_parser.glosses):
                    for tnum, token in enumerate(sentence[2]):
                        tag = ''
                        if token.type == 'w' or token.type == 'c':
                            tags = ''
                            if args.pos:
                                tags = '/'.join(token.gloss.ps).encode('utf-8')
                                wordform = detone(token.gloss.form)
                                sent.append((wordform, tags))
                            elif args.tone:
                                # Why not learn tonal forms that contain a vertical
                                # bar? Across the disambiguated corpora they occur
                                # fewer than 10 times, too rarely to bring any real
                                # improvement to the tonalization model. Nothing in
                                # the design of the framework, however, forbids
                                # including them in the training data and observing
                                # their contribution.
                                if '|' not in token.gloss.form:
                                    [codes, chunks] = enc.differential_encode(token.token, token.gloss.form)
                                    for chunk, code in zip(chunks, codes):
                                        try:
                                            sent.append((chunk, code.encode('utf-8')))
                                        except LookupError:
                                            pass
                            """
                            elif args.gloss:
                                tags += token.gloss.gloss.encode('utf-8')
                                sent.append((token.token, tags))
                            """

                    if len(sent) > 1:
                        allsents.append(sent)
                        sent = []

        if args.verbose and args.tone:
            enc.report()

        # Build the training and evaluation sets
        p = (1 - args.evalsize / 100.0)
        train_set, eval_set = sampling(allsents, p)
        print 'Split the data in train (', len(train_set), ' sentences) / test (', len(eval_set), ' sentences)'

        print 'Building classifier (CRF/NLTK)'
        # Initialization
        t1 = time.time()
        if args.tone:
            num_phases = len([False, True]) * len(mode_indicators)
            myzip = zipfile.ZipFile(args.learn + '.zip', 'w')
        else:
            num_phases = 1

        # Training
        for phase in range(num_phases):
            tagger = CRFTagger(verbose=args.verbose, training_opt={'feature.minfreq': 10})
            trainer = pycrfsuite.Trainer(verbose=tagger._verbose)
            trainer.set_params(tagger._training_options)
            if num_phases > 1:
                model_name = args.learn + '.' + str(phase)
            else:
                model_name = args.learn

            # train_set : list(list((str, list(str))))
            for sent in train_set:
                tokens = unzip(sent)[0]
                labels = unzip(sent)[1]
                if num_phases > 1:
                    labels = [code_dispatcher(label.decode('utf-8'))[phase].encode('utf-8')
                              for label in labels]
                features = [_get_features_customised_for_tones(tokens, i) for i in range(len(tokens))]
                trainer.append(features, labels)

            trainer.train(model=model_name)
            if num_phases > 1:
                myzip.write(model_name)
                os.remove(model_name)
        if num_phases > 1:
            myzip.close()

        print "... done in", get_duration(t1_secs=t1, t2_secs=time.time())

        # Evaluation
        print 'Evaluating classifier'
        # gold_set, predicted_set : list(list((str, str)))
        # input_set, output_gold_set : list(list(str))
        gold_set = eval_set
        input_set = [unzip(sent)[0] for sent in gold_set]
        predicted_set = [list() for sent in gold_set]
        if num_phases > 1:
            myzip = zipfile.ZipFile(args.learn + '.zip', 'r')
        for phase in range(num_phases):
            tagger = CRFTagger(verbose=args.verbose, training_opt={'feature.minfreq': 10})
            trainer = pycrfsuite.Trainer(verbose=tagger._verbose)
            trainer.set_params(tagger._training_options)
            if num_phases > 1:
                model_name = args.learn + '.' + str(phase)
                myzip.extract(model_name)
            else:
                model_name = args.learn
            tagger.set_model_file(model_name)
            for i, sent in enumerate(input_set):
                features = [_get_features_customised_for_tones(sent, j) for j in range(len(sent))]
                labels = tagger._tagger.tag(features)
                if num_phases > 1:
                    labels = [code_dispatcher(label.decode('utf-8'))[phase].encode('utf-8')
                              for label in labels]
                tagged_sent = list(zip(sent, labels))
                if not predicted_set[i]:
                    predicted_set[i] = tagged_sent
                else:
                    sent_acc, labels_acc = unzip(predicted_set[i])
                    labels_acc = [label_acc + label for label_acc, label in zip(labels_acc, labels)]
                    predicted_set[i] = list(zip(sent_acc, labels_acc))
            if num_phases > 1:
                os.remove(model_name)
        if num_phases > 1:
            myzip.close()

        # gold_tokens, predicted_tokens : list((str, str))
        predicted_tokens = list(itertools.chain(*predicted_set))
        if num_phases > 1:
            predicted_tokens = [tuple([pair[0], code_resort(pair[1].decode('utf-8')).encode('utf-8')])
                                for pair in predicted_tokens]
        gold_tokens = list(itertools.chain(*gold_set))

        # gold_tokens_eval, predicted_tokens_eval : list(str)
        if args.tone:
            gold_tokens_eval = getTag(gold_tokens)
            predicted_tokens_eval = getTag(predicted_tokens)
        else:
            gold_tokens_eval = gold_tokens
            predicted_tokens_eval = predicted_tokens

        if args.store and args.tone:
            stored_filename = args.store
            csv_export(enc, stored_filename, gold_tokens, predicted_tokens)

        print "Accuracy : {:>5.3f}".format(accuracy(gold_tokens_eval, predicted_tokens_eval))

        if args.verbose and args.store:
            print ("Tagged result is exported in {}".format(args.store))

    elif args.disambiguate and args.infile and args.outfile:
        # Read the input .html text
        html_parser = FileParser()
        tagger = CRFTagger()

        if args.pos:
            try:
                tagger.set_model_file(args.disambiguate)
            except IOError:
                print "Error : unable to open the model {} !".format(args.infile)
                exit(1)
            try:
                html_parser.read_file(args.infile)
            except IOError:
                print "Error : unable to open the input file {} !".format(args.infile)
                exit(1)

            # Export the disambiguation result as .html
            for snum, sentence in enumerate(html_parser.glosses):
                tokens = [token.token for token in sentence[2]]
                features = [_get_features_customised_for_tones(tokens, i) for i in range(len(tokens))]
                tagger._tagger.set(features)
                for tnum, token in enumerate(sentence[2]):
                    options = list()
                    if token.value and len(token.value) > 2:
                        for nopt, option in enumerate(token.value[2]):
                            try:
                                tag = option.ps[0]
                            except IndexError:
                                tag = ''
                            prob = tagger._tagger.marginal(tag, tnum)
                            options.append((prob, option))
                        reordered_probs, reordered_options = unzip(sorted(options, reverse=True))
                        if args.select:
                            prob_max = reordered_probs[0]
                            reordered_options = tuple([reordered_options[i]
                                                       for i, p in enumerate(reordered_probs)
                                                       if p >= prob_max])
                        html_parser.glosses[snum][1][tnum] = reordered_options

        elif args.tone:
            pass

        try:
            html_parser.write(args.outfile)
        except IOError:
            print "Error : unable to create the output file {}".format(args.outfile)

    else:
        aparser.print_help()
        exit(0)
# Printing with more measures, example below

# In[41]:

train_set, test_set = Final_Data[0:747], Final_Data[747:]

import nltk
import collections
from nltk.metrics.scores import (accuracy, precision, recall, f_measure)

nb_classifier = nltk.NaiveBayesClassifier.train(train_set)
nb_classifier.show_most_informative_features(10)

# nltk.classify.util.accuracy takes (classifier, gold); alias it so it does
# not shadow the label-sequence accuracy imported above
from nltk.classify.util import accuracy as classifier_accuracy
print(classifier_accuracy(nb_classifier, test_set))

refsets = collections.defaultdict(set)
testsets = collections.defaultdict(set)

for i, (feats, label) in enumerate(test_set):
    refsets[label].add(i)
    observed = nb_classifier.classify(feats)
    testsets[observed].add(i)

print('bullying precision:', precision(refsets['Bullying'], testsets['Bullying']))
print('bullying recall:', recall(refsets['Bullying'], testsets['Bullying']))
print('bullying F-measure:', f_measure(refsets['Bullying'], testsets['Bullying']))
print('not-bullying precision:', precision(refsets['Non-Bullying'], testsets['Non-Bullying']))
print('not-bullying recall:', recall(refsets['Non-Bullying'], testsets['Non-Bullying']))
print('not-bullying F-measure:', f_measure(refsets['Non-Bullying'], testsets['Non-Bullying']))
# Loading the model
print("Loading the CRF model...")
tagger = pycrfsuite.Tagger()
tagger.open(model)

# Testing progress
# sys.stdout.write("Testing: ")
# sys.stdout.flush()
# pred = []
# idx = 0
# for i in featset.items():
#     idx += 1
#     if idx % 1000 == 0:
#         sys.stdout.write('.')
#         sys.stdout.flush()
#     pred.append(str(tagger.tag(i)))

print("Testing...")
pred = tagger.tag(featset)
tagger.close()
pred = [str(p) for p in pred]

# Show result
accuracy = scores.accuracy(ref, pred)
print("\nAccuracy: %.4f" % accuracy)

cm = ConfusionMatrix(ref, pred)
print("Confusion Matrix:")
print(cm.pretty_format(sort_by_count=True, show_percents=True, truncate=9))

# Finished?
print("DONE!!")
# (fragment: the first two appends sit inside elided loops over i, j of test_set)
test_data_new.append(test_set[i][j][0])
test_data_tags.append(test_set[i][j][1])

gold_sentences = test_data_new
# print ct.evaluate(gold_sentences)
# print test_data_new

pred_tags = []
refsets = collections.defaultdict(set)
testsets = collections.defaultdict(set)

pred = ct.tag(gold_sentences)
for i in range(len(pred)):
    pred_tags.append(pred[i][1])

for i in range(len(test_data_tags)):
    refsets[test_data_tags[i]].add(i)
    testsets[pred_tags[i]].add(i)

print "CRF language model"
print 'Accuracy:', accuracy(pred_tags, test_data_tags)
print "\n"
print 'Precision of en:', precision(refsets['en'], testsets['en'])
print 'Precision of hi:', precision(refsets['hi'], testsets['hi'])
print "\n"
print 'Recall of en:', recall(refsets['en'], testsets['en'])
print 'Recall of hi:', recall(refsets['hi'], testsets['hi'])
print "\n"
print 'f_measure of en:', f_measure(refsets['en'], testsets['en'])
print 'f_measure of hi:', f_measure(refsets['hi'], testsets['hi'])
print "\n"
def evaluate_segmentation(bc3=False, limit=-1):
    d = "".join(data_to_list(WAPITI_TRAIN_FILE))  # training data
    g = "".join(data_to_list(WAPITI_GOLD_FILE, limit=limit))  # gold string
    temp_r = data_to_list(WAPITI_RESULT_FILE, limit=limit)  # result string
    # n = data_to_list("var/union/ngrams_" + WAPITI_RESULT_FILE[-1], limit=limit)
    # scores = {}

    r = ""
    for i, col in enumerate(temp_r):
        # score = 0
        # if n[i][:n[i].index("/")] == "T":
        #     score = 1
        # elif col[:col.index("/")] == "T":
        #     score = float(col[col.index("/") + 1:])
        # scores[i] =
        r += col[:col.index("/")]

    # sorted_indexes = sorted(scores, key=scores.get, reverse=True)
    # indexes = [index for index, score in scores.iteritems() if score > 0.99]
    # r = "." * len(g)
    # n_boundaries = int((float(g.count("T")) / len(g)) * len(g))
    # for i, index in enumerate(sorted_indexes):
    #     r = r[:index] + "T" + r[index + 1:]
    #     if i == n_boundaries:
    #         break
    # for index in indexes:
    #     r = r[:index] + "T" + r[index+1:]

    if bc3:
        t = data_to_list(BC3_TEXT_TILING_FILE, limit=limit,
                         label_position=0)  # text tiling baseline string
    else:
        t = data_to_list(WAPITI_GOLD_FILE, limit=limit, label_position=-2)

    avg_g = float(len(g)) / (g.count("T") + 1)  # average segment size (reference)
    avg_d = float(len(d)) / (d.count("T") + 1)  # average segment size (training)

    k = int(avg_g / 2)  # window size for WindowDiff

    b = ("T" + (int(math.floor(avg_d)) - 1) * ".") * int(
        math.ceil(float(len(d)) / int(math.floor(avg_d))))
    b = b[:len(g)]  # baseline string

    # WindowDiff
    wdi_rs = (float(windowdiff(g, r, k, boundary="T")) / len(g)) * 100
    wdi_bl = (float(windowdiff(g, b, k, boundary="T")) / len(g)) * 100
    wdi_tt = (float(windowdiff(g, t, k, boundary="T")) / len(g)) * 100

    # Beeferman's Pk
    bpk_rs = (pk(g, r, boundary="T")) * 100
    bpk_bl = (pk(g, b, boundary="T")) * 100
    bpk_tt = (pk(g, t, boundary="T")) * 100

    # Generalized Hamming Distance
    ghd_rs = (ghd(g, r, boundary="T") / len(g)) * 100
    ghd_bl = (ghd(g, b, boundary="T") / len(g)) * 100
    ghd_tt = (ghd(g, t, boundary="T") / len(g)) * 100

    # accuracy
    acc_rs = accuracy(list(g), list(r)) * 100
    acc_bl = accuracy(list(g), list(b)) * 100
    acc_tt = accuracy(list(g), list(t)) * 100

    # precision, recall, f-measure
    pre_rs = metrics.precision_score(list(g), list(r), pos_label="T") * 100
    rec_rs = metrics.recall_score(list(g), list(r), pos_label="T") * 100
    f_1_rs = (2.0 * (rec_rs * pre_rs)) / (rec_rs + pre_rs)

    pre_bl = metrics.precision_score(list(g), list(b), pos_label="T") * 100
    rec_bl = metrics.recall_score(list(g), list(b), pos_label="T") * 100
    f_1_bl = (2.0 * (rec_bl * pre_bl)) / (rec_bl + pre_bl)

    pre_tt = metrics.precision_score(list(g), list(t), pos_label="T") * 100
    rec_tt = metrics.recall_score(list(g), list(t), pos_label="T") * 100
    f_1_tt = (2.0 * (rec_tt * pre_tt)) / (rec_tt + pre_tt)

    return (acc_rs, acc_bl, acc_tt,
            pre_rs, pre_bl, pre_tt,
            rec_rs, rec_bl, rec_tt,
            f_1_rs, f_1_bl, f_1_tt,
            wdi_rs, wdi_bl, wdi_tt,
            bpk_rs, bpk_bl, bpk_tt,
            ghd_rs, ghd_bl, ghd_tt,
            g.count("T"), b.count("T"), r.count("T"), t.count("T"))