file.close()

# Keep only tokens whose gold label is 'lang1', 'lang2' or 'other';
# skip 'ambiguous', 'fw', 'mixed', 'ne' and 'unk' tokens
t_ = []
y_ = []
for i in range(len(t)):
    label = t[i]
    pred = y[i]
    if (label == 'ambiguous' or label == 'fw' or label == 'mixed'
            or label == 'ne' or label == 'unk'):
        continue
    else:
        t_.append(label)
        y_.append(pred)

if (evaluation_dataset == 'test-original'):
    save_predictions(y_, './results/predictions/en-es/predictions_test_original_mBERT.txt')
    exit(1)

# Get accuracy
acc = accuracy_score(t_, y_)
print("Accuracy: " + str(acc))

# F1 score
f1 = f1_score(t_, y_, average=None)
print("F1 score per class: " + str(f1))

# F1 score weighted
f1_weighted = f1_score(t_, y_, average='weighted')
print("Weighted F1 score: " + str(f1_weighted))

# Confusion matrix
conf_matrix = confusion_matrix(t_, y_)
# Majority vote: count each classifier's predicted label per word and keep the most frequent one
for i in range(nr_words):
    if (words[i] != ''):
        for j in range(nr_predictions):
            predicted_label = results[j][i]
            results_dict[i][predicted_label] += 1
        predictions.append(max(results_dict[i].items(), key=operator.itemgetter(1))[0])
    else:
        predictions.append('')

best_ensembly = perm
best_predictions = predictions
print(best_ensembly)

if (evaluation_dataset == 'test-original'):
    save_predictions(best_predictions, './results/predictions/' + lang1_code + '-' + lang2_code
                     + '/predictions_test_original_ensemble_all.txt')
    exit(1)

# Get accuracy
print("Accuracy: " + str(best_acc))

# F1 score
f1 = f1_score(t, best_predictions, average=None)
print("F1 score per class: " + str(f1))

# F1 score weighted
f1_weighted = f1_score(t, best_predictions, average='weighted')
print("Weighted F1 score: " + str(f1_weighted))

# Confusion matrix
conf_matrix = confusion_matrix(t, best_predictions)
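A minimal, self-contained sketch of the same per-word majority vote using collections.Counter instead of a manually maintained results_dict; the function name majority_vote and the variable classifier_outputs below are hypothetical illustrations, not names from the scripts above, and the sketch ignores the empty-word case handled there.

from collections import Counter

def majority_vote(classifier_outputs):
    # classifier_outputs: one prediction list per classifier, all aligned
    # to the same word positions.
    merged = []
    for votes in zip(*classifier_outputs):
        # most_common(1) returns the label with the highest vote count
        merged.append(Counter(votes).most_common(1)[0][0])
    return merged

# Example: three classifiers voting on three words
print(majority_vote([['lang1', 'lang2', 'other'],
                     ['lang1', 'lang1', 'other'],
                     ['lang2', 'lang1', 'other']]))
# -> ['lang1', 'lang1', 'other']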
        if (is_other(word)):
            lang = 'other'
        else:
            if (prob_lang1 >= prob_lang2):
                lang = 'lang1'
            else:
                lang = 'lang2'
        y.append(lang)
        predictions_dict[word] = lang
    else:
        y.append('')

if (evaluation_dataset == 'test-original'):
    save_predictions(
        y, './results/predictions/' + lang1_code + '-' + lang2_code
        + '/predictions_test_original_probabilities.txt')
    exit(1)

# Get accuracy
acc = accuracy_score(t, y)
print("Accuracy: " + str(acc))

# F1 score
f1 = f1_score(t, y, average=None)
print("F1 score per class: " + str(f1))

# F1 score weighted
f1_weighted = f1_score(t, y, average='weighted')
print("Weighted F1 score: " + str(f1_weighted))
                lang = 'lang1'
            else:
                lang = 'lang2'
        y.append(lang)
        predictions_dict[word] = lang
        if counter % 10000 == 0:
            print(f"{counter} of {len(words)}")
        counter += 1
    else:
        y.append('')

if (evaluation_dataset == 'test-original'):
    save_predictions(
        y, './results/predictions/' + lang1_code + '-' + lang2_code
        + '/predictions_test_original_char_' + str(n) + '_grams.txt')
    exit(1)

# Get accuracy
acc = accuracy_score(t, y)
print("Accuracy: " + str(acc))

# F1 score
f1 = f1_score(t, y, average=None)
print("F1 score per class: " + str(f1))

# F1 score weighted
f1_weighted = f1_score(t, y, average='weighted')
print("Weighted F1 score: " + str(f1_weighted))
        y_sentence = identifier.identify(lang_tokens)
        for index in other_indexes:
            y_sentence.insert(index, 'other')
    # For sentences that are made up only of 'other' words
    else:
        y_sentence = []
        for index in other_indexes:
            y_sentence.append('other')
    for i in range(len(tokens)):
        predictions_dict[tokens[i]] = y_sentence[i]
    y.append(y_sentence)

if (evaluation_dataset == 'test-original'):
    save_predictions(
        y, './results/predictions/' + lang1_code + '-' + lang2_code
        + '/predictions_test_original_viterbi_v1.txt')
    exit(1)

# Own test set with labels
# Flatten y list
y = [item for y_sent in y for item in y_sent]

# Get accuracy
acc = accuracy_score(t, y)
print("Accuracy: " + str(acc))

# F1 score
f1 = f1_score(t, y, average=None)
print("F1 score per class: " + str(f1))
predictions_dict = dict()
for word in words:
    if (word != ''):
        if (is_other(word)):
            lang = 'other'
        else:
            word_vect = vectorizer.transform([word])
            lang = logist_regression.predict(word_vect)[0]
        y.append(lang)
        predictions_dict[word] = lang
    else:
        y.append('')

if (evaluation_dataset == 'test-original'):
    save_predictions(
        y, './results/predictions/' + lang1_code + '-' + lang2_code
        + '/predictions_test_original_LogisticRegression.txt')
    exit(1)

# Get accuracy
acc = accuracy_score(t, y)
print("Accuracy: " + str(acc))

# F1 score
f1 = f1_score(t, y, average=None)
print("F1 score per class: " + str(f1))

# F1 score weighted
f1_weighted = f1_score(t, y, average='weighted')
print("Weighted F1 score: " + str(f1_weighted))
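For context, a minimal sketch of how the vectorizer and logist_regression objects used above could be fitted with scikit-learn; the training data (train_words, train_labels) is a hypothetical placeholder, and the character n-gram feature settings are an assumption rather than the configuration used in the script above.

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression

# Hypothetical training data: word tokens with their gold language labels
train_words = ['hello', 'world', 'hola', 'mundo']
train_labels = ['lang1', 'lang1', 'lang2', 'lang2']

# Character n-gram features over each word (assumed configuration)
vectorizer = CountVectorizer(analyzer='char_wb', ngram_range=(1, 3))
X_train = vectorizer.fit_transform(train_words)

logist_regression = LogisticRegression(max_iter=1000)
logist_regression.fit(X_train, train_labels)

# Prediction then works exactly as in the snippet above
print(logist_regression.predict(vectorizer.transform(['adios']))[0])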