Example 1
file.close()

t_ = []
y_ = []

# Keep only tokens whose gold label is a language class; skip
# 'ambiguous', 'fw', 'mixed', 'ne' and 'unk'.
skip_labels = {'ambiguous', 'fw', 'mixed', 'ne', 'unk'}
for label, pred in zip(t, y):
	if label in skip_labels:
		continue
	t_.append(label)
	y_.append(pred)

# Predictions on the original test set are only saved to disk, not scored locally; stop here
if (evaluation_dataset == 'test-original'):
	save_predictions(y_, './results/predictions/en-es/predictions_test_original_mBERT.txt')
	exit(1)
# Get accuracy
acc = accuracy_score(t_, y_)
print("Accuracy: " + str(acc))

# F1 score
f1 = f1_score(t_, y_, average=None)
print("F1 score per class: " + str(f1))

# F1 score weighted
f1_weighted = f1_score(t_, y_, average='weighted')
print("Weighted F1 score: " + str(f1_weighted))

# Confusion matrix
conf_matrix = confusion_matrix(t_, y_)
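
For reference, here is a minimal self-contained sketch of the three sklearn.metrics calls used above, run on toy labels (the data below is illustrative, not taken from the repository):

from sklearn.metrics import accuracy_score, f1_score, confusion_matrix

# Toy gold labels and predictions (illustrative only)
gold = ['lang1', 'lang2', 'other', 'lang1', 'lang2']
pred = ['lang1', 'lang2', 'lang1', 'lang1', 'other']

print(accuracy_score(gold, pred))                 # fraction of exact matches: 0.6
print(f1_score(gold, pred, average=None))         # one F1 value per class, classes in sorted label order
print(f1_score(gold, pred, average='weighted'))   # per-class F1 averaged, weighted by class support
print(confusion_matrix(gold, pred))               # rows = true labels, columns = predicted labels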
Example 2
		for i in range(nr_words):
			if (words[i] != ''):
				# Count the votes each label received for word i across the ensemble members
				for j in range(nr_predictions):
					predicted_label = results[j][i]
					results_dict[i][predicted_label] += 1
				# Keep the label with the most votes (majority vote)
				predictions.append(max(results_dict[i].items(), key=operator.itemgetter(1))[0])
			else:
				predictions.append('')
		
		best_ensembly = perm
		best_predictions = predictions

print(best_ensembly)

if (evaluation_dataset == 'test-original'):
	save_predictions(best_predictions, './results/predictions/' + lang1_code + '-' + lang2_code + '/predictions_test_original_ensemble_all.txt')
	exit(1)

# Get accuracy
print("Accuracy: " + str(best_acc))

# F1 score
f1 = f1_score(t, best_predictions, average=None)
print("F1 score per class: " + str(f1))

# F1 score weighted
f1_weighted = f1_score(t, best_predictions, average='weighted')
print("Weighted F1 score: " + str(f1_weighted))

# Confusion matrix
conf_matrix = confusion_matrix(t, best_predictions)
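
The per-word majority vote above can also be expressed with collections.Counter; this is just an equivalent sketch on made-up votes, not code from the repository:

from collections import Counter

votes = ['lang1', 'lang2', 'lang1']                   # one prediction per ensemble member for a single word
majority_label = Counter(votes).most_common(1)[0][0]  # label with the highest vote count
print(majority_label)                                 # -> 'lang1'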
Example 3
        if (is_other(word)):
            lang = 'other'
        else:
            # Pick the language whose model assigns the word the higher probability
            if (prob_lang1 >= prob_lang2):
                lang = 'lang1'
            else:
                lang = 'lang2'

        y.append(lang)
        predictions_dict[word] = lang
    else:
        y.append('')

if (evaluation_dataset == 'test-original'):
    save_predictions(
        y, './results/predictions/' + lang1_code + '-' + lang2_code +
        '/predictions_test_original_probabilities.txt')
    exit(1)

# Get accuracy
acc = accuracy_score(t, y)
print("Accuracy: " + str(acc))

# F1 score
f1 = f1_score(t, y, average=None)
print("F1 score per class: " + str(f1))

# F1 score weighted
f1_weighted = f1_score(t, y, average='weighted')
print("Weighted F1 score: " + str(f1_weighted))
Example 4
                lang = 'lang1'
            else:
                lang = 'lang2'

        y.append(lang)
        predictions_dict[word] = lang

        # Report progress every 10,000 words
        if counter % 10000 == 0:
            print(f"{counter} of {len(words)}")
        counter += 1
    else:
        y.append('')

if (evaluation_dataset == 'test-original'):
    save_predictions(
        y, './results/predictions/' + lang1_code + '-' + lang2_code +
        '/predictions_test_original_char_' + str(n) + '_grams.txt')
    exit(1)

# Get accuracy
acc = accuracy_score(t, y)
print("Accuracy: " + str(acc))

# F1 score
f1 = f1_score(t, y, average=None)
print("F1 score per class: " + str(f1))

# F1 score weighted
f1_weighted = f1_score(t, y, average='weighted')
print("Weighted F1 score: " + str(f1_weighted))
Example 5

            y_sentence = identifier.identify(lang_tokens)
            # Re-insert the 'other' labels at their original positions in the sentence
            for index in other_indexes:
                y_sentence.insert(index, 'other')

        # For sentences that are made up only of 'other' words
        else:
            y_sentence = []
            for index in other_indexes:
                y_sentence.append('other')
        for i in range(len(tokens)):
            predictions_dict[tokens[i]] = y_sentence[i]
        y.append(y_sentence)

if (evaluation_dataset == 'test-original'):
    save_predictions(
        y, './results/predictions/' + lang1_code + '-' + lang2_code +
        '/predictions_test_original_viterbi_v1.txt')
    exit(1)

# Our own test set has gold labels; flatten the per-sentence predictions before scoring
y = [item for y_sent in y for item in y_sent]

# Get accuracy
acc = accuracy_score(t, y)
print("Accuracy: " + str(acc))

# F1 score
f1 = f1_score(t, y, average=None)
print("F1 score per class: " + str(f1))
Example 6
predictions_dict = dict()
for word in words:
    if (word != ''):
        if (is_other(word)):
            lang = 'other'
        else:
            # Vectorise the single word and classify it with the trained logistic regression model
            word_vect = vectorizer.transform([word])
            lang = logist_regression.predict(word_vect)[0]
        y.append(lang)
        predictions_dict[word] = lang
    else:
        y.append('')

if (evaluation_dataset == 'test-original'):
    save_predictions(
        y, './results/predictions/' + lang1_code + '-' + lang2_code +
        '/predictions_test_original_LogisticRegression.txt')
    exit(1)

# Get accuracy
acc = accuracy_score(t, y)
print("Accuracy: " + str(acc))

# F1 score
f1 = f1_score(t, y, average=None)
print("F1 score per class: " + str(f1))

# F1 score weighted
f1_weighted = f1_score(t, y, average='weighted')
print("Weighted F1 score: " + str(f1_weighted))