def analyse_func(self):
    """Predict the geo-location of the entered text and open the results.

    Fetches the text between index ``1.0`` and ``tkinter.END`` from
    ``self.text_box``, passes it together with the ``'data/model/'`` path to
    ``writing_style_analyzer.predict_geo_location``, stores the returned
    value in ``self.result_country`` and then calls ``self.open_results()``.

    A ``ZeroDivisionError`` raised by any of those steps is printed rather
    than propagated; every other exception is left to the caller.
    """
    model_dir = 'data/model/'
    try:
        entered_text = self.text_box.get(1.0, tkinter.END)
        predicted_country = writing_style_analyzer.predict_geo_location(
            entered_text, model_dir
        )
        self.result_country = predicted_country
        self.open_results()
    except ZeroDivisionError as error:
        print(error)
def _load_pickle(path):
    """Return the object unpickled from *path*, closing the file afterwards."""
    # NOTE(review): pickle.load can execute arbitrary code while loading.
    # These files are assumed to be produced by this project -- confirm, and
    # never point this at untrusted data.
    with open(path, "rb") as handle:
        return pickle.load(handle)


def _evaluate_test_pages(test_dir, used_countries):
    """Predict the geo-location of every usable revision stored in *test_dir*.

    Each regular file in *test_dir* (except ``.DS_Store``) is unpickled as a
    page; revisions whose country is not in *used_countries* are skipped.

    Returns a tuple ``(test_country_list, test, prediction,
    country_occurrences)`` where ``test`` / ``prediction`` are the parallel
    lists of expected and predicted countries, and ``country_occurrences``
    maps each predicted country to ``[positive_count, true_positive_count]``.
    """
    test_country_list = []
    test = []
    prediction = []
    country_occurrences = {}

    for file_name in os.listdir(test_dir):
        path = os.path.join(test_dir, file_name)
        if file_name == ".DS_Store" or os.path.isdir(path):
            continue

        test_country_list.append(file_name)
        page = _load_pickle(path)

        # Iterate over all revisions, predict the geo-location and count the
        # positive occurrences.
        for revision in page.revisions:
            test_content = revision.diff_content
            # Country was not trained, so it cannot be predicted.
            if revision.country not in used_countries:
                continue

            predicted_geo_location = predict_geo_location(test_content)
            test.append(revision.country)
            prediction.append(predicted_geo_location)

            # Also count the occurrences for each predicted geo-location.
            hit = 1 if predicted_geo_location == revision.country else 0
            stats = country_occurrences.setdefault(predicted_geo_location, [0, 0])
            stats[0] += 1
            stats[1] += hit

        print("Processed: " + page.title)

    return test_country_list, test, prediction, country_occurrences


def main():
    """Evaluate the geo-location predictor on the pickled test pages.

    Loads the trained countries from ``../data/model/used_countries``, runs
    ``predict_geo_location`` on every revision below ``../data/test_pickles``
    whose country was trained, and then:

    * prints per-country counts, accuracy and the macro-averaged F1 score,
    * writes the same statistics to ``../result/result_stat.csv``,
    * calls ``trained_data_stat_to_csv()`` and
      ``test_countries_to_file(test_country_list)`` (defined elsewhere),
    * saves the confusion matrix plot to ``../result/cm.png`` and shows it.

    If no revision qualifies, accuracy and F1 are reported as 0.0 instead of
    failing with a division by zero.
    """
    used_countries = _load_pickle("../data/model/used_countries")

    # Opened before the evaluation (as before) so an unwritable result path
    # fails fast; the ``with`` guarantees the handle is closed on errors too.
    with open("../result/result_stat.csv", "w") as result_file:
        test_country_list, test, prediction, country_occurrences = (
            _evaluate_test_pages("../data/test_pickles", used_countries)
        )
        count = len(test)
        true_positive_count = sum(
            1 for expected, predicted in zip(test, prediction)
            if expected == predicted
        )

        # Output results
        print()
        print(12 * "-" + " Result " + 12 * "-")
        print()
        trained_data_stat_to_csv()
        print()
        print("Countries with [Positive Count, True Positive Count]")
        print()

        result_file.write("country, positive_count, true_positive_count\n")
        for country, amount in country_occurrences.items():
            print(country + ": " + str(amount))
            result_file.write(
                country + ", " + str(amount[0]) + ", " + str(amount[1]) + "\n"
            )

        # Computed once and reused for both the CSV row and the console
        # output; an empty evaluation is reported as 0.0.
        if count:
            accuracy = (true_positive_count / count) * 100
            macro_f1 = f1_score(test, prediction, average="macro") * 100
        else:
            accuracy = 0.0
            macro_f1 = 0.0

        result_file.write(
            "Total, " + str(count) + ", " + str(true_positive_count) + "\n"
        )
        # The F-Score row has no true-positive column, hence the -1 filler.
        result_file.write("F-Score, " + str(macro_f1) + ", " + str(-1) + "\n")

    test_countries_to_file(test_country_list)

    print()
    print("Count Revisions: " + str(count))
    print("True positive count: {0}".format(str(true_positive_count)))
    print("Accuracy: %.4f" % accuracy + "%")
    print("F1-Score with macro-average: %.4f" % macro_f1 + "%")

    # ``labels`` is passed by keyword: presumably this is scikit-learn's
    # confusion_matrix, where the argument is keyword-only in current releases.
    cm = confusion_matrix(test, prediction, labels=test_country_list)
    pl.matshow(cm)
    pl.title("Confusion Matrix")
    pl.colorbar()
    pl.ylabel("True label")
    pl.xlabel("Predicted label")
    pl.savefig("../result/cm.png")
    # The label list is appended only after saving, so it appears in the
    # interactive window but not in the stored image.
    pl.xlabel("Predicted label \n\n" + str(test_country_list))
    pl.show()