def correct_count(result_dict):
    """Score predicted error positions against the FCE gold annotations.

    Args:
        result_dict: mapping of sentence id -> iterable of predicted error
            start offsets; the sentinel -2 means "sentence has no error".

    Returns:
        Total number of correct predictions (matched error spans plus
        correctly predicted error-free sentences).

    Side effects:
        Prints a summary of the per-category counts.
    """
    correct = 0
    no_error_correct = 0
    no_error_sentences = 0
    correct_spot = 0
    found = 0
    missed_spans = 0
    total_spans = 0
    counted_as_no_err = 0
    fce_data = fd.extract_data('fce_train.gold.max.rasp.old_cat.m2')

    # Index gold sentences by id once (O(K + S)) instead of rescanning the
    # whole corpus for every key (O(K * S)).  A list per id preserves the
    # original corpus order, so duplicate ids are handled identically.
    sentences_by_id = {}
    for sentence in fce_data:
        # sentence[0] carries a one-character prefix before the id.
        sentences_by_id.setdefault(sentence[0][1:], []).append(sentence)

    for key, starts in result_dict.items():
        if -2 in starts:
            counted_as_no_err += 1
        for sentence in sentences_by_id.get(key, []):
            spans_selected = []
            found += 1
            gold_spans = sentence[1]
            if not gold_spans:
                no_error_sentences += 1
            for start in starts:
                if start == -2 and len(gold_spans) == 0:
                    # Correctly predicted an error-free sentence.
                    correct += 1
                    no_error_correct += 1
                else:
                    # Credit every gold span that contains this start offset.
                    for i, span in enumerate(gold_spans):
                        if span[0] <= int(start) < span[1]:
                            correct_spot += 1
                            correct += 1
                            spans_selected.append(i)
            # Gold spans never hit by any prediction count as missed.
            # NOTE(review): accumulation assumed per matched sentence —
            # confirm against the original (pre-collapse) indentation.
            missed_spans += len(gold_spans) - len(set(spans_selected))
            total_spans += len(gold_spans)

    print('No error match: ', no_error_correct)
    print('No error sentences: ', no_error_sentences)
    print('Has no error answer: ', counted_as_no_err)
    print('Error match: ', correct_spot)
    print('Error missed: ', missed_spans)
    print('Found sentences: ', found)
    print('Total spans: ', total_spans)
    return correct
# NOTE(review): this chunk begins mid-function — the `def` header of the
# worksheet-creating helper (referenced below as
# create_worksheet_sentence_batch) lies before the visible chunk, so the
# indentation of the fragment is reconstructed.
        rows=(len(error_sentences) + 1), cols=1)
    # NOTE(review): the line above sizes the sheet from the module-level
    # global `error_sentences`, while the loop below iterates the parameter
    # `_error_sentences` — likely a bug; confirm which was intended.
    worksheet.update_acell('A1', 'Sentence')
    counterOffset = 2
    # Write one sentence per row, starting at cell A2.
    for sentence in _error_sentences:
        worksheet.update_acell('A' + str(counterOffset), sentence[0])
        counterOffset = counterOffset + 1


# --- module-level script: upload FCE error sentences to Google Sheets ---

# Service-account credentials and OAuth scope for the gspread API.
json_key = 'gspread-test.json'
scope = ['https://spreadsheets.google.com/feeds']
credentials = ServiceAccountCredentials.from_json_keyfile_name(json_key, scope)
gc = gspread.authorize(credentials)
spreadsheet = gc.open("FCE_ERR")
error_sentences = fd.extract_data('fce_train.gold.max.rasp.old_cat.m2')
threads = []
batch_size = 5000
# NOTE(review): floor division drops the final partial batch — any
# sentences beyond batch_count * batch_size are never uploaded; confirm
# this is intended.
batch_count = len(error_sentences) // batch_size
# One worker thread per batch; each creates its own worksheet.
for i in range(batch_count):
    t = threading.Thread(target=create_worksheet_sentence_batch,
                         args=(spreadsheet, "fce_errors" + str(i),
                               error_sentences[i * batch_size:(i + 1) * batch_size]))
    threads.append(t)
    t.start()
# NOTE(review): `threads` is collected but not joined within this chunk —
# the process may exit before uploads finish unless a join occurs later
# in the file.
import tensorflow as tf
import sklearn as sk
import sklearn.metrics as skm
import numpy as np
import math
import fce_api as fd
import re
import matplotlib.pyplot as plt

# variables

# data
data = fd.extract_data('fce_train.gold.max.rasp.old_cat.m2')
window_size = 3  # number of tokens per feed window
labels = []
PAD = 0  # padding value used when a window is shorter than window_size
display_step = 1  # progress-report frequency during training

# classes (binary sentence labels)
has_error = 1
no_error = 0

# learning variables
learning_rate = 0.01
epochs = 50


# generating the word windows, including the spaces.
# NOTE(review): this definition continues past the end of the visible
# chunk — only its opening lines appear here.
def feed_windows_with_spaces(_data, _window_size):
    windows = []
# general variables window_size = 5 # classes has_error = 1 no_error = 0 # # training data # training_data = fd.extract_data_from_tsv('fce-public.train.original.tsv') # # # dev data # dev_data = fd.extract_data_from_tsv('fce-public.dev.original.tsv') # amt golden data amt_golden = fd.extract_data( 'amt_data_sets/fce_amt.experiment_two.max.rasp.m2') # amt non-expert data amt_non_expert = fd.extract_data( 'amt_data_sets/fce_amt.experiment_two.max.rasp.m2') # omitting the spaces e_feed_windows = feed_windows_only_tokens(amt_golden, window_size) ne_feed_windows = feed_windows_only_tokens(amt_non_expert, window_size) labels = [] PAD = 0.0 OOV = 1.0 display_step = 1