def correct_count(result_dict):
    """Score predicted error-span starts against the FCE gold annotations.

    ``result_dict`` maps a sentence text to the list of predicted span
    start offsets for that sentence; the sentinel ``-2`` means "no error
    predicted".  Prints a summary of matches/misses and returns the total
    number of correct predictions (span hits plus correct no-error calls).
    """
    correct = 0
    no_error_correct = 0
    no_error_sentences = 0
    correct_spot = 0
    found = 0
    missed_spans = 0
    total_spans = 0
    counted_as_no_err = 0
    fce_data = fd.extract_data('fce_train.gold.max.rasp.old_cat.m2')
    # Index the gold sentences by their text once, so the main loop is
    # O(keys + sentences) instead of rescanning all of fce_data per key.
    # A list per key preserves the original order and any duplicates.
    # NOTE: sentence[0][1:] strips the leading marker character of the
    # gold sentence text before matching it against result_dict keys.
    sentences_by_text = {}
    for sentence in fce_data:
        sentences_by_text.setdefault(sentence[0][1:], []).append(sentence)
    for key, starts in result_dict.items():
        if -2 in starts:
            counted_as_no_err += 1
        for sentence in sentences_by_text.get(key, ()):
            spans_selected = []
            found += 1
            gold_spans = sentence[1]
            if not gold_spans:
                no_error_sentences += 1
            for start in starts:
                if start == -2 and len(gold_spans) == 0:
                    # Predicted "no error" on a sentence with no gold spans.
                    correct += 1
                    no_error_correct += 1
                else:
                    # A start hits a gold span when it falls inside
                    # the half-open interval [span_start, span_end).
                    for i, span in enumerate(gold_spans):
                        if span[0] <= int(start) < span[1]:
                            correct_spot += 1
                            correct += 1
                            spans_selected.append(i)
            # Gold spans no prediction landed in (dedupe multiple hits).
            missed_spans += len(gold_spans) - len(set(spans_selected))
            total_spans += len(gold_spans)
    print('No error match: ', no_error_correct)
    print('No error sentences: ', no_error_sentences)
    print('Has no error answer: ', counted_as_no_err)
    print('Error match: ', correct_spot)
    print('Error missed: ', missed_spans)
    print('Found sentences: ', found)
    print('Total spans: ', total_spans)
    return correct
# --- Example no. 2 ---
                                           rows=(len(error_sentences) + 1),  # one header row + one row per sentence
                                           cols=1)
    # Header cell in A1, then one sentence per row starting at A2.
    worksheet.update_acell('A1', 'Sentence')
    counterOffset = 2
    # NOTE(review): the worksheet was sized from `error_sentences` above, but
    # this loop reads `_error_sentences` — confirm both names refer to the
    # same list (the function header is missing from this snippet).
    for sentence in _error_sentences:
        worksheet.update_acell('A' + str(counterOffset), sentence[0])
        counterOffset = counterOffset + 1


# Authorise against the Google Sheets API with a service-account key file.
json_key = 'gspread-test.json'
scope = ['https://spreadsheets.google.com/feeds']

credentials = ServiceAccountCredentials.from_json_keyfile_name(json_key, scope)

gc = gspread.authorize(credentials)

spreadsheet = gc.open("FCE_ERR")

error_sentences = fd.extract_data('fce_train.gold.max.rasp.old_cat.m2')

# Fan the sentences out to worker threads, one worksheet per full batch.
# Note: the floor division drops any trailing partial batch.
threads = []
batch_size = 5000
batch_count = len(error_sentences) // batch_size
for i in range(batch_count):
    batch_start = i * batch_size
    batch = error_sentences[batch_start:batch_start + batch_size]
    t = threading.Thread(target=create_worksheet_sentence_batch,
                         args=(spreadsheet, "fce_errors" + str(i), batch))
    threads.append(t)
    t.start()
# --- Example no. 3 ---
import tensorflow as tf
import sklearn as sk
import sklearn.metrics as skm
import numpy as np
import math
import fce_api as fd
import re
import matplotlib.pyplot as plt

# --- configuration ---
# Gold FCE training data, parsed by the project's fce_api helper.
data = fd.extract_data('fce_train.gold.max.rasp.old_cat.m2')

# Number of tokens per sliding window fed to the model.
window_size = 3

labels = []
PAD = 0  # padding value for short windows
display_step = 1  # log progress every N epochs

# Class ids for the binary error-detection task.
has_error = 1
no_error = 0

# Learning hyper-parameters.
learning_rate = 0.01
epochs = 50


# Generating the word windows, including the spaces.
def feed_windows_with_spaces(_data, _window_size):
    """Build sliding word windows over *_data*, keeping the spaces.

    NOTE(review): the rest of this function body is missing from this
    snippet (truncated in the source it was copied from); only the
    accumulator initialisation is visible here.
    """
    windows = []
# --- Example no. 4 ---
# --- general configuration ---
# Number of tokens per sliding window.
window_size = 5

# Class ids for the binary error-detection task.
has_error = 1
no_error = 0

# # training data
# training_data = fd.extract_data_from_tsv('fce-public.train.original.tsv')
#
# # dev data
# dev_data = fd.extract_data_from_tsv('fce-public.dev.original.tsv')

# AMT golden (expert-annotated) data.
amt_golden = fd.extract_data(
    'amt_data_sets/fce_amt.experiment_two.max.rasp.m2')

# AMT non-expert data.
# NOTE(review): this loads the *same* file as amt_golden above — likely a
# copy-paste slip; confirm the intended non-expert data set filename.
amt_non_expert = fd.extract_data(
    'amt_data_sets/fce_amt.experiment_two.max.rasp.m2')

# Token-only windows (spaces omitted) for each data set.
e_feed_windows = feed_windows_only_tokens(amt_golden, window_size)

ne_feed_windows = feed_windows_only_tokens(amt_non_expert, window_size)

labels = []
PAD = 0.0  # padding value for short windows
OOV = 1.0  # out-of-vocabulary token value
display_step = 1  # log progress every N epochs