def evaluate_oov(data, source_attr, target_attr, forms_attr,
                 oov_test_file, oov_part):
    """\
    Out-of-vocabulary evaluation
    """
    # load the training data from which known lemmas/forms are collected
    log_info('Loading known lemmas and forms from: ' + oov_test_file)
    train = DataSet()
    train.load_from_arff(oov_test_file)
    if oov_part < 1:
        log_info('Using only %f-part of the file.' % oov_part)
        train = train.subset(0, int(round(oov_part * len(train))), copy=False)
    known_forms = {i[target_attr].lower() for i in train}
    known_lemmas = {i[source_attr].lower() for i in train}
    # mark instances whose form/lemma was not seen in the training data
    # (lower-cased for the lookup, consistently with how the sets were built)
    oov_forms = [1 if i[target_attr].lower() not in known_forms else 0
                 for i in data]
    oov_lemmas = [1 if i[source_attr].lower() not in known_lemmas else 0
                  for i in data]
    data.add_attrib(Attribute('OOV_FORM', 'numeric'), oov_forms)
    data.add_attrib(Attribute('OOV_LEMMA', 'numeric'), oov_lemmas)
    # score the predictions restricted to the OOV subsets
    oov_forms_count = sum(oov_forms)
    oov_forms_good = count_correct(data, target_attr, forms_attr,
                                   lambda i: i['OOV_FORM'])
    print_score(oov_forms_good, oov_forms_count, 'OOV forms')
    oov_lemmas_count = sum(oov_lemmas)
    oov_lemmas_good = count_correct(data, target_attr, forms_attr,
                                    lambda i: i['OOV_LEMMA'])
    print_score(oov_lemmas_good, oov_lemmas_count, 'OOV lemmas')
def pairwise_bootstrap(file1, file2, gold_attr, pred_attr, cmp_func, iters):
    # load both prediction files and the shared gold standard
    d1, d2 = DataSet(), DataSet()
    log_info('Loading File1: %s' % file1)
    d1.load_from_arff(file1)
    log_info('Loading File2: %s' % file2)
    d2.load_from_arff(file2)
    gold = d1.attrib_as_vect(gold_attr)
    p1 = d1.attrib_as_vect(pred_attr)
    p2 = d2.attrib_as_vect(pred_attr)
    p1_better, p2_better, ties = 0, 0, 0
    # paired bootstrap resampling: draw indices with replacement and compare
    # the two systems' accuracies on each resampled set
    for round_no in xrange(iters):
        sample = rnd.randint(0, len(gold), len(gold))
        s_p1_good = sum(1 if cmp_func(gold[i], p1[i]) else 0 for i in sample)
        s_p2_good = sum(1 if cmp_func(gold[i], p2[i]) else 0 for i in sample)
        log_info('Round %d: File1 - %2.2f vs. File2 - %2.2f' %
                 (round_no, float(s_p1_good) / len(gold) * 100,
                  float(s_p2_good) / len(gold) * 100))
        if s_p1_good > s_p2_good:
            p1_better += 1
        elif s_p2_good > s_p1_good:
            p2_better += 1
        else:
            ties += 1
    print ('File1 better: %d (%2.2f) | File2 better: %d (%2.2f) |' +
           ' ties: %d (%2.2f)') % (p1_better, float(p1_better) / iters * 100,
                                   p2_better, float(p2_better) / iters * 100,
                                   ties, float(ties) / iters * 100)
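
# Hedged usage sketch (not part of the original script): the module-level `rnd`
# used above is assumed to be an alias for numpy.random, so rnd.randint(0, n, n)
# draws a bootstrap sample of n indices with replacement. The file names,
# attribute names and comparison function below are illustrative placeholders.
def _demo_pairwise_bootstrap():
    case_insensitive = lambda gold, pred: gold.lower() == pred.lower()
    pairwise_bootstrap('sys1.arff', 'sys2.arff',
                       gold_attr='FORM', pred_attr='PREDICTED',
                       cmp_func=case_insensitive, iters=1000)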
def main():
    """\
    Main application entry: parse the command line and run the feature
    combination.
    """
    opts, filenames = getopt.getopt(sys.argv[1:], 'ca:s:n:')
    show_help = False
    combine_cng = False
    subsets = []
    neighbors = []
    substrs = []
    for opt, arg in opts:
        if opt == '-c':
            combine_cng = True
        elif opt == '-s':
            sub_len, attr = arg.split(':', 1)
            substrs.append((int(sub_len), attr))
        elif opt == '-a':
            size, attrs = arg.split(':', 1)
            subsets.append((int(size), re.split(r'[, ]+', attrs)))
        elif opt == '-n':
            shift, attrs = arg.split(':', 1)
            neighbors.append((int(shift), re.split(r'[, ]+', attrs)))
    # display help and exit
    if len(filenames) != 2 or not (combine_cng or substrs or subsets
                                   or neighbors) or show_help:
        display_usage()
        sys.exit(1)
    # run the feature combination
    filename_in, filename_out = filenames
    data = DataSet()
    log_info('Loading data: ' + filename_in)
    data.load_from_arff(filename_in)
    if substrs:
        for (sub_len, attr) in substrs:
            log_info(('Adding substrings from the %s of %s ' +
                      'up to %d characters long ...') %
                     (('beginning' if sub_len > 0 else 'end'),
                      attr, abs(sub_len)))
            add_substr_attributes(data, sub_len, attr)
    if combine_cng:
        log_info('Combining case, number, gender ...')
        combine_tag_num_gen_cas(data)
    if subsets:
        for (set_size, set_attrs) in subsets:
            log_info('Combining up to %d attributes from [%s] ...' %
                     (set_size, ','.join(set_attrs)))
            combine_subsets(data, set_attrs, set_size)
    if neighbors:
        for (shift, attrs) in neighbors:
            log_info('Adding neighbor %d\'s attributes [%s] ...' %
                     (shift, ','.join(attrs)))
            add_neighbor_attributes(data, shift, attrs)
    log_info('Saving data: ' + filename_out)
    data.save_to_arff(filename_out)
def get_stats(data_file, train_file, source_attr, target_attr):
    """\
    Print data set statistics: for all instances, for instances excluding
    punctuation, for forms differing from their lemmas and, if a training
    file is given, for forms unknown from the training data.
    """
    data = DataSet()
    log_info('Loading data from %s...' % data_file)
    data.load_from_arff(data_file)
    print_feat(data, lambda a, b: True, 'total')
    print_feat(data, lambda _, i: not regex.match(r'^\p{P}', i[source_attr]),
               'excluding punctuation')
    print_feat(data, lambda _, i: i[source_attr].lower() != i[target_attr].lower(),
               'inflected forms')
    if train_file is not None:
        log_info('Loading known data from %s...' % train_file)
        train = DataSet()
        train.load_from_arff(train_file)
        known = {i[target_attr].lower() for i in train}
        print_feat(data, lambda _, i: i[target_attr].lower() not in known,
                   'unknown')
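
# Hedged usage sketch (not part of the original script): the file and attribute
# names below are illustrative placeholders. Note that the filter callbacks
# passed to print_feat() above take two arguments, with the data instance as
# the second one, which is why the unused first argument is written as `_`.
def _demo_get_stats():
    get_stats('test.arff', 'train.arff',
              source_attr='LEMMA', target_attr='FORM')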
def test_models(file_in, file_out, model_files, source_attr, target_attr,
                oov_test_file, oov_part, pos_attr, test_indiv):
    """\
    Test all the given models on the selected file and save the target.

    If oov_test_file is set, also performs an OOV evaluation.
    If pos_attr is set, prints detailed results for the various POSes.
    If test_indiv is set, prints the accuracy of each individual model.
    """
    # load testing data
    log_info('Loading data: ' + file_in)
    data = DataSet()
    data.load_from_arff(file_in)
    forms = data[source_attr]
    # apply all models
    for model_num, model_file in enumerate(model_files, start=1):
        model = Model.load_from_file(model_file)
        log_info('Applying model: ' + model_file)
        rules = model.classify(data)
        output_attr = 'OUTPUT_M' + str(model_num)
        data.add_attrib(Attribute(output_attr, 'string'), rules)
        if test_indiv:
            good = count_correct(data, model.class_attr, output_attr)
            print_score(good, len(data), 'Model accuracy')
        forms = [inflect(form, rule) for form, rule in zip(forms, rules)]
        forms_attr = 'FORMS_M' + str(model_num)
        data.add_attrib(Attribute(forms_attr, 'string'), forms)
    # test the final performance
    log_info('Evaluating...')
    good = count_correct(data, target_attr, forms_attr)
    print_score(good, len(data), 'ALL')
    # evaluate without punctuation
    evaluate_nopunct(data, source_attr, target_attr, forms_attr)
    # evaluate forms different from lemma
    evaluate_nolemma(data, source_attr, target_attr, forms_attr)
    # load training data for OOV tests, evaluate on OOV
    if oov_test_file:
        evaluate_oov(data, source_attr, target_attr, forms_attr,
                     oov_test_file, oov_part)
    # test on different POSes
    if pos_attr:
        evaluate_poses(data, target_attr, forms_attr, pos_attr)
    # save the classification results
    log_info('Saving data: ' + file_out)
    data.save_to_arff(file_out)
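
# Hedged usage sketch (not part of the original script): the file paths, model
# file names and attribute names below are illustrative placeholders; only the
# keyword names follow the test_models() signature above. With oov_part=1.0 the
# whole OOV test file is used as the source of known lemmas and forms.
def _demo_test_models():
    test_models('test.arff', 'test-classified.arff',
                model_files=['model1.pickle.gz', 'model2.pickle.gz'],
                source_attr='LEMMA', target_attr='FORM',
                oov_test_file='train.arff', oov_part=1.0,
                pos_attr='POS', test_indiv=True)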
def main():
    """\
    Main application entry: parse the command line and run the error
    annotation/selection.
    """
    opts, filenames = getopt.getopt(sys.argv[1:], 'g:p:ai')
    show_help = False
    annot_errors = False
    gold = None
    predicted = 'PREDICTED'
    ignore_case = False
    for opt, arg in opts:
        if opt == '-g':
            gold = arg
        elif opt == '-p':
            predicted = arg
        elif opt == '-a':
            annot_errors = True
        elif opt == '-i':
            ignore_case = True
    # display help and exit
    if len(filenames) != 2 or not gold or show_help:
        display_usage()
        sys.exit(1)
    # run the error annotation/selection
    filename_in, filename_out = filenames
    data = DataSet()
    log_info('Loading data: ' + filename_in)
    data.load_from_arff(filename_in)
    if ignore_case:
        cmp_func = lambda a, b: a.lower() != b.lower()
    else:
        cmp_func = lambda a, b: a != b
    if annot_errors:
        log_info('Annotating errors...')
        err_ind = ['ERR' if cmp_func(i[gold], i[predicted]) else ''
                   for i in data]
        data.add_attrib(Attribute('ERROR_IND', 'string'), err_ind)
    else:
        log_info('Selecting errors...')
        data = data[lambda _, i: cmp_func(i[gold], i[predicted])]
    log_info('Saving data: ' + filename_out)
    data.save_to_arff(filename_out)