import csv
import re
from collections import defaultdict

# The optional first command-line argument selects the portion; default is 0.
portion = int(sys.argv[1]) if len(sys.argv) > 1 else 0

# Generate training features for the "emnlp" collection and fit the classifier.
fg = FeatureGenerator()
Xs, ys = fg.generate_training("emnlp")
classifier = LogisticRegressionClassifier()
classifier.train(Xs, ys)

base = "data/emnlp"
rels = []
tables = load_collection("emnlp")


def fact_check_and_test(q, rel):
    """Wrap ``q`` in a NUM-type ``Question`` and generate its test features.

    The question is passed to ``fg.generate_test`` together with the
    module-level ``tables``; the call yields the candidate tuples and the
    question features.

    NOTE(review): ``rel`` is not read by the statements below -- confirm how
    it is used before relying on this description.
    """
    question = Question(text=q, type="NUM")
    tuples, q_features = fg.generate_test(tables, question)

    # Initial state for the matching bookkeeping.
    q_match = False
    matches = {}
    p_match = 0.0
    found_match = False
    total_geq = 0
# Pickle `lr` to bytes (kept in `s`) and report that training has finished.
s = pickle.dumps(lr)
print(lr)
print("Trained")

# Claims to be checked against the "herox" table collection.
queries = [
    "Hamas was founded in 1985",
    "In America, in June 1901, the average temperature was 16.6C",
    "The World life expectancy was 52 in 1960",
    "world life expectancy rose sharply to 80 in 2014",
    "In 2012 there were 3,282,570 bee colonies in America",
    "In 2016, the USA contributed $550bn to the financial intermediary funds",
]

tables = load_collection("herox")

for question in queries:
    q = Question(text=question, type="NUM")
    q.parse()

    q_match = False

    # Collect the tuples for every object in the union of q.nps and q.nes.
    tuples = []
    for obj in q.nps.union(q.nes):
        tuples.extend(get_all_tuples(tables, obj))

    # Set to True as soon as at least one tuple has been visited.
    done_tuple = False
    # NOTE(review): the loop variable `tuple` shadows the builtin; the name is
    # kept unchanged because later code may read it -- consider renaming.
    for tuple in tuples:
        done_tuple = True
        table_name = tuple[0]
from tabular.filtering import load_collection
from tabular.tuples import get_all_tuples

if __name__ == "__main__":
    # Smoke check: load the "training" collection and print every tuple
    # that get_all_tuples returns for "Japan".
    tables = load_collection("training")
    japan_tuples = get_all_tuples(tables, "Japan")
    print(japan_tuples)