def write_explanations_for_expressions(iteration_number):
    """Turn one iteration's pickled training expressions into an explanations TSV.

    Reads ``data/expressions/expressions_train_list<iteration_number>.pkl``,
    creates one ``Explanation`` per expression (named ``LF_0``, ``LF_1``, ...
    in the order the expressions are encountered) and hands the resulting
    list to ``ExplanationIO2.write`` together with the output path
    ``data/explanations/my_explanations_expressions<iteration_number>.tsv``.

    Args:
        iteration_number: Value interpolated (via ``str``) into both file names.
    """
    print("Writing explanations")

    suffix = str(iteration_number)
    source_path = 'data/expressions/expressions_train_list' + suffix + '.pkl'
    target_path = 'data/explanations/my_explanations_expressions' + suffix + '.tsv'

    with open(source_path, 'rb') as handle:
        grouped_expressions = pickle.load(handle)

    collected = []
    # The progress bar tracks the outer groups only, not individual expressions.
    for group in progressbar.progressbar(grouped_expressions):
        for item in group:
            # item[0] is used both as the word and as the input of the
            # generated condition; item[1] is used as the label.
            collected.append(
                Explanation(
                    # The running count of explanations doubles as the LF index.
                    name='LF_' + str(len(collected)),
                    label=item[1],
                    condition=create_condition_for_expressions(item[0]),
                    word=item[0]
                )
            )

    ExplanationIO2().write(collected, target_path)
    print("Done")
def apply_results():
    """Apply the best iteration's training tokens as explanations and report accuracy.

    Steps:
      1. Ask ``get_best_iteration()`` for the best iteration and print it;
         element ``[1]`` of the result selects the per-iteration pickle file.
      2. Load ``Cs`` and ``Ys`` from the module-level ``DATA_FILE1`` and
         ``DATA_FILE2`` and the training token list for that iteration.
      3. Build one ``Explanation`` per distinct token (``LF_0``, ``LF_1``, ...).
      4. Run them through a ``Babbler``, collapse each row of the split-0 and
         split-2 label matrices to one label with ``most_frequent`` and
         compare against ``Ys[0]`` / ``Ys[2]``.
      5. Print the number of non-zero training predictions, the error counts
         and the accuracies.

    Returns:
        None. All results are printed.
    """
    best_iteration = get_best_iteration()
    print(best_iteration)

    # Only the training list is consumed below. The per-iteration "correct"
    # and "wrong" lists were loaded (and the correct one de-duplicated) without
    # ever being read, so that dead work is no longer performed.
    # NOTE(review): the file lives under data/tokens/ but is named
    # "expressions_train_list" - confirm this is the intended path.
    train_tokens_path = ('data/tokens/expressions_train_list'
                         + str(best_iteration[1]) + '.pkl')

    # NOTE(review): pickle.load can execute arbitrary code; these files must
    # come from a trusted source.
    with open(DATA_FILE1, 'rb') as f:
        Cs = pickle.load(f)
    with open(DATA_FILE2, 'rb') as f:
        Ys = pickle.load(f)
    with open(train_tokens_path, 'rb') as f:
        tokens_train_list = pickle.load(f)

    # Order-preserving de-duplication. List membership (equality based) is
    # kept on purpose: whether the tokens are hashable is not visible here.
    unique_tokens = []
    for tokens_train in tokens_train_list:
        for token in tokens_train:
            if token not in unique_tokens:
                unique_tokens.append(token)

    # word[0] is the token text (also fed to the condition builder),
    # word[1] is its label.
    explanations = [
        Explanation(
            name='LF_' + str(index),
            label=word[1],
            condition=create_condition(word[0]),
            word=word[0]
        )
        for index, word in enumerate(progressbar.progressbar(unique_tokens))
    ]

    babbler = Babbler(Cs, Ys)
    babbler.apply(explanations, split=0)

    # All three splits are requested, as before, although only 0 and 2 are
    # evaluated below.
    Ls = [babbler.get_label_matrix(split) for split in [0, 1, 2]]
    L_train = Ls[0].toarray()
    L_test = Ls[2].toarray()

    predicted_training_labels = [most_frequent(line) for line in L_train]

    # Number of training rows whose predicted label is not 0
    # (presumably 0 means "abstain" - TODO confirm).
    count = sum(1 for label in predicted_training_labels if label != 0)
    print(count)

    predicted_test_labels = [most_frequent(line) for line in L_test]

    len_wrong_train = calculate_number_wrong(Ys[0], predicted_training_labels)
    len_wrong_test = calculate_number_wrong(Ys[2], predicted_test_labels)

    training_accuracy = percentage(len(Ys[0]) - len_wrong_train, len(Ys[0]))
    test_accuracy = percentage(len(Ys[2]) - len_wrong_test, len(Ys[2]))

    print("Number of wrong in training set: " + str(len_wrong_train))
    print("Number of wrong in test set: " + str(len_wrong_test))
    print("Training Accuracy: " + str(training_accuracy))
    print("Test Accuracy: " + str(test_accuracy))
def test2():
    """Apply pickled expressions as explanations and print predictions and accuracy.

    Loads ``Cs`` and ``Ys`` from the module-level ``DATA_FILE1`` /
    ``DATA_FILE2`` and a nested list of expressions from ``DATA_FILE4``,
    builds one ``Explanation`` per expression, applies them with a
    ``Babbler`` and commits. Each row of the split-0 and split-2 label
    matrices is reduced to one label with ``most_frequent`` and scored against
    ``Ys[0]`` / ``Ys[2]`` by ``calculate_number_wrong_no_abstain``.

    Returns:
        None. Predictions, error counts and accuracies are printed.
    """
    with open(DATA_FILE1, 'rb') as candidates_file:
        Cs = pickle.load(candidates_file)
    with open(DATA_FILE2, 'rb') as labels_file:
        Ys = pickle.load(labels_file)
    with open(DATA_FILE4, 'rb') as expressions_file:
        expression_groups = pickle.load(expressions_file)

    explanations = []
    # The progress bar tracks the outer groups only, not individual expressions.
    for group in progressbar.progressbar(expression_groups):
        for item in group:
            # item[0] is the expression text, item[1] its label; the current
            # list length serves as the running LF index.
            explanations.append(
                Explanation(
                    name='LF_' + str(len(explanations)),
                    label=item[1],
                    condition=create_condition_for_expressions(item[0]),
                    word=item[0]
                )
            )

    babbler = Babbler(Cs, Ys)
    babbler.apply(explanations, split=0)

    # Label matrices are fetched for all three splits before committing.
    label_matrices = [babbler.get_label_matrix(split) for split in [0, 1, 2]]
    babbler.commit()

    # Result is only useful for manual debugging, e.g. print(parses[222]).
    parses = babbler.get_parses()

    train_matrix = label_matrices[0].toarray()
    test_matrix = label_matrices[2].toarray()

    predicted_training_labels = [most_frequent(row) for row in train_matrix]
    predicted_test_labels = [most_frequent(row) for row in test_matrix]

    train_result = calculate_number_wrong_no_abstain(Ys[0], predicted_training_labels)
    len_wrong_train, training_accuracy = train_result
    test_result = calculate_number_wrong_no_abstain(Ys[2], predicted_test_labels)
    len_wrong_test, test_accuracy = test_result

    print(predicted_training_labels)
    print(predicted_test_labels)
    print("Number of wrong in training set: " + str(len_wrong_train))
    print("Number of wrong in test set: " + str(len_wrong_test))
    print("Training Accuracy: " + str(training_accuracy))
    print("Test Accuracy: " + str(test_accuracy))
'letters':['a','B','C'], 'smalls':['a','b','c','d'], 'luckies': [7, 8, 9], 'unluckies': [0, 13, 66], } # Test candidate (hash: 668761641257950361): # "City land records show that GM President [Daniel Ammann] and his wife, # [Pernilla Ammann], bought the 15-bedroom mansion on Balmoral Drive in # the upscale historic neighborhood on July 31." logic = [ # Base Explanation( condition="True", label=1, candidate=('foo', 'bar'), semantics=('.root', ('.label', ('.int', 1), ('.bool', True)))), # And Explanation( condition="True and True", label=1, candidate=('foo', 'bar'), semantics=('.root', ('.label', ('.int', 1), ('.and', ('.bool', True), ('.bool', True))))), # Or Explanation( condition="False or True", label=1, candidate=('foo', 'bar'), semantics=('.root', ('.label', ('.int', 1), ('.or', ('.bool', False), ('.bool', True))))), # Not boolean
'spouse': ['spouse', 'wife', 'husband', 'ex-wife', 'ex-husband'], 'family': [ 'father', 'father', 'mother', 'sister', 'sisters', 'brother', 'brothers', 'son', 'sons', 'daughter', 'daughters', 'grandfather', 'grandmother', 'uncle', 'uncles', 'aunt', 'aunts', 'cousin', 'cousins' ], 'friend': ['boyfriend', 'girlfriend', 'boss', 'employee', 'secretary', 'co-worker'], } aliases['family'] += ["{}-in-law".format(f) for f in aliases['family']] explanations = [ Explanation( name='LF_and_married', condition= "the word 'and' is between X and Y and 'married' within five words of Y", candidate= '1bcd8648-8a80-47a3-82d4-38a4a594092f::span:1223:1228~~1bcd8648-8a80-47a3-82d4-38a4a594092f::span:1234:1238', label=1), Explanation( name='LF_third_wheel', condition="there is a person between X and Y", candidate= '6cd34ab0-653b-438e-b966-d7365a31651d::span:595:607~~6cd34ab0-653b-438e-b966-d7365a31651d::span:712:719', label=2), Explanation( name='LF_married_two_people', condition= "the word 'married' is in the sentence and there are only two people in the sentence", candidate= 'd535c921-f102-4d3b-9891-5a36ed93259e::span:823:830~~d535c921-f102-4d3b-9891-5a36ed93259e::span:836:839',