コード例 #1
0
def write_explanations_for_expressions(iteration_number):
    """Turn the pickled training expressions into explanations and save them.

    Loads the nested expression list for the given iteration, wraps every
    inner entry in an ``Explanation`` named ``LF_<n>`` (``n`` counts up from 0
    across all groups), and writes the resulting list with ``ExplanationIO2``.

    Args:
        iteration_number: Value appended (via ``str``) to the input ``.pkl``
            and output ``.tsv`` file names.
    """
    print("Writing explanations")

    suffix = str(iteration_number)
    source_path = 'data/expressions/expressions_train_list' + suffix + '.pkl'
    target_path = 'data/explanations/my_explanations_expressions' + suffix + '.tsv'

    with open(source_path, 'rb') as handle:
        expression_groups = pickle.load(handle)

    explanations = []
    for group in progressbar.progressbar(expression_groups):
        for item in group:
            # The running length of the list doubles as the LF index.
            explanations.append(
                Explanation(
                    name='LF_' + str(len(explanations)),
                    label=item[1],
                    condition=create_condition_for_expressions(item[0]),
                    word=item[0],
                )
            )

    ExplanationIO2().write(explanations, target_path)

    print("Done")
コード例 #2
0
def apply_results():
    """Apply the best iteration's token explanations and print accuracies.

    Steps:
      1. Ask ``get_best_iteration()`` for the best iteration and print it;
         element ``[1]`` of the result selects the token file to load.
      2. Load ``Cs`` and ``Ys`` from the module-level ``DATA_FILE1`` and
         ``DATA_FILE2`` paths, and the training token lists for that
         iteration.
      3. Build one ``Explanation`` per distinct training token (first
         occurrence order), named ``LF_<n>``.
      4. Apply the explanations with ``Babbler`` on split 0, fetch the label
         matrices for splits 0, 1 and 2, and reduce every row of the split-0
         and split-2 matrices with ``most_frequent``.
      5. Print the number of non-zero training predictions, the number of
         wrong predictions and the accuracy for both splits.

    The correct/wrong token pickles are intentionally not loaded: nothing in
    this function reads them.
    """
    best_iteration = get_best_iteration()

    print(best_iteration)

    train_tokens_path = 'data/tokens/expressions_train_list' + str(best_iteration[1]) + '.pkl'

    # NOTE(review): pickle.load can execute arbitrary code from the file;
    # these paths must only ever point at trusted, locally produced data.
    with open(DATA_FILE1, 'rb') as f:
        Cs = pickle.load(f)

    with open(DATA_FILE2, 'rb') as f:
        Ys = pickle.load(f)

    with open(train_tokens_path, 'rb') as f:
        tokens_train_list = pickle.load(f)

    # Order-preserving de-duplication. List membership is kept (instead of a
    # set) because the tokens are indexable pairs that may not be hashable.
    unique_tokens = []
    for tokens_train in tokens_train_list:
        for token in tokens_train:
            if token not in unique_tokens:
                unique_tokens.append(token)

    explanations = [
        Explanation(
            name='LF_' + str(index),
            label=word[1],
            condition=create_condition(word[0]),
            word=word[0],
        )
        for index, word in enumerate(progressbar.progressbar(unique_tokens))
    ]

    babbler = Babbler(Cs, Ys)

    babbler.apply(explanations, split=0)

    # All three splits are still requested, as before, even though only
    # splits 0 and 2 are evaluated below.
    Ls = [babbler.get_label_matrix(split) for split in [0, 1, 2]]

    L_train = Ls[0].toarray()
    L_test = Ls[2].toarray()

    predicted_training_labels = [most_frequent(line) for line in L_train]

    # Number of training rows whose predicted label is not 0.
    count = sum(1 for label in predicted_training_labels if label != 0)
    print(count)

    predicted_test_labels = [most_frequent(line) for line in L_test]

    len_wrong_train = calculate_number_wrong(Ys[0], predicted_training_labels)

    len_wrong_test = calculate_number_wrong(Ys[2], predicted_test_labels)

    training_accuracy = percentage(len(Ys[0]) - len_wrong_train, len(Ys[0]))

    test_accuracy = percentage(len(Ys[2]) - len_wrong_test, len(Ys[2]))

    print("Number of wrong in training set: " + str(len_wrong_train))
    print("Number of wrong in test set: " + str(len_wrong_test))
    print("Training Accuracy: " + str(training_accuracy))
    print("Test Accuracy: " + str(test_accuracy))
コード例 #3
0
def test2():
    """Run expression-based explanations through Babbler and print the results.

    Loads ``Cs``, ``Ys`` and a nested expression list from the module-level
    ``DATA_FILE1``, ``DATA_FILE2`` and ``DATA_FILE4`` paths, builds one
    ``Explanation`` per expression (named ``LF_<n>``), applies them on split 0,
    commits, and reduces each row of the split-0 and split-2 label matrices
    with ``most_frequent``. The predictions, wrong counts and accuracies from
    ``calculate_number_wrong_no_abstain`` are printed.
    """
    with open(DATA_FILE1, 'rb') as cs_file:
        Cs = pickle.load(cs_file)

    with open(DATA_FILE2, 'rb') as ys_file:
        Ys = pickle.load(ys_file)

    with open(DATA_FILE4, 'rb') as expr_file:
        expression_groups = pickle.load(expr_file)

    explanations = []
    for group in progressbar.progressbar(expression_groups):
        for item in group:
            # The running length of the list doubles as the LF index.
            lf_name = 'LF_' + str(len(explanations))
            explanations.append(
                Explanation(
                    name=lf_name,
                    label=item[1],
                    condition=create_condition_for_expressions(item[0]),
                    word=item[0],
                )
            )

    babbler = Babbler(Cs, Ys)
    babbler.apply(explanations, split=0)

    label_matrices = [babbler.get_label_matrix(split) for split in (0, 1, 2)]

    babbler.commit()

    # The parses are fetched but not used further; handy when debugging,
    # e.g. print(parses[222]).
    parses = babbler.get_parses()

    train_matrix = label_matrices[0].toarray()
    test_matrix = label_matrices[2].toarray()

    predicted_training_labels = [most_frequent(row) for row in train_matrix]
    predicted_test_labels = [most_frequent(row) for row in test_matrix]

    train_outcome = calculate_number_wrong_no_abstain(Ys[0], predicted_training_labels)
    len_wrong_train, training_accuracy = train_outcome

    test_outcome = calculate_number_wrong_no_abstain(Ys[2], predicted_test_labels)
    len_wrong_test, test_accuracy = test_outcome

    print(predicted_training_labels)
    print(predicted_test_labels)
    print("Number of wrong in training set: " + str(len_wrong_train))
    print("Number of wrong in test set: " + str(len_wrong_test))
    print("Training Accuracy: " + str(training_accuracy))
    print("Test Accuracy: " + str(test_accuracy))
コード例 #4
0
ファイル: core_explanations.py プロジェクト: we1l1n/babble
        'letters':['a','B','C'],
        'smalls':['a','b','c','d'],
        'luckies': [7, 8, 9],
        'unluckies': [0, 13, 66],
    }

# Test candidate (hash: 668761641257950361):
# "City land records show that GM President [Daniel Ammann] and his wife,
# [Pernilla Ammann], bought the 15-bedroom mansion on Balmoral Drive in
# the upscale historic neighborhood on July 31."

logic = [
    # Base
    Explanation(
        condition="True",
        label=1,
        candidate=('foo', 'bar'),
        semantics=('.root', ('.label', ('.int', 1), ('.bool', True)))),
    # And
    Explanation(
        condition="True and True",
        label=1,
        candidate=('foo', 'bar'),
        semantics=('.root', ('.label', ('.int', 1), ('.and', ('.bool', True), ('.bool', True))))),
    # Or
    Explanation(
        condition="False or True",
        label=1,
        candidate=('foo', 'bar'),
        semantics=('.root', ('.label', ('.int', 1), ('.or', ('.bool', False), ('.bool', True))))),
    # Not boolean
コード例 #5
0
ファイル: sample_explanations.py プロジェクト: we1l1n/babble
    'spouse': ['spouse', 'wife', 'husband', 'ex-wife', 'ex-husband'],
    'family': [
        'father', 'father', 'mother', 'sister', 'sisters', 'brother',
        'brothers', 'son', 'sons', 'daughter', 'daughters', 'grandfather',
        'grandmother', 'uncle', 'uncles', 'aunt', 'aunts', 'cousin', 'cousins'
    ],
    'friend':
    ['boyfriend', 'girlfriend', 'boss', 'employee', 'secretary', 'co-worker'],
}
# Extend the 'family' aliases with an "-in-law" variant of every entry
# currently in the list (the comprehension is evaluated before the append).
aliases['family'] += ["{}-in-law".format(f) for f in aliases['family']]

explanations = [
    Explanation(
        name='LF_and_married',
        condition=
        "the word 'and' is between X and Y and 'married' within five words of Y",
        candidate=
        '1bcd8648-8a80-47a3-82d4-38a4a594092f::span:1223:1228~~1bcd8648-8a80-47a3-82d4-38a4a594092f::span:1234:1238',
        label=1),
    Explanation(
        name='LF_third_wheel',
        condition="there is a person between X and Y",
        candidate=
        '6cd34ab0-653b-438e-b966-d7365a31651d::span:595:607~~6cd34ab0-653b-438e-b966-d7365a31651d::span:712:719',
        label=2),
    Explanation(
        name='LF_married_two_people',
        condition=
        "the word 'married' is in the sentence and there are only two people in the sentence",
        candidate=
        'd535c921-f102-4d3b-9891-5a36ed93259e::span:823:830~~d535c921-f102-4d3b-9891-5a36ed93259e::span:836:839',