コード例 #1
0
ファイル: model_util.py プロジェクト: h0m3brew/SummerProject
def crazy2_get_feed(path,
                    batch_size,
                    word_to_id,
                    max_premise_length,
                    max_hypothesis_length,
                    num_iter=None,
                    shuffle=False):
    """Yield batches of NLI examples read from the file ``path + "1256"``.

    Each line of the file is a JSON object with "sentence1", "sentence2"
    and "gold_label" (an indexable sequence of at least 12 per-position
    labels). Compound premises (containing " and ", " or " or " then ")
    are rebuilt from their ``du.parse_sentence`` parse before id
    conversion; simple sentences are used verbatim.

    Args:
        path: prefix of the data file; "1256" is appended.
        batch_size: number of examples per yielded batch.
        word_to_id: vocabulary mapping consumed by ``sentence_to_id``.
        max_premise_length: padding length for premise id sequences.
        max_hypothesis_length: padding length for hypothesis id sequences.
        num_iter: number of batches to yield; if None it is derived from
            the number of examples read.
        shuffle: if True, shuffle the file's lines before batching.

    Yields:
        Tuples ``(premises, premise_lengths, hypotheses,
        hypothesis_lengths, labels, 1256)`` of numpy arrays sliced per
        batch; the trailing 1256 echoes the file suffix.
    """
    data, _, _ = gd.process_data(1.0)
    premises = []
    premise_lengths = []
    hypotheses = []
    hypothesis_lengths = []
    labels = []

    def _flatten(sentence):
        # Parse once (the original parsed the same sentence three times)
        # and rebuild "<left> <conjunction> <right>" from the parse.
        parts = du.parse_sentence(data, sentence)
        return (parts[0].emptystring + " " + parts[1] + " " +
                parts[2].emptystring)

    with open(path + "1256", 'r') as f:
        lines = f.readlines()
        if shuffle:
            random.shuffle(lines)
        for line in lines:
            example = json.loads(line)
            sentence1 = example["sentence1"]
            sentence2 = example["sentence2"]
            if (" and " in sentence1 or " or " in sentence1
                    or " then " in sentence1):
                prem = _flatten(sentence1)
                hyp = _flatten(sentence2)
            else:
                prem = sentence1
                hyp = sentence2
            premises.append(
                sentence_to_id(prem, word_to_id, max_premise_length))
            premise_lengths.append(len(prem.split()))
            hypotheses.append(
                sentence_to_id(hyp, word_to_id, max_hypothesis_length))
            hypothesis_lengths.append(len(hyp.split()))
            labels.append(
                [label_to_num(example["gold_label"][i]) for i in range(12)])
            # Stop reading once enough examples for num_iter batches exist.
            if num_iter is not None and len(labels) > num_iter * batch_size:
                break
    if num_iter is None:
        num_iter = int(math.ceil(len(labels) / batch_size))
    for i in range(num_iter):
        lo, hi = i * batch_size, (i + 1) * batch_size
        yield (np.array(premises[lo:hi]),
               np.array(premise_lengths[lo:hi]),
               np.array(hypotheses[lo:hi]),
               np.array(hypothesis_lengths[lo:hi]),
               np.array(labels[lo:hi]), 1256)
コード例 #2
0
ファイル: interface.py プロジェクト: h0m3brew/SummerProject
import natural_logic_model as nlm
import data_util
import generate_data as gd

data, _, _ = gd.process_data(1.0)
print(
    "Input a premise sentence and hypothesis sentence of the form:\n Determiner (Adjective) Noun (does not) Verb Determiner Adjective Noun \n Make sure you conjugate to the present tense and use vocabulary from the files in the Data folder\n You can also combine two simple sentences of that form with: or, and, if...then"
)
while True:
    premise = data_util.parse_sentence(data,
                                       input("Enter a premise sentence:\n"))
    while premise == None:
        premise = data_util.parse_sentence(
            data,
            input(
                "There was some issue with the entered premise\n Enter a premise sentence:\n"
            ))
    hypothesis = data_util.parse_sentence(
        data, input("Enter a hypothesis sentence:\n"))
    while hypothesis == None:
        hypothesis = data_util.parse_sentence(
            data,
            input(
                "There was some issue with the entered premise\n Enter a premise sentence:\n"
            ))
    if len(premise) == 1:
        label = nlm.get_label(
            nlm.compute_simple_relation(premise[0], hypothesis[0]))
    else:
        label = nlm.get_label(
            nlm.compute_boolean_relation(premise[0], premise[1], premise[2],
コード例 #3
0
         example2["gold_label"] = "equivalence"
     elif example["sentence2"].split(
     )[i] == "emptystring" and example["sentence2"].split()[
             i + 1] == example["sentence1"].split()[i + 1]:
         example2["gold_label"] = "entails"
     elif example["sentence1"].split(
     )[i] == "emptystring" and example["sentence2"].split()[
             i + 1] == example["sentence1"].split()[i + 1]:
         example2["gold_label"] = "reverse entails"
     else:
         example2["gold_label"] = "independence"
     label.append(example2["gold_label"])
 example5 = dict()
 example5["sentence1"] = adjoin(example["sentence1"].split()[-5:])
 example5["sentence2"] = adjoin(example["sentence2"].split()[-5:])
 premise = du.parse_sentence(data, example["sentence1"])[0]
 hypothesis = du.parse_sentence(data, example["sentence2"])[0]
 verb_relation = nlm.standard_lexical_merge(premise.verb,
                                            hypothesis.verb)
 adverb_relation = nlm.standard_lexical_merge(
     premise.adverb, hypothesis.adverb)
 object_negation_signature = nlm.negation_merge(
     premise.object_negation, hypothesis.object_negation)
 object_determiner_signature = nlm.determiner_merge(
     premise.natlog_object_determiner,
     hypothesis.natlog_object_determiner)
 object_noun_relation = nlm.standard_lexical_merge(
     premise.object_noun, hypothesis.object_noun)
 object_adjective_relation = nlm.standard_lexical_merge(
     premise.object_adjective, hypothesis.object_adjective)
 VP_relation = nlm.standard_phrase(adverb_relation, verb_relation)
コード例 #4
0
 with open("simple_solutions", "r") as f:
     simple_solutions = json.loads(f.read())
 for encoding in simple_solutions:
     encoding = json.loads(encoding)
     premise, hypothesis = gd.encoding_to_example(data, encoding)
     if gd.example_to_encoding(premise, hypothesis) != encoding:
         print("We have a problem with the simple encoding")
     nlm_label = nlm.get_label(
         nlm.compute_simple_relation(premise, hypothesis))
     if simple_solutions[json.dumps(encoding)] != nlm_label:
         print("We have a problem with the simple file")
 print("simple file is good")
 with open("boolean_solutions", "r") as f:
     boolean_solutions = json.loads(f.read())
 simple1 = [
     (data_util.parse_sentence(data, "some wizard eats some flute")[0],
      data_util.parse_sentence(data, "some wizard eats some flute")[0])
 ]
 simple1.append(
     (data_util.parse_sentence(data,
                               "every wizard eats every flute")[0],
      data_util.parse_sentence(data, "some wizard eats some flute")[0]))
 simple1.append(
     (data_util.parse_sentence(data, "some wizard eats some flute")[0],
      data_util.parse_sentence(data,
                               "every wizard eats every flute")[0]))
 simple1.append(
     (data_util.parse_sentence(data, "no wizard eats some flute")[0],
      data_util.parse_sentence(data,
                               "some wizard eats every flute")[0]))
 simple1.append(
コード例 #5
0
ファイル: model_util.py プロジェクト: h0m3brew/SummerProject
def crazy_get_feed(path,
                   batch_size,
                   word_to_id,
                   max_premise_length,
                   max_hypothesis_length,
                   num_iter=None,
                   shuffle=False):
    """Yield batches drawn from five data files, interleaved at random.

    The files ``path + s`` for ``s`` in ``["", "1", "2", "5", "6"]`` are
    each read into their own slot of five parallel per-file lists. Each
    line is a JSON object with "sentence1", "sentence2" and a scalar
    "gold_label". Batches from the five files are shuffled together; each
    yielded tuple ends with the file's length code from ``lengths``
    (9, 1, 2, 5 or 6).

    Args:
        path: prefix shared by the five data files.
        batch_size: number of examples per yielded batch.
        word_to_id: vocabulary mapping consumed by ``sentence_to_id``.
        max_premise_length: padding length for premise id sequences.
        max_hypothesis_length: padding length for hypothesis id sequences.
        num_iter: number of batches per file; if None it is derived from
            the size of the first file.
        shuffle: if True, shuffle each file's lines before batching.

    Yields:
        Tuples ``(premises, premise_lengths, hypotheses,
        hypothesis_lengths, labels, length_code)`` of numpy arrays.
    """
    data, _, _ = gd.process_data(1.0)
    premises = [[], [], [], [], []]
    premise_lengths = [[], [], [], [], []]
    hypotheses = [[], [], [], [], []]
    hypothesis_lengths = [[], [], [], [], []]
    labels = [[], [], [], [], []]

    def _flatten(sentence):
        # Parse once (the original parsed the same sentence three times)
        # and rebuild "<left> <conjunction> <right>" from the parse.
        parts = du.parse_sentence(data, sentence)
        return (parts[0].emptystring + " " + parts[1] + " " +
                parts[2].emptystring)

    # `suffix` instead of `type`: avoid shadowing the builtin.
    for i, suffix in enumerate(["", "1", "2", "5", "6"]):
        with open(path + suffix, 'r') as f:
            lines = f.readlines()
            if shuffle:
                random.shuffle(lines)
            for line in lines:
                example = json.loads(line)
                sentence1 = example["sentence1"]
                sentence2 = example["sentence2"]
                if (" and " in sentence1 or " or " in sentence1
                        or " then " in sentence1):
                    prem = _flatten(sentence1)
                    hyp = _flatten(sentence2)
                else:
                    prem = sentence1
                    hyp = sentence2
                # BUG FIX: the compound branch previously appended to the
                # outer 5-way lists (premises.append(...)) instead of the
                # current file's slot, corrupting the per-file partition
                # that premises[j % 5] indexing relies on later.
                premises[i].append(
                    sentence_to_id(prem, word_to_id, max_premise_length))
                premise_lengths[i].append(len(prem.split()))
                hypotheses[i].append(
                    sentence_to_id(hyp, word_to_id, max_hypothesis_length))
                hypothesis_lengths[i].append(len(hyp.split()))
                labels[i].append(label_to_num(example["gold_label"]))
                # BUG FIX: len(labels) is always 5 (a list of 5 lists), so
                # the original early-stop never fired; count the examples
                # read from the current file instead.
                if (num_iter is not None
                        and len(labels[i]) > num_iter * batch_size):
                    break
    if num_iter is None:
        num_iter = int(math.ceil(len(labels[0]) / batch_size))
    # Interleave (batch index, file index) pairs from all five files.
    batches = [(i, j) for i in range(num_iter) for j in range(5)]
    # Sentence-length code reported for each file slot.
    lengths = {0: 9, 1: 1, 2: 2, 3: 5, 4: 6}
    # One shuffle gives a uniform permutation; the original shuffled
    # three times, which is redundant.
    random.shuffle(batches)
    for i, j in batches:
        lo, hi = i * batch_size, (i + 1) * batch_size
        yield (np.array(premises[j][lo:hi]),
               np.array(premise_lengths[j][lo:hi]),
               np.array(hypotheses[j][lo:hi]),
               np.array(hypothesis_lengths[j][lo:hi]),
               np.array(labels[j][lo:hi]),
               lengths[j])
コード例 #6
0
         premise, hypothesis = gd.encoding_to_example(data,encoding)
         if gd.example_to_encoding(premise,hypothesis) != encoding:
             print("We have a problem with the simple encoding")
         nlm_label = nlm.get_label(nlm.compute_simple_relation(premise, hypothesis))
         if convert(simple_solutions[json.dumps(encoding)]) != nlm_label:
             print("We have a problem with the simple file")
     print("simple file is good")
 examples = gd.generate_balanced_data("simple_solutions", "boolean_solutions", 100, 0, data,simple_sampling = "level 2", boolean_sampling = "level 1")
 gd.save_data(examples, "test")
 examples = []
 with open("test", "r") as f:
     lines = f.readlines()
     for line in lines:
         examples.append(json.loads(line))
 for example in examples:
     premise = data_util.parse_sentence(data,example["sentence1"])
     hypothesis = data_util.parse_sentence(data,example["sentence2"])
     if len(premise) == 1:
         fol_label = fol.get_label(premise[0], hypothesis[0])
         nlm_label = nlm.get_label(nlm.compute_simple_relation(premise[0], hypothesis[0]))
         if example["gold_label"] != fol_label or fol_label != nlm_label:
             print(example["gold_label"] , fol_label,nlm_label)
             print("We have a problem with simple generation")
     else:
         premise1 = premise[0]
         premise_conjunction = premise[1]
         premise2 = premise[2]
         hypothesis1 = hypothesis[0]
         hypothesis_conjunction = hypothesis[1]
         hypothesis2 = hypothesis[2]
         nlm_label = nlm.get_label(nlm.compute_boolean_relation(premise1, premise_conjunction, premise2, hypothesis1, hypothesis_conjunction, hypothesis2))