Example #1
0
def main():
    """Interactively tag and parse statements typed on standard input.

    Loops forever. Each iteration prompts for a statement, lower-cases it,
    tags it with ``Viterbi``, prints the resulting tag array and passes it
    to ``CKY``.

    When every tag from index 1 onwards is identical, the statement is
    re-tagged one token at a time (tokens come from ``word_tokenize``), and
    the tokens 'open' and 'file' are forced to 'VB' and 'NN' respectively.

    Raises:
        IndexError: if ``Viterbi`` returns fewer than two elements, since
            index 1 is read unconditionally.
    """
    # raw_input only exists on Python 2; fall back to input on Python 3 so
    # the function runs on both without changing Python 2 behaviour.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    # Tags forced for specific tokens, overriding the tagger's answer.
    forced_tags = {'open': 'VB', 'file': 'NN'}

    while True:
        print("Enter a statement")
        statement = read_line().lower()

        tagged_arr = Viterbi(statement)

        # Index 0 is never inspected or overwritten here.
        # NOTE(review): presumably a start marker -- confirm against Viterbi.
        tag = tagged_arr[1]
        all_tags_equal = all(
            tagged_arr[i] == tag for i in range(2, len(tagged_arr))
        )

        if all_tags_equal:
            # Re-tag token by token; token i maps to slot i + 1.
            for i, token in enumerate(word_tokenize(statement)):
                # Viterbi is still called for forced tokens, as before, so
                # any side effects it has are unchanged.
                token_tag = Viterbi(token)[1]
                tagged_arr[i + 1] = forced_tags.get(token, token_tag)

        print(tagged_arr)
        CKY(tagged_arr)
Example #2
0
"Prep -> 'to' | 'with'",
"Vi -> 'ate'", #intransitive
"Vt -> 'ate' | 'book' | 'Book' | 'gave' | 'told'", #transitive
"Vdt -> 'gave' | 'told' ", #ditransitive
"Subconj -> 'that'", #subordinating conjunction
"Mod -> 'Can' | 'will'", #modal verbs
"Aux -> 'did' ", #auxiliary verbs
"WhAdv -> 'Why'",
"PropN -> 'John' | 'Mary' | 'NYC' | 'London'",
"Adj -> 'nice' | 'drawing'",
"Pro -> 'you' | 'he'",
"Adv -> 'today'"
])

# Print the simpler grammar, then build a chart from it.
print(grammar) # the simpler grammar
chart=CKY(grammar)
# This illustrates tracing of a very simple sentence; feel free to try others.
# NOTE(review): the second argument (True) presumably switches tracing on --
# confirm against CKY.recognise.
chart.recognise(tokenise("the frogs swim"),True)
chart.pprint()

# Build a chart with the larger grammar.
chart2=CKY(grammar2)

# Note, please do _not_ use the Tree.draw() method uncommented
# _anywhere in this file_ (you are encouraged to use it in preparing
# your report).

# The sentences to examine.
#
for s in ["John gave a book to Mary.",
           "John gave Mary a book.",
Example #3
0
NP -> Det Nom | Nom | NP PP
Det -> NP "'s"
Nom -> N SRel | N
VP -> Vi | Vt NP | VP PP
PP -> Prep NP
SRel -> Relpro VP
Det -> 'a' | 'the'
N -> 'fish' | 'frogs' | 'soup' | 'children' | 'books'
Prep -> 'in' | 'for'
Vt -> 'saw' | 'ate' | 'read'
Vi -> 'fish' | 'swim'
Relpro -> 'that'
""")

# Print the small grammar and build a chart from it.
print(grammar)
chart = CKY(grammar)
# Whitespace splitting is a stopgap: this should use tokenise(s) once
# that's fixed.
chart.recognise("the frogs swim".split())
chart.pprint()

# Q1: this call was meant to be uncommented once Q1 was completed; it is
# enabled here.
chart.recognise(tokenise("the frogs swim"), True)
# Q3: the next three calls were meant to be uncommented when working on Q3;
# they are enabled here.
chart.recognise(tokenise("fish fish"))
chart.pprint()
chart.recognise(tokenise("fish fish"), True)

# Use this grammar for the rest of the assignment
grammar2 = parse_grammar([
    "S -> Sdecl '.' | Simp '.' | Sq '?' ", "Sdecl -> NP VP", "Simp -> VP",
    "Sq -> Sqyn | Swhadv", "Sqyn -> Mod Sdecl | Aux Sdecl",
Example #4
0
    def single_layer_parser(self, trainer, diora, info):
        """Run the configured parse predictor over every evaluation batch.

        While running, ``diora.K`` is set to ``self.K`` (when that is not
        None) and ``diora.choose_tree`` is set to ``self.choose_tree`` (when
        ``diora`` has such an attribute). Both are restored before this
        method returns, including when an exception escapes.

        Batches whose sentence length is 2 or less are skipped. When
        ``self.ground_truth`` is set, it is expanded with
        ``os.path.expanduser``, read as JSON lines keyed by ``example_id``,
        and the matching records are attached to each batch under
        ``'ground_truth'``.

        Args:
            trainer: Provides ``step(batch_map, train=..., compute_loss=...,
                info=...)``; in 'cky' mode ``trainer.net.add_bos_token`` and
                ``trainer.net.add_eos_token`` are also read.
            diora: Model handed to the predictor; its ``K`` and
                ``choose_tree`` are temporarily overridden.
            info: Mapping; ``info.get('epoch', 0)`` selects the iterator
                epoch. Also forwarded to the evaluation hooks.

        Returns:
            list: every item yielded by ``parse_predictor.predict`` for the
            processed batches, in iteration order.

        Raises:
            ValueError: if ``self.cky_mode`` is neither 'cky' nor 'diora'.
        """
        logger = self.logger
        epoch = info.get('epoch', 0)

        # Capture the model state that is overridden below so it can be
        # restored in the ``finally`` clause.
        original_K = diora.K
        has_choose_tree = hasattr(diora, 'choose_tree')
        if has_choose_tree:
            original_choose_tree = diora.choose_tree

        pred_lst = []
        eval_cache = {}

        try:
            if self.K is not None:
                diora.safe_set_K(self.K)
            if has_choose_tree:
                diora.choose_tree = self.choose_tree

            word2idx = self.dataset['word2idx']
            if self.cky_mode == 'cky':
                parse_predictor = CKY(
                    net=diora,
                    word2idx=word2idx,
                    add_bos_token=trainer.net.add_bos_token,
                    add_eos_token=trainer.net.add_eos_token)
            elif self.cky_mode == 'diora':
                parse_predictor = TreesFromDiora(diora=diora,
                                                 word2idx=word2idx,
                                                 outside=self.outside,
                                                 oracle=self.oracle)
            else:
                # Fail early and clearly instead of hitting an unbound
                # ``parse_predictor`` in the middle of the eval loop.
                raise ValueError(
                    'Unknown cky_mode: {!r}'.format(self.cky_mode))

            batches = self.batch_iterator.get_iterator(random_seed=self.seed,
                                                       epoch=epoch)

            logger.info('Parsing.')

            ground_truth_data = None
            if self.ground_truth is not None:
                self.ground_truth = os.path.expanduser(self.ground_truth)
                ground_truth_data = {}
                with open(self.ground_truth) as f:
                    for line in f:
                        ex = json.loads(line)
                        ground_truth_data[ex['example_id']] = ex

            # Eval loop.
            with torch.no_grad():
                for batch_map in batches:
                    # Two-way unpacking is kept so a shape that is not
                    # two-dimensional still fails here.
                    _, length = batch_map['sentences'].shape

                    if length <= 2:
                        continue

                    example_ids = batch_map['example_ids']
                    if ground_truth_data is not None:
                        batch_map['ground_truth'] = [
                            ground_truth_data[x] for x in example_ids
                        ]

                    # The return value of the step is not used here.
                    trainer.step(batch_map,
                                 train=False,
                                 compute_loss=False,
                                 info={
                                     'inside_pool': self.inside_pool,
                                     'outside': self.outside
                                 })

                    pred_lst.extend(parse_predictor.predict(batch_map))

                    self.eval_loop_hook(trainer, diora, info, eval_cache,
                                        batch_map)

            self.post_eval_hook(trainer, diora, info, eval_cache)
        finally:
            # Always put the model back the way it was found.
            diora.safe_set_K(original_K)
            if has_choose_tree:
                diora.choose_tree = original_choose_tree

        return pred_lst
Example #5
0
NP -> Det Nom | Nom | NP PP
Det -> NP "'s"
Nom -> N SRel | N
VP -> Vi | Vt NP | VP PP
PP -> Prep NP
SRel -> Relpro VP
Det -> 'a' | 'the'
N -> 'fish' | 'frogs' | 'soup' | 'children' | 'books'
Prep -> 'in' | 'for'
Vt -> 'saw' | 'ate' | 'read'
Vi -> 'fish' | 'swim'
Relpro -> 'that'
""")

# Print the small grammar and build a chart from it.
# The parenthesised form prints the same text on Python 2 and is required
# on Python 3, where the bare print statement is a syntax error.
print(grammar)
chart = CKY(grammar)
# Whitespace splitting is a stopgap: this should use tokenise(s) once
# that's fixed.
chart.parse("the frogs swim".split())
chart.pprint()
# Use this grammar for the rest of the assignment

grammar2=parse_grammar([
"S -> Sdecl '.' | Simp '.' | Sq '?' ",
"Sdecl -> NP VP",
"Simp -> VP",
"Sq -> Sqyn | Swhadv",
"Sqyn -> Mod Sdecl | Aux Sdecl",
"Swhadv -> WhAdv Sqyn",
"Sc -> Subconj Sdecl",
"NP -> PropN | Pro | NP0 ",
"NP0 -> NP1 | NP0 PP",
Example #6
0
    "Prep -> 'to' | 'with'",
    "Vi -> 'ate'",  #intransitive
    "Vt -> 'ate' | 'book' | 'Book' | 'gave' | 'told'",  #transitive
    "Vdt -> 'gave' | 'told' ",  #ditransitive
    "Subconj -> 'that'",  #subordinating conjunction
    "Mod -> 'Can' | 'will'",  #modal verbs
    "Aux -> 'did' ",  #auxiliary verbs
    "WhAdv -> 'Why'",
    "PropN -> 'John' | 'Mary' | 'NYC' | 'London'",
    "Adj -> 'nice' | 'drawing'",
    "Pro -> 'you' | 'he'",
    "Adv -> 'today'"
])

# Print the simpler grammar, then build a chart from it.
print(grammar)  # the simpler grammar
chart = CKY(grammar)
# The two disabled lines below illustrate tracing of a very simple sentence;
# feel free to try others.
# chart.recognise(tokenise("the frogs swim"),True)
# chart.pprint()

# Build a chart with the larger grammar.
chart2 = CKY(grammar2)

# Note: please do _not_ leave a call to the Tree.draw() method uncommented
# _anywhere in this file_ (you are encouraged to use it in preparing
# your report).

# The sentences to examine.
# NOTE(review): the second argument (True) presumably switches tracing on --
# confirm against CKY.recognise.
for s in ["Book a flight to NYC."]:
    print(s, chart2.recognise(tokenise(s), True))