예제 #1
0
파일: cky.py 프로젝트: Michaellee955/NLP
 def is_in_language(self, tokens):
     """
     Membership checking with the CKY recognition algorithm.

     A chart maps every span ``(i, j)`` of the input to the set of symbols
     that can produce ``tokens[i:j]``. Width-1 spans are filled from the
     rules whose right-hand side is the single token; wider spans are
     filled by combining every pair of symbols found in two adjacent
     sub-spans and looking that pair up in ``grammar.rhs_to_rules``.

     Args:
         tokens: sequence of terminal strings to recognise.

     Returns:
         True if the symbol "TOP" is found for the span covering the whole
         input, False otherwise (including for an empty input).
     """
     # NOTE(review): the grammar is re-read from disk on every call. The
     # __main__ driver builds the parser as CkyParser(grammar), so the
     # instance presumably already holds a grammar -- confirm and reuse it.
     with open('atis3.pcfg', 'r') as grammar_file:
         grammar = Pcfg(grammar_file)
     rules_for = grammar.rhs_to_rules

     n = len(tokens)
     chart = {}

     # Width-1 spans: the first field of each matching rule is the symbol
     # that rewrites to the token. `.get` keeps unknown tokens as an empty
     # cell instead of relying on the mapping's missing-key behaviour.
     for i, token in enumerate(tokens):
         chart[(i, i + 1)] = {rule[0] for rule in rules_for.get((token, ), ())}

     # Wider spans, shortest first, so both halves of every split are
     # already present in the chart when they are read.
     for width in range(2, n + 1):
         for i in range(n - width + 1):
             j = i + width
             found = set()
             for k in range(i + 1, j):
                 left_symbols = chart[(i, k)]
                 right_symbols = chart[(k, j)]
                 if not left_symbols or not right_symbols:
                     continue
                 for left in left_symbols:
                     for right in right_symbols:
                         for rule in rules_for.get((left, right), ()):
                             found.add(rule[0])
             # Every span gets a cell (possibly empty), so no lookup above
             # can fail and no exception handling is needed.
             chart[(i, j)] = found

     # For an empty input the (0, 0) cell was never created.
     return "TOP" in chart.get((0, n), ())
예제 #2
0
            # Reached terminal
            return (nt, curr)
        else:
            # Yet to reach terminal
            left = curr[0]
            right = curr[1]
            return (nt, get_tree(chart, left[1], left[2], left[0]),
                    get_tree(chart, right[1], right[2], right[0]))

    answer = util(chart, i, j, nt)
    return answer


if __name__ == "__main__":
    # Smoke test: load the ATIS grammar, run two membership queries and
    # one full parse, then validate the shape of the resulting table.
    with open('atis3.pcfg', 'r') as grammar_file:
        grammar = Pcfg(grammar_file)
        parser = CkyParser(grammar)

        # Marked "IN" by the author: expected to be in the language.
        in_toks = ['flights', 'from', 'miami', 'to', 'cleveland', '.']
        # Marked "NOT IN" by the author: expected to be rejected.
        not_in_toks = ['miami', 'flights', 'cleveland', 'from', 'to', '.']

        queries = (
            ("Is ['flights', 'from','miami', 'to', 'cleveland','.'] present in language?\n",
             in_toks),
            ("Is ['miami', 'flights','cleveland', 'from', 'to','.'] present in language?\n",
             not_in_toks),
        )
        for question, sentence in queries:
            verdict = parser.is_in_language(sentence)
            print(question, verdict, "\n")

        table, probs = parser.parse_with_backpointers(in_toks)
        assert check_table_format(table)
예제 #3
0
파일: cky.py 프로젝트: Michaellee955/NLP
    def parse_with_backpointers(self, tokens):
        """
        Parse the input tokens and return a parse table and a probability table.

        Both results are dicts keyed by span ``(i, j)``; each value is a
        dict keyed by symbol:

        * ``table[(i, i + 1)][symbol]`` is the token string at position i.
        * ``table[(i, j)][symbol]`` for wider spans is the backpointer
          ``((left_symbol, i, k), (right_symbol, k, j))`` of the best split.
        * ``probs[(i, j)][symbol]`` is the base-10 log probability of the
          best derivation found for that symbol over that span.

        Every span of the input gets an entry in both dicts, possibly empty.

        Args:
            tokens: sequence of terminal strings to parse.

        Returns:
            The tuple ``(table, probs)`` described above.
        """
        # NOTE(review): the grammar is re-read from disk on every call. The
        # __main__ driver builds the parser as CkyParser(grammar), so the
        # instance presumably already holds a grammar -- confirm and reuse it.
        with open('atis3.pcfg', 'r') as grammar_file:
            grammar = Pcfg(grammar_file)
        rules_for = grammar.rhs_to_rules

        n = len(tokens)
        table = {}
        probs = {}

        # Width-1 spans: field 0 of a rule is the symbol, field 2 its
        # probability.
        for i, token in enumerate(tokens):
            span = (i, i + 1)
            table[span] = {}
            probs[span] = {}
            for rule in rules_for.get((token, ), ()):
                symbol = rule[0]
                table[span][symbol] = token
                probs[span][symbol] = math.log10(rule[2])

        # Wider spans, shortest first, so both halves of every split are
        # already filled in when they are read.
        for width in range(2, n + 1):
            for i in range(n - width + 1):
                j = i + width
                cell = table[(i, j)] = {}
                cell_probs = probs[(i, j)] = {}
                for k in range(i + 1, j):
                    # table and probs share keys and insertion order, so
                    # iterating probs visits symbols in the table's order.
                    left_probs = probs[(i, k)]
                    right_probs = probs[(k, j)]
                    for left_sym, left_lp in left_probs.items():
                        for right_sym, right_lp in right_probs.items():
                            pair = (left_sym, right_sym)
                            for rule in rules_for.get(pair, ()):
                                try:
                                    rule_lp = math.log10(rule[2])
                                except ValueError:
                                    # A non-positive probability has no
                                    # logarithm; skip just this rule rather
                                    # than abandoning the whole split point.
                                    continue
                                symbol = rule[0]
                                score = left_lp + right_lp + rule_lp
                                # Strict '>' keeps the first-found derivation
                                # on ties.
                                if (symbol not in cell
                                        or score > cell_probs[symbol]):
                                    cell[symbol] = ((left_sym, i, k),
                                                    (right_sym, k, j))
                                    cell_probs[symbol] = score
        return table, probs