def is_in_language(self, tokens):
    """
    Membership checking. Parse the input tokens and return True if
    the sentence is in the language described by the grammar. Otherwise
    return False.

    The check is a bottom-up chart fill: the cell for span (i, j) holds
    every left-hand-side symbol that can be derived over tokens[i:j].
    Width-1 cells are filled from rules whose right-hand side is the
    single token; wider cells are filled by trying every split point k
    and every (left symbol, right symbol) pair found in the two sub-cells.

    Args:
        tokens: Sequence of terminal symbols (words) to test.

    Returns:
        True if the symbol "TOP" is derivable over the whole input,
        False otherwise (including for empty input or unknown tokens).
    """
    # Use the grammar the parser instance carries instead of re-reading the
    # grammar file on every call.
    # NOTE(review): assumes the constructor stores its argument as
    # `self.grammar` -- confirm. If the attribute is absent we fall back to
    # loading 'atis3.pcfg', which is what this method did before.
    grammar = getattr(self, "grammar", None)
    if grammar is None:
        with open('atis3.pcfg', 'r') as grammar_file:
            grammar = Pcfg(grammar_file)

    # `.get` with an empty default works for both dict and defaultdict and,
    # unlike indexing a defaultdict, never inserts new empty entries into
    # the grammar while we probe for rules.
    rules_for = grammar.rhs_to_rules.get

    n = len(tokens)
    if n == 0:
        return False

    chart = {}

    # Width-1 spans: symbols that rewrite directly to each token.
    # rule[0] is the rule's left-hand-side symbol.
    for i, token in enumerate(tokens):
        chart[(i, i + 1)] = {rule[0] for rule in rules_for((token,), ())}

    # Wider spans, shortest first so both sub-cells are always filled
    # before the cell that combines them.
    for width in range(2, n + 1):
        for i in range(n - width + 1):
            j = i + width
            cell = set()
            for k in range(i + 1, j):
                left_cell = chart[(i, k)]
                right_cell = chart[(k, j)]
                if not left_cell or not right_cell:
                    continue
                for left_nt in left_cell:
                    for right_nt in right_cell:
                        for rule in rules_for((left_nt, right_nt), ()):
                            cell.add(rule[0])
            chart[(i, j)] = cell

    return "TOP" in chart[(0, n)]
# Reached terminal return (nt, curr) else: # Yet to reach terminal left = curr[0] right = curr[1] return (nt, get_tree(chart, left[1], left[2], left[0]), get_tree(chart, right[1], right[2], right[0])) answer = util(chart, i, j, nt) return answer if __name__ == "__main__": with open('atis3.pcfg', 'r') as grammar_file: grammar = Pcfg(grammar_file) parser = CkyParser(grammar) # IN in_toks = ['flights', 'from', 'miami', 'to', 'cleveland', '.'] # NOT IN not_in_toks = ['miami', 'flights', 'cleveland', 'from', 'to', '.'] print( "Is ['flights', 'from','miami', 'to', 'cleveland','.'] present in language?\n", parser.is_in_language(in_toks), "\n") print( "Is ['miami', 'flights','cleveland', 'from', 'to','.'] present in language?\n", parser.is_in_language(not_in_toks), "\n") table, probs = parser.parse_with_backpointers(in_toks) assert check_table_format(table)
def parse_with_backpointers(self, tokens):
    """
    Parse the input tokens and return a parse table and a probability table.

    Both tables are dicts keyed by span (i, j) whose values are dicts keyed
    by the left-hand-side symbol derivable over tokens[i:j]:

    * table[(i, i + 1)][symbol] is the token itself.
    * table[(i, j)][symbol] for wider spans is a pair of backpointers
      ((left_symbol, i, k), (right_symbol, k, j)) for the best split.
    * probs[(i, j)][symbol] is the base-10 log probability of the best
      derivation found for that symbol over that span.

    On equal scores the first derivation found is kept (splits are tried
    in increasing k, symbols in cell insertion order).

    Args:
        tokens: Sequence of terminal symbols (words) to parse.

    Returns:
        A (table, probs) tuple as described above. Every span of the
        input gets an entry in both dicts, possibly empty.
    """
    # Use the grammar the parser instance carries instead of re-reading the
    # grammar file on every call.
    # NOTE(review): assumes the constructor stores its argument as
    # `self.grammar` -- confirm. If the attribute is absent we fall back to
    # loading 'atis3.pcfg', which is what this method did before.
    grammar = getattr(self, "grammar", None)
    if grammar is None:
        with open('atis3.pcfg', 'r') as grammar_file:
            grammar = Pcfg(grammar_file)

    # `.get` with an empty default works for both dict and defaultdict and
    # never inserts new empty entries into the grammar while probing.
    rules_for = grammar.rhs_to_rules.get

    n = len(tokens)
    table = {}
    probs = {}

    # Width-1 spans. rule[0] is the left-hand-side symbol and rule[2] the
    # rule probability.
    for i, token in enumerate(tokens):
        span = (i, i + 1)
        cell_table = table[span] = {}
        cell_probs = probs[span] = {}
        for rule in rules_for((token,), ()):
            lhs, rule_prob = rule[0], rule[2]
            if rule_prob <= 0:
                # log10 is undefined here, and a zero-probability rule can
                # never be part of the most probable derivation.
                continue
            cell_table[lhs] = token
            cell_probs[lhs] = math.log10(rule_prob)

    # Wider spans, shortest first so both sub-cells are complete before
    # the cell that combines them.
    for width in range(2, n + 1):
        for i in range(n - width + 1):
            j = i + width
            cell_table = table[(i, j)] = {}
            cell_probs = probs[(i, j)] = {}
            for k in range(i + 1, j):
                left_probs = probs[(i, k)]
                right_probs = probs[(k, j)]
                for left_nt, left_logp in left_probs.items():
                    for right_nt, right_logp in right_probs.items():
                        for rule in rules_for((left_nt, right_nt), ()):
                            lhs, rule_prob = rule[0], rule[2]
                            if rule_prob <= 0:
                                continue
                            candidate = (left_logp + right_logp
                                         + math.log10(rule_prob))
                            # Strict '>' keeps the first-found derivation
                            # when scores tie.
                            if (lhs not in cell_probs
                                    or candidate > cell_probs[lhs]):
                                cell_table[lhs] = ((left_nt, i, k),
                                                   (right_nt, k, j))
                                cell_probs[lhs] = candidate

    return table, probs