def ObtainWRTG(weighted_tree_pair, print_result=True):
    """
    Build the wRTG that explains one weighted source/target tree pair.

    Args:
      weighted_tree_pair: a 3-tuple (source tree string, target tree string
        or None, pair weight).
      print_result: when true, a one-character progress marker is written
        to stderr: 'o' if the transducer produced productions, 'x' if not;
        the marker is upper-cased when no target tree was given.

    Returns:
      (wrtg, weighted_tree_pair) when the resulting wRTG has productions,
      otherwise (None, None). The weights of the RTG are not estimated here.

    Module-level globals read here (bad practice, but needed for
    parallelization):
      * transducer
      * feat_inst
      * model_class
      * GetScoreOfDerivation
      * CombineScoresOfDerivations
    """
    src_str, trg_str, _pair_weight = weighted_tree_pair
    src_tree = immutable(tree_or_string(src_str))
    if trg_str is None:
        trg_tree = None
    else:
        trg_tree = immutable(tree_or_string(trg_str))
    wrtg = transducer.Transduce(src_tree, trg_tree, convert_to_prob=False)
    sys.stdout.flush()
    if wrtg.P:
        # Attach the scoring callbacks taken from the module globals.
        wrtg.ScoreDerivation = GetScoreOfDerivation
        wrtg.CombineDerivationScores = CombineScoresOfDerivations
        # NOTE(review): this block was recovered from whitespace-flattened
        # text; confirm that populate_wrtg_feats belongs under this guard.
        if feat_inst:
            feat_inst.SetContext({'src_tree' : src_str})
            model_class.populate_wrtg_feats(wrtg, feat_inst)
        output, marker = (wrtg, weighted_tree_pair), 'o'
    else:
        output, marker = (None, None), 'x'
    if print_result:
        if trg_tree is None:
            marker = marker.upper()
        print(marker, end='', file=sys.stderr)
    return output
def test_Preterminal(self):
    """
    Producing from '(B D E)' against the target 'U' must include the
    production built from the rule (B ?x0| ?x1|) -> U in state 'q'.
    """
    src_tree = immutable(tree_or_string('(B D E)'))
    trg_tree = immutable(tree_or_string('U'))
    produced, _ = self.transducer.Produce(src_tree, trg_tree, 'q', (), ())
    leaf_rule = XTRule(
        'q', tree_or_string('(B ?x0| ?x1|)'), tree_or_string('U'), {}, 1.0)
    wanted = Production(('q', (), ()), RHS(leaf_rule), leaf_rule.weight)
    self.assertIn(wanted, produced)
def test_Nonterminal(self):
    """
    Checks the full set of productions obtained for a two-level tree pair:
    the number of productions must match, and each expected production
    (built from self.rules) must be present.
    """
    src_tree = immutable(tree_or_string('(A (B D E) (C F G))'))
    trg_tree = immutable(tree_or_string('(A (R (T V W) U) (S X))'))
    produced, _ = self.transducer.Produce(src_tree, trg_tree, 'q', (), ())

    # Each entry: (LHS non-terminal, index into self.rules,
    #              RHS non-terminals, or None when RHS takes only the rule).
    layout = [
        (('q', (), ()), 0,
         [('q', (0, ), (0, 1)), ('q', (1, ), (0, 0))]),
        (('q', (0,), (0, 1)), 1, None),
        (('q', (1,), (0, 0)), 2,
         [('q', (1, 0), (0, 0, 0)), ('q', (1, 1), (0, 0, 1))]),
        (('q', (1,), (0, 0)), 3,
         [('q', (1, 0), (0, 0, 1)), ('q', (1, 1), (0, 0, 0))]),
        (('q', (1, 0), (0, 0, 0)), 4, None),
        (('q', (1, 1), (0, 0, 1)), 7, None),
        (('q', (1, 0), (0, 0, 1)), 5, None),
        (('q', (1, 1), (0, 0, 0)), 6, None),
    ]
    wanted = []
    for lhs, rule_index, rhs_nonterminals in layout:
        rule = self.rules[rule_index]
        if rhs_nonterminals is None:
            rhs = RHS(rule)
        else:
            rhs = RHS(rule, rhs_nonterminals)
        wanted.append(Production(lhs, rhs, rule.weight))

    self.assertEqual(len(wanted), len(produced))
    for production in wanted:
        self.assertIn(production, produced)
def test_NonterminalPreterminalIdentity(self):
    """
    With the Identity back-off, the parent rule's state is applied to the
    paths of the variables in the RHS, although the states of those paths
    should be more specific ("copy" and "hypernym").

    The test transduces '(NP (DT the) (NN dog))' without a target tree and
    expects '(NPP (DTT the) (NN canine))' among the n-best output trees.
    """
    src_tree = tree_or_string('(NP (DT the) (NN dog))')
    grammar = [
        XTRule('q', tree_or_string('(NP ?x0|DT ?x1|NN)'),
               tree_or_string('(NPP ?x0|DTT ?x1|NNN)'),
               {(0, ): 'q', (1, ): 'q'}, 1.0),
        XTRule('q', tree_or_string('(DT ?x0|)'),
               tree_or_string('(DTT ?x0|)'),
               {(0, ): 'copy'}, 1.0),
        XTRule('copy', tree_or_string('the'),
               tree_or_string('the'),
               {}, 1.0),
        XTRule('hypernym', tree_or_string('dog'),
               tree_or_string('canine'),
               {}, 1.0),
    ]
    backoffs = [Identity()]
    wrtg = xT('q', grammar, backoffs).Transduce(src_tree, None)
    produced_trees = []
    for out_tree, _ in wrtg.GenerateNBestTrees():
        produced_trees.append(out_tree)
    wanted = immutable(tree_or_string('(NPP (DTT the) (NN canine))'))
    self.assertIn(wanted, produced_trees)
def test_Nonterminal(self):
    """
    Checks that Produce reports every expected non-terminal (state, source
    path, target path, and an empty fourth field) for a two-level tree
    pair.
    """
    src_tree = immutable(tree_or_string('(A (B D E) (C F G))'))
    trg_tree = immutable(tree_or_string('(A (R (T V W) U) (S X))'))
    _, found = self.transducer.Produce(src_tree, trg_tree, 'q', (), ())
    wanted = (
        ('q', (), (), ''),
        ('q', (0, ), (0, 1), ''),
        ('q', (1, ), (0, 0), ''),
        ('q', (1, 0), (0, 0, 0), ''),
        ('q', (1, 1), (0, 0, 1), ''),
        ('q', (1, 0), (0, 0, 1), ''),
        ('q', (1, 1), (0, 0, 0), ''),
    )
    for non_terminal in wanted:
        self.assertIn(non_terminal, found)
def GenerateNBestTrees(self, max_derivations = 50, direction = 'target'):
    """
    Return the distinct trees found among the first derivations, each with
    the sum of the weights of the derivations that produced it.

    Args:
      max_derivations: maximum number of derivations taken from
        self.GenerateTrees(direction). Zero or a negative value yields an
        empty result.
      direction: forwarded unchanged to self.GenerateTrees.

    Returns:
      A list of (immutable tree, accumulated weight) tuples sorted by
      decreasing accumulated weight.
    """
    accumulated_tree_weight = defaultdict(float)
    for i, (tree, weight) in enumerate(self.GenerateTrees(direction)):
        # i is 0-based: reaching i == max_derivations means the budget of
        # max_derivations derivations has already been consumed.
        if i >= max_derivations:
            break
        accumulated_tree_weight[immutable(tree)] += float(weight)
    return sorted(
        accumulated_tree_weight.items(), key=lambda x: x[1], reverse=True)
def GenerateNBestTreesMax(self, max_derivations = 50, direction = 'target'):
    """
    Lazily yield each distinct tree once, with the weight of the first
    derivation that produced it.

    Args:
      max_derivations: maximum number of derivations taken from
        self.GenerateTrees(direction); repeated trees count against this
        budget too. Zero or a negative value yields nothing.
      direction: forwarded unchanged to self.GenerateTrees.

    Yields:
      (tree, weight) exactly as produced by self.GenerateTrees, skipping
      derivations whose tree has already been yielded.
    """
    tree_to_weight = {}
    for i, (tree, weight) in enumerate(self.GenerateTrees(direction)):
        # i is 0-based: reaching i == max_derivations means the budget of
        # max_derivations derivations has already been consumed.
        if i >= max_derivations:
            break
        tree_immutable = immutable(tree)
        current_weight = float(weight)
        if tree_immutable in tree_to_weight:
            # A repeated tree must not outweigh its first occurrence;
            # otherwise the first-seen weight would not be the maximum.
            assert tree_to_weight[tree_immutable] >= current_weight
            continue
        tree_to_weight[tree_immutable] = current_weight
        yield tree, weight
def test_PreterminalIdentity(self):
    """
    With the Identity and LexicalSimilarity back-offs and a single rule
    dog -> perro, transducing '(NN dog)' must offer '(NN perro)' among the
    n-best output trees.
    """
    src_tree = tree_or_string('(NN dog)')
    grammar = [
        XTRule('q', tree_or_string('dog'), tree_or_string('perro'), {}, 1.0),
    ]
    backoffs = [Identity(), LexicalSimilarity()]
    wrtg = xT('q', grammar, backoffs).Transduce(src_tree, None)
    produced_trees = []
    for out_tree, _ in wrtg.GenerateNBestTrees():
        produced_trees.append(out_tree)
    self.assertIn(immutable(tree_or_string('(NN perro)')), produced_trees)
def test_PreterminalUnseenTerminalEqual(self):
    """
    The only 'copy' rule covers the terminal 'italian', not 'dog'. With the
    LexicalSimilarity back-off, transducing '(NN dog)' must still offer
    '(JJ dog)' among the n-best output trees.
    """
    src_tree = tree_or_string('(NN dog)')
    preterminal_rule = XTRule(
        'q', tree_or_string('(NN ?x0|)'), tree_or_string('(JJ ?x0|)'),
        {(0, ): 'copy'}, 1.0)
    copy_rule = XTRule(
        'copy', tree_or_string('italian'), tree_or_string('italian'),
        {}, 1.0)
    backoffs = [LexicalSimilarity()]
    machine = xT('q', [preterminal_rule, copy_rule], backoffs)
    wrtg = machine.Transduce(src_tree, None)
    produced_trees = []
    for out_tree, _ in wrtg.GenerateNBestTrees():
        produced_trees.append(out_tree)
    self.assertIn(immutable(tree_or_string('(JJ dog)')), produced_trees)
def GenerateNBestTreesMax_(self, max_derivations = 50, direction = 'target'):
    """
    Return the distinct trees found among the first derivations, each with
    the weight of the first derivation that produced it.

    Args:
      max_derivations: maximum number of derivations taken from
        self.GenerateTrees(direction); repeated trees count against this
        budget too. Zero or a negative value yields an empty result.
      direction: forwarded unchanged to self.GenerateTrees.

    Returns:
      A list of (immutable tree, weight) tuples sorted by decreasing
      weight.
    """
    tree_to_weight = {}
    for i, (tree, weight) in enumerate(self.GenerateTrees(direction)):
        # i is 0-based: reaching i == max_derivations means the budget of
        # max_derivations derivations has already been consumed.
        if i >= max_derivations:
            break
        tree_immutable = immutable(tree)
        current_weight = float(weight)
        if tree_immutable in tree_to_weight:
            # A repeated tree must not outweigh its first occurrence;
            # otherwise the first-seen weight would not be the maximum.
            assert tree_to_weight[tree_immutable] >= current_weight
            continue
        tree_to_weight[tree_immutable] = current_weight
    return sorted(tree_to_weight.items(), key=lambda x: x[1], reverse=True)
def test_NonConsumingLHSAvoidsInfiniteRTG(self):
    """
    Two rules have a bare-variable LHS ('?x0|NN'); one of them keeps the
    state 'q' for its variable. Transducing '(NN dog)' must still offer
    '(JJ canine)' among the n-best output trees.
    """
    src_tree = tree_or_string('(NN dog)')
    grammar = [
        XTRule('q', tree_or_string('?x0|NN'), tree_or_string('(NN ?x0|)'),
               {(0, ): 'q'}, 0.9),
        XTRule('q', tree_or_string('?x0|NN'), tree_or_string('(JJ ?x0|)'),
               {(0, ): 't'}, 0.9),
        XTRule('t', tree_or_string('(NN dog)'), tree_or_string('canine'),
               {}, 1.0),
    ]
    wrtg = xT('q', grammar).Transduce(src_tree, None)
    produced_trees = []
    for out_tree, _ in wrtg.GenerateNBestTrees():
        produced_trees.append(out_tree)
    self.assertIn(immutable(tree_or_string('(JJ canine)')), produced_trees)
def test_OnlySourceDifferentVarTypes(self):
    """
    Source-only production (no target tree): two 't' rules share the LHS
    '(AA AAA)', but only the one whose RHS root is 'aa' matches the
    '?x0|aa' variable of the parent rule. Exactly two productions are
    expected, using the parent rule and the 'aa' rule but not the 'bb' one.
    """
    parent_rule = XTRule(
        'q', tree_or_string('(A ?x0|AA)'), tree_or_string('(a ?x0|aa)'),
        {(0, ): 't'}, 1.0)
    matching_rule = XTRule(
        't', tree_or_string('(AA AAA)'), tree_or_string('(aa aaa)'), {}, 1.0)
    mismatching_rule = XTRule(
        't', tree_or_string('(AA AAA)'), tree_or_string('(bb bbb)'), {}, 1.0)
    self.transducer = xT('q', [parent_rule, matching_rule, mismatching_rule])
    src_tree = immutable(tree_or_string('(A (AA AAA))'))
    produced, _ = self.transducer.Produce(src_tree, None, 'q', (), ())
    self.assertEqual(2, len(produced))
    used_rules = [production.rhs.rule for production in produced]
    self.assertIn(parent_rule, used_rules)
    self.assertIn(matching_rule, used_rules)
    self.assertNotIn(mismatching_rule, used_rules)
def test_PreterminalIdentityUnseenTerminalSimilar(self):
    """
    With the Identity back-off, the parent rule's state is applied to the
    path of the variable in the RHS, although the state of that path should
    be more specific ("hypernym").

    The only rule is hypernym: italian -> european. With the Identity and
    LexicalSimilarity back-offs, transducing '(NN dog)' must offer
    '(NN canine)' among the n-best output trees.
    """
    src_tree = tree_or_string('(NN dog)')
    grammar = [
        XTRule('hypernym', tree_or_string('italian'),
               tree_or_string('european'), {}, 1.0),
    ]
    backoffs = [Identity(), LexicalSimilarity()]
    wrtg = xT('q', grammar, backoffs).Transduce(src_tree, None)
    produced_trees = []
    for out_tree, _ in wrtg.GenerateNBestTrees():
        produced_trees.append(out_tree)
    self.assertIn(immutable(tree_or_string('(NN canine)')), produced_trees)
def test_NonterminalIdentityNoBackoff(self):
    """
    The rules cover the DT and NN subtrees but not the NP root, and no
    back-off is configured. Transducing '(NP (DT the) (NN dog))' must NOT
    offer '(NP (DTT the) (NNN canine))' among the n-best output trees.
    """
    src_tree = tree_or_string('(NP (DT the) (NN dog))')
    grammar = [
        XTRule('q', tree_or_string('(DT ?x0|)'),
               tree_or_string('(DTT ?x0|)'),
               {(0, ): 'copy'}, 1.0),
        XTRule('copy', tree_or_string('the'),
               tree_or_string('the'),
               {}, 1.0),
        XTRule('q', tree_or_string('(NN ?x0|)'),
               tree_or_string('(NNN ?x0|)'),
               {(0, ): 'hypernym'}, 1.0),
        XTRule('hypernym', tree_or_string('dog'),
               tree_or_string('canine'),
               {}, 1.0),
    ]
    backoffs = []
    wrtg = xT('q', grammar, backoffs).Transduce(src_tree, None)
    produced_trees = []
    for out_tree, _ in wrtg.GenerateNBestTrees():
        produced_trees.append(out_tree)
    unwanted = immutable(tree_or_string('(NP (DTT the) (NNN canine))'))
    self.assertNotIn(unwanted, produced_trees)
def test_NonterminalUnseenTerminalEqualAndSimilar(self):
    """
    A single NP rule sends its two terminals to the 'copy' and 'hypernym'
    states; 'the' has a copy rule, while the only hypernym rule covers
    'italian', not 'dog'. With the LexicalSimilarity back-off, transducing
    '(NP (DT the) (NN dog))' must offer '(NP (DT the) (NN canine))' among
    the n-best output trees.
    """
    src_tree = tree_or_string('(NP (DT the) (NN dog))')
    grammar = [
        XTRule('q', tree_or_string('(NP (DT ?x0|) (NN ?x1|))'),
               tree_or_string('(NP (DT ?x0|) (NN ?x1|))'),
               {(0, 0): 'copy', (1, 0): 'hypernym'}, 1.0),
        XTRule('copy', tree_or_string('the'),
               tree_or_string('the'),
               {}, 1.0),
        XTRule('hypernym', tree_or_string('italian'),
               tree_or_string('european'),
               {}, 1.0),
    ]
    backoffs = [LexicalSimilarity()]
    wrtg = xT('q', grammar, backoffs).Transduce(src_tree, None)
    produced_trees = []
    for out_tree, _ in wrtg.GenerateNBestTrees():
        produced_trees.append(out_tree)
    wanted = immutable(tree_or_string('(NP (DT the) (NN canine))'))
    self.assertIn(wanted, produced_trees)
def test_PreterminalEmptyRHSfail(self):
    """
    Producing from '(B D E)' against the target 'Z' must give no
    productions at all.
    """
    src_tree = immutable(tree_or_string('(B D E)'))
    trg_tree = immutable(tree_or_string('Z'))
    produced, _ = self.transducer.Produce(src_tree, trg_tree, 'q', (), ())
    self.assertEqual(0, len(produced))