コード例 #1
0
def ObtainWRTG(weighted_tree_pair, print_result=True):
  """
  Build the wRTG that explains one weighted source/target tree pair.

  weighted_tree_pair is a triple (source tree string, target tree string
  or None, pair weight). The source (and, when given, the target) string is
  parsed with tree_or_string and frozen with immutable before being handed
  to the transducer. The weights of the RTG are not estimated here.

  Returns (wrtg, weighted_tree_pair) when the resulting wRTG has a non-empty
  P attribute; otherwise the transducer could not explain the pair with the
  rules it has and (None, None) is returned.

  When print_result is true, a one-character progress mark is written to
  stderr without a newline: 'o' on success, 'x' on failure, upper-cased when
  no target tree was supplied.

  Module-level globals used here (bad practice, but needed for
  parallelization):
    * transducer
    * feat_inst
    * model_class
    * GetScoreOfDerivation
    * CombineScoresOfDerivations
  """
  src_str, trg_str, _pair_weight = weighted_tree_pair
  source = immutable(tree_or_string(src_str))
  target = immutable(tree_or_string(trg_str)) if trg_str is not None else None
  wrtg = transducer.Transduce(source, target, convert_to_prob=False)
  sys.stdout.flush()
  explained = bool(wrtg.P)
  if explained:
    # Attach the scoring callbacks and populate features for this wRTG.
    wrtg.ScoreDerivation = GetScoreOfDerivation
    wrtg.CombineDerivationScores = CombineScoresOfDerivations
    if feat_inst:
      feat_inst.SetContext({'src_tree' : src_str})
    model_class.populate_wrtg_feats(wrtg, feat_inst)
    output = (wrtg, weighted_tree_pair)
  else:
    output = (None, None)
  if print_result:
    mark = 'o' if explained else 'x'
    if target is None:
      mark = mark.upper()
    print(mark, end='', file=sys.stderr)
  return output
コード例 #2
0
 def test_Preterminal(self):
     """Produce on '(B D E)' -> 'U' from state 'q' yields the matching rule's production."""
     source = immutable(tree_or_string('(B D E)'))
     target = immutable(tree_or_string('U'))
     productions, _ = self.transducer.Produce(source, target, 'q', (), ())
     # Rule mapping the whole preterminal (B ?x0 ?x1) to the terminal U.
     rule = XTRule('q', tree_or_string('(B ?x0| ?x1|)'),
                   tree_or_string('U'), {}, 1.0)
     wanted = Production(('q', (), ()), RHS(rule), rule.weight)
     self.assertIn(wanted, productions)
コード例 #3
0
    def test_Nonterminal(self):
        """Produce on the full tree pair yields exactly the eight expected productions."""
        source = immutable(tree_or_string('(A (B D E) (C F G))'))
        target = immutable(tree_or_string('(A (R (T V W) U) (S X))'))
        productions, _ = self.transducer.Produce(source, target, 'q', (), ())

        # Each entry: (index into self.rules, LHS non-terminal,
        # RHS non-terminals or None when the RHS is built from the rule alone).
        layout = [
            (0, ('q', (), ()),
             [('q', (0, ), (0, 1)), ('q', (1, ), (0, 0))]),
            (1, ('q', (0,), (0, 1)), None),
            (2, ('q', (1,), (0, 0)),
             [('q', (1, 0), (0, 0, 0)), ('q', (1, 1), (0, 0, 1))]),
            (3, ('q', (1,), (0, 0)),
             [('q', (1, 0), (0, 0, 1)), ('q', (1, 1), (0, 0, 0))]),
            (4, ('q', (1, 0), (0, 0, 0)), None),
            (7, ('q', (1, 1), (0, 0, 1)), None),
            (5, ('q', (1, 0), (0, 0, 1)), None),
            (6, ('q', (1, 1), (0, 0, 0)), None),
        ]
        expected_productions = []
        for rule_index, lhs, children in layout:
            rule = self.rules[rule_index]
            rhs = RHS(rule) if children is None else RHS(rule, children)
            expected_productions.append(Production(lhs, rhs, rule.weight))

        self.assertEqual(len(expected_productions), len(productions))
        for wanted in expected_productions:
            self.assertIn(wanted, productions)
コード例 #4
0
 def test_NonterminalPreterminalIdentity(self):
     """
     Check state propagation through the Identity back-off.

     The Identity back-off hands the state of the parent rule to the paths
     of the variables in the RHS, whereas those paths should receive the
     more specific states "copy" and "hypernym".
     """
     source = tree_or_string('(NP (DT the) (NN dog))')
     rules = [
         XTRule('q', tree_or_string('(NP ?x0|DT ?x1|NN)'),
                tree_or_string('(NPP ?x0|DTT ?x1|NNN)'),
                {(0, ): 'q', (1, ): 'q'}, 1.0),
         XTRule('q', tree_or_string('(DT ?x0|)'),
                tree_or_string('(DTT ?x0|)'), {(0, ): 'copy'}, 1.0),
         XTRule('copy', tree_or_string('the'), tree_or_string('the'),
                {}, 1.0),
         XTRule('hypernym', tree_or_string('dog'),
                tree_or_string('canine'), {}, 1.0),
     ]
     transducer = xT('q', rules, [Identity()])
     wrtg = transducer.Transduce(source, None)
     candidates = [candidate for candidate, _ in wrtg.GenerateNBestTrees()]
     wanted = immutable(tree_or_string('(NPP (DTT the) (NN canine))'))
     self.assertIn(wanted, candidates)
コード例 #5
0
 def test_Nonterminal(self):
     """Every expected non-terminal is among those returned by Produce."""
     source = immutable(tree_or_string('(A (B D E) (C F G))'))
     target = immutable(tree_or_string('(A (R (T V W) U) (S X))'))
     _, non_terminals = self.transducer.Produce(source, target, 'q', (), ())
     # Checked in this order; the first missing entry fails the test.
     wanted_non_terminals = (
         ('q', (), (), ''),
         ('q', (0, ), (0, 1), ''),
         ('q', (1, ), (0, 0), ''),
         ('q', (1, 0), (0, 0, 0), ''),
         ('q', (1, 1), (0, 0, 1), ''),
         ('q', (1, 0), (0, 0, 1), ''),
         ('q', (1, 1), (0, 0, 0), ''),
     )
     for wanted in wanted_non_terminals:
         self.assertIn(wanted, non_terminals)
コード例 #6
0
ファイル: wrtg.py プロジェクト: ct-clmsn/t2t-qa
 def GenerateNBestTrees(self, max_derivations = 50, direction = 'target'):
   """
   Return the distinct trees of the first derivations, heaviest first.

   Reads at most `max_derivations` (tree, weight) pairs from
   self.GenerateTrees(direction). Derivations that produce the same tree
   (compared after immutable(tree)) have their weights summed.

   Returns a list of (immutable tree, summed weight) tuples sorted by
   decreasing summed weight; the list is empty when no derivation is read.
   """
   accumulated_tree_weight = defaultdict(float)
   for i, (tree, weight) in enumerate(self.GenerateTrees(direction)):
     # i is zero-based, so stopping at i == max_derivations consumes exactly
     # max_derivations derivations (a `>` test would let one extra through).
     if i >= max_derivations:
       break
     accumulated_tree_weight[immutable(tree)] += float(weight)
   return sorted(accumulated_tree_weight.items(),
                 key=lambda tree_weight: tree_weight[1], reverse=True)
コード例 #7
0
ファイル: wrtg.py プロジェクト: ct-clmsn/t2t-qa
 def GenerateNBestTreesMax(self, max_derivations = 50, direction = 'target'):
   """
   Lazily yield each distinct tree once, with its first derivation's weight.

   Reads at most `max_derivations` (tree, weight) pairs from
   self.GenerateTrees(direction). The first time a tree is seen (compared
   after immutable(tree)) the original (tree, weight) pair is yielded;
   later derivations of the same tree are skipped.

   An assertion checks that a repeated tree never carries a larger weight
   than its first occurrence.
   """
   tree_to_weight = {}
   for i, (tree, weight) in enumerate(self.GenerateTrees(direction)):
     # i is zero-based, so stopping at i == max_derivations consumes exactly
     # max_derivations derivations (a `>` test would let one extra through).
     if i >= max_derivations:
       break
     tree_immutable = immutable(tree)
     current_weight = float(weight)
     if tree_immutable in tree_to_weight:
       # Internal invariant: the first derivation of a tree is its heaviest.
       assert tree_to_weight[tree_immutable] >= current_weight
       continue
     tree_to_weight[tree_immutable] = current_weight
     yield tree, weight
コード例 #8
0
 def test_PreterminalIdentity(self):
     """With Identity and LexicalSimilarity back-offs, '(NN dog)' can become '(NN perro)'."""
     source = tree_or_string('(NN dog)')
     # The only rule translates the terminal 'dog'; no rule covers the NN preterminal.
     lexical_rule = XTRule('q', tree_or_string('dog'),
                           tree_or_string('perro'), {}, 1.0)
     backoffs = [Identity(), LexicalSimilarity()]
     transducer = xT('q', [lexical_rule], backoffs)
     wrtg = transducer.Transduce(source, None)
     candidates = [candidate for candidate, _ in wrtg.GenerateNBestTrees()]
     wanted = immutable(tree_or_string('(NN perro)'))
     self.assertIn(wanted, candidates)
コード例 #9
0
 def test_PreterminalUnseenTerminalEqual(self):
     """A terminal with no 'copy' rule ('dog') is still reproduced as '(JJ dog)'."""
     source = tree_or_string('(NN dog)')
     rules = [
         XTRule('q', tree_or_string('(NN ?x0|)'),
                tree_or_string('(JJ ?x0|)'), {(0, ): 'copy'}, 1.0),
         # The only 'copy' rule covers 'italian', not 'dog'.
         XTRule('copy', tree_or_string('italian'),
                tree_or_string('italian'), {}, 1.0),
     ]
     transducer = xT('q', rules, [LexicalSimilarity()])
     wrtg = transducer.Transduce(source, None)
     candidates = [candidate for candidate, _ in wrtg.GenerateNBestTrees()]
     wanted = immutable(tree_or_string('(JJ dog)'))
     self.assertIn(wanted, candidates)
コード例 #10
0
ファイル: wrtg.py プロジェクト: ct-clmsn/t2t-qa
 def GenerateNBestTreesMax_(self, max_derivations = 50, direction = 'target'):
   """
   Return each distinct tree with its first derivation's weight, heaviest first.

   Reads at most `max_derivations` (tree, weight) pairs from
   self.GenerateTrees(direction). For every tree (compared after
   immutable(tree)) only the weight of its first derivation is kept; later
   derivations of the same tree are skipped.

   Returns a list of (immutable tree, weight) tuples sorted by decreasing
   weight. An assertion checks that a repeated tree never carries a larger
   weight than its first occurrence.
   """
   tree_to_weight = {}
   for i, (tree, weight) in enumerate(self.GenerateTrees(direction)):
     # i is zero-based, so stopping at i == max_derivations consumes exactly
     # max_derivations derivations (a `>` test would let one extra through).
     if i >= max_derivations:
       break
     tree_immutable = immutable(tree)
     current_weight = float(weight)
     if tree_immutable in tree_to_weight:
       # Internal invariant: the first derivation of a tree is its heaviest.
       assert tree_to_weight[tree_immutable] >= current_weight
       continue
     tree_to_weight[tree_immutable] = current_weight
   return sorted(tree_to_weight.items(),
                 key=lambda tree_weight: tree_weight[1], reverse=True)
コード例 #11
0
 def test_NonConsumingLHSAvoidsInfiniteRTG(self):
     """Rules whose LHS is a bare variable must still let '(JJ canine)' be produced."""
     source = tree_or_string('(NN dog)')
     rules = [
         # LHS is only a variable and the child state is again 'q'.
         XTRule('q', tree_or_string('?x0|NN'),
                tree_or_string('(NN ?x0|)'), {(0, ): 'q'}, 0.9),
         XTRule('q', tree_or_string('?x0|NN'),
                tree_or_string('(JJ ?x0|)'), {(0, ): 't'}, 0.9),
         XTRule('t', tree_or_string('(NN dog)'),
                tree_or_string('canine'), {}, 1.0),
     ]
     transducer = xT('q', rules)
     wrtg = transducer.Transduce(source, None)
     candidates = [candidate for candidate, _ in wrtg.GenerateNBestTrees()]
     wanted = immutable(tree_or_string('(JJ canine)'))
     self.assertIn(wanted, candidates)
コード例 #12
0
    def test_OnlySourceDifferentVarTypes(self):
        """
        With only a source tree, Produce keeps the parent rule and the 't'
        rule whose RHS is '(aa aaa)', and leaves out the one whose RHS is
        '(bb bbb)' (presumably because the parent's RHS variable is typed
        'aa').
        """
        parent_rule = XTRule('q', tree_or_string('(A ?x0|AA)'),
                             tree_or_string('(a ?x0|aa)'), {(0, ): 't'}, 1.0)
        kept_rule = XTRule('t', tree_or_string('(AA AAA)'),
                           tree_or_string('(aa aaa)'), {}, 1.0)
        dropped_rule = XTRule('t', tree_or_string('(AA AAA)'),
                              tree_or_string('(bb bbb)'), {}, 1.0)
        self.transducer = xT('q', [parent_rule, kept_rule, dropped_rule])
        source = immutable(tree_or_string('(A (AA AAA))'))
        productions, _ = self.transducer.Produce(source, None, 'q', (), ())

        self.assertEqual(2, len(productions))
        rules_used = [p.rhs.rule for p in productions]
        self.assertIn(parent_rule, rules_used)
        self.assertIn(kept_rule, rules_used)
        self.assertNotIn(dropped_rule, rules_used)
コード例 #13
0
 def test_PreterminalIdentityUnseenTerminalSimilar(self):
     """
     Check state propagation through the Identity back-off for one variable.

     The Identity back-off hands the state of the parent rule to the path
     of the variable in the RHS, whereas that path should receive the more
     specific state "hypernym".
     """
     source = tree_or_string('(NN dog)')
     # The only rule is a 'hypernym' rule for a different terminal.
     hypernym_rule = XTRule('hypernym', tree_or_string('italian'),
                            tree_or_string('european'), {}, 1.0)
     backoffs = [Identity(), LexicalSimilarity()]
     transducer = xT('q', [hypernym_rule], backoffs)
     wrtg = transducer.Transduce(source, None)
     candidates = [candidate for candidate, _ in wrtg.GenerateNBestTrees()]
     wanted = immutable(tree_or_string('(NN canine)'))
     self.assertIn(wanted, candidates)
コード例 #14
0
 def test_NonterminalIdentityNoBackoff(self):
     """Without back-offs and without a rule for NP, the full translation must not appear."""
     source = tree_or_string('(NP (DT the) (NN dog))')
     rules = [
         XTRule('q', tree_or_string('(DT ?x0|)'),
                tree_or_string('(DTT ?x0|)'), {(0, ): 'copy'}, 1.0),
         XTRule('copy', tree_or_string('the'), tree_or_string('the'),
                {}, 1.0),
         XTRule('q', tree_or_string('(NN ?x0|)'),
                tree_or_string('(NNN ?x0|)'), {(0, ): 'hypernym'}, 1.0),
         XTRule('hypernym', tree_or_string('dog'),
                tree_or_string('canine'), {}, 1.0),
     ]
     no_backoffs = []
     transducer = xT('q', rules, no_backoffs)
     wrtg = transducer.Transduce(source, None)
     candidates = [candidate for candidate, _ in wrtg.GenerateNBestTrees()]
     unwanted = immutable(tree_or_string('(NP (DTT the) (NNN canine))'))
     self.assertNotIn(unwanted, candidates)
コード例 #15
0
 def test_NonterminalUnseenTerminalEqualAndSimilar(self):
     """With LexicalSimilarity, 'the' is copied and the unseen 'dog' can become 'canine'."""
     source = tree_or_string('(NP (DT the) (NN dog))')
     rules = [
         XTRule('q', tree_or_string('(NP (DT ?x0|) (NN ?x1|))'),
                tree_or_string('(NP (DT ?x0|) (NN ?x1|))'),
                {(0, 0): 'copy', (1, 0): 'hypernym'}, 1.0),
         XTRule('copy', tree_or_string('the'), tree_or_string('the'),
                {}, 1.0),
         # The only 'hypernym' rule covers 'italian', not 'dog'.
         XTRule('hypernym', tree_or_string('italian'),
                tree_or_string('european'), {}, 1.0),
     ]
     transducer = xT('q', rules, [LexicalSimilarity()])
     wrtg = transducer.Transduce(source, None)
     candidates = [candidate for candidate, _ in wrtg.GenerateNBestTrees()]
     wanted = immutable(tree_or_string('(NP (DT the) (NN canine))'))
     self.assertIn(wanted, candidates)
コード例 #16
0
 def test_PreterminalEmptyRHSfail(self):
     """Produce returns no productions for the pair '(B D E)' -> 'Z'."""
     source = immutable(tree_or_string('(B D E)'))
     target = immutable(tree_or_string('Z'))
     productions, _ = self.transducer.Produce(source, target, 'q', (), ())
     self.assertEqual(0, len(productions))