Ejemplo n.º 1
0
 def IsEligible(self, src_treep, trg_treep):
   """Decide whether a (source, target) tree-pattern pair is eligible.

   The source side is judged entirely by ``self.IsEligibleSrc``. When a
   target tree pattern is supplied (i.e. it is not None), it must also
   have exactly ``self.trg_phrase_length`` non-variable leaves and report
   at most one variable through ``GetNumVariables()``.

   Returns:
     True if every applicable check passes, False otherwise.
   """
   if not self.IsEligibleSrc(src_treep):
     return False
   # Without a target pattern there is nothing further to verify.
   if trg_treep is None:
     return True
   # Only non-variable leaves count towards the target phrase length.
   num_terminals = sum(
       1 for leaf in GetLeaves(trg_treep) if not IsVariable(leaf))
   var_count = trg_treep.GetNumVariables()
   if num_terminals != self.trg_phrase_length:
     return False
   if var_count > 1:
     return False
   return True
Ejemplo n.º 2
0
 def rule_meets_conds(self, rule, conds):
     """Check a rule against a collection of string conditions.

     Each condition selects a side of the rule by its prefix: ``'lhs:'``
     picks ``rule.lhs``, anything else picks ``rule.rhs``. Its suffix
     names the requirement: ``'is_var'`` demands that ``IsVariable``
     accepts the selected side, ``'is_str'`` demands that ``IsString``
     does. Conditions with neither suffix impose nothing.

     Returns:
       True when ``conds`` is empty/None or every condition holds;
       False as soon as one condition is violated.
     """
     if conds:
         for condition in conds:
             if condition.startswith('lhs:'):
                 side = rule.lhs
             else:
                 side = rule.rhs
             if condition.endswith('is_var') and not IsVariable(side):
                 return False
             if condition.endswith('is_str') and not IsString(side):
                 return False
     return True
Ejemplo n.º 3
0
def FilterOutRulesWithCVT(rules):
    """Return the rules whose RHS contains no leaf listed in ``cvts``.

    For every rule, the leaves of ``rule.rhs`` are inspected in order.
    Leaves recognised by ``IsVariable`` or ``IsOperator`` are ignored;
    any other leaf is stripped of leading ``'!'`` characters and looked
    up in ``cvts`` (a collection defined outside this function). A rule
    is dropped as soon as one such leaf is found.

    Returns:
      A new list with the surviving rules, in their original order.
    """
    def _has_cvt_leaf(rule):
        # Same short-circuit order as a manual scan: variable check,
        # then operator check, then the membership test.
        return any(
            not (IsVariable(leaf) or IsOperator(leaf))
            and leaf.lstrip('!') in cvts
            for leaf in GetLeaves(rule.rhs))

    return [rule for rule in rules if not _has_cvt_leaf(rule)]
Ejemplo n.º 4
0
 def IsEligible(self, src_treep, trg_treep):
     """Decide whether a (source, target) tree-pattern pair is eligible.

     Source-side checks are delegated to ``self.IsEligibleSrc``; per the
     original notes on this method, that means the source tree pattern
     contains no variables (hence none in the target either) and has at
     most ``self.max_src_phrase_length`` leaves. When a target tree
     pattern is given, it must additionally have exactly
     ``self.trg_phrase_length`` non-variable leaves.

     Returns:
       True if all applicable checks pass, False otherwise.
     """
     if not self.IsEligibleSrc(src_treep):
         return False
     # No target pattern: the source check alone decides.
     if trg_treep is None:
         return True
     terminal_count = 0
     for leaf in GetLeaves(trg_treep):
         if not IsVariable(leaf):
             terminal_count += 1
     if terminal_count == self.trg_phrase_length:
         return True
     return False
Ejemplo n.º 5
0
def GetTreePattern(tree, subpaths):
    """Wrap a rule LHS or RHS in a TreePattern.

    The resulting TreePattern holds ``tree`` itself and uses ``()`` as the
    path to its root, since the level at which the rule was extracted (or
    is being applied) is not known here. Its subpaths are the relative
    paths of the variables in the LHS or RHS:

    * for a non-string tree, the given ``subpaths`` in sorted order;
    * for a string that is a variable, the single path ``()``;
    * for any other string, no subpaths (``subpaths`` is ignored).
    """
    root_path = ()
    if not IsString(tree):
        var_paths = sorted(subpaths)
    elif IsVariable(tree):
        # A bare variable is its own (only) variable position.
        var_paths = [()]
    else:
        var_paths = []
    return TreePattern(tree, root_path, var_paths)
Ejemplo n.º 6
0
def BuildTiburonLHS(tree, quote_tokens=True):
    """Render a tree as a Tiburon-style left-hand-side string.

    Three transformations are applied recursively:
      1. terminals go through ``ConvertTokenToTiburon`` (quoted when
         ``quote_tokens`` is true);
      2. variables go through ``ConvertVarToTiburon``
         (e.g. ?x0|NP -> x0:NP);
      3. bracketing changes from (NP (DT the) (NN house)) to
         NP(DT(the) NN(house)), with the node label passed through
         ``ConvertPOSToTiburon``.

    Returns:
      The converted string for ``tree``.
    """
    if not IsString(tree):
        # Internal node: label, then space-separated converted children.
        opening = ConvertPOSToTiburon(get_top(tree)) + '('
        converted_children = ' '.join(
            BuildTiburonLHS(subtree, quote_tokens=quote_tokens)
            for subtree in tree)
        return opening + converted_children + ')'
    if IsVariable(tree):
        return ConvertVarToTiburon(tree)
    return ConvertTokenToTiburon(tree, quote=quote_tokens)