Esempi in Python per IsString, esempi in Python per utils.tree_tools.IsString

Esempio n. 1

0

Mostra file

File: transductionrule.py Progetto: ct-clmsn/t2t-qa

 def MakeDeletingRule(self):
     """
     Replace fully lexicalized level-1 LHS branches by deleting variables.

     The rule is returned untouched when any of these holds:
       * the LHS is a string,
       * the RHS is a string that is not a variable,
       * the RHS is a tree with at least one leaf that is not a variable,
       * self.lhs_vars_to_paths records no variable path.

     Otherwise, every direct child of the LHS whose index is not the first
     step of a recorded LHS variable path is overwritten in place by a
     fresh variable named '?xxN|' (followed by the top label of the branch
     when the branch is not a string). The new variable is registered in
     self.lhs_vars_to_paths with the path (i, ).

     It is not clear when lexicalized branches should be replaced by a
     deleting variable (it depends on the application).

     Returns:
       self, possibly modified in place.
     """
     import itertools

     if IsString(self.lhs):
         return self
     if IsString(self.rhs) and not IsVariable(self.rhs):
         return self
     if not IsString(self.rhs):
         rhs_leaves = self.rhs.leaves()
         if rhs_leaves and any(not IsVariable(leaf) for leaf in rhs_leaves):
             return self
     # Unbounded supply of fresh variable names, so that an LHS with many
     # lexicalized branches cannot exhaust it.
     index_new_variable = ('?xx%d|' % i for i in itertools.count())
     # Indices of the level-1 branches that lead to some LHS variable.
     lhs_paths_prefix_1 = set(
         p[0] for p in self.lhs_vars_to_paths.values())
     if not lhs_paths_prefix_1:
         return self
     for i, branch in enumerate(self.lhs):
         if i in lhs_paths_prefix_1:
             continue
         fresh_variable = next(index_new_variable)
         if not IsString(branch):
             # Keep the top label of the replaced branch after the variable.
             fresh_variable += get_top(branch)
         self.lhs[i] = fresh_variable
         self.lhs_vars_to_paths[self.lhs[i]] = (i, )
     return self

Esempio n. 2

0

Mostra file

File: sparql_utils.py Progetto: ct-clmsn/t2t-qa

def get_statements_from_ldcsc(ldcsc, var_counter=0):
    """
    Recursively collect the statements encoded in an l-dcsc tree.

    Inputs that are not Tree instances, and trees whose top label is
    'DATE' or 'NUMBER', contribute no statements.

    When the first child is a string that is not an operator, it is used
    (with '!' stripped from both ends) as the predicate of a Statement
    that links the variable '?x<var_counter>' with a term derived from the
    second child. A predicate starting with '!' swaps subject and object.
    The remaining children are then visited recursively.

    Args:
      ldcsc: the tree (or leaf) to process.
      var_counter: index used to name the variable of this level.

    Returns:
      A list with the collected statements.
    """
    if not isinstance(ldcsc, Tree):
        return []
    if get_top(ldcsc) in ['DATE', 'NUMBER']:
        return []
    collected = []
    head = ldcsc[0]
    head_is_string = IsString(head)
    if head_is_string and not is_operator(head):
        current_var = '?x' + str(var_counter)
        relation = head.strip('!')
        argument = ldcsc[1]
        if IsString(argument):
            other_term = argument
        else:
            argument_label = get_top(argument)
            if argument_label == 'DATE':
                other_term = '?d0'
                collected.extend(get_statements_from_date(argument, '?d0'))
            elif argument_label == 'NUMBER':
                other_term = get_number_from_constituent(argument)
            else:
                # The argument is a subtree; refer to the variable that
                # the next level will use.
                other_term = '?x' + str(var_counter + 1)
        if head.startswith('!'):
            # Reversed predicate: the roles of both terms are exchanged.
            subj, obj = other_term, current_var
        else:
            subj, obj = current_var, other_term
        collected.append(Statement(subj, relation, obj))
        var_counter += 1
    # A string head was already consumed above (or is an operator).
    first_subtree = 1 if head_is_string else 0
    for subtree in ldcsc[first_subtree:]:
        collected.extend(get_statements_from_ldcsc(subtree, var_counter))
    return collected

Esempio n. 3

0

Mostra file

def GetURIsFromRules(rules):
  """
  Gather the non-variable RHS items of a collection of rules.

  For a rule whose RHS is a string, the RHS itself is collected unless it
  is a variable. For any other RHS, all its leaves that are not variables
  are collected.

  Returns:
    A set with the collected items.
  """
  uris = set()
  for rule in rules:
    rhs = rule.rhs
    if not IsString(rhs):
      uris.update(leaf for leaf in rhs.leaves() if not IsVariable(leaf))
    elif not IsVariable(rhs):
      uris.add(rhs)
  return uris

Esempio n. 4

0

Mostra file

 def GetSimilarity(self, tree1, tree2):
     """
     Report a similarity only when both trees have the same textual form.

     Strings are compared as they are; anything else is compared through
     its repr().

     Returns:
       A list with one Similarity (cost self.kCost, state
       self.kDefaultState) if both forms are equal; an empty list
       otherwise.
     """
     text1 = repr(tree1) if not IsString(tree1) else tree1
     text2 = repr(tree2) if not IsString(tree2) else tree2
     if text1 != text2:
         return []
     return [Similarity(self.kCost, self.kDefaultState, tree1, tree2)]

Esempio n. 5

0

Mostra file

File: transductionrule.py Progetto: ct-clmsn/t2t-qa

 def __repr__(self):
     """
     Multi-line description of the rule: state, lhs, rhs, newstates
     and weight.

     String sides are encoded as UTF-8; other sides are shown through
     their repr().
     """
     def _side_text(side):
         return side.encode('utf-8') if IsString(side) else repr(side)

     lhs_text = _side_text(self.lhs)
     rhs_text = _side_text(self.rhs)
     template = ("<rule.\n  state: {0}\n  lhs: {1}\n  rhs: {2}\n" +
                 "  newstates: {3}\n  weight: {4}>")
     return template.format(
         self.state, lhs_text, rhs_text, self.newstates, self.weight)

Esempio n. 6

0

Mostra file

 def GetSimilarity(self, tree1, tree2):
   """
   Score two trees by the relative difference of their subtree counts.

   A string counts as zero subtrees. The weight is
   |n1 - n2| / max(n1, n2), or 0.0 when both counts are zero.

   Returns:
     A list with a single Similarity labelled 'nodes_difference'.
   """
   size1 = tree1.GetNumSubtrees() if not IsString(tree1) else 0
   size2 = tree2.GetNumSubtrees() if not IsString(tree2) else 0
   if size1 == 0 and size2 == 0:
     # Avoids dividing by zero when neither side has subtrees.
     weight = 0.0
   else:
     weight = float(abs(size1 - size2)) / max(size1, size2)
   return [Similarity(weight, 'nodes_difference', tree1, tree2)]

Esempio n. 7

0

Mostra file

File: transductionrule.py Progetto: ct-clmsn/t2t-qa

 def StringifyWithoutWeight(self):
     """
     Textual form of the rule (state, lhs, rhs, newstates) without weight.

     The text is built once and stored in self.stringified; later calls
     return the stored value.
     """
     if self.stringified:
         return self.stringified

     def _side_text(side):
         return side.encode('utf-8') if IsString(side) else repr(side)

     lhs_text = _side_text(self.lhs)
     rhs_text = _side_text(self.rhs)
     template = ("<rule.\n  state: {0}\n  lhs: {1}\n  rhs: {2}\n" +
                 "  newstates: {3}>")
     self.stringified = template.format(
         self.state, lhs_text, rhs_text, self.newstates)
     return self.stringified

Esempio n. 8

0

Mostra file

File: similarity_ordering.py Progetto: ct-clmsn/t2t-qa

 def GetVariables(self, tree):
     """
     Return the variables of a tree-like object.

     * TreePattern: delegated to self.MakeVariablesFromTreePattern.
     * NLTKTree: the part before '|' of each variable reported by
       variables_to_paths.
     * String: a one-element list with the string if it starts with '?x',
       an empty list otherwise.
     * Anything else: None.
     """
     if isinstance(tree, TreePattern):
         return self.MakeVariablesFromTreePattern(tree)
     if isinstance(tree, NLTKTree):
         return [
             var.split('|')[0] for (var, _path) in variables_to_paths(tree)
         ]
     if IsString(tree):
         return [tree] if tree.startswith('?x') else []
     return None

Esempio n. 9

0

Mostra file

 def insert_cvt_if_needed(self, tree):
     """
     Wrap the tree with an '(ID !<cvt> ...)' node when a CVT is available.

     The CVT is looked up with self.get_cvt_cached for the main predicate
     of the tree. When the lookup yields nothing, the tree is returned
     unchanged. For trees labelled 'COUNT', the wrapping is placed around
     the first child, inside the COUNT node.
     """
     predicate = get_main_predicate_from_tree(tree)
     cvt = self.get_cvt_cached(predicate)
     if not cvt:
         return tree
     template = '(ID !{0} {1})'
     if IsString(tree):
         inner = tree
     elif tree.label() == u'COUNT':
         template = '(COUNT (ID !{0} {1}))'
         inner = tree[0]
     elif not IsString(tree[0]):
         # Splice the children, dropping the top node of the tree.
         inner = ' '.join(map(str, tree))
     else:
         inner = tree
     return tree_or_string(template.format(cvt, inner))

Esempio n. 10

0

Mostra file

def _GetURIField(uri, field):
    """
    Retrieve the information named by `field` for a URI or word.

    * 'numFound': number of documents found for the URI (for no words
      at all when uri is '<total>'); 0 if the lookup raises ValueError.
    * 'uri_type|<arg>': result of GetURIType(uri, <arg>).
    * For the remaining fields, None is returned when uri is not a string.
    * 'role' and 'text': results of _GetURIRole and _GetURIText.
    * Any other field: result of _GetFieldFromURI, or None if it raises
      ValueError.
    """
    if field == 'numFound':
        try:
            assert _IsConnectionAlive()
            query_words = [] if uri == '<total>' else [uri]
            return _GetNumDocsFound(query_words)
        except ValueError:
            return 0
    if field.startswith('uri_type'):
        return GetURIType(uri, field.split('|')[1])
    if not IsString(uri):
        return None
    if field == 'role':
        return _GetURIRole(uri)
    if field == 'text':
        return _GetURIText(uri)
    try:
        return _GetFieldFromURI(uri, field)
    except ValueError:
        return None

Esempio n. 11

0

Mostra file

def DecodeInputTree(wrtg, nbest, lambda_dcs_str_list):
    """
    Query every transduction of wrtg and keep the ones with valid results.

    lambda_dcs_str_list is an output parameter: the string form of each
    accepted lambda-DCS tree is appended to it. An output parameter is
    used in order to retrieve partial lists in case of timeouts.

    The names cvt_inserter, query_manager and invalid_results are not
    parameters; they are resolved from the enclosing scope.
    """
    for best_tree, optimal_weight in wrtg.GenerateNBestTreesMax(nbest):
        if cvt_inserter:
            best_tree = cvt_inserter.insert_cvt_if_needed(best_tree)
        if IsString(best_tree):
            constituent_str = best_tree
        else:
            constituent_str = best_tree.pprint(margin=10000)
        query_results = QueryLambdaDCSC(constituent_str, query_manager)
        logging.info('\nConstituent: {0}\nWeight: {1}'.format(
            constituent_str, optimal_weight))
        if query_results is None or query_results in invalid_results:
            continue
        lambda_dcs_str = ConvertConstituent2DCS(constituent_str)
        logging.info('Found. Weight: {0}\tTransduction: {1}'.format(
            optimal_weight, lambda_dcs_str))
        logging.info(u'Answer: {0}'.format(query_results))
        lambda_dcs_str_list.append(str(lambda_dcs_str))

Esempio n. 12

0

Mostra file

File: train_perceptron.py Progetto: ct-clmsn/t2t-qa

def GetBestValidDerivations(
  wrtg, cvt_inserter, nbest=1000, nvalid=100, query_manager=None):
  """
  Collect the first derivations produced by the wRTG wrtg.

  The caches of wrtg are cleared, and derivations are then taken in the
  order in which wrtg.ObtainDerivationsFromNT() yields them (presumably
  in descending order of score; confirm against the wRTG implementation).
  Each derivation is projected into its target constituent with
  TargetProjectionFromDerivation.

  Args:
    wrtg: object providing ClearCaches() and ObtainDerivationsFromNT().
    cvt_inserter: not used by this function; kept for compatibility.
    nbest: maximum number of derivations that are explored.
    nvalid: the search stops as soon as the number of collected pairs
      reaches this value (checked after each pair is added).
    query_manager: not used by this function; kept for compatibility.

  Returns:
    A list of (derivation, constituent) tuples. The list is empty when
    wrtg yields no derivation or when nbest is not positive.
  """
  valid_derivations = []
  wrtg.ClearCaches()
  for i, derivation in enumerate(wrtg.ObtainDerivationsFromNT()):
    if i >= nbest:
      break
    constituent, _ = TargetProjectionFromDerivation(derivation)
    valid_derivations.append((derivation, constituent))
    if len(valid_derivations) >= nvalid:
      break
  # Every visited derivation is collected, so an empty list means that
  # nothing was visited; there is no first derivation to fall back to.
  return valid_derivations

Esempio n. 13

0

Mostra file

File: sparql_utils.py Progetto: ct-clmsn/t2t-qa

 def fromldcsc(ldcsc, var_prefs=None):
     """
     Build a Query from an l-dcsc tree.

     Args:
       ldcsc: a Tree instance. A string that neither starts with '(' nor
         ends with ')' is tolerated and produces no query.
       var_prefs: list with the prefixes of the other query variable
         instantiations that we also want to retrieve. E.g. if
         ['p', 'r'], then
         SELECT DISTINCT ?x0 , ?p0, ?p1, ?r0, ?r1 WHERE { ...

     Returns:
       A Query whose query_vars and ldcsc attributes are set, or None when
       the input is a tolerated string or no statement could be obtained
       from the tree.

     Raises:
       ValueError: if ldcsc is neither a Tree nor a tolerated string.
     """
     if not isinstance(ldcsc, Tree):
         if IsString(ldcsc) and not ldcsc.startswith(
                 '(') and not ldcsc.endswith(')'):
             return None
         raise ValueError(
             'This method expects a Tree instance. Got type {0} for instance {1}'
             .format(type(ldcsc), ldcsc))
     try:
         statements = get_statements_from_ldcsc(ldcsc)
     except Exception:
         # Best effort: a tree that cannot be converted yields no query.
         # KeyboardInterrupt and SystemExit are deliberately not caught.
         logging.warning('Failed to get statements from l-dcsc: {0}'.format(
             str(ldcsc)))
         statements = []
     if not statements:
         return None
     operator = ldcsc[0] if is_operator(ldcsc[0]) else ""
     query_vars = get_query_vars(statements, var_prefs)
     query_str = build_query_str(statements, '?x0', query_vars, operator)
     query = Query(query_str)
     query.query_vars = query_vars
     query.ldcsc = ldcsc
     return query

Esempio n. 14

0

Mostra file

File: transductionrule.py Progetto: ct-clmsn/t2t-qa

def BuildTiburonRHS(tree, newstates, path=(), quote_tokens=True):
    """
    Serialize a rule RHS in Tiburon format.

    * Terminals are converted with ConvertTokenToTiburon (quoted when
      quote_tokens is set).
    * A variable such as ?x0|NP loses its type and is prefixed by the
      state found in newstates for its path, e.g. q1.x0.
    * Bracketing (NP (DT the) (NN house)) changes into
      NP(DT(the) NN(house)).

    E.g. (NP (DT ?x0|) ?x1|NN) with {(0,0): 'q1', (1,) : 'q2'} would
    change into NP(DT(q1.x0) q2.x1).

    Args:
      tree: string or tree to serialize.
      newstates: dictionary from variable paths to state names.
      path: position of `tree` within the whole RHS.
      quote_tokens: forwarded to ConvertTokenToTiburon as `quote`.
    """
    if not IsString(tree):
        label = ConvertPOSToTiburon(get_top(tree))
        children = ' '.join([
            BuildTiburonRHS(
                child, newstates, path + (index,), quote_tokens=quote_tokens)
            for index, child in enumerate(tree)
        ])
        return label + '(' + children + ')'
    if not IsVariable(tree):
        return ConvertTokenToTiburon(tree, quote=quote_tokens)
    assert tree.startswith('?')
    assert '|' in tree
    assert path in newstates, 'path {0} not in {1}'.format(path, newstates)
    # Drop the leading '?' and everything from '|' onwards.
    return newstates[path] + '.' + tree[1:tree.index('|')]

Esempio n. 15

0

Mostra file

File: sparql_utils.py Progetto: ct-clmsn/t2t-qa

def is_operator(op):
    """
    Tell whether `op` is one of the operators "COUNT", "MAX" or "MIN".

    Tree instances are never operators; anything else must be a string.
    """
    if isinstance(op, Tree):
        return False
    assert IsString(op)
    return op in ("COUNT", "MAX", "MIN")

Esempio n. 16

0

Mostra file

File: dcs_tools.py Progetto: ct-clmsn/t2t-qa

def constituent2dcs(tree):
  '''
  Convert a constituent structure into a DCS tree.

  Returns:
    A list of tree fragments:
    * a non-Tree input is returned wrapped in a list;
    * NUMBER nodes become a 'number' tree with the same children;
    * DATE nodes with a string child become a 'date' tree whose children
      are the fields separated by underscores (or the raw string when
      some field is not an integer);
    * ID nodes with two children become a tree headed by the predicate;
    * nodes with more than two children are headed by the first child
      and group the remaining children under an 'and' node;
    * any other tree is returned wrapped in a list.
  '''
  if not isinstance(tree, Tree):
    return [tree]
  # elif get_top(tree) == 'COUNT':
  #   assert len(tree) == 1
  #   return [Tree('count', constituent2dcs(tree[0]))]
  elif get_top(tree) == 'NUMBER':
    assert len(tree) == 2
    return [Tree('number', tree[:])]
  elif get_top(tree) == 'DATE':
    # tree contains a list with only one element, which is the data
    # joined with underscores. We re-establish the list.
    if IsString(tree[0]):
      date_info = tree[0].split('_')
      try:
        # Evaluated eagerly on purpose: the conversion is only meant to
        # detect non-numeric fields. A lazy map() would never raise.
        [int(field) for field in date_info]
      except ValueError:
        date_info = [tree[0]]
      return [Tree('date', date_info)]
    else:
      return [Tree(get_top(tree[0]),
                   flatten([constituent2dcs(child) for child in tree[1:]]))]
  if get_top(tree) == 'ID' and len(tree) == 2:
    # The first child is the predicate. The rest are the arguments.
    predicate = get_top(tree[0])
    if predicate == 'COUNT':
      predicate = predicate.lower()
    return [Tree(predicate,
                 flatten([constituent2dcs(child) for child in tree[1:]]))]
  if len(tree) > 2:
    # A length greater than 2 is the only signal we have for "and".
    arguments = flatten([constituent2dcs(child) for child in tree[1:]])
    return [Tree(get_top(tree[0]), [Tree('and', arguments)])]
  return [tree]

Esempio n. 17

0

Mostra file

File: transductionrule.py Progetto: ct-clmsn/t2t-qa

def UnconvertAllPOSFromTiburon(tree):
    """
    Relabel in place every non-leaf node of `tree`.

    The label of each node is replaced by the result of
    UnconvertPOSFromTiburon applied to its current top label.

    Returns:
      The same tree object.
    """
    nonterminal_paths = set(tree.treepositions()).difference(
        tree.treepositions('leaves'))
    for position in nonterminal_paths:
        node = tree[position]
        assert not IsString(node)
        tree[position].set_label(UnconvertPOSFromTiburon(get_top(node)))
    return tree

Esempio n. 18

0

Mostra file

def GetLeavePositions(tree):
  """
  Return the positions of the leaves that do not start with '?x'.

  For a string input, the only candidate position is the empty tuple.
  """
  if not IsString(tree):
    return [leaf_path for leaf_path in tree.treepositions('leaves')
            if not tree[leaf_path].startswith(u'?x')]
  if tree.startswith(u'?x'):
    return []
  return [()]

Esempio n. 19

0

Mostra file

def QueryLambdaDCSC(ldcsc_str, query_manager=None):
    """
    Run the query described by an l-dcsc string.

    Args:
      ldcsc_str: string form of the l-dcsc tree.
      query_manager: manager used to obtain the results; defaults to
        query_manager_global.

    Returns:
      A list with the first item of every result, or an empty list when
      no query could be built from the input.
    """
    assert IsString(ldcsc_str)
    manager = query_manager_global if query_manager is None else query_manager
    query = Query.fromldcsc(tree_or_string(ldcsc_str))
    if query is None:
        return []
    return [row[0] for row in query.get_results(manager)]

Esempio n. 20

0

Mostra file

File: filter_rules_utils.py Progetto: ct-clmsn/t2t-qa

 def rule_meets_conds(self, rule, conds):
     """
     Check a rule against a list of textual conditions.

     A condition starting with 'lhs:' refers to rule.lhs; any other
     condition refers to rule.rhs. A condition ending in 'is_var'
     requires that side to be a variable, and one ending in 'is_str'
     requires it to be a string.

     Returns:
       True when there are no conditions or all of them hold.
     """
     if not conds:
         return True

     def _holds(cond):
         side = rule.lhs if cond.startswith('lhs:') else rule.rhs
         if cond.endswith('is_var') and not IsVariable(side):
             return False
         return not (cond.endswith('is_str') and not IsString(side))

     return all(_holds(cond) for cond in conds)

Esempio n. 21

0

Mostra file

File: transductionrule.py Progetto: ct-clmsn/t2t-qa

def GetNewstatesFromRHSInTiburon(rhs_str):
    """
    Extract the states attached to the variables of a Tiburon RHS.

    The RHS string is converted with TiburonToStanford and parsed with
    tree_or_string. For every leaf (or for the RHS itself, if it is a
    string) accepted by IsTiburonStateVariable, the text before the first
    '.' is taken as its state.

    Returns:
      A dictionary from leaf paths to state names; the path is () when
      the whole RHS is a single state variable.
    """
    rhs_nltk = tree_or_string(TiburonToStanford(rhs_str))
    if IsString(rhs_nltk):
        if IsTiburonStateVariable(rhs_nltk):
            return {(): rhs_nltk[:rhs_nltk.index('.')]}
        return {}
    newstates = {}
    for path in rhs_nltk.treepositions('leaves'):
        leaf = rhs_nltk[path]
        if IsTiburonStateVariable(leaf):
            newstates[path] = leaf[:leaf.index('.')]
    return newstates

Esempio n. 22

0

Mostra file

def get_entity_label(entity):
  """
  Look up the label of an entity.

  Entities containing a space are returned as they are. Otherwise the
  predicate fb:type.object.name is queried through QueryLambdaDCSC and
  the first result is returned; the entity itself is returned when the
  query gives no result. A warning is logged if several labels are found.
  """
  assert IsString(entity)
  if ' ' in entity:
    return entity
  labels = QueryLambdaDCSC(
    u'(ID !fb:type.object.name <{0}>)'.format(entity))
  if not labels:
    return entity
  if len(labels) > 1:
    message = u'More than one label results for entity {0} = {1}'.format(
      entity, ', '.join(labels))
    logging.warning(message)
  return labels[0]

Esempio n. 23

0

Mostra file

File: dcs_tools.py Progetto: ct-clmsn/t2t-qa

def ConvertConstituent2DCS(constituent_tree):
  """
  Wrapper around constituent2dcs.

  A string input is first parsed with tree_or_string. The conversion
  must produce a list with exactly one fragment; that fragment is turned
  into a string and parsed again with tree_or_string, and the result is
  returned.
  """
  if IsString(constituent_tree):
    parsed = tree_or_string(constituent_tree)
  else:
    parsed = constituent_tree
  fragments = constituent2dcs(parsed)
  assert isinstance(fragments, list) and len(fragments) == 1
  return tree_or_string(str(fragments[0]))

Esempio n. 24

0

Mostra file

def LoadAlignments(alignment_fname):
    """
    Load alignments from a UTF-8 file made of groups of three lines:
      src_tree
      trg_tree
      alignment
      ...
    The result is a dictionary indexed by a tuple
    (src_tree_str, trg_tree_str), whose values are Alignment objects built
    from the alignment line and the leaves of both trees.

    The number of lines of the file must be a multiple of 3.
    """
    def _leaves_of(tree):
        # A tree that is a single string is its own only leaf.
        return tree.leaves() if not IsString(tree) else [tree]

    alignments = {}
    with codecs.open(alignment_fname, 'r', 'utf-8') as fin:
        lines = fin.readlines()
        assert len(
            lines) % 3 == 0, 'Lines in {0} are not a multiple of 3.'.format(
                alignment_fname)
        groups = zip(lines[0::3], lines[1::3], lines[2::3])
        for src_line, trg_line, alignment_line in groups:
            src_tree_str = src_line.strip()
            src_leaves = _leaves_of(tree_or_string(src_tree_str))
            trg_tree_str = trg_line.strip()
            trg_leaves = _leaves_of(tree_or_string(trg_tree_str))
            alignment_str = alignment_line.strip()
            alignments[(src_tree_str, trg_tree_str)] = Alignment(
                alignment_str, src_leaves, trg_leaves)
    return alignments

Esempio n. 25

0

Mostra file

File: sparql_utils.py Progetto: ct-clmsn/t2t-qa

def get_statements_from_date(ldcsc, var):
    """
    Build the FILTER statements that restrict `var` to the year of a date.

    `ldcsc` must be a DATE node with a single child. The year is the
    integer before the first underscore of that child.

    Returns:
      Two FILTER strings (var >= year and var < year + 1), or an empty
      list when the child is not a string or the year is not an integer.
    """
    assert get_top(ldcsc) == 'DATE' and len(ldcsc) == 1
    date_leaf = ldcsc[0]
    if not IsString(date_leaf):
        return []
    try:
        year = int(date_leaf.split('_')[0])
    except ValueError:
        return []
    lower_bound = \
      'FILTER (xsd:dateTime({0}) >= xsd:dateTime("{1}"^^xsd:datetime)) .'\
      .format(var, year)
    upper_bound = \
      'FILTER (xsd:dateTime({0}) < xsd:dateTime("{1}"^^xsd:datetime)) .'\
      .format(var, year + 1)
    return [lower_bound, upper_bound]

Esempio n. 26

0

Mostra file

def get_main_predicate_from_tree(tree):
    """
    Return the main predicate of a constituent representation of a query.

    A string input is its own predicate. For a tree, the predicate is the
    left-most leaf, unless that leaf is "count" (in any letter case), in
    which case the leaf immediately on its right is returned.
    """
    if IsString(tree):
        return tree
    leaves = tree.leaves()
    assert leaves
    leftmost = leaves[0]
    return leaves[1] if leftmost.lower() == 'count' else leftmost

Esempio n. 27

0

Mostra file

File: transductionrule.py Progetto: ct-clmsn/t2t-qa

def GetTreePattern(tree, subpaths):
    """
    Convert a rule LHS or RHS into a TreePattern rooted at path ().

    The root path is always (), because the level at which the rule was
    extracted (or is being applied) is not known here.

    * For a tree, the subpaths of the pattern are `subpaths`, sorted.
    * For a string, `subpaths` is ignored: the pattern has the single
      subpath () if the string is a variable, and no subpaths otherwise.
    """
    root_path = ()
    if not IsString(tree):
        return TreePattern(tree, root_path, sorted(subpaths))
    variable_paths = [()] if IsVariable(tree) else []
    return TreePattern(tree, root_path, variable_paths)

Esempio n. 28

0

Mostra file

File: sparql_utils.py Progetto: ct-clmsn/t2t-qa

def get_query_vars(statements, prefixes):
    """
    Select the variables of the statements that start with given prefixes.

    String statements are skipped. For the others, the subj, rel and obj
    attributes accepted by is_var are the candidate variables.

    Args:
      statements: iterable of statements.
      prefixes: iterable of prefixes; None means no prefix at all.

    Returns:
      A sorted list, without duplicates, of the candidate variables that
      start with some prefix.
    """
    wanted_prefixes = [] if prefixes is None else prefixes
    candidates = []
    for statement in statements:
        if IsString(statement):
            continue
        for term in (statement.subj, statement.rel, statement.obj):
            if is_var(term):
                candidates.append(term)
    selected = set(
        var for prefix in wanted_prefixes for var in candidates
        if var.startswith(prefix))
    return sorted(selected)

Esempio n. 29

0

Mostra file

File: transductionrule.py Progetto: ct-clmsn/t2t-qa

def BuildTiburonLHS(tree, quote_tokens=True):
    """
    Serialize a rule LHS in Tiburon format.

    * Terminals are converted with ConvertTokenToTiburon (quoted when
      quote_tokens is set).
    * Variables are converted with ConvertVarToTiburon
      (e.g. ?x0|NP -> x0:NP).
    * Bracketing (NP (DT the) (NN house)) -> NP(DT(the) NN(house)).
    """
    if not IsString(tree):
        label = ConvertPOSToTiburon(get_top(tree))
        children = ' '.join([
            BuildTiburonLHS(child, quote_tokens=quote_tokens)
            for child in tree
        ])
        return label + '(' + children + ')'
    if IsVariable(tree):
        return ConvertVarToTiburon(tree)
    return ConvertTokenToTiburon(tree, quote=quote_tokens)

Esempio n. 30

0

Mostra file

def get_alignment(pair_tt, entities_lex, predicates_lex):
    """
    Align every leaf of the target tree with the leaves of the source tree.

    Args:
      pair_tt: a tuple (src_tree, trg_tree); both items are parsed with
        tree_or_string. A side that parses into a plain string is treated
        as a tree with that string as its only leaf.
      entities_lex: lexicon forwarded to align().
      predicates_lex: lexicon forwarded to align().

    Returns:
      The result of fix_unaligned applied to the list that holds, for
      each target leaf, the alignment returned by align() (presumably a
      list of source word indices; confirm against align()).
    """
    assert len(pair_tt) == 2
    src_tree, trg_tree = map(tree_or_string, pair_tt)
    # Both sides may be a bare string when the input has no brackets.
    src_leaves = [src_tree] if IsString(src_tree) else src_tree.leaves()
    trg_leaves = [trg_tree] if IsString(trg_tree) else trg_tree.leaves()
    # This is a list of lists. Each list will have source word indices.
    alignments = [
        align(trg_leaf, src_leaves, entities_lex, predicates_lex)
        for trg_leaf in trg_leaves
    ]
    return fix_unaligned(alignments, trg_leaves)