def replace(concrete, lhs, rhs, failok=False):
    """Instantiate the ``rhs`` pattern with material taken from ``concrete``.

    ``concrete`` is a tree (possibly a subtree of a larger tree). For every
    variable occurring in ``rhs``, the position of the same variable in
    ``lhs`` is used to pick a piece of ``concrete``; a deep copy of that
    piece is placed at the variable's position in a deep copy of ``rhs``.

    Variables have the form ``?x0|NP`` (a variable of type NP); only the
    part before ``|`` is used to match variables between the two patterns.

    Args:
        concrete: tree that supplies the substituted material.
        lhs: pattern whose variable positions index into ``concrete``.
        rhs: pattern whose variables get replaced.
        failok: if true, return None instead of raising when ``rhs`` uses
            a variable that ``lhs`` does not contain.

    Returns:
        The new tree, or None when a variable is missing and ``failok``
        is true.

    Raises:
        ValueError: ``rhs`` uses a variable absent from ``lhs`` and
            ``failok`` is false.
    """
    def untyped_positions(pattern):
        # Drop the "|TYPE" suffix so that ?x0|NP and ?x0 name the same variable.
        return [(name.split('|')[0], position)
                for (name, position) in variables_to_paths(pattern)]

    source_positions = untyped_positions(lhs)
    target_positions = untyped_positions(rhs)
    position_in_lhs = dict(source_positions)

    # Every variable of the RHS must also be present in the LHS.
    for name, _ in target_positions:
        if name in position_in_lhs:
            continue
        if failok:
            return None
        raise ValueError(
            "LHS {0} missing expected variable {1}.".format(lhs, name))

    # Do the replacement, one RHS variable at a time.
    result = deepcopy(rhs)
    for name, rhs_position in target_positions:
        lhs_position = position_in_lhs[name]
        # An empty (falsy) path is treated as "the whole tree" on either side.
        piece = concrete[lhs_position] if lhs_position else concrete
        if rhs_position:
            result[rhs_position] = deepcopy(piece)
        else:
            result = deepcopy(piece)
    return result
def GetRemappedRulePaths(production, remapped_rule_paths):
    """Add the path remappings induced by one production to a mapping.

    The production's input path (``production.non_terminal[1]``) must
    already be a key of ``remapped_rule_paths``. Variables of the rule's
    lhs (in lhs order) are paired positionally with variables of the
    rule's rhs (in rhs order). For each pair, the input path of the lhs
    variable is mapped to the remapped input path extended by the lhs path
    of the paired rhs variable.

    Args:
        production: object exposing ``non_terminal`` (indexable; item 1 is
            the input path) and ``rhs.rule`` with ``lhs`` / ``rhs``
            patterns.
        remapped_rule_paths: dict from input paths to remapped paths; it is
            updated in place.

    Returns:
        The same ``remapped_rule_paths`` object, after the update.

    Raises:
        AssertionError: the lhs variables are not sorted by name, or the
            lhs and rhs do not use the same set of variable names.
        KeyError: the production's input path has no entry in
            ``remapped_rule_paths``.
    """
    in_path = production.non_terminal[1]
    in_path_remapped = remapped_rule_paths[in_path]
    rule = production.rhs.rule

    # Variables look like ?x0|NP; keep only the untyped name before '|'.
    src_vars_paths = [(var.split('|')[0], path)
                      for var, path in variables_to_paths(rule.lhs)]
    src_vars_to_paths = dict(src_vars_paths)
    assert sorted(src_vars_paths, key=lambda x: x[0]) == src_vars_paths, \
        'Variables in lhs are not sorted: {0}'.format(rule.lhs)

    # Only the names (in rhs order) are needed from the rhs; its paths are
    # not used by the remapping.
    trg_vars = [var.split('|')[0] for var, _ in variables_to_paths(rule.rhs)]

    # Check whether the lhs and rhs have the same variable names.
    src_vars = [var for var, _ in src_vars_paths]
    assert set(src_vars) == set(trg_vars), \
        'Variables in lhs {0} and rhs {1} differ:'.format(src_vars, trg_vars)

    for src_var, trg_var in zip(src_vars, trg_vars):
        # Key: where the i-th lhs variable sits in the input.
        # Value: remapped base path + lhs path of the i-th rhs variable.
        remapped_rule_paths[in_path + src_vars_to_paths[src_var]] = \
            in_path_remapped + src_vars_to_paths[trg_var]
    return remapped_rule_paths
def num_del_variables(self, src_treep, trg_treep):
    """Build count features for variables whose name starts with ``?xx``.

    Args:
        src_treep: object whose ``tree`` attribute is the source-side tree.
        trg_treep: object whose ``tree`` attribute is the target-side tree.

    Returns:
        A list of three ``((feature_name, count), 1.0)`` pairs: the count
        on the source side, the count on the target side, and the absolute
        difference between the two counts.
    """
    def count_marked(treep):
        # Count the variables of the tree that carry the '?xx' prefix.
        return sum(1 for name, _ in variables_to_paths(treep.tree)
                   if name.startswith('?xx'))

    lhs_count = count_marked(src_treep)
    rhs_count = count_marked(trg_treep)
    gap = abs(lhs_count - rhs_count)
    return [
        (('num_del_variables: lhs', lhs_count), 1.0),
        (('num_del_variables: rhs', rhs_count), 1.0),
        (('num_del_variables: abs(lhs - rhs)', gap), 1.0),
    ]
def MakeNewStates_(target, relation):
    """Assign a state name to the path of every variable in ``target``.

    Args:
        target: pattern whose variables are enumerated.
        relation: state name given to variables for which ``IsVarTyped``
            is true.

    Returns:
        A dict from variable path to state name: ``relation`` for typed
        variables and the literal ``'t'`` for all others.
    """
    return {
        position: (relation if IsVarTyped(name) else 't')
        for (name, position) in variables_to_paths(target)
    }
def GetVariables(self, tree):
    """Return the variable names found in ``tree``.

    The result depends on the kind of ``tree``:

    * ``TreePattern``: delegated to ``self.MakeVariablesFromTreePattern``.
    * ``NLTKTree``: the names of its variables with any ``|TYPE`` suffix
      removed.
    * string: ``[tree]`` if it starts with ``?x``, otherwise ``[]``.
    * anything else: ``None``.
    """
    if isinstance(tree, TreePattern):
        return self.MakeVariablesFromTreePattern(tree)
    if isinstance(tree, NLTKTree):
        return [name.split('|')[0] for (name, _) in variables_to_paths(tree)]
    if IsString(tree):
        # A bare string is itself a variable only when it has the ?x prefix.
        return [tree] if tree.startswith('?x') else []
    return None
def TargetProjectionFromDerivation(derivation):
    """Return a tuple (tree, weight) for a derivation (sequence of productions).

    This version does not use the left-hand sides of productions to figure
    out the output path o that corresponds to each input path; it relies on
    the input-to-output path mapping computed by ``GetIn2OutPathMapping``.

    The right-hand side of each production's rule is deep-copied and
    either becomes the whole tree (while no tree exists yet, or the current
    tree is just a string) or is grafted into the tree at the production's
    output path, taken relative to the derivation's initial output path.
    The weight is the product of the weights of all rules used.

    Args:
        derivation: iterable of productions; each exposes ``non_terminal``
            (``(q, i, o)`` or ``(q, i)``) and ``rhs.rule`` with ``rhs`` and
            ``weight``.

    Returns:
        ``(tree, weight)``.

    Raises:
        AssertionError: the resulting tree still contains variables.
    """
    weight = 1.0
    tree = None
    # Mapping between input paths i to output paths o (as in (q, i, o)).
    in_to_out_paths = GetIn2OutPathMapping(derivation)
    out_start = GetInitialOutPathFromDerivation(derivation)
    for production in derivation:
        q, in_path = production.non_terminal[:2]  # NTs are (q, i, o) or (q, i).
        out_path = in_to_out_paths[(q, in_path)]
        rule = production.rhs.rule
        # Identity test: `== None` would dispatch to the tree type's __eq__.
        if tree is None or IsString(tree):
            tree = deepcopy(rule.rhs)
        else:
            # Strip the initial output path so that the index is relative
            # to the root of the tree being built.
            tree[out_path[len(out_start):]] = deepcopy(rule.rhs)
        weight *= rule.weight
    assert not variables_to_paths(tree), \
        'Tree was left incomplete: %s for derivation %s' % (tree, derivation)
    return (tree, weight)
def rhs_vars_to_paths(self):
    """Return the mapping from untyped rhs variable names to their paths.

    The mapping is computed from ``self.rhs`` on first use, stored in
    ``self._rhs_vars_to_paths``, and returned as-is on later calls. Any
    ``|TYPE`` suffix on a variable name is dropped from the key.
    """
    if self._rhs_vars_to_paths is not None:
        return self._rhs_vars_to_paths

    mapping = {}
    for (name, position) in variables_to_paths(self.rhs):
        mapping[name.split('|')[0]] = position
    self._rhs_vars_to_paths = mapping
    return self._rhs_vars_to_paths
def MakeNewStates(state, rule):
    """Map the path of every variable in ``rule.rhs`` to ``state``.

    Args:
        state: value stored for each variable path.
        rule: object whose ``rhs`` pattern is scanned for variables.

    Returns:
        A dict from variable path to ``state``.
    """
    newstates = {}
    for (_, position) in variables_to_paths(rule.rhs):
        newstates[position] = state
    return newstates