Beispiel #1
0
 def __init__(self, ev_context):
     """Binds this object to the evaluator context `ev_context`.

     `ev_context.corpus_info.feature_types` is passed to a new
     `NodeDescriptionNormalizer` and a new `NodeTypeInferencer`. Every item
     obtained by iterating `self.constraint_factory` has `setup_context`
     called with the stored context.
     """
     # NOTE(review): `super(self.__class__, self)` recurses without end as soon
     # as this class is subclassed; the enclosing class should be named here.
     super(self.__class__, self).__init__()
     self._ev_context = ev_context
     feature_types = ev_context.corpus_info.feature_types
     self.nodedesc_normalizer = NodeDescriptionNormalizer(feature_types)
     for constraint_cls in self.constraint_factory:
         constraint_cls.setup_context(self._ev_context)
     self._ntyper = NodeTypeInferencer(feature_types)
Beispiel #2
0
 def __init__(self, ev_context):
     """Binds this object to the evaluator context `ev_context`.

     `ev_context.corpus_info.feature_types` is passed to a new
     `NodeDescriptionNormalizer` and a new `NodeTypeInferencer`. Every item
     obtained by iterating `self.constraint_factory` has `setup_context`
     called with the stored context.
     """
     # NOTE(review): `super(self.__class__, self)` recurses without end as soon
     # as this class is subclassed; the enclosing class should be named here.
     super(self.__class__, self).__init__()
     self._ev_context = ev_context
     feature_types = ev_context.corpus_info.feature_types
     self.nodedesc_normalizer = NodeDescriptionNormalizer(feature_types)
     for constraint_cls in self.constraint_factory:
         constraint_cls.setup_context(self._ev_context)
     self._ntyper = NodeTypeInferencer(feature_types)
Beispiel #3
0
class QueryFactory(ast_visitor.AstVisitor):
    """Creates the internal representation from a query AST.

    A query AST is split into three parts:
     * node descriptions: a dictionary of `varname: AST`
     * predicates: a dictionary with `varname: nltk_contrib.tiger.query.predicates.Predicate` entries
     * constraints: a list of `((left, right), nltk_contrib.tiger.query.constraints.Constraint)` tuples

    These collections will be used to instantiate a `Query` object.
    Anonymous node descriptions will be wrapped into a variable definition with an
    automatically generated, globally unique variable name.
    """
    # Bound `next` method of a single generator created when the class body runs,
    # so all instances draw anonymous names from the same counter. The generator
    # `.next` method is Python 2 API.
    get_anon_nodevar = (":anon:%i" % (c, ) for c in count()).next
    constraint_factory = ConstraintFactory()
    predicate_factory = PredicateFactory()

    def __init__(self, ev_context):
        """Creates a factory bound to the evaluator context `ev_context`.

        `ev_context.corpus_info.feature_types` is handed to the node description
        normalizer and to the node type inferencer. Every item obtained by iterating
        `constraint_factory` has `setup_context` called with the context.
        """
        # The class is named explicitly: `super(self.__class__, self)` resolves to
        # the subclass when invoked on a subclass instance and recurses forever.
        super(QueryFactory, self).__init__()
        feature_types = ev_context.corpus_info.feature_types
        self.nodedesc_normalizer = NodeDescriptionNormalizer(feature_types)
        self._ev_context = ev_context
        for cls in self.constraint_factory:
            cls.setup_context(self._ev_context)
        self._ntyper = NodeTypeInferencer(feature_types)

    @ast_visitor.node_handler(ast.NodeDescription)
    def handle_node_description(self, child_node):
        """Replaces an anonymous node description with a reference to a fresh node variable.

        The node description is stored in `node_defs` under the new variable, and the
        variable is registered in `node_vars` under its generated name.
        """
        variable = NodeVariable(self.get_anon_nodevar(), False)
        self.node_defs[variable] = child_node
        self.node_vars[variable.name] = variable
        return self.REPLACE(create_varref(variable.name))

    @ast_visitor.node_handler(ast.VariableDefinition)
    def handle_node_variable_def(self, child_node):
        """Replaces a node variable definition with a reference, and stores it.

        If the variable has already been defined, the node descriptions are merged
        into a conjunction of the stored and the new expression.
        """
        assert child_node.variable.type == ast.VariableTypes.NodeIdentifier
        node_variable = NodeVariable.from_node(child_node.variable)
        # NOTE(review): this overwrites any earlier `node_vars` entry with a fresh
        # object, while a dict assignment to an existing key keeps the original key
        # object in `node_defs`. After a repeated definition the two mappings may
        # therefore hold distinct (but equal) instances -- confirm this is intended.
        self.node_vars[child_node.variable.name] = node_variable

        if node_variable in self.node_defs:
            self.node_defs[node_variable] = ast.NodeDescription(
                ast.Conjunction([
                    self.node_defs[node_variable].expression,
                    child_node.expression.expression
                ]))
        else:
            self.node_defs[node_variable] = child_node.expression

        return self.REPLACE(
            create_varref(child_node.variable.name,
                          container_type=child_node.variable.container))

    @ast_visitor.node_handler(ast.Predicate)
    def handle_predicate(self, child_node):
        """Stores the predicate AST node in the list of predicates."""
        self.predicates.append(child_node)
        return self.CONTINUE(child_node)

    @ast_visitor.node_handler(ast.SiblingOperator, ast.CornerOperator,
                              ast.DominanceOperator, ast.PrecedenceOperator,
                              ast.SecEdgeOperator)
    def constraint_op(self, child_node):
        """Stores the constraint AST node in the list of constraints."""
        self.constraints.append(child_node)
        return self.CONTINUE(child_node)

    def setup(self, query_ast):
        """Creates the collections for the internal representation of the query.

        `query_ast` is not used here; all four collections start out empty.
        """
        self.predicates = []
        self.node_defs = {}
        self.node_vars = {}
        self.constraints = []

    def _get_variable(self, variable):
        """Returns a node variable object associated with the AST fragment `variable`.

        If `variable` is seen the first time, a new node variable is created using
        `NodeVariable.from_node`, registered in `node_vars` and given an empty
        (`ast.Nop`) node description in `node_defs`.
        """
        try:
            return self.node_vars[variable.name]
        except KeyError:
            node_variable = self.node_vars[
                variable.name] = NodeVariable.from_node(variable)
            self.node_defs[node_variable] = ast.NodeDescription(ast.Nop())
            return node_variable

    def _process_predicates(self, predicates):
        """Creates the predicate objects.

        The predicate objects are created from the AST nodes using the `predicate_factory`
        and appended to the list stored in `predicates` for the variable they refer to.
        """
        for pred_ast_node in self.predicates:
            ast_var, predicate = self.predicate_factory.create(pred_ast_node)
            predicates[self._get_variable(ast_var)].append(predicate)

    def _process_constraints(self, predicates):
        """Creates the constraint objects.

        The constraints are created from the AST representations using the `constraint_factory`.
        Predicates contributed by the constraints are added to `predicates`.

        Returns a list of `((left_var, right_var), constraint)` tuples.
        """
        result = []
        # First pass: build every constraint and refine the types of its operands.
        for constraint_ast_node in self.constraints:
            left_var = self._get_variable(
                constraint_ast_node.left_operand.variable)
            right_var = self._get_variable(
                constraint_ast_node.right_operand.variable)

            constraint = self.constraint_factory.create(
                constraint_ast_node, (left_var.var_type, right_var.var_type),
                self._ev_context)

            result.append(((left_var, right_var), constraint))

            for node_var, var_type in zip(
                (left_var, right_var), constraint.get_node_variable_types()):
                node_var.refine_type(var_type)

        # Second pass: the constraint predicates are requested only after all
        # constraints have refined the variable types.
        for (left_var, right_var), constraint in result:
            left_p, right_p = constraint.get_predicates(left_var, right_var)
            predicates[left_var].extend(left_p)
            predicates[right_var].extend(right_p)

        return result

    def _add_type_predicates(self, predicates):
        """Adds type predicates to the predicate lists if necessary.

        A type predicate is only added for a node variable if all of the following conditions
        are true:
         * the node description is empty
         * no predicates are defined for the variable
         * the variable type is not `NodeType.UNKNOWN`

        This mechanism is different from handling of feature records. The type predicate
        is added to each disjunct, while the feature record can differ between each disjunct.
        """
        for node_variable, description in self.node_defs.iteritems():
            if (description.expression.TYPE is ast.Nop
                    and not predicates[node_variable]
                    and node_variable.var_type is not NodeType.UNKNOWN):
                predicates[node_variable].append(
                    NodeTypePredicate(node_variable.var_type))

    def from_ast(self, query_ast):
        """Convert a query AST into a result builder object.

        Query ASTs are in the same state as returned by the parser.

        The result builder class is injected using the `get_result_builder_class`
        on the evaluator context.
        """
        return self.run(query_ast)

    def result(self, query_ast):
        """Processes the collected items and returns the result builder instance.

        Each stored node description is normalized and type-inferred first; then the
        predicates, constraints and type predicates are assembled. The result builder
        class is selected by whether the query has any constraints.
        """
        predicates = defaultdict(list)

        for node_variable, node_desc in self.node_defs.iteritems():
            self.nodedesc_normalizer.run(node_desc)
            node_var_type, has_frec = self._ntyper.run(node_desc,
                                                       node_variable)
            node_variable.refine_type(node_var_type)

            if has_frec:
                predicates[node_variable].append(
                    NodeTypePredicate(node_var_type))

        self._process_predicates(predicates)
        constraints = self._process_constraints(predicates)
        self._add_type_predicates(predicates)

        builder_class = self._ev_context.get_result_builder_class(
            len(constraints) > 0)
        return builder_class(
            self._ev_context, self.node_defs, predicates, constraints)
Beispiel #4
0
class QueryFactory(ast_visitor.AstVisitor):
    """Creates the internal representation from a query AST.

    A query AST is split into three parts:
     * node descriptions: a dictionary of `varname: AST`
     * predicates: a dictionary with `varname: nltk_contrib.tiger.query.predicates.Predicate` entries
     * constraints: a list of `((left, right), nltk_contrib.tiger.query.constraints.Constraint)` tuples

    These collections will be used to instantiate a `Query` object.
    Anonymous node descriptions will be wrapped into a variable definition with an
    automatically generated, globally unique variable name.
    """
    # Bound `next` method of a single generator created when the class body runs,
    # so all instances draw anonymous names from the same counter. The generator
    # `.next` method is Python 2 API.
    get_anon_nodevar = (":anon:%i" % (c, ) for c in count()).next
    constraint_factory = ConstraintFactory()
    predicate_factory = PredicateFactory()

    def __init__(self, ev_context):
        """Creates a factory bound to the evaluator context `ev_context`.

        `ev_context.corpus_info.feature_types` is handed to the node description
        normalizer and to the node type inferencer. Every item obtained by iterating
        `constraint_factory` has `setup_context` called with the context.
        """
        # The class is named explicitly: `super(self.__class__, self)` resolves to
        # the subclass when invoked on a subclass instance and recurses forever.
        super(QueryFactory, self).__init__()
        feature_types = ev_context.corpus_info.feature_types
        self.nodedesc_normalizer = NodeDescriptionNormalizer(feature_types)
        self._ev_context = ev_context
        for cls in self.constraint_factory:
            cls.setup_context(self._ev_context)
        self._ntyper = NodeTypeInferencer(feature_types)

    @ast_visitor.node_handler(ast.NodeDescription)
    def handle_node_description(self, child_node):
        """Replaces an anonymous node description with a reference to a fresh node variable.

        The node description is stored in `node_defs` under the new variable, and the
        variable is registered in `node_vars` under its generated name.
        """
        variable = NodeVariable(self.get_anon_nodevar(), False)
        self.node_defs[variable] = child_node
        self.node_vars[variable.name] = variable
        return self.REPLACE(create_varref(variable.name))

    @ast_visitor.node_handler(ast.VariableDefinition)
    def handle_node_variable_def(self, child_node):
        """Replaces a node variable definition with a reference, and stores it.

        If the variable has already been defined, the node descriptions are merged
        into a conjunction of the stored and the new expression.
        """
        assert child_node.variable.type == ast.VariableTypes.NodeIdentifier
        node_variable = NodeVariable.from_node(child_node.variable)
        # NOTE(review): this overwrites any earlier `node_vars` entry with a fresh
        # object, while a dict assignment to an existing key keeps the original key
        # object in `node_defs`. After a repeated definition the two mappings may
        # therefore hold distinct (but equal) instances -- confirm this is intended.
        self.node_vars[child_node.variable.name] = node_variable

        if node_variable in self.node_defs:
            self.node_defs[node_variable] = ast.NodeDescription(
                ast.Conjunction([self.node_defs[node_variable].expression,
                                 child_node.expression.expression]))
        else:
            self.node_defs[node_variable] = child_node.expression

        return self.REPLACE(create_varref(child_node.variable.name,
                                          container_type=child_node.variable.container))

    @ast_visitor.node_handler(ast.Predicate)
    def handle_predicate(self, child_node):
        """Stores the predicate AST node in the list of predicates."""
        self.predicates.append(child_node)
        return self.CONTINUE(child_node)

    @ast_visitor.node_handler(ast.SiblingOperator,
               ast.CornerOperator,
               ast.DominanceOperator,
               ast.PrecedenceOperator,
               ast.SecEdgeOperator)
    def constraint_op(self, child_node):
        """Stores the constraint AST node in the list of constraints."""
        self.constraints.append(child_node)
        return self.CONTINUE(child_node)

    def setup(self, query_ast):
        """Creates the collections for the internal representation of the query.

        `query_ast` is not used here; all four collections start out empty.
        """
        self.predicates = []
        self.node_defs = {}
        self.node_vars = {}
        self.constraints = []

    def _get_variable(self, variable):
        """Returns a node variable object associated with the AST fragment `variable`.

        If `variable` is seen the first time, a new node variable is created using
        `NodeVariable.from_node`, registered in `node_vars` and given an empty
        (`ast.Nop`) node description in `node_defs`.
        """
        try:
            return self.node_vars[variable.name]
        except KeyError:
            node_variable = self.node_vars[variable.name] = NodeVariable.from_node(variable)
            self.node_defs[node_variable] = ast.NodeDescription(ast.Nop())
            return node_variable

    def _process_predicates(self, predicates):
        """Creates the predicate objects.

        The predicate objects are created from the AST nodes using the `predicate_factory`
        and appended to the list stored in `predicates` for the variable they refer to.
        """
        for pred_ast_node in self.predicates:
            ast_var, predicate = self.predicate_factory.create(pred_ast_node)
            predicates[self._get_variable(ast_var)].append(predicate)

    def _process_constraints(self, predicates):
        """Creates the constraint objects.

        The constraints are created from the AST representations using the `constraint_factory`.
        Predicates contributed by the constraints are added to `predicates`.

        Returns a list of `((left_var, right_var), constraint)` tuples.
        """
        result = []
        # First pass: build every constraint and refine the types of its operands.
        for constraint_ast_node in self.constraints:
            left_var = self._get_variable(constraint_ast_node.left_operand.variable)
            right_var = self._get_variable(constraint_ast_node.right_operand.variable)

            constraint = self.constraint_factory.create(
                constraint_ast_node, (left_var.var_type, right_var.var_type), self._ev_context)

            result.append(((left_var, right_var), constraint))

            for node_var, var_type in zip((left_var, right_var),
                                          constraint.get_node_variable_types()):
                node_var.refine_type(var_type)

        # Second pass: the constraint predicates are requested only after all
        # constraints have refined the variable types.
        for (left_var, right_var), constraint in result:
            left_p, right_p = constraint.get_predicates(left_var, right_var)
            predicates[left_var].extend(left_p)
            predicates[right_var].extend(right_p)

        return result

    def _add_type_predicates(self, predicates):
        """Adds type predicates to the predicate lists if necessary.

        A type predicate is only added for a node variable if all of the following conditions
        are true:
         * the node description is empty
         * no predicates are defined for the variable
         * the variable type is not `NodeType.UNKNOWN`

        This mechanism is different from handling of feature records. The type predicate
        is added to each disjunct, while the feature record can differ between each disjunct.
        """
        for node_variable, description in self.node_defs.iteritems():
            if (description.expression.TYPE is ast.Nop
                    and not predicates[node_variable]
                    and node_variable.var_type is not NodeType.UNKNOWN):
                predicates[node_variable].append(NodeTypePredicate(node_variable.var_type))

    def from_ast(self, query_ast):
        """Convert a query AST into a result builder object.

        Query ASTs are in the same state as returned by the parser.

        The result builder class is injected using the `get_result_builder_class`
        on the evaluator context.
        """
        return self.run(query_ast)

    def result(self, query_ast):
        """Processes the collected items and returns the result builder instance.

        Each stored node description is normalized and type-inferred first; then the
        predicates, constraints and type predicates are assembled. The result builder
        class is selected by whether the query has any constraints.
        """
        predicates = defaultdict(list)

        for node_variable, node_desc in self.node_defs.iteritems():
            self.nodedesc_normalizer.run(node_desc)
            node_var_type, has_frec = self._ntyper.run(node_desc, node_variable)
            node_variable.refine_type(node_var_type)

            if has_frec:
                predicates[node_variable].append(NodeTypePredicate(node_var_type))

        self._process_predicates(predicates)
        constraints = self._process_constraints(predicates)
        self._add_type_predicates(predicates)

        builder_class = self._ev_context.get_result_builder_class(len(constraints) > 0)
        return builder_class(
            self._ev_context, self.node_defs, predicates, constraints)