# NOTE(review): this file defines `QueryFactory` twice; the definition that
# appears later rebinds the name. Confirm which copy is meant to survive.
class QueryFactory(ast_visitor.AstVisitor):
    """Creates the internal representation from a query AST.

    While visiting a query AST, three collections are filled:
     * node descriptions: a dictionary mapping node variables to their
       node description AST
     * predicates: a dictionary mapping node variables to lists of
       `nltk_contrib.tiger.query.predicates.Predicate` objects
     * constraints: a list of
       `((left, right), nltk_contrib.tiger.query.constraints.Constraint)`
       tuples

    These collections are handed to the result builder class obtained from
    the evaluator context (see `result`).

    Anonymous node descriptions will be wrapped into a variable definition
    with an automatically generated, globally unique variable name.
    """

    # `next` method of one class-level generator (Python 2 spelling): the
    # counter is therefore shared by every instance of this class.
    get_anon_nodevar = (":anon:%i" % (c, ) for c in count()).next

    constraint_factory = ConstraintFactory()
    predicate_factory = PredicateFactory()

    def __init__(self, ev_context):
        """Sets up the helpers that depend on the evaluator context.

        *Parameters*:
         * `ev_context`: the evaluator context; its
           `corpus_info.feature_types` is passed to the node description
           normalizer and to the node type inferencer.
        """
        # Name the class explicitly: `super(self.__class__, self)` recurses
        # forever as soon as this class is subclassed, because
        # `self.__class__` is then the subclass, not `QueryFactory`.
        super(QueryFactory, self).__init__()
        self.nodedesc_normalizer = NodeDescriptionNormalizer(
            ev_context.corpus_info.feature_types)
        self._ev_context = ev_context

        # Every item yielded by the constraint factory gets the context.
        for cls in self.constraint_factory:
            cls.setup_context(self._ev_context)

        self._ntyper = NodeTypeInferencer(ev_context.corpus_info.feature_types)

    @ast_visitor.node_handler(ast.NodeDescription)
    def handle_node_description(self, child_node):
        """Replaces an anonymous node description with a reference to a fresh node variable.

        The node description is stored for later reference.
        """
        variable = NodeVariable(self.get_anon_nodevar(), False)
        self.node_defs[variable] = child_node
        self.node_vars[variable.name] = variable
        return self.REPLACE(create_varref(variable.name))

    @ast_visitor.node_handler(ast.VariableDefinition)
    def handle_node_variable_def(self, child_node):
        """Replaces a node variable definition with a reference, and stores it.

        If the variable has already been defined, the node descriptions are
        merged into a conjunction of the old and the new expression.
        """
        assert child_node.variable.type == ast.VariableTypes.NodeIdentifier
        node_variable = NodeVariable.from_node(child_node.variable)
        self.node_vars[child_node.variable.name] = node_variable

        if node_variable in self.node_defs:
            self.node_defs[node_variable] = ast.NodeDescription(
                ast.Conjunction([
                    self.node_defs[node_variable].expression,
                    child_node.expression.expression,
                ]))
        else:
            self.node_defs[node_variable] = child_node.expression

        return self.REPLACE(
            create_varref(child_node.variable.name,
                          container_type=child_node.variable.container))

    @ast_visitor.node_handler(ast.Predicate)
    def handle_predicate(self, child_node):
        """Stores the predicate in the list of predicates."""
        self.predicates.append(child_node)
        return self.CONTINUE(child_node)

    @ast_visitor.node_handler(ast.SiblingOperator,
                              ast.CornerOperator,
                              ast.DominanceOperator,
                              ast.PrecedenceOperator,
                              ast.SecEdgeOperator)
    def constraint_op(self, child_node):
        """Stores the constraint in the list of constraints."""
        self.constraints.append(child_node)
        return self.CONTINUE(child_node)

    def setup(self, query_ast):
        """Creates the collections for the internal representation of the query.

        `query_ast` is not inspected here; the collections always start empty.
        """
        self.predicates = []
        self.node_defs = {}
        self.node_vars = {}
        self.constraints = []

    def _get_variable(self, variable):
        """Returns a node variable object associated with the AST fragment `variable`.

        If `variable` is seen the first time, a new node variable is created
        using `NodeVariable.from_node` and registered with an empty
        (`ast.Nop`) node description.
        """
        try:
            return self.node_vars[variable.name]
        except KeyError:
            node_variable = self.node_vars[variable.name] = NodeVariable.from_node(variable)
            self.node_defs[node_variable] = ast.NodeDescription(ast.Nop())
            return node_variable

    def _process_predicates(self, predicates):
        """Creates the predicate objects.

        The predicate objects are created from the AST nodes using the
        `predicate_factory` and appended to `predicates` under the node
        variable they refer to.
        """
        for pred_ast_node in self.predicates:
            ast_var, predicate = self.predicate_factory.create(pred_ast_node)
            predicates[self._get_variable(ast_var)].append(predicate)

    def _process_constraints(self, predicates):
        """Creates the constraint objects.

        The constraints are created from the AST representations using the
        `constraint_factory`. Returns a list of
        `((left_var, right_var), constraint)` tuples and extends `predicates`
        with the predicates each constraint reports for its operands.
        """
        result = []
        for constraint_ast_node in self.constraints:
            left_var = self._get_variable(constraint_ast_node.left_operand.variable)
            right_var = self._get_variable(constraint_ast_node.right_operand.variable)

            constraint = self.constraint_factory.create(
                constraint_ast_node,
                (left_var.var_type, right_var.var_type),
                self._ev_context)
            result.append(((left_var, right_var), constraint))

            for node_var, var_type in zip((left_var, right_var),
                                          constraint.get_node_variable_types()):
                node_var.refine_type(var_type)

        # Second pass: predicates are requested only after all constraints
        # have been created and all variable types refined.
        for (left_var, right_var), constraint in result:
            left_p, right_p = constraint.get_predicates(left_var, right_var)
            predicates[left_var].extend(left_p)
            predicates[right_var].extend(right_p)

        return result

    def _add_type_predicates(self, predicates):
        """Adds type predicates to the predicate lists if necessary.

        A type predicate is only added for a node variable if all of the
        following conditions are true:
         * the node description is empty
         * no predicates are defined for the variable
         * the variable type is not `NodeType.UNKNOWN`

        This mechanism is different from handling of feature records. The
        type predicate is added to each disjunct, while the feature record
        can differ between each disjunct.
        """
        for node_variable, description in self.node_defs.iteritems():
            if (description.expression.TYPE is ast.Nop
                    and not predicates[node_variable]
                    and node_variable.var_type is not NodeType.UNKNOWN):
                predicates[node_variable].append(
                    NodeTypePredicate(node_variable.var_type))

    def from_ast(self, query_ast):
        """Convert a query AST into a result builder object.

        Query ASTs are in the same state as returned by the parser. This is a
        thin wrapper around `self.run(query_ast)`.

        The result builder class is injected using the
        `get_result_builder_class` on the evaluator context.
        """
        return self.run(query_ast)

    def result(self, query_ast):
        """Processes the collected items and returns the query object."""
        predicates = defaultdict(list)

        for node_variable, node_desc in self.node_defs.iteritems():
            self.nodedesc_normalizer.run(node_desc)
            node_var_type, has_frec = self._ntyper.run(node_desc, node_variable)
            node_variable.refine_type(node_var_type)
            if has_frec:
                predicates[node_variable].append(NodeTypePredicate(node_var_type))

        self._process_predicates(predicates)
        constraints = self._process_constraints(predicates)
        self._add_type_predicates(predicates)

        # The builder class is selected by whether any constraints exist.
        return self._ev_context.get_result_builder_class(len(constraints) > 0)(
            self._ev_context, self.node_defs, predicates, constraints)
# NOTE(review): this file defines `QueryFactory` twice; the definition that
# appears later rebinds the name. Confirm which copy is meant to survive.
class QueryFactory(ast_visitor.AstVisitor):
    """Creates the internal representation from a query AST.

    While visiting a query AST, three collections are filled:
     * node descriptions: a dictionary mapping node variables to their
       node description AST
     * predicates: a dictionary mapping node variables to lists of
       `nltk_contrib.tiger.query.predicates.Predicate` objects
     * constraints: a list of
       `((left, right), nltk_contrib.tiger.query.constraints.Constraint)`
       tuples

    These collections are handed to the result builder class obtained from
    the evaluator context (see `result`).

    Anonymous node descriptions will be wrapped into a variable definition
    with an automatically generated, globally unique variable name.
    """

    # `next` method of one class-level generator (Python 2 spelling): the
    # counter is therefore shared by every instance of this class.
    get_anon_nodevar = (":anon:%i" % (c, ) for c in count()).next

    constraint_factory = ConstraintFactory()
    predicate_factory = PredicateFactory()

    def __init__(self, ev_context):
        """Sets up the helpers that depend on the evaluator context.

        *Parameters*:
         * `ev_context`: the evaluator context; its
           `corpus_info.feature_types` is passed to the node description
           normalizer and to the node type inferencer.
        """
        # Name the class explicitly: `super(self.__class__, self)` recurses
        # forever as soon as this class is subclassed, because
        # `self.__class__` is then the subclass, not `QueryFactory`.
        super(QueryFactory, self).__init__()
        self.nodedesc_normalizer = NodeDescriptionNormalizer(
            ev_context.corpus_info.feature_types)
        self._ev_context = ev_context

        # Every item yielded by the constraint factory gets the context.
        for cls in self.constraint_factory:
            cls.setup_context(self._ev_context)

        self._ntyper = NodeTypeInferencer(ev_context.corpus_info.feature_types)

    @ast_visitor.node_handler(ast.NodeDescription)
    def handle_node_description(self, child_node):
        """Replaces an anonymous node description with a reference to a fresh node variable.

        The node description is stored for later reference.
        """
        variable = NodeVariable(self.get_anon_nodevar(), False)
        self.node_defs[variable] = child_node
        self.node_vars[variable.name] = variable
        return self.REPLACE(create_varref(variable.name))

    @ast_visitor.node_handler(ast.VariableDefinition)
    def handle_node_variable_def(self, child_node):
        """Replaces a node variable definition with a reference, and stores it.

        If the variable has already been defined, the node descriptions are
        merged into a conjunction of the old and the new expression.
        """
        assert child_node.variable.type == ast.VariableTypes.NodeIdentifier
        node_variable = NodeVariable.from_node(child_node.variable)
        self.node_vars[child_node.variable.name] = node_variable

        if node_variable in self.node_defs:
            self.node_defs[node_variable] = ast.NodeDescription(
                ast.Conjunction([
                    self.node_defs[node_variable].expression,
                    child_node.expression.expression,
                ]))
        else:
            self.node_defs[node_variable] = child_node.expression

        return self.REPLACE(
            create_varref(child_node.variable.name,
                          container_type=child_node.variable.container))

    @ast_visitor.node_handler(ast.Predicate)
    def handle_predicate(self, child_node):
        """Stores the predicate in the list of predicates."""
        self.predicates.append(child_node)
        return self.CONTINUE(child_node)

    @ast_visitor.node_handler(ast.SiblingOperator,
                              ast.CornerOperator,
                              ast.DominanceOperator,
                              ast.PrecedenceOperator,
                              ast.SecEdgeOperator)
    def constraint_op(self, child_node):
        """Stores the constraint in the list of constraints."""
        self.constraints.append(child_node)
        return self.CONTINUE(child_node)

    def setup(self, query_ast):
        """Creates the collections for the internal representation of the query.

        `query_ast` is not inspected here; the collections always start empty.
        """
        self.predicates = []
        self.node_defs = {}
        self.node_vars = {}
        self.constraints = []

    def _get_variable(self, variable):
        """Returns a node variable object associated with the AST fragment `variable`.

        If `variable` is seen the first time, a new node variable is created
        using `NodeVariable.from_node` and registered with an empty
        (`ast.Nop`) node description.
        """
        try:
            return self.node_vars[variable.name]
        except KeyError:
            node_variable = self.node_vars[variable.name] = NodeVariable.from_node(variable)
            self.node_defs[node_variable] = ast.NodeDescription(ast.Nop())
            return node_variable

    def _process_predicates(self, predicates):
        """Creates the predicate objects.

        The predicate objects are created from the AST nodes using the
        `predicate_factory` and appended to `predicates` under the node
        variable they refer to.
        """
        for pred_ast_node in self.predicates:
            ast_var, predicate = self.predicate_factory.create(pred_ast_node)
            predicates[self._get_variable(ast_var)].append(predicate)

    def _process_constraints(self, predicates):
        """Creates the constraint objects.

        The constraints are created from the AST representations using the
        `constraint_factory`. Returns a list of
        `((left_var, right_var), constraint)` tuples and extends `predicates`
        with the predicates each constraint reports for its operands.
        """
        result = []
        for constraint_ast_node in self.constraints:
            left_var = self._get_variable(constraint_ast_node.left_operand.variable)
            right_var = self._get_variable(constraint_ast_node.right_operand.variable)

            constraint = self.constraint_factory.create(
                constraint_ast_node,
                (left_var.var_type, right_var.var_type),
                self._ev_context)
            result.append(((left_var, right_var), constraint))

            for node_var, var_type in zip((left_var, right_var),
                                          constraint.get_node_variable_types()):
                node_var.refine_type(var_type)

        # Second pass: predicates are requested only after all constraints
        # have been created and all variable types refined.
        for (left_var, right_var), constraint in result:
            left_p, right_p = constraint.get_predicates(left_var, right_var)
            predicates[left_var].extend(left_p)
            predicates[right_var].extend(right_p)

        return result

    def _add_type_predicates(self, predicates):
        """Adds type predicates to the predicate lists if necessary.

        A type predicate is only added for a node variable if all of the
        following conditions are true:
         * the node description is empty
         * no predicates are defined for the variable
         * the variable type is not `NodeType.UNKNOWN`

        This mechanism is different from handling of feature records. The
        type predicate is added to each disjunct, while the feature record
        can differ between each disjunct.
        """
        for node_variable, description in self.node_defs.iteritems():
            if (description.expression.TYPE is ast.Nop
                    and not predicates[node_variable]
                    and node_variable.var_type is not NodeType.UNKNOWN):
                predicates[node_variable].append(
                    NodeTypePredicate(node_variable.var_type))

    def from_ast(self, query_ast):
        """Convert a query AST into a result builder object.

        Query ASTs are in the same state as returned by the parser. This is a
        thin wrapper around `self.run(query_ast)`.

        The result builder class is injected using the
        `get_result_builder_class` on the evaluator context.
        """
        return self.run(query_ast)

    def result(self, query_ast):
        """Processes the collected items and returns the query object."""
        predicates = defaultdict(list)

        for node_variable, node_desc in self.node_defs.iteritems():
            self.nodedesc_normalizer.run(node_desc)
            node_var_type, has_frec = self._ntyper.run(node_desc, node_variable)
            node_variable.refine_type(node_var_type)
            if has_frec:
                predicates[node_variable].append(NodeTypePredicate(node_var_type))

        self._process_predicates(predicates)
        constraints = self._process_constraints(predicates)
        self._add_type_predicates(predicates)

        # The builder class is selected by whether any constraints exist.
        return self._ev_context.get_result_builder_class(len(constraints) > 0)(
            self._ev_context, self.node_defs, predicates, constraints)