Example #1
    def _solve_equivalence(self, expr, binding, literal):
        literal_value = literal.value
        if isinstance(literal_value, entity_id.Identity):
            results = set()
            for index in literal_value.indices:
                results |= self._solve_equivalence(expr, binding,
                                                   expression.Literal(index))

            return results

        table = self.lookup_tables.get(binding.value, None)
        if table:
            # Sweet, we have an exact index for this.
            return self._as_entities(table.table.get(literal_value, set()))

        # Don't have an exact index, but can prefilter by component index.
        component, _ = binding.value.split("/", 1)
        slow_matcher = matcher.QueryMatcher(self._subquery(expr))
        entities = set()
        candidates = self.lookup_tables["components"].table.get(component, [])
        for identity in candidates:
            entity = self.entities[identity.first_index]
            if slow_matcher.match(entity):
                entities.add(entity)

        return entities
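
The method above prefers an exact per-attribute index and only falls back to a component-prefiltered linear scan. Below is a minimal standalone sketch of that two-tier strategy; the names solve, index, rows and predicate are illustrative placeholders, not part of the original code.

    def solve(index, rows, key, value, predicate):
        # Fast path: an exact index for this key gives a set lookup.
        table = index.get(key)
        if table is not None:
            return set(table.get(value, ()))

        # Slow path: no index, so scan every row with the predicate.
        return {row for row in rows if predicate(row)}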
Example #2
    def _slow_solve(self, expr, seed):
        slow_matcher = matcher.QueryMatcher(self._subquery(expr))
        entities = set()
        for entity in seed:
            if slow_matcher.match(entity):
                entities.add(entity)

        return entities
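
For illustration, a hedged usage sketch; manager, query and candidates are assumed names, not from the original source.

    # Hypothetical call site: no lookup table applies, so every candidate
    # entity is matched against the compiled query one at a time.
    survivors = manager._slow_solve(query, candidates)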
Example #3
    def collect_for(self, wanted, use_hint=False, result_stream_handler=None):
        """Will find and run the appropriate collectors to satisfy the query.

        If use_hint is set to True, 'wanted' will be passed on to the
        collectors as a hint. This may speed up collection, but can also
        cause collectors to run repeatedly.
        """
        # Planning stage.

        if callable(result_stream_handler):
            wanted_matcher = query_matcher.QueryMatcher(wanted)
        else:
            wanted_matcher = None

        self.update_collectors()

        # to_process is used as a FIFO queue below.
        analysis = self.analyze(wanted)
        to_process = analysis["collectors"][:]
        suggested_indices = analysis["lookups"]

        # Create indices as suggested by the analyzer.
        for attribute in suggested_indices:
            self.add_attribute_lookup(attribute)

        collectors_seen = set(self.finished_collectors)

        # Collectors with an ingest query are de-facto parsers for things
        # produced by collectors with no ingest query. They may run repeatedly
        # as required.
        repeated = list()

        # Collectors with no dependencies (my favorite).
        simple = list()

        # Queries that collectors depend on.
        queries = set()

        # Build up a list of collectors to run, based on dependencies.
        while to_process:
            collector = to_process.pop(0)
            if collector.name in collectors_seen:
                continue

            collectors_seen.add(collector.name)
            if collector.collect_queries:
                logging.debug("Collector %s deferred until stage 2.",
                              collector.name)
                repeated.append(collector)
                queries |= set(collector.collect_queries.values())

                # Discard the indexing suggestions for ingestion queries
                # because they don't represent normal usage.
                additional = set()
                for query in collector.collect_queries.values():
                    additional |= set(self.analyze(query)["collectors"])

                for dependency in additional:
                    logging.debug("Collector %s depends on collector %s.",
                                  collector.name, dependency.name)
                    if dependency.name not in collectors_seen:
                        to_process.append(dependency)
            else:
                logging.debug("%s will run in stage 1.", collector.name)
                simple.append(collector)

        if not collectors_seen.difference(self.finished_collectors):
            # Looks like we're already populated - no need to do anything.
            return

        logging.info(
            "Will now run %d first-order collectors and %d collectors with "
            "dependencies to satisfy query %s.", len(simple), len(repeated),
            wanted)

        # Execution stage 1: no dependencies.

        for collector in simple:
            effects = {
                entity_collector.EffectEnum.Duplicate: 0,
                entity_collector.EffectEnum.Merged: 0,
                entity_collector.EffectEnum.Added: 0
            }

            if use_hint or collector.enforce_hint:
                hint = wanted
            else:
                hint = None
                self.finished_collectors.add(collector.name)

            for entity, effect in self.collect(collector, hint=hint):
                if result_stream_handler and wanted_matcher.match(entity):
                    result_stream_handler(entity)

                effects[effect] += 1

            logging.debug(
                "%s produced %d new entities, %d updated and %d duplicates",
                collector.name, effects[entity_collector.EffectEnum.Added],
                effects[entity_collector.EffectEnum.Merged],
                effects[entity_collector.EffectEnum.Duplicate])

        if not repeated:
            # No higher-order collectors scheduled. We're done.
            return

        # Seeding stage for higher-order collectors.
        in_pipeline = IngestionPipeline(queries=queries)
        out_pipeline = IngestionPipeline(queries=queries)
        for query in queries:
            results = self.find(query, complete=False)
            in_pipeline.seed(query, results)
            if results:
                logging.debug("Pipeline seeded with %d entities matching '%s'",
                              len(results), query)

        # Execution stage 2: collectors with dependencies.

        # Collectors should run in FIFO order:
        repeated.reverse()

        counter = 0
        # This will spin until none of the remaining collectors want to run.
        while not in_pipeline.empty:
            # TODO (adamsh):
            # There is a better way to detect faulty collector output and
            # infinite loops, but this counter will do for now.
            counter += 1
            if counter > 100:
                raise RuntimeError(
                    ("Entity manager exceeded 100 iterations during "
                     "higher-order collector resolution. You most likely "
                     "have a faulty collector."))

            # Collectors will read from the in_pipeline and fill the
            # out_pipeline. At the end of each spin the pipelines swap and
            # the new out_pipeline is flushed.
            for collector in repeated:
                # If the collector wants complete input, we pull it from the
                # database. If it just wants one entity at a time, we can use
                # the ingestion pipeline. The semantics of both find methods
                # are identical.
                if collector.complete_input:
                    collector_input = self.find(collector.collect_queries,
                                                complete=False)
                else:
                    collector_input = in_pipeline.find(
                        collector.collect_queries)

                # The collector requests that we always pass the query hint.
                # (Decide this before the prefilter below, which receives
                # the hint as an argument.)
                if use_hint or collector.enforce_hint:
                    hint = wanted
                else:
                    hint = None

                # The collector requests its prefilter to be called.
                if collector.filter_input:
                    collector_input_filtered = {}
                    for key, val in collector_input.items():
                        collector_input_filtered[key] = collector.input_filter(
                            hint=hint, entities=val)
                    collector_input = collector_input_filtered

                # Feed output back into the pipeline.
                results = self.collect(collector=collector,
                                       collector_input=collector_input,
                                       hint=hint)

                out_pipeline.fill(collector=collector,
                                  ingest=results,
                                  wanted_handler=result_stream_handler,
                                  wanted_matcher=wanted_matcher)

            # Swap & flush, rinse & repeat.
            in_pipeline, out_pipeline = out_pipeline, in_pipeline
            out_pipeline.flush()

        for collector in repeated:
            if not use_hint and not collector.enforce_hint:
                self.finished_collectors.add(collector.name)
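
A minimal usage sketch for collect_for; the manager instance, the parsed query wanted_query, and the handler below are assumptions for illustration, not part of the original source.

    def stream_match(entity):
        # Called once for each collected entity that satisfies the query.
        print(entity)

    # Run whichever collectors are needed to answer the query, streaming
    # matching entities back as they are produced.
    manager.collect_for(wanted_query, use_hint=True,
                        result_stream_handler=stream_match)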
Example #4
    def __init__(self, queries):
        self.queues = {}
        self.matchers = {}
        for query in queries:
            self.queues[query] = []
            self.matchers[query] = query_matcher.QueryMatcher(query)
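
Example #4 shows only the constructor, but Example #3 also relies on seed, find, fill, flush and an empty property. The following is a speculative sketch of what those members could look like, inferred solely from the call sites in Example #3 rather than from the actual source; treat every body here as an assumption.

    @property
    def empty(self):
        # True when no queue holds pending entities.
        return not any(self.queues.values())

    def seed(self, query, entities):
        # Pre-load a queue with entities already in the database.
        self.queues[query].extend(entities)

    def find(self, collect_queries):
        # Return pending entities keyed like the collector's
        # collect_queries dict ({name: query}).
        return {name: self.queues[query]
                for name, query in collect_queries.items()}

    def fill(self, collector, ingest, wanted_handler=None,
             wanted_matcher=None):
        # Route each produced entity into every queue whose query matches
        # it. The collector argument is unused in this sketch; the real
        # code may use it for logging. Optionally stream entities the
        # caller asked for as they appear.
        for entity, _effect in ingest:
            if wanted_handler and wanted_matcher.match(entity):
                wanted_handler(entity)
            for query, queue in self.queues.items():
                if self.matchers[query].match(entity):
                    queue.append(entity)

    def flush(self):
        # Drop all pending entities; called on the new output pipeline
        # right after the two pipelines swap.
        for query in self.queues:
            self.queues[query] = []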
Example #5
    def matcher_for(self, query):
        """Returns a query matcher for the query (cached)."""
        matcher = self._cached_matchers.setdefault(
            query, query_matcher.QueryMatcher(query))

        return matcher
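
One caveat with the setdefault pattern above: the QueryMatcher(query) argument is evaluated on every call, even when the cache already holds a matcher, so a matcher is built and immediately discarded on cache hits. A lazier equivalent, as a sketch:

    def matcher_for(self, query):
        """Returns a query matcher for the query (cached, built lazily)."""
        matcher = self._cached_matchers.get(query)
        if matcher is None:
            # Only construct the matcher on a cache miss.
            matcher = query_matcher.QueryMatcher(query)
            self._cached_matchers[query] = matcher

        return matcher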