Ejemplo n.º 1
0
def validate_fact(indices: List[str], query: dict, fact: str):
    """Verify that ``fact`` occurs in at least one of the selected indices.

    Runs a fact-value aggregation over ``indices`` restricted by ``query``
    and treats an empty (falsy) aggregation result as "fact not present".

    Args:
        indices: Names of the indices to aggregate over.
        query: Query restricting the aggregation; a deep copy is handed to
            the aggregator rather than the caller's own dict.
        fact: Name of the fact to look for.

    Returns:
        True when the aggregation yields a non-empty result.

    Raises:
        ValidationError: If the aggregation result is empty.
    """
    # The aggregator receives its own copy of the query
    # (presumably so it cannot mutate the caller's dict - TODO confirm).
    aggregator = ElasticAggregator(indices=indices, query=deepcopy(query))
    distribution = aggregator.get_fact_values_distribution(
        fact,
        fact_name_size=choices.DEFAULT_MAX_FACT_AGGREGATION_SIZE,
    )

    if distribution:
        return True

    raise ValidationError(
        f"Fact '{fact}' not present in any of the selected indices ({indices})."
    )
Ejemplo n.º 2
0
    def _get_max_class_size(self) -> int:
        """Return the size of the largest class of the selected fact.

        Aggregates over the values of the fact named by
        ``self._get_fact_name()`` in ``self.indices``, restricted by the
        tagger object's query, and returns the largest value found in the
        resulting distribution.

        Returns:
            The maximum of the aggregated distribution's values, or 0 when no
            fact name is selected or the aggregation yields no values.
        """
        fact_name = self._get_fact_name()

        # The stored query may be a JSON string or something json.loads
        # cannot handle (non-string input raises TypeError, malformed JSON
        # raises ValueError); in the latter cases pass it through unchanged.
        raw_query = self.tagger_object.query
        try:
            query = json.loads(raw_query)
        except (TypeError, ValueError):
            query = raw_query

        if not fact_name:
            return 0

        es_aggregator = ElasticAggregator(indices=self.indices, query=query)
        facts = es_aggregator.get_fact_values_distribution(
            fact_name=fact_name, fact_name_size=10, fact_value_size=10)
        logging.getLogger(INFO_LOGGER).info(f"Class frequencies: {facts}")

        # An empty distribution would make a plain max() raise ValueError;
        # fall back to 0, the same value returned when no fact is selected.
        return max(facts.values(), default=0)