def validate_fact(indices: List[str], query: dict, fact: str):
    """
    Verify that ``fact`` has at least one value in the selected indices.

    Runs a fact-value aggregation, restricted by ``query``, over ``indices``.
    The aggregator is handed a deep copy of ``query``, so the caller's dict
    is never the object passed on.

    :param indices: Names of the indices to aggregate over.
    :param query: Query restricting the aggregated documents.
    :param fact: Name of the fact to look for.
    :return: ``True`` when the aggregation yields a non-empty result.
    :raises ValidationError: When the aggregation result is empty/falsy.
    """
    query_copy = deepcopy(query)
    aggregator = ElasticAggregator(indices=indices, query=query_copy)
    distribution = aggregator.get_fact_values_distribution(
        fact,
        fact_name_size=choices.DEFAULT_MAX_FACT_AGGREGATION_SIZE,
    )

    if distribution:
        return True

    raise ValidationError(
        f"Fact '{fact}' not present in any of the selected indices ({indices})."
    )
def _get_max_class_size(self) -> int:
    """
    Aggregate over the values of the selected fact and return the size of
    the largest class.

    The tagger's stored query is decoded from JSON when possible; if it
    cannot be decoded (e.g. it is not a string, or not valid JSON) it is
    passed to the aggregator as-is.

    :return: The largest value found in the fact-value distribution, or
        ``0`` when no fact name is selected or the aggregation returns no
        values.
    """
    fact_name = self._get_fact_name()

    raw_query = self.tagger_object.query
    try:
        query = json.loads(raw_query)
    except (TypeError, ValueError):
        # TypeError: the query is not a str/bytes (e.g. already decoded).
        # ValueError: covers json.JSONDecodeError for malformed JSON.
        query = raw_query

    if not fact_name:
        return 0

    es_aggregator = ElasticAggregator(indices=self.indices, query=query)
    # NOTE(review): presumably a mapping of fact value -> document count;
    # only `.values()` is relied on here - confirm against the aggregator.
    facts = es_aggregator.get_fact_values_distribution(
        fact_name=fact_name,
        fact_name_size=10,
        fact_value_size=10,
    )
    logging.getLogger(INFO_LOGGER).info("Class frequencies: %s", facts)

    # `default=0` keeps an empty distribution from raising ValueError and
    # matches the "no classes" result returned when no fact is selected.
    return max(facts.values(), default=0)