def do_multisearch(body):
    """
    Execute an Elasticsearch multisearch, serving from the cache when possible
    and recording cache hit/miss timing statistics.

    :param body: body of the multisearch
    :return: the result of the multisearch
    """
    cache_key = get_multisearch_cache_key(body)
    app_logging.debug(f'cache_key: {cache_key}')

    lookup_started = time.time()
    cached_result = cache.fail_proof_get(key=cache_key)
    if cached_result is not None:
        app_logging.debug('results were cached')
        record_that_response_was_cached(
            'multisearch', {'query': body}, time.time() - lookup_started)
        return cached_result

    app_logging.debug('results were not cached')
    search_started = time.time()
    result = ES.msearch(body=body)
    record_that_response_not_cached(
        'multisearch', {'query': body}, time.time() - search_started)

    seconds_valid = RUN_CONFIG.get('es_proxy_cache_seconds')
    cache.fail_proof_set(key=cache_key, value=result, timeout=seconds_valid)
    return result
def get_config_for_prop(self, index_name, prop_id):
    """
    Return the merged configuration for one property, using the cache when possible.

    :param index_name: name of the index to which the property belongs
    :param prop_id: full path of the property, such as
    '_metadata.assay_data.assay_subcellular_fraction'
    :return: a dict describing the configuration of a property
    """
    cache_key = f'config_for_{index_name}-{prop_id}'
    app_logging.debug(f'cache_key: {cache_key}')

    cached_config = cache.fail_proof_get(key=cache_key)
    if cached_config is not None:
        app_logging.debug('results were cached')
        return cached_config

    app_logging.debug('results were not cached')
    app_logging.debug(
        f'getting property config for {prop_id} of index {index_name}')

    # Merge the description coming from the ES mapping with the
    # project-level override description.
    base_es_description = self.get_property_base_es_description(
        index_name, prop_id)
    base_override_description = self.get_property_base_override_description(
        index_name, prop_id)
    config = self.get_merged_prop_config(
        index_name, prop_id, base_es_description, base_override_description)

    seconds_valid = RUN_CONFIG.get('es_proxy_cache_seconds')
    cache.fail_proof_set(key=cache_key, value=config, timeout=seconds_valid)
    return config
def get_list_of_configured_properties(self, index_name):
    """
    :param index_name: the index to check
    :return: a list of all the configured properties among all the groups
    :raises FacetsGroupsConfigurationManagerError: when the index has no
    configuration set up
    """
    cache_key = f'facets_configured_properties_for_{index_name}'
    app_logging.debug(f'cache_key: {cache_key}')

    cache_response = cache.fail_proof_get(key=cache_key)
    if cache_response is not None:
        app_logging.debug('results were cached')
        return cache_response

    app_logging.debug('results were not cached')

    with open(self.facets_groups_file_path, 'rt') as groups_file:
        groups_config = yaml.load(groups_file, Loader=yaml.FullLoader)

    index_groups = groups_config.get(index_name)
    if index_groups is None:
        raise self.FacetsGroupsConfigurationManagerError(
            f'The index {index_name} does not have a configuration set up!'
        )

    # Collect the union of all properties across every subgroup.
    properties_identified = set()
    for subgroup in index_groups.values():
        for properties_list in subgroup.values():
            properties_identified.update(properties_list)

    # Bug fix: cache the list (not the set) so that cache hits return the
    # same type as cache misses — previously a hit returned a set while a
    # miss returned a list.
    properties_found = list(properties_identified)
    seconds_valid = RUN_CONFIG.get('es_mappings_cache_seconds')
    cache.fail_proof_set(key=cache_key, value=properties_found,
                         timeout=seconds_valid)
    return properties_found
def parse_search(search_term, es_indexes, selected_es_index):
    """
    :param search_term: Term to parse
    :param es_indexes: indexes in which the search will be done, separated by a comma
    :param selected_es_index: es index to focus on
    :return: the query to send to elasticsearch based on the search term provided
    """
    cache_key = f'{search_term}-{es_indexes}-{selected_es_index}'
    app_logging.debug(f'cache_key: {cache_key}')

    cached_result = cache.fail_proof_get(key=cache_key)
    if cached_result is not None:
        app_logging.debug('results were cached')
        return cached_result

    app_logging.debug('results were not cached')

    parsed_query = parse_query_str(search_term)
    best_queries, sorted_indexes_by_score = QueryBuilder.get_best_es_query(
        parsed_query, es_indexes.split(','), selected_es_index)

    response_dict = {
        'parsed_query': parsed_query,
        'best_es_base_queries': best_queries,
        'sorted_indexes_by_score': sorted_indexes_by_score,
    }

    seconds_valid = RUN_CONFIG.get('es_proxy_cache_seconds')
    cache.fail_proof_set(key=cache_key, value=response_dict,
                         timeout=seconds_valid)
    return response_dict
def get_facets_config_for_group(self, index_name, group_name):
    """
    Build the facets configuration for one group of an index, caching the result.

    :param index_name: name of the index to which the group belongs
    :param group_name: name of the facets group
    :return: the configuration for the facets group
    """
    cache_key = f'facets_config_for_group_{index_name}-{group_name}_3'
    app_logging.debug(f'cache_key: {cache_key}')

    cached_config = cache.fail_proof_get(key=cache_key)
    if cached_config is not None:
        app_logging.debug('results were cached')
        return cached_config

    app_logging.debug('results were not cached')

    with open(self.facets_groups_file_path, 'rt') as groups_file:
        groups_config = yaml.load(groups_file, Loader=yaml.FullLoader)

    index_groups = groups_config.get(index_name)
    if index_groups is None:
        raise self.FacetsGroupsConfigurationManagerError(
            f'The index {index_name} does not have a configuration set up!'
        )

    group_config = index_groups.get(group_name)
    if group_config is None:
        raise self.FacetsGroupsConfigurationManagerError(
            f'The group {group_name} does not exist in index {index_name}!'
        )

    # 'default' properties are always shown; 'optional' ones are user-selectable.
    config = {
        'properties': {
            'default': self.get_facets_config_for_properties(
                group_config.get('default', {}), index_name),
            'optional': self.get_facets_config_for_properties(
                group_config.get('optional', {}), index_name),
        }
    }

    seconds_valid = RUN_CONFIG.get('es_mappings_cache_seconds')
    cache.fail_proof_set(key=cache_key, value=config, timeout=seconds_valid)
    return config
def get_config_for_group(self, index_name, group_name):
    """
    :param index_name: name of the index
    :param group_name: group name as defined in the groups file
    :return: the configuration of the group with the following structure:
    {
        "properties": {
            "default": [...], # properties to show by default
            "optional": [...] # properties to show as optional for the user
        }
    }
    """
    cache_key = f'config_for_group_{index_name}-{group_name}'
    app_logging.debug(f'cache_key: {cache_key}')

    cached_config = cache.fail_proof_get(key=cache_key)
    if cached_config is not None:
        app_logging.debug('results were cached')
        return cached_config

    app_logging.debug('results were not cached')

    with open(self.groups_file_path, 'rt') as groups_file:
        groups_config = yaml.load(groups_file, Loader=yaml.FullLoader)

    group_config = groups_config.get(index_name, {}).get(group_name)
    if group_config is None:
        raise self.GroupsConfigurationManagerError(
            f'The group {group_name} does not exist in index {index_name}!'
        )

    # Resolve each subgroup ('default', 'optional', ...) to its full
    # per-property configuration.
    props_configs = {
        sub_group: self.get_config_for_props_list(index_name, props_list)
        for sub_group, props_list in group_config.items()
    }

    config = {'properties': props_configs}
    seconds_valid = RUN_CONFIG.get('es_mappings_cache_seconds')
    cache.fail_proof_set(key=cache_key, value=config, timeout=seconds_valid)
    return config
def get_es_response(index_name, es_query, ignore_cache=False):
    """
    :param index_name: name of the index to query against
    :param es_query: dict with the query to send
    :param ignore_cache: determines if cache must be ignored or not
    :return: the dict with the response from es
    """
    cache_key = get_es_query_cache_key(index_name, es_query)
    app_logging.debug(f'cache_key: {cache_key}')

    lookup_started = time.time()
    if not ignore_cache:
        cache_response = cache.fail_proof_get(key=cache_key)
        if cache_response is not None:
            app_logging.debug('results were cached')
            record_that_response_was_cached(
                index_name, es_query, time.time() - lookup_started)
            return cache_response

    app_logging.debug('results were not cached')
    try:
        search_started = time.time()
        response = ES.search(index=index_name, body=es_query)
        record_that_response_not_cached(
            index_name, es_query, time.time() - search_started)
    except elasticsearch.exceptions.RequestError as error:
        # Log the offending query before propagating the error.
        app_logging.error('This query caused an error: ')
        app_logging.error(f'index_name:{index_name}')
        app_logging.error('es_query:')
        app_logging.error(es_query)
        raise error

    seconds_valid = RUN_CONFIG.get('es_proxy_cache_seconds')
    if not ignore_cache:
        cache.fail_proof_set(key=cache_key, value=response,
                             timeout=seconds_valid)
    return response
def get_es_doc(index_name, doc_id):
    """
    :param index_name: name of the index to which the document belongs
    :param doc_id: id of the document
    :return: the dict with the response from es corresponding to the document
    :raises ESDataNotFoundError: when no document with the given id exists
    """
    cache_key = f'document-{doc_id}'
    app_logging.debug(f'cache_key: {cache_key}')

    # Query equivalent to the GET below; only used when recording usage
    # statistics. Bug fix: the ES 'ids' query requires 'values' to be a list.
    equivalent_query = {
        "query": {
            "ids": {
                "values": [doc_id]
            }
        }
    }

    start_time = time.time()
    cache_response = cache.fail_proof_get(key=cache_key)
    if cache_response is not None:
        time_taken = time.time() - start_time
        app_logging.debug('results were cached')
        record_that_response_was_cached(index_name, equivalent_query,
                                        time_taken)
        return cache_response

    app_logging.debug('results were not cached')
    try:
        start_time = time.time()
        response = ES.get(index=index_name, id=doc_id)
        time_taken = time.time() - start_time
        record_that_response_not_cached(index_name, equivalent_query,
                                        time_taken)
    except elasticsearch.exceptions.NotFoundError as error:
        # Surface the miss as the project's domain-specific error.
        raise ESDataNotFoundError(repr(error))

    seconds_valid = RUN_CONFIG.get('es_proxy_cache_seconds')
    cache.fail_proof_set(key=cache_key, value=response, timeout=seconds_valid)
    return response
def get_list_of_configured_properties(self, index_name):
    """
    :param index_name: the index to check
    :return: a list of all the configured (non-virtual) properties among all
    the groups
    :raises GroupsConfigurationManagerError: when the index has no
    configuration set up
    """
    cache_key = f'configured_properties_for_{index_name}'
    app_logging.debug(f'cache_key: {cache_key}')

    cache_response = cache.fail_proof_get(key=cache_key)
    if cache_response is not None:
        app_logging.debug('results were cached')
        return cache_response

    app_logging.debug('results were not cached')

    with open(self.groups_file_path, 'rt') as groups_file:
        groups_config = yaml.load(groups_file, Loader=yaml.FullLoader)

    # Bug fix: no {} default here — the previous .get(index_name, {}) made
    # the None check below unreachable, so a missing index silently produced
    # an empty list instead of raising the declared error.
    index_groups = groups_config.get(index_name)
    if index_groups is None:
        raise self.GroupsConfigurationManagerError(
            f'The index {index_name} does not have a configuration set up!'
        )

    properties_identified = set()
    for subgroup in index_groups.values():
        for properties_list in subgroup.values():
            for property_id in properties_list:
                property_config = self.property_configuration_manager.get_config_for_prop(
                    index_name, property_id)
                # Do not include virtual properties
                if property_config.get('is_virtual', False):
                    continue
                properties_identified.add(property_id)

    # Bug fix: cache the list (not the set) so cache hits return the same
    # type as cache misses.
    properties_found = list(properties_identified)
    seconds_valid = RUN_CONFIG.get('es_mappings_cache_seconds')
    cache.fail_proof_set(key=cache_key, value=properties_found,
                         timeout=seconds_valid)
    return properties_found
def get_classification_tree():
    """
    :return: the go slim target classification tree
    """
    cache_key = 'target_classifications_go_slim_1'
    app_logging.debug(f'cache_key: {cache_key}')

    cached_tree = cache.fail_proof_get(key=cache_key)
    if cached_tree is not None:
        app_logging.debug('results are in cache')
        return cached_tree

    final_tree = GoSlimTreeGenerator().get_classification_tree()

    # Keep the tree cached for ~1 year (3.154e7 seconds).
    cache_time = int(3.154e7)
    cache.fail_proof_set(key=cache_key, value=final_tree, timeout=cache_time)
    return final_tree
def load_context_index(context_id, id_properties_list, context):
    """
    Loads an index based on the id property of the context, for fast access.

    :param context_id: id of the context loaded
    :param id_properties_list: property used to identify each item
    :param context: context loaded
    :return: a dict mapping each item's id value to the item, with the item's
    position stored under its 'index' key
    """
    context_index_key = 'context_index-{}'.format(context_id)

    context_index = cache.fail_proof_get(context_index_key)
    if context_index is not None:
        return context_index

    context_index = {}
    for position, item in enumerate(context):
        item_id = id_properties.get_id_value(id_properties_list, item)
        # NOTE: this annotates the item in place, so the objects in
        # `context` also gain an 'index' key (same behavior as before).
        item['index'] = position
        context_index[item_id] = item

    cache.fail_proof_set(context_index_key, context_index, 3600)
    return context_index
def get_classification_tree():
    """
    :return: the protein target classification tree
    """
    cache_key = 'target_classifications_protein_class_1'
    app_logging.debug(f'cache_key: {cache_key}')

    cache_response = cache.fail_proof_get(key=cache_key)
    if cache_response is not None:
        app_logging.debug('results are in cache')
        return cache_response

    index_name = 'chembl_protein_class'

    # Build the nested 'children' terms aggregation for levels l1..l6
    # programmatically, assembling from the innermost level (l6) outwards.
    # This produces the same dict as writing the six-level literal by hand.
    nested_aggs = None
    for level in range(6, 0, -1):
        terms_agg = {
            "terms": {
                "field": f'l{level}',
                "size": 1000,
                "order": {
                    "_count": "desc"
                }
            }
        }
        if nested_aggs is not None:
            terms_agg["aggs"] = nested_aggs
        nested_aggs = {"children": terms_agg}

    es_query = {"aggs": nested_aggs}

    def generate_count_query(path_to_node):
        # One clause per classification level along the path, ANDed together.
        clauses = [
            '_metadata.protein_classification.l{level}:("{class_name}")'.format(
                level=position + 1, class_name=node)
            for position, node in enumerate(path_to_node)
        ]
        return ' AND '.join(clauses)

    tree_generator = TargetHierarchyTreeGenerator(
        index_name=index_name,
        es_query=es_query,
        query_generator=generate_count_query,
        count_index='chembl_target')

    final_tree = tree_generator.get_classification_tree()

    # Keep the tree cached for ~1 year (3.154e7 seconds).
    cache_time = int(3.154e7)
    cache.fail_proof_set(key=cache_key, value=final_tree, timeout=cache_time)
    return final_tree