Ejemplo n.º 1
0
def do_multisearch(body):
    """
    Run a multisearch against elasticsearch, answering from the cache when an entry exists.

    Both the cache-hit and the cache-miss paths report the time they took
    through the corresponding record_that_response_* helper.

    :param body: body of the multisearch
    :return: the result of the multisearch
    """
    key = get_multisearch_cache_key(body)
    app_logging.debug(f'cache_key: {key}')

    lookup_started = time.time()
    cached = cache.fail_proof_get(key=key)
    if cached is not None:
        lookup_seconds = time.time() - lookup_started
        app_logging.debug('results were cached')
        record_that_response_was_cached('multisearch', {'query': body}, lookup_seconds)
        return cached

    app_logging.debug('results were not cached')

    # Only the elasticsearch round trip is timed on a cache miss.
    search_started = time.time()
    es_response = ES.msearch(body=body)
    search_seconds = time.time() - search_started

    record_that_response_not_cached('multisearch', {'query': body}, search_seconds)

    cache.fail_proof_set(
        key=key,
        value=es_response,
        timeout=RUN_CONFIG.get('es_proxy_cache_seconds'),
    )

    return es_response
    def get_config_for_prop(self, index_name, prop_id):
        """
        Return the configuration of one property, reading it from the cache when an entry exists.

        On a cache miss the base elasticsearch description and the override
        description of the property are merged, and the result is cached.

        :param index_name: name of the index to which the property belongs
        :param prop_id: full path of the property, such as  '_metadata.assay_data.assay_subcellular_fraction'
        :return: a dict describing the configuration of a property
        """
        key = f'config_for_{index_name}-{prop_id}'
        app_logging.debug(f'cache_key: {key}')

        cached = cache.fail_proof_get(key=key)
        if cached is not None:
            app_logging.debug('results were cached')
            return cached

        app_logging.debug('results were not cached')
        app_logging.debug(f'getting property config for {prop_id} of index {index_name}')

        base_description = self.get_property_base_es_description(index_name, prop_id)
        override_description = self.get_property_base_override_description(index_name, prop_id)
        merged_config = self.get_merged_prop_config(
            index_name, prop_id, base_description, override_description)

        cache.fail_proof_set(
            key=key,
            value=merged_config,
            timeout=RUN_CONFIG.get('es_proxy_cache_seconds'),
        )
        return merged_config
Ejemplo n.º 3
0
    def get_list_of_configured_properties(self, index_name):
        """
        Collect every property configured for an index across all of its facets groups.

        The facets groups file is only read on a cache miss; the resulting list
        is then stored in the cache.

        :param index_name: the index to check
        :return: a list of all the configured properties among all the groups
        :raises FacetsGroupsConfigurationManagerError: if the groups file has no entry for the index
        """

        cache_key = f'facets_configured_properties_for_{index_name}'
        app_logging.debug(f'cache_key: {cache_key}')

        cache_response = cache.fail_proof_get(key=cache_key)
        if cache_response is not None:
            app_logging.debug('results were cached')
            # A cached entry may hold a set rather than a list; normalise so a
            # cache hit always returns the same type as a cache miss.
            return list(cache_response)

        app_logging.debug('results were not cached')

        with open(self.facets_groups_file_path, 'rt') as groups_file:
            groups_config = yaml.load(groups_file, Loader=yaml.FullLoader)

        index_groups = groups_config.get(index_name)
        if index_groups is None:
            raise self.FacetsGroupsConfigurationManagerError(
                f'The index {index_name} does not have a configuration set up!'
            )

        # A set removes properties that appear in more than one group.
        properties_identified = set()
        for subgroup in index_groups.values():
            for properties_list in subgroup.values():
                properties_identified.update(properties_list)

        configured_properties = list(properties_identified)

        seconds_valid = RUN_CONFIG.get('es_mappings_cache_seconds')
        # Cache the list itself so that hits and misses return the same type.
        cache.fail_proof_set(key=cache_key,
                             value=configured_properties,
                             timeout=seconds_valid)
        return configured_properties
def parse_search(search_term, es_indexes, selected_es_index):
    """
    Turn a free-text search term into the elasticsearch queries to run, caching the outcome.

    :param search_term: Term to parse
    :param es_indexes: indexes in which the search will be done, separated by a comma
    :param selected_es_index: es index to focus on
    :return: a dict with the keys 'parsed_query', 'best_es_base_queries' and
        'sorted_indexes_by_score', built from the search term provided
    """

    key = f'{search_term}-{es_indexes}-{selected_es_index}'
    app_logging.debug(f'cache_key: {key}')

    cached = cache.fail_proof_get(key=key)
    if cached is not None:
        app_logging.debug('results were cached')
        return cached

    app_logging.debug('results were not cached')

    parsed_query = parse_query_str(search_term)
    best_queries, sorted_indexes_by_score = QueryBuilder.get_best_es_query(
        parsed_query, es_indexes.split(','), selected_es_index)

    parsed_search = dict(
        parsed_query=parsed_query,
        best_es_base_queries=best_queries,
        sorted_indexes_by_score=sorted_indexes_by_score,
    )

    cache.fail_proof_set(
        key=key,
        value=parsed_search,
        timeout=RUN_CONFIG.get('es_proxy_cache_seconds'),
    )

    return parsed_search
Ejemplo n.º 5
0
    def get_facets_config_for_group(self, index_name, group_name):
        """
        Build the facets configuration of a group, using the cache when an entry exists.

        :param index_name: name of the index to which the group belongs
        :param group_name: name of the facets group
        :return: the configuration for the facets group, a dict of the form
            {'properties': {'default': ..., 'optional': ...}}
        :raises FacetsGroupsConfigurationManagerError: if the index or the group
            is missing from the facets groups file
        """

        key = f'facets_config_for_group_{index_name}-{group_name}_3'
        app_logging.debug(f'cache_key: {key}')

        cached = cache.fail_proof_get(key=key)
        if cached is not None:
            app_logging.debug('results were cached')
            return cached

        app_logging.debug('results were not cached')

        with open(self.facets_groups_file_path, 'rt') as groups_file:
            all_groups = yaml.load(groups_file, Loader=yaml.FullLoader)

        groups_of_index = all_groups.get(index_name)
        if groups_of_index is None:
            raise self.FacetsGroupsConfigurationManagerError(
                f'The index {index_name} does not have a configuration set up!'
            )

        group_entry = groups_of_index.get(group_name)
        if group_entry is None:
            raise self.FacetsGroupsConfigurationManagerError(
                f'The group {group_name} does not exist in index {index_name}!'
            )

        # A sub-section missing from the group falls back to an empty mapping.
        default_props = group_entry.get('default', {})
        optional_props = group_entry.get('optional', {})

        properties_config = {}
        properties_config['default'] = self.get_facets_config_for_properties(
            default_props, index_name)
        properties_config['optional'] = self.get_facets_config_for_properties(
            optional_props, index_name)
        facets_config = {'properties': properties_config}

        cache.fail_proof_set(
            key=key,
            value=facets_config,
            timeout=RUN_CONFIG.get('es_mappings_cache_seconds'),
        )
        return facets_config
    def get_config_for_group(self, index_name, group_name):
        """
        Build the properties configuration of a group, using the cache when an entry exists.

        :param index_name: name of the index
        :param group_name: group name as defined in the groups file
        :return: the configuration of the group with the following structure:
        {
            "properties": {
                "default": [...], # properties to show by default
                "optional": [...] # properties to show as optional for the user
            }
        }
        :raises GroupsConfigurationManagerError: if the group is not defined for the index
        """

        key = f'config_for_group_{index_name}-{group_name}'
        app_logging.debug(f'cache_key: {key}')

        cached = cache.fail_proof_get(key=key)
        if cached is not None:
            app_logging.debug('results were cached')
            return cached

        app_logging.debug('results were not cached')

        with open(self.groups_file_path, 'rt') as groups_file:
            all_groups = yaml.load(groups_file, Loader=yaml.FullLoader)

        # An index without any entry is treated like an index without groups,
        # so it ends up in the "group does not exist" error below.
        group_entry = all_groups.get(index_name, {}).get(group_name)
        if group_entry is None:
            raise self.GroupsConfigurationManagerError(
                f'The group {group_name} does not exist in index {index_name}!'
            )

        # One entry per sub group found in the groups file.
        group_config = {
            'properties': {
                sub_group: self.get_config_for_props_list(index_name, props_list)
                for sub_group, props_list in group_entry.items()
            }
        }

        cache.fail_proof_set(
            key=key,
            value=group_config,
            timeout=RUN_CONFIG.get('es_mappings_cache_seconds'),
        )

        return group_config
Ejemplo n.º 7
0
def get_es_response(index_name, es_query, ignore_cache=False):
    """
    Run a search against an index, optionally answering from and writing to the cache.

    :param index_name: name of the index to query against
    :param es_query: dict with the query to send
    :param ignore_cache: determines if cache must be ignored or not; when True
        the cache is neither read nor written
    :return: the dict with the response from es
    :raises elasticsearch.exceptions.RequestError: re-raised after logging the
        offending index name and query
    """

    key = get_es_query_cache_key(index_name, es_query)
    app_logging.debug(f'cache_key: {key}')

    use_cache = not ignore_cache
    lookup_started = time.time()

    if use_cache:
        cached = cache.fail_proof_get(key=key)
        if cached is not None:
            lookup_seconds = time.time() - lookup_started
            app_logging.debug('results were cached')
            record_that_response_was_cached(index_name, es_query, lookup_seconds)
            return cached

    app_logging.debug('results were not cached')

    try:
        search_started = time.time()
        es_response = ES.search(index=index_name, body=es_query)
        search_seconds = time.time() - search_started

        record_that_response_not_cached(index_name, es_query, search_seconds)
    except elasticsearch.exceptions.RequestError as error:
        # Log the index and query so the failing request can be identified.
        for message in ('This query caused an error: ',
                        f'index_name:{index_name}',
                        'es_query:',
                        es_query):
            app_logging.error(message)
        raise error

    seconds_valid = RUN_CONFIG.get('es_proxy_cache_seconds')
    if use_cache:
        cache.fail_proof_set(key=key, value=es_response, timeout=seconds_valid)

    return es_response
Ejemplo n.º 8
0
def get_es_doc(index_name, doc_id):
    """
    Fetch a single document from an index, answering from the cache when an entry exists.

    :param index_name: name of the index to which the document belongs
    :param doc_id: id of the document
    :return: the dict with the response from es corresponding to the document
    :raises ESDataNotFoundError: if elasticsearch reports that the document does not exist
    """

    # The index name is part of the key: the same id may exist in more than one
    # index, and those documents must not share a cache entry.
    cache_key = f'document-{index_name}-{doc_id}'
    app_logging.debug(f'cache_key: {cache_key}')

    # Only used as the "query" reported to the response time statistics.
    equivalent_query = {
        "query": {
            "ids": {
                "values": doc_id
            }
        }
    }

    start_time = time.time()
    cache_response = cache.fail_proof_get(key=cache_key)
    if cache_response is not None:
        time_taken = time.time() - start_time
        app_logging.debug('results were cached')
        record_that_response_was_cached(index_name, equivalent_query, time_taken)
        return cache_response

    app_logging.debug('results were not cached')

    try:
        start_time = time.time()
        response = ES.get(index=index_name, id=doc_id)
        time_taken = time.time() - start_time

        record_that_response_not_cached(index_name, equivalent_query, time_taken)
    except elasticsearch.exceptions.NotFoundError as error:
        # Chain explicitly so the original elasticsearch error stays attached as the cause.
        raise ESDataNotFoundError(repr(error)) from error

    seconds_valid = RUN_CONFIG.get('es_proxy_cache_seconds')
    cache.fail_proof_set(key=cache_key, value=response, timeout=seconds_valid)

    return response
    def get_list_of_configured_properties(self, index_name):
        """
        Collect every non-virtual property configured for an index across all of its groups.

        The groups file is only read on a cache miss; the resulting list is then
        stored in the cache.

        :param index_name: the index to check
        :return: a list of all the configured properties among all the groups
        :raises GroupsConfigurationManagerError: if the groups file maps the index to an explicit null
        """
        cache_key = f'configured_properties_for_{index_name}'
        app_logging.debug(f'cache_key: {cache_key}')

        cache_response = cache.fail_proof_get(key=cache_key)
        if cache_response is not None:
            app_logging.debug('results were cached')
            # A cached entry may hold a set rather than a list; normalise so a
            # cache hit always returns the same type as a cache miss.
            return list(cache_response)

        app_logging.debug('results were not cached')

        with open(self.groups_file_path, 'rt') as groups_file:
            groups_config = yaml.load(groups_file, Loader=yaml.FullLoader)

        # NOTE(review): because of the {} default, an index that is absent from
        # the file yields an empty result instead of the error below; the error
        # only fires when the file maps the index to null. Confirm which is intended.
        index_groups = groups_config.get(index_name, {})
        if index_groups is None:
            raise self.GroupsConfigurationManagerError(
                f'The index {index_name} does not have a configuration set up!'
            )

        # A set removes properties that appear in more than one group.
        properties_identified = set()
        for subgroup in index_groups.values():
            for properties_list in subgroup.values():
                for property_id in properties_list:
                    property_config = self.property_configuration_manager.get_config_for_prop(
                        index_name, property_id)
                    # Do not include virtual properties
                    if property_config.get('is_virtual', False):
                        continue

                    properties_identified.add(property_id)

        configured_properties = list(properties_identified)

        seconds_valid = RUN_CONFIG.get('es_mappings_cache_seconds')
        # Cache the list itself so that hits and misses return the same type.
        cache.fail_proof_set(key=cache_key,
                             value=configured_properties,
                             timeout=seconds_valid)

        return configured_properties
Ejemplo n.º 10
0
def get_classification_tree():
    """
    Return the go slim target classification tree, generating and caching it when it is not cached yet.

    :return: the go slim target classification tree
    """

    key = 'target_classifications_go_slim_1'
    app_logging.debug(f'cache_key: {key}')

    cached_tree = cache.fail_proof_get(key=key)
    if cached_tree is not None:
        app_logging.debug('results are in cache')
        return cached_tree

    generated_tree = GoSlimTreeGenerator().get_classification_tree()

    # 31,540,000 seconds, which is about 365 days.
    seconds_to_keep = int(3.154e7)
    cache.fail_proof_set(key=key, value=generated_tree, timeout=seconds_to_keep)

    return generated_tree
def load_context_index(context_id, id_properties_list, context):
    """
    Loads an index based on the id property of the context, for fast access.

    The index is read from the cache when present; otherwise it is built from
    the context and cached for 3600 seconds. Building it stores each item's
    position in the item itself, under the key 'index'.

    :param context_id: id of the context loaded
    :param id_properties_list: property used to identify each item
    :param context: context loaded
    :return: a dict that maps the id value of each item to the item
    """

    key = 'context_index-{}'.format(context_id)

    cached_index = cache.fail_proof_get(key)
    if cached_index is not None:
        return cached_index

    built_index = {}
    for position, item in enumerate(context):
        item_id = id_properties.get_id_value(id_properties_list, item)
        built_index[item_id] = item
        # The item is the very object stored in the index, so this also tags the indexed entry.
        item['index'] = position

    cache.fail_proof_set(key, built_index, 3600)

    return built_index
Ejemplo n.º 12
0
def get_classification_tree():
    """
    Return the protein target classification tree, generating and caching it when it is not cached yet.

    :return: the protein target classification tree
    """
    key = 'target_classifications_protein_class_1'
    app_logging.debug(f'cache_key: {key}')

    cached_tree = cache.fail_proof_get(key=key)
    if cached_tree is not None:
        app_logging.debug('results are in cache')
        return cached_tree

    index_name = 'chembl_protein_class'

    # Build the nested terms aggregation from the deepest level (l6) outwards:
    # every level is a "children" aggregation on its own field, and all levels
    # except the deepest one embed the next level under "aggs".
    nested_aggs = None
    for level in range(6, 0, -1):
        level_agg = {
            "terms": {
                "field": f"l{level}",
                "size": 1000,
                "order": {
                    "_count": "desc"
                }
            }
        }
        if nested_aggs is not None:
            level_agg["aggs"] = nested_aggs
        nested_aggs = {"children": level_agg}

    es_query = {"aggs": nested_aggs}

    def generate_count_query(path_to_node):
        # One clause per node of the path; the first node corresponds to level l1.
        return ' AND '.join(
            f'_metadata.protein_classification.l{level}:("{class_name}")'
            for level, class_name in enumerate(path_to_node, start=1)
        )

    tree_generator = TargetHierarchyTreeGenerator(
        index_name=index_name,
        es_query=es_query,
        query_generator=generate_count_query,
        count_index='chembl_target',
    )

    generated_tree = tree_generator.get_classification_tree()

    # 31,540,000 seconds, which is about 365 days.
    seconds_to_keep = int(3.154e7)
    cache.fail_proof_set(key=key, value=generated_tree, timeout=seconds_to_keep)

    return generated_tree