Beispiel #1
0
    def test_saves_in_cache(self):
        """
        Checks that a response obtained from elasticsearch ends up in the cache.

        A terms query is sent for four molecule ids; afterwards the cache is read with the key
        generated for the same index and query, and an entry is expected to be there.
        """
        molecule_ids = [
            'CHEMBL59',
            'CHEMBL3247442',
            'CHEMBL160074',
            'CHEMBL2219748',
        ]
        terms_filter = {'terms': {'molecule_chembl_id': molecule_ids}}
        query = {
            'size': 24,
            'from': 0,
            'query': {'bool': {'filter': [terms_filter]}},
        }
        index_name = 'chembl_molecule'

        with self.flask_app.app_context():
            # the response itself is not inspected, only its effect on the cache
            es_data.get_es_response(index_name, query)

            key = es_data.get_es_query_cache_key(index_name, query)
            cached_response = CACHE.get(key=key)

            self.assertIsNotNone(cached_response, msg='the response was not saved in cache!')
def get_items_with_context(index_name, raw_es_query, raw_context, raw_contextual_sort_data='{}'):
    """
    Gets from elasticsearch the items of a query, taking into account the context given.

    :param index_name: name of the index to query
    :param raw_es_query: the es query, as a JSON string
    :param raw_context: the context description dict, as a JSON string. It must have a 'context_id' key
    :param raw_contextual_sort_data: the contextual sort description, as a JSON string.
        None is treated as an empty description
    :return: a tuple (es_response, metadata). metadata has the keys 'total_results' (size of the
        context index) and 'max_results_injected' (the 'filter_query_max_clauses' run config value)
    """
    parsed_context = json.loads(raw_context)
    loaded_context, num_results = context_loader.get_context(parsed_context)

    id_props = es_mappings.get_id_properties_for_index(index_name)
    # index the context so that access to it is faster
    ctx_id = parsed_context['context_id']
    context_index = context_loader.load_context_index(ctx_id, id_props, loaded_context)

    sort_data = {} if raw_contextual_sort_data is None else json.loads(raw_contextual_sort_data)

    injected_search_data = get_search_data_with_injections(
        raw_es_query, sort_data, id_props, num_results, context_index
    )
    # the search data is serialised and parsed again before it is sent to elasticsearch
    serialised_search_data = json.dumps(injected_search_data)
    es_response = es_data.get_es_response(index_name, json.loads(serialised_search_data))
    add_context_values_to_response(es_response, context_index)

    return es_response, {
        'total_results': len(context_index),
        'max_results_injected': RUN_CONFIG.get('filter_query_max_clauses'),
    }
def get_es_data(index_name, raw_es_query, raw_context, raw_contextual_sort_data):
    """
    Gets the data for a query from elasticsearch, with or without a context.

    :param index_name: name of the index to query
    :param raw_es_query: the query to send to elasticsearch, as a JSON string
    :param raw_context: JSON string describing the context of the query, or None when there is no context
    :param raw_contextual_sort_data: description of the sorting when sorting by contextual properties.
        Only used when a context is given
    :return: a dict with the key 'es_response' and, only when a context was given, also 'metadata'
    """
    if raw_context is not None:
        app_logging.debug(f'Using context: {raw_context}')
        es_response, metadata = get_items_with_context(
            index_name, raw_es_query, raw_context, raw_contextual_sort_data
        )
        return {
            'es_response': es_response,
            'metadata': metadata
        }

    app_logging.debug('No context detected')
    parsed_query = json.loads(raw_es_query)
    return {
        'es_response': es_data.get_es_response(index_name, parsed_query),
    }
Beispiel #4
0
    def test_gets_es_data(self):
        """
        Checks that the data returned for a query has the expected total of hits.

        A terms query for four molecule ids is sent and the response must report exactly four hits.
        """
        molecule_ids = [
            'CHEMBL59',
            'CHEMBL3247442',
            'CHEMBL160074',
            'CHEMBL2219748',
        ]
        terms_filter = {'terms': {'molecule_chembl_id': molecule_ids}}
        query = {
            'size': 24,
            'from': 0,
            'query': {'bool': {'filter': [terms_filter]}},
        }

        response = es_data.get_es_response('chembl_molecule', query)

        expected_total = {'value': 4, 'relation': 'eq'}
        obtained_total = response['hits']['total']

        self.assertEqual(obtained_total, expected_total, msg='The response from elasticsearch was not correct')
def get_url_shortening(url_hash):
    """
    Looks for the document that describes a shortened url in elasticsearch.

    :param url_hash: hash of the url to look for
    :return: the raw elasticsearch hit of the first document matching the hash,
        or None when no document matches
    """

    index_name = RUN_CONFIG.get('url_shortening').get('index_name')

    # The hash is placed inside a quoted phrase of a query_string query. Backslashes and double
    # quotes are escaped so that a crafted hash cannot close the phrase and add its own query syntax.
    escaped_hash = f'{url_hash}'.replace('\\', '\\\\').replace('"', '\\"')
    es_query = {
        "query": {
            "query_string": {
                "query": f'"{escaped_hash}"',
                "default_field": "hash"
            }
        }
    }

    # ignore_cache=True is passed so that es_data does not use its cache for this lookup
    shortening_response = es_data.get_es_response(index_name,
                                                  es_query,
                                                  ignore_cache=True)
    total_hits = shortening_response['hits']['total']['value']
    app_logging.debug(f'total_hits {total_hits}')

    if total_hits == 0:
        return None

    return shortening_response['hits']['hits'][0]
    def get_classification_tree(self):
        """
        Loads the raw tree from elasticsearch and builds the final tree from it.

        The query in self.es_query is run against self.index_name, the buckets of the 'children'
        aggregation are stored in self.raw_tree_root, and then build_final_tree() is called.

        :return: self.parsed_tree_root, as it is after build_final_tree() has run
        """
        response = es_data.get_es_response(self.index_name, self.es_query)
        children_aggregation = response['aggregations']['children']
        self.raw_tree_root = children_aggregation['buckets']

        self.build_final_tree()
        return self.parsed_tree_root
    def run(self):
        """
        Lazily deletes the expired shortened urls from elasticsearch.

        The deletion is only attempted when a random number between 0 and 9 falls in [0, 1, 2].
        When it is attempted, the documents whose 'expires' value is lower than or equal to the
        current time (in milliseconds since the epoch) are counted, and they are deleted only if
        there are more than the 'expired_urls_lazy_deletion_threshold' of the url shortening config.
        """
        dice = random.randint(0, 9)
        dice_must_be = [0, 1, 2]
        if dice not in dice_must_be:
            app_logging.info(
                f'Not trying to delete expired urls. Dice was {dice}. Must be {str(dice_must_be)}'
            )
            return

        # timezone-aware current time in UTC; datetime.utcnow() is deprecated since Python 3.12
        now = datetime.now(timezone.utc)

        url_shortening_config = RUN_CONFIG.get('url_shortening')
        index_name = url_shortening_config.get('index_name')
        query = {
            "query": {
                "range": {
                    "expires": {
                        "lte": str(int(now.timestamp() * 1000))
                    }
                }
            }
        }

        expired_urls_response = es_data.get_es_response(index_name,
                                                        query,
                                                        ignore_cache=True)

        num_items = expired_urls_response['hits']['total']['value']
        max_items = url_shortening_config.get('expired_urls_lazy_deletion_threshold')

        if num_items <= max_items:
            app_logging.info(
                f'Not deleting expired urls because there are just {num_items}, '
                f'will do deletion when more than {max_items}')
            return

        # the same range query that was used for counting selects the documents to delete
        # pylint: disable=unexpected-keyword-arg
        ES.delete_by_query(index=index_name,
                           body=query,
                           conflicts='proceed')
        app_logging.info(f'Deleted {num_items} expired shortened urls.')
        statistics_saver.record_expired_urls_were_deleted()