def test_saves_in_cache(self):
    """
    Tests that it saves a response in the cache after receiving it.
    """
    with self.flask_app.app_context():
        index_name = 'chembl_molecule'
        query = {
            'size': 24,
            'from': 0,
            'query': {
                'bool': {
                    'filter': [
                        {
                            'terms': {
                                'molecule_chembl_id': [
                                    'CHEMBL59',
                                    'CHEMBL3247442',
                                    'CHEMBL160074',
                                    'CHEMBL2219748'
                                ]
                            }
                        }
                    ]
                }
            }
        }

        # Trigger the request so the module has a chance to populate the cache.
        es_data.get_es_response(index_name, query)

        # The cache entry must be retrievable under the query's cache key.
        key = es_data.get_es_query_cache_key(index_name, query)
        cached = CACHE.get(key=key)
        self.assertIsNotNone(cached, msg='the response was not saved in cache!')
def get_items_with_context(index_name, raw_es_query, raw_context, raw_contextual_sort_data='{}'):
    """
    Queries elasticsearch and decorates every hit with the contextual properties
    described by the given context.

    :param index_name: name of the index to query
    :param raw_es_query: es_query stringifyied
    :param raw_context: context dict stringifyied
    :param raw_contextual_sort_data: stringifyied dict describing sorting by contextual
        properties; '{}' (or None) means no contextual sorting
    :return: the items in the es_query with the context given in the context description,
        plus a metadata dict with the total results and the max number of ids injected
    """
    context_dict = json.loads(raw_context)
    context, total_results = context_loader.get_context(context_dict)
    id_properties = es_mappings.get_id_properties_for_index(index_name)

    # create a context index so access is faster
    context_id = context_dict['context_id']
    context_index = context_loader.load_context_index(context_id, id_properties, context)

    # Guard against an explicit None being passed despite the '{}' default.
    if raw_contextual_sort_data is not None:
        contextual_sort_data = json.loads(raw_contextual_sort_data)
    else:
        contextual_sort_data = {}

    search_data_with_injections = get_search_data_with_injections(raw_es_query, contextual_sort_data,
                                                                  id_properties, total_results,
                                                                  context_index)
    # Fix: the original json.dumps'ed the injected query only to json.loads it back
    # immediately; the round trip is a no-op for JSON-derived data, so pass it directly.
    es_response = es_data.get_es_response(index_name, search_data_with_injections)
    add_context_values_to_response(es_response, context_index)
    metadata = {
        'total_results': len(context_index),
        'max_results_injected': RUN_CONFIG.get('filter_query_max_clauses')
    }
    return es_response, metadata
def get_es_data(index_name, raw_es_query, raw_context, raw_contextual_sort_data):
    """
    Queries elasticsearch, with or without a context.

    :param index_name: name of the index to query
    :param raw_es_query: stringifyied version of the query to send to elasticsearch
    :param raw_context: stringifyied version of a JSON object describing the context of the
        query, or None when there is no context
    :param raw_contextual_sort_data: description of sorting if sorting by contextual properties
    :return: Returns the json response from elasticsearch and some metadata if necessary
    """
    # Fix: the old docstring documented a nonexistent 'id_property' parameter; the id
    # properties are resolved from the index mapping inside get_items_with_context.
    if raw_context is None:
        # No context: plain passthrough query, no metadata in the response.
        app_logging.debug('No context detected')
        es_query = json.loads(raw_es_query)
        es_response = es_data.get_es_response(index_name, es_query)
        return {
            'es_response': es_response,
        }

    app_logging.debug(f'Using context: {raw_context}')
    es_response, metadata = get_items_with_context(index_name, raw_es_query, raw_context,
                                                   raw_contextual_sort_data)
    return {
        'es_response': es_response,
        'metadata': metadata
    }
def test_gets_es_data(self):
    """
    Tests that it returns the es data correctly
    """
    index_name = 'chembl_molecule'
    query = {
        'size': 24,
        'from': 0,
        'query': {
            'bool': {
                'filter': [
                    {
                        'terms': {
                            'molecule_chembl_id': [
                                'CHEMBL59',
                                'CHEMBL3247442',
                                'CHEMBL160074',
                                'CHEMBL2219748'
                            ]
                        }
                    }
                ]
            }
        }
    }

    search_response = es_data.get_es_response(index_name, query)

    # Exactly the four requested molecules must come back.
    expected_total = {'value': 4, 'relation': 'eq'}
    actual_total = search_response['hits']['total']
    self.assertEqual(actual_total, expected_total,
                     msg='The response from elasticsearch was not correct')
def get_url_shortening(url_hash):
    """
    Looks up a url-shortening document by its hash.

    :param url_hash: hash of the url to look for
    :return: url shortening dict from elasticsearch, or None when no document matches
    """
    index_name = RUN_CONFIG.get('url_shortening').get('index_name')
    es_query = {
        "query": {
            "query_string": {
                "query": f'"{url_hash}"',
                "default_field": "hash"
            }
        }
    }

    # NOTE(review): cache is bypassed — presumably because shortenings can expire
    # at any moment, so a stale cached answer would be wrong; confirm with es_data.
    shortening_response = es_data.get_es_response(index_name, es_query, ignore_cache=True)
    total_hits = shortening_response['hits']['total']['value']
    app_logging.debug(f'total_hits {total_hits}')

    # Fix: reuse the already-extracted total_hits instead of re-reading the nested dict.
    if total_hits == 0:
        return None

    raw_document = shortening_response['hits']['hits'][0]
    return raw_document
def get_classification_tree(self):
    """
    Runs the configured aggregation query against elasticsearch, stores the raw
    aggregation buckets and builds the final tree.

    :return: the parsed tree root
    """
    aggregations = es_data.get_es_response(self.index_name, self.es_query)['aggregations']
    self.raw_tree_root = aggregations['children']['buckets']
    self.build_final_tree()
    return self.parsed_tree_root
def run(self):
    """
    Probabilistically deletes expired shortened urls: roughly 3 times out of 10
    (dice in [0, 1, 2]) it counts the expired documents and, when they exceed the
    configured threshold, removes them with a delete-by-query.
    """
    # Roll a ten-sided dice; only act on 0, 1 or 2 so deletions stay infrequent.
    dice = random.randint(0, 9)
    dice_must_be = [0, 1, 2]
    if dice not in dice_must_be:
        app_logging.info(
            f'Not trying to delete expired urls. Dice was {dice}. Must be {str(dice_must_be)}'
        )
        return

    now = datetime.utcnow().replace(tzinfo=timezone.utc)
    index_name = RUN_CONFIG.get('url_shortening').get('index_name')
    # Everything whose 'expires' timestamp (epoch millis) is already in the past.
    query = {
        "query": {
            "range": {
                "expires": {
                    "lte": str(int(now.timestamp() * 1000))
                }
            }
        }
    }

    expired_urls_response = es_data.get_es_response(index_name, query, ignore_cache=True)
    num_items = expired_urls_response['hits']['total']['value']
    max_items = RUN_CONFIG.get('url_shortening').get(
        'expired_urls_lazy_deletion_threshold')

    if num_items > max_items:
        # pylint: disable=unexpected-keyword-arg
        ES.delete_by_query(index=index_name, body=query, conflicts='proceed')
        app_logging.info(f'Deleted {num_items} expired shortened urls.')
        statistics_saver.record_expired_urls_were_deleted()
    else:
        app_logging.info(
            f'Not deleting expired urls because there are just {num_items}, '
            f'will do deletion when more than {max_items}')