class DataSetSearch(DataCatalogModel):
    """Runs data-set (metadata) searches against the ElasticSearch index."""

    SEARCH_ERROR_MESSAGE = 'Searching in the index failed'
    INVALID_QUERY_ERROR_MESSAGE = SEARCH_ERROR_MESSAGE + ': invalid query.'
    NO_CONNECTION_ERROR_MESSAGE = SEARCH_ERROR_MESSAGE + ': failed to connect to ElasticSearch.'

    def __init__(self):
        super(DataSetSearch, self).__init__()
        # Turns caller-facing queries into ElasticSearch DSL bodies.
        self._translator = ElasticSearchQueryTranslator()

    def search(self, query, org_uuid_list, dataset_filtering, is_admin):
        """Translate *query* and execute it against the configured index.

        :param query: raw search query (JSON string) from the caller
        :param org_uuid_list: organization UUIDs used to scope results
        :param dataset_filtering: visibility filter (DataSetFiltering value)
        :param is_admin: whether the caller has admin privileges
        :return: dict with 'hits', 'total', 'categories' and 'formats'
        :raises InvalidQueryError: if ElasticSearch rejects the query
        :raises IndexConnectionError: if ElasticSearch is unreachable
        """
        body = self._translator.translate(
            query, org_uuid_list, dataset_filtering, is_admin)
        try:
            raw_result = self._elastic_search.search(
                index=self._config.elastic.elastic_index,
                doc_type=self._config.elastic.elastic_metadata_type,
                body=body)
        except RequestError:
            # Malformed/unsupported query -> domain-level error for the API layer.
            self._log.exception(self.INVALID_QUERY_ERROR_MESSAGE)
            raise InvalidQueryError(self.INVALID_QUERY_ERROR_MESSAGE)
        except ConnectionError:
            self._log.exception(self.NO_CONNECTION_ERROR_MESSAGE)
            raise IndexConnectionError(self.NO_CONNECTION_ERROR_MESSAGE)
        else:
            return self._extract_metadata(raw_result)

    @staticmethod
    def _extract_metadata(es_query_result):
        """Flatten a raw ES response into the result dict returned by search()."""
        aggregations = es_query_result['aggregations']
        hits_section = es_query_result['hits']
        documents = []
        for hit in hits_section['hits']:
            # Graft the ES document id onto each source document.
            document = hit['_source']
            document['id'] = hit['_id']
            documents.append(document)
        return {
            'hits': documents,
            'total': hits_section['total'],
            'categories': [bucket['key']
                           for bucket in aggregations['categories']['buckets']],
            'formats': [bucket['key']
                        for bucket in aggregations['formats']['buckets']],
        }

    @staticmethod
    def get_params_from_request_args(args):
        """Derive the data-set visibility filter from HTTP request query args.

        :param args: request args mapping supporting get(name, default, type)
        :return: dict with a single 'dataset_filtering' entry
        """
        def _flag_set(name):
            # Flags arrive as strings; anything other than 'true' is off.
            return args.get(name, default="", type=str).lower() == 'true'

        filtering = DataSetFiltering.PRIVATE_AND_PUBLIC
        if _flag_set('onlyPublic'):
            filtering = DataSetFiltering.ONLY_PUBLIC
        if _flag_set('onlyPrivate'):
            # Evaluated second, so it wins if both flags are 'true'.
            filtering = DataSetFiltering.ONLY_PRIVATE
        return {'dataset_filtering': filtering}
# NOTE(review): this class is a token-for-token duplicate of the
# DataSetSearch definition that appears earlier in this file. At import
# time this second definition shadows the first. Presumably a
# copy/paste or merge artifact -- confirm and remove one of the copies.
class DataSetSearch(DataCatalogModel):
    """ Responsible for searching the ElasticSearch index for data sets (the metadata describing them). """

    SEARCH_ERROR_MESSAGE = 'Searching in the index failed'
    INVALID_QUERY_ERROR_MESSAGE = SEARCH_ERROR_MESSAGE + ': invalid query.'
    NO_CONNECTION_ERROR_MESSAGE = SEARCH_ERROR_MESSAGE + ': failed to connect to ElasticSearch.'

    def __init__(self):
        super(DataSetSearch, self).__init__()
        # Translates caller-supplied queries into ElasticSearch DSL bodies.
        self._translator = ElasticSearchQueryTranslator()

    def search(self, query, org_uuid_list, dataset_filtering, is_admin):
        # Build the ES query body scoped by organization and visibility,
        # then run it against the configured index / doc type.
        query_string = self._translator.translate(query, org_uuid_list, dataset_filtering, is_admin)
        try:
            elastic_search_results = self._elastic_search.search(
                index=self._config.elastic.elastic_index,
                doc_type=self._config.elastic.elastic_metadata_type,
                body=query_string
            )
            return self._extract_metadata(elastic_search_results)
        except RequestError:
            # ES rejected the query -> surface as a domain-level error.
            self._log.exception(self.INVALID_QUERY_ERROR_MESSAGE)
            raise InvalidQueryError(self.INVALID_QUERY_ERROR_MESSAGE)
        except ConnectionError:
            # ES unreachable -> surface as a domain-level connection error.
            self._log.exception(self.NO_CONNECTION_ERROR_MESSAGE)
            raise IndexConnectionError(self.NO_CONNECTION_ERROR_MESSAGE)

    @staticmethod
    def _extract_metadata(es_query_result):
        """Flatten a raw ES response into the dict returned by search():
        each hit's _source with the ES document id grafted on as 'id',
        plus totals and the category/format aggregation keys."""
        hits = es_query_result['hits']
        category_aggregations = es_query_result['aggregations']['categories']['buckets']
        format_aggregations = es_query_result['aggregations']['formats']['buckets']
        entries = []
        for entry in hits['hits']:
            entries.append(entry['_source'])
            # Attach the ES document id to the entry just appended.
            entries[-1]['id'] = entry['_id']
        categories = [cat['key'] for cat in category_aggregations]
        formats = [obj['key'] for obj in format_aggregations]
        return {'hits': entries, 'total': hits['total'], 'categories': categories, 'formats': formats}

    @staticmethod
    def get_params_from_request_args(args):
        """Derive the data-set visibility filter from request query args."""
        # Default: both private and public data sets are visible.
        dataset_filtering = DataSetFiltering.PRIVATE_AND_PUBLIC
        if args.get('onlyPublic', default="", type=str).lower() == 'true':
            dataset_filtering = DataSetFiltering.ONLY_PUBLIC
        # Checked second, so onlyPrivate wins when both flags are 'true'.
        if args.get('onlyPrivate', default="", type=str).lower() == 'true':
            dataset_filtering = DataSetFiltering.ONLY_PRIVATE
        return {'dataset_filtering': dataset_filtering}
class ElasticSearchQueryTranslationTests(TestCase):
    """Unit tests for ElasticSearchQueryTranslator."""

    def setUp(self):
        self.translator = ElasticSearchQueryTranslator()
        self.org_uuid = ['orgid007']

    def test_queryTranslation_sizeInQuery_sizeAddedToOutput(self):
        SIZE = 123
        translated = self.translator.translate(
            json.dumps({'size': SIZE}), self.org_uuid, None, False)
        # 'size' from the input query must survive translation.
        self.assertEqual(SIZE, json.loads(translated)['size'])

    def test_queryTranslation_fromInQuery_fromAddedToOutput(self):
        FROM = 345
        translated = self.translator.translate(
            json.dumps({'from': FROM}), self.org_uuid, True, False)
        # 'from' (pagination offset) must survive translation.
        self.assertEqual(FROM, json.loads(translated)['from'])

    def test_combiningQueryAndFilter_queryWithFilter_filteredQueryCreated(self):
        FAKE_BASE_QUERY = {'yup': 'totally fake'}
        FAKE_FILTER = {'uhuh': 'this filter is also fake'}
        FAKE_POST_FILTER = {'hello': 'fake filter'}
        expected_query = {
            'query': {
                'filtered': {
                    'filter': FAKE_FILTER,
                    'query': FAKE_BASE_QUERY,
                },
            },
            'post_filter': FAKE_POST_FILTER,
            'aggregations': {
                'categories': {'terms': {'size': 100, 'field': 'category'}},
                'formats': {'terms': {'field': 'format'}},
            },
        }
        combined = self.translator._combine_query_and_filters(
            FAKE_BASE_QUERY, FAKE_FILTER, FAKE_POST_FILTER)
        self.assertDictEqual(expected_query, combined)

    def test_queryTranslation_queryIsNotJson_invalidQueryError(self):
        # Malformed JSON must be rejected with the domain error.
        with self.assertRaises(InvalidQueryError):
            self.translator.translate(
                '{"this is not a proper JSON"}', self.org_uuid, None, False)

    def test_decodingInputQuery_noneQuery_emptyDictReturned(self):
        self.assertDictEqual({}, self.translator._get_query_dict(None))

    def test_queryTranslation_fullFeaturedQuery_queryTranslated(self):
        input_query = {
            'query': 'blabla',
            'filters': [{'format': ['csv']}],
            'size': 3,
            'from': 14,
        }
        translated = self.translator.translate(
            json.dumps(input_query), self.org_uuid, True, False)
        output_query = json.loads(translated)
        self.assertIn('filtered', output_query['query'])
        self.assertIn('size', output_query)
        self.assertIn('from', output_query)