import json

from elastic_app_search import Client

# NOTE(review): the endpoint address and a private API key are committed in
# source here; consider loading them from the environment or a secret store
# and rotating this key.
client = Client(
    base_endpoint='34.87.101.217:3002/api/as/v1',
    api_key='private-15enfbz3zdf59jvchr94mu7k',
    use_https=False
)
engine_name = "sensitive-data-engine"

# App Search accepts at most this many documents per indexing request.
MAX_BATCH_SIZE = 100

# load data from json file
# (assumes the file holds a top-level JSON array of documents — TODO confirm)
with open("outfile.json", "r") as fp:
    documents = json.load(fp)
print (documents)

# update the appsearch, one request per batch so that inputs larger than the
# per-request limit are indexed instead of being rejected
for start in range(0, len(documents), MAX_BATCH_SIZE):
    client.index_documents(engine_name,
                           documents[start:start + MAX_BATCH_SIZE])
class TestClient(TestCase): def setUp(self): self.engine_name = 'some-engine-name' self.client = Client('host_identifier', 'api_key') self.document_index_url = "{}/{}".format( self.client.session.base_url, "engines/{}/documents".format(self.engine_name)) def test_deprecated_init_support_with_old_names(self): self.client = Client(account_host_key='host_identifier', api_key='api_key') self.assertEqual(self.client.account_host_key, 'host_identifier') def test_deprecated_init_support_with_new_names(self): self.client = Client(host_identifier='host_identifier', api_key='api_key') self.assertEqual(self.client.account_host_key, 'host_identifier') def test_deprecated_init_support_with_positional(self): self.client = Client('host_identifier', 'api_key', 'example.com', False) self.assertEqual(self.client.account_host_key, 'host_identifier') def test_host_identifier_is_optional(self): client = Client('', 'api_key', 'localhost:3002/api/as/v1', False) query = 'query' with requests_mock.Mocker() as m: url = "http://localhost:3002/api/as/v1/engines/some-engine-name/search" m.register_uri('GET', url, json={}, status_code=200) client.search(self.engine_name, query, {}) def test_index_document_processing_error(self): invalid_document = {'id': 'something', 'bad': {'no': 'nested'}} error = 'some processing error' stubbed_return = [{'id': 'something', 'errors': [error]}] with requests_mock.Mocker() as m: m.register_uri('POST', self.document_index_url, json=stubbed_return, status_code=200) with self.assertRaises(InvalidDocument) as context: self.client.index_document(self.engine_name, invalid_document) self.assertEqual(str(context.exception), error) def test_index_document_no_error_key_in_response(self): document_without_id = {'body': 'some value'} stubbed_return = [{'id': 'auto generated', 'errors': []}] with requests_mock.Mocker() as m: m.register_uri('POST', self.document_index_url, json=stubbed_return, status_code=200) response = self.client.index_document(self.engine_name, 
document_without_id) self.assertEqual(response, {'id': 'auto generated'}) def test_index_documents(self): id = 'INscMGmhmX4' valid_document = {'id': id} other_document = {'body': 'some value'} expected_return = [{ 'id': id, 'errors': [] }, { 'id': 'some autogenerated id', 'errors': [] }] with requests_mock.Mocker() as m: m.register_uri('POST', self.document_index_url, json=expected_return, status_code=200) response = self.client.index_documents( self.engine_name, [valid_document, other_document]) self.assertEqual(response, expected_return) def test_update_documents(self): id = 'INscMGmhmX4' valid_document = {'id': id} other_document = {'body': 'some value'} expected_return = [{ 'id': id, 'errors': [] }, { 'id': 'some autogenerated id', 'errors': [] }] with requests_mock.Mocker() as m: m.register_uri('PATCH', self.document_index_url, json=expected_return, status_code=200) response = self.client.update_documents( self.engine_name, [valid_document, other_document]) self.assertEqual(response, expected_return) def test_get_documents(self): id = 'INscMGmhmX4' expected_return = [{ 'id': id, 'url': 'http://www.youtube.com/watch?v=v1uyQZNg2vE', 'title': 'The Original Grumpy Cat', 'body': 'this is a test' }] with requests_mock.Mocker() as m: m.register_uri('GET', self.document_index_url, json=expected_return, status_code=200) response = self.client.get_documents(self.engine_name, [id]) self.assertEqual(response, expected_return) def test_list_documents(self): expected_return = { 'meta': { 'page': { 'current': 1, 'total_results': 1, 'total_pages': 1, 'size': 20 }, 'results': [{ 'body': 'this is a test', 'id': '1' }, { 'body': 'this is also a test', 'id': '2' }] } } def match_request_text(request): data = json.loads(request.text) return data["page"]["current"] == 1 and data["page"]["size"] == 20 with requests_mock.Mocker() as m: url = "{}/engines/{}/documents/list".format( self.client.session.base_url, self.engine_name) m.register_uri('GET', url, 
additional_matcher=match_request_text, json=expected_return, status_code=200) response = self.client.list_documents(self.engine_name) self.assertEqual(response, expected_return) def test_destroy_documents(self): id = 'INscMGmhmX4' expected_return = [{'id': id, 'result': True}] with requests_mock.Mocker() as m: m.register_uri('DELETE', self.document_index_url, json=expected_return, status_code=200) response = self.client.destroy_documents(self.engine_name, [id]) self.assertEqual(response, expected_return) def test_get_schema(self): expected_return = {'square_km': 'text'} with requests_mock.Mocker() as m: url = "{}/engines/{}/schema".format(self.client.session.base_url, self.engine_name) m.register_uri('GET', url, json=expected_return, status_code=200) response = self.client.get_schema(self.engine_name) self.assertEqual(response, expected_return) def test_update_schema(self): expected_return = {'square_mi': 'number', 'square_km': 'number'} with requests_mock.Mocker() as m: url = "{}/engines/{}/schema".format(self.client.session.base_url, self.engine_name) m.register_uri('POST', url, json=expected_return, status_code=200) response = self.client.update_schema(self.engine_name, expected_return) self.assertEqual(response, expected_return) def test_list_engines(self): expected_return = [{'name': 'myawesomeengine'}] def match_request_text(request): data = json.loads(request.text) return data["page"]["current"] == 1 and data["page"]["size"] == 20 with requests_mock.Mocker() as m: url = "{}/{}".format(self.client.session.base_url, 'engines') m.register_uri('GET', url, additional_matcher=match_request_text, json=expected_return, status_code=200) response = self.client.list_engines() self.assertEqual(response, expected_return) def test_list_engines_with_paging(self): expected_return = [{'name': 'myawesomeengine'}] def match_request_text(request): data = json.loads(request.text) return data["page"]["current"] == 10 and data["page"]["size"] == 2 with requests_mock.Mocker() as m: 
url = "{}/{}".format(self.client.session.base_url, 'engines') m.register_uri('GET', url, additional_matcher=match_request_text, json=expected_return, status_code=200) response = self.client.list_engines(current=10, size=2) self.assertEqual(response, expected_return) def test_get_engine(self): engine_name = 'myawesomeengine' expected_return = [{'name': engine_name}] with requests_mock.Mocker() as m: url = "{}/{}/{}".format(self.client.session.base_url, 'engines', engine_name) m.register_uri('GET', url, json=expected_return, status_code=200) response = self.client.get_engine(engine_name) self.assertEqual(response, expected_return) def test_create_engine(self): engine_name = 'myawesomeengine' expected_return = {'name': engine_name, 'language': 'en'} with requests_mock.Mocker() as m: url = "{}/{}".format(self.client.session.base_url, 'engines') m.register_uri('POST', url, json=expected_return, status_code=200) response = self.client.create_engine(engine_name=engine_name, language='en') self.assertEqual(response, expected_return) def test_create_engine_with_options(self): engine_name = 'myawesomeengine' expected_return = { 'name': engine_name, 'type': 'meta', 'source_engines': ['source-engine-1', 'source-engine-2'] } with requests_mock.Mocker() as m: url = "{}/{}".format(self.client.session.base_url, 'engines') m.register_uri('POST', url, json=expected_return, status_code=200) response = self.client.create_engine( engine_name=engine_name, options={ 'type': 'meta', 'source_engines': ['source-engine-1', 'source-engine-2'] }) self.assertEqual(response, expected_return) def test_destroy_engine(self): engine_name = 'myawesomeengine' expected_return = {'deleted': True} with requests_mock.Mocker() as m: url = "{}/{}/{}".format(self.client.session.base_url, 'engines', engine_name) m.register_uri('DELETE', url, json=expected_return, status_code=200) response = self.client.destroy_engine(engine_name) self.assertEqual(response, expected_return) def test_list_synonym_sets(self): 
expected_return = { 'meta': { 'page': { 'current': 1, 'total_pages': 1, 'total_results': 3, 'size': 20 } }, 'results': [{ 'id': 'syn-5b11ac66c9f9292013220ad3', 'synonyms': ['park', 'trail'] }, { 'id': 'syn-5b11ac72c9f9296b35220ac9', 'synonyms': ['protected', 'heritage'] }, { 'id': 'syn-5b11ac66c9f9292013220ad3', 'synonyms': ['hectares', 'acres'] }] } with requests_mock.Mocker() as m: url = "{}/engines/{}/synonyms".format(self.client.session.base_url, self.engine_name) def match_request_text(request): data = json.loads(request.text) return data["page"]["current"] == 1 and data["page"][ "size"] == 20 m.register_uri('GET', url, additional_matcher=match_request_text, json=expected_return, status_code=200) response = self.client.list_synonym_sets(self.engine_name) def test_get_synonym_set(self): synonym_id = 'syn-5b11ac66c9f9292013220ad3' expected_return = {'id': synonym_id, 'synonyms': ['park', 'trail']} with requests_mock.Mocker() as m: url = "{}/engines/{}/synonyms/{}".format( self.client.session.base_url, self.engine_name, synonym_id) m.register_uri('GET', url, json=expected_return, status_code=200) response = self.client.get_synonym_set(self.engine_name, synonym_id) self.assertEqual(response, expected_return) def test_create_synonym_set(self): synonym_set = ['park', 'trail'] expected_return = { 'id': 'syn-5b11ac72c9f9296b35220ac9', 'synonyms': ['park', 'trail'] } with requests_mock.Mocker() as m: url = "{}/engines/{}/synonyms".format(self.client.session.base_url, self.engine_name) m.register_uri('POST', url, json=expected_return, status_code=200) response = self.client.create_synonym_set(self.engine_name, synonym_set) self.assertEqual(response, expected_return) def test_update_synonym_set(self): synonym_id = 'syn-5b11ac72c9f9296b35220ac9' synonym_set = ['park', 'trail', 'ground'] expected_return = { 'id': synonym_id, 'synonyms': ['park', 'trail', 'ground'] } with requests_mock.Mocker() as m: url = "{}/engines/{}/synonyms/{}".format( self.client.session.base_url, 
self.engine_name, synonym_id) m.register_uri('PUT', url, json=expected_return, status_code=200) response = self.client.update_synonym_set(self.engine_name, synonym_id, synonym_set) self.assertEqual(response, expected_return) def test_destroy_synonym_set(self): synonym_id = 'syn-5b11ac66c9f9292013220ad3' expected_return = {'deleted': True} with requests_mock.Mocker() as m: url = "{}/engines/{}/synonyms/{}".format( self.client.session.base_url, self.engine_name, synonym_id) m.register_uri('DELETE', url, json=expected_return, status_code=200) response = self.client.destroy_synonym_set(self.engine_name, synonym_id) self.assertEqual(response, expected_return) def test_search(self): query = 'query' expected_return = {'meta': {}, 'results': []} with requests_mock.Mocker() as m: url = "{}/{}".format(self.client.session.base_url, "engines/{}/search".format(self.engine_name)) m.register_uri('GET', url, json=expected_return, status_code=200) response = self.client.search(self.engine_name, query, {}) self.assertEqual(response, expected_return) def test_multi_search(self): expected_return = [{ 'meta': {}, 'results': [] }, { 'meta': {}, 'results': [] }] with requests_mock.Mocker() as m: url = "{}/{}".format( self.client.session.base_url, "engines/{}/multi_search".format(self.engine_name)) m.register_uri('GET', url, json=expected_return, status_code=200) response = self.client.multi_search(self.engine_name, {}) self.assertEqual(response, expected_return) def test_query_suggestion(self): query = 'query' expected_return = {'meta': {}, 'results': {}} with requests_mock.Mocker() as m: url = "{}/{}".format( self.client.session.base_url, "engines/{}/query_suggestion".format(self.engine_name)) m.register_uri('GET', url, json=expected_return, status_code=200) response = self.client.query_suggestion(self.engine_name, query, {}) self.assertEqual(response, expected_return) def test_click(self): with requests_mock.Mocker() as m: url = "{}/{}".format(self.client.session.base_url, 
"engines/{}/click".format(self.engine_name)) m.register_uri('POST', url, json={}, status_code=200) self.client.click(self.engine_name, { 'query': 'cat', 'document_id': 'INscMGmhmX4' }) def test_create_meta_engine(self): source_engines = ['source-engine-1', 'source-engine-2'] expected_return = { 'source_engines': source_engines, 'type': 'meta', 'name': self.engine_name } with requests_mock.Mocker() as m: url = "{}/{}".format(self.client.session.base_url, 'engines') m.register_uri('POST', url, json=expected_return, status_code=200) response = self.client.create_meta_engine(self.engine_name, source_engines) self.assertEqual(response, expected_return) def test_add_meta_engine_sources(self): target_source_engine_name = 'source-engine-3' expected_return = { 'source_engines': ['source-engine-1', 'source-engine-2', target_source_engine_name], 'type': 'meta', 'name': self.engine_name } with requests_mock.Mocker() as m: url = "{}/{}".format( self.client.session.base_url, "engines/{}/source_engines".format(self.engine_name)) m.register_uri('POST', url, json=expected_return, status_code=200) response = self.client.add_meta_engine_sources( self.engine_name, [target_source_engine_name]) self.assertEqual(response, expected_return) def test_delete_meta_engine_sources(self): source_engine_name = 'source-engine-3' expected_return = { 'source_engines': ['source-engine-1', 'source-engine-2'], 'type': 'meta', 'name': self.engine_name } with requests_mock.Mocker() as m: url = "{}/{}".format( self.client.session.base_url, "engines/{}/source_engines".format(self.engine_name)) m.register_uri('DELETE', url, json=expected_return, status_code=200) response = self.client.delete_meta_engine_sources( self.engine_name, [source_engine_name]) self.assertEqual(response, expected_return)
# NOTE(review): the statements up to `yield chunk` use `return`/`yield` and the
# names `iterable` and `n`, so they look like the body of a generator function
# whose `def` line is not visible here -- presumably the `grouper` called
# further down; confirm against the full file.
it = iter(iterable)
while True:
    # Pull up to n items; an empty tuple means the iterator is exhausted.
    chunk = tuple(itertools.islice(it, n))
    if not chunk:
        return
    yield chunk

# MongoDB connection: host, credentials and database name are read from the
# environment.
client = pymongo.MongoClient(os.getenv("COVID_HOST"),
                             username=os.getenv("COVID_USER"),
                             password=os.getenv("COVID_PASS"),
                             authSource=os.getenv("COVID_DB"))
db = client[os.getenv("COVID_DB")]
# NOTE(review): doc_post_url is not used in the visible code, and the `+`
# raises TypeError when APPSEARCH_API_ENDPOINT is unset (getenv returns None).
doc_post_url = os.getenv(
    "APPSEARCH_API_ENDPOINT") + "/api/as/v1/engines/entries/documents"
# App Search client pointed at the same endpoint, over plain HTTP.
elastic_app_client = Client(base_endpoint='{}/api/as/v1'.format(
    os.getenv("APPSEARCH_API_ENDPOINT")),
                            api_key=os.getenv("APPSEARCH_API_KEY"),
                            use_https=False)
# Index every entry that has no "category_ML" field, 100 documents per call.
for docs in grouper(
        100,
        db.entries_searchable.find({"category_ML": {
            "$exists": False
        }})):
    for doc in docs:
        # Replace the "_id" key with a string "id" key before indexing.
        doc['id'] = str(doc['_id'])
        del (doc['_id'])
    # Print the result returned for this batch.
    pprint(elastic_app_client.index_documents("entries", docs))
api_key = 'private-key'
engine_name = 'flask-app-search'
file_name = 'movies.json'
client = Client(api_key=api_key,
                base_endpoint=host_identifier,
                use_https=False)

# Read the whole input file; the context manager closes the handle even if
# reading fails (the handle was previously never closed).
with open(file_name, "r") as f:
    document = f.read()
records = loads(document)

# Split the records into batches of at most MAX_BATCH_SIZE documents.
batched_records = list(batching_function(records, MAX_BATCH_SIZE))
number_of_batches = len(batched_records)
print("Indexing " + str(len(records)) + " records using " +
      str(number_of_batches) + " batches, each carrying up to " +
      str(MAX_BATCH_SIZE) + " documents")

for batch_number, batch in enumerate(batched_records, 1):
    indexing_response = client.index_documents(engine_name, batch)
    # Only truthy entries are counted, so any empty/None placeholders in a
    # batch are left out of the reported document count.
    # NOTE(review): the trailing comma after print(...) is a Python 2 idiom
    # for suppressing the newline; under Python 3 it has no effect. It is
    # kept so the console output stays exactly as before.
    print("...batch " + str(batch_number) + " with " +
          str(len(list(filter(None, batch)))) + " documents completed"),

    # Count the per-document results that report at least one error. A result
    # without an "errors" key counts as error-free instead of raising
    # TypeError from len(None).
    errors_encountered = sum(1 for result in indexing_response
                             if result.get("errors"))
    if errors_encountered == 0:
        print("with no errors")
    else:
        print("with " + str(errors_encountered) + " errors")
print('Total new room messages for this indexing run ' +
      str(len(new_messages)))
if config['index']:
    print('Indexing documents ...')
    # Can only index 100 documents at a time.
    step = 100
    # Slice ends are exclusive, so [start:start + step] yields exactly `step`
    # documents (fewer for the final batch). The earlier "- 1" on the slice
    # end silently dropped the last document of every batch, and an empty
    # request was sent when the total was an exact multiple of 100.
    for start in range(0, len(new_es_messages), step):
        batch = new_es_messages[start:start + step]
        client.index_documents(engine_name, batch)
        print('Indexed %s documents' % str(len(batch)))

print('Saving messages to disk ...')
# Persist both message collections as JSON; room messages with sorted keys,
# the search documents in their existing key order.
with open(dest, 'w') as f:
    json.dump(room_messages, f, sort_keys=True, indent=1)
with open(es_dest, 'w') as ff:
    json.dump(es_messages, ff, sort_keys=False, indent=1)