Example #1
0
from elastic_app_search import Client
import json
# Name of the App Search engine that receives the documents.
engine_name = "sensitive-data-engine"

# NOTE(review): the endpoint and the private API key are hard-coded here;
# consider loading them from the environment instead of committing them.
client = Client(
    base_endpoint='34.87.101.217:3002/api/as/v1',
    api_key='private-15enfbz3zdf59jvchr94mu7k',
    use_https=False,
)

# Parse the documents to index from the JSON file on disk.
with open("outfile.json", "r") as source_file:
    documents = json.loads(source_file.read())

# Echo the parsed payload before sending it.
print(documents)

# Send the documents to the App Search engine.
client.index_documents(engine_name, documents)

Example #2
0
class TestClient(TestCase):
    """Unit tests for the App Search ``Client``.

    Every HTTP call is stubbed with ``requests_mock``; each test registers the
    URL and verb it expects the client to use and checks the value the client
    hands back.
    """

    def setUp(self):
        """Build a client and the documents URL shared by several tests."""
        self.engine_name = 'some-engine-name'
        self.client = Client('host_identifier', 'api_key')

        self.document_index_url = "{}/{}".format(
            self.client.session.base_url,
            "engines/{}/documents".format(self.engine_name))

    def test_deprecated_init_support_with_old_names(self):
        """The ``account_host_key`` keyword is accepted by the constructor."""
        self.client = Client(account_host_key='host_identifier',
                             api_key='api_key')
        self.assertEqual(self.client.account_host_key, 'host_identifier')

    def test_deprecated_init_support_with_new_names(self):
        """The ``host_identifier`` keyword populates ``account_host_key``."""
        self.client = Client(host_identifier='host_identifier',
                             api_key='api_key')
        self.assertEqual(self.client.account_host_key, 'host_identifier')

    def test_deprecated_init_support_with_positional(self):
        """Four positional constructor arguments are still accepted."""
        self.client = Client('host_identifier', 'api_key', 'example.com',
                             False)
        self.assertEqual(self.client.account_host_key, 'host_identifier')

    def test_host_identifier_is_optional(self):
        """A client with an empty host identifier searches the given endpoint."""
        client = Client('', 'api_key', 'localhost:3002/api/as/v1', False)
        query = 'query'

        with requests_mock.Mocker() as m:
            url = "http://localhost:3002/api/as/v1/engines/some-engine-name/search"
            m.register_uri('GET', url, json={}, status_code=200)
            client.search(self.engine_name, query, {})

    def test_index_document_processing_error(self):
        """``index_document`` raises ``InvalidDocument`` when errors are returned."""
        invalid_document = {'id': 'something', 'bad': {'no': 'nested'}}
        error = 'some processing error'
        stubbed_return = [{'id': 'something', 'errors': [error]}]
        with requests_mock.Mocker() as m:
            m.register_uri('POST',
                           self.document_index_url,
                           json=stubbed_return,
                           status_code=200)

            with self.assertRaises(InvalidDocument) as context:
                self.client.index_document(self.engine_name, invalid_document)
            # The message check must sit outside the assertRaises block:
            # nothing after the raising call inside the block is executed.
            self.assertEqual(str(context.exception), error)

    def test_index_document_no_error_key_in_response(self):
        """``index_document`` returns only the id when the error list is empty."""
        document_without_id = {'body': 'some value'}
        stubbed_return = [{'id': 'auto generated', 'errors': []}]

        with requests_mock.Mocker() as m:
            m.register_uri('POST',
                           self.document_index_url,
                           json=stubbed_return,
                           status_code=200)
            response = self.client.index_document(self.engine_name,
                                                  document_without_id)
            self.assertEqual(response, {'id': 'auto generated'})

    def test_index_documents(self):
        """``index_documents`` POSTs to the documents URL and returns the JSON."""
        document_id = 'INscMGmhmX4'
        valid_document = {'id': document_id}
        other_document = {'body': 'some value'}

        expected_return = [{
            'id': document_id,
            'errors': []
        }, {
            'id': 'some autogenerated id',
            'errors': []
        }]

        with requests_mock.Mocker() as m:
            m.register_uri('POST',
                           self.document_index_url,
                           json=expected_return,
                           status_code=200)
            response = self.client.index_documents(
                self.engine_name, [valid_document, other_document])
            self.assertEqual(response, expected_return)

    def test_update_documents(self):
        """``update_documents`` PATCHes the documents URL and returns the JSON."""
        document_id = 'INscMGmhmX4'
        valid_document = {'id': document_id}
        other_document = {'body': 'some value'}

        expected_return = [{
            'id': document_id,
            'errors': []
        }, {
            'id': 'some autogenerated id',
            'errors': []
        }]

        with requests_mock.Mocker() as m:
            m.register_uri('PATCH',
                           self.document_index_url,
                           json=expected_return,
                           status_code=200)
            response = self.client.update_documents(
                self.engine_name, [valid_document, other_document])
            self.assertEqual(response, expected_return)

    def test_get_documents(self):
        """``get_documents`` GETs the documents URL and returns the JSON."""
        document_id = 'INscMGmhmX4'
        expected_return = [{
            'id': document_id,
            'url': 'http://www.youtube.com/watch?v=v1uyQZNg2vE',
            'title': 'The Original Grumpy Cat',
            'body': 'this is a test'
        }]

        with requests_mock.Mocker() as m:
            m.register_uri('GET',
                           self.document_index_url,
                           json=expected_return,
                           status_code=200)
            response = self.client.get_documents(self.engine_name,
                                                 [document_id])
            self.assertEqual(response, expected_return)

    def test_list_documents(self):
        """``list_documents`` requests page 1 with size 20 when called bare."""
        # NOTE(review): 'results' is nested under 'meta' in this fixture; the
        # test only checks the payload is passed through, so it is kept as is.
        expected_return = {
            'meta': {
                'page': {
                    'current': 1,
                    'total_results': 1,
                    'total_pages': 1,
                    'size': 20
                },
                'results': [{
                    'body': 'this is a test',
                    'id': '1'
                }, {
                    'body': 'this is also a test',
                    'id': '2'
                }]
            }
        }

        def match_request_text(request):
            # Only match requests whose JSON body asks for page 1, size 20.
            data = json.loads(request.text)
            return data["page"]["current"] == 1 and data["page"]["size"] == 20

        with requests_mock.Mocker() as m:
            url = "{}/engines/{}/documents/list".format(
                self.client.session.base_url, self.engine_name)
            m.register_uri('GET',
                           url,
                           additional_matcher=match_request_text,
                           json=expected_return,
                           status_code=200)

            response = self.client.list_documents(self.engine_name)
            self.assertEqual(response, expected_return)

    def test_destroy_documents(self):
        """``destroy_documents`` DELETEs on the documents URL."""
        document_id = 'INscMGmhmX4'
        expected_return = [{'id': document_id, 'result': True}]

        with requests_mock.Mocker() as m:
            m.register_uri('DELETE',
                           self.document_index_url,
                           json=expected_return,
                           status_code=200)
            response = self.client.destroy_documents(self.engine_name,
                                                     [document_id])
            self.assertEqual(response, expected_return)

    def test_get_schema(self):
        """``get_schema`` GETs the engine's schema URL."""
        expected_return = {'square_km': 'text'}

        with requests_mock.Mocker() as m:
            url = "{}/engines/{}/schema".format(self.client.session.base_url,
                                                self.engine_name)
            m.register_uri('GET', url, json=expected_return, status_code=200)

            response = self.client.get_schema(self.engine_name)
            self.assertEqual(response, expected_return)

    def test_update_schema(self):
        """``update_schema`` POSTs to the engine's schema URL."""
        expected_return = {'square_mi': 'number', 'square_km': 'number'}

        with requests_mock.Mocker() as m:
            url = "{}/engines/{}/schema".format(self.client.session.base_url,
                                                self.engine_name)
            m.register_uri('POST', url, json=expected_return, status_code=200)

            response = self.client.update_schema(self.engine_name,
                                                 expected_return)
            self.assertEqual(response, expected_return)

    def test_list_engines(self):
        """``list_engines`` requests page 1 with size 20 when called bare."""
        expected_return = [{'name': 'myawesomeengine'}]

        def match_request_text(request):
            # Only match requests whose JSON body asks for page 1, size 20.
            data = json.loads(request.text)
            return data["page"]["current"] == 1 and data["page"]["size"] == 20

        with requests_mock.Mocker() as m:
            url = "{}/{}".format(self.client.session.base_url, 'engines')
            m.register_uri('GET',
                           url,
                           additional_matcher=match_request_text,
                           json=expected_return,
                           status_code=200)
            response = self.client.list_engines()
            self.assertEqual(response, expected_return)

    def test_list_engines_with_paging(self):
        """``list_engines`` forwards explicit ``current`` and ``size`` values."""
        expected_return = [{'name': 'myawesomeengine'}]

        def match_request_text(request):
            # Only match requests whose JSON body asks for page 10, size 2.
            data = json.loads(request.text)
            return data["page"]["current"] == 10 and data["page"]["size"] == 2

        with requests_mock.Mocker() as m:
            url = "{}/{}".format(self.client.session.base_url, 'engines')
            m.register_uri('GET',
                           url,
                           additional_matcher=match_request_text,
                           json=expected_return,
                           status_code=200)
            response = self.client.list_engines(current=10, size=2)
            self.assertEqual(response, expected_return)

    def test_get_engine(self):
        """``get_engine`` GETs the URL of the named engine."""
        engine_name = 'myawesomeengine'
        expected_return = [{'name': engine_name}]

        with requests_mock.Mocker() as m:
            url = "{}/{}/{}".format(self.client.session.base_url, 'engines',
                                    engine_name)
            m.register_uri('GET', url, json=expected_return, status_code=200)
            response = self.client.get_engine(engine_name)
            self.assertEqual(response, expected_return)

    def test_create_engine(self):
        """``create_engine`` POSTs to the engines URL."""
        engine_name = 'myawesomeengine'
        expected_return = {'name': engine_name, 'language': 'en'}

        with requests_mock.Mocker() as m:
            url = "{}/{}".format(self.client.session.base_url, 'engines')
            m.register_uri('POST', url, json=expected_return, status_code=200)
            response = self.client.create_engine(engine_name=engine_name,
                                                 language='en')
            self.assertEqual(response, expected_return)

    def test_create_engine_with_options(self):
        """``create_engine`` accepts an ``options`` dict."""
        engine_name = 'myawesomeengine'
        expected_return = {
            'name': engine_name,
            'type': 'meta',
            'source_engines': ['source-engine-1', 'source-engine-2']
        }

        with requests_mock.Mocker() as m:
            url = "{}/{}".format(self.client.session.base_url, 'engines')
            m.register_uri('POST', url, json=expected_return, status_code=200)
            response = self.client.create_engine(
                engine_name=engine_name,
                options={
                    'type': 'meta',
                    'source_engines': ['source-engine-1', 'source-engine-2']
                })
            self.assertEqual(response, expected_return)

    def test_destroy_engine(self):
        """``destroy_engine`` DELETEs the URL of the named engine."""
        engine_name = 'myawesomeengine'
        expected_return = {'deleted': True}

        with requests_mock.Mocker() as m:
            url = "{}/{}/{}".format(self.client.session.base_url, 'engines',
                                    engine_name)
            m.register_uri('DELETE',
                           url,
                           json=expected_return,
                           status_code=200)
            response = self.client.destroy_engine(engine_name)
            self.assertEqual(response, expected_return)

    def test_list_synonym_sets(self):
        """``list_synonym_sets`` requests page 1 with size 20 when called bare."""
        expected_return = {
            'meta': {
                'page': {
                    'current': 1,
                    'total_pages': 1,
                    'total_results': 3,
                    'size': 20
                }
            },
            'results': [{
                'id': 'syn-5b11ac66c9f9292013220ad3',
                'synonyms': ['park', 'trail']
            }, {
                'id': 'syn-5b11ac72c9f9296b35220ac9',
                'synonyms': ['protected', 'heritage']
            }, {
                'id': 'syn-5b11ac66c9f9292013220ad3',
                'synonyms': ['hectares', 'acres']
            }]
        }

        with requests_mock.Mocker() as m:
            url = "{}/engines/{}/synonyms".format(self.client.session.base_url,
                                                  self.engine_name)

            def match_request_text(request):
                # Only match requests whose JSON body asks for page 1, size 20.
                data = json.loads(request.text)
                return data["page"]["current"] == 1 and data["page"][
                    "size"] == 20

            m.register_uri('GET',
                           url,
                           additional_matcher=match_request_text,
                           json=expected_return,
                           status_code=200)

            response = self.client.list_synonym_sets(self.engine_name)
            # Previously the response was fetched but never checked.
            self.assertEqual(response, expected_return)

    def test_get_synonym_set(self):
        """``get_synonym_set`` GETs the URL of a single synonym set."""
        synonym_id = 'syn-5b11ac66c9f9292013220ad3'
        expected_return = {'id': synonym_id, 'synonyms': ['park', 'trail']}

        with requests_mock.Mocker() as m:
            url = "{}/engines/{}/synonyms/{}".format(
                self.client.session.base_url, self.engine_name, synonym_id)
            m.register_uri('GET', url, json=expected_return, status_code=200)

            response = self.client.get_synonym_set(self.engine_name,
                                                   synonym_id)
            self.assertEqual(response, expected_return)

    def test_create_synonym_set(self):
        """``create_synonym_set`` POSTs to the engine's synonyms URL."""
        synonym_set = ['park', 'trail']
        expected_return = {
            'id': 'syn-5b11ac72c9f9296b35220ac9',
            'synonyms': ['park', 'trail']
        }

        with requests_mock.Mocker() as m:
            url = "{}/engines/{}/synonyms".format(self.client.session.base_url,
                                                  self.engine_name)
            m.register_uri('POST', url, json=expected_return, status_code=200)

            response = self.client.create_synonym_set(self.engine_name,
                                                      synonym_set)
            self.assertEqual(response, expected_return)

    def test_update_synonym_set(self):
        """``update_synonym_set`` PUTs to the URL of a single synonym set."""
        synonym_id = 'syn-5b11ac72c9f9296b35220ac9'
        synonym_set = ['park', 'trail', 'ground']
        expected_return = {
            'id': synonym_id,
            'synonyms': ['park', 'trail', 'ground']
        }

        with requests_mock.Mocker() as m:
            url = "{}/engines/{}/synonyms/{}".format(
                self.client.session.base_url, self.engine_name, synonym_id)
            m.register_uri('PUT', url, json=expected_return, status_code=200)

            response = self.client.update_synonym_set(self.engine_name,
                                                      synonym_id, synonym_set)
            self.assertEqual(response, expected_return)

    def test_destroy_synonym_set(self):
        """``destroy_synonym_set`` DELETEs the URL of a single synonym set."""
        synonym_id = 'syn-5b11ac66c9f9292013220ad3'
        expected_return = {'deleted': True}

        with requests_mock.Mocker() as m:
            url = "{}/engines/{}/synonyms/{}".format(
                self.client.session.base_url, self.engine_name, synonym_id)
            m.register_uri('DELETE',
                           url,
                           json=expected_return,
                           status_code=200)

            response = self.client.destroy_synonym_set(self.engine_name,
                                                       synonym_id)
            self.assertEqual(response, expected_return)

    def test_search(self):
        """``search`` GETs the engine's search URL and returns the JSON."""
        query = 'query'
        expected_return = {'meta': {}, 'results': []}

        with requests_mock.Mocker() as m:
            url = "{}/{}".format(self.client.session.base_url,
                                 "engines/{}/search".format(self.engine_name))
            m.register_uri('GET', url, json=expected_return, status_code=200)
            response = self.client.search(self.engine_name, query, {})
            self.assertEqual(response, expected_return)

    def test_multi_search(self):
        """``multi_search`` GETs the engine's multi_search URL."""
        expected_return = [{
            'meta': {},
            'results': []
        }, {
            'meta': {},
            'results': []
        }]

        with requests_mock.Mocker() as m:
            url = "{}/{}".format(
                self.client.session.base_url,
                "engines/{}/multi_search".format(self.engine_name))
            m.register_uri('GET', url, json=expected_return, status_code=200)
            response = self.client.multi_search(self.engine_name, {})
            self.assertEqual(response, expected_return)

    def test_query_suggestion(self):
        """``query_suggestion`` GETs the engine's query_suggestion URL."""
        query = 'query'
        expected_return = {'meta': {}, 'results': {}}

        with requests_mock.Mocker() as m:
            url = "{}/{}".format(
                self.client.session.base_url,
                "engines/{}/query_suggestion".format(self.engine_name))
            m.register_uri('GET', url, json=expected_return, status_code=200)
            response = self.client.query_suggestion(self.engine_name, query,
                                                    {})
            self.assertEqual(response, expected_return)

    def test_click(self):
        """``click`` POSTs to the engine's click URL without raising."""
        with requests_mock.Mocker() as m:
            url = "{}/{}".format(self.client.session.base_url,
                                 "engines/{}/click".format(self.engine_name))
            m.register_uri('POST', url, json={}, status_code=200)
            self.client.click(self.engine_name, {
                'query': 'cat',
                'document_id': 'INscMGmhmX4'
            })

    def test_create_meta_engine(self):
        """``create_meta_engine`` POSTs to the engines URL."""
        source_engines = ['source-engine-1', 'source-engine-2']
        expected_return = {
            'source_engines': source_engines,
            'type': 'meta',
            'name': self.engine_name
        }

        with requests_mock.Mocker() as m:
            url = "{}/{}".format(self.client.session.base_url, 'engines')
            m.register_uri('POST', url, json=expected_return, status_code=200)
            response = self.client.create_meta_engine(self.engine_name,
                                                      source_engines)
            self.assertEqual(response, expected_return)

    def test_add_meta_engine_sources(self):
        """``add_meta_engine_sources`` POSTs to the source_engines URL."""
        target_source_engine_name = 'source-engine-3'
        expected_return = {
            'source_engines':
            ['source-engine-1', 'source-engine-2', target_source_engine_name],
            'type':
            'meta',
            'name':
            self.engine_name
        }

        with requests_mock.Mocker() as m:
            url = "{}/{}".format(
                self.client.session.base_url,
                "engines/{}/source_engines".format(self.engine_name))
            m.register_uri('POST', url, json=expected_return, status_code=200)
            response = self.client.add_meta_engine_sources(
                self.engine_name, [target_source_engine_name])
            self.assertEqual(response, expected_return)

    def test_delete_meta_engine_sources(self):
        """``delete_meta_engine_sources`` DELETEs on the source_engines URL."""
        source_engine_name = 'source-engine-3'
        expected_return = {
            'source_engines': ['source-engine-1', 'source-engine-2'],
            'type': 'meta',
            'name': self.engine_name
        }

        with requests_mock.Mocker() as m:
            url = "{}/{}".format(
                self.client.session.base_url,
                "engines/{}/source_engines".format(self.engine_name))
            m.register_uri('DELETE',
                           url,
                           json=expected_return,
                           status_code=200)
            response = self.client.delete_meta_engine_sources(
                self.engine_name, [source_engine_name])
            self.assertEqual(response, expected_return)
    it = iter(iterable)
    while True:
        chunk = tuple(itertools.islice(it, n))
        if not chunk:
            return
        yield chunk


# MongoDB connection; host, credentials and database name all come from the
# COVID_* environment variables.
client = pymongo.MongoClient(
    os.getenv("COVID_HOST"),
    username=os.getenv("COVID_USER"),
    password=os.getenv("COVID_PASS"),
    authSource=os.getenv("COVID_DB"),
)
db = client[os.getenv("COVID_DB")]

# Documents URL of the "entries" engine, built from the configured endpoint.
doc_post_url = (os.getenv("APPSEARCH_API_ENDPOINT") +
                "/api/as/v1/engines/entries/documents")

# App Search client pointed at the same endpoint, over plain HTTP.
elastic_app_client = Client(
    base_endpoint='{}/api/as/v1'.format(os.getenv("APPSEARCH_API_ENDPOINT")),
    api_key=os.getenv("APPSEARCH_API_KEY"),
    use_https=False,
)

# Entries that have no "category_ML" field.
unclassified_entries = db.entries_searchable.find(
    {"category_ML": {"$exists": False}})

# Index the matching entries in groups produced by grouper(100, ...).
for docs in grouper(100, unclassified_entries):
    for doc in docs:
        # Replace Mongo's "_id" with its string form stored under "id".
        doc['id'] = str(doc.pop('_id'))
    pprint(elastic_app_client.index_documents("entries", docs))
Example #4
0
# Connection settings and the input file for this indexing run.
api_key = 'private-key'
engine_name = 'flask-app-search'
file_name = 'movies.json'

client = Client(api_key=api_key,
                base_endpoint=host_identifier,
                use_https=False)

# Read the input inside a context manager so the file handle is always closed.
with open(file_name, "r") as f:
    document = f.read()
records = loads(document)

# Split the records into batches of at most MAX_BATCH_SIZE documents.
batched_records = list(batching_function(records, MAX_BATCH_SIZE))
number_of_batches = len(batched_records)
print("Indexing " + str(len(records)) + " records using " +
      str(number_of_batches) + " batches, each carrying up to " +
      str(MAX_BATCH_SIZE) + " documents")

for i, batch in enumerate(batched_records):
    indexing_response = client.index_documents(engine_name, batch)

    # Count only truthy entries; presumably the batching helper may pad the
    # last batch with empty placeholders -- verify against its definition.
    documents_in_batch = len(list(filter(None, batch)))

    # A result counts as failed when it carries a non-empty "errors" list.
    # A missing "errors" key is treated as "no errors" instead of crashing.
    errors_encountered = sum(
        1 for result in indexing_response if result.get("errors"))

    if errors_encountered == 0:
        outcome = "with no errors"
    else:
        outcome = "with " + str(errors_encountered) + " errors"

    # Emit the whole status line in one call; the former trailing-comma print
    # only suppressed the newline on Python 2.
    print("...batch " + str(i + 1) + " with " + str(documents_in_batch) +
          " documents completed " + outcome)
Example #5
0
    print('Total new room messages for this indexing run ' +
          str(len(new_messages)))

    if config['index']:
        print('Indexing documents ...')
        # Can only index 100 documents at a time, so send consecutive slices
        # of that size. Slice ends are exclusive: [start:start + step] yields
        # exactly `step` items (the earlier "- 1" dropped the last document
        # of every batch).
        step = 100
        tot = len(new_es_messages)
        # max(tot, 1) keeps the previous behaviour of issuing one (empty)
        # request and log line when there is nothing to index.
        for start in range(0, max(tot, 1), step):
            batch = new_es_messages[start:start + step]
            client.index_documents(engine_name, batch)
            # Report the real batch size rather than the nominal step.
            print('Indexed %s documents' % str(len(batch)))

    print('Saving messages to disk ...')
    with open(dest, 'w') as f:
        json.dump(room_messages, f, sort_keys=True, indent=1)
    with open(es_dest, 'w') as ff:
        json.dump(es_messages, ff, sort_keys=False, indent=1)