Example #1
0
    def setUp(self):
        """Create the client under test and precompute the documents URL."""
        self.engine_name = 'some-engine-name'
        self.client = Client('host_identifier', 'api_key')

        # The documents endpoint is derived from the session's base URL.
        base_url = self.client.session.base_url
        documents_path = "engines/{}/documents".format(self.engine_name)
        self.document_index_url = "{}/{}".format(base_url, documents_path)
Example #2
0
    def test_host_identifier_is_optional(self):
        """Search works when the host identifier is an empty string.

        The mocked URL is the only one registered, so the search call must
        hit exactly that localhost address over plain HTTP.
        """
        search_url = "http://localhost:3002/api/as/v1/engines/some-engine-name/search"
        client = Client('', 'api_key', 'localhost:3002/api/as/v1', False)

        with requests_mock.Mocker() as mocker:
            mocker.register_uri('GET', search_url, json={}, status_code=200)
            client.search(self.engine_name, 'query', {})
Example #3
0
 def process_item(self, item, spider):
     """Index one item into App Search and pass it on unchanged.

     Connection settings (ENDPOINT, DATABASE_KEY, ENGINE_NAME) are read from
     the environment on every call. The item is converted to a plain dict
     before it is sent; the original item object is returned.
     """
     connection = {
         'base_endpoint': os.getenv("ENDPOINT"),
         'api_key': os.getenv("DATABASE_KEY"),
         'use_https': False,
     }
     client = Client(**connection)
     client.index_document(os.getenv("ENGINE_NAME"), dict(item))
     logging.info(f'Item sent to App Search: {item["title"]}')
     return item
Example #4
0
def app_search(ENDPOINT, PRIVATE_KEY):
    """Create an HTTPS App Search client, print two reports, and return it.

    Prints the first page (20 entries) of the engine list, followed by the
    search settings of the 'sunat-ruc' engine, both through ``print_json``.

    Args:
        ENDPOINT: base endpoint passed to the client.
        PRIVATE_KEY: API key passed to the client.

    Returns:
        The constructed client.
    """
    client = Client(base_endpoint=ENDPOINT,
                    api_key=PRIVATE_KEY,
                    use_https=True)

    engines_report = client.list_engines(current=1, size=20)
    print("\nClient list engines")
    print_json(engines_report)

    settings_report = client.get_search_settings(engine_name='sunat-ruc')
    print("\nEngine Name:")
    print_json(settings_report)

    return client
Example #5
0
def elastic_search(video_data):
    """Index a fixed subset of ``video_data`` into the 'via-engine' engine.

    Only the fields listed below are copied into the indexed document; a
    missing field raises ``KeyError``. The indexing response is logged and
    the input mapping is returned untouched.
    """
    indexed_fields = (
        'fileid',
        'filename',
        'transcript',
        'creation_time_utc',
        'duration_seconds',
        'words',
    )

    # NOTE(review): endpoint and key are empty strings here -- presumably
    # placeholders to be filled in before use; confirm.
    es = Client(base_endpoint='', api_key='', use_https=True)

    doc = {field: video_data[field] for field in indexed_fields}
    logging.info(es.index_document("via-engine", doc))

    return video_data
Example #6
0
def get_api_v1_client():
    """Build an App Search client from the django_elastic_appsearch app config."""
    app_config = apps.get_app_config('django_elastic_appsearch')
    return Client(
        base_endpoint=app_config.api_v1_base_endpoint,
        api_key=app_config.api_key,
        use_https=app_config.use_https,
    )
Example #7
0
class site(Site):
    """Site configuration: header links, podcast list and App Search settings."""
    strict = True

    # Navigation entries shown in the page header.
    HEADER_LINKS = (
        Link(name='About', url='/about.html'),
        Link(name='Blog', url='/blog-0.html'),
        Link(name='Projects', url='/projects.html'),
        Link(name='Resume',
             url='/static/files/Jay_Miller_-_Software_Engineer.pdf'),
        Link(name='Newsletter', url='/subscribe'),
        Link(name='Contact', url="/contact"),
    )

    timezone = 'US/Pacific'
    SITE_TITLE = '(K) Jay Miller'
    SITE_URL = 'https://kjaymiller.com'
    AUTHOR = 'Jay Miller'
    # The former ``HEADER_LINKS = HEADER_LINKS`` line was a no-op
    # self-assignment inside the class body and has been dropped.
    PODCASTS = [
        PodcastLink(
            name="Bob's Taverncast",
            url='https://bobstavern.pub',
            image="/bobstavern_256.jpg",
            feed=
            'https://feeds.transistor.fm/bobs-taverncast-a-hearthstone-battlegrounds-podcast',
        ),
        PodcastLink(
            name="The PIT Show",
            url='https://podcast.productivityintech.com',
            image="/pit-logo-v4.jpg",
            feed='https://feeds.transistor.fm/productivity-in-tech-podcast',
        ),
        PodcastLink(
            name="TekTok Podcast",
            url='https://www.tekside.net/tektok',
            image="/tektok_256.jpeg",
            feed='http://tekside.net/tektok?format=rss',
        ),
    ]
    search = elastic_app_search
    # Built when the class body executes; os.environ[...] raises KeyError if
    # either variable is unset.
    search_client = Client(
        use_https=True,
        base_endpoint=os.environ['APP_SEARCH_ENDPOINT'],
        api_key=os.environ['APP_SEARCH_API_KEY'],
    )
    search_params = {
        'engine': 'kjaymiller',
    }
Example #8
0
from elastic_app_search import Client
import json
# NOTE(review): the endpoint and the private API key are hard-coded in source.
# Consider loading them from the environment or a secrets store, and rotating
# this key if the file has ever been shared.
client = Client(
    base_endpoint='34.87.101.217:3002/api/as/v1',
    api_key='private-15enfbz3zdf59jvchr94mu7k',
    use_https=False
)

# Engine that receives the documents.
engine_name = "sensitive-data-engine"

# Load the documents to index from outfile.json in the working directory.
with open("outfile.json", "r") as fp:
    documents = json.load(fp)


# Echo the loaded payload before sending it.
print (documents)

# Send every loaded document to the engine in a single index_documents call.
client.index_documents(engine_name, documents)

Example #9
0
class TestClient(TestCase):
    """Exercise ``Client`` against HTTP endpoints stubbed with requests_mock.

    Every test registers exactly the URL (and HTTP verb) the client is
    expected to call, so a request to any other address fails the test.
    """

    def setUp(self):
        """Create the default client and the documents URL shared by tests."""
        self.engine_name = 'some-engine-name'
        self.client = Client('host_identifier', 'api_key')

        self.document_index_url = "{}/{}".format(
            self.client.session.base_url,
            "engines/{}/documents".format(self.engine_name))

    @staticmethod
    def _page_matcher(current, size):
        """Return a requests_mock matcher for the expected paging body.

        The matcher parses the request body as JSON and accepts the request
        only when ``page.current`` and ``page.size`` equal the given values.
        """
        def match_request_text(request):
            data = json.loads(request.text)
            return (data["page"]["current"] == current
                    and data["page"]["size"] == size)

        return match_request_text

    def test_deprecated_init_support_with_old_names(self):
        """The ``account_host_key`` keyword is accepted by the constructor."""
        self.client = Client(account_host_key='host_identifier',
                             api_key='api_key')
        self.assertEqual(self.client.account_host_key, 'host_identifier')

    def test_deprecated_init_support_with_new_names(self):
        """``host_identifier`` is exposed through ``account_host_key``."""
        self.client = Client(host_identifier='host_identifier',
                             api_key='api_key')
        self.assertEqual(self.client.account_host_key, 'host_identifier')

    def test_deprecated_init_support_with_positional(self):
        """The first positional argument is exposed as ``account_host_key``."""
        self.client = Client('host_identifier', 'api_key', 'example.com',
                             False)
        self.assertEqual(self.client.account_host_key, 'host_identifier')

    def test_host_identifier_is_optional(self):
        """Search works when the host identifier is an empty string."""
        client = Client('', 'api_key', 'localhost:3002/api/as/v1', False)
        query = 'query'

        with requests_mock.Mocker() as m:
            url = "http://localhost:3002/api/as/v1/engines/some-engine-name/search"
            m.register_uri('GET', url, json={}, status_code=200)
            client.search(self.engine_name, query, {})

    def test_index_document_processing_error(self):
        """A per-document error in the response raises ``InvalidDocument``."""
        invalid_document = {'id': 'something', 'bad': {'no': 'nested'}}
        error = 'some processing error'
        stubbed_return = [{'id': 'something', 'errors': [error]}]
        with requests_mock.Mocker() as m:
            m.register_uri('POST',
                           self.document_index_url,
                           json=stubbed_return,
                           status_code=200)

            with self.assertRaises(InvalidDocument) as context:
                self.client.index_document(self.engine_name, invalid_document)

            # Checked after the ``assertRaises`` block: a statement placed
            # behind the raising call inside the block would never execute.
            self.assertEqual(str(context.exception), error)

    def test_index_document_no_error_key_in_response(self):
        """An empty ``errors`` list yields just the document id."""
        document_without_id = {'body': 'some value'}
        stubbed_return = [{'id': 'auto generated', 'errors': []}]

        with requests_mock.Mocker() as m:
            m.register_uri('POST',
                           self.document_index_url,
                           json=stubbed_return,
                           status_code=200)
            response = self.client.index_document(self.engine_name,
                                                  document_without_id)
            self.assertEqual(response, {'id': 'auto generated'})

    def test_index_documents(self):
        """Bulk indexing POSTs to the documents URL and returns the body."""
        document_id = 'INscMGmhmX4'
        valid_document = {'id': document_id}
        other_document = {'body': 'some value'}

        expected_return = [{
            'id': document_id,
            'errors': []
        }, {
            'id': 'some autogenerated id',
            'errors': []
        }]

        with requests_mock.Mocker() as m:
            m.register_uri('POST',
                           self.document_index_url,
                           json=expected_return,
                           status_code=200)
            response = self.client.index_documents(
                self.engine_name, [valid_document, other_document])
            self.assertEqual(response, expected_return)

    def test_update_documents(self):
        """Bulk update PATCHes the documents URL and returns the body."""
        document_id = 'INscMGmhmX4'
        valid_document = {'id': document_id}
        other_document = {'body': 'some value'}

        expected_return = [{
            'id': document_id,
            'errors': []
        }, {
            'id': 'some autogenerated id',
            'errors': []
        }]

        with requests_mock.Mocker() as m:
            m.register_uri('PATCH',
                           self.document_index_url,
                           json=expected_return,
                           status_code=200)
            response = self.client.update_documents(
                self.engine_name, [valid_document, other_document])
            self.assertEqual(response, expected_return)

    def test_get_documents(self):
        """Fetching by id GETs the documents URL and returns the body."""
        document_id = 'INscMGmhmX4'
        expected_return = [{
            'id': document_id,
            'url': 'http://www.youtube.com/watch?v=v1uyQZNg2vE',
            'title': 'The Original Grumpy Cat',
            'body': 'this is a test'
        }]

        with requests_mock.Mocker() as m:
            m.register_uri('GET',
                           self.document_index_url,
                           json=expected_return,
                           status_code=200)
            response = self.client.get_documents(self.engine_name,
                                                 [document_id])
            self.assertEqual(response, expected_return)

    def test_list_documents(self):
        """Listing documents sends page 1 with size 20 by default."""
        expected_return = {
            'meta': {
                'page': {
                    'current': 1,
                    'total_results': 1,
                    'total_pages': 1,
                    'size': 20
                },
                'results': [{
                    'body': 'this is a test',
                    'id': '1'
                }, {
                    'body': 'this is also a test',
                    'id': '2'
                }]
            }
        }

        with requests_mock.Mocker() as m:
            url = "{}/engines/{}/documents/list".format(
                self.client.session.base_url, self.engine_name)
            m.register_uri('GET',
                           url,
                           additional_matcher=self._page_matcher(1, 20),
                           json=expected_return,
                           status_code=200)

            response = self.client.list_documents(self.engine_name)
            self.assertEqual(response, expected_return)

    def test_destroy_documents(self):
        """Deleting by id issues DELETE on the documents URL."""
        document_id = 'INscMGmhmX4'
        expected_return = [{'id': document_id, 'result': True}]

        with requests_mock.Mocker() as m:
            m.register_uri('DELETE',
                           self.document_index_url,
                           json=expected_return,
                           status_code=200)
            response = self.client.destroy_documents(self.engine_name,
                                                     [document_id])
            self.assertEqual(response, expected_return)

    def test_get_schema(self):
        """The schema is read with GET on ``engines/<name>/schema``."""
        expected_return = {'square_km': 'text'}

        with requests_mock.Mocker() as m:
            url = "{}/engines/{}/schema".format(self.client.session.base_url,
                                                self.engine_name)
            m.register_uri('GET', url, json=expected_return, status_code=200)

            response = self.client.get_schema(self.engine_name)
            self.assertEqual(response, expected_return)

    def test_update_schema(self):
        """The schema is updated with POST on ``engines/<name>/schema``."""
        expected_return = {'square_mi': 'number', 'square_km': 'number'}

        with requests_mock.Mocker() as m:
            url = "{}/engines/{}/schema".format(self.client.session.base_url,
                                                self.engine_name)
            m.register_uri('POST', url, json=expected_return, status_code=200)

            response = self.client.update_schema(self.engine_name,
                                                 expected_return)
            self.assertEqual(response, expected_return)

    def test_list_engines(self):
        """Listing engines sends page 1 with size 20 by default."""
        expected_return = [{'name': 'myawesomeengine'}]

        with requests_mock.Mocker() as m:
            url = "{}/{}".format(self.client.session.base_url, 'engines')
            m.register_uri('GET',
                           url,
                           additional_matcher=self._page_matcher(1, 20),
                           json=expected_return,
                           status_code=200)
            response = self.client.list_engines()
            self.assertEqual(response, expected_return)

    def test_list_engines_with_paging(self):
        """Explicit ``current``/``size`` values are sent in the request."""
        expected_return = [{'name': 'myawesomeengine'}]

        with requests_mock.Mocker() as m:
            url = "{}/{}".format(self.client.session.base_url, 'engines')
            m.register_uri('GET',
                           url,
                           additional_matcher=self._page_matcher(10, 2),
                           json=expected_return,
                           status_code=200)
            response = self.client.list_engines(current=10, size=2)
            self.assertEqual(response, expected_return)

    def test_get_engine(self):
        """A single engine is read with GET on ``engines/<name>``."""
        engine_name = 'myawesomeengine'
        expected_return = [{'name': engine_name}]

        with requests_mock.Mocker() as m:
            url = "{}/{}/{}".format(self.client.session.base_url, 'engines',
                                    engine_name)
            m.register_uri('GET', url, json=expected_return, status_code=200)
            response = self.client.get_engine(engine_name)
            self.assertEqual(response, expected_return)

    def test_create_engine(self):
        """An engine is created with POST on ``engines``."""
        engine_name = 'myawesomeengine'
        expected_return = {'name': engine_name, 'language': 'en'}

        with requests_mock.Mocker() as m:
            url = "{}/{}".format(self.client.session.base_url, 'engines')
            m.register_uri('POST', url, json=expected_return, status_code=200)
            response = self.client.create_engine(engine_name=engine_name,
                                                 language='en')
            self.assertEqual(response, expected_return)

    def test_create_engine_with_options(self):
        """``create_engine`` accepts an ``options`` mapping."""
        engine_name = 'myawesomeengine'
        expected_return = {
            'name': engine_name,
            'type': 'meta',
            'source_engines': ['source-engine-1', 'source-engine-2']
        }

        with requests_mock.Mocker() as m:
            url = "{}/{}".format(self.client.session.base_url, 'engines')
            m.register_uri('POST', url, json=expected_return, status_code=200)
            response = self.client.create_engine(
                engine_name=engine_name,
                options={
                    'type': 'meta',
                    'source_engines': ['source-engine-1', 'source-engine-2']
                })
            self.assertEqual(response, expected_return)

    def test_destroy_engine(self):
        """An engine is removed with DELETE on ``engines/<name>``."""
        engine_name = 'myawesomeengine'
        expected_return = {'deleted': True}

        with requests_mock.Mocker() as m:
            url = "{}/{}/{}".format(self.client.session.base_url, 'engines',
                                    engine_name)
            m.register_uri('DELETE',
                           url,
                           json=expected_return,
                           status_code=200)
            response = self.client.destroy_engine(engine_name)
            self.assertEqual(response, expected_return)

    def test_list_synonym_sets(self):
        """Listing synonym sets sends page 1 with size 20 by default."""
        expected_return = {
            'meta': {
                'page': {
                    'current': 1,
                    'total_pages': 1,
                    'total_results': 3,
                    'size': 20
                }
            },
            'results': [{
                'id': 'syn-5b11ac66c9f9292013220ad3',
                'synonyms': ['park', 'trail']
            }, {
                'id': 'syn-5b11ac72c9f9296b35220ac9',
                'synonyms': ['protected', 'heritage']
            }, {
                'id': 'syn-5b11ac66c9f9292013220ad3',
                'synonyms': ['hectares', 'acres']
            }]
        }

        with requests_mock.Mocker() as m:
            url = "{}/engines/{}/synonyms".format(self.client.session.base_url,
                                                  self.engine_name)
            m.register_uri('GET',
                           url,
                           additional_matcher=self._page_matcher(1, 20),
                           json=expected_return,
                           status_code=200)

            response = self.client.list_synonym_sets(self.engine_name)
            # Previously the response was fetched but never checked.
            self.assertEqual(response, expected_return)

    def test_get_synonym_set(self):
        """One synonym set is read with GET on ``synonyms/<id>``."""
        synonym_id = 'syn-5b11ac66c9f9292013220ad3'
        expected_return = {'id': synonym_id, 'synonyms': ['park', 'trail']}

        with requests_mock.Mocker() as m:
            url = "{}/engines/{}/synonyms/{}".format(
                self.client.session.base_url, self.engine_name, synonym_id)
            m.register_uri('GET', url, json=expected_return, status_code=200)

            response = self.client.get_synonym_set(self.engine_name,
                                                   synonym_id)
            self.assertEqual(response, expected_return)

    def test_create_synonym_set(self):
        """A synonym set is created with POST on ``synonyms``."""
        synonym_set = ['park', 'trail']
        expected_return = {
            'id': 'syn-5b11ac72c9f9296b35220ac9',
            'synonyms': ['park', 'trail']
        }

        with requests_mock.Mocker() as m:
            url = "{}/engines/{}/synonyms".format(self.client.session.base_url,
                                                  self.engine_name)
            m.register_uri('POST', url, json=expected_return, status_code=200)

            response = self.client.create_synonym_set(self.engine_name,
                                                      synonym_set)
            self.assertEqual(response, expected_return)

    def test_update_synonym_set(self):
        """A synonym set is replaced with PUT on ``synonyms/<id>``."""
        synonym_id = 'syn-5b11ac72c9f9296b35220ac9'
        synonym_set = ['park', 'trail', 'ground']
        expected_return = {
            'id': synonym_id,
            'synonyms': ['park', 'trail', 'ground']
        }

        with requests_mock.Mocker() as m:
            url = "{}/engines/{}/synonyms/{}".format(
                self.client.session.base_url, self.engine_name, synonym_id)
            m.register_uri('PUT', url, json=expected_return, status_code=200)

            response = self.client.update_synonym_set(self.engine_name,
                                                      synonym_id, synonym_set)
            self.assertEqual(response, expected_return)

    def test_destroy_synonym_set(self):
        """A synonym set is removed with DELETE on ``synonyms/<id>``."""
        synonym_id = 'syn-5b11ac66c9f9292013220ad3'
        expected_return = {'deleted': True}

        with requests_mock.Mocker() as m:
            url = "{}/engines/{}/synonyms/{}".format(
                self.client.session.base_url, self.engine_name, synonym_id)
            m.register_uri('DELETE',
                           url,
                           json=expected_return,
                           status_code=200)

            response = self.client.destroy_synonym_set(self.engine_name,
                                                       synonym_id)
            self.assertEqual(response, expected_return)

    def test_search(self):
        """Search issues GET on ``engines/<name>/search``."""
        query = 'query'
        expected_return = {'meta': {}, 'results': []}

        with requests_mock.Mocker() as m:
            url = "{}/{}".format(self.client.session.base_url,
                                 "engines/{}/search".format(self.engine_name))
            m.register_uri('GET', url, json=expected_return, status_code=200)
            response = self.client.search(self.engine_name, query, {})
            self.assertEqual(response, expected_return)

    def test_multi_search(self):
        """Multi search issues GET on ``engines/<name>/multi_search``."""
        expected_return = [{
            'meta': {},
            'results': []
        }, {
            'meta': {},
            'results': []
        }]

        with requests_mock.Mocker() as m:
            url = "{}/{}".format(
                self.client.session.base_url,
                "engines/{}/multi_search".format(self.engine_name))
            m.register_uri('GET', url, json=expected_return, status_code=200)
            response = self.client.multi_search(self.engine_name, {})
            self.assertEqual(response, expected_return)

    def test_query_suggestion(self):
        """Suggestions issue GET on ``engines/<name>/query_suggestion``."""
        query = 'query'
        expected_return = {'meta': {}, 'results': {}}

        with requests_mock.Mocker() as m:
            url = "{}/{}".format(
                self.client.session.base_url,
                "engines/{}/query_suggestion".format(self.engine_name))
            m.register_uri('GET', url, json=expected_return, status_code=200)
            response = self.client.query_suggestion(self.engine_name, query,
                                                    {})
            self.assertEqual(response, expected_return)

    def test_click(self):
        """Click tracking POSTs to ``engines/<name>/click`` without error."""
        with requests_mock.Mocker() as m:
            url = "{}/{}".format(self.client.session.base_url,
                                 "engines/{}/click".format(self.engine_name))
            m.register_uri('POST', url, json={}, status_code=200)
            self.client.click(self.engine_name, {
                'query': 'cat',
                'document_id': 'INscMGmhmX4'
            })

    def test_create_meta_engine(self):
        """A meta engine is created with POST on ``engines``."""
        source_engines = ['source-engine-1', 'source-engine-2']
        expected_return = {
            'source_engines': source_engines,
            'type': 'meta',
            'name': self.engine_name
        }

        with requests_mock.Mocker() as m:
            url = "{}/{}".format(self.client.session.base_url, 'engines')
            m.register_uri('POST', url, json=expected_return, status_code=200)
            response = self.client.create_meta_engine(self.engine_name,
                                                      source_engines)
            self.assertEqual(response, expected_return)

    def test_add_meta_engine_sources(self):
        """Source engines are added with POST on ``source_engines``."""
        target_source_engine_name = 'source-engine-3'
        expected_return = {
            'source_engines': [
                'source-engine-1', 'source-engine-2',
                target_source_engine_name
            ],
            'type': 'meta',
            'name': self.engine_name
        }

        with requests_mock.Mocker() as m:
            url = "{}/{}".format(
                self.client.session.base_url,
                "engines/{}/source_engines".format(self.engine_name))
            m.register_uri('POST', url, json=expected_return, status_code=200)
            response = self.client.add_meta_engine_sources(
                self.engine_name, [target_source_engine_name])
            self.assertEqual(response, expected_return)

    def test_delete_meta_engine_sources(self):
        """Source engines are removed with DELETE on ``source_engines``."""
        source_engine_name = 'source-engine-3'
        expected_return = {
            'source_engines': ['source-engine-1', 'source-engine-2'],
            'type': 'meta',
            'name': self.engine_name
        }

        with requests_mock.Mocker() as m:
            url = "{}/{}".format(
                self.client.session.base_url,
                "engines/{}/source_engines".format(self.engine_name))
            m.register_uri('DELETE',
                           url,
                           json=expected_return,
                           status_code=200)
            response = self.client.delete_meta_engine_sources(
                self.engine_name, [source_engine_name])
            self.assertEqual(response, expected_return)
Example #10
0
from elastic_app_search import Client

import app.constants as constants

# Module-level client constructed with no arguments; init() rebinds it with
# the endpoint and API key taken from app.constants.
client = Client()
# Page size that search() sends with every query.
pagesize = 10


def init():
    """Rebind the module-level ``client`` using the values in ``constants``."""
    global client
    settings = {
        'base_endpoint': constants.search_endpoint,
        'api_key': constants.search_api_key,
        'use_https': False,
    }
    client = Client(**settings)


def search(query):
    """Run ``query`` against the configured engine and return the first page.

    The page size comes from the module-level ``pagesize``.
    """
    first_page = {"page": {"size": pagesize, "current": 1}}
    return client.search(constants.search_engine_name, query, first_page)


def get_by_id(id):
    """Return the client's ``get_documents`` result for the single ``id``."""
    wanted_ids = [id]
    return client.get_documents(constants.search_engine_name, wanted_ids)
Example #11
0
 def test_deprecated_init_support_with_new_names(self):
     """``host_identifier`` is exposed through ``account_host_key``."""
     init_kwargs = {'host_identifier': 'host_identifier', 'api_key': 'api_key'}
     self.client = Client(**init_kwargs)
     self.assertEqual(self.client.account_host_key, 'host_identifier')
Example #12
0

def batching_function(iterable, n, fillvalue=None):
    """Group ``iterable`` into tuples of length ``n``.

    The last tuple is padded with ``fillvalue`` when the input length is not
    a multiple of ``n``. Returns a lazy ``zip_longest`` iterator.
    """
    # Every slot reads from the same iterator, so each output tuple consumes
    # ``n`` consecutive items.
    shared = iter(iterable)
    slots = [shared for _ in range(n)]
    return zip_longest(*slots, fillvalue=fillvalue)


# Configuration
MAX_BATCH_SIZE = 100
host_identifier = 'localhost:3002/api/as/v1'
api_key = 'private-key'
engine_name = 'flask-app-search'
file_name = 'movies.json'

client = Client(api_key=api_key,
                base_endpoint=host_identifier,
                use_https=False)

# Read the file inside a context manager so the handle is always closed.
with open(file_name, "r") as f:
    document = f.read()
records = loads(document)
batched_records = list(batching_function(records, MAX_BATCH_SIZE))
number_of_batches = len(batched_records)
print("Indexing " + str(len(records)) + " records using " +
      str(number_of_batches) + " batches, each carrying up to " +
      str(MAX_BATCH_SIZE) + " documents")
for i, batch in enumerate(batched_records):
    # batching_function pads the final batch with None; strip the padding so
    # only real records are sent and counted.
    documents = [record for record in batch if record is not None]
    indexing_response = client.index_documents(engine_name, documents)
    print("...batch " + str(i + 1) + " with " + str(len(documents)) +
          " documents completed")
def grouper(n, iterable):
    """Yield consecutive tuples of up to ``n`` items from ``iterable``.

    The final tuple may be shorter than ``n``; nothing is yielded for an
    empty input.
    """
    source = iter(iterable)
    chunk = tuple(itertools.islice(source, n))
    while chunk:
        yield chunk
        chunk = tuple(itertools.islice(source, n))


# MongoDB connection; host, credentials and auth database all come from the
# COVID_* environment variables.
client = pymongo.MongoClient(
    os.getenv("COVID_HOST"),
    username=os.getenv("COVID_USER"),
    password=os.getenv("COVID_PASS"),
    authSource=os.getenv("COVID_DB"),
)
db = client[os.getenv("COVID_DB")]

doc_post_url = (os.getenv("APPSEARCH_API_ENDPOINT") +
                "/api/as/v1/engines/entries/documents")

elastic_app_client = Client(
    base_endpoint='{}/api/as/v1'.format(os.getenv("APPSEARCH_API_ENDPOINT")),
    api_key=os.getenv("APPSEARCH_API_KEY"),
    use_https=False,
)

# Index, 100 at a time, every entry that has no "category_ML" field.
for docs in grouper(
        100,
        db.entries_searchable.find({"category_ML": {"$exists": False}})):
    for doc in docs:
        # Move the Mongo "_id" into a string "id" field.
        doc['id'] = str(doc.pop('_id'))
    pprint(elastic_app_client.index_documents("entries", docs))
Example #14
0
from flask import Flask
from config import Config
from elastic_app_search import Client

# Create the Flask application and load its settings from the Config object.
app = Flask(__name__)
app.config.from_object(Config)


def str_to_bool(s):
    """Convert the strings 'True' / 'False' to the matching bool.

    Values that are already ``bool`` are returned unchanged, so a config
    entry that is stored as a real boolean works as well.

    Args:
        s: 'True', 'False', or a bool.

    Returns:
        The corresponding bool.

    Raises:
        ValueError: for any other value.
    """
    if isinstance(s, bool):
        return s
    if s == 'True':
        return True
    if s == 'False':
        return False
    raise ValueError("Cannot convert {} to a bool".format(s))


# Convert the APP_SEARCH_USE_HTTPS setting with str_to_bool, which raises
# ValueError for values it cannot convert.
bool_value = str_to_bool(app.config['APP_SEARCH_USE_HTTPS'])

# Two clients share the base endpoint and HTTPS flag and differ only in the
# API key they use.
client_app_search = Client(
    api_key=app.config['APP_SEARCH_API_KEY'],
    base_endpoint=app.config['APP_SEARCH_BASE_ENDPOINT'],
    use_https=bool_value)

client_blog_search = Client(
    api_key=app.config['APP_SEARCH_API_KEY_BLOG'],
    base_endpoint=app.config['APP_SEARCH_BASE_ENDPOINT'],
    use_https=bool_value)

from app import routes
Example #15
0
def init():
    """Rebind the module-level ``client`` using the values in ``constants``."""
    global client
    settings = {
        'base_endpoint': constants.search_endpoint,
        'api_key': constants.search_api_key,
        'use_https': False,
    }
    client = Client(**settings)
Example #16
0
 def test_deprecated_init_support_with_positional(self):
     """The first positional argument is exposed as ``account_host_key``."""
     positional_args = ('host_identifier', 'api_key', 'example.com', False)
     self.client = Client(*positional_args)
     self.assertEqual(self.client.account_host_key, 'host_identifier')
Example #17
0
import sys
import time
import uuid
import requests
import urllib.parse
import requests_cache
from git import Repo
from elastic_app_search import Client
from datetime import datetime, timezone, timedelta
from dateutil.parser import parse

# Load runtime settings from config.yml in the working directory.
# NOTE(review): `yaml` and `os` are used below but are not among the imports
# visible in this part of the file -- confirm they are imported elsewhere.
with open(r'config.yml') as file:
    config = yaml.load(file, Loader=yaml.FullLoader)

# App Search client: the configured base endpoint gets '/api/as/v1' appended
# and use_https is disabled.
client = Client(base_endpoint=config['base_endpoint'] + '/api/as/v1',
                api_key=config['api_key'],
                use_https=False)

engine_name = config['index_name']
archive_dir = config['archivedir']
archive_name = os.path.join(archive_dir, 'archive')

# Install a requests cache named 'gitter_indexer' under the archive directory.
requests_cache.install_cache(archive_dir + '/gitter_indexer')

# Read the bearer token from the 'token' file, stripping surrounding whitespace.
with open('token') as f:
    token = f.read().strip()

# Authorization header built from the token; presumably sent with the Gitter
# API requests made later in the file -- confirm against the callers.
h = {'Authorization': 'Bearer %s' % token}


def utcnow():