Ejemplo n.º 1
0
def _set_up_index(service_name, endpoint, api_key, schema, index_batch):
    """Create a search index from ``schema`` and optionally load documents into it.

    ``schema`` and ``index_batch`` are first run through ``_load_schema`` and
    ``_load_batch``. When the loaded schema is falsy nothing else happens.
    Otherwise the schema is POSTed to the service URL and, if a batch was
    loaded too, the batch is indexed and the function sleeps for
    ``TIME_TO_SLEEP`` seconds.

    Raises:
        AzureTestError: if the index creation request does not return 201, or
            if any document in the batch is reported as not succeeded.
    """
    from azure.core.credentials import AzureKeyCredential
    from azure.search.documents import SearchClient
    from azure.search.documents._generated.models import IndexBatch

    schema = _load_schema(schema)
    index_batch = _load_batch(index_batch)
    if not schema:
        # No schema means no index to create, and therefore nothing to populate.
        return

    index_name = json.loads(schema)["name"]
    create_url = SERVICE_URL_FMT.format(service_name, SEARCH_ENDPOINT_SUFFIX)
    request_headers = {"Content-Type": "application/json", "api-key": api_key}
    response = requests.post(create_url, headers=request_headers, data=schema)
    if response.status_code != 201:
        raise AzureTestError(
            "Could not create a search index {}".format(response.status_code)
        )

    # optionally load data into the index
    if not index_batch:
        return

    batch = IndexBatch.deserialize(index_batch)
    index_client = SearchClient(endpoint, index_name, AzureKeyCredential(api_key))
    outcomes = index_client.index_documents(batch)
    if any(not outcome.succeeded for outcome in outcomes):
        raise AzureTestError("Document upload to search index failed")

    # Indexing is asynchronous: a successful response only means the documents
    # are persisted, not that they are searchable yet. Searchability can only
    # be verified by querying, and even that is eventually consistent because
    # of replication. The Track 1 SDK tests "solved" this with a constant delay
    # between indexing and querying, which is what happens here too.
    import time
    time.sleep(TIME_TO_SLEEP)
Ejemplo n.º 2
0
 def test_get_document_count(self, mock_count):
     """``get_document_count`` forwards only the client headers to the mocked operation."""
     search_client = SearchClient("endpoint", "index name", CREDENTIAL)
     search_client.get_document_count()

     assert mock_count.called
     # call_args is an (args, kwargs) pair for the most recent call.
     positional, keywords = mock_count.call_args
     assert positional == ()
     assert len(keywords) == 1
     assert keywords["headers"] == search_client._headers
Ejemplo n.º 3
0
def search_query(search_text, filter_by=None, sort_order=None):
    """Run a query against the search index and return the service results.

    The client is built from the module-level ``search_key``,
    ``search_endpoint`` and ``search_index`` values.

    Args:
        search_text: Text passed to ``SearchClient.search`` as the query.
        filter_by: Optional value forwarded as the ``filter`` argument.
        sort_order: Optional value forwarded as the ``order_by`` argument.

    Returns:
        Whatever ``SearchClient.search`` returns for the query.

    Any exception raised while building the client or submitting the query
    propagates to the caller unchanged (the previous ``except ...: raise ex``
    wrapper did nothing except add a frame to the traceback).
    """
    # Create a search client
    azure_credential = AzureKeyCredential(search_key)
    search_client = SearchClient(search_endpoint, search_index,
                                 azure_credential)

    # Submit search query
    return search_client.search(
        search_text,
        search_mode="all",
        include_total_count=True,
        filter=filter_by,
        order_by=sort_order,
        facets=['metadata_author'],
        highlight_fields='merged_content-3,imageCaption-3',
        select=
        "url,metadata_storage_name,metadata_author,metadata_storage_size,"
        "metadata_storage_last_modified,language,sentiment,merged_content,"
        "keyphrases,locations,imageTags,imageCaption")
Ejemplo n.º 4
0
def search(rootdir, Query, index_name, endpoint, key, fnames, vertical):
    """Query the index, record the query in the tracker CSV, and write hits to a CSV.

    For every search hit whose ``FileName`` is in ``fnames``:
      * a row with the file name, path and search score is collected, and
      * the ``Intent`` column of the matching rows in
        ``TrackerFiles/<vertical>.csv`` is set to the query (when any matching
        cell is null) or has ``",<query>"`` appended to it.

    Args:
        rootdir: Prefix that ``'result.csv'`` is appended to (plain string
            concatenation, so it should end with a path separator).
        Query: Search text; also written into the tracker's ``Intent`` column.
        index_name: Name of the search index.
        endpoint: Search service endpoint.
        key: API key wrapped in an ``AzureKeyCredential``.
        fnames: Container of file names that are allowed in the output.
        vertical: Name of the tracker file (without ``.csv``) under ``TrackerFiles/``.

    Returns:
        The path of the written result file, ``rootdir + 'result.csv'``.
    """
    field_names = ['FileName', 'FilePath', 'Score']
    tracker_path = 'TrackerFiles/{}.csv'.format(vertical)
    result_path = rootdir + 'result.csv'
    query_text = str(Query)

    # Create a client
    credential = AzureKeyCredential(key)
    client = SearchClient(endpoint=endpoint,
                          index_name=index_name,
                          credential=credential)
    results = client.search(search_text=Query)
    df = pd.read_csv(tracker_path)
    rows = []

    for result in results:
        file_name = result['FileName']
        if file_name not in fnames:
            continue

        rows.append({
            'FileName': file_name,
            'FilePath': result['FilePath'],
            'Score': result['@search.score']
        })

        # Compute the tracker row mask once instead of once per access.
        matches = df['FileName'] == str(file_name)
        if df.loc[matches, 'Intent'].isnull().any():
            df.loc[matches, 'Intent'] = query_text
        else:
            df.loc[matches, 'Intent'] = (
                df.loc[matches, 'Intent'] + "," + query_text)

    df.to_csv(tracker_path, index=False)

    # The csv module requires newline='' on files it writes to; without it
    # every row is followed by a blank line on platforms that translate "\n".
    with open(result_path, 'w', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=field_names)
        writer.writeheader()
        writer.writerows(rows)

    return result_path
Ejemplo n.º 5
0
    def test_merge_or_upload_documents(self, api_key, endpoint, index_name,
                                       **kwargs):
        """Merge-or-upload two documents through the buffered sender and verify them.

        After the sender is closed the index is expected to hold 11 documents
        and both submitted keys must report the submitted rating.
        """
        client = SearchClient(endpoint, index_name,
                              AzureKeyCredential(api_key))
        batch_client = SearchIndexingBufferedSender(
            endpoint, index_name, AzureKeyCredential(api_key))
        batch_client._batch_size = 2

        documents = [
            {"hotelId": "1000", "rating": 1},
            {"hotelId": "4", "rating": 2},
        ]
        batch_client.merge_or_upload_documents(documents)
        batch_client.close()

        # Documents may take a moment to become searchable on a live service
        if self.is_live:
            time.sleep(TIME_TO_SLEEP)

        assert client.get_document_count() == 11

        for hotel_id, expected_rating in (("1000", 1), ("4", 2)):
            fetched = client.get_document(key=hotel_id)
            assert fetched["rating"] == expected_rating
Ejemplo n.º 6
0
    def test_index_documents(self, mock_index):
        """Build a batch with every action kind and check how it is forwarded.

        Each ``add_*_actions`` call must return actions tagged with the
        matching ``action_type``; ``index_documents`` must then call the
        mocked operation with keyword arguments only, including the client
        headers and the extra keyword passed by the caller.
        """
        client = SearchClient("endpoint", "index name", CREDENTIAL)
        batch = IndexDocumentsBatch()

        uploads = batch.add_upload_actions("upload1")
        assert len(uploads) == 1
        assert all(action.action_type == "upload" for action in uploads)

        deletes = batch.add_delete_actions("delete1", "delete2")
        assert len(deletes) == 2
        assert all(action.action_type == "delete" for action in deletes)

        merges = batch.add_merge_actions(["merge1", "merge2", "merge3"])
        assert all(action.action_type == "merge" for action in merges)

        merge_or_uploads = batch.add_merge_or_upload_actions("merge_or_upload1")
        assert all(action.action_type == "mergeOrUpload"
                   for action in merge_or_uploads)

        client.index_documents(batch, extra="foo")
        assert mock_index.called
        # call_args is an (args, kwargs) pair for the most recent call.
        positional, keywords = mock_index.call_args
        assert positional == ()
        assert len(keywords) == 4
        assert keywords["headers"] == client._headers
        assert keywords["extra"] == "foo"
Ejemplo n.º 7
0
 def test_suggest_bad_argument(self):
     """``suggest`` called with a bare string must raise a descriptive ``TypeError``."""
     client = SearchClient("endpoint", "index name", CREDENTIAL)
     with pytest.raises(TypeError) as e:
         client.suggest("bad_query")
     # The message check has to live outside the ``with`` block: anything
     # placed after the raising call inside it is never executed. The raised
     # exception is ``e.value``; ``e`` itself is pytest's ExceptionInfo wrapper.
     assert str(e.value) == (
         "Expected a SuggestQuery for 'query', but got {}".format(
             repr("bad_query")))
Ejemplo n.º 8
0
    def test_upload_documents_new(self, api_key, endpoint, index_name,
                                  **kwargs):
        """Upload two documents and verify they can be fetched back.

        Both upload results must report status 201, the index must then hold
        12 documents, and each uploaded document must round-trip its fields.
        """
        client = SearchClient(endpoint, index_name,
                              AzureKeyCredential(api_key))
        documents = [
            {"hotelId": "1000", "rating": 5, "rooms": [],
             "hotelName": "Azure Inn"},
            {"hotelId": "1001", "rating": 4, "rooms": [],
             "hotelName": "Redmond Hotel"},
        ]

        results = client.upload_documents(documents)
        assert len(results) == 2
        assert {item.status_code for item in results} == {201}

        # Documents may take a moment to become searchable on a live service
        if self.is_live:
            time.sleep(TIME_TO_SLEEP)

        assert client.get_document_count() == 12
        for expected in documents:
            fetched = client.get_document(key=expected["hotelId"])
            for field in ("hotelId", "hotelName", "rating", "rooms"):
                assert fetched[field] == expected[field]
 def test_autocomplete(self, api_key, endpoint, index_name, **kwargs):
     """Autocompleting "mot" with suggester "sg" yields the single "motel" completion."""
     credential = AzureKeyCredential(api_key)
     client = SearchClient(endpoint, index_name, credential)

     request = AutocompleteQuery(search_text="mot", suggester_name="sg")
     expected = [{"text": "motel", "query_plus_text": "motel"}]
     assert client.autocomplete(query=request) == expected
Ejemplo n.º 10
0
    def test_upload_documents_existing(self, api_key, endpoint, index_name,
                                       **kwargs):
        """Upload two documents through the buffered sender and check the count.

        The index is expected to hold 11 documents afterwards. The sender is
        closed in a ``finally`` clause so that it is released even when the
        upload or the assertion fails.
        """
        client = SearchClient(endpoint, index_name,
                              AzureKeyCredential(api_key))
        batch_client = SearchIndexingBufferedSender(
            endpoint, index_name, AzureKeyCredential(api_key))
        batch_client._batch_size = 2
        DOCUMENTS = [
            {
                "hotelId": "1000",
                "rating": 5,
                "rooms": [],
                "hotelName": "Azure Inn"
            },
            {
                "hotelId": "3",
                "rating": 4,
                "rooms": [],
                "hotelName": "Redmond Hotel"
            },
        ]
        try:
            batch_client.upload_documents(DOCUMENTS)

            # There can be some lag before a document is searchable
            if self.is_live:
                time.sleep(TIME_TO_SLEEP)

            assert client.get_document_count() == 11
        finally:
            # Previously skipped whenever the assertion above failed.
            batch_client.close()
    def test_upload_documents_new(self, api_key, endpoint, index_name,
                                  **kwargs):
        """Queue two upload actions on the batching client and verify the documents.

        The index is expected to hold 12 documents afterwards and each
        uploaded document must round-trip its fields. The batching client is
        closed in a ``finally`` clause so that it is released even when a
        lookup or an assertion fails.
        """
        client = SearchClient(endpoint, index_name,
                              AzureKeyCredential(api_key))
        batch_client = SearchIndexDocumentBatchingClient(
            endpoint, index_name, AzureKeyCredential(api_key))
        batch_client._batch_size = 2
        DOCUMENTS = [
            {
                "hotelId": "1000",
                "rating": 5,
                "rooms": [],
                "hotelName": "Azure Inn"
            },
            {
                "hotelId": "1001",
                "rating": 4,
                "rooms": [],
                "hotelName": "Redmond Hotel"
            },
        ]
        try:
            batch_client.add_upload_actions(DOCUMENTS)

            # There can be some lag before a document is searchable
            if self.is_live:
                time.sleep(TIME_TO_SLEEP)

            assert client.get_document_count() == 12
            for doc in DOCUMENTS:
                result = client.get_document(key=doc["hotelId"])
                assert result["hotelId"] == doc["hotelId"]
                assert result["hotelName"] == doc["hotelName"]
                assert result["rating"] == doc["rating"]
                assert result["rooms"] == doc["rooms"]
        finally:
            # Previously skipped whenever one of the assertions above failed.
            batch_client.close()
    def test_merge_documents_existing(self, api_key, endpoint, index_name,
                                      **kwargs):
        """Queue merge actions for two documents and verify the merged ratings.

        After the batching client is closed the index is expected to still
        hold 10 documents, and both keys must report the merged rating.
        """
        client = SearchClient(endpoint, index_name,
                              AzureKeyCredential(api_key))
        batch_client = SearchIndexDocumentBatchingClient(
            endpoint, index_name, AzureKeyCredential(api_key))
        batch_client._batch_size = 2

        merges = [
            {"hotelId": "3", "rating": 1},
            {"hotelId": "4", "rating": 2},
        ]
        batch_client.add_merge_actions(merges)
        batch_client.close()

        # Documents may take a moment to become searchable on a live service
        if self.is_live:
            time.sleep(TIME_TO_SLEEP)

        assert client.get_document_count() == 10

        for hotel_id, expected_rating in (("3", 1), ("4", 2)):
            fetched = client.get_document(key=hotel_id)
            assert fetched["rating"] == expected_rating
Ejemplo n.º 13
0
 def test_suggest_query_argument(self, mock_suggest_post):
     """``suggest`` forwards the headers and the search text to the mocked operation."""
     client = SearchClient("endpoint", "index name", CREDENTIAL)
     client.suggest(search_text="search text", suggester_name="sg")

     assert mock_suggest_post.called
     # call_args is an (args, kwargs) pair for the most recent call.
     positional, keywords = mock_suggest_post.call_args
     assert positional == ()
     assert keywords["headers"] == client._headers
     assert keywords["suggest_request"].search_text == "search text"
Ejemplo n.º 14
0
    def test_get_search_simple(self, api_key, endpoint, index_name, **kwargs):
        """Plain text searches return the expected number of hits per query."""
        client = SearchClient(endpoint, index_name,
                              AzureKeyCredential(api_key))

        for text, expected_hits in (("hotel", 7), ("motel", 2)):
            hits = list(client.search(search_text=text))
            assert len(hits) == expected_hits
Ejemplo n.º 15
0
    def test_get_search_facets_none(self, api_key, endpoint, index_name,
                                    **kwargs):
        """A search issued without a ``facets`` argument reports no facets."""
        credential = AzureKeyCredential(api_key)
        client = SearchClient(endpoint, index_name, credential)

        selected_fields = ",".join(("hotelName", "category", "description"))
        results = client.search(search_text="WiFi", select=selected_fields)
        assert results.get_facets() is None
 def test_get_document(self, endpoint, api_key, index_name, index_batch):
     """Fetch documents "1" through "10" and compare them with ``index_batch``.

     Document ``n`` is compared against ``index_batch["value"][n - 1]``.
     """
     # NOTE(review): ``api_key`` is handed to the client as-is here, not
     # wrapped in a credential object -- confirm that is what the caller supplies.
     client = SearchClient(endpoint, index_name, api_key)
     compared_fields = ("hotelId", "hotelName", "description")
     for hotel_id in range(1, 11):
         fetched = client.get_document(key=str(hotel_id))
         expected = index_batch["value"][hotel_id - 1]
         for field in compared_fields:
             assert fetched.get(field) == expected.get(field)
Ejemplo n.º 17
0
 def test_get_document(self, api_key, endpoint, index_name, **kwargs):
     """Fetch documents "1" through "10" and compare them with ``BATCH``.

     Document ``n`` is compared against ``BATCH["value"][n - 1]``.
     """
     credential = AzureKeyCredential(api_key)
     client = SearchClient(endpoint, index_name, credential)
     compared_fields = ("hotelId", "hotelName", "description")
     for hotel_id in range(1, 11):
         fetched = client.get_document(key=str(hotel_id))
         expected = BATCH["value"][hotel_id - 1]
         for field in compared_fields:
             assert fetched.get(field) == expected.get(field)
Ejemplo n.º 18
0
    def test_get_search_counts(self, api_key, endpoint, index_name, **kwargs):
        """``get_count`` is ``None`` unless the total count was requested."""
        credential = AzureKeyCredential(api_key)
        client = SearchClient(endpoint, index_name, credential)

        without_count = client.search(search_text="hotel")
        assert without_count.get_count() is None

        with_count = client.search(search_text="hotel",
                                   include_total_count=True)
        assert with_count.get_count() == 7
 def test_suggest(self, api_key, endpoint, index_name, **kwargs):
     """Suggesting for "mot" with suggester "sg" returns the two expected hotels."""
     credential = AzureKeyCredential(api_key)
     client = SearchClient(endpoint, index_name, credential)

     expected = [
         {"hotelId": "2", "text": "Cheapest hotel in town. Infact, a motel."},
         {"hotelId": "9", "text": "Secret Point Motel"},
     ]
     suggestions = client.suggest(search_text="mot", suggester_name="sg")
     assert suggestions == expected
Ejemplo n.º 20
0
 def test_get_document_count_v2020_06_30(self, mock_count):
     """With API version 2020-06-30, ``get_document_count`` forwards only the headers."""
     client = SearchClient(
         "endpoint",
         "index name",
         CREDENTIAL,
         api_version=ApiVersion.V2020_06_30,
     )
     client.get_document_count()

     assert mock_count.called
     # call_args is an (args, kwargs) pair for the most recent call.
     positional, keywords = mock_count.call_args
     assert positional == ()
     assert len(keywords) == 1
     assert keywords["headers"] == client._headers
Ejemplo n.º 21
0
 def test_request_too_large_error(self):
     """A ``RequestEntityTooLargeError`` from the internal indexing call reaches the caller."""
     too_large = RequestEntityTooLargeError("Error")
     patched_indexing = mock.patch.object(
         SearchClient, "_index_documents_actions", side_effect=too_large)

     with patched_indexing:
         client = SearchClient("endpoint", "index name", CREDENTIAL)
         batch = IndexDocumentsBatch()
         batch.add_upload_actions("upload1")
         with pytest.raises(RequestEntityTooLargeError):
             client.index_documents(batch, extra="foo")
 def test_upload_documents_existing(self, api_key, endpoint, index_name, **kwargs):
     """Uploading two documents yields one 200 and one 201 result status."""
     credential = AzureKeyCredential(api_key)
     client = SearchClient(endpoint, index_name, credential)

     documents = [
         {"hotelId": "1000", "rating": 5, "rooms": [], "hotelName": "Azure Inn"},
         {"hotelId": "3", "rating": 4, "rooms": [], "hotelName": "Redmond Hotel"},
     ]
     results = client.upload_documents(documents)

     assert len(results) == 2
     status_codes = {item.status_code for item in results}
     assert status_codes == {200, 201}
Ejemplo n.º 23
0
 def test_headers_merge(self):
     """``_merge_client_headers`` returns a new dict combining client and caller headers."""
     client = SearchClient(
         "endpoint", "index name", AzureKeyCredential(key="test_api_key"))
     caller_headers = {"foo": "bar"}
     expected = {
         "api-key": "test_api_key",
         "Accept": "application/json;odata.metadata=none",
         "foo": "bar",
     }

     merged = client._merge_client_headers(caller_headers)

     # The caller's dict must not be the object handed back.
     assert merged is not caller_headers
     assert merged == expected
Ejemplo n.º 24
0
    def test_get_search_coverage(self, api_key, endpoint, index_name,
                                 **kwargs):
        """``get_coverage`` is ``None`` unless a minimum coverage was requested."""
        credential = AzureKeyCredential(api_key)
        client = SearchClient(endpoint, index_name, credential)

        plain = client.search(search_text="hotel")
        assert plain.get_coverage() is None

        minimum = 50.0
        covered = client.search(search_text="hotel", minimum_coverage=minimum)
        coverage = covered.get_coverage()
        assert isinstance(coverage, float)
        assert coverage >= minimum
def get_document():
    """Sample: fetch the document with key "23" and print a few of its fields.

    Uses the module-level ``service_endpoint``, ``index_name`` and ``key``.
    """
    # [START get_document]
    from azure.core.credentials import AzureKeyCredential
    from azure.search.documents import SearchClient

    credential = AzureKeyCredential(key)
    search_client = SearchClient(service_endpoint, index_name, credential)

    hotel = search_client.get_document(key="23")

    print("Details for hotel '23' are:")
    # Each template carries its own padding so the labels line up on output.
    for template, field in (
        ("        Name: {}", "HotelName"),
        ("      Rating: {}", "Rating"),
        ("    Category: {}", "Category"),
    ):
        print(template.format(hotel[field]))
Ejemplo n.º 26
0
def autocomplete_query():
    """Sample: print the autocomplete completions for "bo" from suggester "sg".

    Uses the module-level ``service_endpoint``, ``index_name`` and ``key``.
    """
    # [START autocomplete_query]
    from azure.core.credentials import AzureKeyCredential
    from azure.search.documents import SearchClient

    credential = AzureKeyCredential(key)
    search_client = SearchClient(service_endpoint, index_name, credential)

    completions = search_client.autocomplete(search_text="bo",
                                             suggester_name="sg")

    print("Autocomplete suggestions for 'bo'")
    for completion in completions:
        line = "    Completion: {}".format(completion["text"])
        print(line)
Ejemplo n.º 27
0
def suggest_query():
    """Sample: print suggestions for "coffee" together with each hotel's name.

    Every suggestion triggers an additional ``get_document`` lookup keyed by
    its ``HotelId``. Uses the module-level ``service_endpoint``,
    ``index_name`` and ``key``.
    """
    # [START suggest_query]
    from azure.core.credentials import AzureKeyCredential
    from azure.search.documents import SearchClient

    credential = AzureKeyCredential(key)
    search_client = SearchClient(service_endpoint, index_name, credential)

    suggestions = search_client.suggest(search_text="coffee", suggester_name="sg")

    print("Search suggestions for 'coffee'")
    for suggestion in suggestions:
        hotel = search_client.get_document(key=suggestion["HotelId"])
        line = "    Text: {} for Hotel: {}".format(
            repr(suggestion["text"]), hotel["HotelName"])
        print(line)
Ejemplo n.º 28
0
def simple_text_query():
    """Sample: search for "spa" and print the name and rating of every hit.

    Uses the module-level ``service_endpoint``, ``index_name`` and ``key``.
    """
    # [START simple_query]
    from azure.core.credentials import AzureKeyCredential
    from azure.search.documents import SearchClient

    credential = AzureKeyCredential(key)
    search_client = SearchClient(service_endpoint, index_name, credential)

    # NOTE(review): the text is passed via the ``query`` keyword here --
    # confirm that matches the installed SDK's ``search`` signature.
    hits = search_client.search(query="spa")

    print("Hotels containing 'spa' in the name (or other fields):")
    for hit in hits:
        line = "    Name: {} (rating {})".format(hit["HotelName"],
                                                 hit["Rating"])
        print(line)
Ejemplo n.º 29
0
 def test_autocomplete_query_argument_v2020_06_30(self,
                                                  mock_autocomplete_post):
     """With API version 2020-06-30, ``autocomplete`` forwards headers and search text."""
     client = SearchClient(
         "endpoint",
         "index name",
         CREDENTIAL,
         api_version=ApiVersion.V2020_06_30,
     )
     client.autocomplete(search_text="search text", suggester_name="sg")

     assert mock_autocomplete_post.called
     # call_args is an (args, kwargs) pair for the most recent call.
     positional, keywords = mock_autocomplete_post.call_args
     assert positional == ()
     assert keywords["headers"] == client._headers
     assert keywords["autocomplete_request"].search_text == "search text"
Ejemplo n.º 30
0
 def test_get_count_reset_continuation_token(self, mock_search_post):
     """``get_count`` must leave the first page iterator without a continuation token."""
     client = SearchClient("endpoint", "index name", CREDENTIAL)
     paged = client.search(search_text="search text")
     assert isinstance(paged, ItemPaged)
     assert paged._page_iterator_class is SearchPageIterator

     # Canned service response containing a single document.
     canned = SearchDocumentsResult()
     canned.results = [SearchResult(additional_properties={"key": "val"})]
     mock_search_post.return_value = canned

     # Advance once, then plant a token that get_count() is expected to clear.
     next(paged)
     paged._first_page_iterator_instance.continuation_token = "fake token"
     paged.get_count()
     assert not paged._first_page_iterator_instance.continuation_token