def _set_up_index(service_name, endpoint, api_key, schema, index_batch):
    """Create a search index from ``schema`` via the REST API and, optionally,
    seed it with the documents in ``index_batch``.

    :param service_name: name of the search service (used to build the REST URL).
    :param endpoint: full endpoint URL used by the data-plane ``SearchClient``.
    :param api_key: admin api-key for the service.
    :param schema: index schema, resolved through ``_load_schema`` (JSON text).
    :param index_batch: optional document batch, resolved through ``_load_batch``.
    :raises AzureTestError: if index creation does not return 201, or if any
        document in the batch fails to index.
    """
    # Local imports keep the azure-search dependency out of module import time.
    from azure.core.credentials import AzureKeyCredential
    from azure.search.documents import SearchClient
    from azure.search.documents._generated.models import IndexBatch

    schema = _load_schema(schema)
    index_batch = _load_batch(index_batch)

    if schema:
        # The index name lives inside the schema JSON itself.
        index_name = json.loads(schema)["name"]
        response = requests.post(
            SERVICE_URL_FMT.format(service_name, SEARCH_ENDPOINT_SUFFIX),
            headers={"Content-Type": "application/json", "api-key": api_key},
            data=schema,
        )
        # 201 Created is the only acceptable outcome for a fresh index.
        if response.status_code != 201:
            raise AzureTestError(
                "Could not create a search index {}".format(response.status_code)
            )

    # optionally load data into the index
    if index_batch and schema:
        batch = IndexBatch.deserialize(index_batch)
        index_client = SearchClient(endpoint, index_name, AzureKeyCredential(api_key))
        results = index_client.index_documents(batch)
        if not all(result.succeeded for result in results):
            raise AzureTestError("Document upload to search index failed")

        # Indexing is asynchronous, so if you get a 200 from the REST API, that only means that the documents are
        # persisted, not that they're searchable yet. The only way to check for searchability is to run queries,
        # and even then things are eventually consistent due to replication. In the Track 1 SDK tests, we "solved"
        # this by using a constant delay between indexing and querying.
        import time

        time.sleep(TIME_TO_SLEEP)
def test_get_document_count(self, mock_count):
    """get_document_count passes no positionals and exactly the merged headers."""
    client = SearchClient("endpoint", "index name", CREDENTIAL)
    client.get_document_count()
    assert mock_count.called
    args, kwargs = mock_count.call_args
    assert args == ()
    assert len(kwargs) == 1
    assert kwargs["headers"] == client._headers
def search_query(search_text, filter_by=None, sort_order=None):
    """Submit a full-text query to the search index and return the results.

    :param search_text: full-text query (all terms must match: search_mode="all").
    :param filter_by: optional OData ``$filter`` expression.
    :param sort_order: optional ``$orderby`` expression.
    :return: the paged search results.

    FIX: the original wrapped everything in ``try/except Exception as ex:
    raise ex`` — a no-op handler that only re-raised the same exception and
    obscured the traceback. Exceptions now propagate naturally.
    """
    # Create a search client
    azure_credential = AzureKeyCredential(search_key)
    search_client = SearchClient(search_endpoint, search_index, azure_credential)

    # Submit search query; totals, facets and highlights are requested so the
    # caller can render a full results page from one round trip.
    results = search_client.search(
        search_text,
        search_mode="all",
        include_total_count=True,
        filter=filter_by,
        order_by=sort_order,
        facets=['metadata_author'],
        highlight_fields='merged_content-3,imageCaption-3',
        select="url,metadata_storage_name,metadata_author,metadata_storage_size,"
               "metadata_storage_last_modified,language,sentiment,merged_content,"
               "keyphrases,locations,imageTags,imageCaption")
    return results
def search(rootdir, Query, index_name, endpoint, key, fnames, vertical):
    """Search the index for ``Query``, record matching files in the vertical's
    tracker CSV, and write a ``result.csv`` of (FileName, FilePath, Score).

    :param rootdir: directory (with trailing separator) where result.csv is written.
    :param Query: the search text; also appended to each hit's 'Intent' column.
    :param index_name: target search index.
    :param endpoint: search service endpoint URL.
    :param key: search service api-key.
    :param fnames: only hits whose FileName is in this collection are kept.
    :param vertical: selects TrackerFiles/<vertical>.csv to update.
    :return: path to the written result.csv.

    FIXES: the ``df['FileName'] == str(...)`` boolean mask was recomputed four
    times per hit — it is now computed once; the csv file is opened with
    ``newline=''`` as the csv module requires (avoids blank rows on Windows).
    """
    field_names = ['FileName', 'FilePath', 'Score']

    # Create a client
    credential = AzureKeyCredential(key)
    client = SearchClient(endpoint=endpoint, index_name=index_name,
                          credential=credential)
    results = client.search(search_text=Query)

    tracker_path = 'TrackerFiles/{}.csv'.format(vertical)
    df = pd.read_csv(tracker_path)
    r = []
    query_text = str(Query)
    for result in results:
        if result['FileName'] not in fnames:
            continue
        r.append({
            'FileName': result['FileName'],
            'FilePath': result['FilePath'],
            'Score': result['@search.score'],
        })
        # Compute the row selector once instead of four times per hit.
        mask = df['FileName'] == str(result['FileName'])
        if df.loc[mask, 'Intent'].isnull().any():
            df.loc[mask, 'Intent'] = query_text
        else:
            # Append this query to the comma-separated Intent history.
            df.loc[mask, 'Intent'] = df.loc[mask, 'Intent'] + "," + query_text

    df.to_csv(tracker_path, index=False)
    # newline='' is required by csv.writer to avoid extra blank lines on Windows.
    with open(rootdir + 'result.csv', 'w', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=field_names)
        writer.writeheader()
        writer.writerows(r)
    return rootdir + 'result.csv'
def test_merge_or_upload_documents(self, api_key, endpoint, index_name, **kwargs):
    """merge_or_upload via the buffered sender: one new doc added, one existing merged."""
    credential = AzureKeyCredential(api_key)
    client = SearchClient(endpoint, index_name, credential)
    batch_client = SearchIndexingBufferedSender(endpoint, index_name, credential)
    batch_client._batch_size = 2
    batch_client.merge_or_upload_documents(
        [{"hotelId": "1000", "rating": 1}, {"hotelId": "4", "rating": 2}]
    )
    batch_client.close()
    # There can be some lag before a document is searchable
    if self.is_live:
        time.sleep(TIME_TO_SLEEP)
    assert client.get_document_count() == 11
    for doc_key, expected_rating in (("1000", 1), ("4", 2)):
        assert client.get_document(key=doc_key)["rating"] == expected_rating
def test_index_documents(self, mock_index):
    """Each add_* helper tags its actions correctly; index_documents forwards kwargs."""
    client = SearchClient("endpoint", "index name", CREDENTIAL)
    batch = IndexDocumentsBatch()

    added = batch.add_upload_actions("upload1")
    assert len(added) == 1
    assert all(a.action_type == "upload" for a in added)

    added = batch.add_delete_actions("delete1", "delete2")
    assert len(added) == 2
    assert all(a.action_type == "delete" for a in added)

    added = batch.add_merge_actions(["merge1", "merge2", "merge3"])
    assert all(a.action_type == "merge" for a in added)

    added = batch.add_merge_or_upload_actions("merge_or_upload1")
    assert all(a.action_type == "mergeOrUpload" for a in added)

    client.index_documents(batch, extra="foo")
    assert mock_index.called
    args, kwargs = mock_index.call_args
    assert args == ()
    assert len(kwargs) == 4
    assert kwargs["headers"] == client._headers
    assert kwargs["extra"] == "foo"
def test_suggest_bad_argument(self):
    """suggest() rejects a bare string where a SuggestQuery is required.

    FIX: ``str(e)`` on a pytest ``ExceptionInfo`` includes the file/line
    location, so the equality check could never match the bare message;
    the exception text must be read from ``e.value``.
    """
    client = SearchClient("endpoint", "index name", CREDENTIAL)
    with pytest.raises(TypeError) as e:
        client.suggest("bad_query")
    assert str(e.value) == "Expected a SuggestQuery for 'query', but got {}".format(
        repr("bad_query")
    )
def test_upload_documents_new(self, api_key, endpoint, index_name, **kwargs):
    """Uploading brand-new documents returns 201 per doc and makes them retrievable."""
    client = SearchClient(endpoint, index_name, AzureKeyCredential(api_key))
    new_docs = [
        {"hotelId": "1000", "rating": 5, "rooms": [], "hotelName": "Azure Inn"},
        {"hotelId": "1001", "rating": 4, "rooms": [], "hotelName": "Redmond Hotel"},
    ]
    outcomes = client.upload_documents(new_docs)
    assert len(outcomes) == 2
    assert {x.status_code for x in outcomes} == {201}

    # There can be some lag before a document is searchable
    if self.is_live:
        time.sleep(TIME_TO_SLEEP)

    assert client.get_document_count() == 12
    for doc in new_docs:
        fetched = client.get_document(key=doc["hotelId"])
        for field in ("hotelId", "hotelName", "rating", "rooms"):
            assert fetched[field] == doc[field]
def test_autocomplete(self, api_key, endpoint, index_name, **kwargs):
    """Autocomplete on the 'sg' suggester completes 'mot' to 'motel'."""
    client = SearchClient(endpoint, index_name, AzureKeyCredential(api_key))
    request = AutocompleteQuery(search_text="mot", suggester_name="sg")
    completions = client.autocomplete(query=request)
    assert completions == [{"text": "motel", "query_plus_text": "motel"}]
def test_upload_documents_existing(self, api_key, endpoint, index_name, **kwargs):
    """Uploading one new and one existing doc through the sender grows the index by one."""
    credential = AzureKeyCredential(api_key)
    client = SearchClient(endpoint, index_name, credential)
    batch_client = SearchIndexingBufferedSender(endpoint, index_name, credential)
    batch_client._batch_size = 2
    docs = [
        {"hotelId": "1000", "rating": 5, "rooms": [], "hotelName": "Azure Inn"},
        {"hotelId": "3", "rating": 4, "rooms": [], "hotelName": "Redmond Hotel"},
    ]
    batch_client.upload_documents(docs)
    # There can be some lag before a document is searchable
    if self.is_live:
        time.sleep(TIME_TO_SLEEP)
    assert client.get_document_count() == 11
    batch_client.close()
def test_upload_documents_new(self, api_key, endpoint, index_name, **kwargs):
    """Two new docs queued through the batching client become searchable."""
    credential = AzureKeyCredential(api_key)
    client = SearchClient(endpoint, index_name, credential)
    batch_client = SearchIndexDocumentBatchingClient(endpoint, index_name, credential)
    batch_client._batch_size = 2
    new_docs = [
        {"hotelId": "1000", "rating": 5, "rooms": [], "hotelName": "Azure Inn"},
        {"hotelId": "1001", "rating": 4, "rooms": [], "hotelName": "Redmond Hotel"},
    ]
    batch_client.add_upload_actions(new_docs)
    # There can be some lag before a document is searchable
    if self.is_live:
        time.sleep(TIME_TO_SLEEP)
    assert client.get_document_count() == 12
    for doc in new_docs:
        fetched = client.get_document(key=doc["hotelId"])
        for field in ("hotelId", "hotelName", "rating", "rooms"):
            assert fetched[field] == doc[field]
    batch_client.close()
def test_merge_documents_existing(self, api_key, endpoint, index_name, **kwargs):
    """Merging into two existing docs updates ratings without changing the count."""
    credential = AzureKeyCredential(api_key)
    client = SearchClient(endpoint, index_name, credential)
    batch_client = SearchIndexDocumentBatchingClient(endpoint, index_name, credential)
    batch_client._batch_size = 2
    batch_client.add_merge_actions(
        [{"hotelId": "3", "rating": 1}, {"hotelId": "4", "rating": 2}]
    )
    batch_client.close()
    # There can be some lag before a document is searchable
    if self.is_live:
        time.sleep(TIME_TO_SLEEP)
    assert client.get_document_count() == 10
    for doc_key, expected_rating in (("3", 1), ("4", 2)):
        assert client.get_document(key=doc_key)["rating"] == expected_rating
def test_suggest_query_argument(self, mock_suggest_post):
    """suggest() builds a SuggestRequest carrying the search text and client headers."""
    client = SearchClient("endpoint", "index name", CREDENTIAL)
    client.suggest(search_text="search text", suggester_name="sg")
    assert mock_suggest_post.called
    args, kwargs = mock_suggest_post.call_args
    assert args == ()
    assert kwargs["headers"] == client._headers
    assert kwargs["suggest_request"].search_text == "search text"
def test_get_search_simple(self, api_key, endpoint, index_name, **kwargs):
    """Plain text queries return the expected hit counts against the seeded index."""
    client = SearchClient(endpoint, index_name, AzureKeyCredential(api_key))
    assert len(list(client.search(search_text="hotel"))) == 7
    assert len(list(client.search(search_text="motel"))) == 2
def test_get_search_facets_none(self, api_key, endpoint, index_name, **kwargs):
    """Without a facets argument the result object reports no facets."""
    client = SearchClient(endpoint, index_name, AzureKeyCredential(api_key))
    results = client.search(
        search_text="WiFi",
        select="hotelName,category,description",
    )
    assert results.get_facets() is None
def test_get_document(self, endpoint, api_key, index_name, index_batch):
    """Each seeded hotel can be fetched by key and matches the index_batch fixture.

    FIX (consistency): every sibling test wraps the raw api_key string in
    ``AzureKeyCredential`` before handing it to ``SearchClient``; this test
    passed the bare string. NOTE(review): assumes ``api_key`` here is a plain
    string like in the other tests — confirm against the fixture.
    """
    client = SearchClient(endpoint, index_name, AzureKeyCredential(api_key))
    for hotel_id in range(1, 11):
        result = client.get_document(key=str(hotel_id))
        # Fixture docs are 0-indexed; document keys start at "1".
        expected = index_batch["value"][hotel_id - 1]
        assert result.get("hotelId") == expected.get("hotelId")
        assert result.get("hotelName") == expected.get("hotelName")
        assert result.get("description") == expected.get("description")
def test_get_document(self, api_key, endpoint, index_name, **kwargs):
    """Each seeded hotel can be fetched by key and matches the BATCH fixture."""
    client = SearchClient(endpoint, index_name, AzureKeyCredential(api_key))
    for hotel_id in range(1, 11):
        fetched = client.get_document(key=str(hotel_id))
        seeded = BATCH["value"][hotel_id - 1]
        for field in ("hotelId", "hotelName", "description"):
            assert fetched.get(field) == seeded.get(field)
def test_get_search_counts(self, api_key, endpoint, index_name, **kwargs):
    """get_count() is None unless include_total_count was requested."""
    client = SearchClient(endpoint, index_name, AzureKeyCredential(api_key))
    assert client.search(search_text="hotel").get_count() is None
    counted = client.search(search_text="hotel", include_total_count=True)
    assert counted.get_count() == 7
def test_suggest(self, api_key, endpoint, index_name, **kwargs):
    """'mot' suggestions surface exactly the two motel documents."""
    client = SearchClient(endpoint, index_name, AzureKeyCredential(api_key))
    suggestions = client.suggest(search_text="mot", suggester_name="sg")
    expected = [
        {"hotelId": "2", "text": "Cheapest hotel in town. Infact, a motel."},
        {"hotelId": "9", "text": "Secret Point Motel"},
    ]
    assert suggestions == expected
def test_get_document_count_v2020_06_30(self, mock_count):
    """Pinning api_version=2020-06-30 still routes count through the merged headers."""
    client = SearchClient(
        "endpoint", "index name", CREDENTIAL, api_version=ApiVersion.V2020_06_30
    )
    client.get_document_count()
    assert mock_count.called
    args, kwargs = mock_count.call_args
    assert args == ()
    assert len(kwargs) == 1
    assert kwargs["headers"] == client._headers
def test_request_too_large_error(self):
    """A RequestEntityTooLargeError from the transport propagates out of index_documents."""
    patched = mock.patch.object(
        SearchClient,
        "_index_documents_actions",
        side_effect=RequestEntityTooLargeError("Error"),
    )
    with patched:
        client = SearchClient("endpoint", "index name", CREDENTIAL)
        batch = IndexDocumentsBatch()
        batch.add_upload_actions("upload1")
        with pytest.raises(RequestEntityTooLargeError):
            client.index_documents(batch, extra="foo")
def test_upload_documents_existing(self, api_key, endpoint, index_name, **kwargs):
    """Uploading one new and one existing doc yields one 201 and one 200."""
    client = SearchClient(endpoint, index_name, AzureKeyCredential(api_key))
    docs = [
        {"hotelId": "1000", "rating": 5, "rooms": [], "hotelName": "Azure Inn"},
        {"hotelId": "3", "rating": 4, "rooms": [], "hotelName": "Redmond Hotel"},
    ]
    outcomes = client.upload_documents(docs)
    assert len(outcomes) == 2
    assert {x.status_code for x in outcomes} == {200, 201}
def test_headers_merge(self):
    """Client headers are merged into a NEW dict, leaving the caller's dict untouched."""
    client = SearchClient(
        "endpoint", "index name", AzureKeyCredential(key="test_api_key")
    )
    caller_headers = {"foo": "bar"}
    merged = client._merge_client_headers(caller_headers)
    assert merged is not caller_headers
    expected = {
        "api-key": "test_api_key",
        "Accept": "application/json;odata.metadata=none",
        "foo": "bar",
    }
    assert merged == expected
def test_get_search_coverage(self, api_key, endpoint, index_name, **kwargs):
    """Coverage is only reported when minimum_coverage is requested."""
    client = SearchClient(endpoint, index_name, AzureKeyCredential(api_key))
    assert client.search(search_text="hotel").get_coverage() is None
    results = client.search(search_text="hotel", minimum_coverage=50.0)
    coverage = results.get_coverage()
    assert isinstance(coverage, float)
    assert coverage >= 50.0
def get_document():
    """Sample: fetch a single document by its key and print selected fields."""
    # [START get_document]
    from azure.core.credentials import AzureKeyCredential
    from azure.search.documents import SearchClient

    client = SearchClient(service_endpoint, index_name, AzureKeyCredential(key))

    hotel = client.get_document(key="23")

    print("Details for hotel '23' are:")
    print(" Name: {}".format(hotel["HotelName"]))
    print(" Rating: {}".format(hotel["Rating"]))
    print(" Category: {}".format(hotel["Category"]))
def autocomplete_query():
    """Sample: autocomplete the partial term 'bo' using the 'sg' suggester."""
    # [START autocomplete_query]
    from azure.core.credentials import AzureKeyCredential
    from azure.search.documents import SearchClient

    client = SearchClient(service_endpoint, index_name, AzureKeyCredential(key))

    completions = client.autocomplete(search_text="bo", suggester_name="sg")

    print("Autocomplete suggestions for 'bo'")
    for completion in completions:
        print(" Completion: {}".format(completion["text"]))
def suggest_query():
    """Sample: get suggestions for 'coffee' and resolve each hit to its hotel name."""
    # [START suggest_query]
    from azure.core.credentials import AzureKeyCredential
    from azure.search.documents import SearchClient

    client = SearchClient(service_endpoint, index_name, AzureKeyCredential(key))

    suggestions = client.suggest(search_text="coffee", suggester_name="sg")

    print("Search suggestions for 'coffee'")
    for suggestion in suggestions:
        # Look up the full document so we can show the hotel's display name.
        hotel = client.get_document(key=suggestion["HotelId"])
        print(" Text: {} for Hotel: {}".format(repr(suggestion["text"]), hotel["HotelName"]))
def simple_text_query():
    """Sample: run a plain full-text search for 'spa' and print name and rating.

    FIX: ``SearchClient.search`` has no ``query`` keyword — the search string
    is passed as ``search_text`` (as every other call in this file does);
    ``query="spa"`` would raise a TypeError at runtime.
    """
    # [START simple_query]
    from azure.core.credentials import AzureKeyCredential
    from azure.search.documents import SearchClient

    search_client = SearchClient(service_endpoint, index_name, AzureKeyCredential(key))

    results = search_client.search(search_text="spa")

    print("Hotels containing 'spa' in the name (or other fields):")
    for result in results:
        print(" Name: {} (rating {})".format(result["HotelName"], result["Rating"]))
def test_autocomplete_query_argument_v2020_06_30(self, mock_autocomplete_post):
    """autocomplete() under api_version 2020-06-30 builds the expected request."""
    client = SearchClient(
        "endpoint", "index name", CREDENTIAL, api_version=ApiVersion.V2020_06_30
    )
    client.autocomplete(search_text="search text", suggester_name="sg")
    assert mock_autocomplete_post.called
    args, kwargs = mock_autocomplete_post.call_args
    assert args == ()
    assert kwargs["headers"] == client._headers
    assert kwargs["autocomplete_request"].search_text == "search text"
def test_get_count_reset_continuation_token(self, mock_search_post):
    """get_count() must clear any continuation token on the first page iterator."""
    client = SearchClient("endpoint", "index name", CREDENTIAL)
    paged = client.search(search_text="search text")
    assert isinstance(paged, ItemPaged)
    assert paged._page_iterator_class is SearchPageIterator

    # Stub one page of results so __next__ succeeds against the mocked transport.
    fake_page = SearchDocumentsResult()
    fake_page.results = [SearchResult(additional_properties={"key": "val"})]
    mock_search_post.return_value = fake_page

    paged.__next__()
    paged._first_page_iterator_instance.continuation_token = "fake token"
    paged.get_count()
    assert not paged._first_page_iterator_instance.continuation_token