def test_create_skillset(self, api_key, endpoint, index_name, **kwargs):
    """Creating a skillset should round-trip its name, description and skill list."""
    client = SearchIndexerClient(endpoint, AzureKeyCredential(api_key))
    entity_skill = EntityRecognitionSkill(
        inputs=[InputFieldMappingEntry(name="text", source="/document/content")],
        outputs=[OutputFieldMappingEntry(name="organizations", target_name="organizations")],
    )
    skillset = SearchIndexerSkillset(name="test-ss", skills=[entity_skill], description="desc")
    created = client.create_skillset(skillset)
    # The service echoes back the full skillset definition plus an etag.
    assert isinstance(created, SearchIndexerSkillset)
    assert created.name == "test-ss"
    assert created.description == "desc"
    assert created.e_tag
    assert len(created.skills) == 1
    assert isinstance(created.skills[0], EntityRecognitionSkill)
    assert len(client.get_skillsets()) == 1
def test_create_indexer(self, api_key, endpoint, index_name, **kwargs):
    """A freshly created indexer should report its name and its source/target links."""
    client = SearchIndexerClient(endpoint, AzureKeyCredential(api_key))
    indexer = self._prepare_indexer(endpoint, api_key)
    created = client.create_indexer(indexer)
    assert created.name == "sample-indexer"
    assert created.target_index_name == "hotels"
    assert created.data_source_name == "sample-datasource"
def _prepare_indexer(self, endpoint, api_key, name="sample-indexer", ds_name="sample-datasource", id_name="hotels"):
    """Build an (unsaved) SearchIndexer wired to a freshly created datasource and index.

    Creates the blob datasource and the target index on the service as a side
    effect, then returns the indexer definition that links the two.
    """
    storage_conn = self.settings.AZURE_STORAGE_CONNECTION_STRING
    # Keep the real connection string out of recorded test payloads.
    self.scrubber.register_name_pair(storage_conn, 'connection_string')
    container = SearchIndexerDataContainer(name='searchcontainer')
    connection = SearchIndexerDataSourceConnection(
        name=ds_name,
        type="azureblob",
        connection_string=storage_conn,
        container=container,
    )
    indexer_client = SearchIndexerClient(endpoint, AzureKeyCredential(api_key))
    created_ds = indexer_client.create_datasource(connection)
    # Minimal single-key schema is enough for indexer plumbing tests.
    key_field = {
        "name": "hotelId",
        "type": "Edm.String",
        "key": True,
        "searchable": False,
    }
    index = SearchIndex(name=id_name, fields=[key_field])
    index_client = SearchIndexClient(endpoint, AzureKeyCredential(api_key))
    created_index = index_client.create_index(index)
    return SearchIndexer(
        name=name,
        data_source_name=created_ds.name,
        target_index_name=created_index.name,
    )
def test_list_datasource(self, api_key, endpoint, index_name, **kwargs):
    """Listing data source connections should return every created connection."""
    client = SearchIndexerClient(endpoint, AzureKeyCredential(api_key))
    first = self._create_data_source_connection()
    second = self._create_data_source_connection(name="another-sample")
    client.create_data_source_connection(first)
    client.create_data_source_connection(second)
    listed = client.get_data_source_connections()
    assert isinstance(listed, list)
    assert {ds.name for ds in listed} == {"sample-datasource", "another-sample"}
def _create_skillset():
    """Create a one-skill skillset on the service and return the created resource."""
    client = SearchIndexerClient(service_endpoint, AzureKeyCredential(key))
    recognize_dates = EntityRecognitionSkill(
        name="merge-skill",
        inputs=[InputFieldMappingEntry(name="text", source="/document/lastRenovationDate")],
        outputs=[OutputFieldMappingEntry(name="dateTimes", target_name="RenovatedDate")],
    )
    skillset = SearchIndexerSkillset(
        name='hotel-data-skill',
        skills=[recognize_dates],
        description="example skillset",
    )
    return client.create_skillset(skillset)
def test_list_indexer(self, api_key, endpoint, index_name, **kwargs):
    """Listing indexers should return every indexer created on the service."""
    client = SearchIndexerClient(endpoint, AzureKeyCredential(api_key))
    default_indexer = self._prepare_indexer(endpoint, api_key)
    extra_indexer = self._prepare_indexer(
        endpoint,
        api_key,
        name="another-indexer",
        ds_name="another-datasource",
        id_name="another-index",
    )
    client.create_indexer(default_indexer)
    client.create_indexer(extra_indexer)
    listed = client.get_indexers()
    assert isinstance(listed, list)
    assert {ix.name for ix in listed} == {"sample-indexer", "another-indexer"}
def _create_datasource():
    """Create a blob datasource on the service and return the created resource.

    The data lives in the "searchcontainer" blob container, as mentioned in
    the sample description.
    """
    ds_client = SearchIndexerClient(service_endpoint, AzureKeyCredential(key))
    container = SearchIndexerDataContainer(name='searchcontainer')
    # BUG FIX: create_datasource takes a data source connection object, not
    # loose keyword arguments (see the other call sites in this file, which
    # all construct SearchIndexerDataSourceConnection first).
    data_source_connection = SearchIndexerDataSourceConnection(
        name="hotel-datasource",
        type="azureblob",
        connection_string=connection_string,
        container=container,
    )
    data_source = ds_client.create_datasource(data_source_connection)
    return data_source
def test_indexer_endpoint_https(self):
    """The client must coerce bare endpoints to https and reject non-https/non-str ones."""
    credential = AzureKeyCredential(key="old_api_key")
    # A scheme-less endpoint is upgraded to https.
    assert SearchIndexerClient("endpoint", credential)._endpoint.startswith('https')
    # An explicit https endpoint is kept.
    assert SearchIndexerClient("https://endpoint", credential)._endpoint.startswith('https')
    # Plain http and non-string endpoints are both rejected.
    with pytest.raises(ValueError):
        SearchIndexerClient("http://endpoint", credential)
    with pytest.raises(ValueError):
        SearchIndexerClient(12345, credential)
def test_create_or_update_skillset_if_unchanged(self, api_key, endpoint, index_name, **kwargs):
    """create_or_update_skillset with a stale etag must fail the If-Not-Modified check."""
    client = SearchIndexerClient(endpoint, AzureKeyCredential(api_key))
    entity_skill = EntityRecognitionSkill(
        inputs=[InputFieldMappingEntry(name="text", source="/document/content")],
        outputs=[OutputFieldMappingEntry(name="organizations", target_name="organizations")],
    )
    created = client.create_or_update_skillset(name='test-ss', skills=[entity_skill], description="desc1")
    original_etag = created.e_tag
    # Update once so the service-side etag moves on.
    client.create_or_update_skillset(name='test-ss', skills=[entity_skill], description="desc2", skillset=created)
    assert len(client.get_skillsets()) == 1
    # Restore the stale etag; a conditional update must now be rejected.
    created.e_tag = original_etag
    with pytest.raises(HttpResponseError):
        client.create_or_update_skillset(
            name='test-ss',
            skills=[entity_skill],
            skillset=created,
            match_condition=MatchConditions.IfNotModified,
        )
def test_create_skillset_validation(self, **kwargs):
    """Mixing version-specific skill parameters must raise ValueError client-side."""
    with pytest.raises(ValueError) as err:
        client = SearchIndexerClient("fake_endpoint", AzureKeyCredential("fake_key"))
        # V1-style entity skill carrying V3-only parameters.
        skill_v1 = EntityRecognitionSkill(
            inputs=[InputFieldMappingEntry(name="text", source="/document/content")],
            outputs=[OutputFieldMappingEntry(name="organizations", target_name="organizationsS1")],
            description="Skill Version 1",
            model_version="1",
            include_typeless_entities=True,
        )
        # Latest entity skill carrying a V1-only parameter.
        skill_latest = EntityRecognitionSkill(
            inputs=[InputFieldMappingEntry(name="text", source="/document/content")],
            outputs=[OutputFieldMappingEntry(name="organizations", target_name="organizationsS2")],
            skill_version=EntityRecognitionSkillVersion.LATEST,
            description="Skill Version 3",
            model_version="3",
            include_typeless_entities=True,
        )
        # V1 sentiment skill carrying a V3-only parameter.
        sentiment_v1 = SentimentSkill(
            inputs=[InputFieldMappingEntry(name="text", source="/document/content")],
            outputs=[OutputFieldMappingEntry(name="score", target_name="scoreS3")],
            skill_version=SentimentSkillVersion.V1,
            description="Sentiment V1",
            include_opinion_mining=True,
        )
        skillset = SearchIndexerSkillset(
            name="test-ss",
            skills=[skill_v1, skill_latest, sentiment_v1],
            description="desc",
        )
        client.create_skillset(skillset)
    # Every offending parameter should be named in the error message.
    message = str(err.value)
    assert 'include_typeless_entities' in message
    assert 'model_version' in message
    assert 'include_opinion_mining' in message
def test_search_indexers(self, endpoint, api_key, **kwargs):
    """Run every indexer sub-test in sequence against one shared client pair."""
    storage_cs = kwargs.get("search_storage_connection_string")
    container_name = kwargs.get("search_storage_container_name")
    client = SearchIndexerClient(endpoint, api_key)
    index_client = SearchIndexClient(endpoint, api_key)
    # Order matters: later steps rely on state left behind by earlier ones.
    steps = (
        self._test_create_indexer,
        self._test_delete_indexer,
        self._test_get_indexer,
        self._test_list_indexer,
        self._test_create_or_update_indexer,
        self._test_reset_indexer,
        self._test_run_indexer,
        self._test_get_indexer_status,
        self._test_create_or_update_indexer_if_unchanged,
        self._test_delete_indexer_if_unchanged,
    )
    for step in steps:
        step(client, index_client, storage_cs, container_name)
def test_delete_datasource(self, api_key, endpoint, index_name, **kwargs):
    """Deleting a datasource by name should remove it from the service."""
    client = SearchIndexerClient(endpoint, AzureKeyCredential(api_key))
    definition = self._create_datasource()
    client.create_datasource(definition)
    assert len(client.get_datasources()) == 1
    client.delete_datasource("sample-datasource")
    assert len(client.get_datasources()) == 0
def test_delete_indexer(self, api_key, endpoint, index_name, **kwargs):
    """Deleting an indexer by name should remove it from the service."""
    client = SearchIndexerClient(endpoint, AzureKeyCredential(api_key))
    definition = self._prepare_indexer(endpoint, api_key)
    client.create_indexer(definition)
    assert len(client.get_indexers()) == 1
    client.delete_indexer("sample-indexer")
    assert len(client.get_indexers()) == 0
def test_create_or_update_datasource(self, api_key, endpoint, index_name, **kwargs):
    """create_or_update should modify an existing connection in place, not add one."""
    client = SearchIndexerClient(endpoint, AzureKeyCredential(api_key))
    connection = self._create_data_source_connection()
    client.create_data_source_connection(connection)
    assert len(client.get_data_source_connections()) == 1
    # Mutate locally, then push the change.
    connection.description = "updated"
    client.create_or_update_data_source_connection(connection)
    assert len(client.get_data_source_connections()) == 1
    fetched = client.get_data_source_connection("sample-datasource")
    assert fetched.name == "sample-datasource"
    assert fetched.description == "updated"
def test_create_or_update_indexer(self, api_key, endpoint, index_name, **kwargs):
    """create_or_update should modify an existing indexer in place, not add one."""
    client = SearchIndexerClient(endpoint, AzureKeyCredential(api_key))
    definition = self._prepare_indexer(endpoint, api_key)
    client.create_indexer(definition)
    assert len(client.get_indexers()) == 1
    # Mutate locally, then push the change.
    definition.description = "updated"
    client.create_or_update_indexer(definition)
    assert len(client.get_indexers()) == 1
    fetched = client.get_indexer("sample-indexer")
    assert fetched.name == "sample-indexer"
    assert fetched.description == "updated"
def test_run_indexer(self, api_key, endpoint, index_name, **kwargs):
    """Running an indexer should flip its reported status to 'running'.

    Fix: dropped the unused `start = time.time()` local — it was never read.
    """
    client = SearchIndexerClient(endpoint, AzureKeyCredential(api_key))
    indexer = self._prepare_indexer(endpoint, api_key)
    client.create_indexer(indexer)
    assert len(client.get_indexers()) == 1
    client.run_indexer("sample-indexer")
    assert client.get_indexer_status("sample-indexer").status == 'running'
def sample_indexer_workflow():
    """End-to-end indexer sample: create, fetch, run, schedule, and check status.

    Assumes the skillset/datasource/index helper functions and the module-level
    service credentials are available.
    """
    # Build the three prerequisites; each helper creates its resource remotely.
    skillset_name = _create_skillset().name
    print("Skillset is created")
    ds_name = _create_datasource().name
    print("Data source is created")
    ind_name = _create_index().name
    print("Index is created")

    # Tie datasource, skillset and target index together in one indexer.
    parameters = IndexingParameters(configuration={"parsingMode": "jsonArray"})
    indexer = SearchIndexer(
        name="hotel-data-indexer",
        data_source_name=ds_name,
        target_index_name=ind_name,
        skillset_name=skillset_name,
        parameters=parameters,
    )
    indexer_client = SearchIndexerClient(service_endpoint, AzureKeyCredential(key))
    indexer_client.create_indexer(indexer)  # create the indexer

    # Fetch it back and kick off a run.
    result = indexer_client.get_indexer("hotel-data-indexer")
    print(result)
    indexer_client.run_indexer(result.name)

    # Schedule it to run daily via create-or-update.
    result.schedule = IndexingSchedule(interval=datetime.timedelta(hours=24))
    updated_indexer = indexer_client.create_or_update_indexer(result)
    print(updated_indexer)

    # Finally, query the indexer's execution status.
    indexer_client.get_indexer_status(updated_indexer.name)
def test_data_source(self, endpoint, api_key, **kwargs):
    """Run every datasource sub-test in sequence against one shared client."""
    storage_cs = kwargs.get("search_storage_connection_string")
    client = SearchIndexerClient(endpoint, api_key)
    # Order matters: later steps rely on state left behind by earlier ones.
    steps = (
        self._test_create_datasource,
        self._test_delete_datasource,
        self._test_get_datasource,
        self._test_list_datasources,
        self._test_create_or_update_datasource,
        self._test_create_or_update_datasource_if_unchanged,
        self._test_delete_datasource_if_unchanged,
        self._test_delete_datasource_string_if_unchanged,
    )
    for step in steps:
        step(client, storage_cs)
def test_skillset_crud(self, api_key, endpoint):
    """Run the full skillset CRUD sub-test sequence against one shared client."""
    client = SearchIndexerClient(endpoint, api_key)
    # Validation needs no client; the remaining steps share state in order.
    self._test_create_skillset_validation()
    self._test_create_skillset(client)
    self._test_get_skillset(client)
    self._test_get_skillsets(client)
    self._test_create_or_update_skillset(client)
    self._test_create_or_update_skillset_if_unchanged(client)
    self._test_create_or_update_skillset_inplace(client)
    self._test_delete_skillset_if_unchanged(client)
    self._test_delete_skillset(client)
def test_indexer_credential_roll(self):
    """Updating the shared credential must be reflected in the client's headers."""
    credential = AzureKeyCredential(key="old_api_key")
    client = SearchIndexerClient("endpoint", credential)
    expected_accept = "application/json;odata.metadata=minimal"
    assert client._headers == {"api-key": "old_api_key", "Accept": expected_accept}
    # Rolling the key on the credential object should propagate without
    # rebuilding the client.
    credential.update("new_api_key")
    assert client._headers == {"api-key": "new_api_key", "Accept": expected_accept}
def test_reset_indexer(self, api_key, endpoint, index_name, **kwargs):
    """Resetting an indexer should leave its last result in a reset/in-progress state."""
    client = SearchIndexerClient(endpoint, AzureKeyCredential(api_key))
    definition = self._prepare_indexer(endpoint, api_key)
    client.create_indexer(definition)
    assert len(client.get_indexers()) == 1
    client.reset_indexer("sample-indexer")
    status = client.get_indexer_status("sample-indexer")
    assert status.last_result.status in ('InProgress', 'reset')
def test_delete_skillset_if_unchanged(self, api_key, endpoint, index_name, **kwargs):
    """Deleting a skillset with a stale etag must fail the If-Not-Modified check."""
    client = SearchIndexerClient(endpoint, AzureKeyCredential(api_key))
    entity_skill = EntityRecognitionSkill(
        inputs=[InputFieldMappingEntry(name="text", source="/document/content")],
        outputs=[OutputFieldMappingEntry(name="organizations", target_name="organizations")],
    )
    created = client.create_skillset(
        SearchIndexerSkillset(name='test-ss', skills=[entity_skill], description="desc")
    )
    stale_etag = created.e_tag
    # Update the skillset so the service-side etag changes.
    updated = client.create_or_update_skillset(
        SearchIndexerSkillset(name='test-ss', skills=[entity_skill], description="updated")
    )
    # Attach the stale etag; a conditional delete must now be rejected.
    updated.e_tag = stale_etag
    with pytest.raises(HttpResponseError):
        client.delete_skillset(updated, match_condition=MatchConditions.IfNotModified)
def test_delete_datasource_if_unchanged(self, api_key, endpoint, index_name, **kwargs):
    """Deleting a connection with a stale etag must fail and leave it in place."""
    client = SearchIndexerClient(endpoint, AzureKeyCredential(api_key))
    connection = self._create_data_source_connection()
    created = client.create_data_source_connection(connection)
    stale_etag = created.e_tag
    # Update the connection so the service-side etag changes.
    connection.description = "updated"
    client.create_or_update_data_source_connection(connection)
    # Attach the stale etag; a conditional delete must now be rejected.
    connection.e_tag = stale_etag
    with pytest.raises(HttpResponseError):
        client.delete_data_source_connection(
            connection, match_condition=MatchConditions.IfNotModified
        )
    # The connection must survive the failed delete.
    assert len(client.get_data_source_connections()) == 1
def test_delete_indexer_if_unchanged(self, api_key, endpoint, index_name, **kwargs):
    """Deleting an indexer with a stale etag must fail the If-Not-Modified check."""
    client = SearchIndexerClient(endpoint, AzureKeyCredential(api_key))
    definition = self._prepare_indexer(endpoint, api_key)
    created = client.create_indexer(definition)
    stale_etag = created.e_tag
    # Update the indexer so the service-side etag changes.
    definition.description = "updated"
    client.create_or_update_indexer(definition)
    # Attach the stale etag; a conditional delete must now be rejected.
    definition.e_tag = stale_etag
    with pytest.raises(HttpResponseError):
        client.delete_indexer(definition, match_condition=MatchConditions.IfNotModified)
def test_skillset_crud(self, api_key, endpoint):
    """Run the skillset CRUD sub-test sequence, skipping the service-regressed steps."""
    client = SearchIndexerClient(endpoint, api_key)
    self._test_create_skillset_validation()
    self._test_create_skillset(client)
    self._test_get_skillset(client)
    self._test_get_skillsets(client)
    # TODO: Disabled due to service regression. See #22769
    #self._test_create_or_update_skillset(client)
    self._test_create_or_update_skillset_if_unchanged(client)
    # TODO: Disabled due to service regression. See #22769
    #self._test_create_or_update_skillset_inplace(client)
    # TODO: Disabled due to service regression. See #22769
    #self._test_delete_skillset_if_unchanged(client)
    self._test_delete_skillset(client)
def test_delete_datasource_string_if_unchanged(self, api_key, endpoint, index_name, **kwargs):
    """A conditional delete by plain name string must raise ValueError (no etag to check)."""
    client = SearchIndexerClient(endpoint, AzureKeyCredential(api_key))
    definition = self._create_datasource()
    created = client.create_datasource(definition)
    stale_etag = created.e_tag
    # Update the datasource so the service-side etag changes.
    definition.description = "updated"
    client.create_or_update_datasource(definition)
    definition.e_tag = stale_etag
    # Passing only the name gives the client no etag, so the match condition
    # cannot be evaluated and must be rejected client-side.
    with pytest.raises(ValueError):
        client.delete_datasource(
            definition.name, match_condition=MatchConditions.IfNotModified
        )
def test_delete_skillset(self, api_key, endpoint, index_name, **kwargs):
    """Deleting a skillset by name should remove it from the service."""
    client = SearchIndexerClient(endpoint, AzureKeyCredential(api_key))
    entity_skill = EntityRecognitionSkill(
        inputs=[InputFieldMappingEntry(name="text", source="/document/content")],
        outputs=[OutputFieldMappingEntry(name="organizations", target_name="organizations")],
    )
    client.create_skillset(name='test-ss', skills=[entity_skill], description="desc")
    assert len(client.get_skillsets()) == 1
    client.delete_skillset("test-ss")
    assert len(client.get_skillsets()) == 0
def test_get_skillsets(self, api_key, endpoint, index_name, **kwargs):
    """get_skillsets should list every skillset created on the service."""
    client = SearchIndexerClient(endpoint, AzureKeyCredential(api_key))
    entity_skill = EntityRecognitionSkill(
        inputs=[InputFieldMappingEntry(name="text", source="/document/content")],
        outputs=[OutputFieldMappingEntry(name="organizations", target_name="organizations")],
    )
    client.create_skillset(name='test-ss-1', skills=[entity_skill], description="desc1")
    client.create_skillset(name='test-ss-2', skills=[entity_skill], description="desc2")
    listed = client.get_skillsets()
    assert isinstance(listed, list)
    assert all(isinstance(ss, SearchIndexerSkillset) for ss in listed)
    assert {ss.name for ss in listed} == {"test-ss-1", "test-ss-2"}
def test_create_or_update_skillset_if_unchanged(self, api_key, endpoint, index_name, **kwargs):
    """Upserting the same skillset name twice must update in place, not add a second."""
    client = SearchIndexerClient(endpoint, AzureKeyCredential(api_key))
    entity_skill = EntityRecognitionSkill(
        inputs=[InputFieldMappingEntry(name="text", source="/document/content")],
        outputs=[OutputFieldMappingEntry(name="organizations", target_name="organizations")],
    )
    first_version = SearchIndexerSkillset(
        name='test-ss', skills=[entity_skill], description="desc1"
    )
    created = client.create_or_update_skillset(first_version)
    etag = created.e_tag
    second_version = SearchIndexerSkillset(
        name='test-ss', skills=[entity_skill], description="desc2", skillset=created
    )
    client.create_or_update_skillset(second_version)
    assert len(client.get_skillsets()) == 1
def test_indexer_init(self):
    """A newly constructed client should carry the api-key and Accept headers."""
    client = SearchIndexerClient("endpoint", CREDENTIAL)
    expected_headers = {
        "api-key": "test_api_key",
        "Accept": "application/json;odata.metadata=minimal",
    }
    assert client._headers == expected_headers