def test_create_skillset(self, api_key, endpoint, index_name, **kwargs):
    """Create a single-skill skillset and check what the client returns.

    A skillset named ``test-ss`` holding one ``EntityRecognitionSkill`` is
    submitted through ``create_skillset``; the returned object and the
    number of skillsets listed afterwards are then asserted on.
    ``index_name`` and ``kwargs`` are accepted but not used here.
    """
    client = SearchIndexerClient(endpoint, AzureKeyCredential(api_key))

    # One text input mapped to one "organizations" output.
    text_input = InputFieldMappingEntry(name="text",
                                        source="/document/content")
    orgs_output = OutputFieldMappingEntry(name="organizations",
                                          target_name="organizations")
    skill = EntityRecognitionSkill(inputs=[text_input],
                                   outputs=[orgs_output])

    definition = SearchIndexerSkillset(name='test-ss',
                                       skills=[skill],
                                       description="desc")

    created = client.create_skillset(definition)
    assert isinstance(created, SearchIndexerSkillset)
    assert created.name == "test-ss"
    assert created.description == "desc"
    assert created.e_tag
    assert len(created.skills) == 1
    assert isinstance(created.skills[0], EntityRecognitionSkill)

    assert len(client.get_skillsets()) == 1
Example #2
0
 def test_create_indexer(self, api_key, endpoint, index_name, **kwargs):
     """Create the default prepared indexer and check its identifying fields.

     The indexer comes from ``self._prepare_indexer`` with its default
     arguments; the object returned by ``create_indexer`` is checked for
     its name, target index name and data source name.
     """
     credential = AzureKeyCredential(api_key)
     client = SearchIndexerClient(endpoint, credential)

     prepared = self._prepare_indexer(endpoint, api_key)
     created = client.create_indexer(prepared)

     assert created.name == "sample-indexer"
     assert created.target_index_name == "hotels"
     assert created.data_source_name == "sample-datasource"
Example #3
0
    def _prepare_indexer(self,
                         endpoint,
                         api_key,
                         name="sample-indexer",
                         ds_name="sample-datasource",
                         id_name="hotels"):
        """Create a data source and an index, then build an indexer over them.

        The data source (``ds_name``) and the index (``id_name``) are
        created on the service; the returned ``SearchIndexer`` is only
        constructed locally and is NOT submitted by this helper.

        :param endpoint: service endpoint passed to both clients.
        :param api_key: key wrapped in ``AzureKeyCredential`` for both clients.
        :param name: name given to the returned indexer.
        :param ds_name: name of the data source connection to create.
        :param id_name: name of the index to create.
        :return: a ``SearchIndexer`` referencing the created data source
            and index by the names the service returned.
        """
        storage_connection = self.settings.AZURE_STORAGE_CONNECTION_STRING
        # Register the connection string with the scrubber under a placeholder name.
        self.scrubber.register_name_pair(storage_connection,
                                         'connection_string')

        # --- data source ---
        blob_container = SearchIndexerDataContainer(name='searchcontainer')
        source_definition = SearchIndexerDataSourceConnection(
            name=ds_name,
            type="azureblob",
            connection_string=storage_connection,
            container=blob_container)
        indexer_client = SearchIndexerClient(endpoint,
                                             AzureKeyCredential(api_key))
        created_source = indexer_client.create_datasource(source_definition)

        # --- index with a single key field ---
        key_field = {
            "name": "hotelId",
            "type": "Edm.String",
            "key": True,
            "searchable": False
        }
        index_definition = SearchIndex(name=id_name, fields=[key_field])
        index_client = SearchIndexClient(endpoint,
                                         AzureKeyCredential(api_key))
        created_index = index_client.create_index(index_definition)

        return SearchIndexer(name=name,
                             data_source_name=created_source.name,
                             target_index_name=created_index.name)
Example #4
0
 def test_list_datasource(self, api_key, endpoint, index_name, **kwargs):
     """Create two data source connections and check both are listed.

     The connections come from ``self._create_data_source_connection``
     (default name and ``another-sample``); the listing must be a ``list``
     whose names are exactly those two.
     """
     client = SearchIndexerClient(endpoint, AzureKeyCredential(api_key))

     first = self._create_data_source_connection()
     second = self._create_data_source_connection(name="another-sample")
     client.create_data_source_connection(first)
     client.create_data_source_connection(second)

     listed = client.get_data_source_connections()
     assert isinstance(listed, list)
     listed_names = {connection.name for connection in listed}
     assert listed_names == {"sample-datasource", "another-sample"}
Example #5
0
def _create_skillset():
    """Create the ``hotel-data-skill`` skillset and return the service result.

    The skillset holds one ``EntityRecognitionSkill`` that reads
    ``/document/lastRenovationDate`` as text and maps its ``dateTimes``
    output to ``RenovatedDate``. Uses the module-level ``service_endpoint``
    and ``key``.
    """
    client = SearchIndexerClient(service_endpoint, AzureKeyCredential(key))

    renovation_input = InputFieldMappingEntry(
        name="text", source="/document/lastRenovationDate")
    renovated_output = OutputFieldMappingEntry(
        name="dateTimes", target_name="RenovatedDate")
    skill = EntityRecognitionSkill(
        name="merge-skill",
        inputs=[renovation_input],
        outputs=[renovated_output])

    definition = SearchIndexerSkillset(
        name='hotel-data-skill',
        skills=[skill],
        description="example skillset")
    return client.create_skillset(definition)
Example #6
0
 def test_list_indexer(self, api_key, endpoint, index_name, **kwargs):
     """Create two indexers and check that both names are listed.

     Each indexer is prepared with its own data source and index names so
     the two do not share resources; ``get_indexers`` must return a
     ``list`` whose names are exactly the two created.
     """
     client = SearchIndexerClient(endpoint, AzureKeyCredential(api_key))

     default_indexer = self._prepare_indexer(endpoint, api_key)
     other_indexer = self._prepare_indexer(
         endpoint,
         api_key,
         name="another-indexer",
         ds_name="another-datasource",
         id_name="another-index")
     client.create_indexer(default_indexer)
     client.create_indexer(other_indexer)

     listed = client.get_indexers()
     assert isinstance(listed, list)
     listed_names = {indexer.name for indexer in listed}
     assert listed_names == {"sample-indexer", "another-indexer"}
Example #7
0
def _create_datasource():
    """Create the ``hotel-datasource`` blob data source and return the result.

    The data lives in the ``searchcontainer`` container. Uses the
    module-level ``service_endpoint``, ``key`` and ``connection_string``.
    """
    # NOTE(review): the data source fields are passed straight to
    # create_datasource as keyword arguments rather than as a prebuilt
    # connection object -- confirm this matches the SDK version in use.
    indexer_client = SearchIndexerClient(service_endpoint,
                                         AzureKeyCredential(key))
    blob_container = SearchIndexerDataContainer(name='searchcontainer')
    return indexer_client.create_datasource(
        name="hotel-datasource",
        type="azureblob",
        connection_string=connection_string,
        container=blob_container)
    def test_indexer_endpoint_https(self):
        """Check how the client treats the endpoint scheme.

        A scheme-less endpoint and an explicit ``https://`` endpoint must
        both leave ``client._endpoint`` starting with ``https``; an
        ``http://`` endpoint and a non-string endpoint must raise
        ``ValueError``.
        """
        credential = AzureKeyCredential(key="old_api_key")

        bare_client = SearchIndexerClient("endpoint", credential)
        assert bare_client._endpoint.startswith('https')

        https_client = SearchIndexerClient("https://endpoint", credential)
        assert https_client._endpoint.startswith('https')

        # Plain http is rejected.
        with pytest.raises(ValueError):
            SearchIndexerClient("http://endpoint", credential)

        # So is an endpoint that is not a string.
        with pytest.raises(ValueError):
            SearchIndexerClient(12345, credential)
Example #9
0
    def test_create_or_update_skillset_if_unchanged(self, api_key, endpoint,
                                                    index_name, **kwargs):
        """Expect a conditional update with a stale etag to fail.

        Creates ``test-ss``, updates it once, then puts the etag captured
        from the first call back on the skillset object and expects
        ``create_or_update_skillset`` with ``IfNotModified`` to raise
        ``HttpResponseError``.
        """
        client = SearchIndexerClient(endpoint, AzureKeyCredential(api_key))

        text_input = InputFieldMappingEntry(name="text",
                                            source="/document/content")
        orgs_output = OutputFieldMappingEntry(name="organizations",
                                              target_name="organizations")
        skill = EntityRecognitionSkill(inputs=[text_input],
                                       outputs=[orgs_output])

        # First write: remember the etag returned with it.
        stored = client.create_or_update_skillset(name='test-ss',
                                                  skills=[skill],
                                                  description="desc1")
        original_etag = stored.e_tag

        # Second write changes the description.
        client.create_or_update_skillset(name='test-ss',
                                         skills=[skill],
                                         description="desc2",
                                         skillset=stored)
        assert len(client.get_skillsets()) == 1

        # Force the etag from the first write before the conditional call.
        stored.e_tag = original_etag
        with pytest.raises(HttpResponseError):
            client.create_or_update_skillset(
                name='test-ss',
                skills=[skill],
                skillset=stored,
                match_condition=MatchConditions.IfNotModified)
Example #10
0
    def test_create_skillset_validation(self, **kwargs):
        """Expect a ``ValueError`` that names three skill options.

        Three skills are built and submitted together against a fake
        endpoint; the error message must mention
        ``include_typeless_entities``, ``model_version`` and
        ``include_opinion_mining``.
        """
        # NOTE(review): presumably these options are invalid for the skill
        # versions used below -- confirm against the SDK's validation rules.

        def text_input():
            # Each skill receives its own, freshly built input mapping.
            return InputFieldMappingEntry(name="text",
                                          source="/document/content")

        # Everything stays inside the raises block, so a ValueError from any
        # of these statements satisfies the expectation.
        with pytest.raises(ValueError) as err:
            client = SearchIndexerClient("fake_endpoint",
                                         AzureKeyCredential("fake_key"))

            entity_default = EntityRecognitionSkill(
                inputs=[text_input()],
                outputs=[
                    OutputFieldMappingEntry(name="organizations",
                                            target_name="organizationsS1")
                ],
                description="Skill Version 1",
                model_version="1",
                include_typeless_entities=True)

            entity_latest = EntityRecognitionSkill(
                inputs=[text_input()],
                outputs=[
                    OutputFieldMappingEntry(name="organizations",
                                            target_name="organizationsS2")
                ],
                skill_version=EntityRecognitionSkillVersion.LATEST,
                description="Skill Version 3",
                model_version="3",
                include_typeless_entities=True)

            sentiment_v1 = SentimentSkill(
                inputs=[text_input()],
                outputs=[
                    OutputFieldMappingEntry(name="score",
                                            target_name="scoreS3")
                ],
                skill_version=SentimentSkillVersion.V1,
                description="Sentiment V1",
                include_opinion_mining=True)

            definition = SearchIndexerSkillset(
                name="test-ss",
                skills=[entity_default, entity_latest, sentiment_v1],
                description="desc")
            client.create_skillset(definition)

        message = str(err.value)
        assert 'include_typeless_entities' in message
        assert 'model_version' in message
        assert 'include_opinion_mining' in message
Example #11
0
 def test_search_indexers(self, endpoint, api_key, **kwargs):
     """Run every indexer sub-test in sequence against shared clients.

     The storage connection string and container name are read from
     ``kwargs``; each ``_test_*`` helper receives the same four arguments.
     """
     storage_cs = kwargs.get("search_storage_connection_string")
     container_name = kwargs.get("search_storage_container_name")
     client = SearchIndexerClient(endpoint, api_key)
     index_client = SearchIndexClient(endpoint, api_key)

     # Shared positional arguments for all helpers below.
     shared = (client, index_client, storage_cs, container_name)

     self._test_create_indexer(*shared)
     self._test_delete_indexer(*shared)
     self._test_get_indexer(*shared)
     self._test_list_indexer(*shared)
     self._test_create_or_update_indexer(*shared)
     self._test_reset_indexer(*shared)
     self._test_run_indexer(*shared)
     self._test_get_indexer_status(*shared)
     self._test_create_or_update_indexer_if_unchanged(*shared)
     self._test_delete_indexer_if_unchanged(*shared)
Example #12
0
 def test_delete_datasource(self, api_key, endpoint, index_name, **kwargs):
     """Create a data source, delete it by name, and check the listing empties."""
     credential = AzureKeyCredential(api_key)
     client = SearchIndexerClient(endpoint, credential)

     definition = self._create_datasource()
     client.create_datasource(definition)
     assert len(client.get_datasources()) == 1

     client.delete_datasource("sample-datasource")
     assert len(client.get_datasources()) == 0
Example #13
0
 def test_delete_indexer(self, api_key, endpoint, index_name, **kwargs):
     """Create an indexer, delete it by name, and check the listing empties."""
     credential = AzureKeyCredential(api_key)
     client = SearchIndexerClient(endpoint, credential)

     prepared = self._prepare_indexer(endpoint, api_key)
     client.create_indexer(prepared)
     assert len(client.get_indexers()) == 1

     client.delete_indexer("sample-indexer")
     assert len(client.get_indexers()) == 0
Example #14
0
 def test_create_or_update_datasource(self, api_key, endpoint, index_name, **kwargs):
     """Update a data source connection's description in place.

     After the update there must still be exactly one connection, and
     fetching it by name must show the new description.
     """
     client = SearchIndexerClient(endpoint, AzureKeyCredential(api_key))

     connection = self._create_data_source_connection()
     client.create_data_source_connection(connection)
     assert len(client.get_data_source_connections()) == 1

     connection.description = "updated"
     client.create_or_update_data_source_connection(connection)
     assert len(client.get_data_source_connections()) == 1

     fetched = client.get_data_source_connection("sample-datasource")
     assert fetched.name == "sample-datasource"
     assert fetched.description == "updated"
Example #15
0
 def test_create_or_update_indexer(self, api_key, endpoint, index_name, **kwargs):
     """Update an indexer's description in place.

     After the update there must still be exactly one indexer, and
     fetching it by name must show the new description.
     """
     client = SearchIndexerClient(endpoint, AzureKeyCredential(api_key))

     prepared = self._prepare_indexer(endpoint, api_key)
     client.create_indexer(prepared)
     assert len(client.get_indexers()) == 1

     prepared.description = "updated"
     client.create_or_update_indexer(prepared)
     assert len(client.get_indexers()) == 1

     fetched = client.get_indexer("sample-indexer")
     assert fetched.name == "sample-indexer"
     assert fetched.description == "updated"
Example #16
0
 def test_run_indexer(self, api_key, endpoint, index_name, **kwargs):
     """Run a freshly created indexer and check it reports ``running``.

     The indexer comes from ``self._prepare_indexer`` with default names.
     The status is read immediately after ``run_indexer`` returns, with no
     waiting or polling in between.
     """
     client = SearchIndexerClient(endpoint, AzureKeyCredential(api_key))
     indexer = self._prepare_indexer(endpoint, api_key)
     client.create_indexer(indexer)
     assert len(client.get_indexers()) == 1

     # The previous version captured time.time() here but never read it;
     # the dead local (and its dependency on `time`) has been removed.
     client.run_indexer("sample-indexer")
     assert client.get_indexer_status("sample-indexer").status == 'running'
Example #17
0
def sample_indexer_workflow():
    """Walk through creating, fetching, running and scheduling an indexer.

    A skillset, a data source and an index are created first (via the
    sibling ``_create_*`` helpers); the indexer is then built on top of
    them. Progress is reported with ``print``.
    """
    # Prerequisites, created in this order.
    created_skillset = _create_skillset()
    skillset_name = created_skillset.name
    print("Skillset is created")

    created_source = _create_datasource()
    ds_name = created_source.name
    print("Data source is created")

    created_index = _create_index()
    ind_name = created_index.name
    print("Index is created")

    # The indexer ties the data source, the skillset and the target index
    # together.
    indexing_parameters = IndexingParameters(
        configuration={"parsingMode": "jsonArray"})
    indexer_definition = SearchIndexer(
        name="hotel-data-indexer",
        data_source_name=ds_name,
        target_index_name=ind_name,
        skillset_name=skillset_name,
        parameters=indexing_parameters,
    )

    indexer_client = SearchIndexerClient(service_endpoint,
                                         AzureKeyCredential(key))
    # Create the indexer on the service.
    indexer_client.create_indexer(indexer_definition)

    # Fetch it back by name.
    fetched = indexer_client.get_indexer("hotel-data-indexer")
    print(fetched)

    # Run it with run_indexer().
    indexer_client.run_indexer(fetched.name)

    # Attach a 24-hour schedule through create-or-update.
    fetched.schedule = IndexingSchedule(interval=datetime.timedelta(hours=24))
    updated_indexer = indexer_client.create_or_update_indexer(fetched)

    print(updated_indexer)

    # Query the indexer status (the returned value is not used here).
    indexer_client.get_indexer_status(updated_indexer.name)
Example #18
0
 def test_data_source(self, endpoint, api_key, **kwargs):
     """Run every data source sub-test in sequence against one client.

     The storage connection string is read from ``kwargs`` and handed,
     with the client, to each ``_test_*`` helper.
     """
     storage_cs = kwargs.get("search_storage_connection_string")
     client = SearchIndexerClient(endpoint, api_key)

     # Shared positional arguments for all helpers below.
     shared = (client, storage_cs)

     self._test_create_datasource(*shared)
     self._test_delete_datasource(*shared)
     self._test_get_datasource(*shared)
     self._test_list_datasources(*shared)
     self._test_create_or_update_datasource(*shared)
     self._test_create_or_update_datasource_if_unchanged(*shared)
     self._test_delete_datasource_if_unchanged(*shared)
     self._test_delete_datasource_string_if_unchanged(*shared)
 def test_skillset_crud(self, api_key, endpoint):
     """Run the skillset sub-tests in sequence against one client.

     The validation sub-test takes no client; every other helper receives
     the same ``SearchIndexerClient``.
     """
     indexer_client = SearchIndexerClient(endpoint, api_key)

     # Validation sub-test: called without the client.
     self._test_create_skillset_validation()

     # Create / read.
     self._test_create_skillset(indexer_client)
     self._test_get_skillset(indexer_client)
     self._test_get_skillsets(indexer_client)

     # Update variants.
     self._test_create_or_update_skillset(indexer_client)
     self._test_create_or_update_skillset_if_unchanged(indexer_client)
     self._test_create_or_update_skillset_inplace(indexer_client)

     # Delete variants.
     self._test_delete_skillset_if_unchanged(indexer_client)
     self._test_delete_skillset(indexer_client)
 def test_indexer_credential_roll(self):
     """Check that updating the credential changes the client's headers.

     ``client._headers`` must carry the original key before
     ``credential.update`` and the new key afterwards; the ``Accept``
     header is the same in both cases.
     """
     accept = "application/json;odata.metadata=minimal"
     credential = AzureKeyCredential(key="old_api_key")
     client = SearchIndexerClient("endpoint", credential)

     headers_before = {"api-key": "old_api_key", "Accept": accept}
     assert client._headers == headers_before

     credential.update("new_api_key")

     headers_after = {"api-key": "new_api_key", "Accept": accept}
     assert client._headers == headers_after
Example #21
0
 def test_reset_indexer(self, api_key, endpoint, index_name, **kwargs):
     """Reset a freshly created indexer and check its last result status.

     After ``reset_indexer`` the status's ``last_result.status`` must be
     either ``InProgress`` or ``reset``.
     """
     client = SearchIndexerClient(endpoint, AzureKeyCredential(api_key))

     prepared = self._prepare_indexer(endpoint, api_key)
     client.create_indexer(prepared)
     assert len(client.get_indexers()) == 1

     client.reset_indexer("sample-indexer")
     status = client.get_indexer_status("sample-indexer")
     assert status.last_result.status in ('InProgress', 'reset')
    def test_delete_skillset_if_unchanged(self, api_key, endpoint, index_name,
                                          **kwargs):
        """Expect a conditional delete with a stale etag to fail.

        Creates ``test-ss``, updates its description, then puts the etag
        from the initial creation on the updated object and expects
        ``delete_skillset`` with ``IfNotModified`` to raise
        ``HttpResponseError``.
        """
        client = SearchIndexerClient(endpoint, AzureKeyCredential(api_key))

        text_input = InputFieldMappingEntry(name="text",
                                            source="/document/content")
        orgs_output = OutputFieldMappingEntry(name="organizations",
                                              target_name="organizations")
        skill = EntityRecognitionSkill(inputs=[text_input],
                                       outputs=[orgs_output])

        # Initial version: keep its etag for later.
        first_version = SearchIndexerSkillset(name='test-ss',
                                              skills=[skill],
                                              description="desc")
        original_etag = client.create_skillset(first_version).e_tag

        # Second version changes the description.
        second_version = SearchIndexerSkillset(name='test-ss',
                                               skills=[skill],
                                               description="updated")
        stale = client.create_or_update_skillset(second_version)
        stale.e_tag = original_etag

        with pytest.raises(HttpResponseError):
            client.delete_skillset(
                stale, match_condition=MatchConditions.IfNotModified)
Example #23
0
    def test_delete_datasource_if_unchanged(self, api_key, endpoint, index_name, **kwargs):
        """Expect a conditional delete with a stale etag to fail and delete nothing.

        Creates a data source connection, updates its description, then
        puts the etag from the initial creation back on the object and
        expects ``delete_data_source_connection`` with ``IfNotModified``
        to raise ``HttpResponseError``. Afterwards exactly one connection
        must still be listed.
        """
        client = SearchIndexerClient(endpoint, AzureKeyCredential(api_key))
        data_source_connection = self._create_data_source_connection()
        created = client.create_data_source_connection(data_source_connection)
        etag = created.e_tag

        # Now update the data source connection
        data_source_connection.description = "updated"
        client.create_or_update_data_source_connection(data_source_connection)

        # prepare data source connection
        data_source_connection.e_tag = etag # reset to the original data source connection
        with pytest.raises(HttpResponseError):
            client.delete_data_source_connection(data_source_connection, match_condition=MatchConditions.IfNotModified)

        # This check used to sit inside the raises block, after the call that
        # raises, so it never ran. It now runs after the failed delete.
        assert len(client.get_data_source_connections()) == 1
Example #24
0
    def test_delete_indexer_if_unchanged(self, api_key, endpoint, index_name, **kwargs):
        """Expect a conditional delete with a stale etag to fail.

        Creates an indexer, updates its description, then puts the etag
        from the initial creation on the indexer object and expects
        ``delete_indexer`` with ``IfNotModified`` to raise
        ``HttpResponseError``.
        """
        client = SearchIndexerClient(endpoint, AzureKeyCredential(api_key))

        prepared = self._prepare_indexer(endpoint, api_key)
        original_etag = client.create_indexer(prepared).e_tag

        # Change the indexer on the service.
        prepared.description = "updated"
        client.create_or_update_indexer(prepared)

        # Present the etag from the initial creation.
        prepared.e_tag = original_etag
        with pytest.raises(HttpResponseError):
            client.delete_indexer(
                prepared, match_condition=MatchConditions.IfNotModified)
 def test_skillset_crud(self, api_key, endpoint):
     """Run the enabled skillset sub-tests in sequence against one client.

     Three sub-tests are currently disabled (see the TODO notes below);
     the validation sub-test takes no client.
     """
     indexer_client = SearchIndexerClient(endpoint, api_key)

     self._test_create_skillset_validation()
     self._test_create_skillset(indexer_client)
     self._test_get_skillset(indexer_client)
     self._test_get_skillsets(indexer_client)

     # TODO: Disabled due to service regression. See #22769
     #self._test_create_or_update_skillset(client)

     self._test_create_or_update_skillset_if_unchanged(indexer_client)

     # TODO: Disabled due to service regression. See #22769
     #self._test_create_or_update_skillset_inplace(client)

     # TODO: Disabled due to service regression. See #22769
     #self._test_delete_skillset_if_unchanged(client)

     self._test_delete_skillset(indexer_client)
Example #26
0
    def test_delete_datasource_string_if_unchanged(self, api_key, endpoint,
                                                   index_name, **kwargs):
        """Expect ``ValueError`` for a conditional delete given only a name.

        The data source is created and updated first; the delete call then
        passes ``data_source.name`` (a string, not the object) together
        with ``IfNotModified`` and must raise ``ValueError``.
        """
        client = SearchIndexerClient(endpoint, AzureKeyCredential(api_key))

        definition = self._create_datasource()
        original_etag = client.create_datasource(definition).e_tag

        # Change the data source on the service.
        definition.description = "updated"
        client.create_or_update_datasource(definition)

        # Put the etag from the initial creation back on the local object.
        definition.e_tag = original_etag
        with pytest.raises(ValueError):
            client.delete_datasource(
                definition.name,
                match_condition=MatchConditions.IfNotModified)
Example #27
0
    def test_delete_skillset(self, api_key, endpoint, index_name, **kwargs):
        """Create ``test-ss``, delete it by name, and check the listing empties."""
        client = SearchIndexerClient(endpoint, AzureKeyCredential(api_key))

        text_input = InputFieldMappingEntry(name="text",
                                            source="/document/content")
        orgs_output = OutputFieldMappingEntry(name="organizations",
                                              target_name="organizations")
        skill = EntityRecognitionSkill(inputs=[text_input],
                                       outputs=[orgs_output])

        client.create_skillset(name='test-ss',
                               skills=[skill],
                               description="desc")
        assert len(client.get_skillsets()) == 1

        client.delete_skillset("test-ss")
        assert len(client.get_skillsets()) == 0
Example #28
0
    def test_get_skillsets(self, api_key, endpoint, index_name, **kwargs):
        """Create two skillsets and check that both are listed.

        ``get_skillsets`` must return a ``list`` of
        ``SearchIndexerSkillset`` objects whose names are exactly
        ``test-ss-1`` and ``test-ss-2``.
        """
        client = SearchIndexerClient(endpoint, AzureKeyCredential(api_key))

        text_input = InputFieldMappingEntry(name="text",
                                            source="/document/content")
        orgs_output = OutputFieldMappingEntry(name="organizations",
                                              target_name="organizations")
        skill = EntityRecognitionSkill(inputs=[text_input],
                                       outputs=[orgs_output])

        # Both skillsets share the same skill object.
        client.create_skillset(name='test-ss-1',
                               skills=[skill],
                               description="desc1")
        client.create_skillset(name='test-ss-2',
                               skills=[skill],
                               description="desc2")

        listed = client.get_skillsets()
        assert isinstance(listed, list)
        assert all(isinstance(entry, SearchIndexerSkillset) for entry in listed)
        listed_names = {entry.name for entry in listed}
        assert listed_names == {"test-ss-1", "test-ss-2"}
    def test_create_or_update_skillset_if_unchanged(self, api_key, endpoint,
                                                    index_name, **kwargs):
        """Write ``test-ss`` twice and check only one skillset exists.

        The skillset is sent through ``create_or_update_skillset`` with
        description ``desc1`` and again with ``desc2``; afterwards exactly
        one skillset must be listed.
        """
        client = SearchIndexerClient(endpoint, AzureKeyCredential(api_key))

        text_input = InputFieldMappingEntry(name="text",
                                            source="/document/content")
        orgs_output = OutputFieldMappingEntry(name="organizations",
                                              target_name="organizations")
        skill = EntityRecognitionSkill(inputs=[text_input],
                                       outputs=[orgs_output])

        first_version = SearchIndexerSkillset(name='test-ss',
                                              skills=[skill],
                                              description="desc1")
        stored = client.create_or_update_skillset(first_version)
        # NOTE(review): the etag is read but never used afterwards, and no
        # match condition is exercised despite the test's name -- confirm
        # whether a conditional call is missing here.
        original_etag = stored.e_tag

        # NOTE(review): `skillset=stored` is handed to the model constructor
        # as-is; confirm that SearchIndexerSkillset expects this keyword.
        second_version = SearchIndexerSkillset(name='test-ss',
                                               skills=[skill],
                                               description="desc2",
                                               skillset=stored)
        client.create_or_update_skillset(second_version)
        assert len(client.get_skillsets()) == 1
 def test_indexer_init(self):
     """Check the headers on a client built with the shared ``CREDENTIAL``."""
     indexer_client = SearchIndexerClient("endpoint", CREDENTIAL)

     expected_headers = {
         "api-key": "test_api_key",
         "Accept": "application/json;odata.metadata=minimal",
     }
     assert indexer_client._headers == expected_headers