Beispiel #1
0
    def test_indexer_endpoint_https(self):
        """Check which endpoint values the indexer client accepts.

        A scheme-less endpoint and an explicit ``https://`` endpoint must both
        yield a client whose ``_endpoint`` starts with ``https``; an
        ``http://`` endpoint and a non-string endpoint must raise
        ``ValueError`` at construction time.
        """
        key_credential = AzureKeyCredential(key="old_api_key")

        # Endpoints expected to be accepted.
        for accepted in ("endpoint", "https://endpoint"):
            client = SearchIndexerClient(accepted, key_credential)
            assert client._endpoint.startswith('https')

        # Endpoints expected to be rejected.
        for rejected in ("http://endpoint", 12345):
            with pytest.raises(ValueError):
                SearchIndexerClient(rejected, key_credential)
    async def test_delete_skillset_if_unchanged(self, api_key, endpoint,
                                                index_name, **kwargs):
        """Expect a conditional delete with the pre-update ETag to fail.

        Creates ``test-ss``, updates it, puts the ETag captured from the
        initial creation onto the updated model, and expects
        ``delete_skillset`` with ``MatchConditions.IfNotModified`` to raise
        ``HttpResponseError``.
        """
        client = SearchIndexerClient(endpoint, AzureKeyCredential(api_key))
        skill = EntityRecognitionSkill(
            inputs=[InputFieldMappingEntry(name="text", source="/document/content")],
            outputs=[OutputFieldMappingEntry(name="organizations", target_name="organizations")],
        )

        original = await client.create_skillset(
            SearchIndexerSkillset(name='test-ss', skills=[skill], description="desc"))
        original_etag = original.e_tag

        updated = await client.create_or_update_skillset(
            SearchIndexerSkillset(name='test-ss', skills=[skill], description="updated"))
        # Replace the ETag returned by the update with the one from creation.
        updated.e_tag = original_etag

        with pytest.raises(HttpResponseError):
            await client.delete_skillset(
                updated, match_condition=MatchConditions.IfNotModified)
async def create_indexer():
    """Create a sample index, a blob data source connection and an indexer.

    Uses the module-level ``service_endpoint``, ``key``, ``connection_string``
    and ``indexers_client``. Prints a confirmation line once the indexer has
    been created.
    """
    # create an index
    index_name = "hotels"
    fields = [
        SimpleField(name="hotelId", type=SearchFieldDataType.String, key=True),
        SimpleField(name="baseRate", type=SearchFieldDataType.Double)
    ]
    index = SearchIndex(name=index_name, fields=fields)
    # Indexes are managed through SearchIndexClient; SearchIndexerClient does
    # not expose create_index.
    index_client = SearchIndexClient(service_endpoint, AzureKeyCredential(key))
    async with index_client:
        await index_client.create_index(index)

    # [START create_indexer_async]
    # create a datasource
    container = SearchIndexerDataContainer(name='searchcontainer')
    data_source_connection = SearchIndexerDataSourceConnection(
        name="indexer-datasource",
        type="azureblob",
        connection_string=connection_string,
        container=container)

    # A single indexer-client session covers both the data source and the
    # indexer creation.
    async with indexers_client:
        data_source = await indexers_client.create_data_source_connection(
            data_source_connection)

        # create an indexer
        # Reference the data source and index created above by their actual
        # names so the indexer cannot point at resources that do not exist.
        indexer = SearchIndexer(name="async-sample-indexer",
                                data_source_name=data_source.name,
                                target_index_name=index_name)
        await indexers_client.create_indexer(indexer)
    print("Create new Indexer - async-sample-indexer")
Beispiel #4
0
    async def _prepare_indexer(self, endpoint, api_key, name="sample-indexer", ds_name="sample-datasource", id_name="hotels"):
        """Create the data source connection and index that an indexer needs.

        Creates a blob data source connection named ``ds_name`` and an index
        named ``id_name`` on the service, then returns a ``SearchIndexer``
        model called ``name`` that references both. The indexer itself is
        not created here.
        """
        storage_connection = self.settings.AZURE_STORAGE_CONNECTION_STRING
        # NOTE(review): presumably keeps the real connection string out of
        # test recordings -- confirm against the scrubber implementation.
        self.scrubber.register_name_pair(storage_connection, 'connection_string')

        connection_model = SearchIndexerDataSourceConnection(
            name=ds_name,
            type="azureblob",
            connection_string=storage_connection,
            container=SearchIndexerDataContainer(name='searchcontainer'),
        )
        datasource_client = SearchIndexerClient(endpoint, AzureKeyCredential(api_key))
        created_source = await datasource_client.create_data_source_connection(connection_model)

        key_field = {
            "name": "hotelId",
            "type": "Edm.String",
            "key": True,
            "searchable": False,
        }
        index_model = SearchIndex(name=id_name, fields=[key_field])
        index_client = SearchIndexClient(endpoint, AzureKeyCredential(api_key))
        created_index = await index_client.create_index(index_model)

        return SearchIndexer(
            name=name,
            data_source_name=created_source.name,
            target_index_name=created_index.name,
        )
Beispiel #5
0
 async def test_get_datasource_async(self, api_key, endpoint, index_name,
                                     **kwargs):
     """Create a data source and check it can be fetched back by name."""
     client = SearchIndexerClient(endpoint, AzureKeyCredential(api_key))
     new_source = self._create_datasource()
     await client.create_datasource(new_source)

     fetched = await client.get_datasource("sample-datasource")
     assert fetched.name == "sample-datasource"
 async def test_search_indexers(self, endpoint, api_key, **kwargs):
     """Run every indexer scenario in order against one pair of clients.

     The storage connection string and container name are read from
     ``kwargs`` and forwarded unchanged to each scenario helper.
     """
     storage_cs = kwargs.get("search_storage_connection_string")
     container_name = kwargs.get("search_storage_container_name")
     client = SearchIndexerClient(endpoint, api_key)
     index_client = SearchIndexClient(endpoint, api_key)
     async with client, index_client:
         # Scenarios run strictly in this order.
         scenarios = (
             self._test_create_indexer,
             self._test_delete_indexer,
             self._test_get_indexer,
             self._test_list_indexer,
             self._test_create_or_update_indexer,
             self._test_reset_indexer,
             self._test_run_indexer,
             self._test_get_indexer_status,
             self._test_create_or_update_indexer_if_unchanged,
             self._test_delete_indexer_if_unchanged,
         )
         for scenario in scenarios:
             await scenario(client, index_client, storage_cs, container_name)
Beispiel #7
0
 async def test_create_indexer(self, api_key, endpoint, index_name, **kwargs):
     """Create an indexer and check the names echoed back by the service."""
     client = SearchIndexerClient(endpoint, AzureKeyCredential(api_key))
     prepared = await self._prepare_indexer(endpoint, api_key)

     created = await client.create_indexer(prepared)

     assert created.name == "sample-indexer"
     assert created.target_index_name == "hotels"
     assert created.data_source_name == "sample-datasource"
Beispiel #8
0
    async def test_create_or_update_skillset_if_unchanged(
            self, api_key, endpoint, index_name, **kwargs):
        """Expect a conditional update with the first call's ETag to fail.

        Creates ``test-ss``, updates it once, restores the ETag captured from
        the first call on the model, and expects a further update with
        ``MatchConditions.IfNotModified`` to raise ``HttpResponseError``.
        """
        client = SearchIndexerClient(endpoint, AzureKeyCredential(api_key))
        skill = EntityRecognitionSkill(
            inputs=[InputFieldMappingEntry(name="text", source="/document/content")],
            outputs=[OutputFieldMappingEntry(name="organizations", target_name="organizations")],
        )

        skillset = await client.create_or_update_skillset(
            name='test-ss', skills=[skill], description="desc1")
        first_etag = skillset.e_tag

        await client.create_or_update_skillset(
            name='test-ss', skills=[skill], description="desc2", skillset=skillset)
        existing = await client.get_skillsets()
        assert len(existing) == 1

        # Put the ETag from the first call back on the model before the
        # conditional update.
        skillset.e_tag = first_etag
        with pytest.raises(HttpResponseError):
            await client.create_or_update_skillset(
                name='test-ss',
                skills=[skill],
                skillset=skillset,
                match_condition=MatchConditions.IfNotModified)
Beispiel #9
0
 async def test_get_indexer_status(self, api_key, endpoint, index_name,
                                   **kwargs):
     """Check that a newly created indexer reports a non-null ``status``."""
     client = SearchIndexerClient(endpoint, AzureKeyCredential(api_key))
     prepared = await self._prepare_indexer(endpoint, api_key)
     await client.create_indexer(prepared)

     indexer_status = await client.get_indexer_status("sample-indexer")
     assert indexer_status.status is not None
Beispiel #10
0
 async def test_delete_datasource_async(self, api_key, endpoint, index_name, **kwargs):
     """Create one data source connection, delete it by name, expect none left."""
     client = SearchIndexerClient(endpoint, AzureKeyCredential(api_key))
     connection_model = self._create_data_source_connection()
     await client.create_data_source_connection(connection_model)

     listed = await client.get_data_source_connections()
     assert len(listed) == 1

     await client.delete_data_source_connection("sample-datasource")
     listed = await client.get_data_source_connections()
     assert len(listed) == 0
Beispiel #11
0
 async def test_delete_indexer(self, api_key, endpoint, index_name, **kwargs):
     """Create one indexer, delete it by name, expect none left."""
     client = SearchIndexerClient(endpoint, AzureKeyCredential(api_key))
     prepared = await self._prepare_indexer(endpoint, api_key)
     await client.create_indexer(prepared)

     listed = await client.get_indexers()
     assert len(listed) == 1

     await client.delete_indexer("sample-indexer")
     listed = await client.get_indexers()
     assert len(listed) == 0
Beispiel #12
0
 async def test_reset_indexer(self, api_key, endpoint, index_name, **kwargs):
     """Reset an indexer and check its last result status.

     After the reset, the lower-cased ``last_result.status`` must be either
     ``inprogress`` or ``reset``.
     """
     client = SearchIndexerClient(endpoint, AzureKeyCredential(api_key))
     prepared = await self._prepare_indexer(endpoint, api_key)
     await client.create_indexer(prepared)
     listed = await client.get_indexers()
     assert len(listed) == 1

     await client.reset_indexer("sample-indexer")
     indexer_status = await client.get_indexer_status("sample-indexer")
     last_status = indexer_status.last_result.status.lower()
     assert last_status in ('inprogress', 'reset')
    async def test_create_or_update_skillset_inplace(self, api_key, endpoint,
                                                     index_name, **kwargs):
        """Update ``test-ss`` under the same name and verify the result.

        After the second ``create_or_update_skillset`` call there must still
        be exactly one skillset, and fetching it must return the ``desc2``
        description.
        """
        client = SearchIndexerClient(endpoint, AzureKeyCredential(api_key))
        skill = EntityRecognitionSkill(
            inputs=[InputFieldMappingEntry(name="text", source="/document/content")],
            outputs=[OutputFieldMappingEntry(name="organizations", target_name="organizations")],
        )

        initial = await client.create_or_update_skillset(
            SearchIndexerSkillset(name='test-ss', skills=[skill], description="desc1"))
        replacement = SearchIndexerSkillset(
            name='test-ss',
            skills=[skill],
            description="desc2",
            skillset=initial,
        )
        await client.create_or_update_skillset(replacement)
        existing = await client.get_skillsets()
        assert len(existing) == 1

        fetched = await client.get_skillset("test-ss")
        assert isinstance(fetched, SearchIndexerSkillset)
        assert fetched.name == "test-ss"
        assert fetched.description == "desc2"
Beispiel #14
0
 async def test_run_indexer(self, api_key, endpoint, index_name, **kwargs):
     """Run an indexer and check that its status is reported as ``running``.

     Creates ``sample-indexer``, confirms it is the only indexer, triggers a
     run, and asserts on the status returned by ``get_indexer_status``.
     """
     client = SearchIndexerClient(endpoint, AzureKeyCredential(api_key))
     indexer = await self._prepare_indexer(endpoint, api_key)
     await client.create_indexer(indexer)
     assert len(await client.get_indexers()) == 1

     await client.run_indexer("sample-indexer")
     indexer_status = await client.get_indexer_status("sample-indexer")
     assert indexer_status.status == 'running'
Beispiel #15
0
 async def test_create_datasource_async(self, api_key, endpoint, index_name,
                                        **kwargs):
     """Create a data source connection and check its name and type."""
     client = SearchIndexerClient(endpoint, AzureKeyCredential(api_key))
     connection_model = self._create_data_source_connection()

     created = await client.create_data_source_connection(connection_model)

     assert created.name == "sample-datasource"
     assert created.type == "azureblob"
Beispiel #16
0
 async def test_list_indexer(self, api_key, endpoint, index_name, **kwargs):
     """Create two indexers and check that listing returns exactly both names."""
     client = SearchIndexerClient(endpoint, AzureKeyCredential(api_key))
     first = await self._prepare_indexer(endpoint, api_key)
     second = await self._prepare_indexer(
         endpoint,
         api_key,
         name="another-indexer",
         ds_name="another-datasource",
         id_name="another-index",
     )
     await client.create_indexer(first)
     await client.create_indexer(second)

     listed = await client.get_indexers()
     assert isinstance(listed, list)
     assert {item.name for item in listed} == {"sample-indexer", "another-indexer"}
Beispiel #17
0
 async def test_list_datasource_async(self, api_key, endpoint, index_name, **kwargs):
     """Create two data source connections and check listing returns both names."""
     client = SearchIndexerClient(endpoint, AzureKeyCredential(api_key))
     first = self._create_data_source_connection()
     second = self._create_data_source_connection(name="another-sample")
     await client.create_data_source_connection(first)
     await client.create_data_source_connection(second)

     listed = await client.get_data_source_connections()
     assert isinstance(listed, list)
     assert {item.name for item in listed} == {"sample-datasource", "another-sample"}
Beispiel #18
0
 async def test_create_or_update_datasource_async(self, api_key, endpoint, index_name, **kwargs):
     """Update a data source connection's description and verify the result.

     The connection count must stay at one across the update, and fetching
     the connection by name must return the new description.
     """
     client = SearchIndexerClient(endpoint, AzureKeyCredential(api_key))
     connection_model = self._create_data_source_connection()
     await client.create_data_source_connection(connection_model)
     listed = await client.get_data_source_connections()
     assert len(listed) == 1

     connection_model.description = "updated"
     await client.create_or_update_data_source_connection(connection_model)
     listed = await client.get_data_source_connections()
     assert len(listed) == 1

     fetched = await client.get_data_source_connection("sample-datasource")
     assert fetched.name == "sample-datasource"
     assert fetched.description == "updated"
Beispiel #19
0
 async def test_create_or_update_indexer(self, api_key, endpoint, index_name, **kwargs):
     """Update an indexer's description and verify the result.

     The indexer count must stay at one across the update, and fetching the
     indexer by name must return the new description.
     """
     client = SearchIndexerClient(endpoint, AzureKeyCredential(api_key))
     prepared = await self._prepare_indexer(endpoint, api_key)
     await client.create_indexer(prepared)
     listed = await client.get_indexers()
     assert len(listed) == 1

     prepared.description = "updated"
     await client.create_or_update_indexer(prepared)
     listed = await client.get_indexers()
     assert len(listed) == 1

     fetched = await client.get_indexer("sample-indexer")
     assert fetched.name == "sample-indexer"
     assert fetched.description == "updated"
    async def test_get_skillsets(self, api_key, endpoint, index_name, **kwargs):
        """Create two skillsets and check that listing returns exactly both."""
        client = SearchIndexerClient(endpoint, AzureKeyCredential(api_key))
        skill = EntityRecognitionSkill(
            inputs=[InputFieldMappingEntry(name="text", source="/document/content")],
            outputs=[OutputFieldMappingEntry(name="organizations", target_name="organizations")],
        )

        for skillset_name, skillset_description in (("test-ss-1", "desc1"), ("test-ss-2", "desc2")):
            await client.create_skillset(
                name=skillset_name, skills=[skill], description=skillset_description)

        listed = await client.get_skillsets()
        assert isinstance(listed, list)
        assert all(isinstance(item, SearchIndexerSkillset) for item in listed)
        assert {item.name for item in listed} == {"test-ss-1", "test-ss-2"}
Beispiel #21
0
 async def test_skillset_crud(self, api_key, endpoint):
     """Run every skillset scenario in order against a single client."""
     client = SearchIndexerClient(endpoint, api_key)
     async with client:
         # Scenarios run strictly in this order.
         scenarios = (
             self._test_create_skillset,
             self._test_get_skillset,
             self._test_get_skillsets,
             self._test_create_or_update_skillset,
             self._test_create_or_update_skillset_if_unchanged,
             self._test_create_or_update_skillset_inplace,
             self._test_delete_skillset_if_unchanged,
             self._test_delete_skillset,
         )
         for scenario in scenarios:
             await scenario(client)
Beispiel #22
0
    async def test_delete_indexer_if_unchanged(self, api_key, endpoint, index_name, **kwargs):
        """Expect a conditional indexer delete with the creation ETag to fail.

        Creates the indexer, updates its description, puts the ETag from the
        creation response on the model, and expects ``delete_indexer`` with
        ``MatchConditions.IfNotModified`` to raise ``HttpResponseError``.
        """
        client = SearchIndexerClient(endpoint, AzureKeyCredential(api_key))
        prepared = await self._prepare_indexer(endpoint, api_key)
        created = await client.create_indexer(prepared)
        creation_etag = created.e_tag

        prepared.description = "updated"
        await client.create_or_update_indexer(prepared)

        # Use the ETag captured before the update for the conditional delete.
        prepared.e_tag = creation_etag
        with pytest.raises(HttpResponseError):
            await client.delete_indexer(
                prepared, match_condition=MatchConditions.IfNotModified)
Beispiel #23
0
 async def test_data_source(self, endpoint, api_key, **kwargs):
     """Run every data source scenario in order against a single client.

     The storage connection string is read from ``kwargs`` and forwarded
     unchanged to each scenario helper.
     """
     storage_cs = kwargs.get("search_storage_connection_string")
     client = SearchIndexerClient(endpoint, api_key)
     async with client:
         # Scenarios run strictly in this order.
         scenarios = (
             self._test_create_datasource,
             self._test_delete_datasource,
             self._test_get_datasource,
             self._test_list_datasources,
             self._test_create_or_update_datasource,
             self._test_create_or_update_datasource_if_unchanged,
             self._test_delete_datasource_if_unchanged,
         )
         for scenario in scenarios:
             await scenario(client, storage_cs)
Beispiel #24
0
 def test_indexer_credential_roll(self):
     """Check that updating the key credential changes the client's headers."""
     credential = AzureKeyCredential(key="old_api_key")
     client = SearchIndexerClient("endpoint", credential)

     def expected_headers(api_key_value):
         # Full header mapping the client is expected to expose for a key.
         return {
             "api-key": api_key_value,
             "Accept": "application/json;odata.metadata=minimal",
         }

     assert client._headers == expected_headers("old_api_key")
     credential.update("new_api_key")
     assert client._headers == expected_headers("new_api_key")
    async def test_delete_skillset(self, api_key, endpoint, index_name, **kwargs):
        """Create one skillset, delete it by name, expect none left.

        When ``self.is_live`` is true the test waits ``TIME_TO_SLEEP`` seconds
        after the delete before listing skillsets again.
        """
        # Local import: only this coroutine needs asyncio for its wait.
        import asyncio

        client = SearchIndexerClient(endpoint, AzureKeyCredential(api_key))
        s = EntityRecognitionSkill(inputs=[InputFieldMappingEntry(name="text", source="/document/content")],
                                   outputs=[OutputFieldMappingEntry(name="organizations", target_name="organizations")])

        await client.create_skillset(name='test-ss', skills=[s], description="desc")
        assert len(await client.get_skillsets()) == 1

        await client.delete_skillset("test-ss")
        if self.is_live:
            # Awaitable sleep: time.sleep() here would block the event loop.
            await asyncio.sleep(TIME_TO_SLEEP)
        assert len(await client.get_skillsets()) == 0
Beispiel #26
0
    async def test_create_skillset(self, api_key, endpoint, index_name, **kwargs):
        """Create a five-skill skillset and verify the returned model.

        The skillset holds two entity recognition skills, two sentiment
        skills and one entity linking skill. The test checks the returned
        name, description, ETag, skill types and skill versions, and that
        exactly one skillset exists afterwards.
        """
        client = SearchIndexerClient(endpoint, AzureKeyCredential(api_key))
        name = "test-ss"

        def text_inputs():
            # Each skill receives its own freshly built input mapping list.
            return [InputFieldMappingEntry(name="text", source="/document/content")]

        def single_output(output_name, target):
            return [OutputFieldMappingEntry(name=output_name, target_name=target)]

        skills = [
            EntityRecognitionSkill(
                inputs=text_inputs(),
                outputs=single_output("organizations", "organizationsS1"),
                description="Skill Version 1",
                model_version="1",
                include_typeless_entities=True,
            ),
            EntityRecognitionSkill(
                inputs=text_inputs(),
                outputs=single_output("organizations", "organizationsS2"),
                skill_version=EntityRecognitionSkillVersion.LATEST,
                description="Skill Version 3",
                model_version="3",
                include_typeless_entities=True,
            ),
            SentimentSkill(
                inputs=text_inputs(),
                outputs=single_output("score", "scoreS3"),
                skill_version=SentimentSkillVersion.V1,
                description="Sentiment V1",
                include_opinion_mining=True,
            ),
            SentimentSkill(
                inputs=text_inputs(),
                outputs=single_output("confidenceScores", "scoreS4"),
                skill_version=SentimentSkillVersion.V3,
                description="Sentiment V3",
                include_opinion_mining=True,
            ),
            EntityLinkingSkill(
                inputs=text_inputs(),
                outputs=single_output("entities", "entitiesS5"),
                minimum_precision=0.5,
            ),
        ]

        skillset = SearchIndexerSkillset(name=name, skills=skills, description="desc")
        result = await client.create_skillset(skillset)

        assert isinstance(result, SearchIndexerSkillset)
        assert result.name == "test-ss"
        assert result.description == "desc"
        assert result.e_tag
        assert len(result.skills) == 5

        # Expected type and version for the first four skills, in order.
        versioned_expectations = (
            (EntityRecognitionSkill, EntityRecognitionSkillVersion.V1),
            (EntityRecognitionSkill, EntityRecognitionSkillVersion.V3),
            (SentimentSkill, SentimentSkillVersion.V1),
            (SentimentSkill, SentimentSkillVersion.V3),
        )
        for position, (skill_type, version) in enumerate(versioned_expectations):
            assert isinstance(result.skills[position], skill_type)
            assert result.skills[position].skill_version == version

        assert isinstance(result.skills[4], EntityLinkingSkill)
        assert result.skills[4].minimum_precision == 0.5

        assert len(await client.get_skillsets()) == 1
Beispiel #27
0
 async def test_skillset_crud(self, api_key, endpoint):
     """Run the enabled skillset scenarios in order against a single client.

     Three scenarios are commented out; the TODO notes below give the reason.
     """
     indexer_client = SearchIndexerClient(endpoint, api_key)
     async with indexer_client:
         await self._test_create_skillset(indexer_client)
         await self._test_get_skillset(indexer_client)
         await self._test_get_skillsets(indexer_client)
         # TODO: Disabled due to service regression. See #22769
         #await self._test_create_or_update_skillset(indexer_client)
         await self._test_create_or_update_skillset_if_unchanged(indexer_client)
         # TODO: Disabled due to service regression. See #22769
         #await self._test_create_or_update_skillset_inplace(indexer_client)
         # TODO: Disabled due to service regression. See #22769
         #await self._test_delete_skillset_if_unchanged(indexer_client)
         await self._test_delete_skillset(indexer_client)
    async def test_create_skillset(self, api_key, endpoint, index_name, **kwargs):
        """Create a single-skill skillset and verify the returned model.

        Checks the returned type, name, description, ETag and skill list, and
        that exactly one skillset exists afterwards.
        """
        client = SearchIndexerClient(endpoint, AzureKeyCredential(api_key))
        skill = EntityRecognitionSkill(
            inputs=[InputFieldMappingEntry(name="text", source="/document/content")],
            outputs=[OutputFieldMappingEntry(name="organizations", target_name="organizations")],
        )

        created = await client.create_skillset(
            name='test-ss', skills=[skill], description="desc")

        assert isinstance(created, SearchIndexerSkillset)
        assert created.name == "test-ss"
        assert created.description == "desc"
        assert created.e_tag
        assert len(created.skills) == 1
        assert isinstance(created.skills[0], EntityRecognitionSkill)

        existing = await client.get_skillsets()
        assert len(existing) == 1
Beispiel #29
0
    async def test_delete_datasource_if_unchanged(self, api_key, endpoint, index_name, **kwargs):
        """Expect a conditional data source delete with the creation ETag to fail.

        Creates the data source connection, updates its description, puts the
        ETag from the creation response on the model, and expects
        ``delete_data_source_connection`` with
        ``MatchConditions.IfNotModified`` to raise ``HttpResponseError``.
        Afterwards the connection must still be listed.
        """
        client = SearchIndexerClient(endpoint, AzureKeyCredential(api_key))
        data_source_connection = self._create_data_source_connection()
        created = await client.create_data_source_connection(data_source_connection)
        etag = created.e_tag

        # Now update the data source connection
        data_source_connection.description = "updated"
        await client.create_or_update_data_source_connection(data_source_connection)

        # Use the ETag captured before the update for the conditional delete.
        data_source_connection.e_tag = etag
        with pytest.raises(HttpResponseError):
            await client.delete_data_source_connection(data_source_connection, match_condition=MatchConditions.IfNotModified)

        # Checked outside the raises block: a statement placed after the
        # raising call inside that block would never execute.
        assert len(await client.get_data_source_connections()) == 1
"""

import asyncio
import os

# Service settings are read from environment variables; each value is None
# when its variable is not set.
service_endpoint = os.environ.get("AZURE_SEARCH_SERVICE_ENDPOINT")
key = os.environ.get("AZURE_SEARCH_API_KEY")
connection_string = os.environ.get("AZURE_STORAGE_CONNECTION_STRING")

from azure.core.credentials import AzureKeyCredential
from azure.search.documents.indexes.models import (
    SearchIndexerDataContainer, SearchIndexerDataSourceConnection, SearchIndex,
    SearchIndexer, SimpleField, SearchFieldDataType)
from azure.search.documents.indexes.aio import SearchIndexerClient, SearchIndexClient

# Module-level SearchIndexerClient built from the endpoint and API key read
# from the environment.
indexers_client = SearchIndexerClient(
    service_endpoint,
    AzureKeyCredential(key),
)


async def create_indexer():
    # create an index
    index_name = "hotels"
    fields = [
        SimpleField(name="hotelId", type=SearchFieldDataType.String, key=True),
        SimpleField(name="baseRate", type=SearchFieldDataType.Double)
    ]
    index = SearchIndex(name=index_name, fields=fields)
    ind_client = SearchIndexerClient(service_endpoint, AzureKeyCredential(key))
    async with ind_client:
        await ind_client.create_index(index)

    # [START create_indexer_async]