Example #1
    def test_dataset(self, data_manager_client: DataManagerClient):
        # Tests the dataset lifecycle (create, list, delete) without uploading data
        create_response = data_manager_client.create_dataset_schema(
            dataset_schema=self.new_schema)
        new_dataset_schema_id = create_response["id"]

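        # Record how many datasets exist before creating a new one.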
        all_datasets = data_manager_client.read_dataset_collection()
        prev_count = all_datasets["count"]

        new_dataset_response = data_manager_client.create_dataset(
            dataset_schema_id=new_dataset_schema_id, dataset_name="my-dataset")

        assert new_dataset_response["name"] == "my-dataset"
        assert new_dataset_response["datasetSchemaId"] == new_dataset_schema_id
        assert new_dataset_response["status"] == "NO_DATA"

        new_dataset_id = new_dataset_response["id"]

        all_datasets = data_manager_client.read_dataset_collection()
        new_count = all_datasets["count"]

        assert new_count == prev_count + 1

        data_manager_client.delete_dataset_by_id(new_dataset_id)
        data_manager_client.delete_dataset_schema_by_id(new_dataset_schema_id)

        all_datasets = data_manager_client.read_dataset_collection()
        after_deletion_count = all_datasets["count"]

        assert after_deletion_count == new_count - 1
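
Both tests reference self.new_schema, which is defined elsewhere in the test class. Below is a minimal sketch of what such a dataset schema dict could look like, assuming the features/labels layout used by the Data Attribute Recommendation service and matching the CSV columns in Example #2; the key names are an assumption, not shown in the snippet.

# Hypothetical dataset schema matching the CSV columns in Example #2.
# The "features"/"labels"/"name" layout is an assumption for illustration.
new_schema = {
    "features": [
        {"label": "manufacturer", "type": "CATEGORY"},
        {"label": "description", "type": "TEXT"},
    ],
    "labels": [
        {"label": "category", "type": "CATEGORY"},
        {"label": "subcategory", "type": "CATEGORY"},
    ],
    "name": "test-dataset-schema",
}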
Example #2
    def test_dataset_upload(self, data_manager_client: DataManagerClient):
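        # Sample records in several scripts to exercise UTF-8 handling.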
        csv = """
manufacturer,description,category,subcategory
me,"simple è test, records",A,AA
me,"übrigens ein Beispiel, records",A,AA
me,"un po' di testo",A,AA
me,"какой-то текст",A,AA
me,"du texte",A,AA
me,"一些文字",A,AA
me,"कुछ पाठ",A,AA
me,"κάποιο κείμενο",A,AA
me,"кейбір мәтін",A,AA
me,"iu teksto",A,AA
        """
        data_stream = BytesIO(csv.strip().encode("utf-8"))

        create_response = data_manager_client.create_dataset_schema(
            dataset_schema=self.new_schema)
        new_dataset_schema_id = create_response["id"]

        new_dataset_response = data_manager_client.create_dataset(
            dataset_schema_id=new_dataset_schema_id, dataset_name="my-dataset")

        new_dataset_id = new_dataset_response["id"]

        response = data_manager_client.upload_data_and_validate(
            new_dataset_id, data_stream)
        assert response["status"] == "SUCCEEDED"

        data_manager_client.delete_dataset_by_id(new_dataset_id)
        data_manager_client.delete_dataset_schema_by_id(new_dataset_schema_id)
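
In the tests above, data_manager_client is provided as a fixture. Outside a test suite, a client instance can be obtained through the *construct_* methods mentioned in the ModelCreator docstring below. A sketch, assuming construct_from_credentials accepts the service URL and OAuth client credentials; the parameter names and module path are assumptions, so consult the BaseClient documentation for the exact signature.

from sap.aibus.dar.client.data_manager_client import DataManagerClient

# Parameter names below are assumptions for illustration; see the
# BaseClient documentation for the exact construct_from_credentials signature.
data_manager_client = DataManagerClient.construct_from_credentials(
    dar_url="https://aiservices-dar.example.com/",
    clientid="my-client-id",
    clientsecret="my-client-secret",
    uaa_url="https://my-account.authentication.example.com/",
)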
Example #3
class ModelCreator(BaseClient):
    """
    This class provides a high-level means of training a model from a CSV file.

    To construct an instance of this class, see the various *construct_* methods
    such as
    :meth:`~sap.aibus.dar.client.base_client.BaseClient.construct_from_credentials`
    in :class:`~sap.aibus.dar.client.base_client.BaseClient`.

    Internally, the class wraps and orchestrates :class:`DataManagerClient` and
    :class:`ModelManagerClient`.
    """
    def __init__(self, url: str, source: CredentialsSource):
        self.data_manager_client = DataManagerClient(url=url,
                                                     credentials_source=source)
        self.model_manager_client = ModelManagerClient(
            url=url, credentials_source=source)

    def create(
        self,
        data_stream: typing.BinaryIO,
        model_template_id: str,
        dataset_schema: dict,
        model_name: str,
    ) -> dict:
        """
        Trains a model from a CSV file.

        Internally, this method creates the required DatasetSchema and Dataset entities,
        uploads the data and starts the training job. The method will block until
        the training job finishes.

        Once this method returns, the model `model_name` can be deployed and used
        for inference.

        This method will raise an Exception if an error occurs.

        **No** cleanup is performed: if, for example, a *TrainingJobFailed* or
        *TrainingJobTimeOut* exception occurs, the previously created Dataset
        and DatasetSchema will remain within the service and must be cleaned up
        manually.

        :param data_stream: binary stream containing a CSV file in UTF-8 encoding
        :param model_template_id: the model template ID
        :param dataset_schema: the dataset schema as a dict
        :param model_name: name of the model to be trained
        :raises TrainingJobFailed: if the training job finishes in status FAILED
        :raises TrainingJobTimeOut: if the training job does not finish in time
        :raises DatasetValidationTimeout: if dataset validation takes too long
        :raises DatasetValidationFailed: if dataset validation does not finish
            in status *SUCCEEDED*
        :return: the trained model resource as a dict
        """
        self.log.info("Creating DatasetSchema.")
        response_dataset_schema = self.data_manager_client.create_dataset_schema(
            dataset_schema)
        dataset_schema_id = response_dataset_schema["id"]
        self.log.info("Created dataset schema with id '%s'", dataset_schema_id)

        dataset_name = self.format_dataset_name(model_name)
        self.log.info("Creating Dataset with name '%s'", dataset_name)

        response_dataset = self.data_manager_client.create_dataset(
            dataset_name=dataset_name, dataset_schema_id=dataset_schema_id)

        dataset_id = response_dataset["id"]
        self.log.info("Created Dataset with id '%s'", dataset_id)

        self.log.info("Uploading data to Dataset '%s'", dataset_id)

        self.data_manager_client.upload_data_and_validate(
            dataset_id=dataset_id, data_stream=data_stream)
        self.log.info(
            "Data uploaded and validated successfully for dataset '%s'",
            dataset_id)

        self.log.info("Starting training job.")
        response_job_creation = self.model_manager_client.create_job_and_wait(
            model_name=model_name,
            dataset_id=dataset_id,
            model_template_id=model_template_id,
        )
        self.log.info("Training finished successfully. Job ID: '%s'",
                      response_job_creation["id"])

        model = self.model_manager_client.read_model_by_name(
            model_name=model_name)
        self.log.debug("Final model resource: '%s'", model)
        return model

    @staticmethod
    def format_dataset_name(model_name: str) -> str:
        """
        Derives a Dataset name from a Model name.

        For the purpose of automation, the Dataset name is derived from the
        Model name by appending a random UUID suffix.

        The return value has no more than 255 characters.

        :param model_name: Model name
        :return: suitable Dataset name
        """
        random_string = "-" + str(uuid.uuid4())
        # Truncate the model name so that the name plus the random suffix
        # stays within the 255-character limit.
        return model_name[0:255 - len(random_string)] + random_string
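
Putting the pieces together, ModelCreator.create drives the whole workflow from a CSV file to a trained model. A usage sketch under the same assumptions about construct_from_credentials as above; the model template ID is a placeholder, and the workflow module path is inferred from the package naming in the docstrings.

from sap.aibus.dar.client.workflow.model import ModelCreator

# construct_from_credentials parameters and the model template ID are
# placeholders/assumptions for illustration.
creator = ModelCreator.construct_from_credentials(
    dar_url="https://aiservices-dar.example.com/",
    clientid="my-client-id",
    clientsecret="my-client-secret",
    uaa_url="https://my-account.authentication.example.com/",
)

with open("training_data.csv", "rb") as data_stream:
    # Blocks until training finishes; raises on failure or timeout.
    model = creator.create(
        data_stream=data_stream,
        model_template_id="00000000-0000-0000-0000-000000000000",  # placeholder
        dataset_schema=new_schema,  # e.g. the schema sketched after Example #1
        model_name="my-model",
    )

print("Trained model:", model["name"])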