コード例 #1
0
def test_copy_checkpoints(
    mock_validate_input,
    mock_validate_entry_point,
    mock_process_local_source,
    aws_session,
    entry_point,
    device,
    checkpoint_config,
    generate_get_job_response,
):
    """Checkpoints of the referenced job are copied into this job's checkpoint URI."""
    source_checkpoint_uri = "s3://amazon-braket-jobs/job-path/checkpoints"
    job_response = generate_get_job_response(
        checkpointConfig={"s3Uri": source_checkpoint_uri}
    )
    aws_session.get_job.return_value = job_response
    prepare_quantum_job(
        aws_session=aws_session,
        device=device,
        source_module="source_module",
        entry_point=entry_point,
        checkpoint_config=checkpoint_config,
        copy_checkpoints_from_job="other-job-arn",
    )
    # The source job's s3Uri must be copied into this job's checkpoint s3Uri.
    aws_session.copy_s3_directory.assert_called_with(
        source_checkpoint_uri, checkpoint_config.s3Uri
    )
コード例 #2
0
def test_invalid_input_parameters(entry_point, aws_session):
    """A non-InstanceConfig value for `instance_config` raises ValueError."""
    expected_message = (
        "'instance_config' should be of '<class 'braket.jobs.config.InstanceConfig'>' "
        "but user provided <class 'int'>."
    )
    hyperparameters = {
        "param-1": "first parameter",
        "param-2": "second param",
    }
    with pytest.raises(ValueError, match=expected_message):
        prepare_quantum_job(
            device="arn:aws:braket:::device/quantum-simulator/amazon/sv1",
            source_module="alpha_test_job",
            entry_point=entry_point,
            hyperparameters=hyperparameters,
            instance_config=2,
            aws_session=aws_session,
        )
コード例 #3
0
def test_create_job(
    mock_time,
    mock_path,
    mock_findspec,
    mock_tarfile,
    aws_session,
    source_module,
    create_job_args,
):
    """prepare_quantum_job output matches the independently translated args."""
    mock_time.return_value = datetime.datetime.now().timestamp()
    resolved = mock_path.return_value.resolve.return_value
    resolved.parent = "parent_dir"
    resolved.stem = source_module
    mock_path.return_value.name = "file_name"
    expected = _translate_creation_args(create_job_args)
    actual = prepare_quantum_job(**create_job_args)
    assert actual == expected
コード例 #4
0
    def create(
        cls,
        device: str,
        source_module: str,
        entry_point: str = None,
        image_uri: str = None,
        job_name: str = None,
        code_location: str = None,
        role_arn: str = None,
        hyperparameters: Dict[str, Any] = None,
        input_data: Union[str, Dict, S3DataSourceConfig] = None,
        output_data_config: OutputDataConfig = None,
        checkpoint_config: CheckpointConfig = None,
        aws_session: AwsSession = None,
    ) -> LocalQuantumJob:
        """Creates and runs job by setting up and running the customer script in a local
         docker container.

         Args:
            device (str): ARN for the AWS device which is primarily
                accessed for the execution of this job.

            source_module (str): Path (absolute, relative or an S3 URI) to a python module to be
                tarred and uploaded. If `source_module` is an S3 URI, it must point to a
                tar.gz file. Otherwise, source_module may be a file or directory.

            entry_point (str): A str that specifies the entry point of the job, relative to
                the source module. The entry point must be in the format
                `importable.module` or `importable.module:callable`. For example,
                `source_module.submodule:start_here` indicates the `start_here` function
                contained in `source_module.submodule`. If source_module is an S3 URI,
                entry point must be given. Default: source_module's name

            image_uri (str): A str that specifies the ECR image to use for executing the job.
                `image_uris.retrieve_image()` function may be used for retrieving the ECR image URIs
                for the containers supported by Braket. Default = `<Braket base image_uri>`.

            job_name (str): A str that specifies the name with which the job is created.
                Default: f'{image_uri_type}-{timestamp}'.

            code_location (str): The S3 prefix URI where custom code will be uploaded.
                Default: f's3://{default_bucket_name}/jobs/{job_name}/script'.

            role_arn (str): This field is currently not used for local jobs. Local jobs will use
                the current role's credentials. This may be subject to change.

            hyperparameters (Dict[str, Any]): Hyperparameters accessible to the job.
                The hyperparameters are made accessible as a Dict[str, str] to the job.
                For convenience, this accepts other types for keys and values, but `str()`
                is called to convert them before being passed on. Default: None.

            input_data (Union[str, S3DataSourceConfig, dict]): Information about the training
                data. Dictionary maps channel names to local paths or S3 URIs. Contents found
                at any local paths will be uploaded to S3 at
                f's3://{default_bucket_name}/jobs/{job_name}/data/{channel_name}. If a local
                path, S3 URI, or S3DataSourceConfig is provided, it will be given a default
                channel name "input".
                Default: {}.

            output_data_config (OutputDataConfig): Specifies the location for the output of the job.
                Default: OutputDataConfig(s3Path=f's3://{default_bucket_name}/jobs/{job_name}/data',
                kmsKeyId=None).

            checkpoint_config (CheckpointConfig): Configuration that specifies the location where
                checkpoint data is stored.
                Default: CheckpointConfig(localPath='/opt/jobs/checkpoints',
                s3Uri=f's3://{default_bucket_name}/jobs/{job_name}/checkpoints').

            aws_session (AwsSession): AwsSession for connecting to AWS Services.
                Default: AwsSession()

        Returns:
            LocalQuantumJob: The representation of a local Braket Job.
        """
        # Validate all arguments and normalize them into CreateJob-style request
        # kwargs (keys such as "jobName" and "algorithmSpecification" below).
        create_job_kwargs = prepare_quantum_job(
            device=device,
            source_module=source_module,
            entry_point=entry_point,
            image_uri=image_uri,
            job_name=job_name,
            code_location=code_location,
            role_arn=role_arn,
            hyperparameters=hyperparameters,
            input_data=input_data,
            output_data_config=output_data_config,
            checkpoint_config=checkpoint_config,
            aws_session=aws_session,
        )

        # Local jobs write their results into a directory named after the job;
        # refuse to run rather than clobber an existing directory.
        job_name = create_job_kwargs["jobName"]
        if os.path.isdir(job_name):
            raise ValueError(
                f"A local directory called {job_name} already exists. "
                f"Please use a different job name."
            )

        session = aws_session or AwsSession()
        # Prefer the container image named in the algorithm specification;
        # otherwise fall back to the Braket base image for this session's region.
        algorithm_specification = create_job_kwargs["algorithmSpecification"]
        if "containerImage" in algorithm_specification:
            image_uri = algorithm_specification["containerImage"]["uri"]
        else:
            image_uri = retrieve_image(Framework.BASE, session.region)

        with _LocalJobContainer(image_uri) as container:
            # Run the job inside the container, then copy the model output and
            # the full run log into the local job directory.
            env_variables = setup_container(container, session, **create_job_kwargs)
            container.run_local_job(env_variables)
            container.copy_from("/opt/ml/model", job_name)
            with open(os.path.join(job_name, "log.txt"), "w") as log_file:
                log_file.write(container.run_log)
            # If checkpointing was configured, also copy any checkpoint data
            # out of the container.
            if "checkpointConfig" in create_job_kwargs:
                checkpoint_config = create_job_kwargs["checkpointConfig"]
                if "localPath" in checkpoint_config:
                    checkpoint_path = checkpoint_config["localPath"]
                    container.copy_from(checkpoint_path, os.path.join(job_name, "checkpoints"))
            # Capture the log text before the container context exits.
            run_log = container.run_log
        return LocalQuantumJob(f"local:job/{job_name}", run_log)
コード例 #5
0
    def create(
            cls,
            device: str,
            source_module: str,
            entry_point: str = None,
            image_uri: str = None,
            job_name: str = None,
            code_location: str = None,
            role_arn: str = None,
            wait_until_complete: bool = False,
            hyperparameters: Dict[str, Any] = None,
            input_data: Union[str, Dict, S3DataSourceConfig] = None,
            instance_config: InstanceConfig = None,
            distribution: str = None,
            stopping_condition: StoppingCondition = None,
            output_data_config: OutputDataConfig = None,
            copy_checkpoints_from_job: str = None,
            checkpoint_config: CheckpointConfig = None,
            aws_session: AwsSession = None,
            tags: Dict[str, str] = None,
            logger: Logger = getLogger(__name__),
    ) -> AwsQuantumJob:
        """Creates a job by invoking the Braket CreateJob API.

        Args:
            device (str): ARN for the AWS device which is primarily accessed for the execution
                of this job. Alternatively, a string of the format "local:<provider>/<simulator>"
                for using a local simulator for the job. This string will be available as the
                environment variable `AMZN_BRAKET_DEVICE_ARN` inside the job container when
                using a Braket container.

            source_module (str): Path (absolute, relative or an S3 URI) to a python module to be
                tarred and uploaded. If `source_module` is an S3 URI, it must point to a
                tar.gz file. Otherwise, source_module may be a file or directory.

            entry_point (str): A str that specifies the entry point of the job, relative to
                the source module. The entry point must be in the format
                `importable.module` or `importable.module:callable`. For example,
                `source_module.submodule:start_here` indicates the `start_here` function
                contained in `source_module.submodule`. If source_module is an S3 URI,
                entry point must be given. Default: source_module's name

            image_uri (str): A str that specifies the ECR image to use for executing the job.
                `image_uris.retrieve_image()` function may be used for retrieving the ECR image URIs
                for the containers supported by Braket. Default = `<Braket base image_uri>`.

            job_name (str): A str that specifies the name with which the job is created.
                Default: f'{image_uri_type}-{timestamp}'.

            code_location (str): The S3 prefix URI where custom code will be uploaded.
                Default: f's3://{default_bucket_name}/jobs/{job_name}/script'.

            role_arn (str): A str providing the IAM role ARN used to execute the
                script. Default: IAM role returned by AwsSession's `get_default_jobs_role()`.

            wait_until_complete (bool): `True` if we should wait until the job completes.
                This would tail the job logs as it waits. Otherwise `False`. Default: `False`.

            hyperparameters (Dict[str, Any]): Hyperparameters accessible to the job.
                The hyperparameters are made accessible as a Dict[str, str] to the job.
                For convenience, this accepts other types for keys and values, but `str()`
                is called to convert them before being passed on. Default: None.

            input_data (Union[str, Dict, S3DataSourceConfig]): Information about the training
                data. Dictionary maps channel names to local paths or S3 URIs. Contents found
                at any local paths will be uploaded to S3 at
                f's3://{default_bucket_name}/jobs/{job_name}/data/{channel_name}. If a local
                path, S3 URI, or S3DataSourceConfig is provided, it will be given a default
                channel name "input".
                Default: {}.

            instance_config (InstanceConfig): Configuration of the instances to be used
                to execute the job. Default: InstanceConfig(instanceType='ml.m5.large',
                instanceCount=1, volumeSizeInGB=30).

            distribution (str): A str that specifies how the job should be distributed. If set to
                "data_parallel", the hyperparameters for the job will be set to use data parallelism
                features for PyTorch or TensorFlow. Default: None.

            stopping_condition (StoppingCondition): The maximum length of time, in seconds,
                and the maximum number of tasks that a job can run before being forcefully stopped.
                Default: StoppingCondition(maxRuntimeInSeconds=5 * 24 * 60 * 60).

            output_data_config (OutputDataConfig): Specifies the location for the output of the job.
                Default: OutputDataConfig(s3Path=f's3://{default_bucket_name}/jobs/{job_name}/data',
                kmsKeyId=None).

            copy_checkpoints_from_job (str): A str that specifies the job ARN whose checkpoint you
                want to use in the current job. Specifying this value will copy over the checkpoint
                data from `use_checkpoints_from_job`'s checkpoint_config s3Uri to the current job's
                checkpoint_config s3Uri, making it available at checkpoint_config.localPath during
                the job execution. Default: None

            checkpoint_config (CheckpointConfig): Configuration that specifies the location where
                checkpoint data is stored.
                Default: CheckpointConfig(localPath='/opt/jobs/checkpoints',
                s3Uri=f's3://{default_bucket_name}/jobs/{job_name}/checkpoints').

            aws_session (AwsSession): AwsSession for connecting to AWS Services.
                Default: AwsSession()

            tags (Dict[str, str]): Dict specifying the key-value pairs for tagging this job.
                Default: {}.

            logger (Logger): Logger object with which to write logs, such as task statuses
                while waiting for task to be in a terminal state. Default is `getLogger(__name__)`

        Returns:
            AwsQuantumJob: Job tracking the execution on Amazon Braket.

        Raises:
            ValueError: Raises ValueError if the parameters are not valid.
        """
        # Resolve the session for the target device (may create a default one
        # when aws_session is None — confirm exact behavior in
        # _initialize_session).
        aws_session = AwsQuantumJob._initialize_session(
            aws_session, device, logger)

        # Validate all arguments and translate them into Braket CreateJob
        # request parameters.
        create_job_kwargs = prepare_quantum_job(
            device=device,
            source_module=source_module,
            entry_point=entry_point,
            image_uri=image_uri,
            job_name=job_name,
            code_location=code_location,
            role_arn=role_arn,
            hyperparameters=hyperparameters,
            input_data=input_data,
            instance_config=instance_config,
            distribution=distribution,
            stopping_condition=stopping_condition,
            output_data_config=output_data_config,
            copy_checkpoints_from_job=copy_checkpoints_from_job,
            checkpoint_config=checkpoint_config,
            aws_session=aws_session,
            tags=tags,
        )

        job_arn = aws_session.create_job(**create_job_kwargs)
        job = AwsQuantumJob(job_arn, aws_session)

        # Optionally block until the job reaches a terminal state, tailing its
        # logs while waiting.
        if wait_until_complete:
            print(f"Initializing Braket Job: {job_arn}")
            job.logs(wait=True)

        return job