def test_copy_checkpoints(
    mock_validate_input,
    mock_validate_entry_point,
    mock_process_local_source,
    aws_session,
    entry_point,
    device,
    checkpoint_config,
    generate_get_job_response,
):
    other_checkpoint_uri = "s3://amazon-braket-jobs/job-path/checkpoints"
    # The source job referenced by copy_checkpoints_from_job reports this URI
    # as its checkpoint location.
    aws_session.get_job.return_value = generate_get_job_response(
        checkpointConfig={
            "s3Uri": other_checkpoint_uri,
        }
    )
    prepare_quantum_job(
        device=device,
        source_module="source_module",
        entry_point=entry_point,
        copy_checkpoints_from_job="other-job-arn",
        checkpoint_config=checkpoint_config,
        aws_session=aws_session,
    )
    # The source job's checkpoint data must be copied into this job's checkpoint URI.
    aws_session.copy_s3_directory.assert_called_with(other_checkpoint_uri, checkpoint_config.s3Uri)
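# Under the hood (a simplified sketch inferred from this test, not the SDK's exact
# code): prepare_quantum_job resolves the source job's checkpoint URI and mirrors
# that S3 prefix into the new job's checkpoint location:
#
#   src_uri = aws_session.get_job(copy_checkpoints_from_job)["checkpointConfig"]["s3Uri"]
#   aws_session.copy_s3_directory(src_uri, checkpoint_config.s3Uri)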
def test_invalid_input_parameters(entry_point, aws_session):
    error_message = (
        "'instance_config' should be of '<class 'braket.jobs.config.InstanceConfig'>' "
        "but user provided <class 'int'>."
    )
    with pytest.raises(ValueError, match=error_message):
        prepare_quantum_job(
            aws_session=aws_session,
            entry_point=entry_point,
            device="arn:aws:braket:::device/quantum-simulator/amazon/sv1",
            source_module="alpha_test_job",
            hyperparameters={
                "param-1": "first parameter",
                "param-2": "second param",
            },
            instance_config=2,
        )
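# For contrast, a sketch of a call that passes this validation (the instance values
# mirror the documented defaults; `entry_point` and `aws_session` are the same
# fixtures used above):
#
#   from braket.jobs.config import InstanceConfig
#
#   prepare_quantum_job(
#       aws_session=aws_session,
#       entry_point=entry_point,
#       device="arn:aws:braket:::device/quantum-simulator/amazon/sv1",
#       source_module="alpha_test_job",
#       instance_config=InstanceConfig(
#           instanceType="ml.m5.large", instanceCount=1, volumeSizeInGB=30
#       ),
#   )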
def test_create_job(
    mock_time,
    mock_path,
    mock_findspec,
    mock_tarfile,
    aws_session,
    source_module,
    create_job_args,
):
    # Stub out filesystem and clock lookups so that job-name and timestamp
    # generation inside prepare_quantum_job is deterministic.
    mock_path.return_value.resolve.return_value.parent = "parent_dir"
    mock_path.return_value.resolve.return_value.stem = source_module
    mock_path.return_value.name = "file_name"
    mock_time.return_value = datetime.datetime.now().timestamp()
    expected_kwargs = _translate_creation_args(create_job_args)
    result_kwargs = prepare_quantum_job(**create_job_args)
    assert expected_kwargs == result_kwargs
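# Shape of the translated kwargs (a partial sketch; the keys shown are the ones
# consumed by LocalQuantumJob.create and AwsQuantumJob.create below):
#
#   {
#       "jobName": "<image_uri_type>-<timestamp>",
#       "algorithmSpecification": {"containerImage": {"uri": "..."}},
#       "checkpointConfig": {"localPath": "...", "s3Uri": "..."},
#       ...
#   }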
@classmethod
def create(
    cls,
    device: str,
    source_module: str,
    entry_point: str = None,
    image_uri: str = None,
    job_name: str = None,
    code_location: str = None,
    role_arn: str = None,
    hyperparameters: Dict[str, Any] = None,
    input_data: Union[str, Dict, S3DataSourceConfig] = None,
    output_data_config: OutputDataConfig = None,
    checkpoint_config: CheckpointConfig = None,
    aws_session: AwsSession = None,
) -> LocalQuantumJob:
    """Creates and runs a job by setting up and running the customer script in a local
    Docker container.

    Args:
        device (str): ARN for the AWS device which is primarily accessed for the execution
            of this job.
        source_module (str): Path (absolute, relative, or an S3 URI) to a python module to
            be tarred and uploaded. If `source_module` is an S3 URI, it must point to a
            tar.gz file. Otherwise, `source_module` may be a file or directory.
        entry_point (str): A str that specifies the entry point of the job, relative to
            the source module. The entry point must be in the format
            `importable.module` or `importable.module:callable`. For example,
            `source_module.submodule:start_here` indicates the `start_here` function
            contained in `source_module.submodule`. If `source_module` is an S3 URI,
            the entry point must be given. Default: `source_module`'s name.
        image_uri (str): A str that specifies the ECR image to use for executing the job.
            The `image_uris.retrieve_image()` function may be used for retrieving the ECR
            image URIs for the containers supported by Braket.
            Default: `<Braket base image_uri>`.
        job_name (str): A str that specifies the name with which the job is created.
            Default: f'{image_uri_type}-{timestamp}'.
        code_location (str): The S3 prefix URI where custom code will be uploaded.
            Default: f's3://{default_bucket_name}/jobs/{job_name}/script'.
        role_arn (str): This field is currently not used for local jobs. Local jobs
            use the current role's credentials. This may be subject to change.
        hyperparameters (Dict[str, Any]): Hyperparameters accessible to the job.
            The hyperparameters are made accessible as a Dict[str, str] to the job.
            For convenience, this accepts other types for keys and values, but `str()`
            is called to convert them before being passed on. Default: None.
        input_data (Union[str, Dict, S3DataSourceConfig]): Information about the training
            data. Dictionary maps channel names to local paths or S3 URIs. Contents found
            at any local paths will be uploaded to S3 at
            f's3://{default_bucket_name}/jobs/{job_name}/data/{channel_name}'. If a local
            path, S3 URI, or S3DataSourceConfig is provided, it will be given a default
            channel name "input". Default: {}.
        output_data_config (OutputDataConfig): Specifies the location for the output of
            the job.
            Default: OutputDataConfig(s3Path=f's3://{default_bucket_name}/jobs/{job_name}/data',
            kmsKeyId=None).
        checkpoint_config (CheckpointConfig): Configuration that specifies the location
            where checkpoint data is stored.
            Default: CheckpointConfig(localPath='/opt/jobs/checkpoints',
            s3Uri=f's3://{default_bucket_name}/jobs/{job_name}/checkpoints').
        aws_session (AwsSession): AwsSession for connecting to AWS Services.
            Default: AwsSession()

    Returns:
        LocalQuantumJob: The representation of a local Braket Job.
""" create_job_kwargs = prepare_quantum_job( device=device, source_module=source_module, entry_point=entry_point, image_uri=image_uri, job_name=job_name, code_location=code_location, role_arn=role_arn, hyperparameters=hyperparameters, input_data=input_data, output_data_config=output_data_config, checkpoint_config=checkpoint_config, aws_session=aws_session, ) job_name = create_job_kwargs["jobName"] if os.path.isdir(job_name): raise ValueError( f"A local directory called {job_name} already exists. " f"Please use a different job name." ) session = aws_session or AwsSession() algorithm_specification = create_job_kwargs["algorithmSpecification"] if "containerImage" in algorithm_specification: image_uri = algorithm_specification["containerImage"]["uri"] else: image_uri = retrieve_image(Framework.BASE, session.region) with _LocalJobContainer(image_uri) as container: env_variables = setup_container(container, session, **create_job_kwargs) container.run_local_job(env_variables) container.copy_from("/opt/ml/model", job_name) with open(os.path.join(job_name, "log.txt"), "w") as log_file: log_file.write(container.run_log) if "checkpointConfig" in create_job_kwargs: checkpoint_config = create_job_kwargs["checkpointConfig"] if "localPath" in checkpoint_config: checkpoint_path = checkpoint_config["localPath"] container.copy_from(checkpoint_path, os.path.join(job_name, "checkpoints")) run_log = container.run_log return LocalQuantumJob(f"local:job/{job_name}", run_log)
@classmethod
def create(
    cls,
    device: str,
    source_module: str,
    entry_point: str = None,
    image_uri: str = None,
    job_name: str = None,
    code_location: str = None,
    role_arn: str = None,
    wait_until_complete: bool = False,
    hyperparameters: Dict[str, Any] = None,
    input_data: Union[str, Dict, S3DataSourceConfig] = None,
    instance_config: InstanceConfig = None,
    distribution: str = None,
    stopping_condition: StoppingCondition = None,
    output_data_config: OutputDataConfig = None,
    copy_checkpoints_from_job: str = None,
    checkpoint_config: CheckpointConfig = None,
    aws_session: AwsSession = None,
    tags: Dict[str, str] = None,
    logger: Logger = getLogger(__name__),
) -> AwsQuantumJob:
    """Creates a job by invoking the Braket CreateJob API.

    Args:
        device (str): ARN for the AWS device which is primarily accessed for the execution
            of this job. Alternatively, a string of the format "local:<provider>/<simulator>"
            for using a local simulator for the job. This string will be available as the
            environment variable `AMZN_BRAKET_DEVICE_ARN` inside the job container when
            using a Braket container.
        source_module (str): Path (absolute, relative, or an S3 URI) to a python module to
            be tarred and uploaded. If `source_module` is an S3 URI, it must point to a
            tar.gz file. Otherwise, `source_module` may be a file or directory.
        entry_point (str): A str that specifies the entry point of the job, relative to
            the source module. The entry point must be in the format
            `importable.module` or `importable.module:callable`. For example,
            `source_module.submodule:start_here` indicates the `start_here` function
            contained in `source_module.submodule`. If `source_module` is an S3 URI,
            the entry point must be given. Default: `source_module`'s name.
        image_uri (str): A str that specifies the ECR image to use for executing the job.
            The `image_uris.retrieve_image()` function may be used for retrieving the ECR
            image URIs for the containers supported by Braket.
            Default: `<Braket base image_uri>`.
        job_name (str): A str that specifies the name with which the job is created.
            Default: f'{image_uri_type}-{timestamp}'.
        code_location (str): The S3 prefix URI where custom code will be uploaded.
            Default: f's3://{default_bucket_name}/jobs/{job_name}/script'.
        role_arn (str): A str providing the IAM role ARN used to execute the script.
            Default: IAM role returned by AwsSession's `get_default_jobs_role()`.
        wait_until_complete (bool): `True` if we should wait until the job completes.
            This would tail the job logs as it waits. Otherwise `False`. Default: `False`.
        hyperparameters (Dict[str, Any]): Hyperparameters accessible to the job.
            The hyperparameters are made accessible as a Dict[str, str] to the job.
            For convenience, this accepts other types for keys and values, but `str()`
            is called to convert them before being passed on. Default: None.
        input_data (Union[str, Dict, S3DataSourceConfig]): Information about the training
            data. Dictionary maps channel names to local paths or S3 URIs. Contents found
            at any local paths will be uploaded to S3 at
            f's3://{default_bucket_name}/jobs/{job_name}/data/{channel_name}'. If a local
            path, S3 URI, or S3DataSourceConfig is provided, it will be given a default
            channel name "input". Default: {}.
        instance_config (InstanceConfig): Configuration of the instances to be used to
            execute the job. Default: InstanceConfig(instanceType='ml.m5.large',
            instanceCount=1, volumeSizeInGB=30).
        distribution (str): A str that specifies how the job should be distributed.
            If set to "data_parallel", the hyperparameters for the job will be set to use
            data parallelism features for PyTorch or TensorFlow. Default: None.
        stopping_condition (StoppingCondition): The maximum length of time, in seconds,
            and the maximum number of tasks that a job can run before being forcefully
            stopped. Default: StoppingCondition(maxRuntimeInSeconds=5 * 24 * 60 * 60).
        output_data_config (OutputDataConfig): Specifies the location for the output of
            the job.
            Default: OutputDataConfig(s3Path=f's3://{default_bucket_name}/jobs/{job_name}/data',
            kmsKeyId=None).
        copy_checkpoints_from_job (str): A str that specifies the job ARN whose checkpoint
            you want to use in the current job. Specifying this value will copy over the
            checkpoint data from `copy_checkpoints_from_job`'s checkpoint_config s3Uri to
            the current job's checkpoint_config s3Uri, making it available at
            checkpoint_config.localPath during the job execution. Default: None.
        checkpoint_config (CheckpointConfig): Configuration that specifies the location
            where checkpoint data is stored.
            Default: CheckpointConfig(localPath='/opt/jobs/checkpoints',
            s3Uri=f's3://{default_bucket_name}/jobs/{job_name}/checkpoints').
        aws_session (AwsSession): AwsSession for connecting to AWS Services.
            Default: AwsSession()
        tags (Dict[str, str]): Dict specifying the key-value pairs for tagging this job.
            Default: {}.
        logger (Logger): Logger object with which to write logs, such as task statuses
            while waiting for a task to be in a terminal state.
            Default: `getLogger(__name__)`.

    Returns:
        AwsQuantumJob: Job tracking the execution on Amazon Braket.

    Raises:
        ValueError: Raises ValueError if the parameters are not valid.
    """
    aws_session = AwsQuantumJob._initialize_session(aws_session, device, logger)

    create_job_kwargs = prepare_quantum_job(
        device=device,
        source_module=source_module,
        entry_point=entry_point,
        image_uri=image_uri,
        job_name=job_name,
        code_location=code_location,
        role_arn=role_arn,
        hyperparameters=hyperparameters,
        input_data=input_data,
        instance_config=instance_config,
        distribution=distribution,
        stopping_condition=stopping_condition,
        output_data_config=output_data_config,
        copy_checkpoints_from_job=copy_checkpoints_from_job,
        checkpoint_config=checkpoint_config,
        aws_session=aws_session,
        tags=tags,
    )

    job_arn = aws_session.create_job(**create_job_kwargs)
    job = AwsQuantumJob(job_arn, aws_session)

    if wait_until_complete:
        print(f"Initializing Braket Job: {job_arn}")
        job.logs(wait=True)

    return job
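# A usage sketch for the managed path, including checkpoint reuse. The job ARN,
# account ID, and module path are placeholders; `wait_until_complete=True` tails
# the job logs as documented above.
def _example_aws_job() -> AwsQuantumJob:
    return AwsQuantumJob.create(
        device="arn:aws:braket:::device/quantum-simulator/amazon/sv1",
        source_module="hello_job.py",
        entry_point="hello_job:start_here",
        copy_checkpoints_from_job="arn:aws:braket:us-west-2:000000000000:job/previous-job",
        wait_until_complete=True,
    )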