Example 1
0
    def create_custom_job(client,
                          resource_group,
                          cluster_id,
                          job_name,
                          nodes,
                          cmd,
                          job_preparation_cmd=None,
                          container=None):
        """Creates and submits a custom toolkit job.

        :param BatchAIManagementClient client: client instance.
        :param str resource_group: resource group name.
        :param str cluster_id: resource Id of the cluster.
        :param str job_name: job name.
        :param int nodes: number of nodes to execute the job.
        :param str cmd: command line to run.
        :param str or None job_preparation_cmd: job preparation command line.
        :param models.ContainerSettings or None container: container settings
            to run the job.
        :return models.Job: the created job.
        """
        # Attach a preparation step only when a command line was supplied.
        prep = (models.JobPreparation(command_line=job_preparation_cmd)
                if job_preparation_cmd else None)
        # Ensure the default workspace/experiment exists before submitting.
        client.experiments.create(resource_group,
                                  Helpers.DEFAULT_WORKSPACE_NAME,
                                  Helpers.DEFAULT_EXPERIMENT_NAME).result()
        mount_root = '$AZ_BATCHAI_MOUNT_ROOT/{0}'.format(
            Helpers.AZURE_FILES_MOUNTING_PATH)
        parameters = models.JobCreateParameters(
            cluster=models.ResourceId(id=cluster_id),
            node_count=nodes,
            std_out_err_path_prefix=mount_root,
            output_directories=[
                models.OutputDirectory(
                    id=Helpers.JOB_OUTPUT_DIRECTORY_ID,
                    path_prefix=Helpers.JOB_OUTPUT_DIRECTORY_PATH,
                    path_suffix="files")
            ],
            input_directories=[
                models.InputDirectory(id='INPUT',
                                      path=mount_root + '/input')
            ],
            container_settings=container,
            job_preparation=prep,
            custom_toolkit_settings=models.CustomToolkitSettings(
                command_line=cmd))
        return client.jobs.create(resource_group,
                                  Helpers.DEFAULT_WORKSPACE_NAME,
                                  Helpers.DEFAULT_EXPERIMENT_NAME,
                                  job_name,
                                  parameters=parameters).result()
Example 2
0
 def test_job_environment_variables_and_secrets(self, resource_group,
                                                location, cluster):
     """Tests that environment variables and secrets are passed to a job.

     Submits a job that echoes an environment variable and a secret from
     both the job and its preparation task, then verifies that both had
     access to the values and that the server reports the secret's name
     but never its value.
     """
     job_name = 'job'
     # Single-node job; both command lines echo the variable and the secret.
     job = self.client.jobs.create(
         resource_group.name,
         helpers.DEFAULT_WORKSPACE_NAME,
         helpers.DEFAULT_EXPERIMENT_NAME,
         job_name,
         parameters=models.JobCreateParameters(
             cluster=models.ResourceId(id=cluster.id),
             node_count=1,
             std_out_err_path_prefix='$AZ_BATCHAI_MOUNT_ROOT/{0}'.format(
                 helpers.AZURE_FILES_MOUNTING_PATH),
             environment_variables=[
                 models.EnvironmentVariable(name='VARIABLE', value='VALUE')
             ],
             secrets=[
                 models.EnvironmentVariableWithSecretValue(
                     name='SECRET_VARIABLE', value='SECRET')
             ],
             # Check that the job preparation has access to env variables and secrets.
             job_preparation=models.JobPreparation(
                 command_line='echo $VARIABLE $SECRET_VARIABLE'),
             # Check that the job has access to env variables and secrets.
             custom_toolkit_settings=models.CustomToolkitSettings(
                 command_line='echo $VARIABLE $SECRET_VARIABLE'))).result(
                 )  # type: models.Job
     self.assertEqual(
         helpers.wait_for_job_completion(self.is_live, self.client,
                                         resource_group.name, job.name,
                                         helpers.MINUTE),
         models.ExecutionState.succeeded)
     # Check that environment variables are reported by the server.
     self.assertEqual(len(job.environment_variables), 1)
     self.assertEqual(job.environment_variables[0].name, 'VARIABLE')
     self.assertEqual(job.environment_variables[0].value, 'VALUE')
     # Check that secrets are reported back by server, but value is not reported.
     self.assertEqual(len(job.secrets), 1)
     self.assertEqual(job.secrets[0].name, 'SECRET_VARIABLE')
     self.assertIsNone(job.secrets[0].value)
     # Check that job and job prep had access to the env variables and secrets.
     helpers.assert_job_files_are(
         self, self.client, resource_group.name, job.name,
         helpers.STANDARD_OUTPUT_DIRECTORY_ID, {
             u'stdout.txt': u'VALUE SECRET\n',
             u'stderr.txt': u'',
             u'stdout-job_prep.txt': u'VALUE SECRET\n',
             u'stderr-job_prep.txt': u''
         })
Example 3
0
    def test_job_level_mounting(self, resource_group, location, cluster,
                                storage_account, storage_account_key):
        """Tests if it's possible to mount external file systems for a job.

        Mounts an Azure file share and a blob container at job level, writes
        output files through both mounts from the job and its preparation
        task, verifies the files via the jobs API and directly in storage,
        and finally checks that the job-level mounts are gone for the next
        job while the cluster-level mount remains.
        """
        job_name = 'job'

        # Create file share and container to mount on the job level
        # NOTE(review): FAKE_STORAGE presumably marks recorded/playback runs
        # where real storage setup must be skipped — confirm against helpers.
        if storage_account.name != FAKE_STORAGE.name:
            files = FileService(storage_account.name, storage_account_key)
            files.create_share('jobshare', fail_on_exist=False)
            blobs = BlockBlobService(storage_account.name, storage_account_key)
            blobs.create_container('jobcontainer', fail_on_exist=False)

        job = self.client.jobs.create(
            resource_group.name,
            job_name,
            parameters=models.JobCreateParameters(
                location=location,
                cluster=models.ResourceId(id=cluster.id),
                node_count=1,
                mount_volumes=models.
                MountVolumes(azure_file_shares=[
                    models.AzureFileShareReference(
                        account_name=storage_account.name,
                        azure_file_url='https://{0}.file.core.windows.net/{1}'.
                        format(storage_account.name, 'jobshare'),
                        relative_mount_path='job_afs',
                        credentials=models.AzureStorageCredentialsInfo(
                            account_key=storage_account_key),
                    )
                ],
                             azure_blob_file_systems=[
                                 models.AzureBlobFileSystemReference(
                                     account_name=storage_account.name,
                                     container_name='jobcontainer',
                                     relative_mount_path='job_bfs',
                                     credentials=models.
                                     AzureStorageCredentialsInfo(
                                         account_key=storage_account_key),
                                 )
                             ]),
                # Put standard output on cluster level AFS to check that the job has access to it.
                std_out_err_path_prefix='$AZ_BATCHAI_MOUNT_ROOT/{0}'.format(
                    AZURE_FILES_MOUNTING_PATH),
                # Create two output directories on job level AFS and blobfuse.
                output_directories=[
                    models.OutputDirectory(
                        id='OUTPUT1',
                        path_prefix='$AZ_BATCHAI_JOB_MOUNT_ROOT/job_afs'),
                    models.OutputDirectory(
                        id='OUTPUT2',
                        path_prefix='$AZ_BATCHAI_JOB_MOUNT_ROOT/job_bfs')
                ],
                # Check that the job preparation has access to job level file systems.
                job_preparation=models.JobPreparation(
                    command_line=
                    'echo afs > $AZ_BATCHAI_OUTPUT_OUTPUT1/prep_afs.txt; '
                    'echo bfs > $AZ_BATCHAI_OUTPUT_OUTPUT2/prep_bfs.txt; '
                    'echo done'),
                # Check that the job has access to job
                custom_toolkit_settings=models.CustomToolkitSettings(
                    command_line=
                    'echo afs > $AZ_BATCHAI_OUTPUT_OUTPUT1/job_afs.txt; '
                    'echo bfs > $AZ_BATCHAI_OUTPUT_OUTPUT2/job_bfs.txt; '
                    'mkdir $AZ_BATCHAI_OUTPUT_OUTPUT1/afs; '
                    'echo afs > $AZ_BATCHAI_OUTPUT_OUTPUT1/afs/job_afs.txt; '
                    'mkdir $AZ_BATCHAI_OUTPUT_OUTPUT2/bfs; '
                    'echo bfs > $AZ_BATCHAI_OUTPUT_OUTPUT2/bfs/job_bfs.txt; '
                    'echo done'))).result()
        self.assertEqual(
            wait_for_job_completion(self.is_live, self.client,
                                    resource_group.name, job.name, MINUTE),
            models.ExecutionState.succeeded)

        # Re-fetch the job so server-populated properties (e.g.
        # job_output_directory_path_segment, used below) are available.
        job = self.client.jobs.get(resource_group.name, job.name)
        # Assert job and job prep standard output is populated on cluster level filesystem
        assert_job_files_are(
            self, self.client, resource_group.name, job.name,
            STANDARD_OUTPUT_DIRECTORY_ID, {
                u'stdout.txt': u'done\n',
                u'stderr.txt': u'',
                u'stdout-job_prep.txt': u'done\n',
                u'stderr-job_prep.txt': u''
            })
        # Assert files are generated on job level AFS
        assert_job_files_are(self, self.client, resource_group.name, job.name,
                             'OUTPUT1', {
                                 u'job_afs.txt': u'afs\n',
                                 u'prep_afs.txt': u'afs\n',
                                 u'afs': None
                             })
        # Assert files are generated on job level blobfuse
        assert_job_files_are(self, self.client, resource_group.name, job.name,
                             'OUTPUT2', {
                                 u'job_bfs.txt': u'bfs\n',
                                 u'prep_bfs.txt': u'bfs\n',
                                 u'bfs': None
                             })
        # Assert subfolders are available via API
        assert_job_files_in_path_are(self, self.client, resource_group.name,
                                     job.name, 'OUTPUT1', 'afs',
                                     {u'job_afs.txt': u'afs\n'})
        assert_job_files_in_path_are(self, self.client, resource_group.name,
                                     job.name, 'OUTPUT2', 'bfs',
                                     {u'job_bfs.txt': u'bfs\n'})

        # Assert that we can access the output files created on job level mount volumes directly in storage using path
        # segment returned by the server.
        if storage_account.name != FAKE_STORAGE.name:
            files = FileService(storage_account.name, storage_account_key)
            self.assertTrue(
                files.exists(
                    'jobshare', job.job_output_directory_path_segment + '/' +
                    OUTPUT_DIRECTORIES_FOLDER_NAME, 'job_afs.txt'))
            blobs = BlockBlobService(storage_account.name, storage_account_key)
            self.assertTrue(
                blobs.exists(
                    'jobcontainer', job.job_output_directory_path_segment +
                    '/' + OUTPUT_DIRECTORIES_FOLDER_NAME + '/job_bfs.txt'))
        # After the job is done the filesystems should be unmounted automatically, check this by submitting a new job.
        checker = self.client.jobs.create(
            resource_group.name,
            'checker',
            parameters=models.JobCreateParameters(
                location=location,
                cluster=models.ResourceId(id=cluster.id),
                node_count=1,
                std_out_err_path_prefix='$AZ_BATCHAI_MOUNT_ROOT/{0}'.format(
                    AZURE_FILES_MOUNTING_PATH),
                custom_toolkit_settings=models.CustomToolkitSettings(
                    command_line='echo job; df | grep -E "job_bfs|job_afs"'))
        ).result()
        # Check the job failed because there are not job level mount volumes anymore
        self.assertEqual(
            wait_for_job_completion(self.is_live, self.client,
                                    resource_group.name, checker.name, MINUTE),
            models.ExecutionState.failed)
        # Check that the cluster level AFS was still mounted
        assert_job_files_are(self, self.client, resource_group.name,
                             checker.name, STANDARD_OUTPUT_DIRECTORY_ID, {
                                 u'stdout.txt': u'job\n',
                                 u'stderr.txt': u''
                             })