def assertCanRunJob(self, resource_group, location, cluster_id, job_name,
                    container_settings, timeout_sec):
    """Submit a one-node custom job and verify it succeeds with expected output.

    The job echoes a greeting into the OUTPUTS directory; the method then
    checks completion state, the generated output file, and the standard
    output/error streams.
    """
    rg_name = resource_group.name
    create_custom_job(self.client, rg_name, location, cluster_id, job_name, 1,
                      'echo hello | tee $AZ_BATCHAI_OUTPUT_OUTPUTS/hi.txt',
                      container=container_settings)

    # The job must reach the succeeded state within the allotted time.
    final_state = wait_for_job_completion(self.is_live, self.client, rg_name,
                                          job_name, timeout_sec)
    self.assertEqual(final_state, models.ExecutionState.succeeded)

    # The OUTPUTS directory and the standard output/error streams must
    # contain the expected greeting.
    assert_job_files_are(self, self.client, rg_name, job_name, 'OUTPUTS',
                         {u'hi.txt': u'hello\n'})
    assert_job_files_are(self, self.client, rg_name, job_name,
                         STANDARD_OUTPUT_DIRECTORY_ID,
                         {u'stdout.txt': u'hello\n', u'stderr.txt': ''})
# Example 2
    def test_password_less_ssh_in_container(self, resource_group, location,
                                            cluster):
        """Tests if password-less ssh is configured in containers."""
        container = models.ContainerSettings(
            image_source_registry=models.ImageSourceRegistry(image='ubuntu'))
        # A two-node job where each task ssh-es to the first node; both
        # commands must complete without prompting for a password.
        job = create_custom_job(
            self.client, resource_group.name, location, cluster.id, 'job', 2,
            'ssh 10.0.0.5 echo done && ssh 10.0.0.5 echo done',
            container=container)
        self.assertEqual(
            models.ExecutionState.succeeded,
            wait_for_job_completion(self.is_live, self.client,
                                    resource_group.name, job.name, MINUTE))

        job = self.client.jobs.get(resource_group.name, job.name)
        expected = {
            u'stdout.txt': u'done\ndone\n',
            # ssh reports the newly added host key on stderr.
            u'stderr.txt': re.compile('Permanently added.*'),
        }
        assert_job_files_are(self, self.client, resource_group.name, job.name,
                             STANDARD_OUTPUT_DIRECTORY_ID, expected)
        # After deletion the job must no longer be retrievable.
        self.client.jobs.delete(resource_group.name, job.name).result()
        self.assertRaises(
            CloudError,
            lambda: self.client.jobs.get(resource_group.name, job.name))
# Example 3
    def test_job_preparation_container(self, resource_group, location,
                                       cluster):
        """Tests job preparation execution for a job running in a container."""
        # The preparation command stages hi.txt under $AZ_BATCHAI_INPUT_INPUT;
        # the main job command simply reads the staged file back.
        prep_command = ('mkdir -p $AZ_BATCHAI_INPUT_INPUT && '
                        'echo hello | tee $AZ_BATCHAI_INPUT_INPUT/hi.txt')
        job = create_custom_job(
            self.client, resource_group.name, location, cluster.id, 'job', 1,
            'cat $AZ_BATCHAI_INPUT_INPUT/hi.txt', prep_command,
            container=models.ContainerSettings(
                image_source_registry=models.ImageSourceRegistry(
                    image='ubuntu')))
        self.assertEqual(
            models.ExecutionState.succeeded,
            wait_for_job_completion(self.is_live, self.client,
                                    resource_group.name, job.name, MINUTE))

        # Both the job and its preparation task print the greeting on stdout
        # and produce nothing on stderr.
        assert_job_files_are(
            self, self.client, resource_group.name, job.name,
            STANDARD_OUTPUT_DIRECTORY_ID, {
                u'stdout.txt': u'hello\n',
                u'stderr.txt': u'',
                u'stdout-job_prep.txt': u'hello\n',
                u'stderr-job_prep.txt': u''
            })
        # After deletion the job must no longer be retrievable.
        self.client.jobs.delete(resource_group.name, job.name).result()
        self.assertRaises(
            CloudError,
            lambda: self.client.jobs.get(resource_group.name, job.name))
# Example 4
 def test_job_environment_variables_and_secrets(self, resource_group,
                                                location, cluster):
     """Tests environment variables and secrets provided for a job.

     Verifies that a regular variable and a secret variable are visible to
     both the job and its preparation task, that the regular variable is
     reported back by the server, and that the secret's value is never
     reported back.
     """
     job_name = 'job'
     parameters = models.JobCreateParameters(
         location=location,
         cluster=models.ResourceId(id=cluster.id),
         node_count=1,
         std_out_err_path_prefix='$AZ_BATCHAI_MOUNT_ROOT/{0}'.format(
             AZURE_FILES_MOUNTING_PATH),
         environment_variables=[
             models.EnvironmentVariable(name='VARIABLE', value='VALUE')
         ],
         secrets=[
             models.EnvironmentVariableWithSecretValue(
                 name='SECRET_VARIABLE', value='SECRET')
         ],
         # The preparation task and the job both echo the variables so their
         # stdout streams can be inspected afterwards.
         job_preparation=models.JobPreparation(
             command_line='echo $VARIABLE $SECRET_VARIABLE'),
         custom_toolkit_settings=models.CustomToolkitSettings(
             command_line='echo $VARIABLE $SECRET_VARIABLE'))
     job = self.client.jobs.create(
         resource_group.name, job_name,
         parameters=parameters).result()  # type: models.Job
     self.assertEqual(
         models.ExecutionState.succeeded,
         wait_for_job_completion(self.is_live, self.client,
                                 resource_group.name, job.name, MINUTE))
     # The regular environment variable is echoed back by the server.
     self.assertEqual(len(job.environment_variables), 1)
     variable = job.environment_variables[0]
     self.assertEqual(variable.name, 'VARIABLE')
     self.assertEqual(variable.value, 'VALUE')
     # The secret is reported by name only; its value must be omitted.
     self.assertEqual(len(job.secrets), 1)
     self.assertEqual(job.secrets[0].name, 'SECRET_VARIABLE')
     self.assertIsNone(job.secrets[0].value)
     # Both the job and the preparation task had access to the values.
     assert_job_files_are(
         self, self.client, resource_group.name, job.name,
         STANDARD_OUTPUT_DIRECTORY_ID, {
             u'stdout.txt': u'VALUE SECRET\n',
             u'stderr.txt': u'',
             u'stdout-job_prep.txt': u'VALUE SECRET\n',
             u'stderr-job_prep.txt': u''
         })
# Example 5
 def test_job_creation_and_deletion(self, resource_group, location, cluster,
                                    storage_account, storage_account_key):
     """Tests simple scenario for a job - submit, check results, delete."""
     command = 'echo hi | tee {0}/hi.txt'.format(JOB_OUTPUT_DIRECTORY_PATH_ENV)
     job = create_custom_job(
         self.client, resource_group.name, location, cluster.id, 'job', 1,
         command,
         container=models.ContainerSettings(
             image_source_registry=models.ImageSourceRegistry(
                 image='ubuntu')))  # type: models.Job
     self.assertEqual(
         models.ExecutionState.succeeded,
         wait_for_job_completion(self.is_live, self.client,
                                 resource_group.name, job.name, MINUTE))
     # The standard output stream contains the greeting, stderr is empty.
     assert_job_files_are(self, self.client, resource_group.name, job.name,
                          STANDARD_OUTPUT_DIRECTORY_ID,
                          {u'stdout.txt': u'hi\n', u'stderr.txt': u''})
     # The job's output directory contains the generated file.
     assert_job_files_are(self, self.client, resource_group.name, job.name,
                          JOB_OUTPUT_DIRECTORY_ID, {u'hi.txt': u'hi\n'})
     # The stdout file must also be reachable directly in storage via the
     # path segment reported by the server.
     stdout_dir = (job.job_output_directory_path_segment + '/' +
                   STDOUTERR_FOLDER_NAME)
     assert_file_in_file_share(self, storage_account.name,
                               storage_account_key, stdout_dir, 'stdout.txt',
                               u'hi\n')
     # After deletion the job must no longer be retrievable.
     self.client.jobs.delete(resource_group.name, job.name).result()
     self.assertRaises(
         CloudError,
         lambda: self.client.jobs.get(resource_group.name, job.name))
# Example 6
    def test_file_server(self, resource_group, location, storage_account,
                         storage_account_key):
        """Tests file server functionality

        1. Create file server
        2. Create two clusters with this file server
        3. Check that the file server is mounted:
            a. submit tasks (one from host and another from container) on the first cluster to write data to nfs
            b. submit a task on the second cluster to read the data from nfs
        """
        server = create_file_server(
            self.client, location, resource_group.name,
            self.file_server_name)  # type: models.FileServer

        # Both clusters mount the NFS read-write at $AZ_BATCHAI_MOUNT_ROOT/nfs.
        # They are configured identically, so create them in a loop instead of
        # duplicating the argument lists.
        clusters = []
        for cluster_name in ('cluster1', 'cluster2'):
            clusters.append(
                create_cluster(
                    self.client,
                    location,
                    resource_group.name,
                    cluster_name,
                    'STANDARD_D1',
                    1,
                    storage_account.name,
                    storage_account_key,
                    file_servers=[
                        models.FileServerReference(
                            file_server=models.ResourceId(id=server.id),
                            relative_mount_path='nfs',
                            mount_options="rw")
                    ]))
        cluster1, cluster2 = clusters

        # Verify the file server is reported.
        assert_existing_file_servers_are(self, self.client,
                                         resource_group.name,
                                         [self.file_server_name])

        # Verify the file server becomes available in a reasonable time.
        self.assertTrue(
            wait_for_file_server(self.is_live, self.client,
                                 resource_group.name, self.file_server_name,
                                 _FILE_SERVER_CREATION_TIMEOUT_SEC))

        # Verify the remote login information and private ip are reported.
        # assertRegexpMatches was deprecated and removed in Python 3.12;
        # assertRegex is the supported spelling.
        server = self.client.file_servers.get(
            resource_group.name,
            self.file_server_name)  # type: models.FileServer
        self.assertRegex(server.mount_settings.file_server_public_ip,
                         RE_ID_ADDRESS)
        self.assertRegex(server.mount_settings.file_server_internal_ip,
                         RE_ID_ADDRESS)

        # Verify both clusters allocated their single node successfully.
        for cluster_name in ('cluster1', 'cluster2'):
            self.assertEqual(
                wait_for_nodes(self.is_live, self.client, resource_group.name,
                               cluster_name, 1, NODE_STARTUP_TIMEOUT_SEC), 1)

        # Execute publishing tasks on the first cluster: one from the host...
        job1 = create_custom_job(
            self.client, resource_group.name, location, cluster1.id,
            'host_publisher', 1,
            'echo hi from host > $AZ_BATCHAI_MOUNT_ROOT/nfs/host.txt')
        self.assertEqual(
            wait_for_job_completion(self.is_live, self.client,
                                    resource_group.name, job1.name, MINUTE),
            models.ExecutionState.succeeded)
        # ...and one from inside a container.
        job2 = create_custom_job(
            self.client,
            resource_group.name,
            location,
            cluster1.id,
            'container_publisher',
            1,
            'echo hi from container >> $AZ_BATCHAI_MOUNT_ROOT/nfs/container.txt',
            container=models.ContainerSettings(
                image_source_registry=models.ImageSourceRegistry(
                    image="ubuntu")))
        self.assertEqual(
            wait_for_job_completion(self.is_live, self.client,
                                    resource_group.name, job2.name, MINUTE),
            models.ExecutionState.succeeded)

        # Execute consumer task on the second cluster.
        job3 = create_custom_job(
            self.client, resource_group.name, location, cluster2.id,
            'consumer', 1, 'cat $AZ_BATCHAI_MOUNT_ROOT/nfs/host.txt; '
            'cat $AZ_BATCHAI_MOUNT_ROOT/nfs/container.txt')
        self.assertEqual(
            wait_for_job_completion(self.is_live, self.client,
                                    resource_group.name, job3.name, MINUTE),
            models.ExecutionState.succeeded)

        # Verify the data written via cluster1 is visible from cluster2.
        assert_job_files_are(
            self, self.client, resource_group.name, job3.name,
            STANDARD_OUTPUT_DIRECTORY_ID, {
                u'stdout.txt': u'hi from host\nhi from container\n',
                u'stderr.txt': ''
            })

        # Delete clusters.
        self.client.clusters.delete(resource_group.name, 'cluster1').result()
        self.client.clusters.delete(resource_group.name, 'cluster2').result()

        # Test deletion of the file server itself.
        self.client.file_servers.delete(resource_group.name,
                                        self.file_server_name).result()
        assert_existing_file_servers_are(self, self.client,
                                         resource_group.name, [])
# Example 7
    def test_job_level_mounting(self, resource_group, location, cluster,
                                storage_account, storage_account_key):
        """Tests if it's possible to mount external file systems for a job.

        Mounts a job-level Azure File share and blob container under
        $AZ_BATCHAI_JOB_MOUNT_ROOT, verifies outputs written there by the
        job and its preparation task, then confirms the job-level mounts
        are gone once the job has finished.
        """
        job_name = 'job'

        # Create file share and container to mount on the job level
        # (skipped when running against the fake/recorded storage account).
        if storage_account.name != FAKE_STORAGE.name:
            files = FileService(storage_account.name, storage_account_key)
            files.create_share('jobshare', fail_on_exist=False)
            blobs = BlockBlobService(storage_account.name, storage_account_key)
            blobs.create_container('jobcontainer', fail_on_exist=False)

        job = self.client.jobs.create(
            resource_group.name,
            job_name,
            parameters=models.JobCreateParameters(
                location=location,
                cluster=models.ResourceId(id=cluster.id),
                node_count=1,
                # Job-level mounts: the AFS share at job_afs and the blobfuse
                # container at job_bfs, both under $AZ_BATCHAI_JOB_MOUNT_ROOT.
                mount_volumes=models.
                MountVolumes(azure_file_shares=[
                    models.AzureFileShareReference(
                        account_name=storage_account.name,
                        azure_file_url='https://{0}.file.core.windows.net/{1}'.
                        format(storage_account.name, 'jobshare'),
                        relative_mount_path='job_afs',
                        credentials=models.AzureStorageCredentialsInfo(
                            account_key=storage_account_key),
                    )
                ],
                             azure_blob_file_systems=[
                                 models.AzureBlobFileSystemReference(
                                     account_name=storage_account.name,
                                     container_name='jobcontainer',
                                     relative_mount_path='job_bfs',
                                     credentials=models.
                                     AzureStorageCredentialsInfo(
                                         account_key=storage_account_key),
                                 )
                             ]),
                # Put standard output on cluster level AFS to check that the job has access to it.
                std_out_err_path_prefix='$AZ_BATCHAI_MOUNT_ROOT/{0}'.format(
                    AZURE_FILES_MOUNTING_PATH),
                # Create two output directories on job level AFS and blobfuse.
                output_directories=[
                    models.OutputDirectory(
                        id='OUTPUT1',
                        path_prefix='$AZ_BATCHAI_JOB_MOUNT_ROOT/job_afs'),
                    models.OutputDirectory(
                        id='OUTPUT2',
                        path_prefix='$AZ_BATCHAI_JOB_MOUNT_ROOT/job_bfs')
                ],
                # Check that the job preparation has access to job level file systems.
                job_preparation=models.JobPreparation(
                    command_line=
                    'echo afs > $AZ_BATCHAI_OUTPUT_OUTPUT1/prep_afs.txt; '
                    'echo bfs > $AZ_BATCHAI_OUTPUT_OUTPUT2/prep_bfs.txt; '
                    'echo done'),
                # Check that the job has access to job
                # level file systems, including creating subfolders in them.
                custom_toolkit_settings=models.CustomToolkitSettings(
                    command_line=
                    'echo afs > $AZ_BATCHAI_OUTPUT_OUTPUT1/job_afs.txt; '
                    'echo bfs > $AZ_BATCHAI_OUTPUT_OUTPUT2/job_bfs.txt; '
                    'mkdir $AZ_BATCHAI_OUTPUT_OUTPUT1/afs; '
                    'echo afs > $AZ_BATCHAI_OUTPUT_OUTPUT1/afs/job_afs.txt; '
                    'mkdir $AZ_BATCHAI_OUTPUT_OUTPUT2/bfs; '
                    'echo bfs > $AZ_BATCHAI_OUTPUT_OUTPUT2/bfs/job_bfs.txt; '
                    'echo done'))).result()
        self.assertEqual(
            wait_for_job_completion(self.is_live, self.client,
                                    resource_group.name, job.name, MINUTE),
            models.ExecutionState.succeeded)

        # Re-fetch the job to get server-populated properties such as
        # job_output_directory_path_segment used below.
        job = self.client.jobs.get(resource_group.name, job.name)
        # Assert job and job prep standard output is populated on cluster level filesystem
        assert_job_files_are(
            self, self.client, resource_group.name, job.name,
            STANDARD_OUTPUT_DIRECTORY_ID, {
                u'stdout.txt': u'done\n',
                u'stderr.txt': u'',
                u'stdout-job_prep.txt': u'done\n',
                u'stderr-job_prep.txt': u''
            })
        # Assert files are generated on job level AFS
        # (a None value means "entry exists but is a directory").
        assert_job_files_are(self, self.client, resource_group.name, job.name,
                             'OUTPUT1', {
                                 u'job_afs.txt': u'afs\n',
                                 u'prep_afs.txt': u'afs\n',
                                 u'afs': None
                             })
        # Assert files are generated on job level blobfuse
        assert_job_files_are(self, self.client, resource_group.name, job.name,
                             'OUTPUT2', {
                                 u'job_bfs.txt': u'bfs\n',
                                 u'prep_bfs.txt': u'bfs\n',
                                 u'bfs': None
                             })
        # Assert subfolders are available via API
        assert_job_files_in_path_are(self, self.client, resource_group.name,
                                     job.name, 'OUTPUT1', 'afs',
                                     {u'job_afs.txt': u'afs\n'})
        assert_job_files_in_path_are(self, self.client, resource_group.name,
                                     job.name, 'OUTPUT2', 'bfs',
                                     {u'job_bfs.txt': u'bfs\n'})

        # Assert that we can access the output files created on job level mount volumes directly in storage using path
        # segment returned by the server.
        if storage_account.name != FAKE_STORAGE.name:
            files = FileService(storage_account.name, storage_account_key)
            self.assertTrue(
                files.exists(
                    'jobshare', job.job_output_directory_path_segment + '/' +
                    OUTPUT_DIRECTORIES_FOLDER_NAME, 'job_afs.txt'))
            blobs = BlockBlobService(storage_account.name, storage_account_key)
            self.assertTrue(
                blobs.exists(
                    'jobcontainer', job.job_output_directory_path_segment +
                    '/' + OUTPUT_DIRECTORIES_FOLDER_NAME + '/job_bfs.txt'))
        # After the job is done the filesystems should be unmounted automatically, check this by submitting a new job.
        checker = self.client.jobs.create(
            resource_group.name,
            'checker',
            parameters=models.JobCreateParameters(
                location=location,
                cluster=models.ResourceId(id=cluster.id),
                node_count=1,
                std_out_err_path_prefix='$AZ_BATCHAI_MOUNT_ROOT/{0}'.format(
                    AZURE_FILES_MOUNTING_PATH),
                # grep exits non-zero when the mounts are absent, failing the
                # checker job - which is exactly what we expect here.
                custom_toolkit_settings=models.CustomToolkitSettings(
                    command_line='echo job; df | grep -E "job_bfs|job_afs"'))
        ).result()
        # Check the job failed because there are not job level mount volumes anymore
        self.assertEqual(
            wait_for_job_completion(self.is_live, self.client,
                                    resource_group.name, checker.name, MINUTE),
            models.ExecutionState.failed)
        # Check that the cluster level AFS was still mounted
        assert_job_files_are(self, self.client, resource_group.name,
                             checker.name, STANDARD_OUTPUT_DIRECTORY_ID, {
                                 u'stdout.txt': u'job\n',
                                 u'stderr.txt': u''
                             })