def assertCanRunJob(self, resource_group, location, cluster_id, job_name,
                    container_settings, timeout_sec):
    create_custom_job(self.client, resource_group.name, location, cluster_id,
                      job_name, 1,
                      'echo hello | tee $AZ_BATCHAI_OUTPUT_OUTPUTS/hi.txt',
                      container=container_settings)

    # Verify that the job finishes reasonably fast.
    self.assertEqual(
        wait_for_job_completion(self.is_live, self.client,
                                resource_group.name, job_name, timeout_sec),
        models.ExecutionState.succeeded)

    # Verify that output files and standard output files are available and
    # contain the expected greeting.
    assert_job_files_are(self, self.client, resource_group.name, job_name,
                         'OUTPUTS', {u'hi.txt': u'hello\n'})
    assert_job_files_are(self, self.client, resource_group.name, job_name,
                         STANDARD_OUTPUT_DIRECTORY_ID, {
                             u'stdout.txt': u'hello\n',
                             u'stderr.txt': ''
                         })
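
# Illustration (not part of the original suite): a minimal sketch of a test that
# exercises the assertCanRunJob helper above with a public Docker Hub image.
# The test name is hypothetical and, like the surrounding tests, a real version
# would need the same resource group/cluster preparers; the container settings
# mirror the ones already used by the other container tests in this file.
def test_can_run_job_in_ubuntu_container_sketch(self, resource_group, location,
                                                cluster):
    """Sketch only: runs the canned 'echo hello' job inside an ubuntu container."""
    self.assertCanRunJob(
        resource_group, location, cluster.id, 'job',
        models.ContainerSettings(
            image_source_registry=models.ImageSourceRegistry(image='ubuntu')),
        MINUTE)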
def test_password_less_ssh_in_container(self, resource_group, location, cluster):
    """Tests if password-less ssh is configured in containers."""
    job = create_custom_job(
        self.client, resource_group.name, location, cluster.id, 'job', 2,
        'ssh 10.0.0.5 echo done && ssh 10.0.0.5 echo done',
        container=models.ContainerSettings(
            image_source_registry=models.ImageSourceRegistry(image='ubuntu')))
    self.assertEqual(
        wait_for_job_completion(self.is_live, self.client,
                                resource_group.name, job.name, MINUTE),
        models.ExecutionState.succeeded)
    job = self.client.jobs.get(resource_group.name, job.name)
    assert_job_files_are(
        self, self.client, resource_group.name, job.name,
        STANDARD_OUTPUT_DIRECTORY_ID, {
            u'stdout.txt': u'done\ndone\n',
            u'stderr.txt': re.compile('Permanently added.*')
        })
    self.client.jobs.delete(resource_group.name, job.name).result()
    self.assertRaises(
        CloudError,
        lambda: self.client.jobs.get(resource_group.name, job.name))
def test_job_preparation_container(self, resource_group, location, cluster):
    """Tests job preparation execution for a job running in a container."""
    # Create a job with a job preparation task which populates input data in
    # $AZ_BATCHAI_INPUT_INPUT/hi.txt.
    job = create_custom_job(
        self.client, resource_group.name, location, cluster.id, 'job', 1,
        'cat $AZ_BATCHAI_INPUT_INPUT/hi.txt',
        'mkdir -p $AZ_BATCHAI_INPUT_INPUT && echo hello | tee $AZ_BATCHAI_INPUT_INPUT/hi.txt',
        container=models.ContainerSettings(
            image_source_registry=models.ImageSourceRegistry(image='ubuntu')))
    self.assertEqual(
        wait_for_job_completion(self.is_live, self.client,
                                resource_group.name, job.name, MINUTE),
        models.ExecutionState.succeeded)
    assert_job_files_are(
        self, self.client, resource_group.name, job.name,
        STANDARD_OUTPUT_DIRECTORY_ID, {
            u'stdout.txt': u'hello\n',
            u'stderr.txt': u'',
            u'stdout-job_prep.txt': u'hello\n',
            u'stderr-job_prep.txt': u''
        })
    self.client.jobs.delete(resource_group.name, job.name).result()
    self.assertRaises(
        CloudError,
        lambda: self.client.jobs.get(resource_group.name, job.name))
def test_job_environment_variables_and_secrets(self, resource_group, location,
                                               cluster):
    """Tests if environment variables and secrets are passed to the job and job preparation."""
    job_name = 'job'
    job = self.client.jobs.create(
        resource_group.name,
        job_name,
        parameters=models.JobCreateParameters(
            location=location,
            cluster=models.ResourceId(id=cluster.id),
            node_count=1,
            std_out_err_path_prefix='$AZ_BATCHAI_MOUNT_ROOT/{0}'.format(
                AZURE_FILES_MOUNTING_PATH),
            environment_variables=[
                models.EnvironmentVariable(name='VARIABLE', value='VALUE')
            ],
            secrets=[
                models.EnvironmentVariableWithSecretValue(
                    name='SECRET_VARIABLE', value='SECRET')
            ],
            # Check that the job preparation has access to env variables and secrets.
            job_preparation=models.JobPreparation(
                command_line='echo $VARIABLE $SECRET_VARIABLE'),
            # Check that the job has access to env variables and secrets.
            custom_toolkit_settings=models.CustomToolkitSettings(
                command_line='echo $VARIABLE $SECRET_VARIABLE'))).result()  # type: models.Job
    self.assertEqual(
        wait_for_job_completion(self.is_live, self.client,
                                resource_group.name, job.name, MINUTE),
        models.ExecutionState.succeeded)
    # Check that environment variables are reported by the server.
    self.assertEqual(len(job.environment_variables), 1)
    self.assertEqual(job.environment_variables[0].name, 'VARIABLE')
    self.assertEqual(job.environment_variables[0].value, 'VALUE')
    # Check that secrets are reported back by the server, but their values are not.
    self.assertEqual(len(job.secrets), 1)
    self.assertEqual(job.secrets[0].name, 'SECRET_VARIABLE')
    self.assertIsNone(job.secrets[0].value)
    # Check that the job and job prep had access to the env variables and secrets.
    assert_job_files_are(
        self, self.client, resource_group.name, job.name,
        STANDARD_OUTPUT_DIRECTORY_ID, {
            u'stdout.txt': u'VALUE SECRET\n',
            u'stderr.txt': u'',
            u'stdout-job_prep.txt': u'VALUE SECRET\n',
            u'stderr-job_prep.txt': u''
        })
def test_job_creation_and_deletion(self, resource_group, location, cluster,
                                   storage_account, storage_account_key):
    """Tests simple scenario for a job - submit, check results, delete."""
    job = create_custom_job(
        self.client, resource_group.name, location, cluster.id, 'job', 1,
        'echo hi | tee {0}/hi.txt'.format(JOB_OUTPUT_DIRECTORY_PATH_ENV),
        container=models.ContainerSettings(
            image_source_registry=models.ImageSourceRegistry(
                image='ubuntu')))  # type: models.Job
    self.assertEqual(
        wait_for_job_completion(self.is_live, self.client,
                                resource_group.name, job.name, MINUTE),
        models.ExecutionState.succeeded)
    # Check standard job output.
    assert_job_files_are(self, self.client, resource_group.name, job.name,
                         STANDARD_OUTPUT_DIRECTORY_ID, {
                             u'stdout.txt': u'hi\n',
                             u'stderr.txt': u''
                         })
    # Check the job's output.
    assert_job_files_are(self, self.client, resource_group.name, job.name,
                         JOB_OUTPUT_DIRECTORY_ID, {u'hi.txt': u'hi\n'})
    # Check that we can access the output files directly in storage using the
    # path segment returned by the server.
    assert_file_in_file_share(
        self, storage_account.name, storage_account_key,
        job.job_output_directory_path_segment + '/' + STDOUTERR_FOLDER_NAME,
        'stdout.txt', u'hi\n')
    self.client.jobs.delete(resource_group.name, job.name).result()
    self.assertRaises(
        CloudError,
        lambda: self.client.jobs.get(resource_group.name, job.name))
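
# Illustration (not part of the original suite): assert_job_files_are above hides
# the raw SDK call behind a helper. The sketch below shows how a job's standard
# output files could be listed directly. The helper name is hypothetical, and the
# use of jobs.list_output_files with JobsListOutputFilesOptions is an assumption
# about the management SDK surface that may differ between SDK versions.
def _print_job_stdout_files_sketch(self, resource_group_name, job_name):
    """Sketch only: list the files in a job's standard output directory."""
    files = self.client.jobs.list_output_files(
        resource_group_name, job_name,
        models.JobsListOutputFilesOptions(
            outputdirectoryid=STANDARD_OUTPUT_DIRECTORY_ID))
    for f in files:
        # Each returned file is expected to expose a name and a download URL.
        print(f.name, f.download_url)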
def test_file_server(self, resource_group, location, storage_account,
                     storage_account_key):
    """Tests file server functionality.

    1. Create a file server.
    2. Create two clusters with this file server.
    3. Check that the file server is mounted:
       a. submit tasks (one from host and another from container) on the
          first cluster to write data to nfs
       b. submit a task on the second cluster to read the data from nfs
    """
    server = create_file_server(
        self.client, location, resource_group.name,
        self.file_server_name)  # type: models.FileServer
    cluster1 = create_cluster(
        self.client, location, resource_group.name, 'cluster1', 'STANDARD_D1',
        1, storage_account.name, storage_account_key,
        file_servers=[
            models.FileServerReference(
                file_server=models.ResourceId(id=server.id),
                relative_mount_path='nfs',
                mount_options="rw")
        ])
    cluster2 = create_cluster(
        self.client, location, resource_group.name, 'cluster2', 'STANDARD_D1',
        1, storage_account.name, storage_account_key,
        file_servers=[
            models.FileServerReference(
                file_server=models.ResourceId(id=server.id),
                relative_mount_path='nfs',
                mount_options="rw")
        ])
    # Verify that the file server is reported.
    assert_existing_file_servers_are(self, self.client, resource_group.name,
                                     [self.file_server_name])
    # Verify that the file server becomes available in a reasonable time.
    self.assertTrue(
        wait_for_file_server(self.is_live, self.client, resource_group.name,
                             self.file_server_name,
                             _FILE_SERVER_CREATION_TIMEOUT_SEC))
    # Verify that the remote login information and private ip are reported.
    server = self.client.file_servers.get(
        resource_group.name, self.file_server_name)  # type: models.FileServer
    self.assertRegexpMatches(server.mount_settings.file_server_public_ip,
                             RE_ID_ADDRESS)
    self.assertRegexpMatches(server.mount_settings.file_server_internal_ip,
                             RE_ID_ADDRESS)
    # Verify that the clusters allocated their nodes successfully.
    self.assertEqual(
        wait_for_nodes(self.is_live, self.client, resource_group.name,
                       'cluster1', 1, NODE_STARTUP_TIMEOUT_SEC), 1)
    self.assertEqual(
        wait_for_nodes(self.is_live, self.client, resource_group.name,
                       'cluster2', 1, NODE_STARTUP_TIMEOUT_SEC), 1)
    # Execute publishing tasks on the first cluster.
    job1 = create_custom_job(
        self.client, resource_group.name, location, cluster1.id,
        'host_publisher', 1,
        'echo hi from host > $AZ_BATCHAI_MOUNT_ROOT/nfs/host.txt')
    self.assertEqual(
        wait_for_job_completion(self.is_live, self.client,
                                resource_group.name, job1.name, MINUTE),
        models.ExecutionState.succeeded)
    job2 = create_custom_job(
        self.client, resource_group.name, location, cluster1.id,
        'container_publisher', 1,
        'echo hi from container >> $AZ_BATCHAI_MOUNT_ROOT/nfs/container.txt',
        container=models.ContainerSettings(
            image_source_registry=models.ImageSourceRegistry(image="ubuntu")))
    self.assertEqual(
        wait_for_job_completion(self.is_live, self.client,
                                resource_group.name, job2.name, MINUTE),
        models.ExecutionState.succeeded)
    # Execute the consumer task on the second cluster.
    job3 = create_custom_job(
        self.client, resource_group.name, location, cluster2.id, 'consumer', 1,
        'cat $AZ_BATCHAI_MOUNT_ROOT/nfs/host.txt; '
        'cat $AZ_BATCHAI_MOUNT_ROOT/nfs/container.txt')
    self.assertEqual(
        wait_for_job_completion(self.is_live, self.client,
                                resource_group.name, job3.name, MINUTE),
        models.ExecutionState.succeeded)
    # Verify the data.
    assert_job_files_are(
        self, self.client, resource_group.name, job3.name,
        STANDARD_OUTPUT_DIRECTORY_ID, {
            u'stdout.txt': u'hi from host\nhi from container\n',
            u'stderr.txt': ''
        })
    # Delete the clusters.
    self.client.clusters.delete(resource_group.name, 'cluster1').result()
    self.client.clusters.delete(resource_group.name, 'cluster2').result()
    # Test file server deletion.
    self.client.file_servers.delete(resource_group.name,
                                    self.file_server_name).result()
    assert_existing_file_servers_are(self, self.client, resource_group.name,
                                     [])
def test_job_level_mounting(self, resource_group, location, cluster,
                            storage_account, storage_account_key):
    """Tests if it's possible to mount external file systems for a job."""
    job_name = 'job'
    # Create a file share and a blob container to mount on the job level.
    if storage_account.name != FAKE_STORAGE.name:
        files = FileService(storage_account.name, storage_account_key)
        files.create_share('jobshare', fail_on_exist=False)
        blobs = BlockBlobService(storage_account.name, storage_account_key)
        blobs.create_container('jobcontainer', fail_on_exist=False)
    job = self.client.jobs.create(
        resource_group.name,
        job_name,
        parameters=models.JobCreateParameters(
            location=location,
            cluster=models.ResourceId(id=cluster.id),
            node_count=1,
            mount_volumes=models.MountVolumes(
                azure_file_shares=[
                    models.AzureFileShareReference(
                        account_name=storage_account.name,
                        azure_file_url='https://{0}.file.core.windows.net/{1}'.format(
                            storage_account.name, 'jobshare'),
                        relative_mount_path='job_afs',
                        credentials=models.AzureStorageCredentialsInfo(
                            account_key=storage_account_key),
                    )
                ],
                azure_blob_file_systems=[
                    models.AzureBlobFileSystemReference(
                        account_name=storage_account.name,
                        container_name='jobcontainer',
                        relative_mount_path='job_bfs',
                        credentials=models.AzureStorageCredentialsInfo(
                            account_key=storage_account_key),
                    )
                ]),
            # Put standard output on cluster level AFS to check that the job
            # has access to it.
            std_out_err_path_prefix='$AZ_BATCHAI_MOUNT_ROOT/{0}'.format(
                AZURE_FILES_MOUNTING_PATH),
            # Create two output directories on job level AFS and blobfuse.
            output_directories=[
                models.OutputDirectory(
                    id='OUTPUT1',
                    path_prefix='$AZ_BATCHAI_JOB_MOUNT_ROOT/job_afs'),
                models.OutputDirectory(
                    id='OUTPUT2',
                    path_prefix='$AZ_BATCHAI_JOB_MOUNT_ROOT/job_bfs')
            ],
            # Check that the job preparation has access to job level file systems.
            job_preparation=models.JobPreparation(
                command_line='echo afs > $AZ_BATCHAI_OUTPUT_OUTPUT1/prep_afs.txt; '
                             'echo bfs > $AZ_BATCHAI_OUTPUT_OUTPUT2/prep_bfs.txt; '
                             'echo done'),
            # Check that the job has access to job level file systems.
            custom_toolkit_settings=models.CustomToolkitSettings(
                command_line='echo afs > $AZ_BATCHAI_OUTPUT_OUTPUT1/job_afs.txt; '
                             'echo bfs > $AZ_BATCHAI_OUTPUT_OUTPUT2/job_bfs.txt; '
                             'mkdir $AZ_BATCHAI_OUTPUT_OUTPUT1/afs; '
                             'echo afs > $AZ_BATCHAI_OUTPUT_OUTPUT1/afs/job_afs.txt; '
                             'mkdir $AZ_BATCHAI_OUTPUT_OUTPUT2/bfs; '
                             'echo bfs > $AZ_BATCHAI_OUTPUT_OUTPUT2/bfs/job_bfs.txt; '
                             'echo done'))).result()
    self.assertEqual(
        wait_for_job_completion(self.is_live, self.client,
                                resource_group.name, job.name, MINUTE),
        models.ExecutionState.succeeded)
    job = self.client.jobs.get(resource_group.name, job.name)
    # Assert that job and job prep standard output is populated on the cluster
    # level filesystem.
    assert_job_files_are(
        self, self.client, resource_group.name, job.name,
        STANDARD_OUTPUT_DIRECTORY_ID, {
            u'stdout.txt': u'done\n',
            u'stderr.txt': u'',
            u'stdout-job_prep.txt': u'done\n',
            u'stderr-job_prep.txt': u''
        })
    # Assert that files are generated on the job level AFS.
    assert_job_files_are(self, self.client, resource_group.name, job.name,
                         'OUTPUT1', {
                             u'job_afs.txt': u'afs\n',
                             u'prep_afs.txt': u'afs\n',
                             u'afs': None
                         })
    # Assert that files are generated on the job level blobfuse.
    assert_job_files_are(self, self.client, resource_group.name, job.name,
                         'OUTPUT2', {
                             u'job_bfs.txt': u'bfs\n',
                             u'prep_bfs.txt': u'bfs\n',
                             u'bfs': None
                         })
    # Assert that subfolders are available via the API.
    assert_job_files_in_path_are(self, self.client, resource_group.name,
                                 job.name, 'OUTPUT1', 'afs',
                                 {u'job_afs.txt': u'afs\n'})
    assert_job_files_in_path_are(self, self.client, resource_group.name,
                                 job.name, 'OUTPUT2', 'bfs',
                                 {u'job_bfs.txt': u'bfs\n'})
    # Assert that we can access the output files created on job level mount
    # volumes directly in storage using the path segment returned by the server.
    if storage_account.name != FAKE_STORAGE.name:
        files = FileService(storage_account.name, storage_account_key)
        self.assertTrue(
            files.exists(
                'jobshare', job.job_output_directory_path_segment + '/' +
                OUTPUT_DIRECTORIES_FOLDER_NAME, 'job_afs.txt'))
        blobs = BlockBlobService(storage_account.name, storage_account_key)
        self.assertTrue(
            blobs.exists(
                'jobcontainer', job.job_output_directory_path_segment + '/' +
                OUTPUT_DIRECTORIES_FOLDER_NAME + '/job_bfs.txt'))
    # After the job is done the filesystems should be unmounted automatically;
    # check this by submitting a new job.
    checker = self.client.jobs.create(
        resource_group.name,
        'checker',
        parameters=models.JobCreateParameters(
            location=location,
            cluster=models.ResourceId(id=cluster.id),
            node_count=1,
            std_out_err_path_prefix='$AZ_BATCHAI_MOUNT_ROOT/{0}'.format(
                AZURE_FILES_MOUNTING_PATH),
            custom_toolkit_settings=models.CustomToolkitSettings(
                command_line='echo job; df | grep -E "job_bfs|job_afs"'))).result()
    # Check that the job failed because there are no job level mount volumes
    # anymore.
    self.assertEqual(
        wait_for_job_completion(self.is_live, self.client,
                                resource_group.name, checker.name, MINUTE),
        models.ExecutionState.failed)
    # Check that the cluster level AFS was still mounted.
    assert_job_files_are(self, self.client, resource_group.name, checker.name,
                         STANDARD_OUTPUT_DIRECTORY_ID, {
                             u'stdout.txt': u'job\n',
                             u'stderr.txt': u''
                         })