def check_amr_summary_tasks(): amr_summary_tasks = AMRAzureRequest.objects.filter() credentials = batch_auth.SharedKeyCredentials(settings.BATCH_ACCOUNT_NAME, settings.BATCH_ACCOUNT_KEY) batch_client = batch.BatchServiceClient( credentials, base_url=settings.BATCH_ACCOUNT_URL) for amr_task in amr_summary_tasks: amr_object = AMRSummary.objects.get(pk=amr_task.amr_request.pk) batch_job_name = 'amrsummary-{}'.format(amr_task.amr_request.pk) # Check if tasks related with this amrsummary job have finished. tasks_completed = True try: for cloudtask in batch_client.task.list(batch_job_name): if cloudtask.state != batchmodels.TaskState.completed: tasks_completed = False except: # If something errors first time through job can't get deleted. In that case, give up. AMRSummary.objects.filter(pk=amr_task.amr_request.pk).update( status='Error') # Delete task so we don't keep iterating over it. AMRAzureRequest.objects.filter(id=amr_task.id).delete() continue # If tasks have completed, check if they were successful. if tasks_completed: exit_codes_good = True for cloudtask in batch_client.task.list(batch_job_name): if cloudtask.execution_info.exit_code != 0: exit_codes_good = False # Get rid of job and pool so we don't waste big $$$ and do cleanup/get files downloaded in tasks. batch_client.job.delete(job_id=batch_job_name) batch_client.pool.delete(pool_id=batch_job_name) if exit_codes_good: # Now need to generate an SAS URL and give access to it/update the download link. blob_client = BlockBlobService( account_key=settings.AZURE_ACCOUNT_KEY, account_name=settings.AZURE_ACCOUNT_NAME) # Download the output container so we can zip it. download_container(blob_service=blob_client, container_name=batch_job_name + '-output', output_dir='olc_webportalv2/media') output_dir = 'olc_webportalv2/media/{}'.format(batch_job_name) if os.path.isfile(os.path.join(output_dir, 'batch_config.txt')): os.remove(os.path.join(output_dir, 'batch_config.txt')) shutil.make_archive(output_dir, 'zip', output_dir) amr_result_container = 'amrsummary-{}'.format(amr_object.pk) sas_url = generate_download_link( blob_client=blob_client, container_name=amr_result_container, output_zipfile=output_dir + '.zip', expiry=8) # Also need to populate our AMRDetail model with results. seq_amr_dict = dict() for seqid in amr_object.seqids: seq_amr_dict[seqid] = dict() with open( os.path.join(output_dir, 'reports', 'amr_summary.csv')) as csvfile: reader = csv.DictReader(csvfile) for row in reader: seqid = row['Strain'] gene = row['Gene'] location = row['Location'] if seqid not in seq_amr_dict: seq_amr_dict[seqid] = dict() seq_amr_dict[seqid][gene] = location for seqid in seq_amr_dict: AMRDetail.objects.create(amr_request=amr_object, seqid=seqid, amr_results=seq_amr_dict[seqid]) # Finally, do some cleanup shutil.rmtree(output_dir) os.remove(output_dir + '.zip') amr_object.download_link = sas_url amr_object.status = 'Complete' amr_object.save() else: amr_object.status = 'Error' amr_object.save() AMRAzureRequest.objects.filter(id=amr_task.id).delete()
        analysis_grid_path,
        datetime.datetime.utcnow() + datetime.timedelta(days=7)))
    analysis_grid_names.append(analysis_grid_name)
    print("{0:} uploaded to {1:}/{2:}".format(
        os.path.basename(analysis_grid_name), project_id, analysis_grid_name))

# TODO: For some reason with >100 tasks two pools of 100 were created,
# maybe we just create a set of jobs and assign them to the same pool?
# A chunked list of grid indices - to create multiple pool instances
job_chunks = list(
    common.helpers.chunks(list(range(0, len(analysis_grid_names))), 100))
# TODO - change new pool amount

# Generate batch client
batch_client = batch.BatchServiceClient(
    batchauth.SharedKeyCredentials(batch_account_name, batch_account_key),
    base_url=batch_service_url)

# Get a verified VM image on which to run the job/s
sku_to_use, image_ref_to_use = \
    common.helpers.select_latest_verified_vm_image_with_node_agent_sku(
        batch_client, 'Canonical', 'UbuntuServer', '16.04')

# Create a pool per job (each job contains a maximum of 100 tasks)
pool_ids = []
job_ids = []
task_ids = []
for job_n, job_chunk in enumerate(job_chunks):
    print("Job{0:}, containing grids {1:}".format(job_n, job_chunk))
    pool_id = common.helpers.generate_unique_resource_name(
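# The fragment above relies on a common.helpers.chunks utility to split the grid
# indices into groups of at most 100 tasks. A minimal sketch of such a helper,
# assuming it simply yields successive slices of the input list (illustrative only):
def chunks(items, size):
    """Yield successive slices of at most `size` elements from `items`."""
    for start in range(0, len(items), size):
        yield items[start:start + size]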
def check_tree_tasks():
    # Also check for Mash tree creation tasks
    tree_tasks = TreeAzureRequest.objects.filter()
    credentials = batch_auth.SharedKeyCredentials(settings.BATCH_ACCOUNT_NAME,
                                                  settings.BATCH_ACCOUNT_KEY)
    batch_client = batch.BatchServiceClient(
        credentials, base_url=settings.BATCH_ACCOUNT_URL)
    for tree_task in tree_tasks:
        tree_object = Tree.objects.get(pk=tree_task.tree_request.pk)
        batch_job_name = 'mash-{}'.format(tree_task.tree_request.pk)
        # Check if tasks related with this mash job have finished.
        tasks_completed = True
        try:
            for cloudtask in batch_client.task.list(batch_job_name):
                if cloudtask.state != batchmodels.TaskState.completed:
                    tasks_completed = False
        except Exception:
            # If something errors the first time through, the job doesn't exist. In that case, give up.
            Tree.objects.filter(pk=tree_task.tree_request.pk).update(
                status='Error')
            # Delete task so we don't keep iterating over it.
            TreeAzureRequest.objects.filter(id=tree_task.id).delete()
            continue

        # If tasks have completed, check if they were successful.
        if tasks_completed:
            exit_codes_good = True
            for cloudtask in batch_client.task.list(batch_job_name):
                if cloudtask.execution_info.exit_code != 0:
                    exit_codes_good = False
            # Get rid of job and pool so we don't waste big $$$ and do cleanup/get files downloaded in tasks.
            batch_client.job.delete(job_id=batch_job_name)
            batch_client.pool.delete(pool_id=batch_job_name)
            if exit_codes_good:
                # Now need to generate an SAS URL and give access to it/update the download link.
                blob_client = BlockBlobService(
                    account_key=settings.AZURE_ACCOUNT_KEY,
                    account_name=settings.AZURE_ACCOUNT_NAME)
                # Download the output container so we can zip it.
                download_container(blob_service=blob_client,
                                   container_name=batch_job_name + '-output',
                                   output_dir='olc_webportalv2/media')
                tree_file = 'olc_webportalv2/media/mash-{}/mash.tree'.format(
                    tree_object.pk)
                with open(tree_file) as f:
                    tree_string = f.readline()
                if tree_object.number_diversitree_strains > 0:
                    diverse_strains = strainchoosr.pd_greedy(
                        tree=ete3.Tree(tree_file),
                        number_tips=tree_object.number_diversitree_strains,
                        starting_strains=[])
                    tree_object.seqids_diversitree = strainchoosr.get_leaf_names_from_nodes(
                        diverse_strains)
                tree_object.newick_tree = tree_string.rstrip().replace("'", "")
                blob_client.delete_container(container_name=batch_job_name)
                # Should now have results from mash in olc_webportalv2/media/mash-X, where X is the pk of the tree request
                tree_output_folder = os.path.join('olc_webportalv2/media',
                                                  batch_job_name)
                os.remove(os.path.join(tree_output_folder, 'batch_config.txt'))
                # Need to zip this folder and then upload the zipped folder to cloud
                shutil.make_archive(tree_output_folder, 'zip',
                                    tree_output_folder)
                tree_result_container = 'tree-{}'.format(tree_object.pk)
                sas_url = generate_download_link(
                    blob_client=blob_client,
                    container_name=tree_result_container,
                    output_zipfile=tree_output_folder + '.zip',
                    expiry=8)
                shutil.rmtree(tree_output_folder)
                zip_folder = 'olc_webportalv2/media/{}.zip'.format(
                    batch_job_name)
                if os.path.isfile(zip_folder):
                    os.remove(zip_folder)
                tree_object.download_link = sas_url
                tree_object.status = 'Complete'
                tree_object.save()
            else:
                Tree.objects.filter(pk=tree_task.tree_request.pk).update(
                    status='Error')
                # Delete task so we don't keep iterating over it.
                TreeAzureRequest.objects.filter(id=tree_task.id).delete()
def run(config: BatchConfig, wait=True) -> None: r""" :param config: A :class:`BatchConfig` instance with the Azure Batch run parameters :type config: :class:BatchConfig :param boolean wait: If true, wait for the batch to complete and then download the results to file :raises BatchErrorException: If raised by the Azure Batch Python SDK """ # pylint: disable=too-many-locals # replace any missing values in the configuration with environment variables config = validate_config(config) start_time = datetime.datetime.now().replace(microsecond=0) print('Synthetic Controls Run "{}" start time: {}'.format( config.JOB_ID, start_time)) print() _LOCAL_INPUT_FILE = os.path.join(config.BATCH_DIRECTORY, _BATCH_CV_FILE_NAME) v_pen, w_pen, model_data = get_config(_LOCAL_INPUT_FILE) n_folds = len(model_data["folds"]) * len(v_pen) * len(w_pen) # Create the blob client, for use in obtaining references to # blob storage containers and uploading files to containers. blob_client = azureblob.BlockBlobService( account_name=config.STORAGE_ACCOUNT_NAME, account_key=config.STORAGE_ACCOUNT_KEY) # Use the blob client to create the containers in Azure Storage if they # don't yet exist. blob_client.create_container(config.CONTAINER_NAME, fail_on_exist=False) CONTAINER_SAS_URL = build_output_sas_url(config, blob_client) # The collection of data files that are to be processed by the tasks. input_file_path = os.path.join(sys.path[0], _LOCAL_INPUT_FILE) # Upload the data files. input_file = upload_file_to_container(blob_client, config.CONTAINER_NAME, input_file_path, config.STORAGE_ACCESS_DURATION_HRS) # Create a Batch service client. We'll now be interacting with the Batch # service in addition to Storage credentials = batch_auth.SharedKeyCredentials(config.BATCH_ACCOUNT_NAME, config.BATCH_ACCOUNT_KEY) batch_client = batch.BatchServiceClient(credentials, batch_url=config.BATCH_ACCOUNT_URL) try: # Create the pool that will contain the compute nodes that will execute the # tasks. try: create_pool(config, batch_client) print("Created pool: ", config.POOL_ID) except models.BatchErrorException: print("Using pool: ", config.POOL_ID) # Create the job that will run the tasks. create_job(batch_client, config.JOB_ID, config.POOL_ID) # Add the tasks to the job. add_tasks( config, blob_client, batch_client, CONTAINER_SAS_URL, config.JOB_ID, input_file, n_folds, ) if not wait: return # Pause execution until tasks reach Completed state. wait_for_tasks_to_complete( batch_client, config.JOB_ID, datetime.timedelta(hours=config.STORAGE_ACCESS_DURATION_HRS)) _download_files(config, blob_client, config.BATCH_DIRECTORY, n_folds) except models.BatchErrorException as err: print_batch_exception(err) raise err # Clean up storage resources # TODO: re-enable this and delete the output container too # -- print("Deleting container [{}]...".format(input_container_name)) # -- blob_client.delete_container(input_container_name) # Print out some timing info end_time = datetime.datetime.now().replace(microsecond=0) print() print("Sample end: {}".format(end_time)) print("Elapsed time: {}".format(end_time - start_time)) print() # Clean up Batch resources (if the user so chooses). if config.DELETE_POOL_WHEN_DONE: batch_client.pool.delete(config.POOL_ID) if config.DELETE_JOB_WHEN_DONE: batch_client.job.delete(config.JOB_ID)
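# run() above delegates polling to a wait_for_tasks_to_complete helper that is not
# shown here. A minimal sketch under the assumption that it simply polls task state
# until every task in the job is completed or the timeout expires (illustrative only):
import datetime
import time

import azure.batch.models as batchmodels


def wait_for_tasks_to_complete(batch_client, job_id, timeout):
    """Poll the tasks of `job_id` until all are completed or `timeout` elapses."""
    deadline = datetime.datetime.now() + timeout
    while datetime.datetime.now() < deadline:
        tasks = list(batch_client.task.list(job_id))
        incomplete = [t for t in tasks
                      if t.state != batchmodels.TaskState.completed]
        if not incomplete:
            return
        time.sleep(10)
    raise RuntimeError("Tasks did not reach the Completed state within the timeout.")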
def execute_sample(global_config, sample_config):
    """Executes the sample with the specified configurations.

    :param global_config: The global configuration to use.
    :type global_config: `configparser.ConfigParser`
    :param sample_config: The sample specific configuration to use.
    :type sample_config: `configparser.ConfigParser`
    """
    # Set up the configuration
    batch_account_key = global_config.get('Batch', 'batchaccountkey')
    batch_account_name = global_config.get('Batch', 'batchaccountname')
    batch_service_url = global_config.get('Batch', 'batchserviceurl')

    storage_account_key = global_config.get('Storage', 'storageaccountkey')
    storage_account_name = global_config.get('Storage', 'storageaccountname')
    storage_account_suffix = global_config.get('Storage',
                                               'storageaccountsuffix')

    should_delete_container = sample_config.getboolean(
        'DEFAULT', 'shoulddeletecontainer')
    should_delete_job = sample_config.getboolean('DEFAULT', 'shoulddeletejob')
    should_delete_pool = sample_config.getboolean('DEFAULT',
                                                  'shoulddeletepool')
    generate_ssh_tunnel_script = sample_config.getboolean(
        'DEFAULT', 'generatesshtunnelscript')
    pool_vm_size = sample_config.get('DEFAULT', 'poolvmsize')
    pool_vm_count = sample_config.getint('DEFAULT', 'poolvmcount')

    # Print the settings we are running with
    common.helpers.print_configuration(global_config)
    common.helpers.print_configuration(sample_config)

    credentials = batchauth.SharedKeyCredentials(batch_account_name,
                                                 batch_account_key)
    batch_client = batch.BatchServiceClient(credentials,
                                            base_url=batch_service_url)

    # Retry 5 times -- default is 3
    batch_client.config.retry_policy.retries = 5

    block_blob_client = azureblob.BlockBlobService(
        account_name=storage_account_name,
        account_key=storage_account_key,
        endpoint_suffix=storage_account_suffix)

    job_id = common.helpers.generate_unique_resource_name('DockerSwarm')
    pool_id = common.helpers.generate_unique_resource_name('DockerSwarm')
    public_key = None
    private_key = None
    try:
        # create pool and wait for node idle
        nodes = create_pool_and_wait_for_nodes(batch_client, block_blob_client,
                                               pool_id, pool_vm_size,
                                               pool_vm_count)

        # generate ssh key pair
        private_key, public_key = generate_ssh_keypair('batch_id_rsa')

        # add compute node user to nodes with ssh key
        for node in nodes:
            add_admin_user_to_compute_node(batch_client, pool_id, node,
                                           _NODE_USERNAME, public_key)

        # designate a swarm master node
        master_node, swarm_token = designate_master_docker_swarm_node(
            batch_client, pool_id, nodes, job_id)

        # add nodes to swarm
        add_nodes_to_swarm(batch_client, pool_id, nodes, job_id, master_node,
                           swarm_token)

        # connect to docker remotely
        connect_to_remote_docker_swarm_master(batch_client, pool_id, nodes,
                                              master_node[1], _NODE_USERNAME,
                                              private_key,
                                              generate_ssh_tunnel_script)

        # submit job and add a task
        print('submitting a docker run task via Azure Batch...')
        task_id = add_docker_batch_task(batch_client, block_blob_client,
                                        job_id, pool_id)

        # wait for tasks to complete
        common.helpers.wait_for_tasks_to_complete(
            batch_client, job_id, datetime.timedelta(minutes=5))

        common.helpers.print_task_output(batch_client, job_id, [task_id])
    finally:
        # perform clean up
        if public_key is not None:
            try:
                os.remove(public_key)
            except OSError:
                pass
        if private_key is not None:
            if generate_ssh_tunnel_script:
                print('not deleting ssh private key due to ssh tunnel script!')
            else:
                try:
                    os.remove(private_key)
                except OSError:
                    pass
        if should_delete_container:
            print('Deleting container: {}'.format(_CONTAINER_NAME))
            block_blob_client.delete_container(_CONTAINER_NAME,
                                               fail_not_exist=False)
        if should_delete_job:
            print('Deleting job: {}'.format(job_id))
            batch_client.job.delete(job_id)
        if should_delete_pool:
            print('Deleting pool: {}'.format(pool_id))
            batch_client.pool.delete(pool_id)
def execute_sample(global_config, sample_config):
    """Executes the sample with the specified configurations.

    :param global_config: The global configuration to use.
    :type global_config: `configparser.ConfigParser`
    :param sample_config: The sample specific configuration to use.
    :type sample_config: `configparser.ConfigParser`
    """
    # Set up the configuration
    batch_account_key = global_config.get('Batch', 'batchaccountkey')
    batch_account_name = global_config.get('Batch', 'batchaccountname')
    batch_service_url = global_config.get('Batch', 'batchserviceurl')

    storage_account_key = global_config.get('Storage', 'storageaccountkey')
    storage_account_name = global_config.get('Storage', 'storageaccountname')
    storage_account_suffix = global_config.get('Storage',
                                               'storageaccountsuffix')

    should_delete_container = sample_config.getboolean(
        'DEFAULT', 'shoulddeletecontainer')
    should_delete_job = sample_config.getboolean('DEFAULT', 'shoulddeletejob')
    should_delete_pool = sample_config.getboolean('DEFAULT',
                                                  'shoulddeletepool')
    should_delete_cert = sample_config.getboolean('DEFAULT',
                                                  'shoulddeletecert')
    pool_vm_size = sample_config.get('DEFAULT', 'poolvmsize')
    pool_vm_count = sample_config.getint('DEFAULT', 'poolvmcount')

    # Print the settings we are running with
    common.helpers.print_configuration(global_config)
    common.helpers.print_configuration(sample_config)

    credentials = batchauth.SharedKeyCredentials(batch_account_name,
                                                 batch_account_key)
    batch_client = batch.BatchServiceClient(credentials,
                                            base_url=batch_service_url)

    # Retry 5 times -- default is 3
    batch_client.config.retry_policy.retries = 5

    block_blob_client = azureblob.BlockBlobService(
        account_name=storage_account_name,
        account_key=storage_account_key,
        endpoint_suffix=storage_account_suffix)

    job_id = common.helpers.generate_unique_resource_name(
        'EncryptedResourceFiles')
    pool_id = common.helpers.generate_unique_resource_name(
        'EncryptedResourceFiles')
    sha1_cert_tp = None
    try:
        # encrypt local file and upload to blob storage via blobxfer
        rsapfxfile, sha1_cert_tp = encrypt_localfile_to_blob_storage(
            storage_account_name, storage_account_key, _CONTAINER_NAME,
            _RESOURCE_TO_ENCRYPT)

        # add certificate to account
        add_certificate_to_account(batch_client, rsapfxfile, _PFX_PASSPHRASE,
                                   sha1_cert_tp)

        # create pool and wait for node idle
        create_pool_and_wait_for_node(batch_client, pool_id, pool_vm_size,
                                      pool_vm_count, sha1_cert_tp)

        # submit job and add a task
        submit_job_and_add_task(batch_client, block_blob_client,
                                storage_account_name, storage_account_key,
                                _CONTAINER_NAME, _RESOURCE_NAME, job_id,
                                pool_id, sha1_cert_tp)

        # wait for tasks to complete
        common.helpers.wait_for_tasks_to_complete(
            batch_client, job_id, datetime.timedelta(minutes=20))

        tasks = batch_client.task.list(job_id)
        task_ids = [task.id for task in tasks]

        common.helpers.print_task_output(batch_client, job_id, task_ids)
    finally:
        # perform clean up
        if should_delete_container:
            print('Deleting container: {}'.format(_CONTAINER_NAME))
            block_blob_client.delete_container(_CONTAINER_NAME,
                                               fail_not_exist=False)
        if should_delete_job:
            print('Deleting job: {}'.format(job_id))
            batch_client.job.delete(job_id)
        if should_delete_pool:
            print('Deleting pool: {}'.format(pool_id))
            batch_client.pool.delete(pool_id)
        if should_delete_cert and sha1_cert_tp is not None:
            # cert deletion requires no active references to cert, so
            # override any config settings for preserving job/pool
            if not should_delete_job:
                print('Deleting job: {}'.format(job_id))
                batch_client.job.delete(job_id)
            if not should_delete_pool:
                print('Deleting pool: {}'.format(pool_id))
                batch_client.pool.delete(pool_id)
            print('Deleting cert: {}'.format(sha1_cert_tp))
            batch_client.certificate.delete('sha1', sha1_cert_tp)
        upload_file_to_container(blob_client, input_container_name, file_path)
        for file_path in input_file_paths
    ]

    # Obtain a shared access signature that provides write access to the output
    # container to which the tasks will upload their output.
    output_container_sas_token = get_container_sas_token(
        blob_client, output_container_name, azureblob.BlobPermissions.WRITE)

    # Create a Batch service client. We'll now be interacting with the Batch
    # service in addition to Storage
    credentials = batchauth.SharedKeyCredentials(_BATCH_ACCOUNT_NAME,
                                                 _BATCH_ACCOUNT_KEY)

    batch_client = batch.BatchServiceClient(
        batch.BatchServiceClientConfiguration(credentials,
                                              base_url=_BATCH_ACCOUNT_URL))

    # Create the pool that will contain the compute nodes that will execute the
    # tasks. The resource files we pass in are used for configuring the pool's
    # start task, which is executed each time a node first joins the pool (or
    # is rebooted or re-imaged).
    create_pool(batch_client, _POOL_ID, application_files, _NODE_OS_DISTRO,
                _NODE_OS_VERSION)

    # Create the job that will run the tasks.
    create_job(batch_client, _JOB_ID, _POOL_ID)

    # Add the tasks to the job. We need to supply a container shared access
    # signature (SAS) token for the tasks so that they can upload their output
    # to Azure Storage.
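# Several of these samples call a get_container_sas_token helper. A minimal sketch,
# assuming the legacy azure-storage-blob v2 BlockBlobService used above and a
# short-lived, permission-limited token (illustrative only, not the original code):
import datetime

import azure.storage.blob as azureblob


def get_container_sas_token(blob_client, container_name, blob_permissions):
    """Return a SAS token granting `blob_permissions` on `container_name` for 2 hours."""
    return blob_client.generate_container_shared_access_signature(
        container_name,
        permission=blob_permissions,
        expiry=datetime.datetime.utcnow() + datetime.timedelta(hours=2))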
def main(req: func.HttpRequest) -> func.HttpResponse: logging.info('Python HTTP trigger function processed a request.') start_time = datetime.datetime.now().replace(microsecond=0) logging.info('Script start: {}'.format(start_time)) # Create the blob client, for use in obtaining references to # blob storage containers and uploading files to containers. blob_client = azureblob.BlockBlobService( account_name=os.environ["_STORAGE_ACCOUNT_NAME"], account_key=os.environ["_STORAGE_ACCOUNT_KEY"]) #pool_id = os.environ["_POOL_ID"] + '_{date:%Y-%m-%d_%H-%M-%S}'.format(date=datetime.datetime.now()) # ^ I used to use a timestamp in the pool_id, but my setup is simple enough to just use a single pool and reuse it. pool_id = os.environ["_POOL_ID"] job_id = os.environ["_JOB_ID"] + '_{date:%Y-%m-%d_%H-%M-%S}'.format( date=datetime.datetime.now()) output_container_name = 'output' toprocess_container_name = 'toprocess' #clean up the 'toprocess' container blob_client.create_container(toprocess_container_name, fail_on_exist=False) logging.info('Container [{}] created if not exists.'.format( toprocess_container_name)) for blob in blob_client.list_blobs(toprocess_container_name).items: blob_client.delete_blob(toprocess_container_name, blob.name) logging.info('Clean up of container finished.') output_container_sas_url = get_container_sas_url( blob_client, output_container_name, azureblob.BlobPermissions.WRITE) # Create a Batch service client. We'll now be interacting with the Batch # service in addition to Storage credentials = batchauth.SharedKeyCredentials( os.environ["_BATCH_ACCOUNT_NAME"], os.environ["_BATCH_ACCOUNT_KEY"]) batch_client = batch.BatchServiceClient( credentials, batch_url=os.environ["_BATCH_ACCOUNT_URL"]) try: source_container_name = 'teslacam' dest_container_name = toprocess_container_name # Create the pool that will contain the compute nodes that will execute the tasks. # Check how many tasks will be created: sentry_clips = blob_client.list_blobs(source_container_name, prefix='SentryClips/', delimiter='/') saved_clips = blob_client.list_blobs(source_container_name, prefix='SavedClips/', delimiter='/') num_sentry_clips = len(sentry_clips.items) num_saved_clips = len(saved_clips.items) num_tasks = num_saved_clips + num_sentry_clips logging.info("Number of counted folders (thus tasks): {tasks}.".format( tasks=num_tasks)) create_or_update_pool(batch_client, pool_id, num_tasks) # Create the job that will run the tasks. 
create_job(batch_client, job_id, pool_id) # get list of Sentry Events and create tasks for those: for folder in sentry_clips.items: # for each event folder, upload the .mp4 files, create SAS urls and create task resourcefiles = [] taskname = folder.name[0:-1].replace("/", "_") destfolderprefix = folder.name + 'TeslaCam/' for file in blob_client.list_blobs(source_container_name, prefix=folder.name, delimiter='/').items: # for each file, upload it, create SAS url dest_blob_name = destfolderprefix + file.name source_sas_token = get_container_sas_token( blob_client, source_container_name, azureblob.BlobPermissions.READ) source_sas_url = blob_client.make_blob_url( source_container_name, file.name, sas_token=source_sas_token) blob_client.copy_blob(dest_container_name, dest_blob_name, source_sas_url) dest_sas_token = get_container_sas_token( blob_client, dest_container_name, azureblob.BlobPermissions.READ) dest_sas_url = blob_client.make_blob_url( dest_container_name, dest_blob_name, sas_token=dest_sas_token) resourcefile = batchmodels.ResourceFile( file_path=dest_blob_name, http_url=dest_sas_url) resourcefiles.append(resourcefile) # create task input_file_dir = folder.name[0:-1] + '/TeslaCam' add_task(batch_client, job_id, resourcefiles, output_container_sas_url, taskname, input_file_dir) logging.info("All Sentry Clips tasks created in Azure Batch.") # get list of Sentry Events and create tasks for those: for folder in saved_clips.items: # for each event folder, upload the .mp4 files, create SAS urls and create task resourcefiles = [] taskname = folder.name[0:-1].replace("/", "_") destfolderprefix = folder.name + 'TeslaCam/' for file in blob_client.list_blobs(source_container_name, prefix=folder.name, delimiter='/').items: # for each file, upload it, create SAS url dest_blob_name = destfolderprefix + file.name source_sas_token = get_container_sas_token( blob_client, source_container_name, azureblob.BlobPermissions.READ) source_sas_url = blob_client.make_blob_url( source_container_name, file.name, sas_token=source_sas_token) blob_client.copy_blob(dest_container_name, dest_blob_name, source_sas_url) dest_sas_token = get_container_sas_token( blob_client, dest_container_name, azureblob.BlobPermissions.READ) dest_sas_url = blob_client.make_blob_url( dest_container_name, dest_blob_name, sas_token=dest_sas_token) resourcefile = batchmodels.ResourceFile( file_path=dest_blob_name, http_url=dest_sas_url) resourcefiles.append(resourcefile) # create task input_file_dir = folder.name[0:-1] + '/TeslaCam' add_task(batch_client, job_id, resourcefiles, output_container_sas_url, taskname, input_file_dir) logging.info("All Saved Clips tasks created in Azure Batch.") # archive videos from teslacam container to archive folder logging.info('Started archiving...') archive_container_name = 'archive' archive_prefix = 'backup_{date:%Y-%m-%d_%H-%M-%S}/'.format( date=datetime.datetime.now()) for blob in blob_client.list_blobs(source_container_name).items: arch_blob_name = archive_prefix + blob.name source_sas_token = get_container_sas_token( blob_client, source_container_name, azureblob.BlobPermissions.READ) source_sas_url = blob_client.make_blob_url( source_container_name, blob.name, sas_token=source_sas_token) blob_client.copy_blob(archive_container_name, arch_blob_name, source_sas_url) logging.info('Finished archiving.') # Clean 'teslacam' container logging.info( 'Cleaning [{}] container...'.format(source_container_name)) for blob in blob_client.list_blobs(source_container_name).items: 
            blob_client.delete_blob(source_container_name, blob.name)
        logging.info('Clean up of [{}] container finished.'.format(
            source_container_name))

    except batchmodels.BatchErrorException as err:
        print_batch_exception(err)
        raise
    except Exception as err:
        logging.error(err)
        raise
    else:
        # Clean up Batch resources (if the user so chooses).
        logging.info('Script ends here.')
        return func.HttpResponse(f"Script done", status_code=200)
    finally:
        # Print out some timing info
        end_time = datetime.datetime.now().replace(microsecond=0)
        logging.info('Job end: {}'.format(end_time))
        logging.info('Elapsed time: {}'.format(end_time - start_time))
def __init__(self, pool_id, data_dir): self.pool_id = pool_id if data_dir[-1] != '/': data_dir += '/' self.data_dir = data_dir self.app = common.helpers.generate_unique_resource_name('app') self.inp = common.helpers.generate_unique_resource_name('inp') self.out = common.helpers.generate_unique_resource_name('out') global_config = configparser.ConfigParser() global_config.read('configuration.cfg') our_config = configparser.ConfigParser() our_config.read('ebo.cfg') batch_account_key = global_config.get('Batch', 'batchaccountkey') batch_account_name = global_config.get('Batch', 'batchaccountname') batch_service_url = global_config.get('Batch', 'batchserviceurl') storage_account_key = global_config.get('Storage', 'storageaccountkey') storage_account_name = global_config.get('Storage', 'storageaccountname') storage_account_suffix = global_config.get( 'Storage', 'storageaccountsuffix') pool_vm_size = our_config.get( 'DEFAULT', 'poolvmsize') pool_vm_count = our_config.getint( 'DEFAULT', 'poolvmcount') # remember: no space, file names split by ',' #app_file_names = our_config.get('APP', 'app').split(',') #app_file_names = [os.path.realpath(fn) for fn in app_file_names] # Print the settings we are running with common.helpers.print_configuration(global_config) common.helpers.print_configuration(our_config) credentials = batchauth.SharedKeyCredentials( batch_account_name, batch_account_key) batch_client = batch.BatchServiceClient( credentials, base_url=batch_service_url) # Retry 5 times -- default is 3 batch_client.config.retry_policy.retries = 5 self.storage_account_name = storage_account_name block_blob_client = azureblob.BlockBlobService( account_name=storage_account_name, account_key=storage_account_key, endpoint_suffix=storage_account_suffix) # create containers block_blob_client.create_container(self.app, fail_on_exist=False) block_blob_client.create_container(self.inp, fail_on_exist=False) block_blob_client.create_container(self.out, fail_on_exist=False) #app_files = upload_files(block_blob_client, self.app, app_file_names) output_container_sas_token = get_container_sas_token( block_blob_client, self.out, azureblob.BlobPermissions.WRITE) self.out_sas_token = output_container_sas_token create_pool(batch_client, pool_id, pool_vm_size, pool_vm_count, None) # create necessary folders if not os.path.exists(data_dir): os.makedirs(data_dir) self.batch_client = batch_client self.block_blob_client = block_blob_client
    blob_client.create_container(output_container_name, fail_on_exist=False)
    print('Container [{}] created.'.format(output_container_name))

    # Obtain a shared access signature URL that provides write access to the output
    # container to which the tasks will upload their output.
    output_container_sas_url = get_container_sas_url(
        blob_client, output_container_name, azureblob.BlobPermissions.WRITE)

    # Create a Batch service client. We'll now be interacting with the Batch
    # service in addition to Storage
    credentials = batchauth.SharedKeyCredentials(
        os.environ['BATCH_ACCOUNT_NAME'], os.environ['BATCH_ACCOUNT_KEY'])

    batch_client = batch.BatchServiceClient(
        credentials, base_url=os.environ['BATCH_ACCOUNT_URL'])

    try:
        # Create the pool that will contain the compute nodes that will execute the
        # tasks.
        create_pool(batch_client, os.environ['POOL_ID'])

        # Create the job that will run the tasks.
        create_job(batch_client, os.environ['JOB_ID'], os.environ['POOL_ID'])

        # Add the tasks to the job. Pass the input files and a SAS URL
        # to the storage container for output files.
        add_tasks(batch_client, os.environ['JOB_ID'], input_blobs,
                  output_container_sas_url)

        # Pause execution until tasks reach Completed state.
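# The get_container_sas_url helper used above typically wraps a container SAS token
# in a full container URL. A minimal sketch, assuming the legacy BlockBlobService,
# the default public blob endpoint, and a get_container_sas_token helper like the
# one sketched earlier (names here are illustrative):
def get_container_sas_url(blob_client, container_name, blob_permissions):
    """Return a container URL carrying a SAS token with the requested permissions."""
    sas_token = get_container_sas_token(blob_client, container_name,
                                        blob_permissions)
    return 'https://{}.blob.core.windows.net/{}?{}'.format(
        blob_client.account_name, container_name, sas_token)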
def main(argv): # azure python sdk https://github.com/Azure/azure-sdk-for-python/blob/master/doc/batch.rst # or https://docs.microsoft.com/en-us/python/api/azure-batch/index?view=azure-python input_container_name = _INPUT_FOLDER output_container_name = _OUTPUT_FOLDER try: opts, args = getopt.getopt(argv[1:], 'hi:o', ['ifolder=', 'ofolder=']) except getopt.GetoptError: print('n4t1-azBatch.py [-h] -i <input folder> -o <output folder>') sys.exit(2) for opt, arg in opts: if opt == '-h': print('n4t1-azBatch.py -i <input folder> -o <output folder>') sys.exit(2) elif opt in ("-i", "--ifolder"): input_container_name = str(arg) elif opt in ("-o", "--ofolder"): output_container_name = str(arg) start_time = datetime.datetime.now().replace(microsecond=0) print('Azure Batch start: {}\n'.format(start_time)) blob_client = azureblob.BlockBlobService( account_name=_STORAGE_ACCOUNT_NAME, account_key=_STORAGE_ACCOUNT_KEY) input_container_created = blob_client.create_container( input_container_name, fail_on_exist=True) if not input_container_created: print('Error creating input container [{}]. Exiting'.format( input_container_name)) sys.exit(2) output_container_created = blob_client.create_container( output_container_name, fail_on_exist=True) if not output_container_created: blob_client.delete_container(input_container_name) print( 'Error creating output container [{}]. Has this job already been run?' .format(output_container_name)) print('Deleted input container [{}] and exiting.'.format( input_container_name)) sys.exit(2) # Create a list of all files in the input_container_name directory. input_file_paths = [] command_path = [] script_path = [] design_path = [] zip_path = [] for folder, subs, files in os.walk('./' + input_container_name + '/'): for filename in files: if filename.startswith('a'): input_file_paths.append( os.path.abspath(os.path.join(folder, filename))) elif filename.startswith('m'): zip_path.append(os.path.abspath(os.path.join(folder, filename))) elif filename.startswith('r') and filename.endswith('e'): command_path.append( os.path.abspath(os.path.join(folder, filename))) elif filename.startswith('r') and filename.endswith('h'): script_path.append( os.path.abspath(os.path.join(folder, filename))) elif filename.startswith('i'): design_path.append( os.path.abspath(os.path.join(folder, filename))) else: blob_client.delete_container(input_container_name) blob_client.delete_container(output_container_name) print( 'Error finding upload files. Deleting containers [{}] & [{}] and exiting.' .format(input_container_name, output_container_name)) sys.exit(2) print( 'Uploading file(s) to container [{}] ...'.format(input_container_name), end=' ') input_files = [] input_files.append( upload_file_to_container(blob_client, input_container_name, command_path[0])) input_files.append( upload_file_to_container(blob_client, input_container_name, zip_path[0])) input_files.append( upload_file_to_container(blob_client, input_container_name, script_path[0])) input_files.append( upload_file_to_container(blob_client, input_container_name, input_file_paths[0])) input_files.append( upload_file_to_container(blob_client, input_container_name, design_path[0])) print('Done\n') # Obtain a shared access signature URL that provides write access to the output # container to which the tasks will upload their output. output_container_sas_url = get_container_sas_url(blob_client, output_container_name) # Create a Batch service client. 
We'll now be interacting with the Batch service in addition to Storage credentials = ServicePrincipalCredentials( client_id=_APPLICATION_ID, secret=_APPLICATION_SECRET, tenant=_TENANT_ID, resource='https://batch.core.windows.net/') batch_client = batch.BatchServiceClient(credentials, batch_url=_BATCH_ACCOUNT_URL) try: # Create the pool that will contain the compute nodes that will execute the tasks. dedicated_node_count = ceil(_MAX_TASKS / _MAX_TASK_PER_NODE) create_pool(batch_client, _POOL_ID, _SCALE_INT, _AUTO_SCALE_EVAL_INT, dedicated_node_count) except batchmodels.BatchErrorException as err: print_batch_exception(err) print('Error creating pool [{}]'.format(_POOL_ID)) blob_client.delete_container(input_container_name) blob_client.delete_container(output_container_name) print('Deleted containers [{}] & [{}] and exiting'.format( input_container_name, output_container_name)) sys.exit(2) try: # Create the job that will run the tasks. create_job(batch_client, _JOB_ID, _POOL_ID) except batchmodels.BatchErrorException as err: print_batch_exception(err) print('Error creating jobs [{}]'.format(_JOB_ID)) batch_client.pool.delete(_POOL_ID) blob_client.delete_container(input_container_name) blob_client.delete_container(output_container_name) print('Deleted pool [{}] ...'.format(_POOL_ID)) print('Deleted containers [{}] & [{}] and exiting'.format( input_container_name, output_container_name)) sys.exit(2) try: # Add the tasks to the job. Pass the input files and a SAS URL to the storage container for output files. add_tasks(batch_client, _JOB_ID, input_files, output_container_sas_url) # Add ctrl-c handling def signal_handler(sig, frame): print() print('Ctrl+C pressed!') if query_yes_no('Delete storage container [{}]?'.format( input_container_name)) == 'yes': blob_client.delete_container(input_container_name) if query_yes_no('Delete storage container [{}]?'.format( output_container_name)) == 'yes': blob_client.delete_container(output_container_name) if query_yes_no('Delete job [{}]?'.format(_JOB_ID)) == 'yes': batch_client.job.delete(_JOB_ID) if query_yes_no('Delete pool [{}]?'.format(_POOL_ID)) == 'yes': batch_client.pool.delete(_POOL_ID) sys.exit(0) signal.signal(signal.SIGINT, signal_handler) # Pause execution until tasks reach Completed state. wait_for_tasks_to_complete(batch_client, _JOB_ID, datetime.timedelta(hours=_TIMEOUT_HR), blob_client, output_container_name) except batchmodels.BatchErrorException as err: print_batch_exception(err) raise # Print out some timing info end_time = datetime.datetime.now().replace(microsecond=0) print('Batch end: {}'.format(end_time)) print('Elapsed time: {}'.format(end_time - start_time)) # Clean up Batch resources (if the user so chooses) # if query_yes_no('Delete storage container [{}]?'.format(input_container_name)) == 'yes': blob_client.delete_container(input_container_name) # if query_yes_no('Delete storage container [{}]?'.format(output_container_name)) == 'yes': blob_client.delete_container(output_container_name) # if query_yes_no('Delete job [{}]?'.format(_JOB_ID)) == 'yes': batch_client.job.delete(_JOB_ID) # if query_yes_no('Delete pool [{}]?'.format(_POOL_ID)) == 'yes': batch_client.pool.delete(_POOL_ID)
def batch_client(self):
    credentials = batchauth.SharedKeyCredentials(self.batch_account,
                                                 self.batch_key)
    result = batch.BatchServiceClient(credentials,
                                      base_url=self.batch_service_url)
    return result
def get_client() -> batch.BatchServiceClient:
    credentials = batchauth.SharedKeyCredentials(account_name, account_key)
    return batch.BatchServiceClient(credentials, base_url=account_url)
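# A hypothetical use of get_client() above, e.g. to inspect the account's pools.
# The loop only reads pool metadata, so it is safe to run against a live account:
if __name__ == '__main__':
    client = get_client()
    for pool in client.pool.list():
        print(pool.id, pool.state, pool.allocation_state)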
def execute_sample(global_config, sample_config): """Executes the sample with the specified configurations. :param global_config: The global configuration to use. :type global_config: `configparser.ConfigParser` :param sample_config: The sample specific configuration to use. :type sample_config: `configparser.ConfigParser` """ # Set up the configuration batch_account_key = global_config.get('Batch', 'batchaccountkey') batch_account_name = global_config.get('Batch', 'batchaccountname') batch_service_url = global_config.get('Batch', 'batchserviceurl') storage_account_key = global_config.get('Storage', 'storageaccountkey') storage_account_name = global_config.get('Storage', 'storageaccountname') storage_account_suffix = global_config.get( 'Storage', 'storageaccountsuffix') should_delete_container = sample_config.getboolean( 'DEFAULT', 'shoulddeletecontainer') should_delete_job_schedule = sample_config.getboolean( 'DEFAULT', 'shoulddeletejobschedule') pool_vm_size = sample_config.get( 'DEFAULT', 'poolvmsize') pool_vm_count = sample_config.getint( 'DEFAULT', 'poolvmcount') # Print the settings we are running with common.helpers.print_configuration(global_config) common.helpers.print_configuration(sample_config) credentials = batchauth.SharedKeyCredentials( batch_account_name, batch_account_key) batch_client = batch.BatchServiceClient( credentials, batch_url=batch_service_url) block_blob_client = azureblob.BlockBlobService( account_name=storage_account_name, account_key=storage_account_key, endpoint_suffix=storage_account_suffix) batch_client.config.retry_policy.retries = 5 job_schedule_id = common.helpers.generate_unique_resource_name( "JobScheduler") try: create_job_schedule( batch_client, job_schedule_id, pool_vm_size, pool_vm_count, block_blob_client) print("Start time: ", _START_TIME) print("Delete time: ", _END_TIME) recent_job = common.helpers.wait_for_job_under_job_schedule( batch_client, job_schedule_id, datetime.timedelta(minutes=5)) common.helpers.wait_for_tasks_to_complete( batch_client, recent_job, datetime.timedelta(minutes=25)) tasks = batch_client.task.list(recent_job) task_ids = [task.id for task in tasks] common.helpers.print_task_output( batch_client, recent_job, task_ids) common.helpers.wait_for_job_schedule_to_complete( batch_client, job_schedule_id, _END_TIME + datetime.timedelta(minutes=10)) except batchmodels.BatchErrorException as e: for x in e.error.values: print("BatchErrorException: ", x) finally: if should_delete_job_schedule: print("Deleting job schedule: ", job_schedule_id) batch_client.job_schedule.delete(job_schedule_id) if should_delete_container: block_blob_client.delete_container( _CONTAINER_NAME, fail_not_exist=False)
def check_vir_typer_tasks(): """ VirusTyper! """ vir_typer_tasks = VirTyperAzureRequest.objects.filter() credentials = batch_auth.SharedKeyCredentials(settings.BATCH_ACCOUNT_NAME, settings.BATCH_ACCOUNT_KEY) batch_client = batch.BatchServiceClient( credentials, base_url=settings.BATCH_ACCOUNT_URL) for sub_task in vir_typer_tasks: vir_typer_task = VirTyperProject.objects.get( pk=sub_task.project_name.pk) batch_job_name = VirTyperProject.objects.get( pk=vir_typer_task.pk).container_namer() # Check if tasks related with this VirusTyper project have finished. tasks_completed = True try: for cloudtask in batch_client.task.list(batch_job_name): if cloudtask.state != batchmodels.TaskState.completed: tasks_completed = False except: # If something errors first time through job can't get deleted. In that case, give up. VirTyperProject.objects.filter(pk=vir_typer_task.pk).update( status='Error') # Delete Azure task so we don't keep iterating over it. VirTyperAzureRequest.objects.filter(id=sub_task.id).delete() continue # If tasks have completed, check if they were successful. if tasks_completed: exit_codes_good = True for cloudtask in batch_client.task.list(batch_job_name): if cloudtask.execution_info.exit_code != 0: exit_codes_good = False # Get rid of job and pool so we don't waste big $$$ and do cleanup/get files downloaded in tasks. batch_client.job.delete(job_id=batch_job_name) batch_client.pool.delete(pool_id=batch_job_name) if exit_codes_good: # Now need to generate an SAS URL and give access to it/update the download link. blob_client = BlockBlobService( account_key=settings.AZURE_ACCOUNT_KEY, account_name=settings.AZURE_ACCOUNT_NAME) vir_typer_result_container = batch_job_name + '-output' # # Download the output container so we can zip it. download_container(blob_service=blob_client, container_name=vir_typer_result_container, output_dir='olc_webportalv2/media') output_dir = 'olc_webportalv2/media/{}'.format(batch_job_name) if os.path.isfile(os.path.join(output_dir, 'batch_config.txt')): os.remove(os.path.join(output_dir, 'batch_config.txt')) shutil.make_archive(output_dir, 'zip', output_dir) # Read in the json output json_output = os.path.join(output_dir, 'virus_typer_outputs.json') with open(json_output, 'r') as json_report: vir_typer_task.report = json.load(json_report) # vir_typer_result_container = 'vir-typer-result-{}'.format(vir_typer_task.pk) sas_url = generate_download_link( blob_client=blob_client, container_name=vir_typer_result_container, output_zipfile=output_dir + '.zip', expiry=8) vir_typer_task.download_link = sas_url vir_typer_task.status = 'Complete' vir_typer_task.save() shutil.rmtree(output_dir) os.remove(output_dir + '.zip') else: vir_typer_task.status = 'Error' vir_typer_task.save() VirTyperAzureRequest.objects.filter(id=sub_task.id).delete()
with open('notebook_config.json', 'r') as f:
    NOTEBOOK_CONFIG = json.loads(f.read())

batch_update_frequency = 25000
max_epoch_runtime_sec = 30
per_iter_epsilon_reduction = 0.000001
min_epsilon = 0.1
batch_size = 32
replay_memory_size = 50000
weights_path = ''
train_conv_layers = 'false'

batch_credentials = batchauth.SharedKeyCredentials(
    NOTEBOOK_CONFIG['batch_account_name'],
    NOTEBOOK_CONFIG['batch_account_key'])
batch_client = batch.BatchServiceClient(
    batch_credentials, batch_url=NOTEBOOK_CONFIG['batch_account_url'])

job_id = 'distributed_rl_{0}'.format(str(uuid.uuid4()))
job = batch.models.JobAddParameter(
    id=job_id,
    pool_info=batch.models.PoolInformation(
        pool_id=NOTEBOOK_CONFIG['batch_pool_name']))
batch_client.job.add(job)

tasks = []

# Trainer task
tasks.append(
    batchmodels.TaskAddParameter(
        id='TrainerTask',
        command_line=
        r'call C:\\prereq\\mount.bat && C:\\ProgramData\\Anaconda3\\Scripts\\activate.bat py36 && python -u Z:\\scripts_downpour\\manage.py runserver 0.0.0.0:80 data_dir=Z:\\\\ role=trainer experiment_name={0} batch_update_frequency={1} weights_path={2} train_conv_layers={3} per_iter_epsilon_reduction={4} min_epsilon={5}'
def check_prokka_tasks(): # Prokka! prokka_tasks = ProkkaAzureRequest.objects.filter() credentials = batch_auth.SharedKeyCredentials(settings.BATCH_ACCOUNT_NAME, settings.BATCH_ACCOUNT_KEY) batch_client = batch.BatchServiceClient( credentials, base_url=settings.BATCH_ACCOUNT_URL) for prokka_task in prokka_tasks: prokka_object = ProkkaRequest.objects.get( pk=prokka_task.prokka_request.pk) batch_job_name = 'prokka-{}'.format(prokka_task.prokka_request.pk) # Check if tasks related with this amrsummary job have finished. tasks_completed = True try: for cloudtask in batch_client.task.list(batch_job_name): if cloudtask.state != batchmodels.TaskState.completed: tasks_completed = False except: # If something errors first time through job can't get deleted. In that case, give up. ProkkaRequest.objects.filter( pk=prokka_task.prokka_request.pk).update(status='Error') # Delete task so we don't keep iterating over it. ProkkaAzureRequest.objects.filter(id=prokka_task.id).delete() continue # If tasks have completed, check if they were successful. if tasks_completed: exit_codes_good = True for cloudtask in batch_client.task.list(batch_job_name): if cloudtask.execution_info.exit_code != 0: exit_codes_good = False # Get rid of job and pool so we don't waste big $$$ and do cleanup/get files downloaded in tasks. batch_client.job.delete(job_id=batch_job_name) batch_client.pool.delete(pool_id=batch_job_name) if exit_codes_good: # Now need to generate an SAS URL and give access to it/update the download link. blob_client = BlockBlobService( account_key=settings.AZURE_ACCOUNT_KEY, account_name=settings.AZURE_ACCOUNT_NAME) # Download the output container so we can zip it. download_container(blob_service=blob_client, container_name=batch_job_name + '-output', output_dir='olc_webportalv2/media') output_dir = 'olc_webportalv2/media/{}'.format(batch_job_name) if os.path.isfile(os.path.join(output_dir, 'batch_config.txt')): os.remove(os.path.join(output_dir, 'batch_config.txt')) shutil.make_archive(output_dir, 'zip', output_dir) prokka_result_container = 'prokka-result-{}'.format( prokka_object.pk) sas_url = generate_download_link( blob_client=blob_client, container_name=prokka_result_container, output_zipfile=output_dir + '.zip', expiry=8) prokka_object.download_link = sas_url prokka_object.status = 'Complete' prokka_object.save() shutil.rmtree(output_dir) os.remove(output_dir + '.zip') else: prokka_object.status = 'Error' prokka_object.save() ProkkaAzureRequest.objects.filter(id=prokka_task.id).delete()
def run_azure(target, jobs, n=1, path='.', delete=True, config=None): """Execute a function for multiple sets of arguments on Microsoft Azure, and return the results as a list. :param function target: A target function. :param list jobs: A list of sets of arguments given to the target. :param int n: The number of repeats running the target. 1 as default. :param str path: A path to save temp files. The current path as default. :param bool delete: Delete temp files after finishing jobs, or not. True as default. :param config: str or configparser.ConfigParser. A config file. An example is the following: ``` [azure] batch.name = foo batch.key = bar batch.url = hoge storage.name = fuga storage.key = spam pool.nodecount = 2 # pool.id = MyPool # pool.vmsize = Standard_D11_v2 # os.publisher = Canonical # os.offer = UbuntuServer # os.sku = 16 # job.id = MyJob ``` :return: A list of results corresponding the `jobs` list. :rtype: list """ if config is None: raise ValueError('Argument \'config\' must be given.') elif isinstance(config, str): if not os.path.isfile(config): raise FileNotFoundError('A file [{}] could not be found.'.format(config)) config_filename = config config = configparser.ConfigParser() config.sections() config.read(config_filename) config.sections() elif not isinstance(config, configparser.ConfigParser): raise ValueError('\'config\' must be eighter str or ConfigParser. [{}] was given.'.format(repr(config))) if 'azure' not in config: raise KeyError('Key \'azure\' could not be found in the given config.') for key in ('batch.name', 'batch.key', 'batch.url', 'storage.name', 'storage.key', 'pool.nodecount'): if key not in config['azure']: raise KeyError('Key \'{}\' could not be found in the \'azure\' section.'.format(key)) # Update the Batch and Storage account credential strings below with the values # unique to your accounts. These are used when constructing connection strings # for the Batch and Storage client objects. _BATCH_ACCOUNT_NAME = config['azure']['batch.name'] _BATCH_ACCOUNT_KEY = config['azure']['batch.key'] _BATCH_ACCOUNT_URL = config['azure']['batch.url'] _STORAGE_ACCOUNT_NAME = config['azure']['storage.name'] _STORAGE_ACCOUNT_KEY = config['azure']['storage.key'] _POOL_NODE_COUNT = config['azure']['pool.nodecount'] _POOL_ID = config['azure'].get('pool.id', 'MyPool') _POOL_VM_SIZE = config['azure'].get('pool.vmsize', 'Standard_D11_v2') _NODE_OS_PUBLISHER = config['azure'].get('os.publisher', 'Canonical') _NODE_OS_OFFER = config['azure'].get('os.offer', 'UbuntuServer') _NODE_OS_SKU = config['azure'].get('os.sku', '16') _JOB_ID = config['azure'].get('job.id', 'MyJob') if not _POOL_NODE_COUNT.isdigit(): raise ValueError('The wrong pool node count was given [{}]. This must be an integer'.format(_POOL_NODE_COUNT)) proc_per_node = 2 #XXX: Does this depend on pool vm? nproc = int(_POOL_NODE_COUNT) * proc_per_node code_header = """ from __future__ import print_function import argparse import os import string import azure.storage.blob as azureblob parser = argparse.ArgumentParser() parser.add_argument('--filepath', required=True, help='The path to the text file to process. 
The path' 'may include a compute node\\'s environment' 'variables, such as' '$AZ_BATCH_NODE_SHARED_DIR/filename.txt') parser.add_argument('--output', required=True, help='The path to the output.') parser.add_argument('--job_id', type=int, required=True) parser.add_argument('--task_id', type=int, required=True) parser.add_argument('--storageaccount', required=True, help='The name the Azure Storage account that owns the' 'blob storage container to which to upload' 'results.') parser.add_argument('--storagecontainer', required=True, help='The Azure Blob storage container to which to' 'upload results.') parser.add_argument('--sastoken', required=True, help='The SAS token providing write access to the' 'Storage container.') args = parser.parse_args() input_file = os.path.realpath(args.filepath) output_file = args.output import pickle with open(input_file, mode='rb') as fin: inputs = pickle.load(fin) """ code_footer = """ with open(output_file, mode='wb') as fout: pickle.dump(res, fout, protocol=2) # Create the blob client using the container's SAS token. # This allows us to create a client that provides write # access only to the container. blob_client = azureblob.BlockBlobService(account_name=args.storageaccount, sas_token=args.sastoken) output_file_path = os.path.realpath(output_file) blob_client.create_blob_from_path(args.storagecontainer, output_file, output_file_path) """ # src = textwrap.dedent(inspect.getsource(target)).replace(r'"', r'\"') src = textwrap.dedent(inspect.getsource(target)) if re.match('[\s\t]+', src.split('\n')[0]) is not None: raise RuntimeError( "Wrong indentation was found in the source translated") code = code_header code += src code += 'res = {}(inputs, args.task_id, args.job_id)'.format(target.__name__) code += code_footer target = code suffix = binascii.hexlify(os.urandom(4)).decode() start_time = datetime.datetime.now().replace(microsecond=0) _log.info('Sample start: {}'.format(start_time)) if not os.path.isdir(path): os.mkdir(path) # task_file = target # task_file = 'task-{}.py'.format(suffix) task_file = '{}/task-{}.py'.format(path, suffix) with open(task_file, 'w') as fout: fout.write(target) # Prepare input pickle files input_file_names = [] output_file_names = [] for i, job in enumerate(jobs): filename = '{}/input-{}_{}.pickle'.format(path, suffix, i) input_file_names.append(filename) for j in range(n): output_file_names.append('output-{}_{}.{}.pickle'.format(suffix, i, j + 1)) with open(filename, mode='wb') as fout: pickle.dump(job, fout, protocol=2) # Create the blob client, for use in obtaining references to # blob storage containers and uploading files to containers. blob_client = azureblob.BlockBlobService( account_name=_STORAGE_ACCOUNT_NAME, account_key=_STORAGE_ACCOUNT_KEY) n_jobs = -(-(len(jobs) * n) // nproc) # ceil for int _log.info('{} jobs will be created.'.format(n_jobs)) res = None try: # Use the blob client to create the containers in Azure Storage if they # don't yet exist. app_container_name = 'application-{}'.format(suffix) input_container_name = 'input-{}'.format(suffix) output_container_name = 'output-{}'.format(suffix) # app_container_name = 'application' # input_container_name = 'input' # output_container_name = 'output' blob_client.create_container(app_container_name, fail_on_exist=False) blob_client.create_container(input_container_name, fail_on_exist=False) blob_client.create_container(output_container_name, fail_on_exist=False) # Paths to the task script. This script will be executed by the tasks that # run on the compute nodes. 
application_file_paths = [os.path.realpath(task_file)] # The collection of data files that are to be processed by the tasks. input_file_paths = [os.path.realpath(filename) for filename in input_file_names] # Upload the application script to Azure Storage. This is the script that # will process the data files, and is executed by each of the tasks on the # compute nodes. application_files = [ upload_file_to_container(blob_client, app_container_name, file_path) for file_path in application_file_paths] # Upload the data files. This is the data that will be processed by each of # the tasks executed on the compute nodes in the pool. input_files = [ upload_file_to_container(blob_client, input_container_name, file_path) for file_path in input_file_paths] # Obtain a shared access signature that provides write access to the output # container to which the tasks will upload their output. output_container_sas_token = get_container_sas_token( blob_client, output_container_name, azureblob.BlobPermissions.WRITE) # Create a Batch service client. We'll now be interacting with the Batch # service in addition to Storage credentials = batchauth.SharedKeyCredentials(_BATCH_ACCOUNT_NAME, _BATCH_ACCOUNT_KEY) #print(_BATCH_ACCOUNT_URL) batch_client = batch.BatchServiceClient( credentials, batch_url=_BATCH_ACCOUNT_URL) # Create the pool that will contain the compute nodes that will execute the # tasks. The resource files we pass in are used for configuring the pool's # start task, which is executed each time a node first joins the pool (or # is rebooted or re-imaged). create_pool(batch_client, _POOL_ID + '-' + suffix, application_files, _NODE_OS_PUBLISHER, _NODE_OS_OFFER, _NODE_OS_SKU, task_file, _POOL_VM_SIZE, _POOL_NODE_COUNT) # Create the job that will run the tasks. loads = [] for i, input_file in enumerate(input_files): for j, output_file in enumerate(output_file_names[i * n: (i + 1) * n]): loads.append((input_file, output_file, i + 1, j + 1)) assert n_jobs == -(-len(loads) // nproc) # ceil for int job_names = [] for i in range(n_jobs): job_name = '{}-{}-{}'.format(_JOB_ID, suffix, i + 1) create_job(batch_client, job_name, _POOL_ID + '-' + suffix) # Add the tasks to the job. We need to supply a container shared access # signature (SAS) token for the tasks so that they can upload their output # to Azure Storage. task_ids = add_tasks(batch_client, job_name, loads[i * nproc: (i + 1) * nproc], output_container_name, output_container_sas_token, task_file, _STORAGE_ACCOUNT_NAME) job_names.append((job_name, task_ids)) # Pause execution until tasks reach Completed state. wait_for_tasks_to_complete(batch_client, job_names, datetime.timedelta(minutes=20)) _log.info(" Success! All tasks reached the 'Completed' state within the specified timeout period.") # Download the task output files from the output Storage container to a # local directory. Note that we could have also downloaded the output # files directly from the compute nodes themselves. 
download_blobs_from_container(blob_client, output_container_name, os.path.abspath(path)) for job_id, task_ids in job_names: print_task_output(batch_client, job_id, task_ids) # Print out some timing info end_time = datetime.datetime.now().replace(microsecond=0) _log.info('Sample end: {}'.format(end_time)) _log.info('Elapsed time: {}'.format(end_time - start_time)) res = [] for output_file in output_file_names: with open(os.path.join(path, output_file), mode='rb') as fin: res.append(pickle.load(fin)) res = [res[i * n: (i + 1) * n] for i in range(len(jobs))] finally: # Clean up storage resources _log.info('Deleting containers...') blob_client.delete_container(app_container_name) blob_client.delete_container(input_container_name) blob_client.delete_container(output_container_name) # Clean up Batch resources (if the user so chooses). for i in range(n_jobs): job_name = '{}-{}-{}'.format(_JOB_ID, suffix, i + 1) _log.info('Deleting job [{}] ...'.format(job_name)) batch_client.job.delete(job_name) _log.info('Deleting pool...') batch_client.pool.delete(_POOL_ID + '-' + suffix) if delete: _log.info('Deleting temporary files...') for filename in output_file_names: filename = os.path.join(path, filename) if os.path.isfile(filename): os.remove(filename) for filename in itertools.chain(input_file_paths, application_file_paths): if os.path.isfile(filename): os.remove(filename) return res
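# A hypothetical invocation of run_azure above. The target receives
# (inputs, task_id, job_id), matching the task script it generates, and the
# config file path is illustrative:
def square_all(inputs, task_id, job_id):
    return [x * x for x in inputs]


results = run_azure(square_all, jobs=[[1, 2, 3], [4, 5, 6]], n=1,
                    path='./tmp', config='azure.ini')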
def _get_batch_client(self):
    batch_client = batchsc.BatchServiceClient(
        self._get_batch_credentials(),
        base_url=self.batch_config.batch_url)
    return batch_client
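# A possible companion to _get_batch_client above -- not part of the original
# source -- showing how the shared-key credentials it consumes could be built
# from the same batch_config object; the attribute names are assumptions.
from azure.batch import batch_auth


def _get_batch_credentials(self):
    """Build shared-key credentials for the Batch account in batch_config."""
    return batch_auth.SharedKeyCredentials(
        self.batch_config.batch_account_name,
        self.batch_config.batch_account_key)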
def execute_sample(global_config, sample_config):
    """Executes the sample with the specified configurations.

    :param global_config: The global configuration to use.
    :type global_config: `configparser.ConfigParser`
    :param sample_config: The sample specific configuration to use.
    :type sample_config: `configparser.ConfigParser`
    """
    # Set up the configuration.
    batch_account_key = global_config.get('Batch', 'batchaccountkey')
    batch_account_name = global_config.get('Batch', 'batchaccountname')
    batch_service_url = global_config.get('Batch', 'batchserviceurl')
    storage_account_key = global_config.get('Storage', 'storageaccountkey')
    storage_account_name = global_config.get('Storage', 'storageaccountname')
    storage_account_suffix = global_config.get('Storage',
                                               'storageaccountsuffix')

    should_delete_container = sample_config.getboolean(
        'DEFAULT', 'shoulddeletecontainer')
    should_delete_job = sample_config.getboolean('DEFAULT', 'shoulddeletejob')
    should_delete_pool = sample_config.getboolean('DEFAULT',
                                                  'shoulddeletepool')
    pool_vm_size = sample_config.get('DEFAULT', 'poolvmsize')
    pool_vm_count = sample_config.getint('DEFAULT', 'poolvmcount')

    # Print the settings we are running with.
    common.helpers.print_configuration(global_config)
    common.helpers.print_configuration(sample_config)

    credentials = batchauth.SharedKeyCredentials(batch_account_name,
                                                 batch_account_key)
    batch_client = batch.BatchServiceClient(credentials,
                                            batch_url=batch_service_url)

    # Retry 5 times -- default is 3.
    batch_client.config.retry_policy.retries = 5

    block_blob_client = azureblob.BlockBlobService(
        account_name=storage_account_name,
        account_key=storage_account_key,
        endpoint_suffix=storage_account_suffix)

    job_id = common.helpers.generate_unique_resource_name(
        "PoolsAndResourceFilesJob")
    pool_id = "PoolsAndResourceFilesPool"

    try:
        create_pool(batch_client, block_blob_client, pool_id, pool_vm_size,
                    pool_vm_count)
        submit_job_and_add_task(batch_client, block_blob_client, job_id,
                                pool_id)
        common.helpers.wait_for_tasks_to_complete(
            batch_client, job_id, datetime.timedelta(minutes=25))
        tasks = batch_client.task.list(job_id)
        task_ids = [task.id for task in tasks]
        common.helpers.print_task_output(batch_client, job_id, task_ids)
    finally:
        # Clean up.
        if should_delete_container:
            block_blob_client.delete_container(_CONTAINER_NAME,
                                               fail_not_exist=False)
        if should_delete_job:
            print("Deleting job: ", job_id)
            batch_client.job.delete(job_id)
        if should_delete_pool:
            print("Deleting pool: ", pool_id)
            batch_client.pool.delete(pool_id)
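# For reference, a simplified stand-in for common.helpers.print_task_output
# used above (assumed behaviour, not the sample's actual helper): it streams
# each task's stdout.txt from the compute node via the Batch file API.
def print_task_output(batch_client, job_id, task_ids, encoding='utf-8'):
    for task_id in task_ids:
        print('Task: {}'.format(task_id))
        stream = batch_client.file.get_from_task(job_id, task_id, 'stdout.txt')
        file_text = b''.join(stream).decode(encoding)
        print(file_text)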
input_files = [
    upload_file_to_container(blob_client, input_container_name, file_path)
    for file_path in input_file_paths]

# Obtain a shared access signature URL that provides write access to the
# output container to which the tasks will upload their output.
output_container_sas_url = get_container_sas_url(
    blob_client, output_container_name, azureblob.BlobPermissions.WRITE)

# Create a Batch service client. We'll now be interacting with the Batch
# service in addition to Storage.
credentials = batchauth.SharedKeyCredentials(config._BATCH_ACCOUNT_NAME,
                                             config._BATCH_ACCOUNT_KEY)
batch_client = batch.BatchServiceClient(
    credentials, batch_url=config._BATCH_ACCOUNT_URL)

try:
    # Create the pool that will contain the compute nodes that will execute
    # the tasks.
    create_pool(batch_client, config._POOL_ID)

    # Create the job that will run the tasks.
    create_job(batch_client, config._JOB_ID, config._POOL_ID)

    # Add the tasks to the job. Pass the input files and a SAS URL
    # to the storage container for output files.
    add_tasks(batch_client, config._JOB_ID, input_files,
              output_container_sas_url)

    # Pause execution until tasks reach Completed state.
        entity.CompletedTasks = len(complete_tasks)
        entity._State = get_analysis_state(all_tasks_complete, any_failures)
        if not incomplete_tasks:
            entity.EndTime = datetime.datetime.utcnow()
            table_service.update_entity('AnalysisEntity', entity)
            return
        else:
            table_service.update_entity('AnalysisEntity', entity)
        time.sleep(5)


if __name__ == '__main__':
    storage_account = sys.argv[1]
    storage_key = sys.argv[2]
    batch_account = sys.argv[3]
    batch_key = sys.argv[4]
    batch_url = sys.argv[5]
    job_id = sys.argv[6]
    entity_pk = sys.argv[7]
    entity_rk = sys.argv[8]

    table_service = TableService(account_name=storage_account,
                                 account_key=storage_key)
    blob_service = BlockBlobService(account_name=storage_account,
                                    account_key=storage_key)
    credentials = batchauth.SharedKeyCredentials(batch_account, batch_key)
    batch_client = batch.BatchServiceClient(credentials, base_url=batch_url)

    wait_for_tasks_to_complete(table_service, batch_client, entity_pk,
                               entity_rk, job_id)
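# The bookkeeping variables used above (complete_tasks, incomplete_tasks,
# all_tasks_complete, any_failures) are computed earlier in the polling loop,
# outside this excerpt. A hedged sketch of how they could be derived from the
# Batch task list; the helper name and grouping are assumptions.
import azure.batch.models as batchmodels


def summarize_tasks(batch_client, job_id):
    """Classify a job's tasks by completion state and flag any failures."""
    tasks = list(batch_client.task.list(job_id))
    complete_tasks = [t for t in tasks
                      if t.state == batchmodels.TaskState.completed]
    incomplete_tasks = [t for t in tasks
                        if t.state != batchmodels.TaskState.completed]
    any_failures = any(
        t.execution_info is not None and t.execution_info.exit_code != 0
        for t in complete_tasks)
    all_tasks_complete = not incomplete_tasks
    return complete_tasks, incomplete_tasks, all_tasks_complete, any_failures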
def load_results(config: BatchConfig) -> None:
    r"""
    :param config: A :class:`BatchConfig` instance with the Azure Batch run
        parameters
    :type config: :class:BatchConfig

    :raises BatchErrorException: If raised by the Azure Batch Python SDK
    """
    # pylint: disable=too-many-locals

    # Replace any missing values in the configuration with environment
    # variables.
    config = validate_config(config)

    start_time = datetime.datetime.now().replace(microsecond=0)
    print('Load result for job "{}" start time: {}'.format(config.JOB_ID,
                                                           start_time))
    print()

    _LOCAL_INPUT_FILE = os.path.join(config.BATCH_DIRECTORY,
                                     _BATCH_CV_FILE_NAME)

    v_pen, w_pen, model_data = get_config(_LOCAL_INPUT_FILE)
    n_folds = len(model_data["folds"]) * len(v_pen) * len(w_pen)

    # Create the blob client, for use in obtaining references to
    # blob storage containers and uploading files to containers.
    blob_client = azureblob.BlockBlobService(
        account_name=config.STORAGE_ACCOUNT_NAME,
        account_key=config.STORAGE_ACCOUNT_KEY)

    # Create a Batch service client. We'll now be interacting with the Batch
    # service in addition to Storage.
    credentials = batch_auth.SharedKeyCredentials(config.BATCH_ACCOUNT_NAME,
                                                  config.BATCH_ACCOUNT_KEY)
    batch_client = batch.BatchServiceClient(
        credentials, batch_url=config.BATCH_ACCOUNT_URL)

    try:
        # Pause execution until tasks reach Completed state.
        wait_for_tasks_to_complete(
            batch_client,
            config.JOB_ID,
            datetime.timedelta(hours=config.STORAGE_ACCESS_DURATION_HRS))
        _download_files(config, blob_client, config.BATCH_DIRECTORY, n_folds)
    except models.BatchErrorException as err:
        print_batch_exception(err)
        raise err

    # Clean up storage resources.
    # TODO: re-enable this and delete the output container too
    # -- print("Deleting container [{}]...".format(input_container_name))
    # -- blob_client.delete_container(input_container_name)

    # Print out some timing info.
    end_time = datetime.datetime.now().replace(microsecond=0)
    print()
    print("Sample end: {}".format(end_time))
    print("Elapsed time: {}".format(end_time - start_time))
    print()

    # Clean up Batch resources (if the user so chooses).
    if config.DELETE_POOL_WHEN_DONE:
        batch_client.pool.delete(config.POOL_ID)
    if config.DELETE_JOB_WHEN_DONE:
        batch_client.job.delete(config.JOB_ID)
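# _download_files is defined elsewhere in this module; a rough sketch of what
# such a helper might do with the BlockBlobService API used above. The
# container attribute name and the blob-count check are assumptions made for
# illustration only.
import os

import azure.storage.blob as azureblob


def _download_files(config, blob_client, out_dir, count):
    """Download every blob from the job's output container into out_dir."""
    container_name = config.CONTAINER_NAME  # assumed attribute
    os.makedirs(out_dir, exist_ok=True)
    blobs = list(blob_client.list_blobs(container_name))
    if len(blobs) < count:
        raise RuntimeError('Expected at least {} output blobs, found {}'
                           .format(count, len(blobs)))
    for blob in blobs:
        destination = os.path.join(out_dir, os.path.basename(blob.name))
        blob_client.get_blob_to_path(container_name, blob.name, destination)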
def check_cowbat_tasks():
    # Check for completed COWBAT runs.
    azure_tasks = AzureTask.objects.filter()
    # Create a batch client so we can check on the status of runs.
    credentials = batch_auth.SharedKeyCredentials(settings.BATCH_ACCOUNT_NAME,
                                                  settings.BATCH_ACCOUNT_KEY)
    batch_client = batch.BatchServiceClient(
        credentials, base_url=settings.BATCH_ACCOUNT_URL)
    for azure_task in azure_tasks:
        sequencing_run = SequencingRun.objects.get(
            pk=azure_task.sequencing_run.pk)
        batch_job_name = sequencing_run.run_name.lower().replace('_', '-')
        # Check if all tasks (within batch, this terminology gets confusing)
        # associated with this job have completed.
        tasks_completed = True
        try:
            for cloudtask in batch_client.task.list(batch_job_name):
                if cloudtask.state != batchmodels.TaskState.completed:
                    tasks_completed = False
        except BatchErrorException as e:
            sequencing_run.errors.append(e)
            sequencing_run.save()

        # Assuming that things have completed, check exit codes. Set status
        # to error if any are non-zero.
        if tasks_completed:
            exit_codes_good = True
            exit_code = 0
            # Determine the exit code.
            try:
                for cloudtask in batch_client.task.list(batch_job_name):
                    if cloudtask.execution_info.exit_code != 0:
                        exit_code = cloudtask.execution_info.exit_code
                        exit_codes_good = False
            except BatchErrorException as e:
                sequencing_run.errors.append(e)
                sequencing_run.save()
            # Get rid of the job and pool so we don't waste big $$$, and do
            # cleanup/get files downloaded in tasks.
            try:
                batch_client.job.delete(job_id=batch_job_name)
            except BatchErrorException as e:
                sequencing_run.errors.append(e)
                sequencing_run.save()
            try:
                batch_client.pool.delete(pool_id=batch_job_name)
            except BatchErrorException as e:
                sequencing_run.errors.append(e)
                sequencing_run.save()
            # Add the exit code to the sequencing run.
            sequencing_run.exit_code = exit_code
            sequencing_run.save()
            if exit_codes_good:
                # Clean up the sequencing run.
                try:
                    cowbat_cleanup.apply_async(queue='cowbat',
                                               args=(sequencing_run.pk, ))
                except Exception as e:
                    sequencing_run.errors.append(e)
                    sequencing_run.save()
            else:
                # Something went wrong - update status to error so the user
                # knows.
                SequencingRun.objects.filter(pk=sequencing_run.pk).update(
                    status='Error')
                sequencing_run.errors.append('Exit code bad')
                sequencing_run.save()
            try:
                # Delete the task so we don't have to keep checking up on it.
                AzureTask.objects.filter(id=azure_task.id).delete()
            except Exception as e:
                sequencing_run.errors.append(e)
                sequencing_run.save()
        else:
            check_cowbat_progress(batch_client, batch_job_name,
                                  sequencing_run, azure_task)
# Obtain a shared access signature URL that provides write access to the
# output container to which the tasks will upload their output.
output_container_sas_url = get_container_sas_url(
    blob_client, output_container_name, azureblob.BlobPermissions.WRITE)

# Create a Batch service client. We'll now be interacting with the Batch
# service in addition to Storage.
credentials = batchauth.SharedKeyCredentials(_BATCH_ACCOUNT_NAME,
                                             _BATCH_ACCOUNT_KEY)
batch_client = batch.BatchServiceClient(
    credentials, base_url=_BATCH_ACCOUNT_URL)

try:
    # Create the pool that will contain the compute nodes that will execute
    # the tasks.
    create_pool(batch_client, _POOL_ID)

    # Create the job that will run the tasks.
    create_job(batch_client, _JOB_ID, _POOL_ID)

    # Add the tasks to the job. Pass the input files and a SAS URL
    # to the storage container for output files.
    add_tasks(batch_client, _JOB_ID, input_files, output_container_sas_url)

    # Pause execution until tasks reach Completed state.
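# get_container_sas_url is assumed to wrap a container-level SAS token into a
# full URL the tasks can upload to. A minimal sketch against the
# BlockBlobService API; the helper name matches the call above, but the body
# and expiry are illustrative.
import datetime

import azure.storage.blob as azureblob


def get_container_sas_url(block_blob_client, container_name, permission):
    """Return a container URL carrying a SAS token with `permission`."""
    sas_token = block_blob_client.generate_container_shared_access_signature(
        container_name,
        permission=permission,
        expiry=datetime.datetime.utcnow() + datetime.timedelta(hours=2))
    return 'https://{}.blob.core.windows.net/{}?{}'.format(
        block_blob_client.account_name, container_name, sas_token)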
def execute_sample(global_config, sample_config):
    """Executes the sample with the specified configurations.

    :param global_config: The global configuration to use.
    :type global_config: `configparser.ConfigParser`
    :param sample_config: The sample specific configuration to use.
    :type sample_config: `configparser.ConfigParser`
    """
    # Set up the configuration.
    batch_account_key = global_config.get('Batch', 'batchaccountkey')
    batch_account_name = global_config.get('Batch', 'batchaccountname')
    batch_service_url = global_config.get('Batch', 'batchserviceurl')
    storage_account_key = global_config.get('Storage', 'storageaccountkey')
    storage_account_name = global_config.get('Storage', 'storageaccountname')
    storage_account_suffix = global_config.get('Storage',
                                               'storageaccountsuffix')

    should_delete_job = sample_config.getboolean('DEFAULT', 'shoulddeletejob')
    should_delete_pool = sample_config.getboolean('DEFAULT',
                                                  'shoulddeletepool')
    pool_vm_size = sample_config.get('DEFAULT', 'poolvmsize')
    pool_vm_count = sample_config.getint('DEFAULT', 'poolvmcount')

    # Print the settings we are running with.
    common.helpers.print_configuration(global_config)
    common.helpers.print_configuration(sample_config)

    credentials = batchauth.SharedKeyCredentials(batch_account_name,
                                                 batch_account_key)
    batch_client = batch.BatchServiceClient(credentials,
                                            base_url=batch_service_url)

    # Retry 5 times -- default is 3.
    batch_client.config.retry_policy.retries = 5

    block_blob_client = azureblob.BlockBlobService(
        account_name=storage_account_name,
        account_key=storage_account_key,
        endpoint_suffix=storage_account_suffix)

    job_id = common.helpers.generate_unique_resource_name('DockerBatchTask')
    pool_id = common.helpers.generate_unique_resource_name('DockerBatchTask')

    try:
        # Create the pool.
        create_pool(batch_client, block_blob_client, pool_id, pool_vm_size,
                    pool_vm_count)

        # Submit the job and add a task.
        print('submitting docker run tasks via Azure Batch...')
        add_docker_batch_task(batch_client, block_blob_client, job_id, pool_id)

        # Wait for tasks to complete.
        common.helpers.wait_for_tasks_to_complete(
            batch_client, job_id, datetime.timedelta(minutes=25))
    finally:
        # Perform clean up.
        if should_delete_job:
            print('Deleting job: {}'.format(job_id))
            batch_client.job.delete(job_id)
        if should_delete_pool:
            print('Deleting pool: {}'.format(pool_id))
            batch_client.pool.delete(pool_id)
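# execute_sample takes two ConfigParser objects. A typical driver, assuming
# the usual configuration.cfg / per-sample .cfg file layout (the file names
# here are assumptions, not from the original source), might look like this:
import configparser

if __name__ == '__main__':
    global_cfg = configparser.ConfigParser()
    global_cfg.read('configuration.cfg')

    sample_cfg = configparser.ConfigParser()
    sample_cfg.read('docker_batch_task.cfg')

    execute_sample(global_cfg, sample_cfg)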