def create_batch_client(self) -> BatchServiceClient:
    """ Create a 'Batch client' using an Azure Service Principal """
    logger.info("creating a 'batch' client using a service principal")
    credentials = self._get_service_principal()
    return BatchServiceClient(credentials,
                              base_url=self.auth_settings.batch_acc_url)

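# _get_service_principal is not shown above; a minimal sketch, assuming it
# mirrors the service-principal examples further down: credentials scoped to
# the Batch resource. The auth_settings attribute names are assumptions.
def _get_service_principal(self):
    from azure.common.credentials import ServicePrincipalCredentials
    return ServicePrincipalCredentials(
        client_id=self.auth_settings.client_id,
        secret=self.auth_settings.secret,
        tenant=self.auth_settings.tenant_id,
        resource="https://batch.core.windows.net/")
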
def main(): print("-----------------------------------") print("job manager task reporting for duty") print("-----------------------------------") # get the environment variables for the job manager task x_tiles = int(os.environ["X_TILES"]) y_tiles = int(os.environ["Y_TILES"]) frame_start = int(os.environ["FRAME_START"]) frame_end = int(os.environ["FRAME_END"]) batch_account_url = os.environ["AZ_BATCH_ACCOUNT_URL"] # create Batch client # when running inside a task with authentication enabled, this token allows access to the rest of the job credentials = OAuthTokenAuthentication( client_id=None, token={"access_token": os.environ["AZ_BATCH_AUTHENTICATION_TOKEN"]}) batch_client = BatchServiceClient(credentials, base_url=batch_account_url) # create the tile collection, can be used for every frame tiles = create_tiles(x_tiles, y_tiles) # create the task collections for each frame current_task_id = 1 for frame in range(frame_start, frame_end + 1): print( "generating tasks for frame: {}, with current_task_id: {}".format( frame, current_task_id)) current_task_id = create_tasks_for_frame(frame, current_task_id, tiles, batch_client) print( "finished creating tasks for frame: {}, with current_task_id: {}\n" .format(frame, current_task_id))
def get_conn(self):
    """
    Get the batch client connection

    :return: Azure batch client
    """
    conn = self._connection()

    def _get_required_param(name):
        """Extract required parameter from extra JSON, raise exception if not found"""
        value = conn.extra_dejson.get(name)
        if not value:
            raise AirflowException(
                'Extra connection option is missing required parameter: `{}`'
                .format(name))
        return value

    batch_account_name = _get_required_param('account_name')
    batch_account_key = _get_required_param('account_key')
    batch_account_url = _get_required_param('account_url')
    credentials = batch_auth.SharedKeyCredentials(batch_account_name,
                                                  batch_account_key)
    batch_client = BatchServiceClient(credentials,
                                      batch_url=batch_account_url)
    return batch_client

def get_conn(self):
    """
    Get the batch client connection

    :return: Azure batch client
    """
    conn = self._connection()

    def _get_required_param(name):
        """Extract required parameter from extra JSON, raise exception if not found"""
        value = conn.extra_dejson.get(name)
        if not value:
            raise AirflowException(
                f'Extra connection option is missing required parameter: `{name}`'
            )
        return value

    # look up the short key with .get() first so the fallback is actually
    # reachable; _get_required_param would raise before the `or` was tried
    batch_account_url = conn.extra_dejson.get(
        'account_url') or _get_required_param(
            'extra__azure_batch__account_url')

    credentials = batch_auth.SharedKeyCredentials(conn.login, conn.password)
    batch_client = BatchServiceClient(credentials,
                                      batch_url=batch_account_url)
    return batch_client

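# A hedged sketch of the Airflow connection the two hooks above expect:
# login/password carry the Batch account name and key, and the account URL
# lives in the Extra JSON under one of the keys checked above. All values
# here are placeholders, as is the conn_id/conn_type pair.
from airflow.models import Connection

example_conn = Connection(
    conn_id="azure_batch_default",
    conn_type="azure_batch",
    login="mybatchaccount",       # Batch account name (placeholder)
    password="<account-key>",     # Batch account key (placeholder)
    extra='{"account_url": "https://mybatchaccount.westus2.batch.azure.com"}',
)
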
def _get_azure_batch_client(conf):
    from azure.batch import batch_auth, BatchServiceClient
    creds = batch_auth.SharedKeyCredentials(
        conf[utils.PLATFORM]['batch_account'],
        conf[utils.PLATFORM]['batch_key'])
    batch_url = (f"https://{conf[utils.PLATFORM]['batch_account']}"
                 f".{conf[utils.PLATFORM]['location']}.batch.azure.com")
    return BatchServiceClient(creds, batch_url=batch_url)

def main(): print("------------------------------------") print("Azure Batch Task Manager task reporting for duty") job_id = os.environ["AZ_BATCH_JOB_ID"] batch_account_url = os.environ["AZ_BATCH_ACCOUNT_URL"] manifest_file = os.environ["AZ_BATCH_TASK_WORKING_DIR"] + "/assets/manifest.txt" tasks = [] counter = 1 # Create Batch client # When running inside a task with authentication enabled, this token allows access to the rest of the job #credentials = BasicTokenAuthentication(os.environ["AZ_BATCH_AUTHENTICATION_TOKEN"]) #credentials = SharedKeyCredentials("<your account name>", "<your account key>") credentials = OAuthTokenAuthentication( client_id=None, token={ "access_token" : os.environ["AZ_BATCH_AUTHENTICATION_TOKEN"] } ) batch_client = BatchServiceClient(credentials, base_url=batch_account_url) print("opening file: {0}".format(manifest_file)) with open(manifest_file) as manifest: for line in manifest: print("create task for: " + line) tasks.append(create_task(counter, line)) counter += 1 # submit the tasks to the service submit_tasks(batch_client, job_id, tasks)
def __init__(self):
    credentials = ServicePrincipalCredentials(
        client_id=api_config.APP_CLIENT_ID,
        secret=api_config.APP_CLIENT_SECRET,
        tenant=api_config.APP_TENANT_ID,
        resource='https://batch.core.windows.net/')

    self.batch_client = BatchServiceClient(
        credentials=credentials,
        batch_url=api_config.BATCH_ACCOUNT_URL)

def __init__(self,
             client_id,
             tenant_id,
             secret_key,
             batch_account_url,
             config,
             keyvault_account,
             storage_account_key=None):
    """
    abstraction around all batch related methods

    :param client_id: Azure AD client id (application id)
    :type client_id: str
    :param tenant_id: Azure AD tenant id
    :type tenant_id: str
    :param secret_key: secret key for the app
    :type secret_key: str
    :param batch_account_url: azure batch account URL
    :type batch_account_url: str
    :param config: configuration file data from submit config
    :type config: dict
    :param keyvault_account: keyvault account for pulling storage keys
    :type keyvault_account: str
    :param storage_account_key: storage account key
    :type storage_account_key: str
    """
    self.config = config
    self.run_id = self.__random_string(8)

    self.logger = self.__get_logger(add_azure_filter=True)

    self.logger.info('creating blob client')
    self.blob_client = BlobStorageClient(
        config['pypeliner_storage_account'],
        client_id,
        tenant_id,
        secret_key,
        keyvault_account,
        storage_account_key=storage_account_key,
    )

    self.logger.info('creating batch client')
    self.credentials = ServicePrincipalCredentials(
        client_id=client_id,
        secret=secret_key,
        tenant=tenant_id,
        resource="https://batch.core.windows.net/")
    self.batch_client = BatchServiceClient(self.credentials,
                                           batch_account_url)

    self.container_name = self.config['storage_container_name']
    self.blob_client.create_container(container_name=self.container_name)

    self.active_pools = set()
    self.active_jobs = set()

def __init__(self, image=None, **kwargs):
    """
    Args:
        image (azure.batch.models.ImageReference): The VM image to use for
            the pool nodes; defaults to

            ```python
            azure.batch.models.ImageReference(
                publisher="microsoft-azure-batch",
                offer="ubuntu-server-container",
                sku="16-04-lts",
                version="latest",
            )
            ```

        **kwargs: Additional arguments passed to :class:`super_batch.BatchConfig`
    """
    self.image = image if image is not None else _IMAGE_REF
    self.config = BatchConfig(**kwargs)
    self.output_files = []
    self.tasks = []

    # --------------------------------------------------
    # BLOB STORAGE CONFIGURATION:
    # --------------------------------------------------
    # Create the blob client, for use in obtaining references to
    # blob storage containers and uploading files to containers.
    self.blob_client = BlobServiceClient.from_connection_string(
        self.config.STORAGE_ACCOUNT_CONNECTION_STRING)

    # Use the blob client to create the containers in Azure Storage if they
    # don't yet exist.
    self.container_client = self.blob_client.get_container_client(
        self.config.BLOB_CONTAINER_NAME)
    try:
        self.container_client.create_container()
    except ResourceExistsError:
        pass

    # --------------------------------------------------
    # AZURE BATCH CONFIGURATION
    # --------------------------------------------------
    # Create a Batch service client. We'll now be interacting with the Batch
    # service in addition to Storage
    self.batch_client = BatchServiceClient(
        SharedKeyCredentials(self.config.BATCH_ACCOUNT_NAME,
                             self.config.BATCH_ACCOUNT_KEY),
        batch_url=self.config.BATCH_ACCOUNT_URL,
    )

def __init__(self, source_control: GithubService, storage: BlockBlobService):
    from azure.batch.batch_auth import SharedKeyCredentials

    batch_account = Setting.objects.get(name__exact='BATCH_ACCOUNT').value
    batch_account_key = Setting.objects.get(
        name__exact='BATCH_ACCOUNT_KEY').value
    batch_account_endpoint = Setting.objects.get(
        name__exact='BATCH_ACCOUNT_ENDPOINT').value
    self.client = BatchServiceClient(
        SharedKeyCredentials(batch_account, batch_account_key),
        batch_account_endpoint)
    self.logger = logging.getLogger(AzureBatchClient.__name__)
    self.source = source_control
    self.storage = storage

def get_conn(self):
    """
    Get the Batch client connection

    :return: Azure Batch client
    """
    conn = self._connection()

    batch_account_url = conn.extra_dejson.get(
        'extra__azure_batch__account_url')
    if not batch_account_url:
        raise AirflowException('Batch Account URL parameter is missing.')

    credentials = batch_auth.SharedKeyCredentials(conn.login, conn.password)
    batch_client = BatchServiceClient(credentials,
                                      batch_url=batch_account_url)
    return batch_client

def __init__(self, config: BatchConfig, common_data, K, verbose=True):
    self.config = config
    self.K = K

    self.blob_client = azureblob.BlockBlobService(
        account_name=config.STORAGE_ACCOUNT_NAME,
        account_key=config.STORAGE_ACCOUNT_KEY,
    )

    # Use the blob client to create the containers in Azure Storage if they
    # don't yet exist.
    self.blob_client.create_container(config.CONTAINER_NAME,
                                      fail_on_exist=False)
    self.CONTAINER_SAS_URL = build_output_sas_url(config, self.blob_client)

    # Create a Batch service client. We'll now be interacting with the Batch
    # service in addition to Storage
    self.credentials = batch_auth.SharedKeyCredentials(
        config.BATCH_ACCOUNT_NAME, config.BATCH_ACCOUNT_KEY)
    self.batch_client = BatchServiceClient(
        self.credentials, batch_url=config.BATCH_ACCOUNT_URL)

    # Upload the common files.
    self.common_file = self.upload_object_to_container(
        self.blob_client, config.CONTAINER_NAME, _GRAD_COMMON_FILE,
        common_data)

    # Create the pool that will contain the compute nodes that will execute
    # the tasks.
    try:
        create_pool(self.config, self.batch_client)
        if verbose:
            print("Created pool: ", self.config.POOL_ID)
    except models.BatchErrorException:
        if verbose:
            print("Using pool: ", self.config.POOL_ID)

def __init__(self, image=IMAGE_REF, **kwargs):
    self.image = image
    self.config = BatchConfig(**kwargs)
    self.output_files = []
    self.tasks = []

    # --------------------------------------------------
    # BLOB STORAGE CONFIGURATION:
    # --------------------------------------------------
    # Create the blob client, for use in obtaining references to
    # blob storage containers and uploading files to containers.
    self.blob_client = BlobServiceClient.from_connection_string(
        self.config.STORAGE_ACCOUNT_CONNECTION_STRING)

    # Use the blob client to create the containers in Azure Storage if they
    # don't yet exist.
    self.container_client = self.blob_client.get_container_client(
        self.config.BLOB_CONTAINER_NAME)
    try:
        self.container_client.create_container()
    except ResourceExistsError:
        pass

    # --------------------------------------------------
    # AZURE BATCH CONFIGURATION
    # --------------------------------------------------
    # Create a Batch service client. We'll now be interacting with the Batch
    # service in addition to Storage
    self.batch_client = BatchServiceClient(
        SharedKeyCredentials(self.config.BATCH_ACCOUNT_NAME,
                             self.config.BATCH_ACCOUNT_KEY),
        batch_url=self.config.BATCH_ACCOUNT_URL,
    )

def get_client(*_):
    creds = SharedKeyCredentials('test1', 'ZmFrZV9hY29jdW50X2tleQ==')
    return BatchServiceClient(creds, 'https://test1.westus.batch.azure.com/')

def BatchServiceClient(self, base_url=None):
    from azure.batch import BatchServiceClient
    return BatchServiceClient(
        self.GetCredentialsForResource('https://batch.core.windows.net/'),
        base_url=base_url)

'''
# Create a Batch service client. We'll now be interacting with the Batch
# service in addition to Storage
credentials = batchauth.SharedKeyCredentials(_BATCH_ACCOUNT_NAME,
                                             _BATCH_ACCOUNT_KEY)

batch_client = batch.BatchServiceClient(
    credentials,
    base_url=_BATCH_ACCOUNT_URL)
'''
credentials = ServicePrincipalCredentials(
    client_id=CLIENT_ID,
    secret=SECRET,
    tenant=TENANT_ID,
    resource="https://batch.core.windows.net/")

batch_client = BatchServiceClient(credentials, base_url=_BATCH_ACCOUNT_URL)

# filedirs = range(0, 10000)
# blobnames_list = ['gist_out_{:0>4}.json'.format(f) for f in filedirs]
# blobsdone = []
# for i in range(10):
#     # list_blobs returns only max 5000 blobs
#     blobsdone = blobsdone + [
#         blob.name for blob in blob_client.list_blobs(
#             container_name=_STORAGE_OUTPUT_CONTAINER,
#             prefix='gist_out_{}'.format(i))]
# blobstodo = [b for b in blobnames_list if b not in blobsdone]
# blobdirs = [c.split("_")[2].split(".")[0] for c in blobstodo]

# blobstodo = [b.name for b in blob_client.list_blobs(container_name=_STORAGE_INPUT_CONTAINER)]

# Create the pool that will contain the compute nodes that will execute the
# tasks. The resource files we pass in are used for configuring the pool's
# start task, which is executed each time a node first joins the pool (or

def load_results(config: BatchConfig) -> None:
    r"""
    :param config: A :class:`BatchConfig` instance with the Azure Batch run
        parameters
    :type config: :class:BatchConfig

    :raises BatchErrorException: If raised by the Azure Batch Python SDK
    """
    # pylint: disable=too-many-locals
    start_time = datetime.datetime.now().replace(microsecond=0)
    print('Load result for job "{}" start time: {}'.format(
        config.JOB_ID, start_time))
    print()

    _LOCAL_INPUT_FILE = os.path.join(config.BATCH_DIRECTORY,
                                     _BATCH_CV_FILE_NAME)

    v_pen, w_pen, model_data = get_config(_LOCAL_INPUT_FILE)
    n_folds = len(model_data["folds"]) * len(v_pen) * len(w_pen)

    # Create the blob client, for use in obtaining references to
    # blob storage containers and uploading files to containers.
    blob_client = azureblob.BlockBlobService(
        account_name=config.STORAGE_ACCOUNT_NAME,
        account_key=config.STORAGE_ACCOUNT_KEY)

    # Create a Batch service client. We'll now be interacting with the Batch
    # service in addition to Storage
    credentials = batch_auth.SharedKeyCredentials(config.BATCH_ACCOUNT_NAME,
                                                  config.BATCH_ACCOUNT_KEY)
    batch_client = BatchServiceClient(credentials,
                                      batch_url=config.BATCH_ACCOUNT_URL)

    try:
        # Pause execution until tasks reach Completed state.
        wait_for_tasks_to_complete(
            batch_client, config.JOB_ID,
            datetime.timedelta(hours=config.STORAGE_ACCESS_DURATION_HRS))

        _download_files(config, blob_client, config.BATCH_DIRECTORY, n_folds)

    except models.BatchErrorException as err:
        print_batch_exception(err)
        raise err

    # Clean up storage resources
    # TODO: re-enable this and delete the output container too
    # -- print("Deleting container [{}]...".format(input_container_name))
    # -- blob_client.delete_container(input_container_name)

    # Print out some timing info
    end_time = datetime.datetime.now().replace(microsecond=0)
    print()
    print("Sample end: {}".format(end_time))
    print("Elapsed time: {}".format(end_time - start_time))
    print()

    # Clean up Batch resources (if the user so chooses).
    if config.DELETE_POOL_WHEN_DONE:
        batch_client.pool.delete(config.POOL_ID)
    if config.DELETE_JOB_WHEN_DONE:
        batch_client.job.delete(config.JOB_ID)

def run(config: BatchConfig, wait=True) -> None:
    r"""
    :param config: A :class:`BatchConfig` instance with the Azure Batch run
        parameters
    :type config: :class:BatchConfig

    :param boolean wait: If true, wait for the batch to complete and then
        download the results to file

    :raises BatchErrorException: If raised by the Azure Batch Python SDK
    """
    # pylint: disable=too-many-locals
    start_time = datetime.datetime.now().replace(microsecond=0)
    print('Synthetic Controls Run "{}" start time: {}'.format(
        config.JOB_ID, start_time))
    print()

    _LOCAL_INPUT_FILE = os.path.join(config.BATCH_DIRECTORY,
                                     _BATCH_CV_FILE_NAME)

    v_pen, w_pen, model_data = get_config(_LOCAL_INPUT_FILE)
    n_folds = len(model_data["folds"]) * len(v_pen) * len(w_pen)

    # Create the blob client, for use in obtaining references to
    # blob storage containers and uploading files to containers.
    blob_client = azureblob.BlockBlobService(
        account_name=config.STORAGE_ACCOUNT_NAME,
        account_key=config.STORAGE_ACCOUNT_KEY)

    # Use the blob client to create the containers in Azure Storage if they
    # don't yet exist.
    blob_client.create_container(config.CONTAINER_NAME, fail_on_exist=False)
    CONTAINER_SAS_URL = build_output_sas_url(config, blob_client)

    # The collection of data files that are to be processed by the tasks.
    input_file_path = os.path.join(sys.path[0], _LOCAL_INPUT_FILE)

    # Upload the data files.
    input_file = upload_file_to_container(blob_client, config.CONTAINER_NAME,
                                          input_file_path,
                                          config.STORAGE_ACCESS_DURATION_HRS)

    # Create a Batch service client. We'll now be interacting with the Batch
    # service in addition to Storage
    credentials = batch_auth.SharedKeyCredentials(config.BATCH_ACCOUNT_NAME,
                                                  config.BATCH_ACCOUNT_KEY)
    batch_client = BatchServiceClient(credentials,
                                      batch_url=config.BATCH_ACCOUNT_URL)

    try:
        # Create the pool that will contain the compute nodes that will
        # execute the tasks.
        try:
            create_pool(config, batch_client)
            print("Created pool: ", config.POOL_ID)
        except models.BatchErrorException:
            print("Using pool: ", config.POOL_ID)

        # Create the job that will run the tasks.
        create_job(batch_client, config.JOB_ID, config.POOL_ID)

        # Add the tasks to the job.
        add_tasks(
            config,
            blob_client,
            batch_client,
            CONTAINER_SAS_URL,
            config.JOB_ID,
            input_file,
            n_folds,
        )

        if not wait:
            return

        # Pause execution until tasks reach Completed state.
        wait_for_tasks_to_complete(
            batch_client, config.JOB_ID,
            datetime.timedelta(hours=config.STORAGE_ACCESS_DURATION_HRS))

        _download_files(config, blob_client, config.BATCH_DIRECTORY, n_folds)

    except models.BatchErrorException as err:
        print_batch_exception(err)
        raise err

    # Clean up storage resources
    # TODO: re-enable this and delete the output container too
    # -- print("Deleting container [{}]...".format(input_container_name))
    # -- blob_client.delete_container(input_container_name)

    # Print out some timing info
    end_time = datetime.datetime.now().replace(microsecond=0)
    print()
    print("Sample end: {}".format(end_time))
    print("Elapsed time: {}".format(end_time - start_time))
    print()

    # Clean up Batch resources (if the user so chooses).
    if config.DELETE_POOL_WHEN_DONE:
        batch_client.pool.delete(config.POOL_ID)
    if config.DELETE_JOB_WHEN_DONE:
        batch_client.job.delete(config.JOB_ID)

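# wait_for_tasks_to_complete is called in several snippets above but never
# defined; a minimal sketch following the polling pattern in the Azure Batch
# Python samples: list the job's tasks until all reach the 'completed' state
# or the timeout window elapses.
import datetime
import time

from azure.batch.models import TaskState

def wait_for_tasks_to_complete(batch_client, job_id, timeout):
    deadline = datetime.datetime.now() + timeout
    while datetime.datetime.now() < deadline:
        tasks = list(batch_client.task.list(job_id))
        if tasks and all(t.state == TaskState.completed for t in tasks):
            return
        time.sleep(10)  # avoid hammering the service while polling
    raise RuntimeError(
        'Tasks did not reach the Completed state within {}'.format(timeout))
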
def get_batch_client() -> BatchServiceClient:
    from azure.batch.batch_auth import SharedKeyCredentials

    account_info = get_batch_account_info()
    cred = SharedKeyCredentials(account_info.account, account_info.key)
    return BatchServiceClient(cred, account_info.endpoint)

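# get_batch_account_info is not shown above; a minimal sketch, assuming it
# gathers the account coordinates (the .account/.key/.endpoint attributes
# used by get_batch_client) into a named tuple read from environment
# variables. The variable names are placeholders.
import os
from collections import namedtuple

BatchAccountInfo = namedtuple('BatchAccountInfo',
                              ['account', 'key', 'endpoint'])

def get_batch_account_info() -> BatchAccountInfo:
    return BatchAccountInfo(
        account=os.environ['BATCH_ACCOUNT_NAME'],
        key=os.environ['BATCH_ACCOUNT_KEY'],
        endpoint=os.environ['BATCH_ACCOUNT_ENDPOINT'])
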
def get_client(*args):  # pylint: disable=unused-argument
    creds = SharedKeyCredentials('test1', 'ZmFrZV9hY29jdW50X2tleQ==')
    return BatchServiceClient(creds, 'https://test1.westus.batch.azure.com/')

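# The two get_client helpers above build clients from fake credentials,
# which marks them as test doubles: constructing a BatchServiceClient makes
# no network calls, so a fake client can safely replace the real factory in
# unit tests. A hedged sketch; the patch target 'myapp.batch.get_batch_client'
# is a placeholder module path.
from unittest import mock

def test_uses_fake_client():
    with mock.patch('myapp.batch.get_batch_client', new=get_client):
        client = get_client()
        # exercise the code under test; nothing contacts a real Batch
        # account until an actual service operation is invoked
        assert client is not None
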
# The opening assignment is reconstructed: the source snippet is truncated
# before the list literal, but the comprehension below iterates
# input_file_paths.
input_file_paths = [
    os.path.join(sys.path[0], 'taskdata1.txt'),
    os.path.join(sys.path[0], 'taskdata2.txt')
]

# Upload the data files.
input_files = [
    upload_file_to_container(blob_service_client, input_container_name,
                             file_path) for file_path in input_file_paths
]

# Create a Batch service client. We'll now be interacting with the Batch
# service in addition to Storage
credentials = SharedKeyCredentials(config.BATCH_ACCOUNT_NAME,
                                   config.BATCH_ACCOUNT_KEY)

batch_client = BatchServiceClient(credentials,
                                  batch_url=config.BATCH_ACCOUNT_URL)

try:
    # Create the pool that will contain the compute nodes that will execute
    # the tasks.
    create_pool(batch_client, config.POOL_ID)

    # Create the job that will run the tasks.
    create_job(batch_client, config.JOB_ID, config.POOL_ID)

    # Add the tasks to the job.
    add_tasks(batch_client, config.JOB_ID, input_files)

    # Pause execution until tasks reach Completed state.
    wait_for_tasks_to_complete(batch_client, config.JOB_ID,
                               datetime.timedelta(minutes=30))
except models.BatchErrorException as err:
    # Reconstructed handler: the source snippet is truncated mid-try, so a
    # minimal except clause is added here to make the block parse.
    print_batch_exception(err)
    raise