def run(self, job_status, expand_arrays, job_queue=None, job_ids=None, show_details=False):
    """
    Print the list of jobs, selected either by explicit ids or by queue.

    :param job_status: list of job status to ask
    :param expand_arrays: forwarded to the by-queue lookup
    :param job_queue: job queue name or ARN, used only when job_ids is not given
    :param job_ids: job ids or ARNs; takes precedence over job_queue
    :param show_details: force the detailed view instead of the table
    """
    if job_ids:
        single_id_requested = len(job_ids) == 1
        self.__populate_output_by_job_ids(job_status, job_ids, show_details or single_id_requested)
        # Detailed view when explicitly requested, or when a single id was asked and the
        # output holds exactly one item (i.e. it was not expanded into a list of jobs).
        details_required = show_details or (single_id_requested and self.output.length() == 1)
    elif job_queue:
        self.__populate_output_by_queue(job_queue, job_status, expand_arrays, show_details)
        details_required = show_details
    else:
        fail("Error listing jobs from AWS Batch. job_ids or job_queue must be defined")

    if not details_required:
        table_columns = ['jobId', 'jobName', 'status', 'startedAt', 'stoppedAt', 'exitCode']
        self.output.show_table(table_columns)
        return
    self.output.show()
def __get_ecs_clusters(self, compute_environments):
    """
    Collect the ECS Cluster ARNs backing the given AWS Batch Compute Environments.

    All the pages returned by describe_compute_environments are visited.

    :param compute_environments: compute environments to query
    :return: a list of ECS clusters
    """
    clusters = []
    try:
        client = self.boto3_factory.get_client("batch")
        token = ""
        # a missing nextToken in the response marks the last page
        while token is not None:
            page = client.describe_compute_environments(
                computeEnvironments=compute_environments, nextToken=token
            )
            clusters += self.__get_clusters(page["computeEnvironments"])
            token = page.get("nextToken")
    except Exception as e:
        fail("Error listing compute environments from AWS Batch. Failed with exception: %s" % e)
    return clusters
def main():
    """Command entrypoint: parse the arguments, load the configuration and list the hosts."""
    try:
        cli_args = _get_parser().parse_args()
        log = config_logger(cli_args.log_level)
        log.info("Input parameters: %s" % cli_args)

        config = AWSBatchCliConfig(log, cli_args.cluster)
        client_factory = Boto3ClientFactory(
            region=config.region,
            proxy=config.proxy,
            aws_access_key_id=config.aws_access_key_id,
            aws_secret_access_key=config.aws_secret_access_key,
        )

        command = AWSBhostsCommand(log, client_factory)
        command.run(
            compute_environments=[config.compute_environment],
            instance_ids=cli_args.instance_ids,
            show_details=cli_args.details,
        )
    except KeyboardInterrupt:
        print("Exiting...")
        sys.exit(0)
    except Exception as e:
        fail("Unexpected error. Command failed with exception: %s" % e)
def main():
    """Command entrypoint: validate the arguments, load the configuration and print the job output."""
    try:
        cli_args = _get_parser().parse_args()
        _validate_parameters(cli_args)

        log = config_logger(cli_args.log_level)
        log.info("Input parameters: %s" % cli_args)

        config = AWSBatchCliConfig(log=log, cluster=cli_args.cluster)
        client_factory = Boto3ClientFactory(
            region=config.region,
            proxy=config.proxy,
            aws_access_key_id=config.aws_access_key_id,
            aws_secret_access_key=config.aws_secret_access_key,
        )

        command = AWSBoutCommand(log, client_factory)
        command.run(
            job_id=cli_args.job_id,
            head=cli_args.head,
            tail=cli_args.tail,
            stream=cli_args.stream,
            stream_period=cli_args.stream_period,
        )
    except KeyboardInterrupt:
        print("Exiting...")
        sys.exit(0)
    except Exception as e:
        fail("Unexpected error. Command failed with exception: %s" % e)
def __populate_output_by_job_ids(self, job_status, job_ids, details):
    """
    Add Job item or jobs array children to the output.

    The ids are described in chunks, because a single DescribeJobs request accepts
    at most 100 job ids.

    :param job_status: list of job status to ask
    :param job_ids: job ids or ARNs
    :param details: ask for job details
    """
    # maximum number of job ids accepted by a single describe_jobs request
    max_ids_per_request = 100
    try:
        if job_ids:
            self.log.info("Describing jobs (%s), details (%s)" % (job_ids, details))
            single_jobs = []
            job_array_ids = []
            ids = list(job_ids)
            for start in range(0, len(ids), max_ids_per_request):
                chunk = ids[start:start + max_ids_per_request]
                for job in self.batch_client.describe_jobs(jobs=chunk)['jobs']:
                    if is_job_array(job):
                        job_array_ids.append(job['jobId'])
                    else:
                        single_jobs.append(job)

            # create output items for job array children
            self.__populate_output_by_array_ids(job_status, job_array_ids, details)

            # add single jobs to the output
            self.__add_jobs(single_jobs, details)
    except Exception as e:
        fail("Error describing jobs from AWS Batch. Failed with exception: %s" % e)
def _validate_parameters(args):
    """
    Validate input parameters.

    Checks performed:
    - with --command-file, the command must be given and must be an existing file;
    - when submitting by stdin (stdin is not a tty), neither command nor arguments are allowed;
    - otherwise the command is required;
    - --depends-on, when given, must start with a jobId=... or type=... item.

    Every violation is reported through fail().

    :param args: args variable
    """
    command_given = isinstance(args.command, str)

    if args.command_file:
        if not command_given:
            fail("The command parameter is required with --command-file option")
        elif not os.path.isfile(args.command):
            fail("The command parameter (%s) must be an existing file" % args.command)
    elif not sys.stdin.isatty():
        # stdin
        if args.arguments or command_given:
            fail("Error: command and arguments cannot be specified when submitting by stdin.")
    elif not command_given:
        fail("Parameters validation error: command parameter is required.")

    # raw string: "\s" in a plain literal is an invalid escape sequence
    depends_on_pattern = r'(jobId|type)=[^\s,]+([\s,]?(jobId|type)=[^\s]+)*'
    if args.depends_on and not re.match(depends_on_pattern, args.depends_on):
        fail("Parameters validation error: please double check --depends-on parameter syntax.")
def main():
    """Command entrypoint: validate the arguments, load the configuration and print the job output."""
    try:
        parsed = _get_parser().parse_args()
        _validate_parameters(parsed)

        log = config_logger(parsed.log_level)
        log.info("Input parameters: %s" % parsed)

        config = AWSBatchCliConfig(log=log, cluster=parsed.cluster)
        factory = Boto3ClientFactory(
            region=config.region,
            proxy=config.proxy,
            aws_access_key_id=config.aws_access_key_id,
            aws_secret_access_key=config.aws_secret_access_key,
        )

        bout = AWSBoutCommand(log, factory)
        bout.run(
            job_id=parsed.job_id,
            head=parsed.head,
            tail=parsed.tail,
            stream=parsed.stream,
            stream_period=parsed.stream_period,
        )
    except KeyboardInterrupt:
        print("Exiting...")
        sys.exit(0)
    except Exception as e:
        fail("Unexpected error. Command failed with exception: %s" % e)
def __populate_output_by_queue(self, job_queue, job_status, expand_children, details):
    """
    Fill the output with the jobs of the given queue, for each requested status.

    :param job_queue: job queue name or ARN
    :param job_status: list of job status to ask
    :param expand_children: if True, jobs whose type is not SIMPLE are expanded through their ids
        instead of being added as a single row
    :param details: ask for job details
    """
    try:
        plain_jobs = []
        parent_job_ids = []
        for status in job_status:
            token = ""  # nosec
            # a missing nextToken in the response marks the last page
            while token is not None:
                page = self.batch_client.list_jobs(jobStatus=status, jobQueue=job_queue, nextToken=token)
                for summary in page["jobSummaryList"]:
                    has_children = get_job_type(summary) != "SIMPLE"
                    if has_children and expand_children is True:
                        parent_job_ids.append(summary["jobId"])
                    else:
                        plain_jobs.append(summary)
                token = page.get("nextToken")

        # jobs with children first, then the single jobs
        self.__populate_output_by_job_ids(parent_job_ids, details)
        self.__add_jobs(plain_jobs, details)
    except Exception as e:
        fail("Error listing jobs from AWS Batch. Failed with exception: %s" % e)
def __add_jobs(self, jobs, details=False):
    """
    Convert the given jobs to output items and add them to the output.

    :param jobs: list of jobs items (output of the list_jobs function)
    :param details: when True the jobs are described again by id before being converted
    """
    try:
        if not jobs:
            return

        self.log.debug("Adding jobs to the output (%s)" % jobs)
        if details:
            self.log.info("Asking for jobs details")
            job_ids = [job["jobId"] for job in jobs]
            jobs_to_show = self.__chunked_describe_jobs(job_ids)
        else:
            jobs_to_show = jobs

        for job in jobs_to_show:
            self.log.debug("Adding job to the output (%s)", job)
            # the converter is selected by job type
            converter = self.__JOB_CONVERTERS[get_job_type(job)]
            self.output.add(converter.convert(job))
    except KeyError as e:
        fail("Error building Job item. Key (%s) not found." % e)
    except Exception as e:
        fail("Error adding jobs to the output. Failed with exception: %s" % e)
def main(argv=None):
    """
    Command entrypoint: parse the arguments, normalize the requested statuses and list the jobs.

    :param argv: argument list handed to the parser (None lets the parser use its default)
    """
    try:
        args = _get_parser().parse_args(argv)
        log = config_logger(args.log_level)
        log.info("Input parameters: %s", args)

        config = AWSBatchCliConfig(log=log, cluster=args.cluster)
        boto3_factory = Boto3ClientFactory(region=config.region, proxy=config.proxy)

        # an ordered mapping is used as an ordered set: duplicates removed, input order kept
        requested = OrderedDict.fromkeys((item.strip().upper() for item in args.status.split(",")), "")
        if "ALL" in requested:
            # add all the statuses in the list
            requested = OrderedDict.fromkeys(AWS_BATCH_JOB_STATUS, "")
        job_status = list(requested)

        command = AWSBstatCommand(log, boto3_factory)
        command.run(
            job_status=job_status,
            expand_children=args.expand_children,
            job_ids=args.job_ids,
            job_queue=config.job_queue,
            show_details=args.details,
        )
    except KeyboardInterrupt:
        print("Exiting...")
        sys.exit(0)
    except Exception as e:
        fail("Unexpected error. Command failed with exception: %s" % e)
def config_logger(log_level):
    """
    Define a logger for aws-parallelcluster-awsbatch-cli.

    The logger writes to ~/.parallelcluster/awsbatch-cli.log through a rotating file handler
    (5 MiB per file, one backup). The log directory is created when missing. Calling the
    function more than once reuses the handler already attached for that file, so log lines
    are not duplicated.

    :param log_level: logging level name (case insensitive)
    :return: the logger
    """
    logfile = os.path.expanduser(os.path.join("~", ".parallelcluster", "awsbatch-cli.log"))
    logdir = os.path.dirname(logfile)
    try:
        os.makedirs(logdir)
    except OSError as e:
        # an already existing directory is fine, anything else is an error
        if not (e.errno == errno.EEXIST and os.path.isdir(logdir)):
            fail("Cannot create log file (%s). Failed with exception: %s" % (logfile, e))

    logger = logging.getLogger("awsbatch-cli")

    # the named logger is process-wide: attach the file handler only once
    target = os.path.abspath(logfile)
    already_attached = any(
        isinstance(handler, RotatingFileHandler) and handler.baseFilename == target for handler in logger.handlers
    )
    if not already_attached:
        formatter = logging.Formatter("%(asctime)s %(levelname)s [%(module)s:%(funcName)s] %(message)s")
        logfile_handler = RotatingFileHandler(logfile, maxBytes=5 * 1024 * 1024, backupCount=1)
        logfile_handler.setFormatter(formatter)
        logger.addHandler(logfile_handler)

    try:
        logger.setLevel(log_level.upper())
    except (AttributeError, TypeError, ValueError) as e:
        # AttributeError covers a log_level without upper(), e.g. None
        fail("Error setting log level. Failed with exception: %s" % e)
    return logger
def __init__(self, log, cluster):
    """
    Constructor.

    Credentials and region are first looked up in ~/.parallelcluster/config, when that file
    exists. The remaining settings come from ~/.parallelcluster/awsbatch-cli.cfg when that file
    exists, otherwise from the stack of the given cluster.

    :param log: log
    :param cluster: cluster name
    """
    # defaults, possibly overridden by the parallelcluster config
    self.aws_access_key_id = self.aws_secret_access_key = self.region = self.env_blacklist = None

    config_dir = os.path.join("~", ".parallelcluster")

    pcluster_config_path = os.path.expanduser(os.path.join(config_dir, "config"))
    if os.path.isfile(pcluster_config_path):
        self.__init_from_parallelcluster_config(pcluster_config_path, log)

    # awsbatch-cli config wins over the stack lookup
    cli_config_path = os.path.expanduser(os.path.join(config_dir, "awsbatch-cli.cfg"))
    if os.path.isfile(cli_config_path):
        self.__init_from_config(cli_config_path, cluster, log)
    elif cluster:
        self.__init_from_stack(cluster, log)
    else:
        fail("Error: cluster parameter is required")

    self.__verify_initialization(log)
def config_logger(log_level):
    """
    Define a logger for aws-parallelcluster-awsbatch-cli.

    The logger writes to ~/.parallelcluster/awsbatch-cli.log through a rotating file handler
    (5 MiB per file, one backup). The log directory is created when missing. Calling the
    function more than once reuses the handler already attached for that file, so log lines
    are not duplicated.

    :param log_level: logging level name (case insensitive)
    :return: the logger
    """
    logfile = os.path.expanduser(os.path.join("~", ".parallelcluster", "awsbatch-cli.log"))
    logdir = os.path.dirname(logfile)
    try:
        os.makedirs(logdir)
    except OSError as e:
        # an already existing directory is fine, anything else is an error
        if not (e.errno == errno.EEXIST and os.path.isdir(logdir)):
            fail("Cannot create log file (%s). Failed with exception: %s" % (logfile, e))

    logger = logging.getLogger("awsbatch-cli")

    # the named logger is process-wide: attach the file handler only once
    target = os.path.abspath(logfile)
    already_attached = any(
        isinstance(handler, RotatingFileHandler) and handler.baseFilename == target for handler in logger.handlers
    )
    if not already_attached:
        formatter = logging.Formatter("%(asctime)s %(levelname)s [%(module)s:%(funcName)s] %(message)s")
        logfile_handler = RotatingFileHandler(logfile, maxBytes=5 * 1024 * 1024, backupCount=1)
        logfile_handler.setFormatter(formatter)
        logger.addHandler(logfile_handler)

    try:
        logger.setLevel(log_level.upper())
    except (AttributeError, TypeError, ValueError) as e:
        # AttributeError covers a log_level without upper(), e.g. None
        fail("Error setting log level. Failed with exception: %s" % e)
    return logger
def __init__(self, log, cluster):
    """
    Constructor.

    Credentials and region are first looked up in ~/.parallelcluster/config, when that file
    exists. The remaining settings come from ~/.parallelcluster/awsbatch-cli.cfg when that file
    exists, otherwise from the stack of the given cluster.

    :param log: log
    :param cluster: cluster name
    """
    # defaults, possibly overridden by the parallelcluster config
    self.aws_access_key_id = self.aws_secret_access_key = self.region = self.env_blacklist = None

    base_dir = os.path.join("~", ".parallelcluster")

    parallelcluster_config = os.path.expanduser(os.path.join(base_dir, "config"))
    if os.path.isfile(parallelcluster_config):
        self.__init_from_parallelcluster_config(parallelcluster_config, log)

    # awsbatch-cli config wins over the stack lookup
    cli_config = os.path.expanduser(os.path.join(base_dir, "awsbatch-cli.cfg"))
    if os.path.isfile(cli_config):
        self.__init_from_config(cli_config, cluster, log)
    elif cluster:
        self.__init_from_stack(cluster, log)
    else:
        fail("Error: cluster parameter is required")

    self.__verify_initialization(log)
def main():
    """Command entrypoint: parse the arguments, load the configuration and list the queues."""
    try:
        args = _get_parser().parse_args()
        log = config_logger(args.log_level)
        log.info("Input parameters: %s" % args)

        config = AWSBatchCliConfig(log=log, cluster=args.cluster)
        boto3_factory = Boto3ClientFactory(
            region=config.region,
            proxy=config.proxy,
            aws_access_key_id=config.aws_access_key_id,
            aws_secret_access_key=config.aws_secret_access_key,
        )

        # explicitly requested queues are always shown with details,
        # otherwise the configured queue is shown and details are optional
        explicit_queues = args.job_queues
        job_queues = explicit_queues if explicit_queues else [config.job_queue]
        show_details = True if explicit_queues else args.details

        command = AWSBqueuesCommand(log, boto3_factory)
        command.run(job_queues=job_queues, show_details=show_details)
    except KeyboardInterrupt:
        print("Exiting...")
        sys.exit(0)
    except Exception as e:
        fail("Unexpected error. Command failed with exception: %s" % e)
def __populate_output_by_job_ids(self, job_ids, details, include_parents=False):
    """
    Describe the given jobs and add them, plus the children of array and MNP jobs, to the output.

    :param job_ids: job ids or ARNs
    :param details: ask for job details (only reported in the log message here)
    :param include_parents: when True every described job is added to the output as a parent,
        otherwise only the jobs of type SIMPLE are
    """
    try:
        if not job_ids:
            return

        self.log.info("Describing jobs (%s), details (%s)" % (job_ids, details))
        parents = []
        children_specs = []
        for job in self.__chunked_describe_jobs(job_ids):
            # always add parent job
            if include_parents or get_job_type(job) == "SIMPLE":
                parents.append(job)

            # (parent id, separator, number of children) for jobs that have children
            if is_job_array(job):
                spec = (job["jobId"], ":", job["arrayProperties"]["size"])
            elif is_mnp_job(job):
                spec = (job["jobId"], "#", job["nodeProperties"]["numNodes"])
            else:
                continue
            children_specs.append(spec)

        # parents first, then their children
        self.__add_jobs(parents)
        self.__populate_output_by_parent_ids(children_specs)
    except Exception as e:
        fail("Error describing jobs from AWS Batch. Failed with exception: %s" % e)
def __search_for_job_definition(self, base_job_definition, nodes):
    """
    Search for existing job definition with the same name of the base_job_definition and the same number of nodes.

    Only ACTIVE job definitions are considered. The search stops at the first match, so no
    further pages are requested once a definition has been found.

    :param base_job_definition: job definition arn
    :param nodes: number of nodes
    :return: the found jobDefinition object or None
    """
    job_definition_found = None
    base_job_definition_name = get_job_definition_name_by_arn(base_job_definition)
    try:
        next_token = ""
        # stop paginating as soon as a match is found or the last page has been read
        while next_token is not None and job_definition_found is None:
            response = self.batch_client.describe_job_definitions(
                jobDefinitionName=base_job_definition_name, status="ACTIVE", nextToken=next_token
            )
            for job_definition in response["jobDefinitions"]:
                if job_definition["nodeProperties"]["numNodes"] == nodes:
                    job_definition_found = job_definition
                    break
            next_token = response.get("nextToken")
    except Exception as e:
        fail("Error listing job definition. Failed with exception: %s" % e)

    return job_definition_found
def __populate_output_by_array_ids(self, job_status, job_array_ids, details):
    """
    Add the children of the given job arrays to the output.

    For every job array and every requested status, all the pages of list_jobs are read
    and handed to the output.

    :param job_status: list of job status to ask
    :param job_array_ids: job array ids to ask
    :param details: ask for job details
    """
    try:
        for job_array_id in job_array_ids:
            for status in job_status:
                self.log.info(
                    "Listing job array children for job (%s) in status (%s)" % (job_array_id, status))
                token = ''
                # a missing nextToken in the response marks the last page
                while token is not None:
                    page = self.batch_client.list_jobs(
                        jobStatus=status, arrayJobId=job_array_id, nextToken=token)
                    children = page['jobSummaryList']
                    self.__add_jobs(children, details)
                    token = page.get('nextToken')
    except Exception as e:
        fail("Error listing job array children for job (%s). Failed with exception: %s" % (job_array_id, e))
def __register_new_job_definition(self, base_job_definition_arn, nodes):
    """
    Register a new multinode job definition by using the base_job_definition_arn as starting point.

    The container of the first nodeRangeProperties entry of the base definition is reused and
    applied to a single node range covering all the nodes (0 to nodes - 1). The new definition
    is registered under the same name as the base one.

    :param base_job_definition_arn: job definition arn to use as starting point
    :param nodes: number of nodes to set in the job definition
    :return: the ARN of the created job definition
    """
    try:
        # get base job definition and reuse its nodeRangeProperties
        response = self.batch_client.describe_job_definitions(
            jobDefinitions=[base_job_definition_arn], status='ACTIVE')
        job_definition = response['jobDefinitions'][0]
        base_container = job_definition['nodeProperties']['nodeRangeProperties'][0]['container']

        # create new job definition
        response = self.batch_client.register_job_definition(
            jobDefinitionName=job_definition['jobDefinitionName'],
            type='multinode',
            nodeProperties={
                'numNodes': nodes,
                'mainNode': 0,
                'nodeRangeProperties': [{
                    'targetNodes': '0:%d' % (nodes - 1),
                    'container': base_container,
                }],
            })
        job_definition_arn = response['jobDefinitionArn']
    except Exception as e:
        # this path registers a definition: the message used to say "listing"
        fail("Error registering job definition. Failed with exception: %s" % e)

    return job_definition_arn
def __init__(self, log, cluster):
    """
    Initialize the object.

    The settings come from ~/.parallelcluster/awsbatch-cli.cfg when that file exists,
    otherwise from the stack of the given cluster.

    :param log: log
    :param cluster: cluster name
    """
    self.region = self.env_blacklist = None

    # awsbatch-cli config wins over the stack lookup
    cli_config_path = os.path.expanduser(os.path.join("~", ".parallelcluster", "awsbatch-cli.cfg"))
    config_file_available = os.path.isfile(cli_config_path)

    if config_file_available:
        self.__init_from_config(cli_config_path, cluster, log)
    elif cluster:
        self.__init_from_stack(cluster, log)
    else:
        fail("Error: cluster parameter is required")

    self.__verify_initialization(log)
def __create_host_item(container_instance, ec2_instance):
    """
    Merge container instance and ec2 instance information and create a Host item.

    The instance type is read from the 'ecs.instance-type' attribute of the container instance
    ('-' when absent). Public address and public DNS name are shown as '-' when the EC2
    description has them empty or does not include them at all.

    :param container_instance: the containerInstance object to parse
    :param ec2_instance: the ec2Instance object to parse
    :return: the Host item
    """
    try:
        instance_type = '-'
        for attr in container_instance['attributes']:
            if attr['name'] == 'ecs.instance-type':
                instance_type = attr['value']
                break

        return Host(
            container_instance_arn=container_instance['containerInstanceArn'],
            status=container_instance['status'],
            ec2_instance=container_instance['ec2InstanceId'],
            instance_type=instance_type,
            private_ip_address=ec2_instance['PrivateIpAddress'],
            # public fields are optional: missing key and empty string both mean "none"
            public_ip_address=ec2_instance.get('PublicIpAddress') or '-',
            private_dns_name=ec2_instance['PrivateDnsName'],
            public_dns_name=ec2_instance.get('PublicDnsName') or '-',
            running_jobs=container_instance['runningTasksCount'],
            pending_jobs=container_instance['pendingTasksCount'],
        )
    except KeyError as e:
        fail("Error building Host item. Key (%s) not found." % e)
def __init_from_stack(self, cluster, log):  # noqa: C901 FIXME
    """
    Init object attributes by asking to the stack.

    The stack is described through CloudFormation; when it is in CREATE_COMPLETE or
    UPDATE_COMPLETE status its outputs are copied to the matching attributes and the
    ProxyServer stack parameter, if present, is stored as proxy. Any other status is
    reported through fail().

    :param cluster: cluster name
    :param log: log
    """
    # stack output key -> attribute that receives its value
    output_attributes = {
        "ResourcesS3Bucket": "s3_bucket",
        "ArtifactS3RootDirectory": "artifact_directory",
        "BatchComputeEnvironmentArn": "compute_environment",
        "BatchJobQueueArn": "job_queue",
        "BatchJobDefinitionArn": "job_definition",
        "MasterPrivateIP": "head_node_ip",
        "BatchJobDefinitionMnpArn": "job_definition_mnp",
    }
    try:
        self.stack_name = _get_stack_name(cluster)
        log.info("Describing stack (%s)" % self.stack_name)
        # get required values from the output of the describe-stack command
        # don't use proxy because we are in the client and use default region
        boto3_factory = Boto3ClientFactory(
            region=self.region,
            aws_access_key_id=self.aws_access_key_id,
            aws_secret_access_key=self.aws_secret_access_key,
        )
        cfn_client = boto3_factory.get_client("cloudformation")
        stack = cfn_client.describe_stacks(StackName=self.stack_name).get("Stacks")[0]
        log.debug(stack)
        if self.region is None:
            self.region = get_region_by_stack_id(stack.get("StackId"))
        self.proxy = "NONE"

        stack_status = stack.get("StackStatus")
        if stack_status in ["CREATE_COMPLETE", "UPDATE_COMPLETE"]:
            for output in stack.get("Outputs", []):
                attribute = output_attributes.get(output.get("OutputKey"))
                if attribute:
                    setattr(self, attribute, output.get("OutputValue"))

            for parameter in stack.get("Parameters", []):
                # stack parameters are reported as ParameterKey/ParameterValue;
                # the Output* names are kept only as a backward-compatible fallback
                parameter_key = parameter.get("ParameterKey", parameter.get("OutputKey"))
                if parameter_key == "ProxyServer":
                    self.proxy = parameter.get("ParameterValue", parameter.get("OutputValue"))
                    if not self.proxy == "NONE":
                        log.info("Configured proxy is: %s" % self.proxy)
                    break
        else:
            fail("The cluster is in the (%s) status." % stack_status)

    except (ClientError, ParamValidationError) as e:
        fail("Error getting cluster information from AWS CloudFormation. Failed with exception: %s" % e)
def get_client(self, service):
    """
    Create the boto3 client for a given service, using the stored region, credentials and proxy config.

    :param service: boto3 service
    :return: the boto3 client
    """
    client_options = {
        "region_name": self.region,
        "aws_access_key_id": self.aws_access_key_id,
        "aws_secret_access_key": self.aws_secret_access_key,
        "config": self.proxy_config,
    }
    try:
        return boto3.client(service, **client_options)
    except ClientError as e:
        fail("AWS %s service failed with exception: %s" % (service, e))
def __print_log_stream(self, log_stream, head=None, tail=None, stream=None, stream_period=None):  # noqa: C901 FIXME
    """
    Ask for log stream and print it.

    The events are read from the "/aws/batch/job" log group. A first request is always
    issued; further requests follow the forward token when either the whole stream was
    requested (neither head nor tail given) or stream is set.

    :param log_stream: job log stream
    :param head: when truthy, number of events requested starting from the head of the stream
    :param tail: when truthy (and head is not), number of events requested from the end of the stream
    :param stream: when truthy, keep polling for new events until interrupted
    :param stream_period: seconds to wait between two polls when streaming (5 when not given)
    """
    logs_client = self.boto3_factory.get_client("logs")
    try:
        # The maximum number of log events returned by the get_log_events function is as many log events
        # as can fit in a response size of 1 MB, up to 10,000 log events
        max_limit = 10000
        # head wins over tail; with neither, the maximum page size is requested
        if head:
            limit = head
            start_from_head = True
        elif tail:
            limit = tail
            start_from_head = False
        else:
            limit = max_limit
            start_from_head = False

        response = logs_client.get_log_events(
            logGroupName="/aws/batch/job", logStreamName=log_stream, limit=limit, startFromHead=start_from_head
        )
        events = response["events"]
        self.log.debug(response)
        if not events:
            print("No events found.")
        self.__print_events(events)

        # head/tail requests are satisfied by the first page, unless streaming was asked
        if limit == max_limit or stream:
            # get paginated items
            next_token = response["nextForwardToken"]
            # when streaming the loop only ends on interruption or error
            while next_token is not None or stream:
                self.log.info("Next Forward Token is (%s)" % next_token)
                if stream:
                    period = stream_period if stream_period else 5
                    self.log.info("Waiting other %s seconds..." % period)
                    time.sleep(period)

                response = logs_client.get_log_events(
                    logGroupName="/aws/batch/job", logStreamName=log_stream, nextToken=next_token
                )
                self.__print_events(response["events"])

                # if nextForwardToken is the same we passed in, we reached the end of the stream
                if stream:
                    # keep the token even when unchanged, so the next poll resumes from it
                    next_token = response["nextForwardToken"]
                else:
                    next_token = (
                        response["nextForwardToken"] if response["nextForwardToken"] != next_token else None
                    )
    except KeyboardInterrupt:
        self.log.info("Interrupted by the user")
        exit(0)
    except Exception as e:
        fail("Error listing jobs from AWS Batch. Failed with exception: %s" % e)
def _upload_and_get_command(boto3_factory, args, job_s3_folder, job_name, config, log):
    """
    Build the command to submit, uploading to S3 whatever the job needs.

    Input files are always uploaded. When a command file, stdin or exported environment
    variables are involved, a job script (and optionally an environment file) is uploaded and
    the command becomes a bash invocation; otherwise the command parameter and its arguments
    are used as they are.

    :param boto3_factory: initialized Boto3ClientFactory object
    :param args: input arguments
    :param job_s3_folder: S3 folder for the job files
    :param job_name: job name
    :param config: config object
    :param log: log
    :return: command to submit
    """
    # create S3 folder for the job
    uploader = S3Uploader(boto3_factory, config.s3_bucket, job_s3_folder)

    # upload input files, if there
    if args.input_file:
        for input_path in args.input_file:
            uploader.put_file(input_path, os.path.basename(input_path))

    # upload command, if needed
    if args.command_file or not sys.stdin.isatty() or args.env:
        # define job script name
        job_script = job_name + ".sh"
        log.info("Using command-file option or stdin. Job script name: %s" % job_script)

        env_file = None
        if args.env:
            env_file = job_name + ".env.sh"
            # get environment variables and upload file used to extend the submission environment
            env_blacklist = args.env_blacklist or config.env_blacklist
            _get_env_and_upload(uploader, args.env, env_blacklist, env_file, log)

        # upload job script
        if args.command_file:
            # existing script file
            try:
                uploader.put_file(args.command, job_script)
            except Exception as e:
                fail("Error creating job script. Failed with exception: %s" % e)
        elif not sys.stdin.isatty():
            # stdin
            _get_stdin_and_upload(uploader, job_script)

        # define command to execute
        bash_command = _compose_bash_command(
            args, config.s3_bucket, config.region, job_s3_folder, job_script, env_file
        )
        command = ["/bin/bash", "-c", bash_command]
    elif type(args.command) == str:
        log.info("Using command parameter")
        command = [args.command] + args.arguments
    else:
        fail("Unexpected error. Command cannot be empty.")

    log.info("Command: %s" % shell_join(command))
    return command
def check(self):
    """
    Verify if CLI requirements are satisfied.

    For each requirement the installed version of the package is compared with the required
    one through the requirement's operator; the first unsatisfied requirement is reported
    with fail().
    """
    for req in self.requirements:
        compare = self.COMPARISON_OPERATORS[req.operator]
        installed_version = packaging.version.parse(get_installed_version(req.package))
        required_version = packaging.version.parse(req.version)
        if compare(installed_version, required_version):
            continue
        fail(f"The cluster requires {req.package}{req.operator}{req.version}")
def __init_from_stack(self, cluster, log):  # noqa: C901 FIXME
    """
    Init object attributes by asking to the stack.

    The stack "parallelcluster-<cluster>" is described through CloudFormation; when it is in
    CREATE_COMPLETE or UPDATE_COMPLETE status its outputs are copied to the matching
    attributes and the ProxyServer stack parameter, if present, is stored as proxy. Any other
    status is reported through fail().

    :param cluster: cluster name
    :param log: log
    """
    # stack output key -> attribute that receives its value
    output_attributes = {
        "ResourcesS3Bucket": "s3_bucket",
        "BatchComputeEnvironmentArn": "compute_environment",
        "BatchJobQueueArn": "job_queue",
        "BatchJobDefinitionArn": "job_definition",
        "MasterPrivateIP": "master_ip",
        "BatchJobDefinitionMnpArn": "job_definition_mnp",
    }
    try:
        self.stack_name = "parallelcluster-" + cluster
        log.info("Describing stack (%s)" % self.stack_name)
        # get required values from the output of the describe-stack command
        # don't use proxy because we are in the client and use default region
        boto3_factory = Boto3ClientFactory(
            region=self.region,
            aws_access_key_id=self.aws_access_key_id,
            aws_secret_access_key=self.aws_secret_access_key,
        )
        cfn_client = boto3_factory.get_client("cloudformation")
        stack = cfn_client.describe_stacks(StackName=self.stack_name).get("Stacks")[0]
        log.debug(stack)
        if self.region is None:
            self.region = get_region_by_stack_id(stack.get("StackId"))
        self.proxy = "NONE"

        stack_status = stack.get("StackStatus")
        if stack_status in ["CREATE_COMPLETE", "UPDATE_COMPLETE"]:
            for output in stack.get("Outputs", []):
                attribute = output_attributes.get(output.get("OutputKey"))
                if attribute:
                    setattr(self, attribute, output.get("OutputValue"))

            for parameter in stack.get("Parameters", []):
                # stack parameters are reported as ParameterKey/ParameterValue;
                # the Output* names are kept only as a backward-compatible fallback
                parameter_key = parameter.get("ParameterKey", parameter.get("OutputKey"))
                if parameter_key == "ProxyServer":
                    self.proxy = parameter.get("ParameterValue", parameter.get("OutputValue"))
                    if not self.proxy == "NONE":
                        log.info("Configured proxy is: %s" % self.proxy)
                    break
        else:
            fail("The cluster is in the (%s) status." % stack_status)

    except (ClientError, ParamValidationError) as e:
        fail("Error getting cluster information from AWS CloudFormation. Failed with exception: %s" % e)
def main():
    """
    Command entrypoint: validate the arguments, upload what the job needs and submit it.

    When no job name is given, the name is 'STDIN' for a submission by stdin, otherwise the
    basename of the command with every run of non-word characters replaced by '_'.
    """
    try:
        # parse input parameters and config file
        args = _get_parser().parse_args()
        _validate_parameters(args)
        log = config_logger(args.log_level)
        log.info("Input parameters: %s" % args)
        config = AWSBatchCliConfig(log=log, cluster=args.cluster)
        boto3_factory = Boto3ClientFactory(
            region=config.region,
            proxy=config.proxy,
            aws_access_key_id=config.aws_access_key_id,
            aws_secret_access_key=config.aws_secret_access_key)

        # define job name
        if args.job_name:
            job_name = args.job_name
        else:
            # set a default job name if not specified
            if not sys.stdin.isatty():
                # stdin
                job_name = 'STDIN'
            else:
                # normalize name (raw string: "\W" in a plain literal is an invalid escape sequence)
                job_name = re.sub(r'\W+', '_', os.path.basename(args.command))
            log.info("Job name not specified, setting it to (%s)" % job_name)

        # upload script, if needed, and get related command
        command = _upload_and_get_command(boto3_factory, args, job_name, config.region, config.s3_bucket, log)
        # parse and validate depends_on parameter
        depends_on = _get_depends_on(args)

        job_definition = config.job_definition

        AWSBsubCommand(log, boto3_factory).run(
            job_definition=job_definition,
            job_name=job_name,
            job_queue=config.job_queue,
            command=command,
            vcpus=args.vcpus,
            memory=args.memory,
            array_size=args.array_size,
            dependencies=depends_on,
            retry_attempts=args.retry_attempts,
            timeout=args.timeout,
            master_ip=config.master_ip)
    except KeyboardInterrupt:
        print("Exiting...")
        sys.exit(0)
    except Exception as e:
        fail("Unexpected error. Command failed with exception: %s" % e)
def __init_from_stack(self, cluster, log):
    """
    Init object attributes by asking to the stack.

    The stack "parallelcluster-<cluster>" is described through CloudFormation; when it is in
    CREATE_COMPLETE or UPDATE_COMPLETE status its outputs are copied to the matching
    attributes and the ProxyServer stack parameter, if present, is stored as proxy. Any other
    status is reported through fail().

    :param cluster: cluster name
    :param log: log
    """
    # stack output key -> attribute that receives its value
    output_attributes = {
        'ResourcesS3Bucket': 's3_bucket',
        'BatchComputeEnvironmentArn': 'compute_environment',
        'BatchJobQueueArn': 'job_queue',
        'BatchJobDefinitionArn': 'job_definition',
        'MasterPrivateIP': 'master_ip',
    }
    try:
        self.stack_name = 'parallelcluster-' + cluster
        log.info("Describing stack (%s)" % self.stack_name)
        # get required values from the output of the describe-stack command
        # don't use proxy because we are in the client and use default region
        boto3_factory = Boto3ClientFactory(
            region=self.region,
            aws_access_key_id=self.aws_access_key_id,
            aws_secret_access_key=self.aws_secret_access_key)
        cfn_client = boto3_factory.get_client('cloudformation')
        stack = cfn_client.describe_stacks(StackName=self.stack_name).get("Stacks")[0]
        log.debug(stack)
        if self.region is None:
            self.region = get_region_by_stack_id(stack.get('StackId'))
        self.proxy = 'NONE'

        stack_status = stack.get('StackStatus')
        if stack_status in ['CREATE_COMPLETE', 'UPDATE_COMPLETE']:
            for output in stack.get('Outputs', []):
                attribute = output_attributes.get(output.get('OutputKey'))
                if attribute:
                    setattr(self, attribute, output.get('OutputValue'))

            for parameter in stack.get('Parameters', []):
                # stack parameters are reported as ParameterKey/ParameterValue;
                # the Output* names are kept only as a backward-compatible fallback
                parameter_key = parameter.get('ParameterKey', parameter.get('OutputKey'))
                if parameter_key == 'ProxyServer':
                    self.proxy = parameter.get('ParameterValue', parameter.get('OutputValue'))
                    if not self.proxy == "NONE":
                        log.info("Configured proxy is: %s" % self.proxy)
                    break
        else:
            fail("The cluster is in the (%s) status." % stack_status)

    except (ClientError, ParamValidationError) as e:
        fail("Error getting cluster information from AWS CloudFormation. Failed with exception: %s" % e)
def get_client(self, service):
    """
    Create the boto3 client for a given service, using the stored region and proxy config.

    :param service: boto3 service.
    :return: the boto3 client
    """
    client_options = {"region_name": self.region, "config": self.proxy_config}
    try:
        return boto3.client(service, **client_options)
    except ClientError as e:
        fail("AWS %s service failed with exception: %s" % (service, e))
def __verify_initialization(self, log):
    """
    Check that every required attribute has been set, logging each of them at debug level.

    A missing attribute is reported through fail().

    :param log: log
    """
    try:
        log.debug("stack_name = %s", self.stack_name)
        log.debug("region = %s", self.region)
        log.debug("s3_bucket = %s", self.s3_bucket)
        log.debug("compute_environment = %s", self.compute_environment)
        log.debug("job_queue = %s", self.job_queue)
        log.debug("job_definition = %s", self.job_definition)
        log.debug("master_ip = %s", self.master_ip)
        log.info(self)
    except AttributeError as e:
        # the two literals are joined at compile time: keep the separating space
        fail(
            "Error getting cluster information from AWS CloudFormation. "
            "Missing attribute (%s) from the output CloudFormation stack." % e
        )
def __new_queue(queue):
    """
    Build a Queue object out of a jobQueue description.

    :param queue: the jobQueue object to parse
    :return: a Queue object
    """
    try:
        arn = queue['jobQueueArn']
        name = queue['jobQueueName']
        priority = queue['priority']
        status = queue['status']
        status_reason = queue['statusReason']
        return Queue(arn=arn, name=name, priority=priority, status=status, status_reason=status_reason)
    except KeyError as e:
        fail("Error building Queue item. Key (%s) not found." % e)
def __init__(self, requirements_string):
    """
    Parse a comma separated list of requirements of the form <package><operator><version>.

    Every item is turned into a CliRequirement and stored in self.requirements. An item that
    does not have the expected form is reported through fail().

    :param requirements_string: comma separated requirements, e.g. "package>=1.0,other==2.1"
    """
    try:
        self.requirements = []
        for requirement_string in requirements_string.split(","):
            match = re.search(r"([\w+_-]+)([<>=]+)([\d.]+)", requirement_string)
            if match is None:
                # without this check a malformed item surfaced as an uncaught AttributeError
                raise ValueError(requirement_string)
            self.requirements.append(
                CliRequirement(package=match.group(1), operator=match.group(2), version=match.group(3))
            )
    except (IndexError, ValueError):
        fail(f"Unable to parse ParallelCluster AWS Batch CLI requirements: '{requirements_string}'")
def _add_host_items(self, ecs_cluster_arn, container_instances_arns, instance_ids=None):
    """
    Describe the given container instances and add one Host per instance to the output.

    Container information is merged with the description of the matching EC2 instance.

    :param ecs_cluster_arn: ECS Cluster arn
    :param container_instances_arns: container ids
    :param instance_ids: hosts requested; when given, only these EC2 instance ids are added
    """
    self.log.info("Container ARNs = %s" % container_instances_arns)
    if not container_instances_arns:
        return

    response = self.ecs_client.describe_container_instances(
        cluster=ecs_cluster_arn, containerInstances=container_instances_arns
    )
    container_instances = response["containerInstances"]
    self.log.debug("Container Instances = %s" % container_instances)

    # get ec2_instance_ids
    ec2_instances_ids = [item["ec2InstanceId"] for item in container_instances]

    # get ec2 instances information, indexed by instance id
    ec2_instances = {}
    try:
        ec2_client = self.boto3_factory.get_client("ec2")
        pages = ec2_client.get_paginator("describe_instances").paginate(InstanceIds=ec2_instances_ids)
        for page in pages:
            for reservation in page["Reservations"]:
                for described in reservation["Instances"]:
                    ec2_instances[described["InstanceId"]] = described
    except Exception as e:
        fail("Error listing EC2 instances from AWS EC2. Failed with exception: %s" % e)

    # merge ec2 and container information
    for container_instance in container_instances:
        ec2_instance_id = container_instance["ec2InstanceId"]
        # filter by instance_id if there
        if instance_ids and ec2_instance_id not in instance_ids:
            continue
        self.log.debug("Container Instance = %s" % container_instance)
        self.log.debug("EC2 Instance = %s" % ec2_instances[ec2_instance_id])
        host = self.__create_host_item(container_instance, ec2_instances[ec2_instance_id])
        self.output.add(host)
def __verify_initialization(self, log): try: log.debug("stack_name = %s", self.stack_name) log.debug("region = %s", self.region) log.debug("s3_bucket = %s", self.s3_bucket) log.debug("compute_environment = %s", self.compute_environment) log.debug("job_queue = %s", self.job_queue) log.debug("job_definition = %s", self.job_definition) log.debug("master_ip = %s", self.master_ip) log.info(self) except AttributeError as e: fail( "Error getting cluster information from AWS CloudFormation." "Missing attribute (%s) from the output CloudFormation stack." % e )
def _get_stdin_and_upload(s3_uploader, job_script):
    """
    Read the job script from STDIN and upload it to S3.

    The standard input is copied as raw bytes into a temporary file, whose name is then
    passed to the uploader.

    :param s3_uploader: S3Uploader object
    :param job_script: job script name
    """
    try:
        with os.fdopen(sys.stdin.fileno(), "rb") as stdin_bytes, tempfile.NamedTemporaryFile() as script_file:
            shutil.copyfileobj(stdin_bytes, script_file)
            # the uploader only receives the file name: flush the buffered data first
            script_file.flush()
            s3_uploader.put_file(script_file.name, job_script)
    except Exception as e:
        fail("Error creating job script. Failed with exception: %s" % e)
def get_client(self, service):
    """
    Create the boto3 client for the requested service.

    The client is configured with the region, the credentials and the proxy configuration
    stored in this factory. A ClientError is reported through ``fail``.

    :param service: boto3 service.
    :return: the boto3 client
    """
    client_options = {
        "region_name": self.region,
        "aws_access_key_id": self.aws_access_key_id,
        "aws_secret_access_key": self.aws_secret_access_key,
        "config": self.proxy_config,
    }
    try:
        return boto3.client(service, **client_options)
    except ClientError as e:
        fail("AWS %s service failed with exception: %s" % (service, e))
def __new_queue(queue):
    """
    Build a Queue object out of a jobQueue description.

    All the keys listed below must be present in the description; a missing one is
    reported through ``fail``.

    :param queue: the jobQueue object to parse
    :return: a Queue object
    """
    # (Queue argument, jobQueue key) pairs, read in this order
    field_keys = (
        ("arn", "jobQueueArn"),
        ("name", "jobQueueName"),
        ("priority", "priority"),
        ("status", "status"),
        ("status_reason", "statusReason"),
    )
    try:
        return Queue(**{field: queue[key] for field, key in field_keys})
    except KeyError as e:
        fail("Error building Queue item. Key (%s) not found." % e)
def __init_output(self, job_queues):
    """
    Fill the output with the description of the given job queues.

    The queues are described through the AWS Batch client and each returned item is
    converted into a Queue object before being added to the output.

    :param job_queues: a list of job queues
    """
    try:
        # ask AWS Batch for the queues
        batch_client = self.boto3_factory.get_client("batch")
        response = batch_client.describe_job_queues(jobQueues=job_queues)
        described_queues = response["jobQueues"]
        self.log.info("Job Queues: %s" % job_queues)
        self.log.debug(described_queues)
        for described_queue in described_queues:
            self.output.add(self.__new_queue(queue=described_queue))
    except Exception as e:
        fail("Error listing queues from AWS Batch. Failed with exception: %s" % e)
def __get_log_stream(self, job_id):
    """
    Retrieve the name of the log stream of the given job.

    The job is described through the AWS Batch client. For a job with ``nodeProperties``
    the container of the first node range is used, otherwise the job ``container`` if any.
    Only jobs of type SIMPLE have an output; a message is printed when the container has
    no log stream yet.

    :param job_id: job id (ARN)
    :return: the log_stream if there, or None
    """
    stream_name = None
    try:
        batch_client = self.boto3_factory.get_client("batch")
        described_jobs = batch_client.describe_jobs(jobs=[job_id])["jobs"]
        if len(described_jobs) != 1:
            fail("Error asking job output for job (%s). Job not found." % job_id)
        else:
            job = described_jobs[0]
            self.log.debug(job)

            if "nodeProperties" in job:
                # MNP job
                container = job["nodeProperties"]["nodeRangeProperties"][0]["container"]
            else:
                container = job.get("container", {})

            if get_job_type(job) != "SIMPLE":
                fail("No output available for the Job (%s). Please ask for its children." % job["jobId"])
            elif "logStreamName" in container:
                stream_name = container["logStreamName"]
            else:
                print("No log stream found for job (%s) in the status (%s)" % (job_id, job["status"]))
    except Exception as e:
        fail("Error listing jobs from AWS Batch. Failed with exception: %s" % e)
    return stream_name
def _validate_parameters(args): """ Validate input parameters. :param args: args variable """ if args.head: if args.tail: fail("Parameters validation error: --tail and --head option cannot be set at the same time") if args.stream: fail("Parameters validation error: --stream and --head option cannot be set at the same time") if args.stream_period and not args.stream: fail("Parameters validation error: --stream-period can be used only with --stream option")
def __init_from_config(self, cli_config_file, cluster, log): # noqa: C901 FIXME """ Init object attributes from awsbatch-cli configuration file. :param cli_config_file: awsbatch-cli config :param cluster: cluster name :param log: log """ with open(cli_config_file) as config_file: log.info("Searching for configuration file %s" % cli_config_file) config = ConfigParser() config.read_file(config_file) # use cluster if there or search for default value in [main] section of the config file try: cluster_name = cluster if cluster else config.get("main", "cluster_name") except NoSectionError as e: fail("Error getting the section [%s] from the configuration file (%s)" % (e.section, cli_config_file)) except NoOptionError as e: fail( "Error getting the option (%s) from the section [%s] of the configuration file (%s)" % (e.option, e.section, cli_config_file) ) cluster_section = "cluster {0}".format(cluster_name) try: self.region = config.get("main", "region") except NoOptionError: pass try: self.env_blacklist = config.get("main", "env_blacklist") except NoOptionError: pass try: self.stack_name = "parallelcluster-" + cluster_name log.info("Stack name is (%s)" % self.stack_name) # if region is set for the current stack, override the region from the AWS ParallelCluster config file # or the region from the [main] section self.region = config.get(cluster_section, "region") self.s3_bucket = config.get(cluster_section, "s3_bucket") self.compute_environment = config.get(cluster_section, "compute_environment") self.job_queue = config.get(cluster_section, "job_queue") self.job_definition = config.get(cluster_section, "job_definition") try: self.job_definition_mnp = config.get(cluster_section, "job_definition_mnp") except NoOptionError: pass self.master_ip = config.get(cluster_section, "master_ip") # get proxy self.proxy = config.get(cluster_section, "proxy") if not self.proxy == "NONE": log.info("Configured proxy is: %s" % self.proxy) except NoSectionError: # initialize by getting stack 
info self.__init_from_stack(cluster_name, log) except NoOptionError as e: fail( "Error getting the option (%s) from the section [%s] of the configuration file (%s)" % (e.option, e.section, cli_config_file) )