def __call__(self):
    """ Retrieve the current usage of the relevant AWS resources and compare
    it with the service quotas.

    Thresholds are requested from self.checker for the EC2 and
    CloudFormation services. For EC2, only metrics whose name starts with
    'Running On-Demand' are taken into account.

    Critical findings for all metrics are accumulated and reported together
    in a single UserReportError. If there are no critical findings, the
    accumulated warnings are logged at WARNING level.

    Throws a UserReportError (DEPENDENCY_ERROR) if there aren't enough
    resources available to run ElasticBLAST
    """
    SERVICES = ['EC2', 'CloudFormation']
    result = self.checker.check_thresholds(service=SERVICES)
    if not result:
        # No service thresholds were exceeded :)
        return

    # Accumulated messages live in their own containers; the per-metric
    # lists returned by the limit object must never be bound to the same
    # names, otherwise earlier messages are lost.
    fatal_errors = []
    warning_messages = []
    for svc_name, metrics in result.items():
        for usage_metric, aws_limit in metrics.items():
            if svc_name == 'EC2' and not usage_metric.startswith('Running On-Demand'):
                continue
            criticals = aws_limit.get_criticals()
            metric_warnings = aws_limit.get_warnings()
            if criticals:
                for c in criticals:
                    fatal_errors.append(
                        f'{svc_name} metric "{usage_metric}" has reached a critical usage level ({c}) that is too close to the limit ({aws_limit.get_limit()}) to run ElasticBLAST. '
                    )
            elif metric_warnings:
                # Warnings are only reported for metrics without criticals
                for w in metric_warnings:
                    warning_messages.append(
                        f'{svc_name} metric "{usage_metric}" has reached a level of usage ({w}) that is close to the limit ({aws_limit.get_limit()}) and may run into problems. '
                    )

    if fatal_errors:
        raise UserReportError(DEPENDENCY_ERROR, ''.join(fatal_errors))
    if warning_messages:
        logging.warning(''.join(warning_messages))
def assemble_query_file_list(cfg: ElasticBlastConfig) -> List[str]:
    """Build the list of query files named by the configuration.

    cfg.blast.queries_arg is split on whitespace. An entry ending with
    QUERY_LIST_EXT is opened and read line by line: every non-blank line
    (trailing whitespace removed) is taken as one query file. Any other
    entry is taken as a query file itself.

    Entries read from a list file that start with 'gs://' or 's3://' are
    checked with validate_cloud_storage_object_uri; all failures are
    gathered and reported together.

    Returns:
        The query files, in the order they were encountered.

    Raises:
        UserReportError: with INPUT_ERROR if at least one cloud URI read
            from a list file failed validation.
    """
    problems = []
    collected = []
    for query_file in cfg.blast.queries_arg.split():
        if not query_file.endswith(QUERY_LIST_EXT):
            # Plain query file, use as is
            collected.append(query_file)
            continue

        with open_for_read(query_file) as list_file:
            for raw_line in list_file:
                query_file_from_list = raw_line.rstrip()
                if not query_file_from_list:
                    continue
                if query_file_from_list.startswith(('gs://', 's3://')):
                    try:
                        validate_cloud_storage_object_uri(query_file_from_list)
                    except ValueError as err:
                        problems.append(
                            f'Incorrect query file URI "{query_file_from_list}" in list file "{query_file}": {err}'
                        )
                # The entry is kept even when invalid; the error raised
                # below prevents the list from being returned in that case
                collected.append(query_file_from_list)

    if problems:
        raise UserReportError(returncode=INPUT_ERROR, message='\n'.join(problems))
    return collected
def reject_string_with_unicode(content: str) -> None:
    """Reject a string containing any character with a code point above 255.

    Raises:
        UserReportError: with constants.INPUT_ERROR if such a character is
            present in content.
    """
    if any(ord(ch) > 255 for ch in content):
        raise UserReportError(
            returncode=constants.INPUT_ERROR,
            message=f"Command line has Unicode letters in argument '{content}', can't be processed"
        )
def __call__(self) -> None:
    """ Retrieve the current usage of the relevant AWS Batch resources and
    compare it with the service quotas.

    One more job queue and one more compute environment than currently
    counted must stay below the respective entry of self._service_quotas
    ('Job queue limit', 'Compute environment limit').

    Throws a UserReportError (DEPENDENCY_ERROR) if there aren't enough
    resources available to run ElasticBLAST
    """
    njq = self._count_aws_batch_job_queues()
    # The compute environment quota must be compared against the number of
    # compute environments, not against the number of job queues.
    # NOTE(review): assumes this class provides
    # _count_aws_batch_compute_environments next to
    # _count_aws_batch_job_queues - confirm.
    nce = self._count_aws_batch_compute_environments()
    logging.debug(f'AWS Batch usage: number of job queues {njq}')
    logging.debug(f'AWS Batch usage: number of compute environments {nce}')

    if njq + 1 >= self._service_quotas['Job queue limit']:
        raise UserReportError(
            DEPENDENCY_ERROR,
            OUT_OF_QUOTA_ERR_MSG.format('batch job queue'))
    if nce + 1 >= self._service_quotas['Compute environment limit']:
        raise UserReportError(
            DEPENDENCY_ERROR,
            OUT_OF_QUOTA_ERR_MSG.format('batch compute environment'))
def check_submit_data(query_files: List[str], cfg: ElasticBlastConfig) -> None:
    """ Verify the inputs and the output location of a submission.

    Parameters:
        query_files - list of query files, each one is passed to
                      check_for_read
        cfg - configuration providing cluster.dry_run and cluster.results
              (the results bucket, passed to check_dir_for_write)

    Raises:
        UserReportError: with INPUT_ERROR for the first query file that
            triggers FileNotFoundError, or with PERMISSIONS_ERROR if the
            write check of the results bucket triggers PermissionError.
    """
    dry_run = cfg.cluster.dry_run

    for query_file in query_files:
        try:
            check_for_read(query_file, dry_run)
        except FileNotFoundError:
            raise UserReportError(
                INPUT_ERROR,
                f'Query input {query_file} is not readable or does not exist')

    bucket = cfg.cluster.results
    try:
        check_dir_for_write(bucket, dry_run)
    except PermissionError:
        raise UserReportError(PERMISSIONS_ERROR,
                              f'Cannot write into bucket {bucket}')
def check_memory_requirements(cfg: ElasticBlastConfig):
    """ Ensure the configured machine type has enough memory for the database.

    The requirement is the size reported by get_blastdb_size multiplied by
    cfg.blast.db_mem_margin; it is compared with the memory property
    reported by get_machine_properties for cfg.cluster.machine_type.

    Raises:
        UserReportError: with BLASTDB_ERROR if get_blastdb_size raises
            ValueError.
        RuntimeError: if the machine memory is less than the requirement.
    """
    db = cfg.blast.db
    try:
        dbsize = get_blastdb_size(db, cfg.blast.db_source)
    except ValueError as err:
        raise UserReportError(returncode=BLASTDB_ERROR, message=str(err))

    db_mem_req = dbsize * cfg.blast.db_mem_margin

    machine_type = cfg.cluster.machine_type
    machine_mem = get_machine_properties(machine_type).memory

    if machine_mem < db_mem_req:
        raise RuntimeError(
            f'Database {db} requires {db_mem_req:.3f}GB RAM for processing, machine {machine_type} provides only {machine_mem:.3f}GB'
        )
def main():
    """Local main entry point which sets up arguments, undo stack, and
    processes exceptions

    Returns:
        The value returned by the selected subcommand function
        (args.func), or constants.DEPENDENCY_ERROR for a SafeExecError,
        the returncode of a UserReportError, or constants.INTERRUPT_ERROR
        on KeyboardInterrupt.

    In all cases the clean-up stack is processed on the way out; if
    clean_up reports any messages they are logged as errors and the process
    exits with constants.UNKNOWN_ERROR.
    """
    # Bound before the try block so that the finally clause can always
    # reference it, even if the very first statement in the try block
    # raises (signal.signal raises ValueError outside of the main thread).
    clean_up_stack = []
    try:
        signal.signal(signal.SIGINT, signal.default_int_handler)
        # Check parameters for Unicode letters and reject if codes higher than 255 occur
        reject_cli_args_with_unicode(sys.argv[1:])
        parser = create_arg_parser()
        args = parser.parse_args()
        if not args.subcommand:
            # report missing command line task
            raise UserReportError(returncode=constants.INPUT_ERROR,
                                  message=NO_TASK_MSG)
        config_logging(args)
        cfg = configure(args)
        logging.info(f"ElasticBLAST {args.subcommand} {VERSION}")
        task = ElbCommand(args.subcommand.lower())
        cfg = ElasticBlastConfig(cfg, task=task)
        logging.debug(pprint.pformat(cfg.asdict()))
        check_prerequisites(cfg)
        #TODO: use cfg only when args.wait, args.sync, and args.run_label are replicated in cfg
        return args.func(args, cfg, clean_up_stack)
    except (SafeExecError, UserReportError) as e:
        logging.error(e.message)
        # SafeExecError return code is the exit code from command line
        # application ran via subprocess
        if isinstance(e, SafeExecError):
            return constants.DEPENDENCY_ERROR
        return e.returncode
    except KeyboardInterrupt:
        return constants.INTERRUPT_ERROR
    #TODO: process filehelper.TarReadError here
    finally:
        messages = clean_up(clean_up_stack)
        if messages:
            for msg in messages:
                logging.error(msg)
            # Clean-up problems override whatever return value was pending
            sys.exit(constants.UNKNOWN_ERROR)
def submit(args, cfg, clean_up_stack):
    """ Entry point to submit an ElasticBLAST search

    Parameters:
        args - parsed command line arguments (not read by the active code
               in this function; only the disabled sync mode refers to it)
        cfg - configuration; validated here for the SUBMIT command
        clean_up_stack - list of callables, extended here with the clean-up
                         actions for the steps already performed

    Returns:
        0 after the search was submitted

    Raises:
        UserReportError: with CLUSTER_ERROR if check_running_cluster
            reports a search for the same results location, with
            BLASTDB_ERROR if get_blastdb_size raises ValueError, or with
            INPUT_ERROR if more kubernetes job files were generated than
            get_maximum_number_of_allowed_k8s_jobs permits.

    When the cloud provider is AWS the search is handed over to
    ElasticBlastAws and the function returns right away; the rest of the
    function generates kubernetes job files and submits them.
    """
    dry_run = cfg.cluster.dry_run
    cfg.validate(ElbCommand.SUBMIT)

    # For now, checking resources is only implemented for AWS
    if cfg.cloud_provider.cloud == CSP.AWS:
        check_resource_quotas(cfg)
    else:
        enable_gcp_api(cfg)

    # Refuse to submit if a search for the same results location exists
    if check_running_cluster(cfg):
        raise UserReportError(
            CLUSTER_ERROR,
            'An ElasticBLAST search that will write results to '
            f'{cfg.cluster.results} has already been submitted.\n'
            'Please resubmit your search with a different value '
            'for "results" configuration parameter or delete '
            'the previous ElasticBLAST search by running elastic-blast delete.'
        )

    query_files = assemble_query_file_list(cfg)
    check_submit_data(query_files, cfg)

    #mode_str = "synchronous" if args.sync else "asynchronous"
    #logging.info(f'Running ElasticBLAST on {cfg.cloud_provider.cloud.name} in {mode_str} mode')

    # split FASTA query into batches
    # The clean-up action is registered before the split is attempted
    clean_up_stack.append(cleanup_temp_bucket_dirs)
    queries, query_length = split_query(query_files, cfg)

    # setup taxonomy filtering, if requested
    setup_taxid_filtering(cfg)

    # FIXME: this is a temporary code arrangement
    if cfg.cloud_provider.cloud == CSP.AWS:
        elastic_blast = ElasticBlastAws(cfg, create=True)
        upload_split_query_to_bucket(cfg, clean_up_stack, dry_run)
        elastic_blast.upload_query_length(query_length)
        elastic_blast.submit(queries)
        return 0

    # Everything below is only reached when the cloud provider is not AWS
    k8s_job_limit = get_maximum_number_of_allowed_k8s_jobs(dry_run)

    # check database availability
    # Only the ValueError matters here, the returned size is not used
    try:
        get_blastdb_size(cfg.blast.db, cfg.blast.db_source)
    except ValueError as err:
        raise UserReportError(returncode=BLASTDB_ERROR, message=str(err))

    # check_memory_requirements(cfg)  # FIXME: EB-281, EB-313

    usage_reporting = get_usage_reporting()

    db, db_path, db_label = get_blastdb_info(cfg.blast.db)

    # Job generation
    job_template_text = read_job_template(cfg=cfg)
    program = cfg.blast.program

    # prepare substitution for current template
    # TODO consider template using cfg variables directly as, e.g. ${blast.program}
    subs = {
        'ELB_BLAST_PROGRAM': program,
        'ELB_DB': db,
        'ELB_DB_LABEL': db_label,
        'ELB_MEM_REQUEST': str(cfg.blast.mem_request),
        'ELB_MEM_LIMIT': str(cfg.blast.mem_limit),
        'ELB_BLAST_OPTIONS': cfg.blast.options,
        # FIXME: EB-210
        # NOTE(review): the factor 60 suggests timeouts.blast_k8s is in
        # minutes and the template expects seconds - confirm
        'ELB_BLAST_TIMEOUT': str(cfg.timeouts.blast_k8s * 60),
        'BUCKET': cfg.cluster.results,
        'ELB_NUM_CPUS': str(cfg.cluster.num_cpus),
        'ELB_DB_MOL_TYPE': ElbSupportedPrograms().get_molecule_type(program),
        'ELB_DOCKER_IMAGE': ELB_DOCKER_IMAGE,
        'ELB_TIMEFMT': '%s%N',  # timestamp in nanoseconds
        'BLAST_ELB_JOB_ID': uuid.uuid4().hex,
        'BLAST_USAGE_REPORT': str(usage_reporting).lower(),
        'K8S_JOB_GET_BLASTDB': K8S_JOB_GET_BLASTDB,
        'K8S_JOB_LOAD_BLASTDB_INTO_RAM': K8S_JOB_LOAD_BLASTDB_INTO_RAM,
        'K8S_JOB_IMPORT_QUERY_BATCHES': K8S_JOB_IMPORT_QUERY_BATCHES,
        'K8S_JOB_BLAST': K8S_JOB_BLAST,
        'K8S_JOB_RESULTS_EXPORT': K8S_JOB_RESULTS_EXPORT
    }

    # Job files are written into a temporary directory, so they have to be
    # submitted before the with block is left
    with TemporaryDirectory() as job_path:
        job_files = write_job_files(job_path, 'batch_', job_template_text, queries, **subs)
        if len(job_files) > k8s_job_limit:
            batch_len = cfg.blast.batch_len
            # Smallest batch length for which the total query length
            # yields at most k8s_job_limit batches
            suggested_batch_len = int(query_length / k8s_job_limit) + 1
            msg = f'The batch size specified ({batch_len}) led to creating {len(job_files)} kubernetes jobs, which exceeds the limit on number of jobs ({k8s_job_limit}). Please increase the batch-len parameter to at least {suggested_batch_len}.'
            raise UserReportError(INPUT_ERROR, msg)
        logging.debug('Generated %d job files', len(job_files))
        # NOTE(review): indexing job_files[0] assumes at least one job file
        # was generated - confirm that write_job_files guarantees this
        logging.debug(f'Job #1 file: {job_files[0]}')
        logging.debug('Command to run in the pod:')
        # Log only the first line of the first job file mentioning '-query'
        with open(job_files[0]) as f:
            for line in f:
                if line.find('-query') >= 0:
                    logging.debug(line.strip())
                    break

        upload_split_query_to_bucket(cfg, clean_up_stack, dry_run)
        initialize_cluster(cfg, db, db_path, clean_up_stack)

        logging.info('Submitting jobs to cluster')
        # These two entries only emit debug messages when the clean-up
        # stack is processed; they bracket the job submission
        clean_up_stack.append(
            lambda: logging.debug('Before submission computational jobs'))
        job_names = submit_jobs(Path(job_path), dry_run=dry_run)
        clean_up_stack.append(
            lambda: logging.debug('After submission computational jobs'))
        if job_names:
            logging.debug(f'Job #1 name: {job_names[0]}')

        # Sync mode disabled per EB-700
        #if args.sync:
        #    while True:
        #        try:
        #            pending, running, succeeded, failed = get_status(args.run_label, dry_run=dry_run)
        #        except RuntimeError as e:
        #            returncode = e.args[0]
        #            logging.error(f'Error while getting job status: {e.args[1]}, returncode: {returncode}')
        #            # TODO: maybe analyze situation in more details here. It happens when kubectl can't be found
        #            # or cluster connection can't be established. If the latter, maybe try to get GKE credentials again
        #        except ValueError as e:
        #            returncode = 1
        #            logging.error(f'Error while getting job status: {e}')
        #            # This error happens when run-label is malformed, it will not repair, so exit here
        #            break
        #        else:
        #            if pending + running:
        #                logging.debug(f'Pending {pending}, Running {running}, Succeeded {succeeded}, Failed {failed}')
        #            else:
        #                logging.info(f'Done: {succeeded} jobs succeeded, {failed} jobs failed')
        #                break
        #        time.sleep(20)  # TODO: make this a parameter (granularity)
        #    logging.info('Deleting cluster')
        #else:
        # Submission succeeded: drop the clean-up actions registered so far
        # and leave only the collection of the kubernetes logs
        clean_up_stack.clear()
        clean_up_stack.append(lambda: collect_k8s_logs(cfg))

    return 0