예제 #1
0
    def __call__(self):
        """ Retrieve the current usage of the relevant AWS resources and compare it with the service quotas.

        Usage thresholds are checked for the EC2 and CloudFormation services. For EC2,
        only metrics whose name starts with 'Running On-Demand' are considered.
        For every metric, critical findings take precedence: its warnings are
        reported only when it has no criticals. Accumulated warnings are logged.

        Throws a UserReportError (DEPENDENCY_ERROR) if there aren't enough resources available to run ElasticBLAST
        """
        SERVICES = ['EC2', 'CloudFormation']
        result = self.checker.check_thresholds(service=SERVICES)
        if not result:
            # No service thresholds were exceeded :)
            return

        # Message fragments are collected in lists that are kept separate from
        # the per-metric findings, so that the accumulators are never rebound
        # to (or extended while iterating over) the list returned by a limit.
        fatal_errors = []
        warning_messages = []
        for svc_name, metrics in result.items():
            for usage_metric, aws_limit in metrics.items():
                if svc_name == 'EC2' and not usage_metric.startswith('Running On-Demand'):
                    continue
                criticals = aws_limit.get_criticals()
                limit_warnings = aws_limit.get_warnings()
                if criticals:
                    for c in criticals:
                        fatal_errors.append(f'{svc_name} metric "{usage_metric}" has reached a critical usage level ({c}) that is too close to the limit ({aws_limit.get_limit()}) to run ElasticBLAST. ')
                elif limit_warnings:
                    for w in limit_warnings:
                        warning_messages.append(f'{svc_name} metric "{usage_metric}" has reached a level of usage ({w}) that is close to the limit ({aws_limit.get_limit()}) and may run into problems. ')
        if fatal_errors:
            raise UserReportError(DEPENDENCY_ERROR, ''.join(fatal_errors))
        if warning_messages:
            logging.warning(''.join(warning_messages))
예제 #2
0
def assemble_query_file_list(cfg: ElasticBlastConfig) -> List[str]:
    """Build the flat list of query files named by cfg.blast.queries_arg.

    cfg.blast.queries_arg is a whitespace-separated string of file names.
    A name ending in QUERY_LIST_EXT is opened and read as a list file: every
    non-blank line (trailing whitespace removed) is one query file. Any other
    name is taken as a query file itself.

    Entries of a list file that start with gs:// or s3:// are validated as
    cloud storage object URIs. All validation failures are gathered and
    reported together in a single UserReportError (INPUT_ERROR).

    Returns the list of query files in the order they were encountered.
    """
    cloud_prefixes = ('gs://', 's3://')
    errors = []
    collected = []
    for entry in cfg.blast.queries_arg.split():
        if not entry.endswith(QUERY_LIST_EXT):
            # A plain query file, used as is
            collected.append(entry)
            continue

        with open_for_read(entry) as list_file:
            for raw_line in list_file:
                listed = raw_line.rstrip()
                if not listed:
                    # Blank lines in a list file are ignored
                    continue
                if listed.startswith(cloud_prefixes):
                    try:
                        validate_cloud_storage_object_uri(listed)
                    except ValueError as err:
                        errors.append(
                            f'Incorrect query file URI "{listed}" in list file "{entry}": {err}'
                        )
                # The entry is recorded even when invalid; the error raised
                # below prevents the list from being returned in that case
                collected.append(listed)

    if errors:
        raise UserReportError(returncode=INPUT_ERROR, message=('\n'.join(errors)))

    return collected
예제 #3
0
def reject_string_with_unicode(content: str) -> None:
    """Reject a string containing any character with a code point above 255.

    Raises UserReportError (INPUT_ERROR) naming the offending argument;
    returns None when every character of content is within 0..255.
    """
    has_wide_char = any(ord(ch) > 255 for ch in content)
    if has_wide_char:
        raise UserReportError(
            returncode=constants.INPUT_ERROR,
            message=f"Command line has Unicode letters in argument '{content}', can't be processed")
예제 #4
0
    def __call__(self) -> None:
        """ Retrieve the current usage of the relevant AWS Batch resources and compare it with the service quotas.

        The number of existing job queues and the number of existing compute
        environments are each compared with their own entry in
        self._service_quotas ('Job queue limit' and 'Compute environment limit').

        Throws a UserReportError (DEPENDENCY_ERROR) if there aren't enough resources available to run ElasticBLAST
        """
        njq = self._count_aws_batch_job_queues()
        # Compute environments need their own count: reusing the job queue
        # count here would check the compute environment quota against the
        # wrong number.
        # NOTE(review): assumes the class provides
        # _count_aws_batch_compute_environments() alongside the job queue
        # counter - confirm against the class definition.
        nce = self._count_aws_batch_compute_environments()
        logging.debug(f'AWS Batch usage: number of job queues {njq}')
        logging.debug(f'AWS Batch usage: number of compute environments {nce}')

        if njq + 1 >= self._service_quotas['Job queue limit']:
            raise UserReportError(
                DEPENDENCY_ERROR,
                OUT_OF_QUOTA_ERR_MSG.format('batch job queue'))
        if nce + 1 >= self._service_quotas['Compute environment limit']:
            raise UserReportError(
                DEPENDENCY_ERROR,
                OUT_OF_QUOTA_ERR_MSG.format('batch compute environment'))
예제 #5
0
def check_submit_data(query_files: List[str], cfg: ElasticBlastConfig) -> None:
    """ Verify that every query file can be read and that the results bucket can be written to
        Parameters:
           query_files - list of query files
           cfg - configuration providing the dry-run flag (cfg.cluster.dry_run)
                 and the results bucket (cfg.cluster.results)
        Raises:
           UserReportError with INPUT_ERROR for the first query file whose
           check raises FileNotFoundError, or with PERMISSIONS_ERROR if the
           check of the results bucket raises PermissionError
    """
    dry_run = cfg.cluster.dry_run

    # Files are checked in order; the first failure stops the checks
    for query_file in query_files:
        try:
            check_for_read(query_file, dry_run)
        except FileNotFoundError:
            raise UserReportError(
                INPUT_ERROR,
                f'Query input {query_file} is not readable or does not exist')

    results_bucket = cfg.cluster.results
    try:
        check_dir_for_write(results_bucket, dry_run)
    except PermissionError:
        raise UserReportError(PERMISSIONS_ERROR,
                              f'Cannot write into bucket {results_bucket}')
예제 #6
0
def check_memory_requirements(cfg: ElasticBlastConfig):
    """ Check that the configured machine type has enough memory for the database.

        The requirement is the database size multiplied by cfg.blast.db_mem_margin;
        it is compared with the memory of cfg.cluster.machine_type.

        Raises:
            UserReportError with BLASTDB_ERROR if the database size lookup
            raises ValueError
            RuntimeError if the machine memory is below the requirement
    """
    blast_cfg = cfg.blast
    db_name = blast_cfg.db
    try:
        db_size = get_blastdb_size(db_name, blast_cfg.db_source)
    except ValueError as err:
        raise UserReportError(returncode=BLASTDB_ERROR, message=str(err))

    required_mem = db_size * blast_cfg.db_mem_margin
    machine_type = cfg.cluster.machine_type
    available_mem = get_machine_properties(machine_type).memory
    if available_mem < required_mem:
        raise RuntimeError(
            f'Database {db_name} requires {required_mem:.3f}GB RAM for processing, machine {machine_type} provides only {available_mem:.3f}GB'
        )
예제 #7
0
def main():
    """Local main entry point which sets up arguments, undo stack,
    and processes exceptions

    Returns the value returned by the selected subcommand function, or an
    error code: the returncode of a UserReportError, DEPENDENCY_ERROR for a
    SafeExecError, INTERRUPT_ERROR for a keyboard interrupt.

    The clean-up stack is always processed on the way out. If clean-up
    reports any messages, they are logged and the process exits with
    UNKNOWN_ERROR, which takes precedence over the values above.
    """
    # Bound before entering the try block so that the finally clause can
    # always refer to it, even if the first statement inside try raises;
    # otherwise an UnboundLocalError there would mask the original error.
    clean_up_stack = []
    try:
        signal.signal(signal.SIGINT, signal.default_int_handler)
        # Check parameters for Unicode letters and reject if codes higher than 255 occur
        reject_cli_args_with_unicode(sys.argv[1:])
        parser = create_arg_parser()
        args = parser.parse_args()
        if not args.subcommand:
            # report missing command line task
            raise UserReportError(returncode=constants.INPUT_ERROR,
                                  message=NO_TASK_MSG)
        config_logging(args)
        cfg = configure(args)
        logging.info(f"ElasticBLAST {args.subcommand} {VERSION}")
        task = ElbCommand(args.subcommand.lower())
        cfg = ElasticBlastConfig(cfg, task=task)
        logging.debug(pprint.pformat(cfg.asdict()))
        check_prerequisites(cfg)
        #TODO: use cfg only when args.wait, args.sync, and args.run_label are replicated in cfg
        return args.func(args, cfg, clean_up_stack)
    except (SafeExecError, UserReportError) as e:
        logging.error(e.message)
        # SafeExecError return code is the exit code from command line
        # application ran via subprocess
        if isinstance(e, SafeExecError):
            return constants.DEPENDENCY_ERROR
        return e.returncode
    except KeyboardInterrupt:
        return constants.INTERRUPT_ERROR
    #TODO: process filehelper.TarReadError here
    finally:
        messages = clean_up(clean_up_stack)
        if messages:
            for msg in messages:
                logging.error(msg)
            # Exiting here overrides any return value produced above
            sys.exit(constants.UNKNOWN_ERROR)
예제 #8
0
def submit(args, cfg, clean_up_stack):
    """ Entry point to submit an ElasticBLAST search

    Parameters:
        args - parsed command line arguments; not used by the active code,
               only the disabled sync mode at the end refers to it
        cfg - configuration of the search (cloud provider, cluster, BLAST
              and timeout settings are read from it)
        clean_up_stack - list to which clean-up callables are appended while
              resources are being created; it is cleared once job submission
              has completed on the non-AWS path
    Returns:
        0 once the search has been submitted
    Raises:
        UserReportError with CLUSTER_ERROR if check_running_cluster(cfg)
        reports an existing search, with BLASTDB_ERROR if the database size
        lookup raises ValueError, and with INPUT_ERROR if more job files are
        generated than the allowed number of kubernetes jobs
    """
    dry_run = cfg.cluster.dry_run
    cfg.validate(ElbCommand.SUBMIT)

    # For now, checking resources is only implemented for AWS
    if cfg.cloud_provider.cloud == CSP.AWS:
        check_resource_quotas(cfg)
    else:
        enable_gcp_api(cfg)

    # Refuse to submit when a search for the same results location exists
    if check_running_cluster(cfg):
        raise UserReportError(
            CLUSTER_ERROR, 'An ElasticBLAST search that will write results to '
            f'{cfg.cluster.results} has already been submitted.\n'
            'Please resubmit your search with a different value '
            'for "results" configuration parameter or delete '
            'the previous ElasticBLAST search by running elastic-blast delete.'
        )

    # Validate the inputs before any work is done on them
    query_files = assemble_query_file_list(cfg)
    check_submit_data(query_files, cfg)

    #mode_str = "synchronous" if args.sync else "asynchronous"
    #logging.info(f'Running ElasticBLAST on {cfg.cloud_provider.cloud.name} in {mode_str} mode')

    # split FASTA query into batches
    # The clean-up action is registered before the split is performed
    clean_up_stack.append(cleanup_temp_bucket_dirs)
    queries, query_length = split_query(query_files, cfg)

    # setup taxonomy filtering, if requested
    setup_taxid_filtering(cfg)

    # FIXME: this is a temporary code arrangement
    # The AWS path ends here; everything after this block runs only for the
    # other cloud provider and is based on kubernetes jobs
    if cfg.cloud_provider.cloud == CSP.AWS:
        elastic_blast = ElasticBlastAws(cfg, create=True)
        upload_split_query_to_bucket(cfg, clean_up_stack, dry_run)
        elastic_blast.upload_query_length(query_length)
        elastic_blast.submit(queries)
        return 0

    k8s_job_limit = get_maximum_number_of_allowed_k8s_jobs(dry_run)

    # check database availability
    # The size itself is discarded; only a ValueError from the lookup matters
    try:
        get_blastdb_size(cfg.blast.db, cfg.blast.db_source)
    except ValueError as err:
        raise UserReportError(returncode=BLASTDB_ERROR, message=str(err))

    # check_memory_requirements(cfg)  # FIXME: EB-281, EB-313

    usage_reporting = get_usage_reporting()

    db, db_path, db_label = get_blastdb_info(cfg.blast.db)

    # Job generation
    job_template_text = read_job_template(cfg=cfg)
    program = cfg.blast.program

    # prepare substitution for current template
    # TODO consider template using cfg variables directly as, e.g. ${blast.program}
    subs = {
        'ELB_BLAST_PROGRAM': program,
        'ELB_DB': db,
        'ELB_DB_LABEL': db_label,
        'ELB_MEM_REQUEST': str(cfg.blast.mem_request),
        'ELB_MEM_LIMIT': str(cfg.blast.mem_limit),
        'ELB_BLAST_OPTIONS': cfg.blast.options,
        # FIXME: EB-210
        # NOTE(review): the factor of 60 suggests a minutes-to-seconds
        # conversion - confirm the unit of cfg.timeouts.blast_k8s
        'ELB_BLAST_TIMEOUT': str(cfg.timeouts.blast_k8s * 60),
        'BUCKET': cfg.cluster.results,
        'ELB_NUM_CPUS': str(cfg.cluster.num_cpus),
        'ELB_DB_MOL_TYPE': ElbSupportedPrograms().get_molecule_type(program),
        'ELB_DOCKER_IMAGE': ELB_DOCKER_IMAGE,
        'ELB_TIMEFMT': '%s%N',  # timestamp in nanoseconds
        'BLAST_ELB_JOB_ID': uuid.uuid4().hex,
        'BLAST_USAGE_REPORT': str(usage_reporting).lower(),
        'K8S_JOB_GET_BLASTDB': K8S_JOB_GET_BLASTDB,
        'K8S_JOB_LOAD_BLASTDB_INTO_RAM': K8S_JOB_LOAD_BLASTDB_INTO_RAM,
        'K8S_JOB_IMPORT_QUERY_BATCHES': K8S_JOB_IMPORT_QUERY_BATCHES,
        'K8S_JOB_BLAST': K8S_JOB_BLAST,
        'K8S_JOB_RESULTS_EXPORT': K8S_JOB_RESULTS_EXPORT
    }
    # Job files live in a temporary directory that is removed when the with
    # block exits, so submission has to happen inside of it
    with TemporaryDirectory() as job_path:
        job_files = write_job_files(job_path, 'batch_', job_template_text,
                                    queries, **subs)
        if len(job_files) > k8s_job_limit:
            batch_len = cfg.blast.batch_len
            # Smallest batch length for which the total query length fits
            # into the allowed number of jobs
            suggested_batch_len = int(query_length / k8s_job_limit) + 1
            msg = f'The batch size specified ({batch_len}) led to creating {len(job_files)} kubernetes jobs, which exceeds the limit on number of jobs ({k8s_job_limit}). Please increase the batch-len parameter to at least {suggested_batch_len}.'
            raise UserReportError(INPUT_ERROR, msg)
        logging.debug('Generated %d job files', len(job_files))
        logging.debug(f'Job #1 file: {job_files[0]}')
        logging.debug('Command to run in the pod:')
        # Log the first line of the first job file that mentions -query
        with open(job_files[0]) as f:
            for line in f:
                if line.find('-query') >= 0:
                    logging.debug(line.strip())
                    break

        upload_split_query_to_bucket(cfg, clean_up_stack, dry_run)
        initialize_cluster(cfg, db, db_path, clean_up_stack)

        logging.info('Submitting jobs to cluster')
        # The two logging callables bracket the submission on the clean-up
        # stack, so a later clean-up shows how far the submission got
        clean_up_stack.append(
            lambda: logging.debug('Before submission computational jobs'))
        job_names = submit_jobs(Path(job_path), dry_run=dry_run)
        clean_up_stack.append(
            lambda: logging.debug('After submission computational jobs'))
        if job_names:
            logging.debug(f'Job #1 name: {job_names[0]}')

    # Sync mode disabled per EB-700
    #if args.sync:
    #    while True:
    #        try:
    #            pending, running, succeeded, failed = get_status(args.run_label, dry_run=dry_run)
    #        except RuntimeError as e:
    #            returncode = e.args[0]
    #            logging.error(f'Error while getting job status: {e.args[1]}, returncode: {returncode}')
    #            # TODO: maybe analyze situation in more details here. It happens when kubectl can't be found
    #            # or cluster connection can't be established. If the latter, maybe try to get GKE credentials again
    #        except ValueError as e:
    #            returncode = 1
    #            logging.error(f'Error while getting job status: {e}')
    #            # This error happens when run-label is malformed, it will not repair, so exit here
    #            break
    #        else:
    #            if pending + running:
    #                logging.debug(f'Pending {pending}, Running {running}, Succeeded {succeeded}, Failed {failed}')
    #            else:
    #                logging.info(f'Done: {succeeded} jobs succeeded, {failed} jobs failed')
    #                break
    #        time.sleep(20)  # TODO: make this a parameter (granularity)
    #    logging.info('Deleting cluster')
    #else:
    # Submission completed: drop every clean-up action registered so far and
    # leave only the collection of kubernetes logs on the stack
    clean_up_stack.clear()
    clean_up_stack.append(lambda: collect_k8s_logs(cfg))
    return 0