Example #1
0
    def add_arguments(self, parser):
        """Register this command's command-line options on ``parser``.

        Adds the positional input paths, the general output options, a
        'BGC detection options' group and a 'BGC classification options'
        group. The names returned by ``util.get_available_models`` are
        embedded in the ``--detector`` / ``--classifier`` help texts; when
        that listing is empty, a hint to run "deepbgc download" is shown
        in its place.

        :param parser: parser object to populate; it must provide
            ``add_argument`` and ``add_argument_group``.
        """
        missing_models_hint = 'run "deepbgc download" to download models'

        # --- Positional inputs and general output options -----------------
        parser.add_argument(
            dest='inputs',
            nargs='+',
            help="Input sequence file path (FASTA, GenBank, Pfam CSV).",
        )
        parser.add_argument(
            '-o', '--output',
            required=False,
            help="Custom output directory path.",
        )
        parser.add_argument(
            '--limit-to-record',
            action='append',
            help="Process only specific record ID. Can be provided multiple times.",
        )
        parser.add_argument(
            '--minimal-output',
            dest='is_minimal_output',
            action='store_true',
            default=False,
            help="Produce minimal output with just the GenBank sequence file.",
        )

        # --- Detection options --------------------------------------------
        detection = parser.add_argument_group('BGC detection options', '')
        detector_listing = ', '.join(util.get_available_models('detector')) or missing_models_hint
        detector_help = (
            "Trained detection model name ({}). "
            "Can be provided multiple times (-d first -d second)."
        ).format(detector_listing)
        label_help = (
            "Label for detected clusters (equal to --detector by default). "
            "If multiple detectors are provided, a label should be provided for each one."
        )
        detection.add_argument('-d', '--detector', dest='detectors', action='append', default=[],
                               help=detector_help)
        detection.add_argument('--no-detector', action='store_true', help="Disable BGC detection.")
        detection.add_argument('-l', '--label', dest='labels', action='append', default=[],
                               help=label_help)
        detection.add_argument(
            '-s', '--score',
            default=0.5,
            type=float,
            help="Average protein-wise DeepBGC score threshold for extracting BGC regions from Pfam sequences.",
        )

        # Integer-valued merge/filter thresholds share the same shape, so they
        # are registered from a table: (flag, default value, help text).
        integer_thresholds = (
            ('--merge-max-protein-gap', 0, "Merge detected BGCs within given number of proteins."),
            ('--merge-max-nucl-gap', 0, "Merge detected BGCs within given number of nucleotides."),
            ('--min-nucl', 1, "Minimum BGC nucleotide length."),
            ('--min-proteins', 1, "Minimum number of proteins in a BGC."),
            ('--min-domains', 1, "Minimum number of protein domains in a BGC."),
            ('--min-bio-domains', 0,
             "Minimum number of known biosynthetic protein domains in a BGC (from antiSMASH ClusterFinder)."),
        )
        for flag, default_value, help_text in integer_thresholds:
            detection.add_argument(flag, default=default_value, type=int, help=help_text)

        # --- Classification options ---------------------------------------
        classification = parser.add_argument_group('BGC classification options', '')
        classifier_listing = ', '.join(util.get_available_models('classifier')) or missing_models_hint
        classifier_help = (
            "Trained classification model name ({}). "
            "Can be provided multiple times (-c first -c second)."
        ).format(classifier_listing)
        classification.add_argument('-c', '--classifier', dest='classifiers', action='append', default=[],
                                    help=classifier_help)
        classification.add_argument('--no-classifier', action='store_true',
                                    help="Disable BGC classification.")
        classification.add_argument(
            '--classifier-score',
            default=0.5,
            type=float,
            help="DeepBGC classification score threshold for assigning classes to BGCs (inclusive).",
        )
Example #2
0
    def run(self):
        """Log an overview of the downloaded data files and models.

        Steps, in order:

        1. Report the custom downloads directory if the environment variable
           named by ``util.DEEPBGC_DOWNLOADS_DIR`` is set.
        2. Check the unversioned downloads directory and list its contents,
           or warn that it does not exist.
        3. Check the versioned downloads directory; if it does not exist,
           log a hint and return immediately (no final summary is logged).
        4. For detectors and then classifiers, log the available model names
           and call ``self.print_model`` on each one.
        5. Log a final summary: 'All OK' unless any warning was raised above
           or any ``print_model`` call returned a falsy value.
        """
        all_ok = True

        env_override = os.environ.get(util.DEEPBGC_DOWNLOADS_DIR)
        if env_override:
            logging.info('Using custom downloads dir: %s', env_override)

        downloads_root = util.get_downloads_dir(versioned=False)
        if os.path.exists(downloads_root):
            logging.info('Available data files: %s', os.listdir(downloads_root))
        else:
            logging.warning('Data downloads directory does not exist yet: %s', downloads_root)
            logging.warning('Run "deepbgc download" to download all dependencies or set %s env var',
                            util.DEEPBGC_DOWNLOADS_DIR)
            all_ok = False

        current_version_dir = util.get_downloads_dir(versioned=True)
        if not os.path.exists(current_version_dir):
            # Nothing version-specific to inspect yet; stop before the model report.
            logging.info('Downloads directory for current version does not exist yet: %s',
                         current_version_dir)
            logging.info('Run "deepbgc download" to download current models')
            return

        separator = '=' * 80
        # (model type, "available" log format, "nothing downloaded" warning)
        model_reports = (
            ('detector',
             'Available detectors: %s',
             'Run "deepbgc download" to download current detector models'),
            ('classifier',
             'Available classifiers: %s',
             'Run "deepbgc download" to download current classifier models'),
        )
        for model_type, available_fmt, missing_warning in model_reports:
            model_names = util.get_available_models(model_type)
            logging.info(separator)
            logging.info(available_fmt, model_names)

            if not model_names:
                logging.warning(missing_warning)
                all_ok = False

            for model_name in model_names:
                path = util.get_model_path(model_name, model_type)
                # print_model is always invoked; a falsy result marks the run as not OK.
                if not self.print_model(model_name, path):
                    all_ok = False

        logging.info(separator)
        if not all_ok:
            logging.warning('Some warnings detected, check the output above')
        else:
            logging.info('All OK')