def add_arguments(self, parser):
    """Register this command's command-line arguments on ``parser``.

    Adds the positional ``inputs`` argument and general output options
    directly on the parser, then two argument groups: "BGC detection
    options" and "BGC classification options".

    The names returned by ``util.get_available_models`` are embedded in the
    ``--detector`` / ``--classifier`` help texts; when none are returned the
    help shows a hint to run "deepbgc download" instead.

    :param parser: argparse-style parser providing ``add_argument`` and
        ``add_argument_group``.
    """
    # General input / output options.
    parser.add_argument(
        dest='inputs', nargs='+',
        help="Input sequence file path (FASTA, GenBank, Pfam CSV).")
    parser.add_argument(
        '-o', '--output', required=False,
        help="Custom output directory path.")
    parser.add_argument(
        '--limit-to-record', action='append',
        help="Process only specific record ID. Can be provided multiple times.")
    parser.add_argument(
        '--minimal-output', dest='is_minimal_output', action='store_true', default=False,
        help="Produce minimal output with just the GenBank sequence file.")

    # Shown in place of the model list when no models are available.
    no_models_message = 'run "deepbgc download" to download models'

    # BGC detection options.
    detection_group = parser.add_argument_group('BGC detection options', '')
    detector_names = util.get_available_models('detector')
    detection_group.add_argument(
        '-d', '--detector', dest='detectors', action='append', default=[],
        help="Trained detection model name ({}). "
             "Can be provided multiple times (-d first -d second)."
             .format(', '.join(detector_names) or no_models_message))
    detection_group.add_argument(
        '--no-detector', action='store_true',
        help="Disable BGC detection.")
    # The help text is split into two adjacent literals; each literal must be
    # closed on the line it starts on, with no line break inside the quotes.
    detection_group.add_argument(
        '-l', '--label', dest='labels', action='append', default=[],
        help="Label for detected clusters (equal to --detector by default). "
             "If multiple detectors are provided, a label should be provided for each one.")
    detection_group.add_argument(
        '-s', '--score', default=0.5, type=float,
        help="Average protein-wise DeepBGC score threshold for extracting BGC regions from Pfam sequences.")
    detection_group.add_argument(
        '--merge-max-protein-gap', default=0, type=int,
        help="Merge detected BGCs within given number of proteins.")
    detection_group.add_argument(
        '--merge-max-nucl-gap', default=0, type=int,
        help="Merge detected BGCs within given number of nucleotides.")
    detection_group.add_argument(
        '--min-nucl', default=1, type=int,
        help="Minimum BGC nucleotide length.")
    detection_group.add_argument(
        '--min-proteins', default=1, type=int,
        help="Minimum number of proteins in a BGC.")
    detection_group.add_argument(
        '--min-domains', default=1, type=int,
        help="Minimum number of protein domains in a BGC.")
    detection_group.add_argument(
        '--min-bio-domains', default=0, type=int,
        help="Minimum number of known biosynthetic protein domains in a BGC (from antiSMASH ClusterFinder).")

    # BGC classification options.
    classification_group = parser.add_argument_group('BGC classification options', '')
    classifier_names = util.get_available_models('classifier')
    classification_group.add_argument(
        '-c', '--classifier', dest='classifiers', action='append', default=[],
        help="Trained classification model name ({}). "
             "Can be provided multiple times (-c first -c second)."
             .format(', '.join(classifier_names) or no_models_message))
    classification_group.add_argument(
        '--no-classifier', action='store_true',
        help="Disable BGC classification.")
    classification_group.add_argument(
        '--classifier-score', default=0.5, type=float,
        help="DeepBGC classification score threshold for assigning classes to BGCs (inclusive).")
def run(self):
    """Log a report on the downloads directories and the available models.

    Steps, in order:

    1. Log the custom downloads dir when the environment variable named by
       ``util.DEEPBGC_DOWNLOADS_DIR`` is set.
    2. Check the unversioned downloads directory: list its files, or warn
       that it does not exist yet.
    3. Check the versioned downloads directory; when it is missing, log a
       hint and return immediately (no model listing, no final summary).
    4. List available detectors and classifiers, calling
       ``self.print_model`` for each one.
    5. Log 'All OK' when nothing was flagged, otherwise a warning summary.

    Returns ``None``; all results are reported through ``logging``.
    """
    healthy = True
    separator = '=' * 80

    override_dir = os.environ.get(util.DEEPBGC_DOWNLOADS_DIR)
    if override_dir:
        logging.info('Using custom downloads dir: %s', override_dir)

    downloads_root = util.get_downloads_dir(versioned=False)
    if os.path.exists(downloads_root):
        logging.info('Available data files: %s', os.listdir(downloads_root))
    else:
        logging.warning('Data downloads directory does not exist yet: %s', downloads_root)
        logging.warning(
            'Run "deepbgc download" to download all dependencies or set %s env var',
            util.DEEPBGC_DOWNLOADS_DIR)
        healthy = False

    release_dir = util.get_downloads_dir(versioned=True)
    if not os.path.exists(release_dir):
        # Nothing has been downloaded for this version, so there are no models to list.
        logging.info(
            'Downloads directory for current version does not exist yet: %s',
            release_dir)
        logging.info('Run "deepbgc download" to download current models')
        return

    # Detectors: the "missing models" warning is emitted before the per-model reports.
    detector_names = util.get_available_models('detector')
    logging.info(separator)
    logging.info('Available detectors: %s', detector_names)
    if not detector_names:
        logging.warning(
            'Run "deepbgc download" to download current detector models')
        healthy = False
    for detector_name in detector_names:
        detector_path = util.get_model_path(detector_name, 'detector')
        # Every model is always reported; a falsy result only clears the flag.
        if not self.print_model(detector_name, detector_path):
            healthy = False

    # Classifiers: the "missing models" warning is emitted after the per-model reports.
    classifier_names = util.get_available_models('classifier')
    logging.info(separator)
    logging.info('Available classifiers: %s', classifier_names)
    for classifier_name in classifier_names:
        classifier_path = util.get_model_path(classifier_name, 'classifier')
        if not self.print_model(classifier_name, classifier_path):
            healthy = False
    if not classifier_names:
        logging.warning(
            'Run "deepbgc download" to download current classifier models')
        healthy = False

    logging.info(separator)
    if healthy:
        logging.info('All OK')
    else:
        logging.warning('Some warnings detected, check the output above')