def run(self):
    """Log an overview of the downloaded data files and models.

    Reports the optional custom downloads directory, the contents of the
    unversioned downloads directory, and every available detector and
    classifier model (each one is passed to ``self.print_model``).

    If the downloads directory for the current version does not exist, the
    method logs a hint and returns early without printing the final summary.
    Otherwise it finishes with either 'All OK' or a warning, depending on
    whether any check failed.
    """
    all_good = True

    override_dir = os.environ.get(util.DEEPBGC_DOWNLOADS_DIR)
    if override_dir:
        logging.info('Using custom downloads dir: %s', override_dir)

    downloads_root = util.get_downloads_dir(versioned=False)
    if os.path.exists(downloads_root):
        logging.info('Available data files: %s', os.listdir(downloads_root))
    else:
        logging.warning('Data downloads directory does not exist yet: %s', downloads_root)
        logging.warning(
            'Run "deepbgc download" to download all dependencies or set %s env var',
            util.DEEPBGC_DOWNLOADS_DIR)
        all_good = False

    current_version_dir = util.get_downloads_dir(versioned=True)
    if not os.path.exists(current_version_dir):
        # Nothing version-specific to inspect; stop before the model sections.
        logging.info(
            'Downloads directory for current version does not exist yet: %s',
            current_version_dir)
        logging.info('Run "deepbgc download" to download current models')
        return

    separator = '=' * 80
    # (model type, listing message, message shown when no model is available)
    sections = (
        ('detector',
         'Available detectors: %s',
         'Run "deepbgc download" to download current detector models'),
        ('classifier',
         'Available classifiers: %s',
         'Run "deepbgc download" to download current classifier models'),
    )
    for model_type, listing_msg, missing_msg in sections:
        model_names = util.get_available_models(model_type)
        logging.info(separator)
        logging.info(listing_msg, model_names)
        if not model_names:
            logging.warning(missing_msg)
            all_good = False
        for model_name in model_names:
            path = util.get_model_path(model_name, model_type)
            # print_model always runs; its result is AND-ed into the overall status.
            all_good = self.print_model(model_name, path) and all_good

    logging.info(separator)
    if all_good:
        logging.info('All OK')
    else:
        logging.warning('Some warnings detected, check the output above')
def __init__(self, classifier, score_threshold=0.5):
    """Load a classifier model identified by its name.

    :param classifier: Name of the classifier model; resolved to a file path
        through ``util.get_model_path(name, 'classifier')``.
    :param score_threshold: Stored as ``self.score_threshold``; not used by
        the constructor itself.
    :raises ValueError: If ``classifier`` is None or not a string.
    """
    is_valid_name = classifier is not None and isinstance(classifier, six.string_types)
    if not is_valid_name:
        raise ValueError('Expected classifier name, got {}'.format(classifier))

    self.classifier_name = classifier
    self.score_threshold = score_threshold

    self.model = SequenceModelWrapper.load(
        util.get_model_path(self.classifier_name, 'classifier')
    )
    # NOTE(review): pd.Series() without an explicit dtype has a
    # version-dependent default dtype in pandas - confirm what callers expect.
    self.total_class_counts = pd.Series()
def __init__(self, classifier, score_threshold=0.5):
    """Load a classifier model given either its name or a path to a model file.

    The argument is treated as a file path when it exists on disk or contains
    the OS path separator, and is not a directory. In that case the classifier
    name becomes the file name without its extension. Otherwise the argument
    is taken as a model name and resolved through
    ``util.get_model_path(name, 'classifier')``.

    :param classifier: Classifier name or path to a classifier model file.
    :param score_threshold: Stored as ``self.score_threshold``; not used by
        the constructor itself.
    :raises ValueError: If ``classifier`` is None or not a string.
    """
    is_valid = classifier is not None and isinstance(classifier, six.string_types)
    if not is_valid:
        raise ValueError('Expected classifier name or path, got {}'.format(classifier))

    looks_like_path = os.path.exists(classifier) or os.path.sep in classifier
    if looks_like_path and not os.path.isdir(classifier):
        model_file = classifier
        # Name the classifier after the file, dropping the extension.
        name = os.path.splitext(os.path.basename(model_file))[0]
    else:
        name = classifier
        model_file = util.get_model_path(name, 'classifier')

    self.classifier_name = name
    self.score_threshold = score_threshold
    self.model = SequenceModelWrapper.load(model_file)
    # NOTE(review): pd.Series() without an explicit dtype has a
    # version-dependent default dtype in pandas - confirm what callers expect.
    self.total_class_counts = pd.Series()
def __init__(self, detector, label=None, score_threshold=0.5, merge_max_protein_gap=0,
             merge_max_nucl_gap=0, min_nucl=1, min_proteins=1, min_domains=1,
             min_bio_domains=0):
    """Load a detector model identified by its name and store its settings.

    :param detector: Name of the detector model; resolved to a file path
        through ``util.get_model_path(name, 'detector')``.
    :param label: Stored as ``self.detector_label``; falls back to the
        detector name when falsy.
    :param score_threshold: Stored as ``self.score_threshold``.
    :param merge_max_protein_gap: Stored as ``self.merge_max_protein_gap``.
    :param merge_max_nucl_gap: Stored as ``self.merge_max_nucl_gap``.
    :param min_nucl: Stored as ``self.min_nucl``.
    :param min_proteins: Stored as ``self.min_proteins``.
    :param min_domains: Stored as ``self.min_domains``.
    :param min_bio_domains: Stored as ``self.min_bio_domains``.
    :raises ValueError: If ``detector`` is None or not a string.
    """
    # The threshold is assigned before validation, as in the original order.
    self.score_threshold = score_threshold

    is_valid_name = detector is not None and isinstance(detector, six.string_types)
    if not is_valid_name:
        raise ValueError('Expected detector name, got {}'.format(detector))

    self.detector_name = detector
    self.detector_label = label if label else self.detector_name
    self.score_column = util.format_bgc_score_column(self.detector_name)

    # Settings kept on the instance; none of them is used in the constructor.
    self.merge_max_protein_gap = merge_max_protein_gap
    self.merge_max_nucl_gap = merge_max_nucl_gap
    self.min_nucl = min_nucl
    self.min_proteins = min_proteins
    self.min_domains = min_domains
    self.min_bio_domains = min_bio_domains

    self.model = SequenceModelWrapper.load(
        util.get_model_path(self.detector_name, 'detector')
    )
    self.num_detected = 0
def __init__(self, detector, label=None, score_threshold=0.5, merge_max_protein_gap=0,
             merge_max_nucl_gap=0, min_nucl=1, min_proteins=1, min_domains=1,
             min_bio_domains=0):
    """Load a detector model given either its name or a path to a model file.

    The argument is treated as a file path when it exists on disk or contains
    the OS path separator, and is not a directory. In that case the detector
    name becomes the file name without its extension. Otherwise the argument
    is taken as a model name and resolved through
    ``util.get_model_path(name, 'detector')``.

    :param detector: Detector name or path to a detector model file.
    :param label: Stored as ``self.detector_label``; falls back to the
        detector name when falsy.
    :param score_threshold: Stored as ``self.score_threshold``.
    :param merge_max_protein_gap: Stored as ``self.merge_max_protein_gap``.
    :param merge_max_nucl_gap: Stored as ``self.merge_max_nucl_gap``.
    :param min_nucl: Stored as ``self.min_nucl``.
    :param min_proteins: Stored as ``self.min_proteins``.
    :param min_domains: Stored as ``self.min_domains``.
    :param min_bio_domains: Stored as ``self.min_bio_domains``.
    :raises ValueError: If ``detector`` is None or not a string.
    """
    # The threshold is assigned before validation, as in the original order.
    self.score_threshold = score_threshold

    is_valid = detector is not None and isinstance(detector, six.string_types)
    if not is_valid:
        raise ValueError('Expected detector name or path, got {}'.format(detector))

    looks_like_path = os.path.exists(detector) or os.path.sep in detector
    if looks_like_path and not os.path.isdir(detector):
        model_file = detector
        # Name the detector after the file, dropping the extension.
        name = os.path.splitext(os.path.basename(model_file))[0]
    else:
        name = detector
        model_file = util.get_model_path(name, 'detector')

    self.detector_name = name
    self.detector_label = label if label else self.detector_name
    self.score_column = util.format_bgc_score_column(self.detector_name)

    # Settings kept on the instance; none of them is used in the constructor.
    self.merge_max_protein_gap = merge_max_protein_gap
    self.merge_max_nucl_gap = merge_max_nucl_gap
    self.min_nucl = min_nucl
    self.min_proteins = min_proteins
    self.min_domains = min_domains
    self.min_bio_domains = min_bio_domains

    self.model = SequenceModelWrapper.load(model_file)
    self.num_detected = 0