def annotate_with_deepnog(identifier: str, protein_list: List[SeqRecord],
                          database: str = 'eggNOG5', tax_level: int = 2,
                          confidence_threshold: float = None,
                          verb: bool = True) -> GenotypeRecord:
    """
    Assign proteins belonging to a sample to orthologous groups using deepnog.

    :param identifier: The name associated with the sample.
    :param protein_list: A list of SeqRecords containing protein sequences.
    :param database: Orthologous group/family database to use.
    :param tax_level: The NCBI taxon ID of the taxonomic level to use from the given database.
    :param confidence_threshold: Confidence threshold of deepnog annotations below which
                                 annotations will be discarded. When given, it takes precedence
                                 over a threshold stored on the loaded model; when None, the
                                 model's own ``threshold`` attribute is used if it has one.
    :param verb: Whether to print verbose progress messages.
    :raises RuntimeError: If ``(database, tax_level)`` is not in ``DEEPNOG_VALID_CONFIG``.
    :returns: a GenotypeRecord suitable for use with phenotrex.
    """
    if (database, tax_level) not in DEEPNOG_VALID_CONFIG:
        raise RuntimeError(f'Unknown database and/or tax level: {database}/{tax_level}')

    device = set_device('auto')
    torch.set_num_threads(1)

    # Load the stored weights for the requested database / taxonomic level
    # and build the network from them.
    weights_path = get_weights_path(
        database=database,
        level=str(tax_level),
        architecture=DEEPNOG_ARCH,
    )
    model_dict = torch.load(weights_path, map_location=device)
    model = load_nn(
        architecture=DEEPNOG_ARCH,
        model_dict=model_dict,
        device=device,
    )
    class_labels = model_dict['classes']

    dataset = PreloadedProteinDataset(protein_list)
    preds, confs, ids, indices = predict(model, dataset, device,
                                         batch_size=1,
                                         num_workers=1,
                                         verbose=3 if verb else 0)

    # An explicitly requested threshold must win; otherwise fall back to the
    # threshold shipped with the model, or to None if there is none.
    if confidence_threshold is not None:
        threshold = confidence_threshold
    elif hasattr(model, 'threshold'):
        threshold = float(model.threshold)
    else:
        threshold = None

    df = create_df(
        class_labels, preds, confs, ids, indices,
        threshold=threshold,
    )

    # Keep each distinct, truthy prediction once.
    cogs = [x for x in df.prediction.unique() if x]
    feature_type_str = f'{database}-tax-{tax_level}'
    return GenotypeRecord(identifier=identifier,
                          feature_type=feature_type_str,
                          features=cogs)
def _start_prediction_or_training(args):
    """Validate the parsed CLI arguments and dispatch to inference or training.

    Exits the process with status 1 (after logging an error) when the batch
    size is not positive, when the output file already exists, or when the
    output path is a directory during inference. Otherwise resolves the
    device, looks up the architecture's module and class in the
    configuration, and returns the result of the phase-specific entry point.
    Returns None if ``args.phase`` is neither ``'infer'`` nor ``'train'``.
    """
    # Deferred import keeps the CLI start-up snappy
    from deepnog.utils import get_logger, set_device

    log = get_logger(__name__, verbose=args.verbose)
    log.info('Starting deepnog')

    def _abort(message):
        # Report the problem and terminate with a failure exit status.
        log.error(message)
        sys.exit(1)

    # Sanity check command line arguments
    if args.batch_size <= 0:
        _abort(f'Batch size must be at least one. '
               f'Got batch size = {args.batch_size} instead.')

    # Never overwrite existing files
    out = args.out
    if out is not None:
        out_path = Path(out)
        if out_path.is_file():
            _abort(f'Output file {out} already exists.')
        elif args.phase == 'infer' and (out_path.is_dir() or out.endswith('/')):
            _abort(f'Output path must be a file during inference, '
                   f'but got a directory instead: {out}')

    # Replace the requested device specification with the resolved device
    args.device = set_device(args.device)

    # Locate the deep network architecture
    arch_entry = get_config()['architecture'][args.architecture]
    arch_module = arch_entry['module']
    arch_cls = arch_entry['class']

    phase = args.phase
    if phase == 'infer':
        return _start_inference(args=args, arch_module=arch_module, arch_cls=arch_cls)
    if phase == 'train':
        return _start_training(args=args, arch_module=arch_module, arch_cls=arch_cls)
    return None
def test_gpu_device_available():
    """Requesting the 'gpu' device yields a ``torch.device`` when CUDA is usable."""
    # Without a CUDA-enabled GPU, set_device('gpu') is expected to raise
    # (that path is covered by test_gpu_device_unavailable), so this check
    # is only meaningful on machines that actually have one.
    if not torch.cuda.is_available():
        pytest.skip('no CUDA-enabled GPU available on this machine')
    device = 'gpu'
    assert isinstance(set_device(device), torch.device)
def test_cpu_device():
    """Requesting the 'cpu' device yields a ``torch.device`` instance."""
    resolved = set_device('cpu')
    assert isinstance(resolved, torch.device)
def test_auto_device():
    """Automatic device selection yields a ``torch.device`` named 'cpu' or 'cuda'."""
    resolved = set_device('auto')
    assert isinstance(resolved, torch.device)
    device_name = str(resolved)
    assert device_name in ('cpu', 'cuda'), f'Unrecognized device: {resolved}'
def test_set_device():
    """An unsupported device name is rejected with a ValueError naming it."""
    bad_device = 'tpu'
    expected_message = f'Unknown device "{bad_device}". Try "auto".'
    # pytest treats ``match`` as a regular expression searched in the message.
    with pytest.raises(ValueError, match=expected_message):
        set_device(bad_device)
def test_gpu_device_unavailable():
    """Requesting the 'gpu' device without CUDA raises a RuntimeError."""
    # The expected error can only occur on machines without a CUDA-enabled
    # GPU; where one is present this scenario cannot be exercised.
    if torch.cuda.is_available():
        pytest.skip('a CUDA-enabled GPU is available on this machine')
    device = 'gpu'
    msg = 'no cuda-enabled gpu is available on this machine'
    with pytest.raises(RuntimeError, match=msg):
        set_device(device)
def test_auto_device():
    """Automatic device selection yields a ``torch.device`` named 'cpu' or 'cuda'.

    The selected device is also printed for inspection in the test output.
    """
    device = set_device('auto')
    print(f'Auto device: {device}')
    # This definition rebinds the earlier function of the same name in this
    # module, so it must carry the assertions itself or nothing is verified.
    assert isinstance(device, torch.device)
    assert str(device) in ['cpu', 'cuda'], f'Unrecognized device: {device}'