Example 1
def annotate_with_deepnog(identifier: str,
                          protein_list: List[SeqRecord],
                          database: str = 'eggNOG5',
                          tax_level: int = 2,
                          confidence_threshold: float = None,
                          verb: bool = True) -> GenotypeRecord:
    """
    Assign proteins belonging to a sample to orthologous groups using deepnog.

    :param identifier: The name associated with the sample.
    :param protein_list: A list of SeqRecords containing protein sequences.
    :param database: Orthologous group/family database to use.
    :param tax_level: The NCBI taxon ID of the taxonomic level to use from the given database.
    :param confidence_threshold: Confidence threshold of deepnog annotations below which annotations
                                 will be discarded.
    :param verb: Whether to print verbose progress messages.
    :returns: a GenotypeRecord suitable for use with phenotrex.
    """
    if (database, tax_level) not in DEEPNOG_VALID_CONFIG:
        raise RuntimeError(
            f'Unknown database and/or tax level: {database}/{tax_level}')

    device = set_device('auto')
    torch.set_num_threads(1)
    weights_path = get_weights_path(
        database=database,
        level=str(tax_level),
        architecture=DEEPNOG_ARCH,
    )
    model_dict = torch.load(weights_path, map_location=device)
    model = load_nn(
        architecture=DEEPNOG_ARCH,
        model_dict=model_dict,
        device=device,
    )
    class_labels = model_dict['classes']
    dataset = PreloadedProteinDataset(protein_list)
    preds, confs, ids, indices = predict(model,
                                         dataset,
                                         device,
                                         batch_size=1,
                                         num_workers=1,
                                         verbose=3 if verb else 0)
    # Prefer an explicitly passed confidence threshold (as documented above);
    # otherwise fall back to the default threshold shipped with the model, if any.
    if confidence_threshold is not None:
        threshold = confidence_threshold
    elif hasattr(model, 'threshold'):
        threshold = float(model.threshold)
    else:
        threshold = None
    df = create_df(
        class_labels,
        preds,
        confs,
        ids,
        indices,
        threshold=threshold,
    )

    cogs = [x for x in df.prediction.unique() if x]
    feature_type_str = f'{database}-tax-{tax_level}'
    return GenotypeRecord(identifier=identifier,
                          feature_type=feature_type_str,
                          features=cogs)
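
A minimal usage sketch for `annotate_with_deepnog`. It assumes Biopython's `SeqIO` for reading a protein FASTA file; the file name, sample identifier, and threshold value are placeholders, and the printed feature type follows from the `f'{database}-tax-{tax_level}'` string built above.

from Bio import SeqIO

# Placeholder input path and sample name.
proteins = list(SeqIO.parse('proteins.faa', 'fasta'))
record = annotate_with_deepnog(
    identifier='sample_001',
    protein_list=proteins,
    database='eggNOG5',
    tax_level=2,
    confidence_threshold=0.99,  # discard annotations below this confidence
    verb=False,
)
print(record.feature_type)   # 'eggNOG5-tax-2'
print(len(record.features))  # number of distinct orthologous groups found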
Example 2
def _start_prediction_or_training(args):
    # Importing here makes the CLI more snappy
    from deepnog.utils import get_logger, set_device

    logger = get_logger(__name__, verbose=args.verbose)
    logger.info('Starting deepnog')

    # Sanity check command line arguments
    if args.batch_size <= 0:
        logger.error(f'Batch size must be at least one. '
                     f'Got batch size = {args.batch_size} instead.')
        sys.exit(1)

    # Better safe than sorry -- don't overwrite existing files
    if args.out is not None:
        if Path(args.out).is_file():
            logger.error(f'Output file {args.out} already exists.')
            sys.exit(1)
        elif args.phase == 'infer' and (Path(args.out).is_dir()
                                        or args.out.endswith('/')):
            logger.error(f'Output path must be a file during inference, '
                         f'but got a directory instead: {args.out}')
            sys.exit(1)

    # Set up device
    args.device = set_device(args.device)

    # Get path to deep network architecture
    config = get_config()
    module = config['architecture'][args.architecture]['module']
    cls = config['architecture'][args.architecture]['class']

    if args.phase == 'infer':
        return _start_inference(args=args, arch_module=module, arch_cls=cls)
    elif args.phase == 'train':
        return _start_training(args=args, arch_module=module, arch_cls=cls)
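
A small illustrative check of the batch-size guard above. The test name and the Namespace fields are ad-hoc choices made only to reach the `sys.exit(1)` branch; it assumes deepnog is importable so the in-function import succeeds.

import argparse
import pytest

def test_nonpositive_batch_size_exits():
    # A batch size of zero should be rejected and terminate with exit code 1.
    args = argparse.Namespace(verbose=0, batch_size=0)
    with pytest.raises(SystemExit):
        _start_prediction_or_training(args)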
Example 3
@pytest.mark.skipif(not torch.cuda.is_available(),
                    reason='No CUDA-enabled GPU available on this machine')
def test_gpu_device_available():
    # Only meaningful when a CUDA device is present; set_device('gpu') raises otherwise.
    device = 'gpu'
    assert isinstance(set_device(device), torch.device)
Example 4
def test_cpu_device():
    device = 'cpu'
    assert isinstance(set_device(device), torch.device)
Example 5
def test_auto_device():
    device = set_device('auto')
    assert isinstance(device, torch.device)
    assert str(device) in ['cpu', 'cuda'], f'Unrecognized device: {device}'
Example 6
def test_set_device():
    device = 'tpu'
    msg = f'Unknown device "{device}". Try "auto".'
    with pytest.raises(ValueError, match=msg):
        set_device(device)
Example 7
@pytest.mark.skipif(torch.cuda.is_available(),
                    reason='A CUDA-enabled GPU is available on this machine')
def test_gpu_device_unavailable():
    # Requesting a GPU without CUDA support should fail with a clear error.
    device = 'gpu'
    msg = 'no cuda-enabled gpu is available on this machine'
    with pytest.raises(RuntimeError, match=msg):
        set_device(device)
Example 8
def test_auto_device():
    device = set_device('auto')
    print(f'Auto device: {device}')
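
The tests in Examples 3 to 8 together pin down what `set_device` should do: map 'cpu', 'gpu', and 'auto' to a torch.device, raise RuntimeError when a GPU is requested but unavailable, and raise ValueError for unrecognized names. The sketch below is a hypothetical re-implementation of just that contract, not deepnog's actual code.

import torch

def set_device_sketch(device: str) -> torch.device:
    # Hypothetical stand-in that satisfies the expectations of the tests above.
    if device == 'auto':
        device = 'gpu' if torch.cuda.is_available() else 'cpu'
    if device == 'gpu':
        if not torch.cuda.is_available():
            raise RuntimeError(
                'GPU requested, but no cuda-enabled gpu is available on this machine.')
        return torch.device('cuda')
    if device == 'cpu':
        return torch.device('cpu')
    raise ValueError(f'Unknown device "{device}". Try "auto".')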