Ejemplo n.º 1
0
def test_get_deepnog_root():
    """Check that ``get_deepnog_root()`` returns the expected directory.

    The returned path must contain every listed subpackage as a directory
    and a few known files, and must not contain a handful of made-up
    entries.
    """
    root = get_deepnog_root()

    # Ensure this is the correct directory with all the subpackages.
    for subpackage in ('client', 'config', 'data', 'learning', 'models',
                       'tests', 'utils'):
        assert (root / subpackage).is_dir()

    # A few files that must be present as regular files.
    for relative_path in (
        "tests/data/test_inference_short.csv",
        "tests/parameters/test_deepnog.pthsmall",
        "config/deepnog_config.yml",
    ):
        assert (root / relative_path).is_file()

    # Entries that must not exist below the root at all.
    for relative_path in (
        "me/no/think.so",
        "plagiarism/manuscript.tex",
        "bugs",
    ):
        assert not (root / relative_path).exists()
Ejemplo n.º 2
0
"""
from itertools import repeat
from functools import partial
import pytest

import numpy as np
from pandas import read_csv
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import DataLoader
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord

from deepnog.data import dataset as ds
from deepnog.tests.utils import get_deepnog_root

# Directory below the deepnog root that holds the tests and their data files.
TESTS = get_deepnog_root() / "tests"

# FASTA fixture, plus a second path with a ``.gz`` suffix.
test_file = TESTS / "data/GCF_000007025.1.faa"
test_file_gzip = TESTS / "data/GCF_000007025.1.faa.gz"

# Dummy training sequences and the CSV file stored alongside them.
TRAINING_FASTA = TESTS / "data/test_training_dummy.faa"
TRAINING_LABELS = TESTS / "data/test_training_dummy.faa.csv"

# IDs test_all_A0..A10, test_all_C0..C10 and M01..M08 (30 entries).
EXPECTED_IDS_WITH_LABEL = [
    *(f'test_all_A{x}' for x in range(11)),
    *(f'test_all_C{x}' for x in range(11)),
    *(f'M{x:02d}' for x in range(1, 9)),
]

# IDs test_all_A0..A11, test_all_C0..C11 and M01..M10 (34 entries).
EXPECTED_IDS = [
    *(f'test_all_A{x}' for x in range(12)),
    *(f'test_all_C{x}' for x in range(12)),
    *(f'M{x:02d}' for x in range(1, 11)),
]

# CSV fixture whose file name indicates wrong column names.
LABELS_WRONG_COL_NAMES = TESTS / "data/test_inference_short_wrong_column_names.csv"


@pytest.mark.parametrize("f", [
    test_file,
Ejemplo n.º 3
0
import pytest

import numpy as np
import pandas as pd

from deepnog.data import train_val_test_split, group_train_val_test_split
from deepnog.tests.utils import get_deepnog_root

# Root directory as returned by get_deepnog_root(), and its "tests" subdirectory.
DEEPNOG_ROOT = get_deepnog_root()
TESTS = DEEPNOG_ROOT / "tests"

# CSV fixture files located under tests/data.
DATAFRAME_GROUP = TESTS / "data/test_split.csv"
DATAFRAME_SIMPLE = TESTS / "data/test_simple_split.csv"


@pytest.mark.parametrize('ratio', [[0.4, 0.3, 0.3], [4, 3, 3]])
def test_simple_split(ratio):
    """Run ``train_val_test_split`` on the simple CSV fixture.

    ``ratio`` holds the train/validation/test ratios, given once as
    fractions and once as integers. Both parametrizations are held to the
    same expectations below.
    """
    frame = pd.read_csv(DATAFRAME_SIMPLE)
    train_part, val_part, test_part = ratio
    split = train_val_test_split(
        frame,
        train_ratio=train_part,
        validation_ratio=val_part,
        test_ratio=test_part,
        random_state=123,
        stratify=True,
        shuffle=True,
        verbose=0,
    )

    # The uniref_* fields are expected to be unset for this split.
    uniref_groups = (split.uniref_train, split.uniref_val, split.uniref_test)
    for uniref_group in uniref_groups:
        assert uniref_group is None

    # Expected one-dimensional sizes of the train and validation parts.
    assert split.X_train.shape == (4, )
    assert split.X_val.shape == (3, )
Ejemplo n.º 4
0
from pathlib import Path
import tempfile
import pytest
import numpy as np

from deepnog.learning import fit
from deepnog.tests.utils import get_deepnog_root

# "tests" directory below the path returned by get_deepnog_root().
DEEPNOG_TESTS = get_deepnog_root().joinpath("tests")

# Dummy training sequences and the CSV file stored alongside them.
TRAINING_FASTA = DEEPNOG_TESTS.joinpath("data/test_training_dummy.faa")
TRAINING_CSV = DEEPNOG_TESTS.joinpath("data/test_training_dummy.faa.csv")
# Two identical rows of 30 integers each: eleven (0, 2) pairs followed by
# eight 1s.
Y_TRUE = np.array([[0, 2] * 11 + [1] * 8] * 2)


@pytest.mark.parametrize('batch_size', [
    4,
])
@pytest.mark.parametrize('num_workers', [0, 2])
def test_shuffled_training(batch_size, num_workers):
    results = fit(architecture='deepnog',
                  module='deepnog',
                  cls='DeepNOG',
                  training_sequences=TRAINING_FASTA,
                  validation_sequences=TRAINING_FASTA,