def test_get_deepnog_root():
    """Check the directory returned by ``get_deepnog_root``.

    The returned path must contain every expected subpackage directory and
    a few known files, and must not contain a handful of bogus paths.
    """
    root = get_deepnog_root()

    # The root must be the directory with all the subpackages.
    expected_packages = (
        'client',
        'config',
        'data',
        'learning',
        'models',
        'tests',
        'utils',
    )
    for package_name in expected_packages:
        assert (root / package_name).is_dir()

    # A few files that are known to exist below the root.
    expected_files = (
        "tests/data/test_inference_short.csv",
        "tests/parameters/test_deepnog.pthsmall",
        "config/deepnog_config.yml",
    )
    for relative_path in expected_files:
        assert (root / relative_path).is_file()

    # Paths that must not exist below the root.
    absent_paths = (
        "me/no/think.so",
        "plagiarism/manuscript.tex",
        "bugs",
    )
    for relative_path in absent_paths:
        assert not (root / relative_path).exists()
""" from itertools import repeat from functools import partial import pytest import numpy as np from pandas import read_csv from sklearn.preprocessing import LabelEncoder from torch.utils.data import DataLoader from Bio.Seq import Seq from Bio.SeqRecord import SeqRecord from deepnog.data import dataset as ds from deepnog.tests.utils import get_deepnog_root TESTS = get_deepnog_root() / "tests" test_file = TESTS / "data/GCF_000007025.1.faa" test_file_gzip = TESTS / "data/GCF_000007025.1.faa.gz" TRAINING_FASTA = TESTS / "data/test_training_dummy.faa" TRAINING_LABELS = TESTS / "data/test_training_dummy.faa.csv" EXPECTED_IDS_WITH_LABEL = [f'test_all_A{x}' for x in range(11)] \ + [f'test_all_C{x}' for x in range(11)] \ + [f'M{x:02d}' for x in range(1, 9)] EXPECTED_IDS = [f'test_all_A{x}' for x in range(12)] \ + [f'test_all_C{x}' for x in range(12)] \ + [f'M{x:02d}' for x in range(1, 11)] LABELS_WRONG_COL_NAMES = TESTS / "data/test_inference_short_wrong_column_names.csv" @pytest.mark.parametrize("f", [ test_file,
import pytest import numpy as np import pandas as pd from deepnog.data import train_val_test_split, group_train_val_test_split from deepnog.tests.utils import get_deepnog_root DEEPNOG_ROOT = get_deepnog_root() TESTS = DEEPNOG_ROOT/"tests" DATAFRAME_GROUP = TESTS/"data/test_split.csv" DATAFRAME_SIMPLE = TESTS/"data/test_simple_split.csv" @pytest.mark.parametrize('ratio', [[.4, .3, .3], [4, 3, 3]]) def test_simple_split(ratio): df = pd.read_csv(DATAFRAME_SIMPLE) train, val, test = ratio res = train_val_test_split(df, train_ratio=train, validation_ratio=val, test_ratio=test, random_state=123, stratify=True, shuffle=True, verbose=0) for group in [res.uniref_train, res.uniref_val, res.uniref_test]: assert group is None assert res.X_train.shape == (4, ) assert res.X_val.shape == (3, )
from pathlib import Path import tempfile import pytest import numpy as np from deepnog.learning import fit from deepnog.tests.utils import get_deepnog_root DEEPNOG_TESTS = get_deepnog_root() / "tests" TRAINING_FASTA = DEEPNOG_TESTS / "data/test_training_dummy.faa" TRAINING_CSV = DEEPNOG_TESTS / "data/test_training_dummy.faa.csv" Y_TRUE = np.array([[ 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 1, 1, 1, 1, 1, 1, 1, 1 ], [ 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 1, 1, 1, 1, 1, 1, 1, 1 ]]) @pytest.mark.parametrize('batch_size', [ 4, ]) @pytest.mark.parametrize('num_workers', [0, 2]) def test_shuffled_training(batch_size, num_workers): results = fit(architecture='deepnog', module='deepnog', cls='DeepNOG', training_sequences=TRAINING_FASTA, validation_sequences=TRAINING_FASTA,