def test_make_lmdb_dataset():
    """Round-trip a PDB dataset through LMDB serialization.

    Loads the PDB fixtures, writes them out as an LMDB dataset, reloads
    the result, and checks the entry count survives the round trip.
    """
    # Load PDB dataset
    dataset = da.load_dataset('tests/test_data/pdb', 'pdb')
    assert len(dataset) == 4
    out_dir = 'tests/test_data/_output_lmdb'
    try:
        # Create LMDB dataset from PDB dataset
        da.make_lmdb_dataset(dataset, out_dir, filter_fn=None,
                             serialization_format='json', include_bonds=False)
        # Try to load generated dataset
        new_dataset = da.load_dataset(out_dir, 'lmdb')
        assert len(new_dataset) == 4
    finally:
        # Clean up even when an assertion above fails, so a rerun does not
        # trip over stale LMDB output from the previous run.
        for fname in ('data.mdb', 'lock.mdb'):
            fpath = os.path.join(out_dir, fname)
            if os.path.exists(fpath):
                os.remove(fpath)
        if os.path.isdir(out_dir):
            os.rmdir(out_dir)
def test_load_dataset_xyz():
    """Load the XYZ fixture directory and check size and coordinate dtypes."""
    dataset = da.load_dataset('tests/test_data/xyz', 'xyz')
    assert len(dataset) == 3
    for entry in dataset:
        print(entry)
        atoms = entry['atoms']
        # All three Cartesian coordinate columns must be floating point.
        assert atoms.x.dtype == 'float'
        assert atoms.y.dtype == 'float'
        assert atoms.z.dtype == 'float'
def test_load_dataset_xyzgdb():
    """Load GDB-flavored XYZ files supplied as an explicit file list."""
    base = 'tests/test_data/xyz-gdb/'
    file_list = [
        base + 'dsgdb9nsd_000005.xyz',
        base + 'dsgdb9nsd_000212.xyz',
        base + 'dsgdb9nsd_001458.xyz',
    ]
    dataset = da.load_dataset(file_list, 'xyz-gdb')
    assert len(dataset) == 3
def test_load_dataset_lmdb():
    """Load the LMDB fixture directory and check size and coordinate dtypes."""
    dataset = da.load_dataset('tests/test_data/lmdb', 'lmdb')
    assert len(dataset) == 4
    for entry in dataset:
        print(entry)
        atoms = entry['atoms']
        # Coordinates of every frame must be floating point.
        assert atoms.x.dtype == 'float'
        assert atoms.y.dtype == 'float'
        assert atoms.z.dtype == 'float'
def test_load_dataset_list():
    """Load PDB files named in a text listing and check float coordinates."""
    dataset = da.load_dataset('tests/test_data/list/pdbs.txt', 'pdb')
    assert len(dataset) == 4
    accepted = ['float', 'float32', 'float64']
    for entry in dataset:
        print(entry)
        atoms = entry['atoms']
        print(atoms.x.dtype)
        # Any float width is acceptable for the coordinate columns.
        assert atoms.x.dtype in accepted
        assert atoms.y.dtype in accepted
        assert atoms.z.dtype in accepted
def test_load_dataset_sdf():
    """Load the SDF fixture directory and check float coordinate columns."""
    dataset = da.load_dataset('tests/test_data/sdf', 'sdf')
    assert len(dataset) == 4
    accepted = ['float', 'float32', 'float64']
    for entry in dataset:
        print(entry)
        atoms = entry['atoms']
        print(atoms.x.dtype)
        # Any float width is acceptable for the coordinate columns.
        assert atoms.x.dtype in accepted
        assert atoms.y.dtype in accepted
        assert atoms.z.dtype in accepted
def test_load_dataset_xyzgdb():
    """Load GDB9 XYZ files from a list; verify count and coordinate dtypes."""
    prefix = 'tests/test_data/xyz-gdb/'
    names = ('dsgdb9nsd_000005.xyz', 'dsgdb9nsd_000212.xyz',
             'dsgdb9nsd_001458.xyz')
    dataset = da.load_dataset([prefix + n for n in names], 'xyz-gdb')
    assert len(dataset) == 3
    for entry in dataset:
        print(entry)
        atoms = entry['atoms']
        assert atoms.x.dtype == 'float'
        assert atoms.y.dtype == 'float'
        assert atoms.z.dtype == 'float'
def test_split():
    """Split an LMDB dataset by explicit indices and verify frame placement."""
    # Load LMDB dataset
    dataset = da.load_dataset('tests/test_data/lmdb', 'lmdb')
    # Split with defined indices
    train_indices = [3, 0]
    val_indices = [2]
    test_indices = [1]
    train_dataset, val_dataset, test_dataset = spl.split(
        dataset, train_indices, val_indices, test_indices)
    # Each original frame must land in the subset (and slot) its index maps to.
    assert dataset[0]['atoms'].equals(train_dataset[1]['atoms'])
    assert dataset[1]['atoms'].equals(test_dataset[0]['atoms'])
    assert dataset[2]['atoms'].equals(val_dataset[0]['atoms'])
    assert dataset[3]['atoms'].equals(train_dataset[0]['atoms'])
def test_load_dataset_list_nonexistent():
    """A listing naming a missing file should fail lazily, on element access."""
    dataset = da.load_dataset('tests/test_data/list/nonexistent.txt', 'pdb')
    assert len(dataset) == 1
    # The error surfaces only when the entry is actually read.
    with pytest.raises(FileNotFoundError):
        df = dataset[0]
def test_get_chain_sequences():
    """Extract chain sequences from the third LMDB frame."""
    dataset = da.load_dataset('tests/test_data/lmdb', 'lmdb')
    chain_sequences = seq.get_chain_sequences(dataset[2]['atoms'])
    first_chain = chain_sequences[0]
    # Second element of a chain tuple is the sequence string itself.
    assert first_chain[1] == 'NNQQ'
def test_load_dataset_silent():
    """Load the silent-file fixture directory and check the entry count."""
    dataset = da.load_dataset('tests/test_data/silent', 'silent')
    for entry in dataset:
        print(entry)
    assert len(dataset) == 8
import pandas as pd
import atom3d.datasets as da
import atom3d.filters.filters as filters
import atom3d.filters.scop as scop

# Shared fixture: loaded once at import time for the tests in this module.
dataset = da.load_dataset('tests/test_data/lmdb', 'lmdb')


# TODO: Commented code below fails.
# FileNotFoundError: [Errno 2] File ../../metadata/scop-cla-latest.txt does not exist: '../../metadata/scop-cla-latest.txt'
def test_scop_filter():
    """Placeholder: SCOP-filter test disabled until the metadata file ships."""
    # level = 'class'
    # filter_fn = scop.form_scop_filter(level, allowed=None, excluded=None)
    # for i,d in enumerate(dataset):
    #     df_fil = filter_fn(d['atoms'])
    pass
def load_example_dataset():
    """Load the LMDB example dataset stored next to this test file."""
    here = str(Path(__file__).parent.absolute())
    dataset = da.load_dataset(here + '/test_lmdb', 'lmdb')
    return dataset
"""Copy an LMDB dataset, keeping only entries whose filter value is falsy.

Usage: python <script> <input_lmdb_path> <output_lmdb_path>
"""
import sys

from atom3d.datasets import load_dataset, make_lmdb_dataset


def main(in_path, out_path):
    """Load *in_path* as an LMDB dataset and write a filtered copy to *out_path*.

    The filter keeps an entry x only when `not x` is truthy (i.e. x is falsy);
    semantics of filter_fn are defined by make_lmdb_dataset.
    """
    dataset = load_dataset(in_path, 'lmdb')
    make_lmdb_dataset(dataset, out_path, filter_fn=lambda x: not x)


# Guard so importing this module does not read argv or run the conversion.
if __name__ == '__main__':
    main(sys.argv[1], sys.argv[2])
def test_load_dataset_sdf():
    """The SDF fixture directory should yield four entries."""
    sdf_dataset = da.load_dataset('tests/test_data/sdf', 'sdf')
    assert len(sdf_dataset) == 4
def test_load_dataset_silent():
    """The silent-file fixture directory should yield four entries."""
    silent_dataset = da.load_dataset('tests/test_data/silent', 'silent')
    assert len(silent_dataset) == 4
def test_load_dataset_xyz():
    """Plain-XYZ parsing of the GDB directory should yield three entries."""
    xyz_dataset = da.load_dataset('tests/test_data/xyz-gdb', 'xyz')
    assert len(xyz_dataset) == 3
def test_load_dataset_lmdb():
    """The LMDB fixture directory should yield four entries."""
    lmdb_dataset = da.load_dataset('tests/test_data/lmdb', 'lmdb')
    assert len(lmdb_dataset) == 4