def test_nnscore_desc():
    """Compare generated NNScore descriptors with the stored reference CSV."""
    ligands = list(oddt.toolkit.readfile('sdf', actives_sdf))
    for ligand in ligands:
        ligand.addh(only_polar=True)

    receptor = next(oddt.toolkit.readfile('pdb', receptor_pdb))
    receptor.protein = True
    receptor.addh(only_polar=True)

    # Delete molecule which has differences in Acceptor-Donor def in RDK and OB
    del ligands[65]

    gen = nnscore(protein=receptor).descriptor_generator
    descs = gen.build(ligands)

    # save correct results (for future use)
    # np.savetxt(os.path.join(results, 'nnscore_descs.csv'),
    #            descs,
    #            fmt='%.16g',
    #            delimiter=',')

    # The reference file depends on the active toolkit backend.
    if oddt.toolkit.backend == 'ob':
        reference_csv = os.path.join(results, 'nnscore_descs_ob.csv')
    else:
        reference_csv = os.path.join(results, 'nnscore_descs_rdk.csv')
    descs_correct = np.loadtxt(reference_csv, delimiter=',')

    # help debug errors
    # NOTE(review): a column with exactly one mismatch is not printed
    # (threshold is "> 1") -- confirm this is intended.
    for col in range(descs.shape[1]):
        mismatch = np.abs(descs[:, col] - descs_correct[:, col]) > 1e-4
        if mismatch.sum() > 1:
            print(col, gen.titles[col], mismatch.sum())
            print(np.vstack((descs[mismatch, col],
                             descs_correct[mismatch, col])))

    assert_array_almost_equal(descs, descs_correct, decimal=4)
def test_model_train():
    """Train NNScore and RFScore v1-v3 on the test PDBbind data and predict.

    For every model the test generates training data, trains into a
    temporary pickle, sets the receptor, predicts the first ten ligands and
    checks the prediction count, dtype and the model's score on its own
    predictions.

    Temporary ``v<version>`` symlinks are created next to the ``pdbbind``
    directory; they and the temporary home directory are removed in a
    ``finally`` clause so a failing assertion does not leave them behind.
    """
    import shutil  # local import: only needed to remove the temp directory

    mols = list(oddt.toolkit.readfile('sdf', actives_sdf))[:10]
    for mol in mols:
        mol.addh()

    rec = next(oddt.toolkit.readfile('pdb', receptor_pdb))
    rec.protein = True
    rec.addh()

    data_dir = os.path.join(test_data_dir, 'data')
    pdbbind_versions = (2007, 2013, 2016)
    pdbbind_dir = os.path.join(data_dir, 'pdbbind')
    version_dirs = [os.path.join(data_dir, 'v%s' % pdbbind_v)
                    for pdbbind_v in pdbbind_versions]

    home_dir = mkdtemp()
    try:
        # Expose the single pdbbind directory under every version name.
        for version_dir in version_dirs:
            if not os.path.isdir(version_dir):
                os.symlink(pdbbind_dir, version_dir)

        models = [nnscore(n_jobs=1)]
        models += [rfscore(version=v, n_jobs=1) for v in [1, 2, 3]]
        for model in models:
            with NamedTemporaryFile(suffix='.pickle') as f:
                model.gen_training_data(data_dir,
                                        pdbbind_versions=pdbbind_versions,
                                        home_dir=home_dir)
                model.train(home_dir=home_dir, sf_pickle=f.name)
                model.set_protein(rec)
                preds = model.predict(mols)
                assert len(preds) == 10
                # np.float was removed in NumPy 1.24; it aliased the builtin
                # float, whose dtype is float64.
                assert preds.dtype == np.float64
                assert model.score(mols, preds) == 1.0
    finally:
        # Always remove the symlinks and the temp directory, even on failure.
        for version_dir in version_dirs:
            if os.path.islink(version_dir):
                os.unlink(version_dir)
        shutil.rmtree(home_dir, ignore_errors=True)
def test_nnscore():
    """Compare NNScore descriptors for the XIAP ligands with reference values."""
    xiap_dir = os.path.join(test_data_dir, 'data/dude/xiap/actives_docked.sdf')
    ligands = list(oddt.toolkit.readfile('sdf', xiap_dir))
    for ligand in ligands:
        ligand.addh(only_polar=True)

    receptor_path = os.path.join(test_data_dir,
                                 'data/dude/xiap/receptor_rdkit.pdb')
    receptor = next(oddt.toolkit.readfile('pdb', receptor_path))
    receptor.protein = True
    receptor.addh(only_polar=True)

    # Delete molecule which has differences in Acceptor-Donor def in RDK and OB
    del ligands[65]

    # print((rec.atom_dict['atomicnum'] == 1).sum(),
    #       rec.atom_dict['isdonor'].sum(),
    #       rec.atom_dict['isdonorh'].sum())
    # for mol in mols:
    #     print(mol.num_rotors)
    #     print(sum(atom.Atom.GetAtomicNum() == 1 for atom in mol.atoms))
    #     print((mol.atom_dict['atomicnum'] == 1).sum(),
    #           mol.atom_dict['isdonor'].sum(),
    #           mol.atom_dict['isdonorh'].sum())

    gen = nnscore(protein=receptor).descriptor_generator
    descs = gen.build(ligands)

    # save correct results (for future use)
    # np.savetxt(os.path.join(test_data_dir,
    #                         'data/results/xiap/nnscore_descs.csv'),
    #            descs,
    #            fmt='%.16g',
    #            delimiter=',')
    reference_csv = os.path.join(test_data_dir,
                                 'data/results/xiap/nnscore_descs.csv')
    descs_correct = np.loadtxt(reference_csv, delimiter=',')

    # help debug errors
    # NOTE(review): a column with exactly one mismatch is not printed
    # (threshold is "> 1") -- confirm this is intended.
    for col in range(descs.shape[1]):
        mismatch = np.abs(descs[:, col] - descs_correct[:, col]) > 1e-4
        if mismatch.sum() > 1:
            print(col, gen.titles[col], mismatch.sum())
            print(np.vstack((descs[mismatch, col],
                             descs_correct[mismatch, col])))

    assert_array_almost_equal(descs, descs_correct, decimal=4)
def test_vs_scoring():
    """Score ligands in a virtual-screening pipeline with trained SFs.

    The test trains NNScore and RFScore v1-v3 on the test PDBbind data, then
    checks that ``virtualscreening.score`` accepts scoring-function names,
    a pickle filename and a loaded scorer object, rejects a missing protein,
    an unknown name and a non-scorer object with ``ValueError``, and that the
    scores end up in the fetched molecules and in the written SDF/CSV output.

    Trained pickles, temporary ``v<version>`` symlinks and the temporary home
    directory are removed in a ``finally`` clause so that a failing assertion
    does not leave them behind.
    """
    import shutil  # local import: only needed to remove the temp directory

    protein = next(oddt.toolkit.readfile('pdb', xiap_protein))
    protein.protein = True

    data_dir = os.path.join(test_data_dir, 'data')
    pdbbind_versions = (2007, 2013, 2016)
    pdbbind_dir = os.path.join(data_dir, 'pdbbind')
    version_dirs = [os.path.join(data_dir, 'v%s' % pdbbind_v)
                    for pdbbind_v in pdbbind_versions]
    score_fields = ('nnscore', 'rfscore_v1', 'rfscore_v2', 'rfscore_v3')

    home_dir = mkdtemp()
    filenames = []
    try:
        # Expose the single pdbbind directory under every version name.
        for version_dir in version_dirs:
            if not os.path.isdir(version_dir):
                os.symlink(pdbbind_dir, version_dir)

        # train mocked SFs
        models = [nnscore(n_jobs=1)]
        models += [rfscore(version=v, n_jobs=1) for v in [1, 2, 3]]
        for model in models:
            model.gen_training_data(data_dir,
                                    pdbbind_versions=pdbbind_versions,
                                    home_dir=home_dir)
            filenames.append(model.train(home_dir=home_dir))

        vs = virtualscreening(n_cpu=-1, chunksize=10)
        vs.load_ligands('sdf', xiap_actives_docked)

        # error if no protein is fed
        with pytest.raises(ValueError):
            vs.score('nnscore')
        # bad sf name
        with pytest.raises(ValueError):
            vs.score('bad_sf', protein=protein)

        # the protein may be given as a filename as well as a molecule
        vs.score('nnscore', protein=xiap_protein)
        vs.score('nnscore_pdbbind2016', protein=protein)
        vs.score('rfscore_v1', protein=protein)
        vs.score('rfscore_v1_pdbbind2016', protein=protein)
        vs.score('rfscore_v2', protein=protein)
        vs.score('rfscore_v3', protein=protein)
        # use pickle directly
        vs.score(filenames[0], protein=protein)
        # pass SF object directly
        vs.score(scorer.load(filenames[0]), protein=protein)
        # pass wrong object (sum is not an instance of scorer)
        with pytest.raises(ValueError):
            vs.score(sum, protein=protein)

        mols = list(vs.fetch())
        assert len(mols) == 100
        mol_data = mols[0].data
        for field in score_fields:
            assert field in mol_data

        vs = virtualscreening(n_cpu=-1, chunksize=10)
        vs.load_ligands('sdf', xiap_actives_docked)
        for field in score_fields:
            vs.score(field, protein=protein)

        with NamedTemporaryFile('w', suffix='.sdf') as molfile, \
                NamedTemporaryFile('w', suffix='.csv') as csvfile:
            vs.write('sdf', molfile.name, csv_filename=csvfile.name)
            data = pd.read_csv(csvfile.name)
            for field in score_fields:
                assert field in data.columns

            mols = list(oddt.toolkit.readfile('sdf', molfile.name))
            assert len(mols) == 100

            vs.write_csv(csvfile.name, fields=list(score_fields))
            data = pd.read_csv(csvfile.name)
            assert len(data.columns) == 4
            for field in score_fields:
                assert field in data.columns
    finally:
        # remove files (guarded so a missing pickle cannot mask a test failure)
        for f in filenames:
            if os.path.isfile(f):
                os.unlink(f)

        # remove symlinks
        for version_dir in version_dirs:
            if os.path.islink(version_dir):
                os.unlink(version_dir)

        # remove the temporary home directory created by mkdtemp()
        shutil.rmtree(home_dir, ignore_errors=True)
'nnscore_descs_rdk.csv'), delimiter=',') # help debug errors for i in range(descs.shape[1]): mask = np.abs(descs[:, i] - descs_correct[:, i]) > 1e-4 if mask.sum() > 1: print(i, gen.titles[i], mask.sum()) print(np.vstack((descs[mask, i], descs_correct[mask, i]))) assert_array_almost_equal(descs, descs_correct, decimal=4) models = ([ PLECscore(n_jobs=1, version=v, size=2048) for v in ['linear', 'nn', 'rf'] ] + [nnscore(n_jobs=1)] + [rfscore(version=v, n_jobs=1) for v in [1, 2, 3]]) @pytest.mark.parametrize('model', models) def test_model_train(model): mols = list(oddt.toolkit.readfile('sdf', actives_sdf))[:10] list(map(lambda x: x.addh(), mols)) rec = next(oddt.toolkit.readfile('pdb', receptor_pdb)) rec.protein = True rec.addh() data_dir = os.path.join(test_data_dir, 'data') home_dir = mkdtemp() pdbbind_versions = (2007, 2013, 2016)
def init_nnscore(files):
    """Load one NNScore model per entry of *files* into ``NNSCORES``.

    Each entry is passed to ``functions.nnscore().load`` and the result is
    appended to the module-level ``NNSCORES`` container, in iteration order.
    """
    global NNSCORES
    for model_file in files:
        loaded_model = functions.nnscore().load(model_file)
        NNSCORES.append(loaded_model)
def test_vs_scoring():
    """Score ligands in a virtual-screening pipeline with trained SFs.

    The test trains NNScore and RFScore v1-v3 on the test PDBbind data, then
    checks that ``virtualscreening.score`` accepts scoring-function names
    (including the PLEC linear model), a pickle filename and a loaded scorer
    object, rejects a missing protein, an unknown name and a non-scorer
    object with ``ValueError``, and that the scores end up in the fetched
    molecules and in the written SDF/CSV output.

    Trained pickles, temporary ``v<version>`` symlinks and the temporary home
    directory are removed in a ``finally`` clause so that a failing assertion
    does not leave them behind.
    """
    import shutil  # local import: only needed to remove the temp directory

    protein = next(oddt.toolkit.readfile('pdb', xiap_protein))
    protein.protein = True

    data_dir = os.path.join(test_data_dir, 'data')
    pdbbind_versions = (2007, 2013, 2016)
    pdbbind_dir = os.path.join(data_dir, 'pdbbind')
    version_dirs = [os.path.join(data_dir, 'v%s' % pdbbind_v)
                    for pdbbind_v in pdbbind_versions]
    score_fields = ('nnscore', 'rfscore_v1', 'rfscore_v2', 'rfscore_v3')

    home_dir = mkdtemp()
    filenames = []
    try:
        # Expose the single pdbbind directory under every version name.
        for version_dir in version_dirs:
            if not os.path.isdir(version_dir):
                os.symlink(pdbbind_dir, version_dir)

        # train mocked SFs
        models = [nnscore(n_jobs=1)]
        models += [rfscore(version=v, n_jobs=1) for v in [1, 2, 3]]
        for model in models:
            model.gen_training_data(data_dir,
                                    pdbbind_versions=pdbbind_versions,
                                    home_dir=home_dir)
            filenames.append(model.train(home_dir=home_dir))

        vs = virtualscreening(n_cpu=-1, chunksize=10)
        vs.load_ligands('sdf', xiap_actives_docked)

        # error if no protein is fed
        with pytest.raises(ValueError):
            vs.score('nnscore')
        # bad sf name
        with pytest.raises(ValueError):
            vs.score('bad_sf', protein=protein)

        # the protein may be given as a filename as well as a molecule
        vs.score('nnscore', protein=xiap_protein)
        vs.score('nnscore_pdbbind2016', protein=protein)
        vs.score('rfscore_v1', protein=protein)
        vs.score('rfscore_v1_pdbbind2016', protein=protein)
        vs.score('rfscore_v2', protein=protein)
        vs.score('rfscore_v3', protein=protein)
        vs.score('pleclinear', protein=protein)
        vs.score('pleclinear_p5_l1_s65536_pdbbind2016', protein=protein)
        # use pickle directly
        vs.score(filenames[0], protein=protein)
        # pass SF object directly
        vs.score(scorer.load(filenames[0]), protein=protein)
        # pass wrong object (sum is not an instance of scorer)
        with pytest.raises(ValueError):
            vs.score(sum, protein=protein)

        mols = list(vs.fetch())
        assert len(mols) == 100
        mol_data = mols[0].data
        for field in score_fields:
            assert field in mol_data
        assert 'PLEClinear_p5_l1_s65536' in mol_data

        vs = virtualscreening(n_cpu=-1, chunksize=10)
        vs.load_ligands('sdf', xiap_actives_docked)
        for field in score_fields:
            vs.score(field, protein=protein)

        with NamedTemporaryFile('w', suffix='.sdf') as molfile, \
                NamedTemporaryFile('w', suffix='.csv') as csvfile:
            vs.write('sdf', molfile.name, csv_filename=csvfile.name)
            data = pd.read_csv(csvfile.name)
            for field in score_fields:
                assert field in data.columns

            mols = list(oddt.toolkit.readfile('sdf', molfile.name))
            assert len(mols) == 100

            vs.write_csv(csvfile.name, fields=list(score_fields))
            data = pd.read_csv(csvfile.name)
            assert len(data.columns) == 4
            for field in score_fields:
                assert field in data.columns
    finally:
        # remove files (guarded so a missing pickle cannot mask a test failure)
        for f in filenames:
            if os.path.isfile(f):
                os.unlink(f)

        # remove symlinks
        for version_dir in version_dirs:
            if os.path.islink(version_dir):
                os.unlink(version_dir)

        # remove the temporary home directory created by mkdtemp()
        shutil.rmtree(home_dir, ignore_errors=True)