Example #1
0
def test_nnscore_desc():
    """Check NNScore descriptors against the stored reference values.

    Ligands are read from ``actives_sdf`` and the receptor from
    ``receptor_pdb``; both get polar hydrogens added.  The descriptors built
    by the NNScore descriptor generator are then compared (to 4 decimals)
    with the backend-specific CSV stored under ``results``.
    """
    ligands = list(oddt.toolkit.readfile('sdf', actives_sdf))
    for ligand in ligands:
        ligand.addh(only_polar=True)

    receptor = next(oddt.toolkit.readfile('pdb', receptor_pdb))
    receptor.protein = True
    receptor.addh(only_polar=True)

    # Drop the molecule whose Acceptor-Donor definition differs between
    # RDK and OB
    ligands.pop(65)

    gen = nnscore(protein=receptor).descriptor_generator
    descs = gen.build(ligands)
    # To regenerate the reference results (for future use):
    # np.savetxt(os.path.join(results, 'nnscore_descs.csv'),
    #            descs,
    #            fmt='%.16g',
    #            delimiter=',')
    reference_name = ('nnscore_descs_ob.csv'
                      if oddt.toolkit.backend == 'ob'
                      else 'nnscore_descs_rdk.csv')
    descs_correct = np.loadtxt(os.path.join(results, reference_name),
                               delimiter=',')

    # Debugging aid: show the columns that deviate from the reference.
    # NOTE(review): a column with exactly one deviating value is not printed
    # (threshold is "> 1") -- confirm whether that is intended.
    for col in range(descs.shape[1]):
        mismatch = np.abs(descs[:, col] - descs_correct[:, col]) > 1e-4
        n_mismatch = mismatch.sum()
        if n_mismatch > 1:
            print(col, gen.titles[col], n_mismatch)
            print(np.vstack((descs[mismatch, col],
                             descs_correct[mismatch, col])))

    assert_array_almost_equal(descs, descs_correct, decimal=4)
Example #2
0
def test_model_train():
    """Train each scoring function on the test PDBbind data and check it.

    For NNScore and RF-Score v1-v3 the test generates training data, trains
    the model into a temporary pickle, sets the receptor and predicts the
    first 10 ligands from ``actives_sdf``.  It asserts that 10 float64
    predictions come back and that ``model.score(mols, preds)`` equals 1.0.

    The ``v<version>`` symlinks created inside the test data directory and
    the temporary home directory are removed even when an assertion fails,
    so a failing run does not leave stale state behind for later tests.
    """
    import shutil  # function-scope import: only needed for temp-dir cleanup

    mols = list(oddt.toolkit.readfile('sdf', actives_sdf))[:10]
    for mol in mols:
        mol.addh()

    rec = next(oddt.toolkit.readfile('pdb', receptor_pdb))
    rec.protein = True
    rec.addh()

    data_dir = os.path.join(test_data_dir, 'data')
    pdbbind_versions = (2007, 2013, 2016)
    pdbbind_dir = os.path.join(data_dir, 'pdbbind')
    # every requested PDBbind version is served by the same directory
    version_dirs = [os.path.join(data_dir, 'v%s' % pdbbind_v)
                    for pdbbind_v in pdbbind_versions]

    home_dir = mkdtemp()
    try:
        for version_dir in version_dirs:
            if not os.path.isdir(version_dir):
                os.symlink(pdbbind_dir, version_dir)

        for model in [nnscore(n_jobs=1)] + [rfscore(version=v, n_jobs=1)
                                            for v in [1, 2, 3]]:
            with NamedTemporaryFile(suffix='.pickle') as f:
                model.gen_training_data(data_dir,
                                        pdbbind_versions=pdbbind_versions,
                                        home_dir=home_dir)
                model.train(home_dir=home_dir, sf_pickle=f.name)
                model.set_protein(rec)
                preds = model.predict(mols)
                assert len(preds) == 10
                # ``np.float`` (an alias of the builtin ``float``) was removed
                # in NumPy 1.24; ``np.float64`` is the equivalent dtype check.
                assert preds.dtype == np.float64
                assert model.score(mols, preds) == 1.0
    finally:
        # remove symlinks
        for version_dir in version_dirs:
            if os.path.islink(version_dir):
                os.unlink(version_dir)
        # mkdtemp() leaves deletion of the directory to the caller
        shutil.rmtree(home_dir, ignore_errors=True)
Example #3
0
def test_nnscore():
    """Check NNScore descriptors for the XIAP set against stored values.

    Docked actives and the receptor are read from the DUD-E XIAP test data,
    polar hydrogens are added, and the generated descriptors are compared
    (to 4 decimals) with ``data/results/xiap/nnscore_descs.csv``.
    """
    ligands_path = os.path.join(test_data_dir,
                                'data/dude/xiap/actives_docked.sdf')
    ligands = list(oddt.toolkit.readfile('sdf', ligands_path))
    for ligand in ligands:
        ligand.addh(only_polar=True)

    receptor_path = os.path.join(test_data_dir,
                                 'data/dude/xiap/receptor_rdkit.pdb')
    receptor = next(oddt.toolkit.readfile('pdb', receptor_path))
    receptor.protein = True
    receptor.addh(only_polar=True)

    # Drop the molecule whose Acceptor-Donor definition differs between
    # RDK and OB
    ligands.pop(65)

    # Handy when hydrogen / donor counts need inspecting:
    # print((rec.atom_dict['atomicnum'] == 1).sum(),
    #       rec.atom_dict['isdonor'].sum(),
    #       rec.atom_dict['isdonorh'].sum())

    # for mol in mols:
    #     print(mol.num_rotors)
    #     print(sum(atom.Atom.GetAtomicNum() == 1 for atom in mol.atoms))
    #     print((mol.atom_dict['atomicnum'] == 1).sum(),
    #           mol.atom_dict['isdonor'].sum(),
    #           mol.atom_dict['isdonorh'].sum())

    gen = nnscore(protein=receptor).descriptor_generator
    descs = gen.build(ligands)
    # To regenerate the reference results (for future use):
    # np.savetxt(os.path.join(test_data_dir,
    #                         'data/results/xiap/nnscore_descs.csv'),
    #            descs,
    #            fmt='%.16g',
    #            delimiter=',')
    reference_path = os.path.join(test_data_dir,
                                  'data/results/xiap/nnscore_descs.csv')
    descs_correct = np.loadtxt(reference_path, delimiter=',')

    # Debugging aid: show the columns that deviate from the reference.
    # NOTE(review): a column with exactly one deviating value is not printed
    # (threshold is "> 1") -- confirm whether that is intended.
    for col in range(descs.shape[1]):
        mismatch = np.abs(descs[:, col] - descs_correct[:, col]) > 1e-4
        n_mismatch = mismatch.sum()
        if n_mismatch > 1:
            print(col, gen.titles[col], n_mismatch)
            print(np.vstack((descs[mismatch, col],
                             descs_correct[mismatch, col])))

    assert_array_almost_equal(descs, descs_correct, decimal=4)
Example #4
0
def test_vs_scoring():
    """Exercise ``virtualscreening.score`` with every way of naming an SF.

    Mocked NNScore and RF-Score v1-v3 models are trained on the test PDBbind
    data, then ligands from ``xiap_actives_docked`` are scored by SF name,
    by versioned SF name, by pickle path and by a loaded scorer object.
    Invalid calls (no protein, unknown SF name, non-scorer object) must raise
    ``ValueError``.  Finally the scored molecules are written to SDF/CSV and
    the score columns are checked.

    Trained pickles, the ``v<version>`` symlinks in the test data directory
    and the temporary home directory are removed even when a step fails, so
    a failing run does not leave stale state behind for later tests.
    """
    import shutil  # function-scope import: only needed for temp-dir cleanup

    protein = next(oddt.toolkit.readfile('pdb', xiap_protein))
    protein.protein = True

    data_dir = os.path.join(test_data_dir, 'data')
    pdbbind_versions = (2007, 2013, 2016)
    pdbbind_dir = os.path.join(data_dir, 'pdbbind')
    # every requested PDBbind version is served by the same directory
    version_dirs = [os.path.join(data_dir, 'v%s' % pdbbind_v)
                    for pdbbind_v in pdbbind_versions]
    sf_names = ['nnscore', 'rfscore_v1', 'rfscore_v2', 'rfscore_v3']

    home_dir = mkdtemp()
    filenames = []
    try:
        for version_dir in version_dirs:
            if not os.path.isdir(version_dir):
                os.symlink(pdbbind_dir, version_dir)

        # train mocked SFs
        for model in [nnscore(n_jobs=1)] + [rfscore(version=v, n_jobs=1)
                                            for v in [1, 2, 3]]:
            model.gen_training_data(data_dir,
                                    pdbbind_versions=pdbbind_versions,
                                    home_dir=home_dir)
            filenames.append(model.train(home_dir=home_dir))

        vs = virtualscreening(n_cpu=-1, chunksize=10)
        vs.load_ligands('sdf', xiap_actives_docked)
        # error if no protein is fed
        with pytest.raises(ValueError):
            vs.score('nnscore')
        # bad sf name
        with pytest.raises(ValueError):
            vs.score('bad_sf', protein=protein)
        # protein passed as the same value given to readfile() above,
        # not as an already-loaded molecule
        vs.score('nnscore', protein=xiap_protein)
        vs.score('nnscore_pdbbind2016', protein=protein)
        vs.score('rfscore_v1', protein=protein)
        vs.score('rfscore_v1_pdbbind2016', protein=protein)
        vs.score('rfscore_v2', protein=protein)
        vs.score('rfscore_v3', protein=protein)
        # use pickle directly
        vs.score(filenames[0], protein=protein)
        # pass SF object directly
        vs.score(scorer.load(filenames[0]), protein=protein)
        # pass wrong object (sum is not an instance of scorer)
        with pytest.raises(ValueError):
            vs.score(sum, protein=protein)

        mols = list(vs.fetch())

        assert len(mols) == 100
        mol_data = mols[0].data
        for sf_name in sf_names:
            assert sf_name in mol_data

        vs = virtualscreening(n_cpu=-1, chunksize=10)
        vs.load_ligands('sdf', xiap_actives_docked)
        for sf_name in sf_names:
            vs.score(sf_name, protein=protein)
        with NamedTemporaryFile('w', suffix='.sdf') as molfile:
            with NamedTemporaryFile('w', suffix='.csv') as csvfile:
                vs.write('sdf', molfile.name, csv_filename=csvfile.name)
                data = pd.read_csv(csvfile.name)
                for sf_name in sf_names:
                    assert sf_name in data.columns

                mols = list(oddt.toolkit.readfile('sdf', molfile.name))
                assert len(mols) == 100

                vs.write_csv(csvfile.name, fields=sf_names)
                data = pd.read_csv(csvfile.name)
                assert len(data.columns) == 4
                for sf_name in sf_names:
                    assert sf_name in data.columns
    finally:
        # remove files
        for f in filenames:
            os.unlink(f)

        # remove symlinks
        for version_dir in version_dirs:
            if os.path.islink(version_dir):
                os.unlink(version_dir)

        # mkdtemp() leaves deletion of the directory to the caller
        shutil.rmtree(home_dir, ignore_errors=True)
Example #5
0
                                                'nnscore_descs_rdk.csv'),
                                   delimiter=',')

    # help debug errors
    for i in range(descs.shape[1]):
        mask = np.abs(descs[:, i] - descs_correct[:, i]) > 1e-4
        if mask.sum() > 1:
            print(i, gen.titles[i], mask.sum())
            print(np.vstack((descs[mask, i], descs_correct[mask, i])))

    assert_array_almost_equal(descs, descs_correct, decimal=4)


# Scoring-function instances fed to the parametrized training test:
# three PLECscore variants, NNScore, and RF-Score versions 1-3.
models = (
    [PLECscore(n_jobs=1, version=plec_version, size=2048)
     for plec_version in ['linear', 'nn', 'rf']]
    + [nnscore(n_jobs=1)]
    + [rfscore(version=rf_version, n_jobs=1) for rf_version in [1, 2, 3]]
)


@pytest.mark.parametrize('model', models)
def test_model_train(model):
    mols = list(oddt.toolkit.readfile('sdf', actives_sdf))[:10]
    list(map(lambda x: x.addh(), mols))

    rec = next(oddt.toolkit.readfile('pdb', receptor_pdb))
    rec.protein = True
    rec.addh()

    data_dir = os.path.join(test_data_dir, 'data')
    home_dir = mkdtemp()
    pdbbind_versions = (2007, 2013, 2016)
Example #6
0
def init_nnscore(files):
    """Load one NNScore model per entry of ``files`` into ``NNSCORES``.

    Each entry is passed to ``functions.nnscore().load`` and the returned
    object is appended to the module-level ``NNSCORES`` list, in order.
    """
    global NNSCORES
    for model_path in files:
        loaded_model = functions.nnscore().load(model_path)
        NNSCORES.append(loaded_model)
def test_vs_scoring():
    """Exercise ``virtualscreening.score`` with every way of naming an SF.

    Mocked NNScore and RF-Score v1-v3 models are trained on the test PDBbind
    data, then ligands from ``xiap_actives_docked`` are scored by SF name
    (including PLEClinear), by versioned SF name, by pickle path and by a
    loaded scorer object.  Invalid calls (no protein, unknown SF name,
    non-scorer object) must raise ``ValueError``.  Finally the scored
    molecules are written to SDF/CSV and the score columns are checked.

    Trained pickles, the ``v<version>`` symlinks in the test data directory
    and the temporary home directory are removed even when a step fails, so
    a failing run does not leave stale state behind for later tests.
    """
    import shutil  # function-scope import: only needed for temp-dir cleanup

    protein = next(oddt.toolkit.readfile('pdb', xiap_protein))
    protein.protein = True

    data_dir = os.path.join(test_data_dir, 'data')
    pdbbind_versions = (2007, 2013, 2016)
    pdbbind_dir = os.path.join(data_dir, 'pdbbind')
    # every requested PDBbind version is served by the same directory
    version_dirs = [os.path.join(data_dir, 'v%s' % pdbbind_v)
                    for pdbbind_v in pdbbind_versions]
    sf_names = ['nnscore', 'rfscore_v1', 'rfscore_v2', 'rfscore_v3']

    home_dir = mkdtemp()
    filenames = []
    try:
        for version_dir in version_dirs:
            if not os.path.isdir(version_dir):
                os.symlink(pdbbind_dir, version_dir)

        # train mocked SFs
        for model in [nnscore(n_jobs=1)] + [rfscore(version=v, n_jobs=1)
                                            for v in [1, 2, 3]]:
            model.gen_training_data(data_dir,
                                    pdbbind_versions=pdbbind_versions,
                                    home_dir=home_dir)
            filenames.append(model.train(home_dir=home_dir))

        vs = virtualscreening(n_cpu=-1, chunksize=10)
        vs.load_ligands('sdf', xiap_actives_docked)
        # error if no protein is fed
        with pytest.raises(ValueError):
            vs.score('nnscore')
        # bad sf name
        with pytest.raises(ValueError):
            vs.score('bad_sf', protein=protein)
        # protein passed as the same value given to readfile() above,
        # not as an already-loaded molecule
        vs.score('nnscore', protein=xiap_protein)
        vs.score('nnscore_pdbbind2016', protein=protein)
        vs.score('rfscore_v1', protein=protein)
        vs.score('rfscore_v1_pdbbind2016', protein=protein)
        vs.score('rfscore_v2', protein=protein)
        vs.score('rfscore_v3', protein=protein)
        vs.score('pleclinear', protein=protein)
        vs.score('pleclinear_p5_l1_s65536_pdbbind2016', protein=protein)
        # use pickle directly
        vs.score(filenames[0], protein=protein)
        # pass SF object directly
        vs.score(scorer.load(filenames[0]), protein=protein)
        # pass wrong object (sum is not an instance of scorer)
        with pytest.raises(ValueError):
            vs.score(sum, protein=protein)

        mols = list(vs.fetch())

        assert len(mols) == 100
        mol_data = mols[0].data
        for sf_name in sf_names:
            assert sf_name in mol_data
        assert 'PLEClinear_p5_l1_s65536' in mol_data

        vs = virtualscreening(n_cpu=-1, chunksize=10)
        vs.load_ligands('sdf', xiap_actives_docked)
        for sf_name in sf_names:
            vs.score(sf_name, protein=protein)
        with NamedTemporaryFile('w', suffix='.sdf') as molfile:
            with NamedTemporaryFile('w', suffix='.csv') as csvfile:
                vs.write('sdf', molfile.name, csv_filename=csvfile.name)
                data = pd.read_csv(csvfile.name)
                for sf_name in sf_names:
                    assert sf_name in data.columns

                mols = list(oddt.toolkit.readfile('sdf', molfile.name))
                assert len(mols) == 100

                vs.write_csv(csvfile.name, fields=sf_names)
                data = pd.read_csv(csvfile.name)
                assert len(data.columns) == 4
                for sf_name in sf_names:
                    assert sf_name in data.columns
    finally:
        # remove files
        for f in filenames:
            os.unlink(f)

        # remove symlinks
        for version_dir in version_dirs:
            if os.path.islink(version_dir):
                os.unlink(version_dir)

        # mkdtemp() leaves deletion of the directory to the caller
        shutil.rmtree(home_dir, ignore_errors=True)