コード例 #1
0
ファイル: RFScore.py プロジェクト: BioinformaticsArchive/oddt
 def load(self, filename=''):
     """Load a pickled RFScore model, training a new one if none is found.

     Parameters
     ----------
     filename: string
         Path to a pickled scoring function. When empty, the file
         'RFScore_v<version>.pickle' (built from ``self.version``) is looked
         up first as a relative path and then in this module's directory.

     Returns
     -------
     Whatever ``scorer.load`` returns for the resolved path.
     """
     if not filename:
         pickle_name = 'RFScore_v%i.pickle' % self.version
         for candidate in [pickle_name, dirname(__file__) + '/' + pickle_name]:
             if isfile(candidate):
                 filename = candidate
                 break
         else:
             # No pickle found - train function from pregenerated descriptors.
             # A parenthesised single-argument print behaves the same on
             # Python 2 and Python 3 (the bare statement is a SyntaxError on 3).
             print("No pickle, training new scoring function.")
             rf = rfscore()
             filename = rf.train()
     return scorer.load(filename)
コード例 #2
0
 def load(self, filename=None, pdbbind_version=2016):
     """Return the NNScore model obtained from ``scorer.load``.

     Parameters
     ----------
     filename: string or None
         Path to a pickled scoring function. When ``None``, the file
         'NNScore_pdbbind<pdbbind_version>.pickle' is searched for, first as
         a relative path and then in this module's directory.

     pdbbind_version: int
         Used to build the default pickle name and forwarded to ``train``
         when no pickle exists.
     """
     if filename is None:
         pickle_name = 'NNScore_pdbbind%i.pickle' % (pdbbind_version)
         search_paths = (pickle_name,
                         path_join(dirname(__file__), pickle_name))
         # First existing candidate wins; None means nothing was found.
         filename = next((p for p in search_paths if isfile(p)), None)
         if filename is None:
             print('No pickle, training new scoring function.', file=sys.stderr)
             model = nnscore()
             filename = model.train(pdbbind_version=pdbbind_version)
     return scorer.load(filename)
コード例 #3
0
ファイル: NNScore.py プロジェクト: mwojcikowski/oddt
 def load(self, filename=None, pdbbind_version=2016):
     """Load an NNScore pickle, training a fresh model when none exists.

     Parameters
     ----------
     filename: string or None
         Explicit pickle path; passed straight to ``scorer.load`` when given.

     pdbbind_version: int
         Selects the default pickle name
         ('NNScore_pdbbind<pdbbind_version>.pickle') and is forwarded to
         ``train`` if a new model has to be built.
     """
     # An explicit path short-circuits the search entirely.
     if filename is not None:
         return scorer.load(filename)

     basename = 'NNScore_pdbbind%i.pickle' % (pdbbind_version)
     # Relative path first, then the directory holding this module.
     for location in [basename, path_join(dirname(__file__), basename)]:
         if isfile(location):
             return scorer.load(location)

     print('No pickle, training new scoring function.', file=sys.stderr)
     trained = nnscore()
     return scorer.load(trained.train(pdbbind_version=pdbbind_version))
コード例 #4
0
ファイル: NNScore.py プロジェクト: arnabchakrabarty/oddt
 def load(self, filename=''):
     """Load a pickled NNScore model, training a new one if none is found.

     Parameters
     ----------
     filename: string
         Path to a pickled scoring function. When empty, 'NNScore.pickle'
         is looked up first as a relative path and then in this module's
         directory.

     Returns
     -------
     Whatever ``scorer.load`` returns for the resolved path.
     """
     if not filename:
         for candidate in ['NNScore.pickle',
                           dirname(__file__) + '/NNScore.pickle']:
             if isfile(candidate):
                 filename = candidate
                 break
         else:
             # No pickle found - train function from pregenerated descriptors.
             # A parenthesised single-argument print behaves the same on
             # Python 2 and Python 3 (the bare statement is a SyntaxError on 3).
             print("No pickle, training new scoring function.")
             nn = nnscore()
             filename = nn.train()
     return scorer.load(filename)
コード例 #5
0
 def load(self, filename='', version=1, pdbbind_version=2016):
     """Return the RFScore model obtained from ``scorer.load``.

     Parameters
     ----------
     filename: string
         Path to a pickled scoring function. When empty, the file
         'RFScore_v<version>_pdbbind<pdbbind_version>.pickle' is searched
         for, first as a relative path and then in this module's directory.

     version: int
         RFScore version; part of the default pickle name and passed to
         ``rfscore`` when training.

     pdbbind_version: int
         Part of the default pickle name and forwarded to ``train``.
     """
     if not filename:
         pickle_name = 'RFScore_v%i_pdbbind%i.pickle' % (version,
                                                         pdbbind_version)
         candidates = (pickle_name, dirname(__file__) + '/' + pickle_name)
         located = next((p for p in candidates if isfile(p)), None)
         if located is None:
             print("No pickle, training new scoring function.", file=sys.stderr)
             rf = rfscore(version=version)
             # NOTE(review): sf_pickle receives the (empty) filename here -
             # confirm train() treats a falsy value as "use the default name".
             located = rf.train(sf_pickle=filename,
                                pdbbind_version=pdbbind_version)
         filename = located
     return scorer.load(filename)
コード例 #6
0
ファイル: NNScore.py プロジェクト: hainm/oddt
 def load(self, filename=''):
     """Load a pickled NNScore model, training a new one if none is found.

     Parameters
     ----------
     filename: string
         Path to a pickled scoring function. When empty, 'NNScore.pickle'
         is looked up first as a relative path and then in this module's
         directory.

     Returns
     -------
     Whatever ``scorer.load`` returns for the resolved path.
     """
     if not filename:
         default_name = 'NNScore.pickle'
         for candidate in [default_name, dirname(__file__) + '/' + default_name]:
             if isfile(candidate):
                 filename = candidate
                 break
         else:
             # No pickle found - train function from pregenerated descriptors.
             # A parenthesised single-argument print behaves the same on
             # Python 2 and Python 3 (the bare statement is a SyntaxError on 3).
             print("No pickle, training new scoring function.")
             nn = nnscore()
             filename = nn.train()
     return scorer.load(filename)
コード例 #7
0
ファイル: RFScore.py プロジェクト: mwojcikowski/oddt
 def load(self, filename=None, version=1, pdbbind_version=2016):
     """Load an RFScore pickle, training a fresh model when none exists.

     Parameters
     ----------
     filename: string or None
         Explicit pickle path; passed straight to ``scorer.load`` when given.

     version: int
         RFScore version; part of the default pickle name and passed to
         ``rfscore`` when training.

     pdbbind_version: int
         Part of the default pickle name
         ('RFScore_v<version>_pdbbind<pdbbind_version>.pickle') and forwarded
         to ``train``.
     """
     # An explicit path short-circuits the search entirely.
     if filename is not None:
         return scorer.load(filename)

     basename = 'RFScore_v%i_pdbbind%i.pickle' % (version, pdbbind_version)
     # Relative path first, then the directory holding this module.
     for location in [basename, path_join(dirname(__file__), basename)]:
         if isfile(location):
             return scorer.load(location)

     print('No pickle, training new scoring function.',
           file=sys.stderr)
     model = rfscore(version=version)
     # filename is still None at this point and is forwarded as sf_pickle.
     return scorer.load(model.train(sf_pickle=filename,
                                    pdbbind_version=pdbbind_version))
コード例 #8
0
 def load(self, filename=None, version=1, pdbbind_version=2016):
     """Return the RFScore model obtained from ``scorer.load``.

     Parameters
     ----------
     filename: string or None
         Path to a pickled scoring function. When ``None``, the file
         'RFScore_v<version>_pdbbind<pdbbind_version>.pickle' is searched
         for, first as a relative path and then in this module's directory.

     version: int
         RFScore version; part of the default pickle name and passed to
         ``rfscore`` when training.

     pdbbind_version: int
         Part of the default pickle name and forwarded to ``train``.
     """
     if filename is None:
         pickle_name = ('RFScore_v%i_pdbbind%i.pickle'
                        % (version, pdbbind_version))
         search_paths = (pickle_name,
                         path_join(dirname(__file__), pickle_name))
         # First existing candidate wins; None means nothing was found.
         found = next((p for p in search_paths if isfile(p)), None)
         if found is None:
             print('No pickle, training new scoring function.',
                   file=sys.stderr)
             model = rfscore(version=version)
             # filename is None here and is forwarded unchanged as sf_pickle.
             found = model.train(sf_pickle=filename,
                                 pdbbind_version=pdbbind_version)
         filename = found
     return scorer.load(filename)
コード例 #9
0
 def load(self, filename='', version=1):
     """Load a pickled RFScore model, training a new one if none is found.

     Parameters
     ----------
     filename: string
         Path to a pickled scoring function. When empty, the file
         'RFScore_v<version>.pickle' is looked up first as a relative path
         and then in this module's directory.

     version: int
         RFScore version; part of the default pickle name and passed to
         ``rfscore`` when training.

     Returns
     -------
     Whatever ``scorer.load`` returns for the resolved path.
     """
     if not filename:
         pickle_name = 'RFScore_v%i.pickle' % version
         for candidate in [pickle_name, dirname(__file__) + '/' + pickle_name]:
             if isfile(candidate):
                 filename = candidate
                 break
         else:
             # No pickle found - train function from pregenerated descriptors.
             # A parenthesised single-argument print behaves the same on
             # Python 2 and Python 3 (the bare statement is a SyntaxError on 3).
             print("No pickle, training new scoring function.")
             rf = rfscore(version=version)
             # filename is still the caller's empty value here, as before.
             filename = rf.train(sf_pickle=filename)
     return scorer.load(filename)
コード例 #10
0
 def load(self, filename='', pdbbind_version=2016):
     """Load an NNScore pickle, training a fresh model when none exists.

     Parameters
     ----------
     filename: string
         Explicit pickle path; passed straight to ``scorer.load`` when
         non-empty.

     pdbbind_version: int
         Selects the default pickle name
         ('NNScore_pdbbind<pdbbind_version>.pickle') and is forwarded to
         ``train`` if a new model has to be built.
     """
     # A non-empty path short-circuits the search entirely.
     if filename:
         return scorer.load(filename)

     basename = 'NNScore_pdbbind%i.pickle' % (pdbbind_version)
     # Relative path first, then the directory holding this module.
     for location in (basename, dirname(__file__) + '/' + basename):
         if isfile(location):
             return scorer.load(location)

     print("No pickle, training new scoring function.",
           file=sys.stderr)
     model = nnscore()
     return scorer.load(model.train(pdbbind_version=pdbbind_version))
コード例 #11
0
 def load(self, filename=None, version='linear', pdbbind_version=2016,
          depth_protein=5, depth_ligand=1, size=65536):
     """Load a PLECscore pickle, training a fresh model when none exists.

     Parameters
     ----------
     filename: string or None
         Explicit pickle path; passed straight to ``scorer.load`` when given.

     version: string
         Model variant; part of the default pickle name and passed to
         ``PLECscore`` when training.

     pdbbind_version: int
         Part of the default pickle name and forwarded to ``train``.

     depth_protein, depth_ligand, size: int
         Only used here to build the default pickle name.
     """
     # An explicit path short-circuits the search entirely.
     if filename is not None:
         return scorer.load(filename)

     # FIXME: it would be cool to have templates of names for a class
     name_fields = (version, depth_protein, depth_ligand,
                    pdbbind_version, size)
     basename = 'PLEC%s_p%i_l%i_pdbbind%i_s%i.pickle' % name_fields
     # Relative path first, then the directory holding this module.
     for location in [basename, path_join(dirname(__file__), basename)]:
         if isfile(location):
             return scorer.load(location)

     print('No pickle, training new scoring function.',
           file=sys.stderr)
     # NOTE(review): depth_protein / depth_ligand / size are not passed to
     # PLECscore() here although they are part of the pickle name - confirm
     # whether the constructor should receive them.
     model = PLECscore(version=version)
     # filename is still None at this point and is forwarded as sf_pickle.
     return scorer.load(model.train(sf_pickle=filename,
                                    pdbbind_version=pdbbind_version))
コード例 #12
0
ファイル: PLECscore.py プロジェクト: mwojcikowski/oddt
 def load(self, filename=None, version='linear', pdbbind_version=2016,
          depth_protein=5, depth_ligand=1, size=65536):
     """Return the PLECscore model obtained from ``scorer.load``.

     Parameters
     ----------
     filename: string or None
         Path to a pickled scoring function. When ``None``, a default name
         built from the remaining arguments is searched for, first as a
         relative path and then in this module's directory.

     version: string
         Model variant; part of the default pickle name and passed to
         ``PLECscore`` when training.

     pdbbind_version: int
         Part of the default pickle name and forwarded to ``train``.

     depth_protein, depth_ligand, size: int
         Only used here to build the default pickle name.
     """
     if filename is None:
         # FIXME: it would be cool to have templates of names for a class
         pickle_name = ('PLEC%s_p%i_l%i_pdbbind%i_s%i.pickle' %
                        (version, depth_protein, depth_ligand,
                         pdbbind_version, size))
         search_paths = (pickle_name,
                         path_join(dirname(__file__), pickle_name))
         # First existing candidate wins; None means nothing was found.
         found = next((p for p in search_paths if isfile(p)), None)
         if found is None:
             print('No pickle, training new scoring function.',
                   file=sys.stderr)
             # NOTE(review): depth_protein / depth_ligand / size are not
             # passed to PLECscore() although they are part of the pickle
             # name - confirm whether the constructor should receive them.
             model = PLECscore(version=version)
             # filename is None here and is forwarded unchanged as sf_pickle.
             found = model.train(sf_pickle=filename,
                                 pdbbind_version=pdbbind_version)
         filename = found
     return scorer.load(filename)
コード例 #13
0
ファイル: test_virtualscreening.py プロジェクト: ravila4/oddt
def test_vs_scoring():
    """Check ``virtualscreening.score`` with NNScore and RFScore models.

    Trains mocked scoring functions into a temporary directory, scores the
    docked XIAP actives by name, by pickle path and by scorer object, and
    verifies the score fields appear in fetched molecules and written
    SDF/CSV output. All temporary artefacts (pickles, symlinks, the
    temporary directory) are removed even if an assertion fails.
    """
    import shutil  # local import: only needed for temp-dir cleanup

    protein = next(oddt.toolkit.readfile('pdb', xiap_protein))
    protein.protein = True

    data_dir = os.path.join(test_data_dir, 'data')
    home_dir = mkdtemp()
    pdbbind_versions = (2007, 2013, 2016)

    pdbbind_dir = os.path.join(data_dir, 'pdbbind')
    version_dirs = [os.path.join(data_dir, 'v%s' % pdbbind_v)
                    for pdbbind_v in pdbbind_versions]

    filenames = []
    try:
        # each version directory is a symlink to the same pdbbind directory
        for version_dir in version_dirs:
            if not os.path.isdir(version_dir):
                os.symlink(pdbbind_dir, version_dir)

        # train mocked SFs
        for model in [nnscore(n_jobs=1)
                      ] + [rfscore(version=v, n_jobs=1) for v in [1, 2, 3]]:
            model.gen_training_data(data_dir,
                                    pdbbind_versions=pdbbind_versions,
                                    home_dir=home_dir)
            filenames.append(model.train(home_dir=home_dir))
        vs = virtualscreening(n_cpu=-1, chunksize=10)
        vs.load_ligands('sdf', xiap_actives_docked)
        # error if no protein is fed
        with pytest.raises(ValueError):
            vs.score('nnscore')
        # bad sf name
        with pytest.raises(ValueError):
            vs.score('bad_sf', protein=protein)
        vs.score('nnscore', protein=xiap_protein)
        vs.score('nnscore_pdbbind2016', protein=protein)
        vs.score('rfscore_v1', protein=protein)
        vs.score('rfscore_v1_pdbbind2016', protein=protein)
        vs.score('rfscore_v2', protein=protein)
        vs.score('rfscore_v3', protein=protein)
        # use pickle directly
        vs.score(filenames[0], protein=protein)
        # pass SF object directly
        vs.score(scorer.load(filenames[0]), protein=protein)
        # pass wrong object (sum is not an instance of scorer)
        with pytest.raises(ValueError):
            vs.score(sum, protein=protein)

        mols = list(vs.fetch())

        assert len(mols) == 100
        mol_data = mols[0].data
        assert 'nnscore' in mol_data
        assert 'rfscore_v1' in mol_data
        assert 'rfscore_v2' in mol_data
        assert 'rfscore_v3' in mol_data

        vs = virtualscreening(n_cpu=-1, chunksize=10)
        vs.load_ligands('sdf', xiap_actives_docked)
        vs.score('nnscore', protein=protein)
        vs.score('rfscore_v1', protein=protein)
        vs.score('rfscore_v2', protein=protein)
        vs.score('rfscore_v3', protein=protein)
        with NamedTemporaryFile('w', suffix='.sdf') as molfile:
            with NamedTemporaryFile('w', suffix='.csv') as csvfile:
                vs.write('sdf', molfile.name, csv_filename=csvfile.name)
                data = pd.read_csv(csvfile.name)
                assert 'nnscore' in data.columns
                assert 'rfscore_v1' in data.columns
                assert 'rfscore_v2' in data.columns
                assert 'rfscore_v3' in data.columns

                mols = list(oddt.toolkit.readfile('sdf', molfile.name))
                assert len(mols) == 100

                vs.write_csv(
                    csvfile.name,
                    fields=['nnscore', 'rfscore_v1', 'rfscore_v2',
                            'rfscore_v3'])
                data = pd.read_csv(csvfile.name)
                assert len(data.columns) == 4
                assert 'nnscore' in data.columns
                assert 'rfscore_v1' in data.columns
                assert 'rfscore_v2' in data.columns
                assert 'rfscore_v3' in data.columns
    finally:
        # remove files (guarded so cleanup never masks the real failure)
        for f in filenames:
            if os.path.isfile(f):
                os.unlink(f)

        # remove symlinks
        for version_dir in version_dirs:
            if os.path.islink(version_dir):
                os.unlink(version_dir)

        # remove the temporary directory created by mkdtemp()
        shutil.rmtree(home_dir, ignore_errors=True)
コード例 #14
0
ファイル: virtualscreening.py プロジェクト: oddt/oddt
    def score(self, function, protein=None, *args, **kwargs):
        """Scoring procedure compatible with any scoring function implemented
        in ODDT and other pickled SFs which are subclasses of
        `oddt.scoring.scorer`.

        Parameters
        ----------
        function: string or oddt.scoring.scorer
            Which scoring function to use. A string is interpreted, in
            order, as a path to an existing pickle file, or as a name
            starting with 'rfscore', 'nnscore' or 'plec' (with optional
            '_'-separated options such as 'v1', 'pdbbind2016', 'p5', 'l1',
            's65536'), or as 'autodock_vina'. A `scorer` instance is used
            as is.

        protein: oddt.toolkit.Molecule or string
            Default protein to use as reference. A string is treated as a
            file path whose extension selects the file format.

        Raises
        ------
        ValueError
            If `protein` is None, if the name is not recognised, or if
            `function` is neither a string nor a `scorer` instance.

        Notes
        -----
        Additional parameters are passed directly to the scoring function.
        """
        if isinstance(protein, six.string_types):
            extension = protein.split('.')[-1]
            protein = next(oddt.toolkit.readfile(extension, protein))
            protein.protein = True
        elif protein is None:
            raise ValueError('Protein needs to be set for structure based '
                             'scoring')
        # trigger cache
        protein.atom_dict

        if isinstance(function, six.string_types):
            name = function.lower()
            if isfile(function):
                sf = scorer.load(function)
            elif name.startswith('rfscore'):
                from oddt.scoring.functions.RFScore import rfscore
                new_kwargs = {}
                for bit in name.split('_'):
                    if bit.startswith('pdbbind'):
                        new_kwargs['pdbbind_version'] = int(bit.replace('pdbbind', ''))
                    elif bit.startswith('v'):
                        new_kwargs['version'] = int(bit.replace('v', ''))
                sf = rfscore.load(**new_kwargs)
            elif name.startswith('nnscore'):
                from oddt.scoring.functions.NNScore import nnscore
                new_kwargs = {}
                for bit in name.split('_'):
                    if bit.startswith('pdbbind'):
                        new_kwargs['pdbbind_version'] = int(bit.replace('pdbbind', ''))
                sf = nnscore.load(**new_kwargs)
            elif name.startswith('plec'):
                from oddt.scoring.functions.PLECscore import PLECscore
                new_kwargs = {}
                # Order matters: 'pdbbind...' and 'plec...' must be tested
                # before the bare 'p' prefix.
                for bit in name.split('_'):
                    if bit.startswith('pdbbind'):
                        new_kwargs['pdbbind_version'] = int(bit.replace('pdbbind', ''))
                    elif bit.startswith('plec'):
                        new_kwargs['version'] = bit.replace('plec', '')
                    elif bit.startswith('p'):
                        new_kwargs['depth_protein'] = int(bit.replace('p', ''))
                    elif bit.startswith('l'):
                        new_kwargs['depth_ligand'] = int(bit.replace('l', ''))
                    elif bit.startswith('s'):
                        new_kwargs['size'] = int(bit.replace('s', ''))
                sf = PLECscore.load(**new_kwargs)
            elif name == 'autodock_vina':
                from oddt.docking import autodock_vina
                sf = autodock_vina(protein, *args, **kwargs)
            else:
                raise ValueError('Scoring Function %s was not implemented in '
                                 'ODDT' % function)
        elif isinstance(function, scorer):
            sf = function
        else:
            # Not every object has __name__ (e.g. an int); fall back to repr
            # so the intended ValueError is raised, not an AttributeError.
            raise ValueError('Supplied object "%s" is not an ODDT scoring '
                             'funtion' % getattr(function, '__name__',
                                                 repr(function)))
        # Every successfully resolved scoring function gets the protein set.
        sf.set_protein(protein)
        self._pipe.append(partial(method_caller, sf, 'predict_ligands'))
コード例 #15
0
ファイル: virtualscreening.py プロジェクト: kinga322/oddt
    def score(self, function, protein=None, *args, **kwargs):
        """Scoring procedure.

        Parameters
        ----------
            function: string or scorer
                Which scoring function to use. A string is interpreted, in
                order, as a name starting with 'rfscore' or 'nnscore' (with
                optional '_'-separated options such as 'v1' or
                'pdbbind2016'), as 'autodock_vina', or as a path to an
                existing pickle file. A `scorer` instance is used as is.

            protein: oddt.toolkit.Molecule or string
                Default protein to use as reference. A string is treated as
                a file path whose extension selects the file format.

        Raises
        ------
            ValueError
                If `protein` is None, if the name is not recognised, or if
                `function` is neither a string nor a `scorer` instance.

        Note
        ----
            Additional parameters are passed directly to the scoring function.
        """
        # isinstance with six.string_types also accepts unicode paths on
        # Python 2, which `type(x) is str` rejected.
        if isinstance(protein, six.string_types):
            extension = protein.split('.')[-1]
            protein = six.next(toolkit.readfile(extension, protein))
            protein.protein = True
        elif protein is None:
            # Fail with a clear message instead of an AttributeError below.
            raise ValueError('Protein needs to be set for structure based '
                             'scoring')
        # trigger cache
        protein.atom_dict

        if isinstance(function, six.string_types):
            name = function.lower()
            if name.startswith('rfscore'):
                from oddt.scoring.functions.RFScore import rfscore
                new_kwargs = {}
                for bit in name.split('_'):
                    if bit.startswith('pdbbind'):
                        new_kwargs['pdbbind_version'] = int(bit.replace('pdbbind', ''))
                    elif bit.startswith('v'):
                        new_kwargs['version'] = int(bit.replace('v', ''))
                sf = rfscore.load(**new_kwargs)
            elif name.startswith('nnscore'):
                from oddt.scoring.functions.NNScore import nnscore
                new_kwargs = {}
                for bit in name.split('_'):
                    if bit.startswith('pdbbind'):
                        new_kwargs['pdbbind_version'] = int(bit.replace('pdbbind', ''))
                sf = nnscore.load(**new_kwargs)
            elif name == 'autodock_vina':
                from oddt.docking import autodock_vina
                sf = autodock_vina(protein, *args, **kwargs)
            elif isfile(function):
                sf = scorer.load(function)
            else:
                raise ValueError('Scoring Function %s was not implemented in ODDT' % function)
        elif isinstance(function, scorer):
            sf = function
        else:
            # Not every object has __name__ (e.g. an int); fall back to repr
            # so the intended ValueError is raised, not an AttributeError.
            raise ValueError('Supplied object "%s" is not an ODDT scoring funtion'
                             % getattr(function, '__name__', repr(function)))
        # Every successfully resolved scoring function gets the protein set.
        sf.set_protein(protein)
        if self.n_cpu != 1:
            _parallel_helper_partial = partial(_parallel_helper, sf, 'predict_ligand')
            # NOTE(review): the Pool created here is never closed/joined;
            # consider keeping a reference so it can be shut down - confirm.
            self._pipe = (Pool(self.n_cpu if self.n_cpu > 0 else None)
                          .imap(_parallel_helper_partial, ({'ligand': lig}
                                                           for lig in self._pipe),
                                chunksize=100))
        else:
            self._pipe = sf.predict_ligands(self._pipe)
コード例 #16
0
ファイル: virtualscreening.py プロジェクト: ravila4/oddt
    def score(self, function, protein=None, *args, **kwargs):
        """Scoring procedure compatible with any scoring function implemented
        in ODDT and other pickled SFs which are subclasses of
        `oddt.scoring.scorer`.

        Parameters
        ----------
            function: string or oddt.scoring.scorer
                Which scoring function to use. A string is interpreted, in
                order, as a path to an existing pickle file, as a name
                starting with 'rfscore' or 'nnscore' (with optional
                '_'-separated options such as 'v1' or 'pdbbind2016'), or as
                'autodock_vina'. A `scorer` instance is used as is.

            protein: oddt.toolkit.Molecule or string
                Default protein to use as reference. A string is treated as
                a file path whose extension selects the file format.

        Raises
        ------
            ValueError
                If `protein` is None, if the name is not recognised, or if
                `function` is neither a string nor a `scorer` instance.

        Note
        ----
            Additional parameters are passed directly to the scoring function.
        """
        if isinstance(protein, six.string_types):
            extension = protein.split('.')[-1]
            protein = next(oddt.toolkit.readfile(extension, protein))
            protein.protein = True
        elif protein is None:
            raise ValueError('Protein needs to be set for structure based '
                             'scoring')
        # trigger cache
        protein.atom_dict

        if isinstance(function, six.string_types):
            name = function.lower()
            if isfile(function):
                sf = scorer.load(function)
            elif name.startswith('rfscore'):
                from oddt.scoring.functions.RFScore import rfscore
                new_kwargs = {}
                for bit in name.split('_'):
                    if bit.startswith('pdbbind'):
                        new_kwargs['pdbbind_version'] = int(bit.replace('pdbbind', ''))
                    elif bit.startswith('v'):
                        new_kwargs['version'] = int(bit.replace('v', ''))
                sf = rfscore.load(**new_kwargs)
            elif name.startswith('nnscore'):
                from oddt.scoring.functions.NNScore import nnscore
                new_kwargs = {}
                for bit in name.split('_'):
                    if bit.startswith('pdbbind'):
                        new_kwargs['pdbbind_version'] = int(bit.replace('pdbbind', ''))
                sf = nnscore.load(**new_kwargs)
            elif name == 'autodock_vina':
                from oddt.docking import autodock_vina
                sf = autodock_vina(protein, *args, **kwargs)
            else:
                raise ValueError('Scoring Function %s was not implemented in '
                                 'ODDT' % function)
        elif isinstance(function, scorer):
            sf = function
        else:
            # Not every object has __name__ (e.g. an int); fall back to repr
            # so the intended ValueError is raised, not an AttributeError.
            raise ValueError('Supplied object "%s" is not an ODDT scoring '
                             'funtion' % getattr(function, '__name__',
                                                 repr(function)))
        # Every successfully resolved scoring function gets the protein set.
        sf.set_protein(protein)
        self._pipe.append(partial(method_caller, sf, 'predict_ligands'))
コード例 #17
0
def test_vs_scoring():
    """Check ``virtualscreening.score`` with NNScore, RFScore and PLEC models.

    Trains mocked NNScore/RFScore functions into a temporary directory,
    scores the docked XIAP actives by name, by pickle path and by scorer
    object, and verifies the score fields appear in fetched molecules and
    written SDF/CSV output. All temporary artefacts (pickles, symlinks, the
    temporary directory) are removed even if an assertion fails.
    """
    import shutil  # local import: only needed for temp-dir cleanup

    protein = next(oddt.toolkit.readfile('pdb', xiap_protein))
    protein.protein = True

    data_dir = os.path.join(test_data_dir, 'data')
    home_dir = mkdtemp()
    pdbbind_versions = (2007, 2013, 2016)

    pdbbind_dir = os.path.join(data_dir, 'pdbbind')
    version_dirs = [os.path.join(data_dir, 'v%s' % pdbbind_v)
                    for pdbbind_v in pdbbind_versions]

    filenames = []
    try:
        # each version directory is a symlink to the same pdbbind directory
        for version_dir in version_dirs:
            if not os.path.isdir(version_dir):
                os.symlink(pdbbind_dir, version_dir)

        # train mocked SFs
        for model in [nnscore(n_jobs=1)] + [rfscore(version=v, n_jobs=1)
                                            for v in [1, 2, 3]]:
            model.gen_training_data(data_dir,
                                    pdbbind_versions=pdbbind_versions,
                                    home_dir=home_dir)
            filenames.append(model.train(home_dir=home_dir))
        vs = virtualscreening(n_cpu=-1, chunksize=10)
        vs.load_ligands('sdf', xiap_actives_docked)
        # error if no protein is fed
        with pytest.raises(ValueError):
            vs.score('nnscore')
        # bad sf name
        with pytest.raises(ValueError):
            vs.score('bad_sf', protein=protein)
        vs.score('nnscore', protein=xiap_protein)
        vs.score('nnscore_pdbbind2016', protein=protein)
        vs.score('rfscore_v1', protein=protein)
        vs.score('rfscore_v1_pdbbind2016', protein=protein)
        vs.score('rfscore_v2', protein=protein)
        vs.score('rfscore_v3', protein=protein)
        vs.score('pleclinear', protein=protein)
        vs.score('pleclinear_p5_l1_s65536_pdbbind2016', protein=protein)
        # use pickle directly
        vs.score(filenames[0], protein=protein)
        # pass SF object directly
        vs.score(scorer.load(filenames[0]), protein=protein)
        # pass wrong object (sum is not an instance of scorer)
        with pytest.raises(ValueError):
            vs.score(sum, protein=protein)

        mols = list(vs.fetch())

        assert len(mols) == 100
        mol_data = mols[0].data
        assert 'nnscore' in mol_data
        assert 'rfscore_v1' in mol_data
        assert 'rfscore_v2' in mol_data
        assert 'rfscore_v3' in mol_data
        assert 'PLEClinear_p5_l1_s65536' in mol_data

        vs = virtualscreening(n_cpu=-1, chunksize=10)
        vs.load_ligands('sdf', xiap_actives_docked)
        vs.score('nnscore', protein=protein)
        vs.score('rfscore_v1', protein=protein)
        vs.score('rfscore_v2', protein=protein)
        vs.score('rfscore_v3', protein=protein)
        with NamedTemporaryFile('w', suffix='.sdf') as molfile:
            with NamedTemporaryFile('w', suffix='.csv') as csvfile:
                vs.write('sdf', molfile.name, csv_filename=csvfile.name)
                data = pd.read_csv(csvfile.name)
                assert 'nnscore' in data.columns
                assert 'rfscore_v1' in data.columns
                assert 'rfscore_v2' in data.columns
                assert 'rfscore_v3' in data.columns

                mols = list(oddt.toolkit.readfile('sdf', molfile.name))
                assert len(mols) == 100

                vs.write_csv(csvfile.name, fields=['nnscore', 'rfscore_v1',
                                                   'rfscore_v2', 'rfscore_v3'])
                data = pd.read_csv(csvfile.name)
                assert len(data.columns) == 4
                assert 'nnscore' in data.columns
                assert 'rfscore_v1' in data.columns
                assert 'rfscore_v2' in data.columns
                assert 'rfscore_v3' in data.columns
    finally:
        # remove files (guarded so cleanup never masks the real failure)
        for f in filenames:
            if os.path.isfile(f):
                os.unlink(f)

        # remove symlinks
        for version_dir in version_dirs:
            if os.path.islink(version_dir):
                os.unlink(version_dir)

        # remove the temporary directory created by mkdtemp()
        shutil.rmtree(home_dir, ignore_errors=True)