Example #1
0
 def test_reorderAllCoordinatesByChainLen(self):
     # Reorders the fixture's coordinates by chain-length group and compares
     # both resulting frames against the hand-computed layout.
     pdb_stream = StringIO.StringIO(chain_padding_proto_3)
     structure = prody.parsePDBStream(pdb_stream)
     chains_by_length = {1: ['A'], 2: ['B', 'D'], 3: ['C', 'E']}
     reordered = ChainMappingRMSDMatrixCalculator.getReorderedCoordinatesByLenGroups(
         structure, "all", chains_by_length)

     # Every expected atom is (s, s + 1, s + 2); the second frame is the
     # first one shifted by 40.
     first_frame_starts = (1, 4, 7, 19, 22, 10, 13, 16, 25, 28, 31)
     expected = [[[float(start + shift + axis) for axis in range(3)]
                  for start in first_frame_starts]
                 for shift in (0, 40)]
     numpy.testing.assert_array_equal(expected, reordered)
Example #2
0
File: nma.py Project: BILAB/psico
def normalmodes_prody(selection, cutoff=15, first=7, last=10, guide=1,
        prefix='prody', states=7, factor=-1, quiet=1):
    '''
DESCRIPTION

    Anisotropic Network Model (ANM) analysis with ProDy.

    Based on:
    http://www.csb.pitt.edu/prody/examples/dynamics/enm/anm.html

ARGUMENTS

    selection = string: atom selection

    cutoff = float: cutoff passed to ANM.buildHessian {default: 15}

    first = int: first mode to create; must be at least 7, because mode 7
    is mapped to the first mode returned by ProDy {default: 7}

    last = int: last mode to create {default: 10}

    guide = 0/1: restrict the selection to "guide and alt A+" atoms and
    show the result as ribbon instead of lines {default: 1}

    prefix = string: prefix of the created objects, which are named
    prefix + mode number {default: prody}

    states = int: number of states per object; must be greater than 1
    because the displacement is interpolated over states - 1 steps
    {default: 7}

    factor = float: displacement scale; a negative value selects
    log(number of atoms) * 10 {default: -1}

    quiet = 0/1: suppress progress messages {default: 1}
    '''
    try:
        import prody
    except ImportError:
        print('Failed to import prody, please add to PYTHONPATH')
        raise CmdException

    first, last, guide = int(first), int(last), int(guide)
    states, factor, quiet = int(states), float(factor), int(quiet)
    assert first > 6

    if guide:
        selection = '(%s) and guide and alt A+' % (selection)
    tmpsele = cmd.get_unused_name('_')
    cmd.select(tmpsele, selection)

    f = StringIO(cmd.get_pdbstr(tmpsele))
    conf = prody.parsePDBStream(f)

    modes = prody.ANM()
    modes.buildHessian(conf, float(cutoff))
    # Modes are looked up below as modes[mode - 7], so every mode up to
    # `last` has to be computed: that is last - 6 modes. The previous
    # last - first + 1 fell short (IndexError) whenever first > 7.
    modes.calcModes(last - 6)

    if factor < 0:
        from math import log
        natoms = modes.numAtoms()
        factor = log(natoms) * 10
        if not quiet:
            print(' set factor to %.2f' % (factor))

    for mode in range(first, last + 1):
        name = prefix + '%d' % mode
        cmd.delete(name)

        if not quiet:
            print(' normalmodes: object "%s" for mode %d' % (name, mode))

        for state in range(1, states+1):
            # Displacement runs linearly from -factor/2 (first state) to
            # +factor/2 (last state) along the mode vector.
            xyz_it = iter(modes[mode-7].getArrayNx3() * (factor *
                    ((state-1.0)/(states-1.0) - 0.5)))
            cmd.create(name, tmpsele, 1, state, zoom=0)
            cmd.alter_state(state, name, '(x,y,z) = next(xyz_it) + (x,y,z)',
                    space={'xyz_it': xyz_it, 'next': next})

    cmd.delete(tmpsele)

    if guide:
        cmd.set('ribbon_trace_atoms', 1, prefix + '*')
        cmd.show_as('ribbon', prefix + '*')
    else:
        cmd.show_as('lines', prefix + '*')
Example #3
0
def normalmodes_prody(selection, cutoff=15, first=7, last=10, guide=1,
        prefix='prody', states=7, factor=-1, quiet=1):
    '''
DESCRIPTION

    Anisotropic Network Model (ANM) analysis with ProDy.

    Based on:
    http://www.csb.pitt.edu/prody/examples/dynamics/enm/anm.html

ARGUMENTS

    selection = string: atom selection

    cutoff = float: cutoff passed to ANM.buildHessian {default: 15}

    first = int: first mode to create; must be at least 7, because mode 7
    is mapped to the first mode returned by ProDy {default: 7}

    last = int: last mode to create {default: 10}

    guide = 0/1: restrict the selection to "guide and alt A+" atoms and
    show the result as ribbon instead of lines {default: 1}

    prefix = string: prefix of the created objects, which are named
    prefix + mode number {default: prody}

    states = int: number of states per object; must be greater than 1
    because the displacement is interpolated over states - 1 steps
    {default: 7}

    factor = float: displacement scale; a negative value selects
    log(number of atoms) * 10 {default: -1}

    quiet = 0/1: suppress progress messages {default: 1}
    '''
    try:
        import prody
    except ImportError:
        print('Failed to import prody, please add to PYTHONPATH')
        raise CmdException

    first, last, guide = int(first), int(last), int(guide)
    states, factor, quiet = int(states), float(factor), int(quiet)
    assert first > 6

    if guide:
        selection = '(%s) and guide and alt A+' % (selection)
    tmpsele = cmd.get_unused_name('_')
    cmd.select(tmpsele, selection)

    f = StringIO(cmd.get_pdbstr(tmpsele))
    conf = prody.parsePDBStream(f)

    modes = prody.ANM()
    modes.buildHessian(conf, float(cutoff))
    # Modes are looked up below as modes[mode - 7], so every mode up to
    # `last` has to be computed: that is last - 6 modes. The previous
    # last - first + 1 fell short (IndexError) whenever first > 7.
    modes.calcModes(last - 6)

    if factor < 0:
        from math import log
        natoms = modes.numAtoms()
        factor = log(natoms) * 10
        if not quiet:
            print(' set factor to %.2f' % (factor))

    for mode in range(first, last + 1):
        name = prefix + '%d' % mode
        cmd.delete(name)

        if not quiet:
            print(' normalmodes: object "%s" for mode %d' % (name, mode))

        for state in range(1, states+1):
            # Displacement runs linearly from -factor/2 (first state) to
            # +factor/2 (last state) along the mode vector.
            xyz_it = iter(modes[mode-7].getArrayNx3() * (factor *
                    ((state-1.0)/(states-1.0) - 0.5)))
            cmd.create(name, tmpsele, 1, state, zoom=0)
            # The builtin next() works on Python 2.6+ and 3, unlike the
            # Python-2-only iterator method .next(); only the two names the
            # expression needs are exposed instead of all locals.
            cmd.alter_state(state, name, '(x,y,z) = next(xyz_it) + (x,y,z)',
                    space={'xyz_it': xyz_it, 'next': next})

    cmd.delete(tmpsele)

    if guide:
        cmd.set('ribbon_trace_atoms', 1, prefix + '*')
        cmd.show_as('ribbon', prefix + '*')
    else:
        cmd.show_as('lines', prefix + '*')
Example #4
0
 def test_removeAllCoordsetsFromStructureLeavingFirst(self):
     # After pruning, a single coordinate set with these two atoms must remain.
     pdb_stream = StringIO.StringIO(switched_pdb_data)
     structure = prody.parsePDBStream(pdb_stream)
     removeAllCoordsetsFromStructureLeavingFirst(structure)
     surviving_frame = [[1.0, 2.0, 3.0], [-33.115, 1.294, -1.163]]
     numpy.testing.assert_array_equal([surviving_frame],
                                      structure.getCoordsets())
Example #5
0
    def test_getStructureChains(self):
        # One entry per frame: (coordinates of chain 'A', coordinates of chain 'X').
        per_frame = [
            ([1., 2., 3.], [-33.115, 1.294, -1.163]),
            ([4., 5., 6.], [-32.555, -2.5, -5.367]),
            ([7., 8., 9.], [-33.257, 5.28, -8.441]),
            ([10., 11., 12.], [32.306, 6.517, -1.544]),
            ([13., 14., 15.], [30.494, 10.39, -3.066]),
        ]
        expected = [{'A': numpy.array([atom_a]), 'X': numpy.array([atom_x])}
                    for atom_a, atom_x in per_frame]

        pdb_stream = StringIO.StringIO(pdb_data)
        structure = prody.parsePDBStream(pdb_stream)
        chains = ChainMappingRMSDMatrixCalculator.getStructureChains(
            structure, "all")

        # Compare frame by frame, chain 'A' first and then chain 'X'.
        for frame, wanted in enumerate(expected):
            for chain_id in ('A', 'X'):
                numpy.testing.assert_array_equal(wanted[chain_id],
                                                 chains[frame][chain_id])
Example #6
0
    def add_mol(self, mol, keep_chains=False, keep_resi=False):
        """
        Merge this molecule with ``mol`` and return the result as a new BasePDB.

        Both atom groups are copied first, so neither ``self`` nor ``mol`` is
        modified.

        Parameters:
            mol - object whose ``ag`` atom group is appended to this one
            keep_chains - if False (default), every chain of both atom groups
                is assigned a fresh ID taken from ``self._chain_order``; if
                True, chain IDs are left as they are (this behaves badly when
                both molecules use the same chain names)
            keep_resi - if False (default), residues are renumbered
                consecutively from 1, first through ``self`` and then
                through ``mol``

        Returns:
            A new BasePDB holding the atoms of both molecules.

        Raises:
            RuntimeError - if the atom groups have different numbers of
                coordinate sets, if chain and residue IDs would both collide
                while being kept, or if there are more chains than entries
                in ``self._chain_order``
        """
        ag1 = self.ag.copy()
        ag2 = mol.ag.copy()
        if ag1.numCoordsets() != ag2.numCoordsets():
            raise RuntimeError('Atom groups have different numbers of coordinate sets')

        nsets = ag1.numCoordsets()

        # Unique chain IDs in order of first appearance, so the chain-to-letter
        # assignment does not depend on string hash order.
        chains1 = list(dict.fromkeys(ag1.getChids()))
        chains2 = list(dict.fromkeys(ag2.getChids()))
        all_chains = chains1 + chains2
        if len(set(all_chains)) != len(all_chains) and keep_chains:
            logger.warning('Two atom groups contain same chain IDs, merging can go wrong')
            if keep_resi and len(set(ag1.getResnums()).intersection(set(ag2.getResnums()))) > 0:
                raise RuntimeError('Refusing to merge atom groups which contain same chain IDs AND residue IDs')

        if not keep_chains:
            if len(chains1) + len(chains2) > len(self._chain_order):
                raise RuntimeError('Total number of chains is too large, out of chain ID letters')

            iter_chains = iter(self._chain_order)
            map1 = {x: next(iter_chains) for x in chains1}
            map2 = {x: next(iter_chains) for x in chains2}

            # Relabel all atoms in a single pass from the ORIGINAL IDs.
            # Renaming chain by chain through selections merges chains as soon
            # as a new ID equals an old ID that has not been renamed yet.
            ag1.setChids([map1[chid] for chid in ag1.getChids()])
            ag2.setChids([map2[chid] for chid in ag2.getChids()])

        if not keep_resi:
            resi = 1
            for r in ag1.getHierView().iterResidues():
                r.setResnum(resi)
                resi += 1
            for r in ag2.getHierView().iterResidues():
                r.setResnum(resi)
                resi += 1

        # Serialize both groups model by model and re-parse the text so that
        # the result is one atom group.
        buf = StringIO()
        for i in range(nsets):
            if nsets > 1:
                # The model number belongs on the MODEL line itself; writing it
                # after the newline glued it onto the first ATOM record.
                buf.write('MODEL%9i\n' % (i + 1))
            prody.writePDBStream(buf, ag1, csets=i)
            prody.writePDBStream(buf, ag2, csets=i)
            if nsets > 1:
                buf.write('ENDMDL\n')
            else:
                buf.write('END\n')

        buf.seek(0)
        joint = BasePDB(ag=prody.parsePDBStream(buf))
        joint.renumber(keep_resi=True, keep_chains=True)
        buf.close()
        return joint
Example #7
0
 def test_get_dihedrals(self):
     # Dihedrals computed for the first coordinate set of the fixture must
     # match the reference values to 2 decimals.
     pdb_stream = StringIO.StringIO(pdb1)
     structure = prody.parsePDBStream(pdb_stream)
     first_coordset = structure.getCoordsets()[0]
     computed = DihedralRMSDMatrixCalculator.calculateDihedralsForCoordset(
         structure, first_coordset)
     # The unknown values at both ends are cut off before comparing.
     wanted = numpy.array(expected_dihedrals[1:-1])
     actual = numpy.array(computed[2:-2])
     numpy.testing.assert_array_almost_equal(wanted, actual, 2)
Example #8
0
 def test_getChainLengths(self):
     # Expected value reported for each chain ID of the fixture.
     wanted = {'A': 1, 'B': 2, 'C': 3, 'D': 2, 'E': 3}
     pdb_stream = StringIO.StringIO(chain_padding_proto_3)
     structure = prody.parsePDBStream(pdb_stream)
     lengths = ChainMappingRMSDMatrixCalculator.getChainLengths(
         structure, "all")
     self.assertDictEqual(wanted, lengths)
Example #9
0
    def _from_str(string):
        """
        Build a ProDy AtomGroup from its string representation.

        Parameters:
            string - a string containing the contents of a ProDy AtomGroup;
                it is wrapped in a stream and handed to prody.parsePDBStream

        Returns:
            A ProDy AtomGroup
        """
        return prody.parsePDBStream(io_tools.StringStream(string))
Example #10
0
    def test_getStructureChains(self):
        # Expected single-atom coordinates of chains 'A' and 'X', frame by frame.
        chain_a_atoms = ([1., 2., 3.], [4., 5., 6.], [7., 8., 9.],
                         [10., 11., 12.], [13., 14., 15.])
        chain_x_atoms = ([-33.115, 1.294, -1.163],
                         [-32.555, -2.5, -5.367],
                         [-33.257, 5.28, -8.441],
                         [32.306, 6.517, -1.544],
                         [30.494, 10.39, -3.066])
        expected = []
        for atom_a, atom_x in zip(chain_a_atoms, chain_x_atoms):
            expected.append({'A': numpy.array([atom_a]),
                             'X': numpy.array([atom_x])})

        pdb_stream = StringIO.StringIO(pdb_data)
        structure = prody.parsePDBStream(pdb_stream)
        chains = ChainMappingRMSDMatrixCalculator.getStructureChains(structure, "all")

        # Chain 'A' is checked before chain 'X' within every frame.
        for frame, wanted in enumerate(expected):
            numpy.testing.assert_array_equal(wanted['A'], chains[frame]['A'])
            numpy.testing.assert_array_equal(wanted['X'], chains[frame]['X'])
Example #11
0
    def add_hydrogens(self, trim=True, csets=None):
        """
        Currently disabled: always raises NotImplementedError.

        The first statement raises unconditionally, so everything below it is
        unreachable and is kept only as the draft of the intended
        implementation. That draft pipes each requested coordinate set through
        the executable at ``define.REDUCE_EXE``, collects the output as
        MODEL/ENDMDL blocks, re-parses the concatenated text into ``self.ag``,
        renumbers and returns ``self``.

        Parameters:
            trim - in the draft, run an extra '-Trim' pass whose output is
                piped into the '-FLIP' pass
            csets - in the draft, passed through ``self._make_csets`` to get
                the coordinate set indices to process

        Raises:
            NotImplementedError - always
        """
        raise NotImplementedError()

        # ---- unreachable draft implementation below ----
        output = []
        natoms = -1
        csets = self._make_csets(csets)

        for i in csets:
            if trim:
                # Two chained processes: '-Trim' feeds its stdout into '-FLIP'.
                p_start = Popen([define.REDUCE_EXE, '-Quiet', '-Trim', '-'], stdin=PIPE, stdout=PIPE, stderr=STDOUT)
                p_finish = Popen([define.REDUCE_EXE, '-Quiet', '-FLIP', '-'], stdin=p_start.stdout, stdout=PIPE,
                                 stderr=STDOUT)
            else:
                # Single '-FLIP' process acts as both ends of the pipeline.
                p_start = Popen([define.REDUCE_EXE, '-Quiet', '-FLIP', '-'], stdin=PIPE, stdout=PIPE, stderr=STDOUT)
                p_finish = p_start

            # Feed coordinate set i as PDB text, then close stdin to signal EOF.
            prody.writePDBStream(p_start.stdin, self.ag, csets=i)
            p_start.stdin.close()

            output += ['MODEL%9i\n' % (i + 1)]
            reduced = []
            # NOTE(review): each pass REPLACES `reduced` rather than extending
            # it, so only the last readlines() result is kept - confirm this
            # is intended before enabling the method.
            while p_finish.poll() is None:
                reduced = p_finish.stdout.readlines()

            p_start.wait()
            p_finish.wait()
            # NOTE(review): looks like leftover debug output.
            print(reduced)

            # Every model must yield the same number of ATOM/HETATM records as
            # the first processed one.
            natoms_cur = len(list(filter(lambda x: x.startswith('ATOM') or x.startswith('HETATM'), reduced)))
            if i == csets[0]:
                natoms = natoms_cur
            elif natoms != natoms_cur:
                raise RuntimeError('Number of atoms in reduced model %i is different from the first model (%i, %i)' % (
                i, natoms_cur, natoms))

            output += reduced
            output += ['ENDMDL\n']

            status = p_finish.poll()

            # A non-zero exit status is only logged; processing continues.
            if status != 0:
                logger.error('Called process returned ' + str(status))

        # Replace the atom group with the re-parsed output.
        self.ag = prody.parsePDBStream(StringIO(''.join(output)))
        self.renumber()
        return self
Example #12
0
    def read_result(self, seq):
        """
        Read "<seq>.dok" and return its entries as (coordinates, energy) tuples.

        The file's lines are divided by ``self.split_result``; for every piece
        the energy comes from ``self.get_energy`` and the coordinates from
        parsing the piece with ProDy. When ``self.cleanup`` is set, the .dok
        file, "<seq>.mol2", ``self.llist_fname`` and ``self.config.fname`` are
        deleted afterwards.
        """
        dok_path = "%s.dok" % seq
        with open(dok_path, 'r') as handle:
            lines = handle.readlines()

        poses = []
        for entry in self.split_result(lines):
            energy = self.get_energy(entry)
            coords = prody.parsePDBStream(StringIO.StringIO(entry)).getCoords()
            poses.append((coords, energy))

        if not self.cleanup:
            return poses

        os.remove(dok_path)
        os.remove("%s.mol2" % seq)
        os.remove(self.llist_fname)
        os.remove(self.config.fname)
        return poses
Example #13
0
 def test_reorderAllCoordinatesByChainLen(self):
     # Reorders the fixture's coordinates by chain-length group and compares
     # both resulting frames against the hand-computed layout.
     pdb_stream = StringIO.StringIO(chain_padding_proto_3)
     structure = prody.parsePDBStream(pdb_stream)
     chains_by_length = {1: ['A'], 2: ['B', 'D'], 3: ['C', 'E']}
     reordered = ChainMappingRMSDMatrixCalculator.getReorderedCoordinatesByLenGroups(
         structure, "all", chains_by_length)

     first_frame = [
         [1.0, 2.0, 3.0],
         [4.0, 5.0, 6.0],
         [7.0, 8.0, 9.0],
         [19.0, 20.0, 21.0],
         [22.0, 23.0, 24.0],
         [10.0, 11.0, 12.0],
         [13.0, 14.0, 15.0],
         [16.0, 17.0, 18.0],
         [25.0, 26.0, 27.0],
         [28.0, 29.0, 30.0],
         [31.0, 32.0, 33.0],
     ]
     second_frame = [
         [41.0, 42.0, 43.0],
         [44.0, 45.0, 46.0],
         [47.0, 48.0, 49.0],
         [59.0, 60.0, 61.0],
         [62.0, 63.0, 64.0],
         [50.0, 51.0, 52.0],
         [53.0, 54.0, 55.0],
         [56.0, 57.0, 58.0],
         [65.0, 66.0, 67.0],
         [68.0, 69.0, 70.0],
         [71.0, 72.0, 73.0],
     ]
     numpy.testing.assert_array_equal([first_frame, second_frame], reordered)
Example #14
0
def identify_best_chain(pdbid, aaseq, chain_str):
    """Determine PDB file and chain that best matches the aa sequence

    Every file matching ``DOWNLOADDIR + pdbid + "*pdb*"`` is parsed with
    ProDy and each chain is aligned against ``aaseq``
    (``pairwise2.align.globalxx``, score only). For gzipped files only the
    chains named in ``chain_str`` are considered; for all other files every
    chain is scored.

    Args:
        pdbid: PDB identifier used to build the glob pattern.
        aaseq: amino-acid sequence to match.
        chain_str: comma-separated chain IDs.

    Returns:
        A tuple ``(best_file, best_chain_id, best_score)`` where
        ``best_file`` is the file path with the ``DOWNLOADDIR`` prefix
        removed and ``best_score`` is the alignment score divided by
        ``len(aaseq)``. ``None`` if a file cannot be parsed or if no chain
        could be scored at all.
    """
    files = glob.glob(DOWNLOADDIR + pdbid + "*pdb*")
    chains = chain_str.split(',')

    chain_scores = []
    for file_path in files:
        if file_path.split('.')[-1] == "gz":
            f = gzip.open(file_path, 'rt')
            filter_chains = True
        else:
            f = open(file_path, "r")
            filter_chains = False
        # The context manager closes the file on the error path too.
        with f:
            try:
                mol, header = prody.parsePDBStream(f, header=True)
            except ValueError as e:
                print("PDB Error: %s: %s" % (pdbid, e))
                return None

        for chain in mol.iterChains():
            if filter_chains and chain.getChid() not in chains:
                continue
            chain_sequence = chain.getSequence()
            # score = pairwise2.align.globalms(aaseq, chain_sequence, 2, -1, -1, -.5, score_only=True)
            # ms: https://biopython.org/DIST/docs/api/Bio.pairwise2-module.html
            # 2 for identity, -1 for non-identity, -1 for gap, -0.5 for extending gap
            score = pairwise2.align.globalxx(aaseq, chain_sequence, score_only=True)
            # xx: no parameters
            if isinstance(score, list):
                # case [], doesn't align at all
                continue
            chain_scores.append((chain, score, file_path))

    if not chain_scores:
        # No matching file, or no chain aligned: there is no best chain.
        return None

    # max() returns the first of equally scored entries, exactly like taking
    # element 0 of a stable descending sort.
    best_chain, best_raw_score, best_path = max(chain_scores, key=lambda x: x[1])

    # Remove the directory PREFIX. str.lstrip() would strip any leading
    # characters contained in DOWNLOADDIR and could eat into the file name.
    if best_path.startswith(DOWNLOADDIR):
        best_file = best_path[len(DOWNLOADDIR):]
    else:
        best_file = best_path
    best_chain_id = best_chain.getChid()
    best_score = best_raw_score / float(len(aaseq))
    return best_file, best_chain_id, best_score
Example #15
0
 def _from_str(string):
     # Wrap the text in a stream and let ProDy parse it.
     return prody.parsePDBStream(io.StringStream(string))
Example #16
0
 def test_removeAllCoordsetsFromStructure(self):
     # Once every coordinate set has been removed, getCoordsets() must
     # return None.
     input = StringIO.StringIO(switched_pdb_data)
     pdb_structure = prody.parsePDBStream(input)
     removeAllCoordsetsFromStructure(pdb_structure)
     # Identity check: assertEqual(..., None) compares with ==, which on a
     # leftover numpy array is elementwise and ends in an error instead of a
     # clean test failure.
     self.assertIsNone(pdb_structure.getCoordsets())
Example #17
0
    def __init__(self,
                 user_defined_dir,
                 current_fragment,
                 fragment_smiles_sanitized,
                 verify_substructure=True
                 ):  # todo: fragment_string really shouldn't be a string...
        """
        Load one fragment and prepare its ProDy and RDKit representations.

        :param user_defined_dir: working directory; inputs are read from its
            'Inputs/Fragment_Inputs' and 'Inputs/Rosetta_Inputs' subdirectories
        :param current_fragment: fragment name; '<current_fragment>.pdb' is
            read from the Fragment_Inputs directory
        :param fragment_smiles_sanitized: SMILES string for the fragment; if
            RDKit cannot parse it, the ideal Mol is built from the fragment
            PDB file instead
        :param verify_substructure: when true, load the reference ligand
            '<first three characters of the directory name>_0001.pdb' from
            Rosetta_Inputs and record, for every mapped fragment atom, the
            degree of the reference-ligand atom with the same name
        :raises AssertionError: if either RDKit Mol could not be created
        """

        # --- Paths --- #
        self.current_fragment = current_fragment
        self.frag_rigid_pdb_name = f'{current_fragment}-rigid.pdb'

        self.user_defined_dir = user_defined_dir
        self.inputs_dir = os.path.join(self.user_defined_dir, 'Inputs')
        self.fragment_inputs_path = os.path.join(self.inputs_dir,
                                                 'Fragment_Inputs')
        self.rosetta_inputs_path = os.path.join(self.inputs_dir,
                                                'Rosetta_Inputs')

        # Select rigid atoms from fragment map for alignments if rigid atoms are defined in the Fragment_Inputs directory
        self.frag_inputs_dir = os.path.join(self.fragment_inputs_path,
                                            'Rigid_Fragment_Atoms')

        # --- Fragment-related variables --- #

        # Built once; also used by the SMILES fallback below.
        fragment_pdb_path = os.path.join(self.fragment_inputs_path,
                                         f'{current_fragment}.pdb')
        # Context manager so the file handle is closed right after reading.
        with open(fragment_pdb_path) as fragment_file:
            self.fragment_string = fragment_file.read()
        self.fragment_smiles_sanitized = fragment_smiles_sanitized
        self.fragment_prody = prody.parsePDBStream(
            io.StringIO(self.fragment_string)).select('not hydrogen')
        self.ideal_fragment_mapping = None  # Assigned in _init_map_fragment_smiles_onto_ideal()

        # RDKit Mol representations of fragment
        self.fragment_ideal_rdkit_mol = Chem.MolFromSmiles(
            self.fragment_smiles_sanitized)
        if self.fragment_ideal_rdkit_mol is None:
            print(
                'Unable to parse SMILES from Fragment_Inputs.csv, generating fragment Mol from file!'
            )
            try:
                self.fragment_ideal_rdkit_mol = Chem.MolFromPDBFile(
                    fragment_pdb_path)
                print(Chem.MolToSmiles(self.fragment_ideal_rdkit_mol))
            except Exception as e:
                # Best effort: a failure here is caught by the assert below.
                print(e)

        self.fragment_pdb_rdkit_mol = Chem.MolFromPDBBlock(
            self.fragment_string, removeHs=False)

        # Assert that mol objects were loaded properly
        assert not any([
            self.fragment_pdb_rdkit_mol is None,
            self.fragment_ideal_rdkit_mol is None
        ])

        # --- Run initialization functions --- #

        self._init_map_fragment_smiles_onto_ideal()

        # --- Fragment Mapping options --- #

        self.verify_substructure = verify_substructure

        if verify_substructure:
            reference_ligand_path = os.path.join(
                self.rosetta_inputs_path,
                f'{os.path.basename(user_defined_dir)[:3]}_0001.pdb')
            self.reference_ligand_rdkit_mol = Chem.MolFromPDBFile(
                reference_ligand_path, removeHs=True)

            # Map ref_fragment_index > ref_fragment name > ref_ligand name > ref_ligand degree
            ref_ligand_atomname_degree_map = {
                atom.GetMonomerInfo().GetName(): atom.GetDegree()
                for atom in self.reference_ligand_rdkit_mol.GetAtoms()
            }
            self.pdbfrag_refdegree_map = dict()

            # Only the PDB-side indices of the mapping are needed here.
            for pdb_idx in self.ideal_fragment_mapping.values():

                fragment_pdb_atom = self.fragment_pdb_rdkit_mol.GetAtomWithIdx(
                    pdb_idx)
                fragment_pdb_atom_name = fragment_pdb_atom.GetMonomerInfo(
                ).GetName()
                self.pdbfrag_refdegree_map[
                    pdb_idx] = ref_ligand_atomname_degree_map[
                        fragment_pdb_atom_name]
Example #18
0
 def test_getChainLengths(self):
     # Expected value reported for each chain ID of the fixture.
     pdb_stream = StringIO.StringIO(chain_padding_proto_3)
     structure = prody.parsePDBStream(pdb_stream)
     wanted = {'A': 1, 'B': 2, 'C': 3, 'D': 2, 'E': 3}
     self.assertDictEqual(
         wanted,
         ChainMappingRMSDMatrixCalculator.getChainLengths(structure, "all"))
Example #19
0
 def test_get_dihedrals(self):
     # Dihedrals computed for the first coordinate set of the fixture must
     # match the reference values to 2 decimals.
     pdb_stream = StringIO.StringIO(pdb1)
     structure = prody.parsePDBStream(pdb_stream)
     computed = DihedralRMSDMatrixCalculator.calculateDihedralsForCoordset(
         structure, structure.getCoordsets()[0])
     # The unknown values at both ends are cut off before comparing.
     numpy.testing.assert_array_almost_equal(
         numpy.array(expected_dihedrals[1:-1]),
         numpy.array(computed[2:-2]),
         2)
Example #20
0
 def test_removeAllCoordsetsFromStructureLeavingFirst(self):
     # After pruning, a single coordinate set with these two atoms must remain.
     pdb_stream = StringIO.StringIO(switched_pdb_data)
     structure = prody.parsePDBStream(pdb_stream)
     removeAllCoordsetsFromStructureLeavingFirst(structure)
     wanted = [[
         [1.0, 2.0, 3.0],
         [-33.115, 1.294, -1.163],
     ]]
     numpy.testing.assert_array_equal(wanted, structure.getCoordsets())
Example #21
0
 def test_removeAllCoordsetsFromStructure(self):
     # Once every coordinate set has been removed, getCoordsets() must
     # return None.
     input = StringIO.StringIO(switched_pdb_data)
     pdb_structure = prody.parsePDBStream(input)
     removeAllCoordsetsFromStructure(pdb_structure)
     # Identity check: assertEqual(..., None) compares with ==, which on a
     # leftover numpy array is elementwise and ends in an error instead of a
     # clean test failure.
     self.assertIsNone(pdb_structure.getCoordsets())
 def _from_str(string):
     # Wrap the text in a stream and let ProDy parse it.
     return prody.parsePDBStream(io.StringStream(string))