def test_reorderAllCoordinatesByChainLen(self):
    """Compare getReorderedCoordinatesByLenGroups output with fixed coordinates.

    The structure is parsed from ``chain_padding_proto_3`` and the chains are
    grouped by the lengths given in the ``{length: [chain ids]}`` mapping.
    """
    pdb_stream = StringIO.StringIO(chain_padding_proto_3)
    structure = prody.parsePDBStream(pdb_stream)
    chains_by_length = {1: ['A'], 2: ['B', 'D'], 3: ['C', 'E']}

    reordered = ChainMappingRMSDMatrixCalculator.getReorderedCoordinatesByLenGroups(
        structure, "all", chains_by_length)

    expected_first = [
        [1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0],
        [19.0, 20.0, 21.0], [22.0, 23.0, 24.0],
        [10.0, 11.0, 12.0], [13.0, 14.0, 15.0], [16.0, 17.0, 18.0],
        [25.0, 26.0, 27.0], [28.0, 29.0, 30.0], [31.0, 32.0, 33.0],
    ]
    expected_second = [
        [41.0, 42.0, 43.0], [44.0, 45.0, 46.0], [47.0, 48.0, 49.0],
        [59.0, 60.0, 61.0], [62.0, 63.0, 64.0],
        [50.0, 51.0, 52.0], [53.0, 54.0, 55.0], [56.0, 57.0, 58.0],
        [65.0, 66.0, 67.0], [68.0, 69.0, 70.0], [71.0, 72.0, 73.0],
    ]
    numpy.testing.assert_array_equal([expected_first, expected_second], reordered)
def normalmodes_prody(selection, cutoff=15, first=7, last=10, guide=1,
                      prefix='prody', states=7, factor=-1, quiet=1):
    '''
DESCRIPTION

    Anisotropic Network Model (ANM) analysis with ProDy.

    Based on:
    http://www.csb.pitt.edu/prody/examples/dynamics/enm/anm.html

ARGUMENTS

    selection = string: atom selection

    cutoff = float: cutoff passed to ANM.buildHessian {default: 15}

    first = int: first mode to create, must be greater than 6 {default: 7}

    last = int: last mode to create {default: 10}

    guide = 0/1: restrict the selection to "guide and alt A+" atoms and
    show the result as ribbon instead of lines {default: 1}

    prefix = string: object name prefix, objects are named <prefix><mode>
    {default: prody}

    states = int: number of states created per object {default: 7}

    factor = float: displacement scale, a negative value selects
    log(number of atoms) * 10 {default: -1}

    quiet = 0/1: suppress messages {default: 1}
    '''
    try:
        import prody
    except ImportError:
        print('Failed to import prody, please add to PYTHONPATH')
        raise CmdException

    first, last, guide = int(first), int(last), int(guide)
    states, factor, quiet = int(states), float(factor), int(quiet)
    assert first > 6

    if guide:
        selection = '(%s) and guide and alt A+' % (selection)
    tmpsele = cmd.get_unused_name('_')
    cmd.select(tmpsele, selection)

    # The temporary selection is removed even if ProDy or PyMOL raise below.
    try:
        f = StringIO(cmd.get_pdbstr(tmpsele))
        conf = prody.parsePDBStream(f)

        modes = prody.ANM()
        modes.buildHessian(conf, float(cutoff))
        # The loop below reads modes[mode - 7] for every mode up to `last`,
        # so `last - 6` modes are required regardless of `first`. Computing
        # only `last - first + 1` left the upper indices out of range
        # whenever first > 7.
        modes.calcModes(last - 6)

        if factor < 0:
            from math import log
            natoms = modes.numAtoms()
            factor = log(natoms) * 10
            if not quiet:
                print(' set factor to %.2f' % (factor))

        for mode in range(first, last + 1):
            name = prefix + '%d' % mode
            cmd.delete(name)

            if not quiet:
                print(' normalmodes: object "%s" for mode %d' % (name, mode))

            displacement = modes[mode - 7].getArrayNx3()
            for state in range(1, states + 1):
                # Fraction runs linearly from -0.5 (state 1) to +0.5 (last state).
                scale = factor * ((state - 1.0) / (states - 1.0) - 0.5)
                xyz_it = iter(displacement * scale)
                cmd.create(name, tmpsele, 1, state, zoom=0)
                cmd.alter_state(state, name, '(x,y,z) = next(xyz_it) + (x,y,z)',
                                space={'xyz_it': xyz_it, 'next': next})
    finally:
        cmd.delete(tmpsele)

    if guide:
        cmd.set('ribbon_trace_atoms', 1, prefix + '*')
        cmd.show_as('ribbon', prefix + '*')
    else:
        cmd.show_as('lines', prefix + '*')
def normalmodes_prody(selection, cutoff=15, first=7, last=10, guide=1,
                      prefix='prody', states=7, factor=-1, quiet=1):
    '''
DESCRIPTION

    Anisotropic Network Model (ANM) analysis with ProDy.

    Based on:
    http://www.csb.pitt.edu/prody/examples/dynamics/enm/anm.html

ARGUMENTS

    selection = string: atom selection

    cutoff = float: cutoff passed to ANM.buildHessian {default: 15}

    first = int: first mode to create, must be greater than 6 {default: 7}

    last = int: last mode to create {default: 10}

    guide = 0/1: restrict the selection to "guide and alt A+" atoms and
    show the result as ribbon instead of lines {default: 1}

    prefix = string: object name prefix, objects are named <prefix><mode>
    {default: prody}

    states = int: number of states created per object {default: 7}

    factor = float: displacement scale, a negative value selects
    log(number of atoms) * 10 {default: -1}

    quiet = 0/1: suppress messages {default: 1}
    '''
    try:
        import prody
    except ImportError:
        print('Failed to import prody, please add to PYTHONPATH')
        raise CmdException

    first, last, guide = int(first), int(last), int(guide)
    states, factor, quiet = int(states), float(factor), int(quiet)
    assert first > 6

    if guide:
        selection = '(%s) and guide and alt A+' % (selection)
    tmpsele = cmd.get_unused_name('_')
    cmd.select(tmpsele, selection)

    # The temporary selection is removed even if ProDy or PyMOL raise below.
    try:
        f = StringIO(cmd.get_pdbstr(tmpsele))
        conf = prody.parsePDBStream(f)

        modes = prody.ANM()
        modes.buildHessian(conf, float(cutoff))
        # The loop below reads modes[mode - 7] for every mode up to `last`,
        # so `last - 6` modes are required regardless of `first`. Computing
        # only `last - first + 1` left the upper indices out of range
        # whenever first > 7.
        modes.calcModes(last - 6)

        if factor < 0:
            from math import log
            natoms = modes.numAtoms()
            factor = log(natoms) * 10
            if not quiet:
                print(' set factor to %.2f' % (factor))

        for mode in range(first, last + 1):
            name = prefix + '%d' % mode
            cmd.delete(name)

            if not quiet:
                print(' normalmodes: object "%s" for mode %d' % (name, mode))

            displacement = modes[mode - 7].getArrayNx3()
            for state in range(1, states + 1):
                # Fraction runs linearly from -0.5 (state 1) to +0.5 (last state).
                scale = factor * ((state - 1.0) / (states - 1.0) - 0.5)
                xyz_it = iter(displacement * scale)
                cmd.create(name, tmpsele, 1, state, zoom=0)
                # The builtin next() works on Python 2.6+ and 3, whereas the
                # iterator's .next() method exists on Python 2 only. Only the
                # two names the expression needs are exposed, not locals().
                cmd.alter_state(state, name, '(x,y,z) = next(xyz_it) + (x,y,z)',
                                space={'xyz_it': xyz_it, 'next': next})
    finally:
        cmd.delete(tmpsele)

    if guide:
        cmd.set('ribbon_trace_atoms', 1, prefix + '*')
        cmd.show_as('ribbon', prefix + '*')
    else:
        cmd.show_as('lines', prefix + '*')
def test_removeAllCoordsetsFromStructureLeavingFirst(self):
    """Check the coordsets left after removeAllCoordsetsFromStructureLeavingFirst.

    The structure is parsed from ``switched_pdb_data``; afterwards exactly one
    coordinate set with the two expected atom positions must remain.
    """
    pdb_stream = StringIO.StringIO(switched_pdb_data)
    structure = prody.parsePDBStream(pdb_stream)

    removeAllCoordsetsFromStructureLeavingFirst(structure)

    remaining = structure.getCoordsets()
    numpy.testing.assert_array_equal(
        [[[1.0, 2.0, 3.0], [-33.115, 1.294, -1.163]]],
        remaining)
def test_getStructureChains(self):
    """Compare getStructureChains output for ``pdb_data`` with fixed values.

    Each expected entry maps chain ids 'A' and 'X' to a 1x3 coordinate array.
    """
    # (chain A coordinates, chain X coordinates) for each expected entry.
    coords_per_entry = [
        ([1., 2., 3.], [-33.115, 1.294, -1.163]),
        ([4., 5., 6.], [-32.555, -2.5, -5.367]),
        ([7., 8., 9.], [-33.257, 5.28, -8.441]),
        ([10., 11., 12.], [32.306, 6.517, -1.544]),
        ([13., 14., 15.], [30.494, 10.39, -3.066]),
    ]
    expected = [
        {'A': numpy.array([a_xyz]), 'X': numpy.array([x_xyz])}
        for a_xyz, x_xyz in coords_per_entry
    ]

    pdb_stream = StringIO.StringIO(pdb_data)
    structure = prody.parsePDBStream(pdb_stream)
    result = ChainMappingRMSDMatrixCalculator.getStructureChains(structure, "all")

    for index, expected_chains in enumerate(expected):
        for chain_id in ('A', 'X'):
            numpy.testing.assert_array_equal(
                expected_chains[chain_id], result[index][chain_id])
def add_mol(self, mol, keep_chains=False, keep_resi=False):
    """
    Merge a copy of this molecule's atoms with a copy of ``mol``'s atoms.

    Note: this behaves badly when the molecules have the same chain names.

    :param mol: object exposing an ``ag`` atom group with the same number of
        coordinate sets as ``self.ag``
    :param keep_chains: keep the original chain IDs; otherwise every chain of
        both molecules gets a fresh ID taken from ``self._chain_order``
    :param keep_resi: keep the original residue numbers; otherwise residues
        are renumbered consecutively from 1 across both molecules
    :return: a new ``BasePDB`` built from the combined PDB text
    :raises RuntimeError: if the coordinate-set counts differ, if chain IDs
        and residue numbers both clash while both are kept, or if there are
        more chains than entries in ``self._chain_order``
    """
    ag1 = self.ag.copy()
    ag2 = mol.ag.copy()
    if ag1.numCoordsets() != ag2.numCoordsets():
        raise RuntimeError('Atom groups have different numbers of coordinate sets')
    nsets = ag1.numCoordsets()

    chains1 = list(set(ag1.getChids()))
    chains2 = list(set(ag2.getChids()))
    all_chains = chains1 + chains2
    if len(set(all_chains)) != len(all_chains) and keep_chains:
        logger.warning('Two atom groups contain same chain IDs, merging can go wrong')
        if keep_resi and len(set(ag1.getResnums()).intersection(set(ag2.getResnums()))) > 0:
            raise RuntimeError('Refusing to merge atom groups which contain same chain IDs AND residue IDs')

    if not keep_chains:
        if len(chains1) + len(chains2) > len(self._chain_order):
            raise RuntimeError('Total number of chains is too large, out of chain ID letters')
        iter_chains = iter(self._chain_order)
        map1 = {x: next(iter_chains) for x in chains1}
        map2 = {x: next(iter_chains) for x in chains2}
        # Remap all atoms in one pass from the ORIGINAL chain IDs. Renaming
        # chain by chain through selections merges chains whenever a new ID
        # equals an old ID that has not been renamed yet (e.g. B->A, then A->B).
        ag1.setChids([map1[chid] for chid in ag1.getChids()])
        ag2.setChids([map2[chid] for chid in ag2.getChids()])

    if not keep_resi:
        resi = 1
        for r in ag1.getHierView().iterResidues():
            r.setResnum(resi)
            resi += 1
        for r in ag2.getHierView().iterResidues():
            r.setResnum(resi)
            resi += 1

    buf = StringIO()
    for i in range(nsets):
        if nsets > 1:
            # PDB MODEL record: keyword, serial number, then the newline.
            # The previous 'MODEL \n' + str(i + 1) put the number at the
            # start of the following line instead.
            buf.write('MODEL%9i\n' % (i + 1))
        prody.writePDBStream(buf, ag1, csets=i)
        prody.writePDBStream(buf, ag2, csets=i)
        if nsets > 1:
            buf.write('ENDMDL\n')
        else:
            buf.write('END\n')
    buf.seek(0)
    joint = BasePDB(ag=prody.parsePDBStream(buf))
    joint.renumber(keep_resi=True, keep_chains=True)
    buf.close()

    return joint
def test_get_dihedrals(self):
    """Compare dihedrals computed for the first coordset of ``pdb1`` with
    ``expected_dihedrals`` to two decimals."""
    pdb_stream = StringIO.StringIO(pdb1)
    structure = prody.parsePDBStream(pdb_stream)
    first_coordset = structure.getCoordsets()[0]

    computed = DihedralRMSDMatrixCalculator.calculateDihedralsForCoordset(
        structure, first_coordset)

    # The unknown values at both ends are sliced away before comparing.
    expected_known = numpy.array(expected_dihedrals[1:-1])
    computed_known = numpy.array(computed[2:-2])
    numpy.testing.assert_array_almost_equal(expected_known, computed_known, 2)
def test_getChainLengths(self):
    """Check the chain-id -> length mapping computed for ``chain_padding_proto_3``."""
    pdb_stream = StringIO.StringIO(chain_padding_proto_3)
    structure = prody.parsePDBStream(pdb_stream)

    expected_lengths = {'A': 1, 'B': 2, 'C': 3, 'D': 2, 'E': 3}
    self.assertDictEqual(
        expected_lengths,
        ChainMappingRMSDMatrixCalculator.getChainLengths(structure, "all"))
def _from_str(string):
    """
    Load a ProDy AtomGroup from a string representation.

    Parameters:
        string - a string containing the contents of a ProDy AtomGroup

    Returns:
        A ProDy AtomGroup
    """
    # Wrap the text in a stream object so the PDB stream parser can read it.
    return prody.parsePDBStream(io_tools.StringStream(string))
def test_getStructureChains(self):
    """Compare getStructureChains output for ``pdb_data`` with fixed values.

    Each expected entry maps chain ids 'A' and 'X' to a 1x3 coordinate array.
    """
    chain_a_rows = [
        [1., 2., 3.],
        [4., 5., 6.],
        [7., 8., 9.],
        [10., 11., 12.],
        [13., 14., 15.],
    ]
    chain_x_rows = [
        [-33.115, 1.294, -1.163],
        [-32.555, -2.5, -5.367],
        [-33.257, 5.28, -8.441],
        [32.306, 6.517, -1.544],
        [30.494, 10.39, -3.066],
    ]
    expected = [
        {'A': numpy.array([row_a]), 'X': numpy.array([row_x])}
        for row_a, row_x in zip(chain_a_rows, chain_x_rows)
    ]

    pdb_stream = StringIO.StringIO(pdb_data)
    structure = prody.parsePDBStream(pdb_stream)
    result = ChainMappingRMSDMatrixCalculator.getStructureChains(structure, "all")

    for position, wanted in enumerate(expected):
        obtained = result[position]
        numpy.testing.assert_array_equal(wanted['A'], obtained['A'])
        numpy.testing.assert_array_equal(wanted['X'], obtained['X'])
def add_hydrogens(self, trim=True, csets=None):
    """
    Currently disabled: always raises NotImplementedError.

    The statements after the initial ``raise`` are unreachable. As written
    they would pipe each selected coordinate set through the executable named
    by ``define.REDUCE_EXE`` and rebuild ``self.ag`` from the combined output.

    :param trim: in the unreachable body, run an extra ``-Trim`` pass whose
        output is fed into the ``-FLIP`` pass
    :param csets: coordinate set indices, normalised via ``self._make_csets``
    :raises NotImplementedError: always
    """
    raise NotImplementedError()
    # ---- unreachable from here on; kept for reference ----
    # NOTE(review): nesting below was reconstructed from a whitespace-collapsed
    # source; confirm against the original file before re-enabling.
    output = []
    natoms = -1
    csets = self._make_csets(csets)
    for i in csets:
        if trim:
            # Two chained processes: the -Trim output is the -FLIP input.
            p_start = Popen([define.REDUCE_EXE, '-Quiet', '-Trim', '-'], stdin=PIPE, stdout=PIPE, stderr=STDOUT)
            p_finish = Popen([define.REDUCE_EXE, '-Quiet', '-FLIP', '-'], stdin=p_start.stdout, stdout=PIPE, stderr=STDOUT)
        else:
            p_start = Popen([define.REDUCE_EXE, '-Quiet', '-FLIP', '-'], stdin=PIPE, stdout=PIPE, stderr=STDOUT)
            p_finish = p_start
        prody.writePDBStream(p_start.stdin, self.ag, csets=i)
        p_start.stdin.close()
        output += ['MODEL%9i\n' % (i + 1)]
        reduced = []
        # NOTE(review): each iteration overwrites `reduced` rather than
        # extending it — verify that no output is lost if this is re-enabled.
        while p_finish.poll() is None:
            reduced = p_finish.stdout.readlines()
        p_start.wait()
        p_finish.wait()
        # NOTE(review): looks like leftover debug output.
        print(reduced)
        # Count ATOM/HETATM records so every model can be checked against the first.
        natoms_cur = len(list(filter(lambda x: x.startswith('ATOM') or x.startswith('HETATM'), reduced)))
        if i == csets[0]:
            natoms = natoms_cur
        elif natoms != natoms_cur:
            raise RuntimeError('Number of atoms in reduced model %i is different from the first model (%i, %i)' % ( i, natoms_cur, natoms))
        output += reduced
        output += ['ENDMDL\n']
        status = p_finish.poll()
        if status != 0:
            logger.error('Called process returned ' + str(status))
    self.ag = prody.parsePDBStream(StringIO(''.join(output)))
    self.renumber()
    return self
def read_result(self, seq):
    """
    Read ``<seq>.dok`` and return a list of ``(coordinates, energy)`` tuples.

    The file lines are split with ``self.split_result``; for every piece the
    energy comes from ``self.get_energy`` and the coordinates from parsing the
    piece as a PDB stream. When ``self.cleanup`` is set, the result file,
    ``<seq>.mol2``, ``self.llist_fname`` and ``self.config.fname`` are removed.
    """
    dok_path = "%s.dok" % seq
    with open(dok_path, 'r') as handle:
        lines = handle.readlines()

    poses = []
    for piece in self.split_result(lines):
        energy = self.get_energy(piece)
        parsed = prody.parsePDBStream(StringIO.StringIO(piece))
        poses.append((parsed.getCoords(), energy))

    if self.cleanup:
        os.remove(dok_path)
        os.remove("%s.mol2" % seq)
        os.remove(self.llist_fname)
        os.remove(self.config.fname)

    return poses
def test_reorderAllCoordinatesByChainLen(self):
    """Compare getReorderedCoordinatesByLenGroups output with fixed coordinates.

    The structure is parsed from ``chain_padding_proto_3`` and the chains are
    grouped by the lengths given in the ``{length: [chain ids]}`` mapping.
    """
    pdb_stream = StringIO.StringIO(chain_padding_proto_3)
    structure = prody.parsePDBStream(pdb_stream)
    chains_by_length = {1: ['A'], 2: ['B', 'D'], 3: ['C', 'E']}

    reordered = ChainMappingRMSDMatrixCalculator.getReorderedCoordinatesByLenGroups(
        structure, "all", chains_by_length)

    expected_first = [
        [1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0],
        [19.0, 20.0, 21.0], [22.0, 23.0, 24.0],
        [10.0, 11.0, 12.0], [13.0, 14.0, 15.0], [16.0, 17.0, 18.0],
        [25.0, 26.0, 27.0], [28.0, 29.0, 30.0], [31.0, 32.0, 33.0],
    ]
    expected_second = [
        [41.0, 42.0, 43.0], [44.0, 45.0, 46.0], [47.0, 48.0, 49.0],
        [59.0, 60.0, 61.0], [62.0, 63.0, 64.0],
        [50.0, 51.0, 52.0], [53.0, 54.0, 55.0], [56.0, 57.0, 58.0],
        [65.0, 66.0, 67.0], [68.0, 69.0, 70.0], [71.0, 72.0, 73.0],
    ]
    numpy.testing.assert_array_equal([expected_first, expected_second], reordered)
def identify_best_chain(pdbid, aaseq, chain_str):
    """Determine PDB file and chain that best matches the aa sequence.

    Every file matching ``DOWNLOADDIR + pdbid + "*pdb*"`` is parsed and each
    chain is scored against ``aaseq`` with ``pairwise2.align.globalxx``.
    For gzipped files only the chains listed in ``chain_str`` are considered.

    Args:
        pdbid: PDB identifier used to build the glob pattern.
        aaseq: amino-acid sequence to match.
        chain_str: comma-separated chain IDs.

    Returns:
        ``(file_name, chain_id, score)`` where ``file_name`` is the matching
        path with the ``DOWNLOADDIR`` prefix removed and ``score`` is the
        alignment score divided by ``len(aaseq)``. Returns ``None`` if a file
        cannot be parsed or if no chain produced a score.
    """
    files = glob.glob(DOWNLOADDIR + pdbid + "*pdb*")
    chains = chain_str.split(',')
    chain_scores = []

    for file_path in files:
        is_gzipped = file_path.split('.')[-1] == "gz"
        opener = gzip.open if is_gzipped else open
        # Chain filtering is applied to gzipped files only.
        filter_chains = is_gzipped

        # The context manager closes the handle on the error path as well.
        with opener(file_path, 'rt') as f:
            try:
                mol, _header = prody.parsePDBStream(f, header=True)
            except ValueError as e:
                print("PDB Error: %s: %s" % (pdbid, e))
                return None

        for chain in mol.iterChains():
            if filter_chains and chain.getChid() not in chains:
                continue
            chain_sequence = chain.getSequence()
            # globalxx takes no scoring parameters (unlike globalms).
            score = pairwise2.align.globalxx(aaseq, chain_sequence, score_only=True)
            if isinstance(score, list):  # case [], doesn't align at all
                continue
            chain_scores.append((chain, score, file_path))

    if not chain_scores:
        # No file matched or no chain aligned; nothing to rank.
        return None

    # max() returns the first entry with the top score, the same element a
    # stable descending sort would place first.
    best_chain, raw_score, best_path = max(chain_scores, key=lambda x: x[1])

    # Remove DOWNLOADDIR as a prefix. str.lstrip() would treat it as a set of
    # characters and could also eat leading characters of the file name.
    if best_path.startswith(DOWNLOADDIR):
        best_file = best_path[len(DOWNLOADDIR):]
    else:
        best_file = best_path

    best_score = raw_score / float(len(aaseq))
    return best_file, best_chain.getChid(), best_score
def _from_str(string):
    """Parse ``string`` as PDB text and return what ``prody.parsePDBStream`` yields.

    The text is wrapped in an ``io.StringStream`` before being handed to the
    parser.
    """
    stream = io.StringStream(string)
    parsed = prody.parsePDBStream(stream)
    return parsed
def test_removeAllCoordsetsFromStructure(self):
    """After removeAllCoordsetsFromStructure the structure must report no coordsets."""
    pdb_stream = StringIO.StringIO(switched_pdb_data)
    pdb_structure = prody.parsePDBStream(pdb_stream)

    removeAllCoordsetsFromStructure(pdb_structure)

    # Identity check instead of assertEqual(..., None). If coordinates were
    # still present (presumably as a numpy array), `== None` would compare
    # elementwise and the test would error out instead of failing cleanly.
    self.assertIsNone(pdb_structure.getCoordsets())
def __init__(self, user_defined_dir, current_fragment, fragment_smiles_sanitized, verify_substructure=True):
    """
    Load one fragment from the project's input directories.

    :param user_defined_dir: project directory containing
        ``Inputs/Fragment_Inputs`` and ``Inputs/Rosetta_Inputs``
    :param current_fragment: fragment name; ``<current_fragment>.pdb`` is read
        from the Fragment_Inputs directory
    :param fragment_smiles_sanitized: SMILES string for the fragment; if RDKit
        cannot parse it, the fragment PDB file is used instead
    :param verify_substructure: when true, also load the reference ligand
        ``<first three characters of the project dir name>_0001.pdb`` from
        Rosetta_Inputs and build ``self.pdbfrag_refdegree_map``
    :raises AssertionError: if either RDKit Mol for the fragment is None
    """
    # todo: fragment_string really shouldn't be a string...

    # --- Paths --- #
    self.current_fragment = current_fragment
    self.frag_rigid_pdb_name = f'{current_fragment}-rigid.pdb'
    self.user_defined_dir = user_defined_dir
    self.inputs_dir = os.path.join(self.user_defined_dir, 'Inputs')
    self.fragment_inputs_path = os.path.join(self.inputs_dir, 'Fragment_Inputs')
    self.rosetta_inputs_path = os.path.join(self.inputs_dir, 'Rosetta_Inputs')

    # Select rigid atoms from fragment map for alignments if rigid atoms are
    # defined in the Fragment_Inputs directory
    self.frag_inputs_dir = os.path.join(self.fragment_inputs_path, 'Rigid_Fragment_Atoms')

    # --- Fragment-related variables --- #
    fragment_pdb_path = os.path.join(self.fragment_inputs_path, f'{current_fragment}.pdb')
    # Context manager so the file handle is closed right after reading.
    with open(fragment_pdb_path) as fragment_file:
        self.fragment_string = fragment_file.read()
    self.fragment_smiles_sanitized = fragment_smiles_sanitized
    self.fragment_prody = prody.parsePDBStream(io.StringIO(self.fragment_string)).select('not hydrogen')
    self.ideal_fragment_mapping = None  # Assigned in _init_map_fragment_smiles_onto_ideal()

    # RDKit Mol representations of fragment
    self.fragment_ideal_rdkit_mol = Chem.MolFromSmiles(self.fragment_smiles_sanitized)

    if self.fragment_ideal_rdkit_mol is None:
        print('Unable to parse SMILES from Fragment_Inputs.csv, generating fragment Mol from file!')
        try:
            self.fragment_ideal_rdkit_mol = Chem.MolFromPDBFile(fragment_pdb_path)
            print(Chem.MolToSmiles(self.fragment_ideal_rdkit_mol))
        except Exception as e:
            # Best effort: a failure here is reported by the check below.
            print(e)

    self.fragment_pdb_rdkit_mol = Chem.MolFromPDBBlock(self.fragment_string, removeHs=False)

    # Explicit raise (same exception type as the former assert) so the check
    # still runs when Python is started with -O.
    if self.fragment_pdb_rdkit_mol is None or self.fragment_ideal_rdkit_mol is None:
        raise AssertionError(
            f'Failed to load RDKit Mol objects for fragment {current_fragment}')

    # --- Run initialization functions --- #
    self._init_map_fragment_smiles_onto_ideal()

    # --- Fragment Mapping options --- #
    self.verify_substructure = verify_substructure

    if verify_substructure:
        reference_ligand_path = os.path.join(
            self.rosetta_inputs_path,
            f'{os.path.basename(user_defined_dir)[:3]}_0001.pdb')
        self.reference_ligand_rdkit_mol = Chem.MolFromPDBFile(reference_ligand_path, removeHs=True)

        # Map ref_fragment_index > ref_fragment name > ref_ligand name > ref_ligand degree
        ref_ligand_atomname_degree_map = {
            atom.GetMonomerInfo().GetName(): atom.GetDegree()
            for atom in self.reference_ligand_rdkit_mol.GetAtoms()
        }

        self.pdbfrag_refdegree_map = dict()
        for ideal_idx, pdb_idx in self.ideal_fragment_mapping.items():
            fragment_pdb_atom = self.fragment_pdb_rdkit_mol.GetAtomWithIdx(pdb_idx)
            fragment_pdb_atom_name = fragment_pdb_atom.GetMonomerInfo().GetName()
            self.pdbfrag_refdegree_map[pdb_idx] = ref_ligand_atomname_degree_map[fragment_pdb_atom_name]
def test_getChainLengths(self):
    """Check the chain-id -> length mapping computed for ``chain_padding_proto_3``."""
    pdb_stream = StringIO.StringIO(chain_padding_proto_3)
    structure = prody.parsePDBStream(pdb_stream)

    expected_lengths = {'A': 1, 'B': 2, 'C': 3, 'D': 2, 'E': 3}
    self.assertDictEqual(
        expected_lengths,
        ChainMappingRMSDMatrixCalculator.getChainLengths(structure, "all"))
def test_get_dihedrals(self):
    """Compare dihedrals computed for the first coordset of ``pdb1`` with
    ``expected_dihedrals`` to two decimals."""
    pdb_stream = StringIO.StringIO(pdb1)
    structure = prody.parsePDBStream(pdb_stream)
    first_coordset = structure.getCoordsets()[0]

    computed = DihedralRMSDMatrixCalculator.calculateDihedralsForCoordset(
        structure, first_coordset)

    # The unknown values at both ends are sliced away before comparing.
    expected_known = numpy.array(expected_dihedrals[1:-1])
    computed_known = numpy.array(computed[2:-2])
    numpy.testing.assert_array_almost_equal(expected_known, computed_known, 2)
def test_removeAllCoordsetsFromStructure(self):
    """After removeAllCoordsetsFromStructure the structure must report no coordsets."""
    pdb_stream = StringIO.StringIO(switched_pdb_data)
    pdb_structure = prody.parsePDBStream(pdb_stream)

    removeAllCoordsetsFromStructure(pdb_structure)

    # Identity check instead of assertEqual(..., None). If coordinates were
    # still present (presumably as a numpy array), `== None` would compare
    # elementwise and the test would error out instead of failing cleanly.
    self.assertIsNone(pdb_structure.getCoordsets())