def test_5():
    """A water-only file must yield 'NA' for every residue in both DSSP schemes."""
    traj = md.load(get_fn('4waters.pdb'))
    reduced = md.compute_dssp(traj, simplified=True)
    full = md.compute_dssp(traj, simplified=False)

    # The reference is a single row of four 'NA' codes.
    expected = np.array([['NA'] * 4])
    for assignment in (reduced, full):
        np.testing.assert_array_equal(assignment, expected)
def test_4():
    """Simplified and full DSSP must have the same shape; the simplified
    assignment of the first frame uses exactly the codes C, E and H."""
    traj = md.load_pdb(get_fn('1am7_protein.pdb'))
    reduced = md.compute_dssp(traj, simplified=True)
    full = md.compute_dssp(traj, simplified=False)

    # Same number of rows ...
    assert len(reduced) == len(full)
    # ... and the same number of entries in the first row.
    assert len(reduced[0]) == len(full[0])

    observed_codes = list(np.unique(reduced[0]))
    assert observed_codes == ['C', 'E', 'H']
def identify_long_helices(geom, min_turns=5, aa_per_turn=3.6, verbose=False, plot=False):
    """Find long helical stretches in the first frame of ``geom``.

    The DSSP assignment of frame 0 is turned into a 0/1 vector (1 where the
    code is 'H') and handed to ``identify_boolean_blocks_in_sequence``.

    Parameters
    ----------
    geom
        Geometry passed to ``_md.compute_dssp``; ``n_residues`` and ``top``
        are additionally read when ``plot`` is True.
    min_turns, aa_per_turn
        ``min_turns * aa_per_turn`` and ``aa_per_turn`` are forwarded as the
        second and third positional arguments of
        ``identify_boolean_blocks_in_sequence``.
        NOTE(review): presumably "minimum block length" and "allowed break
        length" in residues -- confirm against that helper.
    verbose
        Forwarded to ``identify_boolean_blocks_in_sequence``.
    plot
        When True, also draw the 0/1 vector with every block shaded.

    Returns
    -------
    helices
        Whatever ``identify_boolean_blocks_in_sequence`` returns.
    (helices, axes)
        When ``plot`` is True, the blocks together with the matplotlib axes.
    """
    ss_str = _md.compute_dssp(geom)[0]
    # 1 where DSSP reports 'H', 0 everywhere else.
    ss_vec = (ss_str == 'H').astype(int)
    helices = identify_boolean_blocks_in_sequence(ss_vec,
                                                  min_turns * aa_per_turn,
                                                  aa_per_turn,
                                                  verbose=verbose,
                                                  names=["hlx", "brk"])
    if not plot:
        return helices

    # A single figure: the original opened an extra, empty default-sized
    # figure first and never used or closed it.
    _plt.figure(figsize=(20, 5))
    _plt.plot(ss_vec, marker='.')
    iax = _plt.gca()
    # One tick every 15 residues, labelled with the residue's resSeq rather
    # than its 0-based position.
    xticks = _np.arange(geom.n_residues, step=15)
    iax.set_xticks(xticks)
    iax.set_xticklabels([geom.top.residue(ii).resSeq for ii in xticks])
    for ihx in helices:
        # Pad by half a residue so the shading covers the end points.
        iax.axvspan(ihx[0] - .5, ihx[-1] + .5, alpha=.25)
    return helices, iax
def test_3(tmpdir):
    """Check ``md.compute_dssp`` against the ``call_dssp`` helper for a few
    structures downloaded from the RCSB."""
    # 1COY gives a small error, due to a broken chain.
    for pdbid in ('1GAI', '6gsv', '2AAC'):
        url = 'http://www.rcsb.org/pdb/files/%s.pdb' % pdbid
        full = md.load_pdb(url)
        # Keep only the 'minimal' atom selection before comparing.
        minimal = full.atom_slice(full.top.select_atom_indices('minimal'))
        helper_codes = call_dssp(tmpdir, minimal)
        mdtraj_codes = md.compute_dssp(minimal, simplified=False)[0]
        assert_(helper_codes, mdtraj_codes)
def compute_mdtraj_order_parmeters(trajectory_file, rmsd_reference_structure=None):
    """Compute several MDTraj observables for one trajectory file.

    Parameters
    ----------
    trajectory_file
        Passed to ``md.load``.
    rmsd_reference_structure : optional
        Passed to ``md.load``; when given, the RMSD of the trajectory to this
        structure is included in the result.

    Returns
    -------
    dict
        Keys, in insertion order: ``"RMSD"`` (only with a reference),
        ``"HBondEnergy"``, ``"SecondaryStructure"``, ``"Rg"``, ``"Contacts"``.
        ``"SecondaryStructure"`` is the DSSP assignment encoded as integers
        and transposed relative to ``md.compute_dssp``'s output. Integer ``k``
        stands for the ``k``-th DSSP code in sorted order, so the encoding is
        reproducible between runs (it used to depend on set iteration order,
        i.e. on string hash randomisation).
    """
    # documentation: http://mdtraj.org/1.8.0/analysis.html#
    trajectory = md.load(trajectory_file)
    results = {}

    if rmsd_reference_structure is not None:
        reference = md.load(rmsd_reference_structure)
        results["RMSD"] = md.rmsd(trajectory, reference)

    # One scalar per element returned by kabsch_sander: the sum of all of its
    # entries.
    results["HBondEnergy"] = np.array(
        [np.sum(x) for x in md.kabsch_sander(trajectory)])

    ss = md.compute_dssp(trajectory)
    # np.unique sorts the codes; the inverse index is the integer encoding.
    # Working on the flattened array keeps the inverse 1-D on every NumPy
    # version, so the reshape below is always valid.
    _, encoded = np.unique(ss.ravel(), return_inverse=True)
    results["SecondaryStructure"] = encoded.reshape(ss.shape).T

    results["Rg"] = md.compute_rg(trajectory)

    distances, residue_pairs = md.compute_contacts(trajectory, scheme='ca')
    results["Contacts"] = md.geometry.squareform(distances, residue_pairs)

    return results
def construct_residue_df(traj):
    """Collect per-residue features of ``traj`` into a ``pandas.DataFrame``.

    Columns, in order: ``residue_idx``, ``dssp`` (assignment of the first
    frame), ``phi``, ``psi``, ``omega_prev``, ``omega_next``, ``ca_angles``,
    ``ca_dihedral_prev`` and ``ca_dihedral_next``. All angle columns come from
    ``structure_tools.protein_structure_analysis``.
    """
    analysis = structure_tools.protein_structure_analysis

    residue_idx = np.arange(traj.top.n_residues)
    dssp = mdtraj.compute_dssp(traj)[0].tolist()
    # NOTE: the SASA columns ("sasa", "sasa_relative", from
    # analysis.calculate_sasa) are currently disabled.
    phi = analysis.calculate_phi(traj)
    psi = analysis.calculate_psi(traj)
    # The transposed result unpacks into exactly two columns each.
    omega_prev, omega_next = analysis.calculate_omega(traj).T
    ca_angles = analysis.calculate_backbone_angles(traj)
    ca_dihedral_prev, ca_dihedral_next = (
        analysis.calculate_backbone_dihedrals(traj).T)

    return pd.DataFrame({
        "residue_idx": residue_idx,
        "dssp": dssp,
        "phi": phi,
        "psi": psi,
        "omega_prev": omega_prev,
        "omega_next": omega_next,
        "ca_angles": ca_angles,
        "ca_dihedral_prev": ca_dihedral_prev,
        "ca_dihedral_next": ca_dihedral_next,
    })
def test_1():
    """Generator test: yield one DSSP comparison callable per PDB file.

    Each yielded callable compares ``call_dssp`` with ``md.compute_dssp`` on
    the 'minimal' atom slice of one file and carries a ``description``
    attribute naming that file.
    """
    for fn in ['1bpi.pdb', '1vii.pdb', '4K6Q.pdb', '1am7_protein.pdb']:
        t = md.load_pdb(get_fn(fn))
        t = t.atom_slice(t.top.select_atom_indices('minimal'))
        # Bind the current trajectory as a default argument. A plain closure
        # looks `t` up when it is *called*, so if the generator were consumed
        # before the checks run, every check would see the last file only.
        f = lambda t=t: assert_(call_dssp(t), md.compute_dssp(t, simplified=False)[0])
        f.description = 'test_1: %s' % fn
        yield f
def _computeSecondaryStructure(self):
    """Compute the secondary structure of the selected frame and format it
    for the browser.

    Returns a dict keyed by atom index. Each value is a dict with the keys
    'ss' ('coil', 'helix' or 'sheet'), 'ssbegin' and 'ssend'; the two flags
    are True only on the first / last residue of a helix or sheet run.
    """
    SS_MAP = {'C': 'coil', 'H': 'helix', 'E': 'sheet', 'NA': 'coil'}
    structured_codes = ('H', 'E')

    topology = self.trajectory.topology
    codes = md.compute_dssp(self.trajectory[self.frame])[0]

    def run_key(pair):
        # A run breaks whenever either the chain or the DSSP code changes.
        residue_index, code = pair
        return (topology.residue(residue_index).chain, code)

    per_atom = {}
    for (_chain, code), members in groupby(enumerate(codes), run_key):
        # Residue indices that make up this contiguous run.
        run = [residue_index for residue_index, _code in members]
        first, last = run[0], run[-1]
        is_structured = code in structured_codes
        for residue_index in run:
            # Every atom of the residue gets its own entry.
            for atom in topology.residue(residue_index).atoms:
                per_atom[atom.index] = {
                    'ss': SS_MAP[code],
                    'ssbegin': (residue_index == first and is_structured),
                    'ssend': (residue_index == last and is_structured)}
    return per_atom
def cal_dssp_PDB(pdbfile: str, mode=3):
    """Tabulate the DSSP assignment of the first frame of ``pdbfile``.

    ``mode`` selects the alphabet: 3 requests the simplified assignment,
    7 the full one; anything else raises ``Exception``. Blank (' ') codes are
    rewritten to 'L'. The resulting ``DataFrame`` (columns ``resid``,
    ``resname``, ``chainID``, ``ssp``) is printed and returned.

    An ``Exception`` is also raised when the number of collected residues
    differs from the number of DSSP codes.
    """
    if mode == 3:
        simplified = True
    elif mode == 7:
        simplified = False
    else:
        raise Exception('The mode parameter only accept 3 or 7. %s' %
                        str(mode))

    structure = load(pdbfile)[0]
    # Chain labels are taken from the Universe's segment ids and looked up by
    # chain index below.
    segment_ids = Universe(pdbfile).segments.segids

    ssp = compute_dssp(structure, simplified)[0]
    ssp[ssp == ' '] = 'L'

    chainIDs, resnames, resids = [], [], []
    for chain in structure.top.chains:
        label = segment_ids[chain.index]
        members = list(chain.residues)
        resnames.extend(residue.name for residue in members)
        resids.extend(residue.resSeq for residue in members)
        chainIDs.extend([label] * len(members))

    n_labelled, n_codes = len(chainIDs), len(ssp)
    if n_labelled != n_codes:
        raise Exception(
            'The chainID and ssp results must be same length. %d-%d' %
            (n_labelled, n_codes))

    ResultDF = DataFrame({
        'resid': resids,
        'resname': resnames,
        'chainID': chainIDs,
        'ssp': ssp
    })
    print(ResultDF)
    return ResultDF
def _computeSecondaryStructure(self):
    """Compute the secondary structure of the selected frame and format it
    for the browser.

    The result maps every atom index to a dict with 'ss' (one of 'coil',
    'helix', 'sheet'), 'ssbegin' and 'ssend'. The begin/end flags are set
    only for the first/last residue of a helix or sheet run; coil runs never
    set them.
    """
    SS_MAP = {'C': 'coil', 'H': 'helix', 'E': 'sheet', 'NA': 'coil'}
    helix_or_sheet = ('H', 'E')

    top = self.trajectory.topology
    frame_codes = md.compute_dssp(self.trajectory[self.frame])[0]

    def chain_and_code(indexed_code):
        # Grouping key: runs are contiguous in both chain and DSSP code.
        rindx, ss = indexed_code
        return (top.residue(rindx).chain, ss)

    result = {}
    for (_chain, ss), streak in groupby(enumerate(frame_codes), chain_and_code):
        rindxs = [rindx for rindx, _ss in streak]
        run_start = rindxs[0]
        run_end = rindxs[-1]
        flaggable = ss in helix_or_sheet
        for rindx in rindxs:
            # One entry per atom of the residue.
            for atom in top.residue(rindx).atoms:
                result[atom.index] = {
                    'ss': SS_MAP[ss],
                    'ssbegin': (rindx == run_start and flaggable),
                    'ssend': (rindx == run_end and flaggable)
                }
    return result
def ca_rmsd_angstroms(traj, native, cut_tails=False, verbose=True):
    '''
    Computes RMSD of the CA atoms in angstroms, rather than the MDTraj
    default of nanometers.

    If cut_tails is True, the secondary structure of the native will be
    computed first, and all residues before the first helix or sheet and all
    residues after the last helix or sheet will be excluded from the RMSD.
    This is convenient to avoid computing the RMSD of unstructured tails.

    If verbose is True, the default, then the number of excluded residues on
    each end will be printed for diagnostics (only when cut_tails is True).

    You must pass full structures, not CA structures, to this function.

    Raises ValueError if cut_tails is True and the native has no helix or
    sheet residue at all, because no residue would be left to compare.
    '''
    native_dssp = md.compute_dssp(native)[0]
    n_residues = len(native_dssp)
    first_residue = 0
    last_residue = n_residues  # exclusive upper bound

    if cut_tails:
        unstructured = ('C', 'NA')
        while (first_residue < n_residues
               and native_dssp[first_residue] in unstructured):
            first_residue += 1
        # Stop at first_residue. The previous bound (`last_residue >= 0`)
        # let the index reach -1, which wraps around to the last residue and
        # left last_residue == -1 for a fully unstructured native.
        while (last_residue > first_residue
               and native_dssp[last_residue - 1] in unstructured):
            last_residue -= 1
        if first_residue == last_residue:
            raise ValueError(
                'cut_tails=True but the native structure contains no helix '
                'or sheet residues; nothing is left to compute the RMSD on')
        if verbose:
            print(
                'RMSD excluded %i residues from N-terminus and %i residues from C-terminus, leaving %i residues'
                % (first_residue, n_residues - last_residue,
                   last_residue - first_residue))

    # The selection range is inclusive on both ends, hence the -1.
    sel = 'name CA and resid %i to %i' % (first_residue, last_residue - 1)
    # convert RMSD to angstroms
    return 10. * md.rmsd(select(traj, sel), select(native, sel))
def calculate_ss(pdbfilename, simplified=True):
    '''
    Return the DSSP assignment of the first frame of a structure file.

    The full DSSP assignment codes are:
        H   : Alpha helix
        B   : Residue in isolated beta-bridge
        E   : Extended strand, participates in beta ladder
        G   : 3-helix (3/10 helix)
        I   : 5 helix (pi helix)
        T   : hydrogen bonded turn
        S   : bend
        ' ' : Loops and irregular elements

    There are two ways to simplify the 8-letter DSSP codes. The simplified
    DSSP codes in mdtraj are:
        H : Helix. Either of the H, G, or I codes.
        E : Strand. Either of the E, or B codes.
        C : Coil. Either of the T, S or ' ' codes.

    This function simplifies differently:
        H : H
        E : E
        C : all the others

    With simplified=False the full assignment is returned unchanged.
    '''
    import mdtraj as md

    structure = md.load(pdbfilename)
    codes = md.compute_dssp(structure, simplified=False)[0]
    # Equality with True (not plain truthiness) is kept on purpose, so that
    # exactly the same argument values as before trigger the reduction.
    if simplified == True:
        neither_helix_nor_strand = (codes != 'H') & (codes != 'E')
        codes[neither_helix_nor_strand] = 'C'
    return codes
def calculate_DSSP():
    """Write the simplified DSSP assignment of the protein atoms to DSSP.dat.

    Reads the module-level ``args``, ``traj`` and ``traj_mdt``. The output
    goes to ``<cwd>/<args.output_folder>/protein_properties/DSSP/DSSP.dat``
    as space-separated text, transposed relative to ``mdt.compute_dssp``'s
    result (one line per residue, one column per frame).
    """
    filename_details = os.getcwd() + '/' + str(args.output_folder) + '/protein_properties/DSSP/DSSP.dat'
    # NOTE(review): the atom selection comes from `traj` but is applied to
    # `traj_mdt` -- confirm both share the same atom ordering.
    protein_atoms = traj.topology.select('protein')
    dssp = mdt.compute_dssp(traj_mdt.atom_slice(atom_indices=protein_atoms), simplified=True)
    dssp_over_time = dssp.T
    # DSSP codes are strings, so the default float format of savetxt cannot
    # be used. Passing the path lets savetxt open and close the file itself.
    np.savetxt(filename_details, dssp_over_time, fmt="%s", delimiter=" ")
    print("DSSP calculation complete")
def test_6():
    """Residues that carry the C, N, O and CA atoms must be assigned 'C';
    every other residue must be 'NA'."""
    traj = md.load(get_fn('alanine-dipeptide-explicit.pdb'))
    assignment = md.compute_dssp(traj, simplified=True)

    backbone_names = ('C', 'N', 'O', 'CA')
    has_backbone = np.array([
        set(atom.name for atom in residue.atoms).issuperset(backbone_names)
        for residue in traj.topology.residues
    ])
    lacks_backbone = np.logical_not(has_backbone)

    assert np.unique(assignment[:, has_backbone]) == "C"
    assert np.unique(assignment[:, lacks_backbone]) == 'NA'
def _superpose(self):
    """ Superpose structured C-alphas

    Stores the DSSP assignment of the first frame in ``self.dssp`` and
    superposes ``self.traj`` in place onto its own frame 0, using only the
    C-alpha atoms of residues assigned helix ('H') or strand ('E').
    """
    self.dssp = mdtraj.compute_dssp(self.traj[0])[0]
    structured_resis_bool = (self.dssp == 'H') | (self.dssp == 'E')

    topology = self.traj.topology
    # Map every C-alpha back to its own residue. Indexing the C-alpha list by
    # residue index (as was done before) is only correct when each residue
    # has exactly one CA atom, i.e. it breaks with waters, ligands or ions.
    structured_alpha_indices = np.array(
        [atom_index
         for atom_index in topology.select_atom_indices('alpha')
         if structured_resis_bool[topology.atom(atom_index).residue.index]],
        dtype=int)

    self.traj.superpose(reference=self.traj, frame=0,
                        atom_indices=structured_alpha_indices)
def helicity(traj, peptide_chain):
    """Return the helical content of one chain, pooled over all frames.

    Parameters
    ----------
    traj
        Trajectory passed to ``mdtraj.compute_dssp`` (simplified scheme).
    peptide_chain
        Chain index passed to ``traj.topology.chain``.

    Returns
    -------
    float
        (number of 'H' assignments) / (number of assignments) * 10, counted
        over every frame and every residue of the chain.
        NOTE(review): the factor is 10, not 100 -- kept as found; confirm the
        intended scale.
    """
    print('.... computing helicity ....')
    dssp = mdtraj.compute_dssp(traj, simplified=True)
    residues = [residue.index for residue in traj.topology.chain(peptide_chain).residues]
    # +1 because a slice end is exclusive; without it the last residue of the
    # chain was silently left out of the statistics.
    chain_codes = dssp[:, residues[0]:residues[-1] + 1]
    unique, counts = np.unique(chain_codes, return_counts=True)
    # float(): guarantees true division regardless of interpreter version.
    hel = float(dict(zip(unique, counts)).get('H', 0)) / np.sum(counts) * 10
    print('helicity ' + str(hel))
    print('.... computing helicity .... DONE')
    return hel
def write_simulation_files(self, ref_traj_aa, topfilename, seqfilename, ssbias=False):
    """Write the input files for a simulation.

    Files written, in this order: ``seqfilename`` (one FASTA line per chain),
    ``charge_on_residues.dat``, ``ssweight``, ``jpred`` and ``topfilename``.
    The last four names other than ``topfilename`` are fixed and relative to
    the current working directory.

    Parameters
    ----------
    ref_traj_aa
        Reference structure whose first-frame DSSP assignment provides the
        secondary structure.
    topfilename, seqfilename
        Output paths for the topology and the sequence.
    ssbias : bool
        When False, every ``ssweight`` line is "0.0 0.0"; when True, helix
        ('H') and strand ('E') residues get a weight of 1.0 in the first and
        second column respectively.

    Raises
    ------
    AttributeError
        If the model has neither ``ref_traj`` nor ``starting_traj``.
    AssertionError
        If the DSSP string and the FASTA sequence differ in length.
    """
    self.generate_topology()

    model = self.model
    if hasattr(model, "ref_traj"):
        traj = model.ref_traj
    elif hasattr(model, "starting_traj"):
        traj = model.starting_traj
    else:
        raise AttributeError("need to set intial conditions (ref_traj or starting_traj) to write")

    fasta = traj.top.to_fasta()
    with open("{}".format(seqfilename), "w") as fout:
        fout.writelines("{}\n".format(line) for line in fasta)

    charged_residues = self.model.mapping._charged_residues
    with open("charge_on_residues.dat", "w") as fout:
        # Header line: number of entries that follow.
        fout.write("{:d}\n".format(len(charged_residues)))
        for res_idx, charge in charged_residues:
            fout.write("{:6d} {:8.4f}\n".format(res_idx, charge))

    # compute secondary structure from a reference structure
    first_frame_codes = md.compute_dssp(ref_traj_aa)[0]
    dssp = "".join(first_frame_codes).replace("C", "-")
    n_sequence = sum(len(x) for x in fasta)
    assert len(dssp) == n_sequence, "Number of residues in reference different than expected"

    with open("ssweight", "w") as fout:
        for ss in dssp:
            helix = 1. if (ssbias and ss == "H") else 0.
            sheet = 1. if (ssbias and ss == "E") else 0.
            fout.write("{:.1f} {:.1f}\n".format(helix, sheet))

    with open("jpred", "w") as fout:
        # For each chain: its sequence, then the matching slice of the DSSP
        # string.
        start = 0
        for chain_sequence in fasta:
            stop = start + len(chain_sequence)
            fout.write("{}\n".format(chain_sequence))
            fout.write("{}\n".format(dssp[start:stop]))
            start = stop

    with open("{}".format(topfilename), "w") as fout:
        fout.write(self.topfile)
def Helicity(self):
    """Store the per-frame helical fraction of every seed's trajectory.

    For each index ``seedi`` of ``self.FNSeeds``, ``self.helicities1[seedi]``
    is set to a 1-D float array holding, per frame of
    ``self.trajectories[seedi]``, the fraction of residues whose simplified
    DSSP code is 'H'.
    """
    for seedi in range(len(self.FNSeeds)):
        dssps = md.compute_dssp(self.trajectories[seedi], simplified=True)
        # Row-wise mean of the boolean mask == fraction of 'H' per frame.
        # Replaces a nested Python counting loop and always uses true
        # division.
        self.helicities1[seedi] = (dssps == "H").mean(axis=1)
def do_dssp(traj, simplified=True):
    """Return, per frame, the fraction of residues carrying each DSSP code.

    Parameters
    ----------
    traj
        Trajectory passed to ``md.compute_dssp``; ``traj.time`` becomes the
        index of the result.
    simplified : bool
        Forwarded to ``md.compute_dssp``.

    Returns
    -------
    pandas.DataFrame
        One row per frame and one column per DSSP code that occurs anywhere
        in the trajectory. Columns are sorted, so their order is stable
        between runs (it used to follow set iteration order).
    """
    structure = pd.DataFrame(
        md.compute_dssp(traj, simplified=simplified),
        index=traj.time,
    )
    # Every distinct code in the whole table, in a deterministic order.
    structure_codes = sorted(set(structure.values.ravel()))

    structure_frxn = pd.DataFrame(index=traj.time, columns=structure_codes)
    for code in structure_codes:
        # The mean of a boolean row is the fraction of residues with `code`.
        structure_frxn[code] = (structure == code).mean(axis=1)
    return structure_frxn
def compute_secondary(self, frame):
    """Return the helix and sheet runs of ``frame``.

    Each run is the list
    ``[start_chain_name, start_residue, end_chain_name, end_residue]``, where
    the residue values are 0-based positions in the DSSP array and the chain
    names are looked up in ``CHAIN_NAMES`` by chain index. Runs are
    contiguous stretches of one simplified DSSP code; 'H' runs go into the
    first returned list, 'E' runs into the second.
    """
    codes = md.compute_dssp(frame, simplified=True)[0]
    topology = self.top.topology

    helices = []
    sheets = []
    for code, members in groupby(enumerate(codes), operator.itemgetter(1)):
        members = list(members)
        start_residue = members[0][0]
        end_residue = members[-1][0]
        start_chain = CHAIN_NAMES[topology.residue(start_residue).chain.index]
        end_chain = CHAIN_NAMES[topology.residue(end_residue).chain.index]
        run = [start_chain, start_residue, end_chain, end_residue]
        if code == 'H':
            helices.append(run)
        elif code == 'E':
            sheets.append(run)
    return helices, sheets
def cal_dssp_traj(trajfile: str, topfile: str, mode=3):
    """Summarise the DSSP assignment of a whole trajectory per residue.

    Parameters
    ----------
    trajfile, topfile
        Trajectory and topology files; ``topfile`` is also used to read the
        residue and chain labels.
    mode
        3 for the simplified alphabet (H, E, C), 7 for the full alphabet
        (H, B, E, G, I, T, S and L, where L replaces the blank code).

    Returns
    -------
    (dict, ndarray)
        The dict holds ``resid``, ``resname``, ``chainID`` and, for every
        code of the chosen alphabet, the per-residue number of frames with
        that code. The array is the raw assignment with blanks replaced by
        'L'. A header comment block and the table are printed as well.

    Raises
    ------
    Exception
        For an unsupported ``mode`` or when the residue labels and the DSSP
        result differ in length.
    """
    if mode == 7:
        simplified = False
        SStype = ['H', 'B', 'E', 'G', 'I', 'T', 'S', 'L']
        # Work on a copy so the module-level header list does not grow with
        # every call.
        comment = list(comment7)
    elif mode == 3:
        simplified = True
        SStype = ['H', 'E', 'C']
        # NOTE(review): spelled `commet3` (cf. `comment7`) -- kept as found;
        # confirm the module-level name.
        comment = list(commet3)
    else:
        raise Exception('The mode parameter only accept 3 or 7. %s' %
                        str(mode))

    top = load(topfile)[0]
    u = Universe(topfile)
    chainNames = u.segments.segids

    chainIDs = []
    resnames = []
    resids = []
    for chain in top.top.chains:
        chainName = chainNames[chain.index]
        for residue in chain.residues:
            resnames.append(residue.name)
            resids.append(residue.resSeq)
            chainIDs.append(chainName)

    traj = load(trajfile, top=topfile)
    ssp = compute_dssp(traj, simplified=simplified)
    # Assignment, not comparison: the previous `ssp[mask] == 'L'` discarded
    # its result, so loops were never counted under 'L' in mode 7.
    mask = ssp == ' '
    ssp[mask] = 'L'

    if len(chainIDs) != len(ssp[0]):
        raise Exception(
            'The chainID and ssp results must be same length. %d-%d' %
            (len(chainIDs), len(ssp[0])))

    ResultDict = {
        'resid': resids,
        'resname': resnames,
        'chainID': chainIDs,
    }
    residueSize = ssp.size
    # NOTE(review): `sum` is called with `axis=`, so it is presumably
    # numpy's sum imported at module level -- confirm.
    for ss in SStype:
        ResultDict[ss] = sum(ssp == ss, axis=0)
        comment.append("# %s: %.4f" % (ss, sum(ssp == ss) / residueSize))

    ResultDF = DataFrame(ResultDict)
    print("\n".join(comment))
    print(ResultDF)
    return ResultDict, ssp
def run_DSSP_analysis_OLD(CP, outdir):
    """Write per-residue DSSP frequencies to three CSV files in ``outdir``.

    For each of the codes 'H', 'E' and 'C', ``DSSP_<code>.csv`` receives one
    value per residue: the number of frames in which the residue has that
    code divided by ``CP.n_frames``. The first and the last residue (indices
    0 and ``CP.n_residues - 1``) are not written.
    """
    dssp_data = md.compute_dssp(CP.traj)
    n_frames = float(CP.n_frames)

    # Select the interior residues once; the previous version transposed the
    # whole array and ran a Python-level sum three times per residue.
    interior = np.arange(1, CP.n_residues - 1)
    interior_codes = dssp_data[:, interior]

    for code in ('H', 'E', 'C'):
        fraction = (interior_codes == code).sum(axis=0) / n_frames
        np.savetxt('%s/DSSP_%s.csv' % (outdir, code), fraction, delimiter=', ')
def topology_mdtraj(traj):
    '''Build the topology description used by the MolecularViewer.

    :param mdtraj.Trajectory traj: the trajectory
    :return: A chemview-compatible dictionary with the keys ``atom_types``,
             ``atom_names``, ``bonds``, ``secondary_structure`` (DSSP of the
             first frame), ``residue_types`` and ``residue_indices``.
    '''
    import mdtraj as md

    topology = traj.topology
    return {
        'atom_types': [atom.element.symbol for atom in topology.atoms],
        'atom_names': [atom.name for atom in topology.atoms],
        'bonds': [(first.index, second.index) for first, second in topology.bonds],
        'secondary_structure': md.compute_dssp(traj[0])[0],
        'residue_types': [residue.name for residue in topology.residues],
        # One list of atom indices per residue.
        'residue_indices': [[atom.index for atom in residue.atoms]
                            for residue in topology.residues],
    }
def structure_contact_fraction(traj, selection, selection2=None, cutoff=0.4, simplified=True):
    """Per frame and DSSP code, the fraction of residues that are in contact.

    Parameters
    ----------
    traj
        Trajectory; ``traj.time`` indexes the result.
    selection, selection2, cutoff
        Forwarded unchanged to ``calculate_contacts``.
    simplified : bool
        Forwarded to ``md.compute_dssp``.

    Returns
    -------
    pandas.DataFrame
        One row per frame, one (sorted) column per DSSP code. Each value is
        (residues with that code taking part in a contact) / (residues with
        that code) for the frame, or NaN when the code does not occur in the
        frame.
    """
    # calculate structure of each residue in each frame
    structure = pd.DataFrame(
        md.compute_dssp(traj, simplified),
        index=traj.time,
    )
    # unique structure codes, sorted so the column order is reproducible
    structure_codes = sorted(set(structure.values.ravel()))

    # calculate contacts
    contacts, atom_pairs = calculate_contacts(traj, selection, selection2,
                                              cutoff)

    # residue-wise mask: True where the residue takes part in a contact
    contacts_mask = pd.DataFrame(
        np.zeros(structure.shape, dtype=bool),
        index=traj.time,
    )
    for t in contacts.index:
        # TODO: figure out how to slice mdtraj topology and vectorize
        for atom_idx in atom_pairs[np.where(contacts.loc[t, :])]:
            for atom in atom_idx:
                res_id = traj.top.atom(atom).residue.index
                contacts_mask.loc[t, res_id] = True

    structure_sb_frxn = pd.DataFrame(index=traj.time, columns=structure_codes)
    for code in structure_codes:
        structure_mask = (structure == code)
        in_contact = (structure_mask & contacts_mask).sum(axis=1)
        per_frame_total = structure_mask.sum(axis=1)
        # Keep only frames in which the code occurs; the remaining frames
        # become NaN when the column is aligned on the time index.
        # (`Series.nonzero()`, used here before, no longer exists in
        # pandas >= 1.0.)
        present = per_frame_total > 0
        structure_sb_frxn[code] = in_contact[present] / per_frame_total[present]
    return structure_sb_frxn
def compute_secondary(self, frame):
    """Collect the helix ('H') and sheet ('E') runs found in ``frame``.

    Returns ``(helices, sheets)``. Every run is a four-element list:
    the ``CHAIN_NAMES`` entry of the run's first residue, that residue's
    0-based position, the ``CHAIN_NAMES`` entry of the run's last residue,
    and that residue's 0-based position.
    """
    assignment = md.compute_dssp(frame, simplified=True)[0]
    residue_at = self.top.topology.residue

    helices, sheets = [], []
    by_code = operator.itemgetter(1)
    for code, group in groupby(enumerate(assignment), by_code):
        positions = [position for position, _code in group]
        start_residue, end_residue = positions[0], positions[-1]
        run = [
            CHAIN_NAMES[residue_at(start_residue).chain.index],
            start_residue,
            CHAIN_NAMES[residue_at(end_residue).chain.index],
            end_residue,
        ]
        if code == 'H':
            helices.append(run)
        elif code == 'E':
            sheets.append(run)
    return helices, sheets
def calc_dssp(chimera: Chimera = None, filename: str = None, simplified: bool = True):
    """
    Compute Dictionary of protein secondary structure (DSSP) secondary
    structure assignments. This function uses the MDTraj compute_dssp
    implementation as a basis.

    Exactly one of ``chimera`` and ``filename`` must be given.

    :param chimera: A Chimera object. It is written to a PDB file inside a
        private temporary directory, which is removed again before returning.
    :param filename: path to a pdb file
    :param simplified: Use the simplified 3-category assignment scheme.
        Otherwise the original 8-category scheme is used.
    :return: assignments np.ndarray. The secondary structure assignment for
        each residue
    :raises ValueError: if both or neither of ``chimera`` and ``filename``
        are given.
    """
    if chimera and filename:
        raise ValueError("Only a Chimera object or the path to a pdb file must be specified")
    if not chimera and not filename:
        raise ValueError("At least a Chimera object or the path to a pdb file must be specified")

    if chimera:
        import os
        import tempfile

        # A private temporary directory instead of the fixed
        # "/tmp/structure.pdb": concurrent calls no longer overwrite each
        # other's file and the code does not depend on /tmp existing.
        with tempfile.TemporaryDirectory() as workdir:
            pdb_path = os.path.join(workdir, "structure.pdb")
            chimera.write(pdb_path)
            structure = md.load(pdb_path)
    else:
        structure = md.load(filename)

    return md.compute_dssp(structure, simplified=simplified)
def protein_calcs(self, struc):
    """
    Run the calculations listed in self.calcs on one structure and append
    each result to the matching list attribute on self.

    Recognised entries of self.calcs and the attribute they append to:
    'Rg' -> self.Rg, 'Asph' -> self.Asph, 'EED' -> self.EED,
    'SASA' -> self.SASA, 'cmaps' -> self.cmaps, 'gcmaps' -> self.gcmaps,
    'SS' -> self.SS, 'flory' -> self.fex, 'rama' -> self.dihedrals,
    'surface_contacts' -> self.scmaps. 'Gyr' only requests the gyration
    tensor and stores nothing. self.nres is always set to the number of
    residues.

    Shared intermediates (gyration tensor, C-alpha contact map, SASA) are
    computed whenever any calculation that needs them is requested, so e.g.
    'Rg' no longer depends on 'Gyr' also being listed.

    Always returns None.
    """
    calcs = self.calcs
    coors = struc.xyz[0]
    CA_coors = struc.atom_slice(struc.topology.select('name CA'))[0].xyz[0]
    self.nres = struc.n_residues

    # Gyration tensor: needed by both Rg and Asph.
    if 'Gyr' in calcs or 'Rg' in calcs or 'Asph' in calcs:
        L = sa_core.gyration_tensor(coors)
    if 'Rg' in calcs:
        self.Rg.append(sa_core.compute_Rg(L))
    if 'Asph' in calcs:
        self.Asph.append(sa_core.compute_Asph(L))

    if 'EED' in calcs:
        # Distance between the first and the last C-alpha.
        self.EED.append(np.linalg.norm(CA_coors[0] - CA_coors[-1]))

    # SASA: stored for 'SASA', and an input of 'surface_contacts'.
    if 'SASA' in calcs or 'surface_contacts' in calcs:
        SASA = md.shrake_rupley(struc)
    if 'SASA' in calcs:
        self.SASA.append(SASA.sum(axis=1)[0])

    # C-alpha contact map: stored for 'cmaps', and an input of 'gcmaps'.
    if 'cmaps' in calcs or 'gcmaps' in calcs:
        dist = sa_core.contact_maps(CA_coors)
    if 'cmaps' in calcs:
        self.cmaps.append(dist)
    if 'gcmaps' in calcs:
        self.gcmaps.append(sa_core.gremlin_contact_maps(dist))

    if 'SS' in calcs:
        self.SS.append(md.compute_dssp(struc))
    if 'flory' in calcs:
        self.fex.append(polymer.compute_flory(struc, self.nres))
    if 'rama' in calcs:
        self.dihedrals.append(rama.compute_phipsi(struc))
    if 'surface_contacts' in calcs:
        self.scmaps.append(sa_core.surface_contacts(struc, SASA))
    return None
def test_dssp_allresidues(self):
    """``pt.dssp_allresidues`` must match ``md.compute_dssp`` (first frame,
    simplified scheme) once MDTraj's 'NA' codes are rewritten to 'C'."""
    from numpy.testing import assert_array_equal

    def update_mdtraj_dssp(mdata):
        # Rewrite 'NA' to 'C' in place so both outputs use one alphabet.
        mdata[mdata == 'NA'] = 'C'
        return mdata

    trajlist = [
        pt.iterload('data/DPDP.nc', 'data/DPDP.parm7'),
        pt.iterload('data/tz2.ortho.nc', 'data/tz2.ortho.parm7'),
    ]
    # The third case is fetched first and then loaded from the output folder.
    pt.io.download_PDB('1l2y', './output/', overwrite=True)
    trajlist.append(pt.iterload('output/1l2y.pdb'))

    for traj in trajlist:
        data = pt.dssp_allresidues(traj, simplified=True)[0]
        mtraj = md.load(traj.filename, top=traj.top.filename)
        mdata = update_mdtraj_dssp(md.compute_dssp(mtraj, simplified=True)[0])
        assert_array_equal(data, mdata)
def test_dssp_allresidues(self):
    """Compare ``pt.dssp_allresidues`` with ``md.compute_dssp`` on three
    inputs, treating MDTraj's 'NA' code as 'C'."""
    from numpy.testing import assert_array_equal

    def update_mdtraj_dssp(mdata):
        # In-place replacement of every 'NA' entry by 'C'.
        not_assigned = mdata == 'NA'
        mdata[not_assigned] = 'C'
        return mdata

    trajlist = []
    for traj_path, top_path in (('data/DPDP.nc', 'data/DPDP.parm7'),
                                ('data/tz2.ortho.nc', 'data/tz2.ortho.parm7')):
        trajlist.append(pt.iterload(traj_path, top_path))
    # Download 1l2y before loading it from the output folder.
    pt.io.download_PDB('1l2y', './output/', overwrite=True)
    trajlist.append(pt.iterload('output/1l2y.pdb'))

    for traj in trajlist:
        data = pt.dssp_allresidues(traj, simplified=True)[0]
        mtraj = md.load(traj.filename, top=traj.top.filename)
        first_frame = md.compute_dssp(mtraj, simplified=True)[0]
        mdata = update_mdtraj_dssp(first_frame)
        assert_array_equal(data, mdata)
def write_simulation_files(self, ref_traj_aa, topfilename, seqfilename,
                           ssbias=False):
    """Generate the topology and write all simulation input files.

    Output, in the order written: ``seqfilename`` (FASTA sequence, one line
    per chain), ``charge_on_residues.dat`` (entry count followed by
    "index charge" lines), ``ssweight`` (one "helix sheet" weight pair per
    residue), ``jpred`` (per chain: sequence line, then its DSSP string with
    'C' shown as '-') and ``topfilename`` (``self.topfile``).

    ``ssbias`` switches the ``ssweight`` weights on: with it, 'H' residues
    get "1.0 0.0" and 'E' residues "0.0 1.0"; without it every line is
    "0.0 0.0".

    Raises ``AttributeError`` when the model provides neither ``ref_traj``
    nor ``starting_traj``, and ``AssertionError`` when the DSSP string of
    ``ref_traj_aa`` and the sequence differ in length.
    """
    self.generate_topology()

    if hasattr(self.model, "ref_traj"):
        traj = self.model.ref_traj
    elif hasattr(self.model, "starting_traj"):
        traj = self.model.starting_traj
    else:
        raise AttributeError(
            "need to set intial conditions (ref_traj or starting_traj) to write"
        )

    fasta = traj.top.to_fasta()
    with open("{}".format(seqfilename), "w") as fout:
        for chain_sequence in fasta:
            fout.write("{}\n".format(chain_sequence))

    with open("charge_on_residues.dat", "w") as fout:
        charged = self.model.mapping._charged_residues
        fout.write("{:d}\n".format(len(charged)))
        fout.writelines("{:6d} {:8.4f}\n".format(res_idx, charge)
                        for res_idx, charge in charged)

    # compute secondary structure from a reference structure
    dssp = "".join(md.compute_dssp(ref_traj_aa)[0]).replace("C", "-")
    expected_length = sum([len(chain_sequence) for chain_sequence in fasta])
    assert len(dssp) == expected_length, "Number of residues in reference different than expected"

    with open("ssweight", "w") as fout:
        for ss in dssp:
            helix, sheet = 0., 0.
            if ssbias:
                if ss == "H":
                    helix = 1.
                elif ss == "E":
                    sheet = 1.
            fout.write("{:.1f} {:.1f}\n".format(helix, sheet))

    with open("jpred", "w") as fout:
        offset = 0
        for chain_sequence in fasta:
            chain_length = len(chain_sequence)
            fout.write("{}\n".format(chain_sequence))
            fout.write("{}\n".format(dssp[offset:offset + chain_length]))
            offset += chain_length

    with open("{}".format(topfilename), "w") as fout:
        fout.write(self.topfile)
def mktraj(targetid, ensembler_stage=None, traj_filepath=None, topol_filepath=None, models_data_filepath=None, process_only_these_templates=None):
    """Makes a trajectory for a given target, using mdtraj. The trajectory
    can be used with other software, e.g. for visualization with PyMOL or VMD.

    The models are ordered by decreasing sequence identity, concatenated into
    one trajectory and superposed onto the first model using the C-alpha
    atoms of its helix/strand residues.

    Parameters
    ----------
    targetid : str
        e.g. 'EGFR_HUMAN_D0'
    ensembler_stage : str
        The Ensembler stage from which to build models, e.g. 'build_models'
        results in a trajectory built from the 'model.pdb.gz' files output by
        the build_models command.
        options: build_models|refine_implicit_md|refine_explicit_md
        default: most advanced stage for which model files are available
    traj_filepath : str
        default: models/[targetid]/traj-[ensembler_stage].xtc
    topol_filepath : str
        default: models/[targetid]/traj-[ensembler_stage]-topol.pdb
    models_data_filepath : str
        default: models/[targetid]/traj-[ensembler_stage]-data.csv
    process_only_these_templates : list of str

    Returns
    -------
    traj : mdtraj.Trajectory
    df : pandas.DataFrame
        models data (e.g. sequence identities)

    Raises
    ------
    Exception
        If no stage is given and none of the stages is complete.
    """
    ensembler.core.check_project_toplevel_dir()
    models_target_dir = os.path.join(ensembler.core.default_project_dirnames.models, targetid)

    logger.debug('Working on target %s' % targetid)

    if ensembler_stage is None:
        # Most advanced stage first.
        for stagename in ['refine_explicit_md', 'refine_implicit_md', 'build_models']:
            if check_ensembler_modeling_stage_complete(stagename, targetid):
                ensembler_stage = stagename
                break

    if ensembler_stage is None:
        raise Exception('Models have not yet been built for this Ensembler project.')

    if traj_filepath is None:
        traj_filepath = os.path.join(models_target_dir, 'traj-{0}.xtc'.format(ensembler_stage))
    if topol_filepath is None:
        topol_filepath = os.path.join(models_target_dir, 'traj-{0}-topol.pdb'.format(ensembler_stage))
    if models_data_filepath is None:
        models_data_filepath = os.path.join(models_target_dir, 'traj-{0}-data.csv'.format(ensembler_stage))

    if process_only_these_templates:
        templateids = process_only_these_templates
    else:
        # next(...) works on Python 2 and 3; the generator's .next() method
        # exists on Python 2 only.
        dirnames = next(os.walk(models_target_dir))[1]
        templateids = [dirname for dirname in dirnames if '_D' in dirname]

    model_filename = ensembler.core.model_filenames_by_ensembler_stage[ensembler_stage]
    valid_model_templateids = [
        templateid for templateid in templateids
        if os.path.exists(os.path.join(models_target_dir, templateid, model_filename))
    ]
    valid_model_filepaths = [
        os.path.join(models_target_dir, templateid, model_filename)
        for templateid in valid_model_templateids
    ]

    # Sequence identity per model; None when the file is missing.
    seqids = []
    for templateid in valid_model_templateids:
        seqid_filepath = os.path.join(models_target_dir, templateid, 'sequence-identity.txt')
        if os.path.exists(seqid_filepath):
            with open(seqid_filepath) as seqid_file:
                seqids.append(float(seqid_file.read().strip()))
        else:
            seqids.append(None)

    df = pd.DataFrame({
        'templateid': valid_model_templateids,
        'model_filepath': valid_model_filepaths,
        'seqid': seqids,
    })
    # DataFrame.sort() was removed from pandas; sort_values is its
    # replacement.
    df.sort_values(by='seqid', inplace=True, ascending=False)
    df.reset_index(drop=True, inplace=True)

    df.to_csv(models_data_filepath, columns=['templateid', 'seqid'])

    # construct traj
    traj = mdtraj.load_pdb(df.model_filepath[0])
    for model_filepath in df.model_filepath[1:]:
        traj += mdtraj.load_pdb(model_filepath)

    # superpose structured C-alphas
    dssp = mdtraj.compute_dssp(traj[0])[0]
    structured_resis_bool = (dssp == 'H') | (dssp == 'E')
    topology = traj.topology
    # Look up each C-alpha's own residue instead of assuming that the n-th
    # C-alpha belongs to the n-th residue.
    structured_alpha_indices = np.array(
        [atom_index
         for atom_index in topology.select_atom_indices('alpha')
         if structured_resis_bool[topology.atom(atom_index).residue.index]],
        dtype=int)
    traj.superpose(reference=traj, frame=0, atom_indices=structured_alpha_indices)

    # write traj, and write first frame as pdb file
    traj[0].save(topol_filepath)
    traj.save(traj_filepath)
    return traj, df
def test_2(get_fn, tmpdir):
    """Frame by frame, check ``md.compute_dssp`` (full scheme) against the
    ``call_dssp`` helper for 2EQQ."""
    traj = md.load(get_fn('2EQQ.pdb'))
    n_frames = len(traj)
    for index in range(n_frames):
        frame = traj[index]
        helper_codes = call_dssp(tmpdir, frame)
        # The same frame is sliced again for the MDTraj side.
        mdtraj_codes = md.compute_dssp(traj[index], simplified=False)[0]
        assert_(helper_codes, mdtraj_codes)
"""
Created on Fri Jul 29 16:10:31 2016

@author: hliu
"""
import mdtraj as md
import pandas as pd
import numpy as np
from researchcode.plotting.plot_set import *
import glob
import os
import matplotlib as mpl
from matplotlib.ticker import FuncFormatter

# Property name -> callable that computes the property from a trajectory.
struct_funct = {
    'ss': lambda x: md.compute_dssp(x),
    'rg': lambda x: md.compute_rg(x),
    'heli': lambda x: calSSPercent(x, 'H'),
    'beta': lambda x: calSSPercent(x, 'E'),
    #'rmsd': lambda x: rmsds[x.name]
}


def addProperty2Traj(traj, props):
    """Attach every property in ``props`` to ``traj`` as an attribute.

    ``props`` maps attribute names to callables taking the trajectory.
    Attributes that ``traj`` already has are left untouched and their
    callable is not invoked.
    """
    for name in props:
        if hasattr(traj, name):
            continue
        setattr(traj, name, props[name](traj))
def test_7():
    """Smoke test: simplified DSSP on 2EQQ must run without raising."""
    traj = md.load(get_fn('2EQQ.pdb'))
    # The result is deliberately not inspected.
    md.compute_dssp(traj, simplified=True)
import matplotlib matplotlib.use("AGG") import matplotlib.pyplot as plt from matplotlib.lines import Line2D import pylab maxrep=4 trajpath='/storage/mi/pycon/TTApplications/ALA10TT2/Results/' for r in range(maxrep): print 'Using rep '+str(r) if os.path.exists(trajpath+'EigenfunctionFramesCenter'+str(r)+'.xtc')==True: print "file exists" Traj=md.load(trajpath+'EigenfunctionFramesCenter'+str(r)+'.xtc', top=trajpath+'md_production0_noWater.pdb') trajlen=len(Traj) Dssp=md.compute_dssp(Traj,simplified=False) # Plot as gromacs dssp fig1 = plt.figure(1) ax=fig1.add_subplot(111) skip=1 for t in range(trajlen/skip): for m in range(1,9): if Dssp[skip*t,m]=='H': colore='b' elif Dssp[skip*t,m]=='B': colore='k' elif Dssp[skip*t,m]=='E': colore='r' elif Dssp[skip*t,m]=='G': colore='gray' elif Dssp[skip*t,m]=='I':
def calc_ss(traj):
    """Return ``md.compute_dssp(traj)`` with that function's default options."""
    assignment = md.compute_dssp(traj)
    return assignment
def get_SS(SS, top=None):
    r"""
    Work out what kind of secondary-structure input the user provided and,
    where possible, compute the secondary structure from it.

    Parameters
    ----------
    SS : secondary structure information
        Can be many things:

        * None or False
            Do nothing.
        * :obj:`mdtraj.Trajectory`
            The SS is computed from the first frame of this geometry.
        * string
            Path to a file, of which only the first frame is read. Loading is
            first attempted without topology information (works for e.g.
            .pdb, .gro, .h5); if that raises a ValueError, it is retried with
            :obj:`top` (needed for e.g. .xtc, .dcd).
        * True
            Same as the triple (0, 0, 0).
        * any other object of length three, e.g. a triple of ints
          (CP_idx, traj_idx, frame_idx)
            Nothing is computed; the object is returned as is and is handled
            externally by the :obj:`ContactGroup` that called this method.
            See the docs there for more info.
        * array_like
            Used directly as the SS, s.t. ss_array[idx] gives the SS-info
            for the residue with that idx.
    top : :obj:`~mdtraj.Topology`, default is None
        Only used when a file cannot be loaded without a topology.

    Returns
    -------
    from_tuple : False or the triple
        The triple when the information has to be taken from a tuple,
        False otherwise.
    ss_array : np.ndarray or None
    """
    if SS is None or SS is False:
        return False, None

    if isinstance(SS, _md.Trajectory):
        return False, _md.compute_dssp(SS[0], simplified=True)[0]

    if isinstance(SS, str):
        try:
            # First attempt: the file carries its own topology.
            computed = _md.compute_dssp(_md.load(SS, frame=0),
                                        simplified=True)[0]
        except ValueError:
            # Retry with the topology supplied by the caller.
            computed = _md.compute_dssp(_md.load(SS, top=top, frame=0),
                                        simplified=True)[0]
        return False, computed

    if SS is True:
        return (0, 0, 0), None

    if len(SS) == 3:
        return SS, None

    return False, SS
chain = entry['chain'] # get chains - FML # IGNORE FOR NOW - USING SHIFT mmp = app.PDBFile(pdb) if chain == '_': chain = list(mmp.topology.residues())[0].chain.id[0] chain_names = [] for c in mmp.topology.chains(): chain_names.append(c.id[0]) chain_idx = chain_names.index(chain) p = md.load_pdb(pdb) dssp = md.compute_dssp(p) residues = list(p.topology.residues) for i in range(dssp.shape[0]): for j in range(dssp.shape[1]): fid = i rid = residues[j].resSeq if residues[j].chain.index == chain_idx: rinfo.write( f'{entry["pdb_file"].split("/")[-1]} {chain} {residues[j].name} {index} {fid} {rid} {dssp[i, j]}\n' ) key = residues[j].name + '-' + dssp[i, j] if key not in combos: combos[key] = 0 combos[key] += 1 pbar.set_description(f'Processed PDB {pdb} Total Records: {index}') rinfo.flush()
def test_2():
    """Generator test: yield one DSSP comparison callable per frame of 2EQQ.

    Each yielded callable compares ``call_dssp`` with ``md.compute_dssp``
    (full scheme) on one frame.
    """
    t = md.load(get_fn('2EQQ.pdb'))
    for i in range(len(t)):
        # Bind the frame index as a default argument. A plain closure looks
        # `i` up when it is *called*, so if the generator were consumed
        # before the checks run, every check would test the last frame only.
        yield lambda i=i: assert_(call_dssp(t[i]), md.compute_dssp(t[i], simplified=False)[0])
# RMSD of every frame of the longest trajectory to its own first frame.
plt.clf()
rmsd = md.rmsd(longest_traj, longest_traj, frame=0)
plt.plot(time, rmsd)
plt.xlabel('time (ns)')
plt.ylabel('RMSD(nm)')
plt.title('RMSD')
plt.tight_layout()
plt.savefig('rmsd.png')

### SECONDARY STRUCTURE PLOT
plt.clf()
dssp = md.compute_dssp(longest_traj)

# One {code: count} dict per frame.
dssp_counts = []
for d in dssp:
    unique, counts = np.unique(d, return_counts=True)
    dssp_counts.append(dict(zip(unique, counts)))

# Number of assignments in the first frame, used as the common denominator.
total_vals = sum(dssp_counts[0].values())

# Per-frame fraction of coil / strand / helix. `.get(code, 0)` because a code
# that does not occur in a frame is simply absent from that frame's dict;
# plain indexing raised KeyError in that case.
C_values = [d.get('C', 0) / float(total_vals) for d in dssp_counts]
E_values = [d.get('E', 0) / float(total_vals) for d in dssp_counts]
H_values = [d.get('H', 0) / float(total_vals) for d in dssp_counts]
def test_6():
    """In the explicit-solvent alanine dipeptide, residues that carry a full
    C/N/O/CA atom set must be 'C' and all remaining residues 'NA'."""
    traj = md.load(get_fn('alanine-dipeptide-explicit.pdb'))
    codes = md.compute_dssp(traj, simplified=True)

    required_atoms = ('C', 'N', 'O', 'CA')
    protein_mask = np.array([
        set(atom.name for atom in residue.atoms).issuperset(required_atoms)
        for residue in traj.topology.residues
    ])

    protein_codes = np.unique(codes[:, protein_mask])
    other_codes = np.unique(codes[:, np.logical_not(protein_mask)])
    assert protein_codes == "C"
    assert other_codes == 'NA'
def results(self):
    """Return the DSSP assignment of the selected residues as a DataFrame.

    The assignment of ``self.trj`` (``self.simplify`` is passed as the
    second positional argument of ``md.compute_dssp``) is restricted to the
    columns in ``self._selection`` and cached in ``self.framedata``. Columns
    are labelled with the ``resSeq`` of every protein residue of the
    topology.
    NOTE(review): this presumes ``self._selection`` picks exactly the
    protein residues, in topology order -- confirm.
    """
    assignment = md.compute_dssp(self.trj, self.simplify)
    self.framedata = assignment[:, self._selection]

    resid_list = []
    for residue in self.trj.topology.residues:
        if residue.is_protein:
            resid_list.append(residue.resSeq)

    return pd.DataFrame(self.framedata, columns=resid_list)
def calculate_ss(pdbfilename):
    """Return the first frame's DSSP string and the positions of its helices.

    Returns ``(secseq, helix_positions)``: ``secseq`` is the assignment of
    the first frame joined into one string, ``helix_positions`` is the
    ``np.where`` result (a tuple holding one index array) for the residues
    assigned 'H'.
    """
    structure = md.load(pdbfilename)
    first_frame = md.compute_dssp(structure)[0]
    secseq = ''.join(first_frame)
    helix_positions = np.where(first_frame == 'H')
    return secseq, helix_positions
def calSSPercent(traj, ss_type):
    """Return, per residue, the percentage of frames assigned ``ss_type``.

    The DSSP assignment is read from ``traj.ss``; when that attribute is
    missing it is computed with ``md.compute_dssp`` and stored on ``traj``
    first. The percentage is taken along the first axis of the assignment.
    """
    if not hasattr(traj, 'ss'):
        traj.ss = md.compute_dssp(traj)
    # 1 where the code matches, 0 elsewhere; the column mean is the fraction.
    matches = np.asarray(traj.ss == ss_type).astype(int)
    return matches.mean(axis=0) * 100
def test_1(get_fn, tmpdir):
    """For each reference PDB file, check ``md.compute_dssp`` (full scheme)
    against the ``call_dssp`` helper on the 'minimal' atom selection."""
    filenames = ('1bpi.pdb', '1vii.pdb', '4ZUO.pdb', '1am7_protein.pdb')
    for filename in filenames:
        full = md.load_pdb(get_fn(filename))
        minimal = full.atom_slice(full.top.select_atom_indices('minimal'))
        helper_codes = call_dssp(tmpdir, minimal)
        mdtraj_codes = md.compute_dssp(minimal, simplified=False)[0]
        assert_(helper_codes, mdtraj_codes)