def test_bonds(self):
    """The first segid-B atom has equally many bonds in altloc A and altloc B."""
    universe = Universe(self.filename, guess_bonds=True)
    # The topology must be built before bonds of individual atoms are queried.
    universe.build_topology()
    bond_counts = [
        len(universe.select_atoms(query)[0].bonds)
        for query in ("segid B and (altloc A)", "segid B and (altloc B)")
    ]
    assert_equal(bond_counts[0], bond_counts[1])
def calc_rama(grof, xtcf, btime, etime):
    """Yield one formatted phi/psi line per trajectory frame inside a time window.

    Parameters
    ----------
    grof : structure/topology file handed to ``Universe``.
    xtcf : trajectory file handed to ``Universe``.
    btime : frames whose ``ts.time`` is below this value are skipped.
    etime : when positive, iteration stops at the first frame whose
        ``ts.time`` exceeds it; zero or negative means "no upper bound".

    Yields
    ------
    str
        Newline-terminated ``"<phi> <psi> <resname>-<resid>"`` with both
        angles formatted to three decimals.
    """
    u = Universe(grof, xtcf)
    resname_query = 'resname GLY or resname VAL or resname PRO'
    atoms = u.selectAtoms(resname_query)
    # Both calls return lists; only the first entry of each is used.
    resname = atoms.resnames()[0]
    resid = atoms.resids()[0]
    phi_query = ('(resname ACE and name C) or '
                 '(resname GLY or resname VAL or resname PRO and '
                 '(name N or name CA or name C))')
    psi_query = ('(resname GLY or resname VAL or resname PRO and (name N or name CA or name C or name NT)) or '
                 '(resname NH2 and name N)')
    # MDAnalysis will convert the unit of length to angstrom, though in
    # Gromacs the unit is nm
    phi = u.selectAtoms(phi_query)
    psi = u.selectAtoms(psi_query)
    # Echo the atoms behind each dihedral so the selections can be checked.
    # print(x) with a single argument behaves the same on Python 2 and 3.
    for atom in phi.atoms:
        print(atom)
    for atom in psi.atoms:
        print(atom)
    for ts in u.trajectory:
        if btime > ts.time:
            continue
        if etime > 0 and etime < ts.time:
            break
        yield '{0:.3f} {1:.3f} {2}-{3}\n'.format(
            phi.dihedral(), psi.dihedral(), resname, resid)
        U.print_progress(ts)
def main():
    """Score every model of the merged complex by CA-CA contacts and violations.

    For each frame the pairwise distances between the CA atoms of segments
    A/B and of segments C/D/E are computed. Pairs closer than ``lower`` are
    violations, pairs within ``[lower, upper]`` are contacts. One row per
    model is written to ``model_no.out``; the score is NaN when a model has
    no violations.
    """
    u = Universe(
        '/Volumes/HD-siida/gtail_b1_sys/analysis/merged_aligned_complexes.pdb')
    print(u.atoms.segids)
    ca_integrinAB = u.select_atoms('segid A B and name CA')
    ca_lamininE8 = u.select_atoms('segid C D E and name CA')
    lower, upper = 6.0, 10.0
    with open('model_no.out', 'w') as fout:
        fout.write(
            f'#MODEL NO, nViolations (if r<{lower}), nContacts ({lower}<=r<={upper}) \n'
        )
        # Model numbers are 1-based.
        for model_no, _ in enumerate(tqdm(u.trajectory), 1):
            pair_dist = distance.cdist(ca_integrinAB.positions,
                                       ca_lamininE8.positions,
                                       metric='euclidean')
            too_close = pair_dist < lower
            in_window = (pair_dist <= upper) & (pair_dist >= lower)
            n_violations = int(np.count_nonzero(too_close))
            n_contacts = int(np.count_nonzero(in_window))
            if n_violations == 0:
                score = np.nan
            else:
                score = -0.59 * np.log(n_contacts / n_violations)
            fout.write(f'{model_no}, {n_violations}, {n_contacts}, {score}\n')
def test_write_selection(self):
    """A written "name C*" selection reloads with the same number of atoms."""
    query = "name C*"
    expected = Universe(mol2_molecule).select_atoms(query)
    expected.write(self.outfile)
    reloaded = Universe(self.outfile).select_atoms(query)
    assert_equal(len(expected), len(reloaded))
def main():
    """Entry to validate_pbc.py

    Usage: ``validate_pbc.py REFERENCE TRAJECTORY [TRAJECTORY ...]``.

    Every frame of each trajectory is aligned onto the reference using the
    protein CA atoms. A trajectory is reported invalid as soon as element 1
    of the ``alignto`` result exceeds ``THRESHOLD``; otherwise the average of
    that value over all frames is printed.
    """
    if len(sys.argv) < 3:
        print("Please provide at least a reference file and a trajectory"
              " for validation")
        sys.exit(1)
    ref_name = sys.argv[1]
    for xtc_name in sys.argv[2:]:
        print("checking file " + xtc_name)
        ref = Universe(ref_name)
        xtc = Universe(ref_name, xtc_name)
        total = 0.0
        count = 0
        valid = True
        for frame in xtc.trajectory:
            # NOTE(review): element 1 is presumably the RMSD after the fit
            # (see the MDAnalysis alignto documentation) - confirm.
            rmsd = alignto(xtc, ref, select='protein and name CA')
            if rmsd[1] > THRESHOLD:
                print("At " + str(frame.frame) + " violates the criterion.")
                print("trajectory file " + xtc_name + " is invalid.")
                valid = False
                break
            total += rmsd[1]
            count += 1
        if not valid:
            continue
        if count == 0:
            # An empty trajectory used to end in a ZeroDivisionError here.
            print("pass" + " - " + "average rmsd: n/a (no frames)")
        else:
            print("pass" + " - " + "average rmsd: " + str(total / count))
def lindemann_per_frames(u: Universe, select_lang):
    """Calculate the Lindemann index of each atom after every frame.

    Running mean and variance of all pair distances are updated frame by
    frame with Welford's algorithm. After each frame the pair indices
    ``sqrt(var / nframes) / mean`` are averaged per atom (ignoring NaN, which
    is what the zero-distance diagonal produces). The variance is divided by
    the total number of frames in the trajectory, not by the number of
    frames processed so far.

    Parameters
    ----------
    u : MDA trajectory instance.
    select_lang : select language.

    Returns
    -------
    a ndarray of shape (len_frames, natoms); row ``q`` holds the per-atom
    values computed from frames ``0..q``.
    """
    natoms = len(u.select_atoms(select_lang))
    nframes = len(u.trajectory)
    array_mean = np.zeros((natoms, natoms))
    array_var = np.zeros((natoms, natoms))
    # Index pairs (i, j) with i < j, plus their mirror images (j, i).
    upper = np.triu_indices(natoms, k=1)
    lower = (upper[1], upper[0])
    cluster = u.select_atoms(select_lang, updating=True)
    per_atom = []
    for q, ts in enumerate(u.trajectory):
        coords = cluster.positions
        array_distance = distance.cdist(coords, coords)
        iframe = q + 1  # 1-based count of the frames seen so far
        # Welford update, applied to the whole upper triangle at once.
        xn = array_distance[upper]
        mean = array_mean[upper]
        delta = xn - mean
        new_mean = mean + delta / iframe
        array_mean[upper] = new_mean
        array_var[upper] = array_var[upper] + delta * (xn - new_mean)
        # Mirror into the lower triangle so both matrices stay symmetric.
        array_mean[lower] = array_mean[upper]
        array_var[lower] = array_var[upper]
        lindemann_indices = np.divide(
            np.sqrt(np.divide(array_var, nframes)), array_mean)
        # Reduce straight away; the (natoms, natoms) matrix is not kept.
        per_atom.append(np.nanmean(lindemann_indices, axis=1))
    return np.array(per_atom)
def from_mol2(f):
    """Load mol2 file *f* from ``datapath`` and convert it via ``Molecule.from_mda``.

    Before the conversion an "elements" attribute is added, guessed from the
    atom names, and the atom types are upper-cased.
    """
    universe = Universe(str(datapath / f))
    guessed = np.array(
        [guess_atom_element(name) for name in universe.atoms.names],
        dtype=object)
    universe.add_TopologyAttr("elements", guessed)
    upper_types = [atom_type.upper() for atom_type in universe.atoms.types]
    universe.atoms.types = np.array(upper_types, dtype=object)
    return Molecule.from_mda(universe, force=True)
def main():
    """Compute contact probabilities between two segments and optionally
    generate PyMOL visualisation scripts.

    All settings come from ``parse_options()``. The contact-probability
    matrix is saved as text to the configured output file. When the pymol
    option equals ``'Y'`` a PDB file is checked and passed on to
    ``pymol_contact_visu``; if none was supplied, one is written from the
    current frame of the trajectory.
    """
    # get options
    options = parse_options()
    psf = options.psf_file
    dcd = options.dcd_file
    chain1 = options.segid1
    chain2 = options.segid2
    selection1 = options.selection1
    selection2 = options.selection2
    co = options.cutoff
    output = options.output_file
    visu = options.pymol
    pdbvisu = options.pymol_pdb
    # use MDAnalysis to read trajectory
    u = Universe(psf, dcd)
    # get contact probability
    cp = GetContacts(u)
    contactprob, bio1, bio2 = cp.run(chain1, chain2, selection1, selection2, co)
    np.savetxt(output, contactprob, fmt='%4.2f', delimiter=" ")
    # generate pymol scripts if needed
    if visu == 'Y':
        # if no pdb file is supplied, write one from the trajectory
        if pdbvisu is None:
            seleforpymol = u.select_atoms(
                "segid %s or segid %s" % (chain1, chain2))
            seleforpymol.write('forpymol.pdb', remarks=None)
            pdbvisu = 'forpymol.pdb'
        # check pdb file format for weird encoding
        check_pdb(pdbvisu)
        pymol_contact_visu(contactprob, pdbvisu, chain1, chain2, bio1, bio2)
def ave_stru(reffile, trajfile, startframe=0, endframe=-1):
    ''' Get the average structure from a trajectory

    The trajectory is repeatedly aligned (CA atoms) onto the current average
    structure, and the average is replaced by the mean coordinates of the
    aligned frames. Iteration stops when the mean RMSD changes by less than
    1e-5 between sweeps, or after 20 sweeps.

    startframe, endframe : frame window; ``endframe=-1`` (or a value past the
        end) means "up to the last frame". Frames are sampled with the
        module-level stride ``freq``.

    Returns ``(rmsds, ave)``: the per-frame RMSDs of the last sweep and the
    atom group holding the averaged coordinates.

    Raises ValueError when the window contains no frames.
    '''
    traj = Universe(reffile, trajfile)
    ref = Universe(reffile)
    ave = ref.atoms
    max_cycles = 20
    previous_rmsd = 0.0
    if endframe == -1 or endframe > traj.trajectory.numframes:
        endframe = traj.trajectory.numframes
    rmsds = np.empty((0, ))
    # Bounded loop: the original counter was never incremented, so a
    # non-converging run would never terminate.
    for _ in range(max_cycles):
        # Collect per-frame values in lists so that only frames actually
        # visited enter the means (pre-sized np.empty buffers left
        # uninitialised slots that leaked into np.mean).
        frame_rmsds = []
        frame_coords = []
        for ts in traj.trajectory[startframe:endframe:freq]:
            frame_rmsds.append(
                analysis.align.alignto(traj, ave, select='name CA')[1])
            frame_coords.append(np.array(traj.atoms.positions))
        if not frame_rmsds:
            raise ValueError('no frames between startframe and endframe')
        rmsds = np.array(frame_rmsds)
        rmsd = np.mean(rmsds)
        print(rmsd)
        ave.set_positions(np.mean(frame_coords, axis=0))
        if np.abs(rmsd - previous_rmsd) < 0.00001:
            break
        previous_rmsd = rmsd
    return (rmsds, ave)
def count_interactions(grof, xtcf, btime, etime, cutoff):
    """Yield, per frame, the number of selected atom pairs within ``cutoff``.

    ``cutoff`` is given in nm and multiplied by 10 before being compared with
    distances computed from the atom positions. Pairs whose residue ids
    differ by less than 2 are ignored. Frames before ``btime`` are skipped
    and, when ``etime`` is positive, iteration stops after it.

    Each yielded string is ``'{time:10.0f}{count:8d}'`` plus a newline.
    """
    # * 10: convert from nm to angstrom to work with MDAnalysis
    cutoff_angstrom = cutoff * 10
    u = Universe(grof, xtcf)
    query = ('(resname PRO and (name CB or name CG or name CD)) or'
             '(resname VAL and (name CG1 or name CG2)) or'
             '(resname GLY and name CA) or'
             '(resname ALA and name CB)')
    atoms = u.selectAtoms(query)
    for ts in u.trajectory:
        if ts.time < btime:
            continue
        if 0 < etime < ts.time:
            break
        members = list(atoms)
        numcount = 0
        # Visit every unordered pair exactly once.
        for idx, first in enumerate(members):
            for second in members[idx + 1:]:
                # neighbouring residues are not counted
                if abs(first.resid - second.resid) < 2:
                    continue
                if np.linalg.norm(first.pos - second.pos) <= cutoff_angstrom:
                    numcount += 1
        yield '{0:10.0f}{1:8d}\n'.format(ts.time, numcount)
        utils.print_progress(ts)
def gen_hbond_map(xpm, ndx, grof):
    """Build a residue-by-residue matrix of hydrogen-bond probabilities.

    ``xpm`` and ``ndx`` are parsed with ``objs.XPM`` and ``objs.HBNdx``; the
    hydrogen-bond index is mapped to residue ids using the protein atoms of
    ``grof`` (ACE and NH2 excluded). Entry ``[donor - 1][acceptor - 1]`` of
    the result is element 1 of the matching ``color_count`` item.
    """
    xpm_data = objs.XPM(xpm)
    hb_index = objs.HBNdx(ndx)
    protein = Universe(grof).selectAtoms(
        'protein and not resname ACE and not resname NH2')
    hbonds_by_resid = hb_index.map_id2resid(protein)
    # One extra row/column for the missing ACE residue.
    size = protein.residues.numberOfResidues() + 1
    # -1: resids start from 1, matrix indices from 0.
    entries = [(pair[0] - 1, pair[1] - 1, counts[1])
               for pair, counts in zip(hbonds_by_resid, xpm_data.color_count)]
    hb_map = np.zeros((size, size))
    for donor, acceptor, probability in entries:
        hb_map[donor][acceptor] = probability
    return hb_map
def classifyResiduesIntoTwo(apo_pdb, holo_pdb, ligname, cutoff=4.0):
    """Split aromatic residues into "cryptic" and "not cryptic" sets.

    The ligand coordinates from the holo structure are compared with the
    protein atoms of the apo structure. An aromatic residue (PHE, TRP, TYR,
    HIS) is cryptic when any of its apo atoms lies within ``cutoff`` of any
    ligand atom, i.e. it would clash with the ligand.

    Returns
    -------
    (set, set)
        Cryptic labels and the remaining aromatic labels of the holo
        structure, each label being ``f'{resname}{resid}'``.
    """
    aromatic_resnames = {'PHE', 'TRP', 'TYR', 'HIS'}
    u_holo, u_apo = Universe(holo_pdb), Universe(apo_pdb)
    ligand = u_holo.select_atoms(f'resname {ligname}')
    holo = u_holo.select_atoms(f'not resname {ligname}')
    apo = u_apo.select_atoms('protein')
    # Only aromatic atoms can ever be recorded, so filter before measuring.
    aromatic_apo_atoms = [
        atom for atom in apo if atom.resname in aromatic_resnames
    ]
    cryptic = set()
    for lig_atom in ligand:
        lig_pos = lig_atom.position
        for apo_atom in aromatic_apo_atoms:
            if np.linalg.norm(lig_pos - apo_atom.position) <= cutoff:
                cryptic.add(f'{apo_atom.resname}{apo_atom.resid}')
    # All aromatic residue labels of the holo structure.
    all_aromatic = {
        f'{residue.resname}{residue.resid}'
        for residue in holo.residues
        if residue.resname in aromatic_resnames
    }
    return cryptic, all_aromatic - cryptic
def sequence_spacing(pf, grof, xtcf, peptide_length, atom_sel, output=None):
    """Collect centre-of-geometry distances grouped by sequence separation.

    ``atom_sel`` is a format string that receives a residue number; one
    selection is built for every number in ``range(2, peptide_length)``. For
    each frame and each residue pair ``i < j`` the distance between the two
    centres of geometry is appended to the list stored under ``j - i``.

    ``pf`` and ``output`` are accepted but not used by this function.

    Returns
    -------
    dict
        ``{separation: [distance, ...]}``.
    """
    u = Universe(grof, xtcf)
    # this selection part should be better customized
    # REMEMBER: OPTIONS verification should be done in main ONLY!
    residues = [
        u.selectAtoms(atom_sel.format(i)) for i in range(2, peptide_length)
    ]
    ijdist_dict = {}
    for ts in u.trajectory:
        # One centre per residue and frame instead of one per pair.
        centers = [res.centerOfGeometry() for res in residues]
        for i, center_i in enumerate(centers):
            for j in range(i + 1, len(centers)):
                ijdist = np.linalg.norm(center_i - centers[j])
                # The first value per separation used to be the separation
                # itself rather than the measured distance.
                ijdist_dict.setdefault(j - i, []).append(ijdist)
        if ts.step % 2000000 == 0:  # 2000ps
            print("time step: {0:d}".format(ts.step))
    return ijdist_dict
def calc_tilt_end_to_end(universe: mda.Universe, resid_up, resid_down, fname="TMD_tilt.dat"):
    '''
        Calculate the tilt as the angle between the z axis and the vector
        connecting the centres of mass of resid_up and resid_down.

        The angle is folded into [0, 90] degrees, so the direction of the
        connecting vector does not matter. One "time  tilt" row per frame is
        written to fname after a header line.
    '''
    fstr2 = '{: <15}{: <20}'
    fstr = '{: <15}{: <20.5f}'
    with open(fname, "w") as outf:
        print(fstr2.format("time", "tilt"), file=outf)
        # Frame-independent objects are built once, not once per frame.
        zaxis = np.array([0, 0, 1])
        sel_u = universe.select_atoms("resid {}".format(resid_up))
        sel_d = universe.select_atoms("resid {}".format(resid_down))
        for t in range(universe.trajectory.n_frames):
            time = universe.trajectory[t].time
            LOGGER.info("At %s", time)
            pos_u = sel_u.center_of_mass()
            pos_d = sel_d.center_of_mass()
            axis = pos_d - pos_u
            costilt = np.dot(axis, zaxis) / np.linalg.norm(axis)
            # Rounding can push the cosine marginally outside [-1, 1], which
            # would make arccos return NaN.
            angle = np.arccos(np.clip(costilt, -1.0, 1.0)) * (180 / np.pi)
            if angle > 90:
                angle -= 180
            print(fstr.format(time, abs(angle)), file=outf)
def _activate(self):
    """Make the universe and attach it.

    Without a topology the attached universe is reset to ``None``. Otherwise
    the file paths are fetched through ``Universehound``, the universe is
    built from them, and the stored path definitions are refreshed.
    """
    if not self.topology:
        self._treant._universe = None
        return

    paths = Universehound(self).fetch()
    topology, trajectory = paths['top'], paths['traj']

    # An empty or missing trajectory contributes no positional arguments.
    trajectory_args = trajectory or ()
    self._treant._universe = Universe(topology, *trajectory_args,
                                      **self.kwargs)
    self._apply_resnums()

    # Refresh the stored universe definition; this rebuilds the path
    # variants for each file. A read-only state file only yields a warning.
    try:
        self._set_topology(topology)
        self._set_trajectory(trajectory)
    except OSError:
        warnings.warn(
            "Cannot update paths for universe; "
            " state file is read-only.")
def test_write(self):
    """Writing all atoms yields a single-frame file with identical positions."""
    source = Universe(mol2_molecules)
    source.atoms.write(self.outfile)
    written = Universe(self.outfile)
    assert_equal(len(written.atoms), len(source.atoms))
    assert_equal(len(written.trajectory), 1)
    assert_array_equal(written.atoms.positions, source.atoms.positions)
def main():
    """Print the position of one atom, chosen by its index within the
    residue, for every residue with the given name.

    The atoms selected by residue name are cut into consecutive chunks of
    ``atoms_num`` atoms. The program exits with status 1 when the selection
    size is not a multiple of ``atoms_num``.
    """
    arg_parser = argparse.ArgumentParser(
        description='通过给定残基名称,残基内原子数目,原子在残基内的索引(从0开始),计算原子的坐标。')
    arg_parser.add_argument('resname', action='store', help='残基名称')
    arg_parser.add_argument('atoms_num', type=int, action='store', help='残基内原子数目')
    arg_parser.add_argument('index', type=int, action='store', help='原子的索引,索引从0开始')
    arg_parser.add_argument('topology_file', action='store', help='拓扑文件,例如gro, pdb')
    args = arg_parser.parse_args()
    resname, atoms_num, index = args.resname, args.atoms_num, args.index

    selected = Universe(args.topology_file).selectAtoms("resname " + resname)
    if len(selected) % atoms_num:
        print("拓扑文件内对应残基原子总数不是所给原子数目的整数倍,请给予正确的原子数目。")
        exit(1)

    # One chunk of atoms_num atoms per residue; take the requested atom.
    positions = [
        selected[start:start + atoms_num][index].position
        for start in range(0, len(selected), atoms_num)
    ]
    print("The positions of atoms %s is:" % (index))
    for position in positions:
        print(position)
def cluster_coordinates(  # TODO: rewrite the method
    nvt_run: Universe,
    select_dict: Dict[str, str],
    run_start: int,
    run_end: int,
    species: List[str],
    distance: float,
    basis_vectors: Optional[Union[List[np.ndarray], np.ndarray]] = None,
    cluster_center: str = "center",
) -> np.ndarray:
    """Calculates the average position of a cluster.

    The cluster consists of the atoms matching ``species`` within
    ``distance`` of the first atom of the ``cluster_center`` selection. The
    selection is made once, on the frame that is current when the function
    is called, and each atom's position is then averaged over the frames
    ``run_start:run_end``.

    Args:
        nvt_run: An MDAnalysis ``Universe`` containing wrapped trajectory.
        select_dict: A dictionary of atom species selection, where each atom
            species name is a key and the corresponding values are the
            selection language.
        run_start: Start frame of analysis.
        run_end: End frame of analysis.
        species: A list of species in the cluster.
        distance: The coordination cutoff distance.
        basis_vectors: Two or three basis vectors for normalizing the
            coordinates of the cluster atoms, as a list or an array. With
            two vectors the remaining axes are derived by cross products.
            ``None`` or an empty sequence returns plain coordinates.
        cluster_center: Cluster center atom species.

    Returns:
        An array of coordinates of the cluster atoms. When basis vectors are
        given, the coordinates are expressed in that basis and centred on
        their mean.

    Raises:
        ValueError: If ``basis_vectors`` holds neither two nor three vectors.
    """
    trj_analysis = nvt_run.trajectory[run_start:run_end:]
    cluster_center_atom = nvt_run.select_atoms(select_dict.get(cluster_center), periodic=True)[0]
    selection = ("(" + " or ".join(species) + ") and (around " + str(distance) + " index " +
                 str(cluster_center_atom.index) + ")")
    shell = nvt_run.select_atoms(selection, periodic=True)
    n_shell = len(shell)
    # Read the trajectory once and keep the shell positions of every frame,
    # instead of iterating the whole trajectory again for each shell atom.
    frames = [shell.positions for _ in trj_analysis] if n_shell else []
    cluster = [np.mean(np.array([frame[k] for frame in frames]), axis=0) for k in range(n_shell)]
    cluster_array = np.array(cluster)
    # ``if basis_vectors:`` raised ValueError for ndarray input, whose truth
    # value is ambiguous; test for presence explicitly instead.
    if basis_vectors is None or len(basis_vectors) == 0:
        return cluster_array
    if len(basis_vectors) == 2:
        vec1 = basis_vectors[0]
        vec2 = basis_vectors[1]
        vec3 = np.cross(vec1, vec2)
        vec2 = np.cross(vec1, vec3)
    elif len(basis_vectors) == 3:
        vec1 = basis_vectors[0]
        vec2 = basis_vectors[1]
        vec3 = basis_vectors[2]
    else:
        raise ValueError("incorrect vector format")
    vec1 = vec1 / np.linalg.norm(vec1)
    vec2 = vec2 / np.linalg.norm(vec2)
    vec3 = vec3 / np.linalg.norm(vec3)
    # Columns of basis_xyz are the unit basis vectors; solving gives the
    # coordinates of every cluster atom in that basis.
    basis_xyz = np.transpose([vec1, vec2, vec3])
    cluster_norm = np.linalg.solve(basis_xyz, cluster_array.T).T
    cluster_norm = cluster_norm - np.mean(cluster_norm, axis=0)
    return cluster_norm
def count_interactions(grof, xtcf, btime, cutoff, debug):
    """Yield, per frame, the number of (hydrophobic atom, water oxygen) pairs
    within ``cutoff``.

    Water oxygens (``name OW``) are re-selected every frame as those within
    8 of the hydrophobic selection. ``cutoff`` is compared directly with
    distances computed from the atom positions; no unit conversion is done
    here. Frames with ``ts.time < btime`` are skipped.

    Each yielded string is ``'{time:10.0f}{count:8d}'`` plus a newline. With
    ``debug`` set, a progress line is printed for every second frame.
    """
    u = Universe(grof, xtcf)
    un_query = ('(resname PRO and (name CB or name CG or name CD)) or'
                '(resname VAL and (name CG1 or name CG2)) or'
                '(resname GLY and name CA) or'
                '(resname ALA and name CB)')
    vp_query = ('name OW')
    # MDAnalysis will convert the unit of length to angstrom, though in
    # Gromacs the unit is nm
    un_atoms = u.selectAtoms(un_query)
    for ts in u.trajectory:
        if ts.time >= btime:
            numcount = 0
            tropo_vp_atoms = u.selectAtoms(
                '({0}) and around 8 ({1})'.format(vp_query, un_query))
            # different from when calculating unun, there is no overlap atom
            # between un_atoms & tropo_vp_atoms
            for ai in un_atoms:
                for aj in tropo_vp_atoms:
                    d = np.linalg.norm(ai.pos - aj.pos)
                    if d <= cutoff:
                        numcount += 1
            yield '{0:10.0f}{1:8d}\n'.format(ts.time, numcount)
            # debugging aid: report every second frame
            if debug and ts.frame % 2 == 0:
                print("time: {0:10.0f}; step: {1:10d}; frame: {2:10d}".format(
                    ts.time, ts.step, ts.frame))
def res_dict_from_select_dict(u: Universe, select_dict: Dict[str, str]) -> Dict[str, str]:
    """
    Derive residue-level selections from the atom-level ``select_dict``.

    Every atom selection is widened to ``same resid as (...)``. A species is
    kept when it is "cation" or "anion", or when its residue group has not
    been recorded for an earlier key. If the cation and anion selections
    resolve to equal groups they are merged into a single "salt" entry.

    Args:
        u: The universe object the selections are evaluated on.
        select_dict: A dictionary of atom species, where each atom species
            name is a key and the corresponding values are the selection
            language.

    Returns:
        A dictionary mapping species names to residue selection strings.
    """
    seen_groups = []
    res_dict = {}
    for species, atom_select in select_dict.items():
        res_select = "same resid as (" + atom_select + ")"
        res_group = u.select_atoms(res_select)
        if species not in ("cation", "anion") and res_group in seen_groups:
            continue
        seen_groups.append(res_group)
        res_dict[species] = res_select

    if "cation" in res_dict and "anion" in res_dict:
        cation_group = u.select_atoms(res_dict.get("cation"))
        anion_group = u.select_atoms(res_dict.get("anion"))
        if cation_group == anion_group:
            res_dict.pop("anion")
            res_dict["salt"] = res_dict.pop("cation")
    return res_dict
def gen_hbond_map(xpm, ndx, grof):
    """Return a square matrix holding one hydrogen-bond value per residue pair.

    Hydrogen bonds listed in ``ndx`` are mapped to (donor, acceptor) residue
    ids using the protein atoms of ``grof`` (ACE and NH2 excluded) and paired
    with the entries of the parsed ``xpm`` file's ``color_count``.
    """
    parsed_xpm = objs.XPM(xpm)
    parsed_ndx = objs.HBNdx(ndx)
    universe = Universe(grof)
    pro_atoms = universe.selectAtoms(
        'protein and not resname ACE and not resname NH2')
    resid_pairs = parsed_ndx.map_id2resid(pro_atoms)
    # pl: peptide length; +1 leaves room for the missing ACE residue
    side = pro_atoms.residues.numberOfResidues() + 1
    hb_map = np.zeros((side, side))
    for resids, counts in zip(resid_pairs, parsed_xpm.color_count):
        # counts[1] is the probability of the bond (counts[0] = 1 - counts[1]);
        # resids are 1-based, matrix indices 0-based.
        donor_idx = resids[0] - 1
        acceptor_idx = resids[1] - 1
        hb_map[donor_idx][acceptor_idx] = counts[1]
    return hb_map
def move_and_add_box(self, initial: str, final: str, move: bool = True, pbc: Tuple = (1, 1, 1)):
    """
    Set the box dimensions of a pdb and optionally shift all atoms randomly.

    Parameters
    ----------
    initial : str
        Path with the pdb to modify.
    final : str
        Path where the modified pdb will be written.
    move : bool, optional
        If True, a single random vector is added to every atom position and
        the atoms are packed back into the box. Each component is drawn
        uniformly between 0 and the matching box side scaled by
        ``self.input_info['pbc']``. Pass False to only write the box
        dimensions.
    pbc : Tuple, optional
        Not read by this method; the per-axis factor comes from
        ``self.input_info['pbc']``.
    """
    system = Universe(initial)
    # Orthorhombic cell: three box sides followed by three 90 degree angles.
    system.dimensions = [*self.box_side, 90, 90, 90]
    if move:
        largest_shift = self.box_side * self.input_info['pbc']
        system.atoms.positions += largest_shift * np.random.random(3)
        system.atoms.pack_into_box()
    system.atoms.write(final)
def count_interactions(grof, xtcf, btime, cutoff, debug):
    """Count, frame by frame, contacts between hydrophobic atoms and nearby
    water oxygens.

    For every frame with ``ts.time >= btime`` the water oxygens
    (``name OW``) within 8 of the hydrophobic selection are selected, and
    each (hydrophobic atom, water oxygen) pair whose distance is at most
    ``cutoff`` is counted. ``cutoff`` is used as given, without unit
    conversion.

    Yields ``'{time:10.0f}{count:8d}'`` plus a newline per processed frame.
    With ``debug`` set, a progress line is printed for every second frame.
    """
    u = Universe(grof, xtcf)
    un_query = ('(resname PRO and (name CB or name CG or name CD)) or'
                '(resname VAL and (name CG1 or name CG2)) or'
                '(resname GLY and name CA) or'
                '(resname ALA and name CB)')
    vp_query = ('name OW')
    # MDAnalysis will convert the unit of length to angstrom, though in
    # Gromacs the unit is nm
    un_atoms = u.selectAtoms(un_query)
    shell_query = '({0}) and around 8 ({1})'.format(vp_query, un_query)
    for ts in u.trajectory:
        if ts.time >= btime:
            numcount = 0
            tropo_vp_atoms = u.selectAtoms(shell_query)
            # different from when calculating unun, there is no overlap atom
            # between un_atoms & tropo_vp_atoms
            for ai in un_atoms:
                for aj in tropo_vp_atoms:
                    d = np.linalg.norm(ai.pos - aj.pos)
                    if d <= cutoff:
                        numcount += 1
            yield '{0:10.0f}{1:8d}\n'.format(ts.time, numcount)
            # debugging aid: report every second frame
            if debug and ts.frame % 2 == 0:
                print("time: {0:10.0f}; step: {1:10d}; frame: {2:10d}".format(
                    ts.time, ts.step, ts.frame))
def main():
    """Print the distance between two atoms, chosen by their indices within
    the residue, for every residue with the given name, then the average.

    The atoms selected by residue name are cut into consecutive chunks of
    ``atoms_num`` atoms. The program exits with status 1 when the selection
    size is not a multiple of ``atoms_num``.
    """
    arg_parser = argparse.ArgumentParser(description='通过给定残基名称,残基内原子数目,两个原子在残基内的索引(从0开始),计算所有残基内这两个原子之间的直线距离。')
    arg_parser.add_argument('resname', action='store', help='残基名称')
    arg_parser.add_argument('atoms_num', type=int, action='store', help='残基内原子数目')
    arg_parser.add_argument('index1', type=int, action='store', help='第一个原子的索引,索引从0开始')
    arg_parser.add_argument('index2', type=int, action='store', help='第二个原子的索引,索引从0开始')
    arg_parser.add_argument('topology_file', action='store', help='拓扑文件,例如gro, pdb')
    args = arg_parser.parse_args()
    resname, atoms_num = args.resname, args.atoms_num
    index1, index2 = args.index1, args.index2

    selected = Universe(args.topology_file).selectAtoms("resname " + resname)
    if len(selected) % atoms_num:
        print("拓扑文件内对应残基原子总数不是所给原子数目的整数倍,请给予正确的原子数目。")
        exit(1)

    # Walk the selection one residue-sized chunk at a time.
    first_atoms, second_atoms = [], []
    for start in range(0, len(selected), atoms_num):
        first_atoms.append(selected[start:start + atoms_num][index1])
        second_atoms.append(selected[start:start + atoms_num][index2])

    # Element 2 of the dist() result is what gets printed and averaged.
    dists = dist(AtomGroup(first_atoms), AtomGroup(second_atoms))
    print("The distance between atoms %s and %s is:" % (index1, index2))
    for value in dists[2]:
        print(value)
    print("The average distance between atoms %s and %s is:" % (index1, index2))
    print(np.average(dists[2]))
def count_interactions(A):
    """Return the per-frame average residue-residue contact map.

    ``A`` supplies ``grof``, ``xtcf``, ``query``, ``cutoff`` (nm, multiplied
    by 10 before use), ``nres_away``, ``btime`` and ``etime``. For every
    frame inside the time window, each pair of selected atoms ``(i < j)``
    whose residue ids satisfy ``resid_j - resid_i >= nres_away`` and whose
    distance is at most the cutoff adds one to the entry
    ``[resid_i - 1][resid_j - 1]``.

    Returns
    -------
    ndarray of shape (pl + 1, pl + 1), the summed maps divided by the number
    of processed frames, where ``pl`` is the number of protein residues
    excluding ACE and NH2.

    Raises
    ------
    ValueError
        If no frame falls inside the time window (this used to surface as a
        TypeError from dividing ``None``).
    """
    logger.debug('loading {0}'.format(A.grof))
    univ = Universe(A.grof)
    logger.debug('loaded {0}'.format(A.grof))
    pro_atoms = univ.selectAtoms('protein and not resname ACE and not resname NH2')
    # +1: for missing resname ACE, such that it's easier to proceed in the
    # next step
    size = pro_atoms.residues.numberOfResidues() + 1
    logger.debug('loading {0}, {1}'.format(A.grof, A.xtcf))
    u = Universe(A.grof, A.xtcf)
    logger.debug('loaded {0}, {1}'.format(A.grof, A.xtcf))
    # For reference, the query used when the code was first written:
    # query = ('(resname PRO and (name CB or name CG or name CD)) or'
    #          '(resname VAL and (name CG1 or name CG2)) or'
    #          '(resname GLY and name CA) or'
    #          '(resname ALA and name CB)')
    atoms = u.selectAtoms(A.query)
    logger.info('Number of atoms selected: {0}'.format(atoms.numberOfAtoms()))
    # MDAnalysis will convert the unit of length to angstrom, though in
    # Gromacs the unit is nm
    cutoff = A.cutoff * 10
    nres_away = A.nres_away
    btime = A.btime
    etime = A.etime
    nframe = 0
    unun_map = np.zeros((size, size))  # contacts summed over all frames
    for ts in u.trajectory:
        if btime > ts.time:
            continue
        if etime > 0 and etime < ts.time:
            break
        nframe += 1
        members = list(atoms)
        # Visit each unordered pair once (i < j).
        for i, ai in enumerate(members):
            ai_resid = ai.resid
            for aj in members[i + 1:]:
                aj_resid = aj.resid
                if aj_resid - ai_resid < nres_away:
                    continue
                if np.linalg.norm(ai.pos - aj.pos) <= cutoff:
                    # -1: resid in MDAnalysis starts from 1
                    unun_map[ai_resid - 1][aj_resid - 1] += 1
        utils.print_progress(ts)
    sys.stdout.write("\n")
    if nframe == 0:
        raise ValueError('no trajectory frame lies within the requested time window')
    return unun_map / float(nframe)
def __init__(self, top_file, trj_files, selection='backbone', **kwargs):
    """Load the universe, select the atoms and initialise the base class.

    The base-class initialiser receives the trajectory of the selected
    atoms' universe plus any extra keyword arguments.
    """
    # Used to store the result
    self.result = []
    universe = Universe(top_file, trj_files)
    self._u = universe
    atom_group = universe.select_atoms(selection)
    self._ag = atom_group
    trajectory = atom_group.universe.trajectory
    super(IntramolecularDistance, self).__init__(trajectory, **kwargs)
def save_systems(flex: mda.Universe, protein: mda.Universe, crystal: mda.Universe, dir: str):
    """Write the flexible residues of ``protein`` and ``crystal`` as PDB files.

    The protein residues of ``flex`` define which residues are flexible. For
    each of them the heavy atoms are written from both systems to
    ``pflex-<resname>-<segid><resnum><icode>.pdb`` and the matching
    ``cflex-...pdb`` inside ``dir``. Finally all flexible residues together
    are written to ``pflex.pdb`` and ``cflex.pdb``.

    Assertions check that both systems select the same number of atoms.
    """
    no_hydrogens = "not (type H or name H*)"

    def sel(resnum, resname, segid, icode) -> str:
        return f"(resid {resnum}{icode} and resname {resname} and segid {segid})"

    flexres = flex.select_atoms("protein").residues
    residues = []
    for res in flexres:
        # Select single residue, heavy atoms only
        ressel = (sel(res.resnum, res.resname, res.segid, res.icode) +
                  " and " + no_hydrogens)
        p_res = protein.select_atoms(ressel)
        c_res = crystal.select_atoms(ressel)
        assert p_res.n_atoms == c_res.n_atoms
        tag = f"{res.resname}-{res.segid}{res.resnum}{res.icode}"
        # Write out PDB files
        p_res.write(os.path.join(dir, f"pflex-{tag}.pdb"))
        c_res.write(os.path.join(dir, f"cflex-{tag}.pdb"))
        residues.append((res.resnum, res.resname, res.segid, res.icode))
    # Check that all flexible residues are listed
    assert len(residues) == len(flexres)
    # TODO: Can be improved by using ressel
    selection = " or ".join(
        sel(resnum, resname, segid, icode)
        for resnum, resname, segid, icode in residues)
    # Remove H atoms
    # TODO: Possibly need perception for atom name, when type is not present
    selection = f"({selection}) and {no_hydrogens}"
    p_atoms = protein.select_atoms(selection)
    c_atoms = crystal.select_atoms(selection)
    # Check that the number of atoms in the two selections is equal
    assert len(p_atoms) == len(c_atoms)
    p_atoms.write(os.path.join(dir, "pflex.pdb"))
    c_atoms.write(os.path.join(dir, "cflex.pdb"))
def main():
    """Select atoms from the input file, optionally centre them, and write
    the selection to the output file."""
    args = parse_args()
    selected = Universe(args.input).selectAtoms(args.selection)
    print(selected)
    if args.center:
        center(selected)
    selected.write(args.output)
def assign_name(u: Universe, names: np.ndarray):
    """
    Attach ``names`` to the universe as its "name" topology attribute.

    Args:
        u: The universe object that receives the attribute.
        names: The element name array, passed as the attribute values.
    """
    attribute = "name"
    u.add_TopologyAttr(attribute, values=names)
def main(struct):
    """Print the phi and psi dihedral angles of a single structure.

    The atoms are chosen with the module-level ``PHI_SEL`` and ``PSI_SEL``
    selections. Output is the file name, both angles and a blank line.
    """
    u = Universe(struct)
    phi = u.selectAtoms(PHI_SEL)
    psi = u.selectAtoms(PSI_SEL)
    # Single-argument print(...) calls behave the same on Python 2 and 3.
    print(u.filename)
    print('phi: {0:8.2f}'.format(phi.dihedral()))
    print('psi: {0:8.2f}'.format(psi.dihedral()))
    print('')
def test_atomgroups(self):
    """Segment B splits consistently into its two alternate locations."""
    universe = Universe(self.filename)

    def count(query):
        return len(universe.select_atoms(query))

    without_b = count("segid B and (not altloc B)")
    without_a = count("segid B and (not altloc A)")
    assert_equal(without_b, without_a)

    only_a = count("segid B and (altloc A)")
    only_b = count("segid B and (altloc B)")
    assert_equal(only_a, only_b)

    # All of segment B = atoms outside altloc B + atoms in altloc A.
    total = count("segid B")
    assert_equal(total, without_b + only_a)
def num_of_neighbor_simple(
    nvt_run: Universe,
    center_atom: Atom,
    distance_dict: Dict[str, float],
    select_dict: Dict[str, str],
    run_start: int,
    run_end: int,
) -> Dict[str, np.ndarray]:
    """Calculates solvation structure type (1 for SSIP, 2 for CIP and 3 for AGG)
    with respect to the ``center_atom`` in the specified frame range.

    A frame is type 1 when the shell around ``center_atom`` is empty and
    type 3 when it holds more than one atom. With exactly one shell atom,
    the shell around that atom is built from atoms of the same type as
    ``center_atom``: the frame is type 2 if it contains exactly one such
    atom, otherwise type 3.

    Args:
        nvt_run: An MDAnalysis ``Universe`` containing wrapped trajectory.
        center_atom: The solvation shell center atom.
        distance_dict: A dict of coordination cutoff distance of the neighbor
            species. Must contain exactly one entry.
        select_dict: A dictionary of atom species selection, where each atom
            species name is a key and the corresponding values are the
            selection language.
        run_start: Start frame of analysis.
        run_end: End frame of analysis.

    Returns:
        A dict with "total" as the key and an array of the solvation
        structure type in the specified frame range as the value.
    """
    trj_analysis = nvt_run.trajectory[run_start:run_end:]
    center_selection = "same type as index " + str(center_atom.index)
    assert len(distance_dict) == 1, "Please only specify the counter-ion species in the distance_dict"
    species = list(distance_dict.keys())[0]
    cn_values = np.zeros(int(len(trj_analysis)))
    for frame_idx, _ in enumerate(trj_analysis):
        selection = select_shell(select_dict, distance_dict, center_atom, species)
        shell = nvt_run.select_atoms(selection, periodic=True)
        n_shell = len(shell)
        if n_shell == 0:
            structure_type = 1
        elif n_shell == 1:
            selection_species = select_shell(center_selection, distance_dict, shell.atoms[0], species)
            shell_species = nvt_run.select_atoms(selection_species, periodic=True)
            # One atom is subtracted from the count before testing for zero.
            structure_type = 2 if len(shell_species) - 1 == 0 else 3
        else:
            structure_type = 3
        cn_values[frame_idx] = structure_type
    return {"total": cn_values}
def split_molecules(
    u: mda.Universe, keep_ions: bool = False
) -> Dict[str, Union[mda.AtomGroup, List[mda.AtomGroup]]]:
    """
    Split different molecules (protein, water, ligands, ...) within a
    structure into separate selections.

    Args:
        u (mda.Universe): MDAnalysis universe
        keep_ions (bool, optional): Flag to keep/ignore ions. A residue is
            treated as an ion when its name matches ``[A-Z]?[+-]``.

    Returns:
        A dictionary keyed by "protein", "water" and the residue name of
        every other molecule. The "protein" and "water" values are atom
        groups; the other values are the residue objects yielded by
        ``residues`` (or a list of them when several residues share a name).
    """
    split = {}
    # Select protein
    protein = u.select_atoms("protein")
    if len(protein.atoms) != 0:  # Check if protein is present
        split["protein"] = protein
    # Select water molecules; the first residue name that matches wins
    for water_name in ["WAT", "HOH"]:
        water = u.select_atoms(f"resname {water_name}")
        if len(water.atoms) != 0:
            break
    if len(water.atoms) != 0:  # Check if water is present
        split["water"] = water
    # Other molecules
    other = u.select_atoms("all") - protein - water
    for res in other.residues:  # Loop over all "other" residues
        name = res.resname
        if not keep_ions and re.search("[A-Z]?[+-]", name) is not None:
            # Skip this ion only. The original ``break`` ended the loop and
            # silently dropped every residue after the first ion.
            continue
        if name not in split:
            split[name] = res
        elif isinstance(split[name], list):
            split[name].append(res)
        else:
            # Second residue with this name: switch to a list
            split[name] = [split[name], res]
    return split
def _list_types(coordinates_file):
    """Return the unique residue names found in a ``.gro`` coordinates file."""
    # Only .gro input is accepted.
    _check_input_file(coordinates_file, extensions=[".gro"])
    every_atom = Universe(coordinates_file).select_atoms("all")
    return np.unique(every_atom.resnames)
def generate_universe(topology, trajectory=None):
    """Create a ``Universe`` and report its box size and water count.

    The trajectory is only passed on when it is neither ``None`` nor the
    empty string. Residues named ``WAT`` are counted as water molecules.
    """
    print('Generating Universe...')
    has_trajectory = not (trajectory is None or trajectory == '')
    u = Universe(topology, trajectory) if has_trajectory else Universe(topology)
    x, y, z = u.dimensions[:3]
    print(f'Universe with dimensions x: {x}, y: {y}, z: {z} loaded!')
    n_waters = u.select_atoms('resname WAT').n_residues
    print(f'{n_waters} water molecules detected!')
    return u
def test_write_in_loop(self):
    """Frames written one by one reload with the same atoms and positions."""
    source = Universe(mol2_molecules)
    with mda.Writer(self.outfile) as writer:
        for _ in source.trajectory:
            writer.write(source.atoms)
    written = Universe(self.outfile)

    def check_positions():
        assert_array_equal(written.atoms.positions, source.atoms.positions)

    assert_equal(len(written.atoms), len(source.atoms))
    assert_equal(len(written.trajectory), len(source.trajectory))
    check_positions()
    # Jump both trajectories to frame index 199 and compare again.
    written.trajectory[199]
    source.trajectory[199]
    check_positions()
def calc_rg(grof, xtcf, btime, debug):
    """Yield the radius of gyration of the CA atoms for each frame.

    The value is the square root of the mean squared distance of the CA
    atoms from their centre of mass; the mean itself is not mass-weighted.
    Frames with ``ts.time < btime`` are skipped.

    Each yielded string is ``'{time:10.0f}{rg:15.6f}'`` plus a newline. With
    ``debug`` set, a progress line is printed for every second frame.
    """
    u = Universe(grof, xtcf)
    query = 'name CA'
    # MDAnalysis will convert the unit of length to angstrom, though in
    # Gromacs the unit is nm
    atoms = u.selectAtoms(query)
    natoms = atoms.numberOfAtoms()
    for ts in u.trajectory:
        if ts.time >= btime:
            com = atoms.centerOfMass()  # center of mass
            _sum = sum((sum(i**2 for i in (a.pos - com)) for a in atoms))
            rg = np.sqrt(_sum / natoms)
            yield '{0:10.0f}{1:15.6f}\n'.format(ts.time, rg)
            # debugging aid: report every second frame
            if debug and ts.frame % 2 == 0:
                print("time: {0:10.0f}; step: {1:10d}; frame: {2:10d}".format(
                    ts.time, ts.step, ts.frame))
def __setstate__(self, dict):
    """Restore the pickled attributes and reconstruct the universe.

    The universe is rebuilt from the stored structure and trajectory file
    names, the mass map is re-applied and the periodic flag restored. Every
    target and reference force then gets its ``setup_hook`` called, and its
    category, when not ``None``, is appended to ``ref_cats``.
    """
    state = dict
    self.__dict__.update(state)
    # reconstruct the universe
    self.u = Universe(state['structure_filename'],
                      state['trajectory_filename'])
    apply_mass_map(self.u, state['mass_map'])
    self.u.trajectory.periodic = state['trajectory_is_periodic']
    for force in self.tar_forces + self.ref_forces:
        category = force.get_category()
        if category is not None:
            self.ref_cats.append(category)
        force.setup_hook(self.u)
def sequence_spacing(grof, xtcf, btime, etime, peptide_length, atom_sel):
    """Collect mean inter-residue atom distances grouped by sequence separation.

    ``atom_sel`` is a format string receiving a residue number; selections
    are made for the numbers ``2 .. peptide_length + 1``. For each frame in
    the time window and each residue pair ``i < j``, the average of all
    atom-atom distances between the two selections is appended to the list
    stored under ``j - i``.

    ``btime`` skips earlier frames; a positive ``etime`` stops the loop at
    the first frame beyond it (0 means "till the end of the trajectory").
    """
    u = Universe(grof, xtcf)
    # REMEMBER: ARGS verification should be done in main ONLY!
    # resid starts from 1 in MDAnalysis and the two termini are not counted,
    # hence the range from 2 to peptide_length + 2.
    first, last = 2, peptide_length + 2
    residues = [u.selectAtoms(atom_sel.format(k)) for k in range(first, last)]
    ijdist_dict = {}
    for ts in u.trajectory:
        if ts.time < btime:
            continue
        if 0 < etime < ts.time:
            break
        for i, resi in enumerate(residues):
            # j > i only: each pair within the peptide is visited once
            for j in range(i + 1, len(residues)):
                resj = residues[j]
                separation = abs(i - j)
                # every atom of residue i against every atom of residue j
                pair_distances = [
                    np.linalg.norm(atomi.pos - atomj.pos)
                    for atomi in resi
                    for atomj in resj
                ]
                mean_distance = np.average(pair_distances)
                if separation in ijdist_dict:
                    ijdist_dict[separation].append(mean_distance)
                else:
                    ijdist_dict[separation] = [mean_distance]
        utils.print_progress(ts)
    return ijdist_dict
def getWaterCoorWithH(self,centre,psf,dcd,outputFile):
    """Write and return the coordinates of the waters listed in ``self``.

    ``self`` is iterated as a sequence of sets; each set is a sequence of
    records from which the residue number (index 2), the segment name
    (index -3) and a 1-based frame number (index -2) are read.

    centre      not used in this method
    psf, dcd    passed unchanged to ``Universe``
    outputFile  object with ``write``/``close``; a title line and three lines
                per record are written, and the file is CLOSED before returning

    Returns a list (one entry per set) of lists holding the coordinate
    arrays of each selected residue.
    """
    rho=Universe(psf,dcd)
    H2OCoordinate=[]
    # `no` is written as the last column of every output line ('centreNo')
    no=0
    title='resname'+' '+'atomid'+' '+'resnumber'+' X Y Z '+' '+'segname'+' '+'frameNo'+' '+'centreNo'+'\n'
    outputFile.write(title)
    for oxygenInforSet in self:
        H2OCoordinateSet=[]
        print 'There were',len(oxygenInforSet),'waters in the'
        for oxygenInfor in oxygenInforSet:
##            no1+=1
##            print no1
            frameNo=oxygenInfor[-2]
            # stored frame numbers are 1-based; trajectory indexing is 0-based
            frameNo=int(frameNo)-1
            segName=oxygenInfor[-3]
            resNumber=oxygenInfor[2]
            # indexing the trajectory is done for its effect of moving to that
            # frame; the returned object itself is not used afterwards
            frame=rho.trajectory[frameNo]
            infor='segid '+segName+' and resid '+resNumber
            selected=rho.selectAtoms(infor)
            atomID=[]
            for atoms in selected.atoms:
                # third whitespace-separated field of the atom's string form,
                # minus its last character
                ID=str(atoms).split()[2][:-1]
                atomID.append(ID)
            selectedResId=selected.resids()
            selectedResNa=selected.resnames()
            coordsOH1H2=selected.coordinates()
            # exactly three atoms are written per residue
            # NOTE(review): presumably O, H1, H2 of one water -- confirm
            for i in range(3):
                # str(array)[1:-1] strips the surrounding brackets
                atomInfor=str(selectedResNa[0])+' '+str(atomID[i])+' '+str(resNumber)+' '+str(coordsOH1H2[i])[1:-1]+' '+segName+' '+str(frameNo)+' '+str(no)+'\n'
                outputFile.write(atomInfor)
            H2OCoordinateSet.append(coordsOH1H2)
        no+=1
        H2OCoordinate.append(H2OCoordinateSet)
        print no,'is finished'
    outputFile.close()
    return H2OCoordinate
def process_trajectory(args):
    """Compute, per frame, the dot product of a plane normal and a region vector.

    Reads from ``args``:
      region  'start-end' string, split on '-' and handed to ``get_region``
      plane   comma-separated integers handed to ``get_normal`` as ``atoms``
      topo, traj  passed unchanged to ``Universe``
      graph   when true and at least one value was collected, the values are
              plotted sorted by value and the sorted list is printed

    Frames for which the normal, the region or the region vector is missing
    are skipped. Returns ``None``.
    """
    start, end = args.region.split('-')
    plane = [int(x) for x in args.plane.split(',')]
    univ = Universe(args.topo, args.traj)

    results = []
    # iterating the trajectory advances the universe's current frame; the
    # timestep object itself is not needed
    for ts in univ.trajectory:
        norm = get_normal(univ, atoms=plane)
        temp = get_region(univ, start, end)
        if norm is None or temp is None or len(temp) == 0:
            continue
        # The original also evaluated univ.split('/')[-1] here: a string
        # method called on the Universe object whose result was never used.
        angle = get_vector(temp)
        if angle is not None:
            results.append(dot(norm, angle))

    if args.graph and results:
        import matplotlib.pyplot as plt
        # (1-based frame counter, value) pairs ordered by value
        ysort = sorted(enumerate(results, start=1),
                       key=lambda kv: float(kv[1]))
        x, y = zip(*ysort)
        ind = arange(len(x))
        plt.plot(y, ind, color='r')
        plt.yticks(ind, x)
        print(ysort)
        plt.show()
    return
def count_interactions(grof, xtcf, btime, cutoff, debug):
    """Yield, per frame, the number of selected atom pairs within ``cutoff``.

    grof, xtcf
        passed unchanged to ``Universe``
    btime
        frames with ``ts.time < btime`` produce no output
    cutoff
        a pair is counted when its distance is ``<= cutoff``
    debug
        when true, a progress line is printed for every even-numbered frame

    Only pairs whose resids differ by at least 2 are considered, and each
    pair is counted once. Each yielded string holds the frame time
    (width 10, no decimals) and the count (width 8) followed by a newline.
    """
    u = Universe(grof, xtcf)
    query = ('(resname PRO and (name CB or name CG or name CD)) or'
             '(resname VAL and (name CG1 or name CG2)) or'
             '(resname GLY and name CA) or'
             '(resname ALA and name CB)')
    # MDAnalysis will convert the unit of length to angstrom, though in Gromacs the unit is nm
    atoms = u.selectAtoms(query)

    # The pair filter does not depend on the frame, so build the pair list
    # once: each pair appears once (i < j), neighbouring residues excluded.
    atom_list = list(atoms)
    pairs = [(ai, aj)
             for i, ai in enumerate(atom_list)
             for aj in atom_list[i + 1:]
             if abs(ai.resid - aj.resid) >= 2]

    for ts in u.trajectory:
        if ts.time >= btime:
            numcount = 0
            for ai, aj in pairs:
                if np.linalg.norm(ai.pos - aj.pos) <= cutoff:
                    numcount += 1
            yield '{0:10.0f}{1:8d}\n'.format(ts.time, numcount)
        # progress report on every second frame, for debugging only
        if debug and ts.frame % 2 == 0:
            # single-argument print(...) behaves the same on Python 2 and 3
            print("time: {0:10.0f}; step: {1:10d}; frame: {2:10d}".format(
                ts.time, ts.step, ts.frame))
def sequence_spacing(pf, grof, xtcf, peptide_length, atom_sel, output=None):
    """Collect residue-residue distances grouped by sequence separation.

    pf, output
        not used in this function
    grof, xtcf
        passed unchanged to ``Universe``
    peptide_length
        residues with resid 2 .. peptide_length - 1 are analysed
    atom_sel
        selection template; ``atom_sel.format(resid)`` is given to
        ``selectAtoms`` once per residue

    Returns a dict mapping the separation ``j - i`` (in list positions) to a
    list with one entry per frame and residue pair: the distance between the
    two residues' centres of geometry.
    """
    u = Universe(grof, xtcf)
    # this selection part should be better customized
    # here, only backbone atoms are used, u.selectAtoms doesn't include
    # Hydrogen atoms
    # REMEMBER: OPTIONS verification should be done in main ONLY!
    residues = [u.selectAtoms(atom_sel.format(i))
                for i in range(2, peptide_length)]

    ijdist_dict = {}
    for ts in u.trajectory:
        # one centre of geometry per residue and frame, instead of
        # recomputing both centres for every pair
        centers = [res.centerOfGeometry() for res in residues]
        ncenters = len(centers)
        for i in range(ncenters - 1):
            for j in range(i + 1, ncenters):
                ijdist = np.linalg.norm(centers[i] - centers[j])
                # Fix: the first value stored for a separation used to be the
                # separation itself ([dij]) rather than the measured distance.
                ijdist_dict.setdefault(j - i, []).append(ijdist)
        if ts.step % 2000000 == 0:  # 2000ps
            # single-argument print(...) behaves the same on Python 2 and 3
            print("time step: {0:d}".format(ts.step))
    return ijdist_dict
def main():
    """Print the position of one atom in every residue of a given name.

    Command-line arguments: residue name, number of atoms per residue,
    0-based index of the wanted atom inside a residue, and a topology file
    (for example gro or pdb). The atoms selected by ``resname <name>`` are
    cut into consecutive chunks of ``atoms_num`` atoms and the atom at
    ``index`` of each chunk is reported. The script exits with status 1 when
    the selected atom count is not a multiple of ``atoms_num``.
    """
    # description / help texts are user-facing and kept in Chinese:
    # "given a residue name, the number of atoms in the residue and the
    # atom's index inside the residue (from 0), compute the coordinates"
    parser = argparse.ArgumentParser(
        description='通过给定残基名称,残基内原子数目,原子在残基内的索引(从0开始),计算原子的坐标。')
    parser.add_argument('resname', action='store', help='残基名称')
    parser.add_argument('atoms_num', type=int, action='store',
                        help='残基内原子数目')
    parser.add_argument('index', type=int, action='store',
                        help='原子的索引,索引从0开始')
    parser.add_argument('topology_file', action='store',
                        help='拓扑文件,例如gro, pdb')
    args = parser.parse_args()
    resname = args.resname
    atoms_num = args.atoms_num
    index = args.index

    universe = Universe(args.topology_file)
    atom_groups = universe.selectAtoms("resname " + resname)
    total = len(atom_groups)

    if total % atoms_num != 0:
        # message: the atom count of this residue name in the topology is
        # not an integer multiple of the given per-residue atom number
        print("拓扑文件内对应残基原子总数不是所给原子数目的整数倍,请给予正确的原子数目。")
        exit(1)

    # one chunk of atoms_num atoms per residue; pick the requested atom
    positions = [atom_groups[first:first + atoms_num][index].position
                 for first in range(0, total, atoms_num)]

    print("The positions of atoms %s is:" % (index))
    for position in positions:
        print(position)
# Stage the input files, build a selection from the sequence alignment and
# RMS-fit the trajectory structure onto the reference structure.
job = Job(
    inputfiles={
        'sequence': 'sequences/1IFC_R151IFC.fasta',
        'ref_psf': 'coord/1ifc_xtal.psf',
        'ref_pdb': 'coord/1ifc_xtal.pdb',
        'trj_psf': 'GSBPsetup/ifabp_apo_gsbp_15_0.psf',
        'trj_pdb': 'GSBPsetup/ifabp_apo_gsbp_15_0.pdb',
    },
    outputfiles={
        'fit_pdb': 'GSBPsetup/rmsfit_ifabp_apo_gsbp_15_0.pdb',
    },
)
job.stage()

# imported only after staging, as in the original script
from MDAnalysis import Universe
import hop.trajectory

print("Setting up the Universes...")
ref = Universe(job.filenames['ref_psf'],
               pdbfilename=job.filenames['ref_pdb'])
trj = Universe(job.filenames['trj_psf'], job.filenames['trj_pdb'])
# resids of the C-alpha atoms, in selection order
ref_resids = [a.resid for a in ref.selectAtoms('name CA')]
target_resids = [a.resid for a in trj.selectAtoms('name CA')]

print("Alignment and selection string...")
selection = hop.trajectory.fasta2select(
    job.filenames['sequence'],
    ref_resids=ref_resids,
    target_resids=target_resids,
    is_aligned=True,
)

print("Fitting trajectory to reference...")
hop.trajectory.RMS_fit_trj(
    trj,
    ref,
    select=selection,
    filename=job.filenames['fit_pdb'],
)

print("Done: result is '%(fit_pdb)s'" % job.filenames)
import numpy
from MDAnalysis import Universe, collection, Timeseries
from MDAnalysis.tests.datafiles import PSF, DCD

# matplotlib is optional: have_matplotlib records whether the plotting
# names below could be imported
try:
    import matplotlib
    matplotlib.use('agg')  # no interactive plotting, only save figures
    from pylab import errorbar, legend, xlabel, ylabel, savefig, clf, gca, draw
    have_matplotlib = True
except ImportError:
    have_matplotlib = False

universe = Universe(PSF, DCD)
protein = universe.selectAtoms("protein")
numresidues = protein.numberOfResidues()

# start from an empty timeseries collection
collection.clear()
# the loop starts at index 2 and stops before numresidues-1
# NOTE(review): presumably to skip terminal residues that lack a
# neighbour for phi/psi -- confirm
for res in range(2, numresidues-1):
    print "Processing residue %d" % res
    # selection of the atoms involved for the phi for resid '%d' %res
    ## selectAtoms("atom 4AKE %d C"%(res-1), "atom 4AKE %d N"%res, "atom %d 4AKE CA"%res, "atom 4AKE %d C" % res)
    phi_sel = universe.residues[res].phi_selection()
    print phi_sel;
    # selection of the atoms involved for the psi for resid '%d' %res
    psi_sel = universe.residues[res].psi_selection()
    print psi_sel;
    # collect the timeseries of a dihedral
def calc_bond_length(grof, xtcf, btime, etime, debug):
    """Yield a header line, then per-frame average lengths of selected bonds.

    grof, xtcf    passed unchanged to ``Universe``
    btime, etime  only frames with ``btime <= ts.time < etime`` produce output
    debug         when true, a progress line is printed for every
                  even-numbered frame

    The first yielded string is a header naming each column as
    ``<first letter of group>|<atom>-<atom>``. Every following string holds
    the frame time and, per column, the average over all collected atom
    pairs of that bond type (groups and bond names in sorted order).
    """
    # thebonds contains all the bonds that I am interested
    thebonds = {
        #atom names should be UNIQUE within each residue for this script
        'BACKBONE_INTRA': [('N', 'CA'), ('CA', 'C'), ('C', 'O'), ], # backbone, intramolecular interactions
        # PB: peptide bond, which is the only intermolecular bonds that I am interested
        'PB': [('C', 'N'),],
        'GLY': [('CA', 'HA1'),],
        'PRO': [('CA', 'CB'), ('CB', 'CG'), ('CG', 'CD'), ('CD', 'N' )],
        'VAL': [('CA', 'CB'), ('CB', 'CG1'), ('CB', 'CG2')],
        'MeO': [('C', 'OA'), ('C', 'H'), ('OA', 'HO')],
        'SOL': [('OW', 'HW1')],
        }
    aas = ['GLY', 'PRO', 'VAL'] # rl: residue list
    solvents = ['MeO', 'SOL']

    # initialize ibonds
    ibonds = {} # interested bonds, not very legible to human
    for k in thebonds:
        ibonds[k] = {}
        if k in aas:
            # amino acids get their own bonds plus the shared backbone bonds;
            # bond names are stored as sorted tuples
            for kk in thebonds[k] + thebonds['BACKBONE_INTRA']:
                ibonds[k][tuple(sorted(kk))] = []
        elif k in solvents:
            for kk in thebonds[k]:
                ibonds[k][tuple(sorted(kk))] = []
    # the peptide bond keeps its (C, N) order: it is matched unsorted below
    ibonds['PB'] = {}
    ibonds['PB'][('C', 'N')] = []

    # data structure would be (to do)
    # ibonds = {
    #     'PRO': {
    #         (a1, b1):[(atom_object1, atom_object2), (atom_object3, atom_object4), ... , ],
    #         (a2, b2):[(atom_object1, atom_object2), (atom_object3, atom_object4), ... , ],
    #         ...
    #         },
    #     'VAL': {
    #         (a1, b1):[(atom_object1, atom_object2), (atom_object3, atom_object4), ... , ],
    #         (a2, b2):[(atom_object1, atom_object2), (atom_object3, atom_object4), ... , ],
    #         ...
    #         },
    #     'GLY': {
    #         (a1, b1):[(atom_object1, atom_object2), (atom_object3, atom_object4), ... , ],
    #         (a2, b2):[(atom_object1, atom_object2), (atom_object3, atom_object4), ... , ],
    #         ...
    #         },
    #     }

    univer = Universe(grof, xtcf)
    atom_selection = "not resname ACE and not resname NH2" # get rid of the ends
    # atom_selection = "resname MeO and resid 3000"
    atoms = univer.selectAtoms(atom_selection)

    # initialize ibonds data structure
    # a bondname is composed of readable plain text
    # a bond is composed of Atom object
    # every unordered pair of selected atoms is inspected once (ki < kj)
    for ki, ai in enumerate(atoms):
        for kj, aj in enumerate(atoms):
            if ki < kj:
                if ai.resid == aj.resid:
                    # collecting intramolecular bonds associated with real atom objects
                    resname= ai.resname # will also equal aj.resname
                    bondname = tuple(sorted([ai.name, aj.name]))
                    # ibonds is indexed directly by resname, so a residue name
                    # that is not a key of thebonds raises KeyError here
                    if bondname in ibonds[resname]:
                        bond = [ai, aj]
                        ibonds[resname][bondname].append(bond)
                elif ai.resid - aj.resid == -1:
                    # collecting intermolecular bonds: i.e. peptide bond
                    bondname = tuple([ai.name, aj.name])
                    if bondname == ('C', 'N'):
                        bond = [ai, aj]
                        ibonds['PB'][bondname].append(bond)

    ################################################################################
    # VERIFICATION STATUS: ibonds initiation verified for
    # sq1w00_md.gro & sq1m00_md.gro
    # 2012-04-25
    # for i in ibonds:
    #     for j in ibonds[i]:
    #         print i, j, len(ibonds[i][j])
    # from pprint import pprint as pp
    # pp(ibonds)
    # VAL ('CB', 'CG2') 14
    # VAL ('C', 'CA') 14
    # VAL ('CA', 'N') 14
    # VAL ('CB', 'CG1') 14
    # VAL ('C', 'O') 14
    # VAL ('CA', 'CB') 14
    # PRO ('CD', 'CG') 7
    # PRO ('C', 'CA') 7
    # PRO ('CA', 'N') 7
    # PRO ('CA', 'CB') 7
    # PRO ('C', 'O') 7
    # PRO ('CD', 'N') 7
    # PRO ('CB', 'CG') 7
    # SOL ('HW1', 'OW') 0
    # PB ('C', 'N') 34
    # GLY ('CA', 'N') 14
    # GLY ('C', 'O') 14
    # GLY ('CA', 'HA1') 14
    # GLY ('C', 'CA') 14
    # MeO ('HO', 'OA') 0
    # MeO ('C', 'OA') 0
    # MeO ('C', 'H') 0
    # import sys
    # sys.exit()
    ################################################################################

    # Just for Printing the Header
    sorted_resname = sorted(ibonds.keys()) # sort to keep the value in the right order
    partial_header = []
    for resname in sorted_resname:
        resname_header = [] # the header specific to residue
        for bondname in sorted(ibonds[resname].keys()):
            # bn: since bondname has been used in previous codes
            bn = '{0}|{1}'.format(resname[0], '-'.join(bondname))
            resname_header.append('{0:9s}'.format(bn))
        partial_header.extend(resname_header)
    yield '#{0:8s}{1}\n'.format('t(ps)', ''.join(partial_header))
    # import sys
    # sys.exit()

    # Production Calculation
    # use < when for formatting values to align with headers, and the width will
    # be 1 col narrower than that in the corresponding header
    for ts in univer.trajectory:
        # for debugging only
        if debug and ts.frame % 2 == 0:
            print "time: {0:10.0f}; step: {1:10d}; frame: {2:10d}".format(ts.time, ts.step, ts.frame)
        if etime > ts.time >= btime:
            partial_yield = []
            # same iteration order as the header, so columns line up
            for resname in sorted_resname:
                resname_yield = []
                for bondname in sorted(ibonds[resname].keys()):
                    bonds = ibonds[resname][bondname]
                    ds = []
                    for bond in bonds:
                        r = bond[0].pos - bond[1].pos # end-to-end vector from atom positions
                        d = np.linalg.norm(r) # distance
                        ds.append(d)
                    # ds is empty for bond types with no collected atom pairs
                    resname_yield.append('{0:<8.3f}'.format(np.average(ds))) #, np.std(ds))
                partial_yield.extend(resname_yield)
            # a space in order to align with # in the header
            yield ' {0:<8.0f}{1}\n'.format(ts.time, ' '.join(partial_yield))
import numpy.linalg

# Usage: <script> TOPOLOGY DCD OUTPUT_PREFIX
# Writes "<OUTPUT_PREFIX>Rg_data.txt" with one line per frame:
# frame number, N-to-C terminal distance, backbone radius of gyration.
if '-h' in sys.argv:
    print('Input topology file first and dcd file second and the new filename output 3rd.It will write the Raius of Gyration and N to C terminal distance.')
    sys.exit()

TOP = sys.argv[1]
DCD = sys.argv[2]
FILENAME = sys.argv[3]
#TOP = '/Users/ronaldholt/1JJS_autopsf.psf'
#DCD = '/Users/ronaldholt/Google_Drive/ORNL_Research/1JJS/1JJS_1us.dcd'
#FILENAME="1JJS"

u = Universe(TOP, DCD)

# Extract position of N and C terminal, calculate distance, write it to a
# file along with the radius of gyration (RG)
nterm = u.P1.N[0]   # first atom named 'N' of segment P1
cterm = u.P1.C[-1]  # ... takes the last atom named 'C'
bb = u.selectAtoms('protein and backbone')  # a selection (a AtomGroup)

# the with-block closes the output file even if a frame raises
with open(str(FILENAME) + 'Rg_data.txt', 'w') as f:
    for ts in u.trajectory:  # iterate through all frames
        r = cterm.pos - nterm.pos  # end-to-end vector from atom positions
        d = numpy.linalg.norm(r)  # end-to-end distance
        rgyr = bb.radiusOfGyration()  # method of a AtomGroup; updates with each frame
        f.write(" %d %f %f \n" % (ts.frame, d, rgyr))
#Extract distance of N to C terminal and RG overtrajectory
def __init__(
    self,
    psf,
    pdb,
    delta=1.0,
    atomselection="name OH2",
    metadata=None,
    padding=4.0,
    sigma=None,
    verbosity=3,
):
    """Construct the density from psf and pdb and the atomselection.

      DC = BfactorDensityCreator(psf, pdb, delta=<delta>, atomselection=<MDAnalysis selection>,
                                 metadata=<dict>, padding=2, sigma=None)
      density = DC.PDBDensity()

    psf     Charmm psf topology file
    pdb     PDB file
    atomselection
            selection string (MDAnalysis syntax) for the species to be analyzed
    delta   approximate bin size for the density grid (same in x,y,z)
            (It is slightly adjusted when the box length is not an integer multiple
            of delta.)
    metadata
            dictionary of additional data to be saved with the object; the keys
            'psf', 'pdb', 'atomselection', 'numframes' and 'sigma' are set on it
    padding increase histogram dimensions by padding (on top of initial box size)
    sigma   width (in Angstrom) of the gaussians that are used to build up the
            density; if None then uses B-factors from pdb
    verbosity=int
            level of chattiness; 0 is silent, 3 is verbose

    For assigning X-ray waters to MD densities one might have to use a sigma
    of about 0.5 A to obtain a well-defined and resolved x-ray water density
    that can be easily matched to a broader density distribution.

    Sets ``edges``, ``delta``, ``midpoints``, ``origin``, ``g`` and
    ``metadata`` on the instance.
    """
    from MDAnalysis import Universe

    set_verbosity(verbosity)  # set to 0 for no messages

    u = Universe(psf, pdbfilename=pdb)
    group = u.selectAtoms(atomselection)
    coord = group.coordinates()
    logger.info(
        "BfactorDensityCreator: Selected %d atoms (%s) out of %d total.",
        coord.shape[0],
        atomselection,
        len(u.atoms),
    )
    smin = numpy.min(coord, axis=0) - padding
    smax = numpy.max(coord, axis=0) + padding

    BINS = fixedwidth_bins(delta, smin, smax)
    # materialized as a list: it is passed to histogramdd more than once
    arange = list(zip(BINS["min"], BINS["max"]))
    bins = BINS["Nbins"]

    # get edges by doing a fake run
    # (normed=False was the default and is simply omitted)
    grid, self.edges = numpy.histogramdd(numpy.zeros((1, 3)), bins=bins, range=arange)
    # list comprehensions instead of map(): the results are indexed and
    # reused, which needs real lists on Python 3
    self.delta = numpy.diag([(e[-1] - e[0]) / (len(e) - 1) for e in self.edges])
    self.midpoints = [0.5 * (e[:-1] + e[1:]) for e in self.edges]
    self.origin = [m[0] for m in self.midpoints]
    numframes = 1

    if sigma is None:
        # histogram individually, and smear out at the same time
        # with the appropriate B-factor
        if numpy.any(group.bfactors == 0.0):
            wmsg = "BfactorDensityCreator: Some B-factors are Zero."
            warnings.warn(wmsg, category=hop.MissingDataWarning)
            logger.warning(wmsg)
        rmsf = Bfactor2RMSF(group.bfactors)
        grid *= 0.0  # reset grid
        self.g = self._smear_rmsf(coord, grid, self.edges, rmsf)
    else:
        # histogram 'delta functions'
        grid, self.edges = numpy.histogramdd(coord, bins=bins, range=arange)
        logger.info("Histogrammed %6d atoms from pdb.", len(group.atoms))
        # just a convolution of the density with a Gaussian
        self.g = self._smear_sigma(grid, sigma)

    # metadata=None (or anything else that rejects item assignment with a
    # TypeError) is replaced by a fresh dict
    try:
        metadata["psf"] = psf
    except TypeError:
        metadata = dict(psf=psf)
    metadata["pdb"] = pdb
    metadata["atomselection"] = atomselection
    metadata["numframes"] = numframes
    metadata["sigma"] = sigma
    self.metadata = metadata

    # Density automatically converts histogram to density for isDensity=False
    logger.info("BfactorDensityCreator: Histogram completed (initial density in Angstrom**-3)\n")
Similarity Analysis: a Method for Quantifying Macromolecular Pathways.
`arXiv:1505.04807v1`_ [q-bio.QM], 2015.

.. SeeAlso:: :mod:`MDAnalysis.analysis.psa`

"""

from MDAnalysis import Universe
from MDAnalysis.analysis.align import rotation_matrix
from MDAnalysis.analysis.psa import PSAnalysis

if __name__ == '__main__':
    print("Generating AdK CORE C-alpha reference coordinates and structure...")
    # Read in closed/open AdK structures; work with C-alphas only
    u_closed = Universe('structs/adk1AKE.pdb')
    u_open = Universe('structs/adk4AKE.pdb')
    ca_closed = u_closed.select_atoms('name CA')
    ca_open = u_open.select_atoms('name CA')

    # Move centers-of-mass of C-alphas of each structure's CORE domain to origin
    # (the translation is applied to all atoms of each universe, not only
    # to the C-alphas)
    adkCORE_resids = "(resid 1:29 or resid 60:121 or resid 160:214)"
    u_closed.atoms.translate(-ca_closed.select_atoms(adkCORE_resids).center_of_mass())
    u_open.atoms.translate(-ca_open.select_atoms(adkCORE_resids).center_of_mass())

    # Get C-alpha CORE coordinates for each structure
    closed_ca_core_coords = ca_closed.select_atoms(adkCORE_resids).positions
    open_ca_core_coords = ca_open.select_atoms(adkCORE_resids).positions

    # Compute rotation matrix, R, that minimizes rmsd between the C-alpha COREs
    # (open coordinates are passed first, closed coordinates second)
    R, rmsd_value = rotation_matrix(open_ca_core_coords, closed_ca_core_coords)
"""
This program writes the center of mass of residues 1-94 for every snapshot.
At this time, I wanted to confirm if the com of s100b was canceled.

Caution: this program is specialized for s100b-CTD system.

Usage: python conform_com_cancel.py [ PDB file name ]

Output: "<PDB file name>_comTraj.dat" with one "x y z" line per snapshot.
"""
file_name = sys.argv[1]
# Messages are built as one string so that print(...) emits the same text
# on Python 2 and Python 3.
print("Input file name :  %s" % file_name)

u = Universe(file_name)
print("No of snapshots:  %d" % len(u.trajectory))

# Select all atoms that constitute s100b. The selection is a fixed residue
# range, so it is built once instead of once per snapshot.
selected_atoms = u.select_atoms("resid 1-94")

# the with-block closes the output file even if a snapshot raises
with open(file_name + "_comTraj.dat", "w") as f_out:
    for ts in u.trajectory:
        print("atom ids:  %s" % (selected_atoms.ids,))
        com = selected_atoms.center_of_mass()
        f_out.write("%s %s %s \n" % (com[0], com[1], com[2]))
import sys
sys.path.append('/home/x/xiansu/pfs/program/numpy/lib/python2.6/site-packages')
from MDAnalysis import Universe, Writer
from MDAnalysis.analysis.distances import distance_array
import MDAnalysis
import numpy
from Numeric import *

# structure and trajectory of the water system
top = 'npt.gro'
traj = 'md_extract1.trr'
water = Universe(top, traj)

# all atoms whose name starts with 'O'
o = water.selectAtoms('name O*')
resid = o.resids()
print(resid)
#resnu=o.resnums()
#resna=o.resnames()

# one [third field, last field] pair per atom, both taken from the
# whitespace-split string form of the atom
atomInf = []
for i in o.atoms:
    fields = str(i).split()
    atomid = fields[2]
    atomseg = fields[-1]
    atomidandseg = [atomid, atomseg]
    atomInf.append(atomidandseg)
import MDAnalysis
from MDAnalysis import Universe
from MDAnalysis.analysis.contacts import calculate_contacts
import numpy as np
import pandas as pd

# reference structure and the trajectory to analyse (same structure file)
ref = Universe("conf_protein.gro.bz2")
u = Universe("conf_protein.gro.bz2", "traj_protein_0.xtc")

# x: number of atoms selected by "protein" in the reference
x = len(ref.select_atoms("protein"))

# Heavy atoms of residues 72-95, restricted to the first / second half of
# the atom numbers.
# NOTE(review): both ranges include atom number x//2 -- confirm whether the
# second range should start at x//2 + 1.
sel_template = "not name H* and resid 72-95 and bynum {}:{}"
selA = sel_template.format(1, x//2)
selB = sel_template.format(x//2, x)

data = calculate_contacts(ref, u, selA, selB)
df = pd.DataFrame(data, columns=["Time (ps)", "Q"])
print(df)
# Fragment of a larger function (it uses `return`); the enclosing `def` is
# not part of this excerpt.
# cont counts the processed (topology, trajectory) pairs for the progress line
cont = 0
# "NETCDF" is mapped to the format name "NCDF" before loading
if trajFormat == "NETCDF":
    trajFormat = "NCDF"
for topologyPath, trajectoryPath in trajList:
    cont +=1
    print "Trajectory "+str(cont)+"/"+str(totalTrajNumber)+" "+topologyPath
    # TODO: change the simulationId parameters
    simulationId = simulationInsert(con, 225000, 1, 1, 225000, topologyPath+" "+trajectoryPath)
    referenceInsert(con, simulationId, 'DNA ', 'X')
    # NOTE(review): the bare `except:` clauses below also catch
    # KeyboardInterrupt/SystemExit and hide the original error message
    try:
        #uniTraj = Universe(topologyPath, format='PRMTOP')
        uniTraj = Universe(topologyPath, format=topoFormat)
    except:
        print "Error: Could not load topology file"
        # status code 100: topology could not be loaded
        return 100
    try:
        #filename, fileExtension = os.path.splitext(trajectoryPath)
        #if str.lower(fileExtension)== ".netcdf":
        #    uniTraj.load_new(trajectoryPath, format='NCDF')
        #else:
        #    uniTraj.load_new(trajectoryPath)
        uniTraj.load_new(trajectoryPath, format=trajFormat)
        #uniTraj.load_new(trajectoryPath)
    except:
        print "Error: Could not load trajectory file"
        # status code 200: trajectory could not be loaded
        return 200
def test_write_read(self):
    """Write all atoms to ``self.outfile`` and check the atom count after reloading."""
    original = Universe(self.filename)
    everything = original.select_atoms("all")
    everything.write(self.outfile)

    reloaded = Universe(self.outfile)
    assert_equal(len(original.atoms), len(reloaded.atoms))
import numpy
import sys
sys.path.append('/home/x/xiansu/pfs/program/numpy/lib/python2.6/site-packages')
from MDAnalysis import Universe, Writer
from MDAnalysis.analysis.distances import distance_array
import MDAnalysis

DCD='water_analysis.dcd'
PSF='ionized.psf'
# opened for writing here; no write or close appears in this excerpt
distanceMat=open('distance.txt','w')
rho=Universe(PSF,DCD)
##print rho
##print list(rho.residues)
# non-backbone, non-hydrogen protein atoms and non-hydrogen TIP3 atoms
p=rho.selectAtoms('protein and not backbone and not(name H*)')
w=rho.selectAtoms('resname TIP3 and not(name H*)')
##print list(p)
pc=p.coordinates()
print len(pc)
proteResid=p.resids()
waterResid=w.resids()
# note: despite its name, proteResnu holds residue NAMES (p.resnames())
proteResnu=p.resnames()
waterResna=w.resnames()
waterResnu=w.resnums()
atomInf=[]
for i in w.atoms:
    # third and last whitespace-separated fields of the atom's string form
    atomid= str(i).split()[2]
    atomseg=str(i).split()[-1]
    atomidandseg=[]
    atomidandseg.append(atomid)
    atomidandseg.append(atomseg)
    # NOTE(review): atomidandseg is never appended to atomInf in this
    # excerpt -- the loop body may continue beyond what is shown