def calc_distance():
    """Write per-frame head/tail distances to ``OUTPUTFILENAME``.

    A universe is built from ``STRUCTFNAME`` and ``TRAJFILE``.  Atoms named
    ``HEADATM`` are paired (via ``zip``) with the entries of a group built
    from the atoms named in ``TAILATMS``, split by the external ``chunks``
    helper into pieces of ``len(TAILATMS)``.
    NOTE(review): presumably one chunk per lipid/residue - confirm against
    ``chunks`` and the MDAnalysis version in use.

    For every trajectory frame one line per distance is written with the
    columns ``time``, ``resid`` (residue of the head atom), ``chain`` (the
    position of the distance within the row returned by
    ``distance_array``) and ``dist``.
    """
    u = mda.Universe(STRUCTFNAME, TRAJFILE)
    head_atms = u.atoms.select_atoms("name {}".format(HEADATM))
    tail_atms = u.atoms.select_atoms("name {}".format(' '.join(TAILATMS)))
    tail_atms = mda.AtomGroup(list(chunks(tail_atms, len(TAILATMS))))
    LOGGER.debug("head_atms: %s tail atms %s", head_atms, tail_atms)

    # The previous check was ``isinstance(mda.AtomGroup, list)``: it inspected
    # the class object instead of the selection, so it was always False and
    # the selection was re-wrapped unconditionally.  Only wrap when the
    # selection really is not an AtomGroup yet.
    if not isinstance(head_atms, mda.AtomGroup):
        head_atms = mda.AtomGroup(head_atms)

    row_fmt = "{: <15}{: <10}{: <10}{: <20}\n"
    with open(OUTPUTFILENAME, "w") as outf:
        outf.write(row_fmt.format("time", "resid", "chain", "dist"))
        for ts in u.trajectory:
            LOGGER.info("at time %s", ts.time)
            # Collect the whole frame first so a frame is written in one go.
            outp_inf = []
            for head, tail in zip(head_atms, tail_atms):
                distances = distance_array(head.position, tail.position)[0]
                residue = head.residue
                outp_inf.extend(
                    row_fmt.format(ts.time, residue.resid, chainid, dist)
                    for chainid, dist in enumerate(distances)
                )
            outf.writelines(outp_inf)
def __init__(self, atomgroups, orders, **kwargs):
    """Set up the dihedral calculation for explicitly ordered atoms.

    Parameters
    ----------
    atomgroups : list
        a list of atomgroups for which the dihedral angles are calculated;
        each must hold exactly 4 atoms
    orders : sequence
        one entry per atomgroup; ``orders[i][k]`` is the position inside
        ``atomgroups[i]`` of the atom that goes into the k-th collected
        group (``ag1`` .. ``ag4``)
    **kwargs
        forwarded unchanged to the base class initialiser

    Raises
    ------
    ValueError
        If any atomgroup does not contain 4 atoms, or if the number of
        order entries differs from the number of atomgroups
    """
    trajectory = atomgroups[0].universe.trajectory
    super(DihedralFromAtoms, self).__init__(trajectory, **kwargs)
    self.atomgroups = atomgroups

    if not all(len(ag) == 4 for ag in atomgroups):
        raise ValueError("All AtomGroups must contain 4 atoms")
    if len(orders) != len(atomgroups):
        raise ValueError(
            "Order data should be provided for every atom group")

    def collect(slot):
        # Take, from every group, the atom its order entry names for `slot`.
        return mda.AtomGroup(
            [ag[order[slot]] for ag, order in zip(atomgroups, orders)])

    self.ag1 = collect(0)
    self.ag2 = collect(1)
    self.ag3 = collect(2)
    self.ag4 = collect(3)
def __init__(self, atomgroups, **kwargs):
    """Set up the angle calculation for a list of three-atom groups.

    Parameters
    ----------
    atomgroups : list
        atomgroups, each holding exactly 3 atoms; the trajectory of the
        first group's universe is handed to the base class
    **kwargs
        forwarded unchanged to the base class initialiser

    Raises
    ------
    ValueError
        If any atomgroup does not contain 3 atoms
    """
    trajectory = atomgroups[0].universe.trajectory
    super(Angles, self).__init__(trajectory, **kwargs)
    self.atomgroups = atomgroups

    for ag in atomgroups:
        if len(ag) != 3:
            raise ValueError("All AtomGroups must contain 3 atoms")

    # Regroup column-wise: all first atoms, all second atoms, all third atoms.
    firsts = [ag[0] for ag in atomgroups]
    self.ag1 = md.AtomGroup(firsts)
    seconds = [ag[1] for ag in atomgroups]
    self.ag2 = md.AtomGroup(seconds)
    thirds = [ag[2] for ag in atomgroups]
    self.ag3 = md.AtomGroup(thirds)
def convert_traj(args):
    """Write (a selection of) a trajectory to ``args.output``.

    Attributes read from ``args``:

    * ``coords``, ``traj`` - passed to ``mda.Universe``.
    * ``output`` - destination file; when ``None`` it is set (on ``args``
      itself) to the trajectory path with its extension replaced by
      ``.pdb``.
    * ``nodrudes`` / ``novirtuals`` - when truthy, atoms whose name matches
      ``D*`` / ``V*`` are excluded.
    * ``sel`` - optional extra selection string applied last.
    * ``start`` / ``end`` - optional frame slice bounds; a falsy value
      (``None`` or ``0``) means "not given".
    """
    import os

    u = mda.Universe(args.coords, args.traj)
    if args.output is None:
        # ``rsplit(".")[0]`` cut at the *first* dot of the whole path, so
        # "../run/a.dcd" became ".pdb" and "a.b.dcd" became "a.pdb".
        # splitext only strips the final extension of the file name.
        args.output = os.path.splitext(args.traj)[0] + ".pdb"

    # u.atoms already is an AtomGroup (and has n_atoms); no re-wrap needed.
    sel = u.atoms
    if args.nodrudes:
        sel = sel.select_atoms("not name D*")
    if args.novirtuals:
        sel = sel.select_atoms("not name V*")
    if args.sel:
        sel = sel.select_atoms(args.sel)

    # Falsy bounds are treated as absent, exactly as before.
    start = args.start or None
    end = args.end or None
    if start is None and end is None:
        frames = u.trajectory
    else:
        frames = u.trajectory[start:end]

    with mda.Writer(args.output, sel.n_atoms) as f:
        for _ in completion(frames):
            f.write(sel)
def __init__(self, atomgroup, **kwargs):
    r"""Set up the phi/psi calculation for the residues of *atomgroup*.

    Parameters
    ----------
    atomgroup : AtomGroup or ResidueGroup
        atoms for residues for which :math:`\phi` and :math:`\psi` are
        calculated
    **kwargs
        forwarded unchanged to the base class initialiser

    Raises
    ------
    ValueError
        If the selection of residues is not contained within the protein
    """
    # NOTE: the docstring is a raw string because of the ``\phi``/``\psi``
    # markup; in a normal string ``\p`` is an invalid escape sequence.
    super(Ramachandran, self).__init__(atomgroup.universe.trajectory,
                                       **kwargs)
    self.atomgroup = atomgroup
    residues = self.atomgroup.residues
    protein = self.atomgroup.universe.select_atoms("protein").residues

    if not residues.issubset(protein):
        raise ValueError("Found atoms outside of protein. Only atoms "
                         "inside of a 'protein' selection can be used to "
                         "calculate dihedrals.")
    elif not residues.isdisjoint(protein[[0, -1]]):
        warnings.warn("Cannot determine phi and psi angles for the first "
                      "or last residues")
        residues = residues.difference(protein[[0, -1]])

    phi_sel = [res.phi_selection() for res in residues]
    psi_sel = [res.psi_selection() for res in residues]

    # phi_selection() and psi_selection() currently can't handle topologies
    # with an altloc attribute so this removes any residues that have either
    # angle return none instead of a value.  Both lists are filtered in
    # lock-step (one pass each) so they stay aligned per residue.
    if any(sel is None for sel in phi_sel):
        warnings.warn("Some residues in selection do not have phi selections")
        kept = [(phi, psi) for phi, psi in zip(phi_sel, psi_sel)
                if phi is not None]
        phi_sel = [phi for phi, _ in kept]
        psi_sel = [psi for _, psi in kept]
    if any(sel is None for sel in psi_sel):
        warnings.warn("Some residues in selection do not have psi selections")
        kept = [(phi, psi) for phi, psi in zip(phi_sel, psi_sel)
                if psi is not None]
        phi_sel = [phi for phi, _ in kept]
        psi_sel = [psi for _, psi in kept]

    # ag1..ag4 hold the four phi atoms of every residue; ag5 holds the last
    # atom of every psi selection.
    self.ag1 = mda.AtomGroup([atoms[0] for atoms in phi_sel])
    self.ag2 = mda.AtomGroup([atoms[1] for atoms in phi_sel])
    self.ag3 = mda.AtomGroup([atoms[2] for atoms in phi_sel])
    self.ag4 = mda.AtomGroup([atoms[3] for atoms in phi_sel])
    self.ag5 = mda.AtomGroup([atoms[3] for atoms in psi_sel])
def _reorg_groups(groups: List[EnsembleAtomGroup]):
    """Split four-atom ensemble groups into four single-atom ensemble groups.

    For every member of every group, the atoms at positions 0..3 are wrapped
    in their own one-atom ``AtomGroup``.  Each group also contributes a name
    built by joining, with ``-``, the names of the most recently collected
    atom of each position.  Members are re-keyed as
    ``(name, k[0], k[1], k[2])`` where ``k`` is the member's original key.

    Returns
    -------
    tuple
        ``(eag1, eag2, eag3, eag4, names)``: four ``EnsembleAtomGroup``
        objects (one per atom position, all using ``groups[0].ensemble``)
        and the list of per-group names.
    """
    columns = ([], [], [], [])
    ag_keys = []
    names = []

    for group in groups:
        keys = list(group.keys())
        members = [group[key] for key in keys]
        for slot, column in enumerate(columns):
            column.extend(mda.AtomGroup([member[slot]]) for member in members)

        # The label comes from the last single-atom group of every column.
        label = '-'.join(column[-1].atoms[0].name for column in columns)
        names.append(label)
        ag_keys.extend((label, key[0], key[1], key[2]) for key in keys)

    ensemble = groups[0].ensemble
    eag1, eag2, eag3, eag4 = (
        EnsembleAtomGroup(dict(zip(ag_keys, column)), ensemble)
        for column in columns
    )
    return eag1, eag2, eag3, eag4, names
def get_atom_groups(bin_int):
    """Build one AtomGroup per cluster below and above bin ``bin_int``.

    Clusters from ``cluster_atoms_under(bin_int)`` are resolved against the
    positions whose entry in the module-level ``inds`` is ``< bin_int`` and
    appended to ``atom_groups_clusters_under``; clusters from
    ``cluster_atoms_over(bin_int)`` use ``inds > bin_int`` and go to
    ``atom_groups_clusters_over``.  Each cluster is a collection of indices
    into that bin-restricted position list; every index is translated back
    to an index into the module-level ``positions`` list by looking up its
    coordinate triple.

    Returns nothing; the two module-level lists are extended in place.
    """

    def _append_groups(clusters, in_bin, destination):
        # The bin-restricted positions do not depend on the cluster, so they
        # are computed once per side instead of once per cluster.
        bin_positions_ind = np.where(in_bin)[0]
        bin_positions = [positions[i] for i in bin_positions_ind]
        for cluster in clusters:
            atom_group_inds = []
            for ind in cluster:
                row = bin_positions[ind]
                point = [row[0], row[1], row[2]]
                atom_group_inds.append(positions.index(point))
            destination.append(mda.AtomGroup(atom_group_inds, u))

    _append_groups(cluster_atoms_under(bin_int), inds < bin_int,
                   atom_groups_clusters_under)
    _append_groups(cluster_atoms_over(bin_int), inds > bin_int,
                   atom_groups_clusters_over)
def __init__(self, atomgroups, **kwargs):
    """Set up the dihedral calculation for a list of four-atom groups.

    Parameters
    ----------
    atomgroups : list
        a list of atomgroups for which the dihedral angles are calculated
    **kwargs
        forwarded unchanged to the base class initialiser

    Raises
    ------
    ValueError
        If any atomgroups do not contain 4 atoms
    """
    trajectory = atomgroups[0].universe.trajectory
    super(Dihedral, self).__init__(trajectory, **kwargs)
    self.atomgroups = atomgroups

    if not all(len(ag) == 4 for ag in atomgroups):
        raise ValueError("All AtomGroups must contain 4 atoms")

    def column(slot):
        # Atoms found at position `slot` of every input group, in input order.
        return mda.AtomGroup([ag[slot] for ag in atomgroups])

    self.ag1 = column(0)
    self.ag2 = column(1)
    self.ag3 = column(2)
    self.ag4 = column(3)
def select_by_helixandbase(self, helix_range: List[int], base_range: List[int]):
    """Select all atoms of the cadnano subset of helices and base positions.

    The bases returned by ``self._parse_selection(base_range, helix_range)``
    are visited group by group.  Each base is looked up with the key
    ``(base.h, base.p, flag)`` in ``self.link.DhpsDid``; ``flag`` is False
    for the first group and True for every later one.  The result is mapped
    through ``self.link.DidFid`` to a residue index of ``self.link.u``.

    Returns
    -------
    tuple
        the concatenated atoms of all matched residues (an AtomGroup, in
        visiting order) and ``self.link.Dcolor``
    """
    universe = self.link.u
    base_groups = self._parse_selection(base_range, helix_range)

    selected = mda.AtomGroup([], universe)
    for position, bases in enumerate(base_groups):
        flag = bool(position)
        for base in bases:
            design_id = self.link.DhpsDid[(base.h, base.p, flag)]
            residue_index = self.link.DidFid[design_id]
            selected = selected + universe.residues[residue_index].atoms
    return selected, self.link.Dcolor
def test_unwrap_empty_group(self, level, compound, reference, is_triclinic):
    """Unwrapping an empty group must run and yield an empty (0, 3) array."""
    # get a pristine test universe:
    u = UnWrapUniverse(is_triclinic=is_triclinic)

    # Build the empty group of the requested level.
    group_types = (
        ('atoms', mda.AtomGroup),
        ('residues', mda.ResidueGroup),
        ('segments', mda.SegmentGroup),
    )
    for name, group_type in group_types:
        if level == name:
            group = group_type([], u)
            break

    group.unwrap(compound=compound, reference=reference, inplace=True)

    # check for correct (empty) result:
    expected = np.empty((0, 3), dtype=np.float32)
    assert_array_equal(group.atoms.positions, expected)
def __init__(self, atom1, atom2, universe):
    """Initialise the Bond object.

    Input
    -----
    atom1 : index of the first atom involved in bond (1-based)
    atom2 : index of the second atom involved in bond (1-based)
    universe: MDAnalysis universe instance defining the bond
    """
    # Store the atoms in 1-based indices
    self.atom1 = atom1
    self.atom2 = atom2
    # We generate the atom group from the zero-based indices
    self.atomgroup = mda.AtomGroup([atom1 - 1, atom2 - 1], universe)
    # (Two leftover debug print() calls that dumped both atoms on every
    # construction were removed; the constructor no longer writes to stdout.)
    # One data slot per trajectory frame.
    self.data = VectorData(universe.trajectory.n_frames)
def __init__(self, atom1, atom2, atom3, universe):
    """Initialise the Angle object.

    Input
    -----
    atom1 : index of the first atom involved in angle (1-based)
    atom2 : index of the second atom involved in angle (1-based)
    atom3 : index of the third atom involved in angle (1-based)
    universe: MDAnalysis universe instance defining the angle
    """
    # The attributes keep the 1-based indices exactly as given.
    one_based = (atom1, atom2, atom3)
    self.atom1, self.atom2, self.atom3 = one_based

    # The atom group itself is addressed with zero-based indices.
    zero_based = [index - 1 for index in one_based]
    self.atomgroup = mda.AtomGroup(zero_based, universe)

    # One data slot per trajectory frame.
    n_frames = universe.trajectory.n_frames
    self.data = VectorData(n_frames)
post_filter_area = auc(FPR_post_ordered, TPR_post_ordered) roc_area[i] = post_filter_area # for every beta phi value calculate area under the curve - cluster filtering # also create atom group that will visualize the atoms that are being filtered out ag_filtered = [] for i in range(0, 264, 4): print(i) FP_dict_filter = {} TP_dict_filter = {} num_clusters, cluster_dict, singleton_ind, doubles_ind, triples_ind = clustering_info_pred_interface_atoms( i) total_ind = singleton_ind filtered_ag = mda.AtomGroup(total_ind, u_dewet) ag_filtered.append(filtered_ag) for j in range(0, 264, 4): if len(total_ind) == 0: break else: new_ind_FP = list(set(FP_dict[j]) - set(total_ind)) new_ind_TP = list(set(TP_dict[j]) - set(total_ind)) FP_dict_filter[j] = new_ind_FP TP_dict_filter[j] = new_ind_TP if len(total_ind) == 0: roc_area[i] = pre_filter_area else: FPR_post = [len(x) / float(P) for x in FP_dict_filter.values()]
def test_empty_atomgroup_access(self):
    """Fragment accessors of an empty AtomGroup return empty results."""
    universe = case1()
    empty_group = mda.AtomGroup([], universe)

    assert empty_group.fragments == ()
    no_indices = np.empty(0, dtype=np.int64)
    assert_equal(empty_group.fragindices, no_indices)
    assert empty_group.n_fragments == 0
elif args.indMM: atoms_num = read_ndx(args.indMM) # Build selection kwd selection = 'bynum ' for num in atoms_num: selection += str(num) + ' ' # Apply selection (static) try: # Ensure no overlap with QM layer #layerMM = exclusive_selection(u,selection,layerQM) layerMM = u.select_atoms(selection, updating=False) except: raise BaseException("Error setting MM layer. Check index file") else: # return an empty AtomGroup (acts as None type within if clauses) layerMM = MDAnalysis.AtomGroup([], u) # -- Point charges layer -- if args.selPC: try: # Ensure no overlap with QM/MM layers #layerPC = exclusive_selection(u,args.selPC,layerQM+layerMM) layerPC = u.select_atoms(args.selPC, updating=True) except: raise BaseException( "Error setting PC layer. Maybe due to missplells in selection keyword" ) elif args.indPC: atoms_num = read_ndx(args.indPC) # Build selection kwd selection = 'bynum '
indices_post = np.argsort(FPR_post) FPR_post_ordered = (np.asarray(FPR_post))[indices_post] TPR_post_ordered = (np.asarray(TPR_post))[indices_post] plt.plot(FP_dict.keys(), FPR_post_ordered, label='FPR') plt.plot(FP_dict.keys(), TPR_post_ordered, label='TPR') plt.title('FPR and TPR, Post-Filter') plt.legend() plt.show() #visualize_post_FPR_TPR() # create atom groups for visualizations contact_ind = np.where(contact == 1)[0].tolist() ag_contact = mda.AtomGroup(contact_ind, u_dewet) ag_filtered_FP = [] ag_filtered_TP = [] ag_FP = [] ag_TP = [] for i in range(0, 404, 4): ag = mda.AtomGroup(FP_dict[i], u_dewet) ag_FP.append(ag) # ag = mda.AtomGroup(FP_removed[i], u_dewet) # ag_filtered_FP.append(ag) ag = mda.AtomGroup(TP_dict[i], u_dewet) ag_TP.append(ag)
def createForce(self, common_core_names):
    """Create the centroid restraint force between ``g1`` and ``g2``.

    Atoms of ``self.g1`` whose name is not listed in ``common_core_names``
    are dropped first; the remaining atoms and all of ``self.g2`` form the
    two groups of an openMM ``CustomCentroidBondForce`` whose energy
    expression is chosen by ``self.shape``.

    Args:
        common_core_names (array[str]): names of the common-core atoms.
            Usually provided by the restraint.yaml file.

    Returns:
        openMM.CustomCentroidBondForce: the force object representing the
        restraint bond (also stored as ``self.force``).

    Raises:
        NotImplementedError: if ``self.shape`` is not a supported shape.
    """
    # Only g1 is filtered: it is the ligand group; the protein has no
    # common core.
    logger.debug(f"Before CC check: {self.g1.names}")
    self.g1_in_cc = MDAnalysis.AtomGroup([], self.topology)
    for member in self.g1:
        if member.name not in common_core_names:
            continue
        self.g1_in_cc += member
    logger.debug(f"After CC check: {self.g1_in_cc.names}")
    self.common_core_idxs = [member.id for member in self.g1_in_cc]

    # openMM wants plain Python ints, not the numpy indices MDAnalysis holds.
    self.g1_openmm = [int(ix) for ix in self.g1_in_cc.ix]
    self.g2_openmm = [int(ix) for ix in self.g2.ix]
    logger.debug(f"G1 openMM ix: {self.g1_openmm}\nG2 openMM ix: {self.g2_openmm}")

    self.g1pos = self.g1_in_cc.center_of_mass()
    self.g2pos = self.g2.center_of_mass()
    self.initial_distance = np.linalg.norm(self.g1pos - self.g2pos)

    # An explicit r0 overrides the default equilibrium distance, which is
    # the initial centre-of-mass distance.
    if "r0" in self.kwargs.keys():
        self.r0 = self.kwargs["r0"] * angstrom
    else:
        self.r0 = self.initial_distance * angstrom

    # (accepted shape names, debug message, energy expression)
    flatbottom_variants = (
        (
            ("flatbottom", "flatbottom-oneside"),
            "creating flatbottom-1side-soft",
            "step(distance(g1,g2)-r0) * (k/2)*(distance(g1,g2)-r0)^2",
        ),
        (
            ("flatbottom-oneside-sharp",),
            "creating flatbottom-1side-sharp",
            "step(distance(g1,g2)-r0) * (k/2)*(distance(g1,g2))^2",
        ),
        (
            ("flatbottom-twoside",),
            "creating flatbottom-2side-sharp",
            "step(abs(distance(g1,g2)-r0)-w)*k*(distance(g1,g2)-r0)^2",
        ),
    )

    if self.shape == "harmonic":
        # Harmonic potential with per-bond parameters k and r0.
        self.force = CustomCentroidBondForce(2, "0.5*k*(distance(g1,g2)-r0)^2")
        for parameter in ("k", "r0"):
            self.force.addPerBondParameter(parameter)
        for openmm_group in (self.g1_openmm, self.g2_openmm):
            self.force.addGroup(openmm_group)
        self.force.addBond([0, 1], [self.force_constant, self.r0])
        logger.info(
            f"""Restraint force (centroid/bonded, shape is {self.shape}, initial distance: {self.initial_distance}, k={self.force_constant}"""
        )
    else:
        for shape_names, message, energy in flatbottom_variants:
            if self.shape in shape_names:
                logger.debug(message)
                self.force = CustomCentroidBondForce(2, energy)
                self._add_flatbottom_parameters()
                break
        else:
            raise NotImplementedError(f"Cannot create potential of shape {self.shape}")

    logger.debug(
        f""" Group 1 (COM: {self.g1pos}): {[self.g1_in_cc.names]} Group 2 (COM: {self.g2pos}): {[self.g2.names]}"""
    )

    del self.topology  # delete the enormous no longer needed universe asap
    return self.force
def coarse_grain(universe,
                 residue_list,
                 simulation_name='simulation_name',
                 export=False):
    """Build a coarse-grained universe with one bead per mapped segment.

    Parameters
    ----------
    universe : MDAnalysis Universe
        all-atom system.  It is modified in place: bonds are guessed for the
        listed residues, the first atom of every mapped segment is renamed
        and given the bead type, charge, per-frame centre of mass and summed
        mass, and finally all bonds, angles and dihedrals are deleted.
    residue_list : list of str
        residue names to coarse-grain.
    simulation_name : str
        prefix of the files written when ``export`` is true.
    export : bool
        when true, write ``outputs/CoarseGrain/<name>_CG.pdb`` and
        ``outputs/CoarseGrain/<name>_CG.dcd``.

    Returns
    -------
    MDAnalysis Universe
        new in-memory universe holding only the beads, with bonds derived
        from the all-atom bonds crossing bead boundaries and guessed
        angles/dihedrals.

    Notes
    -----
    The bead mapping is read from ``src/mapping_dict.json`` and
    ``src/abrev_dict.json`` (relative to the working directory).  Residues
    whose key is missing from these dictionaries are reported and skipped.
    """
    import numpy as np

    # ============== Misc Initiation ============== #
    with open('src/mapping_dict.json', "r") as f:
        mapping_dict = load(f)
    with open('src/abrev_dict.json', "r") as f:
        abrev_dict = load(f)

    u = universe

    # ================= Execution ================= #
    print('Calculating Bond connections...')
    resnames = ' '.join(residue_list)
    original_bond_count = len(u.bonds)
    u.select_atoms(f'resname {resnames}').guess_bonds(
        vdwradii=config.vdw_radi)
    print(
        f'Original file contained {original_bond_count} bonds. {len(u.bonds) - original_bond_count} additional bonds infered.'
    )

    print('Begining Coarse-Graining process...')
    bead_data = []  # (dummy atom, atoms represented by it)
    cg_beads = []  # dummy atoms, in bead order
    dummy_parents = {}  # atom index -> dummy atom of the bead it belongs to

    non_water_atoms = u.select_atoms('not resname WAT')
    for residue in non_water_atoms.residues:
        resid = residue.resid
        resname = residue.resname
        if resname not in residue_list:
            continue

        # Four-letter names starting with 'D' (D-variants) share the mapping
        # entry of the name without the leading 'D'.
        if len(resname) == 4 and resname[0] == 'D':
            resname_key = resname[1:]
        else:
            resname_key = resname

        try:
            segments = mapping_dict[resname_key].keys()
            for segment in segments:
                segment_map = mapping_dict[resname_key][segment]
                params = 'name ' + ' '.join(segment_map['atoms'])
                # all atoms of this residue that belong to the segment
                atms = residue.atoms.select_atoms(params)
                # the first atom of the segment is re-used as the bead
                dummy = atms[0]
                dummy.name = str(abrev_dict[resname_key]) + str(
                    segment[0]) + str(resid)
                dummy.type = segment_map['name']
                dummy.charge = segment_map['charge']

                bead_data.append((dummy, atms))
                cg_beads.append(dummy)
                for atm in atms:
                    dummy_parents[atm.ix] = dummy
        except KeyError:
            print(
                f'{resname_key} was not found in mapping/abrev_dict, skipping coarse grain. Please add its parameters to the dictionary. (See README section A3. for help)'
            )

    # Position of every bead in cg_beads, keyed by atom index.  setdefault
    # keeps the first occurrence, which is what list.index() returned.
    bead_index = {}
    for position, dummy in enumerate(cg_beads):
        bead_index.setdefault(dummy.ix, position)

    # Connect two beads whenever an all-atom bond leaves one bead and ends in
    # an atom owned by another bead.
    # NOTE(review): a crossing bond is seen from both sides, so the list can
    # contain duplicate/reversed pairs - unchanged from before; confirm
    # whether that is intended.
    new_bonds = []
    for dummy, atms in bead_data:
        own_position = bead_index[dummy.ix]
        member_ix = set(atms.ix)
        for atom in atms:
            for bond in atom.bonds:
                for bonded_atom in bond.atoms:
                    if bonded_atom.ix in member_ix:
                        continue
                    try:
                        parent = dummy_parents[bonded_atom.ix]
                    except KeyError:
                        # the bonded atom does not belong to any bead
                        continue
                    new_bonds.append([own_position, bead_index[parent.ix]])

    cg_beads = mda.AtomGroup(cg_beads)

    # TODO: EXPORT NEW_U INSTEAD OF OLD U
    # TODO: EXPORT NEW_U TO HAVE APPROPRIATE FRAMES
    # TODO: SHIFT THE DEFINITION OF CENTERS IN THE UNIVERSE EVEN IF NOT EXPORTING
    # TODO: AUTOTUNE THE CURVE TO FIND THE RIGHT STEP
    progress(0)
    number_of_frames = len(u.trajectory)

    # The bead centres are captured here, frame by frame.  Previously they
    # were only written to ``dummy.position`` and re-read in a second pass
    # over the trajectory; with a file-backed reader such position edits are
    # discarded when the frame changes, so the second pass saw the original
    # atom positions instead of the centres of mass.
    coordinates = np.empty((number_of_frames, len(bead_data), 3),
                           dtype=np.float32)
    for frame_number, frame in enumerate(u.trajectory):
        for bead_number, (dummy, atms) in enumerate(bead_data):
            center = atms.center_of_mass()
            dummy.position = center
            coordinates[frame_number, bead_number] = center
        progress(frame.frame / number_of_frames)
    progress(1)
    print()

    # Masses are summed only after all centres of mass have been computed.
    for dummy, atms in bead_data:
        dummy.mass = atms.masses.sum()

    # purge existing remnant bonds
    u.delete_bonds(u.bonds)
    u.delete_angles(u.angles)
    u.delete_dihedrals(u.dihedrals)

    print('Building new coarse-grained universe...')
    new_u = mda.Merge(cg_beads)
    new_u.load_new(coordinates, format=MemoryReader)
    new_u.add_TopologyAttr('bonds', new_bonds)
    new_u.add_TopologyAttr('angles', guess_angles(new_u.bonds))
    new_u.add_TopologyAttr('dihedrals', guess_dihedrals(new_u.angles))
    print(
        f'Built universe with {len(new_u.atoms)} coarse-grained beads, {len(new_u.bonds)} bonds, {len(new_u.angles)} angles, and {len(new_u.dihedrals)} dihedrals'
    )

    if export:
        print('Writing Output Files...')
        out_file = f'outputs/CoarseGrain/{simulation_name}_CG.pdb'
        # MDAnalysis opens the file itself; the extra open(out_file, 'w+')
        # that used to wrap this call only held a second, unused handle.
        new_u.atoms.write(out_file, bonds='all')
        print(f'Topology written to {simulation_name}_CG.pdb!')

        is_multiframe = number_of_frames > 1
        with mda.Writer(f'outputs/CoarseGrain/{simulation_name}_CG.dcd',
                        new_u.atoms.n_atoms,
                        multiframe=is_multiframe,
                        bonds='all') as w:
            # NOTE(review): the first frame is skipped here, as before -
            # confirm that the DCD is meant to start at frame 1.
            for frame in new_u.trajectory[1:]:
                w.write(new_u.atoms)
        print('Generated All Coarse Grained Molecules!')
        print(f'Trajectory written to {simulation_name}_CG.dcd!')

    print(f'Reduced {len(u.atoms)} atoms to {len(new_u.atoms)} beads!')
    print('Coarse Graining Task complete!')
    return new_u
def create_cc_orderfiles():
    """Write order values against random reference axes to ``OUTPUTFILENAME``.

    For every frame of the ``GRO``/``TRJ`` universe, 1000 reference axes are
    drawn with ``np.random.random_sample`` and normalised.  For each axis one
    line is written holding the frame time, the axis number, the order value
    returned by ``get_cc_order``, the projections of the two chain vectors
    (P minus C216, P minus C316) onto the axis, and the axis components.

    ``ztilt_randaxis.csv`` is also created, but only its header line is
    written.
    """
    u = mda.Universe(GRO, TRJ)

    header_fmt = "{: <12}{: <10}{: <15}{: <15}{: <15}{: <20}{: <20}{: <20}"
    row_fmt = "{: <12.2f}{: <10}{: <15.8}{: <15.5}{: <15.5}{: <20}{: <20}{: <20}"

    def chain_vector_atoms(end_selection):
        # Pair of selections: the "P" atoms and the chain-end atoms.
        return mda.AtomGroup([
            u.atoms.select_atoms("name P"),
            u.atoms.select_atoms(end_selection)
        ])

    with open(OUTPUTFILENAME, "w") as scdfile, open("ztilt_randaxis.csv", "w") as axf:
        #### Print header files ####
        print(header_fmt.format("time", "axndx", "S", "proj_ch1", "proj_ch2",
                                "ax_x", "ax_y", "ax_z"),
              file=scdfile)
        print("time,tilt", file=axf)

        # Tail atoms per chain, ordered as listed in SCD_TAIL_ATOMS_OF.
        s_atoms = []
        for chain_names in SCD_TAIL_ATOMS_OF[LIPID[:2]]:
            rank_of = {name: rank for rank, name in enumerate(chain_names)}
            selected = u.atoms.select_atoms(
                "name {}".format(' '.join(chain_names)))
            s_atoms.append(
                sorted(selected, key=lambda atom: rank_of[atom.name]))

        chainvec_atms1 = chain_vector_atoms("name C216")
        chainvec_atms2 = chain_vector_atoms("name C316")

        for ts in u.trajectory:
            time = ts.time
            LOGGER.info("At time %s", time)

            positions = [[atm.position for atm in atms] for atms in s_atoms]

            # First entry of the (P - chain end) difference vectors.
            pos1 = chainvec_atms1.positions
            chainatmvec1 = (pos1[0] - pos1[1])[0]
            pos2 = chainvec_atms2.positions
            chainatmvec2 = (pos2[0] - pos2[1])[0]

            for axis_number in range(1000):
                refaxis = np.random.random_sample((3, ))
                refaxis = refaxis / np.linalg.norm(refaxis)

                # projection of chainvec to new axis
                projection_mag1 = np.dot(chainatmvec1, refaxis)
                projection_mag2 = np.dot(chainatmvec2, refaxis)

                order_val, s_prof = get_cc_order(positions, ref_axis=refaxis)

                LOGGER.debug("printing to files ...")
                ### Print everything to files ###
                line_scd = row_fmt.format(time, axis_number, order_val,
                                          projection_mag1, projection_mag2,
                                          *refaxis)
                print(line_scd, file=scdfile)
def get_data():
    """Assemble the per-atom training table and split it into train/test sets.

    For every entry of the module-level ``protein_files`` the atom table of
    ``<protein>/prot_atoms_dewetting_order.pdb`` is one-hot encoded on
    ``atom_name`` and ``residue_name`` and labelled with an ``Interface``
    column (1 where the contact file's temperature factor equals 1).  One
    copy of that table is made per beta-phi value (0 .. 200 in steps of 4)
    and filled with the prediction mask read from
    ``<protein>/beta_phi_XXX/pred_contact_mask.dat``.  Neighbour counts are
    added, rows with ``Prediction == 0`` are dropped and a ``True Positive``
    label is derived.

    Returns
    -------
    tuple
        ``(x_train, x_test, y_train, y_test, df_final_final)``: scaled
        feature arrays, label arrays and the shuffled table they come from.

    Raises
    ------
    ValueError
        If ``protein_files`` is empty.
    """
    dropped_columns = ['record_name', 'atom_number', 'residue_number',
                       'x_coord', 'y_coord', 'z_coord', 'chain_id', 'blank_1',
                       'alt_loc', 'blank_2', 'insertion', 'blank_3',
                       'occupancy', 'blank_4', 'segment_id', 'element_symbol',
                       'charge', 'line_idx']
    # beta phi values are scanned identically for every protein
    start, end, step = 0, 204, 4

    per_protein = []
    for protein in protein_files:
        filename = '%s/prot_atoms_dewetting_order.pdb' % (protein)
        ppdb = PandasPdb()
        ppdb.read_pdb(filename)

        if protein == 'thymidylate_synthase':
            u = mda.Universe('%s/actual_contact.pdb' % (protein))
            contact = u.atoms.tempfactors
        else:
            u = mda.Universe(protein + '/beta_phi_400/pred_contact_tp_fp.pdb')
            contact = u.atoms.tempfactors.astype(int)
        interface_col = np.where(contact == 1, 1, 0)

        # create Pandas dataframe
        df = ppdb.df['ATOM']
        df = df.drop(columns=dropped_columns)
        # encode nominal variables
        df = pd.get_dummies(df, columns=['atom_name'], prefix=['atom_name'])
        df = pd.get_dummies(df, columns=['residue_name'],
                            prefix=['residue_name'])
        # add column that indicates whether atom is part of interface
        df['Interface'] = interface_col

        # for each beta phi value, access atoms that are predicted to be a
        # part of the interface and compare to the actual contact atoms
        frames = []
        for i in range(start, end, step):
            print(i)
            # A real copy is required: previously ``df_concat = df`` aliased
            # the base table, so the rows stored for the first beta phi
            # value were overwritten by the second iteration.
            df_concat = df.copy()
            df_concat['Beta Phi Prediction Value'] = float(i) / 100
            filename = protein + '/beta_phi_%03d/pred_contact_mask.dat' % (i)
            df_concat['Prediction'] = np.loadtxt(filename)
            frames.append(df_concat)
        # single concatenation instead of growing the frame step by step
        df_final = pd.concat(frames, ignore_index=True)

        df_final['Number Nearest Neighbors'] = get_num_nn(protein)
        df_final['Number Dewetted Nearest Neighbors'] = get_num_dewet_nn(
            protein)
        df_final = df_final.drop(df_final[df_final.Prediction == 0].index)
        df_final['True Positive'] = (df_final['Interface'] == 1) & (
            df_final['Prediction'] == 1)
        df_final = df_final.drop(columns=['Prediction', 'Interface'])
        per_protein.append(df_final)

    if not per_protein:
        raise ValueError("protein_files is empty; no data to assemble")
    if len(per_protein) == 1:
        # a single table is used as is (its index is not renumbered)
        df_final_final = per_protein[0]
    else:
        df_final_final = pd.concat(per_protein, ignore_index=True)

    # booleans -> 0/1
    df_final_final[['True Positive']] *= 1
    df_final_final = df_final_final.loc[df_final_final['b_factor'] > -2]
    # randomize rows in dataframe
    df_final_final = df_final_final.sample(frac=1)
    df_final_final = df_final_final.fillna(0)

    x = df_final_final.iloc[:,
                            df_final_final.columns != 'True Positive'].values
    y = df_final_final['True Positive'].values
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.15)

    # scale values: fit on the training part only, then apply to the test part
    sc = StandardScaler()
    x_train = sc.fit_transform(x_train)
    x_test = sc.transform(x_test)
    return x_train, x_test, y_train, y_test, df_final_final
atom_groups_clusters = [] positions = positions.tolist() for n in range(len(hist) - 1): if hist[n] == 0 or hist[n] == 1: n += 1 else: for cluster in cluster_atoms(n + 1): atom_group_inds = [] bin_positions_ind = np.where(inds == (n + 1))[0] bin_positions = [positions[i] for i in bin_positions_ind] for ind in cluster: point = [bin_positions[ind][0], bin_positions[ind][1], bin_positions[ind][2]] index = positions.index(point) atom_group_inds.append(index) atom_group = mda.AtomGroup(atom_group_inds, u) atom_groups_clusters.append(atom_group) # create atom groups with atoms in same bin atom_groups_bins = [] for i in range(1, max(inds)): atom_group_inds = [m for m, n in enumerate(inds) if n == i] atom_group = mda.AtomGroup(atom_group_inds, u) atom_groups_bins.append(atom_group) # write selections to vmd file with mda.selections.vmd.SelectionWriter('selection.vmd', mode='w') as vmd: cluster_num = 0 for ag in atom_groups_clusters: