def get_rmsd_to(self, other_rnamodel, output='', dont_move=False):
    """Return the RMSD between this model's atoms and another model's atoms.

    The atoms compared are ``self.atoms`` and ``other_rnamodel.atoms``,
    paired by position.

    Args:
        other_rnamodel: model exposing ``atoms``, ``struc`` and ``fn``
            in the same way as this object.
        output: when truthy, the fitted transform is applied to both
            structures and they are saved as ``aligned.pdb`` (this model)
            and ``aligned2.pdb`` (the other model) in the working directory.
        dont_move: when true, skip the superposition and return the RMSD
            of the coordinates exactly as they are.

    Returns:
        The RMSD. Rounded to 3 decimals after superposition; unrounded
        when ``dont_move`` is set.

    Raises:
        Whatever ``Superimposer.set_atoms`` raises (for instance when the
        two atom lists differ in length), after diagnostics are printed.
    """
    if dont_move:
        # fix http://biopython.org/DIST/docs/api/Bio.PDB.Vector%27.Vector-class.html
        coords = array([a.get_vector().get_array() for a in self.atoms])
        other_coords = array(
            [a.get_vector().get_array() for a in other_rnamodel.atoms])
        s = SVDSuperimposer()
        s.set(coords, other_coords)
        return s.get_init_rms()

    sup = Bio.PDB.Superimposer()
    try:
        sup.set_atoms(self.atoms, other_rnamodel.atoms)
    except Exception:
        # Show which atoms were being paired, then let the real error
        # propagate instead of failing later on an unset ``sup.rms``.
        print(self.fn, len(self.atoms),
              other_rnamodel.fn, len(other_rnamodel.atoms))
        for a, b in zip(self.atoms, other_rnamodel.atoms):
            print(a.parent, b.parent)
        raise

    rms = round(sup.rms, 3)

    if output:
        # NOTE(review): the same transform is applied to both structures,
        # including the one passed as ``fixed`` - confirm this is intended.
        io = Bio.PDB.PDBIO()
        sup.apply(self.struc.get_atoms())
        io.set_structure(self.struc)
        io.save("aligned.pdb")

        io = Bio.PDB.PDBIO()
        sup.apply(other_rnamodel.struc.get_atoms())
        io.set_structure(other_rnamodel.struc)
        io.save("aligned2.pdb")
    return rms
def setUp(self):
    """Build two 4x3 coordinate sets and load them into a superimposer.

    ``self.x`` is handed to ``SVDSuperimposer.set`` as the first argument
    and ``self.y`` as the second; the fit itself is not run here.
    """
    reference_points = [
        [51.65, -1.90, 50.07],
        [50.40, -1.23, 50.65],
        [50.68, -0.04, 51.54],
        [50.22, -0.02, 52.85],
    ]
    moving_points = [
        [51.30, -2.99, 46.54],
        [51.09, -1.88, 47.58],
        [52.36, -1.20, 48.03],
        [52.71, -1.18, 49.38],
    ]
    self.x = array(reference_points)
    self.y = array(moving_points)
    superimposer = SVDSuperimposer()
    superimposer.set(self.x, self.y)
    self.sup = superimposer
def __sub__(self, other):
    """
    Compute the RMSD between this fragment and another one after an
    SVD-based least-squares fit of their ``coords_ca`` arrays.

    Example:
        >>> rmsd=fragment1-fragment2

    @return: rmsd between fragments
    @rtype: float
    """
    superimposer = SVDSuperimposer()
    # This fragment is the reference set; the other fragment is fitted.
    reference, fitted = self.coords_ca, other.coords_ca
    superimposer.set(reference, fitted)
    superimposer.run()
    rmsd = superimposer.get_rms()
    return rmsd
def distance_matrix(CA):
    """Return the symmetric matrix of pairwise RMSDs between models.

    ``CA`` is indexed as ``CA[model, :, :]``; each slice is one coordinate
    set (assumes shape (n_models, n_atoms, 3) - TODO confirm). Every pair
    is superimposed once and the RMSD is mirrored across the diagonal,
    which stays zero.
    """
    model_count = CA.shape[0]
    rmsd = np.zeros((model_count, model_count))
    superimposer = SVDSuperimposer()
    for row in range(model_count):
        reference = CA[row, :, :]
        for col in range(row + 1, model_count):
            superimposer.set(reference, CA[col, :, :])
            superimposer.run()
            value = superimposer.get_rms()
            rmsd[row, col] = value
            rmsd[col, row] = value
    return rmsd
def computeRMSD():
    """Return the RMSD between ``ca_atoms`` and ``ca_atoms_pdb`` after fitting.

    Both sequences are read from the enclosing (module) scope. The first
    three items of each entry are used as coordinates. ``ca_atoms_pdb``
    is passed to the superimposer as the reference set and ``ca_atoms``
    as the set that is fitted onto it.

    If the two sequences differ in length a message is printed and the
    interpreter is terminated through ``exit()``.
    """
    if len(ca_atoms) != len(ca_atoms_pdb):
        # Single pre-formatted argument: identical output under the
        # Python 2 print statement and the Python 3 print function.
        print("Error. Length mismatch! %d %d"
              % (len(ca_atoms), len(ca_atoms_pdb)))
        # NOTE(review): exits with status 0 although this is an error path.
        exit()

    n_atoms = len(ca_atoms)
    fixed_coord = numpy.zeros((n_atoms, 3))
    moving_coord = numpy.zeros((n_atoms, 3))
    for i in range(n_atoms):
        target = ca_atoms_pdb[i]
        candidate = ca_atoms[i]
        fixed_coord[i] = (target[0], target[1], target[2])
        moving_coord[i] = (candidate[0], candidate[1], candidate[2])

    sup = SVDSuperimposer()
    sup.set(fixed_coord, moving_coord)
    sup.run()
    return sup.get_rms()
def set_atoms(self, fixed, moving):
    """Fit the moving atoms to the fixed atoms and store the result.

    The coordinates of ``fixed`` are given to ``SVDSuperimposer.set`` as
    its first (reference) argument and those of ``moving`` as its second.
    The resulting RMSD is stored in ``self.rms`` and the rotation and
    translation in ``self.rotran``. No atom is modified here.

    @param fixed: list of (fixed) atoms
    @param moving: list of (moving) atoms
    @type fixed,moving: [L{Atom}, L{Atom},...]
    @raise PDBException: if the two lists differ in length
    """
    if len(fixed) != len(moving):
        raise PDBException("Fixed and moving atom lists differ in size")

    atom_count = len(fixed)
    fixed_coord = numpy.zeros((atom_count, 3))
    moving_coord = numpy.zeros((atom_count, 3))
    for row, (fixed_atom, moving_atom) in enumerate(zip(fixed, moving)):
        fixed_coord[row] = fixed_atom.get_coord()
        moving_coord[row] = moving_atom.get_coord()

    superimposer = SVDSuperimposer()
    superimposer.set(fixed_coord, moving_coord)
    superimposer.run()
    self.rms = superimposer.get_rms()
    self.rotran = superimposer.get_rotran()
def compute_frag_RMSD(res_len):
    """Return the RMSD between ``ca_atoms`` and ``ca_atoms_pdb`` for a fragment.

    Both sequences are read from the enclosing (module) scope. The first
    three items of each entry are used as coordinates. ``ca_atoms_pdb``
    is the reference set and ``ca_atoms`` is fitted onto it.

    Args:
        res_len: expected number of atoms in the fragment.

    Returns:
        The RMSD after superposition, or 0 (with a printed message) when
        the two sequences differ in length or their length is not
        ``res_len``.
    """
    if len(ca_atoms) != len(ca_atoms_pdb):
        # Single pre-formatted argument: identical output under the
        # Python 2 print statement and the Python 3 print function.
        print("Error. Length mismatch! target:frag %d %d"
              % (len(ca_atoms_pdb), len(ca_atoms)))
        return 0

    n_atoms = len(ca_atoms)
    if n_atoms != res_len:
        print("atom list length mismatches the fragment length! %s %s"
              % (n_atoms, res_len))
        return 0

    fixed_coord = numpy.zeros((n_atoms, 3))
    moving_coord = numpy.zeros((n_atoms, 3))
    for i in range(n_atoms):
        target = ca_atoms_pdb[i]
        candidate = ca_atoms[i]
        fixed_coord[i] = (target[0], target[1], target[2])
        moving_coord[i] = (candidate[0], candidate[1], candidate[2])

    sup = SVDSuperimposer()
    sup.set(fixed_coord, moving_coord)
    sup.run()
    return sup.get_rms()
def assemble_multiscale_visualization(topology_fn, rmf_fn, pdb_dir,
                                      outprefix=None, chimerax=True,
                                      xl_fn=None):
    """
    Render multiscale versions of rigid bodies from PDB files + flexible
    beads from RMF files w/o mapped crosslinks.

    The rigid-body PDB structures are superimposed on the matching RMF
    particles (CA atoms against particle coordinates), merged into one
    composite PDB file, and optionally accompanied by a ChimeraX script
    and a pseudobond file for crosslinks.

    Args:
        topology_fn (str): Topology file in pipe-separated-value (PSV)
            format as required in integrative modeling using IMP. For
            details on how to write a topology file, see:
            https://integrativemodeling.org/2.13.0/doc/ref/classIMP_1_1pmi_1_1topology_1_1TopologyReader.html
        rmf_fn (str): Name of the RMF file. Only frame 0 is loaded.
        pdb_dir (str): Directory containing all the PDB files for the
            rigid bodies used in modeling.
        outprefix (str, optional): Prefix for output files. Defaults to
            None, in which case "centroid_model" is used.
        chimerax (bool, optional): If true, a Chimerax script will be
            written (extension ".cxc"). Defaults to True. If false, the
            function returns right after the composite PDB is saved.
        xl_fn (str, optional): A file containing a XL dataset. Defaults
            to None. If this dataset is supplied, then it will be mapped
            on to the overall structure with satisfied XLs drawn in blue
            and violated XLs drawn in red. A XL dataset should be supplied
            in a comma-separated-value (CSV) format containing at least
            the following fields

            protein1, residue1, protein2, residue2, sat

            where the last field <sat> is a boolean 1 or 0 depending on
            whether the particular XL is satisfied (in the ensemble sense)
            as a result of the integrative modeling exercise.

    Side effects:
        Writes "<outprefix>.pdb", and when ``chimerax`` is true also
        "<outprefix>.cxc" and (with ``xl_fn``) "<outprefix>_XLs.pb".
    """
    # -------------------------------------------
    # read the RMF file and extract all particles
    # -------------------------------------------
    of = RMF.open_rmf_file_read_only(rmf_fn)
    rmf_model = IMP.Model()
    hier = IMP.rmf.create_hierarchies(of, rmf_model)[0]
    IMP.rmf.load_frame(of, 0)
    particles = IMP.core.get_leaves(hier)

    # (molecule name, particle name) -> coordinates
    rmf_ps = {}
    for p in particles:
        molname = p.get_parent().get_parent().get_parent().get_name().strip()
        name = p.get_name().strip()
        coord = IMP.core.XYZ(p).get_coordinates()
        rmf_ps[(molname, name)] = coord

    # --------------------------------------------------------------
    # map pdb residues to rmf particles for each rigid body pdb file
    # --------------------------------------------------------------
    # read the topology file
    t = TopologyReader(topology_fn, pdb_dir=pdb_dir)
    components = t.get_components()

    map_pdb2rmf = {}
    rigid_body_models = {}
    rigid_body_residues = {}
    chain_ids = {}  # these are matched to the chimerax rmf plugin
    chain_id_count = 0
    for c in components:
        # ignore unstructured residues
        # NOTE(review): molecules made only of beads never get a chain ID,
        # so the chain_ids[...] lookups further down would raise KeyError
        # for them - confirm such topologies do not occur.
        if c.pdb_file == "BEADS":
            continue
        mol = c.molname
        pdb_prefix = os.path.basename(c.pdb_file).split(".pdb")[0]
        chain_id = c.chain
        resrange = c.residue_range
        offset = c.pdb_offset

        r0 = resrange[0] + offset
        r1 = resrange[1] + 1 + offset

        if mol not in chain_ids:
            chain_ids[mol] = string.ascii_uppercase[chain_id_count]
            chain_id_count += 1

        # parse every PDB file only once
        if pdb_prefix not in map_pdb2rmf:
            map_pdb2rmf[pdb_prefix] = {}
            this_rigid_body_model = \
                PDBParser().get_structure("x", c.pdb_file)[0]
            this_rigid_body_residues = {
                (r.full_id[2], r.id[1]): r
                for r in this_rigid_body_model.get_residues()
            }
            rigid_body_models[pdb_prefix] = this_rigid_body_model
            rigid_body_residues[pdb_prefix] = this_rigid_body_residues

        for r in range(r0, r1):
            key = (chain_id, r)
            val = (mol, r)
            if key in rigid_body_residues[pdb_prefix]:
                map_pdb2rmf[pdb_prefix][key] = val

    # --------------------------------
    # align all pdb files with the rmf
    # --------------------------------
    print("\nAligning all rigid body structures...")
    align = SVDSuperimposer()
    for pdb_prefix, mapper in map_pdb2rmf.items():
        if not mapper:
            # nothing from this PDB maps onto the RMF: there is nothing
            # to fit and an empty coordinate set cannot be superimposed
            continue
        pdb_coords = []
        pdb_atoms = []
        rmf_coords = []
        residues = rigid_body_residues[pdb_prefix]
        for (chain, pdb_res), (mol, rmf_res) in mapper.items():
            r = residues[(chain, pdb_res)]
            pdb_coords.append(r["CA"].coord)
            pdb_atoms.extend(r.get_atoms())
            rmf_coords.append(rmf_ps[(mol, str(rmf_res))])

        pdb_coords = np.array(pdb_coords)
        rmf_coords = np.array(rmf_coords)
        # RMF coordinates are the reference; the PDB atoms get moved
        align.set(rmf_coords, pdb_coords)
        align.run()
        rotmat, vec = align.get_rotran()
        for a in pdb_atoms:
            a.transform(rotmat, vec)

    # --------------------------
    # assemble the composite pdb
    # --------------------------
    mols = {c.molname for c in components}
    print("\nChain IDs by molecule:")
    for k, v in chain_ids.items():
        print("molecule %s, chain ID %s" % (k, v))

    reslists = {mol: [] for mol in mols}
    for pdb_prefix, mapper in map_pdb2rmf.items():
        residues = rigid_body_residues[pdb_prefix]
        for (chain, pdb_res), (mol, rmf_res) in mapper.items():
            r = residues[(chain, pdb_res)]
            # renumber the residue with its RMF residue number
            new_id = (r.id[0], rmf_res, r.id[2])
            new_residue = Residue.Residue(id=new_id, resname=r.resname,
                                          segid=r.segid)
            for a in r.get_atoms():
                new_residue.add(a)
            reslists[mol].append(new_residue)

    composite_model = Model.Model(0)
    for mol, chain_id in chain_ids.items():
        this_chain = Chain.Chain(chain_id)
        for r in sorted(reslists[mol], key=lambda r: r.id[1]):
            this_chain.add(r)
        composite_model.add(this_chain)

    # save the composite pdb to file
    io = PDBIO()
    io.set_structure(composite_model)
    if outprefix is None:
        outprefix = "centroid_model"
    io.save(outprefix + ".pdb")

    # -------------------------------------------------------------------
    # chimerax rendering (hide most of the rmf except unstructured beads)
    # -------------------------------------------------------------------
    if not chimerax:
        # return to the caller instead of terminating the interpreter
        return

    print("\nWriting UCSF Chimerax script...")
    script = [
        "open %s\n" % (outprefix + ".pdb"),
        "open %s\n" % rmf_fn,
        "hide\n",
        "show cartoon\n",
        "color #%d %s\n" % (CHIMERAX_PDB_MODEL_NUM, STRUCT_COLOR),
        "color #%d %s\n" % (CHIMERAX_RMF_MODEL_NUM, UNSTRUCT_COLOR),
        "hide #%d\n" % CHIMERAX_RMF_MODEL_NUM,
    ]

    # (molecule, residue) pairs covered by a rigid body; a set keeps the
    # membership tests below constant-time
    struct_residues = set()
    for mapper in map_pdb2rmf.values():
        struct_residues.update(mapper.values())

    unstruct_atomspec = {}
    for molname, particle_name in rmf_ps:
        rmf_chain_id = chain_ids[molname]
        if "bead" in particle_name:
            # bead names start with "<first>-<last>_"
            r0, r1 = particle_name.split("_")[0].split("-")
            r0 = int(r0)
            r1 = int(r1)
            this_atomspec = "#%d/%s:%d-%d" % (
                CHIMERAX_RMF_MODEL_NUM, rmf_chain_id, r0, r1)
            for r in range(r0, r1 + 1):
                unstruct_atomspec[(molname, r)] = this_atomspec
        else:
            r = int(particle_name)
            if (molname, r) not in struct_residues:
                unstruct_atomspec[(molname, r)] = "#%d/%s:%d" % (
                    CHIMERAX_RMF_MODEL_NUM, rmf_chain_id, r)

    # sorted so that the generated script is reproducible between runs
    script.append(
        "show %s\n" % " ".join(sorted(set(unstruct_atomspec.values()))))

    # ----------------------------------------------------------
    # if crosslink data is supplied, write out a pseudobond file
    # ----------------------------------------------------------
    if xl_fn is not None:
        # parse XL data
        df = pd.read_csv(os.path.abspath(xl_fn))
        xls = list(zip(df["protein1"], df["residue1"],
                       df["protein2"], df["residue2"], df["sat"]))

        # get lists of struct atomspecs
        atomspec = {}
        for (mol, particle_name) in rmf_ps:
            if "bead" in particle_name:
                continue
            resid = int(particle_name)
            if (mol, resid) in unstruct_atomspec:
                continue
            atomspec[(mol, resid)] = "#%d/%s:%d@CA" % (
                CHIMERAX_PDB_MODEL_NUM, chain_ids[mol], resid)

        # now add in all the unstruct atomspecs
        atomspec.update(unstruct_atomspec)

        # write pseudobond script
        pb_lines = ["; radius = %2.2f\n" % XL_RADIUS, "; dashes = 0\n"]
        for p1, r1, p2, r2, sat in xls:
            atomspec_1 = atomspec[(p1, r1)]
            atomspec_2 = atomspec[(p2, r2)]
            # both ends fall on the same bead/residue: nothing to draw
            if atomspec_1 == atomspec_2:
                continue
            color = SAT_XL_COLOR if sat else VIOL_XL_COLOR
            pb_lines.append("%s %s %s\n" % (atomspec_1, atomspec_2, color))
        pb_lines.append("\n")

        pb_fn = outprefix + "_XLs.pb"
        with open(pb_fn, "w") as pb_file:
            pb_file.write("".join(pb_lines))
        script.append("open %s\n" % pb_fn)

    script.append("preset 'overall look' publication\n")
    chimerax_out_fn = outprefix + ".cxc"
    with open(chimerax_out_fn, "w") as cxc_file:
        cxc_file.write("".join(script))
def compute_deviations(reader, mean_structure, indexed_mean_structure, num_confs, start=None, stop=None):
    """
        Computes the per-particle deviation from the mean structure for each frame

        Every configuration is superimposed on the mean structure using only
        the particles whose ``index`` is in ``indexes``; the resulting
        transform is then applied to all particles before measuring distances.
        NOTE: ``indexes`` is not a parameter - it is looked up in the
        enclosing (module) scope when a frame is processed.

        Parameters:
            reader (readers.LorenzoReader2): An active reader on the trajectory file to analyze.
            mean_structure (numpy.array): The position of each particle in the mean configuration.
                It is subtracted from the aligned frame row by row
                (presumably one row of 3 coordinates per particle - TODO confirm).
            indexed_mean_structure (numpy.array): The rows of the mean structure used as the
                alignment reference (presumably those selected by ``indexes`` - TODO confirm).
            num_confs (int): The number of configurations in the reader.
            <optional> start (int): Number of configurations skipped before the first one read. Used if parallel.
            <optional> stop (int): Upper bound on the number of configurations processed
                (the frame counter starts at 0 regardless of ``start``). Used if parallel.

        Returns:
            deviations (list): Each entry in the list is a list of the deviations for each particle at a given time.
    """
    stop = num_confs if stop is None else int(stop)
    start = 0 if start is None else int(start)

    def fetch_np(conf):
        # positions of every nucleotide in the configuration
        return np.array([n.cm_pos for n in conf._nucleotides])

    def indexed_fetch_np(conf):
        # positions of the nucleotides selected for the alignment
        return np.array(
            [n.cm_pos for n in conf._nucleotides if n.index in indexes])

    # Use the single-value decomposition method for superimposing configurations
    sup = SVDSuperimposer()
    deviations = []
    confid = 0

    mysystem = reader._get_system(N_skip=start)
    while mysystem != False and confid < stop:
        mysystem.inbox()

        # fit the selected particles onto the selected part of the mean
        cur_conf = fetch_np(mysystem)
        indexed_cur_conf = indexed_fetch_np(mysystem)
        sup.set(indexed_mean_structure, indexed_cur_conf)
        sup.run()
        print("Frame number:", confid, "RMSF:", sup.get_rms())

        # move the whole frame with that transform and measure how far
        # every particle ends up from the mean structure
        rot, tran = sup.get_rotran()
        aligned = np.einsum('ij, ki -> kj', rot, cur_conf) + tran
        per_particle = np.linalg.norm(aligned - mean_structure, axis=1)
        # plain list so the result is compatible with json
        deviations.append(list(per_particle))

        confid += 1
        mysystem = reader._get_system()

    return deviations
def main():
    """Superimpose one or more configurations on a reference configuration.

    Command line: ``reference victims... [-i index_file]``. Each victim
    is fitted to the reference (optionally using only the particles
    listed in the index file), the transform is applied to all positions
    and to the a1/a3 orientation vectors, and the result is written to
    ``aligned<N>.dat`` where N is the victim's position on the command
    line, counted from 0.

    Exits with status 1 if the index file does not contain integers.
    """
    parser = argparse.ArgumentParser(
        prog=os.path.basename(__file__),
        description="superimposes one or more structures sharing a topology to a reference structure")
    parser.add_argument('reference', type=str, nargs=1,
                        help="The reference configuration to superimpose to")
    parser.add_argument('victims', type=str, nargs='+',
                        help="The configuraitons to superimpose on the reference")
    parser.add_argument('-i', metavar='index_file', dest='index_file', nargs=1,
                        help='Align to only a subset of particles from a space-separated list in the provided file')
    args = parser.parse_args()

    #run system checks
    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "numpy", "Bio"])

    #Get the reference files
    ref_dat = args.reference[0]

    #-i will make it only run on a subset of nucleotides.
    #The index file is a space-separated list of particle IDs
    if args.index_file:
        index_file = args.index_file[0]
        with open(index_file, 'r') as f:
            indexes = f.readline().split()
        try:
            indexes = [int(i) for i in indexes]
        except ValueError:
            print("ERROR: The index file must be a space-seperated list of particles. These can be generated using oxView by clicking the \"Download Selected Base List\" button")
            # Stop here: carrying on with string indexes would only fail
            # later with an unrelated indexing error.
            raise SystemExit(1)
    else:
        with ErikReader(ref_dat) as r:
            indexes = list(range(len(r.read().positions)))

    #Read the reference and keep only the particles used for the fit
    with ErikReader(ref_dat) as r:
        ref = r.read()
    ref.inbox()
    ref_conf = ref.positions[indexes]

    #Create list of configurations to superimpose
    to_sup = []
    for victim in args.victims:
        with ErikReader(victim) as r:
            system = r.read()
        system.inbox()
        to_sup.append(system)

    sup = SVDSuperimposer()

    #Run the biopython superimposer on each configuration and rewrite its configuration file
    for i, system in enumerate(to_sup):
        indexed_cur_conf = system.positions[indexes]
        sup.set(ref_conf, indexed_cur_conf)
        sup.run()
        rot, tran = sup.get_rotran()
        # positions are rotated and translated; orientation vectors are
        # only rotated
        system.positions = np.einsum('ij, ki -> kj', rot, system.positions) + tran
        system.a1s = np.einsum('ij, ki -> kj', rot, system.a1s)
        system.a3s = np.einsum('ij, ki -> kj', rot, system.a3s)
        system.write_new("aligned{}.dat".format(i))
        print("INFO: Wrote file aligned{}.dat".format(i), file=stderr)
def calc_DockQ(model,native,use_CA_only=False):
    """Compute the DockQ score of a two-chain model against its native structure.

    The external ``fnat`` program, expected in the same directory as the
    running script, is run twice on ``model`` and ``native`` (with the
    arguments 5 and 10) and its output is parsed with ``parse_fnat``.
    The two structures are then superimposed twice with Biopython: first
    on the interface atoms they share (giving ``irms``) and then on the
    chain with more matched atoms, called the receptor (after which
    ``Lrms`` is measured on the other chain, the ligand, without any
    further fitting).

    model       -- path of the model PDB file
    native      -- path of the native PDB file
    use_CA_only -- fit on CA atoms only instead of CA, C, N and O

    Returns a dictionary with the keys DockQ, irms, Lrms, fnat,
    nat_correct, nat_total, fnonnat, nonnat_count, model_total, chain1,
    chain2, len1, len2, class1 and class2.

    Fails with AssertionError when ``fnat`` reports -1, when no common
    atoms are found or when the matched atom counts differ. Unpacking
    ``chain_sample.keys()`` requires the model to have exactly two chains.
    """
    exec_path=os.path.dirname(os.path.abspath(sys.argv[0]))
    atom_for_sup=['CA','C','N','O']
    if(use_CA_only):
        atom_for_sup=['CA']

    # NOTE(review): the commands are built by string concatenation and run
    # through a shell; file names with spaces or shell metacharacters
    # will break them.
    cmd_fnat=exec_path + '/fnat ' + model + ' ' + native + ' 5'
    #cmd_interface=exec_path + '/fnat ' + model + ' ' + native + ' 10 backbone'
    cmd_interface=exec_path + '/fnat ' + model + ' ' + native + ' 10'

    fnat_out = commands.getoutput(cmd_fnat)
    (fnat,nat_correct,nat_total,fnonnat,nonnat_count,model_total,interface5A)=parse_fnat(fnat_out)
    assert fnat!=-1, "Error running cmd: %s\n" % (cmd_fnat)
    inter_out = commands.getoutput(cmd_interface)
    # Of this second run only ``interface`` is used below (plus the
    # fnat_bb sanity check).
    (fnat_bb,nat_correct_bb,nat_total_bb,fnonnat_bb,nonnat_count_bb,model_total_bb,interface)=parse_fnat(inter_out)
    assert fnat_bb!=-1, "Error running cmd: %s\n" % (cmd_interface)

    #Use same interface as for fnat for iRMS
    #interface=interface5A

    # Start the parser
    pdb_parser = Bio.PDB.PDBParser(QUIET = True)

    # Get the structures
    ref_structure = pdb_parser.get_structure("reference", native)
    sample_structure = pdb_parser.get_structure("model", model)

    # Use the first model in the pdb-files for alignment
    # Change the number 0 if you want to align to another structure
    ref_model = ref_structure[0]
    sample_model = sample_structure[0]

    # Lists of the atoms used for the interface superposition
    ref_atoms = []
    sample_atoms = []

    common_interface=[]

    chain_res={}

    # find atoms common in both sample and native; atoms are identified
    # by the string "<residue number><chain id>.<atom name>"
    atoms_def_sample=[]
    atoms_def_in_both=[]
    #first read in sample
    for sample_chain in sample_model:
        chain=sample_chain.id
        for sample_res in sample_chain:
            if sample_res.get_id()[0] != ' ': #Skip hetatm.
                continue
            resname=sample_res.get_id()[1]
            key=str(resname) + chain
            for a in atom_for_sup:
                atom_key=key + '.' + a
                if a in sample_res:
                    # duplicates are reported but still appended
                    if atom_key in atoms_def_sample:
                        print atom_key + ' already added (MODEL)!!!'
                    atoms_def_sample.append(atom_key)

    #then read in native also present in sample
    for ref_chain in ref_model:
        chain=ref_chain.id
        for ref_res in ref_chain:
            if ref_res.get_id()[0] != ' ': #Skip hetatm.
                continue
            resname=ref_res.get_id()[1]
            key=str(resname) + chain
            for a in atom_for_sup:
                atom_key=key + '.' + a
                if a in ref_res and atom_key in atoms_def_sample:
                    if atom_key in atoms_def_in_both:
                        print atom_key + ' already added (Native)!!!'
                    atoms_def_in_both.append(atom_key)

    # collect, per model chain, its residue keys and the interface atoms
    # that also exist in the native
    for sample_chain in sample_model:
        chain=sample_chain.id
        if chain not in chain_res.keys():
            chain_res[chain]=[]
        for sample_res in sample_chain:
            if sample_res.get_id()[0] != ' ': #Skip hetatm.
                continue
            resname=sample_res.get_id()[1]
            key=str(resname) + chain
            chain_res[chain].append(key)
            if key in interface:
                for a in atom_for_sup:
                    atom_key=key + '.' + a
                    if a in sample_res and atom_key in atoms_def_in_both:
                        sample_atoms.append(sample_res[a])
                common_interface.append(key)

    chain_ref={}
    common_residues=[]

    # Iterate of all chains in the model in order to find all residues
    for ref_chain in ref_model:
        # Iterate of all residues in each model in order to find proper atoms
        chain=ref_chain.id
        if chain not in chain_ref.keys():
            chain_ref[chain]=[]
        for ref_res in ref_chain:
            if ref_res.get_id()[0] != ' ': #Skip hetatm.
                continue
            resname=ref_res.get_id()[1]
            key=str(resname) + chain
            # NOTE(review): raises KeyError if the native has a chain id
            # that the model lacks.
            if key in chain_res[chain]: # if key is present in sample
                for a in atom_for_sup:
                    atom_key=key + '.' + a
                    if a in ref_res and atom_key in atoms_def_in_both:
                        chain_ref[chain].append(ref_res[a])
                        common_residues.append(key)
            if key in common_interface:
                # native counterpart of the interface atoms collected
                # from the model above
                for a in atom_for_sup:
                    atom_key=key + '.' + a
                    if a in ref_res and atom_key in atoms_def_in_both:
                        ref_atoms.append(ref_res[a])

    #get the ones that are present in native
    chain_sample={}
    for sample_chain in sample_model:
        chain=sample_chain.id
        if chain not in chain_sample.keys():
            chain_sample[chain]=[]
        for sample_res in sample_chain:
            if sample_res.get_id()[0] != ' ': #Skip hetatm.
                continue
            resname=sample_res.get_id()[1]
            key=str(resname) + chain
            if key in common_residues:
                for a in atom_for_sup:
                    atom_key=key + '.' + a
                    if a in sample_res and atom_key in atoms_def_in_both:
                        chain_sample[chain].append(sample_res[a])

    assert len(ref_atoms)!=0, "length of native is zero"
    assert len(sample_atoms)!=0, "length of model is zero"
    assert len(ref_atoms)==len(sample_atoms), "Different number of atoms in native and model %d %d\n" % (len(ref_atoms),len(sample_atoms))

    # First fit: interface atoms only; its RMSD is the interface RMSD.
    super_imposer = Bio.PDB.Superimposer()
    super_imposer.set_atoms(ref_atoms, sample_atoms)
    super_imposer.apply(sample_model.get_atoms())

    irms=super_imposer.rms

    # Requires exactly two chains in the model.
    (chain1,chain2)=chain_sample.keys()

    ligand_chain=chain1
    receptor_chain=chain2
    len1=len(chain_res[chain1])
    len2=len(chain_res[chain2])

    assert len1!=0, "%s chain has zero length!\n" % chain1
    assert len2!=0, "%s chain has zero length!\n" % chain2

    # The chain with more matched atoms is treated as the receptor.
    class1='ligand'
    class2='receptor'
    if(len(chain_sample[chain1]) > len(chain_sample[chain2])):
        receptor_chain=chain1
        ligand_chain=chain2
        class1='receptor'
        class2='ligand'

    #Set to align on receptor
    assert len(chain_ref[receptor_chain])==len(chain_sample[receptor_chain]), "Different number of atoms in native and model receptor (chain %c) %d %d\n" % (receptor_chain,len(chain_ref[receptor_chain]),len(chain_sample[receptor_chain]))

    # Second fit: receptor atoms only, applied to the whole model.
    super_imposer.set_atoms(chain_ref[receptor_chain], chain_sample[receptor_chain])
    super_imposer.apply(sample_model.get_atoms())
    receptor_chain_rms=super_imposer.rms  # computed but not used below

    assert len(chain_ref[ligand_chain])!=0 or len(chain_sample[ligand_chain])!=0, "Zero number of equivalent atoms in native and model ligand (chain %s) %d %d.\nCheck that the residue numbers in model and native is consistent\n" % (ligand_chain,len(chain_ref[ligand_chain]),len(chain_sample[ligand_chain]))
    assert len(chain_ref[ligand_chain])==len(chain_sample[ligand_chain]), "Different number of atoms in native and model ligand (chain %c) %d %d\n" % (ligand_chain,len(chain_ref[ligand_chain]),len(chain_sample[ligand_chain]))

    # Ligand coordinates as they are after the receptor fit
    coord1=np.array([atom.coord for atom in chain_ref[ligand_chain]])
    coord2=np.array([atom.coord for atom in chain_sample[ligand_chain]])

    sup=SVDSuperimposer()
    Lrms = sup._rms(coord1,coord2) #using the private _rms function which does not superimpose

    # Mean of fnat and the two scaled RMSD terms (scaling constants 1.5
    # for irms and 8.5 for Lrms).
    DockQ=(float(fnat) + 1/(1+(irms/1.5)*(irms/1.5)) + 1/(1+(Lrms/8.5)*(Lrms/8.5)))/3
    # NOTE(review): the local name ``dict`` shadows the builtin inside
    # this function.
    dict={}
    dict['DockQ']=DockQ
    dict['irms']=irms
    dict['Lrms']=Lrms
    dict['fnat']=fnat
    dict['nat_correct']=nat_correct
    dict['nat_total']=nat_total

    dict['fnonnat']=fnonnat
    dict['nonnat_count']=nonnat_count
    dict['model_total']=model_total

    dict['chain1']=chain1
    dict['chain2']=chain2
    dict['len1']=len1
    dict['len2']=len2
    dict['class1']=class1
    dict['class2']=class2

    return dict
def align_models(CA):
    """Iteratively superimpose all models in ``CA`` on their running mean.

    A working copy of ``CA`` is first fitted model by model onto model 0.
    The copy is then repeatedly fitted onto its own mean until the sum
    of squared RMSDs improves by no more than 0.001 between rounds.
    ``CA`` itself is not modified.

    Returns a tuple ``(transformations, rms)``: one ``get_rotran()``
    result per model mapping the original model onto its final aligned
    copy, and the square root of the mean squared RMSD of the last round.
    """
    n_models = CA.shape[0]
    working_CA = np.copy(CA)
    sup = SVDSuperimposer()

    def fit_onto(reference, index):
        # Fit one working model onto ``reference``, store the moved
        # coordinates back and report the squared RMSD of the fit.
        sup.set(reference, working_CA[index])
        sup.run()
        squared_rms = sup.get_rms()**2
        working_CA[index] = sup.get_transformed()
        return squared_rms

    # initial pass: everything onto the first model
    first_model = working_CA[0, :, :]
    rms_total = 0
    for i_model in range(1, n_models):
        rms_total += fit_onto(first_model, i_model)

    # refinement: everything onto the current mean until converged
    epsilon = 0.001
    rms_best = float("inf")
    while rms_best - rms_total > epsilon:
        rms_best = rms_total
        mean_model = np.mean(working_CA, 0)
        rms_total = 0
        for i_model in range(n_models):
            rms_total += fit_onto(mean_model, i_model)

    # recover the transform from each input model to its aligned copy
    transformations = []
    for original, aligned in zip(CA, working_CA):
        sup.set(aligned, original)
        sup.run()
        transformations.append(sup.get_rotran())

    return transformations, np.sqrt(rms_total/n_models)
class SVDSuperimposerTest(unittest.TestCase):
    """Checks SVDSuperimposer against fixed coordinate sets.

    All comparisons are made after rounding to 3 decimals.
    """

    def setUp(self):
        """Load two 4x3 coordinate sets into a fresh superimposer."""
        self.x = array([[51.65, -1.90, 50.07],
                        [50.40, -1.23, 50.65],
                        [50.68, -0.04, 51.54],
                        [50.22, -0.02, 52.85]])

        self.y = array([[51.30, -2.99, 46.54],
                        [51.09, -1.88, 47.58],
                        [52.36, -1.20, 48.03],
                        [52.71, -1.18, 49.38]])

        self.sup = SVDSuperimposer()
        self.sup.set(self.x, self.y)

    def test_get_init_rms(self):
        """get_init_rms() returns the RMSD of the unfitted coordinates."""
        x = array([[1.19, 1.28, 1.37],
                   [1.46, 1.55, 1.64],
                   [1.73, 1.82, 1.91]])
        y = array([[1.91, 1.82, 1.73],
                   [1.64, 1.55, 1.46],
                   [1.37, 1.28, 1.19]])
        self.sup.set(x, y)
        self.assertIsNone(self.sup.init_rms)
        init_rms = 0.8049844719
        # assertEqual, not assertTrue: assertTrue would take the second
        # value as the failure message and pass for any non-zero result.
        self.assertEqual(
            float('%.3f' % self.sup.get_init_rms()), float('%.3f' % init_rms))

    def test_oldTest(self):
        """Attributes stay unset until needed; run() gives the known fit."""
        # after set(): inputs stored, nothing computed yet
        self.assertTrue(
            array_equal(around(self.sup.reference_coords, decimals=3),
                        around(self.x, decimals=3)))
        self.assertTrue(
            array_equal(around(self.sup.coords, decimals=3),
                        around(self.y, decimals=3)))
        self.assertIsNone(self.sup.rot)
        self.assertIsNone(self.sup.tran)
        self.assertIsNone(self.sup.rms)
        self.assertIsNone(self.sup.init_rms)

        self.sup.run()

        # run() leaves the stored inputs untouched
        self.assertTrue(
            array_equal(around(self.sup.reference_coords, decimals=3),
                        around(self.x, decimals=3)))
        self.assertTrue(
            array_equal(around(self.sup.coords, decimals=3),
                        around(self.y, decimals=3)))
        rot = array([[0.68304983, 0.53664371, 0.49543563],
                     [-0.52277295, 0.83293229, -0.18147242],
                     [-0.51005037, -0.13504564, 0.84947707]])
        tran = array([38.78608157, -20.65451334, -15.42227366])
        self.assertTrue(
            array_equal(around(self.sup.rot, decimals=3),
                        around(rot, decimals=3)))
        self.assertTrue(
            array_equal(around(self.sup.tran, decimals=3),
                        around(tran, decimals=3)))
        # the RMSD values are only computed by their getters
        self.assertIsNone(self.sup.rms)
        self.assertIsNone(self.sup.init_rms)

        rms = 0.00304266526014
        self.assertEqual(
            float('%.3f' % self.sup.get_rms()), float('%.3f' % rms))

        rot_get, tran_get = self.sup.get_rotran()
        self.assertTrue(
            array_equal(around(rot_get, decimals=3),
                        around(rot, decimals=3)))
        self.assertTrue(
            array_equal(around(tran_get, decimals=3),
                        around(tran, decimals=3)))

        # applying the transform by hand (rotation multiplies from the
        # right) and via get_transformed() must both land y on x
        y_on_x1 = dot(self.y, rot) + tran
        y_x_solution = array(
            [[5.16518846e+01, -1.90018270e+00, 5.00708397e+01],
             [5.03977138e+01, -1.22877050e+00, 5.06488200e+01],
             [5.06801788e+01, -4.16095666e-02, 5.15368866e+01],
             [5.02202228e+01, -1.94372374e-02, 5.28534537e+01]])
        self.assertTrue(
            array_equal(around(y_on_x1, decimals=3),
                        around(y_x_solution, decimals=3)))

        y_on_x2 = self.sup.get_transformed()
        self.assertTrue(
            array_equal(around(y_on_x2, decimals=3),
                        around(y_x_solution, decimals=3)))
def doublets_dist(d1, d2):
    """Return the smaller of two RMSDs between doublets ``d1`` and ``d2``.

    ``d1['vec']`` is the reference. It is superimposed with ``d2['vec']``
    and then with ``d2['vec2']``; the lower RMSD of the two fits is
    returned.
    """
    superimposer = SVDSuperimposer()
    reference = d1['vec']
    rms_values = []
    for key in ('vec', 'vec2'):
        superimposer.set(reference, d2[key])
        superimposer.run()
        rms_values.append(superimposer.get_rms())
    return min(rms_values)
def main():
    """Align every frame of a trajectory to a reference configuration.

    Command line: ``traj outfile [-i index_file] [-r reference_structure]``.
    The reference is the first frame of the trajectory unless ``-r`` is
    given. With ``-i`` only the listed particles are used for the fit.
    The first configuration is written to ``outfile`` with ``write_new``
    and each following one is appended to it.

    Exits with status 1 if the index file does not contain integers.
    """
    #handle commandline arguments
    parser = argparse.ArgumentParser(
        prog=os.path.basename(__file__),
        description="Aligns each frame in a trajectory to the first frame")
    parser.add_argument('traj', type=str, nargs=1,
                        help="The trajectory file to align")
    parser.add_argument('outfile', type=str, nargs=1,
                        help='The name of the new trajectory file to write out')
    parser.add_argument('-i', metavar='index_file', dest='index_file', nargs=1,
                        help='Align to only a subset of particles from a space-separated list in the provided file')
    parser.add_argument('-r', metavar='reference_structure',
                        dest='reference_structure', nargs=1,
                        help="Align to a provided configuration instead of the first frame.")
    args = parser.parse_args()

    #run system checks
    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "numpy", "Bio"])

    #Parse command line arguments
    traj_file = args.traj[0]
    outfile = args.outfile[0]
    sup = SVDSuperimposer()

    #-i will make it only run on a subset of nucleotides.
    #The index file is a space-separated list of particle IDs
    if args.index_file:
        index_file = args.index_file[0]
        with open(index_file, 'r') as f:
            indexes = f.readline().split()
        try:
            indexes = [int(i) for i in indexes]
        except ValueError:
            print("ERROR: The index file must be a space-seperated list of particles. These can be generated using oxView by clicking the \"Download Selected Base List\" button")
            # Stop here: carrying on with string indexes would only fail
            # later with an unrelated indexing error.
            raise SystemExit(1)
    else:
        with ErikReader(traj_file) as r:
            indexes = list(range(len(r.read().positions)))

    #-r will make it align to a provided .dat file instead of the first configuration
    if args.reference_structure:
        #read reference configuration
        with ErikReader(args.reference_structure[0]) as ref_reader:
            ref = ref_reader.read()
        ref.inbox()
        ref_conf = ref.positions[indexes]

        r = ErikReader(traj_file)
        # ref_conf holds only the selected particles, so the frame must
        # be fitted with the same selection (as in the loop below).
        mysystem = align_frame(ref_conf, sup, r.read(), indexes)
    else:
        #read the first configuration and use it as the reference configuration for the rest
        r = ErikReader(traj_file)
        mysystem = r.read()
        mysystem.inbox()
        ref_conf = mysystem.positions[indexes]

    #write first configuration to output file
    mysystem.write_new(outfile)
    mysystem = r.read()

    #Read the trajectory one configuration at a time and perform the alignment
    while mysystem != False:
        print("working on t = ", mysystem.time)
        mysystem = align_frame(ref_conf, sup, mysystem, indexes)
        mysystem.write_append(outfile)
        mysystem = r.read()
from Bio.SVDSuperimposer import SVDSuperimposer # start with two coordinate sets (Nx3 arrays - Float0) x = array([[51.65, -1.90, 50.07], [50.40, -1.23, 50.65], [50.68, -0.04, 51.54], [50.22, -0.02, 52.85]], 'f') y = array([[51.30, -2.99, 46.54], [51.09, -1.88, 47.58], [52.36, -1.20, 48.03], [52.71, -1.18, 49.38]], 'f') sup = SVDSuperimposer() # set the coords # y will be rotated and translated on x sup.set(x, y) # do the lsq fit sup.run() # get the rmsd rms = sup.get_rms() # get rotation (right multiplying!) and the translation rot, tran = sup.get_rotran() # rotate y on x manually
class SVDSuperimposerTest(unittest.TestCase):
    """Checks SVDSuperimposer against fixed coordinate sets.

    All comparisons are made after rounding to 3 decimals.
    """

    def setUp(self):
        """Load two 4x3 coordinate sets into a fresh superimposer."""
        self.x = array([[51.65, -1.90, 50.07],
                        [50.40, -1.23, 50.65],
                        [50.68, -0.04, 51.54],
                        [50.22, -0.02, 52.85]])

        self.y = array([[51.30, -2.99, 46.54],
                        [51.09, -1.88, 47.58],
                        [52.36, -1.20, 48.03],
                        [52.71, -1.18, 49.38]])

        self.sup = SVDSuperimposer()
        self.sup.set(self.x, self.y)

    def test_get_init_rms(self):
        """get_init_rms() returns the RMSD of the unfitted coordinates."""
        x = array([[1.19, 1.28, 1.37],
                   [1.46, 1.55, 1.64],
                   [1.73, 1.82, 1.91]])
        y = array([[1.91, 1.82, 1.73],
                   [1.64, 1.55, 1.46],
                   [1.37, 1.28, 1.19]])
        self.sup.set(x, y)
        self.assertIsNone(self.sup.init_rms)
        init_rms = 0.8049844719
        # assertEqual, not assertTrue: assertTrue would take the second
        # value as the failure message and pass for any non-zero result.
        self.assertEqual(float("%.3f" % self.sup.get_init_rms()),
                         float("%.3f" % init_rms))

    def test_oldTest(self):
        """Attributes stay unset until needed; run() gives the known fit."""
        # after set(): inputs stored, nothing computed yet
        self.assertTrue(
            array_equal(around(self.sup.reference_coords, decimals=3),
                        around(self.x, decimals=3)))
        self.assertTrue(
            array_equal(around(self.sup.coords, decimals=3),
                        around(self.y, decimals=3)))
        self.assertIsNone(self.sup.rot)
        self.assertIsNone(self.sup.tran)
        self.assertIsNone(self.sup.rms)
        self.assertIsNone(self.sup.init_rms)

        self.sup.run()

        # run() leaves the stored inputs untouched
        self.assertTrue(
            array_equal(around(self.sup.reference_coords, decimals=3),
                        around(self.x, decimals=3)))
        self.assertTrue(
            array_equal(around(self.sup.coords, decimals=3),
                        around(self.y, decimals=3)))
        rot = array([[0.68304983, 0.53664371, 0.49543563],
                     [-0.52277295, 0.83293229, -0.18147242],
                     [-0.51005037, -0.13504564, 0.84947707]])
        tran = array([38.78608157, -20.65451334, -15.42227366])
        self.assertTrue(
            array_equal(around(self.sup.rot, decimals=3),
                        around(rot, decimals=3)))
        self.assertTrue(
            array_equal(around(self.sup.tran, decimals=3),
                        around(tran, decimals=3)))
        # the RMSD values are only computed by their getters
        self.assertIsNone(self.sup.rms)
        self.assertIsNone(self.sup.init_rms)

        rms = 0.00304266526014
        self.assertEqual(float("%.3f" % self.sup.get_rms()),
                         float("%.3f" % rms))

        rot_get, tran_get = self.sup.get_rotran()
        self.assertTrue(
            array_equal(around(rot_get, decimals=3),
                        around(rot, decimals=3)))
        self.assertTrue(
            array_equal(around(tran_get, decimals=3),
                        around(tran, decimals=3)))

        # applying the transform by hand (rotation multiplies from the
        # right) and via get_transformed() must both land y on x
        y_on_x1 = dot(self.y, rot) + tran
        y_x_solution = array(
            [[5.16518846e+01, -1.90018270e+00, 5.00708397e+01],
             [5.03977138e+01, -1.22877050e+00, 5.06488200e+01],
             [5.06801788e+01, -4.16095666e-02, 5.15368866e+01],
             [5.02202228e+01, -1.94372374e-02, 5.28534537e+01]])
        self.assertTrue(
            array_equal(around(y_on_x1, decimals=3),
                        around(y_x_solution, decimals=3)))

        y_on_x2 = self.sup.get_transformed()
        self.assertTrue(
            array_equal(around(y_on_x2, decimals=3),
                        around(y_x_solution, decimals=3)))
# Inputs: one topology file and one reference configuration.
top_file = args.topology[0]
ref_dat = args.reference[0]

# Load the reference system, call inbox() on it and extract its coordinates.
ref = LorenzoReader2(ref_dat, top_file)._get_system()
ref.inbox()
ref_conf = fetch_np(ref)

# Load every configuration that has to be superimposed onto the reference.
to_sup = [LorenzoReader2(victim, top_file)._get_system()
          for victim in args.victims]

sup = SVDSuperimposer()

# Fit each configuration onto the reference and write it back out.
for idx, system in enumerate(to_sup):
    cur_conf = fetch_np(system)
    sup.set(ref_conf, cur_conf)
    sup.run()
    rot, tran = sup.get_rotran()

    # result[k, j] = sum_i rot[i, j] * cur_conf[k, i], then shift by tran.
    cur_conf = np.einsum('ij, ki -> kj', rot, cur_conf) + tran

    # Positions get rotation + translation; orientation vectors only the rotation.
    for pos, nucleotide in zip(cur_conf, system._nucleotides):
        nucleotide.cm_pos = pos
        nucleotide._a1 = normalize(np.dot(nucleotide._a1, rot))
        nucleotide._a3 = normalize(np.dot(nucleotide._a3, rot))

    system.print_lorenzo_output("aligned{}.dat".format(idx), "/dev/null")
    print("INFO: Wrote file aligned{}.dat".format(idx), file=stderr)
def tm_movement_2D(pdbs1, pdbs2, mode, data, gn_dictionary):
    """Compare the 7TM helix placement of two structure sets as 2D coordinates.

    For each of the seven TMs a triplet of reference generic numbers (GNs) and a
    triplet of helper GNs is selected, average pairwise distances are fetched
    for both sets, 3D pseudo-coordinates are rebuilt from the distance matrices
    (recreate3Dorder), set 2 is superimposed on set 1 using the three most
    stable TMs, and the result is projected on a plane (convert3D_to_2D_plane).

    Parameters:
        pdbs1, pdbs2: the two structure sets; passed to Distances.load_pdbs()
            and their len() is used to average the summed distances.
        mode (int): index into ["extracellular", "intracellular", "pocket",
            "middle"]; selects how the reference GNs are chosen.
        data (dict): must provide data['gpcr_class'] (mode 3) and
            data['tab4'][<gn>]['angles_set1'/'angles_set2'] (rotation estimate).
        gn_dictionary (dict): maps the GNs used here to the numbering used as
            keys of data['tab4'] and reported in the result.

    Returns:
        dict with "coordinates_set1", "coordinates_set2" (lists of seven dicts
        with label/x/y/z/rotation) and "gns_used"; or an empty list when a TM
        has too few conserved residues.
    """
    string_mode = ["extracellular", "intracellular", "pocket", "middle"]
    intracellular = (mode == 1)
    print("COMPARISON", string_mode[mode])
    print(pdbs1)
    print("VS")
    print(pdbs2)

    distances_set1 = Distances()
    distances_set1.load_pdbs(pdbs1)
    distances_set1.filtered_gns = True

    distances_set2 = Distances()
    distances_set2.load_pdbs(pdbs2)
    distances_set2.filtered_gns = True

    # Only GNs reported as conserved by both sets are usable
    conserved_set1 = distances_set1.fetch_conserved_gns_tm()
    conserved_set2 = distances_set2.fetch_conserved_gns_tm()
    conserved = [x for x in conserved_set2 if x in conserved_set1]

    # gns: reference triplet per TM; middle_gpcr: helper triplet per TM.
    # The shared inner list from [[]] * 7 is harmless: entries are only ever rebound.
    gns = [[]] * 7
    middle_gpcr = [[]] * 7
    if mode <= 1: # Intracellular or Extracellular
        for i in range(0,7):
            tm_only = [x for x in conserved if x[0]==str(i+1)]
            # Reverse so that the side of interest is at the start of tm_only
            if intracellular and i % 2 == 0: #all uneven TMs (as # = i+1)
                tm_only.reverse()
            elif not intracellular and i % 2 == 1: # all even TMs (as # i+1)
                tm_only.reverse()
            if len(tm_only) < 3:
                print("too few residues")
                return []
            gns[i] = tm_only[0:3]

            # Helper points: the deepest available triplet ending at position 12..7
            for upwards in range(12, 6, -1):
                if len(tm_only) >= upwards:
                    middle_gpcr[i] = tm_only[(upwards-3):upwards]
                    break

        # INCLUDING References points from membrane middle of GPCR
        # ref_membrane_mid = {}
        # ref_membrane_mid["001"] = [['1x43', '1x44','1x45'], ['2x51', '2x52','2x53'], ['3x35', '3x36', '3x37'], ['4x53', '4x54', '4x55'], ['5x45', '5x46', '5x47'], ['6x47', '6x48', '6x49'], ['7x42', '7x43', '7x44']] # A
        # #ref_membrane_mid["002"] = [['1x50', '1x51', '1x52'], ['2x57', '2x58', '2x59'], ['3x40','3x41','3x42'], ['4x53', '4x54', '4x55'], ['5x44', '5x45', '5x46'], ['6x48', '6x49', '6x50'], ['7x49', '7x50', '7x51']] # B1
        # ref_membrane_mid["002"] = [['1x50', '1x51', '1x52'], ['2x57', '2x58', '2x59'], ['3x40','3x41','3x42'], ['4x55', '4x56'], ['5x42', '5x43', '5x44'], ['7x47', '7x49']] # B1
        # ref_membrane_mid["003"] = ref_membrane_mid["002"] # B2
        # ref_membrane_mid["004"] = [['1x48', '1x49', '1x50'], ['2x47', '2x48', '2x49'], ['3x39', '3x40', '3x41'], ['4x40', '4x41', '4x42'], ['5x47', '5x48', '5x49'], ['6x47', '6x48', '6x49'], ['7x39', '7x40', '7x41']] # C
        # ref_membrane_mid["006"] = [['1x42', '1x43', '1x44'], ['2x52', '2x53', '2x54'], ['3x37', '3x38', '3x39'], ['4x52', '4x53', '4x54'], ['5x52', '5x53', '5x54'], ['6x42', '6x43', '6x44'], ['7x46', '7x47', '7x48']] # F
        #
        # middle_gpcr = ref_membrane_mid[data['gpcr_class']]
    elif mode == 2: # Major pocket (class A)
        ligand_references = [['1x39', '1x40','1x41'], ['2x56', '2x57','2x58'], ['3x31', '3x32', '3x33'], ['4x56', '4x57', '4x58'], ['5x43', '5x44', '5x45'], ['6x51', '6x52', '6x53'], ['7x39', '7x40', '7x41']]
        for i in range(0,7):
            gns[i] = [x for x in ligand_references[i] if x in conserved]
            tm_only = [x for x in conserved if x[0]==str(i+1)]
            if i % 2 == 1: #all uneven TMs (as # = i+1)
                tm_only.reverse()
            if len(gns[i]) > 0:
                if i % 2 == 1: #all uneven TMs (as # = i+1)
                    start_pos = tm_only.index(gns[i][-1])
                else:
                    start_pos = tm_only.index(gns[i][0])

                gns[i] = tm_only[start_pos:(start_pos+3)]

                # Stay close for this as references
                #middle_gpcr[i] = tm_only[(start_pos+6):(start_pos+9)]
                # NOTE(review): `continue` (not `break`) means the last, i.e.
                # smallest, matching `upwards` wins - confirm this is intended.
                for upwards in range(9, 6, -1):
                    if len(tm_only) >= (start_pos+upwards):
                        middle_gpcr[i] = tm_only[(start_pos+upwards-3):(start_pos+upwards)]
                        continue
            else:
                # None of the reference GNs is conserved: fall back to fixed slices
                if len(tm_only) < 9:
                    print("too few residues")
                    return []
                else:
                    #print("Refind",i, gns[i])
                    gns[i] = tm_only[0:3]
                    middle_gpcr[i] = tm_only[6:9]
                    # for upwards in range(15, 6, -1):
                    #     if len(tm_only) >= upwards:
                    #         middle_gpcr[i] = tm_only[(upwards-3):upwards]

        # # FILTER not conserved GNs
        # middle_gpcr = [[]] * 7
        # for i in range(0,7):
        #     tm_only = [x for x in conserved if x[0]==str(i+1)]
        #     if i % 2 == 0: #all uneven TMs (as # = i+1)
        #         tm_only.reverse()
        #
        #     if len(tm_only) < 3:
        #         print("too few residues")
        #         return []
        #
        #     middle_gpcr[i] = tm_only[0:3]
        #print(middle_gpcr)
    elif mode == 3: # Middle
        # References points from membrane middle of GPCR
        ref_membrane_mid = {}
        ref_membrane_mid["001"] = [['1x43', '1x44','1x45'], ['2x51', '2x52','2x53'], ['3x35', '3x36', '3x37'], ['4x53', '4x54', '4x55'], ['5x45', '5x46', '5x47'], ['6x47', '6x48', '6x49'], ['7x42', '7x43', '7x44']] # A
        #ref_membrane_mid["002"] = [['1x50', '1x51', '1x52'], ['2x57', '2x58', '2x59'], ['3x40','3x41','3x42'], ['4x53', '4x54', '4x55'], ['5x44', '5x45', '5x46'], ['6x48', '6x49', '6x50'], ['7x49', '7x50', '7x51']] # B1
        #ref_membrane_mid["002"] = [['1x50', '1x51', '1x52'], ['2x57', '2x58', '2x59'], ['3x40','3x41','3x42'], ['4x55', '4x56'], ['5x42', '5x43', '5x44'], ['7x47', '7x49']] # B1
        ref_membrane_mid["002"] = [['1x50', '1x51', '1x52'], ['2x57', '2x58', '2x59'], ['3x40','3x41','3x42'], ['4x55', '4x56'], ['5x42', '5x43', '5x44'], ['6x48', '6x49', '6x50'], ['7x47', '7x49']] # B1
        ref_membrane_mid["003"] = ref_membrane_mid["002"] # B2
        ref_membrane_mid["004"] = [['1x48', '1x49', '1x50'], ['2x47', '2x48', '2x49'], ['3x39', '3x40', '3x41'], ['4x40', '4x41', '4x42'], ['5x47', '5x48', '5x49'], ['6x47', '6x48', '6x49'], ['7x39', '7x40', '7x41']] # C
        ref_membrane_mid["006"] = [['1x42', '1x43', '1x44'], ['2x52', '2x53', '2x54'], ['3x37', '3x38', '3x39'], ['4x52', '4x53', '4x54'], ['5x52', '5x53', '5x54'], ['6x42', '6x43', '6x44'], ['7x46', '7x47', '7x48']] # F

        membrane_mid = ref_membrane_mid[data['gpcr_class']]

        # For every class except "001" the table entries are translated through
        # the inverted gn_dictionary (value -> key) before use
        if data['gpcr_class'] != "001":
            inv_gn_dictionary = {v: k for k, v in gn_dictionary.items()}
            for index in range(len(membrane_mid)):
                membrane_mid[index] = [inv_gn_dictionary[res] for res in membrane_mid[index]]

        for i in range(0,7):
            gns[i] = [x for x in membrane_mid[i] if x in conserved]
            tm_only = [x for x in conserved if x[0]==str(i+1)]
            if i % 2 == 1: #all uneven TMs (as # = i+1)
                tm_only.reverse()
            if len(gns[i]) > 0:
                if i % 2 == 1: #all uneven TMs (as # = i+1)
                    start_pos = tm_only.index(gns[i][-1])
                else:
                    start_pos = tm_only.index(gns[i][0])

                gns[i] = tm_only[start_pos:(start_pos+3)]

                # Stay close for this as references
                #middle_gpcr[i] = tm_only[(start_pos+6):(start_pos+9)]
                # NOTE(review): same `continue` pattern as in mode 2 - the
                # smallest matching `upwards` is the one that sticks.
                for upwards in range(6, 3, -1):
                    if len(tm_only) >= (start_pos+upwards):
                        middle_gpcr[i] = tm_only[(start_pos+upwards-3):(start_pos+upwards)]
                        continue
            else:
                if len(tm_only) < 6:
                    print("too few residues")
                    return []
                else:
                    #print("Refind",i, gns[i])
                    gns[i] = tm_only[0:3]
                    middle_gpcr[i] = tm_only[3:6]
                    # for upwards in range(15, 6, -1):
                    #     if len(tm_only) >= upwards:
                    #         middle_gpcr[i] = tm_only[(upwards-3):upwards]

    # Merge the reference and the helper points
    gns_flat = [y for x in gns for y in x]
    # Helper points must be conserved and must not duplicate a reference point
    middle_gpcr = [list(filter(lambda x: x in conserved and x not in gns_flat, tm_list)) for tm_list in middle_gpcr]
    # print(gns)
    # print(middle_gpcr)

    # Groups 0-6 are the reference triplets, groups 7-13 the helper triplets
    ends_and_middle = gns[:]
    ends_and_middle.extend(middle_gpcr)
    ends_and_middle_flat = [y for x in ends_and_middle for y in x]
    # Group index of every entry in ends_and_middle_flat
    ends_and_middle_grouping = [x for x in range(0, len(ends_and_middle)) for y in ends_and_middle[x]]
    # Zero-based TM index of every group, taken from the first character of its first GN
    segment_order = [int(ends_and_middle[x][0][0])-1 for x in range(0, len(ends_and_middle))]

    distances_set1.filter_gns.extend([y for x in ends_and_middle for y in x])
    distances_set2.filter_gns = distances_set1.filter_gns
    distances_set1.fetch_distances_tm(distance_type = "HC")
    distances_set2.fetch_distances_tm(distance_type = "HC")

    # Symmetric matrices of the per-set average distance between every GN pair
    membrane_data1 = [x[:] for x in [[0] * len(ends_and_middle_flat)] * len(ends_and_middle_flat)]
    membrane_data2 = [x[:] for x in [[0] * len(ends_and_middle_flat)] * len(ends_and_middle_flat)]
    for i in range(0,len(ends_and_middle_flat)-1):
        for j in range(i+1, len(ends_and_middle_flat)):
            # The distance data is keyed "<gn>_<gn>" in the order given by right_gn_order
            if right_gn_order(ends_and_middle_flat[i], ends_and_middle_flat[j]):
                filter_key = ends_and_middle_flat[i] + "_" + ends_and_middle_flat[j]
            else:
                filter_key = ends_and_middle_flat[j] + "_" + ends_and_middle_flat[i]

            if ends_and_middle_flat[i] != ends_and_middle_flat[j]:
                membrane_data1[i][j] = sum(distances_set1.data[filter_key])/len(pdbs1)
                membrane_data1[j][i] = membrane_data1[i][j]
                membrane_data2[i][j] = sum(distances_set2.data[filter_key])/len(pdbs2)
                membrane_data2[j][i] = membrane_data2[i][j]

    # Identify most stable TMs by ranking the variations to all other helices
    membrane_data1 = np.array([np.array(x) for x in membrane_data1])
    membrane_data2 = np.array([np.array(x) for x in membrane_data2])
    diff_distances = [x[:] for x in [[0] * len(ends_and_middle)] * len(ends_and_middle)]
    for i in range(0,max(ends_and_middle_grouping)):
        for j in range(i+1, max(ends_and_middle_grouping)+1):
            # Calculate movements for each TM relative to their "normal" distance
            # selected residues for group 1 and 2
            group_1 = [x for x in range(0,len(ends_and_middle_grouping)) if ends_and_middle_grouping[x] == i]
            group_2 = [x for x in range(0,len(ends_and_middle_grouping)) if ends_and_middle_grouping[x] == j]

            # Summed absolute difference as a percentage of the mean summed distance
            diff_distances[i][j] = np.sum(abs(membrane_data1[group_1][:, group_2] - membrane_data2[group_1][:, group_2]))/(np.sum(membrane_data1[group_1][:, group_2]+membrane_data2[group_1][:, group_2])/2)*100
            diff_distances[j][i] = diff_distances[i][j]

    # Ranking for each TM
    sum_differences = [sum(x) for x in diff_distances]
    # normalized_differences = [((sum_differences[i]-min(sum_differences[0:7]))/(max(sum_differences[0:7])-min(sum_differences[0:7])))**2 for i in range(0,7)]
    # Replace each row by the rank of its entries, then sum the ranks per column
    for i in range(0,7):
        diff_distances[i] = [sorted(diff_distances[i]).index(x) for x in diff_distances[i]]
    final_rank = [sum([diff_distances[j][i] for j in range(0,7)]) for i in range(0,7)]

    # Grab stable TMs
    # tm_ranking lists TM indices from lowest to highest summed rank
    tm_ranking = [0] * 7
    sorted_rank = sorted(final_rank)
    for i in range(0,7):
        tm_ranking[i] = final_rank.index(sorted_rank[i])
        final_rank[tm_ranking[i]] = 100 # make sure this TM isn't repeated

    # Calculate 3D coordinates from distance matrix
    tms_centroids_set1, tms_set1 = recreate3Dorder(membrane_data1, ends_and_middle_grouping)
    tms_centroids_set2, tms_set2 = recreate3Dorder(membrane_data2, ends_and_middle_grouping)

    # Align 3D points of set2 with 3D points of set1 using the most stable reference points
    best_rmsd = 1000
    best_set = []
    # Disabled the testing RMSD for now
    # (with three candidates taken three at a time this loop runs exactly once)
    for comb in combinations(tm_ranking[:3], 3):
    #for comb in combinations(tm_ranking[:4], 3):
        sel_refs = [x for x in range(0,len(segment_order)) if segment_order[x] in comb]
        #print(sel_refs)

        tms_reference_set1 = np.array(tms_centroids_set1[sel_refs], copy = True)
        tms_reference_set2 = np.array(tms_centroids_set2[sel_refs], copy = True)

        imposer = SVDSuperimposer()
        imposer.set(tms_reference_set1, tms_reference_set2)
        imposer.run()
        rot, trans = imposer.get_rotran()
        rmsd = imposer.get_rms()
        print("RMSD", round(rmsd,2), tm_ranking)
        if rmsd < best_rmsd:
            best_set = comb
            best_rmsd = rmsd

    # Check for possible mirroring error
    # NOTE(review): `error` is computed but no active line reads it.
    test_set2 = np.dot(tms_centroids_set2, rot) + trans
    error = 0
    for i in tm_ranking[3:7]:
        if np.linalg.norm(test_set2[i] - tms_centroids_set1[i]) > 5:
            error += 1

    #if rmsd > 2:
    #if error >= 3 or rmsd > 2:
    if True:
        # Always try the mirror image (z negated) and keep it only if it fits better
        for i in range(0,len(tms_centroids_set2)):
            tms_centroids_set2[i][2] = tms_centroids_set2[i][2]*-1

        # Align 3D points of set2 with 3D points of set1 using the most stable reference points
        tms_reference_set1 = tms_centroids_set1[[x for x in range(0,len(segment_order)) if segment_order[x] in tm_ranking[0:3]]]
        tms_reference_set2 = tms_centroids_set2[[x for x in range(0,len(segment_order)) if segment_order[x] in tm_ranking[0:3]]]

        imposer = SVDSuperimposer()
        imposer.set(tms_reference_set1, tms_reference_set2)
        imposer.run()
        new_rot, new_trans = imposer.get_rotran()
        new_rmsd = imposer.get_rms()
        print("RMSD2", round(new_rmsd,2))

        if new_rmsd < rmsd:
            rot = new_rot
            trans = new_trans
            rmsd = new_rmsd
        else:
            # Mirror image was not better: undo the z flip
            for i in range(0,len(tms_centroids_set2)):
                tms_centroids_set2[i][2] = tms_centroids_set2[i][2]*-1

    # test_set2 = np.dot(tms_reference_set2, rot) + trans
    # for i in range(0,len(test_set2)):
    #     print("pseudoatom s1_tm" + str(i+1), ", pos=[", ','.join([str(x) for x in tms_reference_set1[i]]), "]")
    # for i in range(0,len(test_set2)):
    #     print("pseudoatom s2_tm" + str(i+1), ", pos=[", ','.join([str(x) for x in test_set2[i]]), "]")
    #
    # print("############")
    # #test_set2 = np.dot(tms_centroids_set2, rot) + trans
    # test_set2 = np.array(tms_centroids_set2, copy = True)
    # for i in range(0,len(tms_centroids_set1)):
    #     print("pseudoatom s1_tm" + str(i+1), ", pos=[", ','.join([str(x) for x in tms_centroids_set1[i]]), "]")
    # for i in range(0,len(tms_centroids_set2)):
    #     print("pseudoatom s2_tm" + str(i+1), ", pos=[", ','.join([str(x) for x in tms_centroids_set2[i]]), "]")

    # if rmsd > 2:
    #     for i in range(0,len(tms_centroids_set2)):
    #         tms_centroids_set2[i][2] = tms_centroids_set2[i][2]*-1
    #     # Huge error during alignment of "stable" helices, just use the references not the helper points
    #     tms_reference_set1 = tms_centroids_set1[[x for x in range(0,7) if segment_order[x] in tm_ranking[0:4]]]
    #     tms_reference_set2 = tms_centroids_set2[[x for x in range(0,7) if segment_order[x] in tm_ranking[0:4]]]
    #     imposer = SVDSuperimposer()
    #     imposer.set(tms_reference_set1, tms_reference_set2)
    #     imposer.run()
    #     rot, trans = imposer.get_rotran()
    #     rmsd = imposer.get_rms()
    #     print("RMSD3", round(rmsd,2))
    #
    # Apply the chosen transform to all points of set 2
    tms_centroids_set2 = np.dot(tms_centroids_set2, rot) + trans
    tms_set2 = np.dot(tms_set2, rot) + trans

    # Calculate optimal plane through points in both sets and convert to 2D
    # Try normal based on TM7
    # tm7_centroids = tms_centroids_set1[[x for x in range(0,len(segment_order)) if segment_order[x] == 6]]
    # if len(tm7_centroids) == 2:
    #     normal = (tm7_centroids[1] - tm7_centroids[0])/np.linalg.norm(tm7_centroids[1] - tm7_centroids[0])
    # else:
    #     # Using TM mid as reference plane
    #     normal, midpoint = calculatePlane(np.concatenate((tms_centroids_set1[7:], tms_centroids_set2[7:])), intracellular)

    # Alternative: use center of helical ends and center of helical middle
    # normal = tms_centroids_set1[:7].mean(axis=0) - tms_centroids_set1[7:].mean(axis=0)
    # normal = normal/np.linalg.norm(normal)

    # 7TM references
    # Collect the set-1 centroids belonging to each TM
    tm_centroids = {y:[] for y in range(0,7)}
    [tm_centroids[y].append(tms_centroids_set1[x]) for y in range(0,7) for x in range(0,len(segment_order)) if segment_order[x] == y]
    count = 0
    normal = np.array([0.0,0.0,0.0])
    # Plane normal = mean unit vector between the two centroids of each TM that has both
    for y in range(0,7):
        #if len(tm_centroids[y]) == 2 and (mode != 1 or y != 5):
        if len(tm_centroids[y]) == 2:
            normal += np.array((tm_centroids[y][1] - tm_centroids[y][0])/np.linalg.norm(tm_centroids[y][1] - tm_centroids[y][0]))
            count += 1
    # NOTE(review): count is 0 if no TM has exactly two centroids - confirm that cannot happen.
    normal = normal/count
    midpoint = tms_centroids_set1[:7].mean(axis=0)

    #plane_set1, z_set1 = convert3D_to_2D_plane(tms_centroids_set1[:7], intracellular, normal, midpoint)
    #plane_set2, z_set2 = convert3D_to_2D_plane(tms_centroids_set2[:7], intracellular, normal, midpoint)
    # Project both sets in a single call, then split the result again
    plane_set, z_set = convert3D_to_2D_plane(np.concatenate((tms_centroids_set1[:7], tms_centroids_set2[:7]), axis = 0), intracellular, normal, midpoint)
    plane_set1 = plane_set[:7]
    plane_set2 = plane_set[7:]
    z_set1 = z_set[:7]
    z_set2 = z_set[7:]

    # DO NOT REMOVE: possibly we want to upgrade to weighted superposing
    # Based on Biopython SVDSuperimposer
    # coords = tms_centroids_set2
    # reference_coords = tms_centroids_set1

    # OLD centroid calcalation
    # av1 = sum(coords) / len(coords)
    # av2 = sum(reference_coords) / len(reference_coords)

    # NEW weighted centroid calculation
    # print(normalized_differences)
    # av1, av2 = 0, 0
    # totalweight = 0
    # for i in range(0,7):
    #     # print("Round",i)
    #     #weight = 1+(7-tm_ranking.index(i))/7
    #     weight = (1-normalized_differences[i]+0.1)/1.1
    #     totalweight += weight
    #     print("TM", str(i+1), "weight",weight)
    #     av1 += coords[i]*weight
    #     av2 += reference_coords[i]*weight
    #
    # av1 = av1/totalweight
    # av2 = av2/totalweight
    #
    # coords = coords - av1
    # reference_coords = reference_coords - av2
    #
    # # correlation matrix
    # a = np.dot(np.transpose(coords), reference_coords)
    # u, d, vt = np.linalg.svd(a)
    # rot = np.transpose(np.dot(np.transpose(vt), np.transpose(u)))
    # # check if we have found a reflection
    # if np.linalg.det(rot) < 0:
    #     vt[2] = -vt[2]
    #     rot = np.transpose(np.dot(np.transpose(vt), np.transpose(u)))
    # trans = av2 - np.dot(av1, rot)
    # rot, trans = imposer.get_rotran()
    # tms_set2 = np.dot(tms_set2, rot) + trans

    # CURRENT: Ca-angle to axis core
    rotations = [0] * 7
    for i in range(0,7):
        try:
            # rotations[i] = [data['tab4'][gn_dictionary[x]]['angles_set1'][1]-data['tab4'][gn_dictionary[x]]['angles_set2'][1] if abs(data['tab4'][gn_dictionary[x]]['angles_set1'][1]-data['tab4'][gn_dictionary[x]]['angles_set2'][1]) < 180 else -1*data['tab4'][gn_dictionary[x]]['angles_set2'][1]-data['tab4'][gn_dictionary[x]]['angles_set1'][1] for x in gns[i]]
            # Shift non-positive angles by 360 before taking the difference
            angles1 = [data['tab4'][gn_dictionary[x]]['angles_set1'][11] for x in gns[i]]
            angles1 = [angle if angle > 0 else angle + 360 for angle in angles1 ]
            angles2 = [data['tab4'][gn_dictionary[x]]['angles_set2'][11] for x in gns[i]]
            angles2 = [angle if angle > 0 else angle + 360 for angle in angles2 ]

            rotations[i] = [angles1[x] - angles2[x] for x in range(3)]
            # Wrap differences whose magnitude exceeds 180 by +/-360
            rotations[i] = [value if abs(value) <= 180 else value-360 if value > 0 else value+360 for value in rotations[i]]

            # count=0
            # for x in gns[i]:
            #     print(i, x, data['tab4'][gn_dictionary[x]]['angles_set1'][11], data['tab4'][gn_dictionary[x]]['angles_set2'][11], rotations[i][count])
            #     count += 1

        except:
            # Any failure (missing key, fewer than three GNs, ...) yields zero rotation
            rotations[i] = [0.0, 0.0, 0.0] # TODO: verify other class B errors

        # UPDATE 20-02-2020 No mirroring but top-down through GPCR
        rotations[i] = sum(rotations[i])/3
        # if intracellular:
        #     rotations[i] = -1*sum(rotations[i])/3
        # else:
        #     rotations[i] = sum(rotations[i])/3

    # ALTERNATIVE: utilize TM tip alignment (needs debugging as some angles seem off, e.g. GLP-1 active vs inactive TM2)
    # Add rotation angle based on TM point placement
    # tms_2d_set1, junk = convert3D_to_2D_plane(tms_set1, intracellular, normal, midpoint)
    # tms_2d_set2, junk = convert3D_to_2D_plane(tms_set2, intracellular, normal, midpoint)

    # rotations = [0] * 7
    # for i in range(0,7):
    #     positions = [x for x in range(0, len(ends_and_middle_grouping)) if ends_and_middle_grouping[x] == i]
    #     turn_set1 = tms_2d_set1[positions]
    #     turn_set2 = tms_2d_set2[positions]
    #
    #     # set to middle
    #     turn_set1 = turn_set1 - turn_set1.mean(axis=0)
    #     turn_set2 = turn_set2 - turn_set2.mean(axis=0)
    #
    #     # Calculate shift per residue and take average for this TM
    #     for j in range(0,len(turn_set1)):
    #         v1 = turn_set1[j]/np.linalg.norm(turn_set1[j])
    #         v2 = turn_set2[j]/np.linalg.norm(turn_set2[j])
    #         angle = np.degrees(np.arctan2(v2[1], v2[0]) - np.arctan2(v1[1],v1[0]))
    #
    #         if abs(angle) > 180:
    #             angle = 360 - abs(angle)
    #
    #         rotations[i] += angle/len(turn_set1)

    # TODO: check z-coordinates orientation
    # Step 1: collect movement relative to membrane mid
    # Step 2: find min and max TM
    # Step 3: check if orientation of min/max TM matches the z-scales + intra/extra - if not invert z-coordinates

    # Set 1 is the reference and gets rotation 0; set 2 carries the rotation estimate
    labeled_set1 = [{"label": "TM"+str(i+1), "x": float(plane_set1[i][0]), "y": float(plane_set1[i][1]), "z": float(z_set1[i]), "rotation" : 0} for i in range(0,7)]
    labeled_set2 = [{"label": "TM"+str(i+1), "x": float(plane_set2[i][0]), "y": float(plane_set2[i][1]), "z": float(z_set2[i]), "rotation" : rotations[i]} for i in range(0,7)]

    # Convert used GNs to right numbering
    # (gns[:] is a shallow copy, so the inner lists of gns are rewritten as well)
    gns_used = gns[:]
    for i in range(0,len(gns)):
        for j in range(0,len(gns[i])):
            gns_used[i][j] = gn_dictionary[gns[i][j]]
    return {"coordinates_set1" : labeled_set1, "coordinates_set2": labeled_set2, "gns_used": gns_used}
def calc_DockQ(model, native, use_CA_only=False, capri_peptide=False):
    """Compute DockQ and its components for a two-chain model against its native.

    Runs the external `fnat` program (looked up next to the running script) to
    get the fraction of native contacts and the interface residues, then uses
    Bio.PDB to compute the interface RMSD (irms) and, after superimposing on
    the receptor, the ligand RMSD (Lrms).

    Parameters:
        model: path of the model PDB file.
        native: path of the native (reference) PDB file.
        use_CA_only: superimpose on CA atoms only instead of CA, C, N, O.
        capri_peptide: use the 4 / 8 (`-cb`) fnat cutoffs instead of 5 / 10.

    Returns:
        dict with keys DockQ, irms, Lrms, fnat, nat_correct, nat_total,
        fnonnat, nonnat_count, model_total, chain1, chain2, len1, len2,
        class1, class2.

    Raises:
        AssertionError: when fnat fails or the atom selections are empty or
            of different length between native and model.
        ValueError: from the tuple unpacking when the model does not contain
            exactly two chains.
    """
    # The fnat executable is expected in the directory of the running script
    exec_path = os.path.dirname(os.path.abspath(sys.argv[0]))
    atom_for_sup = ['CA', 'C', 'N', 'O']
    if (use_CA_only):
        atom_for_sup = ['CA']

    # NOTE(review): the command line is built by plain string concatenation and
    # run through a shell (os.popen); paths containing spaces or shell
    # metacharacters will break or be interpreted by the shell.
    cmd_fnat = exec_path + '/fnat ' + model + ' ' + native + ' 5 -all'
    cmd_interface = exec_path + '/fnat ' + model + ' ' + native + ' 10 -all'

    if capri_peptide:
        cmd_fnat = exec_path + '/fnat ' + model + ' ' + native + ' 4 -all'
        cmd_interface = exec_path + '/fnat ' + model + ' ' + native + ' 8 -cb'

    fnat_out = os.popen(cmd_fnat).read()
    #fnat_out = subprocess.getoutput(cmd_fnat)
    #print(fnat_out)
    #    sys.exit()
    (fnat, nat_correct, nat_total, fnonnat, nonnat_count, model_total,
     interface5A) = parse_fnat(fnat_out)
    assert fnat != -1, "Error running cmd: %s\n" % (cmd_fnat)
    # Second run with the larger cutoff; only its interface residue list is used below
    inter_out = os.popen(cmd_interface).read()
    #    inter_out = subprocess.getoutput(cmd_interface)
    (fnat_bb, nat_correct_bb, nat_total_bb, fnonnat_bb, nonnat_count_bb,
     model_total_bb, interface) = parse_fnat(inter_out)
    assert fnat_bb != -1, "Error running cmd: %s\n" % (cmd_interface)

    #print fnat
    #Use same interface as for fnat for iRMS
    #interface=interface5A

    # Start the parser
    pdb_parser = Bio.PDB.PDBParser(QUIET=True)

    # Get the structures
    ref_structure = pdb_parser.get_structure("reference", native)
    sample_structure = pdb_parser.get_structure("model", model)

    # Use the first model in the pdb-files for alignment
    # Change the number 0 if you want to align to another structure
    ref_model = ref_structure[0]
    sample_model = sample_structure[0]

    # Make a list of the atoms (in the structures) you wish to align.
    # The atoms used are those named in atom_for_sup
    ref_atoms = []
    sample_atoms = []

    common_interface = []

    chain_res = {}

    #find atoms common in both sample and native
    # Atom keys have the form "<residue number><chain id>.<atom name>"
    atoms_def_sample = []
    atoms_def_in_both = []
    #first read in sample
    for sample_chain in sample_model:
        #        print sample_chain
        chain = sample_chain.id
        #        print chain
        for sample_res in sample_chain:
            # print sample_res
            if sample_res.get_id()[0] != ' ':  #Skip hetatm.
                continue
            resname = sample_res.get_id()[1]
            key = str(resname) + chain
            for a in atom_for_sup:
                atom_key = key + '.' + a
                if a in sample_res:
                    # Duplicates are reported but still appended
                    if atom_key in atoms_def_sample:
                        print(atom_key + ' already added (MODEL)!!!')
                    atoms_def_sample.append(atom_key)

    #then read in native also present in sample
    for ref_chain in ref_model:
        chain = ref_chain.id
        for ref_res in ref_chain:
            #print ref_res
            if ref_res.get_id()[0] != ' ':  #Skip hetatm.
                #                print ref_res.get_id()
                continue
            resname = ref_res.get_id()[1]
            key = str(resname) + chain
            for a in atom_for_sup:
                atom_key = key + '.' + a
                if a in ref_res and atom_key in atoms_def_sample:
                    if atom_key in atoms_def_in_both:
                        print(atom_key + ' already added (Native)!!!')
                    atoms_def_in_both.append(atom_key)

    #    print atoms_def_in_both
    # Collect the model residues per chain and the model interface atoms
    for sample_chain in sample_model:
        chain = sample_chain.id
        if chain not in list(chain_res.keys()):
            chain_res[chain] = []
        for sample_res in sample_chain:
            if sample_res.get_id()[0] != ' ':  #Skip hetatm.
                continue
            resname = sample_res.get_id()[1]
            key = str(resname) + chain
            chain_res[chain].append(key)
            if key in interface:
                for a in atom_for_sup:
                    atom_key = key + '.' + a
                    if a in sample_res and atom_key in atoms_def_in_both:
                        sample_atoms.append(sample_res[a])
                common_interface.append(key)

    #print inter_pairs

    chain_ref = {}
    common_residues = []

    # Iterate of all chains in the model in order to find all residues
    for ref_chain in ref_model:
        # Iterate of all residues in each model in order to find proper atoms
        #  print dir(ref_chain)
        chain = ref_chain.id
        if chain not in list(chain_ref.keys()):
            chain_ref[chain] = []
        for ref_res in ref_chain:
            if ref_res.get_id()[0] != ' ':  #Skip hetatm.
                continue
            resname = ref_res.get_id()[1]
            key = str(resname) + chain

            #print ref_res
            #      print key
            # print chain_res.values()
            if key in chain_res[chain]:  # if key is present in sample
                #print key
                for a in atom_for_sup:
                    atom_key = key + '.' + a
                    if a in ref_res and atom_key in atoms_def_in_both:
                        chain_ref[chain].append(ref_res[a])
                        common_residues.append(key)
                        #chain_sample.append((ref_res['CA'])
            if key in common_interface:
                # Check if residue number ( .get_id() ) is in the list
                # Append the selected atoms to the list
                #print key
                for a in atom_for_sup:
                    atom_key = key + '.' + a
                    #print atom_key
                    if a in ref_res and atom_key in atoms_def_in_both:
                        ref_atoms.append(ref_res[a])

    #get the ones that are present in native
    chain_sample = {}
    for sample_chain in sample_model:
        chain = sample_chain.id
        if chain not in list(chain_sample.keys()):
            chain_sample[chain] = []
        for sample_res in sample_chain:
            if sample_res.get_id()[0] != ' ':  #Skip hetatm.
                continue
            resname = sample_res.get_id()[1]
            key = str(resname) + chain
            if key in common_residues:
                for a in atom_for_sup:
                    atom_key = key + '.' + a
                    if a in sample_res and atom_key in atoms_def_in_both:
                        chain_sample[chain].append(sample_res[a])

        #if key in common_residues:
        #     print key
        #sample_atoms.append(sample_res['CA'])
        #common_interface.append(key)

    assert len(ref_atoms) != 0, "length of native is zero"
    assert len(sample_atoms) != 0, "length of model is zero"
    assert len(ref_atoms) == len(
        sample_atoms
    ), "Different number of atoms in native and model %d %d\n" % (
        len(ref_atoms), len(sample_atoms))

    # Step 1: superimpose on the interface atoms; this moves the whole model in place
    super_imposer = Bio.PDB.Superimposer()
    super_imposer.set_atoms(ref_atoms, sample_atoms)
    super_imposer.apply(sample_model.get_atoms())

    # Interface RMSD
    irms = super_imposer.rms

    # Requires exactly two chains in the model
    (chain1, chain2) = list(chain_sample.keys())

    ligand_chain = chain1
    receptor_chain = chain2
    len1 = len(chain_res[chain1])
    len2 = len(chain_res[chain2])

    assert len1 != 0, "%s chain has zero length!\n" % chain1
    assert len2 != 0, "%s chain has zero length!\n" % chain2

    # The chain with more selected atoms is treated as the receptor
    class1 = 'ligand'
    class2 = 'receptor'
    if (len(chain_sample[chain1]) > len(chain_sample[chain2])):
        receptor_chain = chain1
        ligand_chain = chain2
        class1 = 'receptor'
        class2 = 'ligand'

    #print len1
    #print len2
    #print chain_sample.keys()

    #Set to align on receptor
    assert len(chain_ref[receptor_chain]) == len(
        chain_sample[receptor_chain]
    ), "Different number of atoms in native and model receptor (chain %c) %d %d\n" % (
        receptor_chain, len(
            chain_ref[receptor_chain]), len(chain_sample[receptor_chain]))

    # Step 2: superimpose on the receptor; the model is moved in place again
    super_imposer.set_atoms(chain_ref[receptor_chain],
                            chain_sample[receptor_chain])
    super_imposer.apply(sample_model.get_atoms())
    receptor_chain_rms = super_imposer.rms
    #print receptor_chain_rms
    #print dir(super_imposer)
    #print chain1_rms

    #Grep out the transformed ligand coords

    #print ligand_chain

    #print chain_ref[ligand_chain]
    #print chain_sample[ligand_chain]
    #l1=len(chain_ref[ligand_chain])
    #l2=len(chain_sample[ligand_chain])

    assert len(chain_ref[ligand_chain]) != 0 or len(
        chain_sample[ligand_chain]
    ) != 0, "Zero number of equivalent atoms in native and model ligand (chain %s) %d %d.\nCheck that the residue numbers in model and native is consistent\n" % (
        ligand_chain, len(
            chain_ref[ligand_chain]), len(chain_sample[ligand_chain]))

    assert len(chain_ref[ligand_chain]) == len(
        chain_sample[ligand_chain]
    ), "Different number of atoms in native and model ligand (chain %c) %d %d\n" % (
        ligand_chain, len(
            chain_ref[ligand_chain]), len(chain_sample[ligand_chain]))

    # Ligand coordinates as they are after the receptor superposition
    coord1 = np.array([atom.coord for atom in chain_ref[ligand_chain]])
    coord2 = np.array([atom.coord for atom in chain_sample[ligand_chain]])

    #coord1=np.array([atom.coord for atom in chain_ref[receptor_chain]])
    #coord2=np.array([atom.coord for atom in chain_sample[receptor_chain]])

    #print len(coord1)
    #print len(coord2)

    sup = SVDSuperimposer()
    Lrms = sup._rms(
        coord1,
        coord2)  #using the private _rms function which does not superimpose

    #super_imposer.set_atoms(chain_ref[ligand_chain], chain_sample[ligand_chain])
    #super_imposer.apply(sample_model.get_atoms())
    #coord1=np.array([atom.coord for atom in chain_ref[receptor_chain]])
    #coord2=np.array([atom.coord for atom in chain_sample[receptor_chain]])
    #Rrms= sup._rms(coord1,coord2)
    #should give same result as above line
    #diff = coord1-coord2
    #l = len(diff) #number of atoms
    #from math import sqrt
    #print sqrt(sum(sum(diff*diff))/l)
    #print np.sqrt(np.sum(diff**2)/l)
    # DockQ = mean of fnat and the two RMSDs scaled as 1 / (1 + (rms / d)^2),
    # with d = 1.5 for irms and d = 8.5 for Lrms
    DockQ = (float(fnat) + 1 / (1 + (irms / 1.5) * (irms / 1.5)) + 1 /
             (1 + (Lrms / 8.5) * (Lrms / 8.5))) / 3
    info = {}
    info['DockQ'] = DockQ
    info['irms'] = irms
    info['Lrms'] = Lrms
    info['fnat'] = fnat
    info['nat_correct'] = nat_correct
    info['nat_total'] = nat_total

    info['fnonnat'] = fnonnat
    info['nonnat_count'] = nonnat_count
    info['model_total'] = model_total

    info['chain1'] = chain1
    info['chain2'] = chain2
    info['len1'] = len1
    info['len2'] = len2
    info['class1'] = class1
    info['class2'] = class2

    return info
def run_system(dir): pdb = os.path.basename(dir).split('_')[0] org_dir = os.getcwd() os.chdir(dir) f_coord = "coord.h5" f_RMSD = "RMSD.txt" f_OC = os.path.join("..","..","cmap_coordinates",pdb+'.txt') if not os.path.exists(f_OC): print "Missing coordinates for cmap", dir os.chdir(org_dir) return dir if not os.path.exists(f_coord): print "Missing coordinates, extract_coordinates.py first", dir os.chdir(org_dir) return dir if os.path.exists(f_RMSD) and not _FORCE: print "RMSD file exists, skipping", dir os.chdir(org_dir) return dir h5 = h5py.File(f_coord,'r') C = h5["coord"][:] h5.close() OC = np.loadtxt(f_OC) # Move the coordinates to something sensible #C -= C.mean(axis=0) #OC -= OC.mean(axis=0) median_OC = np.median([np.linalg.norm(a-b) for a,b in zip(OC,OC[1:])]) median_C = np.median([np.linalg.norm(a-b) for a,b in zip(C[-1],C[-1][1:])]) assert(C[0].shape == OC.shape) RMSD = [] org_RMSD = [] sup = SVDSuperimposer() RG = [] OC -= OC.mean(axis=0) OC_RG = ((np.linalg.norm(OC,axis=1)**2).sum()/len(OC)) ** 0.5 for cx in C: cx -= cx.mean(axis=0) rg_cx = ((np.linalg.norm(cx,axis=1)**2).sum()/len(cx)) ** 0.5 RG.append(rg_cx) sup.set(OC,cx) sup.run() RMSD.append(sup.get_rms()) org_RMSD.append(sup.get_init_rms()) rot, tran = sup.get_rotran() cx = np.dot(cx, rot) + tran RMSD = np.array(RMSD) org_RMSD = np.array(org_RMSD) RG = np.array(RG) #print dir, RMSD[-20:].mean(), org_RMSD[-20:].mean(),RG[-20:].mean() print "{} {: 0.4f} {: 0.4f}".format(dir, RMSD[-200:].mean(), RG[-200:].mean() / OC_RG) ''' from mpl_toolkits.mplot3d import Axes3D import matplotlib.pyplot as plt fig = plt.figure() ax = fig.add_subplot(111, projection='3d') ax.scatter(OC[:,0],OC[:,1],OC[:,2],'b') #ax.plot(OC[:,0],OC[:,1],OC[:,2],'k',alpha=0.5) ax.scatter(cx[:,0],cx[:,1],cx[:,2],color='r') #ax.plot(cx[:,0],cx[:,1],cx[:,2],'k',alpha=0.5) plt.show() exit() print OC #exit() ''' np.savetxt(f_RMSD,RMSD) os.chdir(org_dir) return dir
def compute_centroid(reader, mean_structure, indexes, num_confs, start=None, stop=None):
    """
    Find the configuration of a trajectory that fits the mean structure best.

    Every configuration is superimposed on the mean structure (using only the
    particles selected by `indexes` for the fit) and the aligned configuration
    with the lowest RMS deviation is kept.

    Parameters:
        reader: An active trajectory reader; read() must return a system with
            positions, a1s, a3s, time and inbox(), or False at the end.
        mean_structure (numpy.array): Reference coordinates for the fit; must
            have the same shape as positions[indexes].
        indexes: Index selecting the particles used for the alignment.
        num_confs (int): The number of configurations in the reader.
        <optional> start (int): Number of configurations to skip before the first one read. Used if parallel.
        <optional> stop (int): Maximum number of configurations to process. Used if parallel.

    Returns:
        centroid_candidate (numpy.array): Aligned positions of the best-fitting configuration.
        centroid_a1 (numpy.array): Its rotated a1 orientation vectors.
        centroid_a3 (numpy.array): Its rotated a3 orientation vectors.
        lowest_rmsf (float): The RMS deviation of that configuration (inf if none was read).
        centroid_t: The time of that configuration (None if none was read).
    """
    stop = num_confs if stop is None else int(stop)
    start = 0 if start is None else int(start)
    confid = 0

    # Use the single-value decomposition method for superimposing configurations
    sup = SVDSuperimposer()

    # inf instead of a large magic number: the first frame read always becomes
    # the initial candidate, however poor its fit.
    lowest_rmsf = float("inf")
    centroid_candidate = np.zeros_like(mean_structure)
    centroid_a1 = np.zeros_like(mean_structure)
    centroid_a3 = np.zeros_like(mean_structure)
    # Defined up front so the return below cannot hit an unbound name when no
    # configuration is read (e.g. an empty chunk in a parallel run).
    centroid_t = None

    mysystem = reader.read(n_skip=start)

    while mysystem != False and confid < stop:
        mysystem.inbox()

        # calculate alignment transform from the indexed particles only
        sup.set(mean_structure, mysystem.positions[indexes])
        sup.run()
        rot, tran = sup.get_rotran()

        # apply it to all particles: result[k, j] = sum_i rot[i, j] * v[k, i];
        # orientation vectors are rotated but not translated
        cur_conf = np.einsum('ij, ki -> kj', rot, mysystem.positions) + tran
        cur_conf_a1 = np.einsum('ij, ki -> kj', rot, mysystem.a1s)
        cur_conf_a3 = np.einsum('ij, ki -> kj', rot, mysystem.a3s)

        RMSF = sup.get_rms()
        print("Frame number:", confid, "RMSF:", RMSF)

        if RMSF < lowest_rmsf:
            centroid_candidate = cur_conf
            centroid_a1 = cur_conf_a1
            centroid_a3 = cur_conf_a3
            lowest_rmsf = RMSF
            centroid_t = mysystem.time

        confid += 1
        mysystem = reader.read()

    return centroid_candidate, centroid_a1, centroid_a3, lowest_rmsf, centroid_t
except ImportError: from Bio import MissingPythonDependencyError raise MissingPythonDependencyError( "Install NumPy if you want to use Bio.SVDSuperimposer.") from Bio.SVDSuperimposer import SVDSuperimposer # start with two coordinate sets (Nx3 arrays - Float0) x = array([[51.65, -1.90, 50.07], [50.40, -1.23, 50.65], [50.68, -0.04, 51.54], [50.22, -0.02, 52.85]], 'f') y = array([[51.30, -2.99, 46.54], [51.09, -1.88, 47.58], [52.36, -1.20, 48.03], [52.71, -1.18, 49.38]], 'f') sup = SVDSuperimposer() # set the coords # y will be rotated and translated on x sup.set(x, y) # do the lsq fit sup.run() # get the rmsd rms = sup.get_rms() # get rotation (right multiplying!) and the translation rot, tran = sup.get_rotran() # rotate y on x manually
def compute_mean(reader, align_conf, indexes, num_confs, start=None, stop=None):
    """
    Accumulate the aligned positions and orientations of a trajectory.

    Structured to work with the multiprocessing process from UTILS/parallelize.py.
    The function returns *sums*, not means: the caller divides by the returned
    configuration count (after combining chunks, if run in parallel).

    Parameters:
        reader: An active trajectory reader. Only ``read(n_skip=...)`` and
            ``read()`` are used; the loop ends when ``read()`` returns False.
        align_conf (numpy.array): Reference coordinates every frame is aligned to,
            one (x, y, z) row per indexed particle. It must have the same shape
            as ``positions[indexes]``.
        indexes: Index (list/array/slice) selecting the particles used for the fit.
        num_confs (int): The number of configurations in the reader.
        <optional> start (int): Number of configurations to skip before the first
            read. Passing it marks the call as a parallel worker.
        <optional> stop (int): Maximum number of configurations to process.
            Used if parallel. Defaults to ``num_confs``.

    Returns:
        mean_pos_storage (numpy.array): For each particle, the sum of positions in all configurations read.
        mean_a1_storage (numpy.array): For each particle, the sum of a1 orientation vectors in all configurations read.
        mean_a3_storage (numpy.array): For each particle, the sum of a3 orientation vectors in all configurations read.
        intermediate_mean_structures (list): mean structures computed periodically during the summing to check decorrelation (serial runs only).
        confid (int): the number of configurations summed for the storage arrays.
    """
    # A caller that supplies `start` is treated as one worker of a parallel run.
    parallel = start is not None
    stop = num_confs if stop is None else int(stop)
    start = 0 if start is None else int(start)

    mysystem = reader.read(n_skip=start)
    n_nuc = len(mysystem.positions)

    # In serial mode an intermediate mean is stored every num_confs/10 frames to
    # check decorrelation. An interval of 0 (parallel run, or fewer than 10
    # configurations) disables it; keeping this an int with an explicit guard
    # avoids the modulo-by-zero a float interval of 0.0 would cause.
    intermediate_every = 0
    if not parallel:
        intermediate_every = int(num_confs // 10)

    # storage for the intermediate mean structures
    intermediate_mean_structures = []

    # the class doing the alignment of 2 structures
    sup = SVDSuperimposer()

    mean_pos_storage = np.zeros((n_nuc, 3))
    mean_a1_storage = np.zeros((n_nuc, 3))
    mean_a3_storage = np.zeros((n_nuc, 3))

    # for every conf in the current trajectory we add to the global sums
    confid = 0
    # read() signals the end of the data by returning False
    while mysystem != False and confid < stop:
        # NOTE(review): presumably maps the particles back into the box - confirm
        mysystem.inbox()
        indexed_cur_conf_pos = mysystem.positions[indexes]

        # calculate alignment on the indexed subset
        sup.set(align_conf, indexed_cur_conf_pos)
        sup.run()
        rot, tran = sup.get_rotran()

        # apply alignment to every particle; the einsum is a right multiplication
        # by `rot`, and orientation vectors are rotated but not translated
        mysystem.positions = np.einsum('ij, ki -> kj', rot, mysystem.positions) + tran
        mysystem.a1s = np.einsum('ij, ki -> kj', rot, mysystem.a1s)
        mysystem.a3s = np.einsum('ij, ki -> kj', rot, mysystem.a3s)

        mean_pos_storage += mysystem.positions
        mean_a1_storage += mysystem.a1s
        mean_a3_storage += mysystem.a3s

        # print the rmsd of the alignment in case anyone is interested...
        print("Frame:", confid, "Time:", mysystem.time, "RMSF:", sup.get_rms())

        # that's all we do for a frame
        confid += 1
        mysystem = reader.read()

        # We produce 10 intermediate means to check decorrelation.
        # This can't be done neatly in parallel
        if intermediate_every > 0 and confid % intermediate_every == 0:
            mp = mean_pos_storage / confid
            intermediate_mean_structures.append(prep_pos_for_json(mp))
            print("INFO: Calculated intermediate mean for {} ".format(confid))

    return (mean_pos_storage, mean_a1_storage, mean_a3_storage,
            intermediate_mean_structures, confid)
class ResidueMutator(object):
    """
    Replace a non-standard or incomplete residue with a fitted template.

    Template residues are taken from tripeptide structure files (residue 2 of
    chain " " in every model) and stored in ``self.candidates`` keyed by
    residue name.
    """

    # Key of the chemical-component entry naming the standard parent residue.
    _PARENT_KEY = '_chem_comp.mon_nstd_parent_comp_id'

    def __init__(self, tripeptides=None, components=None, standard_residues=None):
        """
        The mutator object takes a non-standard residue or incomplete residue
        and modifies it.

        Parameters:
            tripeptides: Iterable of structure files readable by ``PDBParser``.
                Defaults to ``data.tripeptides``.
            components: Mapping of residue name to its chemical-component
                entry. Defaults to ``data.chem_components``.
            standard_residues: Container of standard residue names.
                Defaults to ``data.standard_residues``.
        """
        # get defaults if not provided
        if standard_residues is None:
            standard_residues = data.standard_residues
        if tripeptides is None:
            tripeptides = data.tripeptides
        if components is None:
            components = data.chem_components

        self.components = components
        self.candidates = {}
        self.standard_residues = standard_residues
        self.imposer = SVDSuperimposer()
        self.parser = PDBParser(PERMISSIVE=1, QUIET=True)

        # build up candidate structures: one template per model of each file
        for fn in tripeptides:
            structure = self.parser.get_structure("", fn)
            resn = structure[0][" "][2].get_resname()
            self.candidates[resn] = [model[" "][2] for model in structure]

    def mutate(self, residue, repair=False):
        """
        Build a standard replacement for ``residue``.

        The best-fitting candidate of the parent residue type is superimposed
        on the side-chain atoms shared with ``residue``, stripped of
        hydrogens, and given the main-chain atoms of ``residue``.

        Parameters:
            residue: Residue-like object providing ``get_resname()``,
                ``in`` tests on atom names and ``residue[atom_name]``.
            repair (bool): If True the residue is used as its own parent,
                i.e. it is rebuilt rather than mutated.

        Returns:
            ``residue`` itself if it is already standard (and ``repair`` is
            False), the new candidate residue on success, or False if the
            residue cannot be mutated.
        """
        resn = residue.get_resname()

        if repair:
            # use residue as its own parent
            parn = resn
        else:
            if self.standard(resn):
                # the residue is already a standard residue, do not need to mutate.
                return residue
            if not self.modified(resn):
                # unknown residue, no parent entry, or the parent is itself
                # non-standard: can't mutate
                return False
            parn = self.components[resn][self._PARENT_KEY]

        if parn not in self.candidates:
            # parent not in candidate structures
            return False
        if resn not in self.components or parn not in self.components:
            # no atom lists available to drive the fit
            return False

        # side chain atoms common to the fixed residue and its standard parent
        shared_names = (set(self.components[resn]['side_chain_atoms'])
                        & set(self.components[parn]['side_chain_atoms']))

        # keep only those actually present in the residue
        atom_list = [name for name in shared_names if name in residue]
        if not atom_list:
            # nothing to superimpose on
            return False

        # get side chain atom coordinates
        fixed_coord = np.array(
            [residue[name].get_coord() for name in atom_list], dtype=float)

        # loop over candidates, finding best RMSD
        min_rms = np.inf
        rotm = None
        tran = None
        min_candidate = None
        for candidate in self.candidates[parn]:
            moved_coord = np.array(
                [candidate[name].get_coord() for name in atom_list], dtype=float)

            # perform SVD fitting
            self.imposer.set(fixed_coord, moved_coord)
            self.imposer.run()
            rms = self.imposer.get_rms()
            if rms < min_rms:
                min_rms = rms
                rotm, tran = self.imposer.get_rotran()
                min_candidate = candidate

        if min_candidate is None:
            # no usable candidate (empty list or no finite RMS)
            return False

        # copy the candidate to a new object so the stored template is untouched
        candidate = min_candidate.copy()
        candidate.transform(rotm, tran)
        stripHydrogens(candidate)

        # replace backbone atoms of candidate with those of the original residue
        for atom in self.components[resn]['main_chain_atoms']:
            if atom not in residue:
                continue
            if atom not in candidate:
                candidate.add(residue[atom].copy())
            candidate[atom].set_coord(residue[atom].get_coord())

        return candidate

    def standard(self, resname):
        """Return True if ``resname`` is one of the standard residues."""
        return resname in self.standard_residues

    def modified(self, resname):
        """
        Return True if ``resname`` is a non-standard residue whose component
        entry names a standard parent residue.
        """
        if self.standard(resname):
            # it's standard, not modified
            return False
        if resname not in self.components:
            return False
        entry = self.components[resname]
        if self._PARENT_KEY not in entry:
            # has no standard parent field - can't be modified
            return False
        return entry[self._PARENT_KEY] in self.standard_residues
def merge_cc(coords_list, res_overlap, n_cc_helices, n_atoms_per_res=5):
    """
    Chain coiled-coil segments into one structure by aligning their overlaps.

    Each segment is superimposed on the previous (already aligned) one using
    the last ``overlap`` residues of every helix of the previous segment and
    the first ``overlap`` residues of every helix of the new one. The
    overlapping atoms are then blended linearly from the previous segment to
    the next, and the rest of the new segment is appended.

    Parameters:
        coords_list: Sequence of (n_atoms, 3) coordinate arrays, one per
            segment. Within a segment the helices are stored one after the
            other, each with the same number of atoms. The per-helix size
            is taken from the first segment and reused for all of them.
        res_overlap: Number of overlapping residues for each consecutive pair
            of segments (paired with ``coords_list[1:]``).
        n_cc_helices (int): Number of helices in every segment.
        n_atoms_per_res (int): Number of atoms stored per residue.
            Defaults to 5, the value that used to be hard-coded.

    Returns:
        tuple: ``(coords, rmsd)`` with the merged coordinates (helix after
        helix) and the square root of the summed squared overlap RMS values.
    """
    ref_coords = coords_list[0]
    aligned_coords = [deepcopy(coords_list[0])]
    n_atoms_mono = int(ref_coords.shape[0] / n_cc_helices)
    helices = range(n_cc_helices)

    # Pass 1: align every segment onto the end of the previous one.
    msds = []
    for coords, cc_overlap in zip(coords_list[1:], res_overlap):
        n_atoms_overlap = cc_overlap * n_atoms_per_res

        # tail of each helix in the reference segment
        ref_atoms = np.concatenate(
            [ref_coords[(i + 1) * n_atoms_mono - n_atoms_overlap:(i + 1) * n_atoms_mono]
             for i in helices],
            axis=0)
        # head of each helix in the segment being attached
        sup_atoms = np.concatenate(
            [coords[i * n_atoms_mono:i * n_atoms_mono + n_atoms_overlap]
             for i in helices],
            axis=0)

        sup = SVDSuperimposer()
        sup.set(ref_atoms, sup_atoms)
        sup.run()
        msds.append(sup.get_rms() ** 2)

        rot, tran = sup.get_rotran()
        coord_new = np.dot(coords, rot) + tran
        aligned_coords.append(coord_new)
        # the next segment is aligned against this freshly placed one
        ref_coords = coord_new

    rmsd = np.sqrt(np.sum(msds))

    # Pass 2: stitch the aligned segments together, helix by helix.
    hi_all = [aligned_coords[0][i * n_atoms_mono:(i + 1) * n_atoms_mono]
              for i in helices]
    for coords, cc_overlap in zip(aligned_coords[1:], res_overlap):
        hi = [coords[i * n_atoms_mono:(i + 1) * n_atoms_mono] for i in helices]
        n_atoms_overlap = cc_overlap * n_atoms_per_res

        # Blend the overlap: the weight of the new segment grows linearly with
        # the residue position inside the overlap.
        for ind_overlap in range(cc_overlap):
            weight = (ind_overlap + 1) / float(cc_overlap + 1)
            for ind_atom in range(n_atoms_per_res):
                ind_shift = ind_overlap * n_atoms_per_res + ind_atom
                for i in helices:
                    coordi_prev = hi_all[i][-n_atoms_overlap + ind_shift]
                    coordi_next = hi[i][ind_shift]
                    hi_all[i][-n_atoms_overlap + ind_shift] = (
                        (1 - weight) * coordi_prev + weight * coordi_next)

        # append the non-overlapping remainder of the new segment
        for i in helices:
            hi_all[i] = np.concatenate([hi_all[i], hi[i][n_atoms_overlap:]], axis=0)

    res_dimer = np.concatenate(hi_all, axis=0)
    return res_dimer, rmsd