def __sub__(self, other):
    """Compute the RMSD between this fragment and another one.

    The ``coords_ca`` arrays of both fragments are handed to an
    ``SVDSuperimposer`` (this fragment first, ``other`` second), the
    superimposition is run, and the resulting RMS value is returned.

    :param other: object exposing a ``coords_ca`` attribute
        (presumably another fragment -- confirm against the class)
    :return: rmsd between fragments
    :rtype: float

    Examples
    --------
    >>> rmsd = fragment1 - fragment2

    """
    superimposer = SVDSuperimposer()
    superimposer.set(self.coords_ca, other.coords_ca)
    superimposer.run()
    rmsd = superimposer.get_rms()
    return rmsd
def get_cov(reader, align_conf, num_confs, start=None, stop=None):
    """
        Accumulates the outer products of the per-frame deviations from a reference structure.

        Every frame read from the reader is superimposed on align_conf, the
        flattened difference to align_conf is computed, and its outer product
        with itself is added to a running sum. The sum is NOT divided by the
        number of frames here.

        Parameters:
            reader (readers.ErikReader): An active reader on the trajectory file to analyze.
            align_conf (numpy.array): The position of each particle in the reference (mean) configuration.
                NOTE(review): the einsum below treats configurations as N rows of 3 coordinates -- confirm.
            num_confs (int): The number of configurations in the reader. Used as `stop` when `stop` is None.
            <optional> start (int): Passed to the reader as the number of configurations to skip. Used if parallel.
            <optional> stop (int): Upper bound on the number of frames processed by this call. Used if parallel.
                NOTE(review): the frame counter starts at 0 regardless of `start`.

        Returns:
            covariation_matrix (numpy.array): Square matrix with 3 * (number of particles) rows,
                holding the summed outer products of the deviations.
    """
    stop = num_confs if stop is None else int(stop)
    start = 0 if start is None else int(start)

    mysystem = reader.read(n_skip=start)

    # one row/column per Cartesian coordinate of every particle
    n_coords = len(mysystem.positions) * 3
    covariation_matrix = np.zeros((n_coords, n_coords))
    superimposer = SVDSuperimposer()
    frames_done = 0

    # for every configuration in the trajectory chunk, align it to the
    # reference and accumulate the outer product of its deviations
    while mysystem != False and frames_done < stop:
        print("-->", "frame", frames_done, "time={}".format(mysystem.time))
        mysystem.inbox()
        positions = mysystem.positions
        superimposer.set(align_conf, positions)
        superimposer.run()
        rot, tran = superimposer.get_rotran()

        # rotate every position vector by rot, then translate
        aligned = np.einsum('ij, ki -> kj', rot, positions) + tran
        deviation = (aligned - align_conf).flatten()
        covariation_matrix += np.einsum('i,j -> ij', deviation, deviation)

        frames_done += 1
        mysystem = reader.read()

    return covariation_matrix
def run_sup3d(coord1, coord2):
    """Superimpose ``coord2`` on ``coord1`` and print the results.

    Both inputs are converted with ``np.array`` and handed to an
    ``SVDSuperimposer`` (``coord1`` first, ``coord2`` second). After the
    superimposition has been run, four values are printed, one per line:
    the RMSD, the rotation matrix, the translation vector and the
    transformed coordinates.

    :param coord1: first coordinate set, anything ``np.array`` accepts
    :param coord2: second coordinate set, anything ``np.array`` accepts
    :return: None -- the results are only printed
    """
    sup = SVDSuperimposer()
    # a freshly created superimposer holds no coordinates; set() loads both sets
    sup.set(np.array(coord1), np.array(coord2))
    # run() performs the actual superimposition
    sup.run()
    rmsd = sup.get_rms()
    # rotation matrix and translation vector of the superimposition
    rot, tran = sup.get_rotran()
    # coordinates after the transformation has been applied
    tcoord = sup.get_transformed()
    # Single-argument print(...) produces the same output on Python 2 and 3,
    # whereas the former print statements were a syntax error on Python 3.
    print(rmsd)
    print(rot)
    print(tran)
    print(tcoord)
    return
def __sub__(self, other):
    """Compute the RMSD between this fragment and another one.

    The ``coords_ca`` arrays of both fragments are handed to an
    ``SVDSuperimposer`` (this fragment first, ``other`` second), the
    superimposition is run, and the resulting RMS value is returned.

    :param other: object exposing a ``coords_ca`` attribute
        (presumably another fragment -- confirm against the class)
    :return: rmsd between fragments
    :rtype: float

    Examples
    --------
    This is an incomplete but illustrative example::

        rmsd = fragment1 - fragment2

    """
    superimposer = SVDSuperimposer()
    superimposer.set(self.coords_ca, other.coords_ca)
    superimposer.run()
    rmsd = superimposer.get_rms()
    return rmsd
def _superimpose_atoms(ref_points, points, atoms):
    """Superimpose ``points`` on ``ref_points`` using the shared ``atoms``.

    Only the entries of ``atoms`` that are present in both ``ref_points``
    and ``points`` take part in the fit. At least three such entries are
    required.

    :param ref_points: mapping from atom key to reference coordinate
    :param points: mapping from atom key to coordinate to be moved
    :param atoms: iterable of atom keys to try to use for the fit
    :return: ``(transformed_points, rot, tran, rms)`` where
        ``transformed_points`` is the result of ``_apply_rot_tran`` on all
        of ``points``; ``(None, None, None, None)`` when any argument is
        ``None`` or fewer than three atoms are shared
    """
    failure = (None, None, None, None)
    if ref_points is None or points is None or atoms is None:
        return failure

    # keep only the atoms known to both coordinate sets, preserving order
    shared = [a for a in atoms if a in ref_points and a in points]
    if len(shared) < 3:
        return failure

    ref_vec = [ref_points[a] for a in shared]
    vec = [points[a] for a in shared]

    superimposer = SVDSuperimposer()
    superimposer.set(np.array(ref_vec, 'f'), np.array(vec, 'f'))
    superimposer.run()
    rot, tran = superimposer.get_rotran()
    rms = superimposer.get_rms()

    # the fit uses the shared atoms only, the transform is applied to everything
    moved = _apply_rot_tran(points, rot, tran)
    return (moved, rot, tran, rms)
def setUp(self):
    """Create two four-point coordinate sets and a superimposer loaded with them.

    Stores the coordinate arrays as ``self.x`` and ``self.y`` and an
    ``SVDSuperimposer`` on which ``set(self.x, self.y)`` has already been
    called (but not ``run()``) as ``self.sup``.
    """
    first_set = array([
        [51.65, -1.90, 50.07],
        [50.40, -1.23, 50.65],
        [50.68, -0.04, 51.54],
        [50.22, -0.02, 52.85],
    ])
    second_set = array([
        [51.30, -2.99, 46.54],
        [51.09, -1.88, 47.58],
        [52.36, -1.20, 48.03],
        [52.71, -1.18, 49.38],
    ])
    self.x = first_set
    self.y = second_set

    superimposer = SVDSuperimposer()
    superimposer.set(self.x, self.y)
    self.sup = superimposer
def get_pca(reader, align_conf, num_confs, start=None, stop=None):
    """
        Collects the per-frame deviations from a reference structure.

        Every frame read from the reader is superimposed on align_conf and the
        flattened difference to align_conf is stored as one row of the result.

        Parameters:
            reader (readers.LorenzoReader2): An active reader on the trajectory file to analyze.
            align_conf (numpy.array): The position of each particle in the reference (mean) configuration.
                NOTE(review): the einsum below treats configurations as N rows of 3 coordinates -- confirm.
            num_confs (int): The number of configurations in the reader. Used as `stop` when `stop` is None.
            <optional> start (int): Passed to the reader as the number of configurations to skip. Used if parallel.
            <optional> stop (int): Number of rows allocated and upper bound on the frames processed. Used if parallel.

        Returns:
            deviations_matrix (numpy.array): One row per frame holding the difference in position
                from align_conf for every coordinate.
                NOTE(review): allocated with np.empty, so rows beyond the last frame actually read
                are left uninitialised if the reader runs out before `stop` frames.
    """
    stop = num_confs if stop is None else int(stop)
    start = 0 if start is None else int(start)

    mysystem = reader._get_system(N_skip=start)

    n_columns = len(align_conf) * 3
    deviations_matrix = np.empty((stop, n_columns))
    superimposer = SVDSuperimposer()
    row = 0

    # for every configuration in the trajectory chunk, align it to the
    # reference and record the positional difference of every particle
    while mysystem != False and row < stop:
        print("-->", mysystem._time)
        mysystem.inbox()
        positions = fetch_np(mysystem)
        superimposer.set(align_conf, positions)
        superimposer.run()
        rot, tran = superimposer.get_rotran()

        # rotate every position vector by rot, then translate
        aligned = np.einsum('ij, ki -> kj', rot, positions) + tran
        deviations_matrix[row] = (aligned - align_conf).flatten()

        row += 1
        mysystem = reader._get_system()

    return deviations_matrix
def getRMSD(self, nSeqUnbound, seqUnboundAli, nSeqBound, seqBoundAli):
    '''
    Computes the rmsd between the paired atoms of an unbound and a bound chain.

    The atom pairs come from self.build2SeqsDictMap; the rmsd is evaluated
    with SVDSuperimposer._rms directly on the paired coordinates, i.e.
    set()/run() are never called here.

    @param nSeqUnbound: int. The index of the unbound sequence that will be aligned
    @param seqUnboundAli: str. The alignment result for unbound sequence number nSeqUnbound
    @param nSeqBound: int. The index of the bound sequence that will be aligned
    @param seqBoundAli: str. The alignment result for bound sequence number nSeqBound
    @return rmsd. float. Root mean square deviation over the paired atoms of both input chains
    @return boundToUnboundResDict. {Bio.PDB.Residue_bound --> Bio.PDB.Residue_unbound}
    '''
    mapping = self.build2SeqsDictMap(nSeqUnbound, seqUnboundAli, nSeqBound,
                                     seqBoundAli)
    boundToUnboundResDict, atomBoundToUnboundMap = mapping

    # split the sequence of atom pairs into two parallel tuples
    # (presumably bound atoms first, unbound second -- verify against build2SeqsDictMap)
    first_atoms, second_atoms = zip(*atomBoundToUnboundMap)
    first_coords = np.array([atom.get_coord() for atom in first_atoms])
    second_coords = np.array([atom.get_coord() for atom in second_atoms])

    rmsd = SVDSuperimposer()._rms(first_coords, second_coords)
    return rmsd, boundToUnboundResDict
def sel_straight(coords_arr, n_cc_helices):
    """Pick the coordinate set whose consecutive helices superimpose best.

    Each entry of ``coords_arr`` is cut into ``n_cc_helices`` equally sized
    consecutive slices (the slice length is derived from the first entry).
    Neighbouring slices are superimposed pairwise and the mean of the
    resulting RMS values is the score of that entry.

    :param coords_arr: sequence of coordinate arrays
    :param n_cc_helices: number of slices every array is cut into
    :return: ``(index, score)`` of the entry with the lowest mean RMS
    """
    atoms_per_helix = int(coords_arr[0].shape[0] / n_cc_helices)

    mean_rms_per_entry = []
    for coords in coords_arr:
        helices = [
            coords[k * atoms_per_helix:(k + 1) * atoms_per_helix]
            for k in range(n_cc_helices)
        ]

        pair_rms = []
        # compare every helix with its successor
        for current, following in zip(helices, helices[1:]):
            superimposer = SVDSuperimposer()
            superimposer.set(current, following)
            superimposer.run()
            pair_rms.append(superimposer.get_rms())

        mean_rms_per_entry.append(np.mean(pair_rms))

    best_index = np.argmin(mean_rms_per_entry)
    best_score = np.min(mean_rms_per_entry)
    return best_index, best_score
def computeRMSD():
    """Return the RMS after superimposing ``ca_atoms`` on ``ca_atoms_pdb``.

    Reads the module-level sequences ``ca_atoms`` (moving) and
    ``ca_atoms_pdb`` (fixed). The first three items of every entry are
    used as the x, y and z coordinates.

    If the two sequences differ in length an error message is printed and
    the interpreter is terminated through ``exit()``.

    :return: RMS value reported by ``SVDSuperimposer`` after ``run()``
    """
    if len(ca_atoms) != len(ca_atoms_pdb):
        # print(...) with a single argument behaves the same on Python 2 and 3;
        # the former print statement was a syntax error on Python 3.
        print("Error. Length mismatch!")
        exit()

    l = len(ca_atoms)
    fixed_coord = numpy.zeros((l, 3))
    moving_coord = numpy.zeros((l, 3))
    for i, (fixed_atom, moving_atom) in enumerate(zip(ca_atoms_pdb, ca_atoms)):
        # only the first three items of an entry are coordinates
        fixed_coord[i] = numpy.array(
            [fixed_atom[0], fixed_atom[1], fixed_atom[2]])
        moving_coord[i] = numpy.array(
            [moving_atom[0], moving_atom[1], moving_atom[2]])

    sup = SVDSuperimposer()
    sup.set(fixed_coord, moving_coord)
    sup.run()
    rms = sup.get_rms()
    return rms
def set_atoms(self, fixed, moving):
    """Compute the transformation that superimposes ``moving`` on ``fixed``.

    The coordinates of both atom lists are collected, superimposed with an
    ``SVDSuperimposer`` (``fixed`` as first, ``moving`` as second argument
    of ``set``) and the results are stored on the instance:
    ``self.rms`` holds the RMS value and ``self.rotran`` the
    (rotation, translation) pair. The atoms themselves are not modified.

    :param fixed: list of (fixed) atoms
    :param moving: list of (moving) atoms
    :type fixed,moving: [L{Atom}, L{Atom},...]
    :raises PDBException: if the two lists differ in length
    """
    if len(fixed) != len(moving):
        raise PDBException("Fixed and moving atom lists differ in size")

    n_atoms = len(fixed)
    fixed_coord = numpy.zeros((n_atoms, 3))
    moving_coord = numpy.zeros((n_atoms, 3))
    for row, (fixed_atom, moving_atom) in enumerate(zip(fixed, moving)):
        fixed_coord[row] = fixed_atom.get_coord()
        moving_coord[row] = moving_atom.get_coord()

    superimposer = SVDSuperimposer()
    superimposer.set(fixed_coord, moving_coord)
    superimposer.run()
    self.rms = superimposer.get_rms()
    self.rotran = superimposer.get_rotran()
def __init__(self, static, moving):
    """
    Align two structures

    After construction ``self.rms`` holds the RMS of the superimposition,
    ``self.static`` the reference exactly as passed in, and ``self.moving``
    a list with every entry of ``moving`` rotated and translated onto the
    reference.

    :param static: the reference structure
    :param moving: the structure to be aligned to the reference
    """
    superimposer = SVDSuperimposer()
    superimposer.set(np.asarray(static), np.asarray(moving))
    superimposer.run()
    rot, trans = superimposer.get_rotran()

    self.rms = superimposer.get_rms()
    self.static = static

    # apply the fitted rotation and translation entry by entry
    transformed = []
    for index in range(len(moving)):
        point = np.asarray(moving[index])
        transformed.append(np.dot(point, rot) + trans)
    self.moving = transformed
def align(predicted, gt):
    """Superimpose ``predicted`` on ``gt`` and return the moved coordinates.

    ``predicted`` is first multiplied by a diagonal scaling matrix. The
    three scale factors are fixed to 1, so the scaling is currently an
    identity transform; a grid search over the factors (0.9 to 1.1 in
    steps of 0.05, scored by dRMSD) was sketched here earlier but is
    disabled.

    :param predicted: coordinates to move, anything ``np.array`` accepts
    :param gt: reference coordinates, anything ``np.array`` accepts
    :return: the transformed coordinates reported by the superimposer
    """
    scale_x = 1
    scale_y = 1
    scale_z = 1

    # Use the fixed scale factors to perform the final alignment.
    superimposer = SVDSuperimposer()
    scaling = np.diag([scale_x, scale_y, scale_z])
    scaled_predicted = np.dot(np.array(predicted), scaling)
    superimposer.set(np.array(gt), scaled_predicted)
    superimposer.run()
    return superimposer.get_transformed()
def align(coordinate_file):
    '''
    1. Input: File contains lines, where each line contains the coordinates of a model,
       e.g., if model 1 has 70 atoms, each with 3 coordinates (3*70 = 210 coordinates),
       then the line corresponding to model 1 looks like this:
           210 x1 y1 z1 x2 y2 z2 ... x70 y70 z70
       The first number of every line is discarded.
    2. Aligns all the models with the first model in the coordinate_file.
    3. Returns: a tuple (modelDict, ref).
       modelDict maps the 0-based line number to the aligned model as a
       flattened numpy array (entry 0 is the unmodified first model).
       ref is the first model as an array with 3 columns, or an empty list
       when the file has no lines.
    '''
    modelDict = {}
    ref = []
    superimposer = SVDSuperimposer()

    with open(coordinate_file) as handle:
        for ind, line in enumerate(handle):
            # drop the leading count, keep the coordinates
            values = [float(token) for token in line.split()][1:]
            triples = [values[k:k + 3] for k in range(0, len(values), 3)]
            coords = array(triples, 'f')

            if ind == 0:
                # the first model is the reference and is stored unchanged
                ref = coords
                modelDict[ind] = np.ravel(ref)
            else:
                superimposer.set(ref, coords)
                superimposer.run()
                modelDict[ind] = np.ravel(superimposer.get_transformed())

    return modelDict, ref
def calc_DockQ(model, native, use_CA_only=False):
    """Compute the DockQ score and its components for a model/native pair.

    Steps, in order:

    1. Run the external ``fnat`` program (expected next to the running
       script) twice, with a trailing argument of 5 and of 10, and parse
       both outputs with ``parse_fnat``. The first run supplies fnat and
       the contact statistics; the second supplies the interface residue
       keys used for iRMS.
       NOTE(review): 5 and 10 look like distance cutoffs -- confirm
       against the ``fnat`` program.
    2. Parse both PDB files and collect the atoms (backbone ``CA, C, N, O``
       or ``CA`` only) that exist in both model and native. Residues are
       keyed as ``<residue number><chain id>``; hetero residues are skipped.
    3. Superimpose the model on the native using the interface atoms; the
       RMS of that fit is ``irms``.
    4. Superimpose the model on the native using the receptor chain (the
       chain with more matched atoms in the model) and compute the RMS of
       the ligand chain atoms WITHOUT further fitting; that is ``Lrms``.
    5. Combine: ``DockQ = (fnat + 1/(1+(irms/1.5)^2) + 1/(1+(Lrms/8.5)^2)) / 3``.

    The model structure is transformed in place by the superimpositions.

    :param model: path of the model PDB file
    :param native: path of the native PDB file
    :param use_CA_only: use only CA atoms instead of ``CA, C, N, O``
    :return: dict with the keys ``DockQ, irms, Lrms, fnat, nat_correct,
        nat_total, fnonnat, nonnat_count, model_total, chain1, chain2,
        len1, len2, class1, class2``
    :raises AssertionError: if ``fnat`` fails, no interface atoms are
        found, or model and native atom counts disagree
    :raises ValueError: if the model does not contain exactly two chains
        (from unpacking the chain ids)
    """
    # The `commands` module only exists on Python 2; on Python 3 the same
    # function lives in `subprocess`.
    try:
        from subprocess import getoutput
    except ImportError:  # Python 2
        from commands import getoutput

    exec_path = os.path.dirname(os.path.abspath(sys.argv[0]))
    atom_for_sup = ['CA', 'C', 'N', 'O']
    if use_CA_only:
        atom_for_sup = ['CA']

    # NOTE(review): both commands are executed through a shell with `model`
    # and `native` embedded verbatim; paths containing spaces or shell
    # metacharacters will break (or be interpreted by) the command line.
    cmd_fnat = exec_path + '/fnat ' + model + ' ' + native + ' 5'
    cmd_interface = exec_path + '/fnat ' + model + ' ' + native + ' 10'

    fnat_out = getoutput(cmd_fnat)
    (fnat, nat_correct, nat_total, fnonnat, nonnat_count, model_total,
     interface5A) = parse_fnat(fnat_out)
    assert fnat != -1, "Error running cmd: %s\n" % (cmd_fnat)

    inter_out = getoutput(cmd_interface)
    (fnat_bb, nat_correct_bb, nat_total_bb, fnonnat_bb, nonnat_count_bb,
     model_total_bb, interface) = parse_fnat(inter_out)
    assert fnat_bb != -1, "Error running cmd: %s\n" % (cmd_interface)

    # Start the parser and get the structures
    pdb_parser = Bio.PDB.PDBParser(QUIET=True)
    ref_structure = pdb_parser.get_structure("reference", native)
    sample_structure = pdb_parser.get_structure("model", model)

    # Use the first model in the pdb-files for alignment.
    # Change the number 0 if you want to align to another structure.
    ref_model = ref_structure[0]
    sample_model = sample_structure[0]

    ref_atoms = []      # native interface atoms used for the iRMS fit
    sample_atoms = []   # model interface atoms used for the iRMS fit
    chain_res = {}      # chain id -> residue keys present in the model

    # The four collections below are only ever tested for membership, so
    # sets give the same answers as the former lists in constant time.
    atoms_def_sample = set()   # atom keys present in the model
    atoms_def_in_both = set()  # atom keys present in model and native
    common_interface = set()   # interface residue keys seen in the model
    common_residues = set()    # residue keys with atoms matched in both

    # first read in sample
    for sample_chain in sample_model:
        chain = sample_chain.id
        for sample_res in sample_chain:
            if sample_res.get_id()[0] != ' ':  # Skip hetatm.
                continue
            resname = sample_res.get_id()[1]
            key = str(resname) + chain
            for a in atom_for_sup:
                atom_key = key + '.' + a
                if a in sample_res:
                    if atom_key in atoms_def_sample:
                        print(atom_key + ' already added (MODEL)!!!')
                    atoms_def_sample.add(atom_key)

    # then read in native also present in sample
    for ref_chain in ref_model:
        chain = ref_chain.id
        for ref_res in ref_chain:
            if ref_res.get_id()[0] != ' ':  # Skip hetatm.
                continue
            resname = ref_res.get_id()[1]
            key = str(resname) + chain
            for a in atom_for_sup:
                atom_key = key + '.' + a
                if a in ref_res and atom_key in atoms_def_sample:
                    if atom_key in atoms_def_in_both:
                        print(atom_key + ' already added (Native)!!!')
                    atoms_def_in_both.add(atom_key)

    # collect the model's residue keys and its interface atoms
    for sample_chain in sample_model:
        chain = sample_chain.id
        if chain not in chain_res:
            chain_res[chain] = []
        for sample_res in sample_chain:
            if sample_res.get_id()[0] != ' ':  # Skip hetatm.
                continue
            resname = sample_res.get_id()[1]
            key = str(resname) + chain
            chain_res[chain].append(key)
            if key in interface:
                for a in atom_for_sup:
                    atom_key = key + '.' + a
                    if a in sample_res and atom_key in atoms_def_in_both:
                        sample_atoms.append(sample_res[a])
                common_interface.add(key)

    # Iterate over all chains of the native to find the matching atoms
    chain_ref = {}
    for ref_chain in ref_model:
        chain = ref_chain.id
        if chain not in chain_ref:
            chain_ref[chain] = []
        for ref_res in ref_chain:
            if ref_res.get_id()[0] != ' ':  # Skip hetatm.
                continue
            resname = ref_res.get_id()[1]
            key = str(resname) + chain
            if key in chain_res[chain]:  # if key is present in sample
                for a in atom_for_sup:
                    atom_key = key + '.' + a
                    if a in ref_res and atom_key in atoms_def_in_both:
                        chain_ref[chain].append(ref_res[a])
                        common_residues.add(key)
            if key in common_interface:
                for a in atom_for_sup:
                    atom_key = key + '.' + a
                    if a in ref_res and atom_key in atoms_def_in_both:
                        ref_atoms.append(ref_res[a])

    # get the model atoms that are present in native
    chain_sample = {}
    for sample_chain in sample_model:
        chain = sample_chain.id
        if chain not in chain_sample:
            chain_sample[chain] = []
        for sample_res in sample_chain:
            if sample_res.get_id()[0] != ' ':  # Skip hetatm.
                continue
            resname = sample_res.get_id()[1]
            key = str(resname) + chain
            if key in common_residues:
                for a in atom_for_sup:
                    atom_key = key + '.' + a
                    if a in sample_res and atom_key in atoms_def_in_both:
                        chain_sample[chain].append(sample_res[a])

    assert len(ref_atoms) != 0, "length of native is zero"
    assert len(sample_atoms) != 0, "length of model is zero"
    assert len(ref_atoms) == len(
        sample_atoms
    ), "Different number of atoms in native and model %d %d\n" % (
        len(ref_atoms), len(sample_atoms))

    # fit on the interface atoms: the RMS of this fit is the iRMS
    super_imposer = Bio.PDB.Superimposer()
    super_imposer.set_atoms(ref_atoms, sample_atoms)
    super_imposer.apply(sample_model.get_atoms())
    irms = super_imposer.rms

    # exactly two chains are expected here
    (chain1, chain2) = chain_sample.keys()

    ligand_chain = chain1
    receptor_chain = chain2
    len1 = len(chain_res[chain1])
    len2 = len(chain_res[chain2])
    assert len1 != 0, "%s chain has zero length!\n" % chain1
    assert len2 != 0, "%s chain has zero length!\n" % chain2

    class1 = 'ligand'
    class2 = 'receptor'
    # the chain with more matched atoms is treated as the receptor
    if len(chain_sample[chain1]) > len(chain_sample[chain2]):
        receptor_chain = chain1
        ligand_chain = chain2
        class1 = 'receptor'
        class2 = 'ligand'

    # Set to align on receptor
    # (%s instead of the former %c so that a multi-character chain id cannot
    # turn a failed assertion into a TypeError while formatting the message)
    assert len(chain_ref[receptor_chain]) == len(
        chain_sample[receptor_chain]
    ), "Different number of atoms in native and model receptor (chain %s) %d %d\n" % (
        receptor_chain, len(chain_ref[receptor_chain]),
        len(chain_sample[receptor_chain]))

    super_imposer.set_atoms(chain_ref[receptor_chain],
                            chain_sample[receptor_chain])
    super_imposer.apply(sample_model.get_atoms())

    # Grep out the transformed ligand coords
    assert len(chain_ref[ligand_chain]) != 0 or len(
        chain_sample[ligand_chain]
    ) != 0, "Zero number of equivalent atoms in native and model ligand (chain %s) %d %d.\nCheck that the residue numbers in model and native is consistent\n" % (
        ligand_chain, len(chain_ref[ligand_chain]),
        len(chain_sample[ligand_chain]))
    assert len(chain_ref[ligand_chain]) == len(
        chain_sample[ligand_chain]
    ), "Different number of atoms in native and model ligand (chain %s) %d %d\n" % (
        ligand_chain, len(chain_ref[ligand_chain]),
        len(chain_sample[ligand_chain]))

    coord1 = np.array([atom.coord for atom in chain_ref[ligand_chain]])
    coord2 = np.array([atom.coord for atom in chain_sample[ligand_chain]])

    sup = SVDSuperimposer()
    # using the private _rms function which does not superimpose
    Lrms = sup._rms(coord1, coord2)

    DockQ = (float(fnat) + 1 / (1 + (irms / 1.5) * (irms / 1.5)) + 1 /
             (1 + (Lrms / 8.5) * (Lrms / 8.5))) / 3

    info = {
        'DockQ': DockQ,
        'irms': irms,
        'Lrms': Lrms,
        'fnat': fnat,
        'nat_correct': nat_correct,
        'nat_total': nat_total,
        'fnonnat': fnonnat,
        'nonnat_count': nonnat_count,
        'model_total': model_total,
        'chain1': chain1,
        'chain2': chain2,
        'len1': len1,
        'len2': len2,
        'class1': class1,
        'class2': class2,
    }
    return info
def main():
    """Superimpose one or more configurations on a reference configuration.

    Command line: ``reference victims [victims ...] [-i index_file]``.

    Every victim is superimposed on the reference (optionally using only
    the particle IDs listed in the index file to compute the fit), the
    fitted rotation/translation is applied to all of its positions, the
    rotation to its a1 and a3 vectors, and the result is written to
    ``aligned<N>.dat`` where N is the 0-based position of the victim on
    the command line.

    Exits with status 1 if the index file does not contain integers.
    """
    parser = argparse.ArgumentParser(
        prog=os.path.basename(__file__),
        description=
        "superimposes one or more structures sharing a topology to a reference structure"
    )
    parser.add_argument('reference',
                        type=str,
                        nargs=1,
                        help="The reference configuration to superimpose to")
    parser.add_argument(
        'victims',
        type=str,
        nargs='+',
        help="The configuraitons to superimpose on the reference")
    parser.add_argument(
        '-i',
        metavar='index_file',
        dest='index_file',
        nargs=1,
        help=
        'Align to only a subset of particles from a space-separated list in the provided file'
    )
    args = parser.parse_args()

    #run system checks
    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "numpy", "Bio"])

    #Get the reference files
    ref_dat = args.reference[0]

    #-i will make it only run on a subset of nucleotides.
    #The index file is a space-separated list of particle IDs
    if args.index_file:
        index_file = args.index_file[0]
        with open(index_file, 'r') as f:
            indexes = f.readline().split()
        try:
            indexes = [int(i) for i in indexes]
        except ValueError:
            print(
                "ERROR: The index file must be a space-seperated list of particles. These can be generated using oxView by clicking the \"Download Selected Base List\" button"
            )
            # Continuing with string indexes would only crash later with a
            # less helpful message, so stop here.
            raise SystemExit(1)
    else:
        with ErikReader(ref_dat) as r:
            indexes = list(range(len(r.read().positions)))

    #Read the reference and select the particles used for the fit
    with ErikReader(ref_dat) as r:
        ref = r.read()
        ref.inbox()
    ref_conf = ref.positions[indexes]

    #Create list of configurations to superimpose
    to_sup = []
    for victim in args.victims:
        with ErikReader(victim) as r:
            system = r.read()
            system.inbox()
        to_sup.append(system)

    sup = SVDSuperimposer()

    #Run the biopython superimposer on each configuration and rewrite its configuration file
    for i, system in enumerate(to_sup):
        indexed_cur_conf = system.positions[indexes]
        sup.set(ref_conf, indexed_cur_conf)
        sup.run()
        rot, tran = sup.get_rotran()

        # positions are rotated and translated; the orientation vectors
        # a1/a3 are only rotated
        system.positions = np.einsum('ij, ki -> kj', rot,
                                     system.positions) + tran
        system.a1s = np.einsum('ij, ki -> kj', rot, system.a1s)
        system.a3s = np.einsum('ij, ki -> kj', rot, system.a3s)
        system.write_new("aligned{}.dat".format(i))
        print("INFO: Wrote file aligned{}.dat".format(i), file=stderr)
def compute_centroid(reader, mean_structure, indexes, num_confs, start=None, stop=None):
    """
        Compares each structure to the mean and returns the one with the lowest RMSF

        Every frame is superimposed on mean_structure using only the particles
        selected by indexes; the fitted transform is then applied to all
        positions (rotation + translation) and to the a1/a3 vectors (rotation only).

        Parameters:
            reader (readers.LorenzoReader2): An active reader on the trajectory file to analyze.
            mean_structure (numpy.array): The position of each particle in the mean configuration.
                NOTE(review): it is passed to the superimposer together with positions[indexes],
                so it presumably has to contain only the indexed particles -- confirm with the caller.
            indexes: Index selection applied to the positions of every frame before fitting.
            num_confs (int): The number of configurations in the reader. Used as `stop` when `stop` is None.
            <optional> start (int): Passed to the reader as the number of configurations to skip. Used if parallel.
            <optional> stop (int): Upper bound on the number of frames processed by this call. Used if parallel.

        Returns:
            centroid_candidate (numpy.array): The aligned positions of the frame with the lowest RMSF to the mean.
            centroid_a1 (numpy.array): The rotated a1 vectors of that frame.
            centroid_a3 (numpy.array): The rotated a3 vectors of that frame.
            lowest_rmsf (float): The RMSF of that frame.
            centroid_t: The time of that frame, or None if no frame had an RMSF below the
                initial threshold (the three arrays are then all zeros and lowest_rmsf is 100000).
    """
    if stop is None:
        stop = num_confs
    else:
        stop = int(stop)
    if start is None:
        start = 0
    else:
        start = int(start)
    confid = 0

    # Use the single-value decomposition method for superimposing configurations
    sup = SVDSuperimposer()

    lowest_rmsf = 100000  #if you have a larger number than this, we need to talk...
    centroid_candidate = np.zeros_like(mean_structure)
    centroid_a1 = np.zeros_like(mean_structure)
    centroid_a3 = np.zeros_like(mean_structure)
    # Defined up front so the return below cannot hit an unbound name when
    # the reader yields no frame or no frame beats the threshold.
    centroid_t = None

    mysystem = reader.read(n_skip=start)

    while mysystem != False and confid < stop:
        mysystem.inbox()

        # calculate alignment transform from the indexed particles only
        cur_conf = mysystem.positions
        indexed_cur_conf = mysystem.positions[indexes]
        cur_conf_a1 = mysystem.a1s
        cur_conf_a3 = mysystem.a3s
        sup.set(mean_structure, indexed_cur_conf)
        sup.run()
        rot, tran = sup.get_rotran()

        # apply the transform to the whole frame
        cur_conf = np.einsum('ij, ki -> kj', rot, cur_conf) + tran
        cur_conf_a1 = np.einsum('ij, ki -> kj', rot, cur_conf_a1)
        cur_conf_a3 = np.einsum('ij, ki -> kj', rot, cur_conf_a3)
        RMSF = sup.get_rms()
        print("Frame number:", confid, "RMSF:", RMSF)

        # keep the frame closest to the mean seen so far
        if RMSF < lowest_rmsf:
            centroid_candidate = cur_conf
            centroid_a1 = cur_conf_a1
            centroid_a3 = cur_conf_a3
            lowest_rmsf = RMSF
            centroid_t = mysystem.time

        confid += 1
        mysystem = reader.read()

    return centroid_candidate, centroid_a1, centroid_a3, lowest_rmsf, centroid_t
def main():
    """Align every frame of a trajectory and write the result to a new file.

    Command line: ``traj outfile [-i index_file] [-r reference_structure]``.

    Without ``-r`` the first frame of the trajectory is the reference and
    is written out unchanged (after ``inbox()``). With ``-r`` the given
    configuration is the reference and the first trajectory frame is
    aligned to it like every other frame. With ``-i`` only the listed
    particle IDs are used to select the reference coordinates and are
    passed on to ``align_frame``.

    Exits with status 1 if the index file does not contain integers.
    """
    #handle commandline arguments
    parser = argparse.ArgumentParser(
        prog=os.path.basename(__file__),
        description="Aligns each frame in a trajectory to the first frame")
    parser.add_argument('traj',
                        type=str,
                        nargs=1,
                        help="The trajectory file to align")
    parser.add_argument(
        'outfile',
        type=str,
        nargs=1,
        help='The name of the new trajectory file to write out')
    parser.add_argument(
        '-i',
        metavar='index_file',
        dest='index_file',
        nargs=1,
        help=
        'Align to only a subset of particles from a space-separated list in the provided file'
    )
    parser.add_argument(
        '-r',
        metavar='reference_structure',
        dest='reference_structure',
        nargs=1,
        help="Align to a provided configuration instead of the first frame.")
    args = parser.parse_args()

    #run system checks
    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "numpy", "Bio"])

    #Parse command line arguments
    traj_file = args.traj[0]
    outfile = args.outfile[0]
    sup = SVDSuperimposer()

    #-i will make it only run on a subset of nucleotides.
    #The index file is a space-separated list of particle IDs
    if args.index_file:
        index_file = args.index_file[0]
        with open(index_file, 'r') as f:
            indexes = f.readline().split()
        try:
            indexes = [int(i) for i in indexes]
        except ValueError:
            print(
                "ERROR: The index file must be a space-seperated list of particles. These can be generated using oxView by clicking the \"Download Selected Base List\" button"
            )
            # Continuing with string indexes would only crash later with a
            # less helpful message, so stop here.
            raise SystemExit(1)
    else:
        with ErikReader(traj_file) as r:
            indexes = list(range(len(r.read().positions)))

    #-r will make it align to a provided .dat file instead of the first configuration
    if args.reference_structure:
        #read reference configuration
        with ErikReader(args.reference_structure[0]) as ref_reader:
            ref = ref_reader.read()
            ref.inbox()
        r = ErikReader(traj_file)
        ref_conf = ref.positions[indexes]
        # ref_conf only holds the indexed particles, so the frame has to be
        # aligned with the same selection as in the loop below
        mysystem = align_frame(ref_conf, sup, r.read(), indexes)
    else:
        #read the first configuration and use it as the reference configuration for the rest
        r = ErikReader(traj_file)
        mysystem = r.read()
        mysystem.inbox()
        ref_conf = mysystem.positions[indexes]

    #write first configuration to output file
    mysystem.write_new(outfile)
    mysystem = r.read()

    #Read the trajectory one configuration at a time and perform the alignment
    while mysystem != False:
        print("working on t = ", mysystem.time)
        mysystem = align_frame(ref_conf, sup, mysystem, indexes)
        mysystem.write_append(outfile)
        mysystem = r.read()
def assemble_multiscale_visualization(topology_fn, rmf_fn, pdb_dir,
                                      outprefix=None, chimerax=True,
                                      xl_fn=None):
    """
    Build a composite PDB of all rigid bodies placed on an RMF model and,
    optionally, a ChimeraX script (and crosslink pseudobond file) to
    display it together with the flexible beads of the RMF.

    Args:
    topology_fn (str): Topology file in pipe-separated-value (PSV) format
    as required in integrative modeling using IMP. For details on how
    to write a topology file, see:
    https://integrativemodeling.org/2.13.0/doc/ref/classIMP_1_1pmi_1_1topology_1_1TopologyReader.html

    rmf_fn (str): Name of the RMF file. Only frame 0 is loaded.

    pdb_dir (str): Directory containing all the PDB files for the rigid
    bodies used in modeling.

    outprefix (str, optional): Prefix for output files. Defaults to None,
    in which case "centroid_model" is used.

    chimerax (bool, optional): If true, a Chimerax script will be written
    (extension ".cxc"). Defaults to True. If false, the interpreter is
    terminated with exit() once the composite PDB has been saved.

    xl_fn (str, optional): A file containing a XL dataset. Defaults to None.
    If this dataset is supplied, then it will be mapped on to the overall
    structure with satisfied XLs drawn in blue and violated XLs drawn in red.
    A XL dataset should be supplied in a comma-separated-value (CSV) format
    containing at least the following fields
    protein1, residue1, protein2, residue2, sat
    where the last field <sat> is a boolean 1 or 0 depending on whether
    the particular XL is satisfied (in the ensemble sense) as a result of
    the integrative modeling exercise.

    Output files: <outprefix>.pdb, and when chimerax is true <outprefix>.cxc
    plus, if xl_fn is given, <outprefix>_XLs.pb.
    """
    # -------------------------------------------
    # read the RMF file and extract all particles
    # -------------------------------------------
    rmf_handle = RMF.open_rmf_file_read_only(rmf_fn)
    rmf_model = IMP.Model()
    hier = IMP.rmf.create_hierarchies(rmf_handle, rmf_model)[0]
    IMP.rmf.load_frame(rmf_handle, 0)

    # (molecule name, particle name) -> coordinates
    rmf_ps = {}
    for leaf in IMP.core.get_leaves(hier):
        # the molecule name sits three levels above the leaf
        molname = leaf.get_parent().get_parent().get_parent().get_name().strip()
        name = leaf.get_name().strip()
        rmf_ps[(molname, name)] = IMP.core.XYZ(leaf).get_coordinates()

    # --------------------------------------------------------------
    # map pdb residues to rmf particles for each rigid body pdb file
    # --------------------------------------------------------------
    # read the topology file
    topology = TopologyReader(topology_fn, pdb_dir=pdb_dir)
    components = topology.get_components()

    map_pdb2rmf = {}           # pdb prefix -> {(chain, pdb resid): (mol, rmf resid)}
    rigid_body_models = {}     # pdb prefix -> parsed model
    rigid_body_residues = {}   # pdb prefix -> {(chain, resid): residue}
    chain_ids = {}             # these are matched to the chimerax rmf plugin
    chain_id_count = 0

    for comp in components:
        # ignore unstructured residues
        if comp.pdb_file == "BEADS":
            continue

        mol = comp.molname
        pdb_prefix = os.path.basename(comp.pdb_file).split(".pdb")[0]
        chain_id = comp.chain
        resrange = comp.residue_range
        offset = comp.pdb_offset

        first_res = resrange[0] + offset
        end_res = resrange[1] + 1 + offset

        # hand out chain letters in order of first appearance
        if mol not in chain_ids:
            chain_ids[mol] = string.ascii_uppercase[chain_id_count]
            chain_id_count += 1

        if pdb_prefix not in map_pdb2rmf:
            map_pdb2rmf[pdb_prefix] = {}
            parsed_model = PDBParser().get_structure("x", comp.pdb_file)[0]
            parsed_residues = {(res.full_id[2], res.id[1]): res
                               for res in parsed_model.get_residues()}
            rigid_body_models[pdb_prefix] = parsed_model
            rigid_body_residues[pdb_prefix] = parsed_residues

        for resnum in range(first_res, end_res):
            key = (chain_id, resnum)
            if key in rigid_body_residues[pdb_prefix]:
                map_pdb2rmf[pdb_prefix][key] = (mol, resnum)

    # --------------------------------
    # align all pdb files with the rmf
    # --------------------------------
    print("\nAligning all rigid body structures...")
    align = SVDSuperimposer()
    for pdb_prefix, mapper in map_pdb2rmf.items():
        residues = rigid_body_residues[pdb_prefix]
        pdb_coords = []
        pdb_atoms = []
        rmf_coords = []
        for (chain, pdb_res), (mol, rmf_res) in mapper.items():
            residue = residues[(chain, pdb_res)]
            pdb_coords.append(residue["CA"].coord)
            pdb_atoms.extend(residue.get_atoms())
            rmf_coords.append(rmf_ps[(mol, str(rmf_res))])

        # fit the CA atoms on the rmf particles, then move every atom
        align.set(np.array(rmf_coords), np.array(pdb_coords))
        align.run()
        rotmat, vec = align.get_rotran()
        for atom in pdb_atoms:
            atom.transform(rotmat, vec)

    # --------------------------
    # assemble the composite pdb
    # --------------------------
    mols = {comp.molname for comp in components}

    print("\nChain IDs by molecule:")
    for mol, chain_id in chain_ids.items():
        print("molecule %s, chain ID %s" % (mol, chain_id))

    reslists = {mol: [] for mol in mols}
    for pdb_prefix, mapper in map_pdb2rmf.items():
        residues = rigid_body_residues[pdb_prefix]
        for (chain, pdb_res), (mol, rmf_res) in mapper.items():
            residue = residues[(chain, pdb_res)]
            # renumber the residue with its rmf residue id
            new_residue = Residue.Residue(
                id=(residue.id[0], rmf_res, residue.id[2]),
                resname=residue.resname,
                segid=residue.segid)
            for atom in residue.get_atoms():
                new_residue.add(atom)
            reslists[mol].append(new_residue)

    composite_model = Model.Model(0)
    for mol, chain_id in chain_ids.items():
        new_chain = Chain.Chain(chain_id)
        for residue in sorted(reslists[mol], key=lambda r: r.id[1]):
            new_chain.add(residue)
        composite_model.add(new_chain)

    # save the composite pdb to file
    io = PDBIO()
    io.set_structure(composite_model)
    if outprefix is None:
        outprefix = "centroid_model"
    io.save(outprefix + ".pdb")

    # -------------------------------------------------------------------
    # chimerax rendering (hide most of the rmf except unstructured beads)
    # -------------------------------------------------------------------
    if not chimerax:
        exit()

    print("\nWriting UCSF Chimerax script...")
    script = [
        "open %s\n" % (outprefix + ".pdb"),
        "open %s\n" % rmf_fn,
        "hide\n",
        "show cartoon\n",
        "color #%d %s\n" % (CHIMERAX_PDB_MODEL_NUM, STRUCT_COLOR),
        "color #%d %s\n" % (CHIMERAX_RMF_MODEL_NUM, UNSTRUCT_COLOR),
        "hide #%d\n" % CHIMERAX_RMF_MODEL_NUM,
    ]

    # every (mol, resid) that is covered by a rigid body
    struct_residues = []
    for mapper in map_pdb2rmf.values():
        struct_residues.extend(list(mapper.values()))

    # (mol, resid) -> atomspec of the rmf particle representing it
    unstruct_atomspec = {}
    for molname, particle_name in rmf_ps:
        rmf_chain_id = chain_ids[molname]
        if "bead" in particle_name:
            # bead names start with "<first>-<last>_"
            bead_first, bead_last = particle_name.split("_")[0].split("-")
            bead_first = int(bead_first)
            bead_last = int(bead_last)
            this_atomspec = "#%d/%s:%d-%d" % \
                (CHIMERAX_RMF_MODEL_NUM, rmf_chain_id, bead_first, bead_last)
            for resnum in range(bead_first, bead_last + 1):
                unstruct_atomspec[(molname, resnum)] = this_atomspec
        elif (molname, int(particle_name)) not in struct_residues:
            resnum = int(particle_name)
            this_atomspec = "#%d/%s:%d" % \
                (CHIMERAX_RMF_MODEL_NUM, rmf_chain_id, resnum)
            unstruct_atomspec[(molname, resnum)] = this_atomspec

    script.append("show %s\n" % (" ".join(set(unstruct_atomspec.values()))))

    # ----------------------------------------------------------
    # if crosslink data is supplied, write out a pseudobond file
    # ----------------------------------------------------------
    if xl_fn is not None:
        # parse XL data
        df = pd.read_csv(os.path.abspath(xl_fn))
        xls = []
        for i in range(len(df)):
            row = df.iloc[i]
            xls.append((row["protein1"], row["residue1"],
                        row["protein2"], row["residue2"],
                        row["sat"]))

        # get lists of struct atomspecs
        atomspec = {}
        for (mol, particle_name) in rmf_ps:
            if "bead" in particle_name:
                continue
            if (mol, int(particle_name)) in unstruct_atomspec:
                continue
            resid = int(particle_name)
            atomspec[(mol, resid)] = "#%d/%s:%d@CA" % \
                (CHIMERAX_PDB_MODEL_NUM, chain_ids[mol], resid)

        # now add in all the unstruct atomspecs
        atomspec.update(unstruct_atomspec)

        # write pseudobond script
        pb_lines = [
            "; radius = %2.2f\n" % XL_RADIUS,
            "; dashes = 0\n",
        ]
        for p1, r1, p2, r2, sat in xls:
            atomspec_1 = atomspec[(p1, r1)]
            atomspec_2 = atomspec[(p2, r2)]
            # both ends map to the same particle: nothing to draw
            if atomspec_1 == atomspec_2:
                continue
            if sat:
                color = SAT_XL_COLOR
            else:
                color = VIOL_XL_COLOR
            pb_lines.append("%s %s %s\n" % (atomspec_1, atomspec_2, color))
        pb_lines.append("\n")

        pb_fn = outprefix + "_XLs.pb"
        with open(pb_fn, "w") as pb_file:
            pb_file.write("".join(pb_lines))
        script.append("open %s\n" % pb_fn)

    script.append("preset 'overall look' publication\n")
    chimerax_out_fn = outprefix + ".cxc"
    with open(chimerax_out_fn, "w") as script_file:
        script_file.write("".join(script))
def _centre_of_mass(entity):
    """Return the mass-weighted centre of every atom in *entity* as a Vector."""
    weighted_sum = np.zeros(3)
    total_mass = 0.0
    for atom in entity.get_atoms():
        weighted_sum += atom.get_vector().get_array() * atom.mass
        total_mass += atom.mass
    return Vector(weighted_sum / total_mass)


def analyse(input_file_name, refer_file_name, moved_chain_id, fixed_chain_id,
            r_moved_chain_id, r_fixed_chain_id, output_file1, output_file2,
            r_model_number=0):
    """Measure how one chain sits relative to another in every model of a PDB file.

    Each model of ``input_file_name`` is superimposed onto the reference
    structure using the "fixed" chain, then the centre of mass of the "moved"
    chain and the rotation that maps the moved chain onto the reference moved
    chain are recorded.

    Parameters:
        input_file_name: PDB file whose models are analysed.
        refer_file_name: PDB file holding the reference structure.
        moved_chain_id, fixed_chain_id: chain ids looked up in each analysed model.
        r_moved_chain_id, r_fixed_chain_id: chain ids looked up in the reference.
        output_file1: receives one tab-separated line per model:
            frame, distance between the two centres of mass, theta, phi,
            theta_x, theta_y, theta_z.
        output_file2: receives one tab-separated line per model:
            frame, x, y, z of the moved centre of mass, then the nine rotation
            matrix entries written column by column; every entry is followed
            by a tab, so each line ends with a tab before the newline.
        r_model_number: model of the reference structure to use (default 0).

    Angles that are undefined (centre of mass at the origin for theta, or on
    the z-axis for phi) are written as 0.0 so that every model always yields
    exactly one value per column.
    """
    structure = PDBParser(PERMISSIVE=1).get_structure('to_analyse',
                                                      input_file_name)
    reference = PDBParser(PERMISSIVE=1).get_structure('reference',
                                                      refer_file_name)
    r_chain_moved = reference[r_model_number][r_moved_chain_id]
    r_chain_fixed = reference[r_model_number][r_fixed_chain_id]

    # The reference coordinates do not change between models: build them once.
    reference_fixed = np.asarray(
        [atom.get_coord() for atom in r_chain_fixed.get_atoms()])
    reference_moved = np.asarray(
        [atom.get_coord() for atom in r_chain_moved.get_atoms()])

    z_axis = Vector(0, 0, 1)
    rows1 = []  # fields of output_file1, one tuple per model
    rows2 = []  # fields of output_file2, one tuple per model

    for model_number, model in enumerate(structure):
        chain_moved = model[moved_chain_id]
        chain_fixed = model[fixed_chain_id]

        # Bring the whole model into the reference frame via the fixed chain.
        sup = SVDSuperimposer()
        sup.set(reference_fixed,
                np.asarray([atom.get_coord()
                            for atom in chain_fixed.get_atoms()]))
        sup.run()
        R, V = sup.get_rotran()
        for atom in model.get_atoms():
            atom.transform(R, V)

        moved_centre = _centre_of_mass(chain_moved)
        fixed_centre = _centre_of_mass(chain_fixed)
        if fixed_centre.norm() > 0.5:
            print("Fixed chain norm is " + str(fixed_centre.norm()) +
                  " in model " + str(model_number) +
                  ". Should have been at the origin. Check code...")

        x, y, z = moved_centre.get_array()
        distance = (moved_centre - fixed_centre).norm()

        if moved_centre.norm() > 1e-6:
            theta = moved_centre.angle(z_axis)
        else:
            theta = 0.0
        # Always record a phi so that the columns stay aligned with the frames.
        if np.sqrt(x * x + y * y) > 1e-6:
            phi = np.arctan2(y, x)
        else:
            phi = 0.0

        # Rotation of the moved chain with respect to the reference moved chain.
        sup = SVDSuperimposer()
        sup.set(reference_moved,
                np.asarray([atom.get_coord()
                            for atom in chain_moved.get_atoms()]))
        sup.run()
        R, V = sup.get_rotran()
        theta_x = np.arctan2(R[2][1], R[2][2])
        theta_y = np.arctan2(
            -R[2][0], np.sqrt(R[2][1] * R[2][1] + R[2][2] * R[2][2]))
        theta_z = np.arctan2(R[1][0], R[0][0])

        frame = len(rows1)
        rows1.append((frame, distance, theta, phi, theta_x, theta_y, theta_z))
        # Column-major order: R[0][c], R[1][c], R[2][c] for c = 0, 1, 2.
        rotation_entries = tuple(
            R[row][col] for col in range(3) for row in range(3))
        rows2.append((frame, x, y, z) + rotation_entries)

    with open(output_file1, "w+") as f_results1:
        for fields in rows1:
            f_results1.write('\t'.join(str(v) for v in fields) + '\n')

    with open(output_file2, "w+") as f_results2:
        for fields in rows2:
            f_results2.write('\t'.join(str(v) for v in fields) + '\t\n')
def merge_cc(coords_list, res_overlap, n_cc_helices):
    """Stitch consecutive coiled-coil segments into one set of coordinates.

    Every segment after the first is superimposed onto the (already aligned)
    previous segment using the overlapping residues: the last ``overlap``
    residues of each helix of the previous segment against the first
    ``overlap`` residues of each helix of the next one.  In the overlap the
    coordinates are then blended linearly from the previous segment to the
    next one, and the remainder of the next segment is appended.

    Parameters:
        coords_list: list of coordinate arrays, one per segment; the helices
            of a segment are stored one after another, 5 atoms per residue.
        res_overlap: number of overlapping residues for each junction.
        n_cc_helices: number of helices in every segment.

    Returns:
        (merged_coords, rmsd) where ``rmsd`` is the square root of the summed
        squared RMS values of all junction fits.
    """
    atoms_per_residue = 5
    previous = coords_list[0]
    fitted_segments = [deepcopy(coords_list[0])]
    helix_size = int(previous.shape[0] / n_cc_helices)
    squared_rms = []

    # Pass 1: align each segment onto its predecessor via the overlap atoms.
    for segment, overlap_res in zip(coords_list[1:], res_overlap):
        overlap_atoms = overlap_res * atoms_per_residue
        tails = [
            previous[(h + 1) * helix_size - overlap_atoms:(h + 1) * helix_size]
            for h in range(n_cc_helices)
        ]
        heads = [
            segment[h * helix_size:h * helix_size + overlap_atoms]
            for h in range(n_cc_helices)
        ]
        fitter = SVDSuperimposer()
        fitter.set(np.concatenate(tails, axis=0),
                   np.concatenate(heads, axis=0))
        fitter.run()
        squared_rms.append(fitter.get_rms()**2)
        rotation, translation = fitter.get_rotran()
        moved = np.dot(segment, rotation) + translation
        fitted_segments.append(moved)
        previous = moved

    rmsd = np.sqrt(np.sum(squared_rms))

    # Pass 2: grow each helix segment by segment, blending inside the overlap.
    first = fitted_segments[0]
    helices = [
        first[h * helix_size:(h + 1) * helix_size] for h in range(n_cc_helices)
    ]
    for segment, overlap_res in zip(fitted_segments[1:], res_overlap):
        pieces = [
            segment[h * helix_size:(h + 1) * helix_size]
            for h in range(n_cc_helices)
        ]
        overlap_atoms = overlap_res * atoms_per_residue
        for offset in range(overlap_atoms):
            # All atoms of one residue share the same blending weight.
            weight = (offset // atoms_per_residue + 1) / float(overlap_res + 1)
            for h in range(n_cc_helices):
                old_point = helices[h][-overlap_atoms + offset]
                new_point = pieces[h][offset]
                helices[h][-overlap_atoms + offset] = (
                    1 - weight) * old_point + weight * new_point
        helices = [
            np.append(helices[h], pieces[h][overlap_atoms:], axis=0)
            for h in range(n_cc_helices)
        ]

    merged = helices[0]
    for extra in helices[1:]:
        merged = np.append(merged, extra, axis=0)
    return merged, rmsd
======================================================== Module contains functions that are used by two or more functions from different modules. Functions --------- .. autofunction:: get_best_fit_rot_mat """ import os import errno import cStringIO from Bio.SVDSuperimposer import SVDSuperimposer superimpose_inst = SVDSuperimposer() def get_best_fit_rot_mat(from_coord, to_coord): """ Compute best-fit rotation matrix. The best-fit rotation matrix rotates from_coord such that the RMSD between the 2 sets of coordinates are minimized after the rotation. Parameters ---------- from_coord, to_coord : np.array Nx3 coordinate arrays, where N is the number of atoms. The from_coord will rotated such that the rotation will minimize the RMSD between the rotated from_coord and to_coord.
def super_prot(atom_coords_1, atom_coords_2):
    """Superimpose two coordinate lists with Biopython and return the RMS.

    ``atom_coords_1`` is handed to the superimposer as the reference set and
    ``atom_coords_2`` as the set that is fitted onto it.
    """
    superimposer = SVDSuperimposer()
    superimposer.set(atom_coords_1, atom_coords_2)
    superimposer.run()
    # The accessor is called get_rms(); there is no get_rmsd().
    rms = superimposer.get_rms()
    return rms
def tm_movement_2D(pdbs1, pdbs2, mode, data, gn_dictionary):
    """Compare the 7TM helix placement of two structure sets in a 2D projection.

    Outline of what the code below does:
      1. Select, per TM helix, three reference generic numbers ("gns") and
         three helper positions further along the helix ("middle_gpcr"),
         restricted to generic numbers conserved in both sets.
      2. Build an averaged pairwise distance matrix of those positions for
         each set and rank the helices by how little their distances change.
      3. Rebuild 3D points from each distance matrix (recreate3Dorder), align
         set 2 onto set 1 with SVDSuperimposer on the three best-ranked
         helices (also trying a z-mirrored version of set 2), and project the
         first seven centroids of both sets onto a common plane.
      4. Derive a per-helix rotation from data['tab4'] angles.

    Parameters:
        pdbs1, pdbs2: passed to Distances.load_pdbs(); len() of each is used
            to average the summed distances.
        mode: index into ["extracellular", "intracellular", "pocket",
            "middle"] (0-3); selects how reference positions are picked.
        data: dict; data['gpcr_class'] is read in mode 3 and
            data['tab4'][...]['angles_set1' / 'angles_set2'][11] for rotations.
        gn_dictionary: mapping applied to the selected generic numbers for the
            returned "gns_used" and for the data['tab4'] lookups; its inverse
            is applied to the mode-3 reference table for non-"001" classes.

    Returns:
        [] if a helix has too few conserved residues, otherwise a dict with
        keys "coordinates_set1", "coordinates_set2" (lists of per-TM dicts
        with label, x, y, z, rotation) and "gns_used".

    NOTE(review): this block was reconstructed from a whitespace-collapsed
    listing; nesting that could not be read unambiguously is marked below.
    """
    string_mode = ["extracellular", "intracellular", "pocket", "middle"]
    intracellular = (mode == 1)
    print("COMPARISON", string_mode[mode])
    print(pdbs1)
    print("VS")
    print(pdbs2)

    distances_set1 = Distances()
    distances_set1.load_pdbs(pdbs1)
    distances_set1.filtered_gns = True

    distances_set2 = Distances()
    distances_set2.load_pdbs(pdbs2)
    distances_set2.filtered_gns = True

    # Only generic numbers present in both sets are considered.
    conserved_set1 = distances_set1.fetch_conserved_gns_tm()
    conserved_set2 = distances_set2.fetch_conserved_gns_tm()
    conserved = [x for x in conserved_set2 if x in conserved_set1]

    # The seven slots initially alias one empty list; every slot is rebound
    # (never mutated in place) in the branches below.
    gns = [[]] * 7
    middle_gpcr = [[]] * 7
    if mode <= 1: # Intracellular or Extracellular
        for i in range(0,7):
            # Generic numbers of TM i+1 (first character is the TM number)
            tm_only = [x for x in conserved if x[0]==str(i+1)]
            if intracellular and i % 2 == 0: # odd-numbered TMs (TM number = i+1)
                tm_only.reverse()
            elif not intracellular and i % 2 == 1: # even-numbered TMs (TM number = i+1)
                tm_only.reverse()

            if len(tm_only) < 3:
                print("too few residues")
                return []

            gns[i] = tm_only[0:3]
            # Take the helper triplet as far along the helix as available
            # (positions 9-12 down to 4-7); the first window that fits wins.
            for upwards in range(12, 6, -1):
                if len(tm_only) >= upwards:
                    middle_gpcr[i] = tm_only[(upwards-3):upwards]
                    break

        # INCLUDING References points from membrane middle of GPCR
        # ref_membrane_mid = {}
        # ref_membrane_mid["001"] = [['1x43', '1x44','1x45'], ['2x51', '2x52','2x53'], ['3x35', '3x36', '3x37'], ['4x53', '4x54', '4x55'], ['5x45', '5x46', '5x47'], ['6x47', '6x48', '6x49'], ['7x42', '7x43', '7x44']] # A
        # #ref_membrane_mid["002"] = [['1x50', '1x51', '1x52'], ['2x57', '2x58', '2x59'], ['3x40','3x41','3x42'], ['4x53', '4x54', '4x55'], ['5x44', '5x45', '5x46'], ['6x48', '6x49', '6x50'], ['7x49', '7x50', '7x51']] # B1
        # ref_membrane_mid["002"] = [['1x50', '1x51', '1x52'], ['2x57', '2x58', '2x59'], ['3x40','3x41','3x42'], ['4x55', '4x56'], ['5x42', '5x43', '5x44'], ['7x47', '7x49']] # B1
        # ref_membrane_mid["003"] = ref_membrane_mid["002"] # B2
        # ref_membrane_mid["004"] = [['1x48', '1x49', '1x50'], ['2x47', '2x48', '2x49'], ['3x39', '3x40', '3x41'], ['4x40', '4x41', '4x42'], ['5x47', '5x48', '5x49'], ['6x47', '6x48', '6x49'], ['7x39', '7x40', '7x41']] # C
        # ref_membrane_mid["006"] = [['1x42', '1x43', '1x44'], ['2x52', '2x53', '2x54'], ['3x37', '3x38', '3x39'], ['4x52', '4x53', '4x54'], ['5x52', '5x53', '5x54'], ['6x42', '6x43', '6x44'], ['7x46', '7x47', '7x48']] # F
        #
        # middle_gpcr = ref_membrane_mid[data['gpcr_class']]
    elif mode == 2: # Major pocket (class A)
        ligand_references = [['1x39', '1x40','1x41'], ['2x56', '2x57','2x58'], ['3x31', '3x32', '3x33'], ['4x56', '4x57', '4x58'], ['5x43', '5x44', '5x45'], ['6x51', '6x52', '6x53'], ['7x39', '7x40', '7x41']]
        for i in range(0,7):
            gns[i] = [x for x in ligand_references[i] if x in conserved]
            tm_only = [x for x in conserved if x[0]==str(i+1)]
            if i % 2 == 1: # even-numbered TMs (TM number = i+1)
                tm_only.reverse()

            # NOTE(review): the else-branch below is attached to this `if`
            # (reference position found vs. not found) - confirm against the
            # original layout.
            if len(gns[i]) > 0:
                if i % 2 == 1: # even-numbered TMs (TM number = i+1)
                    start_pos = tm_only.index(gns[i][-1])
                else:
                    start_pos = tm_only.index(gns[i][0])

                gns[i] = tm_only[start_pos:(start_pos+3)]

                # Stay close for this as references
                #middle_gpcr[i] = tm_only[(start_pos+6):(start_pos+9)]
                # NOTE(review): `continue` does not leave the loop, so the last
                # window that fits (the smallest `upwards`) wins, unlike the
                # `break` used for mode <= 1 - confirm this is intended.
                for upwards in range(9, 6, -1):
                    if len(tm_only) >= (start_pos+upwards):
                        middle_gpcr[i] = tm_only[(start_pos+upwards-3):(start_pos+upwards)]
                        continue
            else:
                if len(tm_only) < 9:
                    print("too few residues")
                    return []
                else:
                    #print("Refind",i, gns[i])
                    gns[i] = tm_only[0:3]
                    middle_gpcr[i] = tm_only[6:9]
                    # for upwards in range(15, 6, -1):
                    #     if len(tm_only) >= upwards:
                    #         middle_gpcr[i] = tm_only[(upwards-3):upwards]

        # FILTER not conserved GNs
        # middle_gpcr = [[]] * 7
        # for i in range(0,7):
        #     tm_only = [x for x in conserved if x[0]==str(i+1)]
        #     if i % 2 == 0: #all uneven TMs (as # = i+1)
        #         tm_only.reverse()
        #
        #     if len(tm_only) < 3:
        #         print("too few residues")
        #         return []
        #
        #     middle_gpcr[i] = tm_only[0:3]
        #print(middle_gpcr)
    elif mode == 3: # Middle
        # References points from membrane middle of GPCR
        ref_membrane_mid = {}
        ref_membrane_mid["001"] = [['1x43', '1x44','1x45'], ['2x51', '2x52','2x53'], ['3x35', '3x36', '3x37'], ['4x53', '4x54', '4x55'], ['5x45', '5x46', '5x47'], ['6x47', '6x48', '6x49'], ['7x42', '7x43', '7x44']] # A
        #ref_membrane_mid["002"] = [['1x50', '1x51', '1x52'], ['2x57', '2x58', '2x59'], ['3x40','3x41','3x42'], ['4x53', '4x54', '4x55'], ['5x44', '5x45', '5x46'], ['6x48', '6x49', '6x50'], ['7x49', '7x50', '7x51']] # B1
        #ref_membrane_mid["002"] = [['1x50', '1x51', '1x52'], ['2x57', '2x58', '2x59'], ['3x40','3x41','3x42'], ['4x55', '4x56'], ['5x42', '5x43', '5x44'], ['7x47', '7x49']] # B1
        ref_membrane_mid["002"] = [['1x50', '1x51', '1x52'], ['2x57', '2x58', '2x59'], ['3x40','3x41','3x42'], ['4x55', '4x56'], ['5x42', '5x43', '5x44'], ['6x48', '6x49', '6x50'], ['7x47', '7x49']] # B1
        ref_membrane_mid["003"] = ref_membrane_mid["002"] # B2 (same list object as B1)
        ref_membrane_mid["004"] = [['1x48', '1x49', '1x50'], ['2x47', '2x48', '2x49'], ['3x39', '3x40', '3x41'], ['4x40', '4x41', '4x42'], ['5x47', '5x48', '5x49'], ['6x47', '6x48', '6x49'], ['7x39', '7x40', '7x41']] # C
        ref_membrane_mid["006"] = [['1x42', '1x43', '1x44'], ['2x52', '2x53', '2x54'], ['3x37', '3x38', '3x39'], ['4x52', '4x53', '4x54'], ['5x52', '5x53', '5x54'], ['6x42', '6x43', '6x44'], ['7x46', '7x47', '7x48']] # F

        membrane_mid = ref_membrane_mid[data['gpcr_class']]
        if data['gpcr_class'] != "001":
            # Translate the table through the inverse of gn_dictionary for
            # every class other than "001" (rewrites the table in place;
            # the table is local to this call).
            inv_gn_dictionary = {v: k for k, v in gn_dictionary.items()}
            for index in range(len(membrane_mid)):
                membrane_mid[index] = [inv_gn_dictionary[res] for res in membrane_mid[index]]

        for i in range(0,7):
            gns[i] = [x for x in membrane_mid[i] if x in conserved]
            tm_only = [x for x in conserved if x[0]==str(i+1)]
            if i % 2 == 1: # even-numbered TMs (TM number = i+1)
                tm_only.reverse()

            # NOTE(review): else-branch attachment reconstructed as in mode 2.
            if len(gns[i]) > 0:
                if i % 2 == 1: # even-numbered TMs (TM number = i+1)
                    start_pos = tm_only.index(gns[i][-1])
                else:
                    start_pos = tm_only.index(gns[i][0])

                gns[i] = tm_only[start_pos:(start_pos+3)]

                # Stay close for this as references
                #middle_gpcr[i] = tm_only[(start_pos+6):(start_pos+9)]
                # NOTE(review): as in mode 2, `continue` lets the smallest
                # fitting window overwrite the larger ones.
                for upwards in range(6, 3, -1):
                    if len(tm_only) >= (start_pos+upwards):
                        middle_gpcr[i] = tm_only[(start_pos+upwards-3):(start_pos+upwards)]
                        continue
            else:
                if len(tm_only) < 6:
                    print("too few residues")
                    return []
                else:
                    #print("Refind",i, gns[i])
                    gns[i] = tm_only[0:3]
                    middle_gpcr[i] = tm_only[3:6]
                    # for upwards in range(15, 6, -1):
                    #     if len(tm_only) >= upwards:
                    #         middle_gpcr[i] = tm_only[(upwards-3):upwards]

    # Merge the reference and the helper points
    gns_flat = [y for x in gns for y in x]
    # Helper points must be conserved and must not repeat a reference point.
    middle_gpcr = [list(filter(lambda x: x in conserved and x not in gns_flat, tm_list)) for tm_list in middle_gpcr]
    # print(gns)
    # print(middle_gpcr)
    # Groups 0-6 are the reference triplets, groups 7-13 the helper points.
    ends_and_middle = gns[:]
    ends_and_middle.extend(middle_gpcr)
    ends_and_middle_flat = [y for x in ends_and_middle for y in x]
    # For every flattened position: index of the group it belongs to
    ends_and_middle_grouping = [x for x in range(0, len(ends_and_middle)) for y in ends_and_middle[x]]
    # For every group: zero-based TM index taken from its first generic number
    # (raises IndexError if a group is empty)
    segment_order = [int(ends_and_middle[x][0][0])-1 for x in range(0, len(ends_and_middle))]

    distances_set1.filter_gns.extend([y for x in ends_and_middle for y in x])
    distances_set2.filter_gns = distances_set1.filter_gns
    distances_set1.fetch_distances_tm(distance_type = "HC")
    distances_set2.fetch_distances_tm(distance_type = "HC")

    # Symmetric position-by-position matrices of summed distance / number of pdbs
    membrane_data1 = [x[:] for x in [[0] * len(ends_and_middle_flat)] * len(ends_and_middle_flat)]
    membrane_data2 = [x[:] for x in [[0] * len(ends_and_middle_flat)] * len(ends_and_middle_flat)]
    for i in range(0,len(ends_and_middle_flat)-1):
        for j in range(i+1, len(ends_and_middle_flat)):
            # Distance keys are "<gn>_<gn>" in the order given by right_gn_order
            if right_gn_order(ends_and_middle_flat[i], ends_and_middle_flat[j]):
                filter_key = ends_and_middle_flat[i] + "_" + ends_and_middle_flat[j]
            else:
                filter_key = ends_and_middle_flat[j] + "_" + ends_and_middle_flat[i]

            if ends_and_middle_flat[i] != ends_and_middle_flat[j]:
                membrane_data1[i][j] = sum(distances_set1.data[filter_key])/len(pdbs1)
                membrane_data1[j][i] = membrane_data1[i][j]
                membrane_data2[i][j] = sum(distances_set2.data[filter_key])/len(pdbs2)
                membrane_data2[j][i] = membrane_data2[i][j]

    # Identify most stable TMs by ranking the variations to all other helices
    membrane_data1 = np.array([np.array(x) for x in membrane_data1])
    membrane_data2 = np.array([np.array(x) for x in membrane_data2])
    diff_distances = [x[:] for x in [[0] * len(ends_and_middle)] * len(ends_and_middle)]
    for i in range(0,max(ends_and_middle_grouping)):
        for j in range(i+1, max(ends_and_middle_grouping)+1):
            # Calculate movements for each TM relative to their "normal" distance
            # selected residues for group 1 and 2
            group_1 = [x for x in range(0,len(ends_and_middle_grouping)) if ends_and_middle_grouping[x] == i]
            group_2 = [x for x in range(0,len(ends_and_middle_grouping)) if ends_and_middle_grouping[x] == j]

            # Summed absolute difference as a percentage of the mean summed distance
            diff_distances[i][j] = np.sum(abs(membrane_data1[group_1][:, group_2] - membrane_data2[group_1][:, group_2]))/(np.sum(membrane_data1[group_1][:, group_2]+membrane_data2[group_1][:, group_2])/2)*100
            diff_distances[j][i] = diff_distances[i][j]

    # Ranking for each TM
    # NOTE(review): sum_differences is only used by the commented-out line below.
    sum_differences = [sum(x) for x in diff_distances]
    # normalized_differences = [((sum_differences[i]-min(sum_differences[0:7]))/(max(sum_differences[0:7])-min(sum_differences[0:7])))**2 for i in range(0,7)]
    # Replace each of the first seven rows by the rank of its values within the row
    for i in range(0,7):
        diff_distances[i] = [sorted(diff_distances[i]).index(x) for x in diff_distances[i]]
    final_rank = [sum([diff_distances[j][i] for j in range(0,7)]) for i in range(0,7)]

    # Grab stable TMs
    tm_ranking = [0] * 7
    sorted_rank = sorted(final_rank)
    for i in range(0,7):
        tm_ranking[i] = final_rank.index(sorted_rank[i])
        final_rank[tm_ranking[i]] = 100 # make sure this TM isn't repeated

    # Calculate 3D coordinates from distance matrix
    tms_centroids_set1, tms_set1 = recreate3Dorder(membrane_data1, ends_and_middle_grouping)
    tms_centroids_set2, tms_set2 = recreate3Dorder(membrane_data2, ends_and_middle_grouping)

    # Align 3D points of set2 with 3D points of set1 using the most stable reference points
    # NOTE(review): best_rmsd / best_set are updated but never read afterwards.
    best_rmsd = 1000
    best_set = []
    # Disabled the testing RMSD for now
    # (choosing 3 out of 3 yields exactly one combination)
    for comb in combinations(tm_ranking[:3], 3):
    #for comb in combinations(tm_ranking[:4], 3):
        sel_refs = [x for x in range(0,len(segment_order)) if segment_order[x] in comb]
        #print(sel_refs)

        tms_reference_set1 = np.array(tms_centroids_set1[sel_refs], copy = True)
        tms_reference_set2 = np.array(tms_centroids_set2[sel_refs], copy = True)

        imposer = SVDSuperimposer()
        imposer.set(tms_reference_set1, tms_reference_set2)
        imposer.run()
        rot, trans = imposer.get_rotran()
        rmsd = imposer.get_rms()

        print("RMSD", round(rmsd,2), tm_ranking)
        if rmsd < best_rmsd:
            best_set = comb
            best_rmsd = rmsd

    # Check for possible mirroring error
    # NOTE(review): placed after the loop in this reconstruction; `error` is
    # only consulted by the commented-out conditions below.
    test_set2 = np.dot(tms_centroids_set2, rot) + trans
    error = 0
    for i in tm_ranking[3:7]:
        if np.linalg.norm(test_set2[i] - tms_centroids_set1[i]) > 5:
            error += 1

    #if rmsd > 2:
    #if error >= 3 or rmsd > 2:
    if True:
        # Mirror set 2 in z and keep the mirrored fit only if it is better.
        for i in range(0,len(tms_centroids_set2)):
            tms_centroids_set2[i][2] = tms_centroids_set2[i][2]*-1

        # Align 3D points of set2 with 3D points of set1 using the most stable reference points
        tms_reference_set1 = tms_centroids_set1[[x for x in range(0,len(segment_order)) if segment_order[x] in tm_ranking[0:3]]]
        tms_reference_set2 = tms_centroids_set2[[x for x in range(0,len(segment_order)) if segment_order[x] in tm_ranking[0:3]]]

        imposer = SVDSuperimposer()
        imposer.set(tms_reference_set1, tms_reference_set2)
        imposer.run()
        new_rot, new_trans = imposer.get_rotran()
        new_rmsd = imposer.get_rms()
        print("RMSD2", round(new_rmsd,2))

        if new_rmsd < rmsd:
            rot = new_rot
            trans = new_trans
            rmsd = new_rmsd
        else:
            # Mirrored fit is not better: undo the z flip
            for i in range(0,len(tms_centroids_set2)):
                tms_centroids_set2[i][2] = tms_centroids_set2[i][2]*-1

    # test_set2 = np.dot(tms_reference_set2, rot) + trans
    # for i in range(0,len(test_set2)):
    #     print("pseudoatom s1_tm" + str(i+1), ", pos=[", ','.join([str(x) for x in tms_reference_set1[i]]), "]")
    # for i in range(0,len(test_set2)):
    #     print("pseudoatom s2_tm" + str(i+1), ", pos=[", ','.join([str(x) for x in test_set2[i]]), "]")
    #
    # print("############")
    # #test_set2 = np.dot(tms_centroids_set2, rot) + trans
    # test_set2 = np.array(tms_centroids_set2, copy = True)
    # for i in range(0,len(tms_centroids_set1)):
    #     print("pseudoatom s1_tm" + str(i+1), ", pos=[", ','.join([str(x) for x in tms_centroids_set1[i]]), "]")
    # for i in range(0,len(tms_centroids_set2)):
    #     print("pseudoatom s2_tm" + str(i+1), ", pos=[", ','.join([str(x) for x in tms_centroids_set2[i]]), "]")

    # if rmsd > 2:
    #     for i in range(0,len(tms_centroids_set2)):
    #         tms_centroids_set2[i][2] = tms_centroids_set2[i][2]*-1
    #     # Huge error during alignment of "stable" helices, just use the references not the helper points
    #     tms_reference_set1 = tms_centroids_set1[[x for x in range(0,7) if segment_order[x] in tm_ranking[0:4]]]
    #     tms_reference_set2 = tms_centroids_set2[[x for x in range(0,7) if segment_order[x] in tm_ranking[0:4]]]
    #     imposer = SVDSuperimposer()
    #     imposer.set(tms_reference_set1, tms_reference_set2)
    #     imposer.run()
    #     rot, trans = imposer.get_rotran()
    #     rmsd = imposer.get_rms()
    #     print("RMSD3", round(rmsd,2))
    #
    # Apply the chosen transform to all points of set 2.
    # NOTE(review): in the collapsed listing the first of the two statements
    # below directly follows a '#'; it is reconstructed as live code because
    # the centroids of both sets are projected together further down -
    # confirm against the original file.
    tms_centroids_set2 = np.dot(tms_centroids_set2, rot) + trans
    tms_set2 = np.dot(tms_set2, rot) + trans

    # Calculate optimal plane through points in both sets and convert to 2D
    # Try normal based on TM7
    # tm7_centroids = tms_centroids_set1[[x for x in range(0,len(segment_order)) if segment_order[x] == 6]]
    # if len(tm7_centroids) == 2:
    #     normal = (tm7_centroids[1] - tm7_centroids[0])/np.linalg.norm(tm7_centroids[1] - tm7_centroids[0])
    # else:
    #     # Using TM mid as reference plane
    #     normal, midpoint = calculatePlane(np.concatenate((tms_centroids_set1[7:], tms_centroids_set2[7:])), intracellular)

    # Alternative: use center of helical ends and center of helical middle
    # normal = tms_centroids_set1[:7].mean(axis=0) - tms_centroids_set1[7:].mean(axis=0)
    # normal = normal/np.linalg.norm(normal)

    # 7TM references
    # Collect, per TM index, the set-1 centroids of all groups on that helix
    # (the list comprehension is used only for its side effect).
    tm_centroids = {y:[] for y in range(0,7)}
    [tm_centroids[y].append(tms_centroids_set1[x]) for y in range(0,7) for x in range(0,len(segment_order)) if segment_order[x] == y]
    count = 0
    normal = np.array([0.0,0.0,0.0])
    # Plane normal = mean unit vector between the two centroids of each helix
    # that has exactly two of them.
    for y in range(0,7):
        #if len(tm_centroids[y]) == 2 and (mode != 1 or y != 5):
        if len(tm_centroids[y]) == 2:
            normal += np.array((tm_centroids[y][1] - tm_centroids[y][0])/np.linalg.norm(tm_centroids[y][1] - tm_centroids[y][0]))
            count += 1
    # NOTE(review): count is 0 when no helix has exactly two centroids; this
    # division is then 0/0.
    normal = normal/count
    midpoint = tms_centroids_set1[:7].mean(axis=0)

    #plane_set1, z_set1 = convert3D_to_2D_plane(tms_centroids_set1[:7], intracellular, normal, midpoint)
    #plane_set2, z_set2 = convert3D_to_2D_plane(tms_centroids_set2[:7], intracellular, normal, midpoint)
    # Project the first seven centroids of both sets in one call, then split.
    plane_set, z_set = convert3D_to_2D_plane(np.concatenate((tms_centroids_set1[:7], tms_centroids_set2[:7]), axis = 0), intracellular, normal, midpoint)
    plane_set1 = plane_set[:7]
    plane_set2 = plane_set[7:]
    z_set1 = z_set[:7]
    z_set2 = z_set[7:]

    # DO NOT REMOVE: possibly we want to upgrade to weighted superposing
    # Based on Biopython SVDSuperimposer
    # coords = tms_centroids_set2
    # reference_coords = tms_centroids_set1

    # OLD centroid calculation
    # av1 = sum(coords) / len(coords)
    # av2 = sum(reference_coords) / len(reference_coords)

    # NEW weighted centroid calculation
    # print(normalized_differences)
    # av1, av2 = 0, 0
    # totalweight = 0
    # for i in range(0,7):
    #     # print("Round",i)
    #     #weight = 1+(7-tm_ranking.index(i))/7
    #     weight = (1-normalized_differences[i]+0.1)/1.1
    #     totalweight += weight
    #     print("TM", str(i+1), "weight",weight)
    #     av1 += coords[i]*weight
    #     av2 += reference_coords[i]*weight
    #
    # av1 = av1/totalweight
    # av2 = av2/totalweight
    #
    # coords = coords - av1
    # reference_coords = reference_coords - av2
    #
    # # correlation matrix
    # a = np.dot(np.transpose(coords), reference_coords)
    # u, d, vt = np.linalg.svd(a)
    # rot = np.transpose(np.dot(np.transpose(vt), np.transpose(u)))
    # # check if we have found a reflection
    # if np.linalg.det(rot) < 0:
    #     vt[2] = -vt[2]
    #     rot = np.transpose(np.dot(np.transpose(vt), np.transpose(u)))
    # trans = av2 - np.dot(av1, rot)
    # rot, trans = imposer.get_rotran()
    # tms_set2 = np.dot(tms_set2, rot) + trans

    # CURRENT: Ca-angle to axis core
    rotations = [0] * 7
    for i in range(0,7):
        try:
            # rotations[i] = [data['tab4'][gn_dictionary[x]]['angles_set1'][1]-data['tab4'][gn_dictionary[x]]['angles_set2'][1] if abs(data['tab4'][gn_dictionary[x]]['angles_set1'][1]-data['tab4'][gn_dictionary[x]]['angles_set2'][1]) < 180 else -1*data['tab4'][gn_dictionary[x]]['angles_set2'][1]-data['tab4'][gn_dictionary[x]]['angles_set1'][1] for x in gns[i]]
            # Entry 11 of each angle list, with non-positive values shifted by +360
            angles1 = [data['tab4'][gn_dictionary[x]]['angles_set1'][11] for x in gns[i]]
            angles1 = [angle if angle > 0 else angle + 360 for angle in angles1 ]
            angles2 = [data['tab4'][gn_dictionary[x]]['angles_set2'][11] for x in gns[i]]
            angles2 = [angle if angle > 0 else angle + 360 for angle in angles2 ]

            # Difference per reference residue, wrapped into [-180, 180]
            rotations[i] = [angles1[x] - angles2[x] for x in range(3)]
            rotations[i] = [value if abs(value) <= 180 else value-360 if value > 0 else value+360 for value in rotations[i]]

            # count=0
            # for x in gns[i]:
            #     print(i, x, data['tab4'][gn_dictionary[x]]['angles_set1'][11], data['tab4'][gn_dictionary[x]]['angles_set2'][11], rotations[i][count])
            #     count += 1
        # NOTE(review): bare except - any failure above (missing key, fewer
        # than three reference residues, ...) silently yields zero rotation.
        except:
            rotations[i] = [0.0, 0.0, 0.0] # TODO: verify other class B errors

        # UPDATE 20-02-2020 No mirroring but top-down through GPCR
        # Average over the three reference residues
        rotations[i] = sum(rotations[i])/3
        # if intracellular:
        #     rotations[i] = -1*sum(rotations[i])/3
        # else:
        #     rotations[i] = sum(rotations[i])/3

    # ALTERNATIVE: utilize TM tip alignment (needs debugging as some angles seem off, e.g. GLP-1 active vs inactive TM2)
    # Add rotation angle based on TM point placement
    # tms_2d_set1, junk = convert3D_to_2D_plane(tms_set1, intracellular, normal, midpoint)
    # tms_2d_set2, junk = convert3D_to_2D_plane(tms_set2, intracellular, normal, midpoint)
    # rotations = [0] * 7
    # for i in range(0,7):
    #     positions = [x for x in range(0, len(ends_and_middle_grouping)) if ends_and_middle_grouping[x] == i]
    #     turn_set1 = tms_2d_set1[positions]
    #     turn_set2 = tms_2d_set2[positions]
    #
    #     # set to middle
    #     turn_set1 = turn_set1 - turn_set1.mean(axis=0)
    #     turn_set2 = turn_set2 - turn_set2.mean(axis=0)
    #
    #     # Calculate shift per residue and take average for this TM
    #     for j in range(0,len(turn_set1)):
    #         v1 = turn_set1[j]/np.linalg.norm(turn_set1[j])
    #         v2 = turn_set2[j]/np.linalg.norm(turn_set2[j])
    #         angle = np.degrees(np.arctan2(v2[1], v2[0]) - np.arctan2(v1[1],v1[0]))
    #
    #         if abs(angle) > 180:
    #             angle = 360 - abs(angle)
    #
    #         rotations[i] += angle/len(turn_set1)

    # TODO: check z-coordinates orientation
    # Step 1: collect movement relative to membrane mid
    # Step 2: find min and max TM
    # Step 3: check if orientation of min/max TM matches the z-scales + intra/extra - if not invert z-coordinates

    # Set 1 is the reference (rotation 0); set 2 carries the computed rotation.
    labeled_set1 = [{"label": "TM"+str(i+1), "x": float(plane_set1[i][0]), "y": float(plane_set1[i][1]), "z": float(z_set1[i]), "rotation" : 0} for i in range(0,7)]
    labeled_set2 = [{"label": "TM"+str(i+1), "x": float(plane_set2[i][0]), "y": float(plane_set2[i][1]), "z": float(z_set2[i]), "rotation" : rotations[i]} for i in range(0,7)]

    # Convert used GNs to right numbering
    # (gns[:] is a shallow copy: the inner lists of gns are rewritten in place)
    gns_used = gns[:]
    for i in range(0,len(gns)):
        for j in range(0,len(gns[i])):
            gns_used[i][j] = gn_dictionary[gns[i][j]]
    return {"coordinates_set1" : labeled_set1, "coordinates_set2": labeled_set2, "gns_used": gns_used}
# `array` was used below without ever being imported.
from numpy import array

from Bio.SVDSuperimposer import SVDSuperimposer

# Example: least-squares superposition of two small coordinate sets.
# Both sets are Nx3 arrays created with the single-precision 'f' typecode.
x = array([[51.65, -1.90, 50.07],
           [50.40, -1.23, 50.65],
           [50.68, -0.04, 51.54],
           [50.22, -0.02, 52.85]], 'f')

y = array([[51.30, -2.99, 46.54],
           [51.09, -1.88, 47.58],
           [52.36, -1.20, 48.03],
           [52.71, -1.18, 49.38]], 'f')

sup = SVDSuperimposer()

# set the coords
# y will be rotated and translated on x
sup.set(x, y)

# do the lsq fit
sup.run()

# get the rmsd
rms = sup.get_rms()

# get rotation (right multiplying!) and the translation
rot, tran = sup.get_rotran()

# rotate y on x manually
# (the listing ends here; with a right-multiplying rotation the manual
# transform would be y_on_x = dot(y, rot) + tran)
def compute_mean(reader, align_conf, num_confs, start=None, stop=None):
    """
    Accumulate aligned positions and orientations over a trajectory chunk.

    Every configuration read is aligned onto ``align_conf`` and its particle
    positions and a1/a3 orientation vectors are added to running sums; the
    caller divides by the returned count to obtain the mean structure.
    Written so that it can be dispatched per chunk by UTILS/parallelize.py.

    Parameters:
        reader (readers.LorenzoReader2): An active reader on the trajectory file.
        align_conf (numpy.array): Reference positions every frame is aligned to.
        num_confs (int): The number of configurations in the reader.
        <optional> start (int): Number of configurations to skip before reading.
        <optional> stop (int): Upper bound on the number of configurations read.

    Returns:
        pos_sum (numpy.array): Per particle, the sum of aligned positions.
        a1_sum (numpy.array): Per particle, the sum of rotated a1 vectors.
        a3_sum (numpy.array): Per particle, the sum of rotated a3 vectors.
        snapshots (list): Intermediate means, stored every INTERMEDIATE_EVERY
            frames when not running in parallel.
        frame_no (int): The number of configurations that were summed.
    """
    stop = num_confs if stop is None else int(stop)
    start = 0 if start is None else int(start)

    system = reader._get_system(N_skip = start)

    snapshots = []
    aligner = SVDSuperimposer()

    # running sums, one 3-vector per nucleotide
    pos_sum = np.zeros((n_nuc, 3))
    a1_sum = np.zeros((n_nuc, 3))
    a3_sum = np.zeros((n_nuc, 3))

    frame_no = 0
    while system != False and frame_no < stop:
        system.inbox()
        positions = fetch_np(system)
        indexed_positions = indexed_fetch_np(system)
        a1_vectors = fetch_a1(system)
        a3_vectors = fetch_a3(system)

        # the fit uses the indexed subset, the transform is applied to everything
        aligner.set(align_conf, indexed_positions)
        aligner.run()
        rotation, translation = aligner.get_rotran()

        pos_sum += np.einsum('ij, ki -> kj', rotation, positions) + translation
        # orientation vectors are rotated only, never translated
        a1_sum += np.einsum('ij, ki -> kj', rotation, a1_vectors)
        a3_sum += np.einsum('ij, ki -> kj', rotation, a3_vectors)

        print("Frame:", frame_no, "Time:", system._time, "RMSF:", aligner.get_rms())

        frame_no += 1
        system = reader._get_system()

        # Intermediate means are used to check decorrelation; they cannot be
        # produced neatly when the trajectory is split across processes.
        if not parallel and frame_no % INTERMEDIATE_EVERY == 0:
            running_mean = np.copy(pos_sum) / frame_no
            snapshots.append(prep_pos_for_json(running_mean))
            print("INFO: Calculated intermediate mean for {} ".format(frame_no))

    return (pos_sum, a1_sum, a3_sum, snapshots, frame_no)
def __init__(self):
    """Start with no reference/model coordinates and a fresh SVDSuperimposer."""
    self.reference_coordinate = self.model_coordinate = None
    self.sup = SVDSuperimposer()
def compute_deviations(reader, mean_structure, indexed_mean_structure, num_confs, start=None, stop=None):
    """
    Collect, frame by frame, each particle's distance from the mean structure.

    Parameters:
        reader (readers.LorenzoReader2): An active reader on the trajectory file to analyze.
        mean_structure (numpy.array): Mean position of every particle; subtracted from the aligned frame.
        indexed_mean_structure (numpy.array): Mean positions of the particles selected by the
            module-level ``indexes``; used as the alignment target.
        num_confs (int): The number of configurations in the reader.
        <optional> start (int): Number of configurations to skip before reading.
        <optional> stop (int): Upper bound on the number of configurations read.

    Returns:
        deviations (list): One list per frame holding the per-particle distance to the mean.
    """
    stop = num_confs if stop is None else int(stop)
    start = 0 if start is None else int(start)

    def all_positions(conf):
        # centre-of-mass position of every nucleotide
        return np.array([n.cm_pos for n in conf._nucleotides])

    def selected_positions(conf):
        # only the nucleotides whose index is listed in `indexes`
        return np.array(
            [n.cm_pos for n in conf._nucleotides if n.index in indexes])

    aligner = SVDSuperimposer()
    per_frame = []
    frame_no = 0

    system = reader._get_system(N_skip=start)
    while system != False and frame_no < stop:
        system.inbox()

        positions = all_positions(system)
        subset = selected_positions(system)

        # fit on the selected subset ...
        aligner.set(indexed_mean_structure, subset)
        aligner.run()
        print("Frame number:", frame_no, "RMSF:", aligner.get_rms())

        # ... then move the whole frame and measure each particle's offset;
        # converted to a plain list so the result is json compatible
        rotation, translation = aligner.get_rotran()
        aligned = np.einsum('ij, ki -> kj', rotation, positions) + translation
        offsets = np.linalg.norm(aligned - mean_structure, axis=1)
        per_frame.append(list(offsets))

        frame_no += 1
        system = reader._get_system()

    return per_frame