def align_models(CA):
    """Iteratively superimpose all models onto their running mean.

    ``CA`` is indexed as ``CA[model, :, :]``.  A working copy is first fitted
    onto model 0, then repeatedly refitted onto the mean of the working copy
    until the summed squared RMS improves by no more than 0.001.

    Returns:
        tuple: ``(transformations, rms)`` where ``transformations`` holds one
        ``(rot, tran)`` pair per model (fitting the original model onto its
        aligned counterpart) and ``rms`` is ``sqrt(rms_total / n_models)``
        from the last refinement pass.
    """
    n_models = CA.shape[0]
    working_CA = np.copy(CA)
    sup = SVDSuperimposer()

    def _fit_onto(reference, model_ids):
        # Fit each listed model onto `reference` in place and return the
        # accumulated squared RMS of those fits.
        squared_sum = 0
        for idx in model_ids:
            sup.set(reference, working_CA[idx])
            sup.run()
            squared_sum += sup.get_rms() ** 2
            working_CA[idx] = sup.get_transformed()
        return squared_sum

    # Initial pass: everything except model 0 is fitted onto model 0.
    rms_total = _fit_onto(working_CA[0, :, :], range(1, n_models))

    epsilon = 0.001
    rms_best = float("inf")
    while rms_best - rms_total > epsilon:
        rms_best = rms_total
        rms_total = _fit_onto(np.mean(working_CA, 0), range(n_models))

    # Recover, per model, the transform from the input onto the aligned copy.
    transformations = []
    for original, aligned in zip(CA, working_CA):
        sup.set(aligned, original)
        sup.run()
        transformations.append(sup.get_rotran())

    return transformations, np.sqrt(rms_total / n_models)
def compute_deviations(reader, mean_structure, indexed_mean_structure, indexes, num_confs, start=None, stop=None):
    """
    Computes per-particle deviations from the mean structure for each frame.

    Each frame is fitted onto ``indexed_mean_structure`` using only the
    particles selected by ``indexes``; the resulting transform is then applied
    to the whole frame before measuring distances to ``mean_structure``.

    Parameters:
        reader: An active trajectory reader exposing ``read(n_skip=...)``.
        mean_structure (numpy.array): Mean position of every particle
            (rows are particles, as implied by the ``axis=1`` norm).
        indexed_mean_structure (numpy.array): Mean positions of the particles
            used for the fit.
        indexes: Index selecting the fitted particles from a frame.
        num_confs (int): The number of configurations in the reader.
        <optional> start (int): Number of configurations to skip first.
        <optional> stop (int): Upper bound on the local frame counter.

    Returns:
        tuple: ``(deviations, RMSDs)`` - a list with one list of per-particle
        distances per frame, and a list of the fit RMS scaled by 0.8518.
    """
    stop = num_confs if stop is None else int(stop)
    start = 0 if start is None else int(start)

    # Use the single-value decomposition method for superimposing configurations
    superimposer = SVDSuperimposer()
    deviations = []
    RMSDs = []

    confid = 0
    mysystem = reader.read(n_skip=start)
    while mysystem != False and confid < stop:
        mysystem.inbox()

        frame = mysystem.positions
        superimposer.set(indexed_mean_structure, frame[indexes])
        superimposer.run()
        fit_rms = superimposer.get_rms()
        print("Frame number:", confid, "Time:", mysystem.time, "RMSD:", fit_rms)

        rot, tran = superimposer.get_rotran()
        aligned = np.einsum('ij, ki -> kj', rot, frame) + tran
        # Plain lists keep the result JSON-serialisable.
        deviations.append(list(np.linalg.norm(aligned - mean_structure, axis=1)))
        # NOTE(review): 0.8518 is presumably a length-unit conversion - confirm.
        RMSDs.append(fit_rms * 0.8518)

        confid += 1
        mysystem = reader.read()

    return (deviations, RMSDs)
def doublets_dist(d1, d2):
    """Return the smaller fit RMS of ``d1['vec']`` against ``d2['vec']`` and ``d2['vec2']``."""
    superimposer = SVDSuperimposer()
    candidates = []
    for key in ('vec', 'vec2'):
        superimposer.set(d1['vec'], d2[key])
        superimposer.run()
        candidates.append(superimposer.get_rms())
    return min(candidates)
def rmsd_distance(points, ref_points, sup_atoms, rmsd_atoms=None, multiple_rmsd_variants=False):
    """Superimpose a residue pair onto a reference pair and score the fit.

    ``points`` and ``ref_points`` are pairs of atom-name -> coordinate
    mappings; ``sup_atoms[0]`` / ``sup_atoms[1]`` name the atoms used for the
    fit in the first / second residue.  If any of those atoms is missing from
    either structure, 1000.0 is returned without fitting.

    Without ``rmsd_atoms`` the fit RMS is returned.  Otherwise the score comes
    from ``_rmsd_formula`` applied with the fitted rotation/translation: once
    for ``rmsd_atoms``, or the minimum over its entries when
    ``multiple_rmsd_variants`` is true.
    """
    cur_first, cur_second = points
    ref_first, ref_second = ref_points

    pairings = (
        (sup_atoms[0], cur_first, ref_first),
        (sup_atoms[1], cur_second, ref_second),
    )
    for names, cur_res, ref_res in pairings:
        if any(name not in cur_res or name not in ref_res for name in names):
            return 1000.0

    ref_p = [ref_first[name] for name in sup_atoms[0]]
    ref_p += [ref_second[name] for name in sup_atoms[1]]
    cur_p = [cur_first[name] for name in sup_atoms[0]]
    cur_p += [cur_second[name] for name in sup_atoms[1]]

    superimposer = SVDSuperimposer()
    superimposer.set(np.array(ref_p, 'f'), np.array(cur_p, 'f'))
    superimposer.run()

    if rmsd_atoms is None:
        return superimposer.get_rms()

    rot, tran = superimposer.get_rotran()
    if multiple_rmsd_variants:
        return min(
            _rmsd_formula(points, ref_points, rot, tran, variant)
            for variant in rmsd_atoms
        )
    return _rmsd_formula(points, ref_points, rot, tran, rmsd_atoms)
def computeRMSD():
    """Return the fit RMS between the selected C-alpha atoms of two structures.

    Reads the module-level ``ca_atoms`` (moving), ``ca_atoms_pdb`` (fixed) and
    ``include_res_map``; only positions whose map entry equals 1 take part in
    the superposition.  Returns 0 when no position is selected.  Prints an
    error and exits when the two atom lists differ in length.

    The ``print`` call is written in function form so the module also parses
    under Python 3 (the statement form was a SyntaxError there).
    """
    if len(ca_atoms) != len(ca_atoms_pdb):
        print("Error. Length mismatch!")
        exit()

    fixed_coord = []
    moving_coord = []
    for i in range(len(ca_atoms)):
        if include_res_map[i] == 1:
            fixed_coord.append(
                [ca_atoms_pdb[i][0], ca_atoms_pdb[i][1], ca_atoms_pdb[i][2]])
            moving_coord.append(
                [ca_atoms[i][0], ca_atoms[i][1], ca_atoms[i][2]])

    if not fixed_coord:
        return 0

    sup = SVDSuperimposer()
    sup.set(numpy.array(fixed_coord), numpy.array(moving_coord))
    sup.run()
    return sup.get_rms()
def compute_transformation(c_ref, c):
    """Fit ``c`` onto ``c_ref`` and return ``(rms, rot, tran)`` of that fit."""
    superimposer = SVDSuperimposer()
    superimposer.set(c_ref, c)
    superimposer.run()
    rotation, translation = superimposer.get_rotran()
    return (superimposer.get_rms(), rotation, translation)
def compute_centroid(reader, mean_structure, num_confs, start=None, stop=None):
    """
    Compares each structure to the mean and returns the one with the lowest fit RMS.

    Each frame's positions, restricted by the module-level ``indexes``, are
    fitted onto ``mean_structure``.  The fitted rotation (and translation, for
    positions) is applied to the full frame and its a1/a3 vectors, and the
    frame with the smallest RMS seen so far is kept.

    Parameters:
        reader: An active trajectory reader exposing ``read(n_skip=...)``.
        mean_structure (numpy.array): Reference positions for the fit.
        num_confs (int): The number of configurations in the reader.
        <optional> start (int): Number of configurations to skip first.
        <optional> stop (int): Upper bound on the local frame counter.

    Returns:
        tuple: ``(centroid_candidate, centroid_a1, centroid_a3, lowest_rmsf,
        centroid_t)``.  When no frame is read (or none beats the initial
        sentinel), the arrays are zeros, ``lowest_rmsf`` is the sentinel and
        ``centroid_t`` is ``None`` - previously this case raised
        ``UnboundLocalError`` at the return statement.
    """
    stop = num_confs if stop is None else int(stop)
    start = 0 if start is None else int(start)
    confid = 0

    # Use the single-value decomposition method for superimposing configurations
    sup = SVDSuperimposer()

    lowest_rmsf = 100000  # sentinel: any real fit is expected to be smaller
    centroid_candidate = np.zeros_like(mean_structure)
    centroid_a1 = np.zeros_like(mean_structure)
    centroid_a3 = np.zeros_like(mean_structure)
    centroid_t = None  # stays None if no frame is ever selected

    mysystem = reader.read(n_skip=start)
    while mysystem != False and confid < stop:
        mysystem.inbox()

        # calculate alignment transform
        cur_conf = mysystem.positions
        indexed_cur_conf = mysystem.positions[indexes]
        cur_conf_a1 = mysystem.a1s
        cur_conf_a3 = mysystem.a3s
        sup.set(mean_structure, indexed_cur_conf)
        sup.run()
        rot, tran = sup.get_rotran()

        # Positions get rotation + translation; a1/a3 are only rotated.
        cur_conf = np.einsum('ij, ki -> kj', rot, cur_conf) + tran
        cur_conf_a1 = np.einsum('ij, ki -> kj', rot, cur_conf_a1)
        cur_conf_a3 = np.einsum('ij, ki -> kj', rot, cur_conf_a3)
        RMSF = sup.get_rms()
        print("Frame number:", confid, "RMSF:", RMSF)

        if RMSF < lowest_rmsf:
            centroid_candidate = cur_conf
            centroid_a1 = cur_conf_a1
            centroid_a3 = cur_conf_a3
            lowest_rmsf = RMSF
            centroid_t = mysystem.time

        confid += 1
        mysystem = reader.read()

    return centroid_candidate, centroid_a1, centroid_a3, lowest_rmsf, centroid_t
def create_DM(pdb_atoms_list, alignement_type):
    """
    Build the pairwise distance matrix used to cluster all structures.

    For every unordered pair of frames the stored distance is:
      * the RMS after a pairwise least-squares fit when ``alignement_type``
        is ``"l"`` (local alignment), or
      * the RMS of the coordinates as given (relying on a global alignment
        made beforehand) for any other value.

    The SVD fit is only run when its result is actually stored; the initial
    RMS does not need it.  The progress-bar maximum is computed with integer
    division so it stays an ``int`` under Python 3.

    Returns:
        DistanceMatrix: matrix of size ``len(pdb_atoms_list)`` with entry
        ``(i, j)`` set for every ``i < j``.
    """
    svd = SVDSuperimposer()
    size = len(pdb_atoms_list)
    FullDM = DistanceMatrix(size)
    local_fit = alignement_type == "l"

    pbar = pg.ProgressBar(widgets=WIDGETS,
                          maxval=size * (size - 1) // 2).start()
    counter = 0
    for i, frame1 in enumerate(pdb_atoms_list):
        for j, frame2 in enumerate(pdb_atoms_list[i + 1:], start=i + 1):
            svd.set(frame1, frame2)
            if local_fit:
                svd.run()
                distance = svd.get_rms()  # RMS with local alignment
            else:
                distance = svd.get_init_rms()  # RMS with the prior global alignment
            FullDM.set(i, j, distance)
            pbar.update(counter)
            counter += 1
    pbar.finish()
    return FullDM
def computeRMSD():
    """Return a ``{chain_id: rms}`` mapping of per-chain C-alpha fit RMS values.

    Reads the module-level ``ca_atoms`` (moving), ``ca_atoms_pdb`` (fixed),
    ``chain_id`` (chain label per position) and ``chain_id_list``.  Each chain
    is superimposed independently; chains with no atoms are omitted from the
    result.  Prints an error and exits when the atom lists differ in length.

    The ``print`` call is written in function form so the module also parses
    under Python 3 (the statement form was a SyntaxError there).
    """
    if len(ca_atoms) != len(ca_atoms_pdb):
        print("Error. Length mismatch!")
        exit()

    n_atoms = len(ca_atoms)
    res = {}
    for ch in chain_id_list:
        fixed_coord = []
        moving_coord = []
        for i in range(n_atoms):
            if chain_id[i] == ch:
                fixed_coord.append(
                    [ca_atoms_pdb[i][0], ca_atoms_pdb[i][1], ca_atoms_pdb[i][2]])
                moving_coord.append(
                    [ca_atoms[i][0], ca_atoms[i][1], ca_atoms[i][2]])

        if fixed_coord:
            sup = SVDSuperimposer()
            sup.set(numpy.array(fixed_coord), numpy.array(moving_coord))
            sup.run()
            res[ch] = sup.get_rms()
    return res
def get_rot_tran(y, x):
    """Fit ``y`` onto ``x`` and return ``(rot, tran, rms)`` of the superposition.

    Note the argument order: ``x`` is passed to the superimposer as the
    reference and ``y`` as the coordinates being moved.
    """
    superimposer = SVDSuperimposer()
    superimposer.set(x, y)
    superimposer.run()
    rotation, translation = superimposer.get_rotran()
    return (rotation, translation, superimposer.get_rms())
def compute_deviations(reader, mean_structure, num_confs, start=None, stop=None):
    """
    Computes the distance of each nucleotide from the mean structure, per frame.

    Every frame's nucleotide ``cm_pos`` positions are fitted onto
    ``mean_structure``; the fitted coordinates are then compared with the mean
    one nucleotide at a time.

    Parameters:
        reader: An active trajectory reader exposing ``_get_system(N_skip=...)``.
        mean_structure (numpy.array): Mean position of each nucleotide.
        num_confs (int): The number of configurations in the reader.
        <optional> start (int): Number of configurations to skip first.
        <optional> stop (int): Upper bound on the local frame counter.

    Returns:
        deviations (list): One list per frame holding the distance of every
        nucleotide from its mean position.
    """
    stop = num_confs if stop is None else int(stop)
    start = 0 if start is None else int(start)

    # Use the single-value decomposition method for superimposing configurations
    superimposer = SVDSuperimposer()
    deviations = []

    confid = 0
    mysystem = reader._get_system(N_skip=start)
    while mysystem != False and confid < stop:
        mysystem.inbox_system()

        cur_conf = np.array([n.cm_pos for n in mysystem._nucleotides])
        superimposer.set(mean_structure, cur_conf)
        superimposer.run()
        print("Frame number:", confid, "RMSF:", superimposer.get_rms())

        rot, tran = superimposer.get_rotran()
        aligned = np.array([np.dot(n_pos, rot) + tran for n_pos in cur_conf])
        # Plain lists keep the result JSON-serialisable.
        deviations.append(
            [np.linalg.norm(offset) for offset in aligned - mean_structure])

        confid += 1
        mysystem = reader._get_system()

    return deviations
def super_pdb(coords1, coords2):
    """Return the RMSD after superimposing ``coords2`` onto ``coords1``.

    Both arguments are sequences of coordinates of equal length.  On a length
    mismatch an error is written to stderr and the process exits with
    status 1.

    The error is emitted with ``sys.stderr.write`` because the Python 2
    ``print >> sys.stderr, ...`` statement is a SyntaxError under Python 3.
    The rotation/translation was fetched but never used, so that call is gone.
    """
    if len(coords1) != len(coords2):
        sys.stderr.write('ERROR: Structures with different length\n')
        sys.exit(1)
    svd = SVDSuperimposer()
    svd.set(np.array(coords1), np.array(coords2))
    svd.run()
    return svd.get_rms()
def calculate_rmsd(atoms_x_coord, atoms_y_coord) -> float:
    """Return the RMSD after fitting ``atoms_y_coord`` onto ``atoms_x_coord``."""
    fitter = SVDSuperimposer()
    # The second array is the one rotated and translated onto the first.
    fitter.set(atoms_x_coord, atoms_y_coord)
    fitter.run()
    return fitter.get_rms()
def get_rmsd(coord1, coord2):
    """Superimpose ``coord2`` onto ``coord1`` and print the fit.

    Prints the rotation matrix (``R``), translation vector (``T``) and the
    RMSD; nothing is returned.  On a length mismatch an error is written to
    stderr and the process exits with status 1.

    Output uses the ``print()`` function and ``sys.stderr.write`` because the
    Python 2 ``print`` statements are SyntaxErrors under Python 3.
    """
    if len(coord1) != len(coord2):
        sys.stderr.write("ERROR: The sets of coordinates have different sizes\n")
        sys.exit(1)
    svd = SVDSuperimposer()
    # The superimposer works on numeric arrays, not plain lists.
    svd.set(np.array(coord1), np.array(coord2))
    svd.run()
    rmsd = svd.get_rms()
    rot, tran = svd.get_rotran()
    print('R', rot)
    print('T', tran)
    print('RMSD', rmsd)
def get_rmsd(coord1, coord2):
    """Superimpose ``coord2`` onto ``coord1``, print the transform, return the RMSD.

    Prints the rotation matrix (``R``) and translation vector (``T``).  On a
    length mismatch an error is written to stderr and the process exits with
    status 1.

    The original guard read ``print >> sys.stderr.write(...)``.  Under
    Python 3 that writes the message and then raises ``TypeError`` (shifting
    the ``print`` function by the write's return value), so ``sys.exit(1)``
    was never reached.  The message is now written directly.
    """
    if len(coord1) != len(coord2):
        sys.stderr.write("ERROR: The set of Coordinate have different size.\n")
        sys.exit(1)
    svd = SVDSuperimposer()
    svd.set(np.array(coord1), np.array(coord2))
    svd.run()
    rmsd = svd.get_rms()
    rot, tran = svd.get_rotran()
    print("R", rot)
    print("T", tran)
    return rmsd
def __sub__(self, other):
    """
    Return the RMSD between two fragments after superposition.

    The C-alpha coordinates (``coords_ca``) of ``other`` are fitted onto
    those of ``self``.

    Example:
        >>> rmsd=fragment1-fragment2

    @return: rmsd between fragments
    @rtype: float
    """
    superimposer = SVDSuperimposer()
    superimposer.set(self.coords_ca, other.coords_ca)
    superimposer.run()
    rmsd = superimposer.get_rms()
    return rmsd
def __sub__(self, other):
    """
    Return the RMSD between two fragments after superposition.

    The C-alpha coordinates (``coords_ca``) of ``other`` are fitted onto
    those of ``self``.

    Example:
        >>> rmsd=fragment1-fragment2

    @return: rmsd between fragments
    @rtype: float
    """
    superimposer = SVDSuperimposer()
    superimposer.set(self.coords_ca, other.coords_ca)
    superimposer.run()
    rmsd = superimposer.get_rms()
    return rmsd
def Superimpose(atoms1, atoms2):
    """Fit ``atoms2`` onto ``atoms1``; return the RMSD and per-position distances.

    Returns:
        tuple: ``(RMSD, deviations)`` where ``deviations[i]`` is the Euclidean
        distance between ``atoms1[i]`` and the transformed ``atoms2[i]``.
    """
    assert len(atoms1) == len(atoms2)

    aligner = SVDSuperimposer()
    aligner.set(atoms1, atoms2)
    aligner.run()
    RMSD = aligner.get_rms()

    # Distance deviation at each position after the fit.
    offsets = atoms1 - aligner.get_transformed()
    squared = np.power(offsets, 2)
    deviations = np.sqrt(np.sum(squared, axis=1))

    return RMSD, deviations
def run_sup3d(coord1, coord2):
    """Superimpose ``coord2`` onto ``coord1`` and print the results.

    Prints, in order: the RMSD, the rotation matrix, the translation vector
    and the transformed copy of ``coord2``.  Returns ``None``.

    Uses the ``print()`` function because the Python 2 ``print`` statements
    are SyntaxErrors under Python 3.
    """
    sup = SVDSuperimposer()
    # A fresh superimposer holds no coordinates until set() is called.
    sup.set(np.array(coord1), np.array(coord2))
    # run() performs the actual least-squares fit.
    sup.run()
    rmsd = sup.get_rms()
    rot, tran = sup.get_rotran()  # rotation matrix and translation vector
    tcoord = sup.get_transformed()  # coord2 after being moved onto coord1
    print(rmsd)
    print(rot)
    print(tran)
    print(tcoord)
    return
def _superimpose_atoms(ref_points, points, atoms): if ref_points is None or points is None or atoms is None: return (None, None, None, None) ref_vec = [] vec = [] for a in atoms: if a in ref_points and a in points: ref_vec.append(ref_points[a]) vec.append(points[a]) if len(vec) < 3: return (None, None, None, None) sup = SVDSuperimposer() sup.set(np.array(ref_vec, 'f'), np.array(vec, 'f')) sup.run() (rot, tran) = sup.get_rotran() rms = sup.get_rms() return (_apply_rot_tran(points, rot, tran), rot, tran, rms)
def __sub__(self, other):
    """Return the RMSD between two fragments after superposition.

    The C-alpha coordinates (``coords_ca``) of ``other`` are fitted onto
    those of ``self``.

    :return: rmsd between fragments
    :rtype: float

    Examples
    --------
    This is an incomplete but illustrative example::

        rmsd = fragment1 - fragment2

    """
    superimposer = SVDSuperimposer()
    superimposer.set(self.coords_ca, other.coords_ca)
    superimposer.run()
    rmsd = superimposer.get_rms()
    return rmsd
def computeRMSD():
    """Return the fit RMS between all C-alpha atoms of two structures.

    Reads the module-level ``ca_atoms`` (moving) and ``ca_atoms_pdb`` (fixed).
    Prints an error with both lengths and exits when they differ.

    The ``print`` call is written in function form because the Python 2
    statement form is a SyntaxError under Python 3.
    """
    if len(ca_atoms) != len(ca_atoms_pdb):
        print("Error. Length mismatch!", len(ca_atoms), len(ca_atoms_pdb))
        exit()

    n_atoms = len(ca_atoms)
    fixed_coord = numpy.zeros((n_atoms, 3))
    moving_coord = numpy.zeros((n_atoms, 3))
    for i in range(n_atoms):
        fixed_coord[i] = numpy.array(
            [ca_atoms_pdb[i][0], ca_atoms_pdb[i][1], ca_atoms_pdb[i][2]])
        moving_coord[i] = numpy.array(
            [ca_atoms[i][0], ca_atoms[i][1], ca_atoms[i][2]])

    sup = SVDSuperimposer()
    sup.set(fixed_coord, moving_coord)
    sup.run()
    return sup.get_rms()
def distance_matrix(CA):
    """Return the symmetric matrix of pairwise fit RMS values between models.

    ``CA`` is indexed as ``CA[model, :, :]``.  The diagonal is left at zero.
    """
    n_models = CA.shape[0]
    distances = np.zeros((n_models, n_models))
    superimposer = SVDSuperimposer()
    for i in range(n_models):
        reference = CA[i, :, :]
        for j in range(i + 1, n_models):
            superimposer.set(reference, CA[j, :, :])
            superimposer.run()
            # One fit fills both mirrored entries.
            distances[i, j] = distances[j, i] = superimposer.get_rms()
    return distances
def computeRMSD():
    """Return the fit RMS between all C-alpha atoms of two structures.

    Reads the module-level ``ca_atoms`` (moving) and ``ca_atoms_pdb`` (fixed).
    Prints an error and exits when they differ in length.

    The ``print`` call is written in function form so the module also parses
    under Python 3 (the statement form was a SyntaxError there).
    """
    if len(ca_atoms) != len(ca_atoms_pdb):
        print("Error. Length mismatch!")
        exit()

    n_atoms = len(ca_atoms)
    fixed_coord = numpy.zeros((n_atoms, 3))
    moving_coord = numpy.zeros((n_atoms, 3))
    for i in range(n_atoms):
        fixed_coord[i] = numpy.array(
            [ca_atoms_pdb[i][0], ca_atoms_pdb[i][1], ca_atoms_pdb[i][2]])
        moving_coord[i] = numpy.array(
            [ca_atoms[i][0], ca_atoms[i][1], ca_atoms[i][2]])

    sup = SVDSuperimposer()
    sup.set(fixed_coord, moving_coord)
    sup.run()
    return sup.get_rms()
def sel_straight(coords_arr, n_cc_helices):
    """Pick the structure whose consecutive helices superimpose best.

    Every entry of ``coords_arr`` is cut into ``n_cc_helices`` equal blocks
    of rows.  Each block is fitted onto the next one and the RMS values are
    averaged per structure.

    Returns:
        tuple: ``(index, value)`` of the smallest average RMS.
    """
    n_atoms_mono = int(coords_arr[0].shape[0] / n_cc_helices)

    chain_rmss = []
    for coords in coords_arr:
        helices = [
            coords[k * n_atoms_mono:(k + 1) * n_atoms_mono]
            for k in range(n_cc_helices)
        ]
        pair_rms = []
        for current, following in zip(helices, helices[1:]):
            superimposer = SVDSuperimposer()
            superimposer.set(current, following)
            superimposer.run()
            pair_rms.append(superimposer.get_rms())
        chain_rmss.append(np.mean(pair_rms))

    return np.argmin(chain_rmss), np.min(chain_rmss)
def __init__(self, static, moving):
    """
    Align two structures.

    Stores the fit RMS in ``self.rms``, the untouched reference in
    ``self.static`` and the transformed coordinates (one array per entry of
    ``moving``) in ``self.moving``.

    :param static: the reference structure
    :param moving: the structure to be aligned to the reference
    """
    superimposer = SVDSuperimposer()
    superimposer.set(np.asarray(static), np.asarray(moving))
    superimposer.run()
    rotation, translation = superimposer.get_rotran()

    self.rms = superimposer.get_rms()
    self.static = static

    transformed = []
    for idx in range(len(moving)):
        transformed.append(np.dot(np.asarray(moving[idx]), rotation) + translation)
    self.moving = transformed
def set_atoms(self, fixed, moving):
    """Compute the transform that fits the moving atoms onto the fixed atoms.

    The RMS of the fit is stored in ``self.rms`` and the rotation/translation
    pair in ``self.rotran``.

    :param fixed: list of (fixed) atoms
    :param moving: list of (moving) atoms
    :type fixed,moving: [L{Atom}, L{Atom},...]
    """
    if len(fixed) != len(moving):
        raise PDBException("Fixed and moving atom lists differ in size")

    n_atoms = len(fixed)
    fixed_coord = numpy.zeros((n_atoms, 3))
    moving_coord = numpy.zeros((n_atoms, 3))
    for row in range(n_atoms):
        fixed_coord[row] = fixed[row].get_coord()
        moving_coord[row] = moving[row].get_coord()

    superimposer = SVDSuperimposer()
    superimposer.set(fixed_coord, moving_coord)
    superimposer.run()
    self.rms = superimposer.get_rms()
    self.rotran = superimposer.get_rotran()
def compute_frag_RMSD(res_len):
    """Return the fit RMS between a fragment and the target C-alpha atoms.

    Reads the module-level ``ca_atoms`` (fragment, moving) and
    ``ca_atoms_pdb`` (target, fixed).  Returns 0 after printing a message
    when the two lists differ in length or when their length is not
    ``res_len``.

    The ``print`` calls are written in function form because the Python 2
    statement form is a SyntaxError under Python 3.
    """
    if len(ca_atoms) != len(ca_atoms_pdb):
        print("Error. Length mismatch! target:frag",
              len(ca_atoms_pdb), len(ca_atoms))
        return 0

    n_atoms = len(ca_atoms)
    if n_atoms != res_len:
        print("atom list length mismatches the fragment length!",
              str(n_atoms), str(res_len))
        return 0

    fixed_coord = numpy.zeros((n_atoms, 3))
    moving_coord = numpy.zeros((n_atoms, 3))
    for i in range(n_atoms):
        fixed_coord[i] = numpy.array(
            [ca_atoms_pdb[i][0], ca_atoms_pdb[i][1], ca_atoms_pdb[i][2]])
        moving_coord[i] = numpy.array(
            [ca_atoms[i][0], ca_atoms[i][1], ca_atoms[i][2]])

    sup = SVDSuperimposer()
    sup.set(fixed_coord, moving_coord)
    sup.run()
    return sup.get_rms()
def set_atoms(self, fixed, moving):
    """Compute the transform that fits the moving atoms onto the fixed atoms.

    The RMS of the fit is stored in ``self.rms`` and the rotation/translation
    pair in ``self.rotran``.

    @param fixed: list of (fixed) atoms
    @param moving: list of (moving) atoms
    @type fixed,moving: [L{Atom}, L{Atom},...]
    """
    if len(fixed) != len(moving):
        raise PDBException("Fixed and moving atom lists differ in size")

    n_atoms = len(fixed)
    fixed_coord = numpy.zeros((n_atoms, 3))
    moving_coord = numpy.zeros((n_atoms, 3))
    for row in range(n_atoms):
        fixed_coord[row] = fixed[row].get_coord()
        moving_coord[row] = moving[row].get_coord()

    superimposer = SVDSuperimposer()
    superimposer.set(fixed_coord, moving_coord)
    superimposer.run()
    self.rms = superimposer.get_rms()
    self.rotran = superimposer.get_rotran()
def super_prot(atom_coords_1, atom_coords_2):
    """Return the RMSD after superimposing the second coordinate set onto the first."""
    superimposer = SVDSuperimposer()
    superimposer.set(atom_coords_1, atom_coords_2)
    superimposer.run()
    # The accessor is get_rms(); there is no get_rmsd().
    rmsd = superimposer.get_rms()
    return rmsd
y = array([[51.30, -2.99, 46.54], [51.09, -1.88, 47.58], [52.36, -1.20, 48.03], [52.71, -1.18, 49.38]], 'f') sup = SVDSuperimposer() # set the coords # y will be rotated and translated on x sup.set(x, y) # do the lsq fit sup.run() # get the rmsd rms = sup.get_rms() # get rotation (right multiplying!) and the translation rot, tran = sup.get_rotran() # rotate y on x manually y_on_x1 = dot(y, rot) + tran # same thing y_on_x2 = sup.get_transformed() def simple_matrix_print(matrix): """Simple string to display a floating point matrix This should give the same output on multiple systems. This is
class SVDSuperimposerTest(unittest.TestCase):
    """Tests for SVDSuperimposer: lazy attributes, the fit and its accessors."""

    def setUp(self):
        """Create a superimposer loaded with two four-atom coordinate sets."""
        self.x = array([[51.65, -1.90, 50.07],
                        [50.40, -1.23, 50.65],
                        [50.68, -0.04, 51.54],
                        [50.22, -0.02, 52.85]])
        self.y = array([[51.30, -2.99, 46.54],
                        [51.09, -1.88, 47.58],
                        [52.36, -1.20, 48.03],
                        [52.71, -1.18, 49.38]])
        self.sup = SVDSuperimposer()
        self.sup.set(self.x, self.y)

    def test_get_init_rms(self):
        """The initial RMS is computed lazily and matches the expected value."""
        x = array([[1.19, 1.28, 1.37],
                   [1.46, 1.55, 1.64],
                   [1.73, 1.82, 1.91]])
        y = array([[1.91, 1.82, 1.73],
                   [1.64, 1.55, 1.46],
                   [1.37, 1.28, 1.19]])
        self.sup.set(x, y)
        self.assertIsNone(self.sup.init_rms)
        init_rms = 0.8049844719
        # assertEqual, not assertTrue: assertTrue(a, b) treats b as the
        # failure message and passes for any non-zero a.
        self.assertEqual(float("%.3f" % self.sup.get_init_rms()),
                         float("%.3f" % init_rms))

    def test_oldTest(self):
        """Check state before/after run() and the values of every accessor."""
        self.assertTrue(
            array_equal(around(self.sup.reference_coords, decimals=3),
                        around(self.x, decimals=3)))
        self.assertTrue(
            array_equal(around(self.sup.coords, decimals=3),
                        around(self.y, decimals=3)))
        self.assertIsNone(self.sup.rot)
        self.assertIsNone(self.sup.tran)
        self.assertIsNone(self.sup.rms)
        self.assertIsNone(self.sup.init_rms)

        self.sup.run()

        # run() must not modify the stored coordinates.
        self.assertTrue(
            array_equal(around(self.sup.reference_coords, decimals=3),
                        around(self.x, decimals=3)))
        self.assertTrue(
            array_equal(around(self.sup.coords, decimals=3),
                        around(self.y, decimals=3)))
        rot = array([[0.68304983, 0.53664371, 0.49543563],
                     [-0.52277295, 0.83293229, -0.18147242],
                     [-0.51005037, -0.13504564, 0.84947707]])
        tran = array([38.78608157, -20.65451334, -15.42227366])
        self.assertTrue(
            array_equal(around(self.sup.rot, decimals=3),
                        around(rot, decimals=3)))
        self.assertTrue(
            array_equal(around(self.sup.tran, decimals=3),
                        around(tran, decimals=3)))
        # The RMS values stay unset until explicitly requested.
        self.assertIsNone(self.sup.rms)
        self.assertIsNone(self.sup.init_rms)

        rms = 0.00304266526014
        self.assertEqual(float("%.3f" % self.sup.get_rms()),
                         float("%.3f" % rms))

        rot_get, tran_get = self.sup.get_rotran()
        self.assertTrue(
            array_equal(around(rot_get, decimals=3),
                        around(rot, decimals=3)))
        self.assertTrue(
            array_equal(around(tran_get, decimals=3),
                        around(tran, decimals=3)))

        # Manual transform (right-multiplying rotation) ...
        y_on_x1 = dot(self.y, rot) + tran
        y_x_solution = array(
            [[5.16518846e+01, -1.90018270e+00, 5.00708397e+01],
             [5.03977138e+01, -1.22877050e+00, 5.06488200e+01],
             [5.06801788e+01, -4.16095666e-02, 5.15368866e+01],
             [5.02202228e+01, -1.94372374e-02, 5.28534537e+01]])
        self.assertTrue(
            array_equal(around(y_on_x1, decimals=3),
                        around(y_x_solution, decimals=3)))

        # ... must agree with get_transformed().
        y_on_x2 = self.sup.get_transformed()
        self.assertTrue(
            array_equal(around(y_on_x2, decimals=3),
                        around(y_x_solution, decimals=3)))
def merge_cc(coords_list, res_overlap, n_cc_helices):
    """Chain coiled-coil segments into one model by fitting overlapping ends.

    Each entry of ``coords_list`` stacks ``n_cc_helices`` equally sized
    helices row-wise.  Segment ``k + 1`` is fitted onto the already placed
    segment ``k`` using the last ``res_overlap[k]`` residues of every helix of
    ``k`` against the first ``res_overlap[k]`` residues of ``k + 1`` (five
    atoms per residue).  The overlapping atoms are then blended linearly and
    the rest of the new segment is appended, helix by helix.

    Returns:
        tuple: ``(res_dimer, rmsd)`` - the merged coordinates with helices
        stacked row-wise again, and the square root of the summed squared fit
        RMS values.
    """
    n_atoms_per_res = 5
    ref_coords = coords_list[0]
    aligned_coords = [deepcopy(coords_list[0])]
    n_atoms_mono = int(ref_coords.shape[0] / n_cc_helices)

    # Stage 1: place every segment in the frame of its predecessor.
    msds = []
    for coords, cc_overlap in zip(coords_list[1:], res_overlap):
        n_atoms_overlap = cc_overlap * n_atoms_per_res
        ref_atoms = np.concatenate(
            [ref_coords[(i + 1) * n_atoms_mono - n_atoms_overlap:(i + 1) * n_atoms_mono]
             for i in range(n_cc_helices)],
            axis=0)
        sup_atoms = np.concatenate(
            [coords[i * n_atoms_mono:i * n_atoms_mono + n_atoms_overlap]
             for i in range(n_cc_helices)],
            axis=0)

        sup = SVDSuperimposer()
        sup.set(ref_atoms, sup_atoms)
        sup.run()
        msds.append(sup.get_rms() ** 2)
        rot, tran = sup.get_rotran()

        coord_new = np.dot(coords, rot) + tran
        aligned_coords.append(coord_new)
        ref_coords = coord_new
    rmsd = np.sqrt(np.sum(msds))

    # Stage 2: stitch the placed segments together helix by helix.
    hi_all = [
        aligned_coords[0][i * n_atoms_mono:(i + 1) * n_atoms_mono]
        for i in range(n_cc_helices)
    ]
    for coords, cc_overlap in zip(aligned_coords[1:], res_overlap):
        hi = [
            coords[i * n_atoms_mono:(i + 1) * n_atoms_mono]
            for i in range(n_cc_helices)
        ]
        n_atoms_overlap = cc_overlap * n_atoms_per_res

        # Blend the overlap: the weight of the new segment grows residue by
        # residue and is shared by the atoms of a residue.
        for ind_shift in range(n_atoms_overlap):
            ind_overlap = ind_shift // n_atoms_per_res
            weight = (ind_overlap + 1) / float(cc_overlap + 1)
            for i in range(n_cc_helices):
                coordi_prev = hi_all[i][-n_atoms_overlap + ind_shift]
                coordi_next = hi[i][ind_shift]
                hi_all[i][-n_atoms_overlap + ind_shift] = (
                    1 - weight) * coordi_prev + weight * coordi_next

        hi_all = [
            np.append(hi_all[i], hi[i][n_atoms_overlap:], axis=0)
            for i in range(n_cc_helices)
        ]

    res_dimer = hi_all[0]
    for helix in hi_all[1:]:
        res_dimer = np.append(res_dimer, helix, axis=0)
    return res_dimer, rmsd
class ResidueMutator(object):
    """Replace a non-standard or incomplete residue with a standard template.

    Template residues are read from tripeptide structure files (the residue
    with id 2 of the blank-named chain of every model) and grouped by residue
    name.
    """

    def __init__(self, tripeptides=None, components=None, standard_residues=None):
        """Load the template residues.

        Args:
            tripeptides: iterable of structure file names; defaults to
                ``data.tripeptides``.
            components: mapping from residue name to its chemical-component
                record; defaults to ``data.chem_components``.
            standard_residues: container of standard residue names; defaults
                to ``data.standard_residues``.

        Raises:
            ModuleNotFoundError: if BioPython is not installed.
        """
        try:
            from Bio.PDB import PDBParser
            from Bio.SVDSuperimposer import SVDSuperimposer
        except ModuleNotFoundError:
            raise ModuleNotFoundError(
                "BioPython is required for this functionality")

        # get defaults if not provided
        if standard_residues is None:
            standard_residues = data.standard_residues
        if tripeptides is None:
            tripeptides = data.tripeptides
        if components is None:
            components = data.chem_components
        self.components = components
        self.candidates = {}
        self.standard_residues = standard_residues
        self.imposer = SVDSuperimposer()
        self.parser = PDBParser(PERMISSIVE=1, QUIET=True)

        # build up candidate structures
        for fn in tripeptides:
            structure = self.parser.get_structure("", fn)
            resn = structure[0][" "][2].get_resname()
            self.candidates[resn] = [model[" "][2] for model in structure]

    def mutate(self, residue, replace_backbone=True):
        """Return a standard-residue replacement for ``residue``.

        The template with the best side-chain fit onto ``residue`` is copied,
        transformed onto it and stripped of hydrogens.  With
        ``replace_backbone`` the main-chain atoms present in ``residue`` are
        copied over the template's.

        Returns:
            The new residue, or ``False`` when the residue cannot be mutated:
            its parent is not standard, no templates exist for the parent, no
            shared side-chain atom is present, or no template carries all the
            atoms needed for the fit (the last case used to crash with
            ``KeyError``/``AttributeError``).
        """
        resn = residue.get_resname()

        if self.standard(resn):
            # the residue is already a standard residue, here for repair
            parn = resn
        else:
            parn = self.components[resn]['_chem_comp.mon_nstd_parent_comp_id']
            if not self.standard(parn):
                # the parent residue is a nonstandard residue, can't mutate
                return False

        if parn not in self.candidates:
            # parent not in candidate structures
            return False

        sc_fixed = set(self.components[resn]['side_chain_atoms'])  # fixed residue
        sc_movin = set(self.components[parn]['side_chain_atoms'])  # standard parent
        atom_names = sc_fixed.intersection(sc_movin)

        # side chain atoms actually present in the residue
        atom_list = [atom for atom in atom_names if atom in residue]
        if not atom_list:
            return False

        # get side chain atom coordinates
        fixed_coord = np.zeros((len(atom_list), 3))
        for i, name in enumerate(atom_list):
            fixed_coord[i] = residue[name].get_coord()

        # loop over candidates, finding best RMSD
        moved_coord = np.zeros((len(atom_list), 3))
        min_rms = 99999
        rotm = None
        tran = None
        min_candidate = None
        for candidate in self.candidates[parn]:
            if any(name not in candidate for name in atom_list):
                # template lacks an atom needed for the fit
                continue
            for j, name in enumerate(atom_list):
                moved_coord[j] = candidate[name].get_coord()

            # perform SVD fitting
            self.imposer.set(fixed_coord, moved_coord)
            self.imposer.run()
            rms = self.imposer.get_rms()
            if rms < min_rms:
                min_rms = rms
                rotm, tran = self.imposer.get_rotran()
                min_candidate = candidate

        if min_candidate is None:
            # nothing to copy: no usable template was found
            return False

        # copy the candidate to a new object
        candidate = min_candidate.copy()
        candidate.transform(rotm, tran)
        stripHydrogens(candidate)

        if replace_backbone:
            # replace backbone atoms of candidate
            backbone_atoms = self.components[resn]['main_chain_atoms']
            for atom in backbone_atoms:
                if atom not in residue:
                    continue
                if atom not in candidate:
                    candidate.add(residue[atom].copy())
                candidate[atom].set_coord(residue[atom].get_coord())

        return candidate

    def standard(self, resname):
        """Return True if ``resname`` is one of the standard residues."""
        return resname in self.standard_residues

    def modified(self, resname):
        """Return True if ``resname`` is non-standard but has a standard parent."""
        if resname in self.standard_residues:
            # it's standard, not modified
            return False

        parent_key = '_chem_comp.mon_nstd_parent_comp_id'
        if resname in self.components and parent_key in self.components[resname]:
            return self.components[resname][parent_key] in self.standard_residues

        # has no standard parent field - can't be modified
        return False
class SVDSuperimposerTest(unittest.TestCase):
    """Tests for SVDSuperimposer: lazy attributes, the fit and its accessors."""

    def setUp(self):
        """Create a superimposer loaded with two four-atom coordinate sets."""
        self.x = array([[51.65, -1.90, 50.07],
                        [50.40, -1.23, 50.65],
                        [50.68, -0.04, 51.54],
                        [50.22, -0.02, 52.85]])
        self.y = array([[51.30, -2.99, 46.54],
                        [51.09, -1.88, 47.58],
                        [52.36, -1.20, 48.03],
                        [52.71, -1.18, 49.38]])
        self.sup = SVDSuperimposer()
        self.sup.set(self.x, self.y)

    def test_get_init_rms(self):
        """The initial RMS is computed lazily and matches the expected value."""
        x = array([[1.19, 1.28, 1.37],
                   [1.46, 1.55, 1.64],
                   [1.73, 1.82, 1.91]])
        y = array([[1.91, 1.82, 1.73],
                   [1.64, 1.55, 1.46],
                   [1.37, 1.28, 1.19]])
        self.sup.set(x, y)
        self.assertIsNone(self.sup.init_rms)
        init_rms = 0.8049844719
        # assertEqual, not assertTrue: assertTrue(a, b) treats b as the
        # failure message and passes for any non-zero a.
        self.assertEqual(float('%.3f' % self.sup.get_init_rms()),
                         float('%.3f' % init_rms))

    def test_oldTest(self):
        """Check state before/after run() and the values of every accessor."""
        self.assertTrue(
            array_equal(around(self.sup.reference_coords, decimals=3),
                        around(self.x, decimals=3)))
        self.assertTrue(
            array_equal(around(self.sup.coords, decimals=3),
                        around(self.y, decimals=3)))
        self.assertIsNone(self.sup.rot)
        self.assertIsNone(self.sup.tran)
        self.assertIsNone(self.sup.rms)
        self.assertIsNone(self.sup.init_rms)

        self.sup.run()

        # run() must not modify the stored coordinates.
        self.assertTrue(
            array_equal(around(self.sup.reference_coords, decimals=3),
                        around(self.x, decimals=3)))
        self.assertTrue(
            array_equal(around(self.sup.coords, decimals=3),
                        around(self.y, decimals=3)))
        rot = array([[0.68304983, 0.53664371, 0.49543563],
                     [-0.52277295, 0.83293229, -0.18147242],
                     [-0.51005037, -0.13504564, 0.84947707]])
        tran = array([38.78608157, -20.65451334, -15.42227366])
        self.assertTrue(
            array_equal(around(self.sup.rot, decimals=3),
                        around(rot, decimals=3)))
        self.assertTrue(
            array_equal(around(self.sup.tran, decimals=3),
                        around(tran, decimals=3)))
        # The RMS values stay unset until explicitly requested.
        self.assertIsNone(self.sup.rms)
        self.assertIsNone(self.sup.init_rms)

        rms = 0.00304266526014
        self.assertEqual(float('%.3f' % self.sup.get_rms()),
                         float('%.3f' % rms))

        rot_get, tran_get = self.sup.get_rotran()
        self.assertTrue(
            array_equal(around(rot_get, decimals=3),
                        around(rot, decimals=3)))
        self.assertTrue(
            array_equal(around(tran_get, decimals=3),
                        around(tran, decimals=3)))

        # Manual transform (right-multiplying rotation) ...
        y_on_x1 = dot(self.y, rot) + tran
        y_x_solution = array(
            [[5.16518846e+01, -1.90018270e+00, 5.00708397e+01],
             [5.03977138e+01, -1.22877050e+00, 5.06488200e+01],
             [5.06801788e+01, -4.16095666e-02, 5.15368866e+01],
             [5.02202228e+01, -1.94372374e-02, 5.28534537e+01]])
        self.assertTrue(
            array_equal(around(y_on_x1, decimals=3),
                        around(y_x_solution, decimals=3)))

        # ... must agree with get_transformed().
        y_on_x2 = self.sup.get_transformed()
        self.assertTrue(
            array_equal(around(y_on_x2, decimals=3),
                        around(y_x_solution, decimals=3)))
def run_system(dir):
    """Compute per-frame RMSD to the reference coordinates for one system.

    Works inside ``dir``: loads the trajectory from ``coord.h5`` (dataset
    ``"coord"``) and the reference from
    ``../../cmap_coordinates/<pdb>.txt``, where ``<pdb>`` is the part of the
    directory name before the first underscore.  The reference and every
    frame are centred on their mean, each frame is fitted onto the reference,
    and the fit RMS values are written to ``RMSD.txt``.  A summary line with
    the mean RMSD and the radius-of-gyration ratio over the last 200 frames is
    printed.

    The system is skipped (with a message) when an input file is missing, or
    when ``RMSD.txt`` already exists and the module-level ``_FORCE`` is falsy.

    Changes from the previous version: ``print`` is used as a function
    (Python 3), the working directory is restored and the HDF5 file closed
    even if an exception occurs, and unused intermediate values and the dead
    plotting snippet were removed.

    Returns:
        The ``dir`` argument, unchanged.
    """
    pdb = os.path.basename(dir).split('_')[0]
    org_dir = os.getcwd()
    os.chdir(dir)
    try:
        f_coord = "coord.h5"
        f_RMSD = "RMSD.txt"
        f_OC = os.path.join("..", "..", "cmap_coordinates", pdb + '.txt')

        if not os.path.exists(f_OC):
            print("Missing coordinates for cmap", dir)
            return dir

        if not os.path.exists(f_coord):
            print("Missing coordinates, extract_coordinates.py first", dir)
            return dir

        if os.path.exists(f_RMSD) and not _FORCE:
            print("RMSD file exists, skipping", dir)
            return dir

        with h5py.File(f_coord, 'r') as h5:
            C = h5["coord"][:]
        OC = np.loadtxt(f_OC)

        assert C[0].shape == OC.shape

        sup = SVDSuperimposer()
        RMSD = []
        RG = []

        # Centre the reference and compute its radius of gyration.
        OC -= OC.mean(axis=0)
        OC_RG = ((np.linalg.norm(OC, axis=1) ** 2).sum() / len(OC)) ** 0.5

        for cx in C:
            cx -= cx.mean(axis=0)
            RG.append(((np.linalg.norm(cx, axis=1) ** 2).sum() / len(cx)) ** 0.5)

            sup.set(OC, cx)
            sup.run()
            RMSD.append(sup.get_rms())

        RMSD = np.array(RMSD)
        RG = np.array(RG)

        print("{} {: 0.4f} {: 0.4f}".format(dir,
                                            RMSD[-200:].mean(),
                                            RG[-200:].mean() / OC_RG))

        np.savetxt(f_RMSD, RMSD)
    finally:
        # Always return to the directory the caller started in.
        os.chdir(org_dir)

    return dir
def tm_movement_2D(pdbs1, pdbs2, mode, data, gn_dictionary):
    """Compare the 7TM helix placement of two structure sets in a 2D projection.

    For each of the seven TM segments a triple of reference residues (plus a
    triple of "helper" residues further along the helix) is selected from the
    generic numbers conserved in both sets. Averaged residue-residue distances
    of both sets are turned into 3D points with ``recreate3Dorder``, set 2 is
    superimposed onto set 1 using the three most stable TMs, and the TM
    centroids are projected with ``convert3D_to_2D_plane``.

    Parameters:
        pdbs1: Structures of set 1; passed to ``Distances.load_pdbs`` and
            ``len(pdbs1)`` is used to average the distances.
        pdbs2: Structures of set 2, used like ``pdbs1``.
        mode (int): 0 = extracellular, 1 = intracellular, 2 = pocket,
            3 = middle (index into ``string_mode``).
        data (dict): Must provide ``data['tab4'][<key>]['angles_set1'|'angles_set2']``
            (index 11 is read) and, for mode 3, ``data['gpcr_class']``.
        gn_dictionary (dict): Maps the generic numbers used here to the keys of
            ``data['tab4']``; also used to translate the returned ``gns_used``.

    Returns:
        dict with keys ``"coordinates_set1"``, ``"coordinates_set2"`` (lists of
        seven dicts with ``label``, ``x``, ``y``, ``z``, ``rotation``) and
        ``"gns_used"``; or an empty list when a TM has too few conserved
        residues.
    """
    string_mode = ["extracellular", "intracellular", "pocket", "middle"]
    intracellular = (mode == 1)
    print("COMPARISON", string_mode[mode])
    print(pdbs1)
    print("VS")
    print(pdbs2)

    distances_set1 = Distances()
    distances_set1.load_pdbs(pdbs1)
    distances_set1.filtered_gns = True

    distances_set2 = Distances()
    distances_set2.load_pdbs(pdbs2)
    distances_set2.filtered_gns = True

    # Only generic numbers present in both sets can be compared
    conserved_set1 = distances_set1.fetch_conserved_gns_tm()
    conserved_set2 = distances_set2.fetch_conserved_gns_tm()
    conserved = [x for x in conserved_set2 if x in conserved_set1]

    # Independent lists per TM (avoids the aliasing of [[]] * 7)
    gns = [[] for _ in range(7)]
    middle_gpcr = [[] for _ in range(7)]

    if mode <= 1:  # Intracellular or Extracellular
        for i in range(0, 7):
            # The first character of a generic number is the TM number
            tm_only = [x for x in conserved if x[0] == str(i+1)]
            if intracellular and i % 2 == 0:  # all uneven TMs (as # = i+1)
                tm_only.reverse()
            elif not intracellular and i % 2 == 1:  # all even TMs (as # = i+1)
                tm_only.reverse()

            if len(tm_only) < 3:
                print("too few residues")
                return []

            gns[i] = tm_only[0:3]
            # Helper points: the furthest available triple, at most 12 residues in
            for upwards in range(12, 6, -1):
                if len(tm_only) >= upwards:
                    middle_gpcr[i] = tm_only[(upwards-3):upwards]
                    break
        # NOTE: an earlier variant took the helper points from fixed
        # membrane-middle references per GPCR class (see mode 3).
    elif mode == 2:  # Major pocket (class A)
        ligand_references = [['1x39', '1x40', '1x41'], ['2x56', '2x57', '2x58'], ['3x31', '3x32', '3x33'], ['4x56', '4x57', '4x58'], ['5x43', '5x44', '5x45'], ['6x51', '6x52', '6x53'], ['7x39', '7x40', '7x41']]
        for i in range(0, 7):
            gns[i] = [x for x in ligand_references[i] if x in conserved]
            tm_only = [x for x in conserved if x[0] == str(i+1)]
            if i % 2 == 1:  # all even TMs (as # = i+1)
                tm_only.reverse()

            if len(gns[i]) > 0:
                if i % 2 == 1:  # all even TMs (as # = i+1): list is reversed
                    start_pos = tm_only.index(gns[i][-1])
                else:
                    start_pos = tm_only.index(gns[i][0])

                gns[i] = tm_only[start_pos:(start_pos+3)]

                # Stay close for this as references
                # (fixed alternative: tm_only[(start_pos+6):(start_pos+9)])
                # NOTE(review): `continue` lets smaller windows overwrite larger
                # ones, so the closest window (upwards == 7) wins; mode <= 1
                # uses `break` here. Behaviour kept as is - confirm intent.
                for upwards in range(9, 6, -1):
                    if len(tm_only) >= (start_pos+upwards):
                        middle_gpcr[i] = tm_only[(start_pos+upwards-3):(start_pos+upwards)]
                        continue
            else:
                if len(tm_only) < 9:
                    print("too few residues")
                    return []
                else:
                    # Reference residues not conserved: fall back to the helix end
                    gns[i] = tm_only[0:3]
                    middle_gpcr[i] = tm_only[6:9]
    elif mode == 3:  # Middle
        # References points from membrane middle of GPCR
        ref_membrane_mid = {}
        ref_membrane_mid["001"] = [['1x43', '1x44', '1x45'], ['2x51', '2x52', '2x53'], ['3x35', '3x36', '3x37'], ['4x53', '4x54', '4x55'], ['5x45', '5x46', '5x47'], ['6x47', '6x48', '6x49'], ['7x42', '7x43', '7x44']]  # A
        ref_membrane_mid["002"] = [['1x50', '1x51', '1x52'], ['2x57', '2x58', '2x59'], ['3x40', '3x41', '3x42'], ['4x55', '4x56'], ['5x42', '5x43', '5x44'], ['6x48', '6x49', '6x50'], ['7x47', '7x49']]  # B1
        ref_membrane_mid["003"] = ref_membrane_mid["002"]  # B2
        ref_membrane_mid["004"] = [['1x48', '1x49', '1x50'], ['2x47', '2x48', '2x49'], ['3x39', '3x40', '3x41'], ['4x40', '4x41', '4x42'], ['5x47', '5x48', '5x49'], ['6x47', '6x48', '6x49'], ['7x39', '7x40', '7x41']]  # C
        ref_membrane_mid["006"] = [['1x42', '1x43', '1x44'], ['2x52', '2x53', '2x54'], ['3x37', '3x38', '3x39'], ['4x52', '4x53', '4x54'], ['5x52', '5x53', '5x54'], ['6x42', '6x43', '6x44'], ['7x46', '7x47', '7x48']]  # F

        membrane_mid = ref_membrane_mid[data['gpcr_class']]
        if data['gpcr_class'] != "001":
            # Translate the references through the inverted gn_dictionary
            inv_gn_dictionary = {v: k for k, v in gn_dictionary.items()}
            membrane_mid = [[inv_gn_dictionary[res] for res in tm_refs] for tm_refs in membrane_mid]

        for i in range(0, 7):
            gns[i] = [x for x in membrane_mid[i] if x in conserved]
            tm_only = [x for x in conserved if x[0] == str(i+1)]
            if i % 2 == 1:  # all even TMs (as # = i+1)
                tm_only.reverse()

            if len(gns[i]) > 0:
                if i % 2 == 1:  # all even TMs (as # = i+1): list is reversed
                    start_pos = tm_only.index(gns[i][-1])
                else:
                    start_pos = tm_only.index(gns[i][0])

                gns[i] = tm_only[start_pos:(start_pos+3)]

                # Stay close for this as references
                # NOTE(review): same `continue` vs `break` question as in
                # mode 2; here the surviving window (upwards == 4) overlaps
                # gns[i] and is trimmed by the filter below. Behaviour kept.
                for upwards in range(6, 3, -1):
                    if len(tm_only) >= (start_pos+upwards):
                        middle_gpcr[i] = tm_only[(start_pos+upwards-3):(start_pos+upwards)]
                        continue
            else:
                if len(tm_only) < 6:
                    print("too few residues")
                    return []
                else:
                    # Reference residues not conserved: fall back to the helix end
                    gns[i] = tm_only[0:3]
                    middle_gpcr[i] = tm_only[3:6]

    # Merge the reference and the helper points
    gns_flat = [y for x in gns for y in x]
    middle_gpcr = [[x for x in tm_list if x in conserved and x not in gns_flat] for tm_list in middle_gpcr]

    ends_and_middle = gns[:]
    ends_and_middle.extend(middle_gpcr)
    ends_and_middle_flat = [y for x in ends_and_middle for y in x]
    # Group index (0-13) of every residue in the flat list
    ends_and_middle_grouping = [x for x in range(0, len(ends_and_middle)) for y in ends_and_middle[x]]
    # TM index (0-6) of every group; raises IndexError if a group is empty
    segment_order = [int(ends_and_middle[x][0][0])-1 for x in range(0, len(ends_and_middle))]

    distances_set1.filter_gns.extend(ends_and_middle_flat)
    distances_set2.filter_gns = distances_set1.filter_gns
    distances_set1.fetch_distances_tm(distance_type = "HC")
    distances_set2.fetch_distances_tm(distance_type = "HC")

    # Symmetric matrices of the per-set averaged residue-residue distances
    membrane_data1 = [x[:] for x in [[0] * len(ends_and_middle_flat)] * len(ends_and_middle_flat)]
    membrane_data2 = [x[:] for x in [[0] * len(ends_and_middle_flat)] * len(ends_and_middle_flat)]
    for i in range(0, len(ends_and_middle_flat)-1):
        for j in range(i+1, len(ends_and_middle_flat)):
            if right_gn_order(ends_and_middle_flat[i], ends_and_middle_flat[j]):
                filter_key = ends_and_middle_flat[i] + "_" + ends_and_middle_flat[j]
            else:
                filter_key = ends_and_middle_flat[j] + "_" + ends_and_middle_flat[i]

            if ends_and_middle_flat[i] != ends_and_middle_flat[j]:
                membrane_data1[i][j] = sum(distances_set1.data[filter_key])/len(pdbs1)
                membrane_data1[j][i] = membrane_data1[i][j]
                membrane_data2[i][j] = sum(distances_set2.data[filter_key])/len(pdbs2)
                membrane_data2[j][i] = membrane_data2[i][j]

    # Identify most stable TMs by ranking the variations to all other helices
    membrane_data1 = np.array([np.array(x) for x in membrane_data1])
    membrane_data2 = np.array([np.array(x) for x in membrane_data2])
    diff_distances = [x[:] for x in [[0] * len(ends_and_middle)] * len(ends_and_middle)]
    for i in range(0, max(ends_and_middle_grouping)):
        for j in range(i+1, max(ends_and_middle_grouping)+1):
            # Calculate movements for each TM relative to their "normal" distance
            # selected residues for group 1 and 2
            group_1 = [x for x in range(0, len(ends_and_middle_grouping)) if ends_and_middle_grouping[x] == i]
            group_2 = [x for x in range(0, len(ends_and_middle_grouping)) if ends_and_middle_grouping[x] == j]

            # Summed absolute difference as percentage of the mean summed distance
            diff_distances[i][j] = np.sum(abs(membrane_data1[group_1][:, group_2] - membrane_data2[group_1][:, group_2]))/(np.sum(membrane_data1[group_1][:, group_2]+membrane_data2[group_1][:, group_2])/2)*100
            diff_distances[j][i] = diff_distances[i][j]

    # Ranking for each TM: replace every value by its rank within its row
    for i in range(0, 7):
        ordered_row = sorted(diff_distances[i])
        diff_distances[i] = [ordered_row.index(x) for x in diff_distances[i]]

    final_rank = [sum([diff_distances[j][i] for j in range(0, 7)]) for i in range(0, 7)]

    # Grab stable TMs
    tm_ranking = [0] * 7
    sorted_rank = sorted(final_rank)
    for i in range(0, 7):
        tm_ranking[i] = final_rank.index(sorted_rank[i])
        final_rank[tm_ranking[i]] = 100  # make sure this TM isn't repeated

    # Calculate 3D coordinates from distance matrix
    tms_centroids_set1, tms_set1 = recreate3Dorder(membrane_data1, ends_and_middle_grouping)
    tms_centroids_set2, tms_set2 = recreate3Dorder(membrane_data2, ends_and_middle_grouping)

    # Align 3D points of set2 with 3D points of set1 using the most stable reference points
    # Disabled the testing RMSD for now: with tm_ranking[:3] there is exactly
    # one combination (the alternative was combinations(tm_ranking[:4], 3)).
    for comb in combinations(tm_ranking[:3], 3):
        sel_refs = [x for x in range(0, len(segment_order)) if segment_order[x] in comb]

        tms_reference_set1 = np.array(tms_centroids_set1[sel_refs], copy = True)
        tms_reference_set2 = np.array(tms_centroids_set2[sel_refs], copy = True)

        imposer = SVDSuperimposer()
        imposer.set(tms_reference_set1, tms_reference_set2)
        imposer.run()
        rot, trans = imposer.get_rotran()
        rmsd = imposer.get_rms()

        print("RMSD", round(rmsd, 2), tm_ranking)

    # Check for possible mirroring error: always try the z-mirrored set 2 and
    # keep it only when it superimposes better. (Earlier, disabled criteria:
    # `rmsd > 2`, or at least 3 of the remaining TMs deviating by more than 5.)
    for i in range(0, len(tms_centroids_set2)):
        tms_centroids_set2[i][2] = tms_centroids_set2[i][2]*-1

    # Align 3D points of set2 with 3D points of set1 using the most stable reference points
    tms_reference_set1 = tms_centroids_set1[[x for x in range(0, len(segment_order)) if segment_order[x] in tm_ranking[0:3]]]
    tms_reference_set2 = tms_centroids_set2[[x for x in range(0, len(segment_order)) if segment_order[x] in tm_ranking[0:3]]]

    imposer = SVDSuperimposer()
    imposer.set(tms_reference_set1, tms_reference_set2)
    imposer.run()
    new_rot, new_trans = imposer.get_rotran()
    new_rmsd = imposer.get_rms()
    print("RMSD2", round(new_rmsd, 2))

    if new_rmsd < rmsd:
        rot = new_rot
        trans = new_trans
        rmsd = new_rmsd
    else:
        # Mirroring did not help: undo it
        for i in range(0, len(tms_centroids_set2)):
            tms_centroids_set2[i][2] = tms_centroids_set2[i][2]*-1

    # Bring set 2 into the frame of set 1
    tms_centroids_set2 = np.dot(tms_centroids_set2, rot) + trans
    tms_set2 = np.dot(tms_set2, rot) + trans

    # Calculate optimal plane through points in both sets and convert to 2D
    # (Earlier alternatives for the normal: TM7 axis only, a fitted plane via
    # calculatePlane, or centre of helical ends minus centre of helical middle.)
    # 7TM references: collect the centroids of set 1 per TM
    tm_centroids = {y: [] for y in range(0, 7)}
    for x, segment in enumerate(segment_order):
        if segment in tm_centroids:
            tm_centroids[segment].append(tms_centroids_set1[x])

    # Normal = mean of the unit vectors between the two centroids of each TM
    count = 0
    normal = np.array([0.0, 0.0, 0.0])
    for y in range(0, 7):
        if len(tm_centroids[y]) == 2:
            normal += np.array((tm_centroids[y][1] - tm_centroids[y][0])/np.linalg.norm(tm_centroids[y][1] - tm_centroids[y][0]))
            count += 1
    normal = normal/count
    midpoint = tms_centroids_set1[:7].mean(axis=0)

    # Project both sets together so they share the same 2D frame
    plane_set, z_set = convert3D_to_2D_plane(np.concatenate((tms_centroids_set1[:7], tms_centroids_set2[:7]), axis = 0), intracellular, normal, midpoint)
    plane_set1 = plane_set[:7]
    plane_set2 = plane_set[7:]
    z_set1 = z_set[:7]
    z_set2 = z_set[7:]

    # DO NOT REMOVE: possibly we want to upgrade to weighted superposing
    # Based on Biopython SVDSuperimposer
    # coords = tms_centroids_set2
    # reference_coords = tms_centroids_set1

    # OLD centroid calcalation
    # av1 = sum(coords) / len(coords)
    # av2 = sum(reference_coords) / len(reference_coords)

    # NEW weighted centroid calculation
    # (needs, from the ranking step above:)
    # sum_differences = [sum(x) for x in diff_distances]
    # normalized_differences = [((sum_differences[i]-min(sum_differences[0:7]))/(max(sum_differences[0:7])-min(sum_differences[0:7])))**2 for i in range(0,7)]
    # print(normalized_differences)
    # av1, av2 = 0, 0
    # totalweight = 0
    # for i in range(0,7):
    #     # print("Round",i)
    #     #weight = 1+(7-tm_ranking.index(i))/7
    #     weight = (1-normalized_differences[i]+0.1)/1.1
    #     totalweight += weight
    #     print("TM", str(i+1), "weight",weight)
    #     av1 += coords[i]*weight
    #     av2 += reference_coords[i]*weight
    #
    # av1 = av1/totalweight
    # av2 = av2/totalweight
    #
    # coords = coords - av1
    # reference_coords = reference_coords - av2
    #
    # # correlation matrix
    # a = np.dot(np.transpose(coords), reference_coords)
    # u, d, vt = np.linalg.svd(a)
    # rot = np.transpose(np.dot(np.transpose(vt), np.transpose(u)))
    # # check if we have found a reflection
    # if np.linalg.det(rot) < 0:
    #     vt[2] = -vt[2]
    #     rot = np.transpose(np.dot(np.transpose(vt), np.transpose(u)))
    # trans = av2 - np.dot(av1, rot)

    # CURRENT: Ca-angle to axis core
    rotations = [0] * 7
    for i in range(0, 7):
        try:
            # Map the angles at index 11 to (0, 360] before taking the difference
            angles1 = [data['tab4'][gn_dictionary[x]]['angles_set1'][11] for x in gns[i]]
            angles1 = [angle if angle > 0 else angle + 360 for angle in angles1]
            angles2 = [data['tab4'][gn_dictionary[x]]['angles_set2'][11] for x in gns[i]]
            angles2 = [angle if angle > 0 else angle + 360 for angle in angles2]

            rotations[i] = [angles1[x] - angles2[x] for x in range(3)]
            # Wrap the differences into [-180, 180]
            rotations[i] = [value if abs(value) <= 180 else value-360 if value > 0 else value+360 for value in rotations[i]]
        except Exception:
            # Best effort: missing/incomplete angle data gives no rotation.
            # `Exception` (not a bare except) so that KeyboardInterrupt and
            # SystemExit still propagate.
            rotations[i] = [0.0, 0.0, 0.0]  # TODO: verify other class B errors

        # UPDATE 20-02-2020 No mirroring but top-down through GPCR
        rotations[i] = sum(rotations[i])/3

    # ALTERNATIVE (not implemented here): derive the rotation from the 2D
    # placement of the TM tip residues (project tms_set1/tms_set2 with
    # convert3D_to_2D_plane and average the per-residue angle shift); needs
    # debugging as some angles seem off, e.g. GLP-1 active vs inactive TM2.

    # TODO: check z-coordinates orientation
    # Step 1: collect movement relative to membrane mid
    # Step 2: find min and max TM
    # Step 3: check if orientation of min/max TM matches the z-scales + intra/extra - if not invert z-coordinates
    labeled_set1 = [{"label": "TM"+str(i+1), "x": float(plane_set1[i][0]), "y": float(plane_set1[i][1]), "z": float(z_set1[i]), "rotation" : 0} for i in range(0, 7)]
    labeled_set2 = [{"label": "TM"+str(i+1), "x": float(plane_set2[i][0]), "y": float(plane_set2[i][1]), "z": float(z_set2[i]), "rotation" : rotations[i]} for i in range(0, 7)]

    # Convert used GNs to right numbering
    gns_used = [[gn_dictionary[gn] for gn in tm_gns] for tm_gns in gns]

    return {"coordinates_set1" : labeled_set1, "coordinates_set2": labeled_set2, "gns_used": gns_used}
def compute_mean (reader, align_conf, num_confs, start = None, stop = None):
    """
        Accumulates aligned positions and orientation vectors over a trajectory

        Every configuration is aligned to the reference by superimposing its
        indexed positions onto align_conf; the aligned positions and the
        rotated a1/a3 vectors are summed.  Written so it can be driven by the
        multiprocessing helper in UTILS/parallelize.py

        Parameters:
            reader (readers.LorenzoReader2): An active reader on the trajectory file to take the mean of.
            align_conf (numpy.array): Reference positions the indexed particles of each configuration are superimposed on.
            num_confs (int): The number of configurations in the reader.
            <optional> start (int): Number of configurations skipped by the first read. Used if parallel.
            <optional> stop (int): Upper bound for the count of configurations processed (defaults to num_confs). Used if parallel.

        Returns:
            mean_pos_storage (numpy.array): For each particle, the sum of aligned positions over all configurations read.
            mean_a1_storage (numpy.array): For each particle, the sum of rotated a1 orientation vectors over all configurations read.
            mean_a3_storage (numpy.array): For each particle, the sum of rotated a3 orientation vectors over all configurations read.
            intermediate_mean_structures (list): Mean structures computed periodically during the summing to check decorrelation.
            confid (int): The number of configurations summed for the storage arrays.
    """
    stop = num_confs if stop is None else int(stop)
    start = 0 if start is None else int(start)

    mysystem = reader._get_system(N_skip = start)

    # periodically stored running means
    intermediate_mean_structures = []

    # performs the superposition of two structures
    aligner = SVDSuperimposer()

    # one (n_nuc x 3) accumulator each for positions, a1 and a3
    mean_pos_storage, mean_a1_storage, mean_a3_storage = (
        np.array([np.zeros(3) for _ in range(n_nuc)]) for _ in range(3)
    )

    # walk through the trajectory, adding every aligned configuration to the sums
    confid = 0
    while mysystem != False and confid < stop:
        mysystem.inbox()
        positions = fetch_np(mysystem)
        indexed_positions = indexed_fetch_np(mysystem)
        a1_vectors = fetch_a1(mysystem)
        a3_vectors = fetch_a3(mysystem)

        # superimpose the indexed particles on the reference
        aligner.set(align_conf, indexed_positions)
        aligner.run()
        rot, tran = aligner.get_rotran()

        # positions get rotation + translation, orientation vectors rotation only
        aligned_positions = np.einsum('ij, ki -> kj', rot, positions) + tran
        aligned_a1 = np.einsum('ij, ki -> kj', rot, a1_vectors)
        aligned_a3 = np.einsum('ij, ki -> kj', rot, a3_vectors)

        mean_pos_storage += aligned_positions
        mean_a1_storage += aligned_a1
        mean_a3_storage += aligned_a3

        # report the rms of this frame's superposition
        print("Frame:", confid, "Time:", mysystem._time, "RMSF:", aligner.get_rms())

        # frame finished, fetch the next one
        confid += 1
        mysystem = reader._get_system()

        # Intermediate means are only collected in the serial case
        # (this can't be done neatly in parallel)
        if not parallel and confid % INTERMEDIATE_EVERY == 0:
            running_mean = mean_pos_storage / confid
            intermediate_mean_structures.append(
                prep_pos_for_json(running_mean)
            )
            print("INFO: Calculated intermediate mean for {} ".format(confid))

    return(mean_pos_storage, mean_a1_storage, mean_a3_storage, intermediate_mean_structures, confid)