Exemple #1
0
def align_models(CA):
    """Superimpose all models in ``CA`` onto a common frame and report the fit.

    A copy of ``CA`` is aligned in two stages: every model after the first is
    fitted onto model 0, then all models are repeatedly refitted onto the mean
    of the current aligned copies until the summed squared RMS improves by no
    more than 0.001 between passes.

    Parameters:
        CA (numpy.array): Stack of models indexed along axis 0. The input is
            not modified. Presumably shaped (n_models, n_atoms, 3) -- confirm
            against the caller.

    Returns:
        tuple: ``(transformations, rms)``. ``transformations`` holds one
        ``get_rotran()`` result per model, obtained by fitting the original
        model onto its aligned copy. ``rms`` is ``sqrt(total / n_models)``
        where ``total`` is the summed squared RMS of the most recent pass.
    """
    model_count = CA.shape[0]
    aligned = np.copy(CA)
    fitter = SVDSuperimposer()

    def _fit_in_place(target, idx):
        # Fit aligned[idx] onto target, overwrite it with the fitted
        # coordinates and hand back the squared RMS of that fit.
        fitter.set(target, aligned[idx])
        fitter.run()
        squared = fitter.get_rms() ** 2
        aligned[idx] = fitter.get_transformed()
        return squared

    # Stage 1: the first model serves as the reference.
    first_model = aligned[0, :, :]
    sq_sum = 0
    for idx in range(1, model_count):
        sq_sum += _fit_in_place(first_model, idx)

    # Stage 2: refine against the mean model until the gain is negligible.
    tolerance = 0.001
    previous_sq_sum = float("inf")
    while previous_sq_sum - sq_sum > tolerance:
        previous_sq_sum = sq_sum
        average_model = np.mean(aligned, 0)
        sq_sum = 0
        for idx in range(model_count):
            sq_sum += _fit_in_place(average_model, idx)

    def _rotran(original, fitted):
        # Rotation/translation that carries the untouched input model onto
        # its aligned counterpart.
        fitter.set(fitted, original)
        fitter.run()
        return fitter.get_rotran()

    transformations = [
        _rotran(original, fitted) for original, fitted in zip(CA, aligned)
    ]
    return transformations, np.sqrt(sq_sum / model_count)
def compute_deviations(reader,
                       mean_structure,
                       indexed_mean_structure,
                       indexes,
                       num_confs,
                       start=None,
                       stop=None):
    """
        Computes the per-particle deviation from the mean structure for each frame

        Each frame is superimposed on the mean using only the particles
        selected by `indexes`; the resulting transform is then applied to
        every particle of the frame before the distances are measured.

        Parameters:
            reader: An active trajectory reader exposing read(n_skip=...) / read().
            mean_structure (numpy.array): Mean position of every particle (subtracted from the aligned frame).
            indexed_mean_structure (numpy.array): Mean positions of the particles used for the alignment.
            indexes: Index selection applied to each frame's positions to pick the alignment particles.
            num_confs (int): Used as `stop` when `stop` is not given.
            <optional> start (int): Number of configurations to skip on the first read.  Used if parallel.
            <optional> stop (int): Upper bound on the number of frames processed by this call.  Used if parallel.

        Returns:
            (deviations, RMSDs): `deviations` has one list of per-particle distances per frame;
            `RMSDs` has the alignment RMS of each frame multiplied by 0.8518.
    """
    stop = num_confs if stop is None else int(stop)
    start = 0 if start is None else int(start)

    # Use the single-value decomposition method for superimposing configurations
    fitter = SVDSuperimposer()
    deviations = []
    RMSDs = []

    frame_no = 0
    system = reader.read(n_skip=start)
    while system != False and frame_no < stop:
        system.inbox()
        positions = system.positions

        # fit only the selected particles onto their mean positions
        fitter.set(indexed_mean_structure, positions[indexes])
        fitter.run()
        print("Frame number:", frame_no, "Time:", system.time, "RMSD:",
              fitter.get_rms())

        # apply the fitted transform to the whole frame
        rot, tran = fitter.get_rotran()
        realigned = np.einsum('ij, ki -> kj', rot, positions) + tran

        # plain lists keep the result compatible with json
        deviations.append(
            list(np.linalg.norm(realigned - mean_structure, axis=1)))
        # NOTE(review): 0.8518 is presumably a length-unit conversion -- confirm
        RMSDs.append(fitter.get_rms() * 0.8518)

        frame_no += 1
        system = reader.read()

    return (deviations, RMSDs)
Exemple #3
0
def doublets_dist(d1, d2):
    """Return the smaller superposition RMS between two doublets.

    ``d1['vec']`` is used as the reference and is compared first against
    ``d2['vec']`` and then against ``d2['vec2']``; the lower of the two RMS
    values is returned.
    """
    fitter = SVDSuperimposer()
    scores = []
    for key in ('vec', 'vec2'):
        fitter.set(d1['vec'], d2[key])
        fitter.run()
        scores.append(fitter.get_rms())
    return min(scores)
Exemple #4
0
def rmsd_distance(points,
                  ref_points,
                  sup_atoms,
                  rmsd_atoms=None,
                  multiple_rmsd_variants=False):
    """Superimpose a residue pair on a reference pair and score the fit.

    :param points: pair ``(c1, c2)`` of atom-name -> coordinate mappings
    :param ref_points: pair ``(p1, p2)`` of reference mappings
    :param sup_atoms: pair of atom-name lists used for the superposition,
        one list per residue
    :param rmsd_atoms: when given, the score is computed by ``_rmsd_formula``
        on these atoms instead of being the superposition RMS
    :param multiple_rmsd_variants: when true, ``rmsd_atoms`` is a collection
        of alternatives and the lowest score among them is returned
    :return: ``1000.0`` if any superposition atom is missing from either
        structure, otherwise the score described above
    """
    cur_first, cur_second = points
    ref_first, ref_second = ref_points
    names_first, names_second = sup_atoms[0], sup_atoms[1]

    # Every superposition atom must exist in both the current and the
    # reference residue; otherwise report the sentinel distance.
    pairing = ((names_first, cur_first, ref_first),
               (names_second, cur_second, ref_second))
    for names, cur_res, ref_res in pairing:
        if any(name not in cur_res or name not in ref_res for name in names):
            return 1000.0

    ref_coords = [ref_first[n] for n in names_first]
    ref_coords += [ref_second[n] for n in names_second]
    cur_coords = [cur_first[n] for n in names_first]
    cur_coords += [cur_second[n] for n in names_second]

    fitter = SVDSuperimposer()
    fitter.set(np.array(ref_coords, 'f'), np.array(cur_coords, 'f'))
    fitter.run()

    if rmsd_atoms is None:
        return fitter.get_rms()

    rot, tran = fitter.get_rotran()
    if not multiple_rmsd_variants:
        return _rmsd_formula(points, ref_points, rot, tran, rmsd_atoms)
    variant_scores = [
        _rmsd_formula(points, ref_points, rot, tran, variant)
        for variant in rmsd_atoms
    ]
    return min(variant_scores)
Exemple #5
0
def computeRMSD():
    """Return the superposition RMS between the selected C-alpha atoms.

    Reads the module-level ``ca_atoms`` (moving), ``ca_atoms_pdb`` (fixed)
    and ``include_res_map``; only positions whose map entry equals 1 take
    part in the fit. Prints an error and exits when the two atom lists
    differ in length, and returns 0 when no position is selected.
    """
    if len(ca_atoms) != len(ca_atoms_pdb):
        # print() call form works on both Python 2 and Python 3
        print("Error. Length mismatch!")
        exit()

    fixed_coord = []
    moving_coord = []
    for i in range(len(ca_atoms)):
        if include_res_map[i] == 1:
            fixed_coord.append(
                [ca_atoms_pdb[i][0], ca_atoms_pdb[i][1], ca_atoms_pdb[i][2]])
            moving_coord.append(
                [ca_atoms[i][0], ca_atoms[i][1], ca_atoms[i][2]])

    if not fixed_coord:
        return 0

    sup = SVDSuperimposer()
    sup.set(numpy.array(fixed_coord), numpy.array(moving_coord))
    sup.run()
    return sup.get_rms()
Exemple #6
0
def compute_transformation(c_ref, c):
    """Fit ``c`` onto ``c_ref`` and return ``(rms, rot, tran)`` of that fit."""
    fitter = SVDSuperimposer()
    fitter.set(c_ref, c)
    fitter.run()
    rms_value = fitter.get_rms()
    rotation, translation = fitter.get_rotran()
    return (rms_value, rotation, translation)
def compute_centroid(reader, mean_structure, num_confs, start=None, stop=None):
    """
        Compares each structure to the mean and returns the one with the lowest RMSF

        Parameters:
            reader (readers.LorenzoReader2): An active reader on the trajectory file to analyze.
            mean_structure (numpy.array): The mean positions that each frame's indexed particles are fitted onto.
            num_confs (int): Used as `stop` when `stop` is not given.
            <optional> start (int): Number of configurations to skip on the first read.  Used if parallel.
            <optional> stop (int): Upper bound on the number of frames processed by this call.  Used if parallel.

        Returns:
            (centroid, a1s, a3s, rmsf, time): aligned positions and rotated a1/a3 vectors of the
            best frame, its RMS to the mean, and its time.  If no frame beats the initial
            threshold, the arrays are all zeros, rmsf is 100000 and time is None.

        Note:
            `indexes` is not a parameter; it is read from the enclosing (module) scope.
    """
    if stop is None:
        stop = num_confs
    else:
        stop = int(stop)
    if start is None:
        start = 0
    else:
        start = int(start)
    confid = 0

    # Use the single-value decomposition method for superimposing configurations
    sup = SVDSuperimposer()
    lowest_rmsf = 100000  #if you have a larger number than this, we need to talk...
    centroid_candidate = np.zeros_like(mean_structure)
    centroid_a1 = np.zeros_like(mean_structure)
    centroid_a3 = np.zeros_like(mean_structure)
    # Initialised so the return below cannot hit an unbound name when the
    # reader yields no frame (or no frame improves on the threshold).
    centroid_t = None

    mysystem = reader.read(n_skip=start)

    while mysystem != False and confid < stop:
        mysystem.inbox()
        # calculate alignment transform from the indexed particles only
        cur_conf = mysystem.positions
        indexed_cur_conf = mysystem.positions[indexes]
        cur_conf_a1 = mysystem.a1s
        cur_conf_a3 = mysystem.a3s
        sup.set(mean_structure, indexed_cur_conf)
        sup.run()
        rot, tran = sup.get_rotran()

        # positions get rotation + translation; orientation vectors are
        # only rotated
        cur_conf = np.einsum('ij, ki -> kj', rot, cur_conf) + tran
        cur_conf_a1 = np.einsum('ij, ki -> kj', rot, cur_conf_a1)
        cur_conf_a3 = np.einsum('ij, ki -> kj', rot, cur_conf_a3)
        RMSF = sup.get_rms()
        print("Frame number:", confid, "RMSF:", RMSF)
        if RMSF < lowest_rmsf:
            centroid_candidate = cur_conf
            centroid_a1 = cur_conf_a1
            centroid_a3 = cur_conf_a3
            lowest_rmsf = RMSF
            centroid_t = mysystem.time

        confid += 1
        mysystem = reader.read()

    return centroid_candidate, centroid_a1, centroid_a3, lowest_rmsf, centroid_t
Exemple #8
0
def create_DM(pdb_atoms_list, alignement_type):
    """
    Build the pairwise distance matrix used to cluster all structures.

    For every unordered pair of frames the superposition RMS and the RMS
    before superposition are computed. When ``alignement_type`` is ``"l"``
    the superposition (local) RMS is stored, otherwise the initial RMS.
    Returns the filled ``DistanceMatrix``.
    """
    fitter = SVDSuperimposer()
    n_frames = len(pdb_atoms_list)
    matrix = DistanceMatrix(n_frames)
    progress = pg.ProgressBar(widgets=WIDGETS,
                              maxval=(n_frames * (n_frames - 1) / 2)).start()
    pairs_done = 0
    for row, reference in enumerate(pdb_atoms_list):
        others = pdb_atoms_list[row + 1:]
        for col, other in enumerate(others, row + 1):
            fitter.set(reference, other)
            fitter.run()
            # RMS after the pairwise (local) fit
            local_rms = fitter.get_rms()
            # RMS as the frames were supplied, i.e. with the earlier
            # global alignment
            global_rms = fitter.get_init_rms()
            chosen = local_rms if alignement_type == "l" else global_rms
            matrix.set(row, col, chosen)
            progress.update(pairs_done)
            pairs_done += 1
    progress.finish()
    return matrix
Exemple #9
0
def computeRMSD():
    """Return a dict mapping each chain id to its C-alpha superposition RMS.

    Reads the module-level ``ca_atoms`` (moving), ``ca_atoms_pdb`` (fixed),
    ``chain_id`` and ``chain_id_list``. For every chain in ``chain_id_list``
    the positions whose ``chain_id`` entry matches are fitted separately;
    chains without any matching position are left out of the result.
    Prints an error and exits when the two atom lists differ in length.
    """
    if len(ca_atoms) != len(ca_atoms_pdb):
        # print() call form works on both Python 2 and Python 3
        print("Error. Length mismatch!")
        exit()
    l = len(ca_atoms)

    res = {}
    for ch in chain_id_list:
        fixed_coord = []
        moving_coord = []

        for i in range(l):
            if chain_id[i] == ch:
                fixed_coord.append([
                    ca_atoms_pdb[i][0], ca_atoms_pdb[i][1], ca_atoms_pdb[i][2]
                ])
                moving_coord.append(
                    [ca_atoms[i][0], ca_atoms[i][1], ca_atoms[i][2]])

        if fixed_coord:
            sup = SVDSuperimposer()
            sup.set(numpy.array(fixed_coord), numpy.array(moving_coord))
            sup.run()
            res[ch] = sup.get_rms()

    return res
def get_rot_tran(y, x):
    """Return ``(rot, tran, rms)`` for the superposition of ``y`` onto ``x``.

    Note the argument order: ``x`` is handed to the superimposer as the
    reference and ``y`` as the coordinates to be fitted.
    """
    fitter = SVDSuperimposer()
    fitter.set(x, y)  # AC over AD
    fitter.run()
    rms_value = fitter.get_rms()
    rotation, translation = fitter.get_rotran()
    return (rotation, translation, rms_value)
Exemple #11
0
def compute_deviations(reader,
                       mean_structure,
                       num_confs,
                       start=None,
                       stop=None):
    """
        Computes the per-nucleotide deviation from the mean structure for each frame

        Parameters:
            reader (readers.LorenzoReader2): An active reader on the trajectory file to analyze.
            mean_structure (numpy.array): Mean position of each nucleotide; frames are fitted onto it.
            num_confs (int): Used as `stop` when `stop` is not given.
            <optional> start (int): Number of configurations to skip on the first read.  Used if parallel.
            <optional> stop (int): Upper bound on the number of frames processed by this call.  Used if parallel.

        Returns:
            deviations (list): One list per frame holding the distance of every nucleotide from its mean position.
    """
    stop = num_confs if stop is None else int(stop)
    start = 0 if start is None else int(start)

    def centres_of_mass(conf):
        # helper to fetch nucleotide positions
        return np.array([nuc.cm_pos for nuc in conf._nucleotides])

    # Use the single-value decomposition method for superimposing configurations
    fitter = SVDSuperimposer()
    deviations = []

    frame_no = 0
    system = reader._get_system(N_skip=start)
    while system != False and frame_no < stop:
        system.inbox_system()

        # calculate alignment transform
        positions = centres_of_mass(system)
        fitter.set(mean_structure, positions)
        fitter.run()
        print("Frame number:", frame_no, "RMSF:", fitter.get_rms())

        # realign the frame row by row, then measure each nucleotide's
        # offset from the mean; plain lists stay compatible with json
        rot, tran = fitter.get_rotran()
        realigned = np.array([np.dot(row, rot) + tran for row in positions])
        offsets = realigned - mean_structure
        deviations.append([np.linalg.norm(offset) for offset in offsets])

        frame_no += 1
        system = reader._get_system()

    return deviations
Exemple #12
0
def super_pdb(coords1, coords2):
    """Return the RMSD after superimposing ``coords2`` onto ``coords1``.

    Writes an error to stderr and exits with status 1 when the two
    coordinate sets differ in length.
    """
    if len(coords1) != len(coords2):
        # sys.stderr.write() replaces the Python-2-only "print >>" form
        sys.stderr.write('ERROR: Structures with different length\n')
        sys.exit(1)
    svd = SVDSuperimposer()
    svd.set(np.array(coords1), np.array(coords2))
    svd.run()
    return svd.get_rms()
def calculate_rmsd(atoms_x_coord, atoms_y_coord) -> float:
    """Return the RMSD after fitting ``atoms_y_coord`` onto ``atoms_x_coord``.

    The second array is the one that gets rotated and translated; the first
    is the reference.
    """
    fitter = SVDSuperimposer()
    fitter.set(atoms_x_coord, atoms_y_coord)
    fitter.run()
    return fitter.get_rms()
def get_rmsd(coord1, coord2):
    """Superimpose ``coord2`` onto ``coord1``, print the fit and return the RMSD.

    Prints the rotation matrix, the translation vector and the RMSD.
    Writes an error to stderr and exits with status 1 when the two
    coordinate sets differ in length.
    """
    if len(coord1) != len(coord2):
        # sys.stderr.write() replaces the Python-2-only "print >>" form
        sys.stderr.write(
            "ERROR: The sets of coordinates have different sizes\n")
        sys.exit(1)
    svd = SVDSuperimposer()
    # the superimposer works on numpy arrays, so convert list input
    svd.set(np.array(coord1), np.array(coord2))
    svd.run()
    rmsd = svd.get_rms()
    rot, tran = svd.get_rotran()
    print('R', rot)
    print('T', tran)
    print('RMSD', rmsd)
    # Previously the value was only printed; returning it as well matches
    # what the function name promises and does not affect callers that
    # ignore the result.
    return rmsd
def get_rmsd(coord1, coord2):
    """Superimpose ``coord2`` onto ``coord1`` and return the RMSD.

    Also prints the rotation matrix ("R") and translation vector ("T") of
    the fit. Writes an error to stderr and exits with status 1 when the two
    coordinate sets differ in length.
    """
    if len(coord1) != len(coord2):
        # The old "print >> sys.stderr.write(...)" evaluated to
        # "print >> <int>" and raised TypeError before sys.exit ran.
        sys.stderr.write("ERROR: The set of Coordinate have different size.\n")
        sys.exit(1)
    svd = SVDSuperimposer()
    svd.set(np.array(coord1), np.array(coord2))
    svd.run()
    rmsd = svd.get_rms()
    rot, tran = svd.get_rotran()
    print("R", rot)
    print("T", tran)
    return rmsd
Exemple #16
0
    def __sub__(self, other):
        """
        Return the rmsd between this fragment and another one.

        The value is the RMS obtained after superimposing C{other.coords_ca}
        onto C{self.coords_ca}.

        Example:
            >>> rmsd=fragment1-fragment2

        @return: rmsd between fragments
        @rtype: float
        """
        fitter = SVDSuperimposer()
        fitter.set(self.coords_ca, other.coords_ca)
        fitter.run()
        rmsd = fitter.get_rms()
        return rmsd
    def __sub__(self, other):
        """
        Return the rmsd between this fragment and another one.

        C{self.coords_ca} is the reference; C{other.coords_ca} is fitted
        onto it and the RMS of that fit is returned.

        Example:
            >>> rmsd=fragment1-fragment2

        @return: rmsd between fragments
        @rtype: float
        """
        reference, candidate = self.coords_ca, other.coords_ca
        superimposer = SVDSuperimposer()
        superimposer.set(reference, candidate)
        superimposer.run()
        return superimposer.get_rms()
Exemple #18
0
def Superimpose(atoms1, atoms2):
    """Fit ``atoms2`` onto ``atoms1`` and report the overall and per-row error.

    Returns ``(RMSD, deviations)``: the RMS of the fit, and the Euclidean
    distance between each row of ``atoms1`` and the matching row of the
    transformed ``atoms2``. Both inputs must have the same length.
    """
    assert len(atoms1) == len(atoms2)
    fitter = SVDSuperimposer()
    fitter.set(atoms1, atoms2)
    fitter.run()
    overall = fitter.get_rms()

    # distance deviation at each position after the fit
    residual = atoms1 - fitter.get_transformed()
    per_position = np.sqrt(np.sum(np.power(residual, 2), axis=1))

    return overall, per_position
def run_sup3d(coord1, coord2):
    """Superimpose ``coord2`` onto ``coord1`` and print the results.

    Prints, in order: the RMSD of the fit, the rotation matrix, the
    translation vector, and the transformed copy of ``coord2`` (the
    coordinates after they have been moved onto ``coord1``).
    Returns None.
    """
    sup = SVDSuperimposer()
    # load both coordinate sets into the (initially empty) superimposer
    sup.set(np.array(coord1), np.array(coord2))
    # run() performs the actual fit
    sup.run()
    rmsd = sup.get_rms()
    rot, tran = sup.get_rotran()
    tcoord = sup.get_transformed()
    # print() call form: the statement form is a SyntaxError on Python 3
    print(rmsd)
    print(rot)
    print(tran)
    print(tcoord)
    return
Exemple #20
0
def _superimpose_atoms(ref_points, points, atoms):
    if ref_points is None or points is None or atoms is None:
        return (None, None, None, None)
    ref_vec = []
    vec = []
    for a in atoms:
        if a in ref_points and a in points:
            ref_vec.append(ref_points[a])
            vec.append(points[a])
    if len(vec) < 3:
        return (None, None, None, None)
    sup = SVDSuperimposer()
    sup.set(np.array(ref_vec, 'f'), np.array(vec, 'f'))
    sup.run()
    (rot, tran) = sup.get_rotran()
    rms = sup.get_rms()
    return (_apply_rot_tran(points, rot, tran), rot, tran, rms)
Exemple #21
0
    def __sub__(self, other):
        """Return rmsd between two fragments.

        The rmsd is the RMS left after superimposing ``other.coords_ca``
        onto ``self.coords_ca``.

        :return: rmsd between fragments
        :rtype: float

        Examples
        --------
        This is an incomplete but illustrative example::

            rmsd = fragment1 - fragment2

        """
        superimposer = SVDSuperimposer()
        superimposer.set(self.coords_ca, other.coords_ca)
        superimposer.run()
        fragment_rmsd = superimposer.get_rms()
        return fragment_rmsd
Exemple #22
0
def computeRMSD():
    """Return the superposition RMS between all C-alpha atoms.

    Reads the module-level ``ca_atoms`` (moving) and ``ca_atoms_pdb``
    (fixed). Prints an error with both lengths and exits when the two lists
    differ in length.
    """
    if len(ca_atoms) != len(ca_atoms_pdb):
        # print() call form: the statement form is a SyntaxError on Python 3
        print("Error. Length mismatch!", len(ca_atoms), len(ca_atoms_pdb))
        exit()
    l = len(ca_atoms)

    fixed_coord = numpy.zeros((l, 3))
    moving_coord = numpy.zeros((l, 3))

    for i in range(l):
        fixed_coord[i] = numpy.array(
            [ca_atoms_pdb[i][0], ca_atoms_pdb[i][1], ca_atoms_pdb[i][2]])
        moving_coord[i] = numpy.array(
            [ca_atoms[i][0], ca_atoms[i][1], ca_atoms[i][2]])
    sup = SVDSuperimposer()
    sup.set(fixed_coord, moving_coord)
    sup.run()
    return sup.get_rms()
Exemple #23
0
def distance_matrix(CA):
    """Return the symmetric matrix of pairwise superposition RMS values.

    Entry ``[i, j]`` is the RMS after fitting model ``j`` onto model ``i``
    (models are taken along axis 0 of ``CA``); the diagonal stays zero.
    """
    model_count = CA.shape[0]
    result = np.zeros((model_count, model_count))
    fitter = SVDSuperimposer()

    # only the upper triangle is computed; each value is mirrored
    for i in range(model_count - 1):
        reference = CA[i, :, :]
        for j in range(i + 1, model_count):
            fitter.set(reference, CA[j, :, :])
            fitter.run()
            pair_rms = fitter.get_rms()
            result[i, j] = pair_rms
            result[j, i] = pair_rms

    return result
Exemple #24
0
def computeRMSD():
    """Return the superposition RMS between all C-alpha atoms.

    Reads the module-level ``ca_atoms`` (moving) and ``ca_atoms_pdb``
    (fixed). Prints an error and exits when the two lists differ in length.
    """
    if len(ca_atoms) != len(ca_atoms_pdb):
        # print() call form works on both Python 2 and Python 3
        print("Error. Length mismatch!")
        exit()
    l = len(ca_atoms)

    fixed_coord = numpy.zeros((l, 3))
    moving_coord = numpy.zeros((l, 3))

    for i in range(l):
        fixed_coord[i] = numpy.array(
            [ca_atoms_pdb[i][0], ca_atoms_pdb[i][1], ca_atoms_pdb[i][2]])
        moving_coord[i] = numpy.array(
            [ca_atoms[i][0], ca_atoms[i][1], ca_atoms[i][2]])
    sup = SVDSuperimposer()
    sup.set(fixed_coord, moving_coord)
    sup.run()
    return sup.get_rms()
Exemple #25
0
def sel_straight(coords_arr, n_cc_helices):
    """Pick the coordinate set whose consecutive helices superimpose best.

    Each entry of ``coords_arr`` is cut into ``n_cc_helices`` equal
    consecutive slices (slice length taken from the first entry). For every
    entry the superposition RMS of each slice onto the preceding one is
    averaged. Returns ``(index, value)`` of the entry with the lowest
    average.
    """
    helix_len = int(coords_arr[0].shape[0] / n_cc_helices)
    mean_rms_per_entry = []
    for coords in coords_arr:
        helices = [
            coords[k * helix_len:(k + 1) * helix_len]
            for k in range(n_cc_helices)
        ]

        neighbour_rms = []
        for current, following in zip(helices, helices[1:]):
            fitter = SVDSuperimposer()
            fitter.set(current, following)
            fitter.run()
            neighbour_rms.append(fitter.get_rms())
        mean_rms_per_entry.append(np.mean(neighbour_rms))

    return np.argmin(mean_rms_per_entry), np.min(mean_rms_per_entry)
Exemple #26
0
    def __init__(self, static, moving):
        """
        Align two structures

        Stores the RMS of the fit in ``self.rms``, the untouched reference in
        ``self.static`` and the transformed coordinates, one array per atom,
        in ``self.moving``.

        :param static: the reference structure
        :param moving: the structure to be aligned to the reference
        """
        fitter = SVDSuperimposer()
        fitter.set(np.asarray(static), np.asarray(moving))
        fitter.run()
        rotation, translation = fitter.get_rotran()

        self.rms = fitter.get_rms()
        self.static = static

        # apply the fitted transform atom by atom
        transformed = []
        for index in range(len(moving)):
            atom_coords = np.asarray(moving[index])
            transformed.append(np.dot(atom_coords, rotation) + translation)
        self.moving = transformed
Exemple #27
0
    def set_atoms(self, fixed, moving):
        """Compute the rotation/translation that fits ``moving`` onto ``fixed``.

        The coordinates of both atom lists are collected and superimposed with
        ``fixed`` as the reference. The resulting RMS is stored in
        ``self.rms`` and the (rotation, translation) pair in ``self.rotran``;
        the atoms themselves are not moved here.

        :param fixed: list of (fixed) atoms
        :param moving: list of (moving) atoms
        :type fixed,moving: [L{Atom}, L{Atom},...]
        :raises PDBException: if the two lists differ in size
        """
        if len(fixed) != len(moving):
            raise PDBException("Fixed and moving atom lists differ in size")
        atom_count = len(fixed)
        reference = numpy.zeros((atom_count, 3))
        mobile = numpy.zeros((atom_count, 3))
        for row, (fixed_atom, moving_atom) in enumerate(zip(fixed, moving)):
            reference[row] = fixed_atom.get_coord()
            mobile[row] = moving_atom.get_coord()
        fitter = SVDSuperimposer()
        fitter.set(reference, mobile)
        fitter.run()
        self.rms = fitter.get_rms()
        self.rotran = fitter.get_rotran()
Exemple #28
0
def compute_frag_RMSD(res_len):
    """Return the superposition RMS between a fragment and its target.

    Reads the module-level ``ca_atoms`` (moving fragment) and
    ``ca_atoms_pdb`` (fixed target). Prints a message and returns 0 when
    the two lists differ in length or when their length is not ``res_len``.
    """
    # Indentation is normalised to spaces: the original mixed tabs and
    # spaces, which Python 3 rejects with TabError.
    if len(ca_atoms) != len(ca_atoms_pdb):
        print("Error. Length mismatch! target:frag", len(ca_atoms_pdb),
              len(ca_atoms))
        return 0
    l = len(ca_atoms)
    N = res_len
    if l != N:
        print("atom list length mismatches the fragment length!", str(l),
              str(N))
        return 0

    fixed_coord = numpy.zeros((l, 3))
    moving_coord = numpy.zeros((l, 3))

    for i in range(l):
        fixed_coord[i] = numpy.array(
            [ca_atoms_pdb[i][0], ca_atoms_pdb[i][1], ca_atoms_pdb[i][2]])
        moving_coord[i] = numpy.array(
            [ca_atoms[i][0], ca_atoms[i][1], ca_atoms[i][2]])
    sup = SVDSuperimposer()
    sup.set(fixed_coord, moving_coord)
    sup.run()
    return sup.get_rms()
Exemple #29
0
    def set_atoms(self, fixed, moving):
        """Compute the rotation/translation that fits C{moving} onto C{fixed}.

        The coordinates of both atom lists are collected and superimposed
        with C{fixed} as the reference. The RMS of the fit is stored in
        C{self.rms} and the (rotation, translation) pair in C{self.rotran};
        the atoms themselves are not moved here.

        @param fixed: list of (fixed) atoms
        @param moving: list of (moving) atoms
        @type fixed,moving: [L{Atom}, L{Atom},...]
        @raise PDBException: if the two lists differ in size
        """
        n_atoms = len(fixed)
        if n_atoms != len(moving):
            raise PDBException("Fixed and moving atom lists differ in size")
        target_xyz = numpy.zeros((n_atoms, 3))
        source_xyz = numpy.zeros((n_atoms, 3))
        for idx in range(n_atoms):
            target_xyz[idx] = fixed[idx].get_coord()
            source_xyz[idx] = moving[idx].get_coord()
        superimposer = SVDSuperimposer()
        superimposer.set(target_xyz, source_xyz)
        superimposer.run()
        self.rms = superimposer.get_rms()
        self.rotran = superimposer.get_rotran()
Exemple #30
0
def super_prot(atom_coords_1, atom_coords_2):
    """Return the RMSD from superimposing two atom coordinate lists.

    Uses BioPython's SVDSuperimposer with ``atom_coords_1`` as the reference.
    """
    fitter = SVDSuperimposer()
    fitter.set(atom_coords_1, atom_coords_2)
    fitter.run()
    # CAREFUL: the accessor is get_rms, not get_rmsd
    rmsd = fitter.get_rms()
    return rmsd
# Demonstration of a least-squares superposition of y onto x.
# NOTE(review): `x`, the reference set passed to sup.set() below, is not
# defined in this fragment -- presumably it is created just above; confirm.

# coordinate set to be fitted: four 3-D points, built with typecode 'f'
y = array([[51.30, -2.99, 46.54],
          [51.09, -1.88, 47.58],
          [52.36, -1.20, 48.03],
          [52.71, -1.18, 49.38]], 'f')

sup = SVDSuperimposer()

# set the coords
# y will be rotated and translated on x
sup.set(x, y)

# do the lsq fit
sup.run()

# get the rmsd
rms = sup.get_rms()

# get rotation (right multiplying!) and the translation
rot, tran = sup.get_rotran()

# rotate y on x manually: the rotation is applied from the right, then the
# translation is added
y_on_x1 = dot(y, rot) + tran

# same thing, computed by the superimposer itself
y_on_x2 = sup.get_transformed()


def simple_matrix_print(matrix):
    """Simple string to display a floating point matrix

    This should give the same output on multiple systems.  This is
Exemple #32
0
class SVDSuperimposerTest(unittest.TestCase):
    """Tests for SVDSuperimposer using two small fixed coordinate sets."""

    def setUp(self):
        """Create reference set x, moving set y and a superimposer holding both."""
        self.x = array([[51.65, -1.90, 50.07], [50.40, -1.23, 50.65],
                        [50.68, -0.04, 51.54], [50.22, -0.02, 52.85]])

        self.y = array([[51.30, -2.99, 46.54], [51.09, -1.88, 47.58],
                        [52.36, -1.20, 48.03], [52.71, -1.18, 49.38]])

        self.sup = SVDSuperimposer()
        self.sup.set(self.x, self.y)

    def test_get_init_rms(self):
        """get_init_rms() gives the RMS of the coordinates before any fit."""
        x = array([[1.19, 1.28, 1.37], [1.46, 1.55, 1.64], [1.73, 1.82, 1.91]])
        y = array([[1.91, 1.82, 1.73], [1.64, 1.55, 1.46], [1.37, 1.28, 1.19]])
        self.sup.set(x, y)
        self.assertIsNone(self.sup.init_rms)
        init_rms = 0.8049844719
        # assertEqual, not assertTrue: assertTrue(a, b) treats b as the
        # failure message and passes for any truthy a, so it checked nothing.
        self.assertEqual(float("%.3f" % self.sup.get_init_rms()),
                         float("%.3f" % init_rms))

    def test_oldTest(self):
        """Check stored state before/after run() and the fitted results."""
        # set() stores the coordinates and leaves all results unset
        self.assertTrue(
            array_equal(around(self.sup.reference_coords, decimals=3),
                        around(self.x, decimals=3)))
        self.assertTrue(
            array_equal(around(self.sup.coords, decimals=3),
                        around(self.y, decimals=3)))
        self.assertIsNone(self.sup.rot)
        self.assertIsNone(self.sup.tran)
        self.assertIsNone(self.sup.rms)
        self.assertIsNone(self.sup.init_rms)

        # run() must not alter the stored coordinates
        self.sup.run()
        self.assertTrue(
            array_equal(around(self.sup.reference_coords, decimals=3),
                        around(self.x, decimals=3)))
        self.assertTrue(
            array_equal(around(self.sup.coords, decimals=3),
                        around(self.y, decimals=3)))
        rot = array([[0.68304983, 0.53664371, 0.49543563],
                     [-0.52277295, 0.83293229, -0.18147242],
                     [-0.51005037, -0.13504564, 0.84947707]])
        tran = array([38.78608157, -20.65451334, -15.42227366])
        self.assertTrue(
            array_equal(around(self.sup.rot, decimals=3),
                        around(rot, decimals=3)))
        self.assertTrue(
            array_equal(around(self.sup.tran, decimals=3),
                        around(tran, decimals=3)))
        # rms values are only filled in once their getters are called
        self.assertIsNone(self.sup.rms)
        self.assertIsNone(self.sup.init_rms)

        rms = 0.00304266526014
        self.assertEqual(float("%.3f" % self.sup.get_rms()),
                         float("%.3f" % rms))

        rot_get, tran_get = self.sup.get_rotran()
        self.assertTrue(
            array_equal(around(rot_get, decimals=3), around(rot, decimals=3)))
        self.assertTrue(
            array_equal(around(tran_get, decimals=3), around(tran,
                                                             decimals=3)))

        # applying the transform by hand (rotation multiplies from the
        # right) must agree with get_transformed()
        y_on_x1 = dot(self.y, rot) + tran
        y_x_solution = array(
            [[5.16518846e+01, -1.90018270e+00, 5.00708397e+01],
             [5.03977138e+01, -1.22877050e+00, 5.06488200e+01],
             [5.06801788e+01, -4.16095666e-02, 5.15368866e+01],
             [5.02202228e+01, -1.94372374e-02, 5.28534537e+01]])
        self.assertTrue(
            array_equal(around(y_on_x1, decimals=3),
                        around(y_x_solution, decimals=3)))

        y_on_x2 = self.sup.get_transformed()
        self.assertTrue(
            array_equal(around(y_on_x2, decimals=3),
                        around(y_x_solution, decimals=3)))
Exemple #33
0
def merge_cc(coords_list, res_overlap, n_cc_helices):
    """Chain overlapping coordinate segments into one merged structure.

    Every segment after the first is superimposed on the previously aligned
    segment, using the last ``overlap`` residues of each helix in the
    previous segment and the first ``overlap`` residues of each helix in the
    new one (5 atoms per residue). The overlapping atoms are then blended
    linearly between the two segments and the remainder of each new helix
    is appended.

    Parameters:
        coords_list: sequence of coordinate arrays; each holds
            ``n_cc_helices`` helices stored one after another with equal
            length (length taken from the first array).
        res_overlap: number of overlapping residues for each consecutive
            pair of segments.
        n_cc_helices: number of helices per segment.

    Returns:
        (merged, rmsd): the merged coordinates with helices stored one after
        another, and the square root of the summed squared fit RMS values.
    """
    atoms_per_residue = 5
    previous = coords_list[0]
    aligned = [deepcopy(coords_list[0])]
    helix_len = int(previous.shape[0] / n_cc_helices)
    helix_ids = range(n_cc_helices)
    squared_rms = []

    # Pass 1: superimpose each segment on the one before it.
    for segment, overlap_res in zip(coords_list[1:], res_overlap):
        overlap_atoms = overlap_res * atoms_per_residue

        # end of every helix in the previous (already aligned) segment
        tail_of_previous = np.concatenate([
            previous[(k + 1) * helix_len - overlap_atoms:(k + 1) * helix_len]
            for k in helix_ids
        ], axis=0)
        # start of every helix in the segment being added
        head_of_segment = np.concatenate([
            segment[k * helix_len:k * helix_len + overlap_atoms]
            for k in helix_ids
        ], axis=0)

        fitter = SVDSuperimposer()
        fitter.set(tail_of_previous, head_of_segment)
        fitter.run()
        squared_rms.append(fitter.get_rms()**2)
        rot, tran = fitter.get_rotran()
        moved = np.dot(segment, rot) + tran
        aligned.append(moved)
        previous = moved

    rmsd = np.sqrt(np.sum(squared_rms))

    # Pass 2: stitch the aligned segments together helix by helix.
    merged = [
        aligned[0][k * helix_len:(k + 1) * helix_len] for k in helix_ids
    ]

    for segment, overlap_res in zip(aligned[1:], res_overlap):
        pieces = [
            segment[k * helix_len:(k + 1) * helix_len] for k in helix_ids
        ]
        overlap_atoms = overlap_res * atoms_per_residue

        # Blend the overlap in place: the weight of the new segment grows
        # residue by residue from 1/(overlap+1) to overlap/(overlap+1).
        for offset in range(overlap_atoms):
            weight = (offset // atoms_per_residue + 1) / float(overlap_res + 1)
            for k in helix_ids:
                before = merged[k][-overlap_atoms + offset]
                after = pieces[k][offset]
                merged[k][-overlap_atoms + offset] = (
                    1 - weight) * before + weight * after

        # everything past the overlap is simply appended
        for k in helix_ids:
            merged[k] = np.append(merged[k], pieces[k][overlap_atoms:],
                                  axis=0)

    res_dimer = np.concatenate(merged, axis=0)

    return res_dimer, rmsd
class ResidueMutator(object):
    """Replace a non-standard or incomplete residue with a standard one.

    Candidate replacement residues are read from tripeptide structure files
    (residue 2 of chain " " in every model) and grouped by residue name.
    """

    def __init__(self,
                 tripeptides=None,
                 components=None,
                 standard_residues=None):
        """ The mutator object takes a non-standard residue or incomplete residue and modifies it

        Parameters:
            tripeptides: iterable of structure file names providing candidate
                residues; defaults to ``data.tripeptides``.
            components: mapping from residue name to its chemical-component
                record; defaults to ``data.chem_components``.
            standard_residues: container of standard residue names; defaults
                to ``data.standard_residues``.

        Raises:
            ModuleNotFoundError: if BioPython cannot be imported.
        """
        try:
            from Bio.PDB import PDBParser
            from Bio.SVDSuperimposer import SVDSuperimposer
        except ModuleNotFoundError as err:
            # chain the original error so the missing module stays visible
            raise ModuleNotFoundError(
                "BioPython is required for this functionality") from err

        # get defaults if not provided
        if standard_residues is None:
            standard_residues = data.standard_residues
        if tripeptides is None:
            tripeptides = data.tripeptides
        if components is None:
            components = data.chem_components
        self.components = components
        self.candidates = {}
        self.standard_residues = standard_residues
        self.imposer = SVDSuperimposer()
        self.parser = PDBParser(PERMISSIVE=1, QUIET=True)

        # build up candidate structures
        for fn in tripeptides:
            structure = self.parser.get_structure("", fn)
            resn = structure[0][" "][2].get_resname()
            self.candidates[resn] = [model[" "][2] for model in structure]

    def mutate(self, residue, replace_backbone=True):
        """Return a standard replacement for ``residue``, or False.

        The side-chain atoms shared by the residue and its standard parent
        (and actually present in ``residue``) are used to superimpose every
        candidate of the parent type; the candidate with the lowest RMS is
        copied, transformed onto the residue and stripped of hydrogens.

        Parameters:
            residue: the residue to repair or mutate.
            replace_backbone: when true, main-chain atoms present in
                ``residue`` overwrite (or are added to) the candidate.

        Returns:
            The transformed candidate residue, or False when the parent is
            not a standard residue, has no candidates, no shared side-chain
            atom is present, or no candidate could be selected.
        """
        resn = residue.get_resname()

        if self.standard(resn):
            # the residue is already a standard residue, here for repair
            parn = resn
        else:
            parn = self.components[resn]['_chem_comp.mon_nstd_parent_comp_id']
            if not self.standard(parn):
                # the parent residue is a nonstandard residue, can't mutate
                return False

        if parn not in self.candidates:
            # parent not in candidate structures
            return False

        sc_fixed = set(
            self.components[resn]
            ['side_chain_atoms'])  # side chain atoms of fixed residue
        sc_movin = set(
            self.components[parn]
            ['side_chain_atoms'])  # side chain atoms of standard parent
        atom_names = sc_fixed.intersection(sc_movin)

        # get list of side chain atoms present in residue
        atom_list = [atom for atom in atom_names if atom in residue]
        if not atom_list:
            return False

        # get side chain atom coordinates
        fixed_coord = np.zeros((len(atom_list), 3))
        for i, atom in enumerate(atom_list):
            fixed_coord[i] = residue[atom].get_coord()

        # loop over candidates, finding best RMSD
        moved_coord = np.zeros((len(atom_list), 3))
        min_rms = 99999
        rotm = None
        tran = None
        min_candidate = None
        for candidate in self.candidates[parn]:
            for j, atom in enumerate(atom_list):
                moved_coord[j] = candidate[atom].get_coord()
            # perform SVD fitting
            self.imposer.set(fixed_coord, moved_coord)
            self.imposer.run()
            rms = self.imposer.get_rms()
            if rms < min_rms:
                min_rms = rms
                rotm, tran = self.imposer.get_rotran()
                min_candidate = candidate

        if min_candidate is None:
            # No candidate was selected (empty candidate list, or no fit
            # below the threshold); previously this crashed on None.copy().
            return False

        # copy the candidate to a new object
        candidate = min_candidate.copy()
        candidate.transform(rotm, tran)
        stripHydrogens(candidate)

        if replace_backbone:
            # replace backbone atoms of candidate
            backbone_atoms = self.components[resn]['main_chain_atoms']
            for atom in backbone_atoms:
                if atom not in residue:
                    continue
                if atom not in candidate:
                    candidate.add(residue[atom].copy())
                candidate[atom].set_coord(residue[atom].get_coord())

        return candidate

    def standard(self, resname):
        """Return whether ``resname`` is one of the standard residues."""
        return resname in self.standard_residues

    def modified(self, resname):
        """Return True if ``resname`` is non-standard with a standard parent.

        False is returned for standard residues, for names missing from
        ``self.components`` and for records without a parent field.
        """
        if resname in self.standard_residues:
            # it's standard, not modified
            return False

        parent_key = '_chem_comp.mon_nstd_parent_comp_id'
        if resname in self.components and parent_key in self.components[
                resname]:
            return self.components[resname][parent_key] in self.standard_residues

        # has no standard parent field - can't be modified
        return False
class SVDSuperimposerTest(unittest.TestCase):
    """Tests for SVDSuperimposer on a small four-point fixture.

    ``setUp`` loads reference coordinates ``x`` and moving coordinates ``y``
    into a fresh superimposer without running the fit, so each test decides
    when ``run()`` is called.
    """

    def setUp(self):
        self.x = array([[51.65, -1.90, 50.07],
                        [50.40, -1.23, 50.65],
                        [50.68, -0.04, 51.54],
                        [50.22, -0.02, 52.85]])

        self.y = array([[51.30, -2.99, 46.54],
                        [51.09, -1.88, 47.58],
                        [52.36, -1.20, 48.03],
                        [52.71, -1.18, 49.38]])

        self.sup = SVDSuperimposer()
        self.sup.set(self.x, self.y)

    def _assert_rounded_equal(self, actual, expected):
        """Assert two arrays are identical after rounding to 3 decimals."""
        self.assertTrue(
            array_equal(around(actual, decimals=3), around(expected, decimals=3)))

    def test_get_init_rms(self):
        x = array([[1.19, 1.28, 1.37],
                   [1.46, 1.55, 1.64],
                   [1.73, 1.82, 1.91]])
        y = array([[1.91, 1.82, 1.73],
                   [1.64, 1.55, 1.46],
                   [1.37, 1.28, 1.19]])
        self.sup.set(x, y)
        # The value is computed lazily; set() must leave it unset.
        self.assertIsNone(self.sup.init_rms)

        # The previous check was ``assertTrue(a, b)``, which treats ``b`` as
        # the failure message and therefore never compared anything.
        # NOTE(review): the literal it carried (0.8049844719) does not match
        # the hand-computed RMSD of these inputs (~0.7914), so the expected
        # value is derived from the inputs instead.  This assumes
        # get_init_rms() is the plain root-mean-square distance between the
        # untransformed point pairs -- confirm against SVDSuperimposer.
        expected_init_rms = (((y - x) ** 2).sum() / len(x)) ** 0.5
        self.assertAlmostEqual(
            self.sup.get_init_rms(), expected_init_rms, places=3)

    def test_oldTest(self):
        # Before run(): inputs are stored, nothing has been computed yet.
        self._assert_rounded_equal(self.sup.reference_coords, self.x)
        self._assert_rounded_equal(self.sup.coords, self.y)
        self.assertIsNone(self.sup.rot)
        self.assertIsNone(self.sup.tran)
        self.assertIsNone(self.sup.rms)
        self.assertIsNone(self.sup.init_rms)

        self.sup.run()

        # run() must not modify the stored inputs.
        self._assert_rounded_equal(self.sup.reference_coords, self.x)
        self._assert_rounded_equal(self.sup.coords, self.y)

        rot = array([[0.68304983, 0.53664371, 0.49543563],
                     [-0.52277295, 0.83293229, -0.18147242],
                     [-0.51005037, -0.13504564, 0.84947707]])
        tran = array([38.78608157, -20.65451334, -15.42227366])
        self._assert_rounded_equal(self.sup.rot, rot)
        self._assert_rounded_equal(self.sup.tran, tran)
        # RMS values stay lazy until explicitly requested.
        self.assertIsNone(self.sup.rms)
        self.assertIsNone(self.sup.init_rms)

        rms = 0.00304266526014
        self.assertAlmostEqual(self.sup.get_rms(), rms, places=3)

        rot_get, tran_get = self.sup.get_rotran()
        self._assert_rounded_equal(rot_get, rot)
        self._assert_rounded_equal(tran_get, tran)

        # Applying the transform by hand and via get_transformed() must both
        # land y on x.
        y_x_solution = array(
            [[5.16518846e+01, -1.90018270e+00, 5.00708397e+01],
             [5.03977138e+01, -1.22877050e+00, 5.06488200e+01],
             [5.06801788e+01, -4.16095666e-02, 5.15368866e+01],
             [5.02202228e+01, -1.94372374e-02, 5.28534537e+01]])
        y_on_x1 = dot(self.y, rot) + tran
        self._assert_rounded_equal(y_on_x1, y_x_solution)

        y_on_x2 = self.sup.get_transformed()
        self._assert_rounded_equal(y_on_x2, y_x_solution)
Exemple #36
0
def run_system(dir):
    """Compute the per-frame RMSD of one run against its reference coordinates.

    The function changes into ``dir``, reads the trajectory from ``coord.h5``
    (dataset ``"coord"``) and the reference from
    ``../../cmap_coordinates/<pdb>.txt`` (``<pdb>`` is the part of the
    directory's base name before the first ``_``).  Each frame and the
    reference are centred on their mean, the frame is superimposed on the
    reference, and the resulting RMSD values are written to ``RMSD.txt``.
    A one-line summary (mean RMSD and mean radius of gyration relative to the
    reference, both over the last 200 frames) is printed.

    Nothing is computed when an input file is missing, or when ``RMSD.txt``
    already exists and the module-level ``_FORCE`` flag is false.

    The original working directory is restored on every exit path, including
    exceptions.

    Parameters:
        dir (str): Path of the run directory.

    Returns:
        str: ``dir`` unchanged, whether or not any work was done.
    """
    pdb = os.path.basename(dir).split('_')[0]

    f_coord = "coord.h5"
    f_RMSD = "RMSD.txt"
    f_OC = os.path.join("..", "..", "cmap_coordinates", pdb + '.txt')

    org_dir = os.getcwd()
    os.chdir(dir)
    try:
        # Single-argument print(...) emits the same text under Python 2's
        # print statement and Python 3's print function.
        if not os.path.exists(f_OC):
            print("Missing coordinates for cmap {}".format(dir))
            return dir

        if not os.path.exists(f_coord):
            print("Missing coordinates, extract_coordinates.py first {}".format(dir))
            return dir

        if os.path.exists(f_RMSD) and not _FORCE:
            print("RMSD file exists, skipping {}".format(dir))
            return dir

        # The context manager closes the file even if reading fails.
        with h5py.File(f_coord, 'r') as h5:
            C = h5["coord"][:]
        OC = np.loadtxt(f_OC)

        assert(C[0].shape == OC.shape)

        sup = SVDSuperimposer()

        # Centre the reference and take its radius of gyration as the scale
        # for the printed summary.
        OC -= OC.mean(axis=0)
        OC_RG = ((np.linalg.norm(OC, axis=1)**2).sum()/len(OC)) ** 0.5

        RMSD = []
        RG = []
        for cx in C:
            # Centre the frame in place before measuring and fitting it.
            cx -= cx.mean(axis=0)
            RG.append(((np.linalg.norm(cx, axis=1)**2).sum()/len(cx)) ** 0.5)

            sup.set(OC, cx)
            sup.run()
            RMSD.append(sup.get_rms())

        RMSD = np.array(RMSD)
        RG = np.array(RG)

        print("{} {: 0.4f} {: 0.4f}".format(dir, RMSD[-200:].mean(),
                                            RG[-200:].mean() / OC_RG))

        np.savetxt(f_RMSD, RMSD)
    finally:
        os.chdir(org_dir)

    return dir
Exemple #37
0
def tm_movement_2D(pdbs1, pdbs2, mode, data, gn_dictionary):
    """Compare the 7TM helix placement of two structure sets and project it to 2D.

    Outline of what the code below does:
      1. Load both PDB sets into ``Distances`` objects and keep the generic
         numbers (GNs) conserved in both.
      2. Per TM helix pick up to three "reference" GNs (``gns``) and three
         "helper" GNs further along the helix (``middle_gpcr``); which ones
         depends on ``mode``.
      3. Build the average pairwise distance matrix of those GNs for each set
         and rank the helices by how little their distances differ.
      4. Rebuild 3D points from the distance matrices (``recreate3Dorder``),
         superimpose set 2 on set 1 using the three best-ranked helices (also
         trying a z-mirrored set 2), and project the first seven centroids of
         both sets onto a plane (``convert3D_to_2D_plane``).
      5. Derive one rotation value per TM from ``data['tab4']`` angles.

    Parameters:
        pdbs1, pdbs2: Structure identifiers handed to ``Distances.load_pdbs``;
            ``len()`` of each is used as the averaging divisor.
        mode (int): Index into ["extracellular", "intracellular", "pocket",
            "middle"]; selects how reference GNs are chosen.
        data (dict): Read for ``data['gpcr_class']`` (mode 3 only) and
            ``data['tab4'][<gn>]['angles_set1' / 'angles_set2'][11]``.
        gn_dictionary (dict): Maps the GN labels used internally to the labels
            used as keys of ``data['tab4']`` and in the returned ``gns_used``.

    Returns:
        list | dict: ``[]`` when a helix has too few conserved residues;
        otherwise a dict with ``coordinates_set1``, ``coordinates_set2`` (seven
        dicts each with label/x/y/z/rotation) and ``gns_used``.
    """
    string_mode = ["extracellular", "intracellular", "pocket", "middle"]
    intracellular = (mode == 1)
    print("COMPARISON", string_mode[mode])
    print(pdbs1)
    print("VS")
    print(pdbs2)

    distances_set1 = Distances()
    distances_set1.load_pdbs(pdbs1)
    distances_set1.filtered_gns = True

    distances_set2 = Distances()
    distances_set2.load_pdbs(pdbs2)
    distances_set2.filtered_gns = True

    # Only GNs conserved in both sets can be compared.
    conserved_set1 = distances_set1.fetch_conserved_gns_tm()
    conserved_set2 = distances_set2.fetch_conserved_gns_tm()
    conserved = [x for x in conserved_set2 if x in conserved_set1]

    # NOTE(review): ``[[]] * 7`` makes all seven entries the same list object.
    # This is harmless here only because entries are rebound (gns[i] = ...),
    # never mutated in place, before the final renumbering step.
    gns = [[]] * 7
    middle_gpcr = [[]] * 7
    if mode <= 1: # Intracellular or Extracellular
        for i in range(0,7):
            # GNs of helix i+1 are selected by their first character.
            tm_only = [x for x in conserved if x[0]==str(i+1)]
            if intracellular and i % 2 == 0: #all uneven TMs (as # = i+1)
                tm_only.reverse()
            elif not intracellular and i % 2 == 1: # all even TMs (as # i+1)
                tm_only.reverse()
            if len(tm_only) < 3:
                print("too few residues")
                return []
            gns[i] = tm_only[0:3]

            # Take the furthest available 3-residue window (ending at position
            # 12 down to 7) as helper points; stop at the first that fits.
            for upwards in range(12, 6, -1):
                if len(tm_only) >= upwards:
                    middle_gpcr[i] = tm_only[(upwards-3):upwards]
                    break

        # INCLUDING References points from membrane middle of GPCR
        # ref_membrane_mid = {}
        # ref_membrane_mid["001"] = [['1x43', '1x44','1x45'], ['2x51', '2x52','2x53'], ['3x35', '3x36', '3x37'], ['4x53', '4x54', '4x55'], ['5x45', '5x46', '5x47'], ['6x47', '6x48', '6x49'], ['7x42', '7x43', '7x44']] # A
        # #ref_membrane_mid["002"] = [['1x50', '1x51', '1x52'], ['2x57', '2x58', '2x59'], ['3x40','3x41','3x42'], ['4x53', '4x54', '4x55'], ['5x44', '5x45', '5x46'], ['6x48', '6x49', '6x50'], ['7x49', '7x50', '7x51']] # B1
        # ref_membrane_mid["002"] = [['1x50', '1x51', '1x52'], ['2x57', '2x58', '2x59'], ['3x40','3x41','3x42'], ['4x55', '4x56'], ['5x42', '5x43', '5x44'], ['7x47', '7x49']] # B1
        # ref_membrane_mid["003"] = ref_membrane_mid["002"] # B2
        # ref_membrane_mid["004"] = [['1x48', '1x49', '1x50'], ['2x47', '2x48', '2x49'], ['3x39', '3x40', '3x41'], ['4x40', '4x41', '4x42'], ['5x47', '5x48', '5x49'], ['6x47', '6x48', '6x49'], ['7x39', '7x40', '7x41']] # C
        # ref_membrane_mid["006"] = [['1x42', '1x43', '1x44'], ['2x52', '2x53', '2x54'], ['3x37', '3x38', '3x39'], ['4x52', '4x53', '4x54'], ['5x52', '5x53', '5x54'], ['6x42', '6x43', '6x44'], ['7x46', '7x47', '7x48']] # F
        #
        # middle_gpcr = ref_membrane_mid[data['gpcr_class']]
    elif mode == 2: # Major pocket (class A)
        ligand_references = [['1x39', '1x40','1x41'], ['2x56', '2x57','2x58'], ['3x31', '3x32', '3x33'], ['4x56', '4x57', '4x58'], ['5x43', '5x44', '5x45'], ['6x51', '6x52', '6x53'], ['7x39', '7x40', '7x41']]
        for i in range(0,7):
            gns[i] = [x for x in ligand_references[i] if x in conserved]
            tm_only = [x for x in conserved if x[0]==str(i+1)]
            if i % 2 == 1: #all uneven TMs (as # = i+1)
                tm_only.reverse()
            if len(gns[i]) > 0:
                # Anchor on whichever conserved reference GN comes first in the
                # (possibly reversed) helix order.
                if i % 2 == 1: #all uneven TMs (as # = i+1)
                    start_pos = tm_only.index(gns[i][-1])
                else:
                    start_pos = tm_only.index(gns[i][0])

                gns[i] = tm_only[start_pos:(start_pos+3)]

                # Stay close for this as references
                #middle_gpcr[i] = tm_only[(start_pos+6):(start_pos+9)]
                # NOTE(review): ``continue`` does not leave the loop, so the
                # LAST fitting window (smallest ``upwards``, i.e. closest to the
                # references) wins -- unlike the ``break`` in the mode <= 1
                # branch, which keeps the furthest one.  Confirm this is intended.
                for upwards in range(9, 6, -1):
                   if len(tm_only) >= (start_pos+upwards):
                       middle_gpcr[i] = tm_only[(start_pos+upwards-3):(start_pos+upwards)]
                       continue
            else:
                # None of the predefined reference GNs is conserved: fall back
                # to the first residues of the helix.
                if len(tm_only) < 9:
                    print("too few residues")
                    return []
                else:
                    #print("Refind",i, gns[i])
                    gns[i] = tm_only[0:3]
                    middle_gpcr[i] = tm_only[6:9]

                    # for upwards in range(15, 6, -1):
                    #     if len(tm_only) >= upwards:
                    #         middle_gpcr[i] = tm_only[(upwards-3):upwards]

        # # FILTER not conserved GNs
        # middle_gpcr = [[]] * 7
        # for i in range(0,7):
        #     tm_only = [x for x in conserved if x[0]==str(i+1)]
        #     if i % 2 == 0: #all uneven TMs (as # = i+1)
        #         tm_only.reverse()
        #
        #     if len(tm_only) < 3:
        #         print("too few residues")
        #         return []
        #
        #     middle_gpcr[i] = tm_only[0:3]
        #print(middle_gpcr)

    elif mode == 3: # Middle
        # References points from membrane middle of GPCR
        ref_membrane_mid = {}
        ref_membrane_mid["001"] = [['1x43', '1x44','1x45'], ['2x51', '2x52','2x53'], ['3x35', '3x36', '3x37'], ['4x53', '4x54', '4x55'], ['5x45', '5x46', '5x47'], ['6x47', '6x48', '6x49'], ['7x42', '7x43', '7x44']] # A
        #ref_membrane_mid["002"] = [['1x50', '1x51', '1x52'], ['2x57', '2x58', '2x59'], ['3x40','3x41','3x42'], ['4x53', '4x54', '4x55'], ['5x44', '5x45', '5x46'], ['6x48', '6x49', '6x50'], ['7x49', '7x50', '7x51']] # B1
        #ref_membrane_mid["002"] = [['1x50', '1x51', '1x52'], ['2x57', '2x58', '2x59'], ['3x40','3x41','3x42'], ['4x55', '4x56'], ['5x42', '5x43', '5x44'], ['7x47', '7x49']] # B1
        ref_membrane_mid["002"] = [['1x50', '1x51', '1x52'], ['2x57', '2x58', '2x59'], ['3x40','3x41','3x42'], ['4x55', '4x56'], ['5x42', '5x43', '5x44'], ['6x48', '6x49', '6x50'], ['7x47', '7x49']] # B1
        ref_membrane_mid["003"] = ref_membrane_mid["002"] # B2
        ref_membrane_mid["004"] = [['1x48', '1x49', '1x50'], ['2x47', '2x48', '2x49'], ['3x39', '3x40', '3x41'], ['4x40', '4x41', '4x42'], ['5x47', '5x48', '5x49'], ['6x47', '6x48', '6x49'], ['7x39', '7x40', '7x41']] # C
        ref_membrane_mid["006"] = [['1x42', '1x43', '1x44'], ['2x52', '2x53', '2x54'], ['3x37', '3x38', '3x39'], ['4x52', '4x53', '4x54'], ['5x52', '5x53', '5x54'], ['6x42', '6x43', '6x44'], ['7x46', '7x47', '7x48']] # F

        # Raises KeyError for any class not listed above.
        membrane_mid = ref_membrane_mid[data['gpcr_class']]

        # For every class except "001" the reference labels are translated
        # through the inverse of gn_dictionary before being matched against
        # ``conserved``.  The table entries are overwritten in place; the table
        # is rebuilt on every call, so this does not leak between calls.
        if data['gpcr_class'] != "001":
            inv_gn_dictionary = {v: k for k, v in gn_dictionary.items()}
            for index in range(len(membrane_mid)):
                membrane_mid[index] = [inv_gn_dictionary[res] for res in membrane_mid[index]]

        for i in range(0,7):
            gns[i] = [x for x in membrane_mid[i] if x in conserved]
            tm_only = [x for x in conserved if x[0]==str(i+1)]
            if i % 2 == 1: #all uneven TMs (as # = i+1)
                tm_only.reverse()
            if len(gns[i]) > 0:
                if i % 2 == 1: #all uneven TMs (as # = i+1)
                    start_pos = tm_only.index(gns[i][-1])
                else:
                    start_pos = tm_only.index(gns[i][0])

                gns[i] = tm_only[start_pos:(start_pos+3)]

                # Stay close for this as references
                #middle_gpcr[i] = tm_only[(start_pos+6):(start_pos+9)]
                # NOTE(review): same ``continue`` behaviour as in the mode 2
                # branch -- the last (closest) fitting window is kept.
                for upwards in range(6, 3, -1):
                   if len(tm_only) >= (start_pos+upwards):
                       middle_gpcr[i] = tm_only[(start_pos+upwards-3):(start_pos+upwards)]
                       continue
            else:
                if len(tm_only) < 6:
                    print("too few residues")
                    return []
                else:
                    #print("Refind",i, gns[i])
                    gns[i] = tm_only[0:3]
                    middle_gpcr[i] = tm_only[3:6]

                    # for upwards in range(15, 6, -1):
                    #     if len(tm_only) >= upwards:
                    #         middle_gpcr[i] = tm_only[(upwards-3):upwards]

    # Merge the reference and the helper points
    # Helper points must be conserved and must not duplicate a reference point.
    gns_flat = [y for x in gns for y in x]
    middle_gpcr = [list(filter(lambda x: x in conserved and x not in gns_flat, tm_list)) for tm_list in middle_gpcr]
    # print(gns)
    # print(middle_gpcr)

    # ends_and_middle: groups 0-6 are the reference GNs per TM, groups 7-13 the
    # helper GNs per TM.  ``ends_and_middle_grouping`` gives, for every entry of
    # the flat GN list, the index of the group it belongs to.
    ends_and_middle = gns[:]
    ends_and_middle.extend(middle_gpcr)
    ends_and_middle_flat = [y for x in ends_and_middle for y in x]
    ends_and_middle_grouping = [x for x in range(0, len(ends_and_middle)) for y in ends_and_middle[x]]
    # TM index (0-based) of each group, read from the first character of its
    # first GN.
    # NOTE(review): raises IndexError if any group is empty (e.g. a helper
    # group emptied by the filter above) -- verify this cannot happen.
    segment_order = [int(ends_and_middle[x][0][0])-1 for x in range(0, len(ends_and_middle))]

    # Both sets fetch distances for the same GN selection (the filter list
    # object is shared between them).
    distances_set1.filter_gns.extend([y for x in ends_and_middle for y in x])
    distances_set2.filter_gns = distances_set1.filter_gns
    distances_set1.fetch_distances_tm(distance_type = "HC")
    distances_set2.fetch_distances_tm(distance_type = "HC")


    # Symmetric matrices of the average distance between every GN pair, one per
    # set.  Keys of ``.data`` are "<gn_a>_<gn_b>" in the order decided by
    # right_gn_order().
    membrane_data1 = [x[:] for x in [[0] * len(ends_and_middle_flat)] * len(ends_and_middle_flat)]
    membrane_data2 = [x[:] for x in [[0] * len(ends_and_middle_flat)] * len(ends_and_middle_flat)]
    for i in range(0,len(ends_and_middle_flat)-1):
        for j in range(i+1, len(ends_and_middle_flat)):
            if right_gn_order(ends_and_middle_flat[i], ends_and_middle_flat[j]):
                filter_key = ends_and_middle_flat[i] + "_" + ends_and_middle_flat[j]
            else:
                filter_key = ends_and_middle_flat[j] + "_" + ends_and_middle_flat[i]

            if ends_and_middle_flat[i] != ends_and_middle_flat[j]:
                membrane_data1[i][j] = sum(distances_set1.data[filter_key])/len(pdbs1)
                membrane_data1[j][i] = membrane_data1[i][j]
                membrane_data2[i][j] = sum(distances_set2.data[filter_key])/len(pdbs2)
                membrane_data2[j][i] = membrane_data2[i][j]

    # Identify most stable TMs by ranking the variations to all other helices
    membrane_data1 = np.array([np.array(x) for x in membrane_data1])
    membrane_data2 = np.array([np.array(x) for x in membrane_data2])
    diff_distances = [x[:] for x in [[0] * len(ends_and_middle)] * len(ends_and_middle)]
    for i in range(0,max(ends_and_middle_grouping)):
        for j in range(i+1, max(ends_and_middle_grouping)+1):
            # Calculate movements for each TM relative to their "normal" distance
            # selected residues for group 1 and 2
            group_1 = [x for x in range(0,len(ends_and_middle_grouping)) if ends_and_middle_grouping[x] == i]
            group_2 = [x for x in range(0,len(ends_and_middle_grouping)) if ends_and_middle_grouping[x] == j]

            # Summed absolute distance change between the two groups, as a
            # percentage of their mean summed distance.
            diff_distances[i][j] = np.sum(abs(membrane_data1[group_1][:, group_2] - membrane_data2[group_1][:, group_2]))/(np.sum(membrane_data1[group_1][:, group_2]+membrane_data2[group_1][:, group_2])/2)*100
            diff_distances[j][i] = diff_distances[i][j]

    # Ranking for each TM
    # NOTE(review): sum_differences is only referenced by commented-out code.
    sum_differences = [sum(x) for x in diff_distances]
    # normalized_differences = [((sum_differences[i]-min(sum_differences[0:7]))/(max(sum_differences[0:7])-min(sum_differences[0:7])))**2 for i in range(0,7)]
    # Replace each value in the first seven rows by its rank within the row
    # (equal values share the lowest rank, since .index returns the first hit),
    # then sum the ranks column-wise.
    for i in range(0,7):
        diff_distances[i] = [sorted(diff_distances[i]).index(x) for x in diff_distances[i]]
    final_rank = [sum([diff_distances[j][i] for j in range(0,7)]) for i in range(0,7)]

    # Grab stable TMs
    # tm_ranking[0] is the TM index with the lowest rank sum.
    tm_ranking = [0] * 7
    sorted_rank = sorted(final_rank)
    for i in range(0,7):
        tm_ranking[i] = final_rank.index(sorted_rank[i])
        final_rank[tm_ranking[i]] = 100 # make sure this TM isn't repeated

    # Calculate 3D coordinates from distance matrix
    tms_centroids_set1, tms_set1 = recreate3Dorder(membrane_data1, ends_and_middle_grouping)
    tms_centroids_set2, tms_set2 = recreate3Dorder(membrane_data2, ends_and_middle_grouping)

    # Align 3D points of set2 with 3D points of set1 using the most stable reference points
    best_rmsd = 1000
    best_set = []
    # Disabled the testing RMSD for now
    # With three candidates and combinations of three this loop runs exactly
    # once; rot/trans/rmsd from that single fit are used below.
    # NOTE(review): best_set and best_rmsd are assigned but not read afterwards.
    for comb in combinations(tm_ranking[:3], 3):
    #for comb in combinations(tm_ranking[:4], 3):
        sel_refs = [x for x in range(0,len(segment_order)) if segment_order[x] in comb]
        #print(sel_refs)

        tms_reference_set1 = np.array(tms_centroids_set1[sel_refs], copy = True)
        tms_reference_set2 = np.array(tms_centroids_set2[sel_refs], copy = True)

        imposer = SVDSuperimposer()
        imposer.set(tms_reference_set1, tms_reference_set2)
        imposer.run()
        rot, trans = imposer.get_rotran()
        rmsd = imposer.get_rms()

        print("RMSD", round(rmsd,2), tm_ranking)
        if rmsd < best_rmsd:
            best_set = comb
            best_rmsd = rmsd

    # Check for possible mirroring error
    # NOTE(review): ``error`` is counted but no longer consulted; the mirrored
    # fit below is always attempted (``if True``).
    test_set2 = np.dot(tms_centroids_set2, rot) + trans
    error = 0
    for i in tm_ranking[3:7]:
        if np.linalg.norm(test_set2[i] - tms_centroids_set1[i]) > 5:
            error += 1

    #if rmsd > 2:
    #if error >= 3 or rmsd > 2:
    if True:
        # Mirror set 2 by flipping the sign of its z coordinates (in place).
        for i in range(0,len(tms_centroids_set2)):
            tms_centroids_set2[i][2] = tms_centroids_set2[i][2]*-1

        # Align 3D points of set2 with 3D points of set1 using the most stable reference points
        tms_reference_set1 = tms_centroids_set1[[x for x in range(0,len(segment_order)) if segment_order[x] in tm_ranking[0:3]]]
        tms_reference_set2 = tms_centroids_set2[[x for x in range(0,len(segment_order)) if segment_order[x] in tm_ranking[0:3]]]

        imposer = SVDSuperimposer()
        imposer.set(tms_reference_set1, tms_reference_set2)
        imposer.run()
        new_rot, new_trans = imposer.get_rotran()
        new_rmsd = imposer.get_rms()
        print("RMSD2", round(new_rmsd,2))

        # Keep the mirrored fit only if it is better; otherwise undo the flip.
        # NOTE(review): only tms_centroids_set2 is mirrored here; tms_set2 is
        # transformed below with the chosen rot/trans without a flip -- verify.
        if new_rmsd < rmsd:
            rot = new_rot
            trans = new_trans
            rmsd = new_rmsd
        else:
            for i in range(0,len(tms_centroids_set2)):
                tms_centroids_set2[i][2] = tms_centroids_set2[i][2]*-1

    # test_set2 = np.dot(tms_reference_set2, rot) + trans
    # for i in range(0,len(test_set2)):
    #     print("pseudoatom s1_tm" + str(i+1), ", pos=[", ','.join([str(x) for x in tms_reference_set1[i]]), "]")
    # for i in range(0,len(test_set2)):
    #     print("pseudoatom s2_tm" + str(i+1), ", pos=[", ','.join([str(x) for x in test_set2[i]]), "]")
    #
    # print("############")
    # #test_set2 = np.dot(tms_centroids_set2, rot) + trans
    # test_set2 = np.array(tms_centroids_set2, copy = True)
    # for i in range(0,len(tms_centroids_set1)):
    #     print("pseudoatom s1_tm" + str(i+1), ", pos=[", ','.join([str(x) for x in tms_centroids_set1[i]]), "]")
    # for i in range(0,len(tms_centroids_set2)):
    #     print("pseudoatom s2_tm" + str(i+1), ", pos=[", ','.join([str(x) for x in tms_centroids_set2[i]]), "]")

    # if rmsd > 2:
    #     for i in range(0,len(tms_centroids_set2)):
    #         tms_centroids_set2[i][2] = tms_centroids_set2[i][2]*-1
    #     # Huge error during alignment of "stable" helices, just use the references not the helper points
    #     tms_reference_set1 = tms_centroids_set1[[x for x in range(0,7) if segment_order[x] in tm_ranking[0:4]]]
    #     tms_reference_set2 = tms_centroids_set2[[x for x in range(0,7) if segment_order[x] in tm_ranking[0:4]]]
    #     imposer = SVDSuperimposer()
    #     imposer.set(tms_reference_set1, tms_reference_set2)
    #     imposer.run()
    #     rot, trans = imposer.get_rotran()
    #     rmsd = imposer.get_rms()
    #     print("RMSD3", round(rmsd,2))
    #

    # Apply the chosen transform to all set-2 points.
    tms_centroids_set2 = np.dot(tms_centroids_set2, rot) + trans
    tms_set2 = np.dot(tms_set2, rot) + trans

    # Calculate optimal plane through points in both sets and convert to 2D
    # Try normal based on TM7
    # tm7_centroids = tms_centroids_set1[[x for x in range(0,len(segment_order)) if segment_order[x] == 6]]
    # if len(tm7_centroids) == 2:
    #     normal = (tm7_centroids[1] - tm7_centroids[0])/np.linalg.norm(tm7_centroids[1] - tm7_centroids[0])
    # else:
    #     # Using TM mid as reference plane
    #     normal, midpoint = calculatePlane(np.concatenate((tms_centroids_set1[7:], tms_centroids_set2[7:])), intracellular)

    # Alternative: use center of helical ends and center of helical middle
    #    normal = tms_centroids_set1[:7].mean(axis=0)  - tms_centroids_set1[7:].mean(axis=0)
    #    normal = normal/np.linalg.norm(normal)

    # 7TM references
    # Collect the set-1 centroids belonging to each TM; the plane normal is the
    # mean of the unit vectors between the two centroids of every TM that has
    # exactly two.
    tm_centroids = {y:[] for y in range(0,7)}
    [tm_centroids[y].append(tms_centroids_set1[x]) for y in range(0,7) for x in range(0,len(segment_order)) if segment_order[x] == y]
    count = 0
    normal = np.array([0.0,0.0,0.0])
    for y in range(0,7):
        #if len(tm_centroids[y]) == 2 and (mode != 1 or y != 5):
        if len(tm_centroids[y]) == 2:
            normal += np.array((tm_centroids[y][1] - tm_centroids[y][0])/np.linalg.norm(tm_centroids[y][1] - tm_centroids[y][0]))
            count += 1
    # NOTE(review): count is 0 when no TM has exactly two centroids, which
    # makes this a division by zero on a NumPy array (NaN normal + warning).
    normal = normal/count

    midpoint = tms_centroids_set1[:7].mean(axis=0)

    #plane_set1, z_set1 = convert3D_to_2D_plane(tms_centroids_set1[:7], intracellular, normal, midpoint)
    #plane_set2, z_set2 = convert3D_to_2D_plane(tms_centroids_set2[:7], intracellular, normal, midpoint)
    # Project both sets in one call, then split: first seven rows are set 1,
    # the remaining rows set 2.
    plane_set, z_set = convert3D_to_2D_plane(np.concatenate((tms_centroids_set1[:7], tms_centroids_set2[:7]), axis = 0), intracellular, normal, midpoint)
    plane_set1 = plane_set[:7]
    plane_set2 = plane_set[7:]
    z_set1 = z_set[:7]
    z_set2 = z_set[7:]

    # DO NOT REMOVE: possibly we want to upgrade to weighted superposing
    # Based on Biopython SVDSuperimposer
    # coords = tms_centroids_set2
    # reference_coords = tms_centroids_set1

    # OLD centroid calcalation
    # av1 = sum(coords) / len(coords)
    # av2 = sum(reference_coords) / len(reference_coords)

    # NEW weighted centroid calculation
    # print(normalized_differences)
    # av1, av2 = 0, 0
    # totalweight = 0
    # for i in range(0,7):
    #     # print("Round",i)
    #     #weight = 1+(7-tm_ranking.index(i))/7
    #     weight = (1-normalized_differences[i]+0.1)/1.1
    #     totalweight += weight
    #     print("TM", str(i+1), "weight",weight)
    #     av1 += coords[i]*weight
    #     av2 += reference_coords[i]*weight
    #
    # av1 = av1/totalweight
    # av2 = av2/totalweight
    #
    # coords = coords - av1
    # reference_coords = reference_coords - av2
    #
    # # correlation matrix
    # a = np.dot(np.transpose(coords), reference_coords)
    # u, d, vt = np.linalg.svd(a)
    # rot = np.transpose(np.dot(np.transpose(vt), np.transpose(u)))
    # # check if we have found a reflection
    # if np.linalg.det(rot) < 0:
    #     vt[2] = -vt[2]
    #     rot = np.transpose(np.dot(np.transpose(vt), np.transpose(u)))
    # trans = av2 - np.dot(av1, rot)
    # rot, trans = imposer.get_rotran()
    # tms_set2 = np.dot(tms_set2, rot) + trans

    # CURRENT: Ca-angle to axis core
    # Per TM: difference (set 1 minus set 2) of angle entry 11 for the three
    # reference GNs, each wrapped into [-180, 180], then averaged.
    rotations = [0] * 7
    for i in range(0,7):
        try:
            # rotations[i] = [data['tab4'][gn_dictionary[x]]['angles_set1'][1]-data['tab4'][gn_dictionary[x]]['angles_set2'][1] if abs(data['tab4'][gn_dictionary[x]]['angles_set1'][1]-data['tab4'][gn_dictionary[x]]['angles_set2'][1]) < 180 else -1*data['tab4'][gn_dictionary[x]]['angles_set2'][1]-data['tab4'][gn_dictionary[x]]['angles_set1'][1] for x in gns[i]]
            angles1 = [data['tab4'][gn_dictionary[x]]['angles_set1'][11] for x in gns[i]]
            angles1 = [angle if angle > 0 else angle + 360 for angle in angles1 ]
            angles2 = [data['tab4'][gn_dictionary[x]]['angles_set2'][11] for x in gns[i]]
            angles2 = [angle if angle > 0 else angle + 360 for angle in angles2 ]

            rotations[i] = [angles1[x] - angles2[x] for x in range(3)]
            rotations[i] = [value if abs(value) <= 180 else value-360 if value > 0 else value+360 for value in rotations[i]]

            # count=0
            # for x in gns[i]:
            #     print(i, x, data['tab4'][gn_dictionary[x]]['angles_set1'][11], data['tab4'][gn_dictionary[x]]['angles_set2'][11], rotations[i][count])
            #     count += 1

        # NOTE(review): bare ``except`` also swallows KeyboardInterrupt and
        # hides any lookup/typing error; a TM with fewer than three reference
        # GNs ends up here too (IndexError) and gets a zero rotation.
        except:
            rotations[i] = [0.0, 0.0, 0.0]  # TODO: verify other class B errors

        # UPDATE 20-02-2020 No mirroring but top-down through GPCR
        rotations[i] = sum(rotations[i])/3
        # if intracellular:
        #     rotations[i] = -1*sum(rotations[i])/3
        # else:
        #     rotations[i] = sum(rotations[i])/3


    # ALTERNATIVE: utilize TM tip alignment (needs debugging as some angles seem off, e.g. GLP-1 active vs inactive TM2)
    # Add rotation angle based on TM point placement
    # tms_2d_set1, junk = convert3D_to_2D_plane(tms_set1, intracellular, normal, midpoint)
    # tms_2d_set2, junk = convert3D_to_2D_plane(tms_set2, intracellular, normal, midpoint)

    # rotations = [0] * 7
    # for i in range(0,7):
    #     positions = [x for x in range(0, len(ends_and_middle_grouping)) if ends_and_middle_grouping[x] == i]
    #     turn_set1 = tms_2d_set1[positions]
    #     turn_set2 = tms_2d_set2[positions]
    #
    #     # set to middle
    #     turn_set1 = turn_set1 - turn_set1.mean(axis=0)
    #     turn_set2 = turn_set2 - turn_set2.mean(axis=0)
    #
    #     # Calculate shift per residue and take average for this TM
    #     for j in range(0,len(turn_set1)):
    #         v1 = turn_set1[j]/np.linalg.norm(turn_set1[j])
    #         v2 = turn_set2[j]/np.linalg.norm(turn_set2[j])
    #         angle = np.degrees(np.arctan2(v2[1], v2[0]) - np.arctan2(v1[1],v1[0]))
    #
    #         if abs(angle) > 180:
    #             angle = 360 - abs(angle)
    #
    #         rotations[i] += angle/len(turn_set1)

    # TODO: check z-coordinates orientation
    # Step 1: collect movement relative to membrane mid
    # Step 2: find min and max TM
    # Step 3: check if orientation of min/max TM matches the z-scales + intra/extra - if not invert z-coordinates
    # Set 1 is the reference, so its rotation is always 0; set 2 carries the
    # rotation relative to set 1.
    labeled_set1 = [{"label": "TM"+str(i+1), "x": float(plane_set1[i][0]), "y": float(plane_set1[i][1]), "z": float(z_set1[i]), "rotation" : 0} for i in range(0,7)]
    labeled_set2 = [{"label": "TM"+str(i+1), "x": float(plane_set2[i][0]), "y": float(plane_set2[i][1]), "z": float(z_set2[i]), "rotation" : rotations[i]} for i in range(0,7)]

    # Convert used GNs to right numbering
    # NOTE(review): ``gns[:]`` is a shallow copy, so the inner lists of ``gns``
    # are renumbered in place as well; harmless because ``gns`` is not used
    # after this point.
    gns_used = gns[:]
    for i in range(0,len(gns)):
        for j in range(0,len(gns[i])):
            gns_used[i][j] = gn_dictionary[gns[i][j]]
    return {"coordinates_set1" : labeled_set1, "coordinates_set2": labeled_set2, "gns_used": gns_used}
def compute_mean(reader, align_conf, num_confs, start=None, stop=None):
    """
        Accumulate aligned positions and orientations over a trajectory.

        Every configuration read is superimposed onto ``align_conf`` and the
        rotated positions / a1 / a3 vectors are added to running sums; the
        caller divides by the returned count to obtain the mean.

        Written so it can be driven by the multiprocessing helper in
        UTILS/parallelize.py.

        Parameters:
            reader: An active trajectory reader providing ``_get_system``.
            align_conf (numpy.array): Reference coordinates every frame is fitted to.
            num_confs (int): Number of configurations in the reader; used as ``stop`` when none is given.
            <optional> start (int): Number of configurations skipped before the first one is read.
            <optional> stop (int): Upper bound on the number of configurations read by this call.

        Returns:
            tuple: (sum of positions, sum of a1 vectors, sum of a3 vectors,
            list of intermediate mean structures, number of configurations read).
    """
    stop = num_confs if stop is None else int(stop)
    start = 0 if start is None else int(start)

    system = reader._get_system(N_skip = start)

    # periodic snapshots of the running mean
    snapshots = []
    # performs the pairwise structure alignment
    superimposer = SVDSuperimposer()

    # running sums, one 3-vector per nucleotide
    pos_sum = np.zeros((n_nuc, 3))
    a1_sum = np.zeros((n_nuc, 3))
    a3_sum = np.zeros((n_nuc, 3))

    n_read = 0

    while system != False and n_read < stop:
        system.inbox()
        positions = fetch_np(system)
        indexed_positions = indexed_fetch_np(system)
        a1_vectors = fetch_a1(system)
        a3_vectors = fetch_a3(system)

        # fit the indexed particles onto the reference
        superimposer.set(align_conf, indexed_positions)
        superimposer.run()
        rot, tran = superimposer.get_rotran()

        # positions are rotated and translated; orientation vectors only rotated
        aligned_positions = np.einsum('ij, ki -> kj', rot, positions) + tran
        aligned_a1 = np.einsum('ij, ki -> kj', rot, a1_vectors)
        aligned_a3 = np.einsum('ij, ki -> kj', rot, a3_vectors)
        pos_sum += aligned_positions
        a1_sum += aligned_a1
        a3_sum += aligned_a3

        # report the fit quality for this frame
        print("Frame:", n_read, "Time:", system._time, "RMSF:", superimposer.get_rms())

        n_read += 1
        system = reader._get_system()

        # Intermediate means are only collected in serial runs.
        if not parallel and n_read % INTERMEDIATE_EVERY == 0:
            snapshots.append(prep_pos_for_json(pos_sum / n_read))
            print("INFO: Calculated intermediate mean for {} ".format(n_read))

    return (pos_sum, a1_sum, a3_sum, snapshots, n_read)