Beispiel #1
0
    def __sub__(self, other):
        """Compute the RMSD between this fragment and another one.

        The ``coords_ca`` coordinates of both fragments are handed to an
        SVD superimposer; the RMSD it reports after running the
        superposition is returned.

        :param other: fragment to compare against (must expose ``coords_ca``)
        :return: rmsd between fragments
        :rtype: float

        Examples
        --------
        This is an incomplete but illustrative example::

            rmsd = fragment1 - fragment2

        """
        superimposer = SVDSuperimposer()
        superimposer.set(self.coords_ca, other.coords_ca)
        superimposer.run()
        rmsd = superimposer.get_rms()
        return rmsd
Beispiel #2
0
def get_cov(reader, align_conf, num_confs, start=None, stop=None):
    """
        Accumulates the (unnormalized) covariance of the deviations from a reference structure

        Every configuration read from the reader is superimposed onto align_conf, the
        per-coordinate difference to align_conf is flattened into one vector, and the
        outer product of that vector with itself is added to the running sum.
        The sum is NOT divided by the number of frames here.

        Parameters:
            reader (readers.ErikReader): An active reader on the trajectory file to analyze.
            align_conf (numpy.array): The position of each particle in the reference (mean) configuration.
                Must have the same shape as the positions of a configuration.
            num_confs (int): The number of configurations in the reader.  Used as the frame limit when stop is not given.
            <optional> start (int): Number of configurations to skip before the first one is read.  Used if parallel.
            <optional> stop (int): Upper bound on the number of configurations processed by this call.  Used if parallel.

        Returns:
            covariation_matrix (numpy.array): A (3N x 3N) array holding the sum over all processed
                frames of the outer product of the flattened deviation vector.
    """
    # Same evaluation order as before: stop is normalized first, then start.
    frame_limit = num_confs if stop is None else int(stop)
    first_frame = 0 if start is None else int(start)

    system = reader.read(n_skip=first_frame)

    n_coords = len(system.positions) * 3
    covariation_matrix = np.zeros((n_coords, n_coords))
    superimposer = SVDSuperimposer()
    frame = 0

    # Align each configuration of the chunk to the reference and accumulate
    # the outer product of its deviation vector.
    while system != False and frame < frame_limit:
        print("-->", "frame", frame, "time={}".format(system.time))
        system.inbox()
        positions = system.positions
        superimposer.set(align_conf, positions)
        superimposer.run()
        rotation, translation = superimposer.get_rotran()
        # Applies the rotation to every position vector, then translates.
        aligned = np.einsum('ij, ki -> kj', rotation, positions) + translation
        deviation = (aligned - align_conf).flatten()
        covariation_matrix += np.einsum('i,j -> ij', deviation, deviation)

        frame += 1
        system = reader.read()

    return covariation_matrix
def run_sup3d(coord1, coord2):
    """Superimpose ``coord2`` onto ``coord1`` and print the results.

    Both arguments are converted with ``np.array`` and handed to an
    ``SVDSuperimposer`` (``coord1`` first, ``coord2`` second).  After running
    the superposition the following are printed, one per ``print`` call:
    the RMSD, the rotation matrix, the translation vector and the
    transformed coordinates.

    :param coord1: first coordinate set (anything ``np.array`` accepts)
    :param coord2: second coordinate set, same shape as ``coord1``
    :return: None
    """
    sup = SVDSuperimposer()
    # set() only stores the two coordinate sets; nothing is computed yet.
    sup.set(np.array(coord1), np.array(coord2))
    # run() performs the actual superposition.
    sup.run()
    rmsd = sup.get_rms()
    # Rotation matrix and translation vector of the superposition.
    rot, tran = sup.get_rotran()
    # Coordinates of the second set after the transformation was applied.
    tcoord = sup.get_transformed()
    # Single-argument print(...) calls are valid on Python 2 and Python 3;
    # the former print statements were syntax errors on Python 3.
    print(rmsd)
    print(rot)
    print(tran)
    print(tcoord)
    return
Beispiel #4
0
    def __sub__(self, other):
        """Return the RMSD obtained by superimposing two fragments.

        The ``coords_ca`` arrays of ``self`` and ``other`` are passed to an
        SVD superimposer, the superposition is run, and the resulting RMSD
        is returned.

        :param other: the fragment to compare with (must expose ``coords_ca``)
        :return: rmsd between fragments
        :rtype: float

        Examples
        --------
        This is an incomplete but illustrative example::

            rmsd = fragment1 - fragment2

        """
        svd = SVDSuperimposer()
        svd.set(self.coords_ca, other.coords_ca)
        svd.run()
        result = svd.get_rms()
        return result
Beispiel #5
0
def _superimpose_atoms(ref_points, points, atoms):
    if ref_points is None or points is None or atoms is None:
        return (None, None, None, None)
    ref_vec = []
    vec = []
    for a in atoms:
        if a in ref_points and a in points:
            ref_vec.append(ref_points[a])
            vec.append(points[a])
    if len(vec) < 3:
        return (None, None, None, None)
    sup = SVDSuperimposer()
    sup.set(np.array(ref_vec, 'f'), np.array(vec, 'f'))
    sup.run()
    (rot, tran) = sup.get_rotran()
    rms = sup.get_rms()
    return (_apply_rot_tran(points, rot, tran), rot, tran, rms)
    def setUp(self):
        """Create two 4x3 coordinate arrays and load them into a superimposer.

        ``self.x`` and ``self.y`` hold the coordinate sets; ``self.sup`` is a
        fresh ``SVDSuperimposer`` on which ``set(self.x, self.y)`` has been
        called (``run()`` is not called here).
        """
        first_coords = [
            [51.65, -1.90, 50.07],
            [50.40, -1.23, 50.65],
            [50.68, -0.04, 51.54],
            [50.22, -0.02, 52.85],
        ]
        second_coords = [
            [51.30, -2.99, 46.54],
            [51.09, -1.88, 47.58],
            [52.36, -1.20, 48.03],
            [52.71, -1.18, 49.38],
        ]

        self.x = array(first_coords)
        self.y = array(second_coords)

        superimposer = SVDSuperimposer()
        self.sup = superimposer
        self.sup.set(self.x, self.y)
def get_pca(reader, align_conf, num_confs, start=None, stop=None):
    """
        Collects the deviations from the mean structure used for principal component analysis

        Every configuration read from the reader is superimposed onto align_conf and the
        flattened difference to align_conf is stored as one row of the result.

        Parameters:
            reader (readers.LorenzoReader2): An active reader on the trajectory file to analyze.
            align_conf (numpy.array): The position of each particle in the mean configuration.
                Must have the same shape as the array returned by fetch_np for a configuration.
            num_confs (int): The number of configurations in the reader.  Used as the frame limit when stop is not given.
            <optional> start (int): Number of configurations to skip before the first one is read.  Used if parallel.
            <optional> stop (int): Upper bound on the number of configurations processed by this call.  Used if parallel.

        Returns:
            deviations_matrix (numpy.array): The difference in position from the mean for each configuration,
                one row per configuration actually read (at most stop rows).
    """
    if stop is None:
        stop = num_confs
    else:
        stop = int(stop)
    if start is None:
        start = 0
    else:
        start = int(start)

    mysystem = reader._get_system(N_skip=start)

    # np.empty leaves the rows uninitialized; they are filled one per frame below.
    deviations_matrix = np.empty((stop, (len(align_conf)) * 3))
    sup = SVDSuperimposer()
    confid = 0

    #for every configuration in the trajectory chunk, align it to the mean and compute positional difference for every particle
    while mysystem != False and confid < stop:
        print("-->", mysystem._time)
        mysystem.inbox()
        cur_conf = fetch_np(mysystem)
        sup.set(align_conf, cur_conf)
        sup.run()
        rot, tran = sup.get_rotran()
        #equivalent to taking the dot product of the rotation array and every vector in the deviations array
        cur_conf = np.einsum('ij, ki -> kj', rot, cur_conf) + tran
        deviations_matrix[confid] = (cur_conf - align_conf).flatten()

        confid += 1
        mysystem = reader._get_system()

    # If the reader ran out of configurations before `stop` frames were read,
    # the remaining rows were never written; drop them instead of returning
    # uninitialized memory.
    if confid < stop:
        deviations_matrix = deviations_matrix[:confid]

    return deviations_matrix
Beispiel #8
0
 def getRMSD(self, nSeqUnbound, seqUnboundAli, nSeqBound, seqBoundAli):
     '''
     Computes the rmsd between the unbound chain nSeqUnbound and the bound chain nSeqBound.

     The atom pairs come from self.build2SeqsDictMap; their coordinates are compared
     with SVDSuperimposer._rms, i.e. as they are, without superimposing them first.

     @param nSeqUnbound: int. The index of the unbound sequence that will be aligned
     @param seqUnboundAli: str. The alignment result for unbound sequence number nSeqUnbound
     @param nSeqBound: int. The index of the bound sequence that will be aligned
     @param seqBoundAli: str. The alignment result for bound sequence number nSeqBound
     @return rmsd. float. Root mean square deviation between the paired atoms of both input chains
     @return boundToUnboundResDict. {Bio.PDB.Residue_bound --> Bio.PDB.Residue_unbound}
     '''
     mapping = self.build2SeqsDictMap(nSeqUnbound, seqUnboundAli, nSeqBound,
                                      seqBoundAli)
     boundToUnboundResDict, atomBoundToUnboundMap = mapping
     # Split the sequence of atom pairs into two parallel tuples of atoms.
     first_atoms, second_atoms = zip(*atomBoundToUnboundMap)
     first_coords = np.array([atom.get_coord() for atom in first_atoms])
     second_coords = np.array([atom.get_coord() for atom in second_atoms])
     rmsd = SVDSuperimposer()._rms(first_coords, second_coords)
     return rmsd, boundToUnboundResDict
Beispiel #9
0
def sel_straight(coords_arr, n_cc_helices):
    """Select the coordinate set whose consecutive helices superimpose best.

    Each entry of ``coords_arr`` is cut into ``n_cc_helices`` consecutive
    segments of equal length (the length is derived from the first entry
    only).  Neighbouring segments are superimposed pairwise and the mean of
    the resulting RMSD values is the score of that entry.

    :param coords_arr: sequence of coordinate arrays
    :param n_cc_helices: number of segments each array is split into
    :return: tuple ``(index, score)`` of the entry with the lowest mean RMSD
    """
    segment_len = int(coords_arr[0].shape[0] / n_cc_helices)
    mean_rms_per_entry = []
    for coords in coords_arr:
        segments = [
            coords[k * segment_len:(k + 1) * segment_len]
            for k in range(n_cc_helices)
        ]

        pair_rms = []
        # Walk over neighbouring segments (0,1), (1,2), ...
        for current, following in zip(segments, segments[1:]):
            superimposer = SVDSuperimposer()
            superimposer.set(current, following)
            superimposer.run()
            pair_rms.append(superimposer.get_rms())
        mean_rms_per_entry.append(np.mean(pair_rms))

    best_index = np.argmin(mean_rms_per_entry)
    best_score = np.min(mean_rms_per_entry)
    return best_index, best_score
Beispiel #10
0
def computeRMSD():
    """Return the RMSD between the global ``ca_atoms`` and ``ca_atoms_pdb``.

    The first three components of every entry of the module-level sequences
    ``ca_atoms_pdb`` (used as the fixed set) and ``ca_atoms`` (used as the
    moving set) are copied into ``(n, 3)`` arrays, which are superimposed
    with an ``SVDSuperimposer``.

    :return: RMSD reported by the superimposer after the superposition
    :raises SystemExit: with status 1, after printing an error message, if
        the two sequences differ in length
    """
    if len(ca_atoms) != len(ca_atoms_pdb):
        # print(...) with a single argument works on Python 2 and 3; the
        # former print statement was a syntax error on Python 3.
        print("Error. Length mismatch!")
        # Exit with a failure status instead of relying on the site-provided
        # exit() helper, which also reported success (status 0).
        raise SystemExit(1)
    n_atoms = len(ca_atoms)

    fixed_coord = numpy.zeros((n_atoms, 3))
    moving_coord = numpy.zeros((n_atoms, 3))

    for i in range(n_atoms):
        # Only the first three components (x, y, z) of each entry are used.
        fixed_coord[i] = [
            ca_atoms_pdb[i][0], ca_atoms_pdb[i][1], ca_atoms_pdb[i][2]
        ]
        moving_coord[i] = [ca_atoms[i][0], ca_atoms[i][1], ca_atoms[i][2]]
    sup = SVDSuperimposer()
    sup.set(fixed_coord, moving_coord)
    sup.run()
    return sup.get_rms()
Beispiel #11
0
    def set_atoms(self, fixed, moving):
        """Compute the superposition of two equally long atom lists.

        The coordinates of both lists are superimposed with an SVD
        superimposer (``fixed`` as first, ``moving`` as second coordinate
        set).  The resulting RMSD is stored in ``self.rms`` and the
        rotation/translation pair in ``self.rotran``; no atom is modified
        here.

        :param fixed: list of (fixed) atoms
        :param moving: list of (moving) atoms
        :type fixed,moving: [L{Atom}, L{Atom},...]
        :raises PDBException: if the two lists differ in length
        """
        n_atoms = len(fixed)
        if n_atoms != len(moving):
            raise PDBException("Fixed and moving atom lists differ in size")

        fixed_coord = numpy.zeros((n_atoms, 3))
        moving_coord = numpy.zeros((n_atoms, 3))
        for row, (fixed_atom, moving_atom) in enumerate(zip(fixed, moving)):
            fixed_coord[row] = fixed_atom.get_coord()
            moving_coord[row] = moving_atom.get_coord()

        superimposer = SVDSuperimposer()
        superimposer.set(fixed_coord, moving_coord)
        superimposer.run()
        self.rms = superimposer.get_rms()
        self.rotran = superimposer.get_rotran()
Beispiel #12
0
    def __init__(self, static, moving):
        """
        Align two structures

        The superposition of ``moving`` onto ``static`` is computed once.
        Afterwards ``self.rms`` holds the RMSD, ``self.static`` the reference
        as passed in, and ``self.moving`` a list with every entry of
        ``moving`` after the rotation and translation were applied.

        :param static: the reference structure
        :param moving: the structure to be aligned to the reference
        """
        superimposer = SVDSuperimposer()
        superimposer.set(np.asarray(static), np.asarray(moving))
        superimposer.run()

        rot, trans = superimposer.get_rotran()

        self.rms = superimposer.get_rms()
        self.static = static

        # Transform the entries one by one so the result stays a plain list.
        transformed = []
        for index in range(len(moving)):
            point = np.asarray(moving[index])
            transformed.append(np.dot(point, rot) + trans)
        self.moving = transformed
Beispiel #13
0
def align(predicted, gt):
    """Superimpose ``predicted`` onto ``gt`` and return the moved coordinates.

    ``predicted`` is multiplied by a diagonal scaling matrix before the
    superposition.  All three scale factors are fixed to 1, so the scaling
    is currently the identity.

    :param predicted: coordinates to be moved (anything ``np.array`` accepts)
    :param gt: reference coordinates, same shape as ``predicted``
    :return: the scaled ``predicted`` coordinates after superposition onto
        ``gt``, as returned by ``SVDSuperimposer.get_transformed``
    """
    # A grid search over per-axis scale factors (0.9 .. 1.1, scored by
    # compute_drmsd) used to select these values; it is disabled, so the
    # neutral factors are used.
    scale_x, scale_y, scale_z = 1, 1, 1

    superimposer = SVDSuperimposer()
    scale_matrix = np.diag([scale_x, scale_y, scale_z])
    scaled = np.dot(np.array(predicted), scale_matrix)
    superimposer.set(np.array(gt), scaled)
    superimposer.run()
    return superimposer.get_transformed()
Beispiel #14
0
def align(coordinate_file):
    '''
    Align every model in a coordinate file to the first model of that file.

    1. Input: a file in which each line holds the coordinates of one model.
    The first field of a line is skipped; the remaining fields are read as
    floats and grouped in threes, e.g. a model with 70 atoms (3*70 = 210
    coordinates) is written as:  210 x1 y1 z1 x2 y2 z2 ... x70 y70 z70

    2. The model on the first line is the reference and is stored unchanged.
    Every following model is superimposed onto it.

    3. Returns: a tuple (modelDict, ref).  modelDict maps the 0-based line
    index to the flattened numpy array of that (aligned) model; ref is the
    reference model as an array with one row per atom.
    '''

    modelDict = {}
    ref = []
    sup = SVDSuperimposer()
    with open(coordinate_file) as handle:
        for ind, line in enumerate(handle):
            # Drop the leading field, then group the numbers into xyz triples.
            values = [float(token) for token in line.split()][1:]
            triples = [values[i:i + 3] for i in range(0, len(values), 3)]
            coords = array(triples, 'f')

            if ind == 0:
                ref = coords
                modelDict[ind] = np.ravel(ref)
                continue

            sup.set(ref, coords)
            sup.run()
            modelDict[ind] = np.ravel(sup.get_transformed())
    return modelDict, ref
Beispiel #15
0
def calc_DockQ(model, native, use_CA_only=False):
    """Compute the DockQ score of a docking model against its native complex.

    The external ``fnat`` program, expected in the directory of the running
    script (derived from ``sys.argv[0]``), is run twice on ``model`` and
    ``native``: once with the extra argument ``5`` (contact statistics) and
    once with ``10`` (the residue list used as interface for the interface
    RMS).  Both files are then parsed with ``Bio.PDB.PDBParser`` and the
    first model of each structure is used.

    Three quantities are combined into the score:

    * ``fnat`` as reported by the first ``fnat`` run,
    * ``irms``: RMS after superimposing the selected atoms of the interface
      residues that are present in both structures,
    * ``Lrms``: RMS of the ligand chain atoms, measured without further
      superposition after the model was superimposed on the receptor chain.

    ``DockQ = (fnat + 1/(1+(irms/1.5)^2) + 1/(1+(Lrms/8.5)^2)) / 3``

    The chain with more matched atoms is labelled the receptor, the other
    one the ligand.  The model must contain exactly two chains (the chain
    ids are unpacked into two names).

    NOTE(review): this function uses Python 2 constructs (``print``
    statements and the ``commands`` module).
    NOTE(review): ``model`` and ``native`` are concatenated into the command
    string without quoting - confirm the inputs are trusted paths.

    :param model: path of the model PDB file
    :param native: path of the native PDB file
    :param use_CA_only: if true, only CA atoms are used for the
        superpositions instead of the atoms CA, C, N and O
    :return: dict with the keys ``DockQ``, ``irms``, ``Lrms``, ``fnat``,
        ``nat_correct``, ``nat_total``, ``fnonnat``, ``nonnat_count``,
        ``model_total``, ``chain1``, ``chain2``, ``len1``, ``len2``,
        ``class1`` and ``class2``
    :raises AssertionError: if an ``fnat`` run reports -1, if no atoms could
        be matched, or if the matched atom counts differ
    """

    exec_path = os.path.dirname(os.path.abspath(sys.argv[0]))
    atom_for_sup = ['CA', 'C', 'N', 'O']
    if (use_CA_only):
        atom_for_sup = ['CA']

    # Two fnat invocations that differ only in the last argument
    # (presumably a distance cutoff - TODO confirm against fnat).
    cmd_fnat = exec_path + '/fnat ' + model + ' ' + native + ' 5'
    #cmd_interface=exec_path + '/fnat ' + model + ' ' + native + ' 10 backbone'
    cmd_interface = exec_path + '/fnat ' + model + ' ' + native + ' 10'

    #fnat_out = os.popen(cmd_fnat).readlines()
    fnat_out = commands.getoutput(cmd_fnat)
    #    sys.exit()
    (fnat, nat_correct, nat_total, fnonnat, nonnat_count, model_total,
     interface5A) = parse_fnat(fnat_out)
    assert fnat != -1, "Error running cmd: %s\n" % (cmd_fnat)
    #    inter_out = os.popen(cmd_interface).readlines()
    inter_out = commands.getoutput(cmd_interface)
    # Only `interface` from this second run is used below; the *_bb values
    # other than fnat_bb (checked in the assert) are not read again.
    (fnat_bb, nat_correct_bb, nat_total_bb, fnonnat_bb, nonnat_count_bb,
     model_total_bb, interface) = parse_fnat(inter_out)
    assert fnat_bb != -1, "Error running cmd: %s\n" % (cmd_interface)

    #print fnat
    #Use same interface as for fnat for iRMS
    #interface=interface5A

    # Start the parser
    pdb_parser = Bio.PDB.PDBParser(QUIET=True)

    # Get the structures
    ref_structure = pdb_parser.get_structure("reference", native)
    sample_structure = pdb_parser.get_structure("model", model)

    # Use the first model in the pdb-files for alignment
    # Change the number 0 if you want to align to another structure
    ref_model = ref_structure[0]
    sample_model = sample_structure[0]

    # Make a list of the atoms (in the structures) you wish to align.
    # In this case we use CA atoms whose index is in the specified range
    ref_atoms = []
    sample_atoms = []

    common_interface = []

    chain_res = {}

    #find atoms common in both sample and native
    atoms_def_sample = []
    atoms_def_in_both = []
    #first read in sample
    for sample_chain in sample_model:
        #        print sample_chain
        chain = sample_chain.id
        #        print chain
        for sample_res in sample_chain:
            # print sample_res
            if sample_res.get_id()[0] != ' ':  #Skip hetatm.
                continue
            # Despite its name, `resname` is the second field of the residue
            # id; the residue key is that value followed by the chain id.
            resname = sample_res.get_id()[1]
            key = str(resname) + chain
            for a in atom_for_sup:
                # Atom keys have the form "<residue key>.<atom name>".
                atom_key = key + '.' + a
                if a in sample_res:
                    if atom_key in atoms_def_sample:
                        print atom_key + ' already added (MODEL)!!!'
                    atoms_def_sample.append(atom_key)

    #then read in native also present in sample
    for ref_chain in ref_model:
        chain = ref_chain.id
        for ref_res in ref_chain:
            #print ref_res
            if ref_res.get_id()[0] != ' ':  #Skip hetatm.
                #                print ref_res.get_id()
                continue
            resname = ref_res.get_id()[1]
            key = str(resname) + chain
            for a in atom_for_sup:
                atom_key = key + '.' + a
                if a in ref_res and atom_key in atoms_def_sample:
                    if atom_key in atoms_def_in_both:
                        print atom_key + ' already added (Native)!!!'
                    atoms_def_in_both.append(atom_key)


#    print atoms_def_in_both
    # Collect, per model chain, all residue keys, and the model atoms of the
    # interface residues that exist in both structures.
    for sample_chain in sample_model:
        chain = sample_chain.id
        if chain not in chain_res.keys():
            chain_res[chain] = []
        for sample_res in sample_chain:
            if sample_res.get_id()[0] != ' ':  #Skip hetatm.
                continue
            resname = sample_res.get_id()[1]
            key = str(resname) + chain
            chain_res[chain].append(key)
            if key in interface:
                for a in atom_for_sup:
                    atom_key = key + '.' + a
                    if a in sample_res and atom_key in atoms_def_in_both:
                        sample_atoms.append(sample_res[a])
                common_interface.append(key)

    #print inter_pairs

    chain_ref = {}
    common_residues = []

    # Iterate of all chains in the model in order to find all residues
    for ref_chain in ref_model:
        # Iterate of all residues in each model in order to find proper atoms
        #  print dir(ref_chain)
        chain = ref_chain.id
        if chain not in chain_ref.keys():
            chain_ref[chain] = []
        for ref_res in ref_chain:
            if ref_res.get_id()[0] != ' ':  #Skip hetatm.
                continue
            resname = ref_res.get_id()[1]
            key = str(resname) + chain

            #print ref_res
            #      print key
            # print chain_res.values()
            if key in chain_res[chain]:  # if key is present in sample
                #print key
                for a in atom_for_sup:
                    atom_key = key + '.' + a
                    if a in ref_res and atom_key in atoms_def_in_both:
                        chain_ref[chain].append(ref_res[a])
                        # The key is appended once per matched atom, so it
                        # may occur several times in common_residues.
                        common_residues.append(key)
                    #chain_sample.append((ref_res['CA'])
            if key in common_interface:
                # Check if residue number ( .get_id() ) is in the list
                # Append CA atom to list
                #print key
                for a in atom_for_sup:
                    atom_key = key + '.' + a
                    #print atom_key
                    if a in ref_res and atom_key in atoms_def_in_both:
                        ref_atoms.append(ref_res[a])

    #get the ones that are present in native
    chain_sample = {}
    for sample_chain in sample_model:
        chain = sample_chain.id
        if chain not in chain_sample.keys():
            chain_sample[chain] = []
        for sample_res in sample_chain:
            if sample_res.get_id()[0] != ' ':  #Skip hetatm.
                continue
            resname = sample_res.get_id()[1]
            key = str(resname) + chain
            if key in common_residues:
                for a in atom_for_sup:
                    atom_key = key + '.' + a
                    if a in sample_res and atom_key in atoms_def_in_both:
                        chain_sample[chain].append(sample_res[a])

        #if key in common_residues:
        #     print key
        #sample_atoms.append(sample_res['CA'])
        #common_interface.append(key)

    assert len(ref_atoms) != 0, "length of native is zero"
    assert len(sample_atoms) != 0, "length of model is zero"
    assert len(ref_atoms) == len(
        sample_atoms
    ), "Different number of atoms in native and model %d %d\n" % (
        len(ref_atoms), len(sample_atoms))

    # First superposition: interface atoms only.  apply() moves every atom of
    # the model, and the RMS of this fit is the interface RMS.
    super_imposer = Bio.PDB.Superimposer()
    super_imposer.set_atoms(ref_atoms, sample_atoms)
    super_imposer.apply(sample_model.get_atoms())

    # Print RMSD:
    irms = super_imposer.rms

    # Requires exactly two chains in the model; more or fewer make this
    # unpacking fail.
    (chain1, chain2) = chain_sample.keys()

    ligand_chain = chain1
    receptor_chain = chain2
    len1 = len(chain_res[chain1])
    len2 = len(chain_res[chain2])

    assert len1 != 0, "%s chain has zero length!\n" % chain1
    assert len2 != 0, "%s chain has zero length!\n" % chain2

    # The chain with more matched atoms is treated as the receptor.
    class1 = 'ligand'
    class2 = 'receptor'
    if (len(chain_sample[chain1]) > len(chain_sample[chain2])):
        receptor_chain = chain1
        ligand_chain = chain2
        class1 = 'receptor'
        class2 = 'ligand'

    #print len1
    #print len2
    #print chain_sample.keys()

    #Set to align on receptor
    assert len(chain_ref[receptor_chain]) == len(
        chain_sample[receptor_chain]
    ), "Different number of atoms in native and model receptor (chain %c) %d %d\n" % (
        receptor_chain, len(
            chain_ref[receptor_chain]), len(chain_sample[receptor_chain]))

    # Second superposition: fit the model on the receptor chain and move the
    # whole model again; receptor_chain_rms is not used afterwards.
    super_imposer.set_atoms(chain_ref[receptor_chain],
                            chain_sample[receptor_chain])
    super_imposer.apply(sample_model.get_atoms())
    receptor_chain_rms = super_imposer.rms
    #print receptor_chain_rms
    #print dir(super_imposer)
    #print chain1_rms

    #Grep out the transformed ligand coords

    #print ligand_chain

    #print chain_ref[ligand_chain]
    #print chain_sample[ligand_chain]
    #l1=len(chain_ref[ligand_chain])
    #l2=len(chain_sample[ligand_chain])

    assert len(chain_ref[ligand_chain]) != 0 or len(
        chain_sample[ligand_chain]
    ) != 0, "Zero number of equivalent atoms in native and model ligand (chain %s) %d %d.\nCheck that the residue numbers in model and native is consistent\n" % (
        ligand_chain, len(
            chain_ref[ligand_chain]), len(chain_sample[ligand_chain]))

    assert len(chain_ref[ligand_chain]) == len(
        chain_sample[ligand_chain]
    ), "Different number of atoms in native and model ligand (chain %c) %d %d\n" % (
        ligand_chain, len(
            chain_ref[ligand_chain]), len(chain_sample[ligand_chain]))

    # Ligand coordinates are read after the receptor-based fit was applied.
    coord1 = np.array([atom.coord for atom in chain_ref[ligand_chain]])
    coord2 = np.array([atom.coord for atom in chain_sample[ligand_chain]])

    #coord1=np.array([atom.coord for atom in chain_ref[receptor_chain]])
    #coord2=np.array([atom.coord for atom in chain_sample[receptor_chain]])

    #print len(coord1)
    #print len(coord2)

    sup = SVDSuperimposer()
    Lrms = sup._rms(
        coord1,
        coord2)  #using the private _rms function which does not superimpose

    #super_imposer.set_atoms(chain_ref[ligand_chain], chain_sample[ligand_chain])
    #super_imposer.apply(sample_model.get_atoms())
    #coord1=np.array([atom.coord for atom in chain_ref[receptor_chain]])
    #coord2=np.array([atom.coord for atom in chain_sample[receptor_chain]])
    #Rrms= sup._rms(coord1,coord2)
    #should give same result as above line
    #diff = coord1-coord2
    #l = len(diff) #number of atoms
    #from math import sqrt
    #print sqrt(sum(sum(diff*diff))/l)
    #print np.sqrt(np.sum(diff**2)/l)
    # Average of fnat and the two RMS values, each RMS mapped to (0, 1] with
    # the scaling constants 1.5 (irms) and 8.5 (Lrms).
    DockQ = (float(fnat) + 1 / (1 + (irms / 1.5) * (irms / 1.5)) + 1 /
             (1 + (Lrms / 8.5) * (Lrms / 8.5))) / 3
    # NOTE(review): the local name `dict` shadows the builtin inside this
    # function.
    dict = {}
    dict['DockQ'] = DockQ
    dict['irms'] = irms
    dict['Lrms'] = Lrms
    dict['fnat'] = fnat
    dict['nat_correct'] = nat_correct
    dict['nat_total'] = nat_total

    dict['fnonnat'] = fnonnat
    dict['nonnat_count'] = nonnat_count
    dict['model_total'] = model_total

    dict['chain1'] = chain1
    dict['chain2'] = chain2
    dict['len1'] = len1
    dict['len2'] = len2
    dict['class1'] = class1
    dict['class2'] = class2

    return dict
Beispiel #16
0
def main():
    """Superimpose one or more configurations onto a reference configuration.

    Command line: ``reference victims [victims ...] [-i index_file]``.

    Every victim configuration is read, put back in the box, superimposed
    onto the reference and written to ``aligned<N>.dat``, where ``N`` is the
    0-based position of the victim on the command line.  With ``-i`` the
    fit is computed on the listed particle IDs only, but the resulting
    transformation is applied to all particles.  The orientation vectors
    (``a1s``, ``a3s``) are rotated but not translated.

    Exits with status 1 if the index file contains a non-integer entry.
    """
    parser = argparse.ArgumentParser(
        prog=os.path.basename(__file__),
        description=
        "superimposes one or more structures sharing a topology to a reference structure"
    )
    parser.add_argument('reference',
                        type=str,
                        nargs=1,
                        help="The reference configuration to superimpose to")
    parser.add_argument(
        'victims',
        type=str,
        nargs='+',
        help="The configuraitons to superimpose on the reference")
    parser.add_argument(
        '-i',
        metavar='index_file',
        dest='index_file',
        nargs=1,
        help=
        'Align to only a subset of particles from a space-separated list in the provided file'
    )
    args = parser.parse_args()

    #run system checks
    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "numpy", "Bio"])

    #Get the reference files
    ref_dat = args.reference[0]

    #-i will make it only run on a subset of nucleotides.
    #The index file is a space-separated list of particle IDs
    if args.index_file:
        index_file = args.index_file[0]
        with open(index_file, 'r') as f:
            indexes = f.readline().split()
            try:
                indexes = [int(i) for i in indexes]
            except ValueError:
                print(
                    "ERROR: The index file must be a space-seperated list of particles.  These can be generated using oxView by clicking the \"Download Selected Base List\" button"
                )
                # Continuing with string indexes would only fail later with
                # an unrelated indexing error, so stop here.
                raise SystemExit(1)
    else:
        # No index file: fit on every particle of the reference.
        with ErikReader(ref_dat) as r:
            indexes = list(range(len(r.read().positions)))

    #Create list of configurations to superimpose
    to_sup = []
    with ErikReader(ref_dat) as r:
        ref = r.read()
        ref.inbox()
    ref_conf = ref.positions[indexes]
    for victim in args.victims:
        with ErikReader(victim) as r:
            system = r.read()
            system.inbox()
        to_sup.append(system)

    sup = SVDSuperimposer()

    #Run the biopython superimposer on each configuration and rewrite its configuration file
    for i, system in enumerate(to_sup):
        indexed_cur_conf = system.positions[indexes]
        sup.set(ref_conf, indexed_cur_conf)
        sup.run()
        rot, tran = sup.get_rotran()
        # Positions are rotated and translated; the orientation vectors are
        # directions, so they are only rotated.
        system.positions = np.einsum('ij, ki -> kj', rot,
                                     system.positions) + tran
        system.a1s = np.einsum('ij, ki -> kj', rot, system.a1s)
        system.a3s = np.einsum('ij, ki -> kj', rot, system.a3s)
        system.write_new("aligned{}.dat".format(i))
        print("INFO: Wrote file aligned{}.dat".format(i), file=stderr)
Beispiel #17
0
def compute_centroid(reader,
                     mean_structure,
                     indexes,
                     num_confs,
                     start=None,
                     stop=None):
    """
        Compares each structure to the mean and returns the one with the lowest RMSF

        Every configuration is superimposed onto mean_structure using only the particles
        selected by indexes; the resulting transformation is applied to all particles.

        Parameters:
            reader (readers.LorenzoReader2): An active reader on the trajectory file to analyze.
            mean_structure (numpy.array): The position of each particle in the mean configuration.
                Must have the same shape as positions[indexes] of a configuration.
            indexes (list): The particle IDs used to compute the alignment.
            num_confs (int): The number of configurations in the reader.  Used as the frame limit when stop is not given.
            <optional> start (int): Number of configurations to skip before the first one is read.  Used if parallel.
            <optional> stop (int): Upper bound on the number of configurations processed by this call.  Used if parallel.

        Returns:
            centroid_candidate (numpy.array): The aligned positions of the structure with the lowest RMSF to the mean.
            centroid_a1 (numpy.array): The rotated a1 vectors of that structure.
            centroid_a3 (numpy.array): The rotated a3 vectors of that structure.
            lowest_rmsf (float): The RMSF of that structure (100000 if no structure was accepted).
            centroid_t: The time of that structure, or None if no structure was accepted.
    """
    if stop is None:
        stop = num_confs
    else:
        stop = int(stop)
    if start is None:
        start = 0
    else:
        start = int(start)
    confid = 0

    # Use the single-value decomposition method for superimposing configurations
    sup = SVDSuperimposer()
    lowest_rmsf = 100000  #if you have a larger number than this, we need to talk...
    centroid_candidate = np.zeros_like(mean_structure)
    centroid_a1 = np.zeros_like(mean_structure)
    centroid_a3 = np.zeros_like(mean_structure)
    # Defined up front so the return statement cannot hit an unbound name
    # when no frame is read or none beats the initial threshold.
    centroid_t = None

    mysystem = reader.read(n_skip=start)

    while mysystem != False and confid < stop:
        mysystem.inbox()
        # calculate alignment transform
        cur_conf = mysystem.positions
        indexed_cur_conf = mysystem.positions[indexes]
        cur_conf_a1 = mysystem.a1s
        cur_conf_a3 = mysystem.a3s
        sup.set(mean_structure, indexed_cur_conf)
        sup.run()
        rot, tran = sup.get_rotran()

        # Positions get rotation + translation, orientation vectors only the rotation.
        cur_conf = np.einsum('ij, ki -> kj', rot, cur_conf) + tran
        cur_conf_a1 = np.einsum('ij, ki -> kj', rot, cur_conf_a1)
        cur_conf_a3 = np.einsum('ij, ki -> kj', rot, cur_conf_a3)
        RMSF = sup.get_rms()
        print("Frame number:", confid, "RMSF:", RMSF)
        if RMSF < lowest_rmsf:
            centroid_candidate = cur_conf
            centroid_a1 = cur_conf_a1
            centroid_a3 = cur_conf_a3
            lowest_rmsf = RMSF
            centroid_t = mysystem.time

        confid += 1
        mysystem = reader.read()

    return centroid_candidate, centroid_a1, centroid_a3, lowest_rmsf, centroid_t
def main():
    """Align every frame of a trajectory to a reference configuration.

    Command line: ``traj outfile [-i index_file] [-r reference_structure]``.

    Without ``-r`` the first frame of the trajectory is the reference and is
    written to ``outfile`` unaligned (after being put back in the box).
    With ``-r`` the given configuration is the reference and the first
    frame is aligned to it like every other frame.  With ``-i`` only the
    listed particle IDs are used as reference coordinates.  The first frame
    creates ``outfile``; all following frames are appended.

    Exits with status 1 if the index file contains a non-integer entry.
    """
    #handle commandline arguments
    parser = argparse.ArgumentParser(
        prog=os.path.basename(__file__),
        description="Aligns each frame in a trajectory to the first frame")
    parser.add_argument('traj',
                        type=str,
                        nargs=1,
                        help="The trajectory file to align")
    parser.add_argument(
        'outfile',
        type=str,
        nargs=1,
        help='The name of the new trajectory file to write out')
    parser.add_argument(
        '-i',
        metavar='index_file',
        dest='index_file',
        nargs=1,
        help=
        'Align to only a subset of particles from a space-separated list in the provided file'
    )
    parser.add_argument(
        '-r',
        metavar='reference_structure',
        dest='reference_structure',
        nargs=1,
        help="Align to a provided configuration instead of the first frame.")
    args = parser.parse_args()

    #run system checks
    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "numpy", "Bio"])

    #Parse command line arguments
    traj_file = args.traj[0]
    outfile = args.outfile[0]
    sup = SVDSuperimposer()

    #-i will make it only run on a subset of nucleotides.
    #The index file is a space-separated list of particle IDs
    if args.index_file:
        index_file = args.index_file[0]
        with open(index_file, 'r') as f:
            indexes = f.readline().split()
            try:
                indexes = [int(i) for i in indexes]
            except ValueError:
                print(
                    "ERROR: The index file must be a space-seperated list of particles.  These can be generated using oxView by clicking the \"Download Selected Base List\" button"
                )
                # Continuing with string indexes would only fail later with
                # an unrelated indexing error, so stop here.
                raise SystemExit(1)
    else:
        # No index file: use every particle of the first frame.
        with ErikReader(traj_file) as r:
            indexes = list(range(len(r.read().positions)))

    #-r will make it align to a provided .dat file instead of the first configuration
    if args.reference_structure:
        #read reference configuration
        r = ErikReader(args.reference_structure[0])
        ref = r.read()
        ref.inbox()
        r = ErikReader(traj_file)
        ref_conf = ref.positions[indexes]

        # Pass the same index list as the loop below does: ref_conf only
        # holds the indexed particles, and the previous three-argument call
        # did not match the four-argument call used for all other frames.
        mysystem = align_frame(ref_conf, sup, r.read(), indexes)

    else:
        #read the first configuration and use it as the reference configuration for the rest
        r = ErikReader(traj_file)
        mysystem = r.read()
        mysystem.inbox()
        ref_conf = mysystem.positions[indexes]

    #write first configuration to output file
    mysystem.write_new(outfile)
    mysystem = r.read()

    #Read the trajectory one configuration at a time and perform the alignment
    while mysystem != False:
        print("working on t = ", mysystem.time)

        mysystem = align_frame(ref_conf, sup, mysystem, indexes)

        mysystem.write_append(outfile)

        mysystem = r.read()
Beispiel #19
0
def assemble_multiscale_visualization(topology_fn, rmf_fn, pdb_dir,
                                      outprefix=None, chimerax=True,
                                      xl_fn=None):
    """
    Render multiscale versions of rigid bodies from PDB files + flexible
    beads from RMF files w/o mapped crosslinks.

    The function writes its results to disk and returns nothing:

    * ``<outprefix>.pdb``    -- composite model built from the rigid-body PDB
      residues after superimposing them on the RMF particle coordinates.
    * ``<outprefix>.cxc``    -- ChimeraX script (only if ``chimerax`` is true).
    * ``<outprefix>_XLs.pb`` -- pseudobond file (only if ``chimerax`` is true
      and ``xl_fn`` is given).

    Args:
    topology_fn (str): Topology file in pipe-separated-value (PSV) format
    as required in integrative modeling using IMP. For details on how
    to write a topology file, see:
    https://integrativemodeling.org/2.13.0/doc/ref/classIMP_1_1pmi_1_1topology_1_1TopologyReader.html

    rmf_fn (str): Name of the RMF file. Only frame 0 is loaded.

    pdb_dir (str): Directory containing all the PDB files for the rigid
    bodies used in modeling.

    outprefix (str, optional): Prefix for output files. Defaults to None,
    in which case "centroid_model" is used.

    chimerax (bool, optional): If true, a Chimerax script will be written
    (extension ".cxc"). If false, the function returns right after the
    composite PDB file has been saved. Defaults to True.

    xl_fn (str, optional): A file containing a XL dataset. Defaults to None.
    If this dataset is supplied, then it will be mapped on to the overall
    structure, with satisfied XLs coloured SAT_XL_COLOR and violated XLs
    coloured VIOL_XL_COLOR.
    A XL dataset should be supplied in a comma-separated-value (CSV) format
    containing at least the following fields

    protein1, residue1, protein2, residue2, sat

    where the last field <sat> is a boolean 1 or 0 depending on whether
    the particular XL is satisfied (in the ensemble sense) as a result of the
    integrative modeling exercise.

    Returns:
    None
    """

    # -------------------------------------------
    # read the RMF file and extract all particles
    # -------------------------------------------
    rmf_fh = RMF.open_rmf_file_read_only(rmf_fn)
    rmf_model = IMP.Model()
    hier = IMP.rmf.create_hierarchies(rmf_fh, rmf_model)[0]
    IMP.rmf.load_frame(rmf_fh, 0)
    # (molecule name, particle name) -> particle coordinates
    rmf_ps = {}
    for p in IMP.core.get_leaves(hier):
        # the molecule name is read from the great-grandparent of each leaf
        molname = p.get_parent().get_parent().get_parent().get_name().strip()
        name = p.get_name().strip()
        rmf_ps[(molname, name)] = IMP.core.XYZ(p).get_coordinates()

    # --------------------------------------------------------------
    # map pdb residues to rmf particles for each rigid body pdb file
    # --------------------------------------------------------------
    # read the topology file
    t = TopologyReader(topology_fn, pdb_dir=pdb_dir)
    components = t.get_components()

    # pdb prefix -> {(pdb chain, residue number): (molecule, residue number)}
    map_pdb2rmf = {}
    # pdb prefix -> {(pdb chain, residue number): Bio.PDB residue}
    rigid_body_residues = {}
    chain_ids = {}  # these are matched to the chimerax rmf plugin
    chain_id_count = 0
    for c in components:
        # ignore unstructured residues
        # NOTE(review): a molecule that only has BEADS components never gets
        # a chain ID here, yet chain_ids[molname] is looked up for every RMF
        # particle further down -- confirm such molecules cannot occur.
        if c.pdb_file == "BEADS":
            continue
        mol = c.molname
        pdb_prefix = os.path.basename(c.pdb_file).split(".pdb")[0]
        chain_id = c.chain
        offset = c.pdb_offset

        # half-open residue range [r0, r1) shifted by the pdb offset
        r0 = c.residue_range[0] + offset
        r1 = c.residue_range[1] + 1 + offset

        if mol not in chain_ids:
            chain_ids[mol] = string.ascii_uppercase[chain_id_count]
            chain_id_count += 1

        # parse each pdb file only once, however many components refer to it
        if pdb_prefix not in map_pdb2rmf:
            map_pdb2rmf[pdb_prefix] = {}
            rigid_body_model = PDBParser().get_structure("x", c.pdb_file)[0]
            rigid_body_residues[pdb_prefix] = {
                (r.full_id[2], r.id[1]): r
                for r in rigid_body_model.get_residues()
            }

        for r in range(r0, r1):
            key = (chain_id, r)
            if key in rigid_body_residues[pdb_prefix]:
                map_pdb2rmf[pdb_prefix][key] = (mol, r)

    # --------------------------------
    # align all pdb files with the rmf
    # --------------------------------
    print("\nAligning all rigid body structures...")
    align = SVDSuperimposer()
    for pdb_prefix, mapper in map_pdb2rmf.items():
        pdb_coords = []
        pdb_atoms = []
        rmf_coords = []

        residues = rigid_body_residues[pdb_prefix]
        for (chain, pdb_res), (mol, rmf_res) in mapper.items():
            r = residues[(chain, pdb_res)]
            # the fit uses CA atoms only, but every atom gets transformed
            pdb_coords.append(r["CA"].coord)
            pdb_atoms.extend(r.get_atoms())
            rmf_coords.append(rmf_ps[(mol, str(rmf_res))])

        pdb_coords = np.array(pdb_coords)
        rmf_coords = np.array(rmf_coords)
        align.set(rmf_coords, pdb_coords)
        align.run()
        rotmat, vec = align.get_rotran()
        for a in pdb_atoms:
            a.transform(rotmat, vec)

    # --------------------------
    # assemble the composite pdb
    # --------------------------
    print("\nChain IDs by molecule:")
    for k, v in chain_ids.items():
        print("molecule %s, chain ID %s" % (k, v))

    reslists = {c.molname: [] for c in components}
    for pdb_prefix, mapper in map_pdb2rmf.items():
        residues = rigid_body_residues[pdb_prefix]
        for (chain, pdb_res), (mol, rmf_res) in mapper.items():
            r = residues[(chain, pdb_res)]
            # rebuild the residue under the rmf residue number
            new_residue = Residue.Residue(id=(r.id[0], rmf_res, r.id[2]),
                                          resname=r.resname,
                                          segid=r.segid)
            for a in r.get_atoms():
                new_residue.add(a)
            reslists[mol].append(new_residue)

    composite_model = Model.Model(0)
    for mol, chain_id in chain_ids.items():
        this_chain = Chain.Chain(chain_id)
        for r in sorted(reslists[mol], key=lambda r: r.id[1]):
            this_chain.add(r)
        composite_model.add(this_chain)

    # save the composite pdb to file
    io = PDBIO()
    io.set_structure(composite_model)
    if outprefix is None:
        outprefix = "centroid_model"
    io.save(outprefix + ".pdb")

    # -------------------------------------------------------------------
    # chimerax rendering (hide most of the rmf except unstructured beads)
    # -------------------------------------------------------------------
    # Return instead of calling exit(): a helper function must not terminate
    # the interpreter of whoever called it.
    if not chimerax:
        return
    print("\nWriting UCSF Chimerax script...")
    script = [
        "open %s\n" % (outprefix + ".pdb"),
        "open %s\n" % rmf_fn,
        "hide\n",
        "show cartoon\n",
        "color #%d %s\n" % (CHIMERAX_PDB_MODEL_NUM, STRUCT_COLOR),
        "color #%d %s\n" % (CHIMERAX_RMF_MODEL_NUM, UNSTRUCT_COLOR),
        "hide #%d\n" % CHIMERAX_RMF_MODEL_NUM,
    ]

    # every (molecule, residue) that is covered by a rigid body pdb;
    # a set keeps the membership test below constant-time
    struct_residues = set()
    for val in map_pdb2rmf.values():
        struct_residues.update(val.values())

    # (molecule, residue) -> atomspec of the rmf particle representing it
    unstruct_atomspec = {}
    for molname, particle_name in rmf_ps:
        rmf_chain_id = chain_ids[molname]
        if "bead" in particle_name:
            # bead names start with "<first>-<last>_"; one atomspec is shared
            # by every residue in that range
            r0, r1 = particle_name.split("_")[0].split("-")
            r0 = int(r0)
            r1 = int(r1)
            this_atomspec = "#%d/%s:%d-%d" % \
                            (CHIMERAX_RMF_MODEL_NUM, rmf_chain_id, r0, r1)
            for r in range(r0, r1 + 1):
                unstruct_atomspec[(molname, r)] = this_atomspec
        else:
            r = int(particle_name)
            if (molname, r) not in struct_residues:
                unstruct_atomspec[(molname, r)] = "#%d/%s:%d" % \
                    (CHIMERAX_RMF_MODEL_NUM, rmf_chain_id, r)

    # sorted so that the generated script is identical from run to run
    script.append(
        "show %s\n" % (" ".join(sorted(set(unstruct_atomspec.values())))))

    # ----------------------------------------------------------
    # if crosslink data is supplied, write out a pseudobond file
    # ----------------------------------------------------------
    if xl_fn is not None:
        # parse XL data
        df = pd.read_csv(os.path.abspath(xl_fn))
        xls = list(zip(df["protein1"], df["residue1"],
                       df["protein2"], df["residue2"], df["sat"]))

        # get lists of struct atomspecs
        atomspec = {}
        for (mol, particle_name) in rmf_ps:
            if "bead" in particle_name:
                continue
            resid = int(particle_name)
            if (mol, resid) in unstruct_atomspec:
                continue
            atomspec[(mol, resid)] = "#%d/%s:%d@CA" % \
                                     (CHIMERAX_PDB_MODEL_NUM, chain_ids[mol],
                                      resid)

        # now add in all the unstruct atomspecs
        atomspec.update(unstruct_atomspec)

        # write pseudobond script
        pb_lines = [
            "; radius = %2.2f\n" % XL_RADIUS,
            "; dashes = 0\n",
        ]
        for p1, r1, p2, r2, sat in xls:
            atomspec_1 = atomspec[(p1, r1)]
            atomspec_2 = atomspec[(p2, r2)]
            # both ends fall on the same particle: nothing to draw
            if atomspec_1 == atomspec_2:
                continue
            color = SAT_XL_COLOR if sat else VIOL_XL_COLOR
            pb_lines.append("%s %s %s\n" % (atomspec_1, atomspec_2, color))
        pb_lines.append("\n")
        pb_fn = outprefix + "_XLs.pb"
        with open(pb_fn, "w") as fh:
            fh.write("".join(pb_lines))
        script.append("open %s\n" % pb_fn)

    script.append("preset 'overall look' publication\n")
    chimerax_out_fn = outprefix + ".cxc"
    with open(chimerax_out_fn, "w") as fh:
        fh.write("".join(script))
Beispiel #20
0
def _atom_coordinates(entity):
    """Return the coordinates of every atom in *entity* as a single array."""
    return np.asarray([atom.get_coord() for atom in entity.get_atoms()])


def _centre_of_mass(entity):
    """Return the mass-weighted mean atom position of *entity* as a Vector."""
    total_mass = 0.0
    weighted_sum = np.zeros(3)
    for atom in entity.get_atoms():
        weighted_sum = weighted_sum + atom.get_vector()._ar * atom.mass
        total_mass += atom.mass
    # Divide the raw array instead of the Vector: depending on the Biopython
    # release, Vector implements __div__ or __truediv__, but not always both.
    return Vector(weighted_sum / total_mass)


def _superimpose(reference_coords, coords):
    """Return (rotation, translation) fitting *coords* onto *reference_coords*."""
    sup = SVDSuperimposer()
    sup.set(reference_coords, coords)
    sup.run()
    return sup.get_rotran()


def analyse(input_file_name,
            refer_file_name,
            moved_chain_id,
            fixed_chain_id,
            r_moved_chain_id,
            r_fixed_chain_id,
            output_file1,
            output_file2,
            r_model_number=0):
    """Describe, per model, where one chain sits relative to another.

    Every model of the input structure is superimposed on the reference by
    fitting its fixed chain to the reference fixed chain (all atoms of the
    model are transformed).  The centre of mass of the moved chain and the
    rotation that fits the moved chain onto the reference moved chain are then
    recorded and written out as tab-separated text, one line per model.

    Parameters:
        input_file_name (str): PDB file with the models to analyse.
        refer_file_name (str): PDB file with the reference structure.
        moved_chain_id (str): ID of the moved chain in the input file.
        fixed_chain_id (str): ID of the fixed chain in the input file.
        r_moved_chain_id (str): ID of the moved chain in the reference file.
        r_fixed_chain_id (str): ID of the fixed chain in the reference file.
        output_file1 (str): Receives the columns
            frame, d, theta, phi, theta_x, theta_y, theta_z
            where d is the distance between the two centres of mass, theta is
            the angle between the moved centre and the z axis, phi is
            arctan2(y, x) of the moved centre, and theta_x/y/z are computed
            from the entries of the moved-chain rotation matrix.
        output_file2 (str): Receives the columns frame, x, y, z (moved centre
            of mass) followed by the nine rotation matrix entries in the order
            R[0][0], R[1][0], R[2][0], R[0][1], ...; every line ends with a
            trailing tab.
        r_model_number (int): Model of the reference structure to use.

    Returns:
        None

    Notes:
        theta and phi are written as 0.0 for models in which they are
        undefined (moved centre at the origin, or on the z axis for phi), so
        that every model always contributes exactly one line.
    """
    structure = PDBParser(PERMISSIVE=1).get_structure('to_analyse',
                                                      input_file_name)
    reference = PDBParser(PERMISSIVE=1).get_structure('reference',
                                                      refer_file_name)

    # The reference is never transformed, so its coordinates are read once.
    r_chain_moved = reference[r_model_number][r_moved_chain_id]
    r_chain_fixed = reference[r_model_number][r_fixed_chain_id]
    reference_moved_coords = _atom_coordinates(r_chain_moved)
    reference_fixed_coords = _atom_coordinates(r_chain_fixed)

    summary_rows = []  # one tuple per model, written to output_file1
    detail_rows = []  # one tuple per model, written to output_file2

    for model_number, model in enumerate(structure):
        # Take the chains from the iterated model itself rather than looking
        # the model up again by its enumeration index.
        chain_moved = model[moved_chain_id]
        chain_fixed = model[fixed_chain_id]

        # Bring the whole model into the reference frame via the fixed chain.
        R, V = _superimpose(reference_fixed_coords,
                            _atom_coordinates(chain_fixed))
        for atom in model.get_atoms():
            atom.transform(R, V)

        moved_centre = _centre_of_mass(chain_moved)
        fixed_centre = _centre_of_mass(chain_fixed)
        if fixed_centre.norm() > 0.5:
            print("Fixed chain norm is " + str(fixed_centre.norm()) +
                  " in model " + str(model_number) +
                  ". Should have been at the origin. Check code...")

        x, y, z = moved_centre._ar
        distance = (moved_centre - fixed_centre).norm()

        # Both angles default to 0.0 where they are undefined; leaving phi
        # out in that case would shift all later phi values by one model.
        theta = 0.0
        phi = 0.0
        if moved_centre.norm() > 1e-6:
            theta = moved_centre.angle(Vector(0, 0, 1))
            if np.sqrt(x * x + y * y) > 1e-6:
                phi = np.arctan2(y, x)

        # Orientation of the (already transformed) moved chain relative to
        # the reference moved chain; only the rotation is used.
        R, V = _superimpose(reference_moved_coords,
                            _atom_coordinates(chain_moved))
        theta_x = np.arctan2(R[2][1], R[2][2])
        theta_y = np.arctan2(
            -R[2][0], np.sqrt(R[2][1] * R[2][1] + R[2][2] * R[2][2]))
        theta_z = np.arctan2(R[1][0], R[0][0])

        summary_rows.append((model_number, distance, theta, phi,
                             theta_x, theta_y, theta_z))
        detail_rows.append(
            (model_number, x, y, z) +
            tuple(R[row][col] for col in range(3) for row in range(3)))

    with open(output_file1, "w+") as f_results1:
        for row in summary_rows:
            f_results1.write('\t'.join(str(value) for value in row) + '\n')

    with open(output_file2, "w+") as f_results2:
        for row in detail_rows:
            # the trailing tab before the newline is part of the file format
            f_results2.write('\t'.join(str(value) for value in row) + '\t\n')
Beispiel #21
0
def merge_cc(coords_list, res_overlap, n_cc_helices):
    """Chain consecutive coiled-coil segments into one coordinate array.

    Each segment in ``coords_list`` stores its ``n_cc_helices`` helices one
    after another along the first axis; the per-helix length is taken from the
    first segment.  Every following segment is superimposed on the segment
    before it, using the trailing ``overlap`` residues (5 atoms per residue)
    of each helix of the previous segment and the leading ``overlap`` residues
    of each helix of the new one.  The overlapping atoms are then blended
    linearly and the rest of the new segment is appended helix by helix.

    :param coords_list: coordinate arrays, one per segment
    :param res_overlap: number of overlapping residues for each junction
    :param n_cc_helices: number of helices in the coiled coil
    :return: tuple of (merged coordinates with the helices stacked one after
        another, square root of the summed squared fit RMS values)
    """
    atoms_per_residue = 5
    previous = coords_list[0]
    fitted = [deepcopy(coords_list[0])]
    helix_len = int(previous.shape[0] / n_cc_helices)
    squared_devs = []

    # Pass 1: superimpose every segment on the (already fitted) previous one.
    for segment, overlap in zip(coords_list[1:], res_overlap):
        overlap_atoms = overlap * atoms_per_residue

        # last overlap atoms of each helix in the previous segment
        tail = np.concatenate([
            previous[(h + 1) * helix_len - overlap_atoms:(h + 1) * helix_len]
            for h in range(n_cc_helices)
        ], axis=0)
        # first overlap atoms of each helix in the segment being added
        head = np.concatenate([
            segment[h * helix_len:h * helix_len + overlap_atoms]
            for h in range(n_cc_helices)
        ], axis=0)

        fitter = SVDSuperimposer()
        fitter.set(tail, head)
        fitter.run()
        squared_devs.append(fitter.get_rms()**2)
        rotation, translation = fitter.get_rotran()

        previous = np.dot(segment, rotation) + translation
        fitted.append(previous)

    rmsd = np.sqrt(np.sum(squared_devs))

    # Pass 2: stitch the fitted segments together, one growing array per helix.
    helices = [
        fitted[0][h * helix_len:(h + 1) * helix_len]
        for h in range(n_cc_helices)
    ]

    for segment, overlap in zip(fitted[1:], res_overlap):
        overlap_atoms = overlap * atoms_per_residue
        pieces = [
            segment[h * helix_len:(h + 1) * helix_len]
            for h in range(n_cc_helices)
        ]

        for h in range(n_cc_helices):
            for shift in range(overlap_atoms):
                # all atoms of one residue share a weight, which grows
                # towards the new segment along the overlap
                weight = (shift // atoms_per_residue + 1) / float(overlap + 1)
                row = shift - overlap_atoms
                helices[h][row] = (
                    1 - weight) * helices[h][row] + weight * pieces[h][shift]

            helices[h] = np.append(helices[h],
                                   pieces[h][overlap_atoms:],
                                   axis=0)

    res_dimer = np.concatenate(helices, axis=0)

    return res_dimer, rmsd
Beispiel #22
0
========================================================

Module contains functions that are used by two or more functions from
different modules.

Functions
---------

.. autofunction:: get_best_fit_rot_mat
"""
import os
import errno
import cStringIO
from Bio.SVDSuperimposer import SVDSuperimposer

superimpose_inst = SVDSuperimposer()


def get_best_fit_rot_mat(from_coord, to_coord):
    """
    Compute best-fit rotation matrix.

    The best-fit rotation matrix rotates from_coord such that the RMSD
    between the 2 sets of coordinates are minimized after the rotation.

    Parameters
    ----------
    from_coord, to_coord : np.array
        Nx3 coordinate arrays, where N is the number of atoms. The
        from_coord will rotated such that the rotation will minimize
        the RMSD between the rotated from_coord and to_coord.
Beispiel #23
0
def super_prot(atom_coords_1, atom_coords_2):
    """Superimpose two atom coordinate lists with BioPython and return the RMSD.

    ``atom_coords_1`` is passed to ``SVDSuperimposer.set`` as the reference
    set and ``atom_coords_2`` as the set that is fitted onto it.
    """
    superimposer = SVDSuperimposer()
    superimposer.set(atom_coords_1, atom_coords_2)
    superimposer.run()
    # CAREFUL: the accessor is called get_rms, not get_rmsd
    rmsd = superimposer.get_rms()
    return rmsd
Beispiel #24
0
def tm_movement_2D(pdbs1, pdbs2, mode, data, gn_dictionary):
    string_mode = ["extracellular", "intracellular", "pocket", "middle"]
    intracellular = (mode == 1)
    print("COMPARISON", string_mode[mode])
    print(pdbs1)
    print("VS")
    print(pdbs2)

    distances_set1 = Distances()
    distances_set1.load_pdbs(pdbs1)
    distances_set1.filtered_gns = True

    distances_set2 = Distances()
    distances_set2.load_pdbs(pdbs2)
    distances_set2.filtered_gns = True

    conserved_set1 = distances_set1.fetch_conserved_gns_tm()
    conserved_set2 = distances_set2.fetch_conserved_gns_tm()
    conserved = [x for x in conserved_set2 if x in conserved_set1]

    gns = [[]] * 7
    middle_gpcr = [[]] * 7
    if mode <= 1: # Intracellular or Extracellular
        for i in range(0,7):
            tm_only = [x for x in conserved if x[0]==str(i+1)]
            if intracellular and i % 2 == 0: #all uneven TMs (as # = i+1)
                tm_only.reverse()
            elif not intracellular and i % 2 == 1: # all even TMs (as # i+1)
                tm_only.reverse()
            if len(tm_only) < 3:
                print("too few residues")
                return []
            gns[i] = tm_only[0:3]

            for upwards in range(12, 6, -1):
                if len(tm_only) >= upwards:
                    middle_gpcr[i] = tm_only[(upwards-3):upwards]
                    break

        # INCLUDING References points from membrane middle of GPCR
        # ref_membrane_mid = {}
        # ref_membrane_mid["001"] = [['1x43', '1x44','1x45'], ['2x51', '2x52','2x53'], ['3x35', '3x36', '3x37'], ['4x53', '4x54', '4x55'], ['5x45', '5x46', '5x47'], ['6x47', '6x48', '6x49'], ['7x42', '7x43', '7x44']] # A
        # #ref_membrane_mid["002"] = [['1x50', '1x51', '1x52'], ['2x57', '2x58', '2x59'], ['3x40','3x41','3x42'], ['4x53', '4x54', '4x55'], ['5x44', '5x45', '5x46'], ['6x48', '6x49', '6x50'], ['7x49', '7x50', '7x51']] # B1
        # ref_membrane_mid["002"] = [['1x50', '1x51', '1x52'], ['2x57', '2x58', '2x59'], ['3x40','3x41','3x42'], ['4x55', '4x56'], ['5x42', '5x43', '5x44'], ['7x47', '7x49']] # B1
        # ref_membrane_mid["003"] = ref_membrane_mid["002"] # B2
        # ref_membrane_mid["004"] = [['1x48', '1x49', '1x50'], ['2x47', '2x48', '2x49'], ['3x39', '3x40', '3x41'], ['4x40', '4x41', '4x42'], ['5x47', '5x48', '5x49'], ['6x47', '6x48', '6x49'], ['7x39', '7x40', '7x41']] # C
        # ref_membrane_mid["006"] = [['1x42', '1x43', '1x44'], ['2x52', '2x53', '2x54'], ['3x37', '3x38', '3x39'], ['4x52', '4x53', '4x54'], ['5x52', '5x53', '5x54'], ['6x42', '6x43', '6x44'], ['7x46', '7x47', '7x48']] # F
        #
        # middle_gpcr = ref_membrane_mid[data['gpcr_class']]
    elif mode == 2: # Major pocket (class A)
        ligand_references = [['1x39', '1x40','1x41'], ['2x56', '2x57','2x58'], ['3x31', '3x32', '3x33'], ['4x56', '4x57', '4x58'], ['5x43', '5x44', '5x45'], ['6x51', '6x52', '6x53'], ['7x39', '7x40', '7x41']]
        for i in range(0,7):
            gns[i] = [x for x in ligand_references[i] if x in conserved]
            tm_only = [x for x in conserved if x[0]==str(i+1)]
            if i % 2 == 1: #all uneven TMs (as # = i+1)
                tm_only.reverse()
            if len(gns[i]) > 0:
                if i % 2 == 1: #all uneven TMs (as # = i+1)
                    start_pos = tm_only.index(gns[i][-1])
                else:
                    start_pos = tm_only.index(gns[i][0])

                gns[i] = tm_only[start_pos:(start_pos+3)]

                # Stay close for this as references
                #middle_gpcr[i] = tm_only[(start_pos+6):(start_pos+9)]
                for upwards in range(9, 6, -1):
                   if len(tm_only) >= (start_pos+upwards):
                       middle_gpcr[i] = tm_only[(start_pos+upwards-3):(start_pos+upwards)]
                       continue
            else:
                if len(tm_only) < 9:
                    print("too few residues")
                    return []
                else:
                    #print("Refind",i, gns[i])
                    gns[i] = tm_only[0:3]
                    middle_gpcr[i] = tm_only[6:9]

                    # for upwards in range(15, 6, -1):
                    #     if len(tm_only) >= upwards:
                    #         middle_gpcr[i] = tm_only[(upwards-3):upwards]

        # # FILTER not conserved GNs
        # middle_gpcr = [[]] * 7
        # for i in range(0,7):
        #     tm_only = [x for x in conserved if x[0]==str(i+1)]
        #     if i % 2 == 0: #all uneven TMs (as # = i+1)
        #         tm_only.reverse()
        #
        #     if len(tm_only) < 3:
        #         print("too few residues")
        #         return []
        #
        #     middle_gpcr[i] = tm_only[0:3]
        #print(middle_gpcr)

    elif mode == 3: # Middle
        # References points from membrane middle of GPCR
        ref_membrane_mid = {}
        ref_membrane_mid["001"] = [['1x43', '1x44','1x45'], ['2x51', '2x52','2x53'], ['3x35', '3x36', '3x37'], ['4x53', '4x54', '4x55'], ['5x45', '5x46', '5x47'], ['6x47', '6x48', '6x49'], ['7x42', '7x43', '7x44']] # A
        #ref_membrane_mid["002"] = [['1x50', '1x51', '1x52'], ['2x57', '2x58', '2x59'], ['3x40','3x41','3x42'], ['4x53', '4x54', '4x55'], ['5x44', '5x45', '5x46'], ['6x48', '6x49', '6x50'], ['7x49', '7x50', '7x51']] # B1
        #ref_membrane_mid["002"] = [['1x50', '1x51', '1x52'], ['2x57', '2x58', '2x59'], ['3x40','3x41','3x42'], ['4x55', '4x56'], ['5x42', '5x43', '5x44'], ['7x47', '7x49']] # B1
        ref_membrane_mid["002"] = [['1x50', '1x51', '1x52'], ['2x57', '2x58', '2x59'], ['3x40','3x41','3x42'], ['4x55', '4x56'], ['5x42', '5x43', '5x44'], ['6x48', '6x49', '6x50'], ['7x47', '7x49']] # B1
        ref_membrane_mid["003"] = ref_membrane_mid["002"] # B2
        ref_membrane_mid["004"] = [['1x48', '1x49', '1x50'], ['2x47', '2x48', '2x49'], ['3x39', '3x40', '3x41'], ['4x40', '4x41', '4x42'], ['5x47', '5x48', '5x49'], ['6x47', '6x48', '6x49'], ['7x39', '7x40', '7x41']] # C
        ref_membrane_mid["006"] = [['1x42', '1x43', '1x44'], ['2x52', '2x53', '2x54'], ['3x37', '3x38', '3x39'], ['4x52', '4x53', '4x54'], ['5x52', '5x53', '5x54'], ['6x42', '6x43', '6x44'], ['7x46', '7x47', '7x48']] # F

        membrane_mid = ref_membrane_mid[data['gpcr_class']]

        if data['gpcr_class'] != "001":
            inv_gn_dictionary = {v: k for k, v in gn_dictionary.items()}
            for index in range(len(membrane_mid)):
                membrane_mid[index] = [inv_gn_dictionary[res] for res in membrane_mid[index]]

        for i in range(0,7):
            gns[i] = [x for x in membrane_mid[i] if x in conserved]
            tm_only = [x for x in conserved if x[0]==str(i+1)]
            if i % 2 == 1: #all uneven TMs (as # = i+1)
                tm_only.reverse()
            if len(gns[i]) > 0:
                if i % 2 == 1: #all uneven TMs (as # = i+1)
                    start_pos = tm_only.index(gns[i][-1])
                else:
                    start_pos = tm_only.index(gns[i][0])

                gns[i] = tm_only[start_pos:(start_pos+3)]

                # Stay close for this as references
                #middle_gpcr[i] = tm_only[(start_pos+6):(start_pos+9)]
                for upwards in range(6, 3, -1):
                   if len(tm_only) >= (start_pos+upwards):
                       middle_gpcr[i] = tm_only[(start_pos+upwards-3):(start_pos+upwards)]
                       continue
            else:
                if len(tm_only) < 6:
                    print("too few residues")
                    return []
                else:
                    #print("Refind",i, gns[i])
                    gns[i] = tm_only[0:3]
                    middle_gpcr[i] = tm_only[3:6]

                    # for upwards in range(15, 6, -1):
                    #     if len(tm_only) >= upwards:
                    #         middle_gpcr[i] = tm_only[(upwards-3):upwards]

    # Merge the reference and the helper points
    gns_flat = [y for x in gns for y in x]
    middle_gpcr = [list(filter(lambda x: x in conserved and x not in gns_flat, tm_list)) for tm_list in middle_gpcr]
    # print(gns)
    # print(middle_gpcr)

    ends_and_middle = gns[:]
    ends_and_middle.extend(middle_gpcr)
    ends_and_middle_flat = [y for x in ends_and_middle for y in x]
    ends_and_middle_grouping = [x for x in range(0, len(ends_and_middle)) for y in ends_and_middle[x]]
    segment_order = [int(ends_and_middle[x][0][0])-1 for x in range(0, len(ends_and_middle))]

    distances_set1.filter_gns.extend([y for x in ends_and_middle for y in x])
    distances_set2.filter_gns = distances_set1.filter_gns
    distances_set1.fetch_distances_tm(distance_type = "HC")
    distances_set2.fetch_distances_tm(distance_type = "HC")


    membrane_data1 = [x[:] for x in [[0] * len(ends_and_middle_flat)] * len(ends_and_middle_flat)]
    membrane_data2 = [x[:] for x in [[0] * len(ends_and_middle_flat)] * len(ends_and_middle_flat)]
    for i in range(0,len(ends_and_middle_flat)-1):
        for j in range(i+1, len(ends_and_middle_flat)):
            if right_gn_order(ends_and_middle_flat[i], ends_and_middle_flat[j]):
                filter_key = ends_and_middle_flat[i] + "_" + ends_and_middle_flat[j]
            else:
                filter_key = ends_and_middle_flat[j] + "_" + ends_and_middle_flat[i]

            if ends_and_middle_flat[i] != ends_and_middle_flat[j]:
                membrane_data1[i][j] = sum(distances_set1.data[filter_key])/len(pdbs1)
                membrane_data1[j][i] = membrane_data1[i][j]
                membrane_data2[i][j] = sum(distances_set2.data[filter_key])/len(pdbs2)
                membrane_data2[j][i] = membrane_data2[i][j]

    # Identify most stable TMs by ranking the variations to all other helices
    membrane_data1 = np.array([np.array(x) for x in membrane_data1])
    membrane_data2 = np.array([np.array(x) for x in membrane_data2])
    diff_distances = [x[:] for x in [[0] * len(ends_and_middle)] * len(ends_and_middle)]
    for i in range(0,max(ends_and_middle_grouping)):
        for j in range(i+1, max(ends_and_middle_grouping)+1):
            # Calculate movements for each TM relative to their "normal" distance
            # selected residues for group 1 and 2
            group_1 = [x for x in range(0,len(ends_and_middle_grouping)) if ends_and_middle_grouping[x] == i]
            group_2 = [x for x in range(0,len(ends_and_middle_grouping)) if ends_and_middle_grouping[x] == j]

            diff_distances[i][j] = np.sum(abs(membrane_data1[group_1][:, group_2] - membrane_data2[group_1][:, group_2]))/(np.sum(membrane_data1[group_1][:, group_2]+membrane_data2[group_1][:, group_2])/2)*100
            diff_distances[j][i] = diff_distances[i][j]

    # Ranking for each TM
    sum_differences = [sum(x) for x in diff_distances]
    # normalized_differences = [((sum_differences[i]-min(sum_differences[0:7]))/(max(sum_differences[0:7])-min(sum_differences[0:7])))**2 for i in range(0,7)]
    for i in range(0,7):
        diff_distances[i] = [sorted(diff_distances[i]).index(x) for x in diff_distances[i]]
    final_rank = [sum([diff_distances[j][i] for j in range(0,7)]) for i in range(0,7)]

    # Grab stable TMs
    tm_ranking = [0] * 7
    sorted_rank = sorted(final_rank)
    for i in range(0,7):
        tm_ranking[i] = final_rank.index(sorted_rank[i])
        final_rank[tm_ranking[i]] = 100 # make sure this TM isn't repeated

    # Calculate 3D coordinates from distance matrix
    tms_centroids_set1, tms_set1 = recreate3Dorder(membrane_data1, ends_and_middle_grouping)
    tms_centroids_set2, tms_set2 = recreate3Dorder(membrane_data2, ends_and_middle_grouping)

    # Align 3D points of set2 with 3D points of set1 using the most stable reference points
    best_rmsd = 1000
    best_set = []
    # Disabled the testing RMSD for now
    for comb in combinations(tm_ranking[:3], 3):
    #for comb in combinations(tm_ranking[:4], 3):
        sel_refs = [x for x in range(0,len(segment_order)) if segment_order[x] in comb]
        #print(sel_refs)

        tms_reference_set1 = np.array(tms_centroids_set1[sel_refs], copy = True)
        tms_reference_set2 = np.array(tms_centroids_set2[sel_refs], copy = True)

        imposer = SVDSuperimposer()
        imposer.set(tms_reference_set1, tms_reference_set2)
        imposer.run()
        rot, trans = imposer.get_rotran()
        rmsd = imposer.get_rms()

        print("RMSD", round(rmsd,2), tm_ranking)
        if rmsd < best_rmsd:
            best_set = comb
            best_rmsd = rmsd

    # Check for possible mirroring error
    test_set2 = np.dot(tms_centroids_set2, rot) + trans
    error = 0
    for i in tm_ranking[3:7]:
        if np.linalg.norm(test_set2[i] - tms_centroids_set1[i]) > 5:
            error += 1

    #if rmsd > 2:
    #if error >= 3 or rmsd > 2:
    if True:
        for i in range(0,len(tms_centroids_set2)):
            tms_centroids_set2[i][2] = tms_centroids_set2[i][2]*-1

        # Align 3D points of set2 with 3D points of set1 using the most stable reference points
        tms_reference_set1 = tms_centroids_set1[[x for x in range(0,len(segment_order)) if segment_order[x] in tm_ranking[0:3]]]
        tms_reference_set2 = tms_centroids_set2[[x for x in range(0,len(segment_order)) if segment_order[x] in tm_ranking[0:3]]]

        imposer = SVDSuperimposer()
        imposer.set(tms_reference_set1, tms_reference_set2)
        imposer.run()
        new_rot, new_trans = imposer.get_rotran()
        new_rmsd = imposer.get_rms()
        print("RMSD2", round(new_rmsd,2))

        if new_rmsd < rmsd:
            rot = new_rot
            trans = new_trans
            rmsd = new_rmsd
        else:
            for i in range(0,len(tms_centroids_set2)):
                tms_centroids_set2[i][2] = tms_centroids_set2[i][2]*-1

    # test_set2 = np.dot(tms_reference_set2, rot) + trans
    # for i in range(0,len(test_set2)):
    #     print("pseudoatom s1_tm" + str(i+1), ", pos=[", ','.join([str(x) for x in tms_reference_set1[i]]), "]")
    # for i in range(0,len(test_set2)):
    #     print("pseudoatom s2_tm" + str(i+1), ", pos=[", ','.join([str(x) for x in test_set2[i]]), "]")
    #
    # print("############")
    # #test_set2 = np.dot(tms_centroids_set2, rot) + trans
    # test_set2 = np.array(tms_centroids_set2, copy = True)
    # for i in range(0,len(tms_centroids_set1)):
    #     print("pseudoatom s1_tm" + str(i+1), ", pos=[", ','.join([str(x) for x in tms_centroids_set1[i]]), "]")
    # for i in range(0,len(tms_centroids_set2)):
    #     print("pseudoatom s2_tm" + str(i+1), ", pos=[", ','.join([str(x) for x in tms_centroids_set2[i]]), "]")

    # if rmsd > 2:
    #     for i in range(0,len(tms_centroids_set2)):
    #         tms_centroids_set2[i][2] = tms_centroids_set2[i][2]*-1
    #     # Huge error during alignment of "stable" helices, just use the references not the helper points
    #     tms_reference_set1 = tms_centroids_set1[[x for x in range(0,7) if segment_order[x] in tm_ranking[0:4]]]
    #     tms_reference_set2 = tms_centroids_set2[[x for x in range(0,7) if segment_order[x] in tm_ranking[0:4]]]
    #     imposer = SVDSuperimposer()
    #     imposer.set(tms_reference_set1, tms_reference_set2)
    #     imposer.run()
    #     rot, trans = imposer.get_rotran()
    #     rmsd = imposer.get_rms()
    #     print("RMSD3", round(rmsd,2))
    #

    tms_centroids_set2 = np.dot(tms_centroids_set2, rot) + trans
    tms_set2 = np.dot(tms_set2, rot) + trans

    # Calculate optimal plane through points in both sets and convert to 2D
    # Try normal based on TM7
    # tm7_centroids = tms_centroids_set1[[x for x in range(0,len(segment_order)) if segment_order[x] == 6]]
    # if len(tm7_centroids) == 2:
    #     normal = (tm7_centroids[1] - tm7_centroids[0])/np.linalg.norm(tm7_centroids[1] - tm7_centroids[0])
    # else:
    #     # Using TM mid as reference plane
    #     normal, midpoint = calculatePlane(np.concatenate((tms_centroids_set1[7:], tms_centroids_set2[7:])), intracellular)

    # Alternative: use center of helical ends and center of helical middle
    #    normal = tms_centroids_set1[:7].mean(axis=0)  - tms_centroids_set1[7:].mean(axis=0)
    #    normal = normal/np.linalg.norm(normal)

    # 7TM references
    tm_centroids = {y:[] for y in range(0,7)}
    [tm_centroids[y].append(tms_centroids_set1[x]) for y in range(0,7) for x in range(0,len(segment_order)) if segment_order[x] == y]
    count = 0
    normal = np.array([0.0,0.0,0.0])
    for y in range(0,7):
        #if len(tm_centroids[y]) == 2 and (mode != 1 or y != 5):
        if len(tm_centroids[y]) == 2:
            normal += np.array((tm_centroids[y][1] - tm_centroids[y][0])/np.linalg.norm(tm_centroids[y][1] - tm_centroids[y][0]))
            count += 1
    normal = normal/count

    midpoint = tms_centroids_set1[:7].mean(axis=0)

    #plane_set1, z_set1 = convert3D_to_2D_plane(tms_centroids_set1[:7], intracellular, normal, midpoint)
    #plane_set2, z_set2 = convert3D_to_2D_plane(tms_centroids_set2[:7], intracellular, normal, midpoint)
    plane_set, z_set = convert3D_to_2D_plane(np.concatenate((tms_centroids_set1[:7], tms_centroids_set2[:7]), axis = 0), intracellular, normal, midpoint)
    plane_set1 = plane_set[:7]
    plane_set2 = plane_set[7:]
    z_set1 = z_set[:7]
    z_set2 = z_set[7:]

    # DO NOT REMOVE: possibly we want to upgrade to weighted superposing
    # Based on Biopython SVDSuperimposer
    # coords = tms_centroids_set2
    # reference_coords = tms_centroids_set1

    # OLD centroid calcalation
    # av1 = sum(coords) / len(coords)
    # av2 = sum(reference_coords) / len(reference_coords)

    # NEW weighted centroid calculation
    # print(normalized_differences)
    # av1, av2 = 0, 0
    # totalweight = 0
    # for i in range(0,7):
    #     # print("Round",i)
    #     #weight = 1+(7-tm_ranking.index(i))/7
    #     weight = (1-normalized_differences[i]+0.1)/1.1
    #     totalweight += weight
    #     print("TM", str(i+1), "weight",weight)
    #     av1 += coords[i]*weight
    #     av2 += reference_coords[i]*weight
    #
    # av1 = av1/totalweight
    # av2 = av2/totalweight
    #
    # coords = coords - av1
    # reference_coords = reference_coords - av2
    #
    # # correlation matrix
    # a = np.dot(np.transpose(coords), reference_coords)
    # u, d, vt = np.linalg.svd(a)
    # rot = np.transpose(np.dot(np.transpose(vt), np.transpose(u)))
    # # check if we have found a reflection
    # if np.linalg.det(rot) < 0:
    #     vt[2] = -vt[2]
    #     rot = np.transpose(np.dot(np.transpose(vt), np.transpose(u)))
    # trans = av2 - np.dot(av1, rot)
    # rot, trans = imposer.get_rotran()
    # tms_set2 = np.dot(tms_set2, rot) + trans

    # CURRENT: Ca-angle to axis core
    rotations = [0] * 7
    for i in range(0,7):
        try:
            # rotations[i] = [data['tab4'][gn_dictionary[x]]['angles_set1'][1]-data['tab4'][gn_dictionary[x]]['angles_set2'][1] if abs(data['tab4'][gn_dictionary[x]]['angles_set1'][1]-data['tab4'][gn_dictionary[x]]['angles_set2'][1]) < 180 else -1*data['tab4'][gn_dictionary[x]]['angles_set2'][1]-data['tab4'][gn_dictionary[x]]['angles_set1'][1] for x in gns[i]]
            angles1 = [data['tab4'][gn_dictionary[x]]['angles_set1'][11] for x in gns[i]]
            angles1 = [angle if angle > 0 else angle + 360 for angle in angles1 ]
            angles2 = [data['tab4'][gn_dictionary[x]]['angles_set2'][11] for x in gns[i]]
            angles2 = [angle if angle > 0 else angle + 360 for angle in angles2 ]

            rotations[i] = [angles1[x] - angles2[x] for x in range(3)]
            rotations[i] = [value if abs(value) <= 180 else value-360 if value > 0 else value+360 for value in rotations[i]]

            # count=0
            # for x in gns[i]:
            #     print(i, x, data['tab4'][gn_dictionary[x]]['angles_set1'][11], data['tab4'][gn_dictionary[x]]['angles_set2'][11], rotations[i][count])
            #     count += 1

        except:
            rotations[i] = [0.0, 0.0, 0.0]  # TODO: verify other class B errors

        # UPDATE 20-02-2020 No mirroring but top-down through GPCR
        rotations[i] = sum(rotations[i])/3
        # if intracellular:
        #     rotations[i] = -1*sum(rotations[i])/3
        # else:
        #     rotations[i] = sum(rotations[i])/3


    # ALTERNATIVE: utilize TM tip alignment (needs debugging as some angles seem off, e.g. GLP-1 active vs inactive TM2)
    # Add rotation angle based on TM point placement
    # tms_2d_set1, junk = convert3D_to_2D_plane(tms_set1, intracellular, normal, midpoint)
    # tms_2d_set2, junk = convert3D_to_2D_plane(tms_set2, intracellular, normal, midpoint)

    # rotations = [0] * 7
    # for i in range(0,7):
    #     positions = [x for x in range(0, len(ends_and_middle_grouping)) if ends_and_middle_grouping[x] == i]
    #     turn_set1 = tms_2d_set1[positions]
    #     turn_set2 = tms_2d_set2[positions]
    #
    #     # set to middle
    #     turn_set1 = turn_set1 - turn_set1.mean(axis=0)
    #     turn_set2 = turn_set2 - turn_set2.mean(axis=0)
    #
    #     # Calculate shift per residue and take average for this TM
    #     for j in range(0,len(turn_set1)):
    #         v1 = turn_set1[j]/np.linalg.norm(turn_set1[j])
    #         v2 = turn_set2[j]/np.linalg.norm(turn_set2[j])
    #         angle = np.degrees(np.arctan2(v2[1], v2[0]) - np.arctan2(v1[1],v1[0]))
    #
    #         if abs(angle) > 180:
    #             angle = 360 - abs(angle)
    #
    #         rotations[i] += angle/len(turn_set1)

    # TODO: check z-coordinates orientation
    # Step 1: collect movement relative to membrane mid
    # Step 2: find min and max TM
    # Step 3: check if orientation of min/max TM matches the z-scales + intra/extra - if not invert z-coordinates
    labeled_set1 = [{"label": "TM"+str(i+1), "x": float(plane_set1[i][0]), "y": float(plane_set1[i][1]), "z": float(z_set1[i]), "rotation" : 0} for i in range(0,7)]
    labeled_set2 = [{"label": "TM"+str(i+1), "x": float(plane_set2[i][0]), "y": float(plane_set2[i][1]), "z": float(z_set2[i]), "rotation" : rotations[i]} for i in range(0,7)]

    # Convert used GNs to right numbering
    gns_used = gns[:]
    for i in range(0,len(gns)):
        for j in range(0,len(gns[i])):
            gns_used[i][j] = gn_dictionary[gns[i][j]]
    return {"coordinates_set1" : labeled_set1, "coordinates_set2": labeled_set2, "gns_used": gns_used}
from Bio.SVDSuperimposer import SVDSuperimposer

# Example: superimpose one small coordinate set onto another.
# Start with two coordinate sets, each an Nx3 array (here N = 4 points).
# NOTE(review): `array` is not imported in the visible part of this file;
# presumably it is numpy's `array` and 'f' is the element typecode - confirm.

x = array([[51.65, -1.90, 50.07],
          [50.40, -1.23, 50.65],
          [50.68, -0.04, 51.54],
          [50.22, -0.02, 52.85]], 'f')

y = array([[51.30, -2.99, 46.54],
          [51.09, -1.88, 47.58],
          [52.36, -1.20, 48.03],
          [52.71, -1.18, 49.38]], 'f')

sup = SVDSuperimposer()

# set the coords: x is the reference,
# y will be rotated and translated onto x
sup.set(x, y)

# do the least-squares fit
sup.run()

# get the RMSD of the fit
rms = sup.get_rms()

# get the rotation (applied by right-multiplying!) and the translation
rot, tran = sup.get_rotran()

# NOTE(review): the example is cut off here. The step that would rotate y onto
# x manually is missing; presumably it is dot(y, rot) + tran - confirm.
def compute_mean(reader, align_conf, num_confs, start=None, stop=None):
    """
        Accumulate aligned positions and orientation vectors over a trajectory.

        Every frame is superimposed onto ``align_conf`` with SVDSuperimposer
        (the fit uses only the coordinates returned by ``indexed_fetch_np``)
        and the resulting rotation/translation is applied to all positions;
        the a1 and a3 vectors are rotated only.  The transformed arrays are
        summed, not averaged: dividing by the returned frame count is left
        to the caller.

        Parameters:
            reader: An active trajectory reader exposing ``_get_system``.
            align_conf (numpy.array): Reference coordinates for the fit.  Must
                match the layout of ``indexed_fetch_np``'s result
                (presumably one row of 3 coordinates per particle - confirm).
            num_confs (int): Used as the frame limit when ``stop`` is None.
            <optional> start (int): Number of frames the reader is asked to skip first.
            <optional> stop (int): Upper bound for the frame counter.
                NOTE(review): the counter starts at 0 after the skip, so this
                bounds how many frames are read here rather than acting as an
                absolute configuration ID - confirm against the caller.

        Returns:
            mean_pos_storage (numpy.array): Per-particle sum of aligned positions.
            mean_a1_storage (numpy.array): Per-particle sum of rotated a1 vectors.
            mean_a3_storage (numpy.array): Per-particle sum of rotated a3 vectors.
            intermediate_mean_structures (list): Running means captured every
                INTERMEDIATE_EVERY frames (only when not running in parallel).
            confid (int): Number of frames that were accumulated.
    """
    stop = num_confs if stop is None else int(stop)
    start = 0 if start is None else int(start)

    mysystem = reader._get_system(N_skip=start)

    # snapshots of the running mean, used to judge decorrelation
    intermediate_mean_structures = []
    # performs the least-squares superposition of each frame onto align_conf
    sup = SVDSuperimposer()

    # three independent accumulators, one 3-vector per nucleotide
    mean_pos_storage, mean_a1_storage, mean_a3_storage = (
        np.array([np.zeros(3) for _row in range(n_nuc)]) for _acc in range(3)
    )

    confid = 0
    while mysystem != False:
        if not confid < stop:
            break

        mysystem.inbox()
        positions = fetch_np(mysystem)
        fit_positions = indexed_fetch_np(mysystem)
        a1_vectors = fetch_a1(mysystem)
        a3_vectors = fetch_a3(mysystem)

        # fit on the indexed subset only ...
        sup.set(align_conf, fit_positions)
        sup.run()
        rot, tran = sup.get_rotran()

        # ... then apply the transform to everything; orientation vectors
        # are directions, so they are rotated but not translated
        mean_pos_storage += np.einsum('ij, ki -> kj', rot, positions) + tran
        mean_a1_storage += np.einsum('ij, ki -> kj', rot, a1_vectors)
        mean_a3_storage += np.einsum('ij, ki -> kj', rot, a3_vectors)

        # report the quality of this frame's superposition
        # (the value printed is whatever sup.get_rms() returns)
        print("Frame:", confid, "Time:", mysystem._time, "RMSF:", sup.get_rms())

        confid += 1
        mysystem = reader._get_system()

        # intermediate means cannot be produced neatly by parallel workers
        if parallel or confid % INTERMEDIATE_EVERY != 0:
            continue
        intermediate_mean_structures.append(
            prep_pos_for_json(mean_pos_storage / confid)
        )
        print("INFO: Calculated intermediate mean for {} ".format(confid))

    return (
        mean_pos_storage,
        mean_a1_storage,
        mean_a3_storage,
        intermediate_mean_structures,
        confid,
    )
Beispiel #27
0
 def __init__(self):
     """Start with no coordinates loaded and a fresh SVDSuperimposer."""
     # both coordinate slots stay empty until they are assigned elsewhere
     self.reference_coordinate = self.model_coordinate = None
     self.sup = SVDSuperimposer()
def compute_deviations(reader,
                       mean_structure,
                       indexed_mean_structure,
                       num_confs,
                       start=None,
                       stop=None):
    """
        Measure, frame by frame, how far each particle sits from the mean structure.

        Each frame is superimposed onto ``indexed_mean_structure`` using only
        the nucleotides whose ``index`` is in the module-level ``indexes``;
        the resulting transform is then applied to every nucleotide and the
        Euclidean distance to ``mean_structure`` is taken per particle.

        Parameters:
            reader: An active trajectory reader exposing ``_get_system``.
            mean_structure (numpy.array): Mean position of every particle, one
                row per particle (the norm is taken along axis 1).
            indexed_mean_structure (numpy.array): Mean positions of the
                particles selected by ``indexes``; reference for the fit.
            num_confs (int): Used as the frame limit when ``stop`` is None.
            <optional> start (int): Number of frames the reader is asked to skip first.
            <optional> stop (int): Upper bound for the frame counter, which
                starts at 0 after the skip.

        Returns:
            deviations (list): One list per frame holding the distance of each
                particle from its mean position (kept as plain lists so the
                result is JSON-friendly).
    """
    stop = num_confs if stop is None else int(stop)
    start = 0 if start is None else int(start)

    def _all_positions(conf):
        # centre-of-mass position of every nucleotide in the frame
        return np.array([nuc.cm_pos for nuc in conf._nucleotides])

    def _fit_positions(conf):
        # same, restricted to the nucleotides selected for the alignment
        return np.array(
            [nuc.cm_pos for nuc in conf._nucleotides if nuc.index in indexes])

    # singular-value-decomposition based superposition of two point sets
    sup = SVDSuperimposer()
    deviations = []

    mysystem = reader._get_system(N_skip=start)

    confid = 0
    while mysystem != False:
        if not confid < stop:
            break

        mysystem.inbox()
        cur_conf = _all_positions(mysystem)
        subset = _fit_positions(mysystem)

        # determine the transform from the selected subset only
        sup.set(indexed_mean_structure, subset)
        sup.run()
        print("Frame number:", confid, "RMSF:", sup.get_rms())
        rot, tran = sup.get_rotran()

        # apply the transform to the whole frame, then take the per-particle
        # distance to the mean
        aligned = np.einsum('ij, ki -> kj', rot, cur_conf) + tran
        distances = np.linalg.norm(aligned - mean_structure, axis=1)
        deviations.append(list(distances))

        confid += 1
        mysystem = reader._get_system()

    return deviations