Beispiel #1
0
def normalize_points(points, n_type):
    """Move a pair of residues into the normalized base frame.

    The atoms of the first residue that are listed in ``BASE_ATOMS[n_type]``
    are fitted onto their ``NORMALIZED_BASE[n_type]`` coordinates; the
    resulting rotation and translation are then applied to every atom of
    both residues.

    Parameters:
        points: pair ``(p1, p2)`` of mappings from atom name to coordinates.
        n_type: key that must exist in both ``NORMALIZED_BASE`` and
            ``BASE_ATOMS``.

    Returns:
        ``(None, None)`` when fewer than three base atoms are present in the
        first residue, otherwise a two-element list holding the transformed
        mappings for ``p1`` and ``p2``.
    """
    assert n_type in NORMALIZED_BASE
    assert n_type in BASE_ATOMS
    first, second = points

    reference_xyz = []
    observed_xyz = []
    for atom_name in BASE_ATOMS[n_type]:
        assert atom_name in NORMALIZED_BASE[n_type]
        if atom_name not in first:
            continue
        reference_xyz.append(NORMALIZED_BASE[n_type][atom_name])
        observed_xyz.append(first[atom_name])

    # A rigid-body fit needs at least three matched atoms.
    if len(observed_xyz) < 3:
        return (None, None)

    fitter = SVDSuperimposer()
    fitter.set(np.array(reference_xyz, 'f'), np.array(observed_xyz, 'f'))
    fitter.run()
    rotation, translation = fitter.get_rotran()

    transformed = []
    for residue in (first, second):
        names = list(residue.keys())
        coords = np.array([residue[name] for name in names], 'f')
        moved = np.dot(coords, rotation) + translation
        transformed.append(dict(zip(names, moved)))
    return transformed
Beispiel #2
0
Datei: gc.py Projekt: biocryst/gc
def align_models(CA):
    """Iteratively superimpose all models and report the final transforms.

    A working copy of ``CA`` is first fitted model-by-model onto model 0,
    then repeatedly fitted onto the mean of the working copy until the sum
    of squared RMS values improves by no more than 0.001 between rounds.

    Parameters:
        CA: array indexed as ``CA[model]``; ``CA.shape[0]`` is the number of
            models. The input itself is not modified.

    Returns:
        ``(transformations, rms)`` where ``transformations`` holds one
        ``(rotation, translation)`` pair per model (fitting the original
        model onto its aligned counterpart) and ``rms`` is
        ``sqrt(sum_of_squared_rms / n_models)`` from the last round.
    """
    n_models = CA.shape[0]
    aligned = np.copy(CA)
    fitter = SVDSuperimposer()

    # Initial pass: fit every other model onto the first one.
    first_model = aligned[0, :, :]
    sum_sq_rms = 0
    for idx in range(1, n_models):
        fitter.set(first_model, aligned[idx])
        fitter.run()
        sum_sq_rms += fitter.get_rms()**2
        aligned[idx] = fitter.get_transformed()

    # Refinement: fit onto the running mean until the gain drops below tolerance.
    tolerance = 0.001
    previous_sum = float("inf")
    while previous_sum - sum_sq_rms > tolerance:
        previous_sum = sum_sq_rms
        average = np.mean(aligned, 0)
        sum_sq_rms = 0
        for idx in range(n_models):
            fitter.set(average, aligned[idx])
            fitter.run()
            sum_sq_rms += fitter.get_rms()**2
            aligned[idx] = fitter.get_transformed()

    # Recover one direct original -> aligned transform per model.
    transformations = []
    for original, final in zip(CA, aligned):
        fitter.set(final, original)
        fitter.run()
        transformations.append(fitter.get_rotran())

    return transformations, np.sqrt(sum_sq_rms / n_models)
Beispiel #3
0
def align(indexes, ref_conf, mysystem):
    """
    Superimpose one configuration onto a reference and serialise the result.

    Parameters:
        indexes (list): The indexes of the particles used for the fit.
        ref_conf (base_array): The reference configuration to align to.
        mysystem (base_array): The configuration to align; its positions,
            a1s and a3s are overwritten with the transformed values.

    Returns:
        str: The aligned configuration as produced by ``mysystem.conf_to_str()``.
    """
    fitter = SVDSuperimposer()
    # fix_diffusion artifacts have to be removed first or the SVD fit fails
    mysystem.inbox()
    current_subset = mysystem.positions[indexes]

    # Fit the selected particles onto the same particles of the reference
    fitter.set(ref_conf.positions[indexes], current_subset)
    fitter.run()
    rotation, translation = fitter.get_rotran()

    def _rotate(vectors):
        # right-multiplies every row vector by the rotation matrix
        return np.einsum('ij, ki -> kj', rotation, vectors)

    # Positions are rotated and shifted; orientation vectors are only rotated
    mysystem.positions = _rotate(mysystem.positions) + translation
    mysystem.a1s = _rotate(mysystem.a1s)
    mysystem.a3s = _rotate(mysystem.a3s)
    return mysystem.conf_to_str()  # callers expect a string
Beispiel #4
0
def rmsd_distance(points,
                  ref_points,
                  sup_atoms,
                  rmsd_atoms=None,
                  multiple_rmsd_variants=False):
    """Return an RMSD-based distance between a residue pair and a reference pair.

    Parameters:
        points: pair ``(c1, c2)`` of atom-name -> coordinate mappings.
        ref_points: pair ``(p1, p2)`` of reference mappings.
        sup_atoms: two atom-name lists; ``sup_atoms[0]`` is taken from the
            first residue and ``sup_atoms[1]`` from the second for the fit.
        rmsd_atoms: when given, the distance is computed by ``_rmsd_formula``
            using the fitted rotation/translation instead of the fit RMS.
        multiple_rmsd_variants: when true, ``rmsd_atoms`` is iterated and the
            smallest ``_rmsd_formula`` result is returned.

    Returns:
        ``1000.0`` if any fitting atom is missing from either structure,
        otherwise the computed distance.
    """
    current_a, current_b = points
    reference_a, reference_b = ref_points
    atoms_a, atoms_b = sup_atoms[0], sup_atoms[1]

    # Every fitting atom has to exist in both the current and reference residue.
    for names, current, reference in ((atoms_a, current_a, reference_a),
                                      (atoms_b, current_b, reference_b)):
        if any(name not in current or name not in reference
               for name in names):
            return 1000.0

    reference_xyz = ([reference_a[name] for name in atoms_a] +
                     [reference_b[name] for name in atoms_b])
    current_xyz = ([current_a[name] for name in atoms_a] +
                   [current_b[name] for name in atoms_b])

    fitter = SVDSuperimposer()
    fitter.set(np.array(reference_xyz, 'f'), np.array(current_xyz, 'f'))
    fitter.run()

    if rmsd_atoms is None:
        return fitter.get_rms()

    rotation, translation = fitter.get_rotran()
    if not multiple_rmsd_variants:
        return _rmsd_formula(points, ref_points, rotation, translation,
                             rmsd_atoms)
    return min(
        _rmsd_formula(points, ref_points, rotation, translation, variant)
        for variant in rmsd_atoms)
def compute_deviations(reader,
                       mean_structure,
                       indexed_mean_structure,
                       indexes,
                       num_confs,
                       start=None,
                       stop=None):
    """
        Measures how far each particle is from the mean structure in every frame

        Parameters:
            reader (readers.ErikReader): An active reader on the trajectory file to analyze.
            mean_structure (numpy.array): The position of each particle in the mean configuration.
            indexed_mean_structure (numpy.array): The mean positions used as the reference of the fit.
            indexes: Index applied to each frame's positions to select the particles used for the fit.
            num_confs (int): The number of configurations in the reader.
            <optional> start (int): Number of configurations skipped on the first read.  Used if parallel.
            <optional> stop (int): Upper bound on the number of frames processed.  Used if parallel.

        Returns:
            (deviations, RMSDs): deviations holds, per frame, a list with the distance of every
            particle from the mean structure after alignment; RMSDs holds, per frame, the fit RMS
            multiplied by 0.8518 (presumably a unit conversion -- confirm with the caller).
    """
    stop = num_confs if stop is None else int(stop)
    start = 0 if start is None else int(start)

    # SVD-based superposition of each frame onto the mean
    fitter = SVDSuperimposer()
    deviations = []
    RMSDs = []
    frames_done = 0

    frame = reader.read(n_skip=start)
    while frame != False and frames_done < stop:
        frame.inbox()

        # fit only the selected particles ...
        positions = frame.positions
        fitter.set(indexed_mean_structure, positions[indexes])
        fitter.run()
        rms = fitter.get_rms()
        print("Frame number:", frames_done, "Time:", frame.time, "RMSD:",
              rms)

        # ... but apply the transform to every particle of the frame
        rotation, translation = fitter.get_rotran()
        aligned = np.einsum('ij, ki -> kj', rotation, positions) + translation

        # plain lists so that the result can be dumped to json
        deviations.append(
            list(np.linalg.norm(aligned - mean_structure, axis=1)))
        RMSDs.append(rms * 0.8518)

        frames_done += 1
        frame = reader.read()

    return (deviations, RMSDs)
Beispiel #6
0
def cal_rmsd(recon_c, raw_c, mask, gapmask):
    """Return the backbone RMSD between a reconstructed structure and raw coordinates.

    The N, CA, C and O atoms of ``recon_c`` are fitted onto the matching rows
    of ``raw_c`` and the RMSD over the selected atoms is computed from the
    transformed coordinates.

    Parameters:
        recon_c: object whose ``get_atoms()`` yields atoms exposing
            ``get_name()`` and ``get_coord()``.
        raw_c: tensor of raw coordinates; it is viewed as ``(-1, 3)``.
        mask: per-residue flags that account for padding up to max_len.
        gapmask: per-residue flags that account for gaps.

    Returns:
        The RMSD, or ``-1`` if any step of the computation fails.
    """
    target_atoms = ['N', 'CA', 'C', 'O']
    try:
        mask = mask.bool().cpu().numpy()
        gapmask = gapmask.bool().cpu().numpy()
        raw_c = raw_c.view(-1, 3).cpu().numpy()

        recon_coords = {name: [] for name in target_atoms}
        for atom in recon_c.get_atoms():
            atom_n = atom.get_name()
            if atom_n in recon_coords:
                recon_coords[atom_n].append(atom.get_coord())

        # np.stack requires a real sequence: passing a generator is rejected
        # by current NumPy, which previously made this function always hit
        # the except branch and return -1.
        recon_backbone = np.stack(
            [np.stack(recon_coords[name]) for name in target_atoms],
            axis=1).reshape(-1, 3)

        # Each residue contributes four backbone atoms, hence the repeat.
        raw_selection = np.repeat(gapmask, 4)
        recon_selection = np.repeat(gapmask[mask], 4)

        sup = SVDSuperimposer()
        sup.set(raw_c[raw_selection].reshape(-1, 3),
                recon_backbone[recon_selection].reshape(-1, 3))
        sup.run()
        rot, trans = sup.get_rotran()

        transform_c = np.dot(recon_backbone, rot) + trans
        diff = raw_c[raw_selection] - transform_c[recon_selection]
        rmsd = np.sqrt(np.sum(diff * diff) / np.sum(gapmask * 4))
    except Exception:
        # Best-effort contract kept for callers: failure is reported as -1.
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        rmsd = -1

    return rmsd
 def RMSD_biopython(self, x, y):
     """Kabsch algorithm: return the rotation matrix that fits ``y`` onto ``x``."""
     fitter = SVDSuperimposer()
     fitter.set(x, y)
     fitter.run()
     # only the rotation is handed back; the translation is discarded
     return fitter.get_rotran()[0]
Beispiel #8
0
def compute_transformation(c_ref,c):
    """Fit ``c`` onto ``c_ref`` and return ``(rms, rotation, translation)``."""
    fitter = SVDSuperimposer()
    fitter.set(c_ref, c)
    fitter.run()
    fit_rms = fitter.get_rms()
    rotation, translation = fitter.get_rotran()
    return (fit_rms, rotation, translation)
def compute_centroid(reader, mean_structure, num_confs, start=None, stop=None):
    """
        Compares each structure to the mean and returns the one with the lowest RMSF

        Parameters:
            reader (readers.LorenzoReader2): An active reader on the trajectory file to analyze.
            mean_structure (numpy.array): The position of each particle in the mean configuration.  A 3xN array.
            num_confs (int): The number of configurations in the reader.  
            <optional> start (int): The starting configuration ID to begin averaging at.  Used if parallel.
            <optional> stop (int): The configuration ID on which to end the averaging.  Used if parallel.

        Returns:
            tuple: (centroid positions, centroid a1 vectors, centroid a3 vectors, lowest RMSF,
            time of the centroid frame).  If no frame is accepted the arrays are all zeros,
            the RMSF is the initial sentinel and the time is None.
    """
    if stop is None:
        stop = num_confs
    else:
        stop = int(stop)
    if start is None:
        start = 0
    else:
        start = int(start)
    confid = 0

    # Use the single-value decomposition method for superimposing configurations
    sup = SVDSuperimposer()
    lowest_rmsf = 100000  #if you have a larger number than this, we need to talk...
    centroid_candidate = np.zeros_like(mean_structure)
    centroid_a1 = np.zeros_like(mean_structure)
    centroid_a3 = np.zeros_like(mean_structure)
    # Initialised like the other results so the return statement cannot hit
    # an unbound name when no frame is read or none beats the sentinel.
    centroid_t = None

    mysystem = reader.read(n_skip=start)

    while mysystem != False and confid < stop:
        mysystem.inbox()
        # calculate alignment transform
        cur_conf = mysystem.positions
        # NOTE(review): `indexes` is not a parameter of this function; it is
        # resolved from the enclosing/module scope at call time.
        indexed_cur_conf = mysystem.positions[indexes]
        cur_conf_a1 = mysystem.a1s
        cur_conf_a3 = mysystem.a3s
        sup.set(mean_structure, indexed_cur_conf)
        sup.run()
        rot, tran = sup.get_rotran()

        # positions are rotated and shifted; orientation vectors only rotated
        cur_conf = np.einsum('ij, ki -> kj', rot, cur_conf) + tran
        cur_conf_a1 = np.einsum('ij, ki -> kj', rot, cur_conf_a1)
        cur_conf_a3 = np.einsum('ij, ki -> kj', rot, cur_conf_a3)
        RMSF = sup.get_rms()
        print("Frame number:", confid, "RMSF:", RMSF)
        if RMSF < lowest_rmsf:
            centroid_candidate = cur_conf
            centroid_a1 = cur_conf_a1
            centroid_a3 = cur_conf_a3
            lowest_rmsf = RMSF
            centroid_t = mysystem.time

        confid += 1
        mysystem = reader.read()

    return centroid_candidate, centroid_a1, centroid_a3, lowest_rmsf, centroid_t
Beispiel #10
0
def get_rot_tran(y, x):
    """Return rotation, translation and RMSD obtained by fitting ``y`` onto ``x``."""
    fitter = SVDSuperimposer()
    # x is the reference set, y the set that gets moved (AC over AD)
    fitter.set(x, y)
    fitter.run()
    fit_rms = fitter.get_rms()
    rotation, translation = fitter.get_rotran()
    return (rotation, translation, fit_rms)
Beispiel #11
0
def compute_deviations(reader,
                       mean_structure,
                       num_confs,
                       start=None,
                       stop=None):
    """
        Measures how far each nucleotide is from the mean structure in every frame

        Parameters:
            reader (readers.LorenzoReader2): An active reader on the trajectory file to analyze.
            mean_structure (numpy.array): The position of each particle in the mean configuration.
            num_confs (int): The number of configurations in the reader.
            <optional> start (int): Number of configurations skipped on the first read.  Used if parallel.
            <optional> stop (int): Upper bound on the number of frames processed.  Used if parallel.

        Returns:
            deviations (list): One list per frame with the distance of every nucleotide from the mean structure after alignment.
    """
    stop = num_confs if stop is None else int(stop)
    start = 0 if start is None else int(start)

    def nucleotide_positions(conf):
        # gathers the cm_pos of every nucleotide into one array
        return np.array([nuc.cm_pos for nuc in conf._nucleotides])

    # SVD-based superposition of each frame onto the mean
    fitter = SVDSuperimposer()
    deviations = []
    frames_done = 0

    system = reader._get_system(N_skip=start)
    while system != False and frames_done < stop:
        system.inbox_system()

        # compute the transform that puts this frame on the mean
        positions = nucleotide_positions(system)
        fitter.set(mean_structure, positions)
        fitter.run()
        print("Frame number:", frames_done, "RMSF:", fitter.get_rms())
        rotation, translation = fitter.get_rotran()

        # apply it row by row, then measure the per-nucleotide distance;
        # a plain list keeps the result json-compatible
        aligned = np.array(
            [np.dot(row, rotation) + translation for row in positions])
        deviations.append(
            [np.linalg.norm(delta) for delta in aligned - mean_structure])

        frames_done += 1
        system = reader._get_system()

    return deviations
Beispiel #12
0
def super_pdb(coords1, coords2):
    """Return the RMSD after superimposing ``coords2`` onto ``coords1``.

    Parameters:
        coords1: sequence of reference coordinates.
        coords2: sequence of coordinates to fit; must have the same length.

    Returns:
        The RMSD reported by the superimposer.

    Exits the process with status 1 (after writing an error to stderr) when
    the two coordinate sets differ in length.
    """
    if len(coords1) != len(coords2):
        # print() function form: the Python 2 `print >>` statement raises a
        # TypeError under Python 3 before the exit is reached.
        print('ERROR: Structures with different length', file=sys.stderr)
        sys.exit(1)
    svd = SVDSuperimposer()
    svd.set(np.array(coords1), np.array(coords2))
    svd.run()
    return svd.get_rms()
Beispiel #13
0
def change_basis(reader,
                 align_conf,
                 components,
                 num_confs,
                 start=None,
                 stop=None):
    """
    Transforms each configuration in a trajectory into a point in principal component space

    Parameters:
        reader (readers.ErikReader): An active reader on the trajectory file to analyze.
        align_conf (numpy.array): The position of each particle in the mean configuration.  A 3xN array.
        components (numpy.array): The principal components of the trajectory.  A 3*Nx3*N array.
        num_confs (int): The number of configurations in the reader.  
        <optional> start (int): The starting configuration ID to begin averaging at.  Used if parallel.
        <optional> stop (int): The configuration ID on which to end the averaging.  Used if parallel.

    Returns:
        coordinates (numpy.array): The positions of each frame of the trajectory in principal component space.
    """

    if stop is None:
        stop = num_confs
    else:
        stop = int(stop)
    if start is None:
        start = 0
    else:
        start = int(start)

    mysystem = reader.read(n_skip=start)

    # NOTE: allocated with np.empty, so rows beyond the last frame actually
    # read stay uninitialised if the reader runs out before `stop` frames.
    coordinates = np.empty((stop, len(mysystem.positions) * 3))
    sup = SVDSuperimposer()
    confid = 0

    while mysystem != False and confid < stop:
        print("-->", "frame", confid, "time={}".format(mysystem.time))
        mysystem.inbox()
        cur_conf = mysystem.positions
        sup.set(align_conf, cur_conf)
        sup.run()
        rot, tran = sup.get_rotran()
        # right-multiply every position vector by the rotation, then shift
        cur_conf = np.einsum('ij, ki -> kj', rot, cur_conf) + tran
        # project the flattened, aligned frame onto the components
        coordinates[confid] = np.dot(components, cur_conf.flatten())

        confid += 1
        mysystem = reader.read()

    return coordinates
def get_rmsd(coord1, coord2):
    """Superimpose ``coord2`` onto ``coord1``, print the fit and return the RMSD.

    Parameters:
        coord1: sequence of reference coordinates.
        coord2: sequence of coordinates to fit; must have the same length.

    Returns:
        The RMSD of the fit. The rotation, translation and RMSD are also
        printed to stdout.

    Exits the process with status 1 (after writing an error to stderr) when
    the two coordinate sets differ in length.
    """
    if len(coord1) != len(coord2):
        print("ERROR: The sets of coordinates have different sizes",
              file=sys.stderr)
        sys.exit(1)
    svd = SVDSuperimposer()
    # the superimposer works on numeric arrays, not plain lists
    svd.set(np.array(coord1), np.array(coord2))
    svd.run()
    rmsd = svd.get_rms()
    rot, tran = svd.get_rotran()
    print('R', rot)
    print('T', tran)
    print('RMSD', rmsd)
    # Returned as well so the function lives up to its name; callers that
    # ignored the previous None result are unaffected.
    return rmsd
def get_rmsd(coord1,coord2):
    """Superimpose ``coord2`` onto ``coord1``, print the transform and return the RMSD.

    Parameters:
        coord1: sequence of reference coordinates.
        coord2: sequence of coordinates to fit; must have the same length.

    Returns:
        The RMSD of the fit. The rotation and translation are printed to stdout.

    Exits the process with status 1 (after writing an error to stderr) when
    the two coordinate sets differ in length.
    """
    if len(coord1) != len(coord2):
        # The original `print >> sys.stderr.write(...)` mixed two idioms and
        # raised a TypeError under Python 3 before reaching the exit.
        print("ERROR: The set of Coordinate have different size.",
              file=sys.stderr)
        sys.exit(1)
    svd = SVDSuperimposer()
    svd.set(np.array(coord1), np.array(coord2))
    svd.run()
    rmsd = svd.get_rms()
    rot, tran = svd.get_rotran()
    print("R", rot)
    print("T", tran)
    return rmsd
Beispiel #16
0
def get_cov(reader, align_conf, num_confs, start=None, stop=None):
    """
        Accumulates the outer products of each frame's deviation from the reference structure

        Parameters:
            reader (readers.ErikReader): An active reader on the trajectory file to analyze.
            align_conf (numpy.array): The position of each particle in the configuration that frames are aligned to.
            num_confs (int): The number of configurations in the reader.
            <optional> start (int): Number of configurations skipped on the first read.  Used if parallel.
            <optional> stop (int): Upper bound on the number of frames processed.  Used if parallel.

        Returns:
            covariation_matrix (numpy.array): A 3Nx3N array holding the sum, over all processed frames,
            of the outer product of the flattened deviation vector with itself (not normalised).
    """
    stop = num_confs if stop is None else int(stop)
    start = 0 if start is None else int(start)

    frame = reader.read(n_skip=start)

    n_coords = len(frame.positions) * 3
    covariation_matrix = np.zeros((n_coords, n_coords))
    fitter = SVDSuperimposer()
    frames_done = 0

    # align every frame of this chunk and add its deviation outer product
    while frame != False and frames_done < stop:
        print("-->", "frame", frames_done, "time={}".format(frame.time))
        frame.inbox()
        positions = frame.positions
        fitter.set(align_conf, positions)
        fitter.run()
        rotation, translation = fitter.get_rotran()
        # right-multiply every position vector by the rotation, then shift
        aligned = np.einsum('ij, ki -> kj', rotation, positions) + translation
        deviation = (aligned - align_conf).flatten()
        covariation_matrix += np.einsum('i,j -> ij', deviation, deviation)

        frames_done += 1
        frame = reader.read()

    return covariation_matrix
def run_sup3d(coord1, coord2):
    """Superimpose ``coord2`` onto ``coord1`` and print the results.

    Prints, in order: the RMSD after the fit, the rotation matrix, the
    translation vector and the transformed copy of ``coord2``.

    Parameters:
        coord1: sequence of reference coordinates.
        coord2: sequence of coordinates to fit onto the reference.

    Returns:
        None.
    """
    sup = SVDSuperimposer()
    # a freshly created superimposer is empty; load both coordinate sets
    sup.set(np.array(coord1), np.array(coord2))
    # run() performs the actual least-squares fit
    sup.run()
    rmsd = sup.get_rms()
    # rotation matrix and translation vector of the fit
    rot, tran = sup.get_rotran()
    # coord2 after the transformation, i.e. superimposable on coord1
    tcoord = sup.get_transformed()
    # print() function form: the original print statements are a
    # SyntaxError under Python 3
    print(rmsd)
    print(rot)
    print(tran)
    print(tcoord)
    return
Beispiel #18
0
def _superimpose_atoms(ref_points, points, atoms):
    if ref_points is None or points is None or atoms is None:
        return (None, None, None, None)
    ref_vec = []
    vec = []
    for a in atoms:
        if a in ref_points and a in points:
            ref_vec.append(ref_points[a])
            vec.append(points[a])
    if len(vec) < 3:
        return (None, None, None, None)
    sup = SVDSuperimposer()
    sup.set(np.array(ref_vec, 'f'), np.array(vec, 'f'))
    sup.run()
    (rot, tran) = sup.get_rotran()
    rms = sup.get_rms()
    return (_apply_rot_tran(points, rot, tran), rot, tran, rms)
def get_pca(reader, align_conf, num_confs, start=None, stop=None):
    """
        Collects each frame's deviation from the reference structure after alignment

        Parameters:
            reader (readers.LorenzoReader2): An active reader on the trajectory file to analyze.
            align_conf (numpy.array): The position of each particle in the configuration that frames are aligned to.
            num_confs (int): The number of configurations in the reader.
            <optional> start (int): Number of configurations skipped on the first read.  Used if parallel.
            <optional> stop (int): Upper bound on the number of frames processed.  Used if parallel.

        Returns:
            deviations_matrix (numpy.array): One row per frame holding the flattened difference between
            the aligned frame and align_conf.
    """
    stop = num_confs if stop is None else int(stop)
    start = 0 if start is None else int(start)

    system = reader._get_system(N_skip=start)

    deviations_matrix = np.empty((stop, (len(align_conf)) * 3))
    fitter = SVDSuperimposer()
    frames_done = 0

    # align every frame of this chunk and store its flattened deviation
    while system != False and frames_done < stop:
        print("-->", system._time)
        system.inbox()
        positions = fetch_np(system)
        fitter.set(align_conf, positions)
        fitter.run()
        rotation, translation = fitter.get_rotran()
        # right-multiply every position vector by the rotation, then shift
        aligned = np.einsum('ij, ki -> kj', rotation, positions) + translation
        deviations_matrix[frames_done] = (aligned - align_conf).flatten()

        frames_done += 1
        system = reader._get_system()

    return deviations_matrix
Beispiel #20
0
    def __init__(self, static, moving):
        """
        Superimpose one structure onto another and keep the result.

        Stores the fit RMS in ``self.rms``, the untouched reference in
        ``self.static`` and the transformed coordinates, one entry per
        element of ``moving``, in ``self.moving``.

        :param static: the reference structure
        :param moving: the structure to be aligned to the reference
        """
        fitter = SVDSuperimposer()
        fitter.set(np.asarray(static), np.asarray(moving))
        fitter.run()
        rotation, translation = fitter.get_rotran()

        self.rms = fitter.get_rms()
        self.static = static

        # apply the fitted transform to each entry of the moving structure
        transformed = []
        for idx in range(len(moving)):
            transformed.append(
                np.dot(np.asarray(moving[idx]), rotation) + translation)
        self.moving = transformed
Beispiel #21
0
    def set_atoms(self, fixed, moving):
        """Compute the rotation/translation that fits ``moving`` onto ``fixed``.

        The coordinates of both atom lists are handed to the superimposer
        with ``fixed`` as the reference; the resulting RMS is stored in
        ``self.rms`` and the ``(rotation, translation)`` pair in
        ``self.rotran``.

        :param fixed: list of (fixed) atoms
        :param moving: list of (moving) atoms
        :type fixed,moving: [L{Atom}, L{Atom},...]
        :raises PDBException: if the two lists differ in length
        """
        if len(fixed) != len(moving):
            raise PDBException("Fixed and moving atom lists differ in size")
        n_atoms = len(fixed)
        reference_xyz = numpy.zeros((n_atoms, 3))
        mobile_xyz = numpy.zeros((n_atoms, 3))
        for row, (fixed_atom, moving_atom) in enumerate(zip(fixed, moving)):
            reference_xyz[row] = fixed_atom.get_coord()
            mobile_xyz[row] = moving_atom.get_coord()
        fitter = SVDSuperimposer()
        fitter.set(reference_xyz, mobile_xyz)
        fitter.run()
        self.rms = fitter.get_rms()
        self.rotran = fitter.get_rotran()
Beispiel #22
0
    def set_atoms(self, fixed, moving):
        """Compute the rotation/translation that fits ``moving`` onto ``fixed``.

        The coordinates of both atom lists are handed to the superimposer
        with ``fixed`` as the reference; the resulting RMS is stored in
        ``self.rms`` and the ``(rotation, translation)`` pair in
        ``self.rotran``.

        @param fixed: list of (fixed) atoms
        @param moving: list of (moving) atoms
        @type fixed,moving: [L{Atom}, L{Atom},...]
        @raise PDBException: if the two lists differ in length
        """
        if len(fixed) != len(moving):
            raise PDBException("Fixed and moving atom lists differ in size")
        atom_count = len(fixed)
        target_coords = numpy.zeros((atom_count, 3))
        source_coords = numpy.zeros((atom_count, 3))
        for position, (target_atom, source_atom) in enumerate(
                zip(fixed, moving)):
            target_coords[position] = target_atom.get_coord()
            source_coords[position] = source_atom.get_coord()
        superimposer = SVDSuperimposer()
        superimposer.set(target_coords, source_coords)
        superimposer.run()
        self.rms = superimposer.get_rms()
        self.rotran = superimposer.get_rotran()
Beispiel #23
0
def main():
    """Command-line entry point: superimpose configurations onto a reference.

    Reads the reference configuration and every "victim" configuration,
    fits each victim onto the reference (optionally using only the particle
    indexes listed in the ``-i`` file) and writes the result to
    ``aligned<N>.dat``, where N is the position of the victim on the
    command line.

    Exits with status 1 if the index file cannot be parsed as integers.
    """
    parser = argparse.ArgumentParser(
        prog=os.path.basename(__file__),
        description=
        "superimposes one or more structures sharing a topology to a reference structure"
    )
    parser.add_argument('reference',
                        type=str,
                        nargs=1,
                        help="The reference configuration to superimpose to")
    parser.add_argument(
        'victims',
        type=str,
        nargs='+',
        help="The configuraitons to superimpose on the reference")
    parser.add_argument(
        '-i',
        metavar='index_file',
        dest='index_file',
        nargs=1,
        help=
        'Align to only a subset of particles from a space-separated list in the provided file'
    )
    args = parser.parse_args()

    #run system checks
    from oxDNA_analysis_tools.config import check_dependencies
    check_dependencies(["python", "numpy", "Bio"])

    #Get the reference files
    ref_dat = args.reference[0]

    #-i will make it only run on a subset of nucleotides.
    #The index file is a space-separated list of particle IDs
    if args.index_file:
        index_file = args.index_file[0]
        with open(index_file, 'r') as f:
            indexes = f.readline().split()
        try:
            indexes = [int(i) for i in indexes]
        except ValueError:
            print(
                "ERROR: The index file must be a space-seperated list of particles.  These can be generated using oxView by clicking the \"Download Selected Base List\" button",
                file=stderr)
            # Stop here: continuing would index the positions with strings.
            raise SystemExit(1)
    else:
        with ErikReader(ref_dat) as r:
            indexes = list(range(len(r.read().positions)))

    #Read the reference and keep only the particles used for the fit
    with ErikReader(ref_dat) as r:
        ref = r.read()
        ref.inbox()
        ref_conf = ref.positions[indexes]

    #Create list of configurations to superimpose
    to_sup = []
    for victim in args.victims:
        with ErikReader(victim) as r:
            # named `conf`, not `sys`, so the sys module is not shadowed
            conf = r.read()
            conf.inbox()
        to_sup.append(conf)

    sup = SVDSuperimposer()

    #Run the biopython superimposer on each configuration and rewrite its configuration file
    for i, conf in enumerate(to_sup):
        indexed_cur_conf = conf.positions[indexes]
        sup.set(ref_conf, indexed_cur_conf)
        sup.run()
        rot, tran = sup.get_rotran()
        # positions are rotated and shifted; orientation vectors only rotated
        conf.positions = np.einsum('ij, ki -> kj', rot, conf.positions) + tran
        conf.a1s = np.einsum('ij, ki -> kj', rot, conf.a1s)
        conf.a3s = np.einsum('ij, ki -> kj', rot, conf.a3s)
        conf.write_new("aligned{}.dat".format(i))
        print("INFO: Wrote file aligned{}.dat".format(i), file=stderr)
Beispiel #24
0
def _analyse_centre_of_mass(chain):
    """Return the mass-weighted mean position of all atoms in *chain* as a Vector."""
    weighted_sum = Vector(0, 0, 0)
    total_mass = 0.0
    for atom in chain.get_atoms():
        weighted_sum += Vector(atom.get_vector()._ar * np.array(atom.mass))
        total_mass += atom.mass
    # the division operator replaces the Python-2-only __div__ dunder call
    return weighted_sum / total_mass


def _analyse_chain_coords(chain):
    """Return the coordinates of every atom in *chain* as one array, one row per atom."""
    return np.asarray([atom.get_coord() for atom in chain.get_atoms()])


def analyse(input_file_name,
            refer_file_name,
            moved_chain_id,
            fixed_chain_id,
            r_moved_chain_id,
            r_fixed_chain_id,
            output_file1,
            output_file2,
            r_model_number=0):
    """Describe the placement of one chain relative to another in every model.

    Each model of the input structure is superimposed onto the reference by
    fitting its fixed chain onto the reference fixed chain. After that the
    centre of mass of the moved chain and the rotation that fits the moved
    chain onto the reference moved chain are recorded.

    Parameters:
        input_file_name: PDB file with the models to analyse.
        refer_file_name: PDB file with the reference structure.
        moved_chain_id: chain id of the moved chain in the input.
        fixed_chain_id: chain id of the fixed chain in the input.
        r_moved_chain_id: chain id of the moved chain in the reference.
        r_fixed_chain_id: chain id of the fixed chain in the reference.
        output_file1: path of the tab-separated file receiving, per model:
            index, centre distance, theta, phi, theta_x, theta_y, theta_z.
        output_file2: path of the tab-separated file receiving, per model:
            index, x, y, z of the moved centre of mass, and the nine
            rotation-matrix entries written column by column.
        r_model_number: model of the reference to use (default 0).

    Returns:
        None; results are written to the two output files.
    """
    structure = PDBParser(PERMISSIVE=1).get_structure('to_analyse',
                                                      input_file_name)
    reference = PDBParser(PERMISSIVE=1).get_structure('reference',
                                                      refer_file_name)

    r_chain_moved = reference[r_model_number][r_moved_chain_id]
    r_chain_fixed = reference[r_model_number][r_fixed_chain_id]
    # The reference is never transformed, so its coordinates are built once.
    ref_fixed_coords = _analyse_chain_coords(r_chain_fixed)
    ref_moved_coords = _analyse_chain_coords(r_chain_moved)

    theta = []
    phi = []
    theta_x = []
    theta_y = []
    theta_z = []
    d = []
    coords_x = []
    coords_y = []
    coords_z = []
    matrix_entries = [[] for _ in range(9)]

    for model_number, model in enumerate(structure):
        chain_moved = model[moved_chain_id]
        chain_fixed = model[fixed_chain_id]

        # Bring the whole model into the reference frame via the fixed chain.
        sup = SVDSuperimposer()
        sup.set(ref_fixed_coords, _analyse_chain_coords(chain_fixed))
        sup.run()
        R, V = sup.get_rotran()
        for atom in model.get_atoms():
            atom.transform(R, V)

        # Centres of mass are only meaningful after the alignment above.
        moved_centre = _analyse_centre_of_mass(chain_moved)
        fixed_centre = _analyse_centre_of_mass(chain_fixed)
        if fixed_centre.norm() > 0.5:
            print("Fixed chain norm is " + str(fixed_centre.norm()) +
                  " in model " + str(model_number) +
                  ". Should have been at the origin. Check code...")

        x = moved_centre._ar[0]
        y = moved_centre._ar[1]
        z = moved_centre._ar[2]
        coords_x.append(x)
        coords_y.append(y)
        coords_z.append(z)

        d.append((moved_centre - fixed_centre).norm())
        if moved_centre.norm() > 1e-6:
            theta.append(moved_centre.angle(Vector(0, 0, 1)))
        else:
            theta.append(0.0)
        # phi is undefined when the xy projection vanishes; record 0.0 (as is
        # done for theta) so every per-model list keeps one entry per model
        # and the output rows cannot become misaligned.
        if np.sqrt(x * x + y * y) > 1e-6:
            phi.append(np.arctan2(y, x))
        else:
            phi.append(0.0)

        # Rotation that fits the (already aligned) moved chain onto the
        # reference moved chain.
        sup = SVDSuperimposer()
        sup.set(ref_moved_coords, _analyse_chain_coords(chain_moved))
        sup.run()
        R, V = sup.get_rotran()
        theta_x.append(np.arctan2(R[2][1], R[2][2]))
        theta_y.append(
            np.arctan2(-R[2][0],
                       np.sqrt(R[2][1] * R[2][1] + R[2][2] * R[2][2])))
        theta_z.append(np.arctan2(R[1][0], R[0][0]))
        for col in range(3):
            matrix_entries[col].append(R[0][col])
            matrix_entries[col + 3].append(R[1][col])
            matrix_entries[col + 6].append(R[2][col])

    n_frames = len(d)

    with open(output_file1, "w+") as f_results1:
        for frame in range(n_frames):
            fields = (frame, d[frame], theta[frame], phi[frame],
                      theta_x[frame], theta_y[frame], theta_z[frame])
            f_results1.write('\t'.join(str(value) for value in fields) + '\n')

    with open(output_file2, "w+") as f_results2:
        for frame in range(n_frames):
            fields = [frame, coords_x[frame], coords_y[frame], coords_z[frame]]
            # column-major order: R[0][c], R[1][c], R[2][c] for c = 0, 1, 2
            fields.extend(matrix_entries[col + 3 * row][frame]
                          for col in range(3) for row in range(3))
            # every field, including the last, is followed by a tab
            f_results2.write('\t'.join(str(value) for value in fields) +
                             '\t\n')
          [52.71, -1.18, 49.38]], 'f')

# Demonstration of the SVDSuperimposer workflow on the arrays x and y.
sup = SVDSuperimposer()

# Load the coordinates: x is the reference set,
# y is the set that will be rotated and translated onto x.
sup.set(x, y)

# Perform the least-squares fit.
sup.run()

# RMSD of the fit.
rms = sup.get_rms()

# Rotation matrix (to be applied by right-multiplication) and translation.
rot, tran = sup.get_rotran()

# Apply the transformation to y by hand ...
y_on_x1 = dot(y, rot) + tran

# ... which is the same thing the superimposer returns directly.
y_on_x2 = sup.get_transformed()


def simple_matrix_print(matrix):
    """Simple string to display a floating point matrix

    This should give the same output on multiple systems.  This is
    needed because a simple "print matrix" uses scientific notation
    which varies between platforms.
Beispiel #26
0
class SVDSuperimposerTest(unittest.TestCase):
    """Regression tests for ``SVDSuperimposer`` on a fixed four-point fixture.

    ``setUp`` registers ``self.x`` as the reference coordinates and ``self.y``
    as the coordinates to be fitted, without running the fit.
    """

    def setUp(self):
        self.x = array([[51.65, -1.90, 50.07], [50.40, -1.23, 50.65],
                        [50.68, -0.04, 51.54], [50.22, -0.02, 52.85]])

        self.y = array([[51.30, -2.99, 46.54], [51.09, -1.88, 47.58],
                        [52.36, -1.20, 48.03], [52.71, -1.18, 49.38]])

        self.sup = SVDSuperimposer()
        self.sup.set(self.x, self.y)

    def _assert_rounded_equal(self, actual, expected):
        """Assert two arrays are identical after rounding to 3 decimals."""
        self.assertTrue(
            array_equal(around(actual, decimals=3),
                        around(expected, decimals=3)))

    def test_get_init_rms(self):
        """The RMSD before fitting is computed lazily and matches the
        reference value to three decimals."""
        x = array([[1.19, 1.28, 1.37], [1.46, 1.55, 1.64], [1.73, 1.82, 1.91]])
        y = array([[1.91, 1.82, 1.73], [1.64, 1.55, 1.46], [1.37, 1.28, 1.19]])
        self.sup.set(x, y)
        self.assertIsNone(self.sup.init_rms)
        init_rms = 0.8049844719
        # assertEqual, not assertTrue: assertTrue(a, b) would treat b as the
        # failure message and pass for any non-zero a.
        self.assertEqual(float("%.3f" % self.sup.get_init_rms()),
                         float("%.3f" % init_rms))

    def test_oldTest(self):
        """Full workflow: state after set(), after run(), and the getters."""
        # After set(): inputs stored, nothing computed yet.
        self._assert_rounded_equal(self.sup.reference_coords, self.x)
        self._assert_rounded_equal(self.sup.coords, self.y)
        self.assertIsNone(self.sup.rot)
        self.assertIsNone(self.sup.tran)
        self.assertIsNone(self.sup.rms)
        self.assertIsNone(self.sup.init_rms)

        self.sup.run()

        # run() must leave the stored inputs untouched ...
        self._assert_rounded_equal(self.sup.reference_coords, self.x)
        self._assert_rounded_equal(self.sup.coords, self.y)

        # ... and populate rotation and translation, but not the RMS values.
        rot = array([[0.68304983, 0.53664371, 0.49543563],
                     [-0.52277295, 0.83293229, -0.18147242],
                     [-0.51005037, -0.13504564, 0.84947707]])
        tran = array([38.78608157, -20.65451334, -15.42227366])
        self._assert_rounded_equal(self.sup.rot, rot)
        self._assert_rounded_equal(self.sup.tran, tran)
        self.assertIsNone(self.sup.rms)
        self.assertIsNone(self.sup.init_rms)

        rms = 0.00304266526014
        self.assertEqual(float("%.3f" % self.sup.get_rms()),
                         float("%.3f" % rms))

        rot_get, tran_get = self.sup.get_rotran()
        self._assert_rounded_equal(rot_get, rot)
        self._assert_rounded_equal(tran_get, tran)

        # Transforming y by hand and via get_transformed() must both give
        # the reference solution.
        y_on_x1 = dot(self.y, rot) + tran
        y_x_solution = array(
            [[5.16518846e+01, -1.90018270e+00, 5.00708397e+01],
             [5.03977138e+01, -1.22877050e+00, 5.06488200e+01],
             [5.06801788e+01, -4.16095666e-02, 5.15368866e+01],
             [5.02202228e+01, -1.94372374e-02, 5.28534537e+01]])
        self._assert_rounded_equal(y_on_x1, y_x_solution)

        y_on_x2 = self.sup.get_transformed()
        self._assert_rounded_equal(y_on_x2, y_x_solution)
Beispiel #27
0
def tm_movement_2D(pdbs1, pdbs2, mode, data, gn_dictionary):
    """Compare the 7TM helix arrangement of two structure sets in a 2D plane.

    For each of the seven transmembrane helices (TMs) a group of up to three
    reference generic numbers (GNs) and a nearby "helper" group are selected
    from the GNs conserved in both sets. Average pairwise distances between
    all selected GNs are turned into 3D pseudo-coordinates, set 2 is
    superposed onto set 1 using the three most stable TMs, and both sets are
    projected onto a common plane.

    Parameters:
        pdbs1, pdbs2: structure identifiers of the two sets; passed to
            ``Distances.load_pdbs`` and used (via ``len``) to average distances.
        mode (int): 0 = extracellular, 1 = intracellular, 2 = major pocket,
            3 = membrane middle.
        data (dict): read for ``data['gpcr_class']`` (mode 3 only) and
            ``data['tab4'][<gn>]['angles_set1' / 'angles_set2'][11]``.
        gn_dictionary (dict): maps the GNs used internally to the numbering
            used in ``data['tab4']`` and in the returned ``gns_used``.

    Returns:
        dict with keys ``coordinates_set1``, ``coordinates_set2`` (one labeled
        point per TM: label, x, y, z, rotation) and ``gns_used``; or an empty
        list when too few conserved residues are available.
    """
    string_mode = ["extracellular", "intracellular", "pocket", "middle"]
    intracellular = (mode == 1)
    print("COMPARISON", string_mode[mode])
    print(pdbs1)
    print("VS")
    print(pdbs2)

    distances_set1 = Distances()
    distances_set1.load_pdbs(pdbs1)
    distances_set1.filtered_gns = True

    distances_set2 = Distances()
    distances_set2.load_pdbs(pdbs2)
    distances_set2.filtered_gns = True

    # Only GNs conserved in both sets can be compared
    conserved_set1 = distances_set1.fetch_conserved_gns_tm()
    conserved_set2 = distances_set2.fetch_conserved_gns_tm()
    conserved = [x for x in conserved_set2 if x in conserved_set1]

    # Independent lists per TM (avoid the shared-object aliasing of [[]] * 7)
    gns = [[] for _ in range(7)]
    middle_gpcr = [[] for _ in range(7)]
    if mode <= 1: # Intracellular or Extracellular
        for i in range(0,7):
            tm_only = [x for x in conserved if x[0]==str(i+1)]
            if intracellular and i % 2 == 0: # all odd-numbered TMs (TM number = i+1)
                tm_only.reverse()
            elif not intracellular and i % 2 == 1: # all even-numbered TMs (TM number = i+1)
                tm_only.reverse()
            if len(tm_only) < 3:
                print("too few residues")
                return []
            gns[i] = tm_only[0:3]

            # Helper points: the furthest available window of three residues,
            # at most 12 positions into the helix
            for upwards in range(12, 6, -1):
                if len(tm_only) >= upwards:
                    middle_gpcr[i] = tm_only[(upwards-3):upwards]
                    break

    elif mode == 2: # Major pocket (class A)
        ligand_references = [['1x39', '1x40','1x41'], ['2x56', '2x57','2x58'], ['3x31', '3x32', '3x33'], ['4x56', '4x57', '4x58'], ['5x43', '5x44', '5x45'], ['6x51', '6x52', '6x53'], ['7x39', '7x40', '7x41']]
        for i in range(0,7):
            gns[i] = [x for x in ligand_references[i] if x in conserved]
            tm_only = [x for x in conserved if x[0]==str(i+1)]
            if i % 2 == 1: # all even-numbered TMs (TM number = i+1)
                tm_only.reverse()
            if len(gns[i]) > 0:
                if i % 2 == 1: # all even-numbered TMs (TM number = i+1)
                    start_pos = tm_only.index(gns[i][-1])
                else:
                    start_pos = tm_only.index(gns[i][0])

                gns[i] = tm_only[start_pos:(start_pos+3)]

                # Stay close for this as references
                # NOTE(review): unlike mode <= 1 this loop does not break, so
                # the last (closest) matching window wins - confirm intended.
                for upwards in range(9, 6, -1):
                    if len(tm_only) >= (start_pos+upwards):
                        middle_gpcr[i] = tm_only[(start_pos+upwards-3):(start_pos+upwards)]
                        continue
            else:
                if len(tm_only) < 9:
                    print("too few residues")
                    return []
                else:
                    gns[i] = tm_only[0:3]
                    middle_gpcr[i] = tm_only[6:9]

    elif mode == 3: # Middle
        # References points from membrane middle of GPCR
        ref_membrane_mid = {}
        ref_membrane_mid["001"] = [['1x43', '1x44','1x45'], ['2x51', '2x52','2x53'], ['3x35', '3x36', '3x37'], ['4x53', '4x54', '4x55'], ['5x45', '5x46', '5x47'], ['6x47', '6x48', '6x49'], ['7x42', '7x43', '7x44']] # A
        #ref_membrane_mid["002"] = [['1x50', '1x51', '1x52'], ['2x57', '2x58', '2x59'], ['3x40','3x41','3x42'], ['4x53', '4x54', '4x55'], ['5x44', '5x45', '5x46'], ['6x48', '6x49', '6x50'], ['7x49', '7x50', '7x51']] # B1
        #ref_membrane_mid["002"] = [['1x50', '1x51', '1x52'], ['2x57', '2x58', '2x59'], ['3x40','3x41','3x42'], ['4x55', '4x56'], ['5x42', '5x43', '5x44'], ['7x47', '7x49']] # B1
        ref_membrane_mid["002"] = [['1x50', '1x51', '1x52'], ['2x57', '2x58', '2x59'], ['3x40','3x41','3x42'], ['4x55', '4x56'], ['5x42', '5x43', '5x44'], ['6x48', '6x49', '6x50'], ['7x47', '7x49']] # B1
        ref_membrane_mid["003"] = ref_membrane_mid["002"] # B2
        ref_membrane_mid["004"] = [['1x48', '1x49', '1x50'], ['2x47', '2x48', '2x49'], ['3x39', '3x40', '3x41'], ['4x40', '4x41', '4x42'], ['5x47', '5x48', '5x49'], ['6x47', '6x48', '6x49'], ['7x39', '7x40', '7x41']] # C
        ref_membrane_mid["006"] = [['1x42', '1x43', '1x44'], ['2x52', '2x53', '2x54'], ['3x37', '3x38', '3x39'], ['4x52', '4x53', '4x54'], ['5x52', '5x53', '5x54'], ['6x42', '6x43', '6x44'], ['7x46', '7x47', '7x48']] # F

        membrane_mid = ref_membrane_mid[data['gpcr_class']]

        # For every class except "001" the reference GNs are translated back
        # through the inverted gn_dictionary before matching against conserved
        if data['gpcr_class'] != "001":
            inv_gn_dictionary = {v: k for k, v in gn_dictionary.items()}
            for index in range(len(membrane_mid)):
                membrane_mid[index] = [inv_gn_dictionary[res] for res in membrane_mid[index]]

        for i in range(0,7):
            gns[i] = [x for x in membrane_mid[i] if x in conserved]
            tm_only = [x for x in conserved if x[0]==str(i+1)]
            if i % 2 == 1: # all even-numbered TMs (TM number = i+1)
                tm_only.reverse()
            if len(gns[i]) > 0:
                if i % 2 == 1: # all even-numbered TMs (TM number = i+1)
                    start_pos = tm_only.index(gns[i][-1])
                else:
                    start_pos = tm_only.index(gns[i][0])

                gns[i] = tm_only[start_pos:(start_pos+3)]

                # Stay close for this as references
                # NOTE(review): no break here either, the last matching
                # window wins - confirm intended.
                for upwards in range(6, 3, -1):
                    if len(tm_only) >= (start_pos+upwards):
                        middle_gpcr[i] = tm_only[(start_pos+upwards-3):(start_pos+upwards)]
                        continue
            else:
                if len(tm_only) < 6:
                    print("too few residues")
                    return []
                else:
                    gns[i] = tm_only[0:3]
                    middle_gpcr[i] = tm_only[3:6]

    # Merge the reference and the helper points
    gns_flat = [y for x in gns for y in x]
    middle_gpcr = [list(filter(lambda x: x in conserved and x not in gns_flat, tm_list)) for tm_list in middle_gpcr]

    ends_and_middle = gns[:]
    ends_and_middle.extend(middle_gpcr)

    # An empty group cannot be assigned to a TM segment below (this used to
    # surface as an IndexError); report it like the other shortage cases.
    if any(len(group) == 0 for group in ends_and_middle):
        print("too few residues")
        return []

    ends_and_middle_flat = [y for x in ends_and_middle for y in x]
    # Group index (position in ends_and_middle) for every flattened GN
    ends_and_middle_grouping = [x for x in range(0, len(ends_and_middle)) for y in ends_and_middle[x]]
    # Zero-based TM index of every group, taken from the first character of its first GN
    segment_order = [int(ends_and_middle[x][0][0])-1 for x in range(0, len(ends_and_middle))]

    distances_set1.filter_gns.extend([y for x in ends_and_middle for y in x])
    distances_set2.filter_gns = distances_set1.filter_gns
    distances_set1.fetch_distances_tm(distance_type = "HC")
    distances_set2.fetch_distances_tm(distance_type = "HC")


    # Symmetric matrices of the average distance between every pair of selected GNs
    membrane_data1 = [x[:] for x in [[0] * len(ends_and_middle_flat)] * len(ends_and_middle_flat)]
    membrane_data2 = [x[:] for x in [[0] * len(ends_and_middle_flat)] * len(ends_and_middle_flat)]
    for i in range(0,len(ends_and_middle_flat)-1):
        for j in range(i+1, len(ends_and_middle_flat)):
            if right_gn_order(ends_and_middle_flat[i], ends_and_middle_flat[j]):
                filter_key = ends_and_middle_flat[i] + "_" + ends_and_middle_flat[j]
            else:
                filter_key = ends_and_middle_flat[j] + "_" + ends_and_middle_flat[i]

            if ends_and_middle_flat[i] != ends_and_middle_flat[j]:
                membrane_data1[i][j] = sum(distances_set1.data[filter_key])/len(pdbs1)
                membrane_data1[j][i] = membrane_data1[i][j]
                membrane_data2[i][j] = sum(distances_set2.data[filter_key])/len(pdbs2)
                membrane_data2[j][i] = membrane_data2[i][j]

    # Identify most stable TMs by ranking the variations to all other helices
    membrane_data1 = np.array([np.array(x) for x in membrane_data1])
    membrane_data2 = np.array([np.array(x) for x in membrane_data2])
    diff_distances = [x[:] for x in [[0] * len(ends_and_middle)] * len(ends_and_middle)]
    for i in range(0,max(ends_and_middle_grouping)):
        for j in range(i+1, max(ends_and_middle_grouping)+1):
            # Calculate movements for each TM relative to their "normal" distance
            # selected residues for group 1 and 2
            group_1 = [x for x in range(0,len(ends_and_middle_grouping)) if ends_and_middle_grouping[x] == i]
            group_2 = [x for x in range(0,len(ends_and_middle_grouping)) if ends_and_middle_grouping[x] == j]

            # Summed absolute difference as a percentage of the mean summed distance
            diff_distances[i][j] = np.sum(abs(membrane_data1[group_1][:, group_2] - membrane_data2[group_1][:, group_2]))/(np.sum(membrane_data1[group_1][:, group_2]+membrane_data2[group_1][:, group_2])/2)*100
            diff_distances[j][i] = diff_distances[i][j]

    # Ranking for each TM
    # sum_differences is only consumed by the disabled weighted superposition below
    sum_differences = [sum(x) for x in diff_distances]
    # normalized_differences = [((sum_differences[i]-min(sum_differences[0:7]))/(max(sum_differences[0:7])-min(sum_differences[0:7])))**2 for i in range(0,7)]
    for i in range(0,7):
        diff_distances[i] = [sorted(diff_distances[i]).index(x) for x in diff_distances[i]]
    final_rank = [sum([diff_distances[j][i] for j in range(0,7)]) for i in range(0,7)]

    # Grab stable TMs
    tm_ranking = [0] * 7
    sorted_rank = sorted(final_rank)
    for i in range(0,7):
        tm_ranking[i] = final_rank.index(sorted_rank[i])
        final_rank[tm_ranking[i]] = 100 # make sure this TM isn't repeated

    # Calculate 3D coordinates from distance matrix
    tms_centroids_set1, tms_set1 = recreate3Dorder(membrane_data1, ends_and_middle_grouping)
    tms_centroids_set2, tms_set2 = recreate3Dorder(membrane_data2, ends_and_middle_grouping)

    # Align 3D points of set2 with 3D points of set1 using the most stable reference points
    best_rmsd = 1000
    best_set = []
    # Disabled the testing RMSD for now: with tm_ranking[:3] there is exactly
    # one combination, so rot/trans/rmsd below come from that single fit.
    for comb in combinations(tm_ranking[:3], 3):
    #for comb in combinations(tm_ranking[:4], 3):
        sel_refs = [x for x in range(0,len(segment_order)) if segment_order[x] in comb]

        tms_reference_set1 = np.array(tms_centroids_set1[sel_refs], copy = True)
        tms_reference_set2 = np.array(tms_centroids_set2[sel_refs], copy = True)

        imposer = SVDSuperimposer()
        imposer.set(tms_reference_set1, tms_reference_set2)
        imposer.run()
        rot, trans = imposer.get_rotran()
        rmsd = imposer.get_rms()

        print("RMSD", round(rmsd,2), tm_ranking)
        if rmsd < best_rmsd:
            best_set = comb
            best_rmsd = rmsd

    # Check for possible mirroring error
    # (error is only used by the disabled conditions below)
    test_set2 = np.dot(tms_centroids_set2, rot) + trans
    error = 0
    for i in tm_ranking[3:7]:
        if np.linalg.norm(test_set2[i] - tms_centroids_set1[i]) > 5:
            error += 1

    # Always try the z-mirrored version of set 2 and keep it only when it
    # superposes better; otherwise the mirroring is undone.
    #if rmsd > 2:
    #if error >= 3 or rmsd > 2:
    if True:
        for i in range(0,len(tms_centroids_set2)):
            tms_centroids_set2[i][2] = tms_centroids_set2[i][2]*-1

        # Align 3D points of set2 with 3D points of set1 using the most stable reference points
        tms_reference_set1 = tms_centroids_set1[[x for x in range(0,len(segment_order)) if segment_order[x] in tm_ranking[0:3]]]
        tms_reference_set2 = tms_centroids_set2[[x for x in range(0,len(segment_order)) if segment_order[x] in tm_ranking[0:3]]]

        imposer = SVDSuperimposer()
        imposer.set(tms_reference_set1, tms_reference_set2)
        imposer.run()
        new_rot, new_trans = imposer.get_rotran()
        new_rmsd = imposer.get_rms()
        print("RMSD2", round(new_rmsd,2))

        if new_rmsd < rmsd:
            rot = new_rot
            trans = new_trans
            rmsd = new_rmsd
        else:
            for i in range(0,len(tms_centroids_set2)):
                tms_centroids_set2[i][2] = tms_centroids_set2[i][2]*-1

    tms_centroids_set2 = np.dot(tms_centroids_set2, rot) + trans
    tms_set2 = np.dot(tms_set2, rot) + trans

    # Calculate optimal plane through points in both sets and convert to 2D.
    # The plane normal is the average of the unit vectors between the two
    # centroids found for each TM in set 1.
    tm_centroids = {y: [] for y in range(0,7)}
    for x, segment in enumerate(segment_order):
        if segment in tm_centroids:
            tm_centroids[segment].append(tms_centroids_set1[x])
    count = 0
    normal = np.array([0.0,0.0,0.0])
    for y in range(0,7):
        if len(tm_centroids[y]) == 2:
            normal += np.array((tm_centroids[y][1] - tm_centroids[y][0])/np.linalg.norm(tm_centroids[y][1] - tm_centroids[y][0]))
            count += 1
    # NOTE(review): if no TM contributed two centroids, count is 0 and the
    # normal becomes NaN - confirm this cannot happen in practice.
    normal = normal/count

    midpoint = tms_centroids_set1[:7].mean(axis=0)

    # Project both sets together so they share the same 2D frame
    plane_set, z_set = convert3D_to_2D_plane(np.concatenate((tms_centroids_set1[:7], tms_centroids_set2[:7]), axis = 0), intracellular, normal, midpoint)
    plane_set1 = plane_set[:7]
    plane_set2 = plane_set[7:]
    z_set1 = z_set[:7]
    z_set2 = z_set[7:]

    # DO NOT REMOVE: possibly we want to upgrade to weighted superposing
    # Based on Biopython SVDSuperimposer
    # coords = tms_centroids_set2
    # reference_coords = tms_centroids_set1

    # OLD centroid calcalation
    # av1 = sum(coords) / len(coords)
    # av2 = sum(reference_coords) / len(reference_coords)

    # NEW weighted centroid calculation
    # print(normalized_differences)
    # av1, av2 = 0, 0
    # totalweight = 0
    # for i in range(0,7):
    #     # print("Round",i)
    #     #weight = 1+(7-tm_ranking.index(i))/7
    #     weight = (1-normalized_differences[i]+0.1)/1.1
    #     totalweight += weight
    #     print("TM", str(i+1), "weight",weight)
    #     av1 += coords[i]*weight
    #     av2 += reference_coords[i]*weight
    #
    # av1 = av1/totalweight
    # av2 = av2/totalweight
    #
    # coords = coords - av1
    # reference_coords = reference_coords - av2
    #
    # # correlation matrix
    # a = np.dot(np.transpose(coords), reference_coords)
    # u, d, vt = np.linalg.svd(a)
    # rot = np.transpose(np.dot(np.transpose(vt), np.transpose(u)))
    # # check if we have found a reflection
    # if np.linalg.det(rot) < 0:
    #     vt[2] = -vt[2]
    #     rot = np.transpose(np.dot(np.transpose(vt), np.transpose(u)))
    # trans = av2 - np.dot(av1, rot)
    # rot, trans = imposer.get_rotran()
    # tms_set2 = np.dot(tms_set2, rot) + trans

    # CURRENT: Ca-angle to axis core
    rotations = [0] * 7
    for i in range(0,7):
        try:
            # Map both angle sets into (0, 360] before taking the difference
            angles1 = [data['tab4'][gn_dictionary[x]]['angles_set1'][11] for x in gns[i]]
            angles1 = [angle if angle > 0 else angle + 360 for angle in angles1 ]
            angles2 = [data['tab4'][gn_dictionary[x]]['angles_set2'][11] for x in gns[i]]
            angles2 = [angle if angle > 0 else angle + 360 for angle in angles2 ]

            rotations[i] = [angles1[x] - angles2[x] for x in range(3)]
            # Wrap differences into [-180, 180]
            rotations[i] = [value if abs(value) <= 180 else value-360 if value > 0 else value+360 for value in rotations[i]]

        except Exception:
            # Best effort: missing or incomplete angle data yields no rotation
            rotations[i] = [0.0, 0.0, 0.0]  # TODO: verify other class B errors

        # UPDATE 20-02-2020 No mirroring but top-down through GPCR
        rotations[i] = sum(rotations[i])/3

    # TODO: check z-coordinates orientation
    # Step 1: collect movement relative to membrane mid
    # Step 2: find min and max TM
    # Step 3: check if orientation of min/max TM matches the z-scales + intra/extra - if not invert z-coordinates
    labeled_set1 = [{"label": "TM"+str(i+1), "x": float(plane_set1[i][0]), "y": float(plane_set1[i][1]), "z": float(z_set1[i]), "rotation" : 0} for i in range(0,7)]
    labeled_set2 = [{"label": "TM"+str(i+1), "x": float(plane_set2[i][0]), "y": float(plane_set2[i][1]), "z": float(z_set2[i]), "rotation" : rotations[i]} for i in range(0,7)]

    # Convert used GNs to right numbering (fresh lists; gns itself is left untouched)
    gns_used = [[gn_dictionary[gn] for gn in tm_gns] for tm_gns in gns]
    return {"coordinates_set1" : labeled_set1, "coordinates_set2": labeled_set2, "gns_used": gns_used}
Beispiel #28
0
class ResidueMutator(object):
    """Replace a non-standard or incomplete residue by a fitted standard one.

    Candidate conformations for each standard residue are read from
    tripeptide structure files (the residue with id 2 of the chain named
    " " in every model) and superposed onto the side-chain atoms of the
    residue being mutated.
    """

    def __init__(self,
                 tripeptides=None,
                 components=None,
                 standard_residues=None):
        """ The mutator object takes a non-standard residue or incomplete residue and modifies it

        Parameters:
            tripeptides: iterable of structure file names, one per candidate
                residue type; defaults to ``data.tripeptides``.
            components: mapping of residue name to chemical-component info
                (read for 'side_chain_atoms', 'main_chain_atoms' and
                '_chem_comp.mon_nstd_parent_comp_id'); defaults to
                ``data.chem_components``.
            standard_residues: container of standard residue names; defaults
                to ``data.standard_residues``.

        Raises:
            ModuleNotFoundError: if BioPython cannot be imported.
        """
        try:
            from Bio.PDB import PDBParser
            from Bio.SVDSuperimposer import SVDSuperimposer
        except ModuleNotFoundError:
            raise ModuleNotFoundError(
                "BioPython is required for this functionality")

        # get defaults if not provided
        if standard_residues is None:
            standard_residues = data.standard_residues
        if tripeptides is None:
            tripeptides = data.tripeptides
        if components is None:
            components = data.chem_components
        self.components = components
        self.candidates = {}
        self.standard_residues = standard_residues
        self.imposer = SVDSuperimposer()
        self.parser = PDBParser(PERMISSIVE=1, QUIET=True)

        # build up candidate structures: one candidate per model in each file
        for fn in tripeptides:
            structure = self.parser.get_structure("", fn)
            resn = structure[0][" "][2].get_resname()
            self.candidates[resn] = [model[" "][2] for model in structure]

    def mutate(self, residue, replace_backbone=True):
        """Return a standard-residue copy fitted onto ``residue``.

        Parameters:
            residue: residue object providing ``get_resname()``, ``in`` tests
                and item access by atom name.
            replace_backbone (bool): when true, main-chain atoms present in
                ``residue`` are copied onto the returned candidate.

        Returns:
            The transformed candidate residue, or ``False`` when the residue
            cannot be mutated (no standard parent, no candidate structures,
            no shared side-chain atoms, or no candidate could be fitted).
        """
        resn = residue.get_resname()

        if self.standard(resn):
            # the residue is already a standard residue, here for repair
            parn = resn
        else:
            parn = self.components[resn]['_chem_comp.mon_nstd_parent_comp_id']
            if not self.standard(parn):
                # the parent residue is a nonstandard residue, can't mutate
                return False

        if parn not in self.candidates:
            # parent not in candidate structures
            return False

        # side chain atoms shared by the fixed residue and its standard parent
        sc_fixed = set(self.components[resn]['side_chain_atoms'])
        sc_movin = set(self.components[parn]['side_chain_atoms'])
        atom_names = sc_fixed.intersection(sc_movin)

        # keep only the shared side chain atoms actually present in residue
        atom_list = [atom for atom in atom_names if atom in residue]
        if not atom_list:
            return False

        # get side chain atom coordinates
        fixed_coord = np.zeros((len(atom_list), 3))
        for i, atom in enumerate(atom_list):
            fixed_coord[i] = residue[atom].get_coord()

        # loop over candidates, finding best RMSD
        moved_coord = np.zeros((len(atom_list), 3))
        min_rms = float("inf")
        rotm = None
        tran = None
        min_candidate = None
        for candidate in self.candidates[parn]:
            for j, atom in enumerate(atom_list):
                moved_coord[j] = candidate[atom].get_coord()
            # perform SVD fitting
            self.imposer.set(fixed_coord, moved_coord)
            self.imposer.run()
            rms = self.imposer.get_rms()
            if rms < min_rms:
                min_rms = rms
                rotm, tran = self.imposer.get_rotran()
                min_candidate = candidate

        if min_candidate is None:
            # no candidate available or none produced a comparable RMSD
            return False

        # copy the candidate to a new object
        candidate = min_candidate.copy()
        candidate.transform(rotm, tran)
        stripHydrogens(candidate)

        if replace_backbone:
            # replace backbone atoms of candidate with those of the residue
            backbone_atoms = self.components[resn]['main_chain_atoms']
            for atom in backbone_atoms:
                if atom not in residue:
                    continue
                if atom not in candidate:
                    candidate.add(residue[atom].copy())
                candidate[atom].set_coord(residue[atom].get_coord())

        return candidate

    def standard(self, resname):
        """Return whether ``resname`` is one of the standard residues."""
        return resname in self.standard_residues

    def modified(self, resname):
        """Return whether ``resname`` is a non-standard residue whose
        declared parent residue is standard."""
        if resname in self.standard_residues:
            # it's standard, not modified
            return False

        parent_key = '_chem_comp.mon_nstd_parent_comp_id'
        if resname not in self.components or parent_key not in self.components[
                resname]:
            # has no standard parent field - can't be modified
            return False

        return self.components[resname][parent_key] in self.standard_residues
Beispiel #29
0
def run_system(dir):
    """Compute the per-frame RMSD of a simulation against its reference.

    Works inside ``dir``: reads the trajectory from ``coord.h5`` (dataset
    "coord") and the reference coordinates from
    ``../../cmap_coordinates/<pdb>.txt``, where ``<pdb>`` is the part of the
    directory name before the first underscore. Every frame and the
    reference are centered, the frame is superposed on the reference, and
    the resulting RMSD values are written to ``RMSD.txt``. A summary line
    (mean RMSD and mean radius of gyration relative to the reference, both
    over the last 200 frames) is printed.

    The step is skipped with a message when an input file is missing, or
    when ``RMSD.txt`` already exists and ``_FORCE`` is not set.

    The working directory is restored on every exit path, including errors.

    Parameters:
        dir: path of the system directory to process.

    Returns:
        ``dir``, unchanged, whether or not the computation ran.
    """
    pdb = os.path.basename(dir).split('_')[0]

    org_dir = os.getcwd()
    os.chdir(dir)
    try:
        f_coord = "coord.h5"
        f_RMSD = "RMSD.txt"
        f_OC = os.path.join("..", "..", "cmap_coordinates", pdb + '.txt')

        # Single-argument print(...) behaves identically on Python 2 and 3.
        if not os.path.exists(f_OC):
            print("Missing coordinates for cmap {}".format(dir))
            return dir

        if not os.path.exists(f_coord):
            print("Missing coordinates, extract_coordinates.py first {}".format(dir))
            return dir

        if os.path.exists(f_RMSD) and not _FORCE:
            print("RMSD file exists, skipping {}".format(dir))
            return dir

        with h5py.File(f_coord, 'r') as h5:
            C = h5["coord"][:]
        OC = np.loadtxt(f_OC)

        assert(C[0].shape == OC.shape)

        sup = SVDSuperimposer()

        # Center the reference and compute its radius of gyration
        OC -= OC.mean(axis=0)
        OC_RG = ((np.linalg.norm(OC, axis=1)**2).sum()/len(OC)) ** 0.5

        RMSD = []
        RG = []
        for cx in C:
            # Center the frame in place before measuring and fitting
            cx -= cx.mean(axis=0)

            rg_cx = ((np.linalg.norm(cx, axis=1)**2).sum()/len(cx)) ** 0.5
            RG.append(rg_cx)

            sup.set(OC, cx)
            sup.run()
            RMSD.append(sup.get_rms())

        RMSD = np.array(RMSD)
        RG = np.array(RG)

        print("{} {: 0.4f} {: 0.4f}".format(dir, RMSD[-200:].mean(),
                                            RG[-200:].mean() / OC_RG))

        np.savetxt(f_RMSD, RMSD)
    finally:
        os.chdir(org_dir)

    return dir
def compute_mean (reader, align_conf, num_confs, start = None, stop = None):
    """
        Sum the aligned particle positions and orientations over a trajectory.

        Every configuration read is superimposed onto the reference
        configuration and the rotated positions / a1 / a3 vectors are added to
        running totals.  The totals are NOT divided by the number of frames
        here; the caller receives the sums together with the frame count.

        Written to be driven by the multiprocessing helper in UTILS/parallelize.py

        Parameters:
            reader (readers.LorenzoReader2): An active reader on the trajectory file.
            align_conf (numpy.array): Reference coordinates handed to the superimposer.
            num_confs (int): The number of configurations in the reader.
            <optional> start (int): Number of configurations skipped before the first read. Defaults to 0.
            <optional> stop (int): Upper bound on the number of frames processed. Defaults to num_confs.

        Returns:
            mean_pos_storage (numpy.array): Per particle, the sum of aligned positions.
            mean_a1_storage (numpy.array): Per particle, the sum of rotated a1 vectors.
            mean_a3_storage (numpy.array): Per particle, the sum of rotated a3 vectors.
            intermediate_mean_structures (list): Running means captured every INTERMEDIATE_EVERY frames (serial runs only).
            confid (int): How many configurations were summed.
    """
    stop = num_confs if stop is None else int(stop)
    start = 0 if start is None else int(start)

    system = reader._get_system(N_skip = start)

    # running means collected along the way (serial mode only)
    intermediate_mean_structures = []
    # performs the structural superposition of a frame onto the reference
    sup = SVDSuperimposer()

    # one accumulator row per nucleotide
    mean_pos_storage = np.zeros((n_nuc, 3))
    mean_a1_storage = np.zeros((n_nuc, 3))
    mean_a3_storage = np.zeros((n_nuc, 3))

    confid = 0

    while system != False and confid < stop:
        system.inbox()
        positions = fetch_np(system)
        indexed_positions = indexed_fetch_np(system)
        a1_vectors = fetch_a1(system)
        a3_vectors = fetch_a3(system)

        # fit the (indexed) frame onto the reference
        sup.set(align_conf, indexed_positions)
        sup.run()
        rot, tran = sup.get_rotran()

        # positions get rotation + translation, orientation vectors rotation only
        mean_pos_storage += np.einsum('ij, ki -> kj', rot, positions) + tran
        mean_a1_storage += np.einsum('ij, ki -> kj', rot, a1_vectors)
        mean_a3_storage += np.einsum('ij, ki -> kj', rot, a3_vectors)

        # report the quality of this frame's fit
        print("Frame:", confid, "Time:", system._time, "RMSF:", sup.get_rms())

        # move on to the next frame
        confid += 1
        system = reader._get_system()

        # Intermediate means are only produced in serial runs, and only on
        # every INTERMEDIATE_EVERY-th frame.
        if parallel or confid % INTERMEDIATE_EVERY != 0:
            continue
        running_mean = mean_pos_storage / confid
        intermediate_mean_structures.append(prep_pos_for_json(running_mean))
        print("INFO: Calculated intermediate mean for {} ".format(confid))

    return(mean_pos_storage, mean_a1_storage, mean_a3_storage, intermediate_mean_structures, confid)
Beispiel #31
0
def assemble_multiscale_visualization(topology_fn, rmf_fn, pdb_dir,
                                      outprefix=None, chimerax=True,
                                      xl_fn=None):
    """
    Render multiscale versions of rigid bodies from PDB files + flexible
    beads from RMF files, with or without mapped crosslinks.

    The rigid-body PDB structures are superimposed (on their CA atoms) onto
    the matching particles of the first RMF frame, regrouped into one chain
    per molecule and written to "<outprefix>.pdb". If requested, a ChimeraX
    script "<outprefix>.cxc" and, when crosslinks are supplied, a pseudobond
    file "<outprefix>_XLs.pb" are written as well.

    Args:
    topology_fn (str): Topology file in pipe-separated-value (PSV) format
    as required in integrative modeling using IMP. For details on how
    to write a topology file, see:
    https://integrativemodeling.org/2.13.0/doc/ref/classIMP_1_1pmi_1_1topology_1_1TopologyReader.html

    rmf_fn (str): Name of the RMF file.

    pdb_dir (str): Directory containing all the PDB files for the rigid
    bodies used in modeling.

    outprefix (str, optional): Prefix for output files. Defaults to None,
    in which case "centroid_model" is used.

    chimerax (bool, optional): If true, a Chimerax script will be written
    (extension ".cxc"). If false, the function returns right after the
    composite PDB file has been saved. Defaults to True.

    xl_fn (str, optional): A file containing a XL dataset. Defaults to None.
    If this dataset is supplied, then it will be mapped on to the overall
    structure with satisfied XLs drawn in blue and violated XLs drawn in red.
    A XL dataset should be supplied in a comma-separated-value (CSV) format
    containing at least the following fields

    protein1, residue1, protein2, residue2, sat

    where the last field <sat> is a boolean 1 or 0 depending on whether
    the particular XL is satisfied (in the ensemble sense) as a result of the
    integrative modeling exercise.

    Returns:
    None. All results are written to disk.
    """

    # -------------------------------------------
    # read the RMF file and extract all particles
    # -------------------------------------------
    rmf_file = RMF.open_rmf_file_read_only(rmf_fn)
    rmf_model = IMP.Model()
    hier = IMP.rmf.create_hierarchies(rmf_file, rmf_model)[0]
    IMP.rmf.load_frame(rmf_file, 0)
    particles = IMP.core.get_leaves(hier)
    # (molecule name, particle name) -> coordinates in frame 0
    rmf_ps = {}
    for p in particles:
        molname = p.get_parent().get_parent().get_parent().get_name().strip()
        name = p.get_name().strip()
        coord = IMP.core.XYZ(p).get_coordinates()
        rmf_ps[(molname, name)] = coord

    # --------------------------------------------------------------
    # map pdb residues to rmf particles for each rigid body pdb file
    # --------------------------------------------------------------
    # read the topology file
    t = TopologyReader(topology_fn, pdb_dir=pdb_dir)
    components = t.get_components()

    map_pdb2rmf = {}
    rigid_body_models = {}
    rigid_body_residues = {}
    chain_ids = {} # these are matched to the chimerax rmf plugin
    chain_id_count = 0
    for c in components:
        # ignore unstructured residues
        if c.pdb_file == "BEADS": continue
        mol = c.molname
        pdb_prefix = os.path.basename(c.pdb_file).split(".pdb")[0]
        chain_id = c.chain
        resrange = c.residue_range
        offset = c.pdb_offset

        r0 = resrange[0] + offset
        r1 = resrange[1] + 1 + offset

        if mol not in chain_ids:
            chain_ids[mol] = string.ascii_uppercase[chain_id_count]
            chain_id_count += 1

        # parse every PDB file only once, however many components use it
        if pdb_prefix not in map_pdb2rmf:
            map_pdb2rmf[pdb_prefix] = {}
            this_rigid_body_model = PDBParser().get_structure("x", c.pdb_file)[0]
            this_rigid_body_residues = {(r.full_id[2], r.id[1]): r for r in this_rigid_body_model.get_residues()}
            rigid_body_models[pdb_prefix] = this_rigid_body_model
            rigid_body_residues[pdb_prefix] = this_rigid_body_residues

        for r in range(r0, r1):
            key = (chain_id, r)
            val = (mol, r)
            # residues missing from the PDB file are silently left out
            if key in rigid_body_residues[pdb_prefix]:
                map_pdb2rmf[pdb_prefix][key] = val

    # --------------------------------
    # align all pdb files with the rmf
    # --------------------------------
    print("\nAligning all rigid body structures...")
    align = SVDSuperimposer()
    for pdb_prefix, mapper in map_pdb2rmf.items():
        pdb_coords = []
        pdb_atoms = []
        rmf_coords = []

        residues = rigid_body_residues[pdb_prefix]
        for (chain, pdb_res), (mol, rmf_res) in mapper.items():
            r = residues[(chain, pdb_res)]
            pdb_coords.append(r["CA"].coord)
            pdb_atoms.extend(r.get_atoms())
            rmf_coords.append(rmf_ps[(mol, str(rmf_res))])

        pdb_coords = np.array(pdb_coords)
        rmf_coords = np.array(rmf_coords)
        # the fit is computed on CA atoms only, then applied to every atom
        align.set(rmf_coords, pdb_coords)
        align.run()
        rotmat, vec = align.get_rotran()
        for a in pdb_atoms:
            a.transform(rotmat, vec)

    # --------------------------
    # assemble the composite pdb
    # --------------------------
    mols = {c.molname for c in components}
    print("\nChain IDs by molecule:")
    for k, v in chain_ids.items():
        print("molecule %s, chain ID %s" % (k, v))

    reslists = {mol: [] for mol in mols}
    for pdb_prefix, mapper in map_pdb2rmf.items():
        residues = rigid_body_residues[pdb_prefix]
        for (chain, pdb_res), (mol, rmf_res) in mapper.items():
            r = residues[(chain, pdb_res)]
            new_id = (r.id[0], rmf_res, r.id[2])
            new_residue = Residue.Residue(id=new_id, resname=r.resname, segid=r.segid)
            for a in r.get_atoms():
                new_residue.add(a)
            reslists[mol].append(new_residue)

    composite_model = Model.Model(0)
    for mol, chain_id in chain_ids.items():
        this_chain = Chain.Chain(chain_id)
        for r in sorted(reslists[mol], key=lambda r: r.id[1]):
            this_chain.add(r)
        composite_model.add(this_chain)

    # save the composite pdb to file
    io = PDBIO()
    io.set_structure(composite_model)
    if outprefix is None:
        outprefix = "centroid_model"
    io.save(outprefix + ".pdb")

    # -------------------------------------------------------------------
    # chimerax rendering (hide most of the rmf except unstructured beads)
    # -------------------------------------------------------------------
    # Nothing more to do without a script; return instead of terminating the
    # whole interpreter so that callers can carry on.
    if not chimerax:
        return
    print("\nWriting UCSF Chimerax script...")
    script = [
        "open %s\n" % (outprefix + ".pdb"),
        "open %s\n" % rmf_fn,
        "hide\n",
        "show cartoon\n",
        "color #%d %s\n" % (CHIMERAX_PDB_MODEL_NUM, STRUCT_COLOR),
        "color #%d %s\n" % (CHIMERAX_RMF_MODEL_NUM, UNSTRUCT_COLOR),
        "hide #%d\n" % CHIMERAX_RMF_MODEL_NUM,
    ]

    # every (molecule, residue) covered by a rigid-body PDB structure;
    # a set keeps the membership tests below constant-time
    struct_residues = set()
    for val in map_pdb2rmf.values():
        struct_residues.update(val.values())

    unstruct_atomspec = {}
    for molname, particle_name in rmf_ps:
        rmf_chain_id = chain_ids[molname]
        if "bead" in particle_name:
            # bead names start with "<first>-<last>_", a residue range
            r0, r1 = particle_name.split("_")[0].split("-")
            r0 = int(r0) ; r1 = int(r1)
            this_atomspec = "#%d/%s:%d-%d" % \
                            (CHIMERAX_RMF_MODEL_NUM, rmf_chain_id, r0, r1)
            for r in range(r0, r1+1):
                unstruct_atomspec[(molname, r)] = this_atomspec
        else:
            r = int(particle_name)
            if (molname, r) not in struct_residues:
                this_atomspec = "#%d/%s:%d" % \
                                (CHIMERAX_RMF_MODEL_NUM, rmf_chain_id, r)
                unstruct_atomspec[(molname, r)] = this_atomspec

    # Sorted so the generated script is reproducible between runs. The line
    # is skipped when there is nothing unstructured: a bare "show" would,
    # like the bare "hide" above, apply to everything.
    shown_specs = sorted(set(unstruct_atomspec.values()))
    if shown_specs:
        script.append("show %s\n" % " ".join(shown_specs))

    # ----------------------------------------------------------
    # if crosslink data is supplied, write out a pseudobond file
    # ----------------------------------------------------------
    if xl_fn is not None:
        # parse XL data
        df = pd.read_csv(os.path.abspath(xl_fn))
        xls = list(zip(df["protein1"], df["residue1"],
                       df["protein2"], df["residue2"], df["sat"]))

        # get lists of struct atomspecs
        atomspec = {}
        for (mol, particle_name) in rmf_ps:
            if "bead" in particle_name: continue
            resid = int(particle_name)
            if (mol, resid) in unstruct_atomspec: continue
            atomspec[(mol, resid)] = "#%d/%s:%d@CA" % \
                                     (CHIMERAX_PDB_MODEL_NUM, chain_ids[mol], resid)

        # now add in all the unstruct atomspecs
        atomspec.update(unstruct_atomspec)

        # write pseudobond script
        pb_lines = [
            "; radius = %2.2f\n" % XL_RADIUS,
            "; dashes = 0\n",
        ]
        for p1, r1, p2, r2, sat in xls:
            atomspec_1 = atomspec[(p1, r1)]
            atomspec_2 = atomspec[(p2, r2)]
            # both ends fall on the same bead / atom: nothing to draw
            if atomspec_1 == atomspec_2:
                continue
            color = SAT_XL_COLOR if sat else VIOL_XL_COLOR
            pb_lines.append("%s %s %s\n" % (atomspec_1, atomspec_2, color))
        pb_lines.append("\n")
        pb_fn = outprefix + "_XLs.pb"
        with open(pb_fn, "w") as pb_file:
            pb_file.write("".join(pb_lines))
        script.append("open %s\n" % pb_fn)

    script.append("preset 'overall look' publication\n")
    chimerax_out_fn = outprefix + ".cxc"
    with open(chimerax_out_fn, "w") as cxc_file:
        cxc_file.write("".join(script))
class SVDSuperimposerTest(unittest.TestCase):
    """Regression tests for SVDSuperimposer on small, fixed coordinate sets."""

    def setUp(self):
        """Load a reference set ``x`` and a moving set ``y`` into a new superimposer."""
        self.x = array([[51.65, -1.90, 50.07],
                        [50.40, -1.23, 50.65],
                        [50.68, -0.04, 51.54],
                        [50.22, -0.02, 52.85]])

        self.y = array([[51.30, -2.99, 46.54],
                        [51.09, -1.88, 47.58],
                        [52.36, -1.20, 48.03],
                        [52.71, -1.18, 49.38]])

        self.sup = SVDSuperimposer()
        self.sup.set(self.x, self.y)

    def _assert_arrays_match(self, actual, expected):
        """Assert that two arrays are equal once rounded to 3 decimals."""
        self.assertTrue(
            array_equal(around(actual, decimals=3), around(expected, decimals=3)))

    def test_get_init_rms(self):
        """get_init_rms() reports the RMS deviation before any superposition."""
        x = array([[1.19, 1.28, 1.37],
                   [1.46, 1.55, 1.64],
                   [1.73, 1.82, 1.91]])
        y = array([[1.91, 1.82, 1.73],
                   [1.64, 1.55, 1.46],
                   [1.37, 1.28, 1.19]])
        self.sup.set(x, y)
        # the value is expected to be computed on demand, not by set()
        self.assertIsNone(self.sup.init_rms)
        init_rms = 0.8049844719
        # NOTE: this used to be assertTrue(a, b), which treats ``b`` as the
        # failure message and therefore never compared the two numbers.
        self.assertAlmostEqual(self.sup.get_init_rms(), init_rms, places=3)

    def test_oldTest(self):
        """Walk through set -> run -> getters and check every intermediate state."""
        # right after set(): inputs are stored, nothing has been computed yet
        self._assert_arrays_match(self.sup.reference_coords, self.x)
        self._assert_arrays_match(self.sup.coords, self.y)
        self.assertIsNone(self.sup.rot)
        self.assertIsNone(self.sup.tran)
        self.assertIsNone(self.sup.rms)
        self.assertIsNone(self.sup.init_rms)

        self.sup.run()
        # run() must leave the stored inputs untouched ...
        self._assert_arrays_match(self.sup.reference_coords, self.x)
        self._assert_arrays_match(self.sup.coords, self.y)
        # ... and fill in the rotation / translation
        rot = array([[0.68304983, 0.53664371, 0.49543563],
                     [-0.52277295, 0.83293229, -0.18147242],
                     [-0.51005037, -0.13504564, 0.84947707]])
        tran = array([38.78608157, -20.65451334, -15.42227366])
        self._assert_arrays_match(self.sup.rot, rot)
        self._assert_arrays_match(self.sup.tran, tran)
        # the RMS values are still expected to be unset until requested
        self.assertIsNone(self.sup.rms)
        self.assertIsNone(self.sup.init_rms)

        rms = 0.00304266526014
        self.assertAlmostEqual(self.sup.get_rms(), rms, places=3)

        rot_get, tran_get = self.sup.get_rotran()
        self._assert_arrays_match(rot_get, rot)
        self._assert_arrays_match(tran_get, tran)

        # applying the transform by hand must land y on x ...
        y_on_x1 = dot(self.y, rot) + tran
        y_x_solution = array(
            [[5.16518846e+01, -1.90018270e+00, 5.00708397e+01],
             [5.03977138e+01, -1.22877050e+00, 5.06488200e+01],
             [5.06801788e+01, -4.16095666e-02, 5.15368866e+01],
             [5.02202228e+01, -1.94372374e-02, 5.28534537e+01]])
        self._assert_arrays_match(y_on_x1, y_x_solution)

        # ... and so must the superimposer's own get_transformed()
        y_on_x2 = self.sup.get_transformed()
        self._assert_arrays_match(y_on_x2, y_x_solution)
Beispiel #33
0
class MetricsRMSD(object):
    """
    RMSD-style metrics between a reference and a model coordinate set,
    evaluated after superimposing the model onto the reference.
    """

    def __init__(self):
        self.reference_coordinate = self.model_coordinate = None
        self.sup = SVDSuperimposer()

    def clear(self):
        """
        forget the stored reference and model coordinates
        """
        self.reference_coordinate = self.model_coordinate = None

    def set(self, reference_atoms, model_atoms):
        """
        convert both atom collections to coordinates and store them

        :param reference_atoms: atoms of the reference structure
        :param model_atoms: atoms of the structure to be moved
        :raises ValueError: if the two collections differ in length or are empty
        """
        ref_vectors = to_vector_list(reference_atoms)
        model_vectors = to_vector_list(model_atoms)

        n_ref = len(ref_vectors)
        if n_ref != len(model_vectors):
            raise ValueError(
                "Reference coordinate and coordinate moving atom lists differ in size"
            )
        if n_ref == 0:
            raise ValueError(
                "Reference coordinate and coordinate moving atom are empty")

        self.clear()
        self.reference_coordinate = ref_vectors
        self.model_coordinate = model_vectors

    def run_svd(self):
        """
        superimpose the stored model coordinates onto the reference
        """
        superimposer = self.sup
        superimposer.set(self.reference_coordinate, self.model_coordinate)
        superimposer.run()

    def pre_calculate_rms(self):
        """
        :return: per-coordinate residuals (transformed model minus reference)
        """
        rot, tran = self.sup.get_rotran()
        moved = numpy.dot(self.model_coordinate, rot) + tran
        return moved - self.reference_coordinate

    def calculate_rms(self):
        """
        :return: root of the summed squared residuals divided by the number of model coordinates
        """
        residuals = self.pre_calculate_rms()
        squared = residuals * residuals
        # builtin sum twice: first over rows, then over the remaining axis
        total = sum(sum(squared))
        return numpy.sqrt(total / len(self.model_coordinate))

    def calculate_mean(self):
        """
        :return: the square of the rmsd value
        """
        value = self.calculate_rms()
        return value * value

    def calculate_std(self):
        """
        :return: standard deviation taken over all residual components
        """
        return numpy.std(self.pre_calculate_rms())
Beispiel #34
0
def merge_cc(coords_list, res_overlap, n_cc_helices):
    """
    Chain coordinate fragments together through their overlapping ends.

    Each fragment holds ``n_cc_helices`` helices stored back to back, every
    helix occupying the same number of rows (derived from the first
    fragment) and every residue contributing 5 rows. Fragment k+1 is
    superimposed onto fragment k using the last ``res_overlap[k]`` residues
    of each helix of k and the first ``res_overlap[k]`` residues of each
    helix of k+1. Inside the overlap the two copies are blended linearly;
    the rest of the new fragment is appended.

    Returns a tuple ``(coords, rmsd)``: the merged coordinates (helices again
    stored back to back) and the square root of the summed squared RMS
    values of all superpositions.
    """
    atoms_per_residue = 5
    target = coords_list[0]
    helix_len = int(target.shape[0] / n_cc_helices)
    helix_ids = range(n_cc_helices)

    # --- step 1: bring every fragment into the frame of its predecessor ---
    placed = [deepcopy(target)]
    squared_rms = []
    for fragment, overlap_res in zip(coords_list[1:], res_overlap):
        overlap_atoms = overlap_res * atoms_per_residue

        # end of each helix in the fragment that is already in place
        tail_atoms = np.concatenate(
            [target[(h + 1) * helix_len - overlap_atoms:(h + 1) * helix_len]
             for h in helix_ids],
            axis=0)
        # start of each helix in the fragment being added
        head_atoms = np.concatenate(
            [fragment[h * helix_len:h * helix_len + overlap_atoms]
             for h in helix_ids],
            axis=0)

        fitter = SVDSuperimposer()
        fitter.set(tail_atoms, head_atoms)
        fitter.run()
        squared_rms.append(fitter.get_rms()**2)
        rotation, shift = fitter.get_rotran()

        # the freshly placed fragment is the reference for the next one
        target = np.dot(fragment, rotation) + shift
        placed.append(target)

    rmsd = np.sqrt(np.sum(squared_rms))

    # --- step 2: stitch the helices, blending inside every overlap ---
    merged = [placed[0][h * helix_len:(h + 1) * helix_len] for h in helix_ids]

    for fragment, overlap_res in zip(placed[1:], res_overlap):
        incoming = [fragment[h * helix_len:(h + 1) * helix_len]
                    for h in helix_ids]
        overlap_atoms = overlap_res * atoms_per_residue

        for res_idx in range(overlap_res):
            # weight of the incoming fragment grows along the overlap
            weight = (res_idx + 1) / float(overlap_res + 1)
            first_row = res_idx * atoms_per_residue
            for row in range(first_row, first_row + atoms_per_residue):
                # same atom, addressed from the end of the merged helix
                tail_row = row - overlap_atoms
                for kept, added in zip(merged, incoming):
                    kept[tail_row] = (
                        1 - weight) * kept[tail_row] + weight * added[row]

        # everything past the overlap is simply appended
        merged = [np.append(kept, added[overlap_atoms:], axis=0)
                  for kept, added in zip(merged, incoming)]

    return np.concatenate(merged, axis=0), rmsd