Ejemplo n.º 1
0
def rmsd_static(coordA, coordB, atom_types=("N", "CA", "C", "O")):
    """Calculate the RMSD between two corresponding sets of residues, without moving anything.

    Expects Pdb objects. No superposition is performed: the coordinates are
    compared exactly as they are given.

    Args:
        coordA: First structure. Must provide ``residues()`` and, when
            ``atom_types`` is not None, ``get_atoms_by_type()``.
        coordB: Second structure. Must yield the same number of residues as
            ``coordA`` once the atom-type filter has been applied.
        atom_types: Atom names passed to ``get_atoms_by_type()`` on both
            structures. Pass None to skip that filtering step.

    Returns:
        The root-mean-square deviation over all paired atoms, or
        ``float("inf")`` when no atom pair was found.

    Raises:
        AssertionError: If the residue counts differ, or if two paired atoms
            carry different atom names.
    """
    if atom_types is not None:
        coordA = coordA.get_atoms_by_type(atom_types)
        coordB = coordB.get_atoms_by_type(atom_types)

    residuesA = coordA.residues()
    residuesB = coordB.residues()
    assert len(residuesA) == len(residuesB)

    sumsqrdist = 0.0
    count = 0

    # Walk the residues pairwise. zip() behaves the same on Python 2 and 3,
    # unlike an xrange() index loop.
    for residueA, residueB in zip(residuesA, residuesB):
        # Presumably restricts both residues to the atom types they share, in
        # matching order; the assert below guards that pairing.
        resA, resB = intersectAtomTypes(residueA, residueB)
        for a, b in zip(resA, resB):
            assert a.atom == b.atom
            sumsqrdist += (a.x - b.x)**2 + (a.y - b.y)**2 + (a.z - b.z)**2
            count += 1

    # Nothing to compare: report an infinite deviation rather than dividing by zero.
    if count == 0:
        return float("inf")

    # sumsqrdist starts as a float, so this is true division on Python 2 as well.
    return math.sqrt(sumsqrdist / count)
Ejemplo n.º 2
0
def averageCoordinates(fragScaffold, fragPrediction, invertWeights=False):
    """Averages the coordinates of the two Pdb arguments.

    Modifies the first object to reflect the result. By default, the first
    object is assumed to be part of the N-terminal chain, the second is part
    of the C-terminal chain. This can be reversed by setting
    invertWeights=True.

    The blend is position dependent. For residue index ``i`` in a fragment of
    ``l`` residues, the prediction gets weight ``D / (l + 1)`` and the
    scaffold gets ``(l + 1 - D) / (l + 1)``, where ``D = i + 1`` by default
    and ``D = l - i`` when ``invertWeights`` is true. B factors are blended
    with the same weights.

    Args:
        fragScaffold: Pdb fragment that is modified in place.
        fragPrediction: Pdb fragment with the same number of residues; it is
            only read.
        invertWeights: Reverse the direction in which the prediction weight
            grows along the fragment.

    Returns:
        None. The result is written into the atoms of ``fragScaffold``.

    Raises:
        IllegalStateError: If a residue of either fragment lacks a CA atom.
        AssertionError: If the fragments differ in residue count, if paired
            atoms have different names, or if a residue holds more than one
            CA atom.
    """

    from prosci.util.pdb import intersectAtomTypes

    def transformCoord(P, S, D, l):
        # P = coordinate of prediction
        # S = coordinate of scaffold
        # D = distance (in residues) from (loop+anchor)-fragment end
        # l = anchor length
        return 1.0 / (l + 1) * (D * P + (l + 1 - D) * S)

    resS = fragScaffold.residues()
    resP = fragPrediction.residues()

    assert len(resS) == len(
        resP), "Anchor lengths differ in template and loop?? : %d, %d" % (
            len(resS), len(resP))

    # The fragment length is constant for the whole run.
    anchor_length = len(resS)

    for i, rS in enumerate(resS):
        rP = resP[i]

        if not rS.has_atom_type('CA') or not rP.has_atom_type('CA'):
            raise IllegalStateError("Residue must contain a CA atom:\nrS:\n" +
                                    str(rS) + "rP:\n" + str(rP))

        # Presumably narrows both residues to the atom types they share, in
        # matching order; the per-atom assert below guards that pairing.
        rS, rP = intersectAtomTypes(rS, rP)

        # The weight depends only on the residue position, so compute it once
        # per residue rather than once per atom.
        if invertWeights:
            D = anchor_length - i
        else:
            D = i + 1

        CA_vector = None
        CA_b = 999
        # zip() stops at the shorter residue, like the former
        # xrange(min(len(rS), len(rP))) index loop, and also runs on Python 3.
        for aS, aP in zip(rS, rP):
            assert aS.atom == aP.atom, "Atom order differs between residues. %s != %s" % (
                aS.atom, aP.atom)

            is_CA = aS.atom == "CA"
            if is_CA:
                assert CA_vector is None, "More than one CA atom in residue??"
                # Remember the CA position before it is moved.
                CA_vector = [aS.x, aS.y, aS.z]

            aS.x = transformCoord(aP.x, aS.x, D, anchor_length)
            aS.y = transformCoord(aP.y, aS.y, D, anchor_length)
            aS.z = transformCoord(aP.z, aS.z, D, anchor_length)
            aS.b = transformCoord(aP.b, aS.b, D,
                                  anchor_length)  # average B factors too

            if is_CA:
                # Turn the stored start position into the CA displacement.
                CA_vector[0] = aS.x - CA_vector[0]
                CA_vector[1] = aS.y - CA_vector[1]
                CA_vector[2] = aS.z - CA_vector[2]
                CA_b = aS.b

        # Atoms of the parent that have no partner in the prediction are
        # shifted rigidly by the CA displacement and take the CA B factor.
        # NOTE(review): if rS is a subset of rS.parent, this condition can
        # never be true and the branch is dead. '<' may have been intended,
        # but the semantics of .parent are not visible here, so the original
        # comparison is kept unchanged. Confirm against the Pdb class.
        if len(rS) > len(rS.parent):
            assert CA_vector is not None, "No CA atom in residue. Cannot use missing CA movement vector for unpaired atom transformation."
            for atm in rS.parent:
                if atm not in rS:
                    atm.x += CA_vector[0]
                    atm.y += CA_vector[1]
                    atm.z += CA_vector[2]
                    atm.b = CA_b
Ejemplo n.º 3
0
def reduceToAlignable(struc1_allchains,
                      struc2_allchains,
                      seq1,
                      seq2,
                      subset1=None,
                      subset2=None,
                      atom_types=("N", "CA", "C", "O"),
                      modify_structures=True):
    """Reduce two structures to the atoms of their sequence-aligned residues.

    Args:
        struc1_allchains: First structure (a Pdb). Used as ``subset1`` when
            no subset is given.
        struc2_allchains: Second structure, of the same type as the first.
            Used as ``subset2`` when no subset is given.
        seq1: Aligned (gapped) sequence for the first subset. When None, it
            is taken from ``subset1.get_seq()``.
        seq2: Aligned (gapped) sequence for the second subset. When None, it
            is taken from ``subset2.get_seq()``.
        subset1: Optional Pdb to use instead of ``struc1_allchains``.
        subset2: Optional Pdb to use instead of ``struc2_allchains``; must be
            of the same type as ``subset1`` (both None, or both given).
        atom_types: Atom names to extract from every aligned residue.
        modify_structures: Not used by this function body.

    Returns:
        A pair ``(aligned_pdb1, aligned_pdb2)`` of new Pdb objects with the
        same number of atoms, built residue by residue from the aligned
        positions.

    Raises:
        ValueError: If either sequence is empty.
        ParsingError: If the alignment contains no aligned residues.
        AssertionError: If the argument types are inconsistent, if a
            sequence's ungapped length does not match its structure's residue
            count, or if the extracted atom counts disagree.
    """
    # type() objects are singletons, so identity is the right comparison.
    assert type(struc1_allchains) is type(struc2_allchains)
    assert type(subset1) is type(subset2)
    assert isinstance(struc1_allchains, Pdb)
    assert subset1 is None or isinstance(subset1, Pdb)

    if subset1 is None:
        subset1 = struc1_allchains
    if subset2 is None:
        subset2 = struc2_allchains
    if seq1 is None:
        seq1 = subset1.get_seq()
    if seq2 is None:
        seq2 = subset2.get_seq()

    if not (seq1 and seq2):
        raise ValueError(
            "Need to have non-empty sequence to align proteins:\nseq1:%s\nseq2:%s\n"
            % (seq1, seq2))

    subset1_resbounds = subset1.residue_boundaries()
    subset2_resbounds = subset2.residue_boundaries()

    # residue count, according to the structure data
    pdb1_rescount = len(subset1_resbounds)
    pdb2_rescount = len(subset2_resbounds)

    # Make sure the residue counts coincide in sequence and structure data
    seq1_rescount = length_ungapped(seq1)
    assert seq1_rescount == pdb1_rescount, "length_ungapped(seq1) = %d, pdb1_rescount = %d" % (
        seq1_rescount, pdb1_rescount)
    seq2_rescount = length_ungapped(seq2)
    assert seq2_rescount == pdb2_rescount, "length_ungapped(seq2) = %d, pdb2_rescount = %d" % (
        seq2_rescount, pdb2_rescount)

    # Get the residue indices of aligned residues
    aligned_indeces1, aligned_indeces2 = find_aligned_residues(seq1, seq2)
    assert len(aligned_indeces1) == len(aligned_indeces2)

    if not aligned_indeces1:
        raise ParsingError("No aligned residues?")

    # Collect the requested atoms of every aligned residue pair into two new,
    # initially empty Pdb objects derived from the subsets.
    aligned_pdb1 = Pdb(subset1, [])
    aligned_pdb2 = Pdb(subset2, [])

    expected_atom_count = len(atom_types)

    for ix1, ix2 in zip(aligned_indeces1, aligned_indeces2):
        residue1 = subset1.get_atoms(slice=subset1_resbounds[ix1],
                                     atom_types=atom_types)
        residue2 = subset2.get_atoms(slice=subset2_resbounds[ix2],
                                     atom_types=atom_types)

        # If either residue is missing some requested atoms, fall back to
        # intersectAtomTypes so that both sides end up with matching atoms.
        if (expected_atom_count != len(residue1)
                or expected_atom_count != len(residue2)):
            residue1, residue2 = intersectAtomTypes(residue1, residue2)

        assert len(residue1) == len(residue2)
        aligned_pdb1.append_atoms(residue1)
        aligned_pdb2.append_atoms(residue2)

    assert len(aligned_pdb1) == len(aligned_pdb2)

    return aligned_pdb1, aligned_pdb2