Beispiel #1
0
def reduceToAlignable(struc1_allchains,
                      struc2_allchains,
                      seq1,
                      seq2,
                      subset1=None,
                      subset2=None,
                      atom_types=("N", "CA", "C", "O"),
                      modify_structures=True):
    """Reduce two structures to the atoms of their mutually aligned residues.

    The two (possibly gapped) sequences are handed to
    ``find_aligned_residues`` to obtain paired residue indices.  For every
    pair, the atoms of the requested ``atom_types`` are fetched from both
    structures and appended to two freshly created ``Pdb`` objects.

    Args:
        struc1_allchains: ``Pdb`` object; used as ``subset1`` when that is
            ``None``.
        struc2_allchains: Object of the same type as ``struc1_allchains``;
            used as ``subset2`` when that is ``None``.
        seq1: Sequence for structure 1.  When ``None``, it is taken from
            ``subset1.get_seq()``.
        seq2: Sequence for structure 2.  When ``None``, it is taken from
            ``subset2.get_seq()``.
        subset1: Optional ``Pdb`` to take atoms from instead of
            ``struc1_allchains``.
        subset2: Optional counterpart of ``subset1``; must have the same
            type as ``subset1`` (so both are given or both are ``None``).
        atom_types: Atom type names requested from each aligned residue.
        modify_structures: Accepted for interface compatibility; this
            function does not read it.

    Returns:
        A tuple ``(aligned_pdb1, aligned_pdb2)`` of ``Pdb`` objects of
        equal length.

    Raises:
        AssertionError: If the argument types are inconsistent, if the
            ungapped length of a sequence differs from the residue count of
            its structure, or if the collected atom counts diverge.
        ValueError: If either sequence is empty.
        ParsingError: If ``find_aligned_residues`` yields no aligned pair.
    """
    # Exact-type checks: both structures (and both subsets) must match.
    assert type(struc1_allchains) is type(struc2_allchains)
    assert type(subset1) is type(subset2)
    assert isinstance(struc1_allchains, Pdb)
    # Identity test, so a user-defined Pdb.__eq__ is never consulted here.
    assert subset1 is None or isinstance(subset1, Pdb)

    if subset1 is None:
        subset1 = struc1_allchains
    if subset2 is None:
        subset2 = struc2_allchains
    if seq1 is None:
        seq1 = subset1.get_seq()
    if seq2 is None:
        seq2 = subset2.get_seq()

    if not (seq1 and seq2):
        raise ValueError(
            "Need to have non-empty sequence to align proteins:\nseq1:%s\nseq2:%s\n"
            % (seq1, seq2))

    subset1_resbounds = subset1.residue_boundaries()
    subset2_resbounds = subset2.residue_boundaries()

    # Residue count, according to the structure data
    pdb1_rescount = len(subset1_resbounds)
    pdb2_rescount = len(subset2_resbounds)

    # Make sure the residue counts coincide in sequence and structure data
    seq1_rescount = length_ungapped(seq1)
    assert seq1_rescount == pdb1_rescount, (
        "length_ungapped(seq1) = %d, pdb1_rescount = %d" %
        (seq1_rescount, pdb1_rescount))
    seq2_rescount = length_ungapped(seq2)
    assert seq2_rescount == pdb2_rescount, (
        "length_ungapped(seq2) = %d, pdb2_rescount = %d" %
        (seq2_rescount, pdb2_rescount))

    # Get the residue indices of aligned residues
    aligned_indices1, aligned_indices2 = find_aligned_residues(seq1, seq2)
    assert len(aligned_indices1) == len(aligned_indices2)

    if not aligned_indices1:
        raise ParsingError("No aligned residues?")

    # Collect the requested atoms of the aligned residues into new Pdb
    # objects (presumably empty copies carrying each subset's metadata --
    # TODO confirm against the Pdb constructor).
    aligned_pdb1 = Pdb(subset1, [])
    aligned_pdb2 = Pdb(subset2, [])

    expected_atom_count = len(atom_types)

    for ix1, ix2 in zip(aligned_indices1, aligned_indices2):
        residue1 = subset1.get_atoms(slice=subset1_resbounds[ix1],
                                     atom_types=atom_types)
        residue2 = subset2.get_atoms(slice=subset2_resbounds[ix2],
                                     atom_types=atom_types)

        # If either residue did not yield one atom per requested type,
        # let intersectAtomTypes reconcile the two atom lists.
        if (len(residue1) != expected_atom_count
                or len(residue2) != expected_atom_count):
            residue1, residue2 = intersectAtomTypes(residue1, residue2)

        assert len(residue1) == len(residue2)
        aligned_pdb1.append_atoms(residue1)
        aligned_pdb2.append_atoms(residue2)

    assert len(aligned_pdb1) == len(aligned_pdb2)

    return aligned_pdb1, aligned_pdb2