def reduceToAlignable(struc1_allchains, struc2_allchains, seq1, seq2,
                      subset1=None, subset2=None,
                      atom_types=("N", "CA", "C", "O"),
                      modify_structures=True):
    """Reduce two structures to the atoms of their mutually aligned residues.

    The aligned residue index pairs are obtained from
    ``find_aligned_residues(seq1, seq2)``.  For each pair, the atoms of the
    requested ``atom_types`` are pulled from both structures; when either
    residue does not yield exactly ``len(atom_types)`` atoms, the two atom
    sets are passed through ``intersectAtomTypes`` so that both sides end up
    with the same number of atoms.

    Parameters:
        struc1_allchains, struc2_allchains: ``Pdb`` objects of the same type.
            They are used as the working structures when the corresponding
            ``subset`` argument is ``None``.
        seq1, seq2: sequences handed to ``find_aligned_residues`` (presumably
            gapped alignment rows -- confirm against callers).  When ``None``,
            the sequence is taken from ``subsetN.get_seq()``.  Their ungapped
            length must equal the residue count of the matching structure.
        subset1, subset2: optional ``Pdb`` objects to use instead of the full
            structures.  Both must be of the same type (e.g. both ``None``).
        atom_types: atom names requested from every aligned residue.
        modify_structures: accepted for interface compatibility; this
            function never reads it.

    Returns:
        A tuple ``(aligned_pdb1, aligned_pdb2)`` of ``Pdb`` objects created
        via ``Pdb(subsetN, [])`` and filled with the selected atoms.  Both
        have the same length.

    Raises:
        AssertionError: on mismatched argument types, on a sequence/structure
            residue count mismatch, or if the per-residue atom counts differ.
            NOTE(review): these checks are ``assert`` statements and are
            therefore skipped when Python runs with ``-O``.
        ValueError: if either sequence is empty.
        ParsingError: if no residues are aligned.
    """
    assert type(struc1_allchains) is type(struc2_allchains)
    assert type(subset1) is type(subset2)
    assert isinstance(struc1_allchains, Pdb)
    # Identity test on purpose: "== None" would call Pdb's own __eq__.
    assert subset1 is None or isinstance(subset1, Pdb)

    # Fall back to the complete structures when no subsets were supplied.
    if subset1 is None:
        subset1 = struc1_allchains
    if subset2 is None:
        subset2 = struc2_allchains

    # Fall back to the sequences stored in the structures themselves.
    if seq1 is None:
        seq1 = subset1.get_seq()
    if seq2 is None:
        seq2 = subset2.get_seq()

    if not (seq1 and seq2):
        raise ValueError(
            "Need to have non-empty sequence to align proteins:\nseq1:%s\nseq2:%s\n"
            % (seq1, seq2))

    subset1_resbounds = subset1.residue_boundaries()
    subset2_resbounds = subset2.residue_boundaries()

    # Residue counts according to the structure data.
    pdb1_rescount = len(subset1_resbounds)
    pdb2_rescount = len(subset2_resbounds)

    # The residue counts must coincide in sequence and structure data,
    # otherwise the alignment indices below would address the wrong residues.
    seq1_rescount = length_ungapped(seq1)
    assert seq1_rescount == pdb1_rescount, \
        "length_ungapped(seq1) = %d, pdb1_rescount = %d" % (
            seq1_rescount, pdb1_rescount)
    seq2_rescount = length_ungapped(seq2)
    assert seq2_rescount == pdb2_rescount, \
        "length_ungapped(seq2) = %d, pdb2_rescount = %d" % (
            seq2_rescount, pdb2_rescount)

    # Residue indices of the aligned residues, one list per structure.
    aligned_indeces1, aligned_indeces2 = find_aligned_residues(seq1, seq2)
    assert len(aligned_indeces1) == len(aligned_indeces2)

    if not aligned_indeces1:
        raise ParsingError("No aligned residues?")

    # Collect the requested atoms of every aligned residue pair.
    aligned_pdb1 = Pdb(subset1, [])
    aligned_pdb2 = Pdb(subset2, [])
    expected_atom_count = len(atom_types)

    for ix1, ix2 in zip(aligned_indeces1, aligned_indeces2):
        residue1 = subset1.get_atoms(slice=subset1_resbounds[ix1],
                                     atom_types=atom_types)
        residue2 = subset2.get_atoms(slice=subset2_resbounds[ix2],
                                     atom_types=atom_types)

        # At least one residue did not yield exactly the requested number of
        # atoms: reconcile the two atom sets so they can be paired up.
        if (expected_atom_count != len(residue1)
                or expected_atom_count != len(residue2)):
            residue1, residue2 = intersectAtomTypes(residue1, residue2)

        assert len(residue1) == len(residue2)

        aligned_pdb1.append_atoms(residue1)
        aligned_pdb2.append_atoms(residue2)

    assert len(aligned_pdb1) == len(aligned_pdb2)

    return aligned_pdb1, aligned_pdb2