def buildProtein(url_target):
    """Download a PDB file and assemble its ATOM records into a Protein.

    Every line starting with ``ATOM`` is sliced at the fixed PDB columns
    (see https://www.cgl.ucsf.edu/chimera/docs/UsersGuide/tutorials/pdbintro.html)
    and turned into an ``Atom``. Consecutive atoms that share the same chain
    identifier and residue number are grouped into one ``AminoAcid``; atoms
    whose name is in ``BACKBONE_ATOMS`` are stored separately from the
    side-chain atoms.

    Args:
        url_target: URL of the PDB file, handed to ``urllib2.urlopen``.

    Returns:
        A ``Protein`` wrapping the parsed ``AminoAcid`` objects in file
        order. The list is empty when the file has no ATOM records.
    """
    proteinList = []
    backboneList, sidechainList = [], []
    currentAminoAcid, currentSeq = "", ""
    # None (rather than 0) marks "no residue seen yet": PDB files do not have
    # to start numbering at any particular value, so 0 can be a real residue
    # number and must not double as the sentinel.
    currentPos = None

    stream = urllib2.urlopen(url_target)
    try:
        for line in stream:
            if line[0:4] != "ATOM":
                continue

            # Slice positions are the 0-based equivalents of the 1-based
            # column ranges in the PDB format description.
            chainId = str(line[21:22])
            residuePos = int(line[22:26])

            # A change of residue number (or of chain, since two chains may
            # reuse the same number) means the previous residue is complete.
            residueChanged = (residuePos != currentPos
                              or chainId != currentSeq)
            if currentPos is not None and residueChanged:
                # Amino Acid, SEQRES, Position,
                # Backbone Atoms [N][Ca][C], Sidechain Atoms [1]...[n]
                proteinList.append(
                    AminoAcid(currentAminoAcid, currentSeq, currentPos,
                              list(backboneList), list(sidechainList)))
                backboneList, sidechainList = [], []

            currentAminoAcid = str(line[17:20])
            currentSeq = chainId
            currentPos = residuePos

            atomName = line[12:16].strip()
            # x, y, z live in columns 31-38, 39-46 and 47-54 (1-based), i.e.
            # slices [30:38], [38:46], [46:54]. Starting one character later
            # would cut the sign or leading digit off full-width values
            # such as "-100.123".
            atom = Atom(atomName,
                        float(line[30:38]),
                        float(line[38:46]),
                        float(line[46:54]),
                        str(line[76:78].replace(" ", "")))
            if atomName in BACKBONE_ATOMS:
                backboneList.append(atom)
            else:
                sidechainList.append(atom)
    finally:
        # Release the connection even if a malformed record raises.
        stream.close()

    # Residues are flushed only when the *next* one begins, so the last
    # residue still has to be appended here. Skip it when no ATOM record was
    # seen at all, otherwise an empty placeholder residue would be emitted.
    if currentPos is not None:
        proteinList.append(
            AminoAcid(currentAminoAcid, currentSeq, currentPos,
                      list(backboneList), list(sidechainList)))

    return Protein(list(proteinList))
def ribosome_get_enzymes_from_strand(strand):
    """Yield the enzymes encoded by *strand*, read two characters at a time.

    The strand is walked in non-overlapping duplets; a trailing unpaired
    character is ignored. Each duplet other than ``'AA'`` is converted with
    ``AminoAcid.get_acid_by_duplet`` and collected. An ``'AA'`` duplet ends
    the current collection, which is yielded as an ``Enzyme`` before a new
    one is started. Whatever has been collected when the strand ends is
    yielded as the final ``Enzyme``. A falsy strand yields nothing.
    """
    if strand:
        pending_acids = []
        # Stepping by two up to len - 1 visits exactly len // 2 full duplets.
        for start in range(0, len(strand) - 1, 2):
            pair = strand[start:start + 2]
            if pair != 'AA':
                pending_acids.append(AminoAcid.get_acid_by_duplet(pair))
                continue
            # 'AA' acts as the separator between two enzymes.
            yield Enzyme(pending_acids)
            pending_acids = []
        yield Enzyme(pending_acids)
def test_can_get_acid_by_duplet(self):
    """Looking up the duplet 'AG' must return ``AminoAcid.dlt``."""
    expected = AminoAcid.dlt
    actual = AminoAcid.get_acid_by_duplet('AG')
    self.assertEqual(actual, expected)