Esempio n. 1
0
def buildProtein(url_target):
    """Download a PDB file and assemble its ATOM records into a Protein.

    Every line of the response that starts with "ATOM" is sliced at the
    fixed PDB column positions. Consecutive atoms that share a residue
    number are grouped into one AminoAcid; atoms whose name is in
    BACKBONE_ATOMS go to that residue's backbone list, all other atoms go to
    its sidechain list.

    Args:
        url_target: URL handed to urllib2.urlopen; expected to serve the
            text of a PDB file.

    Returns:
        A Protein wrapping the list of AminoAcid objects in file order. The
        list is empty when the file contains no ATOM record.

    Raises:
        ValueError: if the residue number or a coordinate field of an ATOM
            record is not numeric. Errors raised by urllib2.urlopen
            propagate unchanged.
    """
    proteinList = []
    backboneList, sidechainList = [], []
    currentAminoAcid, currentSeq = "", ""
    # None means "no residue seen yet". A numeric sentinel such as 0 would be
    # indistinguishable from a real residue number, and PDB files are free to
    # start their numbering anywhere (including 0).
    currentPos = None

    stream = urllib2.urlopen(url_target)
    try:
        for line in stream:
            # Column layout reference:
            # https://www.cgl.ucsf.edu/chimera/docs/UsersGuide/tutorials/pdbintro.html
            if line[0:4] != "ATOM":
                continue

            # The residue number is the only per-residue key in an ATOM
            # record, so a change in it marks the start of a new amino acid.
            residuePos = int(line[22:26])
            if currentPos is not None and residuePos != currentPos:
                # Flush the completed residue:
                # name, chain, position, backbone atoms, sidechain atoms.
                proteinList.append(
                    AminoAcid(currentAminoAcid, currentSeq, currentPos,
                              backboneList, sidechainList))
                backboneList, sidechainList = [], []

            currentAminoAcid = line[17:20]
            currentSeq = line[21:22]
            currentPos = residuePos

            atomName = line[12:16].strip()
            # x, y, z occupy the 8-character fields at columns 31-38, 39-46
            # and 47-54 (1-based), i.e. slices [30:38], [38:46], [46:54].
            # Starting one column later would cut the sign or leading digit
            # off any coordinate that fills its whole field.
            atom = Atom(atomName,
                        float(line[30:38]),
                        float(line[38:46]),
                        float(line[46:54]),
                        line[76:78].strip())
            if atomName in BACKBONE_ATOMS:
                backboneList.append(atom)
            else:
                sidechainList.append(atom)
    finally:
        # Release the connection even if a malformed record raises.
        stream.close()

    # A residue is only flushed when the *next* one begins, so the last
    # residue of the file is still pending here. Skip it when no ATOM record
    # was seen at all, otherwise an empty placeholder would be emitted.
    if currentPos is not None:
        proteinList.append(
            AminoAcid(currentAminoAcid, currentSeq, currentPos,
                      backboneList, sidechainList))
    return Protein(proteinList)
Esempio n. 2
0
def ribosome_get_enzymes_from_strand(strand):
    """Lazily translate a strand into Enzyme objects.

    The strand is consumed two symbols at a time; a trailing unpaired
    symbol is ignored. Each duplet other than 'AA' is converted with
    AminoAcid.get_acid_by_duplet and added to the chain being built. An
    'AA' duplet ends the current chain, which is yielded as an Enzyme
    (even if it is empty). After the last duplet the remaining chain is
    yielded as a final Enzyme.

    A falsy strand yields nothing at all.
    """
    if not strand:
        return

    # Largest even prefix length: only complete duplets are read.
    paired_length = len(strand) - len(strand) % 2
    pending = []
    for start in range(0, paired_length, 2):
        codon = strand[start:start + 2]
        if codon != 'AA':
            pending.append(AminoAcid.get_acid_by_duplet(codon))
            continue
        # 'AA' acts as a separator: emit what has been collected so far and
        # start a fresh list so the yielded Enzyme keeps its own chain.
        yield Enzyme(pending)
        pending = []
    yield Enzyme(pending)
 def test_can_get_acid_by_duplet(self):
     """The 'AG' duplet must be looked up as AminoAcid.dlt."""
     wanted = AminoAcid.dlt
     found = AminoAcid.get_acid_by_duplet('AG')
     self.assertEqual(found, wanted)