def write_pdb(balls, fn, strandlens, extraballs=False, reindexmap=None, mapfn=None, extra_ball_num=4):
    """Write the CA trace described by ``balls`` to the PDB file ``fn``.

    ``balls`` is read consecutively, ``strandlens[i]`` entries for strand ``i``.
    Unless ``extraballs`` is true, ``extra_ball_num`` balls are dropped from each
    end of every strand.  Odd-numbered strands are reversed before being
    appended, and every remaining ball becomes a one-atom (CA) residue of
    chain 'A' in model 1 of a structure named "ref".

    Parameters:
        balls: indexable collection of ball objects; each must support
            ``ball[Ball.p_resseqid]``, ``ball[Ball.p_aaid]`` and ``getcoord()``.
        fn: output target handed to ``Bio.PDB.PDBIO.save``.
        strandlens: number of balls belonging to each strand, in order.
        extraballs: when true, keep the padding balls at the strand ends.
        reindexmap: optional data written with ``np.savetxt`` (integer format).
        mapfn: destination for ``reindexmap``; the map is only written when
            both ``reindexmap`` and ``mapfn`` are given.
        extra_ball_num: number of padding balls at each strand end
            (default 4, the value that used to be hard-coded).

    Raises:
        IndexError: if ``balls`` holds fewer entries than ``strandlens``
            requires, or a strand is too short to have its padding removed.
    """

    def make_residue(ball, hetero_flag):
        # One residue holding a single CA atom; the residue sequence id is
        # also used as the atom serial number.
        resseq = ball[Ball.p_resseqid]
        restype = AA.index_to_three(ball[Ball.p_aaid])
        residue = Bio.PDB.Residue.Residue((hetero_flag, resseq, ' '), restype, ' ')
        cacoord = ball.getcoord()
        residue.add(Bio.PDB.Atom.Atom('CA', cacoord, 0, 0, ' ', 'CA', resseq, 'C'))
        return residue

    tmpballs = []
    pos = 0
    for strand_index, strandlen in enumerate(strandlens):
        strand = [balls[pos + offset] for offset in range(strandlen)]
        pos += strandlen

        # remove the first and the last balls from the strands, which are extra balls
        if not extraballs:
            if len(strand) < 2 * extra_ball_num:
                raise IndexError(
                    "strand %d has %d balls, fewer than the %d padding balls to remove"
                    % (strand_index, len(strand), 2 * extra_ball_num))
            strand = strand[extra_ball_num:len(strand) - extra_ball_num]

        # Odd strands are stored in the opposite direction; flip them back.
        if strand_index % 2 == 1:
            strand = strand[::-1]
        tmpballs += strand

    chain = Bio.PDB.Chain.Chain('A')
    for ball in tmpballs:
        try:
            chain.add(make_residue(ball, ' '))
        except Exception:
            # Retry with hetero flag 'A' so the residue id differs
            # (presumably Chain.add rejected a duplicate id -- confirm).
            # Exception rather than a bare except, so that KeyboardInterrupt
            # and SystemExit are no longer swallowed.
            chain.add(make_residue(ball, 'A'))

    model = Bio.PDB.Model.Model(1)
    model.add(chain)
    structure = Bio.PDB.Structure.Structure("ref")
    structure.add(model)

    io = Bio.PDB.PDBIO()
    io.set_structure(structure)
    io.save(fn, write_end=False)

    if reindexmap is not None and mapfn is not None:
        np.savetxt(mapfn, reindexmap, fmt='%d')
def __init__(self, gc_file):
    """Load a genetic-code table from the tab-separated file ``gc_file``.

    Every row is split on tabs.  All fields of the row are passed to
    ``AminoAcid`` and the result is stored in ``self.amino_acids`` under the
    row's first field.  The fourth field is a comma-separated codon list;
    each codon is mapped to the row's first field in ``self.codons``.
    """
    self.gc_file = gc_file
    self.codons = {}
    self.amino_acids = {}
    self.has_CAI = False

    with open(self.gc_file) as handle:
        for record in handle:
            fields = record.strip("\n").split("\t")
            self.amino_acids[fields[0]] = AminoAcid(*fields)
            key = fields[0]
            self.codons.update((codon, key) for codon in fields[3].split(","))
def calculate_Energy(df, matrix):
    """Return the summed pairwise residue energy of the structure read from ``df``.

    The function makes two passes:

    1. Parse the ATOM records of ``df`` into one ``AA.AminoAcid`` per residue
       number.  Atoms named "H" and residues whose name is not a key of
       ``matrix`` are ignored.  A residue is kept only if its
       ``CalculateCenter()`` does not return False.
    2. For every ordered pair of distinct kept residues whose centre distance
       is at most the sum of their radii (from ``LoadRadius()``), add
       ``matrix[name_m][name_n][theta_bin][phi_bin] / rho`` to the energy.

    Parameters:
        df: object with a ``readlines()`` method yielding PDB lines.
        matrix: nested lookup ``matrix[res_m][res_n][theta_bin][phi_bin]``;
            its keys also define which residue names are accepted.

    Returns:
        The accumulated energy; 0 when no residue could be built.
    """
    # NOTE(review): "02" (zero-two) looks like a typo for "O2"; kept unchanged
    # because altering it changes which atoms are summed -- confirm.
    backbone_atoms = {"N", "CA", "C", "O", "O1", "02"}

    radiusDict = LoadRadius()
    CurrentAANitrogen = None
    CurrentAACA = None
    Currentresidue_num = None
    CurrentAA = None
    EachAA = []

    for line in df.readlines():
        if line[0:4] != "ATOM":
            continue
        element_list = extract_Data(line)
        atom_name = element_list[2]
        alternate_indicator = element_list[3]
        residue_name = element_list[4]
        residue_num = element_list[-4]
        xcor = float(element_list[-3])
        ycor = float(element_list[-2])
        zcor = float(element_list[-1])
        if atom_name == "H":
            continue
        if residue_name not in matrix:
            continue

        if CurrentAA is None:
            # First accepted atom of the file.
            CurrentAA = AA.AminoAcid(residue_name)
            Currentresidue_num = residue_num
        elif residue_num != Currentresidue_num:
            # A new residue begins: finish the previous one.  The comparison
            # is deliberately "!= False" so that any other return value of
            # CalculateCenter() (including None) counts as success.
            if CurrentAA.CalculateCenter() != False:
                CurrentAA.InputCAN(CurrentAANitrogen, CurrentAACA)
                EachAA.append(CurrentAA)
            CurrentAA = AA.AminoAcid(residue_name)
            Currentresidue_num = residue_num
            # The atom that opened the new residue is processed below even
            # when the previous residue was rejected; skipping it would leave
            # the previous residue's N coordinate in place.

        # Remember the backbone N / CA positions (alternate location "B" is skipped).
        if atom_name == "N" or atom_name == "CA":
            if alternate_indicator == "B":
                continue
            if atom_name == "N":
                CurrentAANitrogen = np.array([xcor, ycor, zcor])
            else:
                CurrentAACA = np.array([xcor, ycor, zcor])

        # Every GLY atom, and every non-backbone atom of other residues,
        # contributes to the residue centre.
        if residue_name == "GLY" or atom_name not in backbone_atoms:
            # NOTE(review): atoms with alternate indicator "A" are only summed
            # when the character at index 15 of the line is "1" -- confirm intent.
            if alternate_indicator == " " or (alternate_indicator == "A" and line[15] == "1"):
                CurrentAA.SumCenters(xcor, ycor, zcor)

    # Finish the last residue (there is none when no ATOM record was accepted).
    if CurrentAA is not None and CurrentAA.CalculateCenter() != False:
        CurrentAA.InputCAN(CurrentAANitrogen, CurrentAACA)
        EachAA.append(CurrentAA)

    # Scan over. Each amino acid is stored as an object in EachAA.
    E = 0
    for m, first in enumerate(EachAA):
        first.EstablishCoordinate()
        for n, second in enumerate(EachAA):
            if m == n:
                continue
            dis = first.DistanceBetweenAA(second.center)
            radiusSum = radiusDict[first.name] + radiusDict[second.name]
            # Two residues interact when their centres are no further apart
            # than the sum of their radii.
            if dis <= radiusSum:
                rho, theta, phi = first.ChangeCoordinate(second.center)
                # Bin the angles into 20 slots each, clamping to the last bin.
                theta = min(int(math.floor(theta * 20 / np.pi)), 19)
                phi = min(int(math.floor(phi * 10 / np.pi) + 10), 19)
                E += matrix[first.name][second.name][theta][phi] / rho
    return E
def calculate_Energy(df,matrix):
    """Return the summed pairwise residue energy of the structure read from ``df``.

    ATOM records are grouped by residue number into ``AA.AminoAcid`` objects.
    Atoms named "H", residues whose name is not a key of ``matrix`` and atoms
    whose alternate indicator is neither " " nor "A" are ignored.  A finished
    residue is used only when its ``Check()`` returns True; it then gets
    ``CalculateCenter()`` called and joins the energy calculation.  Residues
    failing the check are skipped.

    For every ordered pair of distinct used residues whose centre distance is
    at most the sum of their radii (from ``LoadRadius()``), the value
    ``matrix[name_m][name_n][theta_bin][phi_bin] / rho`` is added.

    Parameters:
        df: object with a ``readlines()`` method yielding PDB lines.
        matrix: nested lookup ``matrix[res_m][res_n][theta_bin][phi_bin]``;
            its keys also define which residue names are accepted.

    Returns:
        The accumulated energy; 0 when no residue could be built.
    """
    # NOTE(review): "02" (zero-two) looks like a typo for "O2"; kept unchanged
    # because altering it changes which atoms are summed -- confirm.
    backbone_atoms = {"N","CA","C","O","O1","02"}

    radiusDict = LoadRadius()
    Currentresidue_num = None
    CurrentAA = None
    # list of amino acids that passed Check()
    UseAA_list = []

    # scan pdb file line by line
    for line in df.readlines():
        if line[0:4] != "ATOM":
            continue
        element_list = extract_Data(line)
        atom_name = element_list[2]
        alternate_indicator = element_list[3]
        residue_name = element_list[4]
        residue_num = element_list[-4]
        xcor = float(element_list[-3])
        ycor = float(element_list[-2])
        zcor = float(element_list[-1])
        # ignore hydrogen
        if atom_name == "H":
            continue
        # ignore amino acids that are not in the matrix
        if residue_name not in matrix:
            continue

        if CurrentAA is None:
            # first amino acid
            CurrentAA = AA.AminoAcid(residue_name)
            Currentresidue_num = residue_num
        elif residue_num != Currentresidue_num:
            # another amino acid begins: keep the previous one only when
            # Check() reports exactly True (any other value marks a problem).
            if CurrentAA.Check() == True:
                CurrentAA.CalculateCenter()
                UseAA_list.append(CurrentAA)
            CurrentAA = AA.AminoAcid(residue_name)
            Currentresidue_num = residue_num

        # Only the primary conformation (" " or "A") contributes anything.
        if alternate_indicator != " " and alternate_indicator != "A":
            continue

        if atom_name == "N":
            CurrentAA.InputN(np.array([xcor,ycor,zcor]))
        elif atom_name == "CA":
            CurrentAA.InputCA(np.array([xcor,ycor,zcor]))

        # Every GLY atom, and every non-backbone atom of other residues,
        # is passed to SumCenters.
        if residue_name == "GLY" or atom_name not in backbone_atoms:
            CurrentAA.SumCenters(xcor,ycor,zcor,atom_name)

    # Finish the last residue (there is none when no ATOM record was accepted).
    if CurrentAA is not None and CurrentAA.Check() == True:
        CurrentAA.CalculateCenter()
        UseAA_list.append(CurrentAA)

    # Scan over. Each usable amino acid is stored as an object in UseAA_list.
    E = 0
    for m, first in enumerate(UseAA_list):
        first.EstablishCoordinate()
        for n, second in enumerate(UseAA_list):
            if m == n:
                continue
            dis = first.DistanceBetweenAA(second.center)
            radiusSum = radiusDict[first.name] + radiusDict[second.name]
            # Two residues interact when their centres are no further apart
            # than the sum of their radii.
            if dis <= radiusSum:
                rho,theta,phi = first.ChangeCoordinate(second.center)
                # Bin the angles into 20 slots each, clamping to the last bin.
                theta = min(int(math.floor(theta*20/np.pi)),19)
                phi = min(int(math.floor(phi*10/np.pi) + 10),19)
                E += matrix[first.name][second.name][theta][phi] / rho
    return E
def _construct_ideal_balls_(self):
    """Build the idealised CA positions of the barrel and store them as balls.

    Inputs are read from disk:
      * ``inputs/<pdb>/<pdb>.res``            residue type indices
      * ``<inputdirn>/regs/<pdb>.regs``       registrations ("regs")
      * ``<inputdirn>/peris/<pdb>.peris``     strand positions used for the reg prediction
      * ``<inputdirn>/facings/<pdb>.facings`` 'OUT' / other, one line per strand
      * ``inputs/<pdb>/<pdb>.strands``        first/last residue of each strand

    The regs and facings are shifted to the strand ends actually used, every
    strand is extended by ``Barrel.extra_ball_num`` residues at both ends, and
    one coordinate per residue is computed from ``self.A``, ``self.B``,
    ``self.dr``, ``self.dw`` and ``self.np1_right``.

    Side effects: sets ``self.radius`` and appends to ``self.strandlens``,
    ``self.reindexmap`` and ``self.balls``.  Nothing is returned.
    """
    # load residues
    residues = np.loadtxt('inputs/{pdb}/{pdb}.res'.format(pdb=self.pdb)).astype(int).tolist()
    # Prepend a placeholder entry (presumably so that residue numbers can be
    # used as 1-based indices -- confirm).
    residues = [200]+residues

    # load regs and peris got from reg adjustment
    periregs = np.loadtxt('{inputdirn}/regs/{pdb}.regs'.format(inputdirn=self.inputdirn, pdb=self.pdb)).astype(int)
    peris = np.loadtxt('{inputdirn}/peris/{pdb}.peris'.format(inputdirn=self.inputdirn, pdb=self.pdb)).astype(int)

    # load facings got from reg adjustment
    with open('{inputdirn}/facings/{pdb}.facings'.format(inputdirn=self.inputdirn, pdb=self.pdb)) as f:
        lines = f.readlines()
    firstfacings = [ line.strip() for line in lines ]

    # load strandends
    strandends = np.loadtxt('inputs/{pdb}/{pdb}.strands'.format(pdb=self.pdb)).astype(int)

    # the strand ranges here may not be the ranges used for reg prediction
    # correct reg according to the difference between strandends to construct barrel and peris used in reg pred
    # after this loop, array periregs will be the predicted regs of strandends
    for strdi in range(len(strandends)):
        strdim1 = (strdi-1)%len(strandends)
        if strdi%2==0:
            periregs[strdi] -= strandends[strdi][0]-peris[strdi]
            periregs[strdim1] += strandends[strdi][0]-peris[strdi]
        else:
            periregs[strdi] += strandends[strdi][1]-peris[strdi]
            periregs[strdim1] -= strandends[strdi][1]-peris[strdi]

    # correct facing according to the difference between strandends to construct barrel and peris used in reg pred
    # (an odd shift flips the facing of the first residue)
    for strdi in range(len(strandends)):
        if strdi%2==0:
            if (strandends[strdi][0]-peris[strdi])%2!=0:
                if firstfacings[strdi]=='OUT':
                    firstfacings[strdi]='IN'
                else:
                    firstfacings[strdi]='OUT'
        else:
            if (strandends[strdi][1]-peris[strdi])%2!=0:
                if firstfacings[strdi]=='OUT':
                    firstfacings[strdi]='IN'
                else:
                    firstfacings[strdi]='OUT'

    # add extra residues for bbq
    for strdi in range(len(strandends)):
        strandends[strdi][0]-=Barrel.extra_ball_num
        strandends[strdi][1]+=Barrel.extra_ball_num

    # construct facing arrays for all residues (including extra residues);
    # the facing alternates from one residue to the next along a strand
    facings = []
    for fac in firstfacings:
        if fac == 'OUT':
            facings.append([Ball.Facing.OUT])
        else:
            facings.append([Ball.Facing.IN])
    for strdi in range(len(strandends)):
        for resi in range(strandends[strdi][1]-strandends[strdi][0]):
            if facings[strdi][resi] == Ball.Facing.OUT:
                facings[strdi].append(Ball.Facing.IN)
            else:
                facings[strdi].append(Ball.Facing.OUT)

    # cumulative offset of each strand: 0, -reg0, -reg0-reg1, ...
    peripositions = np.cumsum( np.hstack( ([0], -periregs) ) )

    N = len(strandends) # strand num
    A = self.A # intrastrand Ca distance
    B = self.B # interstrand Ca distance
    S = sum(periregs) # shear number

    ## circle formula
    #a = math.sqrt( (N*B)**2+(S*A)**2 ) / 2.0 / math.pi # tilt angle
    #theta = math.asin(S*A/2.0/math.pi/a) # radius

    ## polygon formula
    theta = math.atan( S*A / (N*B) ) # tilt angle
    a = B / ( 2*math.sin(math.pi/N) * math.cos(theta) ) # radius
    self.radius = a

    b = a / math.tan(theta) # vertical speed
    c = math.sqrt( a*a + b*b )
    delta = 2 * math.pi * a * a / (c*N) # offset on the neighbouring strand to ensure inter H-bond is perpendicular to the strand

    currid = 0
    ids = []
    seqids = []
    restypes = []
    cacoords = []

    # construct the barrel
    for strdi in range(N):
        # seq ids (odd strands run in the opposite direction)
        if strdi%2==0:
            seqids.append( range( strandends[strdi][0], strandends[strdi][1]+1 ) )
        else:
            seqids.append( range( strandends[strdi][0], strandends[strdi][1]+1 )[::-1] )
        ids.append( range( currid, currid+len(seqids[strdi]) ) )
        currid += len(seqids[strdi])

        # residue types; 'C' is used when the lookup fails
        restypes.append([])
        for seqid in seqids[strdi]:
            try:
                restypes[strdi].append(AA.index_to_one(residues[seqid]))
            except Exception:
                # Exception rather than a bare except, so that
                # KeyboardInterrupt / SystemExit are not swallowed.
                # NOTE(review): a negative seqid does not fail here, it
                # indexes from the end of the list -- confirm that is intended.
                restypes[strdi].append('C')

        cacoords.append([])
        for resi in range(strandends[strdi][1]-strandends[strdi][0]+1):
            # zigzag deviation: dr is the radial offset, dw the offset along
            # the direction (xn1, yn1, zn1) computed below
            if facings[strdi][resi] == Ball.Facing.OUT:
                dr = self.dr
                # righthand side of out facing residue is always SH
                # lefthand side NH
                if strdi%2==0:
                    dw = self.dw
                else:
                    dw = -self.dw
                if self.np1_right: #test TODO
                    if strdi%2==0:
                        dw = -self.dw
                    else:
                        dw = self.dw
            else:
                dr = -self.dr
                if strdi%2==0:
                    dw = -self.dw
                else:
                    dw = self.dw
                if self.np1_right: #test TODO
                    if strdi%2==0:
                        dw = self.dw
                    else:
                        dw = -self.dw

            # position on the helix of radius a+dr
            s = ( peripositions[strdi] + resi ) * A + strdi * delta
            x = (a+dr) * math.cos(s/c-2*math.pi*strdi/N)
            y = (a+dr) * math.sin(s/c-2*math.pi*strdi/N)
            if self.np1_right: #test TODO
                s = ( peripositions[strdi] + resi ) * A + (N-strdi) * delta
                x = (a+dr) * math.sin(s/c+2*math.pi*strdi/N)
                y = (a+dr) * math.cos(s/c+2*math.pi*strdi/N)
            z = b * s/c

            # direction towards the corresponding point on a neighbouring strand
            xn1 = (a+dr) * ( - math.cos(s/c-2*math.pi*strdi/N) + math.cos((s+delta)/c-2*math.pi*(strdi+1)/N) )
            yn1 = (a+dr) * ( - math.sin(s/c-2*math.pi*strdi/N) + math.sin((s+delta)/c-2*math.pi*(strdi+1)/N) )
            zn1 = b*delta/c
            if (strdi%2==1 and facings[strdi][resi] == Ball.Facing.OUT) or (strdi%2==0 and facings[strdi][resi] == Ball.Facing.IN):
                xn1 = (a+dr) * ( - math.cos(s/c-2*math.pi*(strdi-1)/N) + math.cos((s+delta)/c-2*math.pi*strdi/N) )
                yn1 = (a+dr) * ( - math.sin(s/c-2*math.pi*(strdi-1)/N) + math.sin((s+delta)/c-2*math.pi*strdi/N) )
                zn1 = b*delta/c
            if self.np1_right: #test TODO
                xn1 = (a+dr) * ( - math.sin(s/c+2*math.pi*strdi/N) + math.sin((s+delta)/c+2*math.pi*(strdi+1)/N) )
                yn1 = (a+dr) * ( - math.cos(s/c+2*math.pi*strdi/N) + math.cos((s+delta)/c+2*math.pi*(strdi+1)/N) )
                if (strdi%2==1 and facings[strdi][resi] == Ball.Facing.OUT) or (strdi%2==0 and facings[strdi][resi] == Ball.Facing.IN):
                    xn1 = (a+dr) * ( - math.sin(s/c+2*math.pi*(strdi-1)/N) + math.sin((s+delta)/c+2*math.pi*strdi/N) )
                    yn1 = (a+dr) * ( - math.cos(s/c+2*math.pi*(strdi-1)/N) + math.cos((s+delta)/c+2*math.pi*strdi/N) )

            # normalise the direction and shift the point by dw along it
            n1norm = math.sqrt(xn1*xn1+yn1*yn1+zn1*zn1)
            xn1 = xn1/n1norm
            yn1 = yn1/n1norm
            zn1 = zn1/n1norm
            x+=dw*xn1
            y+=dw*yn1
            z+=dw*zn1
            cacoords[strdi].append(np.array([x,y,z]))

        self.strandlens.append(len(ids[strdi]))

    for i in range(len(ids)):
        for j in range(len(ids[i])):
            ## following line is for model/param selections
            #ball = Ball([ ids[i][j], seqids[i][j], cacoords[i][j][0], cacoords[i][j][1], cacoords[i][j][2], AA.one_to_index(restypes[i][j]), facings[i][j] ])
            ## store ballids instead of seqids. needs to be corrected after bbq
            ## if using seqids, bbq will have problems
            if i%2!=0:
                # odd strand: the second field is the ball id mirrored within the strand
                ball = Ball([ ids[i][j], ids[i][len(ids[i])-j-1], cacoords[i][j][0], cacoords[i][j][1], cacoords[i][j][2], AA.one_to_index(restypes[i][j]), facings[i][j] ])
                # only non-padding residues enter the reindex map
                if j >= Barrel.extra_ball_num and j < len(ids[i])-Barrel.extra_ball_num:
                    self.reindexmap.append( (ids[i][len(ids[i])-j-1], seqids[i][j]) )
            else:
                ball = Ball([ ids[i][j], ids[i][j], cacoords[i][j][0], cacoords[i][j][1], cacoords[i][j][2], AA.one_to_index(restypes[i][j]), facings[i][j] ])
                if j >= Barrel.extra_ball_num and j < len(ids[i])-Barrel.extra_ball_num:
                    self.reindexmap.append( (ids[i][j], seqids[i][j]) )
            self.balls.append(ball)
def __str__(self):
    """Return the ball as one space-separated line.

    Fields, in order: ball id, residue sequence id, coordinates as returned
    by ``getcoord()``, one-letter residue code, facing.
    """
    fields = (
        str(self[Ball.p_ballid]),
        str(self[Ball.p_resseqid]),
        str(self.getcoord()),
        AA.index_to_one(self[Ball.p_aaid]),
        str(self[Ball.p_facing]),
    )
    return " ".join(fields)
def processAAforchian(chain,aaDict):
    """Parse the ATOM records of one chain into a list of ``AA.AminoAcid`` objects.

    Lines not starting with "ATOM", atoms named "H" and residues whose name
    is not in ``aaDict`` are ignored.  Consecutive records with the same
    residue number form one residue.  A finished residue is kept only if its
    ``CalculateCenter()`` does not return False; kept residues receive the
    most recently seen N and CA coordinates through ``InputCAN`` and get
    ``EstablishCoordinate()`` called.

    Parameters:
        chain: iterable of PDB lines.
        aaDict: container of accepted residue names (only ``in`` is used).

    Returns:
        List of the kept residues in file order; empty when ``chain``
        contains no usable ATOM record.
    """
    # NOTE(review): "02" (zero-two) looks like a typo for "O2"; kept unchanged
    # because altering it changes which atoms are summed -- confirm.
    backbone_atoms = {"N", "CA", "C", "O", "O1", "02"}

    CurrentAANitrogen = None
    CurrentAACA = None
    Currentresidue_num = None
    CurrentAA = None
    EachAA = []

    for line in chain:
        if line[0:4] != "ATOM":
            continue
        element_list = extract_Data(line)
        atom_name = element_list[2]
        alternate_indicator = element_list[3]
        residue_name = element_list[4]
        chain_id = element_list[-5]
        residue_num = element_list[-4]
        xcor = float(element_list[-3])
        ycor = float(element_list[-2])
        zcor = float(element_list[-1])
        if atom_name == "H":
            continue
        if residue_name not in aaDict:
            continue

        if CurrentAA is None:
            # First accepted atom of the chain.
            CurrentAA = AA.AminoAcid(residue_name, residue_num, chain_id)
            Currentresidue_num = residue_num
        elif residue_num != Currentresidue_num:
            # A new residue begins: finish the previous one.  "!= False" is
            # deliberate: any other return value counts as success.
            if CurrentAA.CalculateCenter() != False:
                CurrentAA.InputCAN(CurrentAANitrogen, CurrentAACA)
                CurrentAA.EstablishCoordinate()
                EachAA.append(CurrentAA)
            CurrentAA = AA.AminoAcid(residue_name, residue_num, chain_id)
            Currentresidue_num = residue_num
            # The atom that opened the new residue is processed below even
            # when the previous residue was rejected; skipping it would leave
            # the previous residue's N coordinate in place.

        # Remember the backbone N / CA positions (alternate location "B" is skipped).
        if atom_name == "N" or atom_name == "CA":
            if alternate_indicator == "B":
                continue
            if atom_name == "N":
                CurrentAANitrogen = np.array([xcor, ycor, zcor])
            else:
                CurrentAACA = np.array([xcor, ycor, zcor])

        # Every GLY atom, and every non-backbone atom of other residues,
        # contributes to the centre; of alternate locations only "A" is used.
        if residue_name == "GLY" or atom_name not in backbone_atoms:
            if alternate_indicator == " " or alternate_indicator == "A":
                CurrentAA.SumCenters(xcor, ycor, zcor)

    # Finish the last residue (there is none when no ATOM record was accepted).
    if CurrentAA is not None and CurrentAA.CalculateCenter() != False:
        CurrentAA.InputCAN(CurrentAANitrogen, CurrentAACA)
        CurrentAA.EstablishCoordinate()
        EachAA.append(CurrentAA)
    return EachAA
def process_AA2(AA_information, matrix):
    """Return the last residue found in ``AA_information`` as an ``AA.AminoAcid``.

    Lines not starting with "ATOM", atoms named "H" and residues whose name
    is not in ``matrix`` are ignored.  Consecutive records with the same
    residue number form one residue; whenever the residue number changes the
    previous residue is discarded, so only the final residue is returned.
    Before being returned it gets ``CalculateCenter()`` called and receives
    the most recently seen N and CA coordinates through ``InputCAN``.

    NOTE(review): N / CA coordinates are not reset between residues, so a
    final residue lacking one of them inherits it from an earlier residue.

    Parameters:
        AA_information: iterable of PDB lines.
        matrix: container of accepted residue names (only ``in`` is used).

    Returns:
        The final residue, or None when no usable ATOM record was found.
    """
    # NOTE(review): "02" (zero-two) looks like a typo for "O2"; kept unchanged
    # because altering it changes which atoms are summed -- confirm.
    backbone_atoms = {"N", "CA", "C", "O", "O1", "02"}

    CurrentAA = None
    CurrentAANitrogen = None
    CurrentAACA = None
    Currentresidue_num = None

    for line in AA_information:
        if line[0:4] != "ATOM":
            continue
        element_list = extract_Data(line)
        atom_name = element_list[2]
        alternate_indicator = element_list[3]
        residue_name = element_list[4]
        chain_id = element_list[-5]
        residue_num = element_list[-4]
        xcor = float(element_list[-3])
        ycor = float(element_list[-2])
        zcor = float(element_list[-1])
        if atom_name == "H":
            continue
        if residue_name not in matrix:
            continue

        # Start a residue on the first atom and on every residue-number
        # change.  The previous residue is dropped without being finalised
        # because it is never returned or stored.
        if CurrentAA is None or residue_num != Currentresidue_num:
            CurrentAA = AA.AminoAcid(residue_name, residue_num, chain_id)
            Currentresidue_num = residue_num

        # Remember the backbone N / CA positions (alternate location "B" is skipped).
        if atom_name == "N" or atom_name == "CA":
            if alternate_indicator == "B":
                continue
            if atom_name == "N":
                CurrentAANitrogen = np.array([xcor, ycor, zcor])
            else:
                CurrentAACA = np.array([xcor, ycor, zcor])

        # Every GLY atom, and every non-backbone atom of other residues,
        # contributes to the centre; of alternate locations only "A" is used.
        if residue_name == "GLY" or atom_name not in backbone_atoms:
            if alternate_indicator == " " or alternate_indicator == "A":
                CurrentAA.SumCenters(xcor, ycor, zcor)

    if CurrentAA is None:
        # No usable ATOM record: nothing to finalise.
        return None

    CurrentAA.CalculateCenter()
    CurrentAA.InputCAN(CurrentAANitrogen, CurrentAACA)
    return CurrentAA
def main(protein_fasta_open_file, list_codon_usage_open_files, output_destination, restriction_enzymes=""):
    """Back-translate a protein into DNA and write it as FASTA.

    The protein is read with ``Parser.parse_fasta_file``; every codon-usage
    table is read with ``Parser.parse_kazusa_codon_usage_table``.  Codons are
    assigned by ``Calculator.compute_and_Switch`` and the result is checked
    to be three times as long as the protein and to translate back to it.

    When ``restriction_enzymes`` is given, the sequence is redrawn up to 100
    times until no listed enzyme cuts it.  If every draw is cut, the draw
    with the fewest hits (the first draw included) is written instead.

    Parameters:
        protein_fasta_open_file: open FASTA handle with the protein.
        list_codon_usage_open_files: sequence of ``(file name, open handle)``
            pairs; the text before the first '.' of the name labels the organism.
        output_destination: target passed to ``SeqIO.write``.
        restriction_enzymes: enzyme names separated by commas and/or whitespace.

    Returns:
        A status message string.

    Raises:
        Exception: if no codon table is given, if the generated DNA has the
            wrong length, or if it does not translate back to the protein
            (on redraws too, where the process used to be terminated with
            ``exit(1)``).
    """
    translation_error = "error- resulting DNA does not translate back to protein"

    # parse protein
    record = Parser.parse_fasta_file(protein_fasta_open_file)
    name, sequence = record.name, record.seq

    # parse tables
    if len(list_codon_usage_open_files) == 0:
        raise Exception("Error: Empty codon table filnames")

    # parses organism files , assuming they are already open
    creatures = {}
    for fname, open_file in list_codon_usage_open_files:
        creature_name = fname.split('.')[0]
        codon_usage_dict, codon_to_protein_dict, AA_list = Parser.parse_kazusa_codon_usage_table(open_file)
        creatures[creature_name] = (codon_usage_dict, codon_to_protein_dict, AA_list)

    # The amino-acid list and the codon -> protein map are taken from the
    # table that was parsed last.
    _, codon_to_protein_dict, AA_LIST = creatures[creature_name]
    Amino_Acids_obj_list = [AminoAcid.AminoAcid(aa, codon_to_protein_dict) for aa in AA_LIST]
    for creature, creature_tuple in creatures.items():
        codon_usage_dict = creature_tuple[0]
        for amino_acid in Amino_Acids_obj_list:
            amino_acid.add_organism_codons(codon_usage_dict, creature)

    # str(sequence) instead of the private ``sequence._data`` attribute.
    aa_count_dict = ProtParam.ProteinAnalysis(str(sequence)).count_amino_acids()

    def build_dna():
        # replaces aa with codons from codon pool and validates the length
        codons = Calculator.compute_and_Switch(Amino_Acids_obj_list, sequence, aa_count_dict)
        dna = "".join(codons).replace("U", "T")
        if len(dna) != len(sequence) * 3:
            raise Exception("final sequance length does not match input sequence length")
        return dna

    final_sequence = build_dna()
    record = SeqRecord.SeqRecord(Seq(final_sequence, ), name=name)
    if record.translate().seq != sequence:
        raise Exception(translation_error)

    if restriction_enzymes == "":
        SeqIO.write(record, output_destination, "fasta")
        return "ouput sucsessful"

    # restriction enzymes- verifies they do not cut the sequence. if they do, pick the least cut sequence
    # (split() already handles newlines and tabs, only commas need replacing)
    restriction_enzymes_list = restriction_enzymes.replace(",", " ").split()
    batch = RestrictionBatch(restriction_enzymes_list)
    num_cutting = len(check_restriction(Seq(final_sequence, generic_dna), batch))

    # The first draw is the initial best candidate.  Starting from infinity
    # would report an uncut first draw as "cut" and would never keep the
    # first draw even when it is the least cut one.
    best_num_cutting = num_cutting
    best_sequ = final_sequence
    iterations = 100

    # if the original sequence had a restriction site, repeat the sequence building 100 times , or until
    # a non- cut sequence is found
    while iterations > 0 and num_cutting > 0:
        final_sequence = build_dna()
        record = SeqRecord.SeqRecord(Seq(final_sequence, generic_dna), name=name)
        if record.translate().seq != sequence:
            raise Exception(translation_error)

        # if achieved non cutting sequence, save and return
        num_cutting = len(check_restriction(Seq(final_sequence, generic_dna), batch))
        if num_cutting == 0:
            check_restriction(Seq(final_sequence, generic_dna), batch, to_print=True)
            print("printing to output file....")
            SeqIO.write(record, output_destination, "fasta")
            print("ouput sucsessful")
            return "Output Sucsessful"

        # ties go to the most recent draw
        if num_cutting <= best_num_cutting:
            best_num_cutting = num_cutting
            best_sequ = final_sequence
        iterations -= 1

    # return best sequence, as in one that is cut by the least amount of restriction enzymes
    if best_num_cutting > 0:
        cutting = check_restriction(Seq(best_sequ, generic_dna), batch, to_print=True)
        record = SeqRecord.SeqRecord(Seq(best_sequ, generic_dna), name=name)
        SeqIO.write(record, output_destination, "fasta")
        return "The enzymes the cut the sequence are:" + str(cutting) + "\n Output printed to specified location."

    # The very first draw was not cut by any enzyme.
    SeqIO.write(record, output_destination, "fasta")
    return "ouput sucsessful"
def ProcessPDB(chainlines, matrix):
    """Parse PDB ATOM records into a list of ``AA.AminoAcid`` objects.

    Lines not starting with "ATOM", atoms named "H" and residues whose name
    is not in ``matrix`` are ignored.  Consecutive records with the same
    residue number form one residue.  A finished residue is kept only if its
    ``CalculateCenter()`` does not return False; kept residues receive the
    most recently seen N and CA coordinates through ``InputCAN``.

    Parameters:
        chainlines: iterable of PDB lines.
        matrix: container of accepted residue names (only ``in`` is used).

    Returns:
        List of the kept residues in file order; empty when ``chainlines``
        contains no usable ATOM record.
    """
    # NOTE(review): "02" (zero-two) looks like a typo for "O2"; kept unchanged
    # because altering it changes which atoms are summed -- confirm.
    backbone_atoms = {"N", "CA", "C", "O", "O1", "02"}

    CurrentAANitrogen = None
    CurrentAACA = None
    Currentresidue_num = None
    CurrentAA = None
    EachAA = []

    for line in chainlines:
        if line[0:4] != "ATOM":
            continue
        element_list = extract_Data(line)
        atom_name = element_list[2]
        alternate_indicator = element_list[3]
        residue_name = element_list[4]
        residue_num = element_list[-4]
        xcor = float(element_list[-3])
        ycor = float(element_list[-2])
        zcor = float(element_list[-1])
        if atom_name == "H":
            continue
        if residue_name not in matrix:
            continue

        if CurrentAA is None:
            # First accepted atom.
            CurrentAA = AA.AminoAcid(residue_name)
            Currentresidue_num = residue_num
        elif residue_num != Currentresidue_num:
            # A new residue begins: finish the previous one.  "!= False" is
            # deliberate: any other return value counts as success.
            if CurrentAA.CalculateCenter() != False:
                CurrentAA.InputCAN(CurrentAANitrogen, CurrentAACA)
                EachAA.append(CurrentAA)
            CurrentAA = AA.AminoAcid(residue_name)
            Currentresidue_num = residue_num
            # The atom that opened the new residue is processed below even
            # when the previous residue was rejected; skipping it would leave
            # the previous residue's N coordinate in place.

        # Remember the backbone N / CA positions (alternate location "B" is skipped).
        if atom_name == "N" or atom_name == "CA":
            if alternate_indicator == "B":
                continue
            if atom_name == "N":
                CurrentAANitrogen = np.array([xcor, ycor, zcor])
            else:
                CurrentAACA = np.array([xcor, ycor, zcor])

        # Every GLY atom, and every non-backbone atom of other residues,
        # contributes to the residue centre.
        if residue_name == "GLY" or atom_name not in backbone_atoms:
            # NOTE(review): atoms with alternate indicator "A" are only summed
            # when the character at index 15 of the line is "1" -- confirm intent.
            if alternate_indicator == " " or (alternate_indicator == "A" and line[15] == "1"):
                CurrentAA.SumCenters(xcor, ycor, zcor)

    # Finish the last residue (there is none when no ATOM record was accepted).
    if CurrentAA is not None and CurrentAA.CalculateCenter() != False:
        CurrentAA.InputCAN(CurrentAANitrogen, CurrentAACA)
        EachAA.append(CurrentAA)
    return EachAA
def main( fn, fastafile, orf_file="/Users/paulkorir/Dropbox/Euplotes/FrameshiftPredictionData/one_orfs.txt" ):
    """Print the coding sequence of every record of ``fastafile`` in FASTA form.

    For each record the reading frame and the last coding position are looked
    up in the ORF table.  The first "ATG" found in that frame marks the start;
    the slice from the start to the last position (inclusive) is printed after
    a ">" header line.  Records without an in-frame "ATG" are reported on
    stderr and skipped.

    Parameters:
        fn: tab-separated genetic-code table; every row is passed to
            ``AminoAcid`` and its fourth field lists codons separated by commas.
        fastafile: FASTA input handed to ``SeqIO.parse``.
        orf_file: tab-separated ORF table, every row is passed to ``ORFInfo``;
            rows starting with "T" are skipped.  Defaults to the location
            that used to be hard-coded.

    Raises:
        KeyError: if a FASTA record has no entry in the ORF table.
    """
    # NOTE(review): the genetic code is parsed but not used further in this
    # function; it is kept so that an unreadable or malformed table still fails.
    genetic_code = dict()
    codon_dict = dict()
    with open( fn ) as f:
        for row in f:
            l = row.strip( "\n" ).split( "\t" )
            genetic_code[l[0]] = AminoAcid( *l )
            for c in l[3].split( "," ):
                codon_dict[c.replace( "U", "T" )] = l[0]

    orf_frame = dict()
    with open( orf_file ) as f:
        for row in f:
            # rows starting with "T" are skipped (presumably the header -- confirm)
            if row[0] == "T":
                continue
            l = row.strip( "\n" ).split( "\t" )
            orf_frame[l[0]] = ORFInfo( *l )

    # read in the data from the fasta file
    for seq_record in SeqIO.parse( fastafile, 'fasta' ):
        sequence = str( seq_record.seq )
        seq_name = seq_record.id.split( " " )[0]
        frame = orf_frame[seq_name].frame
        last = orf_frame[seq_name].last

        # get the position of the first ATG, stepping codon by codon in frame
        start = None
        for i in range( frame, len( sequence ) - 2, 3 ):
            if sequence[i:i+3] == "ATG":
                start = i
                break

        if start is None:
            # write() instead of the Python-2-only "print >>" statement
            sys.stderr.write( "Missing ATG in frame %d in sequence %s\n" % ( frame, seq_name ) )
            continue

        # make sure it's in the first coding frame
        cds = sequence[start:last+1]
        # single-argument print() gives the same output on Python 2 and 3
        print( ">" + seq_record.id )
        print( cds )
def _accumulate_atom(line, Element, AAtype, residue, nitrogen, alpha_carbon, xcor, ycor, zcor):
    """Fold one ATOM record into the residue that is currently being built.

    Parameters:
        line: the raw text of the record; ``line[16]`` is read as the
            alternate-location flag and ``line[15]`` as the character before it.
        Element: the field list returned by ``ExtractData`` for this line;
            ``Element[2]`` is the atom name and the coordinates sit at
            indices *xcor*, *ycor*, *zcor*.
        AAtype: three-letter residue name of the record.
        residue: the ``AA.AminoAcid`` object receiving ``SumCenters`` calls.
        nitrogen, alpha_carbon: the N / CA coordinates seen so far.
        xcor, ycor, zcor: indices into *Element* for the coordinates.

    Returns:
        The (possibly updated) ``(nitrogen, alpha_carbon)`` pair.
    """
    atom = Element[2]

    def coords():
        # Converted lazily: records that are ignored are never parsed, so a
        # malformed coordinate on such a record cannot raise.
        return [float(Element[xcor]), float(Element[ycor]), float(Element[zcor])]

    if atom == "N" or atom == "CA":
        if line[16] == "B":
            # alternate location B of a backbone atom is ignored entirely
            return nitrogen, alpha_carbon
        if atom == "N":
            nitrogen = np.array(coords())
        else:
            alpha_carbon = np.array(coords())

    # Only non-backbone atoms feed the centre, except for GLY where every
    # atom does.
    # NOTE(review): "02" (zero-two) is kept byte-for-byte from the original;
    # it looks like a typo for an oxygen atom name - confirm before changing.
    if AAtype == "GLY" or atom not in {"N", "CA", "C", "O", "O1", "02"}:
        altloc = line[16]
        # If cases like "AASN or BASN" appear, only A is added.
        # NOTE(review): the extra line[15] == "1" requirement is preserved
        # from the original; its intent is not evident from this file.
        if altloc == " " or (altloc == "A" and line[15] == "1"):
            residue.SumCenters(*coords())
    return nitrogen, alpha_carbon


def SingleStructure(decoyname,DecoyPath,model_path,radius_path):
    """Score one structure file with the pairwise residue potential.

    The file is scanned record by record, one ``AA.AminoAcid`` object is
    built per residue, and then every ordered pair of distinct residues whose
    distance is within the sum of their radii contributes
    ``potential[theta_bin][phi_bin] / rho`` to the energy.

    Parameters:
        decoyname: file name; only compared with ``"native.pdb"`` to choose
            which fields of the parsed record hold the x/y/z coordinates.
        DecoyPath: path of the structure file to read.
        model_path: passed to ``loadModel`` together with an empty
            per-residue-type dictionary.
        radius_path: passed to ``LoadRadius``.

    Returns:
        ``(E, Time)``: the accumulated energy and the number of residue
        pairs that contributed to it.
    """
    residue_types = ("ALA", "VAL", "LEU", "ILE", "PHE",
                     "TRP", "MET", "PRO", "GLY", "SER",
                     "THR", "CYS", "TYR", "ASN", "GLN",
                     "HIS", "LYS", "ARG", "ASP", "GLU")
    cdDict = {residue_type: {} for residue_type in residue_types}
    cdDict = loadModel(model_path, cdDict)
    radiusDict = LoadRadius(radius_path)

    # "native.pdb" carries its coordinates one field further right than the
    # other files.
    if decoyname == "native.pdb":
        xcor, ycor, zcor = 6, 7, 8
    else:
        xcor, ycor, zcor = 5, 6, 7

    CurrentAANitrogen = None
    CurrentAACA = None
    CurrentAANumber = None
    CurrentAA = None
    EachAA = []

    # "with" guarantees the file is closed (the original leaked the handle).
    with open(DecoyPath) as df:
        for line in df:
            Element, AAtype, AANUMBER = ExtractData(line)

            if Element[0] != "ATOM":
                # A non-ATOM record closes the residue in progress.
                if CurrentAA is None:
                    # Nothing to close: leading header records, or a second
                    # terminator in a row.  The original dereferenced None
                    # in the first case and appended the same residue twice
                    # in the second.
                    continue
                CurrentAA.CalculateCenter()
                CurrentAA.InputCAN(CurrentAANitrogen, CurrentAACA)
                EachAA.append(CurrentAA)
                CurrentAA = None
                continue

            if Element[2] == "H":
                continue
            if AAtype not in cdDict:
                continue

            if CurrentAA is None:
                # first residue of the file (or first after a terminator)
                CurrentAA = AA.AminoAcid(AAtype)
                CurrentAANumber = AANUMBER
            elif AANUMBER != CurrentAANumber:
                # another amino acid begins: close the previous one
                state = CurrentAA.CalculateCenter()
                CompletedAA = CurrentAA
                CurrentAA = AA.AminoAcid(AAtype)
                CurrentAANumber = AANUMBER
                # Compared with == False on purpose: a None return value
                # must still count as success.
                if state == False:  # noqa: E712
                    # NOTE(review): the previous residue is discarded and,
                    # as in the original, the current record is skipped
                    # too, so the new residue misses this atom - confirm
                    # that this is intended.
                    continue
                CompletedAA.InputCAN(CurrentAANitrogen, CurrentAACA)
                EachAA.append(CompletedAA)

            CurrentAANitrogen, CurrentAACA = _accumulate_atom(
                line, Element, AAtype, CurrentAA,
                CurrentAANitrogen, CurrentAACA, xcor, ycor, zcor)

    # NOTE(review): as in the original, a residue still open at end of file
    # (no trailing non-ATOM record) is not added to EachAA.

    # Scan over. Each amino acid is stored as an object in EachAA. Next step
    # is to accumulate the energy over all ordered pairs.
    E = 0  # accumulated energy
    Time = 0  # number of contributing pairs
    for m, first in enumerate(EachAA):
        # Establish axis first
        first.EstablishCoordinate()
        for n, second in enumerate(EachAA):
            if m == n:
                continue
            dis = first.DistanceBetweenAA(second.center)
            radiusSum = radiusDict[first.name] + radiusDict[second.name]
            # Two residues interact when their distance does not exceed the
            # sum of their radii.
            if dis <= radiusSum:
                rho, theta, phi = first.ChangeCoordinate(second.center)
                # Both angles are binned into 20 cells, clamped to the last
                # cell (presumably theta in [0, pi] and phi in [-pi, pi] -
                # TODO confirm against ChangeCoordinate).
                theta = min(int(math.floor(theta * 20 / np.pi)), 19)
                phi = min(int(math.floor(phi * 10 / np.pi) + 10), 19)
                E += cdDict[first.name][second.name][theta][phi] / rho
                Time += 1
    return E, Time
def _build_validated_record(amino_acids, sequence, aa_count_dict, name):
    """Generate one candidate DNA sequence and check it against the protein.

    Calls ``Calculator.compute_and_Switch`` once, joins its output into a DNA
    string and verifies that the string is three times as long as *sequence*
    and translates back to it.  Exits with status 1 when either check fails.

    Returns:
        ``(final_sequence, record)``: the DNA string and its ``SeqRecord``.
    """
    ouput_protein_list = Calculator.compute_and_Switch(amino_acids, sequence, aa_count_dict)
    final_sequence = "".join(ouput_protein_list)
    if len(final_sequence) != len(sequence) * 3:
        print("final sequance length does not match input sequence length")
        raise SystemExit(1)
    record = SeqRecord.SeqRecord(Seq(final_sequence, generic_dna), name=name)
    if record.translate().seq != sequence:
        print("error- resulting DNA does not translate back to protein")
        raise SystemExit(1)
    return final_sequence, record


def main(protein_fasta_filename, list_codon_usage_filenames,output_destination, restriction_enzymes="" ):
    """Produce a DNA sequence for a protein and write it as FASTA.

    Parameters:
        protein_fasta_filename: FASTA file holding the protein, read through
            ``Parser.parse_fasta_file``.
        list_codon_usage_filenames: codon-usage table files, one per
            organism; the organism name is the file's base name up to the
            first dot.
        output_destination: directory in which ``Ouput.fasta`` is written.
        restriction_enzymes: optional enzyme names separated by commas
            and/or whitespace.  When given, the sequence is regenerated (at
            most 100 times) while ``check_restriction`` still reports sites.

    Returns:
        ``True`` once the output file has been written.

    Raises:
        SystemExit: with status 1 when no codon table is given or a
            generated sequence fails validation.
    """
    # verify input
    verify_input()

    # parse protein
    record = Parser.parse_fasta_file(protein_fasta_filename)
    name, sequence = record.name, record.seq

    # parse tables
    if not list_codon_usage_filenames:
        print("Error: Empty codon table filnames")
        raise SystemExit(1)
    creatures = {}
    for file_name in list_codon_usage_filenames:
        creature_name = ntpath.basename(file_name).split('.')[0]  # TODO watch out
        codon_usage_dict, codon_to_protein_dict, AA_list = Parser.parse_kazusa_codon_usage_table(str(file_name))
        creatures[creature_name] = codon_usage_dict, codon_to_protein_dict, AA_list

    # The amino-acid objects are created from the table parsed last, then
    # every organism's codon usage is attached to each of them.
    _, codon_to_protein_dict, AA_LIST = creatures[creature_name]
    Amino_Acids_obj_list = [AminoAcid.AminoAcid(aa, codon_to_protein_dict) for aa in AA_LIST]
    for creature_name, (codon_usage_dict, _, _) in creatures.items():
        for amino_acid in Amino_Acids_obj_list:
            amino_acid.add_organism_codons(codon_usage_dict, creature_name)

    # str() is the public way to obtain the residues; the private _data
    # attribute is not guaranteed to be a str.
    prot_analisys = ProtParam.ProteinAnalysis(str(sequence))
    aa_count_dict = prot_analisys.count_amino_acids()

    output_file_name = os.path.join(output_destination, "Ouput.fasta")
    final_sequence, record = _build_validated_record(
        Amino_Acids_obj_list, sequence, aa_count_dict, name)

    # restriction enzymes
    if restriction_enzymes:
        # split() already treats newlines and tabs as separators
        restriction_enzymes_list = restriction_enzymes.replace(",", " ").split()
        batch = RestrictionBatch(restriction_enzymes_list)
        num_cutting = check_restriction(Seq(final_sequence), batch)
        iterations = 100
        # Regenerate until no site is left or the retry budget is spent
        # (presumably compute_and_Switch is randomised - TODO confirm).
        while iterations > 0 and num_cutting > 0:
            final_sequence, record = _build_validated_record(
                Amino_Acids_obj_list, sequence, aa_count_dict, name)
            num_cutting = check_restriction(Seq(final_sequence), batch)
            iterations -= 1
        if num_cutting > 0:
            # Previously this case was silent: the last candidate is still
            # written, but the caller should know it is not site-free.
            print("warning- restriction sites remain in the output sequence")

    print("printing to output file....")
    with open(output_file_name, "w") as output_handle:
        SeqIO.write(record, output_handle, "fasta")
    print("ouput sucsessful")
    return True