コード例 #1
0
def write_pdb(balls, fn, strandlens, extraballs=False, reindexmap=None, mapfn=None):
	"""Write the balls as a single-chain, CA-only PDB file.

	Args:
		balls: Flat, indexable sequence of ball objects laid out strand after
			strand. Each ball is indexed with ``Ball.p_resseqid`` and
			``Ball.p_aaid`` and must provide ``getcoord()``.
		fn: Output path (or handle) handed to ``Bio.PDB.PDBIO.save``.
		strandlens: Number of balls belonging to each strand, in order.
		extraballs: When false, the 4 leading and 4 trailing balls of every
			strand are dropped before writing.
		reindexmap: Optional data saved with ``np.savetxt`` (integer format).
		mapfn: Destination for ``reindexmap``; the map is only written when
			both ``reindexmap`` and ``mapfn`` are given.

	Raises:
		IndexError: If ``balls`` is shorter than ``sum(strandlens)`` or a
			strand is too short to have its padding balls removed.
	"""
	# Number of padding balls removed from each end of a strand.
	n_extra = 4

	tmpballs = []
	pos = 0
	for i, strandlen in enumerate(strandlens):
		# Index explicitly (rather than slice) so a too-short ``balls`` still
		# raises IndexError instead of being silently truncated.
		tmpstrandballs = [balls[pos + j] for j in range(strandlen)]
		pos += strandlen

		if not extraballs:
			if len(tmpstrandballs) < 2 * n_extra:
				raise IndexError(
					"strand %d has %d balls; cannot remove %d extra balls from each end"
					% (i, len(tmpstrandballs), n_extra))
			tmpstrandballs = tmpstrandballs[n_extra:len(tmpstrandballs) - n_extra]

		# Every second strand is stored in reverse order; flip it back.
		if i % 2 == 1:
			tmpstrandballs = tmpstrandballs[::-1]
		tmpballs += tmpstrandballs

	chain = Bio.PDB.Chain.Chain('A')

	def _add_ca_residue(ball, hetflag):
		# Build a one-atom (CA) residue for ``ball`` and append it to the chain.
		resseq = ball[Ball.p_resseqid]
		restype = AA.index_to_three(ball[Ball.p_aaid])
		residue = Bio.PDB.Residue.Residue((hetflag, resseq, ' '), restype, ' ')
		atom = Bio.PDB.Atom.Atom('CA', ball.getcoord(), 0, 0, ' ', 'CA', resseq, 'C')
		residue.add(atom)
		chain.add(residue)

	for ball in tmpballs:
		try:
			_add_ca_residue(ball, ' ')
		except Exception:
			# Retry once with hetero flag 'A' so the residue id differs.
			# NOTE(review): presumably this handles a residue id that already
			# exists in the chain -- confirm which failure is expected here.
			_add_ca_residue(ball, 'A')

	model = Bio.PDB.Model.Model(1)
	model.add(chain)
	structure = Bio.PDB.Structure.Structure("ref")
	structure.add(model)
	io = Bio.PDB.PDBIO()
	io.set_structure(structure)
	io.save(fn, write_end=False)

	if reindexmap is not None and mapfn is not None:
		np.savetxt(mapfn, reindexmap, fmt='%d')
コード例 #2
0
 def __init__(self, gc_file):
     """Load a tab-separated genetic-code table from ``gc_file``.

     Each row is split on tabs. The first column keys ``self.amino_acids``
     and all columns are passed positionally to ``AminoAcid``. The fourth
     column is a comma-separated codon list; every codon is mapped to the
     row's first column in ``self.codons``.
     """
     self.gc_file = gc_file
     self.codons = {}
     self.amino_acids = {}
     self.has_CAI = False
     with open(self.gc_file) as handle:
         for record in handle:
             fields = record.strip("\n").split("\t")
             aa_key = fields[0]
             self.amino_acids[aa_key] = AminoAcid(*fields)
             # Later rows overwrite earlier ones if a codon repeats.
             self.codons.update(dict.fromkeys(fields[3].split(","), aa_key))
コード例 #3
0
def calculate_Energy(df, matrix):
    """Sum the pairwise residue interaction energy of a PDB structure.

    The ATOM records are grouped into residues (``AA.AminoAcid`` objects).
    Every ordered pair of residues whose centre distance does not exceed the
    sum of their radii then contributes ``matrix[a][b][theta][phi] / rho``.

    Args:
        df: Object with ``readlines()`` returning PDB-format text lines.
        matrix: Mapping indexed as ``matrix[name_a][name_b][theta][phi]``;
            residues whose name is not a key of ``matrix`` are ignored.

    Returns:
        The accumulated energy; ``0`` when no usable residue was found.
    """
    radiusDict = LoadRadius()
    # Atom names excluded from the centre sum for non-glycine residues.
    # NOTE(review): "02" (zero-two) is kept verbatim from the original; it
    # looks like a typo for "O2" -- confirm before changing, because it
    # affects which atoms contribute to a residue centre.
    backbone_atoms = {"N", "CA", "C", "O", "O1", "02"}

    CurrentAANitrogen = None
    CurrentAACA = None
    Currentresidue_num = None
    EachAA = []
    CurrentAA = None

    for line in df.readlines():
        if line[0:4] != "ATOM":
            continue
        element_list = extract_Data(line)
        atom_name = element_list[2]
        residue_name = element_list[4]
        alternate_indicator = element_list[3]
        residue_num = element_list[-4]
        xcor = float(element_list[-3])
        ycor = float(element_list[-2])
        zcor = float(element_list[-1])

        if atom_name == "H":
            continue
        if residue_name not in matrix:
            continue

        if CurrentAA is None:
            # First usable residue of the file.
            CurrentAA = AA.AminoAcid(residue_name)
            Currentresidue_num = residue_num
        elif residue_num != Currentresidue_num:
            # A new residue begins: close the previous one.
            state = CurrentAA.CalculateCenter()
            # "== False" is kept on purpose: the return type of
            # CalculateCenter is not visible here.
            if state == False:
                # The previous residue is dropped. As in the original, the
                # current line is skipped too.
                # NOTE(review): this loses the first atom of the new residue
                # -- confirm whether that is intended.
                CurrentAA = AA.AminoAcid(residue_name)
                Currentresidue_num = residue_num
                continue
            CurrentAA.InputCAN(CurrentAANitrogen, CurrentAACA)
            EachAA.append(CurrentAA)
            CurrentAA = AA.AminoAcid(residue_name)
            Currentresidue_num = residue_num

        # Per-atom handling, identical for first / new / continuing residue.
        if atom_name == "N" or atom_name == "CA":
            if alternate_indicator == "B":
                continue
            if atom_name == "N":
                CurrentAANitrogen = np.array([xcor, ycor, zcor])
            else:
                CurrentAACA = np.array([xcor, ycor, zcor])

        if residue_name == "GLY" or atom_name not in backbone_atoms:
            # With alternate locations (e.g. "AASN"/"BASN") only location
            # "A" is used, and only when column 16 of the line is "1".
            if alternate_indicator == " " or (
                    alternate_indicator == "A" and line[15] == "1"):
                CurrentAA.SumCenters(xcor, ycor, zcor)

    # Close the last residue. CalculateCenter is called exactly once, and
    # input without any usable ATOM record no longer crashes.
    if CurrentAA is not None:
        state = CurrentAA.CalculateCenter()
        if state != False:
            CurrentAA.InputCAN(CurrentAANitrogen, CurrentAACA)
            EachAA.append(CurrentAA)

    # Each residue is now an object in EachAA; accumulate the energy.
    E = 0
    for m, residue_m in enumerate(EachAA):
        residue_m.EstablishCoordinate()
        for n, residue_n in enumerate(EachAA):
            if m == n:
                continue
            dis = residue_m.DistanceBetweenAA(residue_n.center)
            radiusSum = radiusDict[residue_m.name] + radiusDict[residue_n.name]
            # Two residues interact when their centres are no further apart
            # than the sum of their radii.
            if dis <= radiusSum:
                rho, theta, phi = residue_m.ChangeCoordinate(residue_n.center)
                # Discretise both angles into indices capped at 19.
                theta = min(int(math.floor(theta * 20 / np.pi)), 19)
                phi = min(int(math.floor(phi * 10 / np.pi) + 10), 19)
                E += matrix[residue_m.name][residue_n.name][theta][phi] / rho

    return E
コード例 #4
0
ファイル: Nepre_R.py プロジェクト: LiuLab-CSRC/Nepre
def calculate_Energy(df, matrix):
    """Sum the pairwise residue interaction energy of a PDB structure.

    ATOM records are grouped into ``AA.AminoAcid`` objects. A residue is
    used only when its ``Check()`` returns ``True``. Every ordered pair of
    used residues whose centre distance does not exceed the sum of their
    radii contributes ``matrix[a][b][theta][phi] / rho``.

    Args:
        df: Object with ``readlines()`` returning PDB-format text lines.
        matrix: Mapping indexed as ``matrix[name_a][name_b][theta][phi]``;
            residues whose name is not a key of ``matrix`` are ignored.

    Returns:
        The accumulated energy; ``0`` when no usable residue was found.
    """
    radiusDict = LoadRadius()
    # Atom names excluded from the centre sum for non-glycine residues.
    # NOTE(review): "02" (zero-two) is kept verbatim from the original; it
    # looks like a typo for "O2" -- confirm before changing.
    backbone_atoms = {"N", "CA", "C", "O", "O1", "02"}

    Currentresidue_num = None
    CurrentAA = None
    # Residues that passed Check() and take part in the energy sum.
    UseAA_list = []
    # [state, residue number] of residues rejected by Check(). Collected but
    # not returned; kept for inspection while debugging.
    IgnoreAA_list = []

    def _close_residue(residue, number):
        # File a finished residue under "use" or "ignore".
        state = residue.Check()
        # "== True" is kept: Check() returns something else on failure.
        if state == True:
            residue.CalculateCenter()
            UseAA_list.append(residue)
        else:
            IgnoreAA_list.append([state, number])

    # Scan the PDB file line by line.
    for line in df.readlines():
        if line[0:4] != "ATOM":
            continue
        element_list = extract_Data(line)
        atom_name = element_list[2]
        residue_name = element_list[4]
        alternate_indicator = element_list[3]
        residue_num = element_list[-4]
        xcor = float(element_list[-3])
        ycor = float(element_list[-2])
        zcor = float(element_list[-1])

        # Ignore hydrogen and residues unknown to the matrix.
        if atom_name == "H":
            continue
        if residue_name not in matrix:
            continue

        if CurrentAA is None:
            # First usable residue.
            CurrentAA = AA.AminoAcid(residue_name)
            Currentresidue_num = residue_num
        elif residue_num != Currentresidue_num:
            # A new residue begins: close the previous one first.
            _close_residue(CurrentAA, Currentresidue_num)
            CurrentAA = AA.AminoAcid(residue_name)
            Currentresidue_num = residue_num

        # Only atoms without alternate location, or location "A", are used.
        if alternate_indicator not in (" ", "A"):
            continue

        if atom_name == "N":
            CurrentAA.InputN(np.array([xcor, ycor, zcor]))
        elif atom_name == "CA":
            CurrentAA.InputCA(np.array([xcor, ycor, zcor]))

        if residue_name == "GLY" or atom_name not in backbone_atoms:
            CurrentAA.SumCenters(xcor, ycor, zcor, atom_name)

    # Close the last residue. Input without any usable ATOM record leaves
    # CurrentAA as None and simply yields zero energy.
    if CurrentAA is not None:
        _close_residue(CurrentAA, Currentresidue_num)

    # Each usable residue is now in UseAA_list; accumulate the energy.
    E = 0
    for m, residue_m in enumerate(UseAA_list):
        residue_m.EstablishCoordinate()
        for n, residue_n in enumerate(UseAA_list):
            if m == n:
                continue
            dis = residue_m.DistanceBetweenAA(residue_n.center)
            radiusSum = radiusDict[residue_m.name] + radiusDict[residue_n.name]
            # Two residues interact when their centres are no further apart
            # than the sum of their radii.
            if dis <= radiusSum:
                rho, theta, phi = residue_m.ChangeCoordinate(residue_n.center)
                # Discretise both angles into indices capped at 19.
                theta = min(int(math.floor(theta * 20 / np.pi)), 19)
                phi = min(int(math.floor(phi * 10 / np.pi) + 10), 19)
                E += matrix[residue_m.name][residue_n.name][theta][phi] / rho

    return E
コード例 #5
0
	def _construct_ideal_balls_(self):
		"""Build the idealised barrel and fill self.balls with one Ball per residue.

		Reads the residue, strand-end, registration ("regs"), "peris" and
		facing files for self.pdb, computes the barrel geometry from the
		strand count, self.A, self.B and the sum of the registrations, and
		places one CA position per residue of every strand (each strand is
		padded by Barrel.extra_ball_num residues on both ends).

		Side effects: sets self.radius and appends to self.strandlens,
		self.reindexmap and self.balls. Returns nothing.

		Reads self.pdb, self.inputdirn, self.A, self.B, self.dr, self.dw and
		self.np1_right.
		"""

		# load residues
		residues = np.loadtxt('inputs/{pdb}/{pdb}.res'.format(pdb=self.pdb)).astype(int).tolist()
		# prepend a dummy entry (200)
		# NOTE(review): presumably this makes the list indexable by 1-based sequence id -- confirm
		residues = [200]+residues

		# load regs and peris got from reg adjustment
		periregs = np.loadtxt('{inputdirn}/regs/{pdb}.regs'.format(inputdirn=self.inputdirn, pdb=self.pdb)).astype(int) 
		peris = np.loadtxt('{inputdirn}/peris/{pdb}.peris'.format(inputdirn=self.inputdirn, pdb=self.pdb)).astype(int) 
		# load facings got from reg adjustment (one stripped line per strand, compared against 'OUT' below)
		with open('{inputdirn}/facings/{pdb}.facings'.format(inputdirn=self.inputdirn, pdb=self.pdb)) as f:
			lines = f.readlines()
		firstfacings = [ line.strip() for line in lines ]

		# load strandends; each row is indexed as [0] and [1] below
		strandends = np.loadtxt('inputs/{pdb}/{pdb}.strands'.format(pdb=self.pdb)).astype(int) 
		
		# the strand ranges here may not be the ranges used for reg prediction
		# correct reg according to the difference between strandends to construct barrel and peris used in reg pred
		# after this loop, array periregs will be the predicted regs of strandends
		for strdi in range(len(strandends)):
			# index of the previous strand, wrapping around to the last one
			strdim1 = (strdi-1)%len(strandends)
			if strdi%2==0:
				periregs[strdi] -= strandends[strdi][0]-peris[strdi]
				periregs[strdim1] += strandends[strdi][0]-peris[strdi]
			else:
				periregs[strdi] += strandends[strdi][1]-peris[strdi]
				periregs[strdim1] -= strandends[strdi][1]-peris[strdi]

		# correct facing according to the difference between strandends to construct barrel and peris used in reg pred
		# an odd offset flips the facing of the strand's first residue
		for strdi in range(len(strandends)):
			if strdi%2==0:
				if (strandends[strdi][0]-peris[strdi])%2!=0:
					if firstfacings[strdi]=='OUT':
						firstfacings[strdi]='IN'
					else:
						firstfacings[strdi]='OUT'
			else:
				if (strandends[strdi][1]-peris[strdi])%2!=0:
					if firstfacings[strdi]=='OUT':
						firstfacings[strdi]='IN'
					else:
						firstfacings[strdi]='OUT'


		# add extra residues for bbq: widen every strand by extra_ball_num on both ends
		for strdi in range(len(strandends)):
			strandends[strdi][0]-=Barrel.extra_ball_num
			strandends[strdi][1]+=Barrel.extra_ball_num

		# construct facing arrays for all residues (including extra residues)
		# each strand starts from its first facing and then alternates OUT/IN
		facings = []
		for fac in firstfacings:
			if fac == 'OUT':
				facings.append([Ball.Facing.OUT])
			else:
				facings.append([Ball.Facing.IN])
		for strdi in range(len(strandends)):
			for resi in range(strandends[strdi][1]-strandends[strdi][0]):
				if facings[strdi][resi] == Ball.Facing.OUT:
					facings[strdi].append(Ball.Facing.IN)
				else:
					facings[strdi].append(Ball.Facing.OUT)


		# running start offset of each strand: cumulative sum of the negated regs, starting at 0
		peripositions = np.cumsum( np.hstack( ([0], -periregs) ) )

		N = len(strandends) # strand num
		A = self.A # intrastrand Ca distance
		B = self.B # interstrand Ca distance
		S = sum(periregs) # shear number

		## circle formula
		#a = math.sqrt( (N*B)**2+(S*A)**2 ) / 2.0 / math.pi # tilt angle
		#theta = math.asin(S*A/2.0/math.pi/a) # radius

		## polygon formula
		theta = math.atan( S*A / (N*B) ) # tilt angle
		a = B / ( 2*math.sin(math.pi/N) * math.cos(theta) ) # radius

		self.radius = a
		b = a / math.tan(theta) # vertical speed
		c = math.sqrt( a*a + b*b )
		delta = 2 * math.pi * a * a / (c*N) # offset on the neighbouring strand to ensure inter H-bond is perpendicular to the strand

		# per-strand accumulators, all indexed by strand
		currid = 0
		ids = []
		seqids = []
		restypes = []
		cacoords = []

		# construct the barrel
		for strdi in range(N):
			# seq ids; odd strands are listed in reverse order
			if strdi%2==0:
				seqids.append( range( strandends[strdi][0], strandends[strdi][1]+1 ) )
			else:
				seqids.append( range( strandends[strdi][0], strandends[strdi][1]+1 )[::-1] )
			# consecutive ball ids for this strand
			ids.append( range( currid, currid+len(seqids[strdi]) ) )
			currid += len(seqids[strdi])
			# residue types
			restypes.append([])
			for seqid in seqids[strdi]:
				# any failure of the lookup falls back to residue type 'C'
				# NOTE(review): the bare except also hides unrelated errors -- consider narrowing it
				try:
					restypes[strdi].append(AA.index_to_one(residues[seqid]))
				except:
					restypes[strdi].append('C')

			cacoords.append([])
			for resi in range(strandends[strdi][1]-strandends[strdi][0]+1):
				# zigzag deviation: dr is the radial offset, dw the offset applied along the (xn1, yn1, zn1) direction below
				if facings[strdi][resi] == Ball.Facing.OUT:
					dr = self.dr
					# righthand side of out facing residue is always SH
					# lefthand side NH
					if strdi%2==0:
						dw = self.dw
					else:
						dw = -self.dw
					# np1_right inverts the sign of dw
					if self.np1_right: #test TODO
						if strdi%2==0:
							dw = -self.dw
						else:
							dw = self.dw
				else:
					dr = -self.dr
					if strdi%2==0:
						dw = -self.dw
					else:
						dw = self.dw
					# np1_right inverts the sign of dw
					if self.np1_right: #test TODO
						if strdi%2==0:
							dw = self.dw
						else:
							dw = -self.dw

				# position of this residue along its strand, then its x/y at radius a+dr
				s = ( peripositions[strdi] + resi ) * A + strdi * delta
				x = (a+dr) * math.cos(s/c-2*math.pi*strdi/N);
				y = (a+dr) * math.sin(s/c-2*math.pi*strdi/N);
				# np1_right overrides s, x and y (sin/cos swapped, strand angle sign flipped)
				if self.np1_right: #test TODO
					s = ( peripositions[strdi] + resi ) * A + (N-strdi) * delta
					x = (a+dr) * math.sin(s/c+2*math.pi*strdi/N);
					y = (a+dr) * math.cos(s/c+2*math.pi*strdi/N);

				z = b * s/c;

				# direction (xn1, yn1, zn1): from strand strdi towards strand strdi+1 ...
				xn1 = (a+dr) * ( - math.cos(s/c-2*math.pi*strdi/N) + math.cos((s+delta)/c-2*math.pi*(strdi+1)/N) );
				yn1 = (a+dr) * ( - math.sin(s/c-2*math.pi*strdi/N) + math.sin((s+delta)/c-2*math.pi*(strdi+1)/N) );
				zn1 = b*delta/c
				# ... or, for these strand-parity/facing combinations, from strand strdi-1 towards strand strdi
				if (strdi%2==1 and facings[strdi][resi] == Ball.Facing.OUT) or (strdi%2==0 and facings[strdi][resi] == Ball.Facing.IN):
					xn1 = (a+dr) * ( - math.cos(s/c-2*math.pi*(strdi-1)/N) + math.cos((s+delta)/c-2*math.pi*strdi/N) );
					yn1 = (a+dr) * ( - math.sin(s/c-2*math.pi*(strdi-1)/N) + math.sin((s+delta)/c-2*math.pi*strdi/N) );
					zn1 = b*delta/c

				# np1_right overrides xn1/yn1 with the mirrored formulas (zn1 is unchanged)
				if self.np1_right: #test TODO
					xn1 = (a+dr) * ( - math.sin(s/c+2*math.pi*strdi/N) + math.sin((s+delta)/c+2*math.pi*(strdi+1)/N) );
					yn1 = (a+dr) * ( - math.cos(s/c+2*math.pi*strdi/N) + math.cos((s+delta)/c+2*math.pi*(strdi+1)/N) );
					if (strdi%2==1 and facings[strdi][resi] == Ball.Facing.OUT) or (strdi%2==0 and facings[strdi][resi] == Ball.Facing.IN):
						xn1 = (a+dr) * ( - math.sin(s/c+2*math.pi*(strdi-1)/N) + math.sin((s+delta)/c+2*math.pi*strdi/N) );
						yn1 = (a+dr) * ( - math.cos(s/c+2*math.pi*(strdi-1)/N) + math.cos((s+delta)/c+2*math.pi*strdi/N) );

				# normalise the direction and shift the CA position by dw along it
				n1norm = math.sqrt(xn1*xn1+yn1*yn1+zn1*zn1)
				xn1 = xn1/n1norm
				yn1 = yn1/n1norm
				zn1 = zn1/n1norm
				x+=dw*xn1
				y+=dw*yn1
				z+=dw*zn1

				cacoords[strdi].append(np.array([x,y,z]))

			self.strandlens.append(len(ids[strdi]))

		# create the Ball objects and record (ball id, sequence id) pairs for the non-padding residues
		for i in range(len(ids)):
			for j in range(len(ids[i])):
				## following line is for model/param selections
				#ball = Ball([ ids[i][j], seqids[i][j], cacoords[i][j][0], cacoords[i][j][1], cacoords[i][j][2], AA.one_to_index(restypes[i][j]), facings[i][j] ])
				## store ballids instead of seqids. needs to be corrected after bbq
				## if using seqids, bbq will have problems
				if i%2!=0:
					# odd strands: the second field is the ball id mirrored within the strand
					ball = Ball([ ids[i][j], ids[i][len(ids[i])-j-1], cacoords[i][j][0], cacoords[i][j][1], cacoords[i][j][2], AA.one_to_index(restypes[i][j]), facings[i][j] ])
					if j >= Barrel.extra_ball_num and j < len(ids[i])-Barrel.extra_ball_num:
						self.reindexmap.append( (ids[i][len(ids[i])-j-1], seqids[i][j]) )
				else:
					ball = Ball([ ids[i][j], ids[i][j], cacoords[i][j][0], cacoords[i][j][1], cacoords[i][j][2], AA.one_to_index(restypes[i][j]), facings[i][j] ])
					if j >= Barrel.extra_ball_num and j < len(ids[i])-Barrel.extra_ball_num:
						self.reindexmap.append( (ids[i][j], seqids[i][j]) )
				self.balls.append(ball)
コード例 #6
0
	def __str__(self):
		"""Return the ball as one space-separated line.

		Fields, in order: ball id, residue sequence id, coordinate,
		one-letter residue code and facing.
		"""
		fields = [
			str(self[Ball.p_ballid]),
			str(self[Ball.p_resseqid]),
			str(self.getcoord()),
			AA.index_to_one(self[Ball.p_aaid]),
			str(self[Ball.p_facing]),
		]
		return " ".join(fields)
コード例 #7
0
def processAAforchian(chain, aaDict):
    """Group the ATOM records of one chain into ``AA.AminoAcid`` objects.

    Args:
        chain: Iterable of PDB-format text lines.
        aaDict: Container of accepted residue names; residues whose name is
            not in it are ignored.

    Returns:
        List of residues, in file order, for which ``CalculateCenter()`` did
        not return ``False``. Each has received its N/CA coordinates via
        ``InputCAN`` and had ``EstablishCoordinate()`` called. The list is
        empty when no usable ATOM record exists.
    """
    # Atom names excluded from the centre sum for non-glycine residues.
    # NOTE(review): "02" (zero-two) is kept verbatim from the original; it
    # looks like a typo for "O2" -- confirm before changing.
    backbone_atoms = {"N", "CA", "C", "O", "O1", "02"}

    CurrentAANitrogen = None
    CurrentAACA = None
    Currentresidue_num = None
    EachAA = []
    CurrentAA = None

    for line in chain:
        if line[0:4] != "ATOM":
            continue
        element_list = extract_Data(line)
        atom_name = element_list[2]
        residue_name = element_list[4]
        alternate_indicator = element_list[3]
        residue_num = element_list[-4]
        chain_id = element_list[-5]
        xcor = float(element_list[-3])
        ycor = float(element_list[-2])
        zcor = float(element_list[-1])

        if atom_name == "H":
            continue
        if residue_name not in aaDict:
            continue

        if CurrentAA is None:
            # First usable residue.
            CurrentAA = AA.AminoAcid(residue_name, residue_num, chain_id)
            Currentresidue_num = residue_num
        elif residue_num != Currentresidue_num:
            # A new residue begins: close the previous one.
            state = CurrentAA.CalculateCenter()
            # "== False" is kept on purpose: the return type of
            # CalculateCenter is not visible here.
            if state == False:
                # The previous residue is dropped. As in the original, the
                # current line is skipped too.
                # NOTE(review): this loses the first atom of the new residue
                # -- confirm whether that is intended.
                CurrentAA = AA.AminoAcid(residue_name, residue_num, chain_id)
                Currentresidue_num = residue_num
                continue
            CurrentAA.InputCAN(CurrentAANitrogen, CurrentAACA)
            CurrentAA.EstablishCoordinate()
            EachAA.append(CurrentAA)
            CurrentAA = AA.AminoAcid(residue_name, residue_num, chain_id)
            Currentresidue_num = residue_num

        # Per-atom handling, identical for first / new / continuing residue.
        if atom_name == "N" or atom_name == "CA":
            if alternate_indicator == "B":
                continue
            if atom_name == "N":
                CurrentAANitrogen = np.array([xcor, ycor, zcor])
            else:
                CurrentAACA = np.array([xcor, ycor, zcor])

        if residue_name == "GLY" or atom_name not in backbone_atoms:
            # With alternate locations (e.g. "AASN"/"BASN") only "A" counts.
            if alternate_indicator in (" ", "A"):
                CurrentAA.SumCenters(xcor, ycor, zcor)

    # Close the last residue; nothing to do when no residue was started.
    if CurrentAA is not None:
        state = CurrentAA.CalculateCenter()
        if state != False:
            CurrentAA.InputCAN(CurrentAANitrogen, CurrentAACA)
            CurrentAA.EstablishCoordinate()
            EachAA.append(CurrentAA)
    return EachAA
コード例 #8
0
ファイル: Nepre_v3.py プロジェクト: romendlf/SSBONDPredict
def process_AA2(AA_information, matrix):
    """Build the ``AA.AminoAcid`` for the last residue in the given lines.

    All ATOM records are scanned, but only the residue that is current when
    the input ends is returned; earlier residues are discarded. The N and CA
    coordinates passed to ``InputCAN`` are the most recently seen ones, and
    they are not reset when a new residue starts.

    Args:
        AA_information: Iterable of PDB-format text lines.
        matrix: Container of accepted residue names; residues whose name is
            not in it are ignored.

    Returns:
        The last residue after ``CalculateCenter()`` and ``InputCAN(...)``,
        or ``None`` when the input holds no usable ATOM record.
    """
    # Atom names excluded from the centre sum for non-glycine residues.
    # NOTE(review): "02" (zero-two) is kept verbatim from the original; it
    # looks like a typo for "O2" -- confirm before changing.
    backbone_atoms = {"N", "CA", "C", "O", "O1", "02"}

    CurrentAA = None
    CurrentAANitrogen = None
    CurrentAACA = None
    Currentresidue_num = None

    for line in AA_information:
        if line[0:4] != "ATOM":
            continue
        element_list = extract_Data(line)
        atom_name = element_list[2]
        residue_name = element_list[4]
        alternate_indicator = element_list[3]
        residue_num = element_list[-4]
        chain_id = element_list[-5]
        xcor = float(element_list[-3])
        ycor = float(element_list[-2])
        zcor = float(element_list[-1])

        if atom_name == "H":
            continue
        if residue_name not in matrix:
            continue

        if CurrentAA is None:
            # First usable residue.
            CurrentAA = AA.AminoAcid(residue_name, residue_num, chain_id)
            Currentresidue_num = residue_num
        elif residue_num != Currentresidue_num:
            # A new residue begins. The previous one is discarded either
            # way; its methods are still called in case they have side
            # effects that are not visible from this function.
            if CurrentAA.CalculateCenter() != False:
                CurrentAA.InputCAN(CurrentAANitrogen, CurrentAACA)
            CurrentAA = AA.AminoAcid(residue_name, residue_num, chain_id)
            Currentresidue_num = residue_num

        # Per-atom handling, identical for first / new / continuing residue.
        if atom_name == "N" or atom_name == "CA":
            if alternate_indicator == "B":
                continue
            if atom_name == "N":
                CurrentAANitrogen = np.array([xcor, ycor, zcor])
            else:
                CurrentAACA = np.array([xcor, ycor, zcor])

        if residue_name == "GLY" or atom_name not in backbone_atoms:
            # With alternate locations (e.g. "AASN"/"BASN") only "A" counts.
            if alternate_indicator in (" ", "A"):
                CurrentAA.SumCenters(xcor, ycor, zcor)

    if CurrentAA is None:
        # No usable ATOM record: nothing to finish.
        return None

    CurrentAA.CalculateCenter()
    CurrentAA.InputCAN(CurrentAANitrogen, CurrentAACA)
    return CurrentAA
コード例 #9
0
def main(protein_fasta_open_file, list_codon_usage_open_files, output_destination, restriction_enzymes=""):
    """Back-translate a protein into DNA and write the result as FASTA.

    Codons are chosen by ``Calculator.compute_and_Switch`` from ``AminoAcid``
    objects that were fed the codon usage of every supplied organism.  When
    restriction enzymes are given, the sequence is re-drawn up to 100 more
    times until ``check_restriction`` reports no cutting enzyme.  If every
    draw is cut, the draw with the fewest cutting enzymes (the first draw
    included) is written instead.

    Args:
        protein_fasta_open_file: passed to ``Parser.parse_fasta_file``.
        list_codon_usage_open_files: non-empty sequence of
            ``(file_name, open_file)`` pairs; the part of ``file_name``
            before the first ``.`` is used as the organism name.
        output_destination: target handed to ``SeqIO.write``.
        restriction_enzymes: enzyme names separated by commas and/or
            whitespace; the empty string disables the restriction check.

    Returns:
        str: a status message.

    Raises:
        Exception: if no codon table is supplied, if the generated DNA is
            not exactly three times as long as the protein, or if it does
            not translate back to the input protein.
    """
    # parse protein
    protein_record = Parser.parse_fasta_file(protein_fasta_open_file)
    name, sequence = protein_record.name, protein_record.seq

    # parse tables
    if len(list_codon_usage_open_files) == 0:
        raise Exception("Error: Empty codon table filnames")
    creatures = {}
    for fname, open_file in list_codon_usage_open_files:
        creature_name = fname.split('.')[0]
        codon_usage_dict, codon_to_protein_dict, AA_list = Parser.parse_kazusa_codon_usage_table(open_file)
        creatures[creature_name] = codon_usage_dict, codon_to_protein_dict, AA_list

    # The amino-acid list and the codon->protein map are taken from the last
    # table parsed; every organism then contributes its codon usage.
    _, codon_to_protein_dict, aa_names = creatures[creature_name]
    amino_acid_objs = [AminoAcid.AminoAcid(aa, codon_to_protein_dict) for aa in aa_names]
    for organism, (codon_usage_dict, _unused_map, _unused_names) in creatures.items():
        for amino_acid in amino_acid_objs:
            amino_acid.add_organism_codons(codon_usage_dict, organism)

    # str(seq) is the public way to get the residues; ``_data`` is private.
    aa_count_dict = ProtParam.ProteinAnalysis(str(sequence)).count_amino_acids()

    def draw_candidate():
        """Draw one DNA candidate, validate it, and return (dna, record)."""
        dna = "".join(Calculator.compute_and_Switch(amino_acid_objs, sequence, aa_count_dict))
        dna = dna.replace("U", "T")
        if len(dna) != len(sequence) * 3:
            raise Exception("final sequance length does not match input sequence length")
        candidate = SeqRecord.SeqRecord(Seq(dna, generic_dna), name=name)
        if candidate.translate().seq != sequence:
            raise Exception("error- resulting DNA does not translate back to protein")
        return dna, candidate

    final_sequence, record = draw_candidate()

    # restriction enzymes - verify they do not cut the sequence; if they do,
    # keep the least-cut sequence seen
    if restriction_enzymes != "":
        # split() already treats tabs and newlines as separators
        restriction_enzymes_list = restriction_enzymes.replace(",", " ").split()
        batch = RestrictionBatch(restriction_enzymes_list)
        num_cutting = len(check_restriction(Seq(final_sequence, generic_dna), batch))
        # Seed the "best so far" with the first draw so that (a) an uncut
        # first draw is not reported as cut and (b) the first draw competes
        # with the retries.
        best_num_cutting = num_cutting
        best_sequ = final_sequence
        iterations = 100
        while iterations > 0 and num_cutting > 0:
            final_sequence, record = draw_candidate()
            num_cutting = len(check_restriction(Seq(final_sequence, generic_dna), batch))
            # if achieved non cutting sequence, save and return
            if num_cutting == 0:
                check_restriction(Seq(final_sequence, generic_dna), batch, to_print=True)
                print("printing to output file....")
                SeqIO.write(record, output_destination, "fasta")
                print("ouput sucsessful")
                return "Output Sucsessful"
            # ties go to the most recent draw
            if num_cutting <= best_num_cutting:
                best_num_cutting = num_cutting
                best_sequ = final_sequence
            iterations -= 1

        # every draw was cut: emit the one cut by the fewest enzymes
        if best_num_cutting > 0:
            cutting = check_restriction(Seq(best_sequ, generic_dna), batch, to_print=True)
            record = SeqRecord.SeqRecord(Seq(best_sequ, generic_dna), name=name)
            SeqIO.write(record, output_destination, "fasta")
            return "The enzymes the cut the sequence are:" + str(cutting) + "\n Output printed to specified location."

    SeqIO.write(record, output_destination, "fasta")
    return "ouput sucsessful"
コード例 #10
0
ファイル: Nepre_chain400.py プロジェクト: romendlf/Nepre
def ProcessPDB(chainlines, matrix):
    """Build one ``AA.AminoAcid`` per residue from the ATOM lines of a chain.

    For every residue the N and CA coordinates are remembered and the
    coordinates of the selected atoms (every atom of GLY, otherwise every
    atom whose name is not in the backbone set below) are accumulated with
    ``SumCenters``.  When the residue number changes, and once more after
    the last line, the residue is finalised with ``CalculateCenter`` and
    ``InputCAN``.  Residues for which ``CalculateCenter`` returns ``False``
    are dropped.

    Args:
        chainlines: iterable of text lines; lines not starting with
            ``"ATOM"`` are ignored.
        matrix: container of accepted residue names (only used for ``in``).

    Returns:
        list: the finalised ``AA.AminoAcid`` objects in input order; empty
        when no line was accepted.
    """
    # NOTE(review): the returned table is never used here; the call is kept
    # only in case LoadRadius() has side effects -- confirm and remove.
    LoadRadius()

    # NOTE(review): "02" (digit zero) looks like a typo for "O2"; it is kept
    # unchanged because altering it would change which atoms are averaged.
    backbone_atoms = {"N", "CA", "C", "O", "O1", "02"}

    CurrentAANitrogen = None
    CurrentAACA = None
    Currentresidue_num = None
    CurrentAA = None
    EachAA = []

    for line in chainlines:
        if line[0:4] != "ATOM":
            continue
        element_list = extract_Data(line)
        atom_name = element_list[2]
        alternate_indicator = element_list[3]
        residue_name = element_list[4]
        residue_num = element_list[-4]
        xcor = float(element_list[-3])
        ycor = float(element_list[-2])
        zcor = float(element_list[-1])

        if atom_name == "H":
            continue
        if residue_name not in matrix:
            continue

        # A new residue begins: close the previous one (if any) first.
        if CurrentAA is None or residue_num != Currentresidue_num:
            # CalculateCenter() signals an unusable residue with False.
            if CurrentAA is not None and CurrentAA.CalculateCenter() != False:
                CurrentAA.InputCAN(CurrentAANitrogen, CurrentAACA)
                EachAA.append(CurrentAA)
            CurrentAA = AA.AminoAcid(residue_name)
            Currentresidue_num = residue_num
            # No `continue` here: the atom on this very line belongs to the
            # new residue and must be processed even if the previous residue
            # was discarded.

        if atom_name in ("N", "CA"):
            # alternate location "B" is ignored for the N/CA anchors
            if alternate_indicator == "B":
                continue
            if atom_name == "N":
                CurrentAANitrogen = np.array([xcor, ycor, zcor])
            else:
                CurrentAACA = np.array([xcor, ycor, zcor])

        if residue_name == "GLY" or atom_name not in backbone_atoms:
            # With alternates such as "AASN"/"BASN" only "A" is added, and
            # only when the character at index 15 of the line is "1".
            if alternate_indicator == " " or (alternate_indicator == "A" and line[15] == "1"):
                CurrentAA.SumCenters(xcor, ycor, zcor)

    # Close the last residue exactly once, like every other residue.
    if CurrentAA is not None and CurrentAA.CalculateCenter() != False:
        CurrentAA.InputCAN(CurrentAANitrogen, CurrentAACA)
        EachAA.append(CurrentAA)
    return EachAA
コード例 #11
0
ファイル: compute_CAI.py プロジェクト: polarise/BioClasses
def main(fn, fastafile, orf_fn="/Users/paulkorir/Dropbox/Euplotes/FrameshiftPredictionData/one_orfs.txt"):
    """Print each FASTA sequence trimmed to start at its first in-frame ATG.

    For every record in ``fastafile`` the reading frame and the inclusive
    end index are looked up in the ORF table by the first space-separated
    token of the record id.  The sequence is scanned codon by codon from the
    frame offset; the slice from the first ``ATG`` through ``last`` is
    printed to stdout as ``>id`` followed by the sequence.  Records without
    an in-frame ATG are reported on stderr and skipped.

    Written so that it runs unchanged on Python 2 and Python 3.

    Args:
        fn: tab-separated genetic-code table; each row is unpacked into
            ``AminoAcid(*row)`` and column 3 holds comma-separated codons.
        fastafile: FASTA input handed to ``SeqIO.parse``.
        orf_fn: tab-separated ORF table whose rows are unpacked into
            ``ORFInfo(*row)``.  Defaults to the path that used to be
            hard-coded, so existing callers are unaffected.

    Raises:
        KeyError: if a FASTA record has no entry in the ORF table.
    """
    # NOTE(review): both tables are built but not consumed by the rest of
    # this function; they are kept so that `fn` is still read and validated.
    genetic_code = dict()
    codon_dict = dict()
    with open(fn) as f:
        for row in f:
            l = row.strip("\n").split("\t")
            genetic_code[l[0]] = AminoAcid(*l)
            for c in l[3].split(","):
                codon_dict[c.replace("U", "T")] = l[0]

    orf_frame = dict()
    with open(orf_fn) as f:
        for row in f:
            # rows starting with "T" are skipped (presumably the header row)
            if row[0] == "T":
                continue
            l = row.strip("\n").split("\t")
            orf_frame[l[0]] = ORFInfo(*l)

    # read in the data from the fasta file
    for seq_record in SeqIO.parse(fastafile, 'fasta'):
        sequence = str(seq_record.seq)
        seq_name = seq_record.id.split(" ")[0]

        frame = orf_frame[seq_name].frame
        last = orf_frame[seq_name].last

        # position of the first ATG when stepping in whole codons from `frame`
        start = None
        for i in range(frame, len(sequence) - 2, 3):
            if sequence[i:i + 3] == "ATG":
                start = i
                break

        if start is None:
            sys.stderr.write("Missing ATG in frame %d in sequence %s\n" % (frame, seq_name))
            continue

        # make sure it's in the first coding frame
        cds = sequence[start:last + 1]

        print(">" + seq_record.id)
        print(cds)
コード例 #12
0
def SingleStructure(decoyname,DecoyPath,model_path,radius_path):
    """Score one structure file with the pairwise amino-acid energy table.

    The file is read line by line.  ATOM lines are grouped into residues by
    residue number; for each residue the N and CA coordinates are remembered
    and the selected atoms (every atom of GLY, otherwise every atom whose
    name is not in the backbone set below) are accumulated with
    ``SumCenters``.  A residue is finalised when the residue number changes
    and once more after the last line; residues for which
    ``CalculateCenter`` returns ``False`` are dropped.  Non-ATOM lines are
    ignored.

    Every ordered pair of distinct residues whose centre distance does not
    exceed the sum of their radii then contributes
    ``cdDict[a][b][theta_bin][phi_bin] / rho`` to the energy.

    Args:
        decoyname: file name; ``"native.pdb"`` selects coordinate columns
            6-8 of the split line, anything else columns 5-7.
        DecoyPath: path of the structure file to read.
        model_path: passed to ``loadModel``.
        radius_path: passed to ``LoadRadius``.

    Returns:
        tuple: ``(E, Time)`` -- the accumulated energy and the number of
        pairs that contributed to it.
    """
    residue_names = ("ALA", "VAL", "LEU", "ILE", "PHE",
                     "TRP", "MET", "PRO", "GLY", "SER",
                     "THR", "CYS", "TYR", "ASN", "GLN",
                     "HIS", "LYS", "ARG", "ASP", "GLU")
    cdDict = loadModel(model_path, {res: {} for res in residue_names})
    radiusDict = LoadRadius(radius_path)

    # The native file carries one extra column before the coordinates.
    if decoyname == "native.pdb":
        axes = (6, 7, 8)
    else:
        axes = (5, 6, 7)

    # NOTE(review): "02" (digit zero) looks like a typo for "O2"; it is kept
    # unchanged because altering it would change which atoms are averaged.
    backbone_atoms = {"N", "CA", "C", "O", "O1", "02"}

    CurrentAANitrogen = None
    CurrentAACA = None
    CurrentAANumber = None
    CurrentAA = None
    EachAA = []

    # `with` guarantees the handle is closed even if parsing raises.
    with open(DecoyPath) as df:
        for line in df:
            Element, AAtype, AANUMBER = ExtractData(line)

            # Header/TER/END lines carry no atom.  They must neither touch
            # CurrentAA (it may still be None) nor append it again.
            if Element[0] != "ATOM":
                continue
            atom_name = Element[2]
            if atom_name == "H":
                continue
            if AAtype not in cdDict:
                continue

            # A new residue begins: close the previous one (if any) first.
            if CurrentAA is None or AANUMBER != CurrentAANumber:
                # CalculateCenter() signals an unusable residue with False.
                if CurrentAA is not None and CurrentAA.CalculateCenter() != False:
                    CurrentAA.InputCAN(CurrentAANitrogen, CurrentAACA)
                    EachAA.append(CurrentAA)
                CurrentAA = AA.AminoAcid(AAtype)
                CurrentAANumber = AANUMBER
                # No `continue`: the atom on this line belongs to the new
                # residue and is processed below.

            if atom_name in ("N", "CA"):
                # alternate location "B" (line index 16) is ignored
                if line[16] == "B":
                    continue
                anchor = np.array([float(Element[k]) for k in axes])
                if atom_name == "N":
                    CurrentAANitrogen = anchor
                else:
                    CurrentAACA = anchor

            if AAtype == "GLY" or atom_name not in backbone_atoms:
                # With alternates such as "AASN"/"BASN" only "A" is added,
                # and only when the character at index 15 is "1".
                alt = line[16]
                if alt == " " or (alt == "A" and line[15] == "1"):
                    CurrentAA.SumCenters(*[float(Element[k]) for k in axes])

    # Close the last residue; the file need not end with a non-ATOM line.
    if CurrentAA is not None and CurrentAA.CalculateCenter() != False:
        CurrentAA.InputCAN(CurrentAANitrogen, CurrentAACA)
        EachAA.append(CurrentAA)

    # Scan over.  Each residue is an object in EachAA; accumulate the energy.
    E = 0
    Time = 0
    for m, first in enumerate(EachAA):
        # Establish the local axes of the reference residue first.
        first.EstablishCoordinate()
        for n, second in enumerate(EachAA):
            if m == n:
                continue
            dis = first.DistanceBetweenAA(second.center)
            # Two residues interact when their centres are no further apart
            # than the sum of their radii.
            if dis <= radiusDict[first.name] + radiusDict[second.name]:
                rho, theta, phi = first.ChangeCoordinate(second.center)
                # 20 bins each, clamped so the upper edge lands in bin 19
                theta = min(int(math.floor(theta * 20 / np.pi)), 19)
                phi = min(int(math.floor(phi * 10 / np.pi) + 10), 19)
                E += cdDict[first.name][second.name][theta][phi] / rho
                Time += 1

    return E, Time
コード例 #13
0
ファイル: Main.py プロジェクト: MacTavdish/IGEM_Team_HUJI
def main(protein_fasta_filename, list_codon_usage_filenames,output_destination, restriction_enzymes="" ):
    """Back-translate a protein into DNA and save it as ``Ouput.fasta``.

    The protein is read with ``Parser.parse_fasta_file`` and one codon usage
    table is parsed per entry of ``list_codon_usage_filenames``.  Codons are
    drawn with ``Calculator.compute_and_Switch``.  If restriction enzymes
    are given, the draw is repeated up to 100 times while
    ``check_restriction`` returns a value greater than zero.  The last draw
    is written to ``<output_destination>/Ouput.fasta``.

    Any validation failure prints a message and terminates via ``exit(1)``.

    Returns:
        True once the file has been written.
    """
    # verify input
    verify_input()

    # parse protein
    fasta_record = Parser.parse_fasta_file(protein_fasta_filename)
    name, _record_id, sequence = fasta_record.name, fasta_record.id, fasta_record.seq

    # parse tables
    if len(list_codon_usage_filenames) == 0:
        print("Error: Empty codon table filnames")
        exit(1)
    organism_tables = {}
    for table_path in list_codon_usage_filenames:
        organism = ntpath.basename(table_path).split('.')[0]
        usage, codon_to_protein, aa_names = Parser.parse_kazusa_codon_usage_table(str(table_path))
        organism_tables[organism] = usage, codon_to_protein, aa_names

    # The amino-acid objects are created from the last table parsed ...
    _, last_codon_to_protein, last_aa_names = organism_tables[organism]
    amino_acids = [AminoAcid.AminoAcid(aa, last_codon_to_protein) for aa in last_aa_names]
    # ... and then receive the codon usage of every organism.
    for organism, (usage, _codon_map, _names) in organism_tables.items():
        for amino_acid in amino_acids:
            amino_acid.add_organism_codons(usage, organism)

    aa_counts = ProtParam.ProteinAnalysis(sequence._data).count_amino_acids()

    def draw_candidate():
        """Draw one DNA candidate, validate it, return (dna, path, record)."""
        dna = "".join(Calculator.compute_and_Switch(amino_acids, sequence, aa_counts))
        # analyse the drawn sequence
        if len(dna) != len(sequence) * 3:
            print("final sequance length does not match input sequence length")
            exit(1)
        target = os.path.join(output_destination, "Ouput.fasta")
        candidate = SeqRecord.SeqRecord(Seq(dna, generic_dna), name=name)
        if candidate.translate().seq != sequence:
            print("error- resulting DNA does not translate back to protein")
            exit(1)
        return dna, target, candidate

    final_sequence, output_file_name, record = draw_candidate()

    # restriction enzymes
    if restriction_enzymes != "":
        enzyme_names = restriction_enzymes.replace(",", " ").replace('\n', ' ').replace("\t"," ").split()
        batch = RestrictionBatch(enzyme_names)
        num_cutting = check_restriction(Seq(final_sequence), batch)
        for _attempt in range(100):
            if not (num_cutting > 0):
                break
            final_sequence, output_file_name, record = draw_candidate()
            num_cutting = check_restriction(Seq(final_sequence), batch)

    print("printing to output file....")
    with open(output_file_name, "w") as output_handle:
        SeqIO.write(record, output_handle, "fasta")
    print("ouput sucsessful")
    return True