def get_opt_cloningprimer_pair(seq, *, tm_min=45, tm_max=63, min_len=16,
                               max_len=24, calc_tm=None, rev_comp=None):
    """Collect candidate cloning primers from both ends of ``seq``.

    For every primer length from ``min_len`` to ``max_len`` (inclusive) the
    forward candidate is the first ``n`` bases of ``seq`` and the reverse
    candidate is the reverse complement of the last ``n`` bases of ``seq``
    (so a stop codon at the very end of ``seq`` is part of the reverse
    primer).  A candidate is kept when ``tm_min <= Tm <= tm_max``.

    Args:
        seq: Template sequence (string) the primers are cut from.
        tm_min: Lowest accepted melting temperature.
        tm_max: Highest accepted melting temperature.
        min_len: Shortest primer length to try.
        max_len: Longest primer length to try.
        calc_tm: Optional callable ``primer -> Tm``.  Defaults to
            ``ThermoAnalysis.calcTm`` from primer3 configured with the PCR
            conditions below.
        rev_comp: Optional callable returning the reverse complement of a
            string.  Defaults to ``kang.rev_comp``.

    Returns:
        ``(primerF_list, primerR_list, primerF_list_Tm, primerR_list_Tm)``;
        each Tm list is index-aligned with its primer list.
    """
    if calc_tm is None:
        # Default PCR conditions; adjust them to match your own reaction.
        #   mv_conc       : millimolar (mM) concentration of monovalent salt
        #                   cations (usually KCl) in the PCR.
        #   dv_conc       : millimolar concentration of divalent salt cations
        #                   (usually Mg2+) in the PCR.
        #   dntp_conc     : millimolar concentration of the sum of all
        #                   deoxyribonucleotide triphosphates.
        #   dna_conc      : nanomolar (nM) concentration of each annealing
        #                   oligo over the course of the PCR.
        #   max_nn_length : longest sequence length for primer Tm analysis.
        oligo_calc = primer3.thermoanalysis.ThermoAnalysis(
            mv_conc=20, dv_conc=1.5, dntp_conc=0.8, dna_conc=50,
            max_nn_length=60)
        calc_tm = oligo_calc.calcTm
    if rev_comp is None:
        rev_comp = kang.rev_comp

    primerF_list, primerF_list_Tm = [], []
    primerR_list, primerR_list_Tm = [], []

    # Never ask for more bases than the template has: slicing would silently
    # return the same (whole-sequence) primer again and create duplicates.
    # A length below 1 is meaningless (seq[-0:] is the whole sequence).
    shortest = max(min_len, 1)
    longest = min(max_len, len(seq))

    for n in range(shortest, longest + 1):
        primerF = seq[:n]
        primerR = rev_comp(seq[-n:])

        # Each Tm is computed exactly once and stored next to its primer.
        tmF = calc_tm(primerF)
        if tm_min <= tmF <= tm_max:
            primerF_list.append(primerF)
            primerF_list_Tm.append(tmF)

        tmR = calc_tm(primerR)
        if tm_min <= tmR <= tm_max:
            primerR_list.append(primerR)
            primerR_list_Tm.append(tmR)

    return primerF_list, primerR_list, primerF_list_Tm, primerR_list_Tm
def get_opt_cloningprimer_pair(seq, *, tm_min=56, tm_max=63, max_tm_diff=3,
                               min_len=16, max_len=24, calc_tm=None,
                               rev_comp=None):
    """Pick one forward/reverse cloning primer pair for ``seq``.

    For every primer length from ``min_len`` to ``max_len`` (inclusive) the
    forward candidate is the first ``n`` bases of ``seq`` and the reverse
    candidate is the reverse complement of the last ``n`` bases of ``seq``
    (so a stop codon at the very end of ``seq`` is part of the reverse
    primer).  Candidates whose Tm lies outside ``[tm_min, tm_max]`` are
    dropped.  Among all forward/reverse combinations whose Tm values differ
    by at most ``max_tm_diff``, the pair with the smallest Tm sum is
    returned; ties go to the shortest forward primer, then the shortest
    reverse primer.

    Args:
        seq: Template sequence (string) the primers are cut from.
        tm_min: Lowest accepted melting temperature.
        tm_max: Highest accepted melting temperature.
        max_tm_diff: Largest accepted Tm difference within a pair.
        min_len: Shortest primer length to try.
        max_len: Longest primer length to try.
        calc_tm: Optional callable ``primer -> Tm``.  Defaults to
            ``ThermoAnalysis.calcTm`` from primer3 configured with the PCR
            conditions below.
        rev_comp: Optional callable returning the reverse complement of a
            string.  Defaults to ``kang.rev_comp``.

    Returns:
        ``(forward_primer, forward_Tm, reverse_primer, reverse_Tm)``, or
        ``(None, None, None, None)`` when no compatible pair exists.
    """
    if calc_tm is None:
        # Default PCR conditions; adjust them to match your own reaction.
        #   mv_conc       : millimolar (mM) concentration of monovalent salt
        #                   cations (usually KCl) in the PCR.
        #   dv_conc       : millimolar concentration of divalent salt cations
        #                   (usually Mg2+) in the PCR.
        #   dntp_conc     : millimolar concentration of the sum of all
        #                   deoxyribonucleotide triphosphates.
        #   dna_conc      : nanomolar (nM) concentration of each annealing
        #                   oligo over the course of the PCR.
        #   max_nn_length : longest sequence length for primer Tm analysis.
        oligo_calc = primer3.thermoanalysis.ThermoAnalysis(
            mv_conc=20, dv_conc=1.5, dntp_conc=0.8, dna_conc=50,
            max_nn_length=60)
        calc_tm = oligo_calc.calcTm
    if rev_comp is None:
        rev_comp = kang.rev_comp

    forward = []  # (primer, Tm) tuples, shortest first
    reverse = []

    # Never ask for more bases than the template has: slicing would silently
    # return the same (whole-sequence) primer again and create duplicates.
    # A length below 1 is meaningless (seq[-0:] is the whole sequence).
    shortest = max(min_len, 1)
    longest = min(max_len, len(seq))

    for n in range(shortest, longest + 1):
        primerF = seq[:n]
        primerR = rev_comp(seq[-n:])

        # Each Tm is computed exactly once and kept with its primer.
        tmF = calc_tm(primerF)
        if tm_min <= tmF <= tm_max:
            forward.append((primerF, tmF))

        tmR = calc_tm(primerR)
        if tm_min <= tmR <= tm_max:
            reverse.append((primerR, tmR))

    # Single pass for the minimum Tm sum; the strict '<' keeps the first
    # minimum encountered, which is what a stable ascending sort would put
    # at index 0.
    best = None
    best_sum = None
    for primerF, tmF in forward:
        for primerR, tmR in reverse:
            if abs(tmF - tmR) > max_tm_diff:
                continue
            if best is None or tmF + tmR < best_sum:
                best_sum = tmF + tmR
                best = (primerF, tmF, primerR, tmR)

    if best is None:
        return None, None, None, None
    return best
cT = dicT2cls[strTN] cT.strCDS_list.append([int(strL1), int(strL2)]) except KeyError: print('!!') exit() Outfile = open(file_fa + '.cds.fa', 'w') for TN in dicT2cls: cT = dicT2cls[TN] cds_locs = cT.strCDS_list cds_locs.sort(key=lambda x: x[0]) #cdsseq_pre = '' start_loc = cds_locs[0] intL1, intL2 = start_loc #cdsseq_pre = dicHD2seq[cT.strChr][intL1-1-1000:intL1-1] cdsseq_pre = '' for cds_loc in cds_locs: intL1, intL2 = cds_loc cdsseq_pre += dicHD2seq[cT.strChr][intL1 - 1:intL2] end_loc = cds_locs[-1] intL1, intL2 = end_loc #cdsseq_pre += dicHD2seq[cT.strChr][intL2:intL2+1000] if cT.strSTRD == '+': print('>' + cT.strTN, file=Outfile) print(cdsseq_pre, file=Outfile) elif cT.strSTRD == '-': print('>' + cT.strTN, file=Outfile) print(kang.rev_comp(cdsseq_pre), file=Outfile)
except KeyError: print('!!') exit() Outfile = open(file_fa+'.cds.fa','w') for TN in dicT2cls: cT = dicT2cls[TN] cds_locs = cT.strCDS_list cds_locs.sort(key = lambda x : x[0]) #cdsseq_pre = '' start_loc = cds_locs[0] intL1, intL2 = start_loc #cdsseq_pre = dicHD2seq[cT.strChr][intL1-1-1000:intL1-1] cdsseq_pre = '' for cds_loc in cds_locs: intL1, intL2 = cds_loc cdsseq_pre += dicHD2seq[cT.strChr][intL1-1:intL2] end_loc = cds_locs[-1] intL1, intL2 = end_loc #cdsseq_pre += dicHD2seq[cT.strChr][intL2:intL2+1000] if cT.strSTRD == '+': print('>'+cT.strTN,file=Outfile) print(cdsseq_pre,file=Outfile) elif cT.strSTRD == '-': print('>'+cT.strTN,file=Outfile) print(kang.rev_comp(cdsseq_pre),file=Outfile)
zip([x.split('=')[0] for x in strInfo_list], [x.split('=')[1] for x in strInfo_list])) try: genename = dicID2mRNA[dicInfo['Parent']] except KeyError: print(line) print(dicInfo['Parent']) exit() for strLoc, strM, strU, strDi in indexed_list: if intL1 <= int(strLoc) <= intL2: intLoc = int(strLoc) if sys.argv[2] == 'CG': if strDi == '+': context = dicHD2seq[strC][intLoc - 1 - 2:intLoc + 3] else: context = kang.rev_comp(dicHD2seq[strC][intLoc - 1 - 3:intLoc + 2]) else: if strDi == '+': context = dicHD2seq[strC][intLoc - 1 - 2:intLoc + 4] else: context = kang.rev_comp(dicHD2seq[strC][intLoc - 1 - 4:intLoc + 2]) if len(context) == dkmer[sys.argv[2]]: print('>' + strC + '_' + strLoc, file=Outfile_kmer) print(context, file=Outfile_kmer) else: pass try: dicGN2count[genename][1] += int(strU) dicGN2count[genename][0] += int(strM) except KeyError:
list_cds_seq_snp.append(cds_seq_snp) if strand == '+': list_cds_seq_snp_pep = [] for cds_seq_snp in list_cds_seq_snp: cds_seq_snp_pep = kang.translation(cds_seq_snp) list_cds_seq_snp_pep.append(cds_seq_snp_pep) if len(set(list_cds_seq_snp_pep)) > 1: # ... .. .. .... ... 1. ... pass else: continue else: list_cds_seq_snp_pep = [] for cds_seq_snp in list_cds_seq_snp: cds_seq_snp_pep = kang.translation(kang.rev_comp(cds_seq_snp)) list_cds_seq_snp_pep.append(cds_seq_snp_pep) if len(set(list_cds_seq_snp_pep)) > 1: # ... .. .. .... ... 1. ... pass else: continue print('#', genename, strand, dic_annot[genename], file=Outfile_nonsynalign) print('# pep_seq', file=Outfile_nonsynalign) pos_pepvar_array = seq_comp(list_vcf_label, list_cds_seq_snp_pep, Outfile_nonsynalign)
if line[0] == '#' or line.strip() == '': continue cell = line.strip().split('\t') strLG = cell[0] print(cell) strSC = cell[1].replace('*', '') if 'SS' in strSC: strSC = strSC.replace('SS', 'SuperScaf_') else: strSC = strSC.replace('s', 'scaffold_') LGIncludedSC.append(strSC) strD = cell[2] if strD == 'F': strSeq = dicHD2Seq[strSC] elif strD == 'R': strSeq = kang.rev_comp(dicHD2Seq[strSC]) else: strSeq = dicHD2Seq[strSC] try: dicLG2Seq[strLG] += 'N' * 500 + strSeq except KeyError: dicLG2Seq[strLG] = strSeq for strLG in dicLG2Seq: print('>' + strLG, file=Outfile_chr) print(dicLG2Seq[strLG], file=Outfile_chr) for strHD in dicHD2Seq: if strHD in LGIncludedSC: continue print('>' + strHD, file=Outfile_scaff) print(dicHD2Seq[strHD], file=Outfile_scaff)
if strand == '+': list_cds_seq_snp_pep = [] for cds_seq_snp in list_cds_seq_snp: cds_seq_snp_pep = kang.translation(cds_seq_snp) list_cds_seq_snp_pep.append(cds_seq_snp_pep) if len(set(list_cds_seq_snp_pep)) > 1: # ... .. .. .... ... 1. ... pass else: continue else: list_cds_seq_snp_pep = [] for cds_seq_snp in list_cds_seq_snp: cds_seq_snp_pep = kang.translation(kang.rev_comp(cds_seq_snp)) list_cds_seq_snp_pep.append(cds_seq_snp_pep) if len(set(list_cds_seq_snp_pep)) > 1: # ... .. .. .... ... 1. ... pass else: continue print ('#',genename,strand,dic_annot[genename],file=Outfile_nonsynalign) print ('# pep_seq',file=Outfile_nonsynalign) pos_pepvar_array = seq_comp(list_vcf_label,list_cds_seq_snp_pep,Outfile_nonsynalign) print ('# cds_seq',file=Outfile_nonsynalign) if strand == '+': pos_snp_array = seq_comp(list_vcf_label,list_cds_seq_snp,Outfile_nonsynalign) else: pos_snp_array = seq_comp(list_vcf_label,[kang.rev_comp(x) for x in list_cds_seq_snp],Outfile_nonsynalign) for n,pos_snp in enumerate(pos_snp_array):
# Read the tab-separated table ``file_joo`` (columns used: group name,
# abbreviated scaffold name, orientation) and concatenate the scaffolds of
# each group into ``dicLG2Seq``.  Grouped sequences are then written to
# ``Outfile_chr`` and all remaining sequences to ``Outfile_scaff``.
with open(file_joo) as joo_handle:  # closed deterministically after reading
    for line in joo_handle:
        # Skip comment lines and blank lines.
        if line[0] == '#' or line.strip() == '':
            continue
        cell = line.strip().split('\t')
        strLG = cell[0]  # key the scaffold is appended under (presumably a linkage group)
        print(cell)

        # Expand the abbreviated scaffold name into the FASTA header form.
        strSC = cell[1].replace('*', '')
        if 'SS' in strSC:
            strSC = strSC.replace('SS', 'SuperScaf_')
        else:
            strSC = strSC.replace('s', 'scaffold_')
        LGIncludedSC.append(strSC)

        # Only 'R' flips the scaffold; 'F' and any other value keep it as is.
        strD = cell[2]
        strSeq = dicHD2Seq[strSC]
        if strD == 'R':
            strSeq = kang.rev_comp(strSeq)

        # Scaffolds of one group are joined with a 500-N spacer.
        try:
            dicLG2Seq[strLG] += 'N' * 500 + strSeq
        except KeyError:
            dicLG2Seq[strLG] = strSeq

# Write one FASTA record per group.
for strLG in dicLG2Seq:
    print('>' + strLG, file=Outfile_chr)
    print(dicLG2Seq[strLG], file=Outfile_chr)

# Write every sequence that was not placed into a group.  A set makes the
# membership test O(1) instead of scanning the list for every header.
placed_scaffolds = set(LGIncludedSC)
for strHD in dicHD2Seq:
    if strHD in placed_scaffolds:
        continue
    print('>' + strHD, file=Outfile_scaff)
    print(dicHD2Seq[strHD], file=Outfile_scaff)
# Build one "SuperScaf_<i>" FASTA record per non-blank line of ``file_in``.
# Each line is a tab-separated list of "<scaffold>,<orientation>" entries; the
# listed scaffolds are concatenated with 500-N spacers.  Sequences that are
# not used in any link are written unchanged afterwards.
seen_scaffolds = set(done)  # O(1) duplicate check; ``done`` itself is kept in sync
with open(file_in) as link_handle:  # closed deterministically after reading
    for line in link_handle:
        # A blank line carries no entries and would crash on split(',')[1].
        if not line.strip():
            continue
        cell = line.strip().split('\t')
        strLink = 'SuperScaf_%d' % i
        i += 1
        seq = ''
        for ecell in cell:
            strSC = ecell.split(',')[0].replace('s', 'scaffold_')  # scaffold name
            # A scaffold may be used only once across all links.
            if strSC in seen_scaffolds:
                print(strSC)
                print('!!!')
                exit()
            seen_scaffolds.add(strSC)
            done.append(strSC)

            strOrt = ecell.split(',')[1][0]  # orientation: 'F' or anything else (treated as reverse)
            piece = dicHD2seq[strSC]
            # Fix: a reverse-oriented scaffold is reverse-complemented even
            # when it is the first piece of the link (previously the first
            # piece was always appended in forward orientation).
            if strOrt != 'F':
                piece = kang.rev_comp(piece)

            # No spacer in front of the first piece.
            if seq == '':
                seq += piece
            else:
                seq += 'N' * 500 + piece
        print('>' + strLink, file=Outfile)
        print(seq, file=Outfile)

# Emit every sequence that was not consumed by a link.
for strHD in dicHD2seq:
    if strHD in seen_scaffolds:
        continue
    print('>' + strHD, file=Outfile)
    print(dicHD2seq[strHD], file=Outfile)
seq = '' for ecell in cell: strSC = ecell.split(',')[0].replace('s','scaffold_') # scaffold name if strSC in done: print(strSC) print('!!!') exit() done.append(strSC) strOrt = ecell.split(',')[1][0] # Orientation F or R if strOrt == 'F': if seq == '': seq += dicHD2seq[strSC] else: seq += 'N'*500 + dicHD2seq[strSC] else : if seq == '': seq += dicHD2seq[strSC] else: seq += 'N'*500 + kang.rev_comp(dicHD2seq[strSC]) print('>'+strLink,file=Outfile) print(seq,file=Outfile) for strHD in dicHD2seq: if strHD in done: continue print('>'+strHD,file=Outfile) print(dicHD2seq[strHD],file=Outfile)