Exemple #1
0
def get_opt_cloningprimer_pair(seq):
    """Collect candidate cloning primers from both ends of ``seq`` with their Tm.

    For every length ``i`` from 16 to 24, the forward candidate is the first
    ``i`` bases of ``seq`` and the reverse candidate is ``kang.rev_comp`` of
    the last ``i`` bases (so a terminal stop codon stays in the reverse
    primer). A candidate is kept when its melting temperature, as returned by
    ``ThermoAnalysis.calcTm``, lies in the closed range [45, 63].

    Args:
        seq: Template sequence; it is sliced and the slices are passed to
            ``calcTm`` / ``kang.rev_comp``.

    Returns:
        A 4-tuple ``(primerF_list, primerR_list, primerF_list_Tm,
        primerR_list_Tm)``. Each Tm list is index-aligned with its primer list.
    """
    # Change these if you have your own PCR conditions.
    ## mv_conc   : The millimolar (mM) concentration of monovalent salt cations (usually KCl) in the PCR.
    ## dv_conc   : The millimolar concentration of divalent salt cations (usually Mg^(2+)) in the PCR.
    ## dntp_conc : The millimolar concentration of the sum of all deoxyribonucleotide triphosphates.
    ## dna_conc  : A value to use as nanomolar (nM) concentration of each annealing oligo over the course of the PCR.
    ## max_nn_length : longest sequence length for primer Tm analysis
    oligo_calc = primer3.thermoanalysis.ThermoAnalysis(
        mv_conc=20, dv_conc=1.5, dntp_conc=0.8, dna_conc=50, max_nn_length=60)

    primerF_list, primerF_list_Tm = [], []
    primerR_list, primerR_list_Tm = [], []
    for i in range(16, 25):
        primerF = seq[0:i]
        # Use kang.rev_comp(seq[-i-3:-3]) here instead to exclude the stop codon.
        primerR = kang.rev_comp(seq[-i:])  # stop codon included

        # Compute each Tm once and reuse it for both the filter and the
        # returned Tm list.
        tmF = oligo_calc.calcTm(primerF)
        if 45 <= tmF <= 63:
            primerF_list.append(primerF)
            primerF_list_Tm.append(tmF)

        tmR = oligo_calc.calcTm(primerR)
        if 45 <= tmR <= 63:
            primerR_list.append(primerR)
            primerR_list_Tm.append(tmR)

    return primerF_list, primerR_list, primerF_list_Tm, primerR_list_Tm
Exemple #2
0
def get_opt_cloningprimer_pair(seq):
    """Pick one forward/reverse cloning primer pair for ``seq``.

    Candidates of 16 to 24 nt are taken from both ends of ``seq``: the forward
    candidate is the first ``i`` bases, the reverse candidate is
    ``kang.rev_comp`` of the last ``i`` bases (a terminal stop codon stays in
    the reverse primer). Candidates are kept when their melting temperature,
    as returned by ``ThermoAnalysis.calcTm``, lies in the closed range
    [56, 63]. Among all forward/reverse combinations whose Tm values differ by
    at most 3, the pair with the smallest Tm sum is returned; ties go to the
    combination found first (shortest forward, then shortest reverse).

    Args:
        seq: Template sequence; it is sliced and the slices are passed to
            ``calcTm`` / ``kang.rev_comp``.

    Returns:
        ``(primerF, tmF, primerR, tmR)`` for the selected pair, or
        ``(None, None, None, None)`` when no combination qualifies.
    """
    # Change these if you have your own PCR conditions.
    ## mv_conc   : The millimolar (mM) concentration of monovalent salt cations (usually KCl) in the PCR.
    ## dv_conc   : The millimolar concentration of divalent salt cations (usually Mg^(2+)) in the PCR.
    ## dntp_conc : The millimolar concentration of the sum of all deoxyribonucleotide triphosphates.
    ## dna_conc  : A value to use as nanomolar (nM) concentration of each annealing oligo over the course of the PCR.
    ## max_nn_length : longest sequence length for primer Tm analysis
    oligo_calc = primer3.thermoanalysis.ThermoAnalysis(
        mv_conc=20, dv_conc=1.5, dntp_conc=0.8, dna_conc=50, max_nn_length=60)

    primerF_list, primerF_list_Tm = [], []
    primerR_list, primerR_list_Tm = [], []
    for i in range(16, 25):
        primerF = seq[0:i]
        # Use kang.rev_comp(seq[-i-3:-3]) here instead to exclude the stop codon.
        primerR = kang.rev_comp(seq[-i:])  # stop codon included

        # Compute each Tm once and reuse it for both the filter and the
        # pairing step below.
        tmF = oligo_calc.calcTm(primerF)
        if 56 <= tmF <= 63:
            primerF_list.append(primerF)
            primerF_list_Tm.append(tmF)

        tmR = oligo_calc.calcTm(primerR)
        if 56 <= tmR <= 63:
            primerR_list.append(primerR)
            primerR_list_Tm.append(tmR)

    # Every forward/reverse combination whose Tm values are within 3 of each
    # other, recorded as [forward index, reverse index, tmF, tmR].
    comp_tm = []
    for ixF, tmF in enumerate(primerF_list_Tm):
        for ixR, tmR in enumerate(primerR_list_Tm):
            if abs(tmF - tmR) <= 3:
                comp_tm.append([ixF, ixR, tmF, tmR])

    if not comp_tm:
        return None, None, None, None

    # min() returns the first minimal element, which matches taking element 0
    # after a stable ascending sort on the Tm sum.
    ixF, ixR, _, _ = min(comp_tm, key=lambda x: x[2] + x[3])
    return (primerF_list[ixF], primerF_list_Tm[ixF],
            primerR_list[ixR], primerR_list_Tm[ixR])
Exemple #3
0
            cT = dicT2cls[strTN]
            cT.strCDS_list.append([int(strL1), int(strL2)])
        except KeyError:
            print('!!')
            exit()

# Write the concatenated CDS of every transcript in dicT2cls to
# '<file_fa>.cds.fa', one FASTA record (header line + sequence line) each.
# NOTE(review): Outfile is never closed in the visible code; close it (or use
# `with`) once all writing is finished.
Outfile = open(file_fa + '.cds.fa', 'w')
for TN in dicT2cls:
    cT = dicT2cls[TN]
    cds_locs = cT.strCDS_list
    if not cds_locs:
        # No CDS interval was recorded for this transcript, so there is no
        # sequence to emit (indexing the empty list used to raise IndexError).
        continue

    # Order the [start, end] intervals by start; sorts cT.strCDS_list in place.
    cds_locs.sort(key=lambda x: x[0])

    # Intervals are sliced as [start - 1:end], i.e. treated as 1-based and
    # end-inclusive, and concatenated in ascending coordinate order.
    # To add 1 kb flanks, also take [first_start-1-1000:first_start-1] and
    # [last_end:last_end+1000] from the same chromosome sequence.
    chrom_seq = dicHD2seq[cT.strChr]
    cdsseq_pre = ''.join(chrom_seq[intL1 - 1:intL2] for intL1, intL2 in cds_locs)

    if cT.strSTRD == '+':
        cds_seq = cdsseq_pre
    elif cT.strSTRD == '-':
        # Minus-strand transcripts are written as the reverse complement.
        cds_seq = kang.rev_comp(cdsseq_pre)
    else:
        # Any other strand value produces no record.
        continue

    print('>' + cT.strTN, file=Outfile)
    print(cds_seq, file=Outfile)
Exemple #4
0
		except KeyError:
			print('!!')
			exit()
	

# Write the concatenated CDS of every transcript in dicT2cls to
# '<file_fa>.cds.fa', one FASTA record (header line + sequence line) each.
# NOTE(review): Outfile is never closed in the visible code; close it (or use
# `with`) once all writing is finished.
Outfile = open(file_fa+'.cds.fa','w')
for TN in dicT2cls:
	cT = dicT2cls[TN]
	cds_locs = cT.strCDS_list
	if not cds_locs:
		# No CDS interval was recorded for this transcript, so there is no
		# sequence to emit (indexing the empty list used to raise IndexError).
		continue

	# Order the [start, end] intervals by start; sorts cT.strCDS_list in place.
	cds_locs.sort(key = lambda x : x[0])

	# Intervals are sliced as [start-1:end], i.e. treated as 1-based and
	# end-inclusive, and concatenated in ascending coordinate order.
	# To add 1 kb flanks, also take [first_start-1-1000:first_start-1] and
	# [last_end:last_end+1000] from the same chromosome sequence.
	chrom_seq = dicHD2seq[cT.strChr]
	cdsseq_pre = ''.join(chrom_seq[intL1-1:intL2] for intL1, intL2 in cds_locs)

	if cT.strSTRD == '+':
		cds_seq = cdsseq_pre
	elif cT.strSTRD == '-':
		# Minus-strand transcripts are written as the reverse complement.
		cds_seq = kang.rev_comp(cdsseq_pre)
	else:
		# Any other strand value produces no record.
		continue

	print('>'+cT.strTN,file=Outfile)
	print(cds_seq,file=Outfile)

     zip([x.split('=')[0] for x in strInfo_list],
         [x.split('=')[1] for x in strInfo_list]))
 try:
     genename = dicID2mRNA[dicInfo['Parent']]
 except KeyError:
     print(line)
     print(dicInfo['Parent'])
     exit()
 for strLoc, strM, strU, strDi in indexed_list:
     if intL1 <= int(strLoc) <= intL2:
         intLoc = int(strLoc)
         if sys.argv[2] == 'CG':
             if strDi == '+':
                 context = dicHD2seq[strC][intLoc - 1 - 2:intLoc + 3]
             else:
                 context = kang.rev_comp(dicHD2seq[strC][intLoc - 1 -
                                                         3:intLoc + 2])
         else:
             if strDi == '+':
                 context = dicHD2seq[strC][intLoc - 1 - 2:intLoc + 4]
             else:
                 context = kang.rev_comp(dicHD2seq[strC][intLoc - 1 -
                                                         4:intLoc + 2])
         if len(context) == dkmer[sys.argv[2]]:
             print('>' + strC + '_' + strLoc, file=Outfile_kmer)
             print(context, file=Outfile_kmer)
         else:
             pass
         try:
             dicGN2count[genename][1] += int(strU)
             dicGN2count[genename][0] += int(strM)
         except KeyError:
Exemple #6
0
            list_cds_seq_snp.append(cds_seq_snp)

        # Translate every sequence in list_cds_seq_snp to a peptide; for any
        # strand other than '+', each sequence is reverse-complemented first.
        # Fragment of a loop body: `continue` below targets an enclosing loop
        # whose header is not part of this excerpt.
        if strand == '+':
            list_cds_seq_snp_pep = []
            for cds_seq_snp in list_cds_seq_snp:
                cds_seq_snp_pep = kang.translation(cds_seq_snp)
                list_cds_seq_snp_pep.append(cds_seq_snp_pep)

            # Go on only when the translations are not all identical.
            if len(set(list_cds_seq_snp_pep)) > 1:  # more than one distinct peptide
                pass
            else:
                continue
        else:
            list_cds_seq_snp_pep = []
            for cds_seq_snp in list_cds_seq_snp:
                cds_seq_snp_pep = kang.translation(kang.rev_comp(cds_seq_snp))
                list_cds_seq_snp_pep.append(cds_seq_snp_pep)

            # Go on only when the translations are not all identical.
            if len(set(list_cds_seq_snp_pep)) > 1:  # more than one distinct peptide
                pass
            else:
                continue

        # Report header for this gene: name, strand and its annotation entry.
        print('#',
              genename,
              strand,
              dic_annot[genename],
              file=Outfile_nonsynalign)
        print('# pep_seq', file=Outfile_nonsynalign)
        # seq_comp receives the labels, the peptides and the report file;
        # presumably it writes the peptide comparison and returns the variable
        # positions -- TODO confirm against seq_comp's definition.
        pos_pepvar_array = seq_comp(list_vcf_label, list_cds_seq_snp_pep,
                                    Outfile_nonsynalign)
Exemple #7
0
    # Body of a per-line loop (its header is not part of this excerpt).
    # Comment lines ('#') and blank lines are skipped.
    if line[0] == '#' or line.strip() == '':
        continue
    # Tab-separated columns: [0] linkage-group name, [1] scaffold token,
    # [2] direction flag.
    cell = line.strip().split('\t')
    strLG = cell[0]
    print(cell)
    # Drop '*' markers, then expand the abbreviated token into a full name.
    strSC = cell[1].replace('*', '')
    if 'SS' in strSC:
        strSC = strSC.replace('SS', 'SuperScaf_')
    else:
        # NOTE(review): this replaces every 's' in the token, not just a
        # leading one -- confirm tokens never contain a second 's'.
        strSC = strSC.replace('s', 'scaffold_')
    # Remember which scaffolds were placed into a linkage group.
    LGIncludedSC.append(strSC)
    strD = cell[2]
    # Only 'R' changes the sequence (reverse complement); 'F' and any other
    # flag value use the stored sequence unchanged.
    if strD == 'F':
        strSeq = dicHD2Seq[strSC]
    elif strD == 'R':
        strSeq = kang.rev_comp(dicHD2Seq[strSC])
    else:
        strSeq = dicHD2Seq[strSC]
    # Append to the group's sequence with a 500-'N' spacer; the first scaffold
    # of a group (KeyError on lookup) starts the entry without a spacer.
    try:
        dicLG2Seq[strLG] += 'N' * 500 + strSeq
    except KeyError:
        dicLG2Seq[strLG] = strSeq
# One FASTA record (header line, then sequence line) per linkage group.
for strLG in dicLG2Seq:
    print('>' + strLG, dicLG2Seq[strLG], sep='\n', file=Outfile_chr)

# Scaffolds that were not placed into any linkage group go to the scaffold
# file unchanged.
for strHD in dicHD2Seq:
    if strHD not in LGIncludedSC:
        print('>' + strHD, dicHD2Seq[strHD], sep='\n', file=Outfile_scaff)
Exemple #8
0
     
 
 
 # Translate every sequence in list_cds_seq_snp to a peptide; for any strand
 # other than '+', each sequence is reverse-complemented first.
 # Fragment of a loop body: `continue` below targets an enclosing loop whose
 # header is not part of this excerpt.
 if strand == '+':
     list_cds_seq_snp_pep = []
     for cds_seq_snp in list_cds_seq_snp:
         cds_seq_snp_pep = kang.translation(cds_seq_snp)
         list_cds_seq_snp_pep.append(cds_seq_snp_pep)
     
     # Go on only when the translations are not all identical.
     if len(set(list_cds_seq_snp_pep)) > 1: # more than one distinct peptide
         pass
     else: continue
 else:
     list_cds_seq_snp_pep = []
     for cds_seq_snp in list_cds_seq_snp:
         cds_seq_snp_pep = kang.translation(kang.rev_comp(cds_seq_snp))
         list_cds_seq_snp_pep.append(cds_seq_snp_pep)
     
     # Go on only when the translations are not all identical.
     if len(set(list_cds_seq_snp_pep)) > 1: # more than one distinct peptide
         pass
     else: continue
     
 # Report header for this gene: name, strand and its annotation entry.
 print ('#',genename,strand,dic_annot[genename],file=Outfile_nonsynalign)
 print ('# pep_seq',file=Outfile_nonsynalign)
 # seq_comp receives the labels, the sequences and the report file; presumably
 # it writes the comparison and returns the variable positions -- TODO confirm
 # against seq_comp's definition.
 pos_pepvar_array = seq_comp(list_vcf_label,list_cds_seq_snp_pep,Outfile_nonsynalign)
 print ('# cds_seq',file=Outfile_nonsynalign)
 # Nucleotide comparison: sequences are reverse-complemented for any strand
 # other than '+', matching the orientation used for translation above.
 if strand == '+':
     pos_snp_array = seq_comp(list_vcf_label,list_cds_seq_snp,Outfile_nonsynalign)
 else:
     pos_snp_array = seq_comp(list_vcf_label,[kang.rev_comp(x) for x in list_cds_seq_snp],Outfile_nonsynalign)
 for n,pos_snp in enumerate(pos_snp_array):
Exemple #9
0
# Build one sequence per linkage group from the scaffold list in file_joo,
# then write the linkage groups to Outfile_chr and every scaffold that was not
# placed to Outfile_scaff.
# The input file is read through `with` so its handle is closed as soon as
# parsing is done.
with open(file_joo) as joo_handle:
	for line in joo_handle:
		# Comment lines ('#') and blank lines are skipped.
		if line[0] == '#' or line.strip() == '':
			continue
		# Tab-separated columns: [0] linkage-group name, [1] scaffold token,
		# [2] direction flag.
		cell = line.strip().split('\t')
		strLG = cell[0]
		print(cell)
		# Drop '*' markers, then expand the abbreviated token into a full name.
		strSC = cell[1].replace('*','')
		if 'SS' in strSC:
			strSC = strSC.replace('SS','SuperScaf_')
		else:
			# NOTE(review): this replaces every 's' in the token, not just a
			# leading one -- confirm tokens never contain a second 's'.
			strSC = strSC.replace('s','scaffold_')
		LGIncludedSC.append(strSC)
		strD = cell[2]
		# Only 'R' changes the sequence (reverse complement); 'F' and any
		# other flag value use the stored sequence unchanged.
		if strD == 'R':
			strSeq = kang.rev_comp(dicHD2Seq[strSC])
		else:
			strSeq = dicHD2Seq[strSC]
		# Append with a 500-'N' spacer; the first scaffold of a group
		# (KeyError on lookup) starts the entry without a spacer.
		try:
			dicLG2Seq[strLG] += 'N'*500+strSeq
		except KeyError:
			dicLG2Seq[strLG] = strSeq

# One FASTA record (header line, then sequence line) per linkage group.
for strLG in dicLG2Seq:
	print('>'+strLG,file=Outfile_chr)
	print(dicLG2Seq[strLG],file=Outfile_chr)

# Build the lookup set once so the membership test below is O(1) per scaffold
# instead of scanning the list each time.
placed_scaffolds = set(LGIncludedSC)
for strHD in dicHD2Seq:
	if strHD in placed_scaffolds:
		continue
	print('>'+strHD,file=Outfile_scaff)
	print(dicHD2Seq[strHD],file=Outfile_scaff)

	
Exemple #10
0
# Join scaffolds into super-scaffolds as listed in file_in and write them,
# followed by every scaffold that was not part of any link, to Outfile.
# Each non-blank input line is one link: tab-separated entries of the form
# '<scaffold token>,<orientation...>'. Links are named SuperScaf_<i>, with the
# counter `i` continuing from its value before this loop.
linked_scaffolds = set(done)  # O(1) duplicate checks; `done` is kept in sync
with open(file_in) as link_handle:
    for line in link_handle:
        if not line.strip():
            # A blank line carries no link (it used to crash with IndexError
            # when the missing orientation field was indexed).
            continue
        cell = line.strip().split('\t')
        strLink = 'SuperScaf_%d' % i
        i += 1
        oriented_seqs = []
        for ecell in cell:
            fields = ecell.split(',')
            # NOTE(review): this replaces every 's' in the token, not just a
            # leading one -- confirm tokens never contain a second 's'.
            strSC = fields[0].replace('s', 'scaffold_')  # scaffold name
            if strSC in linked_scaffolds:
                # A scaffold may be used in one link only; report it and stop.
                print(strSC)
                print('!!!')
                exit()
            linked_scaffolds.add(strSC)
            done.append(strSC)
            strOrt = fields[1][0]  # Orientation F or R
            if strOrt == 'F':
                oriented_seqs.append(dicHD2seq[strSC])
            else:
                # Any non-'F' orientation is reverse-complemented, including
                # the first scaffold of a link (previously the first one was
                # appended as-is, unlike every later scaffold).
                oriented_seqs.append(kang.rev_comp(dicHD2seq[strSC]))
        # Neighbouring scaffolds are separated by a 500-'N' spacer.
        seq = ('N' * 500).join(oriented_seqs)
        print('>' + strLink, file=Outfile)
        print(seq, file=Outfile)

# Scaffolds that were not used in any link are written unchanged.
for strHD in dicHD2seq:
    if strHD in linked_scaffolds:
        continue
    print('>' + strHD, file=Outfile)
    print(dicHD2seq[strHD], file=Outfile)
Exemple #11
0
	# Body of a per-line loop (its header is not part of this excerpt):
	# builds one joined sequence from the entries in `cell` and writes it as
	# a FASTA record named strLink.
	seq = ''
	for ecell in cell:
		# Each entry is '<scaffold token>,<orientation...>'.
		# NOTE(review): replace() expands every 's' in the token, not just a
		# leading one -- confirm tokens never contain a second 's'.
		strSC  = ecell.split(',')[0].replace('s','scaffold_') # scaffold name
		# A scaffold may be used only once; report the duplicate and stop.
		if strSC in done:
			print(strSC)
			print('!!!')
			exit()
		done.append(strSC)
		strOrt = ecell.split(',')[1][0] # Orientation F or R
		# The first scaffold starts the sequence; later ones are preceded by
		# a 500-'N' spacer.
		if strOrt == 'F':
			if seq == '':
				seq += dicHD2seq[strSC]
			else:
				seq += 'N'*500 + dicHD2seq[strSC]
		else : 
			# NOTE(review): when the first scaffold is not 'F' it is appended
			# without rev_comp, while later non-'F' scaffolds are
			# reverse-complemented -- looks inconsistent, confirm intent.
			if seq == '':
				seq += dicHD2seq[strSC]
			else:
				seq += 'N'*500 + kang.rev_comp(dicHD2seq[strSC])
	print('>'+strLink,file=Outfile)
	print(seq,file=Outfile)
# Scaffolds that were not consumed above (not listed in `done`) are written
# unchanged, one FASTA record (header line, then sequence line) each.
for strHD in dicHD2seq:
	if strHD not in done:
		print('>'+strHD, dicHD2seq[strHD], sep='\n', file=Outfile)