コード例 #1
0
import shutil
import os.path
import glob
import csv
import pandas as pd
import primer3    
import sys
import numpy as np
import re
from Santalucia_NN_Tm import NN_Tm, complement, mM_monovalent
from Bio import SeqIO
from time_stamp import Time_stamp
from parameters import *
from sequence_processing_functions import fasta_to_seq, gc_content, rev_complement

# Module-level value computed once, at import time, by mM_monovalent from
# Na, K, Tris, Mg and dNTPs. Those five names are not defined in the visible
# part of this file -- presumably they come from `from parameters import *`
# (TODO confirm).
monovalent_cation_eq    =    mM_monovalent(Na=Na, K=K, Tris=Tris, Mg=Mg, dNTPs=dNTPs)

############################################################
#### FUNCTIONS
############################################################      

### FUNCTION TO CALCULATE HAIRPIN TM
def hairpin_Tm(primer_sequence, mv_cation=0, primer_conc=0):
    """Return the hairpin melting temperature of a primer as a string.

    The temperature is the ``tm`` attribute of the result returned by
    ``primer3.calcHairpin``. That call is made with ``dv_conc=0``,
    ``dntp_conc=0``, ``temp_c=37`` and ``max_loop=30``.

    Args:
        primer_sequence: Sequence handed to ``primer3.calcHairpin``.
        mv_cation: Forwarded as ``mv_conc``.
        primer_conc: Forwarded as ``dna_conc``.

    Returns:
        str: The Tm rounded to two decimals and formatted with exactly two
        decimal places (e.g. ``"41.30"``).
    """
    thermo_result = primer3.calcHairpin(
        primer_sequence,
        mv_conc=mv_cation,
        dv_conc=0,
        dntp_conc=0,
        dna_conc=primer_conc,
        temp_c=37,
        max_loop=30,
    )
    return format(round(thermo_result.tm, 2), ".2f")
    
### FUNCTION TO CALCULATE HOMODIMER TM
def homodimer_Tm(primer_sequence, mv_cation=0, primer_conc=0):
    """Return the homodimer melting temperature of a primer as a string.

    The temperature is the ``tm`` attribute of the result returned by
    ``primer3.calcHomodimer``. That call is made with ``dv_conc=0``,
    ``dntp_conc=0``, ``temp_c=37`` and ``max_loop=30``.

    Args:
        primer_sequence: Sequence handed to ``primer3.calcHomodimer``.
        mv_cation: Forwarded as ``mv_conc``.
        primer_conc: Forwarded as ``dna_conc``.

    Returns:
        str: The Tm rounded to two decimals and formatted with exactly two
        decimal places.
    """
    thermo_result = primer3.calcHomodimer(
        primer_sequence,
        mv_conc=mv_cation,
        dv_conc=0,
        dntp_conc=0,
        dna_conc=primer_conc,
        temp_c=37,
        max_loop=30,
    )
    return format(round(thermo_result.tm, 2), ".2f")
コード例 #2
0
def _find_offtarget_exact_matches(blast_output):
    """Collect primers that have a full-length BLAST hit away from their own locus.

    Args:
        blast_output: Text produced by blastn with
            ``-outfmt "10 qseq qlen qseqid sacc sstart send sstrand"``: one
            comma-separated hit per line. The query id is split on ``_`` and
            read as ``TA_<region>_<chrom>_<start>_<length>_<tm>_<strand>``,
            the header layout ``main`` writes to ``UOD_featureFilter.fasta``.

    Returns:
        set: The aligned query sequences (``qseq``) judged to be off-target.

    Raises:
        IndexError, ValueError: If a line does not have the expected fields.
    """
    offtarget_primers = set()
    # Whatever follows the last newline is dropped (normally an empty string).
    for hit_line in blast_output.split('\n')[:-1]:
        print("%s" % (hit_line))
        fields = hit_line.strip(' ').split(',')
        primer_seq = fields[0]
        query_id = fields[2].split('_')
        qseq_chr = query_id[2]
        qseq_start = int(query_id[3])
        qseq_strand = query_id[6]
        qseq_stop = qseq_start + int(query_id[4])
        targetseq_chr = fields[3]
        targetseq_start = int(fields[4])
        targetseq_stop = int(fields[5])
        query_length = int(fields[1])

        # Only hits whose aligned query string is as long as the whole query
        # are considered.
        if len(primer_seq) != query_length:
            continue

        if qseq_chr != targetseq_chr:
            # Hit on another chromosome.
            offtarget_primers.add(primer_seq)
        elif qseq_strand == "+":
            # NOTE(review): a same-chromosome hit is flagged only when BOTH
            # coordinates differ from the primer's own position ("and", not
            # "or"). Kept exactly as in the original.
            if (qseq_start + 1) != targetseq_start and (
                    qseq_stop - 1) != targetseq_stop:
                offtarget_primers.add(primer_seq)
        elif qseq_strand == "-":
            # Same test with the subject start/end swapped.
            if (qseq_start + 1) != targetseq_stop and (
                    qseq_stop - 1) != targetseq_start:
                offtarget_primers.add(primer_seq)
    return offtarget_primers


def main():
    """Design candidate primers, drop those with exact off-target hits, write results.

    Steps, all driven by module-level settings that are defined outside this
    function (``outdir``, ``primer_size_range``, ``genome_fasta``,
    ``refDB_path``, the ``em_*`` BLAST options, the filter thresholds, ...):

    1. Read ``outdir + "sequence.txt"``; for every sequence line call
       ``ChopImputSeq`` once per primer length in ``primer_size_range`` and
       merge the returned dicts.
    2. Write every candidate to ``UOD_featureFilter.txt`` and
       ``UOD_featureFilter.fasta``.
    3. Exit with status -1 if ``genome_fasta`` does not exist. Otherwise
       change into ``refDB_path``, build the ``zmv2all`` BLAST database when
       needed, and run ``blastn`` on the candidate FASTA.
    4. Remove the primers reported by ``_find_offtarget_exact_matches`` and
       write the survivors to the ``UOD_forward_primer`` /
       ``UOD_reverse_primer`` / ``UOD_final_*`` files.
    5. Append counts and the run duration to ``run_summary.txt``.

    The function also relies on ``sp`` (presumably the ``subprocess`` module
    -- confirm), ``itemgetter``, ``time`` and a start timestamp ``t0`` being
    available at module level.

    Raises:
        ValueError: If ``sequence.txt`` contains a sequence line before any
            ``>`` header line.
        SystemExit: With code -1 when ``genome_fasta`` is missing.
    """
    monovalent_cation_eq = mM_monovalent(Na=Na,
                                         K=K,
                                         Tris=Tris,
                                         Mg=Mg,
                                         dNTPs=dNTPs)

    # The size range is loop-invariant: parse "min-max" once.
    size_bounds = primer_size_range.split("-")
    primer_l_min, primer_l_max = int(size_bounds[0]), int(size_bounds[1])

    primer_dict_all = dict()
    primer_num = 0
    chrom = start = None

    # Open the TRS.py result (sequence.txt), chop each region and filter primers.
    with open(outdir + "sequence.txt", "r") as seq_file:
        for line in seq_file:
            if line.startswith('>'):
                primer_num += 1
                llist = line.rstrip().split('_')
                chrom, start = llist[1], llist[2]
                continue
            if chrom is None:
                raise ValueError(
                    "sequence.txt: sequence line found before any '>' header")
            for primer_l in range(primer_l_min, primer_l_max + 1):
                primer_dict = ChopImputSeq(
                    line.rstrip(), primer_num, primer_l, chrom, start, minTm,
                    maxTm, GC_range_min, GC_range_max, CheckATends_flag,
                    CheckGCclamp_flag, NucleotideRepeatFilter_flag,
                    NucleotideRepeatFilter_threshold, self_Tmdiff,
                    monovalent_cation_eq)
                primer_dict_all.update(primer_dict)
    no_primers_designed = len(primer_dict_all)

    UOD_all_fasta = outdir + "UOD_featureFilter.fasta"

    # All result files are opened (and truncated) up front and are closed on
    # every exit path, including the early sys.exit below.
    with open(outdir + 'run_summary.txt', 'a') as parameters_used, \
            open(outdir + "UOD_forward_primer.fasta", "w") as FPrimer, \
            open(outdir + "UOD_reverse_primer.fasta", "w") as RPrimer, \
            open(outdir + "UOD_final_primer.txt", "w") as final_UOD_primer, \
            open(outdir + "UOD_final_all_primer_info.fasta",
                 "w") as final_UOD_all_primer_info, \
            open(outdir + "UOD_final_all_primer_fasta.fasta",
                 "w") as final_UOD_all_primer_fasta:

        # Write every primer that passed the feature filter. Both files must
        # be closed before blastn reads the FASTA, hence the inner block.
        with open(outdir + "UOD_featureFilter.txt", "w") as unblast_file, \
                open(UOD_all_fasta, "w") as FRprimer:
            # NOTE(review): this header names 8 columns, but each data row
            # below has 9 (the region number comes first). Left unchanged
            # because downstream readers may depend on the current layout.
            unblast_file.write(
                "chrom\tstart\tend\toccurrence\tsequence\tlen\tstrand\ttm\n")
            # .items() behaves like .iteritems() here and also exists on
            # Python 3.
            for primer, info in sorted(primer_dict_all.items(),
                                       key=itemgetter(1)):
                primer_region_num = info[0]
                chrom = info[1]
                start = int(info[2]) + info[3]
                length = info[4]
                end = start + length
                occu = info[5]
                tm = info[6]
                strand = info[7]
                unblast_file.write(
                    "%d\t%s\t%d\t%d\t%d\t%s\t%d\t%s\t%f\n" %
                    (primer_region_num, chrom, start, end, occu, primer,
                     length, strand, tm))
                FRprimer.write(">" + "_".join([
                    "TA", str(primer_region_num), chrom, str(start),
                    str(length), str(tm), str(strand)
                ]) + "\n")
                FRprimer.write(primer + "\n")

        # Check that the genome file exists.
        if not os.path.exists(genome_fasta):
            sys.stderr.write(
                '\r[*] Please give the hg19.fasta file[hg19.fasta]\n')
            sys.exit(-1)

        os.chdir(refDB_path)
        blast_db = "%szmv2all" % refDB_path

        # Create the blastn database when needed.
        # NOTE(review): "<" is a proper-subset test. The database is built
        # only when every zmv2all.* file found is one of the names below and
        # at least one of them is missing; any other zmv2all.* file
        # suppresses the build. Kept as in the original.
        file_set = {
            "zmv2all.nin", "zmv2all.nhr", "zmv2all.nsq", "zmv2all.nsi",
            "zmv2all.nsd", "zmv2all.nog"
        }
        if set(glob.glob("zmv2all.*")) < file_set:
            with open(os.devnull, 'w') as f0:
                sp.call([
                    "makeblastdb", "-in", str(genome_fasta), "-dbtype",
                    "nucl", "-parse_seqids", "-out", blast_db
                ],
                        stdout=f0,
                        stderr=f0)

        # Exact-match blast of the candidate primers; the word size is the
        # minimum primer length.
        word_size = primer_l_min
        blastn_cmd = [
            "blastn", "-task", "blastn",
            "-db", blast_db,
            "-query", str(UOD_all_fasta),
            "-evalue", str(em_e_value),
            "-word_size", str(word_size),
            "-gapopen", str(em_gapopen),
            "-gapextend", str(em_gapextend),
            "-reward", str(em_reward),
            "-penalty", str(em_penalty),
            "-dust", "no",
            "-perc_identity", str(em_perc_identity),
            "-max_target_seqs", str(em_max_target_seqs),
            "-max_hsps", str(em_max_hsps),
            "-outfmt", "10 qseq qlen qseqid sacc sstart send sstrand",
            "-num_threads", str(em_num_threads),
        ]
        # universal_newlines=True makes communicate() return text on
        # Python 3 as well, so the output can be split on '\n'.
        p1 = sp.Popen(blastn_cmd, stdout=sp.PIPE, universal_newlines=True)
        exact_match_output, _ = p1.communicate()

        # Remove from the original dictionary those primers with exact
        # matches elsewhere in the genome.
        for primer_exact_match in _find_offtarget_exact_matches(
                exact_match_output):
            primer_dict_all.pop(primer_exact_match, None)
        no_primers_no_exact_match = len(primer_dict_all)

        # Write the primers that survived the exact-match blast.
        # NOTE(review): this header names 6 columns, but each data row has 7
        # (the region number comes first). Left unchanged.
        final_UOD_primer.write("chrom\tstart\tend\tseq\ttm\tstrand\n")
        for primer, value in primer_dict_all.items():
            primer_region_num = value[0]
            chrom = value[1]
            primer_start_pos = int(value[2]) + value[3]
            primer_end_pos = primer_start_pos + value[4]
            tm = value[6]
            strand = value[7]

            info_header = (">TA_" + str(primer_region_num) + "_" + chrom +
                           "_" + str(primer_start_pos) + "_" +
                           str(len(primer)) + "_" + str(tm))
            full_header = info_header + "_" + strand + "_" + primer

            if strand == "+":
                FPrimer.write(full_header + "\n")
                FPrimer.write(primer + "\n")
            elif strand == "-":
                RPrimer.write(full_header + "\n")
                RPrimer.write(primer + "\n")
            final_UOD_primer.write(
                str(primer_region_num) + "\t" + chrom + "\t" +
                str(primer_start_pos) + "\t" + str(primer_end_pos) + "\t" +
                primer + "\t" + str(tm) + "\t" + strand + "\n")
            final_UOD_all_primer_info.write(info_header + "\n")
            final_UOD_all_primer_info.write(primer + "\n")
            final_UOD_all_primer_fasta.write(full_header + "\n")
            final_UOD_all_primer_fasta.write(primer + "\n")

        ############################################################
        # Time to run the code: end timer
        ############################################################
        t1 = time.time()
        total = t1 - t0
        total = ("{0:.2f}".format(round(total, 2)))
        parameters_used.write(
            "no. of primers designed based on filter criteria : " +
            str(no_primers_designed) + '\n'
            "no. of primers without exact match               : " +
            str(no_primers_no_exact_match) + '\n'
            "### UOD run duration : " + str(total) + " seconds" + '\n'
            "##########################################################" + "\n" +
            "\n" + "\n")