Exemple #1
0
def make_primers(query):
    '''
        Build primer dicts from user-supplied text, laid out like the
        result of the design_primer module.

        Input:
            query: a string in multi-lines. Every non-blank line holds
                whitespace-separated columns, either
                "id rank seq_F seq_R" or "id seq_F seq_R" (rank is then 0).

        Return:
            A dict keyed by id whose values are dicts of PRIMER_* entries,
            one group of entries per rank seen for that id.
            If the run was stopped, a line has the wrong number of columns,
            the rank is not an integer, or a sequence fails
            check_primer_seq, a dict with a single 'error' key is returned
            instead.
    '''
    if global_var.stop_run is True:
        return {'error': 'Stop running'}

    primers = {}

    for line in query.splitlines():
        if line.strip() == '':
            continue
        line_data = re.split(r'\s+', line.strip())
        if len(line_data) == 4:
            (primer_id, rank, seq_F, seq_R) = line_data
            # A non-numeric rank is a user input problem: report it the same
            # way as the other input errors instead of raising ValueError.
            try:
                rank = int(rank)
            except ValueError:
                return {
                    'error':
                    f'Your input: {line} does not have an integer rank in the second column'
                }
        elif len(line_data) == 3:
            (primer_id, seq_F, seq_R) = line_data
            rank = 0
        else:
            return {
                'error':
                f'Your input: {line} does not have three or four columns'
            }

        # whether seq_F and seq_R are valid primer seqs
        for candidate in (seq_F, seq_R):
            if check_primer_seq(candidate) is False:
                return {
                    'error':
                    f'Your input: {candidate} does not seem like a vaild primer seq'
                }

        # Tm and GC are computed on the degenerate-free form of each primer
        plain_F = transform_degenerate(seq_F)
        plain_R = transform_degenerate(seq_R)

        # generate primers
        entry = primers.setdefault(primer_id, {})
        # Keep the largest rank + 1 seen so far, so lines given out of rank
        # order do not shrink the reported number of pairs.
        entry['PRIMER_PAIR_NUM_RETURNED'] = max(
            entry.get('PRIMER_PAIR_NUM_RETURNED', rank + 1), rank + 1)
        entry[f'PRIMER_PAIR_{rank}_PENALTY'] = 0
        entry[f'PRIMER_LEFT_{rank}_SEQUENCE'] = seq_F
        entry[f'PRIMER_RIGHT_{rank}_SEQUENCE'] = seq_R
        # start position is stored as -1 for user-supplied primers
        entry[f'PRIMER_LEFT_{rank}'] = [-1, len(seq_F)]
        entry[f'PRIMER_RIGHT_{rank}'] = [-1, len(seq_R)]
        entry[f'PRIMER_LEFT_{rank}_TM'] = primer3.calcTm(plain_F)
        entry[f'PRIMER_RIGHT_{rank}_TM'] = primer3.calcTm(plain_R)
        entry[f'PRIMER_LEFT_{rank}_GC_PERCENT'] = calculate_GC(plain_F)
        entry[f'PRIMER_RIGHT_{rank}_GC_PERCENT'] = calculate_GC(plain_R)
        entry[f'PRIMER_PAIR_{rank}_PRODUCT_SIZE'] = -1
        entry['SEQUENCE_RELATIVE_TARGET_START'] = 0

    return primers
Exemple #2
0
def writePadlocksToCSV(padlock_results: Dict[str, List[PadHit]], filename: str):
    '''Dump every padlock hit to a comma-separated text file.

    A header row is written first, then one row per hit, gene by gene.
    Each row is the gene name, the fields of the hit tuple, and the melting
    temperatures of the hit's ``seq_r`` and ``seq_l`` computed with
    ``P_PARAMS['thermo_params']``.  A confirmation line is printed once the
    file has been written.
    '''
    thermo_kwargs = P_PARAMS['thermo_params']
    header = ('gene_name, name0, name1, strand_dir, genome_idx, index, '
              'gap_size, sequence, barcode, right_arm, scaffold, '
              'left_arm, right_tm, left_tm\n')
    row_format = '%s, %s, %s, %s, %d, %d, %d, %s, %s, %s, %s, %s, %2.3f, %2.3f\n'
    with io.open(filename, 'w') as out:
        out.write(header)
        for gene_name, hits in padlock_results.items():
            for hit in hits:
                right_tm = calcTm(hit.seq_r, **thermo_kwargs)
                left_tm = calcTm(hit.seq_l, **thermo_kwargs)
                # the hit is itself a tuple, so the row is built by concatenation
                row = (gene_name,) + hit + (right_tm, left_tm)
                out.write(row_format % row)
    print('Wrote padlocks to %s' % filename)
Exemple #3
0
def get_tm(sequence):
    """Return the primer3 melting temperature of ``sequence`` as a float.

    The salt, dNTP and DNA concentrations are taken from the names
    ``monovalent``, ``divalent``, ``dntps`` and ``dna`` defined outside
    this function.
    """
    conditions = {
        'mv_conc': monovalent,
        'dv_conc': divalent,
        'dntp_conc': dntps,
        'dna_conc': dna,
    }
    melting_temp = primer3.calcTm(sequence, **conditions)
    return float(melting_temp)
Exemple #4
0
    def __init__(self, construct, start, end):
        """
        Build a primer from a pair of indices into ``construct.dna``.

        When ``start`` is less than ``end`` the primer is the slice
        ``construct.dna[start:end]``.  When ``start`` is greater than
        ``end`` the primer is the reverse complement of
        ``construct.dna[end:start]``.  Equal indices are rejected by an
        assertion.

        The melting temperature (primer3, 'breslauer' method), the GC
        fraction and the GC-clamp flag are computed immediately.
        """
        assert start != end

        self._construct = construct
        self._start = start
        self._end = end

        self._sequence = (
            construct.dna[start:end]
            if start < end
            else dna_reverse_complement(construct.dna[end:start])
        )

        self._melting_temp = primer3.calcTm(
            self._sequence, tm_method='breslauer')

        def count_gc(bases):
            # number of G or C characters in the given stretch
            return sum(base in 'GC' for base in bases)

        self._gc_content = count_gc(self._sequence) / len(self)

        # The clamp flag requires 1 to 3 G/C bases in the first five
        # positions and likewise in the last five.
        head_gc = count_gc(self._sequence[:5])
        tail_gc = count_gc(self._sequence[-5:])
        self.has_gc_clamp = (1 <= head_gc <= 3) and (1 <= tail_gc <= 3)
Exemple #5
0
def GetPrimers(n_ind, genom, len_primer, GC, Tm, gnum):
    """Find primers in the genome.

    Slides a window of ``len_primer`` characters over ``genom`` starting at
    index ``n_ind``.  When ``gnum`` is not 1 the window is reversed
    (plain string reversal).  A window is kept when its ``calcTm`` value
    lies within ``Tm[0]..Tm[1]`` and its GC percentage lies within
    ``GC[0]..GC[1]`` (both ranges inclusive).

    Returns a list of ``[end_index, [primer, len_primer, gc_str, tm_str]]``
    entries, where ``end_index`` is the index of the window's last
    character in ``genom``.
    """
    hits = []
    # NOTE(review): the window starting at len(genom) - len_primer is not
    # visited by this range - confirm that is intended.
    stop = len(genom) - len_primer

    for start in range(n_ind, stop):  # scan for primers
        window = genom[start:start + len_primer]
        primer = window if gnum == 1 else window[::-1]

        melting = calcTm(primer)
        gc_percent = (primer.count('G') + primer.count('C')) / len_primer * 100

        if not (Tm[0] <= melting <= Tm[1]):
            continue
        if not (GC[0] <= gc_percent <= GC[1]):
            continue

        hits.append([
            start + len_primer - 1,
            [primer, len_primer, str(gc_percent), str(melting)],
        ])

    return hits
Exemple #6
0
def calc_characteristics(infile):
    """Compute Tm and secondary-structure values for each FASTA record.

    For every record in ``infile`` the forward sequence and its reverse
    complement are analysed with ``calcHairpin`` and ``calcHomodimer``;
    ``calcTm`` is evaluated on the forward sequence only.  A structure's
    ``tm`` is reported when ``structure_found`` is true, otherwise "NA".

    Returns a dict mapping record id to a tab-prefixed, tab-joined string:
    forward, tm, hairpinF, homoF, reverse, hairpinR, homoR.
    """
    print("Using sequence file", infile,
          "to calculate Tm and structure potential")

    def tm_or_na(thermo):
        # structure Tm if one was found, else the "NA" placeholder
        return thermo.tm if thermo.structure_found else "NA"

    chars = {}
    for record in SeqIO.parse(infile, "fasta"):
        forward = str(record.seq)
        forward_hairpin = calcHairpin(forward)
        forward_homodimer = calcHomodimer(forward)

        reverse = str(record.seq.reverse_complement())
        reverse_hairpin = calcHairpin(reverse)
        reverse_homodimer = calcHomodimer(reverse)

        columns = [
            forward,
            calcTm(forward),
            tm_or_na(forward_hairpin),
            tm_or_na(forward_homodimer),
            reverse,
            tm_or_na(reverse_hairpin),
            tm_or_na(reverse_homodimer),
        ]
        chars[record.id] = "\t" + "\t".join(str(value) for value in columns)
    return chars
Exemple #7
0
def writePadlocksToCSV(padlock_results: Dict[str, List[PadHit]],
                       filename: str):
    '''Write all padlock hits to a comma-separated text file.

    The file starts with a header row; each following row holds the gene
    name, the fields of one hit tuple, and the melting temperatures of that
    hit's ``seq_r`` and ``seq_l`` (computed with
    ``P_PARAMS['thermo_params']``).  Prints a confirmation when done.
    '''
    column_names = ('gene_name, name0, name1, strand_dir, genome_idx, index, '
                    'gap_size, sequence, barcode, right_arm, scaffold, '
                    'left_arm, right_tm, left_tm\n')
    line_template = '%s, %s, %s, %s, %d, %d, %d, %s, %s, %s, %s, %s, %2.3f, %2.3f\n'
    thermo = P_PARAMS['thermo_params']
    with io.open(filename, 'w') as handle:
        handle.write(column_names)
        for gene, hit_list in padlock_results.items():
            for padlock in hit_list:
                tm_right = calcTm(padlock.seq_r, **thermo)
                tm_left = calcTm(padlock.seq_l, **thermo)
                # hits are tuples, so the full row is a tuple concatenation
                fields = (gene, ) + padlock + (tm_right, tm_left)
                handle.write(line_template % fields)
    print('Wrote padlocks to %s' % filename)
    def __call__(self, primer: str) -> float:
        """
        We created NEB like calculation method from here:
        According to the article: https://tmcalculator.neb.com/#!/help
        we offset our results by 3 and it yields similar results as default NEB calculator
        for Q5 product group + High Fidelity.

        When ``self.cached`` is true, results are looked up in and stored
        to ``self.cache`` keyed by the primer string.  The result is rounded
        to ``self.precision`` decimal places.

        :param primer: [str] primer sequence
        :return: melting temperature; negative infinity for an empty primer
        """
        if len(primer) == 0:
            # TODO remove and let raise exception
            return -float("inf")

        if self.cached and primer in self.cache:
            return self.cache[primer]

        temp = calcTm(
            primer,
            dna_conc=(500 / 6) * 7,  # primer is assumed 6x template
            mv_conc=(60 + 20),
            dv_conc=2,
            tm_method='santalucia',
            salt_corrections_method='owczarzy'
        ) + 3  # +3 because NEB documentation recommends it and it is fairly close
        temp = round(temp, self.precision)
        if self.cached:
            self.cache[primer] = temp
        return temp
Exemple #9
0
def get_tm(sequence, mv, dv, dntps, dna):
    """Return the primer3 melting temperature of ``sequence`` as a float.

    ``mv``, ``dv``, ``dntps`` and ``dna`` are forwarded to
    ``primer3.calcTm`` as ``mv_conc``, ``dv_conc``, ``dntp_conc`` and
    ``dna_conc`` respectively.
    """
    conditions = {
        'mv_conc': mv,
        'dv_conc': dv,
        'dntp_conc': dntps,
        'dna_conc': dna,
    }
    melting_temp = primer3.calcTm(sequence, **conditions)
    return float(melting_temp)
Exemple #10
0
 def __init__(self, seq, start, direction, name="", penalty=0):
     """Initialise the parent with seq/start/direction, then add primer stats.

     Stores ``name`` and ``penalty``, resets ``identity`` to 0, computes the
     melting temperature of ``self.seq`` (presumably set by the parent
     initialiser - confirm) and the GC percentage of ``seq``, and starts
     with an empty ``alignments`` list.
     """
     super().__init__(seq, start, direction)
     self.name, self.penalty, self.identity = name, penalty, 0
     self.tm = calcTm(self.seq, mv_conc=50, dv_conc=1.5, dntp_conc=0.6)
     gc_bases = seq.count("G") + seq.count("C")
     self.gc = 100.0 * gc_bases / len(seq)
     self.alignments = []
Exemple #11
0
 def check(self, seq):
     """Return True unless the Tm of ``seq`` falls outside the allowed range.

     The sequence is converted to an upper-case string before
     ``primer3.calcTm`` is called; the result is rejected when it is below
     ``self.min_Tm`` or above ``self.max_Tm``.
     """
     melting_temp = primer3.calcTm(str(seq).upper())
     out_of_range = melting_temp < self.min_Tm or melting_temp > self.max_Tm
     return not out_of_range
Exemple #12
0
def has_melting_temperature_between_50_and_55(location):
    """Check the melting temperature of the 20-base window around ``location``.

    The window is ``sequence[location - 10:location + 10]``, where
    ``sequence`` is defined outside this function.  Locations fewer than 20
    positions from either end of ``sequence`` are accepted without any
    computation.  Otherwise the result is True only when the primer3 Tm is
    strictly between 50 and 55.
    """
    distance_to_edge = min(location, len(sequence) - location)
    if distance_to_edge < 20:
        return True
    window = sequence[location - 10:location + 10]
    return 50 < primer3.calcTm(window) < 55
 def melting_temp(self, seq):
     """Return ``primer3.calcTm`` for ``seq`` under this object's conditions.

     Concentrations come from the instance (``mv_conc``, ``dv_conc``,
     ``dntp_conc``, ``dna_conc``); ``MAX_NN_LENGTH`` and ``PRIMER3_METHOD``
     are constants defined outside this method.  ``PRIMER3_METHOD`` is used
     for both the Tm method and the salt-correction method.
     """
     settings = dict(
         mv_conc=self.mv_conc,
         dv_conc=self.dv_conc,
         dntp_conc=self.dntp_conc,
         dna_conc=self.dna_conc,
         max_nn_length=MAX_NN_LENGTH,
         tm_method=PRIMER3_METHOD,
         salt_corrections_method=PRIMER3_METHOD,
     )
     return primer3.calcTm(seq, **settings)
Exemple #14
0
    def set_annealing_ta(self):
        """Compute and store the annealing temperature for the product.

        Uses the weighted sum 0.3 * ``self.unst_primer.Tm`` plus
        0.7 * Tm of the product returned by ``self.get_product()``, minus
        14.9 (the original comment attributes this to the Rychlik et al.
        formula).  The value is stored in ``self._annealing_ta``.
        """
        product = self.get_product()
        primer_term = 0.3 * self.unst_primer.Tm
        product_term = 0.7 * calcTm(product)
        self._annealing_ta = (primer_term + product_term) - 14.9
Exemple #15
0
 def tmvalue(sequence):
     """Return the primer3 Tm of ``sequence`` truncated to two decimals.

     The codes R and Y are replaced by G and C respectively before the
     calculation, which uses the 'breslauer' Tm method with the
     'schildkraut' salt correction.
     """
     for degenerate, stand_in in (("R", "G"), ("Y", "C")):
         if sequence.find(degenerate) != -1:
             sequence = re.sub(degenerate, stand_in, sequence)
     raw_tm = primer3.calcTm(sequence,
                             tm_method="breslauer",
                             salt_corrections_method="schildkraut")
     # int() truncates, so this cuts off (does not round) after two decimals
     return int(raw_tm * 100) / 100
Exemple #16
0
def pick_primer_with_best_tm(seqs, tm):
    """Return the ``(seq, tm)`` pair whose Tm is closest to the target ``tm``.

    Each candidate's Tm is computed by primer3 ('breslauer' method) on the
    upper-cased sequence; the original-case sequence is what gets returned.
    Ties go to the candidate that appears first in ``seqs``.
    """
    import primer3

    def distance_to_target(scored):
        return abs(scored[1] - tm)

    # primer3 seems to produce garbage results if given lowercase
    # sequences, hence the upper() below.
    scored_seqs = [
        (candidate, primer3.calcTm(candidate.upper(), tm_method='breslauer'))
        for candidate in seqs
    ]
    return sorted(scored_seqs, key=distance_to_target)[0]
Exemple #17
0
    def createPrimers(self,db,bowtie='bowtie2', delete=True, tags={}, tmThreshold=50.0, endMatch=6, maxAln=20):
        """Align the primers in ``self.file`` to ``db`` and build Primer objects.

        Steps:
          1. Run ``bowtie`` on the FASTA file ``self.file`` (unless
             ``self.file + '.sam'`` already exists) and store the SAM output
             in that file.
          2. Create one Primer per FASTA record.  Record names of the form
             ``name|chrom:start-end:strand`` also get a target Locus.
          3. Read the SAM alignments and add a target to the matching primer
             for every alignment whose heterodimer Tm exceeds
             ``tmThreshold`` and whose last ``endMatch`` bases match the
             reference exactly.
          4. Clear the loci of primers that reached ``maxAln`` alignments.

        :param db: bowtie index passed with ``-x``
        :param bowtie: bowtie executable to run
        :param delete: remove the SAM file afterwards when true
        :param tags: mapping of primer name to tag (read only)
        :param tmThreshold: minimum heterodimer Tm for an alignment to count
        :param endMatch: number of trailing bases that must match exactly
        :param maxAln: value for bowtie's ``-k`` and the cut-off for step 4
        :return: the Primer objects (``dict.values()`` of the internal map)
        """
        # run bowtie (max 1000 alignments, allow for one gap/mismatch?)
        mapfile = self.file+'.sam'
        if not os.path.exists(mapfile):
            # With an argument list there is no shell, so a '>' element would
            # reach bowtie as a literal argument instead of redirecting the
            # output.  Send stdout to the SAM file directly.
            try:
                with open(mapfile, 'w') as samout:
                    subprocess.check_call(
                        [bowtie, '-f', '--end-to-end', '-p 2',
                         '-k '+str(maxAln), '-L 10', '-N 1', '-D 20', '-R 3',
                         '-x', db, '-U', self.file],
                        stdout=samout)
            except BaseException:
                # A partial SAM file would be silently reused by the
                # existence check above on the next call, so remove it.
                if os.path.exists(mapfile):
                    os.unlink(mapfile)
                raise
        # Read fasta file (Create Primer)
        primers = {}
        with pysam.FastaFile(self.file) as fasta:
            for s in fasta.references:
                # parse target locus from fasta file
                try:
                    primername, targetposition = s.split('|')
                    reTargetposition = re.match(r'(\w+):(\d+)-(\d+):([+-])',targetposition)
                except ValueError:
                    # name does not split into exactly "name|position"
                    primername = s
                    targetLocus = None
                else:
                    # create stranded targetlocus
                    reverse = reTargetposition.group(4) == '-'
                    tm = primer3.calcTm(fasta.fetch(s))  # assume targetlocus is full match
                    locusStart = int(reTargetposition.group(2))
                    locusEnd = int(reTargetposition.group(3))
                    targetLocus = Locus(reTargetposition.group(1), locusStart, locusEnd-locusStart, reverse, tm)
                # create primer (with target locus)
                primertag = tags.get(primername)
                primers[primername] = Primer(primername,fasta.fetch(s),targetLocus,tag=primertag)

        # read SAM OUTPUT and filter alignments
        mappings = pysam.Samfile(mapfile,'r')
        try:
            alnCount = Counter()  # count alignments to kill locations of non-specific primers (count == -k)
            for aln in mappings:
                primername = aln.qname.split('|')[0]
                if aln.is_unmapped:
                    continue
                alnCount[primername] += 1
                ## get reference sequence
                qry = aln.query_sequence.upper()
                ref = aln.get_reference_sequence().upper()
                refrc = ref.translate(revcmp)[::-1]
                aln_tm = primer3.calcHeterodimerTm(qry,refrc)
                # TmThreshold and mismatches in 3'end check
                if aln_tm <= tmThreshold:
                    continue
                if len(qry) <= endMatch or len(ref) <= endMatch:
                    continue
                if all(q == r for q, r in zip(qry[-endMatch:], ref[-endMatch:])):
                    primers[primername].addTarget(mappings.getrname(aln.reference_id), aln.pos, aln.is_reverse, aln_tm)
        finally:
            # release the SAM handle before the file is possibly deleted
            mappings.close()
        # remove primer locations for those that have hit maximum
        for primer in primers.values():
            if len(primer.loci) >= maxAln:
                primer.loci = []
        # cleanup
        if delete:
            os.unlink(mapfile) # delete mapping FILE
        return primers.values()
Exemple #18
0
 def _calc_Tm(self, seq, seqtype):
     """Return the primer3 Tm of ``seq`` rounded to two decimals.

     The primer concentration settings are used when ``seqtype`` is
     "Primer" or "Product"; any other value selects the probe settings.
     If the calculation fails, the string "NaN" is returned instead of a
     number.
     """
     if seqtype in ("Primer", "Product"):
         mv, dv, dntp, dna = self.primer_monovalent_cations, self.primer_divalent_cations, self.primer_dntps, self.primer_annealing_oligo
     else:
         mv, dv, dntp, dna = self.probe_monovalent_cations, self.probe_divalent_cations, self.probe_dntps, self.probe_annealing_oligo
     try:
         tm = primer3.calcTm(str(seq), mv_conc = mv, dv_conc = dv, dntp_conc = dntp, dna_conc = dna)
         tm = round(tm, 2)
     except Exception:
         # Best effort: any calculation error yields "NaN".  Catching
         # Exception (not a bare except) lets KeyboardInterrupt and
         # SystemExit propagate.
         tm = "NaN"
     return tm
Exemple #19
0
 def __init__(self, sequence):
     """Store the primer sequence and its primer3 melting temperature.

     If ``primer3.calcTm`` raises IndexError, the sequence is printed and
     the interpreter exits with status 69.
     """
     self.sequence = sequence
     # Primer3 is quite fast, but BioPython provides more customization;
     # the BioPython equivalent would be:
     # mt.Tm_NN(Seq(self.sequence), nn_table=mt.DNA_NN4, Na=50, Mg=2.0, dNTPs=0.2)
     buffer_conditions = {'mv_conc': 50, 'dv_conc': 2.0, 'dntp_conc': 0.2}
     try:
         self.Tm = primer3.calcTm(self.sequence, **buffer_conditions)
     except IndexError:
         print(self.sequence)
         exit(69)
Exemple #20
0
def SMARTplex(right):
    """Grow an RT primer from the tail of ``right.seq`` until it is stable.

    For tail lengths 5, 6, ... the candidate is ``settings.RLBseq`` plus
    the last ``tail_len`` bases of ``right.seq``.  ``LCSubStr`` against the
    reference of the first alignment gives a match length, and the Tm of
    the candidate's last ``match_len`` bases is computed.  The search stops
    at the first candidate whose Tm exceeds 40.0, or with the last
    candidate tried.

    Returns ``(RTprimer, tm, matched_tail, tail_len, match_len)``.
    """
    # NOTE(review): when right.seq has 5 or fewer bases the loop body never
    # runs and the return statement fails on unassigned names - confirm
    # callers never pass such short sequences.
    primer_tail = right.seq
    reference = right.alignments[0].aln_ref
    for tail_len in range(5, len(primer_tail)):
        rt_primer = settings.RLBseq + primer_tail[-tail_len:]
        match_len = LCSubStr(reference, rt_primer)
        matched = rt_primer[-match_len:]
        melting = calcTm(matched, mv_conc=75, dv_conc=3, dntp_conc=0.5)
        if melting > 40.0:
            break
    return rt_primer, melting, matched, tail_len, match_len
Exemple #21
0
def primer_generator(length, digestion_site, tests, end_CG=True):
    """Draw random primers containing ``digestion_site`` and keep the good ones.

    Each candidate is random DNA (without the site), then the site, then
    five more random bases, ``length`` characters in total.  The loop
    counter runs from 0 while it is <= ``tests``, so ``tests + 1``
    candidates are drawn.  A candidate is kept when:

    * its primer3 Tm is strictly between 53 and 55,
    * the site and its reverse complement occur exactly once in total,
    * primer3 finds no hairpin structure,
    * ``gc_counter`` is within 50..60,
    * ``end_3`` passes (only checked when ``end_CG`` is true),
    * ``runs_counter`` and ``repeat_counter`` both pass.

    Returns the accepted primers as a list.
    """
    accepted = []
    site_rc = str(Seq(digestion_site).reverse_complement())
    prefix_len = length - 5 - len(digestion_site)
    attempt = 0
    while attempt <= tests:
        attempt += 1
        candidate = (str(RandomDNA_without_site(prefix_len, digestion_site))
                     + digestion_site
                     + str(RandomDNA_without_site(5, digestion_site)))
        hairpin = primer3.calcHairpin(candidate)

        if not 53 < primer3.calcTm(candidate) < 55:
            continue
        if candidate.count(digestion_site) + candidate.count(site_rc) != 1:
            continue
        if hairpin.structure_found:
            continue
        if not 50 <= gc_counter(candidate) <= 60:
            continue
        if end_CG and not end_3(candidate):
            continue
        if not (runs_counter(candidate) and repeat_counter(candidate)):
            continue
        accepted.append(candidate)
    return list(accepted)
Exemple #22
0
 def __init__(self,name,seq,targetposition=None,tag=None,loci=[],location=None):
     """Create a primer record from a name and a sequence.

     The sequence is upper-cased and stored as a string; its primer3 Tm and
     GC fraction (0..1) are computed right away.  ``loci``, ``snp`` and
     ``meta`` always start out empty - the ``loci`` argument is accepted
     but its contents are not used.
     """
     sequence = str(seq.upper())
     gc_bases = sequence.count('G') + sequence.count('C')

     self.rank = -1
     self.name = name
     self.seq = sequence
     self.tag = tag
     self.tm = primer3.calcTm(sequence)
     self.gc = gc_bases / float(len(sequence))
     self.loci = []  # genome matches
     self.snp = []  # same order as loci attribute
     self.meta = {}  # metadata
     self.targetposition = targetposition
     self.location = location  # storage location
     if loci:
         # placeholder: a non-empty loci argument is currently ignored
         pass
Exemple #23
0
 def __init__(self, direction, name, seq):
     """Store the primer and compute its thermodynamic summary.

     Tm, homodimer Tm and hairpin Tm are all computed under the same
     conditions (mv_conc=50, dv_conc=1.5, dntp_conc=0.6); ``gc`` is the
     percentage of G and C characters in ``seq``.
     """
     self.direction = direction
     self.name = name
     self.seq = seq

     conditions = {'mv_conc': 50, 'dv_conc': 1.5, 'dntp_conc': 0.6}
     self.tm = calcTm(self.seq, **conditions)
     self.homodimer = calcHomodimer(self.seq, **conditions).tm
     self.hairpin = calcHairpin(self.seq, **conditions).tm

     gc_bases = seq.count('G') + seq.count('C')
     self.gc = 100.0 * gc_bases / len(seq)
def tm(sequence):
    """Return the primer3 Tm of ``sequence`` truncated to an int.

    The input is converted to a string, stripped, lower-cased, has spaces
    removed and has every 'u' replaced by 't' before the calculation.
    A negative Tm prints a message asking the user to check the sequence
    and yields 0.
    """
    cleaned = str(sequence).strip().lower().replace(' ', '').replace('u', 't')
    import primer3

    melting_temp = int(primer3.calcTm(cleaned))
    if melting_temp < 0:
        # message text: "please check whether the sequence was entered correctly"
        print('请检测序列是否输入正确')
        return 0

    # the value returned is an int
    return melting_temp
Exemple #25
0
def gen_training_set(n_seq: int, seq_len_min: int,
                     seq_len_max: int) -> List[tuple]:
    """
    Build ``n_seq`` training examples of ``(sequence, Tm)``.

    Each sequence comes from ``random_dna_sequence`` with a length drawn by
    ``random.randrange(seq_len_min, seq_len_max)``, so ``seq_len_max``
    itself is never drawn.  The Tm label is computed by primer3.
    """
    def labelled_example():
        length = random.randrange(seq_len_min, seq_len_max)
        sequence = random_dna_sequence(seq_length=length)
        return sequence, primer3.calcTm(sequence)

    return [labelled_example() for _ in range(n_seq)]
Exemple #26
0
 def primerMatch(self,locus,seq,ampsize):
     """Find exact occurrences of a partner primer in a window next to ``locus``.

     The window spans ``ampsize[0]..ampsize[1]`` bases beyond the end of the
     locus, or the same distances before its offset when ``locus.reverse``
     is set.  ``seq`` is searched as given for reverse loci and as its
     reverse complement (via ``revcmp``) otherwise.  Every match becomes a
     Locus on the opposite strand flag, carrying the primer3 Tm of the
     searched sequence.
     """
     # get sequence with flank
     if locus.reverse:
         chromStart = locus.offset-ampsize[1]
         chromEnd = locus.offset-ampsize[0]
     else:
         chromStart = locus.offset+locus.length+ampsize[0]
         chromEnd = locus.offset+locus.length+ampsize[1]
     with pysam.FastaFile(self.file) as fasta:
         seqslice = fasta.fetch(locus.chrom,chromStart,chromEnd)
     # find sequence
     if locus.reverse:
         qrySeq = seq
     else:
         qrySeq = seq.translate(revcmp)[::-1]
     # create new loci
     hit_offsets = [hit.start() for hit in re.finditer(re.escape(qrySeq), seqslice)]
     return [
         Locus(locus.chrom, chromStart+offset, len(qrySeq), not locus.reverse,
               primer3.calcTm(qrySeq))
         for offset in hit_offsets
     ]
    def compute_tm(self, sequence):
        """Return the melting temp of the sequence.

        When ``self.params`` is an empty dict, the heuristic of 4 degrees
        per G/C and 2 degrees per any other character is used.  Otherwise
        Primer3's calculator is called with ``self.primer3_params`` as
        keyword arguments; if Primer3 is not available an ImportError is
        raised.
        """
        if self.params != {}:
            if PRIMER3_AVAILABLE:
                return primer3.calcTm(sequence, **self.primer3_params)
            raise ImportError(
                "Melting temperature computation with '%s' "
                "Requires Primer3 installed." %
                self.primer3_params.get('method', "[unknown method]"))
        return sum(4 if base in "GC" else 2 for base in sequence)
Exemple #28
0
 def __init__(self, name, seq, targetposition=None, tag=None, loci=[], location=None):
     """Create a primer record from a name and a sequence.

     The sequence is upper-cased and stored as a string; its primer3 Tm and
     GC fraction (0..1) are computed right away.  ``loci``, ``snp`` and
     ``meta`` always start out empty - the ``loci`` argument is accepted
     but its contents are not used.
     """
     upper_seq = str(seq.upper())
     gc_total = upper_seq.count("G") + upper_seq.count("C")

     self.rank = -1
     self.name = name
     self.seq = upper_seq
     self.tag = tag
     self.tm = primer3.calcTm(upper_seq)
     self.gc = gc_total / float(len(upper_seq))
     self.loci = []  # genome matches
     self.snp = []  # same order as loci attribute
     self.meta = {}  # metadata
     # targetposition is stored exactly as given (no "chr" prefix stripping)
     self.targetposition = targetposition
     self.location = location  # storage location
     if loci:
         # placeholder: a non-empty loci argument is currently ignored
         pass
def expanded_primer_stats(degen,
                          oligo_DNA=50,
                          dNTPs=0.2,
                          salt_monovalent=50,
                          salt_divalent=1.5):
    """
    Compute per-expanded-primer stats and store their average and spread.

    ``degen`` is a three-level mapping whose innermost values are dicts
    with an 'expanded' collection of non-degenerate sequences.  For each
    such dict the Tm, GC, hairpin Tm and homodimer Tm of every expanded
    sequence are computed, and ``[avg, sd]`` of each statistic is written
    back under the keys 'Tm', 'GC', 'hairpin' and 'homodimer'.

    Returns ``degen`` (modified in place), or None when ``degen`` is None.
    """
    logging.info('Calculating stats on primer sets...')
    if degen is None:
        return None

    conditions = dict(dna_conc=oligo_DNA,
                      dntp_conc=dNTPs,
                      mv_conc=salt_monovalent,
                      dv_conc=salt_divalent)

    for categories in degen.values():
        for primer_sets in categories.values():
            for entry in primer_sets.values():
                melting, gc, hairpin, homodimer = [], [], [], []
                # stats on each expanded primer
                for seq in list(entry['expanded']):
                    melting.append(primer3.calcTm(seq, **conditions))
                    gc.append(calc_GC(seq))
                    hairpin.append(primer3.calcHairpin(seq, **conditions).tm)
                    homodimer.append(
                        primer3.calcHomodimer(seq, **conditions).tm)
                # summarizing stats (average & std)
                collected = (('Tm', melting), ('GC', gc),
                             ('hairpin', hairpin), ('homodimer', homodimer))
                for stat_name, values in collected:
                    entry[stat_name] = [avg(values), sd(values)]

    return degen
Exemple #30
0
 def primerMatch(self, locus, seq, ampsize):
     """Find exact occurrences of a partner primer in a window next to ``locus``.

     The window spans ``ampsize[0]..ampsize[1]`` bases beyond the end of the
     locus, or the same distances before its offset when ``locus.reverse``
     is set.  ``seq`` is searched as given for reverse loci and as its
     reverse complement (via ``revcmp``) otherwise.  Every match becomes a
     Locus on the opposite strand flag, carrying the primer3 Tm of the
     searched sequence.
     """
     # get sequence with flank
     if locus.reverse:
         chromStart = locus.offset - ampsize[1]
         chromEnd = locus.offset - ampsize[0]
     else:
         chromStart = locus.offset + locus.length + ampsize[0]
         chromEnd = locus.offset + locus.length + ampsize[1]
     with pysam.FastaFile(self.file) as fasta:
         seqslice = fasta.fetch(locus.chrom, chromStart, chromEnd)
     # find sequence
     if locus.reverse:
         qrySeq = seq
     else:
         qrySeq = seq.translate(revcmp)[::-1]
     # create new loci
     match_starts = [found.start() for found in re.finditer(re.escape(qrySeq), seqslice)]
     return [
         Locus(locus.chrom, chromStart + position, len(qrySeq), not locus.reverse,
               primer3.calcTm(qrySeq))
         for position in match_starts
     ]
Exemple #31
0
def make_primers(n=200000):
    """Write ``n`` random primers with their Tm and hairpin dG to primers.csv.

    Each primer is 10 to 30 bases long (both inclusive) with bases drawn
    uniformly from A, T, G and C.  The output file has the header
    ``seq,tm,hairpin`` followed by one comma-separated row per primer.
    """
    alphabet = "ATGC"

    rows = []  # one formatted CSV line per primer
    for _ in range(n):
        length = random.randint(10, 30)
        sequence = "".join(alphabet[random.randint(0, 3)] for _ in range(length))
        melting = primer3.calcTm(sequence)
        hairpin_dg = primer3.calcHairpin(sequence).dg
        rows.append(f"{sequence},{melting},{hairpin_dg}\n")

    with open("primers.csv", "w") as output:
        output.write("seq,tm,hairpin\n")
        output.writelines(rows)
Exemple #32
0
def GetPrimers(aut, len_primer, GC, Tm):
    """Register every acceptable primer of length ``len_primer`` in ``aut``.

    All strings of that length over the characters A, G, C, T are
    enumerated.  A string is added through ``aut.add_word`` when its
    ``calcTm`` value lies within ``Tm[0]..Tm[1]`` and its GC percentage
    lies within ``GC[0]..GC[1]`` (both inclusive).  The stored value is
    ``[primer, len_primer, gc_str, tm_str]``.

    Returns ``aut``.
    """
    alphabet = 'AGCT'

    for letters in itertools.product(alphabet, repeat=len_primer):
        primer = ''.join(letters)

        melting = calcTm(primer)
        gc_percent = (letters.count('G') + letters.count('C')) / len_primer * 100

        if not (Tm[0] <= melting <= Tm[1]):
            continue
        if not (GC[0] <= gc_percent <= GC[1]):
            continue

        payload = [primer, len_primer, str(gc_percent), str(melting)]
        aut.add_word(primer, payload)

    return aut
Exemple #33
0
def primer_bindings(primers: list, template: str, min_bases=10) -> list:
    """
    Generate primer binding rows from a list of primers

    Initial bindings come from ``find_initial_bindings``; each is extended
    with ``_extend_match`` against the template (or its reverse complement
    for bindings with direction -1), and every resulting match becomes one
    row.

    :param primers: list of loaded primers (with sequence in the 'seq' attribute)
    :type primers: list
    :param template: template sequence
    :type template: basestring
    :param min_bases: forwarded to ``find_initial_bindings``
    :return: one OrderedDict per binding site with the keys name, sequence,
        direction, overhang, annealing, start, end, abs_start, abs_end, Tm
        and match.  This is a plain list, not a DataFrame; callers can pass
        it to ``pandas.DataFrame`` themselves.
    :rtype: list
    """

    bindings = find_initial_bindings(primers, template, min_bases)
    template_len = len(template)
    # The reverse complement is only needed for direction -1 bindings;
    # compute it at most once instead of once per binding.
    rc_template = None

    rows = []

    for binding in bindings:
        if binding.direction == -1:
            if rc_template is None:
                rc_template = reverse_complement(template)
            search_template = rc_template
        else:
            search_template = template

        for match in _extend_match(binding.matchseq, binding.primerseq,
                                   search_template):
            if binding.direction == 1:
                abs_start, abs_end = match.start, match.end
            else:
                # coordinates were found on the reverse complement, so they
                # are mirrored back onto the original template
                abs_start = template_len - match.start
                abs_end = template_len - match.end

            row = OrderedDict()
            row['name'] = binding.primer.name
            row['sequence'] = binding.primerseq
            row['direction'] = binding.direction
            row['overhang'] = match.overhang
            row['annealing'] = match.anneal
            row['start'] = match.start
            row['end'] = match.end
            row['abs_start'] = abs_start
            row['abs_end'] = abs_end
            # Tm of at most the last 60 annealing bases, upper-cased
            row['Tm'] = round(primer3.calcTm(match.anneal[-60:].upper(), dv_conc=15), 2)
            row['match'] = binding.matchseq
            rows.append(row)
    return rows
Exemple #34
0
    def __init__(self, oligomer, start=None, max_runs=4):
        """Collect thermodynamic and composition properties of ``oligomer``.

        Stores the sequence, its Tm, Hairpin and Homodimer objects, the GC
        value from ``SeqUtils.GC`` and ``base_run``: whether the oligomer
        contains ``max_runs`` consecutive identical bases (A, T, G or C).

        :param oligomer: oligomer sequence
        :param start: stored unchanged in ``self.start``
        :param max_runs: run length that counts as a base run
        """
        # Build e.g. \w*(A{4}|T{4}|G{4}|C{4})\w* using raw strings, so the
        # regex escape \w is not treated as a (invalid) Python string escape.
        run_alternatives = "|".join(
            base + "{" + str(max_runs) + "}" for base in "ATGC")
        base_runs_format = re.compile(r"\w*(" + run_alternatives + r")\w*")

        self.seq = oligomer

        self.Tm = calcTm(oligomer)
        self.hairpin = Hairpin(oligomer)
        self.homodimer = Homodimer(oligomer)
        self.GC = SeqUtils.GC(oligomer)

        self.base_run = base_runs_format.match(oligomer) is not None

        self.start = start

        self._rating = None
Exemple #35
0
def primer3_cal(sequence, mintm=37, maxhtm=37, dtm=10):
    """Return ``(sequence, passed)`` for the primer3 Tm / hairpin checks.

    ``passed`` is False when the Tm is below ``mintm``, the hairpin Tm is
    above ``maxhtm``, or the Tm exceeds the hairpin Tm by more than
    ``dtm``; otherwise it is True.
    """
    tm = primer3.calcTm(sequence)
    htm = primer3.calcHairpinTm(sequence)

    # NOTE(review): a large Tm-to-hairpin gap fails the check here; confirm
    # that the direction of the last comparison is intended.
    failed = tm < mintm or htm > maxhtm or (tm - htm) > dtm

    return (sequence, not failed)
Exemple #36
0
    def is_good_primer(primer):
        """Judge a primer; return ``(ok, tm, message)``.

        ``primer`` is a sequence of items whose element at index 1 is a
        base; those bases are joined into the primer string.  The primer is
        rejected (``False, 0, reason``) when it matches the ``poly`` or
        ``tandem`` patterns, has ``ambiguous_base_n`` or more ambiguous
        bases, or when its hairpin or homodimer Tm exceeds its own Tm.
        Otherwise ``(True, tm, 'Ok')`` is returned.
        """
        # ref1. http://www.premierbiosoft.com/tech_notes/PCR_Primer_Design.html
        bases = ''.join(item[1] for item in primer)

        pattern_checks = (
            (poly, 'Poly(NNNNN) structure found'),
            (tandem, 'Tandom(NN*5) exist'),
        )
        for pattern, reason in pattern_checks:
            if re.search(pattern, bases) is not None:
                return False, 0, reason

        # reject once the ambiguous-base count reaches the limit
        if len(re.findall(ambiguous_base, bases)) >= ambiguous_base_n:
            return False, 0, 'More than 3 ambiguous base'

        # primer3.setGlobals seems to have no effect on calcTm, so every
        # ambiguous base is replaced by A to get an approximate value.
        # Otherwise calcTm() generates -99999 when an ambiguous base is
        # present.
        unambiguous = re.sub(ambiguous_base, 'A', bases)
        tm = primer3.calcTm(unambiguous)
        hairpin_tm = primer3.calcHairpinTm(unambiguous)
        homodimer_tm = primer3.calcHomodimerTm(unambiguous)
        if max(tm, hairpin_tm, homodimer_tm) != tm:
            return False, 0, 'Hairpin or homodimer found'
        return True, tm, 'Ok'
Exemple #37
0
def primer3_filter(sequence, mintm=37, maxhtm=35, dtm=10):
    """Return True when ``sequence`` should be filtered out.

    A sequence is filtered when its Tm is below ``mintm``, its hairpin Tm
    is above ``maxhtm``, or the Tm exceeds the hairpin Tm by less than
    ``dtm``.
    """
    tm = primer3.calcTm(sequence)
    htm = primer3.calcHairpinTm(sequence)

    rejected = tm < mintm or htm > maxhtm or (tm - htm) < dtm

    return bool(rejected)
Exemple #38
0
def primer3_filter_withRprimer(sequence, rprimer, mintm=37, maxhtm=35, dtm=10):
    """Return True when ``sequence`` should be filtered out, given ``rprimer``.

    The Tm is computed on ``sequence`` alone.  Hairpin Tm is computed
    twice: on ``rprimer + sequence`` and on ``rprimer + revcom(sequence)``.
    The sequence is filtered when the Tm is below ``mintm``, or when either
    hairpin Tm is above ``maxhtm`` or less than ``dtm`` below the Tm.
    """
    tm = primer3.calcTm(sequence)
    hairpin_forward = primer3.calcHairpinTm(rprimer + sequence)
    hairpin_reverse = primer3.calcHairpinTm(rprimer + revcom(sequence))

    if tm < mintm:
        return True

    for hairpin_tm in (hairpin_forward, hairpin_reverse):
        if hairpin_tm > maxhtm or (tm - hairpin_tm) < dtm:
            return True

    return False
Exemple #39
0
 def query(self, query):
     '''Return the stored primer pairs that match ``query``.

     ``query`` is interpreted in this order:

     * text made only of digits, whitespace and hyphens: matched against
       the pair's ``dateadded`` column (substring match);
     * any other string: matched against the pair id (substring match);
     * anything else: treated as an interval with ``chrom``,
       ``chromStart`` and ``chromEnd``; pairs enclosing the interval are
       returned ordered by the distance of their midpoint to the
       interval's midpoint.

     Returns a list of PrimerPair objects.  A warning is written to stderr
     when the orientation parsed from the two primer names is ambiguous.
     '''
     # Connect before entering the try block: if the connection fails the
     # error propagates as-is instead of being masked by the close() call
     # in ``finally``.
     self.db = sqlite3.connect(self.sqlite)
     try:
         cursor = self.db.cursor()
         datematch = re.compile(r"([0-9\s-]+)$")
         if datematch.match(str(query)): # query date
             subSearchName = '%'+query+'%'
             cursor.execute('''SELECT DISTINCT p.pairid, l.tag, r.tag, l.seq, r.seq, p.left, p.right,
                 p.chrom, p.start, p.end, l.vessel, l.well, r.vessel, r.well, 0
                 FROM pairs AS p
                 LEFT JOIN primer as l ON p.left = l.name
                 LEFT JOIN primer as r ON p.right = r.name
                 where p.dateadded LIKE ?
                 ORDER BY p.pairid;''',
                 (subSearchName,))
         elif type(query) in [str,unicode]:  # use primerpair name
             subSearchName = '%'+query+'%'
             cursor.execute('''SELECT DISTINCT p.pairid, l.tag, r.tag, l.seq, r.seq, p.left, p.right,
                 p.chrom, p.start, p.end, l.vessel, l.well, r.vessel, r.well, 0
                 FROM pairs AS p
                 LEFT JOIN primer as l ON p.left = l.name
                 LEFT JOIN primer as r ON p.right = r.name
                 WHERE p.pairid LIKE ?
                 ORDER BY p.pairid;''',
                 (subSearchName,))
         else:  # is interval
             cursor.execute('''SELECT DISTINCT p.pairid, l.tag, r.tag, l.seq, r.seq, p.left, p.right,
                 p.chrom, p.start, p.end, l.vessel, l.well, r.vessel, r.well,
                 abs(p.start+((p.end-p.start)/2) - ?) as midpointdistance
                 FROM pairs AS p
                 LEFT JOIN primer as l ON p.left = l.name
                 LEFT JOIN primer as r ON p.right = r.name
                 WHERE p.chrom = ?
                 AND p.start + length(l.seq) <= ?
                 AND p.end - length(r.seq) >= ?
                 ORDER BY midpointdistance;''',
                 (int(query.chromStart+int(query.chromEnd-query.chromStart)/2.0), query.chrom, query.chromStart, query.chromEnd))
         rows = cursor.fetchall()
     finally:
         self.db.close()
     # return primer pairs that would match
     primerPairs = []
     for row in rows:
         # build targets
         leftTargetposition = Locus(row[7], row[8], len(row[3]), False, primer3.calcTm(str(row[3])))
         rightTargetposition = Locus(row[7], row[9]-len(row[4]), len(row[4]), True, primer3.calcTm(str(row[4])))
         # build storage locations (if available)
         leftLocation = Location(*row[10:12]) if all(row[10:12]) else None
         rightLocation = Location(*row[12:14]) if all(row[12:14]) else None
         # Build primers
         leftPrimer = Primer(row[5], row[3], targetposition=leftTargetposition, tag=row[1], location=leftLocation)
         rightPrimer = Primer(row[6], row[4], targetposition=rightTargetposition, tag=row[2], location=rightLocation)
         # get reverse status (from name)
         orientations = [ x[1] for x in map(parsePrimerName,row[5:7]) ]
         if not any(orientations) or len(set(orientations))==1:
             # Each label reflects its own primer's orientation (the second
             # label previously tested the first primer for the '???' case).
             labels = ['???' if o==0 else 'rev' if o<0 else 'fwd' for o in orientations]
             warning = '\rWARNING: {} orientation is ambiguous ({},{}){}\r'.format(row[0],
                 labels[0], labels[1], " "*20)
             # write() plus newline produces the same output as the Python 2
             # "print >>" statement and also works on Python 3
             sys.stderr.write(warning + '\n')
             reverse = False
         elif orientations[0]>0 or orientations[1]<0:
             reverse = False
         elif orientations[1]>0 or orientations[0]<0:
             reverse = True
         else:
             raise Exception('PrimerPairStrandError')
         # Build pair
         primerPairs.append(PrimerPair([leftPrimer, rightPrimer],name=row[0],reverse=reverse))
     return primerPairs  # ordered by midpoint distance
Exemple #40
0
# Design primers with primer3; seq_arg and global_arg are defined outside
# this excerpt.
# NOTE(review): the same call appears twice in a row, so the second result
# simply replaces the first - confirm whether the repeat is intended.
res = primer3.designPrimers(seq_arg, global_arg)
res = primer3.designPrimers(seq_arg, global_arg)

import pprint
# pprint.pprint(res)
# print res['PRIMER_LEFT_0_SEQUENCE']


# Prepend the tail sequences (ftail53_s / rtail53, defined outside this
# excerpt) to the first left and right primer returned by primer3.
fp = ftail53_s + Seq(res.get('PRIMER_LEFT_0_SEQUENCE'), IUPAC.unambiguous_dna)
rp = rtail53 + Seq(res.get('PRIMER_RIGHT_0_SEQUENCE'), IUPAC.unambiguous_dna)

# NOTE(review): the print statements below use Python 2 syntax.
print ftail53_s, rtail53
print fp, rp

# Melting temperature of each full (tail + primer) oligo.
print primer3.calcTm(str(fp)), primer3.calcTm(str(rp))

# output = open('results/primers.txt', 'w')
# for k, v in ...
#    output.write(h + '\n')
# output.close()

# Sequence arguments for a further primer3 run on test_seq (defined outside
# this excerpt), forcing the left primer to start at index 3 and the right
# primer to start at the last index of the template.
seq_arg_batch = {
    'SEQUENCE_ID': 'test_leg',
    'SEQUENCE_TEMPLATE': test_seq,
    # 'SEQUENCE_INCLUDED_REGION': [3, len(test_seq)],
    # 'SEQUENCE_PRIMER': str(fp),
    # 'SEQUENCE_PRIMER_REVCOMP': str(rp),
    'SEQUENCE_FORCE_LEFT_START': 3,
    'SEQUENCE_FORCE_RIGHT_START': len(test_seq)-1,
}
Exemple #41
0
def screenPadlockArms(  p_l_seq: str,
                        p_r_seq: str,
                        loop_seq: str,
                        p_params: dict,
                        do_print: bool = False) -> Tuple[bool, dict]:
    """Screen the two arms of a padlock probe against the limits in ``p_params``.

    Every check is run even after an earlier one has failed, so the report
    carries the measured value for each check regardless of the verdict.

    Args:
        p_l_seq: left arm sequence.
        p_r_seq: right arm sequence.
        loop_seq: loop sequence; the full probe is assembled as
            ``p_r_seq + loop_seq + p_l_seq``.
        p_params: screening parameters. Keys read here: ``'thermo_params'``
            (passed as keyword arguments to ``calcTm`` and
            ``calcHeterodimerTm``), ``'arm_gc_min'``, ``'arm_gc_max'``,
            ``'arm_tm_min'``, ``'exclude_seqs'`` and ``'structure_tm_max'``.
        do_print: when True, print a short message for each failed GC, clamp,
            arm Tm and heterodimer check.

    Returns:
        ``(is_good, report)``. ``is_good`` is True only if no check failed.
        ``report`` holds the measured values; ``'l_clamp'`` is True when the
        left-arm clamp check passed, ``'ex_seq'`` lists the first excluded
        sequence found (if any), and the ``'tm_hairpin_*'`` entries are left
        at 0 because no hairpin check is performed here.
    """
    is_good = True
    tp = p_params['thermo_params']
    report = {
        'arm_gc_min_l': 0,
        'arm_gc_max_l': 0,
        'arm_gc_min_r': 0,
        'arm_gc_max_r': 0,
        'l_clamp': True,
        'tm_arm_min_l': 0,
        'tm_arm_min_r': 0,
        'ex_seq': [],
        'tm_hairpin_l': 0,
        'tm_hairpin_r': 0,
        'tm_hetero_0': 0,
        'tm_hetero_1': 0,
        'tm_hetero_2': 0
    }

    # 1. GC content checks
    p_l_gc_content = gcContent(p_l_seq)
    p_r_gc_content = gcContent(p_r_seq)
    if p_l_gc_content < p_params['arm_gc_min']:
        if do_print:
            print("\tgc content L min fail %0.3f" % p_l_gc_content)
        is_good = False
    report['arm_gc_min_l'] = p_l_gc_content
    if p_r_gc_content < p_params['arm_gc_min']:
        if do_print:
            print("\tgc content R min fail %0.3f" % p_r_gc_content)
        is_good = False
    report['arm_gc_min_r'] = p_r_gc_content
    if p_l_gc_content > p_params['arm_gc_max']:
        if do_print:
            print("\tgc content L max fail %0.3f" % p_l_gc_content)
        is_good = False
    report['arm_gc_max_l'] = p_l_gc_content
    if p_r_gc_content > p_params['arm_gc_max']:
        if do_print:
            print("\tgc content R max fail %0.3f" % p_r_gc_content)
        is_good = False
    report['arm_gc_max_r'] = p_r_gc_content

    # 2. GC clamp checks
    # NOTE(review): padlockLeftArmGCClamp presumably counts G/C bases near the
    # 3' end of the left arm -- confirm against its definition.
    l_3p_check = padlockLeftArmGCClamp(p_l_seq)
    if l_3p_check > 3:
        if do_print:
            print("\tl clamp fail")
        is_good = False
        # Only flag the clamp as failed when the check actually fails;
        # otherwise the initial True is kept.
        report['l_clamp'] = False

    # 3. Arm Tm check
    p_arm_tm_l = calcTm(p_l_seq, **tp)
    p_arm_tm_r = calcTm(p_r_seq, **tp)
    if p_arm_tm_l < p_params['arm_tm_min']:
        if do_print:
            print("\tArm L fail %2.3f" % p_arm_tm_l)
        is_good = False
    report['tm_arm_min_l'] = p_arm_tm_l
    if p_arm_tm_r < p_params['arm_tm_min']:
        if do_print:
            print("\tArm R fail %2.3f" % p_arm_tm_r)
        is_good = False
    report['tm_arm_min_r'] = p_arm_tm_r

    # 4. Check for excluded seqs
    # The search runs over the assembled probe so that matches spanning an
    # arm/loop junction are caught; it stops at the first hit.
    p_seq = p_r_seq + loop_seq + p_l_seq
    for ex_seq in p_params['exclude_seqs']:
        if ex_seq in p_seq:
            report['ex_seq'].append(ex_seq)
            is_good = False
            break

    # 5. Secondary structure / primer dimer checks
    p_het_tm_0 = calcHeterodimerTm(p_l_seq, p_r_seq, **tp)
    p_het_tm_1 = calcHeterodimerTm(p_l_seq, loop_seq, **tp)
    p_het_tm_2 = calcHeterodimerTm(p_r_seq, loop_seq, **tp)
    if p_het_tm_0 > p_params['structure_tm_max']:
        if do_print:
            print("\thetero 0 fail")
        is_good = False
    report['tm_hetero_0'] = p_het_tm_0
    if p_het_tm_1 > p_params['structure_tm_max']:
        if do_print:
            print("\thetero 1 fail")
        is_good = False
    report['tm_hetero_1'] = p_het_tm_1
    if p_het_tm_2 > p_params['structure_tm_max']:
        if do_print:
            print("\thetero 2 fail")
        is_good = False
    report['tm_hetero_2'] = p_het_tm_2
    return is_good, report
Exemple #42
0
 def calcProperties(self):
     """Store the melting temperature and GC fraction of ``self.seq``.

     Sets ``self.tm`` from ``primer3.calcTm`` and ``self.gc`` to the share of
     upper-case ``G``/``C`` characters in the sequence. Returns nothing.
     """
     sequence = self.seq
     # melting temperature as reported by primer3
     self.tm = primer3.calcTm(sequence)
     # GC fraction: (G + C) over the total length
     gc_total = sequence.count('G') + sequence.count('C')
     self.gc = gc_total / float(len(sequence))
     return
Exemple #43
0
 def getTM(self):
     """Return the melting temperature primer3 computes for ``self.seq``."""
     return primer3.calcTm(self.seq)