Example #1
0
def calc_characteristics(infile):
    """Compute Tm and structure melting temperatures for every FASTA record.

    For each record in ``infile`` the forward sequence and its reverse
    complement are analysed with ``calcHairpin`` and ``calcHomodimer``;
    ``calcTm`` is computed for the forward sequence only.

    Returns a dict mapping record id to a tab-separated string (with a
    leading tab) of: forward sequence, Tm, forward hairpin Tm, forward
    homodimer Tm, reverse sequence, reverse hairpin Tm, reverse homodimer
    Tm. A structure Tm is replaced by "NA" when no structure was found.
    """
    print("Using sequence file", infile,
          "to calculate Tm and structure potential")

    def structure_tm(result):
        # Only report a melting temperature when a structure was found.
        return result.tm if result.structure_found else "NA"

    records = list(SeqIO.parse(infile, "fasta"))
    table = {}
    for record in records:
        # Forward strand
        fwd = str(record.seq)
        fwd_hairpin = calcHairpin(fwd)
        fwd_homodimer = calcHomodimer(fwd)

        # Reverse complement
        rev = str(record.seq.reverse_complement())
        rev_hairpin = calcHairpin(rev)
        rev_homodimer = calcHomodimer(rev)

        fields = (
            fwd,
            calcTm(fwd),
            structure_tm(fwd_hairpin),
            structure_tm(fwd_homodimer),
            rev,
            structure_tm(rev_hairpin),
            structure_tm(rev_homodimer),
        )
        # The leading tab lets callers prepend their own first column.
        table[record.id] = "\t" + "\t".join(map(str, fields))
    return table
Example #2
0
    def _find_self_binding_ranges(self) -> List[Tuple[int, int, float]]:
        """
        Find segments of the gene which can form a hairpin or homodimer for given Tm.
        :return: A list of triples (from_offset, to_offset, Tm), each representing a start/end and mel;ting temperature for
        a hairpin or homodimer region.
        """
        hairpin_lengths = [5, 10]  # Lengths of DNA segments evaluated for possible hairpins
        homodimer_lengths = [25, 50]
        homodimer_offset_step = 10  # step, in bp, for the start for potential homodimers tested

        monovalent_conc = self.config.temperature_config.k
        divalent_conc = self.config.temperature_config.mg
        dntp_conc = self.config.temperature_config.dntp

        hairpin_segments = []
        for length in hairpin_lengths:
            for start in range(0, self.gene_length - length):
                end = start + length - 1
                thermo_result = calcHairpin(self.gene[start:end], monovalent_conc, divalent_conc, dntp_conc)
                if self._is_new_hairpin_or_homodimer(start, end, thermo_result, hairpin_segments):
                    hairpin_segments.append((start, end, thermo_result.tm))

        homodimer_segments = []
        for length in homodimer_lengths:
            for start in range(0, self.gene_length - length, homodimer_offset_step):
                end = start + length - 1
                thermo_result = calcHomodimer(self.gene[start:end], monovalent_conc, divalent_conc, dntp_conc)
                if thermo_result.structure_found:
                    if self._is_new_hairpin_or_homodimer(start, end, thermo_result, hairpin_segments):
                        homodimer_segments.append((start, end, thermo_result.tm))

        return hairpin_segments + homodimer_segments
Example #3
0
def primer3_check_hairpin(primer):
    """Return False when primer3 finds a hairpin whose dg / 1000 is below -10, else True."""
    result = primer3.calcHairpin(primer)
    # The reported dg is scaled by 1/1000 before being compared to the -10 cut-off.
    scaled_dg = result.dg / 1000
    strong_hairpin = result.structure_found == True and scaled_dg < -10
    return not strong_hairpin
Example #4
0
def is_hairpin(sequence, mv, dv, dntps, dna, temp):
    """Return primer3's ``structure_found`` flag for a hairpin analysis of ``sequence``.

    ``mv``, ``dv``, ``dntps``, ``dna`` and ``temp`` are forwarded to
    ``primer3.calcHairpin`` as ``mv_conc``, ``dv_conc``, ``dntp_conc``,
    ``dna_conc`` and ``temp_c`` respectively.
    """
    conditions = {
        "mv_conc": mv,
        "dv_conc": dv,
        "dntp_conc": dntps,
        "dna_conc": dna,
        "temp_c": temp,
    }
    result = primer3.calcHairpin(sequence, **conditions)
    return result.structure_found
Example #5
0
    def primer_hairpin_check_by_primer3_inner(primers_ID, primers, Tm):
        """Split primers by whether their hairpin Tm is at most ``Tm``.

        Returns a list of four lists, in this order: IDs of accepted primers,
        accepted primer sequences, IDs of rejected primers (suffixed with
        "_hairpin@<tm rounded to 2 decimals>"), rejected primer sequences.
        """
        accepted_ids, accepted_seqs = [], []
        rejected_ids, rejected_seqs = [], []

        for primer_id, primer_seq in zip(primers_ID, primers):
            sequence = str(primer_seq)
            # Hairpin structure check
            result = primer3.calcHairpin(sequence,
                                         mv_conc=50.0,
                                         dv_conc=3,
                                         dna_conc=200,
                                         temp_c=25)
            if float(result.tm) <= float(Tm):
                accepted_ids.append(str(primer_id))
                accepted_seqs.append(sequence)
            else:
                label = str(primer_id) + "_hairpin@" + str(round(result.tm, 2))
                rejected_ids.append(label)
                rejected_seqs.append(sequence)

        return [accepted_ids, accepted_seqs, rejected_ids, rejected_seqs]
Example #6
0
def checkHairpinEnd(primer):
    """Return the primer's hairpin dg / 1000 (rounded to 2 decimals), flagged when the 3' end can fold back.

    When the reverse complement of the last four bases occurs inside
    ``primer[1:-7]`` and the scaled dg is below -1, the value is returned
    as a string with a trailing '*'; otherwise the number itself is returned.
    Salt concentrations come from the module-level ``args``.
    """
    tail_rc = revComplement(primer[-4:])
    result = primer3.calcHairpin(primer,
                                 mv_conc=args.mvConc,
                                 dv_conc=args.dvConc)
    hairpin_dg = round(result.dg / 1000, 2)
    interior = primer[1:-7]  # same slice as 1:-3-4
    if tail_rc in interior and hairpin_dg < -1:
        return str(hairpin_dg) + '*'
    return hairpin_dg
Example #7
0
def Hairpin(Sequence):
    """Return 0 when a hairpin is found whose |dg / 1000| exceeds ``HarpinDeltaG``, else 1."""
    result = primer3.calcHairpin(Sequence)
    exceeds_limit = (result.structure_found == 1
                     and abs(result.dg / 1000) > HarpinDeltaG)
    return 0 if exceeds_limit else 1
Example #8
0
 def pcalcHairpin(seq):
     """Describe the hairpin primer3 finds for ``seq``, or return "" when none qualifies.

     A hairpin qualifies when a structure is found and its dg, after
     formatting to two decimals, is below -1. The description has the form
     "Self_Hairpin:+:tm:<tm>;deltaG:<dg>" with both values at two decimals.
     """
     result = primer3.calcHairpin(seq)
     dg_text = format(result.dg, ".2f")
     tm_text = format(result.tm, ".2f")
     # NOTE(review): dg is compared unscaled here (no / 1000) — confirm the -1 cut-off unit.
     qualifies = result.structure_found == True and float(dg_text) < -1
     if not qualifies:
         return ""
     return "Self_Hairpin:" + "+:tm:" + tm_text + ";deltaG:" + dg_text
def hairpin_Tm(primer_sequence, mv_cation=0, primer_conc=0):
    """Return the hairpin Tm of ``primer_sequence`` as a string with two decimals.

    ``mv_cation`` and ``primer_conc`` are passed to primer3 as ``mv_conc``
    and ``dna_conc``; divalent and dNTP concentrations are fixed at 0,
    ``temp_c`` at 37 and ``max_loop`` at 30.
    """
    result = primer3.calcHairpin(primer_sequence,
                                 mv_conc=mv_cation,
                                 dv_conc=0,
                                 dntp_conc=0,
                                 dna_conc=primer_conc,
                                 temp_c=37,
                                 max_loop=30)
    rounded_tm = round(result.tm, 2)
    return format(rounded_tm, ".2f")
Example #10
0
    def find_structures(cls,
                        folder,
                        seq1,
                        seq2=None,
                        sodium=0.05,
                        magnesium=0.0,
                        temperature=25,
                        concentration=0.00000025,
                        **kwargs):
        """
        Return a one-element list holding a 'Structure' built from primer3's analysis.

        Accepts 1 or 2 input sequences. Automatically runs either:
         * Homodimer   (seq1 == seq2)
         * Hairpin     (seq2 is None)
         * Heterodimer (any other seq2)

        'sodium' and 'magnesium' are multiplied by 1e3, and 'concentration' by 1e9,
        before being passed to primer3 as mv_conc, dv_conc and dna_conc
        (the original comments give the target units as mM, mM and nM).
        dntp_conc is fixed at 0.6. 'folder' and '**kwargs' are accepted but
        not used by this implementation.

        When primer3 reports no structure, the Structure is filled with
        math.inf for all four thermodynamic values.
        """
        import primer3

        # Shared reaction conditions for whichever primer3 routine is selected.
        conditions = {
            'mv_conc': sodium * 1000,  # 50.0 # in mM
            'dv_conc': magnesium * 1000,  # 0.0 # in mM
            'dntp_conc': 0.6,  # in mM
            'dna_conc': concentration * 1000 * 1000 * 1000,  # 250.0 # in nM
            'temp_c': temperature,
        }

        if seq1 == seq2:  # Homodimer calculation
            t = primer3.calcHomodimer(seq1, **conditions)
        elif seq2 is None:  # Hairpin calculation
            t = primer3.calcHairpin(seq1, **conditions)
        else:  # Heterodimer calculation
            t = primer3.calcHeterodimer(seq1, seq2, **conditions)

        if t.structure_found:
            # dg and dh are scaled by 1/1000; ds and tm are passed through as reported.
            thermo_values = (t.dg / 1000, t.dh / 1000, t.ds, t.tm)
        else:
            thermo_values = (math.inf, math.inf, math.inf, math.inf)

        s = Structure(seq1, seq2, *thermo_values,
                      sodium, magnesium, temperature, concentration)
        return [s]
Example #11
0
 def _hairpin_check(self, seq_tuple):
     package, oligo, seq, mv, dv, dntps, annealing_oligo, annealing_temp = seq_tuple
     try:
         tr = primer3.calcHairpin(seq, mv_conc = mv, dv_conc = dv, dntp_conc = dntps, dna_conc = annealing_oligo, temp_c = annealing_temp)
         deltaG = tr.dg
         Tm = tr.tm
     except:
         deltaG = "NaN"
         Tm = "NaN"
     df = pd.DataFrame(columns=['Package', 'Oligo', 'dG', 'Tm'])
     df.loc[len(df)] = [str(package), str(oligo), str(deltaG), str(Tm)]
     return df
Example #12
0
    def __init__(self, oligomer):
        """Run ``calcHairpin`` on ``oligomer`` (with structure output) and store the results.

        Sets ``tm``, ``dg``, ``dh``, ``ds``, ``structure`` and
        ``structure_lines`` from the returned result object.
        """
        self.oligomer = oligomer

        result = calcHairpin(self.oligomer, output_structure=True)

        # Copy the thermodynamic values across under the same attribute names.
        for field in ("tm", "dg", "dh", "ds"):
            setattr(self, field, getattr(result, field))

        # Structure and structure lines are given in the primer3 format
        self.structure = result.ascii_structure
        self.structure_lines = result.ascii_structure_lines
Example #13
0
 def __init__(self, direction, name, seq):
     """Store the primer's identity and precompute Tm, homodimer Tm, hairpin Tm and GC percent.

     All three thermodynamic calls use mv_conc=50, dv_conc=1.5 and
     dntp_conc=0.6. ``gc`` is the percentage of 'G' and 'C' characters
     in ``seq`` (upper-case only).
     """
     # One set of ionic conditions shared by every calculation below.
     conditions = dict(mv_conc=50, dv_conc=1.5, dntp_conc=0.6)

     self.direction, self.name, self.seq = direction, name, seq
     self.tm = calcTm(self.seq, **conditions)
     self.homodimer = calcHomodimer(self.seq, **conditions).tm
     self.hairpin = calcHairpin(self.seq, **conditions).tm

     gc_bases = seq.count('G') + seq.count('C')
     self.gc = 100.0 * gc_bases / len(seq)
Example #14
0
def calcHairpin(seq,
                tm_Threshold=47,
                mv_conc=50.0,
                dv_conc=1.5,
                dntp_conc=0.25,
                dna_conc=50.0,
                temp_c=37,
                max_loop=30):
    """Return ``(tm, dg / 1000)`` for the hairpin of ``seq``, or ``(-1, -1)`` below the Tm threshold.

    The concentration, temperature and loop arguments are forwarded
    positionally to ``primer3.calcHairpin``. The real values are returned
    only when the hairpin Tm is at least ``tm_Threshold``.
    """
    conditions = (mv_conc, dv_conc, dntp_conc, dna_conc, temp_c, max_loop)
    result = primer3.calcHairpin(seq, *conditions)
    reaches_threshold = result.tm >= tm_Threshold
    return (result.tm, result.dg / 1000) if reaches_threshold else (-1, -1)
 def hairpin(self, primer: str) -> float:
     """
     Hairpin melting temperature of a primer, optionally memoised.

     When ``self.cached`` is truthy, results are looked up in and stored
     to ``self.cache_hairpin`` keyed by the primer sequence.
     :param primer: [str] primer sequence
     :return: melting temperature, or 0 for an empty primer
     """
     if len(primer) <= 0:
         return 0

     if self.cached and primer in self.cache_hairpin:
         return self.cache_hairpin[primer]

     melting_temp = calcHairpin(primer, self.mv, self.dv, self.dntp).tm
     if self.cached:
         self.cache_hairpin[primer] = melting_temp
     return melting_temp
def structure_filter(df, hairpin_min, dimer_min, Na_conc, filter=True):
    '''
    Annotate (and optionally filter) a table of sequences by primer3 structure energies.
    https://libnano.github.io/primer3-py/quickstart.html#thermodynamic-analysis

    Adds three columns to ``df`` in place: 'hairpin_dG' and 'homodimer_dG'
    (primer3 dg / 1000, computed with mv_conc=Na_conc) and the boolean
    'passed_structure', which is True where both energies are at least
    their respective minimum. When ``filter`` equals True, a copy holding
    only the passing rows is returned; otherwise the annotated ``df``.
    '''
    def hairpin_energy(sequence):
        return primer3.calcHairpin(sequence, mv_conc=Na_conc).dg / 1000

    def homodimer_energy(sequence):
        return primer3.calcHomodimer(sequence, mv_conc=Na_conc).dg / 1000

    df['hairpin_dG'] = df['sequence'].apply(hairpin_energy)
    df['homodimer_dG'] = df['sequence'].apply(homodimer_energy)

    hairpin_ok = df['hairpin_dG'] >= hairpin_min
    dimer_ok = df['homodimer_dG'] >= dimer_min
    df['passed_structure'] = hairpin_ok & dimer_ok

    if filter == True:
        return df[df['passed_structure']].copy()
    return df
def expanded_primer_stats(degen,
                          oligo_DNA=50,
                          dNTPs=0.2,
                          salt_monovalent=50,
                          salt_divalent=1.5):
    """
    Compute per-expanded-primer stats and store their average and spread.

    ``degen`` is a three-level nested mapping whose innermost entries hold
    an 'expanded' collection of sequences. For every entry, Tm, GC,
    hairpin Tm and homodimer Tm are computed for each expanded sequence,
    and ``[avg(values), sd(values)]`` is written back onto the entry under
    the keys 'Tm', 'GC', 'hairpin' and 'homodimer'.

    Returns the same (mutated) ``degen``, or None when ``degen`` is None.
    """
    logging.info('Calculating stats on primer sets...')
    if degen is None:
        return None

    # Reaction conditions shared by all three primer3 calls.
    conditions = dict(dna_conc=oligo_DNA,
                      dntp_conc=dNTPs,
                      mv_conc=salt_monovalent,
                      dv_conc=salt_divalent)

    for categories in degen.values():
        for primer_sets in categories.values():
            for entry in primer_sets.values():
                tm_values, gc_values = [], []
                hairpin_values, homodimer_values = [], []
                # stats on each expanded primer
                for seq in list(entry['expanded']):
                    tm_values.append(primer3.calcTm(seq, **conditions))
                    gc_values.append(calc_GC(seq))
                    hairpin_values.append(
                        primer3.calcHairpin(seq, **conditions).tm)
                    homodimer_values.append(
                        primer3.calcHomodimer(seq, **conditions).tm)

                # summarizing stats (average & std)
                collected = {
                    'Tm': tm_values,
                    'GC': gc_values,
                    'hairpin': hairpin_values,
                    'homodimer': homodimer_values,
                }
                for label, values in collected.items():
                    entry[label] = [avg(values), sd(values)]

    return degen
Example #18
0
def make_primers(n=200000):
    """Write ``n`` random sequences (10 to 30 bp) with their Tm and hairpin dg to primers.csv.

    The file has the header "seq,tm,hairpin" followed by one line per
    sequence; it is written in the current working directory.
    """
    bp = "ATGC"

    rows = []  # (sequence, estimated tm, hairpin dg)
    for _ in range(n):
        length = random.randint(10, 30)
        bases = [bp[random.randint(0, 3)] for _ in range(length)]
        sequence = "".join(bases)
        melting_temp = primer3.calcTm(sequence)
        hairpin_dg = primer3.calcHairpin(sequence).dg
        rows.append((sequence, melting_temp, hairpin_dg))

    with open("primers.csv", "w") as output:
        output.write("seq,tm,hairpin\n")
        output.writelines(
            f"{seq},{tm},{hairpin}\n" for seq, tm, hairpin in rows)
Example #19
0
def EvaluatePrimerForPCR(Primer: str):
    """

    Legacy method of calculating primer PCR score;

    Starts from 1.0, subtracts the GC-content, GC-extremities and
    melting-temperature penalties in that order, and subtracts a further
    1.0 when primer3 finds a hairpin structure.

    """

    penalties = (
        PenaltyGCContent,
        PenaltyGCExtremities,
        PenaltyMeltingTemperature,
    )

    Score = 1.0
    for penalty in penalties:
        Score -= penalty(Primer)

    # -- check for 2D primer formation;
    if primer3.calcHairpin(Primer).structure_found:
        Score -= 1.0

    return Score
Example #20
0
    def write_probe_map_counts(self, fn):
        """Write number of sequences mapped by each probe to a file.

        The output is tab-separated with a header line, then one row per
        entry of ``self.probe_map_counts``: probe identifier, probe
        sequence, mapped count, and the melt temperature, homodimer Tm and
        hairpin Tm each rounded to two decimals.

        Args:
            fn: path to file to write to
        """
        # primer3 ionic concentrations from Primal Scheme
        conditions = {'mv_conc': 50, 'dv_conc': 1.5, 'dntp_conc': 0.6}

        header = [
            "Probe identifier", "Probe sequence",
            "Number sequences mapped to", "Melt temp", "Homodimer Tm",
            "Hairpin Tm"
        ]

        with open(fn, 'w') as f:
            f.write('\t'.join(header) + '\n')

            # Create a row for every probe
            for p, count in self.probe_map_counts.items():
                identifier = p.identifier()
                sequence = p.seq_str
                melt_temp = primer3.calcTm(sequence, **conditions)
                homodimer_tm = primer3.calcHomodimer(sequence, **conditions).tm
                hairpin_tm = primer3.calcHairpin(sequence, **conditions).tm
                row = (identifier, sequence, count, round(melt_temp, 2),
                       round(homodimer_tm, 2), round(hairpin_tm, 2))
                f.write('\t'.join(map(str, row)) + '\n')
Example #21
0
def analyze_homostructures(
    primers,
    structures,
    homodimer_threshold=-5000,
    hairpin_threshold=-1000,
    mv_conc=None,
    dv_conc=None,
):
    """Record hairpins and self-dimers that primer3 finds below the given dg thresholds.

    For each primer, the sequence (trimmed to its last 60 bases when the
    primer is longer than 60) is analysed with ``primer3.calcHairpin`` and
    ``primer3.calcHomodimer``. A structure is reported through
    ``structures.add_struture(name, kind, dg, tm)`` (dg and tm rounded to
    one decimal) when it is found and its dg is below the corresponding
    threshold.

    ``mv_conc``, ``dv_conc`` and ``primer.conc`` are forwarded to primer3
    only when they are not None, so primer3's own defaults apply for
    anything left unspecified.
    """
    for primer in primers:
        checked_seq = primer.seq
        if len(primer) > 60:
            logger.warning(
                f"Primer {primer.name} is too long ({len(primer)}bp > 60 bp) for analysis of "
                f"homostructures. ")
            logger.info(
                "Trimming primer to 60 bp by removing bases from 5'-end")
            checked_seq = primer.seq[-60:]

        requested = {
            "mv_conc": mv_conc,
            "dv_conc": dv_conc,
            "dna_conc": primer.conc
        }
        # Passing None through would override primer3's defaults with a
        # non-numeric value; omit unset conditions instead.
        keyword_args = {
            key: value
            for key, value in requested.items() if value is not None
        }

        hairpin = primer3.calcHairpin(checked_seq, **keyword_args)

        if hairpin.structure_found and hairpin.dg < hairpin_threshold:
            structures.add_struture(primer.name, "Hairpin",
                                    round(hairpin.dg, 1), round(hairpin.tm, 1))

        homodimer = primer3.calcHomodimer(checked_seq, **keyword_args)

        if homodimer.structure_found and homodimer.dg < homodimer_threshold:
            structures.add_struture(
                primer.name,
                "Self-dimer",
                round(homodimer.dg, 1),
                round(homodimer.tm, 1),
            )
Example #22
0
 def thermo(self):
     """Return the primer3 analyses for this object, computing them on first use.

     The result is cached in ``self._thermo`` as a dict with keys
     'hairpin', 'homodimer', 'annealing' (``self.anneal`` against its
     ``rc``), 'sequence' (``self._safe_sequence`` against its ``rc``) and
     'warning' (a note when ``self.sequence`` is longer than 60, else "").
     All primer3 calls receive ``self._thermo_settings`` as keyword arguments.
     """
     settings = self._thermo_settings
     if self._thermo is not None:
         return self._thermo

     warning = ""
     if len(self.sequence) > 60:
         warning = "sequence length greater than 60. Thermo results are limited to 60bp."

     hairpin = primer3.calcHairpin(self._safe_sequence, **settings)
     homodimer = primer3.calcHomodimer(self._safe_sequence, **settings)
     annealing = primer3.calcHeterodimer(self.anneal, rc(self.anneal),
                                         **settings)
     full_duplex = primer3.calcHeterodimer(self._safe_sequence,
                                           rc(self._safe_sequence),
                                           **settings)
     self._thermo = {
         "hairpin": hairpin,
         "homodimer": homodimer,
         "annealing": annealing,
         "sequence": full_duplex,
         "warning": warning,
     }
     return self._thermo
Example #23
0
def primer_generator(length, digestion_site, tests, end_CG=True):
    """Generate random primers containing ``digestion_site`` and keep those passing all filters.

    Each candidate is random DNA, then the digestion site, then 5 more
    random bases, for a total of ``length`` characters. The loop makes
    ``tests + 1`` attempts. A candidate is kept when its Tm is strictly
    between 53 and 55, the site and its reverse complement together occur
    exactly once, primer3 finds no hairpin, ``gc_counter`` is within
    50..60, ``end_3`` passes (only when ``end_CG`` is true), and both
    ``runs_counter`` and ``repeat_counter`` pass.

    Returns a list of the accepted primer strings.
    """
    accepted = []
    rc_digestion_site = str(Seq(digestion_site).reverse_complement())
    random_prefix_len = length - 5 - len(digestion_site)

    attempts = 0
    while attempts <= tests:
        attempts += 1
        prefix = str(RandomDNA_without_site(random_prefix_len, digestion_site))
        suffix = str(RandomDNA_without_site(5, digestion_site))
        candidate = prefix + digestion_site + suffix
        hairpin = primer3.calcHairpin(candidate)

        # Filters are applied in the original short-circuit order.
        if not 53 < primer3.calcTm(candidate) < 55:
            continue
        site_hits = candidate.count(digestion_site) + candidate.count(rc_digestion_site)
        if site_hits != 1:
            continue
        if hairpin.structure_found:
            continue
        if not 50 <= gc_counter(candidate) <= 60:
            continue
        if end_CG and not end_3(candidate):
            continue
        if not (runs_counter(candidate) and repeat_counter(candidate)):
            continue
        accepted.append(candidate)

    return list(accepted)
Example #24
0
def judge_hairpin(oligo_input):
    """Report an oligo whose hairpin Tm exceeds the smaller of its own Tm and the given minimum.

    ``oligo_input`` provides 'oligo' (a mapping with 'Tm' and 'seq') and
    'min_Tm'. Returns ``[oligo, oligo, hairpin_tm_rounded, ascii_structure]``
    when the hairpin Tm is above the limit, otherwise None.
    """
    oligo = oligo_input['oligo']
    tm_limit = min(oligo['Tm'], oligo_input['min_Tm'])
    result = primer3.calcHairpin(oligo['seq'], output_structure=True)
    if result.tm > tm_limit:
        # The oligo appears twice in the returned list, as in the original layout.
        return [oligo, oligo, round(result.tm, 2), result.ascii_structure]
    return None
Example #25
0
def main():
    """Filter candidate primers and print the survivors to stdout.

    Command line: ``python <script> params.config file.uniq2ref.primer``.

    The YAML config supplies TM_LOWER/TM_UPPER, GC_LOWER/GC_UPPER,
    GC_CLAMP, SS and DI_DG_LIMIT. For the ambiguity, Tm, GC and GC-clamp
    checks, a primer is dropped only when BOTH the sequence and its
    reverse complement fail. Surviving primers are printed as
    ``>name  contiglen__<len>;tm__<tm>;gc__<gc>`` followed by the sequence.

    Exits with status 1 on a usage error, and with status 0 (after a
    message on stderr) when the primer file holds no records.
    """
    if len(sys.argv) != 3:
        mes = '*** Usage: python {} params.config file.uniq2ref.primer'
        print(
            mes.format(os.path.basename(sys.argv[0])),
            file=sys.stderr,
        )
        sys.exit(1)

    configf = sys.argv[1]
    primerfile = sys.argv[2]

    # safe_load: yaml.load() without an explicit Loader can construct
    # arbitrary objects and is rejected by recent PyYAML releases. The
    # context manager also closes the config file.
    with open(configf) as config_handle:
        d = yaml.safe_load(config_handle)

    def out_of_range(value, lower_key, upper_key):
        # True when value lies outside the configured [lower, upper] window.
        return value < d[lower_key] or value > d[upper_key]

    cnt = 0
    for rec in screed.open(primerfile):
        cnt += 1
        name, _contig = rec.name.split(None, 1)
        contig_len = _contig.split('__', 1)[1]
        seq_text = rec.sequence
        # Original note: "primer3 functions only accept byte-strings".
        # NOTE(review): newer primer3-py releases take str — confirm the
        # helpers (RC, has_ambiguous, check_gc, end_gc_count) expect bytes.
        seq = seq_text.encode('utf-8')
        seq_rc = RC(seq)

        # The original built numpy.array(flag_a, flag_b) for each pair check,
        # which passes the second flag as ``dtype``; plain booleans express
        # the intended "both orientations fail" test.
        if has_ambiguous(seq) and has_ambiguous(seq_rc):
            continue

        # check tm
        tm = primer3.calcTm(seq)
        tm_rc = primer3.calcTm(seq_rc)
        if (out_of_range(tm, 'TM_LOWER', 'TM_UPPER')
                and out_of_range(tm_rc, 'TM_LOWER', 'TM_UPPER')):
            continue

        # check gc
        gc = check_gc(seq)
        gc_rc = check_gc(seq_rc)
        if (out_of_range(gc, 'GC_LOWER', 'GC_UPPER')
                and out_of_range(gc_rc, 'GC_LOWER', 'GC_UPPER')):
            continue

        if d['GC_CLAMP']:
            c = end_gc_count(seq)
            c_rc = end_gc_count(seq_rc)
            if (c > 3 or c < 1) and (c_rc > 3 or c_rc < 1):
                continue

        if d['SS']:
            # NOTE(review): the original also computed hairpin and
            # reverse-complement results (orig_pass / rc_pass) but never
            # used them; only the forward homodimer dg decided, and that
            # effective behavior is kept. Confirm whether the
            # both-orientations rule was meant to apply here too.
            ho = primer3.calcHomodimer(seq)
            if ho.dg < d['DI_DG_LIMIT'] or ho.dg > 0:
                continue

        mes = '>{}  contiglen__{};tm__{};gc__{}\n{}'
        # Print the text sequence: formatting the encoded bytes would emit b'...'.
        print(mes.format(name, contig_len, tm, gc, seq_text), file=sys.stdout)

    if cnt == 0:
        mes = '*** Empty file detected: {} (file.uniq2ref.primer), skipping..'
        print(
            mes.format(os.path.basename(primerfile)),
            file=sys.stderr,
        )
        sys.exit(0)
Example #26
0
def hairpin_Tm(primer_sequence, mv_cation=0, primer_conc=0):
    """Return the primer3 hairpin Tm of ``primer_sequence`` formatted with two decimals.

    ``mv_cation`` maps to ``mv_conc`` and ``primer_conc`` to ``dna_conc``;
    dv_conc and dntp_conc are 0, temp_c is 37 and max_loop is 30.
    """
    conditions = dict(mv_conc=mv_cation,
                      dv_conc=0,
                      dntp_conc=0,
                      dna_conc=primer_conc,
                      temp_c=37,
                      max_loop=30)
    hairpin_result = primer3.calcHairpin(primer_sequence, **conditions)
    return format(round(hairpin_result.tm, 2), ".2f")
#### Reverse-complement the primers and test them against the TSO-compatibility criteria with Primer3.
#### Keep only primers with Tm > 50 that contain neither "CC" nor "TTT"; OH is prepended to each kept primer.

rc_pr_list = []
for i in full_list:
    # Reverse-complement once per primer instead of once per condition.
    rc_i = rc(i)
    if "CC" not in rc_i and "TTT" not in rc_i and primer3.calcTm(rc_i) > 50:
        rc_pr_list.append(OH + rc_i)

#### Select the primers with the least tendency to form heterodimers with the TSO.
#### dG > -3000 was chosen according to Fabio's DENV2 primer.

dg_columns = [
    "Primer", "Annealing Tm", "HeteroDimer Tm", "HeteroDimer dG",
    "Hairpin Tm", "Hairpin dG"
]
dg_rows = []
for i in rc_pr_list:
    result = primer3.calcHeterodimer(i, TSO)
    if result.dg > -3000:
        # Hairpin formation is only needed for primers that are kept.
        pin = primer3.calcHairpin(i)
        # i[23:] skips the first 23 characters — presumably the OH prefix; confirm len(OH) == 23.
        dg_rows.append([
            i, primer3.calcTm(i[23:]), result.tm, result.dg, pin.tm, pin.dg
        ])

# Build the frame from the rows directly: going through np.array() coerced
# every value to a string and failed on an empty result (0 rows vs 6 columns).
dg_3000 = pd.DataFrame(dg_rows, columns=dg_columns)
numeric_columns = dg_columns[1:]
dg_3000[numeric_columns] = dg_3000[numeric_columns].astype(float).round(2)
print(dg_3000)
Example #28
0
def run_filter(
    input: str,
    output: str,
    repeats_threshold: int,
    gc_upper_threshold: float,
    gc_lower_threshold: float,
    gc_clamp: bool,
    gc_uniformity_threshold: float,
    hairpin_threshold: int,
    homodimer_threshold: int,
    dna_conc: float,
    mv_conc: float,
    dv_conc: float,
):
    """Copy primers from a FASTA file to another, dropping those that fail any filter.

    Filters run in this order and the first failure decides the summary
    category: GC range, repeats, GC clamp (only when ``gc_clamp``), GC
    uniformity, hairpin (structure found with dg below
    ``hairpin_threshold``), homodimer (structure found with dg below
    ``homodimer_threshold``). primer3 is called with ``mv_conc``,
    ``dv_conc`` and ``dna_conc``. The per-category counts are passed to
    ``print_stats`` at the end.
    """
    summary = Counter()

    struct_configs = dict(mv_conc=mv_conc, dv_conc=dv_conc, dna_conc=dna_conc)

    def rejection_reason(primer):
        # Return the summary key of the first failing filter, or None to keep.
        if not (gc_lower_threshold <= primer.gc <= gc_upper_threshold):
            return "Out of GC-range"
        if primer.has_repeats(threshold=repeats_threshold):
            return "Repeats"
        if gc_clamp and not primer.has_gc_clamp():
            return "No GC-clamp"
        # NOTE(review): a truthy has_uniform_gc() is counted as "Non-uniform"
        # and rejected — confirm the helper's return convention.
        if primer.has_uniform_gc(window=5, threshold=gc_uniformity_threshold):
            return "Non-uniform GC rate"
        hairpin = primer3.calcHairpin(primer.seq, **struct_configs)
        if hairpin.structure_found and hairpin.dg < hairpin_threshold:
            return "Hairpin"
        homodimer = primer3.calcHomodimer(primer.seq, **struct_configs)
        if homodimer.structure_found and homodimer.dg < homodimer_threshold:
            return "Homodimer"
        return None

    with smart_open(input, mode="r") as infile, \
            smart_open(output, mode="w") as outfile:
        records = tqdm(SeqIO.parse(infile, "fasta"), desc="Parsing primers")
        for entry in records:
            summary["Primers reads"] += 1
            candidate = Primer(name=entry.name, seq=str(entry.seq))

            reason = rejection_reason(candidate)
            if reason is not None:
                summary[reason] += 1
                continue

            summary["Primers written"] += 1
            SeqIO.write(entry, outfile, format="fasta")

    print_stats(summary)
Example #29
0
            long_seq_list.append(seq_record.id)

    idx = 1
    short = 0
    for seq_probe in probe_list:
        probe = (prefix_nt * poly_nt) + str(seq_probe)
        probe_length = len(probe)
        #Tm_ori = primer3.calcTm(probe)
        #Tm_ori2 = ("%.2f" % Tm_ori)
        #GC_percent_ori = GC(probe)
        #GC_per_2 = ("%.2f" % GC_percent_ori)
        #print(seq_record.id + "_" + str(idx) +  "\t" + probe + "\t" + str(GC_per_2) + "\t" +  str(Tm_ori2) + "\t", end = '')
        #if(probe_length <=40):
        #    short = short+1
        # else:
        #    print("\n")
        if (probe_length <= 60):
            Tm = primer3.calcTm(probe)
            Tm2 = ("%.2f" % Tm)
            GC_percent = GC(probe)
            GC_per = ("%.2f" % GC_percent)
            Homodimer = primer3.calcHomodimer(probe)
            Hairpin = primer3.calcHairpin(probe)
            print(seq_record.id + "_" + str(idx) + "\t" + probe + "\t" + "\t" +
                  str(GC_per) + "\t" + str(Tm2) + "\t" +
                  str(Hairpin.structure_found) + "\t" +
                  str(Homodimer.structure_found))
        idx = idx + 1

output.close()
Example #30
0
    framelen = len(frame)
    postframe = seq[lenseq - args.f:].upper()
    preframe = seq[: args.f].upper()
    preframe_obj = Seq(preframe, generic_dna)
    preframerev = str(preframe_obj.reverse_complement())
    frame_obj = Seq(frame, generic_dna)
    framerev = str(frame_obj.reverse_complement())
    postframe_obj = Seq(postframe, generic_dna)
    postframerev = str(postframe_obj.reverse_complement())

    for index in range(0, args.f - args.pl, 1):
        fcandidate = preframe[index:index+args.pl]
        fcand_len = str(len(fcandidate))
        Tm = int(primer3.calcTm(fcandidate))
        if Tm > args.tmin and Tm < args.tmax:
            Hairpin = primer3.calcHairpin(fcandidate)
            if Hairpin.structure_found is False:
                outfile.write(title +
                              "\tPreframe forward\t" +
                              fcandidate + "\t" + str(Tm) + "\t" +
                              fcand_len + "\t" + str(index) + "\t" +
                              str(framelen) + "\t" + str(orientation) + "\n")

    for index in range(0, framelen - args.pl, 3):
        fcandidate = frame[index:index+args.pl]
        fcand_len = str(len(fcandidate))
        Tm = int(primer3.calcTm(fcandidate))
        if Tm > args.tmin and Tm < args.tmax:
            Hairpin = primer3.calcHairpin(fcandidate)
            if Hairpin.structure_found is False:
                outfile.write(title + "\tIn frame forward\t" +
Example #31
0
    def scan_sequence(self, seq, primer_size=(18, 26), amplicon_size=(50, 60)):
        if (__name__ == "__main__"):
            from oligo import Primer, PrimerPair
        else:
            from .oligo import Primer, PrimerPair

        import primer3

        number_records = 20

        mv_conc = 50.0  # in mM
        dv_conc = 0.0  # in mM
        dntp_conc = 0.6  # in mM
        dna_conc = 250.0  # in nM

        temperature = 25

        seq_args = {'SEQUENCE_ID': 'TEST', 'SEQUENCE_TEMPLATE': seq}

        global_args = {
            # Parameters for design
            #'PRIMER_TASK': 'generic',
            #  generic
            #  check_primers
            #  pick_primer_list
            #  pick_sequencing_primers
            #  pick_cloning_primers
            #  pick_discriminative_primers
            'PRIMER_PICK_LEFT_PRIMER': 1,
            'PRIMER_PICK_INTERNAL_OLIGO': 0,
            'PRIMER_PICK_RIGHT_PRIMER': 1,
            'PRIMER_NUM_RETURN': number_records,  # in output records
            'PRIMER_THERMODYNAMIC_OLIGO_ALIGNMENT': 1,

            # Parameters for LEFT/RIGHT oligos
            'PRIMER_OPT_SIZE': 20,  # in nt
            'PRIMER_MIN_SIZE': primer_size[0],  # in nt
            'PRIMER_MAX_SIZE': primer_size[1],  # in nt
            'PRIMER_MAX_POLY_X': 4,  # in nt
            'PRIMER_MAX_NS_ACCEPTED': 0,  # in nt
            'PRIMER_MAX_END_GC': 3,  # in nt
            'PRIMER_GC_CLAMP': 1,  # in nt (1 or more trailing G or C nt)
            'PRIMER_PRODUCT_OPT_SIZE':
            0,  # in nt (0 means don't prefer any one size)
            #'PRIMER_PRODUCT_SIZE_RANGE': [(31,40),(41,50),(51,60),(61,70)], # in nt
            'PRIMER_PRODUCT_SIZE_RANGE': [amplicon_size],  # in nt
            'PRIMER_OPT_TM': 60.0,  # in degrees C
            'PRIMER_MIN_TM': 55.0,  # in degrees C
            'PRIMER_MAX_TM': 65.0,  # in degrees C
            'PRIMER_PAIR_MAX_DIFF_TM': 2.0,  # in degrees C
            'PRIMER_OPT_GC_PERCENT': 50.0,  # in percent
            'PRIMER_MIN_GC': 40.0,  # in percent
            'PRIMER_MAX_GC': 60.0,  # in percent
            'PRIMER_SALT_MONOVALENT': mv_conc,  # in mM
            'PRIMER_SALT_DIVALENT': dv_conc,  # in mM
            'PRIMER_DNA_CONC':
            dna_conc,  # in nM (Not the concentration of oligos in the reaction mix but of those annealing to template.)
            'PRIMER_DNTP_CONC': dntp_conc,  # in mM
            'PRIMER_MAX_SELF_ANY': 8,  # alignment score
            'PRIMER_MAX_SELF_END': 3,  # alignment score
            'PRIMER_PAIR_MAX_COMPL_ANY': 8,  # alignment score
            'PRIMER_PAIR_MAX_COMPL_END': 3,  # alignment score
            'PRIMER_MAX_SELF_ANY_TH': 45.0,  # degrees C
            'PRIMER_MAX_SELF_END_TH': 35.0,  # degrees C
            'PRIMER_PAIR_MAX_COMPL_ANY_TH': 45.0,  # degrees C
            'PRIMER_PAIR_MAX_COMPL_END_TH': 35.0,  # degrees C
            'PRIMER_MAX_HAIRPIN_TH': 35.0,  # degrees C

            # parameters for INTERNAL oligos
            'PRIMER_INTERNAL_OPT_SIZE': 20,  # in nt
            'PRIMER_INTERNAL_MIN_SIZE': primer_size[0],  # in nt
            'PRIMER_INTERNAL_MAX_SIZE': primer_size[1],  # in nt
            'PRIMER_INTERNAL_MAX_POLY_X': 4,  # in nt
            'PRIMER_INTERNAL_MAX_NS_ACCEPTED': 0,  # in nt
            'PRIMER_INTERNAL_OPT_TM': 60.0,  # in degrees C
            'PRIMER_INTERNAL_MIN_TM': 55.0,  # in degrees C
            'PRIMER_INTERNAL_MAX_TM': 65.0,  # in degrees C
            'PRIMER_INTERNAL_OPT_GC_PERCENT': 50.0,  # in percent
            'PRIMER_INTERNAL_MIN_GC': 40.0,  # in percent
            'PRIMER_INTERNAL_MAX_GC': 60.0,  # in percent
            'PRIMER_INTERNAL_SALT_MONOVALENT': mv_conc,  # in mM
            'PRIMER_INTERNAL_SALT_DIVALENT ': dv_conc,  # in mM
            'PRIMER_INTERNAL_DNTP_CONC': dntp_conc,  # in mM
            'PRIMER_INTERNAL_DNA_CONC': dna_conc,  # in nM
            'PRIMER_INTERNAL_MAX_SELF_ANY': 8,  # alignment score
            'PRIMER_INTERNAL_MAX_SELF_END': 3,  # alignment score
            'PRIMER_INTERNAL_MAX_SELF_ANY_TH': 45.0,  # degrees C
            'PRIMER_INTERNAL_MAX_SELF_END_TH': 35.0,  # degrees C
            'PRIMER_INTERNAL_MAX_HAIRPIN_TH': 24.0,  # degrees C
        }
        primers = primer3.bindings.designPrimers(seq_args, global_args)
        records = []
        found_records = primers['PRIMER_PAIR_NUM_RETURNED']
        for i in range(min(found_records, number_records)):
            n = str(i)
            headers = [
                '^PRIMER_PAIR_' + n + '_', '^PRIMER_LEFT_' + n + '_',
                '^PRIMER_RIGHT_' + n + '_'
            ]
            rr = '|'.join(headers)
            records.append({})
            for p in primers:
                m = regex.search(rr, p)
                if m:
                    records[-1][p] = primers[p]
            #print(p, primers[p])

        outputs = []
        for i, r in enumerate(records):
            n = str(i)

            prefix = 'PRIMER_LEFT_' + n + '_'
            p_seq = r[prefix + 'SEQUENCE']
            left_seq = p_seq
            p_pos = seq.find(p_seq)  # 0-based indexing
            p_hairpin = primer3.calcHairpin(p_seq,
                                            mv_conc=mv_conc,
                                            dv_conc=dv_conc,
                                            dntp_conc=dntp_conc,
                                            dna_conc=dna_conc,
                                            temp_c=temperature)
            p_homodimer = primer3.calcHomodimer(p_seq,
                                                mv_conc=mv_conc,
                                                dv_conc=dv_conc,
                                                dntp_conc=dntp_conc,
                                                dna_conc=dna_conc,
                                                temp_c=temperature)
            p_rc = primer3.calcHeterodimer(p_seq,
                                           rc(p_seq),
                                           mv_conc=mv_conc,
                                           dv_conc=dv_conc,
                                           dntp_conc=dntp_conc,
                                           dna_conc=dna_conc,
                                           temp_c=temperature)
            p_gc = (p_seq.count('C') + p_seq.count('G')) / len(p_seq)
            o_left = Primer(p_seq, p_pos, '+', p_hairpin, p_homodimer, p_rc,
                            p_gc)

            prefix = 'PRIMER_RIGHT_' + n + '_'
            p_seq = r[prefix + 'SEQUENCE']
            right_seq = p_seq
            p_pos = seq.find(rc(p_seq))  # 0-based indexing
            p_hairpin = primer3.calcHairpin(p_seq,
                                            mv_conc=mv_conc,
                                            dv_conc=dv_conc,
                                            dntp_conc=dntp_conc,
                                            dna_conc=dna_conc,
                                            temp_c=temperature)
            p_homodimer = primer3.calcHomodimer(p_seq,
                                                mv_conc=mv_conc,
                                                dv_conc=dv_conc,
                                                dntp_conc=dntp_conc,
                                                dna_conc=dna_conc,
                                                temp_c=temperature)
            p_rc = primer3.calcHeterodimer(p_seq,
                                           rc(p_seq),
                                           mv_conc=mv_conc,
                                           dv_conc=dv_conc,
                                           dntp_conc=dntp_conc,
                                           dna_conc=dna_conc,
                                           temp_c=temperature)
            p_gc = (p_seq.count('C') + p_seq.count('G')) / len(p_seq)
            o_right = Primer(p_seq, p_pos, '-', p_hairpin, p_homodimer, p_rc,
                             p_gc)

            p_heterodimer = primer3.calcHeterodimer(left_seq,
                                                    right_seq,
                                                    mv_conc=mv_conc,
                                                    dv_conc=dv_conc,
                                                    dntp_conc=dntp_conc,
                                                    dna_conc=dna_conc,
                                                    temp_c=temperature)
            o_het = PrimerPair(o_left, o_right, p_heterodimer)

            # ThermoResult object:
            #  dg               deltaG (Gibbs free energy) of the structure (cal/mol)
            #  dh               deltaH (enthalpy) of the structure (cal/mol)
            #  ds               deltaS (entropy) of the structure (cal/K*mol)
            #  structure_found  Whether or not a structure (hairpin, dimer, etc) was found as a result of the calculation.
            #  tm               Melting temperature of the structure in deg. C

            #print(i, left_seq, right_seq, min([left_hairpin.dg, right_hairpin.dg, left_homodimer.dg, right_homodimer.dg, heterodimer.dg]))

            outputs.append(o_het)

        return outputs