def calc_characteristics(infile):
    """Compute Tm and secondary-structure Tm values for each FASTA record.

    Every record in ``infile`` is analysed on its forward sequence and on its
    reverse complement. The returned dict maps the record id to a string that
    starts with a tab and holds, tab-separated: forward sequence, forward Tm,
    forward hairpin Tm, forward homodimer Tm, reverse-complement sequence,
    reverse hairpin Tm and reverse homodimer Tm. A structure Tm is replaced
    by ``"NA"`` when the calculation reports that no structure was found.
    """
    print("Using sequence file", infile, "to calculate Tm and structure potential")

    def _tm_or_na(thermo):
        # Only report a Tm when a structure was actually found.
        return thermo.tm if thermo.structure_found else "NA"

    chars = {}
    for record in SeqIO.parse(infile, "fasta"):
        forward = str(record.seq)
        fwd_hairpin = calcHairpin(forward)
        fwd_homodimer = calcHomodimer(forward)

        reverse = str(record.seq.reverse_complement())
        rev_hairpin = calcHairpin(reverse)
        rev_homodimer = calcHomodimer(reverse)

        fields = (
            forward,
            calcTm(forward),
            _tm_or_na(fwd_hairpin),
            _tm_or_na(fwd_homodimer),
            reverse,
            _tm_or_na(rev_hairpin),
            _tm_or_na(rev_homodimer),
        )
        # The leading tab is part of the stored value.
        chars[record.id] = "\t" + "\t".join(str(field) for field in fields)
    return chars
def _find_self_binding_ranges(self) -> List[Tuple[int, int, float]]:
    """
    Find segments of the gene which can form a hairpin or homodimer.

    Fixed-length windows are slid over ``self.gene`` and each window is passed
    to primer3 with the salt/dNTP concentrations from
    ``self.config.temperature_config``.

    :return: A list of triples (from_offset, to_offset, Tm), each representing
        the inclusive start/end offsets and melting temperature of a hairpin
        or homodimer region. Hairpin regions come first, then homodimers.
    """
    hairpin_lengths = [5, 10]  # Lengths of DNA segments evaluated for possible hairpins
    homodimer_lengths = [25, 50]
    homodimer_offset_step = 10  # step, in bp, for the start for potential homodimers tested

    monovalent_conc = self.config.temperature_config.k
    divalent_conc = self.config.temperature_config.mg
    dntp_conc = self.config.temperature_config.dntp

    hairpin_segments = []
    for length in hairpin_lengths:
        # "+ 1" so the window ending on the last base is evaluated as well
        # (assumes self.gene_length == len(self.gene) -- TODO confirm).
        for start in range(0, self.gene_length - length + 1):
            end = start + length - 1  # inclusive offset of the window's last base
            # Slices exclude their upper bound, so "end + 1" is needed to
            # evaluate all `length` bases of the window.
            thermo_result = calcHairpin(self.gene[start:end + 1],
                                        monovalent_conc, divalent_conc, dntp_conc)
            if self._is_new_hairpin_or_homodimer(start, end, thermo_result, hairpin_segments):
                hairpin_segments.append((start, end, thermo_result.tm))

    homodimer_segments = []
    for length in homodimer_lengths:
        for start in range(0, self.gene_length - length + 1, homodimer_offset_step):
            end = start + length - 1  # inclusive offset of the window's last base
            thermo_result = calcHomodimer(self.gene[start:end + 1],
                                          monovalent_conc, divalent_conc, dntp_conc)
            if thermo_result.structure_found:
                # NOTE(review): novelty is checked against hairpin_segments, not
                # homodimer_segments -- confirm this is intended and not a
                # copy/paste slip from the hairpin loop.
                if self._is_new_hairpin_or_homodimer(start, end, thermo_result, hairpin_segments):
                    homodimer_segments.append((start, end, thermo_result.tm))

    return hairpin_segments + homodimer_segments
def primer3_check_hairpin(primer):
    """Return False when ``primer`` forms a strong hairpin, True otherwise.

    A hairpin is considered strong when primer3 reports a structure and its
    dG, divided by 1000, is below -10.
    """
    result = primer3.calcHairpin(primer)
    scaled_dg = result.dg / 1000
    strong_hairpin = result.structure_found and scaled_dg < -10
    return not strong_hairpin
def is_hairpin(sequence, mv, dv, dntps, dna, temp):
    """Tell whether primer3 finds a hairpin structure in ``sequence``.

    ``mv``, ``dv``, ``dntps``, ``dna`` and ``temp`` are forwarded to primer3
    as ``mv_conc``, ``dv_conc``, ``dntp_conc``, ``dna_conc`` and ``temp_c``.
    """
    thermo = primer3.calcHairpin(
        sequence,
        mv_conc=mv,
        dv_conc=dv,
        dntp_conc=dntps,
        dna_conc=dna,
        temp_c=temp,
    )
    return thermo.structure_found
def primer_hairpin_check_by_primer3_inner(primers_ID, primers, Tm):
    """Partition primers by whether their hairpin Tm stays at or below ``Tm``.

    Returns a list of four lists, in this order: ids of passing primers,
    passing primer sequences, ids of failing primers (each suffixed with
    ``_hairpin@<hairpin Tm rounded to 2 decimals>``), and failing primer
    sequences.
    """
    passed_ids, passed_seqs = [], []
    failed_ids, failed_seqs = [], []

    for primer_id, primer in zip(primers_ID, primers):
        sequence = str(primer)
        # Hairpin structure check
        hairpin = primer3.calcHairpin(
            sequence, mv_conc=50.0, dv_conc=3, dna_conc=200, temp_c=25)
        within_limit = float(hairpin.tm) <= float(Tm)
        if within_limit:
            passed_ids.append(str(primer_id))
            passed_seqs.append(sequence)
            continue
        failed_ids.append(str(primer_id) + "_hairpin@" + str(round(hairpin.tm, 2)))
        failed_seqs.append(sequence)

    return [passed_ids, passed_seqs, failed_ids, failed_seqs]
def checkHairpinEnd(primer):
    """Return the primer's hairpin dG, flagged when the 3' end can fold back.

    The hairpin dG is divided by 1000 and rounded to two decimals. When the
    reverse complement of the last four bases occurs inside ``primer[1:-7]``
    and the scaled dG is below -1, the value is returned as a string with a
    trailing ``'*'``; otherwise the number itself is returned.
    """
    end3_rc = revComplement(primer[-4:])
    hairpin = primer3.calcHairpin(primer, mv_conc=args.mvConc, dv_conc=args.dvConc)
    hairpin_dg = round(hairpin.dg / 1000, 2)

    # Region searched for the 3'-end complement: drop the first base and the
    # last seven (the original slice bound was written as -3-4).
    interior = primer[1:-7]
    if end3_rc in interior and hairpin_dg < -1:
        return str(hairpin_dg) + '*'
    return hairpin_dg
def Hairpin(Sequence):
    """Return 0 when ``Sequence`` has a too-stable hairpin, otherwise 1.

    The hairpin is too stable when primer3 reports a structure and the
    absolute value of dG / 1000 exceeds the module-level ``HarpinDeltaG``.
    """
    result = primer3.calcHairpin(Sequence)
    too_stable = (result.structure_found == 1
                  and abs(result.dg / 1000) > HarpinDeltaG)
    return 0 if too_stable else 1
def pcalcHairpin(seq):
    """Describe the self-hairpin of ``seq`` as a tagged string.

    Returns ``"Self_Hairpin:+:tm:<tm>;deltaG:<dg>"`` (both values formatted
    with two decimals) when primer3 finds a structure and the formatted dG is
    below -1; returns an empty string otherwise.
    """
    res = primer3.calcHairpin(seq)
    dg_text = f"{res.dg:.2f}"
    tm_text = f"{res.tm:.2f}"

    # NOTE(review): dg is compared without the /1000 scaling that other
    # callers apply before thresholding -- confirm -1 is meant in primer3's
    # native units.
    if not (res.structure_found == True and float(dg_text) < -1):
        return ""
    return "Self_Hairpin:" + "+:tm:" + tm_text + ";deltaG:" + dg_text
def hairpin_Tm(primer_sequence, mv_cation=0, primer_conc=0):
    """Return the hairpin Tm of ``primer_sequence`` as a two-decimal string.

    ``mv_cation`` and ``primer_conc`` are passed to primer3 as ``mv_conc`` and
    ``dna_conc``; divalent cation and dNTP concentrations are fixed at 0, the
    temperature at 37 and the maximum loop size at 30.
    """
    thermo = primer3.calcHairpin(
        primer_sequence,
        mv_conc=mv_cation,
        dv_conc=0,
        dntp_conc=0,
        dna_conc=primer_conc,
        temp_c=37,
        max_loop=30,
    )
    rounded_tm = round(thermo.tm, 2)
    return f"{rounded_tm:.2f}"
def find_structures(cls, folder, seq1, seq2=None, sodium=0.05, magnesium=0.0, temperature=25, concentration=0.00000025, **kwargs):
    """
    Return a one-element list holding a 'Structure' object with delta-G,
    delta-H, delta-S, and Tm values computed with primer3.

    Accepts 1 or 2 input sequences. Automatically runs either:
     * Homodimer (2 identical input sequences: seq1 == seq2)
     * Hairpin (1 input sequence: seq2 is None)
     * Heterodimer (2 different input sequences)

    ``sodium``, ``magnesium`` and ``concentration`` are multiplied up to the
    mM / mM / nM values handed to primer3. ``folder`` and ``**kwargs`` are
    accepted but not used here. When primer3 reports no structure, the
    Structure is built with ``math.inf`` for all four thermodynamic values.
    """
    import primer3

    conditions = {
        'mv_conc': sodium * 1000,  # 50.0 # in mM
        'dv_conc': magnesium * 1000,  # 0.0 # in mM
        'dntp_conc': 0.6,  # in mM
        'dna_conc': concentration * 1000 * 1000 * 1000,  # 250.0 # in nM
        'temp_c': temperature,
    }

    if seq1 == seq2:
        # Homodimer calculation
        t = primer3.calcHomodimer(seq1, **conditions)
    elif seq2 is None:
        # Hairpin calculation
        t = primer3.calcHairpin(seq1, **conditions)
    else:
        # Heterodimer calculation
        t = primer3.calcHeterodimer(seq1, seq2, **conditions)

    if t.structure_found:
        # dg and dh are scaled by 1/1000; ds is passed through unscaled.
        s = Structure(seq1, seq2, t.dg / 1000, t.dh / 1000, t.ds, t.tm,
                      sodium, magnesium, temperature, concentration)
    else:
        s = Structure(seq1, seq2, math.inf, math.inf, math.inf, math.inf,
                      sodium, magnesium, temperature, concentration)

    return [s]
def _hairpin_check(self, seq_tuple): package, oligo, seq, mv, dv, dntps, annealing_oligo, annealing_temp = seq_tuple try: tr = primer3.calcHairpin(seq, mv_conc = mv, dv_conc = dv, dntp_conc = dntps, dna_conc = annealing_oligo, temp_c = annealing_temp) deltaG = tr.dg Tm = tr.tm except: deltaG = "NaN" Tm = "NaN" df = pd.DataFrame(columns=['Package', 'Oligo', 'dG', 'Tm']) df.loc[len(df)] = [str(package), str(oligo), str(deltaG), str(Tm)] return df
def __init__(self, oligomer):
    """Run a hairpin calculation on ``oligomer`` and store its results.

    Stores the oligomer itself plus the tm, dg, dh and ds values and the
    ASCII structure rendering returned by ``calcHairpin``.
    """
    self.oligomer = oligomer
    result = calcHairpin(self.oligomer, output_structure=True)

    for field in ("tm", "dg", "dh", "ds"):
        setattr(self, field, getattr(result, field))

    # Structure and structure lines are given in the primer3 format
    self.structure = result.ascii_structure
    self.structure_lines = result.ascii_structure_lines
def __init__(self, direction, name, seq):
    """Store the primer's identity and precompute its thermodynamic summary.

    Tm, homodimer Tm and hairpin Tm are all computed with mv_conc=50,
    dv_conc=1.5 and dntp_conc=0.6. ``gc`` is the percentage of 'G' and 'C'
    characters in ``seq`` (the count is case-sensitive).
    """
    self.direction = direction
    self.name = name
    self.seq = seq

    conditions = {"mv_conc": 50, "dv_conc": 1.5, "dntp_conc": 0.6}
    self.tm = calcTm(self.seq, **conditions)
    self.homodimer = calcHomodimer(self.seq, **conditions).tm
    self.hairpin = calcHairpin(self.seq, **conditions).tm

    gc_bases = seq.count('G') + seq.count('C')
    self.gc = 100.0 * gc_bases / len(seq)
def calcHairpin(seq, tm_Threshold=47, mv_conc=50.0, dv_conc=1.5, dntp_conc=0.25, dna_conc=50.0, temp_c=37, max_loop=30):
    """Return (Tm, dG / 1000) of the hairpin of ``seq`` if Tm reaches the threshold.

    All condition parameters are forwarded to ``primer3.calcHairpin``. When
    the hairpin Tm is not at least ``tm_Threshold`` the sentinel pair
    ``(-1, -1)`` is returned instead.
    """
    result = primer3.calcHairpin(
        seq,
        mv_conc=mv_conc,
        dv_conc=dv_conc,
        dntp_conc=dntp_conc,
        dna_conc=dna_conc,
        temp_c=temp_c,
        max_loop=max_loop,
    )
    reaches_threshold = result.tm >= tm_Threshold
    return (result.tm, result.dg / 1000) if reaches_threshold else (-1, -1)
def hairpin(self, primer: str) -> float:
    """
    Hairpin melting temperature of a primer, optionally memoised.

    When ``self.cached`` is set, results are looked up in and stored to
    ``self.cache_hairpin`` keyed by the primer sequence.

    :param primer: [str] primer sequence
    :return: hairpin melting temperature, or 0 for an empty primer
    """
    if len(primer) == 0:
        return 0

    use_cache = self.cached
    if use_cache and primer in self.cache_hairpin:
        return self.cache_hairpin[primer]

    melting_temp = calcHairpin(primer, self.mv, self.dv, self.dntp).tm
    if self.cached:
        self.cache_hairpin[primer] = melting_temp
    return melting_temp
def structure_filter(df, hairpin_min, dimer_min, Na_conc, filter=True):
    '''
    Annotate sequences with primer3 hairpin and homodimer energies and
    optionally keep only those that pass both thresholds.

    Adds the columns 'hairpin_dG', 'homodimer_dG' (primer3 dg / 1000, computed
    with mv_conc=Na_conc) and 'passed_structure' to ``df`` in place. A row
    passes when both energies are >= their respective minimum. When ``filter``
    equals True a filtered copy is returned, otherwise ``df`` itself.

    https://libnano.github.io/primer3-py/quickstart.html#thermodynamic-analysis
    '''
    def _hairpin_dg(sequence):
        return primer3.calcHairpin(sequence, mv_conc=Na_conc).dg / 1000

    def _homodimer_dg(sequence):
        return primer3.calcHomodimer(sequence, mv_conc=Na_conc).dg / 1000

    df['hairpin_dG'] = df['sequence'].apply(_hairpin_dg)
    df['homodimer_dG'] = df['sequence'].apply(_homodimer_dg)

    hairpin_ok = df['hairpin_dG'] >= hairpin_min
    dimer_ok = df['homodimer_dG'] >= dimer_min
    df['passed_structure'] = hairpin_ok & dimer_ok

    # Equality test kept on purpose: only values equal to True trigger filtering.
    if filter == True:
        return df[df['passed_structure']].copy()
    return df
def expanded_primer_stats(degen, oligo_DNA=50, dNTPs=0.2, salt_monovalent=50, salt_divalent=1.5):
    """
    Compute per-expanded-primer stats and store their average and spread.

    ``degen`` is a three-level mapping whose innermost entries carry an
    'expanded' collection of sequences. For each entry the keys 'Tm', 'GC',
    'hairpin' and 'homodimer' are set to ``[avg(values), sd(values)]`` over
    the expanded sequences. ``degen`` is modified in place and returned;
    ``None`` is returned unchanged.
    """
    logging.info('Calculating stats on primer sets...')
    if degen is None:
        return None

    conditions = {
        'dna_conc': oligo_DNA,
        'dntp_conc': dNTPs,
        'mv_conc': salt_monovalent,
        'dv_conc': salt_divalent,
    }

    for by_category in degen.values():
        for by_sequence in by_category.values():
            for entry in by_sequence.values():
                # stats on each expanded (non-degenerate) primer
                expanded = list(entry['expanded'])
                stats = {
                    'Tm': [primer3.calcTm(seq, **conditions) for seq in expanded],
                    'GC': [calc_GC(seq) for seq in expanded],
                    'hairpin': [primer3.calcHairpin(seq, **conditions).tm
                                for seq in expanded],
                    'homodimer': [primer3.calcHomodimer(seq, **conditions).tm
                                  for seq in expanded],
                }
                # summarizing stats (average & std)
                for stat_name, values in stats.items():
                    entry[stat_name] = [avg(values), sd(values)]
    return degen
def make_primers(n=200000):
    """Write ``n`` random primers of 10 to 30 bp to ``primers.csv``.

    Each row holds the sequence, its primer3 Tm and its primer3 hairpin dg,
    below a ``seq,tm,hairpin`` header line.
    """
    alphabet = "ATGC"

    rows = []  # (sequence, tm, hairpin dg) per generated primer
    for _ in range(n):
        length = random.randint(10, 30)
        sequence = "".join(alphabet[random.randint(0, 3)] for _ in range(length))
        melting_temp = primer3.calcTm(sequence)
        hairpin_dg = primer3.calcHairpin(sequence).dg
        rows.append((sequence, melting_temp, hairpin_dg))

    with open("primers.csv", "w") as output:
        output.write("seq,tm,hairpin\n")
        output.writelines(f"{seq},{tm},{hairpin}\n" for seq, tm, hairpin in rows)
def EvaluatePrimerForPCR(Primer: str):
    """
    Legacy primer PCR score.

    Starts from 1.0, subtracts the GC-content, GC-extremities and
    melting-temperature penalties in that order, and subtracts a further 1.0
    when primer3 finds a hairpin structure.
    """
    Score = 1.0
    for penalty in (PenaltyGCContent, PenaltyGCExtremities, PenaltyMeltingTemperature):
        Score -= penalty(Primer)

    # -- check for 2D primer formation;
    if primer3.calcHairpin(Primer).structure_found:
        Score -= 1.0

    return Score
def write_probe_map_counts(self, fn):
    """Write number of sequences mapped by each probe to a file.

    The file is tab-separated with a header line; each probe row also carries
    its melting temperature, homodimer Tm and hairpin Tm, each rounded to two
    decimals.

    Args:
        fn: path to file to write to
    """
    header = [
        "Probe identifier", "Probe sequence", "Number sequences mapped to",
        "Melt temp", "Homodimer Tm", "Hairpin Tm"
    ]
    # primer3 ionic concentrations from Primal Scheme
    ionic = {"mv_conc": 50, "dv_conc": 1.5, "dntp_conc": 0.6}

    with open(fn, 'w') as f:
        f.write('\t'.join(header) + '\n')

        # One row per probe
        for p, count in self.probe_map_counts.items():
            identifier = p.identifier()
            sequence = p.seq_str
            melt_temp = round(primer3.calcTm(sequence, **ionic), 2)
            homodimer_tm = round(primer3.calcHomodimer(sequence, **ionic).tm, 2)
            hairpin_tm = round(primer3.calcHairpin(sequence, **ionic).tm, 2)

            fields = (identifier, sequence, count, melt_temp, homodimer_tm, hairpin_tm)
            f.write('\t'.join(map(str, fields)) + '\n')
def analyze_homostructures(
    primers,
    structures,
    homodimer_threshold=-5000,
    hairpin_threshold=-1000,
    mv_conc=None,
    dv_conc=None,
):
    """Record hairpins and self-dimers whose dG falls below the thresholds.

    Each primer is checked with primer3 for a hairpin and a homodimer, using
    ``mv_conc``, ``dv_conc`` and the primer's own ``conc`` as ``dna_conc``.
    Primers longer than 60 are trimmed to their last 60 bases first, with a
    logged warning. Hits are reported through ``structures.add_struture``
    with dG and Tm rounded to one decimal.
    """
    for primer in primers:
        primer_length = len(primer)
        sequence = primer.seq
        if primer_length > 60:
            logger.warning(
                f"Primer {primer.name} is too long ({primer_length}bp > 60 bp) for analysis of "
                "homostructures. ")
            logger.info("Trimming primer to 60 bp by removing bases from 5'-end")
            sequence = primer.seq[-60:]

        # NOTE(review): mv_conc / dv_conc default to None and are forwarded
        # as-is -- confirm primer3 accepts None for these arguments.
        conditions = dict(mv_conc=mv_conc, dv_conc=dv_conc, dna_conc=primer.conc)

        hairpin = primer3.calcHairpin(sequence, **conditions)
        if hairpin.structure_found and hairpin.dg < hairpin_threshold:
            structures.add_struture(
                primer.name, "Hairpin", round(hairpin.dg, 1), round(hairpin.tm, 1))

        homodimer = primer3.calcHomodimer(sequence, **conditions)
        if homodimer.structure_found and homodimer.dg < homodimer_threshold:
            structures.add_struture(
                primer.name, "Self-dimer", round(homodimer.dg, 1), round(homodimer.tm, 1))
def thermo(self):
    """Return the primer3 thermodynamic results for this object, computed once.

    The first call builds a dict with the hairpin and homodimer of
    ``self._safe_sequence``, the duplex of ``self.anneal`` with its reverse
    complement ("annealing"), the duplex of ``self._safe_sequence`` with its
    reverse complement ("sequence"), and a "warning" string that is non-empty
    when ``self.sequence`` is longer than 60. The dict is stored in
    ``self._thermo`` and returned as-is on later calls.
    """
    settings = self._thermo_settings
    if self._thermo is not None:
        return self._thermo

    warning = ""
    if len(self.sequence) > 60:
        warning = "sequence length greater than 60. Thermo results are limited to 60bp."

    results = {}
    results["hairpin"] = primer3.calcHairpin(self._safe_sequence, **settings)
    results["homodimer"] = primer3.calcHomodimer(self._safe_sequence, **settings)
    results["annealing"] = primer3.calcHeterodimer(
        self.anneal, rc(self.anneal), **settings)
    results["sequence"] = primer3.calcHeterodimer(
        self._safe_sequence, rc(self._safe_sequence), **settings)
    results["warning"] = warning

    self._thermo = results
    return self._thermo
def primer_generator(length, digestion_site, tests, end_CG=True):
    """Generate random primers containing ``digestion_site`` that pass all filters.

    Each candidate is random DNA, then the digestion site, then five more
    random bases. A candidate is kept when its Tm is strictly between 53 and
    55, the site (or its reverse complement) occurs exactly once, primer3
    finds no hairpin, ``gc_counter`` is within 50..60, and ``runs_counter``
    and ``repeat_counter`` both accept it. With ``end_CG`` set, ``end_3``
    must accept it too. The loop body runs while the attempt counter, starting
    at 0, is <= ``tests``.

    Returns the list of accepted primers.
    """
    accepted = []
    rc_digestion_site = str(Seq(digestion_site).reverse_complement())
    bp = length - 5 - len(digestion_site)

    attempts = 0
    while attempts <= tests:
        attempts += 1
        head = str(RandomDNA_without_site(bp, digestion_site))
        tail = str(RandomDNA_without_site(5, digestion_site))
        candidate = head + digestion_site + tail
        hairpin = primer3.calcHairpin(candidate)

        # The checks below run in the same order as the original chained
        # condition, so helper functions are invoked identically.
        if not 53 < primer3.calcTm(candidate) < 55:
            continue
        site_hits = candidate.count(digestion_site) + candidate.count(rc_digestion_site)
        if site_hits != 1:
            continue
        if hairpin.structure_found:
            continue
        if not 50 <= gc_counter(candidate) <= 60:
            continue
        if end_CG and not end_3(candidate):
            continue
        if not runs_counter(candidate):
            continue
        if not repeat_counter(candidate):
            continue
        accepted.append(candidate)

    return accepted
def judge_hairpin(oligo_input):
    """Report an oligo whose hairpin Tm exceeds its allowed minimum.

    ``oligo_input`` holds the oligo dict under 'oligo' (with 'Tm' and 'seq')
    and a 'min_Tm' value. The limit is the smaller of the oligo's Tm and
    'min_Tm'. When the hairpin Tm is above that limit, returns
    ``[oligo, oligo, rounded hairpin Tm, ASCII structure]``; otherwise None.
    """
    oligo = oligo_input['oligo']
    tm_limit = min(oligo['Tm'], oligo_input['min_Tm'])
    hairpin = primer3.calcHairpin(oligo['seq'], output_structure=True)

    if hairpin.tm > tm_limit:
        hairpin_tm = round(hairpin.tm, 2)
        return [oligo, oligo, hairpin_tm, hairpin.ascii_structure]
    return None
def main():
    """Filter candidate primers from a sequence file and print the survivors.

    Usage: ``python <script> params.config file.uniq2ref.primer``

    The YAML config supplies TM_LOWER/TM_UPPER, GC_LOWER/GC_UPPER, GC_CLAMP,
    SS and DI_DG_LIMIT. A record is skipped when both the primer and its
    reverse complement fail the ambiguity, Tm, GC or (if GC_CLAMP is set)
    3'-end GC check, or (if SS is set) when the primer's homodimer dG is
    below DI_DG_LIMIT or above 0. Surviving records are printed to stdout in
    FASTA form. Exits with status 1 on wrong usage and 0 on an empty input.
    """
    if len(sys.argv) != 3:
        mes = '*** Usage: python {} params.config file.uniq2ref.primer'
        print(
            mes.format(os.path.basename(sys.argv[0])),
            file=sys.stderr,
        )
        sys.exit(1)

    configf = sys.argv[1]
    primerfile = sys.argv[2]

    # yaml.load() without a Loader is rejected by current PyYAML and can
    # construct arbitrary objects; safe_load handles plain config values.
    # The context manager closes the handle the original leaked.
    with open(configf) as config_handle:
        d = yaml.safe_load(config_handle)

    pass_cnt = 0
    cnt = 0
    for rec in screed.open(primerfile):
        cnt += 1
        _name = rec.name
        name, _contig = _name.split(None, 1)
        contig_len = _contig.split('__', 1)[1]
        seq = rec.sequence
        # primer3 functions only accept byte-strings
        # NOTE(review): `seq` is printed below in its encoded form -- confirm
        # that is the intended output on Python 3.
        seq = seq.encode('utf-8')
        seq_rc = RC(seq)

        # The original wrapped each pair of flags in numpy.array(a, b), which
        # passes `b` as the dtype argument instead of building a 2-element
        # array. The flags are now combined directly; a record is dropped
        # only when BOTH orientations fail a check.
        if sum((has_ambiguous(seq), has_ambiguous(seq_rc))) == 2:
            continue

        # check tm
        tm = primer3.calcTm(seq)
        tm_rc = primer3.calcTm(seq_rc)
        tm_bad = tm < d['TM_LOWER'] or tm > d['TM_UPPER']
        tm_rc_bad = tm_rc < d['TM_LOWER'] or tm_rc > d['TM_UPPER']
        if tm_bad and tm_rc_bad:
            continue

        # check gc
        gc = check_gc(seq)
        gc_rc = check_gc(seq_rc)
        gc_bad = gc < d['GC_LOWER'] or gc > d['GC_UPPER']
        gc_rc_bad = gc_rc < d['GC_LOWER'] or gc_rc > d['GC_UPPER']
        if gc_bad and gc_rc_bad:
            continue

        if d['GC_CLAMP']:
            c = end_gc_count(seq)
            c_rc = end_gc_count(seq_rc)
            if (c > 3 or c < 1) and (c_rc > 3 or c_rc < 1):
                continue

        if d['SS']:
            # NOTE(review): the original also computed hairpin results and
            # reverse-complement results (against HP_DG_LIMIT / DI_DG_LIMIT)
            # but never consulted them; only this homodimer test on the
            # forward primer decided the outcome. Behaviour is kept --
            # confirm whether the other checks were meant to filter too.
            ho = primer3.calcHomodimer(seq)
            if ho.dg < d['DI_DG_LIMIT'] or ho.dg > 0:
                continue

        pass_cnt += 1
        mes = '>{} contiglen__{};tm__{};gc__{}\n{}'
        print(mes.format(name, contig_len, tm, gc, seq), file=sys.stdout)

    if cnt == 0:
        mes = '*** Empty file detected: {} (file.uniq2ref.primer), skipping..'
        print(
            mes.format(os.path.basename(primerfile)),
            file=sys.stderr,
        )
        sys.exit(0)
def hairpin_Tm(primer_sequence, mv_cation=0, primer_conc=0):
    """Format the hairpin Tm of ``primer_sequence`` with two decimals.

    primer3 is called with ``mv_conc=mv_cation`` and ``dna_conc=primer_conc``;
    dv_conc and dntp_conc are 0, temp_c is 37 and max_loop is 30. The result
    is returned as a string.
    """
    conditions = {
        "mv_conc": mv_cation,
        "dv_conc": 0,
        "dntp_conc": 0,
        "dna_conc": primer_conc,
        "temp_c": 37,
        "max_loop": 30,
    }
    hairpin = primer3.calcHairpin(primer_sequence, **conditions)
    return "{:.2f}".format(round(hairpin.tm, 2))
#### Reverse complement the primers and test for criterias for TSO compatibility by Primer3. Select only the primers with Tm > 50. Remove any primers with "CC" or "TTT". rc_pr_list = [] for i in full_list: if ("CC" not in rc(i) and "TTT" not in rc(i) and primer3.calcTm(rc(i)) > 50): l = OH + rc(i) rc_pr_list.append(l) #### Select the primers with least tendency to form heterodimers with TSO. dG > -3000 was chosen acccording to Fabio's DENV2 primer. dg_3000 = [] for i in rc_pr_list: result = primer3.calcHeterodimer(i, TSO) pin = primer3.calcHairpin(i) if result.dg > -3000: #print(i, result.tm,result.dg, primer3.calcTm(i[23:])) #Check for the formation of hairpins. #print(i, primer3.calcHairpin(i)) tttt = [ i, primer3.calcTm(i[23:]), result.tm, result.dg, pin.tm, pin.dg ] dg_3000.append(tttt) dg_3000 = pd.DataFrame(np.array(dg_3000), columns=[ "Primer", "Annealing Tm", "HeteroDimer Tm", "HeteroDimer dG", "Hairpin Tm", "Hairpin dG" ]) dg_3000.iloc[:, 1:] = dg_3000.iloc[:, 1:].astype(float).round(2) print(dg_3000)
def run_filter(
    input: str,
    output: str,
    repeats_threshold: int,
    gc_upper_threshold: float,
    gc_lower_threshold: float,
    gc_clamp: bool,
    gc_uniformity_threshold: float,
    hairpin_threshold: int,
    homodimer_threshold: int,
    dna_conc: float,
    mv_conc: float,
    dv_conc: float,
):
    """Copy primers that pass all quality filters from ``input`` to ``output``.

    Both files are FASTA. Each primer is tested, in order, for GC range,
    repeats, GC clamp (only when ``gc_clamp`` is set), GC uniformity, hairpin
    dG and homodimer dG; the first failing test decides the rejection reason
    that is counted. Counts per reason, plus primers read and written, are
    passed to ``print_stats`` at the end.
    """
    summary = Counter()
    struct_configs = dict(mv_conc=mv_conc, dv_conc=dv_conc, dna_conc=dna_conc)

    def _rejection_reason(primer):
        # Returns the summary key of the first failed filter, or None.
        if not (gc_lower_threshold <= primer.gc <= gc_upper_threshold):
            return "Out of GC-range"
        if primer.has_repeats(threshold=repeats_threshold):
            return "Repeats"
        if gc_clamp and not primer.has_gc_clamp():
            return "No GC-clamp"
        # NOTE(review): a truthy has_uniform_gc() is counted as
        # "Non-uniform GC rate" -- confirm the method's polarity.
        if primer.has_uniform_gc(window=5, threshold=gc_uniformity_threshold):
            return "Non-uniform GC rate"
        hairpin = primer3.calcHairpin(primer.seq, **struct_configs)
        if hairpin.structure_found and hairpin.dg < hairpin_threshold:
            return "Hairpin"
        homodimer = primer3.calcHomodimer(primer.seq, **struct_configs)
        if homodimer.structure_found and homodimer.dg < homodimer_threshold:
            return "Homodimer"
        return None

    with smart_open(input, mode="r") as infile, smart_open(output, mode="w") as outfile:
        for entry in tqdm(SeqIO.parse(infile, "fasta"), desc="Parsing primers"):
            summary["Primers reads"] += 1
            candidate = Primer(name=entry.name, seq=str(entry.seq))

            reason = _rejection_reason(candidate)
            if reason is not None:
                summary[reason] += 1
                continue

            summary["Primers written"] += 1
            SeqIO.write(entry, outfile, format="fasta")

    print_stats(summary)
long_seq_list.append(seq_record.id) idx = 1 short = 0 for seq_probe in probe_list: probe = (prefix_nt * poly_nt) + str(seq_probe) probe_length = len(probe) #Tm_ori = primer3.calcTm(probe) #Tm_ori2 = ("%.2f" % Tm_ori) #GC_percent_ori = GC(probe) #GC_per_2 = ("%.2f" % GC_percent_ori) #print(seq_record.id + "_" + str(idx) + "\t" + probe + "\t" + str(GC_per_2) + "\t" + str(Tm_ori2) + "\t", end = '') #if(probe_length <=40): # short = short+1 # else: # print("\n") if (probe_length <= 60): Tm = primer3.calcTm(probe) Tm2 = ("%.2f" % Tm) GC_percent = GC(probe) GC_per = ("%.2f" % GC_percent) Homodimer = primer3.calcHomodimer(probe) Hairpin = primer3.calcHairpin(probe) print(seq_record.id + "_" + str(idx) + "\t" + probe + "\t" + "\t" + str(GC_per) + "\t" + str(Tm2) + "\t" + str(Hairpin.structure_found) + "\t" + str(Homodimer.structure_found)) idx = idx + 1 output.close()
framelen = len(frame) postframe = seq[lenseq - args.f:].upper() preframe = seq[: args.f].upper() preframe_obj = Seq(preframe, generic_dna) preframerev = str(preframe_obj.reverse_complement()) frame_obj = Seq(frame, generic_dna) framerev = str(frame_obj.reverse_complement()) postframe_obj = Seq(postframe, generic_dna) postframerev = str(postframe_obj.reverse_complement()) for index in range(0, args.f - args.pl, 1): fcandidate = preframe[index:index+args.pl] fcand_len = str(len(fcandidate)) Tm = int(primer3.calcTm(fcandidate)) if Tm > args.tmin and Tm < args.tmax: Hairpin = primer3.calcHairpin(fcandidate) if Hairpin.structure_found is False: outfile.write(title + "\tPreframe forward\t" + fcandidate + "\t" + str(Tm) + "\t" + fcand_len + "\t" + str(index) + "\t" + str(framelen) + "\t" + str(orientation) + "\n") for index in range(0, framelen - args.pl, 3): fcandidate = frame[index:index+args.pl] fcand_len = str(len(fcandidate)) Tm = int(primer3.calcTm(fcandidate)) if Tm > args.tmin and Tm < args.tmax: Hairpin = primer3.calcHairpin(fcandidate) if Hairpin.structure_found is False: outfile.write(title + "\tIn frame forward\t" +
def scan_sequence(self, seq, primer_size=(18, 26), amplicon_size=(50, 60)):
    """Design primer pairs on ``seq`` with primer3 and annotate each primer.

    :param seq: template sequence handed to primer3 as SEQUENCE_TEMPLATE
    :param primer_size: (min, max) primer length in nt
    :param amplicon_size: (min, max) product size in nt
    :return: list of PrimerPair objects, one per pair returned by primer3
        (at most 20). Each pair holds the left ('+') and right ('-') Primer,
        built from the primer sequence, its 0-based position in ``seq``
        (``str.find`` result, so -1 if not found), its hairpin, homodimer and
        reverse-complement duplex results, and its GC fraction, plus the
        left/right heterodimer result.
    """
    if (__name__ == "__main__"):
        from oligo import Primer, PrimerPair
    else:
        from .oligo import Primer, PrimerPair

    import primer3

    number_records = 20

    mv_conc = 50.0  # in mM
    dv_conc = 0.0  # in mM
    dntp_conc = 0.6  # in mM
    dna_conc = 250.0  # in nM
    temperature = 25

    # Conditions shared by every thermodynamic calculation below.
    thermo_args = {
        'mv_conc': mv_conc,
        'dv_conc': dv_conc,
        'dntp_conc': dntp_conc,
        'dna_conc': dna_conc,
        'temp_c': temperature,
    }

    seq_args = {'SEQUENCE_ID': 'TEST', 'SEQUENCE_TEMPLATE': seq}

    global_args = {
        # Parameters for design
        #'PRIMER_TASK': 'generic',
        #    generic
        #    check_primers
        #    pick_primer_list
        #    pick_sequencing_primers
        #    pick_cloning_primers
        #    pick_discriminative_primers
        'PRIMER_PICK_LEFT_PRIMER': 1,
        'PRIMER_PICK_INTERNAL_OLIGO': 0,
        'PRIMER_PICK_RIGHT_PRIMER': 1,
        'PRIMER_NUM_RETURN': number_records,  # in output records
        'PRIMER_THERMODYNAMIC_OLIGO_ALIGNMENT': 1,

        # Parameters for LEFT/RIGHT oligos
        'PRIMER_OPT_SIZE': 20,  # in nt
        'PRIMER_MIN_SIZE': primer_size[0],  # in nt
        'PRIMER_MAX_SIZE': primer_size[1],  # in nt
        'PRIMER_MAX_POLY_X': 4,  # in nt
        'PRIMER_MAX_NS_ACCEPTED': 0,  # in nt
        'PRIMER_MAX_END_GC': 3,  # in nt
        'PRIMER_GC_CLAMP': 1,  # in nt (1 or more trailing G or C nt)
        'PRIMER_PRODUCT_OPT_SIZE': 0,  # in nt (0 means don't prefer any one size)
        #'PRIMER_PRODUCT_SIZE_RANGE': [(31,40),(41,50),(51,60),(61,70)], # in nt
        'PRIMER_PRODUCT_SIZE_RANGE': [amplicon_size],  # in nt
        'PRIMER_OPT_TM': 60.0,  # in degrees C
        'PRIMER_MIN_TM': 55.0,  # in degrees C
        'PRIMER_MAX_TM': 65.0,  # in degrees C
        'PRIMER_PAIR_MAX_DIFF_TM': 2.0,  # in degrees C
        'PRIMER_OPT_GC_PERCENT': 50.0,  # in percent
        'PRIMER_MIN_GC': 40.0,  # in percent
        'PRIMER_MAX_GC': 60.0,  # in percent
        'PRIMER_SALT_MONOVALENT': mv_conc,  # in mM
        'PRIMER_SALT_DIVALENT': dv_conc,  # in mM
        # in nM (Not the concentration of oligos in the reaction mix but of
        # those annealing to template.)
        'PRIMER_DNA_CONC': dna_conc,
        'PRIMER_DNTP_CONC': dntp_conc,  # in mM
        'PRIMER_MAX_SELF_ANY': 8,  # alignment score
        'PRIMER_MAX_SELF_END': 3,  # alignment score
        'PRIMER_PAIR_MAX_COMPL_ANY': 8,  # alignment score
        'PRIMER_PAIR_MAX_COMPL_END': 3,  # alignment score
        'PRIMER_MAX_SELF_ANY_TH': 45.0,  # degrees C
        'PRIMER_MAX_SELF_END_TH': 35.0,  # degrees C
        'PRIMER_PAIR_MAX_COMPL_ANY_TH': 45.0,  # degrees C
        'PRIMER_PAIR_MAX_COMPL_END_TH': 35.0,  # degrees C
        'PRIMER_MAX_HAIRPIN_TH': 35.0,  # degrees C

        # parameters for INTERNAL oligos
        'PRIMER_INTERNAL_OPT_SIZE': 20,  # in nt
        'PRIMER_INTERNAL_MIN_SIZE': primer_size[0],  # in nt
        'PRIMER_INTERNAL_MAX_SIZE': primer_size[1],  # in nt
        'PRIMER_INTERNAL_MAX_POLY_X': 4,  # in nt
        'PRIMER_INTERNAL_MAX_NS_ACCEPTED': 0,  # in nt
        'PRIMER_INTERNAL_OPT_TM': 60.0,  # in degrees C
        'PRIMER_INTERNAL_MIN_TM': 55.0,  # in degrees C
        'PRIMER_INTERNAL_MAX_TM': 65.0,  # in degrees C
        'PRIMER_INTERNAL_OPT_GC_PERCENT': 50.0,  # in percent
        'PRIMER_INTERNAL_MIN_GC': 40.0,  # in percent
        'PRIMER_INTERNAL_MAX_GC': 60.0,  # in percent
        'PRIMER_INTERNAL_SALT_MONOVALENT': mv_conc,  # in mM
        # Key previously carried a trailing space, so it did not match the
        # primer3 tag name.
        'PRIMER_INTERNAL_SALT_DIVALENT': dv_conc,  # in mM
        'PRIMER_INTERNAL_DNTP_CONC': dntp_conc,  # in mM
        'PRIMER_INTERNAL_DNA_CONC': dna_conc,  # in nM
        'PRIMER_INTERNAL_MAX_SELF_ANY': 8,  # alignment score
        'PRIMER_INTERNAL_MAX_SELF_END': 3,  # alignment score
        'PRIMER_INTERNAL_MAX_SELF_ANY_TH': 45.0,  # degrees C
        'PRIMER_INTERNAL_MAX_SELF_END_TH': 35.0,  # degrees C
        'PRIMER_INTERNAL_MAX_HAIRPIN_TH': 24.0,  # degrees C
    }

    def _annotate(p_seq, p_pos, strand):
        # Build a Primer carrying hairpin, homodimer and reverse-complement
        # duplex results plus the GC fraction of the primer sequence.
        p_hairpin = primer3.calcHairpin(p_seq, **thermo_args)
        p_homodimer = primer3.calcHomodimer(p_seq, **thermo_args)
        p_rc = primer3.calcHeterodimer(p_seq, rc(p_seq), **thermo_args)
        p_gc = (p_seq.count('C') + p_seq.count('G')) / len(p_seq)
        return Primer(p_seq, p_pos, strand, p_hairpin, p_homodimer, p_rc, p_gc)

    primers = primer3.bindings.designPrimers(seq_args, global_args)
    found_records = primers['PRIMER_PAIR_NUM_RETURNED']

    # ThermoResult object:
    #  dg               deltaG (Gibbs free energy) of the structure (cal/mol)
    #  dh               deltaH (enthalpy) of the structure (cal/mol)
    #  ds               deltaS (entropy) of the structure (cal/K*mol)
    #  structure_found  Whether or not a structure (hairpin, dimer, etc) was
    #                   found as a result of the calculation.
    #  tm               Melting temperature of the structure in deg. C
    outputs = []
    for i in range(min(found_records, number_records)):
        n = str(i)

        left_seq = primers['PRIMER_LEFT_' + n + '_SEQUENCE']
        # 0-based indexing
        o_left = _annotate(left_seq, seq.find(left_seq), '+')

        right_seq = primers['PRIMER_RIGHT_' + n + '_SEQUENCE']
        # The right primer lies on the opposite strand, so its reverse
        # complement is what is searched for in the template (0-based).
        o_right = _annotate(right_seq, seq.find(rc(right_seq)), '-')

        p_heterodimer = primer3.calcHeterodimer(left_seq, right_seq, **thermo_args)
        outputs.append(PrimerPair(o_left, o_right, p_heterodimer))

    return outputs