def primer_pair_generator(mly_primer_20):
    """Return every unordered primer pair whose heterodimer Tm is below zero.

    Each primer is paired once with every primer that follows it in
    ``mly_primer_20`` (a primer is never paired with itself at the same
    position), and the pair is kept when
    ``primer3.calcHeterodimerTm(a, b) < 0``.

    :param mly_primer_20: iterable of primer sequences. Any iterable is
        accepted, including one-shot iterators and generators.
    :return: list of ``(primer_a, primer_b)`` tuples, in input order.
    """
    # Function-scope import keeps the block self-contained.
    from itertools import combinations

    # combinations() snapshots the input itself, so a generator is no longer
    # exhausted by a separate copy before the pairing loop runs.
    return [
        (first, second)
        for first, second in combinations(mly_primer_20, 2)
        if primer3.calcHeterodimerTm(first, second) < 0
    ]
def thermo_tm(self):
    """Collect the melting temperatures for this primer pair.

    :return: dict with three entries:
        ``"heterodimer"`` - ``primer3.calcHeterodimerTm`` of the two primers'
        ``_safe_sequence`` values;
        ``"left"`` - whatever ``self.p1.thermo_tm()`` returns;
        ``"right"`` - whatever ``self.p2.thermo_tm()`` returns.
    """
    left_primer = self.p1
    right_primer = self.p2

    # The pair-level value is computed first, then the per-primer values.
    heterodimer_tm = primer3.calcHeterodimerTm(
        left_primer._safe_sequence,
        right_primer._safe_sequence,
    )
    left_tm = left_primer.thermo_tm()
    right_tm = right_primer.thermo_tm()

    return {
        "heterodimer": heterodimer_tm,
        "left": left_tm,
        "right": right_tm,
    }
def top_com(primers_list):
    """Return the primer pair with the lowest heterodimer Tm.

    Every unordered pair of primers (each primer against the ones that follow
    it) is scored with ``primer3.calcHeterodimerTm``. When several pairs share
    the lowest Tm, the one encountered first in input order is returned.

    :param primers_list: iterable of primer sequences (at least two).
    :return: tuple ``(tm, primer_a, primer_b)`` for the lowest-Tm pair.
    :raises IndexError: if fewer than two primers are supplied, so no pair
        can be formed.
    """
    # Function-scope import keeps the block self-contained.
    from itertools import combinations

    scored_pairs = [
        (primer3.calcHeterodimerTm(first, second), first, second)
        for first, second in combinations(primers_list, 2)
    ]
    if not scored_pairs:
        # Same exception type the old ``temp_list[0]`` lookup produced, but
        # with a message that says what is actually wrong.
        raise IndexError("top_com needs at least two primers to form a pair")

    # min() returns the first minimal element, matching a stable ascending
    # sort followed by [0], without sorting the whole list.
    return min(scored_pairs, key=lambda scored: scored[0])
def createPrimers(self, db, bowtie='bowtie2', delete=True, tags=None, tmThreshold=50.0, endMatch=6, maxAln=20):
    """Build Primer objects from ``self.file`` and attach their genomic hits.

    The primer FASTA file (``self.file``) is aligned against ``db`` with
    bowtie, every FASTA record becomes a ``Primer``, and each alignment that
    passes the Tm and 3'-end filters is registered on its primer via
    ``addTarget``.

    FASTA record names may have the form ``name|chrom:start-end:strand``; when
    they do, a target ``Locus`` is created from the coordinates. Records
    without that suffix (or with an unparsable one) get no target locus.

    :param db: bowtie index, passed to ``-x``.
    :param bowtie: bowtie executable to run.
    :param delete: remove the intermediate ``<self.file>.sam`` when done.
    :param tags: optional mapping of primer name to tag; the tag is handed to
        ``Primer(..., tag=...)`` (``None`` when the name is absent).
    :param tmThreshold: an alignment is kept only if the heterodimer Tm of
        query vs. reverse-complemented reference is strictly above this.
    :param endMatch: number of trailing bases of query and reference that
        must match exactly; both sequences must be longer than this.
    :param maxAln: maximum alignments reported per primer (bowtie ``-k``);
        primers that collect this many loci have their loci cleared.
    :return: ``primers.values()`` - the created Primer objects.
    :raises subprocess.CalledProcessError: if bowtie exits with an error.
    """
    if tags is None:
        tags = {}

    # run bowtie (at most maxAln alignments per primer, one seed mismatch)
    mapfile = self.file + '.sam'
    if not os.path.exists(mapfile):
        command = [
            bowtie, '-f', '--end-to-end', '-p', '2',
            '-k', str(maxAln), '-L', '10', '-N', '1', '-D', '20', '-R', '3',
            '-x', db, '-U', self.file,
        ]
        # No shell is involved, so a literal '>' argument would not redirect
        # anything; capture bowtie's stdout into the SAM file explicitly.
        try:
            with open(mapfile, 'w') as sam_out:
                subprocess.check_call(command, stdout=sam_out)
        except BaseException:
            # A truncated SAM file would be mistaken for a finished run by
            # the os.path.exists() check on the next call.
            if os.path.exists(mapfile):
                os.unlink(mapfile)
            raise

    # Read fasta file (Create Primer)
    primers = {}
    with pysam.FastaFile(self.file) as fasta:
        for s in fasta.references:
            sequence = fasta.fetch(s)
            targetLocus = None
            # parse target locus from fasta record name
            try:
                primername, targetposition = s.split('|')
            except ValueError:
                # not exactly one '|' in the name: plain primer without locus
                primername = s
            else:
                reTargetposition = re.match(r'(\w+):(\d+)-(\d+):([+-])', targetposition)
                if reTargetposition is not None:
                    # create stranded targetlocus
                    chrom = reTargetposition.group(1)
                    start = int(reTargetposition.group(2))
                    end = int(reTargetposition.group(3))
                    reverse = reTargetposition.group(4) == '-'
                    tm = primer3.calcTm(sequence)  # assume targetlocus is full match
                    targetLocus = Locus(chrom, start, end - start, reverse, tm)
            # create primer (with target locus)
            primers[primername] = Primer(primername, sequence, targetLocus, tag=tags.get(primername))

    # read SAM OUTPUT and filter alignments; the handle is closed before the
    # file is deleted below
    with pysam.Samfile(mapfile, 'r') as mappings:
        for aln in mappings:
            if aln.is_unmapped:
                continue
            primername = aln.qname.split('|')[0]
            # get reference sequence
            qry = aln.query_sequence.upper()
            ref = aln.get_reference_sequence().upper()
            refrc = ref.translate(revcmp)[::-1]
            aln_tm = primer3.calcHeterodimerTm(qry, refrc)
            # Tm threshold check
            if aln_tm <= tmThreshold:
                continue
            if len(qry) <= endMatch or len(ref) <= endMatch:
                continue
            # no mismatches allowed in the last endMatch bases
            # NOTE(review): the tail of qry is compared regardless of
            # aln.is_reverse - confirm this is the primer's 3' end for
            # reverse-strand alignments.
            if all(q == r for q, r in zip(qry[-endMatch:], ref[-endMatch:])):
                primers[primername].addTarget(
                    mappings.getrname(aln.reference_id), aln.pos, aln.is_reverse, aln_tm)

    # remove primer locations for those that have hit maximum
    # NOTE(review): the cap is applied to the filtered loci, not to the raw
    # number of alignments bowtie reported - confirm that is intended.
    for primer in primers.values():
        if len(primer.loci) >= maxAln:
            primer.loci = []

    # cleanup
    if delete:
        os.unlink(mapfile)  # delete mapping FILE
    return primers.values()
def compute_heterodimer_err(this_primer_pair: SSMPrimerPair, config: SSMConfig, flanks: SSMFlankingSequences):
    """
    Sums the heterodimer melting temperatures of each primer against the opposite flank.

    The forward primer is evaluated against the reverse flank and the reverse primer against
    the forward flank. Both calls pass ``k``, ``mg`` and ``dntp`` from ``config.temperature_config``
    positionally, directly after the two sequences.

    :return: 0 when either flank is missing, otherwise
             Tm(fw primer, reverse flank) + Tm(rw primer, forward flank)
    """
    if flanks.reverse_flank is None or flanks.forward_flank is None:
        # Nothing to pair the primers against.
        return 0

    conditions = config.temperature_config

    fw_vs_reverse_flank = primer3.calcHeterodimerTm(
        this_primer_pair.fw_primer.normal_order_sequence,
        flanks.reverse_flank,
        conditions.k,
        conditions.mg,
        conditions.dntp,
    )
    rw_vs_forward_flank = primer3.calcHeterodimerTm(
        this_primer_pair.rw_primer.normal_order_sequence,
        flanks.forward_flank,
        conditions.k,
        conditions.mg,
        conditions.dntp,
    )
    return fw_vs_reverse_flank + rw_vs_forward_flank
def _fwdStrand():
    """Find the strongest heterodimer site for the primer on the forward strand.

    Rolling Hamming distances of ``primer_str`` against ``genome_rc_str`` are
    computed, the primer's own footprint is masked out, and every position
    whose distance is below the ``hamming_percentile`` threshold is scored
    with ``primer3.calcHeterodimerTm`` against the matching ``genome_str``
    slice. The result ``(highest_tm, highest_tm_idx, 1)`` is put on
    ``strand_results``; if no position beats the initial -100, the index is
    ``None``.

    All inputs come from the enclosing scope.
    """
    fwd_hamming_distances = seqstr.rollingHammingDistance(primer_str,
                                                          genome_rc_str)
    fwd_hd_thresh = np.percentile(fwd_hamming_distances, hamming_percentile)

    # Mask the primer's own footprint by assigning primer_length there.
    # As a slice bound ``-0`` is just ``0`` and would select nothing, so an
    # offset of zero must be expressed as "to the end" (None).
    footprint_start = -(primer_idx + primer_length)
    footprint_stop = -primer_idx if primer_idx else None
    fwd_hamming_distances[footprint_start:footprint_stop] = primer_length

    fwd_hotspots, = np.where(fwd_hamming_distances < fwd_hd_thresh)
    highest_tm_idx = None
    highest_tm = -100
    for idx in fwd_hotspots:
        # Same -0 pitfall: idx == 0 refers to the last primer_length bases
        # of genome_str, not to an empty string.
        site_stop = -idx if idx else None
        tm = primer3.calcHeterodimerTm(
            primer_str, genome_str[-(idx + primer_length):site_stop],
            **params['thermo_params'])
        if tm > highest_tm:
            highest_tm_idx = idx
            highest_tm = tm
    strand_results.put((highest_tm, highest_tm_idx, 1))
def _revStrand():
    """Find the strongest heterodimer site for the primer on the reverse strand.

    Rolling Hamming distances of ``primer_str`` against ``genome_str`` are
    computed, the primer's own footprint is masked out, and each position
    below the ``hamming_percentile`` threshold is scored with
    ``primer3.calcHeterodimerTm`` against the same-index slice of
    ``genome_rc_str``. ``(best Tm, its index, 0)`` is put on
    ``strand_results``; the index is ``None`` if nothing exceeds -100.

    All inputs come from the enclosing scope.
    """
    distances = seqstr.rollingHammingDistance(primer_str, genome_str)
    cutoff = np.percentile(distances, hamming_percentile)

    # Mask the primer's own binding site by assigning primer_length there.
    own_start = primer_idx
    own_stop = primer_idx + primer_length
    distances[own_start:own_stop] = primer_length

    candidate_positions, = np.where(distances < cutoff)

    best_tm = -100
    best_idx = None
    thermo_kwargs_key = 'thermo_params'
    for position in candidate_positions:
        # NOTE(review): _fwdStrand maps its index to the other strand with a
        # negative slice, whereas this uses the index unchanged - confirm
        # the coordinate convention of genome_rc_str.
        window = genome_rc_str[position:position + primer_length]
        candidate_tm = primer3.calcHeterodimerTm(
            primer_str, window, **params[thermo_kwargs_key])
        if candidate_tm > best_tm:
            best_tm, best_idx = candidate_tm, position

    strand_results.put((best_tm, best_idx, 0))
def createPrimers(self, db, bowtie='bowtie2', delete=True, tags=None, tmThreshold=50.0, endMatch=6, maxAln=20):
    """Build Primer objects from ``self.file`` and attach their genomic hits.

    The primer FASTA file (``self.file``) is aligned against ``db`` with
    bowtie, every FASTA record becomes a ``Primer``, and each alignment that
    passes the Tm and 3'-end filters is registered on its primer via
    ``addTarget``.

    FASTA record names may have the form ``name|chrom:start-end:strand``; when
    they do, a target ``Locus`` is created from the coordinates. Records
    without that suffix (or with an unparsable one) get no target locus.

    :param db: bowtie index, passed to ``-x``.
    :param bowtie: bowtie executable to run.
    :param delete: remove the intermediate ``<self.file>.sam`` when done.
    :param tags: optional mapping of primer name to tag; the tag is handed to
        ``Primer(..., tag=...)`` (``None`` when the name is absent).
    :param tmThreshold: an alignment is kept only if the heterodimer Tm of
        query vs. reverse-complemented reference is strictly above this.
    :param endMatch: number of trailing bases of query and reference that
        must match exactly; both sequences must be longer than this.
    :param maxAln: maximum alignments reported per primer (bowtie ``-k``);
        primers that collect this many loci have their loci cleared.
    :return: ``primers.values()`` - the created Primer objects.
    :raises subprocess.CalledProcessError: if bowtie exits with an error.
    """
    if tags is None:
        tags = {}

    # run bowtie (at most maxAln alignments per primer, one seed mismatch)
    mapfile = self.file + '.sam'
    if not os.path.exists(mapfile):
        command = [
            bowtie, '-f', '--end-to-end', '-p', '2',
            '-k', str(maxAln), '-L', '10', '-N', '1', '-D', '20', '-R', '3',
            '-x', db, '-U', self.file,
        ]
        # No shell is involved, so a literal '>' argument would not redirect
        # anything; capture bowtie's stdout into the SAM file explicitly.
        try:
            with open(mapfile, 'w') as sam_out:
                subprocess.check_call(command, stdout=sam_out)
        except BaseException:
            # A truncated SAM file would be mistaken for a finished run by
            # the os.path.exists() check on the next call.
            if os.path.exists(mapfile):
                os.unlink(mapfile)
            raise

    # Read fasta file (Create Primer)
    primers = {}
    with pysam.FastaFile(self.file) as fasta:
        for s in fasta.references:
            sequence = fasta.fetch(s)
            targetLocus = None
            # parse target locus from fasta record name
            try:
                primername, targetposition = s.split('|')
            except ValueError:
                # not exactly one '|' in the name: plain primer without locus
                primername = s
            else:
                reTargetposition = re.match(r'(\w+):(\d+)-(\d+):([+-])', targetposition)
                if reTargetposition is not None:
                    # create stranded targetlocus
                    chrom = reTargetposition.group(1)
                    start = int(reTargetposition.group(2))
                    end = int(reTargetposition.group(3))
                    reverse = reTargetposition.group(4) == '-'
                    tm = primer3.calcTm(sequence)  # assume targetlocus is full match
                    targetLocus = Locus(chrom, start, end - start, reverse, tm)
            # create primer (with target locus)
            primers[primername] = Primer(primername, sequence, targetLocus, tag=tags.get(primername))

    # read SAM OUTPUT and filter alignments; the handle is closed before the
    # file is deleted below
    with pysam.Samfile(mapfile, 'r') as mappings:
        for aln in mappings:
            if aln.is_unmapped:
                continue
            primername = aln.qname.split('|')[0]
            # get reference sequence
            qry = aln.query_sequence.upper()
            ref = aln.get_reference_sequence().upper()
            refrc = ref.translate(revcmp)[::-1]
            aln_tm = primer3.calcHeterodimerTm(qry, refrc)
            # Tm threshold check
            if aln_tm <= tmThreshold:
                continue
            if len(qry) <= endMatch or len(ref) <= endMatch:
                continue
            # no mismatches allowed in the last endMatch bases
            # NOTE(review): the tail of qry is compared regardless of
            # aln.is_reverse - confirm this is the primer's 3' end for
            # reverse-strand alignments.
            if all(q == r for q, r in zip(qry[-endMatch:], ref[-endMatch:])):
                primers[primername].addTarget(
                    mappings.getrname(aln.reference_id), aln.pos, aln.is_reverse, aln_tm)

    # remove primer locations for those that have hit maximum
    # NOTE(review): the cap is applied to the filtered loci, not to the raw
    # number of alignments bowtie reported - confirm that is intended.
    for primer in primers.values():
        if len(primer.loci) >= maxAln:
            primer.loci = []

    # cleanup
    if delete:
        os.unlink(mapfile)  # delete mapping FILE
    return primers.values()
def screenPadlockArms(p_l_seq: str,
                      p_r_seq: str,
                      loop_seq: str,
                      p_params: dict,
                      do_print: bool = False) -> Tuple[bool, dict]:
    """Screen a padlock probe's arms against the thresholds in ``p_params``.

    Checks, in order: arm GC content (min and max), left-arm GC clamp, arm
    melting temperature, excluded sub-sequences in ``right + loop + left``,
    and heterodimer Tm for left/right, left/loop and right/loop. Every check
    is run even after an earlier one fails.

    :param p_l_seq: left arm sequence.
    :param p_r_seq: right arm sequence.
    :param loop_seq: loop sequence between the arms.
    :param p_params: thresholds; keys read here are ``thermo_params``,
        ``arm_gc_min``, ``arm_gc_max``, ``arm_tm_min``, ``exclude_seqs`` and
        ``structure_tm_max``.
    :param do_print: print a line for each failed numeric/clamp check.
    :return: ``(is_good, report)``. ``report`` keeps its default entry for a
        passed check and the offending value for a failed one; only the first
        excluded sequence found is recorded. ``tm_hairpin_l`` and
        ``tm_hairpin_r`` are present but never updated by this function.
    """
    thermo_kwargs = p_params['thermo_params']
    passed = True
    report = {
        'arm_gc_min_l': 0,
        'arm_gc_max_l': 0,
        'arm_gc_min_r': 0,
        'arm_gc_max_r': 0,
        'l_clamp': True,
        'tm_arm_min_l': 0,
        'tm_arm_min_r': 0,
        'ex_seq': [],
        'tm_hairpin_l': 0,
        'tm_hairpin_r': 0,
        'tm_hetero_0': 0,
        'tm_hetero_1': 0,
        'tm_hetero_2': 0
    }

    def record_failure(key, value, message, *message_args):
        # Print (if requested), mark the probe as bad, store the value.
        nonlocal passed
        if do_print:
            print(message % message_args if message_args else message)
        passed = False
        report[key] = value

    # 1. GC content checks
    gc_left = gcContent(p_l_seq)
    gc_right = gcContent(p_r_seq)
    if gc_left < p_params['arm_gc_min']:
        record_failure('arm_gc_min_l', gc_left,
                       "\tgc content L min fail %0.3f", gc_left)
    if gc_right < p_params['arm_gc_min']:
        record_failure('arm_gc_min_r', gc_right,
                       "\tgc content R min fail %0.3f", gc_right)
    if gc_left > p_params['arm_gc_max']:
        record_failure('arm_gc_max_l', gc_left,
                       "\tgc content L max fail %0.3f", gc_left)
    if gc_right > p_params['arm_gc_max']:
        record_failure('arm_gc_max_r', gc_right,
                       "\tgc content R max fail %0.3f", gc_right)

    # 2. GC clamp checks
    if padlockLeftArmGCClamp(p_l_seq) > 3:
        record_failure('l_clamp', False, "\tl clamp fail")

    # 3. Arm Tm check (both Tms are computed before either is judged)
    tm_left = calcTm(p_l_seq, **thermo_kwargs)
    tm_right = calcTm(p_r_seq, **thermo_kwargs)
    if tm_left < p_params['arm_tm_min']:
        record_failure('tm_arm_min_l', tm_left, "\tArm L fail %2.3f", tm_left)
    if tm_right < p_params['arm_tm_min']:
        record_failure('tm_arm_min_r', tm_right, "\tArm R fail %2.3f", tm_right)

    # 4. Check for excluded seqs (stops at the first hit, never printed)
    padlock_seq = p_r_seq + loop_seq + p_l_seq
    for banned in p_params['exclude_seqs']:
        if banned in padlock_seq:
            report['ex_seq'].append(banned)
            passed = False
            break

    # 5. Secondary structure / primer dimer checks
    hetero_checks = (
        ('tm_hetero_0', "\thetero 0 fail",
         calcHeterodimerTm(p_l_seq, p_r_seq, **thermo_kwargs)),
        ('tm_hetero_1', "\thetero 1 fail",
         calcHeterodimerTm(p_l_seq, loop_seq, **thermo_kwargs)),
        ('tm_hetero_2', "\thetero 2 fail",
         calcHeterodimerTm(p_r_seq, loop_seq, **thermo_kwargs)),
    )
    for report_key, message, hetero_tm in hetero_checks:
        if hetero_tm > p_params['structure_tm_max']:
            record_failure(report_key, hetero_tm, message)

    return passed, report
def screenPadlockArms(
        p_l_seq: str,
        p_r_seq: str,
        loop_seq: str,
        p_params: dict,
        do_print: bool = False) -> Tuple[bool, dict]:
    """Screen a padlock probe's arms against the thresholds in ``p_params``.

    Checks, in order: arm GC content (min and max), left-arm GC clamp, arm
    melting temperature, excluded sub-sequences in ``right + loop + left``,
    and heterodimer Tm for left/right, left/loop and right/loop. Every check
    is run even after an earlier one fails.

    :param p_l_seq: left arm sequence.
    :param p_r_seq: right arm sequence.
    :param loop_seq: loop sequence between the arms.
    :param p_params: thresholds; keys read here are ``thermo_params``,
        ``arm_gc_min``, ``arm_gc_max``, ``arm_tm_min``, ``exclude_seqs`` and
        ``structure_tm_max``.
    :param do_print: print a line for each failed numeric/clamp check.
    :return: ``(is_good, report)``. ``report`` keeps its default entry for a
        passed check and the offending value for a failed one; only the first
        excluded sequence found is recorded. ``tm_hairpin_l`` and
        ``tm_hairpin_r`` are present but never updated by this function.
    """
    thermo_kwargs = p_params['thermo_params']
    passed = True
    report = {
        'arm_gc_min_l': 0,
        'arm_gc_max_l': 0,
        'arm_gc_min_r': 0,
        'arm_gc_max_r': 0,
        'l_clamp': True,
        'tm_arm_min_l': 0,
        'tm_arm_min_r': 0,
        'ex_seq': [],
        'tm_hairpin_l': 0,
        'tm_hairpin_r': 0,
        'tm_hetero_0': 0,
        'tm_hetero_1': 0,
        'tm_hetero_2': 0
    }

    def record_failure(key, value, message, *message_args):
        # Print (if requested), mark the probe as bad, store the value.
        nonlocal passed
        if do_print:
            print(message % message_args if message_args else message)
        passed = False
        report[key] = value

    # 1. GC content checks
    gc_left = gcContent(p_l_seq)
    gc_right = gcContent(p_r_seq)
    if gc_left < p_params['arm_gc_min']:
        record_failure('arm_gc_min_l', gc_left,
                       "\tgc content L min fail %0.3f", gc_left)
    if gc_right < p_params['arm_gc_min']:
        record_failure('arm_gc_min_r', gc_right,
                       "\tgc content R min fail %0.3f", gc_right)
    if gc_left > p_params['arm_gc_max']:
        record_failure('arm_gc_max_l', gc_left,
                       "\tgc content L max fail %0.3f", gc_left)
    if gc_right > p_params['arm_gc_max']:
        record_failure('arm_gc_max_r', gc_right,
                       "\tgc content R max fail %0.3f", gc_right)

    # 2. GC clamp checks
    if padlockLeftArmGCClamp(p_l_seq) > 3:
        record_failure('l_clamp', False, "\tl clamp fail")

    # 3. Arm Tm check (both Tms are computed before either is judged)
    tm_left = calcTm(p_l_seq, **thermo_kwargs)
    tm_right = calcTm(p_r_seq, **thermo_kwargs)
    if tm_left < p_params['arm_tm_min']:
        record_failure('tm_arm_min_l', tm_left, "\tArm L fail %2.3f", tm_left)
    if tm_right < p_params['arm_tm_min']:
        record_failure('tm_arm_min_r', tm_right, "\tArm R fail %2.3f", tm_right)

    # 4. Check for excluded seqs (stops at the first hit, never printed)
    padlock_seq = p_r_seq + loop_seq + p_l_seq
    for banned in p_params['exclude_seqs']:
        if banned in padlock_seq:
            report['ex_seq'].append(banned)
            passed = False
            break

    # 5. Secondary structure / primer dimer checks
    hetero_checks = (
        ('tm_hetero_0', "\thetero 0 fail",
         calcHeterodimerTm(p_l_seq, p_r_seq, **thermo_kwargs)),
        ('tm_hetero_1', "\thetero 1 fail",
         calcHeterodimerTm(p_l_seq, loop_seq, **thermo_kwargs)),
        ('tm_hetero_2', "\thetero 2 fail",
         calcHeterodimerTm(p_r_seq, loop_seq, **thermo_kwargs)),
    )
    for report_key, message, hetero_tm in hetero_checks:
        if hetero_tm > p_params['structure_tm_max']:
            record_failure(report_key, hetero_tm, message)

    return passed, report