def Amplicon_pos(self):
    """Locate the amplicon on the reference; positions are 0 indexed.

    Stores the first value returned by ``REFape.locate_primer`` for the F3
    primer under ``'A_start'`` and the second value returned for the B3c
    primer under ``'A_end'``.

    Returns:
        ``self``, so calls can be chained.
    """
    f3_seq, b3c_seq = self['F3'], self['B3c']
    amplicon_start, _ = REFape.locate_primer(f3_seq)
    _, amplicon_end = REFape.locate_primer(b3c_seq)
    self['A_start'], self['A_end'] = amplicon_start, amplicon_end
    return self
def LoopHairpin(self):
    """Check the loop regions of the amplicon for hairpin formation.

    The forward loop is the reverse complement of the reference between the
    F2 start and the F1 start; the reverse loop is the reference between the
    B1c end and the B2c end. Each loop is truncated to its first 60 bases
    before being passed to ``primer3.bindings.calcHairpin``.

    Stores ``'FloopdG'``/``'FloopTm'`` and ``'RloopdG'``/``'RloopTm'`` on the
    record (dG is the primer3 value divided by 1000, rounded to 4 places;
    Tm is rounded to 3 places).

    Returns:
        ``self``, so calls can be chained.
    """
    f2_start, _ = REFape.locate_primer(self['F2'])
    f1_start, _ = REFape.locate_primer(self['F1'])
    _, b1c_end = REFape.locate_primer(self['B1c'])
    _, b2c_end = REFape.locate_primer(self['B2c'])

    forward_loop = revcomp(REFape[f2_start:f1_start])[:60]
    reverse_loop = REFape[b1c_end:b2c_end][:60]

    forward_result = primer3.bindings.calcHairpin(forward_loop, mv_conc,
                                                  dv_conc, dntp_conc)
    reverse_result = primer3.bindings.calcHairpin(reverse_loop, mv_conc,
                                                  dv_conc, dntp_conc)

    # Same key order as before: forward dG/Tm first, then reverse dG/Tm.
    for prefix, result in (('Floop', forward_result),
                           ('Rloop', reverse_result)):
        self[prefix + 'dG'] = round(result.dg / 1000, 4)
        self[prefix + 'Tm'] = round(result.tm, 3)
    return self
def iter_primerset_html(files):
    """
    Iterate over primer design results saved from the PrimerExplorer website.

    files: iterable of paths to saved HTML result pages.

    yield: PrimerSetRecord() built from the key/value pairs
    [name,F3,B3,FIP,BIP,LF,LB,B2c,B1c,F2,F1,gene,B3c,LFc,PEdG].
    LF, LB and LFc are always empty strings for this source.

    Set names are the first element of ``REFape.name_primer(F1)`` followed by
    a running counter that is shared across all files.
    """
    keys = [
        'name', 'F3', 'B3', 'FIP', 'BIP', 'LF', 'LB', 'B2c', 'B1c', 'F2',
        'F1', 'gene', 'B3c', 'LFc', 'PEdG'
    ]
    # Raw strings: "\[" and "\d" are invalid escapes in a normal string
    # literal and trigger a SyntaxWarning on recent Python versions.
    # One primer set is everything between "[id]" and the matching
    # "[id]" wrapped in a dnaString span.
    set_pattern = re.compile(
        r"""<td>\[(?P<id>\d*)\]</td>(?P<content>.*)<td><span class="dnaString">\[(?P=id)\]</span></td>""",
        flags=re.DOTALL)
    span_pattern = re.compile(
        r'<span class="dnaString" style="color:#(?P<color>.{6});">(?P<seq>[ATCGatcg]*)</span>'
    )
    dg_pattern = re.compile(r'<td align="left">(?P<dG>[-.\d]+)</td>')

    name_counter = Counter()
    for file in files:
        # Read fully and close the file before yielding anything.
        with open(file, 'rt') as fr:
            text = fr.read()
        for set_match in set_pattern.finditer(text):
            content = set_match['content']
            PEdG = float(dg_pattern.search(content)['dG'])

            # Consecutive spans belong to the same fragment until either the
            # colour or the letter case changes.
            segments = []
            color = ""
            case = None
            for span in span_pattern.finditer(content):
                seq = span['seq']
                seq_is_upper = seq.isupper()
                if span['color'] != color or case != seq_is_upper:
                    color = span['color']
                    case = seq_is_upper
                    segments.append("")
                segments[-1] += seq

            # Exactly six fragments are expected; anything else raises
            # ValueError from the unpacking below.
            F3, F2, F1c, B1c, B2, B3 = [s.upper() for s in segments]
            # These three are reversed relative to how they appear on the
            # page.
            F1c, B2, B3 = F1c[::-1], B2[::-1], B3[::-1]
            B2c = revcomp(B2)
            B3c = revcomp(B3)
            F1 = revcomp(F1c)
            FIP = F1c + F2
            BIP = B1c + B2

            gene = REFape.name_primer(F1)
            name_counter[gene[0]] += 1
            setname = gene[0] + str(name_counter[gene[0]])
            yield PrimerSetRecord(
                zip(keys, [
                    setname, F3, B3, FIP, BIP, '', '', B2c, B1c, F2, F1,
                    gene, B3c, '', PEdG
                ]))
def iter_primerset_lamp_design_csv(*files,
                                   skiprows=None,
                                   usecols=[1, 2],
                                   skipfooter=0,
                                   return_df=False):
    """
    Read all csv files into a single DataFrame, then iterate over primer
    sets, naming each one after its locus.

    files: paths of the csv files to read.
    skiprows: number of leading lines to skip in every file. When None, it is
        detected separately for each file as the index of the (last) line
        starting with 'set'.
    usecols: columns passed to pandas.read_csv; the selected columns are
        expected to be headed 'name' and 'seq'.
    skipfooter: passed through to pandas.read_csv.
    return_df: if True, yield the combined DataFrame once and stop.

    yield: PrimerSetRecord()
    [setname,F3,B3,FIP,BIP,LF,LB,B2c,B1c,F2,F1,gene,B3c,LFc,]

    Raises ValueError when skiprows is None and a file has no line starting
    with 'set'.
    """
    # NOTE: the list default for usecols is never mutated here, so sharing
    # it between calls is harmless; it is kept for interface compatibility.
    frames = [pd.DataFrame(columns=['name', 'seq'])]
    for f in files:
        # Detect the header per file. Writing the detected value back to
        # `skiprows` would make every later file reuse the first file's
        # offset.
        header_row = skiprows
        if header_row is None:
            with open(f, 'rt') as ff:
                for k, line in enumerate(ff):
                    if line.startswith('set'):
                        header_row = k
            if header_row is None:
                raise ValueError(
                    f"No header line starting with 'set' found in {f}.")
        frames.append(
            pd.read_csv(f,
                        skiprows=list(range(header_row)),
                        usecols=usecols,
                        skipfooter=skipfooter))
    df = pd.concat(frames, axis=0, ignore_index=True)

    if return_df:
        yield df
        return

    name_counter = Counter()
    # Rows are consumed in groups of eight, in this fixed fragment order.
    for i in range(len(df) // 8):
        first, last = i * 8, i * 8 + 7  # .loc slicing is end-inclusive
        F3, F2, F1, B1c, B2c, B3c, LFc, LB = list(df.loc[first:last, 'seq'])
        names = list(df.loc[first:last, 'name'])
        assert len(set(names)) == 8, f"Primer set incomplete at index {i}."
        gene = REFape.name_primer(F1)
        name_counter[gene[0]] += 1
        setname = gene[0] + str(name_counter[gene[0]])
        yield PrimerSetRecord([
            setname, F3,
            revcomp(B3c),
            revcomp(F1) + F2, B1c + revcomp(B2c),
            revcomp(LFc), LB, B2c, B1c, F2, F1, gene, B3c, LFc
        ])
def iter_primerset_excel():
    """
    Iterate over the primer sets returned by read_primerset_excel().

    Each entry provides its set name under 'set' and a list of
    (label, _, sequence) triples under 'feature'; the "<set name>-" prefix is
    removed from each label. Sets that lack any of F3, B3, F2, F1c, B2, B1c,
    LF or LB (missing or empty) are skipped.

    yield:PrimerSetRecord([setname,F3,B3,FIP,BIP,LF,LB,B2c,B1c,F2,F1,gene,B3c,LFc,])
    """
    required = ('F3', 'B3', 'F2', 'F1c', 'B2', 'B1c', 'LF', 'LB')
    for entry in read_primerset_excel():
        name = entry['set']
        fragments = {
            label.replace(name + '-', ''): seq
            for label, _, seq in entry['feature']
        }
        parts = [fragments.get(key) for key in required]
        # Check completeness before touching the sequences: the gene lookup
        # used to run first and was handed revcomp(None) for incomplete sets.
        if not all(parts):
            continue
        F3, B3, F2, F1c, B2, B1c, LF, LB = parts
        F1 = revcomp(F1c)
        gene = REFape.name_primer(F1)
        yield PrimerSetRecord([
            name, F3, B3, F1c + F2, B1c + B2, LF, LB,
            revcomp(B2), B1c, F2, F1, gene,
            revcomp(B3),
            revcomp(LF)
        ])
def draw_primerset(self,
                   basepairposition=None,
                   saveas=False,
                   alignwith=None,
                   drawgene=True,
                   drawproperty=[],
                   figwidth=None,
                   figheight=None,
                   drawgrid=False):
    """
    Draw the primers of every primer set as one row of horizontal bars.

    basepairposition: (low, high); draw only primer set records whose
        A_start >= low and A_end <= high.
    saveas: file path to save the figure to; when falsy the figure is shown
        with plt.show() instead.
    alignwith: name of the fragment to align rows on (e.g. F3, F2, F1, B1c,
        LFc, LB). Each row is shifted so that this fragment starts at x = 0.
        Default: align to the leftmost F3 start over all drawn sets.
        The name must be present in every drawn record, otherwise
        list.index raises ValueError.
    drawgene: whether to draw the gene (feature) bar above the primer rows.
    drawproperty: a list of (property_name, property_format) to draw to the
        right of each row, e.g. ('Inclusivity','{:.2%}').
    figwidth: figure width; defaults to 10.
    figheight: figure height; defaults to 0.26 * (rows + drawgene) + 0.68.
    drawgrid: whether to draw horizontal grid lines.
    """
    pl = self
    if basepairposition:
        pl = PrimerSetRecordList(i for i in pl
                                 if i['A_start'] >= basepairposition[0]
                                 and i['A_end'] <= basepairposition[1])
    facecolor = ('tab:blue', 'tab:orange', 'tab:green', 'tab:red',
                 'tab:purple', 'tab:brown', 'tab:pink', 'tab:olive')
    fig, ax = plt.subplots(figsize=(figwidth or 10, figheight
                                    or 0.26 * (len(pl) + drawgene) + 0.68))
    # Outer bounds over all drawn sets: leftmost F3 start, rightmost B3c end.
    # min()/max() raise ValueError when no record is left to draw.
    left_pos = [REFape.locate_primer(i['F3'])[0] for i in pl]
    right_pos = [REFape.locate_primer(i['B3c'])[1] for i in pl]
    left_min = min(left_pos)
    # NOTE(review): despite its name, right_min holds the maximum right edge.
    right_min = max(right_pos)
    common_fragment = REFape.truncate(left_min, right_min)
    # (x start, width) for every feature of the truncated reference.
    gene_bar = [(i['start'] - 1, i['end'] - i['start'] + 1)
                for i in common_fragment.features]
    # plot relative position to the gene:
    gene_bar_name = [
        f"{ i['tag']}:{i['start']+left_min - REFape.get_feature_pos(i['tag'])[0]}-{i['end']+left_min- REFape.get_feature_pos(i['tag'])[0]}"
        for i in common_fragment.features
    ]
    y_labels = [i['name'] for i in pl]
    ax.set_yticks(list(range(len(y_labels))))
    if drawgrid:
        ax.grid(axis='y', linewidth=0.3, linestyle='-.')
    ax.set_yticklabels(y_labels)
    # One extra row at the top is reserved when the gene bar is drawn.
    ax.set_ylim([-1, len(pl) + drawgene])
    for y, p in enumerate(pl):
        plot_bar = []
        plot_bar_name = []
        # Collect (start, width) for every non-empty fragment of this set.
        for n, i in p.iter('fragment'):
            if i:
                pos = REFape.locate_primer(i)
                plot_bar_name.append(n)
                plot_bar.append((pos[0], pos[1] - pos[0]))
        # if alignwith = F3 or other fragment name, use that position.
        # This overwrites left_min for this row and for everything below.
        if alignwith:
            _index = plot_bar_name.index(alignwith)
            left_min = plot_bar[_index][0]
        # relative position of the primer to this gene.
        # Falls back to the absolute position when get_relative_pos returns
        # a falsy value.
        _relative_left = REFape.get_relative_pos(plot_bar[0][0])
        relative_left = f"{_relative_left[1]+1}" if _relative_left else f"{plot_bar[0][0]}"
        # NOTE(review): index 3 is the fourth non-empty fragment yielded by
        # p.iter('fragment'); presumably the last outer fragment - confirm
        # against the iteration order.
        _relative_right = REFape.get_relative_pos(plot_bar[3][0])
        relative_right = f"{_relative_right[1]+plot_bar[3][1]}" if _relative_right else f"{plot_bar[3][0]}"
        # Shift the row so that left_min sits at x = 0.
        plot_bar = [(i - left_min, j) for i, j in plot_bar]
        ax.broken_barh(plot_bar, (y - 0.3, 0.6), facecolors=facecolor)
        # Fragment name centred on each bar.
        for n, (_p, w) in zip(plot_bar_name, plot_bar):
            ax.text(_p + w / 2, y - 0.3, n, ha='center', va='bottom')
        # Position labels just outside the first and the fourth bar.
        ax.text(plot_bar[0][0] - plot_bar[0][1] * 0.05,
                y - 0.3,
                relative_left,
                ha='right',
                va='bottom')
        ax.text(plot_bar[3][0] + plot_bar[3][1] * 1.05,
                y - 0.3,
                relative_right,
                ha='left',
                va='bottom')
        if drawproperty:
            propertytext = []
            for i, f in drawproperty:
                propertytext.append(f.format(p[i]))
            ax.text((right_min - left_min) * 1.06,
                    y - 0.3,
                    " " + ' | '.join(propertytext),
                    ha='left',
                    va='bottom',
                    family='monospace')
    # Header row for the property columns: each property name is truncated or
    # space-padded to the width of the last row's formatted value plus two.
    # Uses y, left_min and propertytext left over from the last loop
    # iteration.
    if drawproperty:
        ax.text(
            (right_min - left_min) * 1.06,
            y + 0.7,
            '|'.join(i[0][0:(len(t) + 2)] + " " * (len(t) + 2 - len(i[0]))
                     for i, t in zip(drawproperty, propertytext)),
            ha='left',
            va='bottom',
            family='monospace')
    # if the gene bar needs to be drawn:
    if drawgene:
        ax.broken_barh(gene_bar, (y + 0.7, 0.6), facecolor=facecolor)
        # NOTE(review): this loop rebinds p, the record variable of the loop
        # above; harmless here because p is not used afterwards.
        for n, (p, w) in zip(gene_bar_name, gene_bar):
            ax.text(p + w / 2, y + 0.7, n, ha='center', va='bottom')
    ax.set_xticks([])
    plt.tight_layout()
    if saveas:
        plt.savefig(saveas)
    else:
        plt.show()
def loopR(self):
    """Return the reference slice running from the B1c end to the B2c end."""
    reference = REFape
    loop_start = reference.locate_primer(self['B1c'])[1]
    loop_end = reference.locate_primer(self['B2c'])[1]
    return reference[loop_start:loop_end]
def loopF(self):
    """Return the reverse complement of the reference slice running from the
    F2 start to the F1 start."""
    reference = REFape
    loop_start = reference.locate_primer(self['F2'])[0]
    loop_end = reference.locate_primer(self['F1'])[0]
    return revcomp(reference[loop_start:loop_end])
def to_ape(self):
    """Label the reference with this set's primers.

    Collects ``self.iter('primer')`` into a dict and passes it to
    ``REFape.label_from_primers`` together with the set name; returns
    whatever that call returns.
    """
    return REFape.label_from_primers(dict(self.iter('primer')),
                                     name=self['name'])
def gap_positions(self):
    """Return a list of all 12 gap positions, 0 indexed.

    The values returned by ``REFape.locate_primer`` for F3, F2, F1, B1c, B2c
    and B3c are concatenated in that order.
    """
    positions = []
    for fragment in ('F3', 'F2', 'F1', 'B1c', 'B2c', 'B3c'):
        positions.extend(REFape.locate_primer(self[fragment]))
    return positions