Example #1
0
 def Amplicon_pos(self):
     "Locate the amplicon and record its bounds; the positions are 0 indexed."
     outer_fwd, outer_rev = self['F3'], self['B3c']
     # The amplicon runs from the first value located for F3
     # to the second value located for B3c.
     amplicon_start, _ = REFape.locate_primer(outer_fwd)
     _, amplicon_end = REFape.locate_primer(outer_rev)
     self['A_start'], self['A_end'] = amplicon_start, amplicon_end
     return self
Example #2
0
 def LoopHairpin(self):
     "Run a hairpin calculation on both loop regions and store dG / Tm on the record."
     # Forward loop is bounded by the first located value of F2 and of F1.
     f2_start, _ = REFape.locate_primer(self['F2'])
     f1_start, _ = REFape.locate_primer(self['F1'])
     # Reverse loop is bounded by the second located value of B1c and of B2c.
     _, b1c_end = REFape.locate_primer(self['B1c'])
     _, b2c_end = REFape.locate_primer(self['B2c'])
     # Only the first 60 bases of each loop are analysed
     # (presumably a primer3 hairpin length limit -- confirm).
     forward_loop = revcomp(REFape[f2_start:f1_start])[0:60]
     reverse_loop = REFape[b1c_end:b2c_end][0:60]
     forward_hairpin = primer3.bindings.calcHairpin(forward_loop, mv_conc,
                                                    dv_conc, dntp_conc)
     reverse_hairpin = primer3.bindings.calcHairpin(reverse_loop, mv_conc,
                                                    dv_conc, dntp_conc)
     # dG is stored divided by 1000 (presumably cal/mol -> kcal/mol -- confirm).
     self['FloopdG'], self['FloopTm'] = (round(forward_hairpin.dg / 1000, 4),
                                         round(forward_hairpin.tm, 3))
     self['RloopdG'], self['RloopTm'] = (round(reverse_hairpin.dg / 1000, 4),
                                         round(reverse_hairpin.tm, 3))
     return self
Example #3
0
def iter_primerset_html(files):
    """
    Iterate over primer design results saved from the PrimerExplorer website.

    files: iterable of paths to the saved HTML result pages.
    yield: PrimerSetRecord() built from (key, value) pairs with keys
    [name,F3,B3,FIP,BIP,LF,LB,B2c,B1c,F2,F1,gene,B3c,LFc,PEdG]
    LF, LB and LFc are always empty strings for this source.

    Raises TypeError if a primer-set block contains no dG cell and
    ValueError if a block does not split into exactly six fragments.
    """
    keys = [
        'name', 'F3', 'B3', 'FIP', 'BIP', 'LF', 'LB', 'B2c', 'B1c', 'F2', 'F1',
        'gene', 'B3c', 'LFc', 'PEdG'
    ]
    # Raw strings: the patterns contain regex escapes (\[ and \d) that are
    # invalid escape sequences in ordinary string literals.
    # One primer-set block: everything between "<td>[id]</td>" and the
    # matching "<td><span class="dnaString">[id]</span></td>".
    block_pattern = re.compile(
        r"""<td>\[(?P<id>\d*)\]</td>(?P<content>.*)<td><span class="dnaString">\[(?P=id)\]</span></td>""",
        flags=re.DOTALL)
    # One coloured run of bases inside a block.
    span_pattern = re.compile(
        '<span class="dnaString" style="color:#(?P<color>.{6});">(?P<seq>[ATCGatcg]*)</span>'
    )
    dg_pattern = re.compile(r'<td align="left">(?P<dG>[-.\d]+)</td>')
    name_counter = Counter()
    for file in files:
        with open(file, 'rt') as fr:
            text = fr.read()
        for block in block_pattern.finditer(text):
            content = block['content']
            PEdG = float(dg_pattern.search(content)['dG'])
            # Consecutive spans sharing colour and letter case belong to the
            # same fragment; a change in either starts a new fragment.
            fragments = []
            color = ""
            case = None
            for span in span_pattern.finditer(content):
                seq = span['seq']
                if span['color'] != color or case != seq.isupper():
                    color = span['color']
                    case = seq.isupper()
                    fragments.append("")
                fragments[-1] += seq
            F3, F2, F1c, B1c, B2, B3 = [i.upper() for i in fragments]
            # F1c, B2 and B3 are reversed before use (presumably the page
            # prints them 3'->5' -- confirm against a saved result page).
            F1c, B2, B3 = F1c[::-1], B2[::-1], B3[::-1]
            B2c = revcomp(B2)
            B3c = revcomp(B3)
            F1 = revcomp(F1c)
            FIP = F1c + F2
            BIP = B1c + B2
            gene = REFape.name_primer(F1)
            # Number the sets per gene[0]: first is <gene[0]>1, then 2, ...
            name_counter[gene[0]] += 1
            setname = gene[0] + str(name_counter[gene[0]])

            yield PrimerSetRecord(
                zip(keys, [
                    setname, F3, B3, FIP, BIP, '', '', B2c, B1c, F2, F1, gene,
                    B3c, '', PEdG
                ]))
Example #4
0
def iter_primerset_lamp_design_csv(*files,
                                   skiprows=None,
                                   usecols=[1, 2],
                                   skipfooter=0,
                                   return_df=False):
    """
    Read all csv files into a single DataFrame, then iterate over primer
    sets (8 consecutive rows each) and name each set based on its locus.

    files: paths of the csv files to read.
    skiprows: number of leading lines to skip in every file. When None the
        header is detected separately for each file as the last line that
        starts with 'set'.
    usecols: columns passed to pandas.read_csv (the default list is never
        mutated here).
    skipfooter: passed to pandas.read_csv.
    return_df: if True, yield the concatenated DataFrame once instead of
        primer set records.

    yield: PrimerSetRecord()
    [setname,F3,B3,FIP,BIP,LF,LB,B2c,B1c,F2,F1,gene,B3c,LFc,]

    Raises ValueError if skiprows is None and a file has no line starting
    with 'set'.
    """
    # The empty seed frame keeps pd.concat working when no files are given.
    frames = [pd.DataFrame(columns=['name', 'seq'])]
    for f in files:
        # Detect into a per-file variable: overwriting `skiprows` itself
        # would make every later file reuse the first file's header row.
        header_row = skiprows
        if header_row is None:
            with open(f, 'rt') as ff:
                for k, line in enumerate(ff):
                    if line.startswith('set'):
                        header_row = k
            if header_row is None:
                raise ValueError(
                    f"No header line starting with 'set' found in {f}.")
        frames.append(
            pd.read_csv(f,
                        skiprows=list(range(header_row)),
                        usecols=usecols,
                        skipfooter=skipfooter))
    df = pd.concat(frames, axis=0, ignore_index=True)
    if return_df:
        yield df
    else:
        name_counter = Counter()
        # Trailing rows that do not fill a complete block of 8 are ignored.
        for i in range(len(df) // 8):
            # .loc slicing is label based and inclusive: rows i*8 .. i*8+7.
            F3, F2, F1, B1c, B2c, B3c, LFc, LB = list(df.loc[i * 8:(i * 8 + 7),
                                                             'seq'])
            names = list(df.loc[i * 8:(i * 8 + 7), 'name'])
            assert len(set(names)) == 8, f"Primer set incomplete at index {i}."
            gene = REFape.name_primer(F1)
            name_counter[gene[0]] += 1
            setname = gene[0] + str(name_counter[gene[0]])
            yield PrimerSetRecord([
                setname, F3,
                revcomp(B3c),
                revcomp(F1) + F2, B1c + revcomp(B2c),
                revcomp(LFc), LB, B2c, B1c, F2, F1, gene, B3c, LFc
            ])
Example #5
0
def iter_primerset_excel():
    """
    Iterate over the primer sets returned by read_primerset_excel().

    Each entry provides its name under 'set' and, under 'feature', an
    iterable of 3-item tuples whose first item is the fragment label
    (with a '<set name>-' prefix) and whose third item is the value used as
    the fragment sequence. Sets missing any of F3, B3, F2, F1c, B2, B1c,
    LF or LB are skipped.

    yield:PrimerSetRecord([setname,F3,B3,FIP,BIP,LF,LB,B2c,B1c,F2,F1,gene,B3c,LFc,])
    """
    required = ('F3', 'B3', 'F2', 'F1c', 'B2', 'B1c', 'LF', 'LB')
    for entry in read_primerset_excel():
        name = entry['set']
        prefix = name + '-'
        fragments = {
            label.replace(prefix, ''): seq
            for label, _, seq in entry['feature']
        }
        F3, B3, F2, F1c, B2, B1c, LF, LB = (fragments.get(k) for k in required)
        if not all([F3, B3, F2, F1c, B2, B1c, LF, LB]):
            continue
        # Look up the gene only after the completeness check, so a set
        # without F1c is skipped instead of reaching revcomp(None).
        gene = REFape.name_primer(revcomp(F1c))
        yield PrimerSetRecord([
            name, F3, B3, F1c + F2, B1c + B2, LF, LB,
            revcomp(B2), B1c, F2,
            revcomp(F1c), gene,
            revcomp(B3),
            revcomp(LF)
        ])
Example #6
0
    def draw_primerset(self,
                       basepairposition=None,
                       saveas=False,
                       alignwith=None,
                       drawgene=True,
                       drawproperty=[],
                       figwidth=None,
                       figheight=None,
                       drawgrid=False):
        """
        Draw the primers of every primer set record as one row of bars.

        basepairposition: (low, high); draw only primer set records whose
            A_start >= low and A_end <= high.
        saveas: file path to save the figure to; if falsy the figure is shown instead.
        alignwith: name of the fragment to align every row on; by default rows are
            aligned to the gene. Can be F3, F2, F1, B1c, LFc, LB etc.
            Raises ValueError if a row has no fragment with that name.
        drawgene: choose whether to draw the gene bar above the primer rows.
        drawproperty: a list of (property_name, property_format) to draw
            to the right of each row, e.g. ('Inclusivity','{:.2%}').
            The default list is only read, never mutated.
        figwidth: figure width; defaults to 10.
        figheight: figure height; defaults to a value that grows with the row count.
        drawgrid: draw horizontal grid lines when True.
        """
        pl = self
        if basepairposition:
            pl = PrimerSetRecordList(i for i in pl
                                     if i['A_start'] >= basepairposition[0]
                                     and i['A_end'] <= basepairposition[1])
        # Colours are applied to the bars of a row in order.
        facecolor = ('tab:blue', 'tab:orange', 'tab:green', 'tab:red',
                     'tab:purple', 'tab:brown', 'tab:pink', 'tab:olive')

        # drawgene (a bool) adds one extra row of height for the gene bar.
        fig, ax = plt.subplots(figsize=(figwidth or 10, figheight or 0.26 *
                                        (len(pl) + drawgene) + 0.68))
        left_pos = [REFape.locate_primer(i['F3'])[0] for i in pl]
        right_pos = [REFape.locate_primer(i['B3c'])[1] for i in pl]
        # NOTE: min()/max() raise ValueError when pl is empty.
        left_min = min(left_pos)
        # NOTE: despite its name, right_min is the largest right-hand position.
        right_min = max(right_pos)
        # Region covered by all drawn primer sets; its features become the gene bar.
        common_fragment = REFape.truncate(left_min, right_min)
        gene_bar = [(i['start'] - 1, i['end'] - i['start'] + 1)
                    for i in common_fragment.features]
        # Label each gene bar with its position relative to the start of that feature in REFape.
        gene_bar_name = [
            f"{ i['tag']}:{i['start']+left_min - REFape.get_feature_pos(i['tag'])[0]}-{i['end']+left_min- REFape.get_feature_pos(i['tag'])[0]}"
            for i in common_fragment.features
        ]
        y_labels = [i['name'] for i in pl]

        ax.set_yticks(list(range(len(y_labels))))
        if drawgrid:
            ax.grid(axis='y', linewidth=0.3, linestyle='-.')
        ax.set_yticklabels(y_labels)
        ax.set_ylim([-1, len(pl) + drawgene])
        for y, p in enumerate(pl):
            # (start, width) and name of every non-empty fragment of this record.
            plot_bar = []
            plot_bar_name = []
            for n, i in p.iter('fragment'):
                if i:
                    pos = REFape.locate_primer(i)
                    plot_bar_name.append(n)
                    plot_bar.append((pos[0], pos[1] - pos[0]))
            # If alignwith names a fragment, shift this row so that fragment starts at x = 0.
            # NOTE: this rebinds left_min, which is also used below and after the loop.
            if alignwith:
                _index = plot_bar_name.index(alignwith)
                left_min = plot_bar[_index][0]
            # Position labels for the row, relative to the gene when
            # get_relative_pos returns something truthy, else absolute.
            # NOTE(review): indexes 0 and 3 are hard-coded, so a record needs at
            # least four non-empty fragments -- confirm the fragment order.
            _relative_left = REFape.get_relative_pos(plot_bar[0][0])
            relative_left = f"{_relative_left[1]+1}" if _relative_left else f"{plot_bar[0][0]}"
            _relative_right = REFape.get_relative_pos(plot_bar[3][0])
            relative_right = f"{_relative_right[1]+plot_bar[3][1]}" if _relative_right else f"{plot_bar[3][0]}"

            # Convert absolute starts to x coordinates relative to left_min.
            plot_bar = [(i - left_min, j) for i, j in plot_bar]
            ax.broken_barh(plot_bar, (y - 0.3, 0.6), facecolors=facecolor)
            # Fragment name centred on each bar.
            for n, (_p, w) in zip(plot_bar_name, plot_bar):
                ax.text(_p + w / 2, y - 0.3, n, ha='center', va='bottom')

            # Left label just before the first bar, right label just after the fourth.
            ax.text(plot_bar[0][0] - plot_bar[0][1] * 0.05,
                    y - 0.3,
                    relative_left,
                    ha='right',
                    va='bottom')
            ax.text(plot_bar[3][0] + plot_bar[3][1] * 1.05,
                    y - 0.3,
                    relative_right,
                    ha='left',
                    va='bottom')
            if drawproperty:
                # Formatted property values, drawn to the right of the row.
                propertytext = []
                for i, f in drawproperty:
                    propertytext.append(f.format(p[i]))
                ax.text((right_min - left_min) * 1.06,
                        y - 0.3,
                        " " + ' | '.join(propertytext),
                        ha='left',
                        va='bottom',
                        family='monospace')

        # Header for the property columns, one row above the last record. Column
        # widths come from the last row's formatted values; names are truncated
        # or space padded to that width + 2.
        if drawproperty:
            ax.text(
                (right_min - left_min) * 1.06,
                y + 0.7,
                '|'.join(i[0][0:(len(t) + 2)] + " " * (len(t) + 2 - len(i[0]))
                         for i, t in zip(drawproperty, propertytext)),
                ha='left',
                va='bottom',
                family='monospace')

        # Draw the gene bar one row above the last record when requested.
        if drawgene:
            ax.broken_barh(gene_bar, (y + 0.7, 0.6), facecolor=facecolor)
            for n, (p, w) in zip(gene_bar_name, gene_bar):
                ax.text(p + w / 2, y + 0.7, n, ha='center', va='bottom')

        ax.set_xticks([])

        plt.tight_layout()

        # Save to file when a path is given, otherwise show the figure.
        if saveas:
            plt.savefig(saveas)
        else:
            plt.show()
Example #7
0
 def loopR(self):
     "Return the REFape slice between the second located value of B1c and of B2c."
     loop_start = REFape.locate_primer(self['B1c'])[1]
     loop_end = REFape.locate_primer(self['B2c'])[1]
     return REFape[loop_start:loop_end]
Example #8
0
 def loopF(self):
     "Return revcomp of the REFape slice between the first located value of F2 and of F1."
     loop_start = REFape.locate_primer(self['F2'])[0]
     loop_end = REFape.locate_primer(self['F1'])[0]
     forward_strand = REFape[loop_start:loop_end]
     return revcomp(forward_strand)
Example #9
0
 def to_ape(self):
     "Label REFape with this record's primers, using the record's name."
     return REFape.label_from_primers(dict(self.iter('primer')),
                                      name=self['name'])
Example #10
0
 def gap_positions(self):
     "return a list of all 12 gap positions, 0 indexed."
     positions = []
     # Every value located for each of the six fragments, in this order.
     for fragment in ('F3', 'F2', 'F1', 'B1c', 'B2c', 'B3c'):
         positions.extend(REFape.locate_primer(self[fragment]))
     return positions