Ejemplo n.º 1
0
    def build_inferred_seq(self, seq, all_germlines, outline):
        """
        Fill <outline> with the deletions and insertions implied by the best v, d, and j matches.

        For each region in utils.regions, the best match's 'hmm_seq' is located (upper-cased) within
        the corresponding germline sequence from <all_germlines>. The offset at which it is found
        gives the 5' deletion, and the unmatched germline tail gives the 3' deletion. The vd and dj
        insertions are then sliced out of <seq>, using positions derived from each match's
        'ali_from' and from the first (v) excision.

        Writes the keys 'v_5p_del', 'v_3p_del', 'd_5p_del', 'd_3p_del', 'j_5p_del', 'j_3p_del',
        'vd_insertion' and 'dj_insertion' to <outline>, and additionally sets outline['ack'] = True
        if the total length implied by those pieces differs from len(seq). Prints diagnostics
        along the way and returns nothing.

        Raises AssertionError if the excisions are not ordered v, j, d, or if a match's hmm
        sequence cannot be found in its germline sequence.
        """
        assert self.excisions[0]['region'] == 'v'  # the offsets computed below rely on this excision order
        assert self.excisions[1]['region'] == 'j'
        assert self.excisions[2]['region'] == 'd'

        germlines, hmms, ihmms, tails = {}, {}, {}, {}
        for region in utils.regions:
            match = self.best_matches[region]
            germlines[region] = all_germlines[region][utils.unsanitize_name(match['target_name'])]
            hmms[region] = match['hmm_seq']
            ihmms[region] = germlines[region].find(hmms[region].upper())  # position at which the consensus (hmm) starts in the germline sequence
            if ihmms[region] < 0:  # not found: dump what we were looking for, then bail out
                print(germlines[region])
                print(hmms[region].upper())
                print(ihmms[region])
                raise AssertionError('%s hmm sequence not found in its germline sequence' % region)
            tails[region] = len(germlines[region]) - ihmms[region] - len(hmms[region])  # germline bases to the right of the hmm match
            print('  hmm for %s runs from %d to %d (inclusive)' % (region, ihmms[region], ihmms[region] + len(hmms[region]) - 1))

        # TODO v_5p_del and j_3p_del kinda ought to be zero
        for region in ('v', 'd', 'j'):
            outline[region + '_5p_del'] = ihmms[region]
            outline[region + '_3p_del'] = tails[region]

        for ex in self.excisions:
            region = ex['region']
            match = self.best_matches[region]
            print('  excised match %s: %d --> %d' % (region, ex['from'], ex['to']))
            print('        test %s' % match['test_seq'])
            # pad the hmm sequence with dots so that it lines up with the germline printed below it
            print('         hmm %s' % (ihmms[region] * '.' + match['hmm_seq'].upper() + tails[region] * '.'))  # NOTE ali_from includes the d part!
            print('    germline %s' % germlines[region])

        #----------------------------------------------------------------------------------------
        # NOTE these are inclusive
        # d and j positions are both shifted by the extent of the first (v) excision
        v_excision_offset = self.excisions[0]['to'] - self.excisions[0]['from']
        seq_match_start = {
            'v': self.best_matches['v']['ali_from'] - 1,
            'd': v_excision_offset + self.best_matches['d']['ali_from'],
            'j': v_excision_offset + self.best_matches['j']['ali_from'],
        }
        seq_match_end = {}
        for region in ('v', 'd', 'j'):
            seq_match_end[region] = seq_match_start[region] + len(hmms[region]) - 1
        outline['vd_insertion'] = seq[seq_match_end['v']+1 : seq_match_start['d']]
        outline['dj_insertion'] = seq[seq_match_end['d']+1 : seq_match_start['j']]

        actual_seq_length = len(seq)
        inferred_seq_length = (outline['v_5p_del'] + len(hmms['v']) + len(outline['vd_insertion']) + len(hmms['d'])
                               + len(outline['dj_insertion']) + len(hmms['j']) + outline['j_3p_del'])
        print('    actual %d  inferred %d' % (actual_seq_length, inferred_seq_length))
        if actual_seq_length != inferred_seq_length:
            outline['ack'] = True  # flag the mismatch for the caller
Ejemplo n.º 2
0
    def make_transition_plot(self, gene_name, model):
        """
        Plot the transition probabilities out of each state in <model>, one bin per state.

        Within a bin the probabilities are stacked in sorted order of their destination: states
        whose names start with 'IG' or 'TR' come first, ordered by the integer obtained from
        paramutils.simplify_state_name(), followed by all other states ordered by name. Each
        transition is drawn as a horizontal line at the running total plus a vertical line
        spanning its own share, colored by destination type ('insert', 'end', or 'internal').
        The figure is finished with plotting.mpl_finish(), using <self.base_plotdir>/transitions
        as the plot directory and <gene_name> as the plot name.

        NOTE shares a lot with make_mutefreq_plot() in python/paramutils.py
        """
        locus_prefixes = ('IG', 'TR')  # state names with these prefixes are regional/internal states
        alpha, width = 0.6, 3

        def sort_key(name):
            # explicit (rank, value) pairs avoid comparing ints with strings directly, while keeping numbered states first
            if name.startswith(locus_prefixes):
                return (0, int(paramutils.simplify_state_name(name)))
            return (1, name)

        fig, ax = plotting.mpl_init()
        fig.set_size_inches(plotting.plot_ratios[utils.get_region(gene_name)])

        print(utils.color_gene(utils.unsanitize_name(gene_name)))
        legend_colors = set()  # add a color to this the first time you plot it
        for ibin, state in enumerate(model.states):
            # bin label
            ax.text(-0.5 + ibin, -0.075, paramutils.simplify_state_name(state.name), rotation='vertical', size=8)

            total = 0.0
            for to_state in sorted(state.transitions.keys(), key=sort_key):
                prob = state.transitions[to_state]

                is_regional = to_state.startswith(locus_prefixes)
                if not is_regional and 'insert' in to_state:
                    label, color = 'insert', '#3498db'  # blue
                elif to_state == 'end':
                    label, color = 'end', 'red'
                else:  # regional/internal states
                    assert is_regional
                    label, color = 'internal', 'green'

                # only the first line of each color gets a legend entry
                label_to_use = None
                if color not in legend_colors:
                    label_to_use = label
                    legend_colors.add(color)

                # horizontal line at height total+prob
                ax.plot([-0.5 + ibin, 0.5 + ibin], [total + prob, total + prob], color=color, linewidth=width, alpha=alpha, label=label_to_use)

                # vertical line from total to total + prob
                ax.plot([ibin, ibin], [total + 0.01, total + prob], color=color, alpha=alpha, linewidth=width)

                # NOTE labelling the midpoint of each chunk (at 0.5*(prob + 2*total)) with the simplified state name would be nice, but there isn't really room for it

                total += prob

        ax.get_xaxis().set_visible(False)
        plotting.mpl_finish(ax, self.base_plotdir + '/transitions', gene_name, ybounds=(-0.01, 1.01), xbounds=(-3, len(model.states) + 3), leg_loc=(0.95, 0.1), adjust={'left' : 0.1, 'right' : 0.8}, leg_prop={'size' : 8})
Ejemplo n.º 3
0
    def make_transition_plot(self, gene_name, model):
        """
        Plot the transition probabilities out of each state in <model>, one bin per state.

        Destinations whose names start with 'IG' or 'TR' are treated as regional/internal states
        and are stacked first, ordered by the integer obtained from
        paramutils.simplify_state_name(); all other destinations follow, ordered by name. Each
        transition is drawn as a horizontal line at the running total plus a vertical line
        spanning its own share, colored by destination type ('insert', 'end', or 'internal').
        The figure is finished with plotting.mpl_finish(), using <self.base_plotdir>/transitions
        as the plot directory and <gene_name> as the plot name.

        NOTE shares a lot with make_mutefreq_plot() in python/paramutils.py
        """
        regional_prefixes = ('IG', 'TR')  # NOTE(review): 'TR' handling assumes simplify_state_name() yields a number for those names too -- confirm
        alpha, width = 0.6, 3

        def stacking_order(name):
            # (rank, value) keeps numbered states ahead of named ones without comparing ints to strings
            if name.startswith(regional_prefixes):
                return (0, int(paramutils.simplify_state_name(name)))
            return (1, name)

        fig, ax = plotting.mpl_init()
        fig.set_size_inches(plotting.plot_ratios[utils.get_region(gene_name)])

        print(utils.color_gene(utils.unsanitize_name(gene_name)))
        legend_colors = set()  # add a color to this the first time you plot it
        for ibin, state in enumerate(model.states):
            # bin label
            ax.text(-0.5 + ibin, -0.075, paramutils.simplify_state_name(state.name), rotation='vertical', size=8)

            total = 0.0
            for to_state in sorted(state.transitions.keys(), key=stacking_order):
                prob = state.transitions[to_state]

                is_regional = to_state.startswith(regional_prefixes)
                if not is_regional and 'insert' in to_state:
                    label, color = 'insert', '#3498db'  # blue
                elif to_state == 'end':
                    label, color = 'end', 'red'
                else:  # regional/internal states
                    assert is_regional
                    label, color = 'internal', 'green'

                # only the first line of each color gets a legend entry
                label_to_use = None
                if color not in legend_colors:
                    label_to_use = label
                    legend_colors.add(color)

                # horizontal line at height total+prob
                ax.plot([-0.5 + ibin, 0.5 + ibin], [total + prob, total + prob], color=color, linewidth=width, alpha=alpha, label=label_to_use)

                # vertical line from total to total + prob
                ax.plot([ibin, ibin], [total + 0.01, total + prob], color=color, alpha=alpha, linewidth=width)

                # NOTE labelling the midpoint of each chunk (at 0.5*(prob + 2*total)) with the simplified state name would be nice, but there isn't really room for it

                total += prob

        ax.get_xaxis().set_visible(False)
        plotting.mpl_finish(ax, self.base_plotdir + '/transitions', gene_name, ybounds=(-0.01, 1.01), xbounds=(-3, len(model.states) + 3), leg_loc=(0.95, 0.1), adjust={'left' : 0.1, 'right' : 0.8}, leg_prop={'size' : 8})
Ejemplo n.º 4
0
def make_mutefreq_plot(plotdir, gene_name, positions):
    """
    Draw, with ROOT, one bin per entry of <positions> showing the stacked frequency of each base.

    Each entry of <positions> is a dict with a 'name' and a 'nuke_freqs' mapping from base to
    frequency. Within a bin the bases are stacked from most to least frequent, each drawn as a
    colored vertical line. The bin is labelled underneath with simplify_state_name() of the
    position's name. The canvas is saved to <plotdir>/plots/<gene_name>.png.
    """
    nuke_colors = {'A':kRed+1, 'C':kBlue-7, 'G':kOrange-3, 'T':kGreen+2}

    # everything that gets created is stored here, keyed by position name (presumably so the ROOT objects stay referenced until the canvas is saved)
    drawn_name_texts, lines, vlines, texts = {}, {}, {}, {}
    n_bins = 0
    for info in positions:
        posname = info['name']
        left_edge, right_edge = -0.5 + n_bins, 0.5 + n_bins

        # make label below bin
        name_box = TPaveText(left_edge, -0.1, right_edge, -0.05)
        drawn_name_texts[posname] = name_box
        name_box.SetBorderSize(0)
        name_box.SetFillColor(0)
        name_box.SetFillStyle(0)
        name_box.AddText(left_edge, -0.075, simplify_state_name(posname))

        horizontals, verticals, base_labels = [], [], []
        lines[posname], vlines[posname], texts[posname] = horizontals, verticals, base_labels
        bottom = 0.0
        most_frequent_first = sorted(info['nuke_freqs'].items(), key=operator.itemgetter(1), reverse=True)
        for nuke, prob in most_frequent_first:
            # horizontal line at the top of this base's chunk
            hline = TLine(left_edge, bottom + prob, right_edge, bottom + prob)
            horizontals.append(hline)
            hline.SetLineWidth(6)

            # vertical line spanning this base's chunk
            vline = TLine(n_bins, bottom, n_bins, bottom + prob)
            verticals.append(vline)
            vline.SetLineWidth(6)
            vline.SetLineColor(nuke_colors[nuke])

            # [ACGT] label at the middle of the chunk
            midpoint = 0.5*(prob + 2*bottom)
            base_label = TPaveText(left_edge, midpoint-0.04, right_edge, midpoint + 0.01)
            base_labels.append(base_label)
            base_label.AddText(left_edge, midpoint, nuke)
            base_label.SetBorderSize(0)
            base_label.SetFillColor(0)
            base_label.SetFillStyle(0)

            bottom += prob

        n_bins += 1

    cvn = TCanvas('cvn-2', '', 1000, 300)
    # empty histogram that just provides the axes
    hframe = TH1D(gene_name + '-emission-frame', utils.unsanitize_name(gene_name), n_bins, -0.5, n_bins - 0.5)
    hframe.SetNdivisions(202, 'y')
    hframe.SetNdivisions(0, 'x')
    hframe.Draw()

    for posname in lines.keys():
        drawn_name_texts[posname].Draw()
        # only the vertical lines are drawn: the horizontal lines don't seem to be needed any more, and the color already identifies the base, so the labels are left off too
        for vline in vlines[posname]:
            vline.Draw()

    cvn.SaveAs(plotdir + '/plots/' + gene_name + '.png')
Ejemplo n.º 5
0
def make_mutefreq_plot(plotdir, gene_name, positions):
    """
    Plot the stacked per-base frequencies for each entry of <positions>, one bin per entry.

    Each entry of <positions> is a dict with a 'name' and a 'nuke_freqs' mapping from base
    (A, C, G or T) to frequency. Within a bin the bases are stacked from most to least frequent,
    each drawn as a horizontal line at the running total plus a vertical line spanning its own
    share. The figure is finished with plotting.mpl_finish(), using <plotdir> as the plot
    directory and <gene_name> as the plot name.

    NOTE shares a lot with make_transition_plot() in bin/plot-hmms.py.
    """
    import plotting  # deliberately imported at function scope, as in the original
    nuke_colors = {'A' : 'red', 'C' : 'blue', 'G' : 'orange', 'T' : 'green'}
    fig, ax = plotting.mpl_init()
    fig.set_size_inches(plotting.plot_ratios[utils.get_region(gene_name)])

    print(utils.color_gene(utils.unsanitize_name(gene_name)))
    legend_colors = set()  # colors that already have a legend entry
    alpha = 0.6
    for ibin, info in enumerate(positions):
        posname = info['name']

        # make label below bin
        ax.text(-0.5 + ibin, -0.075, simplify_state_name(posname), rotation='vertical', size=8)

        total = 0.0
        for nuke, prob in sorted(info['nuke_freqs'].items(), key=operator.itemgetter(1), reverse=True):
            color = nuke_colors[nuke]

            # only the first line of each color gets a legend entry
            label_to_use = None
            if color not in legend_colors:
                label_to_use = nuke
                legend_colors.add(color)

            # horizontal line at height total+prob
            ax.plot([-0.5 + ibin, 0.5 + ibin], [total + prob, total + prob], color=color, alpha=alpha, linewidth=3, label=label_to_use)

            # vertical line from total to total + prob
            ax.plot([ibin, ibin], [total + 0.01, total + prob], color=color, alpha=alpha, linewidth=3)

            total += prob

    ax.get_xaxis().set_visible(False)
    plotting.mpl_finish(ax, plotdir, gene_name, ybounds=(-0.01, 1.01), xbounds=(-3, len(positions) + 3), leg_loc=(0.95, 0.1), adjust={'left' : 0.1, 'right' : 0.8}, leg_prop={'size' : 8})
Ejemplo n.º 6
0
def make_mutefreq_plot(plotdir, gene_name, positions):
    """
    Draw one bin per entry of <positions>, stacking the frequency of each base within the bin.

    Every entry is a dict holding a 'name' and a 'nuke_freqs' mapping from base (A, C, G or T) to
    frequency. Bases are stacked from most to least frequent; each contributes a horizontal line
    at the running total and a vertical line covering its own share. plotting.mpl_finish() is
    called with <plotdir> as the plot directory and <gene_name> as the plot name.

    NOTE shares a lot with make_transition_plot() in bin/plot-hmms.py.
    """
    base_to_color = {'A' : 'red', 'C' : 'blue', 'G' : 'orange', 'T' : 'green'}
    line_style = {'alpha' : 0.6, 'linewidth' : 3}  # shared by every line we draw

    fig, ax = plotting.mpl_init()
    fig.set_size_inches(plotting.plot_ratios[utils.get_region(gene_name)])

    print(utils.color_gene(utils.unsanitize_name(gene_name)))
    colors_in_legend = set()
    for ibin, info in enumerate(positions):
        # label underneath the bin
        bin_label = simplify_state_name(info['name'])
        ax.text(-0.5 + ibin, -0.075, bin_label, rotation='vertical', size=8)

        bottom = 0.0
        ranked_bases = sorted(info['nuke_freqs'].items(), key=operator.itemgetter(1), reverse=True)
        for nuke, prob in ranked_bases:
            color = base_to_color[nuke]

            # a color is only labelled the first time it is drawn, so the legend has one entry per base
            if color in colors_in_legend:
                legend_label = None
            else:
                legend_label = nuke
                colors_in_legend.add(color)

            # horizontal line at the top of this base's chunk
            ax.plot([-0.5 + ibin, 0.5 + ibin], [bottom + prob, bottom + prob], color=color, label=legend_label, **line_style)

            # vertical line spanning the chunk (starting slightly above its bottom)
            ax.plot([ibin, ibin], [bottom + 0.01, bottom + prob], color=color, **line_style)

            bottom += prob

    ax.get_xaxis().set_visible(False)
    plotting.mpl_finish(ax, plotdir, gene_name, ybounds=(-0.01, 1.01), xbounds=(-3, len(positions) + 3), leg_loc=(0.95, 0.1), adjust={'left' : 0.1, 'right' : 0.8}, leg_prop={'size' : 8})
Ejemplo n.º 7
0
def plot_single_variable(args, varname, hlist, outdir, pathnameclues):
    """
    Work out titles, bounds, legend placement and other options for <varname>, then overlay the
    histograms in <hlist> with plotting.draw_no_root().

    args: options object; reads extra_stats, normalize, performance_plots, glfo, locus,
        translegend, linewidths, colors and log.
    varname: name of the variable being plotted (also used as the plot name).
    hlist: histograms to overlay; hlist[0] is the primary one, so the list must not be empty.
    outdir: passed to draw_no_root() as the plot directory.
    pathnameclues: searched (with `in`) for fragments such as 'mute-freqs/v' that switch on
        special-case options.

    Raises Exception if <varname> contains '-mean-bins'.
    """
    is_gene_usage = varname in plotconfig.gene_usage_columns
    if is_gene_usage:
        hlist = plotting.add_bin_labels_not_in_all_hists(hlist)

    no_labels = False
    xline, bounds, figsize = None, None, None
    stats = args.extra_stats
    translegend = [0.0, -0.2]
    xtitle = hlist[0].xtitle
    if xtitle == '':  # arg, plotting.py thinks default should be None, hist.py thinks it's ''
        xtitle = None
    if '-mean-bins' in varname:
        raise Exception(
            'darn, I was hoping I wasn\'t making these plots any more')
    plottitle = plotconfig.plot_titles.get(varname, varname)

    ytitle = 'frequency' if args.normalize else 'counts'

    if 'mute-freqs/v' in pathnameclues or 'mute-freqs/d' in pathnameclues or 'mute-freqs/j' in pathnameclues:
        assert not args.normalize
        ytitle = 'mutation freq'

    if is_gene_usage:
        xtitle = 'allele'
        if hlist[0].n_bins == 2:
            stats = ' 0-bin'  # print the fraction of entries in the zero bin into the legend (i.e. the fraction correct)

    # NOTE the bounds lookups use .get() so that a missing <varname> doesn't get inserted into the shared plotconfig dicts
    line_width_override = None
    if args.performance_plots:
        if 'hamming_to_true_naive' in varname:
            xtitle = 'hamming distance'
            if '_normed' in varname:
                xtitle = 'fractional ' + xtitle
        elif '_vs_mute_freq' in varname:
            xtitle = 'mutation freq'
            ytitle = 'fraction correct'
            if varname[0] in ('v', 'j'):
                translegend = [-0.4, -0.4]
        elif varname.find('_gene') == 1:  # i.e. '<one character>_gene...'
            xtitle = ''
            ytitle = 'fraction correct'
        else:
            xtitle = 'inferred - true'
        bounds = plotconfig.true_vs_inferred_hard_bounds.get(varname)
    else:
        bounds = plotconfig.default_hard_bounds.get(varname)
        if bounds is None and 'insertion' in varname:
            bounds = plotconfig.default_hard_bounds.get('all_insertions')
        if is_gene_usage:
            no_labels = True
            if 'j_' not in varname:
                figsize = (10, 5)
            line_width_override = 1
        elif 'per-gene-per-position/v' in pathnameclues:
            figsize = (20, 5)
            bounds = plotconfig.default_hard_bounds.get(utils.unsanitize_name(varname))

    if 'IG' in varname or 'TR' in varname:  # <varname> contains a gene name
        if 'mute-freqs' in pathnameclues:
            gene = utils.unsanitize_name(varname)
            region = utils.get_region(gene)
            plottitle = gene  # + ' -- mutation frequency'
            xtitle = 'position'
            if region == 'j':
                translegend = [0.1, 0.]  #(-0.35, -0.02)
            else:
                translegend = [0.15, -0.02]
            # draw a vertical line at the conserved codon position, if we have the info for this region
            xline = None
            if args.glfo is not None and region in utils.conserved_codons[args.locus]:
                codon = utils.conserved_codons[args.locus][region]
                xline = args.glfo[codon + '-positions'][gene]
        else:
            # <varname> is '<gene>-<base variable name>', so split on the last dash
            ilastdash = varname.rfind('-')
            gene = utils.unsanitize_name(varname[:ilastdash])
            base_varname = varname[ilastdash + 1:]
            plottitle = gene + ' -- ' + plotconfig.plot_titles.get(base_varname, '')

    if len(hlist) > 9:  # skootch it down so they (maybe) all fit
        translegend[1] -= 0.5
    if args.translegend is not None:  # override with the command line
        translegend = args.translegend
    if args.extra_stats == 'auto':  # kind of hackey
        stats = 'absmean' if xtitle == 'inferred - true' else 'mean'

    # draw it
    if line_width_override is not None:
        linewidths = [line_width_override]
    else:
        linewidths = args.linewidths
    alphas = [0.6] * len(hlist)
    plotting.draw_no_root(
        hlist[0],
        plotname=varname,
        plotdir=outdir,
        more_hists=hlist[1:],
        write_csv=False,
        stats=stats,
        bounds=bounds,
        shift_overflows=(os.path.basename(outdir) != 'gene-call'),
        plottitle=plottitle,
        colors=args.colors,
        xtitle=xtitle,
        ytitle=ytitle,
        xline=xline,
        normalize=(args.normalize and '_vs_mute_freq' not in varname),
        linewidths=linewidths,
        alphas=alphas,
        errors=True,
        figsize=figsize,
        no_labels=no_labels,
        log=args.log,
        translegend=translegend)
Ejemplo n.º 8
0
    def make_transition_plot(self, gene_name, model):
        """
        Draw, with ROOT, the transition probabilities out of each state in <model>, one bin per state.

        If the region of <gene_name> is in <self.skip_boring_states>, states other than 'init' that
        have a single transition are left out when that transition goes to 'end' or to the next
        numbered state. Within a bin, each transition gets a horizontal line at the running total
        and a text label with the simplified destination name. The canvas is saved to
        <self.base_plotdir>/transitions/plots/<gene_name>.png.
        """
        def is_boring(state):
            # states that do nothing but pass straight on to 'end' or to the next state
            if utils.get_region(gene_name) not in self.skip_boring_states:
                return False
            if state.name == 'init' or len(state.transitions) != 1:
                return False
            only_target = list(state.transitions.keys())[0]
            if only_target == 'end':
                return True
            return find_state_number(state.name) + 1 == find_state_number(only_target)

        # everything that gets created is stored in these, keyed by state name
        drawn_name_texts, lines, texts = {}, {}, {}
        n_bins = 0
        for state in model.states:
            if is_boring(state):
                continue

            left_edge, right_edge = -0.5 + n_bins, 0.5 + n_bins

            # label underneath the bin
            name_box = TPaveText(left_edge, -0.1, right_edge, -0.05)
            drawn_name_texts[state.name] = name_box
            name_box.SetBorderSize(0)
            name_box.SetFillColor(0)
            name_box.SetFillStyle(0)
            name_box.AddText(left_edge, -0.075, paramutils.simplify_state_name(state.name))

            # sort by state number for the IGH states, and by name for everything else
            sort_values = {}
            for name in state.transitions.keys():
                if name.startswith('IGH'):
                    sort_values[name] = int(paramutils.simplify_state_name(name))
                else:
                    sort_values[name] = name
            ordered_targets = sorted(sort_values.items(), key=operator.itemgetter(1))

            state_lines, state_texts = [], []
            lines[state.name], texts[state.name] = state_lines, state_texts
            bottom = 0.0
            for to_state, _ in ordered_targets:
                prob = state.transitions[to_state]

                # horizontal line at the top of this transition's chunk
                hline = TLine(left_edge, bottom + prob, right_edge, bottom + prob)
                state_lines.append(hline)
                hline.SetLineColor(kGreen+2)
                hline.SetLineWidth(6)

                # destination name in the middle of the chunk
                midpoint = 0.5*(prob + 2*bottom)
                target_label = TPaveText(left_edge, midpoint-0.04, right_edge, midpoint + 0.01)
                state_texts.append(target_label)
                target_label.AddText(left_edge, midpoint, paramutils.simplify_state_name(to_state))
                target_label.SetBorderSize(0)
                target_label.SetFillColor(0)
                target_label.SetFillStyle(0)

                bottom += prob

            n_bins += 1

        cvn = TCanvas('mod-cvn', '', 1000, 400)
        # empty histogram that just provides the axes and the title
        hframe = TH1D(model.name + '-transition-frame', utils.unsanitize_name(model.name), n_bins, -0.5, n_bins - 0.5)
        if utils.get_region(gene_name) in self.skip_boring_states:
            hframe.SetTitle(hframe.GetTitle() + ' (skipped boring states)')
        hframe.SetNdivisions(202, 'y')
        hframe.SetNdivisions(0, 'x')
        hframe.Draw()

        for state_name in lines.keys():
            drawn_name_texts[state_name].Draw()
            for hline, target_label in zip(lines[state_name], texts[state_name]):
                hline.Draw()
                target_label.Draw()

        cvn.SaveAs(self.base_plotdir + '/transitions/plots/' + gene_name + '.png')
Ejemplo n.º 9
0
    def make_transition_plot(self, gene_name, model):
        """
        Make a ROOT plot with one bin per state of <model>, showing that state's transition probabilities.

        When the region of <gene_name> is listed in <self.skip_boring_states>, a state (other than
        'init') with exactly one transition is skipped if that transition leads to 'end' or to the
        state numbered one higher. For every remaining state, each transition is drawn as a
        horizontal line at the cumulative probability, labelled with the simplified name of its
        destination. The result is written to
        <self.base_plotdir>/transitions/plots/<gene_name>.png.
        """
        drawn_name_texts, lines, texts = {}, {}, {}  # all the objects we make, keyed by state name
        bins_so_far = 0
        for state in model.states:
            might_be_boring = (utils.get_region(gene_name) in self.skip_boring_states
                               and state.name != 'init'
                               and len(state.transitions) == 1)
            if might_be_boring:  # skip uninteresting states
                sole_target = list(state.transitions.keys())[0]
                goes_to_end = sole_target == 'end'  # skip states with only transitions to end
                # skip states with only transitions to next state
                if goes_to_end or find_state_number(state.name) + 1 == find_state_number(sole_target):
                    continue

            xlo = -0.5 + bins_so_far
            xhi = 0.5 + bins_so_far

            # state name underneath the bin
            drawn_name_texts[state.name] = TPaveText(xlo, -0.1, xhi, -0.05)
            below_bin = drawn_name_texts[state.name]
            below_bin.SetBorderSize(0)
            below_bin.SetFillColor(0)
            below_bin.SetFillStyle(0)
            below_bin.AddText(xlo, -0.075, paramutils.simplify_state_name(state.name))

            # IGH states are ordered by their number, anything else by its name
            order_by = {}
            for name in state.transitions.keys():
                is_igh = name.find('IGH') == 0
                order_by[name] = int(paramutils.simplify_state_name(name)) if is_igh else name
            in_order = sorted(order_by.items(), key=operator.itemgetter(1))

            lines[state.name] = []
            texts[state.name] = []
            cumulative = 0.0
            for to_state, _ in in_order:
                prob = state.transitions[to_state]
                top = cumulative + prob

                # horizontal line at the cumulative probability
                lines[state.name].append(TLine(xlo, top, xhi, top))
                newest_line = lines[state.name][-1]
                newest_line.SetLineColor(kGreen + 2)
                newest_line.SetLineWidth(6)

                # destination name half way up this transition's chunk
                midpoint = 0.5 * (prob + 2 * cumulative)
                texts[state.name].append(TPaveText(xlo, midpoint - 0.04, xhi, midpoint + 0.01))
                newest_text = texts[state.name][-1]
                newest_text.AddText(xlo, midpoint, paramutils.simplify_state_name(to_state))
                newest_text.SetBorderSize(0)
                newest_text.SetFillColor(0)
                newest_text.SetFillStyle(0)

                cumulative += prob

            bins_so_far += 1

        cvn = TCanvas('mod-cvn', '', 1000, 400)
        n_bins = bins_so_far
        frame_name = model.name + '-transition-frame'
        frame_title = utils.unsanitize_name(model.name)
        hframe = TH1D(frame_name, frame_title, n_bins, -0.5, n_bins - 0.5)  # empty histogram providing axes and title
        if utils.get_region(gene_name) in self.skip_boring_states:
            hframe.SetTitle(hframe.GetTitle() + ' (skipped boring states)')
        hframe.SetNdivisions(202, 'y')
        hframe.SetNdivisions(0, 'x')
        hframe.Draw()

        for state_name in lines.keys():
            drawn_name_texts[state_name].Draw()
            for itrans, hline in enumerate(lines[state_name]):
                hline.Draw()
                texts[state_name][itrans].Draw()

        outfname = self.base_plotdir + '/transitions/plots/' + gene_name + '.png'
        cvn.SaveAs(outfname)
Ejemplo n.º 10
0
def compare_directories(args, xtitle='', use_hard_bounds=''):
    """
    Read the histograms found in each of <args.plotdirs> (via get_hists_from_dir()), and overlay them on new plots in <args.outdir>.

    The variables that get plotted are those found in the first directory. If a <varname> is missing from one of the
    other directories, its name is printed and an empty histogram (Hist(1, 0, 1)) is used in its place, i.e. the plot
    is still made.

    NOTE the <xtitle> argument is overwritten for every variable before it is used, and <use_hard_bounds> is never read.
    NOTE the mean-info code paths are disabled: they raise (or assert False) as soon as <args.calculate_mean_info> is set.

    Unless <args.only_csv_plots> is set, finishes by calling make_html() on <args.outdir>.
    """
    # print 'TODO move csvs to a subdir not named "plots"'
    # utils.prep_dir(args.outdir + '/plots', multilings=['*.png', '*.svg', '*.csv'])

    utils.prep_dir(args.outdir, multilings=['*.png', '*.svg', '*.csv'])
    if args.leaves_per_tree is not None:
        assert len(args.leaves_per_tree) == len(args.plotdirs)

    # read hists from <args.plotdirs>
    hists = []
    for idir in range(len(args.plotdirs)):
        string_to_ignore = None if args.strings_to_ignore is None else args.strings_to_ignore[idir]
        hist_list = get_hists_from_dir(args.plotdirs[idir], args.names[idir], string_to_ignore=string_to_ignore)
        hists.append(hist_list)

    # then loop over all the <varname>s we found
    all_names, all_means, all_sems, all_normalized_means = [], [], [], []
    # ----------------------------------------------------------------------------------------
    # vs_rebin = 2
    vs_rebin = 1
    if 'v_gene_right_vs_mute_freq' in hists[0].keys():
        add_gene_calls_vs_mute_freq_plots(args, hists, rebin=vs_rebin)
    # ----------------------------------------------------------------------------------------
    for varname, hist in hists[0].items():
        # add the hists
        all_hists = [hist,]
        missing_hist = False  # NOTE never read
        for idir in range(1, len(args.plotdirs)):
            try:  # add the hist
                all_hists.append(hists[idir][varname])
            except KeyError:  # oops, didn't find it in this dir, so say so and substitute an empty hist
                print args.names[idir], varname
                all_hists.append(Hist(1, 0, 1))

        if '_gene' in varname and '_vs_' not in varname:  # for the gene usage frequencies we need to make sure all the plots have the genes in the same order
            all_hists = add_bin_labels_not_in_all_hists(all_hists)

        if args.calculate_mean_info:
            raise Exception('needs updating (at least to remove plots/ )')
            # NOTE everything below in this block is unreachable until the raise above is removed
            meaninfo = get_mean_info(all_hists)
            all_names.append(varname)
            all_means.append(meaninfo['means'])
            all_sems.append(meaninfo['sems'])
            all_normalized_means.append(meaninfo['normalized_means'])
            meaninfo['mean_bin_hist'].write(args.outdir + '/plots/' + varname + '-mean-bins.csv')

        # per-variable plot configuration: start from the defaults, then apply the special cases below
        bounds, no_labels, figsize = None, False, None
        translegend = (0.0, -0.2)
        extrastats, log = '', ''
        xtitle, ytitle, xline, normalization_bounds = hist.xtitle, hist.ytitle, None, None  # NOTE this discards the <xtitle> argument
        simplevarname = varname.replace('-mean-bins', '')
        plottitle = plotconfig.plot_titles[simplevarname] if simplevarname in plotconfig.plot_titles else simplevarname

        if args.normalize:
            ytitle = 'frequency'

        if 'mute-freqs/v' in args.plotdirs[0] or 'mute-freqs/d' in args.plotdirs[0] or 'mute-freqs/j' in args.plotdirs[0]:
            assert not args.normalize
            ytitle = 'mutation freq'

        if '_gene' in varname and '_vs_' not in varname:
            xtitle = 'allele'
            if hist.n_bins == 2:
                extrastats = ' 0-bin'  # print the fraction of entries in the zero bin into the legend (i.e. the fraction correct)
        else:
            xtitle = 'bases'

        line_width_override = None
        rebin = args.rebin
        errors = not args.no_errors
        if args.plot_performance:
            if 'hamming_to_true_naive' in varname:
                xtitle = 'hamming distance'
                if '_normed' in varname:
                    xtitle = 'fractional ' + xtitle
            elif '_vs_mute_freq' in varname:
                xtitle = 'mutation freq'
                ytitle = 'fraction correct'
                if varname[0] == 'v' or varname[0] == 'j':
                    translegend = (-0.4, -0.4)
                # errors = True
                rebin = vs_rebin
            else:
                xtitle = 'inferred - true'
            bounds = plotconfig.true_vs_inferred_hard_bounds.setdefault(varname, None)  # NOTE setdefault() also inserts a None entry into the plotconfig dict when <varname> is missing
        else:
            bounds = plotconfig.default_hard_bounds.setdefault(varname.replace('-mean-bins', ''), None)
            if bounds is None and 'insertion' in varname:
                bounds = plotconfig.default_hard_bounds.setdefault('all_insertions', None)
            if '_gene' in varname and '_vs_' not in varname:
                no_labels = True
                if 'j_' not in varname:
                    figsize = (10, 5)
                line_width_override = 1
            elif 'mute-freqs/v' in args.plotdirs[0] or 'mute-freqs/j' in args.plotdirs[0]:
                figsize = (10, 5)
                bounds = plotconfig.default_hard_bounds.setdefault(utils.unsanitize_name(varname.replace('-mean-bins', '')), None)

        if 'IGH' in varname:  # <varname> contains a gene name
            if 'mute-freqs' in args.plotdirs[0]:
                gene = utils.unsanitize_name(simplevarname)
                plottitle = gene  # + ' -- mutation frequency'
                xtitle = 'position'
                if utils.get_region(gene) == 'j':
                    translegend = (0.1, 0.)  #(-0.35, -0.02)
                else:
                    translegend = (0.15, -0.02)
                # draw a vertical line at the cyst (v) or tryp (j) position, if we were given them
                xline = None
                if utils.get_region(gene) == 'v' and args.cyst_positions is not None:
                    xline = args.cyst_positions[gene]
                    # normalization_bounds = (int(cyst_positions[gene]) - 70, None)
                elif utils.get_region(gene) == 'j' and args.tryp_positions is not None:
                    xline = args.tryp_positions[gene]
                    # normalization_bounds = (None, int(tryp_positions[gene]) + 5)
            else:
                # <simplevarname> is '<gene>-<base variable name>', so split on the last dash
                ilastdash = simplevarname.rfind('-')
                gene = utils.unsanitize_name(simplevarname[:ilastdash])
                base_varname = simplevarname[ilastdash + 1 :]
                base_plottitle = plotconfig.plot_titles[base_varname] if base_varname in plotconfig.plot_titles else ''
                plottitle = gene + ' -- ' + base_plottitle

        # finally, draw the plot
        linewidths = [line_width_override, ] if line_width_override is not None else args.linewidths
        assert args.leaves_per_tree is None  # NOTE so the length check on <args.leaves_per_tree> at the top can only pass if there are no plots to make
        # scale_errors = math.sqrt(args.leaves_per_tree[idir]) if args.leaves_per_tree is not None else args.scale_errors
        draw_no_root(all_hists[0], plotname=varname, plotdir=args.outdir, more_hists=all_hists[1:], write_csv=False, stats=args.stats + ' ' + extrastats, bounds=bounds,
                     shift_overflows=False, errors=errors, scale_errors=args.scale_errors, rebin=rebin, plottitle=plottitle, colors=args.colors, linestyles=args.linestyles,
                     xtitle=xtitle, ytitle=ytitle, xline=xline, normalize=(args.normalize and '_vs_mute_freq' not in varname),
                     linewidths=linewidths, markersizes=args.markersizes, figsize=figsize, no_labels=no_labels, log=log, translegend=translegend, alphas=args.alphas)

    if args.calculate_mean_info:
        assert False
        # NOTE unreachable (at least without -O) because of the assert above
        # write mean info
        with opener('w')(args.outdir + '/plots/means.csv') as meanfile:
            writer = csv.DictWriter(meanfile, ('name', 'means', 'sems', 'normalized-means'))
            writer.writeheader()
            for ivar in range(len(all_means)):
                writer.writerow({
                    'name':all_names[ivar],
                    'means':':'.join([str(m) for m in all_means[ivar]]),
                    'sems':':'.join([str(s) for s in all_sems[ivar]]),
                    'normalized-means':':'.join([str(nm) for nm in all_normalized_means[ivar]])
                })

    if not args.only_csv_plots:
        make_html(args.outdir)
Ejemplo n.º 11
0
def compare_directories(args, xtitle='', use_hard_bounds=''):
    """ 
    Read all the histograms stored as .csv files in <args.plotdirs>, and overlay them on a new plot.
    If there's a <varname> that's missing from any dir, we skip that plot entirely and print a warning message.
    """
    utils.prep_dir(args.outdir + '/plots',
                   multilings=['*.png', '*.svg', '*.csv'])
    if args.leaves_per_tree is not None:
        assert len(args.leaves_per_tree) == len(args.plotdirs)

    # read hists from <args.plotdirs>
    hists = []
    for idir in range(len(args.plotdirs)):
        string_to_ignore = None if args.strings_to_ignore is None else args.strings_to_ignore[
            idir]
        hists.append(
            get_hists_from_dir(args.plotdirs[idir] + '/plots',
                               args.names[idir],
                               string_to_ignore=string_to_ignore))

    # then loop over all the <varname>s we found
    all_names, all_means, all_sems, all_normalized_means = [], [], [], []
    # ----------------------------------------------------------------------------------------
    vs_rebin = 2
    if 'v_gene_right_vs_mute_freq' in hists[0].keys():
        add_gene_calls_vs_mute_freq_plots(args, hists, rebin=vs_rebin)
    # ----------------------------------------------------------------------------------------
    for varname, hist in hists[0].items():
        # add the hists
        all_hists = [
            hist,
        ]
        missing_hist = False
        for idir in range(1, len(args.plotdirs)):
            try:  # add the hist
                all_hists.append(hists[idir][varname])
            except KeyError:  # oops, didn't find it in this dir, so skip this variable entirely
                print args.names[idir], varname
                all_hists.append(TH1D())

        if '_gene' in varname and '_vs_' not in varname:  # for the gene usage frequencies we need to make sure all the plots have the genes in the same order
            all_hists = add_bin_labels_not_in_all_hists(all_hists)

        if not args.dont_calculate_mean_info:
            meaninfo = get_mean_info(all_hists)
            all_names.append(varname)
            all_means.append(meaninfo['means'])
            all_sems.append(meaninfo['sems'])
            all_normalized_means.append(meaninfo['normalized_means'])
            meaninfo['mean_bin_hist'].write(args.outdir + '/plots/' + varname +
                                            '-mean-bins.csv')

        # bullshit complicated config stuff
        var_type = 'int' if hist.GetXaxis().GetBinLabel(1) == '' else 'bool'
        bounds, cwidth, cheight, translegend, no_labels = None, None, None, (
            0.0, 0.0), False
        extrastats, log = '', ''
        xtitle, ytitle, xline, draw_str, normalization_bounds = hist.GetXaxis(
        ).GetTitle(), hist.GetYaxis().GetTitle(), None, None, None
        simplevarname = varname.replace('-mean-bins', '')
        plottitle = plotconfig.plot_titles[
            simplevarname] if simplevarname in plotconfig.plot_titles else simplevarname

        if args.normalize:
            ytitle = 'frequency'

        if 'mute-freqs/v' in args.plotdirs[
                0] or 'mute-freqs/d' in args.plotdirs[
                    0] or 'mute-freqs/j' in args.plotdirs[0]:
            assert not args.normalize
            ytitle = 'mutation freq'
            args.graphify = True

        if '_gene' in varname and '_vs_' not in varname:
            xtitle = 'allele'
            gStyle.SetNdivisions(0, "x")
            # gStyle.SetLabelSize(0.00010, 'X')
            if hist.GetNbinsX() == 2:
                extrastats = ' 0-bin'  # print the fraction of entries in the zero bin into the legend (i.e. the fraction correct)
            if 'v_gene' in varname:
                pass
                # log += 'y'
        else:
            gStyle.SetNdivisions(505, "x")
            xtitle = 'bases'

        line_width_override = None
        rebin = args.rebin
        errors = not args.no_errors
        if args.plot_performance:
            if 'hamming_to_true_naive' in varname:
                xtitle = 'hamming distance'
                if '_normed' in varname:
                    xtitle = 'fractional ' + xtitle
            elif '_vs_mute_freq' in varname:
                xtitle = 'mutation freq'
                ytitle = 'fraction correct'
                if varname[0] == 'v' or varname[0] == 'j':
                    translegend = (-0.4, -0.4)
                # errors = True
                rebin = vs_rebin
            else:
                xtitle = 'inferred - true'
            bounds = plotconfig.true_vs_inferred_hard_bounds.setdefault(
                varname, None)
        else:
            bounds = plotconfig.default_hard_bounds.setdefault(
                varname.replace('-mean-bins', ''), None)
            if bounds is None and 'insertion' in varname:
                bounds = plotconfig.default_hard_bounds.setdefault(
                    'all_insertions', None)
            if '_gene' in varname and '_vs_' not in varname:
                no_labels = True
                if 'j_' not in varname:
                    cwidth, cheight = 1000, 500
                line_width_override = 1
            elif 'mute-freqs/v' in args.plotdirs[
                    0] or 'mute-freqs/j' in args.plotdirs[0]:
                cwidth, cheight = 1000, 500
                bounds = plotconfig.default_hard_bounds.setdefault(
                    utils.unsanitize_name(varname.replace('-mean-bins', '')),
                    None)

        if 'IGH' in varname:
            if 'mute-freqs' in args.plotdirs[0]:
                gene = utils.unsanitize_name(simplevarname)
                plottitle = gene  # + ' -- mutation frequency'
                xtitle = 'position'
                if utils.get_region(gene) == 'j':
                    translegend = (0.1, 0.)  #(-0.35, -0.02)
                else:
                    translegend = (0.15, -0.02)
                xline = None
                if utils.get_region(
                        gene) == 'v' and args.cyst_positions is not None:
                    xline = args.cyst_positions[gene]['cysteine-position']
                    # normalization_bounds = (int(cyst_positions[gene]['cysteine-position']) - 70, None)
                elif utils.get_region(
                        gene) == 'j' and args.tryp_positions is not None:
                    xline = int(args.tryp_positions[gene])
                    # normalization_bounds = (None, int(tryp_positions[gene]) + 5)
            else:
                ilastdash = simplevarname.rfind('-')
                gene = utils.unsanitize_name(simplevarname[:ilastdash])
                base_varname = simplevarname[ilastdash + 1:]
                base_plottitle = plotconfig.plot_titles[
                    base_varname] if base_varname in plotconfig.plot_titles else ''
                plottitle = gene + ' -- ' + base_plottitle

        # draw that little #$*(!
        linewidths = [
            line_width_override,
        ] if line_width_override is not None else args.linewidths
        assert args.leaves_per_tree is None
        # scale_errors = math.sqrt(args.leaves_per_tree[idir]) if args.leaves_per_tree is not None else args.scale_errors
        draw(all_hists[0],
             var_type,
             plotname=varname,
             plotdir=args.outdir,
             more_hists=all_hists[1:],
             write_csv=False,
             stats=args.stats + ' ' + extrastats,
             bounds=bounds,
             shift_overflows=False,
             errors=errors,
             scale_errors=args.scale_errors,
             rebin=rebin,
             plottitle=plottitle,
             colors=args.colors,
             linestyles=args.linestyles,
             xtitle=xtitle,
             ytitle=ytitle,
             xline=xline,
             draw_str=draw_str,
             normalize=(args.normalize and '_vs_mute_freq' not in varname),
             normalization_bounds=normalization_bounds,
             linewidths=linewidths,
             markersizes=args.markersizes,
             cwidth=cwidth,
             cheight=cheight,
             no_labels=no_labels,
             graphify=args.graphify,
             log=log,
             translegend=translegend)

    if not args.dont_calculate_mean_info:
        # write mean info
        with opener('w')(args.outdir + '/plots/means.csv') as meanfile:
            writer = csv.DictWriter(
                meanfile, ('name', 'means', 'sems', 'normalized-means'))
            writer.writeheader()
            for ivar in range(len(all_means)):
                writer.writerow({
                    'name':
                    all_names[ivar],
                    'means':
                    ':'.join([str(m) for m in all_means[ivar]]),
                    'sems':
                    ':'.join([str(s) for s in all_sems[ivar]]),
                    'normalized-means':
                    ':'.join([str(nm) for nm in all_normalized_means[ivar]])
                })

    check_call(
        ['./bin/permissify-www', args.outdir]
    )  # NOTE this should really permissify starting a few directories higher up
    check_call(['./bin/makeHtml', args.outdir, '3', 'null', 'svg'])
Ejemplo n.º 12
0
def make_mutefreq_plot(plotdir, gene_name, positions, debug=False):
    import plotting
    """ NOTE shares a lot with make_transition_plot() in bin/plot-hmms.py. """
    nuke_colors = {'A': 'red', 'C': 'blue', 'G': 'orange', 'T': 'green'}
    fig, ax = plotting.mpl_init()
    fig.set_size_inches(plotting.plot_ratios[utils.get_region(gene_name)])

    ibin = 0
    if debug:
        print '  %s' % utils.color_gene(utils.unsanitize_name(gene_name))
    legend_colors = set()
    for info in positions:
        posname = info['name']

        # make label below bin for position and germline nuke
        ax.text(-0.5 + ibin,
                -0.075,
                simplify_state_name(posname),
                rotation='vertical',
                size=8)
        ax.text(-0.5 + ibin,
                -0.15,
                info.get('gl_nuke', '?'),
                fontsize=10,
                fontweight='bold')
        sorted_nukes, _ = zip(*sorted(info['nuke_freqs'].items(),
                                      key=operator.itemgetter(1),
                                      reverse=True))
        if 'gl_nuke' in info and info['gl_nuke'] in info[
                'nuke_freqs']:  # put the germline nuke first if we have it (second clause is for states with germline N))
            sorted_nukes = [info['gl_nuke']] + [
                n for n in sorted_nukes if n != info['gl_nuke']
            ]

        total = 0.0
        alpha = 0.6
        for nuke in sorted_nukes:
            prob = info['nuke_freqs'][nuke]
            color = nuke_colors[nuke]

            label_to_use = None
            if color not in legend_colors:
                label_to_use = nuke
                legend_colors.add(color)

            # horizontal line at height total+prob
            ax.plot([-0.5 + ibin, 0.5 + ibin], [total + prob, total + prob],
                    color=color,
                    alpha=alpha,
                    linewidth=3,
                    label=label_to_use)

            # vertical line from total to total + prob
            ax.plot([ibin, ibin], [total + 0.01, total + prob],
                    color=color,
                    alpha=alpha,
                    linewidth=3)

            # # write [ACGT] at midpoint between total and total+prob
            # midpoint = 0.5*(prob + 2*total)
            # ... *redacted*

            total += prob

        ibin += 1

    ax.get_xaxis().set_visible(False)
    plotting.mpl_finish(ax,
                        plotdir,
                        gene_name,
                        ybounds=(-0.01, 1.01),
                        xbounds=(-3, len(positions) + 3),
                        leg_loc=(0.95, 0.1),
                        adjust={
                            'left': 0.1,
                            'right': 0.8
                        },
                        leg_prop={'size': 8})
Ejemplo n.º 13
0
def plot_single_variable(args, varname, hlist, outdir, pathnameclues):
    """
    Work out the plot configuration (titles, bounds, figure size, legend position, ...) for <varname>, then
    overlay all the hists in <hlist> on one plot with plotting.draw_no_root().

    Arguments:
      args: options object. Attributes read here: normalize, performance_plots, glfo, chain (only if glfo is set), linewidths, colors.
      varname: name of the variable; used as the plot name, and (by substring matching) to choose the configuration
      hlist: the hists to overlay. The first one supplies the default x title and is the main hist handed to draw_no_root().
      outdir: plot dir passed to draw_no_root(). Overflows are shifted unless its basename is 'gene-call'.
      pathnameclues: searched (with <in>) for substrings like 'mute-freqs/v' in order to work out what kind of plot this is

    Raises Exception if <varname> contains '-mean-bins'.
    """
    is_gene_usage = varname in plotconfig.gene_usage_columns
    if is_gene_usage:
        hlist = plotting.add_bin_labels_not_in_all_hists(hlist)

    no_labels = False
    xline, bounds, figsize = None, None, None
    translegend = (0.0, -0.2)
    extrastats, log = '', ''
    xtitle = hlist[0].xtitle
    if xtitle == '':  # arg, plotting.py thinks default should be None, hist.py thinks it's ''
        xtitle = None
    if '-mean-bins' in varname:
        raise Exception('darn, I was hoping I wasn\'t making these plots any more')
    plottitle = plotconfig.plot_titles.get(varname, varname)

    ytitle = 'frequency' if args.normalize else 'counts'

    if 'mute-freqs/v' in pathnameclues or 'mute-freqs/d' in pathnameclues or 'mute-freqs/j' in pathnameclues:
        assert not args.normalize
        ytitle = 'mutation freq'

    if is_gene_usage:
        xtitle = 'allele'
        if hlist[0].n_bins == 2:
            extrastats = ' 0-bin'  # print the fraction of entries in the zero bin into the legend (i.e. the fraction correct)

    line_width_override = None
    # NOTE the bounds lookups use .get() so that we don't insert None entries into the shared plotconfig dicts
    if args.performance_plots:
        if 'hamming_to_true_naive' in varname:
            xtitle = 'hamming distance'
            if '_normed' in varname:
                xtitle = 'fractional ' + xtitle
        elif '_vs_mute_freq' in varname:
            xtitle = 'mutation freq'
            ytitle = 'fraction correct'
            if varname[0] in ('v', 'j'):
                translegend = (-0.4, -0.4)
        elif varname.find('_gene') == 1:  # i.e. '_gene' comes right after the first character
            xtitle = ''
            ytitle = 'fraction correct'
        else:
            xtitle = 'inferred - true'
        bounds = plotconfig.true_vs_inferred_hard_bounds.get(varname)
    else:
        bounds = plotconfig.default_hard_bounds.get(varname)
        if bounds is None and 'insertion' in varname:
            bounds = plotconfig.default_hard_bounds.get('all_insertions')
        if is_gene_usage:
            no_labels = True
            if 'j_' not in varname:
                figsize = (10, 5)
            line_width_override = 1
        elif 'per-gene-per-position/v' in pathnameclues:
            figsize = (20, 5)
            bounds = plotconfig.default_hard_bounds.get(utils.unsanitize_name(varname))

    if 'IG' in varname:
        if 'mute-freqs' in pathnameclues:  # per-gene mutation frequency plot: <varname> is the (sanitized) gene name
            gene = utils.unsanitize_name(varname)
            region = utils.get_region(gene)
            plottitle = gene  # + ' -- mutation frequency'
            xtitle = 'position'
            translegend = (0.1, 0.) if region == 'j' else (0.15, -0.02)
            # draw a vertical line at the conserved codon, if we have germline info and this region has one
            if args.glfo is not None and region in utils.conserved_codons[args.chain]:
                xline = args.glfo[utils.conserved_codons[args.chain][region] + '-positions'][gene]
        else:  # <varname> is of the form <sanitized gene>-<base varname>
            ilastdash = varname.rfind('-')
            gene = utils.unsanitize_name(varname[:ilastdash])
            base_varname = varname[ilastdash + 1:]
            plottitle = gene + ' -- ' + plotconfig.plot_titles.get(base_varname, '')

    # draw that little #$*(!
    linewidths = [line_width_override, ] if line_width_override is not None else args.linewidths
    alphas = [0.6] * len(hlist)
    plotting.draw_no_root(hlist[0], plotname=varname, plotdir=outdir, more_hists=hlist[1:], write_csv=False, stats=extrastats, bounds=bounds,
                          shift_overflows=(os.path.basename(outdir) != 'gene-call'), plottitle=plottitle, colors=args.colors,
                          xtitle=xtitle, ytitle=ytitle, xline=xline, normalize=(args.normalize and '_vs_mute_freq' not in varname),
                          linewidths=linewidths, alphas=alphas, errors=True,
                          figsize=figsize, no_labels=no_labels, log=log, translegend=translegend)
Ejemplo n.º 14
0
def make_mutefreq_plot(plotdir, gene_name, positions):
    """
    Draw the per-position nucleotide frequencies for <gene_name> with ROOT, and save the canvas to
    <plotdir>/plots/<gene_name>.png.

    Each entry of <positions> is a dict with 'name' and 'nuke_freqs' (dict from nucleotide to frequency), and
    gets one bin: a name label below the axis, plus the nucleotides stacked as colored vertical lines in order
    of decreasing frequency. The horizontal lines and the letter labels are built but not drawn at the moment.
    """
    nuke_colors = {'A': kRed + 1, 'C': kBlue - 7, 'G': kOrange - 3, 'T': kGreen + 2}

    # everything is keyed by position name, so the ROOT objects stay referenced until the canvas is saved
    drawn_name_texts, lines, vlines, texts = {}, {}, {}, {}
    n_bins = 0
    for ibin, info in enumerate(positions):
        n_bins = ibin + 1
        posname = info['name']
        xlow, xhigh = -0.5 + ibin, 0.5 + ibin

        # make label below bin
        name_box = TPaveText(xlow, -0.1, xhigh, -0.05)
        name_box.SetBorderSize(0)
        name_box.SetFillColor(0)
        name_box.SetFillStyle(0)
        name_box.AddText(xlow, -0.075, simplify_state_name(posname))
        drawn_name_texts[posname] = name_box

        pos_lines, pos_vlines, pos_texts = [], [], []
        lines[posname], vlines[posname], texts[posname] = pos_lines, pos_vlines, pos_texts
        total = 0.0  # running sum of the frequencies already stacked in this bin
        ranked_freqs = sorted(info['nuke_freqs'].items(), key=operator.itemgetter(1), reverse=True)
        for nuke, prob in ranked_freqs:
            top = total + prob

            # horizontal line at height total+prob
            hline = TLine(xlow, top, xhigh, top)
            pos_lines.append(hline)
            hline.SetLineWidth(6)

            # vertical line from total to total+prob
            vline = TLine(ibin, total, ibin, top)
            pos_vlines.append(vline)
            vline.SetLineWidth(6)
            vline.SetLineColor(nuke_colors[nuke])

            # write [ACGT] at midpoint between total and total+prob
            midpoint = 0.5 * (prob + 2 * total)
            letter_box = TPaveText(xlow, midpoint - 0.04, xhigh, midpoint + 0.01)
            pos_texts.append(letter_box)
            letter_box.AddText(xlow, midpoint, nuke)
            letter_box.SetBorderSize(0)
            letter_box.SetFillColor(0)
            letter_box.SetFillStyle(0)

            total = top

    cvn = TCanvas('cvn-2', '', 1000, 300)
    hframe = TH1D(gene_name + '-emission-frame', utils.unsanitize_name(gene_name), n_bins, -0.5, n_bins - 0.5)
    hframe.SetNdivisions(202, 'y')
    hframe.SetNdivisions(0, 'x')
    hframe.Draw()

    # only the name labels and the vertical lines are drawn: the horizontal lines turned out not to be
    # needed, and the bases can be told apart by color without the letter labels
    for state_name in lines.keys():
        drawn_name_texts[state_name].Draw()
        for vline in vlines[state_name]:
            vline.Draw()

    cvn.SaveAs(plotdir + '/plots/' + gene_name + '.png')
Ejemplo n.º 15
0
# Script body: for each row of the input csv, run a Searcher on the row's sequence, and print a one-line
# summary comparing the inferred gene for each region to the row's true gene.
with opener('r')(infname) as infile:
    germlines = utils.read_germlines('../../../recombinator')
    reader = csv.DictReader(infile)
    for inline in reader:  # each row needs a 'seq' column and a '<region>_gene' column for each region
        print 'searching'
#        inline['seq'] = inline['seq'][-130:]
        searcher = Searcher(inline['seq'], debug=True, n_matches_max=2)
        searcher.search()
        # concatenation of the inferred (and true) gene names over all regions
        inferred_group_str = ''
        true_group_str = ''
        # info for the inferred event: the sequence, and the best-match gene for each region
        outline = {}
        outline['seq'] = inline['seq']
        # NOTE the trailing commas on the prints below keep everything up to the <print ''> on the same line
        print 'RESULT ',
        for region in utils.regions:
            inferred_name = searcher.get_best_match_name(region)
            outline[region + '_gene'] = utils.unsanitize_name(inferred_name)
            true_name = utils.sanitize_name(inline[region + '_gene'])  # sanitized, so it can be compared directly to <inferred_name>

            inferred_group_str += inferred_name
            true_group_str += true_name
            # one marker per region: 'none' if there was no match, '-' if the inferred gene is the true one, 'x' otherwise
            if inferred_name == 'none':
                print ' none',
            elif  inferred_name == true_name:
                print '  -  ',
            else:
                print '  x  ',
        # then the value of searcher.n_tries for each region
        for region in utils.regions:
            print '%3d' % searcher.n_tries[region],
        print ''
        print '  true'
        utils.print_reco_event(germlines, inline, -1, -1)
Ejemplo n.º 16
0
def compare_directories(args, xtitle='', use_hard_bounds=''):
    """ 
    Read all the histograms stored as .csv files in <args.plotdirs>, and overlay them on a new plot.
    If there's a <varname> that's missing from any dir, we skip that plot entirely and print a warning message.
    """
    utils.prep_dir(args.outdir + '/plots', multilings=['*.png', '*.svg', '*.csv'])
    if args.leaves_per_tree is not None:
        assert len(args.leaves_per_tree) == len(args.plotdirs)

    # read hists from <args.plotdirs>
    hists = []
    for idir in range(len(args.plotdirs)):
        string_to_ignore = None if args.strings_to_ignore is None else args.strings_to_ignore[idir]
        hists.append(get_hists_from_dir(args.plotdirs[idir] + '/plots', args.names[idir], string_to_ignore=string_to_ignore))

    # then loop over all the <varname>s we found
    all_names, all_means, all_sems, all_normalized_means = [], [], [], []
    for varname, hist in hists[0].iteritems():
        # add the hists
        all_hists = [hist,]
        missing_hist = False
        for idir in range(1, len(args.plotdirs)):
            try:  # add the hist
                all_hists.append(hists[idir][varname])
            except KeyError:  # oops, didn't find it in this dir, so skip this variable entirely
                print args.names[idir], varname
                all_hists.append(TH1D())

        if '_gene' in varname:  # for the gene usage frequencies we need to make sure all the plots have the genes in the same order
            all_hists = add_bin_labels_not_in_all_hists(all_hists)

        if not args.dont_calculate_mean_info:
            meaninfo = get_mean_info(all_hists)
            all_names.append(varname)
            all_means.append(meaninfo['means'])
            all_sems.append(meaninfo['sems'])
            all_normalized_means.append(meaninfo['normalized_means'])
            meaninfo['mean_bin_hist'].write(args.outdir + '/plots/' + varname + '-mean-bins.csv')

        # bullshit complicated config stuff
        var_type = 'int' if hist.GetXaxis().GetBinLabel(1) == '' else 'bool'
        bounds, cwidth, cheight, translegend, no_labels = None, None, None, (0.0, 0.0), False
        extrastats, log = '', ''
        xtitle, ytitle, xline, draw_str, normalization_bounds = hist.GetXaxis().GetTitle(), hist.GetYaxis().GetTitle(), None, None, None
        simplevarname = varname.replace('-mean-bins', '')
        plottitle = plotconfig.plot_titles[simplevarname] if simplevarname in plotconfig.plot_titles else simplevarname

        if args.normalize:
            ytitle = 'frequency'

        if 'mute-freqs/v' in args.plotdirs[0] or 'mute-freqs/d' in args.plotdirs[0] or 'mute-freqs/j' in args.plotdirs[0]:
            assert not args.normalize
            ytitle = 'mutation freq'
            args.graphify = True

        if '_gene' in varname:
            xtitle = 'allele'
            gStyle.SetNdivisions(0,"x")
            # gStyle.SetLabelSize(0.00010, 'X')
            if hist.GetNbinsX() == 2:
                extrastats = ' 0-bin'  # print the fraction of entries in the zero bin into the legend (i.e. the fraction correct)
            if 'v_gene' in varname:
                pass
                # log += 'y'
        else:
            gStyle.SetNdivisions(505,"x")
            xtitle = 'bases'

        line_width_override = None
        if args.plot_performance:
            if 'hamming_to_true_naive' in varname:
                xtitle = 'hamming distance'
                if '_normed' in varname:
                    xtitle = 'fractional ' + xtitle
            else:
                xtitle = 'inferred - true'
            bounds = plotconfig.true_vs_inferred_hard_bounds.setdefault(varname, None)
        else:
            bounds = plotconfig.default_hard_bounds.setdefault(varname.replace('-mean-bins', ''), None)
            if '_gene' in varname:
                no_labels = True
                if 'j_' not in varname:
                    cwidth, cheight = 1000, 500
                line_width_override = 1
            elif 'mute-freqs/v' in args.plotdirs[0] or 'mute-freqs/j' in args.plotdirs[0]:
                cwidth, cheight = 1000, 500
                bounds = plotconfig.default_hard_bounds.setdefault(utils.unsanitize_name(varname.replace('-mean-bins', '')), None)

        if 'IGH' in varname:
            if 'mute-freqs' in args.plotdirs[0]:
                gene = utils.unsanitize_name(simplevarname)
                plottitle = gene  # + ' -- mutation frequency'
                xtitle = 'position'
                if utils.get_region(gene) == 'j':
                    translegend = (0.1, 0.)  #(-0.35, -0.02)
                else:
                    translegend = (0.15, -0.02)
                xline = None
                if utils.get_region(gene) == 'v' and args.cyst_positions is not None:
                    xline = args.cyst_positions[gene]['cysteine-position']
                    # normalization_bounds = (int(cyst_positions[gene]['cysteine-position']) - 70, None)
                elif utils.get_region(gene) == 'j' and args.tryp_positions is not None:
                    xline = int(args.tryp_positions[gene])
                    # normalization_bounds = (None, int(tryp_positions[gene]) + 5)
            else:
                ilastdash = simplevarname.rfind('-')
                gene = utils.unsanitize_name(simplevarname[:ilastdash])
                base_varname = simplevarname[ilastdash + 1 :]
                base_plottitle = plotconfig.plot_titles[base_varname] if base_varname in plotconfig.plot_titles else ''
                plottitle = gene + ' -- ' + base_plottitle

        # draw that little #$*(!
        linewidths = [line_width_override, ] if line_width_override is not None else args.linewidths
        assert args.leaves_per_tree is None
        # scale_errors = math.sqrt(args.leaves_per_tree[idir]) if args.leaves_per_tree is not None else args.scale_errors
        draw(all_hists[0], var_type, plotname=varname, plotdir=args.outdir, more_hists=all_hists[1:], write_csv=False, stats=args.stats + ' ' + extrastats, bounds=bounds,
             shift_overflows=False, errors=(not args.no_errors), scale_errors=args.scale_errors, rebin=args.rebin, plottitle=plottitle, colors=args.colors, linestyles=args.linestyles,
             xtitle=xtitle, ytitle=ytitle, xline=xline, draw_str=draw_str, normalize=args.normalize, normalization_bounds=normalization_bounds,
             linewidths=linewidths, markersizes=args.markersizes, cwidth=cwidth, cheight=cheight, no_labels=no_labels, graphify=args.graphify, log=log, translegend=translegend)

    if not args.dont_calculate_mean_info:
        # write mean info
        with opener('w')(args.outdir + '/plots/means.csv') as meanfile:
            writer = csv.DictWriter(meanfile, ('name', 'means', 'sems', 'normalized-means'))
            writer.writeheader()
            for ivar in range(len(all_means)):
                writer.writerow({
                    'name':all_names[ivar],
                    'means':':'.join([str(m) for m in all_means[ivar]]),
                    'sems':':'.join([str(s) for s in all_sems[ivar]]),
                    'normalized-means':':'.join([str(nm) for nm in all_normalized_means[ivar]])
                })

    check_call(['./bin/permissify-www', args.outdir])  # NOTE this should really permissify starting a few directories higher up
    check_call(['./bin/makeHtml', args.outdir, '3', 'null', 'svg'])