Esempio n. 1
0
def create_pathway_index_html(data, fitter, fits, basedir, gene_dir,
                              series_dir, use_correlations, b_unique):
    """Render 'pathways.jinja' and write it as the per-pathway fits page.

    Every (gene, region) pair of ``data`` is mapped to its fit, or to None
    when ``fits`` holds no fit for that pair.  As a side effect each fit
    object gets two attributes: ``score`` (the LOO score, taken from
    ``fit.with_correlations`` when ``use_correlations`` is true) and
    ``rank`` (0 for non-positive scores, otherwise ``ceil(n_ranks * score)``).

    The page is written to ``basedir`` as 'pathway-fits.html', or
    'pathway-fits-unique.html' when ``b_unique`` is true.

    ``fitter``, ``gene_dir`` and ``series_dir`` are not used directly here,
    but they reach the template through ``locals()``.
    """
    # NOTE: the template is rendered with **locals(), so every name bound
    # before the render() call is part of the template's context.  Do not
    # rename or drop any of them without checking pathways.jinja.
    dct_pathways = load_17_pathways_breakdown(b_unique)
    n_ranks = 3  # ranks are 0..n_ranks (0 is reserved for non-positive scores)

    # (gene, region) -> fit; start with None everywhere, fill in below
    flat_fits = {}
    for g in data.gene_names:
        for r in data.region_names:
            flat_fits[g, r] = None

    for dsfits in fits.itervalues():
        for (g, r), fit in dsfits.iteritems():
            score = (fit.with_correlations.LOO_score
                     if use_correlations else fit.LOO_score)
            fit.score = score
            if score > 0:
                fit.rank = int(np.ceil(n_ranks * score))
            else:
                fit.rank = 0
            flat_fits[g, r] = fit

    html = get_jinja_env().get_template('pathways.jinja').render(**locals())

    suffix = '-unique' if b_unique else ''
    out_path = join(basedir, 'pathway-fits{}.html'.format(suffix))
    with open(out_path, 'w') as out:
        out.write(html)
Esempio n. 2
0
def save_fits_and_create_html(data, fitter, fits=None, basedir=None,
                              do_genes=True, do_series=True, do_hist=True,
                              do_html=True, only_main_html=False,
                              k_of_n=None,
                              use_correlations=False, correlations=None,
                              show_change_distributions=False,
                              html_kw=None,
                              figure_kw=None):
    """Save the plots for a set of fits and build the HTML pages around them.

    Parameters:
        data, fitter: passed through to the plotting and HTML helpers.
        fits: the fits to report; computed with
            ``get_all_fits(data, fitter, k_of_n)`` when None.
        basedir: output directory.  When None it defaults to
            ``results_dir()`` joined with ``fit_results_relative_path(data,
            fitter)``, plus a 'with-correlations' sub-directory if
            ``use_correlations`` is true.
        do_genes, do_series, do_hist, do_html: switch the individual
            outputs (per-gene plots, per-series plots, score distribution
            page, main HTML) on or off.
        only_main_html: when true, skip every plotting step and the score
            distribution page; only the main HTML is (re)built.
        k_of_n: forwarded to ``get_all_fits``.  When it is not None the
            score distribution page and the main HTML are skipped.
        use_correlations, correlations: correlation plots are produced and
            linked only when ``use_correlations`` is true and
            ``correlations`` is not None.
        show_change_distributions: forwarded to the plotting helpers.
        html_kw: extra keyword arguments for ``create_html`` (default: none).
        figure_kw: forwarded to ``plot_and_save_all_series`` (default: {}).

    Returns None.
    """
    if fits is None:
        fits = get_all_fits(data, fitter, k_of_n)
    if basedir is None:
        default_dir = join(results_dir(), fit_results_relative_path(data, fitter))
        basedir = join(default_dir, 'with-correlations') if use_correlations else default_dir
    html_kw = {} if html_kw is None else html_kw
    figure_kw = {} if figure_kw is None else figure_kw

    # single-argument parenthesised form: same output under the print statement
    print('Writing HTML under {}'.format(basedir))
    ensure_dir(basedir)

    # sub-directory names, relative to basedir
    gene_dir = 'gene-subplot'
    series_dir = 'gene-region-fits'
    correlations_dir = 'gene-correlations'
    scores_dir = 'score_distributions'

    def under_base(name):
        return join(basedir, name)

    make_plots = not only_main_html
    have_all_fits = k_of_n is None

    # relies on the sharding of the fits respecting gene boundaries
    if do_genes and make_plots:
        plot_and_save_all_genes(data, fitter, fits, under_base(gene_dir),
                                show_change_distributions)
    if do_series and make_plots:
        plot_and_save_all_series(data, fitter, fits, under_base(series_dir),
                                 use_correlations, show_change_distributions,
                                 figure_kw)
    if do_hist and have_all_fits and make_plots:
        create_score_distribution_html(fits, use_correlations,
                                       under_base(scores_dir))

    if not (do_html and have_all_fits):
        return

    link_to_correlation_plots = use_correlations and correlations is not None
    if link_to_correlation_plots and make_plots:
        plot_and_save_all_gene_correlations(data, correlations,
                                            under_base(correlations_dir))

    # simple heuristic to create pathways only if we have most of the genes
    # (currently 61 genes are missing)
    dct_pathways = load_17_pathways_breakdown()
    pathway_genes = set.union(*dct_pathways.values())
    missing = pathway_genes - set(data.gene_names)
    b_pathways = len(missing) < len(pathway_genes) / 2

    create_html(
        data, fitter, fits, basedir, gene_dir, series_dir, scores_dir,
        correlations_dir=correlations_dir,
        use_correlations=use_correlations,
        link_to_correlation_plots=link_to_correlation_plots,
        b_pathways=b_pathways,
        **html_kw
    )
Esempio n. 3
0
    return fig


# Driver script: fit the '17full' pathway genes and save one
# change-distribution figure per pathway, for both the overlapping and the
# unique pathway breakdowns.
cfg.verbosity = 1
age_scaler = LogScaler()

# all_data is the full 'both' dataset; data is the same dataset restricted to
# the genes of `pathway`.  Both have their ages scaled with age_scaler.
all_data = GeneData.load('both').scale_ages(age_scaler)
pathway = '17full'
data = GeneData.load('both').restrict_pathway(pathway).scale_ages(age_scaler)
shape = Sigmoid(priors='sigmoid_wide')
fitter = Fitter(shape, sigma_prior='normal')
fits = get_all_fits(data, fitter)

# Passed unchanged to plot_onset_times.
# NOTE(review): presumably a minimum R2 for a fit to be included -- confirm
# against plot_onset_times.
R2_threshold = 0.5
for b_unique in [False, True]:
    dct_pathways = load_17_pathways_breakdown(b_unique)
    # Extra entry with no gene list.
    # NOTE(review): presumably None means "all genes in data" -- confirm
    # against plot_onset_times.
    dct_pathways['17 pathways'] = None
    # One figure per pathway, saved under RP/unique or RP/overlapping.
    for name, genes in dct_pathways.iteritems():
        fig = plot_onset_times(all_data, data, fitter, fits, {name: genes},
                               R2_threshold, b_unique)
        str_dir = 'unique' if b_unique else 'overlapping'
        str_unique = ' (unique)' if b_unique else ''
        filename = 'RP/{}/change-distributions-{}{}.png'.format(
            str_dir, name, str_unique)
        save_figure(fig, filename, under_results=True)

    # selected plots
    # NOTE(review): lst_pathways is not used within the lines visible here.
    lst_pathways = [
        '17 pathways', 'Amphetamine addiction', 'Cholinergic synapse',
        'Cocaine addiction', 'Glutamatergic synapse'
    ]
Esempio n. 4
0
def save_fits_and_create_html(data,
                              fitter,
                              fits=None,
                              basedir=None,
                              do_genes=True,
                              do_series=True,
                              do_hist=True,
                              do_html=True,
                              only_main_html=False,
                              k_of_n=None,
                              use_correlations=False,
                              correlations=None,
                              show_change_distributions=False,
                              exons_layout=False,
                              html_kw=None,
                              figure_kw=None):
    """Save the plots for a set of fits and build the HTML pages around them.

    Parameters:
        data, fitter: passed through to the plotting and HTML helpers.
        fits: the fits to report; computed with
            ``get_all_fits(data, fitter, k_of_n)`` when None.
        basedir: output directory.  When None it defaults to
            ``results_dir()`` joined with ``fit_results_relative_path(data,
            fitter)``, plus a 'with-correlations' sub-directory if
            ``use_correlations`` is true.
        do_genes, do_series, do_hist, do_html: switch the individual
            outputs (per-gene plots, per-series plots, score distribution
            page, main HTML) on or off.
        only_main_html: when true, skip every plotting step and the score
            distribution page; only the main HTML is (re)built.
        k_of_n: forwarded to ``get_all_fits``.  When it is not None the
            score distribution page and the main HTML are skipped.
        use_correlations, correlations: correlation plots are produced and
            linked only when ``use_correlations`` is true and
            ``correlations`` is not None.
        show_change_distributions: forwarded to the plotting helpers.
        exons_layout: when true, exon plots are also produced (from the
            saved series plots if ``cfg.exons_plots_from_series`` is set,
            otherwise from the fits); the flag is also forwarded to
            ``plot_and_save_all_series`` and ``create_html``.
        html_kw: extra keyword arguments for ``create_html`` (default: none).
        figure_kw: forwarded to ``plot_and_save_all_series`` (default: {}).

    Returns None.
    """
    if fits is None:
        fits = get_all_fits(data, fitter, k_of_n)
    if basedir is None:
        default_dir = join(results_dir(), fit_results_relative_path(data, fitter))
        basedir = join(default_dir, 'with-correlations') if use_correlations else default_dir
    html_kw = {} if html_kw is None else html_kw
    figure_kw = {} if figure_kw is None else figure_kw

    # single-argument parenthesised form: same output under the print statement
    print('Writing HTML under {}'.format(basedir))
    ensure_dir(basedir)

    # sub-directory names, relative to basedir
    gene_dir = 'gene-subplot'
    if cfg.exons_plots_from_series:
        exons_dir = 'exons_subplot_series'
    else:
        exons_dir = 'exons_subplot'
    series_dir = 'gene-region-fits'
    correlations_dir = 'gene-correlations'
    scores_dir = 'score_distributions'

    def under_base(name):
        return join(basedir, name)

    make_plots = not only_main_html
    have_all_fits = k_of_n is None

    # relies on the sharding of the fits respecting gene boundaries
    if do_genes and make_plots:
        plot_and_save_all_genes(data, fitter, fits, under_base(gene_dir),
                                show_change_distributions)
    if do_series and make_plots:
        plot_and_save_all_series(data, fitter, fits, under_base(series_dir),
                                 use_correlations, show_change_distributions,
                                 exons_layout, figure_kw)
    if exons_layout and make_plots:
        if cfg.exons_plots_from_series:
            plot_and_save_all_exons_from_series(fits, under_base(exons_dir),
                                                under_base(series_dir))
        else:
            plot_and_save_all_exons(data, fitter, fits, under_base(exons_dir))
    if do_hist and have_all_fits and make_plots:
        create_score_distribution_html(fits, use_correlations,
                                       under_base(scores_dir))

    if not (do_html and have_all_fits):
        return

    link_to_correlation_plots = use_correlations and correlations is not None
    if link_to_correlation_plots and make_plots:
        plot_and_save_all_gene_correlations(data, correlations,
                                            under_base(correlations_dir))

    # simple heuristic to create pathways only if we have most of the genes
    # (currently 61 genes are missing)
    dct_pathways = load_17_pathways_breakdown()
    pathway_genes = set.union(*dct_pathways.values())
    missing = pathway_genes - set(data.gene_names)
    b_pathways = len(missing) < len(pathway_genes) / 2

    create_html(
        data, fitter, fits, basedir,
        gene_dir, exons_dir, series_dir, scores_dir,
        correlations_dir=correlations_dir,
        use_correlations=use_correlations,
        link_to_correlation_plots=link_to_correlation_plots,
        b_pathways=b_pathways,
        exons_layout=exons_layout,
        **html_kw
    )
Esempio n. 5
0
def create_pathway_index_html(data, fitter, fits, basedir, gene_dir, series_dir, use_correlations, b_unique):
    """Write an HTML page with one table of fit scores per pathway.

    For every pathway returned by ``load_17_pathways_breakdown(b_unique)``
    the page shows a table with one row per gene and one column per region
    of ``data``.  Each cell links to the series plot under ``series_dir``
    and shows the fit's score rounded to two decimals, or 'No Score' when
    the score is falsy; pairs without a fit are left empty.  Gene names link
    to the gene plot under ``gene_dir``.

    Side effect: every fit object in ``fits`` gets two attributes, ``score``
    (the LOO score, taken from ``fit.with_correlations`` when
    ``use_correlations`` is true) and ``rank`` (0 for a missing or
    non-positive score, otherwise ``ceil(n_ranks * score)``), which the
    template uses as a CSS class suffix.

    The page is written to ``basedir`` as 'pathway-fits.html', or
    'pathway-fits-unique.html' when ``b_unique`` is true.  ``fitter`` is
    accepted for signature compatibility and is not used.
    """
    from jinja2 import Template

    dct_pathways = load_17_pathways_breakdown(b_unique)

    n_ranks = 3 # actually we'll have ranks of 0 to n_ranks
    flat_fits = {} # (gene,region) -> fit (may be None)
    for g in data.gene_names:
        for r in data.region_names:
            flat_fits[(g,r)] = None
    for dsfits in fits.itervalues():
        for (g,r),fit in dsfits.iteritems():
            if use_correlations:
                score = fit.with_correlations.LOO_score
            else:
                score = fit.LOO_score
            fit.score = score
            # The template has a 'No Score' branch for a falsy score, so a
            # missing score is possible; test for None explicitly instead of
            # relying on Python 2 ordering None below every number.
            if score is not None and score > 0:
                fit.rank = int(np.ceil(n_ranks * score))
            else:
                fit.rank = 0
            flat_fits[(g,r)] = fit

    # Pass the template exactly the names it references rather than
    # **locals(), so loop leftovers do not leak into the template context
    # and a local rename cannot silently blank out part of the page.
    html = Template("""
<html>
<head>
    <link rel="stylesheet" type="text/css" href="fits.css">
</head>
<body>
<H1>Fits broken down by pathway {% if b_unique %} (unique genes only) {% endif %} </H1>
{% for pathway_name, pathway_genes in dct_pathways.iteritems() %}
<P>
<H2>{{pathway_name}}</H2>
<table>
    <th>
        {% for region_name in data.region_names %}
        <td class="tableHeading">
            <b>{{region_name}}</b>
        </td>
        {% endfor %}
    </th>
    {% for gene_name in pathway_genes %}
    <tr>
        <td>
            <a href="{{gene_dir}}/{{gene_name}}.png"><b>{{gene_name}}</b></a>
        </td>
        {% for region_name in data.region_names %}
        <td>
            {% if flat_fits[(gene_name,region_name)] %}
                <a href="{{series_dir}}/fit-{{gene_name}}-{{region_name}}.png">
                {% if flat_fits[(gene_name,region_name)].score %}
                    <div class="score rank{{flat_fits[(gene_name,region_name)].rank}}">
                   {{flat_fits[(gene_name,region_name)].score | round(2)}}
                   </div>
                {% else %}
                   No Score
                {% endif %}
                </a>
            {% endif %}
        </td>
        {% endfor %}
    </tr>
    {% endfor %}
</table>
</P>
{% endfor %} {# dct_pathways #}

</body>
</html>    
""").render(
        b_unique=b_unique,
        dct_pathways=dct_pathways,
        data=data,
        gene_dir=gene_dir,
        series_dir=series_dir,
        flat_fits=flat_fits,
    )
    str_unique = '-unique' if b_unique else ''
    filename = 'pathway-fits{}.html'.format(str_unique)
    with open(join(basedir,filename), 'w') as f:
        f.write(html)
Esempio n. 6
0
    return fig

# Driver script: fit the '17full' pathway genes, save one change-distribution
# figure per pathway, then build a combined figure for a hand-picked subset,
# for both the overlapping and the unique pathway breakdowns.
cfg.verbosity = 1
age_scaler = LogScaler()

# all_data is the full 'both' dataset; data is the same dataset restricted to
# the genes of `pathway`.  Both have their ages scaled with age_scaler.
all_data = GeneData.load('both').scale_ages(age_scaler)
pathway = '17full'
data = GeneData.load('both').restrict_pathway(pathway).scale_ages(age_scaler)
shape = Sigmoid(priors='sigmoid_wide')
fitter = Fitter(shape, sigma_prior='normal')
fits = get_all_fits(data, fitter)

# Passed unchanged to plot_onset_times.
# NOTE(review): presumably a minimum R2 for a fit to be included -- confirm
# against plot_onset_times.
R2_threshold = 0.5
for b_unique in [False,True]:
    dct_pathways = load_17_pathways_breakdown(b_unique)
    # Extra entry with no gene list.
    # NOTE(review): presumably None means "all genes in data" -- confirm
    # against plot_onset_times.
    dct_pathways['17 pathways'] = None
    # One figure per pathway, saved under RP/unique or RP/overlapping.
    for name,genes in dct_pathways.iteritems():
        fig = plot_onset_times(all_data, data, fitter, fits, {name:genes}, R2_threshold, b_unique)
        str_dir = 'unique' if b_unique else 'overlapping'
        str_unique = ' (unique)' if b_unique else ''
        filename = 'RP/{}/change-distributions-{}{}.png'.format(str_dir,name,str_unique)
        save_figure(fig, filename, under_results=True)

    # selected plots: a single figure covering only the pathways listed below
    lst_pathways = ['17 pathways', 'Amphetamine addiction', 'Cholinergic synapse', 'Cocaine addiction', 'Glutamatergic synapse']
    # Rebinds dct_pathways to the subset; raises KeyError if the breakdown
    # lacks any of the listed names.
    dct_pathways = {k:dct_pathways[k] for k in lst_pathways}
    fig = plot_onset_times(all_data, data, fitter, fits, dct_pathways, R2_threshold, b_unique)
    str_dir = 'unique' if b_unique else 'overlapping'
    str_unique = ' (unique)' if b_unique else ''
    # NOTE(review): the figure is not saved within the lines visible here;
    # the save_figure call presumably follows -- confirm.
    filename = 'RP/{}/selected-change-distributions{}.png'.format(str_dir,str_unique)