def create_pathway_index_html(data, fitter, fits, basedir, gene_dir, series_dir, use_correlations, b_unique):
    """Render the per-pathway fits index and write it under ``basedir``.

    Each fit found in ``fits`` is annotated in place with two attributes:
    ``score`` (its LOO score, read from ``fit.with_correlations`` when
    ``use_correlations`` is truthy) and ``rank`` (0 for non-positive scores,
    otherwise ``ceil(n_ranks * score)``).  The 'pathways.jinja' template is
    then rendered and saved as 'pathway-fits.html', or
    'pathway-fits-unique.html' when ``b_unique`` is truthy.

    Args:
        data: object exposing ``gene_names`` and ``region_names``.
        fitter: not used directly here; visible to the template.
        fits: mapping whose values map ``(gene, region)`` to a fit object.
        basedir: directory the HTML file is written into.
        gene_dir: not used directly here; visible to the template.
        series_dir: not used directly here; visible to the template.
        use_correlations: selects which LOO score is shown.
        b_unique: forwarded to ``load_17_pathways_breakdown`` and used to
            pick the output file name.

    NOTE: the template receives ``**locals()``, so the local names defined
    before the render call are part of its context.  Do not rename them
    without checking 'pathways.jinja'.
    """
    dct_pathways = load_17_pathways_breakdown(b_unique)
    n_ranks = 3  # actually we'll have ranks of 0 to n_ranks

    # (gene,region) -> fit; every pair starts as None and stays None if no fit exists
    flat_fits = dict.fromkeys(
        (gene, region)
        for gene in data.gene_names
        for region in data.region_names
    )

    for dsfits in fits.itervalues():
        for (g, r), fit in dsfits.iteritems():
            score = fit.with_correlations.LOO_score if use_correlations else fit.LOO_score
            fit.score = score
            if score > 0:
                fit.rank = int(np.ceil(n_ranks * score))
            else:
                fit.rank = 0
            flat_fits[(g, r)] = fit

    html = get_jinja_env().get_template('pathways.jinja').render(**locals())

    filename = 'pathway-fits{}.html'.format('-unique' if b_unique else '')
    with open(join(basedir, filename), 'w') as f:
        f.write(html)
def save_fits_and_create_html(data, fitter, fits=None, basedir=None, do_genes=True, do_series=True, do_hist=True, do_html=True, only_main_html=False, k_of_n=None, use_correlations=False, correlations=None, show_change_distributions=False, html_kw=None, figure_kw=None):
    """Produce the plots and HTML pages for a set of fits.

    Args:
        data, fitter: forwarded to the plotting/HTML helpers; also used to
            compute ``fits`` and the default ``basedir``.
        fits: precomputed fits; computed with ``get_all_fits`` when None.
        basedir: output directory; when None a default under
            ``results_dir()`` is used.
        do_genes, do_series, do_hist, do_html: enable the per-gene plots,
            the per-series plots, the score distribution page and the main
            HTML page respectively.
        only_main_html: when truthy, every plotting step is skipped and
            only the main HTML page may be written.
        k_of_n: forwarded to ``get_all_fits``; the score distribution page
            and the main HTML are only produced when it is None.
        use_correlations, correlations: correlation plots are produced and
            linked only when ``use_correlations`` is truthy and
            ``correlations`` is not None.
        show_change_distributions: forwarded to the plotting helpers.
        html_kw: extra keyword arguments for ``create_html``.
        figure_kw: forwarded to ``plot_and_save_all_series``.
    """
    if fits is None:
        fits = get_all_fits(data, fitter, k_of_n)
    if basedir is None:
        basedir = join(results_dir(), fit_results_relative_path(data, fitter))
        # NOTE(review): suffix assumed to apply only to the default basedir - confirm
        if use_correlations:
            basedir = join(basedir, 'with-correlations')
    html_kw = {} if html_kw is None else html_kw
    figure_kw = {} if figure_kw is None else figure_kw

    print('Writing HTML under {}'.format(basedir))
    ensure_dir(basedir)

    # sub-directory names, relative to basedir
    gene_dir = 'gene-subplot'
    series_dir = 'gene-region-fits'
    correlations_dir = 'gene-correlations'
    scores_dir = 'score_distributions'

    make_plots = not only_main_html
    whole_run = k_of_n is None

    if make_plots and do_genes:
        # relies on the sharding of the fits respecting gene boundaries
        plot_and_save_all_genes(data, fitter, fits, join(basedir, gene_dir), show_change_distributions)
    if make_plots and do_series:
        plot_and_save_all_series(data, fitter, fits, join(basedir, series_dir), use_correlations, show_change_distributions, figure_kw)
    if make_plots and do_hist and whole_run:
        create_score_distribution_html(fits, use_correlations, join(basedir, scores_dir))

    if not (do_html and whole_run):
        return

    link_to_correlation_plots = use_correlations and correlations is not None
    if link_to_correlation_plots and make_plots:
        plot_and_save_all_gene_correlations(data, correlations, join(basedir, correlations_dir))

    # simple heuristic to create pathways only if we have most of the genes
    # (currently 61 genes are missing)
    pathway_genes = set.union(*load_17_pathways_breakdown().values())
    missing = pathway_genes - set(data.gene_names)
    b_pathways = len(missing) < len(pathway_genes) / 2

    create_html(
        data, fitter, fits, basedir, gene_dir, series_dir, scores_dir,
        correlations_dir=correlations_dir,
        use_correlations=use_correlations,
        link_to_correlation_plots=link_to_correlation_plots,
        b_pathways=b_pathways,
        **html_kw
    )
# NOTE(review): `return fig` is the tail of a function whose header lies above this chunk;
# its indentation here is assumed.
    return fig


# Script section: fit the '17full' pathway genes and save one figure per pathway.
cfg.verbosity = 1
age_scaler = LogScaler()
all_data = GeneData.load('both').scale_ages(age_scaler)  # full dataset, ages scaled
pathway = '17full'
data = GeneData.load('both').restrict_pathway(pathway).scale_ages(age_scaler)  # same dataset restricted to `pathway`
shape = Sigmoid(priors='sigmoid_wide')
fitter = Fitter(shape, sigma_prior='normal')
fits = get_all_fits(data, fitter)

R2_threshold = 0.5
# Run once with the overlapping breakdown (b_unique=False) and once with the unique one.
for b_unique in [False, True]:
    dct_pathways = load_17_pathways_breakdown(b_unique)
    # extra entry with no gene list; presumably means "all genes" - TODO confirm
    dct_pathways['17 pathways'] = None
    for name, genes in dct_pathways.iteritems():
        fig = plot_onset_times(all_data, data, fitter, fits, {name: genes}, R2_threshold, b_unique)
        str_dir = 'unique' if b_unique else 'overlapping'
        str_unique = ' (unique)' if b_unique else ''
        # saved as RP/<unique|overlapping>/change-distributions-<name>[ (unique)].png
        filename = 'RP/{}/change-distributions-{}{}.png'.format(
            str_dir, name, str_unique)
        save_figure(fig, filename, under_results=True)

    # selected plots
    # NOTE(review): assumed to sit inside the b_unique loop - confirm
    lst_pathways = [
        '17 pathways', 'Amphetamine addiction', 'Cholinergic synapse',
        'Cocaine addiction', 'Glutamatergic synapse'
    ]
def save_fits_and_create_html(data, fitter, fits=None, basedir=None, do_genes=True, do_series=True, do_hist=True, do_html=True, only_main_html=False, k_of_n=None, use_correlations=False, correlations=None, show_change_distributions=False, exons_layout=False, html_kw=None, figure_kw=None):
    """Write all plots and HTML pages for a set of fits under one directory.

    Args:
        data, fitter: forwarded to the plotting/HTML helpers; also used to
            compute ``fits`` and the default ``basedir``.
        fits: precomputed fits; computed with ``get_all_fits`` when None.
        basedir: output directory; when None a default under
            ``results_dir()`` is used.
        do_genes, do_series, do_hist, do_html: enable the per-gene plots,
            the per-series plots, the score distribution page and the main
            HTML page respectively.
        only_main_html: when truthy, all plotting steps are skipped.
        k_of_n: forwarded to ``get_all_fits``; the score distribution page
            and the main HTML are only produced when it is None.
        use_correlations, correlations: correlation plots are produced and
            linked only when ``use_correlations`` is truthy and
            ``correlations`` is not None.
        show_change_distributions: forwarded to the plotting helpers.
        exons_layout: when truthy, exon plots are also produced; the flag is
            forwarded to the series plots and to ``create_html``.
        html_kw: extra keyword arguments for ``create_html``.
        figure_kw: forwarded to ``plot_and_save_all_series``.
    """
    if fits is None:
        fits = get_all_fits(data, fitter, k_of_n)
    if basedir is None:
        basedir = join(results_dir(), fit_results_relative_path(data, fitter))
        # NOTE(review): suffix assumed to apply only to the default basedir - confirm
        if use_correlations:
            basedir = join(basedir, 'with-correlations')
    if html_kw is None:
        html_kw = {}
    if figure_kw is None:
        figure_kw = {}

    print('Writing HTML under {}'.format(basedir))
    ensure_dir(basedir)

    # sub-directory names, relative to basedir
    gene_dir = 'gene-subplot'
    if cfg.exons_plots_from_series:
        exons_dir = 'exons_subplot_series'
    else:
        exons_dir = 'exons_subplot'
    series_dir = 'gene-region-fits'
    correlations_dir = 'gene-correlations'
    scores_dir = 'score_distributions'

    def _under(subdir):
        # absolute location of an output sub-directory
        return join(basedir, subdir)

    if not only_main_html:
        if do_genes:
            # relies on the sharding of the fits respecting gene boundaries
            plot_and_save_all_genes(data, fitter, fits, _under(gene_dir), show_change_distributions)
        if do_series:
            plot_and_save_all_series(data, fitter, fits, _under(series_dir), use_correlations, show_change_distributions, exons_layout, figure_kw)
        if exons_layout:
            if cfg.exons_plots_from_series:
                plot_and_save_all_exons_from_series(fits, _under(exons_dir), _under(series_dir))
            else:
                plot_and_save_all_exons(data, fitter, fits, _under(exons_dir))
        if do_hist and k_of_n is None:
            create_score_distribution_html(fits, use_correlations, _under(scores_dir))

    if not do_html or k_of_n is not None:
        return

    link_to_correlation_plots = use_correlations and correlations is not None
    if link_to_correlation_plots and not only_main_html:
        plot_and_save_all_gene_correlations(data, correlations, _under(correlations_dir))

    # simple heuristic to create pathways only if we have most of the genes
    # (currently 61 genes are missing)
    dct_pathways = load_17_pathways_breakdown()
    pathway_genes = set.union(*dct_pathways.values())
    missing = pathway_genes - set(data.gene_names)
    b_pathways = len(missing) < len(pathway_genes) / 2

    create_html(
        data, fitter, fits, basedir, gene_dir, exons_dir, series_dir, scores_dir,
        correlations_dir=correlations_dir,
        use_correlations=use_correlations,
        link_to_correlation_plots=link_to_correlation_plots,
        b_pathways=b_pathways,
        exons_layout=exons_layout,
        **html_kw
    )
def create_pathway_index_html(data, fitter, fits, basedir, gene_dir, series_dir, use_correlations, b_unique):
    """Render the per-pathway fits index and write it under ``basedir``.

    Each fit found in ``fits`` is annotated in place with two attributes:
    ``score`` (its LOO score, read from ``fit.with_correlations`` when
    ``use_correlations`` is truthy) and ``rank`` (0 when the score is missing
    or non-positive, otherwise ``ceil(n_ranks * score)``).  The page has one
    table per pathway with a row per gene and a column per region, and is
    saved as 'pathway-fits.html', or 'pathway-fits-unique.html' when
    ``b_unique`` is truthy.

    Args:
        data: object exposing ``gene_names`` and ``region_names``.
        fitter: unused; kept so the signature stays unchanged.
        fits: mapping whose values map ``(gene, region)`` to a fit object.
        basedir: directory the HTML file is written into.
        gene_dir: relative directory used for the per-gene image links.
        series_dir: relative directory used for the per-fit image links.
        use_correlations: selects which LOO score is shown.
        b_unique: forwarded to ``load_17_pathways_breakdown``; also controls
            the page heading and the output file name.
    """
    from jinja2 import Template

    n_ranks = 3  # actually we'll have ranks of 0 to n_ranks
    dct_pathways = load_17_pathways_breakdown(b_unique)

    # (gene,region) -> fit; pairs without a fit stay None
    flat_fits = dict.fromkeys(
        (gene, region)
        for gene in data.gene_names
        for region in data.region_names
    )
    for dsfits in fits.itervalues():
        for (gene, region), fit in dsfits.iteritems():
            score = fit.with_correlations.LOO_score if use_correlations else fit.LOO_score
            fit.score = score
            # The template has a "No Score" branch for falsy scores, so a missing
            # score is treated explicitly as rank 0 instead of relying on
            # Python 2's ordering of None against numbers.
            if score is not None and score > 0:
                fit.rank = int(np.ceil(n_ranks * score))
            else:
                fit.rank = 0
            flat_fits[(gene, region)] = fit

    # Adjacent literals concatenate to the exact original template text.
    template_source = (
        ' <html>'
        ' <head>'
        ' <link rel="stylesheet" type="text/css" href="fits.css">'
        ' </head>'
        ' <body>'
        ' <H1>Fits broken down by pathway'
        ' {% if b_unique %} (unique genes only) {% endif %}'
        ' </H1>'
        ' {% for pathway_name, pathway_genes in dct_pathways.iteritems() %}'
        ' <P>'
        ' <H2>{{pathway_name}}</H2>'
        ' <table>'
        ' <th>'
        ' {% for region_name in data.region_names %}'
        ' <td class="tableHeading"> <b>{{region_name}}</b> </td>'
        ' {% endfor %}'
        ' </th>'
        ' {% for gene_name in pathway_genes %}'
        ' <tr>'
        ' <td> <a href="{{gene_dir}}/{{gene_name}}.png"><b>{{gene_name}}</b></a> </td>'
        ' {% for region_name in data.region_names %}'
        ' <td>'
        ' {% if flat_fits[(gene_name,region_name)] %}'
        ' <a href="{{series_dir}}/fit-{{gene_name}}-{{region_name}}.png">'
        ' {% if flat_fits[(gene_name,region_name)].score %}'
        ' <div class="score rank{{flat_fits[(gene_name,region_name)].rank}}">'
        ' {{flat_fits[(gene_name,region_name)].score | round(2)}}'
        ' </div>'
        ' {% else %}'
        ' No Score'
        ' {% endif %}'
        ' </a>'
        ' {% endif %}'
        ' </td>'
        ' {% endfor %}'
        ' </tr>'
        ' {% endfor %}'
        ' </table>'
        ' </P>'
        ' {% endfor %} {# dct_pathways #}'
        ' </body>'
        ' </html> '
    )

    # Pass only the names the template reads, instead of leaking every local.
    html = Template(template_source).render(
        b_unique=b_unique,
        dct_pathways=dct_pathways,
        data=data,
        gene_dir=gene_dir,
        series_dir=series_dir,
        flat_fits=flat_fits,
    )

    filename = 'pathway-fits{}.html'.format('-unique' if b_unique else '')
    with open(join(basedir, filename), 'w') as f:
        f.write(html)
# NOTE(review): `return fig` is the tail of a function whose header lies above this chunk;
# its indentation here is assumed.
    return fig


# Script section: fit the '17full' pathway genes, then save one figure per pathway
# and one combined figure for a hand-picked subset of pathways.
cfg.verbosity = 1
age_scaler = LogScaler()
all_data = GeneData.load('both').scale_ages(age_scaler)  # full dataset, ages scaled
pathway = '17full'
data = GeneData.load('both').restrict_pathway(pathway).scale_ages(age_scaler)  # same dataset restricted to `pathway`
shape = Sigmoid(priors='sigmoid_wide')
fitter = Fitter(shape, sigma_prior='normal')
fits = get_all_fits(data, fitter)

R2_threshold = 0.5
# Run once with the overlapping breakdown (b_unique=False) and once with the unique one.
for b_unique in [False,True]:
    dct_pathways = load_17_pathways_breakdown(b_unique)
    # extra entry with no gene list; presumably means "all genes" - TODO confirm
    dct_pathways['17 pathways'] = None
    for name,genes in dct_pathways.iteritems():
        fig = plot_onset_times(all_data, data, fitter, fits, {name:genes}, R2_threshold, b_unique)
        str_dir = 'unique' if b_unique else 'overlapping'
        str_unique = ' (unique)' if b_unique else ''
        # saved as RP/<unique|overlapping>/change-distributions-<name>[ (unique)].png
        filename = 'RP/{}/change-distributions-{}{}.png'.format(str_dir,name,str_unique)
        save_figure(fig, filename, under_results=True)

    # selected plots
    # NOTE(review): assumed to sit inside the b_unique loop - confirm
    lst_pathways = ['17 pathways', 'Amphetamine addiction', 'Cholinergic synapse', 'Cocaine addiction', 'Glutamatergic synapse']
    # keep only the selected pathways and plot them together in a single figure
    dct_pathways = {k:dct_pathways[k] for k in lst_pathways}
    fig = plot_onset_times(all_data, data, fitter, fits, dct_pathways, R2_threshold, b_unique)
    str_dir = 'unique' if b_unique else 'overlapping'
    str_unique = ' (unique)' if b_unique else ''
    filename = 'RP/{}/selected-change-distributions{}.png'.format(str_dir,str_unique)