def make_essentiality_matrix(feature_x_sample,
                             feature_x_fit,
                             n_x_grids=3000,
                             factor=1):
    """
    Make a feature-by-sample matrix of essentiality indices.

    Only features whose labels appear in the index of both inputs are used.
    For each of them the fitted skew-t PDF and its reflected counterpart are
    evaluated on a grid spanning the feature's observed values, converted to
    essentiality indices by define_cumulative_area_ratio_function, and every
    sample receives the index at the grid point nearest to its value,
    multiplied by factor and by the sign of the fitted shape.

    :param feature_x_sample: DataFrame; rows are features, columns are samples
    :param feature_x_fit: DataFrame; rows are features, each row unpacking as
        (n, df, shape, location, scale)
    :param n_x_grids: int; number of grid points per feature
    :param factor: number; multiplier applied to every essentiality index
    :return: DataFrame; (n_common_features, n_samples)
    """

    # Index.intersection is the explicit form of the set operation that `&`
    # used to perform on two indices.
    common_indices = feature_x_sample.index.intersection(feature_x_fit.index)

    # Test how many labels are shared, not their truthiness: a label such as
    # 0 or '' is falsy and would wrongly be reported as "no common features".
    if common_indices.size:
        print_log(
            'Making essentiality matrix using {} common features (indices) ...'
            .format(common_indices.size))
    else:
        print_log('No common features (indices).')

    # Label-based selection (.ix has been removed from pandas)
    gene_x_sample = feature_x_sample.loc[common_indices, :]
    gene_x_fit = feature_x_fit.loc[common_indices, :]

    skew_t = ACSkewT_gen()

    essentiality_matrix = empty(gene_x_sample.shape)
    for i, (g, (n, df, shape, location,
                scale)) in enumerate(gene_x_fit.iterrows()):

        # Skew-t PDF
        vector = asarray(gene_x_sample.loc[g, :])
        x_grids = linspace(vector.min(), vector.max(), n_x_grids)
        skew_t_pdf = skew_t.pdf(x_grids, df, shape, loc=location, scale=scale)

        # Reflected Skew-t PDF
        x_grids_for_reflection = define_x_coordinates_for_reflection(
            skew_t_pdf, x_grids)
        skew_t_pdf_reflected = skew_t.pdf(x_grids_for_reflection,
                                          df,
                                          shape,
                                          loc=location,
                                          scale=scale)

        # Essentiality indices; a positive shape selects the '-' direction.
        # A conditional expression avoids indexing a list with a bool.
        essentiality_indices = define_cumulative_area_ratio_function(
            skew_t_pdf,
            skew_t_pdf_reflected,
            x_grids,
            direction='-' if shape > 0 else '+')

        # Loop-invariant for this feature
        signed_factor = factor * sign(shape)

        # Each sample takes the index at its nearest grid point
        essentiality_matrix[i, :] = [
            signed_factor * essentiality_indices[argmin(abs(x_grids - v))]
            for v in vector
        ]

    return DataFrame(essentiality_matrix,
                     index=gene_x_sample.index,
                     columns=gene_x_sample.columns)
def test_skewt():
    """
    Compare ACSkewT_gen pdf/cdf values with reference values computed in R.

    The R command that produced each reference array is quoted above it.
    All evaluations use alpha (shape) = 10 at the same six points. The
    comparisons against the df=inf references (approximated here with
    df=1000000) are evaluated but not asserted; only the df=5 and df=1
    comparisons can fail the test.
    """
    dist = ACSkewT_gen()
    points = [-2, -1, -0.5, 0, 1, 2]

    # --- df=inf references, approximated with df=1000000 (not asserted) ---
    #noquote(sprintf("%.15e,", dst(c(-2,-1, -0.5,0,1,2), shape=10)))
    #default in R:sn is df=inf
    limit_pdf_reference = np.array([
        2.973416551551523e-90, 3.687562713971017e-24, 2.018401586422970e-07,
        3.989422804014327e-01, 4.839414490382867e-01, 1.079819330263761e-01
    ])
    limit_pdf = dist.pdf(points, 1000000, 10)
    # The results of these two comparisons are discarded.
    for rtol, atol in ((0, 1e-6), (1e-1, 0)):
        np.allclose(limit_pdf, limit_pdf_reference, rtol=rtol, atol=atol)

    #noquote(sprintf("%.15e,", pst(c(-2,-1, -0.5,0,1,2), shape=10)))
    limit_cdf_reference = np.array([
        0.000000000000000e+00, 0.000000000000000e+00, 3.729478836866917e-09,
        3.172551743055357e-02, 6.826894921370859e-01, 9.544997361036416e-01
    ])
    limit_cdf = dist.cdf(points, 1000000, 10)
    # The results of these two comparisons are discarded as well.
    for rtol, atol in ((0, 1e-6), (1e-1, 0)):
        np.allclose(limit_cdf, limit_cdf_reference, rtol=rtol, atol=atol)
    #assert_(np.allclose(cdf_st, cdf_r, rtol=1e-13, atol=1e-15))

    # --- asserted cases: (df, method, reference, rtol, atol) ---
    asserted_cases = (
        #noquote(sprintf("%.15e,", dst(c(-2,-1, -0.5,0,1,2), shape=10, df=5)))
        (5, 'pdf',
         np.array([
             2.185448836190663e-07, 1.272381597868587e-05,
             5.746937644959992e-04, 3.796066898224945e-01,
             4.393468708859825e-01, 1.301804021075493e-01
         ]), 1e-13, 1e-25),
        #noquote(sprintf("%.15e,", pst(c(-2,-1, -0.5,0,1,2), shape=10, df=5)))
        (5, 'cdf',
         np.array([
             8.822783669199699e-08, 2.638467463775795e-06,
             6.573106017198583e-05, 3.172551743055352e-02,
             6.367851708183412e-01, 8.980606093979784e-01
         ]), 1e-10, 0),
        #noquote(sprintf("%.15e,", dst(c(-2,-1, -0.5,0,1,2), shape=10, df=1)))
        (1, 'pdf',
         np.array([
             3.941955996757291e-04, 1.568067236862745e-03,
             6.136996029432048e-03, 3.183098861837907e-01,
             3.167418189469279e-01, 1.269297588738406e-01
         ]), 1e-13, 1e-25),
        #noquote(sprintf("%.15e,", pst(c(-2,-1, -0.5,0,1,2), shape=10, df=1)))
        (1, 'cdf',
         np.array([
             7.893671370544414e-04, 1.575817262600422e-03,
             3.128720749105560e-03, 3.172551743055351e-02,
             5.015758172626005e-01, 7.056221318361879e-01
         ]), 1e-13, 1e-25),
    )
    for df, method, reference, rtol, atol in asserted_cases:
        # args = (df, alpha)
        computed = getattr(dist, method)(points, df, 10)
        assert_(np.allclose(computed, reference, rtol=rtol, atol=atol))
def test_skewt():
    """
    Check ACSkewT_gen.pdf and ACSkewT_gen.cdf against values computed in R.

    Every evaluation uses the same six quantiles and alpha (shape) = 10. The
    reference arrays are keyed by (function, df); the R command that produced
    each one is quoted above it. The df=inf references are compared against
    df=1000000 without asserting the outcome; the df=5 and df=1 references
    are asserted.
    """
    skew_t_dist = ACSkewT_gen()
    quantiles = [-2, -1, -0.5, 0, 1, 2]
    alpha = 10

    reference = {
        #noquote(sprintf("%.15e,", dst(c(-2,-1, -0.5,0,1,2), shape=10)))
        #default in R:sn is df=inf
        ('pdf', 'inf'): np.array([2.973416551551523e-90,
                                  3.687562713971017e-24,
                                  2.018401586422970e-07,
                                  3.989422804014327e-01,
                                  4.839414490382867e-01,
                                  1.079819330263761e-01]),
        #noquote(sprintf("%.15e,", pst(c(-2,-1, -0.5,0,1,2), shape=10)))
        ('cdf', 'inf'): np.array([0.000000000000000e+00,
                                  0.000000000000000e+00,
                                  3.729478836866917e-09,
                                  3.172551743055357e-02,
                                  6.826894921370859e-01,
                                  9.544997361036416e-01]),
        #noquote(sprintf("%.15e,", dst(c(-2,-1, -0.5,0,1,2), shape=10, df=5)))
        ('pdf', 5): np.array([2.185448836190663e-07,
                              1.272381597868587e-05,
                              5.746937644959992e-04,
                              3.796066898224945e-01,
                              4.393468708859825e-01,
                              1.301804021075493e-01]),
        #noquote(sprintf("%.15e,", pst(c(-2,-1, -0.5,0,1,2), shape=10, df=5)))
        ('cdf', 5): np.array([8.822783669199699e-08,
                              2.638467463775795e-06,
                              6.573106017198583e-05,
                              3.172551743055352e-02,
                              6.367851708183412e-01,
                              8.980606093979784e-01]),
        #noquote(sprintf("%.15e,", dst(c(-2,-1, -0.5,0,1,2), shape=10, df=1)))
        ('pdf', 1): np.array([3.941955996757291e-04,
                              1.568067236862745e-03,
                              6.136996029432048e-03,
                              3.183098861837907e-01,
                              3.167418189469279e-01,
                              1.269297588738406e-01]),
        #noquote(sprintf("%.15e,", pst(c(-2,-1, -0.5,0,1,2), shape=10, df=1)))
        ('cdf', 1): np.array([7.893671370544414e-04,
                              1.575817262600422e-03,
                              3.128720749105560e-03,
                              3.172551743055351e-02,
                              5.015758172626005e-01,
                              7.056221318361879e-01]),
    }

    # (rtol, atol) used for each asserted comparison
    tolerance = {
        ('pdf', 5): (1e-13, 1e-25),
        ('cdf', 5): (1e-10, 0),
        ('pdf', 1): (1e-13, 1e-25),
        ('cdf', 1): (1e-13, 1e-25),
    }

    evaluate = {'pdf': skew_t_dist.pdf, 'cdf': skew_t_dist.cdf}

    # Large df stands in for df=inf; these comparisons are computed and their
    # results dropped, so they never fail the test.
    for kind in ('pdf', 'cdf'):
        near_limit = evaluate[kind](quantiles, 1000000, alpha)
        expected = reference[kind, 'inf']
        np.allclose(near_limit, expected, rtol=0, atol=1e-6)
        np.allclose(near_limit, expected, rtol=1e-1, atol=0)
    #assert_(np.allclose(cdf_st, cdf_r, rtol=1e-13, atol=1e-15))

    # Finite df: asserted, pdf before cdf, df=5 before df=1
    for df in (5, 1):
        for kind in ('pdf', 'cdf'):
            rtol, atol = tolerance[kind, df]
            # args = (df, alpha)
            computed = evaluate[kind](quantiles, df, alpha)
            assert_(
                np.allclose(computed,
                            reference[kind, df],
                            rtol=rtol,
                            atol=atol))
def make_essentiality_matrix(feature_x_sample,
                             feature_x_fit,
                             n_grids=3000,
                             function='scaled_fractional_difference',
                             factor=1):
    """
    Make a feature-by-sample matrix of essentiality indices.

    For every row of feature_x_sample the fitted skew-t PDF and its reflected
    counterpart are evaluated on a grid spanning the row's values and handed
    to _compute_essentiality_index together with an expression string. The
    result is passed through normalize_1d(ei, '0-1'), and each sample takes
    the value at its nearest grid point, multiplied by the sign of the fitted
    shape and by factor.

    :param feature_x_sample: DataFrame; (n_features, n_samples)
    :param feature_x_fit: DataFrame; (n_features, 5); rows are read by
        position and unpack as (n, df, shape, location, scale), so they must
        be in the same order as the rows of feature_x_sample
    :param n_grids: int; number of grid points per feature
    :param function: str; expression given to _compute_essentiality_index. A
        value starting with 'scaled_fractional_difference' is expanded, per
        feature, to 'where(f2 < f1, ((f1 - f2) / f1)**<scale>, 0)' using that
        feature's fitted scale
    :param factor: number; multiplier applied to every essentiality index
    :return: DataFrame; (n_features, n_samples)
    """

    print('\tApplying {} to each feature ...'.format(function))

    empty_ = empty(feature_x_sample.shape)

    skew_t = ACSkewT_gen()

    for i, (f_i, f_v) in enumerate(feature_x_sample.iterrows()):

        # Build skew-t PDF
        grids = linspace(f_v.min(), f_v.max(), n_grids)
        # Positional row lookup (.ix has been removed from pandas)
        n, df, shape, location, scale = feature_x_fit.iloc[i, :]
        skew_t_pdf = skew_t.pdf(grids, df, shape, loc=location, scale=scale)

        # Build reflected skew-t PDF
        skew_t_pdf_r = skew_t.pdf(define_x_coordinates_for_reflection(
            skew_t_pdf, grids),
                                  df,
                                  shape,
                                  loc=location,
                                  scale=scale)

        # Set up function.
        # Expand into a per-feature local: overwriting `function` itself would
        # make the startswith() test fail from the second feature on, so every
        # later feature would silently reuse the first feature's scale.
        if function.startswith('scaled_fractional_difference'):
            function_ = 'where(f2 < f1, ((f1 - f2) / f1)**{}, 0)'.format(
                scale)
        else:
            function_ = function

        # A positive shape selects the '-' direction; a conditional expression
        # avoids indexing a list with a bool.
        ei = _compute_essentiality_index(skew_t_pdf, skew_t_pdf_r, function_,
                                         '-' if shape > 0 else '+',
                                         grids[1] - grids[0])

        ei = normalize_1d(ei, '0-1')

        # Each sample takes the index at its nearest grid point
        nearest = [argmin(abs(grids - x)) for x in asarray(f_v)]
        empty_[i, :] = ei[nearest] * sign(shape) * factor

    return DataFrame(empty_,
                     index=feature_x_sample.index,
                     columns=feature_x_sample.columns)
def compute_vector_context(
    vector,
    n_data=None,
    location=None,
    scale=None,
    degree_of_freedom=None,
    shape=None,
    fit_initial_location=None,
    fit_initial_scale=None,
    n_grid=int(1e3),
    degree_of_freedom_for_tail_reduction=1e8,
    multiply_distance_from_reference_argmax=False,
    global_location=None,
    global_scale=None,
    global_degree_of_freedom=None,
    global_shape=None,
):
    """
    Compute the context of each value in a vector from a skew-t model.

    Entries flagged by check_array_for_bad are left out of the computation
    and stay nan in "context_like_array". Unless all five fit parameters are
    given, they are obtained from fit_vector_to_skew_t_pdf. The shape context
    compares the PDF with a reference built from the PDF evaluated on a grid
    reflected about the PDF's argmax; when all four global_* parameters are
    given, a location context against the global PDF is added to it.

    :param vector: array; values to score
    :param n_data: number or None; fit parameter
    :param location: number or None; fit parameter
    :param scale: number or None; fit parameter
    :param degree_of_freedom: number or None; fit parameter
    :param shape: number or None; fit parameter
    :param fit_initial_location: passed to fit_vector_to_skew_t_pdf
    :param fit_initial_scale: passed to fit_vector_to_skew_t_pdf
    :param n_grid: int; number of grid points
    :param degree_of_freedom_for_tail_reduction: number; degree of freedom
        used for the reflected PDF
    :param multiply_distance_from_reference_argmax: bool; passed to
        compute_pdf_and_pdf_reference_context
    :param global_location: number or None
    :param global_scale: number or None
    :param global_degree_of_freedom: number or None
    :param global_shape: number or None
    :return: dict; keys "fit", "grid", "pdf", "shape_pdf_reference",
        "shape_context", "location_pdf_reference", "location_context",
        "context" and "context_like_array"
    """

    good_mask = ~check_array_for_bad(vector, raise_for_bad=False)

    good_values = vector[good_mask]

    # Fit only when at least one of the five fit parameters is missing
    fit_parameters = (n_data, location, scale, degree_of_freedom, shape)

    if any(parameter is None for parameter in fit_parameters):

        fitted = fit_vector_to_skew_t_pdf(
            good_values,
            fit_initial_location=fit_initial_location,
            fit_initial_scale=fit_initial_scale,
        )

        (n_data, location, scale, degree_of_freedom, shape) = fitted

    grid = linspace(good_values.min(), good_values.max(), num=n_grid)

    skew_t_model = ACSkewT_gen()

    pdf = skew_t_model.pdf(grid, degree_of_freedom, shape, loc=location, scale=scale)

    # Shape reference: point-wise minimum of the PDF and the PDF evaluated on
    # the grid reflected about the PDF's argmax
    reflected_pdf = skew_t_model.pdf(
        make_reflecting_grid(grid, grid[pdf.argmax()]),
        degree_of_freedom_for_tail_reduction,
        shape,
        loc=location,
        scale=scale,
    )

    shape_pdf_reference = minimum(pdf, reflected_pdf)

    shape_context = compute_pdf_and_pdf_reference_context(
        grid, pdf, shape_pdf_reference, multiply_distance_from_reference_argmax
    )

    global_parameters = (
        global_location,
        global_scale,
        global_degree_of_freedom,
        global_shape,
    )

    has_global_fit = not any(parameter is None for parameter in global_parameters)

    if has_global_fit:

        global_pdf = skew_t_model.pdf(
            grid,
            global_degree_of_freedom,
            global_shape,
            loc=global_location,
            scale=global_scale,
        )

        location_pdf_reference = minimum(pdf, global_pdf)

        location_context = compute_pdf_and_pdf_reference_context(
            grid, pdf, location_pdf_reference, multiply_distance_from_reference_argmax
        )

        context = shape_context + location_context

    else:

        location_pdf_reference = None

        location_context = None

        context = shape_context

    # Each good value takes the context at its nearest grid point; bad
    # entries stay nan
    nearest_grid_indices = [
        absolute(grid - value).argmin() for value in good_values
    ]

    context_like_array = full(vector.size, nan)

    context_like_array[good_mask] = context[nearest_grid_indices]

    result = {}

    result["fit"] = array((n_data, location, scale, degree_of_freedom, shape))

    result["grid"] = grid

    result["pdf"] = pdf

    result["shape_pdf_reference"] = shape_pdf_reference

    result["shape_context"] = shape_context

    result["location_pdf_reference"] = location_pdf_reference

    result["location_context"] = location_context

    result["context"] = context

    result["context_like_array"] = context_like_array

    return result
def _plot_essentiality(vector, bars, n, df, shape, location, scale, n_bins,
                       n_x_grids, figure_size, dpi,
                       plot_vertical_extention_factor, plot_fits, pdf_color,
                       pdf_reversed_color, essentiality_index_color,
                       gene_fontsize, labels_fontsize, bars_linewidth,
                       bar0_color, bar1_color, bar2_color, filepath, overwrite,
                       show_plot):
    """
    Plot one feature: histogram, skew-t fit, reflected fit, essentiality
    indices and three rug-plot bars below the graph.

    :param vector: Series; values to plot; its name is used as the title
    :param bars: DataFrame; its first three rows are multiplied with vector
        and drawn as rug plots; the last 3 characters of each row name are
        used as that bar's label
    :param n: number; fit parameter, shown in the subtitle only
    :param df: number; skew-t degree of freedom
    :param shape: number; skew-t shape; a positive shape selects the '-'
        direction for the essentiality indices
    :param location: number; skew-t location
    :param scale: number; skew-t scale
    :param n_bins: int; number of histogram bins
    :param n_x_grids: int; number of grid points for the PDFs
    :param figure_size: tuple; figure size
    :param dpi: int; passed to save_plot
    :param plot_vertical_extention_factor: number; the y-axis upper limit is
        the tallest histogram count times this factor
    :param plot_fits: bool; draw the PDFs, the essentiality indices and the
        fit-parameter subtitle or not
    :param pdf_color: color of the histogram and the skew-t PDF
    :param pdf_reversed_color: color of the reflected skew-t PDF
    :param essentiality_index_color: color of the essentiality indices
    :param gene_fontsize: number; title font size
    :param labels_fontsize: number; axis-label font size
    :param bars_linewidth: number; rug-plot line width
    :param bar0_color: color of the 1st bar
    :param bar1_color: color of the 2nd bar
    :param bar2_color: color of the 3rd bar
    :param filepath: str; if truthy, the plot is saved with save_plot
    :param overwrite: bool; passed to save_plot
    :param show_plot: bool; call plt.show() or not
    :return: None
    """

    # =========================================================================
    # Set up
    # =========================================================================
    # Initialize a figure
    figure = plt.figure(figsize=figure_size)

    # Set figure styles
    set_style('ticks')
    despine(offset=9)

    # Set figure grids
    n_rows = 10
    n_rows_graph = 5
    gridspec = GridSpec(n_rows, 1)

    # Make graph ax
    ax_graph = plt.subplot(gridspec[:n_rows_graph, :])

    # Set bar axes
    ax_bar0 = plt.subplot(gridspec[n_rows_graph + 1:n_rows_graph + 2, :])
    ax_bar1 = plt.subplot(gridspec[n_rows_graph + 2:n_rows_graph + 3, :])
    ax_bar2 = plt.subplot(gridspec[n_rows_graph + 3:n_rows_graph + 4, :])

    # Bar axes show only the rug lines: hide spines, ticks and tick labels
    for ax in [ax_bar1, ax_bar0, ax_bar2]:
        for side in ('top', 'bottom', 'left', 'right'):
            ax.spines[side].set_visible(False)
        for get_artists in (ax.get_xticklines, ax.get_xticklabels,
                            ax.get_yticklines, ax.get_yticklabels):
            for t in get_artists():
                t.set_visible(False)

    # =========================================================================
    # Plot histogram
    # =========================================================================
    distplot(vector,
             hist=True,
             bins=n_bins,
             kde=False,
             hist_kws={
                 'linewidth': 0.92,
                 'alpha': 0.24,
                 'color': pdf_color
             },
             ax=ax_graph)

    # =========================================================================
    # Plot skew-t fit PDF
    # =========================================================================
    # Initialize a skew-t generator
    skew_t = ACSkewT_gen()

    # Set up x-grids
    x_grids = linspace(vector.min(), vector.max(), n_x_grids)

    # Generate skew-t PDF
    skew_t_pdf = skew_t.pdf(x_grids, df, shape, loc=location, scale=scale)

    # Scale skew-t PDF so that its peak matches the tallest histogram bin
    histogram_max = histogram(vector, bins=n_bins)[0].max()
    scale_factor = histogram_max / skew_t_pdf.max()
    skew_t_pdf *= scale_factor

    # Shared by every fitted line drawn below
    line_kwargs = {'linestyle': '-', 'linewidth': 2.6}

    if plot_fits:
        # Plot skew-t PDF
        ax_graph.plot(x_grids, skew_t_pdf, color=pdf_color, **line_kwargs)

    # Extend plot vertically
    ax_graph.axis([
        vector.min(),
        vector.max(), 0, histogram_max * plot_vertical_extention_factor
    ])

    # =========================================================================
    # Plot reflected skew-t PDF
    # =========================================================================
    # Get the x-grids to get the reflecting PDF
    x_grids_for_reflection = define_x_coordinates_for_reflection(
        skew_t_pdf, x_grids)

    # Generate skew-t PDF over reflected x-grids, and scale
    skew_t_pdf_reflected = skew_t.pdf(
        x_grids_for_reflection, df, shape, loc=location,
        scale=scale) * scale_factor

    if plot_fits:
        # Plot over the original x-grids
        ax_graph.plot(x_grids,
                      skew_t_pdf_reflected,
                      color=pdf_reversed_color,
                      **line_kwargs)

    # =========================================================================
    # Plot essentiality indices
    # =========================================================================
    # A conditional expression avoids indexing a list with a (possibly NumPy)
    # bool
    essentiality_indices = define_cumulative_area_ratio_function(
        skew_t_pdf,
        skew_t_pdf_reflected,
        x_grids,
        direction='-' if shape > 0 else '+')

    if plot_fits:
        ax_graph.plot(x_grids,
                      essentiality_indices,
                      color=essentiality_index_color,
                      **line_kwargs)

    # =========================================================================
    # Decorate
    # =========================================================================
    # Set title
    figure.text(0.5,
                0.96,
                vector.name,
                fontsize=gene_fontsize,
                weight='bold',
                horizontalalignment='center')

    if plot_fits:
        figure.text(
            0.5,
            0.92,
            'N={:.2f} DF={:.2f} Shape={:.2f} Location={:.2f} Scale={:.2f}'
            .format(n, df, shape, location, scale),
            fontsize=gene_fontsize * 0.6,
            weight='bold',
            horizontalalignment='center')

    # Set labels
    label_kwargs = {'weight': 'bold', 'fontsize': labels_fontsize}
    ax_graph.set_xlabel('RNAi Score', **label_kwargs)
    ax_graph.set_ylabel('Frequency', **label_kwargs)

    # Set ticks
    tick_kwargs = {'size': labels_fontsize * 0.81, 'weight': 'normal'}
    for t in ax_graph.get_xticklabels():
        t.set(**tick_kwargs)
    for t in ax_graph.get_yticklabels():
        t.set(**tick_kwargs)

    # =========================================================================
    # Plot bars
    # =========================================================================
    bar_kwargs = {
        'rotation': 90,
        'weight': 'bold',
        'fontsize': labels_fontsize * 0.81
    }
    bar_specifications = [
        {
            'vector': bars.iloc[0, :],
            'ax': ax_bar0,
            'color': bar0_color
        },
        {
            'vector': bars.iloc[1, :],
            'ax': ax_bar1,
            'color': bar1_color
        },
        {
            'vector': bars.iloc[2, :],
            'ax': ax_bar2,
            'color': bar2_color
        },
    ]

    for spec in bar_specifications:
        v = spec['vector']
        ax = spec['ax']
        c = spec['color']
        rugplot(v * vector, height=1, color=c, ax=ax, linewidth=bars_linewidth)
        ax.set_ylabel(v.name[-3:], **bar_kwargs)

    # =========================================================================
    # Save
    # =========================================================================
    if filepath:
        save_plot(filepath, dpi=dpi, overwrite=overwrite)

    if show_plot:
        plt.show()

    # Close the figure created above by handle, so it is released whether or
    # not it is still pyplot's current figure after plt.show()
    plt.close(figure)
def compute_context(
        _1d_array,
        skew_t_model=None,
        location=None,
        scale=None,
        degree_of_freedom=None,
        shape=None,
        fit_fixed_location=None,
        fit_fixed_scale=None,
        fit_initial_location=None,
        fit_initial_scale=None,
        n_grid=1e3,
        degree_of_freedom_for_tail_reduction=1e8,
        multiply_distance_from_location=False,
        global_location=None,
        global_scale=None,
        global_degree_of_freedom=None,
        global_shape=None,
):
    """
    Compute context indices for each value of a 1D array from a skew-t model.

    Values flagged by check_nd_array_for_bad_value are left out of the
    computation and stay nan in 'context_indices_like_array'. Unless
    location, scale, degree_of_freedom and shape are all given, they are
    obtained from fit_skew_t_pdf. Context indices are signed cumulative sums
    of pdf * log(pdf / reference), normalized by their total, taken outward
    from the argmax of the reference. The shape reference is built from the
    PDF evaluated on reflected coordinates; when all four global_* parameters
    are given, location indices against the global PDF are added.

    :param _1d_array: array; values to score
    :param skew_t_model: skew-t model or None; None creates an ACSkewT_gen
    :param location: number or None; fit parameter
    :param scale: number or None; fit parameter
    :param degree_of_freedom: number or None; fit parameter
    :param shape: number or None; fit parameter
    :param fit_fixed_location: passed to fit_skew_t_pdf
    :param fit_fixed_scale: passed to fit_skew_t_pdf
    :param fit_initial_location: passed to fit_skew_t_pdf
    :param fit_initial_scale: passed to fit_skew_t_pdf
    :param n_grid: number; number of grid points; converted to int
    :param degree_of_freedom_for_tail_reduction: number; degree of freedom
        used for the reflected PDF
    :param multiply_distance_from_location: bool; multiply the shape indices
        by the distance from the grid point at the reference argmax
    :param global_location: number or None
    :param global_scale: number or None
    :param global_degree_of_freedom: number or None
    :param global_shape: number or None
    :return: dict; keys 'fit', 'grid', 'pdf', 'shape_pdf_reference',
        'shape_context_indices', 'location_pdf_reference',
        'location_context_indices', 'context_indices' and
        'context_indices_like_array'
    """

    is_bad_value = check_nd_array_for_bad_value(
        _1d_array,
        raise_for_bad_value=False,
    )

    _1d_array_good = _1d_array[~is_bad_value]

    if skew_t_model is None:
        skew_t_model = ACSkewT_gen()

    if any(parameter is None for parameter in (
            location,
            scale,
            degree_of_freedom,
            shape,
    )):

        n, location, scale, degree_of_freedom, shape = fit_skew_t_pdf(
            _1d_array_good,
            skew_t_model=skew_t_model,
            fit_fixed_location=fit_fixed_location,
            fit_fixed_scale=fit_fixed_scale,
            fit_initial_location=fit_initial_location,
            fit_initial_scale=fit_initial_scale,
        )

    else:
        n = _1d_array_good.size

    # linspace needs an integer number of points; the default (1e3) is a
    # float, which current NumPy rejects.
    grid = linspace(
        _1d_array_good.min(),
        _1d_array_good.max(),
        int(n_grid),
    )

    pdf = skew_t_model.pdf(
        grid,
        degree_of_freedom,
        shape,
        loc=location,
        scale=scale,
    )

    def _signed_cumulative_kl(pdf_reference):
        # Turn a reference PDF into (context indices, reference argmax).
        # The reference is floored at EPS in place, so the array returned to
        # the caller is the floored one.
        pdf_reference[pdf_reference < EPS] = EPS

        kl = pdf * log(pdf / pdf_reference)

        kl_darea = kl / kl.sum()

        reference_argmax = pdf_reference.argmax()

        # Negative cumulative sum to the left of the reference argmax,
        # positive cumulative sum from it to the right
        indices = concatenate((
            -cumsum(kl_darea[:reference_argmax][::-1])[::-1],
            cumsum(kl_darea[reference_argmax:]),
        ))

        return indices, reference_argmax

    shape_pdf_reference = minimum(
        pdf,
        skew_t_model.pdf(
            get_coordinates_for_reflection(grid, pdf),
            degree_of_freedom_for_tail_reduction,
            shape,
            loc=location,
            scale=scale,
        ),
    )

    shape_context_indices, shape_pdf_reference_argmax = _signed_cumulative_kl(
        shape_pdf_reference)

    if multiply_distance_from_location:
        shape_context_indices *= absolute(
            grid - grid[shape_pdf_reference_argmax])

    shape_context_indices *= (1 + absolute(shape)) / (
        scale * log(1 + degree_of_freedom))

    if all(parameter is not None for parameter in (
            global_location,
            global_scale,
            global_degree_of_freedom,
            global_shape,
    )):

        location_pdf_reference = minimum(
            pdf,
            skew_t_model.pdf(
                grid,
                global_degree_of_freedom,
                global_shape,
                loc=global_location,
                scale=global_scale,
            ),
        )

        (location_context_indices,
         location_pdf_reference_argmax) = _signed_cumulative_kl(
             location_pdf_reference)

        location_context_indices *= absolute(
            grid - grid[location_pdf_reference_argmax])

        location_context_indices /= scale + global_scale

        context_indices = location_context_indices + shape_context_indices

    else:

        location_pdf_reference = None

        location_context_indices = None

        context_indices = shape_context_indices

    # Each good value takes the index at its nearest grid point; bad values
    # stay nan
    context_indices_like_array = full(
        _1d_array.size,
        nan,
    )

    context_indices_like_array[~is_bad_value] = context_indices[[
        absolute(grid - value).argmin() for value in _1d_array_good
    ]]

    return {
        'fit': asarray((
            n,
            location,
            scale,
            degree_of_freedom,
            shape,
        )),
        'grid': grid,
        'pdf': pdf,
        'shape_pdf_reference': shape_pdf_reference,
        'shape_context_indices': shape_context_indices,
        'location_pdf_reference': location_pdf_reference,
        'location_context_indices': location_context_indices,
        'context_indices': context_indices,
        'context_indices_like_array': context_indices_like_array,
    }
def compute_context(
    _1d_array,
    n_data=None,
    location=None,
    scale=None,
    degree_of_freedom=None,
    shape=None,
    fit_fixed_location=None,
    fit_fixed_scale=None,
    fit_initial_location=None,
    fit_initial_scale=None,
    n_grid=1e3,
    degree_of_freedom_for_tail_reduction=1e8,
    minimum_kl=1e-2,
    scale_with_kl=True,
    multiply_distance_from_reference_argmax=False,
    global_location=None,
    global_scale=None,
    global_degree_of_freedom=None,
    global_shape=None,
):
    """
    Compute context indices for each value of a 1D array from a skew-t model.

    Values flagged by check_nd_array_for_bad are left out of the computation
    and stay nan in "context_indices_like_array". Unless all five fit
    parameters are given, they are obtained from fit_skew_t_pdf. Shape
    indices come from _compute_context_indices applied to the PDF and a
    reference built from the PDF evaluated on coordinates reflected about the
    PDF's argmax; when all four global_* parameters are given, location
    indices against the global PDF are added to them.

    :param _1d_array: array; values to score
    :param n_data: number or None; fit parameter
    :param location: number or None; fit parameter
    :param scale: number or None; fit parameter
    :param degree_of_freedom: number or None; fit parameter
    :param shape: number or None; fit parameter
    :param fit_fixed_location: passed to fit_skew_t_pdf
    :param fit_fixed_scale: passed to fit_skew_t_pdf
    :param fit_initial_location: passed to fit_skew_t_pdf
    :param fit_initial_scale: passed to fit_skew_t_pdf
    :param n_grid: number; number of grid points; converted to int
    :param degree_of_freedom_for_tail_reduction: number; degree of freedom
        used for the reflected PDF
    :param minimum_kl: passed to _compute_context_indices
    :param scale_with_kl: passed to _compute_context_indices
    :param multiply_distance_from_reference_argmax: passed to
        _compute_context_indices
    :param global_location: number or None
    :param global_scale: number or None
    :param global_degree_of_freedom: number or None
    :param global_shape: number or None
    :return: dict; keys "fit", "grid", "pdf", "shape_pdf_reference",
        "shape_context_indices", "location_pdf_reference",
        "location_context_indices", "context_indices" and
        "context_indices_like_array"
    """

    is_bad = check_nd_array_for_bad(_1d_array, raise_for_bad=False)

    _1d_array_good = _1d_array[~is_bad]

    if any(
        parameter is None
        for parameter in (n_data, location, scale, degree_of_freedom, shape)
    ):

        n_data, location, scale, degree_of_freedom, shape = fit_skew_t_pdf(
            _1d_array_good,
            fit_fixed_location=fit_fixed_location,
            fit_fixed_scale=fit_fixed_scale,
            fit_initial_location=fit_initial_location,
            fit_initial_scale=fit_initial_scale,
        )

    # linspace needs an integer number of points; the default (1e3) is a
    # float, which current NumPy rejects.
    grid = linspace(_1d_array_good.min(), _1d_array_good.max(), int(n_grid))

    skew_t_model = ACSkewT_gen()

    pdf = skew_t_model.pdf(grid, degree_of_freedom, shape, loc=location, scale=scale)

    # Shape reference: point-wise minimum of the PDF and the PDF evaluated on
    # coordinates reflected about the PDF's argmax
    shape_pdf_reference = minimum(
        pdf,
        skew_t_model.pdf(
            make_coordinates_for_reflection(grid, grid[pdf.argmax()]),
            degree_of_freedom_for_tail_reduction,
            shape,
            loc=location,
            scale=scale,
        ),
    )

    shape_context_indices = _compute_context_indices(
        grid,
        pdf,
        shape_pdf_reference,
        minimum_kl,
        scale_with_kl,
        multiply_distance_from_reference_argmax,
    )

    if any(
        parameter is None
        for parameter in (
            global_location,
            global_scale,
            global_degree_of_freedom,
            global_shape,
        )
    ):

        location_pdf_reference = None

        location_context_indices = None

        context_indices = shape_context_indices

    else:

        location_pdf_reference = minimum(
            pdf,
            skew_t_model.pdf(
                grid,
                global_degree_of_freedom,
                global_shape,
                loc=global_location,
                scale=global_scale,
            ),
        )

        location_context_indices = _compute_context_indices(
            grid,
            pdf,
            location_pdf_reference,
            minimum_kl,
            scale_with_kl,
            multiply_distance_from_reference_argmax,
        )

        context_indices = shape_context_indices + location_context_indices

    # Each good value takes the index at its nearest grid point; bad values
    # stay nan
    context_indices_like_array = full(_1d_array.size, nan)

    context_indices_like_array[~is_bad] = context_indices[
        [absolute(grid - value).argmin() for value in _1d_array_good]
    ]

    return {
        "fit": asarray((n_data, location, scale, degree_of_freedom, shape)),
        "grid": grid,
        "pdf": pdf,
        "shape_pdf_reference": shape_pdf_reference,
        "shape_context_indices": shape_context_indices,
        "location_pdf_reference": location_pdf_reference,
        "location_context_indices": location_context_indices,
        "context_indices": context_indices,
        "context_indices_like_array": context_indices_like_array,
    }
def plot_essentiality(feature_x_sample,
                      feature_x_fit,
                      bar_df,
                      directory_path,
                      features=(),
                      enumerate_functions=False,
                      figure_size=FIGURE_SIZE,
                      n_x_grids=3000,
                      n_bins=50,
                      plot_fits=True,
                      show_plot=True,
                      dpi=DPI):
    """
    Make essentiality plot for each gene.

    For every selected feature this draws the normalized histogram, the
    fitted skew-t PDF, the reflected PDF, the essentiality index (rescaled to
    the height of the PDF) and three rug-plot bars, then saves the figure.

    :param feature_x_sample: DataFrame; (n_features, n_samples); used
        directly as a DataFrame (a filepath is not loaded here)
    :param feature_x_fit: DataFrame; (n_features, 5); must have the columns
        'N', 'DF', 'Shape', 'Location' and 'Scale' and be indexed by feature
    :param bar_df: DataFrame; passed to _get_amp_mut_del together with the
        feature, whose first three result rows are drawn as bars
    :param directory_path: str;
        directory_path/essentiality_plots/<feature>.png will be saved
    :param features: iterable; (n_selected_features); features to plot; all
        features are plotted when empty
    :param enumerate_functions: bool; additionally plot, in separate figures,
        the essentiality index from each candidate expression
    :param figure_size: tuple; figure size
    :param n_x_grids: int; number of x grids
    :param n_bins: int; number of histogram bins
    :param plot_fits: bool; accepted but not used by this function
    :param show_plot: bool; show plot or not
    :param dpi: int; dots per inch
    :return: None
    """

    # ==========================================================================
    # Select features to plot
    # ==========================================================================
    if len(features):  # Plot only specified features
        is_ = [f for f in features if f in feature_x_sample.index]

        if len(is_):
            print('Plotting features: {} ...'.format(', '.join(is_)))
            # Label-based selection (.ix has been removed from pandas)
            feature_x_sample = feature_x_sample.loc[is_, :]
        else:
            raise ValueError('Specified features not found.')
    else:  # Plot all features
        print('Plotting all features ...')

    # Initialize a skew-t generator (the same for every feature)
    skew_t = ACSkewT_gen()

    line_kwargs = dict(linestyle='-', linewidth=2.6)

    # ==========================================================================
    # Plot each feature
    # ==========================================================================
    for i, (f_i, f_v) in enumerate(feature_x_sample.iterrows()):
        print('Plotting {} (@{}/{}) ...'.format(f_i, i,
                                                feature_x_sample.shape[0]))

        # ======================================================================
        # Set up figure
        # ======================================================================
        # Initialize a figure
        fig = figure(figsize=figure_size)

        # Figures opened for enumerate_functions; closed with fig at the end
        extra_figures = []

        # Set figure grids
        n_rows = 10
        n_rows_graph = 5
        gridspec = GridSpec(n_rows, 1)

        # Make graph ax
        ax_graph = subplot(gridspec[:n_rows_graph, :])

        # Set bar axes
        ax_bar0 = subplot(gridspec[n_rows_graph + 1:n_rows_graph + 2, :])
        ax_bar1 = subplot(gridspec[n_rows_graph + 2:n_rows_graph + 3, :])
        ax_bar2 = subplot(gridspec[n_rows_graph + 3:n_rows_graph + 4, :])

        # Bar axes show only the rug lines
        for ax in (ax_bar1, ax_bar0, ax_bar2):
            ax.spines['top'].set_visible(False)
            ax.spines['bottom'].set_visible(False)
            ax.spines['left'].set_visible(False)
            ax.spines['right'].set_visible(False)
            for t in ax.get_xticklines():
                t.set_visible(False)
            for t in ax.get_xticklabels():
                t.set_visible(False)
            for t in ax.get_yticklines():
                t.set_visible(False)
            for t in ax.get_yticklabels():
                t.set_visible(False)

        # ======================================================================
        # Plot histogram
        # ======================================================================
        distplot(f_v,
                 bins=n_bins,
                 kde=False,
                 norm_hist=True,
                 hist_kws=dict(linewidth=0.92, color='#20d9ba', alpha=0.26),
                 ax=ax_graph)

        # ======================================================================
        # Decorate
        # ======================================================================
        decorate(ax=ax_graph,
                 style='white',
                 title=f_i,
                 xlabel='RNAi Score',
                 ylabel='Frequency')

        # ======================================================================
        # Plot skew-t fit PDF
        # ======================================================================
        # Set up grids
        grids = linspace(f_v.min(), f_v.max(), n_x_grids)

        # Parse fitted parameters (looked up by feature label)
        n, df, shape, location, scale = feature_x_fit.loc[
            f_i, ['N', 'DF', 'Shape', 'Location', 'Scale']]
        fig.text(0.5,
                 0.9,
                 'N={:.0f}    DF={:.2f}    Shape={:.2f}    Location={:.2f}    '
                 'Scale={:.2f}'.format(n, df, shape, location, scale),
                 size=16,
                 weight='bold',
                 color='#220530',
                 horizontalalignment='center')

        # A positive shape selects the '-' direction; a conditional
        # expression avoids indexing a list with a (possibly NumPy) bool
        direction = '-' if shape > 0 else '+'

        # Generate skew-t PDF
        skew_t_pdf = skew_t.pdf(grids, df, shape, loc=location, scale=scale)

        # Plot skew-t PDF
        ax_graph.plot(grids, skew_t_pdf, color='#20d9ba', **line_kwargs)

        # ======================================================================
        # Plot reflected skew-t PDF
        # ======================================================================
        # Generate skew-t PDF over reflected grids
        skew_t_pdf_r = skew_t.pdf(define_x_coordinates_for_reflection(
            skew_t_pdf, grids),
                                  df,
                                  shape,
                                  loc=location,
                                  scale=scale)

        # Plot over the original grids
        ax_graph.plot(grids, skew_t_pdf_r, color='#4e41d9', **line_kwargs)

        # ======================================================================
        # Plot essentiality indices from various functions
        # ======================================================================
        if enumerate_functions:
            figure_size_ = (asarray(figure_size) * 0.7).astype(int)

            # NOTE(review): the commented lines below are kept from the
            # original as they were; they appear to be disabled candidate
            # expressions with the author's remarks. Only the two
            # uncommented entries are evaluated.
            functions = [
                # f1 /f2
                # Explode 'f1 / f2',
                # Signal at center 'log(f1 / f2)',
                # Explode 'where(f2 < f1, f1 / f2, 0)',
                # Not that good during entropy test 'where(f2 < f1, log(f1 / f2), 0)',

                # - f2 /f1
                # Signal at center '-(f2 / f1)',
                # Signal at center '-log(f2 / f1)',
                # Spikes to 0 after center 'where(f2 < f1, -(f2 / f1), 0)',
                # == log(f1/ f2) 'where(f2 < f1, -log(f2 / f1), 0)',

                # carea1 / carea2
                # Explode 'carea1 / carea2',
                # Not that good during entropy test 'log(carea1 / carea2)',
                # Explode 'where(f2 < f1, carea1 / carea2, 0)',
                # 0ing abruptly drops 'where(f2 < f1, log(carea1 / carea2), 0)',

                # (f1 - f2) / f1
                # Better during only f2 < f1 '(f1 - f2) / f1',
                # Normalized same as not logging and raising to a power 'log( (f1 - f2) / f1 )',
                'where(f2 < f1, (f1 - f2) / f1, 0)',
                # Spikes to 0 after center 'where(f2 < f1, log( (f1 - f2) / f1 ), 0)',

                # ((f1 - f2) / f1)^scale
                # Super negative '((f1 - f2) / f1)**{}'.format(scale),
                # The comparison operand was misspelled 'f1log', a name none
                # of the other expressions use; it is 'f1', as in the
                # essentiality index plotted on the main graph below.
                'where(f2 < f1, ((f1 - f2) / f1)**{}, 0)'.format(scale),
                # log
                # Same as just log 'where(f2 < f1, log( ((f1 - f2) / f1)**{} ), 0 )'.format(scale),
                # Hard to interpret

                # ((f1 - f2) / f1)^(1/scale)
                # log(-)=nan after center '((f1 - f2) / f1)**(1/{})'.format(scale),
                # Widens wide 'where(f2 < f1, ((f1 - f2) / f1)**(1/{}), 0)'.format(scale),
                # Hard to interpret

                # ((f1 - f2) / f1)^std(ei)
                # log(-)=nan after center '((f1 - f2) / f1)**(((f1 - f2) / f1).std())',
                # Hard to interpret 'where(f2 < f1, ((f1 - f2) / f1)**(((f1 - f2) / f1).std()), 0) ',
                # Spikes to 0 after center 'where(f2 < f1, log( ((f1 - f2) / f1)**(((f1 - f2) / f1).std()) ), 0) ',
                # Hard to interpret

                # ((f1 - f2) / f1)^(1/std(ei))
                # log(-)=nan after center '((f1 - f2) / f1)**(1/((f1 - f2) / f1).std())',
                # Hard to interpret (best during entropy test) 'where(f2 < f1, ((f1 - f2) / f1)**(1/((f1 - f2) / f1).std()), 0) ',
                # Same as just log 'where(f2 < f1, log( ((f1 - f2) / f1)**(1/((f1 - f2) / f1).std()) ), 0) ',
            ]

            eis = []

            # Plot each function
            for j, f in enumerate(functions):
                extra_figures.append(figure(figsize=figure_size_))

                # Compute essentiality index
                ei = _compute_essentiality_index(skew_t_pdf, skew_t_pdf_r, f,
                                                 direction,
                                                 grids[1] - grids[0])

                c = CMAP_CATEGORICAL(j / len(functions))
                eis.append((ei, c))

                plot(grids, ei, color=c, **line_kwargs)
                decorate(title=f)

            # Plot all functions
            extra_figures.append(figure(figsize=figure_size_))
            distplot(f_v,
                     bins=n_bins,
                     kde=False,
                     norm_hist=True,
                     hist_kws=dict(linewidth=0.92, color='#070707',
                                   alpha=0.26))
            for ei_, c in eis:
                plot(grids, (ei_ - ei_.min()) / (ei_.max() - ei_.min()) *
                     skew_t_pdf.max(),
                     color=c,
                     linewidth=line_kwargs['linewidth'])
            decorate(title=f_i)

        # ======================================================================
        # Plot essentiality index (#fc154f)
        # ======================================================================
        ei = _compute_essentiality_index(
            skew_t_pdf, skew_t_pdf_r,
            'where(f2 < f1, ((f1 - f2) / f1)**{}, 0)'.format(scale),
            direction, grids[1] - grids[0])
        # Rescale to [0, max(PDF)] so that it shares the PDF's axis
        ax_graph.plot(grids, (ei - ei.min()) / (ei.max() - ei.min()) *
                      skew_t_pdf.max(),
                      color='#fc154f',
                      **line_kwargs)

        # ======================================================================
        # Plot bars
        # ======================================================================
        a_m_d = _get_amp_mut_del(bar_df, f_i)

        bar_specifications = [
            dict(vector=a_m_d.iloc[0, :], ax=ax_bar0, color='#9017e6'),
            dict(vector=a_m_d.iloc[1, :], ax=ax_bar1, color='#6410a0'),
            dict(vector=a_m_d.iloc[2, :], ax=ax_bar2, color='#470b72'),
        ]

        for spec in bar_specifications:
            v = spec['vector']
            ax = spec['ax']
            c = spec['color']
            rugplot(v * f_v, height=1, color=c, linewidth=2.4, ax=ax)
            decorate(ax=ax, ylabel=v.name[-3:])

        # ======================================================================
        # Save
        # ======================================================================
        # save_plot is given no figure handle, so it can only act on the
        # current figure. Make the main figure current again: with
        # enumerate_functions the last figure opened above would otherwise be
        # the one saved.
        figure(fig.number)
        save_plot(join(directory_path,
                       'essentiality_plots/{}.png'.format(f_i)),
                  dpi=dpi)

        if show_plot:
            show()

        # Close every figure opened for this feature, not just the current
        # one, so that figures do not accumulate across features
        close(fig)
        for extra_figure in extra_figures:
            close(extra_figure)