Example #1
0
def make_essentiality_matrix(feature_x_sample,
                             feature_x_fit,
                             n_x_grids=3000,
                             factor=1):
    """
    Make an essentiality matrix from values and their fitted skew-t
    parameters.

    Only the row labels present in both inputs are used.

    :param feature_x_sample: DataFrame; one row of values per feature
    :param feature_x_fit: DataFrame; one row per feature whose 5 columns are
        unpacked, in order, as (n, df, shape, location, scale)
    :param n_x_grids: int; number of grid points spanning each row's
        [min, max] range
    :param factor: number; multiplier applied to every essentiality index
    :return: DataFrame; same columns as feature_x_sample, one row per common
        feature
    """

    # Explicit set intersection; `index & index` no longer means
    # intersection in current pandas.
    common_indices = feature_x_sample.index.intersection(feature_x_fit.index)

    # Test how many labels are shared, not their truthiness: any() would
    # report "no common features" for labels such as 0 or ''.
    if common_indices.size:
        print_log(
            'Making essentiality matrix using {} common features (indices) ...'
            .format(common_indices.size))
    else:
        print_log('No common features (indices).')

    # Label-based selection (.ix has been removed from pandas)
    gene_x_sample = feature_x_sample.loc[common_indices, :]
    gene_x_fit = feature_x_fit.loc[common_indices, :]

    skew_t = ACSkewT_gen()
    essentiality_matrix = empty(gene_x_sample.shape)
    for i, (g, (_, df, shape, location,
                scale)) in enumerate(gene_x_fit.iterrows()):
        # Skew-t PDF evaluated over the range of this feature's values
        vector = asarray(gene_x_sample.loc[g, :])
        x_grids = linspace(vector.min(), vector.max(), n_x_grids)
        skew_t_pdf = skew_t.pdf(x_grids, df, shape, loc=location, scale=scale)

        # Reflected Skew-t PDF
        x_grids_for_reflection = define_x_coordinates_for_reflection(
            skew_t_pdf, x_grids)
        skew_t_pdf_reflected = skew_t.pdf(x_grids_for_reflection,
                                          df,
                                          shape,
                                          loc=location,
                                          scale=scale)

        # Essentiality indices; a positive shape selects direction '-'.
        # A conditional expression avoids indexing a list with a boolean.
        essentiality_indices = define_cumulative_area_ratio_function(
            skew_t_pdf,
            skew_t_pdf_reflected,
            x_grids,
            direction='-' if shape > 0 else '+')

        # Each value takes the index at its nearest grid point, signed by the
        # shape and scaled by factor
        essentiality_matrix[i, :] = [
            factor * sign(shape) *
            essentiality_indices[argmin(abs(x_grids - v))] for v in vector
        ]

    return DataFrame(essentiality_matrix,
                     index=gene_x_sample.index,
                     columns=gene_x_sample.columns)
Example #2
0
def example_T():
    """
    Print sample and theoretical mean/variance of the skew-t distribution
    next to the corresponding values for scipy's Student t.
    """
    distribution = ACSkewT_gen()

    # Arguments are (df, alpha) = (10, 0)
    skew_t_sample = distribution.rvs(10, 0, size=500)
    sample_mean = skew_t_sample.mean()
    sample_variance = skew_t_sample.var()
    print('sample mean var: ', sample_mean, sample_variance)
    print('theoretical mean var', distribution.stats(10, 0))
    print('t mean var', stats.t.stats(10))

    # -> folded t distribution, as alpha -> inf
    print(distribution.stats(10, 1000))
    folded_t_sample = np.abs(stats.t.rvs(10, size=1000))
    print(folded_t_sample.mean(), folded_t_sample.var())
Example #3
0
def example_T():
    """
    Print sample and theoretical mean/variance of the skew-t distribution
    next to the corresponding values for scipy's Student t.

    Every print is a single-argument call, so the function parses on both
    Python 2 and Python 3 (the original print statements are a SyntaxError on
    Python 3).
    """
    skewt = ACSkewT_gen()
    rvs = skewt.rvs(10, 0, size=500)
    # The label ends with a space and print adds another, hence two spaces
    print('sample mean var:  {} {}'.format(rvs.mean(), rvs.var()))
    print('theoretical mean var {}'.format(skewt.stats(10, 0)))
    print('t mean var {}'.format(stats.t.stats(10)))
    print(skewt.stats(10, 1000))  # -> folded t distribution, as alpha -> inf
    rvs = np.abs(stats.t.rvs(10, size=1000))
    print('{} {}'.format(rvs.mean(), rvs.var()))
Example #4
0
def fit_skew_t_pdf(
    _1d_array,
    skew_t_model=None,
    fit_fixed_location=None,
    fit_fixed_scale=None,
    fit_initial_location=None,
    fit_initial_scale=None,
):
    """
    Fit a skew-t distribution to a 1D array.

    :param _1d_array: array; values to fit (needs .mean(), .std() and .size)
    :param skew_t_model: object with a .fit method; a new ACSkewT_gen is
        created when None
    :param fit_fixed_location: number; passed to fit as floc when given
    :param fit_fixed_scale: number; passed to fit as fscale when given
    :param fit_initial_location: number; starting loc, defaults to the mean
    :param fit_initial_scale: number; starting scale, defaults to the
        standard deviation
    :return: tuple; (n, location, scale, degree_of_freedom, shape)
    """

    model = ACSkewT_gen() if skew_t_model is None else skew_t_model

    # Fixed parameters are forwarded only when the caller supplied them
    fit_options = {
        name: value
        for name, value in (
            ('floc', fit_fixed_location),
            ('fscale', fit_fixed_scale),
        ) if value is not None
    }

    # Starting values fall back to moments of the data
    fit_options['loc'] = (_1d_array.mean() if fit_initial_location is None
                          else fit_initial_location)
    fit_options['scale'] = (_1d_array.std() if fit_initial_scale is None
                            else fit_initial_scale)

    degree_of_freedom, shape, location, scale = model.fit(
        _1d_array, **fit_options)

    # An extreme shape triggers one refit with the location pinned to the
    # median; no other option is carried over to that refit
    if abs(shape) > 32:
        warn('Refitting with the median to be the fixed location ...')
        degree_of_freedom, shape, location, scale = model.fit(
            _1d_array, floc=median(_1d_array))

    return _1d_array.size, location, scale, degree_of_freedom, shape
Example #5
0
def make_essentiality_matrix(feature_x_sample,
                             feature_x_fit,
                             n_grids=3000,
                             function='scaled_fractional_difference',
                             factor=1):
    """
    Make an essentiality matrix by applying an essentiality function to each
    feature's fitted skew-t PDF and its reflection.

    :param feature_x_sample: DataFrame; (n_features, n_samples)
    :param feature_x_fit: DataFrame; row i holds the fit of row i of
        feature_x_sample, its 5 columns being unpacked in order as
        (n, df, shape, location, scale)
    :param n_grids: int; number of grid points spanning each feature's
        [min, max] range
    :param function: str; expression handed to _compute_essentiality_index.
        A value starting with 'scaled_fractional_difference' is expanded, per
        feature, into an expression using that feature's fitted scale.
    :param factor: number; multiplier applied to every essentiality index
    :return: DataFrame; (n_features, n_samples)
    """

    print('\tApplying {} to each feature ...'.format(function))

    empty_ = empty(feature_x_sample.shape)

    skew_t = ACSkewT_gen()

    for i, (f_i, f_v) in enumerate(feature_x_sample.iterrows()):

        # Build skew-t PDF
        grids = linspace(f_v.min(), f_v.max(), n_grids)
        # Rows are matched by position (.ix has been removed from pandas).
        # NOTE(review): assumes feature_x_fit rows are in the same order as
        # feature_x_sample rows - confirm against callers.
        n, df, shape, location, scale = feature_x_fit.iloc[i, :]
        skew_t_pdf = skew_t.pdf(grids, df, shape, loc=location, scale=scale)

        # Build reflected skew-t PDF
        skew_t_pdf_r = skew_t.pdf(define_x_coordinates_for_reflection(
            skew_t_pdf, grids),
                                  df,
                                  shape,
                                  loc=location,
                                  scale=scale)

        # Set up function. The expanded expression lives in its own variable:
        # overwriting `function` would make the check below fail from the
        # second feature on and reuse the first feature's scale everywhere.
        if function.startswith('scaled_fractional_difference'):
            expression = 'where(f2 < f1, ((f1 - f2) / f1)**{}, 0)'.format(
                scale)
        else:
            expression = function

        # A positive shape selects direction '-'
        ei = _compute_essentiality_index(skew_t_pdf, skew_t_pdf_r, expression,
                                         '-' if shape > 0 else '+',
                                         grids[1] - grids[0])

        ei = normalize_1d(ei, '0-1')

        # Each value takes the index at its nearest grid point, signed by the
        # shape and scaled by factor
        empty_[i, :] = ei[[argmin(abs(grids - x))
                           for x in asarray(f_v)]] * sign(shape) * factor

    return DataFrame(empty_,
                     index=feature_x_sample.index,
                     columns=feature_x_sample.columns)
Example #6
0
def _fit_essentiality(f_x_s):
    """
    Fit a skew-t distribution to each row of f_x_s.

    :param f_x_s: DataFrame; each row is fitted separately, NaNs dropped
    :return: DataFrame; indexed like f_x_s, with columns
        ['N', 'DF', 'Shape', 'Location', 'Scale'], where N is the number of
        non-NaN values used in the fit
    """
    f_x_f = DataFrame(index=f_x_s.index,
                      columns=['N', 'DF', 'Shape', 'Location', 'Scale'])

    # One generator serves every fit; it does not depend on the row
    skew_t = ACSkewT_gen()

    for i, (f_i, f_v) in enumerate(f_x_s.iterrows()):
        print('Fitting {} (@{}/{}) ...'.format(f_i, i, f_x_s.shape[0]))

        # Fit skew-t PDF on the non-missing values and save
        values = f_v.dropna()
        df, shape, location, scale = skew_t.fit(values)
        # Label-based assignment (.ix has been removed from pandas)
        f_x_f.loc[f_i, :] = values.size, df, shape, location, scale

    return f_x_f
Example #7
0
def test_skewt():
    """
    Compare ACSkewT_gen pdf and cdf values at six points with reference
    numbers generated by the R commands quoted in the comments (shape=10).
    """
    distribution = ACSkewT_gen()
    points = [-2, -1, -0.5, 0, 1, 2]

    # ---- df -> inf (the default in R:sn), evaluated here at df=1000000 ----
    # R: noquote(sprintf("%.15e,", dst(c(-2,-1, -0.5,0,1,2), shape=10)))
    expected = np.array([
        2.973416551551523e-90, 3.687562713971017e-24, 2.018401586422970e-07,
        3.989422804014327e-01, 4.839414490382867e-01, 1.079819330263761e-01
    ])
    actual = distribution.pdf(points, 1000000, 10)
    # These comparisons are evaluated but their results are not asserted
    np.allclose(actual, expected, rtol=0, atol=1e-6)
    np.allclose(actual, expected, rtol=1e-1, atol=0)

    # R: noquote(sprintf("%.15e,", pst(c(-2,-1, -0.5,0,1,2), shape=10)))
    expected = np.array([
        0.000000000000000e+00, 0.000000000000000e+00, 3.729478836866917e-09,
        3.172551743055357e-02, 6.826894921370859e-01, 9.544997361036416e-01
    ])
    actual = distribution.cdf(points, 1000000, 10)
    # Also not asserted; a strict check (rtol=1e-13, atol=1e-15) is disabled
    np.allclose(actual, expected, rtol=0, atol=1e-6)
    np.allclose(actual, expected, rtol=1e-1, atol=0)

    # ---- (df, alpha) = (5, 10) ----
    # R: noquote(sprintf("%.15e,", dst(c(-2,-1, -0.5,0,1,2), shape=10, df=5)))
    expected = np.array([
        2.185448836190663e-07, 1.272381597868587e-05, 5.746937644959992e-04,
        3.796066898224945e-01, 4.393468708859825e-01, 1.301804021075493e-01
    ])
    actual = distribution.pdf(points, 5, 10)
    assert_(np.allclose(actual, expected, rtol=1e-13, atol=1e-25))

    # R: noquote(sprintf("%.15e,", pst(c(-2,-1, -0.5,0,1,2), shape=10, df=5)))
    expected = np.array([
        8.822783669199699e-08, 2.638467463775795e-06, 6.573106017198583e-05,
        3.172551743055352e-02, 6.367851708183412e-01, 8.980606093979784e-01
    ])
    actual = distribution.cdf(points, 5, 10)
    assert_(np.allclose(actual, expected, rtol=1e-10, atol=0))

    # ---- (df, alpha) = (1, 10) ----
    # R: noquote(sprintf("%.15e,", dst(c(-2,-1, -0.5,0,1,2), shape=10, df=1)))
    expected = np.array([
        3.941955996757291e-04, 1.568067236862745e-03, 6.136996029432048e-03,
        3.183098861837907e-01, 3.167418189469279e-01, 1.269297588738406e-01
    ])
    actual = distribution.pdf(points, 1, 10)
    assert_(np.allclose(actual, expected, rtol=1e-13, atol=1e-25))

    # R: noquote(sprintf("%.15e,", pst(c(-2,-1, -0.5,0,1,2), shape=10, df=1)))
    expected = np.array([
        7.893671370544414e-04, 1.575817262600422e-03, 3.128720749105560e-03,
        3.172551743055351e-02, 5.015758172626005e-01, 7.056221318361879e-01
    ])
    actual = distribution.cdf(points, 1, 10)
    assert_(np.allclose(actual, expected, rtol=1e-13, atol=1e-25))
Example #8
0
def fit_skew_t_pdf(_1d_array,
                   fit_initial_location=None,
                   fit_initial_scale=None):
    """
    Fit a skew-t distribution to the good values of a 1D array.

    Entries flagged by check_nd_array_for_bad are removed before fitting.
    When the fitted shape is extreme (24 < abs(shape)), the fit is repeated
    with the scale fixed at its initial value and, if the shape is still
    extreme, with the location fixed at its initial value as well.

    :param _1d_array: array; values to fit
    :param fit_initial_location: number; starting location. Defaults to the
        mean of the good values, snapped to 0 when its magnitude is at most
        ALMOST_ZERO.
    :param fit_initial_scale: number; starting scale. Defaults to half the
        standard deviation of the good values.
    :return: tuple; (n, location, scale, degree_of_freedom, shape), n being
        the number of good values
    """

    _1d_array = _1d_array[
        ~check_nd_array_for_bad(_1d_array, raise_for_bad=False)]

    keyword_arguments = {}

    # Honor caller-supplied starting values; they used to be accepted but
    # silently ignored.
    if fit_initial_location is not None:

        keyword_arguments["loc"] = fit_initial_location

    else:

        mean = _1d_array.mean()

        if abs(mean) <= ALMOST_ZERO:

            mean = 0

        keyword_arguments["loc"] = mean

    if fit_initial_scale is not None:

        keyword_arguments["scale"] = fit_initial_scale

    else:

        keyword_arguments["scale"] = _1d_array.std() / 2

    skew_t_model = ACSkewT_gen()

    degree_of_freedom, shape, location, scale = skew_t_model.fit(
        _1d_array, **keyword_arguments)

    if 24 < abs(shape):

        warn("Refitting with fixed scale ...")

        keyword_arguments["fscale"] = keyword_arguments["scale"]

        degree_of_freedom, shape, location, scale = skew_t_model.fit(
            _1d_array, **keyword_arguments)

        if 24 < abs(shape):

            warn("Refitting with fixed location ...")

            keyword_arguments["floc"] = keyword_arguments["loc"]

            degree_of_freedom, shape, location, scale = skew_t_model.fit(
                _1d_array, **keyword_arguments)

    return _1d_array.size, location, scale, degree_of_freedom, shape
Example #9
0
def _fit_skew_t_pdfs(df):
    """
    Fit a skew-t PDF to every row of df.

    Bad values in a row, as flagged by check_nd_array_for_bad_value, are
    excluded from that row's fit. Progress is printed about ten times over
    the whole run.

    :param df: DataFrame; one fit per row
    :return: DataFrame; indexed like df, with columns
        ('N', 'Location', 'Scale', 'Degree of Freedom', 'Shape')
    """

    skew_t_model = ACSkewT_gen()

    n_rows = df.shape[0]

    # Pre-filled with NaN, one row of 5 fit parameters per input row
    parameters = full((n_rows, 5), nan)

    report_every = max(1, n_rows // 10)

    for position, (label, row) in enumerate(df.iterrows()):

        if not position % report_every:

            print('({}/{}) {} ...'.format(position + 1, n_rows, label))

        values = row.values

        is_good = ~check_nd_array_for_bad_value(
            values,
            raise_for_bad_value=False,
        )

        parameters[position] = fit_skew_t_pdf(
            values[is_good],
            skew_t_model=skew_t_model,
        )

    column_names = ('N', 'Location', 'Scale', 'Degree of Freedom', 'Shape')

    return DataFrame(parameters, index=df.index, columns=column_names)
Example #10
0
def _fit_essentiality(args):
    """
    Fit a skew-t distribution to each row of a DataFrame and optionally plot
    each fit.

    :param args: tuple; (feature_x_sample, plot, directory_path, bar_df,
        n_xgrids, overwrite, show_plot) packed into a single argument.
        When plot is true, each feature is plotted; with a directory_path the
        plot is written to
        <directory_path>/essentiality_plots/<feature>.png, otherwise no
        filepath is passed.
    :return: DataFrame; indexed like feature_x_sample, with columns
        ['N', 'DF', 'Shape', 'Location', 'Scale']
    """
    feature_x_sample, plot, directory_path, bar_df, n_xgrids, overwrite, show_plot = args
    feature_x_fit = DataFrame(
        index=feature_x_sample.index,
        columns=['N', 'DF', 'Shape', 'Location', 'Scale'])

    # One generator serves every fit; it does not depend on the row
    skew_t = ACSkewT_gen()

    # TODO: paralellize
    for i, (f_i, f_v) in enumerate(feature_x_sample.iterrows()):
        print_log('Fitting {} (@{}) ...'.format(f_i, i))

        # Fit skew-t PDF on this gene's non-missing values
        values = f_v.dropna()
        n = values.size
        df, shape, location, scale = skew_t.fit(values)
        # Label-based assignment (.ix has been removed from pandas)
        feature_x_fit.loc[f_i, :] = n, df, shape, location, scale

        # Plot
        if plot:

            # Make an output filepath
            if directory_path:
                filepath = join(directory_path, 'essentiality_plots',
                                '{}.png'.format(f_i))
            else:
                filepath = None

            # The plot receives the row as stored in the DataFrame, i.e.
            # before missing values were dropped
            _plot_essentiality(feature_x_sample.loc[f_i, :],
                               get_amp_mut_del(bar_df, f_i),
                               n=n,
                               df=df,
                               shape=shape,
                               location=location,
                               scale=scale,
                               n_x_grids=n_xgrids,
                               filepath=filepath,
                               overwrite=overwrite,
                               show_plot=show_plot)
    return feature_x_fit
Example #11
0
def test_skewt():
    """
    Compare ACSkewT_gen pdf and cdf values at six points with reference
    numbers generated by the R commands quoted in the comments (shape=10).
    """
    skewt = ACSkewT_gen()
    x = [-2, -1, -0.5, 0, 1, 2]
    alpha = 10

    # Limiting case: the default in R:sn is df=inf, evaluated here at
    # df=1000000. The comparisons run, but their results are not asserted.
    large_df = 1000000

    # R: noquote(sprintf("%.15e,", dst(c(-2,-1, -0.5,0,1,2), shape=10)))
    pdf_reference = np.array([2.973416551551523e-90, 3.687562713971017e-24,
                              2.018401586422970e-07, 3.989422804014327e-01,
                              4.839414490382867e-01, 1.079819330263761e-01])
    pdf_values = skewt.pdf(x, large_df, alpha)
    np.allclose(pdf_values, pdf_reference, rtol=0, atol=1e-6)
    np.allclose(pdf_values, pdf_reference, rtol=1e-1, atol=0)

    # R: noquote(sprintf("%.15e,", pst(c(-2,-1, -0.5,0,1,2), shape=10)))
    # (a strict check with rtol=1e-13, atol=1e-15 is disabled)
    cdf_reference = np.array([0.000000000000000e+00, 0.000000000000000e+00,
                              3.729478836866917e-09, 3.172551743055357e-02,
                              6.826894921370859e-01, 9.544997361036416e-01])
    cdf_values = skewt.cdf(x, large_df, alpha)
    np.allclose(cdf_values, cdf_reference, rtol=0, atol=1e-6)
    np.allclose(cdf_values, cdf_reference, rtol=1e-1, atol=0)

    # Finite df cases, asserted in this order:
    # (method, df, reference values, rtol, atol)
    strict_cases = [
        # R: noquote(sprintf("%.15e,", dst(c(-2,-1, -0.5,0,1,2), shape=10, df=5)))
        (skewt.pdf, 5,
         np.array([2.185448836190663e-07, 1.272381597868587e-05,
                   5.746937644959992e-04, 3.796066898224945e-01,
                   4.393468708859825e-01, 1.301804021075493e-01]),
         1e-13, 1e-25),
        # R: noquote(sprintf("%.15e,", pst(c(-2,-1, -0.5,0,1,2), shape=10, df=5)))
        (skewt.cdf, 5,
         np.array([8.822783669199699e-08, 2.638467463775795e-06,
                   6.573106017198583e-05, 3.172551743055352e-02,
                   6.367851708183412e-01, 8.980606093979784e-01]),
         1e-10, 0),
        # R: noquote(sprintf("%.15e,", dst(c(-2,-1, -0.5,0,1,2), shape=10, df=1)))
        (skewt.pdf, 1,
         np.array([3.941955996757291e-04, 1.568067236862745e-03,
                   6.136996029432048e-03, 3.183098861837907e-01,
                   3.167418189469279e-01, 1.269297588738406e-01]),
         1e-13, 1e-25),
        # R: noquote(sprintf("%.15e,", pst(c(-2,-1, -0.5,0,1,2), shape=10, df=1)))
        (skewt.cdf, 1,
         np.array([7.893671370544414e-04, 1.575817262600422e-03,
                   3.128720749105560e-03, 3.172551743055351e-02,
                   5.015758172626005e-01, 7.056221318361879e-01]),
         1e-13, 1e-25),
    ]

    for evaluate, df, reference, rtol, atol in strict_cases:
        values = evaluate(x, df, alpha)  # args = (df, alpha)
        assert_(np.allclose(values, reference, rtol=rtol, atol=atol))
Example #12
0
def _plot_essentiality(vector, bars, n, df, shape, location, scale, n_bins,
                       n_x_grids, figure_size, dpi,
                       plot_vertical_extention_factor, plot_fits, pdf_color,
                       pdf_reversed_color, essentiality_index_color,
                       gene_fontsize, labels_fontsize, bars_linewidth,
                       bar0_color, bar1_color, bar2_color, filepath, overwrite,
                       show_plot):
    """
    Draw one feature's essentiality figure: a histogram of its values and,
    when plot_fits is set, the fitted skew-t PDF, the reflected PDF and the
    essentiality-index curve, with three rug-plot bars underneath.

    :param vector: Series-like; values to plot; its name is the figure title
    :param bars: DataFrame-like; rows 0, 1 and 2 are each multiplied by
        vector and drawn as a rug plot
    :param n: number; shown in the fit summary line
    :param df: number; skew-t degree of freedom
    :param shape: number; skew-t shape
    :param location: number; skew-t location
    :param scale: number; skew-t scale
    :param n_bins: number of histogram bins
    :param n_x_grids: int; number of x grid points between the min and max of
        vector
    :param figure_size: passed to plt.figure as figsize
    :param dpi: passed to save_plot
    :param plot_vertical_extention_factor: number; the y axis ends at the
        tallest histogram count times this factor
    :param plot_fits: bool; draw the fitted curves and the fit summary
    :param pdf_color: color of the histogram and of the skew-t PDF
    :param pdf_reversed_color: color of the reflected skew-t PDF
    :param essentiality_index_color: color of the essentiality-index curve
    :param gene_fontsize: number; title font size (summary uses 0.6 of it)
    :param labels_fontsize: number; axis label font size (tick and bar labels
        use 0.81 of it)
    :param bars_linewidth: line width of the rug plots
    :param bar0_color: color of the first bar
    :param bar1_color: color of the second bar
    :param bar2_color: color of the third bar
    :param filepath: when truthy, the figure is saved there via save_plot
    :param overwrite: passed to save_plot
    :param show_plot: bool; call plt.show() when truthy
    :return: None
    """

    # ------------------------------------------------------------------
    # Figure, styles and axes
    # ------------------------------------------------------------------
    figure = plt.figure(figsize=figure_size)

    set_style('ticks')
    despine(offset=9)

    # 10 grid rows: the graph takes the first 5, each bar takes one row
    # starting one row below the graph
    n_rows = 10
    n_rows_graph = 5
    gridspec = GridSpec(n_rows, 1)

    ax_graph = plt.subplot(gridspec[:n_rows_graph, :])

    ax_bar0, ax_bar1, ax_bar2 = [
        plt.subplot(gridspec[n_rows_graph + k:n_rows_graph + k + 1, :])
        for k in (1, 2, 3)
    ]

    # Bars show neither spines nor ticks
    for bar_ax in [ax_bar1, ax_bar0, ax_bar2]:
        for side in ('top', 'bottom', 'left', 'right'):
            bar_ax.spines[side].set_visible(False)
        for get_artists in (bar_ax.get_xticklines, bar_ax.get_xticklabels,
                            bar_ax.get_yticklines, bar_ax.get_yticklabels):
            for artist in get_artists():
                artist.set_visible(False)

    # ------------------------------------------------------------------
    # Histogram
    # ------------------------------------------------------------------
    histogram_style = {'linewidth': 0.92, 'alpha': 0.24, 'color': pdf_color}
    distplot(vector,
             hist=True,
             bins=n_bins,
             kde=False,
             hist_kws=histogram_style,
             ax=ax_graph)

    # ------------------------------------------------------------------
    # Skew-t fit PDF, rescaled so its peak matches the tallest bin
    # ------------------------------------------------------------------
    skew_t = ACSkewT_gen()

    x_min = vector.min()
    x_max = vector.max()
    x_grids = linspace(x_min, x_max, n_x_grids)

    skew_t_pdf = skew_t.pdf(x_grids, df, shape, loc=location, scale=scale)

    histogram_max = histogram(vector, bins=n_bins)[0].max()
    scale_factor = histogram_max / skew_t_pdf.max()
    skew_t_pdf *= scale_factor

    # Shared by every fitted curve
    line_kwargs = {'linestyle': '-', 'linewidth': 2.6}

    if plot_fits:
        ax_graph.plot(x_grids, skew_t_pdf, color=pdf_color, **line_kwargs)

    # Extend plot vertically
    y_top = histogram_max * plot_vertical_extention_factor
    ax_graph.axis([x_min, x_max, 0, y_top])

    # ------------------------------------------------------------------
    # Reflected skew-t PDF: evaluated on the reflection grid, drawn over
    # the original grid
    # ------------------------------------------------------------------
    x_grids_for_reflection = define_x_coordinates_for_reflection(
        skew_t_pdf, x_grids)

    skew_t_pdf_reflected = scale_factor * skew_t.pdf(
        x_grids_for_reflection, df, shape, loc=location, scale=scale)

    if plot_fits:
        ax_graph.plot(x_grids,
                      skew_t_pdf_reflected,
                      color=pdf_reversed_color,
                      **line_kwargs)

    # ------------------------------------------------------------------
    # Essentiality indices
    # ------------------------------------------------------------------
    direction = ['+', '-'][shape > 0]
    essentiality_indices = define_cumulative_area_ratio_function(
        skew_t_pdf, skew_t_pdf_reflected, x_grids, direction=direction)

    if plot_fits:
        ax_graph.plot(x_grids,
                      essentiality_indices,
                      color=essentiality_index_color,
                      **line_kwargs)

    # ------------------------------------------------------------------
    # Title, fit summary, labels and ticks
    # ------------------------------------------------------------------
    figure.text(0.5,
                0.96,
                vector.name,
                fontsize=gene_fontsize,
                weight='bold',
                horizontalalignment='center')

    if plot_fits:
        fit_summary = (
            'N={:.2f}    DF={:.2f}    Shape={:.2f}    Location={:.2f}    Scale={:.2f}'
            .format(n, df, shape, location, scale))
        figure.text(0.5,
                    0.92,
                    fit_summary,
                    fontsize=gene_fontsize * 0.6,
                    weight='bold',
                    horizontalalignment='center')

    label_kwargs = {'weight': 'bold', 'fontsize': labels_fontsize}
    ax_graph.set_xlabel('RNAi Score', **label_kwargs)
    ax_graph.set_ylabel('Frequency', **label_kwargs)

    tick_kwargs = {'size': labels_fontsize * 0.81, 'weight': 'normal'}
    for get_tick_labels in (ax_graph.get_xticklabels,
                            ax_graph.get_yticklabels):
        for tick_label in get_tick_labels():
            tick_label.set(**tick_kwargs)

    # ------------------------------------------------------------------
    # Bars: one rug plot per row of bars, labelled with the last 3
    # characters of the row's name
    # ------------------------------------------------------------------
    bar_kwargs = {
        'rotation': 90,
        'weight': 'bold',
        'fontsize': labels_fontsize * 0.81
    }
    bar_rows = (
        (bars.iloc[0, :], ax_bar0, bar0_color),
        (bars.iloc[1, :], ax_bar1, bar1_color),
        (bars.iloc[2, :], ax_bar2, bar2_color),
    )
    for bar_vector, bar_ax, bar_color in bar_rows:
        rugplot(bar_vector * vector,
                height=1,
                color=bar_color,
                ax=bar_ax,
                linewidth=bars_linewidth)
        bar_ax.set_ylabel(bar_vector.name[-3:], **bar_kwargs)

    # ------------------------------------------------------------------
    # Save, show and close
    # ------------------------------------------------------------------
    if filepath:
        save_plot(filepath, dpi=dpi, overwrite=overwrite)

    if show_plot:
        plt.show()

    # TODO: properly close
    plt.clf()
    plt.close()
Example #13
0
def fit_skew_t_pdf(
    _1d_array,
    fit_fixed_location=None,
    fit_fixed_scale=None,
    fit_initial_location=None,
    fit_initial_scale=None,
):
    """
    Fit a skew-t distribution to the good values of a 1D array.

    Entries flagged by check_nd_array_for_bad are removed first. While the
    fitted shape is extreme (24 < abs(shape)) the fit is repeated, first with
    the scale fixed at half the standard deviation and then, if needed, with
    the location also fixed at the mean.

    :param _1d_array: array; values to fit
    :param fit_fixed_location: number; passed to fit as floc when given
    :param fit_fixed_scale: number; passed to fit as fscale when given
    :param fit_initial_location: number; starting loc, defaults to the mean
    :param fit_initial_scale: number; starting scale, defaults to half the
        standard deviation
    :return: tuple; (n, location, scale, degree_of_freedom, shape), n being
        the number of good values
    """

    is_bad = check_nd_array_for_bad(_1d_array, raise_for_bad=False)

    values = _1d_array[~is_bad]

    guessed_location = values.mean()

    guessed_scale = values.std() / 2

    fit_options = {}

    if fit_fixed_location is not None:

        fit_options["floc"] = fit_fixed_location

    if fit_fixed_scale is not None:

        fit_options["fscale"] = fit_fixed_scale

    fit_options["loc"] = (guessed_location if fit_initial_location is None
                          else fit_initial_location)

    fit_options["scale"] = (guessed_scale if fit_initial_scale is None else
                            fit_initial_scale)

    skew_t_model = ACSkewT_gen()

    degree_of_freedom, shape, location, scale = skew_t_model.fit(
        values, **fit_options)

    # Each fallback fixes one more parameter and is tried only while the
    # shape remains extreme
    fallbacks = (
        ("Refitting with scale = (standard deviation / 2) ...", "fscale",
         guessed_scale),
        ("Refitting with location = mean ...", "floc", guessed_location),
    )

    for message, option, fixed_value in fallbacks:

        if not 24 < abs(shape):

            break

        warn(message)

        fit_options[option] = fixed_value

        degree_of_freedom, shape, location, scale = skew_t_model.fit(
            values, **fit_options)

    return values.size, location, scale, degree_of_freedom, shape
Example #14
0
def plot_essentiality(feature_x_sample,
                      feature_x_fit,
                      bar_df,
                      directory_path,
                      features=(),
                      enumerate_functions=False,
                      figure_size=FIGURE_SIZE,
                      n_x_grids=3000,
                      n_bins=50,
                      plot_fits=True,
                      show_plot=True,
                      dpi=DPI):
    """
    Make essentiality plot for each gene.
    :param feature_x_sample: DataFrame or str;
        (n_features, n_samples) or a filepath to a file
    :param feature_x_fit: DataFrame or str;
        (n_features, 5 (n, df, shape, location, scale)) or a filepath to a file
    :param bar_df: dataframe;
    :param directory_path: str;
        directory_path/essentiality_plots/feature<id>.png will be saved

    :param features: iterable; (n_selected_features)

    :param enumerate_functions: bool;

    :param figure_size: tuple; figure size
    :param n_x_grids: int; number of x grids
    :param n_bins: int; number of histogram bins
    :param plot_fits: bool; plot fitted lines or not
    :param show_plot: bool; show plot or not
    :param dpi: int; dots per inch
    :return: None
    """

    # ==========================================================================
    # Select features to plot
    # ==========================================================================
    if len(features):  # Plot only specified features
        is_ = [f for f in features if f in feature_x_sample.index]

        if len(is_):
            print('Plotting features: {} ...'.format(', '.join(is_)))
            feature_x_sample = feature_x_sample.ix[is_, :]
        else:
            raise ValueError('Specified features not found.')
    else:  # Plot all features
        print('Plotting all features ...')

    # ==========================================================================
    # Plot each feature
    # ==========================================================================
    for i, (f_i, f_v) in enumerate(feature_x_sample.iterrows()):
        print('Plotting {} (@{}/{}) ...'.format(f_i, i,
                                                feature_x_sample.shape[0]))

        # ======================================================================
        # Set up figure
        # ======================================================================
        # Initialize a figure
        fig = figure(figsize=figure_size)

        # Set figure grids
        n_rows = 10
        n_rows_graph = 5
        gridspec = GridSpec(n_rows, 1)

        # Make graph ax
        ax_graph = subplot(gridspec[:n_rows_graph, :])

        # Set bar axes
        ax_bar0 = subplot(gridspec[n_rows_graph + 1:n_rows_graph + 2, :])
        ax_bar1 = subplot(gridspec[n_rows_graph + 2:n_rows_graph + 3, :])
        ax_bar2 = subplot(gridspec[n_rows_graph + 3:n_rows_graph + 4, :])
        for ax in (ax_bar1, ax_bar0, ax_bar2):
            ax.spines['top'].set_visible(False)
            ax.spines['bottom'].set_visible(False)
            ax.spines['left'].set_visible(False)
            ax.spines['right'].set_visible(False)
            for t in ax.get_xticklines():
                t.set_visible(False)
            for t in ax.get_xticklabels():
                t.set_visible(False)
            for t in ax.get_yticklines():
                t.set_visible(False)
            for t in ax.get_yticklabels():
                t.set_visible(False)

        # ======================================================================
        # Plot histogram
        # ======================================================================
        distplot(f_v,
                 bins=n_bins,
                 kde=False,
                 norm_hist=True,
                 hist_kws=dict(linewidth=0.92, color='#20d9ba', alpha=0.26),
                 ax=ax_graph)

        # ==============================================================
        # Decorate
        # ==============================================================
        decorate(ax=ax_graph,
                 style='white',
                 title=f_i,
                 xlabel='RNAi Score',
                 ylabel='Frequency')

        # ==================================================================
        # Plot skew-t fit PDF
        # ==================================================================
        # Initialize a skew-t generator
        skew_t = ACSkewT_gen()

        # Set up grids
        grids = linspace(f_v.min(), f_v.max(), n_x_grids)

        # Parse fitted parameters
        n, df, shape, location, scale = feature_x_fit.ix[
            f_i, ['N', 'DF', 'Shape', 'Location', 'Scale']]
        fig.text(0.5,
                 0.9,
                 'N={:.0f}    DF={:.2f}    Shape={:.2f}    Location={:.2f}    '
                 'Scale={:.2f}'.format(n, df, shape, location, scale),
                 size=16,
                 weight='bold',
                 color='#220530',
                 horizontalalignment='center')

        # Generate skew-t PDF
        skew_t_pdf = skew_t.pdf(grids, df, shape, loc=location, scale=scale)

        # Plot skew-t PDF
        line_kwargs = dict(linestyle='-', linewidth=2.6)
        ax_graph.plot(grids, skew_t_pdf, color='#20d9ba', **line_kwargs)

        # ==================================================================
        # Plot reflected skew-t PDF
        # ==================================================================
        # Generate skew-t PDF over reflected grids
        skew_t_pdf_r = skew_t.pdf(define_x_coordinates_for_reflection(
            skew_t_pdf, grids),
                                  df,
                                  shape,
                                  loc=location,
                                  scale=scale)

        # Plot over the original grids
        ax_graph.plot(grids, skew_t_pdf_r, color='#4e41d9', **line_kwargs)

        # ==================================================================
        # Plot essentiality indices from various functions
        # ==================================================================
        figure_size_ = (asarray(figure_size) * 0.7).astype(int)
        if enumerate_functions:
            functions = [
                # f1 /f2
                # Explode 'f1 / f2',
                # Signal at center 'log(f1 / f2)',
                # Explode 'where(f2 < f1, f1 / f2, 0)',
                # Not that good during entropy test 'where(f2 < f1, log(f1 /
                # f2), 0)',

                # - f2 /f1
                # Signal at center '-(f2 / f1)',
                # Signal at center '-log(f2 / f1)',
                # Spikes to 0 after center 'where(f2 < f1, -(f2 / f1), 0)',
                # == log(f1/ f2) 'where(f2 < f1, -log(f2 / f1), 0)',

                # carea1 / carea2
                # Explode 'carea1 / carea2',
                # Not that good during entropy test 'log(carea1 / carea2)',
                # Explode 'where(f2 < f1, carea1 / carea2, 0)',
                # 0ing abruptly drops 'where(f2 < f1, log(carea1 / carea2), 0)',

                # (f1 - f2) / f1
                # Better during only f2 < f1 '(f1 - f2) / f1',
                # Normalized same as not logging and raising to a power'log(
                # (f1 - f2) / f1 )',
                'where(f2 < f1, (f1 - f2) / f1, 0)',
                # Spikes to 0 after center 'where(f2 < f1, log( (f1 - f2) /
                # f1 ), 0)',

                # ((f1 - f2) / f1)^scale
                # Super negative '((f1 - f2) / f1)**{}'.format(scale),
                'where(f2 < f1log, ((f1 - f2) / f1)**{}, 0)'.format(scale),
                # log
                # Same as just log 'where(f2 < f1, log( ((f1 - f2) / f1)**{}
                # ), 0 )'.format(scale),

                # Hard to interpret # ((f1 - f2) / f1)^(1/scale)
                # log(-)=nan after center '((f1 - f2) / f1)**(1/{})'.format(
                # scale),
                # Widens wide 'where(f2 < f1, ((f1 - f2) / f1)**(1/{}),
                # 0)'.format(scale),

                # Hard to interpret # ((f1 - f2) / f1)^std(ei)
                # log(-)=nan after center '((f1 - f2) / f1)**(((f1 - f2) /
                # f1).std())',
                # Hard to interpret 'where(f2 < f1, ((f1 - f2) / f1)**(((f1 -
                #  f2) / f1).std()), 0) ',
                # Spikes to 0 after center 'where(f2 < f1, log( ((f1 - f2) /
                # f1)**(((f1 - f2) / f1).std()) ), 0) ',

                # Hard to interpret # ((f1 - f2) / f1)^(1/std(ei))
                # log(-)=nan after center '((f1 - f2) / f1)**(1/((f1 - f2) /
                # f1).std())',
                # Hard to interpret (best during entropy test)  'where(f2 <
                # f1, ((f1 - f2) / f1)**(1/((f1 - f2) / f1).std()), 0) ',
                # Same as just log 'where(f2 < f1, log( ((f1 - f2) / f1)**(
                # 1/((f1 - f2) / f1).std()) ), 0) ',
            ]
            eis = []

            # Plot each function
            for j, f in enumerate(functions):
                figure(figsize=figure_size_)

                # Compute essentiality index
                ei = _compute_essentiality_index(skew_t_pdf, skew_t_pdf_r, f,
                                                 ['+', '-'][shape > 0],
                                                 grids[1] - grids[0])

                c = CMAP_CATEGORICAL(j / len(functions))
                eis.append((ei, c))

                plot(grids, ei, color=c, **line_kwargs)
                decorate(title=f)

            # Plot all functions
            figure(figsize=figure_size_)
            distplot(f_v,
                     bins=n_bins,
                     kde=False,
                     norm_hist=True,
                     hist_kws=dict(linewidth=0.92, color='#070707',
                                   alpha=0.26))
            for ei_, c in eis:
                plot(grids, (ei_ - ei_.min()) / (ei_.max() - ei_.min()) *
                     skew_t_pdf.max(),
                     color=c,
                     linewidth=line_kwargs['linewidth'])
            decorate(title=f_i)

        # ==================================================================
        # Plot essentiality index (#fc154f)
        # ==================================================================
        ei = _compute_essentiality_index(
            skew_t_pdf, skew_t_pdf_r,
            'where(f2 < f1, ((f1 - f2) / f1)**{}, 0)'.format(scale),
            ['+', '-'][shape > 0], grids[1] - grids[0])
        ax_graph.plot(grids, (ei - ei.min()) / (ei.max() - ei.min()) *
                      skew_t_pdf.max(),
                      color='#fc154f',
                      **line_kwargs)
        # ==================================================================
        # Plot bars
        # ==================================================================
        a_m_d = _get_amp_mut_del(bar_df, f_i)

        bar_specifications = [
            dict(vector=a_m_d.iloc[0, :], ax=ax_bar0, color='#9017e6'),
            dict(vector=a_m_d.iloc[1, :], ax=ax_bar1, color='#6410a0'),
            dict(vector=a_m_d.iloc[2, :], ax=ax_bar2, color='#470b72'),
        ]

        for spec in bar_specifications:
            v = spec['vector']
            ax = spec['ax']
            c = spec['color']
            rugplot(v * f_v, height=1, color=c, linewidth=2.4, ax=ax)
            decorate(ax=ax, ylabel=v.name[-3:])

        # ==================================================================
        # Save
        # ==================================================================
        save_plot(join(directory_path,
                       'essentiality_plots/{}.png'.format(f_i)),
                  dpi=dpi)

        if show_plot:
            show()

        close()
Example #15
0
def compute_context(
    _1d_array,
    n_data=None,
    location=None,
    scale=None,
    degree_of_freedom=None,
    shape=None,
    fit_fixed_location=None,
    fit_fixed_scale=None,
    fit_initial_location=None,
    fit_initial_scale=None,
    n_grid=1e3,
    degree_of_freedom_for_tail_reduction=1e8,
    minimum_kl=1e-2,
    scale_with_kl=True,
    multiply_distance_from_reference_argmax=False,
    global_location=None,
    global_scale=None,
    global_degree_of_freedom=None,
    global_shape=None,
):
    """Compute context indices of a 1D array from a skew-t PDF.

    The skew-t PDF is evaluated on an evenly spaced grid spanning the good
    values of ``_1d_array`` and compared against one or two reference PDFs;
    every good value then receives the context index of its nearest grid
    point.

    :param _1d_array: array supporting boolean-mask indexing and ``.size``
        (e.g. a NumPy 1D array).
    :param n_data: fit parameter; see below.
    :param location: fit parameter, passed to the PDF as ``loc``.
    :param scale: fit parameter, passed to the PDF as ``scale``.
    :param degree_of_freedom: fit parameter of the PDF.
    :param shape: fit parameter of the PDF. If any of ``n_data``,
        ``location``, ``scale``, ``degree_of_freedom`` or ``shape`` is None,
        all five are (re)estimated with ``fit_skew_t_pdf`` on the good values.
    :param fit_fixed_location: forwarded to ``fit_skew_t_pdf``.
    :param fit_fixed_scale: forwarded to ``fit_skew_t_pdf``.
    :param fit_initial_location: forwarded to ``fit_skew_t_pdf``.
    :param fit_initial_scale: forwarded to ``fit_skew_t_pdf``.
    :param n_grid: number of grid points; converted with ``int`` before use,
        so the float default ``1e3`` means 1000 points.
    :param degree_of_freedom_for_tail_reduction: degree of freedom used when
        evaluating the PDF on the reflected grid.
    :param minimum_kl: forwarded to ``_compute_context_indices``.
    :param scale_with_kl: forwarded to ``_compute_context_indices``.
    :param multiply_distance_from_reference_argmax: forwarded to
        ``_compute_context_indices``.
    :param global_location: ``loc`` of the global PDF.
    :param global_scale: ``scale`` of the global PDF.
    :param global_degree_of_freedom: degree of freedom of the global PDF.
    :param global_shape: shape of the global PDF. The location context is
        computed only when all four ``global_*`` parameters are given.
    :return: dict with keys ``fit``, ``grid``, ``pdf``,
        ``shape_pdf_reference``, ``shape_context_indices``,
        ``location_pdf_reference``, ``location_context_indices``,
        ``context_indices`` and ``context_indices_like_array``; the two
        ``location_*`` entries are None when no global PDF is given.
    """

    # Mask of unusable entries; they are left as NaN in the output array.
    is_bad = check_nd_array_for_bad(_1d_array, raise_for_bad=False)

    _1d_array_good = _1d_array[~is_bad]

    if any(
        parameter is None
        for parameter in (n_data, location, scale, degree_of_freedom, shape)
    ):

        n_data, location, scale, degree_of_freedom, shape = fit_skew_t_pdf(
            _1d_array_good,
            fit_fixed_location=fit_fixed_location,
            fit_fixed_scale=fit_fixed_scale,
            fit_initial_location=fit_initial_location,
            fit_initial_scale=fit_initial_scale,
        )

    # linspace needs an integer sample count; the float default (1e3) is
    # rejected by NumPy releases that no longer accept a float ``num``.
    grid = linspace(_1d_array_good.min(), _1d_array_good.max(), int(n_grid))

    skew_t_model = ACSkewT_gen()

    pdf = skew_t_model.pdf(grid, degree_of_freedom, shape, loc=location, scale=scale)

    # Shape reference: element-wise minimum of the PDF and the PDF evaluated
    # on coordinates reflected about the grid point where the PDF peaks.
    shape_pdf_reference = minimum(
        pdf,
        skew_t_model.pdf(
            make_coordinates_for_reflection(grid, grid[pdf.argmax()]),
            degree_of_freedom_for_tail_reduction,
            shape,
            loc=location,
            scale=scale,
        ),
    )

    shape_context_indices = _compute_context_indices(
        grid,
        pdf,
        shape_pdf_reference,
        minimum_kl,
        scale_with_kl,
        multiply_distance_from_reference_argmax,
    )

    if any(
        parameter is None
        for parameter in (
            global_location,
            global_scale,
            global_degree_of_freedom,
            global_shape,
        )
    ):

        # Without a complete global PDF only the shape context is used.
        location_pdf_reference = None

        location_context_indices = None

        context_indices = shape_context_indices

    else:

        # Location reference: element-wise minimum of the PDF and the global
        # PDF evaluated on the same grid.
        location_pdf_reference = minimum(
            pdf,
            skew_t_model.pdf(
                grid,
                global_degree_of_freedom,
                global_shape,
                loc=global_location,
                scale=global_scale,
            ),
        )

        location_context_indices = _compute_context_indices(
            grid,
            pdf,
            location_pdf_reference,
            minimum_kl,
            scale_with_kl,
            multiply_distance_from_reference_argmax,
        )

        context_indices = shape_context_indices + location_context_indices

    context_indices_like_array = full(_1d_array.size, nan)

    # Each good value takes the context index of its nearest grid point.
    context_indices_like_array[~is_bad] = context_indices[
        [absolute(grid - value).argmin() for value in _1d_array_good]
    ]

    return {
        "fit": asarray((n_data, location, scale, degree_of_freedom, shape)),
        "grid": grid,
        "pdf": pdf,
        "shape_pdf_reference": shape_pdf_reference,
        "shape_context_indices": shape_context_indices,
        "location_pdf_reference": location_pdf_reference,
        "location_context_indices": location_context_indices,
        "context_indices": context_indices,
        "context_indices_like_array": context_indices_like_array,
    }
Example #16
0
def compute_vector_context(
    vector,
    n_data=None,
    location=None,
    scale=None,
    degree_of_freedom=None,
    shape=None,
    fit_initial_location=None,
    fit_initial_scale=None,
    n_grid=int(1e3),
    degree_of_freedom_for_tail_reduction=1e8,
    multiply_distance_from_reference_argmax=False,
    global_location=None,
    global_scale=None,
    global_degree_of_freedom=None,
    global_shape=None,
):
    """Compute the context of a vector from a skew-t PDF.

    The skew-t PDF is evaluated on ``n_grid`` evenly spaced points spanning
    the good values of ``vector``. A shape context is computed against the
    PDF reflected about its peak; when all four ``global_*`` parameters are
    given, a location context against the global PDF is added to it. If any
    of ``n_data``, ``location``, ``scale``, ``degree_of_freedom`` or
    ``shape`` is None, all five are estimated with
    ``fit_vector_to_skew_t_pdf``.

    Returns a dict with keys ``fit``, ``grid``, ``pdf``,
    ``shape_pdf_reference``, ``shape_context``, ``location_pdf_reference``,
    ``location_context``, ``context`` and ``context_like_array``; the last
    one has one entry per element of ``vector`` and is NaN where the element
    was flagged as bad.
    """

    good_mask = ~check_array_for_bad(vector, raise_for_bad=False)
    good_values = vector[good_mask]

    fit_parameters = (n_data, location, scale, degree_of_freedom, shape)
    if any(parameter is None for parameter in fit_parameters):
        n_data, location, scale, degree_of_freedom, shape = fit_vector_to_skew_t_pdf(
            good_values,
            fit_initial_location=fit_initial_location,
            fit_initial_scale=fit_initial_scale,
        )

    grid = linspace(good_values.min(), good_values.max(), num=n_grid)

    model = ACSkewT_gen()
    pdf = model.pdf(grid, degree_of_freedom, shape, loc=location, scale=scale)

    # PDF evaluated on the grid reflected about the point where the PDF peaks.
    reflected_pdf = model.pdf(
        make_reflecting_grid(grid, grid[pdf.argmax()]),
        degree_of_freedom_for_tail_reduction,
        shape,
        loc=location,
        scale=scale,
    )
    shape_pdf_reference = minimum(pdf, reflected_pdf)
    shape_context = compute_pdf_and_pdf_reference_context(
        grid, pdf, shape_pdf_reference, multiply_distance_from_reference_argmax
    )

    global_parameters = (
        global_location,
        global_scale,
        global_degree_of_freedom,
        global_shape,
    )
    if all(parameter is not None for parameter in global_parameters):
        global_pdf = model.pdf(
            grid,
            global_degree_of_freedom,
            global_shape,
            loc=global_location,
            scale=global_scale,
        )
        location_pdf_reference = minimum(pdf, global_pdf)
        location_context = compute_pdf_and_pdf_reference_context(
            grid, pdf, location_pdf_reference, multiply_distance_from_reference_argmax
        )
        context = shape_context + location_context
    else:
        # No complete global PDF: the context is the shape context alone.
        location_pdf_reference = None
        location_context = None
        context = shape_context

    # Map every good value onto the index of its nearest grid point.
    nearest_grid_indices = [
        absolute(grid - value).argmin() for value in good_values
    ]
    context_like_array = full(vector.size, nan)
    context_like_array[good_mask] = context[nearest_grid_indices]

    results = {
        "fit": array((n_data, location, scale, degree_of_freedom, shape)),
        "grid": grid,
        "pdf": pdf,
        "shape_pdf_reference": shape_pdf_reference,
        "shape_context": shape_context,
        "location_pdf_reference": location_pdf_reference,
        "location_context": location_context,
        "context": context,
        "context_like_array": context_like_array,
    }
    return results
Example #17
0
def _make_context_matrix(
        df,
        skew_t_pdf_fit_parameter,
        n_grid,
        degree_of_freedom_for_tail_reduction,
        multiply_distance_from_location,
        global_location,
        global_scale,
        global_degree_of_freedom,
        global_shape,
):
    """Build a context matrix with the same index and columns as ``df``.

    ``compute_context`` is called once per row of ``df`` and its
    ``'context_indices_like_array'`` result becomes the matching row of the
    output. When ``skew_t_pdf_fit_parameter`` is not None, the row's
    'Location', 'Scale', 'Degree of Freedom' and 'Shape' entries are looked
    up by the row label and passed along; otherwise None is passed for all
    four. Progress is printed roughly every tenth of the rows.
    """

    n_rows = df.shape[0]
    print_every = max(1, n_rows // 10)

    # Keyword arguments that are identical for every row.
    shared_kwargs = dict(
        skew_t_model=ACSkewT_gen(),
        n_grid=n_grid,
        degree_of_freedom_for_tail_reduction=
        degree_of_freedom_for_tail_reduction,
        multiply_distance_from_location=multiply_distance_from_location,
        global_location=global_location,
        global_scale=global_scale,
        global_degree_of_freedom=global_degree_of_freedom,
        global_shape=global_shape,
    )
    fit_columns = ['Location', 'Scale', 'Degree of Freedom', 'Shape']

    context_matrix = full(df.shape, nan)

    for row_position, (row_label, row) in enumerate(df.iterrows()):

        if row_position % print_every == 0:
            print('({}/{}) {} ...'.format(row_position + 1, n_rows,
                                          row_label))

        if skew_t_pdf_fit_parameter is not None:
            location, scale, degree_of_freedom, shape = (
                skew_t_pdf_fit_parameter.loc[row_label, fit_columns])
        else:
            location = scale = degree_of_freedom = shape = None

        row_result = compute_context(
            row.values,
            location=location,
            scale=scale,
            degree_of_freedom=degree_of_freedom,
            shape=shape,
            **shared_kwargs)
        context_matrix[row_position] = row_result[
            'context_indices_like_array']

    return DataFrame(context_matrix, index=df.index, columns=df.columns)
Example #18
0
def compute_context(
    _1d_array,
    skew_t_model=None,
    location=None,
    scale=None,
    degree_of_freedom=None,
    shape=None,
    fit_fixed_location=None,
    fit_fixed_scale=None,
    fit_initial_location=None,
    fit_initial_scale=None,
    n_grid=1e3,
    degree_of_freedom_for_tail_reduction=1e8,
    multiply_distance_from_location=False,
    global_location=None,
    global_scale=None,
    global_degree_of_freedom=None,
    global_shape=None,
):
    """Compute context indices of a 1D array from a skew-t PDF.

    The skew-t PDF is evaluated on an evenly spaced grid spanning the good
    values of ``_1d_array``. A shape context is derived from the divergence
    between the PDF and its reflection-based reference; when all four
    ``global_*`` parameters are given, a location context derived from the
    divergence against the global PDF is added. Every good value receives
    the context index of its nearest grid point.

    :param _1d_array: array supporting boolean-mask indexing and ``.size``
        (e.g. a NumPy 1D array).
    :param skew_t_model: object providing ``pdf``; an ``ACSkewT_gen()`` is
        created when None.
    :param location: fit parameter, passed to the PDF as ``loc``.
    :param scale: fit parameter, passed to the PDF as ``scale``.
    :param degree_of_freedom: fit parameter of the PDF.
    :param shape: fit parameter of the PDF. If any of ``location``,
        ``scale``, ``degree_of_freedom`` or ``shape`` is None, all of them
        are estimated with ``fit_skew_t_pdf`` on the good values.
    :param fit_fixed_location: forwarded to ``fit_skew_t_pdf``.
    :param fit_fixed_scale: forwarded to ``fit_skew_t_pdf``.
    :param fit_initial_location: forwarded to ``fit_skew_t_pdf``.
    :param fit_initial_scale: forwarded to ``fit_skew_t_pdf``.
    :param n_grid: number of grid points; converted with ``int`` before use,
        so the float default ``1e3`` means 1000 points.
    :param degree_of_freedom_for_tail_reduction: degree of freedom used when
        evaluating the PDF on the reflected coordinates.
    :param multiply_distance_from_location: if true, the shape context
        indices are multiplied by the distance from the grid point where the
        shape reference peaks.
    :param global_location: ``loc`` of the global PDF.
    :param global_scale: ``scale`` of the global PDF.
    :param global_degree_of_freedom: degree of freedom of the global PDF.
    :param global_shape: shape of the global PDF.
    :return: dict with keys 'fit', 'grid', 'pdf', 'shape_pdf_reference',
        'shape_context_indices', 'location_pdf_reference',
        'location_context_indices', 'context_indices' and
        'context_indices_like_array'; the two 'location_*' entries are None
        when no complete global PDF is given.
    """

    # Mask of unusable entries; they are left as NaN in the output array.
    is_bad_value = check_nd_array_for_bad_value(
        _1d_array,
        raise_for_bad_value=False,
    )

    _1d_array_good = _1d_array[~is_bad_value]

    if skew_t_model is None:

        skew_t_model = ACSkewT_gen()

    if any(parameter is None for parameter in (
            location,
            scale,
            degree_of_freedom,
            shape,
    )):

        n, location, scale, degree_of_freedom, shape = fit_skew_t_pdf(
            _1d_array_good,
            skew_t_model=skew_t_model,
            fit_fixed_location=fit_fixed_location,
            fit_fixed_scale=fit_fixed_scale,
            fit_initial_location=fit_initial_location,
            fit_initial_scale=fit_initial_scale,
        )

    else:

        # Parameters were supplied: report the number of good values as n.
        n = _1d_array_good.size

    # linspace needs an integer sample count; the float default (1e3) is
    # rejected by NumPy releases that no longer accept a float ``num``.
    grid = linspace(
        _1d_array_good.min(),
        _1d_array_good.max(),
        int(n_grid),
    )

    pdf = skew_t_model.pdf(
        grid,
        degree_of_freedom,
        shape,
        loc=location,
        scale=scale,
    )

    # Shape reference: element-wise minimum of the PDF and the PDF evaluated
    # on the reflected coordinates.
    shape_pdf_reference = minimum(
        pdf,
        skew_t_model.pdf(
            get_coordinates_for_reflection(grid, pdf),
            degree_of_freedom_for_tail_reduction,
            shape,
            loc=location,
            scale=scale,
        ),
    )

    # Floor the reference at EPS (presumably a small positive constant) so
    # the ratio below never divides by a smaller value.
    shape_pdf_reference[shape_pdf_reference < EPS] = EPS

    # Pointwise Kullback-Leibler term, normalized to sum to 1.
    shape_kl = pdf * log(pdf / shape_pdf_reference)

    shape_kl_darea = shape_kl / shape_kl.sum()

    shape_pdf_reference_argmax = shape_pdf_reference.argmax()

    # Accumulate outwards from the reference peak: negative running sums to
    # its left, positive running sums from the peak to the right.
    shape_context_indices = concatenate((
        -cumsum(shape_kl_darea[:shape_pdf_reference_argmax][::-1])[::-1],
        cumsum(shape_kl_darea[shape_pdf_reference_argmax:]),
    ))

    if multiply_distance_from_location:

        shape_context_indices *= absolute(grid -
                                          grid[shape_pdf_reference_argmax])

    shape_context_indices *= (1 + absolute(shape)) / (
        scale * log(1 + degree_of_freedom))

    if all(parameter is not None for parameter in (
            global_location,
            global_scale,
            global_degree_of_freedom,
            global_shape,
    )):

        # Location reference: element-wise minimum of the PDF and the global
        # PDF evaluated on the same grid.
        location_pdf_reference = minimum(
            pdf,
            skew_t_model.pdf(
                grid,
                global_degree_of_freedom,
                global_shape,
                loc=global_location,
                scale=global_scale,
            ),
        )

        location_pdf_reference[location_pdf_reference < EPS] = EPS

        location_kl = pdf * log(pdf / location_pdf_reference)

        location_kl_darea = location_kl / location_kl.sum()

        location_pdf_reference_argmax = location_pdf_reference.argmax()

        location_context_indices = concatenate((
            -cumsum(
                location_kl_darea[:location_pdf_reference_argmax][::-1])[::-1],
            cumsum(location_kl_darea[location_pdf_reference_argmax:]),
        ))

        # Unlike the shape context, the location context is always weighted
        # by the distance from its reference peak.
        location_context_indices *= absolute(
            grid - grid[location_pdf_reference_argmax])

        location_context_indices /= scale + global_scale

        context_indices = location_context_indices + shape_context_indices

    else:

        # Without a complete global PDF only the shape context is used.
        location_pdf_reference = None

        location_context_indices = None

        context_indices = shape_context_indices

    context_indices_like_array = full(
        _1d_array.size,
        nan,
    )

    # Each good value takes the context index of its nearest grid point.
    context_indices_like_array[~is_bad_value] = context_indices[[
        absolute(grid - value).argmin() for value in _1d_array_good
    ]]

    return {
        'fit': asarray((
            n,
            location,
            scale,
            degree_of_freedom,
            shape,
        )),
        'grid': grid,
        'pdf': pdf,
        'shape_pdf_reference': shape_pdf_reference,
        'shape_context_indices': shape_context_indices,
        'location_pdf_reference': location_pdf_reference,
        'location_context_indices': location_context_indices,
        'context_indices': context_indices,
        'context_indices_like_array': context_indices_like_array,
    }