예제 #1
0
def make_essentiality_matrix(feature_x_sample,
                             feature_x_fit,
                             n_x_grids=3000,
                             factor=1):
    """
    Compute an essentiality index for every value of every shared feature.

    For each feature present in both inputs, the fitted skew-t PDF and its
    reflected counterpart are evaluated on a grid spanning the feature's
    observed range, turned into essentiality indices with
    define_cumulative_area_ratio_function, and every sample value is assigned
    the index of its nearest grid point, multiplied by sign(shape) and factor.

    :param feature_x_sample: DataFrame; (n_features, n_samples)
    :param feature_x_fit: DataFrame; (n_features, 5); each row is unpacked as
        (n, df, shape, location, scale)
    :param n_x_grids: int; number of grid points per feature
    :param factor: number; multiplier applied to every index
    :return: DataFrame; (n_common_features, n_samples)
    """

    # Use the explicit set operation: `&` between two Index objects is not a
    # set intersection in current pandas.
    common_indices = feature_x_sample.index.intersection(feature_x_fit.index)

    # Check how many labels are shared rather than the truthiness of the
    # labels themselves (any() is False for labels such as 0 or '').
    if common_indices.size:
        print_log(
            'Making essentiality matrix using {} common features (indices) ...'
            .format(common_indices.size))
    else:
        print_log('No common features (indices).')

    # Label-based selection; .ix is no longer available in pandas.
    gene_x_sample = feature_x_sample.loc[common_indices, :]
    gene_x_fit = feature_x_fit.loc[common_indices, :]

    skew_t = ACSkewT_gen()
    essentiality_matrix = empty(gene_x_sample.shape)
    # n is part of the fit row but is not needed here.
    for i, (g, (n, df, shape, location,
                scale)) in enumerate(gene_x_fit.iterrows()):
        # Skew-t PDF
        vector = asarray(gene_x_sample.loc[g, :])
        x_grids = linspace(vector.min(), vector.max(), n_x_grids)
        skew_t_pdf = skew_t.pdf(x_grids, df, shape, loc=location, scale=scale)

        # Reflected Skew-t PDF
        x_grids_for_reflection = define_x_coordinates_for_reflection(
            skew_t_pdf, x_grids)
        skew_t_pdf_reflected = skew_t.pdf(x_grids_for_reflection,
                                          df,
                                          shape,
                                          loc=location,
                                          scale=scale)

        # Essentiality indices; direction is '-' for positive shape and '+'
        # otherwise.
        essentiality_indices = define_cumulative_area_ratio_function(
            skew_t_pdf,
            skew_t_pdf_reflected,
            x_grids,
            direction='-' if shape > 0 else '+')

        # Look up each sample value at its nearest grid point.
        essentiality_matrix[i, :] = [
            factor * sign(shape) *
            essentiality_indices[argmin(abs(x_grids - v))] for v in vector
        ]

    return DataFrame(essentiality_matrix,
                     index=gene_x_sample.index,
                     columns=gene_x_sample.columns)
예제 #2
0
def test_skewt():
    """Compare ACSkewT_gen pdf/cdf with reference values produced by R's sn.

    The R command that generated each reference vector is kept as a comment
    above it. Only the df=5 and df=1 comparisons are asserted; the results of
    the large-df comparisons are computed and discarded.
    """
    distribution = ACSkewT_gen()
    points = [-2, -1, -0.5, 0, 1, 2]
    alpha = 10
    large_df = 1000000

    # --- large df (default in R:sn is df=inf) --------------------------------
    # noquote(sprintf("%.15e,", dst(c(-2,-1, -0.5,0,1,2), shape=10)))
    expected_pdf = np.array([
        2.973416551551523e-90,
        3.687562713971017e-24,
        2.018401586422970e-07,
        3.989422804014327e-01,
        4.839414490382867e-01,
        1.079819330263761e-01,
    ])
    computed_pdf = distribution.pdf(points, large_df, alpha)
    # The two results below are not asserted.
    np.allclose(computed_pdf, expected_pdf, rtol=0, atol=1e-6)
    np.allclose(computed_pdf, expected_pdf, rtol=1e-1, atol=0)

    # noquote(sprintf("%.15e,", pst(c(-2,-1, -0.5,0,1,2), shape=10)))
    expected_cdf = np.array([
        0.000000000000000e+00,
        0.000000000000000e+00,
        3.729478836866917e-09,
        3.172551743055357e-02,
        6.826894921370859e-01,
        9.544997361036416e-01,
    ])
    computed_cdf = distribution.cdf(points, large_df, alpha)
    # The two results below are not asserted.
    np.allclose(computed_cdf, expected_cdf, rtol=0, atol=1e-6)
    np.allclose(computed_cdf, expected_cdf, rtol=1e-1, atol=0)

    # --- df = 5; args = (df, alpha) ------------------------------------------
    # noquote(sprintf("%.15e,", dst(c(-2,-1, -0.5,0,1,2), shape=10, df=5)))
    expected_pdf = np.array([
        2.185448836190663e-07,
        1.272381597868587e-05,
        5.746937644959992e-04,
        3.796066898224945e-01,
        4.393468708859825e-01,
        1.301804021075493e-01,
    ])
    computed_pdf = distribution.pdf(points, 5, alpha)
    pdf_matches = np.allclose(computed_pdf, expected_pdf, rtol=1e-13, atol=1e-25)
    assert_(pdf_matches)

    # noquote(sprintf("%.15e,", pst(c(-2,-1, -0.5,0,1,2), shape=10, df=5)))
    expected_cdf = np.array([
        8.822783669199699e-08,
        2.638467463775795e-06,
        6.573106017198583e-05,
        3.172551743055352e-02,
        6.367851708183412e-01,
        8.980606093979784e-01,
    ])
    computed_cdf = distribution.cdf(points, 5, alpha)
    cdf_matches = np.allclose(computed_cdf, expected_cdf, rtol=1e-10, atol=0)
    assert_(cdf_matches)

    # --- df = 1; args = (df, alpha) = (1, 10) --------------------------------
    # noquote(sprintf("%.15e,", dst(c(-2,-1, -0.5,0,1,2), shape=10, df=1)))
    expected_pdf = np.array([
        3.941955996757291e-04,
        1.568067236862745e-03,
        6.136996029432048e-03,
        3.183098861837907e-01,
        3.167418189469279e-01,
        1.269297588738406e-01,
    ])
    computed_pdf = distribution.pdf(points, 1, alpha)
    pdf_matches = np.allclose(computed_pdf, expected_pdf, rtol=1e-13, atol=1e-25)
    assert_(pdf_matches)

    # noquote(sprintf("%.15e,", pst(c(-2,-1, -0.5,0,1,2), shape=10, df=1)))
    expected_cdf = np.array([
        7.893671370544414e-04,
        1.575817262600422e-03,
        3.128720749105560e-03,
        3.172551743055351e-02,
        5.015758172626005e-01,
        7.056221318361879e-01,
    ])
    computed_cdf = distribution.cdf(points, 1, alpha)
    cdf_matches = np.allclose(computed_cdf, expected_cdf, rtol=1e-13, atol=1e-25)
    assert_(cdf_matches)
예제 #3
0
def test_skewt():
    """Check ACSkewT_gen.pdf and .cdf against values generated with R's sn.

    The checks run in a fixed order: the large-df pdf and cdf comparisons
    (whose results are computed but not asserted), then the asserted pdf/cdf
    comparisons for df=5 and df=1. All cases use shape (alpha) 10.
    """
    skewt = ACSkewT_gen()
    x = [-2, -1, -0.5, 0, 1, 2]
    alpha = 10

    # Large-df cases; default in R:sn is df=inf.
    # Each entry: (method, R reference values).
    unasserted_cases = (
        # noquote(sprintf("%.15e,", dst(c(-2,-1, -0.5,0,1,2), shape=10)))
        (skewt.pdf, [2.973416551551523e-90, 3.687562713971017e-24,
                     2.018401586422970e-07, 3.989422804014327e-01,
                     4.839414490382867e-01, 1.079819330263761e-01]),
        # noquote(sprintf("%.15e,", pst(c(-2,-1, -0.5,0,1,2), shape=10)))
        (skewt.cdf, [0.000000000000000e+00, 0.000000000000000e+00,
                     3.729478836866917e-09, 3.172551743055357e-02,
                     6.826894921370859e-01, 9.544997361036416e-01]),
    )
    for evaluate, r_values in unasserted_cases:
        reference = np.array(r_values)
        computed = evaluate(x, 1000000, alpha)
        # These comparison results are intentionally left unasserted, as in
        # the original test.
        np.allclose(computed, reference, rtol=0, atol=1e-6)
        np.allclose(computed, reference, rtol=1e-1, atol=0)

    # Each entry: (method, df, R reference values, rtol, atol).
    asserted_cases = (
        # noquote(sprintf("%.15e,", dst(c(-2,-1, -0.5,0,1,2), shape=10, df=5)))
        (skewt.pdf, 5, [2.185448836190663e-07, 1.272381597868587e-05,
                        5.746937644959992e-04, 3.796066898224945e-01,
                        4.393468708859825e-01, 1.301804021075493e-01],
         1e-13, 1e-25),
        # noquote(sprintf("%.15e,", pst(c(-2,-1, -0.5,0,1,2), shape=10, df=5)))
        (skewt.cdf, 5, [8.822783669199699e-08, 2.638467463775795e-06,
                        6.573106017198583e-05, 3.172551743055352e-02,
                        6.367851708183412e-01, 8.980606093979784e-01],
         1e-10, 0),
        # noquote(sprintf("%.15e,", dst(c(-2,-1, -0.5,0,1,2), shape=10, df=1)))
        (skewt.pdf, 1, [3.941955996757291e-04, 1.568067236862745e-03,
                        6.136996029432048e-03, 3.183098861837907e-01,
                        3.167418189469279e-01, 1.269297588738406e-01],
         1e-13, 1e-25),
        # noquote(sprintf("%.15e,", pst(c(-2,-1, -0.5,0,1,2), shape=10, df=1)))
        (skewt.cdf, 1, [7.893671370544414e-04, 1.575817262600422e-03,
                        3.128720749105560e-03, 3.172551743055351e-02,
                        5.015758172626005e-01, 7.056221318361879e-01],
         1e-13, 1e-25),
    )
    for evaluate, df, r_values, rtol, atol in asserted_cases:
        reference = np.array(r_values)
        computed = evaluate(x, df, alpha)  # args = (df, alpha)
        assert_(np.allclose(computed, reference, rtol=rtol, atol=atol))
예제 #4
0
def make_essentiality_matrix(feature_x_sample,
                             feature_x_fit,
                             n_grids=3000,
                             function='scaled_fractional_difference',
                             factor=1):
    """
    Compute an essentiality index for every value of every feature.

    For each feature, the fitted skew-t PDF and its reflected counterpart are
    evaluated on a grid spanning the feature's observed range, combined with
    _compute_essentiality_index, normalized to 0-1, and every sample value is
    assigned the index of its nearest grid point times sign(shape) * factor.

    :param feature_x_sample: DataFrame; (n_features, n_samples)
    :param feature_x_fit: DataFrame; (n_features, 5); each row is unpacked as
        (n, df, shape, location, scale). Rows are read by position, so they
        must be in the same order as the rows of feature_x_sample.
    :param n_grids: int; number of grid points per feature
    :param function: str; expression passed to _compute_essentiality_index.
        A value starting with 'scaled_fractional_difference' is replaced, per
        feature, by an expression whose exponent is that feature's scale.
    :param factor: number; multiplier applied to every index
    :return: DataFrame; (n_features, n_samples)
    """

    print('\tApplying {} to each feature ...'.format(function))

    essentiality_matrix = empty(feature_x_sample.shape)

    skew_t = ACSkewT_gen()

    # Decide once from the caller's argument. Rebinding `function` inside the
    # loop would make this check fail after the first feature, so every later
    # feature would silently reuse the first feature's scale.
    use_scaled_fractional_difference = function.startswith(
        'scaled_fractional_difference')

    for i, (_, f_v) in enumerate(feature_x_sample.iterrows()):

        # Build skew-t PDF
        grids = linspace(f_v.min(), f_v.max(), n_grids)
        # Positional row lookup (.ix is no longer available in pandas).
        n, df, shape, location, scale = feature_x_fit.iloc[i, :]
        skew_t_pdf = skew_t.pdf(grids, df, shape, loc=location, scale=scale)

        # Build reflected skew-t PDF
        skew_t_pdf_r = skew_t.pdf(define_x_coordinates_for_reflection(
            skew_t_pdf, grids),
                                  df,
                                  shape,
                                  loc=location,
                                  scale=scale)

        # Set up the expression for this feature
        if use_scaled_fractional_difference:
            expression = 'where(f2 < f1, ((f1 - f2) / f1)**{}, 0)'.format(
                scale)
        else:
            expression = function

        # Direction is '-' for positive shape and '+' otherwise; the last
        # argument is the grid spacing.
        ei = _compute_essentiality_index(skew_t_pdf, skew_t_pdf_r, expression,
                                         '-' if shape > 0 else '+',
                                         grids[1] - grids[0])

        ei = normalize_1d(ei, '0-1')

        # Look up each sample value at its nearest grid point.
        essentiality_matrix[i, :] = ei[[
            argmin(abs(grids - x)) for x in asarray(f_v)
        ]] * sign(shape) * factor

    return DataFrame(essentiality_matrix,
                     index=feature_x_sample.index,
                     columns=feature_x_sample.columns)
예제 #5
0
파일: _context.py 프로젝트: KwatME/kraft
def compute_vector_context(
    vector,
    n_data=None,
    location=None,
    scale=None,
    degree_of_freedom=None,
    shape=None,
    fit_initial_location=None,
    fit_initial_scale=None,
    n_grid=int(1e3),
    degree_of_freedom_for_tail_reduction=1e8,
    multiply_distance_from_reference_argmax=False,
    global_location=None,
    global_scale=None,
    global_degree_of_freedom=None,
    global_shape=None,
):
    """
    Compute the context of each value of a vector from its skew-t fit.

    :param vector: array; entries flagged by check_array_for_bad are excluded
        from the computation and come back as nan in "context_like_array"
    :param n_data: number; skew-t fit value (see below)
    :param location: number; skew-t fit value (see below)
    :param scale: number; skew-t fit value (see below)
    :param degree_of_freedom: number; skew-t fit value (see below)
    :param shape: number; skew-t fit value. If any of the five fit values is
        None, all five are re-estimated with fit_vector_to_skew_t_pdf.
    :param fit_initial_location: forwarded to fit_vector_to_skew_t_pdf
    :param fit_initial_scale: forwarded to fit_vector_to_skew_t_pdf
    :param n_grid: int; number of grid points between the smallest and
        largest good value
    :param degree_of_freedom_for_tail_reduction: number; degree of freedom
        used for the PDF evaluated on the reflecting grid
    :param multiply_distance_from_reference_argmax: bool; forwarded to
        compute_pdf_and_pdf_reference_context
    :param global_location: number; global skew-t parameter (see below)
    :param global_scale: number; global skew-t parameter (see below)
    :param global_degree_of_freedom: number; global skew-t parameter
    :param global_shape: number; global skew-t parameter. The location
        context is computed only when all four global parameters are given.
    :return: dict; keys "fit", "grid", "pdf", "shape_pdf_reference",
        "shape_context", "location_pdf_reference", "location_context",
        "context", "context_like_array"
    """

    good_mask = ~check_array_for_bad(vector, raise_for_bad=False)
    good_values = vector[good_mask]

    # Fit only when the caller did not supply a complete set of fit values.
    fit_values = (n_data, location, scale, degree_of_freedom, shape)
    if not all(value is not None for value in fit_values):
        fitted = fit_vector_to_skew_t_pdf(
            good_values,
            fit_initial_location=fit_initial_location,
            fit_initial_scale=fit_initial_scale,
        )
        n_data, location, scale, degree_of_freedom, shape = fitted

    grid = linspace(good_values.min(), good_values.max(), num=n_grid)

    model = ACSkewT_gen()
    pdf = model.pdf(grid, degree_of_freedom, shape, loc=location, scale=scale)

    # Shape reference: the PDF evaluated on the grid reflected about the
    # PDF's peak, capped pointwise by the PDF itself.
    reflecting_grid = make_reflecting_grid(grid, grid[pdf.argmax()])
    reflected_pdf = model.pdf(
        reflecting_grid,
        degree_of_freedom_for_tail_reduction,
        shape,
        loc=location,
        scale=scale,
    )
    shape_pdf_reference = minimum(pdf, reflected_pdf)
    shape_context = compute_pdf_and_pdf_reference_context(
        grid, pdf, shape_pdf_reference, multiply_distance_from_reference_argmax
    )

    global_values = (
        global_location,
        global_scale,
        global_degree_of_freedom,
        global_shape,
    )
    if all(value is not None for value in global_values):
        # Location reference: the global PDF capped pointwise by the PDF.
        global_pdf = model.pdf(
            grid,
            global_degree_of_freedom,
            global_shape,
            loc=global_location,
            scale=global_scale,
        )
        location_pdf_reference = minimum(pdf, global_pdf)
        location_context = compute_pdf_and_pdf_reference_context(
            grid, pdf, location_pdf_reference, multiply_distance_from_reference_argmax
        )
        context = shape_context + location_context
    else:
        location_pdf_reference = None
        location_context = None
        context = shape_context

    # Map every good value to the context at its nearest grid point; the
    # remaining entries stay nan.
    nearest_grid_indices = [
        absolute(grid - value).argmin() for value in good_values
    ]
    context_like_array = full(vector.size, nan)
    context_like_array[good_mask] = context[nearest_grid_indices]

    result = {}
    result["fit"] = array((n_data, location, scale, degree_of_freedom, shape))
    result["grid"] = grid
    result["pdf"] = pdf
    result["shape_pdf_reference"] = shape_pdf_reference
    result["shape_context"] = shape_context
    result["location_pdf_reference"] = location_pdf_reference
    result["location_context"] = location_context
    result["context"] = context
    result["context_like_array"] = context_like_array
    return result
예제 #6
0
def _plot_essentiality(vector, bars, n, df, shape, location, scale, n_bins,
                       n_x_grids, figure_size, dpi,
                       plot_vertical_extention_factor, plot_fits, pdf_color,
                       pdf_reversed_color, essentiality_index_color,
                       gene_fontsize, labels_fontsize, bars_linewidth,
                       bar0_color, bar1_color, bar2_color, filepath, overwrite,
                       show_plot):
    """
    Plot one feature: a histogram of its values, optionally the fitted skew-t
    PDF, the reflected PDF and the essentiality-index curve, and three rug
    plots below the graph.

    :param vector: values to plot; its .name is used as the figure title
        (presumably a Series -- confirm against the caller)
    :param bars: object whose first three rows (.iloc[0..2, :]) each feed one
        rug plot; each row is multiplied element-wise with vector and the last
        three characters of its .name label the rug plot
    :param n: number; fit value, only shown in the fit-parameter text
    :param df: number; skew-t degree of freedom
    :param shape: number; skew-t shape; shape > 0 selects direction '-',
        otherwise '+'
    :param location: number; skew-t location
    :param scale: number; skew-t scale
    :param n_bins: number of histogram bins
    :param n_x_grids: number of grid points between vector.min() and
        vector.max()
    :param figure_size: passed to plt.figure(figsize=...)
    :param dpi: passed to save_plot
    :param plot_vertical_extention_factor: number; the y-axis upper limit is
        the tallest histogram bin count times this factor
    :param plot_fits: bool; draw the PDF, reflected PDF and essentiality-index
        lines and the fit-parameter text
    :param pdf_color: color of the histogram and of the PDF line
    :param pdf_reversed_color: color of the reflected PDF line
    :param essentiality_index_color: color of the essentiality-index line
    :param gene_fontsize: title font size; the fit-parameter text uses 0.6x
    :param labels_fontsize: axis-label font size; tick and bar labels use 0.81x
    :param bars_linewidth: line width of the rug plots
    :param bar0_color: color of the first rug plot
    :param bar1_color: color of the second rug plot
    :param bar2_color: color of the third rug plot
    :param filepath: if truthy, the figure is saved with save_plot
    :param overwrite: passed to save_plot
    :param show_plot: bool; call plt.show()
    :return: None
    """

    # ==================================================================================================================
    # Set up
    # ==================================================================================================================
    # Initialize a figure
    figure = plt.figure(figsize=figure_size)

    # Set figure styles
    set_style('ticks')
    despine(offset=9)

    # Set figure grids: 10 rows, the top 5 for the graph
    n_rows = 10
    n_rows_graph = 5
    gridspec = GridSpec(n_rows, 1)

    # Make graph ax
    ax_graph = plt.subplot(gridspec[:n_rows_graph, :])

    # Set bar axes: one grid row each, leaving one empty row below the graph
    ax_bar0 = plt.subplot(gridspec[n_rows_graph + 1:n_rows_graph + 2, :])
    ax_bar1 = plt.subplot(gridspec[n_rows_graph + 2:n_rows_graph + 3, :])
    ax_bar2 = plt.subplot(gridspec[n_rows_graph + 3:n_rows_graph + 4, :])
    # Hide all spines, tick lines and tick labels of the bar axes
    for ax in [ax_bar1, ax_bar0, ax_bar2]:
        ax.spines['top'].set_visible(False)
        ax.spines['bottom'].set_visible(False)
        ax.spines['left'].set_visible(False)
        ax.spines['right'].set_visible(False)
        for t in ax.get_xticklines():
            t.set_visible(False)
        for t in ax.get_xticklabels():
            t.set_visible(False)
        for t in ax.get_yticklines():
            t.set_visible(False)
        for t in ax.get_yticklabels():
            t.set_visible(False)

    # ==================================================================================================================
    # Plot histogram
    # ==================================================================================================================
    distplot(vector,
             hist=True,
             bins=n_bins,
             kde=False,
             hist_kws={
                 'linewidth': 0.92,
                 'alpha': 0.24,
                 'color': pdf_color
             },
             ax=ax_graph)

    # ==================================================================================================================
    # Plot skew-t fit PDF
    # ==================================================================================================================
    # Initialize a skew-t generator
    skew_t = ACSkewT_gen()

    # Set up x-grids
    x_grids = linspace(vector.min(), vector.max(), n_x_grids)

    # Generate skew-t PDF
    skew_t_pdf = skew_t.pdf(x_grids, df, shape, loc=location, scale=scale)

    # Scale skew-t PDF so that its peak matches the tallest histogram bin
    histogram_max = histogram(vector, bins=n_bins)[0].max()
    scale_factor = histogram_max / skew_t_pdf.max()
    skew_t_pdf *= scale_factor

    if plot_fits:
        # Plot skew-t PDF; line_kwargs is defined only when plot_fits is true
        # and is reused by the other plot_fits branches below
        line_kwargs = {'linestyle': '-', 'linewidth': 2.6}
        ax_graph.plot(x_grids, skew_t_pdf, color=pdf_color, **line_kwargs)

    # Extend plot vertically
    ax_graph.axis([
        vector.min(),
        vector.max(), 0, histogram_max * plot_vertical_extention_factor
    ])

    # ==================================================================================================================
    # Plot reflected skew-t PDF
    # ==================================================================================================================
    # Get the x-grids to get the reflecting PDF (computed from the scaled PDF)
    x_grids_for_reflection = define_x_coordinates_for_reflection(
        skew_t_pdf, x_grids)

    # Generate skew-t PDF over reflected x-grids, and scale
    skew_t_pdf_reflected = skew_t.pdf(
        x_grids_for_reflection, df, shape, loc=location,
        scale=scale) * scale_factor

    if plot_fits:
        # Plot over the original x-grids
        ax_graph.plot(x_grids,
                      skew_t_pdf_reflected,
                      color=pdf_reversed_color,
                      **line_kwargs)

    # ==================================================================================================================
    # Plot essentiality indices
    # ==================================================================================================================
    # Both inputs are the histogram-scaled PDFs; the indices are computed even
    # when plot_fits is false, but are only used for plotting
    essentiality_indices = define_cumulative_area_ratio_function(
        skew_t_pdf,
        skew_t_pdf_reflected,
        x_grids,
        direction=['+', '-'][shape > 0])
    if plot_fits:
        ax_graph.plot(x_grids,
                      essentiality_indices,
                      color=essentiality_index_color,
                      **line_kwargs)

    # ==================================================================================================================
    # Decorate
    # ==================================================================================================================
    # Set title
    figure.text(0.5,
                0.96,
                vector.name,
                fontsize=gene_fontsize,
                weight='bold',
                horizontalalignment='center')
    if plot_fits:
        # Show the fit parameters below the title
        figure.text(
            0.5,
            0.92,
            'N={:.2f}    DF={:.2f}    Shape={:.2f}    Location={:.2f}    Scale={:.2f}'
            .format(n, df, shape, location, scale),
            fontsize=gene_fontsize * 0.6,
            weight='bold',
            horizontalalignment='center')

    # Set labels
    label_kwargs = {'weight': 'bold', 'fontsize': labels_fontsize}
    ax_graph.set_xlabel('RNAi Score', **label_kwargs)
    ax_graph.set_ylabel('Frequency', **label_kwargs)

    # Set ticks
    tick_kwargs = {'size': labels_fontsize * 0.81, 'weight': 'normal'}
    for t in ax_graph.get_xticklabels():
        t.set(**tick_kwargs)
    for t in ax_graph.get_yticklabels():
        t.set(**tick_kwargs)

    # ==================================================================================================================
    # Plot bars
    # ==================================================================================================================
    bar_kwargs = {
        'rotation': 90,
        'weight': 'bold',
        'fontsize': labels_fontsize * 0.81
    }
    # One entry per rug plot: the row of bars, its axes and its color
    bar_specifications = {
        0: {
            'vector': bars.iloc[0, :],
            'ax': ax_bar0,
            'color': bar0_color
        },
        1: {
            'vector': bars.iloc[1, :],
            'ax': ax_bar1,
            'color': bar1_color
        },
        2: {
            'vector': bars.iloc[2, :],
            'ax': ax_bar2,
            'color': bar2_color
        }
    }

    for i, spec in bar_specifications.items():
        v = spec['vector']
        ax = spec['ax']
        c = spec['color']
        # NOTE(review): v * vector presumably masks vector with a 0/1
        # indicator row -- confirm against the caller
        rugplot(v * vector, height=1, color=c, ax=ax, linewidth=bars_linewidth)
        # Label the bar with the last three characters of the row name
        ax.set_ylabel(v.name[-3:], **bar_kwargs)

    # ==================================================================================================================
    # Save
    # ==================================================================================================================
    if filepath:
        save_plot(filepath, dpi=dpi, overwrite=overwrite)

    if show_plot:
        plt.show()

    # TODO: properly close
    plt.clf()
    plt.close()
예제 #7
0
def compute_context(
    _1d_array,
    skew_t_model=None,
    location=None,
    scale=None,
    degree_of_freedom=None,
    shape=None,
    fit_fixed_location=None,
    fit_fixed_scale=None,
    fit_initial_location=None,
    fit_initial_scale=None,
    n_grid=1e3,
    degree_of_freedom_for_tail_reduction=1e8,
    multiply_distance_from_location=False,
    global_location=None,
    global_scale=None,
    global_degree_of_freedom=None,
    global_shape=None,
):
    """
    Compute context indices of a 1D array from its skew-t fit.

    :param _1d_array: array; entries flagged by check_nd_array_for_bad_value
        are excluded and come back as nan in "context_indices_like_array"
    :param skew_t_model: skew-t model; a new ACSkewT_gen is created when None
    :param location: number; skew-t fit value (see below)
    :param scale: number; skew-t fit value (see below)
    :param degree_of_freedom: number; skew-t fit value (see below)
    :param shape: number; skew-t fit value. If any of the four fit values is
        None, they are all re-estimated with fit_skew_t_pdf.
    :param fit_fixed_location: forwarded to fit_skew_t_pdf
    :param fit_fixed_scale: forwarded to fit_skew_t_pdf
    :param fit_initial_location: forwarded to fit_skew_t_pdf
    :param fit_initial_scale: forwarded to fit_skew_t_pdf
    :param n_grid: number; number of grid points; coerced with int()
    :param degree_of_freedom_for_tail_reduction: number; degree of freedom
        used for the PDF evaluated on the reflected coordinates
    :param multiply_distance_from_location: bool; multiply the shape context
        indices by the distance from the shape reference's peak
    :param global_location: number; global skew-t parameter (see below)
    :param global_scale: number; global skew-t parameter (see below)
    :param global_degree_of_freedom: number; global skew-t parameter
    :param global_shape: number; global skew-t parameter. The location
        context is computed only when all four global parameters are given.
    :return: dict; keys 'fit', 'grid', 'pdf', 'shape_pdf_reference',
        'shape_context_indices', 'location_pdf_reference',
        'location_context_indices', 'context_indices',
        'context_indices_like_array'
    """

    is_bad_value = check_nd_array_for_bad_value(
        _1d_array,
        raise_for_bad_value=False,
    )

    _1d_array_good = _1d_array[~is_bad_value]

    if skew_t_model is None:

        skew_t_model = ACSkewT_gen()

    if any(parameter is None for parameter in (
            location,
            scale,
            degree_of_freedom,
            shape,
    )):

        n, location, scale, degree_of_freedom, shape = fit_skew_t_pdf(
            _1d_array_good,
            skew_t_model=skew_t_model,
            fit_fixed_location=fit_fixed_location,
            fit_fixed_scale=fit_fixed_scale,
            fit_initial_location=fit_initial_location,
            fit_initial_scale=fit_initial_scale,
        )

    else:

        n = _1d_array_good.size

    # linspace needs an integer number of points; the default (1e3) is a
    # float, which current NumPy rejects, so coerce it here.
    grid = linspace(
        _1d_array_good.min(),
        _1d_array_good.max(),
        int(n_grid),
    )

    pdf = skew_t_model.pdf(
        grid,
        degree_of_freedom,
        shape,
        loc=location,
        scale=scale,
    )

    # Shape reference: PDF on the reflected coordinates, capped by the PDF.
    shape_pdf_reference = minimum(
        pdf,
        skew_t_model.pdf(
            get_coordinates_for_reflection(grid, pdf),
            degree_of_freedom_for_tail_reduction,
            shape,
            loc=location,
            scale=scale,
        ),
    )

    # Floor the reference at EPS so the ratio below never divides by zero.
    shape_pdf_reference[shape_pdf_reference < EPS] = EPS

    # NOTE(review): where pdf is exactly 0 this evaluates 0 * log(0) = nan,
    # which would propagate through the sum -- confirm whether that can occur.
    shape_kl = pdf * log(pdf / shape_pdf_reference)

    shape_kl_darea = shape_kl / shape_kl.sum()

    shape_pdf_reference_argmax = shape_pdf_reference.argmax()

    # Cumulate away from the reference peak: negative to its left, positive
    # from the peak to the right.
    shape_context_indices = concatenate((
        -cumsum(shape_kl_darea[:shape_pdf_reference_argmax][::-1])[::-1],
        cumsum(shape_kl_darea[shape_pdf_reference_argmax:]),
    ))

    if multiply_distance_from_location:

        shape_context_indices *= absolute(grid -
                                          grid[shape_pdf_reference_argmax])

    shape_context_indices *= (1 + absolute(shape)) / (
        scale * log(1 + degree_of_freedom))

    if all(parameter is not None for parameter in (
            global_location,
            global_scale,
            global_degree_of_freedom,
            global_shape,
    )):

        # Location reference: the global PDF capped by the PDF.
        location_pdf_reference = minimum(
            pdf,
            skew_t_model.pdf(
                grid,
                global_degree_of_freedom,
                global_shape,
                loc=global_location,
                scale=global_scale,
            ),
        )

        location_pdf_reference[location_pdf_reference < EPS] = EPS

        location_kl = pdf * log(pdf / location_pdf_reference)

        location_kl_darea = location_kl / location_kl.sum()

        location_pdf_reference_argmax = location_pdf_reference.argmax()

        location_context_indices = concatenate((
            -cumsum(
                location_kl_darea[:location_pdf_reference_argmax][::-1])[::-1],
            cumsum(location_kl_darea[location_pdf_reference_argmax:]),
        ))

        # Unlike the shape context, the distance weighting is unconditional.
        location_context_indices *= absolute(
            grid - grid[location_pdf_reference_argmax])

        location_context_indices /= scale + global_scale

        context_indices = location_context_indices + shape_context_indices

    else:

        location_pdf_reference = None

        location_context_indices = None

        context_indices = shape_context_indices

    # Map every good value to the index at its nearest grid point; the
    # remaining entries stay nan.
    context_indices_like_array = full(
        _1d_array.size,
        nan,
    )

    context_indices_like_array[~is_bad_value] = context_indices[[
        absolute(grid - value).argmin() for value in _1d_array_good
    ]]

    return {
        'fit': asarray((
            n,
            location,
            scale,
            degree_of_freedom,
            shape,
        )),
        'grid': grid,
        'pdf': pdf,
        'shape_pdf_reference': shape_pdf_reference,
        'shape_context_indices': shape_context_indices,
        'location_pdf_reference': location_pdf_reference,
        'location_context_indices': location_context_indices,
        'context_indices': context_indices,
        'context_indices_like_array': context_indices_like_array,
    }
예제 #8
0
def compute_context(
    _1d_array,
    n_data=None,
    location=None,
    scale=None,
    degree_of_freedom=None,
    shape=None,
    fit_fixed_location=None,
    fit_fixed_scale=None,
    fit_initial_location=None,
    fit_initial_scale=None,
    n_grid=1e3,
    degree_of_freedom_for_tail_reduction=1e8,
    minimum_kl=1e-2,
    scale_with_kl=True,
    multiply_distance_from_reference_argmax=False,
    global_location=None,
    global_scale=None,
    global_degree_of_freedom=None,
    global_shape=None,
):
    """
    Compute context indices of a 1D array from its skew-t fit.

    :param _1d_array: array; entries flagged by check_nd_array_for_bad are
        excluded and come back as nan in "context_indices_like_array"
    :param n_data: number; skew-t fit value (see below)
    :param location: number; skew-t fit value (see below)
    :param scale: number; skew-t fit value (see below)
    :param degree_of_freedom: number; skew-t fit value (see below)
    :param shape: number; skew-t fit value. If any of the five fit values is
        None, all five are re-estimated with fit_skew_t_pdf.
    :param fit_fixed_location: forwarded to fit_skew_t_pdf
    :param fit_fixed_scale: forwarded to fit_skew_t_pdf
    :param fit_initial_location: forwarded to fit_skew_t_pdf
    :param fit_initial_scale: forwarded to fit_skew_t_pdf
    :param n_grid: number; number of grid points; coerced with int()
    :param degree_of_freedom_for_tail_reduction: number; degree of freedom
        used for the PDF evaluated on the reflected coordinates
    :param minimum_kl: forwarded to _compute_context_indices
    :param scale_with_kl: forwarded to _compute_context_indices
    :param multiply_distance_from_reference_argmax: forwarded to
        _compute_context_indices
    :param global_location: number; global skew-t parameter (see below)
    :param global_scale: number; global skew-t parameter (see below)
    :param global_degree_of_freedom: number; global skew-t parameter
    :param global_shape: number; global skew-t parameter. The location
        context is computed only when all four global parameters are given.
    :return: dict; keys "fit", "grid", "pdf", "shape_pdf_reference",
        "shape_context_indices", "location_pdf_reference",
        "location_context_indices", "context_indices",
        "context_indices_like_array"
    """

    is_bad = check_nd_array_for_bad(_1d_array, raise_for_bad=False)

    _1d_array_good = _1d_array[~is_bad]

    if any(
        parameter is None
        for parameter in (n_data, location, scale, degree_of_freedom, shape)
    ):

        n_data, location, scale, degree_of_freedom, shape = fit_skew_t_pdf(
            _1d_array_good,
            fit_fixed_location=fit_fixed_location,
            fit_fixed_scale=fit_fixed_scale,
            fit_initial_location=fit_initial_location,
            fit_initial_scale=fit_initial_scale,
        )

    # linspace needs an integer number of points; the default (1e3) is a
    # float, which current NumPy rejects, so coerce it here.
    grid = linspace(_1d_array_good.min(), _1d_array_good.max(), int(n_grid))

    skew_t_model = ACSkewT_gen()

    pdf = skew_t_model.pdf(grid, degree_of_freedom, shape, loc=location, scale=scale)

    # Shape reference: PDF on the coordinates reflected about the PDF's peak,
    # capped pointwise by the PDF itself.
    shape_pdf_reference = minimum(
        pdf,
        skew_t_model.pdf(
            make_coordinates_for_reflection(grid, grid[pdf.argmax()]),
            degree_of_freedom_for_tail_reduction,
            shape,
            loc=location,
            scale=scale,
        ),
    )

    shape_context_indices = _compute_context_indices(
        grid,
        pdf,
        shape_pdf_reference,
        minimum_kl,
        scale_with_kl,
        multiply_distance_from_reference_argmax,
    )

    if any(
        parameter is None
        for parameter in (
            global_location,
            global_scale,
            global_degree_of_freedom,
            global_shape,
        )
    ):

        location_pdf_reference = None

        location_context_indices = None

        context_indices = shape_context_indices

    else:

        # Location reference: the global PDF capped pointwise by the PDF.
        location_pdf_reference = minimum(
            pdf,
            skew_t_model.pdf(
                grid,
                global_degree_of_freedom,
                global_shape,
                loc=global_location,
                scale=global_scale,
            ),
        )

        location_context_indices = _compute_context_indices(
            grid,
            pdf,
            location_pdf_reference,
            minimum_kl,
            scale_with_kl,
            multiply_distance_from_reference_argmax,
        )

        context_indices = shape_context_indices + location_context_indices

    # Map every good value to the index at its nearest grid point; the
    # remaining entries stay nan.
    context_indices_like_array = full(_1d_array.size, nan)

    context_indices_like_array[~is_bad] = context_indices[
        [absolute(grid - value).argmin() for value in _1d_array_good]
    ]

    return {
        "fit": asarray((n_data, location, scale, degree_of_freedom, shape)),
        "grid": grid,
        "pdf": pdf,
        "shape_pdf_reference": shape_pdf_reference,
        "shape_context_indices": shape_context_indices,
        "location_pdf_reference": location_pdf_reference,
        "location_context_indices": location_context_indices,
        "context_indices": context_indices,
        "context_indices_like_array": context_indices_like_array,
    }
예제 #9
0
def plot_essentiality(feature_x_sample,
                      feature_x_fit,
                      bar_df,
                      directory_path,
                      features=(),
                      enumerate_functions=False,
                      figure_size=FIGURE_SIZE,
                      n_x_grids=3000,
                      n_bins=50,
                      plot_fits=True,
                      show_plot=True,
                      dpi=DPI):
    """
    Make an essentiality plot for each feature (gene) and save it as a PNG.

    For every row of ``feature_x_sample`` the figure contains a normalized
    histogram of the row, the fitted skew-t PDF, the same PDF evaluated over
    the reflected grid, the essentiality index rescaled to the height of the
    PDF, and three rug plots built from ``bar_df``.

    :param feature_x_sample: DataFrame; (n_features, n_samples). The object is
        indexed directly, so a loaded DataFrame is required (file paths are
        not read by this function).
    :param feature_x_fit: DataFrame; (n_features, ...) indexed by feature and
        containing the columns 'N', 'DF', 'Shape', 'Location' and 'Scale'
    :param bar_df: DataFrame; forwarded to ``_get_amp_mut_del`` together with
        the feature name; the first 3 rows of the result are drawn as rug
        plots (presumably amplification / mutation / deletion indicators --
        confirm against ``_get_amp_mut_del``)
    :param directory_path: str;
        directory_path/essentiality_plots/<feature>.png will be saved
    :param features: iterable; (n_selected_features); when non-empty only the
        features also present in ``feature_x_sample.index`` are plotted
    :param enumerate_functions: bool; additionally draw one diagnostic figure
        per candidate essentiality-index function plus one overlay figure
    :param figure_size: tuple; figure size
    :param n_x_grids: int; number of x grids (at least 2, because the grid
        step is taken from the first two grid points)
    :param n_bins: int; number of histogram bins
    :param plot_fits: bool; currently unused -- the fitted lines are always
        plotted
    :param show_plot: bool; show plot or not
    :param dpi: int; dots per inch
    :return: None
    :raises ValueError: if ``features`` is non-empty but none of them is in
        ``feature_x_sample.index``
    """

    # ==========================================================================
    # Select features to plot
    # ==========================================================================
    if len(features):  # Plot only specified features
        selected = [f for f in features if f in feature_x_sample.index]
        if not selected:
            raise ValueError('Specified features not found.')

        # str() so that non-string labels do not break the join
        print('Plotting features: {} ...'.format(', '.join(
            str(f) for f in selected)))
        # .loc is the label-based replacement of the removed .ix indexer
        feature_x_sample = feature_x_sample.loc[selected, :]
    else:  # Plot all features
        print('Plotting all features ...')

    # ==========================================================================
    # Set up what does not change between features
    # ==========================================================================
    skew_t = ACSkewT_gen()
    line_kwargs = dict(linestyle='-', linewidth=2.6)
    # Diagnostic figures are drawn at 70% of the main figure size
    figure_size_ = (asarray(figure_size) * 0.7).astype(int)
    n_features = feature_x_sample.shape[0]

    # Figure grids
    n_rows = 10
    n_rows_graph = 5

    # ==========================================================================
    # Plot each feature
    # ==========================================================================
    for i, (f_i, f_v) in enumerate(feature_x_sample.iterrows()):
        print('Plotting {} (@{}/{}) ...'.format(f_i, i, n_features))

        # ======================================================================
        # Set up figure
        # ======================================================================
        fig = figure(figsize=figure_size)
        gridspec = GridSpec(n_rows, 1)

        # Graph ax on top; one grid row is left empty between it and the bars
        ax_graph = subplot(gridspec[:n_rows_graph, :])
        ax_bar0 = subplot(gridspec[n_rows_graph + 1:n_rows_graph + 2, :])
        ax_bar1 = subplot(gridspec[n_rows_graph + 2:n_rows_graph + 3, :])
        ax_bar2 = subplot(gridspec[n_rows_graph + 3:n_rows_graph + 4, :])

        # Bar axes show only the rug: hide spines, ticks and tick labels
        for ax in (ax_bar0, ax_bar1, ax_bar2):
            for side in ('top', 'bottom', 'left', 'right'):
                ax.spines[side].set_visible(False)
            for get_artists in (ax.get_xticklines, ax.get_xticklabels,
                                ax.get_yticklines, ax.get_yticklabels):
                for t in get_artists():
                    t.set_visible(False)

        # ======================================================================
        # Plot histogram
        # ======================================================================
        distplot(f_v,
                 bins=n_bins,
                 kde=False,
                 norm_hist=True,
                 hist_kws=dict(linewidth=0.92, color='#20d9ba', alpha=0.26),
                 ax=ax_graph)

        decorate(ax=ax_graph,
                 style='white',
                 title=f_i,
                 xlabel='RNAi Score',
                 ylabel='Frequency')

        # ======================================================================
        # Plot skew-t fit PDF
        # ======================================================================
        grids = linspace(f_v.min(), f_v.max(), n_x_grids)
        grid_step = grids[1] - grids[0]

        # Parse fitted parameters
        n, df, shape, location, scale = feature_x_fit.loc[
            f_i, ['N', 'DF', 'Shape', 'Location', 'Scale']]
        fig.text(0.5,
                 0.9,
                 'N={:.0f}    DF={:.2f}    Shape={:.2f}    Location={:.2f}    '
                 'Scale={:.2f}'.format(n, df, shape, location, scale),
                 size=16,
                 weight='bold',
                 color='#220530',
                 horizontalalignment='center')

        # '-' for a positive shape, '+' otherwise. A conditional expression is
        # used because indexing a list with a numpy bool is deprecated.
        direction = '-' if shape > 0 else '+'

        skew_t_pdf = skew_t.pdf(grids, df, shape, loc=location, scale=scale)
        ax_graph.plot(grids, skew_t_pdf, color='#20d9ba', **line_kwargs)

        # ======================================================================
        # Plot reflected skew-t PDF
        # ======================================================================
        # The PDF is evaluated over the reflected grids ...
        skew_t_pdf_r = skew_t.pdf(
            define_x_coordinates_for_reflection(skew_t_pdf, grids),
            df,
            shape,
            loc=location,
            scale=scale)

        # ... but plotted over the original grids
        ax_graph.plot(grids, skew_t_pdf_r, color='#4e41d9', **line_kwargs)

        # ======================================================================
        # Plot essentiality indices from various functions
        # ======================================================================
        # Every extra figure opened for this feature, so all can be closed
        diagnostic_figures = []
        if enumerate_functions:
            # Candidate index functions, passed as expression strings to
            # _compute_essentiality_index (presumably f1 is the fitted PDF,
            # f2 the reflected PDF and carea1 / carea2 their cumulative areas
            # -- confirm against that helper).
            #
            # Notes on the candidates that were tried and dropped:
            #   f1 / f2
            #     'f1 / f2': explodes
            #     'log(f1 / f2)': signal at center
            #     'where(f2 < f1, f1 / f2, 0)': explodes
            #     'where(f2 < f1, log(f1 / f2), 0)': not that good during
            #         entropy test
            #   -f2 / f1
            #     '-(f2 / f1)': signal at center
            #     '-log(f2 / f1)': signal at center
            #     'where(f2 < f1, -(f2 / f1), 0)': spikes to 0 after center
            #     'where(f2 < f1, -log(f2 / f1), 0)': == log(f1 / f2)
            #   carea1 / carea2
            #     'carea1 / carea2': explodes
            #     'log(carea1 / carea2)': not that good during entropy test
            #     'where(f2 < f1, carea1 / carea2, 0)': explodes
            #     'where(f2 < f1, log(carea1 / carea2), 0)': 0ing abruptly
            #         drops
            #   (f1 - f2) / f1
            #     '(f1 - f2) / f1': better during only f2 < f1
            #     'log((f1 - f2) / f1)': normalized same as not logging and
            #         raising to a power
            #     'where(f2 < f1, log((f1 - f2) / f1), 0)': spikes to 0 after
            #         center
            #   ((f1 - f2) / f1)^scale
            #     '((f1 - f2) / f1)**scale': super negative
            #     'where(f2 < f1, log(((f1 - f2) / f1)**scale), 0)': same as
            #         just log
            #   ((f1 - f2) / f1)^(1/scale) -- hard to interpret
            #     '((f1 - f2) / f1)**(1/scale)': log(-)=nan after center
            #     'where(f2 < f1, ((f1 - f2) / f1)**(1/scale), 0)': widens
            #         wide
            #   ((f1 - f2) / f1)^std(ei) -- hard to interpret
            #     '((f1 - f2) / f1)**(((f1 - f2) / f1).std())': log(-)=nan
            #         after center
            #     'where(f2 < f1, <the same power>, 0)': hard to interpret
            #     'where(f2 < f1, log(<the same power>), 0)': spikes to 0
            #         after center
            #   ((f1 - f2) / f1)^(1/std(ei)) -- hard to interpret
            #     '((f1 - f2) / f1)**(1/((f1 - f2) / f1).std())': log(-)=nan
            #         after center
            #     'where(f2 < f1, <the same power>, 0)': hard to interpret
            #         (best during entropy test)
            #     'where(f2 < f1, log(<the same power>), 0)': same as just
            #         log
            functions = [
                'where(f2 < f1, (f1 - f2) / f1, 0)',
                # Fixed: the condition used to read 'f2 < f1log', a name that
                # no other candidate expression uses
                'where(f2 < f1, ((f1 - f2) / f1)**{}, 0)'.format(scale),
            ]
            eis = []

            # One figure per function
            for j, f in enumerate(functions):
                diagnostic_figures.append(figure(figsize=figure_size_))

                ei = _compute_essentiality_index(skew_t_pdf, skew_t_pdf_r, f,
                                                 direction, grid_step)

                c = CMAP_CATEGORICAL(j / len(functions))
                eis.append((ei, c))

                plot(grids, ei, color=c, **line_kwargs)
                decorate(title=f)

            # One figure overlaying all functions on the histogram, each
            # index min-max scaled to the height of the fitted PDF
            diagnostic_figures.append(figure(figsize=figure_size_))
            distplot(f_v,
                     bins=n_bins,
                     kde=False,
                     norm_hist=True,
                     hist_kws=dict(linewidth=0.92, color='#070707',
                                   alpha=0.26))
            for ei_, c in eis:
                plot(grids, (ei_ - ei_.min()) / (ei_.max() - ei_.min()) *
                     skew_t_pdf.max(),
                     color=c,
                     linewidth=line_kwargs['linewidth'])
            decorate(title=f_i)

            # The diagnostic figures became the current figure; switch back so
            # that the essentiality figure is the one saved below.
            figure(fig.number)

        # ======================================================================
        # Plot essentiality index (#fc154f)
        # ======================================================================
        ei = _compute_essentiality_index(
            skew_t_pdf, skew_t_pdf_r,
            'where(f2 < f1, ((f1 - f2) / f1)**{}, 0)'.format(scale),
            direction, grid_step)
        # Min-max scale the index to the height of the fitted PDF
        ax_graph.plot(grids, (ei - ei.min()) / (ei.max() - ei.min()) *
                      skew_t_pdf.max(),
                      color='#fc154f',
                      **line_kwargs)

        # ======================================================================
        # Plot bars
        # ======================================================================
        a_m_d = _get_amp_mut_del(bar_df, f_i)

        bar_specifications = (
            (a_m_d.iloc[0, :], ax_bar0, '#9017e6'),
            (a_m_d.iloc[1, :], ax_bar1, '#6410a0'),
            (a_m_d.iloc[2, :], ax_bar2, '#470b72'),
        )
        for v, ax, c in bar_specifications:
            # The rug is drawn at the element-wise product of the bar row and
            # the feature's values
            rugplot(v * f_v, height=1, color=c, linewidth=2.4, ax=ax)
            # The last 3 characters of the row name label the bar
            decorate(ax=ax, ylabel=v.name[-3:])

        # ======================================================================
        # Save
        # ======================================================================
        save_plot(join(directory_path,
                       'essentiality_plots/{}.png'.format(f_i)),
                  dpi=dpi)

        if show_plot:
            show()

        # Close every figure opened for this feature so they do not pile up
        close(fig)
        for diagnostic_figure in diagnostic_figures:
            close(diagnostic_figure)