def smooth_correl(correl_of_true_inter_total, correl_of_false_inter_total):
	import kern_density_est
	import numpy as np

	correl_of_true_inter_total_prob_smooth, correl_of_true_inter_total_bins_smooth = kern_density_est.kern_scipy_gaus(correl_of_true_inter_total, 'g', np.linspace(-1., 1., 2000))
	correl_of_false_inter_total_prob_smooth, correl_of_false_inter_total_bins_smooth = kern_density_est.kern_scipy_gaus(np.random.choice(correl_of_false_inter_total, int(len(correl_of_false_inter_total)/50.), replace = False), 'y', np.linspace(-1., 1., 2000))


	# average adjacent grid densities so each value refers to the bin between two grid points
	mean_diff_true = np.diff(correl_of_true_inter_total_prob_smooth)/2.
	correl_of_true_inter_total_prob_smooth = correl_of_true_inter_total_prob_smooth[:-1] + mean_diff_true
	mean_diff_false = np.diff(correl_of_false_inter_total_prob_smooth)/2.
	correl_of_false_inter_total_prob_smooth = correl_of_false_inter_total_prob_smooth[:-1] + mean_diff_false

	# the KDE leaks a little mass beyond [-1, 1]: widen the outermost bins to +/-1.00001 and rescale their densities so each edge bin keeps its probability mass
	extend_mass_true_first = correl_of_true_inter_total_prob_smooth[0]*np.diff(correl_of_true_inter_total_bins_smooth)[0]
	extend_mass_true_last = correl_of_true_inter_total_prob_smooth[-1]*np.diff(correl_of_true_inter_total_bins_smooth)[-1]
	correl_of_true_inter_total_bins_smooth[0] = -1.00001
	correl_of_true_inter_total_bins_smooth[-1] = 1.00001
	correl_of_true_inter_total_prob_smooth[0] = extend_mass_true_first/np.diff(correl_of_true_inter_total_bins_smooth)[0] 
	correl_of_true_inter_total_prob_smooth[-1] = extend_mass_true_last/np.diff(correl_of_true_inter_total_bins_smooth)[-1]

	extend_mass_false_first = correl_of_false_inter_total_prob_smooth[0]*np.diff(correl_of_false_inter_total_bins_smooth)[0] 
	extend_mass_false_last = correl_of_false_inter_total_prob_smooth[-1]*np.diff(correl_of_false_inter_total_bins_smooth)[-1]
	correl_of_false_inter_total_bins_smooth[0] = -1.00001
	correl_of_false_inter_total_bins_smooth[-1] = 1.00001
	correl_of_false_inter_total_prob_smooth[0] = extend_mass_false_first/np.diff(correl_of_false_inter_total_bins_smooth)[0] 
	correl_of_false_inter_total_prob_smooth[-1] = extend_mass_false_last/np.diff(correl_of_false_inter_total_bins_smooth)[-1]				
	
	return correl_of_true_inter_total_prob_smooth, correl_of_true_inter_total_bins_smooth, correl_of_false_inter_total_prob_smooth, correl_of_false_inter_total_bins_smooth
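A minimal usage sketch for smooth_correl, assuming kern_density_est is importable and the inputs are correlation values in [-1, 1] (the array names below are invented for illustration):

import numpy as np
true_corr = np.clip(np.random.normal(0.4, 0.3, 5000), -1., 1.)
false_corr = np.clip(np.random.normal(0.0, 0.4, 200000), -1., 1.)
# the false sample needs at least 50 values, since smooth_correl subsamples 1/50 of it
prob_t, bins_t, prob_f, bins_f = smooth_correl(true_corr, false_corr)
# prob_t holds one density value per bin; bins_t holds the corresponding bin edges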
	def calculate_kern(attribute_of_interaction_, sample_, l_limit, up_limit, number_of_bins, colour = ("r", "b"), weights_ = None, bandwidth_pos = None, bandwidth_neg = None):
	
		prob_ = {}
		bins_ = {}
		import kern_density_est
		kern_density_est.plot_atr = plot_atr_kernel

		xgrid = [[],[]]
		xgrid[0] = np.linspace(l_limit, up_limit, number_of_bins[0])
		xgrid[1] = np.linspace(l_limit, up_limit, number_of_bins[1])

		if domain:
	
			if attribute_of_interaction_ == "distance":

				prob_["positive_interactions"], bins_["positive_interactions"] = kern_density_est.kern_scipy_gaus_weighted(sample_["positive_interactions"], colour[0], xgrid[0], weights = weights_["positive_interactions"], bandwidth = bandwidth_pos)
				prob_["negative_interactions"], bins_["negative_interactions"] = kern_density_est.kern_scipy_gaus_weighted(sample_["negative_interactions"], colour[1], xgrid[1], weights = weights_["negative_interactions"], bandwidth = bandwidth_neg)

			else:

				prob_["positive_interactions"], bins_["positive_interactions"] = kern_density_est.kern_scipy_gaus(sample_["positive_interactions"], colour[0], xgrid[0], bandwidth = bandwidth_pos)
				prob_["negative_interactions"], bins_["negative_interactions"] = kern_density_est.kern_scipy_gaus(sample_["negative_interactions"], colour[1], xgrid[1], bandwidth = bandwidth_neg)	
		else:


			prob_["positive_interactions"], bins_["positive_interactions"] = kern_density_est.kern_scipy_gaus(sample_["positive_interactions"], colour[0], xgrid[0], bandwidth=bandwidth_pos)
			prob_["negative_interactions"], bins_["negative_interactions"] = [], []
			if not(Sample_MoG_classificator): prob_["negative_interactions"], bins_["negative_interactions"] = kern_density_est.kern_scipy_gaus(sample_["negative_interactions"], colour[1], xgrid[1], bandwidth=bandwidth_neg)

		if use_smooth_prior_for_estimation:	return  prob_, bins_
		else: return  [[], []], [[], []]
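The kern_density_est module itself is not part of this listing. A minimal stand-in for kern_scipy_gaus, consistent with how it is called in these examples (sample, colour, grid, optional bandwidth/plot_atr/label), might look like the sketch below; the signature and behaviour are assumptions, not the project's actual implementation.

def kern_scipy_gaus(sample, colour, xgrid, bandwidth=None, plot_atr=False, label=None):
    import numpy as np
    import matplotlib.pyplot as plt
    from scipy.stats import gaussian_kde

    # bw_method accepts None (Scott's rule), "scott", "silverman" or a scalar factor
    kde = gaussian_kde(np.asarray(sample, dtype=float), bw_method=bandwidth)
    prob = kde(xgrid)  # density evaluated on the supplied grid
    if plot_atr:
        plt.plot(xgrid, prob, colour, label=label)
    return prob, xgrid  # callers unpack these as (probabilities, bins)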
Example #3
    def calculate_kern(sample_, bins, band="scott"):
        import kern_density_est
        prob_, bins_ = kern_density_est.kern_scipy_gaus(sample_,
                                                        "g",
                                                        bins,
                                                        bandwidth=band,
                                                        plot_atr=False)
        return prob_, bins_
Example #4
	def calculate_kern(sample_, l_limit, up_limit, number_of_bins):
		import kern_density_est

		xgrid = [[],[]]
		xgrid[0] = np.linspace(l_limit, up_limit, number_of_bins[0])

		prob_, bins_ = kern_density_est.kern_scipy_gaus(np.r_[sample_["positive_interactions"], sample_["negative_interactions"]], colour[0], xgrid[0], bandwidth=optimum)

		return prob_, bins_
Example #5
    def calculate_kern(sample_, l_limit, up_limit, number_of_bins):
        import kern_density_est

        xgrid = [[], []]
        xgrid[0] = np.linspace(l_limit, up_limit, number_of_bins[0])

        prob_, bins_ = kern_density_est.kern_scipy_gaus(
            np.r_[sample_["positive_interactions"],
                  sample_["negative_interactions"]],
            colour[0],
            xgrid[0],
            bandwidth=optimum)

        return prob_, bins_
Example #6
def smooth_correl(correl_of_true_inter_total, correl_of_false_inter_total):
    import kern_density_est
    import numpy as np

    correl_of_true_inter_total_prob_smooth, correl_of_true_inter_total_bins_smooth = kern_density_est.kern_scipy_gaus(
        correl_of_true_inter_total, 'g', np.linspace(-1., 1., 2000))
    correl_of_false_inter_total_prob_smooth, correl_of_false_inter_total_bins_smooth = kern_density_est.kern_scipy_gaus(
        np.random.choice(correl_of_false_inter_total,
                         int(len(correl_of_false_inter_total) / 50.),
                         replace=False), 'y', np.linspace(-1., 1., 2000))

    mean_diff_true = np.diff(correl_of_true_inter_total_prob_smooth) / 2.
    correl_of_true_inter_total_prob_smooth = correl_of_true_inter_total_prob_smooth[:
                                                                                    -1] + mean_diff_true
    mean_diff_false = np.diff(correl_of_false_inter_total_prob_smooth) / 2.
    correl_of_false_inter_total_prob_smooth = correl_of_false_inter_total_prob_smooth[:
                                                                                      -1] + mean_diff_false

    extend_mass_true_first = correl_of_true_inter_total_prob_smooth[
        0] * np.diff(correl_of_true_inter_total_bins_smooth)[0]
    extend_mass_true_last = correl_of_true_inter_total_prob_smooth[
        -1] * np.diff(correl_of_true_inter_total_bins_smooth)[-1]
    correl_of_true_inter_total_bins_smooth[0] = -1.00001
    correl_of_true_inter_total_bins_smooth[-1] = 1.00001
    correl_of_true_inter_total_prob_smooth[
        0] = extend_mass_true_first / np.diff(
            correl_of_true_inter_total_bins_smooth)[0]
    correl_of_true_inter_total_prob_smooth[
        -1] = extend_mass_true_last / np.diff(
            correl_of_true_inter_total_bins_smooth)[-1]

    extend_mass_false_first = correl_of_false_inter_total_prob_smooth[
        0] * np.diff(correl_of_false_inter_total_bins_smooth)[0]
    extend_mass_false_last = correl_of_false_inter_total_prob_smooth[
        -1] * np.diff(correl_of_false_inter_total_bins_smooth)[-1]
    correl_of_false_inter_total_bins_smooth[0] = -1.00001
    correl_of_false_inter_total_bins_smooth[-1] = 1.00001
    correl_of_false_inter_total_prob_smooth[
        0] = extend_mass_false_first / np.diff(
            correl_of_false_inter_total_bins_smooth)[0]
    correl_of_false_inter_total_prob_smooth[
        -1] = extend_mass_false_last / np.diff(
            correl_of_false_inter_total_bins_smooth)[-1]

    return correl_of_true_inter_total_prob_smooth, correl_of_true_inter_total_bins_smooth, correl_of_false_inter_total_prob_smooth, correl_of_false_inter_total_bins_smooth
Example #7
    def calculate_kern(attribute_of_interaction_,
                       sample_,
                       l_limit,
                       up_limit,
                       number_of_bins,
                       colour=("r", "b"),
                       weights_=None,
                       bandwidth_pos=None,
                       bandwidth_neg=None):

        prob_ = {}
        bins_ = {}
        import kern_density_est
        kern_density_est.plot_atr = plot_atr_kernel

        xgrid = [[], []]
        xgrid[0] = np.linspace(l_limit, up_limit, number_of_bins[0])
        xgrid[1] = np.linspace(l_limit, up_limit, number_of_bins[1])

        if domain:

            if attribute_of_interaction_ == "distance":

                prob_["positive_interactions"], bins_[
                    "positive_interactions"] = kern_density_est.kern_scipy_gaus_weighted(
                        sample_["positive_interactions"],
                        colour[0],
                        xgrid[0],
                        weights=weights_["positive_interactions"],
                        bandwidth=bandwidth_pos)
                prob_["negative_interactions"], bins_[
                    "negative_interactions"] = kern_density_est.kern_scipy_gaus_weighted(
                        sample_["negative_interactions"],
                        colour[1],
                        xgrid[1],
                        weights=weights_["negative_interactions"],
                        bandwidth=bandwidth_neg)

            else:

                prob_["positive_interactions"], bins_[
                    "positive_interactions"] = kern_density_est.kern_scipy_gaus(
                        sample_["positive_interactions"],
                        colour[0],
                        xgrid[0],
                        bandwidth=bandwidth_pos)
                prob_["negative_interactions"], bins_[
                    "negative_interactions"] = kern_density_est.kern_scipy_gaus(
                        sample_["negative_interactions"],
                        colour[1],
                        xgrid[1],
                        bandwidth=bandwidth_neg)
        else:

            prob_["positive_interactions"], bins_[
                "positive_interactions"] = kern_density_est.kern_scipy_gaus(
                    sample_["positive_interactions"],
                    colour[0],
                    xgrid[0],
                    bandwidth=bandwidth_pos)
            prob_["negative_interactions"], bins_[
                "negative_interactions"] = [], []
            if not (Sample_MoG_classificator):
                prob_["negative_interactions"], bins_[
                    "negative_interactions"] = kern_density_est.kern_scipy_gaus(
                        sample_["negative_interactions"],
                        colour[1],
                        xgrid[1],
                        bandwidth=bandwidth_neg)

        if use_smooth_prior_for_estimation: return prob_, bins_
        else: return [[], []], [[], []]
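Note that calculate_kern above is a nested helper lifted out of its enclosing scope: the free names domain, Sample_MoG_classificator, use_smooth_prior_for_estimation, plot_atr_kernel and np are resolved from that scope, which is not shown in this example.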
def smooth_priors(true_dist_of_total_prob,
                  true_dist_of_total_bins,
                  false_dist_of_total_prob,
                  false_dist_of_total_bins,
                  kde_mode,
                  bandwidth="silverman",
                  kernel="exponential"):
    import matplotlib.pyplot as plt
    import numpy as np

    miss_mass_true = np.diff(true_dist_of_total_bins)[[
        0, -1
    ]] * true_dist_of_total_prob[[0, -1]]
    miss_mass_false = np.diff(false_dist_of_total_bins)[[
        0, -1
    ]] * false_dist_of_total_prob[[0, -1]]

    import hist_sampler
    x_ = hist_sampler.histogram_sampler(true_dist_of_total_prob[1:-1],
                                        true_dist_of_total_bins[1:-1], 400000)
    y_ = hist_sampler.histogram_sampler(false_dist_of_total_prob[1:-1],
                                        false_dist_of_total_bins[1:-1], 400000)

    import kern_density_est

    if kde_mode == "scipy_kde":
        true_dist_of_total_prob_smooth, true_dist_of_total_bins_smooth = kern_density_est.kern_scipy_gaus(
            x_, 'g',
            np.linspace(true_dist_of_total_bins[1] - 50000,
                        true_dist_of_total_bins[-2] + 50000, 1000), bandwidth
        )  # can change the kernel to an exponential one via kern_density_est.kern_sklearn_expon(x, color, bandwidth, kernel = "exponential", **kwargs)

    elif kde_mode == "sklearn_kde":
        true_dist_of_total_prob_smooth, true_dist_of_total_bins_smooth = kern_density_est.kern_sklearn_expon(
            x_,
            'g',
            np.linspace(true_dist_of_total_bins[1] - 50000,
                        true_dist_of_total_bins[-2] + 50000, 1000),
            bandwidth,
            kernel_=kernel)

    from scipy.stats import kde
    import bisect

    insert_index_1 = bisect.bisect_left(
        true_dist_of_total_prob_smooth - true_dist_of_total_prob[0], 0.)
    insert_index_2 = bisect.bisect_left(
        true_dist_of_total_prob_smooth[::-1] - true_dist_of_total_prob[-1], 0.)

    true_dist_of_total_prob_smooth, true_dist_of_total_bins_smooth = true_dist_of_total_prob_smooth[
        insert_index_1 + 1:-1 * insert_index_2 -
        1], true_dist_of_total_bins_smooth[insert_index_1 +
                                           1:-1 * insert_index_2 - 1]

    false_dist_of_total_prob_smooth, false_dist_of_total_bins_smooth = kern_density_est.kern_scipy_gaus(
        y_, 'y',
        np.linspace(false_dist_of_total_bins[1] - 200000,
                    false_dist_of_total_bins[-2] + 200000, 1000), bandwidth)
    plt.clf()

    insert_index_3 = bisect.bisect_left(
        false_dist_of_total_prob_smooth - false_dist_of_total_prob[0], 0.)
    insert_index_4 = bisect.bisect_left(
        false_dist_of_total_prob_smooth[::-1] - false_dist_of_total_prob[-1],
        0.)

    false_dist_of_total_prob_smooth, false_dist_of_total_bins_smooth = false_dist_of_total_prob_smooth[
        insert_index_3 + 1:-1 * insert_index_4 -
        1], false_dist_of_total_bins_smooth[insert_index_3 +
                                            1:-1 * insert_index_4 - 1]

    prob_true_ = true_dist_of_total_prob_smooth * (1 - np.sum(miss_mass_true))
    prob_false_ = false_dist_of_total_prob_smooth * (1 -
                                                     np.sum(miss_mass_false))

    bins_t_plot = np.r_[true_dist_of_total_bins[0],
                        true_dist_of_total_bins_smooth[0],
                        true_dist_of_total_bins_smooth,
                        true_dist_of_total_bins_smooth[-1],
                        true_dist_of_total_bins[-1]]
    prob_t_plot = np.r_[true_dist_of_total_prob[0], true_dist_of_total_prob[0],
                        prob_true_, true_dist_of_total_prob[-1],
                        true_dist_of_total_prob[-1]]

    bins_f_plot = np.r_[false_dist_of_total_bins[0],
                        false_dist_of_total_bins_smooth[0],
                        false_dist_of_total_bins_smooth,
                        false_dist_of_total_bins_smooth[-1],
                        false_dist_of_total_bins[-1]]
    prob_f_plot = np.r_[false_dist_of_total_prob[0],
                        false_dist_of_total_prob[0], prob_false_,
                        false_dist_of_total_prob[-1],
                        false_dist_of_total_prob[-1]]

    plt.plot(bins_t_plot, prob_t_plot, 'g')
    plt.plot(bins_f_plot, prob_f_plot, 'y')

    plt.fill_between(bins_t_plot,
                     prob_t_plot,
                     where=None,
                     color='g',
                     alpha=0.2)
    plt.fill_between(bins_f_plot,
                     prob_f_plot,
                     where=None,
                     color='y',
                     alpha=0.2)
    plt.axis([
        bins_f_plot[1] - 100000, bins_f_plot[-2] + 100000, 0.,
        max(prob_t_plot)
    ])

    mean_diff_true = np.diff(true_dist_of_total_prob_smooth) / 2.
    true_smoothed_mean_probabilities = true_dist_of_total_prob_smooth[:-1] + mean_diff_true

    mean_diff_false = np.diff(false_dist_of_total_prob_smooth) / 2.
    false_smoothed_mean_probabilities = false_dist_of_total_prob_smooth[:-1] + mean_diff_false

    miss_mass_true_ = np.r_[true_dist_of_total_prob[0] *
                            abs(true_dist_of_total_bins[0] -
                                true_dist_of_total_bins_smooth[0])], [
                                    true_dist_of_total_prob[-1] *
                                    abs(true_dist_of_total_bins[-1] -
                                        true_dist_of_total_bins_smooth[-1])
                                ]
    miss_mass_false_ = np.r_[false_dist_of_total_prob[0] *
                             abs(false_dist_of_total_bins[0] -
                                 false_dist_of_total_bins_smooth[0])], [
                                     false_dist_of_total_prob[-1] *
                                     abs(false_dist_of_total_bins[-1] -
                                         false_dist_of_total_bins_smooth[-1])
                                 ]

    prob_true_norm = true_smoothed_mean_probabilities / np.sum(
        true_smoothed_mean_probabilities *
        np.diff(true_dist_of_total_bins_smooth)) * (1. -
                                                    np.sum(miss_mass_true_))
    prob_false_norm = false_smoothed_mean_probabilities / np.sum(
        false_smoothed_mean_probabilities *
        np.diff(false_dist_of_total_bins_smooth)) * (1. -
                                                     np.sum(miss_mass_false_))

    prob_true = np.r_[true_dist_of_total_prob[0], prob_true_norm,
                      true_dist_of_total_prob[-1]]
    bins_true = np.r_[true_dist_of_total_bins[0],
                      true_dist_of_total_bins_smooth,
                      true_dist_of_total_bins[-1]]

    prob_false = np.r_[false_dist_of_total_prob[0], prob_false_norm,
                       false_dist_of_total_prob[-1]]
    bins_false = np.r_[false_dist_of_total_bins[0],
                       false_dist_of_total_bins_smooth,
                       false_dist_of_total_bins[-1]]

    #plt.bar(bins_true[1:-2], prob_true[1:-1], np.diff(bins_true[1:-1]), alpha=0.15, color="blue")

    return prob_true, bins_true, prob_false, bins_false
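hist_sampler is likewise not shown here; the calls above only need something that draws n values from the histogram defined by per-bin densities and bin edges. A possible stand-in, offered purely as an assumption about its behaviour:

import numpy as np

def histogram_sampler(prob, bins, n):
    bins = np.asarray(bins, dtype=float)      # n_bins + 1 edges
    widths = np.diff(bins)
    mass = np.asarray(prob, dtype=float) * widths
    mass = mass / mass.sum()                  # probability mass per bin
    idx = np.random.choice(len(mass), size=n, p=mass)
    # uniform draw inside each selected bin
    return bins[idx] + np.random.random_sample(n) * widths[idx]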
def smooth_priors(
    true_dist_of_total_prob,
    true_dist_of_total_bins,
    false_dist_of_total_prob,
    false_dist_of_total_bins,
    kde_mode,
    bandwidth="silverman",
    kernel="exponential",
):
    import matplotlib.pyplot as plt
    import numpy as np

    miss_mass_true = np.diff(true_dist_of_total_bins)[[-2, -1]] * true_dist_of_total_prob[[-2, -1]]
    miss_mass_false = np.diff(false_dist_of_total_bins)[[-1]] * false_dist_of_total_prob[[-1]]

    import hist_sampler

    x_ = hist_sampler.histogram_sampler(true_dist_of_total_prob[:-2], true_dist_of_total_bins[:-2], 800000)
    y_ = hist_sampler.histogram_sampler(false_dist_of_total_prob[:-2], false_dist_of_total_bins[:-2], 100000)

    import kern_density_est

    if kde_mode == "scipy_kde":
        true_dist_of_total_prob_smooth, true_dist_of_total_bins_smooth = kern_density_est.kern_scipy_gaus(
            x_, "g", np.linspace(true_dist_of_total_bins[0], true_dist_of_total_bins[-3] + 50000, 3000), bandwidth
        )  # can change the kernel to an exponential one via kern_density_est.kern_sklearn_expon(x, color, bandwidth, kernel = "exponential", **kwargs)

    elif kde_mode == "sklearn_kde":
        true_dist_of_total_prob_smooth, true_dist_of_total_bins_smooth = kern_density_est.kern_sklearn_expon(
            x_,
            "g",
            np.linspace(true_dist_of_total_bins[0], true_dist_of_total_bins[-3] + 50000, 3000),
            bandwidth,
            kernel_=kernel,
        )

    from scipy.stats import kde
    import bisect

    insert_index_1 = bisect.bisect_left(true_dist_of_total_prob_smooth - true_dist_of_total_prob[0], 0.0)

    true_dist_of_total_prob_smooth, true_dist_of_total_bins_smooth = (
        true_dist_of_total_prob_smooth[0:insert_index_1],
        true_dist_of_total_bins_smooth[0:insert_index_1],
    )

    false_dist_of_total_prob_smooth, false_dist_of_total_bins_smooth = kern_density_est.kern_scipy_gaus(
        y_, "y", np.linspace(false_dist_of_total_bins[0], false_dist_of_total_bins[-2], 2000), bandwidth
    )
    plt.clf()

    prob_true_ = true_dist_of_total_prob_smooth * (1 - np.sum(miss_mass_true))
    prob_false_ = false_dist_of_total_prob_smooth * (1 - np.sum(miss_mass_false))

    mean_diff_true = np.diff(true_dist_of_total_prob_smooth) / 2.0
    true_smoothed_mean_probabilities = true_dist_of_total_prob_smooth[:-1] + mean_diff_true

    mean_diff_false = np.diff(false_dist_of_total_prob_smooth) / 2.0
    false_smoothed_mean_probabilities = false_dist_of_total_prob_smooth[:-1] + mean_diff_false

    miss_mass_true_ = [
        true_dist_of_total_prob[-1] * abs(true_dist_of_total_bins[-1] - true_dist_of_total_bins_smooth[-1])
    ]
    miss_mass_false_ = [
        false_dist_of_total_prob[-1] * abs(false_dist_of_total_bins[-1] - false_dist_of_total_bins_smooth[-1])
    ]

    prob_true_norm = (
        true_smoothed_mean_probabilities
        / np.sum(true_smoothed_mean_probabilities * np.diff(true_dist_of_total_bins_smooth))
        * (1.0 - np.sum(miss_mass_true_))
    )
    prob_false_norm = (
        false_smoothed_mean_probabilities
        / np.sum(false_smoothed_mean_probabilities * np.diff(false_dist_of_total_bins_smooth))
        * (1.0 - np.sum(miss_mass_false_))
    )

    prob_true = np.r_[prob_true_norm, true_dist_of_total_prob[-1]]
    bins_true = np.r_[true_dist_of_total_bins_smooth, true_dist_of_total_bins[-1]]

    prob_false = np.r_[prob_false_norm, false_dist_of_total_prob[-1]]
    bins_false = np.r_[false_dist_of_total_bins_smooth, false_dist_of_total_bins[-1]]

    return prob_true, bins_true, prob_false, bins_false
def smooth_priors(true_dist_of_total_prob, true_dist_of_total_bins, false_dist_of_total_prob, false_dist_of_total_bins, kde_mode, bandwidth = "silverman", kernel = "exponential"):
	import matplotlib.pyplot as plt
	import numpy as np

	

	miss_mass_true = np.diff(true_dist_of_total_bins)[[0,-1]]*true_dist_of_total_prob[[0,-1]]
	miss_mass_false = np.diff(false_dist_of_total_bins)[[0,-1]]*false_dist_of_total_prob[[0,-1]]

	import hist_sampler
	x_ = hist_sampler.histogram_sampler(true_dist_of_total_prob[1:-1], true_dist_of_total_bins[1:-1], 400000)
	y_ = hist_sampler.histogram_sampler(false_dist_of_total_prob[1:-1], false_dist_of_total_bins[1:-1], 400000)


	import kern_density_est
	
	if kde_mode == "scipy_kde": true_dist_of_total_prob_smooth, true_dist_of_total_bins_smooth = kern_density_est.kern_scipy_gaus(x_, 'g', np.linspace(true_dist_of_total_bins[1]-50000, true_dist_of_total_bins[-2]+50000, 1000), bandwidth) # can change the kernel to an expenential kern_density_est.kern_sklearn_expon(x, color, bandwidth, kernel = "exponential", **kwargs)

	elif kde_mode == "sklearn_kde": true_dist_of_total_prob_smooth, true_dist_of_total_bins_smooth =  kern_density_est.kern_sklearn_expon(x_, 'g', np.linspace(true_dist_of_total_bins[1]-50000, true_dist_of_total_bins[-2]+50000, 1000), bandwidth, kernel_ = kernel)

	from scipy.stats import kde
	import bisect
	
	insert_index_1 = bisect.bisect_left(true_dist_of_total_prob_smooth-true_dist_of_total_prob[0], 0.)
	insert_index_2 = bisect.bisect_left(true_dist_of_total_prob_smooth[::-1]-true_dist_of_total_prob[-1], 0.)	

	true_dist_of_total_prob_smooth, true_dist_of_total_bins_smooth = true_dist_of_total_prob_smooth[insert_index_1+1:-1*insert_index_2-1], true_dist_of_total_bins_smooth[insert_index_1+1:-1*insert_index_2-1]

	false_dist_of_total_prob_smooth, false_dist_of_total_bins_smooth = kern_density_est.kern_scipy_gaus(y_, 'y', np.linspace(false_dist_of_total_bins[1]-200000, false_dist_of_total_bins[-2]+200000, 1000), bandwidth)
	plt.clf()

	insert_index_3 = bisect.bisect_left(false_dist_of_total_prob_smooth-false_dist_of_total_prob[0], 0.)
	insert_index_4 = bisect.bisect_left(false_dist_of_total_prob_smooth[::-1]-false_dist_of_total_prob[-1], 0.)

	false_dist_of_total_prob_smooth, false_dist_of_total_bins_smooth = false_dist_of_total_prob_smooth[insert_index_3+1:-1*insert_index_4-1], false_dist_of_total_bins_smooth[insert_index_3+1:-1*insert_index_4-1]

	prob_true_ = true_dist_of_total_prob_smooth*(1-np.sum(miss_mass_true))
	prob_false_ = false_dist_of_total_prob_smooth*(1-np.sum(miss_mass_false))
			
	bins_t_plot = np.r_[true_dist_of_total_bins[0], true_dist_of_total_bins_smooth[0], true_dist_of_total_bins_smooth, true_dist_of_total_bins_smooth[-1], true_dist_of_total_bins[-1]]
	prob_t_plot = np.r_[true_dist_of_total_prob[0], true_dist_of_total_prob[0], prob_true_, true_dist_of_total_prob[-1], true_dist_of_total_prob[-1]]

	bins_f_plot = np.r_[false_dist_of_total_bins[0], false_dist_of_total_bins_smooth[0], false_dist_of_total_bins_smooth, false_dist_of_total_bins_smooth[-1], false_dist_of_total_bins[-1]]
	prob_f_plot = np.r_[false_dist_of_total_prob[0], false_dist_of_total_prob[0], prob_false_, false_dist_of_total_prob[-1], false_dist_of_total_prob[-1]]

	plt.plot(bins_t_plot, prob_t_plot, 'g')
	plt.plot(bins_f_plot, prob_f_plot, 'y')	

	plt.fill_between(bins_t_plot, prob_t_plot, where=None, color='g', alpha = 0.2)
	plt.fill_between(bins_f_plot, prob_f_plot, where=None, color='y', alpha = 0.2)
	plt.axis([bins_f_plot[1]-100000, bins_f_plot[-2]+100000, 0., max(prob_t_plot)])


	mean_diff_true = np.diff(true_dist_of_total_prob_smooth)/2.
	true_smoothed_mean_probabilities = true_dist_of_total_prob_smooth[:-1] + mean_diff_true

	mean_diff_false = np.diff(false_dist_of_total_prob_smooth)/2.
	false_smoothed_mean_probabilities = false_dist_of_total_prob_smooth[:-1] + mean_diff_false

	
	miss_mass_true_ = np.r_[true_dist_of_total_prob[0]*abs(true_dist_of_total_bins[0] - true_dist_of_total_bins_smooth[0])], [true_dist_of_total_prob[-1]*abs(true_dist_of_total_bins[-1] - true_dist_of_total_bins_smooth[-1])]
	miss_mass_false_ = np.r_[false_dist_of_total_prob[0]*abs(false_dist_of_total_bins[0] - false_dist_of_total_bins_smooth[0])], [false_dist_of_total_prob[-1]*abs(false_dist_of_total_bins[-1] - false_dist_of_total_bins_smooth[-1])]

	prob_true_norm = true_smoothed_mean_probabilities/np.sum(true_smoothed_mean_probabilities*np.diff(true_dist_of_total_bins_smooth))*(1.-np.sum(miss_mass_true_))
	prob_false_norm = false_smoothed_mean_probabilities/np.sum(false_smoothed_mean_probabilities*np.diff(false_dist_of_total_bins_smooth))*(1.-np.sum(miss_mass_false_))

	prob_true = np.r_[true_dist_of_total_prob[0], prob_true_norm, true_dist_of_total_prob[-1]]
	bins_true = np.r_[true_dist_of_total_bins[0], true_dist_of_total_bins_smooth, true_dist_of_total_bins[-1]]
	
	prob_false = np.r_[false_dist_of_total_prob[0], prob_false_norm, false_dist_of_total_prob[-1]]
	bins_false = np.r_[false_dist_of_total_bins[0], false_dist_of_total_bins_smooth, false_dist_of_total_bins[-1]]

	#plt.bar(bins_true[1:-2], prob_true[1:-1], np.diff(bins_true[1:-1]), alpha=0.15, color="blue")


	return prob_true, bins_true, prob_false, bins_false
	def calculate_kern(sample_, bins, band = "scott"):
		import kern_density_est
		prob_, bins_ = kern_density_est.kern_scipy_gaus(sample_, "g", bins, bandwidth = band, plot_atr = False)
		return prob_, bins_
def smooth_priors(true_dist_of_total_prob,
                  true_dist_of_total_bins,
                  false_dist_of_total_prob,
                  false_dist_of_total_bins,
                  kde_mode,
                  bandwidth="silverman",
                  kernel="exponential"):
    import matplotlib.pyplot as plt
    import numpy as np

    miss_mass_true = np.diff(true_dist_of_total_bins)[[
        -2, -1
    ]] * true_dist_of_total_prob[[-2, -1]]
    miss_mass_false = np.diff(false_dist_of_total_bins)[[
        -1
    ]] * false_dist_of_total_prob[[-1]]

    import hist_sampler
    x_ = hist_sampler.histogram_sampler(true_dist_of_total_prob[:-2],
                                        true_dist_of_total_bins[:-2], 800000)
    y_ = hist_sampler.histogram_sampler(false_dist_of_total_prob[:-2],
                                        false_dist_of_total_bins[:-2], 100000)

    import kern_density_est

    if kde_mode == "scipy_kde":
        true_dist_of_total_prob_smooth, true_dist_of_total_bins_smooth = kern_density_est.kern_scipy_gaus(
            x_, 'g',
            np.linspace(true_dist_of_total_bins[0],
                        true_dist_of_total_bins[-3] + 50000, 3000), bandwidth
        )  # can change the kernel to an exponential one via kern_density_est.kern_sklearn_expon(x, color, bandwidth, kernel = "exponential", **kwargs)

    elif kde_mode == "sklearn_kde":
        true_dist_of_total_prob_smooth, true_dist_of_total_bins_smooth = kern_density_est.kern_sklearn_expon(
            x_,
            'g',
            np.linspace(true_dist_of_total_bins[0],
                        true_dist_of_total_bins[-3] + 50000, 3000),
            bandwidth,
            kernel_=kernel)

    from scipy.stats import kde
    import bisect

    insert_index_1 = bisect.bisect_left(
        true_dist_of_total_prob_smooth - true_dist_of_total_prob[0], 0.)

    true_dist_of_total_prob_smooth, true_dist_of_total_bins_smooth = true_dist_of_total_prob_smooth[
        0:insert_index_1], true_dist_of_total_bins_smooth[0:insert_index_1]

    false_dist_of_total_prob_smooth, false_dist_of_total_bins_smooth = kern_density_est.kern_scipy_gaus(
        y_, 'y',
        np.linspace(false_dist_of_total_bins[0], false_dist_of_total_bins[-2],
                    2000), bandwidth)
    plt.clf()

    prob_true_ = true_dist_of_total_prob_smooth * (1 - np.sum(miss_mass_true))
    prob_false_ = false_dist_of_total_prob_smooth * (1 -
                                                     np.sum(miss_mass_false))

    mean_diff_true = np.diff(true_dist_of_total_prob_smooth) / 2.
    true_smoothed_mean_probabilities = true_dist_of_total_prob_smooth[:-1] + mean_diff_true

    mean_diff_false = np.diff(false_dist_of_total_prob_smooth) / 2.
    false_smoothed_mean_probabilities = false_dist_of_total_prob_smooth[:-1] + mean_diff_false

    miss_mass_true_ = [
        true_dist_of_total_prob[-1] *
        abs(true_dist_of_total_bins[-1] - true_dist_of_total_bins_smooth[-1])
    ]
    miss_mass_false_ = [
        false_dist_of_total_prob[-1] *
        abs(false_dist_of_total_bins[-1] - false_dist_of_total_bins_smooth[-1])
    ]

    prob_true_norm = true_smoothed_mean_probabilities / np.sum(
        true_smoothed_mean_probabilities *
        np.diff(true_dist_of_total_bins_smooth)) * (1. -
                                                    np.sum(miss_mass_true_))
    prob_false_norm = false_smoothed_mean_probabilities / np.sum(
        false_smoothed_mean_probabilities *
        np.diff(false_dist_of_total_bins_smooth)) * (1. -
                                                     np.sum(miss_mass_false_))

    prob_true = np.r_[prob_true_norm, true_dist_of_total_prob[-1]]
    bins_true = np.r_[true_dist_of_total_bins_smooth,
                      true_dist_of_total_bins[-1]]

    prob_false = np.r_[prob_false_norm, false_dist_of_total_prob[-1]]
    bins_false = np.r_[false_dist_of_total_bins_smooth,
                       false_dist_of_total_bins[-1]]

    return prob_true, bins_true, prob_false, bins_false
	def calculate_or_plot_kern(attribute_of_interaction_, sample_, l_limit, up_limit, number_of_bins, colour = ("r", "b"), weights_ = None, bandwidth_pos = None, bandwidth_neg = None):
	
		prob_ = {}
		bins_ = {}
		import kern_density_est
		kern_density_est.plot_atr = plot_atr_kernel

		xgrid = [[],[]]
		xgrid[0] = np.linspace(l_limit, up_limit, number_of_bins[0])
		xgrid[1] = np.linspace(l_limit, up_limit, number_of_bins[1])

		if domain:
	
			if attribute_of_interaction_ == "distance":


				#prob_["positive_interactions"], bins_["positive_interactions"] = kern_density_est.kern_scipy_gaus_weighted(sample_["positive_interactions"], colour[0], xgrid[0], weights = weights_["positive_interactions"], bandwidth = "scott", factor = None)#bandwidth_pos)
				#prob_["negative_interactions"], bins_["negative_interactions"] = kern_density_est.kern_scipy_gaus_weighted(sample_["negative_interactions"], colour[1], xgrid[1], weights = weights_["negative_interactions"], bandwidth = "scott", factor = None)#bandwidth_neg)
				prob_["positive_interactions"], bins_["positive_interactions"] = kern_density_est.kern_scipy_gaus_weighted(sample_["positive_interactions"], colour[0], xgrid[0], weights = weights_["positive_interactions"], bandwidth = bandwidth_pos, plot_atr = True)#bandwidth_pos)
				prob_["negative_interactions"], bins_["negative_interactions"] = kern_density_est.kern_scipy_gaus_weighted(sample_["negative_interactions"], colour[1], xgrid[1], weights = weights_["negative_interactions"], bandwidth = bandwidth_neg, plot_atr = True)#bandwidth_neg)

				#bandwidth_pos = kern_density_est.cross_validation(sample_["positive_interactions"])# * sample_["positive_interactions"].std(ddof=1)
				#bandwidth_neg = kern_density_est.cross_validation(sample_["negative_interactions"])# * sample_["negative_interactions"].std(ddof=1)

				#prob_["positive_interactions"], bins_["positive_interactions"] = kern_density_est.kernel_weighted_samples(sample_["positive_interactions"], colour[0], xgrid[0], weights = weights_["positive_interactions"], fft = False, bw=bandwidth_pos)
				#prob_["negative_interactions"], bins_["negative_interactions"] = kern_density_est.kernel_weighted_samples(sample_["negative_interactions"], colour[1], xgrid[1], weights = weights_["negative_interactions"], fft = False, bw=bandwidth_neg)

			else:
				#kernel_ = "gaussian"

				#bandwidth_pos = kern_density_est.cross_validation(sample_["positive_interactions"], kernel = kernel_) # kernel = 
				#bandwidth_neg = kern_density_est.cross_validation(sample_["negative_interactions"], kernel = kernel_)

				#prob_["positive_interactions"], bins_["positive_interactions"] = kern_density_est.kern_sklearn_expon(sample_["positive_interactions"], colour[0], xgrid[0], bandwidth = bandwidth_pos, kernel_ = kernel_)
				#prob_["negative_interactions"], bins_["negative_interactions"] = kern_density_est.kern_sklearn_expon(sample_["negative_interactions"], colour[1], xgrid[1], bandwidth = bandwidth_neg, kernel_ = kernel_)

				bandwidth_pos = kern_density_est.chrom_cross_validation_correlation(prior_elements, data_set_name, thresholds = np.linspace(0.01, .4, 200), classification_of_interactions = "positive_interactions", plot_likelihood_function = False)
				bandwidth_neg = kern_density_est.chrom_cross_validation_correlation(prior_elements, data_set_name, thresholds = np.linspace(0.01, .4, 200), classification_of_interactions = "negative_interactions", plot_likelihood_function = False)

				prob_["positive_interactions"], bins_["positive_interactions"] = kern_density_est.kern_scipy_gaus(sample_["positive_interactions"], colour[0], xgrid[0], bandwidth = bandwidth_pos)
				prob_["negative_interactions"], bins_["negative_interactions"] = kern_density_est.kern_scipy_gaus(sample_["negative_interactions"], colour[1], xgrid[1], bandwidth = bandwidth_neg)	
		else:
			#if attribute_of_interaction_ == "distance": bandwidth_pos = optimum["distance"][ite]
			#else: bandwidth_pos = optimum[data_set_name]

				
			if attribute_of_interaction_ == "distance" and positive_or_negative_side == "negative_side": label_1, label_2 = None, None
			else: label_1, label_2 = "positive interactions", "negative interactions"

			if likelihood_cross_validation:
				if attribute_of_interaction_ == "correlation":
					bandwidth_pos = kern_density_est.chrom_cross_validation_correlation(prior_elements, data_set_name, thresholds = np.linspace(0.01, .4, 200), classification_of_interactions = "positive_interactions", plot_likelihood_function = False)

				print(bandwidth_pos)
				prob_["positive_interactions"], bins_["positive_interactions"] = kern_density_est.kern_scipy_gaus(sample_["positive_interactions"], colour[0], xgrid[0], bandwidth = bandwidth_pos, label = label_1)
				prob_["negative_interactions"], bins_["negative_interactions"] = kern_density_est.kern_scipy_gaus(sample_["negative_interactions"], colour[1], xgrid[1], bandwidth = "scott", label = label_2)	
			else:

				bandwidth_pos = kern_density_est.cross_validation(sample_["positive_interactions"])# * sample_["positive_interactions"].std(ddof=1)
				print(bandwidth_pos)
				prob_["positive_interactions"], bins_["positive_interactions"] = kern_density_est.kern_scipy_gaus(sample_["positive_interactions"], colour[0], xgrid[0], bandwidth=bandwidth_pos, label = label_1)
				prob_["negative_interactions"], bins_["negative_interactions"] = kern_density_est.kern_scipy_gaus(sample_["negative_interactions"], colour[1], xgrid[1], bandwidth="scott", label = label_2)
				


		if use_smooth_prior_for_estimation:	return  prob_, bins_
		else: return  [[], []], [[], []]
def smooth_priors(ll, true_dist_of_total_prob, true_dist_of_total_bins, false_dist_of_total_prob, false_dist_of_total_bins, kde_mode, bandwidth = "silverman", kernel = "exponential"):

	import matplotlib.pyplot as plt
	import numpy as np

	miss_mass_true = np.diff(true_dist_of_total_bins)[[0,-1]]*true_dist_of_total_prob[[0,-1]]
	miss_mass_false = np.diff(false_dist_of_total_bins)[[0,-1]]*false_dist_of_total_prob[[0,-1]]

	import hist_sampler

	x_ = hist_sampler.histogram_sampler(true_dist_of_total_prob[1:-1], true_dist_of_total_bins[1:-1], 800000)
	y_ = hist_sampler.histogram_sampler(false_dist_of_total_prob[1:-1], false_dist_of_total_bins[1:-1], 800000)

	prob_x_2, bins_x_2, patch = plt.hist(abs(x_), 200, density = True, alpha = 0.15)  # density=True replaces the normed argument removed from matplotlib
	prob_x_3, bins_x_3, patch = plt.hist(abs(y_), bins = 600, density = True, alpha = 0.15)

	plt.clf()

	import kern_density_est
	
	if kde_mode == "scipy_kde": true_dist_of_total_prob_smooth, true_dist_of_total_bins_smooth = kern_density_est.kern_scipy_gaus(abs(x_[abs(x_) < true_dist_of_total_bins[-2]]), 'g', np.linspace(0, true_dist_of_total_bins[-2]+50000, 1000), bandwidth) # can change the kernel to an expenential kern_density_est.kern_sklearn_expon(x, color, bandwidth, kernel = "exponential", **kwargs)

	elif kde_mode == "sklearn_kde": true_dist_of_total_prob_smooth, true_dist_of_total_bins_smooth =  kern_density_est.kern_sklearn_expon(abs(x_), 'r', np.linspace(0, true_dist_of_total_bins[-2]+50000, 3000), bandwidth, kernel_ = kernel)


	false_dist_of_total_prob_smooth, false_dist_of_total_bins_smooth = kern_density_est.kern_sklearn_expon(abs(y_), 'y', np.linspace(0, false_dist_of_total_bins[-2]+50000, 2000), bandwidth = bandwidth)

	plt.clf()

	plt.plot(true_dist_of_total_bins_smooth, true_dist_of_total_prob_smooth*(1-sum(miss_mass_true)), lw = 2,  color = 'g')
	plt.clf()	
	

	plt.plot(false_dist_of_total_bins_smooth, false_dist_of_total_prob_smooth*(1-sum(miss_mass_false)), lw = 2,  color = 'y')	

	plt.clf()	
	#plt.show()

	from scipy.stats import kde
	import bisect

	
	insert_index_2 = bisect.bisect_left(true_dist_of_total_prob_smooth[::-1]-true_dist_of_total_prob[-1], 0.)	

	true_dist_of_total_prob_smooth, true_dist_of_total_bins_smooth = true_dist_of_total_prob_smooth[0:-1*insert_index_2-1], true_dist_of_total_bins_smooth[0:-1*insert_index_2-1]


	insert_index_4 = bisect.bisect_left(false_dist_of_total_prob_smooth[::-1]-false_dist_of_total_prob[-1], 0.)

	false_dist_of_total_prob_smooth, false_dist_of_total_bins_smooth = false_dist_of_total_prob_smooth[0:-1*insert_index_4-1], false_dist_of_total_bins_smooth[0:-1*insert_index_4-1]

	prob_true_ = true_dist_of_total_prob_smooth*(1-np.sum(miss_mass_true))
	prob_false_ = false_dist_of_total_prob_smooth*(1-np.sum(miss_mass_false))
			
	bins_t_plot = np.r_[0, true_dist_of_total_bins_smooth, true_dist_of_total_bins_smooth[-1], true_dist_of_total_bins[-1]]
	prob_t_plot = np.r_[0, prob_true_, true_dist_of_total_prob[-1], true_dist_of_total_prob[-1]]

	bins_f_plot = np.r_[0, false_dist_of_total_bins_smooth, false_dist_of_total_bins_smooth[-1], false_dist_of_total_bins[-1]]
	prob_f_plot = np.r_[0, prob_false_, false_dist_of_total_prob[-1], false_dist_of_total_prob[-1]]


	plt.clf()
	plt.bar(bins_x_2[:-1], prob_x_2*(1-sum(miss_mass_true)), np.diff(bins_x_2), alpha=0.2, color = "green")
	plt.bar(bins_x_3[:-1], prob_x_3*(1-sum(miss_mass_false)), np.diff(bins_x_3), alpha=0.2, color = "yellow")
	plt.axis([bins_f_plot[1], bins_f_plot[-2]+100000, 0., max(prob_t_plot)*1.5])
	ll.savefig()
	plt.clf()

	plt.plot(bins_t_plot, prob_t_plot, 'g')
	plt.plot(bins_f_plot, prob_f_plot, 'y')	

	plt.fill_between(bins_t_plot, prob_t_plot, where=None, color='g', alpha = 0.2, lw = 2)
	plt.fill_between(bins_f_plot, prob_f_plot, where=None, color='y', alpha = 0.2, lw = 2)
	plt.axis([bins_f_plot[1], bins_f_plot[-2]+100000, 0., max(prob_t_plot)*1.5])

	ll.savefig()

	mean_diff_true = np.diff(true_dist_of_total_prob_smooth)/2.
	true_smoothed_mean_probabilities = true_dist_of_total_prob_smooth[:-1] + mean_diff_true

	mean_diff_false = np.diff(false_dist_of_total_prob_smooth)/2.
	false_smoothed_mean_probabilities = false_dist_of_total_prob_smooth[:-1] + mean_diff_false

	
	miss_mass_true_ = [true_dist_of_total_prob[-1]*abs(true_dist_of_total_bins[-1] - true_dist_of_total_bins_smooth[-1])]
	miss_mass_false_ = [false_dist_of_total_prob[-1]*abs(false_dist_of_total_bins[-1] - false_dist_of_total_bins_smooth[-1])]

	prob_true_norm = true_smoothed_mean_probabilities/np.sum(true_smoothed_mean_probabilities*np.diff(true_dist_of_total_bins_smooth))*(1.-np.sum(miss_mass_true_))
	prob_false_norm = false_smoothed_mean_probabilities/np.sum(false_smoothed_mean_probabilities*np.diff(false_dist_of_total_bins_smooth))*(1.-np.sum(miss_mass_false_))

	prob_true = np.r_[prob_true_norm, true_dist_of_total_prob[-1]]
	bins_true = np.r_[true_dist_of_total_bins_smooth, true_dist_of_total_bins[-1]]
	
	prob_false = np.r_[prob_false_norm, false_dist_of_total_prob[-1]]
	bins_false = np.r_[false_dist_of_total_bins_smooth, false_dist_of_total_bins[-1]]

	plt.clf()

	return prob_true, bins_true, prob_false, bins_false, ll
Example #15
def smooth_priors(ll,
                  true_dist_of_total_prob,
                  true_dist_of_total_bins,
                  false_dist_of_total_prob,
                  false_dist_of_total_bins,
                  kde_mode,
                  bandwidth="silverman",
                  kernel="exponential"):

    import matplotlib.pyplot as plt
    import numpy as np

    miss_mass_true = np.diff(true_dist_of_total_bins)[[
        0, -1
    ]] * true_dist_of_total_prob[[0, -1]]
    miss_mass_false = np.diff(false_dist_of_total_bins)[[
        0, -1
    ]] * false_dist_of_total_prob[[0, -1]]

    import hist_sampler

    x_ = hist_sampler.histogram_sampler(true_dist_of_total_prob[1:-1],
                                        true_dist_of_total_bins[1:-1], 800000)
    y_ = hist_sampler.histogram_sampler(false_dist_of_total_prob[1:-1],
                                        false_dist_of_total_bins[1:-1], 800000)

    prob_x_2, bins_x_2, patch = plt.hist(abs(x_), 200, density=True, alpha=0.15)  # density= replaces the removed normed= argument
    prob_x_3, bins_x_3, patch = plt.hist(abs(y_), bins=600, density=True, alpha=0.15)

    plt.clf()

    import kern_density_est

    if kde_mode == "scipy_kde":
        true_dist_of_total_prob_smooth, true_dist_of_total_bins_smooth = kern_density_est.kern_scipy_gaus(
            abs(x_[abs(x_) < true_dist_of_total_bins[-2]]), 'g',
            np.linspace(0, true_dist_of_total_bins[-2] + 50000,
                        1000), bandwidth
        )  # can change the kernel to an exponential one via kern_density_est.kern_sklearn_expon(x, color, bandwidth, kernel = "exponential", **kwargs)

    elif kde_mode == "sklearn_kde":
        true_dist_of_total_prob_smooth, true_dist_of_total_bins_smooth = kern_density_est.kern_sklearn_expon(
            abs(x_),
            'r',
            np.linspace(0, true_dist_of_total_bins[-2] + 50000, 3000),
            bandwidth,
            kernel_=kernel)

    false_dist_of_total_prob_smooth, false_dist_of_total_bins_smooth = kern_density_est.kern_sklearn_expon(
        abs(y_),
        'y',
        np.linspace(0, false_dist_of_total_bins[-2] + 50000, 2000),
        bandwidth=bandwidth)

    plt.clf()

    plt.plot(true_dist_of_total_bins_smooth,
             true_dist_of_total_prob_smooth * (1 - sum(miss_mass_true)),
             lw=2,
             color='g')
    plt.clf()

    plt.plot(false_dist_of_total_bins_smooth,
             false_dist_of_total_prob_smooth * (1 - sum(miss_mass_false)),
             lw=2,
             color='y')

    plt.clf()
    #plt.show()

    from scipy.stats import kde
    import bisect

    insert_index_2 = bisect.bisect_left(
        true_dist_of_total_prob_smooth[::-1] - true_dist_of_total_prob[-1], 0.)

    true_dist_of_total_prob_smooth, true_dist_of_total_bins_smooth = true_dist_of_total_prob_smooth[
        0:-1 * insert_index_2 -
        1], true_dist_of_total_bins_smooth[0:-1 * insert_index_2 - 1]

    insert_index_4 = bisect.bisect_left(
        false_dist_of_total_prob_smooth[::-1] - false_dist_of_total_prob[-1],
        0.)

    false_dist_of_total_prob_smooth, false_dist_of_total_bins_smooth = false_dist_of_total_prob_smooth[
        0:-1 * insert_index_4 -
        1], false_dist_of_total_bins_smooth[0:-1 * insert_index_4 - 1]

    prob_true_ = true_dist_of_total_prob_smooth * (1 - np.sum(miss_mass_true))
    prob_false_ = false_dist_of_total_prob_smooth * (1 -
                                                     np.sum(miss_mass_false))

    bins_t_plot = np.r_[0, true_dist_of_total_bins_smooth,
                        true_dist_of_total_bins_smooth[-1],
                        true_dist_of_total_bins[-1]]
    prob_t_plot = np.r_[0, prob_true_, true_dist_of_total_prob[-1],
                        true_dist_of_total_prob[-1]]

    bins_f_plot = np.r_[0, false_dist_of_total_bins_smooth,
                        false_dist_of_total_bins_smooth[-1],
                        false_dist_of_total_bins[-1]]
    prob_f_plot = np.r_[0, prob_false_, false_dist_of_total_prob[-1],
                        false_dist_of_total_prob[-1]]

    plt.clf()
    plt.bar(bins_x_2[:-1],
            prob_x_2 * (1 - sum(miss_mass_true)),
            np.diff(bins_x_2),
            alpha=0.2,
            color="green")
    plt.bar(bins_x_3[:-1],
            prob_x_3 * (1 - sum(miss_mass_false)),
            np.diff(bins_x_3),
            alpha=0.2,
            color="yellow")
    plt.axis(
        [bins_f_plot[1], bins_f_plot[-2] + 100000, 0.,
         max(prob_t_plot) * 1.5])
    ll.savefig()
    plt.clf()

    plt.plot(bins_t_plot, prob_t_plot, 'g')
    plt.plot(bins_f_plot, prob_f_plot, 'y')

    plt.fill_between(bins_t_plot,
                     prob_t_plot,
                     where=None,
                     color='g',
                     alpha=0.2,
                     lw=2)
    plt.fill_between(bins_f_plot,
                     prob_f_plot,
                     where=None,
                     color='y',
                     alpha=0.2,
                     lw=2)
    plt.axis(
        [bins_f_plot[1], bins_f_plot[-2] + 100000, 0.,
         max(prob_t_plot) * 1.5])

    ll.savefig()

    mean_diff_true = np.diff(true_dist_of_total_prob_smooth) / 2.
    true_smoothed_mean_probabilities = true_dist_of_total_prob_smooth[:-1] + mean_diff_true

    mean_diff_false = np.diff(false_dist_of_total_prob_smooth) / 2.
    false_smoothed_mean_probabilities = false_dist_of_total_prob_smooth[:-1] + mean_diff_false

    miss_mass_true_ = [
        true_dist_of_total_prob[-1] *
        abs(true_dist_of_total_bins[-1] - true_dist_of_total_bins_smooth[-1])
    ]
    miss_mass_false_ = [
        false_dist_of_total_prob[-1] *
        abs(false_dist_of_total_bins[-1] - false_dist_of_total_bins_smooth[-1])
    ]

    prob_true_norm = true_smoothed_mean_probabilities / np.sum(
        true_smoothed_mean_probabilities *
        np.diff(true_dist_of_total_bins_smooth)) * (1. -
                                                    np.sum(miss_mass_true_))
    prob_false_norm = false_smoothed_mean_probabilities / np.sum(
        false_smoothed_mean_probabilities *
        np.diff(false_dist_of_total_bins_smooth)) * (1. -
                                                     np.sum(miss_mass_false_))

    prob_true = np.r_[prob_true_norm, true_dist_of_total_prob[-1]]
    bins_true = np.r_[true_dist_of_total_bins_smooth,
                      true_dist_of_total_bins[-1]]

    prob_false = np.r_[prob_false_norm, false_dist_of_total_prob[-1]]
    bins_false = np.r_[false_dist_of_total_bins_smooth,
                       false_dist_of_total_bins[-1]]

    plt.clf()

    return prob_true, bins_true, prob_false, bins_false, ll
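In this variant of smooth_priors the ll argument only needs to provide a savefig() method, so a matplotlib PdfPages handle fits the calls above. The snippet below is a hedged guess at how it could be supplied; the file name and variable names are invented:

from matplotlib.backends.backend_pdf import PdfPages

ll = PdfPages("smoothed_priors.pdf")
# prob_true, bins_true, prob_false, bins_false, ll = smooth_priors(ll, prob_t, bins_t, prob_f, bins_f, "scipy_kde")
ll.close()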
		interacting_enhancers_, non_interacting_enhancers_ = inter_enhancer(chrom)

		chrom_enh_survived_non_interacting = np.where(((enh_chroms == chrom)*np.invert(proximal_enhancers_mask)*filtered_enhancers)[non_interacting_enhancers_])[0]#

		chrom_enh_survived_interacting = np.where(((enh_chroms == chrom)*np.invert(proximal_enhancers_mask)*filtered_enhancers)[interacting_enhancers_])[0]

		total_interacting += enh_time_series[chrom_enh_survived_interacting].sum(1).tolist()
		total_non_interacting += enh_time_series[chrom_enh_survived_non_interacting].sum(1).tolist()


	for x, label, colour in [[np.log10(total_interacting), 'known interacting', "g"], [np.log10(total_non_interacting), 'unknown status', "r"]]:

		xgrid = np.linspace(min(x)*0.8, max(x)*1.2, num=200)

		freq__, bins__ = kern.kern_scipy_gaus(x, colour, xgrid, bandwidth = "scott", plot_atr = False, label=None)

		#print len(freq__), len(bins__)
		plt.figure(n+1, figsize=(8, 6), dpi=200)			
		plot_double_sided_dist_smooth_histograms(bins__[:-1] + np.diff(bins__)/2, freq__[:-1], colour, label)


	if data_set_name == "ER": plt.title(u'ER-\u03B1', fontsize=28)
	else: plt.title(data_set_name, fontsize=28)
	#plt.ylabel('density', fontsize=28)
	#plt.xlabel('correlation', fontsize=28)



	#plt.figure(n, figsize=(9,6))
	plt.xlabel("enrichment", fontsize = size_of_y_label)