def smooth_correl(correl_of_true_inter_total, correl_of_false_inter_total):
    """Return KDE-smoothed, interval-averaged densities for two correlation samples.

    Each sample is handed to ``kern_density_est.kern_scipy_gaus`` together with
    a 2000-point grid over [-1, 1].  The false sample is first thinned to
    ``int(len / 50.)`` values drawn without replacement.

    For each (density, edges) pair the KDE returns:

    * neighbouring density values are averaged, leaving one value per
      interval (one entry fewer than the KDE produced);
    * the outermost edges are moved to -1.00001 and 1.00001, and the first and
      last densities are rescaled so that the mass of those two intervals is
      the same as before the edges moved.

    The edge arrays returned by the KDE helper are modified in place.

    Returns:
        ``(true_density, true_edges, false_density, false_edges)``.
    """
    import kern_density_est
    import numpy as np

    def _average_and_widen(density, edges):
        # Midpoint of every pair of neighbouring density values.
        density = density[:-1] + np.diff(density) / 2.

        # Mass of the two outer intervals, measured with the original edges.
        widths = np.diff(edges)
        first_mass = density[0] * widths[0]
        last_mass = density[-1] * widths[-1]

        # Push the outer edges slightly beyond [-1, 1] ...
        edges[0] = -1.00001
        edges[-1] = 1.00001

        # ... and rescale so the outer intervals keep their mass.
        widths = np.diff(edges)
        density[0] = first_mass / widths[0]
        density[-1] = last_mass / widths[-1]
        return density

    true_density, true_edges = kern_density_est.kern_scipy_gaus(
        correl_of_true_inter_total, 'g', np.linspace(-1., 1., 2000))

    false_subsample = np.random.choice(
        correl_of_false_inter_total,
        int(len(correl_of_false_inter_total) / 50.),
        replace=False)
    false_density, false_edges = kern_density_est.kern_scipy_gaus(
        false_subsample, 'y', np.linspace(-1., 1., 2000))

    true_density = _average_and_widen(true_density, true_edges)
    false_density = _average_and_widen(false_density, false_edges)

    return true_density, true_edges, false_density, false_edges
def calculate_kern(attribute_of_interaction_, sample_, l_limit, up_limit,
                   number_of_bins, colour=("r", "b"), weights_=None,
                   bandwidth_pos=None, bandwidth_neg=None):
    """Kernel-density estimates for the positive and negative interaction samples.

    Reads the enclosing-scope names ``plot_atr_kernel``, ``domain``,
    ``Sample_MoG_classificator`` and ``use_smooth_prior_for_estimation``.

    * ``domain`` true and attribute ``"distance"``: both classes use
      ``kern_scipy_gaus_weighted`` with ``weights_[<class>]``.
    * ``domain`` true otherwise: both classes use ``kern_scipy_gaus``.
    * ``domain`` false: positives use ``kern_scipy_gaus``; negatives are
      ``[], []`` when ``Sample_MoG_classificator`` is true, otherwise they use
      ``kern_scipy_gaus`` too.

    Args:
        attribute_of_interaction_: attribute name; only ``"distance"`` is
            treated specially.
        sample_: mapping with ``"positive_interactions"`` and
            ``"negative_interactions"`` samples.
        l_limit, up_limit: ends of the evaluation grids.
        number_of_bins: pair of grid sizes (positive, negative).
        colour: pair of colours passed through to the KDE helpers.
        weights_: mapping of per-class weights; required in the weighted branch.
        bandwidth_pos, bandwidth_neg: bandwidths passed to the KDE helpers.

    Returns:
        ``(prob_, bins_)`` dictionaries keyed by class when
        ``use_smooth_prior_for_estimation`` is true, else
        ``([[], []], [[], []])``.
    """
    # NOTE(review): nesting reconstructed from a whitespace-collapsed source;
    # the final use_smooth_prior_for_estimation test is assumed to sit at
    # function level -- confirm against the original file.
    import kern_density_est

    kern_density_est.plot_atr = plot_atr_kernel

    pos_key = "positive_interactions"
    neg_key = "negative_interactions"
    pos_grid = np.linspace(l_limit, up_limit, number_of_bins[0])
    neg_grid = np.linspace(l_limit, up_limit, number_of_bins[1])
    prob_, bins_ = {}, {}

    if not domain:
        prob_[pos_key], bins_[pos_key] = kern_density_est.kern_scipy_gaus(
            sample_[pos_key], colour[0], pos_grid, bandwidth=bandwidth_pos)
        if Sample_MoG_classificator:
            prob_[neg_key], bins_[neg_key] = [], []
        else:
            prob_[neg_key], bins_[neg_key] = kern_density_est.kern_scipy_gaus(
                sample_[neg_key], colour[1], neg_grid, bandwidth=bandwidth_neg)
    elif attribute_of_interaction_ == "distance":
        prob_[pos_key], bins_[pos_key] = kern_density_est.kern_scipy_gaus_weighted(
            sample_[pos_key], colour[0], pos_grid,
            weights=weights_[pos_key], bandwidth=bandwidth_pos)
        prob_[neg_key], bins_[neg_key] = kern_density_est.kern_scipy_gaus_weighted(
            sample_[neg_key], colour[1], neg_grid,
            weights=weights_[neg_key], bandwidth=bandwidth_neg)
    else:
        prob_[pos_key], bins_[pos_key] = kern_density_est.kern_scipy_gaus(
            sample_[pos_key], colour[0], pos_grid, bandwidth=bandwidth_pos)
        prob_[neg_key], bins_[neg_key] = kern_density_est.kern_scipy_gaus(
            sample_[neg_key], colour[1], neg_grid, bandwidth=bandwidth_neg)

    if use_smooth_prior_for_estimation:
        return prob_, bins_
    return [[], []], [[], []]
def calculate_kern(sample_, bins, band="scott"):
    """Evaluate ``kern_density_est.kern_scipy_gaus`` on ``sample_`` without plotting.

    Args:
        sample_: sample passed to the KDE helper.
        bins: evaluation grid passed to the KDE helper.
        band: forwarded as the helper's ``bandwidth`` argument.

    Returns:
        The ``(prob, bins)`` pair produced by the helper.
    """
    import kern_density_est

    estimate = kern_density_est.kern_scipy_gaus(
        sample_, "g", bins, bandwidth=band, plot_atr=False)
    density, grid = estimate
    return density, grid
def calculate_kern(sample_, l_limit, up_limit, number_of_bins):
    """KDE of the pooled positive and negative interaction samples.

    The two samples are concatenated and evaluated with
    ``kern_density_est.kern_scipy_gaus`` on a grid of ``number_of_bins[0]``
    points from ``l_limit`` to ``up_limit``.  The colour (``colour[0]``) and
    bandwidth (``optimum``) are read from the enclosing scope.

    Returns:
        The ``(prob, bins)`` pair produced by the KDE helper.
    """
    import kern_density_est

    grid = np.linspace(l_limit, up_limit, number_of_bins[0])
    pooled = np.r_[sample_["positive_interactions"],
                   sample_["negative_interactions"]]
    density, grid_out = kern_density_est.kern_scipy_gaus(
        pooled, colour[0], grid, bandwidth=optimum)
    return density, grid_out
def calculate_kern(sample_, l_limit, up_limit, number_of_bins):
    """Estimate one density for both interaction classes taken together.

    Only the first entry of ``number_of_bins`` is used: it sets the number of
    grid points between ``l_limit`` and ``up_limit``.  ``colour`` and
    ``optimum`` are not parameters; they are looked up in the enclosing scope
    and forwarded as the plot colour and the bandwidth.

    Returns:
        ``(prob, bins)`` as unpacked from ``kern_density_est.kern_scipy_gaus``.
    """
    import kern_density_est

    evaluation_points = np.linspace(l_limit, up_limit, number_of_bins[0])
    combined_sample = np.r_[
        sample_["positive_interactions"],
        sample_["negative_interactions"],
    ]
    result = kern_density_est.kern_scipy_gaus(
        combined_sample, colour[0], evaluation_points, bandwidth=optimum)
    prob_, bins_ = result
    return prob_, bins_
def smooth_correl(correl_of_true_inter_total, correl_of_false_inter_total):
    """Smooth the true and false correlation samples with a KDE on [-1, 1].

    Steps, applied in the same way to both samples:

    1. ``kern_density_est.kern_scipy_gaus`` is evaluated on 2000 grid points
       between -1 and 1 (the false sample is first reduced to
       ``int(len / 50.)`` values chosen at random without replacement).
    2. Adjacent density values are averaged, so the density ends up one entry
       shorter than the edge array.
    3. The first and last edges are overwritten in place with -1.00001 and
       1.00001; the first and last densities are divided down so the product
       density * width of those two intervals is unchanged.

    Returns:
        A 4-tuple ``(true_density, true_edges, false_density, false_edges)``.
    """
    import kern_density_est
    import numpy as np

    lower_edge, upper_edge = -1.00001, 1.00001

    true_pdf, true_edges = kern_density_est.kern_scipy_gaus(
        correl_of_true_inter_total, 'g', np.linspace(-1., 1., 2000))
    thinned_false = np.random.choice(
        correl_of_false_inter_total,
        int(len(correl_of_false_inter_total) / 50.),
        replace=False)
    false_pdf, false_edges = kern_density_est.kern_scipy_gaus(
        thinned_false, 'y', np.linspace(-1., 1., 2000))

    results = []
    for pdf, edges in ((true_pdf, true_edges), (false_pdf, false_edges)):
        # One averaged value per interval between neighbouring grid points.
        pdf = pdf[:-1] + np.diff(pdf) / 2.

        old_widths = np.diff(edges)
        head_mass = pdf[0] * old_widths[0]
        tail_mass = pdf[-1] * old_widths[-1]

        edges[0] = lower_edge
        edges[-1] = upper_edge

        new_widths = np.diff(edges)
        pdf[0] = head_mass / new_widths[0]
        pdf[-1] = tail_mass / new_widths[-1]

        results.extend((pdf, edges))

    return tuple(results)
def calculate_kern(attribute_of_interaction_, sample_, l_limit, up_limit,
                   number_of_bins, colour=("r", "b"), weights_=None,
                   bandwidth_pos=None, bandwidth_neg=None):
    """Compute per-class kernel density estimates on evenly spaced grids.

    Two grids are built from ``l_limit`` to ``up_limit`` with
    ``number_of_bins[0]`` (positive class) and ``number_of_bins[1]``
    (negative class) points.  Which KDE helper runs depends on names read
    from the enclosing scope:

    * ``domain`` true, attribute ``"distance"``: weighted KDE
      (``kern_scipy_gaus_weighted``) for both classes, using
      ``weights_[<class>]``.
    * ``domain`` true, any other attribute: plain ``kern_scipy_gaus`` for
      both classes.
    * ``domain`` false: plain KDE for positives; negatives get ``[], []``
      unless ``Sample_MoG_classificator`` is false, in which case they get a
      plain KDE as well.

    ``kern_density_est.plot_atr`` is set to ``plot_atr_kernel`` before any
    estimate is made.

    Returns:
        ``(prob_, bins_)`` dictionaries keyed by ``"positive_interactions"``
        and ``"negative_interactions"`` if ``use_smooth_prior_for_estimation``
        is true; otherwise ``([[], []], [[], []])``.
    """
    # NOTE(review): block nesting was reconstructed from a source whose
    # indentation was lost; the closing use_smooth_prior_for_estimation test
    # is assumed to be at function level -- confirm.
    import kern_density_est

    kern_density_est.plot_atr = plot_atr_kernel

    prob_ = {}
    bins_ = {}

    def _record(label, estimate):
        # Split a (prob, bins) pair into the two result dictionaries.
        prob_[label], bins_[label] = estimate

    grid_pos = np.linspace(l_limit, up_limit, number_of_bins[0])
    grid_neg = np.linspace(l_limit, up_limit, number_of_bins[1])

    if domain:
        if attribute_of_interaction_ == "distance":
            _record("positive_interactions",
                    kern_density_est.kern_scipy_gaus_weighted(
                        sample_["positive_interactions"], colour[0], grid_pos,
                        weights=weights_["positive_interactions"],
                        bandwidth=bandwidth_pos))
            _record("negative_interactions",
                    kern_density_est.kern_scipy_gaus_weighted(
                        sample_["negative_interactions"], colour[1], grid_neg,
                        weights=weights_["negative_interactions"],
                        bandwidth=bandwidth_neg))
        else:
            _record("positive_interactions",
                    kern_density_est.kern_scipy_gaus(
                        sample_["positive_interactions"], colour[0], grid_pos,
                        bandwidth=bandwidth_pos))
            _record("negative_interactions",
                    kern_density_est.kern_scipy_gaus(
                        sample_["negative_interactions"], colour[1], grid_neg,
                        bandwidth=bandwidth_neg))
    else:
        _record("positive_interactions",
                kern_density_est.kern_scipy_gaus(
                    sample_["positive_interactions"], colour[0], grid_pos,
                    bandwidth=bandwidth_pos))
        _record("negative_interactions", ([], []))
        if not Sample_MoG_classificator:
            _record("negative_interactions",
                    kern_density_est.kern_scipy_gaus(
                        sample_["negative_interactions"], colour[1], grid_neg,
                        bandwidth=bandwidth_neg))

    if not use_smooth_prior_for_estimation:
        return [[], []], [[], []]
    return prob_, bins_
def smooth_priors(true_dist_of_total_prob, true_dist_of_total_bins,
                  false_dist_of_total_prob, false_dist_of_total_bins,
                  kde_mode, bandwidth="silverman", kernel="exponential"):
    """Replace the interior of two histogram priors by KDE-smoothed densities.

    The first and last bins of each histogram are kept as they are.  The
    interior (``prob[1:-1]``, ``bins[1:-1]``) is passed to
    ``hist_sampler.histogram_sampler`` (presumably to draw 400000 samples --
    confirm), a KDE is evaluated on a 1000-point grid that extends beyond the
    interior, and both KDE tails are cut off using ``bisect`` against the
    first/last histogram values.  The trimmed density is averaged per grid
    interval and rescaled so that its integral equals one minus the mass of
    the two (stretched) outer bins.  Both curves are also drawn on the current
    matplotlib figure.

    Args:
        true_dist_of_total_prob, true_dist_of_total_bins: NumPy arrays with the
            "true" histogram values and bin edges.
        false_dist_of_total_prob, false_dist_of_total_bins: same for the
            "false" histogram.
        kde_mode: ``"scipy_kde"`` or ``"sklearn_kde"``; selects the estimator
            used for the true distribution.  The false distribution always
            uses ``kern_scipy_gaus``.
        bandwidth: forwarded to the KDE helpers.
        kernel: forwarded as ``kernel_`` in ``"sklearn_kde"`` mode only.

    Returns:
        ``(prob_true, bins_true, prob_false, bins_false)``.

    Raises:
        ValueError: if ``kde_mode`` is not one of the two supported values.
    """
    # Fail early and clearly; previously an unsupported mode only surfaced as
    # an unbound-local error after the sampling had already been done.
    if kde_mode not in ("scipy_kde", "sklearn_kde"):
        raise ValueError(
            "kde_mode must be 'scipy_kde' or 'sklearn_kde', got %r" % (kde_mode,))

    import bisect

    import matplotlib.pyplot as plt
    import numpy as np

    import hist_sampler
    import kern_density_est

    def _trim_tails(density, grid, first_level, last_level):
        # bisect assumes sorted input; the KDE tails are only expected to be
        # monotonic, so these cut points are a heuristic.
        lead = bisect.bisect_left(density - first_level, 0.)
        trail = bisect.bisect_left(density[::-1] - last_level, 0.)
        keep = slice(lead + 1, -trail - 1)
        return density[keep], grid[keep]

    def _interval_means(density):
        # Average of each pair of neighbouring density values.
        return density[:-1] + np.diff(density) / 2.

    # Mass of the first and last histogram bins (used to scale the plot).
    miss_mass_true = (np.diff(true_dist_of_total_bins)[[0, -1]]
                      * true_dist_of_total_prob[[0, -1]])
    miss_mass_false = (np.diff(false_dist_of_total_bins)[[0, -1]]
                       * false_dist_of_total_prob[[0, -1]])

    x_ = hist_sampler.histogram_sampler(
        true_dist_of_total_prob[1:-1], true_dist_of_total_bins[1:-1], 400000)
    y_ = hist_sampler.histogram_sampler(
        false_dist_of_total_prob[1:-1], false_dist_of_total_bins[1:-1], 400000)

    true_grid = np.linspace(true_dist_of_total_bins[1] - 50000,
                            true_dist_of_total_bins[-2] + 50000, 1000)
    if kde_mode == "scipy_kde":
        true_smooth_prob, true_smooth_bins = kern_density_est.kern_scipy_gaus(
            x_, 'g', true_grid, bandwidth)
    else:
        # "sklearn_kde": kernel shape is selectable, e.g. "exponential".
        true_smooth_prob, true_smooth_bins = kern_density_est.kern_sklearn_expon(
            x_, 'g', true_grid, bandwidth, kernel_=kernel)

    true_smooth_prob, true_smooth_bins = _trim_tails(
        true_smooth_prob, true_smooth_bins,
        true_dist_of_total_prob[0], true_dist_of_total_prob[-1])

    false_smooth_prob, false_smooth_bins = kern_density_est.kern_scipy_gaus(
        y_, 'y',
        np.linspace(false_dist_of_total_bins[1] - 200000,
                    false_dist_of_total_bins[-2] + 200000, 1000),
        bandwidth)
    # Start from an empty figure before drawing the combined curves below.
    plt.clf()

    false_smooth_prob, false_smooth_bins = _trim_tails(
        false_smooth_prob, false_smooth_bins,
        false_dist_of_total_prob[0], false_dist_of_total_prob[-1])

    # --- plotting: flat outer bins joined to the scaled KDE interior ---
    prob_true_ = true_smooth_prob * (1 - np.sum(miss_mass_true))
    prob_false_ = false_smooth_prob * (1 - np.sum(miss_mass_false))

    bins_t_plot = np.r_[true_dist_of_total_bins[0], true_smooth_bins[0],
                        true_smooth_bins, true_smooth_bins[-1],
                        true_dist_of_total_bins[-1]]
    prob_t_plot = np.r_[true_dist_of_total_prob[0], true_dist_of_total_prob[0],
                        prob_true_, true_dist_of_total_prob[-1],
                        true_dist_of_total_prob[-1]]
    bins_f_plot = np.r_[false_dist_of_total_bins[0], false_smooth_bins[0],
                        false_smooth_bins, false_smooth_bins[-1],
                        false_dist_of_total_bins[-1]]
    prob_f_plot = np.r_[false_dist_of_total_prob[0], false_dist_of_total_prob[0],
                        prob_false_, false_dist_of_total_prob[-1],
                        false_dist_of_total_prob[-1]]

    plt.plot(bins_t_plot, prob_t_plot, 'g')
    plt.plot(bins_f_plot, prob_f_plot, 'y')
    plt.fill_between(bins_t_plot, prob_t_plot, where=None, color='g', alpha=0.2)
    plt.fill_between(bins_f_plot, prob_f_plot, where=None, color='y', alpha=0.2)
    plt.axis([bins_f_plot[1] - 100000, bins_f_plot[-2] + 100000,
              0., max(prob_t_plot)])

    # --- returned densities: one value per interval, renormalised ---
    true_means = _interval_means(true_smooth_prob)
    false_means = _interval_means(false_smooth_prob)

    # Mass of the outer bins once they are stretched to meet the trimmed grid.
    true_edge_mass = (
        true_dist_of_total_prob[0]
        * abs(true_dist_of_total_bins[0] - true_smooth_bins[0])
        + true_dist_of_total_prob[-1]
        * abs(true_dist_of_total_bins[-1] - true_smooth_bins[-1]))
    false_edge_mass = (
        false_dist_of_total_prob[0]
        * abs(false_dist_of_total_bins[0] - false_smooth_bins[0])
        + false_dist_of_total_prob[-1]
        * abs(false_dist_of_total_bins[-1] - false_smooth_bins[-1]))

    prob_true_norm = (true_means
                      / np.sum(true_means * np.diff(true_smooth_bins))
                      * (1. - true_edge_mass))
    prob_false_norm = (false_means
                       / np.sum(false_means * np.diff(false_smooth_bins))
                       * (1. - false_edge_mass))

    prob_true = np.r_[true_dist_of_total_prob[0], prob_true_norm,
                      true_dist_of_total_prob[-1]]
    bins_true = np.r_[true_dist_of_total_bins[0], true_smooth_bins,
                      true_dist_of_total_bins[-1]]
    prob_false = np.r_[false_dist_of_total_prob[0], prob_false_norm,
                       false_dist_of_total_prob[-1]]
    bins_false = np.r_[false_dist_of_total_bins[0], false_smooth_bins,
                       false_dist_of_total_bins[-1]]

    return prob_true, bins_true, prob_false, bins_false
def smooth_priors(
    true_dist_of_total_prob,
    true_dist_of_total_bins,
    false_dist_of_total_prob,
    false_dist_of_total_bins,
    kde_mode,
    bandwidth="silverman",
    kernel="exponential",
):
    """Smooth two histogram priors with a KDE, keeping only the last bin as is.

    All but the last two entries of each histogram (``prob[:-2]``,
    ``bins[:-2]``) are passed to ``hist_sampler.histogram_sampler``
    (presumably to draw 800000 / 100000 samples -- confirm).  A KDE is then
    evaluated on a fresh grid starting at the first bin edge.  The true
    density is truncated at the index ``bisect`` finds for the first histogram
    value; the false density is left untrimmed.  Each density is averaged per
    grid interval and rescaled so its integral equals one minus the mass of
    the last bin stretched back to the end of the smoothed grid.

    Args:
        true_dist_of_total_prob, true_dist_of_total_bins: NumPy arrays with the
            "true" histogram values and bin edges.
        false_dist_of_total_prob, false_dist_of_total_bins: same for the
            "false" histogram.
        kde_mode: ``"scipy_kde"`` or ``"sklearn_kde"``; selects the estimator
            for the true distribution.  The false distribution always uses
            ``kern_scipy_gaus``.
        bandwidth: forwarded to the KDE helpers.
        kernel: forwarded as ``kernel_`` in ``"sklearn_kde"`` mode only.

    Returns:
        ``(prob_true, bins_true, prob_false, bins_false)``.

    Raises:
        ValueError: if ``kde_mode`` is not one of the two supported values.
    """
    # Reject unsupported modes before any expensive work; previously they only
    # surfaced later as an unbound-local error.
    if kde_mode not in ("scipy_kde", "sklearn_kde"):
        raise ValueError(
            "kde_mode must be 'scipy_kde' or 'sklearn_kde', got %r" % (kde_mode,))

    import bisect

    import matplotlib.pyplot as plt
    import numpy as np

    import hist_sampler
    import kern_density_est

    x_ = hist_sampler.histogram_sampler(
        true_dist_of_total_prob[:-2], true_dist_of_total_bins[:-2], 800000)
    y_ = hist_sampler.histogram_sampler(
        false_dist_of_total_prob[:-2], false_dist_of_total_bins[:-2], 100000)

    true_grid = np.linspace(
        true_dist_of_total_bins[0], true_dist_of_total_bins[-3] + 50000, 3000)
    if kde_mode == "scipy_kde":
        true_smooth_prob, true_smooth_bins = kern_density_est.kern_scipy_gaus(
            x_, "g", true_grid, bandwidth)
    else:
        # "sklearn_kde": kernel shape is selectable, e.g. "exponential".
        true_smooth_prob, true_smooth_bins = kern_density_est.kern_sklearn_expon(
            x_, "g", true_grid, bandwidth, kernel_=kernel)

    # bisect assumes sorted input, so this cut point is a heuristic.
    cut = bisect.bisect_left(true_smooth_prob - true_dist_of_total_prob[0], 0.0)
    true_smooth_prob = true_smooth_prob[:cut]
    true_smooth_bins = true_smooth_bins[:cut]

    false_smooth_prob, false_smooth_bins = kern_density_est.kern_scipy_gaus(
        y_, "y",
        np.linspace(false_dist_of_total_bins[0], false_dist_of_total_bins[-2], 2000),
        bandwidth)
    # NOTE(review): the KDE helpers take a colour and presumably draw on the
    # current figure; the original clears it here, so that is kept.
    plt.clf()

    # One averaged density value per grid interval.
    true_means = true_smooth_prob[:-1] + np.diff(true_smooth_prob) / 2.0
    false_means = false_smooth_prob[:-1] + np.diff(false_smooth_prob) / 2.0

    # Mass of the last histogram bin once stretched back to the smoothed grid.
    true_tail_mass = true_dist_of_total_prob[-1] * abs(
        true_dist_of_total_bins[-1] - true_smooth_bins[-1])
    false_tail_mass = false_dist_of_total_prob[-1] * abs(
        false_dist_of_total_bins[-1] - false_smooth_bins[-1])

    prob_true_norm = (
        true_means
        / np.sum(true_means * np.diff(true_smooth_bins))
        * (1.0 - true_tail_mass)
    )
    prob_false_norm = (
        false_means
        / np.sum(false_means * np.diff(false_smooth_bins))
        * (1.0 - false_tail_mass)
    )

    prob_true = np.r_[prob_true_norm, true_dist_of_total_prob[-1]]
    bins_true = np.r_[true_smooth_bins, true_dist_of_total_bins[-1]]
    prob_false = np.r_[prob_false_norm, false_dist_of_total_prob[-1]]
    bins_false = np.r_[false_smooth_bins, false_dist_of_total_bins[-1]]
    return prob_true, bins_true, prob_false, bins_false
def smooth_priors(true_dist_of_total_prob, true_dist_of_total_bins,
                  false_dist_of_total_prob, false_dist_of_total_bins,
                  kde_mode, bandwidth="silverman", kernel="exponential"):
    """Smooth the interior of the true and false histogram priors with a KDE.

    For each histogram the first and last bins are preserved.  The inner part
    (``prob[1:-1]``, ``bins[1:-1]``) goes through
    ``hist_sampler.histogram_sampler`` (presumably drawing 400000 samples --
    confirm) and a KDE on a 1000-point grid padded beyond the inner bins.
    ``bisect`` against the first and last histogram values decides how much of
    each KDE tail is dropped.  The remaining density is averaged per grid
    interval and rescaled to integrate to one minus the mass of the two outer
    bins stretched to the trimmed grid.  The resulting curves are drawn on
    the current matplotlib figure as a side effect.

    Args:
        true_dist_of_total_prob, true_dist_of_total_bins: NumPy arrays with the
            "true" histogram values and bin edges.
        false_dist_of_total_prob, false_dist_of_total_bins: same for the
            "false" histogram.
        kde_mode: ``"scipy_kde"`` or ``"sklearn_kde"``; chooses the estimator
            for the true distribution only (the false one always uses
            ``kern_scipy_gaus``).
        bandwidth: forwarded to the KDE helpers.
        kernel: forwarded as ``kernel_`` when ``kde_mode == "sklearn_kde"``.

    Returns:
        ``(prob_true, bins_true, prob_false, bins_false)``.

    Raises:
        ValueError: if ``kde_mode`` is neither supported value.
    """
    # Validate first: an unknown mode used to fall through the if/elif and
    # crash much later with an unbound-local error.
    if kde_mode not in ("scipy_kde", "sklearn_kde"):
        raise ValueError(
            "kde_mode must be 'scipy_kde' or 'sklearn_kde', got %r" % (kde_mode,))

    import bisect

    import matplotlib.pyplot as plt
    import numpy as np

    import hist_sampler
    import kern_density_est

    # Mass of the first and last histogram bins (used to scale the plot).
    miss_mass_true = np.diff(true_dist_of_total_bins)[[0, -1]] * true_dist_of_total_prob[[0, -1]]
    miss_mass_false = np.diff(false_dist_of_total_bins)[[0, -1]] * false_dist_of_total_prob[[0, -1]]

    x_ = hist_sampler.histogram_sampler(
        true_dist_of_total_prob[1:-1], true_dist_of_total_bins[1:-1], 400000)
    y_ = hist_sampler.histogram_sampler(
        false_dist_of_total_prob[1:-1], false_dist_of_total_bins[1:-1], 400000)

    grid_true = np.linspace(true_dist_of_total_bins[1] - 50000,
                            true_dist_of_total_bins[-2] + 50000, 1000)
    if kde_mode == "scipy_kde":
        kde_true, grid_true = kern_density_est.kern_scipy_gaus(
            x_, 'g', grid_true, bandwidth)
    else:
        # "sklearn_kde": kernel shape is selectable, e.g. "exponential".
        kde_true, grid_true = kern_density_est.kern_sklearn_expon(
            x_, 'g', grid_true, bandwidth, kernel_=kernel)

    # bisect assumes sorted input; on the KDE tails this is a heuristic cut.
    left_cut = bisect.bisect_left(kde_true - true_dist_of_total_prob[0], 0.)
    right_cut = bisect.bisect_left(kde_true[::-1] - true_dist_of_total_prob[-1], 0.)
    kde_true = kde_true[left_cut + 1:-right_cut - 1]
    grid_true = grid_true[left_cut + 1:-right_cut - 1]

    kde_false, grid_false = kern_density_est.kern_scipy_gaus(
        y_, 'y',
        np.linspace(false_dist_of_total_bins[1] - 200000,
                    false_dist_of_total_bins[-2] + 200000, 1000),
        bandwidth)
    # Start from an empty figure before drawing the combined curves below.
    plt.clf()

    left_cut = bisect.bisect_left(kde_false - false_dist_of_total_prob[0], 0.)
    right_cut = bisect.bisect_left(kde_false[::-1] - false_dist_of_total_prob[-1], 0.)
    kde_false = kde_false[left_cut + 1:-right_cut - 1]
    grid_false = grid_false[left_cut + 1:-right_cut - 1]

    # Plot: flat outer bins joined to the KDE interior scaled by the mass
    # that is left after removing the outer bins.
    prob_true_ = kde_true * (1 - np.sum(miss_mass_true))
    prob_false_ = kde_false * (1 - np.sum(miss_mass_false))

    bins_t_plot = np.r_[true_dist_of_total_bins[0], grid_true[0], grid_true,
                        grid_true[-1], true_dist_of_total_bins[-1]]
    prob_t_plot = np.r_[true_dist_of_total_prob[0], true_dist_of_total_prob[0],
                        prob_true_, true_dist_of_total_prob[-1],
                        true_dist_of_total_prob[-1]]
    bins_f_plot = np.r_[false_dist_of_total_bins[0], grid_false[0], grid_false,
                        grid_false[-1], false_dist_of_total_bins[-1]]
    prob_f_plot = np.r_[false_dist_of_total_prob[0], false_dist_of_total_prob[0],
                        prob_false_, false_dist_of_total_prob[-1],
                        false_dist_of_total_prob[-1]]

    plt.plot(bins_t_plot, prob_t_plot, 'g')
    plt.plot(bins_f_plot, prob_f_plot, 'y')
    plt.fill_between(bins_t_plot, prob_t_plot, where=None, color='g', alpha=0.2)
    plt.fill_between(bins_f_plot, prob_f_plot, where=None, color='y', alpha=0.2)
    plt.axis([bins_f_plot[1] - 100000, bins_f_plot[-2] + 100000, 0., max(prob_t_plot)])

    # Returned densities: one averaged value per grid interval.
    true_smoothed_mean_probabilities = kde_true[:-1] + np.diff(kde_true) / 2.
    false_smoothed_mean_probabilities = kde_false[:-1] + np.diff(kde_false) / 2.

    # Mass of the outer bins once stretched to meet the trimmed grid.
    outer_mass_true = (
        true_dist_of_total_prob[0] * abs(true_dist_of_total_bins[0] - grid_true[0])
        + true_dist_of_total_prob[-1] * abs(true_dist_of_total_bins[-1] - grid_true[-1]))
    outer_mass_false = (
        false_dist_of_total_prob[0] * abs(false_dist_of_total_bins[0] - grid_false[0])
        + false_dist_of_total_prob[-1] * abs(false_dist_of_total_bins[-1] - grid_false[-1]))

    prob_true_norm = (
        true_smoothed_mean_probabilities
        / np.sum(true_smoothed_mean_probabilities * np.diff(grid_true))
        * (1. - outer_mass_true))
    prob_false_norm = (
        false_smoothed_mean_probabilities
        / np.sum(false_smoothed_mean_probabilities * np.diff(grid_false))
        * (1. - outer_mass_false))

    prob_true = np.r_[true_dist_of_total_prob[0], prob_true_norm, true_dist_of_total_prob[-1]]
    bins_true = np.r_[true_dist_of_total_bins[0], grid_true, true_dist_of_total_bins[-1]]
    prob_false = np.r_[false_dist_of_total_prob[0], prob_false_norm, false_dist_of_total_prob[-1]]
    bins_false = np.r_[false_dist_of_total_bins[0], grid_false, false_dist_of_total_bins[-1]]

    return prob_true, bins_true, prob_false, bins_false
def calculate_kern(sample_, bins, band="scott"):
    """Thin wrapper around ``kern_density_est.kern_scipy_gaus`` with plotting off.

    The helper is called with the fixed colour ``"g"``, ``bandwidth=band`` and
    ``plot_atr=False``; its two results are returned unchanged as
    ``(prob, bins)``.
    """
    import kern_density_est

    kde_options = {"bandwidth": band, "plot_atr": False}
    density, grid = kern_density_est.kern_scipy_gaus(
        sample_, "g", bins, **kde_options)
    return density, grid
def smooth_priors(true_dist_of_total_prob, true_dist_of_total_bins,
                  false_dist_of_total_prob, false_dist_of_total_bins,
                  kde_mode, bandwidth="silverman", kernel="exponential"):
    """KDE-smooth two histogram priors while keeping their last bin untouched.

    Everything except the last two entries of each histogram (``prob[:-2]``,
    ``bins[:-2]``) is passed to ``hist_sampler.histogram_sampler`` (presumably
    to draw 800000 samples for the true and 100000 for the false histogram --
    confirm).  A KDE is then evaluated on a new grid that starts at the first
    bin edge.  Only the true density is truncated, at the index returned by
    ``bisect`` for the first histogram value.  Both densities are averaged per
    grid interval and rescaled so that they integrate to one minus the mass of
    the last bin stretched back to the end of the smoothed grid.

    Args:
        true_dist_of_total_prob, true_dist_of_total_bins: NumPy arrays with the
            "true" histogram values and bin edges.
        false_dist_of_total_prob, false_dist_of_total_bins: same for the
            "false" histogram.
        kde_mode: ``"scipy_kde"`` or ``"sklearn_kde"``; chooses the estimator
            for the true distribution only (the false one always uses
            ``kern_scipy_gaus``).
        bandwidth: forwarded to the KDE helpers.
        kernel: forwarded as ``kernel_`` when ``kde_mode == "sklearn_kde"``.

    Returns:
        ``(prob_true, bins_true, prob_false, bins_false)``.

    Raises:
        ValueError: if ``kde_mode`` is neither supported value.
    """
    # Validate first: an unknown mode used to fall through the if/elif and
    # crash later with an unbound-local error, after the sampling was done.
    if kde_mode not in ("scipy_kde", "sklearn_kde"):
        raise ValueError(
            "kde_mode must be 'scipy_kde' or 'sklearn_kde', got %r" % (kde_mode,))

    import bisect

    import matplotlib.pyplot as plt
    import numpy as np

    import hist_sampler
    import kern_density_est

    x_ = hist_sampler.histogram_sampler(
        true_dist_of_total_prob[:-2], true_dist_of_total_bins[:-2], 800000)
    y_ = hist_sampler.histogram_sampler(
        false_dist_of_total_prob[:-2], false_dist_of_total_bins[:-2], 100000)

    grid_true = np.linspace(
        true_dist_of_total_bins[0], true_dist_of_total_bins[-3] + 50000, 3000)
    if kde_mode == "scipy_kde":
        kde_true, grid_true = kern_density_est.kern_scipy_gaus(
            x_, 'g', grid_true, bandwidth)
    else:
        # "sklearn_kde": kernel shape is selectable, e.g. "exponential".
        kde_true, grid_true = kern_density_est.kern_sklearn_expon(
            x_, 'g', grid_true, bandwidth, kernel_=kernel)

    # bisect assumes sorted input, so this truncation point is a heuristic.
    stop = bisect.bisect_left(kde_true - true_dist_of_total_prob[0], 0.)
    kde_true, grid_true = kde_true[0:stop], grid_true[0:stop]

    kde_false, grid_false = kern_density_est.kern_scipy_gaus(
        y_, 'y',
        np.linspace(false_dist_of_total_bins[0], false_dist_of_total_bins[-2], 2000),
        bandwidth)
    # NOTE(review): the KDE helpers take a colour and presumably draw on the
    # current figure; the original clears it here, so that is kept.
    plt.clf()

    # One averaged density value per grid interval.
    true_smoothed_mean_probabilities = kde_true[:-1] + np.diff(kde_true) / 2.
    false_smoothed_mean_probabilities = kde_false[:-1] + np.diff(kde_false) / 2.

    # Mass of the last histogram bin once stretched back to the smoothed grid.
    last_bin_mass_true = true_dist_of_total_prob[-1] * abs(
        true_dist_of_total_bins[-1] - grid_true[-1])
    last_bin_mass_false = false_dist_of_total_prob[-1] * abs(
        false_dist_of_total_bins[-1] - grid_false[-1])

    prob_true_norm = (
        true_smoothed_mean_probabilities
        / np.sum(true_smoothed_mean_probabilities * np.diff(grid_true))
        * (1. - last_bin_mass_true))
    prob_false_norm = (
        false_smoothed_mean_probabilities
        / np.sum(false_smoothed_mean_probabilities * np.diff(grid_false))
        * (1. - last_bin_mass_false))

    prob_true = np.r_[prob_true_norm, true_dist_of_total_prob[-1]]
    bins_true = np.r_[grid_true, true_dist_of_total_bins[-1]]
    prob_false = np.r_[prob_false_norm, false_dist_of_total_prob[-1]]
    bins_false = np.r_[grid_false, false_dist_of_total_bins[-1]]

    return prob_true, bins_true, prob_false, bins_false
def calculate_or_plot_kern(attribute_of_interaction_, sample_, l_limit, up_limit,
                           number_of_bins, colour=("r", "b"), weights_=None,
                           bandwidth_pos=None, bandwidth_neg=None):
    """Compute (and, through the KDE helpers, plot) per-class density estimates.

    Reads these names from the enclosing scope: ``plot_atr_kernel``,
    ``domain``, ``prior_elements``, ``data_set_name``,
    ``positive_or_negative_side``, ``likelihood_cross_validation`` and
    ``use_smooth_prior_for_estimation``.

    Behaviour by branch:

    * ``domain`` true, attribute ``"distance"``: weighted KDE for both classes
      with the bandwidths passed in and ``plot_atr=True``.
    * ``domain`` true, other attributes: both bandwidths are replaced by
      ``chrom_cross_validation_correlation`` results, then a plain KDE is run
      for both classes.
    * ``domain`` false and ``likelihood_cross_validation`` true: the positive
      bandwidth comes from ``chrom_cross_validation_correlation`` (attribute
      ``"correlation"``) or ``cross_validation`` (otherwise) and is printed;
      negatives use the ``"scott"`` bandwidth.  Legend labels are suppressed
      for the ``"distance"`` attribute on the ``"negative_side"``.
    * ``domain`` false and ``likelihood_cross_validation`` false: nothing is
      estimated and the dictionaries stay empty.

    Args:
        attribute_of_interaction_: attribute name (``"distance"``,
            ``"correlation"``, ...).
        sample_: mapping with ``"positive_interactions"`` and
            ``"negative_interactions"`` samples.
        l_limit, up_limit: ends of the evaluation grids.
        number_of_bins: pair of grid sizes (positive, negative).
        colour: pair of colours passed to the KDE helpers.
        weights_: mapping of per-class weights; required in the weighted branch.
        bandwidth_pos, bandwidth_neg: bandwidths; only used as given in the
            weighted branch, overwritten elsewhere.

    Returns:
        ``(prob_, bins_)`` dictionaries when
        ``use_smooth_prior_for_estimation`` is true, else
        ``([[], []], [[], []])``.
    """
    # NOTE(review): nesting was reconstructed from a whitespace-collapsed
    # source.  The last ``else`` is assumed to pair with the
    # ``attribute_of_interaction_ == "correlation"`` test, and the final
    # return logic is assumed to be at function level -- confirm.
    import kern_density_est

    kern_density_est.plot_atr = plot_atr_kernel

    prob_ = {}
    bins_ = {}
    xgrid = [
        np.linspace(l_limit, up_limit, number_of_bins[0]),
        np.linspace(l_limit, up_limit, number_of_bins[1]),
    ]

    if domain:
        if attribute_of_interaction_ == "distance":
            prob_["positive_interactions"], bins_["positive_interactions"] = (
                kern_density_est.kern_scipy_gaus_weighted(
                    sample_["positive_interactions"], colour[0], xgrid[0],
                    weights=weights_["positive_interactions"],
                    bandwidth=bandwidth_pos, plot_atr=True))
            prob_["negative_interactions"], bins_["negative_interactions"] = (
                kern_density_est.kern_scipy_gaus_weighted(
                    sample_["negative_interactions"], colour[1], xgrid[1],
                    weights=weights_["negative_interactions"],
                    bandwidth=bandwidth_neg, plot_atr=True))
        else:
            # The bandwidths passed in are ignored here; both are re-estimated.
            bandwidth_pos = kern_density_est.chrom_cross_validation_correlation(
                prior_elements, data_set_name,
                thresholds=np.linspace(0.01, .4, 200),
                classification_of_interactions="positive_interactions",
                plot_likelihood_function=False)
            bandwidth_neg = kern_density_est.chrom_cross_validation_correlation(
                prior_elements, data_set_name,
                thresholds=np.linspace(0.01, .4, 200),
                classification_of_interactions="negative_interactions",
                plot_likelihood_function=False)
            prob_["positive_interactions"], bins_["positive_interactions"] = (
                kern_density_est.kern_scipy_gaus(
                    sample_["positive_interactions"], colour[0], xgrid[0],
                    bandwidth=bandwidth_pos))
            prob_["negative_interactions"], bins_["negative_interactions"] = (
                kern_density_est.kern_scipy_gaus(
                    sample_["negative_interactions"], colour[1], xgrid[1],
                    bandwidth=bandwidth_neg))
    else:
        if (attribute_of_interaction_ == "distance"
                and positive_or_negative_side == "negative_side"):
            label_1, label_2 = None, None
        else:
            label_1, label_2 = "positive interactions", "negative interactions"

        if likelihood_cross_validation:
            if attribute_of_interaction_ == "correlation":
                bandwidth_pos = kern_density_est.chrom_cross_validation_correlation(
                    prior_elements, data_set_name,
                    thresholds=np.linspace(0.01, .4, 200),
                    classification_of_interactions="positive_interactions",
                    plot_likelihood_function=False)
            else:
                bandwidth_pos = kern_density_est.cross_validation(
                    sample_["positive_interactions"])

            # Parenthesised so the statement is valid on Python 3 and prints
            # the same text on Python 2.
            print(bandwidth_pos)

            prob_["positive_interactions"], bins_["positive_interactions"] = (
                kern_density_est.kern_scipy_gaus(
                    sample_["positive_interactions"], colour[0], xgrid[0],
                    bandwidth=bandwidth_pos, label=label_1))
            prob_["negative_interactions"], bins_["negative_interactions"] = (
                kern_density_est.kern_scipy_gaus(
                    sample_["negative_interactions"], colour[1], xgrid[1],
                    bandwidth="scott", label=label_2))

    if use_smooth_prior_for_estimation:
        return prob_, bins_
    return [[], []], [[], []]
def _trim_smoothed_tail(smooth_prob, smooth_bins, plateau):
    """Cut off the right-hand tail of a smoothed curve that lies below ``plateau``.

    The curve is searched from its right end; every trailing grid point whose
    density is strictly below ``plateau`` is dropped, plus one more point
    (the slice stop is ``-cut - 1``, so at least one point is always removed).

    Returns the shortened ``(smooth_prob, smooth_bins)`` pair.
    """
    import bisect

    # NOTE(review): bisect requires the reversed, plateau-shifted curve to be
    # sorted ascending, i.e. the tail must decrease monotonically towards the
    # right. That is not checked here.
    cut = bisect.bisect_left(smooth_prob[::-1] - plateau, 0.)
    stop = -cut - 1
    return smooth_prob[:stop], smooth_bins[:stop]


def _midpoint_renormalise(smooth_prob, smooth_bins, last_prob, last_edge):
    """Convert grid-point densities to per-interval densities and rescale them.

    Each interval gets the mean of its two end-point densities. The result is
    scaled so the smoothed part integrates to one minus the mass of the flat
    segment that runs from the end of the smoothed grid to ``last_edge`` at
    height ``last_prob``; that segment is then appended.

    Returns ``(prob, bins)`` with ``len(bins) == len(prob) + 1``.
    """
    import numpy as np

    interval_prob = smooth_prob[:-1] + np.diff(smooth_prob) / 2.
    flat_mass = last_prob * abs(last_edge - smooth_bins[-1])
    smooth_mass = np.sum(interval_prob * np.diff(smooth_bins))
    scaled = interval_prob / smooth_mass * (1. - flat_mass)
    return np.r_[scaled, last_prob], np.r_[smooth_bins, last_edge]


def smooth_priors(ll, true_dist_of_total_prob, true_dist_of_total_bins,
                  false_dist_of_total_prob, false_dist_of_total_bins,
                  kde_mode, bandwidth="silverman", kernel="exponential"):
    """Replace two histogram priors by kernel-smoothed versions.

    For each of the "true" and "false" histograms the function
      1. draws 800000 samples from the interior bins (first and last bin are
         left out) with ``hist_sampler.histogram_sampler``,
      2. smooths the absolute sample values with a helper from
         ``kern_density_est``,
      3. trims the smoothed tail where it falls below the density of the last
         histogram bin,
      4. writes two diagnostic pages through ``ll.savefig()``,
      5. converts the curve to per-interval densities, renormalises it and
         appends the last histogram bin as a flat segment.

    NOTE: another definition of ``smooth_priors`` with the same signature
    appears later in this file; being later, that one is what the module ends
    up exporting.

    Parameters
    ----------
    ll : object with a ``savefig()`` method
        Presumably a ``matplotlib.backends.backend_pdf.PdfPages`` -- confirm
        against the caller. It is returned unchanged as the last tuple item.
    true_dist_of_total_prob, false_dist_of_total_prob : numpy.ndarray
        Histogram densities (indexed with ``[[0, -1]]``, so plain lists will
        not work).
    true_dist_of_total_bins, false_dist_of_total_bins : array-like
        Matching bin edges.
    kde_mode : {"scipy_kde", "sklearn_kde"}
        Selects ``kern_scipy_gaus`` or ``kern_sklearn_expon`` for the "true"
        curve.
    bandwidth : forwarded unchanged to the KDE helpers.
    kernel : forwarded as ``kernel_`` to ``kern_sklearn_expon`` for the "true"
        curve in ``"sklearn_kde"`` mode only.

    Returns
    -------
    (prob_true, bins_true, prob_false, bins_false, ll)

    Raises
    ------
    ValueError
        If ``kde_mode`` is not one of the two supported values. Checked first,
        before any sampling is done.
    """
    if kde_mode not in ("scipy_kde", "sklearn_kde"):
        raise ValueError(
            "kde_mode must be 'scipy_kde' or 'sklearn_kde', got %r" % (kde_mode,))

    import matplotlib.pyplot as plt
    import numpy as np
    import hist_sampler
    import kern_density_est

    true_last_prob = true_dist_of_total_prob[-1]
    true_last_edge = true_dist_of_total_bins[-1]
    false_last_prob = false_dist_of_total_prob[-1]
    false_last_edge = false_dist_of_total_bins[-1]

    # Probability mass held by the first and last histogram bins; these bins
    # are excluded from the resampling below.
    edge_mass_true = np.diff(true_dist_of_total_bins)[[0, -1]] * true_dist_of_total_prob[[0, -1]]
    edge_mass_false = np.diff(false_dist_of_total_bins)[[0, -1]] * false_dist_of_total_prob[[0, -1]]
    kept_true = 1 - np.sum(edge_mass_true)
    kept_false = 1 - np.sum(edge_mass_false)

    # Sampling order (true, then false) is kept in case the sampler uses a
    # shared random state.
    x_ = hist_sampler.histogram_sampler(true_dist_of_total_prob[1:-1], true_dist_of_total_bins[1:-1], 800000)
    y_ = hist_sampler.histogram_sampler(false_dist_of_total_prob[1:-1], false_dist_of_total_bins[1:-1], 800000)
    abs_x = abs(x_)
    abs_y = abs(y_)

    # Normalised histograms of the samples, used only for the first
    # diagnostic page. np.histogram is called directly instead of drawing and
    # discarding a plt.hist(..., normed=True) figure.
    prob_x_2, bins_x_2 = np.histogram(abs_x, bins=200, density=True)
    prob_x_3, bins_x_3 = np.histogram(abs_y, bins=600, density=True)
    plt.clf()

    if kde_mode == "scipy_kde":
        true_smooth_prob, true_smooth_bins = kern_density_est.kern_scipy_gaus(
            abs_x[abs_x < true_dist_of_total_bins[-2]], 'g',
            np.linspace(0, true_dist_of_total_bins[-2] + 50000, 1000), bandwidth)
    else:
        # "sklearn_kde": the kernel is configurable here (default "exponential").
        true_smooth_prob, true_smooth_bins = kern_density_est.kern_sklearn_expon(
            abs_x, 'r',
            np.linspace(0, true_dist_of_total_bins[-2] + 50000, 3000), bandwidth,
            kernel_=kernel)

    # NOTE(review): the "false" curve always goes through kern_sklearn_expon
    # and never receives `kernel`. The call is unconditional because its
    # result is needed below in both modes -- confirm this matches the
    # intended nesting.
    false_smooth_prob, false_smooth_bins = kern_density_est.kern_sklearn_expon(
        abs_y, 'y',
        np.linspace(0, false_dist_of_total_bins[-2] + 50000, 2000),
        bandwidth=bandwidth)
    # Start from an empty figure (the KDE helpers take a colour argument and
    # presumably draw on the current figure).
    plt.clf()

    true_smooth_prob, true_smooth_bins = _trim_smoothed_tail(
        true_smooth_prob, true_smooth_bins, true_last_prob)
    false_smooth_prob, false_smooth_bins = _trim_smoothed_tail(
        false_smooth_prob, false_smooth_bins, false_last_prob)

    # Curves for plotting: start at the origin, follow the smoothed curve and
    # finish with the flat last-bin segment.
    bins_t_plot = np.r_[0, true_smooth_bins, true_smooth_bins[-1], true_last_edge]
    prob_t_plot = np.r_[0, true_smooth_prob * kept_true, true_last_prob, true_last_prob]
    bins_f_plot = np.r_[0, false_smooth_bins, false_smooth_bins[-1], false_last_edge]
    prob_f_plot = np.r_[0, false_smooth_prob * kept_false, false_last_prob, false_last_prob]
    axis_limits = [bins_f_plot[1], bins_f_plot[-2] + 100000, 0., max(prob_t_plot) * 1.5]

    # Page 1: histograms of the resampled values.
    plt.clf()
    plt.bar(bins_x_2[:-1], prob_x_2 * kept_true, np.diff(bins_x_2), alpha=0.2, color="green")
    plt.bar(bins_x_3[:-1], prob_x_3 * kept_false, np.diff(bins_x_3), alpha=0.2, color="yellow")
    plt.axis(axis_limits)
    ll.savefig()

    # Page 2: the smoothed curves.
    plt.clf()
    plt.plot(bins_t_plot, prob_t_plot, 'g')
    plt.plot(bins_f_plot, prob_f_plot, 'y')
    plt.fill_between(bins_t_plot, prob_t_plot, where=None, color='g', alpha=0.2, lw=2)
    plt.fill_between(bins_f_plot, prob_f_plot, where=None, color='y', alpha=0.2, lw=2)
    plt.axis(axis_limits)
    ll.savefig()

    prob_true, bins_true = _midpoint_renormalise(
        true_smooth_prob, true_smooth_bins, true_last_prob, true_last_edge)
    prob_false, bins_false = _midpoint_renormalise(
        false_smooth_prob, false_smooth_bins, false_last_prob, false_last_edge)

    plt.clf()
    return prob_true, bins_true, prob_false, bins_false, ll
def _trim_smoothed_tail(smooth_prob, smooth_bins, plateau):
    """Cut off the right-hand tail of a smoothed curve that lies below ``plateau``.

    The curve is searched from its right end; every trailing grid point whose
    density is strictly below ``plateau`` is dropped, plus one more point
    (the slice stop is ``-cut - 1``, so at least one point is always removed).

    Returns the shortened ``(smooth_prob, smooth_bins)`` pair.
    """
    import bisect

    # NOTE(review): bisect requires the reversed, plateau-shifted curve to be
    # sorted ascending, i.e. the tail must decrease monotonically towards the
    # right. That is not checked here.
    cut = bisect.bisect_left(smooth_prob[::-1] - plateau, 0.)
    stop = -cut - 1
    return smooth_prob[:stop], smooth_bins[:stop]


def _midpoint_renormalise(smooth_prob, smooth_bins, last_prob, last_edge):
    """Convert grid-point densities to per-interval densities and rescale them.

    Each interval gets the mean of its two end-point densities. The result is
    scaled so the smoothed part integrates to one minus the mass of the flat
    segment that runs from the end of the smoothed grid to ``last_edge`` at
    height ``last_prob``; that segment is then appended.

    Returns ``(prob, bins)`` with ``len(bins) == len(prob) + 1``.
    """
    import numpy as np

    interval_prob = smooth_prob[:-1] + np.diff(smooth_prob) / 2.
    flat_mass = last_prob * abs(last_edge - smooth_bins[-1])
    smooth_mass = np.sum(interval_prob * np.diff(smooth_bins))
    scaled = interval_prob / smooth_mass * (1. - flat_mass)
    return np.r_[scaled, last_prob], np.r_[smooth_bins, last_edge]


def smooth_priors(ll, true_dist_of_total_prob, true_dist_of_total_bins,
                  false_dist_of_total_prob, false_dist_of_total_bins,
                  kde_mode, bandwidth="silverman", kernel="exponential"):
    """Replace two histogram priors by kernel-smoothed versions.

    For each of the "true" and "false" histograms the function
      1. draws 800000 samples from the interior bins (first and last bin are
         left out) with ``hist_sampler.histogram_sampler``,
      2. smooths the absolute sample values with a helper from
         ``kern_density_est``,
      3. trims the smoothed tail where it falls below the density of the last
         histogram bin,
      4. writes two diagnostic pages through ``ll.savefig()``,
      5. converts the curve to per-interval densities, renormalises it and
         appends the last histogram bin as a flat segment.

    NOTE: a definition of ``smooth_priors`` with the same signature also
    appears earlier in this file; this later one shadows it.

    Parameters
    ----------
    ll : object with a ``savefig()`` method
        Presumably a ``matplotlib.backends.backend_pdf.PdfPages`` -- confirm
        against the caller. It is returned unchanged as the last tuple item.
    true_dist_of_total_prob, false_dist_of_total_prob : numpy.ndarray
        Histogram densities (indexed with ``[[0, -1]]``, so plain lists will
        not work).
    true_dist_of_total_bins, false_dist_of_total_bins : array-like
        Matching bin edges.
    kde_mode : {"scipy_kde", "sklearn_kde"}
        Selects ``kern_scipy_gaus`` or ``kern_sklearn_expon`` for the "true"
        curve.
    bandwidth : forwarded unchanged to the KDE helpers.
    kernel : forwarded as ``kernel_`` to ``kern_sklearn_expon`` for the "true"
        curve in ``"sklearn_kde"`` mode only.

    Returns
    -------
    (prob_true, bins_true, prob_false, bins_false, ll)

    Raises
    ------
    ValueError
        If ``kde_mode`` is not one of the two supported values. Checked first,
        before any sampling is done.
    """
    if kde_mode not in ("scipy_kde", "sklearn_kde"):
        raise ValueError(
            "kde_mode must be 'scipy_kde' or 'sklearn_kde', got %r" % (kde_mode,))

    import matplotlib.pyplot as plt
    import numpy as np
    import hist_sampler
    import kern_density_est

    true_last_prob = true_dist_of_total_prob[-1]
    true_last_edge = true_dist_of_total_bins[-1]
    false_last_prob = false_dist_of_total_prob[-1]
    false_last_edge = false_dist_of_total_bins[-1]

    # Probability mass held by the first and last histogram bins; these bins
    # are excluded from the resampling below.
    edge_mass_true = np.diff(true_dist_of_total_bins)[[0, -1]] * true_dist_of_total_prob[[0, -1]]
    edge_mass_false = np.diff(false_dist_of_total_bins)[[0, -1]] * false_dist_of_total_prob[[0, -1]]
    kept_true = 1 - np.sum(edge_mass_true)
    kept_false = 1 - np.sum(edge_mass_false)

    # Sampling order (true, then false) is kept in case the sampler uses a
    # shared random state.
    x_ = hist_sampler.histogram_sampler(true_dist_of_total_prob[1:-1], true_dist_of_total_bins[1:-1], 800000)
    y_ = hist_sampler.histogram_sampler(false_dist_of_total_prob[1:-1], false_dist_of_total_bins[1:-1], 800000)
    abs_x = abs(x_)
    abs_y = abs(y_)

    # Normalised histograms of the samples, used only for the first
    # diagnostic page. np.histogram is called directly instead of drawing and
    # discarding a plt.hist(..., normed=True) figure.
    prob_x_2, bins_x_2 = np.histogram(abs_x, bins=200, density=True)
    prob_x_3, bins_x_3 = np.histogram(abs_y, bins=600, density=True)
    plt.clf()

    if kde_mode == "scipy_kde":
        true_smooth_prob, true_smooth_bins = kern_density_est.kern_scipy_gaus(
            abs_x[abs_x < true_dist_of_total_bins[-2]], 'g',
            np.linspace(0, true_dist_of_total_bins[-2] + 50000, 1000), bandwidth)
    else:
        # "sklearn_kde": the kernel is configurable here (default "exponential").
        true_smooth_prob, true_smooth_bins = kern_density_est.kern_sklearn_expon(
            abs_x, 'r',
            np.linspace(0, true_dist_of_total_bins[-2] + 50000, 3000), bandwidth,
            kernel_=kernel)

    # NOTE(review): the "false" curve always goes through kern_sklearn_expon
    # and never receives `kernel`. The call is unconditional because its
    # result is needed below in both modes -- confirm this matches the
    # intended nesting.
    false_smooth_prob, false_smooth_bins = kern_density_est.kern_sklearn_expon(
        abs_y, 'y',
        np.linspace(0, false_dist_of_total_bins[-2] + 50000, 2000),
        bandwidth=bandwidth)
    # Start from an empty figure (the KDE helpers take a colour argument and
    # presumably draw on the current figure).
    plt.clf()

    true_smooth_prob, true_smooth_bins = _trim_smoothed_tail(
        true_smooth_prob, true_smooth_bins, true_last_prob)
    false_smooth_prob, false_smooth_bins = _trim_smoothed_tail(
        false_smooth_prob, false_smooth_bins, false_last_prob)

    # Curves for plotting: start at the origin, follow the smoothed curve and
    # finish with the flat last-bin segment.
    bins_t_plot = np.r_[0, true_smooth_bins, true_smooth_bins[-1], true_last_edge]
    prob_t_plot = np.r_[0, true_smooth_prob * kept_true, true_last_prob, true_last_prob]
    bins_f_plot = np.r_[0, false_smooth_bins, false_smooth_bins[-1], false_last_edge]
    prob_f_plot = np.r_[0, false_smooth_prob * kept_false, false_last_prob, false_last_prob]
    axis_limits = [bins_f_plot[1], bins_f_plot[-2] + 100000, 0., max(prob_t_plot) * 1.5]

    # Page 1: histograms of the resampled values.
    plt.clf()
    plt.bar(bins_x_2[:-1], prob_x_2 * kept_true, np.diff(bins_x_2), alpha=0.2, color="green")
    plt.bar(bins_x_3[:-1], prob_x_3 * kept_false, np.diff(bins_x_3), alpha=0.2, color="yellow")
    plt.axis(axis_limits)
    ll.savefig()

    # Page 2: the smoothed curves.
    plt.clf()
    plt.plot(bins_t_plot, prob_t_plot, 'g')
    plt.plot(bins_f_plot, prob_f_plot, 'y')
    plt.fill_between(bins_t_plot, prob_t_plot, where=None, color='g', alpha=0.2, lw=2)
    plt.fill_between(bins_f_plot, prob_f_plot, where=None, color='y', alpha=0.2, lw=2)
    plt.axis(axis_limits)
    ll.savefig()

    prob_true, bins_true = _midpoint_renormalise(
        true_smooth_prob, true_smooth_bins, true_last_prob, true_last_edge)
    prob_false, bins_false = _midpoint_renormalise(
        false_smooth_prob, false_smooth_bins, false_last_prob, false_last_edge)

    plt.clf()
    return prob_true, bins_true, prob_false, bins_false, ll
interacting_enhancers_, non_interacting_enhancers_ = inter_enhancer(chrom) chrom_enh_survived_non_interacting = np.where(((enh_chroms == chrom)*np.invert(proximal_enhancers_mask)*filtered_enhancers)[non_interacting_enhancers_])[0]# chrom_enh_survived_interacting = np.where(((enh_chroms == chrom)*np.invert(proximal_enhancers_mask)*filtered_enhancers)[interacting_enhancers_])[0] total_interacting += enh_time_series[chrom_enh_survived_interacting].sum(1).tolist() total_non_interacting += enh_time_series[chrom_enh_survived_non_interacting].sum(1).tolist() for x, label, colour in [[np.log10(total_interacting), 'known interacting', "g"], [np.log10(total_non_interacting), 'unknown status', "r"]]: xgrid = np.linspace(min(x)*0.8, max(x)*1.2, num=200) freq__, bins__ = kern.kern_scipy_gaus(x, colour, xgrid, bandwidth = "scott", plot_atr = False, label=None) #print len(freq__), len(bins__) plt.figure(n+1, figsize=(8, 6), dpi=200) plot_double_sided_dist_smooth_histograms(bins__[:-1] + np.diff(bins__)/2, freq__[:-1], colour, label) if data_set_name == "ER": plt.title(u'ER-\u03B1', fontsize=28) else: plt.title(data_set_name, fontsize=28) #plt.ylabel('density', fontsize=28) #plt.xlabel('correlation', fontsize=28) #plt.figure(n, figsize=(9,6)) plt.xlabel("enrichment", fontsize = size_of_y_label)