def calculate_bandwidth_dist(sample_):
    """Choose kernel bandwidths for the distance prior.

    Returns a dict keyed by "positive_interactions" / "negative_interactions".
    Where this function fills the values itself they are dicts keyed by
    "positive_side" / "negative_side"; otherwise they are whatever
    ``kern_density_est.chrom_cross_validation_distance`` returns.

    ``sample_`` is only read when ``likelihood_cross_validation`` is false,
    as ``sample_[side]["positive_interactions"]``.

    The names ``domain``, ``likelihood_cross_validation``, ``prior_elements``,
    ``weights`` and ``np`` are not defined here and must exist in the
    enclosing module scope when the function is called.

    NOTE(review): this function is defined twice in a row in this file with
    equivalent logic; the later definition is the one in effect.
    """
    import kern_density_est

    sides = ["positive_side", "negative_side"]
    optimum = {}

    # print(x) with a single argument behaves the same on Python 2 and 3.
    if domain:
        print("positive_interactions")

    if likelihood_cross_validation:
        if domain:
            print("negative_interactions")
            # Domain mode cross-validates both classes, using the weights.
            for classification in ["positive_interactions", "negative_interactions"]:
                optimum[classification] = kern_density_est.chrom_cross_validation_distance(
                    prior_elements,
                    thresholds=np.linspace(0.01, .4, 200),
                    classification_of_interactions=classification,
                    weights=weights)
        else:
            optimum["positive_interactions"] = kern_density_est.chrom_cross_validation_distance(
                prior_elements,
                thresholds=np.linspace(0.01, .4, 200),
                classification_of_interactions="positive_interactions")
            # Negatives are not cross-validated here; the "scott" rule name is used.
            optimum["negative_interactions"] = dict.fromkeys(sides, "scott")
    else:
        optimum["positive_interactions"] = {}
        optimum["negative_interactions"] = {}
        for side in sides:
            optimum["positive_interactions"][side] = kern_density_est.cross_validation(
                sample_[side]["positive_interactions"])
            optimum["negative_interactions"][side] = "scott"

    return optimum
def calculate_bandwidth_dist(sample_):
    """Choose kernel bandwidths for the distance prior.

    Returns a dict keyed by "positive_interactions" / "negative_interactions".
    Where this function fills the values itself they are dicts keyed by
    "positive_side" / "negative_side"; otherwise they are whatever
    ``kern_density_est.chrom_cross_validation_distance`` returns.

    ``sample_`` is only read when ``likelihood_cross_validation`` is false,
    as ``sample_[side]["positive_interactions"]``.

    The names ``domain``, ``likelihood_cross_validation``, ``prior_elements``,
    ``weights`` and ``np`` are not defined here and must exist in the
    enclosing module scope when the function is called.

    NOTE(review): an equivalent definition of this function appears
    immediately before this one in the file; this later definition is the
    one in effect.
    """
    import kern_density_est

    sides = ["positive_side", "negative_side"]
    optimum = {}

    # print(x) with a single argument behaves the same on Python 2 and 3.
    if domain:
        print("positive_interactions")

    if likelihood_cross_validation:
        if domain:
            print("negative_interactions")
            # Domain mode cross-validates both classes, using the weights.
            for classification in ["positive_interactions", "negative_interactions"]:
                optimum[classification] = kern_density_est.chrom_cross_validation_distance(
                    prior_elements,
                    thresholds=np.linspace(0.01, .4, 200),
                    classification_of_interactions=classification,
                    weights=weights)
        else:
            optimum["positive_interactions"] = kern_density_est.chrom_cross_validation_distance(
                prior_elements,
                thresholds=np.linspace(0.01, .4, 200),
                classification_of_interactions="positive_interactions")
            # Negatives are not cross-validated here; the "scott" rule name is used.
            optimum["negative_interactions"] = dict.fromkeys(sides, "scott")
    else:
        optimum["positive_interactions"] = {}
        optimum["negative_interactions"] = {}
        for side in sides:
            optimum["positive_interactions"][side] = kern_density_est.cross_validation(
                sample_[side]["positive_interactions"])
            optimum["negative_interactions"][side] = "scott"

    return optimum
def prior_bins_prob_and_plotter(prior_elements, low_dist, up_dist, use_smooth_prior_for_estimation, plot_atr, plot_atr_kernel):
    """Fill ``prior_elements`` with distance/correlation priors and optionally plot them.

    For every interaction class ("positive_interactions", "negative_interactions")
    the function writes "prior_frequencies" and "prior_bins" under
    ``prior_elements[mode][class]["distance"]`` (per side and joined when
    ``one_sided_or_two_sided == "double_sided"``) and under
    ``prior_elements[mode][class]["correlation"][data_set_name]``.

    Parameters
    ----------
    prior_elements : nested dict, modified in place and returned.
    low_dist, up_dist : distance limits. Indexed by "positive_side" /
        "negative_side" in double-sided mode, passed through as-is otherwise.
    use_smooth_prior_for_estimation : when true, the adaptive-histogram priors
        are replaced by kernel-density estimates evaluated on a 2000-point grid.
    plot_atr : when true, draw the histogram priors.
    plot_atr_kernel : when true, let ``kern_density_est`` draw the kernel
        estimates (the flag is copied to ``kern_density_est.plot_atr``).

    Settings (``domain``, ``mode``, ``chroms_in_prior``, ...) are read from the
    ``config_variables`` module. When either plot flag is set, the figures are
    saved to ``multipage_priors_average<one_sided_or_two_sided>.pdf`` and shown.

    NOTE(review): the original source had lost its indentation; the nesting
    used here was reconstructed from the statement order. Spots where more
    than one nesting was plausible carry their own NOTE(review).
    """
    import itertools

    from matplotlib.backends.backend_pdf import PdfPages
    import matplotlib.pyplot as plt
    import config_variables

    domain = config_variables.domain
    mode = config_variables.mode
    chroms_in_prior = config_variables.chroms_in_prior
    np = config_variables.np
    dataset_names_option = config_variables.dataset_names_option
    one_sided_or_two_sided = config_variables.one_sided_or_two_sided
    log_distances = config_variables.log_distances
    likelihood_cross_validation = config_variables.likelihood_cross_validation
    interacting_enhancers_only = config_variables.interacting_enhancers_only

    low_cor, up_cor = -1., 1.
    classifications = ["positive_interactions", "negative_interactions"]
    sides = ["positive_side", "negative_side"]
    signed_sides = [(1, "positive_side"), (-1, "negative_side")]
    plotting = plot_atr or plot_atr_kernel

    def bandwidth_candidates():
        # Fresh grid of candidate bandwidths for each cross-validation call.
        return np.linspace(0.01, .4, 200)

    def gather_values(classification, attribute, data_set_name=None):
        # Concatenate the per-chromosome attribute values into one array.
        node = prior_elements[mode][classification][attribute]
        if data_set_name is not None:
            node = node[data_set_name]
        per_chrom = node["attribute_values"]
        chained = itertools.chain.from_iterable(per_chrom[chrom_] for chrom_ in chroms_in_prior)
        return np.array(list(chained))

    def drop_zero_counts(values, counts):
        # Remove entries whose count is zero from both arrays with ONE mask.
        # The original filtered the counts first and then built the mask for
        # the values from the already-filtered counts, giving a mask of the
        # wrong length whenever a zero was present.
        counts = np.asarray(counts)
        keep = counts != 0
        return np.asarray(values)[keep], counts[keep]

    def store_prior(node, frequencies, edges):
        # Write one prior (densities + bin edges) into a prior_elements node.
        node["prior_frequencies"] = frequencies
        node["prior_bins"] = edges

    def smoothed_bin_density(density, grid):
        # Turn a density sampled on grid points into per-interval densities
        # (midpoint values), renormalised to integrate to one over the grid.
        midpoints = density[:-1] + np.diff(density)/2.
        midpoints /= sum(midpoints*np.diff(grid))
        return midpoints, grid

    def bins_prep_adaptive(array, l_limit, u_limit, how_many_in_bin):
        # Bin edges taken at every how_many_in_bin-th sorted value (limits
        # included), always ending on the largest value.
        array_sorted = sorted(np.concatenate((array, [l_limit], [u_limit])))
        bins = array_sorted[::how_many_in_bin]
        if bins[-1] != array_sorted[-1]:
            bins.append(array_sorted[-1])
        return bins

    def profile_histogram_adaptive_domains(array, l_limit, u_limit, how_many_in_bin, possible_distances_counts):
        """Adaptive-bin density where each value is weighted by 1/possible_distances_counts.

        Returns (densities, bin_edges).
        """
        bins = bins_prep_adaptive(array, l_limit, u_limit, how_many_in_bin)
        n_bins = len(bins) - 1
        distance_allocations = np.digitize(array, bins=bins) - 1
        # np.digitize bins are half-open, so a value equal to the last edge
        # lands one past the final bin; fold it into the last bin, the way
        # np.histogram treats its closing edge.
        distance_allocations = np.clip(distance_allocations, 0, n_bins - 1)

        inverse_counts = 1/possible_distances_counts.astype(float)
        norm = np.sum(inverse_counts)
        probabilities = np.zeros(n_bins)
        # Index by the bin id itself; indexing by enumeration position (as the
        # original did) shifts every value after an empty bin.
        for el in np.unique(distance_allocations):
            probabilities[el] = np.sum(inverse_counts[distance_allocations == el])

        prob = probabilities/norm/np.diff(bins)
        return prob, bins

    def profile_histogram_adaptive(array, l_limit, u_limit, how_many_in_bin):
        """Adaptive-bin density histogram of ``array``; returns (densities, bin_edges)."""
        bins = bins_prep_adaptive(array, l_limit, u_limit, how_many_in_bin)
        return np.histogram(array, bins=bins, density=True)

    def initial_numbers_in_bin():
        # Values per bin, in the order the priors are built:
        # positive distance, positive correlation, negative distance,
        # negative correlation.
        promoter_enhancer = mode == 'promoter_enhancer_interactions'
        if domain:
            return [25, 20, 40, 50] if promoter_enhancer else [70, 70, 200, 200]
        if promoter_enhancer:
            return [25, 20, 2000, 2000] if interacting_enhancers_only else [25, 20, 10000, 10000]
        return [80, 70, 20000, 20000] if interacting_enhancers_only else [80, 70, 200000, 200000]

    def create_priors_domains():
        """Build the histogram priors for every class/attribute and store them in prior_elements."""
        numbers_in_bin = iter(initial_numbers_in_bin())

        for classification in classifications:
            for attribute in ["distance", "correlation"]:
                number_in_bin = next(numbers_in_bin)
                node = prior_elements[mode][classification][attribute]
                node["number_in_bin_of_histogram"] = number_in_bin

                if attribute == "distance":
                    total_array = gather_values(classification, "distance")

                    if not domain:
                        if one_sided_or_two_sided == "double_sided":
                            for sign, side in signed_sides:
                                node[side] = {}
                                frequencies, edges = profile_histogram_adaptive(
                                    total_array[sign*total_array > 0],
                                    l_limit=low_dist[side],
                                    u_limit=up_dist[side],
                                    how_many_in_bin=int(number_in_bin/2.))
                                store_prior(node[side], frequencies, edges)
                        else:
                            frequencies, edges = profile_histogram_adaptive(total_array, low_dist, up_dist, number_in_bin)
                            store_prior(node, frequencies, edges)
                    else:
                        total_array, counts = drop_zero_counts(total_array, node["possible_distances_counts"])

                        if one_sided_or_two_sided == "double_sided":
                            for sign, side in signed_sides:
                                node[side] = {}
                                on_side = sign*total_array > 0
                                frequencies, edges = profile_histogram_adaptive_domains(
                                    total_array[on_side],
                                    low_dist[side],
                                    up_dist[side],
                                    int(number_in_bin/2.),
                                    counts[on_side])
                                store_prior(node[side], frequencies, edges)
                        elif one_sided_or_two_sided == "single_sided":
                            frequencies, edges = profile_histogram_adaptive_domains(
                                total_array, low_dist, up_dist, number_in_bin, counts)
                            store_prior(node, frequencies, edges)
                else:
                    for data_set_name in dataset_names_option:
                        values = gather_values(classification, "correlation", data_set_name)
                        frequencies, edges = profile_histogram_adaptive(values, low_cor, up_cor, number_in_bin)
                        store_prior(node[data_set_name], frequencies, edges)

    create_priors_domains()

    def join_two_sides_of_prior_together(classification_of_interactions):
        """Concatenate the negative- and positive-side distance priors into one renormalised prior."""
        distance_node = prior_elements[mode][classification_of_interactions]["distance"]
        negative = distance_node["negative_side"]
        positive = distance_node["positive_side"]
        negative_prob = negative["prior_frequencies"]
        positive_prob = positive["prior_frequencies"]

        bins_ = np.r_[negative["prior_bins"], positive["prior_bins"]]
        # One extra interval appears between the two sides: it gets zero
        # density with log distances, else the mean of its two neighbours.
        if log_distances:
            gap = 0
        else:
            gap = (negative_prob[-1] + positive_prob[0])/2.
        prob_ = np.r_[negative_prob, [gap], positive_prob]
        prob_ /= sum(prob_*np.diff(bins_))
        return prob_, bins_

    def plot_histogram_priors(bins_, prob_, colour=("g", "y")):
        """Draw the positive and negative histogram priors as translucent bars."""
        for classification, bar_colour in zip(classifications, colour):
            edges = bins_[classification]
            # align="edge": bars start at the left bin edge. It was the
            # implicit default before matplotlib 2.0; without it newer
            # versions centre each bar on the edge.
            plt.bar(edges[:-1], prob_[classification], np.diff(edges),
                    alpha=0.2, color=bar_colour, align="edge")

    def calculate_or_plot_kern(attribute_of_interaction_, sample_, l_limit, up_limit, number_of_bins,
                               colour=("r", "b"), weights_=None, bandwidth_pos=None, bandwidth_neg=None,
                               side=None, data_set_name=None):
        """Kernel-density estimate (and, via kern_density_est, plot) of both classes.

        ``sample_`` maps each interaction class to its values. ``side`` and
        ``data_set_name`` were read from the enclosing scope in the original;
        they are explicit here so that the helper also works when no side
        loop is running.

        Returns (densities, grids) keyed by interaction class when
        ``use_smooth_prior_for_estimation`` is true, else two empty placeholders.
        """
        import kern_density_est
        kern_density_est.plot_atr = plot_atr_kernel

        xgrid = [np.linspace(l_limit, up_limit, number_of_bins[0]),
                 np.linspace(l_limit, up_limit, number_of_bins[1])]
        positives = sample_["positive_interactions"]
        negatives = sample_["negative_interactions"]

        def correlation_bandwidth(classification):
            return kern_density_est.chrom_cross_validation_correlation(
                prior_elements, data_set_name,
                thresholds=bandwidth_candidates(),
                classification_of_interactions=classification,
                plot_likelihood_function=False)

        prob_ = {}
        bins_ = {}

        if domain:
            if attribute_of_interaction_ == "distance":
                prob_["positive_interactions"], bins_["positive_interactions"] = kern_density_est.kern_scipy_gaus_weighted(
                    positives, colour[0], xgrid[0],
                    weights=weights_["positive_interactions"], bandwidth=bandwidth_pos, plot_atr=True)
                prob_["negative_interactions"], bins_["negative_interactions"] = kern_density_est.kern_scipy_gaus_weighted(
                    negatives, colour[1], xgrid[1],
                    weights=weights_["negative_interactions"], bandwidth=bandwidth_neg, plot_atr=True)
            else:
                bandwidth_pos = correlation_bandwidth("positive_interactions")
                bandwidth_neg = correlation_bandwidth("negative_interactions")
                prob_["positive_interactions"], bins_["positive_interactions"] = kern_density_est.kern_scipy_gaus(
                    positives, colour[0], xgrid[0], bandwidth=bandwidth_pos)
                prob_["negative_interactions"], bins_["negative_interactions"] = kern_density_est.kern_scipy_gaus(
                    negatives, colour[1], xgrid[1], bandwidth=bandwidth_neg)
        else:
            # Only the positive side gets labels for the distance prior, so
            # the legend does not list each curve twice.
            if attribute_of_interaction_ == "distance" and side == "negative_side":
                label_1, label_2 = None, None
            else:
                label_1, label_2 = "positive interactions", "negative interactions"

            if likelihood_cross_validation:
                # For distance the caller-supplied bandwidth_pos is used.
                if attribute_of_interaction_ == "correlation":
                    bandwidth_pos = correlation_bandwidth("positive_interactions")
            else:
                bandwidth_pos = kern_density_est.cross_validation(positives)
            print(bandwidth_pos)

            prob_["positive_interactions"], bins_["positive_interactions"] = kern_density_est.kern_scipy_gaus(
                positives, colour[0], xgrid[0], bandwidth=bandwidth_pos, label=label_1)
            prob_["negative_interactions"], bins_["negative_interactions"] = kern_density_est.kern_scipy_gaus(
                negatives, colour[1], xgrid[1], bandwidth="scott", label=label_2)

        if use_smooth_prior_for_estimation:
            return prob_, bins_
        return [[], []], [[], []]

    plt.rcParams['xtick.labelsize'] = 20.
    number_of_bins = [2000, 2000]

    # ------------------------------------------------------------------
    # Distance prior
    # ------------------------------------------------------------------
    if one_sided_or_two_sided == "double_sided":
        if plotting:
            plt.figure(1, figsize=(8, 6), dpi=200)
            plt.title("Distance prior", fontsize=20)
            plt.ylabel('density', fontsize=20)
            plt.xlabel("distance [B]", fontsize=20)
            # NOTE(review): ticks sit at -8, -5, 0, 5, 8 but are labelled
            # 10^8, 10^4, 10^0, 10^4, 10^8 - kept exactly as in the original;
            # confirm the pairing of +-5 with 10^4 is intended.
            tick_labels = [8, 4, 0, 4, 8]
            string_labels = [r"$10^{%2d}$" % (i) for i in tick_labels]
            plt.xticks([-8., -5., 0., 5., 8.], string_labels, fontsize=20)
            # NOTE(review): in the collapsed source this call directly follows
            # a trailing comment, so it may have been commented out.
            plt.xlim([-8.5, 8.5])

        prob = {}
        bins = {}
        prob_smooth = {}
        bins_smooth = {}
        attribute_ = {}
        weights = {}

        for sign, side in signed_sides:
            prob[side] = {}
            bins[side] = {}
            attribute_[side] = {}
            weights[side] = {}
            prob_smooth[side] = {}
            bins_smooth[side] = {}

            for classification in classifications:
                distance_node = prior_elements[mode][classification]["distance"]
                prob[side][classification] = distance_node[side]["prior_frequencies"]
                bins[side][classification] = distance_node[side]["prior_bins"]

                total_array = gather_values(classification, "distance")
                if domain:
                    total_array, counts = drop_zero_counts(total_array, distance_node["possible_distances_counts"])
                    inverse_counts = 1./counts[sign*total_array > 0]
                    weights[side][classification] = inverse_counts/np.sum(inverse_counts)
                attribute_[side][classification] = total_array[sign*total_array > 0]

            if plot_atr:
                plot_histogram_priors(bins[side], prob[side])

        if use_smooth_prior_for_estimation or plot_atr_kernel:
            import kern_density_est

            optimum = {}
            if domain:
                print("positive_interactions")
            if likelihood_cross_validation:
                if domain:
                    print("negative_interactions")
                    for classification in classifications:
                        optimum[classification] = kern_density_est.chrom_cross_validation_distance(
                            prior_elements,
                            thresholds=bandwidth_candidates(),
                            classification_of_interactions=classification,
                            plot_likelihood_function=False,
                            weights=weights)
                else:
                    optimum["positive_interactions"] = kern_density_est.chrom_cross_validation_distance(
                        prior_elements,
                        thresholds=bandwidth_candidates(),
                        classification_of_interactions="positive_interactions",
                        plot_likelihood_function=False)
                    optimum["negative_interactions"] = dict.fromkeys(sides, None)
            else:
                optimum["positive_interactions"] = dict.fromkeys(sides, None)
                optimum["negative_interactions"] = dict.fromkeys(sides, None)

            for sign, side in signed_sides:
                print(side)
                prob_smooth[side], bins_smooth[side] = calculate_or_plot_kern(
                    "distance", attribute_[side], low_dist[side], up_dist[side], number_of_bins,
                    colour=("g", "y"),
                    weights_=weights[side],
                    bandwidth_pos=optimum["positive_interactions"][side],
                    bandwidth_neg=optimum["negative_interactions"][side],
                    side=side)

            if use_smooth_prior_for_estimation:
                for side in sides:
                    for classification in classifications:
                        frequencies, edges = smoothed_bin_density(
                            prob_smooth[side][classification], bins_smooth[side][classification])
                        store_prior(prior_elements[mode][classification]["distance"][side], frequencies, edges)

        # NOTE(review): the collapsed source does not show how deeply this
        # loop was nested. It is run for every double-sided call so the
        # joined prior is always available.
        for classification in classifications:
            frequencies, edges = join_two_sides_of_prior_together(classification)
            store_prior(prior_elements[mode][classification]["distance"], frequencies, edges)

    else:
        prob = {}
        bins = {}
        attribute_ = {}
        weights = {}

        for classification in classifications:
            distance_node = prior_elements[mode][classification]["distance"]
            prob[classification] = distance_node["prior_frequencies"]
            bins[classification] = distance_node["prior_bins"]

            total_array = gather_values(classification, "distance")
            if domain:
                total_array, counts = drop_zero_counts(total_array, distance_node["possible_distances_counts"])
                inverse_counts = 1./counts
                weights[classification] = inverse_counts/np.sum(inverse_counts)
            attribute_[classification] = total_array

        if plotting:
            plt.figure(1, figsize=(8, 8), dpi=200)
            plt.title("Distance prior", fontsize=20)
            plt.ylabel('density', fontsize=20)
            plt.xlabel('distance', fontsize=20)

        if plot_atr:
            plot_histogram_priors(bins, prob)

        if use_smooth_prior_for_estimation or plot_atr_kernel:
            # The original call omitted the attribute name and passed a single
            # array, which raised TypeError; pass the per-class samples (and
            # weights) the helper indexes into.
            # NOTE(review): no cross-validated distance bandwidths exist for
            # the single-sided case, so None is forwarded - confirm that
            # kern_density_est accepts bandwidth=None here.
            prob_smooth, bins_smooth = calculate_or_plot_kern(
                "distance", attribute_, low_dist, up_dist, number_of_bins,
                colour=("g", "y"), weights_=weights)

        if use_smooth_prior_for_estimation:
            for classification in classifications:
                frequencies, edges = smoothed_bin_density(prob_smooth[classification], bins_smooth[classification])
                store_prior(prior_elements[mode][classification]["distance"], frequencies, edges)

    # ------------------------------------------------------------------
    # Correlation priors (one figure / PDF page per data set)
    # ------------------------------------------------------------------
    pdf = None
    try:
        if plotting:
            x1, x2, y1, y2 = plt.axis()
            plt.axis((x1, x2, 0., y2*1.2))
            plt.legend()
            pdf = PdfPages('multipage_priors_average{0}.pdf'.format(one_sided_or_two_sided))
            pdf.savefig()

        prob = {}
        bins = {}
        attribute_ = {}

        for i, data_set_name in enumerate(dataset_names_option):
            print(data_set_name)

            for classification in classifications:
                correlation_node = prior_elements[mode][classification]["correlation"][data_set_name]
                prob[classification] = correlation_node["prior_frequencies"]
                bins[classification] = correlation_node["prior_bins"]
                attribute_[classification] = gather_values(classification, "correlation", data_set_name)

            if plotting:
                plt.figure(i+2, figsize=(8, 6), dpi=200)
                if data_set_name == "ER":
                    plt.title(u'ER-\u03B1', fontsize=20)
                else:
                    plt.title(data_set_name, fontsize=20)
                plt.ylabel('density', fontsize=20)
                plt.xlabel('correlation', fontsize=20)

            if plot_atr:
                plot_histogram_priors(bins, prob)

            if use_smooth_prior_for_estimation or plot_atr_kernel:
                prob_smooth, bins_smooth = calculate_or_plot_kern(
                    "correlation", attribute_, -1., 1., number_of_bins,
                    colour=("g", "y"), data_set_name=data_set_name)

            if use_smooth_prior_for_estimation:
                for classification in classifications:
                    frequencies, edges = smoothed_bin_density(prob_smooth[classification], bins_smooth[classification])
                    store_prior(prior_elements[mode][classification]["correlation"][data_set_name], frequencies, edges)

            if plotting:
                x1, x2, y1, y2 = plt.axis()
                plt.axis((x1, x2, 0, y2*1.2))
                plt.legend()
                pdf.savefig()
    finally:
        # Close the PDF even if building or plotting a prior fails.
        if pdf is not None:
            pdf.close()

    if plotting:
        plt.show()

    return prior_elements