# Driver settings for the enhancer-gene interaction pipeline.
#
# Every "mode" below is picked by indexing a tuple that spells out the
# alternatives; change the index to switch to another alternative.

import graph_plotter_all_PR_PCA_true_only_MAP_per_million_multi_chroms_multi_filters_non_domain_inter_background_pro_dist_alt_mixer_cleaned

# Short alias for the pipeline module imported above.
ENHANCER_GENE_INTERACTOR = graph_plotter_all_PR_PCA_true_only_MAP_per_million_multi_chroms_multi_filters_non_domain_inter_background_pro_dist_alt_mixer_cleaned

# Earlier direct invocation, kept for reference only:
# graph_plotter_all_PR_PCA_true_only_MAP_per_million_multi_chroms_multi_filters_non_domain_inter_background_pro_dist_alt_mixer_cleaned.executor(redo_raw_CHIA_PET_interactions = False, upstream = 300)

PR_CURVES = ("SELECTIVE", "ALL")[0]

mode_of_code = (
    "ODD",
    "EVEN",
    "FULL",
    "GAUSSIAN_SAMPLE",
    "MK_PAIRWISE",
)[3]

mode_of_code_2 = ("WITHOUT", "ADD_GAUSSIAN_VALIDATION")[0]

mode_of_features_and_interactions = (
    "FEATURES_AND_INTERACTIONS_TOGETHER",
    "FEATURES_AND_INTERACTIONS_SEPERATE",
)[0]

import itertools
import numpy as np
import selected_combinations as sel

# The helper returns two values; the fourth entry of the second one is the
# selection used for this run.
combinations, selected_combinations = sel.selected_combinations(PR_CURVES)
option_correl_select = selected_combinations[3]

# Sampler run length settings.
number_of_samples = 240000
burn_in = 120000

# NOTE(review): presumably prior hyper-parameters for the sampler - confirm
# against the code that consumes them.
kappa_0 = 1.0
mu_0 = 0.0
alpha_0 = 2.0
Beta_0 = 2.0

mode_of_sampler = (
    "distance_prior",
    "distance_MOG",
    "dirichlet_MOG",
    "distance_MOG_empir_mu",
)[3]

num = 3
continue_sampling = False
interacting_enhancers_only_MOG = False
import graph_plotter_all_PR_PCA_true_only_MAP_per_million_multi_chroms_multi_filters_non_domain_inter_background_pro_dist_alt_mixer_cleaned ENHANCER_GENE_INTERACTOR = graph_plotter_all_PR_PCA_true_only_MAP_per_million_multi_chroms_multi_filters_non_domain_inter_background_pro_dist_alt_mixer_cleaned # graph_plotter_all_PR_PCA_true_only_MAP_per_million_multi_chroms_multi_filters_non_domain_inter_background_pro_dist_alt_mixer_cleaned.executor(redo_raw_CHIA_PET_interactions = False, upstream = 300) PR_CURVES = ["SELECTIVE", "ALL"][0] mode_of_code = ["ODD", "EVEN", "FULL", "GAUSSIAN_SAMPLE"][3] mode_of_code_2 = ["WITHOUT", "ADD_GAUSSIAN_VALIDATION"][0] mode_of_features_and_interactions = ["FEATURES_AND_INTERACTIONS_TOGETHER", "FEATURES_AND_INTERACTIONS_SEPERATE"][0] import itertools as itertools import numpy as np import selected_combinations as sel combinations, selected_combinations = sel.selected_combinations(PR_CURVES) option_correl_select = selected_combinations[3] number_of_samples = 20000 kappa_0, mu_0, alpha_0, Beta_0 = 1.0, 0.0, 2.0, 2.0 print number_of_samples, kappa_0, mu_0, alpha_0, Beta_0 mode_of_sampler = ["distance_prior", "distance_MOG", "dirichlet_MOG", "distance_MOG_empir_mu"][3] ENHANCER_GENE_INTERACTOR.executor( PR_CURVES, mode_of_code, mode_of_features_and_interactions, redo_raw_CHIA_PET_interactions=False, upstream=300,
def executor(PR_CURVES = "SELECTIVE", mode_of_code = "EVEN", mode_of_features_and_interactions = "FEATURES_AND_INTERACTIONS_TOGETHER", GENE_OR_PROMOTER_MODE = "GENE_MODE", redo_raw_CHIA_PET_interactions = True, mode_atr = ["FIRST_TS", "SECOND_TS"][1], plot_TF_enrichments_in_cluster = False, upstream = 300, downstream = 0, upstream_t_s = 300, downstream_t_s = 0, do_clustering = False, re_do_clustering = False, cluster_figure_selection = None, DB_version = False, calculate_number_of_within_domain_interactions = True, option_correl_select = [1], number_of_samples = 10000, kappa_0 = 1.0, mu_0 = 0.0, alpha_0 = 2.0, Beta_0 = 2.0, mode_of_sampler = "distance_MOG_empir_mu", burn_in = 0, csf_mode = False, mode_of_code_2 = "WITHOUT", chain_number = 1, continue_sampling = False, interacting_enhancers_only_MOG = False, number_of_samples_arr = [], burn_in_start = []): import numpy as np import re from sys import argv import matplotlib.pyplot as plt import itertools import bisect as bis import random as random import time import kern_density_est import smooth_priors import smooth_priors_non_domain import smooth_correl import smooth_priors_domain import matplotlib as mpl import matplotlib.pyplot as plt from matplotlib.backends.backend_pdf import PdfPages copy_and_paste_mode = False if copy_and_paste_mode: #PR_CURVES = "SELECTIVE" mode_of_code = ["ODD","EVEN", "FULL", "GAUSSIAN_SAMPLE", "MK_PAIRWISE"][1] mode_of_code_2 = ["WITHOUT", "ADD_GAUSSIAN_VALIDATION"][1] mode_of_features_and_interactions = "FEATURES_AND_INTERACTIONS_SEPERATE" GENE_OR_PROMOTER_MODE = "GENE_MODE" redo_raw_CHIA_PET_interactions = False mode_atr = ["FIRST_TS", "SECOND_TS"][1] plot_TF_enrichments_in_cluster = False upstream = 300 downstream = 0 upstream_t_s = 300 downstream_t_s = 0 do_clustering = False re_do_clustering = False cluster_figure_selection = "cluster_ER_enhancer" DB_version = False csf_mode = False calculate_number_of_within_domain_interactions = True kappa_0, mu_0, alpha_0, Beta_0 = 4.0, 0.0, 2.0, 
2.0 # here betta is in scale. np. gamma is in scale so you can plot the gammma with the scale to have an estimate on nice beta. #derivations are in 1/betta. number_of_samples = 30#23000#100001#30 burn_in = 1000 mode_of_sampler = ["distance_prior", "distance_MOG", "dirichlet_MOG", "distance_MOG_empir_mu"][3] chain_number = 1 continue_sampling = False interacting_enhancers_only_MOG = True #number_of_samples_correl, burn_in_correl = [62000, 62000, 240000, 240000, 70000], [31000, 31000, 120000, 120000, 10000] #[70000]*5, [10000]*5 number_of_samples_correl, burn_in_correl = [80000, 80000, 80000, 80000, 70000], [40000, 40000, 40000, 40000, 10000] #[70000]*5, [10000]*5 number_of_samples_dist, burn_in_dist = 70000, 10000 chain_number_correl = [1,1,1, 1, False] chain_number_dist = False if csf_mode: mpl.use('Agg') #np.seterr(all=None, divide='raise', over=None, under=None, invalid=None) if mode_of_features_and_interactions == "FEATURES_AND_INTERACTIONS_TOGETHER": disentagled_features_validation = False # it just mean that it's either gene or TSS mode upstream_t_s = upstream downstream_t_s = downstream elif mode_of_features_and_interactions == "FEATURES_AND_INTERACTIONS_SEPERATE": if GENE_OR_PROMOTER_MODE == "GENE_MODE": disentagled_features_validation = False if GENE_OR_PROMOTER_MODE == "TSS_MODE": disentagled_features_validation = True filter_value, filter_enh, count_f_p, count_f_e, ER_pro_filtered_, path = '-1.', '-1.', 30, 30, 'True', 1 # change to 2 if you want path 2 interactions alternative_classificator = True alternative_classificator_outside_enhancers = False#True # that option is for domain. 
Generator domain = False domain_like_chromosome_correction = False interacting_negatives = False log_distances = True plot_atr, plot_atr_kernel = False, True use_smooth_prior_for_estimation = True likelihood_cross_validation = True distant_enh_only = True # matters for enhancer-enhancer interactions and MAPS for all enhancers not only the interacting ones filter_values = np.array([-1., -0.6, -0.2]) filter_value = filter_values[0] number_of_bins = 4000, 4000 # to implement-easy FDR = np.array([0.10, 0.2, 0.25, 0.3, 0.35, 0.4])# add 0.1 import os data_folder = "./data/" temp_output = "./temp_output/" results_folder = "./results/" if not os.path.exists(temp_output): os.makedirs(temp_output) if not os.path.exists(results_folder): os.makedirs(results_folder) print mode_of_code #scripts: chrom_names = np.array(map(lambda x: "chr{0}".format(x), np.r_[np.arange(1, 23).astype(dtype='S2'), ['X'], ['Y']])) if mode_of_code == "FULL": chroms_in_prior = np.arange(0,23,1)#+1#np.arange(0,13,1)#np.arange(0,13,1) chroms_to_infer = np.arange(0,23,1)#np.arange(0,23,2)#np.arange(0,13,1)#np.arange(0,23,2)#np.arange(0,13,1) FDR_mode = False interacting_enhancers_only = False TOP_PR_interaction_plotter_clean_chrom_to_plot = chrom_names[chroms_to_infer[1]] option_for_predictive_FULL_mode = 2 genes_predicted_with_FDR_for_GRO_seq_validation = 0.25 TOP_PR_interaction_plotter_FDR_thresholds_to_plot = FDR[:3] calculate_number_of_within_domain_interactions = True elif mode_of_code == "ODD": chroms_in_prior = np.arange(0,23,2)#np.arange(0,13,1)#np.arange(0,13,1) chroms_to_infer = np.arange(0,23,2)#np.arange(0,23,2)#np.arange(0,13,1)#np.arange(0,23,2)#np.arange(0,13,1) FDR_mode = True # that fuction apply for odd-odd and odd-even only. 
interacting_enhancers_only = True elif mode_of_code == "EVEN": chroms_in_prior = np.arange(0,23,2)#np.arange(0,13,1)#np.arange(0,13,1) chroms_to_infer = np.arange(0,22,2)+1#np.arange(0,23,2)#np.arange(0,13,1)#np.arange(0,23,2)#np.arange(0,13,1) FDR_mode = True interacting_enhancers_only = True Sample_MoG_classificator = False MoG_classificator = False if mode_of_code == "GAUSSIAN_SAMPLE": chroms_in_prior = np.arange(0,23,1)#+1#np.arange(0,13,1)#np.arange(0,13,1) chroms_to_infer = np.arange(0,23,1)#np.arange(0,23,2)#np.arange(0,13,1)#np.arange(0,23,2)#np.arange(0,13,1) interacting_enhancers_only = False # set the upper-lower-bounds-of-distace-prior-otherwise-there-would-be-allocation-problem-of-high/low-distance FDR_mode = False if csf_mode: plot_atr, plot_atr_kernel = False, False Sample_MoG_classificator = True if mode_of_code == "MK_PAIRWISE": chroms_in_prior = np.arange(0,23,2)#+1#np.arange(0,13,1)#np.arange(0,13,1) chroms_to_infer = np.arange(0,23,1)#np.arange(0,23,2)#np.arange(0,13,1)#np.arange(0,23,2)#np.arange(0,13,1) interacting_enhancers_only = False # set the upper-lower-bounds-of-distace-prior-otherwise-there-would-be-allocation-problem-of-high/low-distance FDR_mode = False if csf_mode: plot_atr, plot_atr_kernel = False, False Sample_MoG_classificator = False if mode_of_code == "convergence_checker": chroms_in_prior = np.arange(0,23,2)#+1#np.arange(0,13,1)#np.arange(0,13,1) chroms_to_infer = np.arange(0,23,1)#np.arange(0,23,2)#np.arange(0,13,1)#np.arange(0,23,2)#np.arange(0,13,1) interacting_enhancers_only = False # set the upper-lower-bounds-of-distace-prior-otherwise-there-would-be-allocation-problem-of-high/low-distance FDR_mode = False if csf_mode: plot_atr, plot_atr_kernel = False, False Sample_MoG_classificator = False if mode_of_code_2 == "ADD_GAUSSIAN_VALIDATION": MoG_classificator = True # else: # burn_in = 0 mode = ["promoter_enhancer_interactions", "enhancer_enhancer_interactions"][0] one_sided_or_two_sided = ["single_sided", 
"double_sided"][1] TSS_or_intra_genic_for_domain_filter = ["Intra_genic", "TSS_only"][0] generator_mode = ["filter_independent_generator", "filter_correl_dependent_generator", "filter_dependent_generator"][1] promoter_overlaps_enhancer_file = temp_output + "intersect_with_full_genes_l_{0}_r_{1}".format(upstream, downstream) name_of_promoter_file_for_overlap = data_folder + "Homo_sapiens.GRCh37.75.gtf_filtered_gene_joint_2_cleaned_chr_sorted_sorted_ordered_0_indexed.gz" name_of_enhancer_file_for_overlap = data_folder + "common_region_peaks_extended_less_time_points_corrected_0_indexed"#"common_region_peaks_extended_less_time_points_sorted" name_of_time_series_promoter_file_for_TSS_start = data_folder + "Homo_sapiens.GRCh37.75.gtf_filtered_gene_joint_2_cleaned_chr_sorted_sorted_ordered.gz" name_of_overlap_file_pro = temp_output + 'ER_promoters_{0}_{1}'.format(upstream, downstream) name_of_overlap_file_enh = temp_output + 'ER_peaks_overlapping_promoters_{0}_{1}'.format(upstream, downstream) # you can now make every feature to behave differentely. However how to count a signal would depend on where TSS is. 
import selected_combinations as sel combinations, selected_combinations = sel.selected_combinations("SELECTIVE") #---------------------------------------------------------- #print mode_of_code, MoG_classificator, Sample_MoG_classificator chroms_to_infer = chrom_names[chroms_to_infer] chroms_in_prior = chrom_names[chroms_in_prior] option = [0,1,2,3,4] filt_option = option time_points = 8 datasets_names = np.array(['PolII_2012-03', 'PolII', 'H2AZ', 'ER', 'H3K4me3'])#, '2012-03_RNA', 'RNA']) dataset_names_option = datasets_names[option] dict_option = dict(zip(range(len(datasets_names)), datasets_names)) link_data_set_name_to_file_name = {} name_of_time_series_file = {} name_of_time_series_file["enhancers"] = name_of_enhancer_file_for_overlap + "_unfiltered_count" name_of_enhancer_file_for_overlap = name_of_enhancer_file_for_overlap + ".gz" if upstream_t_s <> 0: name_of_time_series_file["promoters"] = data_folder + "Homo_sapiens.GRCh37.75.gtf_filtered_gene_joint_2_cleaned_chr_sorted_sorted_ordered_0_indexed_{0}_unfiltered_count".format(upstream_t_s) else: name_of_time_series_file["promoters"] = data_folder + "Homo_sapiens.GRCh37.75.gtf_filtered_gene_joint_2_cleaned_chr_sorted_sorted_ordered_0_indexed_unfiltered_count" full_list_enhancers = np.array([name_of_time_series_file["enhancers"] + "_{0}.gz".format(name_of_TF) for name_of_TF in datasets_names]) full_list_promoters = np.array([name_of_time_series_file["promoters"] + "_{0}.gz".format(name_of_TF) for name_of_TF in datasets_names]) link_data_set_name_to_file_name["enhancers"] = dict(zip(datasets_names, full_list_enhancers)) link_data_set_name_to_file_name["promoters"] = dict(zip(datasets_names, full_list_promoters)) import config_variables reload(config_variables) config_variables.data_folder = data_folder config_variables.results_folder = results_folder #------------------------------------------------------------------------------------------------------------ import time_series_prepare_filter as 
initiate_time_series initiate_time_series.datasets_names = datasets_names initiate_time_series.time_points = time_points dataset_time_series_dict = initiate_time_series.time_series_prepare(full_list_promoters[option], full_list_enhancers[option]) #------------------------------------------------------------------------------------------------------------------ classificator_elements = {} for filter_value_ in filter_values: classificator_elements[filter_value_] = {} for mode_ in ["promoter_enhancer_interactions", "enhancer_enhancer_interactions"]: classificator_elements[filter_value_][mode_] = {} for classification_of_interactions in ["positive_interactions", "negative_interactions"]: classificator_elements[filter_value_][mode_][classification_of_interactions] = {} for attribute_of_interaction in ["distance", "correlation"]: classificator_elements[filter_value_][mode_][classification_of_interactions][attribute_of_interaction] = {} for probability_of_being_positive_or_negative in ["probabilities_of_being_positive_interactions", "probabilities_of_being_negative_interactions"]: classificator_elements[filter_value_][mode_][classification_of_interactions][attribute_of_interaction][probability_of_being_positive_or_negative] = {} classificator_elements[filter_value_][mode_][classification_of_interactions][attribute_of_interaction][probability_of_being_positive_or_negative]["prior_bins"] = np.array([]) classificator_elements[filter_value_][mode_][classification_of_interactions][attribute_of_interaction][probability_of_being_positive_or_negative]["prior_frequencies"] = np.array([]) if attribute_of_interaction == "correlation": for data_set_name in dataset_names_option: classificator_elements[filter_value_][mode_][classification_of_interactions][attribute_of_interaction][probability_of_being_positive_or_negative][data_set_name] = {} 
classificator_elements[filter_value_][mode_][classification_of_interactions][attribute_of_interaction][probability_of_being_positive_or_negative][data_set_name]["prior_bins"] = np.array([]) classificator_elements[filter_value_][mode_][classification_of_interactions][attribute_of_interaction][probability_of_being_positive_or_negative][data_set_name]["prior_frequencies"] = np.array([]) classificator_elements[filter_value_][mode_][classification_of_interactions][attribute_of_interaction][probability_of_being_positive_or_negative][data_set_name]["posterior_component_values"] = {} for chrom_ in chroms_to_infer: classificator_elements[filter_value_][mode_][classification_of_interactions][attribute_of_interaction][probability_of_being_positive_or_negative][data_set_name]["posterior_component_values"][chrom_] = np.array([]) else: for chrom_ in chroms_to_infer: classificator_elements[filter_value_][mode_][classification_of_interactions][attribute_of_interaction][probability_of_being_positive_or_negative]["posterior_component_values"] = {} classificator_elements[filter_value_][mode_][classification_of_interactions][attribute_of_interaction][probability_of_being_positive_or_negative]["posterior_component_values"][chrom_] = np.array([]) config_variables.temp_output = temp_output config_variables.np = np config_variables.link_data_set_name_to_file_name = link_data_set_name_to_file_name config_variables.chroms_in_prior = chroms_in_prior config_variables.mode = mode config_variables.dataset_names_option = dataset_names_option config_variables.count_f_p = count_f_p config_variables.count_f_e = count_f_e config_variables.filter_enh = filter_enh config_variables.domain = domain config_variables.dataset_time_series_dict = dataset_time_series_dict config_variables.re = re config_variables.path = path config_variables.upstream = upstream config_variables.interacting_negatives = interacting_negatives config_variables.interacting_enhancers_only = interacting_enhancers_only 
config_variables.chroms_to_infer = chroms_to_infer config_variables.filter_value = filter_value config_variables.filter_values = filter_values config_variables.datasets_names = datasets_names config_variables.full_list_promoters = full_list_promoters config_variables.option = option config_variables.time_points = time_points config_variables.distant_enh_only = distant_enh_only config_variables.full_list_enhancers = full_list_enhancers config_variables.ER_pro_filtered_ = ER_pro_filtered_ config_variables.TSS_or_intra_genic_for_domain_filter = TSS_or_intra_genic_for_domain_filter config_variables.one_sided_or_two_sided = one_sided_or_two_sided config_variables.chrom_names = chrom_names config_variables.promoter_overlaps_enhancer_file = promoter_overlaps_enhancer_file config_variables.name_of_time_series_promoter_file_for_TSS_start = name_of_time_series_promoter_file_for_TSS_start config_variables.upstream = upstream config_variables.downstream = downstream config_variables.upstream_t_s = upstream_t_s config_variables.name_of_promoter_file_for_overlap = name_of_promoter_file_for_overlap config_variables.name_of_enhancer_file_for_overlap = name_of_enhancer_file_for_overlap config_variables.name_of_overlap_file_pro = name_of_overlap_file_pro config_variables.name_of_overlap_file_enh = name_of_overlap_file_enh config_variables.filt_option = filt_option config_variables.log_distances = log_distances config_variables.domain_like_chromosome_correction = domain_like_chromosome_correction config_variables.alternative_classificator = alternative_classificator config_variables.likelihood_cross_validation = likelihood_cross_validation config_variables.alternative_classificator_outside_enhancers = alternative_classificator_outside_enhancers config_variables.dict_option = dict_option config_variables.kappa_0, config_variables.mu_0, config_variables.alpha_0, config_variables.Beta_0 = kappa_0, mu_0, alpha_0, Beta_0 config_variables.MoG_classificator, 
config_variables.Sample_MoG_classificator = MoG_classificator, Sample_MoG_classificator config_variables.number_of_samples = number_of_samples config_variables.use_smooth_prior_for_estimation = use_smooth_prior_for_estimation config_variables.FDR = FDR config_variables.FDR_mode = FDR_mode config_variables.number_of_bins = number_of_bins config_variables.disentagled_features_validation = disentagled_features_validation config_variables.mode_of_code = mode_of_code config_variables.interacting_enhancers_only_MOG = interacting_enhancers_only_MOG #----------------------------------------------- #prepares variables and calculates model for a filter_value #run twice to get TSS and GENE mode. if redo_raw_CHIA_PET_interactions: import interaction_finder_wrapper import filters_clean #if not(domain) or alternative_classificator: # dict_chrom_pro_survived, dict_chrom_pro_not_survived, filtered_promoters, Pol_2_correl_filtered_promoters = filters_clean.features_filtered(filter_value, count_f_p, full_list_promoters, filt_option, name_of_overlap_file_pro, add_overl = False) # dict_chrom_enh_survived, dict_chrom_enh_not_survived, filtered_enhancers, Pol_2_correl_filtered_enhancers = filters_clean.features_filtered(filter_enh, count_f_e, full_list_enhancers, filt_option, name_of_overlap_file_enh, add_overl = False) #else: # if mode == "promoter_enhancer_interactions": # dict_chrom_pro_survived, dict_chrom_pro_not_survived, filtered_promoters, Pol_2_correl_filtered_promoters = filters_clean.features_filtered(filter_value, count_f_p, full_list_promoters, filt_option, name_of_overlap_file_pro, add_overl = False, remove_single_domain_elements = True) # filter_, count_f, list_of_datasets, options, name_of_overlap_file, add_overl, remove_single_domain_elements = filter_value, count_f_p, full_list_promoters, filt_option, name_of_overlap_file_pro, False, True # dict_chrom_enh_survived, dict_chrom_enh_not_survived, filtered_enhancers, Pol_2_correl_filtered_enhancers = 
filters_clean.features_filtered(filter_enh, count_f_e, full_list_enhancers, filt_option, name_of_overlap_file_enh, add_overl = False) # else: # dict_chrom_pro_survived, dict_chrom_pro_not_survived, filtered_promoters, Pol_2_correl_filtered_promoters = filters_clean.features_filtered(filter_value, count_f_p, full_list_promoters, filt_option, name_of_overlap_file_pro, add_overl = False) # dict_chrom_enh_survived, dict_chrom_enh_not_survived, filtered_enhancers, Pol_2_correl_filtered_enhancers = filters_clean.features_filtered(filter_enh, count_f_e, full_list_enhancers, filt_option, name_of_overlap_file_enh, add_overl = False, remove_single_domain_elements = True) dict_chrom_pro_survived, dict_chrom_pro_not_survived, filtered_promoters, Pol_2_correl_filtered_promoters = filters_clean.features_filtered(filter_value, count_f_p, full_list_promoters, filt_option, name_of_overlap_file_pro, add_overl = False) dict_chrom_enh_survived, dict_chrom_enh_not_survived, filtered_enhancers, Pol_2_correl_filtered_enhancers = filters_clean.features_filtered(filter_enh, count_f_e, full_list_enhancers, filt_option, name_of_overlap_file_enh, add_overl = False) config_variables.Pol_2_correl_filtered_promoters = Pol_2_correl_filtered_promoters config_variables.Pol_2_correl_filtered_enhancers = Pol_2_correl_filtered_enhancers config_variables.dict_chrom_distant, config_variables.dict_chrom_proximal, config_variables.proximal_enhancers_mask = filters_clean.distant_enh_only_filter(name_of_overlap_file_enh) if do_clustering: correl_value_filter = False distant_enh_only_log = False if cluster_figure_selection == "cluster_ER_enhancer": cluster_mode_setting = ["promoters", "enhancers"][1]; datasets_to_concat = datasets_names[[3]]; filter_each_dataset = 100; correl_value_filter = False; distant_enh_only_log = True; elif cluster_figure_selection == "cluster_Pol2s_enhancer": cluster_mode_setting = ["promoters", "enhancers"][1]; datasets_to_concat = datasets_names[[0, 1]]; filter_each_dataset = 30; 
correl_value_filter = 0.2; distant_enh_only_log = True; elif cluster_figure_selection == "cluster_Pol2s_promoter": cluster_mode_setting = ["promoters", "enhancers"][0]; datasets_to_concat = datasets_names[[0,1]]; filter_each_dataset = 30; correl_value_filter = 0.2; elif cluster_figure_selection == "cluster_ER_promoter": cluster_mode_setting = ["promoters", "enhancers"][0]; datasets_to_concat = datasets_names[[3]]; filter_each_dataset = 100; elif cluster_figure_selection == "cluster_Pol2s_ER_enhancer": cluster_mode_setting = ["promoters", "enhancers"][1]; datasets_to_concat = datasets_names[[1, 3]]; filter_each_dataset = 200; elif cluster_figure_selection == "cluster_Pol2s_ER_enhancer_test": cluster_mode_setting = ["promoters", "enhancers"][1]; datasets_to_concat = datasets_names[[1, 3]]; filter_each_dataset = 300; config_variables.dataset_time_series_dict_mean_std = initiate_time_series.time_series_prepare_mean_std(full_list_promoters[option], full_list_enhancers[option]) config_variables.name_of_time_series_file = name_of_time_series_file import AP_clustering config_variables.name_of_overlap_file_dict = dict(zip(["promoters", "enhancers"], [name_of_overlap_file_pro, name_of_overlap_file_enh])) merged_time_series_to_cluster = AP_clustering.concatenator( cluster_mode = cluster_mode_setting, merge_time_series_option = datasets_to_concat, count_filter_each_data_set = filter_each_dataset, pol2_rep_correl_filt = correl_value_filter, distant_enh_only = distant_enh_only_log) if re_do_clustering: config_variables.merged_time_series_to_cluster = merged_time_series_to_cluster AP_clustering.AP_clustering(merged_time_series_to_cluster, number_of_clusters = 40) config_variables.labels = np.loadtxt(merged_time_series_to_cluster + "_labels", dtype = str) import os as os cwd = os.getcwd() path_to_R = cwd + "/R_scripts/" os.chdir(path_to_R) print ("Rscript " + path_to_R + "ER_enhancer.R") if cluster_figure_selection == "cluster_ER_enhancer": os.system("Rscript " + path_to_R + 
"ER_enhancer.R") elif cluster_figure_selection == "cluster_Pol2s_enhancer": os.system("Rscript " + path_to_R + "PolIIs_enhancer.R") elif cluster_figure_selection == "cluster_Pol2s_promoter": os.system("Rscript " + path_to_R + "PolIIs_promoter.R") elif cluster_figure_selection == "cluster_ER_promoter": os.system("Rscript " + path_to_R + "ER_promoter.R") elif cluster_figure_selection == "cluster_Pol2s_ER_enhancer": os.system("Rscript " + path_to_R + "Pol2_ER.R") os.chdir(cwd) #if not(copy_and_paste_mode): return 0 if plot_TF_enrichments_in_cluster: all_analysis = [["common_region_peaks_extended_less_time_points_corrected_0_indexed_unfiltered_count_concat_PolII_ER_200", 0, 0, ["FIRST_TS", "SECOND_TS"][0], "ENHANCER"], ["common_region_peaks_extended_less_time_points_corrected_0_indexed_unfiltered_count_concat_PolII_ER_200", 0, 0, ["FIRST_TS", "SECOND_TS"][1], "ENHANCER"], ["common_region_peaks_extended_less_time_points_corrected_0_indexed_unfiltered_count_concat_ER_100_distant_only", 0, 0, ["FIRST_TS", "SECOND_TS"][0], "ENHANCER"], ["common_region_peaks_extended_less_time_points_corrected_0_indexed_unfiltered_count_concat_PolII_2012-03_PolII_30_cor_0.2_distant_only", 0, 0, ["FIRST_TS", "SECOND_TS"][0], "ENHANCER"], ["common_region_peaks_extended_less_time_points_corrected_0_indexed_unfiltered_count_concat_PolII_2012-03_PolII_30_cor_0.2_distant_only", 0, 0, ["FIRST_TS", "SECOND_TS"][1], "ENHANCER"], ["Homo_sapiens.GRCh37.75.gtf_filtered_gene_joint_2_cleaned_chr_sorted_sorted_ordered_0_indexed_300_unfiltered_count_concat_ER_100", 1000, 1000, ["FIRST_TS", "SECOND_TS"][0], "TSS"], ["Homo_sapiens.GRCh37.75.gtf_filtered_gene_joint_2_cleaned_chr_sorted_sorted_ordered_0_indexed_300_unfiltered_count_concat_PolII_2012-03_PolII_30_cor_0.2", 1000, 1000, ["FIRST_TS", "SECOND_TS"][0], "TSS"], ["Homo_sapiens.GRCh37.75.gtf_filtered_gene_joint_2_cleaned_chr_sorted_sorted_ordered_0_indexed_300_unfiltered_count_concat_PolII_2012-03_PolII_30_cor_0.2", 1000, 1000, ["FIRST_TS", 
"SECOND_TS"][1], "TSS"], ["Homo_sapiens.GRCh37.75.gtf_filtered_gene_joint_2_cleaned_chr_sorted_sorted_ordered_0_indexed_300_unfiltered_count_concat_ER_100", 10000, 10000, ["FIRST_TS", "SECOND_TS"][0], "TSS"], ["Homo_sapiens.GRCh37.75.gtf_filtered_gene_joint_2_cleaned_chr_sorted_sorted_ordered_0_indexed_300_unfiltered_count_concat_PolII_2012-03_PolII_30_cor_0.2", 10000, 10000, ["FIRST_TS", "SECOND_TS"][0], "TSS"], ["Homo_sapiens.GRCh37.75.gtf_filtered_gene_joint_2_cleaned_chr_sorted_sorted_ordered_0_indexed_300_unfiltered_count_concat_PolII_2012-03_PolII_30_cor_0.2", 10000, 10000, ["FIRST_TS", "SECOND_TS"][1], "TSS"], ["Homo_sapiens.GRCh37.75.gtf_filtered_gene_joint_2_cleaned_chr_sorted_sorted_ordered_0_indexed_300_unfiltered_count_concat_ER_100", 300, 0, ["FIRST_TS", "SECOND_TS"][0], "GENE"], ["Homo_sapiens.GRCh37.75.gtf_filtered_gene_joint_2_cleaned_chr_sorted_sorted_ordered_0_indexed_300_unfiltered_count_concat_PolII_2012-03_PolII_30_cor_0.2", 300, 0, ["FIRST_TS", "SECOND_TS"][0], "GENE"], ["Homo_sapiens.GRCh37.75.gtf_filtered_gene_joint_2_cleaned_chr_sorted_sorted_ordered_0_indexed_300_unfiltered_count_concat_PolII_2012-03_PolII_30_cor_0.2", 300, 0, ["FIRST_TS", "SECOND_TS"][1], "GENE"]] import overlapper_hg19_clean #mode_of_data_sets, sorted_mode = ["Ciiras", "Others_from_cistrom_finder"][1], ["amplitude_sorted", "size_sorted"][0] for merged_time_series_to_cluster, upstream_TSS, downstream_TSS, mode_atr, mode_atr2 in all_analysis: for mode_of_data_sets in ["Ciiras", "Others_from_cistrom_finder"]: if mode_of_data_sets == "Others_from_cistrom_finder": if mode_atr2 == "ENHANCER": dont_plot = ["ESR1", "ESR2", "RAD21"] else: dont_plot = ["ESR2", "RAD21"] else: dont_plot = [] for sorted_mode in ["amplitude_sorted", "size_sorted"]: overlapper_hg19_clean.executor(merged_time_series_to_cluster, upstream_TSS = upstream_TSS, downstream_TSS = downstream_TSS, diff_bind_version = DB_version, mode_atr = mode_atr, mode_atr2 = mode_atr2, mode_of_data_sets = mode_of_data_sets, 
sorted_mode = sorted_mode, dont_plot = dont_plot) # mode attribute specifies whether it should use ER mean or Pol2 mean of a cluster to assess raising or falling tendencies. #if not(copy_and_paste_mode): return 0 import generator_executor f_name = generator_executor.interactions_producer_filter(generator_mode, domain, 2, TSS_or_intra_genic_for_domain_filter, "GENE_MODE") #in order to get path 2 interactions change to 3 config_variables.dict_chrom_pro_survived = dict_chrom_pro_survived config_variables.dict_chrom_pro_not_survived = dict_chrom_pro_not_survived config_variables.f_name = f_name config_variables.filtered_promoters = filtered_promoters config_variables.filtered_enhancers = filtered_enhancers config_variables.dict_chrom_enh_survived = dict_chrom_enh_survived config_variables.dict_chrom_enh_not_survived = dict_chrom_enh_not_survived import prepare_interactions_clean alternative_classificator_outside_enhancers = True # had something to do with enhancers outside domains - it's for MAP, enhancers which are interacting within domain and outside. 
Althought it's a bit ambigious for enhancers which may have one link inside domain and one outside if alternative_classificator_outside_enhancers: f_name_2 = generator_executor.interactions_producer_filter(generator_mode, True, 2, TSS_or_intra_genic_for_domain_filter, "GENE_MODE") chr_interactions_dict_pro_enh, chr_interactions_dict_enh_enh, dict_total_enh, dict_total_pro = prepare_interactions_clean.filter_true_interactions_of_promoters_and_enhancers_which_didnt_survive_filtering(f_name_2) from prepare_interactions_clean import un_string chrom_interacting_enhancers_pro = {} for chrom__ in chrom_names: chrom_interacting_enhancers_pro[chrom__] = np.unique(un_string(chr_interactions_dict_pro_enh[chrom__])[:,1]) config_variables.chrom_interacting_enhancers_pro = chrom_interacting_enhancers_pro chr_interactions_dict_pro_enh, chr_interactions_dict_enh_enh, dict_total_enh, dict_total_pro = prepare_interactions_clean.filter_true_interactions_of_promoters_and_enhancers_which_didnt_survive_filtering(f_name) if disentagled_features_validation: #That TSS_MODE can be still buggy to some extend. 
Check that later if you need to f_name_TSS = generator_executor.interactions_producer_filter(generator_mode, domain, 2, TSS_or_intra_genic_for_domain_filter, "TSS_MODE") config_variables.chr_interactions_dict_pro_enh_TSS, config_variables.chr_interactions_dict_enh_enh_TSS, config_variables.dict_total_enh_TSS, config_variables.dict_total_pro_TSS = prepare_interactions_clean.filter_true_interactions_of_promoters_and_enhancers_which_didnt_survive_filtering(f_name_TSS) config_variables.dict_total_enh = dict_total_enh config_variables.dict_total_pro = dict_total_pro config_variables.chr_interactions_dict_pro_enh = chr_interactions_dict_pro_enh config_variables.chr_interactions_dict_enh_enh = chr_interactions_dict_enh_enh import chrom_specific_negative_interactions as negative_interactions config_variables.negative_interactions = negative_interactions import prior_producer import classificator_clean import prepare_upper_and_lower_bounds_for_priors as prior_bounds import prior_histograms_cl import allocator import plot_histograms_figures if Sample_MoG_classificator: config_variables.interacting_enhancers_only = False reload(negative_interactions) config_variables.negative_interactions = negative_interactions config_variables.alternative_classificator_outside_enhancers = False prior_elements = prior_producer.prior_producer() config_variables.alternative_classificator_outside_enhancers = False#True infered_elements = classificator_clean.infered_elements_filler() low_dist, up_dist = prior_bounds.prepare_upper_and_lower_bounds_for_priors(prior_elements, infered_elements) config_variables.interacting_enhancers_only = True reload(negative_interactions) config_variables.negative_interactions = negative_interactions config_variables.alternative_classificator_outside_enhancers = False prior_elements = prior_producer.prior_producer() config_variables.alternative_classificator_outside_enhancers = False#True infered_elements = classificator_clean.infered_elements_filler() else: 
config_variables.alternative_classificator_outside_enhancers = False prior_elements = prior_producer.prior_producer() config_variables.alternative_classificator_outside_enhancers = False#True infered_elements = classificator_clean.infered_elements_filler() low_dist, up_dist = prior_bounds.prepare_upper_and_lower_bounds_for_priors(prior_elements, infered_elements) prior_elements = prior_histograms_cl.prior_bins_prob_and_plotter(prior_elements, low_dist, up_dist, use_smooth_prior_for_estimation, plot_atr, plot_atr_kernel, Sample_MoG_classificator = False) if not(csf_mode): plot_histograms_figures.execute(prior_elements, plot_atr, plot_atr_kernel) infered_elements = allocator.allocator(infered_elements, prior_elements) #for mode in modes: # for filter_value in filter_values: for classification_of_interactions in ["positive_interactions", "negative_interactions"]: for attribute_of_interaction in ["distance", "correlation"]: for probability_of_being_positive_or_negative in ["probabilities_of_being_positive_interactions", "probabilities_of_being_negative_interactions"]: if attribute_of_interaction == "correlation": for data_set_name in dataset_names_option: for chrom_ in chroms_to_infer: update = infered_elements[mode][classification_of_interactions][attribute_of_interaction][data_set_name][probability_of_being_positive_or_negative][chrom_] classificator_elements[filter_value][mode][classification_of_interactions][attribute_of_interaction][probability_of_being_positive_or_negative][data_set_name]["posterior_component_values"][chrom_] = update elif attribute_of_interaction == "distance": for chrom_ in chroms_to_infer: update = infered_elements[mode][classification_of_interactions][attribute_of_interaction][probability_of_being_positive_or_negative][chrom_] classificator_elements[filter_value][mode][classification_of_interactions][attribute_of_interaction][probability_of_being_positive_or_negative]["posterior_component_values"][chrom_] = update 
config_variables.classificator_elements = classificator_elements import classifiers_clean config_variables.classifiers_clean = classifiers_clean if mode_of_code == "GAUSSIAN_SAMPLE": print "Sample_MoG_classificator" from multiprocessing import Pool import Gaussian_probs prior_elements[mode]["MOG_distance"]["prior_frequencies"], prior_elements[mode]["MOG_distance"]["prior_bins"] = Gaussian_probs.executor(prior_elements, low_dist, up_dist) config_variables.probabilities_of_a_bin = prior_elements[mode]["positive_interactions"]["distance"]["prior_frequencies"]/prior_elements[mode]["MOG_distance"]["prior_frequencies"] #prior_elements[mode]["positive_interactions"]["distance"]["prior_frequencies"]/(prior_elements[mode]["negative_interactions"]["distance"]["prior_frequencies"]) config_variables.adequate_histogram_bins = prior_elements[mode]["MOG_distance"]["prior_bins"] #prior_elements[mode]["positive_interactions"]["distance"]["prior_bins"] it's the same but just in case #prior_elements[mode]["positive_interactions"]["distance"]["prior_bins"] config_variables.test_prior = False import finite_MOG_object_orientated_1d_times_n_case_log_calc_prob_visited_float64_distance_low_distances_active_promoters_clean as MOG #def func_star(args): return MOG.executor(*args) p = Pool(5) #option_correl_select = [1] arguments = [(mode_of_sampler, number_of_samples, option_correl__, chrom_, chain_number, continue_sampling) for chrom_ in chroms_to_infer for option_correl__ in selected_combinations if option_correl__ == option_correl_select] #arguments = arguments[-6:] #bla = [] #for el in arguments:bla += MOG.executor(el) p.map(MOG.executor, arguments) posterior_ = {} import classifiers_clean if mode_of_sampler == "distance_prior": posterior_["positive_interactions"], posterior_["negative_interactions"] = classifiers_clean.MOG_classifier(mode_of_sampler, number_of_samples = number_of_samples, burn_in = burn_in, pairwise_number_in_pack = 150/2) else: posterior_["positive_interactions"], 
posterior_["negative_interactions"] = {}, {} comb = "_".join([dict_option[el] for el in option_correl_select]) posterior_["positive_interactions"][comb], posterior_["negative_interactions"][comb] = classifiers_clean.MOG_classifier(mode_of_sampler, comb = comb, kappa_0 = kappa_0, mu_0 = mu_0 , alpha_0 = alpha_0, Beta_0 = Beta_0, number_of_samples = number_of_samples, burn_in = burn_in, chain = chain_number) if config_variables.test_prior: import Gaussian_probs prior_elements[mode]["MOG_distance"]["prior_frequencies"], prior_elements[mode]["MOG_distance"]["prior_bins"] = Gaussian_probs.executor(prior_elements, low_dist, up_dist) config_variables.probabilities_of_a_bin = prior_elements[mode]["positive_interactions"]["distance"]["prior_frequencies"]/prior_elements[mode]["MOG_distance"]["prior_frequencies"]#prior_elements[mode]["positive_interactions"]["distance"]["prior_frequencies"]/(prior_elements[mode]["negative_interactions"]["distance"]["prior_frequencies"])# + prior_elements[mode]["positive_interactions"]["distance"]["prior_frequencies"]) from prepare_interactions_clean import un_string def inter_enhancer(chrom): negative_interactions = config_variables.negative_interactions indexes_p, indexes_e, total_p, total_e = negative_interactions.initialise_variables(chrom)[2:] if config_variables.disentagled_features_validation: chr_interactions_pro_enh = config_variables.chr_interactions_dict_pro_enh_TSS[chrom] else: chr_interactions_pro_enh = config_variables.chr_interactions_dict_pro_enh[chrom] true_inter_pro = un_string(chr_interactions_pro_enh[:, :2]).astype(int) i_s_t, j_s_t = true_inter_pro[:,0], true_inter_pro[:,1] interacting_enhancers = np.unique(j_s_t)-total_e return len(interacting_enhancers) arguments = [(mode_of_sampler, inter_enhancer(chrom_), option_correl__, chrom_, chain_number, continue_sampling) for chrom_ in chroms_in_prior for option_correl__ in selected_combinations if option_correl__ == option_correl_select] def calculate_kern(sample_, bins, band = 
"scott"): import kern_density_est prob_, bins_ = kern_density_est.kern_scipy_gaus(sample_, "g", bins, bandwidth = band, plot_atr = True) return prob_, bins_ #config_variables.test_prior = False reload(config_variables) reload(MOG) #bla = p.map(MOG.executor, arguments) bla = [] for i in arguments: bla += MOG.executor(i) #plt.hist(bla, bins = 200, normed=True) import plot_histograms_figures_MOG pr, bi = calculate_kern(bla, bins = prior_elements[mode]["MOG_distance"]["prior_bins"], band = 0.025) plot_histograms_figures_MOG.execute(prior_elements, bla, plot_atr, plot_atr_kernel) #plt.plot(bi,pr) plt.show() elif mode_of_code == "MK_PAIRWISE": posterior_ = {} import classifiers_clean if mode_of_sampler == "distance_prior": posterior_["positive_interactions"], posterior_["negative_interactions"] = classifiers_clean.MOG_classifier(mode_of_sampler, number_of_samples = number_of_samples, burn_in = burn_in, pairwise_number_in_pack = 150/2) else: posterior_["positive_interactions"], posterior_["negative_interactions"] = {}, {} comb = "_".join([dict_option[el] for el in option_correl_select]) posterior_["positive_interactions"][comb], posterior_["negative_interactions"][comb] = classifiers_clean.MOG_classifier(mode_of_sampler, comb = comb, kappa_0 = kappa_0, mu_0 = mu_0 , alpha_0 = alpha_0, Beta_0 = Beta_0, number_of_samples = number_of_samples, burn_in = burn_in, chain = chain_number) elif mode_of_code == "convergence_checker": import convergence_checker as conv conv.convergence_checker(number_of_samples_arr, burn_in_start) else: posterior = {} type_of_models = ["dist", "correl", "correl_dist"] if MoG_classificator: type_of_models += ["MOG_dist", "MOG_correl_dist"] for classification_of_interactions in ["positive_interactions", "negative_interactions"]: posterior[classification_of_interactions] = {} for type_of_model in type_of_models: posterior[classification_of_interactions][type_of_model] = {} posterior["positive_interactions"]["dist"], 
posterior["negative_interactions"]["dist"] = classifiers_clean.posterior_producer([0], []) if "MOG_dist" in type_of_models: posterior["positive_interactions"]["MOG_dist"], posterior["negative_interactions"]["MOG_dist"] = classifiers_clean.MOG_classifier("distance_prior", number_of_samples = number_of_samples_dist, burn_in = burn_in_dist, chain = chain_number_dist) #infered_elements['promoter_enhancer_interactions']["positive_interactions"]["distance"]['probabilities_of_being_positive_interactions'], infered_elements['promoter_enhancer_interactions']["negative_interactions"]["distance"]['probabilities_of_being_positive_interactions'] # if MoG_classificator: combinations, selected_combinations = sel.selected_combinations("SELECTIVE") else: combinations, selected_combinations = sel.selected_combinations("ALL") for ind, option_ in enumerate(selected_combinations): comb = "_".join([dict_option[el] for el in option_]) posterior["positive_interactions"]["correl_dist"][comb], posterior["negative_interactions"]["correl_dist"][comb] = classifiers_clean.posterior_producer([0], option_) posterior["positive_interactions"]["correl"][comb], posterior["negative_interactions"]["correl"][comb] = classifiers_clean.posterior_producer([], option_) if "MOG_correl_dist" in type_of_models: posterior["positive_interactions"]["MOG_correl_dist"][comb], posterior["negative_interactions"]["MOG_correl_dist"][comb] = classifiers_clean.MOG_classifier("distance_MOG_empir_mu", comb = comb, kappa_0 = kappa_0, mu_0 = mu_0 , alpha_0 = alpha_0, Beta_0 = Beta_0, number_of_samples = number_of_samples_correl[ind], burn_in = burn_in_correl[ind], chain = chain_number_correl[ind]) if mode_of_code == "ODD" or mode_of_code == "EVEN": #import PR_top #PR_top.execute() #import PR_top_MAP_dots import MAP_invoker #MAP_probabilites, infered_elements, match_MAP, sensitivity_match_MAP = MAP_invoker.executor(posterior, type_of_models) match_MAP, sensitivity_match_MAP, MAP_probabilites, infered_elements_MAP, 
probabilities_for_promoters_of_interacting_enhancers = MAP_invoker.executor(posterior, selected_combinations, type_of_models) import PR_top_MAP_dots_alternative_domain for PR_CURVES in np.array(["SELECTIVE", "ALL"])[:1]: if MoG_classificator and PR_CURVES == "ALL": continue #PR_top_MAP_dots_alternative_domain.execute(sensitivity_match_MAP, number_of_interacting_enhancers_ = np.sum([len(match_MAP["dist"][chrom_]) for chrom_ in chroms_to_infer]), option_to_plot = PR_CURVES, type_of_models=type_of_models, posterior_MOG = posterior, kappa_0=kappa_0, mu_0=mu_0, alpha_0=alpha_0, Beta_0=Beta_0, number_of_samples = [number_of_samples_dist] + number_of_samples_correl, burn_in = [burn_in_dist] + burn_in_correl) if MoG_classificator: type_of_models = ["correl_dist", "MOG_correl_dist","MOG_dist"] if MoG_classificator: type_of_models = ["dist", "MOG_dist"] PR_top_MAP_dots_alternative_domain.execute(sensitivity_match_MAP, number_of_interacting_enhancers_ = np.sum([len(match_MAP["dist"][chrom_]) for chrom_ in chroms_to_infer]), option_to_plot = PR_CURVES, type_of_models = type_of_models, posterior_MOG = posterior, kappa_0=kappa_0, mu_0=mu_0, alpha_0=alpha_0, Beta_0=Beta_0, number_of_samples = [number_of_samples_dist] + number_of_samples_correl, burn_in = [burn_in_dist] + burn_in_correl)#"correl_dist","MOG_correl_dist"] if mode_of_code == "FULL": #import MAP_clustering_labels_clean #MAP_clustering_labels_clean.executor(MAP_probabilites_correl_dist, infered_elements_correl_dist) import TOP_FDR_PR_gene_list_clean TOP_FDR_PR_gene_list_clean.executor(selection_option = option_for_predictive_FULL_mode) import TOP_FDR_PR_table_clean TOP_FDR_PR_table_clean.executor(selection_option = option_for_predictive_FULL_mode) import script_python_analys_PR script_python_analys_PR.executor(selection_option = option_for_predictive_FULL_mode, FDR_level = genes_predicted_with_FDR_for_GRO_seq_validation) #import MAP_interaction_plotter_clean 
#MAP_interaction_plotter_clean.executor(MAP_probabilites_correl_dist, infered_elements_correl_dist, match_MAP_correl_dist) import TOP_PR_interaction_plotter_clean TOP_PR_interaction_plotter_clean.executor(selection_option = option_for_predictive_FULL_mode, chrom_to_plot = TOP_PR_interaction_plotter_clean_chrom_to_plot, FDR_thresholds_to_plot = TOP_PR_interaction_plotter_FDR_thresholds_to_plot, calculate_number_of_within_domain_interactions = calculate_number_of_within_domain_interactions)
def convergence_checker(number_of_samples_arr, burn_in_start):
    """Plot histograms of the Gelman-Rubin R-hat statistic of the MOG traces.

    For every feature combination returned by
    ``selected_combinations.selected_combinations("SELECTIVE")`` and every
    chromosome in ``config_variables.chrom_names[:-1]`` the trace files of
    chains 1, 2 and 3 are read from ``<cwd>/MOG_results_/``, R-hat is
    computed for every trace column, and the values of all chromosomes are
    pooled into one histogram panel per combination.  The figure is written
    as a single page to
    ``<results_folder>r_hat_for_<first burn-in column>_<min samples>.pdf``.

    Parameters
    ----------
    number_of_samples_arr : sequence of sequences of int
        One entry per selected combination; each entry gives the number of
        samples of chains 1, 2, 3 and is used to build the trace file names.
    burn_in_start : 2-D array-like of int
        Indexed ``[combination][chain]``.  The minimum of a row is the number
        of leading trace rows skipped for that combination; the first column
        is embedded in the output file name.  Lists are accepted as well as
        arrays.

    Returns
    -------
    None
        The only results are the PDF file and the diagnostics printed to
        stdout (trace file names and indices of NaN R-hat values).
    """
    # Import order is kept as in the original: the backend has to be selected
    # before pyplot is imported.
    import iter_loadtxt
    import numpy as np
    import matplotlib as mpl
    mpl.use('Agg')
    from matplotlib.backends.backend_pdf import PdfPages
    import config_variables
    import os
    import matplotlib.pyplot as plt
    import selected_combinations as sel

    results_folder = config_variables.results_folder
    chrom_names = config_variables.chrom_names
    dataset_time_series_dict = config_variables.dataset_time_series_dict
    link_data_set_name_to_file_name = config_variables.link_data_set_name_to_file_name

    combinations, selected_combinations = sel.selected_combinations("SELECTIVE")
    chains = [1, 2, 3]

    # Accept plain nested lists too; the [:, 0] slice below needs an ndarray.
    burn_in_start = np.asarray(burn_in_start)
    min_number_of_samples = np.min(np.array(number_of_samples_arr))

    def opener(option_correl, chrom, number_of_samples_ar, rows_to_skip):
        """Load the traces of all chains into one (chain, sample, column) array.

        ``rows_to_skip`` leading rows of every trace file are discarded.
        """
        comb = "_".join([config_variables.dict_option[el] for el in option_correl])
        kappa_0 = config_variables.kappa_0
        mu_0 = config_variables.mu_0
        alpha_0 = config_variables.alpha_0
        Beta_0 = config_variables.Beta_0
        save_to_folder = os.getcwd() + "/MOG_results_/"

        parameter_multiple_chains = {}
        for chain_number, number_of_samples in zip(chains, number_of_samples_ar):
            name = 'MOG_distance_emprirical_mu_trace_of_c_{0}_{1}_{2}_{3}_{4}_{5}_{6}_{7}'.format(
                kappa_0, mu_0, alpha_0, Beta_0, chrom, comb, number_of_samples, chain_number)
            name = save_to_folder + name
            print(name)
            parameter_multiple_chains[chain_number] = iter_loadtxt.iter_loadtxt(
                name, ",", dtype=int, skiprows=rows_to_skip)

        # Chain 1 fixes the expected shape; a chain with a different shape
        # makes the assignment below raise, exactly as before.
        n_, j_ = parameter_multiple_chains[1].shape[:2]
        tensor_parameter_multiple_chains = np.zeros((len(chains), n_, j_))
        for chain_number in chains:
            tensor_parameter_multiple_chains[chain_number - 1] = parameter_multiple_chains[chain_number]
        return tensor_parameter_multiple_chains

    def Gelman_Rubin_R_hat(tensor_parameter_multiple_chains):
        """Return R-hat for every column of a (chain, sample, column) array.

        W is the mean over chains of the within-chain sample variance
        (ddof=1); B is n times the sample variance (ddof=1) of the chain
        means; R-hat = sqrt(((1 - 1/n) * W + B / n) / W).  Columns with
        W == 0 yield NaN (the caller replaces those by 1).
        """
        _, n, _ = tensor_parameter_multiple_chains.shape
        means_j = tensor_parameter_multiple_chains.mean(1)

        W = np.var(tensor_parameter_multiple_chains, ddof=1, axis=1).mean(0)
        B = np.var(means_j, ddof=1, axis=0) * n

        inv_n = 1.0 / n
        Var_par = (1 - inv_n) * W + inv_n * B
        return (Var_par / W) ** 0.5

    def inter_enhancer(chrom):
        """Return positions, within the surviving enhancers of ``chrom``, of
        the enhancers that take part in at least one true promoter-enhancer
        interaction.
        """
        from prepare_interactions_clean import un_string

        enh_chroms, _, _ = dataset_time_series_dict[link_data_set_name_to_file_name["enhancers"]["ER"]]
        filtered_enhancers = config_variables.filtered_enhancers
        proximal_enhancers_mask = config_variables.proximal_enhancers_mask
        chrom_enh_survived = np.where(
            (enh_chroms == chrom) * np.invert(proximal_enhancers_mask) * filtered_enhancers)[0]

        negative_interactions = config_variables.negative_interactions
        indexes_p, indexes_e, total_p, total_e = negative_interactions.initialise_variables(chrom)[2:]

        if config_variables.disentagled_features_validation:
            chr_interactions_pro_enh = config_variables.chr_interactions_dict_pro_enh_TSS[chrom]
        else:
            chr_interactions_pro_enh = config_variables.chr_interactions_dict_pro_enh[chrom]

        true_inter_pro = un_string(chr_interactions_pro_enh[:, :2]).astype(int)
        # total_e is subtracted to turn the stored enhancer indices into
        # chromosome-local ones (presumably a genome-wide offset - confirm).
        interacting_enhancers_ = np.unique(true_inter_pro[:, 1]) - total_e

        enhancer_array_survived = np.zeros(len(indexes_e), bool)
        enhancer_array_interacting = np.zeros(len(indexes_e), bool)
        enhancer_array_survived[chrom_enh_survived - total_e] = True
        enhancer_array_interacting[interacting_enhancers_] = True

        # Same result as np.in1d(where(survived), where(interacting)):
        # restrict the "interacting" flags to the surviving enhancers and
        # report which of those are set.
        return np.where(enhancer_array_interacting[enhancer_array_survived])[0]

    name_of_r_hat = results_folder + "r_hat_for_{0}_{1}".format(
        "_".join(burn_in_start[:, 0].astype(str)), min_number_of_samples)

    pdf = PdfPages(name_of_r_hat + ".pdf")
    try:
        plt.rcParams['xtick.labelsize'] = 26
        plt.rc('ytick', labelsize=26)

        # squeeze=False keeps a 2-D axes array even for a single combination,
        # so indexing by panel number always works.
        f, axes_grid = plt.subplots(1, len(selected_combinations), sharex=True, sharey=True,
                                    figsize=(20, 10), squeeze=False)
        ax = axes_grid[0]
        f.subplots_adjust(left=0.085, bottom=0.15, right=0.965, top=0.925, hspace=0.1, wspace=0.05)

        size_of_combination_name = 35
        size_of_y_label = 35

        ax[0].set_ylabel('Density', fontsize=size_of_y_label)
        ax[0].locator_params(axis='x', nbins=6)
        f.text(0.525, 0.04, r'$ \hat R $', ha='center', fontsize=35)

        for index_opt, (option_correl_select, number_of_samples_ar) in enumerate(
                zip(selected_combinations, number_of_samples_arr)):
            comb_plot = ",".join([config_variables.dict_option[el] for el in option_correl_select])
            if option_correl_select == combinations[-1]:
                comb_plot = "All"
            ax[index_opt].set_title(comb_plot, fontsize=size_of_combination_name)

            # The shortest burn-in among the chains of this combination is
            # discarded from every chain.
            rows_to_skip = min(burn_in_start[index_opt])

            R_hat_total = []
            for chrom_ in chrom_names[:-1]:
                tensor_parameter_multiple_chains = opener(
                    option_correl_select, chrom_, number_of_samples_ar, rows_to_skip)

                if config_variables.interacting_enhancers_only_MOG:
                    interacting_enhancers = inter_enhancer(chrom_)
                    tensor_parameter_multiple_chains = tensor_parameter_multiple_chains[:, :, interacting_enhancers]

                R_hat = Gelman_Rubin_R_hat(tensor_parameter_multiple_chains)
                # Report columns with undefined R-hat, then count them as
                # converged (R-hat = 1).
                print(np.where(np.isnan(R_hat))[0])
                R_hat[np.isnan(R_hat)] = 1.
                R_hat_total += R_hat.tolist()

            # NOTE(review): `normed` is kept for the Matplotlib version this
            # Python 2 code base runs on; newer releases spell it `density`.
            ax[index_opt].hist(R_hat_total, bins=np.arange(0.95, 1.5, 0.01), facecolor='green',
                               alpha=0.5, normed=1, histtype='bar')
            ax[index_opt].set_xlim([0.9, 1.6])
            ax[index_opt].set_ylim([0., 90.])

        pdf.savefig()
    finally:
        # Release the PDF handle and the figure even when loading or
        # computing fails part-way through.
        pdf.close()
        plt.close("all")
def execute(sensitivity_match_MAP, number_of_interacting_enhancers_, option_to_plot="ALL", type_of_models=[], posterior_MOG=None, kappa_0=1.0, mu_0=1.0, alpha_0=1.0, Beta_0=1.0, number_of_samples=10, burn_in=0): #---------------------------------------------------------------------------------------------------------------------------------------------------------------------- from matplotlib.backends.backend_pdf import PdfPages import config_variables import numpy as np results_folder = config_variables.results_folder name_of_output_FDR_file = results_folder + 'FDR_file_{0}_{1}_{4}_{2}_{3}_average_PolII'.format( config_variables.chroms_in_prior[0], config_variables.chroms_to_infer[0], config_variables.one_sided_or_two_sided, config_variables.use_smooth_prior_for_estimation, config_variables.number_of_bins) name_of_output_FDR_file += "_{0}".format("_".join(type_of_models)) name_of_output_FDR_file += "_{0}_{1}_{2}".format( config_variables.upstream, config_variables.downstream, config_variables.upstream_t_s) #name_of_output_FDR_file += "_{0}_{1}".format("_".join(np.array(number_of_samples,str)), "_".join(np.array(burn_in, str))) if config_variables.disentagled_features_validation: name_of_output_FDR_file += "_TSS" else: name_of_output_FDR_file += "_GENE" if option_to_plot == "ALL": name_of_output_FDR_file += "_ALL" if "MOG_correl_dist" in type_of_models or "MOG_dist" in type_of_models: name_of_output_FDR_file += "_MOG_{0}_{1}_{2}_{3}_{4}_{5}".format( kappa_0, mu_0, alpha_0, Beta_0, "_".join(np.array(number_of_samples, str)), "_".join(np.array(burn_in, str))) pdf = PdfPages(name_of_output_FDR_file + ".pdf") np = config_variables.np negative_interactions = config_variables.negative_interactions if config_variables.FDR_mode: name_of_output_file_with_thresholds_estimated_on_odd_even_chromosomes = results_folder + "file_with_FDRs_{0}_{1}_smo_{2}_{3}".format( config_variables.chroms_in_prior[0], config_variables.chroms_to_infer[0], 
config_variables.use_smooth_prior_for_estimation, config_variables.number_of_bins) name_of_output_file_with_thresholds_estimated_on_odd_even_chromosomes += "_{0}_{1}_{2}".format( config_variables.upstream, config_variables.downstream, config_variables.upstream_t_s) if config_variables.disentagled_features_validation: name_of_output_file_with_thresholds_estimated_on_odd_even_chromosomes += "_TSS" else: name_of_output_file_with_thresholds_estimated_on_odd_even_chromosomes += "_GENE" output = open( name_of_output_file_with_thresholds_estimated_on_odd_even_chromosomes, "w") output.write("#" + "\t".join([ "Data_set", "FDR", "Precision", "True_links_above_FDR", "Threshold" ]) + "\n") TSS_coordinates = config_variables.negative_interactions.extract_TSS_coordinates( config_variables.upstream) def positive_negative_interactions_for_MAP(chrom): indexes_p, indexes_e, total_p, total_e = negative_interactions.initialise_variables( chrom)[2:] if mode == "promoter_enhancer_interactions": false_inter_pro = negative_interactions.chrom_specific_negative_interactions( chrom, mode) i_s_f, j_s_f = false_inter_pro[:, 0] + total_p, false_inter_pro[:, 1] + total_e if mode == "enhancer_enhancer_interactions": false_inter_enh = negative_interactions.chrom_specific_negative_interactions( chrom, mode) i_s_f, j_s_f = false_inter_enh[:, 0] + total_e, false_inter_enh[:, 1] + total_e return i_s_f, j_s_f def filter_interactions_in_domain(posterior_t, posterior_f, chrom, domain, invert_domain): enh_coordinates, pro_coordinates, indexes_p, indexes_e, total_p, total_e = negative_interactions.initialise_variables( chrom) i_s_f, j_s_f = positive_negative_interactions_for_MAP(chrom) length_chr = len(indexes_p) + len(indexes_e) interaction_matrix = np.zeros((length_chr, length_chr)) posterior_t, posterior_f = posterior_t[chrom], posterior_f[chrom] if domain: if config_variables.TSS_or_intra_genic_for_domain_filter == "Intra_genic": coords_pro_domain = pro_coordinates[indexes_p] elif 
config_variables.TSS_or_intra_genic_for_domain_filter == "TSS_only": coords_pro_domain = np.column_stack( (TSS_coordinates[indexes_p] - 1, TSS_coordinates[indexes_p] + 1)) domain_matrix = interacting_domain.interacting_domains( coords_pro_domain, enh_coordinates[indexes_e], chrom, 'left', True) domain_matrix = domain_matrix + interacting_domain.interacting_domains( coords_pro_domain, enh_coordinates[indexes_e], chrom, 'right', True) if invert_domain: domain_matrix = np.invert(domain_matrix) else: domain_matrix = True if mode == "promoter_enhancer_interactions": #chr_interactions_dict_pro_enh = config_variables.chr_interactions_dict_pro_enh #true_inter_pro = un_string(chr_interactions_dict_pro_enh[chrom][:, :2]).astype(int) #i_s_t, j_s_t = true_inter_pro[:,0], true_inter_pro[:,1] if config_variables.disentagled_features_validation: chr_interactions_pro_enh = config_variables.chr_interactions_dict_pro_enh_TSS[ chrom] else: chr_interactions_pro_enh = config_variables.chr_interactions_dict_pro_enh[ chrom] true_inter_pro = un_string( chr_interactions_pro_enh[:, :2]).astype(int) i_s_t, j_s_t = true_inter_pro[:, 0], true_inter_pro[:, 1] interaction_matrix[i_s_t - total_p, j_s_t + len(indexes_p) - total_e] = posterior_t interaction_matrix[i_s_f - total_p, j_s_f + len(indexes_p) - total_e] = posterior_f interacting_mask = np.zeros_like(interaction_matrix).astype(bool) interacting_mask[i_s_t - total_p, j_s_t + len(indexes_p) - total_e] = True true_pro_enh_inter_filtered = interacting_mask * domain_matrix print np.sum(true_pro_enh_inter_filtered) chrom_posterior_t_filtered = interaction_matrix[ true_pro_enh_inter_filtered] interacting_mask = np.zeros_like(interaction_matrix).astype(bool) interacting_mask[i_s_f - total_p, j_s_f + len(indexes_p) - total_e] = True false_pro_enh_inter_filtered = interacting_mask * domain_matrix print np.sum(false_pro_enh_inter_filtered) chrom_posterior_f_filtered = interaction_matrix[ false_pro_enh_inter_filtered] return 
chrom_posterior_t_filtered, chrom_posterior_f_filtered if mode == "enhancer_enhancer_interactions": chr_interactions_dict_enh_enh = config_variables.chr_interactions_dict_enh_enh true_inter_enh = un_string( chr_interactions_dict_enh_enh[chrom][:, :2]).astype(int) i_s_t, j_s_t = true_inter_enh[:, 0], true_inter_enh[:, 1] interaction_matrix[i_s_t + len(indexes_p) - total_e, j_s_t + len(indexes_p) - total_e] = posterior_t interaction_matrix[i_s_f + len(indexes_p) - total_e, j_s_f + len(indexes_p) - total_e] = posterior_f interaction_matrix[ j_s_t + len(indexes_p) - total_e, i_s_t + len(indexes_p) - total_e] = posterior_t # transpose to create a full matrix interaction_matrix[ j_s_f + len(indexes_p) - total_e, i_s_f + len(indexes_p) - total_e] = posterior_f # transpose to create a full matrix interacting_mask = np.zeros_like(interaction_matrix).astype(bool) interacting_mask[i_s_t + len(indexes_p) - total_e, j_s_t + len(indexes_p) - total_e] = True true_enh_enh_inter_filtered = interacting_mask * domain_matrix chrom_posterior_t_filtered = interaction_matrix[ true_enh_enh_inter_filtered] interacting_mask = np.zeros_like(interaction_matrix).astype(bool) interacting_mask[i_s_f + len(indexes_p) - total_e, j_s_f + len(indexes_p) - total_e] = True false_enh_enh_inter_filtered = interacting_mask * domain_matrix chrom_posterior_f_filtered = interaction_matrix[ false_enh_enh_inter_filtered] return chrom_posterior_t_filtered, chrom_posterior_f_filtered from prepare_interactions_clean import un_string normalised = False import interacting_domain import itertools def domain_filter(inpu, domain, invert_domain): posterior_t, posterior_f = inpu chrom_posterior_t_filtered, chrom_posterior_f_filtered = {}, {} for chrom__ in chroms_to_infer: chrom_posterior_t_filtered[chrom__], chrom_posterior_f_filtered[ chrom__] = filter_interactions_in_domain( posterior_t, posterior_f, chrom__, domain, invert_domain) posterior_t_filtered = np.array( list( itertools.chain.from_iterable([ 
chrom_posterior_t_filtered[chrom_] for chrom_ in chroms_to_infer ]))) posterior_f_filtered = np.array( list( itertools.chain.from_iterable([ chrom_posterior_f_filtered[chrom_] for chrom_ in chroms_to_infer ]))) return posterior_t_filtered, posterior_f_filtered #---------------------------------------------------------------------------------------------------------------------------------------------------------------------- import itertools import matplotlib.pyplot as plt import config_variables np = config_variables.np classificator_elements = config_variables.classificator_elements classifiers_clean = config_variables.classifiers_clean filter_values = config_variables.filter_values datasets_names = config_variables.datasets_names chroms_to_infer = config_variables.chroms_to_infer mode = config_variables.mode #dict_option = {0: 'Pol2_2012-03', 1: 'Pol2', 2: 'H2AZ', 3: 'ER', 4: 'H3K4me3', 5: '2012-03_RNA', 6: 'RNA'} dict_option = dict(zip(range(len(datasets_names)), datasets_names)) def calculate_single_ROC_best_True_sensitivity( probabilities_true, probabilities_false, length_of_positives, length_of_negatives, percent_1, percent_2, percent_3, thresh=False, give_indexes_for_thresholds=False): _True_positives_of_threshold = [] _False_positives_of_threshold = [] sorted_prob_true = np.sort(probabilities_true) sorted_prob_false = np.sort(probabilities_false) sorted_thresholds = np.sort( np.unique(np.r_[probabilities_true, probabilities_false])) sorted_thresholds = np.unique(np.r_[sorted_thresholds, np.max(sorted_thresholds) * 1.01]) len_prob_true = len(probabilities_true) len_prob_false = len(probabilities_false) print 'len prob: ', len_prob_true, len_prob_false _True_positives_of_threshold = np.cumsum( np.histogram(sorted_prob_true, sorted_thresholds)[0][::-1]) _False_positives_of_threshold = np.cumsum( np.histogram(sorted_prob_false, sorted_thresholds)[0][::-1]) Precision = np.array(_True_positives_of_threshold, dtype=float) / ( 
np.array(_True_positives_of_threshold, dtype=float) + np.array(_False_positives_of_threshold, dtype=float)) True_positive_Rate = np.array(_True_positives_of_threshold) / float( len_prob_true) #float(length_of_positives)# False_positive_Rate = np.array(_False_positives_of_threshold) / float( len_prob_false) #float(length_of_negatives)# if give_indexes_for_thresholds: threshold_1, threshold_2, threshold_3 = percent_1, percent_2, percent_3 index_100_first_occurance = np.where( sorted_thresholds[::-1] <= threshold_1)[0][0] - 1 index_200_first_occurance = np.where( sorted_thresholds[::-1] <= threshold_2)[0][0] - 1 index_300_first_occurance = np.where( sorted_thresholds[::-1] <= threshold_3)[0][0] - 1 threshold_1, threshold_2, threshold_3 = [], [], [] return True_positive_Rate, False_positive_Rate, Precision, index_100_first_occurance, index_200_first_occurance, index_300_first_occurance, threshold_1, threshold_2, threshold_3 else: index_100_first_occurance = np.where( True_positive_Rate >= percent_1)[0][0] index_200_first_occurance = np.where( True_positive_Rate >= percent_2)[0][0] index_300_first_occurance = np.where( True_positive_Rate >= percent_3)[0][0] if config_variables.FDR_mode and thresh: for FDR in config_variables.FDR: print "Precision", Precision[:40] print "TPR", True_positive_Rate[:40] try: left_most = np.where(Precision >= 1 - FDR)[0][-1] to_save = "\t".join( np.array([ config_variables.comb, config_variables.dist_or_correl_attribute, FDR, Precision[left_most], _True_positives_of_threshold[left_most], sorted_thresholds[::-1][left_most + 2] ]).astype("|S30") ) # should be sorted_thresholds[::-1][left_most + 1] but +2 corrects rounding errors caused by saving into a txt file. 
output.write(to_save + "\n") #var = raw_input("Please enter something (pause): ") except: to_save = "\t".join([ config_variables.comb, config_variables.dist_or_correl_attribute, str(FDR), "N\A", "N\A", "N\A" ]) output.write(to_save + "\n") if thresh: threshold_1 = sorted_thresholds[::-1][index_100_first_occurance + 1] threshold_2 = sorted_thresholds[::-1][index_200_first_occurance + 1] threshold_3 = sorted_thresholds[::-1][index_300_first_occurance + 1] return True_positive_Rate, False_positive_Rate, Precision, index_100_first_occurance, index_200_first_occurance, index_300_first_occurance, threshold_1, threshold_2, threshold_3 #print 'number of thresholds', len(True_positive_Rate), len(False_positive_Rate) #return True_positive_Rate, False_positive_Rate, Precision, index_100_first_occurance, index_200_first_occurance, index_300_first_occurance from pylab import rcParams #rcParams['figure.figsize'] = 20, 8 #stuff = [0, 1, 2, 3, 4] import selected_combinations as sel combinations, selected_combinations = sel.selected_combinations( option_to_plot) filter_values_ = filter_values[[0]] #http://stackoverflow.com/questions/14270391/python-matplotlib-multiple-bars - alternatively #http://matplotlib.org/examples/pylab_examples/subplots_demo.html percent_1_, percent_2_, percent_3_ = 0.1, 0.2, 0.3 dict_option_ = dict_option datasets_names_ = datasets_names if "correl_dist" in type_of_models: threshold_1_dist_correl, threshold_2_dist_correl, threshold_3_dist_correl = {}, {}, {} if "correl" in type_of_models: threshold_1_correl, threshold_2_correl, threshold_3_correl = {}, {}, {} if "dist" in type_of_models: threshold_1_dist, threshold_2_dist, threshold_3_dist = {}, {}, {} if "MOG_correl_dist" in type_of_models: threshold_1_dist_correl_MOG, threshold_2_dist_correl_MOG, threshold_3_dist_correl_MOG = {}, {}, {} if "MOG_dist" in type_of_models: threshold_1_dist_MOG, threshold_2_dist_MOG, threshold_3_dist_MOG = {}, {}, {} #total_number_of_interacting_enhancers = 
# config_variables.total_number_of_interacting_enhancers   (tail of the disabled assignment)

# ---------------------------------------------------------------------------
# Precision / MAP panels.
#
# Three passes are made, one figure (one PDF page) each:
#   * unsplit data          (thresh=True): the ROC helper returns the score
#     thresholds found at the requested TPR levels; they are stored per
#     combination in the threshold_*_ dictionaries;
#   * "within_domain" and "outside_domain"
#     (give_indexes_for_thresholds=True): the stored thresholds are handed
#     back to the ROC helper in place of the TPR levels.
# Every figure has one panel per selected combination.  For each active
# model a panel shows the precision at the three cut-offs plus the value
# taken from sensitivity_match_MAP, with error bars sqrt(p * (1 - p) / n).
#
# NOTE(review): the original text of this section had lost its indentation.
# The nesting below was reconstructed from the data flow; please confirm
#   - that the legend / pdf.savefig() calls belong once per figure, and
#   - that the "domain adjusted" lengths are meant to be applied always
#     (the earlier full_len based lengths were overwritten by them and are
#     therefore no longer computed).
# ---------------------------------------------------------------------------

# (domain_atr, domain, invert_domain, thresh, give_indexes_for_thresholds)
# A plain tuple keeps None / bool / str as they are; the former np.array
# relied on numpy falling back to an object array for the mixed values.
domain_passes = (
    (None, False, False, True, False),
    ("within_domain", True, False, False, True),
    ("outside_domain", True, True, False, True),
)

# (model key, FDR attribute label, sensitivity keyed by combination?,
#  marker, colour, legend label) -- in the order the models are evaluated
# and drawn.
# NOTE(review): the two MOG models never set
# config_variables.dist_or_correl_attribute, so in FDR mode their rows are
# written with the label left behind by the previous model.  Kept as is
# (label None == "leave untouched"); confirm whether that is intended.
model_styles = (
    ("correl_dist", "distance_correl", True, "^", "b", "data+distance"),
    ("dist", "distance", False, "s", "darkviolet", "distance"),
    ("correl", "correl", True, "o", "red", "data"),
    ("MOG_correl_dist", None, True, "*", "cyan", "LVM data+prior"),
    ("MOG_dist", None, False, "v", "green", "LVM prior"),
)
active_models = [style for style in model_styles if style[0] in type_of_models]

# The threshold dictionaries exist only for the models in type_of_models, so
# they are collected under the same conditions that created them.
threshold_stores = {}
if "correl_dist" in type_of_models:
    threshold_stores["correl_dist"] = (
        threshold_1_dist_correl, threshold_2_dist_correl, threshold_3_dist_correl)
if "dist" in type_of_models:
    threshold_stores["dist"] = (
        threshold_1_dist, threshold_2_dist, threshold_3_dist)
if "correl" in type_of_models:
    threshold_stores["correl"] = (
        threshold_1_correl, threshold_2_correl, threshold_3_correl)
if "MOG_correl_dist" in type_of_models:
    threshold_stores["MOG_correl_dist"] = (
        threshold_1_dist_correl_MOG, threshold_2_dist_correl_MOG, threshold_3_dist_correl_MOG)
if "MOG_dist" in type_of_models:
    threshold_stores["MOG_dist"] = (
        threshold_1_dist_MOG, threshold_2_dist_MOG, threshold_3_dist_MOG)

# Unsplit (positive, negative) posteriors remembered from the first pass.
# Per-combination entries are keyed by (model, panel index).  Previously a
# single variable per model was reused, so the domain passes silently worked
# on the posteriors of the LAST combination for every panel.
unsplit_cache = {}


def _positive_distance_count(filter_key):
    # Number of positive-interaction distance posteriors over chroms_to_infer.
    per_chrom = classificator_elements[filter_key][mode][
        "positive_interactions"]["distance"][
        "probabilities_of_being_positive_interactions"][
        "posterior_component_values"]
    return sum(len(per_chrom[chrom_]) for chrom_ in chroms_to_infer)


def _domain_split(cache_key, fetch_unsplit, refresh, domain, invert_domain):
    # Fetch (first pass only) and cache the unsplit posteriors, then return
    # whatever domain_filter makes of them for the current pass.
    if refresh:
        unsplit_true, unsplit_false = fetch_unsplit()
        unsplit_cache[cache_key] = (unsplit_true, unsplit_false)
    return domain_filter(unsplit_cache[cache_key], domain, invert_domain)


centres_of_ticks = np.arange(4) + 0.5
ind = centres_of_ticks
OX = [0.1, "0.2\n TPR ", 0.3, "\nMAP"]

for domain_atr, domain, invert_domain, thresh, give_indexes_for_thresholds in domain_passes:
    first_pass = not domain_atr
    domain_key = "unsplit" if domain_atr is None else domain_atr

    # Tick label sizes have to be set before the axes are created.
    plt.rcParams['xtick.labelsize'] = 28
    plt.rc('ytick', labelsize=28)

    # squeeze=False keeps `ax` indexable when there is a single combination.
    if option_to_plot == "ALL":
        f, ax = plt.subplots(1, len(selected_combinations), sharex=True,
                             sharey=True, figsize=(50, 10), squeeze=False)
        ax = ax[0]
        f.subplots_adjust(left=0.035, bottom=0.1, right=0.975, top=0.925,
                          hspace=0.1, wspace=0.1)
        legend_box_names_font_size = 20
        size_of_combination_name = 23
        size_of_y_label = 30
    elif option_to_plot == "SELECTIVE":
        f, ax = plt.subplots(1, len(selected_combinations), sharex=True,
                             sharey=True, figsize=(20, 10), squeeze=False)
        ax = ax[0]
        f.subplots_adjust(left=0.085, bottom=0.15, right=0.965, top=0.925,
                          hspace=0.1, wspace=0.05)
        legend_box_names_font_size = 22
        size_of_combination_name = 35
        size_of_y_label = 35

    red_blue_yellow_cyan_marker_size = 12
    red_blue_yellow_cyan_marker_size_legend_box = 19
    ax[0].set_ylabel('Precision', fontsize=size_of_y_label)

    # Legend proxy artists.  Nothing in this section uses them (the legend
    # below is built from the plotted handles); they are kept in case code
    # outside this section still refers to them.
    import matplotlib.lines as mlines
    blue_line = mlines.Line2D(
        [], [], color='blue', marker='^',
        markersize=red_blue_yellow_cyan_marker_size_legend_box, label='data+prior')
    yellow_line = mlines.Line2D(
        [], [], color='darkviolet', marker='s',
        markersize=red_blue_yellow_cyan_marker_size_legend_box, label='prior')
    red_line = mlines.Line2D(
        [], [], color='red', marker='o',
        markersize=red_blue_yellow_cyan_marker_size_legend_box, label='data')
    pink_line = mlines.Line2D(
        [], [], color='cyan', marker='*',
        markersize=red_blue_yellow_cyan_marker_size_legend_box, label='LVM data+prior')
    green_line = mlines.Line2D(
        [], [], color='green', marker="v",
        markersize=red_blue_yellow_cyan_marker_size_legend_box, label='LVM prior')

    for index_filt_val, filter_value in enumerate(filter_values_):

        # TPR levels rescaled by (all positives) / (positives kept by the
        # filter).  Independent of the combination, hence computed once here.
        full_len = _positive_distance_count(-1.)
        new_len = _positive_distance_count(filter_value)
        per = float(full_len) / float(new_len)
        percent_1 = percent_1_ * per
        percent_2 = percent_2_ * per
        percent_3 = percent_3_ * per

        # Models whose posteriors do not depend on the combination.
        shared_posteriors = {}
        for shared_model in ("dist", "MOG_dist"):
            if shared_model in type_of_models:
                shared_posteriors[shared_model] = _domain_split(
                    shared_model,
                    lambda: (posterior_MOG["positive_interactions"][shared_model],
                             posterior_MOG["negative_interactions"][shared_model]),
                    first_pass, domain, invert_domain)

        for index_opt, option_ in enumerate(selected_combinations):
            comb = ",".join([dict_option_[el] for el in option_])
            comb_MOG = "_".join([dict_option[el] for el in option_])
            if option_ == combinations[-1]:
                comb = "All"
            if config_variables.FDR_mode:
                config_variables.comb = comb
            print(comb)

            posteriors = dict(shared_posteriors)

            # Always needed: its sizes are the domain-adjusted class counts.
            posteriors["correl_dist"] = _domain_split(
                ("correl_dist", index_opt),
                lambda: classifiers_clean.posterior_producer(
                    [0], option_, total_posterior=False),
                first_pass, domain, invert_domain)

            if "correl" in type_of_models:
                posteriors["correl"] = _domain_split(
                    ("correl", index_opt),
                    lambda: classifiers_clean.posterior_producer(
                        [], option_, total_posterior=False),
                    first_pass, domain, invert_domain)

            if "MOG_correl_dist" in type_of_models:
                posteriors["MOG_correl_dist"] = _domain_split(
                    ("MOG_correl_dist", index_opt),
                    lambda: (posterior_MOG["positive_interactions"]["MOG_correl_dist"][comb_MOG],
                             posterior_MOG["negative_interactions"]["MOG_correl_dist"][comb_MOG]),
                    first_pass, domain, invert_domain)

            posterior_correl_dist_true, posterior_correl_dist_false = posteriors["correl_dist"]
            length_of_positives_pro = len(posterior_correl_dist_true)    # domain adjusted
            length_of_negatives_pro = len(posterior_correl_dist_false)   # domain adjusted

            # ---- ROC statistics, one call per active model ----------------
            roc_by_model = {}
            for model, fdr_label, _per_comb, _marker, _colour, _label in active_models:
                if give_indexes_for_thresholds:
                    # Domain passes: cut at the thresholds of the unsplit pass.
                    cut_1, cut_2, cut_3 = [store[comb] for store in threshold_stores[model]]
                else:
                    cut_1, cut_2, cut_3 = percent_1, percent_2, percent_3

                if fdr_label is not None:
                    config_variables.dist_or_correl_attribute = fdr_label

                positives, negatives = posteriors[model]
                roc_by_model[model] = calculate_single_ROC_best_True_sensitivity(
                    positives, negatives,
                    length_of_positives_pro, length_of_negatives_pro,
                    cut_1, cut_2, cut_3,
                    thresh=thresh,
                    give_indexes_for_thresholds=give_indexes_for_thresholds)

                if thresh:
                    # Elements 6..8 of the result are threshold_1..3.
                    for store, value in zip(threshold_stores[model],
                                            roc_by_model[model][6:9]):
                        store[comb] = value

            # ---- panel cosmetics ------------------------------------------
            panel = ax[index_opt]
            panel.vlines(3., 0, 1, colors=u'SlateGray', linestyles=u'dashed')
            panel.set_xlim([0., 4.])
            panel.set_ylim([0., 1.])
            panel.set_xticks(centres_of_ticks)
            panel.set_xticklabels(np.array(OX, str))
            panel.set_title(comb, fontsize=size_of_combination_name)

            # ---- points and error bars ------------------------------------
            for model, _fdr_label, per_combination, marker, colour, label in active_models:
                (true_positive_rate, false_positive_rate, precision,
                 index_100, index_200, index_300) = roc_by_model[model][:6]
                picks = [index_100, index_200, index_300]

                # Looked up only for active models, so sensitivity_match_MAP
                # needs no entries for models that are switched off.
                sensitivity = sensitivity_match_MAP[model][domain_key]
                if per_combination:
                    sensitivity = sensitivity[comb_MOG]

                probabilities = np.r_[precision[picks], sensitivity]
                # n = TP + FP at each cut-off; the sensitivity point uses
                # number_of_interacting_enhancers_.
                n = np.r_[length_of_positives_pro * true_positive_rate[picks]
                          + false_positive_rate[picks] * length_of_negatives_pro,
                          number_of_interacting_enhancers_]
                yerr = (probabilities * (1 - probabilities) / n)**0.5

                panel.errorbar(ind, probabilities, yerr=yerr, fmt=marker,
                               color=colour, alpha=0.5, linewidth=3.,
                               markersize=red_blue_yellow_cyan_marker_size)
                panel.plot(ind, probabilities, alpha=1.0, color=colour,
                           marker=marker, linewidth=0.0,
                           markersize=red_blue_yellow_cyan_marker_size,
                           label=label)

    handles, labels = ax[0].get_legend_handles_labels()
    ax[0].legend(handles, labels, fontsize=legend_box_names_font_size,
                 numpoints=1, handletextpad=0.0, borderpad=0.2,
                 labelspacing=0.2)

    pdf.savefig()

if config_variables.FDR_mode:
    output.close()
pdf.close()
#plt.show()
plt.close("all")