def correlation_calculator(data_set_name):
        """Correlate promoter and enhancer time series for ``data_set_name``
        and split the correlations into true vs. negative interaction values.

        Relies on enclosing-scope names: ``mode``, ``chrom``, ``indexes_p``,
        ``indexes_e``, ``total_p``, ``total_e``,
        ``negative_of_type_of_interactions`` and
        ``mask_feature_uniqueness_enh_enh``.

        Returns a 3-tuple: (correlation sub-matrix, correlations of true
        interactions, correlations of false interactions) for whichever
        interaction type ``mode`` requests.
        """

        # Map the data-set name to the stored time-series keys.
        name_of_pro_t_s = link_data_set_name_to_file_name["promoters"][
            data_set_name]
        name_of_enh_t_s = link_data_set_name_to_file_name["enhancers"][
            data_set_name]

        # Element [2] of each record holds the time-series matrix
        # (records are (chroms, coordinates, time_series) elsewhere in file).
        pro_time_series = dataset_time_series_dict[name_of_pro_t_s][2]
        enh_time_series = dataset_time_series_dict[name_of_enh_t_s][2]

        print 'correlator: start'
        # Restrict to the features of the current chromosome.
        pro_ts = pro_time_series[indexes_p]
        enh_ts = enh_time_series[indexes_e]
        chrom_correlations_matrix = calculate_correlations(pro_ts, enh_ts)
        print 'correlator: stop'

        if "promoter_enhancer_interactions" in mode:

            # Prefer the TSS-disentangled interactions when that validation
            # mode is switched on.
            if config_variables.disentagled_features_validation:
                chr_interactions_pro_enh = config_variables.chr_interactions_dict_pro_enh_TSS[
                    chrom]
            else:
                chr_interactions_pro_enh = chr_interactions_dict_pro_enh[chrom]

            pro_enh_indexes = un_string(chr_interactions_pro_enh[:, :2])

            # The full matrix is laid out promoters first, then enhancers
            # (hence the len(indexes_p) offsets): take the enhancer-rows x
            # promoter-columns sub-matrix.
            correl_from_enhancer_to_promoters = chrom_correlations_matrix[
                len(indexes_p):len(indexes_p) + len(indexes_e),
                0:len(indexes_p)]
            # True interactions carry absolute ids; rebase by total_e/total_p.
            correl_of_true_pro_enh_inter = correl_from_enhancer_to_promoters[
                pro_enh_indexes[:, 1] - total_e,
                pro_enh_indexes[:, 0] - total_p]
            # Negative interactions are already chromosome-local indexes.
            correl_of_false_pro_enh_inter = correl_from_enhancer_to_promoters[
                negative_of_type_of_interactions[:, 1],
                negative_of_type_of_interactions[:, 0]]

            print 'nan_correl', (
                np.isnan(correl_of_false_pro_enh_inter)).any(), (
                    np.isnan(correl_of_true_pro_enh_inter)).any()

            return correl_from_enhancer_to_promoters, correl_of_true_pro_enh_inter, correl_of_false_pro_enh_inter

        if "enhancer_enhancer_interactions" in mode:
            chr_interactions_enh_enh = chr_interactions_dict_enh_enh[chrom]
            enh_enh_indexes = un_string(chr_interactions_enh_enh[:, :2])

            # Enhancer-rows x enhancer-columns sub-matrix.
            correl_from_enhancer_to_enhancers = chrom_correlations_matrix[
                len(indexes_p):len(indexes_p) + len(indexes_e),
                len(indexes_p):len(indexes_p) + len(indexes_e)]
            correl_of_true_enh_enh_inter = correl_from_enhancer_to_enhancers[
                enh_enh_indexes[:, 0] - total_e,
                enh_enh_indexes[:, 1] - total_e]
            correl_of_false_enh_enh_inter = correl_from_enhancer_to_enhancers[
                mask_feature_uniqueness_enh_enh]

            print 'nan_correl', (
                np.isnan(correl_of_false_enh_enh_inter)).any(), (
                    np.isnan(correl_of_true_enh_enh_inter)).any()

            return correl_from_enhancer_to_enhancers, correl_of_true_enh_enh_inter, correl_of_false_enh_enh_inter
	def promoter_enhancer_interactions_generator():
		"""Generate negative (false) promoter-enhancer interaction pairs.

		Builds a boolean exclusion matrix over this chromosome's features
		(promoters first, then enhancers) where True marks pairs that must
		NOT become negatives (diagonal, lower triangle, promoter-promoter
		and enhancer-enhancer blocks, filtered-out features, the true
		interactions themselves, ...).  The cells still False that fall
		inside ``domain_matrix`` become the negatives.

		Returns an (N, 2) array of (promoter-local, enhancer-local) index
		pairs.
		"""

	
		chr_interactions_pro_enh = chr_interactions_dict_pro_enh[chrom]

		if config_variables.alternative_classificator_outside_enhancers: chrom_interacting_enhancers_pro = config_variables.chrom_interacting_enhancers_pro[chrom]
		else: chrom_interacting_enhancers_pro = np.unique(un_string(chr_interactions_dict_pro_enh[chrom])[:,1])
		chrom_interacting_promoters_pro = np.unique(un_string(chr_interactions_dict_pro_enh[chrom])[:,0])

		interaction_matrix = np.zeros((length_chr, length_chr), bool)
	
		interaction_matrix[range(length_chr), range(length_chr)] = True	# gets rid of diagonal
		interaction_matrix[np.tril_indices(length_chr)] = True # gets rid of symmetric interactions
	
		interaction_matrix[0:len(indexes_p), 0:len(indexes_p)] = True # gets rid of promoter_promoter_interactions

		features = np.array(['p{0}'.format(ind) for ind in indexes_p] + ['e{0}'.format(ind) for ind in indexes_e]) # creates a frame with chromosome specific interactions

		true_pro_enh_indexes = un_string(chr_interactions_pro_enh)

		print 'number of pro_enh true interactions: ', len(chr_interactions_pro_enh)

		if len(chrom_pro_not_survived): interaction_matrix[chrom_pro_not_survived - total_p, :] = True # gets rid of negative interactions which could be generated by filtered promoters

		if interacting_negatives:
			# Restrict negatives to rows of promoters that actually interact:
			# blank out every non-interacting promoter's enhancer columns.
			mask_interacting_promoters = np.zeros(length_chr).astype(bool)# we don't have to filter out enhancers which didn't pass the filter thresold. Since we consider only the interacting enhancers that's a subset of survived enhnacers.
			mask_interacting_promoters[chrom_interacting_promoters_pro - total_p] = True
			mask_non_interacting_promoters = np.invert(mask_interacting_promoters)	
			interaction_matrix[mask_non_interacting_promoters, len(indexes_p):] = True # it's equivalent to interacting_enhancers_mask_invert

		#if config_variables.disentagled_features_validation: 
			#true_pro_enh_indexes = un_string(config_variables.chr_interactions_dict_pro_enh_TSS[chrom])
			#chrom_interacting_enhancers_pro = np.unique(true_pro_enh_indexes[:, 1])

		mask_interacting_enhancers = np.zeros(length_chr).astype(bool)# we don't have to filter out enhancers which didn't pass the filter thresold. Since we consider only the interacting enhancers that's a subset of survived enhnacers.
		mask_interacting_enhancers[chrom_interacting_enhancers_pro - total_e + len(indexes_p)] = True
		mask_non_interacting_enhancers = np.invert(mask_interacting_enhancers)	
		#interaction_matrix[:len(indexes_p), mask_non_interacting_enhancers] = True # it's equivalent to interacting_enhancers_mask_invert

		if interacting_enhancers_only or prior_mode: interaction_matrix[:len(indexes_p), mask_non_interacting_enhancers] = True # it's equivalent to interacting_enhancers_mask_invert
		elif len(chrom_enh_not_survived): interaction_matrix[:len(indexes_p), len(indexes_p) + chrom_enh_not_survived - total_e] = True # gets rid of filtered out enhancers which could be causing nans due to their correlations 
		if distant_enh_only and len(dict_chrom_proximal[chrom]): interaction_matrix[:len(indexes_p), len(indexes_p) + dict_chrom_proximal[chrom] - total_e] = True
	
		# Exclude the true interactions so they can never be sampled as negatives.
		interaction_matrix[true_pro_enh_indexes[:, 0] - total_p, true_pro_enh_indexes[:, 1] - total_e + len(indexes_p)] = True

		interaction_matrix[len(indexes_p): len(indexes_p) + len(indexes_e), len(indexes_p): len(indexes_p) + len(indexes_e)] = True # gets rid of enhancers-enhancer block

		
		# Negatives = cells still False AND allowed by domain_matrix.
		indexes_of_zero_interactions = np.where(True == np.invert(interaction_matrix)*domain_matrix)
		column_1st = indexes_of_zero_interactions[0]
		column_2nd = indexes_of_zero_interactions[1] - len(indexes_p)

		prom_enh_false_interactions = np.concatenate((column_1st[:,None], column_2nd[:,None]), axis=1)
		#pro-enh interactions end-----------------------------------------------------------------
		return prom_enh_false_interactions
	def enhancer_enhancer_interactions_generator():
		"""Generate negative (false) enhancer-enhancer interaction pairs.

		Uses the same exclusion-matrix approach as the promoter-enhancer
		generator: True cells are excluded and the remaining False cells
		inside ``domain_matrix`` become the negatives.  Returns an (N, 2)
		array of (enhancer-local, enhancer-local) index pairs.
		"""

		chr_interactions_enh_enh = chr_interactions_dict_enh_enh[chrom]
		# Enhancers appearing on either side of a true enh-enh interaction.
		chrom_interacting_enhancers_enh = np.unique(un_string(chr_interactions_dict_enh_enh[chrom])[:,0])
		chrom_interacting_enhancers_enh = np.unique(np.r_[chrom_interacting_enhancers_enh, np.unique(un_string(chr_interactions_dict_enh_enh[chrom])[:,1])])

		interaction_matrix = np.zeros((length_chr, length_chr), bool)
		interaction_matrix[range(length_chr), range(length_chr)] = True	# gets rid of diagonal
		interaction_matrix[0:len(indexes_p), 0:len(indexes_p)] = True # gets rid of promoter_promoter_interactions
		interaction_matrix[:len(indexes_p), len(indexes_p): len(indexes_p) + len(indexes_e)] = True # gets rid of promoter-enhancer block

		print 'number of enh_enh true interactions: ', len(chr_interactions_enh_enh)
		#enh-enh interactions start-----------------------------------------------------------------

	
		if len(chrom_enh_not_survived): interaction_matrix[len(indexes_p) + chrom_enh_not_survived - total_e, :] = True # sorts out raws
		if distant_enh_only and len(dict_chrom_proximal[chrom]): interaction_matrix[len(indexes_p) + dict_chrom_proximal[chrom] - total_e, :] = True

		if interacting_negatives:
		
			# Restrict negatives to rows of enhancers that actually interact.
			mask_interacting_enhancers = np.zeros(length_chr).astype(bool)
			mask_interacting_enhancers[chrom_interacting_enhancers_enh - total_e + len(indexes_p)] = True
			mask_non_interacting_enhancers = np.invert(mask_interacting_enhancers)	
			interaction_matrix[mask_non_interacting_enhancers, len(indexes_p):] = True


		#sort out columns--------------------------------------
		mask_interacting_enhancers = np.zeros(length_chr).astype(bool)
		mask_interacting_enhancers[chrom_interacting_enhancers_enh - total_e + len(indexes_p)] = True
		mask_non_interacting_enhancers = np.invert(mask_interacting_enhancers)	

		if interacting_enhancers_only or prior_mode: interaction_matrix[len(indexes_p):, mask_non_interacting_enhancers] = True # it's equivalent to interacting_enhancers_mask_invert
		elif len(chrom_enh_not_survived): interaction_matrix[len(indexes_p):, len(indexes_p) + chrom_enh_not_survived - total_e] = True # gets rid of filtered out enhancers which could be causing nans due to their correlations 
		if distant_enh_only and len(dict_chrom_proximal[chrom]): interaction_matrix[len(indexes_p):, len(indexes_p) + dict_chrom_proximal[chrom] - total_e] = True

			
		#sort out columns--------------------------------------end

		# Exclude true interactions in both orientations, then the lower
		# triangle so each unordered pair is considered only once.
		true_enh_enh_indexes = un_string(chr_interactions_enh_enh)
		interaction_matrix[true_enh_enh_indexes[:,0] - total_e + len(indexes_p), true_enh_enh_indexes[:,1] - total_e + len(indexes_p)] = True
		interaction_matrix[true_enh_enh_indexes[:,1] - total_e + len(indexes_p), true_enh_enh_indexes[:,0] - total_e + len(indexes_p)] = True
		interaction_matrix[np.tril_indices(length_chr)] = True # gets rid of symmetric interactions
	
		indexes_of_zero_interactions = np.where(True == np.invert(interaction_matrix)*domain_matrix)
		column_1st = indexes_of_zero_interactions[0] - len(indexes_p)
		column_2nd = indexes_of_zero_interactions[1] - len(indexes_p)

		enh_enh_false_interactions = np.concatenate((column_1st[:, None], column_2nd[:, None]), axis=1)
				

		return enh_enh_false_interactions
	def inter_enhancer(chrom):
		"""Return positions, within the surviving enhancers of ``chrom``,
		of enhancers that take part in at least one true promoter-enhancer
		interaction."""

		# Records are (chroms, coordinates, time_series).
		pro_chroms, pro_coords, pro_time_series = dataset_time_series_dict[link_data_set_name_to_file_name["promoters"]["ER"]]
		enh_chroms, enh_coords, enh_time_series = dataset_time_series_dict[link_data_set_name_to_file_name["enhancers"]["ER"]]

		filtered_enhancers = config_variables.filtered_enhancers
		proximal_enhancers_mask = config_variables.proximal_enhancers_mask

		# Enhancers on this chromosome that passed filtering and are not
		# flagged as proximal.
		chrom_enh_survived = np.where((enh_chroms == chrom)*np.invert(proximal_enhancers_mask)*filtered_enhancers)[0]

		negative_interactions = config_variables.negative_interactions
		from  prepare_interactions_clean import un_string

		indexes_p, indexes_e, total_p, total_e = negative_interactions.initialise_variables(chrom)[2:]

		if config_variables.disentagled_features_validation: 
			chr_interactions_pro_enh = config_variables.chr_interactions_dict_pro_enh_TSS[chrom]
		else:
			chr_interactions_pro_enh = config_variables.chr_interactions_dict_pro_enh[chrom]

		true_inter_pro = un_string(chr_interactions_pro_enh[:, :2]).astype(int)

		i_s_t, j_s_t = true_inter_pro[:,0], true_inter_pro[:,1]

		# Chromosome-local ids of enhancers seen in true interactions.
		interacting_enhancers_ = np.unique(j_s_t)-total_e

		enhancer_array_survived = np.zeros(len(indexes_e), bool)
		enhancer_array_interacting = np.zeros(len(indexes_e), bool)

		enhancer_array_survived[chrom_enh_survived-total_e] = True
		enhancer_array_interacting[interacting_enhancers_] = True

		# For each surviving enhancer (in order), True if it also interacts;
		# the final np.where converts that into positions within the
		# surviving list.
		mask_interacting_c = np.in1d(np.where(enhancer_array_survived)[0], np.where(enhancer_array_interacting)[0])

		return np.where(mask_interacting_c)[0]
Ejemplo n.º 5
0
	def get_MAPS_for_domain_non_domain_interacting_enhancers(domain_atr):
		"""Build a per-chromosome boolean mask over the unique interacting
		enhancers of each chromosome in ``chroms_to_infer``.

		``domain_atr`` selects the polarity: "within_domain" keeps enhancers
		present in ``config_variables.chrom_interacting_enhancers_pro``,
		"outside_domain" keeps the complement.  Returns {chrom: mask}.
		"""
		from  prepare_interactions_clean import un_string

		per_chrom_validated = config_variables.chrom_interacting_enhancers_pro

		# Use the TSS-disentangled interaction dictionary when that
		# validation mode is switched on.
		if config_variables.disentagled_features_validation:
			interactions_by_chrom = config_variables.chr_interactions_dict_pro_enh_TSS
		else:
			interactions_by_chrom = config_variables.chr_interactions_dict_pro_enh

		interacting_non_intracting_mask = {}
		for chrom___ in chroms_to_infer:
			enhancer_ids = np.unique(un_string(interactions_by_chrom[chrom___])[:, 1])
			membership = np.in1d(enhancer_ids, per_chrom_validated[chrom___])
			if domain_atr == "within_domain":
				interacting_non_intracting_mask[chrom___] = membership
			elif domain_atr == "outside_domain":
				interacting_non_intracting_mask[chrom___] = np.invert(membership)

		return interacting_non_intracting_mask
Ejemplo n.º 6
0
		def get_MAPS_for_domain_non_domain_interacting_enhancers(domain_atr):
			"""Pool MAP match indicators over the selected interacting
			enhancers and convert them into sensitivities.

			``domain_atr`` selects which enhancers count: "within_domain"
			keeps those present in the per-chromosome validated sets,
			"outside_domain" keeps the complement.

			Returns (sensitivity using correl+dist, using correl only,
			using dist only, total number of selected enhancers).
			"""

			chrom_interacting_enhancers_pro = config_variables.chrom_interacting_enhancers_pro

			from  prepare_interactions_clean import un_string
			interacting_non_intracting_mask = {}

			total_number_of_interacting_enhancers = 0
			for chrom___ in chroms_to_infer:

				# True where a unique interacting enhancer of this chromosome
				# also belongs to the validated per-chromosome set.
				mask = np.in1d(np.unique(un_string(config_variables.chr_interactions_dict_pro_enh[chrom___])[:,1]), chrom_interacting_enhancers_pro[chrom___])
				if domain_atr == "outside_domain":
					interacting_non_intracting_mask[chrom___] = np.invert(mask)
				elif domain_atr == "within_domain":	
					interacting_non_intracting_mask[chrom___] = mask

				total_number_of_interacting_enhancers += np.sum(interacting_non_intracting_mask[chrom___])
			

			# Gather the per-chromosome MAP match values of the selected
			# enhancers into flat lists.
			match_MAP_correl_dist_total = list(itertools.chain.from_iterable([match_MAP_correl_dist[chrom_][interacting_non_intracting_mask[chrom_]] for chrom_ in chroms_to_infer]))
			match_MAP_correl_total = list(itertools.chain.from_iterable([match_MAP_correl[chrom_][interacting_non_intracting_mask[chrom_]] for chrom_ in chroms_to_infer]))
			match_MAP_dist_total = list(itertools.chain.from_iterable([match_MAP_dist[chrom_][interacting_non_intracting_mask[chrom_]] for chrom_ in chroms_to_infer]))

			# sum/len: fraction of matches among the pooled values.
			sensitivity_match_MAP_correl_dist = np.sum(match_MAP_correl_dist_total)/float(len(match_MAP_correl_dist_total))
			sensitivity_match_MAP_correl = np.sum(match_MAP_correl_total)/float(len(match_MAP_correl_total))
			sensitivity_match_MAP_dist = np.sum(match_MAP_dist_total)/float(len(match_MAP_dist_total))

			return sensitivity_match_MAP_correl_dist, sensitivity_match_MAP_correl, sensitivity_match_MAP_dist, total_number_of_interacting_enhancers
Ejemplo n.º 7
0
    def get_MAPS_for_domain_non_domain_interacting_enhancers(domain_atr):
        """Return a {chrom: boolean mask} over each chromosome's unique
        interacting enhancers, keeping those within ("within_domain") or
        outside ("outside_domain") the validated per-chromosome sets."""
        from prepare_interactions_clean import un_string

        validated_sets = config_variables.chrom_interacting_enhancers_pro

        # Source of true interactions depends on the validation mode.
        if config_variables.disentagled_features_validation:
            interactions_source = config_variables.chr_interactions_dict_pro_enh_TSS
        else:
            interactions_source = config_variables.chr_interactions_dict_pro_enh

        interacting_non_intracting_mask = {}

        for chrom___ in chroms_to_infer:
            unique_enhancers = np.unique(
                un_string(interactions_source[chrom___])[:, 1])
            in_validated_set = np.in1d(unique_enhancers,
                                       validated_sets[chrom___])
            if domain_atr == "outside_domain":
                interacting_non_intracting_mask[chrom___] = np.invert(
                    in_validated_set)
            elif domain_atr == "within_domain":
                interacting_non_intracting_mask[chrom___] = in_validated_set

        return interacting_non_intracting_mask
Ejemplo n.º 8
0
	def inter_enhancer(self, chrom):
		"""Return positions, within this object's surviving enhancers
		(``self.chrom_enh_survived``), of enhancers that appear in at least
		one true promoter-enhancer interaction on ``chrom``."""

		negative_interactions = config_variables.negative_interactions
		from  prepare_interactions_clean import un_string

		indexes_p, indexes_e, total_p, total_e = negative_interactions.initialise_variables(chrom)[2:]

		if config_variables.disentagled_features_validation: 
			chr_interactions_pro_enh = config_variables.chr_interactions_dict_pro_enh_TSS[chrom]
		else:
			chr_interactions_pro_enh = config_variables.chr_interactions_dict_pro_enh[chrom]

		true_inter_pro = un_string(chr_interactions_pro_enh[:, :2]).astype(int)

		i_s_t, j_s_t = true_inter_pro[:,0], true_inter_pro[:,1]

		# Chromosome-local ids of enhancers seen in true interactions.
		interacting_enhancers_ = np.unique(j_s_t)-total_e

		enhancer_array_survived = np.zeros(len(indexes_e), bool)
		enhancer_array_interacting = np.zeros(len(indexes_e), bool)

		enhancer_array_survived[self.chrom_enh_survived-total_e] = True
		enhancer_array_interacting[interacting_enhancers_] = True

		# For each surviving enhancer (in order), True if it also interacts;
		# np.where then yields positions within the surviving list.
		mask_interacting_c = np.in1d(np.where(enhancer_array_survived)[0], np.where(enhancer_array_interacting)[0])

		return np.where(mask_interacting_c)[0]
Ejemplo n.º 9
0
def interactions_extractor(chrom):
	"""Return (true promoter-enhancer index pairs rebased to
	chromosome-local numbering, generated negative interactions) for
	``chrom``."""
	positives = un_string(config_variables.chr_interactions_dict_pro_enh[chrom])
	negatives = chrom_specific_negative_interactions.chrom_specific_negative_interactions(chrom, mode)

	# Rebase the absolute promoter/enhancer ids by the chromosome offsets.
	enh_coordinates, pro_coordinates, indexes_p, indexes_e, total_p, total_e = chrom_specific_negative_interactions.initialise_variables(chrom)
	positives[:, 0] -= total_p
	positives[:, 1] -= total_e

	return positives, negatives
	def correlation_calculator(data_set_name):
		"""Correlate promoter/enhancer time series for ``data_set_name`` and
		split the values into true vs. negative interactions for whichever
		interaction type ``mode`` requests.

		Returns (correlation sub-matrix, correlations of true interactions,
		correlations of false interactions).
		"""

		name_of_pro_t_s = link_data_set_name_to_file_name["promoters"][data_set_name]
		name_of_enh_t_s = link_data_set_name_to_file_name["enhancers"][data_set_name]

		# Element [2] of each record holds the time-series matrix.
		pro_time_series = dataset_time_series_dict[name_of_pro_t_s][2]
		enh_time_series = dataset_time_series_dict[name_of_enh_t_s][2]


		print 'correlator: start'
		# Restrict to this chromosome's features before correlating.
		pro_ts = pro_time_series[indexes_p]
		enh_ts = enh_time_series[indexes_e]
		chrom_correlations_matrix = calculate_correlations(pro_ts, enh_ts)
		print 'correlator: stop'

		if "promoter_enhancer_interactions" in mode:

			if config_variables.disentagled_features_validation: 
				chr_interactions_pro_enh = config_variables.chr_interactions_dict_pro_enh_TSS[chrom]
			else:
				chr_interactions_pro_enh = chr_interactions_dict_pro_enh[chrom]

			pro_enh_indexes = un_string(chr_interactions_pro_enh[:, :2])

			# Matrix layout is promoters first, then enhancers: take the
			# enhancer-rows x promoter-columns sub-matrix, then index it with
			# the rebased true pairs and the (already local) negative pairs.
			correl_from_enhancer_to_promoters = chrom_correlations_matrix[len(indexes_p):len(indexes_p) + len(indexes_e), 0: len(indexes_p)]
			correl_of_true_pro_enh_inter = correl_from_enhancer_to_promoters[pro_enh_indexes[:, 1] - total_e, pro_enh_indexes[:, 0] - total_p]
			correl_of_false_pro_enh_inter = correl_from_enhancer_to_promoters[negative_of_type_of_interactions[:, 1], negative_of_type_of_interactions[:, 0]]	

			print 'nan_correl', (np.isnan(correl_of_false_pro_enh_inter)).any(), (np.isnan(correl_of_true_pro_enh_inter)).any()

			return correl_from_enhancer_to_promoters, correl_of_true_pro_enh_inter, correl_of_false_pro_enh_inter

		if "enhancer_enhancer_interactions" in mode:
			chr_interactions_enh_enh = chr_interactions_dict_enh_enh[chrom]
			enh_enh_indexes = un_string(chr_interactions_enh_enh[:, :2])
	
			# Enhancer-rows x enhancer-columns sub-matrix.
			correl_from_enhancer_to_enhancers = chrom_correlations_matrix[len(indexes_p): len(indexes_p) + len(indexes_e), len(indexes_p): len(indexes_p) + len(indexes_e)]
			correl_of_true_enh_enh_inter = correl_from_enhancer_to_enhancers[enh_enh_indexes[:, 0] - total_e, enh_enh_indexes[:, 1] - total_e]
			correl_of_false_enh_enh_inter = correl_from_enhancer_to_enhancers[mask_feature_uniqueness_enh_enh]
		
			print 'nan_correl', (np.isnan(correl_of_false_enh_enh_inter)).any(), (np.isnan(correl_of_true_enh_enh_inter)).any()
	
			return correl_from_enhancer_to_enhancers, correl_of_true_enh_enh_inter, correl_of_false_enh_enh_inter
	def distance_calculator():
		"""Compute promoter-enhancer (or enhancer-enhancer) distances and
		split them into true / negative interaction values, mirroring
		``correlation_calculator``.

		Returns (distance sub-matrix, distances of true interactions,
		distances of false interactions).
		"""

		# Promoters are represented by their TSS, enhancers by the midpoint
		# of their coordinate interval.
		point_coordinates_promoter, point_coordinates_enhancer = TSS_coordinates[indexes_p], np.mean(enh_coordinates[indexes_e], axis = 1)
		distances_matrix = calculate_distances(domain, point_coordinates_promoter, point_coordinates_enhancer)


		if "promoter_enhancer_interactions" in mode:

			if config_variables.disentagled_features_validation: 
				chr_interactions_pro_enh = config_variables.chr_interactions_dict_pro_enh_TSS[chrom]
			else:
				chr_interactions_pro_enh = chr_interactions_dict_pro_enh[chrom]

			pro_enh_indexes = un_string(chr_interactions_pro_enh[:, :2])
			dist_from_enhancer_to_promoters = distances_matrix[len(indexes_p):, 0: len(indexes_p)]
			
			#if domain:
			#	random_negative_interaction_negative_distances = negative_of_type_of_interactions[np.random.choice(len(negative_of_type_of_interactions), int(len(negative_of_type_of_interactions)/2.), replace = False)]
			#	dist_from_enhancer_to_promoters[random_negative_interaction_negative_distances[:,1], random_negative_interaction_negative_distances[:,0]] *= -1

			dist_of_true_pro_enh_inter = dist_from_enhancer_to_promoters[pro_enh_indexes[:, 1]  - total_e, pro_enh_indexes[:, 0]  - total_p]
			dist_of_false_pro_enh_inter = dist_from_enhancer_to_promoters[negative_of_type_of_interactions[:, 1], negative_of_type_of_interactions[:, 0]]
			
			return dist_from_enhancer_to_promoters, dist_of_true_pro_enh_inter, dist_of_false_pro_enh_inter


		if "enhancer_enhancer_interactions" in mode:
			chr_interactions_enh_enh = chr_interactions_dict_enh_enh[chrom]
			enh_enh_indexes = un_string(chr_interactions_enh_enh[:, :2])

			dist_from_enhancer_to_enhancers = distances_matrix[len(indexes_p):, len(indexes_p):]

			if domain:
				# NOTE(review): np.ones(len(indexes_e), bool) is a 1-D array,
				# so [0][:, None] / [1][:, None] below index numpy *scalars*
				# and will raise; this branch looks broken or dead — confirm
				# whether domain=True ever reaches this enh-enh path.
				random_interaction_negative_distances = np.ones(len(indexes_e), bool)
				random_interaction_negative_distances = np.c_[random_interaction_negative_distances[0][:, None], random_interaction_negative_distances[1][:, None]]
				random_interaction_negative_distances = random_interaction_negative_distances[np.random.choice(len(random_interaction_negative_distances), int(len(random_interaction_negative_distances)/2.), replace = False)]
				dist_from_enhancer_to_enhancers[random_interaction_negative_distances[:, 0], random_interaction_negative_distances[:, 1]] *= -1
				dist_from_enhancer_to_enhancers[random_interaction_negative_distances[:, 1], random_interaction_negative_distances[:, 0]] *= -1

			dist_of_false_enh_enh_inter = dist_from_enhancer_to_enhancers[mask_feature_uniqueness_enh_enh]	
			dist_of_true_enh_enh_inter = dist_from_enhancer_to_enhancers[enh_enh_indexes[:, 0] - total_e, enh_enh_indexes[:, 1] - total_e]

			return dist_from_enhancer_to_enhancers, dist_of_true_enh_enh_inter, dist_of_false_enh_enh_inter
Ejemplo n.º 12
0
		def interacting_enhancers_coordinates_function():
			"""Collect, per chromosome in ``chroms_in_prior``, the coordinates
			of enhancers taking part in true interactions (pro-enh or enh-enh
			depending on ``mode``).  Returns {chrom: coordinate array}."""

			interacting_enhancers_coord = {}

			# Element [1] of the record holds the coordinate array.
			enh_coordinates = dataset_time_series_dict[link_data_set_name_to_file_name["enhancers"]['ER']][1]

			for chrom in  chroms_in_prior:		
				if mode == "promoter_enhancer_interactions":
					# Unique enhancer ids on the enhancer side of true pairs.
					chrom_interacting_enhancers_pro = np.unique(un_string(chr_interactions_dict_pro_enh[chrom])[:,1])
					interacting_enhancers_coord[chrom] = enh_coordinates[chrom_interacting_enhancers_pro]
				#if config_variables.alternative_classificator_outside_enhancers: 
					#interacting_enhancers_coord[chrom] = enh_coordinates[config_variables.chrom_interacting_enhancers_pro[chrom]]
					
				elif mode == "enhancer_enhancer_interactions": 

					# Enhancers on either side of a true enh-enh interaction.
					chrom_interacting_enhancers_enh = np.unique(un_string(chr_interactions_dict_enh_enh[chrom])[:,0])
					chrom_interacting_enhancers_enh = np.unique(np.r_[chrom_interacting_enhancers_enh, np.unique(un_string(chr_interactions_dict_enh_enh[chrom])[:,1])])
					interacting_enhancers_coord[chrom] = enh_coordinates[chrom_interacting_enhancers_enh]

			return interacting_enhancers_coord
def inter_enhancer(chrom):
	"""Return chromosome-local ids of enhancers that appear in at least one
	true promoter-enhancer interaction on ``chrom``."""
	negative_interactions = config_variables.negative_interactions
	indexes_p, indexes_e, total_p, total_e = negative_interactions.initialise_variables(chrom)[2:]

	# Pick the TSS-disentangled interactions when that mode is enabled.
	if config_variables.disentagled_features_validation:
		pairs = config_variables.chr_interactions_dict_pro_enh_TSS[chrom]
	else:
		pairs = config_variables.chr_interactions_dict_pro_enh[chrom]

	enhancer_column = un_string(pairs[:, :2]).astype(int)[:, 1]

	# Deduplicate and rebase absolute ids by the chromosome offset.
	return np.unique(enhancer_column) - total_e
Ejemplo n.º 14
0
	def test_priors_funct(self, distances_per_sample):
		"""Scatter ``distances_per_sample`` (ordered like
		``self.chrom_enh_survived``) onto a full per-enhancer array for
		``self.chrom`` and return the values belonging to enhancers that
		appear in true promoter-enhancer interactions."""
		from prepare_interactions_clean import un_string
		chrom = self.chrom
		negative_interactions = config_variables.negative_interactions
		indexes_p, indexes_e, total_p, total_e = negative_interactions.initialise_variables(chrom)[2:]

		if config_variables.disentagled_features_validation: 
			chr_interactions_pro_enh = config_variables.chr_interactions_dict_pro_enh_TSS[chrom]
		else:
			chr_interactions_pro_enh = config_variables.chr_interactions_dict_pro_enh[chrom]

		true_inter_pro = un_string(chr_interactions_pro_enh[:, :2]).astype(int)

		i_s_t, j_s_t = true_inter_pro[:,0], true_inter_pro[:,1]
		# Chromosome-local ids of interacting enhancers.
		interacting_enhancers = np.unique(j_s_t)-total_e

		# Place each sample at its enhancer's chromosome-local position;
		# non-surviving enhancers keep the 0.0 default.
		take_distances_array = np.zeros(len(indexes_e))
		take_distances_array[self.chrom_enh_survived-total_e] = distances_per_sample
		distaces_of_interacting_enhancers = take_distances_array[interacting_enhancers]
		return distaces_of_interacting_enhancers
def interactions_extractor(chrom):
    """Return (true promoter-enhancer index pairs rebased to
    chromosome-local numbering, generated negative interactions) for
    ``chrom``."""
    # Source of true interactions depends on the validation mode.
    source = (config_variables.chr_interactions_dict_pro_enh_TSS
              if config_variables.disentagled_features_validation
              else config_variables.chr_interactions_dict_pro_enh)

    true_pairs = un_string(source[chrom][:, :2])

    false_pairs = chrom_specific_negative_interactions.chrom_specific_negative_interactions(chrom, mode)

    # Rebase absolute promoter/enhancer ids by the chromosome offsets.
    variables = chrom_specific_negative_interactions.initialise_variables(chrom)
    total_p, total_e = variables[4], variables[5]
    true_pairs[:, 0] -= total_p
    true_pairs[:, 1] -= total_e

    return true_pairs, false_pairs
def inter_enhancer(chrom):
	"""Return two arrays of absolute enhancer ids for ``chrom``:
	(enhancers seen in true promoter-enhancer interactions, enhancers
	never seen in them)."""
	from  prepare_interactions_clean import un_string

	negative_interactions = config_variables.negative_interactions
	indexes_p, indexes_e, total_p, total_e = negative_interactions.initialise_variables(chrom)[2:]

	if config_variables.disentagled_features_validation:
		interactions = config_variables.chr_interactions_dict_pro_enh_TSS[chrom]
	else:
		interactions = config_variables.chr_interactions_dict_pro_enh[chrom]

	enhancer_ids = un_string(interactions[:, :2]).astype(int)[:, 1]

	interacting_enhancers_ = np.unique(enhancer_ids)
	# Enhancers of this chromosome whose id never appears among the true
	# interactions; np.where yields local positions, so re-offset by total_e.
	non_interacting_enhancers_ = np.where(np.invert(np.in1d(indexes_e, enhancer_ids)))[0] + total_e

	return interacting_enhancers_, non_interacting_enhancers_
    def distance_calculator():
        """Compute promoter-enhancer (or enhancer-enhancer) distances and
        split them into true / negative interaction values.

        Returns (distance sub-matrix, distances of true interactions,
        distances of false interactions) for the interaction type selected
        by ``mode``.
        """

        # Promoters are represented by their TSS, enhancers by the midpoint
        # of their coordinate interval.
        point_coordinates_promoter, point_coordinates_enhancer = TSS_coordinates[
            indexes_p], np.mean(enh_coordinates[indexes_e], axis=1)
        distances_matrix = calculate_distances(domain,
                                               point_coordinates_promoter,
                                               point_coordinates_enhancer)

        if "promoter_enhancer_interactions" in mode:

            if config_variables.disentagled_features_validation:
                chr_interactions_pro_enh = config_variables.chr_interactions_dict_pro_enh_TSS[
                    chrom]
            else:
                chr_interactions_pro_enh = chr_interactions_dict_pro_enh[chrom]

            pro_enh_indexes = un_string(chr_interactions_pro_enh[:, :2])
            # Enhancer-rows x promoter-columns sub-matrix (features are laid
            # out promoters first, then enhancers).
            dist_from_enhancer_to_promoters = distances_matrix[
                len(indexes_p):, 0:len(indexes_p)]

            #if domain:
            #	random_negative_interaction_negative_distances = negative_of_type_of_interactions[np.random.choice(len(negative_of_type_of_interactions), int(len(negative_of_type_of_interactions)/2.), replace = False)]
            #	dist_from_enhancer_to_promoters[random_negative_interaction_negative_distances[:,1], random_negative_interaction_negative_distances[:,0]] *= -1

            # True pairs carry absolute ids (rebased here); negatives are
            # already chromosome-local.
            dist_of_true_pro_enh_inter = dist_from_enhancer_to_promoters[
                pro_enh_indexes[:, 1] - total_e,
                pro_enh_indexes[:, 0] - total_p]
            dist_of_false_pro_enh_inter = dist_from_enhancer_to_promoters[
                negative_of_type_of_interactions[:, 1],
                negative_of_type_of_interactions[:, 0]]

            return dist_from_enhancer_to_promoters, dist_of_true_pro_enh_inter, dist_of_false_pro_enh_inter

        if "enhancer_enhancer_interactions" in mode:
            chr_interactions_enh_enh = chr_interactions_dict_enh_enh[chrom]
            enh_enh_indexes = un_string(chr_interactions_enh_enh[:, :2])

            dist_from_enhancer_to_enhancers = distances_matrix[len(indexes_p):,
                                                               len(indexes_p):]

            if domain:
                # NOTE(review): np.ones(len(indexes_e), bool) is a 1-D array,
                # so the [0][:, None] / [1][:, None] indexing below operates
                # on numpy *scalars* and will raise; this branch looks broken
                # or dead — confirm whether domain=True reaches this path.
                random_interaction_negative_distances = np.ones(
                    len(indexes_e), bool)
                random_interaction_negative_distances = np.c_[
                    random_interaction_negative_distances[0][:, None],
                    random_interaction_negative_distances[1][:, None]]
                random_interaction_negative_distances = random_interaction_negative_distances[
                    np.random.choice(
                        len(random_interaction_negative_distances),
                        int(len(random_interaction_negative_distances) / 2.),
                        replace=False)]
                dist_from_enhancer_to_enhancers[
                    random_interaction_negative_distances[:, 0],
                    random_interaction_negative_distances[:, 1]] *= -1
                dist_from_enhancer_to_enhancers[
                    random_interaction_negative_distances[:, 1],
                    random_interaction_negative_distances[:, 0]] *= -1

            dist_of_false_enh_enh_inter = dist_from_enhancer_to_enhancers[
                mask_feature_uniqueness_enh_enh]
            dist_of_true_enh_enh_inter = dist_from_enhancer_to_enhancers[
                enh_enh_indexes[:, 0] - total_e,
                enh_enh_indexes[:, 1] - total_e]

            return dist_from_enhancer_to_enhancers, dist_of_true_enh_enh_inter, dist_of_false_enh_enh_inter
    def enhancer_enhancer_interactions_generator():
        """Generate negative (non-interacting) enhancer-enhancer pairs for
        the current chromosome.

        Builds a boolean exclusion matrix over the chromosome's combined
        promoter+enhancer index space (promoters occupy rows/columns
        0..len(indexes_p)-1, enhancers the rest), marks every pair that must
        NOT serve as a negative (diagonal, promoter-promoter and
        promoter-enhancer blocks, filtered-out enhancers, known true
        interactions, lower triangle), and returns the remaining pairs in
        enhancer-local coordinates (matrix index minus len(indexes_p)).

        Reads from the enclosing scope: chrom, indexes_p, indexes_e, total_e,
        length_chr, domain_matrix, chr_interactions_dict_enh_enh,
        chrom_enh_not_survived, dict_chrom_proximal and the mode flags.
        """
        chr_interactions_enh_enh = chr_interactions_dict_enh_enh[chrom]
        # Collect every enhancer index appearing on either side of a known
        # enhancer-enhancer interaction.
        chrom_interacting_enhancers_enh = np.unique(
            un_string(chr_interactions_dict_enh_enh[chrom])[:, 0])
        chrom_interacting_enhancers_enh = np.unique(np.r_[
            chrom_interacting_enhancers_enh,
            np.unique(un_string(chr_interactions_dict_enh_enh[chrom])[:, 1])])

        # True entries mark pairs that are excluded from the negative set.
        interaction_matrix = np.zeros((length_chr, length_chr), bool)
        interaction_matrix[range(length_chr),
                           range(length_chr)] = True  # gets rid of diagonal
        interaction_matrix[0:len(indexes_p), 0:len(
            indexes_p)] = True  # gets rid of promoter_promoter_interactions
        interaction_matrix[:len(indexes_p),
                           len(indexes_p):len(indexes_p) +
                           len(indexes_e
                               )] = True  # gets rid of promoter-enhancer block

        print 'number of enh_enh true interactions: ', len(
            chr_interactions_enh_enh)
        #enh-enh interactions start-----------------------------------------------------------------

        # Exclude rows belonging to enhancers that failed filtering and,
        # optionally, proximal enhancers.
        if len(chrom_enh_not_survived):
            interaction_matrix[len(indexes_p) + chrom_enh_not_survived -
                               total_e, :] = True  # sorts out rows
        if distant_enh_only and len(dict_chrom_proximal[chrom]):
            interaction_matrix[len(indexes_p) + dict_chrom_proximal[chrom] -
                               total_e, :] = True

        if interacting_negatives:

            # Restrict negative rows to enhancers known to interact.
            mask_interacting_enhancers = np.zeros(length_chr).astype(bool)
            mask_interacting_enhancers[chrom_interacting_enhancers_enh -
                                       total_e + len(indexes_p)] = True
            mask_non_interacting_enhancers = np.invert(
                mask_interacting_enhancers)
            interaction_matrix[mask_non_interacting_enhancers,
                               len(indexes_p):] = True

        #sort out columns--------------------------------------
        mask_interacting_enhancers = np.zeros(length_chr).astype(bool)
        mask_interacting_enhancers[chrom_interacting_enhancers_enh - total_e +
                                   len(indexes_p)] = True
        mask_non_interacting_enhancers = np.invert(mask_interacting_enhancers)

        if interacting_enhancers_only or prior_mode:
            interaction_matrix[
                len(indexes_p):,
                mask_non_interacting_enhancers] = True  # it's equivalent to interacting_enhancers_mask_invert
        elif len(chrom_enh_not_survived):
            interaction_matrix[
                len(indexes_p):,
                len(indexes_p) + chrom_enh_not_survived -
                total_e] = True  # gets rid of filtered out enhancers which could be causing nans due to their correlations
        if distant_enh_only and len(dict_chrom_proximal[chrom]):
            interaction_matrix[len(indexes_p):,
                               len(indexes_p) + dict_chrom_proximal[chrom] -
                               total_e] = True

        #sort out columns--------------------------------------end

        # Exclude the known true interactions in both orientations, then the
        # lower triangle so each unordered pair appears at most once.
        true_enh_enh_indexes = un_string(chr_interactions_enh_enh)
        interaction_matrix[true_enh_enh_indexes[:, 0] - total_e +
                           len(indexes_p), true_enh_enh_indexes[:, 1] -
                           total_e + len(indexes_p)] = True
        interaction_matrix[true_enh_enh_indexes[:, 1] - total_e +
                           len(indexes_p), true_enh_enh_indexes[:, 0] -
                           total_e + len(indexes_p)] = True
        interaction_matrix[np.tril_indices(
            length_chr)] = True  # gets rid of symmetric interactions

        # Whatever is not excluded and passes the domain filter becomes a
        # negative pair; shift both axes to enhancer-local indexing.
        indexes_of_zero_interactions = np.where(
            True == np.invert(interaction_matrix) * domain_matrix)
        column_1st = indexes_of_zero_interactions[0] - len(indexes_p)
        column_2nd = indexes_of_zero_interactions[1] - len(indexes_p)

        enh_enh_false_interactions = np.concatenate(
            (column_1st[:, None], column_2nd[:, None]), axis=1)

        return enh_enh_false_interactions
    def promoter_enhancer_interactions_generator():
        """Generate negative (non-interacting) promoter-enhancer pairs for
        the current chromosome.

        Builds a boolean exclusion matrix over the chromosome's combined
        promoter+enhancer index space, excludes the diagonal, lower triangle,
        promoter-promoter and enhancer-enhancer blocks, filtered-out
        promoters/enhancers and the known true interactions, then returns the
        surviving (promoter, enhancer) pairs; the first column is a matrix
        row index (promoter-local), the second is enhancer-local (matrix
        column minus len(indexes_p)).

        Reads from the enclosing scope: chrom, indexes_p, indexes_e, total_p,
        total_e, length_chr, domain_matrix, chr_interactions_dict_pro_enh,
        chrom_pro_not_survived, chrom_enh_not_survived, dict_chrom_proximal
        and the mode flags.
        """
        chr_interactions_pro_enh = chr_interactions_dict_pro_enh[chrom]

        if config_variables.alternative_classificator_outside_enhancers:
            chrom_interacting_enhancers_pro = config_variables.chrom_interacting_enhancers_pro[
                chrom]
        else:
            chrom_interacting_enhancers_pro = np.unique(
                un_string(chr_interactions_dict_pro_enh[chrom])[:, 1])
        chrom_interacting_promoters_pro = np.unique(
            un_string(chr_interactions_dict_pro_enh[chrom])[:, 0])

        # True entries mark pairs that are excluded from the negative set.
        interaction_matrix = np.zeros((length_chr, length_chr), bool)

        interaction_matrix[range(length_chr),
                           range(length_chr)] = True  # gets rid of diagonal
        interaction_matrix[np.tril_indices(
            length_chr)] = True  # gets rid of symmetric interactions

        interaction_matrix[0:len(indexes_p), 0:len(
            indexes_p)] = True  # gets rid of promoter_promoter_interactions

        # NOTE(review): 'features' appears unused in this function.
        features = np.array(
            ['p{0}'.format(ind) for ind in indexes_p] +
            ['e{0}'.format(ind) for ind in indexes_e
             ])  # creates a frame with chromosome specific interactions

        true_pro_enh_indexes = un_string(chr_interactions_pro_enh)

        print 'number of pro_enh true interactions: ', len(
            chr_interactions_pro_enh)

        if len(chrom_pro_not_survived):
            interaction_matrix[
                chrom_pro_not_survived -
                total_p, :] = True  # gets rid of negative interactions which could be generated by filtered promoters

        if interacting_negatives:
            mask_interacting_promoters = np.zeros(length_chr).astype(
                bool
            )  # we don't have to filter out enhancers which didn't pass the filter threshold, since we consider only the interacting enhancers - a subset of survived enhancers.
            mask_interacting_promoters[chrom_interacting_promoters_pro -
                                       total_p] = True
            mask_non_interacting_promoters = np.invert(
                mask_interacting_promoters)
            interaction_matrix[
                mask_non_interacting_promoters,
                len(
                    indexes_p
                ):] = True  # it's equivalent to interacting_enhancers_mask_invert

        #if config_variables.disentagled_features_validation:
        #true_pro_enh_indexes = un_string(config_variables.chr_interactions_dict_pro_enh_TSS[chrom])
        #chrom_interacting_enhancers_pro = np.unique(true_pro_enh_indexes[:, 1])

        mask_interacting_enhancers = np.zeros(length_chr).astype(
            bool
        )  # we don't have to filter out enhancers which didn't pass the filter threshold, since we consider only the interacting enhancers - a subset of survived enhancers.
        mask_interacting_enhancers[chrom_interacting_enhancers_pro - total_e +
                                   len(indexes_p)] = True
        mask_non_interacting_enhancers = np.invert(mask_interacting_enhancers)
        #interaction_matrix[:len(indexes_p), mask_non_interacting_enhancers] = True # it's equivalent to interacting_enhancers_mask_invert

        if interacting_enhancers_only or prior_mode:
            interaction_matrix[:len(
                indexes_p
            ), mask_non_interacting_enhancers] = True  # it's equivalent to interacting_enhancers_mask_invert
        elif len(chrom_enh_not_survived):
            interaction_matrix[:len(indexes_p),
                               len(indexes_p) + chrom_enh_not_survived -
                               total_e] = True  # gets rid of filtered out enhancers which could be causing nans due to their correlations
        if distant_enh_only and len(dict_chrom_proximal[chrom]):
            interaction_matrix[:len(indexes_p),
                               len(indexes_p) + dict_chrom_proximal[chrom] -
                               total_e] = True

        # Exclude the known true promoter-enhancer interactions.
        interaction_matrix[true_pro_enh_indexes[:, 0] - total_p,
                           true_pro_enh_indexes[:, 1] - total_e +
                           len(indexes_p)] = True

        interaction_matrix[
            len(indexes_p):len(indexes_p) + len(indexes_e),
            len(indexes_p):len(indexes_p) +
            len(indexes_e)] = True  # gets rid of enhancers-enhancer block

        # Whatever is not excluded and passes the domain filter becomes a
        # negative pair; only the enhancer axis is shifted to local indexing.
        indexes_of_zero_interactions = np.where(
            True == np.invert(interaction_matrix) * domain_matrix)
        column_1st = indexes_of_zero_interactions[0]
        column_2nd = indexes_of_zero_interactions[1] - len(indexes_p)

        prom_enh_false_interactions = np.concatenate(
            (column_1st[:, None], column_2nd[:, None]), axis=1)
        #pro-enh interactions end-----------------------------------------------------------------
        return prom_enh_false_interactions
Ejemplo n.º 20
0
def MAP(posterior_t, posterior_f, chrom, i_s_f, j_s_f):
	"""Maximum-a-posteriori partner assignment for one chromosome.

	Fills an (N, N) posterior matrix over the chromosome's combined
	promoter+enhancer index space (N = len(indexes_p) + len(indexes_e)),
	then picks, for every column, the row (partner) with the highest
	posterior via argmax.

	Parameters
	----------
	posterior_t, posterior_f : dict
		Per-chromosome posteriors of true and false (negative) interactions.
	chrom : str
		Chromosome name, used to key the dictionaries above.
	i_s_f, j_s_f : array-like
		Indices of the negative interactions for this chromosome.

	Returns
	-------
	In promoter-enhancer mode: (mask of correctly predicted links, inferred
	promoter per enhancer, MAP probabilities, probabilities for promoters of
	interacting enhancers).  In enhancer-enhancer mode: the analogous
	3-tuple.  Depends on the module-level `mode`.
	"""

	indexes_p, indexes_e, total_p, total_e = negative_interactions.initialise_variables(chrom)[2:]

	length_chr = len(indexes_p) + len(indexes_e)
	interaction_matrix = np.zeros((length_chr, length_chr))
	posterior_t, posterior_f = posterior_t[chrom], posterior_f[chrom]


	if mode == "promoter_enhancer_interactions":

		if config_variables.disentagled_features_validation:
			chr_interactions_pro_enh = config_variables.chr_interactions_dict_pro_enh_TSS[chrom]
		else:
			chr_interactions_pro_enh = config_variables.chr_interactions_dict_pro_enh[chrom]

		true_inter_pro = un_string(chr_interactions_pro_enh[:, :2]).astype(int)

		i_s_t, j_s_t = true_inter_pro[:,0], true_inter_pro[:,1]

		# Background fill slightly below the global minimum so argmax never
		# selects an unfilled cell.
		interaction_matrix[:,:] = np.min([np.min(posterior_t), np.min(posterior_f)])*0.99
		interaction_matrix[i_s_t - total_p, j_s_t + len(indexes_p) - total_e] = posterior_t
		interaction_matrix[i_s_f - total_p, j_s_f + len(indexes_p) - total_e] = posterior_f

		# For every column (enhancer), the row index of its best partner.
		MAP_indexes = np.argmax(interaction_matrix, axis = 0)

		if config_variables.alternative_classificator_outside_enhancers:
			chrom_interacting_enhancers_pro = config_variables.chrom_interacting_enhancers_pro[chrom]
			max_poster_is_pro = MAP_indexes[len(indexes_p) + chrom_interacting_enhancers_pro - total_e] + total_p
			MAP_predicted_intereactions_pro = np.column_stack((max_poster_is_pro, chrom_interacting_enhancers_pro))

		else:
			max_poster_is_pro = MAP_indexes[len(indexes_p) + np.unique(j_s_t) - total_e] + total_p # gives a maximum aposteriori promoter to each infered enhancer
			MAP_predicted_intereactions_pro = np.column_stack((max_poster_is_pro, np.unique(j_s_t)))

		infered_promoters_pro_enh = MAP_indexes[len(indexes_p):] + total_p
		MAP_probabilites_pro_enh = interaction_matrix[MAP_indexes, range(len(indexes_p) + len(indexes_e))][len(indexes_p):]

		probabilities_for_promoters_of_interacting_enhancers = interaction_matrix[:len(indexes_p), len(indexes_p) + np.unique(j_s_t) - total_e]

		# Mark predictions that match a known true interaction.
		link_exists_pro = [ind for ind, el in enumerate(MAP_predicted_intereactions_pro.tolist()) if el in true_inter_pro.tolist()]
		mask_link_exists_pro = np.zeros(len(MAP_predicted_intereactions_pro), bool)
		mask_link_exists_pro[link_exists_pro] = True

		return mask_link_exists_pro, infered_promoters_pro_enh, MAP_probabilites_pro_enh, probabilities_for_promoters_of_interacting_enhancers

	if mode == "enhancer_enhancer_interactions":

		chr_interactions_dict_enh_enh = config_variables.chr_interactions_dict_enh_enh
		true_inter_enh = un_string(chr_interactions_dict_enh_enh[chrom][:, :2]).astype(int)
		i_s_t, j_s_t = true_inter_enh[:,0], true_inter_enh[:,1]

		# Background fill as above; both orientations written so the matrix
		# is symmetric before the argmax.
		interaction_matrix[:,:] = np.min([np.min(posterior_t), np.min(posterior_f)])*0.99
		interaction_matrix[i_s_t + len(indexes_p) - total_e, j_s_t + len(indexes_p) - total_e] = posterior_t
		interaction_matrix[i_s_f + len(indexes_p) - total_e, j_s_f + len(indexes_p) - total_e] = posterior_f
		interaction_matrix[j_s_t + len(indexes_p) - total_e, i_s_t + len(indexes_p) - total_e] = posterior_t # transpose to create a full matrix
		interaction_matrix[j_s_f + len(indexes_p) - total_e, i_s_f + len(indexes_p) - total_e] = posterior_f # transpose to create a full matrix

		MAP_indexes = np.argmax(interaction_matrix, axis = 0)
		max_poster_is_enh = MAP_indexes[len(indexes_p) + np.unique(j_s_t) - total_e] - len(indexes_p) + total_e

		infered_enhancers_enh_enh = MAP_indexes[len(indexes_p):] + total_e

		MAP_probabilites_enh_enh = interaction_matrix[MAP_indexes, range(len(indexes_p) + len(indexes_e))][len(indexes_p):]

		MAP_predicted_intereactions_enh = np.column_stack((max_poster_is_enh, np.unique(j_s_t)))

		# Mark predictions that match a known true interaction.
		link_exists_enh = [ind for ind, el in enumerate(MAP_predicted_intereactions_enh.tolist()) if el in true_inter_enh.tolist()]
		mask_link_exists_enh = np.zeros(len(MAP_predicted_intereactions_enh), bool)
		mask_link_exists_enh[link_exists_enh] = True

		return mask_link_exists_enh, infered_enhancers_enh_enh, MAP_probabilites_enh_enh
    def interactions_above_threshold(posterior_t, posterior_f, threshold_up, threshold_low, chrom, label, domain=False):
        """Per-promoter summary of enhancer links whose posterior probability
        lies within [threshold_low, threshold_up).

        Fills a symmetric posterior matrix over the chromosome's combined
        promoter+enhancer index space, optionally column-normalises it,
        counts distal links inside the probability band (optionally
        restricted to cluster `label`), counts proximal links from the
        promoter-overlaps-enhancer file, and reports every gene with at least
        one distal or proximal link.

        Parameters
        ----------
        posterior_t, posterior_f : dict
            Per-chromosome posteriors for true and negative interactions.
        threshold_up, threshold_low : float
            Upper (exclusive) and lower (inclusive) probability bounds.
        chrom : str
            Chromosome name, e.g. "chr1".
        label : int
            Cluster label applied when `cluster_specific` is active.
        domain : bool, optional
            When True, build a domain filter (NOTE(review): the resulting
            `domain_matrix` is currently unused below) and forward `domain`
            to the distance calculation.

        Returns
        -------
        tuple
            (gene names with a non-zero link count, distal counts, proximal
            counts, aggregated link log-probabilities, minimal proximal
            enhancer distance per gene - 0.0 when none).
        """
        enh_coordinates, pro_coordinates, indexes_p, indexes_e, total_p, total_e = negative_interactions.initialise_variables(
            chrom
        )

        length_chr = len(indexes_p) + len(indexes_e)
        probability_matrix = np.zeros((length_chr, length_chr))
        posterior_t_chrom, posterior_f_chrom = posterior_t[chrom], posterior_f[chrom]

        i_s_f, j_s_f = positive_negative_interactions_for_MAP(chrom)

        # BUG FIX: TSS_coordinates used to be assigned further down, which
        # made the name function-local everywhere; the domain/TSS_only branch
        # below then read it before assignment and raised UnboundLocalError.
        # Compute it up front instead.
        TSS_coordinates = negative_interactions.extract_TSS_coordinates(config_variables.upstream)

        if domain:
            if config_variables.TSS_or_intra_genic_for_domain_filter == "Intra_genic":
                coords_pro_domain = pro_coordinates[indexes_p]
            elif config_variables.TSS_or_intra_genic_for_domain_filter == "TSS_only":
                coords_pro_domain = np.column_stack((TSS_coordinates[indexes_p] - 1, TSS_coordinates[indexes_p] + 1))
            # A pair passes when it lies in a left- or right-anchored domain.
            domain_matrix = interacting_domain.interacting_domains(
                coords_pro_domain, enh_coordinates[indexes_e], chrom, "left", True
            )
            domain_matrix = domain_matrix + interacting_domain.interacting_domains(
                coords_pro_domain, enh_coordinates[indexes_e], chrom, "right", True
            )
        else:
            domain_matrix = True

        if cluster_specific:
            # Keep only enhancers that survived filtering (and, optionally,
            # are distal); map them to their cluster labels.
            if config_variables.distant_enh_only:
                survived_mask = np.zeros(len(indexes_e))
                survived_mask[dict_chrom_enh_survived[chrom]] = True
                distance_mask = np.zeros(len(indexes_e))
                distance_mask[dict_chrom_distant[chrom]] = True
                survived = np.where(survived_mask * distance_mask)[0]
            else:
                survived = dict_chrom_enh_survived[chrom]

            clustering_matrix = np.zeros_like(probability_matrix).astype(int)
            clustering_matrix[: len(indexes_p), len(indexes_p) + survived] = labels[survived]
            clustering_matrix += clustering_matrix.T
            clustering_matrix_mask = clustering_matrix == label

        else:
            clustering_matrix_mask = True

        if config_variables.disentagled_features_validation:
            chr_interactions_pro_enh = config_variables.chr_interactions_dict_pro_enh_TSS[chrom]
        else:
            chr_interactions_pro_enh = config_variables.chr_interactions_dict_pro_enh[chrom]

        true_inter_pro = un_string(chr_interactions_pro_enh[:, :2]).astype(int)

        i_s_t, j_s_t = true_inter_pro[:, 0], true_inter_pro[:, 1]

        # Enhancer columns are offset by len(indexes_p) in the square matrix.
        probability_matrix[i_s_t - total_p, j_s_t + len(indexes_p) - total_e] = posterior_t_chrom
        probability_matrix[i_s_f - total_p, j_s_f + len(indexes_p) - total_e] = posterior_f_chrom

        probability_matrix += probability_matrix.T

        if normalised:
            # Column-normalise; empty columns produce nan -> clamp to 0.
            norm_factors = np.sum(probability_matrix, axis=0)
            probability_matrix = probability_matrix / norm_factors
            probability_matrix[np.isnan(probability_matrix)] = 0.0

        # ---------------------- distal
        mask_for_counts = (
            clustering_matrix_mask * (probability_matrix >= threshold_low) * (probability_matrix < threshold_up)
        )

        number_of_enhancer_target_promoters_within_a_cluster_distal = mask_for_counts.sum(0)[
            : len(indexes_p)
        ]  # affected by cluster labels

        filtered_probability_matrix = np.zeros_like(
            probability_matrix
        )  # entries outside the band stay at probability 0, easing the sums below

        filtered_probability_matrix[mask_for_counts] = probability_matrix[mask_for_counts]

        # Aggregate per promoter as -sum(log(1 - p)): the negative log of the
        # probability that none of its in-band links exists.
        probability_of_enhancer_target_promoters_within_a_cluster = (
            -1 * np.log(1.0 - filtered_probability_matrix).sum(0)[: len(indexes_p)]
        )  # affected by cluster labels

        # ---------------------- proximal
        promoter_overlaps_enhancer = np.loadtxt(
            config_variables.promoter_overlaps_enhancer_file, usecols=(0, 4, 8), dtype=str
        )

        promoter_overlaps_enhancer_chrom = promoter_overlaps_enhancer[promoter_overlaps_enhancer[:, 0] == chrom]

        interacting_mask = np.zeros_like(probability_matrix).astype(bool)

        interacting_mask[
            promoter_overlaps_enhancer_chrom[:, 1].astype(int) - total_p,
            promoter_overlaps_enhancer_chrom[:, 2].astype(int) - total_e + len(indexes_p),
        ] = True
        interacting_mask += interacting_mask.T

        interacting_mask = interacting_mask * clustering_matrix_mask

        number_of_enhancer_target_promoters_within_a_cluster_proximal = interacting_mask.sum(0)[: len(indexes_p)]

        # ---------------------- distances
        point_coordinates_promoter, point_coordinates_enhancer = (
            TSS_coordinates[indexes_p],
            np.mean(enh_coordinates[indexes_e], axis=1),
        )
        distances_matrix = negative_interactions.calculate_distances(
            domain, point_coordinates_promoter, point_coordinates_enhancer
        )

        # Rows of surviving enhancers are eligible for the proximal-distance
        # search; symmetrised like the matrices above.
        interacting_mask = np.zeros_like(probability_matrix).astype(bool)
        interacting_mask[len(indexes_p) + dict_chrom_enh_survived[chrom] - total_e] = True
        interacting_mask += interacting_mask.T

        def dist_filter_new(distances_matrix, low_lim, up_lim):
            # Minimal distance per promoter among eligible pairs within
            # (low_lim, up_lim]; promoters with no such pair get 0.0.
            prox_distances_matrix = (distances_matrix <= up_lim) * (distances_matrix > low_lim) * interacting_mask
            maximum_value = np.max(distances_matrix)
            distances_matrix_constr = np.zeros_like(prox_distances_matrix).astype(float) + maximum_value
            distances_matrix_constr[prox_distances_matrix] = distances_matrix[prox_distances_matrix]
            prox_distances_matrix_minimal_values = distances_matrix_constr.min(0)[: len(indexes_p)]
            prox_distances_matrix_minimal_values[prox_distances_matrix_minimal_values == maximum_value] = 0.0

            return prox_distances_matrix_minimal_values

        does_proximal_enhancers_targetting_the_promoter_exist = dist_filter_new(abs(distances_matrix), 0.0, 40000.0)

        number_of_enhancer_target_promoters_within_a_cluster = (
            number_of_enhancer_target_promoters_within_a_cluster_distal
            + number_of_enhancer_target_promoters_within_a_cluster_proximal
        )

        # FIX: "<>" is the long-deprecated Python 2 inequality operator
        # (removed in Python 3); use "!=".
        non_zero_gene_count_mask = (
            number_of_enhancer_target_promoters_within_a_cluster != 0
        ) + does_proximal_enhancers_targetting_the_promoter_exist.astype(bool)

        non_zero_counts_probabilities = probability_of_enhancer_target_promoters_within_a_cluster[
            non_zero_gene_count_mask
        ]

        gene_names_non_zero_count = gene_names[np.where(non_zero_gene_count_mask)[0] + total_p]

        # FIX: the proximal counts were indexed with [[mask]] - a list-wrapped
        # boolean mask, i.e. deprecated non-tuple fancy indexing in NumPy;
        # index with the mask directly.
        return (
            gene_names_non_zero_count,
            number_of_enhancer_target_promoters_within_a_cluster_distal[non_zero_gene_count_mask],
            number_of_enhancer_target_promoters_within_a_cluster_proximal[non_zero_gene_count_mask],
            non_zero_counts_probabilities,
            does_proximal_enhancers_targetting_the_promoter_exist[non_zero_gene_count_mask],
        )
	def interactions_above_threshold(posterior_t, posterior_f, threshold_up, threshold_low, chrom, label = 0, domain = False):
		"""Count the matrix cells whose posterior lies within
		[threshold_low, threshold_up] on one chromosome.

		Unlike the sibling variants, this one returns a single integer: the
		number of cells (optionally restricted to cluster `label`) inside
		the probability band.  NOTE(review): `interacting_mask` is built but
		not applied - the final multiplication is commented out - and the
		matrix is NOT symmetrised (the transpose line is commented out).
		"""
		enh_coordinates, pro_coordinates, indexes_p, indexes_e, total_p, total_e = negative_interactions.initialise_variables(chrom)

		length_chr = len(indexes_p) + len(indexes_e)
		probability_matrix = np.zeros((length_chr, length_chr))
		posterior_t_chrom, posterior_f_chrom = posterior_t[chrom], posterior_f[chrom]

		i_s_f, j_s_f = positive_negative_interactions_for_MAP(chrom)

		if domain:
			# NOTE(review): TSS_coordinates is never assigned in this variant,
			# so it must resolve to an enclosing/module scope - confirm.
			if config_variables.TSS_or_intra_genic_for_domain_filter == "Intra_genic": coords_pro_domain = pro_coordinates[indexes_p]
			elif config_variables.TSS_or_intra_genic_for_domain_filter == "TSS_only": coords_pro_domain = np.column_stack((TSS_coordinates[indexes_p]-1, TSS_coordinates[indexes_p]+1))
			domain_matrix = interacting_domain.interacting_domains(coords_pro_domain, enh_coordinates[indexes_e], chrom, 'left', True)
			domain_matrix = domain_matrix + interacting_domain.interacting_domains(coords_pro_domain, enh_coordinates[indexes_e], chrom, 'right', True)
		else:
			domain_matrix = True


		if cluster_specific:

			# Keep only surviving (and optionally distal) enhancers and map
			# them to their cluster labels.
			if config_variables.distant_enh_only:
				survived_mask = np.zeros(len(indexes_e))
				survived_mask[dict_chrom_enh_survived[chrom]] = True
				distance_mask = np.zeros(len(indexes_e))
				distance_mask[dict_chrom_distant[chrom]] = True
				survived = np.where(survived_mask*distance_mask)[0]
			else:
				survived = dict_chrom_enh_survived[chrom]

			clustering_matrix = np.zeros_like(probability_matrix).astype(int)
			clustering_matrix[:len(indexes_p), len(indexes_p) + survived] = labels[survived]
			clustering_matrix += clustering_matrix.T
			clustering_matrix_mask = clustering_matrix == label

		else:
			#_matrix = np.zeros_like(probability_matrix).astype(int)
			clustering_matrix_mask = True

		if config_variables.disentagled_features_validation:
			chr_interactions_pro_enh = config_variables.chr_interactions_dict_pro_enh_TSS[chrom]
		else:
			chr_interactions_pro_enh = config_variables.chr_interactions_dict_pro_enh[chrom]

		#chr_interactions_dict_pro_enh = config_variables.chr_interactions_dict_pro_enh
		#true_inter_pro = un_string(chr_interactions_dict_pro_enh[chrom][:, :2]).astype(int)
		true_inter_pro = un_string(chr_interactions_pro_enh[:, :2]).astype(int)

		i_s_t, j_s_t = true_inter_pro[:,0], true_inter_pro[:,1]

		# Enhancer columns are offset by len(indexes_p) in the square matrix.
		#probability_matrix[:,:] = np.min([np.min(posterior_t), np.min(posterior_f)])*0.999
		probability_matrix[i_s_t - total_p, j_s_t + len(indexes_p) - total_e] = posterior_t_chrom
		probability_matrix[i_s_f - total_p, j_s_f + len(indexes_p) - total_e] = posterior_f_chrom

		#probability_matrix += probability_matrix.T

		#--------------------------------------------------------------------------the part to look at for Paolo !

		if normalised:
			# Column-normalise; empty columns produce nan -> clamp to 0.
			norm_factors = np.sum(probability_matrix, axis = 0)
			probability_matrix = probability_matrix/norm_factors
			probability_matrix[np.isnan(probability_matrix)] = 0.

		#--------------------------------------------------
		#how many true links for threshold estimated from test data at FDR

		interacting_mask = np.zeros_like(probability_matrix).astype(bool)
		interacting_mask[i_s_t - total_p, j_s_t + len(indexes_p) - total_e] = True

		# Upper bound is inclusive (<=) here, unlike the counting variant.
		number_of_true_interactions_at_prob_threshold_estimated_from_test_data_at_FDR_chrom = clustering_matrix_mask * (probability_matrix >= threshold_low) * (probability_matrix <= threshold_up) #* interacting_mask

		return number_of_true_interactions_at_prob_threshold_estimated_from_test_data_at_FDR_chrom.sum()
Ejemplo n.º 23
0
    def interactions_above_threshold(posterior_t,
                                     posterior_f,
                                     threshold_up,
                                     threshold_low,
                                     chrom,
                                     label,
                                     domain=False):
        """Per-promoter summary of enhancer links whose posterior probability
        lies within [threshold_low, threshold_up).

        Fills a symmetric posterior matrix over the chromosome's combined
        promoter+enhancer index space, optionally column-normalises it,
        counts distal links inside the probability band (optionally
        restricted to cluster `label`), counts proximal links from the
        promoter-overlaps-enhancer file, and reports every gene with at least
        one distal or proximal link.

        Parameters
        ----------
        posterior_t, posterior_f : dict
            Per-chromosome posteriors for true and negative interactions.
        threshold_up, threshold_low : float
            Upper (exclusive) and lower (inclusive) probability bounds.
        chrom : str
            Chromosome name, e.g. "chr1".
        label : int
            Cluster label applied when `cluster_specific` is active.
        domain : bool, optional
            When True, build a domain filter (NOTE(review): the resulting
            `domain_matrix` is currently unused below) and forward `domain`
            to the distance calculation.

        Returns
        -------
        tuple
            (gene names with a non-zero link count, distal counts, proximal
            counts, aggregated link log-probabilities, minimal proximal
            enhancer distance per gene - 0.0 when none).
        """
        enh_coordinates, pro_coordinates, indexes_p, indexes_e, total_p, total_e = negative_interactions.initialise_variables(
            chrom)

        length_chr = len(indexes_p) + len(indexes_e)
        probability_matrix = np.zeros((length_chr, length_chr))
        posterior_t_chrom, posterior_f_chrom = posterior_t[chrom], posterior_f[
            chrom]

        i_s_f, j_s_f = positive_negative_interactions_for_MAP(chrom)

        # BUG FIX: TSS_coordinates used to be assigned further down, which
        # made the name function-local everywhere; the domain/TSS_only branch
        # below then read it before assignment and raised UnboundLocalError.
        # Compute it up front instead.
        TSS_coordinates = negative_interactions.extract_TSS_coordinates(
            config_variables.upstream)

        if domain:
            if config_variables.TSS_or_intra_genic_for_domain_filter == "Intra_genic":
                coords_pro_domain = pro_coordinates[indexes_p]
            elif config_variables.TSS_or_intra_genic_for_domain_filter == "TSS_only":
                coords_pro_domain = np.column_stack(
                    (TSS_coordinates[indexes_p] - 1,
                     TSS_coordinates[indexes_p] + 1))
            # A pair passes when it lies in a left- or right-anchored domain.
            domain_matrix = interacting_domain.interacting_domains(
                coords_pro_domain, enh_coordinates[indexes_e], chrom, 'left',
                True)
            domain_matrix = domain_matrix + interacting_domain.interacting_domains(
                coords_pro_domain, enh_coordinates[indexes_e], chrom, 'right',
                True)
        else:
            domain_matrix = True

        if cluster_specific:
            # Keep only enhancers that survived filtering (and, optionally,
            # are distal); map them to their cluster labels.
            if config_variables.distant_enh_only:
                survived_mask = np.zeros(len(indexes_e))
                survived_mask[dict_chrom_enh_survived[chrom]] = True
                distance_mask = np.zeros(len(indexes_e))
                distance_mask[dict_chrom_distant[chrom]] = True
                survived = np.where(survived_mask * distance_mask)[0]
            else:
                survived = dict_chrom_enh_survived[chrom]

            clustering_matrix = np.zeros_like(probability_matrix).astype(int)
            clustering_matrix[:len(indexes_p),
                              len(indexes_p) + survived] = labels[survived]
            clustering_matrix += clustering_matrix.T
            clustering_matrix_mask = clustering_matrix == label

        else:
            clustering_matrix_mask = True

        if config_variables.disentagled_features_validation:
            chr_interactions_pro_enh = config_variables.chr_interactions_dict_pro_enh_TSS[
                chrom]
        else:
            chr_interactions_pro_enh = config_variables.chr_interactions_dict_pro_enh[
                chrom]

        true_inter_pro = un_string(chr_interactions_pro_enh[:, :2]).astype(int)

        i_s_t, j_s_t = true_inter_pro[:, 0], true_inter_pro[:, 1]

        # Enhancer columns are offset by len(indexes_p) in the square matrix.
        probability_matrix[i_s_t - total_p, j_s_t + len(indexes_p) -
                           total_e] = posterior_t_chrom
        probability_matrix[i_s_f - total_p, j_s_f + len(indexes_p) -
                           total_e] = posterior_f_chrom

        probability_matrix += probability_matrix.T

        if normalised:
            # Column-normalise; empty columns produce nan -> clamp to 0.
            norm_factors = np.sum(probability_matrix, axis=0)
            probability_matrix = probability_matrix / norm_factors
            probability_matrix[np.isnan(probability_matrix)] = 0.

        #---------------------- distal
        mask_for_counts = clustering_matrix_mask * (
            probability_matrix >= threshold_low) * (
                probability_matrix < threshold_up)

        number_of_enhancer_target_promoters_within_a_cluster_distal = mask_for_counts.sum(
            0)[:len(indexes_p)]  # affected by cluster labels

        filtered_probability_matrix = np.zeros_like(
            probability_matrix
        )  # entries outside the band stay at probability 0, easing the sums below

        filtered_probability_matrix[mask_for_counts] = probability_matrix[
            mask_for_counts]

        # Aggregate per promoter as -sum(log(1 - p)): the negative log of the
        # probability that none of its in-band links exists.
        probability_of_enhancer_target_promoters_within_a_cluster = -1 * np.log(
            1. - filtered_probability_matrix).sum(0)[:len(
                indexes_p)]  # affected by cluster labels

        #---------------------- proximal
        promoter_overlaps_enhancer = np.loadtxt(
            config_variables.promoter_overlaps_enhancer_file,
            usecols=(0, 4, 8),
            dtype=str)

        promoter_overlaps_enhancer_chrom = promoter_overlaps_enhancer[
            promoter_overlaps_enhancer[:, 0] == chrom]

        interacting_mask = np.zeros_like(probability_matrix).astype(bool)

        interacting_mask[promoter_overlaps_enhancer_chrom[:, 1].astype(int) -
                         total_p,
                         promoter_overlaps_enhancer_chrom[:, 2].astype(int) -
                         total_e + len(indexes_p)] = True
        interacting_mask += interacting_mask.T

        interacting_mask = interacting_mask * clustering_matrix_mask

        number_of_enhancer_target_promoters_within_a_cluster_proximal = interacting_mask.sum(
            0)[:len(indexes_p)]

        #---------------------- distances
        point_coordinates_promoter, point_coordinates_enhancer = TSS_coordinates[
            indexes_p], np.mean(enh_coordinates[indexes_e], axis=1)
        distances_matrix = negative_interactions.calculate_distances(
            domain, point_coordinates_promoter, point_coordinates_enhancer)

        # Rows of surviving enhancers are eligible for the proximal-distance
        # search; symmetrised like the matrices above.
        interacting_mask = np.zeros_like(probability_matrix).astype(bool)
        interacting_mask[len(indexes_p) + dict_chrom_enh_survived[chrom] -
                         total_e] = True
        interacting_mask += interacting_mask.T

        def dist_filter_new(distances_matrix, low_lim, up_lim):
            # Minimal distance per promoter among eligible pairs within
            # (low_lim, up_lim]; promoters with no such pair get 0.0.
            prox_distances_matrix = (distances_matrix <= up_lim) * (
                distances_matrix > low_lim) * interacting_mask
            maximum_value = np.max(distances_matrix)
            distances_matrix_constr = np.zeros_like(
                prox_distances_matrix).astype(float) + maximum_value
            distances_matrix_constr[prox_distances_matrix] = distances_matrix[
                prox_distances_matrix]
            prox_distances_matrix_minimal_values = distances_matrix_constr.min(
                0)[:len(indexes_p)]
            prox_distances_matrix_minimal_values[
                prox_distances_matrix_minimal_values == maximum_value] = 0.

            return prox_distances_matrix_minimal_values

        does_proximal_enhancers_targetting_the_promoter_exist = dist_filter_new(
            abs(distances_matrix), 0., 40000.)

        number_of_enhancer_target_promoters_within_a_cluster = number_of_enhancer_target_promoters_within_a_cluster_distal + number_of_enhancer_target_promoters_within_a_cluster_proximal

        # FIX: "<>" is the long-deprecated Python 2 inequality operator
        # (removed in Python 3); use "!=".
        non_zero_gene_count_mask = (
            number_of_enhancer_target_promoters_within_a_cluster !=
            0) + does_proximal_enhancers_targetting_the_promoter_exist.astype(
                bool)

        non_zero_counts_probabilities = probability_of_enhancer_target_promoters_within_a_cluster[
            non_zero_gene_count_mask]

        gene_names_non_zero_count = gene_names[
            np.where(non_zero_gene_count_mask)[0] + total_p]

        # FIX: the proximal counts were indexed with [[mask]] - a list-wrapped
        # boolean mask, i.e. deprecated non-tuple fancy indexing in NumPy;
        # index with the mask directly.
        return gene_names_non_zero_count, number_of_enhancer_target_promoters_within_a_cluster_distal[
            non_zero_gene_count_mask], number_of_enhancer_target_promoters_within_a_cluster_proximal[
                non_zero_gene_count_mask], non_zero_counts_probabilities, does_proximal_enhancers_targetting_the_promoter_exist[
                    non_zero_gene_count_mask]
Ejemplo n.º 24
0
def MAP(posterior_t, posterior_f, chrom, i_s_f, j_s_f):
    """Maximum-a-posteriori partner assignment for one chromosome.

    Builds a dense (len(indexes_p) + len(indexes_e)) square matrix of
    posterior values for positive (posterior_t) and negative (posterior_f)
    interactions, then, for each enhancer column, picks the row (partner)
    with the highest posterior via argmax.

    Parameters:
        posterior_t, posterior_f -- dicts mapping chrom -> 1-D arrays of
            posterior values for the positive / negative interactions.
        chrom -- chromosome name key (e.g. "chr1").
        i_s_f, j_s_f -- global indices (rows, columns) of the negative
            interactions for this chromosome.

    Returns (depending on the module-level `mode`):
        mask_link_exists -- bool array, True where the MAP-predicted pair is
            among the ChIA-PET-confirmed interactions.
        infered_partners -- MAP partner (global index) for every enhancer.
        MAP_probabilites -- posterior value of each MAP assignment.
    Falls through (returns None) if `mode` matches neither branch.
    """

    # Chromosome-local promoter/enhancer index arrays and the global offsets
    # (total_p/total_e) used to map global indices into matrix coordinates.
    indexes_p, indexes_e, total_p, total_e = negative_interactions.initialise_variables(chrom)[2:]

    length_chr = len(indexes_p) + len(indexes_e)
    interaction_matrix = np.zeros((length_chr, length_chr))
    # Narrow the dicts down to this chromosome's posterior arrays.
    posterior_t, posterior_f = posterior_t[chrom], posterior_f[chrom]

    if mode == "promoter_enhancer_interactions":

        chr_interactions_dict_pro_enh = config_variables.chr_interactions_dict_pro_enh
        true_inter_pro = un_string(chr_interactions_dict_pro_enh[chrom][:, :2]).astype(int)
        i_s_t, j_s_t = true_inter_pro[:, 0], true_inter_pro[:, 1]

        # Background fill: slightly below the global minimum posterior, so
        # argmax never selects a cell that was never assigned a score.
        interaction_matrix[:, :] = np.min([np.min(posterior_t), np.min(posterior_f)]) * 0.99
        # Rows = promoters (global index shifted by total_p); columns =
        # enhancers (shifted past the promoter block and by total_e).
        interaction_matrix[i_s_t - total_p, j_s_t + len(indexes_p) - total_e] = posterior_t
        interaction_matrix[i_s_f - total_p, j_s_f + len(indexes_p) - total_e] = posterior_f

        # For every column, row index of its highest-posterior partner.
        MAP_indexes = np.argmax(interaction_matrix, axis=0)

        if config_variables.alternative_classificator_outside_enhancers:
            # Evaluate MAP only at the configured set of interacting enhancers.
            chrom_interacting_enhancers_pro = config_variables.chrom_interacting_enhancers_pro[chrom]
            max_poster_is_pro = MAP_indexes[len(indexes_p) + chrom_interacting_enhancers_pro - total_e] + total_p
            MAP_predicted_intereactions_pro = np.column_stack((max_poster_is_pro, chrom_interacting_enhancers_pro))

        else:
            max_poster_is_pro = (
                MAP_indexes[len(indexes_p) + np.unique(j_s_t) - total_e] + total_p
            )  # gives a maximum aposteriori promoter to each infered enhancer
            MAP_predicted_intereactions_pro = np.column_stack((max_poster_is_pro, np.unique(j_s_t)))

        # MAP promoter (global index) for every enhancer on the chromosome.
        infered_promoters_pro_enh = MAP_indexes[len(indexes_p) :] + total_p
        # Posterior value at each column's argmax, restricted to enhancer columns.
        MAP_probabilites_pro_enh = interaction_matrix[MAP_indexes, range(len(indexes_p) + len(indexes_e))][
            len(indexes_p) :
        ]

        # Positions whose predicted (promoter, enhancer) pair is confirmed.
        # NOTE(review): list-membership test is O(n*m); fine for per-chromosome sizes.
        link_exists_pro = [
            ind for ind, el in enumerate(MAP_predicted_intereactions_pro.tolist()) if el in true_inter_pro.tolist()
        ]
        mask_link_exists_pro = np.zeros(len(MAP_predicted_intereactions_pro), bool)
        mask_link_exists_pro[link_exists_pro] = True

        return mask_link_exists_pro, infered_promoters_pro_enh, MAP_probabilites_pro_enh

    if mode == "enhancer_enhancer_interactions":

        chr_interactions_dict_enh_enh = config_variables.chr_interactions_dict_enh_enh
        true_inter_enh = un_string(chr_interactions_dict_enh_enh[chrom][:, :2]).astype(int)
        i_s_t, j_s_t = true_inter_enh[:, 0], true_inter_enh[:, 1]

        # Same background fill as above; both i and j are enhancer indices here.
        interaction_matrix[:, :] = np.min([np.min(posterior_t), np.min(posterior_f)]) * 0.99
        interaction_matrix[i_s_t + len(indexes_p) - total_e, j_s_t + len(indexes_p) - total_e] = posterior_t
        interaction_matrix[i_s_f + len(indexes_p) - total_e, j_s_f + len(indexes_p) - total_e] = posterior_f
        interaction_matrix[
            j_s_t + len(indexes_p) - total_e, i_s_t + len(indexes_p) - total_e
        ] = posterior_t  # transpose to create a full matrix
        interaction_matrix[
            j_s_f + len(indexes_p) - total_e, i_s_f + len(indexes_p) - total_e
        ] = posterior_f  # transpose to create a full matrix

        MAP_indexes = np.argmax(interaction_matrix, axis=0)
        # MAP partner enhancer for each enhancer that appears as a j-side target.
        max_poster_is_enh = MAP_indexes[len(indexes_p) + np.unique(j_s_t) - total_e] - len(indexes_p) + total_e

        infered_enhancers_enh_enh = MAP_indexes[len(indexes_p) :] + total_e

        MAP_probabilites_enh_enh = interaction_matrix[MAP_indexes, range(len(indexes_p) + len(indexes_e))][
            len(indexes_p) :
        ]

        MAP_predicted_intereactions_enh = np.column_stack((max_poster_is_enh, np.unique(j_s_t)))

        # Positions whose predicted (enhancer, enhancer) pair is confirmed.
        link_exists_enh = [
            ind for ind, el in enumerate(MAP_predicted_intereactions_enh.tolist()) if el in true_inter_enh.tolist()
        ]
        mask_link_exists_enh = np.zeros(len(MAP_predicted_intereactions_enh), bool)
        mask_link_exists_enh[link_exists_enh] = True

        return mask_link_exists_enh, infered_enhancers_enh_enh, MAP_probabilites_enh_enh
    def interactions_above_threshold(posterior_t,
                                     posterior_f,
                                     threshold_up,
                                     threshold_low,
                                     chrom,
                                     label=0,
                                     domain=False):
        """Count interactions on `chrom` whose posterior lies in
        [threshold_low, threshold_up], optionally restricted to a cluster.

        Builds a promoter x enhancer probability matrix from the positive
        (posterior_t) and negative (posterior_f) posteriors, optionally
        normalises each column to sum to 1, and counts the cells inside the
        threshold band that also pass the cluster-label mask.

        Parameters:
            posterior_t, posterior_f -- dicts mapping chrom -> posterior arrays.
            threshold_up, threshold_low -- inclusive probability band bounds.
            chrom -- chromosome name key.
            label -- cluster label to select when `cluster_specific` is set.
            domain -- if True, compute a within-domain matrix (left + right).

        Returns the integer count of matrix cells in the band.

        NOTE(review): `domain_matrix` and `interacting_mask` are computed but
        not applied to the final count (the `* interacting_mask` factor is
        commented out) — confirm whether that is intentional.
        """
        enh_coordinates, pro_coordinates, indexes_p, indexes_e, total_p, total_e = negative_interactions.initialise_variables(
            chrom)

        length_chr = len(indexes_p) + len(indexes_e)
        probability_matrix = np.zeros((length_chr, length_chr))
        posterior_t_chrom, posterior_f_chrom = posterior_t[chrom], posterior_f[
            chrom]

        # Global indices of the negative (non-confirmed) interactions.
        i_s_f, j_s_f = positive_negative_interactions_for_MAP(chrom)

        if domain:
            # Promoter coordinates for the domain overlap test: whole gene
            # body, or a 2bp window around the TSS, per configuration.
            if config_variables.TSS_or_intra_genic_for_domain_filter == "Intra_genic":
                coords_pro_domain = pro_coordinates[indexes_p]
            elif config_variables.TSS_or_intra_genic_for_domain_filter == "TSS_only":
                coords_pro_domain = np.column_stack(
                    (TSS_coordinates[indexes_p] - 1,
                     TSS_coordinates[indexes_p] + 1))
            # Union of left- and right-anchored domain co-membership.
            domain_matrix = interacting_domain.interacting_domains(
                coords_pro_domain, enh_coordinates[indexes_e], chrom, 'left',
                True)
            domain_matrix = domain_matrix + interacting_domain.interacting_domains(
                coords_pro_domain, enh_coordinates[indexes_e], chrom, 'right',
                True)
        else:
            domain_matrix = True

        if cluster_specific:

            if config_variables.distant_enh_only:
                # Keep only enhancers that both survived filtering and are distal.
                survived_mask = np.zeros(len(indexes_e))
                survived_mask[dict_chrom_enh_survived[chrom]] = True
                distance_mask = np.zeros(len(indexes_e))
                distance_mask[dict_chrom_distant[chrom]] = True
                survived = np.where(survived_mask * distance_mask)[0]
            else:
                survived = dict_chrom_enh_survived[chrom]

            # Mark each (promoter, surviving enhancer) cell with the
            # enhancer's cluster label, symmetrised, then select `label`.
            clustering_matrix = np.zeros_like(probability_matrix).astype(int)
            clustering_matrix[:len(indexes_p),
                              len(indexes_p) + survived] = labels[survived]
            clustering_matrix += clustering_matrix.T
            clustering_matrix_mask = clustering_matrix == label

        else:
            #_matrix = np.zeros_like(probability_matrix).astype(int)
            clustering_matrix_mask = True

        # TSS-resolved or gene-level confirmed interactions, per configuration.
        if config_variables.disentagled_features_validation:
            chr_interactions_pro_enh = config_variables.chr_interactions_dict_pro_enh_TSS[
                chrom]
        else:
            chr_interactions_pro_enh = config_variables.chr_interactions_dict_pro_enh[
                chrom]

        #chr_interactions_dict_pro_enh = config_variables.chr_interactions_dict_pro_enh
        #true_inter_pro = un_string(chr_interactions_dict_pro_enh[chrom][:, :2]).astype(int)
        true_inter_pro = un_string(chr_interactions_pro_enh[:, :2]).astype(int)

        i_s_t, j_s_t = true_inter_pro[:, 0], true_inter_pro[:, 1]

        #probability_matrix[:,:] = np.min([np.min(posterior_t), np.min(posterior_f)])*0.999
        # Rows = promoters, columns = enhancers (shifted past the promoter block).
        probability_matrix[i_s_t - total_p, j_s_t + len(indexes_p) -
                           total_e] = posterior_t_chrom
        probability_matrix[i_s_f - total_p, j_s_f + len(indexes_p) -
                           total_e] = posterior_f_chrom

        #probability_matrix += probability_matrix.T

        #--------------------------------------------------------------------------the part to look at for Paolo !

        if normalised:
            # Column-normalise; columns that sum to 0 produce NaNs, zeroed below.
            norm_factors = np.sum(probability_matrix, axis=0)
            probability_matrix = probability_matrix / norm_factors
            probability_matrix[np.isnan(probability_matrix)] = 0.

        #--------------------------------------------------
        #how many true links for threshold estimated from test data at FDR

        interacting_mask = np.zeros_like(probability_matrix).astype(bool)
        interacting_mask[i_s_t - total_p,
                         j_s_t + len(indexes_p) - total_e] = True

        number_of_true_interactions_at_prob_threshold_estimated_from_test_data_at_FDR_chrom = clustering_matrix_mask * (
            probability_matrix >= threshold_low) * (
                probability_matrix <= threshold_up)  #* interacting_mask

        return number_of_true_interactions_at_prob_threshold_estimated_from_test_data_at_FDR_chrom.sum(
        )
    def interactions_above_threshold(posterior_t,
                                     posterior_f,
                                     threshold_up,
                                     threshold_low,
                                     chrom,
                                     domain=False):
        """Return the confirmed and non-confirmed interactions on `chrom`
        whose posterior lies in the [threshold_low, threshold_up) band.

        Builds a dense posterior matrix (as in MAP), optionally column-
        normalises it, then filters the positive and negative interaction
        positions by the probability band (and, for promoter-enhancer mode,
        by the within-domain matrix when `domain` is True).

        Returns:
            i_s_t_filt, j_s_t_filt -- global (promoter, enhancer) / (enhancer,
                enhancer) indices of confirmed interactions in the band.
            i_s_f_filt, j_s_f_filt -- same for non-confirmed interactions.

        NOTE(review): this definition shadows an earlier function of the same
        name (the counting variant) defined at the same indentation level —
        confirm that only this variant is meant to be callable.
        """
        enh_coordinates, pro_coordinates, indexes_p, indexes_e, total_p, total_e = negative_interactions.initialise_variables(
            chrom)

        length_chr = len(indexes_p) + len(indexes_e)
        interaction_matrix = np.zeros((length_chr, length_chr))
        posterior_t, posterior_f = posterior_t[chrom], posterior_f[chrom]

        # Global indices of the negative (non-confirmed) interactions.
        i_s_f, j_s_f = positive_negative_interactions_for_MAP(chrom)

        if domain:
            import interacting_domain_clean as interacting_domain
            # Promoter coordinates for the domain overlap test: whole gene
            # body, or a 2bp window around the TSS, per configuration.
            if config_variables.TSS_or_intra_genic_for_domain_filter == "Intra_genic":
                coords_pro_domain = pro_coordinates[indexes_p]
            elif config_variables.TSS_or_intra_genic_for_domain_filter == "TSS_only":
                coords_pro_domain = np.column_stack(
                    (TSS_coordinates[indexes_p] - 1,
                     TSS_coordinates[indexes_p] + 1))
            # Union of left- and right-anchored domain co-membership.
            domain_matrix = interacting_domain.interacting_domains(
                coords_pro_domain, enh_coordinates[indexes_e], chrom, 'left',
                True)
            domain_matrix = domain_matrix + interacting_domain.interacting_domains(
                coords_pro_domain, enh_coordinates[indexes_e], chrom, 'right',
                True)
        else:
            domain_matrix = True

        if mode == "promoter_enhancer_interactions":

            # TSS-resolved or gene-level confirmed interactions, per configuration.
            if config_variables.disentagled_features_validation:
                chr_interactions_pro_enh = config_variables.chr_interactions_dict_pro_enh_TSS[
                    chrom]
            else:
                chr_interactions_pro_enh = config_variables.chr_interactions_dict_pro_enh[
                    chrom]

            #chr_interactions_dict_pro_enh = config_variables.chr_interactions_dict_pro_enh
            #true_inter_pro = un_string(chr_interactions_dict_pro_enh[chrom][:, :2]).astype(int)
            true_inter_pro = un_string(
                chr_interactions_pro_enh[:, :2]).astype(int)

            i_s_t, j_s_t = true_inter_pro[:, 0], true_inter_pro[:, 1]

            #chr_interactions_dict_pro_enh = config_variables.chr_interactions_dict_pro_enh
            #true_inter_pro = un_string(chr_interactions_dict_pro_enh[chrom][:, :2]).astype(int)
            #i_s_t, j_s_t = true_inter_pro[:,0], true_inter_pro[:,1]

            # Background fill slightly below the minimum posterior; rows =
            # promoters, columns = enhancers (shifted past the promoter block).
            interaction_matrix[:, :] = np.min(
                [np.min(posterior_t), np.min(posterior_f)]) * 0.999
            interaction_matrix[i_s_t - total_p,
                               j_s_t + len(indexes_p) - total_e] = posterior_t
            interaction_matrix[i_s_f - total_p,
                               j_s_f + len(indexes_p) - total_e] = posterior_f

            #--------------------------------------------------------------------------the part to look at for Paolo !
            #np.save("interaction_matrix_float", interaction_matrix) #I'm saving the interaction matrix for you

            if normalised:
                # Column-normalise; zero columns produce NaNs, zeroed below.
                norm_factors = np.sum(interaction_matrix, axis=0)
                interaction_matrix = interaction_matrix / norm_factors
                interaction_matrix[np.isnan(interaction_matrix)] = 0.

            interacting_mask = np.zeros_like(interaction_matrix).astype(bool)
            interacting_mask[i_s_t - total_p,
                             j_s_t + len(indexes_p) - total_e] = True

            true_pro_enh_inter_filtered = np.where(
                interacting_mask * (interaction_matrix >= threshold_low) *
                (interaction_matrix < threshold_up) * domain_matrix)
            # Map matrix coordinates back to global promoter/enhancer indices.
            i_s_t_filt, j_s_t_filt = true_pro_enh_inter_filtered[
                0] + total_p, true_pro_enh_inter_filtered[1] - len(
                    indexes_p
                ) + total_e  # that line tells you which of the positive (green) ChIA-PET-confirmed interactions lay within threshold_low, threshold_up interval

            interacting_mask = np.zeros_like(interaction_matrix).astype(bool)
            interacting_mask[i_s_f - total_p,
                             j_s_f + len(indexes_p) - total_e] = True

            false_pro_enh_inter_filtered = np.where(
                interacting_mask * (interaction_matrix >= threshold_low) *
                (interaction_matrix < threshold_up) * domain_matrix)
            i_s_f_filt, j_s_f_filt = false_pro_enh_inter_filtered[
                0] + total_p, false_pro_enh_inter_filtered[1] - len(
                    indexes_p
                ) + total_e  # that line tells you which of the negative (gray) interactions lay within threshold_low, threshold_up interval

            #--------------------------------------------------------------------------the part to look at for Paolo !

            return i_s_t_filt, j_s_t_filt, i_s_f_filt, j_s_f_filt  # the function takes threshold_up, threshold_low as an argument and returns predicted interactions with probabilities within  [threshold_low, threshold_up) interval
            # i_s_t_filt, j_s_t_filt, i_s_f_filt, j_s_f_filt legend: i_s_t are promoters of ChIA-PET confirmed interactions, j_s_t are enhancers of the interactions, i_s_f, j_s_f are (promoters, enhancers) interactions which aren't ChIA-PET confirmed
            #for paolo

        if mode == "enhancer_enhancer_interactions":

            chr_interactions_dict_enh_enh = config_variables.chr_interactions_dict_enh_enh
            true_inter_enh = un_string(
                chr_interactions_dict_enh_enh[chrom][:, :2]).astype(int)
            i_s_t, j_s_t = true_inter_enh[:, 0], true_inter_enh[:, 1]

            # Both axes are enhancers here; fill symmetrically (see transposes).
            interaction_matrix[:, :] = np.min(
                [np.min(posterior_t), np.min(posterior_f)]) * 0.999
            interaction_matrix[i_s_t + len(indexes_p) - total_e,
                               j_s_t + len(indexes_p) - total_e] = posterior_t
            interaction_matrix[i_s_f + len(indexes_p) - total_e,
                               j_s_f + len(indexes_p) - total_e] = posterior_f
            interaction_matrix[
                j_s_t + len(indexes_p) - total_e, i_s_t + len(indexes_p) -
                total_e] = posterior_t  # transpose to create a full matrix
            interaction_matrix[
                j_s_f + len(indexes_p) - total_e, i_s_f + len(indexes_p) -
                total_e] = posterior_f  # transpose to create a full matrix

            if normalised:
                norm_factors = np.sum(interaction_matrix, axis=0)
                interaction_matrix = interaction_matrix / norm_factors
                interaction_matrix[np.isnan(interaction_matrix)] = 0.

            interacting_mask = np.zeros_like(interaction_matrix).astype(bool)
            interacting_mask[i_s_t + len(indexes_p) - total_e,
                             j_s_t + len(indexes_p) - total_e] = True

            # NOTE(review): unlike the promoter-enhancer branch, domain_matrix
            # is not applied here — confirm whether that is intentional.
            true_enh_enh_inter_filtered = np.where(
                interacting_mask * (interaction_matrix >= threshold_low) *
                (interaction_matrix < threshold_up))
            i_s_t_filt, j_s_t_filt = true_enh_enh_inter_filtered[0] - len(
                indexes_p) + total_e, true_enh_enh_inter_filtered[1] - len(
                    indexes_p) + total_e

            interacting_mask = np.zeros_like(interaction_matrix).astype(bool)
            interacting_mask[i_s_f + len(indexes_p) - total_e,
                             j_s_f + len(indexes_p) - total_e] = True

            false_enh_enh_inter_filtered = np.where(
                interacting_mask * (interaction_matrix >= threshold_low) *
                (interaction_matrix < threshold_up))
            i_s_f_filt, j_s_f_filt = false_enh_enh_inter_filtered[0] - len(
                indexes_p) + total_e, false_enh_enh_inter_filtered[1] - len(
                    indexes_p) + total_e

            return i_s_t_filt, j_s_t_filt, i_s_f_filt, j_s_f_filt
def executor(PR_CURVES = "SELECTIVE", mode_of_code = "EVEN", mode_of_features_and_interactions = "FEATURES_AND_INTERACTIONS_TOGETHER", GENE_OR_PROMOTER_MODE = "GENE_MODE", redo_raw_CHIA_PET_interactions = True, mode_atr = ["FIRST_TS", "SECOND_TS"][1], plot_TF_enrichments_in_cluster = False, upstream = 300, downstream = 0, upstream_t_s = 300, downstream_t_s = 0, do_clustering = False, re_do_clustering = False, cluster_figure_selection = None, DB_version = False, calculate_number_of_within_domain_interactions = True, option_correl_select = [1], number_of_samples = 10000, kappa_0 = 1.0, mu_0 = 0.0, alpha_0 = 2.0, Beta_0 = 2.0, mode_of_sampler = "distance_MOG_empir_mu", burn_in = 0, csf_mode = False, mode_of_code_2 = "WITHOUT", chain_number = 1, continue_sampling = False, interacting_enhancers_only_MOG = False, number_of_samples_arr = [], burn_in_start = []):


	import numpy as np
	import re
	from sys import argv
	import matplotlib.pyplot as plt
	import itertools
	import bisect as bis
	import random as random
	import time
	import kern_density_est
	import smooth_priors
	import smooth_priors_non_domain
	import smooth_correl
	import smooth_priors_domain
	import matplotlib as mpl
	import matplotlib.pyplot as plt
	from matplotlib.backends.backend_pdf import PdfPages


	copy_and_paste_mode = False
	if copy_and_paste_mode:
		#PR_CURVES = "SELECTIVE"
		mode_of_code = ["ODD","EVEN", "FULL", "GAUSSIAN_SAMPLE", "MK_PAIRWISE"][1]
		mode_of_code_2 = ["WITHOUT", "ADD_GAUSSIAN_VALIDATION"][1]
		mode_of_features_and_interactions = "FEATURES_AND_INTERACTIONS_SEPERATE"
		GENE_OR_PROMOTER_MODE = "GENE_MODE"
		redo_raw_CHIA_PET_interactions = False
		mode_atr = ["FIRST_TS", "SECOND_TS"][1]
		plot_TF_enrichments_in_cluster = False
		upstream = 300
		downstream = 0
		upstream_t_s = 300
		downstream_t_s = 0
		do_clustering = False
		re_do_clustering = False
		cluster_figure_selection = "cluster_ER_enhancer"
		DB_version = False
		csf_mode = False
		calculate_number_of_within_domain_interactions = True
		kappa_0, mu_0, alpha_0, Beta_0 = 4.0, 0.0, 2.0, 2.0 # here betta is in scale. np. gamma is in scale so you can plot the gammma with the scale to have an estimate on nice beta. #derivations are in 1/betta.
		number_of_samples = 30#23000#100001#30
		burn_in = 1000
		mode_of_sampler = ["distance_prior", "distance_MOG", "dirichlet_MOG", "distance_MOG_empir_mu"][3]
		chain_number = 1
		continue_sampling = False
		interacting_enhancers_only_MOG = True	

		#number_of_samples_correl, burn_in_correl = [62000, 62000, 240000, 240000, 70000], [31000, 31000, 120000, 120000, 10000] #[70000]*5, [10000]*5
		number_of_samples_correl, burn_in_correl = [80000, 80000, 80000, 80000, 70000], [40000, 40000, 40000, 40000, 10000] #[70000]*5, [10000]*5
		number_of_samples_dist, burn_in_dist = 70000, 10000
		chain_number_correl = [1,1,1, 1, False]
		chain_number_dist = False	

	if csf_mode: mpl.use('Agg')
	#np.seterr(all=None, divide='raise', over=None, under=None, invalid=None)

	if mode_of_features_and_interactions == "FEATURES_AND_INTERACTIONS_TOGETHER":
		disentagled_features_validation = False # it just mean that it's either gene or TSS mode
		upstream_t_s = upstream
		downstream_t_s = downstream

	elif mode_of_features_and_interactions == "FEATURES_AND_INTERACTIONS_SEPERATE":
		if GENE_OR_PROMOTER_MODE == "GENE_MODE": disentagled_features_validation = False
		if GENE_OR_PROMOTER_MODE == "TSS_MODE": disentagled_features_validation = True
			


	filter_value, filter_enh, count_f_p, count_f_e, ER_pro_filtered_, path = '-1.', '-1.', 30, 30, 'True', 1 # change to 2 if you want path 2 interactions
	alternative_classificator = True
	alternative_classificator_outside_enhancers = False#True # that option is for domain. Generator
	domain = False
	domain_like_chromosome_correction = False
	interacting_negatives = False
	log_distances = True
	plot_atr, plot_atr_kernel = False, True
	use_smooth_prior_for_estimation = True
	likelihood_cross_validation = True
	distant_enh_only = True # matters for enhancer-enhancer interactions and MAPS for all enhancers not only the interacting ones
	filter_values = np.array([-1., -0.6, -0.2])
	filter_value = filter_values[0]
	number_of_bins = 4000, 4000 # to implement-easy

	FDR = np.array([0.10, 0.2,  0.25,  0.3,  0.35,  0.4])# add 0.1


	import os
	data_folder = "./data/"
	temp_output = "./temp_output/"
	results_folder = "./results/"

	if not os.path.exists(temp_output): os.makedirs(temp_output)
	if not os.path.exists(results_folder): os.makedirs(results_folder)


	print mode_of_code

	#scripts:
	chrom_names = np.array(map(lambda x: "chr{0}".format(x), np.r_[np.arange(1, 23).astype(dtype='S2'), ['X'], ['Y']]))

	if mode_of_code == "FULL":
		chroms_in_prior = np.arange(0,23,1)#+1#np.arange(0,13,1)#np.arange(0,13,1)
		chroms_to_infer = np.arange(0,23,1)#np.arange(0,23,2)#np.arange(0,13,1)#np.arange(0,23,2)#np.arange(0,13,1)
		FDR_mode = False
		interacting_enhancers_only = False
		TOP_PR_interaction_plotter_clean_chrom_to_plot = chrom_names[chroms_to_infer[1]]
		option_for_predictive_FULL_mode = 2
		genes_predicted_with_FDR_for_GRO_seq_validation = 0.25
		TOP_PR_interaction_plotter_FDR_thresholds_to_plot = FDR[:3]
		calculate_number_of_within_domain_interactions = True

	elif mode_of_code == "ODD":
		chroms_in_prior = np.arange(0,23,2)#np.arange(0,13,1)#np.arange(0,13,1)
		chroms_to_infer = np.arange(0,23,2)#np.arange(0,23,2)#np.arange(0,13,1)#np.arange(0,23,2)#np.arange(0,13,1)
		FDR_mode = True # that fuction apply for odd-odd and odd-even only. 
		interacting_enhancers_only = True

	elif mode_of_code == "EVEN":
		chroms_in_prior = np.arange(0,23,2)#np.arange(0,13,1)#np.arange(0,13,1)
		chroms_to_infer = np.arange(0,22,2)+1#np.arange(0,23,2)#np.arange(0,13,1)#np.arange(0,23,2)#np.arange(0,13,1)
		FDR_mode = True
		interacting_enhancers_only = True

	Sample_MoG_classificator = False
	MoG_classificator = False

	if mode_of_code == "GAUSSIAN_SAMPLE":
		chroms_in_prior = np.arange(0,23,1)#+1#np.arange(0,13,1)#np.arange(0,13,1)
		chroms_to_infer = np.arange(0,23,1)#np.arange(0,23,2)#np.arange(0,13,1)#np.arange(0,23,2)#np.arange(0,13,1)
		interacting_enhancers_only = False # set the upper-lower-bounds-of-distace-prior-otherwise-there-would-be-allocation-problem-of-high/low-distance
		FDR_mode = False
		if csf_mode: plot_atr, plot_atr_kernel = False, False
		Sample_MoG_classificator = True

	if mode_of_code == "MK_PAIRWISE":
		chroms_in_prior = np.arange(0,23,2)#+1#np.arange(0,13,1)#np.arange(0,13,1)
		chroms_to_infer = np.arange(0,23,1)#np.arange(0,23,2)#np.arange(0,13,1)#np.arange(0,23,2)#np.arange(0,13,1)
		interacting_enhancers_only = False # set the upper-lower-bounds-of-distace-prior-otherwise-there-would-be-allocation-problem-of-high/low-distance
		FDR_mode = False
		if csf_mode: plot_atr, plot_atr_kernel = False, False
		Sample_MoG_classificator = False


	if mode_of_code == "convergence_checker":
		chroms_in_prior = np.arange(0,23,2)#+1#np.arange(0,13,1)#np.arange(0,13,1)
		chroms_to_infer = np.arange(0,23,1)#np.arange(0,23,2)#np.arange(0,13,1)#np.arange(0,23,2)#np.arange(0,13,1)
		interacting_enhancers_only = False # set the upper-lower-bounds-of-distace-prior-otherwise-there-would-be-allocation-problem-of-high/low-distance
		FDR_mode = False
		if csf_mode: plot_atr, plot_atr_kernel = False, False
		Sample_MoG_classificator = False


	if mode_of_code_2 == "ADD_GAUSSIAN_VALIDATION":
		MoG_classificator = True
#	else:
#		burn_in = 0



	mode = ["promoter_enhancer_interactions", "enhancer_enhancer_interactions"][0]
	one_sided_or_two_sided = ["single_sided", "double_sided"][1]
	TSS_or_intra_genic_for_domain_filter = ["Intra_genic", "TSS_only"][0]
	generator_mode = ["filter_independent_generator", "filter_correl_dependent_generator", "filter_dependent_generator"][1]

	promoter_overlaps_enhancer_file = temp_output + "intersect_with_full_genes_l_{0}_r_{1}".format(upstream, downstream)
	name_of_promoter_file_for_overlap = data_folder + "Homo_sapiens.GRCh37.75.gtf_filtered_gene_joint_2_cleaned_chr_sorted_sorted_ordered_0_indexed.gz"
	name_of_enhancer_file_for_overlap = data_folder + "common_region_peaks_extended_less_time_points_corrected_0_indexed"#"common_region_peaks_extended_less_time_points_sorted"
	name_of_time_series_promoter_file_for_TSS_start = data_folder + "Homo_sapiens.GRCh37.75.gtf_filtered_gene_joint_2_cleaned_chr_sorted_sorted_ordered.gz"
	name_of_overlap_file_pro = temp_output + 'ER_promoters_{0}_{1}'.format(upstream, downstream)
	name_of_overlap_file_enh = temp_output + 'ER_peaks_overlapping_promoters_{0}_{1}'.format(upstream, downstream)

	# you can now make every feature to behave differentely. However how to count a signal would depend on where TSS is.

	import selected_combinations as sel
	combinations, selected_combinations = sel.selected_combinations("SELECTIVE")


	#----------------------------------------------------------
	#print mode_of_code, MoG_classificator, Sample_MoG_classificator	

	
	chroms_to_infer = chrom_names[chroms_to_infer]
	chroms_in_prior = chrom_names[chroms_in_prior]
	option = [0,1,2,3,4]
	filt_option = option
	time_points = 8



	datasets_names = np.array(['PolII_2012-03', 'PolII', 'H2AZ', 'ER', 'H3K4me3'])#, '2012-03_RNA', 'RNA'])
	dataset_names_option = datasets_names[option]
	dict_option = dict(zip(range(len(datasets_names)), datasets_names))

	link_data_set_name_to_file_name = {}

	name_of_time_series_file = {}
	name_of_time_series_file["enhancers"] = name_of_enhancer_file_for_overlap + "_unfiltered_count"

	name_of_enhancer_file_for_overlap = name_of_enhancer_file_for_overlap + ".gz"

	if upstream_t_s <> 0: name_of_time_series_file["promoters"] = data_folder + "Homo_sapiens.GRCh37.75.gtf_filtered_gene_joint_2_cleaned_chr_sorted_sorted_ordered_0_indexed_{0}_unfiltered_count".format(upstream_t_s)
	else: name_of_time_series_file["promoters"] = data_folder + "Homo_sapiens.GRCh37.75.gtf_filtered_gene_joint_2_cleaned_chr_sorted_sorted_ordered_0_indexed_unfiltered_count"

	full_list_enhancers = np.array([name_of_time_series_file["enhancers"] + "_{0}.gz".format(name_of_TF) for name_of_TF in datasets_names])
	full_list_promoters = np.array([name_of_time_series_file["promoters"] + "_{0}.gz".format(name_of_TF) for name_of_TF in datasets_names])

	link_data_set_name_to_file_name["enhancers"] = dict(zip(datasets_names, full_list_enhancers))
	link_data_set_name_to_file_name["promoters"] = dict(zip(datasets_names, full_list_promoters))


	import config_variables
	reload(config_variables)
	config_variables.data_folder = data_folder
	config_variables.results_folder = results_folder
	#------------------------------------------------------------------------------------------------------------
	import time_series_prepare_filter as initiate_time_series
	initiate_time_series.datasets_names = datasets_names
	initiate_time_series.time_points = time_points
	dataset_time_series_dict = initiate_time_series.time_series_prepare(full_list_promoters[option], full_list_enhancers[option])


	#------------------------------------------------------------------------------------------------------------------

	classificator_elements = {}

	for filter_value_ in filter_values:
		classificator_elements[filter_value_] = {}
		for mode_ in ["promoter_enhancer_interactions", "enhancer_enhancer_interactions"]:
			classificator_elements[filter_value_][mode_] = {}
			for classification_of_interactions in ["positive_interactions", "negative_interactions"]:
				classificator_elements[filter_value_][mode_][classification_of_interactions] = {}
				for attribute_of_interaction in ["distance", "correlation"]:
					classificator_elements[filter_value_][mode_][classification_of_interactions][attribute_of_interaction] = {}
					for probability_of_being_positive_or_negative in ["probabilities_of_being_positive_interactions", "probabilities_of_being_negative_interactions"]:

						classificator_elements[filter_value_][mode_][classification_of_interactions][attribute_of_interaction][probability_of_being_positive_or_negative] = {}
						classificator_elements[filter_value_][mode_][classification_of_interactions][attribute_of_interaction][probability_of_being_positive_or_negative]["prior_bins"] = np.array([])
						classificator_elements[filter_value_][mode_][classification_of_interactions][attribute_of_interaction][probability_of_being_positive_or_negative]["prior_frequencies"] = np.array([])

						if attribute_of_interaction == "correlation":

							for data_set_name in dataset_names_option:
								classificator_elements[filter_value_][mode_][classification_of_interactions][attribute_of_interaction][probability_of_being_positive_or_negative][data_set_name] = {}
								classificator_elements[filter_value_][mode_][classification_of_interactions][attribute_of_interaction][probability_of_being_positive_or_negative][data_set_name]["prior_bins"] = np.array([])
								classificator_elements[filter_value_][mode_][classification_of_interactions][attribute_of_interaction][probability_of_being_positive_or_negative][data_set_name]["prior_frequencies"] = np.array([])
								classificator_elements[filter_value_][mode_][classification_of_interactions][attribute_of_interaction][probability_of_being_positive_or_negative][data_set_name]["posterior_component_values"] = {}
								for chrom_ in chroms_to_infer:
									classificator_elements[filter_value_][mode_][classification_of_interactions][attribute_of_interaction][probability_of_being_positive_or_negative][data_set_name]["posterior_component_values"][chrom_] = np.array([])

						else:
							for chrom_ in chroms_to_infer:
								classificator_elements[filter_value_][mode_][classification_of_interactions][attribute_of_interaction][probability_of_being_positive_or_negative]["posterior_component_values"] = {}
								classificator_elements[filter_value_][mode_][classification_of_interactions][attribute_of_interaction][probability_of_being_positive_or_negative]["posterior_component_values"][chrom_] = np.array([])



	
	# Publish this run's settings onto the shared config_variables module so the
	# project modules imported below (filters_clean, prior_producer, ...) can read
	# them as globals instead of taking parameters.
	config_variables.temp_output = temp_output
	config_variables.np = np
	config_variables.link_data_set_name_to_file_name = link_data_set_name_to_file_name
	config_variables.chroms_in_prior = chroms_in_prior
	config_variables.mode = mode
	config_variables.dataset_names_option = dataset_names_option
	config_variables.count_f_p = count_f_p
	config_variables.count_f_e = count_f_e
	config_variables.filter_enh = filter_enh
	config_variables.domain = domain
	config_variables.dataset_time_series_dict = dataset_time_series_dict
	config_variables.re = re
	config_variables.path = path
	config_variables.upstream = upstream
	config_variables.interacting_negatives = interacting_negatives
	config_variables.interacting_enhancers_only = interacting_enhancers_only
	config_variables.chroms_to_infer = chroms_to_infer
	config_variables.filter_value = filter_value
	config_variables.filter_values = filter_values
	config_variables.datasets_names = datasets_names
	config_variables.full_list_promoters = full_list_promoters
	config_variables.option = option
	config_variables.time_points = time_points
	config_variables.distant_enh_only = distant_enh_only
	config_variables.full_list_enhancers = full_list_enhancers
	config_variables.ER_pro_filtered_ = ER_pro_filtered_
	config_variables.TSS_or_intra_genic_for_domain_filter = TSS_or_intra_genic_for_domain_filter
	config_variables.one_sided_or_two_sided = one_sided_or_two_sided
	config_variables.chrom_names = chrom_names
	config_variables.promoter_overlaps_enhancer_file = promoter_overlaps_enhancer_file
	config_variables.name_of_time_series_promoter_file_for_TSS_start = name_of_time_series_promoter_file_for_TSS_start
	# NOTE(review): `upstream` was already assigned above — this second
	# assignment is redundant; confirm one of the two can be dropped.
	config_variables.upstream = upstream
	config_variables.downstream = downstream
	config_variables.upstream_t_s = upstream_t_s
	config_variables.name_of_promoter_file_for_overlap = name_of_promoter_file_for_overlap
	config_variables.name_of_enhancer_file_for_overlap = name_of_enhancer_file_for_overlap
	config_variables.name_of_overlap_file_pro = name_of_overlap_file_pro
	config_variables.name_of_overlap_file_enh = name_of_overlap_file_enh
	config_variables.filt_option = filt_option
	config_variables.log_distances = log_distances
	config_variables.domain_like_chromosome_correction = domain_like_chromosome_correction
	config_variables.alternative_classificator = alternative_classificator
	config_variables.likelihood_cross_validation = likelihood_cross_validation
	config_variables.alternative_classificator_outside_enhancers = alternative_classificator_outside_enhancers
	config_variables.dict_option = dict_option
	config_variables.kappa_0, config_variables.mu_0, config_variables.alpha_0, config_variables.Beta_0 = kappa_0, mu_0, alpha_0, Beta_0
	config_variables.MoG_classificator, config_variables.Sample_MoG_classificator = MoG_classificator, Sample_MoG_classificator
	config_variables.number_of_samples = number_of_samples
	config_variables.use_smooth_prior_for_estimation = use_smooth_prior_for_estimation
	config_variables.FDR = FDR
	config_variables.FDR_mode = FDR_mode
	config_variables.number_of_bins = number_of_bins
	config_variables.disentagled_features_validation = disentagled_features_validation
	config_variables.mode_of_code = mode_of_code
	config_variables.interacting_enhancers_only_MOG = interacting_enhancers_only_MOG

	#-----------------------------------------------

	#prepares variables and calculates model for a filter_value





	#run twice to get TSS and GENE mode.
	# Side-effect import: importing interaction_finder_wrapper regenerates the
	# raw ChIA-PET interaction files (run twice to get TSS and GENE mode).
	if redo_raw_CHIA_PET_interactions: import interaction_finder_wrapper


	import filters_clean

	#if not(domain) or alternative_classificator:
	#	dict_chrom_pro_survived, dict_chrom_pro_not_survived, filtered_promoters, Pol_2_correl_filtered_promoters = filters_clean.features_filtered(filter_value, count_f_p, full_list_promoters, filt_option, name_of_overlap_file_pro, add_overl = False)
	#	dict_chrom_enh_survived, dict_chrom_enh_not_survived, filtered_enhancers, Pol_2_correl_filtered_enhancers = filters_clean.features_filtered(filter_enh, count_f_e, full_list_enhancers, filt_option, name_of_overlap_file_enh, add_overl = False)
	#else:
	#	if mode == "promoter_enhancer_interactions":
	#		dict_chrom_pro_survived, dict_chrom_pro_not_survived, filtered_promoters, Pol_2_correl_filtered_promoters = filters_clean.features_filtered(filter_value, count_f_p, full_list_promoters, filt_option, name_of_overlap_file_pro, add_overl = False, remove_single_domain_elements = True)

	#		filter_, count_f, list_of_datasets, options, name_of_overlap_file, add_overl, remove_single_domain_elements = filter_value, count_f_p, full_list_promoters, filt_option, name_of_overlap_file_pro, False, True

	#		dict_chrom_enh_survived, dict_chrom_enh_not_survived, filtered_enhancers, Pol_2_correl_filtered_enhancers = filters_clean.features_filtered(filter_enh, count_f_e, full_list_enhancers, filt_option, name_of_overlap_file_enh, add_overl = False)

	#	else:
	#		dict_chrom_pro_survived, dict_chrom_pro_not_survived, filtered_promoters, Pol_2_correl_filtered_promoters = filters_clean.features_filtered(filter_value, count_f_p, full_list_promoters, filt_option, name_of_overlap_file_pro, add_overl = False)
	#		dict_chrom_enh_survived, dict_chrom_enh_not_survived, filtered_enhancers, Pol_2_correl_filtered_enhancers = filters_clean.features_filtered(filter_enh, count_f_e, full_list_enhancers, filt_option, name_of_overlap_file_enh, add_overl = False, remove_single_domain_elements = True)

	# Count-filter promoters and enhancers: per-chromosome survivors and
	# non-survivors, the filtered feature tables, and the Pol2-replicate-
	# correlation-filtered variants.
	dict_chrom_pro_survived, dict_chrom_pro_not_survived, filtered_promoters, Pol_2_correl_filtered_promoters = filters_clean.features_filtered(filter_value, count_f_p, full_list_promoters, filt_option, name_of_overlap_file_pro, add_overl = False)
	dict_chrom_enh_survived, dict_chrom_enh_not_survived, filtered_enhancers, Pol_2_correl_filtered_enhancers = filters_clean.features_filtered(filter_enh, count_f_e, full_list_enhancers, filt_option, name_of_overlap_file_enh, add_overl = False)

	config_variables.Pol_2_correl_filtered_promoters = Pol_2_correl_filtered_promoters
	config_variables.Pol_2_correl_filtered_enhancers = Pol_2_correl_filtered_enhancers

	# Split enhancers into distant vs promoter-proximal using the overlap file.
	config_variables.dict_chrom_distant, config_variables.dict_chrom_proximal, config_variables.proximal_enhancers_mask = filters_clean.distant_enh_only_filter(name_of_overlap_file_enh)




	# Optional affinity-propagation clustering of the time series, followed by
	# the matching R plotting script for the selected figure.
	if do_clustering: 

		correl_value_filter = False
		distant_enh_only_log = False

		# Each figure selection fixes: feature kind (promoters/enhancers), which
		# datasets to concatenate, a per-dataset count filter, and optionally a
		# Pol2-replicate correlation filter / distant-enhancer-only restriction.
		# NOTE(review): if cluster_figure_selection matches none of these cases,
		# cluster_mode_setting is never bound and the concatenator call below
		# would raise a NameError — confirm callers always pass a valid value.
		if cluster_figure_selection == "cluster_ER_enhancer": cluster_mode_setting = ["promoters", "enhancers"][1]; datasets_to_concat = datasets_names[[3]]; filter_each_dataset = 100; correl_value_filter = False; distant_enh_only_log = True;

		elif cluster_figure_selection == "cluster_Pol2s_enhancer": cluster_mode_setting = ["promoters", "enhancers"][1]; datasets_to_concat = datasets_names[[0, 1]]; filter_each_dataset = 30; correl_value_filter = 0.2; distant_enh_only_log = True;

		elif cluster_figure_selection == "cluster_Pol2s_promoter": cluster_mode_setting = ["promoters", "enhancers"][0]; datasets_to_concat = datasets_names[[0,1]]; filter_each_dataset = 30; correl_value_filter = 0.2;

		elif cluster_figure_selection == "cluster_ER_promoter": cluster_mode_setting = ["promoters", "enhancers"][0]; datasets_to_concat = datasets_names[[3]]; filter_each_dataset = 100;

		elif cluster_figure_selection == "cluster_Pol2s_ER_enhancer": cluster_mode_setting = ["promoters", "enhancers"][1]; datasets_to_concat = datasets_names[[1, 3]]; filter_each_dataset = 200;

		elif cluster_figure_selection == "cluster_Pol2s_ER_enhancer_test": cluster_mode_setting = ["promoters", "enhancers"][1]; datasets_to_concat = datasets_names[[1, 3]]; filter_each_dataset = 300;

		config_variables.dataset_time_series_dict_mean_std = initiate_time_series.time_series_prepare_mean_std(full_list_promoters[option], full_list_enhancers[option])		
		config_variables.name_of_time_series_file = name_of_time_series_file
		import AP_clustering	
		config_variables.name_of_overlap_file_dict = dict(zip(["promoters", "enhancers"], [name_of_overlap_file_pro, name_of_overlap_file_enh]))
		# Concatenate the selected time series into one matrix/file for clustering.
		merged_time_series_to_cluster = AP_clustering.concatenator(
		cluster_mode = cluster_mode_setting, 
		merge_time_series_option = datasets_to_concat, 
		count_filter_each_data_set = filter_each_dataset, 
		pol2_rep_correl_filt = correl_value_filter, 
		distant_enh_only = distant_enh_only_log)

		if re_do_clustering:

			config_variables.merged_time_series_to_cluster = merged_time_series_to_cluster	
			AP_clustering.AP_clustering(merged_time_series_to_cluster, number_of_clusters = 40)
			config_variables.labels = np.loadtxt(merged_time_series_to_cluster + "_labels", dtype = str)

		# Run the figure's R script from its own directory, then restore the cwd.
		import os as os
		cwd = os.getcwd()
		path_to_R = cwd + "/R_scripts/"
		os.chdir(path_to_R)

		print ("Rscript " + path_to_R + "ER_enhancer.R")
		
		# NOTE(review): there is no os.system branch for
		# "cluster_Pol2s_ER_enhancer_test" — confirm that selection is meant to
		# skip plotting.
		if cluster_figure_selection == "cluster_ER_enhancer": os.system("Rscript " + path_to_R + "ER_enhancer.R") 

		elif cluster_figure_selection == "cluster_Pol2s_enhancer": os.system("Rscript " + path_to_R + "PolIIs_enhancer.R")

		elif cluster_figure_selection == "cluster_Pol2s_promoter": os.system("Rscript " + path_to_R + "PolIIs_promoter.R")

		elif cluster_figure_selection == "cluster_ER_promoter": os.system("Rscript " + path_to_R + "ER_promoter.R")

		elif cluster_figure_selection == "cluster_Pol2s_ER_enhancer": os.system("Rscript " + path_to_R + "Pol2_ER.R")
		os.chdir(cwd)	

		#if not(copy_and_paste_mode): return 0

	# Optional TF-enrichment plots: run the overlapper for every combination of
	# pre-clustered time-series file, TSS window, sorting mode and peak source.
	if plot_TF_enrichments_in_cluster:


		# Each row: (clustered time-series file, upstream_TSS, downstream_TSS,
		# which concatenated series to rank by, feature kind ENHANCER/TSS/GENE).
		all_analysis = [["common_region_peaks_extended_less_time_points_corrected_0_indexed_unfiltered_count_concat_PolII_ER_200", 0, 0, ["FIRST_TS", "SECOND_TS"][0], "ENHANCER"],
						["common_region_peaks_extended_less_time_points_corrected_0_indexed_unfiltered_count_concat_PolII_ER_200", 0, 0, ["FIRST_TS", "SECOND_TS"][1], "ENHANCER"],
						["common_region_peaks_extended_less_time_points_corrected_0_indexed_unfiltered_count_concat_ER_100_distant_only", 0, 0, ["FIRST_TS", "SECOND_TS"][0], "ENHANCER"],
						["common_region_peaks_extended_less_time_points_corrected_0_indexed_unfiltered_count_concat_PolII_2012-03_PolII_30_cor_0.2_distant_only", 0, 0, ["FIRST_TS", "SECOND_TS"][0], "ENHANCER"],
						["common_region_peaks_extended_less_time_points_corrected_0_indexed_unfiltered_count_concat_PolII_2012-03_PolII_30_cor_0.2_distant_only", 0, 0, ["FIRST_TS", "SECOND_TS"][1], "ENHANCER"],
						["Homo_sapiens.GRCh37.75.gtf_filtered_gene_joint_2_cleaned_chr_sorted_sorted_ordered_0_indexed_300_unfiltered_count_concat_ER_100", 1000, 1000, ["FIRST_TS", "SECOND_TS"][0], "TSS"],
						["Homo_sapiens.GRCh37.75.gtf_filtered_gene_joint_2_cleaned_chr_sorted_sorted_ordered_0_indexed_300_unfiltered_count_concat_PolII_2012-03_PolII_30_cor_0.2", 1000, 1000, ["FIRST_TS", "SECOND_TS"][0], "TSS"],
						["Homo_sapiens.GRCh37.75.gtf_filtered_gene_joint_2_cleaned_chr_sorted_sorted_ordered_0_indexed_300_unfiltered_count_concat_PolII_2012-03_PolII_30_cor_0.2", 1000, 1000, ["FIRST_TS", "SECOND_TS"][1], "TSS"],
						["Homo_sapiens.GRCh37.75.gtf_filtered_gene_joint_2_cleaned_chr_sorted_sorted_ordered_0_indexed_300_unfiltered_count_concat_ER_100", 10000, 10000, ["FIRST_TS", "SECOND_TS"][0], "TSS"],
						["Homo_sapiens.GRCh37.75.gtf_filtered_gene_joint_2_cleaned_chr_sorted_sorted_ordered_0_indexed_300_unfiltered_count_concat_PolII_2012-03_PolII_30_cor_0.2", 10000, 10000, ["FIRST_TS", "SECOND_TS"][0], "TSS"],
						["Homo_sapiens.GRCh37.75.gtf_filtered_gene_joint_2_cleaned_chr_sorted_sorted_ordered_0_indexed_300_unfiltered_count_concat_PolII_2012-03_PolII_30_cor_0.2", 10000, 10000, ["FIRST_TS", "SECOND_TS"][1], "TSS"],
						["Homo_sapiens.GRCh37.75.gtf_filtered_gene_joint_2_cleaned_chr_sorted_sorted_ordered_0_indexed_300_unfiltered_count_concat_ER_100", 300, 0, ["FIRST_TS", "SECOND_TS"][0], "GENE"],
						["Homo_sapiens.GRCh37.75.gtf_filtered_gene_joint_2_cleaned_chr_sorted_sorted_ordered_0_indexed_300_unfiltered_count_concat_PolII_2012-03_PolII_30_cor_0.2", 300, 0, ["FIRST_TS", "SECOND_TS"][0], "GENE"],
						["Homo_sapiens.GRCh37.75.gtf_filtered_gene_joint_2_cleaned_chr_sorted_sorted_ordered_0_indexed_300_unfiltered_count_concat_PolII_2012-03_PolII_30_cor_0.2", 300, 0, ["FIRST_TS", "SECOND_TS"][1], "GENE"]]


		import overlapper_hg19_clean

		#mode_of_data_sets, sorted_mode = ["Ciiras", "Others_from_cistrom_finder"][1], ["amplitude_sorted", "size_sorted"][0]

		for merged_time_series_to_cluster, upstream_TSS, downstream_TSS, mode_atr, mode_atr2 in all_analysis:
			for mode_of_data_sets in ["Ciiras", "Others_from_cistrom_finder"]:
				# For the cistrome-finder peak source some factors are excluded
				# from the plots; ESR1 only when looking at enhancers.
				if mode_of_data_sets == "Others_from_cistrom_finder": 
					if mode_atr2 == "ENHANCER":
						dont_plot = ["ESR1", "ESR2", "RAD21"]
					else: 
						dont_plot = ["ESR2", "RAD21"]
				else: dont_plot = []
				for sorted_mode in ["amplitude_sorted", "size_sorted"]:

					overlapper_hg19_clean.executor(merged_time_series_to_cluster, upstream_TSS = upstream_TSS, downstream_TSS = downstream_TSS, diff_bind_version = DB_version, mode_atr = mode_atr, mode_atr2 = mode_atr2, mode_of_data_sets = mode_of_data_sets, sorted_mode = sorted_mode, dont_plot = dont_plot) # mode attribute specifies whether it should use ER mean or Pol2 mean of a cluster to assess raising or falling tendencies.
		#if not(copy_and_paste_mode): return 0

	import generator_executor
	# Produce the true-interaction file for the current domain setting.
	f_name = generator_executor.interactions_producer_filter(generator_mode, domain, 2, TSS_or_intra_genic_for_domain_filter, "GENE_MODE") #in order to get path 2 interactions change to 3

	config_variables.dict_chrom_pro_survived = dict_chrom_pro_survived
	config_variables.dict_chrom_pro_not_survived = dict_chrom_pro_not_survived
	config_variables.f_name = f_name
	config_variables.filtered_promoters = filtered_promoters
	config_variables.filtered_enhancers = filtered_enhancers
	config_variables.dict_chrom_enh_survived = dict_chrom_enh_survived
	config_variables.dict_chrom_enh_not_survived = dict_chrom_enh_not_survived

	import prepare_interactions_clean

	# NOTE(review): the flag is hard-coded True here, so the branch below always
	# runs and overrides whatever value the run was configured with.
	alternative_classificator_outside_enhancers = True # had something to do with enhancers outside domains - it's for MAP, enhancers which are interacting within domain and outside. Althought it's a bit ambigious for enhancers which may have one link inside domain and one outside
	if alternative_classificator_outside_enhancers:
		# Domain-restricted interaction file (domain argument forced to True) used
		# only to record, per chromosome, which enhancers interact with promoters.
		f_name_2 = generator_executor.interactions_producer_filter(generator_mode, True, 2, TSS_or_intra_genic_for_domain_filter, "GENE_MODE")
		chr_interactions_dict_pro_enh, chr_interactions_dict_enh_enh, dict_total_enh, dict_total_pro = prepare_interactions_clean.filter_true_interactions_of_promoters_and_enhancers_which_didnt_survive_filtering(f_name_2)
		from  prepare_interactions_clean import un_string
		chrom_interacting_enhancers_pro = {}	
		for chrom__ in chrom_names: chrom_interacting_enhancers_pro[chrom__] = np.unique(un_string(chr_interactions_dict_pro_enh[chrom__])[:,1])
		config_variables.chrom_interacting_enhancers_pro = chrom_interacting_enhancers_pro

	# Re-load the dictionaries from the main interaction file (overwrites the
	# f_name_2-based dictionaries built above).
	chr_interactions_dict_pro_enh, chr_interactions_dict_enh_enh, dict_total_enh, dict_total_pro = prepare_interactions_clean.filter_true_interactions_of_promoters_and_enhancers_which_didnt_survive_filtering(f_name)


	if disentagled_features_validation: #That TSS_MODE can be still buggy to some extend. Check that later if you need to
		f_name_TSS = generator_executor.interactions_producer_filter(generator_mode, domain, 2, TSS_or_intra_genic_for_domain_filter, "TSS_MODE")
		config_variables.chr_interactions_dict_pro_enh_TSS, config_variables.chr_interactions_dict_enh_enh_TSS, config_variables.dict_total_enh_TSS, config_variables.dict_total_pro_TSS = prepare_interactions_clean.filter_true_interactions_of_promoters_and_enhancers_which_didnt_survive_filtering(f_name_TSS)

	config_variables.dict_total_enh = dict_total_enh
	config_variables.dict_total_pro = dict_total_pro
	config_variables.chr_interactions_dict_pro_enh = chr_interactions_dict_pro_enh
	config_variables.chr_interactions_dict_enh_enh = chr_interactions_dict_enh_enh

	import chrom_specific_negative_interactions as negative_interactions

	config_variables.negative_interactions = negative_interactions

	import prior_producer
	import classificator_clean
	import prepare_upper_and_lower_bounds_for_priors as prior_bounds
	import prior_histograms_cl
	import allocator
	import plot_histograms_figures

	# Build the priors and per-interaction inferred elements.  The sampling
	# variant first runs with all enhancers to fix the prior distance bounds,
	# then re-runs restricted to interacting enhancers only.
	if Sample_MoG_classificator:
		config_variables.interacting_enhancers_only = False
		# reload() (Py2 builtin) re-executes the module so it picks up the flag
		# just written to config_variables.
		reload(negative_interactions)
		config_variables.negative_interactions = negative_interactions

		config_variables.alternative_classificator_outside_enhancers = False
		prior_elements = prior_producer.prior_producer()
		config_variables.alternative_classificator_outside_enhancers = False#True
		infered_elements = classificator_clean.infered_elements_filler()

		# Bounds are taken from the unrestricted run and reused below.
		low_dist, up_dist = prior_bounds.prepare_upper_and_lower_bounds_for_priors(prior_elements, infered_elements)

		config_variables.interacting_enhancers_only = True	
		reload(negative_interactions)
		config_variables.negative_interactions = negative_interactions

		config_variables.alternative_classificator_outside_enhancers = False
		prior_elements = prior_producer.prior_producer()
		config_variables.alternative_classificator_outside_enhancers = False#True
		infered_elements = classificator_clean.infered_elements_filler()

	else:
		config_variables.alternative_classificator_outside_enhancers = False
		prior_elements = prior_producer.prior_producer()
		config_variables.alternative_classificator_outside_enhancers = False#True
		infered_elements = classificator_clean.infered_elements_filler()

		low_dist, up_dist = prior_bounds.prepare_upper_and_lower_bounds_for_priors(prior_elements, infered_elements)

	# Bin the priors (optionally smoothed) and attach probabilities; then assign
	# posterior components to every candidate interaction.
	prior_elements = prior_histograms_cl.prior_bins_prob_and_plotter(prior_elements, low_dist, up_dist, use_smooth_prior_for_estimation, plot_atr, plot_atr_kernel, Sample_MoG_classificator = False)
	if not(csf_mode): plot_histograms_figures.execute(prior_elements, plot_atr, plot_atr_kernel)

	infered_elements = allocator.allocator(infered_elements, prior_elements)

	#for mode in modes:
	#	for filter_value in filter_values:


	# Copy the freshly inferred posterior components into the global
	# classificator_elements store for the current mode / filter_value.
	for classification_of_interactions in ["positive_interactions", "negative_interactions"]:
		for attribute_of_interaction in ["distance", "correlation"]:
			for probability_of_being_positive_or_negative in ["probabilities_of_being_positive_interactions", "probabilities_of_being_negative_interactions"]:
				if attribute_of_interaction == "correlation":
					# Correlation components exist per data set and per chromosome.
					for data_set_name in dataset_names_option:
						for chrom_ in chroms_to_infer:
							update = infered_elements[mode][classification_of_interactions][attribute_of_interaction][data_set_name][probability_of_being_positive_or_negative][chrom_]
							classificator_elements[filter_value][mode][classification_of_interactions][attribute_of_interaction][probability_of_being_positive_or_negative][data_set_name]["posterior_component_values"][chrom_] = update

				elif attribute_of_interaction == "distance":
					# Distance components are shared across data sets: keyed by chromosome only.
					for chrom_ in chroms_to_infer:
						update = infered_elements[mode][classification_of_interactions][attribute_of_interaction][probability_of_being_positive_or_negative][chrom_]
						classificator_elements[filter_value][mode][classification_of_interactions][attribute_of_interaction][probability_of_being_positive_or_negative]["posterior_component_values"][chrom_] = update



	config_variables.classificator_elements = classificator_elements
	import classifiers_clean
	config_variables.classifiers_clean = classifiers_clean


	# GAUSSIAN_SAMPLE: run the MoG Gibbs sampler over chromosomes in parallel,
	# then classify from the collected samples.
	if mode_of_code == "GAUSSIAN_SAMPLE":

		print "Sample_MoG_classificator"
		from multiprocessing import Pool
		import Gaussian_probs	
		prior_elements[mode]["MOG_distance"]["prior_frequencies"], prior_elements[mode]["MOG_distance"]["prior_bins"] = Gaussian_probs.executor(prior_elements, low_dist, up_dist)

		# Ratio of the positive distance prior to the MoG distance prior, per bin.
		config_variables.probabilities_of_a_bin = prior_elements[mode]["positive_interactions"]["distance"]["prior_frequencies"]/prior_elements[mode]["MOG_distance"]["prior_frequencies"] #prior_elements[mode]["positive_interactions"]["distance"]["prior_frequencies"]/(prior_elements[mode]["negative_interactions"]["distance"]["prior_frequencies"])
		config_variables.adequate_histogram_bins = prior_elements[mode]["MOG_distance"]["prior_bins"] #prior_elements[mode]["positive_interactions"]["distance"]["prior_bins"] it's the same but just in case

		#prior_elements[mode]["positive_interactions"]["distance"]["prior_bins"]


		config_variables.test_prior = False
		import finite_MOG_object_orientated_1d_times_n_case_log_calc_prob_visited_float64_distance_low_distances_active_promoters_clean as MOG

		#def func_star(args): return MOG.executor(*args)

		p = Pool(5)

		#option_correl_select = [1]	
		# One sampler job per (chromosome, selected correlation-option combination).
		arguments = [(mode_of_sampler, number_of_samples, option_correl__, chrom_, chain_number, continue_sampling) for chrom_ in chroms_to_infer for option_correl__ in selected_combinations if option_correl__ == option_correl_select]
		#arguments = arguments[-6:]
		#bla = []
		#for el in arguments:bla += MOG.executor(el)
		
		# MOG.executor is run for its side effects (writes sample chains to disk).
		p.map(MOG.executor, arguments)

		posterior_ = {}
		import classifiers_clean	

		# NOTE(review): 150/2 is Python-2 integer division (== 75); fine here but
		# would change meaning under `from __future__ import division`.
		if mode_of_sampler == "distance_prior":
			posterior_["positive_interactions"], posterior_["negative_interactions"] = classifiers_clean.MOG_classifier(mode_of_sampler, number_of_samples = number_of_samples, burn_in = burn_in, pairwise_number_in_pack = 150/2)
		else:
			posterior_["positive_interactions"], posterior_["negative_interactions"] = {}, {}
			comb = "_".join([dict_option[el] for el in option_correl_select])
			posterior_["positive_interactions"][comb], posterior_["negative_interactions"][comb] = classifiers_clean.MOG_classifier(mode_of_sampler, comb = comb, kappa_0 = kappa_0, mu_0 = mu_0 , alpha_0 = alpha_0, Beta_0 = Beta_0, number_of_samples = number_of_samples, burn_in = burn_in, chain = chain_number)



		# Optional diagnostic: sample from the prior alone and compare the
		# resulting kernel density against the analytic prior.
		if config_variables.test_prior:
			import Gaussian_probs
			prior_elements[mode]["MOG_distance"]["prior_frequencies"], prior_elements[mode]["MOG_distance"]["prior_bins"] = Gaussian_probs.executor(prior_elements, low_dist, up_dist)
			config_variables.probabilities_of_a_bin = prior_elements[mode]["positive_interactions"]["distance"]["prior_frequencies"]/prior_elements[mode]["MOG_distance"]["prior_frequencies"]#prior_elements[mode]["positive_interactions"]["distance"]["prior_frequencies"]/(prior_elements[mode]["negative_interactions"]["distance"]["prior_frequencies"])# + prior_elements[mode]["positive_interactions"]["distance"]["prior_frequencies"])
			from prepare_interactions_clean import un_string

			def inter_enhancer(chrom):
				# Number of distinct enhancers on `chrom` that sit on the enhancer
				# side of at least one true promoter-enhancer interaction
				# (enhancer ids shifted by total_e to chromosome-local indexing).
				negatives = config_variables.negative_interactions
				idx_p, idx_e, tot_p, tot_e = negatives.initialise_variables(chrom)[2:]

				# Use the TSS-mode interaction dictionary when that validation
				# variant is active, otherwise the gene-mode one.
				if config_variables.disentagled_features_validation:
					interactions = config_variables.chr_interactions_dict_pro_enh_TSS[chrom]
				else:
					interactions = config_variables.chr_interactions_dict_pro_enh[chrom]

				enhancer_ids = un_string(interactions[:, :2]).astype(int)[:, 1]
				return len(np.unique(enhancer_ids) - tot_e)
		
			# Prior-test jobs: sample counts equal the number of interacting
			# enhancers per chromosome (over the prior chromosomes this time).
			arguments = [(mode_of_sampler, inter_enhancer(chrom_), option_correl__, chrom_, chain_number, continue_sampling) for chrom_ in chroms_in_prior for option_correl__ in selected_combinations if option_correl__ == option_correl_select]

			def calculate_kern(sample_, bins, band = "scott"):
				# Gaussian kernel-density estimate of `sample_` evaluated over
				# `bins` (also draws the curve); returns (densities, positions).
				import kern_density_est
				return kern_density_est.kern_scipy_gaus(sample_, "g", bins, bandwidth = band, plot_atr = True)

			#config_variables.test_prior = False
			# Re-import side-effecting modules so they see the test_prior setup.
			reload(config_variables)
			reload(MOG)
			#bla = p.map(MOG.executor, arguments)
			# Collect prior samples sequentially (parallel map kept above for reference).
			bla = []
			for i in arguments:
				bla += MOG.executor(i)
			#plt.hist(bla, bins = 200, normed=True)
			import plot_histograms_figures_MOG
			pr, bi = calculate_kern(bla, bins = prior_elements[mode]["MOG_distance"]["prior_bins"], band = 0.025)
			plot_histograms_figures_MOG.execute(prior_elements, bla, plot_atr, plot_atr_kernel)
			#plt.plot(bi,pr)
			plt.show()


	# MK_PAIRWISE: classify from previously written sampler chains only (no new
	# sampling).  Mirrors the classifier calls in the GAUSSIAN_SAMPLE branch.
	elif mode_of_code == "MK_PAIRWISE":	
		posterior_ = {}
		import classifiers_clean	

		if mode_of_sampler == "distance_prior":
			posterior_["positive_interactions"], posterior_["negative_interactions"] = classifiers_clean.MOG_classifier(mode_of_sampler, number_of_samples = number_of_samples, burn_in = burn_in, pairwise_number_in_pack = 150/2)
		else:
			posterior_["positive_interactions"], posterior_["negative_interactions"] = {}, {}
			comb = "_".join([dict_option[el] for el in option_correl_select])
			posterior_["positive_interactions"][comb], posterior_["negative_interactions"][comb] = classifiers_clean.MOG_classifier(mode_of_sampler, comb = comb, kappa_0 = kappa_0, mu_0 = mu_0 , alpha_0 = alpha_0, Beta_0 = Beta_0, number_of_samples = number_of_samples, burn_in = burn_in, chain = chain_number)

	# Diagnostics for sampler chain convergence.
	elif mode_of_code == "convergence_checker":
		import convergence_checker as conv 
		conv.convergence_checker(number_of_samples_arr, burn_in_start)

	# Default modes (ODD / EVEN / FULL): build posteriors for every model type,
	# then either plot PR curves (ODD/EVEN) or emit the predictive tables/plots (FULL).
	else:


		posterior = {}
		type_of_models = ["dist", "correl", "correl_dist"]
		if MoG_classificator: type_of_models += ["MOG_dist", "MOG_correl_dist"]

		for classification_of_interactions in ["positive_interactions", "negative_interactions"]: 
			posterior[classification_of_interactions] = {}
			for type_of_model in type_of_models:
				posterior[classification_of_interactions][type_of_model] = {}

		# Distance-only model: feature index [0], no correlation datasets.
		posterior["positive_interactions"]["dist"], posterior["negative_interactions"]["dist"] = classifiers_clean.posterior_producer([0], [])

		if "MOG_dist" in type_of_models: posterior["positive_interactions"]["MOG_dist"], posterior["negative_interactions"]["MOG_dist"] = classifiers_clean.MOG_classifier("distance_prior", number_of_samples = number_of_samples_dist, burn_in = burn_in_dist, chain = chain_number_dist) #infered_elements['promoter_enhancer_interactions']["positive_interactions"]["distance"]['probabilities_of_being_positive_interactions'], infered_elements['promoter_enhancer_interactions']["negative_interactions"]["distance"]['probabilities_of_being_positive_interactions']  #

		if MoG_classificator: combinations, selected_combinations = sel.selected_combinations("SELECTIVE")
		else: combinations, selected_combinations = sel.selected_combinations("ALL")

		# One correlation / correlation+distance posterior per dataset combination.
		for ind, option_ in enumerate(selected_combinations):
			comb = "_".join([dict_option[el] for el in option_])
			posterior["positive_interactions"]["correl_dist"][comb], posterior["negative_interactions"]["correl_dist"][comb] = classifiers_clean.posterior_producer([0], option_)
			posterior["positive_interactions"]["correl"][comb], posterior["negative_interactions"]["correl"][comb] = classifiers_clean.posterior_producer([], option_)		

			if "MOG_correl_dist" in type_of_models: posterior["positive_interactions"]["MOG_correl_dist"][comb], posterior["negative_interactions"]["MOG_correl_dist"][comb] = classifiers_clean.MOG_classifier("distance_MOG_empir_mu", comb = comb, kappa_0 = kappa_0, mu_0 = mu_0 , alpha_0 = alpha_0, Beta_0 = Beta_0, number_of_samples = number_of_samples_correl[ind], burn_in = burn_in_correl[ind], chain = chain_number_correl[ind])

		if mode_of_code == "ODD" or mode_of_code == "EVEN":

			#import PR_top
			#PR_top.execute()
			#import PR_top_MAP_dots
			import MAP_invoker
			#MAP_probabilites, infered_elements, match_MAP, sensitivity_match_MAP = MAP_invoker.executor(posterior, type_of_models)
			match_MAP, sensitivity_match_MAP, MAP_probabilites, infered_elements_MAP, probabilities_for_promoters_of_interacting_enhancers = MAP_invoker.executor(posterior, selected_combinations, type_of_models)
			import PR_top_MAP_dots_alternative_domain

			for PR_CURVES in np.array(["SELECTIVE", "ALL"])[:1]:
				if MoG_classificator and PR_CURVES == "ALL": continue
				#PR_top_MAP_dots_alternative_domain.execute(sensitivity_match_MAP, number_of_interacting_enhancers_ = np.sum([len(match_MAP["dist"][chrom_]) for chrom_ in chroms_to_infer]), option_to_plot = PR_CURVES, type_of_models=type_of_models, posterior_MOG = posterior, kappa_0=kappa_0, mu_0=mu_0, alpha_0=alpha_0, Beta_0=Beta_0, number_of_samples = [number_of_samples_dist] + number_of_samples_correl, burn_in = [burn_in_dist] + burn_in_correl)
				# NOTE(review): the first assignment below is dead — it is
				# immediately overwritten by the second; confirm which model
				# list the PR plot is meant to use.
				if MoG_classificator: type_of_models = ["correl_dist", "MOG_correl_dist","MOG_dist"]
				if MoG_classificator: type_of_models = ["dist", "MOG_dist"]

				PR_top_MAP_dots_alternative_domain.execute(sensitivity_match_MAP, number_of_interacting_enhancers_ = np.sum([len(match_MAP["dist"][chrom_]) for chrom_ in chroms_to_infer]), option_to_plot = PR_CURVES, type_of_models = type_of_models, posterior_MOG = posterior, kappa_0=kappa_0, mu_0=mu_0, alpha_0=alpha_0, Beta_0=Beta_0, number_of_samples = [number_of_samples_dist] + number_of_samples_correl, burn_in = [burn_in_dist] + burn_in_correl)#"correl_dist","MOG_correl_dist"]


		if mode_of_code == "FULL":
			#import MAP_clustering_labels_clean
			#MAP_clustering_labels_clean.executor(MAP_probabilites_correl_dist, infered_elements_correl_dist)

			# FDR-thresholded gene lists, summary tables and PR analysis.
			import TOP_FDR_PR_gene_list_clean
			TOP_FDR_PR_gene_list_clean.executor(selection_option = option_for_predictive_FULL_mode)
			import TOP_FDR_PR_table_clean
			TOP_FDR_PR_table_clean.executor(selection_option = option_for_predictive_FULL_mode)
	
			import script_python_analys_PR
			script_python_analys_PR.executor(selection_option = option_for_predictive_FULL_mode, FDR_level = genes_predicted_with_FDR_for_GRO_seq_validation)

			#import MAP_interaction_plotter_clean
			#MAP_interaction_plotter_clean.executor(MAP_probabilites_correl_dist, infered_elements_correl_dist, match_MAP_correl_dist)
			import TOP_PR_interaction_plotter_clean
			TOP_PR_interaction_plotter_clean.executor(selection_option = option_for_predictive_FULL_mode, chrom_to_plot = TOP_PR_interaction_plotter_clean_chrom_to_plot, FDR_thresholds_to_plot = TOP_PR_interaction_plotter_FDR_thresholds_to_plot, calculate_number_of_within_domain_interactions = calculate_number_of_within_domain_interactions)
# NOTE(review): everything from here sits at module level yet repeats, almost
# verbatim, code from inside the function above (interaction dictionaries,
# negative-interactions import, config_variables exports).  It references names
# (dict_chrom_enh_survived, f_name, generator_mode, ...) that are only bound
# inside that function, and the tab-indented `def` that follows this section
# would raise an IndentationError at import time — confirm whether this tail is
# an accidentally dedented paste that should be removed or re-indented.
config_variables.dict_chrom_enh_survived = dict_chrom_enh_survived
config_variables.dict_chrom_enh_not_survived = dict_chrom_enh_not_survived

import prepare_interactions_clean

# Flag is hard-coded False here, so the branch below never runs.
alternative_classificator_outside_enhancers = False
if alternative_classificator_outside_enhancers:
	f_name_2 = generator_executor.interactions_producer_filter(generator_mode, True, 2, TSS_or_intra_genic_for_domain_filter)
	chr_interactions_dict_pro_enh, chr_interactions_dict_enh_enh, dict_total_enh, dict_total_pro = prepare_interactions_clean.filter_true_interactions_of_promoters_and_enhancers_which_didnt_survive_filtering(f_name_2)


chr_interactions_dict_pro_enh, chr_interactions_dict_enh_enh, dict_total_enh, dict_total_pro = prepare_interactions_clean.filter_true_interactions_of_promoters_and_enhancers_which_didnt_survive_filtering(f_name)

from  prepare_interactions_clean import un_string
# Per chromosome: unique enhancer ids that appear in true promoter-enhancer interactions.
chrom_interacting_enhancers_pro = {}	
for chrom__ in chrom_names: chrom_interacting_enhancers_pro[chrom__] = np.unique(un_string(chr_interactions_dict_pro_enh[chrom__])[:,1])
config_variables.chrom_interacting_enhancers_pro = chrom_interacting_enhancers_pro


config_variables.dict_total_enh = dict_total_enh
config_variables.dict_total_pro = dict_total_pro
config_variables.chr_interactions_dict_pro_enh = chr_interactions_dict_pro_enh
config_variables.chr_interactions_dict_enh_enh = chr_interactions_dict_enh_enh

import chrom_specific_negative_interactions as negative_interactions

config_variables.negative_interactions = negative_interactions

import prior_producer
import classificator_clean
import prepare_upper_and_lower_bounds_for_priors as prior_bounds
	def interactions_above_threshold(posterior_t, posterior_f, threshold_up, threshold_low, chrom, domain = False):
		"""Return the (promoter, enhancer) — or (enhancer, enhancer) — index pairs of
		true (ChIA-PET-confirmed) and false (negative) interactions on `chrom` whose
		posterior probability lies in the half-open interval [threshold_low, threshold_up).

		posterior_t / posterior_f are dicts keyed by chromosome holding the posterior
		values of the true and false interactions, in the same order as the index
		arrays used below. Returns (i_s_t_filt, j_s_t_filt, i_s_f_filt, j_s_f_filt):
		element ids (in the file-global numbering) of the surviving true and false pairs.

		Matrix layout: rows/columns 0..len(indexes_p) are this chromosome's promoters,
		the remaining len(indexes_e) are its enhancers. Global element ids are mapped
		to local matrix positions via the offsets total_p (promoters) and
		total_e - len(indexes_p) (enhancers), and mapped back on return.
		"""
		enh_coordinates, pro_coordinates, indexes_p, indexes_e, total_p, total_e = negative_interactions.initialise_variables(chrom)

		length_chr = len(indexes_p) + len(indexes_e)
		interaction_matrix = np.zeros((length_chr, length_chr))
		posterior_t, posterior_f = posterior_t[chrom], posterior_f[chrom]

		# Indices of the negative (false) candidate interactions for this chromosome.
		i_s_f, j_s_f = positive_negative_interactions_for_MAP(chrom)

		if domain:
			# Promoter coordinates used for the domain overlap test: either the full
			# intra-genic span or a 2bp window around the TSS.
			if config_variables.TSS_or_intra_genic_for_domain_filter == "Intra_genic": coords_pro_domain = pro_coordinates[indexes_p]
			elif config_variables.TSS_or_intra_genic_for_domain_filter == "TSS_only": coords_pro_domain = np.column_stack((TSS_coordinates[indexes_p]-1, TSS_coordinates[indexes_p]+1))
			# Union of left- and right-anchored domain co-membership masks.
			domain_matrix = interacting_domain.interacting_domains(coords_pro_domain, enh_coordinates[indexes_e], chrom, 'left', True)
			domain_matrix = domain_matrix + interacting_domain.interacting_domains(coords_pro_domain, enh_coordinates[indexes_e], chrom, 'right', True)
		else:
			domain_matrix = True
		# NOTE(review): domain_matrix is computed above but never applied in this
		# function (the threshold filters below use only interacting_mask) — compare
		# with filter_interactions_in_domain, which does multiply it in. Confirm
		# whether the domain filter was meant to apply here too.

		if mode == "promoter_enhancer_interactions":

			chr_interactions_dict_pro_enh = config_variables.chr_interactions_dict_pro_enh
			true_inter_pro = un_string(chr_interactions_dict_pro_enh[chrom][:, :2]).astype(int)
			i_s_t, j_s_t = true_inter_pro[:,0], true_inter_pro[:,1]

			# Baseline-fill with slightly less than the smallest posterior so that
			# unassigned cells can never pass the >= threshold_low test.
			interaction_matrix[:,:] = np.min([np.min(posterior_t), np.min(posterior_f)])*0.999
			interaction_matrix[i_s_t - total_p, j_s_t + len(indexes_p) - total_e] = posterior_t
			interaction_matrix[i_s_f - total_p, j_s_f + len(indexes_p) - total_e] = posterior_f

			#--------------------------------------------------------------------------the part to look at for Paolo !
			# NOTE(review): debug/hand-off side effect — writes
			# interaction_matrix_float.npy into the current working directory on
			# every call in this mode.
			np.save("interaction_matrix_float", interaction_matrix) #I'm saving the interaction matrix for you
	
			if normalised:
				# Column-normalise (per enhancer target); all-zero columns yield
				# NaN from 0/0 and are reset to 0.
				norm_factors = np.sum(interaction_matrix, axis = 0)
				interaction_matrix = interaction_matrix/norm_factors
				interaction_matrix[np.isnan(interaction_matrix)] = 0.

			# Mask of the true (ChIA-PET-confirmed) pairs, then keep those whose
			# (possibly normalised) posterior falls in [threshold_low, threshold_up).
			interacting_mask = np.zeros_like(interaction_matrix).astype(bool)
			interacting_mask[i_s_t - total_p, j_s_t + len(indexes_p) - total_e] = True

			true_pro_enh_inter_filtered = np.where(interacting_mask * (interaction_matrix >= threshold_low) * (interaction_matrix < threshold_up))
			i_s_t_filt, j_s_t_filt = true_pro_enh_inter_filtered[0] + total_p, true_pro_enh_inter_filtered[1] - len(indexes_p) + total_e # that line tells you which of the positive (green) ChIA-PET-confirmed interactions lay within threshold_low, threshold_up interval

			# Same filtering for the negative (false) pairs.
			interacting_mask = np.zeros_like(interaction_matrix).astype(bool)
			interacting_mask[i_s_f - total_p, j_s_f + len(indexes_p) - total_e] = True

			false_pro_enh_inter_filtered = np.where(interacting_mask * (interaction_matrix >= threshold_low) * (interaction_matrix < threshold_up))
			i_s_f_filt, j_s_f_filt = false_pro_enh_inter_filtered[0] + total_p, false_pro_enh_inter_filtered[1] - len(indexes_p) + total_e # that line tells you which of the negative (gray) interactions lay within threshold_low, threshold_up interval

			#--------------------------------------------------------------------------the part to look at for Paolo !

			return	i_s_t_filt, j_s_t_filt, i_s_f_filt, j_s_f_filt # the function takes threshold_up, threshold_low as an argument and returns predicted interactions with probabilities within  [threshold_low, threshold_up) interval
			# i_s_t_filt, j_s_t_filt, i_s_f_filt, j_s_f_filt legend: i_s_t are promoters of ChIA-PET confirmed interactions, j_s_t are enhancers of the interactions, i_s_f, j_s_f are (promoters, enhancers) interactions which aren't ChIA-PET confirmed
			#for paolo

		if mode == "enhancer_enhancer_interactions":

			chr_interactions_dict_enh_enh = config_variables.chr_interactions_dict_enh_enh
			true_inter_enh = un_string(chr_interactions_dict_enh_enh[chrom][:, :2]).astype(int)
			i_s_t, j_s_t = true_inter_enh[:,0], true_inter_enh[:,1]

			# Enhancer-enhancer pairs live in the enhancer sub-block; the matrix is
			# symmetrised so each pair is present in both orientations.
			interaction_matrix[:,:] = np.min([np.min(posterior_t), np.min(posterior_f)])*0.999
			interaction_matrix[i_s_t + len(indexes_p) - total_e, j_s_t + len(indexes_p) - total_e] = posterior_t
			interaction_matrix[i_s_f + len(indexes_p) - total_e, j_s_f + len(indexes_p) - total_e] = posterior_f
			interaction_matrix[j_s_t + len(indexes_p) - total_e, i_s_t + len(indexes_p) - total_e] = posterior_t # transpose to create a full matrix
			interaction_matrix[j_s_f + len(indexes_p) - total_e, i_s_f + len(indexes_p) - total_e] = posterior_f # transpose to create a full matrix


			if normalised: 
				norm_factors = np.sum(interaction_matrix, axis = 0)
				interaction_matrix = interaction_matrix/norm_factors
				interaction_matrix[np.isnan(interaction_matrix)] = 0.

			# Threshold-filter the true pairs; the mask covers only the (i, j)
			# orientation, so each pair is reported once despite the symmetrisation.
			interacting_mask = np.zeros_like(interaction_matrix).astype(bool)
			interacting_mask[i_s_t + len(indexes_p) - total_e, j_s_t + len(indexes_p) - total_e] = True

			true_enh_enh_inter_filtered = np.where(interacting_mask * (interaction_matrix >= threshold_low) * (interaction_matrix < threshold_up))
			i_s_t_filt, j_s_t_filt = true_enh_enh_inter_filtered[0] - len(indexes_p) + total_e, true_enh_enh_inter_filtered[1] - len(indexes_p) + total_e

			# Same filtering for the negative (false) pairs.
			interacting_mask = np.zeros_like(interaction_matrix).astype(bool)
			interacting_mask[i_s_f + len(indexes_p) - total_e, j_s_f + len(indexes_p) - total_e] = True

			false_enh_enh_inter_filtered = np.where(interacting_mask * (interaction_matrix >= threshold_low) * (interaction_matrix < threshold_up))
			i_s_f_filt, j_s_f_filt = false_enh_enh_inter_filtered[0] - len(indexes_p) + total_e, false_enh_enh_inter_filtered[1] - len(indexes_p) + total_e

			return i_s_t_filt, j_s_t_filt, i_s_f_filt, j_s_f_filt
# NOTE(review): stray paste artifact ("Ejemplo n.º 30" / "0") from whatever
# tool assembled this file — not Python; kept as a comment so it no longer
# raises a SyntaxError.
    def filter_interactions_in_domain(posterior_t, posterior_f, chrom, domain,
                                      invert_domain):
        """Return the posterior values of the true and false interactions on
        `chrom`, optionally restricted to pairs that fall inside an interacting
        domain (or, with invert_domain, to pairs that fall outside one).

        posterior_t / posterior_f are dicts keyed by chromosome holding the
        posteriors of the true and false interactions, in the same order as the
        index arrays used below. Returns
        (chrom_posterior_t_filtered, chrom_posterior_f_filtered): 1-D arrays of
        the posteriors surviving the domain filter.

        Matrix layout: rows/columns 0..len(indexes_p) are this chromosome's
        promoters, the remaining len(indexes_e) its enhancers; global element
        ids map to local positions via total_p / total_e - len(indexes_p).

        NOTE(review): invert_domain only takes effect when domain is truthy —
        with domain False the mask is the scalar True and is never inverted.
        """
        enh_coordinates, pro_coordinates, indexes_p, indexes_e, total_p, total_e = negative_interactions.initialise_variables(
            chrom)

        # Indices of the negative (false) candidate interactions for this chromosome.
        i_s_f, j_s_f = positive_negative_interactions_for_MAP(chrom)

        length_chr = len(indexes_p) + len(indexes_e)
        interaction_matrix = np.zeros((length_chr, length_chr))
        posterior_t, posterior_f = posterior_t[chrom], posterior_f[chrom]

        if domain:
            # Promoter coordinates used for the domain overlap test: either the
            # full intra-genic span or a 2bp window around the TSS.
            if config_variables.TSS_or_intra_genic_for_domain_filter == "Intra_genic":
                coords_pro_domain = pro_coordinates[indexes_p]
            elif config_variables.TSS_or_intra_genic_for_domain_filter == "TSS_only":
                coords_pro_domain = np.column_stack(
                    (TSS_coordinates[indexes_p] - 1,
                     TSS_coordinates[indexes_p] + 1))
            # Union of left- and right-anchored domain co-membership masks.
            domain_matrix = interacting_domain.interacting_domains(
                coords_pro_domain, enh_coordinates[indexes_e], chrom, 'left',
                True)
            domain_matrix = domain_matrix + interacting_domain.interacting_domains(
                coords_pro_domain, enh_coordinates[indexes_e], chrom, 'right',
                True)
            # Select outside-domain pairs instead of within-domain ones.
            if invert_domain: domain_matrix = np.invert(domain_matrix)

        else:
            # Scalar True: the domain filter becomes a no-op in the products below.
            domain_matrix = True

        if mode == "promoter_enhancer_interactions":

            #chr_interactions_dict_pro_enh = config_variables.chr_interactions_dict_pro_enh
            #true_inter_pro = un_string(chr_interactions_dict_pro_enh[chrom][:, :2]).astype(int)
            #i_s_t, j_s_t = true_inter_pro[:,0], true_inter_pro[:,1]

            # Pick the TSS-based or standard true-interaction set depending on
            # the validation mode.
            if config_variables.disentagled_features_validation:
                chr_interactions_pro_enh = config_variables.chr_interactions_dict_pro_enh_TSS[
                    chrom]
            else:
                chr_interactions_pro_enh = config_variables.chr_interactions_dict_pro_enh[
                    chrom]

            true_inter_pro = un_string(
                chr_interactions_pro_enh[:, :2]).astype(int)
            i_s_t, j_s_t = true_inter_pro[:, 0], true_inter_pro[:, 1]

            # Scatter the posteriors into the (promoter x enhancer) sub-block;
            # unassigned cells stay 0 but are excluded by the masks below.
            interaction_matrix[i_s_t - total_p,
                               j_s_t + len(indexes_p) - total_e] = posterior_t
            interaction_matrix[i_s_f - total_p,
                               j_s_f + len(indexes_p) - total_e] = posterior_f

            interacting_mask = np.zeros_like(interaction_matrix).astype(bool)
            interacting_mask[i_s_t - total_p,
                             j_s_t + len(indexes_p) - total_e] = True

            # True pairs surviving the domain filter (all of them when
            # domain_matrix is the scalar True).
            true_pro_enh_inter_filtered = interacting_mask * domain_matrix

            print np.sum(true_pro_enh_inter_filtered)

            chrom_posterior_t_filtered = interaction_matrix[
                true_pro_enh_inter_filtered]

            # Same filtering for the negative (false) pairs.
            interacting_mask = np.zeros_like(interaction_matrix).astype(bool)
            interacting_mask[i_s_f - total_p,
                             j_s_f + len(indexes_p) - total_e] = True

            false_pro_enh_inter_filtered = interacting_mask * domain_matrix

            print np.sum(false_pro_enh_inter_filtered)

            chrom_posterior_f_filtered = interaction_matrix[
                false_pro_enh_inter_filtered]

            return chrom_posterior_t_filtered, chrom_posterior_f_filtered

        if mode == "enhancer_enhancer_interactions":

            # NOTE(review): unlike the pro-enh branch above, this branch does
            # not consult disentagled_features_validation — confirm that is
            # intentional.
            chr_interactions_dict_enh_enh = config_variables.chr_interactions_dict_enh_enh
            true_inter_enh = un_string(
                chr_interactions_dict_enh_enh[chrom][:, :2]).astype(int)
            i_s_t, j_s_t = true_inter_enh[:, 0], true_inter_enh[:, 1]

            # Enhancer-enhancer pairs live in the enhancer sub-block; the
            # matrix is symmetrised so each pair appears in both orientations.
            interaction_matrix[i_s_t + len(indexes_p) - total_e,
                               j_s_t + len(indexes_p) - total_e] = posterior_t
            interaction_matrix[i_s_f + len(indexes_p) - total_e,
                               j_s_f + len(indexes_p) - total_e] = posterior_f
            interaction_matrix[
                j_s_t + len(indexes_p) - total_e, i_s_t + len(indexes_p) -
                total_e] = posterior_t  # transpose to create a full matrix
            interaction_matrix[
                j_s_f + len(indexes_p) - total_e, i_s_f + len(indexes_p) -
                total_e] = posterior_f  # transpose to create a full matrix

            # The mask covers only the (i, j) orientation, so each pair is
            # extracted once despite the symmetrisation.
            interacting_mask = np.zeros_like(interaction_matrix).astype(bool)
            interacting_mask[i_s_t + len(indexes_p) - total_e,
                             j_s_t + len(indexes_p) - total_e] = True

            true_enh_enh_inter_filtered = interacting_mask * domain_matrix
            chrom_posterior_t_filtered = interaction_matrix[
                true_enh_enh_inter_filtered]

            # Same filtering for the negative (false) pairs.
            interacting_mask = np.zeros_like(interaction_matrix).astype(bool)
            interacting_mask[i_s_f + len(indexes_p) - total_e,
                             j_s_f + len(indexes_p) - total_e] = True

            false_enh_enh_inter_filtered = interacting_mask * domain_matrix
            chrom_posterior_f_filtered = interaction_matrix[
                false_enh_enh_inter_filtered]

            return chrom_posterior_t_filtered, chrom_posterior_f_filtered