def getMutualInfoForPair(pair_count_dict):
    """
    For calculating the mutual information and shuffled mutual information between two spike counts.
    Arguments:  pair_count_dict, int => numpy array int, the two keys are the pair, the values are the spike counts
    Returns:    plugin_mi, float, plugin mutual information
                plugin_shuff_mi, float, plugin shuffled mutual information
                bias_corrected_mi, float, bias-corrected plugin mutual information
    """
    first_spike_counts, second_spike_counts = np.array(list(pair_count_dict.values()))
    first_response_alphabet = np.arange(first_spike_counts.max() + 1)
    second_response_alphabet = np.arange(second_spike_counts.max() + 1)
    plugin_mi = np.max([
        0,
        drv.information_mutual(X=first_spike_counts,
                               Y=second_spike_counts,
                               Alphabet_X=first_response_alphabet,
                               Alphabet_Y=second_response_alphabet)
    ])
    bias_estimate = getPTBiasEstimate(first_spike_counts, second_spike_counts)
    bias_corrected_mi = np.max([0, plugin_mi + bias_estimate])
    np.random.shuffle(second_spike_counts)
    plugin_shuff_mi = np.max([
        0,
        drv.information_mutual(X=first_spike_counts,
                               Y=second_spike_counts,
                               Alphabet_X=first_response_alphabet,
                               Alphabet_Y=second_response_alphabet)
    ])
    return plugin_mi, plugin_shuff_mi, bias_corrected_mi
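# A minimal standalone sketch (not part of the snippet above) of the plug-in MI call it
# wraps, on made-up spike-count arrays; only numpy and pyitlib are assumed, and the
# project-specific getPTBiasEstimate helper is not reproduced here.
import numpy as np
from pyitlib import discrete_random_variable as drv

first_spike_counts = np.array([0, 1, 2, 1, 0, 3, 2, 1])   # made-up per-trial counts, cell 1
second_spike_counts = np.array([1, 1, 2, 0, 0, 3, 2, 2])  # made-up per-trial counts, cell 2

# plug-in mutual information, clipped at zero as in getMutualInfoForPair
plugin_mi = np.max([0, drv.information_mutual(X=first_spike_counts, Y=second_spike_counts)])
print(plugin_mi)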
def get_mutual_information_dd(x, y):
    '''
    Returns the mutual information between two discrete vectors x and y.
    Each variable is a matrix = array(n_samples, n_features)
        where
        n_samples = number of samples
        n_features = number of features

    Parameters
    ----------
    x : array-like, shape (m samples, n features)
        The data used for mutual information calculation.
    y : array-like, shape (m samples, 1 feature)
        The data used for mutual information calculation.

    Returns
    -------
    Mutual information between the two variables.

    Example
    -------
    get_mutual_information_dd(X, Y)
    '''
    return drv.information_mutual(x.reshape(len(x)),
                                  y.reshape(len(y)),
                                  cartesian_product=True)
def detect(X, M):
    npatches = np.size(X, 0)
    score = np.zeros(npatches)
    # score each patch (row of X) by its mutual information with M
    for i in range(npatches):
        score[i] = drv.information_mutual(X[i, :], M)
    # return the index of the highest-scoring patch together with all scores
    L = np.argmax(score)
    return (L, score)
def f(self, chromosome):
    num_genes = len(chromosome[0])  # number of columns (genes)
    # predict a label for every chromosome
    y = []
    for i in range(len(chromosome)):
        y.append(self.classifier.predict([chromosome[i]])[0])
    # compute the mutual information between each gene column and the predictions y
    M = []
    ch = np.array(chromosome)
    for i in range(num_genes):
        M.append(drv.information_mutual(ch[:, i], np.array(y), cartesian_product=True))
        # M.append(mutual_info_score(ch[:, i], np.array(y), contingency=None))
        # M.append(normalized_mutual_info_score(ch[:, i], np.array(y), average_method='arithmetic'))
        # M.append(mutual_info_regression(ch[:, i], np.array(y), discrete_features='auto'))
    den = 0
    num = 0
    res = 0
    threshold = sum(M) * 0.4
    M.sort(reverse=True)
    # accumulate the largest mutual-information values until 40% of the total is covered
    while num < threshold:
        num = num + M[den]
        den = den + 1
    if den != 0:
        res = num / den
    self.stat.append([res, den])
    return res
def MI(self, df):
    result = 0.00
    listt = df.keys()
    for item in tqdm(df):
        j = 1
        for i in listt[j:listt.__len__() - 1]:
            '''
            data1 = df[item]
            data2 = df[i]
            pd_series_H1 = pd.Series(data1)
            pd_series_H2 = pd.Series(data2)
            pd_series = pd.Series(data1, data2)
            counts_H1 = pd_series_H1.value_counts()
            counts_H2 = pd_series_H2.value_counts()
            counts = pd_series.value_counts()
            entropy_H1 = stats.entropy(counts_H1 / sum(counts_H1), base=2)
            entropy_H2 = stats.entropy(counts_H2 / sum(counts_H2), base=2)
            entropy = stats.entropy(counts / sum(counts), base=2)
            result = result + entropy_H1 + entropy_H2 - entropy
            # print(dictionary[str([item,i])] )
            j = j + 1
            '''
            result = result + drv.information_mutual(df[item].values, df[i].values)
    return result
def get_mutual_information(x, y, normalize=True):
    '''
    Compute mutual information between two random variables.

    :param x: random variable
    :param y: random variable
    :param normalize: if True, return the normalised mutual information (norm_factor='Y')
    '''
    if normalize:
        return drv.information_mutual_normalised(x, y,
                                                 norm_factor='Y',
                                                 cartesian_product=True)
    else:
        return drv.information_mutual(x, y, cartesian_product=True)
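# A hedged usage sketch for get_mutual_information (not from the original source):
# two made-up discrete sequences, comparing the normalised and plain variants.
# Assumes numpy and `from pyitlib import discrete_random_variable as drv` as above.
import numpy as np

x = np.array([0, 0, 1, 1, 2, 2, 0, 1])
y = np.array([0, 1, 1, 1, 2, 2, 0, 0])

print(get_mutual_information(x, y))                   # normalised variant (norm_factor='Y')
print(get_mutual_information(x, y, normalize=False))  # plain mutual information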
def compute_mutual_information(data_points: np.ndarray, hamming_weights: np.ndarray) -> float:
    """
    This method computes the mutual information of two discrete data sets using Shannon entropy.

    :param data_points: data points to compute the mutual information on
    :param hamming_weights: hamming weights used
    :return: mutual information value
    """
    return drv.information_mutual(data_points, hamming_weights)
def compute_discrete_Lmeasure(self):
    """Function to compute the un-normalized L-measure between all the
    discrete feature pairs. The values for all the possible pairs are stored
    in the L_measures dict. Auxiliary values like the mutual information
    (I_mutinfo) are also stored in their respective dicts for all the
    possible pairs.

    This method sets the `L_measure_dict` class attribute.

    Args:
        None

    Returns:
        None
    """
    # NOTE: the function expects the array to be in transposed form
    indi_entropies = drv.entropy(self.data_arr.T, estimator=self.ent_estimator)
    num_rand = self.data_arr.shape[1]  # Number of random variables (feature columns)
    assert num_rand == len(indi_entropies)

    L_measures = {}  # Dictionary storing the pairwise L-measures
    I_mutinfo = {}   # Dictionary storing the pairwise mutual information

    for i in range(num_rand):
        for j in range(i + 1, num_rand):
            key = (i, j)  # since 0-indexed
            h_i = indi_entropies[i]
            h_j = indi_entropies[j]
            # Potential error: I_ij may come out negative depending on the estimator
            I_ij = drv.information_mutual(self.data_arr.T[i],
                                          self.data_arr.T[j],
                                          estimator=self.ent_estimator)
            W_ij = min(h_i, h_j)
            num = (-2.0 * I_ij * W_ij)
            den = (W_ij - I_ij)
            eps = 1e-9  # epsilon value for the denominator
            inner_exp_term = num / (den + eps)
            # removing numerical errors by upper bounding the exponent by 0
            inner_exp_term = min(0, inner_exp_term)
            L_measures[key] = np.sqrt(1 - np.exp(inner_exp_term))
            I_mutinfo[key] = I_ij

    self.L_measure_dict = L_measures
    return
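# The pairwise quantity above reduces to
#     L_ij = sqrt(1 - exp(-2 * I_ij * W_ij / (W_ij - I_ij))),  with  W_ij = min(H_i, H_j),
# which can be checked in isolation on two made-up discrete columns. A minimal sketch,
# assuming only numpy and pyitlib (the class attributes used above are not needed here):
import numpy as np
from pyitlib import discrete_random_variable as drv

col_i = np.array([0, 1, 1, 0, 2, 2, 1, 0])
col_j = np.array([0, 1, 1, 0, 2, 1, 1, 0])

h_i, h_j = drv.entropy(col_i), drv.entropy(col_j)
I_ij = drv.information_mutual(col_i, col_j)
W_ij = min(h_i, h_j)
inner = min(0, -2.0 * I_ij * W_ij / (W_ij - I_ij + 1e-9))  # clip the exponent at 0, as above
print(np.sqrt(1 - np.exp(inner)))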
def Diagonal_Matrix1(X, data):
    # Output class is a categorical string
    NOF = X.shape[1]
    D = np.zeros([NOF, NOF])
    class_label_encoder = LabelEncoder()
    labeled_class = class_label_encoder.fit_transform(data.iloc[:, NOF])
    labeled_class = labeled_class + 1
    print(labeled_class)
    # mutual information between each feature column and the class labels, on the diagonal
    for i in range(NOF):
        diag_element = drv.information_mutual(X[:, i], labeled_class)
        D[i, i] = diag_element
    return D
def calculate_weights(self, discretized_data: pd.DataFrame):
    """
    Provide calculation of link strength according to mutual information
    between a node and its parent(-s) values.
    """
    import bamt.utils.GraphUtils as gru
    if not all([
            i in ['disc', 'disc_num']
            for i in gru.nodes_types(discretized_data).values()
    ]):
        logger_network.error(
            f"calculate_weights() method deals only with discrete data. Continuous data: " +
            f"{[col for col, type in gru.nodes_types(discretized_data).items() if type not in ['disc', 'disc_num']]}"
        )
    if not self.edges:
        logger_network.error(
            "Bayesian Network hasn't been fitted yet. Please add edges with the add_edges() method"
        )
    if not self.nodes:
        logger_network.error(
            "Bayesian Network hasn't been fitted yet. Please add nodes with the add_nodes() method"
        )
    weights = dict()
    for node in self.nodes:
        parents = node.cont_parents + node.disc_parents
        if not parents:
            continue
        y = discretized_data[node.name].values
        if len(parents) == 1:
            x = discretized_data[parents[0]].values
            LS_true = drv.information_mutual(X=y, Y=x)
            entropy = drv.entropy(X=y)
            weight = LS_true / entropy
            weights[(parents[0], node.name)] = weight
        else:
            for parent_node in parents:
                x = discretized_data[parent_node].values
                other_parents = [tmp for tmp in parents if tmp != parent_node]
                z = list()
                for other_parent in other_parents:
                    z.append(list(discretized_data[other_parent].values))
                LS_true = np.average(
                    drv.information_mutual_conditional(X=y, Y=x, Z=z, cartesian_product=True))
                entropy = np.average(
                    drv.entropy_conditional(X=y, Y=z, cartesian_product=True)) + 1e-8
                weight = LS_true / entropy
                weights[(parent_node, node.name)] = weight
    self.weights = weights
def mi_ensemble_bound(self, individual_predictions, this_y=None, ensemble_predictions=None):
    """
    Estimate the BER using the Mutual Information-Based Correlation in
    Tumer and Ghosh (2003).

    Parameters
    ----------
    individual_predictions: numpy array
        The dimensions of this array should be |M| by |E|, where |M| is the
        number of labeled data points and |E| is the number of individual
        classifiers. Each element should be a probability (not a 0/1
        prediction).
    """
    if this_y is None:
        this_y = self.y
    if ensemble_predictions is None:
        avg_predictor = individual_predictions.mean(axis=1).round()
    else:
        avg_predictor = ensemble_predictions.round()

    individual_predictions = individual_predictions.round()  # deal with 0/1 predictions
    N = individual_predictions.shape[1]  # number of classifiers in the ensemble
    labels = np.repeat(this_y.reshape(-1, 1), N, axis=1)
    accs = np.equal(individual_predictions, labels).mean(axis=0)  # mean accuracy for each classifier
    mean_err = 1 - accs.mean()  # mean error over all classifiers
    ensemble_err = 1 - (this_y == avg_predictor).mean()  # error of the ensemble classifier

    # calculate average mutual information between each individual classifier's
    # predictions and the ensemble predictor
    ami = drv.information_mutual(
        individual_predictions.T,
        avg_predictor.reshape(1, -1),
        base=np.e,
        cartesian_product=True
    ).mean()

    # TODO: should we measure total entropy by discretizing the classification
    # probabilities into more granular bins? Currently we just use the 0/1 matrix.

    # total entropy in the individual classifiers
    total_entropy = drv.entropy_joint(individual_predictions.T, base=np.e)

    # delta is the normalized ami
    delta = ami / total_entropy
    assert delta >= 0
    assert delta <= 1

    # formula from Tumer and Ghosh
    be = (N * ensemble_err - ((N - 1) * delta + 1) * mean_err) / ((N - 1) * (1 - delta))
    return be
def SU(numero, feature, solution):
    # symmetrical uncertainty: SU = 2 * IG / (H(feature) + H(solution)),
    # with the feature discretized into 30 bins first
    featureDisc = pd.cut(feature, 30, labels=False)
    IG = drv.information_mutual(featureDisc, solution)
    den = drv.entropy(featureDisc) + drv.entropy(solution)
    result = 2 * (IG / den)
    return result
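# A hedged usage sketch for SU() on made-up data (pandas, numpy and pyitlib only;
# `numero` is unused by the computation, so any placeholder value works here).
import numpy as np
import pandas as pd
from pyitlib import discrete_random_variable as drv

feature = np.random.rand(100)             # made-up continuous feature
solution = np.random.randint(0, 2, 100)   # made-up binary target
print(SU(0, feature, solution))           # symmetrical uncertainty, bounded by 1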
def notselectedMI(self, NS_item, df):
    # sum of the mutual information between the not-selected item and every column of df
    result = 0.00
    for i in df:
        X = NS_item.values
        Y = df[i].values
        result = result + drv.information_mutual(X, Y)
    return result
def MI_with_y(X):
    return drv.information_mutual(X, y_tmp)
def calculate_mutual_information(self, Y):
    """the reduction in uncertainty of X given Y"""
    return drv.information_mutual(Y, self.__mean_histogram_from_data, base=2)
def calculate_mutual_information_static(X, Y):
    """the reduction in uncertainty of X given Y"""
    return drv.information_mutual(X, Y, base=2)
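# Hedged usage sketch for the static helper above (not from the original source):
# two made-up discrete label vectors, with the mutual information reported in bits.
import numpy as np

a = np.array([0, 0, 1, 1, 0, 1, 1, 0])
b = np.array([0, 1, 1, 1, 0, 1, 0, 0])
print(calculate_mutual_information_static(a, b))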
def Mutual_Information2(X):
    # pairwise mutual information between the columns of X, with the diagonal zeroed out
    mutual_info = drv.information_mutual(X.T)
    for i in range(mutual_info.shape[0]):
        mutual_info[i, i] = 0
    return mutual_info
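# A small sketch of how the helper above can be called (made-up data; numpy and pyitlib
# only). Passing a single 2-D argument to drv.information_mutual, as the helper does,
# yields the matrix of mutual information between its rows, hence the transpose.
import numpy as np
from pyitlib import discrete_random_variable as drv

X = np.array([[0, 1, 0],
              [1, 1, 0],
              [0, 0, 1],
              [1, 0, 1]])        # 4 samples, 3 discrete features
print(Mutual_Information2(X))    # 3 x 3 matrix with a zeroed diagonal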
# pos = nx.spring_layout(G)
# nx.draw_networkx(G, pos, node_size=5, alpha=0.5, with_labels=False)
# plt.show()
partition = community.best_partition(G)
part = collections.OrderedDict(
    sorted(partition.items(), key=lambda x: int(x[0])))
rever = part
lista = [v for v in rever.values()]
# reference labelling: 16 blocks of 40 nodes each
communes = []
for i in range(16):
    for j in range(40):
        communes.append(i)
# mutual information between the detected partition and the reference labelling,
# normalised by the entropy of the detected partition
c = drv.entropy(lista)
eta = drv.information_mutual(lista, communes)
eta = eta / c
print(p, eta)
# nmi = normalized_mutual_info_score(lista, communes, average_method='arithmetic')
# print(p, nmi)
def Mutual_Information1(X):
    # pairwise mutual information between the columns of X
    mutual_info = drv.information_mutual(X.T)
    return mutual_info
img_median_filtered = img_as_ubyte(median(img_noisy, disk(1)))
median_im_entropy, _, _ = original_im_histogram.information_entropy(img_median_filtered)
_, median_histogram = original_im_histogram.image_histogram(img_median_filtered, "on")

img_bilateral_filtered = img_as_ubyte(mean_bilateral(img_noisy, disk(5), s0=10, s1=10))
bilateral_im_entropy, _, _ = original_im_histogram.information_entropy(img_bilateral_filtered)
_, bilateral_histogram = original_im_histogram.image_histogram(img_bilateral_filtered, "on")

# mutual information between the original-image histogram and the noisy / filtered
# histograms, rounded to two decimals
information_mutual_original_noisy = np.round(
    drv.information_mutual(original_histogram, noisy_histogram), 2)
information_mutual_original_noisy_filtered_by_gauss = np.round(
    drv.information_mutual(original_histogram, gauss_histogram), 2)
information_mutual_original_noisy_filtered_by_median = np.round(
    drv.information_mutual(original_histogram, median_histogram), 2)
information_mutual_original_noisy_filtered_by_bilateral = np.round(
    drv.information_mutual(original_histogram, bilateral_histogram), 2)

object_finder.add_image(img)
object_finder.image_entropy_analysis_for_testing(None)
img_detected = object_finder.get_image()

object_finder.add_image(img2)
object_finder.image_entropy_analysis_for_testing(None)
img_detected2 = object_finder.get_image()