Example no. 1
def getMutualInfoForPair(pair_count_dict):
    """
    For calculating the mutual information and suffled mutual information between two spike counts.
    Arguments:  pair_count_dict, int => numpy array int, the two keys is the pair, the values are the spike counts
    Returns:    plugin_mi, float, plugin mutual information
                plugin_shuff_mi, float, plugin shuffled mutual information
    """
    first_spike_counts, second_spike_counts = np.array(
        list(pair_count_dict.values()))
    first_response_alphabet = np.arange(first_spike_counts.max() + 1)
    second_response_alphabet = np.arange(second_spike_counts.max() + 1)
    plugin_mi = np.max([
        0,
        drv.information_mutual(X=first_spike_counts,
                               Y=second_spike_counts,
                               Alphabet_X=first_response_alphabet,
                               Alphabet_Y=second_response_alphabet)
    ])
    bias_estimate = getPTBiasEstimate(first_spike_counts, second_spike_counts)
    bias_corrected_mi = np.max([0, plugin_mi + bias_estimate])
    np.random.shuffle(second_spike_counts)
    plugin_shuff_mi = np.max([
        0,
        drv.information_mutual(X=first_spike_counts,
                               Y=second_spike_counts,
                               Alphabet_X=first_response_alphabet,
                               Alphabet_Y=second_response_alphabet)
    ])
    return plugin_mi, plugin_shuff_mi, bias_corrected_mi
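For context, a minimal standalone sketch (not part of the original snippet) of the same plugin-versus-shuffled pattern, using made-up spike counts; the bias correction via getPTBiasEstimate is omitted here.
import numpy as np
from pyitlib import discrete_random_variable as drv

rng = np.random.default_rng(0)
x = rng.integers(0, 4, size=200)             # toy spike counts, neuron A
y = (x + rng.integers(0, 2, size=200)) % 4   # correlated toy counts, neuron B
alphabet = np.arange(max(x.max(), y.max()) + 1)

plugin_mi = max(0.0, drv.information_mutual(X=x, Y=y,
                                            Alphabet_X=alphabet, Alphabet_Y=alphabet))
y_shuffled = rng.permutation(y)              # destroy the pairing to estimate a null level
shuffled_mi = max(0.0, drv.information_mutual(X=x, Y=y_shuffled,
                                              Alphabet_X=alphabet, Alphabet_Y=alphabet))
print(plugin_mi, shuffled_mi)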
Example no. 2
def get_mutual_information_dd(x, y):
    '''
    Returns the mutual information between two vectors x and y.
    Each variable is an array of shape (n_samples, n_features), where
      n_samples = number of samples
      n_features = number of features

    Parameters
    ----------
    x : array-like, shape (n_samples, 1)
        The data used for the mutual information calculation.
    y : array-like, shape (n_samples, 1)
        The data used for the mutual information calculation.

    Returns
    -------
        Mutual information between the two variables.

    Example
    -------
        get_mutual_information_dd(x, y)
    '''
    return drv.information_mutual(x.reshape(len(x)),
                                  y.reshape(len(y)),
                                  cartesian_product=True)
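A hypothetical call (toy arrays, not from the original page); the inputs are flattened to 1-D before being handed to pyitlib.
import numpy as np

x = np.random.randint(0, 3, size=(100, 1))   # 100 samples, one discrete feature
y = np.random.randint(0, 3, size=(100, 1))
print(get_mutual_information_dd(x, y))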
Example no. 3
def detect(X, M):
    # Score each patch (row of X) by its mutual information with the template M,
    # then return the index of the best-scoring patch together with all scores.
    npatches = np.size(X, 0)
    score = np.zeros(npatches)
    for i in range(npatches):
        score[i] = drv.information_mutual(X[i, :], M)
    L = np.argmax(score)
    return (L, score)
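A hypothetical usage sketch with toy data (not from the original source):
import numpy as np
from pyitlib import discrete_random_variable as drv

patches = np.random.randint(0, 8, size=(10, 64))   # 10 candidate patches, 64 pixels each
template = np.random.randint(0, 8, size=64)        # reference template
best_idx, scores = detect(patches, template)
print(best_idx, scores[best_idx])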
Example no. 4
    def f(self, chromosome):
        num_genes = len(chromosome[0]) # columns
        y=[]
        #predict
        for i in range(len(chromosome)):
            y.append(self.classifier.predict([chromosome[i]])[0])
        
        #compute mutual array using y
        M=[]
        ch = np.array(chromosome)
        
        for i in range(num_genes):
            # int type problem: drv.information_mutual expects integer (discrete) inputs
            M.append(drv.information_mutual(ch[:,i],np.array(y),cartesian_product=True))
            #M.append(mutual_info_score(ch[:,i],np.array(y),contingency=None))
            #M.append(normalized_mutual_info_score(ch[:,i],np.array(y),average_method='arithmetic'))
            #M.append(mutual_info_regression(ch[:,i],np.array(y),discrete_features='auto'))

        den = 0
        num = 0
        res = 0
        threshold = sum(M) * 0.4
        M.sort(reverse=True)

        # take the top-MI features until they account for 40% of the total MI
        while num < threshold:
            num=num+M[den]
            den=den+1
        if den!=0:
            res = num/den
        self.stat.append([res, den])
        #print("MI:", res)
        #print("num features:", den)
        return res
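As a reading aid, the selection rule from the loop above restated on made-up MI values (not from the original code):
# Toy illustration of the 40%-of-total-MI selection rule used in f().
M = [0.50, 0.30, 0.10, 0.05, 0.05]   # hypothetical per-feature MI values
threshold = sum(M) * 0.4             # 0.4
M.sort(reverse=True)

num, den = 0.0, 0
while num < threshold:
    num += M[den]
    den += 1
res = num / den if den != 0 else 0.0
print(den, res)                      # one feature already reaches the threshold; res = 0.5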
Example no. 5
    def MI(self, df):
        """Sum the pairwise mutual information over all distinct column pairs of df."""
        result = 0.0
        columns = list(df.keys())
        for j, item in enumerate(tqdm(columns)):
            # pair each column only with the columns after it, so every
            # unordered pair is counted exactly once
            for i in columns[j + 1:]:
                '''
                The manual entropy-based computation below was replaced by
                drv.information_mutual:

                data1 = df[item]
                data2 = df[i]
                pd_series_H1 = pd.Series(data1)
                pd_series_H2 = pd.Series(data2)
                pd_series = pd.Series(data1, data2)
                counts_H1 = pd_series_H1.value_counts()
                counts_H2 = pd_series_H2.value_counts()
                counts = pd_series.value_counts()
                entropy_H1 = stats.entropy(counts_H1 / sum(counts_H1), base=2)
                entropy_H2 = stats.entropy(counts_H2 / sum(counts_H2), base=2)
                entropy = stats.entropy(counts / sum(counts), base=2)
                result = result + entropy_H1 + entropy_H2 - entropy
                '''
                result = result + drv.information_mutual(df[item].values, df[i].values)
        return result
Example no. 6
def get_mutual_information(x, y, normalize=True):
    ''' Compute the mutual information between two random variables.

    :param x:         random variable (array of discrete observations)
    :param y:         random variable (array of discrete observations)
    :param normalize: if True, return the mutual information normalised by the entropy of y;
                      otherwise return the unnormalised mutual information
    '''
    if normalize:
        return drv.information_mutual_normalised(x, y, norm_factor='Y', cartesian_product=True)
    else:
        return drv.information_mutual(x, y, cartesian_product=True)
Example no. 7
    def compute_mutual_information(data_points: np.ndarray,
                                   hamming_weights: np.ndarray) -> float:
        """ This method computes the mutual information of two discrete data sets using Shannon entropy.

        :param data_points: data points to compute the mutual information on
        :param hamming_weights: hamming weights used
        :return: mutual information value
        """

        return drv.information_mutual(data_points, hamming_weights)
Example no. 8
    def compute_discrete_Lmeasure(self):
        """Function to compute the un-normalized L-measure between all the
        discrete feature pairs. The value for all the possible pairs is stored
        in the L_measures dict. Auxiliary values like the mutual information
        (I_mutinfo) are also stored in their respective dicts for all the possible pairs.
        This method sets the `L_measure_dict` class attribute.

        Args:
            None

        Returns:
            None
        """
        # TAKE note: the function expects the array to be in a transpose form
        indi_entropies = drv.entropy(self.data_arr.T,
                                     estimator=self.ent_estimator)
        # indi_entropies = drv.entropy(self.data_arr.T)
        num_rand = self.data_arr.shape[1]  # number of random variables (feature columns)
        assert num_rand == len(indi_entropies)

        L_measures = {}  # Dictionary storing the pairwise L-measures
        I_mutinfo = {}  # Dictionary storing the pairwise mutual information
        # mu_vals = {}        # Dictionary storing the pairwise MU values

        for i in range(num_rand):
            for j in range(i + 1, num_rand):
                key = (i, j)  # since 0-indexed
                h_i = indi_entropies[i]
                h_j = indi_entropies[j]

                # mu_ij = self.get_discrete_mu(i, j)

                # Potential error: I_ij may come out negative depending on the estimator
                I_ij = drv.information_mutual(self.data_arr.T[i],
                                              self.data_arr.T[j],
                                              estimator=self.ent_estimator)
                W_ij = min(h_i, h_j)

                num = (-2.0 * I_ij * W_ij)
                den = (W_ij - I_ij)
                eps = 1e-9  # epsilon value for denominator
                inner_exp_term = num / (den + eps)
                # removing numerical errors by upper bounding exponent by 0
                inner_exp_term = min(0, inner_exp_term)

                L_measures[key] = np.sqrt(1 - np.exp(inner_exp_term))
                I_mutinfo[key] = I_ij

                # print(I_ij, W_ij, num, den)
                # print(key, L_measures[key], inner_exp_term)
                # print('\n')

        self.L_measure_dict = L_measures
        return
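For reference, a minimal standalone sketch (toy discrete columns, default maximum-likelihood estimator) of the per-pair quantity computed in the loop above:
import numpy as np
from pyitlib import discrete_random_variable as drv

x = np.random.randint(0, 3, size=500)   # toy discrete feature i
y = np.random.randint(0, 3, size=500)   # toy discrete feature j

h_x, h_y = drv.entropy(x), drv.entropy(y)
i_xy = drv.information_mutual(x, y)
w = min(h_x, h_y)

inner = min(0.0, (-2.0 * i_xy * w) / ((w - i_xy) + 1e-9))   # clamp the exponent at 0
l_measure = np.sqrt(1 - np.exp(inner))
print(i_xy, l_measure)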
Example no. 9
def Diagonal_Matrix1(X, data):  # Output Class is Categorical String
    NOF = X.shape[1]  # number of feature columns
    D = np.zeros([NOF, NOF])
    class_label_encoder = LabelEncoder()
    # encode the class column (the column at index NOF, following the feature columns)
    labeled_class = class_label_encoder.fit_transform(data.iloc[:, NOF])
    labeled_class = labeled_class + 1
    print(labeled_class)
    for i in range(NOF):
        # each diagonal entry is the mutual information between feature i and the class labels
        diag_element = drv.information_mutual(X[:, i], labeled_class)
        D[i, i] = diag_element
    return D
Example no. 10
    def calculate_weights(self, discretized_data: pd.DataFrame):
        """
        Calculate link strengths according to the mutual information between a node and its parent(s) values.
        """
        import bamt.utils.GraphUtils as gru
        if not all([
                i in ['disc', 'disc_num']
                for i in gru.nodes_types(discretized_data).values()
        ]):
            logger_network.error(
                f"calculate_weghts() method deals only with discrete data. Continuous data: "
                +
                f"{[col for col, type in gru.nodes_types(discretized_data).items() if type not in ['disc', 'disc_num']]}"
            )
        if not self.edges:
            logger_network.error(
                "Bayesian Network hasn't fitted yet. Please add edges with add_edges() method"
            )
        if not self.nodes:
            logger_network.error(
                "Bayesian Network hasn't fitted yet. Please add nodes with add_nodes() method"
            )
        weights = dict()

        for node in self.nodes:
            parents = node.cont_parents + node.disc_parents
            if not parents:  # skip nodes without parents
                continue
            y = discretized_data[node.name].values
            if len(parents) == 1:
                x = discretized_data[parents[0]].values
                LS_true = drv.information_mutual(X=y, Y=x)
                entropy = drv.entropy(X=y)
                weight = LS_true / entropy
                weights[(parents[0], node.name)] = weight
            else:
                for parent_node in parents:
                    x = discretized_data[parent_node].values
                    other_parents = [
                        tmp for tmp in parents if tmp != parent_node
                    ]
                    z = list()
                    for other_parent in other_parents:
                        z.append(list(discretized_data[other_parent].values))
                    LS_true = np.average(
                        drv.information_mutual_conditional(
                            X=y, Y=x, Z=z, cartesian_product=True))
                    entropy = np.average(
                        drv.entropy_conditional(
                            X=y, Y=z, cartesian_product=True)) + 1e-8
                    weight = LS_true / entropy
                    weights[(parent_node, node.name)] = weight
        self.weights = weights
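A minimal sketch (toy arrays, not from the library) of the single-parent weight computed above, i.e. the mutual information I(child; parent) normalised by the entropy H(child):
import numpy as np
from pyitlib import discrete_random_variable as drv

child = np.random.randint(0, 3, size=1000)                  # toy discretized child node
parent = (child + np.random.randint(0, 2, size=1000)) % 3   # toy correlated parent node

weight = drv.information_mutual(X=child, Y=parent) / drv.entropy(X=child)
print(weight)   # link strength, between 0 and 1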
Example no. 11
    def mi_ensemble_bound(self, individual_predictions, this_y=None, ensemble_predictions=None):
        """
        Estimate the BER using the Mutual Information-Based Correlation in
        Tumer and Ghosh (2003).

        Parameters
        ----------
            individual_predictions: numpy array
                The dimensions of this array should be |M| by |E|, where
                |M| is the number of labeled data points and |E| is the number
                of individual classifiers. Each element should be a probability
                (not a 0/1 prediction).
        """
        if this_y is None:
            this_y = self.y
        if ensemble_predictions is None:
            avg_predictor = individual_predictions.mean(axis=1).round()
        else:
            avg_predictor = ensemble_predictions.round()
        individual_predictions = individual_predictions.round() # deal with 0/1 predictions
        N = individual_predictions.shape[1]  # number of classifiers in ensemble
        labels = np.repeat(this_y.reshape(-1, 1), N, axis=1)
        accs = np.equal(individual_predictions, labels).mean(axis=0) # mean accuracy for each classifier
        mean_err = 1 - accs.mean() # mean err for all classifiers
        ensemble_err = 1 - (this_y == avg_predictor).mean() # mean err for ensemble classifier

        # calculate average mutual information between each individual classifier's
        # predictions and the ensemble predictor
        ami = drv.information_mutual(
            individual_predictions.T,
            avg_predictor.reshape(1, -1),
            base=np.e,
            cartesian_product=True
        ).mean()
        # TODO: should we measure total entropy by discretizing the classification
        # probabilities into more granular bins? Currently we just use the
        # 0 / 1 matrix
        # total entropy in the individual classifiers
        total_entropy = drv.entropy_joint(individual_predictions.T, base=np.e)
        # delta is the normalized ami
        delta = ami / total_entropy
        assert delta >= 0
        assert delta <= 1
        # formula from Tumer and Ghosh
        be = (N * ensemble_err - ((N - 1) * delta + 1) * mean_err ) / ((N - 1) * (1 - delta))
        return be
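To illustrate the closing formula with made-up numbers (not from the source):
# Hypothetical values, purely to illustrate the Tumer & Ghosh expression above.
N = 5              # classifiers in the ensemble
mean_err = 0.20    # average individual error rate
ensemble_err = 0.15
delta = 0.5        # normalised average mutual information

be = (N * ensemble_err - ((N - 1) * delta + 1) * mean_err) / ((N - 1) * (1 - delta))
print(be)          # (0.75 - 0.6) / 2.0 = 0.075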
Example no. 12
def SU(numero, feature, solution):
    """Symmetric uncertainty between a (discretized) feature and the solution: 2*I(X;Y) / (H(X) + H(Y))."""
    #print numero
    #Hfr = drv.entropy(feature, fill_value=None)
    #print "Entropy  FR:{0:.3f}".format(Hfr)
    #Hfr_sol = drv.entropy_conditional(feature, solution, fill_value=None)
    #print "Entropy  FR|I:{0:.3f}".format(Hfr_sol)
    #HI = drv.entropy(solution, fill_value=None)
    #print "Entropy I:{0:.3f}".format(HI)
    featureDisc = pd.cut(feature, 30, labels=False)
    IG = drv.information_mutual(featureDisc, solution)
    #print "IG:{0:.3f}".format(IG)
    #IG = Hfr-Hfr_sol
    #print IG
    den = drv.entropy(featureDisc) + drv.entropy(solution)
    #print "Den:{0:.3f}".format(den)
    result = 2 * (IG / den)
    #print "Result:{0:.3f}".format(result)
    return result
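A hypothetical call (toy data, not from the original source); the feature is binned into 30 levels inside SU before the information-theoretic quantities are computed:
import numpy as np
import pandas as pd
from pyitlib import discrete_random_variable as drv

feature = np.random.randn(500)                 # toy continuous feature
solution = np.random.randint(0, 2, size=500)   # toy binary target
su = SU(0, feature, solution)                  # the first argument is unused in the body
print(su)                                      # symmetric uncertainty, between 0 and 1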
Example no. 13
    def notselectedMI(self, NS_item, df):
        """Sum of the mutual information between the not-selected feature NS_item and every column of df."""
        result = 0.00

        for i in df:
            """
            data1 = NS_item
            data2 = df[i]
            
            pd_series_H1 = pd.Series(data1)
            pd_series_H2 = pd.Series(data2)
            pd_series = pd.Series(data1, data2)
            counts_H1 = pd_series_H1.value_counts()
            counts_H2 = pd_series_H2.value_counts()
            counts = pd_series.value_counts()
            entropy_H1 = stats.entropy(counts_H1 / sum(counts_H1), base=2)
            entropy_H2 = stats.entropy(counts_H2 / sum(counts_H2), base=2)
            entropy = stats.entropy(counts / sum(counts), base=2)
            result = result + entropy_H1 + entropy_H2 - entropy
            """
            X = NS_item.values
            Y = df[i].values
            result = result + drv.information_mutual(X, Y)

        return result
Example no. 14
    def MI_with_y(X):
        return drv.information_mutual(X, y_tmp)
    def calculate_mutual_information(self, Y):
        """the reduction in uncertainty of X given Y"""
        # base is passed by keyword: a bare positional 2 would be taken as cartesian_product
        return drv.information_mutual(Y, self.__mean_histogram_from_data, base=2)
    def calculate_mutual_information_static(X, Y):
        """the reduction in uncertainty of X given Y"""
        return drv.information_mutual(X, Y, base=2)
Example no. 17
def Mutual_Information2(X):
    mutual_info = drv.information_mutual(X.T)
    for i in range(mutual_info.shape[0]):
        mutual_info[i, i] = 0
    return mutual_info
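A hypothetical call (toy data matrix, not from the original script): passing the transposed feature matrix makes pyitlib return the full pairwise MI matrix between features, and the loop above zeroes its diagonal.
import numpy as np
from pyitlib import discrete_random_variable as drv

X = np.random.randint(0, 4, size=(200, 5))   # 200 samples, 5 discrete features
mi_matrix = Mutual_Information2(X)           # 5 x 5 matrix of pairwise MI, zero diagonal
print(mi_matrix.shape)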
Example no. 18
#pos = nx.spring_layout(G)
#nx.draw_networkx(G,pos,node_size=5,alpha=0.5,with_labels=False)
#plt.show()

partition = community.best_partition(G)
#print partition

part = collections.OrderedDict(
    sorted(partition.items(), key=lambda x: int(x[0])))

rever = part
#print rever

lista = [v for v in rever.values()]
#print lista

communes = []

for i in range(16):
    for j in range(40):
        communes.append(i)

#print communes
c = drv.entropy(lista)
eta = drv.information_mutual(lista, communes)
eta = eta / c
print(p, eta)

#nmi = normalized_mutual_info_score(lista,communes,average_method='arithmetic')
#print p,nmi
Example no. 19
def Mutual_Information1(X):
    mutual_info = drv.information_mutual(X.T)
    return mutual_info
Example no. 20
img_median_filtered = img_as_ubyte(median(img_noisy, disk(1)))
median_im_entropy, _, _ = original_im_histogram.information_entropy(
    img_median_filtered)
_, median_histogram = original_im_histogram.image_histogram(
    img_median_filtered, "on")

img_bilateral_filtered = img_as_ubyte(
    mean_bilateral(img_noisy, disk(5), s0=10, s1=10))
bilateral_im_entropy, _, _ = original_im_histogram.information_entropy(
    img_bilateral_filtered)
_, bilateral_histogram = original_im_histogram.image_histogram(
    img_bilateral_filtered, "on")

information_mutual_original_noisy = np.round(
    drv.information_mutual(original_histogram, noisy_histogram), 2)
information_mutual_original_noisy_filtered_by_gauss = np.round(
    drv.information_mutual(original_histogram, gauss_histogram), 2)
information_mutual_original_noisy_filtered_by_median = np.round(
    drv.information_mutual(original_histogram, median_histogram), 2)
information_mutual_original_noisy_filtered_by_bilateral = np.round(
    drv.information_mutual(original_histogram, bilateral_histogram), 2)

object_finder.add_image(img)
object_finder.image_entropy_analysis_for_testing(None)
img_detected = object_finder.get_image()

object_finder.add_image(img2)
object_finder.image_entropy_analysis_for_testing(None)
img_detected2 = object_finder.get_image()