def entropy(seq: Union[List[int], List[float]], total: int = None) -> float:
    """Calculate the Shannon entropy of a sequence of floats or integers.

    If floats, they are treated as probabilities.
    If integers, they are normalized to probabilities before the entropy
    is calculated.

    Args:
        seq (Union[List[int], List[float]]): Sequence to calculate entropy for
        total (int, optional): Total to divide by for a list of ints

    Raises:
        ValueError: If seq is not valid

    Returns:
        float: Entropy of the sequence
    """
    if not seq:
        raise ValueError("Pass a valid non-empty sequence")
    if isinstance(seq[0], float):
        e = scipy_entropy(seq)
    elif isinstance(seq[0], int):
        e = scipy_entropy(get_probs_from_counts(seq))
    else:
        raise ValueError(
            "Parameter seq must be a sequence of probabilities or integers.")
    return e
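# A minimal usage sketch for entropy() above, assuming scipy_entropy is
# scipy.stats.entropy and that get_probs_from_counts() (referenced in the
# function body) normalizes integer counts to probabilities:
probs = [0.5, 0.25, 0.25]
counts = [2, 1, 1]
print(entropy(probs))   # ~1.0397 nats: probabilities are used directly
print(entropy(counts))  # same value: the counts are normalized first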
def jensen_shannon_distance(p, q):
    """Jensen-Shannon distance between two probability distributions."""
    p = np.array(p)
    q = np.array(q)
    m = (p + q) / 2
    divergence = (scipy_entropy(p, m) + scipy_entropy(q, m)) / 2
    distance = np.sqrt(divergence)
    return distance
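# Quick sanity check for jensen_shannon_distance(); with scipy_entropy's
# default natural log the distance lies in [0, sqrt(ln 2)] ~ [0, 0.8326]
# and is 0 for identical distributions (this assumes scipy_entropy is
# scipy.stats.entropy, which computes KL(p || m) when given two arrays):
p = [0.1, 0.4, 0.5]
q = [0.3, 0.3, 0.4]
print(jensen_shannon_distance(p, p))  # 0.0
print(jensen_shannon_distance(p, q))  # small positive value below 0.8326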
def shannon_entropy(image, base=2):
    """Calculate the Shannon entropy of an image.

    The Shannon entropy is defined as S = -sum(pk * log(pk)), where pk is
    the frequency of pixels of value k.

    Parameters
    ----------
    image : (N, M) ndarray
        Grayscale input image.
    base : float, optional
        The logarithmic base to use.

    Returns
    -------
    entropy : float

    Notes
    -----
    The returned value is measured in bits or shannon (Sh) for base=2,
    natural unit (nat) for base=np.e and hartley (Hart) for base=10.

    References
    ----------
    .. [1] https://en.wikipedia.org/wiki/Entropy_(information_theory)
    .. [2] https://en.wiktionary.org/wiki/Shannon_entropy

    Examples
    --------
    >>> from skimage import data
    >>> shannon_entropy(data.camera())
    7.231695011055706
    """
    # entropy of the pixel-value distribution: count occurrences of each
    # value rather than treating raw intensities as probabilities
    _, counts = np.unique(image, return_counts=True)
    return scipy_entropy(counts, base=base)
def H(self, occs):
    '''
    Compute the "entropy" of a snippet, assigning weights as the width of
    gaps.

    :occs: a list of times where the snippet occurred (duplicates if more
        than once).
    '''
    p = np.diff(occs)
    p = p / sum(p)
    return scipy_entropy(p, base=2)
def shannon_entropy(self, base: float = 2) -> float:
    """Calculates the Shannon entropy.

    Args:
        base: The logarithmic base to use, defaults to 2.

    Returns:
        A float value, the Shannon entropy of the distribution.
    """
    return scipy_entropy(self._dist, base=base)
def get_divergences_per_element_with_segments(
        confidences_3d: List[List[List[float]]],
        confidence_segments: Dict) -> List[List[float]]:
    """Calculate divergences by segments defined in confidence_segments,
    where p_d is the probabilities within class X and q_d is the mean
    probability distribution for class X. Divergence(p_d, q_d) is
    calculated for each element in all classes.

    Args:
        confidences_3d (List[List[List[float]]]): Confidence probabilities
            per element.
        confidence_segments (Dict[str, Tuple[int, int]]): A dictionary
            mapping segments to run KL Divergence on.

    Returns:
        divergences (List[List[float]]): Divergences per model for each
            element.
    """
    avg_preds = np.mean(confidences_3d, axis=0)
    divergences = []
    # Calculate q_d
    q_d = {d: [] for d in confidence_segments}
    for row in avg_preds:
        domain = KLDivergenceSampling.get_domain(confidence_segments, row)
        q_d[domain].append(row)
    for pred in confidences_3d:
        # Calculate p_d
        p_d = {d: [] for d in confidence_segments}
        for row in pred:
            domain = KLDivergenceSampling.get_domain(
                confidence_segments, row)
            p_d[domain].append(row)
        # Calculate divergence scores by domain
        divergence_scores_by_domain = {d: [] for d in confidence_segments}
        for domain in confidence_segments:
            if len(p_d[domain]) > 0 and len(q_d[domain]) > 0:
                divergence_scores_by_domain[domain] = scipy_entropy(
                    np.array(p_d[domain]),
                    np.array(q_d[domain]),
                    axis=1,
                    base=ENTROPY_LOG_BASE,
                )
            else:
                divergence_scores_by_domain[domain] = 0
        # Reorder divergence scores by domain into the original order
        single_pred_divergence_scores = []
        domain_counter = {d: 0 for d in confidence_segments}
        for row in pred:
            domain = KLDivergenceSampling.get_domain(
                confidence_segments, row)
            single_pred_divergence_scores.append(
                divergence_scores_by_domain[domain][
                    domain_counter[domain]])
            domain_counter[domain] += 1
        divergences.append(single_pred_divergence_scores)
    return divergences
def rank_2d(confidences_2d: List[List[float]]) -> List[int]:
    """Calculates the entropy score of the confidences per element.
    Elements are ranked from highest to lowest entropy.

    Args:
        confidences_2d (List[List[float]]): Confidence probabilities per
            element.

    Returns:
        ranked_indices (List[int]): Indices corresponding to elements
            ranked by the heuristic.
    """
    entropy_per_element = scipy_entropy(
        np.array(confidences_2d), axis=1, base=ENTROPY_LOG_BASE
    )
    high_to_low_entropy = np.argsort(entropy_per_element)[::-1]
    return list(high_to_low_entropy)
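# Hypothetical usage of rank_2d(); ENTROPY_LOG_BASE is assumed to be
# defined elsewhere in the module (base 2 would yield entropies in bits):
confidences = [
    [0.90, 0.05, 0.05],  # confident prediction -> low entropy
    [0.34, 0.33, 0.33],  # near-uniform prediction -> high entropy
]
print(rank_2d(confidences))  # [1, 0]: the uncertain element ranks first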
def entropy(x):
    # assumes a 1D numpy array
    assert len(x.shape) == 1
    # all values must be positive
    assert np.min(x) > 0
    # make probabilities
    x_p = x / x.sum()
    # H = -np.sum(x_p * np.log2(x_p))
    H = scipy_entropy(x_p, base=2)
    return H
def get_divergences_per_element_no_segments(
    confidences_3d: List[List[List[float]]],
) -> List[List[float]]:
    """Calculate the KL divergence of each model's predictions from the
    mean prediction distribution across models.

    Args:
        confidences_3d (List[List[List[float]]]): Confidence probabilities
            per element.

    Returns:
        divergences (List[List[float]]): Divergences per model for each
            element.
    """
    # Calculate average prediction values.
    q_x = np.mean(confidences_3d, axis=0)
    # Duplicate the mean distribution by the number of models.
    num_models, _, _ = np.array(confidences_3d).shape
    q_x = [q_x for n in range(num_models)]
    # X: Model, Y: Divergence per element
    divergences = scipy_entropy(confidences_3d, q_x, axis=2,
                                base=ENTROPY_LOG_BASE)
    return divergences
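# A small sketch exercising get_divergences_per_element_no_segments():
# two models, two elements, three classes. Each output entry is
# KL(model prediction || mean prediction), so it shrinks toward 0 as the
# models agree (ENTROPY_LOG_BASE is again assumed to be defined above):
confidences_3d = [
    [[0.8, 0.1, 0.1], [0.2, 0.5, 0.3]],  # model 0
    [[0.6, 0.2, 0.2], [0.2, 0.4, 0.4]],  # model 1
]
print(get_divergences_per_element_no_segments(confidences_3d))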
def binaryPatterns(img, numPoints, radius):
    """
    Compute the local binary patterns of an image for the given
    numPoints and radius parameters.

    input:
        img (ndarray): any image
        numPoints (int): number of points to sample on the circle's
            perimeter
        radius (int): radius of the circle

    output:
        (int) Returns 100 times the base-2 entropy of the image's
        binary-pattern histogram
    """
    Patern = GalaxyBinaryPatterns(numPoints, radius)
    gris = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    Hist = Patern.Galaxy_description(gris)
    _, counts = unique(Hist, return_counts=True)
    return int(100 * scipy_entropy(counts, base=2))
def entrop(self, clase_nom):
    per_day = np.zeros((self.len_cols, self.len_dia), dtype=np.uint8)
    clase_nom = self.clase.index(clase_nom)
    for day in range(self.len_dia):
        for hour in range(self.len_horas):
            for row in range(per_day.shape[0]):
                indx = (hour + day * self.len_horas
                        + clase_nom * self.len_dia * self.len_horas)
                if self.genes[indx, row] > 0:
                    per_day[row, day] += 1
    entrop_total = 0
    for row in range(per_day.shape[0]):
        entropy = scipy_entropy(per_day[row, :], base=2)
        if entropy > 0:  # skip rows with zero or undefined entropy
            entrop_total += entropy
    return entrop_total / self.len_cols
def rank_entities(entity_confidences: List[List[List[float]]]) -> List[int]:
    """Calculates the entropy score of the entity confidences per element.
    Elements are ranked from highest to lowest entropy.

    Returns:
        A ranked list based on token entropy: the average of per-token
        entropies across a query.
    """
    sequence_entropy_list = []
    for sequence in entity_confidences:
        entropy_per_token = scipy_entropy(
            np.array(sequence), axis=1, base=ENTROPY_LOG_BASE
        )
        total_entropy = sum(entropy_per_token)
        mean_token_entropy = total_entropy / len(entropy_per_token)
        sequence_entropy_list.append(mean_token_entropy)
    high_to_low_entropy = np.argsort(sequence_entropy_list)[::-1]
    return list(high_to_low_entropy)
def entropy(img, base=2):
    """
    Calculate the entropy of a grayscale image.

    Parameters
    ----------
    img : (N, M) ndarray
        Grayscale input image.
    base : float, optional
        The logarithmic base to use.

    Returns
    -------
    entropy : float

    Notes
    -----
    The units are bits or shannon (Sh) for base=2, natural unit (nat) for
    base=np.e and hartley (Hart) for base=10.
    """
    return scipy_entropy(img.ravel(), base=base)
def entropy(X):
    """
    Use the Shannon entropy H to describe the distribution of the given
    sample.

    :param X: sample of numeric values, or a nested sequence of samples
    :return: entropy of the sample, or an array of entropies for nested
        samples
    """
    _X = np.array(X)
    if any([isinstance(_, (list, np.ndarray)) for _ in _X]):
        return np.array([entropy(_) for _ in _X])

    # check that the sample has even length
    if len(_X) % 2 > 0:
        raise ValueError(
            'The sample does not have an even length: {}'.format(_X))

    # calculate the entropy of a histogram of pairwise absolute differences
    vals = [np.abs(_X[i] - _X[i + 1]) for i in np.arange(0, len(_X), 2)]
    return scipy_entropy(pk=np.histogram(vals, bins='fd')[0])
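# Usage sketch for entropy(X) above: the sample is consumed in
# non-overlapping pairs, so it must have even length; the entropy is taken
# over a Freedman-Diaconis histogram of the pairwise absolute differences.
rng = np.random.default_rng(0)
sample = rng.normal(size=100)
print(entropy(sample))             # a single float
print(entropy([sample, -sample]))  # one entropy per nested sample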
def func():
    # 1. fetch all features
    uniq_keys = set()
    for item_id1, item1 in process_notifier(d1):
        for k1 in item1.keys():
            uniq_keys.add(k1)
    uniq_keys = list(uniq_keys)

    # 2. feature1 => {doc1: count1, doc2: count2, ...}
    value_cache = defaultdict(dict)
    for item_id1, item1 in process_notifier(d1):
        for k1, c1 in item1.items():
            value_cache[k1][item_id1] = c1

    # 3. calculate each feature's entropy
    entropy_cache = dict()
    total_len = len(d1)
    for k1 in process_notifier(uniq_keys):
        exist_values = list(value_cache[k1].values())
        # pad with zeros for documents that lack this feature
        total_values = exist_values + [0] * (total_len - len(exist_values))
        entropy_cache[k1] = scipy_entropy(total_values)
    return entropy_cache
def shannon_entropy(image, base=2):
    """Calculate the Shannon entropy of an image.

    The Shannon entropy is defined as S = -sum(pk * log(pk)), where pk is
    the frequency/probability of pixels of value k.

    Parameters
    ----------
    image : (N, M) ndarray
        Grayscale input image.
    base : float, optional
        The logarithmic base to use.

    Returns
    -------
    entropy : float

    Notes
    -----
    The returned value is measured in bits or shannon (Sh) for base=2,
    natural unit (nat) for base=np.e and hartley (Hart) for base=10.

    References
    ----------
    .. [1] https://en.wikipedia.org/wiki/Entropy_(information_theory)
    .. [2] https://en.wiktionary.org/wiki/Shannon_entropy

    Examples
    --------
    >>> from skimage import data
    >>> from skimage.measure import shannon_entropy
    >>> shannon_entropy(data.camera())
    7.231695011055706
    """
    _, counts = unique(image, return_counts=True)
    return scipy_entropy(counts, base=base)
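# A dependency-free check of shannon_entropy() above (assuming numpy is
# imported as np, as in the other snippets here): a two-valued image split
# 50/50 carries exactly 1 bit of entropy.
img = np.array([[0, 255], [255, 0]], dtype=np.uint8)
print(shannon_entropy(img))  # 1.0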
def get_entropy(self, class_name):
    # a row contributes 1 if all units of a course are scheduled on the
    # same day, else 0
    u_per_day_mat = np.zeros((self.LEN_COLS, self.LEN_DAYS),
                             dtype=np.uint8)
    # filling the matrix
    class_name = self.classes.index(class_name)
    for day in range(self.LEN_DAYS):
        for hour in range(self.LEN_HOURS):
            for row in range(u_per_day_mat.shape[0]):
                indx = (hour + day * self.LEN_HOURS
                        + class_name * self.LEN_DAYS * self.LEN_HOURS)
                if self.genes[indx, row] > 0:
                    u_per_day_mat[row, day] += 1
    # calculating the entropy
    all_entropies = 0
    for row in range(u_per_day_mat.shape[0]):
        entropy = scipy_entropy(u_per_day_mat[row, :], base=2)
        if entropy > 0:  # skip rows with zero or undefined entropy
            all_entropies += entropy
    return all_entropies / self.LEN_COLS
def image_entropy(self, image, base=2):
    # image = self.image_gradient(image)
    _, counts = np.unique(image, return_counts=True)
    # dividing by 8.0 normalizes the base-2 entropy of an 8-bit image
    # (at most 8 bits) into the range [0, 1]
    return scipy_entropy(counts, base=base) / 8.0
def entropy(self, img, base=2):  # base 2 gives the Shannon entropy
    _, counts = np.unique(img, return_counts=True)
    return scipy_entropy(counts, base=base)
def H(self, occs):
    p = np.diff(occs)
    p = p / sum(p)
    return scipy_entropy(p, base=2)
def Entropy(self, img):
    _, counts = unique(img, return_counts=True)
    return scipy_entropy(counts, base=2)
def shannon_entropy(image, base=10):
    # original work from the scikit-image team for entropy calculation
    _, counts = unique(image, return_counts=True)
    return scipy_entropy(counts, base=base)
def signal_features(data):
    """
    Extract various features from time series data.

    Parameters
    ----------
    data : numpy array of floats
        time series data

    Returns
    -------
    num : integer
        number of elements
    min : float
        minimum
    max : float
        maximum
    rng : float
        range
    avg : float
        mean
    std : float
        standard deviation
    med : float
        median
    mad : float
        median absolute deviation
    kurt : float
        kurtosis
    skew : float
        skewness
    cvar : float
        coefficient of variation
    lower25 : float
        lower quartile
    upper25 : float
        upper quartile
    inter50 : float
        interquartile range
    rms : float
        root mean square
    entropy : float
        entropy measure
    tk_energy : float
        mean Teager-Kaiser energy

    Examples
    --------
    >>> import numpy as np
    >>> from mhealthx.signals import signal_features
    >>> data = np.random.random(100)
    >>> num, min, max, rng, avg, std, med, mad, kurt, skew, cvar, lower25, upper25, inter50, rms, entropy, tk_energy = signal_features(data)
    """
    from scipy.stats import entropy as scipy_entropy
    from mhealthx.signals import compute_stats, root_mean_square, \
        compute_mean_teagerkaiser_energy

    num, min, max, rng, avg, std, med, mad, kurt, skew, cvar, lower25, \
        upper25, inter50 = compute_stats(data)
    rms = root_mean_square(data)
    entropy = scipy_entropy(data)
    tk_energy = compute_mean_teagerkaiser_energy(data)

    return num, min, max, rng, avg, std, med, mad, kurt, skew, cvar, \
        lower25, upper25, inter50, rms, entropy, tk_energy
def get_entropy(img_gray):
    _, counts = np.unique(img_gray, return_counts=True)
    entropy = scipy_entropy(counts, base=2)
    return entropy
def entropy(arr, maximum=600, width=10):
    binned_means = np.digitize(arr, bins=np.arange(maximum, step=width))
    probs = np.bincount(binned_means) / float(len(binned_means))
    return scipy_entropy(probs)
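# Hypothetical call to entropy(arr, ...) above: values are digitized into
# width-10 bins up to `maximum`, and the entropy of the resulting bin
# occupancy distribution is returned in nats (no base is passed):
values = np.array([5.0, 12.0, 15.0, 47.0, 305.0])
print(entropy(values))  # entropy of 5 values spread over 4 distinct bins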
def shannon_entropy(image, base=2):
    _, counts = np.unique(image, return_counts=True)
    return scipy_entropy(counts, base=base)
def compute_entropy_scipy_numpy(data):
    """Compute entropy on bytearray `data` with SciPy and NumPy."""
    counts = np.bincount(bytearray(data), minlength=256)
    return scipy_entropy(counts, base=2)
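# Quick check of compute_entropy_scipy_numpy(): a constant byte stream has
# zero entropy, while a uniform spread over all 256 byte values hits the
# 8-bit maximum.
print(compute_entropy_scipy_numpy(b"\x00" * 1024))         # 0.0
print(compute_entropy_scipy_numpy(bytes(range(256)) * 4))  # 8.0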