def qre_exploitability(dist, payoff_tensor, temperature=0., aggregate=np.mean):
    """Shannon-regularized exploitability of a profile in a non-symmetric game.

    Args:
      dist: list of 1-d np.arrays, one mixed strategy per player
      payoff_tensor: (n x A1 x ... x An) np.array of payoffs for each joint
        action (assumed non-negative); may also be a list of
        (A1 x ... x An) np.arrays
      temperature: non-negative float, weight of the entropy bonus
      aggregate: callable reducing the per-player exploitabilities to a
        scalar, e.g., np.mean or np.max
    Returns:
      aggregate over players i of
        (regularized payoff_i of best response_i - regularized payoff_i of dist)
    """
    gaps = []
    for player in range(len(payoff_tensor)):
        payoff_grad = misc.pt_reduce(payoff_tensor[player], dist, [player])
        strategy = dist[player]

        if temperature > 0:
            # entropy-regularized best response is a softmax over payoffs
            best_response = special.softmax(payoff_grad / temperature)
        else:
            # unregularized: spread mass uniformly over all maximizers
            is_max = (payoff_grad == np.max(payoff_grad))
            best_response = np.zeros_like(strategy)
            best_response[is_max] = 1. / is_max.sum()

        value_br = (payoff_grad.dot(best_response)
                    + temperature * special.entr(best_response).sum())
        value_dist = (payoff_grad.dot(strategy)
                      + temperature * special.entr(strategy).sum())
        gaps.append(value_br - value_dist)

    return aggregate(gaps)
def best_merge(L, q, uni, mis21, mis, ffs, pos):
    """Find the best pair of letters to merge at position ``pos``.

    For every letter pair ``A < B`` the mutual-information (MI) values between
    ``pos`` and every other position are recomputed as if A and B were merged,
    and the squared deviations from the target MIs ``mis21`` are summed. The
    pair with the smallest summed squared error is selected.

    Args:
        L: number of positions.
        q: number of letters currently available at ``pos``.
        uni: single-site frequencies at ``pos``, indexed by letter.
        mis21: target MI values, indexed ``[pos, j]``.
        mis: current MI values, indexed ``[pos, j]``.
        ffs: pairwise frequency tables; ``ffs[pos, j]`` is a 2-d table whose
            first axis is indexed by the letter at ``pos``.
        pos: the position whose letters are candidates for merging.

    Returns:
        Tuple ``(goodness, bestA, bestB)``: the smallest summed squared error
        and the letter pair achieving it. If no pair could be evaluated
        (e.g. ``q < 2``) the result is ``(np.inf, None, None)``.
    """
    others = [j for j in range(L) if j != pos]
    ffp = [ffs[pos, j] for j in others]
    # per-letter entropy contribution of each pairwise table
    entrp = np.array([np.sum(entr(x), axis=1) for x in ffp])
    mip = np.array([mis[pos, j] for j in others])
    mi21p = np.array([mis21[pos, j] for j in others])
    unientr = entr(uni)

    goodness, bestA, bestB = np.inf, None, None
    for A in range(q - 1):
        ffA = [ff[A, :] for ff in ffp]
        eA = entrp[:, A]
        for B in range(A + 1, q):
            unidelta = entr(uni[A] + uni[B]) - unientr[A] - unientr[B]
            # entropy of the merged row (A and B pooled) in every table
            eC = np.array(
                [sum(entr(ffa + ff[B, :])) for ff, ffa in zip(ffp, ffA)])
            eB = entrp[:, B]
            # change in MI due to the merge
            newmis = mip - eC + eA + eB + unidelta
            g = sum((newmis - mi21p)**2)
            if g < goodness:
                goodness, bestA, bestB = g, A, B
    return goodness, bestA, bestB
def cheap_gradients(random, dist, y, payoff_matrices, num_players,
                    temperature=0., proj_grad=True):
    """Computes exploitablity gradient and aux variable gradients with samples.

    This implementation takes payoff_matrices as input so technically uses
    O(d^2) compute but only a single column of payoff_matrices is used to
    perform the update so can be re-implemented in O(d) if needed.

    The caller's payoff_matrices are never modified.

    Args:
      random: random number generator, np.random.RandomState(seed)
      dist: 1-d np.array, current estimate of nash distribution
      y: 1-d np.array (same shape as dist), current estimate of payoff gradient
      payoff_matrices: (>=2 x A x A) np.array, payoffs for each joint action
      num_players: int, number of players, in case payoff_matrices is
        abbreviated
      temperature: non-negative float, default 0.
      proj_grad: bool, if True, projects dist gradient onto simplex
    Returns:
      gradient of exploitability w.r.t. (dist, y) as tuple
      unregularized exploitability (stochastic estimate)
      Shannon-entropy regularized exploitability (stochastic estimate)
    """
    # sample the opponent action from dist; nabla is a *view* of one column
    action_1 = random.choice(dist.size, p=dist)
    nabla = payoff_matrices[0][:, action_1]

    if temperature > 0:
        br = special.softmax(y / temperature)
        br_mat = (np.diag(br) - np.outer(br, br)) / temperature
        br_policy_gradient = nabla - temperature * (np.log(br) + 1)
    else:
        # best response puts uniform mass on the maximizers of y; use the
        # true maximum (not max |y|) so negative estimates are handled
        maxima = (y == np.max(y))
        br = np.zeros_like(dist)
        br[maxima] = 1. / maxima.sum()
        br_mat = np.zeros((br.size, br.size))
        br_policy_gradient = np.zeros_like(br)

    unreg_exp = np.max(y) - y.dot(dist)
    entr_br = temperature * special.entr(br).sum()
    entr_dist = temperature * special.entr(dist).sum()
    reg_exp = y.dot(br - dist) + entr_br - entr_dist

    # build a new array: an in-place update here would write through the
    # view into payoff_matrices and corrupt nabla before grad_y is formed
    if temperature > 0:
        policy_gradient = nabla - temperature * (np.log(dist) + 1)
    else:
        policy_gradient = nabla

    other_player_fx = (br - dist) + br_mat.dot(br_policy_gradient)

    action_u = random.choice(dist.size)  # uniform, ~importance sampling
    other_player_fx = dist.size * other_player_fx[action_u]
    other_player_fx_translated = (
        payoff_matrices[1, :, action_u] * other_player_fx)

    grad_dist = -policy_gradient
    grad_dist = grad_dist + (num_players - 1) * other_player_fx_translated
    if proj_grad:
        grad_dist = simplex.project_grad(grad_dist)
    grad_y = y - nabla

    return (grad_dist, grad_y), unreg_exp, reg_exp
def print_info(self):
    """Print summary diagnostics of ``self.d``, ``self.A`` and ``self.D``.

    Written with single-argument ``print(...)`` calls so the same source
    runs, with identical output, on both Python 2 and Python 3.

    Printed, in order: 1-based row index of the maximum of each column of
    ``self.d`` (labelled "dominant wave period"), mean absolute value of
    ``self.d``, mean of ``self.A``, column-wise sum of ``entr(self.D)`` and
    its mean, and the gap between the largest and second-largest entry of
    each column of ``self.d``.
    """
    power = self.d
    print('dominant wave period: \n%s' % (power.argmax(axis=0) + 1,))
    print('\n\navg. power (all):\t%s' % (np.abs(power).mean(),))
    print('avg. power waves:\t%s' % (self.A.mean(),))

    column_entropy = special.entr(self.D).sum(axis=0)
    print('\n\nentropy:\n%s' % (column_entropy,))
    print('\n\n mean entropy:  %s' % (np.mean(column_entropy),))

    # work on a copy: the column maxima are overwritten to expose the runner-up
    d = self.d.copy()
    maxx = d.max(axis=0)
    # sentinel -10.0 assumes every real entry is larger -- TODO confirm
    d[d.argmax(axis=0), range(d.shape[1])] = -10.0
    print('\n\ndiff b/w top 2:\n%s' % (maxx - d.max(axis=0),))
def gradients(dist, y, payoff_matrices, num_players, temperature=0.,
              proj_grad=True):
    """Computes exploitablity gradient and aux variable gradients.

    Note: the incoming ``y`` is ignored; it is replaced by the exact payoff
    gradient ``payoff_matrices[0].dot(dist)``, so the returned gradient
    w.r.t. y is always zero.

    Args:
      dist: 1-d np.array, current estimate of nash distribution
      y: 1-d np.array (same shape as dist), current estimate of payoff gradient
      payoff_matrices: (>=2 x A x A) np.array, payoffs for each joint action
      num_players: int, number of players, in case payoff_matrices is
        abbreviated
      temperature: non-negative float, default 0.
      proj_grad: bool, if True, projects dist gradient onto simplex
    Returns:
      gradient of exploitability w.r.t. (dist, y) as tuple
      unregularized exploitability
      Shannon-entropy regularized exploitability
    """
    nabla = payoff_matrices[0].dot(dist)
    y = nabla

    if temperature > 0:
        br = special.softmax(y / temperature)
        br_mat = (np.diag(br) - np.outer(br, br)) / temperature
        br_policy_gradient = nabla - temperature * (np.log(br) + 1)
    else:
        # best response puts uniform mass on the maximizers of y; use the
        # true maximum (not max |y|) so negative payoffs are handled
        maxima = (y == np.max(y))
        br = np.zeros_like(dist)
        br[maxima] = 1. / maxima.sum()
        br_mat = np.zeros((br.size, br.size))
        br_policy_gradient = np.zeros_like(br)

    unreg_exp = np.max(y) - y.dot(dist)
    entr_br = temperature * special.entr(br).sum()
    entr_dist = temperature * special.entr(dist).sum()
    reg_exp = y.dot(br - dist) + entr_br - entr_dist

    # build a new array rather than updating nabla (and its alias y) in place
    if temperature > 0:
        policy_gradient = nabla - temperature * (np.log(dist) + 1)
    else:
        policy_gradient = nabla

    other_player_fx = (br - dist) + br_mat.dot(br_policy_gradient)
    other_player_fx_translated = payoff_matrices[1].dot(other_player_fx)

    grad_dist = -policy_gradient
    grad_dist = grad_dist + (num_players - 1) * other_player_fx_translated
    if proj_grad:
        grad_dist = simplex.project_grad(grad_dist)
    grad_y = y - nabla

    return (grad_dist, grad_y), unreg_exp, reg_exp
def transform(self, X, y=None):
    """Compute the base-2 Shannon entropy of every row of ``X``.

    Negative entries are replaced by their absolute values (with a warning)
    and each row is divided by its sum before the entropy is taken.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Input data.

    y : None
        Ignored; present only because the pipeline API requires it.

    Returns
    -------
    Xt : ndarray of shape (n_samples, 1)
        Shannon entropy (in bits) of each normalised row.

    """
    # TODO: dividing by the row sum is a crude way of turning each row into
    # "probabilities". Consider normalisation in terms of bin counts?
    check_is_fitted(self, '_is_fitted')
    Xt = check_array(X)

    has_negatives = np.any(Xt < 0)
    if has_negatives:
        warnings.warn("Negative values detected in X! Taking absolute "
                      "value to calculate probabilities.")
        Xt = np.abs(Xt)

    row_totals = Xt.sum(axis=1, keepdims=True)
    probabilities = Xt / row_totals
    entropy_nats = entr(probabilities).sum(axis=1, keepdims=True)
    return entropy_nats / np.log(2)
def choose_move(self, board):
    """Pick one of the board's legal moves according to ``self.state``.

    Each candidate position from ``board.vec_next_moves()`` is flattened and
    scored with ``self.model.predict_proba``. The selection rule is:

    * ``'explore'``  -- the candidate whose score row has the highest entropy;
    * ``'exploit'``  -- the candidate with the highest score for the player
      to move (column 1 when ``board.turn`` is truthy, else column 0);
    * ``'wexploit'`` -- a candidate sampled with probability proportional to
      that score.

    NOTE(review): the chosen index is applied to ``list(board.legal_moves)``,
    which presumes it is ordered like ``board.vec_next_moves()`` -- confirm.

    Raises:
        ValueError: if ``self.state`` is not one of the three modes above.
    """
    candidates = board.vec_next_moves()
    features = np.vstack([move[0].flatten() for move in candidates])
    move_scores = self.model.predict_proba(features)

    # column 1 scores the white player, column 0 the black player
    player = 1 if board.turn else 0

    if self.state == 'explore':
        entropy = entr(move_scores).sum(axis=1) / np.log(2)
        ind = np.argmax(entropy)
    elif self.state == 'exploit':
        ind = np.argmax(move_scores[:, player])
    elif self.state == 'wexploit':
        # weighted exploitation; the small offset keeps every move reachable
        scores = move_scores[:, player] + 0.00000001
        scores = scores / scores.sum()
        ind = np.random.choice(len(scores), p=scores)
    else:
        raise ValueError("unknown state: %r" % (self.state,))

    return list(board.legal_moves)[ind]
def cat_entropy(p):
    """Row-wise entropy (in nats) of categorical distributions.

    Each row of ``p`` is treated as one distribution. ``special.entr`` is
    used instead of ``-p * np.log(p)`` because the latter has problems for
    ``p`` near 0.
    """
    elementwise = special.entr(p)  # pylint: disable=E1101
    return elementwise.sum(axis=1)
def log_diagnostics(self, paths):
    """Log the aggregate latent distribution over ``paths`` and its entropy.

    The ``latents`` arrays stored under ``path['agent_infos']`` are summed
    over their first axis and across all paths, normalised to sum to one,
    and recorded as "ManagerProbs"; the entropy (in nats) of that vector is
    recorded as "ManagerEntropy".
    """
    latent_totals = np.zeros([1, self.latent_dim])
    for path in paths:
        latent_totals += np.sum(path['agent_infos']['latents'], axis=0)

    latent_totals = latent_totals.reshape(-1)
    manager_probs = latent_totals / np.sum(latent_totals)

    logger.record_tabular("ManagerEntropy", entr(manager_probs).sum())
    logger.record_tabular("ManagerProbs", manager_probs)
def transform(self, X, y=None):
    """Normalise each row of ``X`` to sum to one and return its entropy.

    Negative entries are replaced by their absolute values (with a warning)
    before normalisation. The entropy is expressed in bits.

    Parameters
    ----------
    X : ndarray, shape (n_samples, n_features)
        Input data.

    y : None
        Ignored; present only because the pipeline API requires it.

    Returns
    -------
    Xt : ndarray, shape (n_samples, 1)

    """
    # TODO: dividing by the row sum is a crude way of turning each row into
    # "probabilities". Consider normalisation in terms of bin counts?
    check_is_fitted(self)
    X = check_array(X)

    if np.any(X < 0):
        warnings.warn("Negative values detected in X! Taking absolute "
                      "value to calculate probabilities.")
        X = np.abs(X)

    row_sums = X.sum(axis=1, keepdims=True)
    probs = X / row_sums
    bits = entr(probs).sum(axis=1) / np.log(2)
    return bits.reshape(-1, 1)
def pol_loss(pi, pi_k, q_k, ds_k, dsa_k, alpha):
    """Return the negated surrogate policy objective for candidate ``pi``.

    ``pi`` is a flat vector laid out state-major, i.e. the entry for state
    ``s`` and action ``a`` is ``pi[s * total_actions + a]``; ``pi_k``,
    ``q_k`` and ``dsa_k`` are indexed ``[s][a]`` and ``ds_k`` is indexed
    ``[s]``. ``total_states`` and ``total_actions`` are module-level values.

    The per-state slice of ``pi`` is ``total_actions`` wide (previously a
    hard-coded 4), so the entropy term stays aligned with the action loop
    for any action count.

    Logarithms of zero, non-positive or non-finite quantities are taken to
    be 0.
    """
    term1, term2 = 0.0, 0.0
    for s in range(total_states):
        start = s * total_actions
        H_k = entr(pi_k[s]).sum()
        H = entr(pi[start:start + total_actions]).sum()

        logH = 0 if (H <= 0 or np.isinf(H) or np.isnan(H)) else np.log(H)
        logH_k = (0 if (H_k <= 0 or np.isinf(H_k) or np.isnan(H_k))
                  else np.log(H_k))
        term2 += alpha * (logH - logH_k) * ds_k[s] * H_k

        for a in range(total_actions):
            p_new = pi[start + a]
            p_old = pi_k[s][a]
            logpi = 0 if p_new == 0 else np.log(p_new)
            logpi_k = 0 if p_old == 0 else np.log(p_old)
            term1 += (logpi - logpi_k) * dsa_k[s][a] * (q_k[s][a] + alpha * H_k)

    return -(term1 + term2)
def log_diagnostics(self, paths):
    """Record the normalised latent totals of ``paths`` and their entropy.

    For every path the ``latents`` array under ``path['agent_infos']`` is
    summed along axis 0; the totals across paths are normalised to sum to
    one and logged as "ManagerProbs", and the entropy (in nats) of that
    vector is logged as "ManagerEntropy".
    """
    accumulated = np.zeros([1, self.latent_dim])
    for path in paths:
        per_path = np.sum(path['agent_infos']['latents'], axis=0)
        accumulated += per_path

    flat = accumulated.reshape(-1)
    manager_probs = flat / np.sum(flat)
    manager_entropy = entr(manager_probs).sum()

    logger.record_tabular("ManagerEntropy", manager_entropy)
    logger.record_tabular("ManagerProbs", manager_probs)
def draw_coin_flipping_tree(n=4, p=0.2):
    """
    Construct and draw a Huffman code for a sequence of n coin flips.

    The figure title reports the per-flip entropy of the coin (labelled
    ``H/n``) next to the expected codeword length per flip (``E(K/n)``).

    Arguments:
    ----------
    n : int >= 0
        The sequence length
    p : float in [0, 1]
        The bias of the coin.
    """
    distribution = get_coin_flipping_distribution(n=n, p=p)
    tree = build_huffman_tree(distribution)
    code = OrderedDict(iter_codewords(tree))
    codesize = len(code)

    # Entropy of one flip in bits. Both outcomes contribute: entr(p) alone
    # is only the -p*log(p) term and understates the entropy.
    H = (entr(p) + entr(1 - p)) / np.log(2)
    EK = sum(distribution[letter] * len(code[letter])
             for letter in distribution)

    min_y = -1
    max_y = +1
    max_x = 1.0

    # leaves are stacked vertically on the right-hand edge of the plot
    locations = max_x * np.ones((codesize, 2))
    locations[:, 1] = np.linspace(min_y, max_y, codesize)
    location_tree = replace_leaves(tree, locations)

    figure = plt.figure(figsize=(12, 8))
    (min_x, _), _ = draw_connections(location_tree)
    treewidth = 1 - min_x

    bigfontsize = matplotlib.rcParams["font.size"]
    smallfontsize = bigfontsize / np.log2(1 + n**0.5)
    fontargs = dict(fontsize=smallfontsize,
                    family="monospace",
                    fontweight="bold")
    for (x, y), codeword in zip(locations, code.values()):
        plt.text(x, y, " " + codeword, ha="left", va="center", **fontargs)

    titlestring = "$n=%s,\\quad{}H/n=%.3f,\\quad{}E(K/n)=%.3f$" % (n, H, EK / n)
    plt.title(titlestring, fontsize=bigfontsize)
    plt.xlim(min_x - 0.1 * treewidth, max_x + 0.3 * treewidth)
    plt.ylim(min_y - 0.2, max_y + 0.3)
    plt.axis("off")
    plt.tight_layout()
    plt.show()
    plt.close(figure)
def get_mdp_obj(pi, gamma, alpha):
    """Return the entropy-regularised objective of policy ``pi``.

    Sums ``dsa[s][a] * (r(s) + alpha * H(pi[s]))`` over all states and
    actions, where ``dsa`` is the second value returned by
    ``get_occupancy_measure(pi, gamma)``, ``r`` is 1 in state 0 and 0
    elsewhere, and ``H`` is the entropy (in nats) of the policy row.
    """
    _, dsa = get_occupancy_measure(pi, gamma)

    total = 0
    for s in range(total_states):
        reward = 1 if s == 0 else 0
        entropy_bonus = alpha * entr(pi[s]).sum()
        for a in range(total_actions):
            total += dsa[s][a] * (reward + entropy_bonus)
    return total
def extract_features(arr):
    """Return ``[mean, std, entropy-sum]`` for every array in ``arr``, flattened.

    For each element ``a`` of ``arr`` three values are appended in order:
    ``np.mean(a)``, ``np.std(a)`` and ``entr(a).sum()``. Any NaN feature is
    replaced by 0.

    NaN never compares equal to anything (itself included), so the check has
    to use ``np.isnan`` rather than ``==``.
    """
    features = []
    for a in arr:
        features.extend([np.mean(a), np.std(a), entr(a).sum()])
    return [0 if np.isnan(value) else value for value in features]
def objective(x):
    """Return ``(H_Y - H(Y|Q)) / ln2`` for the solution vector ``x``.

    ``x`` is mapped through ``mul_mx`` to a joint table over Q (rows) and
    Y (``n_y`` columns). Values numerically indistinguishable from zero are
    returned as exactly 0.
    """
    # Map solution vector x to joint distribution over Q and Y
    joint = x.dot(mul_mx).reshape((-1, n_y))
    joint += 1e-12  # keeps the row marginals strictly positive

    marginal_q = joint.sum(axis=1)
    conditional = joint / marginal_q[:, None]
    cond_entropy = entr(conditional).sum(axis=1).dot(marginal_q)

    value = (H_Y - cond_entropy) / ln2
    return 0 if np.isclose(value, 0) else value
def entropy(p, axis=0):
    """Return the entropy, in bits, of a (possibly unnormalised) distribution.

    Arguments:
    p -- weights of the distribution; they are rescaled to sum to 1 along
         the axis of interest before the entropy is taken.
    axis -- axis along which to compute the entropy (default: 0)
    """
    weights = np.asarray(p)
    total = np.sum(weights, axis=axis, keepdims=True)
    probabilities = 1.0 * weights / total
    nats = np.sum(entr(probabilities), axis=axis)
    return nats / np.log(2)
def getEntropy_ForTerm(wordParam, corpus2SearchParam):
    """Return ``entr`` of the relative frequency of ``wordParam``.

    Every document is split on single spaces and the fraction of its tokens
    equal to ``wordParam`` is computed.

    NOTE(review): only the fraction from the LAST document of the corpus is
    passed to ``entr``; the values for earlier documents are not used. This
    looks unintended -- confirm whether the entropy over all documents was
    meant. An empty corpus raises because no fraction is ever computed.
    """
    for document in corpus2SearchParam:
        tokens = document.split(' ')
        hits = tokens.count(wordParam)
        tokenProb = float(hits) / float(len(tokens))
    return entr(tokenProb)
def band_features_unit(x, fmin, fmax):
    """Extract band features for one signal.

    ``x`` is band-pass filtered with a 4th-order Butterworth filter whose
    edges are ``max(0.1, fmin)`` and ``fmax`` divided by 25 (presumably the
    Nyquist frequency of a 50 Hz recording -- TODO confirm). Returns the
    tuple ``(MMD(filtered), esis_epoch(filtered, fmin, fmax), entropy)``,
    where ``entropy`` is the entropy (in nats) of the filtered signal's
    absolute values normalised to sum to one.
    """
    low_edge = max(0.1, fmin)
    band = np.array([low_edge, fmax]) / 25
    b, a = sig.butter(4, band, btype='bandpass')
    filtered = sig.lfilter(b, a, x)

    magnitude = np.absolute(filtered)
    p = magnitude / magnitude.sum()

    mmd = MMD(filtered)
    esis = esis_epoch(filtered, fmin, fmax)
    return mmd, esis, entr(p).sum()
def saveConfToFile(store, coords):
    """Write per-point class confidences to ``coordsConf.txt``.

    One line is written per entry of ``coords``: the first three coordinate
    values followed, for every class index ``i``, by the label id that
    ``remapper`` maps to ``i`` and the softmax probability of that class
    computed from ``store[p]``. Fields are separated by single spaces.

    ``store`` must provide ``.numpy()``. The file is opened in a ``with``
    block so it is closed even if writing fails part-way.
    """
    store = store.numpy()
    with open("coordsConf.txt", "w") as out:
        for p, point in enumerate(coords):
            out.write(str(point[0]) + " " + str(point[1]) + " " + str(point[2]))
            exp_scores = np.exp(store[p])
            softmax = exp_scores / np.sum(exp_scores)
            for i in range(len(store[p])):
                # first position in remapper holding class index i
                label_id = np.where(remapper == i)[0][0]
                out.write(" " + str(label_id) + " " + str(softmax[i]))
            out.write("\n")
def points_by_entropy(model, x):
    """Split predictions into zero-entropy and high-entropy points.

    The entropy (in nats) of every row of ``model.predict(x)`` is computed.
    Returns ``[zero_entropy_index, high_entropy_index]``, each the result of
    ``np.where``: points whose entropy is exactly 0, and points whose
    entropy exceeds twice the standard deviation of all entropies.
    """
    predictions = model.predict(x)
    row_entropy = np.sum(entr(predictions), axis=1)

    # threshold is relative to the spread of the entropies themselves
    threshold = 2 * np.std(row_entropy)

    high_entropy_index = np.where(row_entropy > threshold)
    zero_entropy_index = np.where(row_entropy == 0)
    return [zero_entropy_index, high_entropy_index]
def scipy_entropy(pk, qk=None, base=None):
    """Entropy of ``pk``, or relative entropy of ``pk`` with respect to ``qk``.

    Both inputs are normalised to sum to one along axis 0. With ``qk`` given
    the result is ``sum(rel_entr(pk, qk))``; otherwise ``sum(entr(pk))``.
    The result is in nats unless ``base`` is given, in which case it is
    divided by ``log(base)``.

    Raises ValueError if ``qk`` and ``pk`` differ in length.
    """
    pk = np.asarray(pk)
    pk = 1.0 * pk / np.sum(pk, axis=0)

    if qk is not None:
        qk = np.asarray(qk)
        if len(qk) != len(pk):
            raise ValueError("qk and pk must have same length.")
        qk = 1.0 * qk / np.sum(qk, axis=0)
        terms = special.rel_entr(pk, qk)
    else:
        terms = special.entr(pk)

    total = np.sum(terms, axis=0)
    if base is None:
        return total
    return total / np.log(base)
def _overlay_entropy(aix, a_areas, ab_areas, base): """ direct function to compute overlay entropies """ mapping = pandas.DataFrame.from_dict( dict( a=aix, area=ab_areas, a_area=a_areas[aix], )) mapping["frac"] = mapping.area / mapping.a_area mapping["entropy"] = entr(mapping.frac.values) / numpy.log(base) result = mapping.groupby("a").entropy.sum().values result[result < 0] = 0 return result
def qre_exploitability(dist, payoff_tensor, temperature=0.):
    """Shannon-regularized exploitability of ``dist`` in a symmetric game.

    Args:
      dist: 1-d np.array, current estimate of nash distribution
      payoff_tensor: (>=1 x A x ... x A) np.array, payoffs for each joint
        action, assumed to be non-negative
      temperature: non-negative float, weight of the entropy bonus
    Returns:
      exploitability (float): regularized payoff of the best response minus
        regularized payoff of dist
    """
    num_players = payoff_tensor.shape[0]
    # payoff gradient of player 0 when every player follows dist
    payoff_grad = misc.pt_reduce(payoff_tensor[0], [dist] * num_players, [0])

    if temperature > 0:
        best_response = special.softmax(payoff_grad / temperature)
    else:
        # unregularized: uniform over all maximizing actions
        is_max = (payoff_grad == np.max(payoff_grad))
        best_response = np.zeros_like(dist)
        best_response[is_max] = 1. / is_max.sum()

    value_br = (payoff_grad.dot(best_response)
                + temperature * special.entr(best_response).sum())
    value_dist = (payoff_grad.dot(dist)
                  + temperature * special.entr(dist).sum())
    return value_br - value_dist
def compute_entropy(m, nan_as_zero=True):
    """
    Compute the base-2 entropy of every row of a number-of-paths matrix.

    :param m: number of paths matrix, matrix[i][j] has the number of times a path from i arrives to j
    :param nan_as_zero: if True, non-finite results (e.g. the NaN produced by a row of 0s) are replaced
                        via ``np.nan_to_num``, so an all-zero row gets entropy 0 instead of NaN
    :return: array of entropies, each value is the entropy of a row
    """
    # each row is rescaled so that it sums to 1
    row_totals = m.sum(axis=1, keepdims=True)
    p = m / row_totals

    # entr works in nats; dividing by ln(2) converts to bits
    entropies = entr(p).sum(axis=1) / np.log(2)

    return np.nan_to_num(entropies) if nan_as_zero else entropies
def examine_points(x):
    """Score raw points with ``MODEL`` and flag the most/least certain ones.

    ``x`` is labelled with ``FEATURE_NAMES``, the encoded ``service``,
    ``flag`` and ``protocol_type`` columns are mapped back through their
    respective maps, the multi-categorical features are one-hot encoded, and
    the columns are arranged as ``DNN_FEATURE_ORDER`` (absent one-hot columns
    are filled with 0) before prediction.

    Returns ``(zero_entropy_index, high_entropy_index, p)`` where ``p`` is
    the model output, and the two indices are ``np.where`` results for
    predictions with entropy exactly 0 (most confident) and entropy above
    one standard deviation of all entropies (most confused).
    """
    frame = pd.DataFrame(x, columns=FEATURE_NAMES)
    n_rows = len(frame.index)

    # Reverse label encoding
    decoders = (('service', SERVICE_MAP),
                ('flag', FLAG_MAP),
                ('protocol_type', PROTOCOL_MAP))
    for column, decoder in decoders:
        frame[column] = frame[column].map(decoder)

    # One hot encode the categorical features; the raw column is dropped
    # from the frame once its dummies have been built
    pieces = [frame]
    for feature in MULTI_CATEGORICAL_FEATURES:
        pieces.append(pd.get_dummies(frame[feature], prefix=feature))
        frame.drop(feature, inplace=True, axis=1)
    frame = pd.concat(pieces, axis=1)

    # Categories absent from this sample produce no dummy column: add them
    # as all-zero columns so the layout matches what the network expects
    for col in set(DNN_FEATURE_ORDER) - set(frame.columns):
        frame[col] = [0] * n_rows
    frame = frame[DNN_FEATURE_ORDER]

    # For now only entropy-based selection; extend later to within-class points
    p = MODEL.predict(frame.values)
    point_entropy = np.sum(entr(p), axis=1)
    spread = np.std(point_entropy)

    high_entropy_index = np.where(point_entropy > 1 * spread)
    zero_entropy_index = np.where(point_entropy == 0)
    return zero_entropy_index, high_entropy_index, p
def get_entropy_by_word(seq):
    """Return per-prefix next-word entropies and log-normalisers for ``seq``.

    For every proper, non-empty prefix of ``seq`` the module-level ``model``
    is queried for raw next-word log-probabilities, which are normalised
    with ``logsumexp``. The entropy (in nats) of the normalised distribution
    and the normaliser itself are collected, and the three highest-scoring
    words are printed together with the prefix.

    Returns a pair of arrays ``(entropies, lses)``.
    """
    entropies, lses = [], []
    for end in range(1, len(seq)):
        prefix = seq[:end]
        state = model.get_state(prefix, bos=False)[0]
        indices, logprobs = model.next_word_logprobs_raw(state, prefix[-1])

        normaliser = logsumexp(logprobs)
        lses.append(normaliser)
        logprobs -= normaliser  # in place, as returned by the model

        entropies.append(entr(np.exp(logprobs)).sum())

        top_words = [model.id2str[indices[idx]]
                     for idx in np.argsort(logprobs)[-3:]]
        print(top_words, prefix)
    return np.array(entropies), np.array(lses)
def entropy(model, X, img_name):
    """Return the mean base-2 entropy of the model's first prediction for ``X``.

    The entropy is taken along axis 1 of ``model.predict(X)[0]``. When the
    module-level flag ``is_plot`` is truthy, the entropies are also reshaped
    to ``(model.outputWidth, model.outputHeight)`` and saved as an image
    named ``img_name`` under ``./tmp/entropy/`` (created if missing).
    """
    prediction = model.predict(X)[0]
    unit_entropy = entr(prediction).sum(axis=1) / np.log(2)

    if is_plot:
        grid = np.reshape(unit_entropy,
                          (model.outputWidth, model.outputHeight))
        plt.imshow(grid)
        plt.colorbar()

        out_dir = "./tmp/entropy/"
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)
        plt.savefig(out_dir + img_name)
        plt.close()

    return np.mean(unit_entropy, axis=0)
def getEntropy(self, data):
    """Build one base-2 entropy feature per feature group.

    ``self.getGroupFeatures(data)`` supplies a mapping from group name to
    the columns of ``data`` in that group. For each group the selected
    values are normalised row-wise (a tiny constant is added to the row sum
    to avoid division by zero) and the row entropy is stored in a column
    named ``'grp_entropy_' + group``.

    Returns a DataFrame holding one such column per group.
    """
    print('-------------------------------')
    print(' STEP : Feature Fusion using Entropy')
    print('-------------------------------')

    group_features = self.getGroupFeatures(data)
    entropy_columns = dict()
    for group, columns in group_features.items():
        values = data[columns].values
        row_totals = values.sum(axis=1, keepdims=True) + .0000000000001
        p = values / row_totals
        entropy_columns['grp_entropy_' + group] = entr(p).sum(axis=1) / np.log(2)

    return pd.DataFrame(entropy_columns)
def compute_entropy(frame: list, bit_depth: int):
    """Return a normalised entropy of the value histogram of ``frame``.

    The values are shifted so the minimum becomes 0, truncated to integers
    and histogrammed with one bin per integer up to the maximum. The result
    is ``sum(entr(p)) / log2(number_of_bins)``.

    The input is never modified: the shift is applied to a copy. An empty
    frame, or one whose values all fall into a single bin, has entropy 0.

    NOTE(review): ``entr`` is in nats while the divisor is a base-2 log, so
    the value is neither plain bits nor a ratio in [0, 1]; the formula is
    kept as found -- confirm the intended unit.

    :param frame: 1-d sequence or array of sample values
    :param bit_depth: currently unused
    """
    values = np.asarray(frame)
    if values.size == 0:
        return 0

    values = (values - values.min()).astype('int64')
    nbins = int(values.max()) + 1

    # occurrences of each integer between 0 and the maximum
    counts = np.bincount(values, minlength=nbins)
    p = counts / float(len(values))

    if len(p) <= 1:
        # log2(1) == 0 would turn the normalisation into 0/0
        return 0
    return np.sum(entr(p) / np.log2(len(p)))
def get_entropy(pk, base=None):
    """
    Entropy of a (possibly unnormalised) probability sequence.

    @param pk: a probability sequence; it is rescaled to sum to 1 along axis 0
    @param base: base for log; natural log is used when omitted
    @return: the entropy of the sequence
    """
    weights = asarray(pk)
    probabilities = 1.0 * weights / np.sum(weights, axis=0)

    # calculate entropy (in nats)
    total = np.sum(entr(probabilities), axis=0)
    if base is None:
        return total
    return total / log(base)
def _entropy(self, n, p): k = np.r_[0:n + 1] vals = self._pmf(k, n, p) return np.sum(entr(vals), axis=0)
def _entropy(self, M, n, N): k = np.r_[N - (M - n):min(n, N) + 1] vals = self.pmf(k, M, n, N) return np.sum(entr(vals), axis=0)
def _entropy(self, p): return entr(p) + entr(1-p)