import numpy as np
from scipy.special import softmax, gammaln, logsumexp
from scipy.stats import dirichlet

# ldt, draw_samples, logsumexp2 and flags are helpers defined elsewhere in the repo.


def log_like(var_par, draw, value, k):
    # Single-draw log-likelihood of one count vector under a k-component multinomial
    # mixture, using the reparameterization samples = draw * sigma + mu.
    l = int(len(var_par) / 2)
    mu, cov = var_par[:l], np.exp(var_par[l:])
    samples = draw * cov + mu
    pi = softmax(samples[:k])                                # mixture weights
    d = len(value)
    thetas = softmax(samples[k:].reshape([k, d]), axis=1)    # per-component category probabilities
    n = np.sum(value)
    # Per-component multinomial log-pmf plus log mixture weight.
    logps = (np.log(pi) + np.dot(np.log(thetas), value)
             + gammaln(n + 1) - np.sum(gammaln(value + 1)))
    # Marginalize over the component assignment.
    return logsumexp(logps)

def logprior(var_par, draw, k):
    # Log prior density evaluated at the reparameterized sample.
    d = int((len(var_par) / 2 - k) / k)
    l = int(len(var_par) / 2)
    alpha = np.ones(k)  # Symmetric Dirichlet prior for the mixture probabilities
    beta = np.ones(d)   # Symmetric Dirichlet prior for the multinomials
    mu, cov = var_par[:l], np.exp(var_par[l:])
    samples = draw * cov + mu
    pi = softmax(samples[:k])
    thetas = softmax(samples[k:].reshape([k, d]), axis=1)
    # ldt adds a correction term (presumably the log-Jacobian of the softmax transform).
    return dirichlet.logpdf(pi, alpha) + np.sum(
        [dirichlet.logpdf(theta, beta) + ldt(theta) for theta in thetas]) + ldt(pi)

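# A minimal usage sketch (illustrative only, not part of the original code): with k
# components and d categories, var_par stacks the variational means followed by the log
# standard deviations for the k mixture-weight logits plus k rows of d category logits,
# so len(var_par) == 2 * (k + k * d). The logprior call assumes the repo helper ldt is in scope.
def _demo_multinomial_mixture():
    k, d = 3, 5
    dim = k + k * d
    var_par = np.zeros(2 * dim)              # means = 0, log standard deviations = 0
    draw = np.random.standard_normal(dim)    # standard normal draw to be reparameterized
    value = np.array([4, 1, 0, 2, 3])        # one observed count vector
    return log_like(var_par, draw, value, k) + logprior(var_par, draw, k)
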
def log_like(var_par, draw, value, k):
    # Variant of log_like for the model with a per-component rate lam; the last entry
    # of value is dropped and enters only through the last column of thetas.
    d = len(value)
    value = value[:-1]
    l = int(len(var_par) / 2)
    mu, cov = var_par[:l], np.exp(var_par[l:])
    samples = draw * cov + mu
    pi = softmax(samples[:k])                                  # mixture weights
    lam = np.exp(samples[k:2 * k])                             # per-component rates
    thetas = softmax(samples[2 * k:].reshape([k, d]), axis=1)
    thetas1 = thetas[:, :-1]
    thetas2 = thetas[:, -1]
    logps = (np.log(pi) + np.dot(np.log(thetas1.T * lam).T, value)
             + lam * (thetas2 - 1) - np.sum(gammaln(value + 1)))
    return logsumexp(logps)

def np_log_like(var_par, draw, data, k):
    # Vectorized version of the rate-model log_like above: evaluates every row of data
    # at once and returns the sum of the per-observation log-likelihoods.
    d = data.shape[1]
    gln_test_values = np.sum(gammaln(data[:, :-1] + 1), axis=1)
    data = data[:, :-1]
    l = int(len(var_par) / 2)
    mu, cov = var_par[:l], np.exp(var_par[l:])
    samples = draw * cov + mu
    pi = softmax(samples[:k])
    lam = np.exp(samples[k:2 * k])
    thetas = softmax(samples[2 * k:].reshape([k, d]), axis=1)
    thetas1 = thetas[:, :-1]
    thetas2 = thetas[:, -1]
    # (k, N) matrix of per-component log terms for every observation.
    logps = (np.log(pi) + np.dot(data, np.log(thetas1.T * lam))
             + lam * (thetas2 - 1)).T - gln_test_values
    return np.sum(logsumexp(logps, axis=0))

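# Hypothetical usage sketch (not from the original code): the rate-model layout adds k
# log-rate entries between the mixture-weight logits and the category logits, so
# len(var_par) == 2 * (2 * k + k * d), and np_log_like scores a whole data matrix at once.
def _demo_np_log_like():
    k, d = 3, 5
    dim = 2 * k + k * d
    var_par = np.zeros(2 * dim)
    draw = np.random.standard_normal(dim)
    data = np.random.randint(0, 5, size=(10, d))   # 10 synthetic count vectors
    return np_log_like(var_par, draw, data, k)
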
def pred_like(test_data, var_par, n_samples, k):
    # Predictive log-likelihood of held-out data: averages the multinomial-mixture
    # likelihood over n_samples draws from the variational posterior and returns the
    # mean per-observation log predictive likelihood.
    N_test, d = test_data.shape
    l = int(len(var_par) / 2)
    gln_test_n = gammaln(test_data.sum(axis=1) + 1)
    gln_test_values = np.sum(gammaln(test_data + 1), axis=1)
    mu, cov = var_par[:l], np.exp(var_par[l:])
    like_matrix = np.empty([N_test, n_samples])
    samples = draw_samples(var_par, n_samples)
    for s, sample in enumerate(samples):
        pi = softmax(sample[:k])
        thetas = softmax(sample[k:].reshape([k, d]), axis=1)
        logps = (np.log(pi) + np.dot(test_data, np.log(thetas.T))).T \
            + gln_test_n - gln_test_values
        # Marginalize over components for every test point.
        like_matrix[:, s] = logsumexp2(np.stack(logps)[:, :N_test], axis=0)
    # Log of the Monte Carlo average over posterior draws, averaged over test points.
    return np.mean(logsumexp2(like_matrix, axis=1) - np.log(n_samples))

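# Hypothetical usage sketch (not from the original code): pred_like uses the same
# k + k * d parameter layout as the multinomial-mixture log_like/logprior pair and
# relies on the repo helpers draw_samples and logsumexp2 being in scope.
def _demo_pred_like():
    k, d = 3, 5
    dim = k + k * d
    var_par = np.zeros(2 * dim)
    test_data = np.random.randint(0, 5, size=(10, d))   # 10 held-out count vectors
    return pred_like(test_data, var_par, n_samples=50, k=k)
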
def get_frame_info(self, frame):
    # Build a human-readable log entry (and optionally the rendered screen)
    # for a single recorded frame.
    actor = frame['policy']
    distribution = [np.around(softmax(head), decimals=3) for head in actor]
    logits = [np.around(head, decimals=3) for head in actor]
    value = np.around(frame['value'], decimals=3)
    value_info = "reward={}, manipulated_reward={}, value={}\n".format(
        frame['reward'], frame['manipulated_reward'], value)
    actor_info = "logits={}, distribution={}\n".format(logits, distribution)
    action_info = "action={}\n".format(frame['action'])
    extra_info = "extra={}\n".format(frame['extra'])
    frame_info = {"log": value_info + actor_info + action_info + extra_info}
    if flags.save_episode_screen and frame['screen'] is not None:
        frame_info["screen"] = frame['screen']
    return frame_info