Example 1
import numpy as np
from scipy.special import softmax, gammaln, logsumexp


def log_like(var_par, draw, value, k):
    # Split the variational parameters into means and log-stds,
    # then reparameterize: sample = draw * std + mu.
    l = int(len(var_par) / 2)
    mu, cov = var_par[:l], np.exp(var_par[l:])
    samples = draw * cov + mu
    # Mixture weights and per-component multinomial probabilities.
    pi = softmax(samples[:k])
    d = len(value)
    thetas = softmax(samples[k:].reshape([k, d]), axis=1)
    # Multinomial log-likelihood of `value` under each component,
    # weighted by the mixture probabilities.
    n = np.sum(value)
    logps = np.log(pi) + np.dot(np.log(thetas), value) \
        + gammaln(n + 1) - np.sum(gammaln(value + 1))
    # Marginalize over components in log space.
    return logsumexp(logps)
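A minimal usage sketch, assuming the layout these examples share: var_par stacks k + k*d variational means followed by as many log-standard-deviations, draw is a standard-normal vector of the latent dimension, and value is one vector of multinomial counts. All sizes and values below are invented for illustration.

# Hypothetical usage sketch; k, d and the counts are assumptions.
k, d = 3, 5
n_latent = k + k * d
var_par = np.zeros(2 * n_latent)          # means = 0, log-stds = 0
draw = np.random.randn(n_latent)          # one standard-normal draw
value = np.array([2, 0, 1, 4, 3])         # one vector of counts
print(log_like(var_par, draw, value, k))  # single-draw log-likelihood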
Example 2
from scipy.stats import dirichlet


def logprior(var_par, draw, k):
    # Log of the prior probabilities under uniform Dirichlet priors.
    d = int((len(var_par) / 2 - k) / k)
    l = int(len(var_par) / 2)
    alpha = np.ones(k)  # Prior for mixture probabilities
    beta = np.ones(d)   # Prior for multinomials
    mu, cov = var_par[:l], np.exp(var_par[l:])
    samples = draw * cov + mu
    pi = softmax(samples[:k])
    thetas = softmax(samples[k:].reshape([k, d]), axis=1)
    # `ldt` (defined elsewhere) appears to be the log-Jacobian of the
    # softmax transform, needed for the change of variables.
    return dirichlet.logpdf(pi, alpha) + np.sum(
        [dirichlet.logpdf(theta, beta) + ldt(theta)
         for theta in thetas]) + ldt(pi)
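These two pieces presumably feed a black-box variational inference objective. A hedged sketch of how they might combine into a single-draw ELBO estimate, adding the exact entropy of the diagonal-Gaussian variational family; elbo_estimate is a hypothetical name, not taken from the source.

# Hypothetical sketch: one-draw ELBO estimate (assumption, not the repo's code).
def elbo_estimate(var_par, draw, value, k):
    l = int(len(var_par) / 2)
    # Exact entropy of the diagonal Gaussian q with log-stds var_par[l:].
    entropy = np.sum(var_par[l:]) + 0.5 * l * np.log(2 * np.pi * np.e)
    return (log_like(var_par, draw, value, k)
            + logprior(var_par, draw, k) + entropy)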
Example 3
def log_like(var_par, draw, value, k):
    d = len(value)
    # The last column of `value` is dropped; its mass enters through
    # the lam * (thetas2 - 1) term below.
    value = value[:-1]
    l = int(len(var_par) / 2)
    mu, cov = var_par[:l], np.exp(var_par[l:])
    samples = draw * cov + mu
    # Mixture weights, per-component rates, and multinomial probabilities.
    pi = softmax(samples[:k])
    lam = np.exp(samples[k:2 * k])
    thetas = softmax(samples[2 * k:].reshape([k, d]), axis=1)
    thetas1 = thetas[:, :-1]
    thetas2 = thetas[:, -1]
    # Per-component log-likelihood of independent Poisson counts with
    # rates lam * thetas1 (note sum(thetas1) = 1 - thetas2), plus the
    # mixture weights.
    logps = np.log(pi) + np.dot(np.log(thetas1.T * lam).T, value) \
        + lam * (thetas2 - 1) - np.sum(gammaln(value + 1))
    return logsumexp(logps)
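In this variant the latent vector must also carry the k log-rates, so its length is k + k + k*d. A hypothetical layout sketch; all sizes and values are invented.

# Hypothetical usage of the Poisson-rate variant; sizes are assumptions.
k, d = 3, 5                        # d counts the dropped last column too
n_latent = k + k + k * d           # mixture weights, log-rates, multinomials
var_par = np.zeros(2 * n_latent)   # means = 0, log-stds = 0
draw = np.random.randn(n_latent)
value = np.array([2, 0, 1, 4, 3])  # last entry is dropped inside log_like
print(log_like(var_par, draw, value, k))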
Example 4
def np_log_like(var_par, draw, data, k):
    # Vectorized version of the log_like above: evaluates all rows of
    # `data` at once instead of one count vector at a time.
    d = data.shape[1]
    gln_test_values = np.sum(gammaln(data[:, :-1] + 1), axis=1)
    data = data[:, :-1]
    l = int(len(var_par) / 2)
    mu, cov = var_par[:l], np.exp(var_par[l:])
    samples = draw * cov + mu
    pi = softmax(samples[:k])
    lam = np.exp(samples[k:2 * k])
    thetas = softmax(samples[2 * k:].reshape([k, d]), axis=1)
    thetas1 = thetas[:, :-1]
    thetas2 = thetas[:, -1]
    # Shape (k, N): per-component log-likelihood of every data point.
    logps = (np.log(pi) + np.dot(data, np.log(thetas1.T * lam))
             + lam * (thetas2 - 1)).T - gln_test_values
    return np.sum(logsumexp(logps, axis=0))
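np_log_like should agree with summing the per-row log_like of Example 3 over the rows of data. A hedged consistency check, reusing the hypothetical layout from the previous sketch:

# Hypothetical check that the vectorized and per-row versions agree.
data = np.array([[2, 0, 1, 4, 3],
                 [1, 1, 0, 2, 5]])
per_row = sum(log_like(var_par, draw, row, k) for row in data)
print(np.allclose(np_log_like(var_par, draw, data, k), per_row))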
Example 5
def pred_like(test_data, var_par, n_samples, k):
    # Monte Carlo estimate of the predictive log-likelihood on held-out data.
    N_test, d = test_data.shape
    l = int(len(var_par) / 2)
    gln_test_n = gammaln(test_data.sum(axis=1) + 1)
    gln_test_values = np.sum(gammaln(test_data + 1), axis=1)
    mu, cov = var_par[:l], np.exp(var_par[l:])  # unused below; sampling is delegated
    like_matrix = np.empty([N_test, n_samples])
    # draw_samples and logsumexp2 are helpers defined elsewhere in the repo;
    # a plausible sketch follows this example.
    samples = draw_samples(var_par, n_samples)
    for s, sample in enumerate(samples):
        pi = softmax(sample[:k])
        thetas = softmax(sample[k:].reshape([k, d]), axis=1)
        # Shape (k, N_test): multinomial log-likelihood of every test point
        # under each mixture component.
        logps = (np.log(pi) + np.dot(test_data, np.log(
            thetas.T))).T + gln_test_n - gln_test_values
        like_matrix[:, s] = logsumexp2(np.stack(logps)[:, :N_test], axis=0)
    # For each test point, log of the average likelihood over samples.
    return np.mean(logsumexp2(like_matrix, axis=1) - np.log(n_samples))
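pred_like relies on two helpers defined elsewhere in the repo. A plausible reconstruction inferred only from how they are used here; treat both as assumptions, not the repo's actual code.

# Assumed helpers, reconstructed from usage in pred_like.
def draw_samples(var_par, n_samples):
    # Reparameterized draws from the diagonal-Gaussian variational family,
    # one sample per row.
    l = int(len(var_par) / 2)
    mu, cov = var_par[:l], np.exp(var_par[l:])
    return np.random.randn(n_samples, l) * cov + mu

logsumexp2 = logsumexp  # appears to behave like scipy.special.logsumexp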
Example 6

def get_frame_info(self, frame):
    # Method of a larger agent class; `flags` is a module-level config object.
    actor = frame['policy']
    # Rounded action distributions and raw logits, one entry per policy head.
    distribution = [np.around(softmax(head), decimals=3) for head in actor]
    logits = [np.around(head, decimals=3) for head in actor]
    value = np.around(frame['value'], decimals=3)
    value_info = "reward={}, manipulated_reward={}, value={}\n".format(
        frame['reward'], frame['manipulated_reward'], value)
    actor_info = "logits={}, distribution={}\n".format(logits, distribution)
    action_info = "action={}\n".format(frame['action'])
    extra_info = "extra={}\n".format(frame['extra'])
    frame_info = {"log": value_info + actor_info + action_info + extra_info}
    if flags.save_episode_screen and frame['screen'] is not None:
        frame_info["screen"] = frame['screen']
    return frame_info
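A hypothetical frame dict showing the keys the method reads; every value here is invented for illustration.

# Hypothetical input for get_frame_info; keys inferred from the method body.
frame = {
    'policy': [np.array([0.1, 2.0, -1.0])],  # one logits head
    'value': 0.523,
    'reward': 1.0,
    'manipulated_reward': 0.9,
    'action': 1,
    'extra': None,
    'screen': None,
}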