def update_beliefs(self, tau, observation, response):
    if tau == 0:
        self.posterior_states[0] = 1. / self.ns
        self.posterior_durations[0] = 1. / self.nd
        self.posterior_states[1] = self.prior_states
        self.posterior_durations[1] = self.prior_durations

        # posterior over observations: prior times expected likelihood
        # under the predicted state distribution.
        # note: [:, np.newaxis] mirrors the else branch below so the
        # prior broadcasts over responses
        self.posterior_observations[0] = np.exp(
            ln(self.prior_observations[:, np.newaxis])
            + (self.posterior_states[1][np.newaxis, :, np.newaxis]
               * ln(self.generative_model_observations)).sum(axis=1))
        self.posterior_observations[0] /= \
            self.posterior_observations[0].sum(axis=0)

        self.posterior_policies[0] = softmax(
            ln(self.prior_actions)
            - (self.posterior_observations[0]
               * ln(self.posterior_observations[0])).sum(axis=0)
            + (self.posterior_observations[0][:, np.newaxis, :]
               * self.posterior_states[1, np.newaxis, :, np.newaxis]
               * ln(self.generative_model_observations)).sum(axis=(0, 1)))
    else:
        # note: old_post_s is currently unused
        old_post_s = self.posterior_states[1].copy()
        old_post_d = self.posterior_durations[1].copy()

        self.posterior_states[0] = softmax(
            ln(self.prior_states)
            + ln(self.generative_model_observations[observation, :, response]))
        self.posterior_durations[0] = old_post_d

        self.posterior_states[1] = softmax(
            (old_post_d[np.newaxis, np.newaxis, :]
             * self.posterior_states[0][np.newaxis, :, np.newaxis]
             * ln(self.generative_model_states)).sum(axis=(1, 2)))
        self.posterior_durations[1] = softmax(
            (old_post_d[np.newaxis, :]
             * ln(self.generative_model_durations)).sum(axis=1))
        # + (self.posterior_states[0][np.newaxis,:,np.newaxis]
        #    * self.posterior_states[1][:,np.newaxis,np.newaxis]
        #    * ln(self.generative_model_states)).sum(axis=(0,1)))

        self.posterior_observations[0] = np.exp(
            ln(self.prior_observations[:, np.newaxis])
            + (self.posterior_states[1][np.newaxis, :, np.newaxis]
               * ln(self.generative_model_observations)).sum(axis=1))
        self.posterior_observations[0] /= \
            self.posterior_observations[0].sum(axis=0)

        self.posterior_policies[0] = softmax(
            ln(self.prior_actions)
            - (self.posterior_observations[0]
               * ln(self.posterior_observations[0])).sum(axis=0)
            + (self.posterior_observations[0][:, np.newaxis, :]
               * self.posterior_states[1, np.newaxis, :, np.newaxis]
               * ln(self.generative_model_observations)).sum(axis=(0, 1)))
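# The methods in this file rely on ln() and softmax() helpers defined
# elsewhere in the repo. A minimal sketch of what they are assumed to do
# (names match the usage above; the -32 floor is an assumption, not the
# repo's actual choice):

import numpy as np

def ln(x):
    # elementwise log that maps zeros to a large negative constant
    # instead of -inf, so terms like post * ln(post) stay finite
    with np.errstate(divide='ignore'):
        out = np.log(x)
    return np.nan_to_num(out, neginf=-32.0)

def softmax(x, axis=0):
    # numerically stable softmax along the given axis
    x = x - x.max(axis=axis, keepdims=True)
    e = np.exp(x)
    return e / e.sum(axis=axis, keepdims=True)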
def predict_proba(self, windows):
    """
    Predict class probabilities.

    Should return a matrix P of probabilities,
    with each row corresponding to a row of X.

    windows = array (n x windowsize),
        each row is a window of indices
    """
    # handle singleton input by making sure we have
    # a list-of-lists
    if not hasattr(windows[0], "__iter__"):
        windows = [windows]

    #### YOUR CODE HERE ####
    # concatenate the embedding vectors of the words in each window
    # (despite the name, these are embedding lookups, not one-hot vectors)
    onehot_vecs = asarray(
        [self.sparams.L[windows[i], :].flatten()
         for i in range(len(windows))])
    a1 = self.params.W.dot(onehot_vecs.T).T + self.params.b1
    h = tanh(a1)
    a2 = self.params.U.dot(h.T).T + self.params.b2
    P = softmax(a2)  # y_hat
    #### END YOUR CODE ####

    return P  # rows are output for each input
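# Hypothetical call pattern for predict_proba (the trained `model` and
# the word indices below are assumptions for illustration, not from the
# repo):
#
#   windows = np.array([[0, 1, 2],
#                       [1, 2, 3]])   # two 3-word windows of vocab indices
#   P = model.predict_proba(windows)  # shape (2, n_classes), rows sum to 1
#   labels = P.argmax(axis=1)         # most likely class per window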
def _acc_grads(self, window, label):
    """
    Accumulate gradients, given a training point
    (window, label) of the format

    window = [x_{i-1} x_{i} x_{i+1}] # three ints
    label = {0,1,2,3,4} # single int, gives class

    Your code should update self.grads and self.sgrads,
    in order for gradient_check and training to work.

    So, for example:
    self.grads.U += (your gradient dJ/dU)
    self.sgrads.L[i] = (gradient dJ/dL[i]) # this adds an update for that index
    """
    #### YOUR CODE HERE ####
    # concatenated embedding vectors of the window words, as a row vector
    onehot_vecs = expand_dims(self.sparams.L[window, :].flatten(), axis=0)

    ##
    # Forward propagation
    a1 = self.params.W.dot(onehot_vecs.T).T + self.params.b1
    # tanh expressed via the sigmoid: tanh(x) = 2*sigmoid(2x) - 1
    s = sigmoid(2.0 * a1)
    h = 2.0 * s - 1.0
    a2 = self.params.U.dot(h.T).T + self.params.b2
    y_hat = softmax(a2)

    ##
    # Backpropagation
    t = zeros(y_hat.shape)
    t[:, label] = 1
    delta_out = y_hat - t

    self.grads.U += h.T.dot(delta_out).T + self.lreg * self.params.U
    self.grads.b2 += delta_out.flatten()

    # tanh'(a1) = 4 * sigmoid'(2*a1), with s = sigmoid(2*a1)
    delta_hidden = delta_out.dot(self.params.U) * 4.0 * sigmoid_grad(s)
    self.grads.W += delta_hidden.T.dot(onehot_vecs) + self.lreg * self.params.W
    self.grads.b1 += delta_hidden.flatten()

    # gradient w.r.t. the three word vectors in the window; assumes
    # 50-dimensional embeddings, so the 150-dim window gradient splits
    # into three 50-dim slices
    grad_xs = delta_hidden.dot(self.params.W).T
    self.sgrads.L[window[0]] = grad_xs[range(0, 50)].flatten()
    self.sgrads.L[window[1]] = grad_xs[range(50, 100)].flatten()
    self.sgrads.L[window[2]] = grad_xs[range(100, 150)].flatten()
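# The docstring above refers to a gradient_check routine from the course
# scaffolding. For reference, a generic central-difference checker looks
# roughly like this (a sketch, not the repo's actual implementation):

import numpy as np

def numeric_grad(f, x, eps=1e-6):
    # central-difference estimate of the gradient of a scalar function f
    # at the float array x; compare against the analytic gradients
    # accumulated in _acc_grads
    g = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'])
    while not it.finished:
        i = it.multi_index
        old = x[i]
        x[i] = old + eps
        f_plus = f(x)
        x[i] = old - eps
        f_minus = f(x)
        x[i] = old  # restore the perturbed entry
        g[i] = (f_plus - f_minus) / (2 * eps)
        it.iternext()
    return g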
def update_beliefs_states(self, tau, t, observation, policies,
                          prior, prior_pi):
    if t == 0:
        self.logzs = np.tile(ln(self.zs), (self.T, 1)).T
    self.logzs[:, t] = ln(
        self.generative_model_observations[int(observation), :])

    # estimate expected state distribution
    lforw = np.zeros((self.nh, self.T))
    lforw[:, 0] = ln(self.prior_states)
    lback = np.zeros((self.nh, self.T))
    posterior = np.zeros((self.nh, self.T, policies.shape[0]))
    neg_fe = np.zeros(policies.shape[0])
    eps = 0.01
    for pi, ppi in enumerate(prior_pi):
        if ppi > 1e-6:
            logtm = ln(self.generative_model_states[:, :, policies[pi]])
            # SARAH: check the following before publishing!
            # work on a copy so the caller's prior array is not mutated
            post = prior[:, :, pi].copy()
            not_close = True
            while not_close:
                # forward and backward log-messages along the trial
                lforw[:, 1:] = np.einsum('ijk, jk-> ik', logtm, post[:, :-1])
                lback[:, :-1] = np.einsum('ijk, ik->jk', logtm, post[:, 1:])
                logpost = lforw + self.logzs
                # damped log-space update towards the new posterior
                lp = ln(post)
                lp = (1 - eps) * lp + eps * (logpost + lback)
                new_post = softmax(lp)
                not_close = not np.allclose(post, new_post, atol=1e-3)
                post[:] = new_post
            posterior[:, :, pi] = post
            # negative free energy: expected (approximate) log-joint
            # plus posterior entropy
            neg_fe[pi] = (logpost * post).sum() - np.sum(post * ln(post))
        else:
            # policy has effectively zero prior mass; skip the update
            posterior[:, :, pi] = prior[:, :, pi]
            neg_fe[pi] = -1e10

    self.fe_pi = neg_fe
    return posterior, neg_fe
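# The while-loop above is a damped fixed-point iteration in log space:
# mix the current log-posterior with the newly computed one by a step
# size eps, renormalize, and stop when successive posteriors agree.
# A toy standalone version with a fixed target (an illustration only;
# the fixed point is then softmax(target_logits)):

import numpy as np

def damped_fixed_point(target_logits, init, eps=0.01, atol=1e-3):
    post = init
    not_close = True
    while not_close:
        lp = (1 - eps) * np.log(post) + eps * target_logits
        new_post = np.exp(lp - lp.max())
        new_post /= new_post.sum()
        not_close = not np.allclose(post, new_post, atol=atol)
        post = new_post
    return post

# example: starts uniform, converges towards softmax([0, 1, 2])
print(damped_fixed_point(np.array([0., 1., 2.]), np.ones(3) / 3))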
def update_beliefs_context(self, tau, t, reward, posterior_states,
                           posterior_policies, prior_context, policies):

    post_policies = (prior_context[np.newaxis, :]
                     * posterior_policies).sum(axis=1)
    beta = self.dirichlet_rew_params.copy()
    states = (posterior_states[:, t, :]
              * post_policies[np.newaxis, :, np.newaxis]).sum(axis=1)
    beta_prime = self.dirichlet_rew_params.copy()
    beta_prime[reward] = beta[reward] + states

    # for c in range(self.nc):
    #     for state in range(self.nh):
    #         self.generative_model_rewards[:,state,c] = \
    #             np.exp(scs.digamma(beta_prime[:,state,c])
    #                    - scs.digamma(beta_prime[:,state,c].sum()))
    #         self.generative_model_rewards[:,state,c] /= \
    #             self.generative_model_rewards[:,state,c].sum()
    #     self.rew_messages[:,t+1:,c] = \
    #         self.prior_rewards.dot(self.generative_model_rewards[:,:,c])[:,np.newaxis]
    #
    # for c in range(self.nc):
    #     for pi, cs in enumerate(policies):
    #         if self.prior_policies[pi,c] > 1e-15:
    #             self.update_messages(t, pi, cs, c)
    #         else:
    #             self.fwd_messages[:,:,pi,c] = 1./self.nh #0

    alpha = self.dirichlet_pol_params.copy()
    if t == self.T - 1:
        chosen_pol = np.argmax(post_policies)
        inf_context = np.argmax(prior_context)
        alpha_prime = self.dirichlet_pol_params.copy()
        alpha_prime[chosen_pol, :] += prior_context
        #alpha_prime[chosen_pol,inf_context] = self.dirichlet_pol_params[chosen_pol,inf_context] + 1
    else:
        alpha_prime = alpha

    if self.nc == 1:
        posterior = np.ones(1)
    else:
        # context posterior combines outcome evidence, policy evidence
        # and policy entropy
        # todo: recalc
        #outcome_surprise = ((states * prior_context[np.newaxis,:]).sum(axis=1)[:,np.newaxis] * (scs.digamma(beta_prime[reward]) - scs.digamma(beta_prime.sum(axis=0)))).sum(axis=0)
        outcome_surprise = (posterior_policies
                            * ln(self.fwd_norms.prod(axis=0))).sum(axis=0)
        entropy = -(posterior_policies * ln(posterior_policies)).sum(axis=0)
        #policy_surprise = (post_policies[:,np.newaxis] * scs.digamma(alpha_prime)).sum(axis=0) - scs.digamma(alpha_prime.sum(axis=0))
        policy_surprise = (posterior_policies
                           * scs.digamma(alpha_prime)).sum(axis=0) \
                          - scs.digamma(alpha_prime.sum(axis=0))
        posterior = outcome_surprise + policy_surprise + entropy
        #+ np.nan_to_num((posterior_policies * ln(self.fwd_norms).sum(axis=0))).sum(axis=0)

    posterior = np.nan_to_num(softmax(posterior + ln(prior_context)))

    return posterior
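# The policy-surprise term above uses the standard Dirichlet identity
# E[ln pi_k] = digamma(alpha_k) - digamma(sum_j alpha_j). A quick
# numerical illustration (the alpha values are made up):

import numpy as np
from scipy import special as scs

alpha = np.array([2.0, 1.0, 1.0])
expected_log_pi = scs.digamma(alpha) - scs.digamma(alpha.sum())
print(expected_log_pi)  # elementwise E[ln pi] under Dir(alpha)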
def update_beliefs_policies(self):
    # policy posterior from the product (over time steps) of the
    # forward-message normalization constants
    posterior = softmax(ln(self.fwd_norms).sum(axis=0))
    return posterior
def update_beliefs_policies(self):
    # policy posterior from the negative free energies computed in
    # update_beliefs_states
    posterior = softmax(self.fe_pi)
    return posterior