Example #1
    def update_beliefs(self, tau, observation, response):

        if tau == 0:
            self.posterior_states[0] = 1. / self.ns
            self.posterior_durations[0] = 1. / self.nd
            self.posterior_states[1] = self.prior_states
            self.posterior_durations[1] = self.prior_durations
            self.posterior_observations[0] = np.exp(
                ln(self.prior_observations[:, np.newaxis]) +
                (self.posterior_states[1][np.newaxis, :, np.newaxis] *
                 ln(self.generative_model_observations)).sum(axis=1))

            self.posterior_observations[0] /= self.posterior_observations[
                0].sum(axis=0)

            self.posterior_policies[0] = softmax(
                ln(self.prior_actions) -
                (self.posterior_observations[0] *
                 ln(self.posterior_observations[0])).sum(axis=0) +
                (self.posterior_observations[0][:, np.newaxis, :] *
                 self.posterior_states[1, np.newaxis, :, np.newaxis] *
                 ln(self.generative_model_observations)).sum(axis=(0, 1)))

        else:
            old_post_s = self.posterior_states[1].copy()
            old_post_d = self.posterior_durations[1].copy()

            self.posterior_states[0] = softmax(
                ln(self.prior_states) +
                ln(self.generative_model_observations[observation, :,
                                                      response]))
            self.posterior_durations[0] = old_post_d
            self.posterior_states[1] = softmax(
                (old_post_d[np.newaxis, np.newaxis, :] *
                 self.posterior_states[0][np.newaxis, :, np.newaxis] *
                 ln(self.generative_model_states)).sum(axis=(1, 2)))
            self.posterior_durations[1] = softmax(
                (old_post_d[np.newaxis, :] *
                 ln(self.generative_model_durations)).sum(axis=1))
            # additional state-transition term, left disabled:
            # + (self.posterior_states[0][np.newaxis, :, np.newaxis] *
            #    self.posterior_states[1][:, np.newaxis, np.newaxis] *
            #    ln(self.generative_model_states)).sum(axis=(0, 1)))

            self.posterior_observations[0] = np.exp(
                ln(self.prior_observations[:, np.newaxis]) +
                (self.posterior_states[1][np.newaxis, :, np.newaxis] *
                 ln(self.generative_model_observations)).sum(axis=1))

            self.posterior_observations[0] /= self.posterior_observations[
                0].sum(axis=0)

            self.posterior_policies[0] = softmax(
                ln(self.prior_actions) -
                (self.posterior_observations[0] *
                 ln(self.posterior_observations[0])).sum(axis=0) +
                (self.posterior_observations[0][:, np.newaxis, :] *
                 self.posterior_states[1, np.newaxis, :, np.newaxis] *
                 ln(self.generative_model_observations)).sum(axis=(0, 1)))
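Example #1 calls module-level ln and softmax helpers that the snippet does not show. As a rough, self-contained sketch of what such helpers usually look like in this kind of code (an assumption, not the source's own definitions): ln is a zero-safe log and softmax exponentiates and normalizes log-space values.

import numpy as np

def ln(x):
    # zero-safe elementwise log (the clipping threshold is an assumption)
    return np.log(np.clip(x, 1e-16, None))

def softmax(log_x, axis=0):
    # exponentiate and normalize; subtracting the max keeps it numerically stable
    x = np.exp(log_x - log_x.max(axis=axis, keepdims=True))
    return x / x.sum(axis=axis, keepdims=True)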
Example #2
    def predict_proba(self, windows):
        """
        Predict class probabilities.

        Should return a matrix P of probabilities,
        with each row corresponding to a row of X.

        windows = array (n x windowsize),
            each row is a window of indices
        """
        # handle singleton input by making sure we have
        # a list-of-lists
        if not hasattr(windows[0], "__iter__"):
            windows = [windows]

        #### YOUR CODE HERE ####
        onehot_vecs = asarray(
            [self.sparams.L[windows[i], :].flatten() for i in range(len(windows))])

        a1 = self.params.W.dot(onehot_vecs.T).T + self.params.b1
        h = tanh(a1)
        a2 = self.params.U.dot(h.T).T + self.params.b2
        P = softmax(a2)  # y_hat

        #### END YOUR CODE ####

        return P # rows are output for each input
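For orientation, here is a standalone sketch of the same window-model forward pass on toy shapes (all sizes and the row-wise softmax are assumptions made for illustration, not values from the example):

import numpy as np

rng = np.random.default_rng(0)
V, d, win, H, C = 10, 5, 3, 4, 5            # vocab, embedding dim, window, hidden, classes (toy)
L = rng.normal(size=(V, d))                 # word-vector matrix, playing the role of sparams.L
W, b1 = rng.normal(size=(H, win * d)), np.zeros(H)
U, b2 = rng.normal(size=(C, H)), np.zeros(C)

windows = np.array([[1, 2, 3], [4, 5, 6]])  # two windows of word indices
x = np.array([L[w, :].flatten() for w in windows])  # concatenated window embeddings, shape (2, 15)
h = np.tanh(x.dot(W.T) + b1)                # hidden layer, shape (2, 4)
a2 = h.dot(U.T) + b2                        # class scores, shape (2, 5)
P = np.exp(a2 - a2.max(axis=1, keepdims=True))
P /= P.sum(axis=1, keepdims=True)           # row-wise softmax: each row sums to 1
print(P.shape)                              # (2, 5): one probability row per input window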
Example #3
    def predict_proba(self, windows):
        """
        Predict class probabilities.

        Should return a matrix P of probabilities,
        with each row corresponding to a row of X.

        windows = array (n x windowsize),
            each row is a window of indices
        """
        # handle singleton input by making sure we have
        # a list-of-lists
        if not hasattr(windows[0], "__iter__"):
            windows = [windows]

        #### YOUR CODE HERE ####
        onehot_vecs = asarray([
            self.sparams.L[windows[i], :].flatten()
            for i in range(len(windows))
        ])

        a1 = self.params.W.dot(onehot_vecs.T).T + self.params.b1
        h = tanh(a1)
        a2 = self.params.U.dot(h.T).T + self.params.b2
        P = softmax(a2)  #y_hat

        #### END YOUR CODE ####

        return P  # rows are output for each input
Example #4
    def _acc_grads(self, window, label):
        """
        Accumulate gradients, given a training point
        (window, label) of the format

        window = [x_{i-1} x_{i} x_{i+1}] # three ints
        label = {0,1,2,3,4} # single int, gives class

        Your code should update self.grads and self.sgrads,
        in order for gradient_check and training to work.

        So, for example:
        self.grads.U += (your gradient dJ/dU)
        self.sgrads.L[i] = (gradient dJ/dL[i]) # this adds an update for that index
        """
        #### YOUR CODE HERE ####

        onehot_vecs = expand_dims(self.sparams.L[window, :].flatten(), axis=0)

        #print "onehot_vecs.shape: %s " % (onehot_vecs.shape,)

        ##
        # Forward propagation
        a1 = self.params.W.dot(onehot_vecs.T).T + self.params.b1
        s = sigmoid(2.0 * a1)
        h = 2.0 * s - 1.0
        a2 = self.params.U.dot(h.T).T + self.params.b2
        y_hat = softmax(a2)

        ##
        # Backpropagation
        t = zeros(y_hat.shape)
        t[:, label] = 1

        delta_out = y_hat - t

        self.grads.U += h.T.dot(delta_out).T + self.lreg * self.params.U

        #print "delta_out  shape: %s" % (delta_out.shape,)

        self.grads.b2 += delta_out.flatten()
        #print "self.grads.b2.shape: %s " % (self.grads.b2.shape,)

        delta_hidden = delta_out.dot(self.params.U) * 4.0 * sigmoid_grad(s)

        self.grads.W += delta_hidden.T.dot(
            onehot_vecs) + self.lreg * self.params.W
        self.grads.b1 += delta_hidden.flatten()

        #print "self.grads.b2.shape: %s " % (self.grads.b1.shape,)

        grad_xs = delta_hidden.dot(self.params.W).T
        #print "grad_xs.shape: %s " % (grad_xs.shape,)

        self.sgrads.L[window[0]] = grad_xs[range(0, 50)].flatten()
        self.sgrads.L[window[1]] = grad_xs[range(50, 100)].flatten()
        self.sgrads.L[window[2]] = grad_xs[range(100, 150)].flatten()
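The hidden layer in this example is tanh computed through the sigmoid, using tanh(x) = 2*sigmoid(2x) - 1, and the 4.0 * sigmoid_grad(s) factor in the backward pass is exactly the tanh derivative 1 - tanh(x)**2. A quick standalone numerical check of both identities (plain numpy, not the example's own helpers):

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

x = np.linspace(-3.0, 3.0, 7)
s = sigmoid(2.0 * x)
h = 2.0 * s - 1.0

print(np.allclose(h, np.tanh(x)))                    # True: 2*sigmoid(2x) - 1 == tanh(x)
print(np.allclose(4.0 * s * (1.0 - s), 1.0 - h**2))  # True: 4*s*(1-s) == 1 - tanh(x)**2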
Example #5
    def _acc_grads(self, window, label):
        """
        Accumulate gradients, given a training point
        (window, label) of the format

        window = [x_{i-1} x_{i} x_{i+1}] # three ints
        label = {0,1,2,3,4} # single int, gives class

        Your code should update self.grads and self.sgrads,
        in order for gradient_check and training to work.

        So, for example:
        self.grads.U += (your gradient dJ/dU)
        self.sgrads.L[i] = (gradient dJ/dL[i]) # this adds an update for that index
        """
        #### YOUR CODE HERE ####

        onehot_vecs = expand_dims(self.sparams.L[window,:].flatten(),axis=0)

        #print "onehot_vecs.shape: %s " % (onehot_vecs.shape,)

        ##
        # Forward propagation
        a1 = self.params.W.dot(onehot_vecs.T).T + self.params.b1
        s  = sigmoid( 2.0 * a1 )
        h  = 2.0 * s - 1.0
        a2 = self.params.U.dot(h.T).T + self.params.b2
        y_hat = softmax( a2 ) 

        ##
        # Backpropagation
        t = zeros( y_hat.shape )
        t[:,label] = 1

        delta_out = y_hat - t

        self.grads.U  += h.T.dot(delta_out).T + self.lreg * self.params.U

        #print "delta_out  shape: %s" % (delta_out.shape,)

        self.grads.b2 += delta_out.flatten()
        #print "self.grads.b2.shape: %s " % (self.grads.b2.shape,)

        delta_hidden = delta_out.dot(self.params.U) * 4.0 * sigmoid_grad( s )
        
        self.grads.W  += delta_hidden.T.dot(onehot_vecs) + self.lreg * self.params.W
        self.grads.b1 += delta_hidden.flatten()

        #print "self.grads.b2.shape: %s " % (self.grads.b1.shape,)

        grad_xs = delta_hidden.dot(self.params.W).T
        #print "grad_xs.shape: %s " % (grad_xs.shape,)

        self.sgrads.L[window[0]] = grad_xs[range(0,50)].flatten()
        self.sgrads.L[window[1]] = grad_xs[range(50,100)].flatten()
        self.sgrads.L[window[2]] = grad_xs[range(100,150)].flatten()
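The docstring notes that these accumulated gradients are consumed by gradient_check and training. As a minimal sketch of the finite-difference idea behind such a check (a generic helper with hypothetical names; the course code's gradient_check has its own interface):

import numpy as np

def numeric_grad(f, x, eps=1e-5):
    # central finite differences of a scalar-valued f with respect to array x
    grad = np.zeros_like(x)
    for idx in np.ndindex(*x.shape):
        orig = x[idx]
        x[idx] = orig + eps
        fp = f(x)
        x[idx] = orig - eps
        fm = f(x)
        x[idx] = orig
        grad[idx] = (fp - fm) / (2 * eps)
    return grad

# sanity check: f(x) = sum(x**2) has analytic gradient 2*x
x = np.array([[1.0, -2.0], [0.5, 3.0]])
print(np.allclose(numeric_grad(lambda v: (v ** 2).sum(), x), 2 * x))  # True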
Example #6
    def update_beliefs_states(self, tau, t, observation, policies, prior,
                              prior_pi):
        if t == 0:
            self.logzs = np.tile(ln(self.zs), (self.T, 1)).T
        self.logzs[:, t] = ln(
            self.generative_model_observations[int(observation), :])

        #estimate expected state distribution
        lforw = np.zeros((self.nh, self.T))
        lforw[:, 0] = ln(self.prior_states)
        lback = np.zeros((self.nh, self.T))
        posterior = np.zeros((self.nh, self.T, policies.shape[0]))
        neg_fe = np.zeros(policies.shape[0])
        eps = 0.01
        for pi, ppi in enumerate(prior_pi):
            if ppi > 1e-6:
                logtm = ln(self.generative_model_states[:, :, policies[pi]])
                #SARAH: check the following before publishing!
                post = prior[:, :, pi]
                not_close = True
                while not_close:
                    lforw[:, 1:] = np.einsum('ijk, jk-> ik', logtm,
                                             post[:, :-1])
                    lback[:, :-1] = np.einsum('ijk, ik->jk', logtm,
                                              post[:, 1:])
                    logpost = lforw + self.logzs
                    lp = ln(post)
                    lp = (1 - eps) * lp + eps * (logpost + lback)
                    new_post = softmax(lp)
                    not_close = not np.allclose(post, new_post, atol=1e-3)
                    post[:] = new_post

                posterior[:, :, pi] = post
                neg_fe[pi] = (logpost * post).sum() - np.sum(post * ln(post))
            else:
                posterior[:, :, pi] = prior[:, :, pi]
                neg_fe[pi] = -1e10

        self.fe_pi = neg_fe

        return posterior, neg_fe
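The forward and backward messages above rely on np.einsum with the signatures 'ijk, jk-> ik' and 'ijk, ik->jk'. A toy sketch of what those contractions compute (shapes and names are illustrative only):

import numpy as np

nh, T = 3, 4                            # toy number of states / time steps
logtm = np.random.rand(nh, nh, T - 1)   # stands in for ln(transition matrix), one per step
post = np.random.rand(nh, T)            # stands in for a posterior over states and time

# 'ijk, jk-> ik': for each step k, contract over the previous state j,
# i.e. lforw[i, k] = sum_j logtm[i, j, k] * post[j, k]
lforw = np.einsum('ijk, jk-> ik', logtm, post[:, :-1])
check = np.stack([logtm[:, :, k].dot(post[:, k]) for k in range(T - 1)], axis=1)
print(np.allclose(lforw, check))        # True

# 'ijk, ik->jk' is the transposed contraction used for the backward messages:
# lback[j, k] = sum_i logtm[i, j, k] * post[i, k + 1]
lback = np.einsum('ijk, ik->jk', logtm, post[:, 1:])
print(lback.shape)                      # (3, 3), i.e. (nh, T - 1)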
Example #7
    def update_beliefs_context(self, tau, t, reward, posterior_states,
                               posterior_policies, prior_context, policies):

        post_policies = (prior_context[np.newaxis, :] *
                         posterior_policies).sum(axis=1)
        beta = self.dirichlet_rew_params.copy()
        states = (posterior_states[:, t, :] *
                  post_policies[np.newaxis, :, np.newaxis]).sum(axis=1)
        beta_prime = self.dirichlet_rew_params.copy()
        beta_prime[reward] = beta[reward] + states

        #        for c in range(self.nc):
        #            for state in range(self.nh):
        #                self.generative_model_rewards[:,state,c] =\
        #                np.exp(scs.digamma(beta_prime[:,state,c])\
        #                       -scs.digamma(beta_prime[:,state,c].sum()))
        #                self.generative_model_rewards[:,state,c] /= self.generative_model_rewards[:,state,c].sum()
        #
        #            self.rew_messages[:,t+1:,c] = self.prior_rewards.dot(self.generative_model_rewards[:,:,c])[:,np.newaxis]
        #
        #        for c in range(self.nc):
        #            for pi, cs in enumerate(policies):
        #                if self.prior_policies[pi,c] > 1e-15:
        #                    self.update_messages(t, pi, cs, c)
        #                else:
        #                    self.fwd_messages[:,:,pi,c] = 1./self.nh #0

        alpha = self.dirichlet_pol_params.copy()
        if t == self.T - 1:
            chosen_pol = np.argmax(post_policies)
            inf_context = np.argmax(prior_context)
            alpha_prime = self.dirichlet_pol_params.copy()
            alpha_prime[chosen_pol, :] += prior_context
            #alpha_prime[chosen_pol,inf_context] = self.dirichlet_pol_params[chosen_pol,inf_context] + 1
        else:
            alpha_prime = alpha

        if self.nc == 1:
            posterior = np.ones(1)
        else:
            # todo: recalc
            #outcome_surprise = ((states * prior_context[np.newaxis,:]).sum(axis=1)[:,np.newaxis] * (scs.digamma(beta_prime[reward]) - scs.digamma(beta_prime.sum(axis=0)))).sum(axis=0)
            outcome_surprise = (posterior_policies *
                                ln(self.fwd_norms.prod(axis=0))).sum(axis=0)
            entropy = -(posterior_policies *
                        ln(posterior_policies)).sum(axis=0)
            #policy_surprise = (post_policies[:,np.newaxis] * scs.digamma(alpha_prime)).sum(axis=0) - scs.digamma(alpha_prime.sum(axis=0))
            policy_surprise = (
                posterior_policies * scs.digamma(alpha_prime)).sum(
                    axis=0) - scs.digamma(alpha_prime.sum(axis=0))
            posterior = outcome_surprise + policy_surprise + entropy

            #+ np.nan_to_num((posterior_policies * ln(self.fwd_norms).sum(axis = 0))).sum(axis=0)#\

            #            if tau in range(90,120) and t == 1:
            #                #print(tau, np.exp(outcome_surprise), np.exp(policy_surprise))
            #                print(tau, np.exp(outcome_surprise[1])/np.exp(outcome_surprise[0]), np.exp(policy_surprise[1])/np.exp(policy_surprise[0]))

            posterior = np.nan_to_num(softmax(posterior + ln(prior_context)))

        return posterior
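The policy-surprise term uses scs.digamma(alpha_prime) - scs.digamma(alpha_prime.sum(axis=0)), which is the standard expression for the expected log of a Dirichlet-distributed probability vector, E[ln theta_i] = psi(alpha_i) - psi(sum_j alpha_j). A small Monte Carlo sanity check of that identity (standalone, with a hypothetical alpha):

import numpy as np
from scipy.special import digamma

alpha = np.array([2.0, 5.0, 1.5])                  # hypothetical Dirichlet counts
analytic = digamma(alpha) - digamma(alpha.sum())   # E[ln theta] under Dirichlet(alpha)

samples = np.random.default_rng(0).dirichlet(alpha, size=200_000)
empirical = np.log(samples).mean(axis=0)

print(np.round(analytic, 3))
print(np.round(empirical, 3))                      # matches the analytic values to ~2-3 decimals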
Example #8
    def update_beliefs_policies(self):

        posterior = softmax(ln(self.fwd_norms).sum(axis=0))

        return posterior
Example #9
    def update_beliefs_policies(self):
        posterior = softmax(self.fe_pi)

        return posterior
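This last variant turns per-policy negative free energies stored in self.fe_pi into a policy posterior (compare the update_beliefs_states example above, where self.fe_pi holds the per-policy negative free energies and pruned policies get -1e10). A toy illustration of that step, assuming softmax here simply exponentiates and normalizes its input:

import numpy as np

def softmax(logits):
    x = np.exp(logits - logits.max())
    return x / x.sum()

neg_fe = np.array([-3.2, -3.0, -1e10])   # hypothetical negative free energies; last policy pruned
posterior = softmax(neg_fe)
print(np.round(posterior, 3))            # ~[0.45, 0.55, 0.0]: lower free energy, higher probability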