Example #1
0
    def estimate_generator(self,
                           lr=0.3,
                           samps=None,
                           prior=None,
                           rand_undiscovered=False):
        """
        FUNCTION: Estimates a transition matrix and generator from sequence samples under propagator PROP.
        INPUTS: lr = 0.3, learning rate
                samps = samples to use (defaults to all self.n_samp samples)
                prior = transition matrix prior
                rand_undiscovered = truthy, randomizes unobserved transitions (to all states)
                                    otherwise removes such states from the state index
        OUTPUTS: self.est_Q = estimated generator matrix
                 self.est_T = estimated transition matrix
                 self.est_A = estimated adjacency matrix (1 where est_T > 0)
        NOTES: with an empty samps the previous estimates (or, on a first call,
               the identity transition matrix and an all-ones generator with its
               diagonal reset) are stored unchanged apart from the diagonal reset.
        """
        if samps is None:
            samps = range(self.n_samp)
        jr = self.GEN.jump_rate

        if hasattr(self, "est_T"):
            est_T = self.est_T
            est_Q = self.est_Q
        else:
            est_T = np.eye(self.n_state)
            est_Q = set_generator_diagonal(
                np.ones((self.n_state, self.n_state)))

        if len(samps) > 0:
            self.estimate_transition_matrix(lr=lr, samps=samps, prior=prior)
            # The call above returns nothing that is used here, so its result can
            # only arrive through instance state. Re-read it so the generator is
            # built from the updated estimate rather than from the local bound
            # before the update (the identity matrix on a first call).
            # NOTE(review): assumes estimate_transition_matrix stores its result
            # in self.est_T - confirm against its definition.
            est_T = getattr(self, "est_T", est_T)

            # deal with undiscovered states (all-zero rows) and convert to generator
            undiscovered_states = np.all(est_T == 0, axis=1)
            if rand_undiscovered:
                # randomize unobserved states: equal weight to every destination
                est_T[undiscovered_states, :] = 1.0
            else:
                # isolate undiscovered states: they only connect to each other
                est_T[np.ix_(undiscovered_states, undiscovered_states)] = 1.0
            est_T = row_norm(est_T)
            est_Q = stochmat2generator(est_T, jump_rate=jr)
            if not rand_undiscovered:
                # isolated states get no outgoing rate at all
                est_Q[undiscovered_states, :] = 0.0

        est_Q = set_generator_diagonal(est_Q)
        self.est_T = est_T
        self.est_Q = est_Q
        # estimated adjacency matrix
        self.est_A = (self.est_T > 0).astype("int")
Example #2
0
 def learn_SR(self, lr=0.3, discount=None, samps=None):
     """
     FUNCTION: Incrementally learns the successor representation from sampled state sequences.
     INPUTS: lr           = base learning rate; sample s uses lr * self.lr_decay**s
             discount     = temporal discount (defaults to self.discount)
             samps        = samples to use (defaults to all self.n_samp samples)
     OUTPUTS: self.est_SR          = estimated successor representation matrix
              self.lr_SR           = base learning rate that was used
              self.discount_est_SR = discount that was used
              self.SR_error        = self._obj_norm of est_SR against self.SR (unnormalized)
              self.SR_corr         = Spearman correlation of flattened est_SR and self.SR
     """
     discount = self.discount if discount is None else discount

     if hasattr(self, "est_SR"):
         # continue from the previous estimate
         sr_estimate = self.est_SR
     else:
         # no estimate yet: start from the SR of a row-normalized all-ones matrix
         flat_prior = row_norm(np.ones((self.n_state, self.n_state)))
         sr_estimate = SR(flat_prior, gamma=discount)

     sample_ids = range(self.n_samp) if samps is None else samps
     self.lr_SR = lr
     self.discount_est_SR = discount

     for s in sample_ids:
         # learning rate decays geometrically with the sample index
         step_size = self.lr_SR * (self.lr_decay**s)
         trajectory = self._retrieve_state(samp=s, step=None, coords=False)
         sr_estimate = update_sr(sr_estimate,
                                 trajectory,
                                 discount=discount,
                                 learning_rate=step_size)

     self.est_SR = sr_estimate
     self.SR_error = self._obj_norm(L=self.est_SR,
                                    T=self.SR,
                                    normalized=False)
     rho, _ = spearmanr(self.est_SR.flatten(), self.SR.flatten())
     self.SR_corr = rho
     if config.verbose:
         print("LEARNER: SR error = %.3f" % self.SR_error)
Example #3
0
def generator2stochmat(Q, tau=0.0, zero_diag=True):
    """
    FUNCTION: CTMC generator to DTMC transition matrix.
    INPUTS: Q           = generator (square matrix)
            tau         = prior on transition probability, added to every entry
                          before the final row normalization
            zero_diag   = True, zero out diagonal (no self-transitions)
                          False, divide each row by its exit rate (the negated
                          diagonal entry) and add the identity; a row with zero
                          exit rate becomes a pure self-transition
    OUTPUTS: T          = row-normalized transition matrix
    NOTES: the zero_diag=False path assumes the usual generator convention of
           non-positive diagonal entries.
    """
    T = Q.astype("float")  # astype already returns a fresh array
    n_state = T.shape[0]
    if zero_diag:
        T[np.eye(n_state).astype("bool")] = 0
    else:
        # exit rate of state i is -Q[i, i]
        jump_rate = -np.diagonal(T)
        # zero-rate rows are all zero, so dividing by 1 leaves them zero and the
        # added identity turns them into self-transitions instead of NaNs
        safe_rate = np.where(jump_rate == 0, 1.0, jump_rate)
        # rates must scale rows, hence the explicit column vector
        T = T / safe_rate[:, None] + np.eye(n_state)
    T = row_norm(T)
    T = row_norm(T + tau)
    return T
Example #4
0
def policy_iteration(R, T, policy=None, max_iter=100, max_eval=100, gamma=0.99):
    """
    Alternates policy evaluation with greedy improvement until the policy
    stops changing or max_iter rounds have run. Every round updates all
    states at once in vectorized form.
    INPUTS:
    R           rewards for every transition
    T           transition probabilities, 3-D array unpacked as (nS, nA, nS)
    policy      initial policy; defaults to row_norm of an all-ones (nS, nA) array
    max_iter    maximum number of improvement rounds
    max_eval    maximum number of evaluation sweeps, forwarded to policy_evaluation
    gamma       discount factor (< 1 for convergence guarantees)
    OUTPUTS:
    policy      optimized policy, passed through vectorize_policy(policy, nS, nA)
    """
    # the trailing axis supplies the state count, the middle axis the action count
    _, n_actions, n_states = T.shape

    if policy is None:
        policy = row_norm(np.ones((n_states, n_actions)))

    for _ in range(max_iter):
        # snapshot taken before evaluation so the improvement step can be compared
        previous_policy = policy.copy()

        # (approximate) value of the current policy
        V = policy_evaluation(R, T, policy, max_eval, gamma)

        # expected one-step return of each (state, action) pair
        backed_up = R + gamma * V[np.newaxis, np.newaxis, :]
        action_values = np.einsum('ijk,ijk->ij', T, backed_up)

        # greedy improvement: best action index per state
        policy = action_values.argmax(axis=1)

        # converged once the greedy policy reproduces the previous one
        if np.array_equal(policy, previous_policy):
            break

    return vectorize_policy(policy, n_states, n_actions)
Example #5
0
# anti-clockwise directionality
from utils import row_norm
eps = 0.2
# (source clique, destination clique) index pairs; the trailing (-1, 0) pair
# links the last clique back to the first
clique_links = [(c, c + 1) for c in range(n_clique - 1)]
clique_links.append((-1, 0))
for src, dst in clique_links:
    state_bneck_out = states_bneck[src][1]
    state_bneck_in = states_bneck[dst][0]
    # every state of the source clique except its outgoing bottleneck
    states_clique = [s for s in states_cliques[src] if s != state_bneck_out]
    # clique states -> outgoing bottleneck -> next clique's incoming bottleneck
    ENV.T[states_clique, state_bneck_out] = 1.
    ENV.T[state_bneck_out, state_bneck_in] = 1.
# entries strictly between 0 and 1 are set to eps before re-normalizing rows
ENV.T[(ENV.T > 0) & (ENV.T < 1)] = eps
ENV.T = row_norm(ENV.T)
ENV.__name__ += '-anticlockwise'

# %%
# Build the generator for ENV, then three propagators that share it.
GEN = Generator(ENV=ENV, jump_rate=jump_rate)
# PROPd and PROPs differ only in their (tau, alpha) settings:
# the *_diff pair versus the *_supdiff pair.
PROPd = Propagator(GEN=GEN, tau=tau_diff, alpha=alpha_diff)
PROPs = Propagator(GEN=GEN, tau=tau_supdiff, alpha=alpha_supdiff)
# PROPo is constructed with the same arguments as PROPd and then run through
# min_zero_cf; the return value is discarded, so any effect is on PROPo itself.
# NOTE(review): presumably this optimizes the propagator over lags_opt starting
# from rho_init - confirm against Propagator.min_zero_cf.
PROPo = Propagator(GEN=GEN, tau=tau_diff, alpha=alpha_diff)
PROPo.min_zero_cf(lags=lags_opt, rho_init=rho_init)

# Report the mean diagonal entry of each propagator's etO matrix.
print('DIFF: average autotransition prob = %0.3f' % np.diag(PROPd.etO).mean())
print('SUPDIFF: average autotransition prob = %0.3f' %
      np.diag(PROPs.etO).mean())

# %% SIMS
if run_explorer:
Example #6
0
 def shift_norm_prop(self):
     """
     Shifts self.etO into non-negative range and row-normalizes.

     Every row whose minimum is negative is raised by that minimum, so its
     smallest entry becomes zero; rows that are already non-negative are not
     shifted. The shifted matrix is then passed through row_norm and stored
     back in self.etO.
     """
     # column vector of per-row minima, so the shift broadcasts along each row
     row_min = self.etO.min(1).reshape(-1, 1)
     # only the negative part is removed: subtracting a negative minimum raises
     # the row, while clipping at zero leaves non-negative rows untouched
     self.etO -= row_min.clip(max=0)
     self.etO = row_norm(self.etO)