def estimate_generator(self, lr=0.3, samps=None, prior=None, rand_undiscovered=False):
    """
    FUNCTION: Estimates a transition matrix and generator from sequence samples under propagator PROP.
    INPUTS:
    lr = learning rate
    samps = samples to use (defaults to all samples)
    prior = transition matrix prior
    rand_undiscovered = if True, randomizes transitions out of unobserved states (to all states);
                        otherwise isolates such states in the estimated generator
    OUTPUTS:
    self.est_Q = estimated generator matrix
    self.est_T = estimated transition matrix
    """
    if samps is None:
        samps = range(self.n_samp)
    jr = self.GEN.jump_rate
    if not hasattr(self, "est_T"):
        est_T = np.eye(self.n_state)
        # est_T = row_norm(np.ones((self.n_state, self.n_state)))
        est_Q = np.ones((self.n_state, self.n_state))
        est_Q = set_generator_diagonal(est_Q)
    else:
        est_T = self.est_T
        est_Q = self.est_Q

    if samps is not None and len(samps) > 0:
        # n_samp x n_seq_steps matrix of sequence samples
        state_seqs = self._retrieve_state(samp=samps, step=None, coords=False)
        st = state_seqs.flatten()
        st_pairs = list(zip(st, st[1:]))  # observed state transitions (not used directly here)
        self.estimate_transition_matrix(lr=lr, samps=samps, prior=prior)
        est_T = self.est_T  # refresh local copy with the updated transition estimate

    # deal with undiscovered states and convert to generator
    undiscovered_states = np.all(est_T == 0, axis=1)
    if rand_undiscovered is True:
        # randomize transitions out of unobserved states
        est_T[undiscovered_states, :] = 1.0
        est_T = row_norm(est_T)
        est_Q = stochmat2generator(est_T, jump_rate=jr)
    else:
        # isolate undiscovered states
        est_T[np.ix_(undiscovered_states, undiscovered_states)] = 1.0
        est_T = row_norm(est_T)
        est_Q = stochmat2generator(est_T, jump_rate=jr)
        est_Q[undiscovered_states, :] = 0.0
        est_Q = set_generator_diagonal(est_Q)

    self.est_T = est_T
    self.est_Q = est_Q
    self.est_A = (self.est_T > 0).astype("int")  # estimated adjacency matrix
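# Sketch (not part of the class): the DTMC -> CTMC conversion assumed above.
# For a row-stochastic T and a uniform jump rate, a generator can be built as
# Q = jump_rate * (T - I), giving non-negative off-diagonal entries and rows
# summing to zero. `stochmat2generator` / `set_generator_diagonal` are the
# project's own helpers; this stand-alone check only illustrates the relation.
import numpy as np

def _stochmat2generator_sketch(T, jump_rate=1.0):
    T = np.asarray(T, dtype=float)
    return jump_rate * (T - np.eye(T.shape[0]))

_T = np.array([[0.0, 1.0], [0.5, 0.5]])
_Q = _stochmat2generator_sketch(_T, jump_rate=2.0)
assert np.allclose(_Q.sum(axis=1), 0.0)  # generator rows sum to zero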
def learn_SR(self, lr=0.3, discount=None, samps=None):
    """
    FUNCTION: Estimates the successor representation from sequence samples under propagator PROP.
    INPUTS:
    lr = learning rate
    discount = temporal discount
    samps = samples to use
    OUTPUTS:
    self.est_SR = estimated successor representation matrix
    """
    if discount is None:
        discount = self.discount
    if not hasattr(self, "est_SR"):
        T_prior = row_norm(np.ones((self.n_state, self.n_state)))
        est_SR = SR(T_prior, gamma=discount)
    else:
        est_SR = self.est_SR
    if samps is None:
        samps = range(self.n_samp)
    self.lr_SR = lr
    self.discount_est_SR = discount
    for s in samps:
        lr = self.lr_SR * (self.lr_decay ** s)  # decay learning rate across samples
        state_traj = self._retrieve_state(samp=s, step=None, coords=False)
        est_SR = update_sr(est_SR, state_traj, discount=discount, learning_rate=lr)
    self.est_SR = est_SR
    self.SR_error = self._obj_norm(L=self.est_SR, T=self.SR, normalized=False)
    self.SR_corr, _ = spearmanr(self.est_SR.flatten(), self.SR.flatten())
    if config.verbose:
        print("LEARNER: SR error = %.3f" % self.SR_error)
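# Sketch of the TD-style successor representation update that `update_sr` is
# assumed to perform (hypothetical stand-in; the library function may differ):
# M[s, :] <- M[s, :] + lr * (one_hot(s) + gamma * M[s', :] - M[s, :]) for each
# observed transition s -> s' in the trajectory.
import numpy as np

def _update_sr_sketch(M, state_traj, discount=0.9, learning_rate=0.1):
    M = M.copy()
    n = M.shape[0]
    for s, s_next in zip(state_traj[:-1], state_traj[1:]):
        target = np.eye(n)[s] + discount * M[s_next, :]
        M[s, :] += learning_rate * (target - M[s, :])
    return M

# usage on a toy 3-state trajectory, starting from the identity SR
_M = _update_sr_sketch(np.eye(3), [0, 1, 2, 1], discount=0.9, learning_rate=0.3)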
def generator2stochmat(Q, tau=0.0, zero_diag=True):
    """
    FUNCTION: CTMC generator to DTMC transition matrix.
    INPUTS:
    Q = generator
    tau = prior on transition probability
    zero_diag = zero out diagonal
    """
    T = Q.astype("float").copy()
    if zero_diag:
        # embedded jump chain: drop the diagonal and renormalize rows
        T[np.eye(T.shape[0]).astype("bool")] = 0
    else:
        # invert Q = jump_rate * (T - I): jump rates are the negated generator diagonal
        jump_rate = -np.diagonal(T)
        T = T / jump_rate[:, None] + np.eye(T.shape[0])
    T = row_norm(T)
    T = row_norm(T + tau)
    return T
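# Minimal worked example (sketch, numpy only): for a 2-state generator with
# jump rates 1 and 0.5, the embedded jump chain obtained by zeroing the
# diagonal and row-normalizing (tau=0, zero_diag=True) is the deterministic
# switch between the two states.
import numpy as np

_Q = np.array([[-1.0, 1.0],
               [0.5, -0.5]])
_T = _Q.copy()
_T[np.eye(2, dtype=bool)] = 0.0
_T = _T / _T.sum(axis=1, keepdims=True)  # row_norm equivalent
assert np.allclose(_T, np.array([[0.0, 1.0], [1.0, 0.0]]))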
def policy_iteration(R, T, policy=None, max_iter=100, max_eval=100, gamma=0.99):
    """
    Iteratively improves policy by applying max operation to value function.
    Synchronously sweeps entire state-space in a vectorized fashion.
    INPUTS:
    R           rewards for every transition
    T           transition probabilities
    policy      initial policy
    max_iter    maximum number of iterations
    max_eval    maximum number of evaluation sweeps
    gamma       discount factor (< 1 for convergence guarantees)
    OUTPUTS:
    policy      optimized policy
    """
    nS, nA, _ = T.shape
    if policy is None:
        policy = np.ones((nS, nA))
        policy = row_norm(policy)
        # policy = np.ones((nS,)).astype('int')
    for _ in range(max_iter):
        # store current policy
        opt = policy.copy()
        # evaluate value function (at least approximately)
        V = policy_evaluation(R, T, policy, max_eval, gamma)
        # calculate Q-function: Q(s,a) = sum_k T[s,a,k] * (R[s,a,k] + gamma * V[k])
        Q = np.einsum('ijk,ijk->ij', T, R + gamma * V[None, None, :])
        # update policy greedily
        policy = np.argmax(Q, axis=1)
        # if policy did not change, stop
        if np.array_equal(policy, opt):
            break
    return vectorize_policy(policy, nS, nA)
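# Sanity sketch for the vectorized Q-function step above: on random tensors,
# the einsum matches the explicit Bellman sum
# Q(s,a) = sum_k T[s,a,k] * (R[s,a,k] + gamma * V[k]).
import numpy as np

_nS, _nA, _gamma = 4, 2, 0.99
_rng = np.random.default_rng(0)
_T = _rng.random((_nS, _nA, _nS))
_T /= _T.sum(axis=2, keepdims=True)  # make transition probabilities row-stochastic
_R = _rng.random((_nS, _nA, _nS))
_V = _rng.random(_nS)

_Q_einsum = np.einsum('ijk,ijk->ij', _T, _R + _gamma * _V[None, None, :])
_Q_loops = np.array([[np.sum(_T[s, a] * (_R[s, a] + _gamma * _V)) for a in range(_nA)]
                     for s in range(_nS)])
assert np.allclose(_Q_einsum, _Q_loops)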
# anti-clockwise directionality
from utils import row_norm

eps = 0.2
for c in range(n_clique - 1):
    state_bneck_out = states_bneck[c][1]
    state_bneck_in = states_bneck[c + 1][0]
    states_clique = [s for s in states_cliques[c] if s != state_bneck_out]
    ENV.T[states_clique, state_bneck_out] = 1.
    ENV.T[state_bneck_out, state_bneck_in] = 1.

# close the loop from the last clique back to the first
state_bneck_out = states_bneck[-1][1]
state_bneck_in = states_bneck[0][0]
states_clique = [s for s in states_cliques[-1] if s != state_bneck_out]
ENV.T[states_clique, state_bneck_out] = 1.
ENV.T[state_bneck_out, state_bneck_in] = 1.

ENV.T[(ENV.T < 1) & (ENV.T > 0)] = eps
ENV.T = row_norm(ENV.T)
ENV.__name__ += '-anticlockwise'

# %%
GEN = Generator(ENV=ENV, jump_rate=jump_rate)
PROPd = Propagator(GEN=GEN, tau=tau_diff, alpha=alpha_diff)
PROPs = Propagator(GEN=GEN, tau=tau_supdiff, alpha=alpha_supdiff)
PROPo = Propagator(GEN=GEN, tau=tau_diff, alpha=alpha_diff)
PROPo.min_zero_cf(lags=lags_opt, rho_init=rho_init)

print('DIFF: average autotransition prob = %0.3f' % np.diag(PROPd.etO).mean())
print('SUPDIFF: average autotransition prob = %0.3f' % np.diag(PROPs.etO).mean())

# %% SIMS
if run_explorer:
def shift_norm_prop(self):
    """
    Shifts self.etO into the non-negative range and row-normalizes.
    """
    # shift each row by its (negative) minimum so all entries become non-negative,
    # then renormalize rows to sum to one
    self.etO = self.etO - np.minimum(self.etO.min(axis=1, keepdims=True), 0.0)
    self.etO = row_norm(self.etO)
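# Tiny example (sketch) of the shift-and-normalize step used above: a row with
# a negative entry is shifted so its minimum becomes zero, then renormalized to
# sum to one; rows that are already non-negative are left unshifted.
import numpy as np

_etO = np.array([[0.6, -0.2, 0.6],
                 [0.2, 0.3, 0.5]])
_etO = _etO - np.minimum(_etO.min(axis=1, keepdims=True), 0.0)
_etO = _etO / _etO.sum(axis=1, keepdims=True)
assert (_etO >= 0).all() and np.allclose(_etO.sum(axis=1), 1.0)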