def _s2_regression(self, states, ex_states, im_states, U_old):
    """Project the stage-1 states and fit the two stage-2 ridge regressions.

    The transposed ``states`` are passed to ``ula.rand_svd_f`` with
    ``k=self._p`` and ``rng=self.rng``. The first value it returns is stored
    in ``self._U_st``, the second is discarded, and the transpose of the
    third becomes the projected states. ``self._feat_dim.state`` is set to
    the number of columns of the projected states.

    Two ridge fits are then made from the projected states:
    to ``ex_states`` (stored in ``self._W_s2ex``, regularizer
    ``self._lambda['s2ex']``) and to ``im_states`` (stored in
    ``self._W_s2oo``, regularizer ``self._lambda['s2oo']``).

    ``U_old`` is accepted but not used by this method.

    Returns:
        The projected states.
    """
    print('State Projection')
    basis, _, projected_t = ula.rand_svd_f(states.T, k=self._p, rng=self.rng)
    self._U_st = basis
    projected = projected_t.T
    self._feat_dim.state = projected.shape[1]

    print('Stage 2 Regression')
    # Both stage-2 models share the same (projected) regressors.
    self._W_s2ex = ridge(projected, ex_states, self._lambda['s2ex'])
    self._W_s2oo = ridge(projected, im_states, self._lambda['s2oo'])

    return projected
def _build_model(self, data, feats, states):
    """Store state statistics and fit the horizon and 1-step predictors.

    Sets on ``self``:
        _state0: mean of ``states`` along axis 0.
        _max_state_norm2 / _max_state_norm: largest value of
            ``sum(states * states, axis=0)`` and its square root.
        _max_state_coord / _min_state_coord: max / min of ``states`` along
            axis 1, reshaped to column vectors.
        _W_h: product of a ridge fit from the row-wise Khatri-Rao product
            of ``states`` and ``feats.fut_act`` to ``feats.fut_obs`` with a
            ridge fit from ``feats.fut_obs`` to ``data.fut_obs``.
        _W_1s: the same construction using ``feats.act``, ``feats.obs``
            and ``data.obs``.

    All four ridge fits use ``self._lambda['pred']``.
    """
    def _fit_pred(inputs, targets):
        # Every predictor in this method shares the 'pred' regularizer.
        return ridge(inputs, targets, self._lambda['pred'])

    self._state0 = np.mean(states, 0)

    # Statistics for clamping
    # NOTE(review): the mean above reduces axis 0 while the coordinate
    # max/min below reduce axis 1 -- confirm the intended layout of `states`.
    squared_norms = np.sum(states * states, 0)
    self._max_state_norm2 = np.max(squared_norms)
    self._max_state_norm = np.sqrt(self._max_state_norm2)
    coord_max = np.max(states, axis=1)
    self._max_state_coord = coord_max.reshape((-1, 1))
    coord_min = np.min(states, axis=1)
    self._min_state_coord = coord_min.reshape((-1, 1))

    # Horizon Prediction
    horizon_in = ula.khatri_rao_rowwise(states, feats.fut_act)
    state_to_fut_feat = _fit_pred(horizon_in, feats.fut_obs)
    fut_feat_to_obs = _fit_pred(feats.fut_obs, data.fut_obs)
    self._W_h = np.dot(state_to_fut_feat, fut_feat_to_obs)

    # 1-Step Prediction
    one_step_in = ula.khatri_rao_rowwise(states, feats.act)
    state_to_obs_feat = _fit_pred(one_step_in, feats.obs)
    obs_feat_to_obs = _fit_pred(feats.obs, data.obs)
    self._W_1s = np.dot(state_to_obs_feat, obs_feat_to_obs)
def _s1_regression_cond(self, data, feats, imp_weights):
    """Run the three stage-1 conditional regressions (1A, 1B, 1C).

    Each stage fits a ridge model from the row-wise Khatri-Rao product of
    ``feats.past`` with an action feature block to an observation feature
    block, reorders the weights so the past dimension indexes the rows, and
    multiplies ``feats.past`` by the result:

        1A: ``feats.fut_act``   -> ``feats.fut_obs``,   weights ``imp_weights[0]``
        1B: ``feats.exfut_act`` -> ``feats.exfut_obs``, weights ``imp_weights[1]``
        1C: ``feats.act``       -> ``feats.oo``,        no importance weights

    The reshaped weight matrices are kept on ``self`` as ``_dbg_W_s1a``,
    ``_dbg_W_s1b`` and ``_dbg_W_s1c``. ``data`` is not used by this method.

    Returns:
        Tuple ``(states, ex_states, im_states)`` from stages 1A, 1B and 1C.
    """
    def _fit_stage(act_feats, obs_feats, reg, weights, d_act, d_obs):
        # Regress the observation features on (past x action) features.
        joint_in = ula.khatri_rao_rowwise(feats.past, act_feats)
        W = ridge(joint_in, obs_feats, reg, weights)
        # (past * act, obs) -> (past, act, obs) -> (past, obs, act)
        # -> (past, obs * act), so that past features index the rows.
        d_past = self._feat_dim.past
        W = W.reshape((d_past, d_act, d_obs))
        W = W.transpose((0, 2, 1)).reshape((d_past, -1))
        return W, np.dot(feats.past, W)

    dims = self._feat_dim

    print('Stage 1A Regression')
    W_a, states = _fit_stage(feats.fut_act, feats.fut_obs,
                             self._lambda['s1a'], imp_weights[0],
                             dims.fut_act, dims.fut_obs)

    print('Stage 1B Regression')
    W_b, ex_states = _fit_stage(feats.exfut_act, feats.exfut_obs,
                                self._lambda['s1b'], imp_weights[1],
                                dims.exfut_act, dims.exfut_obs)

    print('Stage 1C Regression')
    W_c, im_states = _fit_stage(feats.act, feats.oo,
                                self._lambda['s1c'], None,
                                dims.act, dims.oo)

    # Kept only for debugging / inspection.
    self._dbg_W_s1a = W_a
    self._dbg_W_s1b = W_b
    self._dbg_W_s1c = W_c

    return states, ex_states, im_states
def gaussian_blind_policy(data, feats, l2_lambda):
    """Fit a Gaussian blind policy and return its action likelihoods.

    A ridge model (regularizer ``l2_lambda``) is fit from ``feats.past`` to
    ``data.exfut_act``. The residuals of that model give a per-row variance
    (mean squared residual along axis 1), and each residual is scored under
    a zero-mean Gaussian density with that variance.

    Args:
        data: object providing ``exfut_act``, ``act`` and ``fut_act`` arrays.
        feats: object providing the ``past`` feature array.
        l2_lambda: ridge regularization constant passed to ``ridge``.

    Returns:
        Tuple ``(blind_prop_future, blind_prop_extended)``: the product along
        axis 0 of the densities over the first ``d_a * fut`` rows, and over
        all rows, respectively.
    """
    # Find the closest blind policy of the form
    # (a_{t+i} | history) ~ N(dot(w_i, h_t), \sigma_i^2)
    W_policy = ridge(feats.past, data.exfut_act, l2_lambda)
    # NOTE(review): the prediction is formed as dot(W, past), i.e. with
    # samples along axis 1 -- confirm this matches ridge()'s weight layout.
    residual = np.dot(W_policy, feats.past) - data.exfut_act
    residual_sq = residual * residual
    variance = np.mean(residual_sq, 1).reshape((-1, 1))
    # Gaussian density: 1 / sqrt(2*pi*var) * exp(-r^2 / (2*var)).
    blind_prob = (np.sqrt(0.5 / (np.pi * variance))
                  * np.exp(-0.5 * residual_sq / variance))

    d_a = data.act.shape[0]
    # Floor division keeps the slice bound an int on both Python 2 and 3;
    # plain '/' yields a float on Python 3 and breaks the slice below.
    fut = data.fut_act.shape[0] // d_a
    blind_prop_future = np.prod(blind_prob[:d_a * fut, :], 0)
    blind_prop_extended = np.prod(blind_prob, 0)

    return blind_prop_future, blind_prop_extended
def _estimate_condop(self, A, B, C, reg_lambda, div_lambda, importance_weights):
    """Estimate a per-sample conditional operator of A given B from C.

    The row-wise Khatri-Rao products of ``(A, B)`` and ``(B, B)`` are stacked
    side by side and regressed on ``C`` with ``ridge`` (regularizer
    ``reg_lambda``, sample weights ``importance_weights``). For every row of
    the fitted values, the two parts are reshaped to ``(da, db)`` and
    ``(db, db)`` matrices and combined with
    ``ula.reg_rdivide_nopsd(C_ab, C_bb, div_lambda)``; the result is stored
    flattened.

    Args:
        A: array of shape ``(N, da)``.
        B: array whose second dimension is ``db`` (first dimension presumably
            ``N`` -- it must be compatible with the Khatri-Rao products).
        C: regressors passed to ``ridge``.
        reg_lambda: regularizer for the ridge regression.
        div_lambda: regularizer passed to ``ula.reg_rdivide_nopsd``.
        importance_weights: sample weights passed to ``ridge``.

    Returns:
        Tuple ``(output, W)`` where ``output`` has shape ``(N, da * db)`` and
        ``W`` is the ridge weight matrix.
    """
    N, da = A.shape
    db = B.shape[1]
    dab = da * db
    dbb = db * db

    # Regression targets: [A (x) B | B (x) B], one row per sample.
    reg_out = np.empty((N, dab + dbb))
    reg_out[:, :dab] = ula.khatri_rao_rowwise(A, B)
    reg_out[:, dab:] = ula.khatri_rao_rowwise(B, B)

    W = ridge(C, reg_out, reg_lambda, importance_weights)
    est_reg_out = np.dot(C, W)

    output = np.empty((N, dab))
    # range() instead of the Python-2-only xrange() so this runs on both.
    for i in range(N):
        C_ab = est_reg_out[i, :dab].reshape((da, db))
        C_bb = est_reg_out[i, dab:].reshape((db, db))
        C_a_b = ula.reg_rdivide_nopsd(C_ab, C_bb, div_lambda)
        output[i, :] = C_a_b.reshape(-1)

    return output, W