コード例 #1
0
    def _s2_regression(self, states, ex_states, im_states, U_old):
        """Project the stage-1 states and fit the two stage-2 ridge regressions.

        The transposed ``states`` are handed to ``ula.rand_svd_f`` with
        ``k=self._p`` and ``rng=self.rng``. The first returned value is kept
        in ``self._U_st`` and the transpose of the third one replaces
        ``states``. The column count of the projected states is written to
        ``self._feat_dim.state``. Two ridge regressions from the projected
        states are then stored on the instance.

        Args:
            states: Stage-1 state estimates (presumably one row per sample --
                confirm against the caller).
            ex_states: Regression target for ``self._W_s2ex``.
            im_states: Regression target for ``self._W_s2oo``.
            U_old: Not used by this method.

        Returns:
            The projected states.
        """
        print('State Projection')
        self._U_st, _unused, projected_t = ula.rand_svd_f(
            states.T, k=self._p, rng=self.rng)
        projected = projected_t.T
        self._feat_dim.state = projected.shape[1]

        print('Stage 2 Regression')
        lambdas = self._lambda
        self._W_s2ex = ridge(projected, ex_states, lambdas['s2ex'])
        self._W_s2oo = ridge(projected, im_states, lambdas['s2oo'])

        return projected
コード例 #2
0
    def _build_model(self, data, feats, states):
        """Store the initial state, clamping statistics and prediction weights.

        Sets ``self._state0`` to the mean of ``states`` over axis 0, records
        norm and per-coordinate bounds used for clamping, and fits two
        predictors (horizon and 1-step). Each predictor is the product of a
        ridge regression from the row-wise Khatri-Rao product of states and
        action features onto observation features, and a second ridge
        regression from those observation features onto the raw observations
        in ``data``. All four regressions use ``self._lambda['pred']``.

        Args:
            data: Container providing ``fut_obs`` and ``obs``.
            feats: Container providing ``fut_act``, ``fut_obs``, ``act`` and
                ``obs`` features.
            states: State estimates (presumably one row per sample, as the
                row-wise Khatri-Rao product suggests -- confirm).

        Returns:
            None. Results are stored on ``self``.
        """
        self._state0 = np.mean(states, 0)

        # Statistics for clamping
        # NOTE(review): the squared norms are summed over axis 0, whereas the
        # coordinate bounds below reduce over axis 1 -- confirm which axis is
        # meant to index samples before relying on these statistics.
        squared_sums = np.sum(states * states, 0)
        self._max_state_norm2 = np.max(squared_sums)
        self._max_state_norm = np.sqrt(self._max_state_norm2)
        upper = np.max(states, axis=1)
        lower = np.min(states, axis=1)
        self._max_state_coord = upper.reshape((-1, 1))
        self._min_state_coord = lower.reshape((-1, 1))

        reg = self._lambda['pred']

        # Horizon Prediction
        horizon_in = ula.khatri_rao_rowwise(states, feats.fut_act)
        state_to_fut_feat = ridge(horizon_in, feats.fut_obs, reg)
        fut_feat_to_obs = ridge(feats.fut_obs, data.fut_obs, reg)
        self._W_h = np.dot(state_to_fut_feat, fut_feat_to_obs)

        # 1-Step Prediction
        one_step_in = ula.khatri_rao_rowwise(states, feats.act)
        state_to_obs_feat = ridge(one_step_in, feats.obs, reg)
        obs_feat_to_obs = ridge(feats.obs, data.obs, reg)
        self._W_1s = np.dot(state_to_obs_feat, obs_feat_to_obs)
コード例 #3
0
    def _s1_regression_cond(self, data, feats, imp_weights):
        """Run the three stage-1 ridge regressions and return their state estimates.

        Each stage regresses an observation feature on the row-wise
        Khatri-Rao product of the past features with an action feature. The
        resulting weight matrix is reshaped to (past, action, observation),
        its last two axes are swapped, and it is flattened back to
        (past, -1). The state estimate of a stage is ``feats.past`` times
        that matrix. The three reshaped matrices are also kept on ``self``
        as ``_dbg_W_s1a`` / ``_dbg_W_s1b`` / ``_dbg_W_s1c``.

        Args:
            data: Not used by this method.
            feats: Container providing ``past``, ``fut_act``, ``fut_obs``,
                ``exfut_act``, ``exfut_obs``, ``act`` and ``oo`` features.
            imp_weights: Indexable; element 0 is passed to the stage 1A
                regression and element 1 to stage 1B. Stage 1C passes
                ``None``.

        Returns:
            Tuple ``(states, ex_states, im_states)`` from stages 1A, 1B and 1C.
        """
        dims = self._feat_dim
        lambdas = self._lambda
        past = feats.past

        print('Stage 1A Regression')
        in_a = ula.khatri_rao_rowwise(past, feats.fut_act)
        W_a = ridge(in_a, feats.fut_obs, lambdas['s1a'], imp_weights[0])
        W_a = (W_a.reshape((dims.past, dims.fut_act, dims.fut_obs))
               .transpose((0, 2, 1))
               .reshape((dims.past, -1)))
        states = np.dot(past, W_a)

        print('Stage 1B Regression')
        in_b = ula.khatri_rao_rowwise(past, feats.exfut_act)
        W_b = ridge(in_b, feats.exfut_obs, lambdas['s1b'], imp_weights[1])
        W_b = (W_b.reshape((dims.past, dims.exfut_act, dims.exfut_obs))
               .transpose((0, 2, 1))
               .reshape((dims.past, -1)))
        ex_states = np.dot(past, W_b)

        print('Stage 1C Regression')
        in_c = ula.khatri_rao_rowwise(past, feats.act)
        W_c = ridge(in_c, feats.oo, lambdas['s1c'], None)
        W_c = (W_c.reshape((dims.past, dims.act, dims.oo))
               .transpose((0, 2, 1))
               .reshape((dims.past, -1)))
        im_states = np.dot(past, W_c)

        # Keep the reshaped weights around for debugging.
        self._dbg_W_s1a = W_a
        self._dbg_W_s1b = W_b
        self._dbg_W_s1c = W_c

        return states, ex_states, im_states
コード例 #4
0
def gaussian_blind_policy(data, feats, l2_lambda):
    """Fit a Gaussian blind policy and return per-sample action likelihoods.

    A ridge regression from ``feats.past`` to ``data.exfut_act`` gives the
    policy mean; the per-row mean squared residual gives the variance. Each
    entry of the residual is scored under the corresponding zero-mean
    Gaussian density, and the densities are multiplied along axis 0.

    Args:
        data: Container providing ``act``, ``fut_act`` and ``exfut_act``.
            The code reads the action dimension from ``data.act.shape[0]``,
            so rows presumably index coordinates and columns index samples
            -- confirm against the caller.
        feats: Container providing ``past`` features.
        l2_lambda: Regularization value passed to ``ridge``.

    Returns:
        Tuple ``(blind_prop_future, blind_prop_extended)``: the product of
        densities over the first ``d_a * fut`` rows, and over all rows.
    """
    # Find the closest blind policy of the form
    # (a_{t+i} | history) ~ N(dot(w_i, h_t), \sigma_i^2)
    W_policy = ridge(feats.past, data.exfut_act, l2_lambda)
    residual = np.dot(W_policy, feats.past) - data.exfut_act
    sq_residual = residual * residual
    variance = np.mean(sq_residual, 1).reshape((-1, 1))

    # Gaussian density 1/sqrt(2*pi*var) * exp(-r^2 / (2*var)), elementwise.
    blind_prob = (np.sqrt(0.5 / (np.pi * variance))
                  * np.exp(-0.5 * sq_residual / variance))

    d_a = data.act.shape[0]
    # Floor division keeps the count an integer so it stays a valid slice
    # bound on Python 3 as well as Python 2.
    fut = data.fut_act.shape[0] // d_a

    blind_prop_future = np.prod(blind_prob[:d_a * fut, :], 0)
    blind_prop_extended = np.prod(blind_prob, 0)

    return blind_prop_future, blind_prop_extended
コード例 #5
0
 def _estimate_condop(self, A, B, C, reg_lambda, div_lambda, importance_weights):
     """Estimate a per-sample conditional operator of A given B from C.

     A single ridge regression from ``C`` predicts, for every sample, the
     concatenation of the row-wise Khatri-Rao products of ``(A, B)`` and
     ``(B, B)``. For each sample the two predicted parts are reshaped to
     ``(da, db)`` and ``(db, db)`` matrices and combined with
     ``ula.reg_rdivide_nopsd`` (presumably a regularized right division of
     the first by the second -- confirm against ``ula``).

     Args:
         A: 2-D array with shape ``(N, da)``.
         B: 2-D array whose second dimension is ``db``.
         C: Regression input passed to ``ridge`` and multiplied by ``W``.
         reg_lambda: Regularization value passed to ``ridge``.
         div_lambda: Value passed to ``ula.reg_rdivide_nopsd``.
         importance_weights: Passed through to ``ridge``.

     Returns:
         Tuple ``(output, W)``: ``output`` is ``(N, da*db)`` with one
         flattened per-sample result per row, ``W`` is the ridge solution.
     """
     N, da = A.shape
     db = B.shape[1]
     dab = da * db
     dbb = db * db

     # Regression targets: [A (x) B | B (x) B] side by side.
     reg_out = np.empty((N, dab + dbb))
     reg_out[:, :dab] = ula.khatri_rao_rowwise(A, B)
     reg_out[:, dab:] = ula.khatri_rao_rowwise(B, B)

     W = ridge(C, reg_out, reg_lambda, importance_weights)

     est_reg_out = np.dot(C, W)
     output = np.empty((N, dab))

     # range (not xrange) so the loop also runs on Python 3.
     for i in range(N):
         C_ab = est_reg_out[i, :dab].reshape((da, db))
         C_bb = est_reg_out[i, dab:].reshape((db, db))
         C_a_b = ula.reg_rdivide_nopsd(C_ab, C_bb, div_lambda)
         output[i, :] = C_a_b.reshape(-1)

     return output, W