Ejemplo n.º 1
0
def pendulum_traj_draw(traj, ax=None):
    """Draw a 2-D trajectory as a black line with a star on its first point.

    Args:
        traj: Sequence convertible by ``np.array`` into a 2-D array. Column 0
            is used for the x axis and column 1 for the y axis.
        ax: Matplotlib axes to draw on. When ``None`` a new figure with a
            single subplot is created.

    Returns:
        The axes the trajectory was drawn on.
    """
    # Switch pyplot to interactive mode.
    plt.ion()
    if ax is None:
        fig = plt.figure()
        ax = fig.add_subplot(111)

    traj_array = np.array(traj)
    # Axes.hold was deprecated in Matplotlib 2.0 and removed in 3.0 (where
    # overlaying plots is always on). Only call it when it still exists so the
    # function works on both old and new Matplotlib releases.
    if hasattr(ax, 'hold'):
        ax.hold(True)
    line = ax.plot(traj_array[:, 0], traj_array[:, 1], '-k')
    # Add a larger marker for the initial point.
    ax.plot(traj_array[0, 0], traj_array[0, 1], '*k', markersize=10.0)
    # Add an arrow to the curve; `line` is the list returned by ax.plot.
    utils.add_arrow_to_line2D(ax, line)
    return ax
Ejemplo n.º 2
0
def pendulum_traj_draw(traj, ax=None):
    """Draw a 2-D trajectory as a black line with a star on its first point.

    Args:
        traj: Sequence convertible by ``np.array`` into a 2-D array. Column 0
            is used for the x axis and column 1 for the y axis.
        ax: Matplotlib axes to draw on. When ``None`` a new figure with a
            single subplot is created.

    Returns:
        The axes the trajectory was drawn on.
    """
    # Switch pyplot to interactive mode.
    plt.ion()
    if ax is None:
        fig = plt.figure()
        ax = fig.add_subplot(111)

    traj_array = np.array(traj)
    # Axes.hold was deprecated in Matplotlib 2.0 and removed in 3.0 (where
    # overlaying plots is always on). Only call it when it still exists so the
    # function works on both old and new Matplotlib releases.
    if hasattr(ax, 'hold'):
        ax.hold(True)
    line = ax.plot(traj_array[:, 0], traj_array[:, 1], '-k')
    # Add a larger marker for the initial point.
    ax.plot(traj_array[0, 0], traj_array[0, 1], '*k', markersize=10.0)
    # Add an arrow to the curve; `line` is the list returned by ax.plot.
    utils.add_arrow_to_line2D(ax, line)
    return ax
Ejemplo n.º 3
0
def PendulumMDPValueLearningErrorDraw(err_lst_full, M, m_itrs):
    """Plot one mean/std error band per iteration setting on a new figure.

    Args:
        err_lst_full: Indexable by the position of each entry of ``m_itrs``;
            every element is reduced with ``np.mean`` / ``np.std`` along
            ``axis=1`` before plotting.
        M: Passed unchanged to ``utils.draw_err_bar_with_filled_shape``
            (presumably the x values, i.e. model sizes; confirm against
            that helper).
        m_itrs: Iterable of iteration limits; each one yields a curve and a
            legend entry ``'Max Iterations: <value>'``.

    Returns:
        The axes of the newly created figure.
    """
    fig = plt.figure()
    ax = fig.add_subplot(111)
    # Axes.hold was deprecated in Matplotlib 2.0 and removed in 3.0 (where
    # overlaying plots is always on); call it only where it still exists.
    if hasattr(ax, 'hold'):
        ax.hold(True)
    # Colour cycle; reused from the start when there are more than five curves.
    colors = [(1.0, 0.0, 0.0), (0.5, 0.5, 0.0), (0.0, 1.0, 0.0),
              (0.0, 0.5, 0.5), (0.0, 0.0, 1.0)]
    lines = []
    legend_txt = []
    for m_idx, m_itr in enumerate(m_itrs):
        errs = err_lst_full[m_idx]
        line, _ = utils.draw_err_bar_with_filled_shape(
            ax, M, np.mean(errs, axis=1), np.std(errs, axis=1),
            colors[m_idx % len(colors)])
        lines.append(line)
        legend_txt.append('Max Iterations: {0}'.format(m_itr))
    # Prepare legend and axis text. The legend is attached to `ax` explicitly
    # rather than to whatever pyplot considers the current axes.
    ax.legend(lines, legend_txt, loc='best')
    ax.yaxis.grid()
    ax.xaxis.grid()
    ax.set_xlabel('Model size - M', fontsize=20)
    ax.set_ylabel('Log-likelihood', fontsize=20)
    ax.set_title('Log-likelihood of Test Data versus Model Size', fontsize=20)
    plt.draw()
    return ax
Ejemplo n.º 4
0
def PendulumMDPValueLearningErrorDraw(err_lst_full, M, m_itrs):
    """Plot one mean/std error band per iteration setting on a new figure.

    Args:
        err_lst_full: Indexable by the position of each entry of ``m_itrs``;
            every element is reduced with ``np.mean`` / ``np.std`` along
            ``axis=1`` before plotting.
        M: Passed unchanged to ``utils.draw_err_bar_with_filled_shape``
            (presumably the x values, i.e. model sizes; confirm against
            that helper).
        m_itrs: Iterable of iteration limits; each one yields a curve and a
            legend entry ``'Max Iterations: <value>'``.

    Returns:
        The axes of the newly created figure.
    """
    fig = plt.figure()
    ax = fig.add_subplot(111)
    # Axes.hold was deprecated in Matplotlib 2.0 and removed in 3.0 (where
    # overlaying plots is always on); call it only where it still exists.
    if hasattr(ax, 'hold'):
        ax.hold(True)
    # Colour cycle; reused from the start when there are more than five curves.
    colors = [(1.0, 0.0, 0.0), (0.5, 0.5, 0.0), (0.0, 1.0, 0.0),
              (0.0, 0.5, 0.5), (0.0, 0.0, 1.0)]
    lines = []
    legend_txt = []
    for m_idx, m_itr in enumerate(m_itrs):
        errs = err_lst_full[m_idx]
        line, _ = utils.draw_err_bar_with_filled_shape(
            ax, M, np.mean(errs, axis=1), np.std(errs, axis=1),
            colors[m_idx % len(colors)])
        lines.append(line)
        legend_txt.append('Max Iterations: {0}'.format(m_itr))
    # Prepare legend and axis text. The legend is attached to `ax` explicitly
    # rather than to whatever pyplot considers the current axes.
    ax.legend(lines, legend_txt, loc='best')
    ax.yaxis.grid()
    ax.xaxis.grid()
    ax.set_xlabel('Model size - M', fontsize=20)
    ax.set_ylabel('Log-likelihood', fontsize=20)
    ax.set_title('Log-likelihood of Test Data versus Model Size', fontsize=20)
    plt.draw()
    return ax
Ejemplo n.º 5
0
    def DiscretizeSystem(self, sys, costfunc, xbins, ubins, options=None):
        """Discretize a dynamical system onto a state/action grid.

        Evaluates ``sys.Dynamics`` and ``costfunc`` at every (grid state,
        grid action) pair and stores the resulting transition matrices and
        cost table on ``self``.

        Args:
            sys: System object. ``sys.is_ct_`` selects the branch and
                ``sys.Dynamics(state, action)`` is called for every pair.
                When ``sys.is_ct_`` is true, ``sys.dt_`` scales the cost and
                ``Dynamics`` must return either an array (the next state) or
                a list ``[mean, sigma]``. Otherwise ``Dynamics`` must return
                an iterable of ``(next_state, probability)`` pairs.
            costfunc: Callable invoked as ``costfunc(state, action, sys)``.
            xbins: State bins. A ``list`` is taken as one entry per state
                dimension; any non-list value (e.g. an ndarray) is treated as
                the bins of a single dimension.
            ubins: Control bins, interpreted the same way as ``xbins``.
            options: Optional dict. Recognized keys are ``'dt'`` (stored in
                ``self.dt_``, default ``1.0``) and ``'wrap_flag'`` (one flag
                per state dimension; flagged dimensions are wrapped into
                ``[first bin, last bin)`` before lookup; default all off).

        Returns:
            None. Sets ``xbins_``, ``ubins_``, ``state_dim_``, ``ctrl_dim``,
            ``dt_``, ``wrap_flag_``, ``S_``, ``A_``, ``num_state_``,
            ``num_action_``, ``T_`` (list of dense matrices, one per action)
            and ``C_`` (state-by-action costs) on ``self``. If a
            ``[mean, sigma]`` result has mismatched lengths a message is
            printed and the method returns early with ``T_``/``C_`` only
            partially filled.
        """
        # A None default avoids sharing one dict object across calls.
        if options is None:
            options = {}

        #check system
        is_ct = sys.is_ct_

        #check dimension: wrap a non-list argument as a single dimension
        self.xbins_ = xbins if isinstance(xbins, list) else [xbins]
        self.ubins_ = ubins if isinstance(ubins, list) else [ubins]

        self.state_dim_ = len(self.xbins_)
        self.ctrl_dim = len(self.ubins_)

        self.dt_ = options['dt'] if 'dt' in options else 1.0

        if 'wrap_flag' in options:
            self.wrap_flag_ = options['wrap_flag']
        else:
            self.wrap_flag_ = np.zeros(self.state_dim_)

        # Elementwise comparison is intended. np.asarray makes a plain Python
        # list of flags work too (``[True, False] == True`` is just ``False``,
        # which silently disabled wrapping).
        wrap_idx = np.where(np.asarray(self.wrap_flag_) == True)[0]

        xmin = np.array([bins[0] for bins in self.xbins_])
        xmax = np.array([bins[-1] for bins in self.xbins_])
        # Loop-invariant lower bound and span of the wrapped dimensions.
        wrap_min = xmin[wrap_idx]
        wrap_span = xmax[wrap_idx] - xmin[wrap_idx]

        #construct grids
        #state
        Sgrid = np.meshgrid(*self.xbins_)

        #for each dim, need to reshape to a long 1-d array
        self.S_ = np.array([np.reshape(dim, (1, -1))[0] for dim in Sgrid])

        #action
        Agrid = np.meshgrid(*self.ubins_)
        self.A_ = np.array([np.reshape(dim, (1, -1))[0] for dim in Agrid])

        self.num_state_ = self.S_.shape[1]
        self.num_action_ = self.A_.shape[1]

        #prepare the transition matrix: one dense (state x state) matrix per action
        self.T_ = [
            np.zeros([self.num_state_, self.num_state_])
            for _ in range(self.num_action_)
        ]

        #prepare cost function
        self.C_ = np.zeros([self.num_state_, self.num_action_])

        #helpers mapping continuous states to grid subscripts / flat indices
        sub2ind = self.MakeSub2Ind(self.xbins_)
        xdigitize, xdigitize_dim = self.MakeXDigitize(self.xbins_)

        # Call form prints identically on Python 2 and is valid on Python 3.
        print('Constructing transition matrix...')
        #vectorize this to increase the efficiency if possible...
        for action_idx in range(self.num_action_):
            action = self.A_[:, action_idx]
            trans = self.T_[action_idx]
            for state_idx in range(self.num_state_):
                state = self.S_[:, state_idx]
                if is_ct:
                    # the system must be an update equation
                    x_new = sys.Dynamics(state, action)
                    self.C_[state_idx, action_idx] = sys.dt_ * costfunc(
                        state, action, sys)

                    if isinstance(x_new, list):
                        #contains both expected state and diagonal Gaussian noise...
                        x_new_mu = x_new[0]
                        x_new_sig = x_new[1]

                        if len(x_new_mu) != len(x_new_sig):
                            print('Inconsistent length of state and noise vector...')
                            return
                        #wrap x_new if needed, this is useful for state variable like angular position
                        x_new_mu[wrap_idx] = np.mod(
                            x_new_mu[wrap_idx] - wrap_min,
                            wrap_span) + wrap_min
                        x_new_mu_idx = xdigitize(x_new_mu)
                        x_new_mu_digitized_state = self.S_[
                            :, sub2ind(x_new_mu_idx)]

                        coeff_lst = []
                        involved_states = []
                        # NOTE(review): the mean cell appears to be visited
                        # once per dimension, so its weight is accumulated
                        # several times below - confirm this is intended.
                        for dim_idx in range(len(x_new_mu)):
                            tmp_x_new_mu_idx = list(x_new_mu_idx)
                            #for each dim, try to crawl the grid
                            #find bounds, use the interval [-2*sigma, 2*sigma]
                            #how to wrap here? or just truncate the shape of gaussian?...
                            x_new_mu_tmp_min = np.array(x_new_mu)
                            x_new_mu_tmp_max = np.array(x_new_mu)
                            x_new_mu_tmp_min[dim_idx] += -2 * x_new_sig[dim_idx]
                            x_new_mu_tmp_max[dim_idx] += 2 * x_new_sig[dim_idx]
                            min_idx = xdigitize_dim(x_new_mu_tmp_min, dim_idx)
                            max_idx = xdigitize_dim(x_new_mu_tmp_max, dim_idx)

                            for step_idx in range(min_idx, max_idx + 1):
                                tmp_x_new_mu_idx[dim_idx] = step_idx
                                #get the index of involved state
                                involved_state_idx = sub2ind(tmp_x_new_mu_idx)
                                involved_states.append(involved_state_idx)
                                # Unnormalized weight: exp(-||dx / sigma||^2)
                                coeff_lst.append(
                                    np.exp(-np.linalg.norm((
                                        (self.S_[:, involved_state_idx] -
                                         x_new_mu_digitized_state) /
                                        x_new_sig))**2))

                        # Normalize so the weights of this row sum to one;
                        # explicit array conversion instead of list / float.
                        coeff_arr = np.array(coeff_lst) / np.sum(coeff_lst)
                        #assign transition probability for each state
                        for coeff, involved_state_idx in zip(
                                coeff_arr.tolist(), involved_states):
                            trans[state_idx, involved_state_idx] += coeff
                    else:
                        #only updated state is available, need to map it to the grid
                        #wrap x_new if needed, this is useful for state variable like angular position
                        x_new[wrap_idx] = np.mod(
                            x_new[wrap_idx] - wrap_min,
                            wrap_span) + wrap_min

                        #barycentric interpolation onto neighbouring grid states
                        indices, coeffs = utils.BarycentricInterpolation(
                            self.xbins_, np.array([x_new]))

                        for i in range(len(indices[0])):
                            trans[state_idx, indices[0, i]] = coeffs[0, i]
                else:
                    #discrete state dynamical system...
                    #for discrete state dynamics, take the direct returned states and associated probability
                    x_new_lst = sys.Dynamics(state, action)
                    self.C_[state_idx, action_idx] = costfunc(
                        state, action, sys)

                    for x_new in x_new_lst:
                        #get index of x_new; x_new is (next_state, probability)
                        x_new_idx = xdigitize(x_new[0])
                        state_new_idx = sub2ind(x_new_idx)
                        trans[state_idx, state_new_idx] = x_new[1]
        return