Code Example #1
File: ddpg.py  Project: sherry4186/DDPG
import numpy as np
from chainer import optimizers, serializers

# NN (the Chainer network definition) and ou_process (an Ornstein-Uhlenbeck
# noise generator) come from elsewhere in the project.


class Actor(object):
    def __init__(self, n_st, n_act):
        super(Actor, self).__init__()
        self.n_st = n_st
        self.n_act = n_act
        self.model = NN(n_st, n_act)
        self.optimizer = optimizers.Adam()
        self.optimizer.setup(self.model)
        self.noise = ou_process(np.zeros((n_act), dtype=np.float32))

    def action(self, st, noise=False):
        # deterministic policy output mu(st); norm=True is an option of the
        # project-specific NN
        a = self.model(st, norm=True)

        if noise:
            # add Ornstein-Uhlenbeck exploration noise and clip to [-1, 1]
            n = next(self.noise)
            a = np.clip(a.data + n, -1, 1)
            return a
        else:
            return a.data

    def update(self, st, dqda):
        # deterministic policy gradient: backpropagate -dQ/da through mu(st),
        # so the optimizer step performs gradient ascent on Q(s, mu(s))
        mu = self.model(st, norm=True)
        self.model.cleargrads()
        mu.grad = -dqda
        mu.backward()
        self.optimizer.update()

    def update_target(self, tau, current_NN):
        # soft target update, delegated to the project-specific NN.weight_update
        self.model.weight_update(tau, current_NN)

    def save_model(self, outputfile):
        serializers.save_npz(outputfile, self.model)

    def load_model(self, inputfile):
        serializers.load_npz(inputfile, self.model)
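The Actor above draws its exploration noise from an ou_process generator that is defined elsewhere in the project and not shown here. A minimal sketch of such an Ornstein-Uhlenbeck noise generator (the theta, sigma and dt values below are assumptions, not taken from the project) could look like this:

def ou_process(x0, theta=0.15, mu=0.0, sigma=0.2, dt=1e-2):
    """Yield successive samples of an Ornstein-Uhlenbeck process starting at x0."""
    x = x0.copy()
    while True:
        # dx = theta * (mu - x) * dt + sigma * sqrt(dt) * N(0, 1)
        dx = theta * (mu - x) * dt + sigma * np.sqrt(dt) * np.random.randn(*x.shape)
        x = (x + dx).astype(np.float32)
        yield x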
Code Example #2
File: ddpg.py  Project: sherry4186/DDPG
from chainer import Variable
import chainer.functions as F

# Critic from the same ddpg.py as the Actor above; the numpy, optimizers and
# serializers imports shown there also apply here.


class Critic(object):
    def __init__(self, n_st, n_act):
        super(Critic, self).__init__()
        self.n_st = n_st
        self.n_act = n_act
        self.model = NN(n_st + n_act, 1)
        self.optimizer = optimizers.Adam()
        self.optimizer.setup(self.model)
        self.log = []

    def Q_value(self, st, act):
        state_action_vector = np.concatenate((st, act), axis=1)
        Q = self.model(state_action_vector).data
        return Q

    def return_dqda(self, st, act):
        # gradient of Q(s, a) with respect to the action input, used by the
        # Actor's deterministic policy gradient update
        state_action_vector = Variable(np.concatenate((st, act), axis=1))
        self.model.cleargrads()
        Q = self.model(state_action_vector)
        Q.grad = np.ones((state_action_vector.shape[0], 1), dtype=np.float32)
        Q.backward()
        # the action occupies the last n_act columns of the concatenated input
        grad = state_action_vector.grad[:, self.n_st:]
        return grad

    def update(self, y, st, act):
        # fit Q(st, act) to the TD target y by minimizing the mean squared error
        self.model.cleargrads()

        state_action_vector = np.concatenate((st, act), axis=1)
        Q = self.model(state_action_vector)

        loss = F.mean_squared_error(Q, Variable(y))

        loss.backward()
        self.optimizer.update()

        self.log.append('Q:{0},y:{1}\n'.format(Q.data.T, y.T))

        return loss.data

    def update_target(self, tau, current_NN):
        self.model.weight_update(tau, current_NN)

    def save_model(self, outputfile):
        serializers.save_npz(outputfile, self.model)

    def load_model(self, inputfile):
        serializers.load_npz(inputfile, self.model)
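For context, here is how the Actor and Critic above are typically wired together in one DDPG update step. This is an illustrative sketch only: the minibatch variables (st, act, r, st_next, done), the discount gamma, the mixing rate tau and the target networks are assumptions about the surrounding training loop, not code from the project.

# One DDPG update step on a sampled minibatch (st, act, r, st_next, done).
# actor/critic are the learned networks, target_actor/target_critic their targets.
a_next = target_actor.action(st_next)                                 # mu'(s')
y = r + gamma * (1 - done) * target_critic.Q_value(st_next, a_next)   # TD target
critic.update(y.astype(np.float32), st, act)                          # fit Q(s, a) to y

mu = actor.action(st)                                                 # mu(s), no noise
dqda = critic.return_dqda(st, mu)                                     # dQ(s, a)/da at a = mu(s)
actor.update(st, dqda)                                                # ascend Q(s, mu(s))

target_actor.update_target(tau, actor.model)                          # soft target updates
target_critic.update_target(tau, critic.model)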
Code Example #3
import os
import pickle
from collections import deque

import numpy as np
from chainer import optimizers, serializers, Variable
import chainer.functions as F

# NN is the project-specific network definition (not shown here).


class Model(object):
    """Model for predicting next state based on current state and action.

    Args:
        state_n:   dimension of state
        action_n:  dimension of action

    Result:
        next state = f(state, action) = NN(state + action)

    """
    def __init__(self, state_n, action_n):
        super(Model, self).__init__()
        self.model = NN(state_n + action_n, state_n)
        self.optimizer = optimizers.MomentumSGD(lr=1e-4)
        self.optimizer.setup(self.model)
        self.train_data = deque()
        self.train_data_size_max = 2000

    def predict(self, state, action):
        # state and action are 1-D arrays; build a (1, state_n + action_n) input row
        state_action = np.concatenate((state, action),
                                      axis=0).astype(np.float32)
        state_action = Variable(
            state_action.reshape((1, state_action.shape[0])))
        next_state = self.model(state_action)
        return next_state

    def store_data(self, state, action, next_state):
        state_action = np.concatenate((state, action), axis=0)
        self.train_data.append((state_action, next_state))
        # keep a bounded buffer: drop the oldest transition once it is full
        if len(self.train_data) > self.train_data_size_max:
            self.train_data.popleft()

    def shuffle_data(self):
        # each entry is a (state_action, next_state) pair of differently sized
        # arrays, so an object array is needed
        data = np.array(self.train_data, dtype=object)
        return np.random.permutation(data)

    def train(self, n_epoch, batch_size):
        print('Train start!')
        for epoch in range(n_epoch):
            # print(f'epoch: {epoch}')

            perm = self.shuffle_data()
            sum_loss = 0.

            # Train
            for i in range(0, len(perm), batch_size):
                batch_data = perm[i:i + batch_size]

                x_batch = np.array(list(batch_data[:, 0]), dtype=np.float32)
                t_batch = np.array(list(batch_data[:, 1]), dtype=np.float32)

                x_batch, t_batch = Variable(x_batch), Variable(t_batch)

                y = self.model(x_batch)
                loss = F.mean_squared_error(y, t_batch)

                self.model.cleargrads()
                loss.backward()
                self.optimizer.update()
                sum_loss += loss.data

            # print(f'train loss: {sum_loss}')

        self.save_model()

    @property
    def train_data_size(self):
        return len(self.train_data)

    def dump_data(self, file='train_data/train_data.txt'):
        with open(file, 'wb') as f:
            pickle.dump(self.train_data, f)

    def load_data(self, file='train_data/train_data.txt'):
        with open(file, 'rb') as f:
            self.train_data = pickle.load(f)

    @staticmethod
    def exist_data(file='train_data/train_data.txt'):
        return os.path.exists(file)

    def save_model(self, file='model/model.model'):
        serializers.save_npz(file, self.model)

    def load_model(self, file='model/model.model'):
        serializers.load_npz(file, self.model)

    @staticmethod
    def exist_model(file='model/model.model'):
        return os.path.exists(file)
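As a usage note, the Model above is driven roughly as follows. This is an illustrative sketch: the dimensions and the dummy transition below are assumptions, and in practice the transitions come from an environment loop.

n_st, n_act = 3, 1                                   # dimensions are assumptions
model = Model(n_st, n_act)
if Model.exist_data():
    model.load_data()                                # resume from stored transitions

# store one (state, action, next_state) transition
state = np.zeros(n_st, dtype=np.float32)
action = np.zeros(n_act, dtype=np.float32)
next_state = np.zeros(n_st, dtype=np.float32)
model.store_data(state, action, next_state)
model.dump_data()                                    # persist to train_data/train_data.txt

if model.train_data_size >= 32:
    model.train(n_epoch=10, batch_size=32)           # also saves to model/model.model

predicted = model.predict(state, action)             # chainer.Variable holding f(s, a)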