class Actor(object):
    """Policy half of the actor-critic pair.

    Owns an ``NN(n_st, n_act)`` network, an Adam optimizer bound to that
    network, and a noise generator produced by ``ou_process`` (presumably an
    Ornstein-Uhlenbeck process -- confirm against its definition).

    Args:
        n_st: dimension of the state input.
        n_act: dimension of the action output.
    """

    def __init__(self, n_st, n_act):
        super(Actor, self).__init__()
        self.n_st, self.n_act = n_st, n_act
        self.model = NN(n_st, n_act)
        self.optimizer = optimizers.Adam()
        self.optimizer.setup(self.model)
        # The noise generator is seeded with a float32 zero vector, one
        # entry per action dimension.
        initial_noise = np.zeros((n_act), dtype=np.float32)
        self.noise = ou_process(initial_noise)

    def action(self, st, noise=False):
        """Return the network's action for ``st``.

        With ``noise`` truthy, the next sample from the noise generator is
        added and the result is clipped to [-1, 1]; otherwise the raw
        network output data is returned.
        """
        output = self.model(st, norm=True)
        if not noise:
            return output.data
        perturbation = next(self.noise)
        return np.clip(output.data + perturbation, -1, 1)

    def update(self, st, dqda):
        """Run one optimizer step, back-propagating ``-dqda`` from the output.

        Args:
            st: state batch fed to the network.
            dqda: gradient supplied by the caller; its negation is used as
                the output gradient, so the optimizer's descent step moves
                the output along ``+dqda``.
        """
        policy_out = self.model(st, norm=True)
        self.model.cleargrads()
        policy_out.grad = -dqda
        policy_out.backward()
        self.optimizer.update()

    def update_target(self, tau, current_NN):
        """Delegate to the network's ``weight_update(tau, current_NN)``."""
        self.model.weight_update(tau, current_NN)

    def save_model(self, outputfile):
        """Serialize the network to ``outputfile`` in npz format."""
        serializers.save_npz(outputfile, self.model)

    def load_model(self, inputfile):
        """Load network parameters from the npz file ``inputfile``."""
        serializers.load_npz(inputfile, self.model)
class Critic(object):
    """Q-function half of the actor-critic pair.

    Owns an ``NN(n_st + n_act, 1)`` network fed with state and action
    concatenated along axis 1, an Adam optimizer bound to it, and a list of
    formatted log strings appended on every ``update`` call.

    Args:
        n_st: dimension of the state input.
        n_act: dimension of the action input.
    """

    def __init__(self, n_st, n_act):
        super(Critic, self).__init__()
        self.n_st, self.n_act = n_st, n_act
        self.model = NN(n_st + n_act, 1)
        self.optimizer = optimizers.Adam()
        self.optimizer.setup(self.model)
        self.log = []

    def Q_value(self, st, act):
        """Return the network output data for the batch ``(st, act)``."""
        joint = np.concatenate((st, act), axis=1)
        return self.model(joint).data

    def return_dqda(self, st, act):
        """Return the gradient of Q with respect to the action columns.

        The backward pass is seeded with ones of shape ``(batch, 1)``, so the
        gradient stored on the input is dQ/d(input) for each row; the columns
        from index ``n_st`` onward are the action part.
        """
        joint = Variable(np.concatenate((st, act), axis=1))
        self.model.cleargrads()
        q_out = self.model(joint)
        batch_size = joint.shape[0]
        q_out.grad = np.ones((batch_size, 1), dtype=np.float32)
        q_out.backward()
        return joint.grad[:, self.n_st:]

    def update(self, y, st, act):
        """Run one optimizer step on the MSE between Q(st, act) and ``y``.

        Appends a formatted ``Q``/``y`` line to ``self.log`` and returns the
        loss data.
        """
        self.model.cleargrads()
        joint = np.concatenate((st, act), axis=1)
        q_out = self.model(joint)
        loss = F.mean_squared_error(q_out, Variable(y))
        loss.backward()
        self.optimizer.update()
        entry = 'Q:{0},y:{1}\n'.format(q_out.data.T, y.T)
        self.log.append(entry)
        return loss.data

    def update_target(self, tau, current_NN):
        """Delegate to the network's ``weight_update(tau, current_NN)``."""
        self.model.weight_update(tau, current_NN)

    def save_model(self, outputfile):
        """Serialize the network to ``outputfile`` in npz format."""
        serializers.save_npz(outputfile, self.model)

    def load_model(self, inputfile):
        """Load network parameters from the npz file ``inputfile``."""
        serializers.load_npz(inputfile, self.model)
class Model(object):
    """Model for predicting the next state from the current state and action.

    The network is ``NN(state_n + action_n, state_n)`` trained with
    MomentumSGD (lr=1e-4) on a mean-squared-error loss over stored
    ``(state_action, next_state)`` pairs.

    Args:
        state_n: dimension of state
        action_n: dimension of action

    Result:
        next state = f(state, action) = NN(state + action)
    """

    def __init__(self, state_n, action_n):
        super(Model, self).__init__()
        self.model = NN(state_n + action_n, state_n)
        self.optimizer = optimizers.MomentumSGD(lr=1e-4)
        self.optimizer.setup(self.model)
        # FIFO buffer of (state_action, next_state) pairs; store_data drops
        # the oldest entry once the size exceeds train_data_size_max.
        self.train_data = deque()
        self.train_data_size_max = 2000

    def predict(self, state, action):
        """Return the network output for a single ``(state, action)`` pair.

        ``state`` and ``action`` are concatenated along axis 0, cast to
        float32 and reshaped into a batch of one row before the forward pass.
        """
        state_action = np.concatenate(
            (state, action), axis=0).astype(np.float32)
        batch_of_one = Variable(
            state_action.reshape((1, state_action.shape[0])))
        return self.model(batch_of_one)

    def store_data(self, state, action, next_state):
        """Append one transition to the training buffer, evicting the oldest
        entry when the buffer grows past ``train_data_size_max``."""
        state_action = np.concatenate((state, action), axis=0)
        self.train_data.append((state_action, next_state))
        if len(self.train_data) > self.train_data_size_max:
            self.train_data.popleft()

    def shuffle_data(self):
        """Return the buffer as a row-shuffled ``(N, 2)`` object array.

        Column 0 holds the state_action vectors and column 1 the next_state
        vectors. The array is filled explicitly because the two vectors in a
        pair have different lengths: handing such ragged pairs straight to
        ``np.array`` raises ValueError on NumPy >= 1.24.
        """
        data = np.empty((len(self.train_data), 2), dtype=object)
        for row, (state_action, next_state) in enumerate(self.train_data):
            data[row, 0] = state_action
            data[row, 1] = next_state
        # For a multi-dimensional array only the first axis is permuted, so
        # each (input, target) pair stays together.
        return np.random.permutation(data)

    def train(self, n_epoch, batch_size):
        """Train for ``n_epoch`` passes over the buffer in mini-batches.

        The buffer is reshuffled at the start of every epoch. The model is
        saved to the default ``save_model`` path once training finishes.
        """
        print('Train start!')
        for _ in range(n_epoch):
            perm = self.shuffle_data()

            for start in range(0, len(perm), batch_size):
                batch_data = perm[start:start + batch_size]
                x_batch = Variable(
                    np.array(list(batch_data[:, 0]), dtype=np.float32))
                t_batch = Variable(
                    np.array(list(batch_data[:, 1]), dtype=np.float32))

                loss = F.mean_squared_error(self.model(x_batch), t_batch)
                self.model.cleargrads()
                loss.backward()
                self.optimizer.update()

        self.save_model()

    @property
    def train_data_size(self):
        """Number of transitions currently held in the buffer."""
        return len(self.train_data)

    def dump_data(self, file='train_data/train_data.txt'):
        """Pickle the training buffer to ``file``."""
        with open(file, 'wb') as f:
            pickle.dump(self.train_data, f)

    def load_data(self, file='train_data/train_data.txt'):
        """Replace the training buffer with the pickled one in ``file``.

        NOTE: unpickling can execute arbitrary code; only load files that
        were produced by ``dump_data`` from a trusted source.
        """
        with open(file, 'rb') as f:
            self.train_data = pickle.load(f)

    @staticmethod
    def exist_data(file='train_data/train_data.txt'):
        """Return True if the training-data file exists."""
        return os.path.exists(file)

    def save_model(self, file='model/model.model'):
        """Serialize the network to ``file`` in npz format."""
        serializers.save_npz(file, self.model)

    def load_model(self, file='model/model.model'):
        """Load network parameters from the npz file ``file``."""
        serializers.load_npz(file, self.model)

    @staticmethod
    def exist_model(file='model/model.model'):
        """Return True if the saved-model file exists."""
        return os.path.exists(file)