# Actor-Critic agent set-up: separate networks and learning rates for the
# policy (pi) and the state-value function (v). PolicyNet and ValueNet are
# DeZero models assumed to be defined elsewhere; a sketch follows this block.
from dezero import optimizers


class ActorCriticAgent:  # class name assumed; the source shows only __init__
    def __init__(self):
        self.gamma = 0.98        # discount factor
        self.lr_pi = 0.0002      # learning rate for the policy network
        self.lr_v = 0.0005       # learning rate for the value network
        self.action_size = 2

        self.pi = PolicyNet()
        self.v = ValueNet()
        self.optimizer_pi = optimizers.Adam(self.lr_pi).setup(self.pi)
        self.optimizer_v = optimizers.Adam(self.lr_v).setup(self.v)
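# A minimal sketch of the two networks the actor-critic __init__ above relies
# on, in the DeZero Model/Layer style. The hidden width (128) and the choice
# of ReLU/softmax here are assumptions for illustration, not taken from the
# source.
from dezero import Model
import dezero.functions as F
import dezero.layers as L


class PolicyNet(Model):
    def __init__(self, action_size=2):
        super().__init__()
        self.l1 = L.Linear(128)           # hidden width is an assumption
        self.l2 = L.Linear(action_size)

    def forward(self, x):
        x = F.relu(self.l1(x))
        return F.softmax(self.l2(x))      # action probabilities


class ValueNet(Model):
    def __init__(self):
        super().__init__()
        self.l1 = L.Linear(128)
        self.l2 = L.Linear(1)             # scalar state value V(s)

    def forward(self, x):
        x = F.relu(self.l1(x))
        return self.l2(x)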
# REINFORCE (Monte Carlo policy gradient) agent set-up: one policy network
# and an episode memory of (reward, prob) pairs, updated at episode end.
# Policy is a DeZero model assumed to be defined elsewhere.
from dezero import optimizers


class ReinforceAgent:  # class name assumed; the source shows only __init__
    def __init__(self):
        self.gamma = 0.98
        self.lr = 0.0002
        self.action_size = 2

        self.memory = []                  # (reward, prob) pairs for one episode
        self.pi = Policy(self.action_size)
        self.optimizer = optimizers.Adam(self.lr)
        self.optimizer.setup(self.pi)
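# A hedged sketch of how such an agent could use its memory: get_action
# samples from pi and returns the chosen action's probability, add logs one
# step, and update computes the per-step return G_t and minimizes
# -sum_t log pi(a_t|s_t) * G_t. Method names and bodies are assumptions,
# shown as a continuation of the class above rather than the source's own
# definitions.
import numpy as np
import dezero.functions as F


class ReinforceAgent(ReinforceAgent):  # illustrative continuation
    def get_action(self, state):
        state = state[np.newaxis, :]              # add a batch dimension
        probs = self.pi(state)[0]
        action = np.random.choice(len(probs), p=probs.data)
        return action, probs[action]

    def add(self, reward, prob):
        self.memory.append((reward, prob))

    def update(self):
        self.pi.cleargrads()
        G, loss = 0, 0
        for reward, prob in reversed(self.memory):
            G = reward + self.gamma * G           # return from step t onward
            loss += -F.log(prob) * G
        loss.backward()
        self.optimizer.update()
        self.memory = []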
# DQN agent set-up: an online network (qnet), a target network (qnet_target),
# and an experience replay buffer. QNet and ReplayBuffer are assumed to be
# defined elsewhere.
from dezero import optimizers


class DQNAgent:  # class name assumed; the source shows only __init__
    def __init__(self):
        self.gamma = 0.98
        self.lr = 0.0005
        self.epsilon = 0.1            # epsilon-greedy exploration rate
        self.buffer_size = 10000
        self.batch_size = 32
        self.action_size = 2

        self.replay_buffer = ReplayBuffer(self.buffer_size, self.batch_size)
        self.qnet = QNet(self.action_size)           # online network (trained)
        self.qnet_target = QNet(self.action_size)    # target network (periodically synced)
        self.optimizer = optimizers.Adam(self.lr)
        self.optimizer.setup(self.qnet)
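# A hedged sketch of two methods a DQN agent like the one above typically
# needs: epsilon-greedy action selection against qnet, and a hard sync of
# qnet into qnet_target. The method names are assumptions, shown as a
# continuation of the class above.
import copy
import numpy as np


class DQNAgent(DQNAgent):  # illustrative continuation
    def get_action(self, state):
        if np.random.rand() < self.epsilon:
            return np.random.choice(self.action_size)   # explore
        qs = self.qnet(state[np.newaxis, :])            # greedy w.r.t. Q-values
        return qs.data.argmax()

    def sync_qnet(self):
        # hard update: copy the online network's weights into the target network
        self.qnet_target = copy.deepcopy(self.qnet)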
# MNIST classification with DeZero: a two-hidden-layer ReLU MLP trained with
# Adam (the commented-out lines keep the simpler baseline for comparison).
import dezero
import dezero.functions as F
from dezero import optimizers, DataLoader
from dezero.models import MLP

max_epoch = 5
batch_size = 100
hidden_size = 1000

train_set = dezero.datasets.MNIST(train=True)
test_set = dezero.datasets.MNIST(train=False)
train_loader = DataLoader(train_set, batch_size)
test_loader = DataLoader(test_set, batch_size, shuffle=False)

# model = MLP((hidden_size, 10))              # baseline: one hidden layer, sigmoid
model = MLP((hidden_size, hidden_size, 10), activation=F.relu)
# optimizer = optimizers.SGD().setup(model)   # baseline: plain SGD
optimizer = optimizers.Adam().setup(model)

for epoch in range(max_epoch):
    sum_loss, sum_acc = 0, 0

    for x, t in train_loader:
        y = model(x)
        loss = F.softmax_cross_entropy(y, t)
        acc = F.accuracy(y, t)
        model.cleargrads()
        loss.backward()
        optimizer.update()

        sum_loss += float(loss.data) * len(t)
        sum_acc += float(acc.data) * len(t)

    print('epoch: {}'.format(epoch + 1))
    print('train loss: {:.4f}, accuracy: {:.4f}'.format(
        sum_loss / len(train_set), sum_acc / len(train_set)))
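# test_loader is created above but never consumed in the training loop. A
# sketch of the missing evaluation pass, run with gradients disabled via
# dezero.no_grad(); here it evaluates once after training, though it could
# equally run inside the epoch loop.
with dezero.no_grad():
    sum_loss, sum_acc = 0, 0
    for x, t in test_loader:
        y = model(x)
        loss = F.softmax_cross_entropy(y, t)
        acc = F.accuracy(y, t)
        sum_loss += float(loss.data) * len(t)
        sum_acc += float(acc.data) * len(t)
    print('test loss: {:.4f}, accuracy: {:.4f}'.format(
        sum_loss / len(test_set), sum_acc / len(test_set)))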