def train(batch_size, max_episode_length=10):
    """Fit an actor network to targets generated by the ``Paint`` environment.

    Each iteration draws a fresh ``(state, target)`` pair from
    ``env.reset_with_gen()``, builds the 9-channel network input, and takes
    one Adam step on the MSE between the actor output and the flattened
    target. The loss is printed every 100 steps.

    Args:
        batch_size: Passed to ``Paint`` and used to flatten the target to
            ``(batch_size, -1)``.
        max_episode_length: Passed to ``Paint`` and used to normalise the
            step-number channel of the state.
    """
    env = Paint(batch_size, max_episode_length)

    # Input channels, per the original author's note:
    # target (3) + canvas (3) + stepnum (1) + coordconv (2) = 9.
    output_dim = (action_dim + 3) * n_frames_per_step
    actor = ResNet(9, 18, output_dim)

    loss_fn = nn.MSELoss()
    optimizer = optim.Adam(actor.parameters(), lr=1e-2)

    for step in range(50000):
        raw_state, y_target = env.reset_with_gen()
        y_target = y_target.view(batch_size, -1)

        # First six channels are scaled by 255, the seventh by the episode
        # length; the module-level ``coord`` grid is broadcast over the batch.
        pixels = raw_state[:, :6].float() / 255
        step_fraction = raw_state[:, 6:7].float() / max_episode_length
        coord_grid = coord.expand(raw_state.shape[0], 2, 128, 128)
        state = torch.cat((pixels, step_fraction, coord_grid), 1)

        actor.zero_grad()
        prediction = actor(state)
        loss = loss_fn(prediction, y_target)
        loss.backward()
        optimizer.step()

        if step % 100 != 0:
            continue
        print("step %d: loss %f" % (step, loss))
def __init__(self, max_episode_length=10, env_batch=64, writer=None):
    """Create the wrapped ``Paint`` environment and initialise bookkeeping.

    Args:
        max_episode_length: Stored on the instance and forwarded to ``Paint``.
        env_batch: Stored on the instance and forwarded to ``Paint``.
        writer: Optional object stored as ``self.writer``; not used here.
    """
    self.max_episode_length = max_episode_length
    self.env_batch = env_batch

    # No dataset is loaded here; the environment is only constructed.
    env = Paint(self.env_batch, self.max_episode_length)
    self.env = env

    # Mirror the environment's spaces on the wrapper.
    self.observation_space = env.observation_space
    self.action_space = env.action_space

    self.writer = writer
    self.test = False
    self.log = 0
def __init__(self, opt, writer=None):
    """Create the wrapped ``Paint`` environment from an options object.

    Reads ``opt.max_step``, ``opt.env_batch`` and ``opt.dataset``. When
    ``opt.dataset`` is one of ``'celeba'``, ``'pascal'``, ``'sketchy'``,
    ``'cats'`` or ``'all'``, the matching ``load_data_*`` method of the
    environment is called; any other value loads nothing.

    Args:
        opt: Options object; also passed unchanged to ``Paint`` and kept as
            ``self.opt``.
        writer: Optional object stored as ``self.writer``; not used here.
    """
    self.max_episode_length = opt.max_step
    self.env_batch = opt.env_batch
    self.env = Paint(opt)

    # Dataset name -> name of the loader method on the environment.
    loaders = (
        ('celeba', 'load_data_celeba'),
        ('pascal', 'load_data_pascal'),
        ('sketchy', 'load_data_sketchy'),
        ('cats', 'load_data_cat'),
        ('all', 'load_data_all'),
    )
    requested = opt.dataset
    for dataset_name, loader_name in loaders:
        if requested == dataset_name:
            getattr(self.env, loader_name)()
            break

    # Mirror the environment's spaces on the wrapper.
    self.observation_space = self.env.observation_space
    self.action_space = self.env.action_space

    self.writer = writer
    self.test = False
    self.log = 0
    self.dataset = opt.dataset
    self.opt = opt