def step(self, action):
    # Render the stroke(s) described by `action` onto the canvas; the canvas is stored
    # as uint8 in [0, 255], so it is normalised before decoding and rescaled afterwards.
    self.canvas = (decode(action, self.canvas.float() / 255) * 255).byte()
    self.stepnum += 1
    ob = self.observation()
    done = (self.stepnum == self.max_step)
    reward = self.cal_reward()  # np.array([0.] * self.batch_size)
    return ob.detach(), reward, np.array([done] * self.batch_size), None
def step(self, action, episode_num):
    if self.use_multiple_renderers:
        # Pick the renderer according to the current episode number.
        self.canvas = (decode_multiple_renderers(
            action, self.canvas.float() / 255, episode_num) * 255).byte()
    else:
        self.canvas = (decode(action, self.canvas.float() / 255) * 255).byte()
    self.stepnum += 1
    ob, mask = self.observation()
    done = (self.stepnum == self.max_step)
    reward = self.cal_reward()  # np.array([0.] * self.batch_size)
    return ob.detach(), reward, np.array([done] * self.batch_size), None, mask
def step(self, action):
    # if torch.is_tensor(self.prev_action):
    #     # connect the gap between the previous stroke and the current one
    #     new_action = [
    #         self.prev_action[:, 4:6],
    #         self.prev_action[:, 4:6],
    #         action[:, :2],
    #         self.prev_action[:, 7:8],
    #         action[:, 6:7],
    #         action[:, 8:13]
    #     ]
    #     connect_action = torch.cat(new_action, dim=1)
    #     self.canvas = (decode(connect_action, self.canvas.float() / 255) * 255).byte()
    self.canvas = (decode(action, self.canvas.float() / 255) * 255).byte()
    self.stepnum += 1
    self.prev_action = action
    ob = self.observation()
    done = (self.stepnum == self.max_step)
    reward = self.cal_reward()  # np.array([0.] * self.batch_size)
    return ob.detach(), reward, np.array([done] * self.batch_size), None
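# A minimal usage sketch (assumption, not part of the original source): how a caller
# might drive the first step() variant above for one episode. `env` and `actor` are
# hypothetical stand-ins for the environment instance and the policy network used in
# the actual training loop, and env.reset() is assumed to return the initial observation.
def rollout_sketch(env, actor):
    ob = env.reset()
    for _ in range(env.max_step):
        action = actor(ob)                       # batch of stroke parameter vectors
        ob, reward, done, _ = env.step(action)   # render strokes and score the new canvas
        if done.all():                           # `done` is an np.array with one flag per sample
            break
    return ob, reward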