def stepEnv(self, action=None):
    """Apply one action to the environment and record it.

    If the episode ends with a win, the recorded demonstration is
    stored at slot ``self.shift`` (overwriting any existing demo there)
    or appended, then persisted to ``self.demos_path``.

    :param action: integer action index; when ``None`` (no manual
        action specified by the user) a random action is sampled.
    """
    # If no manual action was specified by the user
    if action is None:
        action = random.randint(0, self.env.action_space.n - 1)
    action = int(action)

    obs, reward, done, info = self.env.step(action)

    # Record the action together with the observation it was taken from
    self.current_actions.append(action)
    self.current_images.append(self.lastObs['image'])
    self.current_directions.append(self.lastObs['direction'])

    self.showEnv(obs)
    self.lastObs = obs

    if done:
        if reward > 0:  # i.e. we did not lose
            # Build the demo record once so the overwrite and append
            # branches store the same 4-tuple format.
            # BUG FIX: the overwrite branch previously stored
            # ``self.current_demo, self.shift`` — a 2-tuple referencing
            # an attribute this variant never populates — which
            # corrupted the saved demos with an inconsistent record.
            demo = (self.current_mission,
                    blosc.pack_array(np.array(self.current_images)),
                    self.current_directions,
                    self.current_actions)
            if self.shift < len(self.demos):
                self.demos[self.shift] = demo
            else:
                self.demos.append(demo)
            utils.save_demos(self.demos, self.demos_path)
            self.missionBox.append('Demonstrations are saved.')
            utils.synthesize_demos(self.demos)
            self.shift += 1
            self.resetEnv()
        else:
            self.shiftEnv()
def stepEnv(self, action=None):
    """Advance the environment by one step, logging the transition.

    On a winning episode the accumulated demonstration is stored (or
    overwritten at slot ``self.shift``) and saved; on a losing episode
    the recorder moves on to the next environment without saving.

    :param action: integer action index; when ``None`` (no manual
        action specified by the user) a random action is sampled.
    """
    # Fall back to a random action when the user gave none
    if action is None:
        action = random.randint(0, self.env.action_space.n - 1)
    action = int(action)

    new_obs, reward, done, _info = self.env.step(action)

    # Log the transition taken from the previous observation
    self.current_demo.append((self.lastObs, action, reward, done))

    self.showEnv(new_obs)
    self.lastObs = new_obs

    if not done:
        return

    if not reward > 0:
        # Episode lost: discard by moving on to the next environment
        self.shiftEnv()
        return

    # Episode won: store the demonstration and persist all demos
    if self.shift < len(self.demos):
        self.demos[self.shift] = self.current_demo
    else:
        self.demos.append(self.current_demo)
    utils.save_demos(self.demos, args.env, "human")
    self.missionBox.append('Demonstrations are saved.')
    utils.synthesize_demos(self.demos)
    self.shift += 1
    self.resetEnv()
def __init__(self, env):
    """Build the UI, load any existing human demos, and show the env.

    :param env: the gym environment instance to record from.
    """
    super().__init__()
    self.initUI()

    self.env = env
    self.lastObs = None
    self.fpsLimit = 0  # 0 means manual stepping only

    # Demonstrations recorded so far (transition lists)
    self.demos = utils.load_demos(args.env, "human")
    utils.synthesize_demos(self.demos)
    self.current_demo = []
    # Slot to write the next demo into; defaults to appending at the end
    if args.shift is None:
        self.shift = len(self.demos)
    else:
        self.shift = args.shift
    self.shiftEnv()

    # Pointing and naming data
    self.pointingData = []
def __init__(self, env):
    """Build the UI, load any existing human demos, and show the env.

    :param env: the gym environment instance to record from.
    """
    super().__init__()
    self.initUI()

    self.env = env
    self.lastObs = None
    self.fpsLimit = 0  # 0 means manual stepping only

    # Resolve the demo file path and load whatever is already there
    # (missing file is not an error — we may be starting from scratch)
    self.demos_path = utils.get_demos_path(
        args.demos, args.env, origin="human", valid=False)
    self.demos = utils.load_demos(self.demos_path, raise_not_found=False)
    utils.synthesize_demos(self.demos)
    # Slot to write the next demo into; defaults to appending at the end
    if args.shift is None:
        self.shift = len(self.demos)
    else:
        self.shift = args.shift
    self.shiftEnv()

    # Pointing and naming data
    self.pointingData = []
utils.seed(args.seed) # Generate environment env = gym.make(args.env) env.seed(args.seed) # Define agent agent = utils.load_agent(args, env) # Load demonstrations demos = utils.load_demos(args.env, "agent") utils.synthesize_demos(demos) for i in range(1, args.episodes+1): # Run the expert for one episode done = False obs = env.reset() demo = [] while not(done): action = agent.get_action(obs) new_obs, reward, done, _ = env.step(action) agent.analyze_feedback(reward, done) demo.append((obs, action, reward, done)) obs = new_obs