예제 #1
0
    def stepEnv(self, action=None):
        # If no manual action was specified by the user
        if action is None:
            action = random.randint(0, self.env.action_space.n - 1)
        action = int(action)

        obs, reward, done, info = self.env.step(action)

        self.current_actions.append(action)
        self.current_images.append(self.lastObs['image'])
        self.current_directions.append(self.lastObs['direction'])

        self.showEnv(obs)
        self.lastObs = obs

        if done:
            if reward > 0:  # i.e. we did not lose
                if self.shift < len(self.demos):
                    self.demos[self.shift] = self.current_demo, self.shift
                else:
                    self.demos.append(
                        (self.current_mission,
                         blosc.pack_array(np.array(self.current_images)),
                         self.current_directions, self.current_actions))
                utils.save_demos(self.demos, self.demos_path)
                self.missionBox.append('Demonstrations are saved.')
                utils.synthesize_demos(self.demos)

                self.shift += 1
                self.resetEnv()
            else:
                self.shiftEnv()
예제 #2
0
    def stepEnv(self, action=None):
        # If no manual action was specified by the user
        if action is None:
            action = random.randint(0, self.env.action_space.n - 1)
        action = int(action)

        obs, reward, done, info = self.env.step(action)

        self.current_demo.append((self.lastObs, action, reward, done))

        self.showEnv(obs)
        self.lastObs = obs

        if done:
            if reward > 0:  # i.e. we did not lose
                if self.shift < len(self.demos):
                    self.demos[self.shift] = self.current_demo
                else:
                    self.demos.append(self.current_demo)
                utils.save_demos(self.demos, args.env, "human")
                self.missionBox.append('Demonstrations are saved.')
                utils.synthesize_demos(self.demos)

                self.shift += 1
                self.resetEnv()
            else:
                self.shiftEnv()
예제 #3
0
    def __init__(self, env):
        super().__init__()
        self.initUI()

        # By default, manual stepping only
        self.fpsLimit = 0

        self.env = env
        self.lastObs = None

        # Demonstrations
        self.demos = utils.load_demos(args.env, "human")
        utils.synthesize_demos(self.demos)
        self.current_demo = []

        self.shift = len(self.demos) if args.shift is None else args.shift

        self.shiftEnv()

        # Pointing and naming data
        self.pointingData = []
예제 #4
0
    def __init__(self, env):
        super().__init__()
        self.initUI()

        # By default, manual stepping only
        self.fpsLimit = 0

        self.env = env
        self.lastObs = None

        # Demonstrations
        self.demos_path = utils.get_demos_path(args.demos,
                                               args.env,
                                               origin="human",
                                               valid=False)
        self.demos = utils.load_demos(self.demos_path, raise_not_found=False)
        utils.synthesize_demos(self.demos)

        self.shift = len(self.demos) if args.shift is None else args.shift

        self.shiftEnv()

        # Pointing and naming data
        self.pointingData = []
utils.seed(args.seed)

# Generate environment

env = gym.make(args.env)
env.seed(args.seed)

# Define agent

agent = utils.load_agent(args, env)

# Load demonstrations

demos = utils.load_demos(args.env, "agent")
utils.synthesize_demos(demos)

for i in range(1, args.episodes+1):
    # Run the expert for one episode

    done = False
    obs = env.reset()
    demo = []

    while not(done):
        action = agent.get_action(obs)
        new_obs, reward, done, _ = env.step(action)
        agent.analyze_feedback(reward, done)

        demo.append((obs, action, reward, done))
        obs = new_obs