Example 1
import gym

import user_input  # project-provided keyboard input helper used below


def main():
    user_input_module = user_input.UserInputModule(is_asyn=False)
    env = gym.make('Assault-v0')

    for i_episode in range(20):
        observation = env.reset()
        for t in range(100):
            env.render()
            print(observation)

            human_action = user_input_module.getInput()
            # Map the pressed key to one of the six discrete Assault-v0 actions;
            # unrecognised keys fall back to action 0 (NOOP).
            if human_action == 'j':
                action = 0
            elif human_action == 'f':
                action = 1
            elif human_action == 'd':
                action = 2
            elif human_action == 's':
                action = 3
            elif human_action == 'a':
                action = 4
            elif human_action == 'k':
                action = 5
            else:
                action = 0
            print(action)

            observation, reward, done, info = env.step(action)
            if done:
                print("Episode finished after {} time steps".format(t + 1))
                break
    env.close()
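
The repeated if/elif keyboard mapping above could equally be written as a dictionary lookup; a minimal sketch (hypothetical helper, not part of the original example):

KEY_TO_ACTION = {'j': 0, 'f': 1, 'd': 2, 's': 3, 'a': 4, 'k': 5}


def map_key_to_action(key):
    # Unrecognised keys fall back to action 0, matching the else branch above.
    return KEY_TO_ACTION.get(key, 0)


Example 2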
    def run_episodes(self):
        # Assumes module-level imports of gym, os, six, user_input and counter_saver.
        user_input_module = user_input.UserInputModule(is_asyn=True)
        env = gym.make('Assault-v0')

        human_actions = {}
        for i_episode in range(5):
            episode_log_dir = self.expr_log_dir + '/' + 'episode-' + str(
                i_episode)
            if not os.path.exists(episode_log_dir):
                os.makedirs(episode_log_dir)

            env.reset()
            for t in range(10000):
                env.render()

                # Dump the raw ALE frame for this time step so the episode can be reviewed offline.
                env.env.ale.saveScreenPNG(
                    six.b(episode_log_dir + '/' + str(t) + '.png'))

                human_action = user_input_module.getInput()
                # With asynchronous input, getInput() returns None when no key was
                # pressed this step, so fall back to a random action.
                if human_action is None:
                    action = env.action_space.sample()
                else:
                    print(human_action)
                    human_actions[str(i_episode) + '_' + str(t)] = human_action
                    if human_action == 'j':
                        action = 0
                    elif human_action == 'f':
                        action = 1
                    elif human_action == 'd':
                        action = 2
                    elif human_action == 's':
                        action = 3
                    elif human_action == 'a':
                        action = 4
                    elif human_action == 'k':
                        action = 5
                    else:
                        action = 0

                observation, reward, done, info = env.step(action)

                if done:
                    print("Episode finished after {} time steps".format(t + 1))
                    break

        counter_saver.saveDictToFile(
            self.expr_log_dir + '/' + 'human_actions.txt', human_actions)
        env.close()
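
The log paths above are built by string concatenation with '/'; os.path.join keeps the same layout while staying platform-neutral. A small sketch of the equivalent directory and frame naming (hypothetical refactor, assuming Python 3 for exist_ok):

import os


def frame_path(expr_log_dir, i_episode, t):
    episode_log_dir = os.path.join(expr_log_dir, 'episode-' + str(i_episode))
    os.makedirs(episode_log_dir, exist_ok=True)  # replaces the exists()/makedirs() pair
    return os.path.join(episode_log_dir, str(t) + '.png')


Example 3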
    def __init__(self,
                 grid_name='DiscountGrid',
                 discount=0.9,
                 learning_rate=0.5,
                 living_reward=0.0,
                 noise=0,
                 epsilon=0.3,
                 display_speed=0.5,
                 grid_size=150,
                 text_only=False,
                 n_episodes=100,
                 agent_window_size=1,
                 agent_max_n_experiences=1000,
                 check_policy_converge=False,
                 optimal_policy=None,
                 expr_log_dir=None,
                 agent_type="qLearningAgent",
                 init_temp=1024.0,
                 temp_decrease_rate=2.0,
                 is_asyn_input=True):
        """
        :param agent_type: "qLearningAgent" or "TamerAgent" or "preferenceTAMERAgent"
        """
        ###########################
        # GENERAL CONTROL
        ###########################

        self.text_only = text_only
        self.display_speed = display_speed
        self.n_episodes = n_episodes
        self.discount = discount
        self.check_policy_converge = check_policy_converge
        self.optimal_policy = optimal_policy
        self.expr_log_dir = expr_log_dir
        self.save_VDBE = False

        ###########################
        # GET THE INPUT MODULE
        ###########################
        if agent_type == qlearningAgents.QLearningAgent.getAgentType():
            self.user_input_module = None
        else:
            self.user_input_module = user_input.UserInputModule(is_asyn=is_asyn_input)

        ###########################
        # GET THE GRIDWORLD
        ###########################

        # noinspection PyUnresolvedReferences
        import gridworld
        mdp_function = getattr(gridworld, "get" + grid_name)
        self.mdp = mdp_function()
        self.mdp.setLivingReward(living_reward)
        self.mdp.setNoise(noise)
        self.env = gridworld.GridworldEnvironment(self.mdp)

        ###########################
        # Variables used to store parameters values
        ###########################

        # init VDBE values records
        global VDBE_RECORDS
        VDBE_RECORDS = dict()
        for state in self.env.getGridWorld().getNonTerminalStates():
            VDBE_RECORDS[state] = list()

        ###########################
        # GET THE DISPLAY ADAPTER
        ###########################

        import textGridworldDisplay
        self.display = textGridworldDisplay.TextGridworldDisplay(self.mdp)
        if not text_only:
            import graphicsGridworldDisplay
            self.display = graphicsGridworldDisplay.GraphicsGridworldDisplay(self.mdp, grid_size, display_speed)
        try:
            self.display.start()
        except KeyboardInterrupt:
            sys.exit(0)

        ###########################
        # GET THE TAMER AGENT
        ###########################

        # env.getPossibleActions, opts.discount, opts.learningRate, opts.epsilon
        # simulationFn = lambda agent, state: simulation.GridworldSimulation(agent,state,mdp)
        self.gridWorldEnv = GridworldEnvironment(self.mdp)
        action_function = lambda m_state: self.mdp.getPossibleActions(m_state)
        q_learn_opts = {
            'gamma': discount,
            'alpha': learning_rate,
            'epsilon': epsilon,
            'actionFn': action_function,
            'init_temp': init_temp,
            'temp_decrease_rate': temp_decrease_rate
        }

        if agent_type == qlearningAgents.QLearningAgent.getAgentType():
            self.agent = qlearningAgents.QLearningAgent(**q_learn_opts)
        elif agent_type == qlearningAgents.TamerQAgent.getAgentType():
            self.agent = qlearningAgents.TamerQAgent(
                max_n_experiences=agent_max_n_experiences,
                window_size=agent_window_size,
                is_asyn_input=is_asyn_input,
                **q_learn_opts)
        elif agent_type == preferenceTamerAgent.PreferenceTAMERAgent.getAgentType():
            self.agent = preferenceTamerAgent.PreferenceTAMERAgent(
                max_n_experiences=agent_max_n_experiences,
                window_size=agent_window_size,
                is_asyn_input=is_asyn_input,
                **q_learn_opts)
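
The constructor resolves the grid factory dynamically with getattr(gridworld, "get" + grid_name). A toy, self-contained illustration of that lookup pattern (fake module, not the real gridworld):

class fake_gridworld(object):
    # Stand-in for the gridworld module: each getXxxGrid() builds one grid.
    @staticmethod
    def getDiscountGrid():
        return 'DiscountGrid instance'

    @staticmethod
    def getBridgeGrid():
        return 'BridgeGrid instance'


mdp_function = getattr(fake_gridworld, 'get' + 'DiscountGrid')
print(mdp_function())  # -> DiscountGrid instance


Example 4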
    def __init__(self,
                 grid_name='DiscountGrid',
                 discount=0.9,
                 learning_rate=0.5,
                 living_reward=0.0,
                 noise=0,
                 epsilon=0.3,
                 display_speed=0.5,
                 grid_size=150,
                 text_only=False,
                 n_episodes=100,
                 agent_window_size=1,
                 agent_max_n_experiences=1000,
                 check_value_converge=False,
                 check_policy_converge=False,
                 optimal_policy=None,
                 expr_log_dir=None,
                 delta=0.02,
                 is_use_q_agent=False,
                 init_temp=1024.0,
                 temp_decrease_rate=2.0,
                 is_asyn_input=True):

        ###########################
        # GENERAL CONTROL
        ###########################

        self.text_only = text_only
        self.display_speed = display_speed
        self.n_episodes = n_episodes
        self.discount = discount
        self.check_value_converge = check_value_converge
        self.check_policy_converge = check_policy_converge
        self.optimal_policy = optimal_policy
        self.expr_log_dir = expr_log_dir
        self.delta = delta

        ###########################
        # GET THE INPUT MODULE
        ###########################

        if is_use_q_agent:
            self.user_input_module = None
        else:
            self.user_input_module = user_input.UserInputModule(
                is_asyn=is_asyn_input)

        ###########################
        # GET THE GRIDWORLD
        ###########################

        # noinspection PyUnresolvedReferences
        import gridworld
        mdp_function = getattr(gridworld, "get" + grid_name)
        self.mdp = mdp_function()
        self.mdp.setLivingReward(living_reward)
        self.mdp.setNoise(noise)
        self.env = gridworld.GridworldEnvironment(self.mdp)

        ###########################
        # GET THE DISPLAY ADAPTER
        ###########################

        import textGridworldDisplay
        self.display = textGridworldDisplay.TextGridworldDisplay(self.mdp)
        if not text_only:
            import graphicsGridworldDisplay
            self.display = graphicsGridworldDisplay.GraphicsGridworldDisplay(
                self.mdp, grid_size, display_speed)
        try:
            self.display.start()
        except KeyboardInterrupt:
            sys.exit(0)

        ###########################
        # GET THE TAMER AGENT
        ###########################

        import qlearningAgents
        # env.getPossibleActions, opts.discount, opts.learningRate, opts.epsilon
        # simulationFn = lambda agent, state: simulation.GridworldSimulation(agent,state,mdp)
        self.gridWorldEnv = GridworldEnvironment(self.mdp)
        action_function = lambda state: self.mdp.getPossibleActions(state)
        q_learn_opts = {
            'gamma': discount,
            'alpha': learning_rate,
            'epsilon': epsilon,
            'actionFn': action_function,
            'init_temp': init_temp,
            'temp_decrease_rate': temp_decrease_rate
        }

        if is_use_q_agent:
            self.agent = qlearningAgents.QLearningAgent(**q_learn_opts)
        else:
            self.agent = qlearningAgents.TamerQAgent(
                max_n_experiences=agent_max_n_experiences,
                window_size=agent_window_size,
                is_asyn_input=is_asyn_input,
                **q_learn_opts)
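
q_learn_opts is passed to the agent constructors with **, so the dictionary keys become keyword arguments. A toy illustration of that expansion (FakeAgent is a stand-in, not one of the real agent classes):

class FakeAgent(object):
    def __init__(self, gamma, alpha, epsilon, actionFn, **kwargs):
        # Extra keywords such as window_size end up in **kwargs.
        self.gamma, self.alpha, self.epsilon = gamma, alpha, epsilon
        self.actionFn = actionFn
        self.extra = kwargs


q_learn_opts = {
    'gamma': 0.9,
    'alpha': 0.5,
    'epsilon': 0.3,
    'actionFn': lambda state: ['exit'],
}
agent = FakeAgent(window_size=1, **q_learn_opts)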
Example 5
    def __init__(self, optimal_policy=None, expr_log_dir=None):

        ###########################
        # GENERAL CONTROL
        ###########################

        self.text_only = ExperimentConfigurator.experimentConfig['text_only']
        self.display_speed = ExperimentConfigurator.experimentConfig[
            'display_speed']
        self.n_episodes = ExperimentConfigurator.gridWorldConfig['n_episodes']
        self.discount = ExperimentConfigurator.gridWorldConfig['discount']
        self.check_policy_converge = ExperimentConfigurator.experimentConfig[
            'check_policy_converge']
        self.optimal_policy = optimal_policy
        self.expr_log_dir = expr_log_dir
        self.save_VDBE = ExperimentConfigurator.experimentConfig['save_VDBE']

        ###########################
        # GET THE INPUT MODULE
        ###########################
        if ExperimentConfigurator.experimentConfig[
                'agent_type'] == qlearningAgents.QLearningAgent.getAgentType():
            self.user_input_module = None
        else:
            self.user_input_module = user_input.UserInputModule(
                is_asyn=ExperimentConfigurator.TamerConfig['is_asyn_input'])

        self.auto_feedback = AutoFeedback()

        ###########################
        # GET THE GRIDWORLD
        ###########################

        # noinspection PyUnresolvedReferences
        import gridworld
        mdp_function = getattr(
            gridworld,
            "get" + ExperimentConfigurator.gridWorldConfig['grid_name'])
        self.mdp = mdp_function()
        self.mdp.setLivingReward(
            ExperimentConfigurator.gridWorldConfig['living_reward'])
        self.mdp.setNoise(ExperimentConfigurator.gridWorldConfig['noise'])
        self.env = gridworld.GridworldEnvironment(self.mdp)

        ###########################
        # Variables used to store parameters values
        ###########################

        # init VDBE values records
        global VDBE_RECORDS
        VDBE_RECORDS = dict()
        for state in self.env.getGridWorld().getNonTerminalStates():
            VDBE_RECORDS[state] = list()

        ###########################
        # GET THE DISPLAY ADAPTER
        ###########################

        import textGridworldDisplay
        self.display = textGridworldDisplay.TextGridworldDisplay(self.mdp)
        if not self.text_only:
            import graphicsGridworldDisplay
            self.display = graphicsGridworldDisplay.GraphicsGridworldDisplay(
                self.mdp, ExperimentConfigurator.gridWorldConfig['grid_size'],
                self.display_speed)
        try:
            self.display.start()
        except KeyboardInterrupt:
            sys.exit(0)

        ###########################
        # GET THE TAMER AGENT
        ###########################

        # env.getPossibleActions, opts.discount, opts.learningRate, opts.epsilon
        # simulationFn = lambda agent, state: simulation.GridworldSimulation(agent,state,mdp)
        self.gridWorldEnv = GridworldEnvironment(self.mdp)
        action_function = lambda m_state: self.mdp.getPossibleActions(m_state)
        q_learn_opts = {'actionFn': action_function}

        if ExperimentConfigurator.experimentConfig['agent_type'] == \
                qlearningAgents.QLearningAgent.getAgentType():
            self.agent = qlearningAgents.QLearningAgent(**q_learn_opts)
        elif ExperimentConfigurator.experimentConfig['agent_type'] == \
                qlearningAgents.TamerQAgent.getAgentType():
            self.agent = qlearningAgents.TamerQAgent(
                max_n_experiences=ExperimentConfigurator.TamerConfig['agent_max_n_experiences'],
                window_size=ExperimentConfigurator.TamerConfig['agent_window_size'],
                is_asyn_input=ExperimentConfigurator.TamerConfig['is_asyn_input'],
                **q_learn_opts)
        elif ExperimentConfigurator.experimentConfig['agent_type'] == \
                preferenceTamerAgent.PreferenceTAMERAgent.getAgentType():
            self.agent = preferenceTamerAgent.PreferenceTAMERAgent(
                max_n_experiences=ExperimentConfigurator.TamerConfig['agent_max_n_experiences'],
                window_size=ExperimentConfigurator.TamerConfig['agent_window_size'],
                is_asyn_input=ExperimentConfigurator.TamerConfig['is_asyn_input'],
                **q_learn_opts)
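
This variant reads every setting from ExperimentConfigurator instead of constructor arguments. Based on the keys accessed above, the configuration dictionaries have roughly this shape (values are only illustrative, taken from the defaults in the earlier examples):

experimentConfig = {
    'text_only': False,
    'display_speed': 0.5,
    'check_policy_converge': False,
    'save_VDBE': False,
    'agent_type': 'qLearningAgent',
}
gridWorldConfig = {
    'grid_name': 'DiscountGrid',
    'n_episodes': 100,
    'discount': 0.9,
    'living_reward': 0.0,
    'noise': 0.2,
    'grid_size': 150,
}
TamerConfig = {
    'is_asyn_input': True,
    'agent_max_n_experiences': 1000,
    'agent_window_size': 1,
}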
Example 6
    def __init__(self,
                 grid_name='DiscountGrid',
                 discount=0.9,
                 learning_rate=0.5,
                 living_reward=0.0,
                 noise=0.2,
                 epsilon=0.3,
                 display_speed=0.5,
                 grid_size=150,
                 text_only=False,
                 n_episodes=100,
                 agent_window_size=1,
                 agent_max_n_experiences=1000,
                 is_use_q_agent=False):
        self.text_only = text_only
        self.display_speed = display_speed
        self.n_episodes = n_episodes
        self.discount = discount

        ###########################
        # GET THE INPUT MODULE
        ###########################

        if is_use_q_agent:
            self.user_input_module = None
        else:
            self.user_input_module = user_input.UserInputModule()

        ###########################
        # GET THE GRIDWORLD
        ###########################

        # noinspection PyUnresolvedReferences
        import gridworld
        mdp_function = getattr(gridworld, "get" + grid_name)
        self.mdp = mdp_function()
        self.mdp.setLivingReward(living_reward)
        self.mdp.setNoise(noise)
        self.env = gridworld.GridworldEnvironment(self.mdp)

        ###########################
        # GET THE DISPLAY ADAPTER
        ###########################

        import textGridworldDisplay
        self.display = textGridworldDisplay.TextGridworldDisplay(self.mdp)
        if not text_only:
            import graphicsGridworldDisplay
            self.display = graphicsGridworldDisplay.GraphicsGridworldDisplay(
                self.mdp, grid_size, display_speed)
        try:
            self.display.start()
        except KeyboardInterrupt:
            sys.exit(0)

        ###########################
        # GET THE TAMER AGENT
        ###########################

        import qlearningAgents
        # env.getPossibleActions, opts.discount, opts.learningRate, opts.epsilon
        # simulationFn = lambda agent, state: simulation.GridworldSimulation(agent,state,mdp)
        self.gridWorldEnv = GridworldEnvironment(self.mdp)
        action_function = lambda state: self.mdp.getPossibleActions(state)
        q_learn_opts = {
            'gamma': discount,
            'alpha': learning_rate,
            'epsilon': epsilon,
            'actionFn': action_function
        }

        if is_use_q_agent:
            self.agent = qlearningAgents.QLearningAgent(**q_learn_opts)
        else:
            self.agent = qlearningAgents.TamerQAgent(
                max_n_experiences=agent_max_n_experiences,
                window_size=agent_window_size,
                **q_learn_opts)
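
In all of these examples, action_function is a lambda that just forwards to mdp.getPossibleActions; passing the bound method directly is equivalent. A tiny self-contained check of that equivalence:

class ToyMdp(object):
    def getPossibleActions(self, state):
        return ['north', 'south'] if state == 'start' else ['exit']


mdp = ToyMdp()
wrapped = lambda state: mdp.getPossibleActions(state)  # style used in the examples
direct = mdp.getPossibleActions                        # equivalent bound method
assert wrapped('start') == direct('start')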