Example 1
def normal_goal():
    env = Breakout(conf)
    env = BreakoutFullObservableStateWrapper(env)

    # observation_space = env.observation_space
    # action_space = env.action_space
    # feat_ext = BreakoutRobotFeatureExtractor()
    # feature_space = feat_ext.output_space
    # print(observation_space, action_space, feature_space)

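    # Plain RL agent: a robot-state feature extractor, a decaying epsilon-greedy
    # exploration policy, and n-step Sarsa as the learning brain.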
    agent = RLAgent(
        BreakoutRobotFeatureExtractor(),
        RandomPolicy(env.action_space,
                     epsilon_start=1.0,
                     epsilon_end=0.01,
                     decaying_steps=1000000),
        Sarsa(None, env.action_space, alpha=None, gamma=0.99, nsteps=100))

    return env, agent
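A minimal sketch of how this env/agent pair might be driven, assuming the Trainer API used in Examples 3-5 (the driver below is illustrative and not part of the original example):

if __name__ == '__main__':
    # Hypothetical driver, modelled on the Trainer usage in the later examples.
    env, agent = normal_goal()
    t = Trainer(env, agent, n_episodes=100000, resume=False, eval=False)
    t.main()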
Example 2
def temporal_goal():
    env = Breakout(conf)
    env = BreakoutFullObservableStateWrapper(env)

    # observation_space = env.observation_space
    # action_space = env.action_space
    # robot_feat_ext = BreakoutRobotFeatureExtractor()
    # feature_space = robot_feat_ext.output_space
    # print(observation_space, action_space, feature_space)

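    # TGAgent takes the same components as RLAgent plus a list of temporal goal
    # evaluators (here, BreakoutRowBottomUpTemporalEvaluator).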
    agent = TGAgent(
        BreakoutRobotFeatureExtractor(),
        RandomPolicy(env.action_space,
                     epsilon_start=1.0,
                     epsilon_end=0.01,
                     decaying_steps=7500000),
        Sarsa(None, env.action_space, alpha=None, gamma=0.99, nsteps=200),
        [BreakoutRowBottomUpTemporalEvaluator()])

    return env, agent
Example 3
            obs_space.spaces["color"],
            obs_space.spaces["RAState"],
        ))

        super().__init__(obs_space, robot_feature_space)

    def _extract(self, input, **kwargs):
        return (input["x"], input["y"], input["theta"], input["color"],
                input["RAState"])


if __name__ == '__main__':
    env = GymSapientino()
    # Normal task - no temporal goal
    agent = RLAgent(
        SapientinoNRobotFeatureExtractor(env.observation_space),
        RandomPolicy(env.action_space, epsilon=0.1),
        QLearning(None, env.action_space, alpha=0.1, gamma=0.9, nsteps=100))

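    # Train from scratch; switching to the commented resume/eval flags (and the
    # PygameRenderer) re-runs a previously saved agent with on-screen rendering.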
    t = Trainer(
        env,
        agent,
        n_episodes=100000,
        resume=False,
        eval=False,
        # resume = True,
        # eval = True,
        # renderer=PygameRenderer(delay=0.05)
    )
    t.main()
Example 4
            obs_space.spaces["task_state"],
        ))

        super().__init__(obs_space, robot_feature_space)

    def _extract(self, input, **kwargs):
        return (input["x"], input["y"], input["theta"], input["task_state"])


if __name__ == '__main__':
    env = GymMinecraft()
    # Normal task - no temporal goal
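    # Evaluation setup: exploration decays from 1.0 to epsilon=0.01 after a
    # single step, and the Trainer below resumes a saved agent and renders it.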
    agent = RLAgent(
        MinecraftNRobotFeatureExtractor(env.observation_space),
        RandomPolicy(env.action_space,
                     epsilon=0.01,
                     epsilon_start=1.0,
                     decaying_steps=1),
        QLearning(None, env.action_space, alpha=None, gamma=0.9, nsteps=200))

    t = Trainer(
        env,
        agent,
        n_episodes=100000,
        # resume=False,
        # eval=False,
        resume=True,
        eval=True,
        renderer=PygameRenderer(delay=0.01))
    t.main()
Example 5
import gym

from rltg.agents.RLAgent import RLAgent
from rltg.agents.brains.TDBrain import Sarsa, QLearning
from rltg.agents.exploration_policies.RandomPolicy import RandomPolicy
from rltg.agents.feature_extraction import IdentityFeatureExtractor
from rltg.trainer import Trainer
from rltg.utils.GoalEnvWrapper import GoalEnvWrapper

if __name__ == '__main__':
    env = gym.make("Taxi-v2")
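    # Treat the episode as a success only when it terminates with reward 20
    # (a successful passenger drop-off in Taxi).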
    env = GoalEnvWrapper(env, lambda _, reward, done, __: done and reward == 20)
    observation_space = env.observation_space
    action_space = env.action_space
    print(observation_space, action_space)
    agent = RLAgent(
        IdentityFeatureExtractor(observation_space),
        RandomPolicy(action_space, epsilon=0.1, epsilon_start=1.0, decaying_steps=5000),
        QLearning(observation_space, action_space, alpha=0.8, nsteps=1, gamma=1.0)
    )

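    # Train for 10000 episodes; the commented-out Trainer call below instead
    # resumes the saved agent and runs it in evaluation mode.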
    tr = Trainer(env, agent, n_episodes=10000, resume=False, eval=False, window_size=1000)
    # tr = Trainer(env, agent, n_episodes=10000, resume=True,  eval=True, window_size=1000)
    tr.main()