Example #1
import torch
import ptan
from lib import data, utils, model
from tensorboardX import SummaryWriter

if __name__=='__main__':
    message = '*' * 10 + '  A2C on Atari ' + '*' * 10
    args = utils.argpars_dqn(message)
    params = data.params[args.env]
    utils.update_params(params, args)

    params.n_envs = max(params.n_envs, 8)

    device = 'cuda' if args.cuda else 'cpu'
    envs = utils.createEnvs(params, stack_frames=2)
    shape = envs[0].observation_space.shape
    actions = envs[0].action_space.n
    net = model.A2CNet(shape, actions)
    net.to(device)
    agent = ptan.agent.ActorCriticAgent(net, device=device, apply_softmax=True)

    exp_src = ptan.experience.ExperienceSourceFirstLast(
        envs, agent, params.gamma, steps_count=params.steps)
    generator = utils.BatchGenerator(exp_src, params)
    mean_monitor = utils.MeanRewardsMonitor(envs[0], net, 'A2C', params.solve_rewards)

    writer = SummaryWriter(logdir=mean_monitor.runs_dir,
                           comment=params.frame_stack)

    optimizer = torch.optim.Adam(net.parameters(), lr=params.lr)
    
    # lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.75, patience=20000,
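
The listing breaks off at the commented-out LR scheduler; in the original script the training loop comes next. Below is a minimal sketch of a standard A2C update, assuming that BatchGenerator yields batches of (states, taken actions, n-step value targets) as tensors and that A2CNet returns (policy logits, state values); both assumptions concern code in lib/ that is not shown here and may differ from the real interfaces.

    import torch.nn.functional as F  # not among the imports shown above

    for states, actions, vals_ref in generator:
        optimizer.zero_grad()
        logits, values = net(states)
        # critic: fit V(s) to the n-step bootstrapped return
        loss_value = F.mse_loss(values.squeeze(-1), vals_ref)
        # actor: advantage-weighted log-probability of the taken actions
        log_probs = F.log_softmax(logits, dim=1)
        advantages = vals_ref - values.squeeze(-1).detach()
        loss_policy = -(advantages * log_probs[range(len(actions)), actions]).mean()
        # entropy bonus keeps the policy from collapsing too early
        probs = F.softmax(logits, dim=1)
        entropy = -(probs * log_probs).sum(dim=1).mean()
        loss = loss_policy + 0.5 * loss_value - 0.01 * entropy
        loss.backward()
        torch.nn.utils.clip_grad_norm_(net.parameters(), 0.5)
        optimizer.step()
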
Example #2
        default='pong',
        choices=GAMES,
        help='name of the game: invaders, pong (default), breakout')
    parser.add_argument('--cuda',
                        action='store_true',
                        help='Train on GPU when available')
    args = parser.parse_args()

    device = 'cuda' if (args.cuda and torch.cuda.is_available()) else 'cpu'

    frames = mp.Value('i', 0)
    episodes = mp.Value('i', 0)

    params = data.params[args.env]

    env = utils.createEnvs(params)[0]
    shape = env.observation_space.shape
    actions = env.action_space.n

    net = model.DDQN(shape, actions).to(device)
    net.share_memory()
    print(net)
    tgt_net = ptan.agent.TargetNet(net)
    selector = ptan.actions.EpsilonGreedyActionSelector()
    agent = ptan.agent.DQNAgent(net, selector, device=device)
    buffer = ptan.experience.ExperienceReplayBuffer(None, params.buffer_size)

    optimizer = torch.optim.Adam(net.parameters(), lr=params.lr)
    exp_queue = mp.Queue(THREADS)
    proc_list = []
    for n in range(THREADS):
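        # --- the loop body is cut off in the original listing ---
        # A plausible continuation (sketch only): start one experience-gathering
        # process per thread; `data_fun` is a hypothetical worker that plays the
        # game with the shared net and pushes transitions into exp_queue.
        proc = mp.Process(target=data_fun,
                          args=(net, exp_queue, params, frames, episodes))
        proc.start()
        proc_list.append(proc)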
Example #3
    # mp.set_start_method('spawn')
    os.environ['OMP_NUM_THREADS'] = '1'
    os.environ['MKL_THREADING_LAYER'] = 'GNU'

    message = '*' * 10 + '  A3C on Atari ' + '*' * 10
    args = utils.argpars_dqn(message)
    params = data.params[args.env]
    utils.update_params(params, args)

    # For A2C/A3C to converge we need a high number of environments
    params.n_envs = max(params.n_envs, 8)

    device = 'cuda' if args.cuda else 'cpu'

    env = utils.createEnvs(
        params,
        stack_frames=params.frame_stack)[0]  # we can get rid of this one

    shape = env.observation_space.shape
    actions = env.action_space.n
    net = model.A2CNet(shape, actions)
    net.to(device)
    net.share_memory()
    optimizer = torch.optim.Adam(net.parameters(), lr=params.lr)

    mean_monitor = utils.MeanRewardsMonitor(env, net, ALGORITHM,
                                            params.solve_rewards)

    writer = SummaryWriter(logdir=mean_monitor.runs_dir,
                           comment=params.frame_stack)
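
Example #3 stops after creating the SummaryWriter; in an A3C setup the shared network is then handed to several child processes. A minimal sketch of such a data-gathering worker, reusing the ptan agent/experience-source pattern from Example #1 (the function name, argument list and queue protocol are assumptions, not code from the repo):

def data_func(net, device, train_queue, params):
    # each worker builds its own environments but reuses the shared network
    envs = utils.createEnvs(params, stack_frames=params.frame_stack)
    agent = ptan.agent.ActorCriticAgent(net, device=device, apply_softmax=True)
    exp_src = ptan.experience.ExperienceSourceFirstLast(
        envs, agent, params.gamma, steps_count=params.steps)
    for exp in exp_src:
        # ship every transition to the parent process, which runs the optimizer
        train_queue.put(exp)
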
Example #4
    parser.add_argument('-s', '--steps', default=4, type=int,
                        help='number of steps to unroll for n-step returns')
    parser.add_argument('-n', '--envs', default=3, type=int,
                        help='Number of environments to run simultaneously')
    parser.add_argument('-g', '--game', default='invaders', choices=GAMES,
                        help='OpenAI gym environment name')
    parser.add_argument('--play', action='store_true',
                        help='Play a game when the environment is solved')

    args = parser.parse_args()

    device = 'cuda' if args.cuda else 'cpu'

    params = data.params[args.game]

    envs = utils.createEnvs(params)

    shape = envs[0].observation_space.shape
    actions = envs[0].action_space.n

    net = model.DDQN(shape, actions).to(device)
    tgt_net = ptan.agent.TargetNet(net)

    selector = ptan.actions.EpsilonGreedyActionSelector(
        epsilon=params.eps_start)
    eps_tracker = ptan.actions.EpsilonTracker(
        selector, params.eps_start, params.eps_final, params.eps_frames*args.envs)
    agent = ptan.agent.DQNAgent(net, selector, device=device)
    exp_source = ptan.experience.ExperienceSourceFirstLast(
        envs, agent, params.gamma, steps_count=args.steps)
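
The snippet is cut right after the experience source; a replay buffer and the training loop normally come next in this kind of ptan DQN script. A minimal sketch follows; the params fields (init_replay, batch_size, sync_nets) and the calc_loss helper are assumptions, not code shown in the listing.

    buffer = ptan.experience.ExperienceReplayBuffer(
        exp_source, buffer_size=params.buffer_size)
    optimizer = torch.optim.Adam(net.parameters(), lr=params.lr)

    frame_idx = 0
    while True:
        frame_idx += args.envs
        buffer.populate(args.envs)      # play a few steps in every environment
        eps_tracker.frame(frame_idx)    # decay epsilon as training progresses
        if len(buffer) < params.init_replay:
            continue                    # wait until the buffer has warmed up
        optimizer.zero_grad()
        batch = buffer.sample(params.batch_size)
        loss = calc_loss(batch, net, tgt_net.target_model,
                         params.gamma ** args.steps, device)  # assumed helper
        loss.backward()
        optimizer.step()
        if frame_idx % params.sync_nets < args.envs:
            tgt_net.sync()              # copy the online net into the target net
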
Example #5
    mp.set_start_method('spawn')
    os.environ['OMP_NUM_THREADS'] = '1'
    os.environ['MKL_THREADING_LAYER'] = 'GNU'

    message = '*' * 10 + '  A3C GRU on Atari ' + '*' * 10
    args = utils.argpars_dqn(message)
    params = data.params[args.env]
    utils.update_params(params, args)

    # For A2C/A3C to converge we need a lot of environments to draw observations from
    # This keeps the samples approximately i.i.d.
    params.n_envs = max(params.n_envs, 8)

    device = 'cuda' if args.cuda else 'cpu'

    env = utils.createEnvs(params, stack_frames=params.frame_stack)[0]

    shape = env.observation_space.shape
    actions = env.action_space.n
    net = model.A2Cgru(shape, actions)
    net.to(device)
    net.share_memory()
    optimizer = torch.optim.Adam(net.parameters(), lr=params.lr)

    mean_monitor = utils.MeanRewardsMonitor(env, net, ALGORITHM,
                                            params.solve_rewards)

    writer = SummaryWriter(logdir=mean_monitor.runs_dir,
                           comment=params.frame_stack)

    lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
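        # the argument list is cut off in the original listing; the commented-out
        # scheduler in Example #1 suggests a call along these lines (sketch):
        optimizer, mode='max', factor=0.75, patience=20000)
    # In 'max' mode the scheduler is later stepped with the mean reward, e.g.
    # lr_scheduler.step(mean_reward), so the LR decays when progress stalls.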
Example #6
                        action='store_true',
                        help='Activate GPU in training')
    parser.add_argument('--stack',
                        default=4,
                        type=int,
                        help='stack N frames in each observation: '
                             'this will change the input of the nn')
    args = parser.parse_args()

    params = data.params[args.env]
    torch.manual_seed(params.seed)
    device = 'cuda' if (args.cuda and torch.cuda.is_available()) else 'cpu'

    envs = utils.createEnvs(params, stack_frames=args.stack)

    shape = envs[0].observation_space.shape
    actions = envs[0].action_space.n

    net = model.DDQN(shape, actions).to(device)
    tgt_net = ptan.agent.TargetNet(net)

    selector = ptan.actions.EpsilonGreedyActionSelector()
    eps_tracker = ptan.actions.EpsilonTracker(selector, params.eps_start,
                                              params.eps_final,
                                              params.eps_frames)

    agent = ptan.agent.DQNAgent(net, selector, device=device)
    exp_src = ptan.experience.ExperienceSourceFirstLast(
        envs, agent, params.gamma, steps_count=params.steps)
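
This script is cut off at the same point as Example #4, before the replay buffer and the training loop (the loop itself would resemble the one sketched after Example #4). As a complement, here is what the loss for a batch of ptan FirstLast transitions could look like with double-DQN action selection; the helper is a sketch, not code from the repo.

import numpy as np  # not among the imports shown above

def calc_loss(batch, net, tgt_net, gamma, device='cpu'):
    # unpack ptan ExperienceFirstLast tuples into tensors; terminal transitions
    # have last_state of None, so substitute the start state and mask them below
    states = torch.tensor(np.array([e.state for e in batch]),
                          dtype=torch.float32, device=device)
    actions = torch.tensor([e.action for e in batch],
                           dtype=torch.long, device=device)
    rewards = torch.tensor([e.reward for e in batch],
                           dtype=torch.float32, device=device)
    dones = torch.tensor([e.last_state is None for e in batch],
                         dtype=torch.bool, device=device)
    last_states = torch.tensor(np.array(
        [e.state if e.last_state is None else e.last_state for e in batch]),
        dtype=torch.float32, device=device)

    q_taken = net(states).gather(1, actions.unsqueeze(-1)).squeeze(-1)
    with torch.no_grad():
        # double DQN: the online net picks the next action,
        # the target net evaluates it
        next_actions = net(last_states).max(dim=1)[1]
        next_q = tgt_net(last_states).gather(
            1, next_actions.unsqueeze(-1)).squeeze(-1)
        next_q[dones] = 0.0
        target = rewards + gamma * next_q
    return torch.nn.functional.mse_loss(q_taken, target)
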
Example #7
        type=int,
        help='Frames to skip when stacking. Must be specified when selecting --lw')
    parser.add_argument('--model',
                        type=str,
                        help='Path to the trained state dict model')
    parser.add_argument(
        '--record',
        action='store_true',
        help='record a video of the game and store it in ~/Videos')
    args = parser.parse_args()

    params = data.params[args.env]
    params.max_steps = None
    env = utils.createEnvs(params,
                           stack_frames=args.stack,
                           episodic_life=False,
                           reward_clipping=False,
                           skip=args.skip)[0]
    # recording the game
    if args.record: env = gym.wrappers.Monitor(env, "Videos", force=True)

    shape, actions = env.observation_space.shape, env.action_space.n
    net = model.A2Cgru(shape, actions)
    print(net)
    if args.model: net.load_state_dict(load(args.model, map_location='cpu'))

    selector = ProbabilityActionSelector()
    agent = ActorCriticAgent(net, selector, apply_softmax=True)

    utils.play(env, agent, wait=args.wait, render=args.render)
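
utils.play is a repo helper that none of these listings show. A minimal sketch of what such a play loop typically does, keeping the wait/render keywords from the call above (the body is an assumption, written against the classic gym step API):

import time

def play(env, agent, wait=0.0, render=True):
    obs, total_reward, done = env.reset(), 0.0, False
    while not done:
        if render:
            env.render()
            time.sleep(wait)
        # ptan agents take a batch of observations and return a batch of actions
        actions, _ = agent([obs])
        obs, reward, done, _ = env.step(actions[0])
        total_reward += reward
    print('Total reward: %.2f' % total_reward)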