Example #1
0
                    action='store_true',
                    help='Use simple MLP on CartPole')
# Boolean command-line switches; each one is stored as True when the flag
# is present on the command line.
parser.add_argument(
    '--variable-ep-len',
    help="Change max episode length during training",
    action='store_true',
)
parser.add_argument(
    '--silent',
    help='Silence print statements during training',
    action='store_true',
)
# --test selects rendering instead of training in the entry point below.
parser.add_argument(
    '--test',
    help='Just render the env, no training',
    action='store_true',
)

if __name__ == '__main__':
    # Script entry point: parse the CLI options, build the shared model,
    # optionally restore saved weights, then either render or train.
    args = parser.parse_args()
    # Validate with an explicit check rather than `assert`, which is
    # stripped when Python runs with -O; parser.error prints the usage
    # message and exits with a non-zero status.
    if args.n % 2 != 0:
        parser.error('n must be even, got %s' % args.n)

    # Per-environment checkpoint directory. exist_ok avoids the race
    # between checking for the directory and creating it.
    chkpt_dir = 'checkpoints/%s/' % args.env_name
    os.makedirs(chkpt_dir, exist_ok=True)

    synced_model = ES(args.small_net)
    # Gradient tracking is switched off for every parameter of the model.
    for param in synced_model.parameters():
        param.requires_grad = False

    if args.restore:
        # NOTE(review): torch.load unpickles the file by default; only
        # restore checkpoints that come from a trusted source.
        state_dict = torch.load(args.restore)
        synced_model.load_state_dict(state_dict)

    if args.test:
        render_env(args, synced_model)
    else:
        train_loop(args, synced_model, chkpt_dir)
Example #2
0
                    help='Silence print statements during training')
# --test: boolean switch; when set, the entry point below renders the env
# instead of running the training loop.
parser.add_argument(
    '--test',
    help='Just render the env, no training',
    action='store_true',
)

if __name__ == '__main__':
    # Script entry point: parse the CLI options, create the environment
    # and the shared model, optionally restore saved weights, then either
    # render or train.
    args = parser.parse_args()
    # Validate with an explicit check rather than `assert`, which is
    # stripped when Python runs with -O; parser.error prints the usage
    # message and exits with a non-zero status.
    if args.n % 2 != 0:
        parser.error('n must be even, got %s' % args.n)

    # The small network is only used with this fixed set of environments;
    # any other env name is replaced by CartPole-v1.
    small_net_envs = ('CartPole-v0', 'CartPole-v1', 'MountainCar-v0')
    if args.small_net and args.env_name not in small_net_envs:
        args.env_name = 'CartPole-v1'
        print('Switching env to CartPole')

    env = create_atari_env(args.env_name)

    # Per-environment checkpoint directory. exist_ok avoids the race
    # between checking for the directory and creating it.
    chkpt_dir = 'checkpoints/%s/' % args.env_name
    os.makedirs(chkpt_dir, exist_ok=True)

    synced_model = ES(env.observation_space.shape[0], env.action_space,
                      args.small_net)
    # Gradient tracking is switched off for every parameter of the model.
    for param in synced_model.parameters():
        param.requires_grad = False

    if args.restore:
        # NOTE(review): torch.load unpickles the file by default; only
        # restore checkpoints that come from a trusted source.
        state_dict = torch.load(args.restore)
        synced_model.load_state_dict(state_dict)

    if args.test:
        render_env(args, synced_model, env)
    else:
        train_loop(args, synced_model, env, chkpt_dir)
Example #3
0
                    help='Silence print statements during training')
# --test: boolean switch; when set, the entry point below renders the env
# instead of running the training loop.
parser.add_argument(
    '--test',
    help='Just render the env, no training',
    action='store_true',
)
# --max-gradient-updates: integer option, 100000 unless overridden; shown
# as MGU in the usage text.
parser.add_argument(
    '--max-gradient-updates',
    help='maximum number of updates',
    metavar='MGU',
    default=100000,
    type=int,
)

if __name__ == '__main__':
    # Script entry point: parse the CLI options, build the TicTacToe
    # environment and the shared model, optionally restore saved weights,
    # then either render or train.
    args = parser.parse_args()
    # Validate with an explicit check rather than `assert`, which is
    # stripped when Python runs with -O; parser.error prints the usage
    # message and exits with a non-zero status.
    if args.n % 2 != 0:
        parser.error('n must be even, got %s' % args.n)

    # exist_ok avoids the race between checking for the directory and
    # creating it.
    chkpt_dir = 'checkpoints/'
    os.makedirs(chkpt_dir, exist_ok=True)

    env = TicTacToeEnv()
    synced_model = ES(env.observation_space, env.action_space)
    # Gradient tracking is switched off for every parameter of the model.
    for param in synced_model.parameters():
        param.requires_grad = False

    if args.restore:
        # NOTE(review): torch.load unpickles the file by default; only
        # restore checkpoints that come from a trusted source.
        state_dict = torch.load(args.restore)
        synced_model.load_state_dict(state_dict)

    if args.test:
        render_env(synced_model)
    else:
        train_loop(args, synced_model, chkpt_dir)