action='store_true', help='Use simple MLP on CartPole') parser.add_argument('--variable-ep-len', action='store_true', help="Change max episode length during training") parser.add_argument('--silent', action='store_true', help='Silence print statements during training') parser.add_argument('--test', action='store_true', help='Just render the env, no training') if __name__ == '__main__': args = parser.parse_args() assert args.n % 2 == 0 chkpt_dir = 'checkpoints/%s/' % args.env_name if not os.path.exists(chkpt_dir): os.makedirs(chkpt_dir) synced_model = ES(args.small_net) for param in synced_model.parameters(): param.requires_grad = False if args.restore: state_dict = torch.load(args.restore) synced_model.load_state_dict(state_dict) if args.test: render_env(args, synced_model) else: train_loop(args, synced_model, chkpt_dir)
help='Silence print statements during training') parser.add_argument('--test', action='store_true', help='Just render the env, no training') if __name__ == '__main__': args = parser.parse_args() assert args.n % 2 == 0 if args.small_net and args.env_name not in [ 'CartPole-v0', 'CartPole-v1', 'MountainCar-v0' ]: args.env_name = 'CartPole-v1' print('Switching env to CartPole') env = create_atari_env(args.env_name) chkpt_dir = 'checkpoints/%s/' % args.env_name if not os.path.exists(chkpt_dir): os.makedirs(chkpt_dir) synced_model = ES(env.observation_space.shape[0], env.action_space, args.small_net) for param in synced_model.parameters(): param.requires_grad = False if args.restore: state_dict = torch.load(args.restore) synced_model.load_state_dict(state_dict) if args.test: render_env(args, synced_model, env) else: train_loop(args, synced_model, env, chkpt_dir)
help='Silence print statements during training') parser.add_argument('--test', action='store_true', help='Just render the env, no training') parser.add_argument('--max-gradient-updates', type=int, default=100000, metavar='MGU', help='maximum number of updates') if __name__ == '__main__': args = parser.parse_args() assert args.n % 2 == 0 chkpt_dir = 'checkpoints/' if not os.path.exists(chkpt_dir): os.makedirs(chkpt_dir) env = TicTacToeEnv() synced_model = ES(env.observation_space, env.action_space) for param in synced_model.parameters(): param.requires_grad = False if args.restore: state_dict = torch.load(args.restore) synced_model.load_state_dict(state_dict) if args.test: render_env(synced_model) else: train_loop(args, synced_model, chkpt_dir)