metavar='N', help='model updates per simulator step (default: 5)') parser.add_argument('--num-stack', type=int, default=1, help='number of frames to stack') parser.add_argument('--model-suffix', default="", help='To resume training or not') args = parser.parse_args() env = NormalizedActions(gym.make(args.env_name)) writer = SummaryWriter() env.seed(args.seed) if torch.cuda.is_available(): device = torch.device("cuda:0") torch.cuda.manual_seed(args.seed) else: device = torch.device("cpu") torch.manual_seed(args.seed) np.random.seed(args.seed) obs_shape = env.observation_space.shape obs_shape = (obs_shape[0] * args.num_stack, *obs_shape[1:]) if len(env.observation_space.shape) == 3: image_input = True
# Persist the full hyper-parameter configuration for this experiment.
json.dump(vars(args), outfile)

# True when observations are images (CNN input) rather than state vectors.
cnn = args.pics

for i_run in range(args.max_num_run):
    logger.important(f"START TRAINING RUN {i_run}")
    # Make the environment
    env = gym.make(args.env_name)
    env._max_episode_steps = args.max_num_step
    env = NormalizedActions(env)
    if cnn:
        env = ImageWrapper(args.img_size, env)
    # Set Seed for repeatability: offset by i_run so each run is
    # reproducible yet statistically independent of the others.
    torch.manual_seed(args.seed + i_run)
    np.random.seed(args.seed + i_run)
    env.seed(args.seed + i_run)
    env.action_space.np_random.seed(args.seed + i_run)
    # Setup the agent
    agent = SAC(args.state_buffer_size, env.action_space, args)
    # Setup TensorboardX: separate writers for train and test curves,
    # one directory per run index.
    writer_train = SummaryWriter(log_dir='runs/' + folder + 'run_' + str(i_run) + '/train')
    writer_test = SummaryWriter(log_dir='runs/' + folder + 'run_' + str(i_run) + '/test')
    # Setup Replay Memory
    memory = ReplayMemory(args.replay_size)
    # TRAINING LOOP
help='max episode length (default: 1000)') parser.add_argument('--num_episodes', type=int, default=1000, metavar='N', help='number of episodes (default: 1000)') parser.add_argument('--hidden_size', type=int, default=128, metavar='N', help='number of episodes (default: 128)') parser.add_argument('--updates_per_step', type=int, default=5, metavar='N', help='model updates per simulator step (default: 5)') parser.add_argument('--replay_size', type=int, default=1000000, metavar='N', help='size of replay buffer (default: 1000000)') args = parser.parse_args() env = NormalizedActions(gym.make(args.env_name)) writer = SummaryWriter() env.seed(args.seed) torch.manual_seed(args.seed) np.random.seed(args.seed) if args.algo == "NAF": agent = NAF(args.gamma, args.tau, args.hidden_size, env.observation_space.shape[0], env.action_space) else: agent = DDPG(args.gamma, args.tau, args.hidden_size, env.observation_space.shape[0], env.action_space) memory = ReplayMemory(args.replay_size) ounoise = OUNoise(env.action_space.shape[0]) if args.ou_noise else None param_noise = AdaptiveParamNoiseSpec(initial_stddev=0.05, desired_action_stddev=args.noise_scale, adaptation_coefficient=1.05) if args.param_noise else None
# Map each CLI environment key to its pre-defined hyper-parameter dict.
_env_configs = {'mc': args_mc, 'pd': args_pd, 'll': args_ll}

env_key = sys.argv[1]
args = _env_configs.get(env_key)
if args is None:
    print('Environment not selected, Please choose from: mc, pd,ll')
    exit(-1)

# Build the normalized environment and seed every RNG for reproducibility.
env = NormalizedActions(gym.make(args['env_name']))
env.seed(args['seed'])
torch.manual_seed(args['seed'])
np.random.seed(args['seed'])

# Restore a trained NAF agent from its per-environment checkpoint.
agent = NAF(args['gamma'], args['tau'], args['hidden_size'],
            env.observation_space.shape[0], env.action_space)
agent.load_model(f'models/naf_{args["env_name"]}')

replay_buffer = ReplayBuffer(args['replay_size'])
ounoise = OUNoise(env.action_space.shape[0]) if args['ou_noise'] else None

run()
plot_results()