default='default', type=str, help='Resuming model path for testing')
# NOTE(review): the line above is the tail of a parser.add_argument('--resume', ...)
# call that starts before this chunk — confirm against the full file.
# parser.add_argument('--l2norm', default=0.01, type=float, help='l2 weight decay') # TODO
# parser.add_argument('--cuda', dest='cuda', action='store_true') # TODO

args = parser.parse_args()

# Resolve the output directory (get_output_folder presumably appends a run index;
# defined elsewhere — verify) and default the resume path to run 0 of this env.
args.output = get_output_folder(args.output, args.env)
if args.resume == 'default':
    args.resume = 'output/{}-run0'.format(args.env)

# Wrap the gym environment; NormalizedEnv is a project-local wrapper
# (presumably action/observation normalization — confirm its definition).
env = NormalizedEnv(gym.make(args.env))

# Seed numpy and the environment only when a positive seed was given.
# NOTE(review): torch is not seeded here, unlike the sibling scripts in this
# file — confirm whether that is intentional.
if args.seed > 0:
    np.random.seed(args.seed)
    env.seed(args.seed)

# State/action dimensionality taken from the (assumed flat Box) spaces.
nb_states = env.observation_space.shape[0]
nb_actions = env.action_space.shape[0]

agent = DDPG(nb_states, nb_actions, args)
# Evaluator periodically runs validation episodes during training
# (project-local class — see its definition for the exact contract).
evaluate = Evaluator(args.validate_episodes, args.validate_steps, args.output,
                     max_episode_length=args.max_episode_length)

if args.mode == 'train':
    # NOTE(review): this call is truncated in this chunk — the remaining
    # arguments to train(...) continue past the visible text.
    train(args.train_iter, agent, env, evaluate,
# Remaining hyperparameter flags (the parser is created before this chunk).
parser.add_argument('--exploration_noise', default=0.1, type=float)
parser.add_argument('--max_episode', default=10000, type=int)  # num of games
parser.add_argument('--num_episode', default=0, type=int)
parser.add_argument('--print_log', default=5, type=int)
parser.add_argument('--update_iteration', default=200, type=int)
args = parser.parse_args()

# Prefer the first CUDA device when available, else fall back to CPU.
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
print('env:', args.env_name)
print('seed:', args.random_seed)
# Script file name, used below to build a unique experiment directory.
script_name = os.path.basename(__file__)
# Smallest positive float32 increment; used as a denominator guard when
# normalizing returns/values in normal_R_V below.
eps = np.finfo(np.float32).eps
env = NormalizedEnv(gym.make(args.env_name))
# NOTE(review): the guard reads args.seed but the seeding uses
# args.random_seed — confirm both flags exist on the parser; if '--seed'
# is not defined earlier in the file this raises AttributeError.
if args.seed:
    env.seed(args.random_seed)
    torch.manual_seed(args.random_seed)
    np.random.seed(args.random_seed)

# Dimensions of the (assumed flat Box) observation/action spaces.
state_dim = env.observation_space.shape[0]
action_dim = env.action_space.shape[0]
max_action = float(env.action_space.high[0])
min_Val = torch.tensor(1e-7).float().to(device)  # min value
# Experiment output directory derived from script, seed, and env name.
# NOTE(review): the trailing './' looks like a typo for '/' — verify how
# this path is consumed before changing it.
directory = './exp' + script_name + 'Seed' + str(args.random_seed) + args.env_name + './'

def normal_R_V(R_, current_Q, reward):
    # Standardize the discounted returns R_ (zero mean, unit std), guarding
    # against a zero std with float32 eps.
    R_ = np.array(R_)
    R_ = (R_ - R_.mean()) / (R_.std() + eps.item())
    # Standardize the advantage-like quantity (Q - reward) the same way and
    # reduce it to its mean. current_Q is presumably a torch tensor on
    # `device` (it is moved to CPU and detached) — confirm at the call site.
    value = (current_Q - reward).cpu().detach().numpy()
    value = ((value - value.mean()) / (value.std() + eps.item())).mean()
    # NOTE(review): the function body continues past this chunk; the return
    # statement is not visible here.
print('Writing to {}'.format(args.output))
# TensorBoard writer for training metrics; also passed into the agent below.
writer = SummaryWriter(args.output)
# Append the exact command line to cmdline.txt so runs are reproducible.
with open(os.path.join(args.output, 'cmdline.txt'), 'a') as f:
    f.write(' '.join(sys.argv) + '\n')

# PyBullet environments register themselves on import, so only import the
# pybullet packages when the env id indicates a Bullet task.
bullet = ("Bullet" in args.env)
if bullet:
    import pybullet
    import pybullet_envs

env = NormalizedEnv(gym.make(args.env))

# Seed every RNG in play (numpy, torch, stdlib random, the env, and CUDA
# when enabled) so runs are reproducible for positive seeds.
# NOTE(review): seeding order relative to env creation is behavior-relevant;
# keep this block after gym.make as written.
if args.seed > 0:
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    random.seed(args.seed)
    env.seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)

# Report and extract the state/action dimensionality (assumed flat Box spaces).
print('observation_space', env.observation_space.shape, 'action_space', env.action_space.shape)
nb_status = env.observation_space.shape[0]
nb_actions = env.action_space.shape[0]

# DDPG here also takes the SummaryWriter, unlike the sibling script variant
# in this file — project-local class, see its definition for the contract.
agent = DDPG(nb_status, nb_actions, args, writer)
train(args.train_iter, agent, env)