state, reward, done, _ = _env.step(action.cpu().numpy()[0])
            total_reward += reward

            state = agent.Tensor([state])
            if done:
                break
    return total_reward


# Number of evaluation episodes per trained model (consumed by the
# evaluation helper defined above this excerpt).
test_episodes = 100
# NOTE(review): loop is restricted to the single env given on the command
# line; the commented-out os.listdir(base_dir) suggests it once swept every
# environment directory.
for env_name in [args.env]:#os.listdir(base_dir):
    
    env = NormalizedActions(gym.make(env_name))

    # Agent built purely for evaluation: train_mode=False, epsilon=0, and
    # placeholder zeros for training-only settings (alpha, replay_size,
    # optimizer).  DDPG is a project-local class — TODO confirm signature.
    agent = DDPG(beta=0.9, epsilon=0, learning_rate=1e-4, gamma=0.99, tau=0.01, hidden_size_dim0=args.hidden_size, hidden_size_dim1=args.hidden_size, num_inputs=env.observation_space.shape[0], action_space=env.action_space, train_mode=False, alpha=0, replay_size=0, optimizer = 0, two_player=args.two_player, normalize_obs=True)
    # Uniform noise on [-1, 1], built from agent.Tensor so it lives on the
    # same device (CPU/GPU) as the agent's tensors.
    noise = uniform.Uniform(agent.Tensor([-1.0]), agent.Tensor([1.0]))

    # Snapshot of the untouched body masses — presumably a MuJoCo model
    # reached by unwrapping two gym wrappers (env.env.env); verify the
    # wrapper depth against the gym version in use.
    basic_bm = copy.deepcopy(env.env.env.model.body_mass.copy())

    # Results layout: <base>/<env>/<optimizer>/<noise>/nr_mdp_<alpha>_1/<run>
    env_dir = base_dir + env_name + '/'
    for optimizer in [args.optimizer]: #['RMSprop', 'SGLD_thermal_0.01', 'SGLD_thermal_0.001', 'SGLD_thermal_0.0001', 'SGLD_thermal_1e-05']:
        for noise_type in [args.action_noise]: 
            noise_dir = env_dir + optimizer + '/' + noise_type + '/nr_mdp_' + str(args.alpha) + '_1/'	
            if os.path.exists(noise_dir):
                for subdir in sorted(os.listdir(noise_dir)):
                    results = {}
                    
                    run_number = 0
                    # NOTE(review): `dir` shadows the builtin of the same name.
                    dir = noise_dir + subdir #+ '/' + str(run_number)
                    print(dir)
                    # NOTE(review): statement truncated in this excerpt — the
                    # continuation after the trailing backslash is missing.
                    if os.path.exists(noise_dir + subdir)\
# --- "Esempio n. 2" (Italian: "Example no. 2") — snippet-aggregator separator.
# The continuation of the `if` statement above was lost during extraction.
    def reset_noise(a, a_noise):
        """Reset the action-noise process, if one is configured.

        ``a`` (the agent) is accepted for call-site symmetry but is not
        used; ``a_noise`` may be ``None`` when noise is disabled.
        """
        if a_noise is None:
            return
        a_noise.reset()

    # --- Training-loop setup (header of the enclosing function/loop is not
    # visible in this excerpt; names like eval_env, normalnoise, args and
    # base_dir are defined elsewhere). ---
    total_steps = 0
    print(base_dir)

    # Two mutually exclusive ways to set the run length: a total step budget
    # (converted to epochs below) or a fixed 500-epoch default.
    # NOTE(review): `assert` is stripped under `python -O`; an explicit
    # check-and-raise would be safer for CLI validation.
    if args.num_steps is not None:
        assert args.num_epochs is None
        nb_epochs = int(args.num_steps) // (args.num_epochs_cycles *
                                            args.num_rollout_steps)
    else:
        nb_epochs = 500

    # Initial observations wrapped as 1xN tensors on the agent's device.
    state = agent.Tensor([env.reset()])
    eval_state = agent.Tensor([eval_env.reset()])

    eval_reward = 0
    episode_reward = 0
    # Put the agent's networks into training mode.
    agent.train()

    # Re-initialize the exploration-noise process before rollouts begin.
    reset_noise(agent, normalnoise)

    # Optional live plotting; `vis` stays None when visualization is off.
    if args.visualize:
        vis = visdom.Visdom(env=base_dir)
    else:
        vis = None

    train_steps = 0
    # NOTE(review): +1 offset's purpose is not evident from this excerpt —
    # presumably trains `ratio` agent steps per adversary step; confirm
    # against where `ratio` is consumed.
    ratio = args.ratio + 1