Example #1
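The snippet assumes an already-created environment and the usual imports, which are not shown. A minimal preamble sketch, assuming the classic-Gym Pendulum-v1 task (suggested by the commented-out ddpg_pendulum save name) and the pre-Gymnasium Gym API used by the rollout loop below:

import gym
import numpy as np
from stable_baselines3 import DDPG
from stable_baselines3.common.noise import NormalActionNoise

# Assumed environment; any env with a continuous (Box) action space works with DDPG
env = gym.make("Pendulum-v1")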
n_actions = env.action_space.shape[-1]  # dimensionality of the continuous (Box) action space
print(n_actions)
# Gaussian exploration noise added to the deterministic DDPG actions
action_noise = NormalActionNoise(mean=np.zeros(n_actions),
                                 sigma=0.1 * np.ones(n_actions))

model = DDPG('MlpPolicy', env, action_noise=action_noise, verbose=1)
# model = DDPG('MlpPolicy', env, verbose=1)
model.learn(total_timesteps=10000, log_interval=10)
#model.save("ddpg_pendulum")
#env = model.get_env()

#del model # remove to demonstrate saving and loading

#model = DDPG.load("ddpg_pendulum")

score = 0  # counts environment steps until the episode terminates

obs = env.reset()
# Run the trained policy until the first episode ends
while True:
    action, _states = model.predict(obs)
    obs, reward, done, info = env.step(action)
    env.render()

    score = score + 1
    print(done)

    if done:
        # obs = env.reset()
        print('finished', score)
        break
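Not in the original snippet: the same trained model can also be scored quantitatively with stable-baselines3's evaluate_policy helper (the call Example #2 below uses) instead of, or in addition to, the rendered rollout above:

from stable_baselines3.common.evaluation import evaluate_policy

mean_reward, std_reward = evaluate_policy(model, model.get_env(), n_eval_episodes=10)
print("mean reward:", mean_reward, "+/-", std_reward)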
Example #2
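This fragment comes from inside a larger script (hence the four-space indentation) that wraps a custom battery-control environment: it reads env.epsi_sp and env.state_of_charge directly, attributes a standard Gym environment does not have. A sketch of the imports it relies on; the model and env objects are created elsewhere in the original script, and the commented-out load would normally use the classmethod form, DDPG.load(path):

import matplotlib.pyplot as plt
from stable_baselines3.common.evaluation import evaluate_policy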
    #
    # model.load('DDPG_test_2_SOC_point5_two_states')
    # Average return and its standard deviation over 10 evaluation episodes
    mean_reward, std_reward = evaluate_policy(model, model.get_env(), n_eval_episodes=10)

    print("Mean Reward = ", mean_reward)
    
    epsi_sp_list = []
    action_list = []
    soc_list = []
    Concentration_list = []
    Concentration_list1 = []
    
    obs = env.reset()
    # Roll out the deterministic policy for at most 3600 steps
    for _ in range(3600):
    
        action, _states = model.predict(obs, deterministic=True)
        obs, rewards, done, info = env.step(action)
    
        # Log internal quantities exposed by the custom battery environment
        epsi_sp_list.append(env.epsi_sp.item(0))
        # Concentration_list.append(env.state_output['yp'].item())
        # Concentration_list.append(env.state_output['yn'].item())
        soc_list.append(env.state_of_charge.item())
    
        action_list.append(action)
    
        if done:
            break
            # obs = env.reset()
 
    plt.figure()
    plt.plot(soc_list)
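The fragment stops right after plotting the state-of-charge trace. A small completion sketch that labels and displays the figure, continuing at the same indentation (the axis labels are assumptions, not from the original):

    plt.xlabel("Step")             # assumed label
    plt.ylabel("State of charge")  # assumed label
    plt.show()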