Example #1
def init(point_dict):
    rl_brain = PolicyGradient(
        n_actions=env.action_space_num,
        n_features=env.obs_num,
        learning_rate=0.02,
        reward_decay=0.99,
        # output_graph=True,
    )
    rl_brain.load_model(
        path='ReinforceLearning/saved_model/PG - 1/policy_gradient.ckpt')
    # All nodes share a single RL brain; each node has its own PID controller
    for id in point_dict:
        if id != 0:
            pid_brain = PID(100, 0.1, 1, 100)
            e = env(point_dict[id])
            a = Agent(rl_brain, pid_brain, e)
            Agent_dict[id] = a
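A brief usage sketch for orientation. `Agent_dict` and the contents of `point_dict` are assumptions here; the snippet only shows that node 0 is skipped and that every other node gets its own PID controller and environment while sharing the single PolicyGradient brain.
# Hypothetical driver for init(); placeholder configs stand in for whatever
# per-node data env() actually expects.
Agent_dict = {}                                   # registry populated by init()

node_configs = {0: None, 1: "cfg_1", 2: "cfg_2"}  # placeholder per-node configs
init(node_configs)                                # nodes 1 and 2 each get PID(100, 0.1, 1, 100)
                                                  # and their own env; the RL brain is shared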
Example #2
"""
The cart pole example
View more on my tutorial page: https://morvanzhou.github.io/tutorials/
Using:
Tensorflow: 1.0
gym: 0.8.0
"""

import gym
from ReinforceLearning.brain.Policy_Gradient import PolicyGradient
import matplotlib.pyplot as plt
from ReinforceLearning.environment.env import env
from simulation.output_generator import output_generator

RENDER = False  # rendering wastes time
MAX_REWARD = 0
Myenv = env(max_energy=500)
output = output_generator(scale=20,
                          bias=20,
                          period=200,
                          phase=0,
                          max_iter=20000)
# output.load("imagedata.npy")
Myenv.output_need = output.output
# print(env.action_space)
# print(env.observation_space)
# print(env.observation_space.high)
# print(env.observation_space.low)

RL = PolicyGradient(
    n_actions=Myenv.action_space_num,
    n_features=Myenv.obs_num,
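The constructor call above is truncated in the snippet. For context, a minimal sketch of the REINFORCE training loop this setup usually feeds into, assuming this PolicyGradient fork keeps the choose_action / store_transition / learn interface from the linked tutorial and that env follows the gym-style reset()/step() convention (all assumptions):
# Sketch of one training run under the assumptions stated above.
for episode in range(300):
    observation = Myenv.reset()
    while True:
        action = RL.choose_action(observation)             # sample an action from the policy
        observation_, reward, done, _ = Myenv.step(action)
        RL.store_transition(observation, action, reward)   # buffer the step
        if done:
            RL.learn()                                      # Monte Carlo update on the full episode
            break
        observation = observation_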
Example #3
"""
Using:
Tensorflow: 1.0
gym: 0.8.0
"""

import numpy as np
from ReinforceLearning.brain.Policy_Gradient import PolicyGradient
import matplotlib.pyplot as plt
from ReinforceLearning.environment.env import env
from simulation.output_generator import output_generator
from method.myfunc import smooth

DISPLAY_REWARD_THRESHOLD = 400  # render the environment if the total episode reward is greater than this threshold
RENDER = False  # rendering wastes time

Myenv = env()
output = output_generator(scale=20,
                          bias=20,
                          period=200,
                          phase=0,
                          max_iter=20000)
output.load("imagedata.npy")
Myenv.output_need = output.output
# print(env.action_space)
# print(env.observation_space)
# print(env.observation_space.high)
# print(env.observation_space.low)

RL = PolicyGradient(
    n_actions=Myenv.action_space_num,
    n_features=Myenv.obs_num,
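matplotlib and smooth are imported above but their use is cut off in this snippet. A hedged sketch of how the reward curve might be plotted, assuming smooth() maps a sequence of episode rewards to a smoothed sequence (its real signature is not shown here):
# Hypothetical plotting of episode rewards collected during training.
episode_rewards = []                 # appended once per episode inside the training loop
# ... training loop runs here ...
plt.plot(episode_rewards, label="raw reward")
plt.plot(smooth(episode_rewards), label="smoothed")   # assumption: smooth(seq) -> seq
plt.xlabel("episode")
plt.ylabel("total reward")
plt.legend()
plt.show()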
Example #4
def set_env(self):
    self.env = env(self)
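set_env passes the owning object into env, so the environment keeps a back-reference to its owner. A minimal, hypothetical class illustrating that pattern; only set_env itself comes from the snippet:
# Hypothetical owner class; the class name and __init__ are assumptions.
class Node:
    def __init__(self):
        self.env = None

    def set_env(self):
        # env receives the owning Node so it can read the node's state directly
        self.env = env(self)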