def init(point_dict):
    """Build one Agent per node in *point_dict* (node 0 excluded).

    All nodes share a single pretrained PolicyGradient brain, while each
    node gets its own independent PID controller and its own environment
    instance. Populates the module-level ``Agent_dict``, keyed by node id.

    Args:
        point_dict: mapping of node id -> node data passed to ``env(...)``.
            NOTE(review): exact value schema not visible here — confirm
            against the caller.
    """
    rl_brain = PolicyGradient(
        n_actions=env.action_space_num,
        n_features=env.obs_num,
        learning_rate=0.02,
        reward_decay=0.99,
        # output_graph=True,
    )
    rl_brain.load_model(
        path='ReinforceLearning/saved_model/PG - 1/policy_gradient.ckpt')
    # All nodes share one RL brain; the PID controllers are independent.
    for node_id in point_dict:  # renamed from `id` to avoid shadowing the builtin
        if node_id != 0:  # node 0 gets no agent — presumably a root/reference node; confirm
            pid_brain = PID(100, 0.1, 1, 100)
            e = env(point_dict[node_id])
            Agent_dict[node_id] = Agent(rl_brain, pid_brain, e)
The cart pole example View more on my tutorial page: https://morvanzhou.github.io/tutorials/ Using: Tensorflow: 1.0 gym: 0.8.0 """ import gym from ReinforceLearning.brain.Policy_Gradient import PolicyGradient import matplotlib.pyplot as plt from ReinforceLearning.environment.env import env from simulation.output_generator import output_generator RENDER = False # rendering wastes time MAX_REWARD = 0 Myenv = env(max_energy=500) output = output_generator(scale=20, bias=20, period=200, phase=0, max_iter=20000) # output.load("imagedata.npy") Myenv.output_need = output.output # print(env.action_space) # print(env.observation_space) # print(env.observation_space.high) # print(env.observation_space.low) RL = PolicyGradient( n_actions=Myenv.action_space_num, n_features=Myenv.obs_num,
Using: Tensorflow: 1.0 gym: 0.8.0 """ import numpy as np from ReinforceLearning.brain.Policy_Gradient import PolicyGradient import matplotlib.pyplot as plt from ReinforceLearning.environment.env import env from simulation.output_generator import output_generator from method.myfunc import smooth DISPLAY_REWARD_THRESHOLD = 400 # renders environment if total episode reward is greater then this threshold RENDER = False # rendering wastes time Myenv = env() output = output_generator(scale=20, bias=20, period=200, phase=0, max_iter=20000) output.load("imagedata.npy") Myenv.output_need = output.output # print(env.action_space) # print(env.observation_space) # print(env.observation_space.high) # print(env.observation_space.low) RL = PolicyGradient( n_actions=Myenv.action_space_num, n_features=Myenv.obs_num,
def set_env(self):
    """Create an environment bound to this object and store it on ``self.env``.

    NOTE(review): relies on a project-level ``env`` class being in scope
    at the module level — confirm the import in the enclosing file.
    """
    self.env = env(self)