def train():
    """Train a DQN agent on the environment and persist its weights.

    Builds the Q-network via ``create_model()``, wraps it in huskarl's DQN
    agent, optionally resumes from a previous checkpoint, runs the training
    simulation, and saves the resulting weights to ``model_dir/dqn.h5``.
    """
    model = create_model()
    # Wrap the network in huskarl's DQN agent API; this forms the complete
    # agent used for the reinforcement-learning training that follows.
    agent = hk.agent.DQN(model, actions=dummy_env.action_space.n, nsteps=2)
    # Resume only when the checkpoint file itself exists.
    # (Previously this listed "model_dir" — which raises when the directory
    # is absent — and loaded dqn.h5 whenever the directory contained *any*
    # file, even an unrelated one.)
    if tf.io.gfile.exists("model_dir/dqn.h5"):
        agent.model.load_weights("model_dir/dqn.h5")
    # Use huskarl's Simulation to drive the training loop.
    sim = hk.Simulation(create_env, agent)
    sim.train(max_steps=3000, visualize=True, plot=plot_rewards)
    # Ensure the checkpoint directory exists before saving (no-op if present).
    tf.io.gfile.makedirs("model_dir")
    agent.model.save_weights(filepath='model_dir/dqn.h5', overwrite=True, save_format='h5')
Dense(16, activation='relu'), Dense(16, activation='relu'), Dense(action_size, activation='linear') ]) # Build a simple critic model action_input = Input(shape=(action_size,), name='action_input') state_input = Input(shape=state_shape, name='state_input') x = Concatenate()([action_input, state_input]) x = Dense(32, activation='relu')(x) x = Dense(32, activation='relu')(x) x = Dense(32, activation='relu')(x) x = Dense(1, activation='linear')(x) critic = Model(inputs=[action_input, state_input], outputs=x) # Create Deep Deterministic Policy Gradient agent agent = hk.agent.DDPG(actor=actor, critic=critic, nsteps=2) def plot_rewards(episode_rewards, episode_steps, done=False): plt.clf() plt.xlabel('Step') plt.ylabel('Reward') for ed, steps in zip(episode_rewards, episode_steps): plt.plot(np.array(steps), np.array(ed)) plt.show() if done else plt.pause(0.001) # Pause a bit so that the graph is updated # Create simulation and start training sim = hk.Simulation(create_env, agent) sim.train(max_steps=30_000, visualize=True, plot=plot_rewards) sim.test(max_steps=5_000)
def test():
    """Evaluate a previously trained DQN agent.

    Rebuilds the network, restores the saved weights, and runs a short
    test simulation.
    """
    # NOTE(review): load_weights fails if no checkpoint has been saved yet —
    # presumably intentional, since testing an untrained agent is meaningless.
    agent = hk.agent.DQN(create_model(), actions=dummy_env.action_space.n, nsteps=2)
    agent.model.load_weights("model_dir/dqn.h5")
    # Run the evaluation rollout through huskarl's Simulation helper.
    sim = hk.Simulation(create_env, agent)
    sim.test(max_steps=1000)