Example #1
def train():
    model = create_model()
    # Pass the network defined above into huskarl's API to build a complete DQN agent for the reinforcement-learning training that follows.
    agent = hk.agent.DQN(model, actions=dummy_env.action_space.n, nsteps=2)
    # Resume from a previously saved checkpoint if one exists.
    if tf.io.gfile.exists("model_dir") and tf.io.gfile.listdir("model_dir"):
        agent.model.load_weights("model_dir/dqn.h5")
    # Use huskarl's Simulation to create a training simulator and run the training inside it.
    sim = hk.Simulation(create_env, agent)
    sim.train(max_steps=3000, visualize=True, plot=plot_rewards)
    agent.model.save_weights(filepath='model_dir/dqn.h5',
                             overwrite=True,
                             save_format='h5')
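This example calls helpers defined elsewhere in the script (create_env, dummy_env, create_model, plot_rewards). A minimal sketch of what they might look like, assuming a CartPole Gym environment and a small dense network (both are assumptions, not part of the snippet):

import gym
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import huskarl as hk

# Assumed environment factory; huskarl can spawn one instance per simulation worker.
create_env = lambda: gym.make('CartPole-v0').unwrapped
dummy_env = create_env()

def create_model():
    # Small fully connected network over the flat observation vector.
    return Sequential([
        Dense(16, activation='relu', input_shape=dummy_env.observation_space.shape),
        Dense(16, activation='relu'),
        Dense(16, activation='relu'),
    ])

def plot_rewards(episode_rewards, episode_steps, done=False):
    # Live plot of per-episode rewards, refreshed while training runs.
    plt.clf()
    plt.xlabel('Step')
    plt.ylabel('Reward')
    for rewards, steps in zip(episode_rewards, episode_steps):
        plt.plot(np.array(steps), np.array(rewards))
    plt.show() if done else plt.pause(0.001)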
Example #2
	# Build a simple actor model (the Sequential wrapper and input_shape are assumed; the snippet starts mid-definition)
	actor = Sequential([
		Dense(16, activation='relu', input_shape=state_shape),
		Dense(16, activation='relu'),
		Dense(action_size, activation='linear')
	])

	# Build a simple critic model
	action_input = Input(shape=(action_size,), name='action_input')
	state_input = Input(shape=state_shape, name='state_input')
	x = Concatenate()([action_input, state_input])
	x = Dense(32, activation='relu')(x)
	x = Dense(32, activation='relu')(x)
	x = Dense(32, activation='relu')(x)
	x = Dense(1, activation='linear')(x)
	critic = Model(inputs=[action_input, state_input], outputs=x)

	# Create Deep Deterministic Policy Gradient agent
	agent = hk.agent.DDPG(actor=actor, critic=critic, nsteps=2)

	def plot_rewards(episode_rewards, episode_steps, done=False):
		plt.clf()
		plt.xlabel('Step')
		plt.ylabel('Reward')
		for rewards, steps in zip(episode_rewards, episode_steps):
			plt.plot(np.array(steps), np.array(rewards))
		plt.show() if done else plt.pause(0.001) # Pause a bit so that the graph is updated

	# Create simulation and start training
	sim = hk.Simulation(create_env, agent)
	sim.train(max_steps=30_000, visualize=True, plot=plot_rewards)
	sim.test(max_steps=5_000)
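Example #2 begins partway through its script. A minimal sketch of the setup it assumes: imports plus a continuous-control Gym environment (Pendulum is an assumption here) that supplies action_size and state_shape:

import gym
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Concatenate, Dense, Input
import huskarl as hk

# Assumed continuous-control environment; its spaces supply the shapes used above.
create_env = lambda: gym.make('Pendulum-v0').unwrapped
dummy_env = create_env()
action_size = dummy_env.action_space.shape[0]
state_shape = dummy_env.observation_space.shape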
Example #3
def test():
    model = create_model()
    agent = hk.agent.DQN(model, actions=dummy_env.action_space.n, nsteps=2)
    # Load the weights saved by train() and evaluate the trained agent.
    agent.model.load_weights("model_dir/dqn.h5")
    sim = hk.Simulation(create_env, agent)
    sim.test(max_steps=1000)
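Assuming train() and test() from Examples #1 and #3 sit in the same script as the helpers sketched above, a minimal entry point could be:

if __name__ == '__main__':
    train()  # train the DQN agent and save its weights to model_dir/dqn.h5
    test()   # reload the saved weights and run a short evaluation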