# serialize model to JSON
model_save = model.to_json()
with open("save/NNmodel1.json", "w") as json_file:
    json_file.write(model_save)
print("Saved model to disk!")

# Finally, we configure and compile our agent. You can use any built-in Keras
# optimizer and any of the built-in metrics.
memory = SequentialMemory(limit=10000, window_length=1)
# policy1 = BoltzmannQPolicy()
policy1 = EpsDisGreedyQPolicy(eps=0.0001, eps_decay=0.999)
policy2 = BoltzmannQPolicy()

callback12 = FileLogger(filepath='save/history12_{}'.format(timenow), interval=1)
dqn8 = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, gamma=0.99,
                nb_steps_warmup=2000, target_model_update=1, policy=policy1)
dqn8.compile(Adam(lr=1e-3), metrics=['mae'])
history8 = dqn8.fit(env, nb_epsteps=3000, visualize=False, callbacks=[callback12], verbose=2)
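# Note: EpsDisGreedyQPolicy and the nb_epsteps argument to fit() are not part of
# stock keras-rl (which ships EpsGreedyQPolicy and fit(env, nb_steps=...)); both
# appear to come from a project-local fork.
#
# A minimal sketch of restoring the architecture saved above with Keras'
# model_from_json. The JSON stores only the architecture; weights would need to
# be saved and restored separately via save_weights()/load_weights().
from keras.models import model_from_json

with open("save/NNmodel1.json", "r") as json_file:
    restored_model = model_from_json(json_file.read())
print(restored_model.summary())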
callback1 = FileLogger(filepath='save/nhistory1_{}'.format(timenow), interval=1)
callback2 = FileLogger(filepath='save/nhistory2_{}'.format(timenow), interval=1)
callback3 = FileLogger(filepath='save/nhistory3_{}'.format(timenow), interval=1)
callback4 = FileLogger(filepath='save/nhistory4_{}'.format(timenow), interval=1)
callback5 = FileLogger(filepath='save/nhistory5_{}'.format(timenow), interval=1)

# dqn1 = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=1000,
#                 target_model_update=1e-2, policy=policy1)
# dqn1.compile(Adam(lr=1e-3), metrics=['mae'])
# history1 = dqn1.fit(env, nb_epsteps=5000, visualize=False, callbacks=[callback1], verbose=2)

dqn2 = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, batch_size=32,
                nb_steps_warmup=1000, target_model_update=1e-2, policy=policy2)
dqn2.compile(Adam(lr=0.01), metrics=['mse'])
# dqn2.save_weights('save/dqn_boltzmann0.8_{}_weights.h5f'.format(ENV_NAME), overwrite=True)
history2 = dqn2.fit(env, nb_steps=200000, visualize=False, callbacks=[callback1], verbose=2)
time.sleep(3600)

# dqn3 = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=100,
#                 target_model_update=1e-2, policy=policy1, enable_double_dqn=False)
# dqn3.compile(Adam(lr=1e-3), metrics=['mae'])
# history3 = dqn3.fit(env, nb_epsteps=100, visualize=False, callbacks=[callback3], verbose=2)

# dqn4 = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=100,
#                 target_model_update=1e-2, policy=policy2, enable_double_dqn=False)
# dqn4.compile(Adam(lr=1e-3), metrics=['mae'])
# history4 = dqn4.fit(env, nb_epsteps=100, visualize=False, callbacks=[callback4], verbose=2)

# print(history1.history.keys())
# print(len(history1.history['policy_config']))
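# A minimal sketch of loading a FileLogger history back for offline analysis,
# assuming keras-rl's FileLogger format: a JSON dict mapping metric names
# (e.g. 'episode', 'episode_reward', 'duration') to per-episode lists.
import json

from matplotlib import pyplot

with open('save/nhistory1_{}'.format(timenow)) as f:
    log = json.load(f)

pyplot.plot(log['episode'], log['episode_reward'])
pyplot.xlabel('episode')
pyplot.ylabel('episode reward')
pyplot.savefig('save/nhistory1_reward.png')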
# dqn1 = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, batch_size=64, nb_steps_warmup=2000,
#                 target_model_update=1e-2, policy=policy2)
# dqn1.compile(Adam(lr=1e-3), metrics=['mae'])
# dqn1.fit(env, nb_steps=300000, visualize=False, callbacks=[callback1], verbose=2)

# dqn2 = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, batch_size=64, gamma=0.5, nb_steps_warmup=2000,
#                 target_model_update=1e-2, policy=policy2)
# dqn2.compile(Adam(lr=1e-3), metrics=['mae'])
# dqn2.fit(env, nb_steps=300000, visualize=False, callbacks=[callback2], verbose=2)

dqn3 = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, batch_size=64,
                nb_steps_warmup=2000, target_model_update=1e-2, policy=policy1)
dqn3.compile(Adam(lr=1e-3), metrics=['mae'])
dqn3.fit(env, nb_steps=300000, visualize=False, callbacks=[callback3], verbose=2)

'''
pyplot.figure()
pyplot.subplot(2, 1, 1)
pyplot.plot(history1.history['episode_reward'], 'r--', history3.history['episode_reward'], 'b--')
pyplot.subplot(2, 1, 2)
# pyplot.plot(history1.history['nb_steps'], history1.history['episode_reward'], 'r',
#             history2.history['nb_steps'], history2.history['episode_reward'], 'g')
pyplot.plot(history2.history['episode_reward'], 'r', history4.history['episode_reward'], 'b')
pyplot.show()
# pyplot.savefig('save/BoltzmannQPolicy')
'''
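# After training, the agent can be checkpointed and evaluated with the standard
# keras-rl API (a sketch; the weights filename is illustrative, and test() uses
# the agent's test_policy, which is greedy by default):
dqn3.save_weights('save/dqn3_{}_weights.h5f'.format(ENV_NAME), overwrite=True)
dqn3.test(env, nb_episodes=10, visualize=False)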
dqn1 = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=1000,
                target_model_update=1e-2, policy=policy1)
dqn1.compile(Adam(lr=1e-3), metrics=['mae'])
history1 = dqn1.fit(env, nb_epsteps=100, visualize=False, callbacks=[callback1], verbose=2)

dqn2 = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=1000,
                target_model_update=1e-2, policy=policy2)
dqn2.compile(Adam(lr=1e-3), metrics=['mae'])
history2 = dqn2.fit(env, nb_epsteps=100, visualize=False, callbacks=[callback2], verbose=2)

dqn3 = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=1000,
                target_model_update=1e-2, policy=policy1, enable_double_dqn=False)
dqn3.compile(Adam(lr=1e-3), metrics=['mae'])
history3 = dqn3.fit(env, nb_epsteps=100, visualize=False, callbacks=[callback3], verbose=2)

'''
dqn3 = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, batch_size=640, nb_steps_warmup=20000,
                target_model_update=1e-2, policy=policy1)
dqn3.compile(Adam(lr=1e-2), metrics=['mae'])
history3 = dqn3.fit(env, nb_steps=500000, visualize=False, callbacks=[callback1], verbose=2)
# dqn3.save_weights('save/dqn4_{}_weights.h5f'.format(ENV_NAME), overwrite=True)
'''
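# An alternative to the custom EpsDisGreedyQPolicy used above: stock keras-rl
# can anneal epsilon linearly over training with LinearAnnealedPolicy. A sketch
# only; this policy is not used by the experiments in this file, and the
# schedule values are illustrative.
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy

annealed_policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps',
                                       value_max=1.0, value_min=0.1,
                                       value_test=0.05, nb_steps=10000)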
# serialize model to JSON
model_save = model.to_json()
with open("save/NNmodel1.json", "w") as json_file:
    json_file.write(model_save)
print("Saved model to disk!")

# Finally, we configure and compile our agent. You can use any built-in Keras
# optimizer and any of the built-in metrics.
memory = SequentialMemory(limit=10000, window_length=1)
# policy1 = BoltzmannQPolicy()
policy1 = EpsDisGreedyQPolicy(eps=0.0001, eps_decay=0.999)
policy2 = BoltzmannQPolicy()

callback10 = FileLogger(filepath='save/history10_{}'.format(timenow), interval=1)
dqn10 = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, gamma=0.99,
                 nb_steps_warmup=2000, target_model_update=1e-2, policy=policy2)
dqn10.compile(Adam(lr=1e-3), metrics=['mae'])
history10 = dqn10.fit(env, nb_epsteps=3000, visualize=False, callbacks=[callback10], verbose=2)
# dqn3 = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=100,
#                 target_model_update=1e-2, policy=policy1, enable_double_dqn=False)
# dqn3.compile(Adam(lr=1e-3), metrics=['mae'])
# history3 = dqn3.fit(env, nb_epsteps=100, visualize=False, callbacks=[callback3], verbose=2)

dqn4 = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, batch_size=64,
                nb_steps_warmup=2000, target_model_update=1e-2, policy=policy2)
dqn4.compile(Adam(lr=1e-3), metrics=['mae'])
history4 = dqn4.fit(env, nb_epsteps=3000, visualize=False, callbacks=[callback4], verbose=2)

'''
pyplot.figure()
pyplot.subplot(2, 1, 1)
pyplot.plot(history1.history['episode_reward'], 'r--', history3.history['episode_reward'], 'b--')
pyplot.subplot(2, 1, 2)
# pyplot.plot(history1.history['nb_steps'], history1.history['episode_reward'], 'r',
#             history2.history['nb_steps'], history2.history['episode_reward'], 'g')
pyplot.plot(history2.history['episode_reward'], 'r', history4.history['episode_reward'], 'b')
pyplot.show()
# pyplot.savefig('save/BoltzmannQPolicy')
'''
model.add(Activation('linear'))
print(model.summary())

# serialize model to JSON
model_save = model.to_json()
with open("save/NNmodel1.json", "w") as json_file:
    json_file.write(model_save)
print("Saved model to disk!")

memory = SequentialMemory(limit=10000, window_length=1)
# policy1 = BoltzmannQPolicy()
policy1 = EpsDisGreedyQPolicy(eps=0.0001, eps_decay=0.999)
policy2 = BoltzmannQPolicy()

callback6 = FileLogger(filepath='save/history6_{}'.format(timenow), interval=1)
dqn6 = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, gamma=0.1,
                nb_steps_warmup=2000, target_model_update=1e-2, policy=policy2)
dqn6.compile(Adam(lr=1e-3), metrics=['mae'])
history6 = dqn6.fit(env, nb_epsteps=3000, visualize=False, callbacks=[callback6], verbose=2)
# dqn4 = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=100,
#                 target_model_update=1e-2, policy=policy2, enable_double_dqn=False)
# dqn4.compile(Adam(lr=1e-3), metrics=['mae'])
# history4 = dqn4.fit(env, nb_epsteps=100, visualize=False, callbacks=[callback4], verbose=2)

dqn5 = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, batch_size=96,
                nb_steps_warmup=2000, target_model_update=1e-2, policy=policy2)
dqn5.compile(Adam(lr=1e-3), metrics=['mae'])
history5 = dqn5.fit(env, nb_epsteps=3000, visualize=False, callbacks=[callback5], verbose=2)

'''
pyplot.figure()
pyplot.subplot(2, 1, 1)
pyplot.plot(history1.history['episode_reward'], 'r--', history3.history['episode_reward'], 'b--')
pyplot.subplot(2, 1, 2)
# pyplot.plot(history1.history['nb_steps'], history1.history['episode_reward'], 'r',
#             history2.history['nb_steps'], history2.history['episode_reward'], 'g')
pyplot.plot(history2.history['episode_reward'], 'r', history4.history['episode_reward'], 'b')
pyplot.show()
# pyplot.savefig('save/BoltzmannQPolicy')
'''
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
print(model.summary())

# serialize model to JSON
model_save = model.to_json()
with open("save/NNmodel1.json", "w") as json_file:
    json_file.write(model_save)
print("Saved model to disk!")

# Finally, we configure and compile our agent. You can use any built-in Keras
# optimizer and any of the built-in metrics.
memory = SequentialMemory(limit=10000, window_length=1)
# policy1 = BoltzmannQPolicy()
policy1 = EpsDisGreedyQPolicy(eps=0.0001, eps_decay=0.999)
policy2 = BoltzmannQPolicy()

callback9 = FileLogger(filepath='save/history9_{}'.format(timenow), interval=1)
dqn9 = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, gamma=0.99,
                nb_steps_warmup=2000, target_model_update=1e-2, policy=policy1)
dqn9.compile(Adam(lr=1e-3), metrics=['mae'])
history9 = dqn9.fit(env, nb_epsteps=3000, visualize=False, callbacks=[callback9], verbose=2)
model.add(Activation('linear'))
print(model.summary())

# serialize model to JSON
model_save = model.to_json()
with open("save/NNmodel1.json", "w") as json_file:
    json_file.write(model_save)
print("Saved model to disk!")

memory = SequentialMemory(limit=10000, window_length=1)
# policy1 = BoltzmannQPolicy()
policy1 = EpsDisGreedyQPolicy(eps=0.0001, eps_decay=0.999)
policy2 = BoltzmannQPolicy()

callback7 = FileLogger(filepath='save/history7_{}'.format(timenow), interval=1)
dqn7 = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, gamma=0.5,
                nb_steps_warmup=2000, target_model_update=1e-2, policy=policy2)
dqn7.compile(Adam(lr=1e-3), metrics=['mae'])
history7 = dqn7.fit(env, nb_epsteps=3000, visualize=False, callbacks=[callback7], verbose=2)
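# Several fragments above begin mid-model (model.add(Activation('linear')) with
# no preceding definition). For reference, a minimal sketch of a complete
# definition in keras-rl's usual MLP style; the layer count and hidden sizes
# are illustrative assumptions, not the project's actual values.
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten

model = Sequential()
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))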