def play_it():
    # ENV_NAME = 'CartPole-v0'
    # ENV_NAME = 'MountainCar-v0'
    ENV_NAME = 'Single_virtual-v0'

    # Get the environment and extract the number of actions.
    env = make(ENV_NAME)
    env1 = make(ENV_NAME)
    np.random.seed(123)
    env.seed(123)
    nb_actions = env.action_space.n

    model = build_model(nb_actions, env.observation_space)
    # model = build_model1(nb_actions, env.observation_space)

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
    # even the metrics!
    memory = SequentialMemory(limit=50000, window_length=1)
    policy = BoltzmannQPolicy()
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=10,
                   target_model_update=1e-2, policy=policy)
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])

    # Okay, now it's time to learn something! We visualize the training here for show, but this
    # slows down training quite a lot. You can always safely abort the training prematurely using
    # Ctrl + C.
    dqn.fit(env, nb_steps=50000, visualize=False, verbose=2)

    # After training is done, we save the final weights.
    dqn.save_weights(os.path.join('models_weights_logs',
                                  'dqn_{}_weights.h5f'.format(ENV_NAME + datetime.now().strftime("%Y%m%d-%H%M%S"))),
                     overwrite=True)
    # dqn.load_weights(os.path.join('models_weights_logs', 'dqn_{}_weights.h5f'.format(ENV_NAME)))

    # Finally, evaluate our algorithm for 5 episodes.
    dqn.test(env1, nb_episodes=5, visualize=True)
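# `build_model` is referenced above but not defined in this excerpt. A minimal sketch of what it
# might return, assuming a small fully connected Q-network in the style of the other examples in
# this file (the layer sizes are illustrative, not the author's actual architecture):
def build_model(nb_actions, observation_space):
    from keras.models import Sequential
    from keras.layers import Dense, Flatten

    model = Sequential()
    model.add(Flatten(input_shape=(1,) + observation_space.shape))
    model.add(Dense(16, activation='relu'))
    model.add(Dense(16, activation='relu'))
    model.add(Dense(nb_actions, activation='linear'))  # one Q-value per action
    return model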
def main():
    # parser = argparse.ArgumentParser(description='Run DQN on Atari SpaceInvaders')
    # parser.add_argument('--env', default='SpaceInvaders-v0', help='Atari env name')
    # parser.add_argument(
    #     '-o', '--output', default='SpaceInvaders-v0', help='Directory to save data to')
    # parser.add_argument('--seed', default=0, type=int, help='Random seed')
    # parser.add_argument('--input_shape', default=(84, 84, 4), type=tuple, help='Size of each frame')
    # args = parser.parse_args()
    # args.output = get_output_folder(args.output, args.env)

    # Vehicle network
    veh_network = create_lstm_model(nb_time_steps, nb_input_vector, num_actions=g1)
    # Attacker network
    # NOTE: `args.env` below is only available if the argparse block above is re-enabled.
    att_network = create_lstm_model(nb_time_steps, nb_input_vector,
                                    num_actions=gym.make(args.env).action_space.n)

    veh_agent = DQNAgent(q_network=veh_network,
                         preprocessor=core.Preprocessor(),
                         memory=core.ReplayMemory(),
                         policy=1,
                         gamma=0.1,
                         target_update_freq=100,
                         num_burn_in=100,
                         train_freq=20,
                         batch_size=32)
    att_agent = DQNAgent(q_network=att_network,
                         preprocessor=core.Preprocessor(),
                         memory=core.ReplayMemory(),
                         policy=1,
                         gamma=0.1,
                         target_update_freq=100,
                         num_burn_in=100,
                         train_freq=20,
                         batch_size=32)
    veh_agent.compile('Adam', 'mse')
    att_agent.compile('Adam', 'mse')

    env = VehicleFollowingENV()  # the original was missing the parentheses, so the class was never instantiated
    for i_episode in range(20):
        veh_agent.fit(env, 10 ** 6)  # the original called an undefined `agent`; the vehicle agent is meant here
    # env.close()

    model_json = veh_network.to_json()  # the original referenced the undefined name `q_network`
    with open("model.json", "w") as json_file:
        json_file.write(model_json)
def main():
    # Vehicle network
    veh_network = create_lstm_model(nb_time_steps, nb_input_vector, num_actions=4)
    # Attacker network
    # att_network = create_lstm_model(nb_time_steps, nb_input_vector, num_actions=4)

    veh_agent = DQNAgent(q_network=veh_network,
                         q_network2=veh_network,
                         preprocessor=core.Preprocessor(),
                         RLmemory=core.ReplayMemory(),
                         SLmemory=core.ReplayMemory(),
                         policy=1,
                         gamma=0.1,
                         target_update_freq=100,
                         num_burn_in=100,
                         train_freq=20,
                         batch_size=32)
    # att_agent = DQNAgent(q_network=att_network,
    #                      q_network2=att_network,
    #                      preprocessor=core.Preprocessor(),
    #                      memory=core.ReplayMemory(),
    #                      policy=1,
    #                      gamma=0.1,
    #                      target_update_freq=100,
    #                      num_burn_in=100,
    #                      train_freq=20,
    #                      batch_size=32)
    veh_agent.compile('Adam', 'mse')
    # att_agent.compile('Adam', 'mse')

    env = VehicleFollowingENV()
    for i_episode in range(20):
        veh_agent.fit(env=env, num_iterations=10 ** 6)
        # att_agent.fit(env, 10 ** 6)
    # env.close()

    model_json = veh_network.to_json()
    with open("model.json", "w") as json_file:
        json_file.write(model_json)
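# `create_lstm_model` and `VehicleFollowingENV` are defined elsewhere in this project and are not
# shown here; `VehicleFollowingENV` is the custom Gym-style vehicle-following environment the agents
# are trained on. A minimal sketch of the kind of recurrent Q-network `create_lstm_model` might
# return, assuming Keras and that (nb_time_steps, nb_input_vector) is the shape of one observation
# window (layer sizes are illustrative only):
def create_lstm_model(nb_time_steps, nb_input_vector, num_actions):
    from keras.models import Sequential
    from keras.layers import LSTM, Dense

    model = Sequential()
    model.add(LSTM(64, input_shape=(nb_time_steps, nb_input_vector)))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(num_actions, activation='linear'))  # one Q-value per action
    return model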
policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps',
                              value_max=1.0, value_min=0.1, value_test=0.05, nb_steps=1000000)

dqn = DQNAgent(model=model, nb_actions=3, policy=policy, memory=memory,
               nb_steps_warmup=2000, gamma=0.95, target_model_update=2000,
               train_interval=1, delta_clip=1.0)
dqn.compile(optimizer, metrics=['mae'])

env = Tetris()

start = time.time()
weights_filename = 'dqn_{}_tetris_weights.h5'.format(start)
checkpoint_weights_filename = 'dqn_{}_tetris_weights_.h5'.format(start)
log_filename = 'dqn_tetris_log.json'
callbacks = [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=250000)]
callbacks += [FileLogger(log_filename, interval=100)]
callbacks += [WandbLogger()]

dqn.fit(env, callbacks=callbacks, nb_steps=1750000, log_interval=10000)
dqn.save_weights(weights_filename, overwrite=True)
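# The `model`, `memory`, and `optimizer` objects used above are assumed to be created earlier in
# the script and are not shown in this excerpt. A minimal sketch of plausible definitions,
# assuming keras-rl's SequentialMemory and a Keras Adam optimizer (the limit and learning rate are
# illustrative, not the author's actual settings); the Q-network `model` would be built the same
# way as in the other examples in this file:
from keras.optimizers import Adam
from rl.memory import SequentialMemory

memory = SequentialMemory(limit=500000, window_length=1)
optimizer = Adam(lr=1e-3)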
# If you want, you can experiment with the parameters or use a different policy. Another popular one
# is Boltzmann-style exploration:
# policy = BoltzmannQPolicy(tau=1.)
# Feel free to give it a try!

# creating both agents
hider_dqn = DQNAgent(model=hider_model, nb_actions=nb_actions, policy=policy, memory=memory,
                     nb_steps_warmup=100, gamma=.99, target_model_update=10,
                     train_interval=2, delta_clip=1.)
hider_dqn.compile(Adam(lr=.00025), metrics=['mae'])

seeker_dqn = DQNAgent(model=seeker_model, nb_actions=nb_actions, policy=policy, memory=memory,
                      nb_steps_warmup=100, gamma=.99, target_model_update=10,
                      train_interval=2, delta_clip=1.)
seeker_dqn.compile(Adam(lr=.00025), metrics=['mae'])

# passing both agents to framework
framework = MultiAgentFramework(dqagents=[hider_dqn, seeker_dqn])

if args.mode == 'train':
# the agent initially explores the environment (high eps) and then gradually sticks to what it knows
# (low eps). We also set a dedicated eps value that is used during testing. Note that we set it to 0.05
# so that the agent still performs some random actions. This ensures that the agent cannot get stuck.
policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1., value_min=.1,
                              value_test=.05, nb_steps=1000000)

# The trade-off between exploration and exploitation is difficult and an on-going research topic.
# If you want, you can experiment with the parameters or use a different policy. Another popular one
# is Boltzmann-style exploration:
# policy = BoltzmannQPolicy(tau=1.)
# Feel free to give it a try!

dqn = DQNAgent(model=model, nb_actions=nb_actions, policy=policy, window_length=WINDOW_LENGTH,
               memory=memory, processor=processor, nb_steps_warmup=50000, gamma=.99,
               delta_range=(-1., 1.), target_model_update=10000, train_interval=4)
dqn.compile(Adam(lr=.00025), metrics=['mae'])

if args.mode == 'train':
    # Okay, now it's time to learn something! We capture the interrupt exception so that training
    # can be prematurely aborted. Notice that you can use the built-in Keras callbacks!
    weights_filename = 'dqn_{}_weights.h5f'.format(args.env_name)
    checkpoint_weights_filename = 'dqn_' + args.env_name + '_weights_{step}.h5f'
    log_filename = 'dqn_{}_log.json'.format(args.env_name)
    callbacks = [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=250000)]
    callbacks += [FileLogger(log_filename, interval=100)]
    dqn.fit(env, callbacks=callbacks, nb_steps=1750000, log_interval=10000)

    # After training is done, we save the final weights one more time.
    dqn.save_weights(weights_filename, overwrite=True)

    # Finally, evaluate our algorithm for 10 episodes.
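# The `processor` passed to the agent above is assumed to be defined earlier in the script. A
# minimal sketch of an Atari-style processor in the usual keras-rl pattern (the class name and
# INPUT_SHAPE are assumptions, not taken from this excerpt):
import numpy as np
from PIL import Image
from rl.core import Processor

INPUT_SHAPE = (84, 84)  # assumed frame size

class AtariProcessor(Processor):
    def process_observation(self, observation):
        # Resize the raw RGB frame and convert it to grayscale.
        img = Image.fromarray(observation).resize(INPUT_SHAPE).convert('L')
        return np.array(img).astype('uint8')  # uint8 keeps the replay memory small

    def process_state_batch(self, batch):
        # Rescale to [0, 1] only when a batch is sampled for training.
        return batch.astype('float32') / 255.

    def process_reward(self, reward):
        # Clip rewards to [-1, 1], as in the original DQN setup.
        return np.clip(reward, -1., 1.)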
# Build the model.
model = Sequential()
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
model.add(Dense(432, activation='relu'))
model.add(Dense(216, activation='relu'))
model.add(Dense(144, activation='linear'))
model.add(Dense(nb_actions))
print(model.summary())

memory = SequentialMemory(limit=500000, window_length=1)
policy = BoltzmannQPolicy()
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=100,
               target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=1e-2), metrics=['mae'])

# dqn.load_weights('E:\\git\\TensorFlow\\dqn_Snake_weights_2_weights.h5f')
dqn.fit(env, nb_steps=500000, visualize=visualize_training, verbose=2, callbacks=[tbCallBack])

# After training is done, save the final weights.
dqn.save_weights('dqn_{}_weights.h5f'.format("Snake"), overwrite=True)

# Evaluate the model for 500 episodes.
dqn.test(env, nb_episodes=500, visualize=True)
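# `tbCallBack` used in dqn.fit() above is assumed to be created earlier in the script. A minimal
# sketch using Keras' built-in TensorBoard callback (the log directory is illustrative):
from keras.callbacks import TensorBoard

tbCallBack = TensorBoard(log_dir='./logs', write_graph=True)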
nb_actions = env.action_space.n
actions = np.arange(nb_actions)
policy = EpsGreedyQPolicy(1.0, 0.999)
memory = Memory(limit=50000, maxlen=1)

obs = env.reset()
agent = DQNAgent(actions=actions, memory=memory, update_interval=500, train_interval=1,
                 batch_size=32, memory_interval=1, observation=obs, input_shape=[len(obs)],
                 training=True, policy=policy)
agent.compile()

result = []
for episode in range(500):  # run 500 episodes
    agent.reset()
    observation = env.reset()  # reset the environment
    # observation, _, _, _ = env.step(env.action_space.sample())
    observation = deepcopy(observation)
    agent.observe(observation)
    for t in range(250):  # run up to 250 steps per episode
        # env.render()  # render the environment
        action = agent.act()
        # step() returns the resulting observation, the reward, whether the episode is done,
        # and additional info.
        observation, reward, done, info = env.step(action)
        observation = deepcopy(observation)
        agent.observe(observation, reward, done)
model = Sequential()
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
print(model.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=50000, window_length=1)  # window_length is required by SequentialMemory
policy = BoltzmannQPolicy()
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=10,
               target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
dqn.fit(env, nb_steps=50000, visualize=True, verbose=2)

# After training is done, we save the final weights.
dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
dqn.test(env, nb_episodes=5, visualize=True)
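# The snippet above assumes that `ENV_NAME`, `env`, and `nb_actions` were set up earlier in the
# script, typically along these lines (a sketch assuming a standard Gym environment such as
# CartPole-v0):
import numpy as np
import gym

ENV_NAME = 'CartPole-v0'
env = gym.make(ENV_NAME)
np.random.seed(123)
env.seed(123)
nb_actions = env.action_space.n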