def main():
    # Create env
    np.random.seed(SEED)
    env = PentagoEnv(SIZE, agent_starts=AGENT_STARTS)
    env.seed(SEED)
    nb_actions = env.action_space.n

    # Define model
    model = Sequential()
    model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(128, activation='sigmoid'))
    model.add(Dense(nb_actions))
    print(model.summary())

    # Configure and compile agent
    memory = SequentialMemory(limit=5000, window_length=1)
    policy = BoltzmannQPolicy()
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory,
                   nb_steps_warmup=1000, target_model_update=1000, policy=policy)
    optimizer = RMSprop(lr=0.00025, epsilon=0.01)
    dqn.compile(optimizer)

    # Okay, now it's time to learn something! We visualize the training here for show, but this
    # slows down training quite a lot. You can always safely abort the training prematurely using
    # Ctrl + C.
    dqn.fit(env, nb_steps=50000, visualize=True, verbose=1)

    # After training is done, we save the final weights.
    dqn.save_weights('weights/dqn-{}-weights-{}.h5f'.format(TAG, datetime.datetime.now()))
def main():
    np.random.seed(123)
    env = PentagoEnv(SIZE)
    env.seed(123)
    nb_actions = env.action_space.n

    model = Sequential()
    # model.add(Reshape((SIZE ** 2,), input_shape=(SIZE, SIZE)))
    model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(128, activation='sigmoid'))
    model.add(Dense(nb_actions))
    print(model.summary())

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
    # even the metrics!
    memory = SequentialMemory(limit=5000, window_length=1)
    policy = BoltzmannQPolicy()
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory,
                   nb_steps_warmup=1000, target_model_update=1e-2, policy=policy)
    optimizer = RMSprop(lr=0.00025, epsilon=0.01)
    dqn.compile(optimizer)

    # Okay, now it's time to learn something! We visualize the training here for show, but this
    # slows down training quite a lot. You can always safely abort the training prematurely using
    # Ctrl + C.
    dqn.fit(env, nb_steps=50000, visualize=True, verbose=1)

    # After training is done, we save the final weights.
    dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)
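# A note on the (1,) + env.observation_space.shape input shape used in the snippets above:
# with SequentialMemory(window_length=1), keras-rl hands the network observations of shape
# (window_length, *observation_shape), so the model must flatten that extra leading axis.
# A minimal standalone sketch (the 4-dimensional observation shape is a made-up example):
import numpy as np

obs_shape = (4,)                    # e.g. a CartPole-like observation
window_length = 1
batch = np.zeros((32, window_length) + obs_shape)  # what the agent actually feeds the model
print(batch.shape)                  # (32, 1, 4) -> hence Flatten(input_shape=(1,) + obs_shape)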
def test_single_dqn_input():
    model = Sequential()
    model.add(Flatten(input_shape=(2, 3)))
    model.add(Dense(2))

    memory = SequentialMemory(limit=10, window_length=2)
    for double_dqn in (True, False):
        agent = DQNAgent(model, memory=memory, nb_actions=2, nb_steps_warmup=5,
                         batch_size=4, enable_double_dqn=double_dqn)
        agent.compile('sgd')
        agent.fit(MultiInputTestEnv((3,)), nb_steps=10)
class DQN(BaseAgent):
    def __init__(self, model, processor, policy, test_policy, num_actions):
        # Replay memory
        memory = SequentialMemory(limit=opt.dqn_replay_memory_size,
                                  window_length=opt.dqn_window_length)
        self.agent = DQNAgent(model=model,
                              nb_actions=num_actions,
                              policy=policy,
                              test_policy=test_policy,
                              memory=memory,
                              processor=processor,
                              batch_size=opt.dqn_batch_size,
                              nb_steps_warmup=opt.dqn_nb_steps_warmup,
                              gamma=opt.dqn_gamma,
                              target_model_update=opt.dqn_target_model_update,
                              enable_double_dqn=opt.enable_double_dqn,
                              enable_dueling_network=opt.enable_dueling_network,
                              train_interval=opt.dqn_train_interval,
                              delta_clip=opt.dqn_delta_clip)
        self.agent.compile(optimizer=keras.optimizers.Adam(lr=opt.dqn_learning_rate),
                           metrics=['mae'])

    def fit(self, env, num_steps, weights_path=None, visualize=False):
        callbacks = []
        if weights_path is not None:
            callbacks += [ModelIntervalCheckpoint(weights_path, interval=50000, verbose=1)]
        self.agent.fit(env=env,
                       nb_steps=num_steps,
                       action_repetition=opt.dqn_action_repetition,
                       callbacks=callbacks,
                       log_interval=opt.log_interval,
                       test_interval=opt.test_interval,
                       test_nb_episodes=opt.test_nb_episodes,
                       test_action_repetition=opt.dqn_action_repetition,
                       visualize=visualize,
                       test_visualize=visualize,
                       verbose=1)

    def test(self, env, num_episodes, visualize=False):
        self.agent.test(env=env,
                        nb_episodes=num_episodes,
                        action_repetition=opt.dqn_action_repetition,
                        verbose=2,
                        visualize=visualize)

    def save(self, out_dir):
        self.agent.save_weights(out_dir, overwrite=True)

    def load(self, out_dir):
        self.agent.load_weights(out_dir)
def test_multi_dqn_input():
    input1 = Input(shape=(2, 3))
    input2 = Input(shape=(2, 4))
    x = Concatenate()([input1, input2])
    x = Flatten()(x)
    x = Dense(2)(x)
    model = Model(inputs=[input1, input2], outputs=x)

    memory = SequentialMemory(limit=10, window_length=2)
    processor = MultiInputProcessor(nb_inputs=2)
    for double_dqn in (True, False):
        agent = DQNAgent(model, memory=memory, nb_actions=2, nb_steps_warmup=5,
                         batch_size=4, processor=processor,
                         enable_double_dqn=double_dqn)
        agent.compile('sgd')
        agent.fit(MultiInputTestEnv([(3,), (4,)]), nb_steps=10)
def train_dqn_model(layers, rounds=10000, run_test=False, use_score=False):
    ENV_NAME = 'malware-score-v0' if use_score else 'malware-v0'
    env = gym.make(ENV_NAME)
    env.seed(123)
    nb_actions = env.action_space.n
    window_length = 1  # "experience" consists of where we were, where we are now

    # generate a policy model
    model = generate_dense_model((window_length,) + env.observation_space.shape,
                                 layers, nb_actions)

    # configure and compile our agent
    # BoltzmannQPolicy selects an action stochastically with a probability generated by
    # soft-maxing Q values
    policy = BoltzmannQPolicy()

    # memory can help a model during training
    # for this, we only consider a single malware sample (window_length=1) for each "experience"
    memory = SequentialMemory(limit=32, ignore_episode_boundaries=False,
                              window_length=window_length)

    # DQN agent as described in Mnih et al. (2013), with double DQN from van Hasselt et al. (2015).
    # http://arxiv.org/pdf/1312.5602.pdf
    # http://arxiv.org/abs/1509.06461
    agent = DQNAgent(model=model, nb_actions=nb_actions, memory=memory,
                     nb_steps_warmup=16, enable_double_dqn=True,
                     enable_dueling_network=True, dueling_type='avg',
                     target_model_update=1e-2, policy=policy, batch_size=16)

    # keras-rl allows one to use any built-in Keras optimizer
    agent.compile(RMSprop(lr=1e-3), metrics=['mae'])

    # play the game. learn something!
    agent.fit(env, nb_steps=rounds, visualize=False, verbose=2)
    history_train = env.history
    history_test = None

    if run_test:
        # Set up the testing environment
        TEST_NAME = 'malware-score-test-v0' if use_score else 'malware-test-v0'
        test_env = gym.make(TEST_NAME)

        # evaluate the agent on a few episodes, drawing randomly from the test samples
        agent.test(test_env, nb_episodes=100, visualize=False)
        history_test = test_env.history

    return agent, model, history_train, history_test
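# A minimal sketch of what the Boltzmann exploration mentioned above does with a row of
# Q-values: actions are sampled in proportion to softmax(Q / tau). The Q-values and tau
# below are made-up numbers for illustration, not taken from any snippet here.
import numpy as np

def boltzmann_action(q_values, tau=1.0):
    # Subtract the max before exponentiating for numerical stability.
    exp_q = np.exp((q_values - q_values.max()) / tau)
    probs = exp_q / exp_q.sum()
    return np.random.choice(len(q_values), p=probs)

print(boltzmann_action(np.array([1.0, 2.0, 0.5])))  # favors action 1, but not always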
env = gym.make("Breakout-v0") env.seed(1) env.reset() nb_actions = env.action_space.n model = create_model(nb_actions, 4) memory = SequentialMemory(limit=1000000, window_length=4) processor = AtariProcessor() policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1., value_min=.1, value_test=.05, nb_steps=1000) dqn = DQNAgent(model=model, nb_actions=nb_actions, policy=policy, memory=memory, processor=processor, nb_steps_warmup=1000, gamma=.99, target_model_update=100, train_interval=4, delta_clip=1.) dqn.compile(Adam(lr=.00025), metrics=['mae']) dqn.fit(env, nb_steps=1750000, log_interval=10000, visualize=False, verbose=2) model.save_weights('policy.h5', overwrite=True)
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=1000,
               gamma=.9, enable_dueling_network=False, dueling_type='avg',
               target_model_update=1e-2, policy=policy)
# dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=10,
#                enable_dueling_network=True, dueling_type='avg',
#                target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=.001, decay=.001), metrics=['mae'])

rewards = []
callback = [TrainEpisodeLogger(), History()]
hist = dqn.fit(env, nb_steps=10000, visualize=False, verbose=2, callbacks=callback)
rewards.extend(hist.history.get('episode_reward'))
plt.plot(rewards)

dqn.test(env, nb_episodes=5, visualize=True)

state = env.reset()
action = env.action_space.sample()
print(action)
state_list = []
for i in range(300):
    state_list.append(state)
    # action = np.argmax(dqn.model.predict(np.expand_dims(np.expand_dims(state, 0), 0))[0])
    state, reward, done, _ = env.step(2)
    env.render()
env.render(close=True)
if mode == 'train':
    checkpoint_weights_filename = 'dqn_' + Snake_env.name + '_weights_{step}.h5f'
    log_filename = 'dqn_{}_log.json'.format(Snake_env.name)
    callbacks = [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=1000)]
    callbacks += [FileLogger(log_filename, interval=1000)]
    weights_filename = "dqn_" + Snake_env.name + "_weights_" + str(step) + ".h5f"
    # if weights:
    #     weights_filename_1 = weights
    #     dqn.load_weights(weights_filename_1)

    # Start training
    dqn.fit(Snake_env, callbacks=callbacks, nb_steps=step, log_interval=1000, verbose=1)

    # Save the weights
    dqn.save_weights(weights_filename, overwrite=True)
elif mode == 'test':
    # Load the weights
    weights = "dqn_" + Snake_env.name + "_weights_" + str(step) + ".h5f"
    if weights:
        weights_filename = weights
    dqn.load_weights(weights_filename)
    dqn.test(Snake_env, nb_episodes=10, visualize=True)
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
print(model.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=50000, window_length=1)
policy = BoltzmannQPolicy()
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=10,
               target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
dqn.fit(env, nb_steps=50000, visualize=True, verbose=2)

# After training is done, we save the final weights.
dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
dqn.test(env, nb_episodes=5, visualize=True)
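# A note on target_model_update, which varies across these snippets: in keras-rl, values >= 1
# mean "hard-copy the online weights into the target network every N training steps", while
# values < 1 (like the 1e-2 above) mean a soft Polyak update applied every step. A sketch of
# both rules, using plain lists as stand-in "weights":
def soft_update(target, online, tau=1e-2):
    # target <- (1 - tau) * target + tau * online, applied on every training step
    return [(1 - tau) * t + tau * o for t, o in zip(target, online)]

def hard_update_due(step, interval=10000):
    # copy online -> target only when this returns True
    return step % interval == 0

print(soft_update([0.0], [1.0]))   # [0.01]: the target drifts slowly toward the online net
print(hard_update_due(20000))      # True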
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
print(model.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=50000, window_length=1)
policy = BoltzmannQPolicy()
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=10,
               target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
dqn.fit(env, nb_steps=25310, visualize=False, verbose=2)

# After training is done, we save the final weights.
dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
dqn.test(env, nb_episodes=5, visualize=False)
def fit_dqn(env, force: bool = False, dueling: bool = False, root_dir: str = ""):
    nb_actions = env.action_space.n
    loaded = False
    model_weights_path = pathlib.Path(
        f"{root_dir}/dqn{'-d' if dueling else ''}.h5")
    model_history_path = pathlib.Path(
        f"{root_dir}/dqn{'-d' if dueling else ''}.h5f.log")

    if not force and model_weights_path.exists():
        model = load_model(str(model_weights_path))
        with open(model_history_path, "rb") as f:
            history = pickle.load(f)
        loaded = True
    else:
        model = Sequential()
        model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
        model.add(Dense(64))
        model.add(Dropout(0.5))
        model.add(Activation("relu"))
        model.add(Dense(64))
        model.add(Dropout(0.5))
        model.add(Activation("relu"))
        model.add(Dense(64))
        model.add(Dropout(0.5))
        model.add(Activation("relu"))
        model.add(Dense(nb_actions))
        model.add(Activation("linear"))

    memory = SequentialMemory(limit=50000, window_length=1)
    policy = BoltzmannQPolicy()
    dqn = DQNAgent(
        model=model,
        nb_actions=nb_actions,
        memory=memory,
        nb_steps_warmup=1000,
        target_model_update=1e-2,
        policy=policy,
        enable_dueling_network=dueling,
        dueling_type="avg",
    )
    dqn.compile(Adam(lr=1e-3), metrics=["mae"])

    if loaded:
        return dqn, history

    metrics = Metrics(dqn)
    history = dqn.fit(
        env,
        nb_steps=10000,
        start_step_policy=env.start_step_policy,
        nb_max_start_steps=10,
        nb_max_episode_steps=100,
        callbacks=[metrics],
    )
    model.save(str(model_weights_path))
    with open(model_history_path, "wb") as f:
        history = history.history
        history.update(metrics.metrics)
        pickle.dump(history, f)
    return dqn, history
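# A sketch of what enable_dueling_network with dueling_type="avg" computes above: the head is
# split into a scalar state value V(s) and per-action advantages A(s, a), recombined as
# Q(s, a) = V(s) + (A(s, a) - mean_a A(s, a)). Made-up numbers, just to show the formula.
import numpy as np

def dueling_q(value, advantages):
    advantages = np.asarray(advantages, dtype=float)
    return value + (advantages - advantages.mean())

print(dueling_q(2.0, [1.0, 0.0, -1.0]))  # [3., 2., 1.] -- mean-centered advantages plus V(s)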
if args.mode == 'train':
    # Okay, now it's time to learn something! We capture the interrupt
    # exception so that training can be prematurely aborted. Notice
    # that you can use the built-in Keras callbacks!
    weights_filename = 'dqn_{}_weights.h5f'.format(args.env_name)
    checkpoint_weights_filename = 'dqn_' + args.env_name + '_weights_{step}.h5f'
    log_filename = 'dqn_{}_log.json'.format(args.env_name)
    callbacks = [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=250000)]
    callbacks += [FileLogger(log_filename, interval=100)]
    dqn.fit(env, callbacks=callbacks, nb_steps=1750000, log_interval=10000, visualize=False)

    # After training is done, we save the final weights one more time.
    dqn.save_weights(weights_filename, overwrite=True)

    # Finally, evaluate our algorithm for 10 episodes.
    dqn.test(env, nb_episodes=10, visualize=False)
elif args.mode == 'test':
    weights_filename = 'dqn_{}_weights.h5f'.format(args.env_name)
    if args.weights:
        weights_filename = args.weights
    dqn.load_weights(weights_filename)
    dqn.test(env, nb_episodes=10, visualize=True)
# Configure
memory = SequentialMemory(limit=500000, window_length=1)
policy = BoltzmannQPolicy()
# policy = EpsGreedyQPolicy(0.5)
# policy = RlPolicy(0.5, 0.2)
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=10,
               target_model_update=1e-2, policy=policy)

# Compile
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# Train
dqn.fit(env, nb_steps=100000, visualize=False, verbose=2, nb_max_episode_steps=200)

# Persist
dqn.save_weights('dqn_{}_weights.h5f'.format("roborl"), overwrite=True)

# Test
while True:
    dqn.test(env, nb_episodes=5, visualize=False, nb_max_episode_steps=200)
    print("FINISHED!")
               target_model_update=1, delta_clip=1.)
dqn.compile(Adam(lr=.00025), metrics=['mae'])

if args.mode == 'train':
    # Okay, now it's time to learn something! We capture the interrupt exception so that
    # training can be prematurely aborted. Notice that you can use the built-in Keras
    # callbacks!
    weights_filename = 'dqn_{}_weights.h5f'.format(args.env_name)
    checkpoint_weights_filename = 'dqn_' + args.env_name + '_weights_{step}.h5f'
    log_filename = 'dqn_{}_log.json'.format(args.env_name)
    callbacks = [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=250000)]

    # Let's just keep training the same damn model
    dqn.load_weights(weights_filename)
    dqn.fit(env, callbacks=callbacks, nb_steps=10000, log_interval=5000)

    # After training is done, we save the final weights one more time.
    dqn.save_weights(weights_filename, overwrite=True)

    # Finally, evaluate our algorithm for one episode.
    env.reset()
    dqn.test(env, nb_episodes=1, visualize=True)
elif args.mode == 'test':
    weights_filename = 'dqn_{}_weights.h5f'.format(args.env_name)
    if args.weights:
        weights_filename = args.weights
    dqn.load_weights(weights_filename)
    dqn.fit(env, nb_steps=1000000, visualize=True)
               delta_clip=1.)
dqn.compile(Adam(lr=.00025), metrics=['mae'])

weights_filename = 'callbacks/dqn_flappy_weights.h5f'
checkpoint_weights_filename = 'callbacks/dqn_flappy_weights_{step}.h5f'
log_filename = 'dqn_flappy_log.json'
callbacks = [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=250000)]
callbacks += [FileLogger(log_filename, interval=100)]

history = dqn.fit(env, callbacks=callbacks, nb_steps=1500000,
                  log_interval=10000, verbose=2)  # 44,856.951 seconds

plt.plot(history.history["episode_reward"])
plt.savefig('images/episode_reward_with_preprocessing.png', dpi=100)
plt.show()

plt.plot(history.history["nb_episode_steps"])
plt.savefig('images/nb_episode_steps_with_preprocessing.png', dpi=100)
plt.show()

env.reset()
dqn.test(env, nb_episodes=10, visualize=True)
def main():
    # OPTIONS
    ENV_NAME = 'OcNewActionSpaceEnv-v0'
    TIME_STEP = 100
    set_gpu_option()

    # Get the environment and extract the number of actions.
    # PATH_TRAIN = "./data/train/"
    # PATH_TEST = "./data/test/"
    PATH_TRAIN = '/home/data/training_x_150.h5'
    PATH_TEST = '/home/data/test_x_150.h5'
    # env = OhlcvEnv(TIME_STEP, path=PATH_TRAIN)
    # env_test = OhlcvEnv(TIME_STEP, path=PATH_TEST)
    store = pd.HDFStore(PATH_TRAIN, mode='r')
    varieties_list = store.keys()
    variety = 'I'
    print('variety: ', variety)
    env = OcNewActionSpaceEnv(TIME_STEP, variety=variety, path=PATH_TRAIN)
    env_test = OcNewActionSpaceEnv(TIME_STEP, variety=variety, path=PATH_TEST)

    # random seed
    np.random.seed(123)
    env.seed(123)

    nb_actions = env.action_space.n
    print('nb_actions: ', nb_actions)
    print('env.shape: ', env.shape)
    model = create_model(shape=env.shape, nb_actions=nb_actions)
    print(model.summary())

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer
    # and even the metrics!
    memory = SequentialMemory(limit=50000, window_length=TIME_STEP)
    # policy = BoltzmannQPolicy()
    policy = EpsGreedyQPolicy()
    # enable the dueling network
    # you can specify the dueling_type to one of {'avg','max','naive'}
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory,
                   nb_steps_warmup=200, enable_dueling_network=True, dueling_type='avg',
                   target_model_update=1e-2, policy=policy,
                   processor=NormalizerProcessor())
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])
    tbCallBack = TensorBoard(histogram_freq=0, write_grads=True, write_images=True)

    while True:
        # train
        # for e in range(500):
        #     print('epoch: {}'.format(e))
        #     if os.path.isfile('weights'):
        #         print('weight file exist')
        #         print('load weights')
        #         dqn.load_weights('weights')
        #     else:
        #         print('weight file does not exist')
        dqn.fit(env, nb_steps=70000, nb_max_episode_steps=None, visualize=False,
                verbose=2, callbacks=[tbCallBack])
        # dqn.save_weights('weights', overwrite=True)
        # print('fit: ', fit)
        try:
            # validate
            info = dqn.test(env_test, nb_episodes=1, visualize=False)
            n_long = info['n_trades']['long']
            n_short = info['n_trades']['short']
            total_reward = info['total_reward']
            portfolio = int(info['portfolio'])
            np.array([info]).dump(
                './info/duel_dqn_{0}_weights_{1}LS_{2}_{3}_{4}.info'.format(
                    ENV_NAME, portfolio, n_long, n_short, total_reward))
            print('info saved')
            dqn.save_weights(
                './model/duel_dqn_{0}_weights_{1}LS_{2}_{3}_{4}.h5f'.format(
                    ENV_NAME, portfolio, n_long, n_short, total_reward),
                overwrite=True)
            print('weight saved')
        except KeyboardInterrupt:
            continue
               nb_actions=nb_actions,
               memory=memory,
               nb_steps_warmup=10,
               enable_dueling_network=True,
               dueling_type='avg',
               target_model_update=target_model_update,
               policy=policy,
               batch_size=32)

lr_06_05_20_49 = 1e-3
lr_06_05_22_18 = 1e-2
lr_06_13_19_07 = 5e-4
lr = lr_06_05_20_49
dqn.compile(Adam(lr=lr), metrics=['mae'])

if if_learn:
    dqn.fit(env, nb_steps=nb_steps, visualize=True, verbose=2,
            nb_max_episode_steps=nb_max_episode_steps)
    dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)
else:
    dqn.load_weights(weights_name + '.h5f')
    env.steps_before_rendering = 0
    dqn.test(env, nb_episodes=30, visualize=True,
             nb_max_episode_steps=nb_max_episode_steps)
weights_filename = 'wts/phy_dqn_{}_weights.h5f'.format(args.env_name)
checkpoint_weights_filename = 'wts/phy_dqn_' + args.env_name + '_weights_{step}.h5f'
log_filename = 'phy_dqn_{}_log.json'.format(args.env_name)
callbacks = [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=250000)]
callbacks += [FileLogger(log_filename, interval=100)]

if os.path.isfile(checkpoint_weights_filename):
    print("Loading previous checkpoint weights...")
    dqn.load_weights(checkpoint_weights_filename)
elif os.path.isfile(weights_filename):
    print("Loading previous weights...")
    dqn.load_weights(weights_filename)

dqn.fit(env, callbacks=callbacks, nb_steps=20000000,
        log_interval=10000, nb_max_start_steps=20)

# After training is done, we save the final weights one more time.
dqn.save_weights(weights_filename, overwrite=True)

# Finally, evaluate our algorithm for one episode.
dqn.test(env, nb_episodes=1, nb_max_start_steps=20, visualize=False)
elif args.mode == 'test':
    weights_filename = 'wts/phy_dqn_BreakoutDeterministic-v4_weights.h5f'.format(
        args.env_name)
    if args.weights:
        weights_filename = args.weights
    np.random.seed(None)
    env.seed(None)
memory = SequentialMemory(limit=50000, window_length=1)
policy = CustomEpsGreedyQPolicy(automataEnv=env, eps=.9)
dqn_only_embedding = DQNAgent(gamma=.999, model=model, nb_actions=action_size,
                              memory=memory, nb_steps_warmup=500,
                              target_model_update=1e-2, policy=policy,
                              test_policy=policy)
dqn_only_embedding.compile(Adam(lr=1e-3), metrics=['mae'])
dqn_only_embedding.fit(env, nb_steps=50000, visualize=False, verbose=1,
                       nb_max_episode_steps=100, log_interval=10000,
                       start_step_policy=policy)

q_values = dqn_only_embedding.compute_batch_q_values([0])
for i in range(1, state_size):
    q_values = np.vstack((q_values, dqn_only_embedding.compute_batch_q_values([i])))

# dqn_only_embedding.test(env, nb_episodes=5, visualize=False, verbose=1,
#                         nb_max_episode_steps=100, start_step_policy=policy)

# Path for car 0 to MI
env.reset()
env.step(21)
env.step(4)
model = Sequential()
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
print(model.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=50000, window_length=1)
policy = BoltzmannQPolicy()
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=10,
               target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

dqn.fit(env, nb_steps=50000)
dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)
dqn.test(env, nb_episodes=5)
from poloniex.gym_mikasa import MikasaLast4Env

# create Mikasa gym env
env = MikasaLast4Env()
np.random.seed(123)
env.seed(123)
nb_actions = env.action_space.n

# create model
model = Sequential()
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
model.add(Dense(32))
model.add(Activation('relu'))
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))

# configure agent
policy = EpsGreedyQPolicy(eps=0.01)
memory = SequentialMemory(limit=50000, window_length=1)
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=100,
               target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mse'])

# run agent
history = dqn.fit(env, nb_steps=50000, visualize=False, verbose=1)
plt.plot(history.history['episode_reward'])
plt.show()
               nb_steps_warmup=50000, gamma=.99, target_model_update=10000,
               train_interval=4, delta_clip=1.)
dqn.compile(Adam(lr=.00025), metrics=['mae'])

folder_path = '../model_saves/Vanilla/'

if args.mode == 'train':
    weights_filename = folder_path + 'dqn_{}_weights.h5f'.format(args.env_name)
    checkpoint_weights_filename = folder_path + 'dqn_' + args.env_name + '_weights_{step}.h5f'
    log_filename = folder_path + 'dqn_' + args.env_name + '_REWARD_DATA.txt'
    callbacks = [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=500000)]
    callbacks += [TrainEpisodeLogger(log_filename)]
    dqn.fit(env, callbacks=callbacks, nb_steps=10000000, verbose=0,
            nb_max_episode_steps=20000)
elif args.mode == 'test':
    weights_filename = folder_path + 'dqn_MsPacmanDeterministic-v4_weights_10000000.h5f'
    if args.weights:
        weights_filename = args.weights
    dqn.load_weights(weights_filename)
    dqn.test(env, nb_episodes=10, visualize=True, nb_max_start_steps=80)
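# A sketch of what the delta_clip=1. used above does: keras-rl trains with the Huber loss,
# which is quadratic for TD errors smaller than delta and linear beyond it, so huge errors
# cannot blow up the gradients. Plain Python illustration:
def huber(td_error, delta=1.0):
    if abs(td_error) <= delta:
        return 0.5 * td_error ** 2
    return delta * (abs(td_error) - 0.5 * delta)

print(huber(0.5))   # 0.125 (quadratic region)
print(huber(10.0))  # 9.5   (linear region: gradient magnitude capped at delta)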
class Agent(object):
    name = 'DQN'

    def __init__(self,
                 step_size=1,
                 window_size=20,
                 max_position=5,
                 fitting_file='ETH-USD_2018-12-31.xz',
                 testing_file='ETH-USD_2018-01-01.xz',
                 env='market-maker-v0',
                 seed=1,
                 action_repeats=4,
                 number_of_training_steps=1e5,
                 gamma=0.999,
                 format_3d=False,  # add 3rd dimension for CNNs
                 train=True,
                 weights=True,
                 z_score=True,
                 visualize=False,
                 dueling_network=True,
                 double_dqn=True):
        """
        Agent constructor
        :param step_size: int, number of steps to take in env for a given simulation step
        :param window_size: int, number of lags to include in observation
        :param max_position: int, maximum number of positions able to be held in inventory
        :param fitting_file: str, file used for z-score fitting
        :param testing_file: str, file used for dqn experiment
        :param env: environment name
        :param seed: int, random seed number
        :param action_repeats: int, number of steps to take in environment between actions
        :param number_of_training_steps: int, number of steps to train agent for
        :param gamma: float, value between 0 and 1 used to discount future DQN returns
        :param format_3d: boolean, format observation as matrix or tensor
        :param train: boolean, train or test agent
        :param weights: boolean, import existing weights
        :param z_score: boolean, standardize observation space
        :param visualize: boolean, visualize environment
        :param dueling_network: boolean, use dueling network architecture
        :param double_dqn: boolean, use double DQN for Q-value approximation
        """
        self.env_name = env
        self.env = gym.make(self.env_name,
                            fitting_file=fitting_file,
                            testing_file=testing_file,
                            step_size=step_size,
                            max_position=max_position,
                            window_size=window_size,
                            seed=seed,
                            action_repeats=action_repeats,
                            training=train,
                            z_score=z_score,
                            format_3d=format_3d)
        # Number of frames to stack e.g., 1.
        # NOTE: 'Keras-RL' uses its own frame-stacker
        self.memory_frame_stack = 1
        self.model = self.create_model()
        self.memory = SequentialMemory(limit=10000,
                                       window_length=self.memory_frame_stack)
        self.train = train
        self.number_of_training_steps = number_of_training_steps
        self.weights = weights
        self.cwd = os.path.dirname(os.path.realpath(__file__))
        self.visualize = visualize

        # create the agent
        self.agent = DQNAgent(model=self.model,
                              nb_actions=self.env.action_space.n,
                              memory=self.memory,
                              processor=None,
                              nb_steps_warmup=500,
                              enable_dueling_network=dueling_network,
                              dueling_type='avg',
                              enable_double_dqn=double_dqn,
                              gamma=gamma,
                              target_model_update=1000,
                              delta_clip=1.0)
        self.agent.compile(Adam(lr=3e-4), metrics=['mae'])

    def __str__(self):
        # msg = '\n'
        # return msg.join(['{}={}'.format(k, v) for k, v in self.__dict__.items()])
        return 'Agent = {} | env = {} | number_of_training_steps = {}'.format(
            Agent.name, self.env_name, self.number_of_training_steps)

    def create_model(self):
        """
        Create a Convolutional neural network with dense layer at the end
        :return: keras model
        """
        features_shape = (self.memory_frame_stack, *self.env.observation_space.shape)
        model = Sequential()
        conv = Conv2D
        model.add(conv(input_shape=features_shape,
                       filters=16, kernel_size=[10, 1], padding='same',
                       activation='relu', strides=[5, 1],
                       data_format='channels_first'))
        model.add(conv(filters=16, kernel_size=[6, 1], padding='same',
                       activation='relu', strides=[3, 1],
                       data_format='channels_first'))
        model.add(conv(filters=16, kernel_size=[4, 1], padding='same',
                       activation='relu', strides=[2, 1],
                       data_format='channels_first'))
        model.add(Flatten())
        model.add(Dense(512))
        model.add(Activation('linear'))
        model.add(Dense(self.env.action_space.n))
        model.add(Activation('softmax'))
        print(model.summary())
        return model

    def start(self):
        """
        Entry point for agent training and testing
        :return: (void)
        """
        weights_filename = '{}/dqn_weights/dqn_{}_weights.h5f'.format(
            self.cwd, self.env_name)
        if self.weights:
            self.agent.load_weights(weights_filename)
            print('...loading weights for {}'.format(self.env_name))

        if self.train:
            checkpoint_weights_filename = 'dqn_{}'.format(self.env_name) + \
                '_weights_{step}.h5f'
            checkpoint_weights_filename = '{}/dqn_weights/'.format(self.cwd) + \
                checkpoint_weights_filename
            log_filename = '{}/dqn_weights/dqn_{}_log.json'.format(
                self.cwd, self.env_name)
            print('FileLogger: {}'.format(log_filename))

            callbacks = [ModelIntervalCheckpoint(checkpoint_weights_filename,
                                                 interval=250000)]
            callbacks += [FileLogger(log_filename, interval=100)]

            print('Starting training...')
            self.agent.fit(self.env,
                           callbacks=callbacks,
                           nb_steps=self.number_of_training_steps,
                           log_interval=10000,
                           verbose=0,
                           visualize=self.visualize)
            print('Saving AGENT weights...')
            self.agent.save_weights(weights_filename, overwrite=True)
        else:
            print('Starting TEST...')
            self.agent.test(self.env, nb_episodes=2, visualize=self.visualize)
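# The "frame-stacker" note above refers to SequentialMemory's window_length: at each step the
# agent sees the last window_length observations stacked along a leading axis. A minimal
# deque-based sketch of the same idea (the window length of 4 and the padding-with-copies
# behavior are illustrative choices, not keras-rl's exact implementation):
from collections import deque
import numpy as np

class FrameStacker:
    def __init__(self, window_length=4):
        self.frames = deque(maxlen=window_length)

    def observe(self, frame):
        # Pad with copies of the current frame until the window is full.
        while len(self.frames) < self.frames.maxlen:
            self.frames.append(frame)
        self.frames.append(frame)  # oldest frame falls out automatically
        return np.stack(self.frames)  # shape: (window_length, *frame.shape)

stacker = FrameStacker()
print(stacker.observe(np.zeros((84, 84))).shape)  # (4, 84, 84)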
env = gym.make(ENV_NAME)
nb_actions = env.action_space.n

model = Sequential()
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
model.add(Dense(16, activation='relu'))
model.add(Dense(16, activation='relu'))
model.add(Dense(16, activation='relu'))
model.add(Dense(nb_actions, activation='linear'))

policy = BoltzmannQPolicy()
memory = SequentialMemory(limit=50000, window_length=1)
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=10,
               target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])
dqn.fit(env, nb_steps=50000, verbose=2)
dqn.test(env, nb_episodes=10, visualize=True)
    action = layers.Dense(actions, activation="linear")(layer5)
    return K.Model(inputs=inputs, outputs=action)

# This will automatically use a saved model!
if path.exists("policy.h5"):
    print("Using saved model!")
    model = K.models.load_model('policy.h5')
else:
    print("Using new model!")
    model = create_q_model(actions)

# setting up the DQN agent and keras-rl stuff
memory = SequentialMemory(limit=1000000, window_length=4)
policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1.,
                              value_min=.1, value_test=.05, nb_steps=850000)
stateprocess = AtariProcessor()
dqn = DQNAgent(model=model, nb_actions=actions, memory=memory, nb_steps_warmup=35,
               target_model_update=1e-2, policy=policy, processor=stateprocess,
               enable_double_dqn=True)
dqn.compile(optimizer=Adam(lr=.00025, clipnorm=1.0), metrics=['mae', 'accuracy'])
dqn.fit(env, nb_steps=1750000,
        callbacks=[ModelIntervalCheck('policy.h5', 1000, 1, model)], visualize=True)

# Saving the policy network
model.save("policy.h5")
# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
# callbacks = []
# if model_checkpoints:
#     callbacks += [ModelIntervalCheckpoint('./checkpoints/checkpoint_weights.h5f',
#                                           interval=checkpoint_interval)]
# if tensorboard:
#     callbacks += [TensorBoard(log_dir='./logs')]
# dqn.fit(env, nb_steps=50000, visualize=args.visualize, verbose=2, callbacks=callbacks)
dqn.fit(env, nb_steps=50000, visualize=args.visualize, verbose=2)

# After training is done, we save the final weights.
dqn.save_weights('double_dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
dqn.test(env, nb_episodes=5, visualize=args.visualize)

# TESTING BASED ON SAVED WEIGHTS
if args.mode == 'test':
    weights_filename = 'double_dqn_{}_weights.h5f'.format(args.env_name)
    if args.weights:
        weights_filename = args.weights
    dqn.load_weights(weights_filename)
    if args.callbacks == True:
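# The snippet above saves "double_dqn" weights; a sketch of what enable_double_dqn changes in
# the bootstrap target: the online network chooses the best next action, while the target
# network evaluates it, i.e. y = r + gamma * Q_target(s', argmax_a Q_online(s', a)).
# The Q-values below are made-up numbers for illustration.
import numpy as np

def double_dqn_target(reward, q_online_next, q_target_next, gamma=0.99, done=False):
    if done:
        return reward
    best_action = int(np.argmax(q_online_next))          # selection: online network
    return reward + gamma * q_target_next[best_action]   # evaluation: target network

print(double_dqn_target(1.0, np.array([0.2, 0.9]), np.array([0.5, 0.4])))  # 1.0 + 0.99 * 0.4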
print(model.summary())

# Compile
memory = SequentialMemory(limit=50000, window_length=1)
policy = EpsGreedyQPolicy()
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=10,
               target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# Train
dqn.fit(env, nb_steps=10000, visualize=True, verbose=1, log_interval=1)

# Plot
plt.plot(env.x_plot, env.reward_plot, color='blue')
plt.title("Learning curve")
plt.xlabel("Episode")
plt.ylabel("Reward")
plt.show()

# Save the learned parameters
dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

# Test
dqn.test(env, nb_episodes=5, visualize=True)
    # enable the dueling network
    # you can specify the dueling_type to one of {'avg','max','naive'}
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=10,
                   enable_dueling_network=False, dueling_type='avg',
                   target_model_update=1e-4, policy=policy)
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])

    # Okay, now it's time to learn something! We visualize the training here for show, but this
    # slows down training quite a lot. You can always safely abort the training prematurely
    # using Ctrl + C.
    dqn.fit(env, nb_steps=1000000, visualize=False, verbose=1)

    # After training is done, we save the final weights.
    dqn.save_weights('weights.h5f', overwrite=True)

    # Finally, evaluate our algorithm for 5 episodes.
    dqn.test(env, nb_episodes=5, visualize=False)
else:  # SARSA
    # SARSA does not require a memory.
    policy = BoltzmannQPolicy()
    model = Sequential()
    model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    model.add(Dense(state_size // 2))  # Dense expects an int, so use integer division
def attempt(lr, numTrainSteps, fnamePrefix, activation, exportVid, visualize):
    # Get the environment and extract the number of actions.
    env = gym.make(ENV_NAME)
    np.random.seed(123)
    env.seed(123)
    nb_actions = env.action_space.n
    print("env.observation_space.shape: " + str(env.observation_space.shape))

    # Next, we build a very simple model.
    model = Sequential()
    model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    model.add(Dense(16))
    model.add(Activation(activation))
    model.add(Dense(13))
    model.add(Activation(activation))
    model.add(Dense(10))
    model.add(Activation(activation))
    model.add(Dense(nb_actions))
    model.add(Activation('linear'))
    print(model.summary())

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer
    # and even the metrics!
    memory = SequentialMemory(limit=100000, window_length=1)
    policy = BoltzmannQPolicy()
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=100,
                   target_model_update=1e-2, policy=policy)
    dqn.compile(Adam(lr=lr), metrics=['mae'])

    if not os.path.exists(fnamePrefix):
        os.makedirs(fnamePrefix)
    weights_fname = '%s/weights.h5f' % fnamePrefix
    if os.path.isfile(weights_fname):
        print("Loading weights from before")
        print("Skipping training")
        dqn.load_weights(weights_fname)
    else:
        # Okay, now it's time to learn something! We visualize the training here for show,
        # but this slows down training quite a lot. You can always safely abort the training
        # prematurely using Ctrl + C.
        dqn.fit(env, nb_steps=numTrainSteps, visualize=False, verbose=1)
        # After training is done, we save the final weights.
        dqn.save_weights(weights_fname, overwrite=True)

    # Finally, evaluate our algorithm.
    env.reset()
    env.close()
    if exportVid:
        if not visualize:
            # print to stderr, since trainAll redirects stdout
            eprint("Error: I don't think the video export works unless you choose "
                   "visualize=True")
        videoFname = fnamePrefix + '/videos/' + str(time())
        if not os.path.exists(videoFname):
            os.makedirs(videoFname)
        env = wrappers.Monitor(env, videoFname, force=True)
    result = dqn.test(env, nb_episodes=1, visualize=visualize)
    if exportVid:
        print("Video saved to %s" % videoFname)
    means = {'reward': mean(result.history['episode_reward'])}
    json_fname = fnamePrefix + '/result.json'
    with open(json_fname, "w") as f:
        json.dump(result.history, f)
    return (means)
model = Sequential()
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
print(model.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=50000, window_length=1)
policy = BoltzmannQPolicy()
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=10,
               target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
dqn.fit(env, nb_steps=50000, visualize=True, verbose=2)

# After training is done, we save the final weights.
dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
dqn.test(env, nb_episodes=5, visualize=True)
model.add(Dense(nb_actions))
model.add(Activation('linear'))
print(model.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=50000, window_length=1)
policy = EpsGreedyQPolicy(eps=0.1)
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=100,
               target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
dqn.fit(env, nb_steps=50000, visualize=False, verbose=2, nb_max_episode_steps=300)

import rl.callbacks

class EpisodeLogger(rl.callbacks.Callback):
    def __init__(self):
        self.observations = {}
        self.rewards = {}
        self.actions = {}

    def on_episode_begin(self, episode, logs):
        self.observations[episode] = []
        self.rewards[episode] = []
        self.actions[episode] = []

    def on_step_end(self, step, logs):
        episode = logs['episode']
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=100,
                   enable_dueling_network=True, dueling_type='avg',
                   target_model_update=1e-2, policy=train_policy, test_policy=test_policy)
    filename = 'weights/duel_dqn_{}_weights_{}_{}_{}_{}.h5f'.format(
        ENV_NAME, LAYER_SIZE, NUM_HIDDEN_LAYERS, NUM_STEPS, TRIAL_ID)
else:
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=100,
                   target_model_update=1e-2, policy=train_policy, test_policy=test_policy)
    filename = 'weights/dqn_{}_weights_{}_{}_{}_{}.h5f'.format(
        ENV_NAME, LAYER_SIZE, NUM_HIDDEN_LAYERS, NUM_STEPS, TRIAL_ID)

dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# Optionally, we can reload a previous model's weights and continue training from there
# FILENAME = 'weights/duel_dqn_variable_pendulum-v0_weights_4096_4_50000_2017-07-11_140316.h5f'
# Load the model weights
# dqn.load_weights(FILENAME)

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
dqn.fit(env, nb_steps=NUM_STEPS, visualize=False, verbose=2, nb_max_episode_steps=500)

# After training is done, we save the final weights.
dqn.save_weights(filename, overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
dqn.test(env, nb_episodes=5, nb_max_episode_steps=500, visualize=True)
dqn = DQNAgent(model=model, nb_actions=nb_actions, policy=policy, memory=memory,
               processor=processor, enable_double_dqn=True, enable_dueling_network=True,
               nb_steps_warmup=1000, gamma=.99, target_model_update=10000,
               train_interval=4, delta_clip=1.)

# Prioritized Memories typically use lower learning rates
dqn.compile(Adam(lr=.00025 / 4), metrics=['mae'])

folder_path = './'
mode = 'train'

if mode == 'train':
    weights_filename = folder_path + 'pdd_dqn_{}_weights.h5f'.format(env_name)
    checkpoint_weights_filename = folder_path + 'pdd_dqn_' + env_name + '_weights_{step}.h5f'
    log_filename = folder_path + 'pdd_dqn_' + env_name + '_REWARD_DATA.txt'
    callbacks = [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=500000)]
    callbacks += [TrainEpisodeLogger()]
    dqn.fit(env, callbacks=callbacks, nb_steps=10000000, verbose=0,
            nb_max_episode_steps=20000)
elif mode == 'test':
    weights_filename = folder_path + 'pdd_dqn_MsPacmanDeterministic-v4_weights_10000000.h5f'
    dqn.load_weights(weights_filename)
    dqn.test(env, nb_episodes=10, visualize=True, nb_max_start_steps=80)
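# The "Prioritized Memories" comment above refers to prioritized experience replay (Schaul et
# al., 2015): transitions are sampled with probability proportional to |TD error|^alpha rather
# than uniformly. A toy sketch of proportional sampling (alpha=0.6 matches the paper's
# default; the TD errors are made-up numbers):
import numpy as np

def sample_indices(td_errors, batch_size, alpha=0.6, eps=1e-6):
    priorities = (np.abs(td_errors) + eps) ** alpha
    probs = priorities / priorities.sum()
    return np.random.choice(len(td_errors), size=batch_size, p=probs)

print(sample_indices(np.array([0.1, 2.0, 0.05, 1.0]), batch_size=2))  # biased toward index 1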
else:
    raise ValueError('Please select DQN, DUEL_DQN, SARSA, or CEM for your method type.')

callbacks = []
# callbacks += [ModelIntervalCheckpoint(CHECKPOINT_WEIGHTS_FILENAME, interval=10000)]
callbacks += [FileLogger(LOG_FILENAME, interval=100)]
# callbacks += [TensorBoard(log_dir='logs', histogram_freq=0, write_graph=True,
#                           write_images=False)]
callbacks += [ExploreExploit()]

# Optionally, we can reload a previous model's weights and continue training from there
# LOAD_WEIGHTS_FILENAME = 'weights/duel_dqn_planar_crane-v0_weights_1024_4_50000_2017-07-12_160853.h5f'
#
# Load the model weights
# agent.load_weights(LOAD_WEIGHTS_FILENAME)

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
agent.fit(env, nb_steps=NUM_STEPS, callbacks=callbacks, action_repetition=5,
          visualize=False, verbose=1, log_interval=LOG_INTERVAL,
          nb_max_episode_steps=500)

# After training is done, we save the final weights.
agent.save_weights(WEIGHT_FILENAME, overwrite=True)

# We'll also save a simply-named version to make running a test immediately
# following training easier.
filename = 'weights/{}_{}_weights.h5f'.format(METHOD, ENV_NAME)
agent.save_weights(filename, overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
agent.test(env, nb_episodes=5, visualize=True, action_repetition=5)  # nb_max_episode_steps=500,
# memory = EpisodeParameterMemory(limit=20, window_length=window_length)
# Non-episodic
memory = SequentialMemory(limit=20, window_length=window_length)
policy = EpsGreedyQPolicy(eps=0.1)
agent = DQNAgent(model=model,
                 nb_actions=nb_actions,
                 memory=memory,
                 nb_steps_warmup=train_step,  # A3C TRPO
                 target_model_update=1e-2,
                 policy=policy)
agent.compile(Adam(lr=1e-2), metrics=['mae'])

print("Fit model")
st = time.time()
agent.fit(env, train_step)
ed = time.time()
print("Training: %f [s]" % (ed - st))

# Reset environment
# env.rewind()
env = GraphEnv(graph, query, cond, base_step, test_step, time_limit, window_length)

st = time.time()
agent.test(env, nb_episodes=1)
ed = time.time()
print("Testing: %f [s]" % (ed - st))
from models import LordTateKanti

env = SuperiorEnv(env=halite_env.Env(), tiles_num=16)
env.configure(socket_path=f"/dev/shm/{time.time_ns()}", replay=False)
nb_actions = env.action_space.n
model = LordTateKanti.make_model(env)
print(model.summary())

memory = SequentialMemory(limit=10_000, window_length=1)
policy = BoltzmannGumbelQPolicy()
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=1000,
               target_model_update=1e-2, policy=policy, gamma=0.99)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

callbacks = [
    # ModelIntervalCheckpoint('dqn_PlanetCaptureBot_weights_{step}.h5f', interval=100),
    TrainEpisodeLogger()
]
dqn.fit(env, nb_steps=100_000, visualize=False, verbose=0, callbacks=callbacks)
dqn.save_weights('dqn_SuperiorBot_weights_final.h5f', overwrite=True)
               nb_steps_warmup=50000, gamma=.99, target_model_update=10000,
               train_interval=4, delta_clip=1.)
dqn.compile(Adam(lr=.00025), metrics=['mae'])

if args.mode == 'train':
    # Okay, now it's time to learn something! We capture the interrupt exception so that
    # training can be prematurely aborted. Notice that you can use the built-in Keras
    # callbacks!
    weights_filename = 'dqn_{}_weights.h5f'.format(args.env_name)
    checkpoint_weights_filename = 'dqn_' + args.env_name + '_weights_{step}.h5f'
    log_filename = 'dqn_{}_log.json'.format(args.env_name)
    callbacks = [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=250000)]
    callbacks += [FileLogger(log_filename, interval=100)]
    dqn.fit(env, callbacks=callbacks, nb_steps=1750000, log_interval=10000)

    # After training is done, we save the final weights one more time.
    dqn.save_weights(weights_filename, overwrite=True)

    # Finally, evaluate our algorithm for 10 episodes.
    dqn.test(env, nb_episodes=10, visualize=False)
elif args.mode == 'test':
    weights_filename = 'dqn_{}_weights.h5f'.format(args.env_name)
    if args.weights:
        weights_filename = args.weights
    dqn.load_weights(weights_filename)
    dqn.test(env, nb_episodes=10, visualize=True)
               nb_actions=nb_actions, memory=memory, nb_steps_warmup=500,
               target_model_update=1e-2, policy=policy, enable_double_dqn=False,
               batch_size=512)
dqn.compile(Adam())

try:
    dqn.load_weights('dqn_{}_weights.h5f'.format(ENV_NAME))
except Exception as e:
    print(e)

temp_folder = tempfile.mkdtemp()
# env.monitor.start(temp_folder)

dqn.fit(env, nb_steps=1e5, visualize=False, verbose=1, log_interval=10000)

# After training is done, we save the final weights.
dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

# Finally, evaluate our algorithm for 20 episodes.
dqn.test(env, nb_episodes=20, visualize=False)

# env.monitor.close()
# upload = input("Upload? (y/n)")
# if upload == "y":
#     gym.upload(temp_folder, api_key='YOUR_OWN_KEY')
# is Boltzmann-style exploration:
# policy = BoltzmannQPolicy(tau=1.)
# Feel free to give it a try!
dqn = DQNAgent(model=model, nb_actions=nb_actions, policy=policy,
               window_length=WINDOW_LENGTH, memory=memory, processor=processor,
               nb_steps_warmup=50000, gamma=.99, delta_range=(-1., 1.),
               reward_range=(-1., 1.), target_model_update=10000, train_interval=4)
dqn.compile(Adam(lr=.00025), metrics=['mae'])

if args.mode == 'train':
    # Okay, now it's time to learn something! We capture the interrupt exception so that
    # training can be prematurely aborted. Notice that you can use the built-in Keras
    # callbacks!
    weights_filename = 'dqn_{}_weights.h5f'.format(args.env_name)
    checkpoint_weights_filename = 'dqn_' + args.env_name + '_weights_{step}.h5f'
    log_filename = 'dqn_{}_log.json'.format(args.env_name)
    callbacks = [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=250000)]
    callbacks += [FileLogger(log_filename, interval=100)]
    dqn.fit(env, callbacks=callbacks, nb_steps=1750000, log_interval=10000)

    # After training is done, we save the final weights one more time.
    dqn.save_weights(weights_filename, overwrite=True)

    # Finally, evaluate our algorithm for 10 episodes.
    dqn.test(env, nb_episodes=10, visualize=False)
elif args.mode == 'test':
    weights_filename = 'dqn_{}_weights.h5f'.format(args.env_name)
    if args.weights:
        weights_filename = args.weights
    dqn.load_weights(weights_filename)
    dqn.test(env, nb_episodes=10, visualize=True)
memory = SequentialMemory(limit=2000, window_length=1)
policy = BoltzmannQPolicy()
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=10000,
               target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# Set mode to 'test' to evaluate existing network weights, or 'train' to train a new network!
mode = 'test'
if mode == 'train':
    filename = '400kit_rn4_maior2_mem20k_20acleme_target1000_epsgr1'
    hist = dqn.fit(env, nb_steps=300000, visualize=False, verbose=2)
    with open('C:/Users/JMPF/PycharmProjects/ShipAI/ShipAI/_experiments/history_dqn_test_'
              + filename + '.pickle', 'wb') as handle:
        pickle.dump(hist.history, handle, protocol=pickle.HIGHEST_PROTOCOL)

    # After training is done, we save the final weights.
    dqn.save_weights('h5f_files/dqn_{}_weights.h5f'.format(filename), overwrite=True)

    # Finally, evaluate our algorithm for 10 episodes.
    dqn.test(env, nb_episodes=10, visualize=True)

if mode == 'test':
    env.set_test_performace()  # Initialize for a performance test
    env.set_save_experice()    # Save the test run so the results can be plotted afterwards
    filename = '400kit_rn4_maior2_mem20k_20acleme_target1000_epsgr1'
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from keras.optimizers import Adam
import gym

from rl.agents.dqn import DQNAgent
from rl.policy import EpsGreedyQPolicy
from rl.memory import SequentialMemory

env = gym.make('MountainCar-v0')
nb_actions = env.action_space.n

model = Sequential()
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))

memory = SequentialMemory(limit=30000, window_length=1)
policy = EpsGreedyQPolicy(eps=0.001)
dqn = DQNAgent(model=model, nb_actions=nb_actions, gamma=0.99, memory=memory,
               nb_steps_warmup=10, target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

history = dqn.fit(env, nb_steps=30000, visualize=False, verbose=2)
dqn.test(env, nb_episodes=1, visualize=True)
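# The EpsGreedyQPolicy(eps=0.001) above is almost fully greedy; a sketch of the rule it
# applies: with probability eps pick a uniformly random action, otherwise take the argmax
# over the Q-values. The Q-values below are made-up numbers for illustration.
import numpy as np

def eps_greedy_action(q_values, eps=0.001):
    if np.random.rand() < eps:
        return np.random.randint(len(q_values))  # explore
    return int(np.argmax(q_values))              # exploit

print(eps_greedy_action(np.array([0.1, 0.7, 0.2])))  # almost always 1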
def training_game():
    env = Environment(map_name="DefeatRoaches",
                      visualize=True,
                      game_steps_per_episode=150,
                      agent_interface_format=features.AgentInterfaceFormat(
                          feature_dimensions=features.Dimensions(screen=64, minimap=32)))

    input_shape = (_SIZE, _SIZE, 1)
    nb_actions = 12  # Number of actions

    model = neural_network_model(input_shape, nb_actions)
    memory = SequentialMemory(limit=5000, window_length=_WINDOW_LENGTH)
    processor = SC2Proc()

    # Policy
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr="eps", value_max=1,
                                  value_min=0.2, value_test=.0, nb_steps=1e2)

    # Agent
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory,
                   enable_double_dqn=True, enable_dueling_network=True,
                   nb_steps_warmup=500, target_model_update=1e-2, policy=policy,
                   batch_size=150, processor=processor, delta_clip=1)
    dqn.compile(Adam(lr=.001), metrics=["mae", "acc"])

    # Tensorboard callback
    callbacks = keras.callbacks.TensorBoard(log_dir='./Graph', histogram_freq=0,
                                            write_graph=True, write_images=False)

    # Save the parameters and upload them when needed
    name = "agent"
    w_file = "dqn_{}_weights.h5f".format(name)
    check_w_file = "train_w" + name + "_weights.h5f"
    if SAVE_MODEL:
        check_w_file = "train_w" + name + "_weights_{step}.h5f"
    log_file = "training_w_{}_log.json".format(name)

    if LOAD_MODEL:
        dqn.load_weights(w_file)

    class Saver(Callback):
        def on_episode_end(self, episode, logs={}):
            if episode % 200 == 0:
                self.model.save_weights(w_file, overwrite=True)

    s = Saver()
    logs = FileLogger('DQN_Agent_log.csv', interval=1)

    dqn.fit(env, callbacks=[callbacks, s, logs], nb_steps=600, action_repetition=2,
            log_interval=1e4, verbose=2)
    dqn.save_weights(w_file, overwrite=True)
    dqn.test(env, action_repetition=2, nb_episodes=30, visualize=False)