def train_model(seed=1, setup=0):
    np.random.seed(seed)
    if setup == 0:
        env = CameraControlEnv(a_p=0, a_r=0, e_thres=0)
    elif setup == 1:
        env = CameraControlEnv(a_p=0, a_r=0, e_thres=0.8)
    else:
        env = CameraControlEnv(a_p=0.5, a_r=0.2, e_thres=0.8)
    env.seed(seed)

    model = define_model(actions=7)

    memory = SequentialMemory(limit=10000, window_length=1)
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1.0, value_min=0.1,
                                  value_test=0.05, nb_steps=95000)
    dqn = DQNAgent(model=model, nb_actions=7, policy=policy, memory=memory, processor=None,
                   nb_steps_warmup=500, gamma=0.95, delta_clip=1, target_model_update=0.001,
                   batch_size=32)
    dqn.compile(RMSprop(lr=.0001), metrics=['mae'])

    log_filename = 'results/drone_camera_control_log_' + str(setup) + '.json'
    model_checkpoint_filename = 'results/drone_camera_cnn_weights_' + str(setup) + '_{step}.model'
    callbacks = [ModelIntervalCheckpoint(model_checkpoint_filename, interval=5000)]
    callbacks += [FileLogger(log_filename, interval=1)]

    dqn.fit(env, nb_steps=100000, nb_max_episode_steps=100, verbose=2, visualize=False,
            log_interval=1, callbacks=callbacks)

    # After training is done, save the final weights.
    model_filename = 'models/drone_camera_cnn_' + str(setup) + '.model'
    dqn.save_weights(model_filename, overwrite=True)
def train_1():
    n = 2
    env = Puzzle(n)

    model = Sequential()
    model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    model.add(Dense(units=32, activation='relu'))
    model.add(Dense(units=32, activation='relu'))
    model.add(Dense(units=32, activation='relu'))
    model.add(Dense(env.action_space.n, activation='linear'))

    memory = SequentialMemory(limit=10000, window_length=1)
    policy = BoltzmannQPolicy()
    agent = DQNAgent(model=model, nb_actions=env.action_space.n, memory=memory,
                     nb_steps_warmup=100, target_model_update=1e-2,
                     enable_dueling_network=True, policy=policy)
    agent.compile(Adam(lr=1e-3), metrics=['mse'])

    agent.fit(env, nb_steps=500000, nb_max_episode_steps=50, visualize=False, verbose=2,
              callbacks=[TensorBoard(log_dir='temp')])
    agent.save_weights('model/puzzle_2x2.h5')
def train_1():
    env = MazeEnv(maze_file='data/maze_5x5.npy')

    model = Sequential()
    model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    model.add(Dense(units=32, activation='relu'))
    model.add(Dense(units=32, activation='relu'))
    model.add(Dense(units=32, activation='relu'))
    model.add(Dense(units=32, activation='relu'))
    model.add(Dense(env.action_space.n, activation='linear'))

    memory = SequentialMemory(limit=10000, window_length=1)
    policy = BoltzmannQPolicy()
    agent = DQNAgent(model=model, nb_actions=env.action_space.n, memory=memory,
                     nb_steps_warmup=100, enable_double_dqn=False,
                     enable_dueling_network=False, policy=policy)
    agent.compile(Adam(lr=1e-3), metrics=['mse'])

    agent.fit(env, nb_steps=100000, nb_max_episode_steps=200, visualize=True, verbose=2,
              callbacks=[TensorBoard(log_dir='temp')])
    agent.save_weights('model/maze_5x5_1.h5')
def main():
    # logging.basicConfig(level=logging.DEBUG)
    ENV_NAME = "MineRLTreechop-v0"
    env = gym.make(ENV_NAME)  # A MineRLTreechop-v0 env
    nb_actions = 9

    # Next, we build a very simple model.
    model = Sequential()
    model.add(Flatten(input_shape=(64, 64, 3)))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions))
    model.add(Activation('linear'))

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
    # even the metrics! Note that DQNAgent also requires nb_actions and a replay memory.
    memory = SequentialMemory(limit=50000, window_length=1)  # memory limit chosen arbitrarily here
    policy = BoltzmannQPolicy()
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, policy=policy)
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])

    # Okay, now it's time to learn something! We visualize the training here for show, but this
    # slows down training quite a lot. You can always safely abort the training prematurely using
    # Ctrl + C.
    dqn.fit(env, nb_steps=2500, visualize=True, verbose=2)
    print(model.summary())

    # After training is done, we save the final weights.
    dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)
class ExpAgent:
    """
    ExpAgent: Experiment RL Agent

    Args:
        weights: (optional) the path to the pretrained weights
        env: the environment that the agent interacts with
    """

    def __init__(self, env, weights=None):
        # init D-QN model based on the environment
        model = Sequential()
        model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
        model.add(Dense(128))
        model.add(Activation('relu'))
        model.add(Dense(env.action_space.n))
        model.add(Activation('linear'))
        print(model.summary())

        memory = SequentialMemory(limit=50000, window_length=1)
        self.dqn = DQNAgent(model=model, nb_actions=env.action_space.n, memory=memory,
                            nb_steps_warmup=1000, target_model_update=1e-3,
                            policy=EpsGreedyQPolicy())
        self.dqn.compile(Adam(lr=1e-3), metrics=['mae'])

        if weights:
            self.dqn.load_weights(weights)

    def save_model(self, path):
        self.dqn.save_weights(filepath=path)
        print("{} saved.".format(path))
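# Usage sketch (not from the original source): ExpAgent only wraps construction, loading,
# and saving, so training and evaluation go through the exposed self.dqn. The environment
# name and output path below are hypothetical examples.
import gym

env = gym.make('CartPole-v0')              # hypothetical env, for illustration only
agent = ExpAgent(env)                      # or ExpAgent(env, weights='pretrained.h5f')
agent.dqn.fit(env, nb_steps=50000, visualize=False, verbose=2)
agent.dqn.test(env, nb_episodes=5, visualize=False)
agent.save_model('exp_agent_weights.h5f')  # hypothetical output path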
def main():
    env = retro.make(game=ENV_NAME, state=STATE_NAME,
                     use_restricted_actions=retro.Actions.DISCRETE)
    nb_actions = env.action_space.n

    model = Sequential()
    model.add(Conv2D(32, kernel_size=(8, 8), strides=4, activation="relu",
                     input_shape=(1,) + (128, 100), data_format='channels_first'))
    model.add(Conv2D(64, kernel_size=(4, 4), strides=2, activation="relu"))
    model.add(Conv2D(64, (3, 3), activation="relu"))
    model.add(Flatten())
    model.add(Dense(512, activation="relu"))
    model.add(Dense(nb_actions))
    model.add(Activation('linear'))

    memory = SequentialMemory(limit=50000, window_length=1)
    policy = BoltzmannQPolicy()

    print(env.observation_space)

    # Load previously saved model weights from file, if they exist.
    if os.path.exists('./weights/dqn_cnn_{}_weights.h5f'.format(STATE_NAME)):
        model.load_weights('./weights/dqn_cnn_{}_weights.h5f'.format(STATE_NAME))

    dqn = DQNAgent(processor=CNNProcessor(), model=model, nb_actions=nb_actions, memory=memory,
                   nb_steps_warmup=10000, target_model_update=1e-3, policy=policy)
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])

    dqn.fit(env, nb_steps=1000000, visualize=True, verbose=2,
            callbacks=[InfoCallbackTrain()], action_repetition=4)

    # Overwrite the model weights file after training.
    dqn.save_weights('./weights/dqn_cnn_{}_weights.h5f'.format(STATE_NAME), overwrite=True)

    plot_wins()
    # plot_reward(training_history)

    dqn.test(env, nb_episodes=5, visualize=True)
def train_dqn(env, args):
    from src.Agents import create_dqn_model, dqn_controls, EnvironmentWrapper
    from keras.optimizers import Adam
    from rl.agents.dqn import DQNAgent
    from rl.policy import EpsGreedyQPolicy
    from rl.memory import SequentialMemory

    env = EnvironmentWrapper(dqn_controls, env)
    model = create_dqn_model(env)

    memory = SequentialMemory(limit=50000, window_length=1)
    policy = EpsGreedyQPolicy()
    dqn = DQNAgent(model=model, nb_actions=env.nb_actions, memory=memory, nb_steps_warmup=2000,
                   target_model_update=1e-2, policy=policy)
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])

    try:
        dqn.load_weights(args.ai_in)
    except OSError:
        pass

    dqn.fit(env, nb_steps=50000, visualize=False, verbose=2)
    dqn.save_weights(args.ai_out, overwrite=True)
    dqn.test(env, nb_episodes=1, visualize=False)
class SimpleDQN:
    def __init__(self, observation_shape, nb_actions, eps_steps):
        # First, we build a very simple NN model.
        model = Sequential()
        model.add(Flatten(input_shape=(1,) + observation_shape))
        model.add(Dense(16))
        model.add(Activation("relu"))
        model.add(Dense(16))
        model.add(Activation("relu"))
        model.add(Dense(16))
        model.add(Activation("relu"))
        model.add(Dense(nb_actions))
        model.add(Activation("linear"))
        print(model.summary())

        # Next, we configure and compile our agent. You can use every
        # built-in Keras optimizer and even the metrics!
        memory = SequentialMemory(limit=50000, window_length=1)
        # policy = BoltzmannQPolicy()
        policy = LinearAnnealedPolicy(
            EpsGreedyQPolicy(),
            attr="eps",
            value_max=1.0,
            value_min=0.1,
            value_test=0.05,
            nb_steps=eps_steps,
        )
        # Dueling and double-DQN are agent settings, so they belong here rather than in fit().
        self.dqn = DQNAgent(
            model=model,
            nb_actions=nb_actions,
            memory=memory,
            nb_steps_warmup=1000,
            target_model_update=1e-2,
            policy=policy,
            enable_dueling_network=True,  # Enable dueling
            dueling_type="avg",
            enable_double_dqn=True,  # Enable double dqn
        )
        self.dqn.compile(Adam(lr=1e-3), metrics=["mae"])

    def train(self, env, steps, log_interval=5000):
        self.dqn.fit(
            env,
            callbacks=[FileLogger("dqn_log.json")],
            log_interval=log_interval,
            nb_steps=steps,
            verbose=1,
            visualize=False,
        )
        # After training is done, we save the final weights.
        self.dqn.save_weights("dqn_weights.h5f", overwrite=True)

    def test(self, env, episodes):
        # Finally, evaluate our algorithm for a few episodes.
        self.dqn.load_weights("dqn_weights.h5f")
        self.dqn.test(env, nb_episodes=episodes, visualize=False)
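# Usage sketch (not from the original source); the environment and step counts below are
# hypothetical examples chosen only to illustrate the SimpleDQN interface.
import gym

env = gym.make('CartPole-v0')
agent = SimpleDQN(env.observation_space.shape, env.action_space.n, eps_steps=10000)
agent.train(env, steps=50000)
agent.test(env, episodes=5)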
def main():
    # Create env
    np.random.seed(SEED)
    env = PentagoEnv(SIZE, agent_starts=AGENT_STARTS)
    env.seed(SEED)
    nb_actions = env.action_space.n

    # Define model
    model = Sequential()
    model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(128, activation='sigmoid'))
    model.add(Dense(nb_actions))
    print(model.summary())

    # Configure and compile agent
    memory = SequentialMemory(limit=5000, window_length=1)
    policy = BoltzmannQPolicy()
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=1000,
                   target_model_update=1000, policy=policy)
    optimizer = RMSprop(lr=0.00025, epsilon=0.01)
    dqn.compile(optimizer)

    # Okay, now it's time to learn something! We visualize the training here for show, but this
    # slows down training quite a lot. You can always safely abort the training prematurely using
    # Ctrl + C.
    dqn.fit(env, nb_steps=50000, visualize=True, verbose=1)

    # After training is done, we save the final weights.
    dqn.save_weights('weights/dqn-{}-weights-{}.h5f'.format(TAG, datetime.datetime.now()))
def main():
    np.random.seed(123)
    env = PentagoEnv(SIZE)
    env.seed(123)
    nb_actions = env.action_space.n

    model = Sequential()
    # model.add(Reshape((SIZE ** 2,), input_shape=(SIZE, SIZE)))
    model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(128, activation='sigmoid'))
    model.add(Dense(nb_actions))
    print(model.summary())

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
    # even the metrics!
    memory = SequentialMemory(limit=5000, window_length=1)
    policy = BoltzmannQPolicy()
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=1000,
                   target_model_update=1e-2, policy=policy)
    optimizer = RMSprop(lr=0.00025, epsilon=0.01)
    dqn.compile(optimizer)

    # Okay, now it's time to learn something! We visualize the training here for show, but this
    # slows down training quite a lot. You can always safely abort the training prematurely using
    # Ctrl + C.
    dqn.fit(env, nb_steps=50000, visualize=True, verbose=1)

    # After training is done, we save the final weights.
    dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)
def main():
    # OPTIONS
    ENV_NAME = 'OHLCV-v0'
    TIME_STEP = 30

    # Get the environment and extract the number of actions.
    PATH_TRAIN = "./data/train/"
    PATH_TEST = "./data/test/"
    env = OhlcvEnv(TIME_STEP, path=PATH_TRAIN)
    env_test = OhlcvEnv(TIME_STEP, path=PATH_TEST)

    # random seed
    np.random.seed(123)
    env.seed(123)

    nb_actions = env.action_space.n
    model = create_model(shape=env.shape, nb_actions=nb_actions)
    # print(model.summary())
    model.summary()

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and even the metrics!
    memory = SequentialMemory(limit=50000, window_length=TIME_STEP)
    # policy = BoltzmannQPolicy()
    policy = EpsGreedyQPolicy()
    # enable the dueling network
    # you can specify the dueling_type to one of {'avg','max','naive'}
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=200,
                   enable_dueling_network=True, dueling_type='avg', target_model_update=1e-2,
                   policy=policy, processor=NormalizerProcessor())
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])

    while True:
        # train
        dqn.fit(env, nb_steps=5500, nb_max_episode_steps=10000, visualize=False, verbose=2)
        try:
            # validate
            info = dqn.test(env_test, nb_episodes=1, visualize=False)
            n_long, n_short, total_reward, portfolio = (info['n_trades']['long'],
                                                        info['n_trades']['short'],
                                                        info['total_reward'],
                                                        int(info['portfolio']))
            np.array([info]).dump(
                './info/duel_dqn_{0}_weights_{1}LS_{2}_{3}_{4}.info'.format(
                    ENV_NAME, portfolio, n_long, n_short, total_reward))
            dqn.save_weights(
                './model/duel_dqn_{0}_weights_{1}LS_{2}_{3}_{4}.h5f'.format(
                    ENV_NAME, portfolio, n_long, n_short, total_reward),
                overwrite=True)
        except KeyboardInterrupt:
            continue
def main():
    # OPTIONS
    ENV_NAME = 'OHLCV-v0'
    TIME_STEP = 30
    WINDOW_LENGTH = TIME_STEP
    ADDITIONAL_STATE = 4

    # Get the environment and extract the number of actions.
    PATH_TRAIN = "./data/train/"
    PATH_TEST = "./data/test/"
    env = OhlcvEnv(TIME_STEP, path=PATH_TRAIN)
    env_test = OhlcvEnv(TIME_STEP, path=PATH_TEST)

    # random seed
    np.random.seed(123)
    env.seed(123)

    nb_actions = env.action_space.n

    model = Sequential()
    model.add(CuDNNLSTM(64, input_shape=env.shape, return_sequences=True))
    model.add(CuDNNLSTM(64))
    model.add(Dense(32))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions, activation='linear'))
    print(model.summary())

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and even the metrics!
    memory = SequentialMemory(limit=50000, window_length=TIME_STEP)
    # policy = BoltzmannQPolicy()
    policy = EpsGreedyQPolicy()
    # enable the dueling network
    # you can specify the dueling_type to one of {'avg','max','naive'}
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=200,
                   enable_dueling_network=True, dueling_type='avg', target_model_update=1e-2,
                   policy=policy, processor=NormalizerProcessor())
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])

    ### now only test
    dqn.load_weights(
        "./model/duel_dqn_OHLCV-v0_weights_49112166LS_184_297_4.033341265853485.h5f")

    # validate
    info = dqn.test(env_test, nb_episodes=1, visualize=False)
    n_long, n_short, total_reward, portfolio = (info['n_trades']['long'],
                                                info['n_trades']['short'],
                                                info['total_reward'],
                                                int(info['portfolio']))
    np.array([info]).dump(
        './model/duel_dqn_{0}_weights_{1}LS_{2}_{3}_{4}.info'.format(
            ENV_NAME, portfolio, n_long, n_short, total_reward))
    dqn.save_weights(
        './info/duel_dqn_{0}_weights_{1}LS_{2}_{3}_{4}.h5f'.format(
            ENV_NAME, portfolio, n_long, n_short, total_reward),
        overwrite=True)
def main():
    fleet_size = 2000
    surge = 2
    perc_k = 1
    bonus = 0
    pro_s = 0
    percent_false_demand = 0
    config = {
        "fleet_size": 2000,
        "surge": 2,
        "perc_k": 1,
        "bonus": 0,
        "pro_s": 0,
        "percent_false_demand": 0
    }

    # m = Model(ZONE_IDS, DEMAND_SOURCE, WARMUP_TIME_HOUR, ANALYSIS_TIME_HOUR, FLEET_SIZE=fleet_size, PRO_SHARE=pro_s,
    #           SURGE_MULTIPLIER=surge, BONUS=bonus, percent_false_demand=percent_false_demand, percentage_know_fare=perc_k)
    # make one veh to be AV
    # veh = m.vehilcs[-1]
    # veh.is_AV = True
    #
    # env = RebalancingEnv(m, penalty=-10, config=config)

    env = RebalancingEnv(penalty=-10, config=config)

    nb_actions = env.action_space.n
    input_shape = (1,) + env.state.shape
    input_dim = env.input_dim

    model = Sequential()
    model.add(Flatten(input_shape=input_shape))
    model.add(Dense(256, activation='relu'))
    model.add(Dense(nb_actions, activation='linear'))

    memory = SequentialMemory(limit=2000, window_length=1)
    policy = EpsGreedyQPolicy()
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=100,
                   target_model_update=1e-2, policy=policy, gamma=0.99)
    dqn.compile(Adam(lr=0.001, epsilon=0.05, decay=0.0), metrics=['mae'])

    dqn.load_weights('dqn_weights_%s.h5f' % (3000))

    history = dqn.fit(env, nb_steps=10000, action_repetition=1, visualize=False, verbose=2)
    dqn.save_weights('dqn_weights_%s.h5f' % (10000), overwrite=True)

    history_dict = history.history
    json.dump(history_dict, open(output_path + "history_10000.json", 'w'))
def main():
    weight_path = 'models/cartpole/keras_weights.h5'
    env = gym.make('CartPole-v0')
    env = gym.wrappers.Monitor(env, "./gym-results", force=True)
    input_shape = (1,) + env.observation_space.shape
    output = env.action_space.n
    model = create_model(input_shape, output)
    model.summary()

    # https://qiita.com/goodclues/items/9b2b618ac5ba4c3be1c5
    dqn = DQNAgent(
        model=model,
        # number of outputs = number of actions
        nb_actions=output,
        # discount factor, see https://github.com/keras-rl/keras-rl/blob/master/rl/agents/dqn.py#L307
        gamma=0.99,
        # experience replay:
        # actions, rewards, observations, etc. are stored in memory as experiences
        # and later replayed in random order for training
        memory=SequentialMemory(
            # maximum memory size
            limit=5000,
            # how many consecutive observations are concatenated into one state,
            # e.g. when several time-series observations should form a single state
            window_length=1,
        ),
        # number of warm-up steps; the start of training is unstable,
        # so learning is eased in over this period
        nb_steps_warmup=10,
        # Bellman equation / target model update:
        # values < 1 mean a soft update,
        # values >= 1 mean a hard update, i.e. the weights are fully copied every that many steps
        target_model_update=1e-2,
        # policy used to choose actions in the environment:
        # GreedyQPolicy (default): balances exploration and exploitation, lowering the
        #   exploration rate as learning progresses
        # BoltzmannQPolicy: softmax policy based on the Boltzmann distribution
        policy=BoltzmannQPolicy(),
    )
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])

    if os.path.exists(weight_path):
        dqn.load_weights(weight_path)

    try:
        dqn.fit(
            env,
            nb_steps=5000,  # about 3 minutes
            visualize=False,
            log_interval=1000,
        )
    except KeyboardInterrupt:
        pass
    finally:
        dqn.save_weights(weight_path, overwrite=True)
def playGame(train_indicator=0):  # 1 means Train, 0 means simply Run
    BUFFER_SIZE = 100000
    BATCH_SIZE = 32
    GAMMA = 0.99
    TAU = 0.001      # Target Network HyperParameters
    LRA = 0.0001     # Learning rate for Actor
    LRC = 0.001      # Learning rate for Critic

    action_dim = 4   # Steering/Acceleration/Brake
    state_dim = 29   # number of sensor inputs

    np.random.seed(1337)

    vision = False
    EXPLORE = 100000.
    episode_count = 2000
    max_steps = 100000
    reward = 0
    done = False
    step = 0
    epsilon = 1
    indicator = 0

    # Generate a Torcs environment
    env = TorcsEnv(vision=vision, throttle=False, gear_change=False)

    nb_actions = 3  # left, nothing, right, brake

    model = Sequential()
    model.add(Flatten(input_shape=(window_length, 29)))
    model.add(Dense(64))
    model.add(Activation('relu'))
    model.add(Dense(64))
    model.add(Activation('relu'))
    model.add(Dense(64))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions, activation='linear'))
    print(model.summary())

    memory = SequentialMemory(limit=1000000, window_length=window_length)
    policy = BoltzmannQPolicy(tau=1.)
    processor = MyProcessor()
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=100,
                   enable_dueling_network=True, dueling_type='avg', target_model_update=1e-2,
                   policy=policy, processor=processor)
    dqn.compile(RMSprop(lr=1e-3), metrics=['mae'])

    dqn.load_weights('duel_dqn_{}_weights.h5f'.format(ENV_NAME))
    dqn.fit(env, nb_steps=500000, visualize=False, verbose=2)

    # After training is done, we save the final weights.
    dqn.save_weights('duel_dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

    # Finally, evaluate our algorithm for 5 episodes.
    dqn.test(env, nb_episodes=5, visualize=False)
class KerasDDQNAgent(object):
    '''
    classdocs
    '''

    def __init__(self, opts):
        self.metadata = {
            'discrete_actions': True,
        }
        self.opts = opts

    def configure(self, observation_space_shape, nb_actions):
        # Next, we build a simple model.
        model = Sequential()
        model.add(Flatten(input_shape=(1,) + observation_space_shape))  # input layer
        model.add(Dense(32))             # Just your regular fully connected NN layer
        model.add(Activation('tanh'))    # tanh activation layer
        model.add(Dense(16))             # more model capacity through fully connected NN layers
        model.add(Activation('relu'))    # Rectified Linear Units
        model.add(Dense(16))             # more model capacity through fully connected NN layers
        model.add(Activation('relu'))    # Rectified Linear Units
        model.add(Dense(nb_actions))     # fully connected NN layer with one output for each action
        model.add(Activation('linear'))  # we want linear activations in the end
        print(model.summary())

        memory = SequentialMemory(limit=50000, window_length=1)
        policy = BoltzmannQPolicy()
        self.agent = DQNAgent(enable_double_dqn=True, model=model, nb_actions=nb_actions,
                              memory=memory, nb_steps_warmup=10, target_model_update=1e-2,
                              policy=policy)
        self.agent.compile(Adam(lr=1e-3), metrics=['mae'])

    def train(self, env, nb_steps, visualize, verbosity):
        self.agent.fit(env, nb_steps=nb_steps, visualize=visualize, verbose=verbosity)

    def test(self, env, nb_episodes, visualize):
        self.agent.test(env, nb_episodes=nb_episodes, visualize=visualize)

    def load_weights(self, load_file):
        self.agent.load_weights(load_file)

    def save_weights(self, save_file, overwrite):
        self.agent.save_weights(save_file, overwrite)
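# Usage sketch (not from the original source); the environment, options dict, and file name
# below are hypothetical examples chosen only to illustrate the KerasDDQNAgent interface.
import gym

env = gym.make('CartPole-v0')
agent = KerasDDQNAgent(opts={})
agent.configure(env.observation_space.shape, env.action_space.n)
agent.train(env, nb_steps=50000, visualize=False, verbosity=2)
agent.save_weights('ddqn_cartpole_weights.h5f', True)
agent.test(env, nb_episodes=5, visualize=False)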
def startLearning(Env, max_board_size=7, loadFileNumber=-1, gpuToUse=None, memoryAllocation=800000):
    # Set to use GPU explicitly
    if gpuToUse is not None:
        environ["CUDA_VISIBLE_DEVICES"] = gpuToUse
    else:
        environ["CUDA_VISIBLE_DEVICES"] = "0"

    env = Env
    nb_actions = env.action_space.n

    # Init size based on max_board_size
    if max_board_size not in [11, 7, 19]:
        raise EnvironmentError

    layer0Size = 4096
    layer1Size = 4096
    layer2Size = 4096
    layer3Size = 0
    layer4Size = 0
    layer5Size = 0

    # Next, we build a very simple model.
    model = Sequential()
    model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    model.add(Dense(layer0Size))
    model.add(LeakyReLU(alpha=0.003))
    model.add(Dense(layer1Size))
    model.add(LeakyReLU(alpha=0.003))
    model.add(Dense(layer2Size))
    model.add(LeakyReLU(alpha=0.003))
    model.add(Dense(nb_actions))
    model.add(Activation('linear'))

    # A little diagnosis of the model summary
    print(model.summary())

    # Finally, we configure and compile our agent.
    memory = SequentialMemory(limit=memoryAllocation, window_length=1)
    policy = BoltzmannQPolicy()
    dqn = DQNAgent(model=model, batch_size=32, nb_actions=nb_actions, memory=memory,
                   policy=policy, enable_dueling_network=True, gamma=.97)
    dqn.compile(nadam(lr=0.01), metrics=['mae'])

    # Load an old agent save from a file, if specified.
    if loadFileNumber >= 0:
        loadFile = ("Larger_Memeory_BOARDSIZE_" + str(max_board_size) + "_DQN_LAYERS_" +
                    str(layer0Size) + "_" + str(layer1Size) + "_" + str(layer2Size) + "_" +
                    str(layer3Size) + "_" + str(layer4Size) + "_" + str(layer5Size) +
                    "_SAVENUMBER_" + str(loadFileNumber) + ".h5f")
        dqn.load_weights(loadFile)

    saveFileNumberCounter = 0
    while True:
        dqn.fit(env, nb_steps=100010, visualize=False, verbose=1)
        saveFileNumberCounter += 1
        saveFile = ("Larger_Memeory_BOARDSIZE_" + str(max_board_size) + "_DQN_LAYERS_" +
                    str(layer0Size) + "_" + str(layer1Size) + "_" + str(layer2Size) + "_" +
                    str(layer3Size) + "_" + str(layer4Size) + "_" + str(layer5Size) +
                    "_SAVENUMBER_" + str(loadFileNumber + saveFileNumberCounter) + ".h5f")
        dqn.save_weights(saveFile, overwrite=True)
def main():
    ENV_NAME = 'OHLCV-v0'
    TIME_STEP = 30
    TRAIN_PATH = "./data/train"
    TEST_PATH = "./data/test"
    env_train = OhlcvEnv(TIME_STEP, path=TRAIN_PATH)
    env_test = OhlcvEnv(TIME_STEP, path=TEST_PATH)

    np.random.seed(456)
    env_train.seed(562)

    nb_actions = env_train.action_space.n
    model = model_create(shape=env_train.shape, nb_actions=nb_actions)
    print(model.summary())

    # finally, we configure and compile our agent
    memory = SequentialMemory(limit=50000, window_length=TIME_STEP)
    # policy = BoltzmannQPolicy()
    policy = EpsGreedyQPolicy()
    # enable the dueling network
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=200,
                   enable_dueling_network=True, dueling_type='avg', target_model_update=1e-2,
                   policy=policy, processor=NormalizerProcessor())
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])

    while True:
        # train
        # dqn.load_weights('')  # optionally resume from previously saved weights
        dqn.fit(env_train, nb_steps=5500, nb_max_episode_steps=10000, visualize=True, verbose=2)

        # validate
        info = dqn.test(env_test, nb_episodes=1, visualize=True)
        n_long, n_short, total_reward, account = (info['n_trades']['long'],
                                                  info['n_trades']['short'],
                                                  info['total_reward'],
                                                  int(info['account']))
        np.array([info]).dump(
            './info/duel_dqn_{0}_weights_{1}LS_{2}_{3}_{4}.info'.format(
                ENV_NAME, account, n_long, n_short, total_reward))
        dqn.save_weights(
            './model/duel_LSTM_dqn_{0}_weights_{1}LS_{2}_{3}_{4}.h5f'.format(
                ENV_NAME, account, n_long, n_short, total_reward),
            overwrite=True)
def keras_rl(env, model_name, saved_model_name="model", steps=50000, test_steps=5,
             visualize=False, hidden_layers=3, critic_hidden_layers=3):
    nb_actions = 0
    if (model_name == "DQN" or model_name == "SARSA"):
        nb_actions = env.action_space.n
    elif (model_name == "DDPG"):
        nb_actions = env.action_space.shape[0]

    model_structure = define_layers(env, nb_actions, num_of_hidden_layers=hidden_layers)
    memory = define_memory()
    policy = define_policy(model_name)

    if (model_name == "DQN"):
        model = DQNAgent(model=model_structure, nb_actions=nb_actions, memory=memory,
                         nb_steps_warmup=100, enable_double_dqn=True, dueling_type='avg',
                         target_model_update=1e-2)
    elif (model_name == "SARSA"):
        model = SARSAAgent(model=model_structure, nb_actions=nb_actions, nb_steps_warmup=10,
                           policy=policy)
    elif (model_name == "DDPG"):
        action_input, critic_layers = define_critic_layers(
            env, num_of_hidden_layers=critic_hidden_layers)
        random_process = define_random_process(nb_actions)
        model = DDPGAgent(nb_actions=nb_actions, actor=model_structure, critic=critic_layers,
                          critic_action_input=action_input, memory=memory,
                          nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                          random_process=random_process, gamma=.99, target_model_update=1e-3)

    model.compile(Adam(lr=1e-3), metrics=['mae'])
    model.fit(env, nb_steps=steps, visualize=False, verbose=2)
    model.save_weights('{}.h5f'.format(model_name), overwrite=True)
    model.test(env, nb_episodes=test_steps, visualize=visualize)
def learn(self):
    memory = SequentialMemory(limit=50000, window_length=1)
    policy = BoltzmannQPolicy()
    dqn = DQNAgent(model=self.model, nb_actions=self.nb_actions, memory=memory,
                   nb_steps_warmup=2000, target_model_update=1e-2, policy=policy)
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])
    dqn.fit(self.env, nb_steps=50000, visualize=True, verbose=2)
    dqn.save_weights('dqn_weights.h5f', overwrite=True)
def do_train(dqn: DQNAgent, env, save_path):
    # Okay, now it's time to learn something! We visualize the training here for show, but this
    # slows down training quite a lot. You can always safely abort the training prematurely using
    # Ctrl + C.
    history = dqn.fit(env, nb_steps=200000, visualize=False, verbose=0)
    history = history.history

    # After training is done, we save the final weights.
    dqn.save_weights(save_path, overwrite=True)

    # Finally, evaluate our algorithm for 5 episodes.
    # dqn.test(env, nb_episodes=5, visualize=True)

    return history
def main() -> None:
    env = gym.make(ENV_NAME)
    nb_actions = env.action_space.n

    model = tf.keras.Sequential()
    model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    model.add(Dense(512, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
    model.add(Dense(512, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
    model.add(Dense(nb_actions, activation='linear'))
    print(model.summary())

    memory = SequentialMemory(limit=100000, window_length=1)
    policy = CustomEpsGreedy(max_eps=0.6, min_eps=0.1, eps_decay=0.9997)
    agent = DQNAgent(nb_actions=nb_actions, model=model, memory=memory, policy=policy,
                     gamma=0.99, batch_size=64)
    agent.compile(optimizer=Adam(lr=1e-3), metrics=['mae'])

    history = agent.fit(env, nb_steps=100000, visualize=False, nb_max_episode_steps=300,
                        log_interval=300, verbose=1)
    kill_all_node()

    dt_now = datetime.datetime.now()
    agent.save_weights(MODELS_PATH + 'dpg_{}_weights_{}{}{}.h5f'.format(
        ENV_NAME, dt_now.month, dt_now.day, dt_now.hour), overwrite=True)
    # agent.test(env, nb_episodes=5, visualize=False)

    fig = plt.figure()
    plt.plot(history.history['episode_reward'])
    plt.xlabel("episode")
    plt.ylabel("reward")
    plt.savefig(FIGURES_PATH + 'learning_results_{}{}{}.png'.format(
        dt_now.month, dt_now.day, dt_now.hour))
class DQN(BaseAgent):
    def __init__(self, model, processor, policy, test_policy, num_actions):
        # Replay memory
        memory = SequentialMemory(limit=opt.dqn_replay_memory_size,
                                  window_length=opt.dqn_window_length)
        self.agent = DQNAgent(model=model,
                              nb_actions=num_actions,
                              policy=policy,
                              test_policy=test_policy,
                              memory=memory,
                              processor=processor,
                              batch_size=opt.dqn_batch_size,
                              nb_steps_warmup=opt.dqn_nb_steps_warmup,
                              gamma=opt.dqn_gamma,
                              target_model_update=opt.dqn_target_model_update,
                              enable_double_dqn=opt.enable_double_dqn,
                              enable_dueling_network=opt.enable_dueling_network,
                              train_interval=opt.dqn_train_interval,
                              delta_clip=opt.dqn_delta_clip)
        self.agent.compile(optimizer=keras.optimizers.Adam(lr=opt.dqn_learning_rate),
                           metrics=['mae'])

    def fit(self, env, num_steps, weights_path=None, visualize=False):
        callbacks = []
        if weights_path is not None:
            callbacks += [ModelIntervalCheckpoint(weights_path, interval=50000, verbose=1)]
        self.agent.fit(env=env,
                       nb_steps=num_steps,
                       action_repetition=opt.dqn_action_repetition,
                       callbacks=callbacks,
                       log_interval=opt.log_interval,
                       test_interval=opt.test_interval,
                       test_nb_episodes=opt.test_nb_episodes,
                       test_action_repetition=opt.dqn_action_repetition,
                       visualize=visualize,
                       test_visualize=visualize,
                       verbose=1)

    def test(self, env, num_episodes, visualize=False):
        self.agent.test(env=env,
                        nb_episodes=num_episodes,
                        action_repetition=opt.dqn_action_repetition,
                        verbose=2,
                        visualize=visualize)

    def save(self, out_dir):
        self.agent.save_weights(out_dir, overwrite=True)

    def load(self, out_dir):
        self.agent.load_weights(out_dir)
def learn():
    # Get the environment and extract the number of actions.
    env = gym.make(ENV_NAME)
    np.random.seed(123)
    env.seed(123)

    # Action space details
    nb_devices = env.action_space.spaces["device"].n
    nb_durations = env.action_space.spaces["duration"].n
    nb_actions = nb_devices * nb_durations

    # Next, we build a very simple model.
    model = Sequential()
    model.add(Dense(16, input_shape=(1, )))
    model.add(Activation('relu'))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions))
    model.add(Activation('linear'))
    print(model.summary())

    # Finally, we configure and compile our agent. You can use every built-in
    # Keras optimizer and even the metrics!
    memory = SequentialMemory(limit=50000, window_length=1)
    processor = CounterTrafficProcessor()
    policy = BoltzmannQPolicy()
    dqn = DQNAgent(model=model, processor=processor, nb_actions=nb_actions, memory=memory,
                   nb_steps_warmup=1000, target_model_update=1e-2, policy=policy)
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])

    # Okay, now it's time to learn something! We visualize the training here for
    # show, but this slows down training quite a lot. You can always safely
    # abort the training prematurely using Ctrl + C.
    dqn.fit(env, nb_steps=50000, visualize=False, verbose=1)

    # After training is done, we save the final weights.
    dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)
    # dqn.load_weights('dqn_{}_weights.h5f'.format(ENV_NAME))

    # Finally, evaluate our algorithm
    dqn.test(env, nb_episodes=5, visualize=True)
class dqn():
    def __init__(self, Env):
        self.env = Env
        nb_actions = self.env.action_space.shape[0]

        model = Sequential()
        model.add(Flatten(input_shape=(1,) + self.env.observation_space.shape))
        model.add(Dense(64))
        model.add(Activation('relu'))
        model.add(Dense(64))
        model.add(Activation('relu'))
        model.add(Dense(64))
        model.add(Activation('relu'))
        model.add(Dense(64))
        model.add(Activation('relu'))
        model.add(Dense(nb_actions))
        model.add(Activation('linear'))

        memory = SequentialMemory(limit=50000, window_length=1)
        policy = BoltzmannQPolicy()
        self.model = DQNAgent(model=model, nb_actions=nb_actions, memory=memory,
                              nb_steps_warmup=10, target_model_update=1e-2, policy=policy,
                              gamma=0)
        self.model.processor = ShowActionProcessor(self.model, self.env)
        self.model.compile(Adam(lr=1e-2), metrics=['mae'])

    def fit(self):
        self.model.fit(self.env, nb_steps=30000, visualize=False, verbose=2,
                       nb_max_episode_steps=10000)

    def save_weights(self):
        self.model.save_weights('./store/dqn_{}_weights.h5f'.format('porfolio'), overwrite=True)

    def test(self):
        self.model.test(self.env, nb_episodes=1, visualize=False, nb_max_episode_steps=10000)
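# Usage sketch (not from the original source); PortfolioEnv is a hypothetical placeholder
# for whatever environment this wrapper is meant to be constructed with.
env = PortfolioEnv()   # hypothetical environment instance
agent = dqn(env)
agent.fit()
agent.save_weights()
agent.test()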
def main(options):
    env = gym.make(ENV_NAME)
    if options.gui:
        env.nogui = False
    options.prediction_type = options.type

    np.random.seed(123)
    env.seed(123)
    nb_actions = env.action_space.n

    model = make_model(env, nb_actions)

    # Configure and compile the agent
    memory = SequentialMemory(limit=50000, window_length=1)
    policy = BoltzmannQPolicy()
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory,
                   nb_steps_warmup=options.training_warmup, target_model_update=1e-2,
                   policy=policy)
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])

    # Begin training
    print("=================== Starting training.. ==============================")
    dqn.fit(env, nb_steps=options.training_steps, visualize=False, verbose=2,
            nb_max_episode_steps=options.training_max_steps)

    # After training is done, save the weights
    print("=================== Finished training, saving weights.. ==============")
    dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

    # Evaluate the model
    print("=================== Finished saving weights, evaluating model ========")
    res = dqn.test(env, nb_episodes=options.eval_episodes, visualize=False,
                   nb_max_episode_steps=options.eval_max_steps, verbose=1)
    pprint(res.history)
def run_dqn():
    global N_NODE_NETWORK

    env = SnakeGymDiscrete()
    nb_actions = env.action_space.n

    # initialize randomness
    np.random.seed(123)
    env.seed(123)

    # Next, we build a very simple model.
    model = Sequential()
    model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    model.add(Dense(N_NODE_NETWORK))
    model.add(Activation('relu'))
    model.add(Dense(N_NODE_NETWORK))
    model.add(Activation('relu'))
    model.add(Dense(N_NODE_NETWORK))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions))
    model.add(Activation('linear'))
    print(model.summary())

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
    # even the metrics!
    memory = SequentialMemory(limit=50000, window_length=1)
    policy = BoltzmannQPolicy()
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=10,
                   target_model_update=1e-2, policy=policy)
    adam = Adam(lr=1e-3)
    # setattr(adam, "_name", "Adam")
    dqn.compile(adam, metrics=['mae'])

    # Okay, now it's time to learn something! We visualize the training here for show, but this
    # slows down training quite a lot. You can always safely abort the training prematurely using
    # Ctrl + C.
    dqn.fit(env, nb_steps=50000, visualize=True, verbose=2)
    dqn.save_weights('dqn_SnakeGymDiscrete_weights.h5f', overwrite=True)

    # Finally, evaluate our algorithm for 5 episodes.
    dqn.test(env, nb_episodes=5, visualize=True)
def main():
    weight_path = 'models/breakout/keras_weights.h5'
    env = gym.make('BreakoutDeterministic-v4')
    nb_actions = env.action_space.n
    input_shape = (WINDOW_LENGTH,) + INPUT_SHAPE
    model = create_model(input_shape, nb_actions)
    print(model.summary())

    memory = SequentialMemory(limit=1000000, window_length=WINDOW_LENGTH)
    processor = AtariProcessor()
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1., value_min=.1,
                                  value_test=.05, nb_steps=1000000)
    dqn = DQNAgent(model=model, nb_actions=nb_actions, policy=policy, memory=memory,
                   processor=processor, nb_steps_warmup=50000, gamma=.99,
                   target_model_update=10000, train_interval=4, delta_clip=1.)
    dqn.compile(Adam(lr=.00025), metrics=['mae'])

    if os.path.exists(weight_path):
        dqn.load_weights(weight_path)

    try:
        dqn.fit(
            env,
            nb_steps=1750000,  # about 8 hours
            visualize=False,
        )
    except KeyboardInterrupt:
        pass
    finally:
        dqn.save_weights(weight_path, overwrite=True)
def main(): env = gym.make("balancebot-v0") model = Sequential() model.add(Flatten(input_shape=(1, ) + env.observation_space.shape)) model.add(Dense(16)) model.add(Activation('relu')) model.add(Dense(12)) model.add(Activation('relu')) model.add(Dense(9)) model.add(Activation('softmax')) # print(model.summary()) memory = SequentialMemory(limit=100000, window_length=1) policy = BoltzmannQPolicy() dqn = DQNAgent(model=model, nb_actions=env.action_space.n, memory=memory, nb_steps_warmup=10, target_model_update=1e-2, policy=policy) dqn.compile(Adam(lr=1e-3), metrics=['mae']) dqn.fit(env, nb_steps=15000, visualize=True, verbose=2, callbacks=None) # act = deepq.learn(env, # q_func=model, # lr=1e-3, # max_timesteps=100000, # buffer_size=100000, # exploration_fraction=0.1, # exploration_final_eps=0.02, # print_freq=10, # callback=callback # ) print("Saving model to balance.pkl") # After training is done, we save the final weights. dqn.save_weights('balance.pkl', overwrite=True) print("================================================") print('\n') #Load the saved weights to dqn dqn.load_weights('balance.pkl') # Finally, evaluate our algorithm for 5 episodes. dqn.test(env, nb_episodes=5, visualize=True)
def Mainthread():
    for k in range(10):
        if (k == 0):
            Gen_C = Genetic.Chromosomes_Offset()
            Gen_List = Gen_C.initGen(8)

            for i in range(len(Gen_List)):
                f = open('./CrS/' + str(i) + '.txt', 'w')
                f.write(Gen_List[i][0])
                f.write(Gen_List[i][1])
                f.close()

            Mgen = []
            Sgen = []
            for i in range(len(Gen_List)):
                Mgen.append(Gen_List[i][0])
                Sgen.append(Gen_List[i][1])

            for i in range(len(Mgen)):
                Model = Model_Converter.GeneticModel(Mgen[i], Sgen[i]).model
                model_json = Model.to_json()
                f = open('./model/model' + str(i) + '.json', 'w')
                f.write(model_json)
                f.close()
        else:
            Gen_M = gen_main.GenMain()
            Gen_M.main()

        # Train and score each of the 8 candidate models for this generation.
        for j in range(8):
            json_file = open("./model/model" + str(j) + ".json", "r")
            loaded_model_json = json_file.read()
            json_file.close()
            loaded_model = keras.models.model_from_json(loaded_model_json)

            memory = SequentialMemory(limit=50000, window_length=1)
            policy = BoltzmannQPolicy()
            dqn = DQNAgent(model=loaded_model, nb_actions=nb_actions, memory=memory,
                           nb_steps_warmup=10, target_model_update=1e-2, policy=policy)
            dqn.compile(Adam(lr=1e-3), metrics=['mae'])
            dqn.fit(env, nb_steps=30000, visualize=False, verbose=2)
            dqn.save_weights('t_score/dqn_' + str(k) + '_' + str(j) + '{}_weights.h5f'.format(env_name),
                             overwrite=True)
            Calc_E_Cons_and_Perfomance(dqn, j)
def main():
    # Get the environment and extract the number of actions.
    environment_name = "lawnmower-medium-obstacles-v0"
    environment = gym.make(environment_name)
    environment.print_description()
    nb_actions = environment.action_space.n

    # Build the model.
    model = build_model_cnn((WINDOW_LENGTH,) + INPUT_SHAPE, nb_actions)
    print(model.summary())

    # Create sequential memory for memory replay.
    memory = SequentialMemory(limit=1000000, window_length=WINDOW_LENGTH)

    # Process environment inputs and outputs.
    processor = LawnmowerProcessor()

    # Use epsilon-greedy as our policy.
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1., value_min=.1,
                                  value_test=.05, nb_steps=int(STEPS * 0.8))

    # Instantiate and compile our agent.
    dqn = DQNAgent(model=model, nb_actions=nb_actions, policy=policy, memory=memory,
                   processor=processor, nb_steps_warmup=50000, gamma=.99,
                   target_model_update=10000, train_interval=4, delta_clip=1.)
    dqn.compile(optimizers.Adam(lr=.00025), metrics=['mae'])

    # Set up some callbacks for training.
    checkpoint_weights_filename = 'dqn_' + environment_name + '_weights_{step}.h5f'
    log_filename = 'dqn_{}_log.json'.format(environment_name)
    callbacks = [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=250000)]
    callbacks += [TensorboardCallback(os.path.join("tensorboard", datetime_string))]
    callbacks += [FileLogger(log_filename, interval=100)]

    # Train the agent.
    dqn.fit(environment, callbacks=callbacks, nb_steps=STEPS, log_interval=10000)

    # Save the final network after training.
    weights_filename = 'dqn_{}_weights.h5f'.format(environment_name)
    dqn.save_weights(weights_filename, overwrite=True)

    # Run the agent.
    dqn.test(environment, nb_episodes=10, visualize=False)
def cartpole():
    ENV_NAME = 'CartPole-v0'

    # Get the environment and extract the number of actions.
    env = gym.make(ENV_NAME)
    np.random.seed(123)
    env.seed(123)
    nb_actions = env.action_space.n

    # Next, we build a very simple model. The input is the (window, observation) tensor that
    # keras-rl feeds in, and the output is one linear Q-value per action.
    model = Sequential()
    model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions))
    model.add(Activation('linear'))
    print(model.summary())

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
    # even the metrics!
    memory = SequentialMemory(limit=50000, window_length=1)
    policy = BoltzmannQPolicy()
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=10,
                   target_model_update=1e-2, policy=policy)
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])

    # Okay, now it's time to learn something! We visualize the training here for show, but this
    # slows down training quite a lot. You can always safely abort the training prematurely using
    # Ctrl + C.
    dqn.fit(env, nb_steps=50000, visualize=True, verbose=2)

    # After training is done, we save the final weights.
    dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

    # Finally, evaluate our algorithm for 5 episodes.
    dqn.test(env, nb_episodes=5, visualize=True)
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=100,
                   enable_dueling_network=True, dueling_type='avg', target_model_update=1e-2,
                   policy=train_policy, test_policy=test_policy)
    filename = 'weights/duel_dqn_{}_weights_{}_{}_{}_{}.h5f'.format(
        ENV_NAME, LAYER_SIZE, NUM_HIDDEN_LAYERS, NUM_STEPS, TRIAL_ID)
else:
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=100,
                   target_model_update=1e-2, policy=train_policy, test_policy=test_policy)
    filename = 'weights/dqn_{}_weights_{}_{}_{}_{}.h5f'.format(
        ENV_NAME, LAYER_SIZE, NUM_HIDDEN_LAYERS, NUM_STEPS, TRIAL_ID)

dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# Optionally, we can reload a previous model's weights and continue training from there
# FILENAME = 'weights/duel_dqn_variable_pendulum-v0_weights_4096_4_50000_2017-07-11_140316.h5f'
# Load the model weights
# dqn.load_weights(FILENAME)

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
dqn.fit(env, nb_steps=NUM_STEPS, visualize=False, verbose=2, nb_max_episode_steps=500)

# After training is done, we save the final weights.
dqn.save_weights(filename, overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
dqn.test(env, nb_episodes=5, nb_max_episode_steps=500, visualize=True)
model = Sequential()
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
print(model.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=50000, window_length=1)
policy = BoltzmannQPolicy()
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=10,
               target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
dqn.fit(env, nb_steps=50000, visualize=True, verbose=2)

# After training is done, we save the final weights.
dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
dqn.test(env, nb_episodes=5, visualize=True)
else:
    raise ValueError('Please select DQN, DUEL_DQN, SARSA, or CEM for your method type.')

callbacks = []
# callbacks += [ModelIntervalCheckpoint(CHECKPOINT_WEIGHTS_FILENAME, interval=10000)]
callbacks += [FileLogger(LOG_FILENAME, interval=100)]
# callbacks += [TensorBoard(log_dir='logs', histogram_freq=0, write_graph=True, write_images=False)]
callbacks += [ExploreExploit()]

# Optionally, we can reload a previous model's weights and continue training from there
# LOAD_WEIGHTS_FILENAME = 'weights/duel_dqn_planar_crane-v0_weights_1024_4_50000_2017-07-12_160853.h5f'
#
# # Load the model weights
# agent.load_weights(LOAD_WEIGHTS_FILENAME)

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
agent.fit(env, nb_steps=NUM_STEPS, callbacks=callbacks, action_repetition=5, visualize=False,
          verbose=1, log_interval=LOG_INTERVAL, nb_max_episode_steps=500)

# After training is done, we save the final weights.
agent.save_weights(WEIGHT_FILENAME, overwrite=True)

# We'll also save a simply named version to make running a test immediately
# following training easier.
filename = 'weights/{}_{}_weights.h5f'.format(METHOD, ENV_NAME)
agent.save_weights(filename, overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
agent.test(env, nb_episodes=5, visualize=True, action_repetition=5)  # nb_max_episode_steps=500,