def train(model, maze, epsilon: float = 0.1) -> None:
    """Train ``model`` on ``maze`` using epsilon-greedy Q-learning with replay.

    Each epoch starts the agent on a randomly chosen free cell and plays one
    game. After every move the transition is stored in the replay memory and
    the model is fitted on a batch drawn from that memory. The model weights
    and its JSON architecture are written to disk at the end of every epoch.

    Besides its arguments, the function reads these module-level settings:
    ``model_file`` (optional weights to preload), ``max_save_size`` (replay
    memory capacity), ``epoch_num`` (number of epochs), ``data_size`` (batch
    size requested from the replay memory) and ``save_file`` (output path
    without extension).

    Args:
        model: Network exposing ``load_weights``, ``fit``, ``evaluate``,
            ``save_weights`` and ``to_json`` (presumably a Keras model --
            confirm against the caller).
        maze: Maze description forwarded unchanged to ``Maze``.
        epsilon: Probability of taking a random valid action instead of the
            action the model currently rates best. Lowered to 0.05 once the
            recent win rate exceeds 0.9.
    """
    # Function-scope import: only the final summary needs the timer.
    from time import perf_counter

    started = perf_counter()

    if model_file:
        print("loading weights from file: %s" % (model_file, ))
        model.load_weights(model_file)

    my_maze = Maze(maze)

    # Replay memory that stores the transitions played so far.
    git_store = Git(model, max_save_size=max_save_size)

    win_history = []
    # Number of most recent games over which the win rate is measured.
    hsize = my_maze.maze.size // 2
    win_rate = 0.0

    # Output paths do not change between epochs; defining them up front also
    # keeps the final summary valid when epoch_num is 0.
    h5file = save_file + ".h5"
    json_file = save_file + ".json"
    epoch = 0

    for epoch in range(epoch_num):
        loss = 0.0
        rat_cell = random.choice(my_maze.free_cells)
        my_maze.reset(rat_cell)
        game_over = False

        state = my_maze.get_current_state()

        n_episodes = 0
        while not game_over:
            valid_actions = my_maze.valid_actions()
            if not valid_actions:
                break
            state_now = state

            # Epsilon-greedy policy: explore with probability epsilon,
            # otherwise exploit the model's current estimate.
            if np.random.rand() < epsilon:
                action = random.choice(valid_actions)
            else:
                action = np.argmax(git_store.predict(state_now))

            # Apply the action.
            # NOTE(review): the bound method ``get_current_state`` is passed
            # without being called; confirm that ``Maze.act`` expects a
            # callable here rather than a state value.
            state_next, reward, game_status = my_maze.act(
                action, my_maze.get_current_state)
            if game_status == 'win':
                win_history.append(1)
                game_over = True
            elif game_status == 'lose':
                win_history.append(0)
                game_over = True
            else:
                game_over = False

            # Store the transition in the replay memory.
            episode = [state_now, action, reward, state_next, game_over]
            git_store.remember(episode)
            n_episodes += 1

            # Advance to the new observation so the next decision and the
            # next stored transition start from where the agent actually is.
            state = state_next

            # Train the network on a batch drawn from the replay memory.
            inputs, targets = git_store.get_data(batch_size=data_size)
            model.fit(
                inputs,
                targets,
                epochs=8,
                batch_size=16,
                verbose=0,
            )
            loss = model.evaluate(inputs, targets, verbose=0)

        if len(win_history) > hsize:
            win_rate = sum(win_history[-hsize:]) / hsize

        template = "Epoch: {:03d}/{:d} Loss: {:.4f} Episodes: {:d} Win count: {:d} Win rate: {:.3f}"
        print(
            template.format(epoch, epoch_num - 1, loss, n_episodes,
                            sum(win_history), win_rate))

        # Save trained model weights and architecture; this will be used by
        # the visualization code.
        model.save_weights(h5file, overwrite=True)
        with open(json_file, "w") as outfile:
            json.dump(model.to_json(), outfile)

        # Once the agent wins most recent games, explore less.
        if win_rate > 0.9:
            epsilon = 0.05
        # Early stopping on a perfect recent win record (checked with
        # completion_check) is disabled, so all epoch_num epochs always run.

    elapsed = "%.1f seconds" % (perf_counter() - started)
    print('files: %s, %s' % (h5file, json_file))
    print("n_epoch: %d, max_mem: %d, data: %d, time: %s" %
          (epoch, max_save_size, data_size, elapsed))