def _state_vector(context, ann):
    """Return the current game state as a 1 x ann.input numpy row vector.

    ``ann.input`` is presumably the width of the network's input layer --
    TODO confirm against TFPlayer.
    """
    state = input_vector_from_game_state(context)
    return np.array(list(state)).reshape([1, ann.input])


def train(colors_on, special):
    """Train the TF player against the heuristic opponent, forever.

    Runs an endless self-play loop: the heuristic opponent plays moves,
    each (state, action) pair is written into the network's batch buffers,
    and a training step is executed every ``ann.batch`` moves.  Progress is
    printed every ``SHOW_STATUS_STEP`` games and a checkpoint is saved
    every ``SAVE_STATUS_STEP`` games.

    :param colors_on: forwarded to ConsoleGame as ``console_colors``.
    :param special: forwarded to ConsoleGame as ``special_chars``.
    """
    game = ConsoleGame(console_colors=colors_on, special_chars=special)
    opponent = HeuristicPlayer(game)

    # INIT TENSORFLOW
    session = tf.Session()
    ann = TFPlayer(game, session)
    total_game_num = model_load_or_init(ann)

    context = QuoridorContext(game)
    get_players = players_creator_factory(opponent, 'heuristic', ann)
    players = get_players()

    # INIT GAME
    context.reset(players=players)
    state = _state_vector(context, ann)

    game_num = 0
    move = 0
    # NOTE: was time.clock(), which is deprecated since Python 3.3 and
    # removed in 3.8; perf_counter() is the documented replacement.
    start = time.perf_counter()
    while True:
        # store current state
        ann.input_vectors[move, :] = state

        # proceed to next state
        opponent.play(context)
        state = _state_vector(context, ann)

        # update desired vector; context.state[0] is presumably the index
        # of the player on move (0 or 1) -- TODO confirm
        action = context.last_action
        sign = 1 - context.state[0] * 2  # 1 or -1
        ann.desired_vectors[move, action] = 100 * sign

        move += 1
        if context.is_terminal:
            game_num += 1
            total_game_num += 1
            context.reset(players=players)
            state = _state_vector(context, ann)
            if game_num % SHOW_STATUS_STEP == 0:
                print_status(start, total_game_num, game_num)
            if game_num % SAVE_STATUS_STEP == 0:
                filename = TRAINING_FILENAME_FMT.format(num=total_game_num)
                ann.save(os.path.join(CKPT_MODELS_PATH, filename))

        if move == ann.batch:
            # TODO: create list of q_values:
            # y = [1, 1*lr, 1*(lr**2), ...]
            # g = -y + 1
            # and use them for learning
            session.run(ann.train_step, feed_dict=ann.feed_dict)
            move = 0
def play(self, context):
    """Pick the best valid action from the network and apply it to context.

    The current game state is encoded as a 1 x self.input row vector, fed
    through the network, and the resulting action scores are turned into
    candidate actions by ``_generate_action``.  The first candidate that
    ``context.update`` accepts is played.

    :raises Exception: if no generated action is valid (should be
        unreachable in practice).
    """
    features = input_vector_from_game_state(context, repeat=self.repeat)
    features = np.array(list(features)).reshape([1, self.input])
    q_values = self.tf_session.run(
        self.output_layer,
        feed_dict={self.input_layer: features}
    )
    for candidate in self._generate_action(q_values, context.state[0]):
        try:
            context.update(candidate)
        except InvalidMove:
            continue
        return
    # this will not get here, but in case...
    raise Exception('Could not play any action.')