Example #1
0
    def generate_random_state(self):
        """Build a random game state, load it into a fresh context and show it.

        The state is a six-slot sequence assembled as follows:

        * slot 0 -- randomly ``YELLOW`` or ``GREEN`` (presumably the player
          on move -- confirm against ``QuoridorContext``);
        * slot 1 -- a board position outside ``goal_positions[YELLOW]``;
        * slot 2 -- a board position outside ``goal_positions[GREEN]`` and
          different from slot 1;
        * slots 3 and 4 -- ``STARTING_WALL_COUNT`` minus each side's share
          of the walls placed on the board;
        * slot 5 -- a frozenset of the placed wall actions.

        Nothing is returned; the state is displayed on the console and the
        context is printed.
        """
        every_position = frozenset(range(self.board_positions))

        first_slot = random.choice((YELLOW, GREEN))

        yellow_forbidden = self.goal_positions[YELLOW]
        yellow_pos = random.choice(tuple(every_position - yellow_forbidden))

        # The second pawn may not share a square with the first one.
        green_forbidden = self.goal_positions[GREEN] | {yellow_pos}
        green_pos = random.choice(tuple(every_position - green_forbidden))

        wall_actions = tuple(range(self.wall_moves))
        total_walls = random.randint(0, 2 * STARTING_WALL_COUNT)
        # Split the walls between both sides so that neither side uses more
        # than STARTING_WALL_COUNT of them.
        fewest_used = max(0, total_walls - STARTING_WALL_COUNT)
        most_used = min(STARTING_WALL_COUNT, total_walls)
        walls_used = random.randint(fewest_used, most_used)

        placed = set()
        state = [
            first_slot,
            yellow_pos,
            green_pos,
            STARTING_WALL_COUNT - walls_used,
            STARTING_WALL_COUNT - total_walls + walls_used,
            placed,
        ]

        # Rejection sampling: keep drawing wall actions until enough of them
        # have been accepted.
        remaining = total_walls
        while remaining:
            candidate = random.choice(wall_actions)
            if not self.is_wall_crossing(placed, candidate):
                # Tentatively place the wall; ``state[5]`` is this same set.
                placed.add(candidate)
                if self.players_can_reach_goal(state):
                    remaining -= 1
                else:
                    placed.remove(candidate)
        state[5] = frozenset(placed)

        context = QuoridorContext(self)
        context.reset(state=tuple(state))
        self.display_on_console(context)
        print(context)
Example #2
0
def train(colors_on, special):
    """Train the TensorFlow player on moves made by the heuristic player.

    The heuristic player makes every move.  Before each move the current
    input vector is stored in ``ann.input_vectors``; after it, the entry of
    ``ann.desired_vectors`` for the action just played is set to ``+100`` or
    ``-100``.  Once ``ann.batch`` moves have been collected, one
    ``ann.train_step`` is run and collection restarts from row 0.

    The loop never terminates on its own, so the function only ends when an
    exception (for example ``KeyboardInterrupt``) propagates.  The TensorFlow
    session is closed in that case.

    Args:
        colors_on: forwarded to ``ConsoleGame`` as ``console_colors``.
        special: forwarded to ``ConsoleGame`` as ``special_chars``.
    """
    game = ConsoleGame(console_colors=colors_on, special_chars=special)
    opponent = HeuristicPlayer(game)

    # INIT TENSORFLOW
    session = tf.Session()
    try:
        ann = TFPlayer(game, session)

        total_game_num = model_load_or_init(ann)

        context = QuoridorContext(game)
        get_players = players_creator_factory(opponent, 'heuristic', ann)
        players = get_players()

        def current_input_vector():
            """Return the context's current state as a (1, ann.input) array."""
            vector = input_vector_from_game_state(context)
            return np.array(list(vector)).reshape([1, ann.input])

        # INIT GAME
        context.reset(players=players)
        state = current_input_vector()

        game_num = 0
        move = 0

        start = time.clock()
        while True:
            # store current state
            ann.input_vectors[move, :] = state

            # proceed to next state
            opponent.play(context)
            state = current_input_vector()

            # update desired vector
            action = context.last_action
            # 1 or -1 (assumes context.state[0] is 0 or 1 -- TODO confirm)
            sign = 1 - context.state[0] * 2
            ann.desired_vectors[move, action] = 100 * sign

            move += 1

            if context.is_terminal:
                game_num += 1
                total_game_num += 1
                context.reset(players=players)
                state = current_input_vector()

                if game_num % SHOW_STATUS_STEP == 0:
                    print_status(start, total_game_num, game_num)

                if game_num % SAVE_STATUS_STEP == 0:
                    filename = TRAINING_FILENAME_FMT.format(
                        num=total_game_num
                    )
                    ann.save(os.path.join(CKPT_MODELS_PATH, filename))

            if move == ann.batch:
                # TODO: create list of q_values:
                #           y = [1, 1*lr, 1*(lr**2), ...]
                #           g = -y + 1
                #       and use them for learning
                session.run(ann.train_step, feed_dict=ann.feed_dict)
                move = 0
    finally:
        # The loop above only exits through an exception; release the
        # session's resources instead of leaking them.
        session.close()
Example #3
0
def tf_play(colors_on, special):
    """Play one console game between a human and the trained TF player.

    The most recent saved model (as reported by
    ``TFPlayer.last_model_filename``) is loaded, then players take turns
    until the context reports a terminal state.  The board is displayed
    before every move and once more after the game ends.

    Args:
        colors_on: forwarded to ``ConsoleGame`` as ``console_colors``.
        special: forwarded to ``ConsoleGame`` as ``special_chars``.

    Raises:
        Exception: if no saved model file is available.
    """
    game = ConsoleGame(console_colors=colors_on, special_chars=special)

    # INIT TENSORFLOW
    session = tf.Session()
    try:
        ann = TFPlayer(game, session)
        filename = ann.last_model_filename()
        if filename is None:
            raise Exception('Could not load ANN data.')
        ann.load(filename)

        kwargs = {
            'messages': game.messages,
            'game_controls': game.GAME_CONTROLS,
            'fail_callback': game.wrong_human_move,
        }
        hp = HumanPlayer(game, **kwargs)
        context = QuoridorContext(game)
        get_players = players_creator_factory(hp, 'human', ann)
        context.reset(players=get_players())

        while not context.is_terminal:
            game.display_on_console(context)
            # The player whose turn it is makes its move.
            context.current['player'](context)
        game.display_on_console(context)
    finally:
        # Close the session on every exit path, including a missing model
        # file or an interrupt during play.
        session.close()
Example #4
0
    def handle_menu(self):
        """Run the main menu loop until the user quits.

        Each iteration prints the menu, reads a choice and dispatches on its
        ``'mode'``.  A single ``QuoridorContext`` is shared by all menu
        actions.  The loop ends when a quit choice is made or when a
        sub-handler (``save_menu``, ``load_menu``, ``handle_game``,
        ``train``) returns ``'quit'``.
        """
        context = QuoridorContext(self)
        game_mode = 'menu'
        while game_mode != 'quit':
            self.print_menu()
            choice = self.get_menu_input()

            # String choices are special markers rather than menu entries.
            if isinstance(choice, basestring):
                if choice == 'quit':
                    return
                if choice == 'unknown':
                    print(self.messages['unknown_choice'])
                    continue

            game_mode = choice['mode']
            if game_mode == 'quit':
                return

            if game_mode == 'random_state':
                self.generate_random_state()
            elif game_mode == 'save':
                # Saving is refused when the context has no history.
                if context.history:
                    game_mode = self.save_menu(context)
                else:
                    print(self.messages['cannot_save'])
            elif game_mode == 'load':
                game_mode = self.load_menu(context)
                if game_mode == 'game':
                    game_mode = self.handle_game(context)
            else:
                # Every remaining mode starts from a freshly reset context.
                players = self.get_players(choice['player_names'])
                context.reset(players=players)
                if game_mode == 'game':
                    game_mode = self.handle_game(context)
                elif game_mode == 'train':
                    game_mode = self.train(context)
Example #5
0
def measure(colors_on, special, opponent_type):
    """Play one game of the chosen opponent against a path-based random player.

    The game runs without intermediate output; the final position is
    displayed on the console once the context reports a terminal state.

    Args:
        colors_on: forwarded to ``ConsoleGame`` as ``console_colors``.
        special: forwarded to ``ConsoleGame`` as ``special_chars``.
        opponent_type: key into ``OPPONENTS`` selecting the opponent to
            build; it is also passed on to ``players_creator_factory``.
    """
    game = ConsoleGame(console_colors=colors_on, special_chars=special)
    opponent = OPPONENTS[opponent_type](game)

    # NOTE: a TFPlayer restored from 'model.ckpt' was used here previously;
    # it is currently replaced by RandomPlayerWithPath.
    path_player = RandomPlayerWithPath(game)

    context = QuoridorContext(game)
    make_players = players_creator_factory(
        opponent, opponent_type, path_player
    )
    context.reset(players=make_players())

    while not context.is_terminal:
        # The player whose turn it is makes its move.
        mover = context.current['player']
        mover(context)

    game.display_on_console(context)