    # assumes a TensorFlow session and the network tensors (input_layer,
    # output_layer, variables) have been built earlier in this script
    load_network(session, variables, NETWORK_FILE_PATH)

    mini_batch_board_states, mini_batch_moves, mini_batch_rewards = [], [], []
    results = collections.deque(maxlen=PRINT_RESULTS_EVERY_X)

    def make_training_move(board_state, side):
        mini_batch_board_states.append(np.ravel(board_state) * side)
        move = get_stochastic_network_move(session, input_layer, output_layer,
                                           board_state, side)
        mini_batch_moves.append(move)
        return game_spec.flat_move_to_tuple(move.argmax())

    for episode_number in range(1, NUMBER_OF_GAMES_TO_RUN):
        # randomly choose whether the learner plays first or second
        if bool(random.getrandbits(1)):
            reward = game_spec.play_game(make_training_move,
                                         game_spec.get_random_player_func())
        else:
            reward = -game_spec.play_game(game_spec.get_random_player_func(),
                                          make_training_move)

        results.append(reward)

        last_game_length = len(mini_batch_board_states) - len(mini_batch_rewards)

        # scale by game length so that winning quickly is better than winning
        # slowly, and losing slowly is better than losing quickly
        reward /= float(last_game_length)

        mini_batch_rewards += ([reward] * last_game_length)

        if episode_number % BATCH_SIZE == 0:
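            # (sketch) apply the policy-gradient update for the finished batch;
            # assumes train_step, reward_placeholder and actual_move_placeholder
            # were created alongside the network
            normalized_rewards = np.array(mini_batch_rewards) - np.mean(mini_batch_rewards)
            rewards_std = np.std(normalized_rewards)
            if rewards_std != 0:
                normalized_rewards /= rewards_std

            session.run(train_step,
                        feed_dict={input_layer: np.array(mini_batch_board_states),
                                   reward_placeholder: normalized_rewards,
                                   actual_move_placeholder: np.array(mini_batch_moves)})

            # start the next mini-batch with empty buffers
            del mini_batch_board_states[:]
            del mini_batch_moves[:]
            del mini_batch_rewards[:]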
Example #2

    def make_move(board_state, side):
        # greedy self-play move used to label board positions; assumes a
        # deterministic counterpart to get_stochastic_network_move
        move = get_deterministic_network_move(session, input_layer, output_layer,
                                              board_state, side)
        return game_spec.flat_move_to_tuple(np.argmax(move))


    board_states_training = {}
    board_states_test = []
    episode_number = 0

    while len(board_states_training) < TRAIN_SAMPLES + TEST_SAMPLES:
        board_state = generate_random_board_position()
        board_state_flat = tuple(np.ravel(board_state))

        # only accept the board_state if not already in the dict
        if board_state_flat not in board_states_training:
            result = game_spec.play_game(make_move, make_move, board_state=board_state)
            board_states_training[board_state_flat] = float(result)

    # take a random selection from training into a test set
    for _ in range(TEST_SAMPLES):
        sample = random.choice(list(board_states_training.keys()))
        board_states_test.append((sample, board_states_training[sample]))
        del board_states_training[sample]

    board_states_training = list(board_states_training.items())

    test_error = session.run(error, feed_dict={value_input_layer: [x[0] for x in board_states_test],
                                               target_placeholder: [[x[1]] for x in board_states_test]})

    while True:
        np.random.shuffle(board_states_training)
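        # (sketch) one epoch of mini-batch training on the value targets;
        # assumes BATCH_SIZE and a train_step op to go with the error op above
        for start in range(0, len(board_states_training) - BATCH_SIZE + 1, BATCH_SIZE):
            mini_batch = board_states_training[start:start + BATCH_SIZE]
            session.run(train_step,
                        feed_dict={value_input_layer: [x[0] for x in mini_batch],
                                   target_placeholder: [[x[1]] for x in mini_batch]})

        # track progress on the held-out positions after each epoch
        test_error = session.run(error,
                                 feed_dict={value_input_layer: [x[0] for x in board_states_test],
                                            target_placeholder: [[x[1]] for x in board_states_test]})
        print("test error: %s" % test_error)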
Example #3
        print("could not find previous weights so initialising randomly")

    for i in range(NUMBER_OF_HISTORICAL_COPIES_TO_KEEP):
        if os.path.isfile(BASE_HISTORICAL_NETWORK_PATH + str(i) + '.p'):
            load_network(session, historical_networks[i][2], BASE_HISTORICAL_NETWORK_PATH + str(i) + '.p')
        elif os.path.isfile(STARTING_NETWORK_WEIGHTS):
            # if we can't load a historical file use the current network weights
            load_network(session, historical_networks[i][2], STARTING_NETWORK_WEIGHTS)

    for episode_number in range(1, NUMBER_OF_GAMES_TO_PLAY):
        opponent_index = random.randint(0, NUMBER_OF_HISTORICAL_COPIES_TO_KEEP-1)
        make_move_historical_for_index = functools.partial(make_move_historical, opponent_index)

        # randomly choose whether the learner plays first or second
        if bool(random.getrandbits(1)):
            reward = game_spec.play_game(make_training_move, make_move_historical_for_index)
        else:
            reward = -game_spec.play_game(make_move_historical_for_index, make_training_move)

        results.append(reward)

        last_game_length = len(mini_batch_board_states) - len(mini_batch_rewards)

        # scale by game length so that winning quickly is better than winning
        # slowly, and losing slowly is better than losing quickly
        reward /= float(last_game_length)

        mini_batch_rewards += ([reward] * last_game_length)

        if episode_number % MINI_BATCH_SIZE == 0:
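            # (sketch) same policy-gradient step as in the first example:
            # normalize the collected rewards, train on the mini-batch, reset it
            normalized_rewards = np.array(mini_batch_rewards) - np.mean(mini_batch_rewards)
            rewards_std = np.std(normalized_rewards)
            if rewards_std != 0:
                normalized_rewards /= rewards_std

            session.run(train_step,
                        feed_dict={input_layer: np.array(mini_batch_board_states),
                                   reward_placeholder: normalized_rewards,
                                   actual_move_placeholder: np.array(mini_batch_moves)})

            del mini_batch_board_states[:]
            del mini_batch_moves[:]
            del mini_batch_rewards[:]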