def playMatchesBetweenVersions(env, run_version, player1version, player2version, EPISODES, logger, turns_until_tau0, goes_first=0):

    if player1version == -1:
        player1 = User('player1', env.state_size, env.action_size)
    else:
        player1_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, env.input_shape, env.action_size, config.HIDDEN_CNN_LAYERS)

        if player1version > 0:
            player1_network = player1_NN.read(env.name, run_version, player1version)
            player1_NN.model.set_weights(player1_network.get_weights())
        player1 = Agent('player1', env.state_size, env.action_size, config.MCTS_SIMS, config.CPUCT, player1_NN)

    if player2version == -1:
        player2 = User('player2', env.state_size, env.action_size)
    else:
        player2_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, env.input_shape, env.action_size, config.HIDDEN_CNN_LAYERS)

        if player2version > 0:
            player2_network = player2_NN.read(env.name, run_version, player2version)
            player2_NN.model.set_weights(player2_network.get_weights())
        player2 = Agent('player2', env.state_size, env.action_size, config.MCTS_SIMS, config.CPUCT, player2_NN)

    scores, memory, points, sp_scores = playMatches(player1, player2, EPISODES, logger, turns_until_tau0, None, goes_first)

    return (scores, memory, points, sp_scores)
def playMatchesBetweenVersions(env, run_version, player1version, player2version, EPISODES, logger, turns_until_tau0, goes_first=0):

    if player1version == -1:
        player1 = User('player1', env.state_size, env.action_size)
    else:
        player1_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, env.input_shape, env.action_size, config.HIDDEN_CNN_LAYERS)

        if player1version > 0:
            player1_network = player1_NN.read(env.name, run_version, player1version)
            player1_NN.model.set_weights(player1_network.get_weights())
        player1 = Agent('player1', env.state_size, env.action_size, config.MCTS_SIMS, config.CPUCT, player1_NN)

    if player2version == -1:
        player2 = User('player2', env.state_size, env.action_size)
    else:
        player2_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, env.input_shape, env.action_size, config.HIDDEN_CNN_LAYERS)

        if player2version > 0:
            player2_network = player2_NN.read(env.name, run_version, player2version)
            player2_NN.model.set_weights(player2_network.get_weights())
        player2 = Agent('player2', env.state_size, env.action_size, config.MCTS_SIMS, config.CPUCT, player2_NN)

    # show moves whenever a human (version -1) is playing
    printmoves = player1version == -1 or player2version == -1

    scores, memory, points, sp_scores = playMatches(player1, player2, EPISODES, logger, turns_until_tau0, None, goes_first, printmoves)

    return (scores, memory, points, sp_scores)
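# A usage sketch for playMatchesBetweenVersions above, assuming the Game
# environment and loggers module (lg) used elsewhere in these snippets; the
# run and version numbers are illustrative, and -1 selects a human User.
env = Game()
scores, _, points, sp_scores = playMatchesBetweenVersions(
    env, run_version=1, player1version=5, player2version=-1,
    EPISODES=10, logger=lg.logger_tourney, turns_until_tau0=0)
print(scores)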
def playMatchesBetweenVersions(env, run_version, player1version, player2version, EPISODES, logger, turns_until_tau0, goes_first=0):
    env = Game()
    if player1version == -1:
        player1 = User("user1", env.state_size, env.action_size)
    else:
        player1_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, env.input_shape, env.action_size, config.HIDDEN_CNN_LAYERS)

        if player1version > 0:
            name = env.name + "{0:0>4}".format(player1version)
            if Provider.getNetByName(name) == None:
                return
            player1_network = player1_NN.read(env.name, run_version, player1version)
            player1_NN.model.set_weights(player1_network.get_weights())
        netName = env.name + "{0:0>4}".format(player1version)
        player1 = Agent(netName, env.state_size, env.action_size, config.MCTS_SIMS, config.CPUCT, player1_NN)

    if player2version == -1:
        name = input('enter username: ')
        user2 = Provider.getPersonByName(name)
        player2 = User(user2.name, env.state_size, env.action_size)
    else:
        player2_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, env.input_shape, env.action_size, config.HIDDEN_CNN_LAYERS)

        if player2version > 0:
            name = env.name + "{0:0>4}".format(player2version)
            if Provider.getNetByName(name) == None:
                return
            player2_network = player2_NN.read(env.name, run_version, player2version)
            player2_NN.model.set_weights(player2_network.get_weights())
        net2Name = env.name + "{0:0>4}".format(player2version)
        player2 = Agent(net2Name, env.state_size, env.action_size, config.MCTS_SIMS, config.CPUCT, player2_NN)

    scores, memory, points, sp_scores = playMatches(player1, player2, EPISODES, logger, turns_until_tau0, None, goes_first)

    return (scores, memory, points, sp_scores)
def playMatchesBetweenVersions(env, run_version, player1version, player2version, EPISODES, logger, turns_until_tau0, goes_first=0):
    # A version of -1 means a human player
    if player1version == -1:
        player1 = User('player1', env.state_size, env.action_size)
    else:
        # Residual_CNN returns the network object
        player1_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, env.input_shape, env.action_size, config.HIDDEN_CNN_LAYERS)

        if player1version > 0:
            # Not a human player, so load the trained version and its weights
            player1_network = player1_NN.read(env.name, run_version, player1version)
            player1_NN.model.set_weights(player1_network.get_weights())
        # Run simulations with it and build the MCTS tree
        player1 = Agent('player1', env.state_size, env.action_size, config.MCTS_SIMS, config.CPUCT, player1_NN)

    if player2version == -1:
        player2 = User('player2', env.state_size, env.action_size)
    else:
        player2_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, env.input_shape, env.action_size, config.HIDDEN_CNN_LAYERS)

        if player2version > 0:
            player2_network = player2_NN.read(env.name, run_version, player2version)
            player2_NN.model.set_weights(player2_network.get_weights())
        player2 = Agent('player2', env.state_size, env.action_size, config.MCTS_SIMS, config.CPUCT, player2_NN)

    scores, memory, points, sp_scores = playMatches(player1, player2, EPISODES, logger, turns_until_tau0, None, goes_first)

    return (scores, memory, points, sp_scores)
def start(self, data):
    # Generate Game from initial json
    board_json = data[BOARD_KEY]

    # Find grid shape
    self.grid_shape = (board_json[WIDTH_KEY], board_json[HEIGHT_KEY])
    self.w, self.h = self.grid_shape

    # Find the snake positions.
    snakes = board_json[SNAKES_KEY]

    # Find your position
    your_id = data[YOU_KEY][ID_KEY]
    starting_pos_json = []
    for snake in snakes:
        if snake[ID_KEY] == your_id:
            starting_pos_json = [snake[BODY_KEY][0]] + starting_pos_json
        else:
            starting_pos_json = starting_pos_json + [snake[BODY_KEY][0]]

    starting_pos = []
    for pos_json in starting_pos_json:
        starting_pos += [(pos_json['x'], pos_json['y'])]

    self.num_players = len(snakes) + 1
    # Food layer immediately after players
    self.food_layer = self.num_players * 2
    # Other Layer immediately after food
    self.other_layer = self.food_layer + 1
    # Total layers
    self.num_layers = self.other_layer + 1

    # Find the food positions.
    foods = board_json[FOOD_KEY]
    starting_food = []
    for pos_json in foods:
        # Append one (x, y) tuple per food item; the original extended the
        # list with bare ints, which did not match starting_pos above
        starting_food += [(pos_json['x'], pos_json['y'])]

    print("Creating new Game")
    # Create Game
    env = Game(self.grid_shape, self.num_players, starting_pos, starting_food)

    print("Creating new Agent")
    # Create Agent
    player_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, env.input_shape, env.action_size, config.HIDDEN_CNN_LAYERS)
    player_network = player_NN.read(env.name, self.run_version, self.playerversion)
    player_NN.model.set_weights(player_network.get_weights())
    # self.player = Agent('player', env.state_size, env.action_size, config.MCTS_SIMS, config.CPUCT, player_NN)
    self.player = Agent('player', env.state_size, env.action_size, 50, config.CPUCT, player_NN)
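# A minimal example of the JSON this start() handler expects, assuming the
# *_KEY constants map to the standard Battlesnake field names ("board",
# "width", "height", "snakes", "food", "you", "id", "body"); the exact wire
# format depends on the Battlesnake API version, so treat this as a sketch.
example_start_data = {
    "board": {
        "width": 11,
        "height": 11,
        "snakes": [
            {"id": "snake-a", "body": [{"x": 1, "y": 1}]},
            {"id": "snake-b", "body": [{"x": 9, "y": 9}]},
        ],
        "food": [{"x": 5, "y": 5}],
    },
    "you": {"id": "snake-a"},
}
# handler.start(example_start_data)  # hypothetical handler instance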
# Imports implied by this snippet but not shown in the original excerpt
from os import listdir, remove
from random import choice
import pickle


def train_network(agent, train_phase):
    if train_phase[0] == 'start':
        net = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, config.INPUT_START_DIM, config.OUTPUT_START_DIM, config.HIDDEN_CNN_LAYERS)
        net_str = 's'
    elif train_phase[0] == 'general':
        net = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, config.INPUT_DIM, config.OUTPUT_DIM, config.HIDDEN_CNN_LAYERS)
        net_str = 'g'
    net.read(agent, net_str)

    #validation_file = choice(listdir('validation_states'))
    #with open('validation_states\\' + validation_file, 'rb') as input_file:
    #    validation = pickle.load(input_file)

    min_val_error = 10000.0
    for i in range(config.TRAINING_LOOPS):
        print("Iteration #" + str(i))
        # pick a random recorded game and train on it, consuming the file
        game_file = choice(listdir('train_states'))
        with open('train_states\\' + game_file, 'rb') as input_file:
            game_memory = pickle.load(input_file)
        remove('train_states\\' + game_file)

        hist = net.fit(game_memory['batch_states'], game_memory['batch_targets'], config.EPOCHS, 2, 0.0, 32)
        #metric = hist.history['val_loss'][config.EPOCHS - 1]
        #if metric < min_val_error:
        #    min_val_error = metric
        net.write(agent, net_str)

    print("Min Loss: " + str(min_val_error))
def predict():
    current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, (2,) + (6, 7), 42, config.HIDDEN_CNN_LAYERS)
    current_NN.model.set_weights(current_NN.read('connect4', 2, 74).get_weights())
    current_player = Agent('current_player', 84, 42, config.MCTS_SIMS, config.CPUCT, current_NN)

    if request.method == 'POST':
        try:
            data = request.get_json()
            gs = GameState(np.array(json.loads(data["gameState"])), 1)
            #print(gs)
            preds = current_player.get_preds(gs)
            preds = np.array(preds[1]).reshape(6, 7)
            pred_arg = np.unravel_index(preds.argmax(), preds.shape)
        except ValueError:
            return jsonify("Please enter a proper GameState.")
        return jsonify([int(x) for x in pred_arg])
    if request.method == 'GET':
        return "Hello World! GET request"
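# A minimal client for the Flask endpoint above, assuming it is registered at
# /predict on a local development server; the route and port are assumptions.
import json
import requests

board = [0] * 42  # an empty 6x7 Connect 4 board, flattened row by row
resp = requests.post("http://localhost:5000/predict",
                     json={"gameState": json.dumps(board)})
print(resp.json())  # [row, column] of the network's highest-scoring move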
def evaluation_worker(conn):
    import config
    from config import PLAYER_COUNT, TEAM_SIZE, MEMORY_SIZE
    import initialise
    from model import Residual_CNN, import_tf
    import_tf(1024 * 3)
    from game import Game
    from agent import Agent
    from memory import Memory
    from funcs import playMatches
    import loggers as lg
    import logging
    import pickle  # needed for the test-memory dump below (missing in the original excerpt)
    import time

    # initialise new test memory
    test_memories = Memory(int(MEMORY_SIZE / 10))

    env = Game()

    # initialise new models
    # create untrained neural network objects from the config file
    if len(env.grid_shape) == 2:
        shape = (1,) + env.grid_shape
    else:
        shape = env.grid_shape

    if TEAM_SIZE > 1:
        current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape, int(PLAYER_COUNT / TEAM_SIZE), config.HIDDEN_CNN_LAYERS)
        best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape, int(PLAYER_COUNT / TEAM_SIZE), config.HIDDEN_CNN_LAYERS)
    else:
        current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape, PLAYER_COUNT, config.HIDDEN_CNN_LAYERS)
        best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape, PLAYER_COUNT, config.HIDDEN_CNN_LAYERS)

    current_player_version = 0
    best_player_version = 0

    # If loading an existing neural network, set the weights from that model
    if initialise.INITIAL_MODEL_VERSION != None:
        best_player_version = initialise.INITIAL_MODEL_VERSION
        #print('LOADING MODEL VERSION ' + str(initialise.INITIAL_MODEL_VERSION) + '...')
        m_tmp = best_NN.read(env.name, initialise.INITIAL_RUN_NUMBER, initialise.INITIAL_MODEL_VERSION)
        current_NN.model.set_weights(m_tmp.get_weights())
        best_NN.model.set_weights(m_tmp.get_weights())
    # otherwise just ensure the weights on the two players are the same
    else:
        best_NN.model.set_weights(current_NN.model.get_weights())

    current_player = Agent('current_player', config.MCTS_SIMS, config.CPUCT, current_NN)
    best_player = Agent('best_player', config.MCTS_SIMS, config.CPUCT, best_NN)

    time.sleep(20)

    while 1:
        # request current_NN weights
        conn.send(current_player_version)
        # wait indefinitely for current_NN weights
        conn.poll(None)
        data = conn.recv()

        if data:
            # set current_NN weights
            current_NN.model.set_weights(data)
            current_player_version += 1

            # play tournament games
            tourney_players = []
            if TEAM_SIZE > 1:
                for i in range(int(PLAYER_COUNT / TEAM_SIZE)):  # for each team
                    # alternate adding best_players and current_players up to the TEAM_SIZE
                    for k in range(TEAM_SIZE):
                        if k % 2 == 0:
                            tourney_players.append(best_player)
                        else:
                            tourney_players.append(current_player)
            else:
                for i in range(PLAYER_COUNT):
                    if i % 2 == 0:
                        tourney_players.append(best_player)
                    else:
                        tourney_players.append(current_player)

            scores, test_memories = playMatches(tourney_players, config.EVAL_EPISODES, lg.logger_tourney, 0.0, test_memories, evaluation=True)
            test_memories.clear_stmemory()

            # if the current player is significantly better than the best_player replace the best player;
            # the replacement is made by just copying the weights of current_player's nn to best_player's nn
            if scores['current_player'] > scores['best_player'] * config.SCORING_THRESHOLD:
                # if current_NN won send message
                conn.send(((current_player_version, best_player_version), str(scores)))
                best_player_version = best_player_version + 1
                best_NN.model.set_weights(current_NN.model.get_weights())
                best_NN.write(env.name, best_player_version)

            if len(test_memories.ltmemory) == test_memories.MEMORY_SIZE and current_player_version % 5 == 0:
                # run_folder is assumed to come from the project's settings module;
                # the original excerpt dumped an undefined name `memories`, which
                # can only be the test memory collected above
                pickle.dump(test_memories, open(run_folder + "memory/test_memory" + str(current_player_version).zfill(4) + ".p", "wb"))

            #print("Evaluating performance of current_NN")
            #current_player.evaluate_accuracy(test_memories.ltmemory)
            #print('\n')
        else:
            time.sleep(10)
j = 0
# Pit each CNN version against the Res version with the same index, EPISODES games per pairing.
for player_idx, _ in enumerate(version_list_CNN):
    for opponent_idx, _ in enumerate(version_list_Res):
        if player_idx != opponent_idx:
            continue

        m_tmp = player1_NN.read(initialise.INITIAL_RUN_NUMBER, player_idx + 1)
        player1_NN.model.set_weights(m_tmp.get_weights())
        player1 = Agent('player1', env.state_size, env.action_size, config.MCTS_SIMS, config.CPUCT, player1_NN)

        m_tmp = player2_NN.read(initialise.INITIAL_RUN_NUMBER, opponent_idx + 1)
        player2_NN.model.set_weights(m_tmp.get_weights())
        player2 = Agent('player2', env.state_size, env.action_size, config.MCTS_SIMS, config.CPUCT, player2_NN)

        scores, memory, points, sp_scores = funcs.playMatches(
            player1, player2, EPISODES, lg.logger_main, turns_until_tau0=0, goes_first=1)

        print('\n')
        print('-------')
        print('player1: version {}'.format(player_idx))
        print('player2: version {}'.format(opponent_idx))
pool = multiprocessing.Pool(2)
out = zip(pool.map(_selfplay, range(0, 2)))
t = tuple(out)
len(t)

chessenv.action_size
chessenv.state_size
chessenv.grid_shape

current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, (119,) + chessenv.grid_shape, chessenv.action_size, config.HIDDEN_CNN_LAYERS)
best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, (119,) + chessenv.grid_shape, chessenv.action_size, config.HIDDEN_CNN_LAYERS)
best_NN.model.set_weights(current_NN.model.get_weights())

best_player_version = 2
print('LOADING MODEL VERSION ' + str(2) + '...')
m_tmp = best_NN.read(chessenv.name, 2, best_player_version)
current_NN.model.set_weights(m_tmp.get_weights())
best_NN.model.set_weights(m_tmp.get_weights())

current_player = Agent('current_player', chessenv.state_size, chessenv.action_size, config.MCTS_SIMS, config.CPUCT, current_NN)
best_player = Agent('best_player', chessenv.state_size, chessenv.action_size, config.MCTS_SIMS, config.CPUCT, best_NN)

state = chessenv.reset()
state.render(None)

# action, pi, MCTS_value, NN_value = current_player.act(state, 0)
scores, memory, _, _ = playMatches(best_player, best_player, config.EPISODES, lg.logger_main, turns_until_tau0=config.TURNS_UNTIL_TAU0, memory=memory)
current_player.replay(memory.ltmemory)
######## LOAD MODEL IF NECESSARY ########

# create untrained neural network objects from the config file
current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, (2,) + env.grid_shape, env.action_size, config.HIDDEN_CNN_LAYERS)
best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, (2,) + env.grid_shape, env.action_size, config.HIDDEN_CNN_LAYERS)

# If loading an existing neural network, set the weights from that model
if initialize.INITIAL_MODEL_VERSION != None:
    best_player_version = initialize.INITIAL_MODEL_VERSION
    print('LOADING MODEL VERSION ' + str(initialize.INITIAL_MODEL_VERSION) + '...')
    m_tmp = best_NN.read(env.name, initialize.INITIAL_RUN_NUMBER, best_player_version)
    current_NN.model.set_weights(m_tmp.get_weights())
    best_NN.model.set_weights(m_tmp.get_weights())
# otherwise just ensure the weights on the two players are the same
else:
    best_player_version = 0
    best_NN.model.set_weights(current_NN.model.get_weights())

# copy the config file to the run folder
copyfile('./config.py', run_folder + 'config.py')
plot_model(current_NN.model, to_file=run_folder + 'models/model.png', show_shapes=True)

print('\n')
class Agent_NN:
    def __init__(self, enable_cache=False):
        self.nn_start = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, config.INPUT_START_DIM, config.OUTPUT_START_DIM, config.HIDDEN_CNN_LAYERS)
        self.nn = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, config.INPUT_DIM, config.OUTPUT_DIM, config.HIDDEN_CNN_LAYERS)

        self.enable_cache = enable_cache
        self.cache = {}

    def purge_cache(self):
        self.cache = {}

    def nn_read(self, name):
        self.nn_start.read(name, 's')
        self.nn.read(name, 'g')

    def nn_write(self, name):
        self.nn_start.write(name, 's')
        self.nn.write(name, 'g')

    def predict(self, state, perspective, mcts):
        network = self.build_nn_input(state, perspective, mcts=mcts)
        if network.shape[1] == config.INPUT_DIM[0]:
            return self.nn.predict(network)
        else:
            return self.nn_start.predict(network)

    def build_start_nn_input(self, state, perspective):
        nn_input = np.zeros((1, config.INPUT_START_DIM[0], config.INPUT_START_DIM[1], config.INPUT_START_DIM[2]), dtype=np.float32)

        numbers_output = {2: 1, 3: 2, 4: 3, 5: 4, 6: 5, 8: 5, 9: 4, 10: 3, 11: 2, 12: 1}
        rotation = np.random.randint(12)

        if self.enable_cache is True and rotation in self.cache:
            nn_input[:, :11, :, :] = self.cache[rotation]
        else:
            # Resources outputs
            for number, tile in state.numbers:
                resource = state.tiles[tile]
                for vertex in config.tiles_vertex[tile]:
                    nn_input[0, resource - 2,
                             config.vertex_to_nn_input[rotation][vertex][0],
                             config.vertex_to_nn_input[rotation][vertex][1]] += numbers_output[number] / 15.0

            # Ports
            for key, r in enumerate([config.SHEEP, config.ORE, config.BRICK, config.WHEAT, config.WOOD, config.GENERIC]):
                indices = [i for i, x in enumerate(state.ports) if x == r]
                for i in indices:
                    for vertex in config.ports_vertex[i]['vert']:
                        nn_input[0, key + 5,
                                 config.vertex_to_nn_input[rotation][vertex][0],
                                 config.vertex_to_nn_input[rotation][vertex][1]] = 1

            if self.enable_cache is True:
                self.cache[rotation] = nn_input[:, :11, :, :]

        # Settlements, cities, roads
        for p in range(4):
            p_order = (4 + p - perspective) % 4
            for s in state.players[p].settlements:
                nn_input[0, 11 + 2 * p_order,
                         config.vertex_to_nn_input[rotation][s][0],
                         config.vertex_to_nn_input[rotation][s][1]] = 1
            for r in state.players[p].roads:
                nn_input[0, 12 + 2 * p_order,
                         config.vertex_to_nn_input[rotation][r[0]][0],
                         config.vertex_to_nn_input[rotation][r[0]][1]] += 1 / 3.0
                nn_input[0, 12 + 2 * p_order,
                         config.vertex_to_nn_input[rotation][r[1]][0],
                         config.vertex_to_nn_input[rotation][r[1]][1]] += 1 / 3.0

        # Cards
        for p in range(4):
            p_order = (4 + p - perspective) % 4
            for key, r in enumerate([config.SHEEP, config.ORE, config.BRICK, config.WHEAT, config.WOOD]):
                nn_input[0, 19 + key + 5 * p_order, :, :] = state.players[p].cards[r] / 10.0

        # State
        if (state.game_phase == config.PHASE_INITIAL_SETTLEMENT or state.game_phase == config.PHASE_INITIAL_ROAD) and state.initial_phase_decrease == 0:
            nn_input[0, 39, :, :] = 1
        if (state.game_phase == config.PHASE_INITIAL_SETTLEMENT or state.game_phase == config.PHASE_INITIAL_ROAD) and state.initial_phase_decrease == 1:
            nn_input[0, 40, :, :] = 1

        # Player turn
        for p in range(4):
            p_order = (4 + p - perspective) % 4
            if p == state.player_turn:
                nn_input[0, 41 + p_order, :, :] = 1

        return nn_input

    def build_nn_input(self, state, perspective, mcts=None):
        if state.game_phase == config.PHASE_INITIAL_SETTLEMENT or state.game_phase == config.PHASE_INITIAL_ROAD:
            return self.build_start_nn_input(state, perspective)

        nn_input = np.zeros((1, config.INPUT_DIM[0], config.INPUT_DIM[1], config.INPUT_DIM[2]), dtype=np.float32)

        numbers_output = {2: 1, 3: 2, 4: 3, 5: 4, 6: 5, 8: 5, 9: 4, 10: 3, 11: 2, 12: 1}
        rotation = np.random.randint(12)

        if self.enable_cache is True and rotation in self.cache:
            nn_input[:, :11, :, :] = self.cache[rotation]
        else:
            # Resources outputs
            for number, tile in state.numbers:
                resource = state.tiles[tile]
                for vertex in config.tiles_vertex[tile]:
                    nn_input[0, resource - 2,
                             config.vertex_to_nn_input[rotation][vertex][0],
                             config.vertex_to_nn_input[rotation][vertex][1]] += numbers_output[number] / 15.0

            # Ports
            for key, r in enumerate([config.SHEEP, config.ORE, config.BRICK, config.WHEAT, config.WOOD, config.GENERIC]):
                indices = [i for i, x in enumerate(state.ports) if x == r]
                for i in indices:
                    for vertex in config.ports_vertex[i]['vert']:
                        nn_input[0, key + 5,
                                 config.vertex_to_nn_input[rotation][vertex][0],
                                 config.vertex_to_nn_input[rotation][vertex][1]] = 1

            if self.enable_cache is True:
                self.cache[rotation] = nn_input[:, :11, :, :]

        # Settlements, cities, roads
        for p in range(4):
            p_order = (4 + p - perspective) % 4
            for s in state.players[p].settlements:
                nn_input[0, 11 + 3 * p_order,
                         config.vertex_to_nn_input[rotation][s][0],
                         config.vertex_to_nn_input[rotation][s][1]] = 1
            for c in state.players[p].cities:
                nn_input[0, 12 + 3 * p_order,
                         config.vertex_to_nn_input[rotation][c][0],
                         config.vertex_to_nn_input[rotation][c][1]] = 1
            for r in state.players[p].roads:
                nn_input[0, 13 + 3 * p_order,
                         config.vertex_to_nn_input[rotation][r[0]][0],
                         config.vertex_to_nn_input[rotation][r[0]][1]] += 1 / 3.0
                nn_input[0, 13 + 3 * p_order,
                         config.vertex_to_nn_input[rotation][r[1]][0],
                         config.vertex_to_nn_input[rotation][r[1]][1]] += 1 / 3.0

        # Cards
        for p in range(4):
            p_order = (4 + p - perspective) % 4
            for key, r in enumerate([config.SHEEP, config.ORE, config.BRICK, config.WHEAT, config.WOOD]):
                nn_input[0, 23 + key + 5 * p_order, :, :] = state.players[p].cards[r] / 10.0

        # Robber
        for vertex in config.tiles_vertex[state.robber_tile]:
            nn_input[0, 43,
                     config.vertex_to_nn_input[rotation][vertex][0],
                     config.vertex_to_nn_input[rotation][vertex][1]] = 1

        # Army Cards Played
        for p in range(4):
            p_order = (4 + p - perspective) % 4
            nn_input[0, 44 + p_order, :, :] = state.players[p].used_knights / 5.0

        # Army Holder
        for p in range(4):
            p_order = (4 + p - perspective) % 4
            nn_input[0, 48 + p_order, :, :] = state.players[p].largest_army_badge

        # Longest Road Holder
        for p in range(4):
            p_order = (4 + p - perspective) % 4
            nn_input[0, 52 + p_order, :, :] = state.players[p].longest_road_badge

        # Special Cards
        for p in range(4):
            p_order = (4 + p - perspective) % 4
            for key, r in enumerate([config.VICTORY_POINT, config.KNIGHT, config.MONOPOLY, config.ROAD_BUILDING, config.YEAR_OF_PLENTY]):
                nn_input[0, 56 + key + 5 * p_order, :, :] = state.players[p].special_cards.count(r) / 3.0

        # Discarding, initial game phase
        if state.game_phase == config.PHASE_DISCARD:
            nn_input[0, 76, :, :] = 1

        # Player turn
        for p in range(4):
            p_order = (4 + p - perspective) % 4
            if p == state.player_turn:
                nn_input[0, 77 + p_order, :, :] = 1

        # Other game phases
        if state.game_phase == config.PHASE_THROW_DICE:
            nn_input[0, 81, :, :] = 1
        if state.game_phase == config.PHASE_MOVE_ROBBER:
            nn_input[0, 82, :, :] = 1
        if state.game_phase == config.PHASE_STEAL_CARD:
            nn_input[0, 83, :, :] = 1
        if state.game_phase == config.PHASE_ROAD_BUILDING:
            nn_input[0, 84, :, :] = 1
        if state.game_phase == config.PHASE_YEAR_OF_PLENTY:
            nn_input[0, 85, :, :] = 1
        if state.game_phase == config.PHASE_TRADE_RESPOND:
            nn_input[0, 86, :, :] = 1

        for s in range(54):
            if state.available_settlement_spot(s):
                nn_input[0, 87,
                         config.vertex_to_nn_input[rotation][s][0],
                         config.vertex_to_nn_input[rotation][s][1]] = 1

        return nn_input
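# A small illustration of the perspective re-indexing used throughout
# Agent_NN: p_order = (4 + p - perspective) % 4 maps the observing player to
# slot 0, so the network always sees the board from its own seat first.
perspective = 2
for p in range(4):
    print(p, '->', (4 + p - perspective) % 4)
# prints: 0 -> 2, 1 -> 3, 2 -> 0 (the observer), 3 -> 1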
if initialise.INITIAL_MEMORY_VERSION == [None] * DECISION_TYPES:
    for i in range(DECISION_TYPES):
        memories.append(Memory(MEMORY_SIZE[i]))
else:
    for d_t, MEM_VERSION in enumerate(initialise.INITIAL_MEMORY_VERSION):
        print('LOADING MEMORY VERSION ' + str(MEM_VERSION) + '...')
        memories.append(pickle.load(open(run_archive_folder + game.name + '/run' + str(initialise.INITIAL_RUN_NUMBER).zfill(4) +
                                         "/memory/decision_" + str(d_t) + "_memory" + str(MEM_VERSION).zfill(4) + ".p", "rb")))
        if memories[-1].MEMORY_SIZE < MEMORY_SIZE[d_t]:
            memories[-1].extension(MEMORY_SIZE[d_t])

nn = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, game.grid_shape, PLAYER_COUNT, config.HIDDEN_CNN_LAYERS, 0)
m_tmp = nn.read(game.name, initialise.INITIAL_RUN_NUMBER, initialise.INITIAL_MODEL_VERSION[0])
nn.model.set_weights(m_tmp.get_weights())
trained_agent = Agent('trained_agent', game.action_size, config.MCTS_SIMS, config.CPUCT, [nn])

trained_agent.evaluate_accuracy(memories[0].ltmemory, 0)
quit()

if arg == "pred_test":
    nn = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, (1,) + game.grid_shape, [1],
                      config.HIDDEN_CNN_LAYERS, 0)
    player = Agent('player', game.state_size, config.MCTS_SIMS, config.CPUCT, [nn])
    print(player.predict_value(game.gameState))
# (opening of this block reconstructed by symmetry with the else-branch below)
if TEAM_SIZE > 1:
    current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape, int(PLAYER_COUNT / TEAM_SIZE), config.HIDDEN_CNN_LAYERS)
    best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape, int(PLAYER_COUNT / TEAM_SIZE), config.HIDDEN_CNN_LAYERS)
    opponent_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape, int(PLAYER_COUNT / TEAM_SIZE), config.HIDDEN_CNN_LAYERS)
else:
    current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape, PLAYER_COUNT, config.HIDDEN_CNN_LAYERS)
    best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape, PLAYER_COUNT, config.HIDDEN_CNN_LAYERS)
    opponent_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape, PLAYER_COUNT, config.HIDDEN_CNN_LAYERS)

best_player_version = 0

# If loading an existing neural network, set the weights from that model
if initialise.INITIAL_MODEL_VERSION != None:
    best_player_version = initialise.INITIAL_MODEL_VERSION
    print('LOADING MODEL VERSION ' + str(initialise.INITIAL_MODEL_VERSION) + '...')
    m_tmp = best_NN.read(env.name, initialise.INITIAL_RUN_NUMBER, initialise.INITIAL_MODEL_VERSION)
    current_NN.model.set_weights(m_tmp.get_weights())
    best_NN.model.set_weights(m_tmp.get_weights())
# otherwise just ensure the weights on the two players are the same
else:
    best_NN.model.set_weights(current_NN.model.get_weights())

# copy the config file to the run folder
copyfile('./config.py', run_folder + 'config.py')
plot_model(current_NN.model, to_file=run_folder + 'models/model.png', show_shapes=True)

print('\n')
import config

memory = Memory(config.MEMORY_SIZE)

######## LOAD MODEL IF NECESSARY ########

# create untrained neural network objects from the config file
current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, Game.InputShape, Game.ActionSize, config.HIDDEN_CNN_LAYERS)
best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, Game.InputShape, Game.ActionSize, config.HIDDEN_CNN_LAYERS)

# If loading an existing neural network, set the weights from that model
if initialise.INITIAL_MODEL_VERSION != None:
    best_player_version = initialise.INITIAL_MODEL_VERSION
    print('LOADING MODEL VERSION ' + str(initialise.INITIAL_MODEL_VERSION) + '...')
    m_tmp = current_NN.read(Game.Name, initialise.INITIAL_RUN_NUMBER, best_player_version)
    current_NN.model.set_weights(m_tmp.get_weights())
    m_tmp = best_NN.read(Game.Name, initialise.INITIAL_RUN_NUMBER, best_player_version)
    best_NN.model.set_weights(m_tmp.get_weights())
# otherwise just ensure the weights on the two players are the same
else:
    best_player_version = 0
    best_NN.model.set_weights(current_NN.model.get_weights())

# copy the config file to the run folder
copyfile('./config.py', run_folder + 'config.py')
#plot_model(current_NN.model, to_file=run_folder + 'models/model.png', show_shapes=True)

print('\n')

######## CREATE THE PLAYERS ########
class Playing:
    def __init__(self, env, run_version, player1version, player2version, EPISODES, logger, turns_until_tau0, goes_first=0):
        self.EPISODES = EPISODES
        self.turns_until_tau0 = turns_until_tau0
        self.logger = logger
        self.goes_first = goes_first

        if player1version == -1:
            self.player1 = User('player1', env.state_size, env.action_size)
        else:
            self.player1_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, env.input_shape, env.action_size, config.HIDDEN_CNN_LAYERS)
            if player1version > 0:
                self.player1_network = self.player1_NN.read(env.name, run_version, player1version)
                self.player1_NN.model.set_weights(self.player1_network.get_weights())
            self.player1 = Agent('player1', env.state_size, env.action_size, config.MCTS_SIMS, config.CPUCT, self.player1_NN)

        if player2version == -1:
            self.player2 = User('player2', env.state_size, env.action_size)
        else:
            self.player2_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, env.input_shape, env.action_size, config.HIDDEN_CNN_LAYERS)
            if player2version > 0:
                self.player2_network = self.player2_NN.read(env.name, run_version, player2version)
                self.player2_NN.model.set_weights(self.player2_network.get_weights())
            self.player2 = Agent('player2', env.state_size, env.action_size, config.MCTS_SIMS, config.CPUCT, self.player2_NN)

    def play_one_game(self, e):
        self.logger.info('====================')
        self.logger.info('EPISODE %d OF %d', e + 1, self.EPISODES)
        self.logger.info('====================')

        goes_first = self.goes_first
        player1 = self.player1
        logger = self.logger
        player2 = self.player2
        env = self.env
        turns_until_tau0 = self.turns_until_tau0
        memory = self.memory
        scores = self.scores
        sp_scores = self.sp_scores
        points = self.points

        print(str(e + 1) + ' ', end='')

        state = env.reset()

        done = 0
        turn = 0
        player1.mcts = None
        player2.mcts = None

        if goes_first == 0:
            player1Starts = random.randint(0, 1) * 2 - 1
        else:
            player1Starts = goes_first

        if player1Starts == 1:
            players = {1: {"agent": player1, "name": player1.name},
                       -1: {"agent": player2, "name": player2.name}}
            logger.info(player1.name + ' plays as X')
        else:
            players = {1: {"agent": player2, "name": player2.name},
                       -1: {"agent": player1, "name": player1.name}}
            logger.info(player2.name + ' plays as X')

        logger.info('--------------')

        env.gameState.render(logger)

        while done == 0:
            turn = turn + 1

            #### Run the MCTS algo and return an action
            if turn < turns_until_tau0:
                action, pi, MCTS_value, NN_value = players[state.playerTurn]['agent'].act(state, 1)
            else:
                action, pi, MCTS_value, NN_value = players[state.playerTurn]['agent'].act(state, 0)

            if action == "restart":
                break

            if memory != None:
                #### Commit the move to memory
                memory.commit_stmemory(env.identities, state, pi)

            logger.info('action: %d', action)
            for r in range(env.grid_shape[0]):
                logger.info(['----' if x == 0 else '{0:.2f}'.format(np.round(x, 2))
                             for x in pi[env.grid_shape[1] * r:(env.grid_shape[1] * r + env.grid_shape[1])]])
            # logger.info('MCTS perceived value for %s: %f', state.pieces[str(state.playerTurn)], np.round(MCTS_value, 2))
            # logger.info('NN perceived value for %s: %f', state.pieces[str(state.playerTurn)], np.round(NN_value, 2))
            logger.info('====================')

            ### Do the action
            # the value of the newState from the POV of the new playerTurn,
            # i.e. -1 if the previous player played a winning move
            state, value, done, _ = env.step(action)

            print("player turn", env.gameState.playerTurn)
            print(env.gameState.board)
            if env.gameState.playerTurn == -1:
                f = open("./communicate/output.txt", "w")
                temp_board = [str(x) for x in env.gameState.board]
                f.write(",".join(temp_board))
                f.close()

            env.gameState.render(logger)

            if done == 1:
                if memory != None:
                    #### If the game is finished, assign the values correctly to the game moves
                    for move in memory.stmemory:
                        if move['playerTurn'] == state.playerTurn:
                            move['value'] = value
                        else:
                            move['value'] = -value
                    memory.commit_ltmemory()

                if value == 1:
                    logger.info('%s WINS!', players[state.playerTurn]['name'])
                    print('%s WINS!' % (players[state.playerTurn]['name']))
                    scores[players[state.playerTurn]['name']] = scores[players[state.playerTurn]['name']] + 1
                    if state.playerTurn == 1:
                        sp_scores['sp'] = sp_scores['sp'] + 1
                    else:
                        sp_scores['nsp'] = sp_scores['nsp'] + 1
                elif value == -1:
                    logger.info('%s WINS!', players[-state.playerTurn]['name'])
                    print('%s WINS!' % (players[-state.playerTurn]['name']))
                    scores[players[-state.playerTurn]['name']] = scores[players[-state.playerTurn]['name']] + 1
                    if state.playerTurn == 1:
                        sp_scores['nsp'] = sp_scores['nsp'] + 1
                    else:
                        sp_scores['sp'] = sp_scores['sp'] + 1
                else:
                    logger.info('DRAW...')
                    print("DRAW")
                    scores['drawn'] = scores['drawn'] + 1
                    sp_scores['drawn'] = sp_scores['drawn'] + 1

                pts = state.score
                points[players[state.playerTurn]['name']].append(pts[0])
                points[players[-state.playerTurn]['name']].append(pts[1])

    def playMatches(self):
        self.env = Game()
        # no training memory is collected in this playing harness;
        # self.memory was never assigned in the original, which would crash play_one_game
        self.memory = None
        self.scores = {self.player1.name: 0, "drawn": 0, self.player2.name: 0}
        self.sp_scores = {'sp': 0, "drawn": 0, 'nsp': 0}
        self.points = {self.player1.name: [], self.player2.name: []}

        for e in range(self.EPISODES):
            self.play_one_game(e)

        return (self.scores, self.memory, self.points, self.sp_scores)

# if __name__ == '__main__':
#     env = Game()
#     playing = Playing(env, 1, 1, -1, 10, lg.logger_tourney, 0)
#     playing.playMatches()
def playBattleSnake(env, run_version, player1version, player2version, EPISODES, logger, turns_until_tau0, goes_first=0):

    # Initialize Trained Snake
    player1_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, env.input_shape, env.action_size, config.HIDDEN_CNN_LAYERS)

    if player1version > 0:
        player1_network = player1_NN.read(env.name, run_version, player1version)
        player1_NN.model.set_weights(player1_network.get_weights())
    player1 = Agent('player1', env.state_size, env.action_size, config.MCTS_SIMS, config.CPUCT, player1_NN)

    # Initialize BattleSnake Model
    player2 = BattleSnake('player2')

    # Initialize Memory
    memory = None

    # Initialize Game state
    env = Game()

    scores = {player1.name: 0, "drawn": 0, player2.name: 0}
    sp_scores = {'sp': 0, "drawn": 0, 'nsp': 0}
    points = {player1.name: [], player2.name: []}

    state = env.reset()

    done = 0
    turn = 0
    player1.mcts = None
    player2.mcts = None

    if goes_first == 0:
        player1Starts = random.randint(0, 1) * 2 - 1
    else:
        player1Starts = goes_first

    if player1Starts == 1:
        players = {1: {"agent": player1, "name": player1.name},
                   -1: {"agent": player2, "name": player2.name}}
        logger.info(player1.name + ' plays as X')
    else:
        players = {1: {"agent": player2, "name": player2.name},
                   -1: {"agent": player1, "name": player1.name}}
        logger.info(player2.name + ' plays as X')
    logger.info('--------------')

    env.gameState.render(logger)

    while done == 0:
        turn = turn + 1

        #### Run the MCTS algo and return an action
        if turn < turns_until_tau0:
            action, pi, MCTS_value, NN_value = players[state.playerTurn]['agent'].act(state, 1)
        else:
            action, pi, MCTS_value, NN_value = players[state.playerTurn]['agent'].act(state, 0)

        if memory != None:
            #### Commit the move to memory
            memory.commit_stmemory(env.identities, state, pi)

        #logger.info('action: %d', action)
        #for r in range(env.grid_shape[0]):
        #    logger.info(['----' if x == 0 else '{0:.2f}'.format(np.round(x,2)) for x in pi[env.grid_shape[1]*r : (env.grid_shape[1]*r + env.grid_shape[1])]])
        #logger.info('MCTS perceived value for %s: %f', state.pieces[str(state.playerTurn)], np.round(MCTS_value,2))
        #logger.info('NN perceived value for %s: %f', state.pieces[str(state.playerTurn)], np.round(NN_value,2))
        #logger.info('====================')

        ### Do the action
        # the value of the newState from the POV of the new playerTurn,
        # i.e. -1 if the previous player played a winning move
        state, value, done, _ = env.step(action)

        env.gameState.render(logger)

        if done == 1:
            if memory != None:
                #### If the game is finished, assign the values correctly to the game moves
                for move in memory.stmemory:
                    if move['playerTurn'] == state.playerTurn:
                        move['value'] = value
                    else:
                        move['value'] = -value
                memory.commit_ltmemory()

            if value == 1:
                logger.info('%s WINS!', players[state.playerTurn]['name'])
                scores[players[state.playerTurn]['name']] = scores[players[state.playerTurn]['name']] + 1
                if state.playerTurn == 1:
                    sp_scores['sp'] = sp_scores['sp'] + 1
                else:
                    sp_scores['nsp'] = sp_scores['nsp'] + 1
            elif value == -1:
                logger.info('%s WINS!', players[-state.playerTurn]['name'])
                scores[players[-state.playerTurn]['name']] = scores[players[-state.playerTurn]['name']] + 1
                if state.playerTurn == 1:
                    sp_scores['nsp'] = sp_scores['nsp'] + 1
                else:
                    sp_scores['sp'] = sp_scores['sp'] + 1
            else:
                logger.info('DRAW...')
                scores['drawn'] = scores['drawn'] + 1
                sp_scores['drawn'] = sp_scores['drawn'] + 1

            pts = state.score
            points[players[state.playerTurn]['name']].append(pts[0])
            points[players[-state.playerTurn]['name']].append(pts[1])

    return (scores, memory, points, sp_scores)
def retraining_worker(conn):
    from game import Game
    import initialise
    import config
    from config import PLAYER_COUNT, TEAM_SIZE, BATCH_SIZE, TRAINING_LOOPS
    from model import Residual_CNN, import_tf
    import_tf(1024 * 2)
    import numpy as np
    import time

    env = Game()

    ######## LOAD MODEL IF NECESSARY ########
    # create an untrained neural network object from the config file
    if len(env.grid_shape) == 2:
        shape = (1,) + env.grid_shape
    else:
        shape = env.grid_shape

    if TEAM_SIZE > 1:
        current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape, int(PLAYER_COUNT / TEAM_SIZE), config.HIDDEN_CNN_LAYERS)
    else:
        current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape, PLAYER_COUNT, config.HIDDEN_CNN_LAYERS)

    # If loading an existing neural network, set the weights from that model
    if initialise.INITIAL_MODEL_VERSION != None:
        m_tmp = current_NN.read(env.name, initialise.INITIAL_RUN_NUMBER, initialise.INITIAL_MODEL_VERSION)
        current_NN.model.set_weights(m_tmp.get_weights())

    train_overall_loss = []

    while 1:
        # request memory samples
        conn.send((TRAINING_LOOPS, BATCH_SIZE))
        # wait for memory samples
        conn.poll(None)
        data = conn.recv()

        if data:
            # train on sampled memories
            for i, minibatch in enumerate(data):
                training_states = np.array([current_NN.convertToModelInput(row['state']) for row in minibatch])
                training_targets = {'value_head': np.array([row['value'] for row in minibatch])}

                fit = current_NN.fit(training_states, training_targets, epochs=config.EPOCHS, verbose=1,
                                     validation_split=0, batch_size=32)
                if i == 0:
                    init_loss = fit.history['loss'][0]

                train_overall_loss.append(round(fit.history['loss'][config.EPOCHS - 1], 4))

            """display.clear_output(wait=True)
            display.display(pl.gcf())
            pl.gcf().clear()
            time.sleep(.25)

            print('\n')
            current_NN.printWeightAverages()

            print("Max = {0}, Min = {1}, latest = {2}".format(max(self.train_overall_loss), min(self.train_overall_loss), self.train_overall_loss[-1]))
            print("Loss reduction: {}".format(init_loss - fit.history['loss'][0]))"""

            # send new current_NN weights
            conn.send((current_NN.model.get_weights(), train_overall_loss[-1]))
        else:
            time.sleep(10)
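# The matching driver-side sketch for retraining_worker, assuming it runs in
# its own process; sample_minibatch is a hypothetical stand-in for however the
# main process samples positions from replay memory.
import multiprocessing as mp

parent_conn, child_conn = mp.Pipe()
mp.Process(target=retraining_worker, args=(child_conn,), daemon=True).start()

loops, batch_size = parent_conn.recv()  # worker requests memory samples
parent_conn.send([sample_minibatch(batch_size) for _ in range(loops)])
new_weights, last_loss = parent_conn.recv()  # worker returns trained weights and its latest loss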
move39 = move41.copy()

all_dict = [move38, move39, move41, move37, move40]
moves = ['24', '32', '41', '37', '40']
j = 0

state = GameState(
    np.array([0, 0, 0, 0, 0, 0, 0,
              0, 0, 0, 0, 0, 0, 0,
              0, 0, 0, 0, 0, 0, 0,
              0, 0, 0, 0, 0, 0, 0,
              0, 0, 0, -1, -1, 0, 0,
              0, 0, 0, 1, 1, 0, 0], dtype=int), 1)  # np.int was removed in NumPy >= 1.24

# Each version must predict the best move; we plot the prediction score against the version number
for player_idx, _ in enumerate(version_list_CNN):
    m_tmp = player1_NN.read(initialise.INITIAL_RUN_NUMBER, player_idx + 1)
    player1_NN.model.set_weights(m_tmp.get_weights())
    player1 = Agent('player1', env.state_size, env.action_size, config.MCTS_SIMS, config.CPUCT, player1_NN)

    # m_tmp = player2_NN.read(initialise.INITIAL_RUN_NUMBER, player_idx + 1)
    # player2_NN.model.set_weights(m_tmp.get_weights())
    # player2 = Agent('player2', env.state_size, env.action_size, config.MCTS_SIMS, config.CPUCT, player2_NN)

    move37[player_idx] = player1.get_preds(state)[0]
    # move40[player_idx] = player1.get_preds(state)[1][40]
    # move41[player_idx] = player1.get_preds(state)[1][41]
    # move38[player_idx] = player1.get_preds(state)[1][24]
    # move39[player_idx] = player1.get_preds(state)[1][32]

plt.figure()
# (opening of this statement reconstructed from the parallel memory-loading snippet below)
memory = pickle.load(open(run_archive_folder + env.name + '/run' + str(initialise.INITIAL_RUN_NUMBER).zfill(4) + "/memory/memory" +
                          str(initialise.INITIAL_MEMORY_VERSION).zfill(4) + ".p", "rb"))

# Create untrained neural network objects from the config file
current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, (2,) + env.grid_shape, env.action_size, config.HIDDEN_CNN_LAYERS)
best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, (2,) + env.grid_shape, env.action_size, config.HIDDEN_CNN_LAYERS)

# Load existing neural network if needed
if initialise.INITIAL_MODEL_VERSION is not None:
    best_player_version = initialise.INITIAL_MODEL_VERSION
    print('LOADING MODEL VERSION ' + str(initialise.INITIAL_MODEL_VERSION) + '...')
    m_tmp = best_NN.read(initialise.INITIAL_RUN_NUMBER, best_player_version)
    current_NN.model.set_weights(m_tmp.get_weights())
    best_NN.model.set_weights(m_tmp.get_weights())
# Otherwise ensure the initial weights are the same for both players
else:
    best_player_version = 0
    best_NN.model.set_weights(current_NN.model.get_weights())

print('\n')
print(current_NN.model.summary())

# Create players
current_player = Agent('current_player', env.state_size, env.action_size, config.MCTS_SIMS, config.CPUCT, current_NN)
best_player = Agent('best_player', env.state_size, env.action_size, config.MCTS_SIMS, config.CPUCT, best_NN)
# (opening condition reconstructed; the dangling else below can only close it)
if initialise.INITIAL_MEMORY_VERSION == None:
    memory = Memory(config.MEMORY_SIZE)
else:
    print('LOADING MEMORY VERSION ' + str(initialise.INITIAL_MEMORY_VERSION) + '...')
    memory = pickle.load(open(run_archive_folder + env.name + '/run' + str(initialise.INITIAL_RUN_NUMBER).zfill(4) +
                              "/memory/memory" + str(initialise.INITIAL_MEMORY_VERSION).zfill(4) + ".p", "rb"))

######## LOAD MODEL IF NECESSARY ########

# create untrained neural network objects from the config file
current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, (2,) + env.grid_shape, env.action_size, config.HIDDEN_CNN_LAYERS)
best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, (2,) + env.grid_shape, env.action_size, config.HIDDEN_CNN_LAYERS)

# If loading an existing neural network, set the weights from that model
if initialise.INITIAL_MODEL_VERSION != None:
    best_player_version = initialise.INITIAL_MODEL_VERSION
    print('LOADING MODEL VERSION ' + str(initialise.INITIAL_MODEL_VERSION) + '...')
    m_tmp = best_NN.read(env.name, initialise.INITIAL_RUN_NUMBER, best_player_version)
    current_NN.model.set_weights(m_tmp.get_weights())
    best_NN.model.set_weights(m_tmp.get_weights())
# otherwise just ensure the weights on the two players are the same
else:
    best_player_version = 0
    best_NN.model.set_weights(current_NN.model.get_weights())

# copy the config file to the run folder
copyfile('./config.py', run_folder + 'config.py')
plot_model(current_NN.model, to_file=run_folder + 'models/model.png', show_shapes=True)

print('\n')

######## CREATE THE PLAYERS ########