Example #1
def train(n, batch_number, epoch):
    """Retrain the current agent on all gathered experience, then archive the result."""
    print("----Training model on all experience gathered----")
    old_agent_filename = 'agent/old_agent2/agent2.hdf5'
    updated_agent_filename = 'agent/new_agent2/agent2.hdf5'
    for_archive = 'agent/agent2_archive/agent2_{}.hdf5'.format(n)
    #batch_size=1000
    batch_size = batch_number
    #agent_white = load_policy_agent(h5file_dir=white_agent_dir)
    #agent_black = load_policy_agent(h5file_dir=black_agent_dir)
    # Gather every experience buffer saved under experience/.
    experience_array = []
    main_dir = 'experience/'
    exp_dirs = os.listdir(main_dir)
    for exp_dir in exp_dirs:
        print(exp_dir)
        path = main_dir + exp_dir
        for file in os.listdir(path):
            filename = path + '/' + file
            experience_array.append(load_experience(h5py.File(filename, 'r')))

    # Back up the current agent as the "old" agent before training.
    old_agent = load_policy_agent(updated_agent_filename)
    old_agent.serialize(old_agent_filename)

    # Reload the current agent and merge all experience into a single buffer.
    agent = load_policy_agent(updated_agent_filename)
    exp_buffer = Experience_Collector()
    final_buffer = exp_buffer.combine_buffer_array(experience_array)
    print("Length of final buffer states: {}".format(
        final_buffer.states.shape))
    #exp_buffer = self.load_experience(h5py.File(exp_filename))

    #self.agent_white.train(exp_buffer,lr,batch_size)
    # Train on the merged buffer, then save the updated agent and an archived copy.
    agent.train(final_buffer, batch_size, epoch, n)

    agent.serialize(updated_agent_filename)
    agent.serialize(for_archive)
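
A minimal driver sketch for the function above, assuming the experience/ directory has already been filled by self-play games; the iteration count, batch size, and epoch count are illustrative values, not taken from the project.

# Hypothetical driver loop (illustrative values only).
for iteration in range(5):
    # n tags the archived copy, batch_number is the mini-batch size,
    # epoch is the number of passes over the merged buffer.
    train(n=iteration, batch_number=1000, epoch=3)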
Example #2
    def __init__(self, position, gameSize):
        Player.__init__(self, position, gameSize)
        self.encoder = self.get_encoder_by_name('SeegaEncoder_Board', gameSize)
        self.model = load_model('seega_model.h5')
        self.num_encoded_layers = 9
        self.row = gameSize
        self.col = gameSize
        self.player_to_color = {1: 'black', -1: 'white'}
        self.color_to_player = {'black': 1, 'white': -1}

        agent_dir = 'agent/new_agent2/agent2.hdf5'

        # Create the game agents
        if not os.path.exists(agent_dir):
            print("Creating new model")
            self.encoder = SeegaEncoder_Board(gameSize)

            board_input = Input(shape=(self.encoder.board_height(),
                                       self.encoder.board_width(),
                                       self.encoder.layers_encode),
                                name='board_input')

            # Convolutional trunk shared by the policy and value heads.
            conv1 = Conv2D(128, (2, 2), padding='valid',
                           strides=1)(board_input)
            conv2 = Conv2D(128, (2, 2), padding='same',
                           activation='relu')(conv1)
            #conv2 = Dropout(0.5)(conv2)
            conv3 = Conv2D(128, (2, 2), padding='same',
                           activation='sigmoid')(conv2)
            conv4 = Conv2D(128, (2, 2), activation='relu')(conv3)
            flat = Flatten()(conv4)
            processed_board = Dense(512, activation='sigmoid')(flat)
            # Policy head: probability distribution over every encoded move.
            policy_hidden_layer = Dense(512,
                                        activation='relu')(processed_board)
            policy_output = Dense(self.encoder.num_moves(),
                                  activation='softmax')(policy_hidden_layer)

            # Value head: scalar evaluation of the position in [-1, 1].
            value_hidden_layer = Dense(512, activation='relu')(processed_board)
            value_output = Dense(1, activation='tanh')(value_hidden_layer)

            model = Model(inputs=board_input,
                          outputs=[policy_output, value_output])

            self.agent_white = SeegaAgent(model, self.encoder)
            self.agent_black = SeegaAgent(model, self.encoder)
            # Save agent players
            self.agent_white.serialize(agent_dir)
            self.agent_black.serialize(agent_dir)
        # Collectors record the self-play experience generated by each colour.
        self.collector_white = Experience_Collector()
        self.collector_black = Experience_Collector()

        # Both players are loaded from the same saved agent file
        # (serialized above if it was just created).
        self.agent_white = self.load_policy_agent(h5file_dir=agent_dir)
        self.agent_black = self.load_policy_agent(h5file_dir=agent_dir)

        self.agent_black.set_collector(self.collector_black)
        self.agent_white.set_collector(self.collector_white)
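
The constructor above wires a two-headed Keras model: a softmax policy head over self.encoder.num_moves() move encodings and a tanh value head that scores the position. The snippet never shows how the model is compiled (that presumably happens inside SeegaAgent), so the following is only a sketch of a typical compilation for such a policy/value network; the tensorflow.keras import path, optimizer, losses, and loss weights are assumptions, not taken from the project.

# Sketch only: typical compilation for a two-output policy/value model.
# Optimizer, losses, and loss weights below are illustrative assumptions.
from tensorflow.keras.optimizers import SGD

model.compile(optimizer=SGD(learning_rate=0.01),
              loss=['categorical_crossentropy', 'mse'],  # policy head, value head
              loss_weights=[1.0, 0.5])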
Example #3
    def __init__(self, position, gameSize):
        Player.__init__(self, position, gameSize)
        self.encoder = self.get_encoder_by_name('SeegaEncoder_Board', gameSize)
        self.model = load_model('seega_model.h5')
        self.num_encoded_layers = 4
        self.row = gameSize
        self.col = gameSize
        self.player_to_color = {1: 'black', -1: 'white'}
        self.color_to_player = {'black': 1, 'white': -1}

        agent_dir = 'agent/default_agent/agent_8.hdf5'
        '''
        # Create the game agents
        if os.path.exists(black_agent_dir) != True or os.path.exists(white_agent_dir) != True:
            self.encoder = SeegaEncoder_Board(gameSize)
            input_shape = (self.encoder.board_height(), self.encoder.board_width(), self.encoder.layers_encode)
            model = Sequential()
            model.add(layers.Conv2D(100, (2, 2), padding='same', data_format="channels_last", input_shape=input_shape))
            # model.add(BatchNormalization())
            # model.add(layers.MaxPooling2D((2, 2)))
            # model.add(layers.Conv2D(100, (2, 2), padding='same', activation='relu'))
            # model.add(layers.MaxPooling2D((1, )))
            model.add(layers.Conv2D(128, (2, 2), padding='same', activation='relu'))
            # model.add(BatchNormalization())
            # model.add(layers.MaxPooling2D((2, 2)))
            model.add(layers.Conv2D(256, (2, 2), activation='relu'))
            model.add(layers.Conv2D(512, (2, 2), activation='relu'))
            # model.add(BatchNormalization())
            # model.add(layers.MaxPooling2D((2, 2)))
            model.add(layers.Flatten())
            # model.add(layers.Dropout(0.5))
            # model.add(layers.Dense(100, activation='relu'))
            # model.add(BatchNormalization())
            # model.add(layers.Dropout(0.2))
            model.add(layers.Dense(self.encoder.num_moves(), activation='sigmoid'))
            model.add(layers.Dense(self.encoder.num_moves(), activation='softmax'))

            self.agent_white = SeegaAgent(model, self.encoder)
            self.agent_black = SeegaAgent(model, self.encoder)
            # Save agent players
            self.agent_white.serialize(white_agent_dir)
            self.agent_black.serialize(black_agent_dir)
        else:
            self.agent_white = self.load_policy_agent(h5file_dir=white_agent_dir)
            self.agent_black = self.load_policy_agent(h5file_dir=black_agent_dir)

        '''

        # Collectors record the self-play experience generated by each colour.
        self.collector_white = Experience_Collector()
        self.collector_black = Experience_Collector()

        # Both players are loaded from the same saved default agent file.
        self.agent_white = self.load_policy_agent(h5file_dir=agent_dir)
        self.agent_black = self.load_policy_agent(h5file_dir=agent_dir)

        self.agent_black.set_collector(self.collector_black)
        self.agent_white.set_collector(self.collector_white)
Example #4
    def load_agent(self, white_agent_dir, black_agent_dir):
        """Reload both players from saved agent files and attach fresh experience collectors."""
        print("---USING NEW MODEL----")
        self.agent_white = self.load_policy_agent(h5file_dir=white_agent_dir)
        self.agent_black = self.load_policy_agent(h5file_dir=black_agent_dir)

        self.collector_white = Experience_Collector()
        self.collector_black = Experience_Collector()

        self.agent_black.set_collector(self.collector_black)
        self.agent_white.set_collector(self.collector_white)
Example #5
                             advantages=h5file['experience']['advantages'])


print("----Training model on all experience gathered----")
updated_agent_filename = 'agent/default_agent/agent2_2.hdf5'

lr = 0.0123
batch_size = 500

#agent_white = load_policy_agent(h5file_dir=white_agent_dir)
#agent_black = load_policy_agent(h5file_dir=black_agent_dir)
# Gather every experience buffer saved under old_experience/old_exp2/.
experience_array = []
main_dir = 'old_experience/old_exp2/'
exp_dirs = os.listdir(main_dir)
for exp_dir in exp_dirs:
    print(exp_dir)
    path = main_dir + exp_dir
    for file in os.listdir(path):
        filename = path + '/' + file
        experience_array.append(load_experience(h5py.File(filename, 'r')))

# Load the base agent and merge every experience buffer into one.
agent = load_policy_agent('agent/default_agent/agent1.hdf5')
exp_buffer = Experience_Collector()
final_buffer = exp_buffer.combine_buffer_array(experience_array)
print("Shape of final buffer states: {}".format(final_buffer.states.shape))
#exp_buffer = self.load_experience(h5py.File(exp_filename))

#self.agent_white.train(exp_buffer,lr,batch_size)
# Train on the merged buffer and save the updated agent.
agent.train(final_buffer, batch_size)
agent.serialize(updated_agent_filename)
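
This example opens with the tail of a load_experience helper that was cut off above; the only structure it reveals is an 'experience' group holding datasets such as 'advantages'. A small sketch for inspecting that layout, using only standard h5py calls (the file path is a hypothetical placeholder):

# Sketch (not project code): list the datasets stored in a saved experience file.
import h5py

with h5py.File('path/to/experience_file.hdf5', 'r') as f:   # hypothetical path
    for name, dataset in f['experience'].items():
        print(name, dataset.shape)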