def train(n, batch_number, epoch):
    """Train the current agent on every gathered experience file.

    Walks every subdirectory of ``experience/``, loads each HDF5 experience
    file, merges them into a single buffer, snapshots the current agent as
    the "old" agent, then trains and serializes the updated agent (plus an
    archived copy tagged with ``n``).

    Parameters
    ----------
    n : int
        Iteration number; used to name the archived agent copy.
    batch_number : int
        Mini-batch size passed to ``agent.train``.
    epoch : int
        Number of training epochs passed to ``agent.train``.
    """
    print("----Training model on all experience gathered----")

    old_agent_filename = 'agent/old_agent2/agent2.hdf5'
    updated_agent_filename = 'agent/new_agent2/agent2.hdf5'
    for_archive = 'agent/agent2_archive/agent2_{}.hdf5'.format(n)

    # Gather every experience buffer found under experience/<subdir>/<file>.
    # NOTE(review): the h5py.File handles are never closed here — assumes
    # load_experience copies the data out of the file; confirm before adding
    # a context manager.
    buffers = []
    main_dir = 'experience/'
    for subdir in os.listdir(main_dir):
        print(subdir)
        subdir_path = main_dir + subdir
        for fname in os.listdir(subdir_path):
            buffers.append(load_experience(h5py.File(subdir_path + '/' + fname)))

    # Snapshot the current "new" agent into the "old" slot before training.
    snapshot = load_policy_agent(updated_agent_filename)
    snapshot.serialize(old_agent_filename)

    agent = load_policy_agent(updated_agent_filename)

    merged = Experience_Collector().combine_buffer_array(buffers)
    print("Length of final buffer states: {}".format(
        merged.states.shape))

    # batch_number plays the role of the mini-batch size.
    agent.train(merged, batch_number, epoch, n)

    agent.serialize(updated_agent_filename)
    agent.serialize(for_archive)
def __init__(self, position, gameSize):
    """Initialize the self-play agent player.

    Builds a fresh policy/value network (and serializes it) the first time
    no agent file exists; in every case the final agents are the ones
    loaded back from disk, so both branches end in an identical state.

    Parameters
    ----------
    position : object
        Forwarded to ``Player.__init__`` (semantics defined by the base class).
    gameSize : int
        Board edge length; used for both row and column counts.
    """
    Player.__init__(self, position, gameSize)
    self.encoder = self.get_encoder_by_name('SeegaEncoder_Board', gameSize)
    self.model = load_model('seega_model.h5')
    self.num_encoded_layers = 9
    self.row = gameSize
    self.col = gameSize
    self.player_to_color = {1: 'black', -1: 'white'}
    self.color_to_player = {'black': 1, 'white': -1}

    agent_dir = 'agent/new_agent2/agent2.hdf5'

    # Create and serialize a brand-new network only when no agent file exists.
    if not os.path.exists(agent_dir):
        print("Creating new model")
        self.encoder = SeegaEncoder_Board(gameSize)
        board_input = Input(shape=(self.encoder.board_height(),
                                   self.encoder.board_width(),
                                   self.encoder.layers_encode),
                            name='board_input')
        conv1 = Conv2D(128, (2, 2), padding='valid', strides=1)(board_input)
        conv2 = Conv2D(128, (2, 2), padding='same', activation='relu')(conv1)
        conv3 = Conv2D(128, (2, 2), padding='same', activation='sigmoid')(conv2)
        conv4 = Conv2D(128, (2, 2), activation='relu')(conv3)
        flat = Flatten()(conv4)
        processed_board = Dense(512, activation='sigmoid')(flat)
        policy_hidden_layer = Dense(512, activation='relu')(processed_board)
        policy_output = Dense(self.encoder.num_moves(),
                              activation='softmax')(policy_hidden_layer)
        value_hidden_layer = Dense(512, activation='relu')(processed_board)
        value_output = Dense(1, activation='tanh')(value_hidden_layer)
        model = Model(inputs=board_input, outputs=[policy_output, value_output])

        # Both colors share the same freshly built network.
        self.agent_white = SeegaAgent(model, self.encoder)
        self.agent_black = SeegaAgent(model, self.encoder)
        # One serialize suffices: both agents wrap the same model and target
        # the same file (the original wrote the identical file twice).
        self.agent_white.serialize(agent_dir)

    # Always load the agents from disk. The original also loaded them inside
    # the else-branch and then immediately reloaded them here, so that first
    # load was dead work — a single unconditional load is equivalent.
    self.agent_white = self.load_policy_agent(h5file_dir=agent_dir)
    self.agent_black = self.load_policy_agent(h5file_dir=agent_dir)

    self.collector_white = Experience_Collector()
    self.collector_black = Experience_Collector()
    self.agent_black.set_collector(self.collector_black)
    self.agent_white.set_collector(self.collector_white)
def __init__(self, position, gameSize):
    """Initialize the default (pre-trained) agent player.

    Loads both colored agents from a fixed serialized agent file and
    attaches a fresh experience collector to each.

    Parameters
    ----------
    position : object
        Forwarded to ``Player.__init__`` (semantics defined by the base class).
    gameSize : int
        Board edge length; used for both row and column counts.
    """
    Player.__init__(self, position, gameSize)
    self.encoder = self.get_encoder_by_name('SeegaEncoder_Board', gameSize)
    self.model = load_model('seega_model.h5')
    self.num_encoded_layers = 4
    self.row = gameSize
    self.col = gameSize
    self.player_to_color = {1: 'black', -1: 'white'}
    self.color_to_player = {'black': 1, 'white': -1}

    agent_dir = 'agent/default_agent/agent_8.hdf5'

    # NOTE(review): a large commented-out model-construction block (carried
    # as a no-op triple-quoted string, re-evaluated on every call) was
    # removed here; recover it from version control if ever needed.

    self.collector_white = Experience_Collector()
    self.collector_black = Experience_Collector()
    self.agent_white = self.load_policy_agent(h5file_dir=agent_dir)
    self.agent_black = self.load_policy_agent(h5file_dir=agent_dir)
    self.agent_black.set_collector(self.collector_black)
    self.agent_white.set_collector(self.collector_white)
def load_agent(self, white_agent_dir, black_agent_dir):
    """Reload both colored policy agents from the given HDF5 files and
    give each one a fresh experience collector.

    Parameters
    ----------
    white_agent_dir : str
        Path to the white agent's serialized HDF5 file.
    black_agent_dir : str
        Path to the black agent's serialized HDF5 file.
    """
    print("---USING NEW MODEL----")
    # Fresh collectors first, then the agents they will be attached to.
    self.collector_white = Experience_Collector()
    self.collector_black = Experience_Collector()
    self.agent_white = self.load_policy_agent(h5file_dir=white_agent_dir)
    self.agent_black = self.load_policy_agent(h5file_dir=black_agent_dir)
    self.agent_white.set_collector(self.collector_white)
    self.agent_black.set_collector(self.collector_black)
advantages=h5file['experience']['advantages']) print("----Training model on all experience gathered----") updated_agent_filename = 'agent/default_agent/agent2_2.hdf5' lr = 0.0123 batch_size = 500 #agent_white = load_policy_agent(h5file_dir=white_agent_dir) #agent_black = load_policy_agent(h5file_dir=black_agent_dir) experience_array = [] main_dir = 'old_experience/old_exp2/' exp_dirs = os.listdir(main_dir) for exp_dir in exp_dirs: print(exp_dir) path = main_dir + exp_dir for file in os.listdir(path): filename = path + '/' + file experience_array.append(load_experience(h5py.File(filename))) agent = load_policy_agent('agent/default_agent/agent1.hdf5') exp_buffer = Experience_Collector() final_buffer = exp_buffer.combine_buffer_array(experience_array) print("Length of final buffer states: {}".format(final_buffer.states.shape)) #exp_buffer = self.load_experience(h5py.File(exp_filename)) #self.agent_white.train(exp_buffer,lr,batch_size) agent.train(final_buffer, batch_size) agent.serialize(updated_agent_filename)