import numpy as np
from keras import backend as K
from keras.layers import Conv2D, Dense, Flatten, Input
from keras.models import Model, load_model
from keras.optimizers import Adam
from keras.regularizers import l2


class PolicyValueNet():
    """policy-value network """

    def __init__(self, board_width, board_height, model_file=None):
        self.board_width = board_width
        self.board_height = board_height
        self.l2_const = 1e-4  # coefficient of the L2 penalty
        # Build the network once; this also defines self.policy_value.
        self.create_policy_value_net()
        if model_file:
            print("[Notice] load model from file")
            self.model = load_model(model_file)
        else:
            print("[Notice] create model")
        self._loss_train_op()

    def create_policy_value_net(self):
        """create the policy value network """
        in_x = network = Input((4, self.board_width, self.board_height))

        # conv layers
        network = Conv2D(filters=32, kernel_size=(3, 3), padding="same",
                         data_format="channels_first", activation="relu",
                         kernel_regularizer=l2(self.l2_const))(network)
        network = Conv2D(filters=64, kernel_size=(3, 3), padding="same",
                         data_format="channels_first", activation="relu",
                         kernel_regularizer=l2(self.l2_const))(network)
        network = Conv2D(filters=128, kernel_size=(3, 3), padding="same",
                         data_format="channels_first", activation="relu",
                         kernel_regularizer=l2(self.l2_const))(network)

        # action policy layers
        policy_net = Conv2D(filters=4, kernel_size=(1, 1),
                            data_format="channels_first", activation="relu",
                            kernel_regularizer=l2(self.l2_const))(network)
        policy_net = Flatten()(policy_net)
        self.policy_net = Dense(self.board_width * self.board_height,
                                activation="softmax",
                                kernel_regularizer=l2(self.l2_const))(policy_net)

        # state value layers
        value_net = Conv2D(filters=2, kernel_size=(1, 1),
                           data_format="channels_first", activation="relu",
                           kernel_regularizer=l2(self.l2_const))(network)
        value_net = Flatten()(value_net)
        value_net = Dense(64, kernel_regularizer=l2(self.l2_const))(value_net)
        self.value_net = Dense(1, activation="tanh",
                               kernel_regularizer=l2(self.l2_const))(value_net)

        self.model = Model(in_x, [self.policy_net, self.value_net])

        def policy_value(state_input):
            state_input_union = np.array(state_input)
            results = self.model.predict_on_batch(state_input_union)
            return results

        self.policy_value = policy_value

    def policy_value_fn(self, board):
        """
        input: board
        output: a list of (action, probability) tuples for each available
            action and the score of the board state
        """
        legal_positions = board.availables
        current_state = board.current_state()
        act_probs, value = self.policy_value(
            current_state.reshape(-1, 4, self.board_width, self.board_height))
        act_probs = zip(legal_positions, act_probs.flatten()[legal_positions])
        return act_probs, value[0][0]

    def _loss_train_op(self):
        """
        Three loss terms:
        loss = (z - v)^2 - pi^T * log(p) + c||theta||^2
        """
        # get the train op
        opt = Adam()
        losses = ['categorical_crossentropy', 'mean_squared_error']
        self.model.compile(optimizer=opt, loss=losses)

        def self_entropy(probs):
            return -np.mean(np.sum(probs * np.log(probs + 1e-10), axis=1))

        def train_step(state_input, mcts_probs, winner, learning_rate):
            state_input_union = np.array(state_input)
            mcts_probs_union = np.array(mcts_probs)
            winner_union = np.array(winner)
            # Evaluate the loss before the update step.
            loss = self.model.evaluate(state_input_union,
                                       [mcts_probs_union, winner_union],
                                       batch_size=len(state_input),
                                       verbose=0)
            action_probs, _ = self.model.predict_on_batch(state_input_union)
            entropy = self_entropy(action_probs)
            K.set_value(self.model.optimizer.lr, learning_rate)
            self.model.fit(state_input_union,
                           [mcts_probs_union, winner_union],
                           batch_size=len(state_input), verbose=0)
            return loss[0], entropy

        self.train_step = train_step

    def get_policy_param(self):
        net_params = self.model.get_weights()
        return net_params

    def save_model(self, model_file):
        """ save model to file """
        print("save model file")
        self.model.save(model_file)
datagen_for_validation = ImageDataGenerator(
    zca_whitening=True,  # apply ZCA whitening
    zca_epsilon=1e-06    # epsilon for ZCA whitening
)
# Compute quantities required for feature-wise normalization
# (std, mean, and principal components if ZCA whitening is applied).
datagen_for_validation.fit(x_test)

datagen.fit(x_train)

# Fit the model on the batches generated by datagen.flow().
model.fit_generator(datagen.flow(x_train, y_train, batch_size=batch_size),
                    epochs=epochs,
                    callbacks=[lr_cb],
                    validation_data=(x_test, y_test),
                    validation_steps=100,  # only used when validation_data is a generator
                    verbose=1)

# Save model and weights
if not os.path.isdir(save_dir):
    os.makedirs(save_dir)
model_path = os.path.join(save_dir, model_name)
model.save(model_path)
print('Saved trained model at %s ' % model_path)

# Score trained model.
scores = model.evaluate(x_test, y_test, verbose=1)
print('Test loss:', scores[0])
print('Test accuracy:', scores[1])
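# --- Alternative validation wiring (hedged sketch; not in the original) ---
# Since `datagen_for_validation` applies ZCA whitening, the validation data
# arguably should flow through it too, so that training and validation see
# the same preprocessing. A possible variant of the `fit_generator` call:
#
# model.fit_generator(
#     datagen.flow(x_train, y_train, batch_size=batch_size),
#     epochs=epochs,
#     callbacks=[lr_cb],
#     validation_data=datagen_for_validation.flow(x_test, y_test,
#                                                 batch_size=batch_size),
#     validation_steps=len(x_test) // batch_size,
#     verbose=1)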
cnn = Convolution1D(filters=50, kernel_size=3, activation='tanh')(dropouted)
cnn = Convolution1D(filters=50, kernel_size=3, activation='tanh')(cnn)
flattened = Flatten()(cnn)
dense = Dense(100, activation='tanh')(flattened)
predict = Dense(2, activation='softmax')(dense)
model = Model(inputs=[word, distance_e1, distance_e2], outputs=predict)

# opt = RMSprop(lr=0.001, rho=0.9, epsilon=1e-06)
# opt = Adagrad(lr=0.01, epsilon=1e-06)
# opt = Adadelta(lr=1.0, rho=0.95, epsilon=1e-06)
# opt = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
opt = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=opt)

train_instances = [line.strip() for line in lines]
label_array_t, word_array_t, dis_e1_array_t, dis_e2_array_t = \
    rep.represent_instances(train_instances)
model.fit([word_array_t, dis_e1_array_t, dis_e2_array_t], label_array_t,
          batch_size=128, epochs=epoch_size)
model.save(output_file)

label_array_ans = model.predict([word_array_t, dis_e1_array_t, dis_e2_array_t],
                                batch_size=128)
print(label_array_ans)
print("Training complete!!")
eval_mulclass(label_array_t, label_array_ans)
import numpy as np
import matplotlib.pyplot as plt
from keras import layers
from keras.models import Model
# `append_to_filepath` is a small path helper defined elsewhere in this package.


class GAN(Model):
    """ Generative Adversarial Network (GAN). """

    def __init__(self, generator, discriminator):
        super(GAN, self).__init__()

        assert generator is not None
        assert discriminator is not None
        assert discriminator.optimizer is not None, "Discriminator must be compiled!"

        self.generator = generator
        self.discriminator = discriminator

        # Create the GAN: the generator followed by a frozen discriminator.
        z_shape = generator.inputs[0].shape[1:]
        gan_input = layers.Input(shape=z_shape)
        gan_output = gan_input
        gan_output = self.generator(gan_output)
        self.discriminator.trainable = False
        gan_output = self.discriminator(gan_output)
        self.gan = Model(gan_input, gan_output)

    def compile(self, optimizer, loss=None, metrics=None, loss_weights=None,
                sample_weight_mode=None, weighted_metrics=None,
                target_tensors=None, **kwargs):
        """ Compiles the model. Same as vanilla Keras. """
        self.gan.compile(optimizer, loss, metrics, loss_weights,
                         sample_weight_mode, weighted_metrics,
                         target_tensors, **kwargs)

    def fit(self, x=None, y=None, batch_size=None, epochs=1,
            sample_interval=None,  # interval (in epochs) at which to render image samples
            verbose=1, callbacks=None, validation_split=0.,
            validation_data=None, shuffle=True, class_weight=None,
            sample_weight=None, initial_epoch=0, steps_per_epoch=None,
            validation_steps=None, **kwargs):
        """ Trains the GAN. This is almost the same as in vanilla Keras. """

        # Adversarial ground truths.
        valid = np.ones((batch_size, 1))
        fake = np.zeros((batch_size, 1))

        for epoch in range(epochs):
            # Select a random batch of images.
            idx = np.random.randint(0, x.shape[0], batch_size)
            imgs = x[idx]

            # Create some noise.
            noise = np.random.normal(0, 1, (batch_size, 100))

            # Generate a batch of new images.
            gen_imgs = self.generator.predict(noise)

            # Train the discriminator.
            d_loss_real = self.discriminator.train_on_batch(imgs, valid)
            d_loss_fake = self.discriminator.train_on_batch(gen_imgs, fake)
            d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

            # Create some noise.
            noise = np.random.normal(0, 1, (batch_size, 100))

            # Train the generator (to have the discriminator label samples as valid).
            g_loss = self.gan.train_on_batch(noise, valid)
            if isinstance(g_loss, list):
                g_loss = g_loss[0]

            # Plot the progress.
            print("%d [D loss: %f, acc.: %.2f%%] [G loss: %f]" %
                  (epoch, d_loss[0], 100 * d_loss[1], g_loss), end="\r")

            # If at sample interval => save generated image samples.
            if sample_interval is not None and epoch % sample_interval == 0:
                self.sample_images(epoch)

    def sample_images(self, epoch):
        """ Samples images. """
        r, c = 5, 5
        noise = np.random.normal(0, 1, (r * c, 100))
        gen_imgs = self.generator.predict(noise)

        # Rescale images to 0 - 1.
        gen_imgs = 0.5 * gen_imgs + 0.5

        fig, axs = plt.subplots(r, c)
        cnt = 0
        for i in range(r):
            for j in range(c):
                axs[i, j].imshow(gen_imgs[cnt, :, :, 0], cmap='gray')
                axs[i, j].axis('off')
                cnt += 1
        # fig.savefig("images/%d.png" % epoch)
        plt.show()
        plt.close()

    def summary(self):
        """ Provides a summary. """
        print("Generator:")
        self.generator.summary()
        print("Discriminator:")
        self.discriminator.summary()
        print("GAN:")
        self.gan.summary()

    def save(self, path):
        """
        Saves the GAN.

        This includes the whole GAN plus the generator and the discriminator.
        The generator and discriminator use the path plus a respective
        annotation.

        This code

        >>> gan.save("mygan.h5")

        will create the files *mygan.h5*, *mygan-generator.h5*, and
        *mygan-discriminator.h5*.
        """
        self.gan.save(path)
        self.generator.save(append_to_filepath(path, "-generator"))
        self.discriminator.save(append_to_filepath(path, "-discriminator"))
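# --- Usage sketch (hedged; not part of the original source) ---
# A tiny MNIST-sized generator/discriminator pair to exercise the class
# above. The `fit` loop hard-codes a 100-dim noise vector and
# `sample_images` indexes channel 0, so the shapes below follow from
# that; the architectures themselves are hypothetical placeholders.
z = layers.Input(shape=(100,))
g = layers.Dense(28 * 28, activation="tanh")(z)
g = layers.Reshape((28, 28, 1))(g)
generator = Model(z, g)

img = layers.Input(shape=(28, 28, 1))
d = layers.Flatten()(img)
d = layers.Dense(1, activation="sigmoid")(d)
discriminator = Model(img, d)
# The class requires a compiled discriminator; accuracy is needed for the
# progress printout.
discriminator.compile(optimizer="adam", loss="binary_crossentropy",
                      metrics=["accuracy"])

gan = GAN(generator, discriminator)
gan.compile(optimizer="adam", loss="binary_crossentropy")
x_train = np.random.random((256, 28, 28, 1))  # stand-in for real images
gan.fit(x_train, batch_size=32, epochs=5)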
def multi_gpu_model(model, gpus):
    """Replicates a model on different GPUs.

    Specifically, this function implements single-machine
    multi-GPU data parallelism. It works in the following way:

    - Divide the model's input(s) into multiple sub-batches.
    - Apply a model copy on each sub-batch. Every model copy
        is executed on a dedicated GPU.
    - Concatenate the results (on CPU) into one big batch.

    E.g. if your `batch_size` is 64 and you use `gpus=2`,
    then we will divide the input into 2 sub-batches of 32 samples,
    process each sub-batch on one GPU, then return the full
    batch of 64 processed samples.

    This induces quasi-linear speedup on up to 8 GPUs.

    This function is only available with the TensorFlow backend
    for the time being.

    # Arguments
        model: A Keras model instance. To avoid OOM errors,
            this model could have been built on CPU, for instance
            (see usage example below).
        gpus: Integer >= 2, number of GPUs on which to create
            model replicas.

    # Returns
        A Keras `Model` instance which can be used just like the
        initial `model` argument, but which distributes its workload
        on multiple GPUs.

    # Example

    ```python
        import tensorflow as tf
        from keras.applications import Xception
        from keras.utils import multi_gpu_model
        import numpy as np

        num_samples = 1000
        height = 224
        width = 224
        num_classes = 1000

        # Instantiate the base model
        # (here, we do it on CPU, which is optional).
        with tf.device('/cpu:0'):
            model = Xception(weights=None,
                             input_shape=(height, width, 3),
                             classes=num_classes)

        # Replicates the model on 8 GPUs.
        # This assumes that your machine has 8 available GPUs.
        parallel_model = multi_gpu_model(model, gpus=8)
        parallel_model.compile(loss='categorical_crossentropy',
                               optimizer='rmsprop')

        # Generate dummy data.
        x = np.random.random((num_samples, height, width, 3))
        y = np.random.random((num_samples, num_classes))

        # This `fit` call will be distributed on 8 GPUs.
        # Since the batch size is 256, each GPU will process 32 samples.
        parallel_model.fit(x, y, epochs=20, batch_size=256)
    ```
    """
    if K.backend() != 'tensorflow':
        raise ValueError('`multi_gpu_model` is only available '
                         'with the TensorFlow backend.')
    if gpus <= 1:
        # Nothing to parallelize; return the model unchanged.
        return model

    target_devices = ['/cpu:0'] + ['/gpu:%d' % i for i in range(gpus)]
    available_devices = _get_available_devices()
    available_devices = [_normalize_device_name(name)
                         for name in available_devices]
    for device in target_devices:
        if device not in available_devices:
            raise ValueError(
                'To call `multi_gpu_model` with `gpus=%d`, '
                'we expect the following devices to be available: %s. '
                'However this machine only has: %s. '
                'Try reducing `gpus`.' % (gpus, target_devices,
                                          available_devices))

    def get_slice(data, i, parts):
        shape = tf.shape(data)
        batch_size = shape[:1]
        input_shape = shape[1:]
        step = batch_size // parts
        if i == gpus - 1:
            # The last replica absorbs the remainder of the batch.
            size = batch_size - step * i
        else:
            size = step
        size = tf.concat([size, input_shape], axis=0)
        stride = tf.concat([step, input_shape * 0], axis=0)
        start = stride * i
        return tf.slice(data, start, size)

    all_outputs = []
    for i in range(len(model.outputs)):
        all_outputs.append([])

    # Place a copy of the model on each GPU,
    # each getting a slice of the inputs.
    for i in range(gpus):
        with tf.device('/gpu:%d' % i):
            with tf.name_scope('replica_%d' % i):
                inputs = []
                # Retrieve a slice of the input.
                for x in model.inputs:
                    input_shape = tuple(x.get_shape().as_list())[1:]
                    slice_i = Lambda(get_slice,
                                     output_shape=input_shape,
                                     arguments={'i': i,
                                                'parts': gpus})(x)
                    inputs.append(slice_i)

                # Apply model on slice
                # (creating a model replica on the target device).
                outputs = model(inputs)
                if not isinstance(outputs, list):
                    outputs = [outputs]

                # Save the outputs for merging back together later.
                for o in range(len(outputs)):
                    all_outputs[o].append(outputs[o])

    # Merge outputs on CPU.
    with tf.device('/cpu:0'):
        merged = []
        for outputs in all_outputs:
            merged.append(concatenate(outputs, axis=0))
        new_model = Model(model.inputs, outputs=merged)

        # Monkey-patch `save` so that saving the parallel model saves
        # just the underlying template model.
        funcType = type(model.save)

        def new_save(self_, filepath, overwrite=True):
            model.save(filepath, overwrite)

        new_model.save = funcType(new_save, new_model)
        return new_model
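# --- Sanity check (hedged; not part of the original source) ---
# A plain-Python check of the batch-splitting arithmetic used by
# `get_slice` above: every replica gets `batch_size // parts` samples,
# except the last one, which absorbs the remainder.
def _slice_sizes(batch_size, parts):
    step = batch_size // parts
    return [step] * (parts - 1) + [batch_size - step * (parts - 1)]

assert _slice_sizes(64, 2) == [32, 32]
assert _slice_sizes(10, 3) == [3, 3, 4]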
class AE(Model):
    """
    Autoencoder. This is a simple autoencoder consisting of an encoder
    and a decoder.

    You can use the class like this:
    >>> encoder = ...
    >>> decoder = ...
    >>> ae = AE(encoder=encoder, decoder=decoder)
    >>> ae.compile(...)
    >>> ae.fit(...)
    """

    def __init__(self, encoder=None, decoder=None, autoencoder=None):
        super(AE, self).__init__()

        # For calling this as a super-constructor.
        parameters = [encoder, decoder]
        if all(v is None for v in parameters):
            return

        # From loading.
        if encoder is not None and decoder is not None and autoencoder is not None:
            self.encoder = encoder
            self.decoder = decoder
            self.autoencoder = autoencoder
            return

        # Check preconditions.
        assert len(encoder.outputs) == 1
        assert len(decoder.inputs) == 1
        assert encoder.outputs[0].shape[1:] == decoder.inputs[0].shape[1:], \
            str(encoder.outputs[0].shape) + " " + str(decoder.inputs[0].shape)
        self.latent_dim = encoder.outputs[0].shape[1]

        self.encoder = encoder
        self.decoder = decoder

        # Creating the AE.
        inputs = self.encoder.inputs[0]
        outputs = self.decoder(self.encoder(inputs))
        self.autoencoder = Model(inputs, outputs, name='ae')

    def compile(self, optimizer, loss=None, metrics=None, loss_weights=None,
                sample_weight_mode=None, weighted_metrics=None,
                target_tensors=None, **kwargs):
        """ Compiles the model. This is the same as compilation in Keras. """
        assert "reconstruction_loss" not in kwargs, \
            "Not expected to use reconstruction_loss in AE."
        self.autoencoder.compile(optimizer, loss, metrics, loss_weights,
                                 sample_weight_mode, weighted_metrics,
                                 **kwargs)

    def fit(self, x=None, y=None, batch_size=None, epochs=1, verbose=1,
            callbacks=None, validation_split=0., validation_data=None,
            shuffle=True, class_weight=None, sample_weight=None,
            initial_epoch=0, steps_per_epoch=None, validation_steps=None,
            **kwargs):
        """ Trains the autoencoder. """
        return self.autoencoder.fit(x, y, batch_size, epochs, verbose,
                                    callbacks, validation_split,
                                    validation_data, shuffle, class_weight,
                                    sample_weight, initial_epoch,
                                    steps_per_epoch, validation_steps,
                                    **kwargs)

    def fit_generator(self, generator, steps_per_epoch=None, epochs=1,
                      verbose=1, callbacks=None, validation_data=None,
                      validation_steps=None, class_weight=None,
                      max_queue_size=10, workers=1,
                      use_multiprocessing=False, shuffle=True,
                      initial_epoch=0):
        """ Trains the autoencoder with a generator. """
        return self.autoencoder.fit_generator(
            generator, steps_per_epoch, epochs,
            verbose=verbose,
            callbacks=callbacks,
            validation_data=validation_data,
            validation_steps=validation_steps,
            class_weight=class_weight,
            max_queue_size=max_queue_size,
            workers=workers,
            use_multiprocessing=use_multiprocessing,
            shuffle=shuffle,
            initial_epoch=initial_epoch)

    def evaluate(self, x=None, y=None, batch_size=None, verbose=1,
                 sample_weight=None, steps=None):
        """ Evaluates the autoencoder. """
        return self.autoencoder.evaluate(x, y, batch_size, verbose,
                                         sample_weight, steps=steps)

    def predict(self, x, batch_size=None, verbose=0, steps=None):
        """
        Does a prediction. This is the same as
        :func:`~ngdlm.models.AE.predict_reconstruct_from_samples`.
        """
        return self.predict_reconstruct_from_samples(x, batch_size, verbose,
                                                     steps)

    def predict_reconstruct_from_samples(self, x, batch_size=None, verbose=0,
                                         steps=None):
        """
        Reconstructs samples.

        Samples are firstly mapped to latent space using the encoder.
        The resulting latent vectors are then mapped to reconstruction
        space via the decoder.
        """
        return self.autoencoder.predict(x, batch_size, verbose, steps)

    def predict_embed_samples_into_latent(self, x, batch_size=None,
                                          verbose=0, steps=None):
        """ Embeds samples into latent space using the encoder. """
        return self.encoder.predict(x, batch_size, verbose, steps)

    def predict_reconstruct_from_latent(self, x, batch_size=None, verbose=0,
                                        steps=None):
        """ Maps latent vectors to reconstruction space using the decoder. """
        return self.decoder.predict(x, batch_size, verbose, steps)

    def summary(self):
        """ Provides a summary. """
        print("Encoder:")
        self.encoder.summary()
        print("Decoder:")
        self.decoder.summary()
        print("Autoencoder:")
        self.autoencoder.summary()

    def save(self, path):
        """
        Saves the autoencoder.

        This includes the whole autoencoder plus the encoder and the
        decoder. The encoder and decoder use the path plus a respective
        annotation.

        This code

        >>> ae.save("myae.h5")

        will create the files *myae.h5*, *myae-encoder.h5*, and
        *myae-decoder.h5*.
        """
        self.autoencoder.save(path)
        self.encoder.save(append_to_filepath(path, "-encoder"))
        self.decoder.save(append_to_filepath(path, "-decoder"))
class TL(Model):
    """
    Triplet-loss trained neural network.
    https://arxiv.org/abs/1503.03832
    """

    def __init__(self, base=None, siamese=None):
        super(TL, self).__init__()

        # Store the base model.
        assert base is not None
        self.base = base

        # For loading.
        if base is not None and siamese is not None:
            self.base = base
            self.siamese = siamese
            self.latent_dim = self.base.outputs[0].shape[1]
            return

        # Get the latent dimension.
        assert len(self.base.outputs) == 1
        assert len(self.base.outputs[0].shape) == 2
        self.latent_dim = self.base.outputs[0].shape[1]

        # Get the input shape.
        input_shape = self.base.inputs[0].shape.as_list()[1:]

        # Create the anchor.
        input_anchor = layers.Input(shape=input_shape)
        output_anchor = input_anchor
        output_anchor = self.base(output_anchor)

        # Create the positive.
        input_positive = layers.Input(shape=input_shape)
        output_positive = input_positive
        output_positive = self.base(output_positive)

        # Create the negative.
        input_negative = layers.Input(shape=input_shape)
        output_negative = input_negative
        output_negative = self.base(output_negative)

        # Create a dummy output.
        output = layers.concatenate(
            [output_anchor, output_positive, output_negative])

        # Create the model.
        self.siamese = Model([input_anchor, input_positive, input_negative],
                             output, name="triplet_model")

    def compile(self, optimizer, loss=None, metrics=None, loss_weights=None,
                sample_weight_mode=None, weighted_metrics=None,
                target_tensors=None, triplet_loss="euclidean", **kwargs):
        """
        Compiles the TL.

        Additionally to the default functionality of *compile*, it adds
        the triplet loss. In order to do so you have to provide it via
        the parameter *triplet_loss*. The triplet loss is similar to

        >>> triplet_loss = max(0.0, pos_dist - neg_dist + alpha)

        See the literature for details.

        Additional args:
            triplet_loss (string): The base loss for the triplet loss.
                Values are either *euclidean* for the Euclidean norm or
                *cosine* for cosine similarity.
        """
        assert loss is None, \
            "Not expected to provide an explicit loss for TL. Use 'triplet_loss'."

        self.triplet_loss = triplet_loss

        def triplet_loss_function(y_true, y_pred, alpha=0.4):
            anchor = y_pred[:, 0:self.latent_dim]
            positive = y_pred[:, self.latent_dim:self.latent_dim * 2]
            negative = y_pred[:, self.latent_dim * 2:self.latent_dim * 3]
            if triplet_loss == "euclidean":
                pos_dist = euclidean_loss(positive, anchor)
                neg_dist = euclidean_loss(negative, anchor)
            elif triplet_loss == "cosine":
                pos_dist = cosine_loss(positive, anchor)
                neg_dist = cosine_loss(negative, anchor)
            else:
                raise Exception("Unexpected: " + triplet_loss)
            basic_loss = pos_dist - neg_dist + alpha
            loss = K.maximum(basic_loss, 0.0)
            return loss

        loss = triplet_loss_function
        self.siamese.compile(optimizer, loss, metrics, loss_weights,
                             sample_weight_mode, weighted_metrics, **kwargs)

    def fit(self, x=None, y=None, batch_size=None, minibatch_size=None,
            epochs=1, verbose=1, callbacks=None, validation_split=0.,
            validation_data=None, shuffle=True, class_weight=None,
            sample_weight=None, initial_epoch=0, steps_per_epoch=None,
            validation_steps=None, **kwargs):
        """
        This is basically the same as in vanilla Keras.

        Additional args:
            minibatch_size (int): The model internally does some sampling.
                The *minibatch_size* specifies how many candidates to use
                in order to create a triplet for training.
        """
        assert minibatch_size is not None, "ERROR! Must provide 'minibatch_size'."
        assert steps_per_epoch is not None, "ERROR! Must provide 'steps_per_epoch'."
        assert validation_steps is not None, "ERROR! Must provide 'validation_steps'."

        y_dummy = np.zeros((batch_size, self.latent_dim * 3))

        # Template generator.
        def triplet_loss_generator(x_generator, y_generator):
            # Get the classes.
            classes = sorted(list(set(y_generator)))

            # Sort by classes for easy indexing.
            class_indices = {}
            for c in classes:
                class_indices[c] = []
            for index, c in enumerate(y_generator):
                class_indices[c].append(index)

            # Compute the complements.
            class_complements = {}
            for c in classes:
                class_complements[c] = [c2 for c2 in classes if c2 != c]

            # Generator loop.
            while True:
                x_input_anchors = []
                x_input_positives = []
                x_input_negatives = []

                # Generate a whole batch.
                for _ in range(batch_size):
                    anchor_class = random.choice(classes)
                    anchor_index = random.choice(class_indices[anchor_class])
                    anchor_input = x_generator[anchor_index]
                    anchor_latent = self.base.predict(
                        np.expand_dims(anchor_input, axis=0))[0]

                    # Generate some positive candidates.
                    positive_candidates = []
                    while len(positive_candidates) < minibatch_size:
                        positive_class = anchor_class
                        positive_index = random.choice(
                            class_indices[positive_class])
                        positive_input = x_generator[positive_index]
                        assert positive_class == y_generator[positive_index]
                        positive_candidates.append(positive_input)

                    # Find the farthest positive candidate.
                    positive_candidates = np.array(positive_candidates)
                    positive_latents = self.base.predict(positive_candidates)
                    positive_extremum = compute_latent_extremum(
                        anchor_latent, positive_latents, "argmax",
                        self.triplet_loss)
                    positive_input = positive_candidates[positive_extremum]

                    # Generate some negative candidates.
                    negative_candidates = []
                    while len(negative_candidates) < minibatch_size:
                        negative_class = random.choice(
                            class_complements[anchor_class])
                        negative_index = random.choice(
                            class_indices[negative_class])
                        negative_input = x_generator[negative_index]
                        assert negative_class == y_generator[negative_index]
                        negative_candidates.append(negative_input)

                    # Find the closest negative candidate.
                    negative_candidates = np.array(negative_candidates)
                    negative_latents = self.base.predict(negative_candidates)
                    negative_extremum = compute_latent_extremum(
                        anchor_latent, negative_latents, "argmin",
                        self.triplet_loss)
                    negative_input = negative_candidates[negative_extremum]

                    # Done.
                    x_input_anchors.append(anchor_input)
                    x_input_positives.append(positive_input)
                    x_input_negatives.append(negative_input)

                x_input_anchors = np.array(x_input_anchors)
                x_input_positives = np.array(x_input_positives)
                x_input_negatives = np.array(x_input_negatives)
                x_input = [x_input_anchors, x_input_positives,
                           x_input_negatives]
                yield x_input, y_dummy

        # Create the generators.
        training_generator = triplet_loss_generator(x, y)
        if validation_data is not None:
            validation_generator = triplet_loss_generator(
                validation_data[0], validation_data[1])
        else:
            validation_generator = None

        # Create the history.
        history_keys = ["loss", "val_loss"]
        history = {}
        for history_key in history_keys:
            history[history_key] = []

        # Training the model.
        for epoch in range(epochs):
            print("Epoch " + str(epoch + 1) + "/" + str(epochs) + "...")

            # Generating data for training.
            training_input, training_output = next(training_generator)
            fit_kwargs = {}
            if validation_generator is not None:
                validation_input, validation_output = next(
                    validation_generator)
                fit_kwargs["validation_data"] = (validation_input,
                                                 validation_output)
                fit_kwargs["validation_steps"] = validation_steps

            model_history = self.siamese.fit(
                training_input, training_output,
                epochs=1,
                steps_per_epoch=steps_per_epoch,
                verbose=0,
                **fit_kwargs)

            # Update the history.
            for history_key in history_keys:
                if history_key not in model_history.history:
                    continue
                history_value = model_history.history[history_key]
                history[history_key].append(history_value)
                print(history_key, history_value)

        return history

    def fit_generator(self, generator, steps_per_epoch=None, epochs=1,
                      verbose=1, callbacks=None, validation_data=None,
                      validation_steps=None, class_weight=None,
                      max_queue_size=10, workers=1,
                      use_multiprocessing=False, shuffle=True,
                      initial_epoch=0):
        """ Coming soon... """
        raise NotImplementedError("TODO: implement fit_generator!")

    def evaluate(self, x=None, y=None, batch_size=None, verbose=1,
                 sample_weight=None, steps=None):
        """ Evaluates the model. Same as vanilla Keras. """
        return self.siamese.evaluate(x, y, batch_size, verbose,
                                     sample_weight, steps=steps)

    def predict(self, x, batch_size=None, verbose=0, steps=None):
        """ Does a prediction. Same as vanilla Keras. """
        return self.siamese.predict(x, batch_size, verbose, steps)

    def summary(self):
        """ Provides a summary. """
        print("Base model:")
        self.base.summary()
        print("Siamese model:")
        self.siamese.summary()

    def save(self, path):
        """
        Saves the TL.

        This includes the whole Siamese net plus the base model.

        This code

        >>> tl.save("mytl.h5")

        will create the files *mytl.h5* and *mytl-base.h5*.
        """
        self.siamese.save(path)
        self.base.save(append_to_filepath(path, "-base"))
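# --- Usage sketch (hedged; not part of the original source) ---
# Exercises the class above on random data. Assumes the module helpers
# used internally (euclidean_loss, cosine_loss, compute_latent_extremum)
# are in scope as in the original package; the 32-dim inputs, 8-dim
# embedding, and label count are hypothetical placeholders.
import numpy as np
from keras import layers
from keras.models import Model

base_in = layers.Input(shape=(32,))
base_out = layers.Dense(8)(base_in)
base = Model(base_in, base_out)

tl = TL(base=base)
tl.compile(optimizer="adam", triplet_loss="euclidean")

x = np.random.random((200, 32))
y = np.random.randint(0, 5, size=200)
history = tl.fit(x, y, batch_size=16, minibatch_size=4, epochs=2,
                 steps_per_epoch=1, validation_steps=1,
                 validation_data=(x, y))
print(history["loss"])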
def multi_gpu_model(model, gpus=None, cpu_merge=True, cpu_relocation=False):
    """Replicates a model on different GPUs.

    Specifically, this function implements single-machine
    multi-GPU data parallelism. It works in the following way:

    - Divide the model's input(s) into multiple sub-batches.
    - Apply a model copy on each sub-batch. Every model copy
        is executed on a dedicated GPU.
    - Concatenate the results (on CPU) into one big batch.

    E.g. if your `batch_size` is 64 and you use `gpus=2`,
    then we will divide the input into 2 sub-batches of 32 samples,
    process each sub-batch on one GPU, then return the full
    batch of 64 processed samples.

    This induces quasi-linear speedup on up to 8 GPUs.

    This function is only available with the TensorFlow backend
    for the time being.

    # Arguments
        model: A Keras model instance. To avoid OOM errors,
            this model could have been built on CPU, for instance
            (see usage example below).
        gpus: Integer >= 2 or list of integers, number of GPUs or
            list of GPU IDs on which to create model replicas.
        cpu_merge: A boolean value to identify whether to force
            merging model weights under the scope of the CPU or not.
        cpu_relocation: A boolean value to identify whether to
            create the model's weights under the scope of the CPU.
            If the model is not defined under any preceding device
            scope, you can still rescue it by activating this option.

    # Returns
        A Keras `Model` instance which can be used just like the
        initial `model` argument, but which distributes its workload
        on multiple GPUs.

    # Example 1 - Training models with weights merge on CPU

    ```python
        import tensorflow as tf
        from keras.applications import Xception
        from keras.utils import multi_gpu_model
        import numpy as np

        num_samples = 1000
        height = 224
        width = 224
        num_classes = 1000

        # Instantiate the base model (or "template" model).
        # We recommend doing this under a CPU device scope,
        # so that the model's weights are hosted on CPU memory.
        # Otherwise they may end up hosted on a GPU, which would
        # complicate weight sharing.
        with tf.device('/cpu:0'):
            model = Xception(weights=None,
                             input_shape=(height, width, 3),
                             classes=num_classes)

        # Replicates the model on 8 GPUs.
        # This assumes that your machine has 8 available GPUs.
        parallel_model = multi_gpu_model(model, gpus=8)
        parallel_model.compile(loss='categorical_crossentropy',
                               optimizer='rmsprop')

        # Generate dummy data.
        x = np.random.random((num_samples, height, width, 3))
        y = np.random.random((num_samples, num_classes))

        # This `fit` call will be distributed on 8 GPUs.
        # Since the batch size is 256, each GPU will process 32 samples.
        parallel_model.fit(x, y, epochs=20, batch_size=256)

        # Save model via the template model (which shares the same weights):
        model.save('my_model.h5')
    ```

    # Example 2 - Training models with weights merge on CPU using cpu_relocation

    ```python
        ..
        # Not needed to change the device scope for model definition:
        model = Xception(weights=None, ..)

        try:
            model = multi_gpu_model(model, cpu_relocation=True)
            print("Training using multiple GPUs..")
        except:
            print("Training using single GPU or CPU..")

        model.compile(..)
        ..
    ```

    # Example 3 - Training models with weights merge on GPU (recommended for NV-link)

    ```python
        ..
        # Not needed to change the device scope for model definition:
        model = Xception(weights=None, ..)

        try:
            model = multi_gpu_model(model, cpu_merge=False)
            print("Training using multiple GPUs..")
        except:
            print("Training using single GPU or CPU..")

        model.compile(..)
        ..
    ```

    # On model saving

    To save the multi-gpu model, use `.save(fname)` or `.save_weights(fname)`
    with the template model (the argument you passed to `multi_gpu_model`),
    rather than the model returned by `multi_gpu_model`.
    """
    if K.backend() != 'tensorflow':
        raise ValueError('`multi_gpu_model` is only available '
                         'with the TensorFlow backend.')

    available_devices = _get_available_devices()
    available_devices = [_normalize_device_name(name)
                         for name in available_devices]
    if not gpus:
        # Using all visible GPUs when not specifying `gpus`
        # e.g. CUDA_VISIBLE_DEVICES=0,2 python3 keras_mgpu.py
        gpus = len([x for x in available_devices if 'gpu' in x])

    if isinstance(gpus, (list, tuple)):
        if len(gpus) <= 1:
            # Nothing to parallelize; return the model unchanged.
            return model
        num_gpus = len(gpus)
        target_gpu_ids = gpus
    else:
        if gpus <= 1:
            # Nothing to parallelize; return the model unchanged.
            return model
        num_gpus = gpus
        target_gpu_ids = range(num_gpus)

    import tensorflow as tf

    target_devices = ['/cpu:0'] + ['/gpu:%d' % i for i in target_gpu_ids]
    for device in target_devices:
        if device not in available_devices:
            raise ValueError(
                'To call `multi_gpu_model` with `gpus=%d`, '
                'we expect the following devices to be available: %s. '
                'However this machine only has: %s. '
                'Try reducing `gpus`.' % (gpus, target_devices,
                                          available_devices))

    def get_slice(data, i, parts):
        shape = tf.shape(data)
        batch_size = shape[:1]
        input_shape = shape[1:]
        step = batch_size // parts
        if i == num_gpus - 1:
            # The last replica absorbs the remainder of the batch.
            size = batch_size - step * i
        else:
            size = step
        size = tf.concat([size, input_shape], axis=0)
        stride = tf.concat([step, input_shape * 0], axis=0)
        start = stride * i
        return tf.slice(data, start, size)

    # Relocate the model definition under CPU device scope if needed.
    if cpu_relocation:
        with tf.device('/cpu:0'):
            model = clone_model(model)

    all_outputs = []
    for i in range(len(model.outputs)):
        all_outputs.append([])

    # Place a copy of the model on each GPU,
    # each getting a slice of the inputs.
    for i, gpu_id in enumerate(target_gpu_ids):
        with tf.device('/gpu:%d' % gpu_id):
            with tf.name_scope('replica_%d' % gpu_id):
                inputs = []
                # Retrieve a slice of the input.
                for x in model.inputs:
                    input_shape = tuple(x.get_shape().as_list())[1:]
                    slice_i = Lambda(get_slice,
                                     output_shape=input_shape,
                                     arguments={'i': i,
                                                'parts': num_gpus})(x)
                    inputs.append(slice_i)

                # Apply model on slice
                # (creating a model replica on the target device).
                outputs = model(inputs)
                if not isinstance(outputs, list):
                    outputs = [outputs]

                # Save the outputs for merging back together later.
                for o in range(len(outputs)):
                    all_outputs[o].append(outputs[o])

    # Merge outputs under expected scope.
    with tf.device('/cpu:0' if cpu_merge else '/gpu:%d' % target_gpu_ids[0]):
        merged = []
        for name, outputs in zip(model.output_names, all_outputs):
            merged.append(concatenate(outputs, axis=0, name=name))
        new_model = Model(model.inputs, outputs=merged)

        # Monkey-patch `save` so that saving the parallel model saves
        # just the underlying template model.
        funcType = type(model.save)

        def new_save(self_, filepath, overwrite=True):
            model.save(filepath, overwrite)

        new_model.save = funcType(new_save, new_model)
        return new_model
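# --- Usage note (hedged; not part of the original source) ---
# Per the docstring above, `gpus` may also be a list of device indices,
# e.g. to replicate only on the first and third visible GPUs (assuming
# such devices exist on the machine):
#
# parallel_model = multi_gpu_model(model, gpus=[0, 2])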
def training_CNN(model: training.Model, train, val, num_epochs: int,
                 save=True, learning_rate=0.5, verbose=0,
                 batch_size=32) -> Tuple[History, list]:
    '''
    cnn.training_CNN()

    This function takes the model architecture and compiles it using the
    SGD optimiser and the binary cross-entropy loss function. The function
    defines the optimizer, Stochastic Gradient Descent, using the learning
    rate. The learning rate is described in the thesis. For information on
    the two hyperparameters momentum and decay, see
    https://keras.io/api/optimizers/. The function then compiles the model
    with all the hyperparameters. Here, callbacks are defined. These are
    used for plotting, saving etc. Then, the model is trained on the
    training data and validated on the validation data.

    Input:
        model: the saved model architecture from keras. Made using e.g.
            cnn.s3_model or cnn.s1_model.
        train: Training data. Made using e.g. cnn.make_dataset.
        val: Validation data. Made using e.g. cnn.make_dataset.
        num_epochs: Number of epochs.
        save [True/False]: If True, the model will be saved to disk.
        learning_rate [float]: Value between 0 and 1.
        verbose [int]: If verbose=0, nothing will be printed. If verbose=1,
            information will be printed.
        batch_size: hyperparameter used in keras.
    Output:
        history: object containing the training history, the weights,
            plots and more.
        weights: the checkpointed weight files as a list.
    Example:
        history, weights = training_CNN(model, train_data, val_data, 40,
                                        save=True, learning_rate=0.001,
                                        verbose=1)
        history, weights = training_CNN(model, train_data, val_data, 40,
                                        save=True, learning_rate=0.9,
                                        verbose=1)

    Author:
        Kristian Soerensen
        July 2020
        [email protected]
    '''
    # Print info if verbose > 0.
    if verbose > 0:
        print(model.name + ' is being trained.\n')
    # Set tensorflow verbosity.
    tf.autograph.set_verbosity(0)
    # Define the optimizer (Stochastic Gradient Descent) with the learning rate.
    optimizer1 = keras.optimizers.SGD(lr=learning_rate, momentum=0.9,
                                      decay=0.01)
    # Compile the model with the hyperparameters.
    model.compile(optimizer=optimizer1, loss="binary_crossentropy",
                  metrics=["accuracy", "AUC",
                           tf.keras.metrics.BinaryCrossentropy(),
                           tf.keras.metrics.Precision(),
                           tf.keras.metrics.BinaryAccuracy()])
    # Path to save weights to.
    filepath = 'weights/' + model.name + '.{epoch:02d}-{loss:.2f}.hdf5'
    # Checkpoint and TensorBoard callbacks collect info during fitting.
    checkpoint = keras.callbacks.ModelCheckpoint(filepath, monitor='loss',
                                                 verbose=verbose,
                                                 save_weights_only=True,
                                                 save_best_only=True,
                                                 mode='auto')
    tensor_board = keras.callbacks.TensorBoard(log_dir='logs/',
                                               histogram_freq=0)
    # Fit the model to the data!
    history = model.fit(train, batch_size=batch_size, epochs=num_epochs,
                        verbose=verbose,
                        callbacks=[checkpoint, tensor_board],
                        validation_data=val)
    weight_files = glob.glob(os.path.join(os.getcwd(), 'weights/*'))
    # Make a folder to save models to.
    if not os.path.exists(model.name):
        os.mkdir(model.name)
    # Save the model and weights.
    if save:
        # Convert the history dict to pandas to save it.
        hist_df = pd.DataFrame(history.history)
        # Save to json:
        hist_json_file = model.name + '/history.json'
        with open(hist_json_file, mode='w') as f:
            hist_df.to_json(f)
        hist_csv_file = model.name + '/history.csv'
        with open(hist_csv_file, mode='w') as f:
            hist_df.to_csv(f)
        np.save(model.name + '/history.npy', history.history)
        # Save the entire model, weights and all!
        model.save(model.name + "/model_try.h5")
        print("Model is saved.")
    return history, weight_files
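# --- Follow-up sketch (hedged; not part of the original source) ---
# `training_CNN` returns the glob of checkpoint files written by
# ModelCheckpoint. One plausible way to restore the most recently
# written checkpoint afterwards:
#
# history, weight_files = training_CNN(model, train_data, val_data,
#                                      num_epochs=40, save=True,
#                                      learning_rate=0.001, verbose=1)
# model.load_weights(max(weight_files, key=os.path.getmtime))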
class PolicyValueNet():
    """policy-value network """

    def __init__(self, board_width, board_height, model_file=None):
        self.board_width = board_width
        self.board_height = board_height
        self.l2_const = 1e-4  # coefficient of the L2 penalty
        # Build the network first; this also defines self.policy_value.
        # self.create_policy_value_net()
        self.create_policy_value_resnet()
        if model_file:
            # net_params = pickle.load(open(model_file, 'rb'))
            # self.model.set_weights(net_params)
            self.model = load_model(model_file)
        self._loss_train_op()

    def create_policy_value_resnet(self):
        def _conv_bn_relu(filters=128, kernel_size=(3, 3)):
            def f(input):
                conv = Conv2D(kernel_size=kernel_size, filters=filters,
                              padding="same", data_format="channels_first",
                              kernel_regularizer=l2(self.l2_const))(input)
                norm = BatchNormalization(axis=1)(conv)
                return Activation("relu")(norm)
            return f

        def _conv_bn(filters=128, kernel_size=(3, 3)):
            def f(input):
                conv = Conv2D(kernel_size=kernel_size, filters=filters,
                              padding="same", data_format="channels_first",
                              kernel_regularizer=l2(self.l2_const))(input)
                norm = BatchNormalization(axis=1)(conv)
                return norm
            return f

        def _basic_block(nb_filters):
            def f(input):
                conv1 = _conv_bn_relu(nb_filters, (3, 3))(input)
                conv2 = _conv_bn(nb_filters, (3, 3))(conv1)
                shortcut = keras.layers.add([conv1, conv2])
                return Activation("relu")(shortcut)
            return f

        in_x = network = Input((4, self.board_width, self.board_height))
        network = _basic_block(64)(network)
        network = _basic_block(128)(network)
        '''
        layer1 = Conv2D(filters=64, kernel_size=(3, 3), padding="same",
                        data_format="channels_first", activation="relu",
                        kernel_regularizer=l2(self.l2_const))(network)
        layer2 = Conv2D(filters=64, kernel_size=(3, 3), padding="same",
                        data_format="channels_first", activation="relu",
                        kernel_regularizer=l2(self.l2_const))(layer1)
        network = Conv2D(filters=128, kernel_size=(3, 3), padding="same",
                         data_format="channels_first", activation="relu",
                         kernel_regularizer=l2(self.l2_const))(network)
        '''
        # action policy layers
        policy_net = Conv2D(filters=4, kernel_size=(1, 1),
                            data_format="channels_first", activation="relu",
                            kernel_regularizer=l2(self.l2_const))(network)
        policy_net = Flatten()(policy_net)
        self.policy_net = Dense(self.board_width * self.board_height,
                                activation="softmax",
                                kernel_regularizer=l2(self.l2_const))(policy_net)

        # state value layers
        value_net = Conv2D(filters=2, kernel_size=(1, 1),
                           data_format="channels_first", activation="relu",
                           kernel_regularizer=l2(self.l2_const))(network)
        value_net = Flatten()(value_net)
        value_net = Dense(64, kernel_regularizer=l2(self.l2_const))(value_net)
        self.value_net = Dense(1, activation="tanh",
                               kernel_regularizer=l2(self.l2_const))(value_net)

        self.model = Model(in_x, [self.policy_net, self.value_net])

        def policy_value(state_input):
            state_input_union = np.array(state_input)
            results = self.model.predict_on_batch(state_input_union)
            return results

        self.policy_value = policy_value

    def create_policy_value_net(self):
        """create the policy value network """
        in_x = network = Input((4, self.board_width, self.board_height))

        # conv layers
        '''
        network = Conv2D(filters=32, kernel_size=(3, 3), padding="same",
                         data_format="channels_first", activation="relu",
                         kernel_regularizer=l2(self.l2_const))(network)
        network = Conv2D(filters=64, kernel_size=(3, 3), padding="same",
                         data_format="channels_first", activation="relu",
                         kernel_regularizer=l2(self.l2_const))(network)
        network = Conv2D(filters=128, kernel_size=(3, 3), padding="same",
                         data_format="channels_first", activation="relu",
                         kernel_regularizer=l2(self.l2_const))(network)
        '''
        layer1 = Conv2D(filters=64, kernel_size=(3, 3), padding="same",
                        data_format="channels_first", activation="relu",
                        kernel_regularizer=l2(self.l2_const))(network)
        layer2 = Conv2D(filters=64, kernel_size=(3, 3), padding="same",
                        data_format="channels_first", activation="relu",
                        kernel_regularizer=l2(self.l2_const))(layer1)
        network = Conv2D(filters=128, kernel_size=(3, 3), padding="same",
                         data_format="channels_first", activation="relu",
                         kernel_regularizer=l2(self.l2_const))(layer2)

        # action policy layers
        policy_net = Conv2D(filters=4, kernel_size=(1, 1),
                            data_format="channels_first", activation="relu",
                            kernel_regularizer=l2(self.l2_const))(network)
        policy_net = Flatten()(policy_net)
        self.policy_net = Dense(self.board_width * self.board_height,
                                activation="softmax",
                                kernel_regularizer=l2(self.l2_const))(policy_net)

        # state value layers
        value_net = Conv2D(filters=2, kernel_size=(1, 1),
                           data_format="channels_first", activation="relu",
                           kernel_regularizer=l2(self.l2_const))(network)
        value_net = Flatten()(value_net)
        value_net = Dense(64, kernel_regularizer=l2(self.l2_const))(value_net)
        self.value_net = Dense(1, activation="tanh",
                               kernel_regularizer=l2(self.l2_const))(value_net)

        self.model = Model(in_x, [self.policy_net, self.value_net])
        '''
        def policy_value(state_input):
            state_input_union = np.array(state_input)
            results = self.model.predict_on_batch(state_input_union)
            return results

        self.policy_value = policy_value
        '''

    def policy_value_fn(self, board):
        """
        input: board
        output: a list of (action, probability) tuples for each available
            action and the score of the board state
        """
        legal_positions = board.availables
        current_state = board.current_state()
        act_probs, value = self.policy_value(
            current_state.reshape(-1, 4, self.board_width, self.board_height))
        act_probs = zip(legal_positions, act_probs.flatten()[legal_positions])
        return act_probs, value[0][0]

    def _loss_train_op(self):
        """
        Three loss terms:
        loss = (z - v)^2 - pi^T * log(p) + c||theta||^2
        """
        # get the train op
        opt = Adam()
        losses = ['categorical_crossentropy', 'mean_squared_error']
        self.model.compile(optimizer=opt, loss=losses)

        def self_entropy(probs):
            return -np.mean(np.sum(probs * np.log(probs + 1e-10), axis=1))

        def train_step(state_input, mcts_probs, winner, learning_rate):
            state_input_union = np.array(state_input)
            mcts_probs_union = np.array(mcts_probs)
            winner_union = np.array(winner)
            # Evaluate the loss before the update step.
            loss = self.model.evaluate(state_input_union,
                                       [mcts_probs_union, winner_union],
                                       batch_size=len(state_input),
                                       verbose=0)
            action_probs, _ = self.model.predict_on_batch(state_input_union)
            entropy = self_entropy(action_probs)
            K.set_value(self.model.optimizer.lr, learning_rate)
            self.model.fit(state_input_union,
                           [mcts_probs_union, winner_union],
                           batch_size=len(state_input), verbose=0)
            return loss[0], entropy

        self.train_step = train_step

    def get_policy_param(self):
        net_params = self.model.get_weights()
        return net_params

    def save_model(self, model_file):
        """ save model params to file """
        # net_params = self.get_policy_param()
        # pickle.dump(net_params, open(model_file, 'wb'), protocol=2)
        # self.model.save_weights(model_file)
        self.model.save(model_file)

    @staticmethod
    def _shortcut(input, residual):
        stride_width = input._keras_shape[2] // residual._keras_shape[2]
        stride_height = input._keras_shape[3] // residual._keras_shape[3]
        equal_channels = residual._keras_shape[1] == input._keras_shape[1]

        shortcut = input
        # Project with a 1x1 conv if the shape changed.
        if stride_width > 1 or stride_height > 1 or not equal_channels:
            shortcut = Conv2D(filters=residual._keras_shape[1],
                              kernel_size=(1, 1),
                              strides=(stride_width, stride_height),
                              kernel_initializer="he_normal",
                              padding="valid")(input)
        return keras.layers.add([shortcut, residual])

    @staticmethod
    def _residual_block(block_function, nb_filters, repetitions,
                        is_first_layer=False):
        def f(input):
            for i in range(repetitions):
                init_subsample = (1, 1)
                if i == 0 and not is_first_layer:
                    init_subsample = (2, 2)
                input = block_function(nb_filters=nb_filters,
                                       init_subsample=init_subsample)(input)
            return input
        return f

    def resnet(self):
        from keras.layers import MaxPooling2D, AveragePooling2D

        input = Input(shape=(3, 224, 224))
        conv1 = self._conv_bn_relu(filters=64, kernel_size=(7, 7))(input)
        pool1 = MaxPooling2D(pool_size=(3, 3), strides=(2, 2),
                             padding="same")(conv1)

        # Build residual blocks...
        block_fn = self._basic_block
        block1 = self._residual_block(block_fn, nb_filters=64,
                                      repetitions=3,
                                      is_first_layer=True)(pool1)
        block2 = self._residual_block(block_fn, nb_filters=128,
                                      repetitions=4)(block1)
        block3 = self._residual_block(block_fn, nb_filters=256,
                                      repetitions=6)(block2)
        block4 = self._residual_block(block_fn, nb_filters=512,
                                      repetitions=3)(block3)

        # Classifier block.
        pool2 = AveragePooling2D(pool_size=(7, 7), strides=(1, 1),
                                 padding="same")(block4)
        flatten1 = Flatten()(pool2)
        dense = Dense(units=1000, kernel_initializer="he_normal",
                      activation="softmax")(flatten1)

        model = Model(inputs=input, outputs=dense)
        return model
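# --- Shape check for the projection shortcut (hedged; not in the original) ---
# When the residual branch halves the spatial size or changes the channel
# count, `_shortcut` projects the input with a strided 1x1 conv before the
# element-wise sum. A quick standalone check with hypothetical tensors:
_inp = Input((64, 32, 32))
_res = Conv2D(128, (3, 3), strides=(2, 2), padding="same",
              data_format="channels_first")(_inp)
_short = Conv2D(128, (1, 1), strides=(2, 2),
                data_format="channels_first")(_inp)
_out = keras.layers.add([_short, _res])
print(Model(_inp, _out).output_shape)  # (None, 128, 16, 16)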
class PolicyValueNet():
    def __init__(self, n=15, filename=None):
        self.n = n
        self.l2_const = 1e-4
        self.pvnet_fn_lock = Lock()
        if filename is not None and os.path.exists(filename):
            self.model = load_model(filename)
        else:
            self.build_model()
        self.model._make_predict_function()
        self.graph = tf.get_default_graph()
        self.model.summary()

    def build_model(self):
        print("build_model")
        x = net = Input((self.n, self.n, 4))
        net = conv_block(net, (3, 3), 128, self.l2_const)
        for i in range(block_sz):
            net = residual_block(net, (3, 3), 128, self.l2_const)

        policy_net = Conv2D(filters=2, kernel_size=(1, 1),
                            kernel_regularizer=l2(self.l2_const))(net)
        policy_net = BatchNormalization()(policy_net)
        policy_net = Activation('relu')(policy_net)
        policy_net = Flatten()(policy_net)
        self.policy_net = Dense(self.n * self.n, activation="softmax",
                                kernel_regularizer=l2(self.l2_const))(policy_net)

        value_net = Conv2D(filters=1, kernel_size=(1, 1),
                           kernel_regularizer=l2(self.l2_const))(net)
        value_net = BatchNormalization()(value_net)
        value_net = Activation('relu')(value_net)
        value_net = Flatten()(value_net)
        value_net = Dense(256, activation='relu',
                          kernel_regularizer=l2(self.l2_const))(value_net)
        self.value_net = Dense(1, activation="tanh",
                               kernel_regularizer=l2(self.l2_const))(value_net)

        self.model = Model(x, [self.policy_net, self.value_net])
        self.model.summary()

    def get_train_fn(self):
        losses = ['categorical_crossentropy', 'mean_squared_error']
        self.model.compile(optimizer=Adam(lr=0.002), loss=losses)
        batch_size = config.pvn_config['batch_size']
        epochs = config.pvn_config['epochs']

        def train_fn(board, policy, value):
            with self.graph.as_default():
                history = self.model.fit(
                    np.asarray(board),
                    [np.asarray(policy), np.asarray(value)],
                    batch_size=batch_size, epochs=epochs, verbose=0)
                print("train history:", history.history)

        return train_fn

    def get_pvnet_fn(self, single=True):
        def pvnet_fn(board):
            nparr_board = board.get_board()
            # The lock serializes predictions across threads.
            with self.pvnet_fn_lock:
                with self.graph.as_default():
                    probs, value = self.model.predict(
                        nparr_board.reshape(1, self.n, self.n, 4))
            policy_move = board.get_available().nonzero()[0]
            policy_probs = probs[0][policy_move]
            return (policy_move, policy_probs), value[0][0]

        def pvnet_fn_m(boards):
            nparr_boards = np.asarray(
                [b.get_board().reshape(self.n, self.n, 4) for b in boards])
            with self.graph.as_default():
                probs, value = self.model.predict(nparr_boards)
            policy_moves = [b.get_available().nonzero()[0] for b in boards]
            policy_probs = [p[policy_moves[i]] for i, p in enumerate(probs)]
            return zip(policy_moves, policy_probs, value.ravel())

        return pvnet_fn if single else pvnet_fn_m

    def save_model(self, model_file):
        if os.path.exists(model_file):
            os.remove(model_file)
        self.model.save(model_file)
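# --- Usage sketch (hedged; not part of the original source) ---
# Assumes `config.pvn_config`, `conv_block`, `residual_block`, and
# `block_sz` are defined in this module as used above; the random
# "self-play" arrays are hypothetical placeholders.
#
# net = PolicyValueNet(n=15)
# train_fn = net.get_train_fn()
# boards = np.random.random((32, 15, 15, 4))
# policies = np.random.dirichlet(np.ones(15 * 15), size=32)
# values = np.random.uniform(-1.0, 1.0, size=(32, 1))
# train_fn(boards, policies, values)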
# Load the model.
model = load_model(model_restore)
print('Model Restore!')

# Create a LossHistory callback instance.
history = LossHistory()
plot_model(model, to_file=model_pic)

# Start training and testing.
print('Training ------------')
hist_log = model.fit(x_train, y_train, batch_size=batch, epochs=num_epoch,
                     validation_data=(x_test, y_test), callbacks=[history])

print('Saving Log -------------')
with open(log, 'w') as f:
    f.write(str(hist_log.history))

print('\nTesting ------------')
loss, accuracy = model.evaluate(x_test, y_test)
print('\ntest loss: ', loss)
print('\ntest accuracy: ', accuracy)

model.save(model_save_path)

# Plot the acc-loss curves.
history.loss_plot('epoch')

endtime = datetime.datetime.now()
print('usetime | ', endtime - starttime)