def AssertModelWeightsEqual(model1: keras.Model, model2: keras.Model):
    weights1 = model1.get_weights()
    weights2 = model2.get_weights()
    if len(weights1) != len(weights2):
        raise AssertionError(
            'model 1 has %d weight arrays but model 2 has %d weight arrays' % (
                len(weights1), len(weights2)))
    for idx in range(len(weights1)):
        try:
            TestUtil.AssertArrayEqual(weights1[idx], weights2[idx])
        except AssertionError as e:
            raise AssertionError(
                'model weights mismatch for weight array %d: %s' % (idx, e))
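# Minimal usage sketch; build_model is a hypothetical factory and
# TestUtil.AssertArrayEqual is assumed from the surrounding test utilities.
model_a = build_model()
model_b = build_model()
model_b.set_weights(model_a.get_weights())  # make the weights identical
AssertModelWeightsEqual(model_a, model_b)   # passes silently; raises on mismatch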
def Activation_Visualizations(image, model, layer_name):

    def draw_activations(n_filters, feature_maps):
        ix = 1
        square = int(np.floor(np.sqrt(n_filters)))
        for _ in range(square):
            for _ in range(square):
                # specify subplot and turn off axis
                ax = plt.subplot(square, square, ix)
                ax.set_xticks([])
                ax.set_yticks([])
                # plot filter channel in grayscale
                plt.imshow(feature_maps[0, :, :, ix - 1], cmap='gray')
                ix += 1
        # return the figure for the caller to show
        return plt

    # get the symbolic outputs of each "key" layer (we gave them unique names)
    layer_dict = dict([(layer.name, layer) for layer in model.layers[1:]])
    output_layer = layer_dict[layer_name]
    model = Model(inputs=model.inputs, outputs=output_layer.output)
    model.summary()
    # number of filters = length of the layer's bias vector
    n_filters = model.get_weights()[1].shape[0]
    feature_maps = model.predict(image)
    saved_img = draw_activations(n_filters, feature_maps)
    return saved_img
def compare_optimizers(
        meta_dataset: MetaLearnerDataset,
        optimizer_factories: List[Callable[[np.array, np.array], Optimizer]],
        n_learner_batches: int,
        learner_batch_size: int,
        learner: Model,
        trainings_per_dataset: int,
        initial_learner_weights: Optional[List[np.ndarray]] = None
) -> List[List[float]]:
    """
    Compares performance of two or more optimizers on the meta-valid set.

    :param meta_dataset: MetaLearnerDataset to get data from
    :param optimizer_factories: list of functions that generate Optimizers to compare
    :param n_learner_batches: number of training batches for a single Learner
    :param learner_batch_size: batch size of Learner
    :param learner: model for Learner
    :param trainings_per_dataset: number of trainings per single dataset per lr value
    :param initial_learner_weights: initial weights for training Learner
    :return: List of Lists of all acquired valid. losses using optimizers on meta-valid tasks
    """
    losses = [[] for _ in optimizer_factories]
    prg_bar = tqdm(
        total=len(meta_dataset.meta_test_set) * trainings_per_dataset * len(optimizer_factories),
        desc='Evaluating optimizers...')

    for learner_dataset in meta_dataset.meta_test_set:
        valid_batch_x, valid_batch_y = learner_dataset.test_set.x, learner_dataset.test_set.y
        train_generator = learner_dataset.train_set.batch_generator(
            batch_size=learner_batch_size, randomize=True)
        for _ in range(trainings_per_dataset):
            training_batches = list(islice(train_generator, n_learner_batches))
            if initial_learner_weights is None:
                reset_weights(learner)
                current_initial_learner_weights = learner.get_weights()
            for i, optimizer_factory in enumerate(optimizer_factories):
                # use same batches and initial weights for all optimizers
                learner.optimizer = optimizer_factory(
                    learner_dataset.train_set.x, learner_dataset.train_set.y)
                if initial_learner_weights is None:
                    # noinspection PyUnboundLocalVariable
                    learner.set_weights(current_initial_learner_weights)
                else:
                    learner.set_weights(initial_learner_weights)
                learner.fit_generator(generator=(b for b in training_batches),
                                      steps_per_epoch=n_learner_batches,
                                      epochs=1,
                                      verbose=0)
                evaluation = learner.evaluate(valid_batch_x, valid_batch_y, verbose=0)
                if isinstance(evaluation, list):
                    evaluation = evaluation[0]
                losses[i].append(evaluation)
                prg_bar.update(1)
    prg_bar.close()
    return losses
class SimpleNNet:
    def __init__(self, learning_rate=0.01, state_size=4, action_size=2, hidden_size=10):
        self.input_size = state_size
        self.output_size = action_size
        main_input = Input(shape=(self.input_size,), name='main_input')
        model = Dense(hidden_size, activation='relu')(main_input)
        model = Dense(hidden_size, activation='relu')(model)
        model = Dense(self.output_size, activation='linear')(model)
        self._model = Model(inputs=main_input, outputs=model)
        self.optimizer = Adam(lr=learning_rate)  # Adam is the optimization method used to reduce the loss
        self._model.compile(loss=huberloss, optimizer=self.optimizer)

    def predict(self, x):
        x = np.reshape(x, [len(x), self.input_size])
        return self._model.predict(x)

    def train_on_batch(self, x, y):
        x = np.reshape(x, [len(x), self.input_size])
        y = np.reshape(y, [len(y), self.output_size])
        return self._model.train_on_batch(x, y)

    def set_weights(self, w):
        return self._model.set_weights(w)

    def get_weights(self):
        return self._model.get_weights()
def save_model(paths, model: keras.Model):
    json_path, weight_path = paths
    with open(json_path, 'w') as file:
        json.dump(model.to_json(), file)
    with open(weight_path, 'wb') as file:
        pickle.dump(model.get_weights(), file, protocol=pickle.HIGHEST_PROTOCOL)
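# A matching loader is not shown in the original; this is a minimal sketch of
# the assumed counterpart: rebuild the architecture from the saved JSON string
# and restore the pickled weight list. The name load_model is hypothetical.
import json
import pickle
import keras


def load_model(paths) -> keras.Model:
    json_path, weight_path = paths
    with open(json_path) as file:
        model = keras.models.model_from_json(json.load(file))
    with open(weight_path, 'rb') as file:
        model.set_weights(pickle.load(file))
    return model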
def test_3(self):
    num_modes = 6
    h, w, num_channels = 10, 10, 3
    input_shape = (50, h, w, num_channels)
    a = np.random.uniform(size=input_shape)

    i1 = Input(shape=(h, w, num_channels))
    x1 = ModeNormalization(k=num_modes)(i1)
    m1 = Model(inputs=[i1], outputs=[x1])

    weight_shapes = [a.shape for a in m1.get_weights()]
    assert weight_shapes == [
        (num_channels, num_modes),   # gates_kernel
        (num_modes,),                # gates_bias
        (num_channels,),             # gates_gamma
        (num_channels,),             # gates_beta
        (num_modes, num_channels),   # moving_mean
        (num_modes, num_channels),   # moving_variance
    ]
def evaluate_model_finetuned(model: keras.Model, train_data, test_data, epochs,
                             repetitions=100, batch_size=32):
    X_train, y_train = train_data
    X_test, y_test = test_data
    original_weights = model.get_weights()
    model_MSE_all = np.zeros((1, repetitions))
    for N in epochs:
        # Storage array for the repetitions of each epoch number N
        model_MSE_epoch = np.array([])
        for i in range(repetitions):
            # Reset model weights every repetition
            model.set_weights(original_weights)
            history = trainer.train_model(
                model,
                x_train=X_train,
                y_train=y_train,
                optimizer=keras.optimizers.Adam(learning_rate=0.001),
                validation_split=None,
                epochs=N,
                batch_size=batch_size,
                summary=False,
                verbose=0)
            model_MSE = model.evaluate(X_test, y_test, batch_size=batch_size, verbose=0)
            model_MSE_epoch = np.append(model_MSE_epoch, model_MSE)
        model_MSE_all = np.append(model_MSE_all, [model_MSE_epoch], axis=0)
    model.set_weights(original_weights)  # Reset model
    model_MSE_all = np.delete(model_MSE_all, 0, axis=0)  # drop the zero-filled placeholder row
    return model_MSE_all
def block_test(layer_func, kwargs={}, input_shape=None):
    """Test routine for faceswap neural network blocks.

    Tests are simple and are to ensure that the blocks compile on both the
    tensorflow and plaidml backends.
    """
    # generate input data
    assert input_shape
    input_dtype = K.floatx()
    input_data_shape = list(input_shape)
    for i, var_e in enumerate(input_data_shape):
        if var_e is None:
            input_data_shape[i] = np.random.randint(1, 4)
    input_data = (10 * np.random.random(input_data_shape))
    input_data = input_data.astype(input_dtype)
    expected_output_dtype = input_dtype

    # test in functional API
    inp = Input(shape=input_shape[1:], dtype=input_dtype)
    outp = layer_func(inp, **kwargs)
    assert K.dtype(outp) == expected_output_dtype

    # check with the functional API
    model = Model(inp, outp)
    actual_output = model.predict(input_data)

    # test serialization, weight setting at model level
    model_config = model.get_config()
    recovered_model = model.__class__.from_config(model_config)
    if model.weights:
        weights = model.get_weights()
        recovered_model.set_weights(weights)
        _output = recovered_model.predict(input_data)
        assert_allclose(_output, actual_output, rtol=1e-3)

    # for further checks in the caller function
    return actual_output
class DuelingNNet:
    def __init__(self, learning_rate=0.01, state_size=4, action_size=2, hidden_size=10):
        self.input_size = state_size
        self.output_size = action_size
        main_input = Input(shape=(self.input_size,), name='main_input')
        hdn = Dense(hidden_size, activation='relu')(main_input)
        v = Dense(hidden_size, activation='relu')(hdn)
        v = Dense(1)(v)
        adv = Dense(hidden_size, activation='relu')(hdn)
        adv = Dense(self.output_size)(adv)
        model = Concatenate()([v, adv])
        # dueling aggregation: Q(s, a) = V(s) + A(s, a) - mean_a A(s, a)
        model = Lambda(lambda a: k.expand_dims(a[:, 0], -1) + a[:, 1:]
                       - k.mean(a[:, 1:], axis=1, keepdims=True),
                       output_shape=(self.output_size,))(model)
        self._model = Model(inputs=main_input, outputs=model)
        self.optimizer = Adam(lr=learning_rate)  # Adam is the optimization method used to reduce the loss
        self._model.compile(loss=huberloss, optimizer=self.optimizer)

    def predict(self, x):
        x = np.reshape(x, [len(x), self.input_size])
        return self._model.predict(x)

    def train_on_batch(self, x, y):
        x = np.reshape(x, [len(x), self.input_size])
        y = np.reshape(y, [len(y), self.output_size])
        return self._model.train_on_batch(x, y)

    def set_weights(self, w):
        return self._model.set_weights(w)

    def get_weights(self):
        return self._model.get_weights()
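# A minimal numpy sketch of the dueling aggregation used in the Lambda above,
# with made-up values: each row of the concatenated tensor holds
# [V, A_1, ..., A_n], and Q = V + A - mean(A) keeps the decomposition identifiable.
import numpy as np

concat = np.array([[0.5, 1.0, 3.0]])           # V = 0.5, advantages = [1.0, 3.0]
v, adv = concat[:, :1], concat[:, 1:]
q = v + adv - adv.mean(axis=1, keepdims=True)  # [[-0.5, 1.5]]
print(q)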
model_6.compile(optimizer=OPTIMIZER, loss=LOSS)
model_6.summary()
for _ in tqdm(range(n_epoch)):
    model_6.fit(X2, y22, epochs=1, batch_size=n_batch, verbose=0)
res = model_6.predict(X2, batch_size=n_batch, verbose=0)
print(y22)
print(res)

l1_7 = Input(shape=(3, 2))
l2_7 = Masking(mask_value=0.0)(l1_7)
l3_7 = LSTM(n_neurons, return_sequences=True)(l2_7)
l4_7 = TimeDistributed(Dense(1))(l3_7)
model_7 = Model(inputs=l1_7, outputs=l4_7)
model_7.compile(optimizer=OPTIMIZER, loss=LOSS)
model_7.summary()
model_7.set_weights(model_6.get_weights())
res = model_7.predict(X2, batch_size=n_batch, verbose=0)
print(y22)
print(res)

# Change the masking value to be something crazy
l1_8 = Input(shape=(3, 2))
l2_8 = Masking(mask_value=-999.0)(l1_8)
l3_8 = LSTM(n_neurons, return_sequences=True)(l2_8)
l4_8 = TimeDistributed(Dense(1))(l3_8)
model_8 = Model(inputs=l1_8, outputs=l4_8)
model_8.compile(optimizer=OPTIMIZER, loss=LOSS)
model_8.summary()
model_8.set_weights(model_6.get_weights())
res = model_8.predict(X2, batch_size=n_batch, verbose=0)
print(y22)
from keras import Model
from keras.layers import Input, Dense
import numpy as np

_input = Input(shape=(2,))
_output = Dense(units=1)(_input)
model = Model(inputs=_input, outputs=_output)
model.summary()  # inspect the model's structure

model.set_weights([np.array([[0.5], [0.5]]), np.array([-0.7])])
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
Y = np.array([[0], [0], [0], [1]])
print(model.get_weights())

Y_ = model.predict(X)
print(Y_)
Y_[Y_ <= 0] = False
Y_[Y_ > 0] = True
print(Y_)
print(f'Results: {Y == Y_}')
googlenet.summary()

# Fit the model. Fingers crossed that it works!
import time  # re-import time before every training run so the timer is reset

start = time.time()
history = googlenet.fit(train_image, train_label, epochs=100,
                        callbacks=callback_list,
                        validation_data=(test_image, test_label))
elapsed = time.time() - start

print("Training time (seconds): {}".format(elapsed))
print("Total number of parameters: {}".format(
    sum([arr.flatten().shape[0] for arr in googlenet.get_weights()])))

# Plot the curves to check whether training went well.
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(len(acc))

plt.plot(epochs, acc, 'bo', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='Validation acc')
plt.title('Training and Validation accuracy')
plt.legend()
plt.figure()
def fit_dnn(inputs: list, nfold: int, y_train,
            final_model: Model, outfolder: str, task: str, model_descriptor: str):
    encoder = LabelBinarizer()
    y_train_int = encoder.fit_transform(y_train)

    y_train_label_lookup = dict()
    for index, l in zip(y_train_int.argmax(1), y_train):
        y_train_label_lookup[index] = l

    # merge so as to create correct splits across all different feature inputs
    X_merge = numpy.concatenate(inputs, axis=1)

    model_file = os.path.join(outfolder, "ann-%s.m" % task)

    model_copies = []
    for i in range(nfold):
        model_copy = clone_model(final_model)
        model_copy.set_weights(final_model.get_weights())
        model_copies.append(model_copy)

    # perform n-fold validation (we can't use scikit-learn's wrapper as we used
    # the Keras functional API above)
    if nfold is not None:
        kfold = StratifiedKFold(n_splits=nfold, shuffle=True, random_state=cl.RANDOM_STATE)
        splits = list(enumerate(kfold.split(X_merge, y_train_int.argmax(1))))

        nfold_predictions = dict()
        for k in range(0, len(splits)):
            print("\tnfold=" + str(k))
            nfold_model = model_copies[k]
            nfold_model.compile(loss='categorical_crossentropy', optimizer='adam',
                                metrics=['accuracy'])

            # Fit the model
            X_train_index = splits[k][1][0]
            X_test_index = splits[k][1][1]

            X_train_merge_ = X_merge[X_train_index]
            X_test_merge_ = X_merge[X_test_index]
            y_train_ = y_train_int[X_train_index]
            y_test_ = y_train_int[X_test_index]

            # to contain features for the training and testing sets coming from
            # the different input branches
            separate_training_feature_inputs = []
            separate_testing_feature_inputs = []
            index_start = 0
            for feature_input in inputs:
                length = len(feature_input[0])
                index_end = index_start + length
                slice_train = X_train_merge_[:, index_start:index_end]
                slice_test = X_test_merge_[:, index_start:index_end]
                separate_training_feature_inputs.append(slice_train)
                separate_testing_feature_inputs.append(slice_test)
                index_start = index_end

            nfold_model.fit(separate_training_feature_inputs, y_train_,
                            epochs=dmc.DNN_EPOCHES, batch_size=dmc.DNN_BATCH_SIZE)
            prediction_prob = nfold_model.predict(separate_testing_feature_inputs)

            # evaluate the model
            predictions = prediction_prob.argmax(axis=-1)
            for i, l in zip(X_test_index, predictions):
                nfold_predictions[i] = l
            del nfold_model

        indexes = sorted(list(nfold_predictions.keys()))
        predicted_labels = []
        for i in indexes:
            predicted_labels.append(nfold_predictions[i])
        util.save_scores(predicted_labels, y_train_int.argmax(1), "dnn", task,
                         model_descriptor, 3, outfolder)
    else:
        final_model.fit(inputs, y_train_int, epochs=dmc.DNN_EPOCHES,
                        batch_size=dmc.DNN_BATCH_SIZE, verbose=2)

        # serialize model to YAML
        model_yaml = final_model.to_yaml()
        with open(model_file + ".yaml", "w") as yaml_file:
            yaml_file.write(model_yaml)
        # serialize weights to HDF5
        final_model.save_weights(model_file + ".h5")
max_features = 50
i = Input(shape=(max_len,))
x = Embedding(max_features, 16)(i)
x = TCN(nb_filters=12,
        dropout_rate=0.5,  # with dropout here.
        kernel_size=6,
        dilations=[1, 2, 4])(x)
x = Dropout(0.5)(x)  # and dropout here.
x = Dense(1, activation='sigmoid')(x)

model = Model(inputs=[i], outputs=[x])

if os.path.exists('tcn.npz'):
    # Load the checkpoint if the file exists.
    w = np.load('tcn.npz', allow_pickle=True)['w']
    print('Model reloaded.')
    model.set_weights(w.tolist())
else:
    # Save the checkpoint. dtype=object holds the ragged list of weight arrays;
    # savez_compressed pickles object arrays itself and takes no allow_pickle flag.
    w = np.array(model.get_weights(), dtype=object)
    np.savez_compressed(file='tcn.npz', w=w)
    print('First time.')

# Make an inference.
# The value for [First time] and [Model reloaded] should be the same. Run the script twice!
inputs = np.ones(shape=(1, 100))
out1 = model.predict(inputs)[0, 0]
print('*' * 80)
print(out1)
print('*' * 80)
def layer_test(layer_cls, kwargs={}, input_shape=None, input_dtype=None,
               input_data=None, expected_output=None,
               expected_output_dtype=None, fixed_batch_size=False):
    """Test routine for a layer with a single input tensor
    and single output tensor.
    """
    # generate input data
    if input_data is None:
        assert input_shape
        if not input_dtype:
            input_dtype = K.floatx()
        input_data_shape = list(input_shape)
        for i, var_e in enumerate(input_data_shape):
            if var_e is None:
                input_data_shape[i] = np.random.randint(1, 4)
        input_data = (10 * np.random.random(input_data_shape))
        input_data = input_data.astype(input_dtype)
    else:
        if input_shape is None:
            input_shape = input_data.shape
        if input_dtype is None:
            input_dtype = input_data.dtype
    if expected_output_dtype is None:
        expected_output_dtype = input_dtype

    # instantiation
    layer = layer_cls(**kwargs)

    # test get_weights, set_weights at layer level
    weights = layer.get_weights()
    layer.set_weights(weights)

    layer.build(input_shape)
    expected_output_shape = layer.compute_output_shape(input_shape)

    # test in functional API
    if fixed_batch_size:
        inp = Input(batch_shape=input_shape, dtype=input_dtype)
    else:
        inp = Input(shape=input_shape[1:], dtype=input_dtype)
    outp = layer(inp)
    assert K.dtype(outp) == expected_output_dtype

    # check with the functional API
    model = Model(inp, outp)
    actual_output = model.predict(input_data)
    actual_output_shape = actual_output.shape
    for expected_dim, actual_dim in zip(expected_output_shape, actual_output_shape):
        if expected_dim is not None:
            assert expected_dim == actual_dim
    if expected_output is not None:
        assert_allclose(actual_output, expected_output, rtol=1e-3)

    # test serialization, weight setting at model level
    model_config = model.get_config()
    recovered_model = model.__class__.from_config(model_config)
    if model.weights:
        weights = model.get_weights()
        recovered_model.set_weights(weights)
        _output = recovered_model.predict(input_data)
        assert_allclose(_output, actual_output, rtol=1e-3)

    # test training mode (e.g. useful when the layer has a
    # different behavior at training and testing time).
    if has_arg(layer.call, 'training'):
        model.compile('rmsprop', 'mse')
        model.train_on_batch(input_data, actual_output)

    # test instantiation from layer config
    layer_config = layer.get_config()
    layer_config['batch_input_shape'] = input_shape
    layer = layer.__class__.from_config(layer_config)

    # for further checks in the caller function
    return actual_output
        self.best_loss = math.inf

    def on_epoch_end(self, epoch, logs=None):
        if logs['loss'] < self.best_loss:
            self.best_loss = logs['loss']  # remember the new best loss
            model.save_weights('cbow_best_wts.weights')


evaluator = Evaluator()
model.fit_generator(gen,
                    steps_per_epoch=len(gen),
                    epochs=epochs,
                    callbacks=[evaluator],
                    shuffle=False,
                    initial_epoch=0)

'''eval'''
model.load_weights('cbow_best_wts.weights', by_name=True)
embedding_wts = model.get_weights()[0]
# cosine similarity
norm_embedding_wts = embedding_wts / np.sqrt(
    np.sum(np.square(embedding_wts), axis=1, keepdims=True))


def cos_similarity(word):
    vector = norm_embedding_wts[token2id[word]]
    sims = np.einsum('mn, n->m', norm_embedding_wts, vector)
    sort = np.argsort(sims)[::-1]
    return [(id2token[i], sims[i]) for i in sort[:6]]


pprint(cos_similarity('movie'))
class WassersteinGAN:
    """Wasserstein GAN model optimized on the EM distance"""

    def __init__(self, num_row, num_col, num_channel):
        self.num_row = num_row
        self.num_col = num_col
        self.num_channel = num_channel
        self.input_shape_g = 100
        self.num_critic = 5
        self.clip_value = 0.01
        self.generator = self.discriminator = None
        self.opt = RMSprop(lr=0.00005)
        self.images = []

    def build_generator(self, summary=False):
        """Generator network"""
        inputs = Input(shape=(self.input_shape_g,))
        x = Dense((self.num_row + 9) * (self.num_col + 9) * self.num_channel,
                  activation='relu')(inputs)
        x = Reshape((self.num_row + 9, self.num_col + 9, self.num_channel))(x)
        x = BatchNormalization(momentum=0.8)(x)
        x = Conv2D(32, kernel_size=4, activation='relu')(x)
        x = BatchNormalization(momentum=0.8)(x)
        x = Conv2D(16, kernel_size=4, activation='relu')(x)
        x = BatchNormalization(momentum=0.8)(x)
        outputs = Conv2D(self.num_channel, kernel_size=4, activation='sigmoid')(x)
        generator = Model(inputs=inputs, outputs=outputs)
        if summary:
            generator.summary()
        return generator

    def build_discriminator(self, summary=False):
        """Discriminator (critic) network"""
        inputs = Input(shape=(self.num_row, self.num_col, self.num_channel))
        x = Conv2D(16, kernel_size=3)(inputs)
        x = LeakyReLU(alpha=0.2)(x)
        # x = MaxPooling2D(pool_size=(3, 3))(x)
        x = Conv2D(32, kernel_size=3)(x)
        x = LeakyReLU(alpha=0.2)(x)
        # x = MaxPooling2D(pool_size=(3, 3))(x)
        x = Conv2D(64, kernel_size=3)(x)
        x = LeakyReLU(alpha=0.2)(x)
        # x = MaxPooling2D(pool_size=(3, 3))(x)
        x = Flatten()(x)
        outputs = Dense(1, activation='tanh')(x)
        discriminator = Model(inputs=inputs, outputs=outputs)
        if summary:
            discriminator.summary()
        return discriminator

    def build_gan(self, summary=False):
        self.generator = self.build_generator(summary=summary)
        self.generator.compile(loss='mse', optimizer=self.opt)
        self.discriminator = self.build_discriminator(summary=summary)
        self.discriminator.compile(loss=wasserstein_loss, optimizer=self.opt,
                                   metrics=[wasserstein_loss])
        z = Input(shape=(self.input_shape_g,))
        img = self.generator(z)
        discriminator_combine = self.build_discriminator(summary=summary)
        discriminator_combine.trainable = False
        validation = discriminator_combine(img)
        self.combined = Model(z, validation)
        self.combined.compile(loss='mse', optimizer=self.opt, metrics=['mse'])

    def fit(self, X_train, y_train=None, iter_max=100, epochs=10000,
            batch_size=128, save_interval=10):
        half_batch_size = int(batch_size / 2)
        assert half_batch_size == batch_size / 2
        early = keras.callbacks.EarlyStopping(monitor='loss', min_delta=0,
                                              patience=10, verbose=0, mode='auto')
        for num_iter in range(iter_max):
            print(num_iter)
            for _ in range(self.num_critic):
                idx = np.random.randint(0, X_train.shape[0], half_batch_size)
                imgs = X_train[idx]
                noise = np.random.normal(0, 1, (half_batch_size, self.input_shape_g))
                gen_imgs = self.generator.predict(noise)
                train_imgs = np.vstack([imgs, gen_imgs])
                train_targets = np.vstack([np.ones((half_batch_size, 1)),
                                           -np.ones((half_batch_size, 1))])
                self.discriminator.fit(x=train_imgs, y=train_targets,
                                       batch_size=batch_size, shuffle=False,
                                       epochs=epochs, callbacks=[early], verbose=0)
                # clip the critic weights to enforce the Lipschitz constraint
                weights = self.discriminator.get_weights()
                num_layer_discriminator = len(weights)
                for i in range(num_layer_discriminator):
                    weights[i] = np.clip(weights[i], -self.clip_value, self.clip_value)
                self.discriminator.set_weights(weights)
                # mirror the clipped critic weights into the combined model
                weights_combined = self.combined.get_weights()
                weights_combined[-num_layer_discriminator:] = weights
                self.combined.set_weights(weights_combined)
            noise = np.random.normal(0, 1, (batch_size, self.input_shape_g))
            self.combined.fit(x=noise, y=np.ones((batch_size, 1)),
                              batch_size=batch_size, epochs=epochs,
                              callbacks=[early], verbose=0)
            if num_iter % save_interval == 0:
                noise = np.random.normal(0, 1, (2, self.input_shape_g))
                self.images.append(self.generator.predict(noise))
def get_shapes(model: keras.Model) -> List[Tuple[int]]:
    """Get a list with the shapes of a model's weight matrices."""
    model_weights = model.get_weights()
    shapes = [x.shape for x in model_weights]
    return shapes
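# Usage sketch on a throwaway two-layer model (names are illustrative only):
# Dense(4) on 3 inputs yields kernel (3, 4) and bias (4,); Dense(1) then
# yields kernel (4, 1) and bias (1,).
from keras import Model
from keras.layers import Input, Dense

demo_in = Input(shape=(3,))
demo_out = Dense(1)(Dense(4)(demo_in))
demo_model = Model(demo_in, demo_out)
print(get_shapes(demo_model))  # [(3, 4), (4,), (4, 1), (1,)]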
def train_REPTILE(model: keras.Model, dataset, training_keys, epochs=1,
                  inner_optimizer='SGD', lr_inner=0.01, lr_meta=0.01,
                  batch_size=10, train_proportion=1.0):
    if inner_optimizer == 'SGD':
        inner_optimizer = keras.optimizers.SGD(learning_rate=lr_inner)
    elif inner_optimizer == 'Adam':
        inner_optimizer = keras.optimizers.Adam(learning_rate=lr_inner)

    X_, y_ = dataset
    epoch_losses = []
    for epoch in range(epochs):
        epoch_start = time.time()
        epoch_total_loss = 0
        epoch_N = 0
        for i, key in enumerate(training_keys):
            # Inner loop for task i, SGD/Adam on the learner model
            _x, _y = X_[key], y_[key]
            N = _x.shape[0]  # No. of datapoints in task
            indices = np.random.permutation(N)  # Shuffling training set
            train_idx = indices[:int(np.floor(train_proportion * N))]
            inner_N = len(train_idx)
            epoch_N += inner_N
            x, y = _x[train_idx], _y[train_idx]
            n_batches = int(np.floor(len(x) / batch_size))  # For batch processing

            # model_copy = keras.models.clone_model(model)
            # model_copy.set_weights(model.get_weights())
            model_copy = copy_model(model, x)

            # Training batches of inner loop; the final iteration handles any
            # partial batch left over and is skipped when the data divides evenly
            for n in range(n_batches + 1):
                batch_x = x[n * batch_size:(n + 1) * batch_size]
                batch_y = y[n * batch_size:(n + 1) * batch_size]
                if len(batch_x) == 0:
                    continue
                with tf.GradientTape() as train_tape:
                    inner_loss = MSE_loss(model_copy(batch_x), batch_y)
                epoch_total_loss += inner_loss * len(batch_x)  # Adding total loss = n*mse
                print(f"Loss: {inner_loss}")
                gradients = train_tape.gradient(inner_loss, model_copy.trainable_variables)
                inner_optimizer.apply_gradients(zip(gradients, model_copy.trainable_variables))

            # Meta-update step phi <- phi + lr_meta*(phi~ - phi)
            updated_weights = []
            phi_tilde = model_copy.get_weights()
            phi = model.get_weights()
            for j in range(len(phi)):
                delta = lr_meta * (phi_tilde[j] - phi[j])
                new_weight = phi[j] + delta
                updated_weights.append(new_weight)
            model.set_weights(updated_weights)

        # Logging losses
        _loss = epoch_total_loss / epoch_N
        epoch_losses.append(_loss)
        print(f"Epoch {epoch + 1} / {epochs} completed in {time.time() - epoch_start:.2f}s")
        print(f"Epoch loss: {_loss}")

    plt.plot(epoch_losses)
    plt.show()
    return epoch_losses
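# A minimal numpy sketch of the REPTILE meta-update above, with made-up numbers:
# after the inner loop moves a copy's weights from phi to phi_tilde, the meta
# step drags phi a fraction lr_meta of the way towards phi_tilde.
import numpy as np

phi = np.array([1.0, 2.0])        # meta-model weights before the task
phi_tilde = np.array([3.0, 0.0])  # task-adapted weights after the inner loop
lr_meta = 0.1
phi = phi + lr_meta * (phi_tilde - phi)
print(phi)  # [1.2 1.8]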
def copy_model(model: keras.Model, x):
    model_copy = models.DenseModel()
    model_copy.call(x)  # To initialise weights
    model_copy.set_weights(model.get_weights())
    return model_copy
y_true = np.ones((n_samples, dx, dout))
X2[2, 0] = mask_value
X2[3, 1] = mask_value
sample_weight = np.ones_like(y_true)
sample_weight[2, 0] = 0
sample_weight[3, 1] = 0

inp_1 = Input(shape=(dx, dy))
mask_1 = Masking(mask_value=mask_value)(inp_1)
lstm_1 = LSTM(dout, return_sequences=True)(mask_1)
dense_1 = TimeDistributed(Dense(dout))(lstm_1)
model_1 = Model(inputs=inp_1, outputs=dense_1)
model_1.summary()
model_1.compile(optimizer="rmsprop", loss="mae", sample_weight_mode="temporal")
model_1_untrained_weights = model_1.get_weights()

print("The losses are not going to be consistent with each other because the "
      "masking layer breaks the model.")
y_pred = model_1.predict(X2, verbose=0)
unmasked_loss = np.average(np.abs(y_true - y_pred))
masked_loss = np.average(np.abs(y_true - y_pred[y_pred != 0.0]))
weighted_loss = np.average(np.abs(y_true - y_pred), weights=sample_weight)
keras_loss = model_1.evaluate(X2, y_true, verbose=0)
keras_loss_weighted = model_1.evaluate(X2, y_true,
                                       sample_weight=sample_weight[..., 0],
                                       verbose=0)
print("-- model 1 --")
print(f"unmasked loss: {unmasked_loss}")
print(f"masked loss: {masked_loss}")
eco_mat.append(shared_dense(eco_inputs[-1]))
eco_mat = kl.concatenate(eco_mat)
hide_input = kl.multiply([counties_input, eco_mat])
output = kl.Dense(461)(hide_input)

model = Model([counties_input] + eco_inputs, output)
model.compile(optimizer='adam', loss='mean_squared_error')
model.summary()

#%%
# Training takes several minutes; it will not run faster even on a 1080 Ti
# (I tried). This is probably because we use a shared Dense layer for 461 inputs.
history = model.fit(x=X3, y=[Y], batch_size=4, epochs=200)

#%%
plt.plot(history.epoch, history.history['loss'])

#%%
eco_weight = model.get_weights()[0]
plt.plot(range(eco_weight.size), eco_weight)

#%%
# head holds the tags of the socio-economic data in a certain order.
# This is exactly where we went wrong, because different tags are used in each file.
head = np.load('./data/head.npy')
order = eco_weight.argsort(axis=0)
one_node = model.get_weights()[1]
print(head[order])
print('this list is in increasing order')
if one_node > 0:
    print('the larger the parameter, the more the drug use')
else:
    print('the larger the parameter, the less the drug use')
                    metrics=['acc'])
InceptionV3.summary()

# Fit the model. Fingers crossed that it works!
import time  # re-import time before every training run so the timer is reset

start = time.time()
history = InceptionV3.fit(train_image, train_label, epochs=100,
                          callbacks=callback_list,
                          validation_data=(test_image, test_label))
elapsed = time.time() - start

print("Training time (seconds): {}".format(elapsed))
print("Total number of parameters: {}".format(
    sum([arr.flatten().shape[0] for arr in InceptionV3.get_weights()])))

# Plot the curves to check whether training went well.
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(len(acc))

plt.plot(epochs, acc, 'bo', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='Validation acc')
plt.title('Training and Validation accuracy')
plt.legend()
plt.figure()
class D3QNAgent:
    def __init__(self, env, optimizer, gamma, is_soft_update, tau, batch_size):
        # number of factors in the state; e.g. velocity, position, etc.
        self.state_size = env.observation_space.shape[0]
        _, self.action_size = quantize(None)
        self.optimizer = optimizer

        # allow large replay exp space
        self.replay_exp = ExperienceReplay(type=REPLAY_TYPE)

        self.gamma = gamma
        self.batch_size = batch_size
        self.is_soft_update = is_soft_update
        self.tau = tau
        self.epsilon = 1.0  # initialize with high exploration, which will decay later

        # Build networks
        X_input = Input(self.state_size)
        X = X_input
        X = Dense(256, input_shape=(1, self.state_size), activation='relu',
                  kernel_initializer='he_uniform')(X)
        X = Dense(256, activation='relu', kernel_initializer='he_uniform')(X)
        X = Dense(64, activation='relu', kernel_initializer='he_uniform')(X)

        # Build Policy Network
        state_value = Dense(1, kernel_initializer='he_uniform')(X)
        state_value = Lambda(lambda s: K.expand_dims(s[:, 0], -1),
                             output_shape=(self.action_size,))(state_value)
        action_advantage = Dense(self.action_size, kernel_initializer='he_uniform')(X)
        action_advantage = Lambda(lambda a: a[:, :] - K.mean(a[:, :], keepdims=True),
                                  output_shape=(self.action_size,))(action_advantage)
        brain_policy = Add()([state_value, action_advantage])
        self.brain_policy = Model(inputs=X_input, outputs=brain_policy)
        self.brain_policy.compile(loss="mse", optimizer=self.optimizer)

        # Build Target Network
        # NOTE: this head is built on the same trunk X as the policy network,
        # so the hidden layers are shared between the two models.
        state_value = Dense(1, kernel_initializer='he_uniform')(X)
        state_value = Lambda(lambda s: K.expand_dims(s[:, 0], -1),
                             output_shape=(self.action_size,))(state_value)
        action_advantage = Dense(self.action_size, kernel_initializer='he_uniform')(X)
        action_advantage = Lambda(lambda a: a[:, :] - K.mean(a[:, :], keepdims=True),
                                  output_shape=(self.action_size,))(action_advantage)
        brain_target = Add()([state_value, action_advantage])
        self.brain_target = Model(inputs=X_input, outputs=brain_target)
        self.brain_target.compile(loss="mse", optimizer=self.optimizer)

        self.update_brain_target()

    def update_brain_target(self):
        if self.is_soft_update:
            # Polyak averaging: target <- (1 - tau) * target + tau * policy
            policy_weights = self.brain_policy.get_weights()
            target_weights = self.brain_target.get_weights()
            counter = 0
            for q_weight, target_weight in zip(policy_weights, target_weights):
                target_weight = target_weight * (1 - self.tau) + q_weight * self.tau
                target_weights[counter] = target_weight
                counter += 1
            self.brain_target.set_weights(target_weights)
        else:
            self.brain_target.set_weights(self.brain_policy.get_weights())

    def choose_action(self, state):
        if self._should_do_exploration():
            action = np.random.choice(self.action_size)
        else:
            state = np.reshape(state, [1, self.state_size])
            qhat = self.brain_policy.predict(state)  # output Q(s,a) for all a of current state
            action = np.argmax(qhat[0])  # the output is m * n, so take dimension [0]
        return action

    def learn(self, sample=None):
        if sample is None:
            cur_batch_size = min(self.replay_exp.size, self.batch_size)
            if self.replay_exp.is_prioritized():
                mini_batch = self.replay_exp.replay_exp.sample(cur_batch_size)
            else:
                mini_batch = random.sample(self.replay_exp.replay_exp, cur_batch_size)
        else:
            cur_batch_size = 1
            mini_batch = [(0, sample)]

        # batch data
        sample_states = np.ndarray(shape=(cur_batch_size, self.state_size))
        sample_actions = np.ndarray(shape=(cur_batch_size, 2))
        sample_rewards = np.ndarray(shape=(cur_batch_size, 1))
        sample_next_states = np.ndarray(shape=(cur_batch_size, self.state_size))
        sample_dones = np.ndarray(shape=(cur_batch_size, 1))

        for index, exp in enumerate(mini_batch):
            if self.replay_exp.is_prioritized():
                sample_states[index] = exp[1][0]
                sample_actions[index] = exp[1][1]
                sample_rewards[index] = exp[1][2]
                sample_next_states[index] = exp[1][3]
                sample_dones[index] = exp[1][4]
            else:
                sample_states[index] = exp[0]
                sample_actions[index] = exp[1]
                sample_rewards[index] = exp[2]
                sample_next_states[index] = exp[3]
                sample_dones[index] = exp[4]

        sample_qhat_next = self.brain_target.predict(sample_next_states)

        # set all Q values of terminal states to 0
        sample_qhat_next = sample_qhat_next * (np.ones(shape=sample_dones.shape) - sample_dones)

        # choose max action for each state
        sample_qhat_next = np.max(sample_qhat_next, axis=1)
        sample_qhat = self.brain_policy.predict(sample_states)

        if self.replay_exp.is_prioritized():
            errors = np.zeros(cur_batch_size)

        for i in range(cur_batch_size):
            a = tuple(sample_actions[i])
            if self.replay_exp.is_prioritized():
                old_value = sample_qhat[i, ACTIONS_TO_IDX[a]]  # capture before overwriting
            sample_qhat[i, ACTIONS_TO_IDX[a]] = sample_rewards[i] + self.gamma * sample_qhat_next[i]
            if self.replay_exp.is_prioritized():
                errors[i] = abs(old_value - sample_qhat[i, ACTIONS_TO_IDX[a]])

        q_target = sample_qhat

        if sample is None:
            if self.replay_exp.is_prioritized():
                self.replay_exp.memorize_exp(mini_batch, {'e': errors, 'is_update': True})
            self.brain_policy.fit(sample_states, q_target, epochs=1, verbose=0)
        else:
            self.replay_exp.memorize_exp(sample, {'e': errors[0], 'is_update': False})

    def _should_do_exploration(self):
        return np.random.uniform(0.0, 1.0) < self.epsilon
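# A minimal numpy sketch of the soft (Polyak) update performed in
# update_brain_target above, with made-up weights and tau = 0.1:
import numpy as np

tau = 0.1
policy_w = [np.array([1.0, 1.0])]
target_w = [np.array([0.0, 2.0])]
target_w = [t * (1 - tau) + p * tau for p, t in zip(policy_w, target_w)]
print(target_w)  # [array([0.1, 1.9])]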
def train_REPTILE_simple(model: keras.Model, dataset, training_keys, epochs=1,
                         lr_inner=0.01, lr_meta=0.01, batch_size=32,
                         validation_split=0.2, logging=1,
                         stopping_threshold=None, stopping_number=None,
                         lr_scheduler=None, show_plot=True):
    print("Beginning REPTILE training.")
    stop_counter = 0
    model_copy = keras.models.clone_model(model)
    # Runs faster with the optimizer initialised here
    meta_optimizer = keras.optimizers.Adam(learning_rate=lr_meta)

    X_, y_ = dataset
    epoch_train_losses = []
    epoch_val_losses = []
    for epoch in range(epochs):
        epoch_start = time.time()
        epoch_train_loss = []
        epoch_val_loss = []
        if lr_scheduler:
            lr_inner, lr_meta = lr_scheduler(epoch + 1)

        for i, key in enumerate(training_keys):
            # Inner loop for task i, SGD/Adam on the learner model
            _x, _y = X_[key], y_[key]
            model_copy.set_weights(model.get_weights())
            # model_copy = mlu.copy_model(model, _x)
            history = trainer.train_model(model_copy,
                                          x_train=_x,
                                          y_train=_y,
                                          optimizer=keras.optimizers.Adam(learning_rate=lr_inner),
                                          loss='mse',
                                          metrics=None,
                                          validation_split=validation_split,
                                          epochs=1,
                                          batch_size=batch_size,
                                          summary=False,
                                          verbose=0)

            # Log losses of each task
            task_train_loss = history.history['loss'][0]
            epoch_train_loss.append(task_train_loss)
            if 'val_loss' in history.history:
                epoch_val_loss.append(history.history['val_loss'][0])

            # Meta-update step per task: phi <- phi + lr_meta*(phi~ - phi).
            # Treating phi - phi_tilde as a gradient lets the meta optimizer
            # take the step phi <- phi - lr_meta*(phi - phi_tilde).
            phi_tilde = model_copy.get_weights()
            phi = model.get_weights()
            directions = [phi[j] - phi_tilde[j] for j in range(len(phi))]
            meta_optimizer.apply_gradients(zip(directions, model.trainable_variables))

        # Logging overall epoch losses
        _train_loss = np.mean(epoch_train_loss)
        epoch_train_losses.append(_train_loss)
        if epoch_val_loss:
            _val_loss = np.mean(epoch_val_loss)
            epoch_val_losses.append(_val_loss)

        # Logging every `logging` steps
        if logging and (epoch + 1) % logging == 0:
            print(f"Epoch {epoch + 1} / {epochs} completed in {time.time() - epoch_start:.2f}s")
            if epoch_val_loss:
                print(f"Epoch train loss: {_train_loss}, val loss: {_val_loss}")
            else:
                print(f"Epoch train loss: {_train_loss}")

        if stopping_threshold is not None and len(epoch_train_losses) >= 2:
            if abs(epoch_train_losses[-1] - epoch_train_losses[-2]) < stopping_threshold:
                stop_counter += 1
            else:
                stop_counter = 0  # Reset stop counter
            if stopping_number is not None and stop_counter >= stopping_number:
                print(f"No significant change in training loss for {stopping_number} epochs.")
                break  # Exit training early

    if show_plot:
        plt.plot(epoch_train_losses)
        if epoch_val_losses:
            plt.plot(epoch_val_losses)
        plt.show()

    return {'loss': epoch_train_losses, 'val_loss': epoch_val_losses}
def fit_dnn(df: DataFrame, nfold: int, class_col: int, final_model: Model,
            outfolder: str, task: str, model_descriptor: str,
            text_norm_option: int, text_input_info: dict,
            embedding_model, embedding_model_format, word_weights: list = None):
    encoder = LabelBinarizer()
    y = df[:, class_col]
    y_int = encoder.fit_transform(y)

    y_label_lookup = dict()
    y_label_lookup_inverse = dict()
    for index, l in zip(y_int.argmax(1), y):
        y_label_lookup[index] = l
        y_label_lookup_inverse[l] = index

    model_file = os.path.join(outfolder, "ann-%s.m" % task)

    model_copies = []
    for i in range(nfold):
        model_copy = clone_model(final_model)
        model_copy.set_weights(final_model.get_weights())
        model_copies.append(model_copy)

    kfold = StratifiedKFold(n_splits=nfold, shuffle=True, random_state=cl.RANDOM_STATE)
    splits = list(enumerate(kfold.split(df, y_int.argmax(1))))

    nfold_predictions = dict()
    for k in range(0, len(splits)):
        print("\tnfold=" + str(k))
        nfold_model = model_copies[k]
        nfold_model.compile(loss='categorical_crossentropy', optimizer='adam',
                            metrics=['accuracy'])

        # Fit the model
        X_train_index = splits[k][1][0]
        X_test_index = splits[k][1][1]
        X_train_merge_ = df[X_train_index]
        X_test_merge_ = df[X_test_index]
        y_train_ = y_int[X_train_index]
        y_test_ = y_int[X_test_index]

        # df, batch_size, text_norm_option, classes, ft_model, text_col_info: list
        training_generator = data_generator(df=X_train_merge_,
                                            class_col=class_col,
                                            classes=y_label_lookup_inverse,
                                            batch_size=dmc.DNN_BATCH_SIZE,
                                            text_norm_option=text_norm_option,
                                            embedding_model=embedding_model,
                                            text_input_info=text_input_info,
                                            embedding_format=embedding_model_format,
                                            word_weights=word_weights)
        training_steps_per_epoch = round(len(X_train_merge_) / dmc.DNN_BATCH_SIZE)
        nfold_model.fit_generator(training_generator,
                                  steps_per_epoch=training_steps_per_epoch,
                                  epochs=dmc.DNN_EPOCHES)

        test_generator = data_generator(df=X_test_merge_,
                                        class_col=class_col,
                                        classes=y_label_lookup_inverse,
                                        batch_size=len(X_test_merge_),
                                        text_norm_option=text_norm_option,
                                        embedding_model=embedding_model,
                                        text_input_info=text_input_info,
                                        embedding_format=embedding_model_format,
                                        shuffle=False,
                                        word_weights=word_weights)
        prediction_prob = nfold_model.predict_generator(test_generator, steps=1)

        # evaluate the model
        predictions = prediction_prob.argmax(axis=-1)
        for i, l in zip(X_test_index, predictions):
            nfold_predictions[i] = l
        del nfold_model

    indexes = sorted(list(nfold_predictions.keys()))
    predicted_labels = []
    for i in indexes:
        predicted_labels.append(nfold_predictions[i])
    util.save_scores(predicted_labels, y_int.argmax(1), "dnn", task,
                     model_descriptor, 3, outfolder)
X = np.random.randint(5, size=(n_samples, dx, dy))
y_true = np.ones((n_samples, dx, dout))

inp = Input(shape=(dx, dy))
dense = Dense(dout)(inp)
model = Model(inputs=inp, outputs=dense)
model.summary()
model.compile(optimizer="rmsprop", loss="mae", sample_weight_mode="temporal")

y_pred = model.predict(X, verbose=0)
unmasked_loss = mae(y_true, y_pred, mask=False)
keras_loss = model.evaluate(X, y_true, verbose=0)
print(unmasked_loss - keras_loss)
np.testing.assert_approx_equal(unmasked_loss, keras_loss)
weights = model.get_weights()

## third example: single dense layer, temporal dimension without TimeDistributed,
## only use the first sample, no masking
sample_weight = np.ones_like(y_true)
sample_weight[1:] = 0
weighted_loss = mae(y_true, y_pred, weights=sample_weight, mask=False)
keras_loss_weighted = model.evaluate(X, y_true,
                                     sample_weight=sample_weight[..., 0],
                                     verbose=0)
print(weighted_loss - keras_loss_weighted)
np.testing.assert_approx_equal(weighted_loss, keras_loss_weighted)

## fourth example: single dense layer, temporal dimension with TimeDistributed,
## no sample weights,
class OandaNNet:
    def __init__(self, learning_rate=0.01, rate_size=32, position_size=3):
        self.output_size = ACTION_SIZE
        self.input_rate_size = rate_size
        self.input_position_size = position_size

        rates_input = Input(shape=(self.input_rate_size, self.input_rate_size, 1),
                            name='rates_input')
        rate = Conv2D(64, kernel_size=(3, 3))(rates_input)
        rate = Activation('relu')(rate)
        rate = Conv2D(64, kernel_size=(3, 3))(rate)
        rate = Activation('relu')(rate)
        rate = MaxPooling2D(pool_size=(2, 2))(rate)
        rate = Dropout(0.25)(rate)
        rate = Dense(64, activation='relu')(rate)
        rate = Flatten()(rate)

        position_input = Input(shape=(self.input_position_size,), name='position_input')
        position = Dense(64)(position_input)

        main_input = Concatenate()([rate, position])
        main_input = Dense(128)(main_input)
        hdn = Dense(32, activation='relu')(main_input)
        v = Dense(32, activation='relu')(hdn)
        v = Dense(1)(v)
        adv = Dense(32, activation='relu')(hdn)
        adv = Dense(self.output_size)(adv)
        model = Concatenate()([v, adv])
        model = Lambda(lambda a: k.expand_dims(a[:, 0], -1) + a[:, 1:]
                       - k.mean(a[:, 1:], axis=1, keepdims=True),
                       output_shape=(self.output_size,))(model)
        self._model = Model(inputs=[rates_input, position_input], outputs=model)
        self.optimizer = Adam(lr=learning_rate)  # Adam is the optimization method used to reduce the loss
        self._model.compile(loss=huberloss, optimizer=self.optimizer)

    def input_data_format(self, lst):
        rates = []
        positions = []
        for s in lst:
            rate = np.reshape(s.rates.map, (self.input_rate_size, self.input_rate_size, 1))
            rates.append(rate)
            positions.append(s.position)
        return rates, positions

    def predict(self, x):
        rates, positions = self.input_data_format(x)
        return self._model.predict([rates, positions])

    def train_on_batch(self, x, y):
        rates, positions = self.input_data_format(x)
        y = np.reshape(y, [len(y), self.output_size])
        return self._model.train_on_batch([rates, positions], y)

    def set_weights(self, w):
        return self._model.set_weights(w)

    def get_weights(self):
        return self._model.get_weights()
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam',
              metrics=['acc'])

import time  # re-import time before every training run so the timer is reset

start = time.time()
history = model.fit(train_image, train_label, epochs=100,
                    callbacks=callback_list,
                    validation_data=(test_image, test_label))
elapsed = time.time() - start

print("Training time (seconds): {}".format(elapsed))
print("Total number of parameters: {}".format(
    sum([arr.flatten().shape[0] for arr in model.get_weights()])))

# Plot the curves to check whether training went well.
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(len(acc))

plt.plot(epochs, acc, 'bo', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='Validation acc')
plt.title('Training and Validation accuracy')
plt.legend()
plt.figure()
dx = 2
dy = 3
dout = 4
mask_value = -1

X = np.random.randint(5, size=(n_samples, dx, dy))
X[1, 0, :] = mask_value

inp = Input(shape=(dx, dy))
x = Masking(mask_value=-1.0)(inp)
lstm = LSTM(dout, return_sequences=True)(x)
model_1 = Model(inputs=inp, outputs=lstm)
model_1.summary()
model_1.set_weights(
    [np.ones(l.shape) * i for i, l in enumerate(model_1.get_weights(), 2)]
)
model_1.compile(optimizer="rmsprop", loss="mae")

y_true = np.ones((n_samples, dx, model_1.layers[2].output_shape[-1]))
y_pred_1 = model_1.predict(X)
print(y_pred_1)

unmasked_loss = np.abs(1 - y_pred_1).mean()
masked_loss = np.abs(1 - y_pred_1[y_pred_1 != 0.0]).mean()
keras_loss = model_1.evaluate(X, y_true, verbose=0)
print(f"unmasked loss: {unmasked_loss}")
print(f"masked loss: {masked_loss}")
print(f"evaluate with Keras: {keras_loss}")

bilstm = Bidirectional(LSTM(4, return_sequences=True), merge_mode="concat")(x)
model_2 = Model(inputs=inp, outputs=bilstm)
model_2.summary()