def build_reconstruction_adversarial_model(self):
    # Create adversarial graph
    inputs_discriminator_graph = keras.Input(shape=(64, 64, 3))
    output = self.discriminator_graph(inputs_discriminator_graph)
    self.model_discriminator_graph = keras.Model(inputs_discriminator_graph, output)
    self.model_discriminator_graph.compile(
        loss='binary_crossentropy',
        optimizer=optimizers.Adam(lr=Config.learning_rate),
        loss_weights=[0.001])

    # Input to the whole model
    inputs = keras.Input(shape=self.data_shape)

    # Reconstruction
    reconstruction_output = self.encoder_decoder_graph(inputs)
    adversarial_output = self.discriminator_graph(reconstruction_output)

    self.model = keras.Model(inputs, outputs=[reconstruction_output, adversarial_output])
    self.model.compile(
        optimizer=optimizers.Adam(lr=Config.learning_rate),
        loss=[self.reconstruction_loss, 'binary_crossentropy'],
        loss_weights=[0.999, 0.001])
def compile_keras_model(model, optimizer, learning_rate):
    '''
    Compile the given model with the chosen optimizer, using
    sparse_categorical_crossentropy as the loss function.

    :param model: the tensorflow.keras model to compile.
    :param optimizer: one of "sgd", "adam" (used for most of the training) or "adadelta".
    :param learning_rate: float learning rate to use with the optimizer.
    :return: the compiled model.
    '''
    if str.lower(optimizer) == "sgd":
        opt = optimizers.SGD(lr=learning_rate, clipnorm=0.1, momentum=0.95, nesterov=True)
    elif str.lower(optimizer) == "adam":
        opt = optimizers.Adam(lr=learning_rate)
    elif str.lower(optimizer) == "adadelta":
        opt = optimizers.Adadelta(lr=learning_rate)

    sess = K.get_session()
    init = tf.global_variables_initializer()
    sess.run(init)

    model.compile(loss='sparse_categorical_crossentropy', optimizer=opt, metrics=["acc"])
    return model
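# Hypothetical usage sketch for compile_keras_model (not part of the original
# snippet): the toy model, layer sizes, and learning rate below are illustrative only.
from tensorflow import keras
from tensorflow.keras import layers

toy_model = keras.Sequential([
    layers.Dense(32, activation='relu', input_shape=(10,)),
    layers.Dense(3, activation='softmax'),
])
toy_model = compile_keras_model(toy_model, optimizer="adam", learning_rate=1e-3)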
def train_cnn_model(emb_layer, x_train, y_train, x_val, y_val, opt):
    model = CNN(embedding_layer=emb_layer,
                num_words=opt.n_words,
                embedding_dim=opt.embed_dim,
                filter_sizes=opt.cnn_filter_shapes,
                feature_maps=opt.filter_sizes,
                max_seq_length=opt.sent_len,
                dropout_rate=opt.dropout_ratio,
                hidden_units=200,
                nb_classes=2).build_model()

    model.compile(loss='categorical_crossentropy',
                  optimizer=optimizers.Adam(),
                  metrics=['accuracy'])

    # y_train = y_train.reshape(-1, 1)
    # model = build_model(emb_layer, opt)
    print(model.summary())

    early_stopping = EarlyStopping(monitor='val_loss', patience=2)
    history = model.fit(x_train, y_train,
                        epochs=opt.cnn_epoch,
                        batch_size=opt.batch_size,
                        verbose=1,
                        validation_data=(x_val, y_val),
                        callbacks=[early_stopping])

    with open("CNN_train_history.txt", "w") as f:
        print(history.history, file=f)

    return model
def nn(self):
    nn = Sequential()
    nn.add(Convolution2D(self.filtroprimeravez, self.filtrouno,
                         padding="same",
                         input_shape=(self.longituddelaimagen, self.alturadelaimagen, 3),
                         activation='relu'))
    nn.add(MaxPooling2D(pool_size=self.pulido))
    nn.add(Convolution2D(self.filtrosegundavez, self.filtrodos, padding="same"))
    nn.add(MaxPooling2D(pool_size=self.pulido))
    nn.add(Flatten())
    nn.add(Dense(256, activation='relu'))
    nn.add(Dropout(0.1))
    nn.add(Dense(self.numerodenfermedades, activation='softmax'))

    nn.compile(loss='categorical_crossentropy',
               optimizer=optimizers.Adam(lr=self.lr),
               metrics=['accuracy'])

    nn.fit_generator(self.entrenamiento_generador,
                     steps_per_epoch=self.pasos,
                     epochs=self.pruebas,
                     validation_data=self.validacion_generador,
                     validation_steps=self.validacon)

    #nn.save('./modelo_lab_experimental/modelo_pezenfermo.h5')
    #nn.save_weights('./modelo_lab_experimental/pesospezenfermo.h5')
    return nn
def instantiate_model(self):
    self.model = create_model_resnet(self.input_shape,
                                     n_output=self.n_output,
                                     normalize=self.normalize,
                                     kernel_shape=self.kernel_shape,
                                     size_blocks=self.size_blocks,
                                     resnet=self.resnet,
                                     dropout=self.dropout,
                                     n_channels_by_block=self.n_channels_by_block,
                                     size_dense=self.size_dense,
                                     average_pooling=self.average_pooling,
                                     separable_conv=self.separable_conv)
    print(self.model.summary())

    self.optimizer = optimizers.Adamax(lr=self.lr) if self.optimizer == 'adamax' \
        else optimizers.RMSprop(lr=self.lr) if self.optimizer == 'rmsprop' \
        else optimizers.SGD(lr=self.lr, momentum=.9) if self.optimizer == 'sgd' \
        else optimizers.Adam(lr=self.lr) if self.optimizer == 'adam' else None

    if self.zoom:
        self.datagen = ImageDataGenerator(rotation_range=10,
                                          width_shift_range=0.1,
                                          height_shift_range=0.1,
                                          zoom_range=0.1,
                                          horizontal_flip=True,
                                          fill_mode='nearest')
    elif self.shift:
        self.datagen = ImageDataGenerator(width_shift_range=0.1,
                                          height_shift_range=0.1,
                                          fill_mode='nearest')
    elif self.flip:
        self.datagen = ImageDataGenerator(horizontal_flip=bool(self.flip))
    else:
        self.datagen = None
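# The chained conditional above is equivalent to a dictionary lookup; a small
# refactoring sketch (an assumed helper, not part of the original class):
def select_optimizer(name, lr):
    factories = {
        'adamax': lambda: optimizers.Adamax(lr=lr),
        'rmsprop': lambda: optimizers.RMSprop(lr=lr),
        'sgd': lambda: optimizers.SGD(lr=lr, momentum=0.9),
        'adam': lambda: optimizers.Adam(lr=lr),
    }
    # returns None for unknown names, mirroring the original behaviour
    return factories.get(name, lambda: None)()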
def entrenamiento_red2():
    cnn = Sequential()
    cnn.add(Convolution2D(filtrosConv1, tamanio_filtro1,
                          padding='same',
                          input_shape=(altura, longitud, 3),
                          activation='relu'))
    cnn.add(MaxPooling2D(pool_size=tamanio_pool))
    cnn.add(Convolution2D(filtrosConv2, tamanio_filtro2, padding='same', activation='relu'))
    cnn.add(MaxPooling2D(pool_size=tamanio_pool))

    # The image now has many feature maps; flatten them into a single vector
    cnn.add(Flatten())
    # After flattening, connect the dense layers
    cnn.add(Dense(256, activation='relu'))
    # Drop 50% of the dense layer's units at each step to avoid overfitting
    cnn.add(Dropout(0.5))
    # Connect to the output layer
    cnn.add(Dense(clases, activation='softmax'))

    # Parameters for optimizing the model
    cnn.compile(loss='categorical_crossentropy',
                optimizer=optimizers.Adam(lr=lr),
                metrics=['accuracy'])

    cnn.fit_generator(imagen_entrenamiento_red2,
                      steps_per_epoch=pasos,
                      epochs=epocas,
                      validation_data=imagen_validacion_red2,
                      validation_steps=pasos_validacion)

    dir = './modelo/red2/'
    if not os.path.exists(dir):
        os.mkdir(dir)
    cnn.save('./modelo/red2/modelo.h5')
    cnn.save_weights('./modelo/red2/pesos.h5')
def _get_keras_model(self) -> models.Model:
    I = layers.Input(shape=(KerasCNNModel.MAX_SEQUENCE_LENGTH, 300),
                     dtype='float32',
                     name='comment_text')

    # Convolutional layers
    X = I
    for filter_size in self.hparams().filter_sizes:
        X = layers.Conv1D(self.hparams().num_filters,
                          filter_size,
                          activation='relu',
                          padding='same')(X)
    X = layers.GlobalAveragePooling1D()(X)

    # Dense layers
    for num_units in self.hparams().dense_units:
        X = layers.Dense(num_units, activation='relu')(X)
        X = layers.Dropout(self.hparams().dropout_rate)(X)

    # Outputs
    outputs = []
    for label in self._labels:
        outputs.append(layers.Dense(1, activation='sigmoid', name=label)(X))

    model = models.Model(inputs=I, outputs=outputs)
    model.compile(optimizer=optimizers.Adam(lr=self.hparams().learning_rate),
                  loss='binary_crossentropy',
                  metrics=['binary_accuracy', super().roc_auc])

    tf.logging.info(model.summary())
    return model
def __init__(self, number_of_classes, maxlen):
    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
    # leak_partial = partial(tf.nn.leaky_relu, alpha=0.2)
    dropout_rate = 0.5
    base = wrapped_partial(tf.nn.leaky_relu, alpha=0.2)
    # print(base.func)
    input_shape = (maxlen, )
    target_shape = (maxlen, 1)

    self.model_scheme = [
        Reshape(input_shape=input_shape, target_shape=(maxlen, 1)),
        # to identify important features in the samples
        Conv1D(128, kernel_size=2, strides=1, activation=base, kernel_regularizer='l1'),
        MaxPooling1D(pool_size=2),
        # to convert the ndarray to a 1D array
        Flatten(),
        # Dense - a linear operation in which every input is connected to every output
        # by a weight (so there are n_inputs * n_outputs weights - which can be a lot!).
        # Generally followed by a non-linear activation function
        Dense(64, activation=base),
        BatchNormalization(),
        Dropout(dropout_rate),
        Dense(number_of_classes, activation=tf.nn.softmax)
    ]

    # Sequential is a sequence of layers
    self.__model = tf.keras.Sequential(self.model_scheme)
    self.__model.compile(
        optimizer=optimizers.Adam(lr=0.0001),
        loss=losses.categorical_crossentropy,
        metrics=['accuracy'],
    )
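# `wrapped_partial` is referenced above but not shown. A plausible definition
# (an assumption, not necessarily the author's code) is a functools.partial that
# keeps the wrapped function's __name__, which Keras needs when it stringifies
# activation functions:
import functools

def wrapped_partial(func, *args, **kwargs):
    partial_func = functools.partial(func, *args, **kwargs)
    functools.update_wrapper(partial_func, func)
    return partial_func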
def dense_train(space):
    ''' Train a dense feed-forward network on the training / validation set -> give predictions of Y '''

    params = space.copy()

    input_shape = (X_train.shape[-1], )  # input shape depends on x_fields used
    input_img = Input(shape=input_shape)

    init_nodes = params['init_nodes']  # first dense layer - number of nodes
    nodes_mult = params['nodes_mult']  # nodes growth rate
    mult_freq = params['mult_freq']    # grow every X layers
    mult_start = params['mult_start']  # grow from layer X
    end_nodes = params['end_nodes']    # maximum number of nodes

    if params['num_Dense_layer'] < 4:
        params['init_nodes'] = init_nodes = 16

    d_1 = Dense(init_nodes, activation=params['activation'])(input_img)  # remove kernel_regularizer=regularizers.l1(params['l1'])
    d_1 = Dropout(params['dropout'])(d_1)

    for i in range(1, params['num_Dense_layer']):
        temp_nodes = int(min(init_nodes * (2 ** (nodes_mult * max((i - mult_start + 3) // mult_freq, 0))), end_nodes))
        d_1 = Dense(temp_nodes, activation=params['activation'])(d_1)

        if i != params['num_Dense_layer'] - 1:  # last dense layer has no dropout
            d_1 = Dropout(params['dropout'])(d_1)

    f_x = Dense(1)(d_1)

    callbacks_list = [
        callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=10),
        callbacks.EarlyStopping(monitor='val_loss', patience=10, mode='auto')
    ]  # add callbacks

    lr_val = 10 ** -int(params['learning_rate'])
    adam = optimizers.Adam(lr=lr_val)
    model = Model(input_img, f_x)
    model.compile(adam, loss='mae')
    model.summary()

    history = model.fit(X_train, Y_train,
                        epochs=50,
                        batch_size=params['batch_size'],
                        validation_data=(X_valid, Y_valid),
                        callbacks=callbacks_list,
                        verbose=1)

    Y_test_pred = model.predict(X_test)
    Y_train_pred = model.predict(X_train)
    Y_valid_pred = model.predict(X_valid)

    return Y_test_pred, Y_train_pred, Y_valid_pred, history
def get_model(lambda_centerloss, input_shape=(192, 192, 3), num_classes=8, regularizer=None, lr=0.01):
    image = Input(shape=input_shape)
    label = Input(shape=(num_classes, ))

    X = low_level_edge_detector(image)
    X = dsrc_module(X, num_filters=16, module_name='a')
    X = dsrc_module(X, num_filters=32, module_name='b')
    X = dsrc_module(X, num_filters=64, module_name='c')
    X = dsrc_module(X, num_filters=128, module_name='d')
    X = dsrc_module(X, num_filters=256, module_name='e')
    X = dsrc_module(X, num_filters=512, module_name='f')
    X = Conv2D(filters=num_classes, kernel_size=(1, 1))(X)
    X = GlobalAveragePooling2D()(X)
    X = Flatten()(X)

    center_loss = CenterLossLayer(alpha=0.5, name='centerlosslayer')([X, label])

    if regularizer is None:
        X = Dense(num_classes, name='fc' + str(num_classes))(X)
    else:
        X = Dense(num_classes, name='fc' + str(num_classes), kernel_regularizer=l2(regularizer))(X)
    predicted_class = Activation('softmax', name='prediction')(X)

    model = Model(inputs=[image, label], outputs=[predicted_class, center_loss])
    model.compile(optimizer=optimizers.Adam(lr=lr),
                  loss=[losses.categorical_crossentropy, zero_loss],
                  loss_weights=[1, lambda_centerloss],
                  metrics=['accuracy'])
    return model
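# `zero_loss` and `CenterLossLayer` are defined elsewhere. A common pairing
# (an assumption, not necessarily the author's implementation) is a loss that
# simply passes through the value the CenterLossLayer already computed, so the
# loss_weights entry controls its contribution:
from tensorflow.keras import backend as K

def zero_loss(y_true, y_pred):
    # y_pred holds the per-sample center loss emitted by CenterLossLayer
    return 0.5 * K.sum(y_pred, axis=0)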
def keras_model4(num_classes, input_dim):
    nn_deep_model = OverwrittenSequentialClassifier()
    nn_deep_model.add(Dense(5000, input_dim=input_dim, activation='relu'))
    nn_deep_model.add(Dense(4500, activation='relu'))
    nn_deep_model.add(Dense(4000, activation='relu'))
    nn_deep_model.add(Dropout(0.5))
    nn_deep_model.add(Dense(3500, activation='relu'))
    nn_deep_model.add(Dense(3000, activation='relu'))
    nn_deep_model.add(Dense(2500, activation='relu'))
    nn_deep_model.add(Dropout(0.5))
    nn_deep_model.add(Dense(2000, activation='relu'))
    nn_deep_model.add(Dense(1500, activation='relu'))
    nn_deep_model.add(Dense(1000, activation='relu'))
    nn_deep_model.add(Dropout(0.5))
    nn_deep_model.add(Dense(500, activation='relu'))
    nn_deep_model.add(Dense(250, activation='relu'))
    nn_deep_model.add(Dense(num_classes, activation='softmax'))

    model_optimizer = optimizers.Adam(lr=0.001)
    nn_deep_model.compile(loss='mean_squared_error',
                          optimizer=model_optimizer,
                          metrics=['accuracy'])
    return nn_deep_model
def build_bigru_model(self, embedding_matrix) -> Tuple[Model, Model]:
    """
    build and return multi-headed BiGRU model with
    1) MLM output from first GRU layer
    2) standard toxicity classification output from second
    :param embedding_matrix:
    :return:
    """
    training_model, inference_model = transformer_bert_model(
        use_universal_transformer=True,
        max_seq_length=self.max_seq_len,
        vocabulary_size=self.vocab_size + 1,
        word_embedding_size=self.embedding_dims,
        transformer_depth=5,
        num_heads=8)
    training_model.compile(
        optimizers.Adam(lr=self.learning_rate, beta_1=0.9, beta_2=0.999),
        loss={
            'main_output': losses.binary_crossentropy,
            'aux_output': MaskedPenalizedSparseCategoricalCrossentropy(CONFIDENCE_PENALTY)
        })

    print('generated bigru model...')
    # print(training_model.summary())

    return training_model, inference_model
def __init__(self, conf):
    self.conf = conf
    self.hps = self.conf['hps']
    self.nn_arch = self.conf['nn_arch']
    self.model_loading = self.conf['model_loading']

    if self.model_loading:
        self.digit_classificaton_model = load_model(self.MODEL_PATH, custom_objects={'RBM': RBM})
        self.digit_classificaton_model.summary()
        self.rbm = self.digit_classificaton_model.get_layer('rbm_1')
    else:
        # Design the model.
        input_image = Input(shape=(self.IMAGE_SIZE,))
        x = Lambda(lambda x: x / 255)(input_image)

        # RBM layer.
        self.rbm = RBM(self.conf['rbm_hps'], self.nn_arch['output_dim'], name='rbm')  # Name?
        x = self.rbm(x)  # ?

        # Softmax layer.
        output = Dense(10, activation='softmax')(x)

        # Create a model.
        self.digit_classificaton_model = Model(inputs=[input_image], outputs=[output])
        opt = optimizers.Adam(lr=self.hps['lr'],
                              beta_1=self.hps['beta_1'],
                              beta_2=self.hps['beta_2'],
                              decay=self.hps['decay'])
        self.digit_classificaton_model.compile(optimizer=opt, loss='categorical_crossentropy')
        self.digit_classificaton_model.summary()
def testNumericEquivalenceForAmsgrad(self):
    np.random.seed(1331)
    with self.cached_session():
        train_samples = 20
        input_dim = 3
        num_classes = 2
        (x, y), _ = testing_utils.get_test_data(
            train_samples=train_samples,
            test_samples=10,
            input_shape=(input_dim,),
            num_classes=num_classes)
        y = keras.utils.to_categorical(y)

        num_hidden = 5
        model_k_v1 = testing_utils.get_small_sequential_mlp(
            num_hidden=num_hidden, num_classes=num_classes, input_dim=input_dim)
        model_k_v2 = testing_utils.get_small_sequential_mlp(
            num_hidden=num_hidden, num_classes=num_classes, input_dim=input_dim)
        model_k_v2.set_weights(model_k_v1.get_weights())

        opt_k_v1 = optimizers.Adam(amsgrad=True)
        opt_k_v2 = adam.Adam(amsgrad=True)

        model_k_v1.compile(opt_k_v1, loss='categorical_crossentropy', metrics=[])
        model_k_v2.compile(opt_k_v2, loss='categorical_crossentropy', metrics=[])

        hist_k_v1 = model_k_v1.fit(x, y, batch_size=5, epochs=10, shuffle=False)
        hist_k_v2 = model_k_v2.fit(x, y, batch_size=5, epochs=10, shuffle=False)

        self.assertAllClose(model_k_v1.get_weights(), model_k_v2.get_weights())
        self.assertAllClose(opt_k_v1.get_weights(), opt_k_v2.get_weights())
        self.assertAllClose(hist_k_v1.history['loss'], hist_k_v2.history['loss'])
def build_bigru_model(self, embedding_matrix) -> Tuple[Model, Model]:
    """
    build and return multi-headed BiGRU model with
    1) MLM output from first GRU layer
    2) standard toxicity classification output from second
    :param embedding_matrix:
    :return:
    """
    token_input = layers.Input(shape=(self.max_seq_len,))
    embedding_layer = layers.Embedding(self.vocab_size + 1,
                                       self.embedding_dims,
                                       weights=[embedding_matrix],
                                       trainable=False)
    embedded_input = embedding_layer(token_input)
    gru1_output = layers.Bidirectional(
        layers.CuDNNGRU(self.num_neurons, return_sequences=True))(embedded_input)
    aux_output = layers.Dense(self.vocab_size + 1, activation='softmax', name='aux_output')(gru1_output)
    gru2_output = layers.Bidirectional(layers.CuDNNGRU(self.num_neurons))(gru1_output)
    main_output = layers.Dense(6, activation='sigmoid', name='main_output')(gru2_output)

    training_model = Model(inputs=token_input, outputs=[main_output, aux_output])
    mlm_loss = MaskedPenalizedSparseCategoricalCrossentropy(CONFIDENCE_PENALTY)
    training_model.compile(optimizer=optimizers.Adam(),
                           loss={'main_output': MaskedBinaryCrossedentropy(),
                                 'aux_output': mlm_loss})

    inference_model = Model(inputs=token_input, outputs=main_output)

    print('generated bigru model...')
    print(training_model.summary())

    return training_model, inference_model
def get_opt():
    if opt == 'adam':
        return optimizers.Adam(lr=lr, clipnorm=1.)
    elif opt == 'rmsprop':
        return optimizers.RMSprop(lr=lr, clipnorm=1.)
    else:
        raise Exception('Only Adam and RMSProp are available here')
def init_model(self, input_shape, num_classes, **kwargs):
    inputs = Input(shape=input_shape)
    # bnorm_1 = BatchNormalization(axis=2)(inputs)
    lstm_1 = Bidirectional(CuDNNLSTM(64, name='blstm_1', return_sequences=True),
                           merge_mode='concat')(inputs)
    activation_1 = Activation('tanh')(lstm_1)
    dropout1 = SpatialDropout1D(0.5)(activation_1)
    attention_1 = Attention(8, 16)([dropout1, dropout1, dropout1])
    pool_1 = GlobalMaxPool1D()(attention_1)
    dropout2 = Dropout(rate=0.5)(pool_1)
    dense_1 = Dense(units=256, activation='relu')(dropout2)
    outputs = Dense(units=num_classes, activation='softmax')(dense_1)

    model = TFModel(inputs=inputs, outputs=outputs)
    optimizer = optimizers.Adam(
        # learning_rate=1e-3,
        lr=1e-3,
        beta_1=0.9,
        beta_2=0.999,
        epsilon=1e-08,
        decay=0.0002,
        amsgrad=True)
    model.compile(optimizer=optimizer,
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    model.summary()
    self._model = model
    self.is_init = True
def get_rnn_model(window_size, features, pred_length):
    inputs = Input(shape=(window_size, features))
    x = CuDNNGRU(128,
                 kernel_regularizer=regularizers.l2(0.01),
                 bias_regularizer=regularizers.l2(0.01),
                 return_sequences=True,
                 input_shape=(window_size, features))(inputs)
    x = BatchNormalization()(x)
    x = Dropout(0.5)(x)
    x = CuDNNGRU(128,
                 kernel_regularizer=regularizers.l2(0.01),
                 bias_regularizer=regularizers.l2(0.01))(x)
    x = BatchNormalization()(x)
    x = Dropout(0.5)(x)
    x = Dense(256,
              activation='relu',
              kernel_regularizer=regularizers.l2(0.01),
              bias_regularizer=regularizers.l2(0.01))(x)
    x = BatchNormalization()(x)
    x = Dropout(0.5)(x)
    preds = Dense(pred_length,
                  activation='linear',
                  kernel_regularizer=regularizers.l2(0.01),
                  bias_regularizer=regularizers.l2(0.01))(x)

    model = Model(inputs=inputs, outputs=preds)
    optimizer = optimizers.Adam(lr=0.001)
    model.compile(optimizer=optimizer, loss='mse', metrics=['mae'])
    return model
def __init__(self, hidden_state_size, entropy_reg=0.05):
    # single feedforward layer with a sigmoid activation
    self.model = Sequential([
        Dense(1,
              input_shape=(hidden_state_size, ),
              # kernel_initializer='random_uniform',
              kernel_initializer='random_normal',  # TODO or maybe random_uniform
              activity_regularizer=regularizers.l1(entropy_reg)),
        Activation('sigmoid')
    ])

    # Accuracy is not the right measure for this model's performance: the task is
    # closer to regression than classification, which is also reflected in the
    # choice of loss function rather than something like 'categorical_crossentropy'.
    optimizer = optimizers.Adam()  # lr=learning_rate, 0.001 by default, which is fine
    self.model.compile(
        optimizer=optimizer,
        loss='binary_crossentropy'  # TODO these are somewhat arbitrary, needs to be checked
        # metrics=['accuracy']
    )
def build_bigru_model(self, embedding_matrix) -> Model:
    """
    build and return BiGRU model using standard optimizer and loss
    :param embedding_matrix:
    :return:
    """
    token_input = layers.Input(shape=(self.max_seq_len, ))
    embedding_layer = layers.Embedding(self.vocab_size + 1,
                                       self.embedding_dims,
                                       weights=[embedding_matrix],
                                       trainable=False)
    embedded_input = embedding_layer(token_input)
    gru_output = layers.Bidirectional(
        layers.CuDNNGRU(self.num_neurons, return_sequences=True))(embedded_input)
    gru_output = layers.Bidirectional(
        layers.CuDNNGRU(self.num_neurons))(gru_output)
    dense_output = layers.Dense(6, activation='sigmoid')(gru_output)

    bigru_model = Model(token_input, dense_output)
    bigru_model.compile(optimizer=optimizers.Adam(),
                        loss=losses.binary_crossentropy)

    print('generated bigru model...')
    return bigru_model
def run():
    hp = HyperParameter()

    # data loading and preprocessing
    (x_train, y_train), (x_test, y_test) = datasets.imdb.load_data(num_words=hp.total_words)
    # x_train: [b, 80]
    # x_test:  [b, 80]
    x_train = preprocessing.sequence.pad_sequences(x_train, maxlen=hp.max_sentence_len)
    x_test = preprocessing.sequence.pad_sequences(x_test, maxlen=hp.max_sentence_len)

    hidden_units = 64
    model = MyRNN(hidden_units)
    model.compile(optimizer=optimizers.Adam(lr=hp.learning_rate),
                  loss=losses.binary_crossentropy,
                  metrics=['accuracy'])
    model.fit(x_train, y_train, batch_size=hp.batch_sz, epochs=hp.epochs)

    score = model.evaluate(x_test, y_test)
    print('test loss: ', score[0])
    print('test accuracy: ', score[1])
def init_model(self, input_shape, num_classes, **kwargs):
    inputs = Input(shape=input_shape)
    # bnorm_1 = BatchNormalization(axis=-1)(inputs)
    x = Bidirectional(CuDNNLSTM(96, name='blstm1', return_sequences=True),
                      merge_mode='concat')(inputs)
    # activation_1 = Activation('tanh')(lstm_1)
    x = SpatialDropout1D(0.1)(x)
    x = Attention(8, 16)([x, x, x])
    x1 = GlobalMaxPool1D()(x)
    x2 = GlobalAvgPool1D()(x)
    x = Concatenate(axis=-1)([x1, x2])
    x = Dense(units=128, activation='elu')(x)
    x = Dense(units=64, activation='elu')(x)
    x = Dropout(rate=0.4)(x)
    outputs = Dense(units=num_classes, activation='softmax')(x)

    model = TFModel(inputs=inputs, outputs=outputs)
    optimizer = optimizers.Adam(
        # learning_rate=1e-3,
        lr=1e-3,
        beta_1=0.9,
        beta_2=0.999,
        epsilon=1e-08,
        decay=0.0002,
        amsgrad=True)
    model.compile(optimizer=optimizer,
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    model.summary()
    self._model = model
    self.is_init = True
def fit(self, trdst, valdst, nb_epochs, steps_per_epoch, batch_size=100, use_wn=False):
    opt = AdamWithWeightnorm() if use_wn else optimizers.Adam()
    self.model.compile(optimizer=opt, loss='mse', metrics=[psnr_tf])

    log_dir = os.path.join(self.log_dir, self.model_name)
    callback_list = [
        callbacks.ModelCheckpoint(self.weights_path,
                                  save_best_only=False,
                                  save_weights_only=True,
                                  verbose=1),
        callbacks.LearningRateScheduler(
            lambda e: self.lr_schedule(e, nb_epochs), verbose=0),
        callbacks.TensorBoard(log_dir=log_dir,
                              histogram_freq=1,
                              write_graph=True)
    ]

    print('Training model : %s' % (self.model_name))

    self.model.fit(
        x=trdst.batch(batch_size).prefetch(AUTOTUNE),
        epochs=nb_epochs,
        callbacks=callback_list,
        validation_data=valdst.batch(batch_size).prefetch(AUTOTUNE),
        steps_per_epoch=steps_per_epoch,
        verbose=1)

    return self
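# `self.lr_schedule` is referenced above but not defined in the snippet. A
# minimal step-decay sketch (the base learning rate and breakpoints below are
# assumptions, not the author's values):
def lr_schedule(self, epoch, nb_epochs, base_lr=1e-4):
    # halve the learning rate after 50% of training, quarter it after 75%
    if epoch >= 0.75 * nb_epochs:
        return base_lr / 4
    if epoch >= 0.5 * nb_epochs:
        return base_lr / 2
    return base_lr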
def train(self):
    Xin_train, Xout_train = self.load_dataset(self._training_dataset_path)

    model = self.create_model(Xin_train, Xout_train, self._lstm_units)
    optimizer = optimizers.Adam(lr=self._learning_rate)
    model.compile(loss='mean_squared_error', optimizer=optimizer, metrics=['accuracy'])

    hist = model.fit(Xin_train, Xout_train,
                     batch_size=self._batch_size,
                     verbose=1,
                     epochs=self._epochs,
                     validation_split=self._validation_split)

    model.save(self._model_params_path)
def __init__(self):
    self.DATASET = 'restaurant'  # 'twitter', 'restaurant', 'laptop'
    self.POLARITIES_DIM = 3
    self.EMBEDDING_DIM = 100
    self.LEARNING_RATE = 0.01
    self.LSTM_PARAMS = {
        'units': 200,
        'activation': 'tanh',
        'recurrent_activation': 'sigmoid',
        'kernel_initializer': initializers.RandomUniform(minval=-0.003, maxval=0.003),
        'recurrent_initializer': initializers.RandomUniform(minval=-0.003, maxval=0.003),
        'bias_initializer': initializers.RandomUniform(minval=-0.003, maxval=0.003),
        'kernel_regularizer': regularizers.l2(0.001),
        'recurrent_regularizer': regularizers.l2(0.001),
        'bias_regularizer': regularizers.l2(0.001),
        'dropout': 0,
        'recurrent_dropout': 0,
    }
    self.MAX_SEQUENCE_LENGTH = 80
    self.MAX_ASPECT_LENGTH = 2
    self.BATCH_SIZE = 200
    self.ITERATION = 500

    self.texts_raw_indices, self.texts_left_indices, self.aspects_indices, self.texts_right_indices, \
    self.polarities_matrix, \
    self.embedding_matrix, \
    self.tokenizer = \
        read_dataset(type=self.DATASET,
                     mode='train',
                     embedding_dim=self.EMBEDDING_DIM,
                     max_seq_len=self.MAX_SEQUENCE_LENGTH,
                     max_aspect_len=self.MAX_ASPECT_LENGTH)

    self.left_input = np.concatenate((self.texts_left_indices, self.aspects_indices), axis=1)
    self.right_input = np.concatenate((self.texts_right_indices, self.aspects_indices), axis=1)

    if os.path.exists('td_lstm_saved_model.h5'):
        print('loading saved model...')
        self.model = load_model('td_lstm_saved_model.h5')
    else:
        print('Build model...')
        inputs_l = Input(shape=(self.MAX_SEQUENCE_LENGTH + self.MAX_ASPECT_LENGTH,))
        inputs_r = Input(shape=(self.MAX_SEQUENCE_LENGTH + self.MAX_ASPECT_LENGTH,))
        Embedding_Layer = Embedding(input_dim=len(self.tokenizer.word_index) + 1,
                                    output_dim=self.EMBEDDING_DIM,
                                    input_length=self.MAX_SEQUENCE_LENGTH + self.MAX_ASPECT_LENGTH,
                                    weights=[self.embedding_matrix],
                                    trainable=False)
        x_l = Embedding_Layer(inputs_l)
        x_r = Embedding_Layer(inputs_r)
        x_l = LSTM(**self.LSTM_PARAMS)(x_l)
        x_r = LSTM(**self.LSTM_PARAMS, go_backwards=True)(x_r)
        x = Concatenate()([x_l, x_r])
        x = Dense(self.POLARITIES_DIM)(x)
        predictions = Activation('softmax')(x)
        model = Model(inputs=[inputs_l, inputs_r], outputs=predictions)
        model.summary()
        model.compile(loss='categorical_crossentropy',
                      optimizer=optimizers.Adam(lr=self.LEARNING_RATE),
                      metrics=['acc'])
        # plot_model(model, to_file='model.png')
        self.model = model
def __init__(self):
    ######################### Tunable Parameters ################################
    # General
    self.img_shape = (None, 128, 128, 3)  # (None, 512, 512, 3)  ## Image shape
    self.n_styles = 4  # 50  ## Number of styles in the bank
    self.n_content = 1000  ## Number of content images
    self.N_steps = 300000  ## Total number of training steps
    self.T = 2  ## Number of consecutive steps for training styles before training the AutoEncoder
    self.print_iter = 100  ## Log output
    self.Batch_Size = 4  ## Batch size
    self.Use_Batch_Norm = True  ## Use batch normalization instead of instance normalization

    # LR
    self.LR_Initial = 0.01  ## Initial ADAM learning rate
    self.LR_Current = self.LR_Initial  ## For logging
    self.LR_Decay = 0.8  ## LR decay
    self.LR_Update_Every = self.N_steps / 10  ## LR decay period

    # Loss
    self.Optimizer = optimizers.Adam(lr=self.LR_Initial)  ## Optimizer for both branches
    self.LossAlpha = 0.025  # Content weight
    self.LossBeta = 1.2  # Style weight
    self.LossGamma = 1.0  # Total Variation weight
    ######################### \Tunable Parameters ################################

    self.StyleNetLoss = {k: None for k in range(self.n_styles)}
    self.StyleNetContentLoss = {k: None for k in range(self.n_styles)}
    self.StyleNetStyleLoss = {k: None for k in range(self.n_styles)}

    # Data
    self.Content_DB = None
    self.Style_DB = None
    self.Content_DB_path = './DB/content/'
    self.Style_DB_path = './DB/style/'
    self.Content_DB_list = glob(self.Content_DB_path + '*')
    self.Style_DB_list = glob(self.Style_DB_path + '*')

    # VGG
    self.VGG16 = None

    # auto-encoder
    self.encoder = None
    self.decoder = None

    # style bank
    self.style_bank = {k: None for k in range(self.n_styles)}
    self.StyleNet = {k: None for k in range(self.n_styles)}
    self.AutoEncoderNet = None

    # inputs - one for content and one for style
    self.KinputContent = None
    self.KinputStyle = None
    self.tfStyleIndices = None

    self.TensorBoardStyleNet = {k: None for k in range(self.n_styles)}
    self.TensorBoardAutoEncoder = None
def __init__(self):
    self.DATASET = 'twitter'  # 'twitter', 'restaurant', 'laptop'
    self.POLARITIES_DIM = 3
    self.EMBEDDING_DIM = 100
    self.LEARNING_RATE = 0.01
    self.INITIALIZER = initializers.RandomUniform(minval=-0.003, maxval=0.003)
    self.REGULARIZER = regularizers.l2(0.001)
    self.LSTM_PARAMS = {
        'units': 200,
        'activation': 'tanh',
        'recurrent_activation': 'sigmoid',
        'kernel_initializer': self.INITIALIZER,
        'recurrent_initializer': self.INITIALIZER,
        'bias_initializer': self.INITIALIZER,
        'kernel_regularizer': self.REGULARIZER,
        'recurrent_regularizer': self.REGULARIZER,
        'bias_regularizer': self.REGULARIZER,
        'dropout': 0,
        'recurrent_dropout': 0,
    }
    self.MAX_SEQUENCE_LENGTH = 80
    self.MAX_ASPECT_LENGTH = 10
    self.BATCH_SIZE = 200
    self.EPOCHS = 100

    self.texts_raw_indices, self.texts_raw_without_aspects_indices, self.texts_left_indices, self.texts_left_with_aspects_indices, \
    self.aspects_indices, self.texts_right_indices, self.texts_right_with_aspects_indices, \
    self.polarities_matrix, \
    self.embedding_matrix, \
    self.tokenizer = \
        read_dataset(type=self.DATASET,
                     mode='train',
                     embedding_dim=self.EMBEDDING_DIM,
                     max_seq_len=self.MAX_SEQUENCE_LENGTH,
                     max_aspect_len=self.MAX_ASPECT_LENGTH)

    if os.path.exists('lstm_saved_model.h5'):
        print('loading saved model...')
        self.model = load_model('lstm_saved_model.h5')
    else:
        print('Build model...')
        inputs = Input(shape=(self.MAX_SEQUENCE_LENGTH, ))
        x = Embedding(input_dim=len(self.tokenizer.word_index) + 1,
                      output_dim=self.EMBEDDING_DIM,
                      input_length=self.MAX_SEQUENCE_LENGTH,
                      weights=[self.embedding_matrix],
                      trainable=False)(inputs)
        x = LSTM(**self.LSTM_PARAMS)(x)
        x = Dense(self.POLARITIES_DIM)(x)
        predictions = Activation('softmax')(x)
        model = Model(inputs, predictions)
        model.summary()
        model.compile(loss='categorical_crossentropy',
                      optimizer=optimizers.Adam(lr=self.LEARNING_RATE),
                      metrics=['acc', f1])
        # plot_model(model, to_file='model.png')
        self.model = model
def testSetWeightsFromV1AdamWithoutMinimize(self):
    keras_v1_adam = optimizers.Adam()
    keras_v2_adam = adam.Adam()
    keras_v2_adam.set_weights(keras_v1_adam.get_weights())
    keras_v1_iteration = keras_v1_adam.iterations
    keras_v2_iteration = keras_v2_adam.iterations
    self.evaluate(variables.global_variables_initializer())
    self.assertEqual(self.evaluate(keras_v1_iteration),
                     self.evaluate(keras_v2_iteration))
def build_reconstruction_model(self, loss):
    inputs = keras.Input(shape=self.data_shape)
    outputs = self.encoder_decoder_graph(inputs)
    self.model = keras.Model(inputs, outputs)
    self.model.compile(
        optimizer=optimizers.Adam(lr=Config.learning_rate),
        loss=loss,
    )
def getAdaptationModel(modelPath, adaptationVersion, features, seqLen):
    fineTuneModel = load_model(modelPath)

    # Test optimizer's state:
    #print(fineTuneModel.optimizer.get_config())
    #print(dir(fineTuneModel.optimizer))
    #print(fineTuneModel.optimizer.lr)

    fineTuneModel.get_layer('td').trainable = True
    if adaptationVersion == 2:
        fineTuneModel.get_layer('td').activation = 'relu'
        fineTuneModel.get_layer('rnn').trainable = False
        if fineTuneModel.get_layer('rnn_2nd_layer') != None:
            fineTuneModel.get_layer('rnn_2nd_layer').trainable = False
        fineTuneModel.get_layer('nn').trainable = False
        fineTuneModel.get_layer('nn_dropout').trainable = False
        fineTuneModel.get_layer('output_softmax').trainable = False
    if adaptationVersion == 3:
        fineTuneModel.get_layer('td').activation = 'relu'
        fineTuneModel.name = "existingModel"
        newModel = Sequential()
        newModel.add(TimeDistributed(Dense(features,
                                           kernel_initializer='identity',
                                           bias_initializer='zeros',
                                           activation='relu'),
                                     input_shape=(seqLen, features),
                                     name='td0',
                                     trainable=True))
        newModel.add(fineTuneModel)
        fineTuneModel = newModel
    if adaptationVersion == 4:
        # the identity initializer does not work here because the kernel is not square
        fineTuneModel.get_layer('td').activation = 'relu'
        fineTuneModel.name = "existingModel"
        newModel = Sequential()
        newModel.add(TimeDistributed(Dense(10 * features,
                                           kernel_initializer='identity',
                                           bias_initializer='zeros',
                                           activation='relu'),
                                     input_shape=(seqLen, features),
                                     name='td0',
                                     trainable=True))
        newModel.add(fineTuneModel)
        fineTuneModel = newModel

    if onTpu:
        multiFineTuneModel = toTpuModel(fineTuneModel)
        multiFineTuneModel.compile(loss="categorical_crossentropy",
                                   optimizer=tf.train.AdamOptimizer(learning_rate=0.001),
                                   metrics=["accuracy"])
    else:
        multiFineTuneModel = toMultiGpuModel(fineTuneModel)
        multiFineTuneModel.compile(loss="categorical_crossentropy",
                                   optimizer=optimizers.Adam(lr=0.001),
                                   metrics=["accuracy"])

    # Test optimizer's state:
    #print(fineTuneModel.optimizer.get_config())
    #print(dir(fineTuneModel.optimizer))
    #print(fineTuneModel.optimizer.lr)

    return fineTuneModel, multiFineTuneModel