def get_optimizer(config):
    if config.use_lr_scheduler:
        if config.lr_scheduler == "InverseTimeDecay":
            learning_rate_schedule = InverseTimeDecay(
                config.initial_lr, config.decay_steps, config.decay_rate
            )
        elif config.lr_scheduler == "ExponentialDecay":
            learning_rate_schedule = ExponentialDecay(
                config.initial_lr, config.decay_steps, config.decay_rate
            )
        elif config.lr_scheduler.lower() == "custom":
            if config.optimizer.lower() == "adam":
                optimizer = Adam(learning_rate=config.initial_lr)
            elif config.optimizer.lower() == "rmsprop":
                optimizer = RMSprop(learning_rate=config.initial_lr)
            else:
                raise Exception("Please enter a supported optimizer: Adam or RMSprop.")
            return optimizer
        else:
            raise Exception(
                "Please enter a supported learning rate scheduler: "
                "InverseTimeDecay or ExponentialDecay."
            )
        if config.optimizer.lower() == "adam":
            optimizer = Adam(learning_rate_schedule)
        elif config.optimizer.lower() == "rmsprop":
            optimizer = RMSprop(learning_rate_schedule)
        else:
            raise Exception("Please enter a supported optimizer: Adam or RMSprop.")
    else:
        if config.optimizer.lower() == "adam":
            optimizer = Adam(learning_rate=config.lr)
        elif config.optimizer.lower() == "rmsprop":
            optimizer = RMSprop(learning_rate=config.lr)
        else:
            raise Exception("Please enter a supported optimizer: Adam or RMSprop.")
    return optimizer
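# Hedged usage sketch (not part of the original source): the `config` object and the
# field values below are hypothetical, chosen only to exercise the ExponentialDecay
# branch of get_optimizer() above.
from types import SimpleNamespace

from tensorflow.keras.optimizers import Adam, RMSprop
from tensorflow.keras.optimizers.schedules import ExponentialDecay, InverseTimeDecay

config = SimpleNamespace(
    use_lr_scheduler=True,
    lr_scheduler="ExponentialDecay",
    optimizer="adam",
    initial_lr=1e-3,
    decay_steps=1000,
    decay_rate=0.9,
    lr=1e-3,  # only read when use_lr_scheduler is False
)
optimizer = get_optimizer(config)  # Adam driven by an ExponentialDecay schedule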
def tabular_cnn(n_of_input_columns, n_of_output_classes):
    inputs = Input(shape=(n_of_input_columns,))
    inputs_normalization = BatchNormalization()(inputs)
    inputs_feature_selection = Dropout(0.3)(inputs_normalization)
    x = Reshape((n_of_input_columns, 1))(inputs_feature_selection)
    x = Conv1D(filters=64, kernel_size=5, activation='relu', padding='same')(x)
    x = Dropout(0.5)(x)
    x = Conv1D(filters=64, kernel_size=5, activation='relu', padding='same')(x)
    x = MaxPooling1D(pool_size=5, padding='same')(x)
    x = Dense(256, activation='relu')(x)
    x = Dropout(0.5)(x)
    x = Flatten()(x)
    x = Dense(256, activation='relu')(x)
    output = Dense(n_of_output_classes, activation='softmax')(x)
    model = Model(inputs, output)
    lr_schedule = ExponentialDecay(initial_learning_rate=1e-5, decay_steps=1000, decay_rate=0.8)
    optimizer = SGD(learning_rate=lr_schedule)
    model.compile(loss='categorical_crossentropy', optimizer=optimizer,
                  metrics=['categorical_accuracy'])
    return model
def get_lr_scheduler(learning_rate, decay_type, decay_steps):
    if decay_type:
        decay_type = decay_type.lower()
    if decay_type is None:
        lr_scheduler = learning_rate
    elif decay_type == 'cosine':
        # use 0.2*learning_rate as final minimum learning rate
        lr_scheduler = CosineDecay(initial_learning_rate=learning_rate,
                                   decay_steps=decay_steps, alpha=0.2)
    elif decay_type == 'exponential':
        lr_scheduler = ExponentialDecay(initial_learning_rate=learning_rate,
                                        decay_steps=decay_steps, decay_rate=0.9)
    elif decay_type == 'polynomial':
        lr_scheduler = PolynomialDecay(initial_learning_rate=learning_rate,
                                       decay_steps=decay_steps,
                                       end_learning_rate=learning_rate / 100)
    else:
        raise ValueError('Unsupported lr decay type')
    return lr_scheduler
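# Hedged usage sketch (assumption, not from the original source): wiring the returned
# value into an optimizer. Whether it is a plain float or a LearningRateSchedule, it
# can be passed to `learning_rate=`.
from tensorflow.keras.optimizers import Adam

lr = get_lr_scheduler(learning_rate=1e-3, decay_type='exponential', decay_steps=10000)
optimizer = Adam(learning_rate=lr)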
def siamese_model_VGG(exp_values):
    embedding_length, fc1, fc2, feat1, feat2, feat3, feat4, feat5 = exp_values
    imgA = Input(shape=(64, 64, 3))
    imgB = Input(shape=(64, 64, 3))
    base_model = VGG_16(embedding_length, fc1, fc2, feat1, feat2, feat3, feat4, feat5,
                        input_shape=(64, 64, 3))
    featsA = base_model(imgA)
    featsB = base_model(imgB)
    distance = Lambda(euclidean_distance, name='compute_ED')([featsA, featsB])
    model = Model(inputs=[imgA, imgB], outputs=distance, name='Contrastive')
    # model.load_weights('saved_models/checkpoints/' + model_name)
    initial_learning_rate = 0.001
    lr_schedule = ExponentialDecay(initial_learning_rate, decay_steps=781,
                                   decay_rate=0.90, staircase=True)
    model.compile(loss=contrastive_loss,
                  optimizer=keras.optimizers.Adam(lr_schedule),
                  metrics=[accuracy1])
    model.summary()
    return model
len(scan_validate)).map(validation_prepare).batch(batch_size).prefetch(2))

# data = train_data.take(1)
# images, labels = list(data)[0]
# images = images.numpy()
# image = images[0]
# print(images.shape)  # (2, 128, 128, 64, 1)
# print(image.shape)  # (128, 128, 64, 1)
# print(image[:, :, 30].shape)  # (128, 128, 1)

''' Build model '''
model = get_model(width=128, height=128, depth=64)
# model.summary()

''' Compile model '''
initial_lr = 0.0001
lr_schedule = ExponentialDecay(initial_learning_rate=initial_lr, decay_steps=100000,
                               decay_rate=0.96, staircase=True)
model.compile(optimizer=Adam(learning_rate=lr_schedule),
              loss="binary_crossentropy", metrics=['accuracy'])

''' Define callbacks '''
checkpoint_callback = ModelCheckpoint(filepath='3d_image_classification.h5',
                                      monitor='val_accuracy', save_best_only=True)
early_stop_callback = EarlyStopping(monitor='val_accuracy', patience=15)

''' Train the model, doing validation at the end of each epoch '''
epoch = 50
model.fit(train_data, epochs=epoch,
denseLength = 8
# Layer growth factor
denseGrowth = 0.85
# Dropout for additional regularization.
dropout_val = 0.10

# Activation Functions
# Non output activation
activation_function = 'relu'
# Output activation.
output_activation = 'linear'

# Learning rate decay to prevent wild oscillations when closer to minima.
learningRateScheduler = ExponentialDecay(learning_rate, decay_steps=300,
                                         decay_rate=0.9, staircase=True)

###### !!!!!!!!!!!!!!! DO NOT EDIT UNLESS YOU KNOW WHAT TO DO !!!!!!!!!!!!!!! ######

model = build_model_FCNN(X_train.shape, outputSize, denseWidth, denseLength, denseGrowth,
                         dropout_val, activation_function, output_activation)
model = compile_model(model, learningRateScheduler, momentum, loss_function, metrics)
model.summary()

print("Complete. Training Commencing.")
model_history = model.fit(X_train, y_train,
def get_lr_sched(lr):
    return ExponentialDecay(lr, config.decay_steps, config.decay_rate)
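# Hedged usage sketch (assumption): `config` is a module-level object with
# `decay_steps` and `decay_rate` attributes defined elsewhere in the original project;
# the values below are illustrative only.
from types import SimpleNamespace
from tensorflow.keras.optimizers import Adam

config = SimpleNamespace(decay_steps=1000, decay_rate=0.95)
optimizer = Adam(learning_rate=get_lr_sched(1e-3))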
def get_Lemaire_MTL_model( TR_STEPS, N_MELS=120, n_classes=3, patch_size=68, loss_weights=None, #{'S': 1.0, 'M': 1.0, 'R': 1.0, '3C': 1.0}, ): ''' MTL modification of the TCN based model architecture proposed by Lemaire et al. [3] Code source: https://github.com/qlemaire22/speech-music-detection The model parameters are tuned on the MUSAN dataset. [3] Lemaire, Q., & Holzapfel, A. (2019). Temporal convolutional networks for speech and music detection in radio broadcast. In 20th International Society for Music Information Retrieval Conference, ISMIR 2019, 4-8 November 2019. International Society for Music Information Retrieval. Parameters ---------- TR_STEPS : int Number of training batches per epoch. N_MELS : int, optional The default is 120. n_classes : int, optional The default is 3. patch_size : int, optional The default is 68. loss_weights : dict, optional The default is {'S': 1.0, 'M': 1.0, 'R': 1.0, '3C': 1.0}. Returns ------- model : tensorflow.keras.models.Model CNN model. lr : float Learning rate. ''' from tcn import TCN from tcn.tcn import process_dilations kernel_size = 3 # Temporal Conv, LogMelSpec Nd = 8 # Temporal Conv, LogMelSpec nb_stacks = 3 # Temporal Conv, LogMelSpec n_layers = 1 # Temporal Conv, LogMelSpec n_filters = 32 # Temporal Conv, LogMelSpec use_skip_connections = False # Temporal Conv, LogMelSpec activation = 'norm_relu' dilations = [2**nd for nd in range(Nd)] list_n_filters = [n_filters] * n_layers dropout_rate = np.random.uniform(0.05, 0.5) padding = 'same' dilations = process_dilations(dilations) input_layer = Input(shape=(patch_size, N_MELS)) for i in range(n_layers): if i == 0: x = TCN(list_n_filters[i], kernel_size, nb_stacks, dilations, activation, padding, use_skip_connections, dropout_rate, return_sequences=True)(input_layer) else: x = TCN(list_n_filters[i], kernel_size, nb_stacks, dilations, activation, padding, use_skip_connections, dropout_rate, return_sequences=True, name="tcn" + str(i))(x) x = Flatten()(x) classification_output = Dense(n_classes, activation='softmax', name='3C')(x) sp_output, x_sp, mu_output, x_mu, smr_output, x_smr = MTL_modifications(x) model = Model(input_layer, [sp_output, mu_output, smr_output, classification_output]) initial_learning_rate = 0.002 lr_schedule = ExponentialDecay(initial_learning_rate, decay_steps=3 * TR_STEPS, decay_rate=0.1) optimizer = optimizers.SGD(learning_rate=lr_schedule, clipnorm=1, momentum=0.9) model.compile(loss={ 'S': 'binary_crossentropy', 'M': 'binary_crossentropy', 'R': 'mean_squared_error', '3C': 'categorical_crossentropy' }, loss_weights=loss_weights, optimizer=optimizer, metrics={'3C': 'accuracy'}) print(model.summary()) print( 'MTL modification of the architecture of Lemaire et. al. Proc. of the 20th ISMIR Conference, Delft, Netherlands, November 4-8, 2019\n' ) return model, initial_learning_rate
cnn = Sequential()
cnn.add(Conv2D(filters=32, kernel_size=4, activation="relu", input_shape=[256, 256, 3]))
cnn.add(MaxPool2D(pool_size=2, strides=2, padding='valid'))
cnn.add(Dropout(0.2))
cnn.add(Conv2D(filters=32, kernel_size=4, activation="relu"))
cnn.add(MaxPool2D(pool_size=2, strides=2, padding='valid'))
cnn.add(Flatten())
cnn.add(Dense(units=128, activation='relu'))
cnn.add(Dense(units=1, activation='sigmoid'))

### Optimizers tested
opt = Adam(learning_rate=0.1)
lr_schedule = ExponentialDecay(initial_learning_rate=1e-2, decay_steps=1000, decay_rate=0.9)
opt2 = SGD(learning_rate=lr_schedule)

cnn.compile(optimizer=opt, loss='binary_crossentropy',
            metrics=['accuracy', 'Precision', 'Recall'])
cnn.summary()

batch_size = 64
# fit_generator is deprecated in recent TF releases; Model.fit accepts generators directly.
cnn.fit(training_set,
        steps_per_epoch=880 // batch_size,
        epochs=4,
        validation_data=test_set,
        validation_steps=220 // batch_size)

### Save the model
def get_Lemaire_model(
        TR_STEPS,
        # kernel_size=3,              # Temporal Conv, MelSpec
        # Nd=8,                       # Temporal Conv, MelSpec
        # nb_stacks=3,                # Temporal Conv, MelSpec
        # n_layers=1,                 # Temporal Conv, MelSpec
        # n_filters=32,               # Temporal Conv, MelSpec
        # use_skip_connections=True,  # Temporal Conv, MelSpec
        # ----
        kernel_size=3,                # Temporal Conv, LogMelSpec
        Nd=8,                         # Temporal Conv, LogMelSpec
        nb_stacks=3,                  # Temporal Conv, LogMelSpec
        n_layers=1,                   # Temporal Conv, LogMelSpec
        n_filters=32,                 # Temporal Conv, LogMelSpec
        use_skip_connections=False,   # Temporal Conv, LogMelSpec
        # ----
        activation='norm_relu',
        bidirectional=True,
        N_MELS=80,
        n_classes=2,
        patch_size=68,
):
    '''
    TCN based model architecture proposed by Lemaire et al. [3]
    Code source: https://github.com/qlemaire22/speech-music-detection

    Parameters
    ----------
    TR_STEPS : int
        Number of training batches per epoch.
    n_filters : int, optional
        The default is 32.
    Nd : int, optional
        The default is 8.
    kernel_size : int, optional
        The default is 3.
    nb_stacks : int, optional
        The default is 3.
    activation : string, optional
        The default is 'norm_relu'.
    n_layers : int, optional
        The default is 1.
    use_skip_connections : boolean, optional
        The default is False.
    bidirectional : boolean, optional
        The default is True.
    N_MELS : int, optional
        The default is 80.
    n_classes : int, optional
        The default is 2.
    patch_size : int, optional
        The default is 68.

    Returns
    -------
    model : tensorflow.keras.models.Model
        CNN model.
    lr : float
        Learning rate.
    '''
    from tcn import TCN
    from tcn.tcn import process_dilations

    dilations = [2**nd for nd in range(Nd)]
    list_n_filters = [n_filters] * n_layers
    dropout_rate = np.random.uniform(0.05, 0.5)
    bidirectional = True
    if bidirectional:
        padding = 'same'
    else:
        padding = 'causal'
    dilations = process_dilations(dilations)

    input_layer = Input(shape=(patch_size, N_MELS))
    for i in range(n_layers):
        if i == 0:
            x = TCN(list_n_filters[i], kernel_size, nb_stacks, dilations, 'norm_relu',
                    padding, use_skip_connections, dropout_rate,
                    return_sequences=True)(input_layer)
        else:
            x = TCN(list_n_filters[i], kernel_size, nb_stacks, dilations, 'norm_relu',
                    padding, use_skip_connections, dropout_rate, return_sequences=True,
                    name="tcn" + str(i))(x)

    x = Flatten()(x)
    x = Dense(n_classes)(x)
    x = Activation('softmax')(x)
    output_layer = x
    model = Model(input_layer, output_layer)

    initial_learning_rate = 0.002
    lr_schedule = ExponentialDecay(initial_learning_rate, decay_steps=3 * TR_STEPS,
                                   decay_rate=0.1)
    optimizer = optimizers.SGD(learning_rate=lr_schedule, clipnorm=1, momentum=0.9)
    if n_classes == 2:
        model.compile(loss='binary_crossentropy', metrics='accuracy', optimizer=optimizer)
    elif n_classes == 3:
        model.compile(loss='categorical_crossentropy', metrics='accuracy', optimizer=optimizer)

    print(model.summary())
    print('Architecture of Lemaire et al., Proc. of the 20th ISMIR Conference, Delft, '
          'Netherlands, November 4-8, 2019\n')

    return model, initial_learning_rate
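# Hedged usage sketch (assumption): TR_STEPS is the number of training batches per
# epoch, per the docstring above; 100 is an arbitrary illustrative value.
model, lr = get_Lemaire_model(TR_STEPS=100)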
def transfer_train( image_size: int = 128, batch_size: int = 32, epochs: int = 30, l2_regularization: float = 1e-5, architecture: str = "BiT-M R50x1", learning_rate: float = 1e-3, lr_decay_rate: float = 0.99, lr_decay_steps: int = 5e2, project_name: str = "Seedlings Image Classification (Transfer Learning)", ) -> RunResult: # Load data train_data = ImageDataset("train", TRAIN_DIR, target_size=(image_size, image_size)) # Add data augmentation to training set. train_data_gen = ImageDataGenerator( horizontal_flip=True, zoom_range=0.1).flow(train_data.X, train_data.y, batch_size=batch_size) dev_data = ImageDataset("dev", DEV_DIR, target_size=(image_size, image_size)) # Define and compile the model. model = tf.keras.Sequential([ hub.KerasLayer(model_map[architecture], trainable=False), L.Flatten(), L.Dense( train_data.n_classes, activation="softmax", kernel_regularizer=tf.keras.regularizers.l2(l2_regularization), ), ]) model.compile( optimizer=Adam( ExponentialDecay(learning_rate, lr_decay_steps, lr_decay_rate, staircase=True)), loss="categorical_crossentropy", metrics=["accuracy"], ) # Make and init the wandb run. wandb.init(project=project_name, reinit=True) wandb.config.update({ "batch_size": batch_size, "epochs": epochs, "image_size": image_size, "l2_regularization": l2_regularization, "architecture": architecture, "learning_rate": learning_rate, "lr_decay_rate": lr_decay_rate, "lr_decay_steps": lr_decay_steps, }) # Train the model model.fit( train_data_gen, steps_per_epoch=int(len(train_data.X) / batch_size), epochs=epochs, validation_data=(dev_data.X, dev_data.y), callbacks=[WandbCallback(save_model=False)], ) # Evaluate the model train_loss, train_acc = model.evaluate(train_data.X, train_data.y) dev_loss, dev_acc = model.evaluate(dev_data.X, dev_data.y) # Log the scores wandb.run.summary.update({ "final_val_loss": dev_loss, "final_val_acc": dev_acc, "final_train_loss": train_loss, "final_train_acc": train_acc, }) wandb.run.save() run_name = wandb.run.name wandb.join() # end this run return RunResult( dev_acc, train_acc, run_name, make_submission(model, image_size, train_data.index2class), )
def main(args): # load train/test data datadir = os.path.join(args.volumedir, args.datadir) # train = imdb_data_load(datadir) train, test = load_datasets(datadir) # train, test = load_context_target_pairs(datadir, context_len = args.conlength) # train = sorted(train, key=lambda a: len(a), reverse=True) # train = train[:min(len(train), args.datacap)] # for msg in train: # if "roster" in msg: # print(msg) # return # Dynamically load modelBuilder class moduleName, klassName = args.modelbuilder.split(".") mod = __import__('models.%s' % moduleName, fromlist=[klassName]) klass = getattr(mod, klassName) modelBuilder = klass(args) timestamp = int(time.time()) logdir = os.path.join(args.volumedir, datetime.datetime.today().strftime('%Y%m%d'), args.logdir) if not os.path.isdir(logdir): os.makedirs(logdir) hdlr = logging.FileHandler( os.path.join(logdir, "training_output_%d.log" % timestamp)) formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s') hdlr.setFormatter(formatter) logger.addHandler(hdlr) logger.setLevel(logging.INFO) checkpointdir = os.path.join(args.volumedir, datetime.datetime.today().strftime('%Y%m%d'), args.checkpointdir) if not os.path.isdir(checkpointdir): os.makedirs(checkpointdir) checkpointpath = configure_checkpointing(args, timestamp) checkpoint_callback = ModelCheckpoint(filepath=checkpointpath, save_weights_only=False) # Create or load existing model init_epoch = 0 if args.textlineds: X, Y, vocab, tokens = SlackTextLineDataset(args, train).get_dataset() reverse_token_map = {t: i for i, t in enumerate(vocab)} else: tokens, vocab, reverse_token_map = modelBuilder.tokenize( train, freq_threshold=args.freqthreshold) # text_ds = text_ds.shuffle(buffer_size=1024).batch(args.minibatchsize) # print(text_ds.cardinality().numpy()) if args.loadmodel and os.path.exists(args.loadmodel): modelpath = args.loadmodel timestamp = int(modelpath.split(".")[1]) init_epoch = int(modelpath.split(".")[2]) loaddir = "/".join(modelpath.split("/")[:-1]) model = load_model(modelpath, custom_objects={"EinsumOp": EinsumOp}) vocab = load_vocab(loaddir, timestamp) # tokens = load_tokens(loaddir, timestamp) reverse_token_map = {t: i for i, t in enumerate(vocab)} else: model = modelBuilder.create_model(vocab) save_vocab(vocab, checkpointdir, timestamp) if args.savetokens: save_tokens(tokens, checkpointdir, timestamp) plot_model(model, to_file='model_plot_2.png', show_shapes=True, show_layer_names=True) optimizer_map = {"adam": Adam, "rmsprop": RMSprop, "sgd": SGD} optimizer = optimizer_map[ args.optimizer] if args.optimizer in optimizer_map.keys() else RMSprop lr_decay = ExponentialDecay(initial_learning_rate=args.learningrate, decay_rate=args.decayrate, decay_steps=args.decaysteps) custom_lr = CustomSchedule(args.hiddensize) opt = optimizer(learning_rate=lr_decay, clipvalue=3) # model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=["accuracy"]) # attn_4_output = model.get_layer("attention_values_4").output # dense_v_out = model.get_layer("dense_v_4").output # einsum_com_output = model.get_layer("einsum_com_4").output # inpt = model.get_layer("input") # attn_factor_model = keras.Model(inputs=inpt.input, outputs=attn_4_output) # einsum_com_model = keras.Model(inputs=inpt.input, outputs=einsum_com_output) # dense_v_model = keras.Model(inputs=inpt.input, outputs=dense_v_out) model.compile( loss=keras.losses.SparseCategoricalCrossentropy(name="loss"), run_eagerly=True, optimizer=opt, metrics=[ tf.keras.metrics.SparseCategoricalAccuracy(name="accuracy"), 
tf.keras.metrics.SparseTopKCategoricalAccuracy( k=3, name="top_3_accuracy"), tf.keras.metrics.SparseTopKCategoricalAccuracy( k=5, name="top_5_accuracy"), last_word_prediction_accuracy(args.minibatchsize, args.seqlength) ]) # last_word_prediction_topk_accuracy(args.minibatchsize, args.seqlength, 5)]) model.summary(print_fn=logger.info) checkpointnames = args.checkpointnames % timestamp sample_func = lambda: modelBuilder.sample(model, tokens, vocab, reverse_token_map) callbacks = get_callbacks(args.volumedir, checkpointdir, checkpointnames, timestamp, sample_func) sample_callback = LambdaCallback( on_epoch_end=lambda epoch, logs: sample_func()) logger_callback = LambdaCallback(on_epoch_end=lambda epoch, logs: logger. info("Epoch %d: %s" % (epoch, str(logs)))) if not args.textlineds: trainseqs = modelBuilder.get_input_sequences(tokens, reverse_token_map) # trainseqs, valseqs = validation_split(seqs, val_split=args.valsplit) if args.modelbuilder == "keras_word_lm.WordLanguageModelBuilder": trainvectors = SequenceVectors(args, trainseqs, vocab) history = model.fit(trainvectors, epochs=args.numepochs, initial_epoch=init_epoch, callbacks=[ sample_callback, logger_callback, checkpoint_callback ]) logger.info(history.history) plot_history(history.history, args.learningrate, logdir, timestamp) return X, Y, sample_weights = modelBuilder.build_input_vectors( trainseqs, vocab, reverse_token_map) # ds = modelBuilder.build_input_vectors(trainseqs, vocab, reverse_token_map) # model.fit(X, Y, # print(ds) # start_prompt = "this movie is" # start_tokens = [reverse_token_map[t] for t in start_prompt.split()] # num_tokens_generated = 40 # text_gen_callback = TextGenerator(num_tokens_generated, args.seqlength, start_tokens, vocab) history = model.fit( X, Y, epochs=args.numepochs, initial_epoch=init_epoch, batch_size=args.minibatchsize, validation_split=0.1, shuffle=True, callbacks=[sample_callback, logger_callback, checkpoint_callback]) logger.info(history.history) plot_history(history.history, args.learningrate, logdir, timestamp) return allmetrics = {} for epoch in range(init_epoch, args.numepochs): batches = rand_mini_batches(trainseqs, args.minibatchsize) for i, batch in enumerate(batches): X, Y, sample_weights = modelBuilder.build_input_vectors( batch, vocab, reverse_token_map) metrics = model.train_on_batch(X, Y, sample_weight=sample_weights, reset_metrics=i == 0, return_dict=True) if i % 100 == 0: valmetrics = evaluate_mini_batches(model, modelBuilder, vocab, reverse_token_map, valseqs, args.minibatchsize) metrics.update(valmetrics) for key in metrics.keys(): if key in allmetrics.keys(): allmetrics[key] += [metrics[key]] else: allmetrics[key] = [metrics[key]] print("Batch %d of %d in epoch %d: %s" % (i, len(batches), epoch, str(metrics))) logger.info("Epoch %d: %s" % (epoch, str(metrics))) # logger.info("Validation metrics %s" % str(valmetrics)) if args.runsamples: sample_output = sample_func() logger.info("\n" + sample_output) model.save( os.path.join(checkpointdir, checkpointnames).format(epoch=epoch)) plot_history(allmetrics, args.learningrate, logdir, timestamp)
def __init__(self, **kwargs):
    """
    Input:
        translation_spec - dict with keys 'f_X', 'f_Y'. Values are passed as
                           kwargs to the respective ImageTranslationNetwork's
        cycle_lambda=2 - float, loss weight
        cross_lambda=1 - float, loss weight
        l2_lambda=1e-3 - float, loss weight
        learning_rate=1e-5 - float, initial learning rate for ExponentialDecay
        clipnorm=None - gradient norm clip value, passed to
                        tf.clip_by_global_norm if not None
        logdir=None - path to log directory. If provided, tensorboard logging
                      of training and evaluation is set up at 'logdir/'
    """
    learning_rate = kwargs.get("learning_rate", 1e-5)
    lr_all = ExponentialDecay(learning_rate, decay_steps=10000, decay_rate=0.96,
                              staircase=True)
    self._optimizer_all = tf.keras.optimizers.Adam(lr_all)
    lr_k = ExponentialDecay(learning_rate, decay_steps=10000, decay_rate=0.9,
                            staircase=True)
    self._optimizer_k = tf.keras.optimizers.Adam(lr_k)
    self.clipnorm = kwargs.get("clipnorm", None)

    # To keep a history for a specific training_metrics,
    # add `self.metrics_history[name] = []` in subclass __init__
    self.train_metrics = {}
    self.difference_img_metrics = {"ACC_di": tf.keras.metrics.Accuracy()}  # {"AUC": tf.keras.metrics.AUC()}
    self.change_map_metrics = {
        "ACC": tf.keras.metrics.Accuracy(),
        "cohens kappa": CohenKappa(num_classes=2),
        # 'F1': tfa.metrics.F1Score(num_classes=2, average=None)
    }
    # If the metric dictionaries share keys, the history will not work
    assert not set(self.difference_img_metrics) & set(self.change_map_metrics)
    self.metrics_history = {
        **{key: [] for key in self.change_map_metrics.keys()},
        **{key: [] for key in self.difference_img_metrics.keys()},
    }

    self.timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    self.channels = {"x": kwargs.get("channel_x"), "y": kwargs.get("channel_y")}

    # Flag used in image_to_tensorboard decorator
    self._save_images = tf.Variable(False, trainable=False)

    logdir = kwargs.get("logdir", None)
    if logdir is not None:
        self.log_path = logdir
        self.tb_writer = tf.summary.create_file_writer(self.log_path)
        self._image_dir = tf.constant(os.path.join(self.log_path, "images"))
    else:
        self.tb_writer = tf.summary.create_noop_writer()

    self.evaluation_frequency = tf.constant(kwargs.get("evaluation_frequency", 1),
                                            dtype=tf.int64)
    self.epoch = tf.Variable(0, dtype=tf.int64)
def get_Lemaire_model(hp):
    '''
    TCN based model architecture proposed by Lemaire et al. [3]
    Code source: https://github.com/qlemaire22/speech-music-detection

    Parameters
    ----------
    hp : object
        Hyperparameters.

    Returns
    -------
    model : tensorflow.keras.models.Model
        CNN model.
    '''
    dilations = [2**nd for nd in range(hp.get('Nd'))]
    list_n_filters = [hp.get('n_filters')] * hp.get('n_layers')
    dropout_rate = np.random.uniform(0.05, 0.5)
    bidirectional = True
    N_MELS = 80
    n_classes = 2
    patch_size = 68
    if bidirectional:
        padding = 'same'
    else:
        padding = 'causal'
    dilations = process_dilations(dilations)

    input_layer = Input(shape=(patch_size, N_MELS))
    for i in range(hp.get('n_layers')):
        if i == 0:
            x = TCN(list_n_filters[i], hp.get('kernel_size'), hp.get('nb_stacks'),
                    dilations, 'norm_relu', padding, hp.get('skip_some_connections'),
                    dropout_rate, return_sequences=True)(input_layer)
        else:
            x = TCN(list_n_filters[i], hp.get('kernel_size'), hp.get('nb_stacks'),
                    dilations, 'norm_relu', padding, hp.get('skip_some_connections'),
                    dropout_rate, return_sequences=True, name="tcn" + str(i))(x)

    x = Flatten()(x)
    x = Dense(n_classes)(x)
    x = Activation('softmax')(x)
    output_layer = x
    model = Model(input_layer, output_layer)

    initial_learning_rate = 0.002
    lr_schedule = ExponentialDecay(initial_learning_rate,
                                   decay_steps=3 * hp.get('TR_STEPS'), decay_rate=0.1)
    optimizer = optimizers.SGD(learning_rate=lr_schedule, clipnorm=1, momentum=0.9)
    if n_classes == 2:
        model.compile(loss='binary_crossentropy', metrics='accuracy', optimizer=optimizer)
    elif n_classes == 3:
        model.compile(loss='categorical_crossentropy', metrics='accuracy', optimizer=optimizer)

    # print(model.summary())
    # print('Architecture of Lemaire et. al. Proc. of the 20th ISMIR Conference, Delft, Netherlands, November 4-8, 2019\n')

    return model
# Model initialisation
tf.random.set_seed(seed)
callback = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)

model = tf.keras.Sequential()
model.add(tf.keras.Input(shape=(N - 1, 4)))
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(2000, activation=tf.nn.relu))
model.add(tf.keras.layers.LayerNormalization())
model.add(tf.keras.layers.Dense(2000, activation=tf.nn.relu))
# model.add(tf.keras.layers.Dense(50, activation=tf.nn.relu))
model.add(tf.keras.layers.Dense(4 * N))

# learning_rate = CustomSchedule(4, 500)
learning_rate = ExponentialDecay(1e-3, 3000, 0.96)
optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=0.9, beta_2=0.98, epsilon=1e-9)
model.compile(optimizer=optimizer, loss='mse')

# %%
model.summary()
history = model.fit(X_train, y_train, epochs=10000, verbose=1, validation_split=0.1,
                    callbacks=[callback])

# %%
# Model prediction output
trans_pred = rev_angle_embedding(model.predict(X_test), N, reshape=True) % (2 * np.pi)
E_pred = np.zeros(len(X_test))
for i in range(len(E_pred)):
    E_pred[i] = wrapper(trans_pred[i], data_test[i, 0][:N], data_test[i, 0][N:], dt,
                        data_test[i, 3], N)
np.mean(E_pred / data_test[:, 2])
LSTM_1 = LSTM(32, return_sequences=True, name='lstm_1')(Reshape_2)
LSTM_2 = LSTM(32, return_sequences=False, name='lstm_2')(LSTM_1)
CN = tf.concat([GAP_1, LSTM_2], axis=-1)
dropout_1 = Dropout(0.2)(CN)
Dense1 = Dense(1, name='Dense_1')(dropout_1)
model = Model(inputs=[Input_1, Input_2], outputs=Dense1)

# save model
checkpoint = ModelCheckpoint(filepath='Model.h5', monitor='val_loss', verbose=1,
                             save_best_only=True)
lr = ExponentialDecay(initial_learning_rate=0.003, decay_steps=16, decay_rate=0.9)
adam = Adam(learning_rate=lr)
model.compile(loss='mse', optimizer=adam)
model.fit(x=[train_X_1, train_X_2], y=train_Y, batch_size=8, epochs=600, shuffle=True,
          validation_data=([test_X_1, test_X_2], test_Y), callbacks=[checkpoint])

pred = model.predict([test_X_1, test_X_2])
rmse = np.mean(
    pow(
        np.square(
            scaler_Y.inverse_transform(test_Y) -
def create_model( model_name, log_dir, args ): # optimizer, learning rate, activation, neurons, batch size, epochs... input_shape = input_size(model_name, args) if args.head == 'max' or (args.base_trainable and args.head != 't_complex'): pool = 'max' else: pool = 'none' if model_name == 'VGG16': conv_base = VGG16(weights='imagenet', include_top=False, pooling=pool, input_shape=input_shape) elif model_name == 'VGG19': conv_base = VGG19(weights='imagenet', include_top=False, pooling=pool, input_shape=input_shape) elif model_name == 'ResNet50': conv_base = ResNet50(weights='imagenet', include_top=False, pooling=pool, input_shape=input_shape) elif model_name == 'InceptionV3': conv_base = InceptionV3(weights='imagenet', include_top=False, pooling=pool, input_shape=input_shape) elif model_name == 'Xception': conv_base = Xception(weights='imagenet', include_top=False, pooling=pool, input_shape=input_shape) elif model_name == 'InceptionResNetV2': conv_base = InceptionResNetV2(weights='imagenet', include_top=False, pooling=pool, input_shape=input_shape) elif model_name == 'NASNetMobile': conv_base = NASNetMobile(weights='imagenet', include_top=False, pooling=pool, input_shape=input_shape) elif model_name == 'NASNetLarge': conv_base = NASNetLarge(weights='imagenet', include_top=False, pooling=pool, input_shape=input_shape) elif model_name == 'DenseNet201': conv_base = DenseNet201(weights='imagenet', include_top=False, pooling=pool, input_shape=input_shape) elif model_name == 'MobileNetV2': conv_base = MobileNetV2(weights='imagenet', include_top=False, pooling=pool, input_shape=input_shape) else: conv_base = None print("Model name not known!") exit() conv_base.trainable = args.base_trainable model = models.Sequential() if args.base_trainable: if args.head == 't_complex': model = models.Sequential() model.add(conv_base) model.add( layers.Conv2D(filters=1024, kernel_size=(3, 3), padding='same', strides=1)) model.add(layers.Flatten()) # ?? model.add(layers.Dense(1024, activation='sigmoid')) model.add(layers.Dense(256, activation='sigmoid')) model.add(layers.Dense(args.CLASSES_NO, activation='softmax') ) # (samples, new_rows, new_cols, filters) else: model.add(conv_base) model.add(layers.Dense(args.CLASSES_NO, activation='softmax')) elif args.head == 'dense': # outside only? model.add(conv_base) model.add(layers.Flatten()) model.add(layers.Dropout(0.5)) model.add(layers.Dense(256, activation='relu')) model.add(layers.Dropout(0.5)) model.add(layers.Dense(128, activation='relu')) model.add(layers.Dense(args.CLASSES_NO, activation='softmax')) elif args.head == 'max': model.add(conv_base) model.add(layers.Dense(512, activation='relu')) model.add(layers.Dropout(0.5)) model.add(layers.Dense(256, activation='relu')) model.add(layers.Dense(args.CLASSES_NO, activation='softmax')) elif args.head == 'mod': model = models.Sequential() model.add(conv_base) model.add( layers.Conv2D(filters=2048, kernel_size=(3, 3), padding='valid')) model.add(layers.Flatten()) # ?? model.add(layers.Dropout(0.5)) model.add(layers.Dense(1024, activation='sigmoid')) model.add(layers.Dense(256, activation='relu')) model.add(layers.Dense( args.CLASSES_NO, activation='softmax')) # (samples, new_rows, new_cols, filters) if args.lr_decay: lr_schedule = ExponentialDecay(args.INIT_LEARN_RATE, decay_steps=args.DECAY_STEPS, decay_rate=args.DECAY_RATE, staircase=True) model.compile(loss='categorical_crossentropy', optimizer=SGD(lr_schedule), metrics=['acc']) # To different optimisers? 
    else:
        model.compile(loss='categorical_crossentropy',
                      optimizer=Adam(learning_rate=args.LEARNING_RATE),  # `lr` kwarg is deprecated
                      metrics=['acc'])

    with open(os.path.join(log_dir, 'modelsummary.txt'), 'w') as f:
        with redirect_stdout(f):
            model.summary()
    print(model.summary())
    return model
def train_model(hypa, force_retrain): """MAKEDOC: What is train_model doing?""" logg = logging.getLogger(f"c.{__name__}.train_model") # logg.debug("Starting train_model") # get the words words = words_types[hypa["words"]] # name the model model_name = build_cnn_name(hypa) logg.debug(f"model_name: {model_name}") # save the trained model here model_folder = Path("trained_models") / "cnn" if not model_folder.exists(): model_folder.mkdir(parents=True, exist_ok=True) model_path = model_folder / f"{model_name}.h5" # logg.debug(f"model_path: {model_path}") placeholder_path = model_folder / f"{model_name}.txt" # check if this model has already been trained if placeholder_path.exists(): if force_retrain: logg.warn("\nRETRAINING MODEL!!\n") else: logg.debug("Already trained") return # save info regarding the model training in this folder info_folder = Path("info") / "cnn" / model_name if not info_folder.exists(): info_folder.mkdir(parents=True, exist_ok=True) # magic to fix the GPUs setup_gpus() # input data processed_path = Path("data_proc") / f"{hypa['dataset']}" data, labels = load_processed(processed_path, words) # from hypa extract model param model_param = {} model_param["num_labels"] = len(words) model_param["input_shape"] = data["training"][0].shape model_param["base_filters"] = hypa["base_filters"] model_param["base_dense_width"] = hypa["base_dense_width"] # translate types to actual values kernel_size_types = { "01": [(2, 2), (2, 2), (2, 2)], "02": [(5, 1), (3, 3), (3, 3)], "03": [(1, 5), (3, 3), (3, 3)], } model_param["kernel_sizes"] = kernel_size_types[hypa["kernel_size_type"]] pool_size_types = { "01": [(2, 2), (2, 2), (2, 2)], "02": [(2, 1), (2, 2), (2, 2)], "03": [(1, 2), (2, 2), (2, 2)], } model_param["pool_sizes"] = pool_size_types[hypa["pool_size_type"]] dropout_types = {"01": [0.03, 0.01], "02": [0.3, 0.1]} model_param["dropouts"] = dropout_types[hypa["dropout_type"]] # a dict to recreate this training recap = {} recap["words"] = words recap["hypa"] = hypa recap["model_param"] = model_param recap["model_name"] = model_name recap["version"] = "002" # logg.debug(f"recap: {recap}") recap_path = info_folder / "recap.json" recap_path.write_text(json.dumps(recap, indent=4)) learning_rate_types = { "01": "fixed01", "02": "fixed02", "03": "fixed03", "e1": "exp_decay_keras_01", "04": "exp_decay_step_01", "05": "exp_decay_smooth_01", "06": "exp_decay_smooth_02", } learning_rate_type = hypa["learning_rate_type"] lr_value = learning_rate_types[learning_rate_type] # setup opt fixed lr values if lr_value.startswith("fixed"): if lr_value == "fixed01": lr = 1e-2 elif lr_value == "fixed02": lr = 1e-3 elif lr_value == "fixed03": lr = 1e-4 else: lr = 1e-3 if lr_value == "exp_decay_keras_01": lr = ExponentialDecay(0.1, decay_steps=100000, decay_rate=0.96, staircase=True) optimizer_types = { "a1": Adam(learning_rate=lr), "r1": RMSprop(learning_rate=lr), } opt = optimizer_types[hypa["optimizer_type"]] # create the model model = CNNmodel(**model_param) # model.summary() metrics = [ tf.keras.metrics.CategoricalAccuracy(), tf.keras.metrics.Precision(), tf.keras.metrics.Recall(), ] model.compile( optimizer=opt, loss=tf.keras.losses.CategoricalCrossentropy(), metrics=metrics, ) # setup callbacks callbacks = [] # setup exp decay step / smooth if lr_value.startswith("exp_decay"): if lr_value == "exp_decay_step_01": exp_decay_part = partial(exp_decay_step, epochs_drop=5) elif lr_value == "exp_decay_smooth_01": exp_decay_part = partial(exp_decay_smooth, epochs_drop=5) elif lr_value == "exp_decay_smooth_02": 
exp_decay_part = partial( exp_decay_smooth, epochs_drop=5, initial_lrate=1e-2 ) lrate = LearningRateScheduler(exp_decay_part) callbacks.append(lrate) # # setup early stopping # early_stop = EarlyStopping( # # monitor="val_categorical_accuracy", # monitor="val_loss", # patience=4, # verbose=1, # restore_best_weights=True, # ) # callbacks.append(early_stop) # get training parameters BATCH_SIZE = hypa["batch_size"] SHUFFLE_BUFFER_SIZE = BATCH_SIZE EPOCH_NUM = hypa["epoch_num"] # load the datasets datasets = {} for which in ["training", "validation", "testing"]: # logg.debug(f"data[{which}].shape: {data[which].shape}") datasets[which] = Dataset.from_tensor_slices((data[which], labels[which])) # logg.debug(f"datasets[{which}]: {datasets[which]}") datasets[which] = datasets[which].shuffle(SHUFFLE_BUFFER_SIZE).batch(BATCH_SIZE) # logg.debug(f"datasets[{which}]: {datasets[which]}") # train the model results = model.fit( data["training"], labels["training"], # validation_data=datasets["validation"], validation_data=(data["validation"], labels["validation"]), batch_size=BATCH_SIZE, epochs=EPOCH_NUM, verbose=1, callbacks=callbacks, ) # save the trained model model.save(model_path) results_recap = {} results_recap["model_name"] = model_name # version of the results saved results_recap["results_recap_version"] = "002" # quickly evaluate the results # logg.debug(f"\nmodel.metrics_names: {model.metrics_names}") # for which in ["training", "validation", "testing"]: # model_eval = model.evaluate(datasets[which]) # logg.debug(f"{which}: model_eval: {model_eval}") # save the evaluation results logg.debug("Evaluate on test data:") # eval_testing = model.evaluate(datasets["testing"]) # results_recap[model.metrics_names[0]] = eval_testing[0] # results_recap[model.metrics_names[1]] = eval_testing[1] eval_testing = model.evaluate(data["testing"], labels["testing"]) for metrics_name, value in zip(model.metrics_names, eval_testing): logg.debug(f"{metrics_name}: {value}") results_recap[metrics_name] = value # compute the confusion matrix # y_pred = model.predict(datasets["testing"]) y_pred = model.predict(data["testing"]) cm = pred_hot_2_cm(labels["testing"], y_pred, words) # logg.debug(f"cm: {cm}") results_recap["cm"] = cm.tolist() # compute the fscore fscore = analyze_confusion(cm, words) logg.debug(f"fscore: {fscore}") # plot the cm fig, ax = plt.subplots(figsize=(12, 12)) plot_confusion_matrix(cm, ax, model_name, words, fscore) plot_cm_path = info_folder / "test_confusion_matrix.png" fig.savefig(plot_cm_path) plt.close(fig) # save the histories results_recap["history"] = { "loss": results.history["loss"], "val_loss": results.history["val_loss"], "categorical_accuracy": results.history["categorical_accuracy"], "val_categorical_accuracy": results.history["val_categorical_accuracy"], } # save the results res_recap_path = info_folder / "results_recap.json" res_recap_path.write_text(json.dumps(results_recap, indent=4)) y_pred_dataset = model.predict(datasets["testing"]) cm_dataset = pred_hot_2_cm(labels["testing"], y_pred_dataset, words) fscore_dataset = analyze_confusion(cm_dataset, words) logg.debug(f"fscore_dataset: {fscore_dataset} fscore {fscore}") # for i, (ys, yd) in enumerate(zip(y_pred, y_pred_dataset)): # pred_split = np.argmax(ys) # pred_dataset = np.argmax(yd) # logg.debug(f"i: {i} pred_split: {pred_split} pred_dataset: {pred_dataset}") # plt.show() placeholder_path.write_text(f"Trained. F-score: {fscore}") return "done_training"
def fit(self, X_train, y_train, X_valid=None, y_valid=None, epochs=200, lr=0.0001,
        batch_size=16):
    # Check that X and y have correct shape
    # X, y = check_X_y(X, y)
    # Store the classes seen during fit
    # self.classes_ = unique_labels(y_train)
    self.X_ = X_train
    self.y_ = y_train

    self.model = Sequential()
    # Recurrent layer
    self.model.add(Bidirectional(LSTM(128, return_sequences=False, dropout=0.1,
                                      recurrent_dropout=0.1)))
    # Fully connected layer
    self.model.add(Dense(128, activation='relu'))
    # Dropout for regularization
    self.model.add(Dropout(0.5))
    # Output layer
    self.model.add(Dense(3, activation='softmax'))

    # scheduler
    lr_schedule = ExponentialDecay(initial_learning_rate=lr, decay_steps=600,
                                   decay_rate=0.9, staircase=True)
    # optimizer
    opt = Adam(learning_rate=lr_schedule)
    # Compile the model
    self.model.compile(optimizer=opt, loss='categorical_crossentropy',
                       metrics=['accuracy', AUC(name='auc')])

    # encode class values as integers
    label_encoder = LabelEncoder()
    label_encoder.fit(y_train)
    self.classes_ = label_encoder.classes_
    encoded_y_train = label_encoder.transform(y_train)
    # convert integers to dummy variables (i.e. one hot encoded)
    dummy_y_train = np_utils.to_categorical(encoded_y_train)

    X_train_ = self.get_embeddings(X_train)
    if X_valid is not None:
        encoded_y_valid = label_encoder.transform(y_valid)
        dummy_y_valid = np_utils.to_categorical(encoded_y_valid)
        X_valid_ = self.get_embeddings(X_valid)
        self.history_ = self.model.fit(X_train_, dummy_y_train, batch_size=batch_size,
                                       epochs=epochs, callbacks=self.callbacks,
                                       validation_data=(X_valid_, dummy_y_valid))
    else:
        self.history_ = self.model.fit(X_train_, dummy_y_train, batch_size=batch_size,
                                       epochs=epochs, callbacks=self.callbacks)
    return self
def choose_scheduler(model_config):
    """
    Define the optimizer used for training the RelevanceModel
    Users have the option to define an ExponentialDecay learning rate schedule

    Parameters
    ----------
    model_config : dict
        model configuration dictionary

    Returns
    -------
    tensorflow learning rate scheduler

    Notes
    -----
    References:
        https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/Optimizer
        https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/schedules/ExponentialDecay
        https://arxiv.org/pdf/1506.01186.pdf
    """
    if 'lr_schedule' not in model_config:
        # use constant lr schedule
        learning_rate_schedule = ExponentialDecay(
            initial_learning_rate=OptimizerDefaultValues.CONSTANT_LR,
            decay_steps=10000000,
            decay_rate=1.0,
        )
    else:
        lr_schedule = model_config['lr_schedule']
        lr_schedule_key = lr_schedule['key']

        if lr_schedule_key == LearningRateScheduleKey.EXPONENTIAL:
            learning_rate_schedule = ExponentialDecay(
                initial_learning_rate=lr_schedule.get('learning_rate', OptimizerDefaultValues.CONSTANT_LR),
                decay_steps=lr_schedule.get('learning_rate_decay_steps', OptimizerDefaultValues.EXP_DECAY_STEPS),
                decay_rate=lr_schedule.get('learning_rate_decay', OptimizerDefaultValues.EXP_DECAY_RATE),
                staircase=True,
            )
        elif lr_schedule_key == LearningRateScheduleKey.CONSTANT:
            learning_rate_schedule = ExponentialDecay(
                initial_learning_rate=lr_schedule.get('learning_rate', OptimizerDefaultValues.CONSTANT_LR),
                decay_steps=10000000,
                decay_rate=1.0,
            )
        elif lr_schedule_key == LearningRateScheduleKey.REDUCE_LR_ON_PLATEAU:
            learning_rate_schedule = lr_schedule.get('learning_rate', OptimizerDefaultValues.CONSTANT_LR)
        elif lr_schedule_key == LearningRateScheduleKey.CYCLIC:
            lr_schedule_type = lr_schedule['type']
            if lr_schedule_type == CyclicLearningRateType.TRIANGULAR:
                learning_rate_schedule = cyclic_learning_rate.TriangularCyclicalLearningRate(
                    initial_learning_rate=lr_schedule.get('initial_learning_rate', OptimizerDefaultValues.CYCLIC_INITIAL_LEARNING_RATE),
                    maximal_learning_rate=lr_schedule.get('maximal_learning_rate', OptimizerDefaultValues.CYCLIC_MAXIMAL_LEARNING_RATE),
                    step_size=lr_schedule.get('step_size', OptimizerDefaultValues.CYCLIC_STEP_SIZE),
                )
            elif lr_schedule_type == CyclicLearningRateType.TRIANGULAR2:
                learning_rate_schedule = cyclic_learning_rate.Triangular2CyclicalLearningRate(
                    initial_learning_rate=lr_schedule.get('initial_learning_rate', OptimizerDefaultValues.CYCLIC_INITIAL_LEARNING_RATE),
                    maximal_learning_rate=lr_schedule.get('maximal_learning_rate', OptimizerDefaultValues.CYCLIC_MAXIMAL_LEARNING_RATE),
                    step_size=lr_schedule.get('step_size', OptimizerDefaultValues.CYCLIC_STEP_SIZE),
                )
            elif lr_schedule_type == CyclicLearningRateType.EXPONENTIAL:
                learning_rate_schedule = cyclic_learning_rate.ExponentialCyclicalLearningRate(
                    initial_learning_rate=lr_schedule.get('initial_learning_rate', OptimizerDefaultValues.CYCLIC_INITIAL_LEARNING_RATE),
                    maximal_learning_rate=lr_schedule.get('maximal_learning_rate', OptimizerDefaultValues.CYCLIC_MAXIMAL_LEARNING_RATE),
                    step_size=lr_schedule.get('step_size', OptimizerDefaultValues.CYCLIC_STEP_SIZE),
                    gamma=lr_schedule.get('gamma', OptimizerDefaultValues.CYCLIC_GAMMA),
                )
            else:
                raise ValueError("Unsupported cyclic learning rate schedule type key: " + lr_schedule_type)
        else:
            raise ValueError("Unsupported learning rate schedule key: " + lr_schedule_key)

    return learning_rate_schedule
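# Hedged usage sketch (assumption, not from the original source): a model_config
# fragment that exercises the EXPONENTIAL branch of choose_scheduler() above. The key
# names mirror the ones read by the function; the numeric values are illustrative only.
model_config = {
    'lr_schedule': {
        'key': LearningRateScheduleKey.EXPONENTIAL,
        'learning_rate': 0.01,
        'learning_rate_decay_steps': 100000,
        'learning_rate_decay': 0.96,
    }
}
learning_rate_schedule = choose_scheduler(model_config)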
def get_Papakostas_model(PARAMS, n_classes=2):
    '''
    CNN architecture proposed by Papakostas et al. [2]

    Parameters
    ----------
    PARAMS : dict
        Contains various parameters.
    n_classes : int, optional
        Number of classes. Default is 2.

    Returns
    -------
    model : tensorflow.keras.models.Model
        CNN model.
    learning_rate : float
        Initial learning rate.
    '''
    input_img = Input(PARAMS['input_shape'][PARAMS['Model']])

    x = Conv2D(96, input_shape=PARAMS['input_shape'][PARAMS['Model']], kernel_size=(5, 5),
               strides=(2, 2), kernel_initializer=RandomNormal(stddev=0.01),
               bias_initializer=Constant(value=0.1))(input_img)
    x = Lambda(lambda norm_lyr: LRN(norm_lyr, depth_radius=5, alpha=0.0001, beta=0.75))(x)
    x = Activation('relu')(x)
    x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding='same')(x)

    x = Conv2D(384, kernel_size=(3, 3), strides=(2, 2),
               kernel_initializer=RandomNormal(stddev=0.01),
               bias_initializer=Constant(value=0.1))(x)
    x = Lambda(lambda norm_lyr: LRN(norm_lyr, depth_radius=5, alpha=0.0001, beta=0.75))(x)
    x = Activation('relu')(x)
    x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding='same')(x)

    x = Conv2D(512, kernel_size=(3, 3), strides=(1, 1),
               kernel_initializer=RandomNormal(stddev=0.01),
               bias_initializer=Constant(value=0.1), padding='same')(x)
    x = Activation('relu')(x)
    x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding='same')(x)

    x = Flatten()(x)
    x = Dense(4096, kernel_initializer=RandomNormal(stddev=0.01),
              bias_initializer=Constant(value=0.1))(x)
    x = BatchNormalization(axis=-1)(x)
    x = Activation('relu')(x)
    x = Dropout(0.5)(x)
    x = Dense(4096, kernel_initializer=RandomNormal(stddev=0.01),
              bias_initializer=Constant(value=0.1))(x)
    x = BatchNormalization(axis=-1)(x)
    x = Activation('relu')(x)
    x = Dropout(0.5)(x)
    output = Dense(n_classes, activation='softmax',
                   kernel_initializer=RandomNormal(stddev=0.01),
                   bias_initializer=Constant(value=0.1))(x)

    model = Model(input_img, output)

    initial_learning_rate = 0.001
    lr_schedule = ExponentialDecay(initial_learning_rate, decay_steps=700, decay_rate=0.1)
    optimizer = optimizers.SGD(learning_rate=lr_schedule)
    if n_classes == 2:
        model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    elif n_classes == 3:
        model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

    print(model.summary())
    print('Architecture proposed by Papakostas et al. Expert Systems with Applications 2018\n')

    return model, initial_learning_rate
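# Hedged usage sketch (assumption): PARAMS only needs the keys read by the function
# above; the model key and input shape below are illustrative, not the values used in
# the original experiments.
PARAMS = {'Model': 'Papakostas', 'input_shape': {'Papakostas': (68, 68, 1)}}
model, lr = get_Papakostas_model(PARAMS, n_classes=2)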
def __init__(self, config): self.ds_train, self.config = get_dataset_and_info(config) # ["/gpu:{}".format(i) for i in range(self.config['num_gpu'])] self.strategy = tf.distribute.MirroredStrategy() \ if len(self.config['gpu']) > 1 \ else tf.distribute.OneDeviceStrategy(device="/gpu:0") self.steps_per_epoch = self.config['num_records'] // self.config[ 'global_batch_size'] print("total steps: ", self.steps_per_epoch * self.config['epoch']) self.ds_train = self.strategy.experimental_distribute_dataset( self.ds_train) with self.strategy.scope(): if self.config['model'] == 'vanilla': self.generator = get_generator(self.config) self.discriminator = get_discriminator(self.config) #TODO: fix resnet model #elif config['model'] == 'resnet': # self.generator = get_res_generator(config) # self.discriminator = get_res_discriminator(config) else: raise ValueError('Unsupported model type') lr_fn_G = ExponentialDecay(self.config['lr_g'], self.steps_per_epoch, decay_rate=self.config['decay_rate'], staircase=True) lr_fn_D = ExponentialDecay(self.config['lr_d'], self.steps_per_epoch * self.config['update_ratio'], decay_rate=self.config['decay_rate'], staircase=True) self.optimizer_G = optimizers.Adam(learning_rate=lr_fn_G, beta_1=0.) self.optimizer_D = optimizers.Adam(learning_rate=lr_fn_D, beta_1=0.) if self.config['loss'] == "cross_entropy": print("use ce loss") self.gloss_fn = cross_entropy_g self.dloss_fn = cross_entropy_d elif self.config['loss'] == "hinge_loss": print("use hinge loss") self.gloss_fn = hinge_loss_g self.dloss_fn = hinge_loss_d else: raise ValueError('Unsupported loss type') # build model & get trainable variables. self.generator.build( input_shape=[(self.config['batch_size'], self.config['z_dim']), (self.config['batch_size'])]) self.discriminator.build( input_shape=[(self.config['batch_size'], config['img_size'], config['img_size'], 3), (self.config['batch_size'])]) self.generator.summary() self.discriminator.summary() self.var_G = [var.name for var in self.generator.variables] self.Train_var_G = [ var.name for var in self.generator.trainable_variables ] self.Train_var_D = [ var.name for var in self.discriminator.trainable_variables ] print("-" * 20, "generator weights", "-" * 20) pprint(self.Train_var_G) print("-" * 20, "discrimiator weights", "-" * 20) pprint(self.Train_var_D) # checkpoints self.ckpt_G = tf.train.Checkpoint(step=tf.Variable(1), optimizer=self.optimizer_G, net=self.generator) self.ckpt_D = tf.train.Checkpoint(step=tf.Variable(1), optimizer=self.optimizer_D, net=self.discriminator) self.CkptManager_G = tf.train.CheckpointManager( self.ckpt_G, '{}/G'.format(self.config['ckpt_dir']), max_to_keep=10, checkpoint_name='epoch') self.CkptManager_D = tf.train.CheckpointManager( self.ckpt_D, '{}/D'.format(self.config['ckpt_dir']), max_to_keep=10, checkpoint_name='epoch') # metrics self.metrics = {} self.metrics['G_loss'] = tf.keras.metrics.Mean('generator_loss', dtype=tf.float32) self.metrics['D_loss'] = tf.keras.metrics.Mean('discriminator_loss', dtype=tf.float32) self.metrics.update({ name: tf.keras.metrics.Mean(name, dtype=tf.float32) for name in self.var_G }) self.metrics.update({ name + '/norm': tf.keras.metrics.Mean(name + '/norm', dtype=tf.float32) for name in self.Train_var_G }) #for name in self.Train_var_G: # self.metrics[name] = #var_name = [var.name for var in self.generator.variables] #for name in var_name: # self.metrics[name] = tf.keras.metrics.Mean( # name, dtype=tf.float32) self.fixed_vector = tf.random.normal( [config['batch_size'], config['z_dim']]) 
        self.fixed_label = tf.random.uniform((self.config['batch_size'],), 0,
                                             self.config['num_classes'], dtype=tf.int32)
def __init__( self, seed: int, DQN_type: str, gamma: float, epsilon: float, min_eps_pct: float, min_eps: float, max_exp_pct: float, update_target: str, copy_step: int, tau: float, input_shape: int, hidden_units: list, hidden_memory_units: list, batch_size: int, selected_loss: str, lr: float, start_train: int, optimizer_name: str, batch_norm_input: str, batch_norm_hidden: str, activation: str, kernel_initializer: str, action_space, use_PER: bool = False, PER_e: Optional[float] = None, PER_a: Optional[float] = None, PER_b: Optional[float] = None, final_PER_b: Optional[float] = None, PER_b_growth: Optional[float] = None, final_PER_a: Optional[float] = None, PER_a_growth: Optional[float] = None, sample_type: str = "TDerror", beta_1: float = 0.9, beta_2: float = 0.999, eps_opt: float = 1e-07, lr_schedule: Optional[str] = None, exp_decay_pct: Optional[float] = None, exp_decay_rate: Optional[float] = None, rng=None, N_train: int = 100000, modelname: str = "Deep Network", ): """ Instantiate DQN Class Parameters ---------- seed: int Seed for experiment reproducibility DQN_type: str DQN variant choice. It can be 'DQN' or 'DDQN' recurrent_env: bool Boolean to regulate if the environment is recurrent or not gamma: float Discount parameter for the target update max_exp_pct: int Max size of the experience replay buffer as a pct of the total iterations update_target: str Choice for target update. It can be 'hard' or 'soft' tau: float When the update is 'soft', tau regulates the amount of the update towards the current parameters input_shape: int Shape of input of the neural network hidden_units: list List of sizes of hidden layers. The length of the list determines the depth of the Q network hidden_memory_units: list, List of sizes of recurrent hidden layers. The length of the list determines the depth of the Q network batch_size: int Size of the batch to perform an update selected_loss: str Choice for the loss function. It can be 'mse' or 'huber' lr: float Initial learning rate start_train: int Number of iteration after which the training starts optimizer_name: str Choice for the optimizer. It can be 'sgd', 'sgdmom', 'sgdnest', 'adagrad', 'adadelta', 'adamax', 'adam', 'amsgrad', 'nadam', or 'rmsprop' batch_norm_input: bool Boolean to regulate the presence of a Batch Norm layer after the input batch_norm_hidden: bool Boolean to regulate the presence of a Batch Norm layer after each hidden layer activation: str Choice of activation function. 
It can be 'leaky_relu', 'relu6' or 'elu' kernel_initializer: str Choice of weight initialization as aliased in TF2.0 documentation plot_hist: bool Boolean to regulate if plot the histogram of intermediate outputs in tensorboard plot_steps_hist: int Number of steps at which the histogram of intermediate outputs are plotted in tensorboard plot_steps: int Number of steps at which all the other variables are stored in tensorboard summary_writer, #TODO need to add proper type hint Tensorabord writer action_space: class Space of possible action as class that inherits from gym use_PER: bool = False Boolean to regulate if use Prioritized Experience Replay (PER) or not PER_e: Optional[float] Correction for priorities PER_a: Optional[float] Amount of prioritization PER_b: Optional[float] Amount of correction for introduced bias when using PER final_PER_b: Optional[float] = None Final value for b after the anneal PER_b_growth: Optional[float] Rate of increase of the b final_PER_a: Optional[float] = None Final value for a after the anneal PER_a_growth: Optional[float] Rate of increase of the a sample_type : str Type of sampling in PER. It can be 'TDerror', 'diffTDerror' or 'reward' clipgrad: bool Choice of the gradients to clip. It can be 'norm', 'value' or 'globnorm' clipnorm: Optional[Union[str or float]] Boolean for clipping the norm of the gradients clipvalue: Optional[Union[str or float]] Boolean for clipping the value of the gradients clipglob_steps: Optional[int] Boolean for clipping the global norm of the gradients beta_1: float = 0.9 Parameter for adaptive optimizer beta_2: float = 0.999 Parameter for adaptive optimizer eps_opt: float = 1e-07 Corrective parameter for adaptive optimizer std_rwds: bool = False Boolean to regulate if standardize rewards or not lr_schedule: Optional[str] Choice for the learning rate schedule. 
It can be 'exponential', 'piecewise', 'inverse_time' or 'polynomial' exp_decay_pct: Optional[float] Amount of steps to reach the desired level of decayed learning rate as pct of the total iteration exp_decay_rate: Optional[float] Rate of decay to reach the desired level of decayed learning rate rng = None Random number generator for reproducibility modelname: str Name for the model """ if rng is not None: self.rng = rng self.batch_size = batch_size exp_decay_steps = int(N_train * exp_decay_pct) if lr_schedule == "exponential": lr = ExponentialDecay( initial_learning_rate=lr, decay_steps=exp_decay_steps, decay_rate=exp_decay_rate, ) if optimizer_name == "adam": self.optimizer = tf.keras.optimizers.Adam( learning_rate=lr, beta_1=beta_1, beta_2=beta_2, epsilon=eps_opt, amsgrad=False, ) elif optimizer_name == "rmsprop": self.optimizer = tf.keras.optimizers.RMSprop( learning_rate=lr, rho=beta_1, momentum=0.0, epsilon=eps_opt, centered=False, ) self.beta_1 = beta_1 self.eps_opt = eps_opt self.gamma = gamma self.max_experiences = int(N_train * max_exp_pct) self.use_PER = use_PER if self.use_PER: if PER_b_growth: PER_b_steps = N_train PER_b_growth = (final_PER_b - PER_b) / PER_b_steps else: PER_b_growth = 0.0 PER_b_steps = None if PER_a_growth: PER_a_steps = PER_a_steps = N_train PER_a_growth = (final_PER_a - PER_a) / PER_a_steps else: PER_a_growth = 0.0 PER_a_steps = None self.PERmemory = PER_buffer( PER_e, PER_a, PER_b, final_PER_b, PER_b_steps, PER_b_growth, final_PER_a, PER_a_steps, PER_a_growth, self.max_experiences, rng, sample_type, ) # experience is stored as object of this class else: self.experience = { "s": [], "a": [], "r": [], "s2": [], "a_unsc": [] } self.start_train = start_train self.action_space = action_space self.num_actions = len(self.action_space.values) self.batch_norm_input = batch_norm_input self.batch_norm_hidden = batch_norm_hidden self.model = DeepNetworkModel( seed, input_shape, hidden_units, self.num_actions, batch_norm_input, batch_norm_hidden, activation, kernel_initializer, modelname, ) self.target_model = DeepNetworkModel( seed, input_shape, hidden_units, self.num_actions, batch_norm_input, batch_norm_hidden, activation, kernel_initializer, "Target " + modelname, ) self.selected_loss = selected_loss self.DQN_type = DQN_type self.update_target = update_target self.copy_step = copy_step self.tau = tau self.optimizer_name = optimizer_name if self.selected_loss == "mse": self.loss = tf.keras.losses.MeanSquaredError() elif self.selected_loss == "huber": self.loss = tf.keras.losses.Huber() self.epsilon = epsilon self.min_eps = min_eps self.min_eps_pct = min_eps_pct
    return TensorBoard(log_dir=_log_dir + exp_name, profile_batch=0, histogram_freq=1)


def build_baseline_vgg():
    pass
    return


_n_train, _n_valid, _n_test = report_data_size()
_shuffle = True
_log_dir = './logs/baseline_model/'
_seed = 27
_learning_rate = 0.0001
_schedule = ExponentialDecay(_learning_rate, decay_steps=10_0000, decay_rate=0.96)
_opt = Adam(learning_rate=_schedule)
_es = EarlyStopping(monitor='val_accuracy', patience=20)
_tb = tb_callback('Baseline_model_1')
_callbacks = [_es]
_metrics = ['accuracy']
_loss = 'categorical_crossentropy'
_steps_per_epoch = _n_train // _batch_size

baseline_model1 = build_baseline_vgg()
baseline_model1_hist = baseline_model1.fit(train_data, epochs=_epochs,
                                           validation_data=valid_data,
                                           steps_per_epoch=_steps_per_epoch,
                                           callbacks=_callbacks,
def get_Papakostas_MTL_model(PARAMS, n_classes=3): ''' MTL modification of the CNN architecture proposed by Papakostas et al. [2] [2] Papakostas, M., & Giannakopoulos, T. (2018). Speech-music discrimination using deep visual feature extractors. Expert Systems with Applications, 114, 334-344. Parameters ---------- PARAMS : dict Contains various parameters. n_classes : int, optional Number of classes. Default is 3. Returns ------- model : tensorflow.keras.models.Model MTL CNN model. learning_rate : float Initial learning rate. ''' input_img = Input(PARAMS['input_shape'][PARAMS['Model']]) x = Conv2D(96, input_shape=PARAMS['input_shape'][PARAMS['Model']], kernel_size=(5, 5), strides=(2, 2), kernel_initializer=RandomNormal(stddev=0.01), bias_initializer=Constant(value=0.1))(input_img) x = Lambda(lambda norm_lyr: LRN( norm_lyr, depth_radius=5, alpha=0.0001, beta=0.75))(x) # x = BatchNormalization(axis=-1)(x) x = Activation('relu')(x) x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding='same')(x) x = Conv2D(384, kernel_size=(3, 3), strides=(2, 2), kernel_initializer=RandomNormal(stddev=0.01), bias_initializer=Constant(value=0.1))(x) x = Lambda(lambda norm_lyr: LRN( norm_lyr, depth_radius=5, alpha=0.0001, beta=0.75))(x) # x = BatchNormalization(axis=-1)(x) x = Activation('relu')(x) x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding='same')(x) x = Conv2D(512, kernel_size=(3, 3), strides=(1, 1), kernel_initializer=RandomNormal(stddev=0.01), bias_initializer=Constant(value=0.1), padding='same')(x) # x = BatchNormalization(axis=-1)(x) x = Activation('relu')(x) x = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding='same')(x) x = Flatten()(x) x = Dense(4096, kernel_initializer=RandomNormal(stddev=0.01), bias_initializer=Constant(value=0.1))(x) x = BatchNormalization(axis=-1)(x) x = Activation('relu')(x) x = Dropout(0.5)(x) x = Dense(4096, kernel_initializer=RandomNormal(stddev=0.01), bias_initializer=Constant(value=0.1))(x) x = BatchNormalization(axis=-1)(x) x = Activation('relu')(x) x = Dropout(0.5)(x) classification_output = Dense(n_classes, activation='softmax', kernel_initializer=RandomNormal(stddev=0.01), bias_initializer=Constant(value=0.1), name='3C')(x) sp_output, x_sp, mu_output, x_mu, smr_output, x_smr = MTL_modifications(x) model = Model(input_img, [sp_output, mu_output, smr_output, classification_output]) initial_learning_rate = 0.001 lr_schedule = ExponentialDecay(initial_learning_rate, decay_steps=700, decay_rate=0.1) optimizer = optimizers.SGD(learning_rate=lr_schedule) model.compile(loss={ 'S': 'binary_crossentropy', 'M': 'binary_crossentropy', 'R': 'mean_squared_error', '3C': 'categorical_crossentropy' }, optimizer=optimizer, metrics={'3C': 'accuracy'}) print(model.summary()) print( 'MTL modifications of architecture proposed by Papakostas et al. Expert Systems with Applications 2018\n' ) return model, initial_learning_rate
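A hedged usage sketch for the MTL model above: because compile keys its losses by output name ('S', 'M', 'R', '3C'), fit expects a matching dictionary of targets. PARAMS is assumed to be the project's configuration dict, and the target shapes below are illustrative guesses rather than values taken from MTL_modifications.

import numpy as np

# PARAMS is assumed to be configured as elsewhere in the project.
model, lr0 = get_Papakostas_MTL_model(PARAMS, n_classes=3)

shape = PARAMS['input_shape'][PARAMS['Model']]            # e.g. (H, W, C)
X = np.random.rand(8, *shape).astype('float32')           # dummy spectrogram batch
y = {
    'S': np.random.randint(0, 2, size=(8, 1)).astype('float32'),   # speech flag (assumed shape)
    'M': np.random.randint(0, 2, size=(8, 1)).astype('float32'),   # music flag (assumed shape)
    'R': np.random.rand(8, 1).astype('float32'),                    # regression target (assumed shape)
    '3C': np.eye(3)[np.random.randint(0, 3, size=8)],               # 3-class one-hot labels
}
model.fit(X, y, batch_size=4, epochs=1)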
def choose_scheduler(model_config): """ Define the learning rate schedule used for training the RelevanceModel Users have the option to define an ExponentialDecay learning rate schedule Parameters ---------- model_config : dict model configuration dictionary Returns ------- tensorflow learning rate scheduler Notes ----- References: https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/Optimizer https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/schedules/ExponentialDecay https://arxiv.org/pdf/1506.01186.pdf """ if 'lr_schedule' not in model_config: # use a constant lr schedule learning_rate_schedule = ExponentialDecay( initial_learning_rate=0.01, decay_steps=10000000, decay_rate=1.0, ) else: lr_schedule = model_config['lr_schedule'] lr_schedule_key = lr_schedule['key'] if lr_schedule_key == LearningRateScheduleKey.EXPONENTIAL: learning_rate_schedule = ExponentialDecay( initial_learning_rate=lr_schedule['learning_rate'] if 'learning_rate' in lr_schedule else 0.01, decay_steps=lr_schedule['learning_rate_decay_steps'] if 'learning_rate_decay_steps' in lr_schedule else 100000, decay_rate=lr_schedule['learning_rate_decay'] if 'learning_rate_decay' in lr_schedule else 0.96, staircase=True, ) elif lr_schedule_key == LearningRateScheduleKey.CONSTANT: learning_rate_schedule = ExponentialDecay( initial_learning_rate=lr_schedule['learning_rate'] if 'learning_rate' in lr_schedule else 0.01, decay_steps=10000000, decay_rate=1.0, ) elif lr_schedule_key == LearningRateScheduleKey.CYCLIC: lr_schedule_type = lr_schedule['type'] if lr_schedule_type == CyclicLearningRateType.TRIANGULAR: learning_rate_schedule = cyclic_learning_rate.TriangularCyclicalLearningRate( initial_learning_rate=lr_schedule['initial_learning_rate'] if 'initial_learning_rate' in lr_schedule else 0.001, maximal_learning_rate=lr_schedule['maximal_learning_rate'] if 'maximal_learning_rate' in lr_schedule else 0.01, step_size=lr_schedule['step_size'] if 'step_size' in lr_schedule else 10, ) elif lr_schedule_type == CyclicLearningRateType.TRIANGULAR2: learning_rate_schedule = cyclic_learning_rate.Triangular2CyclicalLearningRate( initial_learning_rate=lr_schedule['initial_learning_rate'] if 'initial_learning_rate' in lr_schedule else 0.001, maximal_learning_rate=lr_schedule['maximal_learning_rate'] if 'maximal_learning_rate' in lr_schedule else 0.01, step_size=lr_schedule['step_size'] if 'step_size' in lr_schedule else 10, ) elif lr_schedule_type == CyclicLearningRateType.EXPONENTIAL: learning_rate_schedule = cyclic_learning_rate.ExponentialCyclicalLearningRate( initial_learning_rate=lr_schedule['initial_learning_rate'] if 'initial_learning_rate' in lr_schedule else 0.001, maximal_learning_rate=lr_schedule['maximal_learning_rate'] if 'maximal_learning_rate' in lr_schedule else 0.01, step_size=lr_schedule['step_size'] if 'step_size' in lr_schedule else 10, gamma=lr_schedule['gamma'] if 'gamma' in lr_schedule else 1.0, ) else: raise ValueError( "Unsupported cyclic learning rate schedule type key: " + lr_schedule_type) else: raise ValueError("Unsupported learning rate schedule key: " + lr_schedule_key) return learning_rate_schedule
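A hedged usage sketch for choose_scheduler: an exponential-decay entry in model_config and the resulting schedule handed to a Keras optimizer. The config keys mirror the lookups in the function; LearningRateScheduleKey is assumed to be importable from the same project.

from tensorflow.keras.optimizers import Adam

# LearningRateScheduleKey is assumed to be imported from the surrounding project.
model_config = {
    'lr_schedule': {
        'key': LearningRateScheduleKey.EXPONENTIAL,
        'learning_rate': 0.01,
        'learning_rate_decay_steps': 100000,
        'learning_rate_decay': 0.96,
    }
}

learning_rate_schedule = choose_scheduler(model_config)
optimizer = Adam(learning_rate=learning_rate_schedule)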
def main(): parser = argparse.ArgumentParser() parser.add_argument('output', help='Model output name') args = parser.parse_args() x_gyro = [] x_acc = [] y = [] imu_data_filenames = [] gt_data = [] for i in range(9): data_imu_path = f'/home/huydung/devel/intern/data/1ere/{i}/data_deep/imu/' for j in range( len([ name for name in os.listdir(data_imu_path) if os.path.isfile(os.path.join(data_imu_path, name)) ])): imu_data_filenames.append(data_imu_path + f'{j}.csv') gt_data.append(np.array([0., 1.])) for i in range(9): data_imu_path = f'/home/huydung/devel/intern/data/2eme/{i}/data_deep/imu/' for j in range( len([ name for name in os.listdir(data_imu_path) if os.path.isfile(os.path.join(data_imu_path, name)) ])): imu_data_filenames.append(data_imu_path + f'{j}.csv') gt_data.append(np.array([1., 0.])) for i, (cur_imu_data_filename, cur_gt_data) in enumerate(zip(imu_data_filenames, gt_data)): cur_x_gyro, cur_x_acc, cur_gt = load_cea_dataset( cur_imu_data_filename, cur_gt_data) x_gyro.append(cur_x_gyro) x_acc.append(cur_x_acc) y.append(cur_gt) x_gyro = np.reshape(x_gyro, (len(x_gyro), x_gyro[0].shape[0], x_gyro[0].shape[1])) x_acc = np.reshape(x_acc, (len(x_acc), x_acc[0].shape[0], x_acc[0].shape[1])) y = np.vstack(y) x_gyro, x_acc, y = shuffle(x_gyro, x_acc, y) initial_learning_rate = 3e-4 lr_schedule = ExponentialDecay(initial_learning_rate, decay_steps=100000, decay_rate=0.97, staircase=True) pred_model = create_pred_model_6d_quat() # train_model = create_train_model_6d_quat(pred_model) pred_model.compile(optimizer=Adam(learning_rate=lr_schedule), loss='categorical_crossentropy') filepath = "model_checkpoint.hdf5" model_checkpoint = ModelCheckpoint(filepath, monitor='val_loss', save_best_only=True, verbose=1) tensorboard = TensorBoard(log_dir="logs/{}".format(time()), profile_batch=0) try: history = pred_model.fit([x_gyro, x_acc], y, epochs=20, batch_size=1, verbose=1, callbacks=[model_checkpoint, tensorboard], validation_split=0.1) pred_model.load_weights(filepath) pred_model.save('last_best_model_with_custom_layer.hdf5') # pred_model = create_pred_model_6d_quat(window_size) pred_model.set_weights(pred_model.get_weights()) pred_model.save('%s.hdf5' % args.output) plt.plot(history.history['loss']) plt.plot(history.history['val_loss']) plt.title('Model loss') plt.ylabel('Loss') plt.xlabel('Epoch') plt.legend(['Train', 'Validation'], loc='upper left') plt.show() except KeyboardInterrupt: pred_model.load_weights(filepath) pred_model.save('last_best_model_with_custom_layer.hdf5') # pred_model = create_pred_model_6d_quat(window_size) pred_model.set_weights(pred_model.get_weights()) pred_model.save('%s.hdf5' % args.output) print('Early terminate') print('Training complete')
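For clarity, a small hedged sketch of how the staircase schedule defined in main behaves: with staircase=True the exponent is floored, so the learning rate holds at 3e-4 for the first 100000 optimizer steps and then drops by a factor of 0.97 per completed interval.

from tensorflow.keras.optimizers.schedules import ExponentialDecay

lr_schedule = ExponentialDecay(3e-4, decay_steps=100000,
                               decay_rate=0.97, staircase=True)

# lr(step) = 3e-4 * 0.97 ** floor(step / 100000)
for step in (0, 99_999, 100_000, 200_000):
    print(step, float(lr_schedule(step)))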
def get_model(pretrained_weights=None): input_layer = Input(shape=(50, 50, 1)) # Down-sampling conv1 = Conv2D(64, kernel_size=3, padding="VALID")(input_layer) # 48 x 48 conv1 = BatchNormalization()(conv1) conv1 = Activation("relu")(conv1) # conv1 = Dropout(0.1)(conv1) conv1 = Conv2D(64, kernel_size=3, padding="SAME")(conv1) # 48 x 48 conv1 = BatchNormalization()(conv1) conv1 = Activation("relu")(conv1) # conv1 = Dropout(0.1)(conv1) conv1 = Conv2D(64, kernel_size=3, padding="SAME")(conv1) # 48 x 48 conv1 = BatchNormalization()(conv1) conv1 = Activation("relu")(conv1) # conv1 = Dropout(0.1)(conv1) pool1 = MaxPooling2D(pool_size=2)(conv1) # 24 x 24 conv2 = Conv2D(128, kernel_size=3, padding="SAME")(pool1) # 24 x 24 conv2 = BatchNormalization()(conv2) conv2 = Activation("relu")(conv2) # conv2 = Dropout(0.1)(conv2) conv2 = Conv2D(128, kernel_size=3, padding="SAME")(conv2) # 24 x 24 conv2 = BatchNormalization()(conv2) conv2 = Activation("relu")(conv2) # conv2 = Dropout(0.1)(conv2) conv2 = Conv2D(128, kernel_size=3, padding="SAME")(conv2) # 24 x 24 conv2 = BatchNormalization()(conv2) conv2 = Activation("relu")(conv2) # conv2 = Dropout(0.1)(conv2) pool2 = MaxPooling2D(pool_size=2)(conv2) # 12 x 12 conv3 = Conv2D(256, kernel_size=3, padding="SAME")(pool2) # 12 x 12 conv3 = BatchNormalization()(conv3) conv3 = Activation("relu")(conv3) # conv3 = Dropout(0.1)(conv3) conv3 = Conv2D(256, kernel_size=3, padding="SAME")(conv3) # 12 x 12 conv3 = BatchNormalization()(conv3) conv3 = Activation("relu")(conv3) # conv3 = Dropout(0.1)(conv3) conv3 = Conv2D(256, kernel_size=3, padding="SAME")(conv3) # 12 x 12 conv3 = BatchNormalization()(conv3) conv3 = Activation("relu")(conv3) # conv3 = Dropout(0.1)(conv3) pool3 = MaxPooling2D(pool_size=2)(conv3) # 6 x 6 conv4 = Conv2D(512, kernel_size=3, padding="SAME")(pool3) # 6 x 6 conv4 = BatchNormalization()(conv4) conv4 = Activation("relu")(conv4) # conv4 = Dropout(0.1)(conv4) conv4 = Conv2D(512, kernel_size=3, padding="SAME")(conv4) # 6 x 6 conv4 = BatchNormalization()(conv4) conv4 = Activation("relu")(conv4) # conv4 = Dropout(0.1)(conv4) conv4 = Conv2D(512, kernel_size=3, padding="SAME")(conv4) # 6 x 6 conv4 = BatchNormalization()(conv4) conv4 = Activation("relu")(conv4) # conv4 = Dropout(0.1)(conv4) ### # Up-sampling up5 = (UpSampling2D(size=(2, 2))(conv4)) # 12 x 12 merge5 = Concatenate()([conv3, up5]) conv5 = Conv2D(256, kernel_size=2, padding="SAME")(merge5) conv5 = BatchNormalization()(conv5) conv5 = Activation("relu")(conv5) # conv5 = Dropout(0.1)(conv5) conv5 = Conv2D(256, kernel_size=3, padding="SAME")(conv5) conv5 = BatchNormalization()(conv5) conv5 = Activation("relu")(conv5) # conv5 = Dropout(0.1)(conv5) conv5 = Conv2D(256, kernel_size=3, padding="SAME")(conv5) conv5 = BatchNormalization()(conv5) conv5 = Activation("relu")(conv5) # conv5 = Dropout(0.1)(conv5) up6 = (UpSampling2D(size=(2, 2))(conv5)) # 24 x 24 merge6 = Concatenate()([conv2, up6]) conv6 = Conv2D(128, kernel_size=2, padding="SAME")(merge6) conv6 = BatchNormalization()(conv6) conv6 = Activation("relu")(conv6) # conv6 = Dropout(0.1)(conv6) conv6 = Conv2D(128, kernel_size=3, padding="SAME")(conv6) conv6 = BatchNormalization()(conv6) conv6 = Activation("relu")(conv6) # conv6 = Dropout(0.1)(conv6) conv6 = Conv2D(128, kernel_size=3, padding="SAME")(conv6) conv6 = BatchNormalization()(conv6) conv6 = Activation("relu")(conv6) # conv6 = Dropout(0.1)(conv6) up7 = (UpSampling2D(size=(2, 2))(conv6)) # 48 x 48 merge7 = Concatenate()([conv1, up7]) conv7 = Conv2D(64, kernel_size=2, padding="SAME")(merge7) 
conv7 = BatchNormalization()(conv7) conv7 = Activation("relu")(conv7) # conv7 = Dropout(0.1)(conv7) conv7 = Conv2D(64, kernel_size=3, padding="SAME")(conv7) conv7 = BatchNormalization()(conv7) conv7 = Activation("relu")(conv7) # conv7 = Dropout(0.1)(conv7) conv7 = Conv2D(64, kernel_size=3, padding="SAME")(conv7) conv7 = BatchNormalization()(conv7) conv7 = Activation("relu")(conv7) # conv7 = Dropout(0.1)(conv7) conv8 = Conv2DTranspose(1, kernel_size=3, padding="VALID")(conv7) # 50 x 50 merge9 = Concatenate()([conv8, input_layer]) conv10 = Conv2D(1, kernel_size=1)(merge9) conv10 = BatchNormalization()(conv10) conv10 = Activation("relu")(conv10) model = Model(inputs=input_layer, outputs=conv10) initial_learning_rate = 0.1 lr_schedule = ExponentialDecay(initial_learning_rate, decay_steps=100000, decay_rate=0.96, staircase=True) model.compile(optimizer=Adam(learning_rate=lr_schedule), loss=MeanSquaredError(), metrics=["accuracy"]) return model
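A brief usage check for get_model above (a sketch, assuming the surrounding imports are in scope): the encoder/decoder path brings a 50x50 single-channel patch back to the same spatial size, which can be verified with a dummy batch.

import numpy as np

model = get_model()
dummy = np.random.rand(2, 50, 50, 1).astype('float32')  # hypothetical input batch
print(model.predict(dummy).shape)  # expected: (2, 50, 50, 1)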
def __init__(self, timesteps, includeAux, folderI, trainLoss, includeModis, includeVGG, disLoss, cloud_cov=0.4, istransfer=False, img_h=256, img_width=256, startT='01-01-2018', endT='01-05-2019'): self.img_h = img_h self.img_w = img_width self.timesteps = timesteps self.includeModis = includeModis hvd.init() gpus = tf.config.experimental.list_physical_devices('GPU') if gpus: tf.config.experimental.set_visible_devices(gpus[hvd.local_rank()], 'GPU') for gpu in gpus: tf.config.experimental.set_memory_growth(gpu, True) self.gen_schedule = ExponentialDecay(1e-4 * hvd.size(), decay_steps=10000, decay_rate=0.1, staircase=True) self.disc_schedule = ExponentialDecay(1e-4 * hvd.size() * 5, decay_steps=10000, decay_rate=0.1, staircase=True) self.istransfer = istransfer # self.disOp = hvd.DistributedOptimizer(tf.keras.optimizers.Adam(1e-4 * hvd.size(), 0.5)) # self.lstmOp = hvd.DistributedOptimizer(Adam(lr=1e-4 * hvd.size(), beta_1=0.9, beta_2=0.999, epsilon=1e-08)) self.disOp = hvd.DistributedOptimizer( Adam(learning_rate=self.disc_schedule)) self.lstmOp = hvd.DistributedOptimizer( Adam(learning_rate=self.gen_schedule)) self.model_helpers = models.LSTM_GAN_MODEL(disOp=self.disOp, lstmOp=self.lstmOp, h=self.img_h, w=self.img_w, timeStep=timesteps, includeAux=includeAux, trainLoss=trainLoss, disLoss=disLoss) # print("GOT MODIS======", includeModis) if includeVGG and includeModis == 0: if istransfer: self.dataloader = dataloaders.DatasetHandling( self.img_w, self.img_h, no_of_timesteps=timesteps, startT=startT, endT=endT, cloud_cov=cloud_cov, album='foco-co-20km') self.lstm_gan, self.vgg, self.disciminator, self.lstm_generator = self.model_helpers.lstm_gan_with_vgg_transfer( self.transferLear()) else: self.dataloader = dataloaders.DatasetHandling( self.img_w, self.img_h, no_of_timesteps=timesteps, startT=startT, endT=endT, cloud_cov=cloud_cov) self.lstm_gan, self.vgg, self.disciminator, self.lstm_generator = self.model_helpers.lstm_gan_with_vgg( ) elif not includeVGG and includeModis == 0: self.lstm_gan, self.vgg, self.disciminator, self.lstm_generator = self.model_helpers.lstm_gan_no_vgg( ) elif includeModis == 1: self.lstm_gan, self.vgg, self.disciminator, self.lstm_generator = self.model_helpers.lstm_gan_with_vgg_multi_modis( ) self.dirName = "/s/" + socket.gethostname( ) + "/a/nobackup/galileo/paahuni/" + str(folderI) + "/" if not includeModis == 2: self.img_itr = self.dataloader.get_non_random_image_iterator_new( batch_size=1, no_of_timesteps=self.timesteps, sendMetaInfo=True, includeModis=includeModis) else: self.dataloader = dataloaders.DatasetHandling( self.img_w, self.img_h, no_of_timesteps=timesteps, startT=startT, endT=endT, cloud_cov=cloud_cov) self.includeVGG = includeVGG
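For reference, a hedged standalone sketch of the learning-rate scaling used in this constructor: both schedules scale the base rate by the Horovod world size (the linear scaling rule), and the discriminator schedule starts five times higher than the generator's. The world size below is an assumed value.

from tensorflow.keras.optimizers.schedules import ExponentialDecay

world_size = 4  # hypothetical hvd.size()
gen_schedule = ExponentialDecay(1e-4 * world_size, decay_steps=10000,
                                decay_rate=0.1, staircase=True)
disc_schedule = ExponentialDecay(1e-4 * world_size * 5, decay_steps=10000,
                                 decay_rate=0.1, staircase=True)

print(float(gen_schedule(0)), float(disc_schedule(0)))          # 4e-4, 2e-3
print(float(gen_schedule(10000)), float(disc_schedule(10000)))  # both decayed by 0.1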
f"mv ./experiments/{conf['name']}/* ./experiments/{conf['name']}_{NOW}/" ) else: os.mkdir(f"./experiments/{conf['name']}") hdf5_dir = f"./experiments/{conf['name']}/HDF5" os.mkdir(hdf5_dir) # schedules if conf['parameter']['schedules'] == 'CosineDecayRestarts': from tensorflow.keras.experimental import CosineDecayRestarts learning_rate = CosineDecayRestarts(conf['parameter']['learning_rate'], 100) elif conf['parameter']['schedules'] == 'ExponentialDecay': from tensorflow.keras.optimizers.schedules import ExponentialDecay learning_rate = ExponentialDecay(conf['parameter']['learning_rate']) else: learning_rate = conf['parameter']['learning_rate'] # ============================================================================= # optimizer if conf['parameter']['Optimizers'] == 'SGD': from tensorflow.keras.optimizers import SGD optimizer = SGD(learning_rate) else: from tensorflow.keras.optimizers import Adamax optimizer = Adamax(learning_rate) # ============================================================================= # callbacks callbacks = [