def test_TerminateOnNaN(): np.random.seed(1337) (X_train, y_train), (X_test, y_test) = get_data_callbacks() y_test = np_utils.to_categorical(y_test) y_train = np_utils.to_categorical(y_train) cbks = [callbacks.TerminateOnNaN()] model = Sequential() initializer = initializers.Constant(value=1e5) for _ in range(5): model.add(Dense(num_hidden, input_dim=input_dim, activation='relu', kernel_initializer=initializer)) model.add(Dense(num_classes, activation='linear')) model.compile(loss='mean_squared_error', optimizer='rmsprop') # case 1 fit history = model.fit(X_train, y_train, batch_size=batch_size, validation_data=(X_test, y_test), callbacks=cbks, epochs=20) loss = history.history['loss'] assert len(loss) == 1 assert loss[0] == np.inf history = model.fit_generator(data_generator(X_train, y_train, batch_size), len(X_train), validation_data=(X_test, y_test), callbacks=cbks, epochs=20) loss = history.history['loss'] assert len(loss) == 1 assert loss[0] == np.inf or np.isnan(loss[0])
def test_validate_callbacks_predefined_callbacks(self): supported_predefined_callbacks = [ callbacks.TensorBoard(), callbacks.CSVLogger(filename='./log.csv'), callbacks.EarlyStopping(), callbacks.ModelCheckpoint(filepath='./checkpoint'), callbacks.TerminateOnNaN(), callbacks.ProgbarLogger(), callbacks.History(), callbacks.RemoteMonitor() ] distributed_training_utils_v1.validate_callbacks( supported_predefined_callbacks, adam.Adam()) unsupported_predefined_callbacks = [ callbacks.ReduceLROnPlateau(), callbacks.LearningRateScheduler(schedule=lambda epoch: 0.001) ] for callback in unsupported_predefined_callbacks: with self.assertRaisesRegex( ValueError, 'You must specify a Keras Optimizer V2'): distributed_training_utils_v1.validate_callbacks( [callback], tf.compat.v1.train.AdamOptimizer())
def train(): """Train the given model saving weights to model_file.""" # Setup callbacks callbacks = [C.ModelCheckpoint(filepath=MODEL_WF, verbose=1, save_best_only=True, save_weights_only=True), ThresholdStop(), C.EarlyStopping(monitor='loss', patience=10, verbose=1), C.TerminateOnNaN()] # Big data machine learning in the cloud ft = "data/{}_task{}.txt" model = create_model(iterations=ARGS.iterations) # For long running training swap in stateful checkpoint callbacks[0] = StatefulCheckpoint(MODEL_WF, MODEL_SF, verbose=1, save_best_only=True, save_weights_only=True) tasks = ARGS.tasks or range(1, 13) traind = LogicSeq.from_files([ft.format("train", i) for i in tasks], ARGS.batch_size, pad=ARGS.pad) vald = LogicSeq.from_files([ft.format("val", i) for i in tasks], ARGS.batch_size, pad=ARGS.pad) model.fit_generator(traind, epochs=ARGS.epochs, callbacks=callbacks, validation_data=vald, verbose=1, shuffle=True, initial_epoch=callbacks[0].get_last_epoch())
def run_model(model, epochs, batch_size, X_train, y_train, X_test, y_test): history = History() nanterminator = callbacks.TerminateOnNaN() model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, validation_split=0.25, verbose=1, callbacks=[nanterminator, history]) trainScore = model.evaluate(X_train, y_train, verbose=0) print('Train Score: %.10f MSE (%.10f RMSE)' % (trainScore[0], math.sqrt(trainScore[0]))) testScore = model.evaluate(X_test, y_test, verbose=0) print('Test Score: %.10f MSE (%.10f RMSE)' % (testScore[0], math.sqrt(testScore[0]))) plt.plot(history.history['loss'], label='training') plt.plot(history.history['val_loss'], label='validation') plt.title('loss') plt.legend() plt.show() return model
def train(self, d, report_dir=None, dropout=0.5, batch_size=32, epochs=5, validation_split=0., **params): d = d.sample(frac=1) x = d[[c for c in d.columns if c != self.target]] y = d[[self.target]] self.preprocessor = dict( x=StandardScaler(), y=CategoricalEncoder(encoding='onehot-dense'), ) x = self.preprocessor['x'].fit_transform(x) y = self.preprocessor['y'].fit_transform(y) self.build(dropout=dropout) self.model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy']) callbacks_used = [callbacks.TerminateOnNaN()] if validation_split: callbacks_used += [ callbacks.TensorBoard(report_dir, batch_size=batch_size, histogram_freq=1, write_grads=True), callbacks.ModelCheckpoint(os.path.join(report_dir, 'network.h5'), verbose=0, save_best_only=True) ] report = self.model.fit(x, y, batch_size=batch_size, epochs=epochs, verbose=0, callbacks=callbacks_used, validation_split=validation_split, validation_data=None, shuffle=True, class_weight=None, sample_weight=None, initial_epoch=0, steps_per_epoch=None, validation_steps=None) if not validation_split: models.save_model(self.model, os.path.join(report_dir, 'network.h5')) return {k: [float(_v) for _v in v] for k, v in report.history.items()}
def ilp(training=True): """Run the ILP task using the ILP model.""" # Create the head goal goals, vgoals = ["f(X)"], list() for g in goals: v = np.zeros((1, 1, 4, len(CHAR_IDX) + 1)) for i, c in enumerate(g): v[0, 0, i, CHAR_IDX[c]] = 1 vgoals.append(v) # Create the ILP wrapper model model = build_model("ilp", "weights/ilp.h5", char_size=len(CHAR_IDX) + 1, training=training, goals=vgoals, num_preds=1, pred_len=4) model.summary() traind = LogicSeq.from_file("data/ilp_train.txt", ARGS.batch_size, pad=ARGS.pad) testd = LogicSeq.from_file("data/ilp_test.txt", ARGS.batch_size, pad=ARGS.pad) if training: # Setup callbacks callbacks = [ C.ModelCheckpoint(filepath="weights/ilp.h5", verbose=1, save_best_only=True, save_weights_only=True), C.TerminateOnNaN() ] model.fit_generator(traind, epochs=200, callbacks=callbacks, validation_data=testd, shuffle=True) else: # Dummy input to get templates ctx = "b(h).v(O):-c(O).c(a)." ctx = ctx.split('.')[:-1] # split rules ctx = [r + '.' for r in ctx] dgen = LogicSeq([[(ctx, "f(h).", 0)]], 1, False, False) print("TEMPLATES:") outs = model.predict_on_batch(dgen[0]) ts, out = outs[0], outs[-1] print(ts) # Decode template # (num_templates, num_preds, pred_length, char_size) ts = np.argmax(ts[0], axis=-1) ts = np.vectorize(lambda i: IDX_CHAR[i])(ts) print(ts) print("CTX:", ctx) for o in outs[1:-1]: print(o) print("OUT:", out)
def main(): import os import snorbdata from keras.datasets import cifar10, cifar100 # setting the hyper parameters args = {'epochs':50, 'batch_size':250, 'lr': 1e-3, 'decay': 0.8, 'iters': 3, 'weights': None, 'save_dir':'./results', 'dataset': 10} print(args) if not os.path.exists(args['save_dir']): os.makedirs(args['save_dir']) # load data # define model graph = tf.Graph() with graph.as_default(): tf.add_check_numerics_ops() if args['dataset'] == 10 or args['dataset'] == 100: model = CapsNet_EM(input_shape=(32, 32, 3), num_classes=args['dataset'], iters=args['iters'], cifar=True, num_caps=(16, 24, 24)) else: model = CapsNet_EM(input_shape=(args['dataset'], args['dataset'], 1), num_classes=5, iters=args['iters']) print('-'*30 + 'Summary for Model' + '-'*30) model.summary() print('-'*30 + 'Summaries Done' + '-'*30) if args['dataset'] == 10: (x_train, y_train), (x_test, y_test) = cifar10.load_data() y_train, y_test = np.eye(10)[np.squeeze(y_train)], np.eye(10)[np.squeeze(y_test)] elif args['dataset'] == 100: (x_train, y_train), (x_test, y_test) = cifar100.load_data() y_train, y_test = np.eye(100)[np.squeeze(y_train)], np.eye(100)[np.squeeze(y_test)] else: x_train, y_train, x_test, y_test = snorbdata.load_data() if len(x_train.shape) < 4: x_train = np.expand_dims(x_train, axis=-1) if len(x_test.shape) < 4: x_test = np.expand_dims(x_test, axis=-1) print('Done loading data') # init the model weights with provided one if args['weights'] is not None: model.load_weights(args['weights']) log = callbacks.CSVLogger(args['save_dir'] + '/log.csv') tb = callbacks.TensorBoard(log_dir=args['save_dir'] + '/tensorboard-logs', batch_size=args['batch_size'], write_graph=True, write_images=True) checkpoint = callbacks.ModelCheckpoint(args['save_dir'] + '/w_{epoch:02d}.h5', monitor='val_categorical_accuracy', save_best_only=True, save_weights_only=True, verbose=1, period=2) lr_decay = callbacks.LearningRateScheduler(schedule=lambda epoch: args['lr'] * args['decay']**epoch) naan = callbacks.TerminateOnNaN() # compile and train model for e in range(args['epochs']): model.compile(optimizer=optimizers.Nadam(lr=args['lr']), loss=spread_loss_wrap(e, 0.2, 0.1, args['batch_size']), \ metrics=['categorical_accuracy']) train_gen = ImageDataGenerator().flow(x_train, y_train, batch_size=args['batch_size']) test_gen = ImageDataGenerator().flow(x_test, y_test, batch_size=args['batch_size']) model.fit_generator(train_gen, validation_data=test_gen, initial_epoch=e, epochs=e +1, verbose=1, callbacks=[log, tb, checkpoint, lr_decay, naan]) model.save_weights(args['save_dir'] + '/model.h5') print('Trained model saved to \'%s' % args['save_dir']) return
def get_callbacks(self, opt):
    # ModelCheckpoint: save the model after each epoch
    fn1 = (os.path.basename(self.model_params_file_path).replace('.json', ''))
    fn = (f'{fn1}___{self.start_time}'
          f'___model_%s{"_TEST" if opt.test else ""}.h5' % ('{epoch:02d}'))
    filepath = os.path.join(self.path_nn_model, self.model.name, fn)
    del fn
    checkpoint = callbacks.ModelCheckpoint(filepath, monitor='val_loss',
                                           verbose=opt.verbose)

    # TerminateOnNaN
    tonan = callbacks.TerminateOnNaN()

    # History
    history = callbacks.History()

    # CSV logger: saves epoch train and valid loss to a log file
    fn1 = (os.path.basename(self.model_params_file_path).replace('.json', ''))
    fn = (f'{fn1}___{self.start_time}'
          f'___training{"_TEST" if opt.test else ""}.log')
    filepath = os.path.join(self.path_nn_model, self.model.name, fn)
    csv_logger = callbacks.CSVLogger(filepath, separator=',', append=True)

    # Learning rate schedulers (only learning_rate_decay is used below)
    def exp_decay(epoch,
                  initial_lrate=self.model_params['keras_train']['lr'],
                  decay=self.model_params['keras_train']['lr_decay']):
        lrate = initial_lrate * np.exp(-decay * epoch)
        return lrate

    def learning_rate_decay(epoch,
                            initial_lrate=self.model_params['keras_train']['lr'],
                            decay=self.model_params['keras_train']['lr_decay']):
        lrate = initial_lrate * (1 - decay) ** epoch
        return lrate

    lrs = callbacks.LearningRateScheduler(learning_rate_decay)

    callbacks_list = [tonan, checkpoint, history, csv_logger, lrs]

    # Early stopping: stop training if the validation loss does not improve
    if (self.model_params['keras_train'].get('early_stopping_n') is not None):
        es = callbacks.EarlyStopping(
            monitor='val_loss',
            min_delta=0,
            patience=self.model_params['keras_train']['early_stopping_n'],
            verbose=opt.verbose)
        callbacks_list.append(es)
    return callbacks_list
def test_TerminateOnNaN(): np.random.seed(1337) (X_train, y_train), (X_test, y_test) = get_test_data(num_train=train_samples, num_test=test_samples, input_shape=(input_dim, ), classification=True, num_classes=num_classes) y_test = np_utils.to_categorical(y_test) y_train = np_utils.to_categorical(y_train) cbks = [callbacks.TerminateOnNaN()] model = Sequential() initializer = initializers.Constant(value=1e5) for _ in range(5): model.add( Dense(num_hidden, input_dim=input_dim, activation='relu', kernel_initializer=initializer)) model.add(Dense(num_classes, activation='linear')) model.compile(loss='mean_squared_error', optimizer='rmsprop') # case 1 fit history = model.fit(X_train, y_train, batch_size=batch_size, validation_data=(X_test, y_test), callbacks=cbks, epochs=20) loss = history.history['loss'] assert len(loss) == 1 assert loss[0] == np.inf # case 2 fit_generator def data_generator(): max_batch_index = len(X_train) // batch_size i = 0 while 1: yield (X_train[i * batch_size:(i + 1) * batch_size], y_train[i * batch_size:(i + 1) * batch_size]) i += 1 i = i % max_batch_index history = model.fit_generator(data_generator(), len(X_train), validation_data=(X_test, y_test), callbacks=cbks, epochs=20) loss = history.history['loss'] assert len(loss) == 1 assert loss[0] == np.inf or np.isnan(loss[0])
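# For reference, the behaviour these tests exercise comes from a very small
# callback. The sketch below is an equivalent re-implementation written from
# the documented behaviour (inspect the batch loss, stop on NaN/Inf); it is
# not the library source, and the class name is ours.
import numpy as np
from keras.callbacks import Callback


class TerminateOnNaNSketch(Callback):
    """Stop training as soon as a batch reports a NaN or infinite loss."""

    def on_batch_end(self, batch, logs=None):
        logs = logs or {}
        loss = logs.get('loss')
        if loss is not None and (np.isnan(loss) or np.isinf(loss)):
            print('Batch %d: Invalid loss, terminating training' % batch)
            self.model.stop_training = True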
def test_stop_training_csv(tmpdir): np.random.seed(1337) fp = str(tmpdir / 'test.csv') (X_train, y_train), (X_test, y_test) = get_test_data(num_train=train_samples, num_test=test_samples, input_shape=(input_dim, ), classification=True, num_classes=num_classes) y_test = np_utils.to_categorical(y_test) y_train = np_utils.to_categorical(y_train) cbks = [callbacks.TerminateOnNaN(), callbacks.CSVLogger(fp)] model = Sequential() for _ in range(5): model.add(Dense(num_hidden, input_dim=input_dim, activation='relu')) model.add(Dense(num_classes, activation='linear')) model.compile(loss='mean_squared_error', optimizer='rmsprop') def data_generator(): i = 0 max_batch_index = len(X_train) // batch_size tot = 0 while 1: if tot > 3 * len(X_train): yield np.ones([batch_size, input_dim]) * np.nan, np.ones( [batch_size, num_classes]) * np.nan else: yield (X_train[i * batch_size:(i + 1) * batch_size], y_train[i * batch_size:(i + 1) * batch_size]) i += 1 tot += 1 i = i % max_batch_index history = model.fit_generator(data_generator(), len(X_train) // batch_size, validation_data=(X_test, y_test), callbacks=cbks, epochs=20) loss = history.history['loss'] assert len(loss) > 1 assert loss[-1] == np.inf or np.isnan(loss[-1]) values = [] with open(fp) as f: for x in reader(f): values.append(x) assert 'nan' in values[-1], 'The last epoch was not logged.' os.remove(fp)
def train_model(input_to_softmax, pickle_path, save_model_path, train_json='train_corpus.json', valid_json='valid_corpus.json', minibatch_size=16, # You will want to change this depending on the GPU you are training on spectrogram=True, mfcc_dim=13, optimizer=Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False, clipnorm=1, clipvalue=.5), epochs=30, # You will want to change this depending on the model you are training and data you are using verbose=1, sort_by_duration=False, max_duration=10.0): # Obtain batches of data audio_gen = AudioGenerator(minibatch_size=minibatch_size, spectrogram=spectrogram, mfcc_dim=mfcc_dim, max_duration=max_duration, sort_by_duration=sort_by_duration) # Load the datasets audio_gen.load_train_data(train_json) audio_gen.load_validation_data(valid_json) # Calculate steps per epoch num_train_examples=len(audio_gen.train_audio_paths) steps_per_epoch = num_train_examples//minibatch_size # Calculate validation steps num_valid_samples = len(audio_gen.valid_audio_paths) validation_steps = num_valid_samples//minibatch_size # Add custom CTC loss function to the nn model = add_ctc_loss(input_to_softmax) # Dummy lambda function for loss since CTC loss is implemented above model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=optimizer) # Make initial results/ directory for saving model pickles if not os.path.exists('results'): os.makedirs('results') # Add callbacks checkpointer = ModelCheckpoint(filepath='results/'+save_model_path, verbose=0) terminator = callbacks.TerminateOnNaN() time_machiner = callbacks.History() logger = callbacks.CSVLogger('training.log') tensor_boarder = callbacks.TensorBoard(log_dir='./logs', batch_size=16, write_graph=True, write_grads=True, write_images=True,) # Fit/train model hist = model.fit_generator(generator=audio_gen.next_train(), steps_per_epoch=steps_per_epoch, epochs=epochs, validation_data=audio_gen.next_valid(), validation_steps=validation_steps, callbacks=[checkpointer, terminator, logger, time_machiner, tensor_boarder], verbose=verbose) # Save model loss with open('results/'+pickle_path, 'wb') as f: pickle.dump(hist.history, f)
def init_callbacks(self):
    self.callbacks.append(
        callbacks.ModelCheckpoint(
            filepath=self.best_model_fn,
            **self.callbacks_config["ModelCheckpoint"]))
    self.callbacks.append(
        callbacks.EarlyStopping(**self.callbacks_config["EarlyStopping"]))
    self.callbacks.append(
        callbacks.ReduceLROnPlateau(
            **self.callbacks_config["ReduceLROnPlateau"]))
    self.callbacks.append(callbacks.TerminateOnNaN())
    self.callbacks.append(
        callbacks.TensorBoard(
            log_dir=self.callbacks_config["tensorboard_log_dir"],
            write_graph=self.callbacks_config["tensorboard_write_graph"],
        ))
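# The `callbacks_config` mapping consulted by `init_callbacks` above is not
# shown in that snippet. A minimal illustration of the shape it appears to
# expect follows; the top-level keys mirror the lookups above, while every
# kwargs value is only an assumed example, not the project's actual settings.
callbacks_config = {
    "ModelCheckpoint": {"monitor": "val_loss", "save_best_only": True,
                        "verbose": 1},
    "EarlyStopping": {"monitor": "val_loss", "patience": 10},
    "ReduceLROnPlateau": {"monitor": "val_loss", "factor": 0.5, "patience": 3},
    "tensorboard_log_dir": "./tb_logs",
    "tensorboard_write_graph": True,
}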
def run_model(model, epochs, batch_size, X_train, y_train, X_test, y_test): history = History() nanterminator = callbacks.TerminateOnNaN() model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, validation_split=0.25, verbose=1, callbacks=[nanterminator, history]) plt.plot(history.history['loss'], label='training') plt.plot(history.history['val_loss'], label='validation') plt.title('loss') plt.legend() plt.show() return model
print('loading dataset......') composers = [ 'Bach', 'Beethoven', 'Brahms', 'Chopin', 'Grieg', 'Liszt', 'Mozart' ] datapath = 'Dataset_Train_Medium/' X_train, Y_train = load_dataset(datapath, composers) datapath_val = 'Dataset_Dev_Medium/' X_test, Y_test = load_dataset(datapath_val, composers) print('applying one-hot-encoding') Y_train = convert_to_one_hot(Y_train, 7).T Y_test = convert_to_one_hot(Y_test, 7).T print('setting up callbacks...') nancheck = callbacks.TerminateOnNaN() filepath = 'Models/weights-improvement-{epoch:02d}-{acc:.2f}.hdf5' saver = callbacks.ModelCheckpoint(filepath, monitor='acc', verbose=1, save_best_only=False, mode='max', period=1) logger = callbacks.CSVLogger('model-weights/trainingresults.log') callbacklist = [nancheck, saver, logger] print('starting model fitting') model.fit(X_train, Y_train, validation_data=(X_test, Y_test), epochs=epochs,
def run(_run, image_shape, data_dir, train_shuffle, dataset_train_seed, valid_shuffle, dataset_valid_seed, classes, architecture, weights, batch_size, last_base_layer, use_gram_matrix, pooling, dense_layers, device, opt_params, dropout_p, resuming_from_ckpt_file, ckpt_file, steps_per_epoch, epochs, validation_steps, workers, use_multiprocessing, initial_epoch, early_stop_patience, tensorboard_tag, first_trainable_layer, first_reset_layer, class_weight): report_dir = _run.observers[0].dir g = ImageDataGenerator(horizontal_flip=True, vertical_flip=True, samplewise_center=True, samplewise_std_normalization=True, zoom_range=.2, rotation_range=.2, height_shift_range=.2, width_shift_range=.2, fill_mode='reflect', preprocessing_function=None) train_data = g.flow_from_directory(os.path.join(data_dir, 'train'), target_size=image_shape[:2], classes=classes, batch_size=batch_size, shuffle=train_shuffle, seed=dataset_train_seed) valid_data = g.flow_from_directory(os.path.join(data_dir, 'valid'), target_size=image_shape[:2], classes=classes, batch_size=batch_size, shuffle=valid_shuffle, seed=dataset_valid_seed) if class_weight == 'balanced': class_weight = get_class_weights(train_data.classes) if steps_per_epoch is None: steps_per_epoch = ceil(train_data.n / batch_size) if validation_steps is None: validation_steps = ceil(valid_data.n / batch_size) with tf.device(device): print('building...') model = build_model(image_shape, architecture=architecture, weights=weights, dropout_p=dropout_p, classes=train_data.num_classes, last_base_layer=last_base_layer, use_gram_matrix=use_gram_matrix, pooling=pooling, dense_layers=dense_layers) layer_names = [l.name for l in model.layers] if first_trainable_layer: if first_trainable_layer not in layer_names: raise ValueError('%s is not a layer in the model: %s' % (first_trainable_layer, layer_names)) _trainable = False for layer in model.layers: if layer.name == first_trainable_layer: _trainable = True layer.trainable = _trainable del _trainable model.compile(optimizer=optimizers.Adam(**opt_params), metrics=['accuracy'], loss='categorical_crossentropy') if resuming_from_ckpt_file: print('re-loading weights...') model.load_weights(resuming_from_ckpt_file) if first_reset_layer: if first_reset_layer not in layer_names: raise ValueError('%s is not a layer in the model: %s' % (first_reset_layer, layer_names)) print('first layer to have its weights reset:', first_reset_layer) random_model = build_model(image_shape, architecture=architecture, weights=None, dropout_p=dropout_p, classes=train_data.num_class, last_base_layer=last_base_layer, use_gram_matrix=use_gram_matrix, dense_layers=dense_layers) _reset = False for layer, random_layer in zip(model.layers, random_model.layers): if layer.name == first_reset_layer: _reset = True if _reset: layer.set_weights(random_layer.get_weights()) del random_model model.compile(optimizer=optimizers.Adam(**opt_params), metrics=['accuracy'], loss='categorical_crossentropy') print('training from epoch %i...' 
% initial_epoch) try: model.fit_generator( train_data, steps_per_epoch=steps_per_epoch, epochs=epochs, verbose=2, validation_data=valid_data, validation_steps=validation_steps, initial_epoch=initial_epoch, class_weight=class_weight, workers=workers, use_multiprocessing=use_multiprocessing, callbacks=[ # callbacks.LearningRateScheduler(lambda epoch: .5 ** (epoch // 10) * opt_params['lr']), callbacks.TerminateOnNaN(), callbacks.ReduceLROnPlateau(min_lr=1e-10, patience=int( early_stop_patience // 3)), callbacks.EarlyStopping(patience=early_stop_patience), callbacks.TensorBoard(os.path.join(report_dir, tensorboard_tag), batch_size=batch_size), callbacks.ModelCheckpoint(os.path.join( report_dir, ckpt_file), save_best_only=True, verbose=1), ]) except KeyboardInterrupt: print('interrupted by user') else: print('done')
def run(_run, data_dir, train_info, chunks, train_pairs, valid_pairs, train_shuffle, valid_shuffle, joint_weights, trainable_joints, dense_layers, batch_size, device, opt_params, dropout_rate, ckpt, steps_per_epoch, epochs, validation_steps, initial_epoch, early_stop_patience, resuming_ckpt, outputs_meta): report_dir = _run.observers[0].dir print('loading limb-embedded inputs...') d = load_pickle_data(data_dir, keys=['data', 'names'], phases=['train', 'valid'], chunks=chunks) x_train, x_valid = d['train'][0], d['valid'][0] print('x-train, x-valid shape:', x_train['artist'].shape, x_valid['artist'].shape) print('loading labels...') outputs, name_map = load_multiple_outputs(train_info, outputs_meta, encode='sparse') ys = [] for phase in ('train', 'valid'): names = d[phase][1] names = ['-'.join(os.path.basename(n).split('-')[:-1]) for n in names] indices = [name_map[n] for n in names] ys += [{o: v[indices] for o, v in outputs.items()}] y_train, y_valid = ys artists = np.unique(y_train['artist']) x_train, y_train = create_pairs(x_train, y_train, pairs=train_pairs, classes=artists, shuffle=train_shuffle) x_valid, y_valid = create_pairs(x_valid, y_valid, pairs=valid_pairs, classes=artists, shuffle=valid_shuffle) for y in (y_train, y_valid): y['binary_predictions'] = y['artist_binary_predictions'] with tf.device(device): print('building...') model = build_siamese_top_meta(outputs_meta, dropout_rate=dropout_rate, joint_weights=joint_weights, trainable_joints=trainable_joints, dense_layers=dense_layers) if resuming_ckpt: print('loading weights from', resuming_ckpt) model.load_weights(resuming_ckpt) model.compile(optimizer=optimizers.Adam(**opt_params), loss='binary_crossentropy', metrics=['acc']) print('training from epoch %i...' % initial_epoch) try: model.fit( x_train, y_train, steps_per_epoch=steps_per_epoch, epochs=epochs, validation_data=(x_valid, y_valid), validation_steps=validation_steps, initial_epoch=initial_epoch, batch_size=batch_size, verbose=2, callbacks=[ callbacks.TerminateOnNaN(), callbacks.EarlyStopping(patience=early_stop_patience), callbacks.ReduceLROnPlateau(min_lr=1e-10, patience=early_stop_patience // 3), callbacks.TensorBoard(report_dir, batch_size=batch_size, histogram_freq=1, write_grads=True, write_images=True), callbacks.ModelCheckpoint(os.path.join(report_dir, ckpt), save_best_only=True, verbose=1), ]) except KeyboardInterrupt: print('interrupted by user') else: print('done')
def run_model(train_generator, validation_generator, dl_model, output_folder,
              instance_name, image_size, nb_labels, nb_epochs,
              nb_training_image, nb_validation_image, batch_size, dropout,
              network, learning_rate, learning_rate_decay):
    """Run deep learning `dl_model` starting from training and validation data
    generators, depending on a range of hyperparameters

    Parameters
    ----------
    train_generator : generator
        Training data generator
    validation_generator : generator
        Validation data generator
    dl_model : str
        Name of the addressed research problem (*e.g.* `feature_detection` or
        `semantic_segmentation`)
    output_folder : str
        Name of the folder where the trained model will be stored on the file
        system
    instance_name : str
        Name of the instance
    image_size : int
        Size of images, in pixels (height=width)
    nb_labels : int
        Number of labels in the dataset
    nb_epochs : int
        Number of epochs during which models will be trained
    nb_training_image : int
        Number of images in the training dataset
    nb_validation_image : int
        Number of images in the validation dataset
    batch_size : int
        Number of images in each batch
    dropout : float
        Probability of keeping a neuron during the dropout phase
    network : str
        Neural network architecture (*e.g.* `simple`, `vgg`, `inception`)
    learning_rate : float
        Starting learning rate
    learning_rate_decay : float
        Learning rate decay

    Returns
    -------
    dict
        Dictionary that summarizes the instance and the corresponding model
        performance (measured by validation accuracy)
    """
    if dl_model == "featdet":
        net = FeatureDetectionNetwork(network_name=instance_name,
                                      image_size=image_size,
                                      nb_channels=3,
                                      nb_labels=nb_labels,
                                      architecture=network)
        loss_function = "binary_crossentropy"
    elif dl_model == "semseg":
        net = SemanticSegmentationNetwork(network_name=instance_name,
                                          image_size=image_size,
                                          nb_channels=3,
                                          nb_labels=nb_labels,
                                          architecture=network)
        loss_function = "categorical_crossentropy"
    else:
        logger.error("Unrecognized model: %s. Please choose amongst %s",
                     dl_model, AVAILABLE_MODELS)
        sys.exit(1)
    model = Model(net.X, net.Y)
    opt = Adam(lr=learning_rate, decay=learning_rate_decay)
    metrics = ["acc", iou, dice_coef]
    model.compile(loss=loss_function, optimizer=opt, metrics=metrics)

    # Model training
    steps = max(nb_training_image // batch_size, 1)
    val_steps = max(nb_validation_image // batch_size, 1)

    checkpoint_files = [item for item in os.listdir(output_folder)
                        if "checkpoint-epoch" in item]
    if len(checkpoint_files) > 0:
        model_checkpoint = max(checkpoint_files)
        trained_model_epoch = int(model_checkpoint[-5:-3])
        checkpoint_complete_path = os.path.join(output_folder,
                                                model_checkpoint)
        model.load_weights(checkpoint_complete_path)
        logger.info("Model weights have been recovered from %s",
                    checkpoint_complete_path)
    else:
        logger.info("No available checkpoint for this configuration. "
                    "The model will be trained from scratch.")
        trained_model_epoch = 0

    checkpoint_filename = os.path.join(output_folder,
                                       "checkpoint-epoch-{epoch:03d}.h5")
    checkpoint = callbacks.ModelCheckpoint(checkpoint_filename,
                                           monitor="val_loss",
                                           verbose=0,
                                           save_best_only=True,
                                           save_weights_only=False,
                                           mode="auto",
                                           period=1)
    terminate_on_nan = callbacks.TerminateOnNaN()
    # val_loss improves by decreasing, so early stopping must run in "min" mode
    earlystop = callbacks.EarlyStopping(monitor="val_loss",
                                        patience=10,
                                        verbose=1,
                                        mode="min")
    csv_logger = callbacks.CSVLogger(os.path.join(output_folder,
                                                  "training_metrics.csv"),
                                     append=True)

    hist = model.fit_generator(train_generator,
                               epochs=nb_epochs,
                               initial_epoch=trained_model_epoch,
                               steps_per_epoch=steps,
                               validation_data=validation_generator,
                               validation_steps=val_steps,
                               callbacks=[checkpoint, earlystop,
                                          terminate_on_nan, csv_logger])
    ref_metric = max(hist.history.get("val_acc", [np.nan]))
    return {
        "model": model,
        "val_acc": ref_metric,
        "batch_size": batch_size,
        "network": network,
        "dropout": dropout,
        "learning_rate": learning_rate,
        "learning_rate_decay": learning_rate_decay,
    }
monitor='loss', verbose=1, save_best_only=True, save_weights_only=True, mode='auto', period=5) reduce_lr = callbacks.ReduceLROnPlateau(monitor='loss', factor =0.5, patience=15, verbose=1, mode='auto', epsilon=0.0001, cooldown=0, min_lr=1e-8) nanterminator = callbacks.TerminateOnNaN() history = callbacks.History() weightwatcher = WeightWatcher(per_batch =False,per_epoch= True) n_features = x_train.shape[-1] ## Base model model = Sequential() model.add(Masking(mask_value=mask_value,input_shape=(None, n_features))) model.add(GRU(10,activation='tanh',return_sequences=True,recurrent_dropout=0.1,unroll=False)) model.add(BatchNormalization(axis=-1, momentum=0.9, epsilon=0.01)) model.add(TimeDistributed(Dense(10,activation='tanh'))) ## Wtte-RNN part model.add(TimeDistributed(Dense(2))) model.add(Lambda(wtte.output_lambda, arguments={"init_alpha":init_alpha,
def run(_run, image_shape, data_dir, train_pairs, valid_pairs, classes, class_weight, architecture, weights, batch_size, base_layers, pooling, dense_layers, metrics, device, opt_params, dropout_p, resuming_from_ckpt_file, steps_per_epoch, epochs, validation_steps, workers, use_multiprocessing, initial_epoch, early_stop_patience, tensorboard_tag, first_trainable_layer): report_dir = _run.observers[0].dir g = ImageDataGenerator( horizontal_flip=True, vertical_flip=True, samplewise_center=True, samplewise_std_normalization=True, zoom_range=45, rotation_range=.2, height_shift_range=.2, width_shift_range=.2, fill_mode='reflect', preprocessing_function=get_preprocess_fn(architecture)) if isinstance(classes, int): classes = sorted(os.listdir(os.path.join(data_dir, 'train')))[:classes] train_data = BalancedDirectoryPairsSequence(os.path.join( data_dir, 'train'), g, target_size=image_shape[:2], pairs=train_pairs, classes=classes, batch_size=batch_size) valid_data = BalancedDirectoryPairsSequence(os.path.join( data_dir, 'valid'), g, target_size=image_shape[:2], pairs=valid_pairs, classes=classes, batch_size=batch_size) if class_weight == 'balanced': class_weight = get_class_weights(train_data.classes) with tf.device(device): print('building...') model = build_siamese_gram_model(image_shape, architecture, dropout_p, weights, base_layers=base_layers, dense_layers=dense_layers, pooling=pooling, include_top=False, trainable_limbs=True, embedding_units=0, joints='l2', include_base_top=False) model.summary() layer_names = [l.name for l in model.layers] if first_trainable_layer: if first_trainable_layer not in layer_names: raise ValueError('%s is not a layer in the model: %s' % (first_trainable_layer, layer_names)) for layer in model.layers: if layer.name == first_trainable_layer: break layer.trainable = False model.compile(optimizer=optimizers.Adam(**opt_params), metrics=metrics, loss=contrastive_loss) if resuming_from_ckpt_file: print('re-loading weights...') model.load_weights(resuming_from_ckpt_file) print('training from epoch %i...' % initial_epoch) try: model.fit_generator( train_data, steps_per_epoch=steps_per_epoch, epochs=epochs, verbose=2, validation_data=valid_data, validation_steps=validation_steps, initial_epoch=initial_epoch, class_weight=class_weight, workers=workers, use_multiprocessing=use_multiprocessing, callbacks=[ # callbacks.LearningRateScheduler(lambda epoch: .5 ** (epoch // 10) * opt_params['lr']), callbacks.TerminateOnNaN(), callbacks.ReduceLROnPlateau(min_lr=1e-10, patience=int( early_stop_patience // 3)), callbacks.EarlyStopping(patience=early_stop_patience), callbacks.TensorBoard(os.path.join(report_dir, tensorboard_tag), batch_size=batch_size), callbacks.ModelCheckpoint(os.path.join( report_dir, 'weights.h5'), save_best_only=True, verbose=1), ]) except KeyboardInterrupt: print('interrupted by user') else: print('done')
def run(backbone, cfn_backbone, batch_size, lr, dropout_rate, data_path, artifacts_folder, img_size, use_cbam, use_se, cfn_model_path, use_transpose_conv, cfn_batch_multiplier, seed, _run): artifacts_folder = Path(artifacts_folder) artifacts_folder.mkdir(parents=True, exist_ok=True) data_path = Path(data_path) data_df = pd.read_csv(data_path / 'train.csv') data_df = prepare_data_df(data_df) print(data_df.info()) print(data_df.head(10)) train_df, val_df = train_test_split(data_df, test_size=0.2, random_state=seed) print( f'\nlength of train and val data before duplication: {len(train_df.index)}, {len(val_df.index)}' ) print(f"shape for 0, 1, 2, 3, 4 defects respectively:\n" f"{train_df[train_df.defect_count == 0].shape}\n" f"{train_df[train_df.has_defect_1 == 1].shape}\n" f"{train_df[train_df.has_defect_2 == 1].shape}\n" f"{train_df[train_df.has_defect_3 == 1].shape}\n" f"{train_df[train_df.has_defect_4 == 1].shape}\n") train_df = duplicate_data(train_df, 2, 10) print( f'\nlength of train and val data after duplication: {len(train_df.index)}, {len(val_df.index)}' ) print(f"shape for 0, 1, 2, 3, 4 defects respectively:\n" f"{train_df[train_df.defect_count == 0].shape}\n" f"{train_df[train_df.has_defect_1 == 1].shape}\n" f"{train_df[train_df.has_defect_2 == 1].shape}\n" f"{train_df[train_df.has_defect_3 == 1].shape}\n" f"{train_df[train_df.has_defect_4 == 1].shape}\n") train_df = train_df.sample(frac=1).reset_index(drop=True) ckpt_path = artifacts_folder / 'ckpts' ckpt_path.mkdir(exist_ok=True, parents=True) if cfn_model_path is None: classification_model = ClassificationModel(cfn_backbone, img_size, lr).get_model() utils.plot_model(classification_model, str(artifacts_folder / 'cfn_model.png'), show_shapes=True) training_callbacks = [ callbacks.ReduceLROnPlateau(patience=3, verbose=1, min_lr=1e-7), callbacks.EarlyStopping(patience=5, verbose=1, restore_best_weights=True), callbacks.ModelCheckpoint(str( ckpt_path / 'cfn_model-{epoch:04d}-{val_loss:.4f}.hdf5'), verbose=1, save_best_only=True), callbacks.TensorBoard(log_dir=str(artifacts_folder / 'tb_logs')), callbacks.TerminateOnNaN(), ObserveMetrics(_run, 'cfn') ] train_seq = ClassificationDataSeq(seed, train_df, int(batch_size * cfn_batch_multiplier), img_size, 'data/train_images', mode='train', shuffle=True, augment=True) val_seq = ClassificationDataSeq(seed, val_df, int(batch_size * cfn_batch_multiplier), img_size, 'data/train_images', mode='val', shuffle=False, augment=False) train_model(classification_model, train_seq, val_seq, training_callbacks) models.save_model(classification_model, str(artifacts_folder / 'cfn_model_best.h5')) else: classification_model = models.load_model(cfn_model_path, compile=False) segmentation_model = SegmentationModel( backbone, img_size, lr, dropout_rate, _MODEL_ARC, use_cbam=use_cbam, use_se=use_se, cfn_model=classification_model, cfn_backbone=cfn_backbone, use_transpose_conv=use_transpose_conv).get_model() utils.plot_model(segmentation_model, str(artifacts_folder / 'seg_model.png'), show_shapes=True) training_callbacks = [ callbacks.ReduceLROnPlateau(patience=3, verbose=1, min_lr=1e-7), callbacks.EarlyStopping(patience=5, verbose=1, restore_best_weights=True), callbacks.ModelCheckpoint(str( ckpt_path / 'seg_model-{epoch:04d}-{val_loss:.4f}.hdf5'), verbose=1, save_best_only=True), callbacks.TensorBoard(log_dir=str(artifacts_folder / 'tb_logs')), callbacks.TerminateOnNaN(), ObserveMetrics(_run, 'seg') ] train_seq = DataSequence(seed, train_df, batch_size, img_size, 'data/train_images', mode='train', 
shuffle=True, augment=True) val_seq = DataSequence(seed, val_df, batch_size, img_size, 'data/train_images', mode='val', shuffle=False, augment=False) history = train_model(segmentation_model, train_seq, val_seq, training_callbacks) models.save_model(segmentation_model, str(artifacts_folder / 'seg_model_best.h5')) return history.history['val_score'][-1]
def learn_embedding(self, graph=None, edge_f=None,
                    is_weighted=False, no_python=False):
    if not graph and not edge_f:
        raise Exception('graph/edge_f needed')
    if not graph:
        graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
    S = nx.to_scipy_sparse_matrix(graph)
    self._node_num = graph.number_of_nodes()
    t1 = time()

    # Generate encoder, decoder and autoencoder
    self._num_iter = self._n_iter
    self._encoder = get_variational_encoder(self._node_num, self._d,
                                            self._n_units, self._nu1,
                                            self._nu2, self._actfn)
    self._decoder = get_decoder(self._node_num, self._d, self._n_units,
                                self._nu1, self._nu2, self._actfn)
    self._autoencoder = get_variational_autoencoder(self._encoder,
                                                    self._decoder)

    # Initialize self._model
    # Input
    x_in = Input(shape=(self._node_num,), name='x_in')
    # Process inputs
    # [x_hat, y] = self._autoencoder(x_in)
    [x_hat, y_mean, y_std, y2] = self._autoencoder(x_in)
    # Outputs
    x_diff = merge([x_hat, x_in],
                   mode=lambda ab: ab[0] - ab[1],
                   output_shape=lambda L: L[1])
    y_log_var = KBack.log(KBack.square(y_std))
    vae_loss = merge([y_mean, y_std],
                     mode=lambda ab: -0.5 * KBack.sum(
                         1 + KBack.log(KBack.square(ab[1]))
                         - KBack.square(ab[0]) - KBack.square(ab[1]),
                         axis=-1),
                     output_shape=lambda L: (L[1][0], 1))

    # Objectives
    def weighted_mse_x(y_true, y_pred):
        '''Hack: This fn doesn't accept additional arguments.
        We use y_true to pass them.
        y_pred: Contains x_hat - x
        y_true: Contains b
        '''
        return KBack.sum(KBack.square(y_pred * y_true[:, 0:self._node_num]),
                         axis=-1)

    def weighted_mse_vae(y_true, y_pred):
        '''Hack: This fn doesn't accept additional arguments.
        We use y_true to pass them.
        y_pred: Contains KL-divergence
        y_true: Contains np.zeros(mini_batch)
        '''
        min_batch_size = KBack.shape(y_true)[0]
        return KBack.mean(
            # KBack.abs(y_pred),
            KBack.abs(KBack.reshape(y_pred, [min_batch_size, 1])),
            axis=-1)

    # Model
    self._model = Model(input=x_in, output=[x_diff, vae_loss])
    # sgd = SGD(lr=self._xeta, decay=1e-5, momentum=0.99, nesterov=True)
    adam = Adam(lr=self._xeta, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
    self._model.compile(optimizer=adam,
                        loss=[weighted_mse_x, weighted_mse_vae],
                        loss_weights=[1, self._beta_vae])

    history = self._model.fit_generator(
        generator=batch_generator_vae(S, self._beta, self._n_batch, True),
        nb_epoch=self._num_iter,
        samples_per_epoch=S.shape[0] // self._n_batch,
        verbose=1,
        callbacks=[callbacks.TerminateOnNaN()])
    loss = history.history['loss']
    # Get embedding for all points
    if loss[0] == np.inf or np.isnan(loss[0]):
        print('Model diverged. Assigning random embeddings')
        self._Y = np.random.randn(self._node_num, self._d)
    else:
        self._Y = model_batch_predictor(self._autoencoder, S,
                                        self._n_batch, meth='vae')
        submodel_gen = batch_generator_vae(S, self._beta, self._n_batch, True)
        x = np.concatenate([next(submodel_gen)[0] for _ in range(100)],
                           axis=0)
        vae_submodel = Model(x_in, self._autoencoder(x_in))
        _, _, log_std, _ = vae_submodel.predict(x)
        mean = np.mean(log_std)
        std = np.std(log_std)
        print('log std mean and std')
        print(mean)
        print(std)
    t2 = time()
    # Save the autoencoder and its weights
    if self._weightfile is not None:
        saveweights(self._encoder, self._weightfile[0])
        saveweights(self._decoder, self._weightfile[1])
    if self._modelfile is not None:
        savemodel(self._encoder, self._modelfile[0])
        savemodel(self._decoder, self._modelfile[1])
    if self._savefilesuffix is not None:
        saveweights(self._encoder,
                    'encoder_weights_' + self._savefilesuffix + '.hdf5')
        saveweights(self._decoder,
                    'decoder_weights_' + self._savefilesuffix + '.hdf5')
        savemodel(self._encoder,
                  'encoder_model_' + self._savefilesuffix + '.json')
        savemodel(self._decoder,
                  'decoder_model_' + self._savefilesuffix + '.json')
    # Save the embedding
    np.savetxt('embedding_' + self._savefilesuffix + '.txt', self._Y)
    return self._Y, (t2 - t1)
def run(_run, image_shape, data_dir, train_pairs, valid_pairs, classes, num_classes, architecture, weights, batch_size, base_layers, pooling, device, predictions_activation, opt_params, dropout_rate, resuming_ckpt, ckpt, steps_per_epoch, epochs, validation_steps, joints, workers, use_multiprocessing, initial_epoch, early_stop_patience, dense_layers, embedding_units, limb_weights, trainable_limbs, tensorboard_tag): report_dir = _run.observers[0].dir if isinstance(classes, int): classes = sorted(os.listdir(os.path.join(data_dir, 'train')))[:classes] g = ImageDataGenerator( horizontal_flip=True, vertical_flip=True, zoom_range=.2, rotation_range=.2, height_shift_range=.2, width_shift_range=.2, fill_mode='reflect', preprocessing_function=utils.get_preprocess_fn(architecture)) train_data = BalancedDirectoryPairsSequence(os.path.join( data_dir, 'train'), g, target_size=image_shape[:2], pairs=train_pairs, classes=classes, batch_size=batch_size) valid_data = BalancedDirectoryPairsSequence(os.path.join( data_dir, 'valid'), g, target_size=image_shape[:2], pairs=valid_pairs, classes=classes, batch_size=batch_size) if steps_per_epoch is None: steps_per_epoch = len(train_data) if validation_steps is None: validation_steps = len(valid_data) with tf.device(device): print('building...') model = build_siamese_gram_model( image_shape, architecture, dropout_rate, weights, num_classes, base_layers, dense_layers, pooling, predictions_activation=predictions_activation, limb_weights=limb_weights, trainable_limbs=trainable_limbs, embedding_units=embedding_units, joints=joints) print('siamese model summary:') model.summary() if resuming_ckpt: print('loading weights...') model.load_weights(resuming_ckpt) model.compile(loss='binary_crossentropy', metrics=['accuracy'], optimizer=optimizers.Adam(**opt_params)) print('training from epoch %i...' % initial_epoch) try: model.fit_generator( train_data, steps_per_epoch=steps_per_epoch, epochs=epochs, validation_data=valid_data, validation_steps=validation_steps, initial_epoch=initial_epoch, use_multiprocessing=use_multiprocessing, workers=workers, verbose=2, callbacks=[ callbacks.TerminateOnNaN(), callbacks.EarlyStopping(patience=early_stop_patience), callbacks.ReduceLROnPlateau(min_lr=1e-10, patience=int( early_stop_patience // 3)), callbacks.TensorBoard(os.path.join(report_dir, tensorboard_tag), batch_size=batch_size), callbacks.ModelCheckpoint(os.path.join(report_dir, ckpt), save_best_only=True, verbose=1), ]) except KeyboardInterrupt: print('interrupted by user') else: print('done')
"").format(checkpoint_complete_path)) else: utils.logger.info(("No available checkpoint for this configuration. " "The model will be trained from scratch.")) trained_model_epoch = 0 checkpoint_filename = os.path.join(output_folder, "checkpoint-epoch-{epoch:03d}.h5") checkpoint = callbacks.ModelCheckpoint(checkpoint_filename, monitor='val_loss', verbose=0, save_best_only=True, save_weights_only=False, mode='auto', period=1) terminate_on_nan = callbacks.TerminateOnNaN() earlystop = callbacks.EarlyStopping(monitor='val_acc', min_delta=0.001, patience=10, verbose=1, mode='max') csv_logger = callbacks.CSVLogger( os.path.join(output_folder, 'training_metrics.csv')) hist = model.fit_generator( train_generator, epochs=args.nb_epochs, steps_per_epoch=STEPS, validation_data=validation_generator, validation_steps=VAL_STEPS, callbacks=[checkpoint, terminate_on_nan, earlystop, csv_logger],
def run(seg1_path, seg2_path, batch_size, lr, dropout_rate, data_path, artifacts_folder, img_size, seed, _run): artifacts_folder = Path(artifacts_folder) artifacts_folder.mkdir(parents=True, exist_ok=True) data_path = Path(data_path) data_df = pd.read_csv(data_path / 'train.csv') data_df = prepare_data_df(data_df) print(data_df.info()) print(data_df.head(10)) train_df, val_df = train_test_split(data_df, test_size=0.2, random_state=seed) train_df = train_df.sample(frac=1).reset_index(drop=True) ckpt_path = artifacts_folder / 'ckpts' ckpt_path.mkdir(exist_ok=True, parents=True) seg1_model = models.load_model(seg1_path, compile=False) for layer in seg1_model.layers: layer.name = f'seg1_{layer.name}' layer.trainable = False seg2_model = models.load_model(seg2_path, compile=False) for layer in seg2_model.layers: layer.name = f'seg2_{layer.name}' layer.trainable = False x = layers.concatenate([seg1_model.output, seg2_model.output]) x = layers.SpatialDropout2D(dropout_rate)(x) x = conv(x, 16, 3) x = layers.Conv2D(4, (1, 1))(x) o = layers.Activation('sigmoid', name='output_layer')(x) segmentation_model = models.Model([seg1_model.input, seg2_model.input], o) segmentation_model.compile( optimizers.Adam(lr), sm.losses.bce_dice_loss, metrics=[sm.metrics.iou_score, sm.metrics.f1_score]) utils.plot_model(segmentation_model, str(artifacts_folder / 'seg_model.png'), show_shapes=True) training_callbacks = [ callbacks.ReduceLROnPlateau(patience=3, verbose=1, min_lr=1e-7), callbacks.EarlyStopping(patience=5, verbose=1, restore_best_weights=True), callbacks.ModelCheckpoint(str( ckpt_path / 'seg_model-{epoch:04d}-{val_loss:.4f}.hdf5'), verbose=1, save_best_only=True), callbacks.TensorBoard(log_dir=str(artifacts_folder / 'tb_logs')), callbacks.TerminateOnNaN(), ObserveMetrics(_run, 'seg') ] train_seq = DataSequence(seed, train_df, batch_size, img_size, 'data/train_images', mode='train', shuffle=True, augment=True, for_stacker=True) val_seq = DataSequence(seed, val_df, batch_size, img_size, 'data/train_images', mode='val', shuffle=False, augment=False, for_stacker=True) history = train_model(segmentation_model, train_seq, val_seq, training_callbacks) models.save_model(segmentation_model, str(artifacts_folder / 'seg_model_best.h5')) segmentation_model.save_weights( str(artifacts_folder / 'weights_seg_model_best.h5')) print('loading model back') del segmentation_model segmentation_model = models.load_model(str(artifacts_folder / 'seg_model_best.h5'), compile=False) segmentation_model.predict_generator(val_seq, verbose=1) return history.history['val_loss'][-1]
def run(_run, image_shape, data_dir, train_pairs, valid_pairs, train_shuffle, valid_shuffle, classes, architecture, weights, batch_size, last_base_layer, pooling, device, opt_params, dropout_rate, ckpt, steps_per_epoch, epochs, validation_steps, workers, use_multiprocessing, initial_epoch, early_stop_patience, use_gram_matrix, limb_dense_layers, limb_weights, trainable_limbs, joint_weights, trainable_joints, dense_layers, resuming_ckpt, outputs_meta): report_dir = _run.observers[0].dir g = ImageDataGenerator( horizontal_flip=True, vertical_flip=True, zoom_range=.2, rotation_range=.2, height_shift_range=.2, width_shift_range=.2, fill_mode='reflect', preprocessing_function=get_preprocess_fn(architecture)) print('loading train meta-data...') train_data = BalancedDirectoryPairsSequence(os.path.join( data_dir, 'train'), g, batch_size=batch_size, target_size=image_shape[:2], classes=classes, shuffle=train_shuffle, pairs=train_pairs) print('loading valid meta-data...') valid_data = BalancedDirectoryPairsSequence(os.path.join( data_dir, 'valid'), g, batch_size=batch_size, target_size=image_shape[:2], classes=classes, shuffle=valid_shuffle, pairs=valid_pairs) with tf.device(device): print('building...') model = build_siamese_mo_model(image_shape, architecture, outputs_meta, dropout_rate, weights, last_base_layer=last_base_layer, use_gram_matrix=use_gram_matrix, limb_dense_layers=limb_dense_layers, pooling=pooling, trainable_limbs=trainable_limbs, limb_weights=limb_weights, trainable_joints=trainable_joints, joint_weights=joint_weights, dense_layers=dense_layers) if resuming_ckpt: print('loading weights from', resuming_ckpt) model.load_weights(resuming_ckpt) model.compile(optimizer=optimizers.Adam(**opt_params), loss='binary_crossentropy', metrics=['acc']) print('training from epoch %i...' % initial_epoch) try: model.fit_generator( train_data, steps_per_epoch=steps_per_epoch, epochs=epochs, validation_data=valid_data, validation_steps=validation_steps, initial_epoch=initial_epoch, verbose=2, use_multiprocessing=use_multiprocessing, workers=workers, callbacks=[ callbacks.TerminateOnNaN(), callbacks.EarlyStopping(patience=early_stop_patience), callbacks.ReduceLROnPlateau(min_lr=1e-10, patience=int( early_stop_patience // 3)), callbacks.TensorBoard(report_dir, batch_size=batch_size), callbacks.ModelCheckpoint(os.path.join(report_dir, ckpt), save_best_only=True, verbose=1), ]) except KeyboardInterrupt: print('interrupted by user') else: print('done')
def learn_embedding(self, graph=None, edge_f=None,
                    is_weighted=False, no_python=False):
    if not graph and not edge_f:
        raise Exception('graph/edge_f needed')
    if not graph:
        graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
    S = nx.to_scipy_sparse_matrix(graph)
    t1 = time()
    S = (S + S.T) / 2
    self._node_num = graph.number_of_nodes()

    # Generate encoder, decoder and autoencoder
    self._num_iter = self._n_iter
    # If cannot use previous step information, initialize new models
    self._encoder = get_encoder(self._node_num, self._d, self._n_units,
                                self._nu1, self._nu2, self._actfn)
    self._decoder = get_decoder(self._node_num, self._d, self._n_units,
                                self._nu1, self._nu2, self._actfn)
    self._autoencoder = get_autoencoder(self._encoder, self._decoder)

    # Initialize self._model
    # Input
    x_in = Input(shape=(2 * self._node_num,), name='x_in')
    x1 = Lambda(lambda x: x[:, 0:self._node_num],
                output_shape=(self._node_num,))(x_in)
    x2 = Lambda(lambda x: x[:, self._node_num:2 * self._node_num],
                output_shape=(self._node_num,))(x_in)
    # Process inputs
    [x_hat1, y1] = self._autoencoder(x1)
    [x_hat2, y2] = self._autoencoder(x2)
    # Outputs
    x_diff1 = merge([x_hat1, x1],
                    mode=lambda ab: ab[0] - ab[1],
                    output_shape=lambda L: L[1])
    x_diff2 = merge([x_hat2, x2],
                    mode=lambda ab: ab[0] - ab[1],
                    output_shape=lambda L: L[1])
    y_diff = merge([y2, y1],
                   mode=lambda ab: ab[0] - ab[1],
                   output_shape=lambda L: L[1])

    # Objectives
    def weighted_mse_x(y_true, y_pred):
        '''Hack: This fn doesn't accept additional arguments.
        We use y_true to pass them.
        y_pred: Contains x_hat - x
        y_true: Contains [b, deg]
        '''
        return KBack.sum(
            KBack.square(y_pred * y_true[:, 0:self._node_num]),
            axis=-1) / y_true[:, self._node_num]

    def weighted_mse_y(y_true, y_pred):
        '''Hack: This fn doesn't accept additional arguments.
        We use y_true to pass them.
        y_pred: Contains y2 - y1
        y_true: Contains s12
        '''
        min_batch_size = KBack.shape(y_true)[0]
        return KBack.reshape(KBack.sum(KBack.square(y_pred), axis=-1),
                             [min_batch_size, 1]) * y_true

    # Model
    self._model = Model(input=x_in, output=[x_diff1, x_diff2, y_diff])
    sgd = SGD(lr=self._xeta, decay=1e-5, momentum=0.99, nesterov=True)
    # adam = Adam(lr=self._xeta, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
    self._model.compile(
        optimizer=sgd,
        loss=[weighted_mse_x, weighted_mse_x, weighted_mse_y],
        loss_weights=[1, 1, self._alpha])

    history = self._model.fit_generator(
        generator=batch_generator_sdne(S, self._beta, self._n_batch, True),
        nb_epoch=self._num_iter,
        samples_per_epoch=S.nonzero()[0].shape[0] // self._n_batch,
        verbose=1,
        callbacks=[callbacks.TerminateOnNaN()])
    loss = history.history['loss']
    # Get embedding for all points
    if loss[-1] == np.inf or np.isnan(loss[-1]):
        print('Model diverged. Assigning random embeddings')
        self._Y = np.random.randn(self._node_num, self._d)
    else:
        self._Y = model_batch_predictor(self._autoencoder, S, self._n_batch)
    t2 = time()
    # Save the autoencoder and its weights
    if self._weightfile is not None:
        saveweights(self._encoder, self._weightfile[0])
        saveweights(self._decoder, self._weightfile[1])
    if self._modelfile is not None:
        savemodel(self._encoder, self._modelfile[0])
        savemodel(self._decoder, self._modelfile[1])
    if self._savefilesuffix is not None:
        saveweights(self._encoder,
                    'encoder_weights_' + self._savefilesuffix + '.hdf5')
        saveweights(self._decoder,
                    'decoder_weights_' + self._savefilesuffix + '.hdf5')
        savemodel(self._encoder,
                  'encoder_model_' + self._savefilesuffix + '.json')
        savemodel(self._decoder,
                  'decoder_model_' + self._savefilesuffix + '.json')
    # Save the embedding
    np.savetxt('embedding_' + self._savefilesuffix + '.txt', self._Y)
    return self._Y, (t2 - t1)
def learn_embedding(self, graph=None, edge_f=None,
                    is_weighted=False, no_python=False):
    if not graph and not edge_f:
        raise Exception('graph/edge_f needed')
    if not graph:
        graph = graph_util.loadGraphFromEdgeListTxt(edge_f)
    S = nx.to_scipy_sparse_matrix(graph)
    self._node_num = graph.number_of_nodes()
    t1 = time()

    # Generate encoder, decoder and autoencoder
    self._num_iter = self._n_iter
    self._encoder = get_encoder(self._node_num, self._d, self._n_units,
                                self._nu1, self._nu2, self._actfn)
    self._decoder = get_decoder(self._node_num, self._d, self._n_units,
                                self._nu1, self._nu2, self._actfn)
    self._autoencoder = get_autoencoder(self._encoder, self._decoder)

    # Initialize self._model
    # Input
    x_in = Input(shape=(self._node_num,), name='x_in')
    # Process inputs
    [x_hat, y] = self._autoencoder(x_in)
    # Outputs
    x_diff = merge([x_hat, x_in],
                   mode=lambda ab: ab[0] - ab[1],
                   output_shape=lambda L: L[1])

    # Objectives
    def weighted_mse_x(y_true, y_pred):
        '''Hack: This fn doesn't accept additional arguments.
        We use y_true to pass them.
        y_pred: Contains x_hat - x
        y_true: Contains b
        '''
        return KBack.sum(KBack.square(y_true * y_pred), axis=-1)

    # Model
    self._model = Model(input=x_in, output=x_diff)
    # sgd = SGD(lr=self._xeta, decay=1e-5, momentum=0.99, nesterov=True)
    adam = Adam(lr=self._xeta, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
    self._model.compile(optimizer=adam, loss=weighted_mse_x)

    history = self._model.fit_generator(
        generator=batch_generator_ae(S, self._beta, self._n_batch, True),
        nb_epoch=self._num_iter,
        samples_per_epoch=S.shape[0] // self._n_batch,
        verbose=1,
        callbacks=[callbacks.TerminateOnNaN()])
    loss = history.history['loss']
    # Get embedding for all points
    if loss[0] == np.inf or np.isnan(loss[0]):
        print('Model diverged. Assigning random embeddings')
        self._Y = np.random.randn(self._node_num, self._d)
    else:
        self._Y = model_batch_predictor(self._autoencoder, S, self._n_batch)
    t2 = time()
    # Save the autoencoder and its weights
    if self._weightfile is not None:
        saveweights(self._encoder, self._weightfile[0])
        saveweights(self._decoder, self._weightfile[1])
    if self._modelfile is not None:
        savemodel(self._encoder, self._modelfile[0])
        savemodel(self._decoder, self._modelfile[1])
    if self._savefilesuffix is not None:
        saveweights(self._encoder,
                    'encoder_weights_' + self._savefilesuffix + '.hdf5')
        saveweights(self._decoder,
                    'decoder_weights_' + self._savefilesuffix + '.hdf5')
        savemodel(self._encoder,
                  'encoder_model_' + self._savefilesuffix + '.json')
        savemodel(self._decoder,
                  'decoder_model_' + self._savefilesuffix + '.json')
    # Save the embedding
    np.savetxt('embedding_' + self._savefilesuffix + '.txt', self._Y)
    return self._Y, (t2 - t1)
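# The `loss[0] == np.inf or np.isnan(loss[0])` guard used by these
# learn_embedding variants is the counterpart of TerminateOnNaN: when the
# callback aborts a run, the last recorded loss is NaN or infinite. A small
# standalone helper expressing the same check (name and signature are
# illustrative, not part of the original code):
import numpy as np


def training_diverged(history):
    """Return True if the most recent recorded loss is NaN or infinite."""
    losses = history.history.get('loss', [])
    return bool(losses) and (np.isnan(losses[-1]) or np.isinf(losses[-1]))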
# Tensorboard
tbCallBack = callbacks.TensorBoard(log_dir='/code/logs/{}'.format(experiment))

# Checkpoints
checkpoints = callbacks.ModelCheckpoint(
    '/code/checkpoints/{}.weights'.format(experiment),
    monitor='val_acc',
    verbose=1,
    save_best_only=True,
    save_weights_only=False,
    mode='auto',
    period=1)

# Terminate on NaN
tnan = callbacks.TerminateOnNaN()

## Train model
model.fit_generator(train_generator,
                    epochs=epochs,
                    validation_data=validation_generator,
                    callbacks=[tbCallBack, checkpoints, tnan],
                    shuffle=True,
                    verbose=1,
                    workers=4,
                    use_multiprocessing=True)

## Evaluate model
# Load best model
best_model = load_model('/code/checkpoints/{}.weights'.format(experiment))
def run(cfn_model_path, seg_model_path, batch_size, lr, data_path, artifacts_folder, img_size, cfn_batch_multiplier, seed, _run): artifacts_folder = Path(artifacts_folder) artifacts_folder.mkdir(parents=True, exist_ok=True) data_path = Path(data_path) data_df = pd.read_csv(data_path / 'train.csv') data_df = prepare_data_df(data_df) print(data_df.info()) print(data_df.head(10)) train_df, val_df = train_test_split(data_df, test_size=0.2, random_state=seed) print(f'length of train and val data before mix-match: {len(train_df.index)}, {len(val_df.index)}') ckpt_path = artifacts_folder / 'ckpts' ckpt_path.mkdir(exist_ok=True, parents=True) classification_model = models.load_model(cfn_model_path, compile=False) classification_model = insert_layer_nonseq(classification_model, '.*relu.*|.*re_lu.*', mish_layer_factory, position='replace') optimizer = optimizers.Adam(lr=lr) classification_model.compile(optimizer, 'binary_crossentropy', metrics=[metrics.binary_accuracy, metrics.mse]) training_callbacks = [ callbacks.ReduceLROnPlateau(patience=3, verbose=1, min_lr=1e-7, factor=0.5), callbacks.EarlyStopping(patience=5, verbose=1, restore_best_weights=True), callbacks.ModelCheckpoint(str(ckpt_path / 'cfn_model-{epoch:04d}-{val_loss:.4f}.hdf5'), verbose=1, save_best_only=True), callbacks.TensorBoard(log_dir=str(artifacts_folder / 'tb_logs')), callbacks.TerminateOnNaN(), ObserveMetrics(_run, 'cfn') ] train_seq = ClassificationDataSeq(seed, train_df, batch_size * cfn_batch_multiplier, img_size, 'data/train_images', mode='train', shuffle=True, augment=True) val_seq = ClassificationDataSeq(seed, val_df, batch_size * cfn_batch_multiplier, img_size, 'data/train_images', mode='val', shuffle=False, augment=False) train_model(classification_model, train_seq, val_seq, training_callbacks) models.save_model(classification_model, str(artifacts_folder / 'cfn_model_best.h5')) segmentation_model = models.load_model(seg_model_path, compile=False) segmentation_model = insert_layer_nonseq(segmentation_model, '.*relu.*|.*re_lu.*', mish_layer_factory, position='replace') optimizer = optimizers.Adam(lr=lr) segmentation_model.compile(optimizer, sm.losses.bce_dice_loss, metrics=[sm.metrics.iou_score, sm.metrics.f1_score]) training_callbacks = [ callbacks.ReduceLROnPlateau(patience=3, verbose=1, min_lr=1e-7, factor=0.5), callbacks.EarlyStopping(patience=5, verbose=1, restore_best_weights=True), callbacks.ModelCheckpoint(str(ckpt_path / 'seg_model-{epoch:04d}-{val_loss:.4f}.hdf5'), verbose=1, save_best_only=True), callbacks.TensorBoard(log_dir=str(artifacts_folder / 'tb_logs')), callbacks.TerminateOnNaN(), ObserveMetrics(_run, 'seg') ] train_seq = DataSequence(seed * 2, train_df, batch_size, img_size, 'data/train_images', mode='train', shuffle=True, augment=True) val_seq = DataSequence(seed * 2, val_df, batch_size, img_size, 'data/train_images', mode='val', shuffle=False, augment=False) history = train_model(segmentation_model, train_seq, val_seq, training_callbacks) models.save_model(segmentation_model, str(artifacts_folder / 'seg_model_best.h5')) return history.history['val_loss'][-1]
def lstmModularDuka(conf, workdir):
    # --- Config ----------------------------------------------------------------------------------------------------
    global logDirTensorboard
    time_stamp = str(datetime.datetime.utcnow()).replace(
        ":", "-")  # date-time to name folders and data
    logDirTensorboard = os.path.join(workdir, f"{conf['name']}_{time_stamp}")  # workdir
    if not os.path.exists(logDirTensorboard):
        os.makedirs(logDirTensorboard, exist_ok=True)

    # parameters
    if conf['adamEpsilon'] is None:
        conf['adamEpsilon'] = backend.epsilon()
    # ---------------------------------------------------------------------------------------------------------------

    # --- Preparation -----------------------------------------------------------------------------------------------
    print('Preparation: collect dukascopy files')
    selectedSymbols = conf['selectedSymbols'].copy()  # modified in parseFunctions
    usedSymbols = [re.sub(r"_.*", "", sym) for sym in selectedSymbols]
    usedSymbols = [re.sub(r"-.*", "", sym) for sym in usedSymbols]
    usedSymbols = list(dict.fromkeys(usedSymbols))  # filter duplicates with dict
    df = CollectorDuka(scale=conf['scale'], symbols=usedSymbols).df
    # ---------------------------------------------------------------------------------------------------------------

    # --- Calculations ----------------------------------------------------------------------------------------------
    print('Calculations: parse custom functions: Moving Average')
    df = df.loc[conf['startdate']:]
    df = Calculator.parseFunctions(df, conf['selectedSymbols'])
    df = Slicer.trimHead(df)  # the Moving Average drops some data
    df = Calculator.scaleRelative(df)  # rescale relative to keep the relation between MA and Index
    df = df[conf['selectedSymbols']]  # drop unused symbols

    if conf['debug']:
        df.reset_index(drop=True).plot()
        plt.show()
    # ---------------------------------------------------------------------------------------------------------------

    # --- Backup Configuration --------------------------------------------------------------------------------------
    df.to_hdf(os.path.join(logDirTensorboard, 'dataframe.h5'), key='input')
    conf.save(os.path.join(logDirTensorboard, 'config.json'))
    # ---------------------------------------------------------------------------------------------------------------

    # --- Slice -----------------------------------------------------------------------------------------------------
    print('Slice: split dataframe and slice')
    train, test = Slicer.split(df, trainsetSize=conf['trainsetSize'])
    trainX, trainY = Slicer.sliceCategory(train,
                                          block=conf['blockSize'],
                                          predictionLength=conf['predictionLength'],
                                          numCategories=conf['numCategories'])
    testX, testY = Slicer.sliceCategory(test,
                                        block=conf['blockSize'],
                                        predictionLength=conf['predictionLength'],
                                        numCategories=conf['numCategories'])

    # debug
    if conf['debug']:
        print('categorical spread:')
        for i in range(conf['numCategories']):
            print(f'Spread on Cat{i}: {[list(x).index(1) for x in trainY].count(i)}')
        print()
        print(f"trainset: \r\n{train[-2:]}")
        print(f"testset: \r\n{test[-2:]}")
    # ---------------------------------------------------------------------------------------------------------------

    # --- Neural Network --------------------------------------------------------------------------------------------
    print('Neural Network: create lstm model')
    model = Sequential()  # basic model
    nbrOfLayers = len(conf['numNodes'])
    layer = 1
    for i in conf['numNodes']:
        if layer == 1 and nbrOfLayers != 1:  # input layer
            print(f'Add LSTM input Layer with {i} Nodes')
            if conf['useGPU']:
                model.add(CuDNNLSTM(i,
                                    input_shape=(conf['blockSize'], len(df.columns)),
                                    return_sequences=True))
            else:
                model.add(LSTM(i,
                               return_sequences=True,
                               input_shape=(conf['blockSize'], len(df.columns)),
                               activation=conf['activation']))
            if conf['dropout'] > 0:
                model.add(Dropout(conf['dropout']))
        elif layer < nbrOfLayers:  # hidden layers
            print(f'Add LSTM hidden Layer with {i} Nodes')
            if conf['useGPU']:
                model.add(CuDNNLSTM(i, return_sequences=True))
            else:
                model.add(LSTM(i, return_sequences=True, activation=conf['activation']))
            if conf['dropout'] > 0:
                model.add(Dropout(conf['dropout']))
        elif layer == nbrOfLayers:  # output layer
            print(f'Add LSTM output Layer with {i} Nodes')
            if nbrOfLayers == 1:
                if conf['useGPU']:
                    model.add(CuDNNLSTM(i,
                                        input_shape=(conf['blockSize'], len(df.columns)),
                                        return_sequences=False))
                else:
                    model.add(LSTM(i,
                                   input_shape=(conf['blockSize'], len(df.columns)),
                                   return_sequences=False,
                                   activation=conf['activation']))
            else:
                if conf['useGPU']:
                    model.add(CuDNNLSTM(i, return_sequences=False))
                else:
                    model.add(LSTM(i, return_sequences=False, activation=conf['activation']))
            if conf['dropout'] > 0:
                model.add(Dropout(conf['dropout']))
            model.add(Dense(conf['numCategories'], activation='softmax'))
        layer += 1

    # compile Model
    if conf['adamEpsilon'] is None:
        conf['adamEpsilon'] = backend.epsilon()
    optimizer = optimizers.Adam(lr=conf['adamLR'],
                                beta_1=conf['adamBeta_1'],
                                beta_2=conf['adamBeta_2'],
                                epsilon=conf['adamEpsilon'],
                                decay=conf['adamDecay'],
                                amsgrad=conf['amsgrad'])
    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=[])
    # model.summary()  # Prints the summary of the Model
    # ---------------------------------------------------------------------------------------------------------------

    # --- predict ---------------------------------------------------------------------------------------------------
    print('Predict: learn prediction of testset')
    # earlyStopping and reduceLrLoss are defined but not added to the callbacks list passed to fit() below
    earlyStopping = callbacks.EarlyStopping(monitor='val_loss', patience=10, verbose=0, mode='min')
    mcpSave = callbacks.ModelCheckpoint(os.path.join(logDirTensorboard, 'model.h5'),
                                        save_best_only=True, monitor='val_loss', mode='min')
    reduceLrLoss = callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=10,
                                               verbose=1, epsilon=conf['adamEpsilon'], mode='min')
    terminateNan = callbacks.TerminateOnNaN()
    tensorboard = callbacks.TensorBoard(log_dir=logDirTensorboard,
                                        histogram_freq=0,
                                        write_graph=True,
                                        write_images=True)

    class MoneyMakerCallback(callbacks.Callback):
        def __init__(self, epochInterval):
            self.epochInterval = epochInterval

        def on_epoch_end(self, epoch, logs=None):
            if epoch % self.epochInterval == 0:
                # NOTE: self.validation_data is only populated by older Keras versions;
                # see the sketch after this function for a version-independent variant.
                testX = self.validation_data[0]
                testY = self.validation_data[1]
                pred = self.model.predict(testX)
                Calculator.checkMoneyMakerClassification(
                    pred, testY, checkOverValue=conf['checkOverValue'])

    moneyMaker = MoneyMakerCallback(epochInterval=1)

    batch_size = conf['batchSize']
    if batch_size == -1:
        batch_size = trainX.shape[0]

    model.fit(trainX, trainY,
              batch_size=batch_size,
              epochs=conf['numberOfEpochs'],
              shuffle=conf['shuffleInput'],
              callbacks=[tensorboard, mcpSave, terminateNan, moneyMaker],
              validation_data=(testX, testY))  # train the model

    # load best model
    model = load_model(os.path.join(logDirTensorboard, 'model.h5'))
    pred = model.predict(testX)  # Make the prediction
    # ---------------------------------------------------------------------------------------------------------------

    # --- evaluate --------------------------------------------------------------------------------------------------
    print('Evaluate: evaluation of prediction')

    # plot compare matrix
    compareMatrix = Calculator.compareMatrix(pred, testY)
    cplt = Logger.plotCompareMatrix(compareMatrix, predictionLength=conf['predictionLength'])
    cplt.savefig(os.path.join(logDirTensorboard, 'predicition_matrix.png'))

    # plot prediction
    Calculator.checkMoneyMakerClassification(pred, testY, checkOverValue=conf['checkOverValue'])
    # cplt = Logger.plotKerasCategories(pred, testX, predictionLength=conf['predictionLength'])
    # cplt.savefig(os.path.join(logDirTensorboard, 'predicition_plot.png'))
    cplt = Logger.plotKerasCategories(pred[:20 * conf['predictionLength']],
                                      testX[:20 * conf['predictionLength']],
                                      predictionLength=conf['predictionLength'])
    cplt.savefig(os.path.join(logDirTensorboard, 'predicition_plot_small.png'))
    # with open(os.path.join(logDirTensorboard, 'plot.h5'), 'wb') as file: pickle.dump(plt, file)

    if conf['debug']:
        plt.show()
    else:
        global rigthClassPerc, rigthDirectionPerc, bestDirectionPerc, directionVerySurePerc, bestDirectionVerySurePerc
        SendSlack.sendText(
            f'--- NEW TEST -----------------------------------\r\nFile: {logDirTensorboard}\r\n{conf.toString()}' +
            f'Right class predicted: {rigthClassPerc} %\r\n' +
            f'Right direction predicted: {rigthDirectionPerc} %\r\n' +
            f'Best direction prediction: {bestDirectionPerc} %\r\n' +
            f'Right direction predicted with sureness over {conf["checkOverValue"]}: {directionVerySurePerc} %\r\n' +
            f'Best direction predicted with sureness over {conf["checkOverValue"]}: {bestDirectionVerySurePerc} %\r\n'
        )
        SendSlack.sendFile(
            os.path.join(logDirTensorboard, 'predicition_matrix.png'),
            'Prediction Matrix')
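# The MoneyMakerCallback inside lstmModularDuka reads self.validation_data, which older
# standalone Keras populated automatically on callbacks but current tf.keras no longer does.
# A version-independent sketch (not the project's code) hands the validation arrays to the
# callback explicitly; Calculator.checkMoneyMakerClassification is the project's own helper.
from keras import callbacks

class MoneyMakerCallbackExplicit(callbacks.Callback):
    def __init__(self, epochInterval, testX, testY, checkOverValue):
        super().__init__()
        self.epochInterval = epochInterval
        self.testX, self.testY = testX, testY
        self.checkOverValue = checkOverValue

    def on_epoch_end(self, epoch, logs=None):
        if epoch % self.epochInterval == 0:
            pred = self.model.predict(self.testX)
            Calculator.checkMoneyMakerClassification(
                pred, self.testY, checkOverValue=self.checkOverValue)

# usage: moneyMaker = MoneyMakerCallbackExplicit(1, testX, testY, conf['checkOverValue'])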