def get_callbacks(self, model_prefix='Model', is_super=True):
    """Creates a list of callbacks that can be used during training to create a
    snapshot ensemble of the model.

    Args:
        is_super: True to use the improved version of the snapshot anneal schedule.
        model_prefix: prefix for the filename of the weights.

    Returns:
        list of 3 callbacks [ModelCheckpoint, LearningRateScheduler,
        SnapshotModelCheckpoint] which can be provided to the `fit` function.
    """
    if not os.path.exists('weights/'):
        os.makedirs('weights/')

    callback_list = [
        callbacks.ModelCheckpoint("weights/%s-Best.h5" % model_prefix,
                                  monitor="val_acc",
                                  save_best_only=True,
                                  save_weights_only=True),
        SnapshotModelCheckpoint(self.T, self.M,
                                fn_prefix='weights/%s' % model_prefix)
    ]

    if is_super:
        callback_list.append(
            callbacks.LearningRateScheduler(
                schedule=self._cosine_anneal_schedule_super))
    else:
        callback_list.append(
            callbacks.LearningRateScheduler(
                schedule=self._cosine_anneal_schedule))

    return callback_list
def set_callbacks(self, lr_span, lr_factor):
    """Set useful periodic Keras callbacks:
        `LearningRateScheduler` updates the lr at the end of each epoch
        `ModelCheckpoint` saves the model at the end of each epoch (if conditions are met)
        `CSVLogger` writes results to a csv file

    Arguments:
        lr_span {int} -- The number of epochs to wait until changing the lr
        lr_factor {float} -- By how much to modify the lr
    """
    if not os.path.exists(self.subdir):
        os.makedirs(self.subdir)

    # learning rate schedule
    def schedule(epoch, lr):
        return lr * lr_factor**(epoch // lr_span)

    lr_scheduler = callbacks.LearningRateScheduler(schedule)

    # Should I monitor here the best val_loss or the metrics of interest?
    # If not all samples are used in an epoch, val_loss is noisy.
    checkpointer = callbacks.ModelCheckpoint(
        os.path.join(self.subdir,
                     'model.test{:02d}-ep{{epoch:02d}}.pth'.format(self.group_idx)),
        monitor='val_loss',
        save_best_only=True,
        mode='min')

    csv_logger = callbacks.CSVLogger(
        os.path.join(self.subdir, 'training.test{:02d}.log'.format(self.group_idx)))

    self.callbacks = [lr_scheduler, csv_logger, checkpointer]
def train(self):
    _dir = self.model_out_dir + '/' + self.net_type + '/'
    tb = cb.TensorBoard(log_dir=_dir + '/tensorboard-logs',
                        batch_size=self.batch_size,
                        histogram_freq=10)
    checkpoint = cb.ModelCheckpoint(_dir + '/weights-{epoch:02d}.h5',
                                    monitor='pred_acc',
                                    save_best_only=True,
                                    save_weights_only=True,
                                    verbose=1)
    lr_decay = cb.LearningRateScheduler(
        schedule=lambda epoch: self.learning_rate * (self.lr_decay**epoch))

    self.preprocessor = self.preprocessor(self.data_dir)
    self.model.fit_generator(
        self.preprocessor.flow(),
        steps_per_epoch=self.steps_per_epoch,
        epochs=self.epochs,
        validation_data=([self.preprocessor.test_images,
                          self.preprocessor.test_image_emotions],
                         [self.preprocessor.test_image_emotions,
                          self.preprocessor.test_images]),
        callbacks=[tb, checkpoint, lr_decay])

    score = self.model.evaluate(
        [self.preprocessor.test_images, self.preprocessor.test_image_emotions],
        [self.preprocessor.test_image_emotions, self.preprocessor.test_images])
    self.save_model()
    self.logger.log_model(self.net_type, score, self.model)
def train(self, epochs):
    # init variables
    loss_hist = []
    acc_list = []

    # assuming every image we take will have to be moved, each step is made separately
    for epoch in range(0, epochs):
        print('epoch {}/{}'.format(epoch + 1, epochs))

        # the learning rate is reduced as training progresses (stepped every 20 epochs)
        self.lr = 0.001 * (0.7**(epoch // 20))

        # take a random test sample already multiplied by the mover
        states_sample, actions_sample = self.il_buffer.il_extended_mini_batch(
            BATCH_SIZE_TEST, 'test')

        # encode each number using a mux
        actions_sample = mux(actions_sample, 16)

        # use a callback to update the learning rate according to the self.__scheduler method
        callback = callbacks.LearningRateScheduler(self.__scheduler)
        hist = self.model.fit(x=states_sample,
                              y=actions_sample,
                              epochs=1,
                              verbose=1,
                              callbacks=[callback],
                              batch_size=MINI_BATCH_SIZE)
        loss_hist = loss_hist + hist.history['loss']

        if epoch % 10 == 9:
            states_sample, actions_sample = self.il_buffer.il_extended_mini_batch(
                BATCH_SIZE_VAL, 'validation')
            acc_list.append(self.__evaluate(states_sample, actions_sample)[1])

    return [loss_hist, acc_list]
def _callbacks(self):
    def lambdaCallbackFunc(epoch, _):
        print(K.eval(self._model.optimizer.lr))
        if (epoch + 1) % MODEL_SAVE_PERIOD == 0:
            self._save(epoch + 1)
            with open(self._model_config_file_path(), mode='w', encoding='utf-8') as f:
                dic = {}
                dic['epoch'] = epoch + 1
                dic['lr'] = K.eval(self._model.optimizer.lr)
                json.dump(dic, f, cls=NumpyEncoder)

    def learningRateSchedulerFunc(epoch):
        return LR * (0.5**(epoch // 8))

    return [
        # callbacks.ReduceLROnPlateau(monitor='loss', factor=LR_REDUCE_FACTOR,
        #                             patience=LR_REDUCE_PATIENCE, epsilon=LR_REDUCE_EPSILON),
        # callbacks.ModelCheckpoint(os.path.join(proj_path, '{epoch:d}.hdf5'), period=MODEL_SAVE_PERIOD),
        # callbacks.LambdaCallback(on_epoch_end=lambda epoch, logs: self._model.save_weights(
        #     os.path.join(model_dir, 'epoch_{}.hdf5'.format(epoch)))),
        callbacks.LearningRateScheduler(schedule=learningRateSchedulerFunc),
        callbacks.LambdaCallback(on_epoch_end=lambdaCallbackFunc),
        callbacks.TensorBoard(log_dir=self._model_dir(), batch_size=BATCH_SIZE)
    ]
def train(model, data, args):
    (x_train, y_train) = data

    # strip the '.h5' extension explicitly (str.rstrip strips characters, not a suffix)
    save_prefix = args.save_file[:-3] if args.save_file.endswith('.h5') else args.save_file
    checkpoint = callbacks.ModelCheckpoint(
        monitor='val_acc',
        verbose=1,
        filepath=save_prefix + '_' + 'epoch.{epoch:02d}.h5',
        save_weights_only=True,
        mode='auto',
        period=1)
    lr_decay = callbacks.LearningRateScheduler(
        schedule=lambda epoch: args.lr * (args.lr_decay**epoch))

    # model = multi_gpu_model(model, gpus=2)
    if args.load == 1:
        model.load_weights(args.save_file)
        print('Loading %s' % args.save_file)

    model.compile(optimizer=optimizers.Adam(lr=args.lr),
                  loss='categorical_crossentropy',
                  metrics=["accuracy"])

    hist = model.fit(x_train, y_train,
                     batch_size=args.batch_size,
                     epochs=args.epochs,
                     validation_split=0.01,
                     callbacks=[checkpoint, lr_decay])
    return hist.history
def train(model, data, args):
    (x_train, y_train), (x_test, y_test) = data

    log = callbacks.CSVLogger(args.save_dir + '/log.csv')
    tb = callbacks.TensorBoard(log_dir=args.save_dir + '/tensorboard-logs',
                               batch_size=args.batch_size,
                               histogram_freq=int(args.debug))
    checkpoint = callbacks.ModelCheckpoint(args.save_dir + '/weights-{epoch:02d}.h5',
                                           monitor='val_capsnet_acc',
                                           save_best_only=True,
                                           save_weights_only=True,
                                           verbose=1)
    lr_decay = callbacks.LearningRateScheduler(
        schedule=lambda epoch: args.lr * (args.lr_decay**epoch))

    model.compile(optimizer=optimizers.Adam(lr=args.lr),
                  loss=[margin_loss, 'mse'],
                  loss_weights=[1., args.lam_recon],
                  metrics={'capsnet': 'accuracy'})

    def train_generator(x, y, batch_size, shift_fraction=0.):
        # shift images by a fraction of their width/height during augmentation
        train_data_generator = ImageDataGenerator(width_shift_range=shift_fraction,
                                                  height_shift_range=shift_fraction)
        generator = train_data_generator.flow(x, y, batch_size)
        while True:
            x_batch, y_batch = generator.next()
            yield x_batch, y_batch

    model.fit_generator(generator=train_generator(x_train, y_train, args.batch_size,
                                                  args.shift_fraction),
                        steps_per_epoch=int(x_train.shape[0] / args.batch_size),
                        epochs=args.epoch,
                        validation_data=[[x_test, y_test], [y_test, y_test]],
                        callbacks=[log, tb, checkpoint, lr_decay])

    model.save_weights(args.save_dir + '/trained_model.h5')
    print('Trained model saved to \'%s/trained_model.h5\'' % args.save_dir)

    # plot log
    return model
def train(model, data, args):
    (x_train, y_train) = data

    checkpoint = callbacks.ModelCheckpoint(args.save_file,
                                           monitor='val_loss',
                                           verbose=1,
                                           save_best_only=True,
                                           save_weights_only=True,
                                           mode='auto',
                                           period=1)
    lr_decay = callbacks.LearningRateScheduler(
        schedule=lambda epoch: args.lr * (args.lr_decay**epoch))

    if args.load == 1:
        model.load_weights(args.save_file)
        print('Loading %s' % args.save_file)

    model = multi_gpu_model(model, gpus=2)
    model.compile(optimizer=optimizers.Adam(lr=args.lr),
                  loss=margin_loss,
                  metrics={})

    hist = model.fit(x_train, y_train,
                     batch_size=args.batch_size,
                     epochs=args.epochs,
                     validation_split=0.02,
                     callbacks=[checkpoint, lr_decay])
    return hist.history
def test_validate_callbacks_predefined_callbacks(self):
    supported_predefined_callbacks = [
        callbacks.TensorBoard(),
        callbacks.CSVLogger(filename='./log.csv'),
        callbacks.EarlyStopping(),
        callbacks.ModelCheckpoint(filepath='./checkpoint'),
        callbacks.TerminateOnNaN(),
        callbacks.ProgbarLogger(),
        callbacks.History(),
        callbacks.RemoteMonitor()
    ]
    distributed_training_utils_v1.validate_callbacks(
        supported_predefined_callbacks, adam.Adam())

    unsupported_predefined_callbacks = [
        callbacks.ReduceLROnPlateau(),
        callbacks.LearningRateScheduler(schedule=lambda epoch: 0.001)
    ]
    for callback in unsupported_predefined_callbacks:
        with self.assertRaisesRegex(ValueError,
                                    'You must specify a Keras Optimizer V2'):
            distributed_training_utils_v1.validate_callbacks(
                [callback], tf.compat.v1.train.AdamOptimizer())
def get_calls():
    from keras import callbacks as C
    import math

    cycles = 50
    calls = list()
    calls.append(C.ModelCheckpoint(args.save_dir + '/weights-{epoch:02d}.h5',
                                   save_best_only=True,
                                   save_weights_only=True,
                                   verbose=1))
    calls.append(C.CSVLogger(args.save_dir + '/log.csv'))
    calls.append(C.TensorBoard(log_dir=args.save_dir +
                               '/tensorboard-logs/{}'.format(actual_partition),
                               batch_size=args.batch_size,
                               histogram_freq=args.debug))
    calls.append(C.EarlyStopping(monitor='val_loss', patience=10, verbose=0))
    # calls.append(C.EarlyStopping(monitor='acc', patience=10, verbose=0, min_delta=0.1))
    # calls.append(C.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=0.0001, verbose=0))
    calls.append(C.LearningRateScheduler(
        schedule=lambda epoch: args.lr * (args.lr_decay**epoch)))
    # calls.append(C.LearningRateScheduler(schedule=lambda epoch: args.lr * math.cos(1 + ((epoch - 1 % (args.epochs / cycles))) / (args.epochs / cycles))))
    # calls.append(C.LearningRateScheduler(schedule=lambda epoch: 0.001 * np.exp(-epoch / 10.)))
    return calls
def get_callbacks(self, model_prefix='Model'):
    """Creates a list of callbacks that can be used during training to create a
    snapshot ensemble of the model.

    Args:
        model_prefix: prefix for the filename of the weights.

    Returns:
        list of 3 callbacks [ModelCheckpoint, LearningRateScheduler,
        SnapshotModelCheckpoint] which can be provided to the `fit` function.
    """
    if not os.path.exists('snapshot_models/'):
        os.makedirs('snapshot_models/')

    callback_list = [
        callbacks.ModelCheckpoint("snapshot_models/%s-Best.h5" % model_prefix,
                                  monitor="val_acc",
                                  save_best_only=True),
        callbacks.LearningRateScheduler(schedule=self._cosine_anneal_schedule),
        SnapshotModelCheckpoint(self.T, self.M,
                                fn_prefix='snapshot_models/%s' % model_prefix)
    ]
    return callback_list
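# Several snippets above pass `self._cosine_anneal_schedule` to LearningRateScheduler
# without showing it. Below is a minimal sketch of such a schedule following the
# snapshot-ensemble formulation (M warm restarts over T epochs); the attribute name
# `self.alpha_zero` (initial learning rate) is an assumption, not taken from the
# snippets themselves.
import math

def _cosine_anneal_schedule(self, t):
    # length of one cosine cycle in epochs
    cycle_len = self.T // self.M
    # position within the current cycle, mapped onto [0, pi)
    cos_inner = (math.pi * (t % cycle_len)) / cycle_len
    # anneal from alpha_zero down towards 0, then restart at the next cycle
    return float(self.alpha_zero / 2 * (math.cos(cos_inner) + 1))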
def define_calls(self):
    from keras import callbacks as C

    self.model_cnt += 1
    calls = list()
    calls.append(C.ModelCheckpoint(self.args.save_dir +
                                   '/weights-{}-'.format(self.model_cnt) +
                                   '{epoch:02d}.h5',
                                   save_best_only=True,
                                   save_weights_only=True,
                                   verbose=0))
    calls.append(C.CSVLogger(self.args.save_dir + '/log.csv'))
    # calls.append(C.TensorBoard(log_dir=args.save_dir + '/tensorboard-logs/{}'.format(actual_partition), batch_size=self.args.batch_size, histogram_freq=self.args.debug))
    # calls.append(C.TensorBoard(log_dir=self.args.save_dir + '/tensorboard-logs/{}'.format(1), batch_size=self.args.batch_size, histogram_freq=self.args.debug))
    calls.append(C.EarlyStopping(monitor='val_loss', patience=3, verbose=0))
    calls.append(C.ReduceLROnPlateau(monitor='val_loss',
                                     factor=0.2,
                                     patience=2,
                                     min_lr=0.0001,
                                     verbose=0))
    calls.append(C.LearningRateScheduler(
        schedule=lambda epoch: self.args.lr * (self.args.lr_decay**((1 + epoch) / 10))))
    # calls.append(C.LearningRateScheduler(schedule=lambda epoch: args.lr * (args.lr_decay ** epoch)))
    # calls.append(C.LearningRateScheduler(schedule=lambda epoch: 0.001 * np.exp(-epoch / 10.)))
    self.calls = calls
    return self.calls
def train(model, train, dev, test, save_directory, optimizer, epoch, batch_size, schedule):
    (X_train, Y_train) = train
    (X_dev, Y_dev) = dev
    (X_test, Y_test) = test

    # Callbacks
    log = callbacks.CSVLogger(filename=save_directory + '/log.csv')
    tb = callbacks.TensorBoard(log_dir=save_directory + '/tensorboard-logs',
                               batch_size=batch_size)
    checkpoint = callbacks.ModelCheckpoint(
        filepath=save_directory + '/weights-improvement-{epoch:02d}.hdf5',
        save_best_only=True,
        save_weights_only=True,
        verbose=1)
    lr_decay = callbacks.LearningRateScheduler(schedule=schedule, verbose=1)

    # compile the model
    model.compile(optimizer=optimizer, loss=[margin_loss], metrics=['accuracy'])

    history = model.fit(x=X_train,
                        y=Y_train,
                        validation_data=[X_dev, Y_dev],
                        batch_size=batch_size,
                        epochs=epoch,
                        callbacks=[log, tb, checkpoint, lr_decay],
                        shuffle=True,
                        verbose=1)

    score = model.evaluate(X_test, Y_test, batch_size=batch_size)
    print(colored(score, 'green'))
    print(history.history.keys())

    # Summarize history for accuracy
    plt.plot(history.history['acc'])
    plt.plot(history.history['val_acc'])
    plt.title('model accuracy')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend(['training accuracy', 'validation accuracy'], loc='upper left')
    plt.savefig(save_directory + '/model_accuracy.png')
    plt.close()

    # Summarize history for loss
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['training loss', 'validation loss'], loc='upper left')
    plt.savefig(save_directory + '/model_loss.png')
    plt.close()

    model.save_weights(save_directory + '/trained_model.h5')
def get_calls(self, partition, seed):
    from keras import callbacks as C

    monitor = ('matthews_correlation', 'val_loss')
    # fname = save_dir + '/' + 'org_{}-partition_{}-seed_{}'.format(self.organism, partition, seed) + '-epoch_{epoch:02d}-weights.h5'
    fname = save_dir + '/' + 'org_{}-partition_{}-seed_{}'.format(
        self.organism, partition, seed) + '-weights.h5'

    calls = list()
    calls.append(C.ModelCheckpoint(fname,
                                   save_best_only=True,
                                   save_weights_only=True,
                                   verbose=0))
    calls.append(C.CSVLogger(save_dir + '/log.csv'))
    calls.append(C.TensorBoard(log_dir=save_dir + '/tensorboard-logs/{}'.format(partition),
                               batch_size=self.batch_size,
                               histogram_freq=self.debug))
    calls.append(C.EarlyStopping(monitor=monitor[0],
                                 patience=self.stop_patience,
                                 mode='min',
                                 restore_best_weights=False,
                                 verbose=0))
    # calls.append(C.ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=2, min_lr=0.0001, verbose=0))
    calls.append(C.LearningRateScheduler(
        schedule=lambda epoch: self.lr * (self.lr_decay**epoch)))
    # calls.append(C.LearningRateScheduler(schedule=lambda epoch: 0.001 * np.exp(-epoch / 10.)))
    return calls
def train(self):
    dir = self.model_out_dir + '/' + self.net_type + '/'
    tb = cb.TensorBoard(log_dir=dir + '/tensorboard-logs',
                        batch_size=self.batch_size)
    checkpoint = cb.ModelCheckpoint(dir + '/weights.h5',
                                    mode='min',
                                    save_best_only=True,
                                    save_weights_only=False,
                                    verbose=1)
    lr_decay = cb.LearningRateScheduler(
        schedule=lambda epoch: self.learning_rate * (self.lr_decay**epoch))

    self.preprocessor = self.preprocessor(self.data_dir)
    self.model.fit_generator(
        self.preprocessor.flow(),
        steps_per_epoch=self.steps_per_epoch,
        epochs=self.epochs,
        validation_data=([self.preprocessor.test_images,
                          self.preprocessor.test_dpoints,
                          self.preprocessor.dpointsDists,
                          self.preprocessor.dpointsAngles],
                         self.preprocessor.test_image_emotions),
        callbacks=[tb, checkpoint, lr_decay])

    score = self.model.evaluate([self.preprocessor.test_images,
                                 self.preprocessor.test_dpoints,
                                 self.preprocessor.dpointsDists,
                                 self.preprocessor.dpointsAngles],
                                self.preprocessor.test_image_emotions)
    self.save_model()
    self.logger.log_model(self.net_type, score, self.model)
def step_decay_schedule(initial_lr, decay_factor, step_size):
    def schedule(epoch):
        lr = initial_lr * (decay_factor**np.floor(epoch / step_size))
        print('Learning rate for epoch number {} is {}'.format(epoch, lr))
        return lr

    return callbacks.LearningRateScheduler(schedule)
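# Minimal usage sketch for the factory above, on toy data; the model, data and
# hyperparameter values here are hypothetical placeholders. With step_size=10 and
# decay_factor=0.5 the learning rate is halved every 10 epochs.
import numpy as np
from keras import layers, models

toy_model = models.Sequential([layers.Dense(4, activation='relu', input_shape=(8,)),
                               layers.Dense(1)])
toy_model.compile(optimizer='sgd', loss='mse')

lr_callback = step_decay_schedule(initial_lr=1e-3, decay_factor=0.5, step_size=10)
toy_model.fit(np.random.rand(64, 8), np.random.rand(64, 1),
              epochs=30, batch_size=16, verbose=0,
              callbacks=[lr_callback])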
def test_LearningRateScheduler():
    np.random.seed(1337)
    (X_train, y_train), (X_test, y_test) = get_test_data(num_train=train_samples,
                                                         num_test=test_samples,
                                                         input_shape=(input_dim,),
                                                         classification=True,
                                                         num_classes=num_classes)
    y_test = np_utils.to_categorical(y_test)
    y_train = np_utils.to_categorical(y_train)

    model = Sequential()
    model.add(Dense(num_hidden, input_dim=input_dim, activation='relu'))
    model.add(Dense(num_classes, activation='softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer='sgd',
                  metrics=['accuracy'])

    cbks = [callbacks.LearningRateScheduler(lambda x: 1. / (1. + x))]
    model.fit(X_train, y_train,
              batch_size=batch_size,
              validation_data=(X_test, y_test),
              callbacks=cbks,
              epochs=5)
    # after 5 epochs the last scheduled value is 1 / (1 + 4) = 0.2
    assert (float(K.get_value(model.optimizer.lr)) - 0.2) < K.epsilon()
def train_Keras(train_X, train_y, test_X, test_y, kwargs,
                cae_model_func=None, n_features=None, epochs=150):
    normalization = normalization_func()
    num_classes = train_y.shape[-1]

    norm_train_X = normalization.fit_transform(train_X)
    norm_test_X = normalization.transform(test_X)

    batch_size = max(2, len(train_X) // 50)
    class_weight = train_y.shape[0] / np.sum(train_y, axis=0)
    class_weight = num_classes * class_weight / class_weight.sum()
    sample_weight = None
    print('mu :', kwargs['mu'], ', batch_size :', batch_size)
    print('reps : ', reps, ', weights : ', class_weight)
    if num_classes == 2:
        sample_weight = np.zeros((len(norm_train_X),))
        sample_weight[train_y[:, 1] == 1] = class_weight[1]
        sample_weight[train_y[:, 1] == 0] = class_weight[0]
        class_weight = None

    model_clbks = [
        callbacks.LearningRateScheduler(scheduler()),
    ]

    if cae_model_func is not None:
        svc_model = LinearSVC(nfeatures=(n_features,), **kwargs)
        svc_model.create_keras_model(nclasses=num_classes)
        classifier = svc_model.model
        cae_model = cae_model_func(output_function=classifier, K=n_features)
        start_time = time.process_time()
        cae_model.fit(norm_train_X, train_y, norm_test_X, test_y,
                      num_epochs=800,
                      batch_size=batch_size,
                      class_weight=class_weight)
        model = cae_model.model
        model.indices = cae_model.get_support(True)
        model.heatmap = cae_model.probabilities.max(axis=0)
        model.fs_time = time.process_time() - start_time
    else:
        svc_model = LinearSVC(norm_train_X.shape[1:], **kwargs)
        svc_model.create_keras_model(nclasses=num_classes)
        model = svc_model.model

        model.compile(loss=LinearSVC.loss_function(loss_function, class_weight),
                      optimizer=optimizer_class(lr=initial_lr),
                      metrics=[LinearSVC.accuracy])

        model.fit(norm_train_X, train_y,
                  batch_size=batch_size,
                  epochs=epochs,
                  callbacks=model_clbks,
                  validation_data=(norm_test_X, test_y),
                  class_weight=class_weight,
                  sample_weight=sample_weight,
                  verbose=verbose)

    model.normalization = normalization
    return model
def train(model, data, args):
    (x_train, y_train), (x_test, y_test), (x_train0, x_train1), \
        (x_test0, x_test1), (y_train1, y_test1) = data

    checkpoint = callbacks.ModelCheckpoint(args.save_file,
                                           monitor='train_capsnet_loss',
                                           verbose=1,
                                           save_best_only=True,
                                           save_weights_only=True,
                                           mode='auto',
                                           period=1)
    lr_decay = callbacks.LearningRateScheduler(
        schedule=lambda epoch: args.lr * (args.lr_decay**epoch))

    model.compile(optimizer=optimizers.Adam(lr=args.lr),
                  loss=[margin_loss, 'mse', 'mse'],
                  loss_weights=[1., args.lam_recon, args.lam_recon],
                  metrics={})

    hist = model.fit([x_train, y_train1],
                     [y_train, x_train0, x_train1],
                     batch_size=args.batch_size,
                     epochs=args.epochs,
                     validation_data=[[x_test, y_test1], [y_test, x_test0, x_test1]],
                     callbacks=[checkpoint, lr_decay])
    return hist.history
def train(model, data, args):
    """
    Training a CapsuleNet
    :param model: the CapsuleNet model
    :param data: a tuple containing training and testing data, like
                 `((train_set_x, train_set_y), (test_set_x, test_set_y))`
    :param args: arguments
    :return: The trained model
    """
    from keras import callbacks

    # unpacking the data
    (train_set_x, train_set_y), (test_set_x, test_set_y) = data

    # callbacks
    log = callbacks.CSVLogger(args.save_dir + '/log.csv')
    tb = callbacks.TensorBoard(log_dir=args.save_dir + '/tensorboard-logs',
                               batch_size=args.batch_size,
                               histogram_freq=args.debug)
    checkpoint = callbacks.ModelCheckpoint(args.save_dir + '/weights-{epoch:02d}.h5',
                                           save_best_only=True,
                                           save_weights_only=True,
                                           verbose=1)
    lr_decay = callbacks.LearningRateScheduler(
        schedule=lambda epoch: args.lr * (0.95**epoch))

    # compile the model
    model.compile(optimizer=optimizers.Adam(lr=args.lr),
                  loss=[margin_loss, 'mse'],
                  loss_weights=[1., args.lam_recon],
                  metrics={'out_caps': 'accuracy'})
def trainBatch(model, args):
    """
    Training the model on the MiniPlaces data, loaded in groups of batches
    :param model: the model to train
    :param args: arguments
    :return: The trained model
    """
    # callbacks
    log = callbacks.CSVLogger(args.save_dir + '/log.csv')
    tb = callbacks.TensorBoard(log_dir=args.save_dir + '/tensorboard-logs',
                               batch_size=args.batch_size,
                               histogram_freq=args.debug)
    checkpoint = callbacks.ModelCheckpoint(args.save_dir + '/weights-resnet-{epoch:02d}.h5',
                                           save_best_only=True,
                                           save_weights_only=True,
                                           verbose=1)
    lr_decay = callbacks.LearningRateScheduler(
        schedule=lambda epoch: args.lr * (0.9**epoch))

    # compile the model
    # model.compile(optimizer=optimizers.Adam(lr=args.lr),
    #               loss=[margin_loss, 'mse'],
    #               loss_weights=[1., args.lam_recon],
    #               metrics={'out_caps': 'accuracy'})
    sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(loss='categorical_crossentropy',
                  optimizer=sgd,
                  metrics=['mae', 'acc', 'top_k_categorical_accuracy'])

    groups = args.groups
    for i in range(groups):
        print("Training Group: ", i)
        (x_test, y_test, x_train, y_train) = loadMiniplacesBatch(train_data_list,
                                                                 val_data_list,
                                                                 images_root,
                                                                 group=i,
                                                                 groups=groups,
                                                                 size=[100, 100])

        x_train = x_train.reshape(-1, 100, 100, 3).astype('float32') / 255.
        x_test = x_test.reshape(-1, 100, 100, 3).astype('float32') / 255.
        y_train = to_categorical(y_train.astype('float32'), num_classes=100)
        y_test = to_categorical(y_test.astype('float32'), num_classes=100)
        print(x_train.shape, y_train.shape, x_test.shape, y_test.shape)

        # Training without data augmentation:
        model.fit(x_train, y_train,
                  batch_size=args.batch_size,
                  epochs=args.epochs,
                  callbacks=[log, tb, checkpoint, lr_decay],
                  validation_data=(x_test, y_test))

    model.save_weights(args.save_dir + '/trained_model.h5')
    print('Trained model saved to \'%s/trained_model.h5\'' % args.save_dir)
    return model
def train_model(model, args):
    print('Loading train data!')
    images_train, images_mask_train = load_data()

    # callbacks
    log = callbacks.CSVLogger(args.save_dir + '/log.csv')
    # To view TensorBoard:
    #   method 1: python -m tensorboard.main --logdir=./
    #   method 2: tensorboard --logdir=./
    tb = callbacks.TensorBoard(log_dir=args.save_dir + '/tensorboard-logs',
                               batch_size=args.batch_size,
                               histogram_freq=args.debug)
    checkpoint = callbacks.ModelCheckpoint(args.save_dir + '/multi-trained_model.h5',
                                           monitor='val_loss',
                                           save_best_only=True,
                                           save_weights_only=True,
                                           verbose=1,
                                           mode='min')
    lr_decay = callbacks.LearningRateScheduler(
        schedule=lambda epoch: args.lr * (0.99**epoch))
    early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss',
                                                   patience=args.patience,
                                                   verbose=0,
                                                   mode='min')

    model = ParallelModel(model, args.gpus)

    # Resume training from a saved checkpoint:
    # model = keras.models.load_model(args.save_dir + '/trained_model_old.h5',
    #                                 custom_objects={'bce_dice_loss': bce_dice_loss, 'mean_iou': mean_iou})

    # compile the model
    model.compile(optimizer=optimizers.Adam(lr=args.lr),
                  loss=bce_dice_loss,
                  metrics=["accuracy", mean_iou])

    # Fitting model
    model.fit(images_train, images_mask_train,
              batch_size=args.batch_size,
              epochs=args.epochs,
              verbose=1,
              shuffle=True,
              validation_split=0.2,
              callbacks=[log, tb, lr_decay, checkpoint, early_stopping])

    model.save_weights(args.save_dir + '/trained_model.h5')
    print('Trained model saved to \'%s/trained_model.h5\'' % args.save_dir)

    plot_log(args.save_dir + '/log.csv', show=True)
    return model
def train(model, data, args, dirs):
    """
    The function which defines the training loop of the model

    Parameters
    ----------
    model : `keras.models.Model`
        The structure of the model which is to be trained
    data : `tuple`
        The training and validation data
    args : `dict`
        The argument dictionary which defines other parameters at training time
    dirs : `string`
        Filepath to store the logs
    """
    # Extract the data
    (x_train, y_train), (x_val, y_val) = data

    # callbacks
    log = callbacks.CSVLogger(dirs + '/log.csv')
    tb = callbacks.TensorBoard(log_dir=dirs + '/tensorboard-logs',
                               batch_size=args.batch_size,
                               histogram_freq=int(args.debug))
    checkpoint = callbacks.ModelCheckpoint(dirs + '/model.h5',
                                           monitor='val_acc',
                                           save_best_only=True,
                                           save_weights_only=False,
                                           verbose=1)
    lr_decay = callbacks.LearningRateScheduler(
        schedule=lambda epoch: args.lr * (args.lr_decay**epoch))

    # compile the model
    model.compile(optimizer=optimizers.Adam(lr=args.lr),
                  loss='binary_crossentropy',
                  metrics=['acc'])

    # Training without data augmentation:
    model.fit(x_train, y_train,
              batch_size=args.batch_size,
              epochs=args.epochs,
              verbose=1,
              validation_data=(x_val, y_val),
              callbacks=[log, tb, checkpoint, lr_decay])
    # callbacks=[..., roc_auc_callback((x_train, y_train), (x_val, y_val))])

    # Save the trained model
    model.save(dirs + '/trained_model.h5')

    # Plot the training results
    plot_log(dirs, show=False)

    return model
def ieee_net(x_train, y_train, ddg_train):
    row_num, col_num = x_train.shape[1:3]
    verbose = 1
    batch_size = 64
    epochs = int(sys.argv[1])  # [15, 12, 16, 29, 16, 12, 10, 31, 10, 19]
    metrics = ('mae', pearson_r, rmse)

    def step_decay(epoch):
        # lr drops as training proceeds, good for sgd
        if epoch > 0.9 * epochs:
            lr = 0.00001
        elif epoch > 0.75 * epochs:
            lr = 0.0001
        elif epoch > 0.5 * epochs:
            lr = 0.001
        else:
            lr = 0.01
        print('lr: %f' % lr)
        return lr

    lrate = callbacks.LearningRateScheduler(step_decay, verbose=verbose)
    my_callbacks = [lrate]

    network = models.Sequential()
    network.add(layers.Conv1D(filters=16,
                              kernel_size=5,
                              activation='relu',
                              input_shape=(row_num, col_num)))
    network.add(layers.MaxPooling1D(pool_size=2))
    network.add(layers.Conv1D(32, 5, activation='relu'))
    network.add(layers.MaxPooling1D(pool_size=2))
    network.add(layers.Conv1D(64, 3, activation='relu'))
    network.add(layers.MaxPooling1D(pool_size=2))
    network.add(layers.Flatten())
    network.add(layers.Dense(128, activation='relu'))
    network.add(layers.Dropout(0.5))
    network.add(layers.Dense(16, activation='relu'))
    network.add(layers.Dropout(0.3))
    network.add(layers.Dense(1))
    # print(network.summary())

    # rmsp = optimizers.RMSprop(lr=0.0001, decay=0.1)
    rmsp = optimizers.RMSprop(lr=0.0001)
    network.compile(optimizer=rmsp,  # SGD, adam, rmsprop
                    loss='mse',
                    metrics=list(metrics))  # mae: mean absolute error

    result = network.fit(x=x_train,
                         y=ddg_train,
                         batch_size=batch_size,
                         epochs=epochs,
                         verbose=verbose,
                         callbacks=my_callbacks,
                         shuffle=True)
    return network, result.history
def train(model, data, args):
    """
    Training a CapsuleNet
    :param model: the CapsuleNet model
    :param data: a tuple containing training and testing data, like
                 `((x_train, y_train), (x_test, y_test))`
    :param args: arguments
    :return: The trained model
    """
    # unpacking the data
    (x_train, y_train), (x_test, y_test) = data

    # callbacks
    log = callbacks.CSVLogger(args.save_dir + '/log.csv')
    tb = callbacks.TensorBoard(log_dir=args.save_dir + '/tensorboard-logs',
                               batch_size=args.batch_size,
                               histogram_freq=int(args.debug))
    checkpoint = callbacks.ModelCheckpoint(args.save_dir + '/weights-{epoch:02d}.h5',
                                           monitor='val_capsnet_acc',
                                           save_best_only=True,
                                           save_weights_only=True,
                                           verbose=1)
    lr_decay = callbacks.LearningRateScheduler(
        schedule=lambda epoch: args.lr * (args.lr_decay**epoch))

    # compile the model
    model.compile(optimizer=optimizers.SGD(lr=args.lr),
                  loss=[margin_loss, 'mse'],
                  loss_weights=[1., args.lam_recon],
                  metrics={'capsnet': 'accuracy'})

    # Begin: Training with data augmentation --------------------------------------------------#
    def train_generator(x, y, batch_size, shift_fraction=0.):
        # shift up to 2 pixels for MNIST
        train_datagen = ImageDataGenerator(width_shift_range=shift_fraction,
                                           height_shift_range=shift_fraction)
        generator = train_datagen.flow(x, y, batch_size=batch_size)
        while 1:
            x_batch, y_batch = generator.next()
            yield ([x_batch, y_batch], [y_batch, x_batch])

    # Training with data augmentation. If shift_fraction=0., also no augmentation.
    model.fit_generator(generator=train_generator(x_train, y_train, args.batch_size,
                                                  args.shift_fraction),
                        steps_per_epoch=int(y_train.shape[0] / args.batch_size),
                        epochs=args.epochs,
                        validation_data=[[x_test, y_test], [y_test, x_test]],
                        callbacks=[log, tb, checkpoint, lr_decay])

    model.save_weights(args.save_dir + '/trained_model.h5')
    print('Trained model saved to \'%s/trained_model.h5\'' % args.save_dir)

    from utils import plot_log
    plot_log(args.save_dir + '/log.csv', show=True)

    return model
def loaddata_andtrain(dataframeip, batchsize, split_number, epnumber, routingnumber,
                      initilr, lrdecay, x_train, y_train, x_test, y_test, recover=False):
    save_dir = '/home/ubuntu/Projects/tnc_ai/peter/model/result'
    (x_train, y_train), (x_test, y_test) = (x_train, y_train), (x_test, y_test)
    df = pd.read_csv('/home/ubuntu/Projects/tnc_ai/peter/model/EO_rank_data.csv')
    model = CapsNet(input_shape=x_train.shape[1:],
                    n_class=len(np.unique(df['specie'])) - 1,
                    routings=routingnumber)

    lr_decay = callbacks.LearningRateScheduler(
        schedule=lambda epoch: initilr * (lrdecay**epoch))
    log = callbacks.CSVLogger(save_dir + '/log.csv')
    checkpoint = callbacks.ModelCheckpoint(save_dir + '/weights-{epoch:02d}.h5',
                                           monitor='val_capsnet_acc',
                                           save_best_only=True,
                                           save_weights_only=True,
                                           verbose=1)

    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=[categorical_accuracy])

    # if recover:
    #     from glob import glob
    #     weights = glob(os.path.join(save_dir, '*.h5'))
    #     weights = sorted(weights, key=lambda x: x.split('-')[-1].split('.'))
    #     model.load_weights(weights[-1])

    train_data_gen_args = dict(featurewise_center=True,
                               featurewise_std_normalization=True,
                               rotation_range=90,
                               width_shift_range=0.1,
                               height_shift_range=0.1,
                               zoom_range=0.2)

    def train_generator(x, y, batch_size, shift_fraction=0.):
        train_datagen = ImageDataGenerator(**train_data_gen_args)
        generator = train_datagen.flow(x, y, batch_size=batch_size)
        while 1:
            x_batch, y_batch = generator.next()
            yield ([x_batch, y_batch], [y_batch, x_batch])

    model.fit_generator(generator=train_generator(x_train, y_train, batchsize),
                        steps_per_epoch=int(y_train.shape[0] / batchsize),
                        epochs=epnumber,
                        validation_data=[[x_test, y_test], [y_test, x_test]],
                        callbacks=[lr_decay, log, checkpoint])
    return model
def train(model, data, args):
    # unpacking the data
    (x_train, y_train), (x_test, y_test) = data

    # callbacks
    log = callbacks.CSVLogger(args.save_dir + '/log.csv')
    tb = callbacks.TensorBoard(log_dir=args.save_dir + '/tensorboard-logs',
                               batch_size=args.batch_size,
                               histogram_freq=int(args.debug))
    checkpoint = callbacks.ModelCheckpoint(args.save_dir + '/weights-{epoch:02d}.hdf5',
                                           monitor='val_capsnet_acc',
                                           save_best_only=True,
                                           save_weights_only=True,
                                           verbose=1)
    lr_decay = callbacks.LearningRateScheduler(
        schedule=lambda epoch: args.lr * (args.lr_decay**epoch))

    # compile the model
    # The reconstruction loss is scaled down by 0.0005 so that it does not
    # dominate the margin loss during training.
    model.compile(optimizer=optimizers.Adam(lr=args.lr),
                  loss=[margin_loss, reconstruction_loss],
                  loss_weights=[1., args.scale_reconstruction_loss],
                  metrics={'capsnet': 'accuracy'})

    # Generator with data augmentation as used in [1]
    def train_generator_with_augmentation(x, y, batch_size, shift_fraction=0.):
        # shift up to 2 pixels for MNIST
        train_datagen = ImageDataGenerator(width_shift_range=shift_fraction,
                                           height_shift_range=shift_fraction)
        generator = train_datagen.flow(x, y, batch_size=batch_size)
        while 1:
            x_batch, y_batch = generator.next()
            yield ([x_batch, y_batch], [y_batch, x_batch])

    generator = train_generator_with_augmentation(x_train, y_train,
                                                  args.batch_size,
                                                  args.shift_fraction)

    # Note: for the decoder the input is the label and the output is the image.
    model.fit_generator(generator=generator,
                        steps_per_epoch=int(y_train.shape[0] / args.batch_size),
                        epochs=args.epochs,
                        validation_data=[[x_test, y_test], [y_test, x_test]],
                        callbacks=[log, tb, checkpoint, lr_decay])

    model.save_weights(args.save_dir + '/trained_model.hdf5')
    print('Trained model saved to \'%s/trained_model.hdf5\'' % args.save_dir)

    utils.plot_log(args.save_dir + '/log.csv', show=True)

    return model
def get_callbacks(self, model_prefix='Model'):
    if not os.path.exists('weights/'):
        os.makedirs('weights/')

    callback_list = [
        callbacks.ModelCheckpoint("weights/%s-Best.h5" % model_prefix,
                                  monitor="val_acc",
                                  save_best_only=True,
                                  save_weights_only=True),
        callbacks.LearningRateScheduler(schedule=self._cosine_anneal_schedule),
        SnapshotModelCheckpoint(self.T, self.M,
                                fn_prefix='weights/%s' % model_prefix)
    ]
    return callback_list
def get_callbacks(self, model_prefix='Model'):
    callback_list = [
        callbacks.ModelCheckpoint("./unet_best.model",
                                  monitor='val_my_dice_metric',  # or val_my_iou_metric
                                  mode='max',
                                  save_best_only=True,
                                  verbose=1),
        swa,
        callbacks.LearningRateScheduler(schedule=self._cosine_anneal_schedule)
    ]
    return callback_list
def main():
    import os
    import snorbdata
    from keras.datasets import cifar10, cifar100

    # setting the hyper parameters
    args = {'epochs': 50, 'batch_size': 250, 'lr': 1e-3, 'decay': 0.8, 'iters': 3,
            'weights': None, 'save_dir': './results', 'dataset': 10}
    print(args)

    if not os.path.exists(args['save_dir']):
        os.makedirs(args['save_dir'])

    # load data / define model
    graph = tf.Graph()
    with graph.as_default():
        tf.add_check_numerics_ops()
        if args['dataset'] == 10 or args['dataset'] == 100:
            model = CapsNet_EM(input_shape=(32, 32, 3),
                               num_classes=args['dataset'],
                               iters=args['iters'],
                               cifar=True,
                               num_caps=(16, 24, 24))
        else:
            model = CapsNet_EM(input_shape=(args['dataset'], args['dataset'], 1),
                               num_classes=5,
                               iters=args['iters'])

        print('-' * 30 + 'Summary for Model' + '-' * 30)
        model.summary()
        print('-' * 30 + 'Summaries Done' + '-' * 30)

        if args['dataset'] == 10:
            (x_train, y_train), (x_test, y_test) = cifar10.load_data()
            y_train, y_test = np.eye(10)[np.squeeze(y_train)], np.eye(10)[np.squeeze(y_test)]
        elif args['dataset'] == 100:
            (x_train, y_train), (x_test, y_test) = cifar100.load_data()
            y_train, y_test = np.eye(100)[np.squeeze(y_train)], np.eye(100)[np.squeeze(y_test)]
        else:
            x_train, y_train, x_test, y_test = snorbdata.load_data()

        if len(x_train.shape) < 4:
            x_train = np.expand_dims(x_train, axis=-1)
        if len(x_test.shape) < 4:
            x_test = np.expand_dims(x_test, axis=-1)
        print('Done loading data')

        # init the model weights with the provided one
        if args['weights'] is not None:
            model.load_weights(args['weights'])

        log = callbacks.CSVLogger(args['save_dir'] + '/log.csv')
        tb = callbacks.TensorBoard(log_dir=args['save_dir'] + '/tensorboard-logs',
                                   batch_size=args['batch_size'],
                                   write_graph=True,
                                   write_images=True)
        checkpoint = callbacks.ModelCheckpoint(args['save_dir'] + '/w_{epoch:02d}.h5',
                                               monitor='val_categorical_accuracy',
                                               save_best_only=True,
                                               save_weights_only=True,
                                               verbose=1,
                                               period=2)
        lr_decay = callbacks.LearningRateScheduler(
            schedule=lambda epoch: args['lr'] * args['decay']**epoch)
        naan = callbacks.TerminateOnNaN()

        # compile and train the model; the spread loss depends on the epoch,
        # so the model is recompiled every epoch
        for e in range(args['epochs']):
            model.compile(optimizer=optimizers.Nadam(lr=args['lr']),
                          loss=spread_loss_wrap(e, 0.2, 0.1, args['batch_size']),
                          metrics=['categorical_accuracy'])
            train_gen = ImageDataGenerator().flow(x_train, y_train,
                                                  batch_size=args['batch_size'])
            test_gen = ImageDataGenerator().flow(x_test, y_test,
                                                 batch_size=args['batch_size'])
            model.fit_generator(train_gen,
                                validation_data=test_gen,
                                initial_epoch=e,
                                epochs=e + 1,
                                verbose=1,
                                callbacks=[log, tb, checkpoint, lr_decay, naan])

        model.save_weights(args['save_dir'] + '/model.h5')
        print('Trained model saved to \'%s' % args['save_dir'])
    return