def get_model(dataset, model_name='ce', cm=None, **kwargs):
    # model architecture
    model = get_model_architecture(dataset, **kwargs)
    optimizer = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)

    # loss
    if model_name == 'forward':
        assert cm is not None
        model.compile(loss=forward(cm), optimizer=optimizer, metrics=['accuracy'])
    elif model_name == 'backward':
        assert cm is not None
        model.compile(loss=backward(cm), optimizer=optimizer, metrics=['accuracy'])
    elif model_name == 'boot_hard':
        model.compile(loss=boot_hard, optimizer=optimizer, metrics=['accuracy'])
    elif model_name == 'boot_soft':
        model.compile(loss=boot_soft, optimizer=optimizer, metrics=['accuracy'])
    elif model_name == 'd2l':
        model.compile(loss=lid_paced_loss(), optimizer=optimizer, metrics=['accuracy'])
    elif model_name == 'ce' or model_name == 'coteaching':
        model.compile(loss=cross_entropy, optimizer=optimizer, metrics=['accuracy'])
    elif model_name == 'distillation':
        model.compile(loss=distillation_loss(dataset.num_classes), optimizer=optimizer,
                      metrics=[acc_distillation(dataset.num_classes)])

    model._name = model_name
    return model
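# Usage sketch for get_model (illustrative only). It assumes the repo's own helpers
# (get_model_architecture, the robust losses, uniform_noise_model_P) and the Keras 2.x
# imports (e.g. `from keras.optimizers import SGD`) are available in this module;
# the 0.4 noise ratio below is a hypothetical value, not a prescribed setting.
#
#   cm = uniform_noise_model_P(dataset.num_classes, 0.4)     # uniform noise transition matrix
#   model = get_model(dataset, model_name='forward', cm=cm)  # forward-corrected loss
#   model.fit(X_train, y_train, batch_size=128, epochs=50,
#             validation_data=(X_test, y_test))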
def train(dataset='mnist', model_name='sl', batch_size=128, epochs=50, noise_ratio=0,
          asym=False, alpha=1.0, beta=1.0):
    """
    Train one model with data augmentation: random padding+cropping and horizontal flip.
    :param dataset:
    :param model_name:
    :param batch_size:
    :param epochs:
    :param noise_ratio:
    :return:
    """
    print('Dataset: %s, model: %s, batch: %s, epochs: %s, noise ratio: %s%%, '
          'asymmetric: %s, alpha: %s, beta: %s' %
          (dataset, model_name, batch_size, epochs, noise_ratio, asym, alpha, beta))

    # load data
    X_train, y_train, y_train_clean, X_test, y_test = get_data(
        dataset, noise_ratio, asym=asym, random_shuffle=False)
    n_images = X_train.shape[0]
    image_shape = X_train.shape[1:]
    num_classes = y_train.shape[1]
    print("n_images", n_images, "num_classes", num_classes, "image_shape:", image_shape)

    # load model
    model = get_model(dataset, input_tensor=None, input_shape=image_shape,
                      num_classes=num_classes)
    # model.summary()

    if dataset == 'cifar-100':
        optimizer = SGD(lr=0.1, decay=5e-3, momentum=0.9)
    else:
        optimizer = SGD(lr=0.1, decay=1e-4, momentum=0.9)

    # create loss
    if model_name == 'ce':
        loss = cross_entropy
    elif model_name == 'sl':
        loss = symmetric_cross_entropy(alpha, beta)
    elif model_name == 'lsr':
        loss = lsr
    elif model_name == 'joint':
        loss = joint_optimization_loss
    elif model_name == 'gce':
        loss = generalized_cross_entropy
    elif model_name == 'boot_hard':
        loss = boot_hard
    elif model_name == 'boot_soft':
        loss = boot_soft
    elif model_name == 'forward':
        # noise transition matrix for the corrected losses, constructed as in the
        # D2L training script below (uniform noise at the known ratio)
        P = uniform_noise_model_P(num_classes, noise_ratio / 100.)
        loss = forward(P)
    elif model_name == 'backward':
        P = uniform_noise_model_P(num_classes, noise_ratio / 100.)
        loss = backward(P)
    else:
        print("Model %s is unimplemented!" % model_name)
        exit(0)

    # model
    model.compile(loss=loss, optimizer=optimizer, metrics=['accuracy'])

    if asym:
        model_save_file = "model/asym_%s_%s_%s.{epoch:02d}.hdf5" % (
            model_name, dataset, noise_ratio)
    else:
        model_save_file = "model/%s_%s_%s.{epoch:02d}.hdf5" % (
            model_name, dataset, noise_ratio)

    # do real-time updates using callbacks
    callbacks = []
    cp_callback = ModelCheckpoint(model_save_file,
                                  monitor='val_loss',
                                  verbose=0,
                                  save_best_only=False,
                                  save_weights_only=True,
                                  period=1)
    callbacks.append(cp_callback)

    # learning rate scheduler (used with SGD)
    lr_scheduler = get_lr_scheduler(dataset)
    callbacks.append(lr_scheduler)
    callbacks.append(SGDLearningRateTracker(model))

    # log accuracy, loss, lid
    log_callback = LoggerCallback(model, X_train, y_train, y_train_clean, X_test, y_test,
                                  dataset, model_name, noise_ratio, asym, epochs,
                                  alpha, beta)
    callbacks.append(log_callback)

    # data augmentation
    if dataset in ['mnist', 'svhn']:
        datagen = ImageDataGenerator()
    elif dataset in ['cifar-10']:
        datagen = ImageDataGenerator(width_shift_range=0.2,
                                     height_shift_range=0.2,
                                     horizontal_flip=True)
    else:
        datagen = ImageDataGenerator(rotation_range=20,
                                     width_shift_range=0.2,
                                     height_shift_range=0.2,
                                     horizontal_flip=True)
    datagen.fit(X_train)

    # train model
    model.fit_generator(datagen.flow(X_train, y_train, batch_size=batch_size),
                        steps_per_epoch=len(X_train) // batch_size,
                        epochs=epochs,
                        validation_data=(X_test, y_test),
                        verbose=1,
                        callbacks=callbacks)
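# Example invocation of the SL training routine above (a sketch; the hyperparameter
# values are illustrative placeholders, not settings prescribed by this repo):
#
#   train(dataset='cifar-10', model_name='sl', batch_size=128, epochs=120,
#         noise_ratio=40, asym=False, alpha=0.1, beta=1.0)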
def train(dataset='mnist', model_name='d2l', batch_size=128, epochs=50, noise_ratio=0):
    """
    Train one model with data augmentation: random padding+cropping and horizontal flip.
    :param dataset:
    :param model_name:
    :param batch_size:
    :param epochs:
    :param noise_ratio:
    :return:
    """
    print('Dataset: %s, model: %s, batch: %s, epochs: %s, noise ratio: %s%%' %
          (dataset, model_name, batch_size, epochs, noise_ratio))

    # load data
    X_train, y_train, X_test, y_test = get_data(dataset, noise_ratio, random_shuffle=True)
    # X_train, y_train, X_val, y_val = validatation_split(X_train, y_train, split=0.1)
    n_images = X_train.shape[0]
    image_shape = X_train.shape[1:]
    num_classes = y_train.shape[1]
    print("n_images", n_images, "num_classes", num_classes, "image_shape:", image_shape)

    # load model
    model = get_model(dataset, input_tensor=None, input_shape=image_shape,
                      num_classes=num_classes)
    # model.summary()

    optimizer = SGD(lr=0.01, decay=1e-4, momentum=0.9)

    # create loss
    if model_name == 'forward':
        # for the forward/backward losses, suppose the model knows the noise ratio
        P = uniform_noise_model_P(num_classes, noise_ratio / 100.)
        loss = forward(P)
    elif model_name == 'backward':
        P = uniform_noise_model_P(num_classes, noise_ratio / 100.)
        loss = backward(P)
    elif model_name == 'boot_hard':
        loss = boot_hard
    elif model_name == 'boot_soft':
        loss = boot_soft
    elif model_name == 'd2l':
        loss = lid_paced_loss()
    else:
        loss = cross_entropy

    # model
    model.compile(loss=loss, optimizer=optimizer, metrics=['accuracy'])

    # do real-time updates using callbacks
    callbacks = []
    if model_name == 'd2l':
        init_epoch = D2L[dataset]['init_epoch']
        epoch_win = D2L[dataset]['epoch_win']
        d2l_learning = D2LCallback(model, X_train, y_train, dataset, noise_ratio,
                                   epochs=epochs, pace_type=model_name,
                                   init_epoch=init_epoch, epoch_win=epoch_win)
        callbacks.append(d2l_learning)
        cp_callback = ModelCheckpoint("model/%s_%s_%s.hdf5" % (model_name, dataset, noise_ratio),
                                      monitor='val_loss',
                                      verbose=0,
                                      save_best_only=False,
                                      save_weights_only=True,
                                      period=1)
        callbacks.append(cp_callback)
    else:
        cp_callback = ModelCheckpoint("model/%s_%s_%s.hdf5" % (model_name, dataset, noise_ratio),
                                      monitor='val_loss',
                                      verbose=0,
                                      save_best_only=False,
                                      save_weights_only=True,
                                      period=epochs)
        callbacks.append(cp_callback)

    # tensorboard callback
    callbacks.append(TensorBoard(log_dir='./log/log'))

    # learning rate scheduler (used with SGD)
    lr_scheduler = get_lr_scheduler(dataset)
    callbacks.append(lr_scheduler)

    # log accuracy, loss, lid
    log_callback = LoggerCallback(model, X_train, y_train, X_test, y_test, dataset,
                                  model_name, noise_ratio, epochs)
    callbacks.append(log_callback)

    # data augmentation
    if dataset in ['mnist', 'svhn']:
        datagen = ImageDataGenerator()
    elif dataset in ['cifar-10', 'cifar-100']:
        datagen = ImageDataGenerator(width_shift_range=0.2,
                                     height_shift_range=0.2,
                                     horizontal_flip=True)
    else:
        datagen = ImageDataGenerator(width_shift_range=0.1,
                                     height_shift_range=0.1,
                                     horizontal_flip=True)
    datagen.fit(X_train)

    # train model
    model.fit_generator(datagen.flow(X_train, y_train, batch_size=batch_size),
                        steps_per_epoch=len(X_train) // batch_size,
                        epochs=epochs,
                        validation_data=(X_test, y_test),
                        verbose=1,
                        callbacks=callbacks)
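# Example invocation of the D2L training routine above (a sketch; values are
# illustrative). Checkpoints are written to model/ and TensorBoard logs to ./log/log,
# so both directories are assumed to exist or be creatable.
#
#   train(dataset='cifar-10', model_name='d2l', batch_size=128, epochs=120, noise_ratio=40)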