import os
import random

import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import keras.backend as K
import keras.optimizers as KO
from keras.callbacks import (LearningRateScheduler, ModelCheckpoint,
                             ReduceLROnPlateau)
from keras.optimizers import Adam
from keras.utils import multi_gpu_model
# montage2d lives in skimage.util on the TF1/Keras 2-era skimage releases this
# file targets; newer releases renamed it to skimage.util.montage.
from skimage.util import montage2d

# Project-local names used below (cfg, n_gpus, load_train_csv, split_train_val,
# BaseGenerator, BaseTestGenerator, Xception, ResNet50, roc_auc_loss, f1_score,
# makedir, get_number_of_steps, MultiGPUCheckpoint, preprocess_input) are
# assumed to come from the repo's own modules.


def load_val_generator(batch_size):
    """Build the validation generator for offline evaluation."""
    train_val_df = load_train_csv(cfg)
    # Same seeded split as training, so val_df matches what the models saw.
    train_df, val_df = split_train_val(train_val_df, 0.25, seed=42)
    val_gen = BaseTestGenerator(val_df,
                                cfg.train_dir,
                                batch_size=batch_size,
                                aug_args=cfg.aug_args,
                                target_shape=cfg.input_shape[:2],
                                use_yellow=False,
                                return_label=True,
                                preprocessing_function=preprocess_input)
    return val_gen, val_df
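

# `split_train_val` is defined elsewhere in the repo. The sketch below is a
# hypothetical stand-in showing the behaviour the functions here rely on
# (a seeded shuffle followed by a fractional holdout); the real implementation
# may differ, e.g. by stratifying over the label columns.
def _split_train_val_sketch(df, val_fraction, seed=None):
    # Reproducible shuffle, then carve the head off as validation.
    shuffled = df.sample(frac=1.0, random_state=seed).reset_index(drop=True)
    n_val = int(len(shuffled) * val_fraction)
    return shuffled.iloc[n_val:], shuffled.iloc[:n_val]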


def train_roc_auc():
    """Xception baseline trained with roc_auc_loss.

    The cross-entropy train() further below is the main entry point; keeping
    a distinct name stops the two definitions from shadowing each other.
    """
    train_val_df = load_train_csv(cfg)
    train_df, val_df = split_train_val(train_val_df, 0.25)
    train_gen = BaseGenerator(train_df,
                              cfg.train_dir,
                              batch_size=cfg.batch_size,
                              aug_args=cfg.aug_args,
                              target_shape=cfg.input_shape[:2],
                              use_yellow=False)
    # Validation runs at a fixed 512x512; this assumes cfg.input_shape is also
    # 512x512 so train and val resolutions agree.
    val_gen = BaseGenerator(val_df,
                            cfg.train_dir,
                            batch_size=cfg.batch_size,
                            aug_args=cfg.aug_args,
                            target_shape=(512, 512),
                            use_yellow=False)
    if n_gpus > 0:
        # Keep a CPU-resident template so its weights can be saved and later
        # reloaded without a multi-GPU setup.
        with tf.device('/cpu:0'):
            cpu_model = Xception(cfg.input_shape,
                                 include_top=True,
                                 n_class=len(cfg.label_names))
        model = multi_gpu_model(cpu_model, gpus=n_gpus)
    else:
        model = Xception(cfg.input_shape,
                         include_top=True,
                         n_class=len(cfg.label_names))
    model.compile(optimizer=Adam(1e-3),
                  loss=roc_auc_loss,
                  metrics=['binary_accuracy', 'mae'])
    log_dir = os.path.join(cfg.log_dir, 'base_xception')
    makedir(log_dir)
    weights_path = os.path.join(log_dir, cfg.weights_file)
    if n_gpus > 0:
        # Checkpoint the CPU template, not the multi_gpu_model wrapper, with
        # the same saving options as the single-GPU path.
        checkpoint = MultiGPUCheckpoint(weights_path, cpu_model,
                                        monitor='val_loss', verbose=1,
                                        save_best_only=True, mode='min',
                                        save_weights_only=True)
    else:
        checkpoint = ModelCheckpoint(weights_path, monitor='val_loss',
                                     verbose=1, save_best_only=True,
                                     mode='min', save_weights_only=True)
    callbacks = [checkpoint]
    callbacks += [ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3,
                                    verbose=1, mode='min')]
    train_steps = get_number_of_steps(len(train_df), cfg.batch_size)
    val_steps = get_number_of_steps(len(val_df), cfg.batch_size)
    model.fit_generator(train_gen,
                        epochs=cfg.epochs,
                        steps_per_epoch=train_steps,
                        callbacks=callbacks,
                        validation_data=val_gen,
                        workers=cfg.n_works,
                        max_queue_size=cfg.n_queue,
                        use_multiprocessing=True,
                        validation_steps=val_steps,
                        initial_epoch=0)
    K.clear_session()
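

# `MultiGPUCheckpoint` comes from the repo's utilities; it is used above so
# that the weights of the single-GPU template are saved rather than those of
# the multi_gpu_model wrapper. A minimal sketch of that standard pattern,
# under the assumption the repo class behaves the same way:
class MultiGPUCheckpointSketch(ModelCheckpoint):
    def __init__(self, filepath, base_model, **kwargs):
        super(MultiGPUCheckpointSketch, self).__init__(filepath, **kwargs)
        self.base_model = base_model

    def set_model(self, model):
        # Keras hands the callback the parallel wrapper; point it at the
        # shared-weight template instead so saved files reload anywhere.
        self.model = self.base_model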


def check_batch_sample(gen=None, path=None):
    """Preview one augmented batch as a montage; builds a default training
    generator when `gen` is omitted, and saves to `path` if given."""
    if gen is None:
        train_val_df = load_train_csv(cfg)
        train_df, val_df = split_train_val(train_val_df, 0.25)
        gen = BaseGenerator(train_df,
                            cfg.train_dir,
                            batch_size=cfg.batch_size,
                            aug_args=cfg.aug_args,
                            target_shape=cfg.input_shape[:2],
                            use_yellow=False)
    batch_x, batch_y = next(gen)
    # Tile the first (red) channel of every image in the batch onto one canvas.
    x = montage2d(np.squeeze(batch_x[:, :, :, 0]))
    plt.figure(figsize=(15, 15))
    plt.imshow(x, cmap='bone')
    plt.axis('off')
    if path:
        plt.savefig(path)
    else:
        plt.show()


def load_local_gen(target_shape):
    """Build matched train/val generators at `target_shape` for local runs."""
    train_val_df = load_train_csv(cfg)
    train_df, val_df = split_train_val(train_val_df, 0.25, seed=42)
    train_gen = BaseGenerator(train_df,
                              cfg.train_dir,
                              batch_size=cfg.batch_size,
                              aug_args=cfg.aug_args.copy(),
                              target_shape=target_shape,
                              use_yellow=False,
                              preprocessing_function=preprocess_input)
    val_gen = BaseGenerator(val_df,
                            cfg.train_dir,
                            batch_size=cfg.batch_size,
                            aug_args=cfg.aug_args.copy(),
                            target_shape=target_shape,
                            use_yellow=False,
                            preprocessing_function=preprocess_input)
    return train_df, val_df, train_gen, val_gen
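

# `get_number_of_steps` is a repo helper used by the training loops below;
# given how it pairs with fit_generator, the assumed behaviour is
# ceil(n_samples / batch_size). A hypothetical equivalent:
def _get_number_of_steps_sketch(n_samples, batch_size):
    # One extra step whenever the final batch is partial.
    return int(np.ceil(n_samples / float(batch_size)))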


def pretrain(task_name='base_resnet', epochs=10, lr=1e-1,
             target_shape=(512, 512), trainable=True,
             pretrain_weights='imagenet', init_epoch=0):
    """Warm up a ResNet50 classifier before the main training runs."""
    cfg.input_shape = list(target_shape) + [3]
    train_val_df = load_train_csv(cfg)
    train_df, val_df = split_train_val(train_val_df, 0.25, seed=42)
    train_gen = BaseGenerator(train_df,
                              cfg.train_dir,
                              batch_size=cfg.batch_size,
                              aug_args=cfg.aug_args.copy(),
                              target_shape=target_shape,
                              use_yellow=False,
                              preprocessing_function=preprocess_input)
    val_gen = BaseGenerator(val_df,
                            cfg.train_dir,
                            batch_size=cfg.batch_size,
                            aug_args=cfg.aug_args.copy(),
                            target_shape=target_shape,
                            use_yellow=False,
                            preprocessing_function=preprocess_input)
    if n_gpus > 1:
        print('use multi gpu')
        with tf.device('/cpu:0'):
            cpu_model = ResNet50(input_shape=cfg.input_shape,
                                 classes=len(cfg.label_names),
                                 trainable=trainable,
                                 weights=pretrain_weights)
        model = multi_gpu_model(cpu_model, gpus=n_gpus)
    else:
        print('use single gpu')
        model = ResNet50(input_shape=cfg.input_shape,
                         classes=len(cfg.label_names),
                         trainable=trainable,
                         weights=pretrain_weights)
    model.compile(optimizer=KO.Adam(lr=lr, amsgrad=True),
                  loss='binary_crossentropy',
                  metrics=[f1_score, 'mae'])
    log_dir = os.path.join(cfg.log_dir, task_name)
    makedir(log_dir)
    weights_path = os.path.join(log_dir, cfg.weights_file)
    # Track the validation F1 ('val_f1_score'); the bare 'f1_score' key would
    # select checkpoints on the training metric instead.
    if n_gpus > 1:
        checkpoint = MultiGPUCheckpoint(weights_path, cpu_model, verbose=1,
                                        monitor='val_f1_score', mode='max',
                                        save_weights_only=True,
                                        save_best_only=True)
    else:
        checkpoint = ModelCheckpoint(weights_path, monitor='val_f1_score',
                                     verbose=1, save_best_only=True,
                                     mode='max', save_weights_only=True)
    callbacks = [checkpoint]
    callbacks += [
        ReduceLROnPlateau(monitor='val_f1_score', factor=0.5, patience=3,
                          verbose=1, mode='max')
    ]
    # callbacks += [LearningRateScheduler(lr_schedule)]
    train_steps = get_number_of_steps(len(train_df), cfg.batch_size)
    val_steps = get_number_of_steps(len(val_df), cfg.batch_size)
    model.fit_generator(train_gen,
                        epochs=epochs,
                        steps_per_epoch=train_steps,
                        callbacks=callbacks,
                        validation_data=val_gen,
                        workers=cfg.n_works,
                        max_queue_size=cfg.n_queue,
                        use_multiprocessing=True,
                        validation_steps=val_steps,
                        initial_epoch=init_epoch)
    K.clear_session()
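

# `schedule_steps` (paired with LearningRateScheduler in train() below) is a
# repo helper. The assumed contract, inferred from `lr_schedule[0][0]` seeding
# the optimizer, is a piecewise-constant schedule given as (lr, until_epoch)
# pairs, e.g. [(1e-3, 4), (1e-4, 8)]. A hypothetical equivalent:
def _schedule_steps_sketch(epoch, steps):
    for lr, until_epoch in steps:
        if epoch < until_epoch:
            return lr
    # Past the last boundary, hold the final learning rate.
    return steps[-1][0]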


def train(task_name='base_xception', epochs=6, target_shape=(512, 512),
          lr_schedule=None, weights='imagenet', trainable=True, seed=42,
          save_best_only=True, initial_epoch=0, drop_rate=0):
    """Main Xception training loop with a stepped learning-rate schedule.

    `lr_schedule` is required despite the None default: a list of
    (lr, until_epoch) pairs consumed by schedule_steps, whose first entry
    seeds the optimizer.
    """
    train_val_df = load_train_csv(cfg)
    # The split seed stays pinned at 42 so train/val membership is identical
    # across runs; `seed` only varies initialisation and augmentation.
    train_df, val_df = split_train_val(train_val_df, 0.25, seed=42)
    np.random.seed(seed + 111)
    random.seed(seed + 111)
    tf.set_random_seed(seed + 111)
    train_gen = BaseGenerator(train_df,
                              cfg.train_dir,
                              batch_size=cfg.batch_size,
                              aug_args=cfg.aug_args.copy(),
                              target_shape=target_shape,
                              use_yellow=False,
                              preprocessing_function=preprocess_input)
    val_gen = BaseGenerator(val_df,
                            cfg.train_dir,
                            batch_size=cfg.batch_size,
                            aug_args=cfg.aug_args.copy(),
                            target_shape=target_shape,
                            use_yellow=False,
                            preprocessing_function=preprocess_input)
    # The model is built from cfg.input_shape, which must agree with the
    # target_shape used by the generators above.
    if n_gpus > 1:
        print('use multi gpu')
        with tf.device('/cpu:0'):
            cpu_model = Xception(cfg.input_shape,
                                 include_top=True,
                                 n_class=len(cfg.label_names),
                                 weights=weights,
                                 trainable=trainable,
                                 drop_rate=drop_rate)
        model = multi_gpu_model(cpu_model, gpus=n_gpus)
    else:
        print('use single gpu')
        model = Xception(cfg.input_shape,
                         include_top=True,
                         n_class=len(cfg.label_names),
                         weights=weights,
                         trainable=trainable,
                         drop_rate=drop_rate)
    model.compile(optimizer=KO.Adam(lr=lr_schedule[0][0], amsgrad=True),
                  loss='binary_crossentropy',
                  metrics=[f1_score, 'mae'])
    log_dir = os.path.join(cfg.log_dir, task_name)
    makedir(log_dir)
    weights_path = os.path.join(log_dir, cfg.weights_file)
    # Select checkpoints on the validation F1 ('val_f1_score'); the bare
    # 'f1_score' key would track the training metric instead.
    if n_gpus > 1:
        checkpoint = MultiGPUCheckpoint(weights_path, cpu_model,
                                        monitor='val_f1_score', mode='max',
                                        save_best_only=save_best_only,
                                        save_weights_only=True)
    else:
        checkpoint = ModelCheckpoint(weights_path, monitor='val_f1_score',
                                     mode='max', verbose=1,
                                     save_best_only=save_best_only,
                                     save_weights_only=True)
    callbacks = [
        checkpoint,
        LearningRateScheduler(lambda epoch: schedule_steps(epoch, lr_schedule))
    ]
    # Four passes over the training set per scheduled epoch.
    train_steps = get_number_of_steps(len(train_df), cfg.batch_size) * 4
    val_steps = get_number_of_steps(len(val_df), cfg.batch_size)
    model.fit_generator(train_gen,
                        epochs=epochs,
                        steps_per_epoch=train_steps,
                        callbacks=callbacks,
                        validation_data=val_gen,
                        workers=cfg.n_works,
                        max_queue_size=cfg.n_queue,
                        use_multiprocessing=True,
                        validation_steps=val_steps,
                        initial_epoch=initial_epoch)
    del model
    del checkpoint
    K.clear_session()
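

# `f1_score` above is the repo's Keras metric. A common batchwise macro-F1
# over thresholded sigmoid outputs is sketched below as an assumed equivalent;
# Keras averages metric values per batch, so this approximates the epoch-level
# score rather than computing it exactly.
def _f1_score_sketch(y_true, y_pred):
    y_pred = K.cast(K.greater(y_pred, 0.5), K.floatx())
    tp = K.sum(y_true * y_pred, axis=0)
    fp = K.sum((1.0 - y_true) * y_pred, axis=0)
    fn = K.sum(y_true * (1.0 - y_pred), axis=0)
    precision = tp / (tp + fp + K.epsilon())
    recall = tp / (tp + fn + K.epsilon())
    f1 = 2.0 * precision * recall / (precision + recall + K.epsilon())
    # Macro-average across the label columns.
    return K.mean(f1)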