def train_(base_path):
    data, anno = read_data(base_path)
    anno = np.expand_dims(anno, axis=-1)
    mean = np.mean(data)
    std = np.std(data)
    data_ = (data - mean) / std
    train_data = data_[:150]
    train_anno = anno[:150]
    val_data = data_[150:]
    val_anno = anno[150:]

    print('-' * 30)
    print('Creating and compiling the fully convolutional regression networks.')
    print('-' * 30)

    model = buildModel_U_net(input_dim=(256, 256, 3))
    model_checkpoint = ModelCheckpoint('cell_counting.hdf5', monitor='loss',
                                       save_best_only=True)
    model.summary()

    print('...Fitting model...')
    print('-' * 30)

    change_lr = LearningRateScheduler(step_decay)
    datagen = ImageDataGenerator(
        featurewise_center=False,             # set input mean to 0 over the dataset
        samplewise_center=False,              # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,   # divide each input by its std
        zca_whitening=False,                  # apply ZCA whitening
        rotation_range=30,                    # randomly rotate images in the range (degrees, 0 to 180)
        width_shift_range=0.3,                # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.3,               # randomly shift images vertically (fraction of total height)
        zoom_range=0.3,
        shear_range=0.,
        horizontal_flip=True,                 # randomly flip images
        vertical_flip=True,                   # randomly flip images
        fill_mode='constant',
        dim_ordering='tf')

    # Fit the model on the batches generated by datagen.flow().
    model.fit_generator(datagen.flow(train_data, train_anno, batch_size=16),
                        samples_per_epoch=train_data.shape[0],
                        nb_epoch=192,
                        callbacks=[model_checkpoint, change_lr])

    model.load_weights('cell_counting.hdf5')
    A = model.predict(val_data)
    mean_diff = np.average(
        np.abs(np.sum(np.sum(A, 1), 1) - np.sum(np.sum(val_anno, 1), 1))) / 100.0
    print('After training, the difference is : {} cells per image.'.format(np.abs(mean_diff)))
def learn(filename, train_data, train_anno, val_data, val_anno, model):
    print(filename)
    model_checkpoint = ModelCheckpoint(filename, monitor='loss',
                                       save_best_only=True)
    model.summary()

    print('...Fitting model...')
    print('-' * 30)

    change_lr = LearningRateScheduler(step_decay)
    datagen = ImageDataGenerator(
        featurewise_center=False,             # set input mean to 0 over the dataset
        samplewise_center=False,              # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,   # divide each input by its std
        zca_whitening=False,                  # apply ZCA whitening
        rotation_range=30,                    # randomly rotate images in the range (degrees, 0 to 180)
        width_shift_range=0.3,                # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.3,               # randomly shift images vertically (fraction of total height)
        zoom_range=0.3,
        shear_range=0.,
        horizontal_flip=True,                 # randomly flip images
        vertical_flip=True,                   # randomly flip images
        fill_mode='constant',
        dim_ordering='tf')

    # Fit the model on the batches generated by datagen.flow().
    batch_size = 8
    model.fit_generator(datagen.flow(train_data, train_anno, batch_size=batch_size),
                        steps_per_epoch=train_data.shape[0] // batch_size,
                        epochs=192,
                        callbacks=[model_checkpoint, change_lr])

    model.load_weights(filename)
    A = model.predict(val_data)
    mean_diff = np.average(
        np.abs(np.sum(np.sum(A, 1), 1) - np.sum(np.sum(val_anno, 1), 1))) / 100.0
    print('After training, the difference is : {} cells per image.'.format(np.abs(mean_diff)))
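# The two training routines above schedule the learning rate with
# LearningRateScheduler(step_decay), but step_decay itself is not defined in
# this section. The sketch below shows a typical step-decay schedule as an
# illustration only; the initial rate, drop factor, and step size are
# assumptions, not values taken from the original code.
import math


def step_decay(epoch):
    """Hypothetical sketch: halve the learning rate every fixed number of epochs."""
    initial_lr = 1e-3   # assumed starting learning rate
    drop = 0.5          # assumed decay factor
    epochs_drop = 20.0  # assumed number of epochs between drops
    return initial_lr * (drop ** math.floor(epoch / epochs_drop))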
def image_generator(train=True):
    def wrap(value):
        # Return the given value during training and 0.0 otherwise,
        # so augmentation parameters are disabled at validation/test time.
        return float(train) and value

    return ImageDataGenerator(
        # contrast_stretching=True,
        # histogram_equalization=False,
        # adaptive_equalization=False,
        # channel_shift_range=wrap(25.5),
        # rotation_range=wrap(15.),
        # width_shift_range=wrap(0.2),
        # height_shift_range=wrap(0.2),
        # shear_range=wrap(0.2),
        # zoom_range=wrap(0.2),
        # horizontal_flip=train,
        # preprocessing_function=im_utils.scene_preprocess_input,
        preprocessing_function=lambda x: scene_preprocess_input(
            aug_images([x])[0] if train else x)
    )
def TrainModel(idfold=0):
    from setupmodel import GetSetupKfolds, GetCallbacks, GetOptimizer, GetLoss
    from buildmodel import get_unet, thick_slices

    ###
    ### load data
    ###
    kfolds = settings.options.kfolds

    print('loading memory map db for large dataset')
    numpydatabase = np.load(settings._globalnpfile)
    (train_index, test_index) = GetSetupKfolds(settings.options.dbfile, kfolds, idfold)

    print('copy data subsets into memory...')
    axialbounds = numpydatabase['axialtumorbounds']
    dataidarray = numpydatabase['dataid']
    dbtrainindex = np.isin(dataidarray, train_index)
    dbtestindex = np.isin(dataidarray, test_index)
    subsetidx_train = np.all(np.vstack((axialbounds, dbtrainindex)), axis=0)
    subsetidx_test = np.all(np.vstack((axialbounds, dbtestindex)), axis=0)
    if np.sum(subsetidx_train) + np.sum(subsetidx_test) != min(
            np.sum(axialbounds), np.sum(dbtrainindex)):
        raise ValueError("data error: slice numbers don't match")

    print('copy memory map from disk to RAM...')
    trainingsubset = numpydatabase[subsetidx_train]

    np.random.seed(seed=0)
    np.random.shuffle(trainingsubset)
    totnslice = len(trainingsubset)

    if settings.options.D3:
        x_data = trainingsubset['imagedata']
        y_data = trainingsubset['truthdata']
        x_train = thick_slices(x_data, settings.options.thickness)
        y_train = thick_slices(y_data, settings.options.thickness)
    else:
        x_train = trainingsubset['imagedata']
        y_train = trainingsubset['truthdata']

    slicesplit = int(0.9 * totnslice)
    TRAINING_SLICES = slice(0, slicesplit)
    VALIDATION_SLICES = slice(slicesplit, totnslice)

    print("\nkfolds : ", kfolds)
    print("idfold : ", idfold)
    print("slices in kfold : ", totnslice)
    print("slices training : ", slicesplit)
    print("slices validation : ", totnslice - slicesplit)
    try:
        print("slices testing : ", len(numpydatabase[subsetidx_test]))
    except Exception:
        print("slices testing : 0")

    ###
    ### data preprocessing : applying liver mask
    ###
    y_train_typed = y_train.astype(settings.SEG_DTYPE)

    liver_idx = y_train_typed > 0
    y_train_liver = np.zeros_like(y_train_typed)
    y_train_liver[liver_idx] = 1

    tumor_idx = y_train_typed > 1
    y_train_tumor = np.zeros_like(y_train_typed)
    y_train_tumor[tumor_idx] = 1

    x_masked = x_train * y_train_liver - 100.0 * (1.0 - y_train_liver)
    x_masked = x_masked.astype(settings.IMG_DTYPE)

    ###
    ### set up output, logging, and callbacks
    ###
    logfileoutputdir = '%s/%03d/%03d' % (settings.options.outdir, kfolds, idfold)
    os.system('mkdir -p ' + logfileoutputdir)
    os.system('mkdir -p ' + logfileoutputdir + '/nii')
    os.system('mkdir -p ' + logfileoutputdir + '/tumor')
    print("Output to\t", logfileoutputdir)

    ###
    ### create and run model
    ###
    opt = GetOptimizer()
    callbacks, modelloc = GetCallbacks(logfileoutputdir, "tumor")
    lss, met = GetLoss()
    model = get_unet()
    model.compile(loss=lss, metrics=met, optimizer=opt)

    print("\n\n\tlivermask training...\tModel parameters: {0:,}".format(
        model.count_params()))

    if settings.options.augment:
        train_datagen = ImageDataGenerator(
            brightness_range=[0.95, 1.05],
            width_shift_range=[-0.1, 0.1],
            height_shift_range=[-0.1, 0.1],
            horizontal_flip=True,
            vertical_flip=True,
            zoom_range=0.1,
            fill_mode='nearest',
        )
    else:
        train_datagen = ImageDataGenerator()
    test_datagen = ImageDataGenerator()

    if settings.options.D3:
        train_generator = train_datagen.flow(
            x_masked[TRAINING_SLICES, :, :, :, np.newaxis],
            y_train_tumor[TRAINING_SLICES, :, :, :, np.newaxis],
            batch_size=settings.options.trainingbatch)
        # validation generator draws from the held-out slices, matching the 2D branch below
        test_generator = test_datagen.flow(
            x_masked[VALIDATION_SLICES, :, :, :, np.newaxis],
            y_train_tumor[VALIDATION_SLICES, :, :, :, np.newaxis],
            batch_size=settings.options.validationbatch)
    else:
        train_generator = train_datagen.flow(
            x_masked[TRAINING_SLICES, :, :, np.newaxis],
            y_train_tumor[TRAINING_SLICES, :, :, np.newaxis],
            batch_size=settings.options.trainingbatch)
        test_generator = test_datagen.flow(
            x_masked[VALIDATION_SLICES, :, :, np.newaxis],
            y_train_tumor[VALIDATION_SLICES, :, :, np.newaxis],
            batch_size=settings.options.validationbatch)

    history_liver = model.fit_generator(
        train_generator,
        steps_per_epoch=slicesplit // settings.options.trainingbatch,
        validation_steps=(totnslice - slicesplit) // settings.options.validationbatch,
        epochs=settings.options.numepochs,
        validation_data=test_generator,
        callbacks=callbacks)

    ###
    ### make predictions on validation set
    ###
    print("\n\n\tapplying models...")
    if settings.options.D3:
        y_pred_float = model.predict(x_masked[VALIDATION_SLICES, :, :, :, np.newaxis])
    else:
        y_pred_float = model.predict(x_masked[VALIDATION_SLICES, :, :, np.newaxis])
    y_pred_seg = (y_pred_float[..., 0] >= settings.options.segthreshold).astype(settings.SEG_DTYPE)

    print("\tsaving to file...")
    if settings.options.D3:
        trueinnii = nib.Nifti1Image(x_train[VALIDATION_SLICES, :, :, :], None)
        truesegnii = nib.Nifti1Image(y_train[VALIDATION_SLICES, :, :, :], None)
        truelivernii = nib.Nifti1Image(y_train_liver[VALIDATION_SLICES, :, :, :], None)
        truetumornii = nib.Nifti1Image(y_train_tumor[VALIDATION_SLICES, :, :, :], None)
    else:
        trueinnii = nib.Nifti1Image(x_train[VALIDATION_SLICES, :, :], None)
        truesegnii = nib.Nifti1Image(y_train[VALIDATION_SLICES, :, :], None)
        truelivernii = nib.Nifti1Image(y_train_liver[VALIDATION_SLICES, :, :], None)
        truetumornii = nib.Nifti1Image(y_train_tumor[VALIDATION_SLICES, :, :], None)
    predsegnii = nib.Nifti1Image(y_pred_seg, None)
    predfloatnii = nib.Nifti1Image(y_pred_float, None)

    trueinnii.to_filename(logfileoutputdir + '/nii/trueimg.nii.gz')
    truesegnii.to_filename(logfileoutputdir + '/nii/truseg.nii.gz')
    truelivernii.to_filename(logfileoutputdir + '/nii/trueliver.nii.gz')
    truetumornii.to_filename(logfileoutputdir + '/nii/truetumor.nii.gz')
    predsegnii.to_filename(logfileoutputdir + '/nii/predtumorseg.nii.gz')
    predfloatnii.to_filename(logfileoutputdir + '/nii/predtumorfloat.nii.gz')

    print("\ndone saving.")

    return modelloc
def main():
    args = parse_args()

    print("load the model configuration...", file=sys.stderr)
    print("=======================================================", file=sys.stderr)

    exp_config = generate_exp_config(args.net_name, args.pre_trained,
                                     args.include_fc, args.k_fold)
    weights_path = get_weights_path(net_name=args.net_name)

    net = importlib.import_module("Nets." + args.net_name)

    batch_size = get_batch_size(args.net_name, args.pre_trained)
    input_shape = get_input_shape(args.net_name, args.pre_trained)

    if args.pre_trained:
        preprocessing_function = net.preprocess_input
    else:
        preprocessing_function = None

    weights_filename = os.path.join(weights_path, "{}.h5".format(exp_config))
    assert os.path.exists(weights_filename), "the model doesn't exist..."
    model = load_model(weights_filename)

    rotation_range = AUGMENT_PARAMETERS.get('rotation_range', 0.)
    width_shift_range = AUGMENT_PARAMETERS.get('width_shift_range', 0.)
    height_shift_range = AUGMENT_PARAMETERS.get('height_shift_range', 0.)
    shear_range = AUGMENT_PARAMETERS.get('shear_range', 0.)
    zoom_range = AUGMENT_PARAMETERS.get('zoom_range', 0.)
    fill_mode = AUGMENT_PARAMETERS.get('fill_mode', 'nearest')
    cval = AUGMENT_PARAMETERS.get('cval', 0.)
    horizontal_flip = AUGMENT_PARAMETERS.get('horizontal_flip', True)
    vertical_flip = AUGMENT_PARAMETERS.get('vertical_flip', True)

    # output path
    training_predict_path = get_training_predict_path(args.net_name)
    test_predict_path = get_test_predict_path(args.net_name)

    print("load training data...", file=sys.stderr)
    print("=======================================================", file=sys.stderr)

    img, label = load_data(dataset="train")

    split_filename = os.path.join(DATA_DIR, "KFold_{}.npz".format(args.k_fold))
    split = np.load(split_filename)
    test_indexes = split['test_indexes']

    print("validate the model on {} samples".format(test_indexes.shape[0]), file=sys.stderr)

    valid_generator = ImageDataGenerator(x=img[test_indexes],
                                         y=None,
                                         batch_size=batch_size,
                                         augment=False,
                                         shuffle=False,
                                         output_shape=(input_shape[0], input_shape[1]),
                                         n_channels=input_shape[2],
                                         preprocessing_function=preprocessing_function)

    valid_generator_aug = ImageDataGenerator(x=img[test_indexes],
                                             y=None,
                                             batch_size=batch_size,
                                             augment=True,
                                             shuffle=False,
                                             output_shape=(input_shape[0], input_shape[1]),
                                             n_channels=input_shape[2],
                                             rotation_range=rotation_range,
                                             width_shift_range=width_shift_range,
                                             height_shift_range=height_shift_range,
                                             shear_range=shear_range,
                                             zoom_range=zoom_range,
                                             fill_mode=fill_mode,
                                             cval=cval,
                                             horizontal_flip=horizontal_flip,
                                             vertical_flip=vertical_flip,
                                             preprocessing_function=preprocessing_function,
                                             augment_prob=1.0)

    valid_pred = model.predict_generator(valid_generator,
                                         use_multiprocessing=True,
                                         workers=8)

    valid_pred_aug = np.zeros((test_indexes.shape[0], N_LABELS), dtype=np.float32)
    for i in range(TEST_TIME_AUGMENT):
        valid_pred_aug += model.predict_generator(valid_generator_aug,
                                                  use_multiprocessing=True,
                                                  workers=8)

    valid_pred = 0.5 * valid_pred + 0.5 * valid_pred_aug / TEST_TIME_AUGMENT

    filename = os.path.join(training_predict_path, "{}.npz".format(exp_config))
    np.savez(file=filename, pred=valid_pred, label=label[test_indexes])

    print("load test data...", file=sys.stderr)
    print("=======================================================", file=sys.stderr)

    x_test = load_data(dataset="test")

    test_generator = ImageDataGenerator(x=x_test,
                                        batch_size=batch_size,
                                        augment=False,
                                        shuffle=False,
                                        output_shape=(input_shape[0], input_shape[1]),
                                        n_channels=input_shape[2],
                                        preprocessing_function=preprocessing_function)
    test_generator_aug = ImageDataGenerator(x=x_test,
                                            batch_size=batch_size,
                                            augment=True,
                                            shuffle=False,
                                            output_shape=(input_shape[0], input_shape[1]),
                                            n_channels=input_shape[2],
                                            rotation_range=rotation_range,
                                            width_shift_range=width_shift_range,
                                            height_shift_range=height_shift_range,
                                            shear_range=shear_range,
                                            zoom_range=zoom_range,
                                            fill_mode=fill_mode,
                                            cval=cval,
                                            horizontal_flip=horizontal_flip,
                                            vertical_flip=vertical_flip,
                                            preprocessing_function=preprocessing_function,
                                            augment_prob=1.0)

    test_pred = model.predict_generator(test_generator,
                                        use_multiprocessing=True,
                                        workers=8)

    test_pred_aug = np.zeros((x_test.shape[0], N_LABELS), dtype=np.float32)
    for i in range(TEST_TIME_AUGMENT):
        test_pred_aug += model.predict_generator(test_generator_aug,
                                                 use_multiprocessing=True,
                                                 workers=8)

    test_pred = 0.5 * test_pred + 0.5 * test_pred_aug / TEST_TIME_AUGMENT

    filename = os.path.join(test_predict_path, "{}.npz".format(exp_config))
    np.savez(file=filename, pred=test_pred)
                                       monitor='loss', save_best_only=True)
    # model.summary()

    print('...Fitting model...')
    print('-' * 30)

    change_lr = LearningRateScheduler(step_decay)
    datagen = ImageDataGenerator(
        featurewise_center=False,             # set input mean to 0 over the dataset
        samplewise_center=False,              # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,   # divide each input by its std
        zca_whitening=False,                  # apply ZCA whitening
        rotation_range=30,                    # randomly rotate images in the range (degrees, 0 to 180)
        width_shift_range=0.3,                # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.3,               # randomly shift images vertically (fraction of total height)
        zoom_range=0.3,
        shear_range=0.,
        horizontal_flip=True,                 # randomly flip images
        vertical_flip=True,                   # randomly flip images
        fill_mode='constant',
        dim_ordering='tf')

    # %% Fit the model on the batches generated by datagen.flow().
    model.fit_generator(datagen.flow(train_data, train_anno, batch_size=1),
                        steps_per_epoch=train_data.shape[0],
                        epochs=100,
                        callbacks=[model_checkpoint, change_lr],
                        initial_epoch=0)
def main():
    args = parse_args()

    print("load the model configuration...", file=sys.stderr)
    print("=======================================================", file=sys.stderr)

    exp_config = generate_exp_config(args.net_name, args.pre_trained,
                                     args.optimizer, args.k_fold)
    weights_path = get_weights_path(net_name=args.net_name)

    net = importlib.import_module("Nets." + args.net_name)

    batch_size = get_batch_size(args.net_name, args.pre_trained)
    input_shape = get_input_shape(args.net_name, args.pre_trained)

    if args.pre_trained:
        preprocessing_function = net.preprocess_input
    else:
        preprocessing_function = None

    weights_filename = os.path.join(weights_path, "{}.h5".format(exp_config))
    if os.path.exists(weights_filename):
        model = load_model(weights_filename)
    else:
        if args.pre_trained:
            model = net.build_model(input_shape=input_shape,
                                    num_classes=N_LABELS,
                                    weights='imagenet',
                                    opt=args.optimizer)
        else:
            model = net.build_model(input_shape=input_shape,
                                    num_classes=N_LABELS,
                                    weights=None,
                                    opt=args.optimizer)
    model.summary()

    print("load training and validation data...", file=sys.stderr)
    print("===========================================================================", file=sys.stderr)

    img, label = load_data(dataset="train")

    split_filename = os.path.join(DATA_DIR, "KFold_{}.npz".format(args.k_fold))
    split = np.load(split_filename)
    train_indexes = split['train_indexes']
    test_indexes = split['test_indexes']

    print("Training model on {} samples, validate on {} samples".format(
        train_indexes.shape[0], test_indexes.shape[0]), file=sys.stderr)

    # get augmentation parameters
    rotation_range = AUGMENT_PARAMETERS.get('rotation_range', 0.)
    width_shift_range = AUGMENT_PARAMETERS.get('width_shift_range', 0.)
    height_shift_range = AUGMENT_PARAMETERS.get('height_shift_range', 0.)
    shear_range = AUGMENT_PARAMETERS.get('shear_range', 0.)
    zoom_range = AUGMENT_PARAMETERS.get('zoom_range', 0.)
    fill_mode = AUGMENT_PARAMETERS.get('fill_mode', 'nearest')
    cval = AUGMENT_PARAMETERS.get('cval', 0.)
    horizontal_flip = AUGMENT_PARAMETERS.get('horizontal_flip', True)
    vertical_flip = AUGMENT_PARAMETERS.get('vertical_flip', True)

    train_generator = ImageDataGenerator(x=img[train_indexes],
                                         y=label[train_indexes],
                                         batch_size=batch_size,
                                         augment=True,
                                         shuffle=True,
                                         output_shape=(input_shape[0], input_shape[1]),
                                         n_channels=input_shape[2],
                                         rotation_range=rotation_range,
                                         width_shift_range=width_shift_range,
                                         height_shift_range=height_shift_range,
                                         shear_range=shear_range,
                                         zoom_range=zoom_range,
                                         fill_mode=fill_mode,
                                         cval=cval,
                                         horizontal_flip=horizontal_flip,
                                         vertical_flip=vertical_flip,
                                         preprocessing_function=preprocessing_function)

    valid_generator = ImageDataGenerator(x=img[test_indexes],
                                         y=label[test_indexes],
                                         batch_size=batch_size,
                                         augment=False,
                                         shuffle=False,
                                         output_shape=(input_shape[0], input_shape[1]),
                                         n_channels=input_shape[2],
                                         preprocessing_function=preprocessing_function)

    logs_path = get_logs_path(net_name=args.net_name)
    callbacks = build_callbacks(weights_path=weights_path,
                                logs_path=logs_path,
                                exp_config=exp_config)

    del img, label

    print("training model...", file=sys.stderr)
    print("===========================================================================", file=sys.stderr)

    model.fit_generator(generator=train_generator,
                        validation_data=valid_generator,
                        epochs=args.epochs,
                        verbose=args.verbose,
                        callbacks=callbacks,
                        use_multiprocessing=True,
                        workers=args.workers)

    print("training is done!", file=sys.stderr)

    acc_loss_path = get_acc_loss_path(args.net_name)
    visua_acc_loss(acc_loss_path=acc_loss_path,
                   logs_path=logs_path,
                   exp_config=exp_config)

    print("complete!!")
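# The prediction and training scripts above read augmentation settings from an
# AUGMENT_PARAMETERS dict (presumably defined in configure.py) via dict.get()
# with defaults. The dict below is purely a hypothetical illustration of the
# expected keys; every value is made up and not taken from the original code.
AUGMENT_PARAMETERS = {
    'rotation_range': 10.,      # degrees (illustrative value)
    'width_shift_range': 0.1,   # fraction of image width (illustrative value)
    'height_shift_range': 0.1,  # fraction of image height (illustrative value)
    'shear_range': 0.1,
    'zoom_range': 0.1,
    'fill_mode': 'nearest',
    'cval': 0.,
    'horizontal_flip': True,
    'vertical_flip': True,
}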
def train_():
    X, y = read_train_data()
    train_img, test_img, train_mask, test_mask = train_test_split(
        X, y, test_size=0.2, random_state=1)
    train_img, val_img, train_mask, val_mask = train_test_split(
        train_img, train_mask, test_size=0.2, random_state=1)
    print(train_img.shape, train_mask.shape)

    print('-' * 30)
    print('UNET FOR MASK SEGMENTATION.')
    print('-' * 30)

    model = get_unet(IMG_WIDTH=256, IMG_HEIGHT=256, IMG_CHANNELS=1)
    model_checkpoint = ModelCheckpoint(model_name + ".hdf5", monitor='loss',
                                       save_best_only=False)
    model.summary()

    model_json = model.to_json()
    with open("{}.json".format(model_name), "w") as json_file:
        json_file.write(model_json)

    print('...Fitting model...')
    print('-' * 30)

    change_lr = LearningRateScheduler(step_decay)
    tensorboard = TensorBoard(log_dir="logs/{}".format(model_name))
    datagen = ImageDataGenerator(
        featurewise_center=False,             # set input mean to 0 over the dataset
        samplewise_center=False,              # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,   # divide each input by its std
        zca_whitening=False,                  # apply ZCA whitening
        rotation_range=30,                    # randomly rotate images in the range (degrees, 0 to 180)
        width_shift_range=0.3,                # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.3,               # randomly shift images vertically (fraction of total height)
        zoom_range=0.3,
        shear_range=0.,
        horizontal_flip=True,                 # randomly flip images
        vertical_flip=True,                   # randomly flip images
        fill_mode='constant',
        dim_ordering='tf')

    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=[dice_coef])

    # Fit the model on the batches generated by datagen.flow().
    model.fit_generator(datagen.flow(train_img, train_mask, batch_size=8),
                        validation_data=(val_img, val_mask),
                        samples_per_epoch=train_img.shape[0],
                        nb_epoch=10,
                        callbacks=[model_checkpoint, change_lr, tensorboard])

    score = model.evaluate(test_img, test_mask, batch_size=16)
    model.save_weights('seg_weight.h5')
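# The U-Net training routine above compiles with a dice_coef metric that is
# referenced but not defined in this section. The sketch below is a standard
# soft Dice coefficient written with Keras backend ops; it is an assumption
# about what the original helper computes, and the smoothing constant is a
# hypothetical default.
from keras import backend as K


def dice_coef(y_true, y_pred, smooth=1.0):
    """Sketch of a soft Dice coefficient between binary masks (assumed smooth=1.0)."""
    y_true_f = K.flatten(y_true)
    y_pred_f = K.flatten(y_pred)
    intersection = K.sum(y_true_f * y_pred_f)
    return (2.0 * intersection + smooth) / (K.sum(y_true_f) + K.sum(y_pred_f) + smooth)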
from __future__ import print_function, division

import os
import sys

import pandas as pd
from PIL import Image

from generator import ImageDataGenerator
from configure import *
from utils import *

OUTPUT_DIR = "/home/rs619065/kaggle_HPAIC/src/test"

valid_img, valid_label = load_data(dataset="validation")

valid_generator = ImageDataGenerator(images=valid_img,
                                     augment=True,
                                     horizontal_flip=True,
                                     vertical_flip=True,
                                     rescale=1)

print("testing horizontal and vertical flip...", file=sys.stderr)

df = pd.read_csv(VALIDATION_DATA_CSV)

img = valid_generator[0]
for i in range(img.shape[0]):
    prefix = df.iloc[i][0]
    r_img = Image.open(os.path.join(TRAINING_INPUT_DIR, "{}_red.png".format(prefix)))
    g_img = Image.open(os.path.join(TRAINING_INPUT_DIR, "{}_green.png".format(prefix)))
    b_img = Image.open(os.path.join(TRAINING_INPUT_DIR, "{}_blue.png".format(prefix)))