def train(pretrained_model='', version_dl2=0, test_dataset='luna'):
    # Data augmentation generator
    # train_datagen = ImageDataGenerator(dim_ordering="th", horizontal_flip=True, vertical_flip=True)
    train_datagen = ImageDataGenerator(
        rotation_range=30,  # .06,
        width_shift_range=0.1,  # 0.02,
        height_shift_range=0.1,  # 0.02,
        # shear_range=0.0002,
        # zoom_range=0.0002,
        dim_ordering="th",
        horizontal_flip=True,
        vertical_flip=True
    )
    test_datagen = ImageDataGenerator(dim_ordering="th")  # dummy for testing to have the same structure

    # LOADING PATCHES FROM DISK
    logging.info("Loading training and test sets")
    x_train = np.load(os.path.join(PATCHES_PATH, 'dl2_v{}_x_train_luna.npz'.format(version_dl2)))['arr_0']
    y_train = np.load(os.path.join(PATCHES_PATH, 'dl2_v{}_y_train_luna.npz'.format(version_dl2)))['arr_0']
    x_test = np.load(os.path.join(PATCHES_PATH, 'dl2_v{}_x_test_{}.npz'.format(version_dl2, test_dataset)))['arr_0']
    y_test = np.load(os.path.join(PATCHES_PATH, 'dl2_v{}_y_test_{}.npz'.format(version_dl2, test_dataset)))['arr_0']
    logging.info("Training set (1s/total): %d/%d" % (sum(y_train), len(y_train)))
    logging.info("Test set (1s/total): %d/%d" % (sum(y_test), len(y_test)))

    # Load model
    model = ResnetBuilder().build_resnet_50((3, 40, 40), 1)
    model.compile(optimizer=Adam(lr=1e-4), loss='binary_crossentropy', metrics=['accuracy', 'fmeasure'])
    if pretrained_model != '':
        logging.info('Loading existing model...')
        # model.load_weights(OUTPUT_MODEL)
        model.load_weights(pretrained_model)

    model.fit_generator(generator=chunk_generator(x_train, y_train, batch_size=32, thickness=1, data_generator=train_datagen),
                        samples_per_epoch=1280,  # keep it small to update TensorBoard and the checkpoint frequently
                        nb_epoch=500 * 4,
                        verbose=1,
                        callbacks=[tb, model_checkpoint, roc_callback(x_test, y_test)],
                        validation_data=chunk_generator(x_test, y_test, batch_size=32, thickness=1, data_generator=test_datagen, is_training=False),
                        nb_val_samples=len(y_test),
                        max_q_size=64,
                        nb_worker=1)  # a lock is needed if the number of parallel workers is increased
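# The `chunk_generator` helper used by the fit_generator call above is defined
# elsewhere in the repo. The following is a minimal sketch of what it is
# assumed to do (endless shuffled batches routed through the Keras 1.x
# ImageDataGenerator for augmentation), not the repo's actual implementation.
def chunk_generator_sketch(x, y, batch_size=32, thickness=1, data_generator=None, is_training=True):
    while True:
        # shuffle only while training so validation batches stay deterministic
        idx = np.random.permutation(len(y)) if is_training else np.arange(len(y))
        for start in range(0, len(idx) - batch_size + 1, batch_size):
            batch = idx[start:start + batch_size]
            X_batch, y_batch = x[batch].astype('float32'), y[batch]
            if is_training and data_generator is not None:
                # random_transform applies the configured rotations/shifts/flips per patch
                X_batch = np.asarray([data_generator.random_transform(p) for p in X_batch])
            yield X_batch, y_batch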
def listener(q, model_path, output_csv):
    """Reads regions from the queue, predicts nodules and stores them in the output file."""
    from keras import backend as K
    from dl_networks.sample_resnet import ResnetBuilder
    from keras.optimizers import Adam

    # Model loading inside the listener thread (otherwise Keras complains)
    K.set_image_dim_ordering('th')
    model = ResnetBuilder().build_resnet_50((3, 40, 40), 1)
    model.compile(optimizer=Adam(lr=1e-4), loss='binary_crossentropy', metrics=['accuracy', 'fmeasure'])
    logging.info('Loading existing model %s...' % model_path)
    model.load_weights(model_path)

    total, errors = 0, 0
    f = open(output_csv, 'w')
    f.write('patientid,nslice,x,y,diameter,score,label\n')
    while True:
        queue_element = q.get()
        if queue_element == 'kill':
            logging.info('[LISTENER] Closing...')
            break
        try:
            filename, x, y, rois = queue_element
            filename = filename.split('/')[-1]
            preds = model.predict(np.asarray(x), verbose=1)
            logging.info("[LISTENER] Predicted patient %d %s. Batch results: %d/%d (th=0.7)"
                         % (total, filename, len([p for p in preds if p > 0.7]), len(preds)))
            for i in range(len(preds)):
                nslice, r = rois[i]
                f.write('%s,%d,%d,%d,%.3f,%.5f,%d\n' % (filename, nslice, r.centroid[0], r.centroid[1],
                                                        r.equivalent_diameter, preds[i], y[i]))
            total += 1
            f.flush()
        except Exception as e:
            logging.error("[LISTENER] Error processing result, skipping. %s" % str(e))
            errors += 1
    logging.info("Stats: %d patients, %d errors" % (total, errors))
    f.close()
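# How `listener` gets wired up is not shown in this fragment; below is a
# minimal sketch, assuming one writer process fed through a shared queue that
# is closed with the 'kill' sentinel the loop above checks for.
# `load_patient_rois` is a hypothetical stand-in for whatever produces the
# (filename, x, y, rois) tuples in the real pipeline.
import multiprocessing

def run_listener_sketch(filenames, model_path, output_csv):
    q = multiprocessing.Manager().Queue()
    writer = multiprocessing.Process(target=listener, args=(q, model_path, output_csv))
    writer.start()
    for filename in filenames:
        q.put(load_patient_rois(filename))  # hypothetical producer
    q.put('kill')  # sentinel: tells the listener to close the CSV and exit
    writer.join()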
def train(load_model=False, version=0):
    # Data augmentation generator
    train_datagen = ImageDataGenerator(
        rotation_range=30,  # .06,
        width_shift_range=0.1,  # 0.02,
        height_shift_range=0.1,  # 0.02,
        # shear_range=0.0002,
        # zoom_range=0.0002,
        dim_ordering="th",
        horizontal_flip=True,
        vertical_flip=True)
    test_datagen = ImageDataGenerator(dim_ordering="th")  # dummy for testing to have the same structure

    # LOADING PATCHES FROM DISK
    logging.info("Loading training and test sets")
    x_test = np.load(os.path.join(PATCHES_PATH, 'dl4_v{}_x_test.npz'.format(version)))['arr_0']
    y_test = np.load(os.path.join(PATCHES_PATH, 'dl4_v{}_y_test.npz'.format(version)))['arr_0']
    y_test = y_test / 84.
    y_test[y_test < 0] = -1
    y_test = sigmoid(y_test)
    y_test = np.expand_dims(y_test, axis=1)

    x_train = np.load(os.path.join(PATCHES_PATH, 'dl4_v{}_x_train.npz'.format(version)))['arr_0']
    y_train = np.load(os.path.join(PATCHES_PATH, 'dl4_v{}_y_train.npz'.format(version)))['arr_0']
    y_train = y_train / 84.
    y_train[y_train < 0] = -1
    y_train = sigmoid(y_train)
    y_train = np.expand_dims(y_train, axis=1)

    # logging.info("Training set (1s/total): %d/%d" % (sum(y_train), len(y_train)))
    # logging.info("Test set (1s/total): %d/%d" % (sum(y_test), len(y_test)))

    # Load model
    def R2(y_true, y_pred):
        # coefficient of determination as a Keras metric for the regression
        SS_res = K.sum(K.square(y_true - y_pred))
        SS_tot = K.sum(K.square(y_true - K.mean(y_true)))
        return 1 - SS_res / (SS_tot + K.epsilon())

    model = ResnetBuilder().build_resnet_50((3, 40, 40), 1)
    model.compile(optimizer=Adam(lr=1e-4), loss='mse', metrics=[R2, 'mse'])
    if load_model:
        logging.info('Loading existing model...')
        model.load_weights(OUTPUT_MODEL)

    model.fit_generator(
        generator=chunks(x_train, y_train, batch_size=32, thickness=1, data_generator=train_datagen),
        samples_per_epoch=1280,  # keep it small to update TensorBoard and the checkpoint frequently
        nb_epoch=1600,
        verbose=1,
        # class_weight={0: 1., 1: 4.},
        callbacks=[tb, model_checkpoint],  # roc_callback(x_test, y_test)],
        validation_data=chunks(x_test, y_test, batch_size=32, thickness=1,
                               data_generator=test_datagen, is_training=False),
        nb_val_samples=32 * 40,
        max_q_size=10,
        # initial_epoch=715,
        nb_worker=1)  # a lock is needed if the number of parallel workers is increased
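# `sigmoid` above is assumed to be the plain logistic function squashing the
# scaled malignancy targets into (0, 1); a minimal NumPy version:
def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))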
def train(load_model=False, model='patches', version=0):
    # Data augmentation generator
    train_datagen = ImageDataGenerator(
        rotation_range=30,  # .06,
        width_shift_range=0.1,  # 0.02,
        height_shift_range=0.1,  # 0.02,
        # shear_range=0.0002,
        # zoom_range=0.0002,
        dim_ordering="th",
        horizontal_flip=True,
        vertical_flip=True)
    test_datagen = ImageDataGenerator(dim_ordering="th")  # dummy for testing to have the same structure

    # LOADING PATCHES FROM DISK
    logging.info("Loading training and test sets")
    print('dl1_v{}_x_test.npz'.format(version))
    x_test = np.load(os.path.join(PATCHES_PATH, 'dl1_v{}_x_test.npz'.format(version)))['arr_0']
    y_test = np.load(os.path.join(PATCHES_PATH, 'dl1_v{}_y_test.npz'.format(version)))['arr_0']
    y_test = np.expand_dims(y_test, axis=1)
    x_train = np.load(os.path.join(PATCHES_PATH, 'dl1_v{}_x_train.npz'.format(version)))['arr_0']
    y_train = np.load(os.path.join(PATCHES_PATH, 'dl1_v{}_y_train.npz'.format(version)))['arr_0']
    y_train = np.expand_dims(y_train, axis=1)
    logging.info("Training set (1s/total): %d/%d" % (sum(y_train), len(y_train)))
    logging.info("Test set (1s/total): %d/%d" % (sum(y_test), len(y_test)))

    # Load model (note: `model` is rebound from the architecture name to the
    # compiled Keras model)
    if model == 'patches':
        model = ResnetBuilder().build_resnet_50((3, 40, 40), 1).get_model()
        model.compile(optimizer=Adam(lr=1e-4), loss='binary_crossentropy', metrics=['accuracy'])
    elif model == 'unet':
        factory = dl_networks.unet2.UNet()  # requires dl_networks.unet2 imported at module level
        model = factory.create_model((32, 32, 3), 1)
    if load_model:
        logging.info('Loading existing model...')
        model.load_weights(OUTPUT_MODEL)

    model.fit_generator(
        generator=chunks(x_train, y_train, batch_size=32, thickness=1, data_generator=train_datagen),
        samples_per_epoch=1280,  # keep it small to update TensorBoard and the checkpoint frequently
        nb_epoch=1600,
        verbose=1,
        # class_weight={0: 1., 1: 4.},
        callbacks=[tb, model_checkpoint,
                   roc_callback(chunks(x_test, y_test, batch_size=500, thickness=1,
                                       data_generator=test_datagen, is_training=False))],
        validation_data=chunks(x_test, y_test, batch_size=32, thickness=1,
                               data_generator=test_datagen, is_training=False),
        nb_val_samples=32 * 40,
        max_q_size=10,
        # initial_epoch=715,
        nb_worker=1)  # a lock is needed if the number of parallel workers is increased
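# `roc_callback` is defined elsewhere in the repo, and is even constructed with
# different arguments here (a generator) and in the dl2 trainer (raw arrays).
# The sketch below covers the array flavour only, computing validation ROC AUC
# after each epoch; it is an assumption about the callback's behaviour, not the
# repo's implementation.
from keras.callbacks import Callback
from sklearn.metrics import roc_auc_score

class RocCallbackSketch(Callback):
    def __init__(self, x_val, y_val):
        self.x_val, self.y_val = x_val, y_val

    def on_epoch_end(self, epoch, logs=None):
        preds = self.model.predict(self.x_val, verbose=0)
        logging.info("Epoch %d val AUC: %.4f" % (epoch, roc_auc_score(self.y_val, preds)))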
parser = argparse.ArgumentParser(description='Runs a trained model on a preprocessed dataset (npz)')
parser.add_argument('-input_model', help='path of the model')
parser.add_argument('-input_data', help='path of the input data')
parser.add_argument('-output_csv', help='path of the output csv')
parser.add_argument('-output_dl1', help='path of the output csv of dl1')
parser.add_argument('--roi_statistics_csv', default='', help='(OPTIONAL) annotate ROI statistics')
parser.add_argument('--threshold', type=float, default=-1, help='(OPTIONAL) discard patches scoring less than this value')
parser.add_argument('--overwrite', action='store_true', help='(OPTIONAL) overwrite existing output; default is to keep it')
parser.add_argument('--convertToFloat', action='store_true',
                    help='(OPTIONAL) cast the images to float; some networks only work with one dtype (the Mingot models with float, the newer ones with int16)')
parser.add_argument('--eval_all', action='store_true', help='evaluate all ROIs from dl1')
args = parser.parse_args()

# Load the network
K.set_image_dim_ordering('th')
model = ResnetBuilder().build_resnet_50((3, 40, 40), 1)
model.compile(optimizer=Adam(lr=1e-4), loss='binary_crossentropy', metrics=['accuracy', 'fmeasure'])
logging.info('Loading existing model %s...' % args.input_model)
model.load_weights(args.input_model)

# Create a dataframe for the ROIs
stats_roi_pd = pd.DataFrame()

SCORE_TH = 0.5
nodules_df = pd.read_csv(args.output_dl1)
if not args.eval_all:
    # nodules_df = nodules_df[(nodules_df['score'] > SCORE_TH) | (nodules_df['label'] == 1)]
    nodules_df = nodules_df[nodules_df['score'] > SCORE_TH]
nodules_df['nslice'] = nodules_df['nslice'].astype(int)
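# Example invocation of this script (the script name and all paths are
# illustrative assumptions, not the repo's actual layout):
#   python evaluate_dl2.py -input_model models/dl2_weights.hdf5 \
#       -input_data /data/preprocessed -output_csv output/dl2_rois.csv \
#       -output_dl1 output/dl1_rois.csv --threshold 0.5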
                    default=-1, help='(OPTIONAL) discard patches scoring less than this value')
parser.add_argument('--overwrite', action='store_true', help='(OPTIONAL) overwrite existing output; default is to keep it')
parser.add_argument('--convertToFloat', action='store_true',
                    help='(OPTIONAL) cast the images to float; some networks only work with one dtype (the Mingot models with float, the newer ones with int16)')
args = parser.parse_args()

# Load the network
K.set_image_dim_ordering('th')
model = ResnetBuilder().build_resnet_50((3, 40, 40), 1)
model.compile(optimizer=Adam(lr=1e-4), loss='mse', metrics=['mse'])
logging.info('Loading existing model %s...' % args.input_model)
model.load_weights(args.input_model)

# Create a dataframe for the ROIs
stats_roi_pd = pd.DataFrame()

# Get the patient files
if os.path.isdir(args.input_data):
    patientFiles = map(lambda s: os.path.join(args.input_data, s),
                       filter(lambda s: s.endswith('.npz'), os.listdir(args.input_data)))
else:
    patientFiles = []
    with open(args.input_data, 'r') as f:
parser.add_argument('-dl7', help='path of the model')
parser.add_argument('-dl8', help='path of the model SPICULATION')
parser.add_argument('-input_data', help='path of the input data')
parser.add_argument('-output_csv', help='path of the output csv')
parser.add_argument('--roi_statistics_csv', default='', help='(OPTIONAL) annotate ROI statistics')
parser.add_argument('--threshold', type=float, default=-1, help='(OPTIONAL) discard patches scoring less than this value')
parser.add_argument('--overwrite', action='store_true', help='(OPTIONAL) overwrite existing output; default is to keep it')
parser.add_argument('--convertToFloat', action='store_true',
                    help='(OPTIONAL) cast the images to float; some networks only work with one dtype (the Mingot models with float, the newer ones with int16)')
args = parser.parse_args()

# Load the networks
K.set_image_dim_ordering('th')

if args.dl1:
    model_dl1 = ResnetBuilder().build_resnet_50((3, 40, 40), 1)
    model_dl1.compile(optimizer=Adam(lr=1e-4), loss='binary_crossentropy', metrics=['accuracy', 'fmeasure'])
    logging.info('Loading nodularity model %s...' % args.dl1)
    model_dl1.load_weights(args.dl1)

if args.dl4:
    model_dl4 = ResnetBuilder().build_resnet_50((3, 40, 40), 1)
    model_dl4.compile(optimizer=Adam(lr=1e-4), loss='mse', metrics=['mse'])
    logging.info('Loading malignancy model %s...' % args.dl4)
    model_dl4.load_weights(args.dl4)

if args.dl6:
    model_dl6 = ResnetBuilder().build_resnet_50((3, 40, 40), 1)
    model_dl6.compile(optimizer=Adam(lr=1e-4), loss='mse', metrics=['mse'])
    logging.info('Loading lobulation model %s...' % args.dl6)
    model_dl6.load_weights(args.dl6)
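# How the loaded models are applied downstream is not shown in this fragment;
# a minimal sketch, assuming each patch batch X of shape (N, 3, 40, 40) is
# scored by every available characteristic model (the dictionary keys are
# illustrative names, not the repo's actual column names):
def score_patches_sketch(X):
    scores = {}
    if args.dl1:
        scores['nodularity'] = model_dl1.predict(X, verbose=0)
    if args.dl4:
        scores['malignancy'] = model_dl4.predict(X, verbose=0)
    if args.dl6:
        scores['lobulation'] = model_dl6.predict(X, verbose=0)
    return scores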
x_train = np.load(os.path.join(PATCHES_PATH, 'dl1_v1_x_train.npz'))['arr_0']
y_train = np.load(os.path.join(PATCHES_PATH, 'dl1_v1_y_train.npz'))['arr_0']
y_train = np.expand_dims(y_train, axis=1)
x_test = np.load(os.path.join(PATCHES_PATH, 'dl1_v1_x_test.npz'))['arr_0']
y_test = np.load(os.path.join(PATCHES_PATH, 'dl1_v1_y_test.npz'))['arr_0']
y_test = np.expand_dims(y_test, axis=1)
logging.info("Training set (1s/total): %d/%d" % (sum(y_train), len(y_train)))
logging.info("Test set (1s/total): %d/%d" % (sum(y_test), len(y_test)))

# Load model
model = ResnetBuilder().build_resnet_50((3, 40, 40), 1)
model.compile(optimizer=Adam(lr=1e-4), loss='binary_crossentropy', metrics=['accuracy', 'fmeasure'])
# logging.info('Loading existing model...')
# model.load_weights(OUTPUT_MODEL)

model.fit_generator(generator=chunks(x_train, y_train, batch_size=32, thickness=1),
                    samples_per_epoch=1280,  # keep it small to update TensorBoard and the checkpoint frequently
                    nb_epoch=1600,
                    verbose=1,
                    # class_weight={0: 1., 1: 4.},
                    callbacks=[tb, model_checkpoint],  # roc_callback(training_data=(x_train, y_train), validation_data=(x_test, y_test))],
                    validation_data=chunks(x_test, y_test, batch_size=32, thickness=1, is_training=False),
                    nb_val_samples=32 * 40,
                    max_q_size=10,
                    initial_epoch=715,
def process_filenames_sequencial(filenames_list, model_path, output_csv, nodules_df=None):
    """Reads regions from the given files sequentially, predicts nodules and stores them in the output file."""
    from keras import backend as K
    from dl_networks.sample_resnet import ResnetBuilder
    from keras.optimizers import Adam

    # Model loading inside the worker (otherwise Keras complains)
    K.set_image_dim_ordering('th')
    model = ResnetBuilder().build_resnet_50((3, 40, 40), 1)
    model.compile(optimizer=Adam(lr=1e-4), loss='binary_crossentropy', metrics=['accuracy', 'fmeasure'])
    logging.info('Loading existing model %s...' % model_path)
    model.load_weights(model_path)

    total, errors = 0, 0
    f = open(output_csv, 'w')
    f.write('patientid,nslice,x,y,diameter,score,label\n')
    for filename in filenames_list:
        try:
            patient_data = np.load(filename)['arr_0']
            if nodules_df is not None:
                ndf = nodules_df[nodules_df['patientid'] == filename.split('/')[-1]]
                X, y, rois, stats = common.load_patient(patient_data, ndf, discard_empty_nodules=False,
                                                        output_rois=True, thickness=1)
            else:
                X, y, rois, stats = common.load_patient(patient_data, discard_empty_nodules=False,
                                                        output_rois=True, thickness=1)
            logging.info("Patient: %s, stats: %s" % (filename.split('/')[-1], stats))
            filename = filename.split('/')[-1]
            preds = model.predict(np.asarray(X), verbose=2)
            logging.info("[Process Sequential] Predicted patient %d %s. Batch results: %d/%d (th=0.7)"
                         % (total, filename, len([p for p in preds if p > 0.7]), len(preds)))
            for i in range(len(preds)):
                nslice, r = rois[i]
                f.write('%s,%d,%d,%d,%.3f,%.5f,%d\n' % (filename, nslice, r.centroid[0], r.centroid[1],
                                                        r.equivalent_diameter, preds[i], y[i]))
            total += 1
            f.flush()
        except Exception as e:
            logging.error("[Process Sequential] Error processing result, skipping. %s" % str(e))
            errors += 1
    logging.info("Stats: %d patients, %d errors" % (total, errors))
    f.close()
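# A minimal usage sketch for the sequential path; the file locations and the
# CSV produced by dl1 are illustrative assumptions, not the repo's layout.
import glob

filenames = sorted(glob.glob('/data/preprocessed/*.npz'))
nodules_df = pd.read_csv('output/dl1_rois.csv')  # hypothetical dl1 output
process_filenames_sequencial(filenames, 'models/dl2_weights.hdf5', 'output/dl2_rois.csv', nodules_df=nodules_df)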
            yield X_batch, y_batch


# LOADING PATCHES FROM DISK
logging.info("Loading training and test sets")
x_train = np.load(os.path.join(PATCHES_PATH, 'dl3_v1_x_train.npz'))['arr_0']
y_train = np.load(os.path.join(PATCHES_PATH, 'dl3_v1_y_train.npz'))['arr_0']
y_train = np.expand_dims(y_train, axis=1)
x_test = np.load(os.path.join(PATCHES_PATH, 'dl3_v1_x_test.npz'))['arr_0']
y_test = np.load(os.path.join(PATCHES_PATH, 'dl3_v1_y_test.npz'))['arr_0']
y_test = np.expand_dims(y_test, axis=1)
logging.info("Training set (1s/total): %d/%d" % (sum(y_train), len(y_train)))
logging.info("Test set (1s/total): %d/%d" % (sum(y_test), len(y_test)))

# Load model
model = ResnetBuilder().build_resnet_50((3, 40, 40), 1)
model.compile(optimizer=Adam(lr=1e-4), loss='binary_crossentropy', metrics=['accuracy', 'fmeasure'])
model_checkpoint = ModelCheckpoint(OUTPUT_MODEL, monitor='val_loss', save_best_only=True)
# logging.info('Loading existing model...')
# model.load_weights(OUTPUT_MODEL)

model.fit_generator(
    generator=chunks(x_train, y_train, batch_size=32, thickness=1, augmentation_times=8),