def listener(q, model_path, output_csv):
    """Reads regions from the queue, predicts nodules and stores them in the output file."""
    from keras import backend as K
    from dl_networks.sample_resnet import ResnetBuilder
    from keras.optimizers import Adam

    # Model loading inside the listener thread (otherwise keras complains)
    K.set_image_dim_ordering('th')
    model = ResnetBuilder().build_resnet_50((3, 40, 40), 1)
    model.compile(optimizer=Adam(lr=1e-4), loss='binary_crossentropy', metrics=['accuracy', 'fmeasure'])
    logging.info('Loading existing model %s...' % model_path)
    model.load_weights(model_path)

    total, errors = 0, 0
    f = open(output_csv, 'w')
    f.write('patientid,nslice,x,y,diameter,score,label\n')
    while True:
        queue_element = q.get()
        if queue_element == 'kill':
            logging.info('[LISTENER] Closing...')
            break
        try:
            filename, x, y, rois = queue_element
            filename = filename.split('/')[-1]
            preds = model.predict(np.asarray(x), verbose=1)
            logging.info("[LISTENER] Predicted patient %d %s. Batch results: %d/%d (th=0.7)" %
                         (total, filename, len([p for p in preds if p > 0.7]), len(preds)))
            for i in range(len(preds)):
                nslice, r = rois[i]
                f.write('%s,%d,%d,%d,%.3f,%.5f,%d\n' %
                        (filename, nslice, r.centroid[0], r.centroid[1],
                         r.equivalent_diameter, preds[i], y[i]))
            total += 1
            f.flush()
        except Exception as e:
            logging.error("[LISTENER] Error processing result, skipping. %s" % str(e))
            errors += 1
    logging.info("Stats: %d patients, %d errors" % (total, errors))
    f.close()
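# A minimal sketch of how this listener is typically wired up: worker
# processes push (filename, x, y, rois) tuples onto a shared queue, and the
# 'kill' sentinel shuts the listener down once every producer has finished.
# `load_patient_rois` and the process count are hypothetical placeholders,
# not part of this repo.
def run_pipeline_sketch(filenames, model_path, output_csv):
    import multiprocessing
    manager = multiprocessing.Manager()
    q = manager.Queue()
    pool = multiprocessing.Pool(processes=4)
    watcher = pool.apply_async(listener, (q, model_path, output_csv))
    jobs = [pool.apply_async(load_patient_rois, (f, q)) for f in filenames]  # hypothetical producer
    for job in jobs:
        job.get()      # wait for all producers to finish
    q.put('kill')      # tell the listener to flush, log stats and exit
    watcher.get()      # propagate listener errors, if any
    pool.close()
    pool.join()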
def train(pretrained_model='', version_dl2=0, test_dataset='luna'):
    # Data augmentation generator
    # train_datagen = ImageDataGenerator(dim_ordering="th", horizontal_flip=True, vertical_flip=True)
    train_datagen = ImageDataGenerator(
        rotation_range=30,       # .06,
        width_shift_range=0.1,   # 0.02,
        height_shift_range=0.1,  # 0.02,
        # shear_range=0.0002,
        # zoom_range=0.0002,
        dim_ordering="th",
        horizontal_flip=True,
        vertical_flip=True)
    test_datagen = ImageDataGenerator(dim_ordering="th")  # dummy for testing to have the same structure

    # LOADING PATCHES FROM DISK
    logging.info("Loading training and test sets")
    x_train = np.load(os.path.join(PATCHES_PATH, 'dl2_v{}_x_train_luna.npz'.format(version_dl2)))['arr_0']
    y_train = np.load(os.path.join(PATCHES_PATH, 'dl2_v{}_y_train_luna.npz'.format(version_dl2)))['arr_0']
    x_test = np.load(os.path.join(PATCHES_PATH, 'dl2_v{}_x_test_{}.npz'.format(version_dl2, test_dataset)))['arr_0']
    y_test = np.load(os.path.join(PATCHES_PATH, 'dl2_v{}_y_test_{}.npz'.format(version_dl2, test_dataset)))['arr_0']
    logging.info("Training set (1s/total): %d/%d" % (sum(y_train), len(y_train)))
    logging.info("Test set (1s/total): %d/%d" % (sum(y_test), len(y_test)))

    # Load model
    model = ResnetBuilder().build_resnet_50((3, 40, 40), 1)
    model.compile(optimizer=Adam(lr=1e-4), loss='binary_crossentropy', metrics=['accuracy', 'fmeasure'])
    if pretrained_model != '':
        logging.info('Loading existing model %s...' % pretrained_model)
        # model.load_weights(OUTPUT_MODEL)
        model.load_weights(pretrained_model)

    model.fit_generator(
        generator=chunk_generator(x_train, y_train, batch_size=32, thickness=1, data_generator=train_datagen),
        samples_per_epoch=1280,  # make it small to update TB and CHECKPOINT frequently
        nb_epoch=500 * 4,
        verbose=1,
        callbacks=[tb, model_checkpoint, roc_callback(x_test, y_test)],
        validation_data=chunk_generator(x_test, y_test, batch_size=32, thickness=1,
                                        data_generator=test_datagen, is_training=False),
        nb_val_samples=len(y_test),
        max_q_size=64,
        nb_worker=1)  # a lock is needed if the number of parallel workers is increased
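# `chunk_generator` lives elsewhere in the repo; below is an illustrative
# guess at the contract fit_generator expects from it, assuming it loops
# forever and augments with the Keras 1.x `random_transform` API. The real
# implementation also stacks `thickness` adjacent slices into channels,
# which this sketch omits.
def chunk_generator_sketch(x, y, batch_size=32, thickness=1, data_generator=None, is_training=True):
    while True:  # Keras 1.x generators must yield batches indefinitely
        idx = np.random.permutation(len(y)) if is_training else np.arange(len(y))
        for start in range(0, len(idx) - batch_size + 1, batch_size):
            batch = idx[start:start + batch_size]
            xb = np.asarray([x[i] for i in batch], dtype=np.float32)
            if is_training and data_generator is not None:
                # per-sample random rotation, shifts and flips
                xb = np.asarray([data_generator.random_transform(img) for img in xb])
            yield xb, np.asarray([y[i] for i in batch])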
def train(load_model=False, version=0):
    # Data augmentation generator
    train_datagen = ImageDataGenerator(
        rotation_range=30,       # .06,
        width_shift_range=0.1,   # 0.02,
        height_shift_range=0.1,  # 0.02,
        # shear_range=0.0002,
        # zoom_range=0.0002,
        dim_ordering="th",
        horizontal_flip=True,
        vertical_flip=True)
    test_datagen = ImageDataGenerator(dim_ordering="th")  # dummy for testing to have the same structure

    # LOADING PATCHES FROM DISK
    logging.info("Loading training and test sets")
    x_test = np.load(os.path.join(PATCHES_PATH, 'dl4_v{}_x_test.npz'.format(version)))['arr_0']
    y_test = np.load(os.path.join(PATCHES_PATH, 'dl4_v{}_y_test.npz'.format(version)))['arr_0']
    y_test = y_test / 84.
    y_test[y_test < 0] = -1
    y_test = sigmoid(y_test)
    y_test = np.expand_dims(y_test, axis=1)

    x_train = np.load(os.path.join(PATCHES_PATH, 'dl4_v{}_x_train.npz'.format(version)))['arr_0']
    y_train = np.load(os.path.join(PATCHES_PATH, 'dl4_v{}_y_train.npz'.format(version)))['arr_0']
    y_train = y_train / 84.
    y_train[y_train < 0] = -1
    y_train = sigmoid(y_train)
    y_train = np.expand_dims(y_train, axis=1)
    # logging.info("Training set (1s/total): %d/%d" % (sum(y_train), len(y_train)))
    # logging.info("Test set (1s/total): %d/%d" % (sum(y_test), len(y_test)))

    # Load model
    def R2(y_true, y_pred):
        """Coefficient of determination, computed batch-wise as a Keras metric."""
        SS_res = K.sum(K.square(y_true - y_pred))
        SS_tot = K.sum(K.square(y_true - K.mean(y_true)))
        return 1 - SS_res / (SS_tot + K.epsilon())

    model = ResnetBuilder().build_resnet_50((3, 40, 40), 1)
    model.compile(optimizer=Adam(lr=1e-4), loss='mse', metrics=[R2, 'mse'])
    if load_model:
        logging.info('Loading existing model...')
        model.load_weights(OUTPUT_MODEL)

    model.fit_generator(
        generator=chunks(x_train, y_train, batch_size=32, thickness=1, data_generator=train_datagen),
        samples_per_epoch=1280,  # make it small to update TB and CHECKPOINT frequently
        nb_epoch=1600,
        verbose=1,
        # class_weight={0: 1., 1: 4.},
        callbacks=[tb, model_checkpoint],  # roc_callback(x_test, y_test)],
        validation_data=chunks(x_test, y_test, batch_size=32, thickness=1,
                               data_generator=test_datagen, is_training=False),
        nb_val_samples=32 * 40,
        max_q_size=10,
        # initial_epoch=715,
        nb_worker=1)  # a lock is needed if the number of parallel workers is increased
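# The target transform above turns nodule diameters (in mm, scaled by 84)
# into a bounded regression target: diameter / 84 lands near [0, 1], missing
# diameters are clamped to -1, and the sigmoid squashes everything into
# (0, 1) so the mse loss stays well scaled. A minimal sketch, assuming
# `sigmoid` is the usual numpy logistic defined elsewhere in this file:
def sigmoid_sketch(z):
    return 1. / (1. + np.exp(-z))

# e.g. a 42 mm nodule -> sigmoid(42. / 84.) ~ 0.62; no nodule (-1) -> ~ 0.27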
def train(load_model=False, model='patches', version=0):
    # Data augmentation generator
    train_datagen = ImageDataGenerator(
        rotation_range=30,       # .06,
        width_shift_range=0.1,   # 0.02,
        height_shift_range=0.1,  # 0.02,
        # shear_range=0.0002,
        # zoom_range=0.0002,
        dim_ordering="th",
        horizontal_flip=True,
        vertical_flip=True)
    test_datagen = ImageDataGenerator(dim_ordering="th")  # dummy for testing to have the same structure

    # LOADING PATCHES FROM DISK
    logging.info("Loading training and test sets")
    print('dl1_v{}_x_test.npz'.format(version))
    x_test = np.load(os.path.join(PATCHES_PATH, 'dl1_v{}_x_test.npz'.format(version)))['arr_0']
    y_test = np.load(os.path.join(PATCHES_PATH, 'dl1_v{}_y_test.npz'.format(version)))['arr_0']
    y_test = np.expand_dims(y_test, axis=1)
    x_train = np.load(os.path.join(PATCHES_PATH, 'dl1_v{}_x_train.npz'.format(version)))['arr_0']
    y_train = np.load(os.path.join(PATCHES_PATH, 'dl1_v{}_y_train.npz'.format(version)))['arr_0']
    y_train = np.expand_dims(y_train, axis=1)
    logging.info("Training set (1s/total): %d/%d" % (sum(y_train), len(y_train)))
    logging.info("Test set (1s/total): %d/%d" % (sum(y_test), len(y_test)))

    # Load model
    if model == 'patches':
        model = ResnetBuilder().build_resnet_50((3, 40, 40), 1).get_model()
        model.compile(optimizer=Adam(lr=1e-4), loss='binary_crossentropy', metrics=['accuracy'])
    elif model == 'unet':
        factory = dl_networks.unet2.UNet()
        model = factory.create_model((32, 32, 3), 1)
    if load_model:
        logging.info('Loading existing model...')
        model.load_weights(OUTPUT_MODEL)

    model.fit_generator(
        generator=chunks(x_train, y_train, batch_size=32, thickness=1, data_generator=train_datagen),
        samples_per_epoch=1280,  # make it small to update TB and CHECKPOINT frequently
        nb_epoch=1600,
        verbose=1,
        # class_weight={0: 1., 1: 4.},
        callbacks=[tb, model_checkpoint,
                   roc_callback(chunks(x_test, y_test, batch_size=500, thickness=1,
                                       data_generator=test_datagen, is_training=False))],
        validation_data=chunks(x_test, y_test, batch_size=32, thickness=1,
                               data_generator=test_datagen, is_training=False),
        nb_val_samples=32 * 40,
        max_q_size=10,
        # initial_epoch=715,
        nb_worker=1)  # a lock is needed if the number of parallel workers is increased
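# `roc_callback` is defined elsewhere in the repo; note that here it is fed
# a generator rather than the raw (x_test, y_test) arrays. A minimal sketch
# of such a callback, assuming it pulls one fixed batch from the generator
# and reports ROC AUC after each epoch (an illustrative guess, not the
# repo's actual implementation):
from keras.callbacks import Callback
from sklearn.metrics import roc_auc_score

class RocCallbackSketch(Callback):
    def __init__(self, test_generator):
        self.x_eval, self.y_eval = next(test_generator)  # one fixed evaluation batch

    def on_epoch_end(self, epoch, logs=None):
        preds = self.model.predict(self.x_eval, verbose=0)
        logging.info("Epoch %d test ROC AUC: %.4f" % (epoch, roc_auc_score(self.y_eval, preds)))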
parser.add_argument('-input_model', help='path of the model')
parser.add_argument('-input_data', help='path of the input data')
parser.add_argument('-output_csv', help='path of the output csv')
parser.add_argument('-output_dl1', help='path of the output csv of dl1')
parser.add_argument('--roi_statistics_csv', default='',
                    help='(OPTIONAL) Path to write ROI statistics.')
parser.add_argument('--threshold', type=float, default=-1,
                    help='(OPTIONAL) Discard patches with a score below this value.')
parser.add_argument('--overwrite', action='store_true',
                    help='(OPTIONAL) Overwrite existing output. Default: off.')
parser.add_argument('--convertToFloat', action='store_true',
                    help='(OPTIONAL) Cast the images to float. Some networks only work with a '
                         'specific dtype (Mingot\'s models require float, newer ones int16).')
parser.add_argument('--eval_all', action='store_true',
                    help='Evaluate all ROIs from dl1 (skip the score filter).')
args = parser.parse_args()

# Load the network
K.set_image_dim_ordering('th')
model = ResnetBuilder().build_resnet_50((3, 40, 40), 1)
model.compile(optimizer=Adam(lr=1e-4), loss='binary_crossentropy', metrics=['accuracy', 'fmeasure'])
logging.info('Loading existing model %s...' % args.input_model)
model.load_weights(args.input_model)

# Create a dataframe for the ROIs
stats_roi_pd = pd.DataFrame()

SCORE_TH = 0.5
nodules_df = pd.read_csv(args.output_dl1)
if not args.eval_all:
    # nodules_df = nodules_df[(nodules_df['score'] > SCORE_TH) | (nodules_df['label'] == 1)]
    nodules_df = nodules_df[nodules_df['score'] > SCORE_TH]
nodules_df['nslice'] = nodules_df['nslice'].astype(int)

# Get the patient files
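# Example invocation (the script name and paths are hypothetical; the flags
# are the ones defined above):
#   python evaluate_dl2.py -input_model models/dl2.hdf5 \
#       -input_data /data/preprocessed -output_csv output/dl2_nodules.csv \
#       -output_dl1 output/dl1_nodules.csv --threshold 0.5 --overwrite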
                    help='(OPTIONAL) Discard patches with a score below this value.')
parser.add_argument('--overwrite', action='store_true',
                    help='(OPTIONAL) Overwrite existing output. Default: off.')
parser.add_argument('--convertToFloat', action='store_true',
                    help='(OPTIONAL) Cast the images to float. Some networks only work with a '
                         'specific dtype (Mingot\'s models require float, newer ones int16).')
args = parser.parse_args()

# Load the network
K.set_image_dim_ordering('th')
model = ResnetBuilder().build_resnet_50((3, 40, 40), 1)
model.compile(optimizer=Adam(lr=1e-4), loss='mse', metrics=['mse'])
logging.info('Loading existing model %s...' % args.input_model)
model.load_weights(args.input_model)

# Create a dataframe for the ROIs
stats_roi_pd = pd.DataFrame()

# Get the patient files
if os.path.isdir(args.input_data):
    patientFiles = map(lambda s: os.path.join(args.input_data, s),
                       filter(lambda s: s.endswith('.npz'), os.listdir(args.input_data)))
else:
    patientFiles = []
    with open(args.input_data, 'r') as f:
        for line in f:
def process_filenames_sequencial(filenames_list, model_path, output_csv, nodules_df=None):
    """Reads regions from each patient file, predicts nodules and stores them in the output file."""
    from keras import backend as K
    from dl_networks.sample_resnet import ResnetBuilder
    from keras.optimizers import Adam

    # Model loading inside the calling thread (otherwise keras complains)
    K.set_image_dim_ordering('th')
    model = ResnetBuilder().build_resnet_50((3, 40, 40), 1)
    model.compile(optimizer=Adam(lr=1e-4), loss='binary_crossentropy', metrics=['accuracy', 'fmeasure'])
    logging.info('Loading existing model %s...' % model_path)
    model.load_weights(model_path)

    total, errors = 0, 0
    f = open(output_csv, 'w')
    f.write('patientid,nslice,x,y,diameter,score,label\n')
    for filename in filenames_list:
        try:
            patient_data = np.load(filename)['arr_0']
            if nodules_df is not None:
                ndf = nodules_df[nodules_df['patientid'] == filename.split('/')[-1]]
                X, y, rois, stats = common.load_patient(
                    patient_data, ndf, discard_empty_nodules=False, output_rois=True, thickness=1)
            else:
                X, y, rois, stats = common.load_patient(
                    patient_data, discard_empty_nodules=False, output_rois=True, thickness=1)
            logging.info("Patient: %s, stats: %s" % (filename.split('/')[-1], stats))

            filename = filename.split('/')[-1]
            preds = model.predict(np.asarray(X), verbose=2)
            logging.info("[Process Sequential] Predicted patient %d %s. Batch results: %d/%d (th=0.7)" %
                         (total, filename, len([p for p in preds if p > 0.7]), len(preds)))
            for i in range(len(preds)):
                nslice, r = rois[i]
                f.write('%s,%d,%d,%d,%.3f,%.5f,%d\n' %
                        (filename, nslice, r.centroid[0], r.centroid[1],
                         r.equivalent_diameter, preds[i], y[i]))
            total += 1
            f.flush()
        except Exception as e:
            logging.error("[Process Sequential] Error processing result, skipping. %s" % str(e))
            errors += 1
    logging.info("Stats: %d patients, %d errors" % (total, errors))
    f.close()
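# A minimal driver for the sequential path, assuming the preprocessed .npz
# volumes live in a single directory; every path below is a hypothetical
# example, not a repo default.
if __name__ == '__main__':
    import glob
    filenames = sorted(glob.glob('/data/preprocessed/*.npz'))   # hypothetical path
    dl1_df = pd.read_csv('output/dl1_nodules.csv')              # hypothetical path
    process_filenames_sequencial(filenames, 'models/dl2.hdf5',
                                 'output/dl2_nodules.csv', nodules_df=dl1_df)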