Example #1
def listener(q, model_path, output_csv):
    """Reads regions from queue, predicts nodules and stores in the output file."""
    from keras import backend as K
    from dl_networks.sample_resnet import ResnetBuilder
    from keras.optimizers import Adam

    # Model loading inside the listener thread (otherwise keras complains)
    K.set_image_dim_ordering('th')
    model = ResnetBuilder().build_resnet_50((3, 40, 40), 1)
    model.compile(optimizer=Adam(lr=1e-4),
                  loss='binary_crossentropy',
                  metrics=['accuracy', 'fmeasure'])
    logging.info('Loading existing model %s...' % model_path)
    model.load_weights(model_path)

    total, errors = 0, 0

    f = open(output_csv, 'w')
    f.write('patientid,nslice,x,y,diameter,score,label\n')
    while True:
        queue_element = q.get()
        if queue_element == 'kill':
            logging.info('[LISTENER] Closing...')
            break

        try:
            filename, x, y, rois = queue_element
            filename = filename.split('/')[-1]

            preds = model.predict(np.asarray(x), verbose=1)
            logging.info("[LISTENER] Predicted patient %d %s. Batch results: %d/%d (th=0.7)"
                         % (total, filename, len([p for p in preds if p > 0.7]), len(preds)))
            for i in range(len(preds)):
                nslice, r = rois[i]
                f.write('%s,%d,%d,%d,%.3f,%.5f,%d\n' %
                        (filename, nslice, r.centroid[0], r.centroid[1],
                         r.equivalent_diameter, preds[i], y[i]))
            total += 1
            f.flush()
        except Exception as e:
            logging.error("[LISTENER] Error processing result, skipping. %s" %
                          str(e))
            errors += 1

    logging.info("Stats: %d patients, %d errors" % (total, errors))
    f.close()
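
The listener above is meant to run in its own process: workers push (filename, x, y, rois) tuples onto a shared queue, and the 'kill' sentinel shuts it down; building the model inside the listener is what keeps Keras happy across processes. A minimal sketch of that wiring, where produce_regions is a hypothetical producer and not part of the source:

import multiprocessing

def run_pipeline(filenames, model_path, output_csv):
    manager = multiprocessing.Manager()
    q = manager.Queue()

    # The listener gets its own process; all Keras state stays local to it
    listener_p = multiprocessing.Process(target=listener,
                                         args=(q, model_path, output_csv))
    listener_p.start()

    # Workers extract candidate regions and push them onto the queue
    pool = multiprocessing.Pool(processes=4)
    for filename in filenames:
        pool.apply_async(produce_regions, args=(filename, q))  # hypothetical producer
    pool.close()
    pool.join()

    q.put('kill')  # sentinel recognised by the listener loop
    listener_p.join()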
Example #2
def train(pretrained_model='', version_dl2=0, test_dataset='luna'):
    # Data augmentation generator
    # train_datagen = ImageDataGenerator(dim_ordering="th", horizontal_flip=True, vertical_flip=True)
    train_datagen = ImageDataGenerator(
        rotation_range=30, #.06,
        width_shift_range=0.1, #0.02,
        height_shift_range=0.1, #0.02,
        #shear_range=0.0002,
        #zoom_range=0.0002,
        dim_ordering="th",
        horizontal_flip=True,
        vertical_flip=True
        )
    test_datagen = ImageDataGenerator(dim_ordering="th")  # dummy for testing to have the same structure
    
    # LOADING PATCHES FROM DISK
    logging.info("Loading training and test sets")
    x_train = np.load(os.path.join(PATCHES_PATH, 'dl2_v{}_x_train_luna.npz'.format(version_dl2)))['arr_0']
    y_train = np.load(os.path.join(PATCHES_PATH, 'dl2_v{}_y_train_luna.npz'.format(version_dl2)))['arr_0']
    x_test = np.load(os.path.join(PATCHES_PATH, 'dl2_v{}_x_test_{}.npz'.format(version_dl2, test_dataset)))['arr_0']
    y_test = np.load(os.path.join(PATCHES_PATH, 'dl2_v{}_y_test_{}.npz'.format(version_dl2, test_dataset)))['arr_0']
    logging.info("Training set (1s/total): %d/%d" % (sum(y_train),len(y_train)))
    logging.info("Test set (1s/total): %d/%d" % (sum(y_test), len(y_test)))

    # Load model
    model = ResnetBuilder().build_resnet_50((3, 40, 40), 1)
    model.compile(optimizer=Adam(lr=1e-4), loss='binary_crossentropy', metrics=['accuracy', 'fmeasure'])
    
    if pretrained_model != '':
        logging.info('Loading existing model %s...' % pretrained_model)
        # model.load_weights(OUTPUT_MODEL)
        model.load_weights(pretrained_model)

    model.fit_generator(generator=chunk_generator(x_train, y_train, batch_size=32, thickness=1, data_generator=train_datagen),
                        samples_per_epoch=1280,  # kept small to update TensorBoard and the checkpoint frequently
                        nb_epoch=500*4,
                        verbose=1,
                        callbacks=[tb, model_checkpoint, roc_callback(x_test, y_test)],
                        validation_data=chunk_generator(x_test, y_test, batch_size=32, thickness=1, data_generator=test_datagen, is_training=False),
                        nb_val_samples=len(y_test),
                        max_q_size=64,
                        nb_worker=1)  # a lock is needed if the number of parallel workers is increased
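
chunk_generator itself is not part of this excerpt. A minimal sketch of what such a generator might look like under Keras 1.x, assuming it yields endless (X, y) batches and applies augmentation only while training (the thickness argument is accepted but unused in this sketch):

import numpy as np

def chunk_generator(X, y, batch_size=32, thickness=1,
                    data_generator=None, is_training=True):
    # fit_generator expects a generator that never terminates
    while True:
        order = np.random.permutation(len(y)) if is_training else np.arange(len(y))
        for start in range(0, len(order), batch_size):
            idx = order[start:start + batch_size]
            X_batch, y_batch = X[idx], y[idx]
            if is_training and data_generator is not None:
                # Draw one augmented batch from the Keras ImageDataGenerator
                X_batch, y_batch = next(data_generator.flow(
                    X_batch, y_batch, batch_size=len(idx)))
            yield X_batch, y_batch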
Example #3
def train(load_model=False, version=0):
    # Data augmentation generator
    train_datagen = ImageDataGenerator(
        rotation_range=30,  # .06,
        width_shift_range=0.1,  #0.02,
        height_shift_range=0.1,  #0.02,
        #shear_range=0.0002,
        #zoom_range=0.0002,
        dim_ordering="th",
        horizontal_flip=True,
        vertical_flip=True)

    test_datagen = ImageDataGenerator(
        dim_ordering="th")  # dummy for testing to have the same structure

    # LOADING PATCHES FROM DISK
    logging.info("Loading training and test sets")
    x_test = np.load(
        os.path.join(PATCHES_PATH,
                     'dl4_v{}_x_test.npz'.format(version)))['arr_0']
    y_test = np.load(
        os.path.join(PATCHES_PATH,
                     'dl4_v{}_y_test.npz'.format(version)))['arr_0']
    y_test = y_test / 84.  # scale diameter targets (84 appears to be the assumed maximum)
    y_test[y_test < 0] = -1  # clamp negatives (non-nodule patches) to -1
    y_test = sigmoid(y_test)  # squash targets into (0, 1) for the MSE loss
    y_test = np.expand_dims(y_test, axis=1)

    x_train = np.load(
        os.path.join(PATCHES_PATH,
                     'dl4_v{}_x_train.npz'.format(version)))['arr_0']
    y_train = np.load(
        os.path.join(PATCHES_PATH,
                     'dl4_v{}_y_train.npz'.format(version)))['arr_0']
    y_train = y_train / 84.  # same target transform as for the test set
    y_train[y_train < 0] = -1
    y_train = sigmoid(y_train)
    y_train = np.expand_dims(y_train, axis=1)

    #     logging.info("Training set (1s/total): %d/%d" % (sum(y_train),len(y_train)))
    #     logging.info("Test set (1s/total): %d/%d" % (sum(y_test), len(y_test)))

    # Model and metrics
    def R2(y_true, y_pred):
        """Coefficient of determination (R^2) as a Keras metric."""
        SS_res = K.sum(K.square(y_true - y_pred))
        SS_tot = K.sum(K.square(y_true - K.mean(y_true)))
        return 1 - SS_res / (SS_tot + K.epsilon())

    model = ResnetBuilder().build_resnet_50((3, 40, 40), 1)
    model.compile(optimizer=Adam(lr=1e-4), loss='mse', metrics=[R2, 'mse'])
    if load_model:
        logging.info('Loading existing model...')
        model.load_weights(OUTPUT_MODEL)

    model.fit_generator(
        generator=chunks(x_train,
                         y_train,
                         batch_size=32,
                         thickness=1,
                         data_generator=train_datagen),
        samples_per_epoch=1280,  # kept small to update TensorBoard and the checkpoint frequently
        nb_epoch=1600,
        verbose=1,
        #class_weight={0:1., 1:4.},
        callbacks=[tb, model_checkpoint],  # roc_callback(x_test, y_test)],
        validation_data=chunks(
            x_test,
            y_test,
            batch_size=32,
            thickness=1,
            data_generator=test_datagen,
            is_training=False,
        ),
        nb_val_samples=32 * 40,
        max_q_size=10,
        # initial_epoch=715,
        nb_worker=1)  # a lock is needed if the number of parallel workers is increased
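
The sigmoid helper applied to the diameter targets above is not defined in this excerpt; the standard NumPy version would be:

import numpy as np

def sigmoid(x):
    # Elementwise logistic function; squashes the targets into (0, 1)
    return 1.0 / (1.0 + np.exp(-x))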
Example #4
def train(load_model=False, model='patches', version=0):
    # Data augmentation generator
    train_datagen = ImageDataGenerator(
        rotation_range=30,  # .06,
        width_shift_range=0.1,  #0.02,
        height_shift_range=0.1,  #0.02,
        #shear_range=0.0002,
        #zoom_range=0.0002,
        dim_ordering="th",
        horizontal_flip=True,
        vertical_flip=True)

    test_datagen = ImageDataGenerator(
        dim_ordering="th")  # dummy for testing to have the same structure

    # LOADING PATCHES FROM DISK
    logging.info("Loading training and test sets")
    logging.info('dl1_v{}_x_test.npz'.format(version))
    x_test = np.load(
        os.path.join(PATCHES_PATH,
                     'dl1_v{}_x_test.npz'.format(version)))['arr_0']
    y_test = np.load(
        os.path.join(PATCHES_PATH,
                     'dl1_v{}_y_test.npz'.format(version)))['arr_0']
    y_test = np.expand_dims(y_test, axis=1)

    x_train = np.load(
        os.path.join(PATCHES_PATH,
                     'dl1_v{}_x_train.npz'.format(version)))['arr_0']
    y_train = np.load(
        os.path.join(PATCHES_PATH,
                     'dl1_v{}_y_train.npz'.format(version)))['arr_0']
    y_train = np.expand_dims(y_train, axis=1)
    logging.info("Training set (1s/total): %d/%d" %
                 (sum(y_train), len(y_train)))
    logging.info("Test set (1s/total): %d/%d" % (sum(y_test), len(y_test)))

    # Load model
    if model == 'patches':
        model = ResnetBuilder().build_resnet_50((3, 40, 40), 1).get_model()
        model.compile(optimizer=Adam(lr=1e-4),
                      loss='binary_crossentropy',
                      metrics=['accuracy'])

    elif model == 'unet':
        factory = dl_networks.unet2.UNet()
        model = factory.create_model((32, 32, 3), 1)
    if load_model:
        logging.info('Loading existing model...')
        model.load_weights(OUTPUT_MODEL)

    model.fit_generator(
        generator=chunks(x_train,
                         y_train,
                         batch_size=32,
                         thickness=1,
                         data_generator=train_datagen),
        samples_per_epoch=1280,  # kept small to update TensorBoard and the checkpoint frequently
        nb_epoch=1600,
        verbose=1,
        #class_weight={0:1., 1:4.},
        callbacks=[
            tb, model_checkpoint,
            roc_callback(
                chunks(
                    x_test,
                    y_test,
                    batch_size=500,
                    thickness=1,
                    data_generator=test_datagen,
                    is_training=False,
                ))
        ],
        validation_data=chunks(
            x_test,
            y_test,
            batch_size=32,
            thickness=1,
            data_generator=test_datagen,
            is_training=False,
        ),
        nb_val_samples=32 * 40,
        max_q_size=10,
        # initial_epoch=715,
        nb_worker=1)  # a lock is needed if the number of parallel workers is increased
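
roc_callback is likewise external to these excerpts. A minimal sketch of such a Keras 1.x callback, written for the array form used in Example #2 (the generator form above would draw a fixed validation batch instead) and assuming scikit-learn for the AUC computation:

import logging
from keras.callbacks import Callback
from sklearn.metrics import roc_auc_score

class roc_callback(Callback):
    """Sketch only: logs ROC AUC on held-out arrays after every epoch."""

    def __init__(self, x_val, y_val):
        super(roc_callback, self).__init__()
        self.x_val = x_val
        self.y_val = y_val

    def on_epoch_end(self, epoch, logs={}):
        preds = self.model.predict(self.x_val, verbose=0)
        logging.info("Epoch %d ROC AUC: %.4f"
                     % (epoch, roc_auc_score(self.y_val, preds)))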
Example #5
    parser.add_argument('-input_model', help='path of the model')
    parser.add_argument('-input_data', help='path of the input data')
    parser.add_argument('-output_csv', help='path of the output csv')
    parser.add_argument('-output_dl1', help='path of the output csv of dl1')
    parser.add_argument('--roi_statistics_csv', default='', help='(OPTIONAL) CSV in which to store ROI statistics.')
    parser.add_argument('--threshold', type=float, default=-1, help='(OPTIONAL) Discard patches with a score below this threshold.')
    parser.add_argument('--overwrite', action='store_true', help='(OPTIONAL) Overwrite existing output files. Default: off.')
    parser.add_argument('--convertToFloat', action='store_true', help="(OPTIONAL) Cast the images to float; some networks only work with one dtype (Mingot's with float, newer ones with int16).")
    parser.add_argument('--eval_all', action='store_true', help='Evaluate all ROIs from dl1, not just those above the score threshold.')

    args = parser.parse_args()

    # Load the network
    K.set_image_dim_ordering('th')
    model = ResnetBuilder().build_resnet_50((3, 40, 40), 1)
    model.compile(optimizer=Adam(lr=1e-4), loss='binary_crossentropy', metrics=['accuracy', 'fmeasure'])
    logging.info('Loading existing model %s...' % args.input_model)
    model.load_weights(args.input_model)
    
    # Create a dataframe for the ROIs
    stats_roi_pd = pd.DataFrame()
    SCORE_TH = 0.5
    nodules_df = pd.read_csv(args.output_dl1)
    if not args.eval_all:
        # nodules_df = nodules_df[(nodules_df['score'] > SCORE_TH) | (nodules_df['label'] == 1)]
        nodules_df = nodules_df[nodules_df['score'] > SCORE_TH]

    nodules_df['nslice'] = nodules_df['nslice'].astype(int)

    # Get the patient files
Example #6
        help='(OPTIONAL) Discard patches with a score below this threshold.')
    parser.add_argument('--overwrite',
                        action='store_true',
                        help='(OPTIONAL) Overwrite existing output files. Default: off.')
    parser.add_argument(
        '--convertToFloat',
        action='store_true',
        help="(OPTIONAL) Cast the images to float; some networks only work "
             "with one dtype (Mingot's with float, newer ones with int16).")
    args = parser.parse_args()

    # Load the network
    K.set_image_dim_ordering('th')
    model = ResnetBuilder().build_resnet_50((3, 40, 40), 1)
    model.compile(optimizer=Adam(lr=1e-4), loss='mse', metrics=['mse'])
    logging.info('Loading existing model %s...' % args.input_model)
    model.load_weights(args.input_model)

    # Create a dataframe for the ROIs
    stats_roi_pd = pd.DataFrame()

    # Get the patient files
    if os.path.isdir(args.input_data):
        patientFiles = [os.path.join(args.input_data, s)
                        for s in os.listdir(args.input_data)
                        if s.endswith('.npz')]
    else:
        patientFiles = []
        with open(args.input_data, 'r') as f:
            for line in f:
Example #7
def process_filenames_sequencial(filenames_list,
                                 model_path,
                                 output_csv,
                                 nodules_df=None):
    """Reads regions from queue, predicts nodules and stores in the output file."""
    from keras import backend as K
    from dl_networks.sample_resnet import ResnetBuilder
    from keras.optimizers import Adam

    # Keras imports and model loading kept inside the function, mirroring the queue-based listener
    K.set_image_dim_ordering('th')
    model = ResnetBuilder().build_resnet_50((3, 40, 40), 1)
    model.compile(optimizer=Adam(lr=1e-4),
                  loss='binary_crossentropy',
                  metrics=['accuracy', 'fmeasure'])
    logging.info('Loading existing model %s...' % model_path)
    model.load_weights(model_path)

    total, errors = 0, 0

    f = open(output_csv, 'w')
    f.write('patientid,nslice,x,y,diameter,score,label\n')
    for filename in filenames_list:
        try:
            patient_data = np.load(filename)['arr_0']
            if nodules_df is not None:
                ndf = nodules_df[nodules_df['patientid'] == filename.split('/')[-1]]
                X, y, rois, stats = common.load_patient(
                    patient_data,
                    ndf,
                    discard_empty_nodules=False,
                    output_rois=True,
                    thickness=1)
            else:
                X, y, rois, stats = common.load_patient(
                    patient_data,
                    discard_empty_nodules=False,
                    output_rois=True,
                    thickness=1)
            logging.info("Patient: %s, stats: %s" %
                         (filename.split('/')[-1], stats))

            filename = filename.split('/')[-1]
            preds = model.predict(np.asarray(X), verbose=2)
            logging.info("[Process Sequencial] Predicted patient %d %s. Batch results: %d/%d (th=0.7)"
                         % (total, filename, len([p for p in preds if p > 0.7]), len(preds)))
            for i in range(len(preds)):
                nslice, r = rois[i]
                f.write('%s,%d,%d,%d,%.3f,%.5f,%d\n' %
                        (filename, nslice, r.centroid[0], r.centroid[1],
                         r.equivalent_diameter, preds[i], y[i]))
            total += 1
            f.flush()
        except Exception as e:
            logging.error("[Process Sequencial] Error processing result, skipping. %s" % str(e))
            errors += 1

    logging.info("Stats: %d patients, %d errors" % (total, errors))
    f.close()
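
Usage is then a plain function call. A hypothetical invocation, with the glob pattern and paths standing in for whatever the real pipeline uses:

import glob

filenames = sorted(glob.glob('/data/preprocessed/*.npz'))  # hypothetical location
process_filenames_sequencial(filenames,
                             model_path='models/dl1_model.hdf5',   # hypothetical path
                             output_csv='nodule_predictions.csv')  # hypothetical path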