def run(img_folder,
        img_extension='dcm',
        img_size=[288, 224],
        img_scale=4095,
        multi_view=False,
        do_featurewise_norm=True,
        featurewise_mean=398.5,
        featurewise_std=627.8,
        batch_size=16,
        samples_per_epoch=160,
        nb_epoch=20,
        balance_classes=.0,
        all_neg_skip=0.,
        pos_cls_weight=1.0,
        nb_init_filter=64,
        init_filter_size=7,
        init_conv_stride=2,
        pool_size=3,
        pool_stride=2,
        weight_decay=.0001,
        alpha=1.,
        l1_ratio=.5,
        inp_dropout=.0,
        hidden_dropout=.0,
        init_lr=.01,
        val_size=.2,
        lr_patience=5,
        es_patience=10,
        resume_from=None,
        net='resnet50',
        load_val_ram=False,
        exam_tsv='./metadata/exams_metadata.tsv',
        img_tsv='./metadata/images_crosswalk.tsv',
        best_model='./modelState/dm_resnet_best_model.h5',
        final_model="NOSAVE"):
    '''Run ResNet training on mammograms using an exam or image list

    The random seed, the number of generator workers and the number of GPUs
    are read from the environment variables RANDOM_SEED, NUM_CPU_CORES and
    NUM_GPU_DEVICES (defaults: 12345, 4 and 1).

    Args:
        img_folder, img_extension: passed to DMMetaManager to locate images.
        img_size: two-element sequence used as the generators' target_size
                and as the spatial part of the model input shape
                (1, img_size[0], img_size[1]).
        img_scale: passed to the generators as target_scale.
        multi_view ([bool]): when true, the exam list is split and the
                exam-based generators and MultiViewResNetBuilder are used;
                otherwise the flattened image list and ResNetBuilder.
        do_featurewise_norm ([bool]): use feature-wise centering and std
                normalization with the given mean/std; otherwise use
                sample-wise centering and std normalization.
        featurewise_mean, featurewise_std ([float]): they are estimated from
                1152 x 896 images. Using different sized images give very
                close results. For png, mean=7772, std=12187.
        batch_size, samples_per_epoch, nb_epoch: training batch size and the
                corresponding fit_generator arguments.
        balance_classes, all_neg_skip: forwarded to the training generator.
        pos_cls_weight ([float]): class weight of label 1 (label 0 uses 1.0).
        nb_init_filter, init_filter_size, init_conv_stride, pool_size,
        pool_stride, weight_decay, alpha, l1_ratio, inp_dropout,
        hidden_dropout: forwarded, in this order, to the network builder.
        init_lr ([float]): initial learning rate of the SGD optimizer.
        val_size: test_size given to train_test_split for the validation
                split.
        lr_patience, es_patience ([int]): patience of the ReduceLROnPlateau
                and EarlyStopping callbacks.
        resume_from ([str]): path of a saved model to continue training
                from; when given, `net` and the builder arguments are unused.
        net ([str]): architecture name, one of resnet18, resnet34, resnet50,
                resnet101, resnet152, dmresnet14, dmresnet47rb5,
                dmresnet56rb6, dmresnet65rb7.
        load_val_ram ([bool]): draw the whole validation set as one batch
                and keep it in memory instead of streaming it.
        exam_tsv, img_tsv ([str]): metadata files passed to DMMetaManager.
        best_model ([str]): path handed to DMAucModelCheckpoint.
        final_model ([str]): path to save the model after training;
                "NOSAVE" skips saving.
    Returns:
        The object returned by model.fit_generator.
    Raises:
        ValueError: if `net` is not a supported architecture (and no model
                is resumed), or if the validation set loaded into RAM does
                not have the expected number of samples.
    '''

    # Supported architectures mapped to the builder method that creates them.
    net_builders = {
        'resnet18': 'build_resnet_18',
        'resnet34': 'build_resnet_34',
        'resnet50': 'build_resnet_50',
        'resnet101': 'build_resnet_101',
        'resnet152': 'build_resnet_152',
        'dmresnet14': 'build_dm_resnet_14',
        'dmresnet47rb5': 'build_dm_resnet_47rb5',
        'dmresnet56rb6': 'build_dm_resnet_56rb6',
        'dmresnet65rb7': 'build_dm_resnet_65rb7',
    }
    # Reject an unknown architecture before any data is loaded, instead of
    # failing later on an unbound `model` variable.
    if resume_from is None and net not in net_builders:
        raise ValueError(
            "Unknown net: %r. Supported nets: %s"
            % (net, ", ".join(sorted(net_builders))))

    # Read some env variables.
    random_seed = int(os.getenv('RANDOM_SEED', 12345))
    nb_worker = int(os.getenv('NUM_CPU_CORES', 4))
    gpu_count = int(os.getenv('NUM_GPU_DEVICES', 1))

    # Setup training and validation data.
    # Load image or exam lists and split them into train and val sets.
    meta_man = DMMetaManager(exam_tsv=exam_tsv,
                             img_tsv=img_tsv,
                             img_folder=img_folder,
                             img_extension=img_extension)
    if multi_view:
        exam_list = meta_man.get_flatten_exam_list()
        exam_train, exam_val = train_test_split(
            exam_list,
            test_size=val_size,
            random_state=random_seed,
            stratify=meta_man.exam_labs(exam_list))
        val_size_ = len(exam_val) * 2  # L and R.
    else:
        img_list, lab_list = meta_man.get_flatten_img_list()
        img_train, img_val, lab_train, lab_val = train_test_split(
            img_list,
            lab_list,
            test_size=val_size,
            random_state=random_seed,
            stratify=lab_list)
        val_size_ = len(img_val)

    # Create image generator.
    img_gen = DMImageDataGenerator(horizontal_flip=True, vertical_flip=True)
    if do_featurewise_norm:
        img_gen.featurewise_center = True
        img_gen.featurewise_std_normalization = True
        img_gen.mean = featurewise_mean
        img_gen.std = featurewise_std
    else:
        img_gen.samplewise_center = True
        img_gen.samplewise_std_normalization = True

    target_size = (img_size[0], img_size[1])
    # When the validation set is kept in RAM it is drawn as one single batch.
    val_batch_size = val_size_ if load_val_ram else batch_size
    if multi_view:
        train_generator = img_gen.flow_from_exam_list(
            exam_train,
            target_size=target_size,
            target_scale=img_scale,
            batch_size=batch_size,
            balance_classes=balance_classes,
            all_neg_skip=all_neg_skip,
            shuffle=True,
            seed=random_seed,
            class_mode='binary')
        val_generator = img_gen.flow_from_exam_list(
            exam_val,
            target_size=target_size,
            target_scale=img_scale,
            batch_size=val_batch_size,
            validation_mode=True,
            class_mode='binary')
    else:
        train_generator = img_gen.flow_from_img_list(
            img_train,
            lab_train,
            target_size=target_size,
            target_scale=img_scale,
            batch_size=batch_size,
            balance_classes=balance_classes,
            all_neg_skip=all_neg_skip,
            shuffle=True,
            seed=random_seed,
            class_mode='binary')
        val_generator = img_gen.flow_from_img_list(
            img_val,
            lab_val,
            target_size=target_size,
            target_scale=img_scale,
            batch_size=val_batch_size,
            validation_mode=True,
            class_mode='binary')

    # Load validation set into RAM.
    if load_val_ram:
        validation_set = next(val_generator)
        val_x = validation_set[0]
        if multi_view:
            # The multi-view input is checked as two separate arrays
            # (presumably one per view); both must hold the whole set.
            loaded_ok = (len(val_x[0]) == val_size_
                         and len(val_x[1]) == val_size_)
        else:
            # Single-view input: only the number of samples is checked. The
            # multi-view check must NOT run here; it would compare the size
            # of the first sample with val_size_ and always fail.
            loaded_ok = len(val_x) == val_size_
        if not loaded_ok:
            raise ValueError(
                "Validation set loaded into RAM does not contain the "
                "expected %d samples" % val_size_)

    # Create model.
    if resume_from is not None:
        model = load_model(resume_from,
                           custom_objects={
                               'sensitivity': DMMetrics.sensitivity,
                               'specificity': DMMetrics.specificity
                           })
    else:
        builder = MultiViewResNetBuilder if multi_view else ResNetBuilder
        build_net = getattr(builder, net_builders[net])
        model = build_net(
            (1, img_size[0], img_size[1]), 1, nb_init_filter,
            init_filter_size, init_conv_stride, pool_size, pool_stride,
            weight_decay, alpha, l1_ratio, inp_dropout, hidden_dropout)

    if gpu_count > 1:
        model = make_parallel(model, gpu_count)

    # Model training.
    sgd = SGD(lr=init_lr, momentum=0.9, decay=0.0, nesterov=True)
    model.compile(optimizer=sgd,
                  loss='binary_crossentropy',
                  metrics=[DMMetrics.sensitivity, DMMetrics.specificity])
    reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                                  factor=0.1,
                                  patience=lr_patience,
                                  verbose=1)
    early_stopping = EarlyStopping(monitor='val_loss',
                                   patience=es_patience,
                                   verbose=1)
    if load_val_ram:
        auc_checkpointer = DMAucModelCheckpoint(best_model,
                                                validation_set,
                                                batch_size=batch_size)
    else:
        auc_checkpointer = DMAucModelCheckpoint(best_model,
                                                val_generator,
                                                nb_test_samples=val_size_)
    hist = model.fit_generator(
        train_generator,
        samples_per_epoch=samples_per_epoch,
        nb_epoch=nb_epoch,
        class_weight={
            0: 1.0,
            1: pos_cls_weight
        },
        validation_data=validation_set if load_val_ram else val_generator,
        nb_val_samples=val_size_,
        callbacks=[reduce_lr, early_stopping, auc_checkpointer],
        nb_worker=nb_worker,
        pickle_safe=True,  # turn on pickle_safe to avoid a strange error.
        verbose=2)

    # Training report.
    # argmin gives the first epoch with the lowest validation loss and, unlike
    # comparing the history list with a scalar, does not depend on the
    # history holding numpy scalars.
    best_epoch = int(np.argmin(hist.history['val_loss']))
    best_val_loss = hist.history['val_loss'][best_epoch]
    best_val_sensitivity = hist.history['val_sensitivity'][best_epoch]
    best_val_specificity = hist.history['val_specificity'][best_epoch]
    # Single-argument print calls behave the same on Python 2 and 3.
    print("\n==== Training summary ====")
    print("Minimum val loss achieved at epoch: %d" % (best_epoch + 1))
    print("Best val loss: %s" % (best_val_loss,))
    print("Best val sensitivity: %s" % (best_val_sensitivity,))
    print("Best val specificity: %s" % (best_val_specificity,))

    if final_model != "NOSAVE":
        model.save(final_model)

    return hist
def run(img_folder, img_extension='dcm',
        img_height=1024, img_scale=4095,
        do_featurewise_norm=True, norm_fit_size=10,
        img_per_batch=2, roi_per_img=32, roi_size=(256, 256),
        one_patch_mode=False,
        low_int_threshold=.05, blob_min_area=3,
        blob_min_int=.5, blob_max_int=.85, blob_th_step=10,
        data_augmentation=False, roi_state=None, clf_bs=32, cutpoint=.5,
        amp_factor=1., return_sample_weight=True, auto_batch_balance=True,
        patches_per_epoch=12800, nb_epoch=20,
        neg_vs_pos_ratio=None, all_neg_skip=0.,
        nb_init_filter=32, init_filter_size=5, init_conv_stride=2,
        pool_size=2, pool_stride=2,
        weight_decay=.0001, alpha=.0001, l1_ratio=.0,
        inp_dropout=.0, hidden_dropout=.0, init_lr=.01,
        test_size=.2, val_size=.0,
        lr_patience=3, es_patience=10,
        resume_from=None, net='resnet50', load_val_ram=False,
        load_train_ram=False, no_pos_skip=0., balance_classes=0.,
        pred_img_per_batch=1, pred_roi_per_img=32,
        exam_tsv='./metadata/exams_metadata.tsv',
        img_tsv='./metadata/images_crosswalk.tsv',
        best_model='./modelState/dm_candidROI_best_model.h5',
        final_model="NOSAVE",
        pred_trainval=False, pred_out="dl_pred_out.pkl"):
    '''Run ResNet training on candidate ROIs from mammograms

    Subjects are split into train/test (and optionally val) sets, a ResNet
    is trained on patches produced by flow_from_candid_roi, and finally the
    best checkpoint is used to predict on the exam lists, with the
    predictions pickled to `pred_out`.

    The random seed, the number of generator workers and the number of GPUs
    are read from the environment variables RANDOM_SEED, NUM_CPU_CORES and
    NUM_GPU_DEVICES (defaults: 12345, 4 and 1).

    Args:
        img_folder, img_extension, exam_tsv, img_tsv: passed to
                DMMetaManager.
        img_height, img_scale: passed to the generators as target_height
                and target_scale.
        do_featurewise_norm ([bool]): fit feature-wise mean and std on the
                training data; otherwise use sample-wise normalization.
        norm_fit_size ([int]): the number of patients used to calculate
                feature-wise mean and std.
        img_per_batch, roi_per_img, roi_size, low_int_threshold,
        blob_min_area, blob_min_int, blob_max_int, blob_th_step, clf_bs,
        cutpoint, amp_factor, all_neg_skip: forwarded to
                flow_from_candid_roi.
        one_patch_mode ([bool]): when true, train with binary labels,
                binary cross-entropy and sensitivity/specificity metrics;
                otherwise categorical labels, categorical cross-entropy and
                accuracy/precision/recall.
        data_augmentation ([bool]): enable flips, rotation and shear on the
                train/val generator.
        roi_state ([str]): path of a saved ROI classifier handed to the
                generators as roi_clf; None disables it.
        return_sample_weight, auto_batch_balance: forwarded to the training
                generator (the validation generator always uses False).
        patches_per_epoch, nb_epoch: samples_per_epoch and nb_epoch of
                fit_generator.
        neg_vs_pos_ratio: when not None, the train and val subject lists
                are subset with DMMetaManager.subset_subj_list.
        nb_init_filter, init_filter_size, init_conv_stride, pool_size,
        pool_stride, weight_decay, alpha, l1_ratio, inp_dropout,
        hidden_dropout: forwarded, in this order, to the network builder.
        init_lr ([float]): initial learning rate of the SGD optimizer.
        test_size: test_size of the train/test subject split.
        val_size: when > 0, the train subjects are split again with this
                test_size; otherwise the test subjects are reused as the
                validation set.
        lr_patience, es_patience ([int]): patience of the ReduceLROnPlateau
                and EarlyStopping callbacks.
        resume_from ([str]): path of a saved model to continue training
                from; when given, `net` and the builder arguments are unused.
        net ([str]): one of resnet18, resnet34, resnet50, resnet101,
                resnet152.
        load_val_ram, load_train_ram ([bool]): load the validation / train
                data into RAM with load_dat_ram before training.
        no_pos_skip, balance_classes: forwarded to imgen_trainval.flow when
                load_train_ram is true.
        pred_img_per_batch, pred_roi_per_img: forwarded to get_exam_pred.
        best_model ([str]): checkpoint path handed to DMAucModelCheckpoint
                and reloaded for prediction; 'NOSAVE' skips prediction.
        final_model ([str]): path to save the model after training;
                "NOSAVE" skips saving.
        pred_trainval ([bool]): also predict on the train and val exam lists.
        pred_out ([str]): output path of the pickled predictions.
    Returns:
        The object returned by model.fit_generator.
    Raises:
        ValueError: if `net` is not a supported architecture and no model
                is resumed.
    '''

    # Supported architectures mapped to the builder method that creates them.
    net_builders = {
        'resnet18': 'build_resnet_18',
        'resnet34': 'build_resnet_34',
        'resnet50': 'build_resnet_50',
        'resnet101': 'build_resnet_101',
        'resnet152': 'build_resnet_152',
    }
    # Reject an unknown architecture before any data is loaded, instead of
    # failing later on an unbound `model` variable.
    if resume_from is None and net not in net_builders:
        raise ValueError(
            "Unknown net: %r. Supported nets: %s"
            % (net, ", ".join(sorted(net_builders))))

    # Custom metric functions needed to deserialize saved models.
    custom_objects = {
        'sensitivity': DMMetrics.sensitivity,
        'specificity': DMMetrics.specificity
    }
    # Candidate-ROI (blob detection) settings shared by every generator and
    # by the prediction calls.
    blob_kwargs = dict(
        low_int_threshold=low_int_threshold, blob_min_area=blob_min_area,
        blob_min_int=blob_min_int, blob_max_int=blob_max_int,
        blob_th_step=blob_th_step)

    def report_class_counts(labels):
        # Print the number of samples of every class found in `labels`.
        sparse_y = to_sparse(labels)
        for uy in np.unique(sparse_y):
            print("Nb of samples for class:%d = %d"
                  % (uy, (sparse_y == uy).sum()))
        sys.stdout.flush()

    # Read some env variables.
    random_seed = int(os.getenv('RANDOM_SEED', 12345))
    # Use of multiple CPU cores is not working!
    # When nb_worker>1 and pickle_safe=True, this error is encountered:
    # "failed to enqueue async memcpy from host to device: CUDA_ERROR_NOT_INITIALIZED"
    # To avoid the error, only this combination worked:
    # nb_worker=1 and pickle_safe=False.
    nb_worker = int(os.getenv('NUM_CPU_CORES', 4))
    gpu_count = int(os.getenv('NUM_GPU_DEVICES', 1))

    # Setup training and validation data.
    # Load image or exam lists and split them into train and val sets.
    meta_man = DMMetaManager(exam_tsv=exam_tsv,
                             img_tsv=img_tsv,
                             img_folder=img_folder,
                             img_extension=img_extension)
    # Split data based on subjects.
    subj_list, subj_labs = meta_man.get_subj_labs()
    subj_train, subj_test, slab_train, slab_test = train_test_split(
        subj_list, subj_labs, test_size=test_size, random_state=random_seed,
        stratify=subj_labs)
    if val_size > 0:  # train/val split.
        subj_train, subj_val, slab_train, slab_val = train_test_split(
            subj_train, slab_train, test_size=val_size,
            random_state=random_seed, stratify=slab_train)
    else:  # use test as val. make a copy of the test list.
        subj_val = list(subj_test)
        slab_val = list(slab_test)
    # Subset subject lists to desired ratio.
    if neg_vs_pos_ratio is not None:
        subj_train, slab_train = DMMetaManager.subset_subj_list(
            subj_train, slab_train, neg_vs_pos_ratio, random_seed)
        subj_val, slab_val = DMMetaManager.subset_subj_list(
            subj_val, slab_val, neg_vs_pos_ratio, random_seed)
    # Single-argument print calls behave the same on Python 2 and 3.
    print("After sampling, Nb of subjects for train=%d, val=%d, test=%d"
          % (len(subj_train), len(subj_val), len(subj_test)))
    # Get image and label lists.
    img_train, lab_train = meta_man.get_flatten_img_list(subj_train)
    img_val, lab_val = meta_man.get_flatten_img_list(subj_val)

    # Create image generators for train, fit and val.
    imgen_trainval = DMImageDataGenerator()
    if data_augmentation:
        imgen_trainval.horizontal_flip = True
        imgen_trainval.vertical_flip = True
        imgen_trainval.rotation_range = 45.
        imgen_trainval.shear_range = np.pi/8.

    if do_featurewise_norm:
        imgen_trainval.featurewise_center = True
        imgen_trainval.featurewise_std_normalization = True
        # Fit feature-wise mean and std.
        img_fit, _ = meta_man.get_flatten_img_list(
            subj_train[:norm_fit_size])  # fit on a subset.
        print(">>> Fit image generator <<<")
        sys.stdout.flush()
        fit_generator = imgen_trainval.flow_from_candid_roi(
            img_fit,
            target_height=img_height, target_scale=img_scale,
            class_mode=None, validation_mode=True,
            img_per_batch=len(img_fit), roi_per_img=roi_per_img,
            roi_size=roi_size,
            roi_clf=None, return_sample_weight=False, seed=random_seed,
            **blob_kwargs)
        imgen_trainval.fit(next(fit_generator))
        print("Estimates from %d images: mean=%.1f, std=%.1f."
              % (len(img_fit), imgen_trainval.mean, imgen_trainval.std))
        sys.stdout.flush()
    else:
        imgen_trainval.samplewise_center = True
        imgen_trainval.samplewise_std_normalization = True

    # Load ROI classifier.
    if roi_state is not None:
        roi_clf = load_model(roi_state, custom_objects=custom_objects)
        graph = tf.get_default_graph()
    else:
        roi_clf = None
        graph = None

    # Set some DL training related parameters.
    if one_patch_mode:
        class_mode = 'binary'
        loss = 'binary_crossentropy'
        metrics = [DMMetrics.sensitivity, DMMetrics.specificity]
    else:
        class_mode = 'categorical'
        loss = 'categorical_crossentropy'
        metrics = ['accuracy', 'precision', 'recall']
    # When the train set is loaded into RAM, the generator is run in
    # validation mode and asked for raw images; `imgen_trainval.flow` is used
    # for the actual training batches afterwards.
    train_in_ram = bool(load_train_ram)

    # Create train and val generators.
    print(">>> Train image generator <<<")
    sys.stdout.flush()
    train_generator = imgen_trainval.flow_from_candid_roi(
        img_train, lab_train,
        target_height=img_height, target_scale=img_scale,
        class_mode=class_mode, validation_mode=train_in_ram,
        img_per_batch=img_per_batch, roi_per_img=roi_per_img,
        roi_size=roi_size, one_patch_mode=one_patch_mode,
        tf_graph=graph, roi_clf=roi_clf, clf_bs=clf_bs, cutpoint=cutpoint,
        amp_factor=amp_factor, return_sample_weight=return_sample_weight,
        auto_batch_balance=auto_batch_balance,
        all_neg_skip=all_neg_skip, shuffle=True, seed=random_seed,
        return_raw_img=train_in_ram,
        **blob_kwargs)

    print(">>> Validation image generator <<<")
    sys.stdout.flush()
    val_generator = imgen_trainval.flow_from_candid_roi(
        img_val, lab_val,
        target_height=img_height, target_scale=img_scale,
        class_mode=class_mode, validation_mode=True,
        img_per_batch=img_per_batch, roi_per_img=roi_per_img,
        roi_size=roi_size, one_patch_mode=one_patch_mode,
        tf_graph=graph, roi_clf=roi_clf, clf_bs=clf_bs, cutpoint=cutpoint,
        amp_factor=amp_factor, return_sample_weight=False,
        auto_batch_balance=False,
        seed=random_seed,
        **blob_kwargs)

    # Load train and validation set into RAM.
    if one_patch_mode:
        nb_train_samples = len(img_train)
        nb_val_samples = len(img_val)
    else:
        nb_train_samples = len(img_train)*roi_per_img
        nb_val_samples = len(img_val)*roi_per_img
    if load_val_ram:
        # No newline, so that "Done." ends up on the same line.
        sys.stdout.write("Loading validation data into RAM. ")
        sys.stdout.flush()
        validation_set = load_dat_ram(val_generator, nb_val_samples)
        print("Done.")
        sys.stdout.flush()
        report_class_counts(validation_set[1])
    if load_train_ram:
        sys.stdout.write("Loading train data into RAM. ")
        sys.stdout.flush()
        train_set = load_dat_ram(train_generator, nb_train_samples)
        print("Done.")
        sys.stdout.flush()
        report_class_counts(train_set[1])
        train_generator = imgen_trainval.flow(
            train_set[0], train_set[1], batch_size=clf_bs,
            auto_batch_balance=auto_batch_balance, no_pos_skip=no_pos_skip,
            balance_classes=balance_classes, shuffle=True, seed=random_seed)

    # Load or create model.
    if resume_from is not None:
        model = load_model(resume_from, custom_objects=custom_objects)
    else:
        build_net = getattr(ResNetBuilder, net_builders[net])
        model = build_net(
            (1, roi_size[0], roi_size[1]), 3, nb_init_filter,
            init_filter_size, init_conv_stride, pool_size, pool_stride,
            weight_decay, alpha, l1_ratio, inp_dropout, hidden_dropout)

    if gpu_count > 1:
        model = make_parallel(model, gpu_count)

    # Model training.
    sgd = SGD(lr=init_lr, momentum=0.9, decay=0.0, nesterov=True)
    model.compile(optimizer=sgd, loss=loss, metrics=metrics)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5,
                                  patience=lr_patience, verbose=1)
    early_stopping = EarlyStopping(monitor='val_loss', patience=es_patience,
                                   verbose=1)
    if load_val_ram:
        auc_checkpointer = DMAucModelCheckpoint(
            best_model, validation_set, batch_size=clf_bs)
    else:
        auc_checkpointer = DMAucModelCheckpoint(
            best_model, val_generator, nb_test_samples=nb_val_samples)
    hist = model.fit_generator(
        train_generator,
        samples_per_epoch=patches_per_epoch,
        nb_epoch=nb_epoch,
        validation_data=validation_set if load_val_ram else val_generator,
        nb_val_samples=nb_val_samples,
        callbacks=[reduce_lr, early_stopping, auc_checkpointer],
        # Multiple workers are only used when training from RAM (see the
        # note on nb_worker above).
        nb_worker=nb_worker if load_train_ram else 1,
        pickle_safe=train_in_ram,
        verbose=2)

    if final_model != "NOSAVE":
        print("Saving final model to: %s" % (final_model,))
        sys.stdout.flush()
        model.save(final_model)

    # Training report.
    # argmin gives the first epoch with the lowest validation loss and, unlike
    # comparing the history list with a scalar, does not depend on the
    # history holding numpy scalars.
    best_epoch = int(np.argmin(hist.history['val_loss']))
    best_val_loss = hist.history['val_loss'][best_epoch]
    print("\n==== Training summary ====")
    print("Minimum val loss achieved at epoch: %d" % (best_epoch + 1))
    print("Best val loss: %s" % (best_val_loss,))
    if one_patch_mode:
        print("Best val sensitivity: %s"
              % (hist.history['val_sensitivity'][best_epoch],))
        print("Best val specificity: %s"
              % (hist.history['val_specificity'][best_epoch],))
    else:
        print("Best val precision: %s"
              % (hist.history['val_precision'][best_epoch],))
        print("Best val recall: %s"
              % (hist.history['val_recall'][best_epoch],))
        print("Best val accuracy: %s"
              % (hist.history['val_acc'][best_epoch],))

    # Make predictions on train, val, test exam lists.
    if best_model != 'NOSAVE':
        print("\n==== Making predictions ====")
        print("Load best model for prediction: %s" % (best_model,))
        sys.stdout.flush()
        # The checkpoint may have been compiled with the custom
        # sensitivity/specificity metrics (one_patch_mode), so they must be
        # supplied here like in every other load_model call.
        pred_model = load_model(best_model, custom_objects=custom_objects)
        if gpu_count > 1:
            pred_model = make_parallel(pred_model, gpu_count)

        if pred_trainval:
            print("Load exam lists for train, val sets")
            sys.stdout.flush()
            exam_train = meta_man.get_flatten_exam_list(
                subj_train, flatten_img_list=True)
            print("Train exam list length= %d" % len(exam_train))
            sys.stdout.flush()
            exam_val = meta_man.get_flatten_exam_list(
                subj_val, flatten_img_list=True)
            print("Val exam list length= %d" % len(exam_val))
            sys.stdout.flush()
        print("Load exam list for test set")
        sys.stdout.flush()
        exam_test = meta_man.get_flatten_exam_list(
            subj_test, flatten_img_list=True)
        print("Test exam list length= %d" % len(exam_test))
        sys.stdout.flush()

        # The prediction generator must exist for both normalization modes.
        imgen_pred = DMImageDataGenerator()
        if do_featurewise_norm:
            imgen_pred.featurewise_center = True
            imgen_pred.featurewise_std_normalization = True
            imgen_pred.mean = imgen_trainval.mean
            imgen_pred.std = imgen_trainval.std
        else:
            imgen_pred.samplewise_center = True
            imgen_pred.samplewise_std_normalization = True

        def predict_exams(exam_list):
            # Run the best model on every exam of `exam_list`.
            return get_exam_pred(
                exam_list, pred_roi_per_img, imgen_pred,
                target_height=img_height, target_scale=img_scale,
                img_per_batch=pred_img_per_batch, roi_size=roi_size,
                seed=random_seed, dl_model=pred_model,
                **blob_kwargs)

        if pred_trainval:
            print("Make predictions on train exam list")
            sys.stdout.flush()
            meta_prob_train = predict_exams(exam_train)
            print("Train prediction list length= %d" % len(meta_prob_train))

            print("Make predictions on val exam list")
            sys.stdout.flush()
            meta_prob_val = predict_exams(exam_val)
            print("Val prediction list length= %d" % len(meta_prob_val))

        print("Make predictions on test exam list")
        sys.stdout.flush()
        meta_prob_test = predict_exams(exam_test)
        print("Test prediction list length= %d" % len(meta_prob_test))

        if pred_trainval:
            pred_result = (meta_prob_train, meta_prob_val, meta_prob_test)
        else:
            pred_result = meta_prob_test
        # Pickle data is binary; the context manager also makes sure the
        # file is flushed and closed.
        with open(pred_out, 'wb') as pred_file:
            pickle.dump(pred_result, pred_file)

    return hist
# Example #3
# 0
def run(img_folder,
        img_extension='png',
        img_size=[288, 224],
        multi_view=False,
        do_featurewise_norm=True,
        featurewise_mean=7772.,
        featurewise_std=12187.,
        batch_size=16,
        samples_per_epoch=160,
        nb_epoch=20,
        val_size=.2,
        balance_classes=0.,
        all_neg_skip=False,
        pos_cls_weight=1.0,
        alpha=1.,
        l1_ratio=.5,
        init_lr=.01,
        lr_patience=2,
        es_patience=4,
        exam_tsv='./metadata/exams_metadata.tsv',
        img_tsv='./metadata/images_crosswalk.tsv',
        dl_state='./modelState/resnet50_288_best_model.h5',
        best_model='./modelState/enet_288_best_model.h5',
        final_model="NOSAVE"):
    '''Train an elastic net classifier on deep learning representations
    A saved DL model is loaded from `dl_state`; the output of its
    second-to-last layer is used as the feature vector for an
    SGDClassifier (log loss, elastic net penalty) that is trained
    incrementally with `partial_fit`. The held-out split doubles as the
    "test" set used for checkpointing, learning rate reduction and early
    stopping.

    Environment variables read: RANDOM_SEED (default 12345),
    NUM_CPU_CORES (default 4), NUM_GPU_DEVICES (default 1).

    Args:
        img_folder, img_extension, exam_tsv, img_tsv: forwarded to
                DMMetaManager to locate images and metadata.
        img_size ([int, int]): its first two elements are passed as the
                generators' target_size.
        multi_view (bool): if True, split and iterate over the flattened
                exam list; otherwise over the flattened image list.
        do_featurewise_norm (bool): if True, center/scale images with
                featurewise_mean and featurewise_std; otherwise use
                samplewise centering and std normalization.
        batch_size (int): batch size for both generators.
        samples_per_epoch (int): an epoch ends once at least this many
                training samples have been seen.
        nb_epoch (int): maximum number of epochs.
        val_size: test_size given to train_test_split.
        balance_classes, all_neg_skip: forwarded to the training
                generator.
        pos_cls_weight (float): class weight of the positive class (the
                negative class weight is fixed at 1.0).
        alpha, l1_ratio: elastic net parameters of SGDClassifier.
        init_lr (float): initial constant learning rate (eta0).
        lr_patience (int): once the val loss has not improved for this
                many epochs, the learning rate is multiplied by 0.1 at
                every further non-improving epoch.
        es_patience (int): stop training once the val loss has not
                improved for this many epochs.
        dl_state (str): path of the saved DL model to load.
        best_model (str): path to pickle the classifier to whenever the
                AUROC improves; "NOSAVE" disables saving.
        final_model (str): path to pickle the classifier to after
                training; "NOSAVE" disables saving.
    '''

    # Read some env variables.
    random_seed = int(os.getenv('RANDOM_SEED', 12345))
    nb_worker = int(os.getenv('NUM_CPU_CORES', 4))
    gpu_count = int(os.getenv('NUM_GPU_DEVICES', 1))

    def _auc_and_loss(y_true, y_prob):
        # Score predictions; fall back to the worst values (0, inf) when
        # either metric rejects the input with a ValueError.
        try:
            return roc_auc_score(y_true, y_prob), log_loss(y_true, y_prob)
        except ValueError:
            return 0., np.inf

    def _save_clf(clf, path):
        # Pickle the classifier unless saving is disabled. The file is
        # opened in binary mode because a pickle stream is bytes; text
        # mode can corrupt it on platforms that translate newlines.
        if path != "NOSAVE":
            with open(path, 'wb') as state_file:
                pickle.dump(clf, state_file)

    # Setup training and validation data.
    meta_man = DMMetaManager(exam_tsv=exam_tsv,
                             img_tsv=img_tsv,
                             img_folder=img_folder,
                             img_extension=img_extension)
    target_size = (img_size[0], img_size[1])

    if multi_view:
        exam_list = meta_man.get_flatten_exam_list()
        exam_train, exam_val = train_test_split(
            exam_list,
            test_size=val_size,
            random_state=random_seed,
            stratify=meta_man.exam_labs(exam_list))
        val_size_ = len(exam_val) * 2  # L and R.
    else:
        img_list, lab_list = meta_man.get_flatten_img_list()
        img_train, img_val, lab_train, lab_val = train_test_split(
            img_list,
            lab_list,
            test_size=val_size,
            random_state=random_seed,
            stratify=lab_list)
        val_size_ = len(img_val)

    img_gen = DMImageDataGenerator(horizontal_flip=True, vertical_flip=True)
    if do_featurewise_norm:
        img_gen.featurewise_center = True
        img_gen.featurewise_std_normalization = True
        img_gen.mean = featurewise_mean
        img_gen.std = featurewise_std
    else:
        img_gen.samplewise_center = True
        img_gen.samplewise_std_normalization = True

    if multi_view:
        train_generator = img_gen.flow_from_exam_list(
            exam_train,
            target_size=target_size,
            batch_size=batch_size,
            balance_classes=balance_classes,
            all_neg_skip=all_neg_skip,
            shuffle=True,
            seed=random_seed,
            class_mode='binary')
        val_generator = img_gen.flow_from_exam_list(
            exam_val,
            target_size=target_size,
            batch_size=batch_size,
            validation_mode=True,
            class_mode='binary')
    else:
        train_generator = img_gen.flow_from_img_list(
            img_train,
            lab_train,
            target_size=target_size,
            batch_size=batch_size,
            balance_classes=balance_classes,
            all_neg_skip=all_neg_skip,
            shuffle=True,
            seed=random_seed,
            class_mode='binary')
        val_generator = img_gen.flow_from_img_list(
            img_val,
            lab_val,
            target_size=target_size,
            batch_size=batch_size,
            validation_mode=True,
            class_mode='binary')

    # Deep learning model.
    dl_model = load_model(dl_state,
                          custom_objects={
                              'sensitivity': DMMetrics.sensitivity,
                              'specificity': DMMetrics.specificity
                          })
    # Dummy compilation to turn off the "uncompiled" error when model was run on multi-GPUs.
    # dl_model.compile(optimizer='sgd', loss='binary_crossentropy')
    # The representation model outputs the second-to-last layer of the
    # DL model; its outputs are the features for the elastic net.
    reprlayer_model = Model(input=dl_model.input,
                            output=dl_model.get_layer(index=-2).output)
    if gpu_count > 1:
        reprlayer_model = make_parallel(reprlayer_model, gpu_count)

    # Setup test data in RAM.
    X_test, y_test = dlrepr_generator(reprlayer_model, val_generator,
                                      val_size_)

    # Evaluate DL model on the test data.
    val_generator.reset()
    dl_test_pred = dl_model.predict_generator(val_generator,
                                              val_samples=val_size_,
                                              nb_worker=1,
                                              pickle_safe=False)
    # Set nb_worker to >1 can cause:
    # either inconsistent result when pickle_safe is False,
    #     or broadcasting error when pickle_safe is True.
    # This seems to be a Keras bug!!
    # Further note: the broadcasting error may only happen when val_size_
    # is not divisible by batch_size.
    dl_auc, dl_loss = _auc_and_loss(y_test, dl_test_pred)
    print("\nAUROC by the DL model: %.4f, loss: %.4f" % (dl_auc, dl_loss))

    # Elastic net training.
    target_classes = np.array([0, 1])
    sgd_clf = SGDClassifier(loss='log',
                            penalty='elasticnet',
                            alpha=alpha,
                            l1_ratio=l1_ratio,
                            verbose=0,
                            n_jobs=nb_worker,
                            learning_rate='constant',
                            eta0=init_lr,
                            random_state=random_seed,
                            class_weight={
                                0: 1.0,
                                1: pos_cls_weight
                            })
    curr_lr = init_lr
    best_epoch = 0
    best_auc = 0.
    min_loss = np.inf
    min_loss_epoch = 0
    for epoch in range(nb_epoch):
        samples_seen = 0
        X_list = []
        y_list = []
        epoch_start = time.time()
        while samples_seen < samples_per_epoch:
            X, y = next(train_generator)
            X_repr = reprlayer_model.predict_on_batch(X)
            sgd_clf.partial_fit(X_repr, y, classes=target_classes)
            samples_seen += len(y)
            X_list.append(X_repr)
            y_list.append(y)
        # The training X, y are expected to change for each epoch due to
        # image random sampling and class balancing.
        X_train_epo = np.concatenate(X_list)
        y_train_epo = np.concatenate(y_list)

        # End of epoch summary.
        pred_prob = sgd_clf.predict_proba(X_test)[:, 1]
        train_prob = sgd_clf.predict_proba(X_train_epo)[:, 1]
        auc, crossentropy_loss = _auc_and_loss(y_test, pred_prob)
        try:
            train_loss = log_loss(y_train_epo, train_prob)
        except ValueError:
            train_loss = np.inf
        wei_sparseness = np.mean(sgd_clf.coef_ == 0)
        epoch_span = time.time() - epoch_start
        print(("%ds - Epoch=%d, auc=%.4f, train_loss=%.4f, test_loss=%.4f, "
               "weight sparsity=%.4f") %
              (epoch_span, epoch + 1, auc, train_loss, crossentropy_loss,
               wei_sparseness))

        # Model checkpoint, reducing learning rate and early stopping.
        if auc > best_auc:
            best_epoch = epoch + 1
            best_auc = auc
            _save_clf(sgd_clf, best_model)
        if crossentropy_loss < min_loss:
            min_loss = crossentropy_loss
            min_loss_epoch = epoch + 1
        else:
            epochs_since_min = epoch + 1 - min_loss_epoch
            # Early stopping is checked first so that no learning rate
            # change is made on the epoch training stops.
            if epochs_since_min >= es_patience:
                print('Early stopping criterion has reached. Stop training.')
                break
            if epochs_since_min >= lr_patience:
                curr_lr *= .1
                sgd_clf.set_params(eta0=curr_lr)
                print("Reducing learning rate to: %s" % (curr_lr))

    # End of training summary
    print(">>> Found best AUROC: %.4f at epoch: %d, saved to: %s <<<" %
          (best_auc, best_epoch, best_model))
    print(">>> Found best val loss: %.4f at epoch: %d. <<<" %
          (min_loss, min_loss_epoch))
    #### Save elastic net model!! ####
    _save_clf(sgd_clf, final_model)