Example #1
0
def test_mpca_trainer(classifier, params, gait):
    """End-to-end test of MPCATrainer on a small gait-data subset.

    Covers fitting, prediction accuracy, probability estimates,
    decision scores, and back-projection of classifier weights for
    visualization, including the expected failure modes per classifier.
    """
    # Move the sample axis first, then keep only 20 samples for speed.
    x = gait["fea3D"].transpose((3, 0, 1, 2))
    x = x[:20, :]
    y = gait["gnd"][:20].reshape(-1)
    trainer = MPCATrainer(classifier=classifier, **params)
    trainer.fit(x, y)
    y_pred = trainer.predict(x)
    testing.assert_equal(np.unique(y), np.unique(y_pred))
    assert accuracy_score(y, y_pred) >= 0.8

    if classifier == "linear_svc":
        # linear_svc exposes no probability estimates, so this must raise.
        # (No assignment needed — the call itself is what is under test.)
        with pytest.raises(Exception):
            trainer.predict_proba(x)
    else:
        y_proba = trainer.predict_proba(x)
        assert np.max(y_proba) <= 1.0
        assert np.min(y_proba) >= 0.0
        # Binary indicator vector for class 1 to score AUC against.
        y_ = np.zeros(y.shape)
        y_[np.where(y == 1)] = 1
        assert roc_auc_score(y_, y_proba[:, 0]) >= 0.8

    y_dec_score = trainer.decision_function(x)
    assert roc_auc_score(y, y_dec_score) >= 0.8

    if classifier == "svc" and trainer.clf.kernel == "rbf":
        # A non-linear kernel has no coef_ to back-project, so this raises.
        with pytest.raises(Exception):
            trainer.mpca.inverse_transform(trainer.clf.coef_)
    else:
        weights = trainer.mpca.inverse_transform(
            trainer.clf.coef_) - trainer.mpca.mean_
        top_weights = model_weights.select_top_weight(weights,
                                                      select_ratio=0.1)
        fig = visualize.plot_weights(top_weights[0][0], background_img=x[0][0])
        # isinstance is the idiomatic type check (was: type(fig) == ...).
        assert isinstance(fig, matplotlib.figure.Figure)
Example #2
0
def main():
    """Run the PH-vs-PAH imaging demo end to end.

    Loads DICOM images, a mask, and landmarks per the config file;
    pre-processes the images (registration, masking, rescaling,
    normalization); evaluates an MPCA pipeline with 10-fold
    cross-validation; then refits on all data and visualizes the top
    model weights. Intermediate figures are saved when configured.
    """
    args = arg_parse()

    # ---- setup configs ----
    cfg = get_cfg_defaults()
    cfg.merge_from_file(args.cfg)
    cfg.freeze()
    print(cfg)

    save_images = cfg.OUTPUT.SAVE_IMAGES
    print(f"Save Images: {save_images}")

    # ---- initialize folder to store images ----
    save_images_location = cfg.OUTPUT.ROOT
    # Fixed label: was "Save Images", duplicating the flag message above.
    print(f"Save Images Location: {save_images_location}")

    # exist_ok avoids the check-then-create race of os.path.exists().
    os.makedirs(save_images_location, exist_ok=True)

    def _save_stage(stack, filename):
        # Plot the first phase of every subject in `stack` and save it
        # under the output root (shared by the four pre-processing stages).
        visualize.plot_multi_images(
            stack[:, 0, ...],
            im_kwargs=dict(cfg.IM_KWARGS)).savefig(
                os.path.join(str(save_images_location), filename))

    # ---- setup dataset ----
    base_dir = cfg.DATASET.BASE_DIR
    # NOTE(review): "FILE_FORAMT" matches the config schema's key spelling;
    # rename it in the schema before correcting it here.
    file_format = cfg.DATASET.FILE_FORAMT
    download_file_by_url(cfg.DATASET.SOURCE, cfg.DATASET.ROOT,
                         "%s.%s" % (base_dir, file_format), file_format)

    img_path = os.path.join(cfg.DATASET.ROOT, base_dir, cfg.DATASET.IMG_DIR)
    images = read_dicom_images(img_path, sort_instance=True, sort_patient=True)

    mask_path = os.path.join(cfg.DATASET.ROOT, base_dir, cfg.DATASET.MASK_DIR)
    mask = read_dicom_images(mask_path, sort_instance=True)

    landmark_path = os.path.join(cfg.DATASET.ROOT, base_dir,
                                 cfg.DATASET.LANDMARK_FILE)
    landmark_df = pd.read_csv(
        landmark_path, index_col="Subject")  # read .csv file as dataframe
    landmarks = landmark_df.iloc[:, :6].values
    y = landmark_df["Group"].values
    # Convert to a binary classification problem, i.e. no PH vs PAH.
    y[np.where(y != 0)] = 1

    # plot the first phase of images (with landmarks, so not via _save_stage)
    if save_images:
        visualize.plot_multi_images(
            images[:, 0, ...],
            marker_locs=landmarks,
            im_kwargs=dict(cfg.IM_KWARGS),
            marker_kwargs=dict(cfg.MARKER_KWARGS),
        ).savefig(os.path.join(str(save_images_location),
                               "0)first_phase.png"))

    # ---- data pre-processing ----
    # ----- image registration -----
    img_reg, max_dist = reg_img_stack(images.copy(), landmarks)
    if save_images:
        # ".png" added so stages 1-5 carry an explicit format like stage 0.
        _save_stage(img_reg, "1)image_registration.png")

    # ----- masking -----
    img_masked = mask_img_stack(img_reg.copy(), mask[0, 0, ...])
    if save_images:
        _save_stage(img_masked, "2)masking.png")

    # ----- resize -----
    img_rescaled = rescale_img_stack(img_masked.copy(),
                                     scale=1 / cfg.PROC.SCALE)
    if save_images:
        _save_stage(img_rescaled, "3)resize.png")

    # ----- normalization -----
    img_norm = normalize_img_stack(img_rescaled.copy())
    if save_images:
        _save_stage(img_norm, "4)normalize.png")

    # ---- evaluating machine learning pipeline ----
    x = img_norm.copy()
    trainer = MPCATrainer(classifier=cfg.PIPELINE.CLASSIFIER, n_features=200)
    cv_results = cross_validate(trainer,
                                x,
                                y,
                                cv=10,
                                scoring=["accuracy", "roc_auc"],
                                n_jobs=1)

    print("Averaged training time: {:.4f} seconds".format(
        np.mean(cv_results["fit_time"])))
    print("Averaged testing time: {:.4f} seconds".format(
        np.mean(cv_results["score_time"])))
    print("Averaged Accuracy: {:.4f}".format(
        np.mean(cv_results["test_accuracy"])))
    print("Averaged AUC: {:.4f}".format(np.mean(cv_results["test_roc_auc"])))

    # ---- model weights interpretation ----
    trainer.fit(x, y)

    # Back-project classifier weights into image space, undo the rescale
    # and masking, then keep only the strongest weights for plotting.
    weights = trainer.mpca.inverse_transform(
        trainer.clf.coef_) - trainer.mpca.mean_
    weights = rescale_img_stack(
        weights, cfg.PROC.SCALE)  # rescale weights to original shape
    weights = mask_img_stack(weights, mask[0, 0, ...])  # masking weights
    top_weights = model_weights.select_top_weight(
        weights, select_ratio=0.02)  # select top 2% weights
    if save_images:
        visualize.plot_weights(
            top_weights[0][0],
            background_img=images[0][0],
            im_kwargs=dict(cfg.IM_KWARGS),
            marker_kwargs=dict(cfg.WEIGHT_KWARGS),
        ).savefig(os.path.join(str(save_images_location), "5)weights.png"))
Example #3
0
def main():
    """Run the cardiac-MRI PH-vs-PAH demo on per-patient DICOM lists.

    Loads DICOM directories, a mask, and landmarks per the config file;
    pre-processes the images (registration, masking, rescaling,
    normalization); evaluates an MPCA pipeline with 10-fold
    cross-validation; then refits on all data and visualizes the top
    model weights. Figures are saved when configured.
    """
    args = arg_parse()

    # ---- setup configs ----
    cfg = get_cfg_defaults()
    cfg.merge_from_file(args.cfg)
    cfg.freeze()
    print(cfg)

    save_figs = cfg.OUTPUT.SAVE_FIG
    fig_format = cfg.SAVE_FIG_KWARGS.format
    print(f"Save Figures: {save_figs}")

    # ---- initialize folder to store images ----
    save_figures_location = cfg.OUTPUT.ROOT
    # Fixed label: was "Save Figures", duplicating the flag message above.
    print(f"Save Figures Location: {save_figures_location}")

    # exist_ok avoids the check-then-create race of os.path.exists().
    os.makedirs(save_figures_location, exist_ok=True)

    # ---- setup dataset ----
    base_dir = cfg.DATASET.BASE_DIR
    # NOTE(review): "FILE_FORAMT" matches the config schema's key spelling;
    # rename it in the schema before correcting it here.
    file_format = cfg.DATASET.FILE_FORAMT
    download_file_by_url(cfg.DATASET.SOURCE, cfg.DATASET.ROOT, "%s.%s" % (base_dir, file_format), file_format)

    img_path = os.path.join(cfg.DATASET.ROOT, base_dir, cfg.DATASET.IMG_DIR)
    patient_dcm_list = read_dicom_dir(img_path, sort_instance=True, sort_patient=True)
    images, patient_ids = dicom2arraylist(patient_dcm_list, return_patient_id=True)
    patient_ids = np.array(patient_ids, dtype=int)
    n_samples = len(images)

    mask_path = os.path.join(cfg.DATASET.ROOT, base_dir, cfg.DATASET.MASK_DIR)
    mask_dcm = read_dicom_dir(mask_path, sort_instance=True)
    mask = dicom2arraylist(mask_dcm, return_patient_id=False)[0][0, ...]

    landmark_path = os.path.join(cfg.DATASET.ROOT, base_dir, cfg.DATASET.LANDMARK_FILE)
    landmark_df = pd.read_csv(landmark_path, index_col="Subject").loc[patient_ids]  # read .csv file as dataframe
    landmarks = landmark_df.iloc[:, :-1].values
    y = landmark_df["Group"].values
    y[np.where(y != 0)] = 1  # convert to binary classification problem, i.e. no PH vs PAH

    # plot the first phase of images with landmarks
    # Every second column holds one coordinate of a marker; drop the
    # trailing coordinate token (e.g. "x"/"y") to get the marker title.
    marker_names = list(landmark_df.columns[1::2])
    markers = [" ".join(name.split(" ")[:-1]) for name in marker_names]

    if save_figs:
        n_img_per_fig = 45
        # Ceiling division. The previous int(n / per_fig) + 1 produced an
        # extra empty figure whenever n_samples was an exact multiple.
        n_figures = -(-n_samples // n_img_per_fig)
        for k in range(n_figures):
            start = k * n_img_per_fig
            stop = min(start + n_img_per_fig, n_samples)
            visualize.plot_multi_images(
                [images[i][0, ...] for i in range(start, stop)],
                marker_locs=landmarks[start:stop, :],
                im_kwargs=dict(cfg.PLT_KWS.IM),
                marker_cmap="Set1",
                marker_kwargs=dict(cfg.PLT_KWS.MARKER),
                marker_titles=markers,
                image_titles=list(patient_ids[start:stop]),
                n_cols=5,
            ).savefig(
                str(save_figures_location) + "/0)landmark_visualization_%s_of_%s.%s" % (k + 1, n_figures, fig_format),
                **dict(cfg.SAVE_FIG_KWARGS),
            )

    # ---- data pre-processing ----
    # ----- image registration -----
    img_reg, max_dist = reg_img_stack(images.copy(), landmarks, landmarks[0])
    # Shared kwargs for the per-stage figures (fixed typo: was plt_kawargs).
    plt_kwargs = {**{"im_kwargs": dict(cfg.PLT_KWS.IM), "image_titles": list(patient_ids)}, **dict(cfg.PLT_KWS.PLT)}
    if save_figs:
        visualize.plot_multi_images([img_reg[i][0, ...] for i in range(n_samples)], **plt_kwargs).savefig(
            str(save_figures_location) + "/1)image_registration.%s" % fig_format, **dict(cfg.SAVE_FIG_KWARGS)
        )

    # ----- masking -----
    img_masked = mask_img_stack(img_reg.copy(), mask)
    if save_figs:
        visualize.plot_multi_images([img_masked[i][0, ...] for i in range(n_samples)], **plt_kwargs).savefig(
            str(save_figures_location) + "/2)masking.%s" % fig_format, **dict(cfg.SAVE_FIG_KWARGS)
        )

    # ----- resize -----
    img_rescaled = rescale_img_stack(img_masked.copy(), scale=1 / cfg.PROC.SCALE)
    if save_figs:
        visualize.plot_multi_images([img_rescaled[i][0, ...] for i in range(n_samples)], **plt_kwargs).savefig(
            str(save_figures_location) + "/3)resize.%s" % fig_format, **dict(cfg.SAVE_FIG_KWARGS)
        )

    # ----- normalization -----
    img_norm = normalize_img_stack(img_rescaled.copy())
    if save_figs:
        visualize.plot_multi_images([img_norm[i][0, ...] for i in range(n_samples)], **plt_kwargs).savefig(
            str(save_figures_location) + "/4)normalize.%s" % fig_format, **dict(cfg.SAVE_FIG_KWARGS)
        )

    # ---- evaluating machine learning pipeline ----
    # Stack the per-patient arrays into one (n_samples, ...) array.
    x = np.concatenate([img_norm[i].reshape((1,) + img_norm[i].shape) for i in range(n_samples)], axis=0)
    trainer = MPCATrainer(classifier=cfg.PIPELINE.CLASSIFIER, n_features=200)
    cv_results = cross_validate(trainer, x, y, cv=10, scoring=["accuracy", "roc_auc"], n_jobs=1)

    print("Averaged training time: {:.4f} seconds".format(np.mean(cv_results["fit_time"])))
    print("Averaged testing time: {:.4f} seconds".format(np.mean(cv_results["score_time"])))
    print("Averaged Accuracy: {:.4f}".format(np.mean(cv_results["test_accuracy"])))
    print("Averaged AUC: {:.4f}".format(np.mean(cv_results["test_roc_auc"])))

    # ---- model weights interpretation ----
    trainer.fit(x, y)

    # Back-project classifier weights into image space, undo the rescale
    # and masking, then keep only the strongest 2% for plotting.
    weights = trainer.mpca.inverse_transform(trainer.clf.coef_) - trainer.mpca.mean_
    weights = rescale_img_stack(weights, cfg.PROC.SCALE)  # rescale weights to original shape
    weights = mask_img_stack(weights, mask)  # masking weights
    top_weights = model_weights.select_top_weight(weights, select_ratio=0.02)  # select top 2% weights
    if save_figs:
        visualize.plot_weights(
            top_weights[0][0],
            background_img=images[0][0],
            im_kwargs=dict(cfg.PLT_KWS.IM),
            marker_kwargs=dict(cfg.PLT_KWS.WEIGHT),
        ).savefig(str(save_figures_location) + "/5)weights.%s" % fig_format, **dict(cfg.SAVE_FIG_KWARGS))