def test_mpca_trainer(classifier, params, gait):
    """Smoke-test the MPCATrainer pipeline on a small gait-data subset.

    Checks fit/predict accuracy, probability calibration bounds, decision
    scores, and weight-map visualization for each classifier configuration.

    Args:
        classifier: classifier name forwarded to MPCATrainer
            (e.g. "svc", "linear_svc").
        params: extra keyword arguments for MPCATrainer.
        gait: fixture dict with keys "fea3D" (4-D feature tensor) and
            "gnd" (labels).
    """
    # Move the sample axis first, then keep only 20 samples for speed.
    x = gait["fea3D"].transpose((3, 0, 1, 2))
    x = x[:20, :]
    y = gait["gnd"][:20].reshape(-1)

    trainer = MPCATrainer(classifier=classifier, **params)
    trainer.fit(x, y)
    y_pred = trainer.predict(x)
    testing.assert_equal(np.unique(y), np.unique(y_pred))
    assert accuracy_score(y, y_pred) >= 0.8

    if classifier == "linear_svc":
        # linear_svc exposes no probability estimates; expect a raise.
        with pytest.raises(Exception):
            y_proba = trainer.predict_proba(x)
    else:
        y_proba = trainer.predict_proba(x)
        assert np.max(y_proba) <= 1.0
        assert np.min(y_proba) >= 0.0
        # Binarize labels: positive class is label 1.
        y_ = np.zeros(y.shape)
        y_[np.where(y == 1)] = 1
        assert roc_auc_score(y_, y_proba[:, 0]) >= 0.8

    y_dec_score = trainer.decision_function(x)
    assert roc_auc_score(y, y_dec_score) >= 0.8

    if classifier == "svc" and trainer.clf.kernel == "rbf":
        # RBF-kernel SVC has no linear coef_ to map back to input space.
        with pytest.raises(Exception):
            trainer.mpca.inverse_transform(trainer.clf.coef_)
    else:
        weights = trainer.mpca.inverse_transform(trainer.clf.coef_) - trainer.mpca.mean_
        top_weights = model_weights.select_top_weight(weights, select_ratio=0.1)
        fig = visualize.plot_weights(top_weights[0][0], background_img=x[0][0])
        # Fix: use isinstance instead of exact type comparison (PEP 8 E721);
        # also accepts Figure subclasses returned by some backends.
        assert isinstance(fig, matplotlib.figure.Figure)
def main():
    """Run the PAH-vs-no-PH imaging pipeline: load config, download and
    preprocess DICOM data, cross-validate an MPCA pipeline, and visualize
    the top model weights.
    """
    args = arg_parse()

    # ---- setup configs ----
    cfg = get_cfg_defaults()
    cfg.merge_from_file(args.cfg)
    cfg.freeze()
    print(cfg)
    save_images = cfg.OUTPUT.SAVE_IMAGES
    print(f"Save Images: {save_images}")

    # ---- initialize folder to store images ----
    save_images_location = cfg.OUTPUT.ROOT
    print(f"Save Images: {save_images_location}")
    # exist_ok avoids the check-then-create race of exists()+makedirs().
    os.makedirs(save_images_location, exist_ok=True)

    # ---- setup dataset ----
    base_dir = cfg.DATASET.BASE_DIR
    # NOTE(review): "FORAMT" is a misspelled config key defined upstream;
    # kept as-is for compatibility with the config schema.
    file_format = cfg.DATASET.FILE_FORAMT
    download_file_by_url(cfg.DATASET.SOURCE, cfg.DATASET.ROOT,
                         "%s.%s" % (base_dir, file_format), file_format)

    img_path = os.path.join(cfg.DATASET.ROOT, base_dir, cfg.DATASET.IMG_DIR)
    images = read_dicom_images(img_path, sort_instance=True, sort_patient=True)

    mask_path = os.path.join(cfg.DATASET.ROOT, base_dir, cfg.DATASET.MASK_DIR)
    mask = read_dicom_images(mask_path, sort_instance=True)

    landmark_path = os.path.join(cfg.DATASET.ROOT, base_dir, cfg.DATASET.LANDMARK_FILE)
    landmark_df = pd.read_csv(landmark_path, index_col="Subject")  # read .csv file as dataframe
    landmarks = landmark_df.iloc[:, :6].values
    y = landmark_df["Group"].values
    y[np.where(y != 0)] = 1  # convert to binary classification problem, i.e. no PH vs PAH

    # plot the first phase of images
    if save_images:
        visualize.plot_multi_images(
            images[:, 0, ...],
            marker_locs=landmarks,
            im_kwargs=dict(cfg.IM_KWARGS),
            marker_kwargs=dict(cfg.MARKER_KWARGS),
        ).savefig(str(save_images_location) + "/0)first_phase.png")

    # ---- data pre-processing ----
    # ----- image registration -----
    img_reg, max_dist = reg_img_stack(images.copy(), landmarks)
    if save_images:
        # Fix: file extension was missing for steps 1-5 (inconsistent with
        # step 0's ".png"); added ".png" so all outputs share one format.
        visualize.plot_multi_images(img_reg[:, 0, ...], im_kwargs=dict(cfg.IM_KWARGS)).savefig(
            str(save_images_location) + "/1)image_registration.png")

    # ----- masking -----
    img_masked = mask_img_stack(img_reg.copy(), mask[0, 0, ...])
    if save_images:
        visualize.plot_multi_images(img_masked[:, 0, ...], im_kwargs=dict(cfg.IM_KWARGS)).savefig(
            str(save_images_location) + "/2)masking.png")

    # ----- resize -----
    img_rescaled = rescale_img_stack(img_masked.copy(), scale=1 / cfg.PROC.SCALE)
    if save_images:
        visualize.plot_multi_images(img_rescaled[:, 0, ...], im_kwargs=dict(cfg.IM_KWARGS)).savefig(
            str(save_images_location) + "/3)resize.png")

    # ----- normalization -----
    img_norm = normalize_img_stack(img_rescaled.copy())
    if save_images:
        visualize.plot_multi_images(img_norm[:, 0, ...], im_kwargs=dict(cfg.IM_KWARGS)).savefig(
            str(save_images_location) + "/4)normalize.png")

    # ---- evaluating machine learning pipeline ----
    x = img_norm.copy()
    trainer = MPCATrainer(classifier=cfg.PIPELINE.CLASSIFIER, n_features=200)
    cv_results = cross_validate(trainer, x, y, cv=10,
                                scoring=["accuracy", "roc_auc"], n_jobs=1)

    print("Averaged training time: {:.4f} seconds".format(np.mean(cv_results["fit_time"])))
    print("Averaged testing time: {:.4f} seconds".format(np.mean(cv_results["score_time"])))
    print("Averaged Accuracy: {:.4f}".format(np.mean(cv_results["test_accuracy"])))
    print("Averaged AUC: {:.4f}".format(np.mean(cv_results["test_roc_auc"])))

    # ---- model weights interpretation ----
    trainer.fit(x, y)
    # Map classifier coefficients back to image space, then undo the
    # preprocessing (rescale + mask) so weights align with the raw images.
    weights = trainer.mpca.inverse_transform(trainer.clf.coef_) - trainer.mpca.mean_
    weights = rescale_img_stack(weights, cfg.PROC.SCALE)  # rescale weights to original shape
    weights = mask_img_stack(weights, mask[0, 0, ...])  # masking weights
    top_weights = model_weights.select_top_weight(weights, select_ratio=0.02)  # select top 2% weights
    if save_images:
        visualize.plot_weights(
            top_weights[0][0],
            background_img=images[0][0],
            im_kwargs=dict(cfg.IM_KWARGS),
            marker_kwargs=dict(cfg.WEIGHT_KWARGS),
        ).savefig(str(save_images_location) + "/5)weights.png")
def main():
    """Run the PAH-vs-no-PH imaging pipeline (DICOM-directory variant):
    load config, download data, register/mask/rescale/normalize images,
    cross-validate an MPCA pipeline, and visualize the top model weights.
    """
    args = arg_parse()

    # ---- setup configs ----
    cfg = get_cfg_defaults()
    cfg.merge_from_file(args.cfg)
    cfg.freeze()
    print(cfg)
    save_figs = cfg.OUTPUT.SAVE_FIG
    fig_format = cfg.SAVE_FIG_KWARGS.format
    print(f"Save Figures: {save_figs}")

    # ---- initialize folder to store images ----
    save_figures_location = cfg.OUTPUT.ROOT
    print(f"Save Figures: {save_figures_location}")
    # exist_ok avoids the check-then-create race of exists()+makedirs().
    os.makedirs(save_figures_location, exist_ok=True)

    # ---- setup dataset ----
    base_dir = cfg.DATASET.BASE_DIR
    # NOTE(review): "FORAMT" is a misspelled config key defined upstream;
    # kept as-is for compatibility with the config schema.
    file_format = cfg.DATASET.FILE_FORAMT
    download_file_by_url(cfg.DATASET.SOURCE, cfg.DATASET.ROOT,
                         "%s.%s" % (base_dir, file_format), file_format)

    img_path = os.path.join(cfg.DATASET.ROOT, base_dir, cfg.DATASET.IMG_DIR)
    patient_dcm_list = read_dicom_dir(img_path, sort_instance=True, sort_patient=True)
    images, patient_ids = dicom2arraylist(patient_dcm_list, return_patient_id=True)
    patient_ids = np.array(patient_ids, dtype=int)
    n_samples = len(images)

    mask_path = os.path.join(cfg.DATASET.ROOT, base_dir, cfg.DATASET.MASK_DIR)
    mask_dcm = read_dicom_dir(mask_path, sort_instance=True)
    mask = dicom2arraylist(mask_dcm, return_patient_id=False)[0][0, ...]

    landmark_path = os.path.join(cfg.DATASET.ROOT, base_dir, cfg.DATASET.LANDMARK_FILE)
    # Align landmark rows with the DICOM patient order via .loc[patient_ids].
    landmark_df = pd.read_csv(landmark_path, index_col="Subject").loc[patient_ids]  # read .csv file as dataframe
    landmarks = landmark_df.iloc[:, :-1].values
    y = landmark_df["Group"].values
    y[np.where(y != 0)] = 1  # convert to binary classification problem, i.e. no PH vs PAH

    # plot the first phase of images with landmarks
    marker_names = list(landmark_df.columns[1::2])
    markers = []
    for marker in marker_names:
        # Drop the trailing coordinate token (e.g. "X"/"Y") from each name.
        marker_name = marker.split(" ")
        marker_name.pop(-1)
        marker_name = " ".join(marker_name)
        markers.append(marker_name)

    if save_figs:
        n_img_per_fig = 45
        # Fix: int(a / b) + 1 produced one extra, empty figure whenever
        # n_samples was an exact multiple of n_img_per_fig; use ceiling
        # division instead.
        n_figures = -(-n_samples // n_img_per_fig)
        for k in range(n_figures):
            start = k * n_img_per_fig
            stop = min((k + 1) * n_img_per_fig, n_samples)
            visualize.plot_multi_images(
                [images[i][0, ...] for i in range(start, stop)],
                marker_locs=landmarks[start:stop, :],
                im_kwargs=dict(cfg.PLT_KWS.IM),
                marker_cmap="Set1",
                marker_kwargs=dict(cfg.PLT_KWS.MARKER),
                marker_titles=markers,
                image_titles=list(patient_ids[start:stop]),
                n_cols=5,
            ).savefig(
                str(save_figures_location)
                + "/0)landmark_visualization_%s_of_%s.%s" % (k + 1, n_figures, fig_format),
                **dict(cfg.SAVE_FIG_KWARGS),
            )

    # ---- data pre-processing ----
    # ----- image registration -----
    img_reg, max_dist = reg_img_stack(images.copy(), landmarks, landmarks[0])
    plt_kwargs = {**{"im_kwargs": dict(cfg.PLT_KWS.IM), "image_titles": list(patient_ids)},
                  **dict(cfg.PLT_KWS.PLT)}
    if save_figs:
        visualize.plot_multi_images([img_reg[i][0, ...] for i in range(n_samples)], **plt_kwargs).savefig(
            str(save_figures_location) + "/1)image_registration.%s" % fig_format, **dict(cfg.SAVE_FIG_KWARGS)
        )

    # ----- masking -----
    img_masked = mask_img_stack(img_reg.copy(), mask)
    if save_figs:
        visualize.plot_multi_images([img_masked[i][0, ...] for i in range(n_samples)], **plt_kwargs).savefig(
            str(save_figures_location) + "/2)masking.%s" % fig_format, **dict(cfg.SAVE_FIG_KWARGS)
        )

    # ----- resize -----
    img_rescaled = rescale_img_stack(img_masked.copy(), scale=1 / cfg.PROC.SCALE)
    if save_figs:
        visualize.plot_multi_images([img_rescaled[i][0, ...] for i in range(n_samples)], **plt_kwargs).savefig(
            str(save_figures_location) + "/3)resize.%s" % fig_format, **dict(cfg.SAVE_FIG_KWARGS)
        )

    # ----- normalization -----
    img_norm = normalize_img_stack(img_rescaled.copy())
    if save_figs:
        visualize.plot_multi_images([img_norm[i][0, ...] for i in range(n_samples)], **plt_kwargs).savefig(
            str(save_figures_location) + "/4)normalize.%s" % fig_format, **dict(cfg.SAVE_FIG_KWARGS)
        )

    # ---- evaluating machine learning pipeline ----
    # Stack the per-patient arrays into one (n_samples, ...) tensor.
    x = np.concatenate([img_norm[i].reshape((1,) + img_norm[i].shape) for i in range(n_samples)], axis=0)
    trainer = MPCATrainer(classifier=cfg.PIPELINE.CLASSIFIER, n_features=200)
    cv_results = cross_validate(trainer, x, y, cv=10, scoring=["accuracy", "roc_auc"], n_jobs=1)

    print("Averaged training time: {:.4f} seconds".format(np.mean(cv_results["fit_time"])))
    print("Averaged testing time: {:.4f} seconds".format(np.mean(cv_results["score_time"])))
    print("Averaged Accuracy: {:.4f}".format(np.mean(cv_results["test_accuracy"])))
    print("Averaged AUC: {:.4f}".format(np.mean(cv_results["test_roc_auc"])))

    # ---- model weights interpretation ----
    trainer.fit(x, y)
    # Map classifier coefficients back to image space, then undo the
    # preprocessing (rescale + mask) so weights align with the raw images.
    weights = trainer.mpca.inverse_transform(trainer.clf.coef_) - trainer.mpca.mean_
    weights = rescale_img_stack(weights, cfg.PROC.SCALE)  # rescale weights to original shape
    weights = mask_img_stack(weights, mask)  # masking weights
    top_weights = model_weights.select_top_weight(weights, select_ratio=0.02)  # select top 2% weights
    if save_figs:
        visualize.plot_weights(
            top_weights[0][0],
            background_img=images[0][0],
            im_kwargs=dict(cfg.PLT_KWS.IM),
            marker_kwargs=dict(cfg.PLT_KWS.WEIGHT),
        ).savefig(str(save_figures_location) + "/5)weights.%s" % fig_format, **dict(cfg.SAVE_FIG_KWARGS))