def load_or_generate_summary(images, term_scores, n_components, scoring, dataset,
                             force=False, sparsityThreshold=0.000005,
                             memory=Memory(cachedir='nilearn_cache')):
    """
    For a given n_components, load summary csvs if they already exist, or
    run main.py to get and save necessary summary data required for plotting.

    Parameters
    ----------
    images, term_scores : data for the given dataset, as returned by
        get_dataset (passed through to do_main_analysis).
    n_components : int
        Number of ICA components.
    scoring : str
        Scoring (dissimilarity) method passed to the comparison routines.
    dataset : str
        Dataset name; used to build the output directory path.
    force : bool
        If True, ignore any cached csvs and recompute everything.
    sparsityThreshold : float
        Voxel threshold passed to getHemiSparsity.
    memory : joblib.Memory
        Cache used by getHemiSparsity.

    Returns
    -------
    (wb_summary, R_sparsity, L_sparsity) : tuple of pd.DataFrame
    """
    # Directory to find or save the summary csvs
    out_dir = op.join('ica_imgs', dataset, 'analyses', str(n_components))
    summary_csvs = ["wb_summary.csv", "R_sparsity.csv", "L_sparsity.csv"]

    # If summary data are already saved as csv files, simply load them.
    # index_col=0 restores the saved index so the loaded frames match the
    # freshly-generated ones (plain read_csv would add an 'Unnamed: 0' column,
    # making the cached path return differently-shaped frames).
    if not force and all([op.exists(op.join(out_dir, csv)) for csv in summary_csvs]):
        print("Loading summary data from %s" % out_dir)
        (wb_summary, R_sparsity, L_sparsity) = (pd.read_csv(op.join(out_dir, csv),
                                                            index_col=0)
                                                for csv in summary_csvs)

    # Otherwise run main.py and save them as csv files
    else:
        # Initialize summary DFs: one row per component
        (wb_summary, R_sparsity, L_sparsity) = (pd.DataFrame(
            {"n_comp": [n_components] * n_components}) for i in range(3))
        if not op.exists(out_dir):
            os.makedirs(out_dir)

        # Use wb matching in main analysis to get component images and
        # matching scores
        match_method = 'wb'
        img_d, score_mats_d, sign_mats_d = do_main_analysis(
            dataset=dataset, images=images, term_scores=term_scores,
            key=match_method, force=force, plot=False,
            n_components=n_components, scoring=scoring)

        # 1) Get sparsity for each hemisphere for "wb", "R" and "L" imgs
        hemis = ("R", "L")
        sparsitySigns = ("pos", "neg", "abs")
        # Dict of DF and labels used to get and store Sparsity results
        label_dict = {"wb": (wb_summary, hemis),
                      "R": (R_sparsity, ["R"]),
                      "L": (L_sparsity, ["L"])}
        for key in label_dict:
            (df, labels) = label_dict[key]
            # {label: (pos_arr, neg_arr, abs_arr)}
            sparsityResults = {label: getHemiSparsity(img_d[key], label,
                                                      threshold=sparsityThreshold,
                                                      memory=memory)
                               for label in labels}
            for i, sign in enumerate(sparsitySigns):
                for label in labels:
                    df["%s_%s" % (sign, label)] = sparsityResults[label][i]
                # For wb only, also compute Total sparsity and HPI
                # (hemispheric participation index); needs both R and L
                # columns, so this runs after the label loop.
                if key == "wb":
                    df["%sTotal" % sign] = df["%s_R" % sign] + df["%s_L" % sign]
                    df["%sHPI" % sign] = ((df["%s_R" % sign] - df["%s_L" % sign])
                                          / df["%sTotal" % sign].astype(float))

        # Save R/L_sparsity DFs
        R_sparsity.to_csv(op.join(out_dir, "R_sparsity.csv"))
        L_sparsity.to_csv(op.join(out_dir, "L_sparsity.csv"))

        # 2) Get SAS of wb component images as well as matched RL images by passing
        # 2 x wb or RL images and hemi labels to the compare_components (make sure
        # not to flip when comparing R and L)
        name_img_pairs = [("wb_SAS", img_d["wb"]),
                          ("matchedRL_SAS", img_d["RL"])]
        for (name, img) in name_img_pairs:
            sas_imgs = [img] * 2
            score_mat, sign_mat = compare_components(sas_imgs, hemis, scoring,
                                                     flip=False)
            # we only care about the diagonal in score_mat
            wb_summary[name] = score_mat.diagonal()

        # 3) Finally store indices of matched R, L, and RL components, and the
        # respective match scores against wb
        comparisons = [('wb', 'R'), ('wb', 'L'), ('wb', 'RL')]
        for comparison in comparisons:
            score_mat, sign_mat = score_mats_d[comparison], sign_mats_d[comparison]
            matched, unmatched = get_match_idx_pair(score_mat, sign_mat)
            # Component indices for matched R, L , RL are in matched[1].
            # Multiply it by matched[2], which stores sign flipping info.
            # NOTE(review): main_ic_loop accesses the same helper's result as
            # match["idx"] (a dict) — confirm get_match_idx_pair's return type
            # supports both access styles, or one call site is stale.
            matched_indices = matched[1] * matched[2]
            wb_summary["matched%s" % comparison[1]] = matched_indices
            matched_scores = score_mat[matched[0], matched[1]]
            wb_summary["match%s_score" % comparison[1]] = matched_scores

        # Save wb_summary
        wb_summary.to_csv(op.join(out_dir, "wb_summary.csv"))

    return (wb_summary, R_sparsity, L_sparsity)
def main_ic_loop(components, scoring, dataset, query_server=True, force=False,
                 memory=Memory(cachedir='nilearn_cache'), **kwargs):
    """
    Run the main analysis over a range of component counts for each matching
    method, collect mean dissimilarity scores and unmatched-component counts,
    save them as csv, and plot score curves per matching method.

    Parameters
    ----------
    components : sequence of int
        Component counts to analyze (also used as the plot x-axis).
    scoring : str
        Scoring (dissimilarity) method; also used in output filenames.
    dataset : str
        Dataset name passed to get_dataset and used for output paths.
    query_server : bool
        Passed to get_dataset.
    force : bool
        Recompute/replot even if cached results exist.
    memory : joblib.Memory
        Cache object (currently unused directly here; kept for API symmetry).
    **kwargs : forwarded to do_main_analysis.
    """
    # $FIX Test with just 'wb' and 'rl' matching until 'lr' matching is fixed
    # match_methods = ['wb', 'rl', 'lr']
    match_methods = ['wb', 'rl']
    out_dir = op.join('ica_imgs', dataset)
    mean_scores, unmatched = [], []

    # Get the data once.
    images, term_scores = get_dataset(dataset, query_server=query_server)

    for match_method in match_methods:
        print("Plotting results for %s matching method" % match_method)
        mean_score_d, num_unmatched_d = {}, {}
        for c in components:
            print("Running analysis with %d components" % c)
            # main analysis is run for each component and match method:
            # plotting for component comparisons are done only if force=True
            img_d, score_mats_d, sign_mats_d = do_main_analysis(
                dataset=dataset, images=images, term_scores=term_scores,
                key=match_method, force=force, plot=force,
                n_components=c, scoring=scoring, **kwargs)

            # Get mean dissimilarity scores and number of unmatched for each
            # comparisons in score_mats_d
            for comp in score_mats_d:
                score_mat, sign_mat = score_mats_d[comp], sign_mats_d[comp]
                # For ("wb", "RL-forced") and ("wb", "RL-unforced")
                if "forced" in comp[1]:
                    if "-forced" in comp[1]:
                        match, unmatch = get_match_idx_pair(score_mat, sign_mat,
                                                            force=True)
                    elif "-unforced" in comp[1]:
                        match, unmatch = get_match_idx_pair(score_mat, sign_mat,
                                                            force=False)
                    n_unmatched = (unmatch["idx"].shape[1]
                                   if unmatch["idx"] is not None else 0)
                    um_label = "unmatched RL"
                    # Tuple indexing selects the paired (row, col) elements;
                    # the old list form score_mat[[rows, cols]] is deprecated
                    # fancy indexing in NumPy and now raises.
                    mean_score = score_mat[match["idx"][0], match["idx"][1]].mean()
                    score_label = "%s" % (" vs ".join(comp))
                    # Store values in respective dict
                    if c == components[0]:
                        mean_score_d[score_label] = [mean_score]
                        if "-unforced" in comp[1]:
                            num_unmatched_d[um_label] = [n_unmatched]
                    else:
                        mean_score_d[score_label].append(mean_score)
                        if "-unforced" in comp[1]:
                            num_unmatched_d[um_label].append(n_unmatched)

                # For ("wb", "R"), ("wb", "L") --wb matching or
                # ("R", "L") --rl matching
                else:
                    for force_match in [True, False]:
                        match, unmatch = get_match_idx_pair(score_mat, sign_mat,
                                                            force=force_match)
                        # Tuple indexing (see note above) replaces the
                        # deprecated list-of-arrays form.
                        mean_score = score_mat[match["idx"][0],
                                               match["idx"][1]].mean()
                        if force_match:
                            score_label = "%s%s" % (" vs ".join(comp), "-forced")
                            n_unmatched = None
                        else:
                            score_label = "%s%s" % (" vs ".join(comp), "-unforced")
                            n_unmatched = (unmatch["idx"].shape[1]
                                           if unmatch["idx"] is not None else 0)
                            um_label = "unmatched %s" % comp[1]
                        # Store values in respective dict
                        if c == components[0]:
                            mean_score_d[score_label] = [mean_score]
                            if not force_match:
                                num_unmatched_d[um_label] = [n_unmatched]
                        else:
                            mean_score_d[score_label].append(mean_score)
                            if not force_match:
                                num_unmatched_d[um_label].append(n_unmatched)

        # Store vals as df
        ms_df = pd.DataFrame(mean_score_d, index=components)
        um_df = pd.DataFrame(num_unmatched_d, index=components)
        mean_scores.append(ms_df)
        unmatched.append(um_df)
        # Save combined df
        combined = pd.concat([ms_df, um_df], axis=1)
        out = op.join(out_dir, '%s-matching_simscores.csv' % match_method)
        combined.to_csv(out)

    # We have all the scores for the matching method; now plot.
    fh, axes = plt.subplots(1, len(match_methods), sharex=True, sharey=True,
                            figsize=(18, 6))
    fh.suptitle("Average dissimilarity scores for the best-match pairs",
                fontsize=16)
    labels = ["wb vs R-unforced", "wb vs L-unforced", "R vs L-unforced",
              "wb vs RL-unforced", "wb vs R-forced", "wb vs L-forced",
              "R vs L-forced", "wb vs RL-forced",
              "unmatched R", "unmatched L", "unmatched RL"]
    styles = ["r-", "b-", "m-", "g-",
              "r:", "b:", "m:", "g:",
              "r--", "b--", "m--"]
    for i, ax in enumerate(axes):
        ax2 = ax.twinx()  # secondary axis for unmatched counts
        ms_df, um_df = mean_scores[i], unmatched[i]
        for label, style in zip(labels, styles):
            if label in ms_df.columns:
                ms_df[label].plot(ax=ax, style=style)
            elif label in um_df.columns:
                um_df[label].plot(ax=ax2, style=style)
        ax.set_title("%s-matching" % (match_methods[i]))
        # Shrink current axis by 30%
        box = ax.get_position()
        ax.set_position([box.x0, box.y0, box.width * 0.75, box.height])
        ax2.set_position([box.x0, box.y0, box.width * 0.75, box.height])
        # Put the legends to the right of the current axis
        ax.legend(loc='lower left', bbox_to_anchor=(1.3, 0.5))
        ax2.legend(loc='upper left', bbox_to_anchor=(1.3, 0.5))
    fh.text(0.5, 0.04, "# of components", ha="center")
    fh.text(0.05, 0.5, "mean %s scores" % scoring, va='center',
            rotation='vertical')
    fh.text(0.95, 0.5, "# of unmatched R- or L- components", va='center',
            rotation=-90)
    out_path = op.join(out_dir, '%s_simscores.png' % scoring)
    save_and_close(out_path, fh=fh)