def plot_component_comparisons(images, labels, score_mat, sign_mat,
                               force=False, out_dir=None):
    """
    Uses the score_mat to match up two images. If force, one-to-one matching
    is forced.
    Sign_mat is used to flip signs when comparing two images.
    """
    # Be careful
    assert len(images) == 2
    assert len(labels) == 2
    assert images[0].shape == images[1].shape
    n_components = images[0].shape[3]  # values @ 0 and 1 are the same
    assert score_mat.shape == sign_mat.shape
    assert len(score_mat[0]) == n_components

    # Get indices for matching components
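    # (match and unmatch are dicts with "idx" and "sign" entries; each entry
    # holds a pair of arrays, one per image, and unmatch["idx"] may be None.)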
    match, unmatch = get_match_idx_pair(score_mat, sign_mat, force=force)
    idx_pair = match["idx"]
    sign_pair = match["sign"]

    if not force and unmatch["idx"] is not None:
        idx_pair = np.hstack((idx_pair, unmatch["idx"]))
        sign_pair = np.hstack((sign_pair, unmatch["sign"]))

    n_comp = len(idx_pair[0])   # number of comparisons

    # Calculate a vmax optimal across all the plots
    # get nonzero part of the image for proper thresholding of
    # r- or l- only component
    nonzero_imgs = [img.get_data()[np.nonzero(img.get_data())]
                    for img in images]
    dat = np.append(nonzero_imgs[0], nonzero_imgs[1])
    vmax = stats.scoreatpercentile(np.abs(dat), 99.99)

    print("Plotting results.")
    for i in range(n_comp):
        c1i, c2i = idx_pair[0][i], idx_pair[1][i]
        cis = [c1i, c2i]

        prefix = "unmatched-" if i >= n_components else ""
        num = i-n_components if i >= n_components else i
        png_name = '%s%s_%s_%s.png' % (prefix, labels[0], labels[1], num)
        print "plotting %s" % png_name

        comp_imgs = [index_img(img, ci) for img, ci in zip(images, cis)]

        # flip the sign if sign_mat for the corresponding comparison is -1
        signs = [sign_pair[0][i], sign_pair[1][i]]
        comp_imgs = [math_img("%d*img" % (sign), img=img)
                     for sign, img in zip(signs, comp_imgs)]

        if ('R' in labels and 'L' in labels):
            # Combine left and right image, show just one.
            # terms are not combined here
            comp = math_img("img1+img2", img1=comp_imgs[0], img2=comp_imgs[1])
            titles = [_title_from_terms(
                terms=comp_imgs[labels.index(hemi)].terms,
                ic_idx=cis[labels.index(hemi)], label=hemi,
                sign=signs[labels.index(hemi)]) for hemi in labels]
            fh = plt.figure(figsize=(14, 8))
            plot_stat_map(
                comp, axes=fh.gca(), title="\n".join(titles), black_bg=True,
                symmetric_cbar=True, vmax=vmax)

        else:
            # Show two images, one above the other.
            fh = plt.figure(figsize=(14, 12))

            for ii in [0, 1]:  # Subplot per image
                ax = fh.add_subplot(2, 1, ii + 1)
                comp = comp_imgs[ii]

                title = _title_from_terms(
                    terms=images[ii].terms, ic_idx=cis[ii],
                    label=labels[ii], sign=signs[ii])

                if ii == 0:
                    display = plot_stat_map(comp, axes=ax, title=title,    # noqa
                                            black_bg=True, symmetric_cbar=True,
                                            vmax=vmax)
                else:
                    # use same cut coords
                    cut_coords = display.cut_coords  # noqa
                    display = plot_stat_map(comp, axes=ax, title=title,
                                            black_bg=True, symmetric_cbar=True,
                                            vmax=vmax, display_mode='ortho',
                                            cut_coords=cut_coords)

        # Save images instead of displaying
        if out_dir is not None:
            save_and_close(out_path=op.join(out_dir, png_name), fh=fh)
def load_or_generate_summary(images, term_scores, n_components, scoring, dataset,
                             force=False, sparsityThreshold=0.000005,
                             memory=Memory(cachedir='nilearn_cache')):
    """
    For a given n_components, load summary csvs if they already exist, or
    run main.py to get and save necessary summary data required for plotting.

    Returns (wb_summary, R_sparsity, L_sparsity), each of which are DataFrame.
    """
    # Directory to find or save the summary csvs
    out_dir = op.join('ica_imgs', dataset, 'analyses', str(n_components))
    summary_csvs = ["wb_summary.csv", "R_sparsity.csv", "L_sparsity.csv"]

    # If summary data are already saved as csv files, simply load them
    if not force and all([op.exists(op.join(out_dir, csv)) for csv in summary_csvs]):
        print("Loading summary data from %s" % out_dir)
        (wb_summary, R_sparsity, L_sparsity) = (pd.read_csv(op.join(out_dir, csv))
                                                for csv in summary_csvs)

    # Otherwise run the main analysis and save the results as csv files
    else:
        # Initialize summary DFs
        (wb_summary, R_sparsity, L_sparsity) = (pd.DataFrame(
            {"n_comp": [n_components] * n_components}) for i in range(3))
        if not op.exists(out_dir):
            os.makedirs(out_dir)

        # Use wb matching in main analysis to get component images and
        # matching scores
        match_method = 'wb'
        img_d, score_mats_d, sign_mats_d = do_main_analysis(
            dataset=dataset, images=images, term_scores=term_scores,
            key=match_method, force=force, plot=False,
            n_components=n_components, scoring=scoring)

        # 1) Get sparsity for each hemisphere for "wb", "R" and "L" imgs
        hemis = ("R", "L")
        sparsitySigns = ("pos", "neg", "abs")
        # Dict of DF and labels used to get and store Sparsity results
        label_dict = {"wb": (wb_summary, hemis),
                      "R": (R_sparsity, ["R"]),
                      "L": (L_sparsity, ["L"])}
        for key in label_dict:
            (df, labels) = label_dict[key]
            sparsityResults = {label: getHemiSparsity(img_d[key], label,
                               threshold=sparsityThreshold, memory=memory)
                               for label in labels}  # {label: (pos_arr, neg_arr, abs_arr)}

            for i, sign in enumerate(sparsitySigns):
                for label in labels:
                    df["%s_%s" % (sign, label)] = sparsityResults[label][i]
                # For wb only, also compute Total sparsity and HPI
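                # (HPI = (R - L) / (R + L), so positive values indicate more
                # suprathreshold voxels in the right hemisphere)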
                if key == "wb":
                    df["%sTotal" % sign] = df["%s_R" % sign] + df["%s_L" % sign]
                    df["%sHPI" % sign] = ((df["%s_R" % sign] - df["%s_L" % sign]) /
                                          df["%sTotal" % sign].astype(float))

        # Save R/L_sparsity DFs
        R_sparsity.to_csv(op.join(out_dir, "R_sparsity.csv"))
        L_sparsity.to_csv(op.join(out_dir, "L_sparsity.csv"))

        # 2) Get SAS of the wb component images as well as the matched RL images
        # by passing two copies of the wb or RL image plus the hemi labels to
        # compare_components (making sure not to flip when comparing R and L)
        name_img_pairs = [("wb_SAS", img_d["wb"]),
                          ("matchedRL_SAS", img_d["RL"])]
        for (name, img) in name_img_pairs:
            sas_imgs = [img] * 2
            score_mat, sign_mat = compare_components(sas_imgs, hemis, scoring,
                                                     flip=False)
            # we only care about the diagonal in score_mat
            wb_summary[name] = score_mat.diagonal()

        # 3) Finally store indices of matched R, L, and RL components, and the
        # respective match scores against wb
        comparisons = [('wb', 'R'), ('wb', 'L'), ('wb', 'RL')]
        for comparison in comparisons:
            score_mat, sign_mat = score_mats_d[comparison], sign_mats_d[comparison]
            matched, unmatched = get_match_idx_pair(score_mat, sign_mat)
            # Component indices for the matched R, L, or RL components are in
            # matched["idx"][1]; multiply by matched["sign"][1], which stores
            # the sign-flipping info.
            matched_indices = matched["idx"][1] * matched["sign"][1]
            wb_summary["matched%s" % comparison[1]] = matched_indices

            matched_scores = score_mat[matched["idx"][0], matched["idx"][1]]
            wb_summary["match%s_score" % comparison[1]] = matched_scores

        # Save wb_summary once all comparisons have been stored
        wb_summary.to_csv(op.join(out_dir, "wb_summary.csv"))

    return (wb_summary, R_sparsity, L_sparsity)
def do_main_analysis(dataset, images, term_scores, key="wb", n_components=20,
                     plot=True, max_images=np.inf, scoring='l1norm',
                     query_server=True, force=False, nii_dir=None,
                     plot_dir=None, random_state=42, hemis=('wb', 'R', 'L')):
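    """
    Run the main matching analysis for one dataset and matching method.

    Components are loaded or generated for each hemisphere label in hemis and
    compared according to `key` ('wb', 'rl', or 'lr'); matched R and L
    components are concatenated into RL images (forced and unforced matching),
    which are then compared back to the wb components.

    Returns (imgs, score_mats, sign_mats): a dict of component images keyed by
    hemi label, and dicts of score/sign matrices keyed by comparison tuple.
    """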

    # Output directories
    nii_dir = nii_dir or op.join('ica_nii', dataset, str(n_components))
    plot_dir = plot_dir or op.join('ica_imgs', dataset,
                                   '%s-%dics' % (scoring, n_components),
                                   '%s-matching' % key)

    # 1) Components are generated for R-, L-only, and whole brain images.

    imgs = {}

    # Load or generate components
    kwargs = dict(images=[im['absolute_path'] for im in images],
                  n_components=n_components, term_scores=term_scores,
                  out_dir=nii_dir, plot_dir=plot_dir)
    for hemi in hemis:
        print("Running analyses on %s" % hemi)
        imgs[hemi] = (load_or_generate_components(hemi=hemi, force=force,
                                                  random_state=random_state, **kwargs))

    # 2) Compare components in order to get concatenated RL image
    #    "wb": R- and L- is compared to wb-components, then matched
    #    "rl": direct R- and L- comparison, using R as a ref
    #    "lr": direct R- and L- comparison, using L as a ref
    if key == "wb":
        comparisons = [('wb', 'R'), ('wb', 'L')]
    elif key == "rl":
        comparisons = [('R', 'L')]
    elif key == "lr":
        comparisons = [('L', 'R')]

    score_mats, sign_mats = {}, {}
    RL_arr = {}

    for comp in comparisons:

        img_pair = [imgs[comp[0]], imgs[comp[1]]]

        # Compare components and plot similarity matrix
        # The sign_mat contains signs that gave the best score for the comparison
        score_mat, sign_mat = compare_components_and_plot(images=img_pair, labels=comp,
                                                          scoring=scoring, out_dir=plot_dir)

        # Store score_mat and sign_mat
        score_mats[comp] = score_mat
        sign_mats[comp] = sign_mat

        # Get indices for matching up components for both forced and unforced one-to-one matching
        for force_match in [True, False]:
            force_status = 'forced' if force_match else 'unforced'
            plot_sub_dir = op.join(plot_dir, '%s-match' % force_status)
            match, unmatch = get_match_idx_pair(score_mat, sign_mat, force=force_match)

            # Store R and L indices/signs to match up R and L
            for i, hem in enumerate(comp):
                if hem in ['R', 'L']:
                    RL_arr[(force_status, hem, "idx")] = match["idx"][i]
                    RL_arr[(force_status, hem, "sign")] = match["sign"][i]

            # If plot=True, plot matched (and unmatched, if unforced matching) components
            if plot:
                plot_component_comparisons(images=img_pair, labels=comp,
                                           score_mat=score_mat, sign_mat=sign_mat,
                                           force=force_match, out_dir=plot_sub_dir)

    # 3) Now match up R and L (forced vs unforced match)
    for force_match in [True, False]:
        force_status = 'forced' if force_match else 'unforced'
        plot_sub_dir = op.join(plot_dir, '%s-match' % force_status)

        rl_idx_pair = (RL_arr[(force_status, "R", "idx")], RL_arr[(force_status, "L", "idx")])
        rl_sign_pair = (RL_arr[(force_status, "R", "sign")], RL_arr[(force_status, "L", "sign")])
        imgs['RL-%s' % force_status] = concat_RL(R_img=imgs['R'], L_img=imgs['L'],
                                                 rl_idx_pair=rl_idx_pair,
                                                 rl_sign_pair=rl_sign_pair)

        # 4) Compare the concatenated image to bilateral components (ie wb vs RL)
        # Note that for wb-matching, diagonal components will be matched by definition
        comp = ('wb', 'RL-%s' % force_status)
        img_pair = [imgs[comp[0]], imgs[comp[1]]]
        score_mat, sign_mat = compare_components_and_plot(images=img_pair, labels=comp,
                                                          scoring=scoring, out_dir=plot_sub_dir)

        # Store score_mat and sign_mat
        score_mats[comp] = score_mat
        sign_mats[comp] = sign_mat

        # If plot=True, plot matched (and unmatched, if unforced matching) components
        if plot:
            plot_component_comparisons(images=img_pair, labels=comp,
                                       score_mat=score_mat, sign_mat=sign_mat,
                                       force=force_match, out_dir=plot_sub_dir)

            # Show term comparisons between the matched wb, R and L components
            match, unmatch = get_match_idx_pair(score_mat, sign_mat, force=force_match)
            terms = [imgs[hemi].terms for hemi in hemis]

            # component index list for wb, R and L
            wb_idx_arr = match["idx"][0]
            r_idx_arr, l_idx_arr = [arr[match["idx"][1]] for arr in rl_idx_pair]
            ic_idx_list = [wb_idx_arr, r_idx_arr, l_idx_arr]

            # sign flipping list for wb, R and L
            wb_sign_arr = match["sign"][0]
            r_sign_arr, l_sign_arr = [match["sign"][1] * arr[match["idx"][1]] for arr in rl_sign_pair]
            sign_list = [wb_sign_arr, r_sign_arr, l_sign_arr]

            plot_term_comparisons(terms, labels=hemis, ic_idx_list=ic_idx_list,
                                  sign_list=sign_list, color_list=['g', 'r', 'b'],
                                  top_n=5, bottom_n=5, standardize=True, out_dir=plot_sub_dir)

    return imgs, score_mats, sign_mats
def main_ic_loop(components, scoring,
                 dataset, query_server=True, force=False,
                 memory=Memory(cachedir='nilearn_cache'), **kwargs):
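    """
    For each matching method ('wb', 'rl'), run do_main_analysis across the
    given component counts, collect the mean dissimilarity scores and the
    number of unmatched components for each comparison, save them as csv
    files, and plot the scores against the number of components.
    """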
    # $FIX Test with just 'wb' and 'rl' matching until 'lr' matching is fixed
    # match_methods = ['wb', 'rl', 'lr']
    match_methods = ['wb', 'rl']
    out_dir = op.join('ica_imgs', dataset)
    mean_scores, unmatched = [], []

    # Get the data once.
    images, term_scores = get_dataset(
        dataset, query_server=query_server)

    for match_method in match_methods:
        print("Plotting results for %s matching method" % match_method)
        mean_score_d, num_unmatched_d = {}, {}
        for c in components:
            print("Running analysis with %d components" % c)
            # The main analysis is run for each component count and match method;
            # plotting of component comparisons is done only if force=True
            img_d, score_mats_d, sign_mats_d = do_main_analysis(
                    dataset=dataset, images=images, term_scores=term_scores,
                    key=match_method, force=force, plot=force,
                    n_components=c, scoring=scoring, **kwargs)

            # Get the mean dissimilarity scores and the number of unmatched
            # components for each comparison in score_mats_d
            for comp in score_mats_d:
                score_mat, sign_mat = score_mats_d[comp], sign_mats_d[comp]
                # For ("wb", "RL-forced") and ("wb", "RL-unforced")
                if "forced" in comp[1]:
                    if "-forced" in comp[1]:
                        match, unmatch = get_match_idx_pair(score_mat, sign_mat, force=True)
                    elif "-unforced" in comp[1]:
                        match, unmatch = get_match_idx_pair(score_mat, sign_mat, force=False)
                        n_unmatched = unmatch["idx"].shape[1] if unmatch["idx"] is not None else 0
                        um_label = "unmatched RL"
                    mean_score = score_mat[match["idx"][0], match["idx"][1]].mean()
                    score_label = "%s" % (" vs ".join(comp))
                    # Store values in respective dict
                    if c == components[0]:
                        mean_score_d[score_label] = [mean_score]
                        if "-unforced" in comp[1]:
                            num_unmatched_d[um_label] = [n_unmatched]
                    else:
                        mean_score_d[score_label].append(mean_score)
                        if "-unforced" in comp[1]:
                            num_unmatched_d[um_label].append(n_unmatched)

                # For ("wb", "R"), ("wb", "L") --wb matching or ("R", "L") --rl matching
                else:
                    for force_match in [True, False]:
                        match, unmatch = get_match_idx_pair(score_mat, sign_mat, force=force_match)
                        mean_score = score_mat[match["idx"][0], match["idx"][1]].mean()
                        if force_match:
                            score_label = "%s%s" % (" vs ".join(comp), "-forced")
                            n_unmatched = None
                        else:
                            score_label = "%s%s" % (" vs ".join(comp), "-unforced")
                            n_unmatched = unmatch["idx"].shape[1] if unmatch["idx"] is not None else 0
                            um_label = "unmatched %s" % comp[1]
                        # Store values in respective dict
                        if c == components[0]:
                            mean_score_d[score_label] = [mean_score]
                            if not force_match:
                                num_unmatched_d[um_label] = [n_unmatched]
                        else:
                            mean_score_d[score_label].append(mean_score)
                            if not force_match:
                                num_unmatched_d[um_label].append(n_unmatched)

        # Store vals as df
        ms_df = pd.DataFrame(mean_score_d, index=components)
        um_df = pd.DataFrame(num_unmatched_d, index=components)
        mean_scores.append(ms_df)
        unmatched.append(um_df)
        # Save combined df
        combined = pd.concat([ms_df, um_df], axis=1)
        out = op.join(out_dir, '%s-matching_simscores.csv' % match_method)
        combined.to_csv(out)

    # We have all the scores for the matching method; now plot.
    fh, axes = plt.subplots(1, len(match_methods), sharex=True, sharey=True, figsize=(18, 6))
    fh.suptitle("Average dissimilarity scores for the best-match pairs", fontsize=16)
    labels = ["wb vs R-unforced", "wb vs L-unforced", "R vs L-unforced", "wb vs RL-unforced",
              "wb vs R-forced", "wb vs L-forced", "R vs L-forced", "wb vs RL-forced",
              "unmatched R", "unmatched L", "unmatched RL"]
    styles = ["r-", "b-", "m-", "g-",
              "r:", "b:", "m:", "g:",
              "r--", "b--", "m--"]

    for i, ax in enumerate(axes):
        ax2 = ax.twinx()
        ms_df, um_df = mean_scores[i], unmatched[i]
        for label, style in zip(labels, styles):
            if label in ms_df.columns:
                ms_df[label].plot(ax=ax, style=style)
            elif label in um_df.columns:
                um_df[label].plot(ax=ax2, style=style)
        ax.set_title("%s-matching" % (match_methods[i]))
        # Shrink the current axis width by 25% to make room for the legends
        box = ax.get_position()
        ax.set_position([box.x0, box.y0, box.width * 0.75, box.height])
        ax2.set_position([box.x0, box.y0, box.width * 0.75, box.height])
        # Put the legends to the right of the current axis
        ax.legend(loc='lower left', bbox_to_anchor=(1.3, 0.5))
        ax2.legend(loc='upper left', bbox_to_anchor=(1.3, 0.5))
    fh.text(0.5, 0.04, "# of components", ha="center")
    fh.text(0.05, 0.5, "mean %s scores" % scoring, va='center', rotation='vertical')
    fh.text(0.95, 0.5, "# of unmatched R- or L- components", va='center', rotation=-90)

    out_path = op.join(out_dir, '%s_simscores.png' % scoring)
    save_and_close(out_path, fh=fh)
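
# Illustrative usage sketch (the dataset name and component counts below are
# assumptions for demonstration, not values prescribed by this module):
#
#     components = [10, 20, 30]
#     main_ic_loop(components, scoring='l1norm', dataset='neurovault',
#                  query_server=True, force=False)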