Example #1
0
def qc_image_data(dataset, images, plot_dir='qc'):
    """Plot every image on 4x4 grids of stat maps for visual quality control.

    Each image is cast to float32, cleaned, pushed through the grey-matter
    masker (transform followed by inverse_transform) and drawn into one cell
    of a 16-panel figure. Figures are saved as ``fig001.png``,
    ``fig002.png``, ... inside `plot_dir`. For the 'neurovault' dataset a
    ``fetch_summary.csv`` holding per-image metadata is written as well.

    Parameters
    ----------
    dataset : str
        Dataset name; only 'neurovault' triggers the metadata summary.
    images : sequence of dict
        Image records. The keys 'absolute_path' and 'rejected' are read
        directly; the metadata keys are read with ``dict.get``. Records
        whose 'absolute_path' is None are skipped, but still occupy their
        slot in the figure numbering.
    plot_dir : str
        Output directory. An existing directory of that name is deleted
        before plotting.
    """
    # Get ready
    masker = GreyMatterNiftiMasker(memory=Memory(cachedir='nilearn_cache')).fit()
    if op.exists(plot_dir):  # Delete old plots.
        shutil.rmtree(plot_dir)

    # Dataframe to contain summary metadata for neurovault images
    is_neurovault = dataset == 'neurovault'
    if is_neurovault:
        fetch_summary = pd.DataFrame(
            columns=('Figure #', 'col_id', 'image_id', 'name',
                     'modality', 'map_type', 'analysis_level',
                     'is_thresholded', 'not_mni', 'brain_coverage',
                     'perc_bad_voxels', 'perc_voxels_outside'))

    fh = None
    open_fi = None  # index of the figure currently being filled, if any

    for ii, image in enumerate(images):
        im_path = image['absolute_path']
        if im_path is None:
            continue

        # Floor division keeps the indices integral on Python 3 as well;
        # with true division the "last cell of the figure" test never fired.
        pi = ii % 16 + 1  # plot i (cell within the 4x4 grid)
        fi = ii // 16  # figure i

        # Open a new figure whenever the figure index changes. Keying on the
        # index (rather than on "first cell") keeps this correct when the
        # image that would have opened or closed a figure was skipped.
        if fi != open_fi:
            if open_fi is not None:
                save_and_close(
                    op.join(plot_dir, 'fig%03d.png' % (open_fi + 1)))
            fh = plt.figure(figsize=(16, 10))
            open_fi = fi
            print('Plot %03d of %d' % (fi + 1, np.ceil(len(images) / 16.)))
        ax = fh.add_subplot(4, 4, pi)
        title = "%s%s" % (
            '(X) ' if image['rejected'] else '', op.basename(im_path))

        if is_neurovault:
            fetch_summary.loc[ii] = [
                'fig%03d' % (fi + 1), image.get('collection_id'),
                image.get('id'), title, image.get('modality'),
                image.get('map_type'), image.get('analysis_level'),
                image.get('is_thresholded'), image.get('not_mni'),
                image.get('brain_coverage'), image.get('perc_bad_voxels'),
                image.get('perc_voxels_outside')]

        # Images may fail to be transformed, and are of different shapes,
        # so we need to transform one-by-one and keep track of failures.
        img = cast_img(im_path, dtype=np.float32)
        img = clean_img(img)
        try:
            img = masker.inverse_transform(masker.transform(img))
        except Exception as e:
            # Best effort: report the failure and plot the unmasked image.
            print("Failed to mask/reshape image %s: %s" % (title, e))

        plot_stat_map(img, axes=ax, black_bg=True, title=title, colorbar=False)

    # Flush the figure that was still being filled when the loop ended.
    if open_fi is not None:
        save_and_close(op.join(plot_dir, 'fig%03d.png' % (open_fi + 1)))

    # Save fetch_summary
    if is_neurovault:
        fetch_summary.to_csv(op.join(plot_dir, 'fetch_summary.csv'))
def _center_spines_at_origin(ax):
    """Hide the top/right spines and move the left/bottom ones to data 0."""
    ax.spines['right'].set_color('none')
    ax.spines['top'].set_color('none')
    ax.yaxis.set_ticks_position('left')
    ax.spines['left'].set_position(('data', 0))
    ax.xaxis.set_ticks_position('bottom')
    ax.spines['bottom'].set_position(('data', 0))


def loop_main_and_plot(components, scoring, dataset, query_server=True,
                       force=False, sparsityThreshold=0.000005,
                       memory=Memory(cachedir='nilearn_cache'), **kwargs):
    """
    Loop main.py to plot summaries of WB vs hemi ICA components

    For each entry of `components` the whole-brain summary is loaded or
    generated and two figures are saved under
    ``ica_imgs/<dataset>/analyses/<n_components>/``: positive vs negative
    HPI, and HPI vs SAS. After the loop two figures covering all component
    counts are saved under ``ica_imgs/<dataset>/analyses/``.

    Parameters
    ----------
    components : sequence of int
        Numbers of ICA components to analyse. Must support indexing, since
        the last entry sets the x-axis limit of the summary plots.
    scoring : forwarded unchanged to `load_or_generate_summary`.
    dataset : str
        Dataset name, used for fetching data and for the output directory.
    query_server : bool
        Forwarded to `get_dataset`.
    force : bool
        Forwarded to `load_or_generate_summary`.
    sparsityThreshold : float
        Forwarded to `load_or_generate_summary` and quoted in axis labels.
    memory : forwarded to `load_or_generate_summary`.
    **kwargs : accepted for call compatibility and ignored.
    """
    out_dir = op.join('ica_imgs', dataset, 'analyses')

    # Shared by the per-component plots and by the plots drawn after the
    # loop, so they are defined up front rather than inside the loop.
    hpi_signs = ['pos', 'neg', 'abs']
    hpi_sign_colors = {'pos': 'r', 'neg': 'b', 'abs': 'g'}
    ticks = [-1.1, -1.0, -0.5, 0, 0.5, 1.0, 1.1]
    labels = ['L', '-1.0', '-0.5', '0', '0.5', '1.0', 'R']

    # Get the data once.
    # NOTE(review): the max_images cap is flagged "for testing" - confirm it
    # is still wanted.
    images, term_scores = get_dataset(dataset, max_images=200,  # for testing
                                      query_server=query_server)

    # Per-component whole-brain summaries, concatenated once after the loop
    # (DataFrame.append no longer exists in pandas >= 2.0).
    wb_summaries = []

    for c in components:
        print("Running analysis with %d components" % c)
        # The hemisphere sparsity frames are returned but not needed by the
        # plots below.
        (wb_summary, R_sparsity, L_sparsity) = load_or_generate_summary(
            images=images, term_scores=term_scores, n_components=c,
            scoring=scoring, dataset=dataset, force=force,
            sparsityThreshold=sparsityThreshold, memory=memory)
        wb_summaries.append(wb_summary)

        ### Generate component-specific plots ###
        # Save component-specific images in the component dir
        comp_outdir = op.join(out_dir, str(c))

        # 1) Relationship between positive and negative HPI in wb components
        out_path = op.join(comp_outdir, "1_PosNegHPI_%dcomponents.png" % c)

        # set color to be proportional to the symmetry in the sparsity (Pos-Neg/Abs),
        # and set size to be proportional to the total sparsity (Abs)
        color = (wb_summary['posTotal'] - wb_summary['negTotal']) / wb_summary['absTotal']
        size = wb_summary['absTotal'] / 20.0
        ax = wb_summary.plot.scatter(x='posHPI', y='negHPI', c=color, s=size,
                                     xlim=(-1.1, 1.1), ylim=(-1.1, 1.1),
                                     colormap='Reds', colorbar=True, figsize=(7, 6))
        title = ax.set_title("\n".join(wrap("The relationship between HPI on "
                                            "positive and negative side: "
                                            "n_components = %d" % c, 60)))
        _center_spines_at_origin(ax)
        ax.yaxis.set_label_coords(-0.1, 0.5)
        plt.setp(ax, xticks=ticks, xticklabels=labels, yticks=ticks, yticklabels=labels)
        f = plt.gcf()
        title.set_y(1.05)
        f.subplots_adjust(top=0.8)
        # The second axes of the figure is the colorbar added by pandas.
        cax = f.get_axes()[1]
        cax.set_ylabel('Balance between pos/neg(anti-correlated network)',
                       rotation=270, labelpad=20)

        save_and_close(out_path)

        # 2) Relationship between HPI and SAS in wb components
        out_path = op.join(comp_outdir, "2_HPIvsSAS_%dcomponents.png" % c)

        fh, axes = plt.subplots(1, 3, sharey=True, figsize=(18, 6))
        fh.suptitle("The relationship between HPI values and SAS: "
                    "n_components = %d" % c, fontsize=16)
        for ax, sign in zip(axes, hpi_signs):
            ax.scatter(wb_summary['%sHPI' % sign], wb_summary['wb_SAS'],
                       c=hpi_sign_colors[sign], s=wb_summary['%sTotal' % sign] / 20.0)
            ax.set_xlabel("%s HPI" % sign)
            ax.set_xlim(-1.1, 1.1)
            ax.set_ylim(0, 1)
            _center_spines_at_origin(ax)
            plt.setp(ax, xticks=ticks, xticklabels=labels)
        fh.text(0.04, 0.5, "Spatial Asymmetry Score", va='center', rotation='vertical')

        save_and_close(out_path)

    wb_master = pd.concat(wb_summaries)

    ### Generate plots over a range of specified n_components ###
    # 1) HPI-for pos, neg, and abs in wb components
    out_path = op.join(out_dir, '1_wb_HPI.png')

    fh, axes = plt.subplots(1, 3, sharex=True, sharey=True, figsize=(18, 6))
    fh.suptitle("Hemispheric Participation Index for each component", fontsize=16)
    # %g rather than %d: the threshold is a small float and %d would print 0.
    hpi_styles = {'pos': ['r', 'lightpink', 'above %g' % sparsityThreshold],
                  'neg': ['b', 'lightblue', 'below -%g' % sparsityThreshold],
                  'abs': ['g', 'lightgreen', 'with abs value above %g' % sparsityThreshold]}
    by_comp = wb_master.groupby("n_comp")
    for ax, sign in zip(axes, hpi_signs):
        line_color, band_color, threshold_text = hpi_styles[sign]
        hpi_col = "%sHPI" % sign
        # Aggregate only the plotted column instead of the whole frame.
        mean, sd = by_comp[hpi_col].mean(), by_comp[hpi_col].std()
        ax.fill_between(components, mean + sd, mean - sd, linewidth=0,
                        facecolor=band_color, alpha=0.5)
        size = wb_master['%sTotal' % (sign)] / 20.0
        ax.scatter(wb_master.n_comp, wb_master[hpi_col], label=sign,
                   c=line_color, s=size)
        ax.plot(components, mean, c=line_color)
        ax.set_xlim((0, components[-1] + 5))
        ax.set_ylim((-1, 1))
        ax.set_xticks(components)
        ax.set_ylabel("HPI((R-L)/(R+L) for # of voxels %s" % (threshold_text))
    fh.text(0.5, 0.04, "# of components", ha="center")

    save_and_close(out_path, fh=fh)

    # 2) SAS for wb components
    fh, ax = plt.subplots(1, 1, figsize=(18, 6))
    fh.suptitle("Spatial Asymmetry Score for each component", fontsize=16)
    sas_mean, sas_sd = by_comp["wb_SAS"].mean(), by_comp["wb_SAS"].std()
    ax.fill_between(components, sas_mean + sas_sd, sas_mean - sas_sd,
                    linewidth=0, facecolor='lightgrey', alpha=0.5)
    size = wb_master["absTotal"] / 20.0
    ax.scatter(wb_master.n_comp, wb_master["wb_SAS"], c='grey', s=size)
    ax.plot(components, sas_mean, c='grey')
    ax.set_xlim((0, components[-1] + 5))
    ax.set_ylim((-1, 1))
    ax.set_xticks(components)
    ax.set_ylabel("SAS (higher values indicate asymmetry)")

    out_path = op.join(out_dir, '2_wb_SAS.png')
    save_and_close(out_path, fh=fh)
Example #3
0
def image_analyses(components, dataset, memory=Memory(cachedir='nilearn_cache'),
                   **kwargs):
    """
    1) Plot sparsity of ICA images for wb, R, and L.
    2) Plot Hemispheric Participation Index (HPI) for wb ICA images

    Component images are read from
    ``ica_nii/<dataset>/<n>/<R|L|wb>_ica_components.nii.gz``. Figures
    (``sparsity.png``, ``wb_HPI.png``) and one ``<R|L|wb>_summary.csv`` per
    image type are written to ``ica_imgs/<dataset>/``.

    Parameters
    ----------
    components : sequence of int
        Numbers of ICA components to load. Must support indexing, since the
        first and last entries set the x-axis limits.
    dataset : str
        Dataset name, used to build the input and output directories.
    memory : passed to the hemisphere maskers.
    **kwargs : accepted for call compatibility and ignored.
    """
    out_dir = op.join('ica_imgs', dataset)
    images_key = ["R", "L", "wb"]
    sparsity_levels = ['pos_005', 'neg_005', 'abs_005']
    # Threshold encoded in each level name, e.g. 'pos_005' -> 0.005.
    thresholds = {s: float('0.%s' % (re.findall(r'\d+', s)[0]))
                  for s in sparsity_levels}

    def count_per_component(mask):
        # Collapse the first three axes, leaving one count per component
        # (assumes the component index is the last axis of the image data).
        return mask.sum(axis=(0, 1, 2))

    # For calculating hemispheric participation index (HPI) from wb components,
    # prepare hemisphere maskers
    hemi_maskers = [HemisphereMasker(hemisphere=hemi, memory=memory).fit()
                    for hemi in ['R', 'L']]

    # Store sparsity (and hpi for wb) vals per image type; the frames are
    # concatenated once after the loop (DataFrame.append no longer exists in
    # pandas >= 2.0).
    columns = ["n_comp"] + sparsity_levels
    hemi_frames = {hemi: [] for hemi in images_key}

    # Loop over components
    for c in components:
        print("Simply loading component images for n_component = %s" % c)
        nii_dir = op.join('ica_nii', dataset, str(c))
        for hemi in images_key:
            img_path = op.join(nii_dir, '%s_ica_components.nii.gz' % (hemi))
            img = NiftiImageWithTerms.from_filename(img_path)
            data = pd.DataFrame({"n_comp": [c] * c}, columns=columns)
            voxels = img.get_data()
            # get sparsity for the ica image:
            # sparsity is # of voxels above the given sparsity level for each component
            for s in sparsity_levels:
                thresh = thresholds[s]
                if 'pos' in s:
                    data[s] = count_per_component(voxels > thresh)
                elif 'neg' in s:
                    data[s] = count_per_component(voxels < -thresh)
                elif 'abs' in s:
                    data[s] = count_per_component(abs(voxels) > thresh)

            # get hpi values for wb components
            if hemi == "wb":
                hemi_vectors = [masker.transform(img) for masker in hemi_maskers]
                # transform back so that values for each component can be calculated
                hemi_data = [masker.inverse_transform(vec).get_data()
                             for masker, vec in zip(hemi_maskers, hemi_vectors)]
                # pos/neg_vals[0] = # voxels in R, pos/neg_vals[1] = # voxels in L
                pos_vals = [count_per_component(d > 0.005) for d in hemi_data]
                neg_vals = [count_per_component(d < -0.005) for d in hemi_data]

                for sign, val in zip(['pos', 'neg'], [pos_vals, neg_vals]):
                    # 0/0 (no supra-threshold voxel in either hemisphere)
                    # is allowed to become NaN without a warning.
                    with np.errstate(divide="ignore", invalid="ignore"):
                        # pos/neg HPI vals, calculated as (R-L)/(R+L) for num. of voxels above
                        # the given threshold
                        hpi = (val[0].astype(float) - val[1]) / (val[0] + val[1])
                    data["%s_hpi" % (sign)] = hpi

            hemi_frames[hemi].append(data)

    hemi_dfs = {hemi: pd.concat(frames) for hemi, frames in hemi_frames.items()}

    # Now plot:
    # 1) Sparsity for wb, R and L ICA images
    fh, axes = plt.subplots(1, 3, sharex=True, sharey=True, figsize=(18, 6))
    sparsity_styles = {'pos_005': ['b', 'lightblue'],
                       'neg_005': ['r', 'lightpink'],
                       'abs_005': ['g', 'lightgreen']}
    for ax, hemi in zip(axes, images_key):
        df = hemi_dfs[hemi]
        by_comp = df.groupby('n_comp')
        for s in sparsity_levels:
            # Aggregate only the plotted column instead of the whole frame.
            mean, sd = by_comp[s].mean(), by_comp[s].std()
            ax.fill_between(components, mean + sd, mean - sd, linewidth=0,
                            facecolor=sparsity_styles[s][1], alpha=0.5)
            ax.plot(components, mean, color=sparsity_styles[s][0], label=s)
        # Overlay individual points for absolute threshold
        ax.scatter(df.n_comp, df.abs_005, c=sparsity_styles['abs_005'][0])
        ax.set_title("Sparsity of the %s components" % (hemi))
        ax.set_xlim(xmin=components[0] - 1, xmax=components[-1] + 1)
        ax.set_xticks(components)
    plt.legend()
    fh.text(0.5, 0.04, "# of components", ha="center")
    fh.text(0.04, 0.5, "# of voxels above the threshold", va='center', rotation='vertical')

    out_path = op.join(out_dir, 'sparsity.png')
    save_and_close(out_path, fh=fh)

    # 2) HPI plot for wb components
    fh, axes = plt.subplots(1, 2, sharex=True, sharey=True, figsize=(12, 6))
    fh.suptitle("Hemispheric Participation Index for each component", fontsize=16)
    hpi_styles = {'pos': ['b', 'lightblue', 'above 0.005'],
                  'neg': ['r', 'lightpink', 'below -0.005']}
    df = hemi_dfs["wb"]
    by_comp = df.groupby("n_comp")
    for ax, sign in zip(axes, ['pos', 'neg']):
        hpi_col = "%s_hpi" % sign
        mean, sd = by_comp[hpi_col].mean(), by_comp[hpi_col].std()
        ax.fill_between(components, mean + sd, mean - sd, linewidth=0,
                        facecolor=hpi_styles[sign][1], alpha=0.5)
        size = df['%s_005' % (sign)]
        ax.scatter(df.n_comp, df[hpi_col], label=sign, c=hpi_styles[sign][0], s=size / 20)
        ax.plot(components, mean, c=hpi_styles[sign][0])
        ax.set_title("%s" % (sign))
        ax.set_xlim((0, components[-1] + 5))
        ax.set_ylim((-1, 1))
        ax.set_xticks(components)
        ax.set_ylabel("HPI((R-L)/(R+L) for # of voxels %s" % (hpi_styles[sign][2]))

    fh.text(0.5, 0.04, "# of components", ha="center")

    out_path = op.join(out_dir, 'wb_HPI.png')
    save_and_close(out_path, fh=fh)

    # Save sparsity and HPI vals for all the components
    for hemi in images_key:
        hemi_dfs[hemi].index.name = "idx"
        hemi_dfs[hemi].to_csv(op.join(out_dir, "%s_summary.csv" % (hemi)))
Example #4
0
def main_ic_loop(components, scoring,
                 dataset, query_server=True, force=False,
                 memory=Memory(cachedir='nilearn_cache'), **kwargs):
    """Run the main analysis per matching method and component count.

    For every match method and every entry of `components`,
    `do_main_analysis` is run and, for each comparison it returns, the mean
    score over the best-match pairs and the number of unmatched components
    are collected. Per match method the values are written to
    ``ica_imgs/<dataset>/<method>-matching_simscores.csv``; one figure with
    a panel per match method is saved as
    ``ica_imgs/<dataset>/<scoring>_simscores.png``.

    Parameters
    ----------
    components : sequence of int
        Numbers of ICA components to analyse; also the index of the
        result tables.
    scoring : forwarded to `do_main_analysis` and used in the figure's
        y-label and file name.
    dataset : str
        Dataset name, used for fetching data and for the output directory.
    query_server : bool
        Forwarded to `get_dataset`.
    force : bool
        Forwarded to `do_main_analysis` as both `force` and `plot`.
    memory : accepted for call compatibility; not used in this function.
    **kwargs : forwarded to `do_main_analysis`.
    """
    # $FIX Test with just 'wb' and 'rl' matching until 'lr' matching is fixed
    # match_methods = ['wb', 'rl', 'lr']
    match_methods = ['wb', 'rl']
    out_dir = op.join('ica_imgs', dataset)
    mean_scores, unmatched = [], []

    # Get the data once.
    images, term_scores = get_dataset(
        dataset, query_server=query_server)

    for match_method in match_methods:
        print("Plotting results for %s matching method" % match_method)
        mean_score_d, num_unmatched_d = {}, {}
        for c in components:
            print("Running analysis with %d components" % c)
            # main analysis is run for each component and match method:
            # plotting for component comparisons are done only if force=True
            img_d, score_mats_d, sign_mats_d = do_main_analysis(
                    dataset=dataset, images=images, term_scores=term_scores,
                    key=match_method, force=force, plot=force,
                    n_components=c, scoring=scoring, **kwargs)

            # Get mean dissimilarity scores and number of unmatched for each comparisons
            # in score_mats_d
            for comp in score_mats_d:
                score_mat, sign_mat = score_mats_d[comp], sign_mats_d[comp]
                pair_label = " vs ".join(comp)

                # Each case is (force_match, score_label, um_label).
                if "forced" in comp[1]:
                    # For ("wb", "RL-forced") and ("wb", "RL-unforced"): the
                    # comparison name already fixes the matching mode.
                    cases = [("-unforced" not in comp[1], pair_label,
                              "unmatched RL")]
                else:
                    # For ("wb", "R"), ("wb", "L") --wb matching or
                    # ("R", "L") --rl matching: evaluate both modes.
                    cases = [(True, "%s%s" % (pair_label, "-forced"), None),
                             (False, "%s%s" % (pair_label, "-unforced"),
                              "unmatched %s" % comp[1])]

                for force_match, score_label, um_label in cases:
                    match, unmatch = get_match_idx_pair(
                        score_mat, sign_mat, force=force_match)
                    # Index with a (rows, cols) tuple so that exactly the
                    # matched pairs are selected. A list of index sequences
                    # is read as a single array index by NumPy >= 1.23 and
                    # would select whole rows instead.
                    # NOTE(review): assumes score_mat is a NumPy array - confirm.
                    mean_score = score_mat[match["idx"][0], match["idx"][1]].mean()
                    mean_score_d.setdefault(score_label, []).append(mean_score)
                    # Unmatched components only exist for unforced matching.
                    if not force_match:
                        n_unmatched = (unmatch["idx"].shape[1]
                                       if unmatch["idx"] is not None else 0)
                        num_unmatched_d.setdefault(um_label, []).append(n_unmatched)

        # Store vals as df
        ms_df = pd.DataFrame(mean_score_d, index=components)
        um_df = pd.DataFrame(num_unmatched_d, index=components)
        mean_scores.append(ms_df)
        unmatched.append(um_df)
        # Save combined df
        combined = pd.concat([ms_df, um_df], axis=1)
        out = op.join(out_dir, '%s-matching_simscores.csv' % match_method)
        combined.to_csv(out)

    # We have all the scores for the matching method; now plot.
    # squeeze=False keeps `axes` two-dimensional even with one match method.
    fh, axes = plt.subplots(1, len(match_methods), sharex=True, sharey=True,
                            figsize=(18, 6), squeeze=False)
    fh.suptitle("Average dissimilarity scores for the best-match pairs", fontsize=16)
    labels = ["wb vs R-unforced", "wb vs L-unforced", "R vs L-unforced", "wb vs RL-unforced",
              "wb vs R-forced", "wb vs L-forced", "R vs L-forced", "wb vs RL-forced",
              "unmatched R", "unmatched L", "unmatched RL"]
    styles = ["r-", "b-", "m-", "g-",
              "r:", "b:", "m:", "g:",
              "r--", "b--", "m--"]

    for i, ax in enumerate(axes[0]):
        # Scores go on the left axis, unmatched counts on a twin right axis.
        ax2 = ax.twinx()
        ms_df, um_df = mean_scores[i], unmatched[i]
        for label, style in zip(labels, styles):
            if label in ms_df.columns:
                ms_df[label].plot(ax=ax, style=style)
            elif label in um_df.columns:
                um_df[label].plot(ax=ax2, style=style)
        ax.set_title("%s-matching" % (match_methods[i]))
        # Shrink both axes to 75% of their width to make room for the legends
        box = ax.get_position()
        ax.set_position([box.x0, box.y0, box.width * 0.75, box.height])
        ax2.set_position([box.x0, box.y0, box.width * 0.75, box.height])
        # Put the legends to the right of the current axis
        ax.legend(loc='lower left', bbox_to_anchor=(1.3, 0.5))
        ax2.legend(loc='upper left', bbox_to_anchor=(1.3, 0.5))
    fh.text(0.5, 0.04, "# of components", ha="center")
    fh.text(0.05, 0.5, "mean %s scores" % scoring, va='center', rotation='vertical')
    fh.text(0.95, 0.5, "# of unmatched R- or L- components", va='center', rotation=-90)

    out_path = op.join(out_dir, '%s_simscores.png' % scoring)
    save_and_close(out_path, fh=fh)