Exemple #1
0
def get_thresh_similarity(narps, dataset='resampled'):
    """
    For each pair of thresholded images, compute the similarity
    of the thresholded/binarized maps using the Jaccard coefficient.
    Computation with zeros per https://stackoverflow.com/questions/37003272/how-to-compute-jaccard-similarity-from-a-pandas-dataframe # noqa
    also add computation of jaccard on only nonzero pairs
    (ala scipy)

    For every hypothesis in ``hypnums`` two square similarity matrices
    (indexed by the labels returned from ``get_masked_data``) are written:

    * ``jacsim_thresh_hyp<N>.csv``: 1 - Hamming distance between rows
    * ``jacsim_nonzero_thresh_hyp<N>.csv``: 1 - Jaccard distance between rows

    Both go to ``<output>/jaccard_thresh``; a Ward-clustered heatmap of each
    matrix is saved as a PDF in the figures directory.

    Parameters
    ----------
    narps :
        object exposing ``narps.dirs.dirs`` (a mapping with at least the
        'logs', 'output' and 'figures' keys) and ``narps.dirs.MNI_mask``.
    dataset : str
        passed through to ``get_masked_data`` to select the image set.

    Returns
    -------
    None; results are written to disk.
    """

    func_args = inspect.getargvalues(inspect.currentframe()).locals
    func_name = sys._getframe().f_code.co_name
    # Strip the directory part of the script path before taking the stem:
    # an absolute sys.argv[0] would otherwise make os.path.join discard the
    # logs directory, and './script.py' would yield an empty stem.
    logfile = os.path.join(
        narps.dirs.dirs['logs'],
        '%s-%s.txt' % (os.path.basename(sys.argv[0]).split('.')[0],
                       func_name))
    log_to_file(logfile, '%s' % func_name, flush=True)
    log_to_file(logfile, stringify_dict(func_args))

    output_dir = os.path.join(narps.dirs.dirs['output'], 'jaccard_thresh')
    # exist_ok avoids the race between an exists() check and mkdir()
    os.makedirs(output_dir, exist_ok=True)

    for hyp in hypnums:
        print('creating Jaccard map for hypothesis', hyp)
        maskdata, labels = get_masked_data(hyp,
                                           narps.dirs.MNI_mask,
                                           narps.dirs.dirs['output'],
                                           imgtype='thresh',
                                           dataset=dataset)

        # similarity that also counts positions where both maps are zero
        # (presumably scikit-learn's pairwise_distances -- confirm import)
        jacsim = 1 - pairwise_distances(maskdata, metric="hamming")
        # similarity restricted to positions where at least one map is
        # nonzero (presumably scipy.spatial.distance pdist/squareform)
        jacsim_nonzero = 1 - squareform(pdist(maskdata, 'jaccard'))

        df = pandas.DataFrame(jacsim, index=labels, columns=labels)
        df.to_csv(os.path.join(output_dir, 'jacsim_thresh_hyp%d.csv' % hyp))
        df_nonzero = pandas.DataFrame(jacsim_nonzero,
                                      index=labels,
                                      columns=labels)
        df_nonzero.to_csv(
            os.path.join(output_dir, 'jacsim_nonzero_thresh_hyp%d.csv' % hyp))

        # clustered heatmap including zero/zero agreement
        seaborn.clustermap(df, cmap='jet', figsize=(16, 16), method='ward')
        plt.title(hypotheses[hyp])
        plt.savefig(
            os.path.join(narps.dirs.dirs['figures'],
                         'hyp%d_jaccard_map_thresh.pdf' % hyp))
        plt.close()

        # clustered heatmap for the nonzero-only Jaccard similarity
        seaborn.clustermap(df_nonzero,
                           cmap='jet',
                           figsize=(16, 16),
                           method='ward')
        plt.title(hypotheses[hyp])
        plt.savefig(
            os.path.join(narps.dirs.dirs['figures'],
                         'hyp%d_jaccard_nonzero_map_thresh.pdf' % hyp))
        plt.close()
def create_unthresh_histograms(narps, overwrite=True):
    """
    Create histograms for in-mask values in unthresholded images
    These are only created for the images that were successfully
    registered and rectified.

    One PDF per hypothesis in ``hypnums`` is written to
    ``<figures>/unthresh_histograms/hyp<N>_unthresh_histogram.pdf``, with
    one 100-bin histogram per label laid out on a three-column grid.

    Parameters
    ----------
    narps :
        object exposing ``narps.dirs.dirs`` (a mapping with at least the
        'figures' and 'output' keys) and ``narps.dirs.MNI_mask``.
    overwrite : bool
        when False, hypotheses whose output file already exists are skipped.

    Returns
    -------
    None; figures are written to disk.
    """
    figdir = os.path.join(narps.dirs.dirs['figures'], 'unthresh_histograms')
    # exist_ok avoids the race between an exists() check and mkdir()
    os.makedirs(figdir, exist_ok=True)

    n_cols = 3

    for hyp in hypnums:
        outfile = os.path.join(figdir, 'hyp%d_unthresh_histogram.pdf' % hyp)

        if os.path.exists(outfile) and not overwrite:
            continue

        print('making figure for hyp', hyp)
        unthresh_data, labels = get_masked_data(hyp,
                                                narps.dirs.MNI_mask,
                                                narps.dirs.dirs['output'],
                                                imgtype='unthresh',
                                                dataset='rectified')

        n_rows = int(numpy.ceil(len(labels) / n_cols))
        # squeeze=False keeps the axes array 2-D even when there is a single
        # row; otherwise ax[row, col] fails for three or fewer images
        fig, ax = plt.subplots(n_rows,
                               n_cols,
                               figsize=(16, 50),
                               squeeze=False)

        # fill the grid row by row, one histogram per label
        for i, label in enumerate(labels):
            row, col = divmod(i, n_cols)
            ax[row, col].hist(unthresh_data[i, :], 100)
            ax[row, col].set_title(label)

        plt.tight_layout()
        plt.savefig(outfile)
        plt.close(fig)
Exemple #3
0
def mk_correlation_maps_unthresh(narps,
                                 corr_type='spearman',
                                 n_clusters=None,
                                 dataset='zstat'):
    """
    Create correlation maps for unthresholded images
    These correlation matrices are clustered using Ward clustering,
    with the number of clusters for each hypotheses determined by
    visual examination.

    Parameters
    ----------
    narps :
        object exposing ``narps.dirs.dirs`` (a mapping with at least the
        'logs', 'output', 'figures' and 'metadata' keys),
        ``narps.dirs.MNI_mask`` and ``narps.metadata`` (queried on
        'varnum', indexed by 'teamID', with a 'Decision' column).
    corr_type : str
        'spearman' uses scipy.stats.spearmanr; any other value falls back
        to Pearson correlation via numpy.corrcoef.
    n_clusters : dict or None
        maps hypothesis number to the number of clusters to cut the Ward
        tree into; None selects the built-in defaults.
    dataset : str
        passed through to ``get_masked_data`` to select the image set.

    Returns
    -------
    (dendrograms, membership) :
        ``dendrograms`` maps hypothesis number to its Ward linkage matrix;
        ``membership`` maps str(hypothesis) to a dict of
        str(cluster label) -> list of labels, listed in the row order of
        the clustermap.

    Side effects: writes one correlation CSV and one PDF per hypothesis,
    the cluster membership JSON, and 'median_pattern_distance.csv' in the
    metadata directory; appends to the log file.
    """
    func_args = inspect.getargvalues(inspect.currentframe()).locals
    func_name = sys._getframe().f_code.co_name
    logfile = os.path.join(narps.dirs.dirs['logs'],
                           'AnalyzeMaps-%s.txt' % func_name)
    log_to_file(logfile, '%s' % func_name, flush=True)
    log_to_file(logfile, stringify_dict(func_args))

    if n_clusters is None:
        n_clusters = {1: 4, 2: 3, 5: 4, 6: 3, 7: 4, 8: 4, 9: 3}

    dendrograms = {}
    membership = {}
    # allocated on the first iteration, once the labels are known
    mean_corr = None
    output_dir = os.path.join(narps.dirs.dirs['output'],
                              'correlation_unthresh')
    # exist_ok avoids the race between an exists() check and mkdir()
    os.makedirs(output_dir, exist_ok=True)

    for i, hyp in enumerate(hypnums):
        print('creating correlation map for hypothesis', hyp)
        membership[str(hyp)] = {}
        maskdata, labels = get_masked_data(hyp,
                                           narps.dirs.MNI_mask,
                                           narps.dirs.dirs['output'],
                                           dataset=dataset)

        # compute correlation of all datasets with mean
        # (always Spearman, independent of corr_type)
        if mean_corr is None:
            # NOTE(review): rows are fixed from the first hypothesis'
            # labels; this assumes every hypothesis returns the same
            # labels in the same order -- confirm.
            mean_corr = pandas.DataFrame(
                numpy.zeros((len(labels), len(hypnums))),
                columns=['hyp%d' % h for h in hypnums],
                index=labels)
        meandata = numpy.mean(maskdata, 0)
        for t in range(maskdata.shape[0]):
            mean_corr.iloc[t,
                           i] = scipy.stats.spearmanr(maskdata[t, :],
                                                      meandata).correlation

        # cluster datasets
        if corr_type == 'spearman':
            cc = scipy.stats.spearmanr(maskdata.T).correlation
        else:  # use Pearson
            cc = numpy.corrcoef(maskdata)
        # undefined correlations (NaN) are treated as zero
        cc = numpy.nan_to_num(cc)
        df = pandas.DataFrame(cc, index=labels, columns=labels)
        df.to_csv(
            os.path.join(output_dir,
                         '%s_unthresh_hyp%d.csv' % (corr_type, hyp)))

        ward_linkage = scipy.cluster.hierarchy.ward(cc)

        clustlabels = [
            s[0] for s in scipy.cluster.hierarchy.cut_tree(
                ward_linkage, n_clusters=n_clusters[hyp])
        ]

        # get decisions for column colors
        md = narps.metadata.query('varnum==%d' % hyp).set_index('teamID')

        col_colors = [
            cluster_colors[md.loc[teamID, 'Decision']] for teamID in labels
        ]

        # NOTE(review): cut_tree labels appear to start at 0, so `s - 1`
        # would select cluster_colors[-1] for the first cluster -- confirm
        # this offset is intended.
        row_colors = [cluster_colors[s - 1] for s in clustlabels]
        cm = seaborn.clustermap(df,
                                cmap='vlag',
                                figsize=(16, 16),
                                method='ward',
                                row_colors=row_colors,
                                col_colors=col_colors,
                                center=0,
                                vmin=-1,
                                vmax=1)
        plt.title('hyp %d:' % hyp + hypotheses[hyp])
        plt.savefig(
            os.path.join(narps.dirs.dirs['figures'],
                         'hyp%d_%s_map_unthresh.pdf' % (hyp, corr_type)))
        plt.close()
        dendrograms[hyp] = ward_linkage

        # get cluster membership, in the row order shown in the clustermap
        hyp_membership = membership[str(hyp)]
        for j in cm.dendrogram_row.reordered_ind:
            hyp_membership.setdefault(str(clustlabels[j]),
                                      []).append(labels[j])

    # save cluster data to file so that we don't have to rerun everything
    with open(
            os.path.join(output_dir,
                         'unthresh_cluster_membership_%s.json' % corr_type),
            'w') as f:
        json.dump(membership, f)

    # also save correlation info: per-label median (across hypotheses) of
    # the correlation with the mean pattern, sorted ascending
    median_distance = mean_corr.median(1).sort_values()
    median_distance_df = pandas.DataFrame(median_distance,
                                          columns=['median_distance'])
    median_distance_df.to_csv(
        os.path.join(narps.dirs.dirs['metadata'],
                     'median_pattern_distance.csv'))

    # NOTE(review): `cc` here is the matrix from the last hypothesis
    # processed, so this figure reflects only that hypothesis.
    log_to_file(
        logfile, 'median correlation between teams: %f' %
        numpy.median(cc[numpy.triu_indices_from(cc, 1)]))

    return dendrograms, membership