Example #1
def get_thresh_similarity(narps, dataset='resampled'):
    """
    For each pair of thresholded images, compute the similarity
    of the thresholded/binarized maps using the Jaccard coefficient.
    Computation with zeros per https://stackoverflow.com/questions/37003272/how-to-compute-jaccard-similarity-from-a-pandas-dataframe # noqa
    also add computation of jaccard on only nonzero pairs
    (ala scipy)
    """

    func_args = inspect.getargvalues(inspect.currentframe()).locals
    func_name = sys._getframe().f_code.co_name
    logfile = os.path.join(narps.dirs.dirs['logs'],
                           'AnalyzeMaps-%s.txt' % func_name)
    log_to_file(logfile, '%s' % func_name, flush=True)
    log_to_file(logfile, stringify_dict(func_args))

    for hyp in hypnums:
        print('analyzing thresh similarity for hypothesis', hyp)
        maskdata, labels = get_concat_data(hyp,
                                           narps.dirs.MNI_mask,
                                           narps.dirs.dirs['output'],
                                           imgtype='thresh',
                                           dataset=dataset)

        pctagree = matrix_pct_agreement(maskdata)
        median_pctagree = numpy.median(pctagree[numpy.triu_indices_from(
            pctagree, 1)])
        log_to_file(
            logfile,
            'hyp %d: median pctagree similarity: %f' % (hyp, median_pctagree))

        df_pctagree = pandas.DataFrame(pctagree, index=labels, columns=labels)
        df_pctagree.to_csv(
            os.path.join(narps.dirs.dirs['metadata'],
                         'pctagree_hyp%d.csv' % hyp))

        seaborn.clustermap(df_pctagree,
                           cmap='jet',
                           figsize=(16, 16),
                           method='ward')
        plt.title(hypotheses_full[hyp])
        plt.savefig(os.path.join(narps.dirs.dirs['figures'],
                                 'hyp%d_pctagree_map_thresh.pdf' % hyp),
                    bbox_inches='tight')
        plt.savefig(os.path.join(narps.dirs.dirs['figures'],
                                 'hyp%d_pctagree_map_thresh.png' % hyp),
                    bbox_inches='tight')
        plt.close()

        # get jaccard for nonzero voxels
        jacsim_nonzero = 1 - squareform(pdist(maskdata, 'jaccard'))
        median_jacsim_nonzero = numpy.median(
            jacsim_nonzero[numpy.triu_indices_from(jacsim_nonzero, 1)])
        log_to_file(
            logfile, 'hyp %d: median jaccard similarity (nonzero): %f' %
            (hyp, median_jacsim_nonzero))
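
The two similarity variants above are easy to conflate. Below is a minimal standalone sketch (toy vectors, not the NARPS data or helpers) contrasting percent agreement, which counts shared zeros as agreement, with scipy's Jaccard, which ignores voxels that are zero in both maps:

import numpy
from scipy.spatial.distance import hamming, jaccard

# two toy binarized maps: 4 shared zeros, 2 shared ones, 2 disagreements
a = numpy.array([0, 0, 0, 0, 1, 1, 1, 0], dtype=bool)
b = numpy.array([0, 0, 0, 0, 1, 1, 0, 1], dtype=bool)

# percent agreement: shared zeros count as agreement -> 6/8 = 0.75
pct_agree = 1 - hamming(a, b)

# scipy's jaccard ignores the four voxels that are zero in both maps -> 2/4 = 0.50
jac_nonzero = 1 - jaccard(a, b)

print(pct_agree, jac_nonzero)
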
Example #2
def get_thresh_similarity(narps, dataset='resampled'):
    """
    For each pair of thresholded images, compute the similarity
    of the thresholded/binarized maps using the Jaccard coefficient.
    Computation with zeros per https://stackoverflow.com/questions/37003272/how-to-compute-jaccard-similarity-from-a-pandas-dataframe # noqa
    also add computation of jaccard on only nonzero pairs
    (ala scipy)
    """

    func_args = inspect.getargvalues(inspect.currentframe()).locals
    func_name = sys._getframe().f_code.co_name
    logfile = os.path.join(narps.dirs.dirs['logs'],
                           'AnalyzeMaps-%s.txt' % func_name)
    log_to_file(logfile, '%s' % func_name, flush=True)
    log_to_file(logfile, stringify_dict(func_args))

    output_dir = narps.dirs.get_output_dir('jaccard_thresh')

    for hyp in hypnums:
        print('creating Jaccard map for hypothesis', hyp)
        maskdata, labels = get_concat_data(hyp,
                                           narps.dirs.MNI_mask,
                                           narps.dirs.dirs['output'],
                                           imgtype='thresh',
                                           dataset=dataset)

        # Jaccard including zeros: on binary maps this equals
        # 1 - Hamming distance, i.e. the proportion of agreeing voxels
        jacsim = 1 - pairwise_distances(maskdata, metric="hamming")
        # standard Jaccard, which ignores voxels that are zero in both maps
        jacsim_nonzero = 1 - squareform(pdist(maskdata, 'jaccard'))
        df = pandas.DataFrame(jacsim, index=labels, columns=labels)
        df.to_csv(os.path.join(output_dir, 'jacsim_thresh_hyp%d.csv' % hyp))
        df_nonzero = pandas.DataFrame(jacsim_nonzero,
                                      index=labels,
                                      columns=labels)
        df_nonzero.to_csv(
            os.path.join(output_dir, 'jacsim_nonzero_thresh_hyp%d.csv' % hyp))
        seaborn.clustermap(df, cmap='jet', figsize=(16, 16), method='ward')
        plt.title(hypotheses[hyp])
        plt.savefig(
            os.path.join(narps.dirs.dirs['figures'],
                         'hyp%d_jaccard_map_thresh.pdf' % hyp))
        plt.close()
        seaborn.clustermap(df_nonzero,
                           cmap='jet',
                           figsize=(16, 16),
                           method='ward')
        plt.title(hypotheses[hyp])
        plt.savefig(
            os.path.join(narps.dirs.dirs['figures'],
                         'hyp%d_jaccard_nonzero_map_thresh.pdf' % hyp))
        plt.close()
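
This version computes the zeros-inclusive variant with pairwise_distances, presumably sklearn.metrics.pairwise_distances (the import is omitted in the excerpt, so that attribution is an assumption). A minimal sketch with a toy matrix confirming that 1 - Hamming distance over binary rows gives the pairwise proportion of agreeing voxels:

import numpy
from sklearn.metrics import pairwise_distances  # assumed source of pairwise_distances

# toy binary maps: three "teams", four "voxels"
X = numpy.array([[0, 0, 1, 1],
                 [0, 1, 1, 0],
                 [0, 0, 1, 1]])

# rows 0 and 2 are identical (agreement 1.0); rows 0 and 1 agree on 2 of 4 voxels (0.5)
agree = 1 - pairwise_distances(X, metric="hamming")
print(agree.round(2))
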
Example #3
def mk_correlation_maps_unthresh(narps,
                                 corr_type='spearman',
                                 n_clusters=None,
                                 dataset='zstat',
                                 vox_mask_thresh=1.0):
    """
    Create correlation maps for unthresholded images
    These correlation matrices are clustered using Ward clustering,
    with the number of clusters for each hypotheses determined by
    visual examination.
    vox_mask_thresh controls which voxels are analyzed in terms
    of proportion of teams with signal in voxel.  defaults to 100%
    """
    func_args = inspect.getargvalues(inspect.currentframe()).locals
    func_name = sys._getframe().f_code.co_name
    logfile = os.path.join(narps.dirs.dirs['logs'],
                           'AnalyzeMaps-%s.txt' % func_name)
    log_to_file(logfile, '%s' % func_name, flush=True)
    log_to_file(logfile, stringify_dict(func_args))

    if n_clusters is None:
        n_clusters = {1: 3, 2: 3, 5: 3, 6: 3, 7: 3, 8: 3, 9: 3}

    dendrograms = {}
    membership = {}
    cc_unthresh = {}
    output_dir = narps.dirs.get_output_dir('correlation_unthresh')

    for i, hyp in enumerate(hypnums):
        print('creating correlation map for hypothesis', hyp)
        membership[str(hyp)] = {}
        maskdata, labels = get_concat_data(hyp,
                                           narps.dirs.MNI_mask,
                                           narps.dirs.dirs['output'],
                                           dataset=dataset,
                                           vox_mask_thresh=vox_mask_thresh,
                                           logfile=logfile)

        # compute correlation of all datasets with mean
        # (the results frame is created lazily on the first hypothesis,
        # once labels are known; locals() is checked to avoid re-creating it)
        if 'mean_corr' not in locals():
            mean_corr = pandas.DataFrame(numpy.zeros(
                (len(labels), len(hypnums))),
                                         columns=['H%d' % i for i in hypnums],
                                         index=labels)
        meandata = numpy.mean(maskdata, 0)
        for t in range(maskdata.shape[0]):
            mean_corr.iloc[t,
                           i] = scipy.stats.spearmanr(maskdata[t, :],
                                                      meandata).correlation

        # cluster datasets
        if corr_type == 'spearman':
            cc = scipy.stats.spearmanr(maskdata.T).correlation
        else:  # use Pearson
            cc = numpy.corrcoef(maskdata)
        cc = numpy.nan_to_num(cc)
        df = pandas.DataFrame(cc, index=labels, columns=labels)
        df.to_csv(
            os.path.join(output_dir,
                         '%s_unthresh_hyp%d.csv' % (corr_type, hyp)))

        ward_linkage = scipy.cluster.hierarchy.ward(cc)

        # add 1 to cluster labels so they start at 1
        # rather than zero - for clarity in paper
        clustlabels = [
            s[0] + 1 for s in scipy.cluster.hierarchy.cut_tree(
                ward_linkage, n_clusters=n_clusters[hyp])
        ]
        print('clustlabels:', clustlabels)
        # get decisions for column colors
        md = narps.metadata.query('varnum==%d' % hyp).set_index('teamID')

        decision_colors = ['r', 'g']
        col_colors = [
            decision_colors[md.loc[teamID, 'Decision']] for teamID in labels
        ]

        row_colors = [cluster_colors[s] for s in clustlabels]
        print('row_colors:', row_colors)
        cm = seaborn.clustermap(df,
                                cmap='vlag',
                                figsize=(16, 16),
                                method='ward',
                                row_colors=row_colors,
                                col_colors=col_colors,
                                center=0,
                                vmin=-1,
                                vmax=1)
        plt.title('H%d:' % hyp + hypotheses_full[hyp])
        cc_unthresh[hyp] = (cc, labels)
        plt.savefig(os.path.join(
            narps.dirs.dirs['figures'],
            'hyp%d_%s_map_unthresh.pdf' % (hyp, corr_type)),
                    bbox_inches='tight')
        plt.savefig(os.path.join(
            narps.dirs.dirs['figures'],
            'hyp%d_%s_map_unthresh.png' % (hyp, corr_type)),
                    bbox_inches='tight')
        plt.close()
        dendrograms[hyp] = ward_linkage

        # get cluster membership
        for j in cm.dendrogram_row.reordered_ind:
            cl = clustlabels[j]
            if str(cl) not in membership[str(hyp)]:
                membership[str(hyp)][str(cl)] = []
            membership[str(hyp)][str(cl)].append(labels[j])

    # save cluster data to file so that we don't have to rerun everything
    with open(
            os.path.join(output_dir,
                         'unthresh_cluster_membership_%s.json' % corr_type),
            'w') as f:
        json.dump(membership, f)

    # also save correlation info
    median_corr = mean_corr.median(1).sort_values()
    median_corr_df = pandas.DataFrame(median_corr, columns=['median_corr'])
    median_corr_df.to_csv(
        os.path.join(narps.dirs.dirs['metadata'], 'median_pattern_corr.csv'))

    # note: cc holds the correlation matrix for the last hypothesis in the loop
    log_to_file(
        logfile, 'median correlation between teams: %f' %
        numpy.median(cc[numpy.triu_indices_from(cc, 1)]))

    return (dendrograms, membership)
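
The clustering step in isolation: a minimal sketch with random toy data (not the NARPS maps; the printed labels depend on the seed) of Ward linkage computed on a correlation matrix and cut into a fixed number of clusters, with labels shifted to start at 1 as in the function above:

import numpy
import scipy.cluster.hierarchy

rng = numpy.random.default_rng(0)
data = rng.standard_normal((6, 100))  # toy data: 6 "teams" x 100 "voxels"
cc = numpy.corrcoef(data)  # Pearson correlation, as in the non-spearman branch

# Ward linkage treats each row of the correlation matrix as an observation
ward_linkage = scipy.cluster.hierarchy.ward(cc)

# cut the tree into 3 clusters; add 1 so labels start at 1
clustlabels = [
    s[0] + 1 for s in scipy.cluster.hierarchy.cut_tree(
        ward_linkage, n_clusters=3)
]
print(clustlabels)  # six labels drawn from {1, 2, 3}
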