Example #1
    def estimate_smoothness(self, overwrite=None, imgtype='zstat'):
        """
        estimate smoothness of Z maps using FSL's smoothness estimation
        """
        log_to_file(
            self.dirs.logfile,
            sys._getframe().f_code.co_name,
            headspace=2)
        func_args = inspect.getargvalues(
            inspect.currentframe()).locals
        log_to_file(
            self.dirs.logfile,
            stringify_dict(func_args))

        if overwrite is None:
            overwrite = self.overwrite
        output_file = os.path.join(self.dirs.dirs['metadata'],
                                   'smoothness_est.csv')
        if os.path.exists(output_file) and not overwrite:
            if self.verbose:
                print('using existing smoothness file')
            smoothness_df = pandas.read_csv(output_file)
            return smoothness_df

        # use nipype's interface to the FSL smoothest command
        est = SmoothEstimate()
        smoothness = []
        for teamID in self.complete_image_sets['unthresh']:
            for hyp in range(1, 10):
                if hyp not in self.teams[teamID].images['unthresh'][imgtype]:
                    # fill missing data with nan
                    print('no zstat present for', teamID, hyp)
                    smoothness.append([teamID, hyp, numpy.nan,
                                       numpy.nan, numpy.nan])
                    continue
                infile = self.teams[teamID].images['unthresh'][imgtype][hyp]
                if not os.path.exists(infile):
                    print('no image present:', infile)
                    continue
                else:
                    if self.verbose:
                        print('estimating smoothness for hyp', hyp)

                    est.inputs.zstat_file = infile
                    est.inputs.mask_file = self.dirs.MNI_mask
                    est.terminal_output = 'file_split'
                    smoothest_output = est.run()
                    smoothness.append([teamID, hyp,
                                       smoothest_output.outputs.dlh,
                                       smoothest_output.outputs.volume,
                                       smoothest_output.outputs.resels])
                    self.teams[teamID].logs['smoothest'] = (
                        smoothest_output.runtime.stdout,
                        smoothest_output.runtime.stderr)

        smoothness_df = pandas.DataFrame(
            smoothness,
            columns=['teamID', 'hyp', 'dlh', 'volume', 'resels'])
        smoothness_df.to_csv(output_file)
        return smoothness_df
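
The method above runs inside the NARPS analysis class. As a minimal standalone sketch of the same nipype interface, assuming FSL is installed and with zstat.nii.gz and mask.nii.gz as hypothetical stand-ins for a real Z map and brain mask:

from nipype.interfaces.fsl import SmoothEstimate

# minimal sketch: with a zstat input, smoothest needs no separate dof
est = SmoothEstimate()
est.inputs.zstat_file = 'zstat.nii.gz'  # hypothetical path
est.inputs.mask_file = 'mask.nii.gz'    # hypothetical path
result = est.run()

# FSL smoothest reports DLH (smoothness), mask volume in voxels,
# and the resel count used for cluster-level inference
print(result.outputs.dlh, result.outputs.volume, result.outputs.resels)
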
Example #2
    def compute_image_stats(self, datatype='zstat', overwrite=None):
        """
        compute std and range on statistical images
        """
        log_to_file(
            self.dirs.logfile,
            sys._getframe().f_code.co_name,
            headspace=2)
        func_args = inspect.getargvalues(
            inspect.currentframe()).locals
        log_to_file(
            self.dirs.logfile,
            stringify_dict(func_args))

        if overwrite is None:
            overwrite = self.overwrite

        # set up directories
        unthresh_concat_dir = self.dirs.get_output_dir(
            'unthresh_concat_%s' % datatype)
        unthresh_range_dir = self.dirs.get_output_dir(
            'unthresh_range_%s' % datatype)
        unthresh_std_dir = self.dirs.get_output_dir(
            'unthresh_std_%s' % datatype)

        for hyp in range(1, 10):

            unthresh_file = os.path.join(
                unthresh_concat_dir,
                'hypo%d.nii.gz' % hyp)

            range_outfile = os.path.join(
                unthresh_range_dir,
                'hypo%d.nii.gz' % hyp)

            std_outfile = os.path.join(
                unthresh_std_dir,
                'hypo%d.nii.gz' % hyp)

            if not os.path.exists(range_outfile) \
                    or not os.path.exists(std_outfile) \
                    or overwrite:
                unthresh_img = nibabel.load(unthresh_file)
                unthresh_data = unthresh_img.get_fdata()  # get_data() is deprecated
                concat_data = numpy.nan_to_num(unthresh_data)

                # compute range
                datarange = numpy.max(concat_data, axis=3) \
                    - numpy.min(concat_data, axis=3)
                range_img = nibabel.Nifti1Image(
                    datarange,
                    affine=unthresh_img.affine)
                range_img.to_filename(range_outfile)

                # compute standard deviation
                datastd = numpy.std(concat_data, axis=3)
                std_img = nibabel.Nifti1Image(
                    datastd,
                    affine=unthresh_img.affine)
                std_img.to_filename(std_outfile)
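
Stripped of the file handling, the per-voxel statistics are two reductions over the team axis (axis 3 of the concatenated image). A minimal sketch on a synthetic array:

import numpy

# a synthetic 4D array standing in for the concatenated z maps,
# with teams stacked along axis 3 (hypothetical 20-team stack)
concat_data = numpy.nan_to_num(numpy.random.randn(4, 4, 4, 20))

datarange = concat_data.max(axis=3) - concat_data.min(axis=3)
datastd = concat_data.std(axis=3)
print(datarange.shape, datastd.shape)  # both (4, 4, 4)
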
Example #3
    def compute_image_stats(self, datatype='zstat', overwrite=None):
        """
        compute std and range on statistical images
        """
        log_to_file(
            self.dirs.logfile, '\n\n%s' %
            sys._getframe().f_code.co_name)
        func_args = inspect.getargvalues(
            inspect.currentframe()).locals
        log_to_file(
            self.dirs.logfile,
            stringify_dict(func_args))

        if overwrite is None:
            overwrite = self.overwrite
        for hyp in range(1, 10):

            unthresh_file = os.path.join(
                self.dirs.dirs['output'],
                'unthresh_concat_%s/hypo%d.nii.gz' % (datatype, hyp))

            range_outfile = os.path.join(
                self.dirs.dirs['output'],
                'unthresh_range_%s/hypo%d.nii.gz' % (datatype, hyp))
            if not os.path.exists(os.path.join(
                self.dirs.dirs['output'],
                    'unthresh_range_%s' % datatype)):
                os.mkdir(os.path.join(
                    self.dirs.dirs['output'],
                    'unthresh_range_%s' % datatype))

            std_outfile = os.path.join(
                self.dirs.dirs['output'],
                'unthresh_std_%s/hypo%d.nii.gz' % (datatype, hyp))
            if not os.path.exists(os.path.join(
                    self.dirs.dirs['output'],
                    'unthresh_std_%s' % datatype)):
                os.mkdir(os.path.join(
                    self.dirs.dirs['output'],
                    'unthresh_std_%s' % datatype))

            if not os.path.exists(range_outfile) \
                    or not os.path.exists(std_outfile) \
                    or overwrite:
                unthresh_img = nibabel.load(unthresh_file)
                unthresh_data = unthresh_img.get_fdata()
                concat_data = numpy.nan_to_num(unthresh_data)
                datarange = numpy.max(concat_data, axis=3) \
                    - numpy.min(concat_data, axis=3)
                range_img = nibabel.Nifti1Image(
                    datarange,
                    affine=unthresh_img.affine)
                range_img.to_filename(range_outfile)
                datastd = numpy.std(concat_data, axis=3)
                std_img = nibabel.Nifti1Image(
                    datastd,
                    affine=unthresh_img.affine)
                std_img.to_filename(std_outfile)
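
This earlier version creates each output directory with an exists()/mkdir() pair, which Example #2 above replaces with self.dirs.get_output_dir. A sketch of the standard library idiom that avoids both the check-then-create race and missing parent directories (the path is a hypothetical stand-in):

import os

# equivalent to the exists()/mkdir() pairs above, but race-free and
# able to create missing parent directories in one call
os.makedirs(os.path.join('output', 'unthresh_range_zstat'), exist_ok=True)
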
Example #4
def get_thresh_similarity(narps, dataset='resampled'):
    """
    For each pair of thresholded images, compute the similarity
    of the thresholded/binarized maps using the Jaccard coefficient.
    Computation with zeros per https://stackoverflow.com/questions/37003272/how-to-compute-jaccard-similarity-from-a-pandas-dataframe # noqa
    also compute the Jaccard coefficient over only nonzero pairs
    (as in scipy)
    """

    func_args = inspect.getargvalues(inspect.currentframe()).locals
    func_name = sys._getframe().f_code.co_name
    logfile = os.path.join(narps.dirs.dirs['logs'],
                           'AnalyzeMaps-%s.txt' % func_name)
    log_to_file(logfile, '%s' % func_name, flush=True)
    log_to_file(logfile, stringify_dict(func_args))

    for hyp in hypnums:
        print('analyzing thresh similarity for hypothesis', hyp)
        maskdata, labels = get_concat_data(hyp,
                                           narps.dirs.MNI_mask,
                                           narps.dirs.dirs['output'],
                                           imgtype='thresh',
                                           dataset=dataset)

        pctagree = matrix_pct_agreement(maskdata)
        median_pctagree = numpy.median(pctagree[numpy.triu_indices_from(
            pctagree, 1)])
        log_to_file(
            logfile,
            'hyp %d: median pctagree similarity: %f' % (hyp, median_pctagree))

        df_pctagree = pandas.DataFrame(pctagree, index=labels, columns=labels)
        df_pctagree.to_csv(
            os.path.join(narps.dirs.dirs['metadata'],
                         'pctagree_hyp%d.csv' % hyp))

        seaborn.clustermap(df_pctagree,
                           cmap='jet',
                           figsize=(16, 16),
                           method='ward')
        plt.title(hypotheses_full[hyp])
        plt.savefig(os.path.join(narps.dirs.dirs['figures'],
                                 'hyp%d_pctagree_map_thresh.pdf' % hyp),
                    bbox_inches='tight')
        plt.savefig(os.path.join(narps.dirs.dirs['figures'],
                                 'hyp%d_pctagree_map_thresh.png' % hyp),
                    bbox_inches='tight')
        plt.close()

        # get jaccard for nonzero voxels
        jacsim_nonzero = 1 - squareform(pdist(maskdata, 'jaccard'))
        median_jacsim_nonzero = numpy.median(
            jacsim_nonzero[numpy.triu_indices_from(jacsim_nonzero, 1)])
        log_to_file(
            logfile, 'hyp %d: median Jaccard similarity (nonzero): %f' %
            (hyp, median_jacsim_nonzero))
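
matrix_pct_agreement and get_concat_data are NARPS helpers not shown here, but both similarity variants can be sketched with scipy alone: percent agreement counts voxels where both maps are zero as matches (it equals 1 minus the normalized Hamming distance), while the Jaccard coefficient excludes them. Synthetic data stand in for real binarized maps:

import numpy
from scipy.spatial.distance import pdist, squareform

# rows are teams, columns are voxels of a binarized map
maskdata = (numpy.random.rand(5, 1000) > 0.8).astype(int)

# agreement including joint zeros: 1 - normalized Hamming distance
pctagree = 1 - squareform(pdist(maskdata, 'hamming'))

# Jaccard similarity: joint zeros are excluded from the denominator
jacsim_nonzero = 1 - squareform(pdist(maskdata, 'jaccard'))

# median over the upper triangle, excluding the diagonal
print(numpy.median(pctagree[numpy.triu_indices_from(pctagree, 1)]))
print(numpy.median(jacsim_nonzero[numpy.triu_indices_from(jacsim_nonzero, 1)]))
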
Example #5
    def create_concat_images(self,
                             datatype='resampled',
                             imgtypes=None,
                             overwrite=None):
        """
        create images concatenated across teams
        ordered by self.complete_image_sets
        """
        log_to_file(self.dirs.logfile,
                    '\n\n%s' % sys._getframe().f_code.co_name)
        func_args = inspect.getargvalues(inspect.currentframe()).locals
        log_to_file(self.dirs.logfile, stringify_dict(func_args))

        if imgtypes is None:
            imgtypes = ['thresh', 'unthresh']
        if overwrite is None:
            overwrite = self.overwrite
        for imgtype in imgtypes:
            self.dirs.dirs['concat_%s' % imgtype] = os.path.join(
                self.dirs.dirs['output'], '%s_concat_%s' % (imgtype, datatype))
            for hyp in range(1, 10):
                outfile = os.path.join(self.dirs.dirs['concat_%s' % imgtype],
                                       'hypo%d.nii.gz' % hyp)
                if not os.path.exists(os.path.dirname(outfile)):
                    os.mkdir(os.path.dirname(outfile))
                if not os.path.exists(outfile) or overwrite:
                    if self.verbose:
                        print('%s - hypo %d: creating concat file' %
                              (imgtype, hyp))
                    concat_teams = [
                        teamID for teamID in self.complete_image_sets
                        if os.path.exists(self.teams[teamID].images[imgtype]
                                          [datatype][hyp])
                    ]
                    self.all_maps[imgtype][datatype] = [
                        self.teams[teamID].images[imgtype][datatype][hyp]
                        for teamID in concat_teams
                    ]

                    # use nilearn NiftiMasker to load data
                    # and save to a new file
                    masker = nilearn.input_data.NiftiMasker(
                        mask_img=self.dirs.MNI_mask)
                    concat_data = masker.fit_transform(
                        self.all_maps[imgtype][datatype])
                    concat_img = masker.inverse_transform(concat_data)
                    concat_img.to_filename(outfile)
                else:
                    if self.verbose:
                        print('%s - hypo %d: using existing file' %
                              (imgtype, hyp))
        return self.all_maps
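
The core of this method is the NiftiMasker round trip: fit_transform flattens each input image to a row of within-mask voxels, and inverse_transform folds the stacked rows back into one 4D image. A minimal sketch with hypothetical paths:

import nilearn.input_data

masker = nilearn.input_data.NiftiMasker(mask_img='mask.nii.gz')
# each input file becomes one row of a (n_images, n_voxels_in_mask) matrix
concat_data = masker.fit_transform(['team1_map.nii.gz', 'team2_map.nii.gz'])
concat_img = masker.inverse_transform(concat_data)  # back to a 4D image
concat_img.to_filename('concat.nii.gz')
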
Example #6
def get_thresh_similarity(narps, dataset='resampled'):
    """
    For each pair of thresholded images, compute the similarity
    of the thresholded/binarized maps using the Jaccard coefficient.
    Computation with zeros per https://stackoverflow.com/questions/37003272/how-to-compute-jaccard-similarity-from-a-pandas-dataframe # noqa
    also compute the Jaccard coefficient over only nonzero pairs
    (as in scipy)
    """

    func_args = inspect.getargvalues(inspect.currentframe()).locals
    func_name = sys._getframe().f_code.co_name
    logfile = os.path.join(
        narps.dirs.dirs['logs'],
        '%s-%s.txt' % (sys.argv[0].split('.')[0], func_name))
    log_to_file(logfile, '%s' % func_name, flush=True)
    log_to_file(logfile, stringify_dict(func_args))

    output_dir = os.path.join(narps.dirs.dirs['output'], 'jaccard_thresh')
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)

    for hyp in hypnums:
        print('creating Jaccard map for hypothesis', hyp)
        maskdata, labels = get_masked_data(hyp,
                                           narps.dirs.MNI_mask,
                                           narps.dirs.dirs['output'],
                                           imgtype='thresh',
                                           dataset=dataset)
        jacsim = 1 - pairwise_distances(maskdata, metric="hamming")
        jacsim_nonzero = 1 - squareform(pdist(maskdata, 'jaccard'))
        df = pandas.DataFrame(jacsim, index=labels, columns=labels)
        df.to_csv(os.path.join(output_dir, 'jacsim_thresh_hyp%d.csv' % hyp))
        df_nonzero = pandas.DataFrame(jacsim_nonzero,
                                      index=labels,
                                      columns=labels)
        df_nonzero.to_csv(
            os.path.join(output_dir, 'jacsim_nonzero_thresh_hyp%d.csv' % hyp))
        seaborn.clustermap(df, cmap='jet', figsize=(16, 16), method='ward')
        plt.title(hypotheses[hyp])
        plt.savefig(
            os.path.join(narps.dirs.dirs['figures'],
                         'hyp%d_jaccard_map_thresh.pdf' % hyp))
        plt.close()
        seaborn.clustermap(df_nonzero,
                           cmap='jet',
                           figsize=(16, 16),
                           method='ward')
        plt.title(hypotheses[hyp])
        plt.savefig(
            os.path.join(narps.dirs.dirs['figures'],
                         'hyp%d_jaccard_nonzero_map_thresh.pdf' % hyp))
        plt.close()
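
Note that despite its name, jacsim above is computed from the Hamming distance, so it counts voxels where both maps are zero as agreement (the with-zeros variant from the Stack Overflow link), whereas jacsim_nonzero is the conventional Jaccard coefficient. A sketch on synthetic data confirming that the sklearn and scipy Hamming computations agree:

import numpy
from scipy.spatial.distance import pdist, squareform
from sklearn.metrics import pairwise_distances

maskdata = (numpy.random.rand(4, 100) > 0.7).astype(float)
agree_sklearn = 1 - pairwise_distances(maskdata, metric="hamming")
agree_scipy = 1 - squareform(pdist(maskdata, 'hamming'))
print(numpy.allclose(agree_sklearn, agree_scipy))  # True
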
Example #7
    def create_mean_thresholded_images(self, datatype='resampled',
                                       overwrite=None, thresh=1e-5):
        """
        create overlap maps for thresholded images
        """
        log_to_file(
            self.dirs.logfile,
            sys._getframe().f_code.co_name,
            headspace=2)
        func_args = inspect.getargvalues(
            inspect.currentframe()).locals
        log_to_file(
            self.dirs.logfile,
            stringify_dict(func_args))

        imgtype = 'thresh'
        if overwrite is None:
            overwrite = self.overwrite
        output_dir = self.dirs.get_output_dir('overlap_binarized_thresh')
        concat_dir = self.dirs.get_output_dir(
            '%s_concat_%s' % (imgtype, datatype))

        for hyp in range(1, 10):
            outfile = os.path.join(
                output_dir,
                'hypo%d.nii.gz' % hyp)
            if not os.path.exists(outfile) or overwrite:
                if self.verbose:
                    print('%s - hypo %d: creating overlap file' % (
                        imgtype, hyp))
                concat_file = os.path.join(
                    concat_dir,
                    'hypo%d.nii.gz' % hyp)
                concat_img = nibabel.load(concat_file)
                concat_data = concat_img.get_fdata()
                concat_data = (concat_data > thresh).astype('float')
                concat_mean = numpy.mean(concat_data, 3)
                concat_mean_img = nibabel.Nifti1Image(concat_mean,
                                                      affine=concat_img.affine)
                concat_mean_img.to_filename(outfile)

            else:
                if self.verbose:
                    print('%s - hypo %d: using existing file' % (
                        imgtype, hyp))
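
Once the concatenated image is loaded, the overlap map is a two-step computation: binarize each team's map at the threshold, then average over the team axis to get the proportion of teams active at each voxel. A sketch on synthetic data:

import numpy

thresh = 1e-5
concat_data = numpy.random.rand(4, 4, 4, 10)  # hypothetical 10-team stack
binarized = (concat_data > thresh).astype('float')
concat_mean = numpy.mean(binarized, axis=3)   # proportions in [0, 1]
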
Example #8
def analyze_clusters(narps,
                     dendrograms,
                     membership,
                     dataset='zstat',
                     corr_type='spearman',
                     thresh=2.,
                     vmax=5.,
                     rand_thresh=0.2):
    """
    Use dendrogram computed by seaborn clustermap to identify clusters,
    and then create a separate mean statistical map for each cluster.
    """

    # if dendrograms is None or membership is None:
    #     with open(os.path.join(
    #             narps.dirs.dirs['output'],
    #             'unthresh_dendrograms_%s.pkl' % corr_type), 'rb') as f:
    #         dendrograms, membership = pickle.load(f)

    func_args = inspect.getargvalues(inspect.currentframe()).locals
    # remove these to keep logs more tractable
    del func_args['membership']
    del func_args['dendrograms']
    func_name = sys._getframe().f_code.co_name
    logfile = os.path.join(narps.dirs.dirs['logs'],
                           'AnalyzeMaps-%s.txt' % func_name)
    log_to_file(logfile, '%s' % func_name, flush=True)
    log_to_file(logfile, stringify_dict(func_args))

    mean_smoothing = {}
    mean_decision = {}
    cluster_metadata = {}
    cluster_metadata_df = pandas.DataFrame(
        columns=['hyp%d' % i for i in hypnums], index=narps.metadata.teamID)

    masker = nilearn.input_data.NiftiMasker(mask_img=narps.dirs.MNI_mask)

    for i, hyp in enumerate(hypnums):
        log_to_file(logfile, 'hyp %d' % hyp)
        # set cluster indices back to int, for consistency with above
        clusters = [int(x) for x in list(membership[str(hyp)].keys())]
        clusters.sort()

        fig, ax = plt.subplots(len(clusters), 1, figsize=(12, 12))
        cluster_metadata[hyp] = {}
        mean_smoothing[str(hyp)] = {}
        mean_decision[str(hyp)] = {}
        for j, cl in enumerate(clusters):
            log_to_file(
                logfile,
                'hyp %d cluster %d (%s)' % (hyp, cl, cluster_colors[j + 1]))
            # get all images for this cluster and average them
            member_maps = []
            member_smoothing = []
            member_decision = []
            for member in membership[str(hyp)][str(cl)]:
                cid = narps.teams[member].datadir_label
                infile = os.path.join(
                    narps.dirs.dirs['output'],
                    '%s/%s/hypo%d_unthresh.nii.gz' % (dataset, cid, hyp))
                if os.path.exists(infile):
                    member_maps.append(infile)
                    member_smoothing.append(
                        narps.metadata.query('varnum==%d' % hyp).query(
                            'teamID=="%s"' % member)['fwhm'].iloc[0])
                    member_decision.append(
                        narps.metadata.query('varnum==%d' % hyp).query(
                            'teamID=="%s"' % member)['Decision'].iloc[0])
            log_to_file(logfile, membership[str(hyp)][str(cl)])
            cluster_metadata[hyp][cl] = narps.metadata[
                narps.metadata.teamID.isin(membership[str(hyp)][str(cl)])]
            for m in membership[str(hyp)][str(cl)]:
                cluster_metadata_df.loc[m, 'hyp%d' % hyp] = cl

            log_to_file(logfile,
                        'N cluster %d maps: %d' % (cl, len(member_maps)))
            mean_smoothing[str(hyp)][str(cl)] = numpy.mean(
                numpy.array(member_smoothing))
            mean_decision[str(hyp)][str(cl)] = numpy.mean(
                numpy.array(member_decision))
            log_to_file(logfile,
                        'mean fwhm: %f' % mean_smoothing[str(hyp)][str(cl)])
            log_to_file(logfile, 'pYes: %f' % mean_decision[str(hyp)][str(cl)])
            maskdata = masker.fit_transform(member_maps)
            meandata = numpy.mean(maskdata, 0)
            mean_img = masker.inverse_transform(meandata)
            mean_filename = os.path.join(
                narps.dirs.dirs['output'],
                'cluster_maps/hyp%d_cluster%d_mean.nii.gz' % (hyp, cl))
            if not os.path.exists(os.path.dirname(mean_filename)):
                os.mkdir(os.path.dirname(mean_filename))
            mean_img.to_filename(mean_filename)
            nilearn.plotting.plot_stat_map(
                mean_img,
                threshold=thresh,
                vmax=vmax,
                display_mode="z",
                colorbar=True,
                title='H%d - cluster %d [%s] (pYes = %0.2f)' %
                (hyp, cl, cluster_colornames[cluster_colors[j + 1]],
                 mean_decision[str(hyp)][str(cl)]),
                cut_coords=cut_coords,
                axes=ax[j])
            log_to_file(logfile, '')
        log_to_file(logfile, '')
        plt.savefig(os.path.join(narps.dirs.dirs['figures'],
                                 'hyp%d_cluster_means.pdf' % hyp),
                    bbox_inches='tight')
        plt.savefig(os.path.join(narps.dirs.dirs['figures'],
                                 'hyp%d_cluster_means.png' % hyp),
                    bbox_inches='tight')
        plt.close(fig)

    # save cluster metadata to data frame
    cluster_metadata_df = cluster_metadata_df.dropna()
    cluster_metadata_df = cluster_metadata_df[~cluster_metadata_df.index.
                                              duplicated(keep='first')]
    cluster_metadata_df.to_csv(
        os.path.join(narps.dirs.dirs['metadata'], 'cluster_metadata_df.csv'))

    # compute clustering similarity across hypotheses
    log_to_file(logfile, 'Computing cluster similarity (Rand score)')
    log_to_file(logfile, 'pairs with adjusted Rand index > %f' % rand_thresh)

    randmtx = numpy.zeros((10, 10))
    for i, j in enumerate(hypnums):
        for k in hypnums[i:]:
            if j == k:
                continue
            randmtx[j, k] = sklearn.metrics.adjusted_rand_score(
                cluster_metadata_df['hyp%d' % j],
                cluster_metadata_df['hyp%d' % k])
            if randmtx[j, k] > rand_thresh:
                log_to_file(logfile, '%d, %d: %f' % (j, k, randmtx[j, k]))

    numpy.savetxt(
        os.path.join(narps.dirs.dirs['output'],
                     'cluster_membership_Rand_indices.csv'), randmtx)

    # are the same teams in the main cluster each time?
    main_cluster_teams = []
    print('index:', cluster_metadata_df.index)
    for i, hyp in enumerate(hypnums):
        # find main cluster
        clusters = cluster_metadata_df.loc[:, 'hyp%d' % hyp]
        clusters.index = cluster_metadata_df.index
        cnt = clusters.value_counts()
        largest_cluster = cnt.index[0]
        main_cluster_teams = main_cluster_teams +\
            clusters[clusters == largest_cluster].index.tolist()
    main_cluster_counts = Counter(main_cluster_teams)
    consistent_teams = [
        m for m in main_cluster_counts if main_cluster_counts[m] == 7
    ]

    log_to_file(
        logfile, 'Number of teams consistently in main cluster: %d' %
        len(consistent_teams))

    return cluster_metadata_df
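
The cross-hypothesis comparison at the end uses the adjusted Rand index, which scores the agreement between two partitions of the same teams independently of the arbitrary cluster numbers. A minimal sketch:

import sklearn.metrics

# the same partition under permuted label names still scores 1.0;
# chance-level agreement scores near 0
hyp1_clusters = [1, 1, 2, 2, 3, 3]
hyp2_clusters = [2, 2, 3, 3, 1, 1]
print(sklearn.metrics.adjusted_rand_score(hyp1_clusters, hyp2_clusters))
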
Example #9
def mk_correlation_maps_unthresh(narps,
                                 corr_type='spearman',
                                 n_clusters=None,
                                 dataset='zstat',
                                 vox_mask_thresh=1.0):
    """
    Create correlation maps for unthresholded images
    These correlation matrices are clustered using Ward clustering,
    with the number of clusters for each hypothesis determined by
    visual examination.
    vox_mask_thresh controls which voxels are analyzed, in terms of the
    proportion of teams with signal in the voxel; defaults to 100%
    """
    func_args = inspect.getargvalues(inspect.currentframe()).locals
    func_name = sys._getframe().f_code.co_name
    logfile = os.path.join(narps.dirs.dirs['logs'],
                           'AnalyzeMaps-%s.txt' % func_name)
    log_to_file(logfile, '%s' % func_name, flush=True)
    log_to_file(logfile, stringify_dict(func_args))

    if n_clusters is None:
        n_clusters = {1: 3, 2: 3, 5: 3, 6: 3, 7: 3, 8: 3, 9: 3}

    dendrograms = {}
    membership = {}
    cc_unthresh = {}
    output_dir = narps.dirs.get_output_dir('correlation_unthresh')

    for i, hyp in enumerate(hypnums):
        print('creating correlation map for hypothesis', hyp)
        membership[str(hyp)] = {}
        maskdata, labels = get_concat_data(hyp,
                                           narps.dirs.MNI_mask,
                                           narps.dirs.dirs['output'],
                                           dataset=dataset,
                                           vox_mask_thresh=vox_mask_thresh,
                                           logfile=logfile)

        # compute correlation of all datasets with mean
        if 'mean_corr' not in locals():
            mean_corr = pandas.DataFrame(numpy.zeros(
                (len(labels), len(hypnums))),
                                         columns=['H%d' % i for i in hypnums],
                                         index=labels)
        meandata = numpy.mean(maskdata, 0)
        for t in range(maskdata.shape[0]):
            mean_corr.iloc[t,
                           i] = scipy.stats.spearmanr(maskdata[t, :],
                                                      meandata).correlation

        # cluster datasets
        if corr_type == 'spearman':
            cc = scipy.stats.spearmanr(maskdata.T).correlation
        else:  # use Pearson
            cc = numpy.corrcoef(maskdata)
        cc = numpy.nan_to_num(cc)
        df = pandas.DataFrame(cc, index=labels, columns=labels)
        df.to_csv(
            os.path.join(output_dir,
                         '%s_unthresh_hyp%d.csv' % (corr_type, hyp)))

        ward_linkage = scipy.cluster.hierarchy.ward(cc)

        # add 1 to cluster labels so they start at 1
        # rather than zero - for clarity in paper
        clustlabels = [
            s[0] + 1 for s in scipy.cluster.hierarchy.cut_tree(
                ward_linkage, n_clusters=n_clusters[hyp])
        ]
        print('clustlabels:', clustlabels)
        # get decisions for column colors
        md = narps.metadata.query('varnum==%d' % hyp).set_index('teamID')

        decision_colors = ['r', 'g']
        col_colors = [
            decision_colors[md.loc[teamID, 'Decision']] for teamID in labels
        ]

        row_colors = [cluster_colors[s] for s in clustlabels]
        print('row_colors:', row_colors)
        cm = seaborn.clustermap(df,
                                cmap='vlag',
                                figsize=(16, 16),
                                method='ward',
                                row_colors=row_colors,
                                col_colors=col_colors,
                                center=0,
                                vmin=-1,
                                vmax=1)
        plt.title('H%d:' % hyp + hypotheses_full[hyp])
        cc_unthresh[hyp] = (cc, labels)
        plt.savefig(os.path.join(
            narps.dirs.dirs['figures'],
            'hyp%d_%s_map_unthresh.pdf' % (hyp, corr_type)),
                    bbox_inches='tight')
        plt.savefig(os.path.join(
            narps.dirs.dirs['figures'],
            'hyp%d_%s_map_unthresh.png' % (hyp, corr_type)),
                    bbox_inches='tight')
        plt.close()
        dendrograms[hyp] = ward_linkage

        # get cluster membership
        for j in cm.dendrogram_row.reordered_ind:
            cl = clustlabels[j]
            if str(cl) not in membership[str(hyp)]:
                membership[str(hyp)][str(cl)] = []
            membership[str(hyp)][str(cl)].append(labels[j])

    # save cluster data to file so that we don't have to rerun everything
    with open(
            os.path.join(output_dir,
                         'unthresh_cluster_membership_%s.json' % corr_type),
            'w') as f:
        json.dump(membership, f)

    # also save correlation info
    median_corr = mean_corr.median(1).sort_values()
    median_corr_df = pandas.DataFrame(median_corr, columns=['median_corr'])
    median_corr_df.to_csv(
        os.path.join(narps.dirs.dirs['metadata'], 'median_pattern_corr.csv'))

    log_to_file(
        logfile, 'median correlation between teams: %f' %
        numpy.median(cc[numpy.triu_indices_from(cc, 1)]))

    return dendrograms, membership
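
A compact sketch of the clustering step in isolation: Ward linkage cut into a fixed number of clusters, with labels shifted to start at 1 as above. Note that, as in the original, the correlation matrix is passed straight to ward(), which treats each row as a feature vector rather than converting correlations into distances first.

import numpy
import scipy.cluster.hierarchy

# synthetic team-by-team correlation matrix standing in for cc above
cc = numpy.corrcoef(numpy.random.randn(6, 50))
ward_linkage = scipy.cluster.hierarchy.ward(cc)
cut = scipy.cluster.hierarchy.cut_tree(ward_linkage, n_clusters=3)
clustlabels = [int(s[0]) + 1 for s in cut]  # 1-based labels, as in the paper
print(clustlabels)
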
Example #10
    def convert_to_zscores(self, map_metadata_file=None, overwrite=None):
        """
        convert rectified images to z scores
        - unthresholded images could be either t or z images
        - if they are already z then just copy
        - use metadata supplied by teams to determine image type
        """
        log_to_file(self.dirs.logfile,
                    '\n\n%s' % sys._getframe().f_code.co_name)
        func_args = inspect.getargvalues(inspect.currentframe()).locals
        log_to_file(self.dirs.logfile, stringify_dict(func_args))

        if overwrite is None:
            overwrite = self.overwrite
        if map_metadata_file is None:
            map_metadata_file = os.path.join(
                self.dirs.dirs['orig'], 'narps_neurovault_images_details.csv')
        unthresh_stat_type = get_map_metadata(map_metadata_file)
        metadata = get_metadata(self.metadata_file)

        n_participants = metadata[['n_participants', 'NV_collection_string']]

        n_participants.index = metadata.teamID

        unthresh_stat_type = unthresh_stat_type.merge(n_participants,
                                                      left_index=True,
                                                      right_index=True)

        for teamID in self.complete_image_sets:
            if teamID not in unthresh_stat_type.index:
                print('no map metadata for', teamID)
                continue
            # this is a bit of a kludge: some contrasts include all
            # subjects but others only include a subset, and since we
            # don't have the number of participants in each group, we
            # just use the full sample size
            n = unthresh_stat_type.loc[teamID, 'n_participants']

            for hyp in range(1, 10):
                infile = self.teams[teamID].images['unthresh']['rectified'][
                    hyp]
                if not os.path.exists(infile):
                    print('skipping', infile)
                    continue
                self.teams[teamID].images['unthresh']['zstat'][
                    hyp] = os.path.join(self.dirs.dirs['zstat'],
                                        self.teams[teamID].datadir_label,
                                        'hypo%d_unthresh.nii.gz' % hyp)
                if not overwrite and os.path.exists(
                        self.teams[teamID].images['unthresh']['zstat'][hyp]):
                    continue

                if unthresh_stat_type.loc[teamID,
                                          'unthresh_type'].lower() == 't':
                    if not os.path.exists(
                            os.path.dirname(self.teams[teamID].
                                            images['unthresh']['zstat'][hyp])):
                        os.mkdir(
                            os.path.dirname(
                                self.teams[teamID].images['unthresh']['zstat']
                                [hyp]))
                    print("converting %s (hyp %d) to z - %d participants" %
                          (teamID, hyp, n))
                    TtoZ(infile,
                         self.teams[teamID].images['unthresh']['zstat'][hyp],
                         n - 1)
                elif unthresh_stat_type.loc[
                        teamID, 'unthresh_type'].lower() == 'z':
                    if not os.path.exists(
                            os.path.dirname(self.teams[teamID].
                                            images['unthresh']['zstat'][hyp])):
                        os.mkdir(
                            os.path.dirname(
                                self.teams[teamID].images['unthresh']['zstat']
                                [hyp]))
                    if not os.path.exists(self.teams[teamID].images['unthresh']
                                          ['zstat'][hyp]):
                        print('copying', teamID)
                        shutil.copy(
                            infile,
                            os.path.dirname(
                                self.teams[teamID].images['unthresh']['zstat']
                                [hyp]))
                else:
                    # if it's not T or Z then we skip it as it's not usable
                    print('skipping %s - other data type' % teamID)
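
TtoZ is a NARPS helper; a common formulation of the underlying transform maps each t value through its CDF into a standard normal quantile. A hedged sketch with scipy, assuming that is what TtoZ implements (production code typically works in log-survival space for numerical stability in the tails):

import numpy
from scipy import stats

def t_to_z(t_values, df):
    # z has the same tail probability under the normal as t under t(df)
    return stats.norm.ppf(stats.t.cdf(t_values, df))

t_values = numpy.array([-2.0, 0.0, 2.5])
print(t_to_z(t_values, df=107))  # df = n_participants - 1, as above
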
Example #11
    def create_rectified_images(self, map_metadata_file=None, overwrite=None):
        """
        create rectified images
        - contrasts 5 and 6 were negative contrasts;
        some teams uploaded images where negative values
        provided evidence in favor of the contrast.
        Using metadata provided by teams, we identify these
        images and flip their valence so that all maps
        present positive evidence for each contrast
        """
        log_to_file(self.dirs.logfile,
                    '\n\n%s' % sys._getframe().f_code.co_name)
        func_args = inspect.getargvalues(inspect.currentframe()).locals
        log_to_file(self.dirs.logfile, stringify_dict(func_args))
        if map_metadata_file is None:
            map_metadata_file = os.path.join(
                self.dirs.dirs['orig'], 'narps_neurovault_images_details.csv')
        map_metadata = get_map_metadata(map_metadata_file)
        if overwrite is None:
            overwrite = self.overwrite
        for teamID in self.complete_image_sets:
            for hyp in range(1, 10):
                if hyp in [5, 6]:
                    mdstring = map_metadata.query('teamID == "%s"' %
                                                  teamID)['hyp%d_direction' %
                                                          hyp].iloc[0]
                    rectify = mdstring.split()[0] == 'Negative'
                elif hyp == 9:
                    # manual fix for one team with reversed maps
                    if teamID in ['R7D1']:
                        mdstring = map_metadata.query(
                            'teamID == "%s"' % teamID)['hyp%d_direction' %
                                                       hyp].iloc[0]
                        rectify = True
                    else:
                        rectify = False
                else:  # just copy the other hypotheses directly
                    rectify = False

                # load data from unthresh map within
                # positive voxels of thresholded mask
                unthresh_file = self.teams[teamID].images['unthresh'][
                    'resampled'][hyp]

                self.teams[teamID].images['unthresh']['rectified'][
                    hyp] = os.path.join(self.dirs.dirs['rectified'],
                                        self.teams[teamID].datadir_label,
                                        'hypo%d_unthresh.nii.gz' % hyp)

                if not os.path.exists(
                        os.path.dirname(self.teams[teamID].images['unthresh']
                                        ['rectified'][hyp])):
                    os.mkdir(
                        os.path.dirname(self.teams[teamID].images['unthresh']
                                        ['rectified'][hyp]))

                if overwrite or not os.path.exists(
                        self.teams[teamID].images['unthresh']['rectified']
                    [hyp]):
                    # if values were flipped for negative contrasts
                    if rectify:
                        print('rectifying hyp', hyp, 'for', teamID)
                        print(mdstring)
                        print('')
                        img = nibabel.load(unthresh_file)
                        img_rectified = nilearn.image.math_img('img*-1',
                                                               img=img)
                        img_rectified.to_filename(
                            self.teams[teamID].images['unthresh']['rectified']
                            [hyp])
                        self.rectified_list.append((teamID, hyp))
                    else:  # just copy original
                        shutil.copy(
                            unthresh_file,
                            self.teams[teamID].images['unthresh']['rectified']
                            [hyp])
        # write list of rectified teams to disk
        if len(self.rectified_list) > 0:
            with open(
                    os.path.join(self.dirs.dirs['metadata'],
                                 'rectified_images_list.txt'), 'w') as f:
                for pair in self.rectified_list:
                    f.write('%s\t%s\n' % (pair[0], pair[1]))
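
The rectification itself is a single sign flip applied through nilearn's math_img; a minimal sketch with hypothetical file names:

import nibabel
import nilearn.image

img = nibabel.load('hypo5_unthresh.nii.gz')  # hypothetical input
img_rectified = nilearn.image.math_img('img * -1', img=img)
img_rectified.to_filename('hypo5_unthresh_rect.nii.gz')
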
Example #12
    def create_rectified_images(self, map_metadata_file=None, overwrite=None):
        """
        create rectified images
        - contrasts 5 and 6 were negative contrasts;
        some teams uploaded images where negative values
        provided evidence in favor of the contrast.
        Using metadata provided by teams, we identify these
        images and flip their valence so that all maps
        present positive evidence for each contrast
        """
        log_to_file(self.dirs.logfile,
                    sys._getframe().f_code.co_name,
                    headspace=2)
        func_args = inspect.getargvalues(inspect.currentframe()).locals
        log_to_file(self.dirs.logfile, stringify_dict(func_args))

        if overwrite is None:
            overwrite = self.overwrite
        for teamID in self.complete_image_sets['unthresh']:
            if not hasattr(self.teams[teamID], 'rectify'):
                print('no rectification data for %s, skipping' % teamID)
                continue
            for hyp in range(1, 10):
                if hyp not in self.teams[teamID].rectify:
                    print('no rectification data for %s hyp%d, skipping' %
                          (teamID, hyp))
                    continue
                rectify = self.teams[teamID].rectify[hyp]
                # load data from unthresh map within
                # positive voxels of thresholded mask
                unthresh_file = self.teams[teamID].images['unthresh'][
                    'resampled'][hyp]

                self.teams[teamID].images['unthresh']['rectified'][
                    hyp] = os.path.join(self.dirs.dirs['rectified'],
                                        self.teams[teamID].datadir_label,
                                        'hypo%d_unthresh.nii.gz' % hyp)

                if not os.path.exists(
                        os.path.dirname(self.teams[teamID].images['unthresh']
                                        ['rectified'][hyp])):
                    os.mkdir(
                        os.path.dirname(self.teams[teamID].images['unthresh']
                                        ['rectified'][hyp]))

                if overwrite or not os.path.exists(
                        self.teams[teamID].images['unthresh']['rectified']
                    [hyp]):
                    # if values were flipped for negative contrasts
                    if rectify:
                        print('rectifying hyp', hyp, 'for', teamID)
                        img = nibabel.load(unthresh_file)
                        img_rectified = nilearn.image.math_img('img*-1',
                                                               img=img)
                        img_rectified.to_filename(
                            self.teams[teamID].images['unthresh']['rectified']
                            [hyp])
                        self.rectified_list.append((teamID, hyp))
                    else:  # just copy original
                        shutil.copy(
                            unthresh_file,
                            self.teams[teamID].images['unthresh']['rectified']
                            [hyp])
        # write list of rectified teams to disk
        if len(self.rectified_list) > 0:
            with open(
                    os.path.join(self.dirs.dirs['metadata'],
                                 'rectified_images_list.txt'), 'w') as f:
                for pair in self.rectified_list:
                    # text mode translates '\n' to the platform line
                    # ending, so avoid writing os.linesep explicitly
                    f.write('%s\t%s\n' % (pair[0], pair[1]))
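
The provenance file written at the end is plain tab-separated text; the same output can be produced with the csv module, which handles delimiters and line endings itself. A sketch where rectified_pairs is a hypothetical stand-in for self.rectified_list:

import csv

rectified_pairs = [('R7D1', 9), ('XU70', 5)]  # second team ID hypothetical
# newline='' hands line-ending control to the csv module
with open('rectified_images_list.txt', 'w', newline='') as f:
    writer = csv.writer(f, delimiter='\t')
    writer.writerows(rectified_pairs)
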
Example #13
    def create_concat_images(self,
                             datatype='resampled',
                             create_voxel_map=False,
                             imgtypes=None,
                             overwrite=None):
        """
        create images concatenated across teams
        ordered by self.complete_image_sets
        create_voxel_map: will create a map showing
        proportion of nonzero teams at each voxel
        """
        log_to_file(self.dirs.logfile,
                    sys._getframe().f_code.co_name,
                    headspace=2)
        func_args = inspect.getargvalues(inspect.currentframe()).locals
        log_to_file(self.dirs.logfile, stringify_dict(func_args))

        if imgtypes is None:
            imgtypes = ['thresh', 'unthresh']
        if overwrite is None:
            overwrite = self.overwrite
        for imgtype in imgtypes:
            concat_dir = self.dirs.get_output_dir('%s_concat_%s' %
                                                  (imgtype, datatype))
            for hyp in range(1, 10):
                outfile = os.path.join(concat_dir, 'hypo%d.nii.gz' % hyp)
                if self.verbose:
                    print(outfile)
                if not os.path.exists(outfile) or overwrite:
                    if self.verbose:
                        print('%s - hypo %d: creating concat file' %
                              (imgtype, hyp))
                    concat_teams = [
                        teamID for teamID in self.complete_image_sets[imgtype]
                        if os.path.exists(self.teams[teamID].images[imgtype]
                                          [datatype][hyp])
                    ]
                    self.all_maps[imgtype][datatype] = [
                        self.teams[teamID].images[imgtype][datatype][hyp]
                        for teamID in concat_teams
                    ]

                    # use nilearn NiftiMasker to load data
                    # and save to a new file
                    masker = nilearn.input_data.NiftiMasker(
                        mask_img=self.dirs.MNI_mask)
                    concat_data = masker.fit_transform(
                        self.all_maps[imgtype][datatype])
                    concat_img = masker.inverse_transform(concat_data)
                    concat_img.to_filename(outfile)
                    if create_voxel_map:
                        concat_data = nibabel.load(outfile).get_fdata()
                        voxel_map = numpy.mean(
                            numpy.abs(concat_data) > 1e-6, 3)
                        voxel_img = nibabel.Nifti1Image(
                            voxel_map, affine=concat_img.affine)
                        mapfile = outfile.replace('.nii.gz',
                                                  '_voxelmap.nii.gz')
                        assert mapfile != outfile
                        voxel_img.to_filename(mapfile)

                    # save team ID and files to a label file for provenance
                    labelfile = outfile.replace('.nii.gz', '.labels')
                    with open(labelfile, 'w') as f:
                        for i, team in enumerate(concat_teams):
                            # text mode translates '\n' to the platform
                            # line ending, so avoid explicit os.linesep
                            f.write('%s\t%s\n' % (
                                team, self.all_maps[imgtype][datatype][i]))
                else:
                    if self.verbose:
                        print('%s - hypo %d: using existing file' %
                              (imgtype, hyp))
        return self.all_maps
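
The optional voxel map is the proportion of teams with a (near-)nonzero value at each voxel, computed over the team axis of the concatenated data. A sketch on a synthetic array:

import numpy

concat_data = numpy.random.randn(4, 4, 4, 15)  # hypothetical 15-team stack
concat_data[..., :5] = 0                       # pretend five teams are empty
voxel_map = numpy.mean(numpy.abs(concat_data) > 1e-6, axis=3)
print(voxel_map.min(), voxel_map.max())        # proportions in [0, 1]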