def get_thresh_similarity(narps, dataset='resampled'):
    """
    Compute pairwise similarity of the thresholded/binarized maps.

    For each hypothesis in ``hypnums``, two label-by-label similarity
    matrices are computed from the masked thresholded images:

    * ``1 - hamming distance``, i.e. the computation "with zeros" per
      https://stackoverflow.com/questions/37003272/how-to-compute-jaccard-similarity-from-a-pandas-dataframe  # noqa
    * ``1 - jaccard distance`` via ``pdist``, i.e. Jaccard on only the
      nonzero pairs (ala scipy).

    Each matrix is written as a CSV under ``<output>/jaccard_thresh`` and
    drawn as a Ward-clustered heatmap PDF in the figures directory.

    Parameters
    ----------
    narps : object
        Project object; this function reads ``narps.dirs.dirs`` (keys
        'logs', 'output', 'figures') and ``narps.dirs.MNI_mask``.
    dataset : str
        Passed through unchanged to ``get_masked_data``.

    Returns
    -------
    None
    """
    # Capture the call arguments first, before any other local is bound.
    func_args = inspect.getargvalues(inspect.currentframe()).locals
    func_name = sys._getframe().f_code.co_name
    # Use the script's base name without directory or extension:
    # splitting sys.argv[0] on '.' breaks for './script.py', and an
    # absolute script path would make os.path.join drop the logs directory.
    logfile = os.path.join(
        narps.dirs.dirs['logs'],
        '%s-%s.txt' % (
            os.path.splitext(os.path.basename(sys.argv[0]))[0],
            func_name))
    log_to_file(logfile, '%s' % func_name, flush=True)
    log_to_file(logfile, stringify_dict(func_args))

    output_dir = os.path.join(narps.dirs.dirs['output'], 'jaccard_thresh')
    os.makedirs(output_dir, exist_ok=True)
    figure_dir = narps.dirs.dirs['figures']

    for hyp in hypnums:
        print('creating Jaccard map for hypothesis', hyp)
        maskdata, labels = get_masked_data(
            hyp,
            narps.dirs.MNI_mask,
            narps.dirs.dirs['output'],
            imgtype='thresh',
            dataset=dataset)

        # similarity including positions that are zero in both maps
        jacsim = 1 - pairwise_distances(maskdata, metric="hamming")
        # similarity restricted to nonzero positions
        jacsim_nonzero = 1 - squareform(pdist(maskdata, 'jaccard'))

        variants = (
            (jacsim,
             'jacsim_thresh_hyp%d.csv',
             'hyp%d_jaccard_map_thresh.pdf'),
            (jacsim_nonzero,
             'jacsim_nonzero_thresh_hyp%d.csv',
             'hyp%d_jaccard_nonzero_map_thresh.pdf'),
        )
        for matrix, csv_name, fig_name in variants:
            df = pandas.DataFrame(matrix, index=labels, columns=labels)
            df.to_csv(os.path.join(output_dir, csv_name % hyp))

            seaborn.clustermap(df, cmap='jet', figsize=(16, 16),
                               method='ward')
            plt.title(hypotheses[hyp])
            plt.savefig(os.path.join(figure_dir, fig_name % hyp))
            plt.close()
def create_unthresh_histograms(narps, overwrite=True):
    """
    Create histograms for in-mask values in unthresholded images.

    These are only created for the images that were successfully
    registered and rectified.  One multi-panel PDF per hypothesis is
    written to ``<figures>/unthresh_histograms``, with one 100-bin
    histogram per label returned by ``get_masked_data``.

    Parameters
    ----------
    narps : object
        Project object; this function reads ``narps.dirs.dirs`` (keys
        'figures', 'output') and ``narps.dirs.MNI_mask``.
    overwrite : bool
        When False, hypotheses whose output PDF already exists are skipped.

    Returns
    -------
    None
    """
    figdir = os.path.join(narps.dirs.dirs['figures'], 'unthresh_histograms')
    os.makedirs(figdir, exist_ok=True)

    n_cols = 3  # panels are laid out in three columns

    for hyp in hypnums:
        outfile = os.path.join(figdir, 'hyp%d_unthresh_histogram.pdf' % hyp)
        if os.path.exists(outfile) and not overwrite:
            continue

        print('making figure for hyp', hyp)
        unthresh_data, labels = get_masked_data(
            hyp,
            narps.dirs.MNI_mask,
            narps.dirs.dirs['output'],
            imgtype='unthresh',
            dataset='rectified')

        n_rows = int(numpy.ceil(len(labels) / n_cols))
        # squeeze=False keeps ``ax`` two-dimensional even when there is
        # only a single row, so the [row, col] indexing below always works.
        fig, ax = plt.subplots(n_rows, n_cols, figsize=(16, 50),
                               squeeze=False)

        for i, label in enumerate(labels):
            row, col = divmod(i, n_cols)
            ax[row, col].hist(unthresh_data[i, :], 100)
            ax[row, col].set_title(label)

        plt.tight_layout()
        plt.savefig(outfile)
        plt.close(fig)
def mk_correlation_maps_unthresh(narps,
                                 corr_type='spearman',
                                 n_clusters=None,
                                 dataset='zstat'):
    """
    Create correlation maps for unthresholded images.

    These correlation matrices are clustered using Ward clustering, with
    the number of clusters for each hypothesis determined by visual
    examination.

    For every hypothesis in ``hypnums`` this writes the correlation matrix
    as a CSV under ``<output>/correlation_unthresh`` and a clustered
    heatmap PDF in the figures directory.  After the loop it writes the
    cluster membership as JSON and ``median_pattern_distance.csv`` (the
    per-label median, across hypotheses, of the Spearman correlation with
    the mean map) to the metadata directory.

    Parameters
    ----------
    narps : object
        Project object; this function reads ``narps.dirs.dirs`` (keys
        'logs', 'output', 'figures', 'metadata'), ``narps.dirs.MNI_mask``
        and ``narps.metadata``.
    corr_type : str
        'spearman' uses ``scipy.stats.spearmanr``; any other value uses
        Pearson correlation via ``numpy.corrcoef``.
    n_clusters : dict or None
        Maps hypothesis number to the number of clusters to cut the tree
        into.  Defaults to a built-in table when None.
    dataset : str
        Passed through unchanged to ``get_masked_data``.

    Returns
    -------
    tuple
        ``(dendrograms, membership)``: ``dendrograms`` maps hypothesis
        number to its Ward linkage; ``membership`` maps ``str(hyp)`` to a
        dict from ``str(cluster label)`` to the list of labels in it.
    """
    # Capture the call arguments first, before any other local is bound.
    func_args = inspect.getargvalues(inspect.currentframe()).locals
    func_name = sys._getframe().f_code.co_name
    logfile = os.path.join(narps.dirs.dirs['logs'],
                           'AnalyzeMaps-%s.txt' % func_name)
    log_to_file(logfile, '%s' % func_name, flush=True)
    log_to_file(logfile, stringify_dict(func_args))

    if n_clusters is None:
        n_clusters = {1: 4, 2: 3, 5: 4, 6: 3, 7: 4, 8: 4, 9: 3}

    dendrograms = {}
    membership = {}
    output_dir = os.path.join(narps.dirs.dirs['output'],
                              'correlation_unthresh')
    os.makedirs(output_dir, exist_ok=True)

    # Allocated on the first iteration, once the labels are known.
    mean_corr = None

    for i, hyp in enumerate(hypnums):
        print('creating correlation map for hypothesis', hyp)
        hyp_membership = membership[str(hyp)] = {}
        maskdata, labels = get_masked_data(
            hyp,
            narps.dirs.MNI_mask,
            narps.dirs.dirs['output'],
            dataset=dataset)

        # compute correlation of all datasets with mean
        if mean_corr is None:
            mean_corr = pandas.DataFrame(
                numpy.zeros((len(labels), len(hypnums))),
                columns=['hyp%d' % h for h in hypnums],
                index=labels)
        meandata = numpy.mean(maskdata, 0)
        for t, row in enumerate(maskdata):
            mean_corr.iloc[t, i] = scipy.stats.spearmanr(
                row, meandata).correlation

        # cluster datasets
        if corr_type == 'spearman':
            cc = scipy.stats.spearmanr(maskdata.T).correlation
        else:  # use Pearson
            cc = numpy.corrcoef(maskdata)
        cc = numpy.nan_to_num(cc)
        df = pandas.DataFrame(cc, index=labels, columns=labels)
        df.to_csv(os.path.join(
            output_dir, '%s_unthresh_hyp%d.csv' % (corr_type, hyp)))

        ward_linkage = scipy.cluster.hierarchy.ward(cc)
        clustlabels = [
            s[0] for s in scipy.cluster.hierarchy.cut_tree(
                ward_linkage, n_clusters=n_clusters[hyp])
        ]

        # get decisions for column colors
        md = narps.metadata.query('varnum==%d' % hyp).set_index('teamID')
        col_colors = [
            cluster_colors[md.loc[teamID, 'Decision']]
            for teamID in labels
        ]
        # NOTE(review): cut_tree labels appear to start at 0, so label 0
        # maps to cluster_colors[-1] here - confirm this is intended.
        row_colors = [cluster_colors[s - 1] for s in clustlabels]

        cm = seaborn.clustermap(df,
                                cmap='vlag',
                                figsize=(16, 16),
                                method='ward',
                                row_colors=row_colors,
                                col_colors=col_colors,
                                center=0,
                                vmin=-1,
                                vmax=1)
        plt.title('hyp %d:' % hyp + hypotheses[hyp])
        plt.savefig(os.path.join(
            narps.dirs.dirs['figures'],
            'hyp%d_%s_map_unthresh.pdf' % (hyp, corr_type)))
        plt.close()
        dendrograms[hyp] = ward_linkage

        # get cluster membership, in the row order shown in the clustermap
        for j in cm.dendrogram_row.reordered_ind:
            hyp_membership.setdefault(
                str(clustlabels[j]), []).append(labels[j])

    # save cluster data to file so that we don't have to rerun everything
    membership_file = os.path.join(
        output_dir, 'unthresh_cluster_membership_%s.json' % corr_type)
    with open(membership_file, 'w') as f:
        json.dump(membership, f)

    # also save correlation info
    median_distance = mean_corr.median(1).sort_values()
    median_distance_df = pandas.DataFrame(median_distance,
                                          columns=['median_distance'])
    median_distance_df.to_csv(
        os.path.join(narps.dirs.dirs['metadata'],
                     'median_pattern_distance.csv'))

    # ``cc`` here is the matrix from the last hypothesis processed.
    log_to_file(
        logfile, 'median correlation between teams: %f' %
        numpy.median(cc[numpy.triu_indices_from(cc, 1)]))

    return (dendrograms, membership)