def show_large_buckets(self, sizes, bucketkeys, use_spatial_code=False):
    """Interactively inspect the largest LSH buckets.

    Walks buckets from largest to smallest, retrieves their patches and
    labels from the engine's storage, samples random patch pairs to
    estimate intra-bucket distance statistics, then plots the per-bucket
    mean/std distance curves.

    NOTE(review): contains a live `dbg()` breakpoint inside the loop —
    this is exploratory/debugging code, not meant for unattended runs.

    :param sizes: per-bucket sizes (array-like of int), parallel to `bucketkeys`.
    :param bucketkeys: bucket identifiers accepted by `engine.storage.retrieve`.
    :param use_spatial_code: unused here — presumably kept for API symmetry; verify against callers.
    """
    from brainsearch import vizu
    # Visit buckets in decreasing order of size.
    indices = np.argsort(sizes)[::-1]
    #indices = range(len(sizes))
    #all_distances = []
    means = []
    stds = []
    nb_samples = 1000
    # Fixed seed so the sampled pairs are reproducible across runs.
    rng = np.random.RandomState(42)
    for idx in indices:
        print "{:,} neighbors".format(sizes[idx])
        patches = self.engine.storage.retrieve([bucketkeys[idx]], attribute=self.metadata['patch'])[0]
        labels = self.engine.storage.retrieve([bucketkeys[idx]], attribute=self.metadata['label'])[0]
        # L2 norm of each patch (sum over all non-batch axes).
        energies = np.sqrt(np.sum(patches**2, axis=tuple(range(1, patches.ndim))))
        # NOTE(review): this rebinds `indices`, shadowing the outer bucket
        # ordering. The `for idx in indices` iterator was created before the
        # rebind so the loop itself is unaffected, but the name reuse is
        # fragile — consider renaming to e.g. `pair_indices`.
        indices = rng.randint(0, len(patches), 2*nb_samples)
        # Pairwise distances between `nb_samples` random patch pairs
        # (even-indexed vs odd-indexed samples).
        distances = np.sqrt(np.sum((patches[indices[1::2]] - patches[indices[::2]])**2, axis=tuple(range(1, patches.ndim))))
        #all_distances.append(distances)
        means.append(np.mean(distances))
        stds.append(np.std(distances))
        print means[-1]
        print stds[-1]
        #import pylab as plt
        #plt.hist(distances, bins=100)
        #plt.show()
        #print "0:{:,}, 1:{:,}".format(*np.bincount(labels.flatten()))
        #import pylab as plt
        #plt.hist(energies, bins=100)
        #plt.show()
        # Debug breakpoint: drops into ipdb once per bucket.
        from ipdb import set_trace as dbg
        dbg()
        #vizu.show_images3d(patches, shape=self.metadata['patch'].shape, blocking=True)
    # Summary plot over all visited buckets.
    import pylab as plt
    plt.plot(means)
    plt.plot(stds)
    #plt.figure()
    #plt.hist(all_distances, bins=100)
    plt.show()
def execute(self, status):
    """Drop into an interactive debugger at this point.

    Prefers ipdb when it is installed; otherwise falls back to the
    standard-library pdb. `status` is accepted but not used.
    """
    try:
        from ipdb import set_trace
    except ImportError:
        from pdb import set_trace
    set_trace()
def execute(self, status):
    """Drop into an interactive debugger at this point.

    Fix: the original imported ipdb unconditionally, raising ImportError
    on machines without the third-party ipdb package. Fall back to the
    standard-library pdb, matching the sibling `execute` stub in this
    file. `status` is accepted but not used.
    """
    try:
        from ipdb import set_trace as dbg
    except ImportError:
        from pdb import set_trace as dbg
    dbg()
def main(brain_manager=None):
    """Compute a voxel-wise Welch's t-test between control and Parkinson brains.

    Two passes over the configured datasets: first accumulate per-group
    voxel means, then per-group sample standard deviations. From those,
    build a t-statistic map and its two-tailed p-value map (Welch's
    unequal-variance t-test), and save tmap/pmap/inverse-pmap NIfTI files.

    :param brain_manager: unused here — presumably kept for CLI symmetry; verify against callers.
    """
    parser = buildArgsParser()
    args = parser.parse_args()

    # Build processing pipeline (optional normalization + resampling).
    pipeline = BrainPipelineProcessing()
    if args.do_normalization:
        pipeline.add(BrainNormalization(type=0))
    if args.resampling_factor > 1:
        pipeline.add(BrainResampling(args.resampling_factor))

    #controls = defaultdict(lambda: [])
    #parkinsons = defaultdict(lambda: [])
    # Running sums (later divided by counts to get means). Lazily allocated
    # from the first brain's image shape.
    mean_controls = None
    mean_parkinsons = None
    nb_controls = 0
    nb_parkinsons = 0
    dtype = np.float32
    with Timer("Computing mean of samples"):
        for config in args.configs:
            # NOTE(review): file handle from open() is never closed.
            config = json.load(open(config))
            brain_data = brain_data_factory(config, pipeline=pipeline)
            for brain in brain_data:
                if mean_controls is None and mean_parkinsons is None:
                    mean_controls = np.zeros_like(brain.image, dtype=dtype)
                    mean_parkinsons = np.zeros_like(brain.image, dtype=dtype)
                # All brains must share one voxel grid; break into the
                # debugger on mismatch rather than silently mis-summing.
                if brain.image.shape != mean_controls.shape or brain.image.shape != mean_parkinsons.shape:
                    print "Oups shapes not the same!"
                    from ipdb import set_trace as dbg
                    dbg()
                with Timer("Processing {}".format(brain.name)):
                    # Label convention used here: 0 = control, 1 = Parkinson.
                    if brain.label == 0:
                        nb_controls += 1
                        mean_controls += brain.image
                    elif brain.label == 1:
                        nb_parkinsons += 1
                        mean_parkinsons += brain.image
                    else:
                        print "Unknown brain label: {}".format(brain.label)

    # Turn the sums into per-group means.
    mean_controls /= nb_controls
    mean_parkinsons /= nb_parkinsons

    # Second pass: accumulate squared deviations from the group means.
    std_controls = np.zeros_like(mean_controls, dtype=dtype)
    std_parkinsons = np.zeros_like(mean_parkinsons, dtype=dtype)
    with Timer("Computing standard deviation of samples"):
        for config in args.configs:
            config = json.load(open(config))
            brain_data = brain_data_factory(config, pipeline=pipeline)
            for brain in brain_data:
                with Timer("Processing {}".format(brain.name)):
                    if brain.label == 0:
                        std_controls += (brain.image - mean_controls)**2
                    elif brain.label == 1:
                        std_parkinsons += (brain.image - mean_parkinsons)**2

    # Sample standard deviation (Bessel's correction: n-1).
    std_controls = np.sqrt(std_controls / (nb_controls-1))
    std_parkinsons = np.sqrt(std_parkinsons / (nb_parkinsons-1))

    s1 = std_controls
    n1 = nb_controls
    s2 = std_parkinsons
    n2 = nb_parkinsons

    # Compute the test statistic t
    stderror = np.sqrt((s1**2/n1) + (s2**2/n2))
    # The Null hypothesis : mu1 - mu2 = 0
    tmap = ((mean_parkinsons-mean_controls) - 0) / stderror
    tmap[stderror == 0] = 0  # Empty voxels

    # Compute p-value
    # Welch–Satterthwaite degrees of freedom, per voxel.
    DF_numerator = (s1**2/n1 + s2**2/n2)**2
    DF_devisor = ((s1**2/n1)**2/(n1-1)) + ((s2**2/n2)**2/(n2-1))
    # NOTE(review): `//` floors the DF to an integer; scipy's t.cdf accepts
    # fractional DF, so true division `/` may have been intended — flooring
    # makes the test slightly conservative. Confirm intent.
    DF = DF_numerator // DF_devisor
    DF[DF_devisor == 0] = 0  # Empty voxels

    import scipy.stats as stat
    pmap = 2 * stat.t.cdf(-abs(tmap), DF)  # Two-tailed test, take twice the lower tail.
    pmap[np.isnan(pmap)] = 1  # Empty voxels

    # `brain` here is the last brain seen in the loop above; its affine is
    # assumed to hold for the whole cohort (same grid for everyone).
    save_nifti(tmap, brain.infos['affine'], 'tmap.nii.gz')
    save_nifti(pmap, brain.infos['affine'], 'pmap.nii.gz')
    save_nifti(1-pmap, brain.infos['affine'], 'inv_pmap.nii.gz')
def create_map(brain_manager, name, brain_data, K=100, threshold=np.inf, min_nonempty=0, spatial_weight=0., use_dist=False):
    """Build per-brain statistical maps from K-nearest-neighbor patch queries.

    For each brain: extract patches, query the brain database for the K
    nearest neighbor patches, estimate the per-patch Parkinson proportion
    (leave-one-out: neighbors from the query brain itself are excluded),
    run a two-tailed population-proportion z-test, smooth the z-scores
    over the patch footprint (Stouffer-style combination), and save the
    resulting z/p/count NIfTI maps.

    NOTE(review): ends in a live `dbg()` breakpoint — debugging code.

    :param brain_manager: mapping from database name to brain database.
    :param name: database name/path; only the last path component is used.
    :param brain_data: iterable of brains to map (has `.name` attribute).
    :param K: number of nearest neighbors per patch.
    :param threshold: distance threshold — currently unused (filter commented out).
    :param min_nonempty: forwarded to `extract_patches`.
    :param spatial_weight: forwarded to `create_vectors`.
    :param use_dist: if True, weight label counts by neighbor similarity exp(-dist).
    :raises ValueError: if `name` does not resolve to an existing database.
    """
    brain_db = brain_manager[name.strip("/").split("/")[-1]]
    if brain_db is None:
        raise ValueError("Unexisting brain database: " + name)

    patch_shape = brain_db.metadata['patch'].shape
    brain_db.engine.distance = nearpy.distances.EuclideanDistance(brain_db.metadata['patch'])
    #brain_db.engine.distance = nearpy.distances.CorrelationDistance(brain_db.metadata['patch'])
    # TODO: find how to compute a good threshood :/ ?!?
    #brain_db.engine.filters = [DistanceThresholdFilter(threshold), NearestFilter(K)]
    brain_db.engine.filters = [NearestFilter(K)]

    half_patch_size = np.array(patch_shape) // 2
    print "Found {} brains to map".format(len(brain_data))
    for i, brain in enumerate(brain_data):
        print "Mapping {}...".format(brain.name)
        brain_patches = brain.extract_patches(patch_shape, min_nonempty=min_nonempty)
        vectors = brain_patches.create_vectors(spatial_weight=spatial_weight)

        # Position of extracted patches represent to top left corner.
        center_positions = brain_patches.positions + half_patch_size

        # Per-patch neighbor buffers, padded with sentinels for patches that
        # get fewer than K neighbors.
        # NOTE(review): nlabels is uint8, so the -1 sentinel is stored as 255;
        # the later `nlabels == -1` / `nlabels != -1` comparisons compare
        # 255 against -1 and their result depends on numpy's casting rules —
        # verify the sentinel logic actually detects unfilled slots.
        nids = -1 * np.ones((len(brain_patches), K), dtype=np.int32)
        nlabels = -1 * np.ones((len(brain_patches), K), dtype=np.uint8)
        ndists = np.nan * np.ones((len(brain_patches), K), dtype=np.float32)
        #npositions = -1 * np.ones((len(brain_patches), K, 3), dtype=np.uint16)
        #npatches = -1 * np.ones((len(brain_patches), K, int(np.prod(patch_shape))), dtype=np.float32)

        start_brain = time.time()
        for patch_id, neighbors in brain_db.get_neighbors(vectors, brain_patches.patches, attributes=["id", "label"]):
            # Each neighbors dict holds up to K entries; shorter results
            # leave the sentinel padding in place.
            nlabels[patch_id, :len(neighbors['label'])] = neighbors['label'].flatten()
            nids[patch_id, :len(neighbors['id'])] = neighbors['id'].flatten()
            ndists[patch_id, :len(neighbors['dist'])] = neighbors['dist'].flatten()
            #npositions[patch_id, :len(neighbors['position']), :] = neighbors['position']
            #npatches[patch_id, :len(neighbors['patch']), :] = neighbors['patch'].reshape((-1, int(np.prod(patch_shape))))
            # if np.all(center_positions[patch_id] == (84, 114, 115)):
            #     print patch_id
            #     from ipdb import set_trace; set_trace()

        print "{4}. Brain #{0} ({3:,} patches) found {1:,} neighbors in {2:.2f} sec.".format(brain.id, np.sum(nlabels != -1), time.time()-start_brain, len(brain_patches), i)
        print "Patches with no neighbors: {:,}".format(np.all(nlabels == -1, axis=1).sum())

        ## Generate map of p-values ##
        # Use leave-one-out strategy, i.e. do not use neighbors patches coming from the query brain.
        control = np.sum(np.logical_and(nlabels == 0, nids != brain.id), axis=1)
        parkinson = np.sum(np.logical_and(nlabels == 1, nids != brain.id), axis=1)

        if use_dist:
            # Weight the proportion by the distance of the query patch from neighbors patch
            nsimilarities = np.exp(-ndists)
            # Min-max normalize
            nsimilarities -= np.nanmin(nsimilarities, axis=1, keepdims=True)
            nsimilarities /= np.nanmax(nsimilarities, axis=1, keepdims=True)
            control = np.nansum(nsimilarities * np.logical_and(nlabels == 0, nids != brain.id), axis=1)
            parkinson = np.nansum(nsimilarities * np.logical_and(nlabels == 1, nids != brain.id), axis=1)
            control = np.nan_to_num(control)
            parkinson = np.nan_to_num(parkinson)
            # control = np.sum(np.exp(-ndists) * np.logical_and(nlabels == 0, nids != brain.id), axis=1)
            # parkinson = np.sum(np.exp(-ndists) * np.logical_and(nlabels == 1, nids != brain.id), axis=1)
            # control = np.sum((1-ndists) * np.logical_and(nlabels == 0, nids != brain.id), axis=1)
            # parkinson = np.sum((1-ndists) * np.logical_and(nlabels == 1, nids != brain.id), axis=1)

        P0 = brain_db.label_proportions()[1]  # Hypothesized population proportion
        p = parkinson / (parkinson+control)  # sample proportion
        # Patches with zero usable neighbors fall back to the population proportion.
        p[np.isnan(p)] = P0
        n = np.sum(nlabels != -1, axis=1)  # sample size
        z_statistic, pvalue = two_tailed_test_of_population_proportion(P0, p, n)

        #prop = np.zeros_like(brain.image, dtype=np.float32)
        #prop[zip(*center_positions)] = p

        zmap = np.zeros_like(brain.image, dtype=np.float32)
        zmap_smooth = np.zeros_like(brain.image, dtype=np.float32)
        pmap = np.ones_like(brain.image, dtype=np.float32)
        counts = np.zeros_like(brain.image, dtype=np.float32)

        # Patches composite z-scores
        # see https://en.wikipedia.org/wiki/Fisher%27s_method#Relation_to_Stouffer.27s_Z-score_method
        # Spread each patch's weighted z-score over its whole voxel footprint.
        # NOTE(review): `array[zip(*pos)]` fancy indexing relies on Python 2's
        # zip returning a list of tuples; under Python 3 this would need
        # `tuple(pos.T)` or similar.
        for z in range(patch_shape[2]):
            for y in range(patch_shape[1]):
                for x in range(patch_shape[0]):
                    pos = brain_patches.positions + np.array((x, y, z))
                    zmap_smooth[zip(*pos)] += z_statistic * np.sqrt(n)
                    counts[zip(*pos)] += n

        #zmap_smooth[zip(*center_positions)] /= np.sqrt(counts2[zip(*center_positions)])
        # Stouffer normalization: divide the weighted sum by sqrt of total weight.
        zmap_smooth /= np.sqrt(counts)
        #zmap_smooth[zip(*center_positions)] /= np.sqrt(np.prod(patch_shape))
        zmap_smooth[np.isnan(zmap_smooth)] = 0.

        zmap[zip(*center_positions)] = z_statistic
        zmap[np.isnan(zmap)] = 0.

        import scipy.stats as stat
        pmap = 2 * stat.norm.cdf(-abs(zmap_smooth))  # Two-tailed test, take twice the lower tail.
        pmap[np.isnan(pmap)] = 1.
        #pmap[zip(*center_positions)] = pvalue
        #pmap[np.isnan(pmap)] = 1.

        # Per-voxel neighbor counts at the patch centers.
        counts = np.zeros_like(brain.image, dtype=np.float32)
        counts[zip(*center_positions)] = n

        results_folder = pjoin('.', 'results', brain_db.name, brain_data.name)
        if use_dist:
            results_folder = pjoin('.', 'results', brain_db.name, brain_data.name, "distance_weighting")
        if not os.path.isdir(results_folder):
            os.makedirs(results_folder)

        save_nifti(brain.image, brain.infos['affine'], pjoin(results_folder, "{}.nii.gz".format(brain.name)))
        #save_nifti(prop, brain.infos['affine'], pjoin(results_folder, "{}_prop.nii.gz".format(brain.name)))
        save_nifti(pmap, brain.infos['affine'], pjoin(results_folder, "{}_pmap.nii.gz".format(brain.name)))
        #save_nifti(1-pmap, brain.infos['affine'], pjoin(results_folder, "{}_pmap_inv.nii.gz".format(brain.name)))
        save_nifti(zmap, brain.infos['affine'], pjoin(results_folder, "{}_zmap.nii.gz".format(brain.name)))
        save_nifti(zmap_smooth, brain.infos['affine'], pjoin(results_folder, "{}_zmap_smooth.nii.gz".format(brain.name)))
        save_nifti(counts, brain.infos['affine'], pjoin(results_folder, "{}_count.nii.gz".format(brain.name)))
        #np.savez(pjoin(results_folder, name), dists=ndists, labels=nlabels, ids=nids, positions=npositions, voxels_positions=center_positions)

        # Debug breakpoint: pauses after each brain's maps are saved.
        from ipdb import set_trace as dbg
        dbg()