def set_experiment_info(model, dataset, feature_dict):
    """
    Add experiment-specific t-test values to the entries of feature_dict.

    Parameters
    ----------
    model: object
        Model handed to get_activations together with the dataset.
    dataset: object
        Dataset exposing dataset_name. For a TransformerDataset the function
        only logs the transformer type and returns (marked TODO).
    feature_dict: dict
        Maps feature keys to dictionaries holding a "real_id" entry. Modified
        in place: "sz_t" is set when the dataset name is in
        dataset_info.sz_datasets, "tg_t" and "nv_t" when it is in
        dataset_info.aod_datasets.
    """
    if isinstance(dataset, TransformerDataset):
        # Transformed datasets are not supported yet, so stop right here.
        logger.info("Found transformer of type %s. Returning (TODO)"
                    % type(dataset.transformer))
        return

    logger.info("Finding experiment related analysis for model of type %r and "
                "dataset %s" % (type(model), dataset.dataset_name))
    activations = get_activations(model, dataset)

    if dataset.dataset_name in dataset_info.sz_datasets:
        sz_ttests = get_sz_info(dataset, activations)
        for entry in feature_dict.values():
            entry["sz_t"] = sz_ttests[entry["real_id"]][0]

    if dataset.dataset_name in dataset_info.aod_datasets:
        target_ttests, novel_ttests = get_aod_info(dataset, activations)
        for entry in feature_dict.values():
            real_id = entry["real_id"]
            entry["tg_t"] = target_ttests[real_id][0]
            entry["nv_t"] = novel_ttests[real_id][0]
def get_nifti(dataset, features, out_file=None, split_files=False,
              base_nifti=None):
    """
    Build a nifti image from features and optionally save it.

    Parameters
    ----------
    dataset: MRI class.
        Dataset used to build the image. Must implement get_weights_view
        and get_nifti.
    features: array-like.
        Features for nifti processing; features.shape[0] is logged.
    out_file: str, optional.
        If given, the image is saved to out_file + ".gz".
    split_files: bool, optional.
        Not used by this function.
    base_nifti: optional.
        Forwarded to dataset.get_nifti.

    Returns
    -------
    nifti: nipy image.

    Raises
    ------
    ValueError
        If dataset is not an instance of MRI.
    """
    logger.info("Getting nifti for dataset of type %r and %d features."
                % (type(dataset), features.shape[0]))
    if not isinstance(dataset, MRI):
        raise ValueError("Dataset type is %r and not an instance of %r"
                         % (type(dataset), MRI))

    nifti = dataset.get_nifti(dataset.get_weights_view(features),
                              base_nifti=base_nifti)
    if out_file is None:
        return nifti

    nipy.save_image(nifti, out_file + ".gz")
    return nifti
def read_niftis(file_lists):
    """
    Read niftis.

    Parameters
    ----------
    file_lists: list of list of paths.
        Each list of file paths is a unique class. The first file of the
        first list fixes the expected volume shape.

    Returns
    -------
    data, labels, base: tuple of array, list and nibabel image
        data has one row per volume, labels holds the class index of each
        row, and base is the first file loaded with nibabel (its first 3D
        volume when that file is 4D).

    Raises
    ------
    ValueError
        If a file is neither 3D nor 4D, or if the number of rows in data
        differs from the number of labels.
    """
    data0 = load_image(file_lists[0][0]).get_data()
    if len(data0.shape) == 3:
        x, y, z = data0.shape
        t = 1
    elif len(data0.shape) == 4:
        x, y, z, t = data0.shape
    else:
        # The shape is wrapped in a 1-tuple: "%r" % shape would otherwise
        # unpack the tuple and fail with a TypeError instead of this error.
        raise ValueError("Cannot parse data with dimensions %r"
                         % (data0.shape,))

    dt = sum(len(fl) for fl in file_lists) * t
    data = np.zeros((dt, x, y, z))

    # One label per volume: every file contributes t rows of its class index.
    labels = [i for i, fl in enumerate(file_lists)
              for _ in range(len(fl) * t)]
    for i, fl in enumerate(file_lists):
        assert labels.count(i) == len(fl) * t

    flattened_list = [item for sublist in file_lists for item in sublist]
    for i, f in enumerate(flattened_list):
        logger.info("Loading subject from file: %s" % f)
        subject_data = load_image(f).get_data()
        if len(subject_data.shape) == 3:
            # NOTE(review): a 3D volume goes to row i, which only matches
            # the slot layout used for 4D files when t == 1.
            data[i] = subject_data
        elif len(subject_data.shape) == 4:
            # Move the last axis to the front so each volume becomes a row.
            data[i * t: (i + 1) * t] = subject_data.transpose((3, 0, 1, 2))
        else:
            raise ValueError("Cannot parse subject data with dimensions %r"
                             % (subject_data.shape,))

    logger.info("\rLoading subject from file: %s\n" % ('DONE' + " " * 30))
    if data.shape[0] != len(labels):
        raise ValueError("Data and labels have different number of samples.")

    base_file = flattened_list[0]
    # Use nibabel in case we need to convert from 4d to 3d
    base = nib.load(base_file)
    if len(base.shape) == 4:
        base = nib.four_to_three(base)[0]

    return data, labels, base
def is_simTBdir(source_directory):
    """
    Tell whether source_directory looks like a simTB source directory.

    The directory qualifies when it holds at least one "*_DATA.nii" file and
    exactly as many "*_SIM.mat" files.

    Parameters
    ----------
    source_directory: str
        Directory to inspect.

    Returns
    -------
    bool
    """
    data_pattern = path.join(source_directory, "*_DATA.nii")
    sim_pattern = path.join(source_directory, "*_SIM.mat")
    n_niftis = len(natural_sort(glob(data_pattern)))
    n_sims = len(natural_sort(glob(sim_pattern)))

    if n_niftis == 0 or n_niftis != n_sims:
        return False

    logger.info("simTB directory detected")
    return True
def save_simtb_montage(dataset, features, out_file, feature_dict,
                       target_stat=None, target_value=None):
    """
    Render a simtb montage of the given features.

    The features are converted with dataset.get_weights_view and handed to
    simtb_viewer.montage; the remaining arguments are forwarded as keyword
    arguments of the same name.
    """
    logger.info("Saving simtb montage")
    montage_options = dict(out_file=out_file,
                           feature_dict=feature_dict,
                           target_stat=target_stat,
                           target_value=target_value)
    simtb_viewer.montage(dataset.get_weights_view(features),
                         **montage_options)
def save_images(nifti_files, anat, roi_dict, out_dir, **kwargs):
    """
    Render one png per nifti file using a pool of 30 worker processes.

    Parameters
    ----------
    nifti_files: list of str
        Paths whose file name starts with an integer id ("<id>.<ext>").
    anat: object
        Passed unchanged to every save_helper call.
    roi_dict: dict
        Looked up by the integer id taken from each file name.
    out_dir: str
        Directory receiving "<id>.png".
    kwargs: dict
        Accepted but not used.
    """
    logger.info("Saving images to %s" % out_dir)
    pool = mp.Pool(30)

    feature_ids = []
    for nifti_file in nifti_files:
        file_name = nifti_file.split("/")[-1]
        feature_ids.append(int(file_name.split(".")[0]))

    feature_rois = [roi_dict[i] for i in feature_ids]
    png_files = [path.join(out_dir, "%d.png" % i) for i in feature_ids]

    jobs = itertools.izip(nifti_files, itertools.repeat(anat), feature_rois,
                          png_files, feature_ids)
    pool.map(save_helper, jobs)
    pool.close()
    pool.join()
def save_mask(data, out_dir):
    """
    Find and save mask of data.

    Parameters
    ----------
    data: array-like
        4D array; the first axis indexes samples, the other three form the
        volume.
    out_dir: str
        Directory in which "mask.npy" is written.

    Returns
    -------
    mask: array
        Volume-shaped array holding 1 for kept voxels and 0 otherwise.

    Raises
    ------
    ValueError
        If the data looks pre-masked and one sample's proportion of zeros
        differs from the mean proportion by more than 0.05.
    """
    logger.info("Finding mask.")
    mask_path = path.join(out_dir, "mask.npy")
    m, r, c, d = data.shape
    mask = np.zeros((r, c, d))
    n_voxels = r * c * d

    # Per-sample proportion of exact zeros. The float() keeps this a true
    # division on Python 2, where int / int would floor to 0.
    zero_freq = (data.reshape((m, n_voxels)) == 0).sum(1) / float(n_voxels)
    mean_freq = zero_freq.mean()

    if mean_freq > 0.2:
        logger.info("Masked data found, deriving zeros from data zeros.")
        for freq in zero_freq:
            if abs(mean_freq - freq) > .05:
                raise ValueError("Spurious datapoint, mean zeros frequency is "
                                 "%.2f, datapoint is %.2f" % (mean_freq, freq))
        # Keep voxels that fall below 0.07 in at most 1% of the samples.
        mask[np.where(np.invert((data < 0.07).sum(0) > .01 * data.shape[0]))] = 1
    else:
        logger.info("Deriving mask from mean image.")
        mask[np.where(data.mean(axis=0) > data.mean())] = 1

    logger.info("Masked out %d out of %d voxels"
                % ((mask == 0).sum(), mask.size))
    np.save(mask_path, mask)
    return mask
def save_simtb_mask(data, out_dir):
    """
    Build a simtb mask (circle) and return it.

    Parameters
    ----------
    data: array-like
        4D array whose second and third dimensions must be equal.
    out_dir: str
        Not used; nothing is written to disk by this function.

    Returns
    -------
    mask: array
        Array shaped like one volume of data, set to 1 inside the circle.
    """
    logger.info("Making simtb mask")
    _, rows, cols, depth = data.shape
    assert rows == cols

    center = .5 * rows
    # NOTE(review): the radius equals the full side length, so every pixel
    # of the square satisfies the test and the mask comes out all ones.
    # Confirm whether half the side length was intended.
    radius_sq = rows ** 2

    offsets = np.arange(rows) - center
    inside = offsets[:, None] ** 2 + offsets[None, :] ** 2 <= radius_sq

    mask = np.zeros((rows, cols, depth))
    mask[inside] = 1
    return mask
def compare_models(feature_dict):
    """
    Inter-model comparison.

    Every unordered pair of entries in feature_dict is matched with
    fe.match_parameters on their spatial_maps. For each matched index pair
    the first model's feature records the second model's index under the
    second model's name in match_indices.

    Parameters
    ----------
    feature_dict: dict
        Maps model names to feature objects exposing spatial_maps and f.
    """
    named_features = list(feature_dict.iteritems())
    for position, (name_a, features_a) in enumerate(named_features):
        for name_b, features_b in named_features[position + 1:]:
            logger.info("Analyzing %s compared to %s" % (name_a, name_b))
            matches = fe.match_parameters(features_a.spatial_maps,
                                          features_b.spatial_maps)
            for index_a, index_b in matches:
                features_a.f[index_a].match_indices[name_b] = index_b
def save_simtb_spatial_maps(dataset, features, out_path):
    """
    Save one simtb png per feature using a pool of 30 worker processes.

    Parameters
    ----------
    dataset: object
        Used to convert the spatial maps with get_weights_view when they
        are not already 4D.
    features: object
        Exposes name, spatial_maps and f (a mapping of feature objects that
        carry an integer id).
    out_path: str
        Directory receiving "<id>.png".
    """
    logger.info("Saving simtb images for model %s" % features.name)

    maps = features.spatial_maps
    if len(maps.shape) != 4:
        maps = dataset.get_weights_view(maps)

    png_files = []
    for feature in features.f.values():
        png_files.append(path.join(out_path, "%d.png" % feature.id))

    pool = mp.Pool(30)
    pool.map(save_helper, itertools.izip(maps, png_files))
    pool.close()
    pool.join()
def save_simtb_spatial_maps(dataset, features, out_path):
    """
    Write the spatial maps of a feature set as simtb png images.

    Maps that are not 4D are first converted with dataset.get_weights_view.
    Each map is paired with "<out_path>/<feature id>.png" and the pairs are
    handed to save_helper through a 30-process pool.
    """
    logger.info("Saving simtb images for model %s" % features.name)

    weight_views = features.spatial_maps
    needs_view = len(weight_views.shape) != 4
    if needs_view:
        weight_views = dataset.get_weights_view(weight_views)

    feature_objects = list(features.f.values())
    targets = [path.join(out_path, "%d.png" % item.id)
               for item in feature_objects]

    workers = mp.Pool(30)
    work_items = itertools.izip(weight_views, targets)
    workers.map(save_helper, work_items)
    workers.close()
    workers.join()
def analyze_ground_truth(feature_dict, ground_truth_dict, dataset):
    """
    Compare models to ground truth.

    The "SM" entry of ground_truth_dict[0] is reshaped to the dataset's view
    shape and turned into a design matrix, which every model in feature_dict
    is matched against with fe.match_parameters. Matches are stored under
    "ground_truth" in each feature's match_indices, and a "ground_truth"
    entry built from the "SM" view and the "TC" entry is added to
    feature_dict.
    """
    truth = ground_truth_dict[0]
    source_maps = truth["SM"]
    view_shape = (source_maps.shape[0], ) + dataset.view_converter.shape
    gt_topo_view = source_maps.reshape(view_shape).transpose(0, 2, 1, 3)

    gt_spatial_maps = dataset.get_design_matrix(gt_topo_view)
    if isinstance(dataset, MRI.MRI_Transposed):
        gt_spatial_maps = gt_spatial_maps.T
    gt_activations = truth["TC"]

    for model_name, model_features in feature_dict.iteritems():
        logger.info("Analyzing %s compared to ground truth" % model_name)
        matches = fe.match_parameters(model_features.spatial_maps,
                                      gt_spatial_maps)
        for feature_index, truth_index in matches:
            matched = model_features.f[feature_index]
            matched.match_indices["ground_truth"] = truth_index

    # Added only after the loop so the dict is not resized while iterating.
    feature_dict["ground_truth"] = fe.Features(gt_topo_view, gt_activations,
                                               name="ground truth")
def find_rois(fnifti, thr):
    """
    Function for finding regions of interest from a nifti file.

    Parameters
    ----------
    fnifti: path to the nifti file or list of paths to files
        A single path is processed feature by feature along its last axis;
        a list is processed with one worker process per file.
    thr: float
        Threshold for clusters.

    Returns
    -------
    roi_dict: dictionary of int, dictionary pairs
        Empty when fnifti is an empty list.

    Raises
    ------
    NotImplementedError
        If fnifti is neither a str nor a list.
    """
    logger.info("Finding clusters from niftis")

    if isinstance(fnifti, str):
        nifti = load_image(fnifti)
        num_features = nifti.shape[-1]
        roi_dict = {}
        for i in range(num_features):
            clusters = find_clusters_from_4D(fnifti, i, thr)
            roi_dict[i] = get_cluster_info(clusters)
    elif isinstance(fnifti, list):
        if not fnifti:
            # A pool cannot be created with zero processes.
            roi_dict = {}
        else:
            shared_rois = mp.Manager().dict()
            pool = mp.Pool(len(fnifti))
            try:
                # zip stops at the end of fnifti; the repeats are endless.
                pool.map(worker_helper,
                         zip(fnifti, itertools.repeat(thr),
                             itertools.repeat(shared_rois)))
            finally:
                pool.close()
                pool.join()
            # Copy out of the manager proxy into a plain dict.
            roi_dict = dict(shared_rois)
    else:
        raise NotImplementedError("Type %s not supported" % type(fnifti))

    logger.info("Finished finding clusters")
    return roi_dict
def analyze_ground_truth(feature_dict, ground_truth_dict, dataset):
    """
    Match every model's spatial maps against the ground truth maps.

    Parameters
    ----------
    feature_dict: dict
        Maps model names to feature objects. Each matched feature gets a
        "ground_truth" entry in match_indices, and the dict itself gains a
        "ground_truth" key.
    ground_truth_dict: indexable
        Element 0 must provide "SM" (spatial maps) and "TC" entries.
    dataset: object
        Supplies view_converter.shape and get_design_matrix.
    """
    spatial_truth = ground_truth_dict[0]["SM"]
    n_sources = spatial_truth.shape[0]
    gt_topo_view = spatial_truth.reshape(
        (n_sources, ) + dataset.view_converter.shape)
    gt_topo_view = gt_topo_view.transpose(0, 2, 1, 3)

    gt_spatial_maps = dataset.get_design_matrix(gt_topo_view)
    transposed = isinstance(dataset, MRI.MRI_Transposed)
    if transposed:
        gt_spatial_maps = gt_spatial_maps.T

    gt_activations = ground_truth_dict[0]["TC"]

    for name, features in feature_dict.iteritems():
        logger.info("Analyzing %s compared to ground truth" % name)
        index_pairs = fe.match_parameters(features.spatial_maps,
                                          gt_spatial_maps)
        for model_idx, gt_idx in index_pairs:
            features.f[model_idx].match_indices["ground_truth"] = gt_idx

    ground_truth_features = fe.Features(gt_topo_view, gt_activations,
                                        name="ground truth")
    feature_dict["ground_truth"] = ground_truth_features
def save_niftis(dataset, features, image_dir, base_nifti=None, **kwargs):
    """
    Save one nifti per feature, then render images for all of them.

    Parameters
    ----------
    dataset: object
        Provides get_weights_view and get_nifti.
    features: object
        Exposes spatial_maps and f, a mapping from index to feature objects
        that carry an integer id.
    image_dir: str
        Directory receiving "<id>.nii.gz" and the rendered images.
    base_nifti: optional
        Forwarded to dataset.get_nifti.
    kwargs: dict
        Forwarded to nifti_viewer.save_images.
    """
    logger.info("Saving mri images")
    weight_views = dataset.get_weights_view(features.spatial_maps)

    for key, feature in features.f.iteritems():
        image = dataset.get_nifti(weight_views[key], base_nifti=base_nifti)
        nifti_path = path.join(image_dir, "%d.nii.gz" % feature.id)
        nipy.save_image(image, nifti_path)

    nifti_files = []
    for feature in features.f.values():
        nifti_files.append(path.join(image_dir, "%d.nii.gz" % feature.id))

    roi_dict = rois.main(nifti_files)

    # The anatomical background comes from a fixed absolute path.
    anat_file = ("/export/mialab/users/mindgroup/Data/mrn/"
                 "mri_extra/ch2better_aligned2EPI.nii")
    anat = nipy.load_image(anat_file)

    nifti_viewer.save_images(nifti_files, anat, roi_dict, image_dir, **kwargs)
def demo(args):
    """
    Demo of AM-FM.

    Generates a chirp, runs amfm_DCA on it and plots the time series, its
    frequency spectrum, and the estimated against the ideal frequency. The
    figure is saved to args.out_file when that is set and shown otherwise.

    Parameters
    ----------
    args: argparse args
        Must provide N (number of time points) and out_file.
    """
    logger.info("Running a demo of AM-FM")

    # number of time points
    n_points = args.N

    logger.info("Generating a chirp that sweeps all frequencies")
    signal = ss.chirp(range(n_points), 0, n_points - 1, 0.49)

    logger.info("Computing fft for plot")
    spectrum = np.fft.fft(signal, n_points * 10)

    # This is the groundtruth IF
    ideal_freq = 0.5 * np.arange(n_points) / (n_points - 1)

    logger.info("Computing AM-FM DCA")
    _, _, estimated_freq = amfm_DCA(signal)

    # plot results
    logger.info("Plotting")

    plt.subplot(311)
    plt.plot(signal)
    plt.title("Time series")

    plt.subplot(312)
    plt.plot(np.fft.fftfreq(n_points * 10), np.abs(spectrum), ".")
    plt.title("Frequency spectrum")

    plt.subplot(313)
    plt.plot(ideal_freq)
    plt.plot(estimated_freq)
    plt.legend(["Ideal", "Estimated"], loc="best")
    plt.title("Frequency vs time")

    if not args.out_file:
        plt.show()
    else:
        plt.savefig(args.out_file)
def demo(args):
    """
    Run the AM-FM demonstration on a synthetic chirp.

    Parameters
    ----------
    args: argparse args
        args.N is the number of time points; args.out_file, when truthy, is
        where the figure is saved (otherwise the figure is shown).
    """
    logger.info("Running a demo of AM-FM")
    length = args.N  # number of time points

    logger.info("Generating a chirp that sweeps all frequencies")
    chirp = ss.chirp(range(length), 0, length - 1, 0.49)

    logger.info("Computing fft for plot")
    padded_length = length * 10
    chirp_fft = np.fft.fft(chirp, padded_length)

    # Ground-truth instantaneous frequency.
    true_freq = 0.5 * np.arange(length) / (length - 1)

    logger.info("Computing AM-FM DCA")
    dca_result = amfm_DCA(chirp)
    ia, ip, ifeq = dca_result

    logger.info("Plotting")
    # Top panel: the raw signal.
    plt.subplot(311)
    plt.plot(chirp)
    plt.title("Time series")
    # Middle panel: magnitude spectrum of the zero-padded fft.
    plt.subplot(312)
    plt.plot(np.fft.fftfreq(padded_length), np.abs(chirp_fft), ".")
    plt.title("Frequency spectrum")
    # Bottom panel: ideal against estimated frequency.
    plt.subplot(313)
    plt.plot(true_freq)
    plt.plot(ifeq)
    plt.legend(["Ideal", "Estimated"], loc="best")
    plt.title("Frequency vs time")

    if args.out_file:
        plt.savefig(args.out_file)
        return
    plt.show()
def from_patterns(file_path, out_dir, args):
    """
    Loads niftis from a set of patterns. Patterns are glob.

    Parameters
    ----------
    file_path: str
        Not used by this function.
    out_dir: str
        Directory receiving "base.nii", the mask, the split data and
        "sources.txt" (one source file name per line).
    args: argparse args
        Provides patterns (one glob per class), verbose and split.
    """
    file_lists = []
    for pattern in args.patterns:
        matched_files = glob(pattern)
        logger.info("Found %r" % matched_files)
        file_lists.append(matched_files)

    data, labels, base = read_niftis(file_lists)
    nib.save(base, path.join(out_dir, "base.nii"))

    mask = save_mask(data, out_dir)
    if args.verbose:
        test_distribution(data, mask)

    split_save_data(data, labels, args.split, out_dir)

    sources_path = path.join(out_dir, "sources.txt")
    with open(sources_path, "w") as sources:
        for class_files in file_lists:
            for file_name in class_files:
                sources.write("%s\n" % file_name)
def save_nii_montage(nifti, nifti_file, out_file, anat_file=None,
                     feature_dict=None, target_stat=None, target_value=None):
    """
    Saves a montage from a nifti file.

    A region of interest dictionary is computed from nifti_file with
    rois.main and passed to the montage.

    Parameters
    ----------
    nifti: nipy Image.
        Nifti file for processing.
    nifti_file: str
        Path to nifti file. Needed to process the roi dictionary.
    out_file: str
        Path to output file.
    anat_file: str, optional
        Path to anat file. If not provided,
        ${PYLEARN2_NI_PATH}/mri_extra/ch2better_aligned2EPI.nii is used.
    feature_dict, target_stat, target_value: optional
        Forwarded unchanged to nifti_viewer.montage.
    """
    logger.info("Saving montage from %s to %s." % (nifti_file, out_file))
    roi_dict = rois.main(nifti_file)

    if anat_file is None:
        default_anat = "${PYLEARN2_NI_PATH}/mri_extra/ch2better_aligned2EPI.nii"
        anat_file = serial.preprocess(default_anat)

    montage_options = dict(out_file=out_file,
                           feature_dict=feature_dict,
                           target_stat=target_stat,
                           target_value=target_value)
    nifti_viewer.montage(nifti, anat_file, roi_dict, **montage_options)
def main(model, out_path=None, prefix=None, **anal_args):
    """
    Main function of module.

    Extracts features from a model, compares them to the simTB ground truth
    stored in "sim_dict.pkl", saves images for every feature set and writes
    a summary to "analysis.json" in out_path.

    Parameters
    ----------
    model: Pylearn2.Model or str
        Model instance or path for the model.
    out_path: str, optional
        Path for the output directory. Defaults to the directory of the
        model file, which requires model to be a path.
    prefix: str, optional
        Accepted for interface compatibility; not used by this function.
    anal_args: dict
        Keyword arguments forwarded to fe.extract_features.
    """
    if out_path is None:
        assert isinstance(model, str), ("If you provide a model object, you "
                                        "must provide an out_path")
        out_path = path.abspath(path.dirname(model))

    if isinstance(model, str):
        logger.info("Loading model from %s" % model)
        model = serial.load(model)

    if not path.isdir(out_path):
        os.mkdir(out_path)

    logger.info("Getting features")
    feature_dict = fe.extract_features(model, **anal_args)
    dataset = feature_dict.pop("dataset")
    if isinstance(dataset, TransformerDataset):
        dataset = dataset.raw

    # The result is not used below; the call is kept in case building the
    # structure has side effects -- TODO confirm and drop if it has none.
    fe.ModelStructure(model, dataset)

    data_path = serial.preprocess(dataset.dataset_root + dataset.dataset_name)
    sim_dict_file = path.join(data_path, "sim_dict.pkl")
    # Context manager so the file handle is closed once the pickle is read.
    # NOTE(review): the mode is left as "r"; Python 3 would need "rb".
    with open(sim_dict_file, "r") as sim_file:
        sim_dict = pickle.load(sim_file)
    analyze_ground_truth(feature_dict, sim_dict, dataset)

    anal_dict = dict()

    mask = dataset.get_mask()
    feature_dict["mask"] = fe.Features(np.array([mask]), np.array([[0]]),
                                       name="mask")

    if isinstance(dataset, MRI.MRI_Transposed):
        samples = dataset.X[:, :20].T
    else:
        samples = dataset.X[:20]
    feature_dict["samples"] = fe.Features(samples, np.array([[0] * 20]).T,
                                          name="samples")

    if isinstance(dataset, MRI.MRI_Transposed):
        mean_image = dataset.X.mean(axis=1).T
    else:
        mean_image = dataset.X.mean(axis=0)
    feature_dict["mean_image"] = fe.Features(np.array([mean_image]),
                                             np.array([[0]]).T,
                                             name="mean image")

    if dataset.variance_map is not None:
        variance_map = dataset.variance_map[1]
        feature_dict["variance_map"] = fe.Features(np.array([variance_map]),
                                                   np.array([[0]]).T,
                                                   name="variance map")

    for name, features in feature_dict.iteritems():
        image_dir = path.join(out_path, "%s_images" % name)
        if not path.isdir(image_dir):
            os.mkdir(image_dir)
        save_simtb_spatial_maps(dataset, features, image_dir)

        features.set_histograms(tolist=True)
        fds = dict()
        for k, f in features.f.iteritems():
            fd = dict(
                # Image paths are stored relative to out_path.
                image=path.join("%s_images" % name, "%d.png" % f.id),
                image_type="simtb",
                index=f.id,
                hists=f.hists,
                match_indices=f.match_indices
            )
            fd.update(**f.stats)
            fds[k] = fd

        anal_dict[name] = dict(
            name=name,
            image_dir=image_dir,
            features=fds
        )

    json_file = path.join(out_path, "analysis.json")
    with open(json_file, "w") as f:
        json.dump(anal_dict, f)

    logger.info("Done.")
def main(model, out_path=None, prefix=None, **anal_args):
    """
    Main function of module.

    Extracts features from a model, compares the feature sets with each
    other, saves niftis and images for every feature set and writes a
    summary to "analysis.json" in out_path.

    Parameters
    ----------
    model: Pylearn2.Model or str
        Model instance or path for the model.
    out_path: str, optional
        Path for the output directory. Defaults to the directory of the
        model file, which requires model to be a path.
    prefix: str, optional
        Only used to derive local names that are not read afterwards.
    anal_args: dict
        Keyword arguments forwarded to fe.extract_features and save_niftis.
    """
    model_is_path = isinstance(model, str)

    # These prefixes are computed but never read later in this function.
    if out_path is None and prefix is None and model_is_path:
        prefix = ".".join(path.basename(model).split(".")[:-1])
        sm_prefix = prefix
        nifti_prefix = prefix
    else:
        nifti_prefix = "image"

    if out_path is None:
        assert isinstance(model, str)
        out_path = path.abspath(path.dirname(model))

    if model_is_path:
        logger.info("Loading model from %s" % model)
        model = serial.load(model)

    if not path.isdir(out_path):
        os.mkdir(out_path)

    logger.info("Getting features")
    feature_dict = fe.extract_features(model, **anal_args)
    dataset = feature_dict.pop("dataset")
    if isinstance(dataset, TransformerDataset):
        dataset = dataset.raw

    anal_dict = dict()

    compare_models(feature_dict)
    for name, features in feature_dict.iteritems():
        relative_dir = "%s_images" % name
        image_dir = path.join(out_path, relative_dir)
        if not path.isdir(image_dir):
            os.mkdir(image_dir)
        save_niftis(dataset, features, image_dir, **anal_args)

        features.set_histograms(tolist=True)

        fds = dict()
        for key, feature in features.f.iteritems():
            # Image paths are stored relative to out_path.
            summary = dict(image=path.join("%s_images" % name,
                                           "%d.png" % feature.id),
                           image_type="mri",
                           index=feature.id,
                           hists=feature.hists,
                           relations=feature.relations,
                           match_indices=feature.match_indices)
            summary.update(**feature.stats)
            fds[key] = summary

        links = dict(spatial_maps=features.get_links("spatial_maps"),
                     activations=features.get_links("activations"))
        graphs = dict(nodes=features.get_nodes(), links=links)
        anal_dict[name] = dict(name=name,
                               image_dir=image_dir,
                               features=fds,
                               graphs=graphs,
                               relations=features.relations,
                               stats=features.stats)

    ms = fe.ModelStructure(model, dataset)

    json_file = path.join(out_path, "analysis.json")
    with open(json_file, "w") as json_out:
        json.dump(anal_dict, json_out)

    logger.info("Analysis done")
def main(model, out_path=None, prefix=None, **anal_args):
    """
    Main function of module.

    Runs the simTB analysis: features are extracted from the model, matched
    to the ground truth read from "sim_dict.pkl", rendered as images, and
    summarised in "analysis.json" inside out_path.

    Parameters
    ----------
    model: Pylearn2.Model or str
        Model instance or path for the model.
    out_path: str, optional
        Path for the output directory. When omitted, model must be a path
        and its directory is used.
    prefix: str, optional
        Accepted for interface compatibility; not used by this function.
    anal_args: dict
        Keyword arguments forwarded to fe.extract_features.
    """
    if out_path is None:
        assert isinstance(model, str), ("If you provide a model object, you "
                                        "must provide an out_path")
        out_path = path.abspath(path.dirname(model))

    if isinstance(model, str):
        logger.info("Loading model from %s" % model)
        model = serial.load(model)

    if not path.isdir(out_path):
        os.mkdir(out_path)

    logger.info("Getting features")
    feature_dict = fe.extract_features(model, **anal_args)
    dataset = feature_dict.pop("dataset")
    if isinstance(dataset, TransformerDataset):
        dataset = dataset.raw

    # The result is not used below; the call is kept in case building the
    # structure has side effects -- TODO confirm and drop if it has none.
    fe.ModelStructure(model, dataset)

    data_path = serial.preprocess(dataset.dataset_root + dataset.dataset_name)
    sim_dict_file = path.join(data_path, "sim_dict.pkl")
    # Read through a context manager so the handle does not stay open.
    # NOTE(review): the mode is left as "r"; Python 3 would need "rb".
    with open(sim_dict_file, "r") as sim_file:
        sim_dict = pickle.load(sim_file)
    analyze_ground_truth(feature_dict, sim_dict, dataset)

    anal_dict = dict()

    mask = dataset.get_mask()
    feature_dict["mask"] = fe.Features(np.array([mask]), np.array([[0]]),
                                       name="mask")

    if isinstance(dataset, MRI.MRI_Transposed):
        samples = dataset.X[:, :20].T
    else:
        samples = dataset.X[:20]
    feature_dict["samples"] = fe.Features(samples, np.array([[0] * 20]).T,
                                          name="samples")

    if isinstance(dataset, MRI.MRI_Transposed):
        mean_image = dataset.X.mean(axis=1).T
    else:
        mean_image = dataset.X.mean(axis=0)
    feature_dict["mean_image"] = fe.Features(np.array([mean_image]),
                                             np.array([[0]]).T,
                                             name="mean image")

    if dataset.variance_map is not None:
        variance_map = dataset.variance_map[1]
        feature_dict["variance_map"] = fe.Features(np.array([variance_map]),
                                                   np.array([[0]]).T,
                                                   name="variance map")

    for name, features in feature_dict.iteritems():
        image_dir = path.join(out_path, "%s_images" % name)
        if not path.isdir(image_dir):
            os.mkdir(image_dir)
        save_simtb_spatial_maps(dataset, features, image_dir)

        features.set_histograms(tolist=True)
        fds = dict()
        for k, f in features.f.iteritems():
            # Image paths are stored relative to out_path.
            fd = dict(image=path.join("%s_images" % name, "%d.png" % f.id),
                      image_type="simtb",
                      index=f.id,
                      hists=f.hists,
                      match_indices=f.match_indices)
            fd.update(**f.stats)
            fds[k] = fd

        anal_dict[name] = dict(name=name, image_dir=image_dir, features=fds)

    json_file = path.join(out_path, "analysis.json")
    with open(json_file, "w") as f:
        json.dump(anal_dict, f)

    logger.info("Done.")
def save_variance_map(dataset, save_path):
    """
    Save the standard deviation of dataset.X along axis 0 with np.save.

    Parameters
    ----------
    dataset: object
        Must expose X, an array with a std method.
    save_path: str
        Destination passed to np.save.
    """
    logger.info("Saving variance file")
    np.save(save_path, dataset.X.std(axis=0))
def test_distribution(data, mask=None):
    """
    Log summary statistics about the distribution of voxel values.

    Kurtosis and skew are computed per voxel across samples. The proportion
    of voxels falling in several kurtosis ranges is logged, followed by the
    number of unique values and the mean and std of kurtosis and skew. A
    warning is logged when unique values make up less than 1e-3 of all
    entries.

    Parameters
    ----------
    data: array-like
        Array whose first axis indexes samples; the remaining axes are
        flattened into voxels.
    mask: array-like, optional
        If given, only voxels where the flattened mask equals 1 are used.
    """
    logger.info("Testing distribution.")
    data = data.reshape(data.shape[0], -1)
    if mask is not None:
        data = data[:, np.where(mask.flatten() == 1)[0]]

    k = kurtosis(data, axis=0)
    s = skew(data, axis=0)
    n_voxels = data.shape[1]

    def proportion(selected):
        # Fraction of voxels for which the boolean selector is True.
        return np.count_nonzero(selected) * 1. / n_voxels

    # The labels spell out the same bounds as the comparisons they report.
    logger.info("Proportion voxels k <= -1: %.2f" % proportion(k <= -1))
    logger.info("Proportion voxels -1 < k < 1: %.2f"
                % proportion(np.logical_and(k > -1, k < 1)))
    logger.info("Proportion voxels 1 <= k < 2: %.2f"
                % proportion(np.logical_and(k >= 1, k < 2)))
    logger.info("Proportion voxels 2 <= k < 3: %.2f"
                % proportion(np.logical_and(k >= 2, k < 3)))
    logger.info("Proportion voxels k >= 3: %.2f" % proportion(k >= 3))

    values = len(np.unique(data))
    total = data.size
    # Threshold is 1e-3 (the same value as the literal 10e-4).
    if values * 1. / total < 1e-3:
        logger.warning("Quantization probable (%d unique values out of %d)."
                       % (values, total))
    logger.info("Number of unique values in data: %d" % values)
    logger.info("Kurtosis k: %.2f (%.2f std) and skew s: %.2f (%.2f std)"
                % (k.mean(), k.std(), s.mean(), s.std()))