class MouseConnectivityCache(Cache):
    """
    Cache class for storing and accessing data related to the adult mouse
    Connectivity Atlas.  By default, this class will cache any downloaded
    metadata or files in well known locations defined in a manifest file.
    This behavior can be disabled.

    Attributes
    ----------

    resolution: int
        Resolution of grid data to be downloaded when accessing projection
        volumes, the annotation volume, and the template volume.
        Must be one of (10, 25, 50, 100).  Default is 25.

    ccf_version: string
        Version of the Common Coordinate Framework used when resolving and
        downloading the annotation volume.

    api: MouseConnectivityApi instance
        Used internally to make API queries.

    Parameters
    ----------

    resolution: int
        Resolution of grid data to be downloaded when accessing projection
        volumes, the annotation volume, and the template volume.
        Must be one of (10, 25, 50, 100).  Default is 25.

    ccf_version: string
        Desired version of the Common Coordinate Framework.  This affects the
        annotation volume (get_annotation_volume) and structure masks
        (get_structure_mask). Must be one of (MouseConnectivityApi.CCF_2015,
        MouseConnectivityApi.CCF_2016).
        Default: MouseConnectivityApi.CCF_VERSION_DEFAULT

    cache: boolean
        Whether the class should save results of API queries to locations
        specified in the manifest file.  Queries for files (as opposed to
        metadata) must have a file location.  If caching is disabled, those
        locations must be specified in the function call
        (e.g. get_projection_density(file_name='file.nrrd')).

    manifest_file: string
        File name of the manifest to be read.
        Default is "mouse_connectivity_manifest.json".

    base_uri: string
        Passed through to MouseConnectivityApi.  Default is None.
    """

    CCF_VERSION_KEY = "CCF_VERSION"
    ANNOTATION_KEY = "ANNOTATION"
    TEMPLATE_KEY = "TEMPLATE"
    PROJECTION_DENSITY_KEY = "PROJECTION_DENSITY"
    INJECTION_DENSITY_KEY = "INJECTION_DENSITY"
    INJECTION_FRACTION_KEY = "INJECTION_FRACTION"
    DATA_MASK_KEY = "DATA_MASK"
    STRUCTURE_UNIONIZES_KEY = "STRUCTURE_UNIONIZES"
    EXPERIMENTS_KEY = "EXPERIMENTS"
    STRUCTURES_KEY = "STRUCTURES"
    STRUCTURE_MASK_KEY = "STRUCTURE_MASK"

    # Fields stripped from downloaded experiment records before caching
    # because they are specific to a particular resolution.
    _RESOLUTION_SPECIFIC_EXPERIMENT_FIELDS = ("num-voxels", "injection-volume", "sum", "name")

    def __init__(
        self,
        resolution=None,
        cache=True,
        manifest_file="mouse_connectivity_manifest.json",
        ccf_version=None,
        base_uri=None,
    ):
        super(MouseConnectivityCache, self).__init__(manifest=manifest_file, cache=cache)

        if resolution is None:
            resolution = MouseConnectivityApi.VOXEL_RESOLUTION_25_MICRONS
        self.resolution = resolution

        self.api = MouseConnectivityApi(base_uri=base_uri)

        if ccf_version is None:
            ccf_version = MouseConnectivityApi.CCF_VERSION_DEFAULT
        self.ccf_version = ccf_version

    @staticmethod
    def _cached_file_exists(file_name):
        """
        Return True only when file_name is an actual path that exists.

        get_cache_path results are compared against None elsewhere in this
        class, so a None path is treated as "nothing cached" here instead of
        being handed to os.path.exists (which raises TypeError on None).
        """
        return file_name is not None and os.path.exists(file_name)

    def _get_experiment_volume(self, manifest_key, download, experiment_id, file_name):
        """
        Shared implementation of the per-experiment volume getters.

        Parameters
        ----------

        manifest_key: string
            Manifest key used to resolve the cache path.

        download: callable
            API method called as download(file_name, experiment_id,
            resolution) when the file does not exist yet.

        experiment_id: int
            ID of the experiment to download/read.

        file_name: string
            Explicit file name, or None to use the manifest location.

        Returns
        -------
        The result of nrrd.read on the (possibly just downloaded) file.

        Raises
        ------
        Exception
            If no file name could be resolved.
        """
        file_name = self.get_cache_path(file_name, manifest_key, experiment_id, self.resolution)

        if file_name is None:
            raise Exception("No file name to save volume.")

        if not os.path.exists(file_name):
            Manifest.safe_make_parent_dirs(file_name)
            download(file_name, experiment_id, self.resolution)

        return nrrd.read(file_name)

    def get_annotation_volume(self, file_name=None):
        """
        Read the annotation volume.  Download it first if it doesn't exist.

        Parameters
        ----------

        file_name: string
            File name to store the annotation volume.  If it already exists,
            it will be read from this file.  If file_name is None, the
            file_name will be pulled out of the manifest.  Default is None.

        Returns
        -------
        tuple
            (annotation, info) as produced by nrrd.read or by the API
            download call.
        """
        file_name = self.get_cache_path(file_name, self.ANNOTATION_KEY, self.ccf_version, self.resolution)

        if file_name is None:
            raise Exception("No save file name provided for annotation volume.")

        if os.path.exists(file_name):
            annotation, info = nrrd.read(file_name)
        else:
            Manifest.safe_make_parent_dirs(file_name)
            annotation, info = self.api.download_annotation_volume(self.ccf_version, self.resolution, file_name)

        return annotation, info

    def get_template_volume(self, file_name=None):
        """
        Read the template volume.  Download it first if it doesn't exist.

        Parameters
        ----------

        file_name: string
            File name to store the template volume.  If it already exists,
            it will be read from this file.  If file_name is None, the
            file_name will be pulled out of the manifest.  Default is None.

        Returns
        -------
        tuple
            (template, info) as produced by nrrd.read or by the API
            download call.
        """
        file_name = self.get_cache_path(file_name, self.TEMPLATE_KEY, self.resolution)

        if file_name is None:
            # The original message named the annotation volume (copy-paste).
            raise Exception("No save file provided for template volume.")

        if os.path.exists(file_name):
            template, info = nrrd.read(file_name)
        else:
            Manifest.safe_make_parent_dirs(file_name)
            template, info = self.api.download_template_volume(self.resolution, file_name)

        return template, info

    def get_projection_density(self, experiment_id, file_name=None):
        """
        Read a projection density volume for a single experiment.  Download it
        first if it doesn't exist.  Projection density is the proportion of
        projecting pixels in a grid voxel in [0,1].

        Parameters
        ----------

        experiment_id: int
            ID of the experiment to download/read.  This corresponds to
            section_data_set_id in the API.

        file_name: string
            File name to store the projection density volume.  If it already
            exists, it will be read from this file.  If file_name is None,
            the file_name will be pulled out of the manifest.
            Default is None.
        """
        return self._get_experiment_volume(
            self.PROJECTION_DENSITY_KEY, self.api.download_projection_density, experiment_id, file_name
        )

    def get_injection_density(self, experiment_id, file_name=None):
        """
        Read an injection density volume for a single experiment.  Download it
        first if it doesn't exist.  Injection density is the proportion of
        projecting pixels in a grid voxel only including pixels that are
        part of the injection site in [0,1].

        Parameters
        ----------

        experiment_id: int
            ID of the experiment to download/read.  This corresponds to
            section_data_set_id in the API.

        file_name: string
            File name to store the injection density volume.  If it already
            exists, it will be read from this file.  If file_name is None,
            the file_name will be pulled out of the manifest.
            Default is None.
        """
        return self._get_experiment_volume(
            self.INJECTION_DENSITY_KEY, self.api.download_injection_density, experiment_id, file_name
        )

    def get_injection_fraction(self, experiment_id, file_name=None):
        """
        Read an injection fraction volume for a single experiment.  Download
        it first if it doesn't exist.  Injection fraction is the proportion
        of pixels in the injection site in a grid voxel in [0,1].

        Parameters
        ----------

        experiment_id: int
            ID of the experiment to download/read.  This corresponds to
            section_data_set_id in the API.

        file_name: string
            File name to store the injection fraction volume.  If it already
            exists, it will be read from this file.  If file_name is None,
            the file_name will be pulled out of the manifest.
            Default is None.
        """
        return self._get_experiment_volume(
            self.INJECTION_FRACTION_KEY, self.api.download_injection_fraction, experiment_id, file_name
        )

    def get_data_mask(self, experiment_id, file_name=None):
        """
        Read a data mask volume for a single experiment.  Download it first
        if it doesn't exist.  Data mask is a binary mask of voxels that have
        valid data.  Only use valid data in analysis!

        Parameters
        ----------

        experiment_id: int
            ID of the experiment to download/read.  This corresponds to
            section_data_set_id in the API.

        file_name: string
            File name to store the data mask volume.  If it already exists,
            it will be read from this file.  If file_name is None, the
            file_name will be pulled out of the manifest.  Default is None.
        """
        return self._get_experiment_volume(
            self.DATA_MASK_KEY, self.api.download_data_mask, experiment_id, file_name
        )

    def get_ontology(self, file_name=None):
        """
        Read the list of adult mouse structures and return an Ontology
        instance.

        Parameters
        ----------

        file_name: string
            File name to save/read the structures table.  If file_name is
            None, the file_name will be pulled out of the manifest.  If
            caching is disabled, no file will be saved.  Default is None.
        """
        return Ontology(self.get_structures(file_name))

    def get_structures(self, file_name=None):
        """
        Read the list of adult mouse structures and return a Pandas
        DataFrame indexed by structure id (the "id" column is also kept).

        Parameters
        ----------

        file_name: string
            File name to save/read the structures table.  If file_name is
            None, the file_name will be pulled out of the manifest.  If
            caching is disabled, no file will be saved.  Default is None.
        """
        file_name = self.get_cache_path(file_name, self.STRUCTURES_KEY)

        if self._cached_file_exists(file_name):
            # Documented equivalent of the removed pd.DataFrame.from_csv.
            structures = pd.read_csv(file_name, index_col=0, parse_dates=True)
        else:
            # NOTE(review): the argument 1 is presumably the structure graph
            # id of the adult mouse ontology -- confirm against OntologiesApi.
            structures = OntologiesApi(base_uri=self.api.api_url).get_structures(1)
            structures = pd.DataFrame(structures)

            if self.cache:
                Manifest.safe_make_parent_dirs(file_name)
                structures.to_csv(file_name)

        structures.set_index(["id"], inplace=True, drop=False)
        return structures

    def get_experiments(self, dataframe=False, file_name=None, cre=None, injection_structure_ids=None):
        """
        Read a list of experiments that match certain criteria.  If caching
        is enabled, this will save the whole (unfiltered) list of
        experiments to a file.

        Parameters
        ----------

        dataframe: boolean
            Return the list of experiments as a Pandas DataFrame.  If False,
            return a list of dictionaries.  Default False.

        file_name: string
            File name to save/read the experiments list.  If file_name is
            None, the file_name will be pulled out of the manifest.  If
            caching is disabled, no file will be saved.  Default is None.

        cre: boolean or list
            If True, return only cre-positive experiments.  If False, return
            only cre-negative experiments.  If None, return all experiments.
            If list, return all experiments with cre line names in the
            supplied list.  Default None.

        injection_structure_ids: list
            Only return experiments that were injected in the structures
            provided here.  If None, return all experiments.  Default None.
        """
        file_name = self.get_cache_path(file_name, self.EXPERIMENTS_KEY)

        if self._cached_file_exists(file_name):
            experiments = json_utilities.read(file_name)
        else:
            experiments = self.api.experiment_source_search(injection_structures="root")

            # Drop fields that are specific to a particular resolution.
            # pop() tolerates records that do not carry a given field.
            for experiment in experiments:
                for field in self._RESOLUTION_SPECIFIC_EXPERIMENT_FIELDS:
                    experiment.pop(field, None)

            if self.cache:
                Manifest.safe_make_parent_dirs(file_name)
                json_utilities.write(file_name, experiments)

        # filter the read/downloaded list of experiments
        experiments = self.filter_experiments(experiments, cre, injection_structure_ids)

        if dataframe:
            experiments = pd.DataFrame(experiments)
            experiments.set_index(["id"], inplace=True, drop=False)

        return experiments

    def filter_experiments(self, experiments, cre=None, injection_structure_ids=None):
        """
        Take a list of experiments and filter them by cre status and
        injection structure.

        Parameters
        ----------

        experiments: list
            List of experiment dictionaries carrying "transgenic-line" and
            "structure-id" entries.

        cre: boolean or list
            If True, return only cre-positive experiments.  If False, return
            only cre-negative experiments.  If None, return all experiments.
            If list, return all experiments with cre line names in the
            supplied list.  Default None.

        injection_structure_ids: list
            Only return experiments that were injected in the structures
            provided here (descendant structures included).  If None, return
            all experiments.  Default None.
        """
        # Identity checks keep True/False distinct from a list of line names.
        if cre is True:
            experiments = [e for e in experiments if e["transgenic-line"]]
        elif cre is False:
            experiments = [e for e in experiments if not e["transgenic-line"]]
        elif cre is not None:
            experiments = [e for e in experiments if e["transgenic-line"] in cre]

        if injection_structure_ids is not None:
            descendant_ids = self.get_ontology().get_descendant_ids(injection_structure_ids)
            experiments = [e for e in experiments if e["structure-id"] in descendant_ids]

        return experiments

    def get_experiment_structure_unionizes(
        self,
        experiment_id,
        file_name=None,
        is_injection=None,
        structure_ids=None,
        include_descendants=False,
        hemisphere_ids=None,
    ):
        """
        Retrieve the structure unionize data for a specific experiment.
        Filter by structure, injection status, and hemisphere.

        Parameters
        ----------

        experiment_id: int
            ID of the experiment of interest.  Corresponds to
            section_data_set_id in the API.

        file_name: string
            File name to save/read the structure unionizes.  If file_name is
            None, the file_name will be pulled out of the manifest.  If
            caching is disabled, no file will be saved.  Default is None.

        is_injection: boolean
            If True, only return unionize records that disregard
            non-injection pixels.  If False, only return unionize records
            that disregard injection pixels.  If None, return all records.
            Default None.

        structure_ids: list
            Only return unionize records for a specific set of structures.
            If None, return all records.  Default None.

        include_descendants: boolean
            Include all descendant records for specified structures.
            Default False.

        hemisphere_ids: list
            Only return unionize records that disregard pixels outside of a
            hemisphere or set of hemispheres.  Left = 1, Right = 2, Both = 3.
            If None, include all records [1, 2, 3].  Default None.
        """
        file_name = self.get_cache_path(file_name, self.STRUCTURE_UNIONIZES_KEY, experiment_id)

        if self._cached_file_exists(file_name):
            # Documented equivalent of the removed pd.DataFrame.from_csv.
            unionizes = pd.read_csv(file_name, index_col=0, parse_dates=True)
        else:
            unionizes = self.api.get_structure_unionizes([experiment_id])
            unionizes = pd.DataFrame(unionizes)

            # rename section_data_set_id column to experiment_id
            unionizes.columns = ["experiment_id" if c == "section_data_set_id" else c for c in unionizes.columns]

            if self.cache:
                Manifest.safe_make_parent_dirs(file_name)
                unionizes.to_csv(file_name)

        return self.filter_structure_unionizes(
            unionizes, is_injection, structure_ids, include_descendants, hemisphere_ids
        )

    def filter_structure_unionizes(
        self, unionizes, is_injection=None, structure_ids=None, include_descendants=False, hemisphere_ids=None
    ):
        """
        Take a DataFrame of unionizes and return a subset of records
        filtered by injection status, structure, and hemisphere.

        Parameters
        ----------

        unionizes: pandas.DataFrame
            Unionize records with "is_injection", "structure_id" and
            "hemisphere_id" columns.

        is_injection: boolean
            If True, only return unionize records that disregard
            non-injection pixels.  If False, only return unionize records
            that disregard injection pixels.  If None, return all records.
            Default None.

        structure_ids: list
            Only return unionize records for a set of structures.
            If None, return all records.  Default None.

        include_descendants: boolean
            Include all descendant records for specified structures.
            Default False.

        hemisphere_ids: list
            Only return unionize records that disregard pixels outside of a
            hemisphere or set of hemispheres.  Left = 1, Right = 2, Both = 3.
            If None, include all records [1, 2, 3].  Default None.
        """
        if is_injection is not None:
            unionizes = unionizes[unionizes.is_injection == is_injection]

        if structure_ids is not None:
            if include_descendants:
                structure_ids = self.get_ontology().get_descendant_ids(structure_ids)
            else:
                structure_ids = set(structure_ids)

            unionizes = unionizes[unionizes["structure_id"].isin(structure_ids)]

        if hemisphere_ids is not None:
            unionizes = unionizes[unionizes["hemisphere_id"].isin(hemisphere_ids)]

        return unionizes

    def get_structure_unionizes(
        self, experiment_ids, is_injection=None, structure_ids=None, include_descendants=False, hemisphere_ids=None
    ):
        """
        Get structure unionizes for a set of experiment IDs.  Filter the
        results by injection status, structure, and hemisphere.

        Parameters
        ----------

        experiment_ids: list
            List of experiment IDs.  Corresponds to section_data_set_id in
            the API.

        is_injection: boolean
            If True, only return unionize records that disregard
            non-injection pixels.  If False, only return unionize records
            that disregard injection pixels.  If None, return all records.
            Default None.

        structure_ids: list
            Only return unionize records for a specific set of structures.
            If None, return all records.  Default None.

        include_descendants: boolean
            Include all descendant records for specified structures.
            Default False.

        hemisphere_ids: list
            Only return unionize records that disregard pixels outside of a
            hemisphere or set of hemispheres.  Left = 1, Right = 2, Both = 3.
            If None, include all records [1, 2, 3].  Default None.

        Returns
        -------
        pandas.DataFrame
            The per-experiment results concatenated with a fresh index.
        """
        unionizes = [
            self.get_experiment_structure_unionizes(
                eid,
                is_injection=is_injection,
                structure_ids=structure_ids,
                include_descendants=include_descendants,
                hemisphere_ids=hemisphere_ids,
            )
            for eid in experiment_ids
        ]

        return pd.concat(unionizes, ignore_index=True)

    def get_projection_matrix(
        self,
        experiment_ids,
        projection_structure_ids,
        hemisphere_ids=None,
        parameter="projection_volume",
        dataframe=False,
    ):
        """
        Build an experiments-by-(hemisphere, structure) matrix from the
        non-injection structure unionizes of a set of experiments.

        Parameters
        ----------

        experiment_ids: list
            Experiment IDs; one matrix row per entry, in the given order.

        projection_structure_ids: list
            Structure IDs; one matrix column per structure and hemisphere
            present in the unionize records.  Descendants are not included.

        hemisphere_ids: list
            Restrict the unionize records to these hemispheres.
            Left = 1, Right = 2, Both = 3.  If None, do not filter.
            Default None.

        parameter: string
            Name of the unionize column whose value fills the matrix.
            Default is "projection_volume".

        dataframe: boolean
            If True, describe rows and columns with Pandas DataFrames.
            If False, rows is the experiment_ids argument and columns is a
            list of dictionaries.  Default False.

        Returns
        -------
        dict
            Keys "matrix" (2D numpy array, NaN where no unionize record was
            found), "rows" and "columns".
        """
        unionizes = self.get_structure_unionizes(
            experiment_ids,
            is_injection=False,
            structure_ids=projection_structure_ids,
            include_descendants=False,
            hemisphere_ids=hemisphere_ids,
        )

        # Use the hemispheres actually present in the data; sorting makes the
        # column order explicit instead of relying on set iteration order.
        hemisphere_ids = sorted(set(unionizes["hemisphere_id"].values.tolist()))

        nrows = len(experiment_ids)
        ncolumns = len(projection_structure_ids) * len(hemisphere_ids)

        # np.nan is the spelling that survives NumPy 2.0 (np.NAN was removed).
        matrix = np.empty((nrows, ncolumns))
        matrix[:] = np.nan

        row_lookup = {eid: idx for idx, eid in enumerate(experiment_ids)}

        column_lookup = {}
        columns = []

        hlabel = {1: "-L", 2: "-R", 3: ""}

        ontology = self.get_ontology()

        for hid in hemisphere_ids:
            for sid in projection_structure_ids:
                column_lookup[(hid, sid)] = len(columns)
                label = ontology[sid].iloc[0]["acronym"] + hlabel[hid]
                columns.append({"hemisphere_id": hid, "structure_id": sid, "label": label})

        for _, row in unionizes.iterrows():
            ridx = row_lookup[row["experiment_id"]]
            cidx = column_lookup[(row["hemisphere_id"], row["structure_id"])]
            matrix[ridx, cidx] = row[parameter]

        if dataframe:
            all_experiments = self.get_experiments(dataframe=True)

            rows_df = all_experiments.loc[experiment_ids]
            cols_df = pd.DataFrame(columns)

            return {"matrix": matrix, "rows": rows_df, "columns": cols_df}

        return {"matrix": matrix, "rows": experiment_ids, "columns": columns}

    def get_structure_mask(self, structure_id, file_name=None, annotation_file_name=None):
        """
        Read a 3D numpy array shaped like the annotation volume that has
        non-zero values where voxels belong to a particular structure.  This
        will take care of identifying substructures.

        Parameters
        ----------

        structure_id: int
            ID of a structure.

        file_name: string
            File name to store the structure mask.  If it already exists,
            it will be read from this file.  If file_name is None, the
            file_name will be pulled out of the manifest.  Default is None.

        annotation_file_name: string
            File name to store the annotation volume.  If it already exists,
            it will be read from this file.  If annotation_file_name is
            None, the file_name will be pulled out of the manifest.
            Default is None.

        Returns
        -------
        tuple
            The result of nrrd.read when the mask file already exists;
            otherwise (mask, None) for a freshly computed mask.
        """
        file_name = self.get_cache_path(file_name, self.STRUCTURE_MASK_KEY, structure_id)

        if self._cached_file_exists(file_name):
            return nrrd.read(file_name)

        ontology = self.get_ontology()
        structure_ids = ontology.get_descendant_ids([structure_id])
        annotation, _ = self.get_annotation_volume(annotation_file_name)
        mask = self.make_structure_mask(structure_ids, annotation)

        if self.cache:
            Manifest.safe_make_parent_dirs(file_name)
            nrrd.write(file_name, mask)

        return mask, None

    def make_structure_mask(self, structure_ids, annotation):
        """
        Look at an annotation volume and identify voxels that have values
        in a list of structure ids.

        Parameters
        ----------

        structure_ids: list
            List of IDs to look for in the annotation volume

        annotation: np.ndarray
            Numpy array filled with IDs.

        Returns
        -------
        np.ndarray
            uint8 array with the shape of annotation; 1 where the voxel
            value is one of structure_ids, 0 elsewhere.
        """
        mask = np.zeros(annotation.shape, dtype=np.uint8)

        for sid in structure_ids:
            mask[annotation == sid] = 1

        return mask

    def build_manifest(self, file_name):
        """
        Construct a manifest for this Cache class and save it in a file.

        Parameters
        ----------

        file_name: string
            File location to save the manifest.
        """
        manifest_builder = ManifestBuilder()
        manifest_builder.add_path("BASEDIR", ".")

        manifest_builder.add_path(self.EXPERIMENTS_KEY, "experiments.json", parent_key="BASEDIR", typename="file")

        manifest_builder.add_path(self.STRUCTURES_KEY, "structures.csv", parent_key="BASEDIR", typename="file")

        manifest_builder.add_path(
            self.STRUCTURE_UNIONIZES_KEY,
            "experiment_%d/structure_unionizes.csv",
            parent_key="BASEDIR",
            typename="file",
        )

        # The annotation volume lives in a per-CCF-version directory.
        manifest_builder.add_path(self.CCF_VERSION_KEY, "%s", parent_key="BASEDIR", typename="dir")

        manifest_builder.add_path(
            self.ANNOTATION_KEY, "annotation_%d.nrrd", parent_key=self.CCF_VERSION_KEY, typename="file"
        )

        manifest_builder.add_path(
            self.TEMPLATE_KEY, "average_template_%d.nrrd", parent_key="BASEDIR", typename="file"
        )

        manifest_builder.add_path(
            self.INJECTION_DENSITY_KEY,
            "experiment_%d/injection_density_%d.nrrd",
            parent_key="BASEDIR",
            typename="file",
        )

        manifest_builder.add_path(
            self.INJECTION_FRACTION_KEY,
            "experiment_%d/injection_fraction_%d.nrrd",
            parent_key="BASEDIR",
            typename="file",
        )

        manifest_builder.add_path(
            self.DATA_MASK_KEY, "experiment_%d/data_mask_%d.nrrd", parent_key="BASEDIR", typename="file"
        )

        manifest_builder.add_path(
            self.PROJECTION_DENSITY_KEY,
            "experiment_%d/projection_density_%d.nrrd",
            parent_key="BASEDIR",
            typename="file",
        )

        manifest_builder.add_path(
            self.STRUCTURE_MASK_KEY, "structure_masks/structure_%d.nrrd", parent_key="BASEDIR", typename="file"
        )

        manifest_builder.write_json_file(file_name)
class MouseConnectivityCache(Cache): """ Cache class for storing and accessing data related to the adult mouse Connectivity Atlas. By default, this class will cache any downloaded metadata or files in well known locations defined in a manifest file. This behavior can be disabled. Attributes ---------- resolution: int Resolution of grid data to be downloaded when accessing projection volume, the annotation volume, and the annotation volume. Must be one of (10, 25, 50, 100). Default is 25. api: MouseConnectivityApi instance Used internally to make API queries. Parameters ---------- resolution: int Resolution of grid data to be downloaded when accessing projection volume, the annotation volume, and the annotation volume. Must be one of (10, 25, 50, 100). Default is 25. cache: boolean Whether the class should save results of API queries to locations specified in the manifest file. Queries for files (as opposed to metadata) must have a file location. If caching is disabled, those locations must be specified in the function call (e.g. get_projection_density(file_name='file.nrrd')). manifest_file: string File name of the manifest to be read. Default is "mouse_connectivity_manifest.json". """ ANNOTATION_KEY = 'ANNOTATION' TEMPLATE_KEY = 'TEMPLATE' PROJECTION_DENSITY_KEY = 'PROJECTION_DENSITY' INJECTION_DENSITY_KEY = 'INJECTION_DENSITY' INJECTION_FRACTION_KEY = 'INJECTION_FRACTION' DATA_MASK_KEY = 'DATA_MASK' STRUCTURE_UNIONIZES_KEY = 'STRUCTURE_UNIONIZES' EXPERIMENTS_KEY = 'EXPERIMENTS' STRUCTURES_KEY = 'STRUCTURES' STRUCTURE_MASK_KEY = 'STRUCTURE_MASK' def __init__(self, resolution=25, cache=True, manifest_file='mouse_connectivity_manifest.json', base_uri=None): super(MouseConnectivityCache, self).__init__(manifest=manifest_file, cache=cache) self.resolution = resolution self.api = MouseConnectivityApi(base_uri=base_uri) def get_annotation_volume(self, file_name=None): """ Read the annotation volume. Download it first if it doesn't exist. 
Parameters ---------- file_name: string File name to store the annotation volume. If it already exists, it will be read from this file. If file_name is None, the file_name will be pulled out of the manifest. Default is None. """ file_name = self.get_cache_path(file_name, self.ANNOTATION_KEY, self.resolution) if file_name is None: raise Exception( "No save file name provided for annotation volume.") if os.path.exists(file_name): annotation, info = nrrd.read(file_name) else: Manifest.safe_mkdir(os.path.dirname(file_name)) annotation, info = self.api.download_annotation_volume( self.resolution, file_name) return annotation, info def get_template_volume(self, file_name=None): """ Read the template volume. Download it first if it doesn't exist. Parameters ---------- file_name: string File name to store the template volume. If it already exists, it will be read from this file. If file_name is None, the file_name will be pulled out of the manifest. Default is None. """ file_name = self.get_cache_path(file_name, self.TEMPLATE_KEY, self.resolution) if file_name is None: raise Exception("No save file provided for annotation volume.") if os.path.exists(file_name): annotation, info = nrrd.read(file_name) else: Manifest.safe_mkdir(os.path.dirname(file_name)) annotation, info = self.api.download_template_volume( self.resolution, file_name) return annotation, info def get_projection_density(self, experiment_id, file_name=None): """ Read a projection density volume for a single experiment. Download it first if it doesn't exist. Projection density is the proportion of of projecting pixels in a grid voxel in [0,1]. Parameters ---------- experiment_id: int ID of the experiment to download/read. This corresponds to section_data_set_id in the API. file_name: string File name to store the template volume. If it already exists, it will be read from this file. If file_name is None, the file_name will be pulled out of the manifest. Default is None. 
""" file_name = self.get_cache_path(file_name, self.PROJECTION_DENSITY_KEY, experiment_id, self.resolution) if file_name is None: raise Exception("No file name to save volume.") if not os.path.exists(file_name): Manifest.safe_mkdir(os.path.dirname(file_name)) self.api.download_projection_density(file_name, experiment_id, self.resolution) return nrrd.read(file_name) def get_injection_density(self, experiment_id, file_name=None): """ Read an injection density volume for a single experiment. Download it first if it doesn't exist. Injection density is the proportion of projecting pixels in a grid voxel only including pixels that are part of the injection site in [0,1]. Parameters ---------- experiment_id: int ID of the experiment to download/read. This corresponds to section_data_set_id in the API. file_name: string File name to store the template volume. If it already exists, it will be read from this file. If file_name is None, the file_name will be pulled out of the manifest. Default is None. """ file_name = self.get_cache_path(file_name, self.INJECTION_DENSITY_KEY, experiment_id, self.resolution) if file_name is None: raise Exception("No file name to save volume.") if not os.path.exists(file_name): Manifest.safe_mkdir(os.path.dirname(file_name)) self.api.download_injection_density(file_name, experiment_id, self.resolution) return nrrd.read(file_name) def get_injection_fraction(self, experiment_id, file_name=None): """ Read an injection fraction volume for a single experiment. Download it first if it doesn't exist. Injection fraction is the proportion of pixels in the injection site in a grid voxel in [0,1]. Parameters ---------- experiment_id: int ID of the experiment to download/read. This corresponds to section_data_set_id in the API. file_name: string File name to store the template volume. If it already exists, it will be read from this file. If file_name is None, the file_name will be pulled out of the manifest. Default is None. 
""" file_name = self.get_cache_path(file_name, self.INJECTION_FRACTION_KEY, experiment_id, self.resolution) if file_name is None: raise Exception("No file name to save volume.") if not os.path.exists(file_name): Manifest.safe_mkdir(os.path.dirname(file_name)) self.api.download_injection_fraction(file_name, experiment_id, self.resolution) return nrrd.read(file_name) def get_data_mask(self, experiment_id, file_name=None): """ Read a data mask volume for a single experiment. Download it first if it doesn't exist. Data mask is a binary mask of voxels that have valid data. Only use valid data in analysis! Parameters ---------- experiment_id: int ID of the experiment to download/read. This corresponds to section_data_set_id in the API. file_name: string File name to store the template volume. If it already exists, it will be read from this file. If file_name is None, the file_name will be pulled out of the manifest. Default is None. """ file_name = self.get_cache_path(file_name, self.DATA_MASK_KEY, experiment_id, self.resolution) if file_name is None: raise Exception("No file name to save volume.") if not os.path.exists(file_name): Manifest.safe_mkdir(os.path.dirname(file_name)) self.api.download_data_mask(file_name, experiment_id, self.resolution) return nrrd.read(file_name) def get_ontology(self, file_name=None): """ Read the list of adult mouse structures and return an Ontology instance. Parameters ---------- file_name: string File name to save/read the structures table. If file_name is None, the file_name will be pulled out of the manifest. If caching is disabled, no file will be saved. Default is None. """ return Ontology(self.get_structures(file_name)) def get_structures(self, file_name=None): """ Read the list of adult mouse structures and return a Pandas DataFrame. Parameters ---------- file_name: string File name to save/read the structures table. If file_name is None, the file_name will be pulled out of the manifest. 
If caching is disabled, no file will be saved. Default is None. """ file_name = self.get_cache_path(file_name, self.STRUCTURES_KEY) if os.path.exists(file_name): structures = pd.DataFrame.from_csv(file_name) else: structures = OntologiesApi().get_structures(1) structures = pd.DataFrame(structures) if self.cache: Manifest.safe_mkdir(os.path.dirname(file_name)) structures.to_csv(file_name) structures.set_index(['id'], inplace=True, drop=False) return structures def get_experiments(self, dataframe=False, file_name=None, cre=None, injection_structure_ids=None): """ Read a list of experiments that match certain criteria. If caching is enabled, this will save the whole (unfiltered) list of experiments to a file. Parameters ---------- dataframe: boolean Return the list of experiments as a Pandas DataFrame. If False, return a list of dictionaries. Default False. file_name: string File name to save/read the structures table. If file_name is None, the file_name will be pulled out of the manifest. If caching is disabled, no file will be saved. Default is None. cre: boolean or list If True, return only cre-positive experiments. If False, return only cre-negative experiments. If None, return all experients. If list, return all experiments with cre line names in the supplied list. Default None. injection_structure_ids: list Only return experiments that were injected in the structures provided here. If None, return all experiments. Default None. 
""" file_name = self.get_cache_path(file_name, self.EXPERIMENTS_KEY) if os.path.exists(file_name): experiments = json_utilities.read(file_name) else: experiments = self.api.experiment_source_search( injection_structures='root') # removing these elements because they are specific to a particular resolution for e in experiments: del e['num-voxels'] del e['injection-volume'] del e['sum'] del e['name'] if self.cache: Manifest.safe_mkdir(os.path.dirname(file_name)) json_utilities.write(file_name, experiments) # filter the read/downloaded list of experiments experiments = self.filter_experiments(experiments, cre, injection_structure_ids) if dataframe: experiments = pd.DataFrame(experiments) experiments.set_index(['id'], inplace=True, drop=False) return experiments def filter_experiments(self, experiments, cre=None, injection_structure_ids=None): """ Take a list of experiments and filter them by cre status and injection structure. Parameters ---------- cre: boolean or list If True, return only cre-positive experiments. If False, return only cre-negative experiments. If None, return all experients. If list, return all experiments with cre line names in the supplied list. Default None. injection_structure_ids: list Only return experiments that were injected in the structures provided here. If None, return all experiments. Default None. 
""" if cre == True: experiments = [e for e in experiments if e['transgenic-line']] elif cre == False: experiments = [e for e in experiments if not e['transgenic-line']] elif cre is not None: experiments = [ e for e in experiments if e['transgenic-line'] in cre ] if injection_structure_ids is not None: descendant_ids = self.get_ontology().get_descendant_ids( injection_structure_ids) experiments = [ e for e in experiments if e['structure-id'] in descendant_ids ] return experiments def get_experiment_structure_unionizes(self, experiment_id, file_name=None, is_injection=None, structure_ids=None, hemisphere_ids=None): """ Retrieve the structure unionize data for a specific experiment. Filter by structure, injection status, and hemisphere. Parameters ---------- experiment_id: int ID of the experiment of interest. Corresponds to section_data_set_id in the API. file_name: string File name to save/read the experiments list. If file_name is None, the file_name will be pulled out of the manifest. If caching is disabled, no file will be saved. Default is None. is_injection: boolean If True, only return unionize records that disregard non-injection pixels. If False, only return unionize records that disregard injection pixels. If None, return all records. Default None. structure_ids: list Only return unionize records that are inside a specific set of structures. If None, return all records. Default None. hemisphere_ids: list Only return unionize records that disregard pixels outside of a hemisphere. or set of hemispheres. Left = 1, Right = 2, Both = 3. If None, include all records [1, 2, 3]. Default None. 
""" file_name = self.get_cache_path(file_name, self.STRUCTURE_UNIONIZES_KEY, experiment_id) if os.path.exists(file_name): unionizes = pd.DataFrame.from_csv(file_name) else: unionizes = self.api.get_structure_unionizes([experiment_id]) unionizes = pd.DataFrame(unionizes) # rename section_data_set_id column to experiment_id unionizes.columns = [ 'experiment_id' if c == 'section_data_set_id' else c for c in unionizes.columns ] if self.cache: Manifest.safe_mkdir(os.path.dirname(file_name)) unionizes.to_csv(file_name) return self.filter_structure_unionizes(unionizes, is_injection, structure_ids, hemisphere_ids) def filter_structure_unionizes(self, unionizes, is_injection=None, structure_ids=None, hemisphere_ids=None): """ Take a list of unionzes and return a subset of records filtered by injection status, structure, and hemisphere. Parameters ---------- is_injection: boolean If True, only return unionize records that disregard non-injection pixels. If False, only return unionize records that disregard injection pixels. If None, return all records. Default None. structure_ids: list Only return unionize records that are inside a specific set of structures. If None, return all records. Default None. hemisphere_ids: list Only return unionize records that disregard pixels outside of a hemisphere. or set of hemispheres. Left = 1, Right = 2, Both = 3. If None, include all records [1, 2, 3]. Default None. """ if is_injection is not None: unionizes = unionizes[unionizes.is_injection == is_injection] if structure_ids is not None: descendant_ids = self.get_ontology().get_descendant_ids( structure_ids) unionizes = unionizes[unionizes['structure_id'].isin( descendant_ids)] if hemisphere_ids is not None: unionizes = unionizes[unionizes['hemisphere_id'].isin( hemisphere_ids)] return unionizes def get_structure_unionizes(self, experiment_ids, is_injection=None, structure_ids=None, hemisphere_ids=None): """ Get structure unionizes for a set of experiment IDs. 
Filter the results by injection status, structure, and hemisphere. Parameters ---------- experiment_ids: list List of experiment IDs. Corresponds to section_data_set_id in the API. is_injection: boolean If True, only return unionize records that disregard non-injection pixels. If False, only return unionize records that disregard injection pixels. If None, return all records. Default None. structure_ids: list Only return unionize records that are inside a specific set of structures. If None, return all records. Default None. hemisphere_ids: list Only return unionize records that disregard pixels outside of a hemisphere. or set of hemispheres. Left = 1, Right = 2, Both = 3. If None, include all records [1, 2, 3]. Default None. """ unionizes = [ self.get_experiment_structure_unionizes( eid, is_injection=is_injection, structure_ids=structure_ids, hemisphere_ids=hemisphere_ids) for eid in experiment_ids ] return pd.concat(unionizes, ignore_index=True) def get_projection_matrix(self, experiment_ids, projection_structure_ids, hemisphere_ids=None, parameter='projection_volume', dataframe=False): unionizes = self.get_structure_unionizes(experiment_ids, is_injection=False, hemisphere_ids=hemisphere_ids) unionizes = unionizes[unionizes.structure_id.isin( projection_structure_ids)] projection_structure_ids = set( unionizes['structure_id'].values.tolist()) hemisphere_ids = set(unionizes['hemisphere_id'].values.tolist()) nrows = len(experiment_ids) ncolumns = len(projection_structure_ids) * len(hemisphere_ids) matrix = np.empty((nrows, ncolumns)) matrix[:] = np.NAN row_lookup = {} for idx, e in enumerate(experiment_ids): row_lookup[e] = idx column_lookup = {} columns = [] cidx = 0 hlabel = {1: '-L', 2: '-R', 3: ''} o = self.get_ontology() for hid in hemisphere_ids: for sid in projection_structure_ids: column_lookup[(hid, sid)] = cidx label = o[sid].iloc[0]['acronym'] + hlabel[hid] columns.append({ 'hemisphere_id': hid, 'structure_id': sid, 'label': label }) cidx += 1 for _, row 
in unionizes.iterrows(): ridx = row_lookup[row['experiment_id']] k = (row['hemisphere_id'], row['structure_id']) cidx = column_lookup[k] matrix[ridx, cidx] = row[parameter] if dataframe: all_experiments = self.get_experiments(dataframe=True) rows_df = all_experiments.loc[experiment_ids] cols_df = pd.DataFrame(columns) return {'matrix': matrix, 'rows': rows_df, 'columns': cols_df} else: return { 'matrix': matrix, 'rows': experiment_ids, 'columns': columns } def get_structure_mask(self, structure_id, file_name=None, annotation_file_name=None): """ Read a 3D numpy array shaped like the annotation volume that has non-zero values where voxels belong to a particular structure. This will take care of identifying substructures. Parameters ---------- structure_id: int ID of a structure. file_name: string File name to store the structure mask. If it already exists, it will be read from this file. If file_name is None, the file_name will be pulled out of the manifest. Default is None. annotation_file_name: string File name to store the annotation volume. If it already exists, it will be read from this file. If file_name is None, the file_name will be pulled out of the manifest. Default is None. """ file_name = self.get_cache_path(file_name, self.STRUCTURE_MASK_KEY, structure_id) if os.path.exists(file_name): return nrrd.read(file_name) else: ont = self.get_ontology() structure_ids = ont.get_descendant_ids([structure_id]) annotation, _ = self.get_annotation_volume(annotation_file_name) mask = self.make_structure_mask(structure_ids, annotation) if self.cache: Manifest.safe_mkdir(os.path.dirname(file_name)) nrrd.write(file_name, mask) return mask, None def make_structure_mask(self, structure_ids, annotation): """ Look at an annotation volume and identify voxels that have values in a list of structure ids. Parameters ---------- structure_ids: list List of IDs to look for in the annotation volume annotation: np.ndarray Numpy array filled with IDs. 
""" m = np.zeros(annotation.shape, dtype=np.uint8) for _, sid in enumerate(structure_ids): m[annotation == sid] = 1 return m def build_manifest(self, file_name): """ Construct a manifest for this Cache class and save it in a file. Parameters ---------- file_name: string File location to save the manifest. """ manifest_builder = ManifestBuilder() manifest_builder.add_path('BASEDIR', '.') manifest_builder.add_path(self.EXPERIMENTS_KEY, 'experiments.json', parent_key='BASEDIR', typename='file') manifest_builder.add_path(self.STRUCTURES_KEY, 'structures.csv', parent_key='BASEDIR', typename='file') manifest_builder.add_path(self.STRUCTURE_UNIONIZES_KEY, 'experiment_%d/structure_unionizes.csv', parent_key='BASEDIR', typename='file') manifest_builder.add_path(self.ANNOTATION_KEY, 'annotation_%d.nrrd', parent_key='BASEDIR', typename='file') manifest_builder.add_path(self.TEMPLATE_KEY, 'average_template_%d.nrrd', parent_key='BASEDIR', typename='file') manifest_builder.add_path(self.INJECTION_DENSITY_KEY, 'experiment_%d/injection_density_%d.nrrd', parent_key='BASEDIR', typename='file') manifest_builder.add_path(self.INJECTION_FRACTION_KEY, 'experiment_%d/injection_fraction_%d.nrrd', parent_key='BASEDIR', typename='file') manifest_builder.add_path(self.DATA_MASK_KEY, 'experiment_%d/data_mask_%d.nrrd', parent_key='BASEDIR', typename='file') manifest_builder.add_path(self.PROJECTION_DENSITY_KEY, 'experiment_%d/projection_density_%d.nrrd', parent_key='BASEDIR', typename='file') manifest_builder.add_path(self.STRUCTURE_MASK_KEY, 'structure_masks/structure_%d.nrrd', parent_key='BASEDIR', typename='file') manifest_builder.write_json_file(file_name)
# Example: download the projection density volume of one non-Cre experiment.
from allensdk.api.queries.mouse_connectivity_api import MouseConnectivityApi

mca = MouseConnectivityApi()

# get metadata for all non-Cre experiments
experiments = mca.experiment_source_search(injection_structures='root',
                                           transgenic_lines=0)

# download the projection density volume for one of the experiments
# NOTE: the result is deliberately NOT bound to `pd`; the classes in this file
# use `pd` as the pandas alias and a module-level rebinding would break them.
projection_density = mca.download_projection_density('example.nrrd',
                                                     experiments[0]['id'],
                                                     resolution=25)
class ABA(Paths):
    """
        This class handles interaction with the Allen Brain Atlas datasets and APIs to get
        structure trees, experimental metadata and results, tractography data etc.

        NOTE(review): several attributes used below (mouse_connectivity_cache, annotated_volume,
        output_data, hemispheres, hemispheres_names, volume_threshold) are not defined in this
        class; presumably they are provided by Paths or set elsewhere -- confirm.
    """

    # useful vars for analysis
    excluded_regions = ["fiber tracts"]
    # passed to ReferenceSpaceCache and used to convert coordinates into voxel indices
    resolution = 25

    def __init__(self, projection_metric="projection_energy", base_dir=None, **kwargs):
        """
            Set up file paths and Allen SDKs

            :param projection_metric: str, metric to quantify the strength of projections from the
                    Allen Connectome. (default value 'projection_energy')
            :param base_dir: path to directory to use for saving data (default value None)
            :param kwargs: can be used to pass path to individual data folders.
                    See brainrender/Utils/paths_manager.py
        """
        Paths.__init__(self, base_dir=base_dir, **kwargs)

        self.projection_metric = projection_metric

        # get mouse connectivity cache and structure tree
        self.mcc = MouseConnectivityCache(
            manifest_file=os.path.join(self.mouse_connectivity_cache, "manifest.json"))
        self.structure_tree = self.mcc.get_structure_tree()

        # get ontologies API and brain structures sets
        self.oapi = OntologiesApi()
        self.get_structures_sets()

        # get reference space
        self.space = ReferenceSpaceApi()
        self.spacecache = ReferenceSpaceCache(
            manifest=os.path.join(self.annotated_volume, "manifest.json"),  # downloaded files are stored relative to here
            resolution=self.resolution,
            reference_space_key="annotation/ccf_2017"  # use the latest version of the CCF
        )
        # from here on self.annotated_volume is no longer a folder path:
        # it is rebound to the annotation volume returned by the cache
        self.annotated_volume, _ = self.spacecache.get_annotation_volume()

        # mouse connectivity API [used for tractography]
        self.mca = MouseConnectivityApi()

        # Get tree search api
        self.tree_search = TreeSearchApi()

        # Get some metadata about experiments
        self.all_experiments = self.mcc.get_experiments(dataframe=True)
        self.strains = sorted({x for x in self.all_experiments.strain if x is not None})
        self.transgenic_lines = sorted(
            {x for x in self.all_experiments.transgenic_line if x is not None})

    ####### GET EXPERIMENTS DATA
    def get_structures_sets(self):
        """
            Get the Allen's structure sets.

            Sets self.structures (summary structures minus excluded_regions) and, when the
            ontologies API can be reached, self.other_sets and self.all_avaliable_meshes.
        """
        summary_structures = self.structure_tree.get_structures_by_set_id([167587189])  # main summary structures
        summary_structures = [s for s in summary_structures
                              if s["acronym"] not in self.excluded_regions]
        self.structures = pd.DataFrame(summary_structures)

        # Other structures sets
        try:
            all_sets = pd.DataFrame(self.oapi.get_structure_sets())
        except Exception:
            # best effort: keep going without the extra sets, but do not swallow
            # KeyboardInterrupt / SystemExit like a bare except would
            print("Could not retrieve data, possibly because there is no internet connection.")
        else:
            mesh_set = "Structures whose surfaces are represented by a precomputed mesh"
            sets = ["Summary structures of the pons",
                    "Summary structures of the thalamus",
                    "Summary structures of the hypothalamus",
                    "List of structures for ABA Fine Structure Search",
                    "Structures representing the major divisions of the mouse brain",
                    "Summary structures of the midbrain",
                    mesh_set]
            self.other_sets = {}
            for set_name in sets:
                set_id = all_sets.loc[all_sets.description == set_name].id.values[0]
                self.other_sets[set_name] = pd.DataFrame(
                    self.structure_tree.get_structures_by_set_id([set_id]))

            self.all_avaliable_meshes = sorted(self.other_sets[mesh_set].acronym.values)

    def print_structures_list_to_text(self):
        """
            Saves the name of every brain structure for which a 3d mesh (.obj file) is available
            in a text file ('all_regions.txt' in the current working directory).
        """
        s = self.other_sets["Structures whose surfaces are represented by a precomputed mesh"].sort_values('acronym')
        with open('all_regions.txt', 'w') as o:
            for acr, name in zip(s.acronym.values, s['name'].values):
                o.write("({}) -- {}\n".format(acr, name))

    def load_all_experiments(self, cre=False):
        """
            This function downloads all the experimental data from the MouseConnectivityCache and
            saves the unionized results as pickled pandas dataframes (one '<acronym>.pkl' per
            summary structure, in self.output_data). The process is slow, but the amount of disk
            space necessary to save the data is small, so it's worth downloading all the
            experiments at once to speed up subsequent analysis.

            :param cre: Bool - data from either wild type or cre mice lines (Default value = False)
            :raises NotImplementedError: if cre is truthy, only wild type is supported
        """
        # the guard used to be inverted (`if not cre`), which rejected the default wild-type call
        if cre:
            raise NotImplementedError("Only works for wild type sorry")

        # Downloads all experiments from allen brain atlas and saves the results as an easy to read pkl file
        for acronym in self.structures.acronym.values:
            print("Fetching experiments for : {}".format(acronym))

            structure = self.structure_tree.get_structures_by_acronym([acronym])[0]
            experiments = self.mcc.get_experiments(cre=cre, injection_structure_ids=[structure['id']])

            print(" found {} experiments".format(len(experiments)))

            try:
                structure_unionizes = self.mcc.get_structure_unionizes(
                    [e['id'] for e in experiments],
                    is_injection=False,
                    structure_ids=self.structures.id.values,
                    include_descendants=False)
            except Exception as e:
                # skip this structure: saving here would either fail on an unbound name or
                # write the previous structure's data under this acronym
                print(" could not get unionizes for {}: {}".format(acronym, e))
                continue

            structure_unionizes.to_pickle(os.path.join(self.output_data, "{}.pkl".format(acronym)))

    def print_structures(self):
        """
            Prints the acronym and name of every structure in self.structures to the console,
            sorted by acronym.
        """
        acronyms, names = self.structures.acronym.values, self.structures['name'].values
        sort_idx = np.argsort(acronyms)
        for a, n in zip(acronyms[sort_idx], names[sort_idx]):
            print("({}) - {}".format(a, n))

    def experiments_source_search(self, SOI, *args, source=True, **kwargs):
        """
            Returns data about experiments whose injection was in the SOI, structure of interest
            (or, with source=False, experiments whose target domain is the SOI).

            :param SOI: str or list of str, structure of interest. Acronym of structure to use as seed for the search
            :param *args: unused
            :param source: if True SOI is used as injection structure, otherwise as target domain (Default value = True)
            :param **kwargs: only 'transgenic_id' (default 0, i.e. wild type only) and
                    'primary_structure_only' (default True) are read here
            :returns: pd.DataFrame built from the search results

            For reference, arguments of the underlying experiment_source_search query:

            injection_structures : list of integers or strings
                Integer Structure.id or String Structure.acronym.
            target_domain : list of integers or strings, optional
                Integer Structure.id or String Structure.acronym.
            injection_hemisphere : string, optional
                'right' or 'left', Defaults to both hemispheres.
            target_hemisphere : string, optional
                'right' or 'left', Defaults to both hemispheres.
            transgenic_lines : list of integers or strings, optional
                Integer TransgenicLine.id or String TransgenicLine.name. Specify ID 0 to exclude all TransgenicLines.
            injection_domain : list of integers or strings, optional
                Integer Structure.id or String Structure.acronym.
            primary_structure_only : boolean, optional
            product_ids : list of integers, optional
                Integer Product.id
            start_row : integer, optional
                For paging purposes. Defaults to 0.
            num_rows : integer, optional
                For paging purposes. Defaults to 2000.
        """
        transgenic_id = kwargs.pop('transgenic_id', 0)  # id = 0 means use only wild type
        primary_structure_only = kwargs.pop('primary_structure_only', True)

        if not isinstance(SOI, list):
            SOI = [SOI]

        if source:
            injection_structures, target_domain = SOI, None
        else:
            injection_structures, target_domain = None, SOI

        return pd.DataFrame(self.mca.experiment_source_search(
            injection_structures=injection_structures,
            target_domain=target_domain,
            transgenic_lines=transgenic_id,
            primary_structure_only=primary_structure_only))

    def experiments_target_search(self, *args, **kwargs):
        """
            Same as experiments_source_search, but the structure of interest is used as
            target domain instead of injection structure.

            :param *args: forwarded to experiments_source_search
            :param **kwargs: forwarded to experiments_source_search
        """
        return self.experiments_source_search(*args, source=False, **kwargs)

    def fetch_experiments_data(self, experiments_id, *args, average_experiments=False, **kwargs):
        """
            Get data and metadata for experiments in the Allen Mouse Connectome project.

            :param experiments_id: int, list, np.ndarray with ID of experiments whose data need to be fetched
            :param *args: unused
            :param average_experiments: averaging is not implemented, must be False (Default value = False)
            :param **kwargs: 'is_injection' (default False), 'structure_ids' (default: ids of
                    self.structures), 'hemisphere_ids' (default None; 1 left, 2 right, 3 both)
            :returns: pd.DataFrame with the structure unionizes of the experiments
            :raises ValueError: if any experiment id is not an integer
            :raises NotImplementedError: if average_experiments is True
        """
        if isinstance(experiments_id, np.ndarray):
            experiments_id = [int(x) for x in experiments_id]
        elif not isinstance(experiments_id, list):
            experiments_id = [experiments_id]

        # numpy integer scalars (e.g. a value picked from an array) are valid ids too
        experiments_id = [int(x) if isinstance(x, np.integer) else x for x in experiments_id]

        if any(not isinstance(x, int) for x in experiments_id):
            raise ValueError("Invalid experiments_id argument: {}".format(experiments_id))

        if average_experiments:
            raise NotImplementedError("Need to find a way to average across experiments")

        is_injection = kwargs.pop('is_injection', False)  # Include only structures that are not injection
        structure_ids = kwargs.pop('structure_ids', self.structures.id.values)  # Pass IDs of structures of interest
        hemisphere_ids = kwargs.pop('hemisphere_ids', None)  # 1 left, 2 right, 3 both

        return pd.DataFrame(self.mca.get_structure_unionizes(
            experiments_id,
            is_injection=is_injection,
            structure_ids=structure_ids,
            hemisphere_ids=hemisphere_ids))

    ####### ANALYSIS ON EXPERIMENTAL DATA
    def _mean_projection_by_hemisphere(self, experiment_data, structure_id, projection_metric):
        """
            Average projection_metric over the unionize records of one structure, per hemisphere.

            :param experiment_data: pd.DataFrame with structure_id, hemisphere_id and projection_metric columns
            :param structure_id: id of the structure whose records are averaged
            :param projection_metric: str, name of the column to average
            :returns: self.hemispheres(left, right, both) holding the nan-mean of each hemisphere
        """
        exp_target = experiment_data.loc[experiment_data.structure_id == structure_id]

        # hemisphere_id: 1 = left, 2 = right, 3 = both
        exp_target_hemi = self.hemispheres(exp_target.loc[exp_target.hemisphere_id == 1],
                                           exp_target.loc[exp_target.hemisphere_id == 2],
                                           exp_target.loc[exp_target.hemisphere_id == 3])
        return self.hemispheres(np.nanmean(exp_target_hemi.left[projection_metric].values),
                                np.nanmean(exp_target_hemi.right[projection_metric].values),
                                np.nanmean(exp_target_hemi.both[projection_metric].values))

    def analyze_efferents(self, ROI, projection_metric=None):
        """
            Loads the experiments on ROI (the '<ROI>.pkl' file written by load_all_experiments)
            and looks at average statistics of efferent projections

            :param ROI: str, acronym of brain region of interest
            :param projection_metric: if None, the default projection metric is used, otherwise pass a string with metric to use (Default value = None)
            :returns: pd.DataFrame with one row per summary structure, sorted by the 'right' column
        """
        if projection_metric is None:
            projection_metric = self.projection_metric

        experiment_data = pd.read_pickle(os.path.join(self.output_data, "{}.pkl".format(ROI)))
        experiment_data = experiment_data.loc[experiment_data.volume > self.volume_threshold]

        # Loop over all structures and get the injection density
        results = {"left": [], "right": [], "both": [], "id": [], "acronym": [], "name": []}
        for target in self.structures.id.values:
            target_row = self.structures.loc[self.structures.id == target]

            proj_energy = self._mean_projection_by_hemisphere(
                experiment_data, target, projection_metric)._asdict()
            for hemi in self.hemispheres_names:
                results[hemi].append(proj_energy[hemi])
            results["id"].append(target)
            results["acronym"].append(target_row.acronym.values[0])
            results["name"].append(target_row['name'].values[0])

        return pd.DataFrame.from_dict(results).sort_values("right", na_position="first")

    def analyze_afferents(self, ROI, projection_metric=None):
        """
            Loads the experiments of every summary structure and looks at average statistics
            of afferent projections to ROI

            :param ROI: str, acronym of region of interest
            :param projection_metric: if None, the default projection metric is used, otherwise pass a string with metric to use (Default value = None)
            :returns: pd.DataFrame with one row per origin structure, sorted by the 'right' column
        """
        if projection_metric is None:
            projection_metric = self.projection_metric
        ROI_id = self.structure_tree.get_structures_by_acronym([ROI])[0]["id"]

        # Loop over all structures and get projection towards ROI
        results = {"left": [], "right": [], "both": [], "id": [], "acronym": [], "name": []}
        for origin in self.structures.id.values:
            origin_row = self.structures.loc[self.structures.id == origin]
            origin_acronym = origin_row.acronym.values[0]

            experiment_data = pd.read_pickle(
                os.path.join(self.output_data, "{}.pkl".format(origin_acronym)))
            experiment_data = experiment_data.loc[experiment_data.volume > self.volume_threshold]

            # records of the projections from this origin that land in the ROI
            # (this lookup used to reference the undefined name SOI_id)
            proj_energy = self._mean_projection_by_hemisphere(
                experiment_data, ROI_id, projection_metric)._asdict()
            for hemi in self.hemispheres_names:
                results[hemi].append(proj_energy[hemi])
            results["id"].append(origin)
            results["acronym"].append(origin_acronym)
            results["name"].append(origin_row['name'].values[0])

        return pd.DataFrame.from_dict(results).sort_values("right", na_position="first")

    ####### GET TRACTOGRAPHY AND SPATIAL DATA
    def get_projection_tracts_to_target(self, p0=None, **kwargs):
        """
            Gets tractography data for all experiments whose projections reach the brain region
            or location of interest.

            :param p0: list of 3 floats with XYZ coordinates of point to be used as seed (Default value = None)
            :param **kwargs: forwarded to MouseConnectivityApi.experiment_spatial_search
            :raises ValueError: if p0 is missing or of an unsupported type, or if the query
                    returned an error message (a string) instead of data
        """
        # check args
        if p0 is None:
            raise ValueError("Please pass coordinates")
        elif isinstance(p0, np.ndarray):
            p0 = list(p0)
        elif not isinstance(p0, (list, tuple)):
            raise ValueError("Invalid argument passed (p0): {}".format(p0))

        tract = self.mca.experiment_spatial_search(seed_point=p0, **kwargs)

        if isinstance(tract, str):
            raise ValueError('Something went wrong with query, query error message:\n{}'.format(tract))
        return tract

    ### OPERATIONS ON STRUCTURE TREES
    def get_structure_ancestors(self, regions, ancestors=True, descendants=False):
        """
            Gets the ancestors (and/or descendants) of the region(s) passed as arguments

            :param regions: str, list of str with acronyms of regions of interest
            :param ancestors: if True, returns the ancestors of the region (Default value = True)
            :param descendants: if True, returns the descendants of the region (Default value = False)
            :returns: a pd.DataFrame if regions is a single acronym, a list of pd.DataFrame
                    (one per region) if regions is a list
        """
        def tree_of(region):
            struct_id = self.structure_tree.get_structures_by_acronym([region])[0]['id']
            return pd.DataFrame(self.tree_search.get_tree(
                'Structure', struct_id, ancestors=ancestors, descendants=descendants))

        if not isinstance(regions, list):
            return tree_of(regions)
        # the accumulator must not be called `ancestors`: that would rebind the boolean flag
        # and hand a list to get_tree
        return [tree_of(region) for region in regions]

    def get_structure_descendants(self, regions):
        """
            Gets the descendants of the region(s) passed as arguments

            :param regions: str, list of str with acronyms of regions of interest
        """
        return self.get_structure_ancestors(regions, ancestors=False, descendants=True)

    def get_structure_from_coordinates(self, p0):
        """
            Given a point in the Allen Mouse Brain reference space, returns the brain region
            that the point is in.

            :param p0: list of floats with XYZ coordinates.
            :returns: the structure found by the structure tree for the id stored in the
                    annotation volume at that point, or None if the point is outside the volume
        """
        voxel = np.round(np.array(p0) / self.resolution).astype(int)

        # negative indices would silently wrap around to the opposite side of the volume
        if np.any(voxel < 0):
            return None
        try:
            structure_id = self.annotated_volume[voxel[0], voxel[1], voxel[2]]
        except IndexError:
            return None

        # Each voxel in the annotation volume is annotated as specifically as possible
        structure = self.structure_tree.get_structures_by_id([structure_id])[0]
        return structure
# Download the projection density volumes (25 um) of a hand-picked set of experiments.

# import allensdk python api (must happen before MouseConnectivityApi is first used)
from allensdk.api.queries.mouse_connectivity_api import MouseConnectivityApi

# enter selected projection numbers here, e.g. 167794131, 297231636, 287495026
id1 = 167794131
id2 = 297231636
id3 = 272736450

mca = MouseConnectivityApi()
print(mca)

# get metadata for all experiments
experiments = mca.experiment_source_search(injection_structures=['VIS', 'PTLp', 'RSP'])

# find selected experiments and format filenames as <injection structure>_<id>_<transgenic line>
# **Use the %paste magic function if pasting into ipython
selected_ids = (id1, id2, id3)
file_names = {}
for experiment in experiments:
    if experiment['id'] in selected_ids:
        file_name = '_'.join([
            str(experiment['injection-structures'][0]['abbreviation']),
            str(experiment['id']),
            str(experiment['transgenic-line']),
        ])
        file_names[experiment['id']] = file_name
        print(file_name)

# fail with an explicit message instead of a NameError on fn1/fn2/fn3
missing_ids = [eid for eid in selected_ids if eid not in file_names]
if missing_ids:
    raise LookupError(
        'Selected experiments not found in search results: {}'.format(missing_ids))

fn1, fn2, fn3 = (file_names[eid] for eid in selected_ids)

# download selected experiment projection density files at 25 um resolution
for eid in selected_ids:
    mca.download_projection_density(file_names[eid] + '.nrrd', eid, resolution=25)
class MouseConnectivityCache(ReferenceSpaceCache): """ Cache class for storing and accessing data related to the adult mouse Connectivity Atlas. By default, this class will cache any downloaded metadata or files in well known locations defined in a manifest file. This behavior can be disabled. Attributes ---------- resolution: int Resolution of grid data to be downloaded when accessing projection volume, the annotation volume, and the annotation volume. Must be one of (10, 25, 50, 100). Default is 25. api: MouseConnectivityApi instance Used internally to make API queries. Parameters ---------- resolution: int Resolution of grid data to be downloaded when accessing projection volume, the annotation volume, and the annotation volume. Must be one of (10, 25, 50, 100). Default is 25. ccf_version: string Desired version of the Common Coordinate Framework. This affects the annotation volume (get_annotation_volume) and structure masks (get_structure_mask). Must be one of (MouseConnectivityApi.CCF_2015, MouseConnectivityApi.CCF_2016). Default: MouseConnectivityApi.CCF_2016 cache: boolean Whether the class should save results of API queries to locations specified in the manifest file. Queries for files (as opposed to metadata) must have a file location. If caching is disabled, those locations must be specified in the function call (e.g. get_projection_density(file_name='file.nrrd')). manifest_file: string File name of the manifest to be read. Default is "mouse_connectivity_manifest.json". 
""" PROJECTION_DENSITY_KEY = 'PROJECTION_DENSITY' INJECTION_DENSITY_KEY = 'INJECTION_DENSITY' INJECTION_FRACTION_KEY = 'INJECTION_FRACTION' DATA_MASK_KEY = 'DATA_MASK' STRUCTURE_UNIONIZES_KEY = 'STRUCTURE_UNIONIZES' EXPERIMENTS_KEY = 'EXPERIMENTS' MANIFEST_VERSION = 1.2 SUMMARY_STRUCTURE_SET_ID = 167587189 DEFAULT_STRUCTURE_SET_IDS = tuple([SUMMARY_STRUCTURE_SET_ID]) @property def default_structure_ids(self): if not hasattr(self, '_default_structure_ids'): tree = self.get_structure_tree() default_structures = tree.get_structures_by_set_id( MouseConnectivityCache.DEFAULT_STRUCTURE_SET_IDS) self._default_structure_ids = [ st['id'] for st in default_structures ] return self._default_structure_ids def __init__(self, resolution=None, cache=True, manifest_file='mouse_connectivity_manifest.json', ccf_version=None, base_uri=None, version=None): if version is None: version = self.MANIFEST_VERSION if resolution is None: resolution = MouseConnectivityApi.VOXEL_RESOLUTION_25_MICRONS if ccf_version is None: ccf_version = MouseConnectivityApi.CCF_VERSION_DEFAULT super(MouseConnectivityCache, self).__init__(resolution, reference_space_key=ccf_version, cache=cache, manifest=manifest_file, version=version) self.api = MouseConnectivityApi(base_uri=base_uri) def get_projection_density(self, experiment_id, file_name=None): """ Read a projection density volume for a single experiment. Download it first if it doesn't exist. Projection density is the proportion of of projecting pixels in a grid voxel in [0,1]. Parameters ---------- experiment_id: int ID of the experiment to download/read. This corresponds to section_data_set_id in the API. file_name: string File name to store the template volume. If it already exists, it will be read from this file. If file_name is None, the file_name will be pulled out of the manifest. Default is None. 
""" file_name = self.get_cache_path(file_name, self.PROJECTION_DENSITY_KEY, experiment_id, self.resolution) self.api.download_projection_density(file_name, experiment_id, self.resolution, strategy='lazy') return nrrd.read(file_name) def get_injection_density(self, experiment_id, file_name=None): """ Read an injection density volume for a single experiment. Download it first if it doesn't exist. Injection density is the proportion of projecting pixels in a grid voxel only including pixels that are part of the injection site in [0,1]. Parameters ---------- experiment_id: int ID of the experiment to download/read. This corresponds to section_data_set_id in the API. file_name: string File name to store the template volume. If it already exists, it will be read from this file. If file_name is None, the file_name will be pulled out of the manifest. Default is None. """ file_name = self.get_cache_path(file_name, self.INJECTION_DENSITY_KEY, experiment_id, self.resolution) self.api.download_injection_density(file_name, experiment_id, self.resolution, strategy='lazy') return nrrd.read(file_name) def get_injection_fraction(self, experiment_id, file_name=None): """ Read an injection fraction volume for a single experiment. Download it first if it doesn't exist. Injection fraction is the proportion of pixels in the injection site in a grid voxel in [0,1]. Parameters ---------- experiment_id: int ID of the experiment to download/read. This corresponds to section_data_set_id in the API. file_name: string File name to store the template volume. If it already exists, it will be read from this file. If file_name is None, the file_name will be pulled out of the manifest. Default is None. 
""" file_name = self.get_cache_path(file_name, self.INJECTION_FRACTION_KEY, experiment_id, self.resolution) self.api.download_injection_fraction(file_name, experiment_id, self.resolution, strategy='lazy') return nrrd.read(file_name) def get_data_mask(self, experiment_id, file_name=None): """ Read a data mask volume for a single experiment. Download it first if it doesn't exist. Data mask is a binary mask of voxels that have valid data. Only use valid data in analysis! Parameters ---------- experiment_id: int ID of the experiment to download/read. This corresponds to section_data_set_id in the API. file_name: string File name to store the template volume. If it already exists, it will be read from this file. If file_name is None, the file_name will be pulled out of the manifest. Default is None. """ file_name = self.get_cache_path(file_name, self.DATA_MASK_KEY, experiment_id, self.resolution) self.api.download_data_mask(file_name, experiment_id, self.resolution, strategy='lazy') return nrrd.read(file_name) def get_experiments(self, dataframe=False, file_name=None, cre=None, injection_structure_ids=None): """ Read a list of experiments that match certain criteria. If caching is enabled, this will save the whole (unfiltered) list of experiments to a file. Parameters ---------- dataframe: boolean Return the list of experiments as a Pandas DataFrame. If False, return a list of dictionaries. Default False. file_name: string File name to save/read the structures table. If file_name is None, the file_name will be pulled out of the manifest. If caching is disabled, no file will be saved. Default is None. cre: boolean or list If True, return only cre-positive experiments. If False, return only cre-negative experiments. If None, return all experients. If list, return all experiments with cre line names in the supplied list. Default None. injection_structure_ids: list Only return experiments that were injected in the structures provided here. If None, return all experiments. 
Default None. """ file_name = self.get_cache_path(file_name, self.EXPERIMENTS_KEY) if os.path.exists(file_name): experiments = json_utilities.read(file_name) else: experiments = self.api.experiment_source_search( injection_structures='root') # removing these elements because they are specific to a particular # resolution for e in experiments: del e['num-voxels'] del e['injection-volume'] del e['sum'] del e['name'] if self.cache: Manifest.safe_make_parent_dirs(file_name) json_utilities.write(file_name, experiments) # filter the read/downloaded list of experiments experiments = self.filter_experiments(experiments, cre, injection_structure_ids) if dataframe: experiments = pd.DataFrame(experiments) experiments.set_index(['id'], inplace=True, drop=False) return experiments def filter_experiments(self, experiments, cre=None, injection_structure_ids=None): """ Take a list of experiments and filter them by cre status and injection structure. Parameters ---------- cre: boolean or list If True, return only cre-positive experiments. If False, return only cre-negative experiments. If None, return all experients. If list, return all experiments with cre line names in the supplied list. Default None. injection_structure_ids: list Only return experiments that were injected in the structures provided here. If None, return all experiments. Default None. 
""" if cre is True: experiments = [e for e in experiments if e['transgenic-line']] elif cre is False: experiments = [e for e in experiments if not e['transgenic-line']] elif cre is not None: cre = [c.lower() for c in cre] experiments = [ e for e in experiments if e['transgenic-line'].lower() in cre ] if injection_structure_ids is not None: structure_ids = MouseConnectivityCache.validate_structure_ids( injection_structure_ids) descendant_ids = reduce(op.add, self.get_structure_tree()\ .descendant_ids(injection_structure_ids)) experiments = [ e for e in experiments if e['structure-id'] in descendant_ids ] return experiments def get_experiment_structure_unionizes(self, experiment_id, file_name=None, is_injection=None, structure_ids=None, include_descendants=False, hemisphere_ids=None): """ Retrieve the structure unionize data for a specific experiment. Filter by structure, injection status, and hemisphere. Parameters ---------- experiment_id: int ID of the experiment of interest. Corresponds to section_data_set_id in the API. file_name: string File name to save/read the experiments list. If file_name is None, the file_name will be pulled out of the manifest. If caching is disabled, no file will be saved. Default is None. is_injection: boolean If True, only return unionize records that disregard non-injection pixels. If False, only return unionize records that disregard injection pixels. If None, return all records. Default None. structure_ids: list Only return unionize records for a specific set of structures. If None, return all records. Default None. include_descendants: boolean Include all descendant records for specified structures. Default False. hemisphere_ids: list Only return unionize records that disregard pixels outside of a hemisphere. or set of hemispheres. Left = 1, Right = 2, Both = 3. If None, include all records [1, 2, 3]. Default None. 
""" file_name = self.get_cache_path(file_name, self.STRUCTURE_UNIONIZES_KEY, experiment_id) filter_fn = functools.partial(self.filter_structure_unionizes, is_injection=is_injection, structure_ids=structure_ids, include_descendants=include_descendants, hemisphere_ids=hemisphere_ids) col_rn = lambda x: pd.DataFrame(x).rename( columns={'section_data_set_id': 'experiment_id'}) return self.api.get_structure_unionizes( [experiment_id], path=file_name, strategy='lazy', pre=col_rn, post=filter_fn, writer=lambda p, x: pd.DataFrame(x).to_csv(p), reader=pd.DataFrame.from_csv) def rank_structures(self, experiment_ids, is_injection, structure_ids=None, hemisphere_ids=None, rank_on='normalized_projection_volume', n=5, threshold=10**-2): '''Produces one or more (per experiment) ranked lists of brain structures, using a specified data field. Parameters ---------- experiment_ids : list of int Obtain injection_structures for these experiments. is_injection : boolean Use data from only injection (or non-injection) unionizes. structure_ids : list of int, optional Consider only these structures. It is a good idea to make sure that these structures are not spatially overlapping; otherwise your results will contain redundant information. Defaults to the summary structures - a brain-wide list of nonoverlapping mid-level structures. hemisphere_ids : list of int, optional Consider only these hemispheres (1: left, 2: right, 3: both). Like with structures, you might get redundant results if you select overlapping options. Defaults to [1, 2]. rank_on : str, optional Rank unionize data using this field (descending). Defaults to normalized_projection_volume. n : int, optional Return only the top n structures. threshold : float, optional Consider only records whose data value - specified by the rank_on parameter - exceeds this value. Returns ------- list : Each element (1 for each input experiment) is a list of dictionaries. 
The dictionaries describe the top injection structures in descending order. They are specified by their structure and hemisphere id fields and additionally report the value specified by the rank_on parameter. ''' output_keys = [ 'experiment_id', rank_on, 'hemisphere_id', 'structure_id' ] filter_fields = lambda fieldname: fieldname in output_keys if hemisphere_ids is None: hemisphere_ids = [1, 2] if structure_ids is None: structure_ids = self.default_structure_ids unionizes = self.get_structure_unionizes(experiment_ids, is_injection=is_injection, structure_ids=structure_ids, hemisphere_ids=hemisphere_ids, include_descendants=False) unionizes = unionizes[unionizes[rank_on] > threshold] results = [] for eid in experiment_ids: this_experiment_unionizes = unionizes[unionizes['experiment_id'] == eid] this_experiment_unionizes = this_experiment_unionizes.sort_values( by=rank_on, ascending=False) this_experiment_unionizes = this_experiment_unionizes.select( filter_fields, axis=1) records = this_experiment_unionizes.to_dict('record') if len(records) > n: records = records[:n] results.append(records) return results def filter_structure_unionizes(self, unionizes, is_injection=None, structure_ids=None, include_descendants=False, hemisphere_ids=None): """ Take a list of unionzes and return a subset of records filtered by injection status, structure, and hemisphere. Parameters ---------- is_injection: boolean If True, only return unionize records that disregard non-injection pixels. If False, only return unionize records that disregard injection pixels. If None, return all records. Default None. structure_ids: list Only return unionize records for a set of structures. If None, return all records. Default None. include_descendants: boolean Include all descendant records for specified structures. Default False. hemisphere_ids: list Only return unionize records that disregard pixels outside of a hemisphere. or set of hemispheres. Left = 1, Right = 2, Both = 3. 
If None, include all records [1, 2, 3]. Default None. """ if is_injection is not None: unionizes = unionizes[unionizes.is_injection == is_injection] if structure_ids is not None: structure_ids = MouseConnectivityCache.validate_structure_ids( structure_ids) if include_descendants: structure_ids = reduce( op.add, self.get_structure_tree().descendant_ids(structure_ids)) else: structure_ids = set(structure_ids) unionizes = unionizes[unionizes['structure_id'].isin( structure_ids)] if hemisphere_ids is not None: unionizes = unionizes[unionizes['hemisphere_id'].isin( hemisphere_ids)] return unionizes def get_structure_unionizes(self, experiment_ids, is_injection=None, structure_ids=None, include_descendants=False, hemisphere_ids=None): """ Get structure unionizes for a set of experiment IDs. Filter the results by injection status, structure, and hemisphere. Parameters ---------- experiment_ids: list List of experiment IDs. Corresponds to section_data_set_id in the API. is_injection: boolean If True, only return unionize records that disregard non-injection pixels. If False, only return unionize records that disregard injection pixels. If None, return all records. Default None. structure_ids: list Only return unionize records for a specific set of structures. If None, return all records. Default None. include_descendants: boolean Include all descendant records for specified structures. Default False. hemisphere_ids: list Only return unionize records that disregard pixels outside of a hemisphere. or set of hemispheres. Left = 1, Right = 2, Both = 3. If None, include all records [1, 2, 3]. Default None. 
""" unionizes = [ self.get_experiment_structure_unionizes( eid, is_injection=is_injection, structure_ids=structure_ids, include_descendants=include_descendants, hemisphere_ids=hemisphere_ids) for eid in experiment_ids ] return pd.concat(unionizes, ignore_index=True) def get_projection_matrix(self, experiment_ids, projection_structure_ids=None, hemisphere_ids=None, parameter='projection_volume', dataframe=False): if projection_structure_ids is None: projection_structure_ids = self.default_structure_ids unionizes = self.get_structure_unionizes( experiment_ids, is_injection=False, structure_ids=projection_structure_ids, include_descendants=False, hemisphere_ids=hemisphere_ids) hemisphere_ids = set(unionizes['hemisphere_id'].values.tolist()) nrows = len(experiment_ids) ncolumns = len(projection_structure_ids) * len(hemisphere_ids) matrix = np.empty((nrows, ncolumns)) matrix[:] = np.NAN row_lookup = {} for idx, e in enumerate(experiment_ids): row_lookup[e] = idx column_lookup = {} columns = [] cidx = 0 hlabel = {1: '-L', 2: '-R', 3: ''} acronym_map = self.get_structure_tree().value_map( lambda x: x['id'], lambda x: x['acronym']) for hid in hemisphere_ids: for sid in projection_structure_ids: column_lookup[(hid, sid)] = cidx label = acronym_map[sid] + hlabel[hid] columns.append({ 'hemisphere_id': hid, 'structure_id': sid, 'label': label }) cidx += 1 for _, row in unionizes.iterrows(): ridx = row_lookup[row['experiment_id']] k = (row['hemisphere_id'], row['structure_id']) cidx = column_lookup[k] matrix[ridx, cidx] = row[parameter] if dataframe: warnings.warn("dataframe argument is deprecated.") all_experiments = self.get_experiments(dataframe=True) rows_df = all_experiments.loc[experiment_ids] cols_df = pd.DataFrame(columns) return {'matrix': matrix, 'rows': rows_df, 'columns': cols_df} else: return { 'matrix': matrix, 'rows': experiment_ids, 'columns': columns } def add_manifest_paths(self, manifest_builder): """ Construct a manifest for this Cache class and save it in a 
file. Parameters ---------- file_name: string File location to save the manifest. """ manifest_builder = super(MouseConnectivityCache, self).add_manifest_paths(manifest_builder) manifest_builder.add_path(self.EXPERIMENTS_KEY, 'experiments.json', parent_key='BASEDIR', typename='file') manifest_builder.add_path(self.STRUCTURE_UNIONIZES_KEY, 'experiment_%d/structure_unionizes.csv', parent_key='BASEDIR', typename='file') manifest_builder.add_path(self.INJECTION_DENSITY_KEY, 'experiment_%d/injection_density_%d.nrrd', parent_key='BASEDIR', typename='file') manifest_builder.add_path(self.INJECTION_FRACTION_KEY, 'experiment_%d/injection_fraction_%d.nrrd', parent_key='BASEDIR', typename='file') manifest_builder.add_path(self.DATA_MASK_KEY, 'experiment_%d/data_mask_%d.nrrd', parent_key='BASEDIR', typename='file') manifest_builder.add_path(self.PROJECTION_DENSITY_KEY, 'experiment_%d/projection_density_%d.nrrd', parent_key='BASEDIR', typename='file') return manifest_builder