class MouseConnectivityCache(Cache):
    """
    Cache class for storing and accessing data related to the adult mouse
    Connectivity Atlas.  By default, this class will cache any downloaded 
    metadata or files in well known locations defined in a manifest file.  
    This behavior can be disabled.

    Attributes
    ----------

    resolution: int
        Resolution of grid data to be downloaded when accessing projection volume,
        the annotation volume, and the annotation volume.  Must be one of (10, 25,
        50, 100).  Default is 25.

    api: MouseConnectivityApi instance
        Used internally to make API queries.

    Parameters
    ----------

    resolution: int
        Resolution of grid data to be downloaded when accessing projection volume,
        the annotation volume, and the annotation volume.  Must be one of (10, 25,
        50, 100).  Default is 25.

    cache: boolean
        Whether the class should save results of API queries to locations specified
        in the manifest file.  Queries for files (as opposed to metadata) must have a
        file location.  If caching is disabled, those locations must be specified
        in the function call (e.g. get_projection_density(file_name='file.nrrd')).

    manifest_file: string
        File name of the manifest to be read.  Default is "mouse_connectivity_manifest.json".
        
    """

    ANNOTATION_KEY = 'ANNOTATION'
    TEMPLATE_KEY = 'TEMPLATE'
    PROJECTION_DENSITY_KEY = 'PROJECTION_DENSITY'
    INJECTION_DENSITY_KEY = 'INJECTION_DENSITY'
    INJECTION_FRACTION_KEY = 'INJECTION_FRACTION'
    DATA_MASK_KEY = 'DATA_MASK'
    STRUCTURE_UNIONIZES_KEY = 'STRUCTURE_UNIONIZES'
    EXPERIMENTS_KEY = 'EXPERIMENTS'
    STRUCTURES_KEY = 'STRUCTURES'
    STRUCTURE_MASK_KEY = 'STRUCTURE_MASK'

    def __init__(self,
                 resolution=25,
                 cache=True,
                 manifest_file='mouse_connectivity_manifest.json',
                 base_uri=None):
        super(MouseConnectivityCache, self).__init__(manifest=manifest_file,
                                                     cache=cache)

        self.resolution = resolution
        self.api = MouseConnectivityApi(base_uri=base_uri)

    def get_annotation_volume(self, file_name=None):
        """ 
        Read the annotation volume.  Download it first if it doesn't exist.

        Parameters
        ----------

        file_name: string
            File name to store the annotation volume.  If it already exists, 
            it will be read from this file.  If file_name is None, the 
            file_name will be pulled out of the manifest.  Default is None.

        """

        file_name = self.get_cache_path(file_name, self.ANNOTATION_KEY,
                                        self.resolution)

        if file_name is None:
            raise Exception(
                "No save file name provided for annotation volume.")

        if os.path.exists(file_name):
            annotation, info = nrrd.read(file_name)
        else:
            Manifest.safe_mkdir(os.path.dirname(file_name))

            annotation, info = self.api.download_annotation_volume(
                self.resolution, file_name)

        return annotation, info

    def get_template_volume(self, file_name=None):
        """ 
        Read the template volume.  Download it first if it doesn't exist.

        Parameters
        ----------

        file_name: string
            File name to store the template volume.  If it already exists, 
            it will be read from this file.  If file_name is None, the 
            file_name will be pulled out of the manifest.  Default is None.

        """

        file_name = self.get_cache_path(file_name, self.TEMPLATE_KEY,
                                        self.resolution)

        if file_name is None:
            raise Exception("No save file provided for annotation volume.")

        if os.path.exists(file_name):
            annotation, info = nrrd.read(file_name)
        else:
            Manifest.safe_mkdir(os.path.dirname(file_name))

            annotation, info = self.api.download_template_volume(
                self.resolution, file_name)

        return annotation, info

    def get_projection_density(self, experiment_id, file_name=None):
        """ 
        Read a projection density volume for a single experiment.  Download it 
        first if it doesn't exist.  Projection density is the proportion of 
        of projecting pixels in a grid voxel in [0,1].
        
        Parameters
        ----------

        experiment_id: int
            ID of the experiment to download/read.  This corresponds to
            section_data_set_id in the API.

        file_name: string
            File name to store the template volume.  If it already exists, 
            it will be read from this file.  If file_name is None, the 
            file_name will be pulled out of the manifest.  Default is None.

        """

        file_name = self.get_cache_path(file_name, self.PROJECTION_DENSITY_KEY,
                                        experiment_id, self.resolution)

        if file_name is None:
            raise Exception("No file name to save volume.")

        if not os.path.exists(file_name):
            Manifest.safe_mkdir(os.path.dirname(file_name))
            self.api.download_projection_density(file_name, experiment_id,
                                                 self.resolution)

        return nrrd.read(file_name)

    def get_injection_density(self, experiment_id, file_name=None):
        """ 
        Read an injection density volume for a single experiment. Download it 
        first if it doesn't exist.  Injection density is the proportion of
        projecting pixels in a grid voxel only including pixels that are 
        part of the injection site in [0,1].
        
        Parameters
        ----------

        experiment_id: int
            ID of the experiment to download/read.  This corresponds to
            section_data_set_id in the API.

        file_name: string
            File name to store the template volume.  If it already exists, 
            it will be read from this file.  If file_name is None, the 
            file_name will be pulled out of the manifest.  Default is None.

        """

        file_name = self.get_cache_path(file_name, self.INJECTION_DENSITY_KEY,
                                        experiment_id, self.resolution)

        if file_name is None:
            raise Exception("No file name to save volume.")

        if not os.path.exists(file_name):
            Manifest.safe_mkdir(os.path.dirname(file_name))

            self.api.download_injection_density(file_name, experiment_id,
                                                self.resolution)

        return nrrd.read(file_name)

    def get_injection_fraction(self, experiment_id, file_name=None):
        """ 
        Read an injection fraction volume for a single experiment. Download it 
        first if it doesn't exist.  Injection fraction is the proportion of
        pixels in the injection site in a grid voxel in [0,1].
        
        Parameters
        ----------

        experiment_id: int
            ID of the experiment to download/read.  This corresponds to
            section_data_set_id in the API.

        file_name: string
            File name to store the template volume.  If it already exists, 
            it will be read from this file.  If file_name is None, the 
            file_name will be pulled out of the manifest.  Default is None.

        """

        file_name = self.get_cache_path(file_name, self.INJECTION_FRACTION_KEY,
                                        experiment_id, self.resolution)

        if file_name is None:
            raise Exception("No file name to save volume.")

        if not os.path.exists(file_name):
            Manifest.safe_mkdir(os.path.dirname(file_name))

            self.api.download_injection_fraction(file_name, experiment_id,
                                                 self.resolution)

        return nrrd.read(file_name)

    def get_data_mask(self, experiment_id, file_name=None):
        """ 
        Read a data mask volume for a single experiment. Download it 
        first if it doesn't exist.  Data mask is a binary mask of
        voxels that have valid data.  Only use valid data in analysis!
        
        Parameters
        ----------

        experiment_id: int
            ID of the experiment to download/read.  This corresponds to
            section_data_set_id in the API.

        file_name: string
            File name to store the template volume.  If it already exists, 
            it will be read from this file.  If file_name is None, the 
            file_name will be pulled out of the manifest.  Default is None.

        """

        file_name = self.get_cache_path(file_name, self.DATA_MASK_KEY,
                                        experiment_id, self.resolution)

        if file_name is None:
            raise Exception("No file name to save volume.")

        if not os.path.exists(file_name):
            Manifest.safe_mkdir(os.path.dirname(file_name))

            self.api.download_data_mask(file_name, experiment_id,
                                        self.resolution)

        return nrrd.read(file_name)

    def get_ontology(self, file_name=None):
        """ 
        Read the list of adult mouse structures and return an Ontology instance.

        Parameters
        ----------

        file_name: string
            File name to save/read the structures table.  If file_name is None, 
            the file_name will be pulled out of the manifest.  If caching
            is disabled, no file will be saved. Default is None.
        """

        return Ontology(self.get_structures(file_name))

    def get_structures(self, file_name=None):
        """ 
        Read the list of adult mouse structures and return a Pandas DataFrame.

        Parameters
        ----------

        file_name: string
            File name to save/read the structures table.  If file_name is None, 
            the file_name will be pulled out of the manifest.  If caching
            is disabled, no file will be saved. Default is None.
        """

        file_name = self.get_cache_path(file_name, self.STRUCTURES_KEY)

        if os.path.exists(file_name):
            structures = pd.DataFrame.from_csv(file_name)
        else:
            structures = OntologiesApi().get_structures(1)
            structures = pd.DataFrame(structures)

            if self.cache:
                Manifest.safe_mkdir(os.path.dirname(file_name))

                structures.to_csv(file_name)

        structures.set_index(['id'], inplace=True, drop=False)
        return structures

    def get_experiments(self,
                        dataframe=False,
                        file_name=None,
                        cre=None,
                        injection_structure_ids=None):
        """
        Read a list of experiments that match certain criteria.  If caching is enabled,
        this will save the whole (unfiltered) list of experiments to a file.

        Parameters
        ----------
        
        dataframe: boolean
            Return the list of experiments as a Pandas DataFrame.  If False,
            return a list of dictionaries.  Default False. 

        file_name: string
            File name to save/read the structures table.  If file_name is None, 
            the file_name will be pulled out of the manifest.  If caching
            is disabled, no file will be saved. Default is None.

        cre: boolean or list
            If True, return only cre-positive experiments.  If False, return only
            cre-negative experiments.  If None, return all experients. If list, return
            all experiments with cre line names in the supplied list. Default None.

        injection_structure_ids: list
            Only return experiments that were injected in the structures provided here.
            If None, return all experiments.  Default None.

        """

        file_name = self.get_cache_path(file_name, self.EXPERIMENTS_KEY)

        if os.path.exists(file_name):
            experiments = json_utilities.read(file_name)
        else:
            experiments = self.api.experiment_source_search(
                injection_structures='root')

            # removing these elements because they are specific to a particular resolution
            for e in experiments:
                del e['num-voxels']
                del e['injection-volume']
                del e['sum']
                del e['name']

            if self.cache:
                Manifest.safe_mkdir(os.path.dirname(file_name))

                json_utilities.write(file_name, experiments)

        # filter the read/downloaded list of experiments
        experiments = self.filter_experiments(experiments, cre,
                                              injection_structure_ids)

        if dataframe:
            experiments = pd.DataFrame(experiments)
            experiments.set_index(['id'], inplace=True, drop=False)

        return experiments

    def filter_experiments(self,
                           experiments,
                           cre=None,
                           injection_structure_ids=None):
        """ 
        Take a list of experiments and filter them by cre status and injection structure.

        Parameters
        ----------

        cre: boolean or list
            If True, return only cre-positive experiments.  If False, return only
            cre-negative experiments.  If None, return all experients. If list, return
            all experiments with cre line names in the supplied list. Default None.

        injection_structure_ids: list
            Only return experiments that were injected in the structures provided here.
            If None, return all experiments.  Default None.
        """

        if cre == True:
            experiments = [e for e in experiments if e['transgenic-line']]
        elif cre == False:
            experiments = [e for e in experiments if not e['transgenic-line']]
        elif cre is not None:
            experiments = [
                e for e in experiments if e['transgenic-line'] in cre
            ]

        if injection_structure_ids is not None:
            descendant_ids = self.get_ontology().get_descendant_ids(
                injection_structure_ids)
            experiments = [
                e for e in experiments if e['structure-id'] in descendant_ids
            ]

        return experiments

    def get_experiment_structure_unionizes(self,
                                           experiment_id,
                                           file_name=None,
                                           is_injection=None,
                                           structure_ids=None,
                                           hemisphere_ids=None):
        """
        Retrieve the structure unionize data for a specific experiment.  Filter by 
        structure, injection status, and hemisphere.

        Parameters
        ----------
        
        experiment_id: int
            ID of the experiment of interest.  Corresponds to section_data_set_id in the API.

        file_name: string
            File name to save/read the experiments list.  If file_name is None, 
            the file_name will be pulled out of the manifest.  If caching
            is disabled, no file will be saved. Default is None.            

        is_injection: boolean
            If True, only return unionize records that disregard non-injection pixels.
            If False, only return unionize records that disregard injection pixels.
            If None, return all records.  Default None.

        structure_ids: list
            Only return unionize records that are inside a specific set of structures.
            If None, return all records. Default None.

        hemisphere_ids: list
            Only return unionize records that disregard pixels outside of a hemisphere.
            or set of hemispheres. Left = 1, Right = 2, Both = 3.  If None, include all 
            records [1, 2, 3].  Default None.
            
        """

        file_name = self.get_cache_path(file_name,
                                        self.STRUCTURE_UNIONIZES_KEY,
                                        experiment_id)

        if os.path.exists(file_name):
            unionizes = pd.DataFrame.from_csv(file_name)
        else:
            unionizes = self.api.get_structure_unionizes([experiment_id])
            unionizes = pd.DataFrame(unionizes)

            # rename section_data_set_id column to experiment_id
            unionizes.columns = [
                'experiment_id' if c == 'section_data_set_id' else c
                for c in unionizes.columns
            ]

            if self.cache:
                Manifest.safe_mkdir(os.path.dirname(file_name))

                unionizes.to_csv(file_name)

        return self.filter_structure_unionizes(unionizes, is_injection,
                                               structure_ids, hemisphere_ids)

    def filter_structure_unionizes(self,
                                   unionizes,
                                   is_injection=None,
                                   structure_ids=None,
                                   hemisphere_ids=None):
        """
        Take a list of unionzes and return a subset of records filtered by injection status, structure, and
        hemisphere.

        Parameters
        ----------
        is_injection: boolean
            If True, only return unionize records that disregard non-injection pixels.
            If False, only return unionize records that disregard injection pixels.
            If None, return all records.  Default None.

        structure_ids: list
            Only return unionize records that are inside a specific set of structures.
            If None, return all records. Default None.

        hemisphere_ids: list
            Only return unionize records that disregard pixels outside of a hemisphere.
            or set of hemispheres. Left = 1, Right = 2, Both = 3.  If None, include all 
            records [1, 2, 3].  Default None.
        """
        if is_injection is not None:
            unionizes = unionizes[unionizes.is_injection == is_injection]

        if structure_ids is not None:
            descendant_ids = self.get_ontology().get_descendant_ids(
                structure_ids)
            unionizes = unionizes[unionizes['structure_id'].isin(
                descendant_ids)]

        if hemisphere_ids is not None:
            unionizes = unionizes[unionizes['hemisphere_id'].isin(
                hemisphere_ids)]

        return unionizes

    def get_structure_unionizes(self,
                                experiment_ids,
                                is_injection=None,
                                structure_ids=None,
                                hemisphere_ids=None):
        """
        Get structure unionizes for a set of experiment IDs.  Filter the results by injection status, 
        structure, and hemisphere.

        Parameters
        ----------
        experiment_ids: list
            List of experiment IDs.  Corresponds to section_data_set_id in the API.
        
        is_injection: boolean
            If True, only return unionize records that disregard non-injection pixels.
            If False, only return unionize records that disregard injection pixels.
            If None, return all records.  Default None.

        structure_ids: list
            Only return unionize records that are inside a specific set of structures.
            If None, return all records. Default None.

        hemisphere_ids: list
            Only return unionize records that disregard pixels outside of a hemisphere.
            or set of hemispheres. Left = 1, Right = 2, Both = 3.  If None, include all 
            records [1, 2, 3].  Default None.
        """

        unionizes = [
            self.get_experiment_structure_unionizes(
                eid,
                is_injection=is_injection,
                structure_ids=structure_ids,
                hemisphere_ids=hemisphere_ids) for eid in experiment_ids
        ]

        return pd.concat(unionizes, ignore_index=True)

    def get_projection_matrix(self,
                              experiment_ids,
                              projection_structure_ids,
                              hemisphere_ids=None,
                              parameter='projection_volume',
                              dataframe=False):

        unionizes = self.get_structure_unionizes(experiment_ids,
                                                 is_injection=False,
                                                 hemisphere_ids=hemisphere_ids)

        unionizes = unionizes[unionizes.structure_id.isin(
            projection_structure_ids)]

        projection_structure_ids = set(
            unionizes['structure_id'].values.tolist())
        hemisphere_ids = set(unionizes['hemisphere_id'].values.tolist())

        nrows = len(experiment_ids)
        ncolumns = len(projection_structure_ids) * len(hemisphere_ids)

        matrix = np.empty((nrows, ncolumns))
        matrix[:] = np.NAN

        row_lookup = {}
        for idx, e in enumerate(experiment_ids):
            row_lookup[e] = idx

        column_lookup = {}
        columns = []

        cidx = 0
        hlabel = {1: '-L', 2: '-R', 3: ''}

        o = self.get_ontology()

        for hid in hemisphere_ids:
            for sid in projection_structure_ids:
                column_lookup[(hid, sid)] = cidx
                label = o[sid].iloc[0]['acronym'] + hlabel[hid]
                columns.append({
                    'hemisphere_id': hid,
                    'structure_id': sid,
                    'label': label
                })
                cidx += 1

        for _, row in unionizes.iterrows():
            ridx = row_lookup[row['experiment_id']]
            k = (row['hemisphere_id'], row['structure_id'])
            cidx = column_lookup[k]
            matrix[ridx, cidx] = row[parameter]

        if dataframe:
            all_experiments = self.get_experiments(dataframe=True)

            rows_df = all_experiments.loc[experiment_ids]

            cols_df = pd.DataFrame(columns)

            return {'matrix': matrix, 'rows': rows_df, 'columns': cols_df}
        else:
            return {
                'matrix': matrix,
                'rows': experiment_ids,
                'columns': columns
            }

    def get_structure_mask(self,
                           structure_id,
                           file_name=None,
                           annotation_file_name=None):
        """
        Read a 3D numpy array shaped like the annotation volume that has non-zero values where 
        voxels belong to a particular structure.  This will take care of identifying substructures.

        Parameters
        ----------
        
        structure_id: int
            ID of a structure.  

        file_name: string
            File name to store the structure mask.  If it already exists, 
            it will be read from this file.  If file_name is None, the 
            file_name will be pulled out of the manifest.  Default is None.
        
        annotation_file_name: string
            File name to store the annotation volume.  If it already exists, 
            it will be read from this file.  If file_name is None, the 
            file_name will be pulled out of the manifest.  Default is None.            
        """

        file_name = self.get_cache_path(file_name, self.STRUCTURE_MASK_KEY,
                                        structure_id)

        if os.path.exists(file_name):
            return nrrd.read(file_name)
        else:
            ont = self.get_ontology()
            structure_ids = ont.get_descendant_ids([structure_id])
            annotation, _ = self.get_annotation_volume(annotation_file_name)
            mask = self.make_structure_mask(structure_ids, annotation)

            if self.cache:
                Manifest.safe_mkdir(os.path.dirname(file_name))
                nrrd.write(file_name, mask)

            return mask, None

    def make_structure_mask(self, structure_ids, annotation):
        """
        Look at an annotation volume and identify voxels that have values
        in a list of structure ids.

        Parameters
        ----------

        structure_ids: list
            List of IDs to look for in the annotation volume

        annotation: np.ndarray
            Numpy array filled with IDs.

        """

        m = np.zeros(annotation.shape, dtype=np.uint8)

        for _, sid in enumerate(structure_ids):
            m[annotation == sid] = 1

        return m

    def build_manifest(self, file_name):
        """
        Construct a manifest for this Cache class and save it in a file.
        
        Parameters
        ----------
        
        file_name: string
            File location to save the manifest.

        """

        manifest_builder = ManifestBuilder()
        manifest_builder.add_path('BASEDIR', '.')

        manifest_builder.add_path(self.EXPERIMENTS_KEY,
                                  'experiments.json',
                                  parent_key='BASEDIR',
                                  typename='file')

        manifest_builder.add_path(self.STRUCTURES_KEY,
                                  'structures.csv',
                                  parent_key='BASEDIR',
                                  typename='file')

        manifest_builder.add_path(self.STRUCTURE_UNIONIZES_KEY,
                                  'experiment_%d/structure_unionizes.csv',
                                  parent_key='BASEDIR',
                                  typename='file')

        manifest_builder.add_path(self.ANNOTATION_KEY,
                                  'annotation_%d.nrrd',
                                  parent_key='BASEDIR',
                                  typename='file')

        manifest_builder.add_path(self.TEMPLATE_KEY,
                                  'average_template_%d.nrrd',
                                  parent_key='BASEDIR',
                                  typename='file')

        manifest_builder.add_path(self.INJECTION_DENSITY_KEY,
                                  'experiment_%d/injection_density_%d.nrrd',
                                  parent_key='BASEDIR',
                                  typename='file')

        manifest_builder.add_path(self.INJECTION_FRACTION_KEY,
                                  'experiment_%d/injection_fraction_%d.nrrd',
                                  parent_key='BASEDIR',
                                  typename='file')

        manifest_builder.add_path(self.DATA_MASK_KEY,
                                  'experiment_%d/data_mask_%d.nrrd',
                                  parent_key='BASEDIR',
                                  typename='file')

        manifest_builder.add_path(self.PROJECTION_DENSITY_KEY,
                                  'experiment_%d/projection_density_%d.nrrd',
                                  parent_key='BASEDIR',
                                  typename='file')

        manifest_builder.add_path(self.STRUCTURE_MASK_KEY,
                                  'structure_masks/structure_%d.nrrd',
                                  parent_key='BASEDIR',
                                  typename='file')

        manifest_builder.write_json_file(file_name)
class MouseConnectivityCache(Cache):
    """
    Cache class for storing and accessing data related to the adult mouse
    Connectivity Atlas.  By default, this class will cache any downloaded
    metadata or files in well known locations defined in a manifest file.
    This behavior can be disabled.

    Attributes
    ----------

    resolution: int
        Resolution of grid data to be downloaded when accessing projection volume,
        the annotation volume, and the annotation volume.  Must be one of (10, 25,
        50, 100).  Default is 25.

    api: MouseConnectivityApi instance
        Used internally to make API queries.

    Parameters
    ----------

    resolution: int
        Resolution of grid data to be downloaded when accessing projection volume,
        the annotation volume, and the annotation volume.  Must be one of (10, 25,
        50, 100).  Default is 25.

    ccf_version: string
        Desired version of the Common Coordinate Framework.  This affects the annotation 
        volume (get_annotation_volume) and structure masks (get_structure_mask). 
        Must be one of (MouseConnectivityApi.CCF_2015, MouseConnectivityApi.CCF_2016). 
        Default: MouseConnectivityApi.CCF_2016

    cache: boolean
        Whether the class should save results of API queries to locations specified
        in the manifest file.  Queries for files (as opposed to metadata) must have a
        file location.  If caching is disabled, those locations must be specified
        in the function call (e.g. get_projection_density(file_name='file.nrrd')).

    manifest_file: string
        File name of the manifest to be read.  Default is "mouse_connectivity_manifest.json".

    """

    CCF_VERSION_KEY = "CCF_VERSION"
    ANNOTATION_KEY = "ANNOTATION"
    TEMPLATE_KEY = "TEMPLATE"
    PROJECTION_DENSITY_KEY = "PROJECTION_DENSITY"
    INJECTION_DENSITY_KEY = "INJECTION_DENSITY"
    INJECTION_FRACTION_KEY = "INJECTION_FRACTION"
    DATA_MASK_KEY = "DATA_MASK"
    STRUCTURE_UNIONIZES_KEY = "STRUCTURE_UNIONIZES"
    EXPERIMENTS_KEY = "EXPERIMENTS"
    STRUCTURES_KEY = "STRUCTURES"
    STRUCTURE_MASK_KEY = "STRUCTURE_MASK"

    def __init__(
        self,
        resolution=None,
        cache=True,
        manifest_file="mouse_connectivity_manifest.json",
        ccf_version=None,
        base_uri=None,
    ):
        super(MouseConnectivityCache, self).__init__(manifest=manifest_file, cache=cache)

        if resolution is None:
            self.resolution = MouseConnectivityApi.VOXEL_RESOLUTION_25_MICRONS
        else:
            self.resolution = resolution
        self.api = MouseConnectivityApi(base_uri=base_uri)

        if ccf_version is None:
            ccf_version = MouseConnectivityApi.CCF_VERSION_DEFAULT
        self.ccf_version = ccf_version

    def get_annotation_volume(self, file_name=None):
        """
        Read the annotation volume.  Download it first if it doesn't exist.

        Parameters
        ----------

        file_name: string
            File name to store the annotation volume.  If it already exists,
            it will be read from this file.  If file_name is None, the
            file_name will be pulled out of the manifest.  Default is None.

        """

        file_name = self.get_cache_path(file_name, self.ANNOTATION_KEY, self.ccf_version, self.resolution)

        if file_name is None:
            raise Exception("No save file name provided for annotation volume.")

        if os.path.exists(file_name):
            annotation, info = nrrd.read(file_name)
        else:
            Manifest.safe_make_parent_dirs(file_name)

            annotation, info = self.api.download_annotation_volume(self.ccf_version, self.resolution, file_name)

        return annotation, info

    def get_template_volume(self, file_name=None):
        """
        Read the template volume.  Download it first if it doesn't exist.

        Parameters
        ----------

        file_name: string
            File name to store the template volume.  If it already exists,
            it will be read from this file.  If file_name is None, the
            file_name will be pulled out of the manifest.  Default is None.

        """

        file_name = self.get_cache_path(file_name, self.TEMPLATE_KEY, self.resolution)

        if file_name is None:
            raise Exception("No save file provided for annotation volume.")

        if os.path.exists(file_name):
            annotation, info = nrrd.read(file_name)
        else:
            Manifest.safe_make_parent_dirs(file_name)

            annotation, info = self.api.download_template_volume(self.resolution, file_name)

        return annotation, info

    def get_projection_density(self, experiment_id, file_name=None):
        """
        Read a projection density volume for a single experiment.  Download it
        first if it doesn't exist.  Projection density is the proportion of
        of projecting pixels in a grid voxel in [0,1].

        Parameters
        ----------

        experiment_id: int
            ID of the experiment to download/read.  This corresponds to
            section_data_set_id in the API.

        file_name: string
            File name to store the template volume.  If it already exists,
            it will be read from this file.  If file_name is None, the
            file_name will be pulled out of the manifest.  Default is None.

        """

        file_name = self.get_cache_path(file_name, self.PROJECTION_DENSITY_KEY, experiment_id, self.resolution)

        if file_name is None:
            raise Exception("No file name to save volume.")

        if not os.path.exists(file_name):
            Manifest.safe_make_parent_dirs(file_name)

            self.api.download_projection_density(file_name, experiment_id, self.resolution)

        return nrrd.read(file_name)

    def get_injection_density(self, experiment_id, file_name=None):
        """
        Read an injection density volume for a single experiment. Download it
        first if it doesn't exist.  Injection density is the proportion of
        projecting pixels in a grid voxel only including pixels that are
        part of the injection site in [0,1].

        Parameters
        ----------

        experiment_id: int
            ID of the experiment to download/read.  This corresponds to
            section_data_set_id in the API.

        file_name: string
            File name to store the template volume.  If it already exists,
            it will be read from this file.  If file_name is None, the
            file_name will be pulled out of the manifest.  Default is None.

        """

        file_name = self.get_cache_path(file_name, self.INJECTION_DENSITY_KEY, experiment_id, self.resolution)

        if file_name is None:
            raise Exception("No file name to save volume.")

        if not os.path.exists(file_name):
            Manifest.safe_make_parent_dirs(file_name)

            self.api.download_injection_density(file_name, experiment_id, self.resolution)

        return nrrd.read(file_name)

    def get_injection_fraction(self, experiment_id, file_name=None):
        """
        Read an injection fraction volume for a single experiment. Download it
        first if it doesn't exist.  Injection fraction is the proportion of
        pixels in the injection site in a grid voxel in [0,1].

        Parameters
        ----------

        experiment_id: int
            ID of the experiment to download/read.  This corresponds to
            section_data_set_id in the API.

        file_name: string
            File name to store the template volume.  If it already exists,
            it will be read from this file.  If file_name is None, the
            file_name will be pulled out of the manifest.  Default is None.

        """

        file_name = self.get_cache_path(file_name, self.INJECTION_FRACTION_KEY, experiment_id, self.resolution)

        if file_name is None:
            raise Exception("No file name to save volume.")

        if not os.path.exists(file_name):
            Manifest.safe_make_parent_dirs(file_name)

            self.api.download_injection_fraction(file_name, experiment_id, self.resolution)

        return nrrd.read(file_name)

    def get_data_mask(self, experiment_id, file_name=None):
        """
        Read a data mask volume for a single experiment. Download it
        first if it doesn't exist.  Data mask is a binary mask of
        voxels that have valid data.  Only use valid data in analysis!

        Parameters
        ----------

        experiment_id: int
            ID of the experiment to download/read.  This corresponds to
            section_data_set_id in the API.

        file_name: string
            File name to store the template volume.  If it already exists,
            it will be read from this file.  If file_name is None, the
            file_name will be pulled out of the manifest.  Default is None.

        """

        file_name = self.get_cache_path(file_name, self.DATA_MASK_KEY, experiment_id, self.resolution)

        if file_name is None:
            raise Exception("No file name to save volume.")

        if not os.path.exists(file_name):
            Manifest.safe_make_parent_dirs(file_name)

            self.api.download_data_mask(file_name, experiment_id, self.resolution)

        return nrrd.read(file_name)

    def get_ontology(self, file_name=None):
        """
        Read the list of adult mouse structures and return an Ontology instance.

        Parameters
        ----------

        file_name: string
            File name to save/read the structures table.  If file_name is None,
            the file_name will be pulled out of the manifest.  If caching
            is disabled, no file will be saved. Default is None.
        """

        return Ontology(self.get_structures(file_name))

    def get_structures(self, file_name=None):
        """
        Read the list of adult mouse structures and return a Pandas DataFrame.

        Parameters
        ----------

        file_name: string
            File name to save/read the structures table.  If file_name is None,
            the file_name will be pulled out of the manifest.  If caching
            is disabled, no file will be saved. Default is None.
        """

        file_name = self.get_cache_path(file_name, self.STRUCTURES_KEY)

        if os.path.exists(file_name):
            structures = pd.DataFrame.from_csv(file_name)
        else:
            structures = OntologiesApi(base_uri=self.api.api_url).get_structures(1)
            structures = pd.DataFrame(structures)

            if self.cache:
                Manifest.safe_make_parent_dirs(file_name)

                structures.to_csv(file_name)

        structures.set_index(["id"], inplace=True, drop=False)
        return structures

    def get_experiments(self, dataframe=False, file_name=None, cre=None, injection_structure_ids=None):
        """
        Read a list of experiments that match certain criteria.  If caching is enabled,
        this will save the whole (unfiltered) list of experiments to a file.

        Parameters
        ----------

        dataframe: boolean
            Return the list of experiments as a Pandas DataFrame.  If False,
            return a list of dictionaries.  Default False.

        file_name: string
            File name to save/read the structures table.  If file_name is None,
            the file_name will be pulled out of the manifest.  If caching
            is disabled, no file will be saved. Default is None.

        cre: boolean or list
            If True, return only cre-positive experiments.  If False, return only
            cre-negative experiments.  If None, return all experients. If list, return
            all experiments with cre line names in the supplied list. Default None.

        injection_structure_ids: list
            Only return experiments that were injected in the structures provided here.
            If None, return all experiments.  Default None.

        """

        file_name = self.get_cache_path(file_name, self.EXPERIMENTS_KEY)

        if os.path.exists(file_name):
            experiments = json_utilities.read(file_name)
        else:
            experiments = self.api.experiment_source_search(injection_structures="root")

            # removing these elements because they are specific to a particular
            # resolution
            for e in experiments:
                del e["num-voxels"]
                del e["injection-volume"]
                del e["sum"]
                del e["name"]

            if self.cache:
                Manifest.safe_make_parent_dirs(file_name)

                json_utilities.write(file_name, experiments)

        # filter the read/downloaded list of experiments
        experiments = self.filter_experiments(experiments, cre, injection_structure_ids)

        if dataframe:
            experiments = pd.DataFrame(experiments)
            experiments.set_index(["id"], inplace=True, drop=False)

        return experiments

    def filter_experiments(self, experiments, cre=None, injection_structure_ids=None):
        """
        Take a list of experiments and filter them by cre status and injection structure.

        Parameters
        ----------

        cre: boolean or list
            If True, return only cre-positive experiments.  If False, return only
            cre-negative experiments.  If None, return all experients. If list, return
            all experiments with cre line names in the supplied list. Default None.

        injection_structure_ids: list
            Only return experiments that were injected in the structures provided here.
            If None, return all experiments.  Default None.
        """

        if cre is True:
            experiments = [e for e in experiments if e["transgenic-line"]]
        elif cre is False:
            experiments = [e for e in experiments if not e["transgenic-line"]]
        elif cre is not None:
            experiments = [e for e in experiments if e["transgenic-line"] in cre]

        if injection_structure_ids is not None:
            descendant_ids = self.get_ontology().get_descendant_ids(injection_structure_ids)
            experiments = [e for e in experiments if e["structure-id"] in descendant_ids]

        return experiments

    def get_experiment_structure_unionizes(
        self,
        experiment_id,
        file_name=None,
        is_injection=None,
        structure_ids=None,
        include_descendants=False,
        hemisphere_ids=None,
    ):
        """
        Retrieve the structure unionize data for a specific experiment.  Filter by
        structure, injection status, and hemisphere.

        Parameters
        ----------

        experiment_id: int
            ID of the experiment of interest.  Corresponds to section_data_set_id in the API.

        file_name: string
            File name to save/read the experiments list.  If file_name is None,
            the file_name will be pulled out of the manifest.  If caching
            is disabled, no file will be saved. Default is None.

        is_injection: boolean
            If True, only return unionize records that disregard non-injection pixels.
            If False, only return unionize records that disregard injection pixels.
            If None, return all records.  Default None.

        structure_ids: list
            Only return unionize records for a specific set of structures.
            If None, return all records. Default None.

        include_descendants: boolean
            Include all descendant records for specified structures. Default False.

        hemisphere_ids: list
            Only return unionize records that disregard pixels outside of a hemisphere.
            or set of hemispheres. Left = 1, Right = 2, Both = 3.  If None, include all
            records [1, 2, 3].  Default None.

        """

        file_name = self.get_cache_path(file_name, self.STRUCTURE_UNIONIZES_KEY, experiment_id)

        if os.path.exists(file_name):
            unionizes = pd.DataFrame.from_csv(file_name)
        else:
            unionizes = self.api.get_structure_unionizes([experiment_id])
            unionizes = pd.DataFrame(unionizes)

            # rename section_data_set_id column to experiment_id
            unionizes.columns = ["experiment_id" if c == "section_data_set_id" else c for c in unionizes.columns]

            if self.cache:
                Manifest.safe_make_parent_dirs(file_name)

                unionizes.to_csv(file_name)

        return self.filter_structure_unionizes(
            unionizes, is_injection, structure_ids, include_descendants, hemisphere_ids
        )

    def filter_structure_unionizes(
        self, unionizes, is_injection=None, structure_ids=None, include_descendants=False, hemisphere_ids=None
    ):
        """
        Take a list of unionzes and return a subset of records filtered by injection status, structure, and
        hemisphere.

        Parameters
        ----------
        is_injection: boolean
            If True, only return unionize records that disregard non-injection pixels.
            If False, only return unionize records that disregard injection pixels.
            If None, return all records.  Default None.

        structure_ids: list
            Only return unionize records for a set of structures.
            If None, return all records. Default None.

        include_descendants: boolean
            Include all descendant records for specified structures. Default False.

        hemisphere_ids: list
            Only return unionize records that disregard pixels outside of a hemisphere.
            or set of hemispheres. Left = 1, Right = 2, Both = 3.  If None, include all
            records [1, 2, 3].  Default None.
        """
        if is_injection is not None:
            unionizes = unionizes[unionizes.is_injection == is_injection]

        if structure_ids is not None:
            if include_descendants:
                structure_ids = self.get_ontology().get_descendant_ids(structure_ids)
            else:
                structure_ids = set(structure_ids)

            unionizes = unionizes[unionizes["structure_id"].isin(structure_ids)]

        if hemisphere_ids is not None:
            unionizes = unionizes[unionizes["hemisphere_id"].isin(hemisphere_ids)]

        return unionizes

    def get_structure_unionizes(
        self, experiment_ids, is_injection=None, structure_ids=None, include_descendants=False, hemisphere_ids=None
    ):
        """
        Get structure unionizes for a set of experiment IDs.  Filter the results by injection status,
        structure, and hemisphere.

        Parameters
        ----------
        experiment_ids: list
            List of experiment IDs.  Corresponds to section_data_set_id in the API.

        is_injection: boolean
            If True, only return unionize records that disregard non-injection pixels.
            If False, only return unionize records that disregard injection pixels.
            If None, return all records.  Default None.

        structure_ids: list
            Only return unionize records for a specific set of structures.
            If None, return all records. Default None.

        include_descendants: boolean
            Include all descendant records for specified structures. Default False.

        hemisphere_ids: list
            Only return unionize records that disregard pixels outside of a hemisphere.
            or set of hemispheres. Left = 1, Right = 2, Both = 3.  If None, include all
            records [1, 2, 3].  Default None.
        """

        unionizes = [
            self.get_experiment_structure_unionizes(
                eid,
                is_injection=is_injection,
                structure_ids=structure_ids,
                include_descendants=include_descendants,
                hemisphere_ids=hemisphere_ids,
            )
            for eid in experiment_ids
        ]

        return pd.concat(unionizes, ignore_index=True)

    def get_projection_matrix(
        self,
        experiment_ids,
        projection_structure_ids,
        hemisphere_ids=None,
        parameter="projection_volume",
        dataframe=False,
    ):

        unionizes = self.get_structure_unionizes(
            experiment_ids,
            is_injection=False,
            structure_ids=projection_structure_ids,
            include_descendants=False,
            hemisphere_ids=hemisphere_ids,
        )

        hemisphere_ids = set(unionizes["hemisphere_id"].values.tolist())

        nrows = len(experiment_ids)
        ncolumns = len(projection_structure_ids) * len(hemisphere_ids)

        matrix = np.empty((nrows, ncolumns))
        matrix[:] = np.NAN

        row_lookup = {}
        for idx, e in enumerate(experiment_ids):
            row_lookup[e] = idx

        column_lookup = {}
        columns = []

        cidx = 0
        hlabel = {1: "-L", 2: "-R", 3: ""}

        o = self.get_ontology()

        for hid in hemisphere_ids:
            for sid in projection_structure_ids:
                column_lookup[(hid, sid)] = cidx
                label = o[sid].iloc[0]["acronym"] + hlabel[hid]
                columns.append({"hemisphere_id": hid, "structure_id": sid, "label": label})
                cidx += 1

        for _, row in unionizes.iterrows():
            ridx = row_lookup[row["experiment_id"]]
            k = (row["hemisphere_id"], row["structure_id"])
            cidx = column_lookup[k]
            matrix[ridx, cidx] = row[parameter]

        if dataframe:
            all_experiments = self.get_experiments(dataframe=True)

            rows_df = all_experiments.loc[experiment_ids]

            cols_df = pd.DataFrame(columns)

            return {"matrix": matrix, "rows": rows_df, "columns": cols_df}
        else:
            return {"matrix": matrix, "rows": experiment_ids, "columns": columns}

    def get_structure_mask(self, structure_id, file_name=None, annotation_file_name=None):
        """
        Read a 3D numpy array shaped like the annotation volume that has non-zero values where
        voxels belong to a particular structure.  This will take care of identifying substructures.

        Parameters
        ----------

        structure_id: int
            ID of a structure.

        file_name: string
            File name to store the structure mask.  If it already exists,
            it will be read from this file.  If file_name is None, the
            file_name will be pulled out of the manifest.  Default is None.

        annotation_file_name: string
            File name to store the annotation volume.  If it already exists,
            it will be read from this file.  If file_name is None, the
            file_name will be pulled out of the manifest.  Default is None.
        """

        file_name = self.get_cache_path(file_name, self.STRUCTURE_MASK_KEY, structure_id)

        if os.path.exists(file_name):
            return nrrd.read(file_name)
        else:
            ont = self.get_ontology()
            structure_ids = ont.get_descendant_ids([structure_id])
            annotation, _ = self.get_annotation_volume(annotation_file_name)
            mask = self.make_structure_mask(structure_ids, annotation)

            if self.cache:
                Manifest.safe_make_parent_dirs(file_name)
                nrrd.write(file_name, mask)

            return mask, None

    def make_structure_mask(self, structure_ids, annotation):
        """
        Look at an annotation volume and identify voxels that have values
        in a list of structure ids.

        Parameters
        ----------

        structure_ids: list
            List of IDs to look for in the annotation volume

        annotation: np.ndarray
            Numpy array filled with IDs.

        """

        m = np.zeros(annotation.shape, dtype=np.uint8)

        for _, sid in enumerate(structure_ids):
            m[annotation == sid] = 1

        return m

    def build_manifest(self, file_name):
        """
        Construct a manifest for this Cache class and save it in a file.

        Parameters
        ----------

        file_name: string
            File location to save the manifest.

        """

        manifest_builder = ManifestBuilder()
        manifest_builder.add_path("BASEDIR", ".")

        manifest_builder.add_path(self.EXPERIMENTS_KEY, "experiments.json", parent_key="BASEDIR", typename="file")

        manifest_builder.add_path(self.STRUCTURES_KEY, "structures.csv", parent_key="BASEDIR", typename="file")

        manifest_builder.add_path(
            self.STRUCTURE_UNIONIZES_KEY, "experiment_%d/structure_unionizes.csv", parent_key="BASEDIR", typename="file"
        )

        manifest_builder.add_path(self.CCF_VERSION_KEY, "%s", parent_key="BASEDIR", typename="dir")

        manifest_builder.add_path(
            self.ANNOTATION_KEY, "annotation_%d.nrrd", parent_key=self.CCF_VERSION_KEY, typename="file"
        )

        manifest_builder.add_path(self.TEMPLATE_KEY, "average_template_%d.nrrd", parent_key="BASEDIR", typename="file")

        manifest_builder.add_path(
            self.INJECTION_DENSITY_KEY, "experiment_%d/injection_density_%d.nrrd", parent_key="BASEDIR", typename="file"
        )

        manifest_builder.add_path(
            self.INJECTION_FRACTION_KEY,
            "experiment_%d/injection_fraction_%d.nrrd",
            parent_key="BASEDIR",
            typename="file",
        )

        manifest_builder.add_path(
            self.DATA_MASK_KEY, "experiment_%d/data_mask_%d.nrrd", parent_key="BASEDIR", typename="file"
        )

        manifest_builder.add_path(
            self.PROJECTION_DENSITY_KEY,
            "experiment_%d/projection_density_%d.nrrd",
            parent_key="BASEDIR",
            typename="file",
        )

        manifest_builder.add_path(
            self.STRUCTURE_MASK_KEY, "structure_masks/structure_%d.nrrd", parent_key="BASEDIR", typename="file"
        )

        manifest_builder.write_json_file(file_name)
class MouseConnectivityCache(ReferenceSpaceCache):
    """
    Cache class for storing and accessing data related to the adult mouse
    Connectivity Atlas.  By default, this class will cache any downloaded
    metadata or files in well known locations defined in a manifest file.
    This behavior can be disabled.

    Attributes
    ----------

    resolution: int
        Resolution of grid data to be downloaded when accessing projection volume,
        the annotation volume, and the annotation volume.  Must be one of (10, 25,
        50, 100).  Default is 25.

    api: MouseConnectivityApi instance
        Used internally to make API queries.

    Parameters
    ----------

    resolution: int
        Resolution of grid data to be downloaded when accessing projection volume,
        the annotation volume, and the annotation volume.  Must be one of (10, 25,
        50, 100).  Default is 25.

    ccf_version: string
        Desired version of the Common Coordinate Framework.  This affects the annotation
        volume (get_annotation_volume) and structure masks (get_structure_mask).
        Must be one of (MouseConnectivityApi.CCF_2015, MouseConnectivityApi.CCF_2016).
        Default: MouseConnectivityApi.CCF_2016

    cache: boolean
        Whether the class should save results of API queries to locations specified
        in the manifest file.  Queries for files (as opposed to metadata) must have a
        file location.  If caching is disabled, those locations must be specified
        in the function call (e.g. get_projection_density(file_name='file.nrrd')).

    manifest_file: string
        File name of the manifest to be read.  Default is "mouse_connectivity_manifest.json".

    """

    PROJECTION_DENSITY_KEY = 'PROJECTION_DENSITY'
    INJECTION_DENSITY_KEY = 'INJECTION_DENSITY'
    INJECTION_FRACTION_KEY = 'INJECTION_FRACTION'
    DATA_MASK_KEY = 'DATA_MASK'
    STRUCTURE_UNIONIZES_KEY = 'STRUCTURE_UNIONIZES'
    EXPERIMENTS_KEY = 'EXPERIMENTS'
    DEFORMATION_FIELD_HEADER_KEY = 'DEFORMATION_FIELD_HEADER'
    DEFORMATION_FIELD_VOXEL_KEY = 'DEFORMATION_FIELD_VOXELS'
    ALIGNMENT3D_KEY = 'ALIGNMENT3D'

    MANIFEST_VERSION = 1.3

    SUMMARY_STRUCTURE_SET_ID = 167587189
    DEFAULT_STRUCTURE_SET_IDS = tuple([SUMMARY_STRUCTURE_SET_ID])

    DFMFLD_RESOLUTIONS = (25,)

    @property
    def default_structure_ids(self):

        if not hasattr(self, '_default_structure_ids'):
            tree = self.get_structure_tree()
            default_structures = tree.get_structures_by_set_id(MouseConnectivityCache.DEFAULT_STRUCTURE_SET_IDS)
            self._default_structure_ids = [st['id'] for st in default_structures]

        return self._default_structure_ids

    def __init__(self,
                 resolution=None,
                 cache=True,
                 manifest_file=None,
                 ccf_version=None,
                 base_uri=None,
                 version=None):

        if manifest_file is None:
            manifest_file = get_default_manifest_file('mouse_connectivity')

        if version is None:
            version = self.MANIFEST_VERSION

        if resolution is None:
            resolution = MouseConnectivityApi.VOXEL_RESOLUTION_25_MICRONS

        if ccf_version is None:
            ccf_version = MouseConnectivityApi.CCF_VERSION_DEFAULT

        super(MouseConnectivityCache, self).__init__(
            resolution, reference_space_key=ccf_version, cache=cache,
            manifest=manifest_file, version=version)

        self.api = MouseConnectivityApi(base_uri=base_uri)


    def get_projection_density(self, experiment_id, file_name=None):
        """
        Read a projection density volume for a single experiment.  Download it
        first if it doesn't exist.  Projection density is the proportion of
        of projecting pixels in a grid voxel in [0,1].

        Parameters
        ----------

        experiment_id: int
            ID of the experiment to download/read.  This corresponds to
            section_data_set_id in the API.

        file_name: string
            File name to store the template volume.  If it already exists,
            it will be read from this file.  If file_name is None, the
            file_name will be pulled out of the manifest.  Default is None.

        """

        file_name = self.get_cache_path(file_name,
                                        self.PROJECTION_DENSITY_KEY,
                                        experiment_id,
                                        self.resolution)

        self.api.download_projection_density(
            file_name, experiment_id, self.resolution, strategy='lazy')

        return nrrd.read(file_name)

    def get_injection_density(self, experiment_id, file_name=None):
        """
        Read an injection density volume for a single experiment. Download it
        first if it doesn't exist.  Injection density is the proportion of
        projecting pixels in a grid voxel only including pixels that are
        part of the injection site in [0,1].

        Parameters
        ----------

        experiment_id: int
            ID of the experiment to download/read.  This corresponds to
            section_data_set_id in the API.

        file_name: string
            File name to store the template volume.  If it already exists,
            it will be read from this file.  If file_name is None, the
            file_name will be pulled out of the manifest.  Default is None.

        """

        file_name = self.get_cache_path(file_name,
                                        self.INJECTION_DENSITY_KEY,
                                        experiment_id,
                                        self.resolution)
        self.api.download_injection_density(
            file_name, experiment_id, self.resolution, strategy='lazy')

        return nrrd.read(file_name)

    def get_injection_fraction(self, experiment_id, file_name=None):
        """
        Read an injection fraction volume for a single experiment. Download it
        first if it doesn't exist.  Injection fraction is the proportion of
        pixels in the injection site in a grid voxel in [0,1].

        Parameters
        ----------

        experiment_id: int
            ID of the experiment to download/read.  This corresponds to
            section_data_set_id in the API.

        file_name: string
            File name to store the template volume.  If it already exists,
            it will be read from this file.  If file_name is None, the
            file_name will be pulled out of the manifest.  Default is None.

        """

        file_name = self.get_cache_path(file_name,
                                        self.INJECTION_FRACTION_KEY,
                                        experiment_id,
                                        self.resolution)
        self.api.download_injection_fraction(
            file_name, experiment_id, self.resolution, strategy='lazy')

        return nrrd.read(file_name)

    def get_data_mask(self, experiment_id, file_name=None):
        """
        Read a data mask volume for a single experiment. Download it
        first if it doesn't exist.  Data mask is a binary mask of
        voxels that have valid data.  Only use valid data in analysis!

        Parameters
        ----------

        experiment_id: int
            ID of the experiment to download/read.  This corresponds to
            section_data_set_id in the API.

        file_name: string
            File name to store the template volume.  If it already exists,
            it will be read from this file.  If file_name is None, the
            file_name will be pulled out of the manifest.  Default is None.

        """

        file_name = self.get_cache_path(file_name,
                                        self.DATA_MASK_KEY,
                                        experiment_id,
                                        self.resolution)
        self.api.download_data_mask(
            file_name, experiment_id, self.resolution, strategy='lazy')

        return nrrd.read(file_name)


    def get_experiments(self, dataframe=False, file_name=None, cre=None, injection_structure_ids=None):
        """
        Read a list of experiments that match certain criteria.  If caching is enabled,
        this will save the whole (unfiltered) list of experiments to a file.

        Parameters
        ----------

        dataframe: boolean
            Return the list of experiments as a Pandas DataFrame.  If False,
            return a list of dictionaries.  Default False.

        file_name: string
            File name to save/read the structures table.  If file_name is None,
            the file_name will be pulled out of the manifest.  If caching
            is disabled, no file will be saved. Default is None.

        cre: boolean or list
            If True, return only cre-positive experiments.  If False, return only
            cre-negative experiments.  If None, return all experients. If list, return
            all experiments with cre line names in the supplied list. Default None.

        injection_structure_ids: list
            Only return experiments that were injected in the structures provided here.
            If None, return all experiments.  Default None.

        """

        file_name = self.get_cache_path(file_name, self.EXPERIMENTS_KEY)

        experiments = self.api.get_experiments_api(path=file_name,
                                                   strategy='lazy',
                                                   **Cache.cache_json())

        for e in experiments:
            # renaming id
            e['id'] = e['data_set_id']
            del e['data_set_id']

            # simplify trangsenic line
            tl = e.get('transgenic_line', None)
            if tl:
                e['transgenic_line'] = tl['name']

            # parse the injection structures
            injs = [ int(i) for i in e['injection_structures'].split('/') ]
            e['injection_structures'] = injs
            e['primary_injection_structure'] = injs[0]

            # remove storage dir
            del e['storage_directory']


        # filter the read/downloaded list of experiments
        experiments = self.filter_experiments(
            experiments, cre, injection_structure_ids)

        if dataframe:
            experiments = pd.DataFrame(experiments)
            experiments.set_index(['id'], inplace=True, drop=False)

        return experiments

    def filter_experiments(self, experiments, cre=None, injection_structure_ids=None):
        """
        Take a list of experiments and filter them by cre status and injection structure.

        Parameters
        ----------

        cre: boolean or list
            If True, return only cre-positive experiments.  If False, return only
            cre-negative experiments.  If None, return all experients. If list, return
            all experiments with cre line names in the supplied list. Default None.

        injection_structure_ids: list
            Only return experiments that were injected in the structures provided here.
            If None, return all experiments.  Default None.
        """

        if cre is True:
            experiments = [e for e in experiments if e['transgenic_line']]
        elif cre is False:
            experiments = [e for e in experiments if not e['transgenic_line']]
        elif cre is not None:
            cre = [ c.lower() for c in cre ]
            experiments = [e for e in experiments if e['transgenic_line'] is not None and e['transgenic_line'].lower() in cre]

        if injection_structure_ids is not None:
            structure_ids = MouseConnectivityCache.validate_structure_ids(injection_structure_ids)
            descendant_ids = set(reduce(op.add, self.get_structure_tree().descendant_ids(injection_structure_ids)))
            
            experiments = [e for e in experiments if e['structure_id'] in descendant_ids]

        return experiments

    def get_experiment_structure_unionizes(self, experiment_id,
                                           file_name=None,
                                           is_injection=None,
                                           structure_ids=None,
                                           include_descendants=False,
                                           hemisphere_ids=None):
        """
        Retrieve the structure unionize data for a specific experiment.  Filter by
        structure, injection status, and hemisphere.

        Parameters
        ----------

        experiment_id: int
            ID of the experiment of interest.  Corresponds to section_data_set_id in the API.

        file_name: string
            File name to save/read the experiments list.  If file_name is None,
            the file_name will be pulled out of the manifest.  If caching
            is disabled, no file will be saved. Default is None.

        is_injection: boolean
            If True, only return unionize records that disregard non-injection pixels.
            If False, only return unionize records that disregard injection pixels.
            If None, return all records.  Default None.

        structure_ids: list
            Only return unionize records for a specific set of structures.
            If None, return all records. Default None.

        include_descendants: boolean
            Include all descendant records for specified structures. Default False.

        hemisphere_ids: list
            Only return unionize records that disregard pixels outside of a hemisphere.
            or set of hemispheres. Left = 1, Right = 2, Both = 3.  If None, include all
            records [1, 2, 3].  Default None.

        """

        file_name = self.get_cache_path(file_name,
                                        self.STRUCTURE_UNIONIZES_KEY,
                                        experiment_id)

        filter_fn = functools.partial(self.filter_structure_unionizes,
                                      is_injection=is_injection,
                                      structure_ids=structure_ids,
                                      include_descendants=include_descendants,
                                      hemisphere_ids=hemisphere_ids)

        col_rn = lambda x: pd.DataFrame(x).rename(columns={
            'section_data_set_id': 'experiment_id'})

        return self.api.get_structure_unionizes([experiment_id],
                                                path=file_name,
                                                strategy='lazy',
                                                pre=col_rn,
                                                post=filter_fn,
                                                writer=lambda p, x : pd.DataFrame(x).to_csv(p),
                                                reader=lambda x: pd.read_csv(x, index_col=0, parse_dates=True))

    def rank_structures(self, experiment_ids, is_injection, structure_ids=None, hemisphere_ids=None,
                        rank_on='normalized_projection_volume', n=5, threshold=10**-2):
        '''Produces one or more (per experiment) ranked lists of brain structures, using a specified data field.

        Parameters
        ----------
        experiment_ids : list of int
            Obtain injection_structures for these experiments.
        is_injection : boolean
            Use data from only injection (or non-injection) unionizes.
        structure_ids : list of int, optional
            Consider only these structures. It is a good idea to make sure that these structures are not spatially
            overlapping; otherwise your results will contain redundant information. Defaults to the summary
            structures - a brain-wide list of nonoverlapping mid-level structures.
        hemisphere_ids : list of int, optional
            Consider only these hemispheres (1: left, 2: right, 3: both). Like with structures,
            you might get redundant results if you select overlapping options. Defaults to [1, 2].
        rank_on : str, optional
            Rank unionize data using this field (descending). Defaults to normalized_projection_volume.
        n : int, optional
            Return only the top n structures.
        threshold : float, optional
            Consider only records whose data value - specified by the rank_on parameter - exceeds this value.

        Returns
        -------
        list :
            Each element (1 for each input experiment) is a list of dictionaries. The dictionaries describe the top
            injection structures in descending order. They are specified by their structure and hemisphere id fields and
            additionally report the value specified by the rank_on parameter.

        '''

        output_keys = ['experiment_id', rank_on, 'hemisphere_id', 'structure_id']
        filter_fields = lambda fieldname: fieldname in output_keys

        if hemisphere_ids is None:
            hemisphere_ids = [1, 2]
        if structure_ids is None:
            structure_ids = self.default_structure_ids

        unionizes = self.get_structure_unionizes(experiment_ids,
                                                 is_injection=is_injection,
                                                 structure_ids=structure_ids,
                                                 hemisphere_ids=hemisphere_ids,
                                                 include_descendants=False)
        unionizes = unionizes[unionizes[rank_on] > threshold]

        results = []
        for eid in experiment_ids:

            this_experiment_unionizes = unionizes[unionizes['experiment_id'] == eid]
            this_experiment_unionizes = this_experiment_unionizes.sort_values(by=rank_on, ascending=False)
            this_experiment_unionizes = this_experiment_unionizes.select(filter_fields, axis=1)

            records = this_experiment_unionizes.to_dict('record')
            if len(records) > n:
                records = records[:n]
            results.append(records)

        return results

    def filter_structure_unionizes(self, unionizes,
                                   is_injection=None,
                                   structure_ids=None,
                                   include_descendants=False,
                                   hemisphere_ids=None):
        """
        Take a list of unionzes and return a subset of records filtered by injection status, structure, and
        hemisphere.

        Parameters
        ----------
        is_injection: boolean
            If True, only return unionize records that disregard non-injection pixels.
            If False, only return unionize records that disregard injection pixels.
            If None, return all records.  Default None.

        structure_ids: list
            Only return unionize records for a set of structures.
            If None, return all records. Default None.

        include_descendants: boolean
            Include all descendant records for specified structures. Default False.

        hemisphere_ids: list
            Only return unionize records that disregard pixels outside of a hemisphere.
            or set of hemispheres. Left = 1, Right = 2, Both = 3.  If None, include all
            records [1, 2, 3].  Default None.
        """
        if is_injection is not None:
            unionizes = unionizes[unionizes.is_injection == is_injection]

        if structure_ids is not None:
            structure_ids = MouseConnectivityCache.validate_structure_ids(structure_ids)

            if include_descendants:
                structure_ids = reduce(op.add, self.get_structure_tree().descendant_ids(structure_ids))
            else:
                structure_ids = set(structure_ids)


            unionizes = unionizes[
                unionizes['structure_id'].isin(structure_ids)]

        if hemisphere_ids is not None:
            unionizes = unionizes[
                unionizes['hemisphere_id'].isin(hemisphere_ids)]

        return unionizes

    def get_structure_unionizes(self, experiment_ids,
                                is_injection=None,
                                structure_ids=None,
                                include_descendants=False,
                                hemisphere_ids=None):
        """
        Get structure unionizes for a set of experiment IDs.  Filter the results by injection status,
        structure, and hemisphere.

        Parameters
        ----------
        experiment_ids: list
            List of experiment IDs.  Corresponds to section_data_set_id in the API.

        is_injection: boolean
            If True, only return unionize records that disregard non-injection pixels.
            If False, only return unionize records that disregard injection pixels.
            If None, return all records.  Default None.

        structure_ids: list
            Only return unionize records for a specific set of structures.
            If None, return all records. Default None.

        include_descendants: boolean
            Include all descendant records for specified structures. Default False.

        hemisphere_ids: list
            Only return unionize records that disregard pixels outside of a hemisphere.
            or set of hemispheres. Left = 1, Right = 2, Both = 3.  If None, include all
            records [1, 2, 3].  Default None.
        """

        unionizes = [self.get_experiment_structure_unionizes(eid,
                                                             is_injection=is_injection,
                                                             structure_ids=structure_ids,
                                                             include_descendants=include_descendants,
                                                             hemisphere_ids=hemisphere_ids)
                     for eid in experiment_ids]

        return pd.concat(unionizes, ignore_index=True, sort=True)

    def get_projection_matrix(self, experiment_ids,
                              projection_structure_ids=None,
                              hemisphere_ids=None,
                              parameter='projection_volume',
                              dataframe=False):

        if projection_structure_ids is None:
            projection_structure_ids = self.default_structure_ids

        unionizes = self.get_structure_unionizes(experiment_ids,
                                                 is_injection=False,
                                                 structure_ids=projection_structure_ids,
                                                 include_descendants=False,
                                                 hemisphere_ids=hemisphere_ids)

        hemisphere_ids = set(unionizes['hemisphere_id'].values.tolist())

        nrows = len(experiment_ids)
        ncolumns = len(projection_structure_ids) * len(hemisphere_ids)

        matrix = np.empty((nrows, ncolumns))
        matrix[:] = np.NAN

        row_lookup = {}
        for idx, e in enumerate(experiment_ids):
            row_lookup[e] = idx

        column_lookup = {}
        columns = []

        cidx = 0
        hlabel = {1: '-L', 2: '-R', 3: ''}

        acronym_map = self.get_structure_tree().value_map(lambda x: x['id'],
                                                          lambda x: x['acronym'])

        for hid in hemisphere_ids:
            for sid in projection_structure_ids:
                column_lookup[(hid, sid)] = cidx
                label = acronym_map[sid] + hlabel[hid]
                columns.append(
                    {'hemisphere_id': hid, 'structure_id': sid, 'label': label})
                cidx += 1

        for _, row in unionizes.iterrows():
            ridx = row_lookup[row['experiment_id']]
            k = (row['hemisphere_id'], row['structure_id'])
            cidx = column_lookup[k]
            matrix[ridx, cidx] = row[parameter]

        if dataframe:
            warnings.warn("dataframe argument is deprecated.")
            all_experiments = self.get_experiments(dataframe=True)

            rows_df = all_experiments.loc[experiment_ids]

            cols_df = pd.DataFrame(columns)

            return {'matrix': matrix, 'rows': rows_df, 'columns': cols_df}
        else:
            return {'matrix': matrix, 'rows': experiment_ids, 'columns': columns}


    def get_deformation_field(self, section_data_set_id, header_path=None, voxel_path=None):
        ''' Extract the local alignment parameters for this dataset. This a 3D vector image (3 components) describing 
        a deformable local mapping from CCF voxels to this section data set's affine-aligned image stack.

        Parameters
        ----------
            section_data_set_id : int
                Download the deformation field for this data set
            header_path : str, optional
                If supplied, the deformation field header will be downloaded to this path.
            voxel_path : str, optiona
                If supplied, the deformation field voxels will be downloaded to this path.

        Returns
        -------
            numpy.ndarray : 
                3D X 3 component vector array (origin 0, 0, 0; 25-micron isometric resolution) defining a 
                deformable transformation from CCF-space to affine-transformed image space.

        '''

        if self.resolution not in self.DFMFLD_RESOLUTIONS:
            warnings.warn(
                'deformation fields are only available at {} isometric resolutions, but this is a '\
                '{}-micron cache'.format(self.DFMFLD_RESOLUTIONS, self.resolution)
            )

        header_path = self.get_cache_path(header_path, self.DEFORMATION_FIELD_HEADER_KEY, section_data_set_id)
        voxel_path = self.get_cache_path(voxel_path, self.DEFORMATION_FIELD_VOXEL_KEY, section_data_set_id)

        if not (os.path.exists(header_path) and os.path.exists(voxel_path)):
            Manifest.safe_make_parent_dirs(header_path)
            Manifest.safe_make_parent_dirs(voxel_path)
            self.api.download_deformation_field(
                section_data_set_id,
                header_path=header_path,
                voxel_path=voxel_path
                )

        return sitk.GetArrayFromImage(sitk.ReadImage(str(header_path))) # TODO the str call here is only necessary in 2.7


    def get_affine_parameters(self, section_data_set_id, direction='trv', file_name=None):
        ''' Extract the parameters of the 3D affine tranformation mapping this section data set's image-space stack to 
        CCF-space (or vice-versa).

        Parameters
        ----------
        section_data_set_id : int
            download the parameters for this data set.
        direction : str, optional
            Valid options are:
                trv : "transform from reference to volume". Maps CCF points to image space points. If you are 
                    resampling data into CCF, this is the direction you want.
                tvr : "transform from volume to reference". Maps image space points to CCF points.
        file_name : str
            If provided, store the downloaded file here.
 
        Returns
        -------
        alignment : numpy.ndarray
            4 X 3 matrix. In order to transform a point [X_1, X_2, X_3] run 
                np.dot([X_1, X_2, X_3, 1], alignment). In 
            to build a SimpleITK affine transform run:
                transform = sitk.AffineTransform(3)
                transform.SetParameters(alignment.flatten())

        '''

        if not direction in ('trv', 'tvr'):
            raise ArgumentError('invalid direction: {}. direction must be one of tvr, trv'.format(direction))

        file_name = self.get_cache_path(file_name, self.ALIGNMENT3D_KEY)

        raw_alignment = self.api.download_alignment3d(
            strategy='lazy',
            path=file_name,
            section_data_set_id=section_data_set_id,
            **Cache.cache_json())
    
        alignment_re = re.compile('{}_(?P<index>\d+)'.format(direction))
        alignment = np.zeros((4, 3), dtype=float)

        for entry, value in raw_alignment.items():
            match = alignment_re.match(entry)
            if match is not None:
                alignment.flat[int(match.group('index'))] = value
        
        return alignment


    def add_manifest_paths(self, manifest_builder):
        """
        Construct a manifest for this Cache class and save it in a file.

        Parameters
        ----------

        file_name: string
            File location to save the manifest.

        """

        manifest_builder = super(MouseConnectivityCache, self).add_manifest_paths(manifest_builder)

        manifest_builder.add_path(self.EXPERIMENTS_KEY,
                                  'experiments.json',
                                  parent_key='BASEDIR',
                                  typename='file')

        manifest_builder.add_path(self.STRUCTURE_UNIONIZES_KEY,
                                  'experiment_%d/structure_unionizes.csv',
                                  parent_key='BASEDIR',
                                  typename='file')

        manifest_builder.add_path(self.INJECTION_DENSITY_KEY,
                                  'experiment_%d/injection_density_%d.nrrd',
                                  parent_key='BASEDIR',
                                  typename='file')

        manifest_builder.add_path(self.INJECTION_FRACTION_KEY,
                                  'experiment_%d/injection_fraction_%d.nrrd',
                                  parent_key='BASEDIR',
                                  typename='file')

        manifest_builder.add_path(self.DATA_MASK_KEY,
                                  'experiment_%d/data_mask_%d.nrrd',
                                  parent_key='BASEDIR',
                                  typename='file')

        manifest_builder.add_path(self.PROJECTION_DENSITY_KEY,
                                  'experiment_%d/projection_density_%d.nrrd',
                                  parent_key='BASEDIR',
                                  typename='file')

        manifest_builder.add_path(self.DEFORMATION_FIELD_HEADER_KEY,
                                 'experiment_%d/dfmfld.mhd',
                                 parent_key='BASEDIR',
                                 typename='file')

        manifest_builder.add_path(self.DEFORMATION_FIELD_VOXEL_KEY,
                                 'experiment_%d/dfmfld.raw',
                                 parent_key='BASEDIR',
                                 typename='file')

        manifest_builder.add_path(self.ALIGNMENT3D_KEY,
                                  'experiment_%d/alignment3d.json',
                                  parent_key='BASEDIR',
                                  typename='file')

        return manifest_builder
Esempio n. 4
0
class MouseConnectivityCache(ReferenceSpaceCache):
    """
    Cache class for storing and accessing data related to the adult mouse
    Connectivity Atlas.  By default, this class will cache any downloaded
    metadata or files in well known locations defined in a manifest file.
    This behavior can be disabled.

    Attributes
    ----------

    resolution: int
        Resolution of grid data to be downloaded when accessing projection volume,
        the annotation volume, and the annotation volume.  Must be one of (10, 25,
        50, 100).  Default is 25.

    api: MouseConnectivityApi instance
        Used internally to make API queries.

    Parameters
    ----------

    resolution: int
        Resolution of grid data to be downloaded when accessing projection volume,
        the annotation volume, and the annotation volume.  Must be one of (10, 25,
        50, 100).  Default is 25.

    ccf_version: string
        Desired version of the Common Coordinate Framework.  This affects the annotation
        volume (get_annotation_volume) and structure masks (get_structure_mask).
        Must be one of (MouseConnectivityApi.CCF_2015, MouseConnectivityApi.CCF_2016).
        Default: MouseConnectivityApi.CCF_2016

    cache: boolean
        Whether the class should save results of API queries to locations specified
        in the manifest file.  Queries for files (as opposed to metadata) must have a
        file location.  If caching is disabled, those locations must be specified
        in the function call (e.g. get_projection_density(file_name='file.nrrd')).

    manifest_file: string
        File name of the manifest to be read.  Default is "mouse_connectivity_manifest.json".

    """

    PROJECTION_DENSITY_KEY = 'PROJECTION_DENSITY'
    INJECTION_DENSITY_KEY = 'INJECTION_DENSITY'
    INJECTION_FRACTION_KEY = 'INJECTION_FRACTION'
    DATA_MASK_KEY = 'DATA_MASK'
    STRUCTURE_UNIONIZES_KEY = 'STRUCTURE_UNIONIZES'
    EXPERIMENTS_KEY = 'EXPERIMENTS'
    DEFORMATION_FIELD_HEADER_KEY = 'DEFORMATION_FIELD_HEADER'
    DEFORMATION_FIELD_VOXEL_KEY = 'DEFORMATION_FIELD_VOXELS'
    ALIGNMENT3D_KEY = 'ALIGNMENT3D'

    MANIFEST_VERSION = 1.3

    SUMMARY_STRUCTURE_SET_ID = 167587189
    DEFAULT_STRUCTURE_SET_IDS = tuple([SUMMARY_STRUCTURE_SET_ID])

    DFMFLD_RESOLUTIONS = (25, )

    @property
    def default_structure_ids(self):

        if not hasattr(self, '_default_structure_ids'):
            tree = self.get_structure_tree()
            default_structures = tree.get_structures_by_set_id(
                MouseConnectivityCache.DEFAULT_STRUCTURE_SET_IDS)
            self._default_structure_ids = [
                st['id'] for st in default_structures
            ]

        return self._default_structure_ids

    def __init__(self,
                 resolution=None,
                 cache=True,
                 manifest_file=None,
                 ccf_version=None,
                 base_uri=None,
                 version=None):

        if manifest_file is None:
            manifest_file = get_default_manifest_file('mouse_connectivity')

        if version is None:
            version = self.MANIFEST_VERSION

        if resolution is None:
            resolution = MouseConnectivityApi.VOXEL_RESOLUTION_25_MICRONS

        if ccf_version is None:
            ccf_version = MouseConnectivityApi.CCF_VERSION_DEFAULT

        super(MouseConnectivityCache,
              self).__init__(resolution,
                             reference_space_key=ccf_version,
                             cache=cache,
                             manifest=manifest_file,
                             version=version)

        self.api = MouseConnectivityApi(base_uri=base_uri)

    def get_projection_density(self, experiment_id, file_name=None):
        """
        Read a projection density volume for a single experiment.  Download it
        first if it doesn't exist.  Projection density is the proportion of
        of projecting pixels in a grid voxel in [0,1].

        Parameters
        ----------

        experiment_id: int
            ID of the experiment to download/read.  This corresponds to
            section_data_set_id in the API.

        file_name: string
            File name to store the template volume.  If it already exists,
            it will be read from this file.  If file_name is None, the
            file_name will be pulled out of the manifest.  Default is None.

        """

        file_name = self.get_cache_path(file_name, self.PROJECTION_DENSITY_KEY,
                                        experiment_id, self.resolution)

        self.api.download_projection_density(file_name,
                                             experiment_id,
                                             self.resolution,
                                             strategy='lazy')

        return nrrd.read(file_name)

    def get_injection_density(self, experiment_id, file_name=None):
        """
        Read an injection density volume for a single experiment. Download it
        first if it doesn't exist.  Injection density is the proportion of
        projecting pixels in a grid voxel only including pixels that are
        part of the injection site in [0,1].

        Parameters
        ----------

        experiment_id: int
            ID of the experiment to download/read.  This corresponds to
            section_data_set_id in the API.

        file_name: string
            File name to store the template volume.  If it already exists,
            it will be read from this file.  If file_name is None, the
            file_name will be pulled out of the manifest.  Default is None.

        """

        file_name = self.get_cache_path(file_name, self.INJECTION_DENSITY_KEY,
                                        experiment_id, self.resolution)
        self.api.download_injection_density(file_name,
                                            experiment_id,
                                            self.resolution,
                                            strategy='lazy')

        return nrrd.read(file_name)

    def get_injection_fraction(self, experiment_id, file_name=None):
        """
        Read an injection fraction volume for a single experiment. Download it
        first if it doesn't exist.  Injection fraction is the proportion of
        pixels in the injection site in a grid voxel in [0,1].

        Parameters
        ----------

        experiment_id: int
            ID of the experiment to download/read.  This corresponds to
            section_data_set_id in the API.

        file_name: string
            File name to store the template volume.  If it already exists,
            it will be read from this file.  If file_name is None, the
            file_name will be pulled out of the manifest.  Default is None.

        """

        file_name = self.get_cache_path(file_name, self.INJECTION_FRACTION_KEY,
                                        experiment_id, self.resolution)
        self.api.download_injection_fraction(file_name,
                                             experiment_id,
                                             self.resolution,
                                             strategy='lazy')

        return nrrd.read(file_name)

    def get_data_mask(self, experiment_id, file_name=None):
        """
        Read a data mask volume for a single experiment. Download it
        first if it doesn't exist.  Data mask is a binary mask of
        voxels that have valid data.  Only use valid data in analysis!

        Parameters
        ----------

        experiment_id: int
            ID of the experiment to download/read.  This corresponds to
            section_data_set_id in the API.

        file_name: string
            File name to store the template volume.  If it already exists,
            it will be read from this file.  If file_name is None, the
            file_name will be pulled out of the manifest.  Default is None.

        """

        file_name = self.get_cache_path(file_name, self.DATA_MASK_KEY,
                                        experiment_id, self.resolution)
        self.api.download_data_mask(file_name,
                                    experiment_id,
                                    self.resolution,
                                    strategy='lazy')

        return nrrd.read(file_name)

    def get_experiments(self,
                        dataframe=False,
                        file_name=None,
                        cre=None,
                        injection_structure_ids=None):
        """
        Read a list of experiments that match certain criteria.  If caching is enabled,
        this will save the whole (unfiltered) list of experiments to a file.

        Parameters
        ----------

        dataframe: boolean
            Return the list of experiments as a Pandas DataFrame.  If False,
            return a list of dictionaries.  Default False.

        file_name: string
            File name to save/read the structures table.  If file_name is None,
            the file_name will be pulled out of the manifest.  If caching
            is disabled, no file will be saved. Default is None.

        cre: boolean or list
            If True, return only cre-positive experiments.  If False, return only
            cre-negative experiments.  If None, return all experients. If list, return
            all experiments with cre line names in the supplied list. Default None.

        injection_structure_ids: list
            Only return experiments that were injected in the structures provided here.
            If None, return all experiments.  Default None.

        """

        file_name = self.get_cache_path(file_name, self.EXPERIMENTS_KEY)

        experiments = self.api.get_experiments_api(path=file_name,
                                                   strategy='lazy',
                                                   **Cache.cache_json())

        for e in experiments:
            # renaming id
            e['id'] = e['data_set_id']
            del e['data_set_id']

            # simplify trangsenic line
            tl = e.get('transgenic_line', None)
            if tl:
                e['transgenic_line'] = tl['name']

            # parse the injection structures
            injs = [int(i) for i in e['injection_structures'].split('/')]
            e['injection_structures'] = injs
            e['primary_injection_structure'] = injs[0]

            # remove storage dir
            del e['storage_directory']

        # filter the read/downloaded list of experiments
        experiments = self.filter_experiments(experiments, cre,
                                              injection_structure_ids)

        if dataframe:
            experiments = pd.DataFrame(experiments)
            experiments.set_index(['id'], inplace=True, drop=False)

        return experiments

    def filter_experiments(self,
                           experiments,
                           cre=None,
                           injection_structure_ids=None):
        """
        Take a list of experiments and filter them by cre status and injection structure.

        Parameters
        ----------

        cre: boolean or list
            If True, return only cre-positive experiments.  If False, return only
            cre-negative experiments.  If None, return all experients. If list, return
            all experiments with cre line names in the supplied list. Default None.

        injection_structure_ids: list
            Only return experiments that were injected in the structures provided here.
            If None, return all experiments.  Default None.
        """

        if cre is True:
            experiments = [e for e in experiments if e['transgenic_line']]
        elif cre is False:
            experiments = [e for e in experiments if not e['transgenic_line']]
        elif cre is not None:
            cre = [c.lower() for c in cre]
            experiments = [
                e for e in experiments if e['transgenic_line'] is not None
                and e['transgenic_line'].lower() in cre
            ]

        if injection_structure_ids is not None:
            structure_ids = MouseConnectivityCache.validate_structure_ids(
                injection_structure_ids)
            descendant_ids = set(
                reduce(
                    op.add,
                    self.get_structure_tree().descendant_ids(
                        injection_structure_ids)))

            experiments = [
                e for e in experiments if e['structure_id'] in descendant_ids
            ]

        return experiments

    def get_experiment_structure_unionizes(self,
                                           experiment_id,
                                           file_name=None,
                                           is_injection=None,
                                           structure_ids=None,
                                           include_descendants=False,
                                           hemisphere_ids=None):
        """
        Retrieve the structure unionize data for a specific experiment.  Filter by
        structure, injection status, and hemisphere.

        Parameters
        ----------

        experiment_id: int
            ID of the experiment of interest.  Corresponds to section_data_set_id in the API.

        file_name: string
            File name to save/read the experiments list.  If file_name is None,
            the file_name will be pulled out of the manifest.  If caching
            is disabled, no file will be saved. Default is None.

        is_injection: boolean
            If True, only return unionize records that disregard non-injection pixels.
            If False, only return unionize records that disregard injection pixels.
            If None, return all records.  Default None.

        structure_ids: list
            Only return unionize records for a specific set of structures.
            If None, return all records. Default None.

        include_descendants: boolean
            Include all descendant records for specified structures. Default False.

        hemisphere_ids: list
            Only return unionize records that disregard pixels outside of a hemisphere.
            or set of hemispheres. Left = 1, Right = 2, Both = 3.  If None, include all
            records [1, 2, 3].  Default None.

        """

        file_name = self.get_cache_path(file_name,
                                        self.STRUCTURE_UNIONIZES_KEY,
                                        experiment_id)

        filter_fn = functools.partial(self.filter_structure_unionizes,
                                      is_injection=is_injection,
                                      structure_ids=structure_ids,
                                      include_descendants=include_descendants,
                                      hemisphere_ids=hemisphere_ids)

        col_rn = lambda x: pd.DataFrame(x).rename(
            columns={'section_data_set_id': 'experiment_id'})

        return self.api.get_structure_unionizes(
            [experiment_id],
            path=file_name,
            strategy='lazy',
            pre=col_rn,
            post=filter_fn,
            writer=lambda p, x: pd.DataFrame(x).to_csv(p),
            reader=lambda x: pd.read_csv(x, index_col=0, parse_dates=True))

    def rank_structures(self,
                        experiment_ids,
                        is_injection,
                        structure_ids=None,
                        hemisphere_ids=None,
                        rank_on='normalized_projection_volume',
                        n=5,
                        threshold=10**-2):
        '''Produces one or more (per experiment) ranked lists of brain structures, using a specified data field.

        Parameters
        ----------
        experiment_ids : list of int
            Obtain injection_structures for these experiments.
        is_injection : boolean
            Use data from only injection (or non-injection) unionizes.
        structure_ids : list of int, optional
            Consider only these structures. It is a good idea to make sure that these structures are not spatially
            overlapping; otherwise your results will contain redundant information. Defaults to the summary
            structures - a brain-wide list of nonoverlapping mid-level structures.
        hemisphere_ids : list of int, optional
            Consider only these hemispheres (1: left, 2: right, 3: both). Like with structures,
            you might get redundant results if you select overlapping options. Defaults to [1, 2].
        rank_on : str, optional
            Rank unionize data using this field (descending). Defaults to normalized_projection_volume.
        n : int, optional
            Return only the top n structures.
        threshold : float, optional
            Consider only records whose data value - specified by the rank_on parameter - exceeds this value.

        Returns
        -------
        list :
            Each element (1 for each input experiment) is a list of dictionaries. The dictionaries describe the top
            injection structures in descending order. They are specified by their structure and hemisphere id fields and
            additionally report the value specified by the rank_on parameter.

        '''

        output_keys = [
            'experiment_id', rank_on, 'hemisphere_id', 'structure_id'
        ]

        if hemisphere_ids is None:
            hemisphere_ids = [1, 2]
        if structure_ids is None:
            structure_ids = self.default_structure_ids

        unionizes = self.get_structure_unionizes(experiment_ids,
                                                 is_injection=is_injection,
                                                 structure_ids=structure_ids,
                                                 hemisphere_ids=hemisphere_ids,
                                                 include_descendants=False)
        unionizes = unionizes[unionizes[rank_on] > threshold]

        results = []
        for eid in experiment_ids:

            this_experiment_unionizes = unionizes[unionizes['experiment_id'] ==
                                                  eid]
            this_experiment_unionizes = this_experiment_unionizes.sort_values(
                by=rank_on, ascending=False)
            this_experiment_unionizes = this_experiment_unionizes.loc[:,
                                                                      output_keys]

            records = this_experiment_unionizes.to_dict('record')
            if len(records) > n:
                records = records[:n]
            results.append(records)

        return results

    def filter_structure_unionizes(self,
                                   unionizes,
                                   is_injection=None,
                                   structure_ids=None,
                                   include_descendants=False,
                                   hemisphere_ids=None):
        """
        Take a list of unionzes and return a subset of records filtered by injection status, structure, and
        hemisphere.

        Parameters
        ----------
        is_injection: boolean
            If True, only return unionize records that disregard non-injection pixels.
            If False, only return unionize records that disregard injection pixels.
            If None, return all records.  Default None.

        structure_ids: list
            Only return unionize records for a set of structures.
            If None, return all records. Default None.

        include_descendants: boolean
            Include all descendant records for specified structures. Default False.

        hemisphere_ids: list
            Only return unionize records that disregard pixels outside of a hemisphere.
            or set of hemispheres. Left = 1, Right = 2, Both = 3.  If None, include all
            records [1, 2, 3].  Default None.
        """
        if is_injection is not None:
            unionizes = unionizes[unionizes.is_injection == is_injection]

        if structure_ids is not None:
            structure_ids = MouseConnectivityCache.validate_structure_ids(
                structure_ids)

            if include_descendants:
                structure_ids = reduce(
                    op.add,
                    self.get_structure_tree().descendant_ids(structure_ids))
            else:
                structure_ids = set(structure_ids)

            unionizes = unionizes[unionizes['structure_id'].isin(
                structure_ids)]

        if hemisphere_ids is not None:
            unionizes = unionizes[unionizes['hemisphere_id'].isin(
                hemisphere_ids)]

        return unionizes

    def get_structure_unionizes(self,
                                experiment_ids,
                                is_injection=None,
                                structure_ids=None,
                                include_descendants=False,
                                hemisphere_ids=None):
        """
        Get structure unionizes for a set of experiment IDs.  Filter the results by injection status,
        structure, and hemisphere.

        Parameters
        ----------
        experiment_ids: list
            List of experiment IDs.  Corresponds to section_data_set_id in the API.

        is_injection: boolean
            If True, only return unionize records that disregard non-injection pixels.
            If False, only return unionize records that disregard injection pixels.
            If None, return all records.  Default None.

        structure_ids: list
            Only return unionize records for a specific set of structures.
            If None, return all records. Default None.

        include_descendants: boolean
            Include all descendant records for specified structures. Default False.

        hemisphere_ids: list
            Only return unionize records that disregard pixels outside of a hemisphere.
            or set of hemispheres. Left = 1, Right = 2, Both = 3.  If None, include all
            records [1, 2, 3].  Default None.
        """

        unionizes = [
            self.get_experiment_structure_unionizes(
                eid,
                is_injection=is_injection,
                structure_ids=structure_ids,
                include_descendants=include_descendants,
                hemisphere_ids=hemisphere_ids) for eid in experiment_ids
        ]

        return pd.concat(unionizes, ignore_index=True, sort=True)

    def get_projection_matrix(self,
                              experiment_ids,
                              projection_structure_ids=None,
                              hemisphere_ids=None,
                              parameter='projection_volume',
                              dataframe=False):

        if projection_structure_ids is None:
            projection_structure_ids = self.default_structure_ids

        unionizes = self.get_structure_unionizes(
            experiment_ids,
            is_injection=False,
            structure_ids=projection_structure_ids,
            include_descendants=False,
            hemisphere_ids=hemisphere_ids)

        hemisphere_ids = set(unionizes['hemisphere_id'].values.tolist())

        nrows = len(experiment_ids)
        ncolumns = len(projection_structure_ids) * len(hemisphere_ids)

        matrix = np.empty((nrows, ncolumns))
        matrix[:] = np.NAN

        row_lookup = {}
        for idx, e in enumerate(experiment_ids):
            row_lookup[e] = idx

        column_lookup = {}
        columns = []

        cidx = 0
        hlabel = {1: '-L', 2: '-R', 3: ''}

        acronym_map = self.get_structure_tree().value_map(
            lambda x: x['id'], lambda x: x['acronym'])

        for hid in hemisphere_ids:
            for sid in projection_structure_ids:
                column_lookup[(hid, sid)] = cidx
                label = acronym_map[sid] + hlabel[hid]
                columns.append({
                    'hemisphere_id': hid,
                    'structure_id': sid,
                    'label': label
                })
                cidx += 1

        for _, row in unionizes.iterrows():
            ridx = row_lookup[row['experiment_id']]
            k = (row['hemisphere_id'], row['structure_id'])
            cidx = column_lookup[k]
            matrix[ridx, cidx] = row[parameter]

        if dataframe:
            warnings.warn("dataframe argument is deprecated.")
            all_experiments = self.get_experiments(dataframe=True)

            rows_df = all_experiments.loc[experiment_ids]

            cols_df = pd.DataFrame(columns)

            return {'matrix': matrix, 'rows': rows_df, 'columns': cols_df}
        else:
            return {
                'matrix': matrix,
                'rows': experiment_ids,
                'columns': columns
            }

    def get_deformation_field(self,
                              section_data_set_id,
                              header_path=None,
                              voxel_path=None):
        ''' Extract the local alignment parameters for this dataset. This a 3D vector image (3 components) describing 
        a deformable local mapping from CCF voxels to this section data set's affine-aligned image stack.

        Parameters
        ----------
            section_data_set_id : int
                Download the deformation field for this data set
            header_path : str, optional
                If supplied, the deformation field header will be downloaded to this path.
            voxel_path : str, optiona
                If supplied, the deformation field voxels will be downloaded to this path.

        Returns
        -------
            numpy.ndarray : 
                3D X 3 component vector array (origin 0, 0, 0; 25-micron isometric resolution) defining a 
                deformable transformation from CCF-space to affine-transformed image space.

        '''

        if self.resolution not in self.DFMFLD_RESOLUTIONS:
            warnings.warn(
                'deformation fields are only available at {} isometric resolutions, but this is a '\
                '{}-micron cache'.format(self.DFMFLD_RESOLUTIONS, self.resolution)
            )

        header_path = self.get_cache_path(header_path,
                                          self.DEFORMATION_FIELD_HEADER_KEY,
                                          section_data_set_id)
        voxel_path = self.get_cache_path(voxel_path,
                                         self.DEFORMATION_FIELD_VOXEL_KEY,
                                         section_data_set_id)

        if not (os.path.exists(header_path) and os.path.exists(voxel_path)):
            Manifest.safe_make_parent_dirs(header_path)
            Manifest.safe_make_parent_dirs(voxel_path)
            self.api.download_deformation_field(section_data_set_id,
                                                header_path=header_path,
                                                voxel_path=voxel_path)

        return sitk.GetArrayFromImage(sitk.ReadImage(str(
            header_path)))  # TODO the str call here is only necessary in 2.7

    def get_affine_parameters(self,
                              section_data_set_id,
                              direction='trv',
                              file_name=None):
        ''' Extract the parameters of the 3D affine tranformation mapping this section data set's image-space stack to 
        CCF-space (or vice-versa).

        Parameters
        ----------
        section_data_set_id : int
            download the parameters for this data set.
        direction : str, optional
            Valid options are:
                trv : "transform from reference to volume". Maps CCF points to image space points. If you are 
                    resampling data into CCF, this is the direction you want.
                tvr : "transform from volume to reference". Maps image space points to CCF points.
        file_name : str
            If provided, store the downloaded file here.
 
        Returns
        -------
        alignment : numpy.ndarray
            4 X 3 matrix. In order to transform a point [X_1, X_2, X_3] run 
                np.dot([X_1, X_2, X_3, 1], alignment). In 
            to build a SimpleITK affine transform run:
                transform = sitk.AffineTransform(3)
                transform.SetParameters(alignment.flatten())

        '''

        if not direction in ('trv', 'tvr'):
            raise ArgumentError(
                'invalid direction: {}. direction must be one of tvr, trv'.
                format(direction))

        file_name = self.get_cache_path(file_name, self.ALIGNMENT3D_KEY)

        raw_alignment = self.api.download_alignment3d(
            strategy='lazy',
            path=file_name,
            section_data_set_id=section_data_set_id,
            **Cache.cache_json())

        alignment_re = re.compile('{}_(?P<index>\d+)'.format(direction))
        alignment = np.zeros((4, 3), dtype=float)

        for entry, value in raw_alignment.items():
            match = alignment_re.match(entry)
            if match is not None:
                alignment.flat[int(match.group('index'))] = value

        return alignment

    def add_manifest_paths(self, manifest_builder):
        """
        Construct a manifest for this Cache class and save it in a file.

        Parameters
        ----------

        file_name: string
            File location to save the manifest.

        """

        manifest_builder = super(MouseConnectivityCache,
                                 self).add_manifest_paths(manifest_builder)

        manifest_builder.add_path(self.EXPERIMENTS_KEY,
                                  'experiments.json',
                                  parent_key='BASEDIR',
                                  typename='file')

        manifest_builder.add_path(self.STRUCTURE_UNIONIZES_KEY,
                                  'experiment_%d/structure_unionizes.csv',
                                  parent_key='BASEDIR',
                                  typename='file')

        manifest_builder.add_path(self.INJECTION_DENSITY_KEY,
                                  'experiment_%d/injection_density_%d.nrrd',
                                  parent_key='BASEDIR',
                                  typename='file')

        manifest_builder.add_path(self.INJECTION_FRACTION_KEY,
                                  'experiment_%d/injection_fraction_%d.nrrd',
                                  parent_key='BASEDIR',
                                  typename='file')

        manifest_builder.add_path(self.DATA_MASK_KEY,
                                  'experiment_%d/data_mask_%d.nrrd',
                                  parent_key='BASEDIR',
                                  typename='file')

        manifest_builder.add_path(self.PROJECTION_DENSITY_KEY,
                                  'experiment_%d/projection_density_%d.nrrd',
                                  parent_key='BASEDIR',
                                  typename='file')

        manifest_builder.add_path(self.DEFORMATION_FIELD_HEADER_KEY,
                                  'experiment_%d/dfmfld.mhd',
                                  parent_key='BASEDIR',
                                  typename='file')

        manifest_builder.add_path(self.DEFORMATION_FIELD_VOXEL_KEY,
                                  'experiment_%d/dfmfld.raw',
                                  parent_key='BASEDIR',
                                  typename='file')

        manifest_builder.add_path(self.ALIGNMENT3D_KEY,
                                  'experiment_%d/alignment3d.json',
                                  parent_key='BASEDIR',
                                  typename='file')

        return manifest_builder
Esempio n. 5
0
class MouseConnectivityCache(ReferenceSpaceCache):
    """
    Cache class for storing and accessing data related to the adult mouse
    Connectivity Atlas.  By default, this class will cache any downloaded
    metadata or files in well known locations defined in a manifest file.
    This behavior can be disabled.

    Attributes
    ----------

    resolution: int
        Resolution of grid data to be downloaded when accessing projection volume,
        the annotation volume, and the annotation volume.  Must be one of (10, 25,
        50, 100).  Default is 25.

    api: MouseConnectivityApi instance
        Used internally to make API queries.

    Parameters
    ----------

    resolution: int
        Resolution of grid data to be downloaded when accessing projection volume,
        the annotation volume, and the annotation volume.  Must be one of (10, 25,
        50, 100).  Default is 25.

    ccf_version: string
        Desired version of the Common Coordinate Framework.  This affects the annotation 
        volume (get_annotation_volume) and structure masks (get_structure_mask). 
        Must be one of (MouseConnectivityApi.CCF_2015, MouseConnectivityApi.CCF_2016). 
        Default: MouseConnectivityApi.CCF_2016

    cache: boolean
        Whether the class should save results of API queries to locations specified
        in the manifest file.  Queries for files (as opposed to metadata) must have a
        file location.  If caching is disabled, those locations must be specified
        in the function call (e.g. get_projection_density(file_name='file.nrrd')).

    manifest_file: string
        File name of the manifest to be read.  Default is "mouse_connectivity_manifest.json".

    """

    PROJECTION_DENSITY_KEY = 'PROJECTION_DENSITY'
    INJECTION_DENSITY_KEY = 'INJECTION_DENSITY'
    INJECTION_FRACTION_KEY = 'INJECTION_FRACTION'
    DATA_MASK_KEY = 'DATA_MASK'
    STRUCTURE_UNIONIZES_KEY = 'STRUCTURE_UNIONIZES'
    EXPERIMENTS_KEY = 'EXPERIMENTS'

    MANIFEST_VERSION = 1.2

    SUMMARY_STRUCTURE_SET_ID = 167587189
    DEFAULT_STRUCTURE_SET_IDS = tuple([SUMMARY_STRUCTURE_SET_ID])

    @property
    def default_structure_ids(self):

        if not hasattr(self, '_default_structure_ids'):
            tree = self.get_structure_tree()
            default_structures = tree.get_structures_by_set_id(
                MouseConnectivityCache.DEFAULT_STRUCTURE_SET_IDS)
            self._default_structure_ids = [
                st['id'] for st in default_structures
            ]

        return self._default_structure_ids

    def __init__(self,
                 resolution=None,
                 cache=True,
                 manifest_file='mouse_connectivity_manifest.json',
                 ccf_version=None,
                 base_uri=None,
                 version=None):

        if version is None:
            version = self.MANIFEST_VERSION

        if resolution is None:
            resolution = MouseConnectivityApi.VOXEL_RESOLUTION_25_MICRONS

        if ccf_version is None:
            ccf_version = MouseConnectivityApi.CCF_VERSION_DEFAULT

        super(MouseConnectivityCache,
              self).__init__(resolution,
                             reference_space_key=ccf_version,
                             cache=cache,
                             manifest=manifest_file,
                             version=version)

        self.api = MouseConnectivityApi(base_uri=base_uri)

    def get_projection_density(self, experiment_id, file_name=None):
        """
        Read a projection density volume for a single experiment.  Download it
        first if it doesn't exist.  Projection density is the proportion of
        of projecting pixels in a grid voxel in [0,1].

        Parameters
        ----------

        experiment_id: int
            ID of the experiment to download/read.  This corresponds to
            section_data_set_id in the API.

        file_name: string
            File name to store the template volume.  If it already exists,
            it will be read from this file.  If file_name is None, the
            file_name will be pulled out of the manifest.  Default is None.

        """

        file_name = self.get_cache_path(file_name, self.PROJECTION_DENSITY_KEY,
                                        experiment_id, self.resolution)

        self.api.download_projection_density(file_name,
                                             experiment_id,
                                             self.resolution,
                                             strategy='lazy')

        return nrrd.read(file_name)

    def get_injection_density(self, experiment_id, file_name=None):
        """
        Read an injection density volume for a single experiment. Download it
        first if it doesn't exist.  Injection density is the proportion of
        projecting pixels in a grid voxel only including pixels that are
        part of the injection site in [0,1].

        Parameters
        ----------

        experiment_id: int
            ID of the experiment to download/read.  This corresponds to
            section_data_set_id in the API.

        file_name: string
            File name to store the template volume.  If it already exists,
            it will be read from this file.  If file_name is None, the
            file_name will be pulled out of the manifest.  Default is None.

        """

        file_name = self.get_cache_path(file_name, self.INJECTION_DENSITY_KEY,
                                        experiment_id, self.resolution)
        self.api.download_injection_density(file_name,
                                            experiment_id,
                                            self.resolution,
                                            strategy='lazy')

        return nrrd.read(file_name)

    def get_injection_fraction(self, experiment_id, file_name=None):
        """
        Read an injection fraction volume for a single experiment. Download it
        first if it doesn't exist.  Injection fraction is the proportion of
        pixels in the injection site in a grid voxel in [0,1].

        Parameters
        ----------

        experiment_id: int
            ID of the experiment to download/read.  This corresponds to
            section_data_set_id in the API.

        file_name: string
            File name to store the template volume.  If it already exists,
            it will be read from this file.  If file_name is None, the
            file_name will be pulled out of the manifest.  Default is None.

        """

        file_name = self.get_cache_path(file_name, self.INJECTION_FRACTION_KEY,
                                        experiment_id, self.resolution)
        self.api.download_injection_fraction(file_name,
                                             experiment_id,
                                             self.resolution,
                                             strategy='lazy')

        return nrrd.read(file_name)

    def get_data_mask(self, experiment_id, file_name=None):
        """
        Read a data mask volume for a single experiment. Download it
        first if it doesn't exist.  Data mask is a binary mask of
        voxels that have valid data.  Only use valid data in analysis!

        Parameters
        ----------

        experiment_id: int
            ID of the experiment to download/read.  This corresponds to
            section_data_set_id in the API.

        file_name: string
            File name to store the template volume.  If it already exists,
            it will be read from this file.  If file_name is None, the
            file_name will be pulled out of the manifest.  Default is None.

        """

        file_name = self.get_cache_path(file_name, self.DATA_MASK_KEY,
                                        experiment_id, self.resolution)
        self.api.download_data_mask(file_name,
                                    experiment_id,
                                    self.resolution,
                                    strategy='lazy')

        return nrrd.read(file_name)

    def get_experiments(self,
                        dataframe=False,
                        file_name=None,
                        cre=None,
                        injection_structure_ids=None):
        """
        Read a list of experiments that match certain criteria.  If caching is enabled,
        this will save the whole (unfiltered) list of experiments to a file.

        Parameters
        ----------

        dataframe: boolean
            Return the list of experiments as a Pandas DataFrame.  If False,
            return a list of dictionaries.  Default False.

        file_name: string
            File name to save/read the structures table.  If file_name is None,
            the file_name will be pulled out of the manifest.  If caching
            is disabled, no file will be saved. Default is None.

        cre: boolean or list
            If True, return only cre-positive experiments.  If False, return only
            cre-negative experiments.  If None, return all experients. If list, return
            all experiments with cre line names in the supplied list. Default None.

        injection_structure_ids: list
            Only return experiments that were injected in the structures provided here.
            If None, return all experiments.  Default None.

        """

        file_name = self.get_cache_path(file_name, self.EXPERIMENTS_KEY)

        if os.path.exists(file_name):
            experiments = json_utilities.read(file_name)
        else:
            experiments = self.api.experiment_source_search(
                injection_structures='root')

            # removing these elements because they are specific to a particular
            # resolution
            for e in experiments:
                del e['num-voxels']
                del e['injection-volume']
                del e['sum']
                del e['name']

            if self.cache:
                Manifest.safe_make_parent_dirs(file_name)

                json_utilities.write(file_name, experiments)

        # filter the read/downloaded list of experiments
        experiments = self.filter_experiments(experiments, cre,
                                              injection_structure_ids)

        if dataframe:
            experiments = pd.DataFrame(experiments)
            experiments.set_index(['id'], inplace=True, drop=False)

        return experiments

    def filter_experiments(self,
                           experiments,
                           cre=None,
                           injection_structure_ids=None):
        """
        Take a list of experiments and filter them by cre status and injection structure.

        Parameters
        ----------

        cre: boolean or list
            If True, return only cre-positive experiments.  If False, return only
            cre-negative experiments.  If None, return all experients. If list, return
            all experiments with cre line names in the supplied list. Default None.

        injection_structure_ids: list
            Only return experiments that were injected in the structures provided here.
            If None, return all experiments.  Default None.
        """

        if cre is True:
            experiments = [e for e in experiments if e['transgenic-line']]
        elif cre is False:
            experiments = [e for e in experiments if not e['transgenic-line']]
        elif cre is not None:
            cre = [c.lower() for c in cre]
            experiments = [
                e for e in experiments if e['transgenic-line'].lower() in cre
            ]

        if injection_structure_ids is not None:
            structure_ids = MouseConnectivityCache.validate_structure_ids(
                injection_structure_ids)

            descendant_ids = reduce(op.add, self.get_structure_tree()\
                                    .descendant_ids(injection_structure_ids))
            experiments = [
                e for e in experiments if e['structure-id'] in descendant_ids
            ]

        return experiments

    def get_experiment_structure_unionizes(self,
                                           experiment_id,
                                           file_name=None,
                                           is_injection=None,
                                           structure_ids=None,
                                           include_descendants=False,
                                           hemisphere_ids=None):
        """
        Retrieve the structure unionize data for a specific experiment.  Filter by
        structure, injection status, and hemisphere.

        Parameters
        ----------

        experiment_id: int
            ID of the experiment of interest.  Corresponds to section_data_set_id in the API.

        file_name: string
            File name to save/read the experiments list.  If file_name is None,
            the file_name will be pulled out of the manifest.  If caching
            is disabled, no file will be saved. Default is None.

        is_injection: boolean
            If True, only return unionize records that disregard non-injection pixels.
            If False, only return unionize records that disregard injection pixels.
            If None, return all records.  Default None.

        structure_ids: list
            Only return unionize records for a specific set of structures.
            If None, return all records. Default None.

        include_descendants: boolean
            Include all descendant records for specified structures. Default False.

        hemisphere_ids: list
            Only return unionize records that disregard pixels outside of a hemisphere.
            or set of hemispheres. Left = 1, Right = 2, Both = 3.  If None, include all
            records [1, 2, 3].  Default None.

        """

        file_name = self.get_cache_path(file_name,
                                        self.STRUCTURE_UNIONIZES_KEY,
                                        experiment_id)

        filter_fn = functools.partial(self.filter_structure_unionizes,
                                      is_injection=is_injection,
                                      structure_ids=structure_ids,
                                      include_descendants=include_descendants,
                                      hemisphere_ids=hemisphere_ids)

        col_rn = lambda x: pd.DataFrame(x).rename(
            columns={'section_data_set_id': 'experiment_id'})

        return self.api.get_structure_unionizes(
            [experiment_id],
            path=file_name,
            strategy='lazy',
            pre=col_rn,
            post=filter_fn,
            writer=lambda p, x: pd.DataFrame(x).to_csv(p),
            reader=pd.DataFrame.from_csv)

    def rank_structures(self,
                        experiment_ids,
                        is_injection,
                        structure_ids=None,
                        hemisphere_ids=None,
                        rank_on='normalized_projection_volume',
                        n=5,
                        threshold=10**-2):
        '''Produces one or more (per experiment) ranked lists of brain structures, using a specified data field.

        Parameters
        ----------
        experiment_ids : list of int
            Obtain injection_structures for these experiments.
        is_injection : boolean
            Use data from only injection (or non-injection) unionizes.
        structure_ids : list of int, optional
            Consider only these structures. It is a good idea to make sure that these structures are not spatially 
            overlapping; otherwise your results will contain redundant information. Defaults to the summary 
            structures - a brain-wide list of nonoverlapping mid-level structures.
        hemisphere_ids : list of int, optional
            Consider only these hemispheres (1: left, 2: right, 3: both). Like with structures, 
            you might get redundant results if you select overlapping options. Defaults to [1, 2].
        rank_on : str, optional
            Rank unionize data using this field (descending). Defaults to normalized_projection_volume.
        n : int, optional
            Return only the top n structures.
        threshold : float, optional
            Consider only records whose data value - specified by the rank_on parameter - exceeds this value.

        Returns
        -------
        list : 
            Each element (1 for each input experiment) is a list of dictionaries. The dictionaries describe the top
            injection structures in descending order. They are specified by their structure and hemisphere id fields and 
            additionally report the value specified by the rank_on parameter.

        '''

        output_keys = [
            'experiment_id', rank_on, 'hemisphere_id', 'structure_id'
        ]
        filter_fields = lambda fieldname: fieldname in output_keys

        if hemisphere_ids is None:
            hemisphere_ids = [1, 2]
        if structure_ids is None:
            structure_ids = self.default_structure_ids

        unionizes = self.get_structure_unionizes(experiment_ids,
                                                 is_injection=is_injection,
                                                 structure_ids=structure_ids,
                                                 hemisphere_ids=hemisphere_ids,
                                                 include_descendants=False)
        unionizes = unionizes[unionizes[rank_on] > threshold]

        results = []
        for eid in experiment_ids:

            this_experiment_unionizes = unionizes[unionizes['experiment_id'] ==
                                                  eid]
            this_experiment_unionizes = this_experiment_unionizes.sort_values(
                by=rank_on, ascending=False)
            this_experiment_unionizes = this_experiment_unionizes.select(
                filter_fields, axis=1)

            records = this_experiment_unionizes.to_dict('record')
            if len(records) > n:
                records = records[:n]
            results.append(records)

        return results

    def filter_structure_unionizes(self,
                                   unionizes,
                                   is_injection=None,
                                   structure_ids=None,
                                   include_descendants=False,
                                   hemisphere_ids=None):
        """
        Take a list of unionzes and return a subset of records filtered by injection status, structure, and
        hemisphere.

        Parameters
        ----------
        is_injection: boolean
            If True, only return unionize records that disregard non-injection pixels.
            If False, only return unionize records that disregard injection pixels.
            If None, return all records.  Default None.

        structure_ids: list
            Only return unionize records for a set of structures.
            If None, return all records. Default None.

        include_descendants: boolean
            Include all descendant records for specified structures. Default False.

        hemisphere_ids: list
            Only return unionize records that disregard pixels outside of a hemisphere.
            or set of hemispheres. Left = 1, Right = 2, Both = 3.  If None, include all
            records [1, 2, 3].  Default None.
        """
        if is_injection is not None:
            unionizes = unionizes[unionizes.is_injection == is_injection]

        if structure_ids is not None:
            structure_ids = MouseConnectivityCache.validate_structure_ids(
                structure_ids)

            if include_descendants:
                structure_ids = reduce(
                    op.add,
                    self.get_structure_tree().descendant_ids(structure_ids))
            else:
                structure_ids = set(structure_ids)

            unionizes = unionizes[unionizes['structure_id'].isin(
                structure_ids)]

        if hemisphere_ids is not None:
            unionizes = unionizes[unionizes['hemisphere_id'].isin(
                hemisphere_ids)]

        return unionizes

    def get_structure_unionizes(self,
                                experiment_ids,
                                is_injection=None,
                                structure_ids=None,
                                include_descendants=False,
                                hemisphere_ids=None):
        """
        Get structure unionizes for a set of experiment IDs.  Filter the results by injection status,
        structure, and hemisphere.

        Parameters
        ----------
        experiment_ids: list
            List of experiment IDs.  Corresponds to section_data_set_id in the API.

        is_injection: boolean
            If True, only return unionize records that disregard non-injection pixels.
            If False, only return unionize records that disregard injection pixels.
            If None, return all records.  Default None.

        structure_ids: list
            Only return unionize records for a specific set of structures.
            If None, return all records. Default None.

        include_descendants: boolean
            Include all descendant records for specified structures. Default False.

        hemisphere_ids: list
            Only return unionize records that disregard pixels outside of a hemisphere.
            or set of hemispheres. Left = 1, Right = 2, Both = 3.  If None, include all
            records [1, 2, 3].  Default None.
        """

        unionizes = [
            self.get_experiment_structure_unionizes(
                eid,
                is_injection=is_injection,
                structure_ids=structure_ids,
                include_descendants=include_descendants,
                hemisphere_ids=hemisphere_ids) for eid in experiment_ids
        ]

        return pd.concat(unionizes, ignore_index=True)

    def get_projection_matrix(self,
                              experiment_ids,
                              projection_structure_ids=None,
                              hemisphere_ids=None,
                              parameter='projection_volume',
                              dataframe=False):

        if projection_structure_ids is None:
            projection_structure_ids = self.default_structure_ids

        unionizes = self.get_structure_unionizes(
            experiment_ids,
            is_injection=False,
            structure_ids=projection_structure_ids,
            include_descendants=False,
            hemisphere_ids=hemisphere_ids)

        hemisphere_ids = set(unionizes['hemisphere_id'].values.tolist())

        nrows = len(experiment_ids)
        ncolumns = len(projection_structure_ids) * len(hemisphere_ids)

        matrix = np.empty((nrows, ncolumns))
        matrix[:] = np.NAN

        row_lookup = {}
        for idx, e in enumerate(experiment_ids):
            row_lookup[e] = idx

        column_lookup = {}
        columns = []

        cidx = 0
        hlabel = {1: '-L', 2: '-R', 3: ''}

        acronym_map = self.get_structure_tree().value_map(
            lambda x: x['id'], lambda x: x['acronym'])

        for hid in hemisphere_ids:
            for sid in projection_structure_ids:
                column_lookup[(hid, sid)] = cidx
                label = acronym_map[sid] + hlabel[hid]
                columns.append({
                    'hemisphere_id': hid,
                    'structure_id': sid,
                    'label': label
                })
                cidx += 1

        for _, row in unionizes.iterrows():
            ridx = row_lookup[row['experiment_id']]
            k = (row['hemisphere_id'], row['structure_id'])
            cidx = column_lookup[k]
            matrix[ridx, cidx] = row[parameter]

        if dataframe:
            warnings.warn("dataframe argument is deprecated.")
            all_experiments = self.get_experiments(dataframe=True)

            rows_df = all_experiments.loc[experiment_ids]

            cols_df = pd.DataFrame(columns)

            return {'matrix': matrix, 'rows': rows_df, 'columns': cols_df}
        else:
            return {
                'matrix': matrix,
                'rows': experiment_ids,
                'columns': columns
            }

    def add_manifest_paths(self, manifest_builder):
        """
        Construct a manifest for this Cache class and save it in a file.

        Parameters
        ----------

        file_name: string
            File location to save the manifest.

        """

        manifest_builder = super(MouseConnectivityCache,
                                 self).add_manifest_paths(manifest_builder)

        manifest_builder.add_path(self.EXPERIMENTS_KEY,
                                  'experiments.json',
                                  parent_key='BASEDIR',
                                  typename='file')

        manifest_builder.add_path(self.STRUCTURE_UNIONIZES_KEY,
                                  'experiment_%d/structure_unionizes.csv',
                                  parent_key='BASEDIR',
                                  typename='file')

        manifest_builder.add_path(self.INJECTION_DENSITY_KEY,
                                  'experiment_%d/injection_density_%d.nrrd',
                                  parent_key='BASEDIR',
                                  typename='file')

        manifest_builder.add_path(self.INJECTION_FRACTION_KEY,
                                  'experiment_%d/injection_fraction_%d.nrrd',
                                  parent_key='BASEDIR',
                                  typename='file')

        manifest_builder.add_path(self.DATA_MASK_KEY,
                                  'experiment_%d/data_mask_%d.nrrd',
                                  parent_key='BASEDIR',
                                  typename='file')

        manifest_builder.add_path(self.PROJECTION_DENSITY_KEY,
                                  'experiment_%d/projection_density_%d.nrrd',
                                  parent_key='BASEDIR',
                                  typename='file')

        return manifest_builder