class CellTypesCache(Cache):
    """
    Cache class for storing and accessing data from the Cell Types Database.
    By default, this class will cache any downloaded metadata or files in
    well known locations defined in a manifest file.  This behavior can be
    disabled.

    Attributes
    ----------

    api: CellTypesApi instance
        The object used for making API queries related to the Cell Types Database

    Parameters
    ----------

    cache: boolean
        Whether the class should save results of API queries to locations specified
        in the manifest file.  Queries for files (as opposed to metadata) must have a
        file location.  If caching is disabled, those locations must be specified
        in the function call (e.g. get_ephys_data(file_name='file.nwb')).

    manifest_file: string
       File name of the manifest to be read.  Default is "cell_types_manifest.json".
    """

    # manifest keys
    CELLS_KEY = 'CELLS'
    EPHYS_FEATURES_KEY = 'EPHYS_FEATURES'
    MORPHOLOGY_FEATURES_KEY = 'MORPHOLOGY_FEATURES'
    EPHYS_DATA_KEY = 'EPHYS_DATA'
    EPHYS_SWEEPS_KEY = 'EPHYS_SWEEPS'
    RECONSTRUCTION_KEY = 'RECONSTRUCTION'
    MARKER_KEY = 'MARKER'
    MANIFEST_VERSION = "1.1"

    def __init__(self, cache=True, manifest_file=None, base_uri=None):

        if manifest_file is None:
            manifest_file = get_default_manifest_file('cell_types')

        super(CellTypesCache, self).__init__(
            manifest=manifest_file, cache=cache, version=self.MANIFEST_VERSION)
        self.api = CellTypesApi(base_uri=base_uri)

    def get_cells(self, file_name=None,
                  require_morphology=False,
                  require_reconstruction=False,
                  reporter_status=None,
                  species=None,
                  simple=True):
        """
        Download metadata for all cells in the database and optionally return a
        subset filtered by whether or not they have a morphology or reconstruction.

        Parameters
        ----------

        file_name: string
            File name to save/read the cell metadata as JSON.  If file_name is None,
            the file_name will be pulled out of the manifest.  If caching
            is disabled, no file will be saved. Default is None.

        require_morphology: boolean
            Filter out cells that have no morphological images.

        require_reconstruction: boolean
            Filter out cells that have no morphological reconstructions.

        reporter_status: list
            Filter for cells that have one or more cell reporter statuses.

        species: list
            Filter for cells that belong to one or more species.  If None, return all.
            Must be one of [ CellTypesApi.MOUSE, CellTypesApi.HUMAN ].
        """

        file_name = self.get_cache_path(file_name, self.CELLS_KEY)

        cells = self.api.list_cells_api(path=file_name,
                                        strategy='lazy',
                                        **Cache.cache_json())

        if isinstance(reporter_status, string_types):
            reporter_status = [reporter_status]

        # filter the cells on the way out
        cells = self.api.filter_cells_api(cells,
                                          require_morphology,
                                          require_reconstruction,
                                          reporter_status,
                                          species,
                                          simple)


        return cells




    def get_ephys_sweeps(self, specimen_id, file_name=None):
        """
        Download sweep metadata for a single cell specimen.

        Parameters
        ----------

        specimen_id: int
             ID of a cell.
        """

        file_name = self.get_cache_path(
            file_name, self.EPHYS_SWEEPS_KEY, specimen_id)

        sweeps = self.api.get_ephys_sweeps(specimen_id,
                                           strategy='lazy',
                                           path=file_name,
                                           **Cache.cache_json())

        return sweeps

    def get_ephys_features(self, dataframe=False, file_name=None):
        """
        Download electrophysiology features for all cells in the database.

        Parameters
        ----------

        file_name: string
            File name to save/read the ephys features metadata as CSV.
            If file_name is None, the file_name will be pulled out of the
            manifest.  If caching is disabled, no file will be saved.
            Default is None.

        dataframe: boolean
            Return the output as a Pandas DataFrame.  If False, return
            a list of dictionaries.
        """
        file_name = self.get_cache_path(file_name, self.EPHYS_FEATURES_KEY)

        if self.cache:
            if dataframe:
                warnings.warn("dataframe argument is deprecated.")
                args = Cache.cache_csv_dataframe()
            else:
                args = Cache.cache_csv_json()
            args['strategy'] = 'lazy'
        else:
            args = Cache.nocache_json()

        features_df = self.api.get_ephys_features(path=file_name,
                                                  **args)

        return features_df


    def get_morphology_features(self, dataframe=False, file_name=None):
        """
        Download morphology features for all cells with reconstructions in the database.

        Parameters
        ----------

        file_name: string
            File name to save/read the ephys features metadata as CSV.
            If file_name is None, the file_name will be pulled out of the
            manifest.  If caching is disabled, no file will be saved.
            Default is None.

        dataframe: boolean
            Return the output as a Pandas DataFrame.  If False, return
            a list of dictionaries.
        """

        file_name = self.get_cache_path(
            file_name, self.MORPHOLOGY_FEATURES_KEY)

        if self.cache:
            if dataframe:
                warnings.warn("dataframe argument is deprecated.")
                args = Cache.cache_csv_dataframe()
            else:
                args = Cache.cache_csv_json()
        else:
            args = Cache.nocache_json()

        args['strategy'] = 'lazy'
        args['path'] = file_name

        return self.api.get_morphology_features(**args)


    def get_all_features(self, dataframe=False, require_reconstruction=True):
        """
        Download morphology and electrophysiology features for all cells and merge them
        into a single table.

        Parameters
        ----------

        dataframe: boolean
            Return the output as a Pandas DataFrame.  If False, return
            a list of dictionaries.

        require_reconstruction: boolean
            Only return ephys and morphology features for cells that have
            reconstructions. Default True.
        """

        ephys_features = pd.DataFrame(self.get_ephys_features())
        morphology_features = pd.DataFrame(self.get_morphology_features())

        how = 'inner' if require_reconstruction else 'outer'

        all_features = ephys_features.merge(morphology_features,
                                            how=how,
                                            on='specimen_id')

        if dataframe:
            warnings.warn("dataframe argument is deprecated.")
            return all_features
        else:
            return all_features.to_dict('records')

    def get_ephys_data(self, specimen_id, file_name=None):
        """
        Download electrophysiology traces for a single cell in the database.

        Parameters
        ----------

        specimen_id: int
            The ID of a cell specimen to download.

        file_name: string
            File name to save/read the ephys features metadata as CSV.
            If file_name is None, the file_name will be pulled out of the
            manifest.  If caching is disabled, no file will be saved.
            Default is None.

        Returns
        -------
        NwbDataSet
            A class instance with helper methods for retrieving stimulus
            and response traces out of an NWB file.
        """

        file_name = self.get_cache_path(
            file_name, self.EPHYS_DATA_KEY, specimen_id)

        self.api.save_ephys_data(specimen_id, file_name, strategy='lazy')

        return NwbDataSet(file_name)

    def get_reconstruction(self, specimen_id, file_name=None):
        """
        Download and open a reconstruction for a single cell in the database.

        Parameters
        ----------

        specimen_id: int
            The ID of a cell specimen to download.

        file_name: string
            File name to save/read the reconstruction SWC.
            If file_name is None, the file_name will be pulled out of the
            manifest.  If caching is disabled, no file will be saved.
            Default is None.

        Returns
        -------
        Morphology
             A class instance with methods for accessing morphology compartments.
        """

        file_name = self.get_cache_path(
            file_name, self.RECONSTRUCTION_KEY, specimen_id)

        if file_name is None:
            raise Exception(
                "Please enable caching (CellTypes.cache = True) or specify a save_file_name.")

        if not os.path.exists(file_name):
            self.api.save_reconstruction(specimen_id, file_name)

        return swc.read_swc(file_name)

    def get_reconstruction_markers(self, specimen_id, file_name=None):
        """
        Download and open a reconstruction marker file for a single cell in the database.

        Parameters
        ----------

        specimen_id: int
            The ID of a cell specimen to download.

        file_name: string
            File name to save/read the reconstruction marker.
            If file_name is None, the file_name will be pulled out of the
            manifest.  If caching is disabled, no file will be saved.
            Default is None.

        Returns
        -------
        Morphology
             A class instance with methods for accessing morphology compartments.
        """

        file_name = self.get_cache_path(
            file_name, self.MARKER_KEY, specimen_id)

        if file_name is None:
            raise Exception(
                "Please enable caching (CellTypes.cache = True) or specify a save_file_name.")

        if not os.path.exists(file_name):
            try:
                self.api.save_reconstruction_markers(specimen_id, file_name)
            except LookupError as e:
                logging.warning(e.args)
                return []

        return swc.read_marker_file(file_name)

    def build_manifest(self, file_name):
        """
        Construct a manifest for this Cache class and save it in a file.

        Parameters
        ----------

        file_name: string
            File location to save the manifest.

        """

        mb = ManifestBuilder()
        mb.set_version(self.MANIFEST_VERSION)
        mb.add_path('BASEDIR', '.')
        mb.add_path(self.CELLS_KEY, 'cells.json',
                    typename='file', parent_key='BASEDIR')
        mb.add_path(self.EPHYS_DATA_KEY, 'specimen_%d/ephys.nwb',
                    typename='file', parent_key='BASEDIR')
        mb.add_path(self.EPHYS_FEATURES_KEY, 'ephys_features.csv',
                    typename='file', parent_key='BASEDIR')
        mb.add_path(self.MORPHOLOGY_FEATURES_KEY, 'morphology_features.csv',
                    typename='file', parent_key='BASEDIR')
        mb.add_path(self.RECONSTRUCTION_KEY, 'specimen_%d/reconstruction.swc',
                    typename='file', parent_key='BASEDIR')
        mb.add_path(self.MARKER_KEY, 'specimen_%d/reconstruction.marker',
                    typename='file', parent_key='BASEDIR')
        mb.add_path(self.EPHYS_SWEEPS_KEY, 'specimen_%d/ephys_sweeps.json',
                    typename='file', parent_key='BASEDIR')

        mb.write_json_file(file_name)
Example #2
0
class CellTypesCache(Cache):
    """
    Cache class for storing and accessing data from the Cell Types Database.
    By default, this class will cache any downloaded metadata or files in
    well known locations defined in a manifest file.  This behavior can be
    disabled.

    Attributes
    ----------

    api: CellTypesApi instance
        The object used for making API queries related to the Cell Types Database

    Parameters
    ----------

    cache: boolean
        Whether the class should save results of API queries to locations specified
        in the manifest file.  Queries for files (as opposed to metadata) must have a
        file location.  If caching is disabled, those locations must be specified
        in the function call (e.g. get_ephys_data(file_name='file.nwb')).

    manifest_file: string
       File name of the manifest to be read.  Default is "cell_types_manifest.json".
    """

    # manifest keys
    CELLS_KEY = 'CELLS'
    EPHYS_FEATURES_KEY = 'EPHYS_FEATURES'
    MORPHOLOGY_FEATURES_KEY = 'MORPHOLOGY_FEATURES'
    EPHYS_DATA_KEY = 'EPHYS_DATA'
    EPHYS_SWEEPS_KEY = 'EPHYS_SWEEPS'
    RECONSTRUCTION_KEY = 'RECONSTRUCTION'
    MARKER_KEY = 'MARKER'
    MANIFEST_VERSION = "1.1"

    def __init__(self, cache=True, manifest_file=None, base_uri=None):

        if manifest_file is None:
            manifest_file = get_default_manifest_file('cell_types')

        super(CellTypesCache, self).__init__(manifest=manifest_file,
                                             cache=cache,
                                             version=self.MANIFEST_VERSION)
        self.api = CellTypesApi(base_uri=base_uri)

    def get_cells(self,
                  file_name=None,
                  require_morphology=False,
                  require_reconstruction=False,
                  reporter_status=None,
                  species=None,
                  simple=True):
        """
        Download metadata for all cells in the database and optionally return a
        subset filtered by whether or not they have a morphology or reconstruction.

        Parameters
        ----------

        file_name: string
            File name to save/read the cell metadata as JSON.  If file_name is None,
            the file_name will be pulled out of the manifest.  If caching
            is disabled, no file will be saved. Default is None.

        require_morphology: boolean
            Filter out cells that have no morphological images.

        require_reconstruction: boolean
            Filter out cells that have no morphological reconstructions.

        reporter_status: list
            Filter for cells that have one or more cell reporter statuses.

        species: list
            Filter for cells that belong to one or more species.  If None, return all.
            Must be one of [ CellTypesApi.MOUSE, CellTypesApi.HUMAN ].
        """

        file_name = self.get_cache_path(file_name, self.CELLS_KEY)

        cells = self.api.list_cells_api(path=file_name,
                                        strategy='lazy',
                                        **Cache.cache_json())

        if isinstance(reporter_status, string_types):
            reporter_status = [reporter_status]

        # filter the cells on the way out
        cells = self.api.filter_cells_api(cells, require_morphology,
                                          require_reconstruction,
                                          reporter_status, species, simple)

        return cells

    def get_ephys_sweeps(self, specimen_id, file_name=None):
        """
        Download sweep metadata for a single cell specimen.

        Parameters
        ----------

        specimen_id: int
             ID of a cell.
        """

        file_name = self.get_cache_path(file_name, self.EPHYS_SWEEPS_KEY,
                                        specimen_id)

        sweeps = self.api.get_ephys_sweeps(specimen_id,
                                           strategy='lazy',
                                           path=file_name,
                                           **Cache.cache_json())

        return sweeps

    def get_ephys_features(self, dataframe=False, file_name=None):
        """
        Download electrophysiology features for all cells in the database.

        Parameters
        ----------

        file_name: string
            File name to save/read the ephys features metadata as CSV.
            If file_name is None, the file_name will be pulled out of the
            manifest.  If caching is disabled, no file will be saved.
            Default is None.

        dataframe: boolean
            Return the output as a Pandas DataFrame.  If False, return
            a list of dictionaries.
        """
        file_name = self.get_cache_path(file_name, self.EPHYS_FEATURES_KEY)

        if self.cache:
            if dataframe:
                warnings.warn("dataframe argument is deprecated.")
                args = Cache.cache_csv_dataframe()
            else:
                args = Cache.cache_csv_json()
            args['strategy'] = 'lazy'
        else:
            args = Cache.nocache_json()

        features_df = self.api.get_ephys_features(path=file_name, **args)

        return features_df

    def get_morphology_features(self, dataframe=False, file_name=None):
        """
        Download morphology features for all cells with reconstructions in the database.

        Parameters
        ----------

        file_name: string
            File name to save/read the ephys features metadata as CSV.
            If file_name is None, the file_name will be pulled out of the
            manifest.  If caching is disabled, no file will be saved.
            Default is None.

        dataframe: boolean
            Return the output as a Pandas DataFrame.  If False, return
            a list of dictionaries.
        """

        file_name = self.get_cache_path(file_name,
                                        self.MORPHOLOGY_FEATURES_KEY)

        if self.cache:
            if dataframe:
                warnings.warn("dataframe argument is deprecated.")
                args = Cache.cache_csv_dataframe()
            else:
                args = Cache.cache_csv_json()
        else:
            args = Cache.nocache_json()

        args['strategy'] = 'lazy'
        args['path'] = file_name

        return self.api.get_morphology_features(**args)

    def get_all_features(self, dataframe=False, require_reconstruction=True):
        """
        Download morphology and electrophysiology features for all cells and merge them
        into a single table.

        Parameters
        ----------

        dataframe: boolean
            Return the output as a Pandas DataFrame.  If False, return
            a list of dictionaries.

        require_reconstruction: boolean
            Only return ephys and morphology features for cells that have
            reconstructions. Default True.
        """

        ephys_features = pd.DataFrame(self.get_ephys_features())
        morphology_features = pd.DataFrame(self.get_morphology_features())

        how = 'inner' if require_reconstruction else 'outer'

        all_features = ephys_features.merge(morphology_features,
                                            how=how,
                                            on='specimen_id')

        if dataframe:
            warnings.warn("dataframe argument is deprecated.")
            return all_features
        else:
            return all_features.to_dict('records')

    def get_ephys_data(self, specimen_id, file_name=None):
        """
        Download electrophysiology traces for a single cell in the database.

        Parameters
        ----------

        specimen_id: int
            The ID of a cell specimen to download.

        file_name: string
            File name to save/read the ephys features metadata as CSV.
            If file_name is None, the file_name will be pulled out of the
            manifest.  If caching is disabled, no file will be saved.
            Default is None.

        Returns
        -------
        NwbDataSet
            A class instance with helper methods for retrieving stimulus
            and response traces out of an NWB file.
        """

        file_name = self.get_cache_path(file_name, self.EPHYS_DATA_KEY,
                                        specimen_id)

        self.api.save_ephys_data(specimen_id, file_name, strategy='lazy')

        return NwbDataSet(file_name)

    def get_reconstruction(self, specimen_id, file_name=None):
        """
        Download and open a reconstruction for a single cell in the database.

        Parameters
        ----------

        specimen_id: int
            The ID of a cell specimen to download.

        file_name: string
            File name to save/read the reconstruction SWC.
            If file_name is None, the file_name will be pulled out of the
            manifest.  If caching is disabled, no file will be saved.
            Default is None.

        Returns
        -------
        Morphology
             A class instance with methods for accessing morphology compartments.
        """

        file_name = self.get_cache_path(file_name, self.RECONSTRUCTION_KEY,
                                        specimen_id)

        if file_name is None:
            raise Exception(
                "Please enable caching (CellTypes.cache = True) or specify a save_file_name."
            )

        if not os.path.exists(file_name):
            self.api.save_reconstruction(specimen_id, file_name)

        return swc.read_swc(file_name)

    def get_reconstruction_markers(self, specimen_id, file_name=None):
        """
        Download and open a reconstruction marker file for a single cell in the database.

        Parameters
        ----------

        specimen_id: int
            The ID of a cell specimen to download.

        file_name: string
            File name to save/read the reconstruction marker.
            If file_name is None, the file_name will be pulled out of the
            manifest.  If caching is disabled, no file will be saved.
            Default is None.

        Returns
        -------
        Morphology
             A class instance with methods for accessing morphology compartments.
        """

        file_name = self.get_cache_path(file_name, self.MARKER_KEY,
                                        specimen_id)

        if file_name is None:
            raise Exception(
                "Please enable caching (CellTypes.cache = True) or specify a save_file_name."
            )

        if not os.path.exists(file_name):
            try:
                self.api.save_reconstruction_markers(specimen_id, file_name)
            except LookupError as e:
                logging.warning(e.args)
                return []

        return swc.read_marker_file(file_name)

    def build_manifest(self, file_name):
        """
        Construct a manifest for this Cache class and save it in a file.

        Parameters
        ----------

        file_name: string
            File location to save the manifest.

        """

        mb = ManifestBuilder()
        mb.set_version(self.MANIFEST_VERSION)
        mb.add_path('BASEDIR', '.')
        mb.add_path(self.CELLS_KEY,
                    'cells.json',
                    typename='file',
                    parent_key='BASEDIR')
        mb.add_path(self.EPHYS_DATA_KEY,
                    'specimen_%d/ephys.nwb',
                    typename='file',
                    parent_key='BASEDIR')
        mb.add_path(self.EPHYS_FEATURES_KEY,
                    'ephys_features.csv',
                    typename='file',
                    parent_key='BASEDIR')
        mb.add_path(self.MORPHOLOGY_FEATURES_KEY,
                    'morphology_features.csv',
                    typename='file',
                    parent_key='BASEDIR')
        mb.add_path(self.RECONSTRUCTION_KEY,
                    'specimen_%d/reconstruction.swc',
                    typename='file',
                    parent_key='BASEDIR')
        mb.add_path(self.MARKER_KEY,
                    'specimen_%d/reconstruction.marker',
                    typename='file',
                    parent_key='BASEDIR')
        mb.add_path(self.EPHYS_SWEEPS_KEY,
                    'specimen_%d/ephys_sweeps.json',
                    typename='file',
                    parent_key='BASEDIR')

        mb.write_json_file(file_name)