Example #1
0
def get_projection_energy(exp_id, res=50):
    '''
    Wraps allensdk to fetch projection energy given experiment ID

    Parameters
    ----------
    exp_id : int
        Allen experiment ID
    res : int
        Sets voxel size for Allen data. Must be 100, 50, 25, or 10. Default is
        50.

    Returns
    -------
    aff : ndarray
        Affine matrix
    energy : ndarray
        Projection energy image for given experiment, in Allen CCF

    Raises
    ------
    ValueError
        If ``res`` is not one of the supported Allen CCF resolutions.
    '''

    if res not in (100, 50, 25, 10):
        raise ValueError('Res must be 100, 50, 25, or 10')

    # Save next to the allensdk manifest so repeated calls hit the same
    # cached location on disk.
    fn = manifest_file.split('manifest.json')[
        0] + f'/experiment_{exp_id}/projection_energy_{res}.nrrd'
    gda = GridDataApi(res)
    gda.download_projection_grid_data(exp_id,
                                      image=['projection_energy'],
                                      resolution=res,
                                      save_file_path=fn)
    energy, _ = nrrd.read(fn)
    energy = reorient_ara_data(energy)
    # res is in microns; res / 1000 presumably yields mm voxel size for the
    # affine -- NOTE(review): confirm make_aff expects mm.
    aff = make_aff(res / 1000)
    return aff, energy
Example #2
0
 def download_data_mask(self, path, experiment_id, resolution):
     """Download the data-mask grid volume for *experiment_id* into *path*."""
     api = GridDataApi()
     volumes = [GridDataApi.DATA_MASK]
     return api.download_projection_grid_data(
         experiment_id, volumes, resolution, path)
Example #3
0
 def download_injection_fraction(self, path, experiment_id, resolution):
     """Download the injection-fraction grid volume for *experiment_id* into *path*."""
     api = GridDataApi()
     volumes = [GridDataApi.INJECTION_FRACTION]
     return api.download_projection_grid_data(
         experiment_id, volumes, resolution, path)
Example #4
0
 def download_projection_density(self, path, experiment_id, resolution):
     """Download the projection-density grid volume for *experiment_id* into *path*."""
     api = GridDataApi()
     volumes = [GridDataApi.PROJECTION_DENSITY]
     return api.download_projection_grid_data(
         experiment_id, volumes, resolution, path)
Example #5
0
def grid_data():
    """Return a GridDataApi whose HTTP fetch is replaced with a MagicMock."""
    api = GridDataApi()
    api.retrieve_file_over_http = MagicMock(name='retrieve_file_over_http')
    return api
Example #6
0
def grid_data():
    """Build a GridDataApi stubbed so no real HTTP request is ever made."""
    stubbed = GridDataApi()
    mock_fetch = MagicMock(name='retrieve_file_over_http')
    stubbed.retrieve_file_over_http = mock_fetch
    return stubbed
Example #7
0
 def __init__(self) -> None:
     """Create the Allen API clients used by this object."""
     # Grid-data client for volume downloads, RMA client for model queries.
     self.gda = GridDataApi()
     self.rma = RmaApi()
     # Holds the raw response of the most recent query.
     self.res = None  # type: List
Example #8
0
class ISHFetcher:
    ''' A downloader object for Section Data Sets

    Methods
    -------

    find_id_ish:
        Returns the ids of Section Data Sets (a single gene experiment) sorted by qc time

    download_grid_all:
        Downloads all the expression energy 3d density files (200um grid) that satisfy the query

    download_grid_recent:
        Downloads the most recently qc-ed expression energy 3d density file (200um grid) that satisfies the query

    Attributes
    ----------
    rma:
        Rma Api instance
    gda:
        GridData Api instance
    res:
        results of the find_id_ish query
    '''
    def __init__(self) -> None:
        self.rma = RmaApi()
        self.gda = GridDataApi()
        # Raw server response of the most recent find_id_ish call.
        self.res = None  # type: List

    def find_id_ish(self,
                    gene: str,
                    sag_or_cor: str = "sagittal",
                    adu_or_dev: str = "adult",
                    time_point: str = "P56") -> List:
        """Returns the ids of Section Data Sets (a single gene experiment)

        Args
        ----
        gene: str
            the gene to search for

        sag_or_cor: str (accepts * wild cards)
            `coronal` or `sagittal` or `*`

        adu_or_dev: str (accepts * wild cards)
            `adult`, `development`, `both`

        time_point: str (it will be automatically wildcarded)
            e.g. "P56", "E", "E13", "P"

        Returns
        -------
        list of ids:
            sorted from most recent to most ancient qc_date

        Raises
        ------
        ValueError
            If the arguments are inconsistent or the server rejects the query.
        """

        # Embryonic time points ("E...") cannot belong to the adult atlas.
        if adu_or_dev == "adult" and "E" in time_point:
            raise ValueError("there is not adult with age %s" % time_point)

        # Map the user-facing choice onto Allen product abbreviations.
        if adu_or_dev == "adult":
            adu_or_dev = "Mouse"
        elif adu_or_dev == "development":
            adu_or_dev = "DevMouse"
        elif adu_or_dev == "both":
            adu_or_dev = "*Mouse"
        else:
            raise ValueError("adu_or_dev='%s' is not valid" % adu_or_dev)
        criteria = [
            "[failed$eq'false']",
            "reference_space[name$li'*%s*']" % time_point,
            "products[abbreviation$li'%s']" % adu_or_dev,
            "plane_of_section[name$li'%s']" % sag_or_cor,
            "genes[acronym$eq'%s']" % gene
        ]
        # include='reference_space',
        self.res = self.rma.model_query("SectionDataSet",
                                        criteria=','.join(criteria),
                                        only=["id", "qc_date"],
                                        num_rows='all')
        # A malformed query makes the RMA api return the error page as a str.
        if isinstance(self.res, str):
            raise ValueError("Bad query! Server returned :\n%s" % self.res)

        if not self.res:
            return []

        # Missing qc_date sorts as '' (lexicographically smallest), so
        # never-qc-ed data sets end up last after the reverse below.
        qc_date = [i["qc_date"] if i["qc_date"] is not None else ''
                   for i in self.res]

        # Most recent qc first.
        order = np.argsort(qc_date)[::-1]
        return [int(self.res[i]["id"]) for i in order]

    def download_grid_all(self,
                          gene: str,
                          folder: str = '../data',
                          sag_or_cor: str = "sagittal",
                          adu_or_dev: str = "adult",
                          time_point: str = "P56") -> None:
        """Downloads all the files matching the query

        Args
        ----
        gene: str
            the gene to search for

        folder: str
            destination directory for the downloaded zip files

        sag_or_cor: str (accepts * wild cards)
            `coronal` or `sagittal` or `*`

        adu_or_dev: str (accepts * wild cards)
            `adult`, `development`, `both`

        time_point: str (it will be automatically wildcarded)
            e.g. "P56", "E", "E13", "P"

        """
        ids = self.find_id_ish(gene,
                               sag_or_cor=sag_or_cor,
                               adu_or_dev=adu_or_dev,
                               time_point=time_point)
        for idd in ids:
            self.gda.download_expression_grid_data(
                idd,
                path=os.path.join(
                    folder,
                    "%s_%s_%s_%s.zip" % (gene, sag_or_cor, time_point, idd)))

    def download_grid_recent(self,
                             gene: str,
                             folder: str = '../data',
                             sag_or_cor: str = "sagittal",
                             adu_or_dev: str = "adult",
                             time_point: str = "P56") -> Union[str, bool]:
        """Downloads the most recently qc-ed file among the ones available

        Args
        ----
        gene: str
            the gene to search for

        folder: str
            destination directory for the downloaded zip file

        sag_or_cor: str (accepts * wild cards)
            `coronal` or `sagittal` or `*`

        adu_or_dev: str (accepts * wild cards)
            `adult`, `development`, `both`

        time_point: str (it will be automatically wildcarded)
            e.g. "P56", "E", "E13", "P"

        Returns
        -------
        output_path: output_path or bool
            if the download was successful returns the path to the file otherwise False

        """
        ids = self.find_id_ish(gene,
                               sag_or_cor=sag_or_cor,
                               adu_or_dev=adu_or_dev,
                               time_point=time_point)
        # Explicit empty check instead of catching IndexError around the
        # whole download: a failure inside the download itself should
        # propagate, not be mistaken for "no experiment found".
        if not ids:
            # logging.warn is a deprecated alias of logging.warning.
            logging.warning("Experiment %s was never performed" % gene)
            return False

        idd = ids[0]  # find_id_ish returns most-recent first
        output_path = os.path.join(
            folder,
            "%s_%s_%s_%s.zip" % (gene, sag_or_cor, time_point, idd))
        self.gda.download_expression_grid_data(idd, path=output_path)
        return output_path
Example #9
0
    def getAsync(self, from_cache, aggregations):
        """Fetch mouse ISH expression-energy data, one entry per experiment.

        Parameters
        ----------
        from_cache : bool
            If False, query the Allen API, process each experiment, pickle the
            results under ``self.cache_path`` and return them. If True, load
            the previously pickled DataFrames from disk instead.
        aggregations
            Forwarded to ``Utils.merge_with_structure`` -- presumably the
            aggregation functions applied per structure; confirm with callers.

        Returns
        -------
        dict
            Maps ``'mouse - <plane of section>'`` to a processed DataFrame.
        """
        # load data once with from_cache = False, then change it to True to read it from disk instead of fetching it from the api
        if not from_cache:
            # we use the RmaApi to query specific information, such as the section data sets of a specific gene
            # for docs, see: https://alleninstitute.github.io/AllenSDK/allensdk.api.queries.rma_api.html
            rma = RmaApi()

            # there might be a way to retrieve data in higher resolution, as stated here (default is 25, 10 is also available - but resolution is ignored for download_gene_expression_grid_data)
            # https://alleninstitute.github.io/AllenSDK/_modules/allensdk/api/queries/grid_data_api.html
            # See `Downloading 3-D Projection Grid Data <http://help.brain-map.org/display/api/Downloading+3-D+Expression+Grid+Data#name="Downloading3-DExpressionGridData-DOWNLOADING3DPROJECTIONGRIDDATA">`_
            gdApi = GridDataApi()

            # http://api.brain-map.org/examples/rma_builder/index.html
            # http://api.brain-map.org/examples/rma_builder/rma_builder.html
            # https://allensdk.readthedocs.io/en/latest/data_api_client.html
            sectionDataSets = pd.DataFrame(
                rma.model_query(
                    model='SectionDataSet',
                    #! criteria="plane_of_section[name$eqcoronal]", note that saggital only spans the left hemisphere, so this is tough to compare with human data.
                    filters={'failed': 'false'},
                    include=
                    f"genes[acronym$il{self.geneAcronym}],products[id$eq1]",  # $il = case-insensitive like | yes, weird notation... id = 1 = mouse brain atlas (not developing!)
                    num_rows='all'))

            # model's documentation: http://api.brain-map.org/doc/SectionDataSet.html
            # https://community.brain-map.org/t/attempting-to-download-substructures-for-coronal-p56-mouse-atlas/174/2

            experiments = {}

            # http://help.brain-map.org/display/mousebrain/Documentation
            # NOTE(review): backslash separators make this Windows-only; confirm
            # deployment target before porting.
            annotations = np.fromfile(Utils.getRelativeFilepath(
                "annotations\\P56_Mouse_gridAnnotation\\gridAnnotation.raw"),
                                      dtype="uint32")

            # https://community.brain-map.org/t/how-to-acquire-the-structure-label-for-the-expression-grid-data/150/4
            # for Mouse P56, structure_graph_id = 1 according to http://help.brain-map.org/display/api/Atlas+Drawings+and+Ontologies
            structure_map = StructureMap.StructureMap(
                reference_space_key='annotation/ccf_2017',
                resolution=25).get(structure_graph_id=1)  # , annotation, meta
            # from http://alleninstitute.github.io/AllenSDK/_static/examples/nb/reference_space.html#Downloading-an-annotation-volume

            for index, row in sectionDataSets.iterrows(
            ):  # https://stackoverflow.com/questions/16476924/how-to-iterate-over-rows-in-a-dataframe-in-pandas
                exp_id = row['id']
                # NOTE(review): Windows-only path separators here as well.
                exp_path = f"cache\\mouse_ish-expr\\{exp_id}\\"

                try:
                    # https://community.brain-map.org/t/whole-mouse-brain-gene-expression-data/447/4
                    # explanation of what "energy" means here:
                    # expression density = sum of expressing pixels / sum of all pixels in division
                    # expression intensity = sum of expressing pixel intensity / sum of expressing pixels
                    # expression energy = expression intensity * expression density

                    gdApi.download_gene_expression_grid_data(
                        exp_id, GridDataApi.ENERGY, exp_path)

                    expression_levels = np.fromfile(exp_path + "energy.raw",
                                                    dtype=np.float32)

                    # According to the doc @ http://help.brain-map.org/display/api/Downloading+3-D+Expression+Grid+Data
                    # we have "A raw uncompressed float (32-bit) little-endian volume representing average expression energy per voxel.
                    # A value of "-1" represents no data. This file is returned by default if the volumes parameter is null."
                    data = pd.DataFrame({
                        Constants.EXPR_LVL: expression_levels,
                        "structure_id": annotations
                    })

                    # some expression_levels are assigned to a structure of id 0. same is true for Jure's approach.
                    # according to the Allen institue, this is just due to background-noise:
                    # https://community.brain-map.org/t/how-to-acquire-the-structure-label-for-the-expression-grid-data/150/4
                    # values of -1 mean "no value obtained", hence we filter them out:
                    data = data[(data[Constants.EXPR_LVL] != -1)
                                & (data.structure_id != 0)]

                    data[Constants.Z_SCORE] = Utils.z_score(
                        data[Constants.EXPR_LVL])

                    # https://stackoverflow.com/questions/31528819/using-merge-on-a-column-and-index-in-pandas
                    # https://stackoverflow.com/questions/45147100/pandas-drop-columns-with-all-nans

                    name = f'mouse_{exp_id}_{Constants.PlaneOfSections[row["plane_of_section_id"]]}'
                    data = Utils.merge_with_structure(
                        data, structure_map, MouseISHData.VALUE_COLUMNS,
                        aggregations)

                    Utils.save(data, self.cache_path, name + '.pkl')

                    # NOTE(review): keyed by plane of section only, so a later
                    # experiment with the same plane overwrites an earlier one.
                    experiments['mouse - ' + Constants.PlaneOfSections[
                        row["plane_of_section_id"]]] = data
                except Exception as e:
                    # Log which experiment failed, then re-raise to abort.
                    print(
                        f"Error retrieving mouse-ish experiment {exp_id}: {str(e)}"
                    )
                    raise e

            return experiments
        else:
            if not glob.glob(self.cache_path):
                Utils.log.warning(
                    f"No cached dataframe found. Check whether you have access to file '{self.cache_path}' and whether it exists. Obtaining data without caching now..."
                )
                # NOTE(review): falls back to self.get, not getAsync -- confirm
                # a synchronous sibling method exists on this class.
                return self.get(False, aggregations)

            # Filename format is <species>_<exp_id>_<plane>.pkl, so index 2 of
            # the '_'-split recovers the plane of section.
            return {
                'mouse - ' + Utils.getFilename(file).split('_')[2]:
                Utils.load(file)
                for file in glob.glob(f'{self.cache_path}/*.pkl')
            }