def get_projection_energy(exp_id, res=50):
    '''Fetch the projection-energy volume for one Allen experiment.

    Wraps allensdk's GridDataApi: downloads the NRRD next to the Allen
    manifest, reorients it into this package's convention, and builds
    the matching affine.

    Parameters
    ----------
    exp_id : int
        Allen experiment ID.
    res : int, optional
        Voxel size for Allen data. Must be 100, 50, 25, or 10.
        Default is 50.

    Returns
    -------
    aff : ndarray
        Affine matrix.
    energy : ndarray
        Projection energy image for given experiment, in Allen CCF.

    Raises
    ------
    ValueError
        If ``res`` is not one of the supported voxel sizes.
    '''
    import os  # local import keeps the fix self-contained

    if res not in (100, 50, 25, 10):
        raise ValueError('Res must be 100, 50, 25, or 10')
    # Place the file beside the manifest. os.path.join replaces the old
    # string surgery on manifest_file, which produced a doubled '/'.
    # Assumes manifest_file's basename is 'manifest.json' (allensdk default).
    fn = os.path.join(os.path.dirname(manifest_file),
                      f'experiment_{exp_id}',
                      f'projection_energy_{res}.nrrd')
    # Ensure the per-experiment directory exists before downloading into it.
    os.makedirs(os.path.dirname(fn), exist_ok=True)
    gda = GridDataApi(res)
    gda.download_projection_grid_data(exp_id,
                                      image=['projection_energy'],
                                      resolution=res,
                                      save_file_path=fn)
    energy, _ = nrrd.read(fn)
    energy = reorient_ara_data(energy)
    # make_aff takes the voxel size in mm; res is in um.
    aff = make_aff(res / 1000)
    return aff, energy
def download_data_mask(self, path, experiment_id, resolution):
    """Download the DATA_MASK grid volume for *experiment_id* to *path*.

    Thin convenience wrapper over
    ``GridDataApi.download_projection_grid_data``.
    """
    api = GridDataApi()
    return api.download_projection_grid_data(
        experiment_id, [GridDataApi.DATA_MASK], resolution, path)
def download_injection_fraction(self, path, experiment_id, resolution):
    """Download the INJECTION_FRACTION grid volume for *experiment_id*.

    Writes the file to *path*; thin wrapper over
    ``GridDataApi.download_projection_grid_data``.
    """
    api = GridDataApi()
    return api.download_projection_grid_data(
        experiment_id, [GridDataApi.INJECTION_FRACTION], resolution, path)
def download_projection_density(self, path, experiment_id, resolution):
    """Download the PROJECTION_DENSITY grid volume for *experiment_id*.

    Writes the file to *path*; thin wrapper over
    ``GridDataApi.download_projection_grid_data``.
    """
    api = GridDataApi()
    return api.download_projection_grid_data(
        experiment_id, [GridDataApi.PROJECTION_DENSITY], resolution, path)
def grid_data():
    """Build a GridDataApi whose HTTP transfer is stubbed with a MagicMock.

    The mock lets callers inspect download requests without network I/O.
    """
    api = GridDataApi()
    api.retrieve_file_over_http = MagicMock(name='retrieve_file_over_http')
    return api
def __init__(self) -> None:
    """Instantiate the Allen RMA / GridData API clients.

    ``res`` starts unset; it holds the raw results of a later id query.
    """
    self.res = None  # type: List  # filled in by the id query
    self.rma = RmaApi()
    self.gda = GridDataApi()
class ISHFetcher:
    '''A downloader object for Section Data Sets.

    Methods
    -------
    find_id_ish:
        Returns the ids of Section Data Sets (a single gene experiment)
        sorted by qc time.
    download_grid_all:
        Downloads every expression-energy 3d density file (200um grid)
        that satisfies the query.
    download_grid_recent:
        Downloads the most recently qc-ed expression-energy 3d density
        file (200um grid) that satisfies the query.

    Attributes
    ----------
    rma:
        RmaApi instance used for metadata queries.
    gda:
        GridDataApi instance used for file downloads.
    res:
        Raw results of the last find_id_ish query.
    '''

    def __init__(self) -> None:
        self.rma = RmaApi()
        self.gda = GridDataApi()
        self.res = None  # type: List

    def find_id_ish(self, gene: str,
                    sag_or_cor: str = "sagittal",
                    adu_or_dev: str = "adult",
                    time_point: str = "P56") -> List:
        """Return the ids of Section Data Sets (a single gene experiment).

        Args
        ----
        gene: str
            the gene to search for
        sag_or_cor: str (accepts * wild cards)
            `coronal` or `sagittal` or `*`
        adu_or_dev: str (accepts * wild cards)
            `adult`, `development`, `both`
        time_point: str (it will be automatically wildcarded)
            e.g. "P56", "E", "E13", "P"

        Returns
        -------
        list of ids: sorted from most recent to most ancient qc_date
        """
        if adu_or_dev == "adult" and "E" in time_point:
            raise ValueError("there is not adult with age %s" % time_point)
        # Map the human-friendly selector onto Allen product abbreviations.
        if adu_or_dev == "adult":
            adu_or_dev = "Mouse"
        elif adu_or_dev == "development":
            adu_or_dev = "DevMouse"
        elif adu_or_dev == "both":
            adu_or_dev = "*Mouse"
        else:
            raise ValueError("adu_or_dev='%s' is not valid" % adu_or_dev)
        criteria = [
            "[failed$eq'false']",
            "reference_space[name$li'*%s*']" % time_point,
            "products[abbreviation$li'%s']" % adu_or_dev,
            "plane_of_section[name$li'%s']" % sag_or_cor,
            "genes[acronym$eq'%s']" % gene,
        ]
        # include='reference_space',
        self.res = self.rma.model_query("SectionDataSet",
                                        criteria=','.join(criteria),
                                        only=["id", "qc_date"],
                                        num_rows='all')
        # The RMA api signals a malformed query by returning a string.
        if isinstance(self.res, str):
            raise ValueError("Bad query! Server returned :\n%s" % self.res)
        if self.res == []:
            return []
        # Sort by qc_date, newest first; a missing qc_date sorts as ''.
        qc_date = ['' if i["qc_date"] is None else i["qc_date"]
                   for i in self.res]
        ix = np.argsort(qc_date)[::-1]
        return [int(self.res[i]["id"]) for i in ix]

    def download_grid_all(self, gene: str, folder: str = '../data',
                          sag_or_cor: str = "sagittal",
                          adu_or_dev: str = "adult",
                          time_point: str = "P56") -> None:
        """Download all the matching files into *folder*.

        Args
        ----
        gene: str
            the gene to search for
        sag_or_cor: str (accepts * wild cards)
            `coronal` or `sagittal` or `*`
        adu_or_dev: str (accepts * wild cards)
            `adult`, `development`, `both`
        time_point: str (it will be automatically wildcarded)
            e.g. "P56", "E", "E13", "P"
        """
        ids = self.find_id_ish(gene, sag_or_cor=sag_or_cor,
                               adu_or_dev=adu_or_dev,
                               time_point=time_point)
        for idd in ids:
            self.gda.download_expression_grid_data(
                idd,
                path=os.path.join(
                    folder,
                    "%s_%s_%s_%s.zip" % (gene, sag_or_cor, time_point, idd)))

    def download_grid_recent(self, gene: str, folder: str = '../data',
                             sag_or_cor: str = "sagittal",
                             adu_or_dev: str = "adult",
                             time_point: str = "P56") -> Union[str, bool]:
        """Download the most recently qc-ed file among the ones available.

        Args
        ----
        gene: str
            the gene to search for
        sag_or_cor: str (accepts * wild cards)
            `coronal` or `sagittal` or `*`
        adu_or_dev: str (accepts * wild cards)
            `adult`, `development`, `both`
        time_point: str (it will be automatically wildcarded)
            e.g. "P56", "E", "E13", "P"

        Returns
        -------
        output_path: str or bool
            if the download was successful, the path to the file;
            otherwise False
        """
        ids = self.find_id_ish(gene, sag_or_cor=sag_or_cor,
                               adu_or_dev=adu_or_dev,
                               time_point=time_point)
        # Explicit emptiness check instead of the old try/except
        # IndexError, which also (wrongly) swallowed any IndexError
        # raised inside the download call itself.
        if not ids:
            # logging.warn is a deprecated alias; use logging.warning.
            logging.warning("Experiment %s was never performed", gene)
            return False
        idd = ids[0]
        output_path = os.path.join(
            folder, "%s_%s_%s_%s.zip" % (gene, sag_or_cor, time_point, idd))
        self.gda.download_expression_grid_data(idd, path=output_path)
        return output_path
def getAsync(self, from_cache, aggregations):
    """Fetch (or reload) mouse ISH expression data, grouped by plane of section.

    With from_cache=False, queries the Allen RMA api for SectionDataSets of
    self.geneAcronym, downloads each experiment's expression-energy grid,
    joins it against the P56 grid annotation + structure map, z-scores it,
    merges per-structure aggregations and pickles each result to
    self.cache_path. With from_cache=True, reloads the pickles from disk.

    Returns a dict mapping 'mouse - <plane of section>' -> DataFrame.

    NOTE(review): shapes/columns of the returned DataFrames depend on
    Utils.merge_with_structure — not visible from here.
    """
    # load data once with from_cache = False, then change it to True to
    # read it from disk instead of fetching it from the api
    if not from_cache:
        # we use the RmaApi to query specific information, such as the
        # section data sets of a specific gene. for docs, see:
        # https://alleninstitute.github.io/AllenSDK/allensdk.api.queries.rma_api.html
        rma = RmaApi()

        # there might be a way to retrieve data in higher resolution, as
        # stated here (default is 25, 10 is also available - but resolution
        # is ignored for download_gene_expression_grid_data)
        # https://alleninstitute.github.io/AllenSDK/_modules/allensdk/api/queries/grid_data_api.html
        # See `Downloading 3-D Projection Grid Data <http://help.brain-map.org/display/api/Downloading+3-D+Expression+Grid+Data#name="Downloading3-DExpressionGridData-DOWNLOADING3DPROJECTIONGRIDDATA">`_
        gdApi = GridDataApi()

        # query-builder references:
        # http://api.brain-map.org/examples/rma_builder/index.html
        # http://api.brain-map.org/examples/rma_builder/rma_builder.html
        # https://allensdk.readthedocs.io/en/latest/data_api_client.html
        sectionDataSets = pd.DataFrame(
            rma.model_query(
                model='SectionDataSet',
                #! criteria="plane_of_section[name$eqcoronal]", note that saggital only spans the left hemisphere, so this is tough to compare with human data.
                filters={'failed': 'false'},
                include=
                f"genes[acronym$il{self.geneAcronym}],products[id$eq1]",  # $il = case-insensitive like | yes, weird notation... id = 1 = mouse brain atlas (not developing!)
                num_rows='all'))
        # model's documentation: http://api.brain-map.org/doc/SectionDataSet.html
        # https://community.brain-map.org/t/attempting-to-download-substructures-for-coronal-p56-mouse-atlas/174/2
        experiments = {}

        # per-voxel structure labels for the P56 reference grid:
        # http://help.brain-map.org/display/mousebrain/Documentation
        annotations = np.fromfile(Utils.getRelativeFilepath(
            "annotations\\P56_Mouse_gridAnnotation\\gridAnnotation.raw"),
            dtype="uint32")
        # https://community.brain-map.org/t/how-to-acquire-the-structure-label-for-the-expression-grid-data/150/4

        # for Mouse P56, structure_graph_id = 1 according to
        # http://help.brain-map.org/display/api/Atlas+Drawings+and+Ontologies
        structure_map = StructureMap.StructureMap(
            reference_space_key='annotation/ccf_2017',
            resolution=25).get(structure_graph_id=1)  # , annotation, meta
        # from http://alleninstitute.github.io/AllenSDK/_static/examples/nb/reference_space.html#Downloading-an-annotation-volume

        # https://stackoverflow.com/questions/16476924/how-to-iterate-over-rows-in-a-dataframe-in-pandas
        for index, row in sectionDataSets.iterrows():
            exp_id = row['id']
            exp_path = f"cache\\mouse_ish-expr\\{exp_id}\\"
            try:
                # https://community.brain-map.org/t/whole-mouse-brain-gene-expression-data/447/4
                # explanation of what "energy" means here:
                # expression density = sum of expressing pixels / sum of all pixels in division
                # expression intensity = sum of expressing pixel intensity / sum of expressing pixels
                # expression energy = expression intensity * expression density
                gdApi.download_gene_expression_grid_data(
                    exp_id, GridDataApi.ENERGY, exp_path)

                expression_levels = np.fromfile(exp_path + "energy.raw",
                                                dtype=np.float32)
                # According to the doc @ http://help.brain-map.org/display/api/Downloading+3-D+Expression+Grid+Data
                # we have "A raw uncompressed float (32-bit) little-endian
                # volume representing average expression energy per voxel.
                # A value of "-1" represents no data. This file is returned
                # by default if the volumes parameter is null."
                data = pd.DataFrame({
                    Constants.EXPR_LVL: expression_levels,
                    "structure_id": annotations
                })

                # some expression_levels are assigned to a structure of id 0.
                # same is true for Jure's approach. according to the Allen
                # institute, this is just due to background-noise:
                # https://community.brain-map.org/t/how-to-acquire-the-structure-label-for-the-expression-grid-data/150/4
                # values of -1 mean "no value obtained", hence we filter them out:
                data = data[(data[Constants.EXPR_LVL] != -1)
                            & (data.structure_id != 0)]

                data[Constants.Z_SCORE] = Utils.z_score(
                    data[Constants.EXPR_LVL])

                # https://stackoverflow.com/questions/31528819/using-merge-on-a-column-and-index-in-pandas
                # https://stackoverflow.com/questions/45147100/pandas-drop-columns-with-all-nans
                name = f'mouse_{exp_id}_{Constants.PlaneOfSections[row["plane_of_section_id"]]}'

                data = Utils.merge_with_structure(
                    data, structure_map, MouseISHData.VALUE_COLUMNS,
                    aggregations)

                Utils.save(data, self.cache_path, name + '.pkl')

                experiments['mouse - ' + Constants.PlaneOfSections[
                    row["plane_of_section_id"]]] = data
            except Exception as e:
                # log which experiment failed, then propagate the failure
                print(
                    f"Error retrieving mouse-ish experiment {exp_id}: {str(e)}"
                )
                raise e
        return experiments
    else:
        if not glob.glob(self.cache_path):
            Utils.log.warning(
                f"No cached dataframe found. Check whether you have access to file '{self.cache_path}' and whether it exists. Obtaining data without caching now..."
            )
            # NOTE(review): falls back via self.get, not self.getAsync —
            # confirm the sibling `get` method exists and is intended here.
            return self.get(False, aggregations)

        # filename pattern is mouse_<exp_id>_<plane>; index 2 is the plane
        return {
            'mouse - ' + Utils.getFilename(file).split('_')[2]:
            Utils.load(file)
            for file in glob.glob(f'{self.cache_path}/*.pkl')
        }