def get_structures_hierarchy(self):
    """
    Download the structures tree for this atlas and store it as a
    treelib ``Tree`` in ``self.structures_hierarchy``, rooted at a
    synthetic node with identifier 0.
    """

    def _insert_region(tree, region, parent_id=None):
        """Add *region* to *tree*, then recurse into its descendants."""
        if parent_id is None:
            tree.create_node(tag=region["name"], identifier=region["id"])
        else:
            tree.create_node(
                tag=region["name"],
                identifier=region["id"],
                parent=parent_id,
            )
        # Regions without a "children" entry (or with an empty one)
        # are leaves — nothing more to add.
        for child in region.get("children") or []:
            _insert_region(tree, child, region["id"])

    structures_hierarchy = request(
        self._url_paths["structures_tree"]).json()

    tree = Tree()
    tree.create_node(tag="root", identifier=0)
    for supercategory in structures_hierarchy:
        _insert_region(tree, supercategory, 0)

    self.structures_hierarchy = tree
def __init__(self, species=None, sex=None, base_dir=None, make_root=True, **kwargs):
    """
    Initialise paths, download the list of available species and load
    the brain data for the selected species/sex.

    :param species: str, scientific name of the species to load
    :param sex: sex identifier used to pick a reconstruction
    :param base_dir: str, base directory passed on to Paths
    :param make_root: bool, stored on the instance
    """
    self.make_root = make_root
    Paths.__init__(self, base_dir=base_dir, **kwargs)

    # Fetch the table of species available on the remote server
    species_url = f"{self._base_url}/{self._url_paths['species_info']}"
    self.species_info = pd.DataFrame(request(species_url).json())
    self.species = list(self.species_info.scientific_name.values)

    # Nothing is loaded until a brain is selected below
    self.structures = None
    self.region_names = None
    self.region_acronyms = None

    self.sel_species = species
    self.sex = sex
    self.get_brain(species=species, sex=sex)
def download_and_write_mesh(self, acronym, obj_path):
    """
    Download the .obj mesh for a brain region and save it to file.

    :param acronym: str, acronym of the brain region (looked up in
        ``self.structures``)
    :param obj_path: str, path of the .obj file to write
    :returns: the vtk actor loaded from the saved file
    """
    print(f"Downloading mesh data for {acronym}")
    path = self.structures.loc[
        self.structures.acronym == acronym
    ].obj_path.values[0]
    url = f"{self._url_paths['data']}/{path}"

    # Download and write the .obj data. The context manager closes the
    # file on exit, so the explicit f.close() the old code had inside
    # the `with` block was redundant; a single joined write replaces
    # the per-line loop with identical output.
    mesh_data = request(url).content.decode("utf-8").split("\n")
    with open(obj_path, "w") as f:
        f.write("\n".join(mesh_data) + "\n")

    # return the vtk actor
    return load(obj_path)
def download_and_cache(url, cachedir):
    """
    Given a url to download a gene's ISH experiment data, this function
    downloads and unzips the data into the cache directory.

    :param url: str, url to download data from
    :param cachedir: str, path to folder where data will be downloaded
    """
    # Get data
    req = request(url)

    # Create cache dir. makedirs with exist_ok is robust: it creates
    # missing parent directories (os.mkdir would raise) and does not
    # race with a concurrent creation of the same directory.
    os.makedirs(cachedir, exist_ok=True)

    # Unzip to cache dir
    z = zipfile.ZipFile(io.BytesIO(req.content))
    z.extractall(cachedir)
def download_streamlines(eids, streamlines_folder=None):  # pragma: no cover
    """
    Given a list of experimental IDs, it downloads the streamline data
    from the https://neuroinformatics.nl cache and saves them as
    json files.

    :param eids: list of integers with experiments IDs
    :param streamlines_folder: str path to the folder where the JSON
        files should be saved; must be provided (the folder is created
        if it does not exist)
    :returns: (filepaths, data) — list of saved json paths and the
        corresponding DataFrames
    :raises ValueError: if streamlines_folder is None
    """
    # The old code passed None straight to Path(), which raised an
    # opaque TypeError despite the docstring promising a default.
    # There is no default in this scope, so fail with a clear message.
    if streamlines_folder is None:
        raise ValueError(
            "streamlines_folder must be specified: there is no default "
            "cache folder available here"
        )
    streamlines_folder = Path(streamlines_folder)
    # Make sure the target folder exists before writing into it
    streamlines_folder.mkdir(parents=True, exist_ok=True)

    if not isinstance(eids, (list, np.ndarray, tuple)):
        eids = [eids]

    filepaths, data = [], []
    for eid in track(eids, total=len(eids), description="downloading"):
        url = make_url_given_id(eid)
        jsonpath = streamlines_folder / f"{eid}.json"
        filepaths.append(str(jsonpath))

        if not jsonpath.exists():
            response = request(url)

            # Write the response content as a temporary compressed file
            temp_path = streamlines_folder / "temp.gz"
            with open(str(temp_path), "wb") as temp:
                temp.write(response.content)

            # Open in pandas and delete temp
            url_data = pd.read_json(
                str(temp_path), lines=True, compression="gzip")
            temp_path.unlink()

            # save json
            url_data.to_json(str(jsonpath))

            # append to lists and return
            data.append(url_data)
        else:
            data.append(pd.read_json(str(jsonpath)))

    return filepaths, data
def download_streamlines(self, eids, streamlines_folder=None):
    """
    Given a list of experimental IDs, it downloads the streamline data
    from the https://neuroinformatics.nl cache and saves them as
    json files.

    :param eids: list of integers with experiments IDs
    :param streamlines_folder: str path to the folder where the JSON
        files should be saved, if None the default is used
        (Default value = None)
    :returns: (filepaths, data) — list of saved json paths and the
        corresponding DataFrames
    """
    if streamlines_folder is None:
        streamlines_folder = self.streamlines_cache

    if not isinstance(eids, (list, np.ndarray, tuple)):
        eids = [eids]

    filepaths, data = [], []
    for eid in tqdm(eids):
        url = self.make_url_given_id(eid)
        jsonpath = os.path.join(streamlines_folder, f"{eid}.json")
        filepaths.append(jsonpath)

        if os.path.isfile(jsonpath):
            # Already cached: just load it
            data.append(pd.read_json(jsonpath))
            continue

        # Download the compressed JSON and stage it on disk
        response = request(url)
        temp_path = os.path.join(streamlines_folder, "temp.gz")
        with open(temp_path, "wb") as temp:
            temp.write(response.content)

        # Parse with pandas, then remove the temporary file
        url_data = pd.read_json(temp_path, lines=True, compression='gzip')
        os.remove(temp_path)

        # Persist the parsed data for future calls
        url_data.to_json(jsonpath)
        data.append(url_data)

    return filepaths, data
def get_structures_hierarchy(self):
    """
    Download the (two-level) structures tree and flatten it into a
    pandas DataFrame stored in ``self.structures_hierarchy``, with
    columns: ids, name, parent, children.
    """
    structures_hierarchy = request(
        self._url_paths['structures_tree']).json()

    data = dict(ids=[], name=[], parent=[], children=[])
    for structure in structures_hierarchy:
        # Top-level structures have no parent
        data['ids'].append(structure['id'])
        data['name'].append(structure['name'])
        data['parent'].append(None)
        data['children'].append([(c['id'], c['name'])
                                 for c in structure['children']])

        for child in structure['children']:
            data['ids'].append(child['id'])
            data['name'].append(child['name'])
            # BUG FIX: the old code appended the whole parent dict
            # here; store the parent's id instead, consistent with the
            # 'ids' column and the (id, name) tuples in 'children'.
            data['parent'].append(structure['id'])
            data['children'].append([(
                c['id'], c['name']
            ) for c in child['children']] if child['children'] else None)

    self.structures_hierarchy = pd.DataFrame(data)
def get_gene_experiments(self, gene_symbol):
    """
    Given a gene_symbol it returns the list of ISH experiments for
    this gene

    :param gene_symbol: str, self.genes.gene_symbol (an int is treated
        as a gene ID and resolved to its symbol first)
    """
    if not isinstance(gene_symbol, str):
        if not isinstance(gene_symbol, int):
            raise ValueError("Invalid gene_symbol argument")
        # An integer is a gene ID: resolve it to the symbol
        gene_symbol = self.get_gene_symbol_by_id(gene_symbol)
        if gene_symbol is None:
            raise ValueError("Invalid gene_symbol argument")

    url = self.gene_experiments_url.replace("-GENE_SYMBOL-", gene_symbol)
    data = request(url).json()["msg"]

    if not len(data):
        print(f"No experiment found for gene {gene_symbol}")
        return None
    return [d["id"] for d in data]
def get_structures_reconstructions(self, species, sex):
    """
    Download metadata about the reconstructions available for a brain
    and populate ``self.structures`` with one row per brain region
    (name, acronym, color, obj_path, hemisphere, parent, children).

    :param species: str, species name used to look up the brain ID
    :param sex: index of the reconstruction to use; if None the one
        with the most files is picked
    """
    iid = self.get_brain_id_from_species_name(species)
    data = request(
        f"{self._base_url}/{self._url_paths['brain_info']}{iid}").json()
    if not len(data):
        print("Could not get any data for this brain")
        return None

    reconstructions = [r["viewer_files"] for r in data["reconstructions"]]
    if not reconstructions:
        print(f"No data was found for {species}")
        return

    n_elems = [len(rec) for rec in reconstructions]
    if sex is None:
        # Pick the reconstruction with the most files.
        # BUG FIX: the old bare `except:` swallowed every exception;
        # np.argmax raises ValueError on an empty sequence, so catch
        # exactly that and chain the cause.
        try:
            sex = np.argmax(n_elems)
        except ValueError as e:
            raise ValueError("No data retrieved") from e

    if not n_elems[sex]:
        raise ValueError(f"No reconstructions found for {sex} {species}")
    else:
        reconstruction = reconstructions[sex]

    # Get data about the brain regions
    structures = dict(
        name=[],
        acronym=[],
        color=[],
        obj_path=[],
        hemisphere=[],
        parent=[],
        children=[],
    )
    for d in reconstruction:
        # Skip entries with no structure metadata
        if "structures" not in d.keys():
            continue
        if not d["structures"]:
            continue

        hemi = d["structures"][0]["hemisphere"]
        if hemi is None:
            hemi = "both"

        # Suffix the region name with the hemisphere it belongs to
        if hemi == "right":
            name = d["structures"][0]["structure"]["name"] + "_R"
        elif hemi == "left":
            name = d["structures"][0]["structure"]["name"] + "_L"
        else:
            name = d["structures"][0]["structure"]["name"]

        # Build a unique acronym from the abbreviation + file name
        abbr = d["structures"][0]["structure"]["abbreviation"]
        acro = (abbr +
                d["p_file"]["file_name"].split(abbr)[-1].split(".")[0])
        # Trim anything after the hemisphere suffix
        if "_left" in acro:
            acro = acro.split("_left")[0] + "_left"
        elif "_right" in acro:
            acro = acro.split("_right")[0] + "_right"

        structures["obj_path"].append(d["p_file"]["path"])
        structures["hemisphere"].append(hemi.lower())
        structures["name"].append(name)
        structures["acronym"].append(acro)
        structures["color"].append(
            d["structures"][0]["structure"]["color"])
        structures["parent"].append(
            d["structures"][0]["structure"]["parent"])
        structures["children"].append(
            d["structures"][0]["structure"]["children"])

    self.structures = pd.DataFrame(structures)
def get_structures_reconstructions(self, species, sex):
    """
    Download metadata about the reconstructions available for a brain
    and populate ``self.structures`` with one row per brain region
    (name, acronym, color, obj_path, hemisphere, parent, children).

    :param species: str, species name used to look up the brain ID
    :param sex: index of the reconstruction to use; if None the one
        with the most files is picked
    """
    iid = self.get_brain_id_from_species_name(species)
    data = request(
        f"{self._base_url}/{self._url_paths['brain_info']}{iid}").json()
    if not len(data):
        print("Could not get any data for this brain")
        return None

    reconstructions = [r['viewer_files'] for r in data['reconstructions']]
    if not reconstructions:
        print(f"No data was found for {species}")
        return

    n_elems = [len(rec) for rec in reconstructions]
    if sex is None:
        # Pick the reconstruction with the most files.
        # BUG FIX: the old bare `except:` swallowed every exception;
        # np.argmax raises ValueError on an empty sequence, so catch
        # exactly that and chain the cause.
        try:
            sex = np.argmax(n_elems)
        except ValueError as e:
            raise ValueError("No data retrieved") from e

    if not n_elems[sex]:
        raise ValueError(f"No reconstructions found for {sex} {species}")
    else:
        reconstruction = reconstructions[sex]

    # Get data about the brain regions
    structures = dict(name=[],
                      acronym=[],
                      color=[],
                      obj_path=[],
                      hemisphere=[],
                      parent=[],
                      children=[])
    for d in reconstruction:
        # Skip entries with no structure metadata
        if 'structures' not in d.keys():
            continue
        if not d['structures']:
            continue

        hemi = d['structures'][0]['hemisphere']
        if hemi is None:
            hemi = "both"

        # Suffix the region name with the hemisphere it belongs to
        if hemi == 'right':
            name = d['structures'][0]['structure']['name'] + "_R"
        elif hemi == 'left':
            name = d['structures'][0]['structure']['name'] + "_L"
        else:
            name = d['structures'][0]['structure']['name']

        # Build a unique acronym from the abbreviation + file name
        abbr = d['structures'][0]['structure']['abbreviation']
        acro = abbr + d['p_file']['file_name'].split(abbr)[-1].split(
            ".")[0]
        # Trim anything after the hemisphere suffix
        if '_left' in acro:
            acro = acro.split('_left')[0] + '_left'
        elif '_right' in acro:
            acro = acro.split('_right')[0] + '_right'

        structures['obj_path'].append(d['p_file']['path'])
        structures['hemisphere'].append(hemi.lower())
        structures['name'].append(name)
        structures['acronym'].append(acro)
        structures['color'].append(
            d['structures'][0]['structure']['color'])
        structures['parent'].append(
            d['structures'][0]['structure']['parent'])
        structures['children'].append(
            d['structures'][0]['structure']['children'])

    self.structures = pd.DataFrame(structures)
def get_all_genes(self):
    """
    Download metadata about all the genes available in the Allen gene
    expression dataset

    :returns: pd.DataFrame with one row per gene
    """
    response = request(self.all_genes_url)
    genes = response.json()["msg"]
    return pd.DataFrame(genes)