Esempio n. 1
0
    def get_structures_hierarchy(self):
        """
            Download the structures tree and store it in self.structures_hierarchy
            as a Tree whose root node has identifier 0.
        """
        def add_descendants_to_tree(tree, structure, parent_id=None):
            """
                Insert a structure as a node of the tree, then recurse into each of its children
            """
            # Only pass `parent` when there is one, so parent-less nodes are
            # created exactly as a plain create_node(tag, identifier) call.
            node_kwargs = dict(tag=structure["name"],
                               identifier=structure["id"])
            if parent_id is not None:
                node_kwargs["parent"] = parent_id
            tree.create_node(**node_kwargs)

            # A missing, None or empty "children" entry means this is a leaf
            for descendant in structure.get("children") or ():
                add_descendants_to_tree(tree, descendant, structure["id"])

        response = request(self._url_paths["structures_tree"])
        top_level_structures = response.json()

        hierarchy = Tree()
        hierarchy.create_node(tag="root", identifier=0)
        for top_level in top_level_structures:
            add_descendants_to_tree(hierarchy, top_level, 0)

        self.structures_hierarchy = hierarchy
Esempio n. 2
0
    def __init__(self,
                 species=None,
                 sex=None,
                 base_dir=None,
                 make_root=True,
                 **kwargs):
        """
            Set up the paths, fetch the table of available species and
            load the brain of the selected species.

            :param species: passed to self.get_brain and stored as self.sel_species
            :param sex: passed to self.get_brain and stored as self.sex
            :param base_dir: forwarded to Paths.__init__
            :param make_root: stored as self.make_root
            :param kwargs: forwarded to Paths.__init__
        """
        self.make_root = make_root

        Paths.__init__(self, base_dir=base_dir, **kwargs)

        # Fetch the table describing every available species
        species_url = f"{self._base_url}/{self._url_paths['species_info']}"
        species_records = request(species_url).json()
        self.species_info = pd.DataFrame(species_records)
        self.species = list(self.species_info.scientific_name.values)

        # Placeholders, set before the selected brain is loaded
        self.structures = None
        self.region_names = None
        self.region_acronyms = None

        # Remember the selection and load the corresponding brain
        self.sel_species = species
        self.sex = sex
        self.get_brain(species=species, sex=sex)
Esempio n. 3
0
    def download_and_write_mesh(self, acronym, obj_path):
        """
            Download the .obj mesh of a structure, write it to disk and load it.

            :param acronym: acronym of the structure, matched against self.structures.acronym
            :param obj_path: path of the .obj file to write
            :return: whatever load(obj_path) returns (described as a vtk actor by the original code)
            :raises IndexError: if no row of self.structures has this acronym
        """
        print(f"Downloading mesh data for {acronym}")
        path = self.structures.loc[self.structures.acronym ==
                                   acronym].obj_path.values[0]
        url = f"{self._url_paths['data']}/{path}"

        # download and write .obj
        mesh_data = request(url).content.decode("utf-8").split("\n")

        # The payload was decoded as UTF-8, so write it back as UTF-8 rather
        # than in the platform default encoding. The `with` block closes the
        # file; no explicit close() is needed.
        with open(obj_path, "w", encoding="utf-8") as f:
            f.writelines(f"{md}\n" for md in mesh_data)

        # load the mesh from the file that was just written
        return load(obj_path)
Esempio n. 4
0
def download_and_cache(url, cachedir):
    """
        Given a url to download a gene's ISH experiment data,
        this function downloads and unzips the data

        :param url: str, url to download data from
        :param cachedir: str, path to folder where data will be extracted;
            created (including missing parent folders) if it does not exist
    """
    # Get data
    req = request(url)

    # Create cache dir. makedirs with exist_ok avoids both the failure on a
    # missing parent folder and the race between checking and creating.
    os.makedirs(cachedir, exist_ok=True)

    # Unzip to cache dir, closing the archive when done
    with zipfile.ZipFile(io.BytesIO(req.content)) as z:
        z.extractall(cachedir)
Esempio n. 5
0
def download_streamlines(eids, streamlines_folder=None):  # pragma: no cover
    """
        Given a list of experimental IDs, it downloads the streamline data from the https://neuroinformatics.nl cache and saves them as
        json files. Experiments whose json file already exists in the folder are read from disk instead of downloaded.

        :param eids: list of integers with experiments IDs (a single ID is also accepted)
        :param streamlines_folder: str or Path to the folder where the JSON files should be saved.
            This function defines no default folder, so a value must be given.
        :return: tuple (filepaths, data): list of json file paths as str and list of the matching DataFrames
        :raises TypeError: if streamlines_folder is None

    """
    if streamlines_folder is None:
        # Path(None) would raise an opaque TypeError; say what is wrong instead
        raise TypeError("streamlines_folder must be a folder path, not None")
    streamlines_folder = Path(streamlines_folder)

    if not isinstance(eids, (list, np.ndarray, tuple)):
        eids = [eids]

    filepaths, data = [], []
    for eid in track(eids, total=len(eids), description="downloading"):
        url = make_url_given_id(eid)
        jsonpath = streamlines_folder / f"{eid}.json"
        filepaths.append(str(jsonpath))

        # Already cached: read the json from disk
        if jsonpath.exists():
            data.append(pd.read_json(str(jsonpath)))
            continue

        response = request(url)

        # Write the response content as a temporary compressed file, open it
        # in pandas, and always delete the temporary file, even if writing
        # or parsing fails.
        temp_path = streamlines_folder / "temp.gz"
        try:
            with open(str(temp_path), "wb") as temp:
                temp.write(response.content)

            url_data = pd.read_json(str(temp_path),
                                    lines=True,
                                    compression="gzip")
        finally:
            if temp_path.exists():
                temp_path.unlink()

        # save json
        url_data.to_json(str(jsonpath))

        data.append(url_data)
    return filepaths, data
Esempio n. 6
0
    def download_streamlines(self, eids, streamlines_folder=None):
        """
            Given a list of experimental IDs, it downloads the streamline data from the https://neuroinformatics.nl cache and saves them as
            json files. Experiments whose json file already exists in the folder are read from disk instead of downloaded.

            :param eids: list of integers with experiments IDs (a single ID is also accepted)
            :param streamlines_folder: str path to the folder where the JSON files should be saved, if None self.streamlines_cache is used (Default value = None)
            :return: tuple (filepaths, data): list of json file paths and list of the matching DataFrames

        """
        if streamlines_folder is None:
            streamlines_folder = self.streamlines_cache

        if not isinstance(eids, (list, np.ndarray, tuple)):
            eids = [eids]

        filepaths, data = [], []
        for eid in tqdm(eids):
            url = self.make_url_given_id(eid)
            jsonpath = os.path.join(streamlines_folder, str(eid) + ".json")
            filepaths.append(jsonpath)

            # Already cached: read the json from disk
            if os.path.isfile(jsonpath):
                data.append(pd.read_json(jsonpath))
                continue

            response = request(url)

            # Write the response content as a temporary compressed file, open
            # it in pandas, and always delete the temporary file, even if
            # writing or parsing fails.
            temp_path = os.path.join(streamlines_folder, "temp.gz")
            try:
                with open(temp_path, "wb") as temp:
                    temp.write(response.content)

                url_data = pd.read_json(temp_path,
                                        lines=True,
                                        compression='gzip')
            finally:
                if os.path.isfile(temp_path):
                    os.remove(temp_path)

            # save json
            url_data.to_json(jsonpath)

            data.append(url_data)
        return filepaths, data
Esempio n. 7
0
    def get_structures_hierarchy(self):
        """
            Download the structures tree and store a flattened version of its
            first two levels in self.structures_hierarchy as a DataFrame with
            columns ids, name, parent and children.

            Top-level structures get parent None and a (possibly empty) list of
            (id, name) tuples as children. Their direct children get the whole
            parent structure dict as parent, and either a list of (id, name)
            tuples or None when they have no children. Deeper levels are not
            given rows of their own.
        """
        structures_hierarchy = request(
            self._url_paths['structures_tree']).json()

        data = dict(ids=[], name=[], parent=[], children=[])

        for structure in structures_hierarchy:
            # A structure without children may lack the key or carry None;
            # treat both as "no children" instead of crashing.
            children = structure.get('children') or []

            data['ids'].append(structure['id'])
            data['name'].append(structure['name'])
            data['parent'].append(None)
            data['children'].append([(c['id'], c['name']) for c in children])

            for child in children:
                grandchildren = child.get('children')

                data['ids'].append(child['id'])
                data['name'].append(child['name'])
                # NOTE(review): the parent column holds the full parent dict,
                # not its id - kept as is because callers may rely on it.
                data['parent'].append(structure)
                data['children'].append(
                    [(g['id'], g['name']) for g in grandchildren]
                    if grandchildren else None)

        self.structures_hierarchy = pd.DataFrame(data)
Esempio n. 8
0
    def get_gene_experiments(self, gene_symbol):
        """
            Return the IDs of the ISH experiments available for a gene

            :param gene_symbol: str gene symbol, or int gene ID which is
                converted to a symbol with self.get_gene_symbol_by_id
            :return: list of experiment IDs, or None if no experiment is found
            :raises ValueError: if gene_symbol is neither str nor int, or if
                no symbol is found for the given ID
        """
        if isinstance(gene_symbol, int):
            # it's an ID, get symbol
            gene_symbol = self.get_gene_symbol_by_id(gene_symbol)
            if gene_symbol is None:
                raise ValueError("Invalid gene_symbol argument")
        elif not isinstance(gene_symbol, str):
            raise ValueError("Invalid gene_symbol argument")

        experiments_url = self.gene_experiments_url.replace(
            "-GENE_SYMBOL-", gene_symbol)
        experiments = request(experiments_url).json()["msg"]

        if len(experiments) == 0:
            print(f"No experiment found for gene {gene_symbol}")
            return None

        return [experiment["id"] for experiment in experiments]
Esempio n. 9
0
    def get_structures_reconstructions(self, species, sex):
        """
            Download the reconstructions of a species' brain and store the
            metadata of its structures in self.structures as a DataFrame with
            columns name, acronym, color, obj_path, hemisphere, parent, children.

            :param species: species name, converted to a brain ID with
                self.get_brain_id_from_species_name
            :param sex: index into the list of reconstructions of this brain;
                if None the reconstruction with the most viewer files is used
            :return: None; when no data is available a message is printed and
                self.structures is left untouched
            :raises ValueError: if the selected reconstruction has no viewer files
        """
        iid = self.get_brain_id_from_species_name(species)

        data = request(
            f"{self._base_url}/{self._url_paths['brain_info']}{iid}").json()
        if not len(data):
            print("Could not get any data for this brain")
            return None

        reconstructions = [r["viewer_files"] for r in data["reconstructions"]]
        if not reconstructions:
            print(f"No data was found for {species}")
            return None

        n_elems = [len(rec) for rec in reconstructions]

        if sex is None:
            # Default to the reconstruction with the most files. np.argmax
            # raises ValueError on an empty sequence; catch only that so that
            # KeyboardInterrupt and unrelated errors are not swallowed.
            try:
                sex = np.argmax(n_elems)
            except ValueError as err:
                raise ValueError("No data retrieved") from err

        if not n_elems[sex]:
            raise ValueError(f"No reconstructions found for {sex} {species}")
        reconstruction = reconstructions[sex]

        # Get data about the brain regions
        structures = dict(
            name=[],
            acronym=[],
            color=[],
            obj_path=[],
            hemisphere=[],
            parent=[],
            children=[],
        )

        for d in reconstruction:
            # Skip files with a missing or empty "structures" entry
            if not d.get("structures"):
                continue

            # Only the first structure listed for a file is used
            entry = d["structures"][0]
            info = entry["structure"]

            hemi = entry["hemisphere"]
            if hemi is None:
                hemi = "both"

            # Suffix the name with the hemisphere for lateralised structures
            name = info["name"]
            if hemi == "right":
                name += "_R"
            elif hemi == "left":
                name += "_L"

            # Acronym = abbreviation + whatever follows its last occurrence
            # in the file name, up to the first "."
            abbr = info["abbreviation"]
            acro = (abbr +
                    d["p_file"]["file_name"].split(abbr)[-1].split(".")[0])

            # Drop anything that comes after the hemisphere marker
            if "_left" in acro:
                acro = acro.split("_left")[0] + "_left"
            elif "_right" in acro:
                acro = acro.split("_right")[0] + "_right"

            structures["obj_path"].append(d["p_file"]["path"])
            structures["hemisphere"].append(hemi.lower())
            structures["name"].append(name)
            structures["acronym"].append(acro)
            structures["color"].append(info["color"])
            structures["parent"].append(info["parent"])
            structures["children"].append(info["children"])

        self.structures = pd.DataFrame(structures)
Esempio n. 10
0
    def get_structures_reconstructions(self, species, sex):
        """
            Download the reconstructions of a species' brain and store the
            metadata of its structures in self.structures as a DataFrame with
            columns name, acronym, color, obj_path, hemisphere, parent, children.

            :param species: species name, converted to a brain ID with
                self.get_brain_id_from_species_name
            :param sex: index into the list of reconstructions of this brain;
                if None the reconstruction with the most viewer files is used
            :return: None; when no data is available a message is printed and
                self.structures is left untouched
            :raises ValueError: if the selected reconstruction has no viewer files
        """
        iid = self.get_brain_id_from_species_name(species)

        data = request(
            f"{self._base_url}/{self._url_paths['brain_info']}{iid}").json()
        if not len(data):
            print("Could not get any data for this brain")
            return None

        reconstructions = [r['viewer_files'] for r in data['reconstructions']]
        if not reconstructions:
            print(f"No data was found for {species}")
            return None

        n_elems = [len(rec) for rec in reconstructions]

        if sex is None:
            # Default to the reconstruction with the most files. np.argmax
            # raises ValueError on an empty sequence; catch only that so that
            # KeyboardInterrupt and unrelated errors are not swallowed.
            try:
                sex = np.argmax(n_elems)
            except ValueError as err:
                raise ValueError("No data retrieved") from err

        if not n_elems[sex]:
            raise ValueError(f"No reconstructions found for {sex} {species}")
        reconstruction = reconstructions[sex]

        # Get data about the brain regions
        structures = dict(name=[],
                          acronym=[],
                          color=[],
                          obj_path=[],
                          hemisphere=[],
                          parent=[],
                          children=[])

        for d in reconstruction:
            # Skip files with a missing or empty 'structures' entry
            if not d.get('structures'):
                continue

            # Only the first structure listed for a file is used
            first = d['structures'][0]
            structure = first['structure']

            hemi = first['hemisphere']
            if hemi is None:
                hemi = "both"

            # Suffix the name with the hemisphere for lateralised structures
            name = structure['name']
            if hemi == 'right':
                name += "_R"
            elif hemi == 'left':
                name += "_L"

            # Acronym = abbreviation + whatever follows its last occurrence
            # in the file name, up to the first "."
            abbr = structure['abbreviation']
            acro = abbr + d['p_file']['file_name'].split(abbr)[-1].split(
                ".")[0]

            # Drop anything that comes after the hemisphere marker
            if '_left' in acro:
                acro = acro.split('_left')[0] + '_left'
            elif '_right' in acro:
                acro = acro.split('_right')[0] + '_right'

            structures['obj_path'].append(d['p_file']['path'])
            structures['hemisphere'].append(hemi.lower())
            structures['name'].append(name)
            structures['acronym'].append(acro)
            structures['color'].append(structure['color'])
            structures['parent'].append(structure['parent'])
            structures['children'].append(structure['children'])
        self.structures = pd.DataFrame(structures)
Esempio n. 11
0
 def get_all_genes(self):
     """
         Fetch the metadata of every gene in the Allen gene expression dataset
         and return it as a DataFrame built from the "msg" field of the response
     """
     payload = request(self.all_genes_url).json()
     genes_metadata = payload["msg"]
     return pd.DataFrame(genes_metadata)