예제 #1
0
 def _clear_arrays(self) -> None:
     """Remove every object that ``tiledb.ls`` reports for the TileDB group.

     The group is identified by ``self.__tiledb_group_name``. If a
     ``TileDBError`` is raised, a debug message is logged and the method
     returns normally.
     """
     def _remove(tdb_obj, tdb_type):
         # Only the object path is needed; the reported type is ignored.
         return tiledb.remove(tdb_obj)

     try:
         tiledb.ls(self.__tiledb_group_name, _remove)
     except TileDBError:
         self._logger.debug("No TileDB group to clear out.")
예제 #2
0
 def ls(self):
     """Return the names of the objects ``tiledb.ls`` reports under ``self.root``.

     For a root starting with ``"s3://"`` each reported path has the root
     prefix and its final character removed; for any other root the
     ``os.path.basename`` of each reported path is returned.
     """
     listed = []

     def _collect(obj_path, obj_type):
         # The object type is not needed, only its path.
         listed.append(obj_path)

     tiledb.ls(self.ctx, self.root, _collect)

     if not self.root.startswith("s3://"):
         return [os.path.basename(entry) for entry in listed]

     # NOTE(review): the trailing character is dropped unconditionally;
     # presumably S3 listings end with "/" -- confirm.
     prefix_len = len(self.root)
     return [entry[prefix_len:-1] for entry in listed]
예제 #3
0
    def _lsuri(uri, tiledb_ctx):
        """Return the objects ``tiledb.ls`` reports for ``uri``.

        Args:
            uri: Location passed to ``tiledb.ls``.
            tiledb_ctx: Context forwarded to ``tiledb.ls`` as ``ctx``.

        Returns:
            A list of ``(path, type)`` tuples in the order they were
            reported, where a single trailing ``"/"`` has been removed from
            each path.
        """
        def _cleanpath(p):
            # endswith() is safe on an empty string, unlike indexing p[-1].
            return p[:-1] if p.endswith("/") else p

        result = []

        def _collect(obj_path, obj_type):
            result.append((_cleanpath(obj_path), obj_type))

        tiledb.ls(uri, _collect, ctx=tiledb_ctx)
        return result
예제 #4
0
def main():
    """Print the children of ``"my_group"`` and then walk it in both orders.

    Each object is printed as the ``repr`` of its path followed by the
    ``repr`` of its type.
    """
    ctx = tiledb.Ctx()
    group = "my_group"

    def callback(obj_path, obj_type):
        line = "{0!r} {1!r}".format(obj_path, obj_type)
        print(line)

    print("List children: ")
    tiledb.ls(ctx, group, callback)

    traversals = (
        ("\nPreorder traversal: ", "preorder"),
        ("\nPostorder traversal: ", "postorder"),
    )
    for heading, order in traversals:
        print(heading)
        tiledb.walk(ctx, group, callback, order=order)
 def tdb_dir_contents(self, dir):
     """Return the object paths ``tiledb.ls`` reports for the array path.

     The location listed is ``self.array_path.basename`` and the listing
     uses ``self.ctx``.

     NOTE(review): the ``dir`` argument is not used by this method --
     confirm whether it was meant to be the listed location.
     """
     found = []

     def _record(obj_path, _obj_type):
         # Only the path of each reported object is kept.
         found.append(obj_path)

     tiledb.ls(self.array_path.basename, _record, ctx=self.ctx)
     return found
예제 #6
0
    def get_schema(self, filesystem, path):
        """Build the schema dictionary for the dataset directory ``path``.

        The directory is read through TileDB: the ``X`` array supplies the
        shape, the ``obs`` array supplies the observation columns (with
        optional hints from its ``cxg_schema`` metadata), every array listed
        under ``emb`` becomes an embedding entry, and the ``name_0``
        attribute of the ``var`` array becomes the variable index.

        Args:
            filesystem: Not used by this method.
            path: Directory containing the ``X``, ``obs``, ``emb`` and
                ``var`` TileDB objects.

        Returns:
            A dict with the keys ``version``, ``markers``, ``shape``,
            ``obsIndex``, ``obs``, ``obsCat``, ``categoryOrder``,
            ``embeddings`` and ``var``.

        Raises:
            TypeError: If the ``cxg_schema`` metadata does not decode to a
                dict.
        """
        schema_dict = {"version": "1.0.0", "markers": []}

        with tiledb.Array(os.path.join(path, "X"), mode="r") as array:
            schema_dict["shape"] = array.shape

        with tiledb.open(os.path.join(path, "obs"), mode="r") as array:
            if "cxg_schema" in array.meta:
                schema_hints = json.loads(array.meta["cxg_schema"])
            else:
                schema_hints = {}
            if not isinstance(schema_hints, dict):
                raise TypeError("Array schema was malformed.")

            obs_columns = []
            for attr in array.schema:
                column = {"name": attr.name, "writable": False}
                type_hint = schema_hints.get(attr.name, {})
                # Only hinted columns get a "type"; no dtype-based fallback
                # is applied to the others.
                if "type" in type_hint:
                    column["type"] = type_hint["type"]
                    if (column["type"] == "categorical"
                            and "categories" in type_hint):
                        column["categories"] = type_hint["categories"]
                obs_columns.append(column)

        obs = []
        obs_cat = []
        category_order = {}
        for column in obs_columns:
            name = column["name"]
            if name == "name_0":  # index
                continue
            if column.get("type") == "categorical":
                obs_cat.append(name)
                # "categories" is optional in the hint (see above), so only
                # record an order when one was actually provided instead of
                # failing with a KeyError.
                if "categories" in column:
                    category_order[name] = column["categories"]
            else:
                obs.append(name)
        schema_dict["obsIndex"] = schema_hints.get("index", "name_0")
        schema_dict["obs"] = obs
        schema_dict["obsCat"] = obs_cat
        schema_dict["categoryOrder"] = category_order

        listed = []

        def _collect(obj_path, obj_type):
            listed.append((obj_path, obj_type))

        tiledb.ls(os.path.join(path, "emb"), _collect)

        embeddings = []
        schema_dict["embeddings"] = embeddings
        for emb_path, emb_type in listed:
            if emb_type != "array":
                continue
            with tiledb.open(emb_path, mode="r") as array:
                dimensions = array.ndim
            # Strip any trailing slash first: basename() of a path that ends
            # with "/" is the empty string.
            name = os.path.basename(emb_path.rstrip("/"))
            if dimensions > 2:
                # Arrays with more than two dimensions are offered both as
                # a 3-D and as a 2-D embedding.
                embeddings.append({"name": name, "dimensions": 3})
                embeddings.append({"name": name, "dimensions": 2})
            elif dimensions == 2:
                embeddings.append({"name": name, "dimensions": 2})

        with tiledb.open(os.path.join(path, "var"), mode="r") as array:
            schema_dict["var"] = pd.Index(
                array.query(attrs=["name_0"])[:]["name_0"])
        return schema_dict