Beispiel #1
0
 def test_anndata_colors_to_cxg_colors(self):
     """Check the anndata -> CXG category color conversion on the test h5ad.

     Three scenarios are exercised in order: the unmodified fixture converts
     to ``pbmc3k_colors``; an invalid color entry raises
     ``ColorFormatException``; and removing the ``louvain`` obs column
     yields an empty mapping.
     """
     # Baseline: the untouched fixture converts to the expected color table.
     fixture = self._get_h5ad()
     converted = convert_anndata_category_colors_to_cxg_category_colors(fixture)
     self.assertEqual(converted, pbmc3k_colors)

     # Corrupt the first louvain color on the same fixture; the converter must reject it.
     fixture.uns["louvain_colors"][0] = "#NOTCOOL"
     with self.assertRaises(ColorFormatException):
         convert_anndata_category_colors_to_cxg_category_colors(fixture)

     # Fresh fixture with the obs category removed: its colors are expected to be skipped.
     orphaned = self._get_h5ad()
     del orphaned.obs["louvain"]
     remaining = convert_anndata_category_colors_to_cxg_category_colors(orphaned)
     self.assertEqual(remaining, {})
Beispiel #2
0
    def generate_cxg_metadata(self, convert_anndata_colors_to_cxg_colors):
        """
        Build the dictionary of group-level metadata for the CXG dataset.

        The result always holds "cxg_version" and a JSON-encoded "cxg_properties" entry (dataset title and
        about). A JSON-encoded "corpora" entry is added when ``self.corpora_properties`` is not None. When
        ``convert_anndata_colors_to_cxg_colors`` is truthy, a JSON-encoded "cxg_category_colors" entry is
        added as well; if the conversion raises ColorFormatException a warning is logged and the entry is
        omitted.
        """

        metadata = {"cxg_version": CxgConstants.CXG_VERSION}
        metadata["cxg_properties"] = json.dumps({"title": self.dataset_title, "about": self.dataset_about})

        if self.corpora_properties is not None:
            metadata["corpora"] = json.dumps(self.corpora_properties)

        # Nothing more to add when the caller did not request color conversion.
        if not convert_anndata_colors_to_cxg_colors:
            return metadata

        try:
            category_colors = convert_anndata_category_colors_to_cxg_category_colors(self.anndata)
            metadata["cxg_category_colors"] = json.dumps(category_colors)
        except ColorFormatException:
            # Best effort: bad colors are reported but do not abort metadata generation.
            logging.warning(
                "Failed to extract colors from H5AD file! Fix the H5AD file or rerun with "
                "--disable-custom-colors. See help for more details.")

        return metadata
Beispiel #3
0
def write_cxg(
    adata, container, title, var_names=None, obs_names=None, about=None, extract_colors=False, sparse_threshold=5.0
):
    """
    Write `adata` into a TileDB group named `container`.

    Steps, in order: verify that the var and obs indices are unique, sanitize
    all dataframe column names, create the group, then save the dataset
    metadata, the var and obs dataframes, the embeddings (in the sub-group
    "<container>/emb") and finally the X matrix.

    `var_names` / `obs_names` are forwarded to `save_dataframe`, and
    `sparse_threshold` is forwarded to `save_X`. When `extract_colors` is
    truthy the category colors are converted and stored under the
    "cxg_category_colors" metadata key; a ColorFormatException during that
    conversion is logged as a warning and otherwise ignored.

    Raises ValueError if either the var or the obs index is not unique.
    """
    var_index_ok = adata.var.index.is_unique
    if not var_index_ok:
        raise ValueError("Variable index is not unique - unable to convert.")
    obs_index_ok = adata.obs.index.is_unique
    if not obs_index_ok:
        raise ValueError("Observation index is not unique - unable to convert.")

    # TileDB bug TileDB-Inc/TileDB#1575 requires that we sanitize all column names
    # prior to saving.  This can be reverted when the bug is fixed.
    log(0, "Warning: sanitizing all dataframe column names.")
    clean_all_column_names(adata)

    tiledb_config = {
        "sm.num_reader_threads": 32,
        "sm.num_writer_threads": 32,
        "sm.consolidation.buffer_size": 1 * 1024 * 1024 * 1024,
    }
    tiledb_ctx = tiledb.Ctx(tiledb_config)

    tiledb.group_create(container, ctx=tiledb_ctx)
    log(1, f"\t...group created, with name {container}")

    # dataset metadata
    dataset_metadata = {
        "cxg_version": CXG_VERSION,
        "cxg_properties": json.dumps({"title": title, "about": about}),
    }
    if extract_colors:
        try:
            category_colors = convert_anndata_category_colors_to_cxg_category_colors(adata)
            dataset_metadata["cxg_category_colors"] = json.dumps(category_colors)
        except ColorFormatException:
            # Best effort: the conversion continues without custom colors.
            log(
                0,
                "Warning: failed to extract colors from h5ad file! "
                "Fix the h5ad file or rerun with --disable-custom-colors. See help for details.",
            )
    save_metadata(container, dataset_metadata)
    log(1, "\t...dataset metadata saved")

    # var/gene dataframe
    save_dataframe(container, "var", adata.var, var_names, ctx=tiledb_ctx)
    log(1, "\t...var dataframe created")

    # obs/cell dataframe
    save_dataframe(container, "obs", adata.obs, obs_names, ctx=tiledb_ctx)
    log(1, "\t...obs dataframe created")

    # embeddings live in their own sub-group
    emb_group = f"{container}/emb"
    tiledb.group_create(emb_group, ctx=tiledb_ctx)
    save_embeddings(emb_group, adata, tiledb_ctx)
    log(1, "\t...embeddings created")

    # X matrix
    save_X(container, adata.X, tiledb_ctx, sparse_threshold)
    log(1, "\t...X created")
Beispiel #4
0
 def get_colors(self):
     """Return the CXG category colors converted from ``self.data``."""
     cxg_colors = convert_anndata_category_colors_to_cxg_category_colors(self.data)
     return cxg_colors