def test_anndata_colors_to_cxg_colors(self):
    """
    Exercise convert_anndata_category_colors_to_cxg_category_colors in three scenarios:
    the standard fixture, a malformed color string, and colors whose obs column is gone.
    """
    convert = convert_anndata_category_colors_to_cxg_category_colors

    # standard behavior: the fixture converts to the expected reference colors
    adata = self._get_h5ad()
    converted = convert(adata)
    self.assertEqual(converted, pbmc3k_colors)

    # an invalid color format must raise; the same object as above is mutated on purpose
    louvain_colors = adata.uns["louvain_colors"]
    louvain_colors[0] = "#NOTCOOL"
    self.assertRaises(ColorFormatException, convert, adata)

    # colors without a matching obs category are skipped, leaving an empty result
    orphaned = self._get_h5ad()
    del orphaned.obs["louvain"]
    converted_without_obs = convert(orphaned)
    self.assertEqual(converted_without_obs, {})
def generate_cxg_metadata(self, convert_anndata_colors_to_cxg_colors):
    """
    Build the dictionary of group-level metadata for a CXG dataset.

    The result always contains "cxg_version" (taken from CxgConstants.CXG_VERSION) and
    "cxg_properties" (a JSON string holding the dataset title and about values). A "corpora"
    entry (JSON string) is added when self.corpora_properties is not None. When
    convert_anndata_colors_to_cxg_colors is truthy, "cxg_category_colors" (JSON string) is added
    from self.anndata; if that conversion raises ColorFormatException a warning is logged and the
    entry is simply left out.

    :param convert_anndata_colors_to_cxg_colors: whether to attempt the color conversion
    :return: the metadata dictionary
    """
    metadata = {"cxg_version": CxgConstants.CXG_VERSION}
    metadata["cxg_properties"] = json.dumps(
        {"title": self.dataset_title, "about": self.dataset_about}
    )

    corpora = self.corpora_properties
    if corpora is not None:
        metadata["corpora"] = json.dumps(corpora)

    if not convert_anndata_colors_to_cxg_colors:
        return metadata

    try:
        category_colors = convert_anndata_category_colors_to_cxg_category_colors(self.anndata)
        metadata["cxg_category_colors"] = json.dumps(category_colors)
    except ColorFormatException:
        # Best effort: a bad color entry must not abort metadata generation.
        logging.warning(
            "Failed to extract colors from H5AD file! Fix the H5AD file or rerun with "
            "--disable-custom-colors. See help for more details."
        )

    return metadata
def write_cxg(
    adata, container, title, var_names=None, obs_names=None, about=None, extract_colors=False, sparse_threshold=5.0
):
    """
    Write `adata` out as a CXG TileDB group rooted at `container`.

    Steps, in order: validate that the var and obs indices are unique, sanitize column names,
    create the TileDB group, save the dataset metadata, the var and obs dataframes, the
    embeddings (in the sub-group f"{container}/emb") and finally the X matrix.

    :param adata: source object; its var, obs and X attributes are read here
    :param container: location of the TileDB group to create
    :param title: stored under "title" in the "cxg_properties" metadata
    :param var_names: forwarded to save_dataframe for the var dataframe
    :param obs_names: forwarded to save_dataframe for the obs dataframe
    :param about: stored under "about" in the "cxg_properties" metadata
    :param extract_colors: when truthy, try to store "cxg_category_colors" in the metadata;
        a ColorFormatException is logged as a warning and the entry is omitted
    :param sparse_threshold: forwarded to save_X
    :raises ValueError: if the var index or the obs index is not unique
    """
    if not adata.var.index.is_unique:
        raise ValueError("Variable index is not unique - unable to convert.")
    if not adata.obs.index.is_unique:
        raise ValueError("Observation index is not unique - unable to convert.")

    # TileDB bug TileDB-Inc/TileDB#1575 requires that we sanitize all column names
    # prior to saving. This can be reverted when the bug is fixed.
    log(0, "Warning: sanitizing all dataframe column names.")
    clean_all_column_names(adata)

    tiledb_config = {
        "sm.num_reader_threads": 32,
        "sm.num_writer_threads": 32,
        "sm.consolidation.buffer_size": 1 * 1024 * 1024 * 1024,
    }
    tiledb_ctx = tiledb.Ctx(tiledb_config)

    tiledb.group_create(container, ctx=tiledb_ctx)
    log(1, f"\t...group created, with name {container}")

    # dataset metadata
    dataset_metadata = {
        "cxg_version": CXG_VERSION,
        "cxg_properties": json.dumps({"title": title, "about": about}),
    }
    if extract_colors:
        try:
            category_colors = convert_anndata_category_colors_to_cxg_category_colors(adata)
            dataset_metadata["cxg_category_colors"] = json.dumps(category_colors)
        except ColorFormatException:
            log(
                0,
                "Warning: failed to extract colors from h5ad file! "
                "Fix the h5ad file or rerun with --disable-custom-colors. See help for details.",
            )
    save_metadata(container, dataset_metadata)
    log(1, "\t...dataset metadata saved")

    # var/gene dataframe
    save_dataframe(container, "var", adata.var, var_names, ctx=tiledb_ctx)
    log(1, "\t...var dataframe created")

    # obs/cell dataframe
    save_dataframe(container, "obs", adata.obs, obs_names, ctx=tiledb_ctx)
    log(1, "\t...obs dataframe created")

    # embeddings live in their own sub-group
    embeddings_container = f"{container}/emb"
    tiledb.group_create(embeddings_container, ctx=tiledb_ctx)
    save_embeddings(embeddings_container, adata, tiledb_ctx)
    log(1, "\t...embeddings created")

    # X matrix
    save_X(container, adata.X, tiledb_ctx, sparse_threshold)
    log(1, "\t...X created")
def get_colors(self):
    """
    Return the CXG category colors derived from this object's data.

    self.data is handed unchanged to convert_anndata_category_colors_to_cxg_category_colors and
    the converter's result is returned as is; any exception it raises propagates to the caller.
    """
    category_colors = convert_anndata_category_colors_to_cxg_category_colors(self.data)
    return category_colors