def test_de_backed(sparse, file_format, tmp_path):
    """Run DE with ``one_vs_rest=True`` on data written to disk and read back in batches.

    The example data is written under ``tmp_path`` by ``PrepareData`` using
    ``file_format``. Feature batches are then read back through the reader
    matching that format and supplied to ``DE`` via ``get_batch_fn``. The
    first entry of ``pair2results`` is checked with ``diff_results``.

    Args:
        sparse: Forwarded to ``get_example_data``.
        file_format: ``'parquet'`` or ``'zarr'``; selects the output format
            and the reader used to load batches.
        tmp_path: Directory the prepared dataset is written to.

    Raises:
        ValueError: If ``file_format`` is neither ``'parquet'`` nor ``'zarr'``.
    """
    fs = fsspec.filesystem('file')
    adata = get_example_data(sparse)
    output_dir = str(tmp_path)
    prepare_data = PrepareData(
        datasets=[adata], output=output_dir, output_format=file_format
    )
    prepare_data.execute()

    if file_format == 'parquet':
        reader = ParquetDataset()
    elif file_format == 'zarr':
        reader = ZarrDataset()
    else:
        # Without this branch ``reader`` stays unbound and the failure only
        # shows up later as a NameError raised from inside get_batch_fn.
        raise ValueError(f'Unsupported file_format: {file_format!r}')

    batch_size = 30
    obs_field = 'sc_groups'
    nfeatures = adata.shape[1]

    def get_batch_fn(i):
        # Clamp the slice end so the last batch stops at the final feature.
        end = min(nfeatures, i + batch_size)
        return reader.read_dataset(
            filesystem=fs,
            path=output_dir,
            dataset={'id': ''},
            keys={'X': [slice(i, end)]},
        )

    results = DE(
        series=adata.obs[obs_field],
        nfeatures=nfeatures,
        batch_size=batch_size,
        get_batch_fn=get_batch_fn,
        base=get_base(adata),
        one_vs_rest=True,
    )
    diff_results(adata, obs_field, results.pair2results[0])
def test_de_2_groups(sparse):
    """Run DE with ``one_vs_rest=True`` on the in-memory example data.

    Features are handed to ``DE`` in column batches of three, sliced straight
    from the example data, and the first entry of ``pair2results`` is checked
    with ``diff_results``.
    """
    data = get_example_data(sparse)
    group_column = 'sc_groups'
    chunk_size = 3
    n_features = data.shape[1]

    def fetch_columns(start):
        # The final chunk may be shorter; never slice past the last feature.
        stop = min(n_features, start + chunk_size)
        return data[:, start:stop]

    groups = data.obs[group_column]
    de_result = DE(
        series=groups,
        nfeatures=n_features,
        batch_size=chunk_size,
        get_batch_fn=fetch_columns,
        base=get_base(data),
        one_vs_rest=True,
    )
    diff_results(data, group_column, de_result.pair2results[0])
def test_de_4_groups(sparse):
    """Run DE on two concatenated copies of the example data.

    The second copy has its ``sc_groups`` values 0 and 1 relabelled to 2 and 3
    before concatenation, and the combined column is cast to a categorical.
    ``pair2results[i]`` for ``i`` in 0..3 is checked with ``diff_results``,
    passing ``str(i)`` as the extra argument.
    """
    group_column = 'sc_groups'
    chunk_size = 3

    first = get_example_data(sparse)
    second = get_example_data(sparse)
    # Shift the second copy's labels so they do not collide with the first's.
    relabelled = second.obs['sc_groups'].replace({0: 2, 1: 3})
    second.obs['sc_groups'] = relabelled

    combined = anndata.concat((first, second))
    combined.obs_names_make_unique()
    combined.obs[group_column] = combined.obs[group_column].astype('category')
    n_features = combined.shape[1]

    def fetch_columns(start):
        # The final chunk may be shorter; never slice past the last feature.
        stop = min(n_features, start + chunk_size)
        return combined[:, start:stop]

    de_result = DE(
        series=combined.obs[group_column],
        nfeatures=n_features,
        batch_size=chunk_size,
        get_batch_fn=fetch_columns,
        base=get_base(combined),
    )
    for group_index in range(4):
        diff_results(
            combined,
            group_column,
            de_result.pair2results[group_index],
            str(group_index),
        )