def load(
    anndata_name_original="mouse_retina.h5ad",
    split="train",
    cache_dir="data_cache",
    cache=True,
):
    """
    Load the requested split of the mouse data.

    Looks for a local copy of the original dataset in ``cache_dir`` and builds
    it with ``_create_anndata`` if it is missing. Then looks for a cached file
    of the requested split; if there is none, the original data is split on
    the fly.

    If ``cache=True`` the splits are written to ``cache_dir`` and the requested
    split is read back from disk. If ``cache=False`` no split file is written
    and the freshly computed split is returned directly.

    :param anndata_name_original: file name of the original h5ad inside ``cache_dir``
    :param split: name of the split to load; used as the file-name suffix
    :param cache_dir: directory holding the original file and the split files
    :param cache: whether newly computed splits are written to ``cache_dir``
    :return: the AnnData object for the requested split
    """
    original_fpath = os.path.join(cache_dir, anndata_name_original)
    if not os.path.exists(original_fpath):
        _create_anndata(anndata_name_out=anndata_name_original, cache_dir=cache_dir)

    original_fname = os.path.basename(original_fpath)
    original_bname, original_ext = os.path.splitext(original_fname)
    target_fname = "{0}_{1}{2}".format(original_bname, split, original_ext)
    target_fpath = os.path.join(cache_dir, target_fname)

    if not os.path.exists(target_fpath):
        adata_in = sc.read_h5ad(original_fpath)
        split_inds, split_adata = split_anndata(adata_in)
        if not cache:
            # Nothing gets written in this mode, so reading target_fpath would
            # fail; hand back the in-memory split instead.
            # NOTE(review): assumes split_adata is keyed by split name - confirm
            # against split_anndata.
            return split_adata[split]
        write_splits(
            split_inds_dict=split_inds,
            split_adata_dict=split_adata,
            basename=original_bname,
            out_dir=cache_dir,
        )

    return sc.read_h5ad(target_fpath)
def load(
    split="train",
    original_fpath="/allen/aics/modeling/data/scRNAseq_SeeligCollaboration/data_for_modeling/scrnaseq_cardio_20181129.h5ad",
    cache_dir="data_cache",
    cache=True,
    selected_genes_path=None,
    threshold=0,
):
    """
    Load the requested split of the cardio data (raw count values).

    Looks for a cached file of the split in ``cache_dir``. If there is none,
    the dataset at ``original_fpath`` is read, its ``.raw`` matrix is wrapped
    in a new AnnData object and that object is split on the fly. With
    ``cache=True`` the splits are written to ``cache_dir`` and the requested
    one is read back; with ``cache=False`` the in-memory split is used.

    :param split: name of the split to load; used as the file-name suffix
    :param original_fpath: path of the complete h5ad dataset
    :param cache_dir: directory holding the cached split files
    :param cache: whether newly computed splits are written to ``cache_dir``
    :param selected_genes_path: optional tab-separated file with a
        ``Gene name`` column; when given, only those genes (with ``_HUMAN``
        appended) are kept
    :param threshold: only applied together with ``selected_genes_path``;
        a gene is kept when the fraction of cells with a non-zero value is
        strictly greater than this
    :return: the AnnData object for the requested split
    """
    original_fname = os.path.basename(original_fpath)
    original_bname, original_ext = os.path.splitext(original_fname)
    target_fname = "{0}_{1}{2}".format(original_bname, split, original_ext)
    target_fpath = os.path.join(cache_dir, target_fname)

    if os.path.exists(target_fpath):
        adata = sc.read_h5ad(target_fpath)
    else:
        adata_in = sc.read_h5ad(original_fpath)
        adata_raw = sc.AnnData(
            X=adata_in.raw.X.todense(),
            obs=adata_in.obs,
            var=adata_in.var,
            uns=adata_in.uns,
        )
        split_inds, split_adata = split_anndata(adata_raw)
        if cache:
            write_splits(
                split_inds_dict=split_inds,
                split_adata_dict=split_adata,
                basename=original_bname,
                out_dir=cache_dir,
            )
            adata = sc.read_h5ad(target_fpath)
        else:
            # No file was written, so target_fpath cannot be read back.
            # NOTE(review): assumes split_adata is keyed by split name - confirm
            # against split_anndata.
            adata = split_adata[split]

    if selected_genes_path is not None:
        df = pd.read_csv(selected_genes_path, delimiter="\t")
        # A set keeps the membership test below O(1) per gene.
        coding_genes = {str(g) + "_HUMAN" for g in df["Gene name"].unique()}
        cols = np.array([c for c in adata.var.index if c in coding_genes])
        adata = adata[:, cols]
        # Flatten so the mask is 1-D even when X is a matrix or sparse type,
        # whose column mean comes back with shape (1, n_genes).
        gene_nz_freq = np.asarray((adata.X > 0).mean(axis=0)).ravel()
        adata = adata[:, cols[gene_nz_freq > threshold]]

    return adata
def preprocess_h5ad_data(raw_input_path, processed_path, scaling_option="log_min_max", sig_genes=None):
    """
    Preprocess raw input data for the model and write the result to disk.

    Reads the h5ad file, optionally restricts it to ``sig_genes``, scales the
    expression matrix with ``sample_scaling`` and writes the result to
    ``processed_path``. Progress messages are printed along the way.

    :param raw_input_path: path of the raw h5ad file to read
    :param processed_path: path the processed h5ad file is written to
    :param scaling_option: scaling mode passed on to ``sample_scaling``
    :param sig_genes: genes to keep (used to index the variable axis);
        ``None`` keeps all genes
    :return: None
    """
    print("Pre-processing raw data ...")
    raw_input = sc.read_h5ad(raw_input_path)

    if sig_genes is not None:
        print("Subsetting genes ...")
        # Select features to use. Copy so that the scaled matrix below is
        # assigned to an independent object rather than to a view.
        raw_input = raw_input[:, sig_genes].copy()

    print("Scaling using " + str(scaling_option))
    # Scaling
    raw_input.X = sample_scaling(raw_input.X, scaling_option)

    print("Writing to disk ...")
    # Write processed data to disk
    raw_input.write(processed_path)
    print("Data pre-processing done.")
def load_h5ad_file(self, input_path, batch_size, datasets=None):
    """
    Load input data from a h5ad file and build the training dataset from it.

    The results are stored on the instance: ``x_data``/``y_data`` (float32
    arrays), ``x_data_ph``/``y_data_ph`` (placeholders with matching dtype
    and shape), ``data`` (a dataset built from the placeholders, shuffled
    with a buffer of 1000, repeated and batched), ``labels`` and
    ``sig_genes``.

    :param input_path: path to h5ad file
    :param batch_size: batch size to use for training
    :param datasets: datasets (values of ``obs['ds']``) to keep; ``None`` or
        an empty list keeps everything
    :return: None
    """
    raw_input = sc.read_h5ad(input_path)

    # Subset dataset: keep only the observations of the requested datasets
    if datasets is not None and len(datasets) > 0:
        keep = raw_input.obs['ds'].isin(datasets)
        raw_input = raw_input[keep].copy()

    # Create training dataset
    ratios = [raw_input.obs[ctype] for ctype in raw_input.uns['cell_types']]
    self.x_data = raw_input.X.astype(np.float32)
    # One obs column per cell type; transpose so rows are observations again
    self.y_data = np.array(ratios, dtype=np.float32).transpose()

    # create placeholders
    self.x_data_ph = tf.compat.v1.placeholder(self.x_data.dtype, self.x_data.shape, name="x_data_ph")
    self.y_data_ph = tf.compat.v1.placeholder(self.y_data.dtype, self.y_data.shape, name="y_data_ph")
    self.data = tf.compat.v1.data.Dataset.from_tensor_slices((self.x_data_ph, self.y_data_ph))
    self.data = self.data.shuffle(1000).repeat().batch(batch_size=batch_size)

    # Extract celltype and feature info
    self.labels = raw_input.uns['cell_types']
    self.sig_genes = list(raw_input.var_names)
def verify_anndata(matrix_path, test_yaml_path):
    """
    Check an h5ad matrix against the expected values stored in a YAML file.

    The YAML file must contain an ``expected_output`` mapping with the keys
    ``non_zero_count``, ``sum`` and ``shape``. They are compared with the
    transposed ``X`` matrix of the h5ad file.

    :param matrix_path: path of the h5ad file to verify
    :param test_yaml_path: path of the YAML file holding the expected values
    :raises AssertionError: if any of the three values differs
    """
    # safe_load only builds plain YAML types, and the context manager makes
    # sure the file handle is closed again.
    with open(test_yaml_path) as yaml_file:
        expected_values = yaml.safe_load(yaml_file)['expected_output']

    output_matrix = sc.read_h5ad(matrix_path).X.T

    assert numpy.count_nonzero(
        output_matrix) == expected_values["non_zero_count"]
    assert numpy.sum(output_matrix) == expected_values["sum"]
    assert tuple(output_matrix.shape) == tuple(expected_values["shape"])
def load(
    loc="data_files",
    blocksize=1000000,
    anndata_write=True,
    anndata_name="mouse_retina.h5ad",
    X_dtype=np.float32,
):
    """
    Return the mouse retina development data as an AnnData object.

    A previously saved h5ad file at ``loc/anndata_name`` is read if present.
    Otherwise any of the three input files missing from ``loc`` are
    downloaded, the AnnData object is assembled from them, genes and cells
    without any non-zero entry are dropped, and (if ``anndata_write``) the
    result is saved to ``loc/anndata_name``.

    :param loc: directory for the downloaded inputs and the saved h5ad file
    :param blocksize: passed through to ``download_file``
    :param anndata_write: whether a newly built object is written to disk
    :param anndata_name: file name of the saved h5ad file
    :param X_dtype: dtype the count matrix is cast to
    :return: the AnnData object
    """
    adata_fpath = os.path.join(loc, anndata_name)

    # A saved copy from an earlier run short-circuits everything else.
    if os.path.isfile(adata_fpath):
        print("reading saved anndata h5ad file")
        return sc.read_h5ad(adata_fpath)

    # No saved copy yet: make sure the inputs are available locally.
    if not os.path.exists(loc):
        os.makedirs(loc)

    files = {
        "10x_mouse_retina_development.mtx": "https://www.dropbox.com/s/6d76z4grcnaxgcg/10x_mouse_retina_development.mtx?dl=1",
        "10x_mouse_retina_development_phenotype.csv": "https://www.dropbox.com/s/y5lho9ifzoktjcs/10x_mouse_retina_development_phenotype.csv?dl=1",
        "10x_mouse_retina_development_feature.csv": "https://www.dropbox.com/s/1mc4geu3hixrxhj/10x_mouse_retina_development_feature.csv?dl=1",
    }

    print("downloading data files")
    for fname, url in files.items():
        if os.path.exists(os.path.join(loc, fname)):
            continue
        download_file(url, loc=loc, blocksize=blocksize)

    # Load the cell annotations, gene annotations and count matrix.
    print("reading data files")
    phenotype_path = os.path.join(loc, "10x_mouse_retina_development_phenotype.csv")
    df_obs = pd.read_csv(phenotype_path, index_col=0)[
        ["barcode", "sample", "age", "CellType"]
    ]
    feature_path = os.path.join(loc, "10x_mouse_retina_development_feature.csv")
    df_var = pd.read_csv(feature_path, index_col=0)[["id", "gene_short_name"]]
    count_mat = mmread(os.path.join(loc, "10x_mouse_retina_development.mtx"))

    # Assemble the AnnData object from the transposed, densified counts.
    print("constructing anndata object")
    dense_counts = count_mat.toarray().astype(X_dtype).transpose()
    adata = sc.AnnData(X=dense_counts, obs=df_obs, var=df_var)

    # Keep only genes/cells that have at least one non-zero entry.
    genes_to_keep = np.mean(adata.X != 0, axis=0) > 0
    cells_to_keep = np.mean(adata.X != 0, axis=1) > 0
    adata = adata[:, genes_to_keep][cells_to_keep, :].copy()

    # Store the result so the next call can take the fast path.
    if anndata_write:
        print("saving annndata h5ad file")
        adata.write(adata_fpath)

    return adata
def __init__(self, download=True, dir_path='.', tabula_muris_senis=False):
    '''
    Set up the generator and load its data into ``self.adata``.

    download: if True, ``download_data`` is called with dir_path before the
              file is read; otherwise the file is expected to be there already
    dir_path: directory that holds (or will hold) 'tms-facs-mars.h5ad'
    tabula_muris_senis: if False, only observations whose 'age' is '3m' are
                        kept after preprocessing; if True, all are kept
    '''
    if download:
        self.download_data(dir_path)

    h5ad_path = os.path.join(dir_path, 'tms-facs-mars.h5ad')
    self.adata = sc.read_h5ad(h5ad_path)
    self.preprocess()

    if tabula_muris_senis:
        return
    # Restrict to the 3-month observations.
    is_three_months = self.adata.obs['age'] == '3m'
    self.adata = self.adata[is_three_months]
def processing(data_path, training_data, processed_path):
    """
    Restrict a training dataset to the signature genes and preprocess it.

    The variable names of the training data are passed to
    ``get_signature_genes`` together with the prediction data path; the genes
    it returns are then handed to ``preprocess_h5ad_data``.

    :param data_path: path to prediction data
    :param training_data: path to training data (h5ad file)
    :param processed_path: name of processed file
    :return: None
    """
    # Collect every gene present in the training data
    training_adata = sc.read_h5ad(training_data)
    all_training_genes = list(training_adata.var_names)

    # Determine the signature genes
    sig_genes = get_signature_genes(
        input_path=data_path,
        sig_genes_complete=all_training_genes,
    )

    # Pre-process the training data using those genes
    preprocess_h5ad_data(
        raw_input_path=training_data,
        processed_path=processed_path,
        sig_genes=sig_genes,
    )
def read_anndata(input, genome=None):
    """
    Read a dataset, choosing the reader from the file extension.

    ``.h5`` files are read with ``read_10x_h5``, ``.h5ad`` with ``read_h5ad``
    and ``.loom`` with ``read_loom``.

    :param input: path of the file to read
    :param genome: genome (top-level group) to read from a ``.h5`` file; when
        omitted the file must contain exactly one top-level group, which is
        then used
    :return: whatever the selected reader returns
    :raises Exception: if a ``.h5`` file has several top-level groups and no
        ``genome`` was given, or if the extension is not recognised
    """
    _, input_ext = splitext(input)

    if input_ext == ".h5":
        if not genome:
            # Open the file only long enough to list its top-level groups so
            # the handle is not left open.
            # NOTE(review): assumes File is h5py.File, which supports the
            # context-manager protocol - confirm against the file's imports.
            with File(input, "r") as h5_file:
                keys = list(h5_file.keys())
            if len(keys) == 1:
                genome = keys[0]
            else:
                raise Exception(
                    "Set --genome flag when converting from 10x HDF5 (.h5) to Anndata HDF5 (.h5ad); top-level groups in file %s: %s"
                    % (input, ",".join(keys)))
        return read_10x_h5(input, genome=genome)
    elif input_ext == ".h5ad":
        return read_h5ad(input)
    elif input_ext == ".loom":
        # reads the whole dataset in memory!
        return read_loom(input)
    else:
        raise Exception("Unrecognized input extension: %s" % input_ext)
def load_data(data):
    """
    Read ``data`` into an AnnData object and apply the naming options.

    A file is read by extension (.h5ad or .loom); a directory is read with
    ``sc.read_10x_mtx``. The options ``set_obs_names``, ``set_var_names``,
    ``make_obs_names_unique`` and ``make_var_names_unique`` come from the
    enclosing scope. A warning is echoed for each index that is still not
    unique afterwards.

    Raises click.FileError for an unsupported extension or a path that is
    neither a file nor a directory, and click.UsageError when a requested
    obs/var key does not exist.
    """
    if isfile(data):
        extension = splitext(data)[1]
        if extension == ".h5ad":
            adata = sc.read_h5ad(data)
        elif extension == ".loom":
            adata = sc.read_loom(data)
        else:
            raise click.FileError(
                data, hint="does not have a valid extension [.h5ad | .loom]")
    elif isdir(data):
        # Make sure the directory path carries a trailing separator.
        if not data.endswith(sep):
            data += sep
        adata = sc.read_10x_mtx(data)
    else:
        raise click.FileError(data, hint="not a valid file or path")

    # Optionally replace the observation names by an obs column.
    if set_obs_names != "":
        if set_obs_names not in adata.obs_keys():
            raise click.UsageError(
                f"obs {set_obs_names} not found, options are: {adata.obs_keys()}"
            )
        adata.obs_names = adata.obs[set_obs_names]

    # Optionally replace the variable names by a var column.
    if set_var_names != "":
        if set_var_names not in adata.var_keys():
            raise click.UsageError(
                f"var {set_var_names} not found, options are: {adata.var_keys()}"
            )
        adata.var_names = adata.var[set_var_names]

    if make_obs_names_unique:
        adata.obs_names_make_unique()
    if make_var_names_unique:
        adata.var_names_make_unique()

    # Warn about whatever is still duplicated.
    obs_unique = adata._obs.index.is_unique
    if not obs_unique:
        click.echo("Warning: obs index is not unique")
    var_unique = adata._var.index.is_unique
    if not var_unique:
        click.echo("Warning: var index is not unique")

    return adata
import scanpy.api as sc
from umap import UMAP
import scanorama
import sys

# Locate the figures directory relative to this script.
script_path = os.path.dirname(os.path.realpath(__file__))
output_dir = os.path.join(script_path, '../../Figures') + '/'

# Read the two velocyto data sets.
adata_scv_pru = sc.read_h5ad(output_dir + '../Data/pru/adata_sc_velocyto.h5ad')
adata_scv_me49 = sc.read_h5ad(output_dir + '../Data/011_me49/adata_sc_velocyto.h5ad')

# Run scanorama on copies (ME49 first, then Pru).
adatas = [adata_scv_me49.copy(), adata_scv_pru.copy()]
integrated, corrected = scanorama.correct_scanpy(adatas, return_dimred=True)

# Stack the integrated embeddings and compute a 2-D UMAP on them.
merged_x = np.concatenate(integrated)
umap_model = UMAP(n_components=2, random_state=4, min_dist=0.3, n_neighbors=50)
umap_merged_x = umap_model.fit_transform(merged_x)

# Concatenate the corrected objects; keep the part of each name before '-'.
adatas = corrected[0].concatenate(corrected[1])
adatas.obs_names = [name.split('-')[0] for name in adatas.obs_names]
adatas.obsm['X_corrected'] = merged_x
adatas.obsm['X_corrected_umap'] = umap_merged_x

# Keep the original matrices of both inputs, stacked in the same order.
stacked_original = np.concatenate([adata_scv_me49.X.A, adata_scv_pru.X.A])
adatas.layers['original_mat'] = sp.sparse.csr_matrix(stacked_original)

# Label each observation by whether its name contains '10099011'.
batch = ['ME49' if '10099011' in name else 'Pru' for name in adatas.obs_names]
adatas.obs['batch'] = batch

## Save scanorama results
adatas.write_h5ad(
    filename=output_dir + '../Data/pru/adata_integrated_0506_me49.h5ad',
    compression='gzip',
)
def cli(dataset, engine, format, layout, recipe, output, sparse, plotting):
    """
    Hi! This is a tool for preprocessing data for use with cellxgene.
    """
    # Pipeline: load -> recipe/filter+scale -> PCA -> neighbors -> louvain ->
    # layout (umap and/or tsne) -> optional write to `output`.
    import matplotlib
    matplotlib.use('Agg')
    import scanpy.api as sc
    import pandas as pd
    import numpy as np

    # scanpy settings
    sc.settings.verbosity = 2
    sc.settings.autosave = True

    # data loading; adata stays None for any other format value
    adata = None
    if format == 'h5ad':
        adata = sc.read_h5ad(dataset)
    elif format == '10x_mtx':
        adata = sc.read_10x_mtx(dataset)
    elif format == 'loom':
        adata = sc.read_loom(dataset, sparse=bool(sparse))

    adata.var_names_make_unique()

    # zero-centering is switched off for sparse input, both for scaling and PCA
    centering_kwargs = {'zero_center': False} if sparse else {}

    # run a recipe if requested, otherwise filter and scale
    if recipe == 'seurat':
        sc.pp.recipe_seurat(adata)
    elif recipe == 'zheng17':
        sc.pp.recipe_zheng17(adata)
    else:
        sc.pp.filter_cells(adata, min_genes=5)
        sc.pp.filter_genes(adata, min_cells=25)
        sc.pp.scale(adata, **centering_kwargs)

    # dimensionality reduction
    sc.pp.pca(adata, svd_solver='arpack', **centering_kwargs)

    # neighbors and clustering
    sc.pp.neighbors(adata)
    sc.tl.louvain(adata)

    # layout and plotting; the palette depends on the number of clusters
    n_clusters = len(np.unique(adata.obs['louvain'].values))
    palette = 'tab10' if n_clusters < 10 else 'tab20'

    if layout in ('umap', 'umap+tsne'):
        sc.tl.umap(adata)
        if plotting:
            sc.pl.umap(adata, color='louvain', palette=palette, save='_louvain')

    if layout in ('tsne', 'umap+tsne'):
        sc.tl.tsne(adata)
        if plotting:
            sc.pl.tsne(adata, color='louvain', palette=palette, save='_louvain')

    # show the structure
    print('data structure...')
    print(adata)

    # saving file
    if output != '':
        print('saving output...')
        adata.write(output)
4689, 4708, 4722, 4730, 4739, 4747, 4749, 4800 ]) / 4800.0 cov_milestones = [ 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40 ] dmx_perturb = 0 cov_round = reads_per_singlet / 1000 for i in range(len(cov_milestones)): if i == 0: if cov_round <= cov_milestones[i]: dmx_perturb = demux_perturbation[i] elif i > 0 and i <= len(cov_milestones) - 1: if cov_milestones[i - 1] < cov_round <= cov_milestones[i]: dmx_perturb = demux_perturbation[i] data = api.read_h5ad( "GROUND_TRUTH_CT_DMX/IGOR_120-individuals_3000-cells.h5ad") matrix_ = data.X observations_ = data.obs observations_.index = observations_.index.astype(int) genes = list(data.var["gene"]) individuals = list(observations_["ind_cov"].unique()) print(Counter(observations_["ind_cov"])) # permute the individuals labels due to multiplexing performance at difference levels of coverage ind_minus = {} for x in individuals: ind_minus[x] = [y for y in individuals if y != x] if dmx_perturb > 0:
def import_data(data_p1, data_p2, create = True):
    """
    Utility function to import both samples together with proteins.

    With ``create=False`` the previously written file
    '../data_update_cite_seq/tec_cite_h5_2019jun18/adata.h5' is read and
    returned. Otherwise the expression data ('filtered_feature_bc_matrix/')
    and the protein counts ('umi_count/') are read from below each data path,
    the two samples are concatenated, the protein counts are stored in
    ``obsm['prot']`` (names in ``uns['prot_names']``), a few per-cell and
    per-gene annotations are added and the result is written to that file.

    Parameters
    --------
    data_p1, data_p2: str
        data paths; sub-directory names are appended by plain string
        concatenation, so each path needs its trailing separator
    create: bool
        if False, read the previously written file instead of rebuilding

    Output
    --------
    adata: AnnData Object
    """
    if not create:
        path = '../data_update_cite_seq/tec_cite_h5_2019jun18'
        return sc.read_h5ad(path + '/adata.h5')

    # Expression data: one AnnData object per sample
    adata_list = []
    for d in [data_p1, data_p2]:
        p_genes = d + 'filtered_feature_bc_matrix/'
        # column 1 of features.tsv is used as the feature name
        features = pd.read_csv(p_genes + 'features.tsv', delimiter='\t', header=None)[1]
        barcodes = pd.read_csv(p_genes + 'barcodes.tsv', delimiter='\t', header=None)
        matrix = scipy.io.mmread(p_genes + 'matrix.mtx')
        adata = anndata.AnnData(matrix.tocsr()) # compared execution time with anndata.read_mtx and this is faster
        # Features index the rows and barcodes the columns here; the objects
        # are transposed after the loop.
        adata.obs.index = features.values.tolist()
        # The last two characters of every barcode are dropped
        # (presumably a suffix such as '-1' - TODO confirm).
        adata.var.index = barcodes[0].str.slice(stop=-2).values.tolist()
        adata_list.append(adata)

    # Transpose so that barcodes index obs and features index var
    adata_r1 = adata_list[0].T
    adata_r2 = adata_list[1].T
    adata_r1.var_names_make_unique()
    adata_r2.var_names_make_unique()

    # Proteomic Data
    prot_list = []
    for d in [data_p1, data_p2]:
        p_prot = d + 'umi_count/'
        features = pd.read_csv(p_prot + 'features.tsv', delimiter='\t', header=None)[0].values.tolist()
        # Keep the part before the first '-'. For a name without '-', find()
        # returns -1, so the slice drops its last character instead.
        features = [f[:f.find('-')] for f in features]
        # Append 'd' to the last feature name (presumably this restores
        # 'unmapped' after the slice above - TODO confirm).
        features[-1] = features[-1] + 'd'
        barcodes = pd.read_csv(p_prot + 'barcodes.tsv', delimiter='\t', header=None)
        matrix = scipy.io.mmread(p_prot + 'matrix.mtx')
        prot = pd.DataFrame(data = matrix.todense(), index = features, columns = barcodes[0].values)
        prot_list.append(prot)

    # Transpose so that barcodes are the rows and proteins the columns
    prot_r1 = prot_list[0].T
    prot_r2 = prot_list[1].T
    protein_names = list(prot_r1.columns)

    # bring the adata object in the right order (that of the protein tables)
    adata_r1 = adata_r1[prot_r1.index]
    adata_r2 = adata_r2[prot_r2.index]

    # combine these again
    adata = adata_r1.concatenate(adata_r2)

    # make names unique
    # NOTE(review): these calls run after the concatenation above and act on
    # the per-sample objects, not on the combined `adata` - confirm intent.
    adata_r1.obs_names_make_unique()
    adata_r2.obs_names_make_unique()

    # combine the protein tables in the same sample order
    prot = pd.concat((prot_r1, prot_r2), axis=0)

    # add the proteins to the adata object
    adata.obsm['prot'] = prot.values

    # add the protein names
    adata.uns['prot_names'] = protein_names

    # Add some annotations
    # mitochondrial genes: variable names starting with 'mt-'
    mito_genes = [name for name in adata.var_names if name.startswith('mt-')]
    adata.obs['percent_mito'] = np.sum(
        adata[:, mito_genes].X, axis=1) / np.sum(adata.X, axis=1)
    adata.obs['n_counts_0'] = scipy.sparse.csr_matrix.sum(adata.X, axis = 1) # Number of counts in each cell
    adata.obs['n_genes_0'] = scipy.sparse.csr_matrix.sum(adata.X>0, axis = 1) # Number of genes in each cell
    adata.var['n_cells_0'] = np.sum(adata.X>0, axis = 0).T # Number of cells where the gene is expressed
    adata.var['n_counts_gene'] = np.sum(adata.X, axis = 0).T # Number of counts of each gene across all cells

    # add some protein annotations
    # number of proteins with a non-zero count, ignoring the last column
    adata.obs['n_proteins'] = np.sum(adata.obsm['prot'][:, :-1] > 0, axis = 1)
    # the last protein column is stored separately as 'unmapped'
    adata.obs['unmapped'] = adata.obsm['prot'][:, -1]

    # write h5 file (the same file the create=False branch reads)
    adata.write('../data_update_cite_seq/tec_cite_h5_2019jun18/adata.h5')
    return adata
import numpy as np from collections import Counter from scanpy import api import pandas as pd from scipy.sparse import csr_matrix, vstack import time import pickle exname = sys.argv[1] number_individuals = int(sys.argv[2]) singlets_per_individual = int(sys.argv[3]) multiplets_per_individual = int(sys.argv[4]) reads_per_singlet = int(sys.argv[5]) reads_per_doublet = int(sys.argv[6]) data = api.read_h5ad("IGOR_120-individuals_3000-cells.h5ad") matrix_ = data.X observations_ = data.obs observations_.index = observations_.index.astype(int) genes = list(data.var["gene"]) individuals = list(observations_["ind_cov"].unique()) if number_individuals < len(individuals): individuals = np.random.choice(list(observations_["ind_cov"].unique()), size=number_individuals, replace=False) good_rows = [] doublets = {} for individual in individuals:
def merge_anndatas(anndata_paths, output_path):
    """
    Concatenate several h5ad files into one and write the result.

    The first file is read eagerly; the remaining ones are read lazily
    through a generator that is handed to ``concatenate`` on the first
    object. The combined object is written to ``output_path``.

    :param anndata_paths: paths of the h5ad files, in concatenation order
    :param output_path: path the combined file is written to
    """
    head_path, tail_paths = anndata_paths[0], anndata_paths[1:]
    base = sc.read_h5ad(head_path)
    remaining = (sc.read_h5ad(path) for path in tail_paths)
    merged = base.concatenate(remaining)
    merged.write(output_path)
#!/usr/bin/env python
# coding: utf-8

# Records the barcodes of one leiden cluster of a sample in a text file.
# Usage: <script> SAMPLE_NAME CLUSTER_TO_FILTER

# import
import os
import sys
import scanpy.api as sc
from anndata import AnnData

sample_name = sys.argv[1]
cluster_to_filter = sys.argv[2]
print('filtering clusters:{0} for {1} '.format(cluster_to_filter, sample_name))

# The sample's files live in a directory named after it below the cwd.
wd = os.path.join(os.getcwd(), sample_name)
adata_path = os.path.join(wd, '{0}.adata.h5ad'.format(sample_name))
adata = sc.read_h5ad(filename=adata_path)

# Barcodes of all cells assigned to the requested cluster.
in_cluster = adata.obs.leiden.isin([cluster_to_filter])
barcodes = adata.obs.index[in_cluster]

filename = os.path.join(wd, '{}.filtered.txt'.format(sample_name))
print('total {0} low quality cells will be recorded in {1}'.format(
    len(barcodes), filename))

# One barcode per line, without a trailing newline.
with open(filename, 'w') as f:
    f.write('\n'.join(barcodes))

print('END')
def createClusterFigure(doc):
    """
    Build the tabbed cluster figures and add them to a Bokeh document.

    The dataset is read from ``doc.session_context.db_path`` (or from
    '/app/ProcessedData.h5ad' when that value is the string 'None'). Request
    arguments whose key contains 'Gene' form the gene list; the first one
    becomes the active gene unless the list is empty or contains 'None'.

    Three scatter plots over the UMAP coordinates are produced: coloured by
    louvain cluster, by ``n_genes`` and, when the active gene yields exactly
    one value per cell, by that gene's expression. Each goes into its own tab.

    :param doc: the Bokeh document to populate
    :return: the same document, with the tabs added as a root
    """
    active_gene = None
    # Bound up front so the widgets and the `update` callback below always
    # have a list, even when the request carries no arguments.
    geneList = []
    print('Starting Document....')

    dataPath = doc.session_context.db_path
    if dataPath != 'None':
        dataSet = sc.read_h5ad(dataPath)
    else:
        dataSet = sc.read_h5ad('/app/ProcessedData.h5ad')

    args = doc.session_context.request.arguments
    if args is not None:
        geneList = [args[x][0].decode() for x in args if 'Gene' in x]
        if geneList and 'None' not in geneList:
            print(geneList[0])
            active_gene = geneList[0]

    def makePlot(doc, active_gene, adata):
        """Return a dict of figures: 'tsne', 'nGene' and, if available, 'sGene'."""

        def toHexColors(values):
            # Normalise the values, map them through viridis and format the
            # RGB part of each colour as a hex string.
            rgba = 255 * mpl.cm.viridis(mpl.colors.Normalize()(values))
            return ["#%02x%02x%02x" % (int(r), int(g), int(b)) for r, g, b, _ in rgba]

        cdsDict = {}
        cdsDict['x'] = adata.obsm['X_umap'][:, 0]
        cdsDict['y'] = adata.obsm['X_umap'][:, 1]

        # Color by Cluster
        color_Dict = dict(zip(adata.obs['louvain'].cat.categories, adata.uns['louvain_colors']))
        cdsDict['color'] = [
            color_Dict[cluster] for cluster in adata.obs['louvain'] if cluster in color_Dict
        ]

        # Color by n_genes
        cdsDict['gene_colors'] = toHexColors(adata.obs['n_genes'].tolist())

        if active_gene is not None:
            # Select the gene's column(s) and colour by the normalised values
            geneExpression = adata.X[:, adata.var.index == active_gene].flatten()
            single_gene_colors = toHexColors(geneExpression)
            # Only usable when there is exactly one colour per plotted point
            # (an unknown gene yields none, a duplicated one too many).
            if len(single_gene_colors) == len(cdsDict['x']):
                cdsDict['single_gene'] = single_gene_colors

        source = ColumnDataSource(cdsDict)
        title = 'T-SNE visualization of sequences'
        geneTitle = 'n_genes'
        plotDict = {}

        plot_lda = figure(plot_width=800, plot_height=600, title=title,
                          tools="pan,wheel_zoom,box_zoom,reset,hover,previewsave",
                          x_axis_type=None, y_axis_type=None, min_border=1)
        plot_lda.scatter(x='x', y='y', legend='label', source=source,
                         color='color', alpha=0.8, size=5)
        plotDict['tsne'] = plot_lda

        genePlot = figure(plot_width=800, plot_height=600, title=geneTitle,
                          tools="pan,wheel_zoom,box_zoom,reset,hover,previewsave",
                          x_axis_type=None, y_axis_type=None, min_border=1)
        genePlot.scatter(x='x', y='y', legend='label', source=source,
                         color='gene_colors', alpha=0.8, size=5)
        plotDict['nGene'] = genePlot

        if 'single_gene' in cdsDict:
            singleGene = figure(plot_width=800, plot_height=600, title=active_gene,
                                tools="pan,wheel_zoom,box_zoom,reset,hover,previewsave",
                                x_axis_type=None, y_axis_type=None, min_border=1)
            singleGene.scatter(x='x', y='y', legend='label', source=source,
                               color='single_gene', alpha=0.8, size=5)
            plotDict['sGene'] = singleGene

        return plotDict

    def update(new):
        # Radio-button callback: redraw the single-gene plot for the chosen
        # gene. sgCol is bound further down, before any click can happen.
        active_gene = geneList[new]
        sgCol.children[0] = makePlot(doc, active_gene, dataSet)['sGene']

    plotDict = makePlot(doc, active_gene, dataSet)

    # hover tools
    hover = plotDict['tsne'].select(dict(type=HoverTool))
    hover.tooltips = {"content": "Sequence: @seq, CCS: @ccs, Charge: @charge "}
    plotDict['tsne'].legend.location = "top_left"

    button_group = RadioButtonGroup(labels=geneList)
    button_group.on_click(update)

    tabList = []
    if 'sGene' in plotDict:
        controls = widgetbox([button_group], width=800)
        sgCol = column(plotDict['sGene'], controls)
        sgTab = Panel(child=sgCol, title="Single Gene")
        tabList.append(sgTab)

    tsneTab = Panel(child=plotDict['tsne'], title="Louvain")
    tabList.append(tsneTab)
    nGeneTab = Panel(child=plotDict['nGene'], title="nGene")
    tabList.append(nGeneTab)

    tabs = Tabs(tabs=tabList)
    doc.add_root(tabs)
    return doc
import numpy as np import matplotlib.pyplot as plt import seaborn as sns import scanpy.api as sc PARENT_DIR = os.path.join(sys.path[0], '..') dataset = sys.argv[1] data_dir = "{}/results/downstream/{}".format(PARENT_DIR, dataset) imputation = [ "deepImpute", "DCA", "VIPER", "MAGIC", "SAVER", "scImpute", "DrImpute", "raw" ] raw = sc.read_h5ad('{}/paper_data/downstream/raw_{}.h5ad'.format( PARENT_DIR, dataset)) cells = raw.obs.index genes = raw.var.index metadata = raw.obs.celltype.values components_all = [] colnames = ["UMAP_1", "UMAP_2"] for method in imputation: try: data = np.load("{}/results/downstream/UMAP/{}/{}.npy".format( PARENT_DIR, dataset, method)) df = pd.DataFrame(data, index=cells, columns=colnames) df["meta"] = metadata df["imputation"] = method components_all.append(df) except:
def asScanpy(self):
    """Read './rdata/h5/assays.h5' with ``sc.read_h5ad`` and return the result."""
    return sc.read_h5ad("./rdata/h5/assays.h5")
import scanpy.api as sc
import scipy.sparse as sp_sparse

# Reads ./100_test_data.h5ad, makes the variable names unique, densifies the
# matrix if it is sparse, and writes the first 100 observations back to
# 100_test_data.h5ad.
# (An alternative input used previously was "./ExprMatrix.h5ad".)
andata = sc.read_h5ad("./100_test_data.h5ad")
print("Finished reading.")

andata.var_names_make_unique()

expression = andata.X
if sp_sparse.issparse(expression):
    andata.X = expression.toarray()

# Keep the first 100 observations and all variables.
partial_data = andata[:100, :]
print("Finished processing")

sc.write("100_test_data.h5ad", partial_data)
print("Finished writing.")