def _load_benchmark_matrix(csv_path):
    """Read a comma-separated expression file and return it as a named ExpMatrix.

    The matrix is rebuilt from the values, genes and cells of the file that
    was read, and both axes are then named ('Genes' for the index, 'Cells'
    for the columns).
    """
    raw = ExpMatrix.read_tsv(csv_path, sep=',')
    data = ExpMatrix(X=raw.X, genes=raw.genes, cells=raw.cells)
    data.genes.name = 'Genes'
    data.cells.name = 'Cells'
    data.index.name = 'Genes'
    data.columns.name = 'Cells'
    return data


def run_moana(trainname, testname, n):
    """Train moana on one Tabula Muris benchmark set and predict on another.

    Parameters
    ----------
    trainname : base name of the training files; ``<trainname>.csv`` holds the
        expression data and ``<trainname>_label.csv`` the labels (column 'x').
    testname : base name of the test files, same naming scheme.
    n : string prefix for the two output files (it is concatenated with a
        string, so it must be a ``str``).

    Returns
    -------
    (mem_train, time_train, mem_test, time_test)
        The memory figures are whatever ``display_top`` returns for the
        snapshot of the respective phase; the times are wall-clock seconds
        truncated to ``int``.

    Side effects
    ------------
    Changes the working directory to the hard-coded output directory and
    writes ``<n>_moana_True.csv`` and ``<n>_moana_Pred.csv`` there.
    """
    benchmark_dir = '/albona/nobackup/biostat/datasets/singlecell/tabulaMuris_benchmark/'

    # ---- training phase -------------------------------------------------
    train_data = _load_benchmark_matrix(benchmark_dir + trainname + '.csv')
    train_labels = pd.read_csv(benchmark_dir + trainname + '_label.csv',
                               header=0, index_col=0, sep=',')
    cell_labels = CellAnnVector(cells=train_data.cells,
                                data=train_labels['x'].values)

    now = time.time()
    tracemalloc.start()
    clf = CellTypeClassifier()
    clf.fit(matrix=train_data, cell_labels=cell_labels)
    snapshot = tracemalloc.take_snapshot()
    mem_train = display_top(snapshot)
    later = time.time()
    time_train = int(later - now)
    # Stop tracing so the next start() begins from an empty trace table.
    # Calling start() while tracing is already active does not reset it, so
    # without this the test snapshot would still contain the training
    # allocations.
    tracemalloc.stop()

    # ---- prediction phase -----------------------------------------------
    test_data = _load_benchmark_matrix(benchmark_dir + testname + '.csv')

    now = time.time()
    tracemalloc.start()
    predictions = clf.predict(test_data)
    snapshot = tracemalloc.take_snapshot()
    mem_test = display_top(snapshot)
    later = time.time()
    time_test = int(later - now)
    # Do not leave allocation tracing switched on for the caller.
    tracemalloc.stop()

    # ---- write true and predicted labels ----------------------------------
    pred = pd.DataFrame(np.asarray(predictions))
    truelab = pd.read_csv(benchmark_dir + testname + '_label.csv',
                          header=0, index_col=None, sep=',')

    os.chdir("/dora/nobackup/yuec/scclassify/benchmark/moanna/vary_train")
    truelab.to_csv(n + "_moana_True.csv", index=False)
    pred.to_csv(n + "_moana_Pred.csv", index=False)

    return mem_train, time_train, mem_test, time_test
def run_moana(ad, ad_ref):
    """Fit a moana classifier on ``ad_ref`` and return predicted labels for ``ad``.

    Both arguments must expose ``var_names``, ``obs_names`` and an ``X`` whose
    transpose provides ``todense()``; ``ad_ref`` (the reference) additionally
    needs an ``obs['cell']`` column holding the training labels.

    Returns the ``values`` of the classifier's prediction for ``ad``.
    """
    # Restrict the reference to the genes that also occur in the query.
    in_query = ad_ref.var_names.isin(ad.var_names)
    reference = ad_ref[:, in_query]

    train_matrix = ExpMatrix(
        X=reference.X.T.todense(),
        genes=reference.var_names,
        cells=reference.obs_names,
    )
    train_labels = CellAnnVector(reference.obs['cell'], cells=reference.obs_names)

    classifier = CellTypeClassifier()
    classifier.fit(matrix=train_matrix, cell_labels=train_labels)

    # Select the query genes by the reference's gene names so both matrices
    # share the same genes.
    query = ad[:, reference.var_names]
    query_matrix = ExpMatrix(
        X=query.X.T.todense(),
        genes=query.var_names,
        cells=query.obs_names,
    )

    predicted = classifier.predict(query_matrix)
    return predicted.values
def run_moana(input_dir, output_dir, datafile, labfile, Rfile):
    '''
    Run moana with the pretrained PBMC classifier and save its predictions.

    NOTE: at the moment it is only possible to run moana with a pretrained
    classifier, using the PBMC classifier is therefore hardcoded here.

    Parameters
    ----------
    input_dir : directory of the input files. The process changes into this
        directory, so datafile, Rfile and "moana_pbmc_classifier.pickle" are
        all resolved relative to it.
    output_dir : directory of the output files. The process changes into it
        after prediction, i.e. a relative path is resolved against input_dir.
    datafile : name of the data file (comma-separated).
    labfile : name of the label file. Currently unused; kept so existing
        callers do not break.
    Rfile : RData file that provides the boolean vector 'Cells_to_Keep'.

    Side effects
    ------------
    Leaves the working directory set to output_dir and writes
    "Moana_pred.csv" (predictions) and "Moana_time.csv" (prediction time in
    seconds) there.
    '''
    os.chdir(input_dir)

    # Load the RData file and pull out the row mask it defines.
    robjects.r['load'](Rfile)
    tokeep = np.array(robjects.r['Cells_to_Keep'], dtype='bool')

    matrix = ExpMatrix.read_tsv(datafile, sep=',')
    matrix = matrix.iloc[tokeep]

    # Rebuild the matrix transposed, swapping the gene and cell axes of what
    # was read (presumably because the file is stored cells-by-genes --
    # TODO confirm against the data files).
    data = ExpMatrix(X=np.transpose(matrix.X),
                     genes=matrix.cells, cells=matrix.genes)
    data.genes.name = 'Genes'
    data.cells.name = 'Cells'
    data.index.name = 'Genes'
    data.columns.name = 'Cells'

    clf = CellTypeClassifier.read_pickle("moana_pbmc_classifier.pickle")

    # Only the prediction call itself is timed.
    start = tm.time()
    predictions = clf.predict(data)
    runtime = tm.time() - start

    # The frame is built straight from the object returned by predict(); the
    # former bare `np.asarray(predictions)` discarded its result and has been
    # dropped, which leaves the written file unchanged.
    pred = pd.DataFrame(predictions)

    os.chdir(output_dir)
    pred.to_csv("Moana_pred.csv", index=False)
    with open("Moana_time.csv", 'w') as f:
        f.write("%f\n" % runtime)
def test_sparse(tmpdir, my_matrix):
    """Round-trip a matrix through the sparse text format and compare."""
    mtx_path = tmpdir.join('expression_matrix.mtx').strpath
    my_matrix.write_sparse(mtx_path)

    reloaded = ExpMatrix.read_sparse(mtx_path)

    # Reading must yield a distinct object that still compares equal to the
    # matrix that was written.
    assert reloaded is not my_matrix
    assert reloaded == my_matrix
def test_tsv(tmpdir, my_matrix):
    """Round-trip a matrix through the tab-separated text format and compare."""
    tsv_path = tmpdir.join('expression_matrix.tsv').strpath
    my_matrix.write_tsv(tsv_path)

    # NOTE: a byte-level MD5 check of the written file was disabled here; the
    # round-trip comparison below is the only assertion.
    reloaded = ExpMatrix.read_tsv(tsv_path)

    assert reloaded is not my_matrix
    assert reloaded == my_matrix
def my_matrix(my_expression_file):
    """Load the expression file at ``my_expression_file`` as an ExpMatrix."""
    loaded = ExpMatrix.read_tsv(my_expression_file)
    return loaded
def run_moana(DataPath, LabelsPath, ClassifierPath, OutputDir, GeneOrderPath="", NumGenes=0):
    '''
    run moana

    Wrapper script to run moana on a benchmark dataset with a pretrained
    classifier, outputs lists of true and predicted cell labels as csv files,
    as well as computation time.

    Only cells whose label is one of the seven populations listed in
    ``ct_old`` are kept; their labels are rewritten to the matching entry of
    ``ct_new`` before being saved. Cross-validation subsetting is not
    performed by this function.

    Parameters
    ----------
    DataPath : Data file path (.csv), cells-genes matrix with cell unique
        barcodes as row names and gene names as column names.
    LabelsPath : Cell population annotations file path (.csv).
    ClassifierPath : Data file path to the pretrained classifier.
    OutputDir : Output directory defining the path of the exported file.
        The process changes into this directory before writing.
    GeneOrderPath : Gene order file path (.csv) obtained from feature
        selection, only read when NumGenes > 0, default is "".
    NumGenes : Number of genes used in case of feature selection (integer),
        default is 0.

    Side effects
    ------------
    Prints the number of retained cells, leaves the working directory set to
    OutputDir and writes three files there: ``moana_True_Labels.csv``,
    ``moana_Pred_Labels.csv`` and ``moana_Total_Time.csv`` (with
    ``<NumGenes>_`` inserted after ``moana_`` when NumGenes is not 0).
    '''
    matrix = ExpMatrix.read_tsv(DataPath, sep=',')
    truelab = pd.read_csv(LabelsPath, header=0, index_col=None, sep=',')

    # Labels as they appear in the dataset, and the names they are rewritten
    # to (presumably the label vocabulary of the pretrained classifier --
    # TODO confirm).
    ct_old = [
        'CD19+ B', 'CD14+ Monocyte', 'CD4+/CD45RA+/CD25- Naive T',
        'CD4+/CD45RO+ Memory', 'CD8+/CD45RA+ Naive Cytotoxic',
        'Dendritic', 'CD56+ NK',
    ]
    ct_new = [
        'B cells', 'CD14+ monocytes', 'Naive CD4+ T cells',
        'Memory CD4+ T cells', 'Naive CD8+ T cells',
        'Dendritic cells', 'NK cells',
    ]

    # One boolean per row. Reducing along axis 1 keeps the mask 1-D even for
    # a single-row label file, where squeezing would give a 0-d array.
    keep_rows = np.isin(truelab.values, ct_old).any(axis=1)

    # Filter, then relabel on the filtered frame with replace(); this avoids
    # assigning through a boolean DataFrame indexer on .iloc, which current
    # pandas rejects.
    truelab = truelab.loc[keep_rows].replace(ct_old, ct_new)
    print(len(truelab))
    matrix = matrix.iloc[keep_rows]

    # read the feature file
    if NumGenes > 0:
        features = pd.read_csv(GeneOrderPath, header=0, index_col=None, sep=',')
        # The first NumGenes entries of the first column are used as
        # positional column indices (assumes they are 0-based -- TODO confirm).
        feat_to_use = features.iloc[0:NumGenes, 0]
        matrix = matrix.iloc[:, feat_to_use]

    # Rebuild transposed: the file is cells-by-genes, so the axes read from
    # it are swapped when constructing the matrix handed to the classifier.
    data = ExpMatrix(X=np.transpose(matrix.X),
                     genes=matrix.cells, cells=matrix.genes)
    data.genes.name = 'Genes'
    data.cells.name = 'Cells'
    data.index.name = 'Genes'
    data.columns.name = 'Cells'

    clf = CellTypeClassifier.read_pickle(ClassifierPath)

    # Only the prediction call itself is timed.
    start = tm.time()
    predictions = clf.predict(data)
    runtime = tm.time() - start

    # Built directly from predict()'s return value; the former bare
    # `np.asarray(predictions)` discarded its result and has been dropped.
    pred = pd.DataFrame(predictions)

    if NumGenes == 0:
        prefix = "moana_"
    else:
        prefix = "moana_" + str(NumGenes) + "_"

    os.chdir(OutputDir)
    truelab.to_csv(prefix + "True_Labels.csv", index=False)
    pred.to_csv(prefix + "Pred_Labels.csv", index=False)
    with open(prefix + "Total_Time.csv", 'w') as f:
        f.write("%f\n" % runtime)
def my_matrix(my_gene_names, my_cells, my_X):
    """Build an ExpMatrix from gene names, cell names and expression values.

    NOTE(review): an earlier inline example used 4 genes, 3 samples and a
    4x3 float64 array, which suggests ``my_X`` is genes-by-cells -- confirm
    against the providers of these arguments.
    """
    return ExpMatrix(X=my_X, genes=my_gene_names, cells=my_cells)