コード例 #1
0
from collections import Counter
import os
import sys
import numpy as np
import os
from OnClass.utils import *
from OnClass.OnClassPred import OnClassPred
from OnClass.other_datasets_utils import my_assemble, data_names_all, load_names

# Root of the OnClass data tree, given relative to the current working directory.
DATA_DIR = '../../../OnClass_data/'
# Directory created below (presumably where marker-gene results are written;
# its use is not visible in this excerpt). DATA_DIR already ends with '/', so
# this value contains a double slash; the literal is kept unchanged so that any
# code concatenating onto OUTPUT_DIR still sees the same string.
OUTPUT_DIR = DATA_DIR + '/marker_genes/'
# exist_ok=True creates the directory only when it is missing and avoids the
# race between a separate os.path.exists() check and the makedirs() call.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Load every dataset listed in data_names_all (quiet, with log1p=True), then
# merge them; merge_datasets hands back a (datasets, genes) pair.
# NOTE(review): merge_datasets and sparse are not imported explicitly here;
# presumably they come from the wildcard import of OnClass.utils - confirm.
datasets, genes_list, n_cells = load_names(
    data_names_all, verbose=False, log1p=True)
datasets, genes = merge_datasets(datasets, genes_list)

# Stack all merged datasets vertically and convert the result to a dense array.
train_X = sparse.vstack(datasets).toarray()

# Lower-cased gene name -> position in `genes`. When two names collide after
# lower-casing, the later position wins.
g2i = {gene.lower(): idx for idx, gene in enumerate(genes)}
# Leftover note from the original source: 'CL:0000037','hsc',
# That pair is present below as the third entry of `onto_ids` and of
# `keywords`, which suggests the two lists are matched by position
# (NOTE(review): confirm against the code that consumes them).
# The 'CL:' prefix presumably denotes Cell Ontology term ids - confirm.
onto_ids = [
    'CL:0000236',
    'CL:0000235',
    'CL:0000037',
    'CL:0002338',
    'CL:0000492',
    'CL:0000815',
    'CL:0000910',
    'CL:0000794',
    'CL:2000001',
]
keywords = [
    'b_cells', 'infected', 'hsc', 'cd56_nk', 'cd4_t_helper', 'regulatory_t',
    'cytotoxic_t', 'cd14_monocytes', 'pbmc'
コード例 #2
0
ファイル: Annot26datasets.py プロジェクト: jkooy/OnClass
from OnClass.other_datasets_utils import my_assemble, data_names_all, load_names


# NOTE(review): this rebinds the name `OnClassModel` to the object returned by
# calling it (presumably a class instance), so the original callable is no
# longer reachable under this name for the rest of the script.
OnClassModel = OnClassModel()

# Compute the cell type embedding with dim=500 from the ontology file, passing
# the tp2emb_500 path as use_pretrain. The call returns three values; judging
# by their names they are presumably type->embedding, type->index and
# index->type mappings (confirm against EmbedCellTypes).
tp2emb, tp2i, i2tp = OnClassModel.EmbedCellTypes(
    dim=500,
    cell_type_network_file='../../../OnClass_data/cell_ontology/cl.ontology',
    use_pretrain='../../../OnClass_data/pretrain/tp2emb_500',
)
print('compute cell type embedding finished')


# Path of the .h5ad file used as the training input.
data_file = '../../../OnClass_data/raw_data/tabula-muris-senis-facs_cell_ontology.h5ad'

# read_data returns three values, unpacked as features, gene names and labels
# (naming taken from the variables; confirm shapes/types against read_data).
# tp2i is forwarded from the embedding step above, and the label is read from
# the 'cell_ontology_class_reannotated' AnnData field.
train_X, train_genes, train_Y = read_data(
    feature_file=data_file,
    tp2i=tp2i,
    AnnData_label='cell_ontology_class_reannotated',
)


# Disabled earlier run, kept for reference: a training call with nhidden=[2],
# max_iter=1 that saved its model to pretrain/BilinearNN, followed by a
# prediction on the training data.
#OnClassModel.train(train_X, train_Y, tp2emb, train_genes, nhidden=[2], max_iter=1, use_pretrain = None, save_model =  '../../../OnClass_data/pretrain/BilinearNN')
#test_label = OnClassModel.predict(train_X, train_genes)
#print (np.shape(test_label))
#print (test_label)
#print ('pretrain finished')

# Echo the path that is passed as pretrain_expression below.
print('../../../OnClass_data/pretrain/BilinearNN_500')

# Active training call: 500 hidden units, log_transform=True, and two
# pretrain paths.
# NOTE(review): use_pretrain ends in 'BilinearNN_50019' while the printed path
# and pretrain_expression end in 'BilinearNN_500' - confirm the '19' suffix is
# intentional.
OnClassModel.train(
    train_X,
    train_Y,
    tp2emb,
    train_genes,
    nhidden=[500],
    log_transform=True,
    use_pretrain='../../../OnClass_data/pretrain/BilinearNN_50019',
    pretrain_expression='../../../OnClass_data/pretrain/BilinearNN_500',
)
#test_label = OnClassModel.predict(train_X, train_genes)
#print (test_label)

# Report how many dataset names will be loaded next.
print(len(data_names_all))

# Load the datasets named in data_names_all from the OnClass data directory
# (quiet, with log1p=True).
# NOTE(review): `scanorama` and `sparse` are used below but not imported in the
# visible part of this script - confirm they are in scope.
datasets, genes_list, n_cells = load_names(
    data_names_all,
    verbose=False,
    log1p=True,
    DATA_DIR='../../../OnClass_data/',
)

# Merge the datasets, then run scanorama.process_data with dimred=100.
datasets, genes = scanorama.merge_datasets(datasets, genes_list)
datasets_dimred, genes = scanorama.process_data(datasets, genes, dimred=100)

# Only the second element (index 1) of my_assemble's result is kept;
# presumably these are the corrected expression datasets (confirm against
# my_assemble).
expr_datasets = my_assemble(
    datasets_dimred,
    ds_names=data_names_all,
    expr_datasets=datasets,
    sigma=150,
)[1]

# Stack the per-dataset matrices vertically into one matrix.
expr_corrected = sparse.vstack(expr_datasets)

# Predict with log_transform and correct_batch both switched off (presumably
# because log1p and the assembly step were already applied above - confirm).
test_label = OnClassModel.predict(
    expr_corrected,
    genes,
    log_transform=False,
    correct_batch=False,
)
print(test_label)