def correct(datasets_full, genes_list, hvg=HVG, verbose=VERBOSE,
            sigma=SIGMA, ds_names=None):
    """Merge, reduce and assemble datasets; return expression data and genes.

    The inputs are merged with ``merge_datasets``, passed through
    ``process_data`` and the result is handed to ``assemble`` together
    with the merged expression datasets.

    Args:
        datasets_full: Datasets forwarded to ``merge_datasets``.
        genes_list: Gene lists forwarded to ``merge_datasets``.
        hvg: Forwarded to ``process_data`` as its ``hvg`` argument.
        verbose: Forwarded to ``assemble``.
        sigma: Forwarded to ``assemble``.
        ds_names: Forwarded to ``assemble``.

    Returns:
        A ``(datasets, genes)`` tuple: the merged expression datasets
        (after ``assemble`` has run on them) and the gene list returned
        by ``process_data``.
    """
    expr_datasets, merged_genes = merge_datasets(datasets_full, genes_list)
    reduced, genes = process_data(expr_datasets, merged_genes, hvg=hvg)

    assemble_options = {
        'expr_datasets': expr_datasets,  # Modified in place.
        'verbose': verbose,
        'knn': KNN,
        'sigma': sigma,
        'approx': APPROX,
        'ds_names': ds_names,
    }
    # Assemble in low dimensional space. The value returned by assemble is
    # not used here; only the expression datasets are handed back.
    assemble(reduced, **assemble_options)

    return expr_datasets, genes
def panorama(datasets_full, genes_list):
    """Return the panoramas found by ``connect`` among the given datasets.

    Args:
        datasets_full: Datasets forwarded to ``merge_datasets``.
        genes_list: Gene lists forwarded to ``merge_datasets``.

    Returns:
        Whatever ``connect`` returns for the processed datasets; it is
        also printed when the module-level ``VERBOSE`` flag is truthy.
    """
    merged, shared_genes = merge_datasets(datasets_full, genes_list)
    # The gene list returned by process_data is not needed afterwards.
    reduced, _ = process_data(merged, shared_genes)

    # Connected components form panoramas.
    found = connect(reduced, knn=KNN, approx=APPROX, verbose=VERBOSE)

    if VERBOSE:
        print(found)

    return found
def panorama(datasets_full, genes_list):
    """Find panoramas among the datasets, reporting progress if VERBOSE.

    Args:
        datasets_full: Datasets forwarded to ``merge_datasets``.
        genes_list: Gene lists forwarded to ``merge_datasets``.

    Returns:
        Whatever ``connect`` returns for the processed datasets.
    """
    def report(message):
        # VERBOSE is re-read on every call, like the inline checks it replaces.
        if VERBOSE:
            print(message)

    report('Processing and reducing dimensionality...')
    merged, shared_genes = merge_datasets(datasets_full, genes_list)
    # The gene list returned by process_data is not needed afterwards.
    reduced, _ = process_data(merged, shared_genes)

    report('Finding panoramas...')
    found = connect(reduced)
    report(found)

    return found
genes_hvg, _ = hvg([dataset.X], dataset.var['gene_symbols'], 'dispersion') if hv_genes is None: hv_genes = set( [g for gene in genes_hvg[:12000] for g in gene.split(';')]) else: hv_genes &= set( [g for gene in genes_hvg[:12000] for g in gene.split(';')]) tprint('{}: {}'.format(all_namespaces[i], len(hv_genes))) sys.stdout.flush() # Keep only those highly variable genes. Xs, genes = merge_datasets( [dataset.X for dataset in all_datasets], [dataset.var['gene_symbols'] for dataset in all_datasets], keep_genes=hv_genes, ds_names=all_namespaces, verbose=True) [tprint(X.shape[0]) for X in Xs] X = vstack(Xs) X = X.log1p() cell_types = np.concatenate( [dataset.obs['cell_types'] for dataset in all_datasets], axis=None) ages = np.concatenate([dataset.obs['ages'] for dataset in all_datasets], axis=None) cds = [ PanDAG(
continue u, p = mannwhitneyu(A[:, idx], B[:, idx]) p_vals.append(p) reject, q_vals, _, _ = multipletests(p_vals, method='bonferroni') for idx, gene in enumerate(genes): if reject[idx]: print('{}\t{}'.format(gene, q_vals[idx])) if __name__ == '__main__': datasets, genes_list, n_cells = load_names(data_names) monocytes, mono_genes = datasets[:4], genes_list[:4] monocytes, mono_genes = merge_datasets(monocytes, mono_genes) datasets = [vstack(monocytes)] + datasets[4:] genes_list = [mono_genes] + genes_list[4:] data_names = ['data/macrophage/monocytes_seqwell'] + data_names[4:] datasets, genes = merge_datasets(datasets, genes_list) datasets_dimred, genes = process_data(datasets, genes) _, A, _ = find_alignments_table(datasets_dimred) time = np.array([0, 0, 3, 3, 6, 6]).reshape(-1, 1) time_align_correlate(A, time) x = np.array([0, 0, 1, 1, 2, 2]).reshape(-1, 1) y = [-.1, .1, -.1, .1, -.1, .1] time_align_visualize(A, x, y, namespace=NAMESPACE)
from time_align import time_align_correlate, time_align_visualize, time_dist

# Label passed to time_align_visualize through its ``namespace`` argument.
NAMESPACE = 'dendritic'

# Dataset locations handed to load_names. The order matters: the ``time``,
# ``x`` and ``y`` vectors below each carry one entry per path, in this order.
data_names = [
    'data/dendritic/unstimulated',
    'data/dendritic/unstimulated_repl',
    'data/dendritic/lps_1h',
    'data/dendritic/lps_2h',
    'data/dendritic/lps_4h',
    'data/dendritic/lps_4h_repl',
    'data/dendritic/lps_6h',
]

if __name__ == '__main__':
    # Load every dataset, merge them, then run process_data on the result.
    datasets, genes_list, n_cells = load_names(data_names)
    datasets, genes = merge_datasets(datasets, genes_list)
    datasets_dimred, genes = process_data(datasets, genes)

    # Only the second of the three values returned is used below.
    _, A, _ = find_alignments_table(datasets_dimred)

    # One value per dataset, shaped as a column vector. The numbers line up
    # with the labels in data_names (unstimulated -> 0, lps_1h -> 1, ...).
    # NOTE(review): presumably hours of stimulation; confirm.
    time = np.array([0, 0, 1, 2, 4, 4, 6]).reshape(-1, 1)
    time_align_correlate(A, time)

    time_dist(datasets_dimred, time)

    # Per-dataset coordinates given to time_align_visualize. Each dataset and
    # its ``_repl`` counterpart share an x value and get y offsets of -.1/+.1.
    # NOTE(review): presumably plot positions; confirm against time_align.
    x = np.array([0, 0, 1, 2, 3, 3, 4]).reshape(-1, 1)
    y = [-.1, .1, 0, 0, -.1, .1, 0]
    time_align_visualize(A, x, y, namespace=NAMESPACE)
else: raise ValueError('Unhandled age {}'.format(age_str)) ages += [age] * datasets[i].shape[0] inj = 'demyelination' if 'Lysolecithin' in meta['treatment'] else 'none' injured += [inj] * datasets[i].shape[0] cell_types += ['{}_{}'.format(age_str, inj)] * datasets[i].shape[0] return np.array(cell_types), np.array(ages), np.array(injured) datasets, genes_list, n_cells = load_names(data_names, norm=False) cell_types, ages, injured = load_meta(datasets) datasets, genes = merge_datasets(datasets, genes_list, union=True, verbose=False) X = vstack(datasets) qc_idx = [i for i, s in enumerate(np.sum(X != 0, axis=1)) if s >= 500] tprint('Found {} valid cells among all datasets'.format(len(qc_idx))) X = X[qc_idx] cell_types = cell_types[qc_idx] ages = ages[qc_idx] injured = injured[qc_idx] if not os.path.isfile('data/dimred/{}_{}.txt'.format(DR_METHOD, NAMESPACE)): mkdir_p('data/dimred') tprint('Dimension reduction with {}...'.format(DR_METHOD)) X_dimred = reduce_dimensionality(normalize(X), dim_red_k=DIMRED)
'data/pbmc/10x/b_cells', 'data/pbmc/10x/cd14_monocytes', 'data/pbmc/10x/cd34', 'data/pbmc/10x/cd4_t_helper', 'data/pbmc/10x/cd56_nk', 'data/pbmc/10x/cytotoxic_t', 'data/pbmc/10x/memory_t', 'data/pbmc/10x/naive_cytotoxic_t', 'data/pbmc/10x/naive_t', 'data/pbmc/10x/regulatory_t', 'data/pbmc/68k', ] datasets, genes_list, n_cells = load_names(data_names) Xs, genes = merge_datasets(datasets[:], genes_list, ds_names=data_names) uniq_cell_types = [ 'CD19+_B', 'CD14+_Monocyte', 'CD34+', 'CD4+_T_Helper2', 'CD56+_NK', 'CD8+_Cytotoxic_T', 'CD4+/CD45RO+_Memory', 'CD8+/CD45RA+_Naive_Cytotoxic', 'CD4+/CD45RA+/CD25-_Naive_T', 'CD4+/CD25_T_Reg', ] cell_types = []