Ejemplo n.º 1
0
def correct(datasets_full, genes_list, hvg=HVG, verbose=VERBOSE,
            sigma=SIGMA, ds_names=None):
    """Merge the datasets, reduce them, run assembly and return the result.

    Parameters
    ----------
    datasets_full, genes_list
        Forwarded unchanged to ``merge_datasets``.
    hvg
        Forwarded to ``process_data`` as ``hvg``.
    verbose, sigma, ds_names
        Forwarded to ``assemble`` under the same keyword names.

    Returns
    -------
    tuple
        ``(datasets, genes)``: the list produced by ``merge_datasets`` (the
        same object handed to ``assemble`` as ``expr_datasets``) and the
        gene list returned by ``process_data``.
    """
    merged, shared_genes = merge_datasets(datasets_full, genes_list)
    reduced, shared_genes = process_data(merged, shared_genes, hvg=hvg)

    # Assembly operates on the low dimensional data.  `merged` is passed as
    # `expr_datasets`, which the original author notes is modified in place;
    # that is why the value returned by `assemble` itself is not needed here.
    assemble(
        reduced,
        expr_datasets=merged,
        verbose=verbose,
        knn=KNN,
        sigma=sigma,
        approx=APPROX,
        ds_names=ds_names,
    )

    return merged, shared_genes
Ejemplo n.º 2
0
def panorama(datasets_full, genes_list):
    """Merge and reduce the datasets, then return the result of ``connect``.

    ``datasets_full`` and ``genes_list`` are forwarded to ``merge_datasets``;
    the reduced datasets from ``process_data`` are handed to ``connect``
    together with the module-level ``KNN``, ``APPROX`` and ``VERBOSE``
    settings. The result is printed when ``VERBOSE`` is truthy.
    """
    merged, shared_genes = merge_datasets(datasets_full, genes_list)
    reduced, _ = process_data(merged, shared_genes)

    # Connected components form panoramas.
    found = connect(reduced, knn=KNN, approx=APPROX, verbose=VERBOSE)

    if VERBOSE:
        print(found)
    return found
Ejemplo n.º 3
0
def panorama(datasets_full, genes_list):
    """Merge and reduce the datasets, then return the result of ``connect``.

    Progress messages and the final result are printed only while the
    module-level ``VERBOSE`` flag is truthy.
    """
    def _report(message):
        # VERBOSE is looked up on every call, exactly as an inline check would.
        if VERBOSE:
            print(message)

    _report('Processing and reducing dimensionality...')
    merged, shared_genes = merge_datasets(datasets_full, genes_list)
    reduced, _ = process_data(merged, shared_genes)

    _report('Finding panoramas...')
    found = connect(reduced)

    _report(found)
    return found
Ejemplo n.º 4
0
        genes_hvg, _ = hvg([dataset.X], dataset.var['gene_symbols'],
                           'dispersion')
        if hv_genes is None:
            hv_genes = set(
                [g for gene in genes_hvg[:12000] for g in gene.split(';')])
        else:
            hv_genes &= set(
                [g for gene in genes_hvg[:12000] for g in gene.split(';')])
        tprint('{}: {}'.format(all_namespaces[i], len(hv_genes)))
        sys.stdout.flush()

    # Keep only those highly variable genes.

    Xs, genes = merge_datasets(
        [dataset.X for dataset in all_datasets],
        [dataset.var['gene_symbols'] for dataset in all_datasets],
        keep_genes=hv_genes,
        ds_names=all_namespaces,
        verbose=True)

    [tprint(X.shape[0]) for X in Xs]

    X = vstack(Xs)
    X = X.log1p()

    cell_types = np.concatenate(
        [dataset.obs['cell_types'] for dataset in all_datasets], axis=None)
    ages = np.concatenate([dataset.obs['ages'] for dataset in all_datasets],
                          axis=None)

    cds = [
        PanDAG(
Ejemplo n.º 5
0
            continue
        u, p = mannwhitneyu(A[:, idx], B[:, idx])
        p_vals.append(p)

    reject, q_vals, _, _ = multipletests(p_vals, method='bonferroni')

    for idx, gene in enumerate(genes):
        if reject[idx]:
            print('{}\t{}'.format(gene, q_vals[idx]))


# Script entry point: load the datasets, collapse the first four into one,
# compute the alignment table and run the time-alignment analyses on it.
if __name__ == '__main__':
    datasets, genes_list, n_cells = load_names(data_names)

    # Merge the first four datasets and stack them into a single dataset;
    # the three parallel lists (datasets, genes_list, data_names) are each
    # rebuilt as [combined entry] + the untouched remainder.
    monocytes, mono_genes = datasets[:4], genes_list[:4]
    monocytes, mono_genes = merge_datasets(monocytes, mono_genes)
    datasets = [vstack(monocytes)] + datasets[4:]
    genes_list = [mono_genes] + genes_list[4:]
    data_names = ['data/macrophage/monocytes_seqwell'] + data_names[4:]

    # Merge across all remaining datasets, then reduce.
    datasets, genes = merge_datasets(datasets, genes_list)
    datasets_dimred, genes = process_data(datasets, genes)

    # Only the second element of the returned triple is used below.
    _, A, _ = find_alignments_table(datasets_dimred)

    # Column vector of six values, presumably one time point per dataset
    # after the collapse above (TODO confirm against data_names).
    # NOTE(review): this name would shadow a `time` module import, if any.
    time = np.array([0, 0, 3, 3, 6, 6]).reshape(-1, 1)
    time_align_correlate(A, time)

    # x is a column vector, y a plain list; both have six entries and are
    # passed to the visualizer (presumably as per-dataset plot coordinates).
    x = np.array([0, 0, 1, 1, 2, 2]).reshape(-1, 1)
    y = [-.1, .1, -.1, .1, -.1, .1]
    time_align_visualize(A, x, y, namespace=NAMESPACE)
Ejemplo n.º 6
0
from time_align import time_align_correlate, time_align_visualize, time_dist

# Passed as the `namespace` argument to time_align_visualize in the
# __main__ section of this script.
NAMESPACE = 'dendritic'

# Dataset paths handed to load_names; seven entries, in the same order as
# the per-dataset `time`, `x` and `y` values defined in the __main__ section.
data_names = [
    'data/dendritic/unstimulated',
    'data/dendritic/unstimulated_repl',
    'data/dendritic/lps_1h',
    'data/dendritic/lps_2h',
    'data/dendritic/lps_4h',
    'data/dendritic/lps_4h_repl',
    'data/dendritic/lps_6h',
]

# Script entry point: load, merge and reduce the datasets, compute the
# alignment table, then run the time-alignment analyses.
if __name__ == '__main__':
    datasets, genes_list, n_cells = load_names(data_names)
    datasets, genes = merge_datasets(datasets, genes_list)
    datasets_dimred, genes = process_data(datasets, genes)

    # Only the second element of the returned triple is used below.
    _, A, _ = find_alignments_table(datasets_dimred)

    # Column vector with seven values, one per entry of data_names; the
    # values match the hour suffixes in those paths (0 for unstimulated).
    # NOTE(review): this name would shadow a `time` module import, if any.
    time = np.array([0, 0, 1, 2, 4, 4, 6]).reshape(-1, 1)

    time_align_correlate(A, time)

    time_dist(datasets_dimred, time)

    # x is a column vector, y a plain list; both have seven entries and are
    # passed to the visualizer (presumably as per-dataset plot coordinates).
    x = np.array([0, 0, 1, 2, 3, 3, 4]).reshape(-1, 1)
    y = [-.1, .1, 0, 0, -.1, .1, 0]
    time_align_visualize(A, x, y, namespace=NAMESPACE)
        else:
            raise ValueError('Unhandled age {}'.format(age_str))
        ages += [age] * datasets[i].shape[0]

        inj = 'demyelination' if 'Lysolecithin' in meta['treatment'] else 'none'
        injured += [inj] * datasets[i].shape[0]

        cell_types += ['{}_{}'.format(age_str, inj)] * datasets[i].shape[0]

    return np.array(cell_types), np.array(ages), np.array(injured)


# Load the datasets with norm=False, read the per-row metadata, then merge
# the datasets with union=True.
datasets, genes_list, n_cells = load_names(data_names, norm=False)
cell_types, ages, injured = load_meta(datasets)
datasets, genes = merge_datasets(datasets,
                                 genes_list,
                                 union=True,
                                 verbose=False)

# Stack all merged datasets into one matrix.
X = vstack(datasets)

# Quality control: keep only rows of X with at least 500 nonzero entries.
# The same row indices are applied to the metadata arrays so they stay
# aligned with X.
qc_idx = [i for i, s in enumerate(np.sum(X != 0, axis=1)) if s >= 500]
tprint('Found {} valid cells among all datasets'.format(len(qc_idx)))
X = X[qc_idx]
cell_types = cell_types[qc_idx]
ages = ages[qc_idx]
injured = injured[qc_idx]

# Run dimension reduction only when the expected output file does not exist
# (presumably a result saved by an earlier run; the write is not visible
# in this part of the file).
if not os.path.isfile('data/dimred/{}_{}.txt'.format(DR_METHOD, NAMESPACE)):
    mkdir_p('data/dimred')
    tprint('Dimension reduction with {}...'.format(DR_METHOD))
    X_dimred = reduce_dimensionality(normalize(X), dim_red_k=DIMRED)
Ejemplo n.º 8
0
    'data/pbmc/10x/b_cells',
    'data/pbmc/10x/cd14_monocytes',
    'data/pbmc/10x/cd34',
    'data/pbmc/10x/cd4_t_helper',
    'data/pbmc/10x/cd56_nk',
    'data/pbmc/10x/cytotoxic_t',
    'data/pbmc/10x/memory_t',
    'data/pbmc/10x/naive_cytotoxic_t',
    'data/pbmc/10x/naive_t',
    'data/pbmc/10x/regulatory_t',
    'data/pbmc/68k',
]

# Load every dataset listed in data_names.
datasets, genes_list, n_cells = load_names(data_names)

# merge_datasets receives a shallow copy of the list (datasets[:]), so the
# `datasets` list object itself is not rebound or reordered by this call.
Xs, genes = merge_datasets(datasets[:], genes_list, ds_names=data_names)

# Ten cell-type labels; they appear to follow the order of the
# 'data/pbmc/10x/...' entries in data_names (TODO confirm, the start of
# that list is not visible here).
uniq_cell_types = [
    'CD19+_B',
    'CD14+_Monocyte',
    'CD34+',
    'CD4+_T_Helper2',
    'CD56+_NK',
    'CD8+_Cytotoxic_T',
    'CD4+/CD45RO+_Memory',
    'CD8+/CD45RA+_Naive_Cytotoxic',
    'CD4+/CD45RA+/CD25-_Naive_T',
    'CD4+/CD25_T_Reg',
]

# Starts empty; it is filled by code beyond this part of the file.
cell_types = []