Esempio n. 1
0
def test_conditions(all_names, conditions, atlas_sub, cancer_data, repeats=1):
    """Run northstar.Subsample once per parameter combination and repeat.

    Every entry of ``conditions`` is zipped with ``all_names`` to form the
    keyword arguments for ``northstar.Subsample``. If a combination has an
    ``'n'`` entry, that entry is removed from the keyword arguments and used
    to subsample ``cancer_data`` through ``subsample_dataset`` instead.

    Parameters:
        all_names: parameter names, in the same order as the values of each
            combination.
        conditions: sequence of value combinations to evaluate.
        atlas_sub: object passed to ``northstar.Subsample`` as ``atlas``.
        cancer_data: indexable with ``'counts'`` and ``'meta'``; ``'meta'``
            is indexed with ``'Cell_type'`` to get the reference labels.
        repeats: how many times each combination is run.

    Returns:
        A pandas DataFrame with one row per run holding the parameters that
        were forwarded to northstar plus ``accuracy``, ``runtime`` (seconds
        measured with ``time.time()`` around construction and
        ``fit_transform``), ``repeat`` (1-based) and ``ncells``.
    """
    import time

    n_combinations = len(conditions)
    rows = []
    for position, combination in enumerate(conditions, start=1):
        print('{:} / {:}: {:}'.format(position, n_combinations, combination))
        for repeat_number in range(1, repeats + 1):
            params = dict(zip(all_names, combination))

            # 'n' is a dataset size, not a northstar argument: take it out
            # of the parameters before they are forwarded.
            dataset = cancer_data
            if 'n' in params:
                dataset = subsample_dataset(cancer_data, params.pop('n'))

            started = time.time()
            model = northstar.Subsample(atlas=atlas_sub, **params)
            predicted = model.fit_transform(dataset['counts'])
            elapsed = time.time() - started

            metadata = dataset['meta']
            params.update(
                accuracy=define_accuracy(
                    metadata['Cell_type'].values, predicted),
                runtime=elapsed,
                repeat=repeat_number,
                ncells=metadata.shape[0],
            )
            rows.append(params)

    return pd.DataFrame(rows)
def run_northstar(
    adata,
    asub,
    nct,
    rep,
):
    """Subsample ``adata``, classify it with northstar and report the result.

    ``adata`` is subsampled with ``northstar.subsample_atlas`` to ``nct``
    cells per ``'cell_ontology_class'``; the subsample is then fitted with
    ``northstar.Subsample(asub)``.

    Parameters:
        adata: dataset to subsample and classify.
        asub: atlas handed to ``northstar.Subsample``.
        nct: number of cells per type requested from the subsampling.
        rep: repeat identifier, copied unchanged into the result.

    Returns:
        dict with keys ``'time'`` (seconds spent constructing and fitting),
        ``'gof'`` (fraction of cells whose northstar membership equals the
        original ``'cell_ontology_class'`` label), ``'ntot'`` (number of rows
        in the subsample), ``'rep'`` and ``'tissue'``.
    """
    # NOTE(review): `time` and `tissue` are not defined in this function;
    # presumably they come from module scope -- confirm against the caller.
    print('Subsample with {:} cells per type'.format(nct))
    subsample = northstar.subsample_atlas(
        adata,
        cell_type_column='cell_ontology_class',
        n_cells=nct,
    )
    n_subsampled = subsample.X.shape[0]

    print('Run northstar')
    started = time.time()
    model = northstar.Subsample(asub)
    model.fit(subsample)
    elapsed = time.time() - started

    original_types = model.new_data.obs['cell_ontology_class'].astype(str)
    matches = original_types == model.membership

    return {
        'time': elapsed,
        'gof': matches.mean(),
        'ntot': n_subsampled,
        'rep': rep,
        'tissue': tissue,
    }
def test_conditions(all_names, conditions, atlas_sub, gbm, repeats=1):
    """Evaluate northstar.Subsample on ``gbm`` for each parameter combination.

    Parameters:
        all_names: parameter names matching, by position, the values in each
            combination.
        conditions: sequence of value combinations; each one is zipped with
            ``all_names`` and forwarded to ``northstar.Subsample``.
        atlas_sub: object passed to ``northstar.Subsample`` as ``atlas``.
        gbm: indexable with ``'counts'`` (data to classify) and ``'meta'``
            (indexed with ``'Cell_type'`` for the reference labels).
        repeats: number of runs per combination.

    Returns:
        A pandas DataFrame with one row per run: the tested parameters plus
        ``accuracy``, ``runtime`` (seconds, via ``time.time()``) and the
        1-based ``repeat`` number.
    """
    import time

    total = len(conditions)
    expected_types = None
    records = []
    for number, values in enumerate(conditions, start=1):
        print('{:} / {:}: {:}'.format(number, total, values))
        for repeat_index in range(repeats):
            record = dict(zip(all_names, values))

            # Time only the construction and the fit_transform call.
            tic = time.time()
            classifier = northstar.Subsample(atlas=atlas_sub, **record)
            assigned = classifier.fit_transform(gbm['counts'])
            toc = time.time()

            expected_types = gbm['meta']['Cell_type'].values
            record['accuracy'] = define_accuracy(expected_types, assigned)
            record['runtime'] = toc - tic
            record['repeat'] = repeat_index + 1
            records.append(record)

    return pd.DataFrame(records)
Esempio n. 4
0
    atlases = {
        'Velmeshev': adata,
        'Darmanis_2015_nofetal': adata_dmnf,
    }

    print('Run northstar (subsample)')
    ress = []
    for aname, atlas in atlases.items():
        if aname.endswith('avg'):
            continue
        print('Atlas: {:}'.format(aname))
        ns = northstar.Subsample(
            atlas,
            #n_features_per_cell_type=70,
            #n_features_overdispersed=500,
            #n_pcs=25,
            #resolution_parameter=0.003,
            #n_neighbors=30,
            #n_neighbors_external=0,
            #external_neighbors_mutual=True,
        )
        ns.fit(adata_gbm)

        ct_orig = ns.new_data.obs['Cell_type'].astype(str)
        gof = (ct_orig == ns.membership).mean()
        identity = ct_orig.to_frame()
        identity['northstar_assignment'] = ns.membership

        vs = ns.embed('umap')

        resd = {
            'atlas': aname,
    atlas_sub = af.fetch_atlas('Darmanis_2015_nofetal', kind='subsample')
    print('Rename a few cell types in the atlas')
    atlas_sub['cell_types'] = atlas_sub['cell_types'].map(
        {
            'Oligodendrocyte': 'Oligodendrocyte',
            'Vascular': 'Endothelial',
            'Astrocyte': 'Astrocyte',
            'Neuron': 'Neuron',
            'OPC': 'OPC',
            'microglia': 'Immune cell'
        }, )

    gbm = ingest_gbm_data()

    print('Classify and cluster cells with default parameters')
    no = northstar.Subsample(atlas=atlas_sub, )
    cell_types = no.fit_transform(gbm['counts'])
    acc = define_accuracy(gbm['meta']['Cell_type'].values, cell_types)
    print(acc)

    if False:
        print('Scan broad parameter space')
        params_dict = dict(
            n_features_per_cell_type=[10, 30, 50],
            n_features_overdispersed=[50, 300, 1000],
            n_pcs=[10, 20, 40],
            n_neighbors=[10, 20, 50],
            distance_metric=['correlation'],
            threshold_neighborhood=[0.8],
            resolution_parameter=[0.0001, 0.001, 0.01],
        )
Esempio n. 6
0
def atlas_subsamples_to_tsnedf(new_metadata, new_counttable, **kwargs):
    """Classify new cells with northstar and embed them with the atlas.

    Fits ``northstar.Subsample`` on ``new_counttable``, stores the resulting
    membership in ``new_metadata`` (modified in place: columns ``'new_class'``
    and ``'new_class_renamed'``), runs ``unweighted_PCA`` and
    ``perform_tSNE`` on the feature-selected matrix, and writes ``kwargs``
    as JSON to
    ``<savedir><timestamp>/annotation_parameters_<atlas>_CellAtlasSubsampling_<timestamp>.json``.

    Parameters:
        new_metadata: table indexed by cell; receives the new class columns.
        new_counttable: count table whose columns are the new cells.
        **kwargs: must contain ``savedir``, ``timestamp``, ``n_pcs``,
            ``atlas``, ``CT_lut`` (lookup from class label to display name),
            ``n_features_per_cell_type``, ``n_features_overdispersed``,
            ``n_neighbors``, ``threshold_neighborhood`` and
            ``resolution_parameter``. The whole mapping is dumped with
            ``json.dumps``, so every value must be JSON serializable.

    Returns:
        ``(tsnedf, celltypes)``: the tSNE table with columns ``'Dim1'``,
        ``'Dim2'`` and ``'new_membership'``, and a list with the sorted atlas
        type names followed by the sorted names of the remaining classes.

    Raises:
        KeyError: if a required entry is missing from ``kwargs``.
        OSError: if the output directory or file cannot be created.
    """
    savedir = kwargs['savedir']
    date = kwargs['timestamp']
    n_pcs = kwargs['n_pcs']
    atlas = kwargs['atlas']
    cell_type_names = kwargs['CT_lut']

    # Instantiate the classifier
    no = northstar.Subsample(
        atlas=atlas,
        features=None,
        n_features_per_cell_type=kwargs['n_features_per_cell_type'],
        n_features_overdispersed=kwargs['n_features_overdispersed'],
        n_pcs=n_pcs,
        n_neighbors=kwargs['n_neighbors'],
        distance_metric='correlation',
        threshold_neighborhood=kwargs['threshold_neighborhood'],
        clustering_metric='cpm',
        resolution_parameter=kwargs['resolution_parameter'],
        normalize_counts=True,
    )

    no.fit(new_counttable)

    # Add the new membership to the metadata
    idx = new_counttable.columns
    n_fixed = len(no.cell_types)
    c_fixed = len(np.unique(no.cell_types))
    new_metadata.loc[idx, 'new_class'] = no.membership

    def rename_class(label):
        """Map a class label to its display name."""
        if label in cell_type_names:
            return cell_type_names[label]
        if label.isdigit():
            # Digit-only labels are renumbered relative to the number of
            # distinct atlas types.
            # NOTE(review): presumably these are the new clusters found by
            # northstar -- confirm the numbering convention.
            return 'NewClass_' + "{0:0=2d}".format(int(label) - c_fixed + 1)
        return label

    new_metadata['new_class_renamed'] = [
        rename_class(f) for f in new_metadata['new_class']
    ]

    # Unweighted PCA on the feature-selected matrix; columns are labelled
    # with the atlas cell names followed by the new cells.
    cols = list(no.cell_names) + list(new_counttable.columns)
    feature_selected_matrix = pd.DataFrame(index=no.features_selected,
                                           columns=cols,
                                           data=no.matrix)
    normal_PCA, udistmat = unweighted_PCA(feature_selected_matrix, n_pcs)

    # Perform tSNE
    tsnedf = perform_tSNE(normal_PCA, 20)
    tsnedf.rename(index=str, columns={0: 'Dim1', 1: 'Dim2'}, inplace=True)
    tsnedf.loc[idx, 'new_membership'] = new_metadata.loc[idx,
                                                         'new_class_renamed']
    # The first n_fixed rows are treated as the atlas cells and get their
    # display names straight from the lookup table.
    atlas_rows = tsnedf[:n_fixed].index
    tsnedf.loc[atlas_rows,
               'new_membership'] = list(map(cell_type_names.get,
                                            no.cell_types))

    # Write the parameters to JSON in a folder named after the timestamp.
    output_file = savedir + date + '/annotation_parameters_' + atlas + '_CellAtlasSubsampling_' + date + '.json'
    # exist_ok avoids the check-then-create race of testing for the
    # directory before making it.
    os.makedirs(os.path.dirname(output_file), exist_ok=True)
    with open(output_file, 'w') as handle:
        handle.write(json.dumps(kwargs))

    atlastypes = list(
        np.sort(tsnedf.loc[atlas_rows, 'new_membership'].unique()))
    newtypes = list(
        set(new_metadata['new_class_renamed']).difference(atlastypes))
    celltypes = atlastypes + list(np.sort(newtypes))

    return tsnedf, celltypes
Esempio n. 7
0
        for res_par in res_pars:
            for rep in range(reps):
                print('Subsample with {:} cells per type'.format(nct))
                asub2 = northstar.subsample_atlas(
                        adata,
                        cell_type_column='cell_ontology_class',
                        n_cells=nct,
                        )
                ntot = asub2.X.shape[0]

                print('Run northstar')
                import time
                t0 = time.time()
                ns = northstar.Subsample(
                    asubr,
                    # NOTE: seems like this has to go down with more cell types
                    resolution_parameter=0.005,
                    )
                ns.fit(asub2)
                t1 = time.time()
                t = t1 - t0

                ct_orig = ns.new_data.obs['cell_ontology_class'].astype(str)
                identity = ct_orig.to_frame()
                identity['northstar'] = ns.membership
                identity['correct'] = (ct_orig == ns.membership)

                atlas_cts = asubr.obs['CellType'].unique()
                new_cats = list(set(ns.membership) - set(csti))
                cats_map = {}
                for nc in new_cats:
Esempio n. 8
0
    nreps = 5
    res = []
    for nct in ncts:
        for rep in range(nreps):
            print('Subsample with {:} cells per type'.format(nct))
            asub2 = northstar.subsample_atlas(
                    adata,
                    cell_type_column='cell_ontology_class',
                    n_cells=nct,
                    )
            ntot = asub2.X.shape[0]

            print('Run northstar')
            t0 = time.time()
            ns = northstar.Subsample(
                asub,
                )
            ns.fit(asub2)
            t1 = time.time()
            t = t1 - t0

            ct_orig = ns.new_data.obs['cell_ontology_class'].astype(str)
            gof = (ct_orig == ns.membership).mean()

            res.append({
                'time': t,
                'gof': gof,
                'ntot': ntot,
                'rep': rep,
                })
Esempio n. 9
0
                )

    sys.exit()

    print('Subsample their data')
    dsPsub = dsP.subsample(40, within_metadata='clusters')

    if False:
        print('Merge etc based on northstar')
        ns = northstar.Subsample(
            atlas={
                'cell_types': dsPsub.samplesheet['Cell Subtype'],
                'counts': dsPsub.counts,
            },
            join='intersection',
            n_pcs=35,
            resolution_parameter=0.001,
            n_features_per_cell_type=80,
            n_features_overdispersed=0,
            n_neighbors=20,
            n_neighbors_external=10,
            external_neighbors_mutual=False,
        )

        ns.new_data = ds.counts
        ns._check_init_arguments()
        ns.fetch_atlas_if_needed()
        ns.compute_feature_intersection()
        ns._check_feature_intersection()
        ns.prepare_feature_selection()
        ns.select_features()
        ns._check_feature_selection()
Esempio n. 10
0
    skip = [
        'professional antigen presenting cell',
        'lymphocyte',
        'leukocyte',
        'myeloid cell',
    ]
    ds_tm.query_samples_by_metadata('cell_ontology_class not in @skip',
                                    local_dict=locals(),
                                    inplace=True)

    print('Merge etc based on northstar')
    ns = northstar.Subsample(
        atlas={
            'cell_types': ds_tm.samplesheet['Cell Subtype'],
            'counts': ds_tm.counts,
        },
        join='intersection',
        n_pcs=35,
        resolution_parameter=0.001,
    )

    ns.new_data = ds.counts
    ns._check_init_arguments()
    ns.fetch_atlas_if_needed()
    ns.compute_feature_intersection()
    ns._check_feature_intersection()
    ns.prepare_feature_selection()
    ns.select_features()
    ns._check_feature_selection()
    ns.merge_atlas_newdata()
Esempio n. 11
0
        print('Assign subtypes based on Palantir + Northstar')

        fn_cache = '../../data/sequencing/me1/northstar_with_Palantir.pkl'
        if not os.path.isfile(fn_cache):
            print('Subsample palantir data')
            dsPsub = dsP.subsample(40, within_metadata='clusters')

            print('Merge etc based on northstar')
            atlas = dsPsub.to_AnnData()
            atlas.obs['CellType'] = atlas.obs['Cell Subtype']
            ns = northstar.Subsample(
                    atlas=atlas,
                    join='intersection',
                    n_pcs=35,
                    resolution_parameter=0.001,
                    n_features_per_cell_type=80,
                    n_features_overdispersed=0,
                    n_neighbors=20,
                    n_neighbors_external=10,
                    external_neighbors_mutual=False,
                )

            ns.new_data = ds.to_AnnData()
            ns._check_init_arguments()
            ns.fetch_atlas_if_needed()
            ns.compute_feature_intersection()
            ns._check_feature_intersection()
            ns.prepare_feature_selection()
            ns.select_features()
            ns._check_feature_selection()
            ns.merge_atlas_newdata()