예제 #1
0
import matplotlib.pyplot as plt
import matplotlib.lines as mlines
import seaborn as sns

from lungsc.ingest.load_dataset import DatasetLung, versions


fig_fdn = '../../figures/endomese_share/endo_paper_figure_4/'


if __name__ == '__main__':

    # Create the figure output folder if it does not exist yet.
    # NOTE(review): `os` is not imported in the visible part of this file --
    # confirm it is imported further up.
    os.makedirs(fig_fdn, exist_ok=True)

    # Work on the last entry of the `versions` list
    version = versions[-1]
    ds0 = DatasetLung.load(preprocess=True, version=version, include_hyperoxia=True)
    # Filter ds0 in place: keep samples annotated as endothelial with doublet == 0
    ds0.query_samples_by_metadata(
        '(cellType == "endothelial") & (doublet == 0)',
        inplace=True)
    print('Total endothelial cells analyzed: {:}'.format(ds0.n_samples))
    # Second filter without inplace: `ds` keeps only Treatment == "normal".
    # Presumably this returns a new dataset and leaves ds0 as is -- TODO confirm.
    ds = ds0.query_samples_by_metadata('Treatment == "normal"')

    # Copy the two endothelial embedding columns of the samplesheet and rename
    # them to generic axis labels
    vs = ds.samplesheet[['embed_endo_1', 'embed_endo_2']].copy()
    vs.columns = ['dimension 1', 'dimension 2']

    if False:
        print('Plot embedding of genes')
        genes = [
            'Mest',
            'Peg3',
            'Sparcl1',
예제 #2
0
            sep='\t',
            header=None,
            ).set_index(1)
    go_terms = go_terms.loc[go_terms[0] == 'Biological Process'][2]

    def fun(df):
        """Tell whether a group of annotations is free of cell-cycle terms.

        ``df`` is a table whose column ``4`` holds labels that are looked up
        in the enclosing ``go_terms`` series. The result is true when none of
        the looked-up term descriptions contains the substring 'cell cycle'.
        """
        descriptions = go_terms.loc[df[4]]
        n_cell_cycle = descriptions.str.contains('cell cycle').sum()
        # Further exclusion patterns could be counted here as well
        return n_cell_cycle == 0

    # Keep only the rows of go_long whose column 4 is a label of go_terms, then
    # group them by column 2 (presumably the gene name -- TODO confirm schema).
    gby = go_long.loc[go_long[4].isin(go_terms.index)].groupby(2)
    # Collect the group keys for which fun() returns a true value, i.e. groups
    # with no term description containing 'cell cycle', and sort them.
    go_genes = {key for key, val in gby if fun(val)}
    go_genes = sorted(go_genes)

    # Note: this loads the second-to-last entry of `versions`, not the last one
    ds = DatasetLung.load(preprocess=True, version=versions[-2])
    # Filter in place to immune samples with doublet == 0, normal treatment,
    # and one of the five "Mac" subtypes
    ds.query_samples_by_metadata(
            '(cellType == "immune") & (doublet == 0) & (Treatment == "normal") & (cellSubtype in ("Mac I", "Mac II", "Mac III", "Mac IV", "Mac V"))',
            inplace=True)


    print('Find markers')
    # Same five subtypes as in the metadata query above
    csts = ['Mac I', 'Mac II', 'Mac III', 'Mac IV', 'Mac V']
    # NOTE(review): comps is not filled in the visible part of the loop
    comps = {}
    for cst in csts:
        print(cst)
        # Boolean samplesheet column marking membership in the current subtype
        ds.samplesheet['is_{:}'.format(cst)] = ds.samplesheet['cellSubtype'] == cst
        # Split the dataset on that column; the result is indexed by True/False
        dsp = ds.split('is_{:}'.format(cst))
        # Subsample both sides in place before comparing them
        dsp[True].subsample(100, inplace=True)
        dsp[False].subsample(100, inplace=True)
        # Compare the subtype (True) against all other samples (False)
        comp = dsp[True].compare(dsp[False])
예제 #3
0
import matplotlib.pyplot as plt
import matplotlib.lines as mlines
import seaborn as sns

from lungsc.ingest.load_dataset import DatasetLung, versions

fig_fdn = '../../figures/endomese_share/endo_paper_figure_5/'

if __name__ == '__main__':

    # Create the figure output folder if it does not exist yet.
    # NOTE(review): `os` is not imported in the visible part of this file --
    # confirm it is imported further up.
    os.makedirs(fig_fdn, exist_ok=True)

    # Work on the last entry of the `versions` list
    version = versions[-1]
    ds = DatasetLung.load(preprocess=True,
                          include_hyperoxia=False,
                          version=version)
    # Filter in place: endothelial samples, normal treatment, doublet == 0
    ds.query_samples_by_metadata(
        '(cellType == "endothelial") & (Treatment == "normal") & (doublet == 0)',
        inplace=True)

    # Boolean samplesheet column: True where cellSubtype is 'Proliferative EC'
    ds.samplesheet['is_prolif'] = ds.samplesheet[
        'cellSubtype'] == 'Proliferative EC'

    #print('Load tSNE from file')
    #vs = pd.read_csv(
    #    '../../data/sequencing/datasets/all_{:}/tsne_with_hyperoxia_endo.tsv'.format(version),
    #    sep='\t',
    #    index_col=0,
    #    )
    #vs = vs.loc[ds.samplenames]
예제 #4
0
    # Command-line interface of the script
    pa = argparse.ArgumentParser()
    # Mandatory option; only 'immune' or 'general' are accepted
    pa.add_argument(
        '--cellType',
        choices=['immune', 'general'],
        required=True,
    )
    # Boolean flag, False unless given on the command line
    pa.add_argument(
        '--includeNoCellType',
        action='store_true',
    )

    args = pa.parse_args()

    # Work on the last entry of the `versions` list
    version = versions[-1]
    ds = DatasetLung.load(version=version, include_doublets=True)
    # Overwrite the subtype label of samples with doublet == 1 so that they
    # show up under the name 'doublet?'
    ds.samplesheet.loc[ds.samplesheet['doublet'] == 1,
                       'cellSubtype'] = 'doublet?'

    if args.cellType == 'general':
        print('Plot dimensionality reduction of dataset')
        print('Feature selection')
        # Select features within groups defined by the 'Mousename' column;
        # inplace=False, so the selection is returned instead of applied to ds
        # (presumably a list of feature names -- TODO confirm)
        features = ds.feature_selection.overdispersed_within_groups(
            'Mousename',
            inplace=False,
        )
        # Dataset restricted to the selected features
        dsf = ds.query_features_by_name(features)

        print('PCA')
        dsc = dsf.dimensionality.pca(
            n_dims=30,
예제 #5
0
import glob
import gzip
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

# Ensure leidenalg is correct
sys.path.insert(0, os.path.abspath('../../packages'))
from lungsc.ingest.load_dataset import versions, DatasetLung

if __name__ == '__main__':

    # Work on the last entry of the `versions` list
    version = versions[-1]
    ds = DatasetLung.load(version=version)

    print('Feature selection')
    # Select features within groups defined by the 'Mousename' column;
    # inplace=False, so the selection is returned instead of applied to ds
    # (presumably a list of feature names -- TODO confirm)
    features = ds.feature_selection.overdispersed_within_groups('Mousename',
                                                                inplace=False)
    # Dataset restricted to the selected features
    dsf = ds.query_features_by_name(features)

    print('PCA')
    # PCA with 30 dimensions on the feature-selected dataset.
    # NOTE(review): return_dataset='samples' presumably yields a dataset of
    # the samples in principal-component space -- confirm against the API.
    dsc = dsf.dimensionality.pca(n_dims=30,
                                 robust=False,
                                 return_dataset='samples')

    # UMAP step is currently disabled
    #print('UMAP')
    #vsu = dsc.dimensionality.umap()

    print('tSNE')