import matplotlib.pyplot as plt
import matplotlib.lines as mlines
import seaborn as sns

from lungsc.ingest.load_dataset import DatasetLung, versions


# Folder (relative path) that is created below for this figure's output.
fig_fdn = '../../figures/endomese_share/endo_paper_figure_4/'


if __name__ == '__main__':

    # NOTE(review): `os` is not imported in the visible part of the file;
    # presumably it is imported above this chunk -- confirm.
    os.makedirs(fig_fdn, exist_ok=True)

    # Load the dataset for the last entry of `versions`, hyperoxia included.
    version = versions[-1]
    ds0 = DatasetLung.load(preprocess=True, version=version, include_hyperoxia=True)

    # Restrict ds0 itself (inplace=True) to endothelial samples that are not
    # flagged as doublets.
    ds0.query_samples_by_metadata(
        '(cellType == "endothelial") & (doublet == 0)',
        inplace=True)
    print('Total endothelial cells analyzed: {:}'.format(ds0.n_samples))

    # Same query call without inplace: the result is bound to `ds` and holds
    # the samples with Treatment == "normal".
    # NOTE(review): presumably ds0 is left untouched by this call -- confirm.
    ds = ds0.query_samples_by_metadata('Treatment == "normal"')

    # Copy the two endothelial embedding columns of the samplesheet and give
    # them generic column names.
    vs = ds.samplesheet[['embed_endo_1', 'embed_endo_2']].copy()
    vs.columns = ['dimension 1', 'dimension 2']

    # This branch never runs as written; flip the condition by hand to
    # enable the gene embedding plots.
    if False:
        print('Plot embedding of genes')
        genes = [
            'Mest',
            'Peg3',
            'Sparcl1',
sep='\t',
header=None,
).set_index(1)
# NOTE(review): the call closed above starts outside this chunk; presumably it
# reads a headerless, tab-separated table into `go_terms` -- confirm.
# Keep only the rows whose column 0 is 'Biological Process' and retain
# column 2 of those rows (indexed by column 1, see set_index above).
go_terms = go_terms.loc[go_terms[0] == 'Biological Process'][2]

def fun(df):
    """Tell whether none of the terms referenced by df mention 'cell cycle'.

    The values in column 4 of df are used as labels to look up entries of the
    module-level `go_terms`.

    Returns True when no looked-up entry contains the substring 'cell cycle',
    False otherwise.
    """
    gt = go_terms.loc[df[4]]
    idx = gt.str.contains('cell cycle')
    # Placeholder left by the author for additional exclusion patterns.
    #idx |= ...
    return idx.sum() == 0

# Group the rows of go_long whose column 4 is a known term by column 2, and
# keep the group keys for which fun() returns True.
# NOTE(review): `go_long` is defined outside this chunk; column 2 presumably
# holds gene names -- confirm.
gby = go_long.loc[go_long[4].isin(go_terms.index)].groupby(2)
go_genes = {key for key, val in gby if fun(val)}
go_genes = sorted(go_genes)

# Load the second-to-last dataset version and restrict it in place to
# non-doublet immune samples under normal treatment that belong to one of
# the five "Mac" subtypes.
ds = DatasetLung.load(preprocess=True, version=versions[-2])
ds.query_samples_by_metadata(
    '(cellType == "immune") & (doublet == 0) & (Treatment == "normal") & (cellSubtype in ("Mac I", "Mac II", "Mac III", "Mac IV", "Mac V"))',
    inplace=True)

print('Find markers')
# Same subtype names as in the query string above; keep the two in sync.
csts = ['Mac I', 'Mac II', 'Mac III', 'Mac IV', 'Mac V']
comps = {}
for cst in csts:
    print(cst)
    # Boolean samplesheet column marking the samples of the current subtype.
    ds.samplesheet['is_{:}'.format(cst)] = ds.samplesheet['cellSubtype'] == cst
    # Split on that column; the result is indexed with True / False below.
    dsp = ds.split('is_{:}'.format(cst))
    # NOTE(review): presumably 100 is the number of samples kept per side --
    # confirm against the subsample() API.
    dsp[True].subsample(100, inplace=True)
    dsp[False].subsample(100, inplace=True)
    # Compare the subtype against all the other macrophage samples.
    comp = dsp[True].compare(dsp[False])
import matplotlib.pyplot as plt
import matplotlib.lines as mlines
import seaborn as sns

from lungsc.ingest.load_dataset import DatasetLung, versions


# Folder (relative path) that is created below for this figure's output.
fig_fdn = '../../figures/endomese_share/endo_paper_figure_5/'


if __name__ == '__main__':

    # NOTE(review): `os` is not imported in the visible part of the file;
    # presumably it is imported above this chunk -- confirm.
    os.makedirs(fig_fdn, exist_ok=True)

    # Load the dataset for the last entry of `versions`, without hyperoxia.
    version = versions[-1]
    ds = DatasetLung.load(preprocess=True, include_hyperoxia=False, version=version)

    # Restrict ds in place to endothelial samples under normal treatment that
    # are not flagged as doublets.
    ds.query_samples_by_metadata(
        '(cellType == "endothelial") & (Treatment == "normal") & (doublet == 0)',
        inplace=True)

    # Boolean samplesheet column: True where the subtype is 'Proliferative EC'.
    ds.samplesheet['is_prolif'] = ds.samplesheet[
        'cellSubtype'] == 'Proliferative EC'

    # Disabled code: loading a precomputed tSNE table from file and aligning
    # it to the sample names of ds.
    #print('Load tSNE from file')
    #vs = pd.read_csv(
    #    '../../data/sequencing/datasets/all_{:}/tsne_with_hyperoxia_endo.tsv'.format(version),
    #    sep='\t',
    #    index_col=0,
    #    )
    #vs = vs.loc[ds.samplenames]
# Command-line interface: --cellType is mandatory and must be 'immune' or
# 'general'; --includeNoCellType is an optional on/off flag.
pa = argparse.ArgumentParser()
pa.add_argument(
    '--cellType',
    choices=['immune', 'general'],
    required=True,
    )
pa.add_argument(
    '--includeNoCellType',
    action='store_true',
    )
args = pa.parse_args()

# Load the dataset for the last entry of `versions`, doublets included, and
# overwrite the subtype of every sample flagged as doublet with 'doublet?'.
version = versions[-1]
ds = DatasetLung.load(version=version, include_doublets=True)
ds.samplesheet.loc[ds.samplesheet['doublet'] == 1, 'cellSubtype'] = 'doublet?'

if args.cellType == 'general':
    print('Plot dimensionality reduction of dataset')

    # Select features with the 'Mousename' grouping and build a dataset
    # restricted to them; ds itself is not modified (inplace=False).
    print('Feature selection')
    features = ds.feature_selection.overdispersed_within_groups(
        'Mousename',
        inplace=False,
        )
    dsf = ds.query_features_by_name(features)

    # PCA on the feature-selected dataset; the remaining arguments of this
    # call lie outside this chunk.
    print('PCA')
    dsc = dsf.dimensionality.pca(
        n_dims=30,
import glob
import gzip
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Ensure leidenalg is correct: the local '../../packages' folder is put first
# on the module search path before the project import below.
# NOTE(review): `sys` and `os` are not imported in the visible part of the
# file; presumably they are imported above this chunk -- confirm.
sys.path.insert(0, os.path.abspath('../../packages'))
from lungsc.ingest.load_dataset import versions, DatasetLung


if __name__ == '__main__':

    # Load the dataset for the last entry of `versions`.
    version = versions[-1]
    ds = DatasetLung.load(version=version)

    # Select features with the 'Mousename' grouping and build a dataset
    # restricted to them; ds itself is not modified (inplace=False).
    print('Feature selection')
    features = ds.feature_selection.overdispersed_within_groups('Mousename', inplace=False)
    dsf = ds.query_features_by_name(features)

    # PCA with 30 dimensions on the feature-selected dataset.
    # NOTE(review): presumably return_dataset='samples' yields a dataset of
    # samples in PC space -- confirm against the pca() API.
    print('PCA')
    dsc = dsf.dimensionality.pca(n_dims=30, robust=False, return_dataset='samples')

    # Disabled alternative embedding.
    #print('UMAP')
    #vsu = dsc.dimensionality.umap()

    print('tSNE')