import matplotlib.pyplot as plt import seaborn as sns os.environ['SINGLET_CONFIG_FILENAME'] = 'singlet.yml' sys.path.append('/home/fabio/university/postdoc/singlet') from singlet.dataset import Dataset # Script if __name__ == '__main__': ds = Dataset( counts_table='dengue', samplesheet='virus', featuresheet='humanGC38', ) ds.query_samples_by_counts('total >= 50000', inplace=True) ds.samplesheet.rename(columns={'time [h]': 'time'}, inplace=True) cov = ds.samplesheet['coverage'] = ds.counts.sum(axis=0) ds.counts.normalize('counts_per_million', inplace=True) n = ds.samplesheet['numberDengueReads'].astype(int) ds.samplesheet['virus_reads_per_million'] = 1e6 * n / (cov + n) ds.counts.log(inplace=True) # Only select cells without virus ds.query_samples_by_metadata('virus_reads_per_million < 0.1', inplace=True) # Check table with number of cells table = (ds.samplesheet.groupby( ['time', 'MOI']).count().iloc[:,
# NOTE: an env variable for the config file needs to be set when # calling this script from singlet.dataset import Dataset ds = Dataset( samplesheet='example_sheet_tsv', counts_table='example_table_tsv') print('Query samples by metadata') ds_tmp = ds.query_samples_by_metadata( 'experiment == "test_pipeline"', inplace=False) assert(tuple(ds_tmp.samplenames) == ('test_pipeline',)) print('Done!') print('Query sample by counts in one gene') ds_tmp = ds.query_samples_by_counts('KRIT1 > 100', inplace=False) assert(tuple(ds_tmp.samplenames) == ('third_sample',)) print('Done!') print('Query sample by total counts') ds_tmp = ds.query_samples_by_counts('total < 3000000', inplace=False) assert(tuple(ds_tmp.samplenames) == ('second_sample',)) print('Done!') print('Query sample by mapped counts') ds_tmp = ds.query_samples_by_counts('mapped < 1000000', inplace=False) assert(tuple(ds_tmp.samplenames) == ('second_sample',)) print('Done!') print('Query features by counts') ds_tmp = ds.query_features_by_counts(