Beispiel #1
0
import os
import sys

import matplotlib.pyplot as plt
import seaborn as sns

# Name the singlet configuration file via the environment. This is done
# before the singlet import below (presumably singlet reads the variable at
# import time -- confirm against the singlet documentation).
os.environ['SINGLET_CONFIG_FILENAME'] = 'singlet.yml'
# Make a local singlet checkout importable.
# NOTE(review): absolute, machine-specific path; adjust on other machines.
sys.path.append('/home/fabio/university/postdoc/singlet')
from singlet.dataset import Dataset

# Script
if __name__ == '__main__':

    # Build the dataset from three named entries (presumably resolved through
    # the singlet config file -- confirm).
    ds = Dataset(
        counts_table='dengue',
        samplesheet='virus',
        featuresheet='humanGC38',
    )
    # Keep only samples matching 'total >= 50000'; the filter is applied in
    # place on ds.
    ds.query_samples_by_counts('total >= 50000', inplace=True)

    # Use the shorter column name 'time' (it is the groupby key further down).
    ds.samplesheet.rename(columns={'time [h]': 'time'}, inplace=True)
    # Sum the counts along axis 0 *before* normalizing, and keep the result
    # both as a samplesheet column and in the local name `cov`.
    cov = ds.samplesheet['coverage'] = ds.counts.sum(axis=0)
    ds.counts.normalize('counts_per_million', inplace=True)

    # Virus abundance: dengue reads per million of (coverage + dengue reads).
    n = ds.samplesheet['numberDengueReads'].astype(int)
    ds.samplesheet['virus_reads_per_million'] = 1e6 * n / (cov + n)
    # Log-transform the normalized counts in place (base and pseudocount are
    # whatever singlet uses by default -- not visible here).
    ds.counts.log(inplace=True)

    # Only select cells without virus
    # (i.e. fewer than 0.1 virus reads per million).
    ds.query_samples_by_metadata('virus_reads_per_million < 0.1', inplace=True)
    # Check table with number of cells
    table = (ds.samplesheet.groupby(
        ['time', 'MOI']).count().iloc[:,
Beispiel #2
0
    # NOTE: the caller must set the environment variable that names the
    # singlet config file before running this script.
    from singlet.dataset import Dataset
    ds = Dataset(samplesheet='example_sheet_tsv',
                 counts_table='example_table_tsv')

    # Metadata query: exactly one sample is expected to match.
    print('Query samples by metadata')
    metadata_query = 'experiment == "test_pipeline"'
    ds_tmp = ds.query_samples_by_metadata(metadata_query, inplace=False)
    assert tuple(ds_tmp.samplenames) == ('test_pipeline',)
    print('Done!')

    # Count-based sample queries: (message, query, expected sample names).
    count_query_cases = (
        ('Query sample by counts in one gene',
         'KRIT1 > 100',
         ('third_sample',)),
        ('Query sample by total counts',
         'total < 3000000',
         ('second_sample',)),
        ('Query sample by mapped counts',
         'mapped < 1000000',
         ('second_sample',)),
    )
    for message, query, expected_names in count_query_cases:
        print(message)
        ds_tmp = ds.query_samples_by_counts(query, inplace=False)
        assert tuple(ds_tmp.samplenames) == expected_names
        print('Done!')

    print('Query features by counts')
    ds_tmp = ds.query_features_by_counts(