Example #1
0
def get_dataset(tissue, membrane_only=True):
    counts = parse_counts(tissue)
    if membrane_only:
        go = parse_go_plasma_membrane()
        genes_membrane = go[go.isin(counts.index)]
        counts = counts.loc[genes_membrane]

    ds = Dataset(
        samplesheet=SampleSheet(cell_types),
        counts_table=CountsTable(counts),
    )
    return ds
Example #2
0
def test_initialize():
    from singlet.samplesheet import SampleSheet
    ss = SampleSheet.from_sheetname('example_sheet_tsv')
Example #3
0
def test_initialize_fromdataset():
    from singlet.samplesheet import SampleSheet
    ct = SampleSheet.from_datasetname('example_dataset')
Example #4
0
def get_dataset(tissue,
                membrane_only=True,
                regenerate=False,
                go_contains=None,
                go_exclude=None):

    # Some tissues like brain were split for sorting, we merge them here
    dss = []
    for tissue_facs in tissues_prediction[tissue]:
        cell_types, plates = parse_annotations(tissue_facs)
        counts = parse_counts(tissue_facs, regenerate=regenerate)
        if membrane_only:
            go = parse_go_plasma_membrane().index
            genes_membrane = go[go.isin(counts.index)]
            counts = counts.loc[genes_membrane]

        if (go_contains is not None) and (go_exclude is not None):
            raise ValueError('Use either go_contains or go_exclude')
        if go_contains is not None:
            go = parse_go_plasma_membrane()
            genes = go.index[go['GONames'].str.contains(go_contains)]
            genes = np.intersect1d(genes, counts.index)
            counts = counts.loc[genes]
        elif go_exclude is not None:
            go = parse_go_plasma_membrane()
            genes = go.index[~go['GONames'].str.contains(go_exclude)]
            genes = np.intersect1d(genes, counts.index)
            counts = counts.loc[genes]

        dss.append({'samplesheet': cell_types, 'counts': counts})

    if len(dss) == 1:
        ds = Dataset(
            samplesheet=SampleSheet(cell_types),
            counts_table=counts,
        )
        return ds
    else:
        # Merging is kind of messy because some genes are absent from either
        # subtissue (grrr); I put zeroes for now, Michelle is working on the
        # better solution (we have those numbers somewhere)
        genes = set()
        for ds in dss:
            genes |= set(ds['counts'].index.values)
        genes = pd.Index(sorted(genes), name=ds['counts'].index.name)
        for ds in dss:
            genes_missing = genes[~genes.isin(ds['counts'].index)]
            for gene in genes_missing:
                # The stuff is normalized, pseudocounted, and logged
                ds['counts'].loc[gene] = -1.0
            ds['counts'] = ds['counts'].loc[genes]
        ngenes = len(genes)
        ncells = sum(ds['samplesheet'].shape[0] for ds in dss)
        samplesheet_all = pd.concat([ds['samplesheet'] for ds in dss], axis=0)
        counts_all = pd.DataFrame(np.zeros((ngenes, ncells), float),
                                  index=genes,
                                  columns=samplesheet_all.index)
        for ds in dss:
            counts_all.loc[:,
                           ds['counts'].columns.values] = ds['counts'].values
        counts_all = CountsTable(counts_all)
        if ds['counts']._normalized:
            counts_all._normalized = ds['counts']._normalized

        ds = Dataset(
            samplesheet=SampleSheet(samplesheet_all),
            counts_table=counts_all,
        )
        return ds
Example #5
0
#!/usr/bin/env python
# vim: fdm=indent
'''
author:     Fabio Zanini
date:       15/08/17
content:    Test SampleSheet class.
'''
# Script
if __name__ == '__main__':

    # NOTE: an env variable for the config file needs to be set when
    # calling this script
    print('Instantiating SampleSheet')
    from singlet.samplesheet import SampleSheet
    ss = SampleSheet.from_sheetname('example_sheet_tsv')
    print('Done!')