Example #1
0
     'Hjurp',
     'Kpna2',
     'Kif23',
     'Cks2',
     'Dtl',
     'Top2a',
     'Bub1',
     'Arl6ip1',
     'Dlgap5',
     'Ube2s',
     'Nuf2',
     'Hmmr',
     'Cdc20',
     'Birc5',
 ]
 # Keep only the features named in genes_cc2. With inplace=False the result
 # is returned as dscc, presumably leaving ds itself unchanged.
 dscc = ds.query_features_by_name(genes_cc2, inplace=False)
 # Log-transform the counts of the restricted dataset in place.
 dscc.counts.log(inplace=True)
 # Compute a UMAP embedding from the restricted dataset; vsu is passed to
 # every scatter plot below as the reduced coordinates.
 vsu = dscc.dimensionality.umap()
 # 2 x 4 grid of panels sharing both x and y axes.
 fig, axs = plt.subplots(2, 4, sharex=True, sharey=True, figsize=(9, 5))
 # Quantities used to colour the panels, one grid column per entry.
 # NOTE(review): the first entry does not look like a gene name --
 # presumably a samplesheet column; confirm.
 genes_plot = ['log_virus_reads_per_million', 'Ccne2', 'Ccnd1', 'Ccnb1']
 for icol, gene in enumerate(genes_plot):
     # Top row: the embedding coloured by the current quantity.
     ax = axs[0][icol]
     dscc.plot.scatter_reduced_samples(vsu,
                                       color_by=gene,
                                       alpha=0.4,
                                       ax=ax,
                                       s=10)
     ax.set_title(gene)
     ax.set_axis_off()
     # Select the panel directly below (bottom row, same column) for the
     # code that follows.
     ax = axs[1][icol]
     if gene in dscc.counts.index:
Example #2
0
        # Split the dataset on the samplesheet column 'clusterN_SNV_<ic>'.
        # The parts are indexed with True/False below, so the column is
        # presumably a boolean "sample belongs to cluster ic" flag -- confirm.
        dss = ds.split('clusterN_SNV_{:}'.format(ic))
        # Compare the True group against the False group; the result is
        # used below as a table with a 'P-value' column.
        comp = dss[True].compare(dss[False])

        # FIXME: maybe look symmetrically for up- and downregulated
        # Difference of the per-feature mean counts (True group minus False
        # group); positive means higher on average in the True group.
        comp['diff'] = dss[True].counts.mean(axis=1) - dss[False].counts.mean(
            axis=1)
        # Among the features with a positive difference, keep the index
        # labels of the 5 with the smallest P-value.
        genesi = comp.loc[comp['diff'] > 0,
                          'P-value'].nsmallest(n=5).index.values

        # Store the selected features under the current cluster key.
        genes[ic] = genesi

    # Union of all per-key selections in genes: np.unique returns the sorted,
    # de-duplicated concatenation.
    genes_all = np.unique(np.concatenate(list(genes.values())))
    # Save the names as one tab-separated line (no trailing newline).
    with open('../data/genes_diff_expressed_clustersSNV.tsv', 'wt') as f:
        f.write('\t'.join(genes_all))

    # Restrict the dataset to the selected features.
    dsv = ds.query_features_by_name(genes_all)

    # Plot distributions
    # 3 x 10 grid, flattened to 30 axes. zip() below stops at the shorter
    # sequence, so any features beyond the 30th would get no panel.
    fig, axs = plt.subplots(3, 10, figsize=(17, 7), sharex=True, sharey=True)
    axs = axs.ravel()
    for ax, gene in zip(axs, genes_all):
        # One-column frame (column name = gene) holding log10(counts + 0.1);
        # the 0.1 offset keeps zero counts finite under the logarithm.
        df = np.log10(0.1 + dsv.counts.loc[[gene]].T)
        # Attach the cluster label from the samplesheet (aligned on df's
        # index) for grouping.
        df['clusterN'] = dsv.samplesheet['clusterN_SNV']
        # One box per cluster, drawn in the order given by clusters.
        sns.boxplot(
            data=df,
            y=gene,
            x='clusterN',
            ax=ax,
            order=clusters,
        )
        ax.grid(axis='y')
Example #3
0
    # Calculate transcriptome distances
    ds = Dataset(
            samplesheet='dengue',
            counts_table='dengue',
            featuresheet='humanGC38',
            )
    # Attach the cluster assignment of each sample to the samplesheet.
    ds.samplesheet['cluster_SNV'] = clusters
    ds.counts.normalize(inplace=True)
    # Relabel the features using the 'GeneName' column, then apply the
    # unique feature selection in place (presumably dropping duplicated
    # names -- confirm against the Dataset API).
    ds.rename(axis='features', column='GeneName', inplace=True)
    ds.feature_selection.unique(inplace=True)

    # Restrict to differentially expressed genes
    # The file is read as a single tab-separated list of names.
    with open('../data/genes_diff_expressed_clustersSNV.tsv', 'rt') as f:
        genes = f.read().split('\t')
    dsd = ds.query_features_by_name(genes)
    dsd.counts.log(inplace=True)

    # One sub-dataset per value of 'cluster_SNV'.
    dsp = dsd.split('cluster_SNV')
    # Maps frozenset({c1, c2}) -> 1-D array of distances between clusters
    # c1 and c2. The key is order-independent; a cluster paired with itself
    # is keyed by a one-element frozenset.
    dclut = {}
    for i1, c1 in enumerate(clustersu):
        # Transposed counts: presumably one row per sample -- confirm.
        ge1 = dsp[c1].counts.values.T
        # Visit each unordered pair once, including c1 with itself.
        for c2 in clustersu[:i1+1]:
            print(c1, c2)
            ge2 = dsp[c2].counts.values.T
            if c1 != c2:
                # Different clusters: every cross-cluster distance, flattened.
                d = cdist(ge1, ge2).ravel()
            else:
                # Same cluster: presumably scipy's squareform, which turns
                # the square distance matrix into its condensed vector
                # (each unordered pair once, zero diagonal dropped).
                d = squareform(cdist(ge1, ge2))
            dclut[frozenset([c1, c2])] = d