コード例 #1
0
def main():
    """Embed the imported assay with t-SNE, plot it and export the coordinates.

    Reads the ``assay`` import and the ``random_seed`` argument, adds a
    scatter plot of the two t-SNE dimensions to the results and statically
    exports the per-sample coordinates as ``'t-SNE coordinates'``.
    """
    gn = Granatum()

    adata = gn.ann_data_from_assay(gn.get_import('assay'))
    seed = gn.get_arg('random_seed')

    # The embedding is read back from ``adata.obsm['X_tsne']`` after the call.
    sc.tl.tsne(adata, random_state=seed)
    embedding = adata.obsm['X_tsne']

    axis_labels = ['t-SNE dim. 1', 't-SNE dim. 2']

    plt.figure()
    # The third positional argument of ``scatter`` is the marker size; it is
    # made inversely proportional to the first dimension of ``adata``.
    plt.scatter(embedding[:, 0], embedding[:, 1], 5000 / adata.shape[0])
    plt.xlabel(axis_labels[0])
    plt.ylabel(axis_labels[1])
    plt.tight_layout()
    gn.add_current_figure_to_results(
        't-SNE plot: each dot represents a cell', dpi=75)

    # Map every observation name to its [dim1, dim2] coordinates.
    coords = {}
    for row_index, sample_id in enumerate(adata.obs_names):
        coords[sample_id] = embedding[row_index, :].tolist()

    tsne_export = {'dimNames': axis_labels, 'coords': coords}
    gn.export_statically(tsne_export, 't-SNE coordinates')

    gn.commit()
コード例 #2
0
def main():
    """Filter assay columns by detected-gene count and 'MT' expression share.

    A column (treated as a cell by the result message) is kept when its
    number of non-zero rows lies in ``[min_genes_per_cell,
    max_genes_per_cell]`` and the share of its total expression coming from
    rows whose index starts with ``'MT'`` is at most ``mt_genes_percent``.
    A summary line, two distribution plots, the filtered assay and a timing
    note are added to the results.
    """
    started = time.perf_counter()

    gn = Granatum()

    df = gn.pandas_from_assay(gn.get_import('assay'))
    min_genes = gn.get_arg('min_genes_per_cell')
    max_genes = gn.get_arg('max_genes_per_cell')
    # The argument is given in percent; convert to a fraction.
    mt_limit = gn.get_arg('mt_genes_percent') / 100.0

    # Per-column statistics (axis=0 collapses the rows).
    genes_per_cell = df.astype(bool).sum(axis=0)
    total_per_cell = df.sum(axis=0)
    mt_rows = df[df.index.str.startswith('MT')]
    mt_fraction = mt_rows.sum(axis=0).div(total_per_cell)

    # Explicit masks; the "keep" mask is written out in full rather than as
    # the negation of the rejection masks so NaN fractions behave the same.
    below_min = genes_per_cell < min_genes
    above_max = genes_per_cell > max_genes
    above_mt = mt_fraction > mt_limit
    keep = ((genes_per_cell >= min_genes)
            & (genes_per_cell <= max_genes)
            & (mt_fraction <= mt_limit))

    kept_columns = genes_per_cell[keep].index.values
    filtered = df.loc[:, kept_columns]

    summary = "Number of cells is now {} out of {} original cells with {} below min genes, {} above max genes, and {} above mt percentage threshold.".format(
        len(kept_columns),
        genes_per_cell.index.size,
        genes_per_cell[below_min].index.size,
        genes_per_cell[above_max].index.size,
        genes_per_cell[above_mt].index.size,
    )
    gn.add_result(summary, "markdown")

    # Two stacked histograms: detected genes per cell and MT percentage.
    panels = [
        ('Unique gene count distribution', genes_per_cell, 'Gene count'),
        ('MT Percent Distribution', mt_fraction * 100.0, 'MT Percent'),
    ]
    plt.figure()
    for position, (title, values, xlabel) in enumerate(panels, start=1):
        plt.subplot(2, 1, position)
        plt.title(title)
        sns.distplot(values, bins=200, color='darkblue',
                     kde_kws={'linewidth': 2})
        plt.ylabel('Frequency')
        plt.xlabel(xlabel)
    plt.tight_layout()

    gn.add_current_figure_to_results(
        'The distribution of expression levels for each cell with various metrics.',
        zoom=1,
        dpi=75)

    gn.export(gn.assay_from_pandas(filtered), "Filtered Cells Assay",
              dynamic=False)

    elapsed = round(time.perf_counter() - started, 2)
    gn.add_result(
        "* Finished cell filtering step in {} seconds*".format(elapsed),
        "markdown")

    gn.commit()
コード例 #3
0
def main():
    """Compute PCA and t-SNE embeddings of the assay, export and plot both.

    The assay matrix is transposed before fitting so that each fitted row
    corresponds to one entry of ``sampleIds``. For each method the
    per-sample coordinates are exported statically and a scatter plot is
    added to the results (PCA first, then t-SNE).
    """
    gn = Granatum()

    assay = gn.get_import('assay')
    matrix = np.array(assay.get('matrix'))
    sample_ids = assay.get('sampleIds')

    # Marker size shrinks as the number of matrix columns grows.
    marker_size = 5000 / matrix.shape[1]
    samples_by_features = np.transpose(matrix)

    def _publish(embedding, dim_names, export_name, caption):
        # Export the coordinates keyed by sample id, then plot them.
        coords = {}
        for row_index, sample_id in enumerate(sample_ids):
            coords[sample_id] = embedding[row_index, :].tolist()
        gn.export_statically({'dimNames': dim_names, 'coords': coords},
                             export_name)

        plt.figure()
        plt.scatter(embedding[:, 0], embedding[:, 1], s=marker_size)
        plt.tight_layout()
        gn.add_current_figure_to_results(caption, dpi=75)

    # ---- PCA ----------------------------------------------------------------
    pca_embedding = PCA(n_components=2).fit_transform(samples_by_features)
    _publish(pca_embedding, ['PCA-1', 'PCA-2'], 'pca',
             'Principal Component Analysis (PCA) scatter-plot')

    # ---- t-SNE --------------------------------------------------------------
    tsne_model = TSNE(n_jobs=multiprocessing.cpu_count())
    tsne_embedding = tsne_model.fit_transform(samples_by_features)
    _publish(tsne_embedding, ['tSNE-1', 'tSNE-2'], 'tsne',
             't-Distributed Stochastic Neighbor Embedding (t-SNE) scatter-plot')

    gn.commit()
コード例 #4
0
def main():
    """Draw a three-set Venn diagram of score-filtered gene sets.

    Each of the imports ``set1``..``set3`` is treated as a mapping from an
    identifier to a score. Entries are kept only when the score is a
    non-NaN number within ``[minScore, maxScore]``. The surviving keys are
    drawn as a Venn diagram (optionally as a word cloud when the
    ``wordcloud`` argument is truthy) and the figure is added to the
    results.
    """
    gn = Granatum()

    set1 = gn.get_import('set1')
    set2 = gn.get_import('set2')
    set3 = gn.get_import('set3')

    maxScore = gn.get_arg('maxScore')
    minScore = gn.get_arg('minScore')

    labelSet1 = gn.get_arg("labelSet1")
    labelSet2 = gn.get_arg("labelSet2")
    labelSet3 = gn.get_arg("labelSet3")

    wordcloud = gn.get_arg("wordcloud")

    def _filter_by_score(scores):
        # Keep entries whose score is a real, in-range number. ``and``
        # short-circuits, so ``isnan`` is never called on a non-numeric
        # value (the previous ``&`` evaluated both sides and raised
        # TypeError for such values, defeating the isinstance guard).
        return {
            key: score
            for key, score in scores.items()
            if isinstance(score, numbers.Number)
            and not isnan(score)
            and minScore <= score <= maxScore
        }

    filtered_set1 = _filter_by_score(set1)
    filtered_set2 = _filter_by_score(set2)
    filtered_set3 = _filter_by_score(set3)
    # Later sets win when the same key appears in several sets.
    merged_frequencies = {**filtered_set1, **filtered_set2, **filtered_set3}

    packedsets = [set(filtered_set1), set(filtered_set2), set(filtered_set3)]
    set_labels = (labelSet1, labelSet2, labelSet3)

    fig, ax = plt.subplots(1, 1)
    fig.set_size_inches(5, 4)

    caption = (
        'The area weighted Venn diagram is shown for the gene sets matching the criteria'
    )

    if wordcloud:
        out = venn3_wordcloud(packedsets,
                              set_labels=set_labels,
                              wordcloud_kwargs=dict(max_font_size=36),
                              word_to_frequency=merged_frequencies,
                              ax=ax)
    else:
        out = venn3(packedsets, set_labels=set_labels)
        venn3_circles(packedsets, linestyle='dashed', linewidth=1,
                      color="black")

    # Label styling is identical for both diagram flavours. Entries may be
    # falsy (the original code guarded against that), so skip those.
    for text in out.set_labels:
        if text:
            text.set_fontsize(18)
    for text in out.subset_labels:
        if text:
            text.set_fontsize(16)
            text.set_path_effects(
                [path_effects.SimpleLineShadow(), path_effects.Normal()])

    gn.add_current_figure_to_results(caption)

    gn.commit()
コード例 #5
0
def main():
    """Apply the beta-to-m transformation to the assay matrix and export it.

    Computes ``(x + epsilon) / (1 - x + epsilon)`` element-wise, optionally
    followed by a logarithm in base ``logBase``. Histograms of the values
    before and after the transformation are added to the results and the
    transformed assay is exported statically.
    """
    gn = Granatum()

    assay = gn.get_import('assay')
    matrix = np.array(assay.get('matrix'))

    take_log = gn.get_arg('take_log')
    log_base = gn.get_arg('logBase')
    epsilon = gn.get_arg('epsilon')

    numerator = matrix + epsilon
    denominator = 1 - matrix + epsilon
    transformed_matrix = numerator / denominator
    if take_log:
        # Change of base: log_b(x) = ln(x) / ln(b).
        transformed_matrix = np.log(transformed_matrix) / np.log(log_base)

    def _above_fifth_percentile(values):
        # Flatten and keep only entries strictly above the 5th percentile.
        flat = values.flatten()
        return flat[flat > np.percentile(flat, 5)]

    # NOTE(review): the caption below also mentions a 95th-percentile upper
    # bound, but only the lower cut is applied here - confirm which is meant.
    panels = [
        ('Before beta-to-m transformation', _above_fifth_percentile(matrix)),
        ('After beta-to-m transformation',
         _above_fifth_percentile(transformed_matrix)),
    ]

    plt.figure()
    for position, (title, values) in enumerate(panels, start=1):
        plt.subplot(2, 1, position)
        plt.title(title)
        plt.hist(values, bins=100)
        plt.ylabel('Frequency')
        plt.xlabel('Expression level')
    plt.tight_layout()

    caption = (
        'The distribution of expression level before and after beta-to-m transformation. Only the values greater '
        'than the 5 percentile (usually zero in single-cell data) and lower than 95 percentile are considered.'
    )
    gn.add_current_figure_to_results(caption, zoom=2, dpi=50)

    assay['matrix'] = transformed_matrix.tolist()
    gn.export_statically(assay, 'Beta-to-m transformed assay')

    gn.commit()
コード例 #6
0
def main():
    """Run PCA on the assay and publish variance and pairwise PC plots.

    Adds a bar chart of the explained-variance ratio of each computed
    component, then, for every pair among the first ``num_top_comps``
    components, adds a scatter plot and exports the pair's per-sample
    coordinates with kind ``sampleCoords``.
    """
    gn = Granatum()

    adata = gn.ann_data_from_assay(gn.get_import("assay"))
    num_top_comps = gn.get_arg("num_top_comps")

    sc.pp.pca(adata, 20)

    variance_ratios = adata.uns["pca"]["variance_ratio"]
    pc_labels = ["PC{}".format(x + 1) for x in range(len(variance_ratios))]

    plt.figure()
    plt.bar(pc_labels, variance_ratios)
    plt.tight_layout()
    gn.add_current_figure_to_results(
        "Explained variance (ratio) by each Principal Component (PC)",
        height=350,
        dpi=75)

    X_pca = adata.obsm["X_pca"]

    # Never index past the components that were actually computed: asking
    # for more than ``X_pca`` holds used to raise IndexError below.
    num_plotted = min(num_top_comps, X_pca.shape[1])

    for i, j in combinations(range(num_plotted), 2):
        xlabel = "PC{}".format(i + 1)
        ylabel = "PC{}".format(j + 1)
        title = "PC{} vs. PC{}".format(i + 1, j + 1)

        plt.figure()
        plt.scatter(X_pca[:, i], X_pca[:, j], s=5000 / adata.shape[0])
        plt.xlabel(xlabel)
        plt.ylabel(ylabel)
        plt.tight_layout()
        gn.add_current_figure_to_results(title, dpi=75)
        # The number of pairs grows quadratically; release each figure once
        # it has been added so open figures do not accumulate.
        plt.close()

        pca_export = {
            "dimNames": [xlabel, ylabel],
            "coords": {
                sample_id: X_pca[k, [i, j]].tolist()
                for k, sample_id in enumerate(adata.obs_names)
            },
        }
        gn.export(pca_export, title, kind="sampleCoords", meta={})

    gn.commit()
コード例 #7
0
def main():
    """Scan JAMMIT over a range of theta values and report FDR per alpha.

    Builds ``n_steps`` evenly spaced thetas between ``min_theta`` and
    ``max_theta``, runs the scan with FDR estimation, plots FDR against
    alpha and adds the formatted scan table to the results.
    """
    gn = Granatum()

    df = gn.pandas_from_assay(gn.get_import('assay'))
    n_steps = gn.get_arg('n_steps')
    min_theta = gn.get_arg('min_theta')
    max_theta = gn.get_arg('max_theta')

    jammit = JAMMIT.from_dfs([df])

    theta_grid = np.linspace(min_theta, max_theta, n_steps)
    jammit.scan(
        thetas=theta_grid,
        calculate_fdr=True,
        n_perms=10,
        verbose=1,
        convergence_threshold=1e-9,
    )

    scan_table = jammit.format(columns=['theta', 'alpha', 'n_sigs', 'fdr'])
    # Round the two parameter columns to three decimals.
    for column in ('theta', 'alpha'):
        scan_table[column] = scan_table[column].round(3)

    plt.plot(scan_table['alpha'], scan_table['fdr'])
    plt.xlabel('alpha')
    plt.ylabel('FDR')
    gn.add_current_figure_to_results('FDR plotted against alpha', height=400)

    column_specs = []
    for header in scan_table.columns:
        column_specs.append({'name': header, 'type': 'number', 'round': 3})

    table_payload = {
        'pageSize': n_steps,
        'orient': 'split',
        'columns': column_specs,
        'data': scan_table.values.tolist(),
    }
    gn.add_result(table_payload, data_type='table')

    gn.commit()
コード例 #8
0
def main():
    """Plot, per gene, the expression distribution overall and per group.

    For every gene id parsed from the ``gene_ids`` argument a fit over all
    samples is produced with ``plot_fits``; its returned parameters are then
    passed to ``plot_predict`` for each group of samples from the ``groups``
    import. One figure per gene is added to the results.
    """
    gn = Granatum()

    df = gn.pandas_from_assay(gn.get_import("assay"))
    gene_ids = parse(gn.get_arg("gene_ids"))
    groups = gn.get_import("groups")

    # ``confint`` is a percentage (it is divided by 100 to obtain alpha), so
    # the quantile function must receive the same fraction. Passing the raw
    # percentage yields NaN for any value above 1.
    confidence = gn.get_arg("confint") / 100.0
    alpha = 1.0 - confidence
    # presumably ``st`` is scipy.stats - TODO confirm against the imports
    min_zscore = st.norm.ppf(confidence)
    min_dist = 0.1

    # Invert the sample -> group mapping into group -> [samples].
    inv_map = {}
    for sample_id, group in groups.items():
        inv_map.setdefault(group, []).append(sample_id)

    for gene in gene_ids:
        plt.figure()
        # First form a statistic for all values, also puts out plot
        params = plot_fits(df.loc[gene, :].dropna().to_list(),
                           color="r",
                           alpha=alpha,
                           min_dist=min_dist,
                           min_zscore=min_zscore,
                           label="All")
        for group, members in inv_map.items():
            plt.subplot(1, 1, 1)
            plt.title('Gene expression level distribution for each cluster')
            plot_predict(df.loc[gene, members].dropna().to_list(),
                         params,
                         label=group)
            plt.ylabel('Frequency')
            plt.xlabel('Gene expression')

        plt.legend()
        plt.tight_layout()

        caption = (
            "The distribution of expression levels for gene {}.".format(gene))
        gn.add_current_figure_to_results(caption, zoom=1, dpi=75)

    gn.commit()
コード例 #9
0
def main():
    """Embed the assay columns with UMAP, plot and export the coordinates.

    The data frame is transposed before fitting, so each embedded point
    corresponds to one column of the assay (the ids exported are
    ``df.columns``). A scatter plot, the coordinates and a timing note are
    added to the results.
    """
    tic = time.perf_counter()

    gn = Granatum()

    df = gn.pandas_from_assay(gn.get_import('assay'))
    n_neighbors = gn.get_arg('n_neighbors')
    min_dist = gn.get_arg('min_dist')
    metric = gn.get_arg('metric')
    random_seed = gn.get_arg('random_seed')

    embedding = umap.UMAP(n_neighbors=n_neighbors,
                          min_dist=min_dist,
                          metric=metric,
                          random_state=random_seed).fit_transform(df.values.T)

    # Scale the marker size by the number of plotted points. The previous
    # code divided by ``df.shape[0]``, which is the row count of the
    # untransposed frame rather than the number of embedded points.
    marker_size = min(5000 / embedding.shape[0], 36.0)

    plt.figure()
    plt.scatter(embedding[:, 0], embedding[:, 1], marker_size)
    plt.xlabel('UMAP dim. 1')
    plt.ylabel('UMAP dim. 2')
    plt.tight_layout()

    gn.add_current_figure_to_results('UMAP plot: each dot represents a cell',
                                     dpi=75)

    umap_export = {
        'dimNames': ['UMAP dim. 1', 'UMAP dim. 2'],
        'coords': {
            sample_id: embedding[i, :].tolist()
            for i, sample_id in enumerate(df.columns)
        },
    }
    gn.export_statically(umap_export, 'UMAP coordinates')

    toc = time.perf_counter()
    time_passed = round(toc - tic, 2)

    timing = "* Finished UMAP step in {} seconds*".format(time_passed)
    gn.add_result(timing, "markdown")

    gn.commit()
コード例 #10
0
def main():
    """Filter genes by expressing-cell count and by mean/dispersion bounds.

    Genes expressed in fewer than ``min_cells_expressed`` cells are removed
    first; the remaining genes are filtered with the 'seurat' dispersion
    flavour using the natural log of ``min_mean`` / ``max_mean`` and the
    given dispersion bounds. A diagnostic plot, before/after gene counts and
    the filtered assay are added to the results.
    """
    gn = Granatum()

    adata = gn.ann_data_from_assay(gn.get_import("assay"))
    min_cells_expressed = gn.get_arg("min_cells_expressed")
    min_mean = gn.get_arg("min_mean")
    max_mean = gn.get_arg("max_mean")
    min_disp = gn.get_arg("min_disp")
    max_disp = gn.get_arg("max_disp")

    # Remember the gene count before any filtering for the summary below.
    genes_before = adata.shape[1]

    sc.pp.filter_genes(adata, min_cells=min_cells_expressed)

    dispersion_kwargs = dict(
        flavor='seurat',
        min_mean=math.log(min_mean),
        max_mean=math.log(max_mean),
        min_disp=min_disp,
        max_disp=max_disp,
    )
    filter_result = sc.pp.filter_genes_dispersion(adata.X, **dispersion_kwargs)
    adata = adata[:, filter_result.gene_subset]

    sc.pl.filter_genes_dispersion(filter_result)
    gn.add_current_figure_to_results(
        "Each dot represent a gene. The gray dots are the removed genes. The x-axis is log-transformed.",
        zoom=3,
        dpi=50,
        height=400,
    )

    summary_lines = [
        "Number of genes before filtering: **{}**".format(genes_before),
        "",
        "Number of genes after filtering: **{}**".format(adata.shape[1]),
    ]
    gn.add_result("\n".join(summary_lines), type="markdown")

    gn.export(gn.assay_from_ann_data(adata), "Filtered Assay", dynamic=False)

    gn.commit()
コード例 #11
0
def main():
    """Run diffusion pseudotime on the assay and publish order and groups.

    Builds an AnnData object from the transposed assay matrix, computes the
    neighbour graph and DPT with one branching, pickles the result and, for
    the ``dpt_order`` and ``dpt_groups`` observation columns, adds a
    diffusion-map plot and statically exports the per-sample values.
    """
    gn = Granatum()

    n_neighbors = gn.get_arg('nNeighbors', 15)
    neighbor_method = gn.get_arg('neighborMethod', 'gauss')

    assay = gn.get_import('assay')

    # The assay matrix is transposed so rows line up with ``sampleIds``.
    samples_by_genes = np.array(assay.get('matrix')).transpose()
    adata = sc.AnnData(samples_by_genes)
    adata.var_names = assay.get('geneIds')
    adata.obs_names = assay.get('sampleIds')

    sc.pp.neighbors(adata,
                    n_neighbors=n_neighbors,
                    use_rep='X',
                    method=neighbor_method)
    sc.tl.dpt(adata, n_branchings=1)

    gn._pickle(adata, 'adata')

    # (observation column, figure caption) pairs to publish, in order.
    outputs = (
        ('dpt_order', 'Cell order'),
        ('dpt_groups', 'Cell groups'),
    )
    for column, caption in outputs:
        plt.figure()
        sc.pl.diffmap(adata, color=column)
        gn.add_current_figure_to_results(caption)

        sample_names = adata.obs_names.tolist()
        column_values = adata.obs[column].values.tolist()
        gn.export_statically(dict(zip(sample_names, column_values)), column)

    gn.commit()
コード例 #12
0
def main():
    """Cluster samples with Louvain and plot them on imported coordinates.

    Computes a 20-neighbour graph and Louvain clusters, exports a
    sample -> ``'Cluster N'`` mapping (N is the Louvain label plus one) and
    adds a scatter plot of the imported ``sampleCoords`` coloured by cluster.
    """
    gn = Granatum()

    adata = gn.ann_data_from_assay(gn.get_import('assay'))
    sample_coords = gn.get_import('sampleCoords')
    random_seed = gn.get_arg('random_seed')

    sc.pp.neighbors(adata, n_neighbors=20, use_rep='X', method='gauss')
    sc.tl.louvain(adata, random_state=random_seed)

    def _display_name(label):
        # Cluster labels are shifted by one for display.
        return 'Cluster {}'.format(int(label) + 1)

    per_sample_names = [_display_name(c) for c in adata.obs['louvain']]
    gn.export_statically(dict(zip(adata.obs_names, per_sample_names)),
                         'Cluster assignment')

    dim_names = sample_coords.get('dimNames')
    coords_dict = sample_coords.get('coords')

    plt.figure()
    for label in adata.obs['louvain'].cat.categories:
        # Look up the imported coordinates of every member of this cluster.
        member_ids = adata.obs_names[adata.obs['louvain'] == label]
        points = [coords_dict.get(cell_id) for cell_id in member_ids]
        xs = [point[0] for point in points]
        ys = [point[1] for point in points]
        plt.scatter(xs, ys, label=_display_name(label))

    plt.xlabel(dim_names[0])
    plt.ylabel(dim_names[1])
    plt.legend()
    plt.tight_layout()

    gn.add_current_figure_to_results(
        'Scatter-plot using imported cell coordinates. Each dot represents a cell. The colors indicate the indentified cell clusters.',
        dpi=75)

    gn.commit()
コード例 #13
0
ファイル: run_glog.py プロジェクト: granatumx/gbox-glog
def main():
    """Apply a generalized-log (glog) transformation to the assay matrix.

    A "housekeeping" row (highest standard deviation among the ``n_top``
    rows with the largest mean) and a "negative control" row (lowest
    standard deviation among the ``n_bottom`` rows with the smallest mean)
    are selected. The background ``alphabk`` is the mean or median of the
    negative-control row, and ``c`` is the squared ratio of the
    negative-control standard deviation to that of the log of the
    background-subtracted housekeeping row. The matrix is transformed with
    ``log((z + sqrt(z^2 + c)) / 2)`` in base ``log_base``, where
    ``z = x - alphabk``.

    Raises:
        ValueError: if ``which_mid`` is neither ``'mean'`` nor ``'median'``.
    """
    gn = Granatum()

    assay = gn.get_import('assay')
    # ``np.float`` was removed in NumPy 1.24; the builtin ``float`` is the
    # type it aliased.
    x = np.array(assay.get('matrix')).astype(float)
    log_base = gn.get_arg('log_base')
    n_top = gn.get_arg('n_top')
    n_bottom = gn.get_arg('n_bottom')
    which_mid = gn.get_arg('which_mid')

    gene_df = pd.DataFrame(
        {
            'row_num': range(x.shape[0]),
            'gene_id': assay.get('geneIds'),
            'exp_mean': np.mean(x, axis=1),
            'exp_std': np.std(x, axis=1),
        }
    )
    gene_df = gene_df.sort_values('exp_mean', ascending=False)
    top_gene_row = gene_df.head(n_top).sort_values('exp_std', ascending=False).iloc[0]
    bottom_gene_row = gene_df.tail(n_bottom).sort_values('exp_std').iloc[0]

    # Clip at a small positive value before the logarithm below.
    hk_gene = np.clip(x[top_gene_row['row_num'], :], a_min=0.00001, a_max=None)

    neg_gene = x[bottom_gene_row['row_num'], :]

    if which_mid == 'mean':
        alphabk = np.mean(neg_gene)
    elif which_mid == 'median':
        alphabk = np.median(neg_gene)
    else:
        raise ValueError(
            f"which_mid must be 'mean' or 'median', got {which_mid!r}")

    loghkdatabk = np.log(hk_gene - alphabk) / np.log(log_base)

    # Drop NaN (log of a negative) and infinite (log of zero) values; a
    # single -inf would otherwise turn the standard deviation, and thus
    # ``c``, into NaN.
    loghkdatabk = loghkdatabk[np.isfinite(loghkdatabk)]

    c = (np.std(neg_gene, ddof=1) / np.std(loghkdatabk, ddof=1))**2

    xbk = x - alphabk
    transformed_matrix = np.log((xbk + np.sqrt(xbk**2 + c)) / 2) / np.log(log_base)

    gn.add_result(
        '\n'.join(
            [
                "Selected benchmarking genes:",
                f"  * housekeeping gene: **{top_gene_row['gene_id']}** "
                f"(mean: {top_gene_row['exp_mean']}, std: {top_gene_row['exp_std']}) ",
                f"  * negative control gene: **{bottom_gene_row['gene_id']}**"
                f"(mean: {bottom_gene_row['exp_mean']}, std: {bottom_gene_row['exp_std']})",
                "",
                f"Final formula is `y = log{log_base}((z + sqrt(z^2 + c))/2)`, where `z = x - {alphabk}` and `c = {c}`."
            ]
        ), 'markdown'
    )

    # Histograms only show values strictly above the 5th percentile.
    # NOTE(review): the caption also mentions a 95th-percentile upper bound
    # that is not applied here - confirm which is intended.
    non_zero_values_before = x.flatten()
    non_zero_values_before = non_zero_values_before[(non_zero_values_before > np.percentile(non_zero_values_before, 5))]

    non_zero_values_after = transformed_matrix.flatten()
    non_zero_values_after = non_zero_values_after[(non_zero_values_after > np.percentile(non_zero_values_after, 5))]

    plt.figure()

    plt.subplot(2, 1, 1)
    plt.title('Before glog transformation')
    plt.hist(non_zero_values_before, bins=100)
    plt.ylabel('Frequency')
    plt.xlabel('Expression level')

    plt.subplot(2, 1, 2)
    plt.title('After glog transformation')
    plt.hist(non_zero_values_after, bins=100)
    plt.ylabel('Frequency')
    plt.xlabel('Expression level')

    plt.tight_layout()

    caption = (
        'The distribution of expression level before and after glog transformation. Only the values greater '
        'than the 5 percentile (usually zero in single-cell data) and lower than 95 percentile are considered.'
    )
    gn.add_current_figure_to_results(caption, zoom=2, dpi=50)

    assay['matrix'] = transformed_matrix.tolist()
    gn.export_statically(assay, 'GLog transformed assay')

    gn.commit()
def main():
    """Scatter-plot samples on imported coordinates, coloured by expression.

    ``gene_ids`` is a comma-separated string of gene ids. For each id found
    in the assay index, samples are drawn at their imported ``viz_data``
    coordinates with colour and marker size driven by the (optionally
    z-scored, clipped) expression of that gene. With ``overlay_genes`` all
    genes share one figure with one colour bar per gene; otherwise one
    figure per gene is added to the results. For an id that is not in the
    assay, a table listing the available ids is added instead.
    """
    gn = Granatum()

    sample_coords = gn.get_import("viz_data")
    df = gn.pandas_from_assay(gn.get_import("assay"))
    gene_ids = gn.get_arg("gene_ids")
    overlay_genes = gn.get_arg("overlay_genes")
    max_colors = gn.get_arg("max_colors")
    min_level = gn.get_arg("min_level")
    max_level = gn.get_arg("max_level")
    convert_to_zscore = gn.get_arg("convert_to_zscore")
    min_marker_area = gn.get_arg("min_marker_area")
    max_marker_area = gn.get_arg("max_marker_area")
    min_alpha = gn.get_arg("min_alpha")
    max_alpha = gn.get_arg("max_alpha")
    grey_level = gn.get_arg("grey_level")

    coords = sample_coords.get("coords")
    dim_names = sample_coords.get("dimNames")

    # Build the list of colour maps. ``max_colors`` is a comma-separated
    # string; when it is empty, defaults are used (one cycled colour per
    # gene when overlaying, otherwise the single ``cdict`` map).
    # ``produce_cdict`` and ``cdict`` are defined outside this block.
    cmaps = []
    if overlay_genes:
        if max_colors == "":
            numcolors = len(gene_ids.split(','))
            # presumably single-letter matplotlib colour codes - TODO confirm
            cycol = cycle('bgrcmk')
            for i in range(numcolors):
                cmaps = cmaps + [
                    LinearSegmentedColormap("fire",
                                            produce_cdict(next(cycol),
                                                          grey=grey_level,
                                                          min_alpha=min_alpha,
                                                          max_alpha=max_alpha),
                                            N=256)
                ]
        else:
            for col in max_colors.split(','):
                col = col.strip()
                cmaps = cmaps + [
                    LinearSegmentedColormap("fire",
                                            produce_cdict(col,
                                                          grey=grey_level,
                                                          min_alpha=min_alpha,
                                                          max_alpha=max_alpha),
                                            N=256)
                ]

    else:
        if max_colors == "":
            cmaps = cmaps + [LinearSegmentedColormap("fire", cdict, N=256)]
        else:
            for col in max_colors.split(','):
                col = col.strip()
                cmaps = cmaps + [
                    LinearSegmentedColormap("fire",
                                            produce_cdict(col,
                                                          grey=grey_level,
                                                          min_alpha=min_alpha,
                                                          max_alpha=max_alpha),
                                            N=256)
                ]

    # Figure layout: one tall scatter axis followed by ``num_cbars`` thin
    # colour-bar axes (one per gene when overlaying, otherwise one).
    colorbar_height = 10
    plot_height = 650
    num_cbars = 1
    if overlay_genes:
        num_cbars = len(gene_ids.split(','))
    cbar_height_ratio = plot_height / (num_cbars * colorbar_height)
    fig, ax = plt.subplots(
        1 + num_cbars,
        1,
        gridspec_kw={'height_ratios': [cbar_height_ratio] + [1] * num_cbars})

    # ``gene_index`` counts every requested id, including ids that are not
    # found in the assay; it selects the colour map and colour-bar axis.
    gene_index = -1
    for gene_id in gene_ids.split(','):
        gene_id = gene_id.strip()
        gene_index = gene_index + 1
        if gene_id in df.index:
            if not overlay_genes:
                # One figure per gene: clear the current figure and create
                # a fresh figure/axes set.
                # NOTE(review): each pass creates a new figure via
                # ``plt.subplots`` without closing the previous one -
                # confirm whether figures should be closed.
                plt.clf()
                fig, ax = plt.subplots(
                    1 + num_cbars,
                    1,
                    gridspec_kw={
                        'height_ratios': [cbar_height_ratio] + [1] * num_cbars
                    })

            # NOTE(review): the transpose does not depend on the loop
            # variable and could be computed once before the loop.
            transposed_df = df.T

            mean = transposed_df[gene_id].mean()
            stdev = transposed_df[gene_id].std(ddof=0)

            # x/y come from the imported coordinates; "value" is the gene's
            # expression per sample (z-scored when requested).
            # NOTE(review): a gene with zero standard deviation gives a
            # division by zero in the z-score branch - confirm handling.
            if convert_to_zscore:
                scatter_df = pd.DataFrame(
                    {
                        "x": [a[0] for a in coords.values()],
                        "y": [a[1] for a in coords.values()],
                        "value": (df.loc[gene_id, :] - mean) / stdev
                    },
                    index=coords.keys())
            else:
                scatter_df = pd.DataFrame(
                    {
                        "x": [a[0] for a in coords.values()],
                        "y": [a[1] for a in coords.values()],
                        "value": df.loc[gene_id, :]
                    },
                    index=coords.keys())

            # Clip to the requested range, then map the clipped values
            # linearly onto [min_marker_area, max_marker_area] and square
            # the result to obtain the marker size.
            # NOTE(review): when all clipped values are equal,
            # ``max_value - min_value`` is zero and the sizes become NaN.
            values_df = np.clip(scatter_df["value"],
                                min_level,
                                max_level,
                                out=None)
            min_value = np.nanmin(values_df)
            max_value = np.nanmax(values_df)
            scaled_marker_size = (max_marker_area - min_marker_area) * (
                values_df - min_value) / (max_value -
                                          min_value) + min_marker_area
            scaled_marker_size = scaled_marker_size * scaled_marker_size
            # s = 5000 / scatter_df.shape[0]
            scatter = ax[0].scatter(
                x=scatter_df["x"],
                y=scatter_df["y"],
                s=scaled_marker_size,
                c=values_df,
                cmap=cmaps[gene_index % len(cmaps)])  #Amp_3.mpl_colormap)
            # Draw this gene's colour bar in its own axis below the scatter.
            cbar = fig.colorbar(scatter,
                                cax=ax[1 + (gene_index % num_cbars)],
                                orientation='horizontal',
                                aspect=40)
            cbar.set_label(gene_id, rotation=0)

            ax[0].set_xlabel(dim_names[0])
            ax[0].set_ylabel(dim_names[1])

            if not overlay_genes:
                gn.add_current_figure_to_results(
                    "Scatter-plot of {} expression".format(gene_id), dpi=75)

        else:

            # if the gene ID entered is not present in the assay
            # Communicate it to the user and output a table of available gene ID's

            description = 'The selected gene is not present in the assay. See the step that generated the assay'
            genes_in_assay = pd.DataFrame(
                df.index.tolist(),
                columns=['Gene unavailable in assay: choose from below'])
            gn.add_pandas_df(genes_in_assay, description)
    # When overlaying, the single shared figure is added once, after all
    # genes have been drawn; its height grows with the number of genes.
    if overlay_genes:
        gn.add_current_figure_to_results(
            "Scatter-plot of {} expression".format(gene_ids),
            height=650 + 100 * len(gene_ids.split(',')),
            dpi=75)

    gn.commit()
コード例 #15
0
def main():
    """Run JAMMIT for a single alpha and publish signal genes and samples.

    Exports the ``u`` vector as gene metadata and the ``v`` vector as sample
    metadata, reports the entries whose absolute value exceeds ``EPSILON``
    as tables and CSV exports, exports the assay restricted to those genes
    and samples, and adds a cluster map plus plots of ``u`` and ``v`` to the
    results.
    """
    gn = Granatum()

    df = gn.pandas_from_assay(gn.get_import('assay'))
    alpha = gn.get_arg('alpha')

    jammit = JAMMIT.from_dfs([df])
    res = jammit.run_for_one_alpha(
        alpha,
        verbose=1,
        convergence_threshold=1e-9,
    )
    u, v = res['u'], res['v']

    gn.export(dict(zip(df.index, u)), 'Genes loadings', kind='geneMeta')
    gn.export(dict(zip(df.columns, v)), 'Sample scores', kind='sampleMeta')

    def _signal_table(ids, values, value_name):
        # Rows whose |value| exceeds EPSILON, sorted by |value| descending.
        abs_name = 'abs_' + value_name
        table = pd.DataFrame({
            'id_': ids,
            abs_name: abs(values),
            value_name: values,
        })
        table = table[['id_', abs_name, value_name]]
        table = table.loc[table[value_name].abs() > EPSILON]
        return table.sort_values(abs_name, ascending=False)

    def _publish_table(table, title, csv_name):
        # Add the table to the results and export it as raw CSV.
        gn.add_result(
            {
                'title': title,
                'orient': 'split',
                'columns': table.columns.values.tolist(),
                'data': table.values.tolist(),
            },
            data_type='table',
        )
        gn.export(table.to_csv(index=False),
                  csv_name,
                  kind='raw',
                  meta=None,
                  raw=True)

    def _add_figure(description, height):
        # All figures of this step share zoom, width and dpi.
        gn.add_current_figure_to_results(
            description=description,
            zoom=2,
            width=750,
            height=height,
            dpi=50,
        )
        plt.close()

    gene_df = _signal_table(df.index, u, 'loading')
    _publish_table(gene_df, f"Signal genes ({len(gene_df)})",
                   'signal_genes.csv')

    sample_df = _signal_table(df.columns, v, 'score')
    _publish_table(sample_df, f"Signal samples ({len(sample_df)})",
                   'signal_samples.csv')

    # Restrict the assay to the signal genes (rows) and samples (columns).
    subset_df = df.loc[gene_df['id_'], sample_df['id_']]
    gn.export(gn.assay_from_pandas(subset_df),
              'Assay with only signal genes and samples',
              kind='assay')

    sns.clustermap(subset_df, cmap='RdBu')
    _add_figure('Cluster map of the signal genes and signal samples', 850)

    u_positions = range(len(u))
    plt.figure()
    plt.scatter(u_positions, u, s=2, c='red')
    plt.xlabel('index')
    plt.ylabel('value in u')
    _add_figure(
        'The *u* vector (loadings for genes) plotted as a scatter plot.', 450)

    v_positions = range(len(v))
    plt.figure()
    plt.plot(v_positions, v)
    plt.scatter(v_positions, v, s=6, c='red')
    plt.xlabel('index')
    plt.ylabel('value in v')
    _add_figure(
        'The *v* vector (scores for samples) plotted as a line plot.', 450)

    gn.commit()
コード例 #16
0
def main():
    """Draw a scatter plot of samples colored by a metadata value.

    Reads the ``viz_data`` import (its ``coords`` and ``dimNames`` entries),
    the ``value`` import and the plotting arguments from Granatum.

    * ``coloring_type == "categorical"``: every distinct value gets a color
      from the ``gist_rainbow`` colormap.  Categories with more than three
      points additionally get a text label and, when ``bounding_stdev`` is
      non-negative, a translucent polygon built from the convex hull of
      their points.
    * ``coloring_type == "continuous"``: values are mapped onto the ``Reds``
      colormap and a colorbar is added.

    The figure and a timing note are added to the results and committed.
    If anything raises while plotting, a placeholder figure suggesting the
    other coloring type is committed instead, together with the text of the
    exception.
    """
    tic = time.perf_counter()

    gn = Granatum()
    sample_coords = gn.get_import("viz_data")
    value = gn.get_import("value")
    coloring_type = gn.get_arg("coloring_type")
    bounding_stdev = gn.get_arg("bounding_stdev")
    label_location = gn.get_arg("label_location")
    label_transform = gn.get_arg("label_transform")
    labelXaxis = gn.get_arg("labelXaxis")
    labelYaxis = gn.get_arg("labelYaxis")
    sigfigs = gn.get_arg("sigfigs")
    numticks = gn.get_arg("numticks")
    font = gn.get_arg('font')

    coords = sample_coords.get("coords")
    dim_names = sample_coords.get("dimNames")

    # Seed both generators: `randrange` and `np.random.choice` are used below
    # to pick colors and label positions.
    seed = gn.get_arg('random_seed')
    random.seed(seed)
    np.random.seed(seed)

    df = pd.DataFrame(
        {
            "x": [a[0] for a in coords.values()],
            "y": [a[1] for a in coords.values()],
            "value": pd.Series(value)
        },
        index=coords.keys())

    target_dpi = 300
    target_width = 7.5  # inches
    target_height = 6.5  # inches
    font_size_in_in = font / 72.0  # inches
    font_size_in_px = font_size_in_in * target_dpi

    try:

        if coloring_type == "categorical":
            uniq = df["value"].unique()
            uniq.sort(kind="stable")
            num = uniq.shape[0]
            cmap = plt.get_cmap('gist_rainbow')
            carr = [0] * df.shape[0]
            listcats = list(df["value"])
            miny = min(df["y"])
            maxy = max(df["y"])
            # Data units per output pixel along y; used to offset the labels.
            scaley = (maxy - miny) / (target_height * target_dpi)
            print("Scaley = {}".format(scaley))
            colorhash = {}
            # `randrange` needs a real int: passing the numpy float returned
            # by `np.ceil` raises TypeError on Python >= 3.12.
            colorstep = int(np.ceil(256.0 / num))
            coffset = randrange(colorstep)
            # Random permutation so neighbouring categories do not get
            # neighbouring colors.
            grouptocolor = np.random.choice(np.arange(num), num, replace=False)

            for i, cat in enumerate(uniq):
                dff = df[df["value"] == cat]
                xs = list(dff["x"])
                ys = list(dff["y"])
                colorindex = (coffset + grouptocolor[i] * colorstep) % 256
                colorhash[cat] = colorindex
                craw = cmap(colorindex / 256.0)
                clr = [craw[0], craw[1], craw[2], 0.2]
                whitetransparent = [1.0, 1.0, 1.0, 0.5]
                coloropaque = [craw[0], craw[1], craw[2], 1.0]
                if len(xs) > 3:
                    pts = list(zip(xs, ys))
                    cent = np.mean(pts, axis=0)
                    lengs = [
                        math.sqrt((p[0] - cent[0]) * (p[0] - cent[0]) +
                                  (p[1] - cent[1]) * (p[1] - cent[1]))
                        for p in pts
                    ]
                    avgleng = st.mean(lengs)
                    stdleng = st.stdev(lengs) * bounding_stdev
                    if stdleng > 0.0:
                        # Drop points farther from the centroid than the mean
                        # distance plus the scaled standard deviation.
                        pts = [
                            p for p, ln in zip(pts, lengs)
                            if ln - avgleng < stdleng
                        ]
                    cent = np.mean(pts, axis=0)
                    hull = ConvexHull(pts)
                    # Every hull vertex shows up in two edges; sorting the
                    # edge endpoints by angle around the centroid puts the
                    # duplicates side by side, so every other element is the
                    # ordered outline.
                    ptslist = []
                    for edge in hull.simplices:
                        ptslist.append(pts[edge[0]])
                        ptslist.append(pts[edge[1]])
                    ptslist.sort(key=lambda p: np.arctan2(
                        p[1] - cent[1], p[0] - cent[0]))
                    ptslist = ptslist[0::2]
                    ptslist.append(ptslist[0])  # close the outline
                    if label_location == 'bottom':
                        lowestpt = min(ptslist, key=lambda p: p[1])
                    else:
                        lowestpt = ptslist[randrange(len(ptslist))]
                    if bounding_stdev >= 0.0:
                        # Outline scaled by 1.1 about the centroid.
                        poly = Polygon(1.1 * (np.array(ptslist) - cent) + cent,
                                       facecolor=clr)
                        poly.set_capstyle('round')
                        plt.gca().add_patch(poly)
                        poly.set_color(clr)
                    label_text = cat
                    if label_transform == "numbers":
                        label_text = re.sub("[^0-9]", "", cat)
                    plt.text(lowestpt[0],
                             lowestpt[1] - scaley * font_size_in_px * 1.2,
                             label_text,
                             fontsize=font,
                             fontname="Arial",
                             ha="center",
                             va="center",
                             color="black",
                             bbox=dict(boxstyle="round",
                                       fc=whitetransparent,
                                       ec=coloropaque))
                for j, x in enumerate(listcats):
                    if x == cat:
                        carr[j] = colorhash[cat]

            # Pin the normalization to 0..256 so a dot with color index k is
            # drawn with cmap(k / 256), the same color as its hull and label
            # border.  Without vmin/vmax the indices are rescaled to their
            # own min/max and the colors no longer match.
            plt.scatter(x=df["x"],
                        y=df["y"],
                        s=5000 / df.shape[0],
                        c=carr,
                        cmap=cmap,
                        vmin=0,
                        vmax=256)
            # NOTE(review): none of the artists above is given a `label`, so
            # this legend has no entries; kept to leave the output unchanged.
            plt.legend(markerscale=6,
                       loc='upper center',
                       bbox_to_anchor=(0.5, -0.05),
                       ncol=5)
        elif coloring_type == "continuous":
            plt.scatter(x=df["x"],
                        y=df["y"],
                        s=5000 / df.shape[0],
                        c=df["value"],
                        cmap="Reds")
            plt.colorbar()

        # Replace the automatic ticks with the ones computed by `resetArray`
        # and clamp the axes to the first and last tick.
        xmin, xmax = plt.gca().get_xlim()
        ymin, ymax = plt.gca().get_ylim()
        xtickArray = resetArray(xmin, xmax, numticks, sigfigs)
        ytickArray = resetArray(ymin, ymax, numticks, sigfigs)
        plt.xlim(xtickArray[0], xtickArray[-1])
        plt.ylim(ytickArray[0], ytickArray[-1])
        plt.xticks(xtickArray, fontsize=font, fontname="Arial")
        plt.yticks(ytickArray, fontsize=font, fontname="Arial")

        # An empty axis label argument falls back to the dimension name.
        if labelXaxis == "":
            plt.xlabel(dim_names[0], fontsize=font, fontname="Arial")
        else:
            plt.xlabel(labelXaxis, fontsize=font, fontname="Arial")

        if labelYaxis == "":
            plt.ylabel(dim_names[1], fontsize=font, fontname="Arial")
        else:
            plt.ylabel(labelYaxis, fontsize=font, fontname="Arial")

        gn.add_current_figure_to_results(
            "Scatter-plot",
            dpi=target_dpi,
            width=target_width * target_dpi,
            height=target_height * target_dpi,
            savefig_kwargs={'bbox_inches': 'tight'})

        toc = time.perf_counter()
        time_passed = round(toc - tic, 2)

        timing = "* Finished sample coloring step in {} seconds*".format(
            time_passed)
        gn.add_result(timing, "markdown")

        gn.commit()

    except Exception as e:
        # Top-level boundary of the step: show a hint instead of crashing,
        # but also report the real exception so the cause is not lost.
        plt.figure()
        plt.text(
            0.05, 0.7,
            'Values used as colors and type of sample metadata are incompatible with each other'
        )

        if coloring_type == 'categorical':
            new_coloring_type = 'continuous'
        else:
            new_coloring_type = 'categorical'

        plt.text(
            0.05, 0.5, 'Retry the step with ' + new_coloring_type +
            ' instead of ' + coloring_type)
        plt.axis('off')
        gn.add_current_figure_to_results('Scatter-plot')
        gn.add_result('Error = {}'.format(e), "markdown")

        gn.commit()
コード例 #17
0
def main():
    """Rank marker genes of every group against the rest and export them.

    Builds an AnnData object from the ``assay`` import (matrix transposed,
    ``geneIds`` as variable names, ``sampleIds`` as observation names) with
    the ``groupVec`` import attached as the ``groupVec`` observation column.
    After ``rank_genes_groups`` the ranking plot is added to the results and,
    for each group, the gene scores are exported twice: as ``geneMeta`` and
    as raw ``"gene", score`` text lines.

    If ranking, plotting or exporting raises, a placeholder figure and the
    exception text are committed instead.
    """
    started_at = time.perf_counter()

    gn = Granatum()

    assay = gn.get_import('assay')
    sample_ids = assay.get('sampleIds')
    group_dict = gn.get_import('groupVec')
    group_vec = pd.Categorical([group_dict.get(sid) for sid in sample_ids])
    num_groups = len(group_vec.categories)
    # 400 height units for every started block of seven groups.
    figheight = 400 * ((num_groups - 1) // 7 + 1)

    expression = np.array(assay.get('matrix')).T
    adata = sc.AnnData(expression)
    adata.var_names = assay.get('geneIds')
    adata.obs_names = sample_ids
    adata.obs['groupVec'] = group_vec

    sc.pp.neighbors(adata, n_neighbors=20, use_rep='X', method='gauss')

    try:
        sc.tl.rank_genes_groups(adata, 'groupVec', n_genes=100000)
        sc.pl.rank_genes_groups(adata, n_genes=20)
        gn.add_current_figure_to_results('One-vs-rest marker genes',
                                         dpi=75,
                                         height=figheight)

        gn._pickle(adata, 'adata')

        ranking = adata.uns['rank_genes_groups']
        name_rows = ranking['names']
        score_rows = ranking['scores']

        for group in name_rows.dtype.names:
            # Gene name -> score for this group.
            marker_scores = {
                str(name_row[group]): float(score_row[group])
                for name_row, score_row in zip(name_rows, score_rows)
            }
            gn.export(marker_scores,
                      'Marker score ({} vs. rest)'.format(group),
                      kind='geneMeta')

            csv_text = "\n".join(
                '"{}", {}'.format(gene, score)
                for gene, score in marker_scores.items())
            gn.export(csv_text,
                      'Marker score {} vs rest.csv'.format(group),
                      kind='raw',
                      meta=None,
                      raw=True)

        elapsed = round(time.perf_counter() - started_at, 2)
        gn.add_result(
            "* Finished marker gene identification step in {} seconds*".format(
                elapsed), "markdown")

        gn.commit()

    except Exception as e:
        # Report the failure as a placeholder figure plus the error text.
        plt.figure()
        plt.text(0.01, 0.5,
                 'Incompatible group vector due to insufficent cells')
        plt.text(0.01, 0.3,
                 'Please retry the step with a different group vector')
        plt.axis('off')
        gn.add_current_figure_to_results('One-vs-rest marker genes')
        gn.add_result('Error = {}'.format(e), "markdown")

        gn.commit()