예제 #1
0
def main():
    """Center each gene (row) of the assay at zero mean and export the result."""
    gn = Granatum()

    assay = gn.get_import('assay')
    expression = np.array(assay.get('matrix'))

    # Subtract each gene's mean across samples so every row is centered at 0.
    centered = expression - expression.mean(axis=1, keepdims=True)
    assay['matrix'] = centered.tolist()

    # Show the user the before/after value distributions.
    plot_distribution_comparison(expression, centered, gn)

    gn.export_statically(assay, 'Gene centered assay')

    gn.commit()
예제 #2
0
def main():
    """Score a predicted clustering against ground-truth labels and report
    adjusted Rand and adjusted mutual-information scores as markdown."""
    gn = Granatum()

    meta_true = gn.get_import("sample_meta_true")
    meta_predicted = gn.get_import("sample_meta_predicted")

    # Wrap both metas in pandas Series so they align even if their sample
    # IDs are ordered differently.
    truth = pd.Series(meta_true)
    prediction = pd.Series(meta_predicted)

    rand_score = adjusted_rand_score(truth, prediction)
    mutual_info_score = adjusted_mutual_info_score(truth, prediction)

    report_lines = [
        "Adjusted Rand score: **{}**".format(rand_score),
        "",
        "Adjusted mutual information score: **{}**".format(mutual_info_score),
    ]

    gn.add_result("\n".join(report_lines), "markdown")
    gn.commit()
예제 #3
0
def main():
    """Run PCA on the assay, plot per-PC explained variance, and for each
    pair of top PCs attach a scatter plot and export per-sample coordinates."""
    gn = Granatum()

    adata = gn.ann_data_from_assay(gn.get_import("assay"))
    num_top_comps = gn.get_arg("num_top_comps")

    # Compute 20 principal components (stored by scanpy in adata.obsm).
    sc.pp.pca(adata, 20)

    variance_ratios = adata.uns["pca"]["variance_ratio"]
    pc_labels = ["PC{}".format(x + 1) for x in range(len(variance_ratios))]

    plt.figure()
    plt.bar(pc_labels, variance_ratios)
    plt.tight_layout()
    gn.add_current_figure_to_results(
        "Explained variance (ratio) by each Principal Component (PC)",
        height=350,
        dpi=75)

    X_pca = adata.obsm["X_pca"]

    # Bug fix: only 20 PCs are computed above, so a larger num_top_comps
    # would raise an IndexError in the pairwise loop — clamp to what exists.
    num_top_comps = min(num_top_comps, X_pca.shape[1])

    for i, j in combinations(range(num_top_comps), 2):
        xlabel = "PC{}".format(i + 1)
        ylabel = "PC{}".format(j + 1)

        plt.figure()
        # Marker size shrinks as the number of cells grows.
        plt.scatter(X_pca[:, i], X_pca[:, j], s=5000 / adata.shape[0])
        plt.xlabel(xlabel)
        plt.ylabel(ylabel)
        plt.tight_layout()
        gn.add_current_figure_to_results("PC{} vs. PC{}".format(i + 1, j + 1),
                                         dpi=75)

        # Export the 2-D coordinates so downstream steps can use this PC
        # pair as a sample embedding.
        pca_export = {
            "dimNames": [xlabel, ylabel],
            "coords": {
                sample_id: X_pca[k, [i, j]].tolist()
                for k, sample_id in enumerate(adata.obs_names)
            },
        }
        gn.export(pca_export,
                  "PC{} vs. PC{}".format(i + 1, j + 1),
                  kind="sampleCoords",
                  meta={})

    gn.commit()
def main():
    """Randomly down-sample the cells of an assay and export the result."""
    gn = Granatum()

    adata = gn.ann_data_from_assay(gn.get_import("assay"))
    num_cells_to_sample = gn.get_arg("num_cells_to_sample")
    random_seed = gn.get_arg("random_seed")

    np.random.seed(random_seed)

    num_cells_before, num_genes_before = adata.shape

    # A value strictly between 0 and 1 is a fraction of the cells;
    # anything else is treated as an absolute cell count.
    if 0 < num_cells_to_sample < 1:
        num_cells_to_sample = round(num_cells_before * num_cells_to_sample)
    else:
        num_cells_to_sample = round(num_cells_to_sample)

    # Clamp the requested count into [1, number of cells available].
    num_cells_to_sample = max(1, min(num_cells_to_sample, num_cells_before))

    # Sample without replacement; sort so cell order is preserved.
    sampled_cells_idxs = np.sort(
        np.random.choice(num_cells_before, num_cells_to_sample, replace=False))

    adata = adata[sampled_cells_idxs, :]

    summary = "\n".join([
        "The assay before down-sampling has **{}** cells and {} genes.".format(
            num_cells_before, num_genes_before),
        "",
        "The assay after down-sampling has **{}** cells and {} genes.".format(
            adata.shape[0], adata.shape[1]),
    ])
    gn.add_result(summary, type="markdown")

    gn.export(gn.assay_from_ann_data(adata), "Down-sampled Assay", dynamic=False)

    gn.commit()
예제 #5
0
def main():
    """Scan a range of JAMMIT thetas over the assay and report FDR vs alpha
    as both a plot and an interactive table."""
    gn = Granatum()

    df = gn.pandas_from_assay(gn.get_import('assay'))
    n_steps = gn.get_arg('n_steps')
    min_theta = gn.get_arg('min_theta')
    max_theta = gn.get_arg('max_theta')

    jammit = JAMMIT.from_dfs([df])

    # Scan n_steps thetas evenly spaced over [min_theta, max_theta],
    # estimating FDR with 10 permutations per theta.
    jammit.scan(
        thetas=np.linspace(min_theta, max_theta, n_steps),
        calculate_fdr=True,
        n_perms=10,
        verbose=1,
        convergence_threshold=1e-9,
    )

    result = jammit.format(columns=['theta', 'alpha', 'n_sigs', 'fdr'])
    for col in ('theta', 'alpha'):
        result[col] = result[col].round(3)

    plt.plot(result['alpha'], result['fdr'])
    plt.xlabel('alpha')
    plt.ylabel('FDR')
    gn.add_current_figure_to_results('FDR plotted against alpha', height=400)

    column_specs = [
        {'name': h, 'type': 'number', 'round': 3} for h in result.columns
    ]
    gn.add_result(
        {
            'pageSize': n_steps,
            'orient': 'split',
            'columns': column_specs,
            'data': result.values.tolist(),
        },
        data_type='table',
    )

    gn.commit()
예제 #6
0
def main():
    """Cluster the assay with GranatumDeepClustering and export the cluster
    assignment (statically, as CSV, and as a result table)."""
    gn = Granatum()

    assay = gn.get_import('assay')

    args_for_init = {
        'selected_embedding': gn.get_arg('selectedEmbedding'),
        'selected_clustering': gn.get_arg('selectedClustering'),
        'n_components': gn.get_arg('nComponents'),
        'n_clusters': gn.get_arg('nClusters'),
        'find_best_number_of_cluster': gn.get_arg('findBestNumberOfCluster'),
    }

    # The fit expects a samples-by-genes matrix, hence the transpose.
    args_for_fit = {
        'matrix': np.transpose(np.array(assay.get('matrix'))),
        'sample_ids': assay.get('sampleIds'),
    }

    granatum_clustering = GranatumDeepClustering(**args_for_init)
    fit_results = granatum_clustering.fit(**args_for_fit)

    fit_exp = fit_results.get('clusters')
    gn.export_statically(fit_exp, 'Cluster assignment')
    # Also export a downloadable raw CSV: "sample_id", cluster.
    newdictstr = ['"' + str(k) + '"' + ", " + str(v) for k, v in fit_exp.items()]
    gn.export("\n".join(newdictstr), 'Cluster assignment.csv', kind='raw', meta=None, raw=True)

    # Markdown summary (currently unused — add_result below is commented out).
    # Fixed: the "Cluster array" bullet was duplicated in the original.
    md_str = f"""\
## Results

  * Cluster array: `{fit_results.get('clusters_array')}`
  * nClusters: {fit_results.get('n_clusters')}
  * Number of components: {fit_results.get('n_components')}
  * Outliers: {fit_results.get('outliers')}"""
    # gn.add_result(md_str, 'markdown')

    gn.add_result(
        {
            'orient': 'split',
            'columns': ['Sample ID', 'Cluster Assignment'],
            'data': [
                {'Sample ID': x, 'Cluster Assignment': y}
                for x, y in zip(assay.get('sampleIds'),
                                fit_results.get('clusters_array'))
            ],
        },
        'table',
    )

    gn.commit()
예제 #7
0
def main():
    """Combine two assays along genes or samples, with optional normalization.

    The assays are concatenated row-wise (transposed first when combining
    along samples).  Overlapping row labels are disambiguated by prefixing
    them with the user-supplied assay labels.
    """
    gn = Granatum()

    tb1 = gn.pandas_from_assay(gn.get_import('assay1'))
    tb2 = gn.pandas_from_assay(gn.get_import('assay2'))
    label1 = gn.get_arg('label1')
    label2 = gn.get_arg('label2')
    direction = gn.get_arg('direction')
    normalization = gn.get_arg('normalization')

    # Bug fix: record the original genes-by-cells shapes now — tb1/tb2 are
    # transposed below when direction == 'samples', so reading .shape at
    # report time mislabeled cells as genes.
    n_genes1, n_cells1 = tb1.shape
    n_genes2, n_cells2 = tb2.shape

    if direction == 'samples':
        tb1 = tb1.T
        tb2 = tb2.T

    # Prefix row labels that occur in both assays so concat keeps them apart.
    overlapped_index = set(tb1.index) & set(tb2.index)
    tb1.index = [
        f"{label1}_{x}" if x in overlapped_index else x for x in tb1.index
    ]
    tb2.index = [
        f"{label2}_{x}" if x in overlapped_index else x for x in tb2.index
    ]

    if normalization == 'none':
        tb = pd.concat([tb1, tb2], axis=0)
    elif normalization == 'frobenius':
        # Scale both assays so their Frobenius norms match the average norm.
        ntb1 = np.linalg.norm(tb1)
        ntb2 = np.linalg.norm(tb2)
        ntb = np.mean([ntb1, ntb2])
        fct1 = ntb / ntb1
        fct2 = ntb / ntb2
        tb = pd.concat([tb1 * fct1, tb2 * fct2], axis=0)
        gn.add_markdown(f"""\

Normalization info:

  - Assay **{label1}** is multiplied by {fct1}
  - Assay **{label2}** is multiplied by {fct2}
""")
    elif normalization == 'mean':
        # NOTE(review): np.mean on a DataFrame can yield per-column means
        # (a Series) rather than a scalar depending on the pandas version —
        # confirm the intended semantics of this normalization.
        ntb1 = np.mean(tb1)
        ntb2 = np.mean(tb2)
        ntb = np.mean([ntb1, ntb2])
        fct1 = ntb / ntb1
        fct2 = ntb / ntb2
        tb = pd.concat([tb1 * fct1, tb2 * fct2], axis=0)

        # Bug fix: the original message contained a stray '",' after
        # "Normalization info:".
        gn.add_markdown(f"""\

Normalization info:

  - Assay **{label1}** is multiplied by {fct1}
  - Assay **{label2}** is multiplied by {fct2}
""")
    else:
        raise ValueError()

    # Restore the genes-by-cells orientation for export.
    if direction == 'samples':
        tb = tb.T

    gn.add_markdown(f"""\
You combined the following assays:

  - Assay 1 (with {n_genes1} genes and {n_cells1} cells)
  - Assay 2 (with {n_genes2} genes and {n_cells2} cells)

into:

  - Combined Assay (with {tb.shape[0]} genes and {tb.shape[1]} cells)
""")

    gn.export_statically(gn.assay_from_pandas(tb), 'Combined assay')

    if direction == 'samples':
        meta_type = 'sampleMeta'
    elif direction == 'genes':
        meta_type = 'geneMeta'
    else:
        raise ValueError()

    # Tag each row of the combined assay with the assay it came from.
    gn.export(
        {
            **{x: label1 for x in tb1.index},
            **{x: label2 for x in tb2.index}
        }, 'Assay label', meta_type)

    gn.commit()
예제 #8
0
def main():
    """Run a DESeq2 differential-expression analysis (through the R bridge)
    on the cells belonging to the user-selected groups, then export the full
    result table and the significant genes."""
    gn = Granatum()
    assay_df = gn.pandas_from_assay(gn.get_import('assay'))
    grdict = gn.get_import('groupVec')
    # Same {cell: group} import wrapped as a Series for lookup by sample ID.
    phe_dict = pd.Series(gn.get_import('groupVec'))
    groups = set(parse(gn.get_arg('groups')))

    # Invert {cell: group} into {group: [cells]}, keeping only the groups
    # the user asked to compare.
    inv_map = {}
    for k, v in grdict.items():
        if v in groups:
            inv_map[v] = inv_map.get(v, []) + [k]
    cells = []
    for k, v in inv_map.items():
        cells.extend(v)
    # Restrict the assay to those cells and densify (the R bridge cannot
    # take a sparse frame); NaNs become 0 counts.
    assay_df = assay_df.loc[:, cells]
    assay_df = assay_df.sparse.to_dense().fillna(0)
    #assay_mat = r['as.matrix'](pandas2ri.py2ri(assay_df))
    # assay_mat = r['as.matrix'](conversion.py2rpy(assay_df))
    # Phenotype labels aligned to the assay's column (cell) order.
    phe_vec = phe_dict[assay_df.columns]

    # Delegate the actual DESeq2 run to the R driver script.
    r.source('./drive_DESeq2.R')
    ret_r = r['run_DESeq'](assay_df, phe_vec)
    ret_r_as_df = r['as.data.frame'](ret_r)

    # ret_py_df = pandas2ri.ri2py(ret_r_as_df)
    # TODO: maybe rename the columns to be more self-explanatory?
    # NOTE(review): ret_r_as_df is used directly as a pandas frame here —
    # presumably an rpy2 converter is active globally; verify in the module
    # setup outside this view.
    result_df = ret_r_as_df
    result_df = result_df.sort_values('padj')
    result_df.index.name = 'gene'
    gn.add_pandas_df(result_df.reset_index(),
                     description='The result table as returned by DESeq2.')
    gn.export(result_df.to_csv(), 'DESeq2_results.csv', raw=True)
    # Genes significant at padj < 0.05, exported as {gene: log2FoldChange}.
    significant_genes = result_df.loc[
        result_df['padj'] < 0.05]['log2FoldChange'].to_dict()
    gn.export(significant_genes, 'Significant genes', kind='geneMeta')
    gn.commit()
예제 #9
0
def main():
    """Relabel predicted groups to their best-matching reference labels,
    optionally dropping cells whose group disagrees with the reference."""
    tic = time.perf_counter()

    gn = Granatum()
    assay = gn.pandas_from_assay(gn.get_import('assay'))
    groups = gn.get_import('groups')
    reflabels = gn.get_import('reflabels')
    remove_cells = gn.get_arg('remove_cells')

    # Invert both {cell: label} maps into {label: [cells]}.
    inv_map = {}
    for cell, label in groups.items():
        inv_map[label] = inv_map.get(label, []) + [cell]

    inv_map_ref = {}
    for cell, label in reflabels.items():
        inv_map_ref[label] = inv_map_ref.get(label, []) + [cell]

    # For each predicted group, pick the reference label with the biggest
    # cell overlap; members outside that reference label are "mislabelled".
    group_relabel = {}
    mislabelled_cells = []
    for label, members in inv_map.items():
        member_set = set(members)
        overlaps = {
            ref_label: len(set(ref_members).intersection(member_set))
            for ref_label, ref_members in inv_map_ref.items()
        }
        best_ref = max(overlaps, key=overlaps.get)
        group_relabel[label] = best_ref
        mislabelled_cells += list(member_set.difference(set(inv_map_ref[best_ref])))

    if remove_cells:
        gn.add_result(
            "Dropping {} mislabelled cells".format(len(mislabelled_cells)),
            "markdown")
        assay = assay.drop(mislabelled_cells, axis=1)
        groups = {
            cell: label
            for cell, label in groups.items() if cell not in mislabelled_cells
        }

    # Rewrite every remaining cell's group to its matched reference label.
    for cell in groups:
        groups[cell] = group_relabel[groups[cell]]

    toc = time.perf_counter()
    time_passed = round(toc - tic, 2)

    gn.export_statically(gn.assay_from_pandas(assay), "Corresponded assay")
    gn.export_statically(groups, "Corresponded labels")

    timing = "* Finished sample coloring step in {} seconds*".format(
        time_passed)
    gn.add_result(timing, "markdown")

    gn.commit()
예제 #10
0
def main():
    """Log-transform the assay and plot the expression distribution before
    and after the transformation."""
    gn = Granatum()

    assay = gn.get_import('assay')
    matrix = np.array(assay.get('matrix'))

    log_base = gn.get_arg('logBase')
    pseudo_counts = gn.get_arg('pseudoCounts')

    # log_base(x + pseudo_counts), via the change-of-base formula.
    transformed_matrix = np.log(matrix + pseudo_counts) / np.log(log_base)

    def _above_5th_percentile(values):
        # Drop values at or below the 5th percentile (usually zeros in
        # single-cell data) so the histogram isn't dominated by them.
        flat = values.flatten()
        return flat[flat > np.percentile(flat, 5)]

    before = _above_5th_percentile(matrix)
    after = _above_5th_percentile(transformed_matrix)

    plt.figure()
    for panel, (title, values) in enumerate(
            [('Before log transformation', before),
             ('After log transformation', after)], start=1):
        plt.subplot(2, 1, panel)
        plt.title(title)
        plt.hist(values, bins=100)
        plt.ylabel('Frequency')
        plt.xlabel('Expression level')
    plt.tight_layout()

    caption = (
        'The distribution of expression level before and after log transformation. Only the values greater '
        'than the 5 percentile (usually zero in single-cell data) and lower than 95 percentile are considered.'
    )
    gn.add_current_figure_to_results(caption, zoom=2, dpi=50)

    assay['matrix'] = transformed_matrix.tolist()
    gn.export_statically(assay, 'Log transformed assay')

    gn.commit()
예제 #11
0
def main():
    """Adjust cluster labels: drop whole clusters and/or merge up to three
    sets of clusters under new labels, then export the adjusted assay/labels."""
    tic = time.perf_counter()

    gn = Granatum()
    assay = gn.pandas_from_assay(gn.get_import('assay'))
    groups = gn.get_import('groups')

    # Invert {cell: label} into {label: [cells]} for the set lookups below.
    inv_map = {}
    for k, v in groups.items():
        inv_map[v] = inv_map.get(v, []) + [k]

    drop_set = parse(gn.get_arg('drop_set'))
    merge_set_1 = parse(gn.get_arg('merge_set_1'))
    merge_set_2 = parse(gn.get_arg('merge_set_2'))
    merge_set_3 = parse(gn.get_arg('merge_set_3'))
    relabel_set_1 = gn.get_arg('relabel_set_1')
    relabel_set_2 = gn.get_arg('relabel_set_2')
    relabel_set_3 = gn.get_arg('relabel_set_3')

    # Default each merged label to "a + b + ..." when none was supplied.
    if len(merge_set_1) > 0 and relabel_set_1 == "":
        relabel_set_1 = " + ".join(merge_set_1)
    if len(merge_set_2) > 0 and relabel_set_2 == "":
        relabel_set_2 = " + ".join(merge_set_2)
    if len(merge_set_3) > 0 and relabel_set_3 == "":
        relabel_set_3 = " + ".join(merge_set_3)

    try:
        for ds in drop_set:
            cells = inv_map[ds]
            gn.add_result(
                "Dropping {} cells that match {}".format(len(cells), ds),
                "markdown")
            assay = assay.drop(cells, axis=1)
            groups = {key: val for key, val in groups.items() if val != ds}
    except Exception as e:
        gn.add_result(
            "Error found in drop set, remember it should be comma separated: {}"
            .format(e), "markdown")

    def _merge(merge_set, new_label):
        # Relabel every cell of the listed clusters to new_label.
        # Bug fix: inv_map was built before the drop step, so its lists may
        # still contain dropped cells; skipping cells no longer in `groups`
        # prevents dropped cells from being silently re-added to the labels.
        for cluster in merge_set:
            for cell in inv_map[cluster]:
                if cell in groups:
                    groups[cell] = new_label

    try:
        if len(merge_set_1) > 0:
            _merge(merge_set_1, relabel_set_1)
        if len(merge_set_2) > 0:
            _merge(merge_set_2, relabel_set_2)
        if len(merge_set_3) > 0:
            _merge(merge_set_3, relabel_set_3)
    except Exception as e:
        gn.add_result(
            "Error found in merge sets, remember it should be comma separated: {}"
            .format(e), "markdown")

    toc = time.perf_counter()
    time_passed = round(toc - tic, 2)

    gn.export_statically(gn.assay_from_pandas(assay), "Label adjusted assay")
    gn.export_statically(groups, "Adjusted labels")

    timing = "* Finished sample coloring step in {} seconds*".format(
        time_passed)
    gn.add_result(timing, "markdown")

    gn.commit()
예제 #12
0
def main():
    """Filter cells by unique-gene count and mitochondrial-gene percentage,
    report the counts, and export the filtered assay."""
    tic = time.perf_counter()

    gn = Granatum()

    df = gn.pandas_from_assay(gn.get_import('assay'))
    mingenes = gn.get_arg('min_genes_per_cell')
    maxgenes = gn.get_arg('max_genes_per_cell')
    mt_percent = gn.get_arg('mt_genes_percent') / 100.0

    # Per-cell metrics: number of expressed (non-zero) genes and total counts.
    uniquegenecount = df.astype(bool).sum(axis=0)
    totalgenecount = df.sum(axis=0)
    # Mitochondrial fraction per cell (rows whose gene name starts with 'MT').
    # NOTE(review): startswith('MT') also matches genes like 'MTOR' —
    # confirm whether the 'MT-' prefix was intended.
    mtrows = df[df.index.str.startswith('MT')]
    mtgenecount = mtrows.sum(axis=0)
    mtpercent = mtgenecount.div(totalgenecount)

    keep_mask = ((uniquegenecount.T >= mingenes)
                 & (uniquegenecount.T <= maxgenes)
                 & (mtpercent.T <= mt_percent))
    colsmatching = uniquegenecount.T[keep_mask].index.values
    adata = df.loc[:, colsmatching]

    num_orig_cells = uniquegenecount.T.index.size
    num_filtered_cells = len(colsmatching)

    # Per-criterion counts for the report (not mutually exclusive).
    num_lt_min = uniquegenecount.T[(uniquegenecount.T < mingenes)].index.size
    num_gt_max = uniquegenecount.T[(uniquegenecount.T > maxgenes)].index.size
    num_gt_mt = uniquegenecount.T[(mtpercent.T > mt_percent)].index.size

    gn.add_result(
        "Number of cells is now {} out of {} original cells with {} below min genes, {} above max genes, and {} above mt percentage threshold.".format(
            num_filtered_cells, num_orig_cells, num_lt_min, num_gt_max, num_gt_mt),
        "markdown")

    plt.figure()

    plt.subplot(2, 1, 1)
    plt.title('Unique gene count distribution')
    sns.distplot(uniquegenecount, bins=int(200), color='darkblue', kde_kws={'linewidth': 2})
    plt.ylabel('Frequency')
    plt.xlabel('Gene count')

    plt.subplot(2, 1, 2)
    plt.title('MT Percent Distribution')
    sns.distplot(mtpercent * 100.0, bins=int(200), color='darkblue', kde_kws={'linewidth': 2})
    plt.ylabel('Frequency')
    plt.xlabel('MT Percent')

    plt.tight_layout()

    caption = (
        'The distribution of expression levels for each cell with various metrics.'
    )
    gn.add_current_figure_to_results(caption, zoom=1, dpi=75)

    gn.export(gn.assay_from_pandas(adata), "Filtered Cells Assay", dynamic=False)

    toc = time.perf_counter()
    time_passed = round(toc - tic, 2)

    timing = "* Finished cell filtering step in {} seconds*".format(time_passed)
    gn.add_result(timing, "markdown")

    gn.commit()
def main():
    """Build a directed multigraph of clusters connected by gene sets whose
    mean differential-expression z-score exceeds a threshold, render it via
    Graphviz, and attach the image plus a numbered gene-set legend."""
    tic = time.perf_counter()

    gn = Granatum()

    # Rows are "A vs B" cluster comparisons; columns are genes (z-scores).
    clustersvsgenes = gn.pandas_from_assay(gn.get_import('clustersvsgenes'))
    gset_group_id = gn.get_arg('gset_group_id')
    min_zscore = gn.get_arg('min_zscore')

    clustercomparisonstotest = list(clustersvsgenes.index)

    # Load all gene sets
    gsets = load_gsets(gset_group_id)

    G = nx.MultiDiGraph()
    clusternames = list(clustersvsgenes.T.columns)
    # One graph node per cluster, extracted from the "X vs rest" row names.
    individualclusters = [
        n[:n.index(" vs rest")] for n in clusternames if n.endswith("vs rest")
    ]
    print(individualclusters, flush=True)
    for cl in individualclusters:
        G.add_node(cl)

    # {pathway : {"cluster1":score1, "cluster2":score2}, pathway2 : {}}
    resultsmap = {}
    relabels = {}  # node -> "node: key1, key2" legend annotation
    keys = {}  # gene-set name -> numeric legend key
    urlsforkeys = {}  # gene-set name -> URL used in the legend links
    currentkeyindex = 0
    for gset in gsets:
        urlsforkeys[gset["name"]] = gset["url"]
        for cluster in clustercomparisonstotest:
            try:
                # Mean z-score of this gene set's genes for this comparison.
                resultdf = clustersvsgenes.loc[cluster, gset["gene_ids"]]
                resultdf = np.nan_to_num(resultdf)
                score = np.nanmean(resultdf)
                if score >= min_zscore:
                    # Assign the next free legend key on first sighting only.
                    keys[gset["name"]] = keys.get(gset["name"],
                                                  currentkeyindex + 1)
                    print("Score = {}".format(score), flush=True)
                    olddict = resultsmap.get(gset["name"], {})
                    olddict[cluster] = score
                    resultsmap[gset["name"]] = olddict
                    from_to = re.split(' vs ', cluster)
                    if from_to[1] != 'rest':
                        # Pairwise comparison: edge B -> A weighted by score.
                        G.add_weighted_edges_from(
                            [(from_to[1], from_to[0], score * 2.0)],
                            label=str(keys[gset["name"]]),
                            penwidth=str(score * 2.0))
                    else:
                        # "X vs rest": append this key to node X's label.
                        relabel_dict = relabels.get(from_to[0], "")
                        if relabel_dict == "":
                            relabel_dict = from_to[0] + ": " + str(
                                keys[gset["name"]])
                        else:
                            relabel_dict = relabel_dict + ", " + str(
                                keys[gset["name"]])
                        relabels[from_to[0]] = relabel_dict
                    currentkeyindex = max(currentkeyindex, keys[gset["name"]])
            except Exception as inst:
                # Gene sets with IDs missing from the assay are skipped.
                print("Key error with {}".format(gset["name"]), flush=True)
                print("Exception: {}".format(inst), flush=True)

    print("Relabels {}".format(relabels), flush=True)
    G = nx.relabel_nodes(G, relabels)
    # NOTE(review): pos and edge_labels are computed but never used — the
    # layout actually comes from Graphviz below.
    pos = nx.spring_layout(G)
    edge_labels = nx.get_edge_attributes(G, 'label')
    write_dot(G, 'plot.dot')
    os.system("dot plot.dot -Tpng -Gdpi=600 > plot.png")
    with open('plot.png', "rb") as f:
        image_b64 = b64encode(f.read()).decode("utf-8")

    gn.results.append({
        "type": "png",
        "width": 650,
        "height": 480,
        "description": 'Network of clusters based on expression',
        "data": image_b64,
    })

    # Legend: "key: [gene-set name](url)", one line per key, sorted by key.
    footnote = ""
    for k, v in sorted(keys.items(), key=lambda item: item[1]):
        newstr = "{}: [{}]({})".format(v, k, urlsforkeys[k])
        if footnote == "":
            footnote = newstr
        else:
            footnote = footnote + "  \n" + newstr

    gn.add_result(footnote, "markdown")

    # gn.export(return_df.T.to_csv(), 'differential_gene_sets.csv', kind='raw', meta=None, raw=True)

    toc = time.perf_counter()
    time_passed = round(toc - tic, 2)

    timing = "* Finished differential expression sets step in {} seconds*".format(
        time_passed)
    gn.add_result(timing, "markdown")

    gn.commit()
def main():
    """Build a directed multigraph of clusters connected by individual genes
    whose z-score exceeds a threshold, grouping genes with near-identical
    expression profiles under a shared legend key, render via Graphviz, and
    attach the image plus a legend."""
    tic = time.perf_counter()

    gn = Granatum()

    # Rows are "A vs B" cluster comparisons; columns are genes (z-scores).
    clustersvsgenes = gn.pandas_from_assay(gn.get_import('clustersvsgenes'))
    max_dist = gn.get_arg('max_dist')
    min_zscore = gn.get_arg('min_zscore')

    clustercomparisonstotest = list(clustersvsgenes.index)

    G = nx.MultiDiGraph()
    clusternames = list(clustersvsgenes.T.columns)
    # One graph node per cluster, extracted from the "X vs rest" row names.
    individualclusters = [
        n[:n.index(" vs rest")] for n in clusternames if n.endswith("vs rest")
    ]
    print(individualclusters, flush=True)
    for cl in individualclusters:
        G.add_node(cl)

    # {pathway : {"cluster1":score1, "cluster2":score2}, pathway2 : {}}
    # resultsmap = {}
    relabels = {}  # node -> "node: key1, key2" legend annotation
    keys = {}  # gene id -> numeric legend key (shared by near genes)
    currentkeyindex = 0
    # Used to scale edge weights/penwidths into a comparable range.
    maxexpression = np.max(np.max(clustersvsgenes))
    print("Max expression = {}".format(maxexpression))
    print("Number to analyze = {}".format(
        len(clustersvsgenes.columns) * len(clustercomparisonstotest)),
          flush=True)
    gene_count = 0
    for gene_id in clustersvsgenes.columns:
        gene_count = gene_count + 1
        print("Genecount = {}/{}".format(gene_count,
                                         len(clustersvsgenes.columns)),
              flush=True)
        add_all_edges_for_current_gene = True
        for cluster in clustercomparisonstotest:
            score = clustersvsgenes.loc[cluster, gene_id]
            if score >= min_zscore:
                add_edges = True
                if not gene_id in keys:
                    # First check if within distance of another group
                    closestkey = None
                    closestkeyvalue = 1.0e12
                    for key in keys:
                        # RMS distance between this gene's profile and the
                        # already-keyed gene's profile across comparisons.
                        gene_values = clustersvsgenes.loc[:, gene_id]
                        ref_values = clustersvsgenes.loc[:, key]
                        sc = np.sqrt(
                            np.nansum(np.square(gene_values - ref_values)) /
                            len(gene_values))
                        if sc <= max_dist and sc < closestkeyvalue:
                            closestkeyvalue = sc
                            closestkey = key
                            break
                    if closestkey == None:
                        # Genuinely new profile: allocate the next key.
                        keys[gene_id] = currentkeyindex + 1
                    else:
                        # Near-duplicate of an existing gene: share its key
                        # and skip drawing duplicate edges for this gene.
                        keys[gene_id] = keys[closestkey]
                        add_edges = False
                        add_all_edges_for_current_gene = False
                        print("Found a near gene: {}".format(closestkey),
                              flush=True)
                else:
                    add_edges = add_all_edges_for_current_gene
                # print("Score = {}".format(score), flush=True)
                # olddict = resultsmap.get(gene_id, {})
                # olddict[cluster] = score
                # resultsmap[gene_id] = olddict
                if add_edges:
                    from_to = re.split(' vs ', cluster)
                    if from_to[1] != 'rest':
                        # Pairwise comparison: edge B -> A, weight scaled by
                        # the global max expression.
                        G.add_weighted_edges_from(
                            [(from_to[1], from_to[0],
                              score / maxexpression * 1.0)],
                            label=str(keys[gene_id]),
                            penwidth=str(score / maxexpression * 1.0))
                    else:
                        # "X vs rest": append this key to node X's label.
                        relabel_dict = relabels.get(from_to[0], "")
                        if relabel_dict == "":
                            relabel_dict = from_to[0] + ": " + str(
                                keys[gene_id])
                        else:
                            relabel_dict = relabel_dict + ", " + str(
                                keys[gene_id])
                        relabels[from_to[0]] = relabel_dict
                currentkeyindex = max(currentkeyindex, keys[gene_id])

    print("Relabels {}".format(relabels), flush=True)
    G = nx.relabel_nodes(G, relabels)
    # NOTE(review): pos and edge_labels are computed but never used — the
    # layout actually comes from Graphviz (circo) below.
    pos = nx.spring_layout(G)
    edge_labels = nx.get_edge_attributes(G, 'label')
    write_dot(G, 'plot.dot')
    os.system('dot plot.dot -Kcirco -Tpng -Gsize="6,6" -Gdpi=600 > plot.png')
    with open('plot.png', "rb") as f:
        image_b64 = b64encode(f.read()).decode("utf-8")

    gn.results.append({
        "type": "png",
        "width": 650,
        "height": 480,
        "description": 'Network of clusters based on expression',
        "data": image_b64,
    })

    # Legend: one line per key listing the genes sharing that key, plus the
    # clusters in which the first such gene exceeds the threshold.
    footnote = ""
    inv_map = {}
    for k, v in keys.items():
        inv_map[v] = inv_map.get(v, []) + [k]

    for k, v in sorted(inv_map.items(), key=lambda item: item[0]):
        newv = map(lambda gene: "[{}]({})".format(gene, geturl(gene)), v)
        vliststr = ", ".join(newv)
        newstr = "{}: {} {}".format(
            k, (clustersvsgenes.loc[clustersvsgenes[v[0]] > min_zscore,
                                    v[0]]).to_dict(), vliststr)
        if footnote == "":
            footnote = newstr
        else:
            footnote = footnote + "  \n" + newstr

    gn.add_result(footnote, "markdown")

    # gn.export(return_df.T.to_csv(), 'differential_gene_sets.csv', kind='raw', meta=None, raw=True)

    toc = time.perf_counter()
    time_passed = round(toc - tic, 2)

    timing = "* Finished differential expression sets step in {} seconds*".format(
        time_passed)
    gn.add_result(timing, "markdown")

    gn.commit()
예제 #15
0
def main():
    """Run diffusion pseudotime (DPT) on the assay and export the cell order
    and branch groups, each with a diffusion-map figure."""
    gn = Granatum()

    n_neighbors = gn.get_arg('nNeighbors', 15)
    neighbor_method = gn.get_arg('neighborMethod', 'gauss')

    assay = gn.get_import('assay')

    # Build an AnnData with cells as rows (the assay matrix is transposed).
    adata = sc.AnnData(np.array(assay.get('matrix')).transpose())
    adata.var_names = assay.get('geneIds')
    adata.obs_names = assay.get('sampleIds')

    sc.pp.neighbors(adata,
                    n_neighbors=n_neighbors,
                    use_rep='X',
                    method=neighbor_method)
    sc.tl.dpt(adata, n_branchings=1)

    gn._pickle(adata, 'adata')

    # dpt_groups

    specs = [
        {'col': 'dpt_order', 'caption': 'Cell order'},
        {'col': 'dpt_groups', 'caption': 'Cell groups'},
    ]
    for spec in specs:
        plt.figure()
        sc.pl.diffmap(adata, color=spec['col'])
        gn.add_current_figure_to_results(spec['caption'])
        exported = dict(zip(adata.obs_names.tolist(),
                            adata.obs[spec['col']].values.tolist()))
        gn.export_statically(exported, spec['col'])

    gn.commit()