def main():
    """Center each gene at zero by subtracting its mean expression across samples."""
    gn = Granatum()

    assay = gn.get_import('assay')
    expression = np.array(assay.get('matrix'))

    # Per-gene (row-wise) mean, kept 2-D so it broadcasts across samples.
    centered = expression - expression.mean(axis=1, keepdims=True)

    assay['matrix'] = centered.tolist()
    plot_distribution_comparison(expression, centered, gn)

    gn.export_statically(assay, 'Gene centered assay')
    gn.commit()
def main():
    """Score a predicted clustering against ground-truth labels.

    Reports the adjusted Rand index and adjusted mutual information as markdown.

    Fix: the original comment claimed wrapping the metas in ``pd.Series``
    aligned them "in case they have different sample IDs", but sklearn treats
    the two Series as plain positional arrays — no index alignment happens.
    The predicted labels are now explicitly reindexed to the true labels'
    sample order before scoring.
    """
    gn = Granatum()

    sample_meta_true = gn.get_import("sample_meta_true")
    sample_meta_predicted = gn.get_import("sample_meta_predicted")

    # Align by sample ID: reindex predicted onto the true labels' index so the
    # i-th entries of both arrays refer to the same cell.
    true_labels = pd.Series(sample_meta_true)
    predicted_labels = pd.Series(sample_meta_predicted).reindex(true_labels.index)

    rand_score = adjusted_rand_score(true_labels, predicted_labels)
    mutual_info_score = adjusted_mutual_info_score(true_labels, predicted_labels)

    results_markdown = "\n".join([
        "Adjusted Rand score: **{}**".format(rand_score),
        "",
        "Adjusted mutual information score: **{}**".format(mutual_info_score),
    ])

    gn.add_result(results_markdown, "markdown")

    gn.commit()
def main():
    """Run PCA on the assay; report explained variance and pairwise PC scatter plots."""
    gn = Granatum()

    adata = gn.ann_data_from_assay(gn.get_import("assay"))
    num_top_comps = gn.get_arg("num_top_comps")

    # Compute the first 20 principal components.
    sc.pp.pca(adata, 20)

    variance_ratios = adata.uns["pca"]["variance_ratio"]
    pc_labels = ["PC{}".format(idx + 1) for idx in range(len(variance_ratios))]

    plt.figure()
    plt.bar(pc_labels, variance_ratios)
    plt.tight_layout()
    gn.add_current_figure_to_results(
        "Explained variance (ratio) by each Principal Component (PC)",
        height=350,
        dpi=75,
    )

    X_pca = adata.obsm["X_pca"]

    # One scatter plot and one exported coordinate set per pair of top PCs.
    for i, j in combinations(range(num_top_comps), 2):
        xlabel = "PC{}".format(i + 1)
        ylabel = "PC{}".format(j + 1)
        pair_title = "PC{} vs. PC{}".format(i + 1, j + 1)

        plt.figure()
        # Marker size shrinks as the number of cells grows.
        plt.scatter(X_pca[:, i], X_pca[:, j], s=5000 / adata.shape[0])
        plt.xlabel(xlabel)
        plt.ylabel(ylabel)
        plt.tight_layout()
        gn.add_current_figure_to_results(pair_title, dpi=75)

        coords = {
            sample_id: X_pca[row, [i, j]].tolist()
            for row, sample_id in enumerate(adata.obs_names)
        }
        pca_export = {"dimNames": [xlabel, ylabel], "coords": coords}
        gn.export(pca_export, pair_title, kind="sampleCoords", meta={})

    gn.commit()
def main():
    """Randomly down-sample the cells of an assay and report before/after sizes."""
    gn = Granatum()

    adata = gn.ann_data_from_assay(gn.get_import("assay"))
    num_cells_to_sample = gn.get_arg("num_cells_to_sample")
    random_seed = gn.get_arg("random_seed")

    np.random.seed(random_seed)

    num_cells_before, num_genes_before = adata.shape

    # A value strictly between 0 and 1 is a fraction of the current cell count.
    if 0 < num_cells_to_sample < 1:
        num_cells_to_sample = round(num_cells_before * num_cells_to_sample)
    else:
        num_cells_to_sample = round(num_cells_to_sample)

    # Clamp into the valid range [1, num_cells_before].
    num_cells_to_sample = min(num_cells_to_sample, num_cells_before)
    num_cells_to_sample = max(num_cells_to_sample, 1)

    sampled_cells_idxs = np.sort(
        np.random.choice(num_cells_before, num_cells_to_sample, replace=False))
    adata = adata[sampled_cells_idxs, :]

    gn.add_result(
        "\n".join([
            "The assay before down-sampling has **{}** cells and {} genes.".format(
                num_cells_before, num_genes_before),
            "",
            "The assay after down-sampling has **{}** cells and {} genes.".format(
                adata.shape[0], adata.shape[1]),
        ]),
        type="markdown",
    )

    gn.export(gn.assay_from_ann_data(adata), "Down-sampled Assay", dynamic=False)

    gn.commit()
def main():
    """Scan JAMMIT over a range of theta values and report FDR against alpha."""
    gn = Granatum()

    df = gn.pandas_from_assay(gn.get_import('assay'))
    n_steps = gn.get_arg('n_steps')
    min_theta = gn.get_arg('min_theta')
    max_theta = gn.get_arg('max_theta')

    jammit = JAMMIT.from_dfs([df])
    jammit.scan(
        thetas=np.linspace(min_theta, max_theta, n_steps),
        calculate_fdr=True,
        n_perms=10,
        verbose=1,
        convergence_threshold=1e-9,
    )

    scan_table = jammit.format(columns=['theta', 'alpha', 'n_sigs', 'fdr'])
    # Round theta/alpha for display.
    for col in ('theta', 'alpha'):
        scan_table[col] = scan_table[col].round(3)

    plt.plot(scan_table['alpha'], scan_table['fdr'])
    plt.xlabel('alpha')
    plt.ylabel('FDR')
    gn.add_current_figure_to_results('FDR plotted against alpha', height=400)

    gn.add_result(
        {
            'pageSize': n_steps,
            'orient': 'split',
            'columns': [
                {'name': col, 'type': 'number', 'round': 3}
                for col in scan_table.columns
            ],
            'data': scan_table.values.tolist(),
        },
        data_type='table',
    )

    gn.commit()
def main():
    """Cluster the assay's samples and export the assignment as meta, CSV, and a table.

    Fix: the original built a markdown summary string (``md_str``, containing a
    duplicated "Cluster array" line) that was never passed to ``gn.add_result``
    — dead code, removed.
    """
    gn = Granatum()
    assay = gn.get_import('assay')

    init_kwargs = {
        'selected_embedding': gn.get_arg('selectedEmbedding'),
        'selected_clustering': gn.get_arg('selectedClustering'),
        'n_components': gn.get_arg('nComponents'),
        'n_clusters': gn.get_arg('nClusters'),
        'find_best_number_of_cluster': gn.get_arg('findBestNumberOfCluster'),
    }
    fit_kwargs = {
        # Transposed so samples are rows (the assay matrix is genes x samples).
        'matrix': np.transpose(np.array(assay.get('matrix'))),
        'sample_ids': assay.get('sampleIds'),
    }

    clusterer = GranatumDeepClustering(**init_kwargs)
    fit_results = clusterer.fit(**fit_kwargs)

    clusters = fit_results.get('clusters')
    gn.export_statically(clusters, 'Cluster assignment')

    # Raw CSV export: one `"sample_id", cluster` line per cell.
    csv_lines = ['"{}", {}'.format(sample_id, cluster)
                 for sample_id, cluster in clusters.items()]
    gn.export("\n".join(csv_lines), 'Cluster assignment.csv',
              kind='raw', meta=None, raw=True)

    gn.add_result(
        {
            'orient': 'split',
            'columns': ['Sample ID', 'Cluster Assignment'],
            'data': [
                {'Sample ID': sid, 'Cluster Assignment': cl}
                for sid, cl in zip(assay.get('sampleIds'),
                                   fit_results.get('clusters_array'))
            ],
        },
        'table',
    )

    gn.commit()
def main():
    """Concatenate two assays along genes or samples, with optional normalization.

    Fixes:
      * stray ``",`` typo inside the 'mean'-normalization markdown message;
      * the final summary reported ``tb1``/``tb2`` shapes after they had been
        transposed in the 'samples' direction, mislabeling sample counts as
        gene counts — it now uses the shapes captured before any transpose.
    """
    gn = Granatum()

    tb1 = gn.pandas_from_assay(gn.get_import('assay1'))
    tb2 = gn.pandas_from_assay(gn.get_import('assay2'))
    label1 = gn.get_arg('label1')
    label2 = gn.get_arg('label2')
    direction = gn.get_arg('direction')
    normalization = gn.get_arg('normalization')

    # Capture gene-oriented shapes (rows = genes, cols = cells) for the summary.
    n_genes1, n_cells1 = tb1.shape
    n_genes2, n_cells2 = tb2.shape

    # Concatenation always happens along rows; transpose first when combining
    # samples so the rows to stack are samples.
    if direction == 'samples':
        tb1 = tb1.T
        tb2 = tb2.T

    # Disambiguate row labels present in both assays by prefixing them.
    overlapped_index = set(tb1.index) & set(tb2.index)
    tb1.index = [f"{label1}_{x}" if x in overlapped_index else x for x in tb1.index]
    tb2.index = [f"{label2}_{x}" if x in overlapped_index else x for x in tb2.index]

    if normalization == 'none':
        tb = pd.concat([tb1, tb2], axis=0)
    elif normalization == 'frobenius':
        # Scale each assay so both match the mean Frobenius norm.
        ntb1 = np.linalg.norm(tb1)
        ntb2 = np.linalg.norm(tb2)
        ntb = np.mean([ntb1, ntb2])
        fct1 = ntb / ntb1
        fct2 = ntb / ntb2
        tb = pd.concat([tb1 * fct1, tb2 * fct2], axis=0)
        gn.add_markdown(f"""\
Normalization info:

  - Assay **{label1}** is multiplied by {fct1}
  - Assay **{label2}** is multiplied by {fct2}
""")
    elif normalization == 'mean':
        # Scale each assay so both match the mean of their mean values.
        ntb1 = np.mean(tb1)
        ntb2 = np.mean(tb2)
        ntb = np.mean([ntb1, ntb2])
        fct1 = ntb / ntb1
        fct2 = ntb / ntb2
        tb = pd.concat([tb1 * fct1, tb2 * fct2], axis=0)
        gn.add_markdown(f"""\
Normalization info:

  - Assay **{label1}** is multiplied by {fct1}
  - Assay **{label2}** is multiplied by {fct2}
""")
    else:
        raise ValueError()

    # Restore gene orientation before exporting.
    if direction == 'samples':
        tb = tb.T

    gn.add_markdown(f"""\
You combined the following assays:

  - Assay 1 (with {n_genes1} genes and {n_cells1} cells)
  - Assay 2 (with {n_genes2} genes and {n_cells2} cells)

into:

  - Combined Assay (with {tb.shape[0]} genes and {tb.shape[1]} cells)
""")

    gn.export_statically(gn.assay_from_pandas(tb), 'Combined assay')

    if direction == 'samples':
        meta_type = 'sampleMeta'
    elif direction == 'genes':
        meta_type = 'geneMeta'
    else:
        raise ValueError()

    # Label every (possibly prefixed) row with the assay it came from.
    gn.export(
        {**{x: label1 for x in tb1.index}, **{x: label2 for x in tb2.index}},
        'Assay label',
        meta_type,
    )

    gn.commit()
def main():
    # Differential expression between selected sample groups via DESeq2 (rpy2).
    gn = Granatum()

    assay_df = gn.pandas_from_assay(gn.get_import('assay'))
    grdict = gn.get_import('groupVec')          # cell ID -> group label
    phe_dict = pd.Series(gn.get_import('groupVec'))
    groups = set(parse(gn.get_arg('groups')))   # group labels selected for the test

    # Invert cell->group into group->cells, keeping only the selected groups.
    inv_map = {}
    for k, v in grdict.items():
        if v in groups:
            inv_map[v] = inv_map.get(v, []) + [k]

    # All cells belonging to any selected group.
    cells = []
    for k, v in inv_map.items():
        cells.extend(v)

    # Restrict the assay to those cells.
    assay_df = assay_df.loc[:, cells]
    # assumes the columns are pandas sparse dtype — TODO confirm upstream always
    # produces a sparse frame, otherwise .sparse raises AttributeError.
    assay_df = assay_df.sparse.to_dense().fillna(0)

    #assay_mat = r['as.matrix'](pandas2ri.py2ri(assay_df))
    # assay_mat = r['as.matrix'](conversion.py2rpy(assay_df))

    # Phenotype vector aligned to the (filtered) assay columns.
    phe_vec = phe_dict[assay_df.columns]

    # Source the R driver, which defines run_DESeq in the R global environment,
    # then run it on the expression matrix plus phenotype labels.
    r.source('./drive_DESeq2.R')
    ret_r = r['run_DESeq'](assay_df, phe_vec)
    ret_r_as_df = r['as.data.frame'](ret_r)

    # ret_py_df = pandas2ri.ri2py(ret_r_as_df)

    # TODO: maybe rename the columns to be more self-explanatory?
    result_df = ret_r_as_df

    # Most significant genes first (by adjusted p-value).
    result_df = result_df.sort_values('padj')
    result_df.index.name = 'gene'

    gn.add_pandas_df(result_df.reset_index(), description='The result table as returned by DESeq2.')
    gn.export(result_df.to_csv(), 'DESeq2_results.csv', raw=True)

    # Genes significant at padj < 0.05, exported with their log2 fold change.
    significant_genes = result_df.loc[
        result_df['padj'] < 0.05]['log2FoldChange'].to_dict()
    gn.export(significant_genes, 'Significant genes', kind='geneMeta')

    gn.commit()
def main():
    """Relabel clusters to best-matching reference labels; optionally drop mismatches."""
    tic = time.perf_counter()

    gn = Granatum()
    assay = gn.pandas_from_assay(gn.get_import('assay'))
    groups = gn.get_import('groups')
    reflabels = gn.get_import('reflabels')
    remove_cells = gn.get_arg('remove_cells')

    # Invert both cell->label maps into label->cells maps.
    inv_map = {}
    for cell, label in groups.items():
        inv_map.setdefault(label, []).append(cell)

    inv_map_ref = {}
    for cell, label in reflabels.items():
        inv_map_ref.setdefault(label, []).append(cell)

    group_relabel = {}
    mislabelled_cells = []
    for label, members in inv_map.items():
        member_set = set(members)
        # Score every reference label by how many member cells it shares,
        # then adopt the highest-overlap reference label for this group.
        overlap = {
            ref_label: len(set(ref_members) & member_set)
            for ref_label, ref_members in inv_map_ref.items()
        }
        best_ref = max(overlap, key=overlap.get)
        group_relabel[label] = best_ref
        # Cells in this group that the winning reference label does not cover.
        mislabelled_cells.extend(member_set - set(inv_map_ref[best_ref]))

    if remove_cells:
        gn.add_result(
            "Dropping {} mislabelled cells".format(len(mislabelled_cells)),
            "markdown")
        assay = assay.drop(mislabelled_cells, axis=1)
        groups = {
            cell: label
            for cell, label in groups.items()
            if cell not in mislabelled_cells
        }

    # Rewrite every remaining cell's label to its group's adopted reference label.
    for cell in groups:
        groups[cell] = group_relabel[groups[cell]]

    toc = time.perf_counter()
    time_passed = round(toc - tic, 2)

    gn.export_statically(gn.assay_from_pandas(assay), "Corresponded assay")
    gn.export_statically(groups, "Corresponded labels")

    timing = "* Finished sample coloring step in {} seconds*".format(time_passed)
    gn.add_result(timing, "markdown")

    gn.commit()
def main():
    """Log-transform the assay and plot expression distributions before/after.

    Fix: the figure caption claimed values above the 95th percentile were also
    excluded, but that filter was commented out in the code — the caption now
    matches what is actually plotted. Dead commented-out filter lines removed.
    """
    gn = Granatum()

    assay = gn.get_import('assay')
    matrix = np.array(assay.get('matrix'))
    log_base = gn.get_arg('logBase')
    pseudo_counts = gn.get_arg('pseudoCounts')

    # log_base(x + pseudo_counts), via change of base.
    transformed_matrix = np.log(matrix + pseudo_counts) / np.log(log_base)

    # Drop the bottom 5% of values (usually zeros in single-cell data) so the
    # histograms are readable.
    non_zero_values_before = matrix.flatten()
    non_zero_values_before = non_zero_values_before[
        non_zero_values_before > np.percentile(non_zero_values_before, 5)]

    non_zero_values_after = transformed_matrix.flatten()
    non_zero_values_after = non_zero_values_after[
        non_zero_values_after > np.percentile(non_zero_values_after, 5)]

    plt.figure()

    plt.subplot(2, 1, 1)
    plt.title('Before log transformation')
    plt.hist(non_zero_values_before, bins=100)
    plt.ylabel('Frequency')
    plt.xlabel('Expression level')

    plt.subplot(2, 1, 2)
    plt.title('After log transformation')
    plt.hist(non_zero_values_after, bins=100)
    plt.ylabel('Frequency')
    plt.xlabel('Expression level')

    plt.tight_layout()

    caption = (
        'The distribution of expression level before and after log transformation. Only the values greater '
        'than the 5 percentile (usually zero in single-cell data) are considered.'
    )
    gn.add_current_figure_to_results(caption, zoom=2, dpi=50)

    assay['matrix'] = transformed_matrix.tolist()
    gn.export_statically(assay, 'Log transformed assay')

    gn.commit()
def main():
    """Drop, merge, and/or relabel sample groups, then export the adjusted assay/labels.

    Improvement: the three copy-pasted merge-set branches are collapsed into a
    single data-driven loop (same processing order and the same single
    try/except around all three, so behavior is unchanged).
    """
    tic = time.perf_counter()

    gn = Granatum()
    assay = gn.pandas_from_assay(gn.get_import('assay'))
    groups = gn.get_import('groups')

    # Invert cell->label into label->cells.
    inv_map = {}
    for k, v in groups.items():
        inv_map[v] = inv_map.get(v, []) + [k]

    drop_set = parse(gn.get_arg('drop_set'))

    # Each merge spec is (labels_to_merge, replacement_label). When no
    # replacement is supplied, default to "a + b + ...".
    merge_specs = []
    for idx in (1, 2, 3):
        merge_set = parse(gn.get_arg('merge_set_{}'.format(idx)))
        relabel = gn.get_arg('relabel_set_{}'.format(idx))
        if len(merge_set) > 0 and relabel == "":
            relabel = " + ".join(merge_set)
        merge_specs.append((merge_set, relabel))

    try:
        for ds in drop_set:
            cells = inv_map[ds]
            gn.add_result(
                "Dropping {} cells that match {}".format(len(cells), ds),
                "markdown")
            assay = assay.drop(cells, axis=1)
            groups = {key: val for key, val in groups.items() if val != ds}
    except Exception as e:
        gn.add_result(
            "Error found in drop set, remember it should be comma separated: {}"
            .format(e), "markdown")

    try:
        # NOTE(review): inv_map was built before the drop step, so a merge set
        # overlapping the drop set can re-introduce dropped cells into
        # `groups` — preserved from the original; confirm this is intended.
        for merge_set, relabel in merge_specs:
            if len(merge_set) == 0:
                continue
            merge_cells = []
            for label in merge_set:
                merge_cells = merge_cells + inv_map[label]
            for cell in merge_cells:
                groups[cell] = relabel
    except Exception as e:
        gn.add_result(
            "Error found in merge sets, remember it should be comma separated: {}"
            .format(e), "markdown")

    toc = time.perf_counter()
    time_passed = round(toc - tic, 2)

    gn.export_statically(gn.assay_from_pandas(assay), "Label adjusted assay")
    gn.export_statically(groups, "Adjusted labels")

    timing = "* Finished sample coloring step in {} seconds*".format(time_passed)
    gn.add_result(timing, "markdown")

    gn.commit()
def main():
    """Filter cells by unique-gene count and mitochondrial expression fraction."""
    tic = time.perf_counter()

    gn = Granatum()
    df = gn.pandas_from_assay(gn.get_import('assay'))
    mingenes = gn.get_arg('min_genes_per_cell')
    maxgenes = gn.get_arg('max_genes_per_cell')
    mt_percent = gn.get_arg('mt_genes_percent') / 100.0

    # Per-cell QC metrics (rows = genes, columns = cells).
    uniquegenecount = df.astype(bool).sum(axis=0)
    totalgenecount = df.sum(axis=0)
    # NOTE(review): matches any gene ID starting with 'MT' (e.g. also 'MTOR'),
    # not just 'MT-' mitochondrial genes — confirm this prefix is intended.
    mtrows = df[df.index.str.startswith('MT')]
    mtgenecount = mtrows.sum(axis=0)
    mtpercent = mtgenecount.div(totalgenecount)

    # Keep cells within the gene-count window and under the MT fraction cap.
    keep_mask = ((uniquegenecount >= mingenes)
                 & (uniquegenecount <= maxgenes)
                 & (mtpercent <= mt_percent))
    colsmatching = uniquegenecount[keep_mask].index.values
    filtered = df.loc[:, colsmatching]

    num_orig_cells = uniquegenecount.index.size
    num_filtered_cells = len(colsmatching)
    num_lt_min = uniquegenecount[uniquegenecount < mingenes].index.size
    num_gt_max = uniquegenecount[uniquegenecount > maxgenes].index.size
    num_gt_mt = uniquegenecount[mtpercent > mt_percent].index.size

    gn.add_result(
        "Number of cells is now {} out of {} original cells with {} below min "
        "genes, {} above max genes, and {} above mt percentage threshold.".format(
            num_filtered_cells, num_orig_cells, num_lt_min, num_gt_max,
            num_gt_mt), "markdown")

    plt.figure()

    plt.subplot(2, 1, 1)
    plt.title('Unique gene count distribution')
    sns.distplot(uniquegenecount, bins=int(200), color='darkblue',
                 kde_kws={'linewidth': 2})
    plt.ylabel('Frequency')
    plt.xlabel('Gene count')

    plt.subplot(2, 1, 2)
    plt.title('MT Percent Distribution')
    sns.distplot(mtpercent * 100.0, bins=int(200), color='darkblue',
                 kde_kws={'linewidth': 2})
    plt.ylabel('Frequency')
    plt.xlabel('MT Percent')

    plt.tight_layout()

    caption = (
        'The distribution of expression levels for each cell with various metrics.'
    )
    gn.add_current_figure_to_results(caption, zoom=1, dpi=75)

    gn.export(gn.assay_from_pandas(filtered), "Filtered Cells Assay",
              dynamic=False)

    toc = time.perf_counter()
    time_passed = round(toc - tic, 2)
    timing = "* Finished cell filtering step in {} seconds*".format(time_passed)
    gn.add_result(timing, "markdown")

    gn.commit()
def main():
    # Build a directed multigraph linking clusters by enriched gene sets
    # (mean z-score >= min_zscore) and render it to PNG via Graphviz `dot`.
    tic = time.perf_counter()

    gn = Granatum()
    clustersvsgenes = gn.pandas_from_assay(gn.get_import('clustersvsgenes'))
    gset_group_id = gn.get_arg('gset_group_id')
    min_zscore = gn.get_arg('min_zscore')

    # Rows are cluster comparisons like "A vs B" or "A vs rest".
    clustercomparisonstotest = list(clustersvsgenes.index)

    # Load all gene sets
    gsets = load_gsets(gset_group_id)

    G = nx.MultiDiGraph()
    clusternames = list(clustersvsgenes.T.columns)
    # One node per individual cluster, extracted from "<cluster> vs rest" rows.
    individualclusters = [
        n[:n.index(" vs rest")] for n in clusternames if n.endswith("vs rest")
    ]
    print(individualclusters, flush=True)
    for cl in individualclusters:
        G.add_node(cl)

    # {pathway : {"cluster1":score1, "cluster2":score2}, pathway2 : {}}
    resultsmap = {}
    relabels = {}       # cluster name -> "name: key1, key2" node annotation
    keys = {}           # gene-set name -> numeric legend key
    urlsforkeys = {}    # gene-set name -> URL, for the markdown legend
    currentkeyindex = 0
    for gset in gsets:
        urlsforkeys[gset["name"]] = gset["url"]
        for cluster in clustercomparisonstotest:
            try:
                # Mean z-score of this gene set's genes in this comparison.
                resultdf = clustersvsgenes.loc[cluster, gset["gene_ids"]]
                resultdf = np.nan_to_num(resultdf)
                score = np.nanmean(resultdf)
                if score >= min_zscore:
                    # Assign the next legend key on first sighting of the set.
                    keys[gset["name"]] = keys.get(gset["name"],
                                                  currentkeyindex + 1)
                    print("Score = {}".format(score), flush=True)
                    olddict = resultsmap.get(gset["name"], {})
                    olddict[cluster] = score
                    resultsmap[gset["name"]] = olddict
                    from_to = re.split(' vs ', cluster)
                    if from_to[1] != 'rest':
                        # Pairwise comparison: draw a score-weighted edge.
                        G.add_weighted_edges_from(
                            [(from_to[1], from_to[0], score * 2.0)],
                            label=str(keys[gset["name"]]),
                            penwidth=str(score * 2.0))
                    else:
                        # "vs rest": annotate the node label instead of adding
                        # an edge.
                        relabel_dict = relabels.get(from_to[0], "")
                        if relabel_dict == "":
                            relabel_dict = from_to[0] + ": " + str(
                                keys[gset["name"]])
                        else:
                            relabel_dict = relabel_dict + ", " + str(
                                keys[gset["name"]])
                        relabels[from_to[0]] = relabel_dict
                    currentkeyindex = max(currentkeyindex, keys[gset["name"]])
            except Exception as inst:
                # Typically a KeyError when a gene set references genes absent
                # from the assay; skip this gene set / comparison pair.
                print("Key error with {}".format(gset["name"]), flush=True)
                print("Exception: {}".format(inst), flush=True)

    print("Relabels {}".format(relabels), flush=True)
    G = nx.relabel_nodes(G, relabels)
    pos = nx.spring_layout(G)
    edge_labels = nx.get_edge_attributes(G, 'label')
    # Render through Graphviz: dump DOT, shell out to `dot`, read the PNG back.
    write_dot(G, 'plot.dot')
    os.system("dot plot.dot -Tpng -Gdpi=600 > plot.png")
    with open('plot.png', "rb") as f:
        image_b64 = b64encode(f.read()).decode("utf-8")
    gn.results.append({
        "type": "png",
        "width": 650,
        "height": 480,
        "description": 'Network of clusters based on expression',
        "data": image_b64,
    })

    # Markdown legend: "key: [gene set name](url)", sorted by key.
    footnote = ""
    for k, v in sorted(keys.items(), key=lambda item: item[1]):
        newstr = "{}: [{}]({})".format(v, k, urlsforkeys[k])
        if footnote == "":
            footnote = newstr
        else:
            footnote = footnote + " \n" + newstr
    gn.add_result(footnote, "markdown")

    # gn.export(return_df.T.to_csv(), 'differential_gene_sets.csv', kind='raw', meta=None, raw=True)

    toc = time.perf_counter()
    time_passed = round(toc - tic, 2)
    timing = "* Finished differential expression sets step in {} seconds*".format(
        time_passed)
    gn.add_result(timing, "markdown")
    gn.commit()
def main():
    # Build a directed multigraph linking clusters by differentially expressed
    # genes (z-score >= min_zscore). Genes whose expression profile lies within
    # max_dist (RMS over comparisons) of an already-keyed gene share that key.
    tic = time.perf_counter()

    gn = Granatum()
    clustersvsgenes = gn.pandas_from_assay(gn.get_import('clustersvsgenes'))
    max_dist = gn.get_arg('max_dist')
    min_zscore = gn.get_arg('min_zscore')

    # Rows are cluster comparisons like "A vs B" or "A vs rest".
    clustercomparisonstotest = list(clustersvsgenes.index)

    G = nx.MultiDiGraph()
    clusternames = list(clustersvsgenes.T.columns)
    # One node per individual cluster, extracted from "<cluster> vs rest" rows.
    individualclusters = [
        n[:n.index(" vs rest")] for n in clusternames if n.endswith("vs rest")
    ]
    print(individualclusters, flush=True)
    for cl in individualclusters:
        G.add_node(cl)

    # {pathway : {"cluster1":score1, "cluster2":score2}, pathway2 : {}}
    # resultsmap = {}
    relabels = {}          # cluster name -> "name: key1, key2" node annotation
    keys = {}              # gene ID -> numeric legend key (shared by near genes)
    currentkeyindex = 0
    # Global maximum z-score, used to normalize edge weights.
    maxexpression = np.max(np.max(clustersvsgenes))
    print("Max expression = {}".format(maxexpression))
    print("Number to analyze = {}".format(
        len(clustersvsgenes.columns) * len(clustercomparisonstotest)),
        flush=True)
    gene_count = 0
    for gene_id in clustersvsgenes.columns:
        gene_count = gene_count + 1
        print("Genecount = {}/{}".format(gene_count,
                                         len(clustersvsgenes.columns)),
              flush=True)
        add_all_edges_for_current_gene = True
        for cluster in clustercomparisonstotest:
            score = clustersvsgenes.loc[cluster, gene_id]
            if score >= min_zscore:
                add_edges = True
                if not gene_id in keys:
                    # First check if within distance of another group
                    # (RMS distance across all cluster comparisons; the loop
                    # breaks at the first key within max_dist).
                    closestkey = None
                    closestkeyvalue = 1.0e12
                    for key in keys:
                        gene_values = clustersvsgenes.loc[:, gene_id]
                        ref_values = clustersvsgenes.loc[:, key]
                        # NOTE(review): local name `sc` shadows any module-level
                        # `sc` (e.g. scanpy) inside this function.
                        sc = np.sqrt(
                            np.nansum(np.square(gene_values - ref_values)) /
                            len(gene_values))
                        if sc <= max_dist and sc < closestkeyvalue:
                            closestkeyvalue = sc
                            closestkey = key
                            break
                    if closestkey == None:
                        # Genuinely new profile: allocate the next legend key.
                        keys[gene_id] = currentkeyindex + 1
                    else:
                        # Near an existing gene: reuse its key and suppress
                        # this gene's edges entirely.
                        keys[gene_id] = keys[closestkey]
                        add_edges = False
                        add_all_edges_for_current_gene = False
                        print("Found a near gene: {}".format(closestkey),
                              flush=True)
                else:
                    add_edges = add_all_edges_for_current_gene
                # print("Score = {}".format(score), flush=True)
                # olddict = resultsmap.get(gene_id, {})
                # olddict[cluster] = score
                # resultsmap[gene_id] = olddict
                if add_edges:
                    from_to = re.split(' vs ', cluster)
                    if from_to[1] != 'rest':
                        # Pairwise comparison: edge weight normalized by the
                        # global maximum z-score.
                        G.add_weighted_edges_from(
                            [(from_to[1], from_to[0],
                              score / maxexpression * 1.0)],
                            label=str(keys[gene_id]),
                            penwidth=str(score / maxexpression * 1.0))
                    else:
                        # "vs rest": annotate the node label instead of adding
                        # an edge.
                        relabel_dict = relabels.get(from_to[0], "")
                        if relabel_dict == "":
                            relabel_dict = from_to[0] + ": " + str(
                                keys[gene_id])
                        else:
                            relabel_dict = relabel_dict + ", " + str(
                                keys[gene_id])
                        relabels[from_to[0]] = relabel_dict
                    currentkeyindex = max(currentkeyindex, keys[gene_id])

    print("Relabels {}".format(relabels), flush=True)
    G = nx.relabel_nodes(G, relabels)
    pos = nx.spring_layout(G)
    edge_labels = nx.get_edge_attributes(G, 'label')
    # Render through Graphviz: dump DOT, shell out to `dot`, read the PNG back.
    write_dot(G, 'plot.dot')
    os.system('dot plot.dot -Kcirco -Tpng -Gsize="6,6" -Gdpi=600 > plot.png')
    with open('plot.png', "rb") as f:
        image_b64 = b64encode(f.read()).decode("utf-8")
    gn.results.append({
        "type": "png",
        "width": 650,
        "height": 480,
        "description": 'Network of clusters based on expression',
        "data": image_b64,
    })

    # Markdown legend: each key lists the clusters where its first gene exceeds
    # min_zscore plus links for every gene sharing the key.
    footnote = ""
    inv_map = {}
    for k, v in keys.items():
        inv_map[v] = inv_map.get(v, []) + [k]
    for k, v in sorted(inv_map.items(), key=lambda item: item[0]):
        newv = map(lambda gene: "[{}]({})".format(gene, geturl(gene)), v)
        vliststr = ", ".join(newv)
        newstr = "{}: {} {}".format(
            k, (clustersvsgenes.loc[clustersvsgenes[v[0]] > min_zscore,
                                    v[0]]).to_dict(), vliststr)
        if footnote == "":
            footnote = newstr
        else:
            footnote = footnote + " \n" + newstr
    gn.add_result(footnote, "markdown")

    # gn.export(return_df.T.to_csv(), 'differential_gene_sets.csv', kind='raw', meta=None, raw=True)

    toc = time.perf_counter()
    time_passed = round(toc - tic, 2)
    timing = "* Finished differential expression sets step in {} seconds*".format(
        time_passed)
    gn.add_result(timing, "markdown")
    gn.commit()
def main():
    """Diffusion pseudotime: order cells and assign branch groups, exporting both."""
    gn = Granatum()

    n_neighbors = gn.get_arg('nNeighbors', 15)
    neighbor_method = gn.get_arg('neighborMethod', 'gauss')

    assay = gn.get_import('assay')
    # AnnData wants cells as rows; the assay matrix is genes x samples.
    adata = sc.AnnData(np.array(assay.get('matrix')).transpose())
    adata.var_names = assay.get('geneIds')
    adata.obs_names = assay.get('sampleIds')

    sc.pp.neighbors(adata, n_neighbors=n_neighbors, use_rep='X',
                    method=neighbor_method)
    sc.tl.dpt(adata, n_branchings=1)

    gn._pickle(adata, 'adata')

    # dpt_groups
    specs = (
        {'col': 'dpt_order', 'caption': 'Cell order'},
        {'col': 'dpt_groups', 'caption': 'Cell groups'},
    )
    for spec in specs:
        plt.figure()
        sc.pl.diffmap(adata, color=spec['col'])
        gn.add_current_figure_to_results(spec['caption'])

        # Export sample_id -> value for this DPT column.
        exported = dict(zip(adata.obs_names.tolist(),
                            adata.obs[spec['col']].values.tolist()))
        gn.export_statically(exported, spec['col'])

    gn.commit()