def main():
    """Embed the imported assay with t-SNE, plot it and export the coordinates.

    Reads the ``assay`` import and the ``random_seed`` argument, runs
    ``sc.tl.tsne`` on the resulting AnnData object, adds a scatter plot of
    the two embedding columns to the results, and statically exports a
    ``{sample id: [x, y]}`` mapping under the name 't-SNE coordinates'.
    """
    gn = Granatum()

    assay = gn.get_import('assay')
    adata = gn.ann_data_from_assay(assay)

    sc.tl.tsne(adata, random_state=gn.get_arg('random_seed'))
    embedding = adata.obsm['X_tsne']

    # Marker size is inversely proportional to the number of rows so that
    # large assays do not turn into a solid blob.
    marker_size = 5000 / adata.shape[0]

    plt.figure()
    plt.scatter(embedding[:, 0], embedding[:, 1], s=marker_size)
    plt.xlabel('t-SNE dim. 1')
    plt.ylabel('t-SNE dim. 2')
    plt.tight_layout()
    gn.add_current_figure_to_results('t-SNE plot: each dot represents a cell', dpi=75)

    # Rows of the embedding are in the same order as ``adata.obs_names``.
    coords_by_sample = {
        sample_id: row.tolist()
        for sample_id, row in zip(adata.obs_names, embedding)
    }
    tsne_export = {
        'dimNames': ['t-SNE dim. 1', 't-SNE dim. 2'],
        'coords': coords_by_sample,
    }
    gn.export_statically(tsne_export, 't-SNE coordinates')

    gn.commit()
def main():
    """Run PCA on the imported assay, plot the components and export them.

    Reads the ``assay`` import and the ``num_top_comps`` argument. Computes
    20 principal components with ``sc.pp.pca``, adds a bar chart of the
    explained variance ratios, and then, for every pair among the first
    ``num_top_comps`` components, adds a scatter plot and exports the
    per-sample coordinates as ``sampleCoords``.

    If ``num_top_comps`` exceeds the number of components actually present
    in ``adata.obsm["X_pca"]``, it is reduced to that number (and a note is
    added to the results) instead of failing with an ``IndexError`` while
    indexing a non-existent column.
    """
    gn = Granatum()

    adata = gn.ann_data_from_assay(gn.get_import("assay"))
    num_top_comps = gn.get_arg("num_top_comps")

    sc.pp.pca(adata, 20)

    variance_ratios = adata.uns["pca"]["variance_ratio"]
    pc_labels = ["PC{}".format(x + 1) for x in range(len(variance_ratios))]

    plt.figure()
    plt.bar(pc_labels, variance_ratios)
    plt.tight_layout()
    gn.add_current_figure_to_results(
        "Explained variance (ratio) by each Principal Component (PC)", height=350, dpi=75)

    X_pca = adata.obsm["X_pca"]

    # Only columns that were actually computed can be plotted; clamp the
    # request so the pairwise loop below never indexes past the last column.
    num_available_comps = X_pca.shape[1]
    if num_top_comps > num_available_comps:
        gn.add_result(
            "Requested **{}** top components, but only **{}** were computed; "
            "showing the first **{}**.".format(
                num_top_comps, num_available_comps, num_available_comps),
            type="markdown",
        )
        num_top_comps = num_available_comps

    # Marker size is inversely proportional to the number of rows.
    marker_size = 5000 / adata.shape[0]

    for i, j in combinations(range(num_top_comps), 2):
        xlabel = "PC{}".format(i + 1)
        ylabel = "PC{}".format(j + 1)
        pair_title = "PC{} vs. PC{}".format(i + 1, j + 1)

        plt.figure()
        plt.scatter(X_pca[:, i], X_pca[:, j], s=marker_size)
        plt.xlabel(xlabel)
        plt.ylabel(ylabel)
        plt.tight_layout()
        gn.add_current_figure_to_results(pair_title, dpi=75)

        pca_export = {
            "dimNames": [xlabel, ylabel],
            "coords": {
                sample_id: X_pca[k, [i, j]].tolist()
                for k, sample_id in enumerate(adata.obs_names)
            },
        }
        gn.export(pca_export, pair_title, kind="sampleCoords", meta={})

    gn.commit()
def main():
    """Randomly down-sample the rows (cells) of the imported assay.

    Reads the ``assay`` import plus the ``num_cells_to_sample`` and
    ``random_seed`` arguments. A ``num_cells_to_sample`` strictly between 0
    and 1 is interpreted as a fraction of the current cell count; any other
    value is rounded and used as an absolute count. The resulting count is
    capped at the number of cells available and is never less than 1.

    A markdown summary of the before/after dimensions is added to the
    results and the reduced assay is exported as 'Down-sampled Assay'.
    """
    gn = Granatum()

    adata = gn.ann_data_from_assay(gn.get_import("assay"))
    requested = gn.get_arg("num_cells_to_sample")
    random_seed = gn.get_arg("random_seed")

    # Seed NumPy's global generator so the selection below is reproducible.
    np.random.seed(random_seed)

    num_cells_before = adata.shape[0]
    num_genes_before = adata.shape[1]

    if 0 < requested < 1:
        # Fractional request: scale by the current number of cells.
        target = round(num_cells_before * requested)
    else:
        target = round(requested)

    # Cap at the available cells first, then enforce the minimum of one.
    target = max(min(target, num_cells_before), 1)

    chosen = np.random.choice(num_cells_before, target, replace=False)
    # Sorting keeps the surviving cells in their original relative order.
    sampled_cells_idxs = np.sort(chosen)
    adata = adata[sampled_cells_idxs, :]

    before_line = "The assay before down-sampling has **{}** cells and {} genes.".format(
        num_cells_before, num_genes_before
    )
    after_line = "The assay after down-sampling has **{}** cells and {} genes.".format(
        adata.shape[0], adata.shape[1]
    )
    gn.add_result("\n".join([before_line, "", after_line]), type="markdown")

    gn.export(gn.assay_from_ann_data(adata), "Down-sampled Assay", dynamic=False)

    gn.commit()
def main():
    """Filter genes of the imported assay by expression and dispersion.

    Reads the ``assay`` import and the ``min_cells_expressed``, ``min_mean``,
    ``max_mean``, ``min_disp`` and ``max_disp`` arguments. Genes are first
    filtered with ``sc.pp.filter_genes(min_cells=...)`` and then with
    ``sc.pp.filter_genes_dispersion`` (``flavor='seurat'``), to which the
    mean thresholds are passed after a natural-log transform.

    The dispersion plot and a markdown summary of the gene counts are added
    to the results, and the filtered assay is exported as 'Filtered Assay'.

    Raises:
        ValueError: if ``max_mean`` is not positive (its logarithm is
            undefined and no meaningful upper bound can be derived).
    """
    gn = Granatum()

    adata = gn.ann_data_from_assay(gn.get_import("assay"))
    min_cells_expressed = gn.get_arg("min_cells_expressed")
    min_mean = gn.get_arg("min_mean")
    max_mean = gn.get_arg("max_mean")
    min_disp = gn.get_arg("min_disp")
    max_disp = gn.get_arg("max_disp")

    # Validate before doing any work: math.log() would otherwise fail later
    # with an opaque "math domain error".
    if max_mean <= 0:
        raise ValueError("max_mean must be positive, got {!r}".format(max_mean))

    # A non-positive lower bound means "no lower bound": log(x) tends to
    # -inf as x approaches 0 from above, and every finite mean exceeds -inf.
    log_min_mean = math.log(min_mean) if min_mean > 0 else -math.inf
    log_max_mean = math.log(max_mean)

    num_genes_before = adata.shape[1]

    sc.pp.filter_genes(adata, min_cells=min_cells_expressed)

    filter_result = sc.pp.filter_genes_dispersion(
        adata.X,
        flavor='seurat',
        min_mean=log_min_mean,
        max_mean=log_max_mean,
        min_disp=min_disp,
        max_disp=max_disp,
    )
    adata = adata[:, filter_result.gene_subset]

    sc.pl.filter_genes_dispersion(filter_result)
    gn.add_current_figure_to_results(
        "Each dot represent a gene. The gray dots are the removed genes. The x-axis is log-transformed.",
        zoom=3,
        dpi=50,
        height=400,
    )

    gn.add_result(
        "\n".join(
            [
                "Number of genes before filtering: **{}**".format(num_genes_before),
                "",
                "Number of genes after filtering: **{}**".format(adata.shape[1]),
            ]
        ),
        type="markdown",
    )

    gn.export(gn.assay_from_ann_data(adata), "Filtered Assay", dynamic=False)

    gn.commit()
def main():
    """Remove user-selected outlier cells from the imported assay.

    Reads the ``assay`` import and the ``outliers`` argument (a collection
    of cell ids). Ids that are not present in the assay are ignored
    (``errors='ignore'``). The reduced assay is statically exported as
    'Outlier removed assay' and a one-line summary is added to the results.

    The summary reports the number of cells that were actually dropped,
    computed from the cell counts before and after, rather than the length
    of the ``outliers`` argument; the two differ whenever the argument
    contains ids that are unknown to the assay or repeated.
    """
    gn = Granatum()

    adata = gn.ann_data_from_assay(gn.get_import('assay'))
    outliers = gn.get_arg('outliers')

    num_cells_before = adata.shape[0]

    # Unknown ids are skipped silently instead of raising KeyError.
    kept_cell_ids = adata.obs_names.drop(outliers, errors='ignore').values
    adata = adata[kept_cell_ids, :]

    # Count what was really removed so the message below cannot overstate it.
    num_removed = num_cells_before - adata.shape[0]

    gn.export_statically(gn.assay_from_ann_data(adata), 'Outlier removed assay')
    gn.add_result(
        'You removed {} outliers from {} cells, the result assay has {} cells (and {} genes).'.format(
            num_removed, num_cells_before, adata.shape[0], adata.shape[1]
        ),
        type='markdown'
    )

    gn.commit()
def main():
    """Cluster the imported assay with Louvain and plot the clusters.

    Reads the ``assay`` and ``sampleCoords`` imports and the ``random_seed``
    argument. Builds a neighbour graph directly on ``adata.X``
    (``n_neighbors=20``, ``method='gauss'``), runs ``sc.tl.louvain``, and
    statically exports a ``{cell id: 'Cluster N'}`` mapping, where N is the
    Louvain label plus one. The imported coordinates are then used to draw
    one scatter series per cluster, which is added to the results.
    """
    gn = Granatum()

    adata = gn.ann_data_from_assay(gn.get_import('assay'))
    sample_coords = gn.get_import('sampleCoords')
    random_seed = gn.get_arg('random_seed')

    sc.pp.neighbors(adata, n_neighbors=20, use_rep='X', method='gauss')
    sc.tl.louvain(adata, random_state=random_seed)

    # Shift the Louvain labels by one so displayed cluster names start at 1.
    cluster_assignment = {
        cell_id: 'Cluster {}'.format(int(label) + 1)
        for cell_id, label in zip(adata.obs_names, adata.obs['louvain'])
    }
    gn.export_statically(cluster_assignment, 'Cluster assignment')

    dim_names = sample_coords.get('dimNames')
    coords_dict = sample_coords.get('coords')

    plt.figure()

    for label in adata.obs['louvain'].cat.categories:
        in_cluster = adata.obs['louvain'] == label
        member_coords = [coords_dict.get(cell_id) for cell_id in adata.obs_names[in_cluster]]

        # Only the first two entries of each coordinate are plotted.
        xs = [point[0] for point in member_coords]
        ys = [point[1] for point in member_coords]
        plt.scatter(xs, ys, label='Cluster {}'.format(int(label) + 1))

    plt.xlabel(dim_names[0])
    plt.ylabel(dim_names[1])
    plt.legend()
    plt.tight_layout()

    gn.add_current_figure_to_results(
        'Scatter-plot using imported cell coordinates. Each dot represents a cell. The colors indicate the indentified cell clusters.',
        dpi=75)

    gn.commit()