def main():
    """Embed the imported assay with t-SNE, plot it and export the coordinates.

    Reads the ``assay`` import and the ``random_seed`` argument, runs scanpy's
    t-SNE, adds a scatter plot to the results and statically exports the 2-D
    coordinates keyed by observation name.
    """
    gn = Granatum()

    adata = gn.ann_data_from_assay(gn.get_import('assay'))
    seed = gn.get_arg('random_seed')

    # The embedding is read back from the AnnData object after the call.
    sc.tl.tsne(adata, random_state=seed)
    embedding = adata.obsm['X_tsne']

    plt.figure()
    # Marker area shrinks as the number of observations grows.
    plt.scatter(embedding[:, 0], embedding[:, 1], s=5000 / adata.shape[0])
    plt.xlabel('t-SNE dim. 1')
    plt.ylabel('t-SNE dim. 2')
    plt.tight_layout()
    gn.add_current_figure_to_results(
        't-SNE plot: each dot represents a cell', dpi=75)

    coords = {}
    for row, sample_id in enumerate(adata.obs_names):
        coords[sample_id] = embedding[row, :].tolist()

    tsne_export = {
        'dimNames': ['t-SNE dim. 1', 't-SNE dim. 2'],
        'coords': coords,
    }
    gn.export_statically(tsne_export, 't-SNE coordinates')

    gn.commit()
def main():
    """Compute PCA and t-SNE embeddings of the imported assay.

    For each method the 2-D coordinates are exported statically (keyed by
    sample id) and a scatter plot is added to the results.
    """
    gn = Granatum()

    assay = gn.get_import('assay')
    matrix = np.array(assay.get('matrix'))
    sample_ids = assay.get('sampleIds')
    num_samples = matrix.shape[1]

    def build_export(dim_names, embedding):
        # Pair every embedding row with the sample id at the same position.
        return {
            'dimNames': dim_names,
            'coords': {
                sample_id: embedding[row, :].tolist()
                for row, sample_id in enumerate(sample_ids)
            },
        }

    def add_scatter(embedding, caption):
        plt.figure()
        plt.scatter(embedding[:, 0], embedding[:, 1], s=5000 / num_samples)
        plt.tight_layout()
        gn.add_current_figure_to_results(caption, dpi=75)

    # ---- PCA ----------------------------------------------------------------
    pca_input = np.transpose(matrix)
    pca_model = PCA(n_components=2)
    pca_coords = pca_model.fit_transform(pca_input)

    gn.export_statically(build_export(['PCA-1', 'PCA-2'], pca_coords), 'pca')
    add_scatter(pca_coords, 'Principal Component Analysis (PCA) scatter-plot')

    # ---- t-SNE --------------------------------------------------------------
    tsne_input = np.transpose(matrix)
    tsne_model = TSNE(n_jobs=multiprocessing.cpu_count())
    tsne_coords = tsne_model.fit_transform(tsne_input)

    gn.export_statically(build_export(['tSNE-1', 'tSNE-2'], tsne_coords), 'tsne')
    add_scatter(
        tsne_coords,
        't-Distributed Stochastic Neighbor Embedding (t-SNE) scatter-plot')

    gn.commit()
def main():
    """Rename each group after the reference label it overlaps most.

    Reads the ``assay``, ``groups`` ({cell: group}) and ``reflabels``
    ({cell: reference label}) imports. Every group is mapped to the reference
    label sharing the most cells with it; cells of the group that do not carry
    that reference label are counted as mislabelled. When ``remove_cells`` is
    set those cells are dropped from both the assay and the labels. The
    relabelled groups and the (possibly reduced) assay are exported.

    Raises:
        ValueError: from ``max`` when there are groups but no reference labels.
    """
    tic = time.perf_counter()

    gn = Granatum()
    assay = gn.pandas_from_assay(gn.get_import('assay'))
    groups = gn.get_import('groups')
    reflabels = gn.get_import('reflabels')
    remove_cells = gn.get_arg('remove_cells')

    # Invert {cell: label} into {label: [cells]}; append in place instead of
    # re-creating the list for every cell.
    inv_map = {}
    for cell, label in groups.items():
        inv_map.setdefault(label, []).append(cell)

    # Reference labels are only ever used as sets, so build the sets once.
    ref_cell_sets = {}
    for cell, label in reflabels.items():
        ref_cell_sets.setdefault(label, set()).add(cell)

    group_relabel = {}
    mislabelled_cells = []
    for label, cells in inv_map.items():
        cell_set = set(cells)
        label_scores = {
            ref_label: len(ref_cells & cell_set)
            for ref_label, ref_cells in ref_cell_sets.items()
        }
        # Ties resolve to the reference label that was seen first.
        best_label = max(label_scores, key=label_scores.get)
        group_relabel[label] = best_label
        mislabelled_cells.extend(cell_set - ref_cell_sets[best_label])

    if remove_cells:
        gn.add_result(
            "Dropping {} mislabelled cells".format(len(mislabelled_cells)),
            "markdown")
        assay = assay.drop(mislabelled_cells, axis=1)
        mislabelled_set = set(mislabelled_cells)
        groups = {
            key: val
            for key, val in groups.items() if key not in mislabelled_set
        }

    for cell in groups:
        groups[cell] = group_relabel[groups[cell]]

    toc = time.perf_counter()
    time_passed = round(toc - tic, 2)

    gn.export_statically(gn.assay_from_pandas(assay), "Corresponded assay")
    gn.export_statically(groups, "Corresponded labels")

    timing = "* Finished sample coloring step in {} seconds*".format(
        time_passed)
    gn.add_result(timing, "markdown")

    gn.commit()
def main():
    """Center every row of the imported assay matrix on its own mean.

    The centered matrix replaces ``assay['matrix']``, a before/after
    comparison is plotted through ``plot_distribution_comparison`` and the
    assay is exported statically.
    """
    gn = Granatum()

    assay = gn.get_import('assay')
    original = np.array(assay.get('matrix'))

    # keepdims keeps the row means as a column so they broadcast per row.
    row_means = original.mean(axis=1, keepdims=True)
    centered = original - row_means

    assay['matrix'] = centered.tolist()

    plot_distribution_comparison(original, centered, gn)

    gn.export_statically(assay, 'Gene centered assay')
    gn.commit()
def main():
    """Apply the beta-to-m transformation to the imported assay.

    Each value ``b`` becomes ``(b + epsilon) / (1 - b + epsilon)`` and, when
    ``take_log`` is set, the logarithm in base ``logBase`` of that ratio.
    Histograms of the values before and after the transformation are added to
    the results and the transformed assay is exported statically.
    """
    gn = Granatum()

    assay = gn.get_import('assay')
    matrix = np.array(assay.get('matrix'))

    take_log = gn.get_arg('take_log')
    log_base = gn.get_arg('logBase')
    epsilon = gn.get_arg('epsilon')

    transformed_matrix = (matrix + epsilon) / (1 - matrix + epsilon)
    if take_log:
        # Change of base: log_b(x) = ln(x) / ln(b).
        transformed_matrix = np.log(transformed_matrix) / np.log(log_base)

    def above_5th_percentile(values):
        # Only the lower tail is trimmed; there is no upper cut-off.
        flat = values.flatten()
        return flat[flat > np.percentile(flat, 5)]

    non_zero_values_before = above_5th_percentile(matrix)
    non_zero_values_after = above_5th_percentile(transformed_matrix)

    plt.figure()

    plt.subplot(2, 1, 1)
    plt.title('Before beta-to-m transformation')
    plt.hist(non_zero_values_before, bins=100)
    plt.ylabel('Frequency')
    plt.xlabel('Expression level')

    plt.subplot(2, 1, 2)
    plt.title('After beta-to-m transformation')
    plt.hist(non_zero_values_after, bins=100)
    plt.ylabel('Frequency')
    plt.xlabel('Expression level')

    plt.tight_layout()

    # The caption used to claim an additional 95th-percentile cut-off that the
    # code never applied; it now describes what is actually plotted.
    caption = (
        'The distribution of expression level before and after beta-to-m transformation. Only the values greater '
        'than the 5 percentile (usually zero in single-cell data) are considered.'
    )
    gn.add_current_figure_to_results(caption, zoom=2, dpi=50)

    assay['matrix'] = transformed_matrix.tolist()
    gn.export_statically(assay, 'Beta-to-m transformed assay')

    gn.commit()
def main():
    """Cluster the imported assay with ``GranatumDeepClustering``.

    The transposed assay matrix and the sample ids are passed to ``fit``. The
    returned ``clusters`` mapping is exported statically and as a raw CSV-like
    text file, and a table pairing sample ids with ``clusters_array`` is added
    to the results.
    """
    gn = Granatum()

    assay = gn.get_import('assay')
    sample_ids = assay.get('sampleIds')

    args_for_init = {
        'selected_embedding': gn.get_arg('selectedEmbedding'),
        'selected_clustering': gn.get_arg('selectedClustering'),
        'n_components': gn.get_arg('nComponents'),
        'n_clusters': gn.get_arg('nClusters'),
        'find_best_number_of_cluster': gn.get_arg('findBestNumberOfCluster'),
    }

    args_for_fit = {
        'matrix': np.transpose(np.array(assay.get('matrix'))),
        'sample_ids': sample_ids,
    }

    granatum_clustering = GranatumDeepClustering(**args_for_init)
    fit_results = granatum_clustering.fit(**args_for_fit)

    fit_exp = fit_results.get('clusters')
    gn.export_statically(fit_exp, 'Cluster assignment')

    # One line per entry: the key in double quotes, then ", " and the value.
    csv_lines = [f'"{k!s}", {v!s}' for k, v in fit_exp.items()]
    gn.export("\n".join(csv_lines), 'Cluster assignment.csv',
              kind='raw', meta=None, raw=True)

    gn.add_result(
        {
            'orient': 'split',
            'columns': ['Sample ID', 'Cluster Assignment'],
            'data': [
                {'Sample ID': x, 'Cluster Assignment': y}
                for x, y in zip(sample_ids, fit_results.get('clusters_array'))
            ],
        },
        'table',
    )

    gn.commit()
def main():
    """Embed the imported assay with UMAP, plot it and export the coordinates.

    The assay DataFrame is transposed before fitting, so each embedded point
    corresponds to one DataFrame column; the exported coordinates are keyed by
    those column names.
    """
    tic = time.perf_counter()

    gn = Granatum()
    df = gn.pandas_from_assay(gn.get_import('assay'))

    n_neighbors = gn.get_arg('n_neighbors')
    min_dist = gn.get_arg('min_dist')
    metric = gn.get_arg('metric')
    random_seed = gn.get_arg('random_seed')

    embedding = umap.UMAP(
        n_neighbors=n_neighbors,
        min_dist=min_dist,
        metric=metric,
        random_state=random_seed,
    ).fit_transform(df.values.T)

    plt.figure()
    # Scale the marker area by the number of plotted points (one per
    # DataFrame column), not by the number of rows, and cap it at 36.0.
    marker_size = min(5000 / embedding.shape[0], 36.0)
    plt.scatter(embedding[:, 0], embedding[:, 1], s=marker_size)
    plt.xlabel('UMAP dim. 1')
    plt.ylabel('UMAP dim. 2')
    plt.tight_layout()
    gn.add_current_figure_to_results(
        'UMAP plot: each dot represents a cell', dpi=75)

    umap_export = {
        'dimNames': ['UMAP dim. 1', 'UMAP dim. 2'],
        'coords': {
            sample_id: embedding[i, :].tolist()
            for i, sample_id in enumerate(df.columns)
        },
    }
    gn.export_statically(umap_export, 'UMAP coordinates')

    toc = time.perf_counter()
    time_passed = round(toc - tic, 2)

    timing = "* Finished UMAP step in {} seconds*".format(time_passed)
    gn.add_result(timing, "markdown")

    gn.commit()
def main():
    """Remove the user-selected outliers from the imported assay.

    Ids listed in the ``outliers`` argument that are not present in the assay
    are ignored. The reduced assay is exported statically and a summary of how
    many observations were actually removed is added to the results.
    """
    gn = Granatum()

    adata = gn.ann_data_from_assay(gn.get_import('assay'))
    outliers = gn.get_arg('outliers')

    num_cells_before = adata.shape[0]

    # errors='ignore' skips requested ids that do not exist in the assay.
    kept_cell_ids = adata.obs_names.drop(outliers, errors='ignore').values
    adata = adata[kept_cell_ids, :]

    # Report what was really removed: len(outliers) over-counts whenever an
    # id was ignored above.
    num_removed = num_cells_before - adata.shape[0]

    gn.export_statically(gn.assay_from_ann_data(adata), 'Outlier removed assay')
    gn.add_result(
        'You removed {} outliers from {} cells, the result assay has {} cells (and {} genes).'.format(
            num_removed, num_cells_before, adata.shape[0], adata.shape[1]
        ),
        type='markdown'
    )

    gn.commit()
def main():
    """Run diffusion pseudotime on the imported assay and export the results.

    Builds an AnnData object from the transposed assay matrix, computes the
    neighbourhood graph and DPT with one branching, pickles the AnnData object
    and, for both ``dpt_order`` and ``dpt_groups``, adds a diffusion-map plot
    to the results and exports the per-observation values.
    """
    gn = Granatum()

    n_neighbors = gn.get_arg('nNeighbors', 15)
    neighbor_method = gn.get_arg('neighborMethod', 'gauss')

    assay = gn.get_import('assay')
    expression = np.array(assay.get('matrix')).transpose()

    adata = sc.AnnData(expression)
    adata.var_names = assay.get('geneIds')
    adata.obs_names = assay.get('sampleIds')

    sc.pp.neighbors(
        adata,
        n_neighbors=n_neighbors,
        use_rep='X',
        method=neighbor_method,
    )
    sc.tl.dpt(adata, n_branchings=1)

    gn._pickle(adata, 'adata')

    # (obs column, figure caption) pairs; the column name is also the export name.
    plot_specs = (
        ('dpt_order', 'Cell order'),
        ('dpt_groups', 'Cell groups'),
    )
    for column, caption in plot_specs:
        plt.figure()
        sc.pl.diffmap(adata, color=column)
        gn.add_current_figure_to_results(caption)

        sample_names = adata.obs_names.tolist()
        column_values = adata.obs[column].values.tolist()
        gn.export_statically(dict(zip(sample_names, column_values)), column)

    gn.commit()
def main():
    """Cluster the imported assay with louvain and plot the clusters.

    Clusters are exported as ``'Cluster <n>'`` labels (category value plus
    one) keyed by observation name, and drawn on the imported ``sampleCoords``
    with one scatter series per cluster.
    """
    gn = Granatum()

    adata = gn.ann_data_from_assay(gn.get_import('assay'))
    sample_coords = gn.get_import('sampleCoords')
    random_seed = gn.get_arg('random_seed')

    sc.pp.neighbors(adata, n_neighbors=20, use_rep='X', method='gauss')
    sc.tl.louvain(adata, random_state=random_seed)

    labels = ['Cluster {}'.format(int(c) + 1) for c in adata.obs['louvain']]
    cluster_assignment = dict(zip(adata.obs_names, labels))
    gn.export_statically(cluster_assignment, 'Cluster assignment')

    dim_names = sample_coords.get('dimNames')
    coords_dict = sample_coords.get('coords')

    plt.figure()
    for cluster in adata.obs['louvain'].cat.categories:
        member_ids = adata.obs_names[adata.obs['louvain'] == cluster]
        points = [coords_dict.get(cell_id) for cell_id in member_ids]
        xs = [point[0] for point in points]
        ys = [point[1] for point in points]
        plt.scatter(xs, ys, label='Cluster {}'.format(int(cluster) + 1))

    plt.xlabel(dim_names[0])
    plt.ylabel(dim_names[1])
    plt.legend()
    plt.tight_layout()

    gn.add_current_figure_to_results(
        'Scatter-plot using imported cell coordinates. Each dot represents a cell. The colors indicate the indentified cell clusters.',
        dpi=75)

    gn.commit()
def main():
    """Apply a generalized-log (glog) transformation to the imported assay.

    Two benchmarking genes are picked from the genes sorted by mean
    expression: among the ``n_top`` highest the one with the largest standard
    deviation ("housekeeping"), and among the ``n_bottom`` lowest the one with
    the smallest standard deviation ("negative control"). The background
    offset is the mean or median (``which_mid``) of the negative control, and
    the constant ``c`` is the squared ratio of the negative control's standard
    deviation to that of the log-transformed, offset housekeeping gene. The
    assay is then transformed with ``log((z + sqrt(z^2 + c)) / 2)`` in base
    ``log_base``, where ``z = x - offset``.

    Raises:
        ValueError: if ``which_mid`` is neither ``'mean'`` nor ``'median'``.
    """
    gn = Granatum()

    assay = gn.get_import('assay')
    # np.float was only an alias of the builtin float and was removed in
    # NumPy 1.24; the builtin gives the same float64 array.
    x = np.array(assay.get('matrix')).astype(float)

    log_base = gn.get_arg('log_base')
    n_top = gn.get_arg('n_top')
    n_bottom = gn.get_arg('n_bottom')
    which_mid = gn.get_arg('which_mid')

    gene_df = pd.DataFrame(
        {
            'row_num': range(x.shape[0]),
            'gene_id': assay.get('geneIds'),
            'exp_mean': np.mean(x, axis=1),
            'exp_std': np.std(x, axis=1),
        }
    )
    gene_df = gene_df.sort_values('exp_mean', ascending=False)

    top_gene_row = gene_df.head(n_top).sort_values('exp_std', ascending=False).iloc[0]
    bottom_gene_row = gene_df.tail(n_bottom).sort_values('exp_std').iloc[0]

    # Clip away zeros/negatives before the values are log-transformed below.
    hk_gene = np.clip(x[top_gene_row['row_num'], :], a_min=0.00001, a_max=None)
    neg_gene = x[bottom_gene_row['row_num'], :]

    if which_mid == 'mean':
        alphabk = np.mean(neg_gene[:])
    elif which_mid == 'median':
        alphabk = np.median(neg_gene[:])
    else:
        raise ValueError(
            f"Unknown which_mid {which_mid!r}: expected 'mean' or 'median'."
        )

    loghkdatabk = np.log(hk_gene - alphabk) / np.log(log_base)
    # Drop NaN values (logarithm of a negative difference).
    loghkdatabk = loghkdatabk[~np.isnan(loghkdatabk)]

    c = (np.std(neg_gene[:], ddof=1) / np.std(loghkdatabk, ddof=1))**2

    xbk = x - alphabk
    transformed_matrix = np.log((xbk + np.sqrt(xbk**2 + c)) / 2) / np.log(log_base)

    gn.add_result(
        '\n'.join(
            [
                "Selected benchmarking genes:",
                f" * housekeeping gene: **{top_gene_row['gene_id']}** "
                f"(mean: {top_gene_row['exp_mean']}, std: {top_gene_row['exp_std']}) ",
                f" * negative control gene: **{bottom_gene_row['gene_id']}**"
                f"(mean: {bottom_gene_row['exp_mean']}, std: {bottom_gene_row['exp_std']})",
                "",
                f"Final formula is `y = log{log_base}((z + sqrt(z^2 + c))/2)`, where `z = x - {alphabk}` and `c = {c}`.",
            ]
        ),
        'markdown',
    )

    def above_5th_percentile(values):
        # Only the lower tail is trimmed; there is no upper cut-off.
        flat = values.flatten()
        return flat[flat > np.percentile(flat, 5)]

    non_zero_values_before = above_5th_percentile(x)
    non_zero_values_after = above_5th_percentile(transformed_matrix)

    plt.figure()

    plt.subplot(2, 1, 1)
    plt.title('Before glog transformation')
    plt.hist(non_zero_values_before, bins=100)
    plt.ylabel('Frequency')
    plt.xlabel('Expression level')

    plt.subplot(2, 1, 2)
    plt.title('After glog transformation')
    plt.hist(non_zero_values_after, bins=100)
    plt.ylabel('Frequency')
    plt.xlabel('Expression level')

    plt.tight_layout()

    # The caption used to claim an additional 95th-percentile cut-off that the
    # code never applied; it now describes what is actually plotted.
    caption = (
        'The distribution of expression level before and after glog transformation. Only the values greater '
        'than the 5 percentile (usually zero in single-cell data) are considered.'
    )
    gn.add_current_figure_to_results(caption, zoom=2, dpi=50)

    assay['matrix'] = transformed_matrix.tolist()
    gn.export_statically(assay, 'GLog transformed assay')

    gn.commit()
def main():
    """Compute per-gene differential expression statistics in parallel.

    Rows of the assay whose Euclidean norm is below ``min_dist`` are dropped,
    then ``compref`` (defined elsewhere) is called once per remaining row
    through joblib. The per-gene results are concatenated, exported statically
    (transposed) and exported as a raw CSV file.
    """
    tic = time.perf_counter()

    gn = Granatum()
    assay = gn.pandas_from_assay(gn.get_import('assay'))

    # groups maps every cell to its cluster: {"cell": "cluster"}
    groups = gn.get_import('groups')

    certainty = gn.get_arg('certainty')
    alpha = 1 - certainty / 100.0
    min_zscore = st.norm.ppf(gn.get_arg("certainty") / 100.0)
    min_dist = 0.1

    # Filter rows up front: keep only those whose norm reaches min_dist.
    row_norms = assay.apply(np.linalg.norm, axis=1)
    assay = assay.loc[row_norms.T >= min_dist, :]

    # inv_map:      {"cluster": [cells in the cluster]}
    # inv_map_rest: {"cluster": [assay columns left after removing its cells]}
    inv_map = {}
    inv_map_rest = {}
    for cell, cluster in groups.items():
        inv_map.setdefault(cluster, []).append(cell)
        remaining = inv_map_rest.setdefault(cluster, list(assay.columns))
        remaining.remove(cell)

    print("Completed setup", flush=True)

    cols = list(inv_map.keys())
    colnames = [
        "{} vs {}".format(first, second)
        for first in cols
        for second in cols
        if first != second
    ]
    colnames.extend("{} vs rest".format(first) for first in cols)

    total_genes = len(assay.index)
    print("Executing parallel for {} genes".format(total_genes), flush=True)

    n_jobs = math.floor(multiprocessing.cpu_count() * 2 * 9 / 10)
    tasks = (
        delayed(compref)(gene, assay.loc[gene, :], colnames, inv_map,
                         inv_map_rest, alpha, min_dist, min_zscore)
        for gene in tqdm(list(assay.index))
    )
    results = Parallel(n_jobs=n_jobs)(tasks)
    result = pd.concat(results, axis=0)

    gn.export_statically(gn.assay_from_pandas(result.T),
                         'Differential expression sets')
    gn.export(result.to_csv(), 'differential_gene_sets.csv',
              kind='raw', meta=None, raw=True)

    toc = time.perf_counter()
    time_passed = round(toc - tic, 2)

    timing = "* Finished differential expression sets step in {} seconds*".format(
        time_passed)
    gn.add_result(timing, "markdown")

    gn.commit()
def main():
    """Compute clipped z-score-like differential expression columns.

    For every group of cells the per-gene mean, standard deviation and the
    confidence interval of the mean are computed. Genes whose smallest
    per-group standard deviation is below ``min_expression_variation`` are
    dropped. One column is produced per ordered pair of groups and one
    "<group> vs rest" column per group, each clipped to ``±max_zscore``.
    Genes whose row norm reaches ``min_zscore`` are exported statically and as
    a raw CSV file.
    """
    tic = time.perf_counter()

    gn = Granatum()
    assay = gn.pandas_from_assay(gn.get_import('assay'))
    groups = gn.get_import('groups')

    min_zscore = gn.get_arg('min_zscore')
    max_zscore = gn.get_arg('max_zscore')
    min_expression_variation = gn.get_arg('min_expression_variation')

    # Invert {cell: group} into {group: [cells]}.
    inv_map = {}
    for cell, group in groups.items():
        inv_map.setdefault(group, []).append(cell)

    low_mean_dfs = []
    high_mean_dfs = []
    mean_dfs = []
    std_dfs = []
    colnames = []
    for k, v in inv_map.items():
        group_values = assay.loc[:, v]
        lowbound_clust = {}
        highbound_clust = {}
        for index, row in group_values.iterrows():
            # Confidence interval of the mean of this gene within the group.
            meanbounds = sms.DescrStatsW(row).tconfint_mean()
            lowbound_clust[index] = meanbounds[0]
            highbound_clust[index] = meanbounds[1]
        low_mean_dfs.append(
            pd.DataFrame.from_dict(lowbound_clust, orient="index", columns=[k]))
        high_mean_dfs.append(
            pd.DataFrame.from_dict(highbound_clust, orient="index", columns=[k]))
        mean_dfs.append(group_values.mean(axis=1))
        std_dfs.append(group_values.std(axis=1))
        colnames.append(k)

    mean_df = pd.concat(mean_dfs, axis=1)
    mean_df.columns = colnames
    low_mean_df = pd.concat(low_mean_dfs, axis=1)
    low_mean_df.columns = colnames
    high_mean_df = pd.concat(high_mean_dfs, axis=1)
    high_mean_df.columns = colnames
    std_df = pd.concat(std_dfs, axis=1)
    std_df.columns = colnames
    print(std_df)

    minvalues = std_df.min(axis=1).to_frame()
    minvalues.columns = ["min"]
    print("Minvalues>>")
    print(minvalues, flush=True)

    genes_below_min = list(
        (minvalues[minvalues["min"] < min_expression_variation]).index)
    print("{} out of {}".format(len(genes_below_min), len(minvalues.index)),
          flush=True)

    mean_df = mean_df.drop(genes_below_min, axis=0)
    low_mean_df = low_mean_df.drop(genes_below_min, axis=0)
    high_mean_df = high_mean_df.drop(genes_below_min, axis=0)
    std_df = std_df.drop(genes_below_min, axis=0)
    assay = assay.drop(genes_below_min, axis=0)
    print("Filtered assay to get {} columns by {} rows".format(
        len(assay.columns), len(assay.index)), flush=True)

    # Statistics of "everything except this group", one column per group.
    mean_rest_dfs = []
    std_rest_dfs = []
    colnames = []
    for k, v in inv_map.items():
        rest_v = list(set(list(assay.columns)).difference(set(v)))
        mean_rest_dfs.append(assay.loc[:, rest_v].mean(axis=1))
        std_rest_dfs.append(assay.loc[:, rest_v].std(axis=1))
        colnames.append(k)

    mean_rest_df = pd.concat(mean_rest_dfs, axis=1)
    mean_rest_df.columns = colnames
    std_rest_df = pd.concat(std_rest_dfs, axis=1)
    std_rest_df.columns = colnames

    zscore_dfs = []
    cols = colnames
    colnames = []
    for coli in cols:
        for colj in cols:
            if coli != colj:
                # Compare the confidence intervals of the two group means;
                # range_check (defined elsewhere) combines the two bounds.
                mean_diff_overlap_low_high = (low_mean_df[coli] - high_mean_df[colj])
                mean_diff_overlap_high_low = (high_mean_df[coli] - low_mean_df[colj])
                diff_df = mean_diff_overlap_low_high.combine(
                    mean_diff_overlap_high_low, range_check)

                zscore_dfs.append(
                    (diff_df / (std_df[colj] + std_df[coli] / 4))
                    .fillna(0).clip(-max_zscore, max_zscore))
                colnames.append("{} vs {}".format(coli, colj))

    for coli in cols:
        # Each group is compared with its OWN rest statistics. This used to
        # index with the stale `colj` left over from the loop above, i.e.
        # always the last group (and a NameError with a single group).
        # NOTE(review): the denominator keeps the original shape with
        # colj -> coli; confirm whether std_df[coli] / 4 was intended for the
        # second term.
        zscore_dfs.append(
            ((mean_df[coli] - mean_rest_df[coli])
             / (std_rest_df[coli] + std_rest_df[coli] / 4))
            .fillna(0).clip(-max_zscore, max_zscore))
        colnames.append("{} vs rest".format(coli))

    zscore_df = pd.concat(zscore_dfs, axis=1)
    zscore_df.columns = colnames

    # Keep only the genes whose row norm reaches min_zscore.
    norms_df = zscore_df.apply(np.linalg.norm, axis=1)
    colsmatching = norms_df.T[(norms_df.T >= min_zscore)].index.values
    return_df = zscore_df.T[colsmatching]

    gn.export_statically(gn.assay_from_pandas(return_df),
                         'Differential expression sets')
    gn.export(return_df.T.to_csv(), 'differential_gene_sets.csv',
              kind='raw', meta=None, raw=True)

    toc = time.perf_counter()
    time_passed = round(toc - tic, 2)

    timing = "* Finished differential expression sets step in {} seconds*".format(time_passed)
    gn.add_result(timing, "markdown")

    gn.commit()
def main():
    """Combine two imported assays into one, optionally rescaling them.

    ``direction`` selects what is stacked: ``'genes'`` concatenates the rows
    of the two assay tables, ``'samples'`` concatenates their columns. Ids
    present in both assays along that direction are prefixed with the
    assay's label. ``normalization`` is ``'none'``, ``'frobenius'`` or
    ``'mean'``; the latter two rescale both assays to the average of their
    Frobenius norms / overall means. The combined assay and a mapping from
    every stacked id to its assay label are exported.

    Raises:
        ValueError: if ``direction`` or ``normalization`` has an unknown value.
    """
    gn = Granatum()

    tb1 = gn.pandas_from_assay(gn.get_import('assay1'))
    tb2 = gn.pandas_from_assay(gn.get_import('assay2'))
    label1 = gn.get_arg('label1')
    label2 = gn.get_arg('label2')
    direction = gn.get_arg('direction')
    normalization = gn.get_arg('normalization')

    # Validate up front so nothing is exported for an unsupported direction.
    if direction == 'samples':
        meta_type = 'sampleMeta'
    elif direction == 'genes':
        meta_type = 'geneMeta'
    else:
        raise ValueError(
            f"Unknown direction {direction!r}: expected 'samples' or 'genes'."
        )

    # Capture the (genes, cells) shapes before any transposition so that the
    # summary below is correct for both directions.
    genes1, cells1 = tb1.shape
    genes2, cells2 = tb2.shape

    if direction == 'samples':
        tb1 = tb1.T
        tb2 = tb2.T

    overlapped_index = set(tb1.index) & set(tb2.index)
    tb1.index = [
        f"{label1}_{x}" if x in overlapped_index else x for x in tb1.index
    ]
    tb2.index = [
        f"{label2}_{x}" if x in overlapped_index else x for x in tb2.index
    ]

    if normalization == 'none':
        tb = pd.concat([tb1, tb2], axis=0)
    elif normalization in ('frobenius', 'mean'):
        if normalization == 'frobenius':
            ntb1 = np.linalg.norm(tb1.values)
            ntb2 = np.linalg.norm(tb2.values)
        else:
            # Reduce over the underlying array: np.mean on a DataFrame is
            # delegated to DataFrame.mean, which does not return a single
            # scalar on every pandas version.
            ntb1 = np.mean(tb1.values)
            ntb2 = np.mean(tb2.values)
        ntb = np.mean([ntb1, ntb2])
        fct1 = ntb / ntb1
        fct2 = ntb / ntb2
        tb = pd.concat([tb1 * fct1, tb2 * fct2], axis=0)
        gn.add_markdown("\n".join([
            "Normalization info:",
            "",
            f"  - Assay **{label1}** is multiplied by {fct1}",
            f"  - Assay **{label2}** is multiplied by {fct2}",
            "",
        ]))
    else:
        raise ValueError(
            f"Unknown normalization {normalization!r}: expected 'none', "
            "'frobenius' or 'mean'."
        )

    if direction == 'samples':
        tb = tb.T

    gn.add_markdown("\n".join([
        "You combined the following assays:",
        "",
        f"  - Assay 1 (with {genes1} genes and {cells1} cells)",
        f"  - Assay 2 (with {genes2} genes and {cells2} cells)",
        "",
        "into:",
        "",
        f"  - Combined Assay (with {tb.shape[0]} genes and {tb.shape[1]} cells)",
        "",
    ]))

    gn.export_statically(gn.assay_from_pandas(tb), 'Combined assay')

    gn.export(
        {
            **{x: label1 for x in tb1.index},
            **{x: label2 for x in tb2.index}
        },
        'Assay label',
        meta_type,
    )

    gn.commit()
def main():
    """Drop and merge/relabel groups of cells according to the user arguments.

    Cells whose label is listed in ``drop_set`` are removed from the assay and
    from the labels. Each of the three ``merge_set_N`` arguments names labels
    whose cells are all relabelled to ``relabel_set_N`` (defaulting to the
    merged labels joined with ``" + "``). Errors raised while dropping or
    merging are reported as markdown results instead of aborting. The adjusted
    assay and labels are exported.
    """
    tic = time.perf_counter()

    gn = Granatum()
    assay = gn.pandas_from_assay(gn.get_import('assay'))
    groups = gn.get_import('groups')

    # Invert {cell: label} into {label: [cells]}.
    inv_map = {}
    for cell, label in groups.items():
        inv_map.setdefault(label, []).append(cell)

    drop_set = parse(gn.get_arg('drop_set'))
    merge_sets = [
        parse(gn.get_arg(arg_name))
        for arg_name in ('merge_set_1', 'merge_set_2', 'merge_set_3')
    ]
    relabel_sets = [
        gn.get_arg(arg_name)
        for arg_name in ('relabel_set_1', 'relabel_set_2', 'relabel_set_3')
    ]

    # A merge set without an explicit new label is named after its members.
    for position, members in enumerate(merge_sets):
        if len(members) > 0 and relabel_sets[position] == "":
            relabel_sets[position] = " + ".join(members)

    try:
        for ds in drop_set:
            cells = inv_map[ds]
            gn.add_result(
                "Dropping {} cells that match {}".format(len(cells), ds),
                "markdown")
            assay = assay.drop(cells, axis=1)
            groups = {key: val for key, val in groups.items() if val != ds}
    except Exception as e:
        gn.add_result(
            "Error found in drop set, remember it should be comma separated: {}"
            .format(e), "markdown")

    try:
        for members, new_label in zip(merge_sets, relabel_sets):
            if len(members) > 0:
                # Collect every cell first so that an unknown label aborts
                # this merge set before any of its cells is relabelled.
                merged_cells = []
                for old_label in members:
                    merged_cells = merged_cells + inv_map[old_label]
                for cell in merged_cells:
                    groups[cell] = new_label
    except Exception as e:
        gn.add_result(
            "Error found in merge sets, remember it should be comma separated: {}"
            .format(e), "markdown")

    toc = time.perf_counter()
    time_passed = round(toc - tic, 2)

    gn.export_statically(gn.assay_from_pandas(assay), "Label adjusted assay")
    gn.export_statically(groups, "Adjusted labels")

    timing = "* Finished sample coloring step in {} seconds*".format(
        time_passed)
    gn.add_result(timing, "markdown")

    gn.commit()