Code example #1
def main():
    gn = Granatum()

    adata = gn.ann_data_from_assay(gn.get_import('assay'))
    random_seed = gn.get_arg('random_seed')

    sc.tl.tsne(adata, random_state=random_seed)

    X_tsne = adata.obsm['X_tsne']

    plt.figure()
    plt.scatter(X_tsne[:, 0], X_tsne[:, 1], s=5000 / adata.shape[0])
    plt.xlabel('t-SNE dim. 1')
    plt.ylabel('t-SNE dim. 2')
    plt.tight_layout()
    gn.add_current_figure_to_results('t-SNE plot: each dot represents a cell',
                                     dpi=75)

    tsne_export = {
        'dimNames': ['t-SNE dim. 1', 't-SNE dim. 2'],
        'coords': {
            sample_id: X_tsne[i, :].tolist()
            for i, sample_id in enumerate(adata.obs_names)
        },
    }
    gn.export_statically(tsne_export, 't-SNE coordinates')

    gn.commit()
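These snippets are the bodies of individual gbox steps and omit their module-level imports. A minimal set that this t-SNE step (and most of the examples below) presumably relies on is sketched here; treat the granatum_sdk import path as an assumption inferred from how Granatum() is used.

# Assumed imports for this snippet (module paths are an assumption):
import matplotlib.pyplot as plt
import scanpy as sc
from granatum_sdk import Granatum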
Code example #2
def main():
    gn = Granatum()

    assay = gn.get_import('assay')

    matrix = np.array(assay.get('matrix'))
    sample_ids = assay.get('sampleIds')

    num_samples = matrix.shape[1]

    # ---- PCA --------------------------------------------------------------------

    X = np.transpose(matrix)
    model = PCA(n_components=2)
    Y_pca = model.fit_transform(X)

    pca_export = {
        'dimNames': ['PCA-1', 'PCA-2'],
        'coords': {
            sample_id: Y_pca[i, :].tolist()
            for i, sample_id in enumerate(sample_ids)
        },
    }
    gn.export_statically(pca_export, 'pca')

    plt.figure()
    plt.scatter(Y_pca[:, 0], Y_pca[:, 1], s=5000 / num_samples)
    plt.tight_layout()

    gn.add_current_figure_to_results(
        'Principal Component Analysis (PCA) scatter-plot', dpi=75)

    # ---- T-SNE ------------------------------------------------------------------

    X = np.transpose(matrix)
    model = TSNE(n_jobs=multiprocessing.cpu_count())
    Y_tsne = model.fit_transform(X)

    tsne_export = {
        'dimNames': ['tSNE-1', 'tSNE-2'],
        'coords': {
            sample_id: Y_tsne[i, :].tolist()
            for i, sample_id in enumerate(sample_ids)
        },
    }
    gn.export_statically(tsne_export, 'tsne')

    plt.figure()
    plt.scatter(Y_tsne[:, 0], Y_tsne[:, 1], s=5000 / num_samples)
    plt.tight_layout()

    gn.add_current_figure_to_results(
        't-Distributed Stochastic Neighbor Embedding (t-SNE) scatter-plot',
        dpi=75)

    gn.commit()
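This step presumably uses scikit-learn's PCA, and a TSNE constructed with n_jobs suggests either scikit-learn's TSNE (which accepts n_jobs in recent versions) or a multicore drop-in such as MulticoreTSNE. A plausible, hedged import block:

# Assumed imports for this snippet; the TSNE source is a guess based on the
# n_jobs argument (MulticoreTSNE is a common drop-in alternative).
import multiprocessing
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE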
Code example #3
def main():
    tic = time.perf_counter()

    gn = Granatum()
    assay = gn.pandas_from_assay(gn.get_import('assay'))
    groups = gn.get_import('groups')
    reflabels = gn.get_import('reflabels')
    remove_cells = gn.get_arg('remove_cells')

    inv_map = {}
    for k, v in groups.items():
        inv_map[v] = inv_map.get(v, []) + [k]

    inv_map_ref = {}
    for k, v in reflabels.items():
        inv_map_ref[v] = inv_map_ref.get(v, []) + [k]

    group_relabel = {}
    mislabelled_cells = []
    for k, v in inv_map.items():
        vset = set(v)
        label_scores = {}
        for kref, vref in inv_map_ref.items():
            label_scores[kref] = len(set(vref).intersection(vset))
        group_relabel[k] = max(label_scores, key=label_scores.get)
        mislabelled_cells = mislabelled_cells + list(
            vset.difference(set(inv_map_ref[group_relabel[k]])))

    if remove_cells:
        gn.add_result(
            "Dropping {} mislabelled cells".format(len(mislabelled_cells)),
            "markdown")
        assay = assay.drop(mislabelled_cells, axis=1)
        groups = {
            key: val
            for key, val in groups.items() if key not in mislabelled_cells
        }

    for cell in groups:
        groups[cell] = group_relabel[groups[cell]]

    toc = time.perf_counter()
    time_passed = round(toc - tic, 2)

    gn.export_statically(gn.assay_from_pandas(assay), "Corresponded assay")
    gn.export_statically(groups, "Corresponded labels")

    timing = "* Finished sample coloring step in {} seconds*".format(
        time_passed)
    gn.add_result(timing, "markdown")

    gn.commit()
Code example #4
def main():
    gn = Granatum()

    assay = gn.get_import('assay')

    matrix = np.array(assay.get('matrix'))

    # Center each gene: subtract its mean expression across all samples.
    transformed_matrix = matrix - matrix.mean(axis=1, keepdims=True)
    assay['matrix'] = transformed_matrix.tolist()

    plot_distribution_comparison(matrix, transformed_matrix, gn)

    gn.export_statically(assay, 'Gene centered assay')

    gn.commit()
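plot_distribution_comparison is not defined in this snippet. A minimal sketch of what such a helper might look like, assuming it follows the before/after histogram pattern used in the other examples:

def plot_distribution_comparison(before, after, gn):
    # Hypothetical helper: histograms of the expression distribution before
    # and after the transformation, attached to the results via the gn handle.
    plt.figure()
    plt.subplot(2, 1, 1)
    plt.title('Before gene centering')
    plt.hist(before.flatten(), bins=100)
    plt.subplot(2, 1, 2)
    plt.title('After gene centering')
    plt.hist(after.flatten(), bins=100)
    plt.tight_layout()
    gn.add_current_figure_to_results(
        'Expression distribution before and after gene centering', dpi=75)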
Code example #5
def main():
    gn = Granatum()

    assay = gn.get_import('assay')
    matrix = np.array(assay.get('matrix'))

    take_log = gn.get_arg('take_log')
    log_base = gn.get_arg('logBase')
    epsilon = gn.get_arg('epsilon')

    # Beta-to-M transformation: form the odds ratio beta / (1 - beta), with
    # epsilon guarding against zeros and ones, then optionally take the log.
    transformed_matrix = (matrix + epsilon) / (1 - matrix + epsilon)
    if take_log:
        transformed_matrix = np.log(transformed_matrix) / np.log(log_base)

    non_zero_values_before = matrix.flatten()
    non_zero_values_before = non_zero_values_before[(
        non_zero_values_before > np.percentile(non_zero_values_before, 5))]

    non_zero_values_after = transformed_matrix.flatten()
    non_zero_values_after = non_zero_values_after[(
        non_zero_values_after > np.percentile(non_zero_values_after, 5))]

    plt.figure()

    plt.subplot(2, 1, 1)
    plt.title('Before beta-to-m transformation')
    plt.hist(non_zero_values_before, bins=100)
    plt.ylabel('Frequency')
    plt.xlabel('Expression level')

    plt.subplot(2, 1, 2)
    plt.title('After beta-to-m transformation')
    plt.hist(non_zero_values_after, bins=100)
    plt.ylabel('Frequency')
    plt.xlabel('Expression level')

    plt.tight_layout()

    caption = (
        'The distribution of expression levels before and after the beta-to-m transformation. Only values greater '
        'than the 5th percentile (usually zero in single-cell data) are shown.'
    )
    gn.add_current_figure_to_results(caption, zoom=2, dpi=50)

    assay['matrix'] = transformed_matrix.tolist()
    gn.export_statically(assay, 'Beta-to-m transformed assay')

    gn.commit()
Code example #6
def main():
    gn = Granatum()

    assay = gn.get_import('assay')

    args_for_init = {
        'selected_embedding': gn.get_arg('selectedEmbedding'),
        'selected_clustering': gn.get_arg('selectedClustering'),
        'n_components': gn.get_arg('nComponents'),
        'n_clusters': gn.get_arg('nClusters'),
        'find_best_number_of_cluster': gn.get_arg('findBestNumberOfCluster'),
    }

    args_for_fit = {
        'matrix': np.transpose(np.array(assay.get('matrix'))),
        'sample_ids': assay.get('sampleIds'),
    }

    granatum_clustering = GranatumDeepClustering(**args_for_init)
    fit_results = granatum_clustering.fit(**args_for_fit)

    fit_exp = fit_results.get('clusters')
    gn.export_statically(fit_exp, 'Cluster assignment')
    newdictstr = [f'"{k}", {v}' for k, v in fit_exp.items()]
    gn.export("\n".join(newdictstr), 'Cluster assignment.csv', kind='raw', meta=None, raw=True)

    md_str = f"""\
## Results

  * Cluster array: `{fit_results.get('clusters_array')}`
  * nClusters: {fit_results.get('n_clusters')}
  * Number of components: {fit_results.get('n_components')}
  * Outliers: {fit_results.get('outliers')}"""
    # gn.add_result(md_str, 'markdown')

    gn.add_result(
        {
            'orient': 'split',
            'columns': ['Sample ID', 'Cluster Assignment'],
            'data': [{'Sample ID':x, 'Cluster Assignment':y} for x, y in zip(assay.get('sampleIds'), fit_results.get('clusters_array'))],
        },
        'table',
    )

    gn.commit()
Code example #7
def main():
    tic = time.perf_counter()

    gn = Granatum()

    df = gn.pandas_from_assay(gn.get_import('assay'))
    n_neighbors = gn.get_arg('n_neighbors')
    min_dist = gn.get_arg('min_dist')
    metric = gn.get_arg('metric')
    random_seed = gn.get_arg('random_seed')

    embedding = umap.UMAP(n_neighbors=n_neighbors,
                          min_dist=min_dist,
                          metric=metric,
                          random_state=random_seed).fit_transform(df.values.T)

    plt.figure()
    plt.scatter(embedding[:, 0], embedding[:, 1],
                s=min(5000 / df.shape[0], 36.0))
    plt.xlabel('UMAP dim. 1')
    plt.ylabel('UMAP dim. 2')
    plt.tight_layout()

    gn.add_current_figure_to_results('UMAP plot: each dot represents a cell',
                                     dpi=75)

    umap_export = {
        'dimNames': ['UMAP dim. 1', 'UMAP dim. 2'],
        'coords': {
            sample_id: embedding[i, :].tolist()
            for i, sample_id in enumerate(df.columns)
        },
    }
    gn.export_statically(umap_export, 'UMAP coordinates')

    toc = time.perf_counter()
    time_passed = round(toc - tic, 2)

    timing = "* Finished UMAP step in {} seconds*".format(time_passed)
    gn.add_result(timing, "markdown")

    gn.commit()
Code example #8
def main():
  gn = Granatum()

  adata = gn.ann_data_from_assay(gn.get_import('assay'))
  outliers = gn.get_arg('outliers')

  num_cells_before = adata.shape[0]

  kept_cell_ids = adata.obs_names.drop(outliers, errors='ignore').values

  adata = adata[kept_cell_ids, :]

  gn.export_statically(gn.assay_from_ann_data(adata), 'Outlier removed assay')
  gn.add_result(
    'You removed {} outliers from {} cells; the resulting assay has {} cells (and {} genes).'.format(
      len(outliers), num_cells_before, adata.shape[0], adata.shape[1]
    ),
    type='markdown'
  )

  gn.commit()
Code example #9
def main():
    gn = Granatum()

    n_neighbors = gn.get_arg('nNeighbors', 15)
    neighbor_method = gn.get_arg('neighborMethod', 'gauss')

    assay = gn.get_import('assay')

    adata = sc.AnnData(np.array(assay.get('matrix')).transpose())
    adata.var_names = assay.get('geneIds')
    adata.obs_names = assay.get('sampleIds')

    sc.pp.neighbors(adata,
                    n_neighbors=n_neighbors,
                    use_rep='X',
                    method=neighbor_method)
    sc.tl.dpt(adata, n_branchings=1)

    gn._pickle(adata, 'adata')

    # dpt_groups

    for spec in [
            {'col': 'dpt_order', 'caption': 'Cell order'},
            {'col': 'dpt_groups', 'caption': 'Cell groups'},
    ]:
        fig = plt.figure()
        sc.pl.diffmap(adata, color=spec['col'])
        gn.add_current_figure_to_results(spec['caption'])
        gn.export_statically(
            dict(
                zip(adata.obs_names.tolist(),
                    adata.obs[spec['col']].values.tolist())), spec['col'])

    gn.commit()
Code example #10
def main():
    gn = Granatum()

    adata = gn.ann_data_from_assay(gn.get_import('assay'))
    sample_coords = gn.get_import('sampleCoords')
    random_seed = gn.get_arg('random_seed')

    sc.pp.neighbors(adata, n_neighbors=20, use_rep='X', method='gauss')
    sc.tl.louvain(adata, random_state=random_seed)

    cluster_assignment = dict(
        zip(adata.obs_names,
            ['Cluster {}'.format(int(c) + 1) for c in adata.obs['louvain']]))
    gn.export_statically(cluster_assignment, 'Cluster assignment')

    dim_names = sample_coords.get('dimNames')
    coords_dict = sample_coords.get('coords')

    plt.figure()
    clusters = adata.obs['louvain'].cat.categories
    for c in clusters:
        cell_ids = adata.obs_names[adata.obs['louvain'] == c]
        coords = [coords_dict.get(x) for x in cell_ids]
        coords_x = [x[0] for x in coords]
        coords_y = [x[1] for x in coords]
        plt.scatter(coords_x, coords_y, label='Cluster {}'.format(int(c) + 1))

    plt.xlabel(dim_names[0])
    plt.ylabel(dim_names[1])
    plt.legend()
    plt.tight_layout()

    gn.add_current_figure_to_results(
        'Scatter-plot using imported cell coordinates. Each dot represents a cell. The colors indicate the identified cell clusters.',
        dpi=75)

    gn.commit()
Code example #11
File: run_glog.py (project: granatumx/gbox-glog)
def main():
    gn = Granatum()

    assay = gn.get_import('assay')
    x = np.array(assay.get('matrix')).astype(float)
    log_base = gn.get_arg('log_base')
    n_top = gn.get_arg('n_top')
    n_bottom = gn.get_arg('n_bottom')
    which_mid = gn.get_arg('which_mid')

    gene_df = pd.DataFrame(
        {
            'row_num': range(x.shape[0]),
            'gene_id': assay.get('geneIds'),
            'exp_mean': np.mean(x, axis=1),
            'exp_std': np.std(x, axis=1),
        }
    )
    gene_df = gene_df.sort_values('exp_mean', ascending=False)
    top_gene_row = gene_df.head(n_top).sort_values('exp_std', ascending=False).iloc[0]
    bottom_gene_row = gene_df.tail(n_bottom).sort_values('exp_std').iloc[0]

    hk_gene = np.clip(x[top_gene_row['row_num'], :], a_min=0.00001, a_max=None)

    neg_gene = x[bottom_gene_row['row_num'], :]

    if which_mid == 'mean':
        alphabk = np.mean(neg_gene[:])
    elif which_mid == 'median':
        alphabk = np.median(neg_gene[:])
    else:
        raise ValueError()

    loghkdatabk = np.log(hk_gene - alphabk) / np.log(log_base)
    
    # Drop NAN values
    loghkdatabk = loghkdatabk[~np.isnan(loghkdatabk)]

    c = (np.std(neg_gene[:], ddof=1) / np.std(loghkdatabk, ddof=1))**2

    xbk = x - alphabk
    transformed_matrix = np.log((xbk + np.sqrt(xbk**2 + c)) / 2) / np.log(log_base)

    gn.add_result(
        '\n'.join(
            [
                f"Selected benchmarking genes:",
                f"  * housekeeping gene: **{top_gene_row['gene_id']}** "
                f"(mean: {top_gene_row['exp_mean']}, std: {top_gene_row['exp_std']}) ",
                f"  * negative control gene: **{bottom_gene_row['gene_id']}**"
                f"(mean: {bottom_gene_row['exp_mean']}, std: {bottom_gene_row['exp_std']})",
                f"",
                f"Final formula is `y = log{log_base}((z + sqrt(z^2 + c))/2)`, where `z = x - {alphabk}` and `c = {c}`."
            ]
        ), 'markdown'
    )

    non_zero_values_before = x.flatten()
    non_zero_values_before = non_zero_values_before[(non_zero_values_before > np.percentile(non_zero_values_before, 5))]

    non_zero_values_after = transformed_matrix.flatten()
    non_zero_values_after = non_zero_values_after[(non_zero_values_after > np.percentile(non_zero_values_after, 5))]

    plt.figure()

    plt.subplot(2, 1, 1)
    plt.title('Before glog transformation')
    plt.hist(non_zero_values_before, bins=100)
    plt.ylabel('Frequency')
    plt.xlabel('Expression level')

    plt.subplot(2, 1, 2)
    plt.title('After glog transformation')
    plt.hist(non_zero_values_after, bins=100)
    plt.ylabel('Frequency')
    plt.xlabel('Expression level')

    plt.tight_layout()

    caption = (
        'The distribution of expression levels before and after the glog transformation. Only values greater '
        'than the 5th percentile (usually zero in single-cell data) are shown.'
    )
    gn.add_current_figure_to_results(caption, zoom=2, dpi=50)

    assay['matrix'] = transformed_matrix.tolist()
    gn.export_statically(assay, 'GLog transformed assay')

    gn.commit()
Code example #12
def main():
    tic = time.perf_counter()

    gn = Granatum()

    assay = gn.pandas_from_assay(gn.get_import('assay'))
    # groups maps cell IDs to cluster labels: {"cell": "cluster"}
    groups = gn.get_import('groups')

    certainty = gn.get_arg('certainty')
    alpha = 1 - certainty / 100.0

    min_zscore = st.norm.ppf(certainty / 100.0)

    min_dist = 0.1

    # Likely we want to filter genes before we get started, namely if we cannot create a good statistic
    norms_df = assay.apply(np.linalg.norm, axis=1)
    assay = assay.loc[norms_df.T >= min_dist, :]

    inv_map = {}
    inv_map_rest = {}
    for k, v in groups.items():
        inv_map[v] = inv_map.get(v, []) + [k]
        clist = inv_map_rest.get(v, list(assay.columns))
        clist.remove(k)
        inv_map_rest[v] = clist
    # Inv map is {"cluster": ["cell"]}
    print("Completed setup", flush=True)

    cols = list(inv_map.keys())

    colnames = []
    for coli in cols:
        for colj in cols:
            if coli != colj:
                colnames.append("{} vs {}".format(coli, colj))
    for coli in cols:
        colnames.append("{} vs rest".format(coli))

    # Instead of scoring into a dataframe, let's analyze each statistically
    # Dict (gene) of dict (cluster) of dict (statistics)
    # { "gene_name" : { "cluster_name" : { statistics data } }}
    # Export would be percentage more/less expressed in "on" state
    # For example gene "XIST" expresses at least 20% more in cluster 1 vs cluster 4 with 95% certainty
    total_genes = len(assay.index)
    print("Executing parallel for {} genes".format(total_genes), flush=True)

    results = Parallel(
        n_jobs=math.floor(multiprocessing.cpu_count() * 2 * 9 / 10))(
            delayed(compref)(gene, assay.loc[gene, :], colnames, inv_map,
                             inv_map_rest, alpha, min_dist, min_zscore)
            for gene in tqdm(list(assay.index)))
    result = pd.concat(results, axis=0)

    gn.export_statically(gn.assay_from_pandas(result.T),
                         'Differential expression sets')
    gn.export(result.to_csv(),
              'differential_gene_sets.csv',
              kind='raw',
              meta=None,
              raw=True)

    toc = time.perf_counter()
    time_passed = round(toc - tic, 2)

    timing = "* Finished differential expression sets step in {} seconds*".format(
        time_passed)
    gn.add_result(timing, "markdown")

    gn.commit()
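compref is not shown in this snippet. Based on how it is called (one gene's expression Series, the comparison column names, the cluster and rest-of-cells maps, plus alpha, min_dist and min_zscore) and on the fact that its return values are concatenated row-wise, a rough sketch of the kind of helper it could be follows; the Welch-style z statistic is purely an assumption, and the significance gating via alpha, min_dist and min_zscore is omitted.

def compref(gene, expr, colnames, inv_map, inv_map_rest, alpha, min_dist,
            min_zscore):
    # Hypothetical sketch: score one gene for every "A vs B" / "A vs rest"
    # comparison and return a single-row DataFrame indexed by the gene name.
    scores = {}
    for name in colnames:
        left, right = name.split(' vs ')
        a = expr[inv_map[left]]
        b = expr[inv_map_rest[left]] if right == 'rest' else expr[inv_map[right]]
        denom = np.sqrt(a.var(ddof=1) / len(a) + b.var(ddof=1) / len(b))
        scores[name] = 0.0 if denom == 0 else (a.mean() - b.mean()) / denom
    return pd.DataFrame(scores, index=[gene])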
Code example #13
def main():
    tic = time.perf_counter()

    gn = Granatum()

    assay = gn.pandas_from_assay(gn.get_import('assay'))
    groups = gn.get_import('groups')

    min_zscore = gn.get_arg('min_zscore')
    max_zscore = gn.get_arg('max_zscore')
    min_expression_variation = gn.get_arg('min_expression_variation')

    inv_map = {}
    for k, v in groups.items():
        inv_map[v] = inv_map.get(v, []) + [k]

    low_mean_dfs = []
    high_mean_dfs = []
    mean_dfs = []
    std_dfs = []
    colnames = []
    for k, v in inv_map.items():
        group_values = assay.loc[:, v]
        lowbound_clust = {}
        highbound_clust = {}
        for index, row in group_values.iterrows():
            meanbounds = sms.DescrStatsW(row).tconfint_mean()
            lowbound_clust[index] = meanbounds[0]
            highbound_clust[index] = meanbounds[1]
        low_mean_dfs.append(pd.DataFrame.from_dict(lowbound_clust, orient="index", columns=[k]))
        high_mean_dfs.append(pd.DataFrame.from_dict(highbound_clust, orient="index", columns=[k]))
        mean_dfs.append(group_values.mean(axis=1))
        std_dfs.append(group_values.std(axis=1))
        colnames.append(k)
    mean_df = pd.concat(mean_dfs, axis=1)
    mean_df.columns = colnames
    low_mean_df = pd.concat(low_mean_dfs, axis=1)
    low_mean_df.columns = colnames
    high_mean_df = pd.concat(high_mean_dfs, axis=1)
    high_mean_df.columns = colnames
    std_df = pd.concat(std_dfs, axis=1)
    std_df.columns = colnames
    print(std_df)
    minvalues = std_df.min(axis=1).to_frame()
    minvalues.columns = ["min"]
    print("Minvalues>>")
    print(minvalues, flush=True)
    genes_below_min = list((minvalues[minvalues["min"] < min_expression_variation]).index)
    print("{} out of {}".format(len(genes_below_min), len(minvalues.index)), flush=True)
    mean_df = mean_df.drop(genes_below_min, axis=0)
    low_mean_df = low_mean_df.drop(genes_below_min, axis=0)
    high_mean_df = high_mean_df.drop(genes_below_min, axis=0)
    std_df = std_df.drop(genes_below_min, axis=0)
    assay = assay.drop(genes_below_min, axis=0)
    print("Filtered assay to get {} columns by {} rows".format(len(assay.columns), len(assay.index)), flush=True)

    mean_rest_dfs = []
    std_rest_dfs = []
    colnames = []
    for k, v in inv_map.items():
        rest_v = list(set(list(assay.columns)).difference(set(v)))
        mean_rest_dfs.append(assay.loc[:, rest_v].mean(axis=1))
        std_rest_dfs.append(assay.loc[:, rest_v].std(axis=1))
        colnames.append(k)
    mean_rest_df = pd.concat(mean_rest_dfs, axis=1)
    mean_rest_df.columns = colnames
    std_rest_df = pd.concat(std_rest_dfs, axis=1)
    std_rest_df.columns = colnames

    zscore_dfs = []
    cols = colnames
    colnames = []
    for coli in cols:
        for colj in cols:
            if coli != colj:
                # Here we should check significance
                # Fetch most realistic mean comparison set, what is smallest difference between two ranges
                mean_diff_overlap_low_high = (low_mean_df[coli]-high_mean_df[colj])
                mean_diff_overlap_high_low = (high_mean_df[coli]-low_mean_df[colj])
                diff_df = mean_diff_overlap_low_high.combine(mean_diff_overlap_high_low, range_check)

                zscore_dfs.append((diff_df/(std_df[colj]+std_df[coli]/4)).fillna(0).clip(-max_zscore, max_zscore))
                colnames.append("{} vs {}".format(coli, colj)) 
    for coli in cols:
        # "cluster vs rest" comparison: index the rest-of-cells statistics by
        # coli (colj left over from the loop above would be stale here).
        zscore_dfs.append(((mean_df[coli]-mean_rest_df[coli])/(std_rest_df[coli]+std_rest_df[coli]/4)).fillna(0).clip(-max_zscore, max_zscore))
        colnames.append("{} vs rest".format(coli))

    zscore_df = pd.concat(zscore_dfs, axis=1)
    zscore_df.columns = colnames
    norms_df = zscore_df.apply(np.linalg.norm, axis=1)
    colsmatching = norms_df.T[(norms_df.T >= min_zscore)].index.values
    return_df = zscore_df.T[colsmatching]
    gn.export_statically(gn.assay_from_pandas(return_df), 'Differential expression sets')
    gn.export(return_df.T.to_csv(), 'differential_gene_sets.csv', kind='raw', meta=None, raw=True)

    toc = time.perf_counter()
    time_passed = round(toc - tic, 2)

    timing = "* Finished differential expression sets step in {} seconds*".format(time_passed)
    gn.add_result(timing, "markdown")

    gn.commit()
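range_check is not defined in the snippet. Given the comment about taking the smallest difference between the two confidence ranges, one plausible sketch (an assumption) is an element-wise combiner that returns the more conservative of the two mean differences and treats overlapping ranges as no difference:

def range_check(low_high, high_low):
    # Hypothetical combiner for Series.combine: the arguments are the
    # low-vs-high and high-vs-low confidence-bound differences for one gene.
    # If the two ranges overlap (differences disagree in sign), report 0;
    # otherwise keep the smaller-magnitude, more conservative difference.
    if low_high * high_low <= 0:
        return 0.0
    return low_high if abs(low_high) < abs(high_low) else high_low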
Code example #14
def main():
    gn = Granatum()

    tb1 = gn.pandas_from_assay(gn.get_import('assay1'))
    tb2 = gn.pandas_from_assay(gn.get_import('assay2'))
    label1 = gn.get_arg('label1')
    label2 = gn.get_arg('label2')
    direction = gn.get_arg('direction')
    normalization = gn.get_arg('normalization')

    if direction == 'samples':
        tb1 = tb1.T
        tb2 = tb2.T

    overlapped_index = set(tb1.index) & set(tb2.index)
    tb1.index = [
        f"{label1}_{x}" if x in overlapped_index else x for x in tb1.index
    ]
    tb2.index = [
        f"{label2}_{x}" if x in overlapped_index else x for x in tb2.index
    ]

    if normalization == 'none':
        tb = pd.concat([tb1, tb2], axis=0)
    elif normalization == 'frobenius':
        ntb1 = np.linalg.norm(tb1)
        ntb2 = np.linalg.norm(tb2)
        ntb = np.mean([ntb1, ntb2])
        fct1 = ntb / ntb1
        fct2 = ntb / ntb2
        tb = pd.concat([tb1 * fct1, tb2 * fct2], axis=0)
        gn.add_markdown(f"""\

Normalization info:

  - Assay **{label1}** is multiplied by {fct1}
  - Assay **{label2}** is multiplied by {fct2}
""")
    elif normalization == 'mean':
        ntb1 = np.mean(tb1)
        ntb2 = np.mean(tb2)
        ntb = np.mean([ntb1, ntb2])
        fct1 = ntb / ntb1
        fct2 = ntb / ntb2
        tb = pd.concat([tb1 * fct1, tb2 * fct2], axis=0)

        gn.add_markdown(f"""\

Normalization info:

  - Assay **{label1}** is multiplied by {fct1}
  - Assay **{label2}** is multiplied by {fct2}
""")
    else:
        raise ValueError()

    if direction == 'samples':
        tb = tb.T

    gn.add_markdown(f"""\
You combined the following assays:

  - Assay 1 (with {tb1.shape[0]} genes and {tb1.shape[1]} cells)
  - Assay 2 (with {tb2.shape[0]} genes and {tb2.shape[1]} cells)

into:

  - Combined Assay (with {tb.shape[0]} genes and {tb.shape[1]} cells)
""")

    gn.export_statically(gn.assay_from_pandas(tb), 'Combined assay')

    if direction == 'samples':
        meta_type = 'sampleMeta'
    elif direction == 'genes':
        meta_type = 'geneMeta'
    else:
        raise ValueError()

    gn.export(
        {
            **{x: label1
               for x in tb1.index},
            **{x: label2
               for x in tb2.index}
        }, 'Assay label', meta_type)

    gn.commit()
Code example #15
def main():
    tic = time.perf_counter()

    gn = Granatum()
    assay = gn.pandas_from_assay(gn.get_import('assay'))
    groups = gn.get_import('groups')

    inv_map = {}
    for k, v in groups.items():
        inv_map[v] = inv_map.get(v, []) + [k]

    drop_set = parse(gn.get_arg('drop_set'))
    merge_set_1 = parse(gn.get_arg('merge_set_1'))
    merge_set_2 = parse(gn.get_arg('merge_set_2'))
    merge_set_3 = parse(gn.get_arg('merge_set_3'))
    relabel_set_1 = gn.get_arg('relabel_set_1')
    relabel_set_2 = gn.get_arg('relabel_set_2')
    relabel_set_3 = gn.get_arg('relabel_set_3')

    if len(merge_set_1) > 0:
        if relabel_set_1 == "":
            relabel_set_1 = " + ".join(merge_set_1)

    if len(merge_set_2) > 0:
        if relabel_set_2 == "":
            relabel_set_2 = " + ".join(merge_set_2)

    if len(merge_set_3) > 0:
        if relabel_set_3 == "":
            relabel_set_3 = " + ".join(merge_set_3)

    try:
        for ds in drop_set:
            cells = inv_map[ds]
            gn.add_result(
                "Dropping {} cells that match {}".format(len(cells), ds),
                "markdown")
            assay = assay.drop(cells, axis=1)
            groups = {key: val for key, val in groups.items() if val != ds}
    except Exception as e:
        gn.add_result(
            "Error found in drop set, remember it should be comma separated: {}"
            .format(e), "markdown")

    try:
        if len(merge_set_1) > 0:
            merge_set_1_cells = []
            for ms1 in merge_set_1:
                merge_set_1_cells = merge_set_1_cells + inv_map[ms1]
            for cell in merge_set_1_cells:
                groups[cell] = relabel_set_1

        if len(merge_set_2) > 0:
            merge_set_2_cells = []
            for ms2 in merge_set_2:
                merge_set_2_cells = merge_set_2_cells + inv_map[ms2]
            for cell in merge_set_2_cells:
                groups[cell] = relabel_set_2

        if len(merge_set_3) > 0:
            merge_set_3_cells = []
            for ms3 in merge_set_3:
                merge_set_3_cells = merge_set_3_cells + inv_map[ms3]
            for cell in merge_set_3_cells:
                groups[cell] = relabel_set_3
    except Exception as e:
        gn.add_result(
            "Error found in merge sets, remember it should be comma separated: {}"
            .format(e), "markdown")

    toc = time.perf_counter()
    time_passed = round(toc - tic, 2)

    gn.export_statically(gn.assay_from_pandas(assay), "Label adjusted assay")
    gn.export_statically(groups, "Adjusted labels")

    timing = "* Finished sample coloring step in {} seconds*".format(
        time_passed)
    gn.add_result(timing, "markdown")

    gn.commit()
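parse is not shown in this snippet; it presumably turns a comma-separated argument string into a list of cluster labels. A minimal sketch under that assumption:

def parse(arg):
    # Hypothetical parser: split a comma-separated string into trimmed,
    # non-empty labels; a blank argument yields an empty list.
    if not arg:
        return []
    return [token.strip() for token in arg.split(',') if token.strip()]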