def main(save_folder, adata):
    """Highlight cytokine and responder gene containing spots in the epidermis (paper Figure 4D).

    The data set is restricted to spots that carry exactly one of the three
    epidermis layer labels, spots whose ``tissue_type`` is still 'INTERFACE'
    are relabelled as 'basal EPIDERMIS', and the result is passed on to
    ``convert_categories_cytokines_responders_others``.

    Parameters
    ----------
    save_folder : str
        Forwarded unchanged to ``convert_categories_cytokines_responders_others``.
    adata : annData
        Input data set. Metadata is added via ``ctools.add_metadata`` only when
        "patient" is not yet among the observation keys.

    Returns
    -------
    None
    """
    tissue_layers = ['upper EPIDERMIS', 'middle EPIDERMIS', 'basal EPIDERMIS']
    img_key = 'hires'

    # 1. Get cytokines and responders (only the third return value is used here)
    _, _, cytokine_responders = gene_lists.get_publication_cyto_resps()

    # 2. Add meta data like which samples belong to which donor (optional)
    if "patient" not in adata.obs_keys():
        adata, tissue_cell_labels, _, _ = ctools.add_metadata(adata)
        # 2.1 Remove spots having no tissue/cell labels (since 06.10.2020)
        has_any_label = adata.obs[tissue_cell_labels].to_numpy().any(axis=1)
        adata = adata[np.where(has_any_label)[0]]

    # 3. Use only tissue types of interest
    # 3.1 Add tissue types
    adata = ctools.add_tissue_obs(adata)
    # 3.2 Keep spots flagged with exactly one of the epidermis layers
    in_single_layer = (adata.obs[tissue_layers] == 1).sum(axis=1) == 1
    # Subsetting yields a view; copy it so that the relabelling below writes into
    # a real object instead of relying on an implicit view-to-copy conversion.
    adata = adata[in_single_layer].copy()

    # 3.3 Rename tissue region 'INTERFACE' to basal EPIDERMIS because some spots got both labels.
    # A single .loc assignment replaces the former chained indexing
    # (obs[col][mask] = ...), which is not guaranteed to modify the underlying
    # frame and is a no-op under pandas copy-on-write.
    m_interface = adata.obs['tissue_type'] == 'INTERFACE'
    adata.obs.loc[m_interface, 'tissue_type'] = 'basal EPIDERMIS'

    # Paper Figure 4D: Highlight cytokine and responder genes containing spots
    # and UMI-counts in the EPIDERMIS
    convert_categories_cytokines_responders_others(
        adata,
        cyto_responder_genes=cytokine_responders,
        save_folder=save_folder,
        img_key=img_key)
def main(save_folder, spatial_adata):
    """Plot the IL17A cytokine observable on the tissue layers of the whole ST data set.

    According to the original note this read-out belongs to Figure 3A.

    Parameters
    ----------
    save_folder : str
        Forwarded to ``plot_tissuerlayers_cyto``.
    spatial_adata : annData
        Data set with a 'tissue_type' observable.

    Returns
    -------
    None
    """
    tissue_obs = 'tissue_type'

    # Load gene lists; only the cytokines (first return value) are needed here
    cytokines, _, _ = gene_lists.get_publication_cyto_resps()

    # Drop every spot whose tissue label is 'Unknown'
    labelled = spatial_adata.obs[tissue_obs] != 'Unknown'
    spatial_adata = spatial_adata[labelled]

    # Step 1: turn the cytokine genes into observables
    spatial_adata, _ = add_observables.convert_variable_to_observable(
        adata=spatial_adata, gene_names=cytokines, task='cell_gene', label='celltype', condition=None)

    # Step 2: highlight the tissue layers together with the IL17A observable
    plot_tissuerlayers_cyto(
        adata=spatial_adata, obs_name='cytokine_IL17A', title='Wholedataset_IL17A',
        save_folder=save_folder, regions=tissue_obs)
def main(save_folder, spatial_adata):
    """Select leukocyte spots of the ST data set, plot IL13/IFNG and read out cytokine spots.

    According to the original note this read-out belongs to Figure 3A/E and
    Suppl. Figures 3.

    Parameters
    ----------
    save_folder : str
        Forwarded to the plotting and read-out helpers.
    spatial_adata : annData
        Data set with a 'tissue_type' observable.

    Returns
    -------
    None
    """
    tissue_obs = 'tissue_type'

    # Gene lists; only the cytokines (first return value) of the publication lists are used
    cytokines, _, _ = gene_lists.get_publication_cyto_resps()
    leukocyte_markers = gene_lists.leukocyte_markers()

    # Drop every spot whose tissue label is 'Unknown'
    labelled = spatial_adata.obs[tissue_obs] != 'Unknown'
    spatial_adata = spatial_adata[labelled]

    # Step 1: turn cytokine genes, then leukocyte markers, into observables
    for gene_set in (cytokines, leukocyte_markers):
        spatial_adata, _ = add_observables.convert_variable_to_observable(
            adata=spatial_adata, gene_names=gene_set, task='cell_gene', label='celltype', condition=None)

    # Step 2.1: keep only the spots selected by the leukocyte markers
    leukocytes = get_celltypes_data(spatial_adata, genes=leukocyte_markers)

    # Step 2.2: merge epidermis layers into epidermis and dermis depths into dermis
    leukocytes = get_tissueregions(adata=leukocytes, tissue_label=tissue_obs)

    # Step 3: highlight the tissue regions together with single cytokines
    for obs_name, title in (('cytokine_IL13', 'Leukocytes_IL13'),
                            ('cytokine_IFNG', 'Leukocytes_IFNG')):
        plot_tissueregions_cyto(adata=leukocytes, obs_name=obs_name, title=title, save_folder=save_folder)

    # Step 4: read out all leukocyte positive spots
    include_cytokine_dp(
        adata=leukocytes, cytokines=cytokines, save_folder=save_folder,
        label=tissue_obs, key='ST', paper_figure='3AC_Leukocytes')
def main(save_folder, adata):
    """Read out leukocyte cells of the single cell data set and plot the IL17A label.

    Parameters
    ----------
    save_folder : str
        Forwarded to the read-out and plotting helpers.
    adata : annData
        Single cell RNA-seq data set.

    Returns
    -------
    None
    """
    cluster_obs = 'cluster_labels'

    # Step 1: gene lists; only the cytokines (first return value) of the publication lists are used
    cytokines, _, _ = gene_lists.get_publication_cyto_resps()
    leukocyte_markers = gene_lists.leukocyte_markers()

    # Step 2: turn the cytokine genes into observables
    adata, _ = add_observables.convert_variable_to_observable(
        adata=adata, gene_names=cytokines, task='cell_gene', label='celltype', condition=None)

    # Step 3.1: keep only the cells selected by the leukocyte markers
    leukocytes = get_celltypes_data(adata, genes=leukocyte_markers)

    # Step 3.2: read out all leukocyte cells
    include_cytokine_dp(
        adata=leukocytes, cytokines=cytokines, save_folder=save_folder,
        label=cluster_obs, key='SC_merged', paper_figure='SC')

    # Step 3.3: add the IL17A label
    leukocytes = add_observables.add_columns_genes(adata=leukocytes, genes='IL17A', label='IL17A')

    # Figure 3D: Highlight IL-17A
    plot_annotated_cells(
        adata=leukocytes, color='IL17A_label', paper_figure='D', save_folder=save_folder,
        key='SC', title='Leukocytes_IL17A', xpos=0.02, ypos=0.95)
def main(save_folder, adata):
    """Read out leukocyte cells of the single cell data set and plot cytokine labels.

    Produces one plot coloured by 'cytokines_others' and one coloured by
    'IFNG_label' (Suppl. Figure 4B according to the original note).

    Parameters
    ----------
    save_folder : str
        Forwarded to the read-out and plotting helpers.
    adata : annData
        Single cell RNA-seq data set.

    Returns
    -------
    None
    """
    cluster_obs = 'cluster_labels'

    # Step 1: gene lists
    cytokines, _, cytoresps_dict = gene_lists.get_publication_cyto_resps()
    leukocyte_markers = gene_lists.leukocyte_markers()

    # Step 2: turn the cytokine genes into observables
    adata, _ = add_observables.convert_variable_to_observable(
        adata=adata,
        gene_names=cytokines,
        task='cell_gene',
        label='celltype',
        condition=None,
    )

    # Step 3.1: keep only the cells selected by the leukocyte markers
    leukocytes = get_celltypes_data(adata, genes=leukocyte_markers)

    # Step 3.2: read out all leukocyte cells
    include_cytokine_dp(
        adata=leukocytes,
        cytokines=cytokines,
        save_folder=save_folder,
        label=cluster_obs,
        key='SC_merged',
        paper_figure='SC',
    )

    # Step 3.3: read out all leukocyte cells but exclude double positive cytokine cells
    leukocytes, _ = exclude_cytokine_dp(adata=leukocytes, cytoresps_dict=cytoresps_dict)

    # Plot cytokines and highlight double positive
    plot_annotated_cells(
        adata=leukocytes,
        color='cytokines_others',
        paper_figure='',
        save_folder=save_folder,
        key='SC',
        title="Leukocytes_IL17A_IFNG",
        xpos=0.02,
        ypos=0.95,
        palette=["#ff7f00", "#377eb8", 'purple'],
    )

    # Add the IFNG label, then highlight it (Suppl. Figure 4B: Highlight IFN-g)
    leukocytes = add_observables.add_columns_genes(adata=leukocytes, genes='IFNG', label='IFNG')
    plot_annotated_cells(
        adata=leukocytes,
        color='IFNG_label',
        paper_figure='4B',
        save_folder=save_folder,
        key='SC',
        title="Leukocyte_IFNG",
        xpos=0.02,
        ypos=0.95,
    )
# Example no. 6
# 0
def main(save_folder, adata):
    """Highlight cytokine and responder gene containing spots and UMI-counts (paper Figure 4B).

    Parameters
    ----------
    save_folder : str
        Forwarded to ``convert_categories_cytokines_responders_others``.
    adata : annData
        Input data set. Metadata is added via ``ctools.add_metadata`` only when
        "patient" is not yet among the observation keys.

    Returns
    -------
    None
    """
    # Step 1: cytokines and responders (only the third return value is used here)
    _, _, cytokine_responders = gene_lists.get_publication_cyto_resps()

    # Step 2: add meta data like which samples belong to which donor (optional)
    if "patient" not in adata.obs_keys():
        adata, tissue_cell_labels, _, _ = ctools.add_metadata(adata)
        # Keep only spots that carry at least one tissue/cell label (since 06.10.2020)
        has_any_label = adata.obs[tissue_cell_labels].to_numpy().any(axis=1)
        adata = adata[np.flatnonzero(has_any_label)]

    # Step 3: Paper Figure 4B plots
    convert_categories_cytokines_responders_others(
        adata, cyto_responder_genes=cytokine_responders, save_folder=save_folder, img_key='hires')
# Example no. 7
# 0
def main(save_folder, pp_adata, cluster_algorithm):
    """Cluster a pre-processed data set, annotate the clusters and plot them.

    Per the original notes: for the scRNAseq data set the clustering is run
    with resolution r = 0.1 and clusters are annotated manually; for the ST
    data set the count matrix is visualised with tissue types.

    Parameters
    ----------
    save_folder : str
        Forwarded to ``visualise_clusters``.
    pp_adata : annData
        Pre-processed data set.
    cluster_algorithm : str
        Forwarded to ``apply_clusteralgo`` and ``annotate_cluster``.

    Returns
    -------
    None
    """
    resolution = 0.1

    # Step 1: gene lists; only the cytokines (first return value) are needed
    cytokines, _, _ = gene_lists.get_publication_cyto_resps()

    # Step 2: turn the cytokine genes into observables
    pp_adata, _ = add_observables.convert_variable_to_observable(
        adata=pp_adata, gene_names=cytokines, task='cell_gene', label='celltype', condition=None)

    # Step 3: run the cluster algorithm
    pp_adata, _ = apply_clusteralgo(adata=pp_adata, algorithm=cluster_algorithm, resolution=resolution)

    # Step 4: annotate clusters with expert opinion - before the best resolution r=0.1 was identified
    pp_adata = annotate_cluster(adata=pp_adata, cluster_algorithm=cluster_algorithm, resolution=resolution)

    # Step 5: plot the annotated clusters
    visualise_clusters(adata=pp_adata, save_folder=save_folder, key='cluster_labels', title="SC")
def main(adata, save_folder, tissue_types, radii, get_plots=False):
    """Run the spatial correlation analysis for one or several radii.

    Parameters
    ----------
    adata : annData
    save_folder : str
    tissue_types : str, list
    radii : int, list
        A single radius, or a list of radii that are processed one after another.
    get_plots : bool
        Forwarded to ``run_spatialcorrelation``.

    Returns
    -------
    None
    """
    # Step 1: cytokines and their responders
    conditional_genes, _, conditionalgenes_responders = gene_lists.get_publication_cyto_resps()

    # Step 2: prepare the adata object
    adata = data_preparation(
        adata=adata,
        tissue_types=tissue_types,
        conditional_genes=conditional_genes,
        conditionalgenes_responders=conditionalgenes_responders,
    )

    # Step 3: run conditional clustering and calculate (spatially weighted) correlation.
    # A non-list ``radii`` is treated as a one-element sequence so both cases share one loop.
    radius_values = radii if isinstance(radii, list) else [radii]
    sig = []
    for current_radius in radius_values:
        # Each run receives the results collected so far and returns the updated collection
        sig = run_spatialcorrelation(
            adata=adata,
            tissue_types=tissue_types,
            cytokine_responders=conditionalgenes_responders,
            save_folder=save_folder,
            radius=current_radius,
            sig=sig,
            get_plots=get_plots,
        )

    # Step 4: evaluate distance via elbow plot; skipped unless there is more than one entry
    if len(sig) <= 1:
        return
    plot_evaluations.plot_evaluate_distance(
        significance=sig, cytokines=conditional_genes, save_folder=save_folder)
def _read_csv_skipping_bad_lines(path):
    """Read a CSV file with pandas, warning about and skipping malformed rows.

    ``error_bad_lines`` was deprecated in pandas 1.3 and removed in 2.0 in
    favour of ``on_bad_lines``. The current keyword is tried first; older
    pandas versions reject it with a ``TypeError`` and get the legacy keyword.
    """
    try:
        return pd.read_csv(path, on_bad_lines='warn')
    except TypeError:
        return pd.read_csv(path, error_bad_lines=False)


def main(dataset_type, save_folder, df_keys, log, dge_results_folder):
    """Create volcano and violin plots from DGE result tables, one set per cytokine.

    For every cytokine, all files below ``dge_results_folder`` matching
    ``*<cytokine>*all_genes.csv`` are read, de-duplicated on 'gene_symbol',
    passed through ``_write_dataframe`` and visualised. The counts of the
    highlighted genes are written to ``<cytokine>_Counts_Highlight_genes.csv``.

    Parameters
    ----------
    dataset_type : str
        'SC' selects the 'cluster_labels' observable, anything else
        'tissue_type'. Also forwarded to ``load_adata``.
    save_folder : str
        Root of the output tree; results go to ``save_folder/<design>/<cytokine>``.
    df_keys : list
        Forwarded to the volcano plot functions.
    log : bool
        Forwarded to ``plot_violins``.
    dge_results_folder : str
        Folder that is searched recursively for DGE result CSV files.

    Returns
    -------
    None
    """
    # Determine name of cluster observable.
    # (Previously compared the undefined name ``dataset`` instead of the parameter.)
    cluster_label = 'cluster_labels' if dataset_type == 'SC' else 'tissue_type'

    print("# ------ Load data ------ #")
    cytokines, _, cytoresps_dict = gene_lists.get_publication_cyto_resps()
    genes_to_highlight = gene_lists.highlight_genes()
    # 1. Load adata
    adata = load_adata(type_dataset=dataset_type, cluster_label=cluster_label)
    # 2. Read out only spots selected by leukocyte markers
    adata = get_sub_adata(adata=adata, gene=gene_lists.leukocyte_markers())
    # 3. Assign condition to spots
    adata, _ = exclude_cytokine_dp(adata=adata, cytoresps_dict=cytoresps_dict)

    # Collect the candidate result files once; the listing does not depend on the cytokine
    all_csv_files = [
        file for path, _, _ in os.walk(dge_results_folder)
        for file in glob.glob(os.path.join(path, '*.csv'))
    ]

    for cyto in cytokines:
        # get observable of condition
        obs_label_condition = "_".join(['cytokine', cyto])
        genes_labeling = genes_to_highlight[cyto]

        pattern = "".join(['*', cyto, '*all_genes.csv'])
        for file in all_csv_files:
            if not fnmatch(file, pattern):
                continue

            # Read out only those driver and responder genes which are specific for a cytokine
            allgenes_df = _read_csv_skipping_bad_lines(file)
            # Remove column Unnamed: 0
            allgenes_df = allgenes_df.drop(['Unnamed: 0'], axis=1)

            # Check if column names and row names are unique
            print("Unique Genes Stratified Sampling:",
                  allgenes_df['gene_symbol'].is_unique)
            print("Unique Genes Stratified Sampling:",
                  allgenes_df.columns.is_unique)
            # remove duplicated rows
            allgenes_df = allgenes_df.loc[
                ~allgenes_df['gene_symbol'].duplicated(), :]

            path_parts = file.split(os.sep)
            # Name of used design function: fourth-from-last path component
            design = path_parts[-4]
            # Name of used DGE Analysis method: fourth-from-last '_' token of the file name
            method = path_parts[-1].split("_")[-4]
            # Create output folder
            output_folder = os.path.join(save_folder, design, cyto)
            os.makedirs(output_folder, exist_ok=True)

            allgenes_df = _write_dataframe(adata,
                                           df=allgenes_df,
                                           cytokine=cyto,
                                           observable=obs_label_condition,
                                           output_folder=output_folder,
                                           method=method)

            print("# ------ Volcano plot ------ #")
            # Interactive volcano plot
            plotly_interactive_volcano(
                df=allgenes_df,
                df_keys=df_keys,
                save_folder=output_folder,
                key="".join([method, "_", cyto, "+", "_vs_", cyto, "-"]),
                x_lab=r'log$_2$(FC)',
                y_lab=r'-log$_{10}$(pvalue)',
                log2fc_cut=1,
                pval_cut=0.05)

            volcano_plot(df=allgenes_df,
                         df_keys=df_keys,
                         cytokine=cyto,
                         adjust=True,
                         label_genes=genes_labeling,
                         title="_".join([method, cyto, "Volcano_plot_zoom"]),
                         save_folder=output_folder,
                         log2fc_cut=1.0,
                         threshold=0.05)

            print(
                "# ------ Violin plots of Novel and Golden Standard genes ------ #"
            )
            # Get expression level of genes of interest to create boxplots
            if isinstance(genes_labeling, dict):
                merged_genes = [*genes_labeling['Driver_genes'],
                                *genes_labeling['Responder_genes']]
            else:
                merged_genes = genes_labeling.copy()
            # First check if genes are in data set
            available_genes = list(set(adata.var.index) & set(merged_genes))
            # Always include the cytokine itself, but do not process it twice
            if cyto not in available_genes:
                available_genes.append(cyto)

            # Get counts for each gene of interest
            goi = pd.DataFrame()
            goi['Cyto+_vs_Cyto-'] = adata.obs[obs_label_condition].values
            for gene in available_genes:
                # The cytokine was appended unconditionally, so it may be absent from the data
                if gene not in adata.var_names:
                    continue
                # Get counts
                adata_gene, _ = get_expression_values(adata=adata, gene=gene)
                # Sub-divide into cyto+ and cyto- group and read out counts
                spatial_adata_gene = create_obs_cytopos_cytoneg(
                    adata=adata_gene,
                    cyto=cyto,
                    gene=gene,
                    observable=obs_label_condition)
                goi[gene] = spatial_adata_gene.obs["_".join(
                    [gene, 'group'])].values

                # Visualise counts of gene of interest in a violin plot
                plot_violins(adata=spatial_adata_gene,
                             group=gene,
                             groupby=obs_label_condition,
                             output_folder=output_folder,
                             log=log)

            # Save counts of genes of interest
            goi.to_csv(
                os.path.join(
                    output_folder,
                    "_".join([cyto, "Counts_Highlight_genes.csv"])))
def main(save_folder, spatial_adata):
    """Visualise diagnoses and leukocyte tissue layers of the ST data set and plot cytokine spots.

    According to the original notes the three plots correspond to Suppl.
    Figures 2A, 2C and 2B.

    Parameters
    ----------
    save_folder : str
        Forwarded to the plotting helpers.
    spatial_adata : annData
        Data set with a 'tissue_type' observable.

    Returns
    -------
    None
    """
    tissue_obs = 'tissue_type'

    # Step 1: gene lists
    cytokines, _, cytoresps_dict = gene_lists.get_publication_cyto_resps()
    leukocyte_markers = gene_lists.leukocyte_markers()

    # Step 2: drop every spot whose tissue label is 'Unknown'
    labelled = spatial_adata.obs[tissue_obs] != 'Unknown'
    spatial_adata = spatial_adata[labelled]

    # Step 3: turn cytokine genes, then leukocyte markers, into observables
    for gene_set in (cytokines, leukocyte_markers):
        spatial_adata, _ = add_observables.convert_variable_to_observable(
            adata=spatial_adata, gene_names=gene_set, task='cell_gene', label='celltype', condition=None)

    # Step 4: keep only the spots selected by the leukocyte markers
    leukocytes = get_celltypes_data(spatial_adata, genes=leukocyte_markers)

    # Step 5: add observable healthy_disease to the full data set
    # (the leukocyte subset was taken before this call)
    spatial_adata = add_observables.add_disease_healthy_obs(spatial_adata)

    # keys: 'patient', 'biopsy_type', 'disease', 'tissue_type'
    # Suppl Figure 2A
    visualise_clusters(adata=spatial_adata, save_folder=save_folder, key='healthy_disease', title="Diagnoses")
    # Suppl. Figure 2C
    visualise_clusters(
        adata=leukocytes, save_folder=save_folder, key='tissue_type', title="Leukocytes_tissuelayers")

    # Step 6: read out cytokine spots, excluding double positive ones
    leukocytes, cytokine_obs = exclude_cytokine_dp(adata=leukocytes, cytoresps_dict=cytoresps_dict)

    # Step 7: merge epidermis layers into epidermis and dermis depths into dermis
    leukocytes = get_tissueregions(adata=leukocytes, tissue_label=tissue_obs)

    # Suppl. Figure 2B
    cytokine_palette = ["#ff7f00", "#e41a1c", 'darkgoldenrod', 'purple', "#377eb8", 'deeppink']
    plot_tissueregions_cyto(
        adata=leukocytes, obs_name=cytokine_obs, title='Leukocytes_Cytokines',
        save_folder=save_folder, gene_colors=cytokine_palette)