Exemple #1
0
    def run_tea(parent, sleuth, out, q_threshold=0.05):
        """
        Runs TEA on sleuth output.

        Every ``.csv`` file found in ``sleuth`` is read, the ``ens_gene``
        values of the rows with ``qval < q_threshold`` are collected, and the
        tissue, phenotype and GO enrichment analyses are run on that gene
        list. Result tables and plots are written under ``out``.

        Arguments:
        parent      -- directory to change into while the analysis runs
        sleuth      -- directory containing the sleuth result csv files
                       (resolved after changing into ``parent``)
        out         -- directory to write results to; created if missing
        q_threshold -- rows with a q-value below this are treated as
                       significant (default 0.05)

        If TEA or pandas cannot be imported, a message is printed and the
        process exits with status 0. The working directory that was current
        on entry is restored before returning, also when an error occurs.
        """
        try:
            import tissue_enrichment_analysis as tea
        except ImportError:
            print_with_flush('# TEA is not installed...skipping')
            sys.exit(0)
        try:
            import pandas as pd
        except ImportError:
            print_with_flush('# pandas is not installed...skipping')
            sys.exit(0)

        analyses = ['tissue', 'phenotype', 'go']

        # Load sleuth results.
        wdir = os.getcwd()
        print_with_flush('# entering 3_diff_exp')
        os.chdir(parent)
        try:
            print_with_flush('# creating {} directory'.format(out))
            os.makedirs(out, exist_ok=True)
            for csv_file in os.listdir(sleuth):
                if not csv_file.endswith('.csv'):
                    continue

                df = pd.read_csv(os.path.join(sleuth, csv_file), index_col=0)
                gene_list = df[df.qval < q_threshold].ens_gene
                name = os.path.splitext(os.path.basename(csv_file))[0]

                if len(gene_list) == 0:
                    print_with_flush(('# there are no genes with q < {} in ' +
                                      '{}!').format(q_threshold, csv_file))
                    print_with_flush(
                        '# this means there are no significantly ' +
                        'differentially-expressed genes for ' +
                        'this set of conditions.')
                    continue

                for analysis in analyses:
                    print_with_flush(('# performing {} enrichment analysis ' +
                                      'for {}').format(analysis, csv_file))
                    # Output base name: the input name with 'betas_wt'
                    # swapped for the output directory name, plus the
                    # analysis type.
                    fname = '{}_{}'.format(name.replace('betas_wt', out),
                                           analysis)
                    title = os.path.join(out, fname)
                    df_dict = tea.fetch_dictionary(analysis)
                    df_results = tea.enrichment_analysis(gene_list,
                                                         df_dict,
                                                         aname=title + '.csv',
                                                         save=True,
                                                         show=False)
                    tea.plot_enrichment_results(df_results,
                                                analysis=analysis,
                                                title=title,
                                                save=True)
        finally:
            # Always go back to where we started, so a failure in the
            # analysis does not leave the process inside `parent`.
            os.chdir(wdir)
            print_with_flush('# returned to root')
def walker(tissue_df, directory, save=True):
    """Given the tissue dictionary and a directory to save to,
    open all the gene sets, analyze them and deposit the results in the
    specified directory.

    Gene sets are every file found by walking the module-level ``path_sets``
    directory. Each one is read as a csv and its ``gene`` column is passed to
    ``tea.enrichment_analysis``.

    Parameters:
    -------------------
    tissue_df - pandas dataframe containing specified tissue dictionary
    directory - where to save to; used as a plain string prefix, so it must
                end with a path separator
    save - boolean indicating whether to save results or not. When False,
           the analyses still run but no per-geneset output is written.

    Note: ``directory + 'empty.txt'`` is (re)created with its header line on
    every call, regardless of ``save``.
    """
    with open(directory + 'empty.txt', 'w') as f:
        f.write('Genesets with no enrichment:\n')

    # go through each file in the folder (and any sub-folders)
    for dirpath, _dirnames, filenames in os.walk(path_sets):
        for f_set in filenames:
            # open df; join with the directory the file was actually found
            # in so that files inside sub-folders resolve correctly
            df = pd.read_csv(os.path.join(dirpath, f_set))

            # extract gene list and analyze
            short_name = f_set
            test = df.gene.values
            df_analysis, unused = tea.enrichment_analysis(test,
                                                          tissue_df,
                                                          show=False)

            # nothing is written for this gene set unless saving is enabled
            if not save:
                continue

            # if it's empty, place it in file called empty
            if df_analysis.empty:
                with open(directory + 'empty.txt', 'a+') as f:
                    f.write(short_name + '\n')
                continue

            # save without index
            df_analysis.to_csv(directory + short_name + '.csv',
                               index=False)
            # add a comment
            line = '#' + short_name + '\n'
            line_prepender(directory + short_name + '.csv', line)
            # plot
            tea.plot_enrichment_results(df_analysis,
                                        title=short_name,
                                        dirGraphs=directory,
                                        ftype='pdf')
            plt.close()
def walker(tissue_df, directory, save=True):
    """Given the tissue dictionary and a directory to save to,
    open all the gene sets, analyze them and deposit the results in the
    specified directory.

    Gene sets are every file found by walking the module-level ``path_sets``
    directory. Each one is read as a csv and its ``gene`` column is passed to
    ``tea.enrichment_analysis``.

    Parameters:
    -------------------
    tissue_df - pandas dataframe containing specified tissue dictionary
    directory - where to save to; used as a plain string prefix, so it must
                end with a path separator
    save - boolean indicating whether to save results or not. When False,
           the analyses still run but no per-geneset output is written.

    Note: ``directory + 'empty.txt'`` is (re)created with its header line on
    every call, regardless of ``save``.
    """
    with open(directory+'empty.txt', 'w') as f:
        f.write('Genesets with no enrichment:\n')

    # go through each file in the folder (and any sub-folders)
    for dirpath, _dirnames, filenames in os.walk(path_sets):
        for f_set in filenames:
            # open df; join with the directory the file was actually found
            # in so that files inside sub-folders resolve correctly
            df = pd.read_csv(os.path.join(dirpath, f_set))

            # extract gene list and analyze
            short_name = f_set
            test = df.gene.values
            df_analysis, unused = tea.enrichment_analysis(test, tissue_df,
                                                          show=False)

            # nothing is written for this gene set unless saving is enabled
            if not save:
                continue

            # if it's empty, place it in file called empty
            if df_analysis.empty:
                with open(directory+'empty.txt', 'a+') as f:
                    f.write(short_name+'\n')
                continue

            # save without index
            df_analysis.to_csv(directory+short_name+'.csv', index=False)
            # add a comment
            line = '#' + short_name+'\n'
            line_prepender(directory+short_name+'.csv', line)
            # plot
            tea.plot_enrichment_results(df_analysis, title=short_name,
                                        dirGraphs=directory, ftype='pdf')
            plt.close()
# For each species, draw the down- and up-regulated tissue enrichment
# results as two stacked panels in one figure and save it as a pdf.
for species in Lreceptacles:
    current = Lreceptacles[species]

    n = current.n  # number of dfs
    keys = current.result_dict.keys()

    # Nothing to plot for this species: move on to the next one.
    # (`continue` is the statement that skips an iteration; a bare `next`
    # only names the builtin and does nothing.)
    if n == 0:
        continue

    i = 0
    if n > 1:
        fig, ax = plt.subplots(nrows=n, figsize=(8, 8))
        fig.subplots_adjust(top=2)
        fig.suptitle(species, fontsize=15, y=1.02)

        # top panel: tissues enriched among down-regulated genes
        tea.plot_enrichment_results(current.result_dict['Infection_downregulated'],
                                    title='Name', save=False, fig=fig, ax=ax[0])

        # suppress xlabel
        ax[0].set_xlabel('')
        ax[0].set_ylabel('Down-Regulated Tissues')
        ax[0].yaxis.set_label_position('right')

        # second panel: tissues enriched among up-regulated genes
        tea.plot_enrichment_results(current.result_dict['Infection_upregulated'],
                                    title='Name', save=False, fig=fig, ax=ax[1])
        ax[1].set_ylabel('Up-Regulated Tissues')
        xlabel = ax[1].set_xlabel('Enrichment Fold Change - {0}'.format(species))
        ax[1].yaxis.set_label_position('right')
        fig.tight_layout()
        # The x label is passed as an extra artist so the tight bounding box
        # includes it.
        # NOTE(review): `rect` looks like a tight_layout option rather than a
        # savefig one -- confirm it has any effect here.
        plt.savefig('../output/Engelmann/Graphs/'+species+'Enrichment.pdf',
                    rect=[0, 0.03, 1, 0.95], bbox_extra_artists=[xlabel],
                    bbox_inches='tight')
#        plt.close()
            # NOTE(review): this is a fragment -- the enclosing loop that
            # binds `fodder`, and the definitions of path_sets, tissue_df,
            # annot, method, thresh, ntiss, ngenes and dirSummaries, are
            # outside the visible code.
            #
            # For every gene-set file: run the enrichment analysis, write the
            # result table and plot, and append one row of statistics to the
            # executive summary csv.
            for f_set in fodder[2]:
                df = pd.read_csv(path_sets + f_set)
                test = df.gene.values
                ntest = len(test)
                # file name with its first 16 and last 16 characters removed;
                # not used again in the visible code
                short_name = f_set[16:len(f_set)-16]

                df_analysis, unused = tea.enrichment_analysis(test, tissue_df,
                                                              alpha=0.05,
                                                              show=False)

                # save the analysis to the relevant folder
                savepath = '../output/HGT'+annot + '_' + method + '_Results/'
                df_analysis.to_csv(savepath + f_set+'.csv', index=False)

                # NOTE(review): `save` receives the literal text 'savepath'
                # followed by the file name, not the `savepath` variable
                # defined above -- confirm whether that is intended.
                tea.plot_enrichment_results(df_analysis,
                                            save='savepath'+f_set+'Graph',
                                            ftype='pdf')

                nana = len(df_analysis)  # len of results
                nun = len(unused)  # number of genes dropped
                # mean fold change and mean q-value over all result rows
                avf = df_analysis['Enrichment Fold Change'].mean()
                avq = df_analysis['Q value'].mean()
                # one comma-separated summary row with 11 fields; the eighth
                # field (ntest-nun) is the number of genes that were not dropped
                s = '{0},{1},{2},{3},{4},{5},{6},{7},{8},{9},{10}'.format(
                        annot, thresh, method, f_set, ntiss, ntest, nana,
                        ntest-nun, avf, avq, ngenes)
                # append, so rows accumulate across gene sets
                with open(dirSummaries+'ExecutiveSummary.csv', 'a+') as fSum:
                    fSum.write(s)
                    fSum.write('\n')

# Read the accumulated summary csv back in; lines starting with '#' are
# treated as comments by the reader.
df_summary = pd.read_csv(dirSummaries+'ExecutiveSummary.csv', comment='#')
                  get_n(go_traits, worm_genes))
            print('-----------------------------------\n')

        # NOTE(review): this is a fragment -- the enclosing loop/function and
        # the definitions of trait, worm_genes, n_min_obs, phenotype_df,
        # tissue_df and go_df are outside the visible code.

        # one of the traits foolishly has a '/'
        if '/' in trait:
            # rename the trait with this character or it breaks your code
            # (the trait name is used to build output file names below)
            trait = 'post bronchodilator fev1 fevc ratio'

        # enrichment analyses:
        # phenotype enrichment -- keep only rows with more than n_min_obs
        # observed genes, then write the table to csv
        df = tea.enrichment_analysis(worm_genes, phenotype_df, show=False)
        df = df[df.Observed > n_min_obs].copy()
        df.to_csv('../output/phenologues_2/pea_' + trait + '.csv', index=False)
        # a plot is only produced for traits whose name contains 'lupus'
        if 'lupus' in trait:
            print('Graphing PEA results for ', trait)
            # NOTE(review): fig/ax are created here but not passed to the
            # plotting call below -- confirm they are needed.
            fig, ax = plt.subplots()
            tea.plot_enrichment_results(df, title='../output/lupus_pea',
                                        save=True, analysis='phenotype')
            plt.close()

        # tissue enrichment -- same filtering, output and plotting pattern
        df = tea.enrichment_analysis(worm_genes, tissue_df, show=False)
        df = df[df.Observed > n_min_obs].copy()
        df.to_csv('../output/disease_tissues_2/tea_' + trait + '.csv',
                  index=False)
        if 'lupus' in trait:
            print('Graphing TEA results for ', trait)
            # NOTE(review): fig/ax are created here but not passed to the
            # plotting call below -- confirm they are needed.
            fig, ax = plt.subplots()
            tea.plot_enrichment_results(df, title='../output/lupus_tea',
                                        save=True, analysis='tissue')
            plt.close()

        # GO enrichment -- same filtering; what happens to the result is not
        # visible in this fragment
        df = tea.enrichment_analysis(worm_genes, go_df, show=False)
        df = df[df.Observed > n_min_obs].copy()
                # NOTE(review): this is a fragment -- the enclosing loops
                # that bind `f_set`, and the definitions of path_sets,
                # tissue_df, annot, method, thresh, ntiss, ngenes and
                # dirSummaries, are outside the visible code.
                #
                # For the current gene-set file: run the enrichment analysis,
                # write the result table and plot, and append one row of
                # statistics to the executive summary csv.
                df = pd.read_csv(path_sets + f_set)
                test = df.gene.values
                ntest = len(test)
                # file name with its first 16 and last 16 characters removed;
                # not used again in the visible code
                short_name = f_set[16:len(f_set) - 16]

                df_analysis, unused = tea.enrichment_analysis(test,
                                                              tissue_df,
                                                              alpha=0.05,
                                                              show=False)

                # save the analysis to the relevant folder
                savepath = '../output/HGT' + annot + '_' + method + '_Results/'
                df_analysis.to_csv(savepath + f_set + '.csv', index=False)

                # NOTE(review): `save` receives the literal text 'savepath'
                # followed by the file name, not the `savepath` variable
                # defined above -- confirm whether that is intended.
                tea.plot_enrichment_results(df_analysis,
                                            save='savepath' + f_set + 'Graph',
                                            ftype='pdf')

                nana = len(df_analysis)  # len of results
                nun = len(unused)  # number of genes dropped
                # mean fold change and mean q-value over all result rows
                avf = df_analysis['Enrichment Fold Change'].mean()
                avq = df_analysis['Q value'].mean()
                # one comma-separated summary row with 11 fields; the eighth
                # field (ntest - nun) is the number of genes not dropped
                s = '{0},{1},{2},{3},{4},{5},{6},{7},{8},{9},{10}'.format(
                    annot, thresh, method, f_set, ntiss, ntest, nana,
                    ntest - nun, avf, avq, ngenes)
                # append, so rows accumulate across gene sets
                with open(dirSummaries + 'ExecutiveSummary.csv', 'a+') as fSum:
                    fSum.write(s)
                    fSum.write('\n')

# Read the accumulated summary csv back in; lines starting with '#' are
# treated as comments by the reader.
df_summary = pd.read_csv(dirSummaries + 'ExecutiveSummary.csv', comment='#')
Exemple #8
0
def run_post(project, code='post', requires='diff'):
    """Run the enrichment ("post") analyses for a project and archive them.

    The organism is taken from the first sample of the project; anything
    other than C. elegans is reported and skipped. For every
    ``betas*.csv`` file in the project's ``requires`` path, the ``ens_gene``
    values of rows with ``qval`` below the configured threshold are run
    through each configured TEA analysis type, with tables and plots written
    to the project's ``code`` path. Finally the output is archived and the
    archive path is recorded on the project.

    Arguments:
    project  -- project object providing samples, paths and files
    code     -- key of this step in project.paths / project.files
    requires -- key in project.paths of the step whose output is read
    """
    print_with_flush('# starting post for project {}'.format(project.objectId))

    # Only C. elegans is supported; bail out early for anything else.
    organism = project.relation('samples').query()[0].reference.organism
    is_c_elegans = (organism.genus == 'caenorhabditis'
                    and organism.species == 'elegans')
    if not is_c_elegans:
        print_with_flush('# Currently, post analysis is only supported for '
                         'C. elegans')
        return

    config = Config.get()
    q_threshold = config['qThreshold']
    tea_types = config['teaTypes']

    source_dir = project.paths[requires]
    target_dir = project.paths[code]

    for entry in os.listdir(source_dir):
        # Only the beta tables are of interest.
        if not (entry.startswith('betas') and entry.endswith('.csv')):
            continue

        stem = os.path.splitext(os.path.basename(entry))[0]
        table = pd.read_csv(os.path.join(source_dir, entry), index_col=0)
        significant = table[table.qval < q_threshold]
        gene_list = significant.ens_gene

        # Skip if gene list is empty.
        if len(gene_list) == 0:
            no_genes_msg = ('# there are no genes with q < {} in ' + '{}!')
            print_with_flush(no_genes_msg.format(q_threshold, entry))
            print_with_flush('# this means there are no significantly ' +
                             'differentially-expressed genes for ' +
                             'this set of conditions.')
            continue

        # Output base name shared by every analysis of this file.
        enrichment_stem = stem.replace('betas_wt', 'enrichment')

        for tea_type in tea_types:
            tea_title = os.path.join(
                target_dir, '{}_{}'.format(enrichment_stem, tea_type))
            progress_msg = ('# performing {} enrichment analysis ' + 'for {}')
            print_with_flush(progress_msg.format(tea_type, entry))

            dictionary = tea.fetch_dictionary(tea_type)
            results = tea.enrichment_analysis(gene_list,
                                              dictionary,
                                              aname=tea_title + '.csv',
                                              save=True,
                                              show=False)
            tea.plot_enrichment_results(results,
                                        analysis=tea_type,
                                        title=tea_title,
                                        save=True)

    # Archive.
    archived_to = archive(project, code)

    if code not in project.files:
        project.files[code] = {}
    project.files[code]['archive'] = archived_to
    project.save()

    print_with_flush('# done')