コード例 #1
0
    def test_unequal_group_sizes(self):
        """Check ANOSIM results for groups of unequal size.

        ``anosim`` is run on ``self.dm_unequal`` once with the original
        grouping and once with the relabeled grouping; both runs are compared
        against the same expected series.
        """
        expected = pd.Series(
            index=self.exp_index,
            data=['ANOSIM', 'R', 6, 3, -0.363636, 0.878, 999])

        groupings = (self.grouping_unequal, self.grouping_unequal_relabeled)
        for grouping in groupings:
            # Reseed NumPy's global RNG before every run.
            np.random.seed(0)
            observed = anosim(self.dm_unequal, grouping)
            self.assert_series_equal(observed, expected)
コード例 #2
0
ファイル: test_anosim.py プロジェクト: squarednob/scikit-bio
    def test_unequal_group_sizes(self):
        """Check ANOSIM results for groups of unequal size.

        ``anosim`` is run on ``self.dm_unequal`` once with the original
        grouping and once with the relabeled grouping; both runs are compared
        against the same expected series.
        """
        expected = pd.Series(
            index=self.exp_index,
            data=['ANOSIM', 'R', 6, 3, -0.363636, 0.878, 999])

        groupings = (self.grouping_unequal, self.grouping_unequal_relabeled)
        for grouping in groupings:
            # Reseed NumPy's global RNG before every run.
            np.random.seed(0)
            observed = anosim(self.dm_unequal, grouping)
            self.assert_series_equal(observed, expected)
コード例 #3
0
 def test_no_ties(self):
     """Compare ``anosim`` on ``self.dm_no_ties`` with the expected series."""
     expected_values = ['ANOSIM', 'R', 4, 2, 0.625, 0.332, 999]
     expected = pd.Series(data=expected_values,
                          index=self.exp_index,
                          name='ANOSIM results')

     # Seed NumPy's global RNG before running the test.
     np.random.seed(0)
     observed = anosim(self.dm_no_ties, self.grouping_equal)

     self.assert_series_equal(observed, expected)
コード例 #4
0
ファイル: test_anosim.py プロジェクト: squarednob/scikit-bio
    def test_ties(self):
        """Check ANOSIM results on ``self.dm_ties``.

        Each input form is run twice with the same seed to confirm that
        rerunning with identical inputs gives identical results, and the
        grouping vector and the data frame (``column='Group'``) forms are
        both compared against the same expected series.
        """
        expected = pd.Series(
            index=self.exp_index,
            data=['ANOSIM', 'R', 4, 2, 0.25, 0.671, 999])

        # (positional arguments, keyword arguments) for each input form.
        invocations = (
            ((self.dm_ties, self.grouping_equal), {}),
            ((self.dm_ties, self.df), {'column': 'Group'}),
        )

        for call_args, call_kwargs in invocations:
            for _ in range(2):
                np.random.seed(0)
                observed = anosim(*call_args, **call_kwargs)
                self.assert_series_equal(observed, expected)
コード例 #5
0
ファイル: test_anosim.py プロジェクト: 7924102/scikit-bio
 def test_no_ties(self):
     """Compare ``anosim`` on ``self.dm_no_ties`` with the expected series."""
     expected_values = ['ANOSIM', 'R', 4, 2, 0.625, 0.332, 999]
     expected = pd.Series(data=expected_values,
                          index=self.exp_index,
                          name='ANOSIM results')

     # Seed NumPy's global RNG before running the test.
     np.random.seed(0)
     observed = anosim(self.dm_no_ties, self.grouping_equal)

     self.assert_series_equal(observed, expected)
コード例 #6
0
    def test_ties(self):
        """Check ANOSIM results on ``self.dm_ties``.

        Each input form is run twice with the same seed to confirm that
        rerunning with identical inputs gives identical results, and the
        grouping vector and the data frame (``column='Group'``) forms are
        both compared against the same expected series.
        """
        expected = pd.Series(
            index=self.exp_index,
            data=['ANOSIM', 'R', 4, 2, 0.25, 0.671, 999])

        # (positional arguments, keyword arguments) for each input form.
        invocations = (
            ((self.dm_ties, self.grouping_equal), {}),
            ((self.dm_ties, self.df), {'column': 'Group'}),
        )

        for call_args, call_kwargs in invocations:
            for _ in range(2):
                np.random.seed(0)
                observed = anosim(*call_args, **call_kwargs)
                self.assert_series_equal(observed, expected)
コード例 #7
0
def beta_diversity_pcoa(biom_fp, method="braycurtis", permutations=99, dim=2,
                        col='method', colormap=None):

    '''From biom table, compute a distance matrix; run PCoA; run an ANOSIM
    test on the groups in ``col``; and plot the ordination.

    biom_fp: path
        Path to biom.Table containing sample metadata.
    method: str
        skbio.Diversity method to use for ordination (default Bray-Curtis).
    permutations: int
        Number of permutations to perform for anosim tests.
    dim: int
        2 draws a PC1/PC2 plot with circle_plot_from_dataframe; any other
        value hands the ordination to pcoa_plot_skbio.
    col: str
        metadata name to use for distinguishing groups for anosim tests and
        pcoa plots.
    colormap: dict or None
        map groups names (must be group names in col) to colors used for
        plots. None selects the built-in map for the groups 'expected',
        'rdp', 'sortmerna', 'uclust' and 'blast'.

    Returns the tuple (sample metadata, anosim results, pcoa results,
    distance matrix).
    '''

    # Build the default per call instead of sharing one dict object between
    # all calls through the signature.
    if colormap is None:
        colormap = {'expected': 'red',
                    'rdp': 'seagreen',
                    'sortmerna': 'gray',
                    'uclust': 'blue',
                    'blast': 'purple'}

    dm, s_md = make_distance_matrix(biom_fp, method=method)

    # pcoa
    pc = pcoa(dm)

    # anosim tests
    results = anosim(dm, s_md, column=col, permutations=permutations)
    print('R = ', results['test statistic'], '; P = ', results['p-value'])

    if dim == 2:
        # bokeh pcoa plots
        # .ix was removed from pandas; .loc is the label-based equivalent.
        pc123 = pc.samples.loc[:, ["PC1", "PC2", "PC3"]]
        smd_merge = s_md.merge(pc123, left_index=True, right_index=True)
        # Colour by the requested grouping column rather than a hard-coded
        # 'method', so a non-default ``col`` is honoured.
        smd_merge['Color'] = [colormap[x] for x in smd_merge[col]]
        # .iloc makes the "first row" / "d-th axis" lookups explicitly
        # positional; plain [int] on a label-indexed Series is ambiguous.
        title = smd_merge['reference'].iloc[0]
        labels = ['PC {0} ({1:.2f})'.format(d + 1,
                                            pc.proportion_explained.iloc[d])
                  for d in range(2)]
        circle_plot_from_dataframe(smd_merge, "PC1", "PC2", title,
                                   columns=["method", "sample_id", "params"],
                                   color="Color", labels=labels)

    else:
        # skbio pcoa plots
        pcoa_plot_skbio(pc, s_md, col=col)

    return s_md, results, pc, dm
コード例 #8
0
def main(args):
    """Run ANOSIM on a tab-separated table and write the result as CSV.

    ``args.data`` is read with ``pd.read_table`` using the first column as
    the index. Rows whose ``'taxon'`` value is null are discarded; Euclidean
    distances are computed between the remaining rows over every column
    except ``'taxon'``; and ``anosim`` is run with ``'taxon'`` as the
    grouping and ``permutations=0``. The result is written to stdout as a
    single header-less CSV row labelled ``args.data_name``.
    """
    data_df = pd.read_table(args.data, index_col=0)
    data_df_nonnull = data_df[data_df['taxon'].notnull()]

    # A pandas Index is immutable and has no ``remove`` method; ``drop``
    # returns a new Index without the 'taxon' label.
    val_cols = data_df_nonnull.columns.drop('taxon')

    dm = DistanceMatrix(
        squareform(pdist(data_df_nonnull[val_cols], metric='euclidean')))
    a = anosim(dm, data_df_nonnull['taxon'], permutations=0)

    # Turn the result series into a one-row frame labelled with the data name.
    a_df = pd.DataFrame(a).T
    a_df.index = [args.data_name]

    a_df.to_csv(sys.stdout, header=None)
コード例 #9
0
eigen3 = eigen['PC3'].values
print(eigen)
print(eigen1)

# Load the sample table; its first column becomes the index.
df_fins = pd.read_csv("samples_id_all.tsv", sep="\t", header=0, index_col=0)
print(df_fins)

# Keep only the 'sal' column. The explicit copy makes the column assignments
# below act on an independent frame instead of a slice of the loaded table
# (avoids pandas' SettingWithCopyWarning). The former bare
# ``df_fins.reset_index()`` call was dropped: its result was discarded, so it
# had no effect.
df_fins = df_fins[['sal']].copy()
print(df_fins)
#df_fin.to_csv("test6.tsv", sep="\t", header=1)

df_fins['Observed OTUs'] = adiv_obs_otuss
df_fins['Faith PD'] = adiv_faith_pds

# ANOSIM with the 'sal' column as grouping.
anosims = anosim(wu_dms, df_fins, column='sal', permutations=999)
print(anosims['test statistic'])
print(anosims['p-value'])

print(df_fins.corr(method="spearman"))

print(adiv_obs_otuss)

figs = plt.figure()
#plt.close('all')
#plt.subplot(1,3,1)
figs = wu_pcs.plot(df_fins,
                   'sal',
                   axis_labels=('PC1' + str(eigen1) + '%',
                                'PC2' + str(eigen2) + '%',
                                'PC3' + str(eigen3) + '%'),
コード例 #10
0
def beta_diversity(TaXon_table_xlsx, width, heigth, cmap, meta_data_to_test,
                   taxonomic_level, path_to_outdirs, template, font_size,
                   diss_metric):
    """Plot a sample-by-sample dissimilarity heatmap with an ANOSIM summary.

    Reads the TaXon table and its metadata table
    (``<path_to_outdirs>/Meta_data_table/<stem>_metadata.xlsx``), drops
    samples whose ``meta_data_to_test`` entry is empty, sums the reads per
    ``taxonomic_level``, computes the ``diss_metric`` distance matrix and an
    ANOSIM test (999 permutations) against the metadata, and writes the
    heatmap as PDF and HTML plus the matrix as XLSX into
    ``<path_to_outdirs>/Beta_diversity/``.

    Parameters
    ----------
    TaXon_table_xlsx : path-like
        Excel TaXon table; columns from position 10 onwards are samples.
    width, heigth
        Figure size; both are passed through ``int()``.
    cmap
        Plotly continuous colour scale for the heatmap.
    meta_data_to_test : str
        Column of the metadata table used as the ANOSIM grouping.
    taxonomic_level : str
        Column to aggregate on. "ASVs", "ESVs", "OTUs" and "zOTUs" are
        mapped to the "ID" column.
    path_to_outdirs : path-like
        Root directory holding "Meta_data_table" and "Beta_diversity".
    template, font_size
        Forwarded to the plotly layout.
    diss_metric : str
        Metric name handed to ``skbio.diversity.beta_diversity``.

    Raises
    ------
    RuntimeError
        If the metadata is unique for every sample or identical for all
        samples (an error popup is shown first).
    """
    import pandas as pd
    # Aliased so the import does not shadow this function's own name.
    from skbio.diversity import beta_diversity as skbio_beta_diversity
    from skbio.stats.distance import anosim
    import plotly.express as px
    from pathlib import Path
    import PySimpleGUI as sg
    import webbrowser

    TaXon_table_xlsx = Path(TaXon_table_xlsx)
    out_root = Path(str(path_to_outdirs))
    Meta_data_table_xlsx = (out_root / "Meta_data_table" /
                            (TaXon_table_xlsx.stem + "_metadata.xlsx"))
    TaXon_table_df = pd.read_excel(TaXon_table_xlsx,
                                   header=0).fillna("unidentified")
    TaXon_table_samples = TaXon_table_df.columns.tolist()[10:]
    Meta_data_table_df = pd.read_excel(Meta_data_table_xlsx,
                                       header=0).fillna("nan")
    Meta_data_table_samples = Meta_data_table_df['Samples'].tolist()

    ## drop samples with metadata called nan (= empty); the sample name is
    ## taken from the first column of the metadata table
    metadata_values = Meta_data_table_df[meta_data_to_test].values.tolist()
    first_column = Meta_data_table_df.iloc[:, 0].tolist()
    drop_samples = [
        sample for sample, value in zip(first_column, metadata_values)
        if value == "nan"
    ]

    if drop_samples:
        ## filter the TaXon table
        TaXon_table_df = TaXon_table_df.drop(drop_samples, axis=1)
        TaXon_table_samples = TaXon_table_df.columns.tolist()[10:]
        ## also remove empty OTUs (rows whose reads are all 0)
        row_filter_list = [
            row for row in TaXon_table_df.values.tolist()
            if set(row[10:]) != {0}
        ]
        TaXon_table_df = pd.DataFrame(row_filter_list,
                                      columns=TaXon_table_df.columns.tolist())
        dropped = set(drop_samples)
        Meta_data_table_df = pd.DataFrame(
            [
                row for row in Meta_data_table_df.values.tolist()
                if row[0] not in dropped
            ],
            columns=Meta_data_table_df.columns.tolist())
        Meta_data_table_samples = Meta_data_table_df['Samples'].tolist()

    metadata_list = Meta_data_table_df[meta_data_to_test].values.tolist()

    ## the title keeps the requested level name; the data column is "ID"
    ## for ASV/ESV/OTU/zOTU level
    taxon_title = taxonomic_level
    if taxonomic_level in ["ASVs", "ESVs", "OTUs", "zOTUs"]:
        taxonomic_level = "ID"

    n_categories = len(set(metadata_list))

    # every sample has its own metadata value: nothing to group
    if n_categories == len(Meta_data_table_samples):
        message = ("The meta data is unique for all samples. "
                   "Please adjust the meta data table!")
        sg.Popup(message, title=("Error"))
        raise RuntimeError(message)

    # all samples share one metadata value: nothing to compare
    if n_categories == 1:
        message = ("The meta data is similar for all samples. "
                   "Please adjust the meta data table!")
        sg.Popup(message, title=("Error"))
        raise RuntimeError(message)

    if sorted(TaXon_table_samples) != sorted(Meta_data_table_samples):
        sg.PopupError(
            "Error: The samples between the taxon table and meta table do not match!",
            keep_on_top=True)
        return

    ## collect samples for plot
    samples = Meta_data_table_samples

    ## extract the relevant data
    TaXon_table_df = TaXon_table_df[[taxonomic_level] + samples]
    ## aggregate multiple hits of one taxonomic level: reads are summed per
    ## sample, the taxon label itself is taken from the first hit
    aggregation_functions = {sample: 'sum' for sample in samples}
    aggregation_functions[taxonomic_level] = 'first'
    ## create condensed dataframe
    df_new = TaXon_table_df.groupby(
        TaXon_table_df[taxonomic_level]).aggregate(aggregation_functions)
    if 'unidentified' in df_new.index:
        df_new = df_new.drop('unidentified')

    ## collect reads (one row per sample)
    data = df_new[samples].transpose().values.tolist()
    ## calculate dissimilarity distances
    dissimilarity_dm = skbio_beta_diversity(diss_metric, data, samples)

    anosim_results = anosim(dissimilarity_dm,
                            metadata_list,
                            permutations=999)
    anosim_r = round(anosim_results['test statistic'], 5)
    anosim_p = anosim_results['p-value']
    textbox = (f"Anosim ({meta_data_to_test}, {taxon_title})<br>"
               f"R = {anosim_r}<br>p = {anosim_p}")

    matrix = dissimilarity_dm.data
    matrix_df = pd.DataFrame(matrix, index=samples, columns=samples)

    # create plot
    color_label = diss_metric + " distance"
    fig = px.imshow(matrix,
                    x=samples,
                    y=samples,
                    color_continuous_scale=cmap,
                    labels=dict(color=color_label))
    fig.update_layout(height=int(heigth),
                      width=int(width),
                      template=template,
                      showlegend=True,
                      title=textbox,
                      font_size=font_size,
                      title_font_size=font_size)

    # define output files; make sure the target directory exists so the
    # writes below do not fail on a fresh project folder
    out_dir = out_root / "Beta_diversity"
    out_dir.mkdir(parents=True, exist_ok=True)
    base_name = (f"{TaXon_table_xlsx.stem}_{meta_data_to_test}_"
                 f"{taxon_title}_{diss_metric}")
    output_pdf = out_dir / (base_name + ".pdf")
    output_html = out_dir / (base_name + ".html")
    output_xlsx = out_dir / (base_name + ".xlsx")
    fig.write_image(str(output_pdf))
    fig.write_html(str(output_html))
    matrix_df.to_excel(output_xlsx)

    ## ask to show plot
    answer = sg.PopupYesNo('Show plot?', keep_on_top=True)
    if answer == "Yes":
        # as_uri() builds a well-formed file URL on every platform and
        # needs an absolute path, hence resolve()
        webbrowser.open(output_html.resolve().as_uri())

    ## write to log file
    sg.Popup("Beta diversity estimate are found in",
             path_to_outdirs,
             "/Beta_diversity/",
             title="Finished",
             keep_on_top=True)
    from taxontabletools.create_log import ttt_log
    ttt_log("beta diversity", "analysis", TaXon_table_xlsx.name,
            output_pdf.name, meta_data_to_test, path_to_outdirs)
コード例 #11
0
eigen3 = eigen['PC3'].values
print(eigen)
print(eigen1)

# Load the sample table; its first column becomes the index.
df_fin = pd.read_csv("samples_id_all.tsv", sep="\t", header=0, index_col=0)
print(df_fin)

# Keep only the 'true_lat' column. The explicit copy makes the column
# assignments below act on an independent frame instead of a slice of the
# loaded table (avoids pandas' SettingWithCopyWarning). The former bare
# ``df_fin.reset_index()`` call was dropped: its result was discarded, so it
# had no effect.
df_fin = df_fin[['true_lat']].copy()
print(df_fin)
#df_fin.to_csv("test6.tsv", sep="\t", header=1)

df_fin['Observed OTUs'] = adiv_obs_otus
df_fin['Faith PD'] = adiv_faith_pd

# ANOSIM with the 'true_lat' column as grouping.
anosim_lat = anosim(wu_dm, df_fin, column='true_lat', permutations=999)
print(anosim_lat['test statistic'])
print(anosim_lat['p-value'])

print(df_fin.corr(method="spearman"))

print(adiv_obs_otus)

fig = plt.figure()
#plt.close('all')
#plt.subplot(1,3,1)
fig = wu_pc.plot(df_fin,
                 'true_lat',
                 axis_labels=('PC1' + str(eigen1) + '%',
                              'PC2' + str(eigen2) + '%',
                              'PC3' + str(eigen3) + '%'),
コード例 #12
0
                its - 1][k].cluster_label
        field_plot = np.ma.masked_array(field_plot, field_plot == -10000)

        #%% Determine from which clusters the data is part of:
        nomask = np.where(~field_plot.mask)
        field_plot = field_plot[nomask]
        args = nwf.find_nearest_args(vLons[nomask], vLats[nomask], Flats,
                                     Flons)
        Flabels = field_plot[args]
        args = nwf.find_nearest_args(vLons[nomask], vLats[nomask], FlatsDino,
                                     FlonsDino)
        Dinolabels = field_plot[args]
        #%%
        if (len(np.unique(Dinolabels)) > 1):
            Dano = anosim(DistanceMatrix(Dinotaxdist),
                          Dinolabels.astype(str),
                          permutations=perm)
            DinoP[its] = list(Dano)[5]
            DinoR[its] = list(Dano)[4]
        if (len(np.unique(Flabels)) > 1):
            Fano = anosim(DistanceMatrix(Ftaxdist),
                          Flabels.astype(str),
                          permutations=perm)
            FP[its] = list(Fano)[5]
            FR[its] = list(Fano)[4]

    #%% Save file with ANOSIM results
    np.savez('ANOSIM_hierarchicalclus%s_sp%d_perm%d_its%d_mlat%d.npz' %
             (season, sp, perm, iterations, maxlat),
             ForamP=FP,
             DinoP=DinoP,
コード例 #13
0
        sample_id = each_sample_split[0]
        sample_group = each_sample_split[1]
        sample_id_list.append(sample_id)
        sample_group_list.append(sample_group)

# Load the tab-separated input table.
df = pd.read_csv(infile_data, sep='\t')

# One list of values per sample column, in sample_id_list order.
lol_data_in = [df[col_id].values.tolist() for col_id in sample_id_list]

# Pairwise distances between all samples with the configured metric.
dist_arrary = pairwise_distances(
    lol_data_in, lol_data_in, metric=distance_metric)

# Attach the sample ids to the distances.
dist_matrix = DistanceMatrix(dist_arrary, sample_id_list)

# ANOSIM on the sample groups, followed by an empty line.
anosim_test = anosim(dist_matrix, sample_group_list, permutations=999)
print(anosim_test, end='\n\n')

# PERMANOVA on the same distance matrix and groups.
permanova_test = permanova(dist_matrix, sample_group_list, permutations=999)
print(permanova_test)
コード例 #14
0
eigen3 = eigen['PC3'].values
print(eigen)
print(eigen1)

# Load the sample table; its first column becomes the index.
df_fin = pd.read_csv("samples_id_all.tsv", sep="\t", header=0, index_col=0)
print(df_fin)

# Keep only the 'region' column. The explicit copy makes the column
# assignments below act on an independent frame instead of a slice of the
# loaded table (avoids pandas' SettingWithCopyWarning). The former bare
# ``df_fin.reset_index()`` call was dropped: its result was discarded, so it
# had no effect.
df_fin = df_fin[['region']].copy()
print(df_fin)
#df_fin.to_csv("test6.tsv", sep="\t", header=1)

df_fin['Observed OTUs'] = adiv_obs_otus
df_fin['Faith PD'] = adiv_faith_pd

# Store the result under its own name: assigning it to ``anosim`` would
# replace the anosim function with its result and break any later call.
anosim_region = anosim(wu_dm, df_fin, column='region', permutations=999)
print(anosim_region['test statistic'])
print(anosim_region['p-value'])

print(df_fin.corr(method="spearman"))

#print(adiv_obs_otus)

fig = plt.figure()
#plt.close('all')
#plt.subplot(1,3,1)
fig = wu_pc.plot(df_fin,
                 'region',
                 axis_labels=('PC1' + str(eigen1) + '%',
                              'PC2' + str(eigen2) + '%',
                              'PC3' + str(eigen3) + '%'),
コード例 #15
0
 def test_no_permutations(self):
     """With ``permutations=0`` the expected p-value is NaN."""
     expected_values = ['ANOSIM', 'R', 4, 2, 0.625, np.nan, 0]
     expected = pd.Series(data=expected_values,
                          index=self.exp_index,
                          name='ANOSIM results')

     observed = anosim(self.dm_no_ties, self.grouping_equal, permutations=0)

     self.assert_series_equal(observed, expected)
コード例 #16
0
eigen3 = eigen['PC3'].values
print(eigen)
print(eigen1)

# Load the sample table; its first column becomes the index.
df_fint = pd.read_csv("samples_id_all.tsv", sep="\t", header=0, index_col=0)
print(df_fint)

# Keep only the 'temp' column. The explicit copy makes the column
# assignments below act on an independent frame instead of a slice of the
# loaded table (avoids pandas' SettingWithCopyWarning). The former bare
# ``df_fint.reset_index()`` call was dropped: its result was discarded, so it
# had no effect.
df_fint = df_fint[['temp']].copy()
print(df_fint)
#df_fin.to_csv("test6.tsv", sep="\t", header=1)

df_fint['Observed OTUs'] = adiv_obs_otust
df_fint['Faith PD'] = adiv_faith_pdt

# ANOSIM with the 'temp' column as grouping.
anosimt = anosim(wu_dmt, df_fint, column='temp', permutations=999)
print(anosimt['test statistic'])
print(anosimt['p-value'])

print(df_fint.corr(method="spearman"))

print(adiv_obs_otust)

figt = plt.figure()
#plt.close('all')
#plt.subplot(1,3,1)
figt = wu_pct.plot(df_fint,
                   'temp',
                   axis_labels=('PC1' + str(eigen1) + '%',
                                'PC2' + str(eigen2) + '%',
                                'PC3' + str(eigen3) + '%'),
コード例 #17
0
# Build one list of floats per column of ``rows``, skipping the first row
# and the first column.
for a in range(1, len(rows[0])):
    samples.append([float(rows[b][a]) for b in range(1, len(rows))])
"""
only_samples = ['LR', 'SR']
new_samples, new_names = [], []
for a in range(len(sample_names)):
    for b in range(len(only_samples)):
        if sample_names[a] == only_samples[b]:
            new_samples.append(samples[a])
            new_names.append(sample_names[a])
samples = new_samples
sample_names = new_names
print(len(samples), len(sample_names))
"""

# Distance matrix over the samples using the braycurtis metric.
sam_dm = dm.from_iterable(samples, metric=braycurtis)

# PERMDISP (median), ANOSIM and PERMANOVA, each grouped by sample_names
# with 999 permutations.
pdisp = permdisp(
    sam_dm, sample_names, column=None, test='median', permutations=999)
print(pdisp)

asim = anosim(sam_dm, sample_names, column=None, permutations=999)
print(asim)

perm = permanova(sam_dm, sample_names, column=None, permutations=999)
print(perm)
コード例 #18
0
ファイル: test_anosim.py プロジェクト: 7924102/scikit-bio
 def test_no_permutations(self):
     """With ``permutations=0`` the expected p-value is NaN."""
     expected_values = ['ANOSIM', 'R', 4, 2, 0.625, np.nan, 0]
     expected = pd.Series(data=expected_values,
                          index=self.exp_index,
                          name='ANOSIM results')

     observed = anosim(self.dm_no_ties, self.grouping_equal, permutations=0)

     self.assert_series_equal(observed, expected)
コード例 #19
0
eigen3 = eigen['PC3'].values
print(eigen)
print(eigen1)

# Load the sample table; its first column becomes the index.
df_find = pd.read_csv("samples_id_all.tsv", sep="\t", header=0, index_col=0)
print(df_find)

# Keep only the 'depth_group_50' column. The explicit copy makes the column
# assignments below act on an independent frame instead of a slice of the
# loaded table (avoids pandas' SettingWithCopyWarning). The former bare
# ``df_find.reset_index()`` call was dropped: its result was discarded, so it
# had no effect.
df_find = df_find[['depth_group_50']].copy()
print(df_find)
#df_fin.to_csv("test6.tsv", sep="\t", header=1)

df_find['Observed OTUs'] = adiv_obs_otusd
df_find['Faith PD'] = adiv_faith_pdd

# ANOSIM with the 'depth_group_50' column as grouping.
anosimd = anosim(wu_dmd, df_find, column='depth_group_50', permutations=999)
print(anosimd['test statistic'])
print(anosimd['p-value'])

print(df_find.corr(method="spearman"))

print(adiv_obs_otusd)

figd = plt.figure()
#plt.close('all')
#plt.subplot(1,3,1)
figd = wu_pcd.plot(df_find,
                   'depth_group_50',
                   axis_labels=('PC1' + str(eigen1) + '%',
                                'PC2' + str(eigen2) + '%',
                                'PC3' + str(eigen3) + '%'),
コード例 #20
0
def PCoA_analysis(TaXon_table_xlsx, meta_data_to_test, taxonomic_level, width,
                  height, pcoa_s, path_to_outdirs, template, font_size,
                  color_discrete_sequence, pcoa_dissimilarity):
    import pandas as pd
    import numpy as np
    from skbio.diversity import beta_diversity
    from skbio.stats.ordination import pcoa
    from skbio.stats.distance import anosim
    import plotly.graph_objects as go
    from plotly.subplots import make_subplots
    import plotly.express as px
    from pathlib import Path
    import PySimpleGUI as sg
    import os, webbrowser
    from itertools import combinations

    TaXon_table_xlsx = Path(TaXon_table_xlsx)
    Meta_data_table_xlsx = Path(
        str(path_to_outdirs) + "/" + "Meta_data_table" + "/" +
        TaXon_table_xlsx.stem + "_metadata.xlsx")
    TaXon_table_df = pd.read_excel(TaXon_table_xlsx,
                                   header=0).fillna("unidentified")
    TaXon_table_samples = TaXon_table_df.columns.tolist()[10:]
    Meta_data_table_df = pd.read_excel(Meta_data_table_xlsx,
                                       header=0).fillna("nan")
    Meta_data_table_samples = Meta_data_table_df['Samples'].tolist()

    metadata_list = Meta_data_table_df[meta_data_to_test].values.tolist()
    metadata_loc = Meta_data_table_df.columns.tolist().index(meta_data_to_test)

    ## drop samples with metadata called nan (= empty)
    drop_samples = [
        i[0] for i in Meta_data_table_df.values.tolist()
        if i[metadata_loc] == "nan"
    ]

    if drop_samples != []:
        ## filter the TaXon table
        TaXon_table_df = TaXon_table_df.drop(drop_samples, axis=1)
        TaXon_table_samples = TaXon_table_df.columns.tolist()[10:]
        ## also remove empty OTUs
        row_filter_list = []
        for row in TaXon_table_df.values.tolist():
            reads = set(row[10:])
            if reads != {0}:
                row_filter_list.append(row)
        columns = TaXon_table_df.columns.tolist()
        TaXon_table_df = pd.DataFrame(row_filter_list, columns=columns)
        Meta_data_table_df = pd.DataFrame(
            [
                i for i in Meta_data_table_df.values.tolist()
                if i[0] not in drop_samples
            ],
            columns=Meta_data_table_df.columns.tolist())
        Meta_data_table_samples = Meta_data_table_df['Samples'].tolist()

    ## create a y axis title text
    taxon_title = taxonomic_level.lower()

    ## adjust taxonomic level if neccessary
    if taxonomic_level in ["ASVs", "ESVs", "OTUs", "zOTUs"]:
        taxon_title = taxonomic_level
        taxonomic_level = "ID"

    # check if the meta data differs
    if len(set(Meta_data_table_df[meta_data_to_test])) == len(
            Meta_data_table_df['Samples'].tolist()):
        sg.Popup(
            "The meta data is unique for all samples. Please adjust the meta data table!",
            title=("Error"))
        raise RuntimeError

    # check if the meta data differs
    if len(set(Meta_data_table_df[meta_data_to_test])) == 1:
        sg.Popup(
            "The meta data is similar for all samples. Please adjust the meta data table!",
            title=("Error"))
        raise RuntimeError

    if sorted(TaXon_table_samples) == sorted(Meta_data_table_samples):

        samples = Meta_data_table_samples

        ## extract the relevant data
        TaXon_table_df = TaXon_table_df[[taxonomic_level] + samples]
        ## define an aggregation function to combine multiple hit of one taxonimic level
        aggregation_functions = {}
        ## define samples functions
        for sample in samples:
            ## 'sum' will calculate the sum of p/a data
            aggregation_functions[sample] = 'sum'
        ## define taxon level function
        aggregation_functions[taxonomic_level] = 'first'
        ## create condensed dataframe
        TaXon_table_df = TaXon_table_df.groupby(
            TaXon_table_df[taxonomic_level]).aggregate(aggregation_functions)
        if 'unidentified' in TaXon_table_df.index:
            TaXon_table_df = TaXon_table_df.drop('unidentified')

        data = TaXon_table_df[samples].transpose().values.tolist()
        jc_dm = beta_diversity(pcoa_dissimilarity, data, samples)
        ordination_result = pcoa(jc_dm)
        metadata_list = Meta_data_table_df[meta_data_to_test].values.tolist()

        anosim_results = anosim(jc_dm, metadata_list, permutations=999)
        anosim_r = round(anosim_results['test statistic'], 5)
        anosim_p = anosim_results['p-value']
        textbox = meta_data_to_test + ", " + taxon_title + "<br>Anosim " + "R = " + str(
            anosim_r) + " " + "p = " + str(anosim_p)

        #######################################################################################
        # create window to ask for PCoA axis to test
        def slices(list, slice):
            for i in range(0, len(list), slice):
                yield list[i:i + slice]

        # collect the PCoA proportion explained values
        proportion_explained_list = []
        for i, pcoa_axis in enumerate(ordination_result.proportion_explained):
            if round(pcoa_axis * 100, 2) >= 1:
                proportion_explained_list.append("PC" + str(i + 1) + " (" +
                                                 str(round(pcoa_axis *
                                                           100, 2)) + " %)")

        pcoa_axis_checkboxes = list(
            slices([
                sg.Checkbox(name, key=name, size=(15, 1))
                for name in proportion_explained_list
            ], 10))

        pcoa_window_layout = [
            [sg.Text('Check up to four axes to be displayed')],
            [sg.Frame(layout=pcoa_axis_checkboxes, title='')],
            [sg.Text('Only axes >= 1 % explained variance are shown')],
            [sg.CB("Connect categories", default=True, key="draw_mesh")],
            [sg.Text('')],
            [sg.Button('Plot', key='Plot')],
            [sg.Button('Back')],
        ]

        pcoa_window = sg.Window('PCoA axis',
                                pcoa_window_layout,
                                keep_on_top=True)

        while True:
            event, values = pcoa_window.read()

            draw_mesh = values["draw_mesh"]

            if event is None or event == 'Back':
                break

            if event == 'Plot':

                ## create a subfolder for better sorting and overview
                dirName = Path(
                    str(path_to_outdirs) + "/" + "PCoA_plots" + "/" +
                    TaXon_table_xlsx.stem + "/")
                if not os.path.exists(dirName):
                    os.mkdir(dirName)

                # collect the pcoa axis values
                axis_to_plot = [
                    key for key, value in values.items()
                    if value == True and "PC" in key
                ]
                # pass on only if two pcoa axes were checked
                if len(axis_to_plot) == 2:
                    cat1 = axis_to_plot[1].split()[0]
                    cat2 = axis_to_plot[0].split()[0]

                    df_pcoa = ordination_result.samples[[cat1, cat2]]
                    df_pcoa.insert(
                        2, "Metadata",
                        Meta_data_table_df[meta_data_to_test].values.tolist(),
                        True)
                    df_pcoa.insert(
                        3, "Samples",
                        Meta_data_table_df["Samples"].values.tolist(), True)

                    if draw_mesh == True:
                        combinations_list = []
                        for metadata in df_pcoa["Metadata"]:
                            ## collect all entries for the respective metadata
                            arr = df_pcoa.loc[df_pcoa['Metadata'] == metadata][
                                [cat1, cat2, "Metadata",
                                 "Samples"]].to_numpy()
                            ## create a df for all possible combinations using itertools combinations
                            for entry in list(combinations(arr, 2)):
                                combinations_list.append(list(entry[0]))
                                combinations_list.append(list(entry[1]))
                        ## create a dataframe to draw the plot from
                        df = pd.DataFrame(combinations_list)
                        df.columns = [cat1, cat2, "Metadata", "Samples"]

                        fig = px.scatter(
                            df,
                            x=cat1,
                            y=cat2,
                            color="Metadata",
                            text="Samples",
                            title=textbox,
                            color_discrete_sequence=color_discrete_sequence)
                        fig.update_traces(marker_size=int(pcoa_s),
                                          mode="markers+lines")
                        fig.update_layout(height=int(height),
                                          width=int(width),
                                          template=template,
                                          showlegend=True,
                                          font_size=font_size,
                                          title_font_size=font_size)
                        fig.update_xaxes(title=axis_to_plot[1])
                        fig.update_yaxes(title=axis_to_plot[0])

                    else:
                        fig = px.scatter(
                            df_pcoa,
                            x=cat1,
                            y=cat2,
                            color="Metadata",
                            text="Samples",
                            title=textbox,
                            color_discrete_sequence=color_discrete_sequence)
                        fig.update_traces(marker_size=int(pcoa_s),
                                          mode="markers")
                        fig.update_layout(height=int(height),
                                          width=int(width),
                                          template=template,
                                          showlegend=True,
                                          font_size=font_size,
                                          title_font_size=font_size)
                        fig.update_xaxes(title=axis_to_plot[1])
                        fig.update_yaxes(title=axis_to_plot[0])

                    ## define output files
                    output_pdf = Path(
                        str(dirName) + "/" + meta_data_to_test + "_" +
                        taxon_title + ".pdf")
                    output_html = Path(
                        str(dirName) + "/" + meta_data_to_test + "_" +
                        taxon_title + ".html")
                    output_xlsx = Path(
                        str(dirName) + "/" + meta_data_to_test + "_" +
                        taxon_title + ".xlsx")

                    ## write files
                    fig.write_image(str(output_pdf))
                    fig.write_html(str(output_html))
                    ordination_result.samples[[cat1,
                                               cat2]].to_excel(output_xlsx)

                    ## ask to show file
                    answer = sg.PopupYesNo('Show plot?', keep_on_top=True)
                    if answer == "Yes":
                        webbrowser.open('file://' + str(output_html))

                    ## print closing text
                    closing_text = "\n" + "PCoA plots are found in: " + str(
                        path_to_outdirs) + "/PCoA_plots/"
                    sg.Popup(closing_text, title="Finished", keep_on_top=True)

                    ## write to log
                    from taxontabletools.create_log import ttt_log
                    ttt_log("pcoa analysis", "analysis", TaXon_table_xlsx.name,
                            output_pdf.name, meta_data_to_test,
                            path_to_outdirs)
                    break

                elif len(axis_to_plot) == 3:
                    cat1 = axis_to_plot[0].split()[0]
                    cat2 = axis_to_plot[1].split()[0]
                    cat3 = axis_to_plot[2].split()[0]

                    df_pcoa = ordination_result.samples[[cat1, cat2, cat3]]
                    df_pcoa.insert(
                        3, "Metadata",
                        Meta_data_table_df[meta_data_to_test].values.tolist(),
                        True)
                    df_pcoa.insert(
                        4, "Samples",
                        Meta_data_table_df["Samples"].values.tolist(), True)

                    ## check if lines are to be drawn between the dots
                    if draw_mesh == True:
                        combinations_list = []
                        for metadata in df_pcoa["Metadata"]:
                            ## collect all entries for the respective metadata
                            arr = df_pcoa.loc[df_pcoa['Metadata'] == metadata][
                                [cat1, cat2, cat3, "Metadata",
                                 "Samples"]].to_numpy()
                            ## create a df for all possible combinations using itertools combinations
                            for entry in list(combinations(arr, 2)):
                                combinations_list.append(list(entry[0]))
                                combinations_list.append(list(entry[1]))
                        ## create a dataframe to draw the plot from
                        df = pd.DataFrame(combinations_list)
                        df.columns = [cat1, cat2, cat3, "Metadata", "Samples"]
                        ## draw the plot
                        fig = px.scatter_3d(
                            df,
                            x=cat1,
                            y=cat2,
                            z=cat3,
                            color="Metadata",
                            text="Samples",
                            title=textbox,
                            color_discrete_sequence=color_discrete_sequence)
                        fig.update_traces(marker_size=int(pcoa_s),
                                          mode="markers+lines",
                                          line=dict(width=0.5))
                        fig.update_layout(height=int(height),
                                          width=int(width),
                                          template=template,
                                          title=textbox,
                                          showlegend=True,
                                          font_size=font_size,
                                          title_font_size=font_size)
                        fig.update_layout(
                            scene=dict(xaxis_title=axis_to_plot[0],
                                       yaxis_title=axis_to_plot[1],
                                       zaxis_title=axis_to_plot[2]))
                    else:
                        fig = px.scatter_3d(
                            df_pcoa,
                            x=cat1,
                            y=cat2,
                            z=cat3,
                            color="Metadata",
                            text="Samples",
                            color_discrete_sequence=color_discrete_sequence)
                        fig.update_traces(marker_size=int(pcoa_s),
                                          mode="markers")
                        fig.update_layout(height=int(height),
                                          width=int(width),
                                          template=template,
                                          showlegend=True,
                                          title=textbox,
                                          font_size=font_size,
                                          title_font_size=font_size)
                        fig.update_layout(
                            scene=dict(xaxis_title=axis_to_plot[0],
                                       yaxis_title=axis_to_plot[1],
                                       zaxis_title=axis_to_plot[2]))

                    ## define output files
                    output_pdf = Path(
                        str(dirName) + "/" + meta_data_to_test + "_" +
                        taxon_title + "_3d.pdf")
                    output_html = Path(
                        str(dirName) + "/" + meta_data_to_test + "_" +
                        taxon_title + "_3d.html")
                    output_xlsx = Path(
                        str(dirName) + "/" + meta_data_to_test + "_" +
                        taxon_title + "_3d.xlsx")

                    ## write output files
                    fig.write_image(str(output_pdf))
                    fig.write_html(str(output_html))
                    ordination_result.samples[[cat1,
                                               cat2]].to_excel(output_xlsx)

                    ## ask to show file
                    answer = sg.PopupYesNo('Show plot?', keep_on_top=True)
                    if answer == "Yes":
                        webbrowser.open('file://' + str(output_html))

                    ## print closing text
                    closing_text = "PCoA plots are found in: " + str(
                        path_to_outdirs) + "/PCoA_plots/"
                    sg.Popup(closing_text, title="Finished", keep_on_top=True)

                    ## write log file
                    from taxontabletools.create_log import ttt_log
                    ttt_log("pcoa analysis", "analysis", TaXon_table_xlsx.name,
                            output_pdf.name, meta_data_to_test,
                            path_to_outdirs)
                    break

                else:
                    sg.Popup("Please choose not more than 3 PCoA axes",
                             title="Error",
                             keep_on_top=True)

            if event == 'Plot matrix':
                if len(proportion_explained_list) >= 4:

                    ## create a subfolder for better sorting and overview
                    dirName = Path(
                        str(path_to_outdirs) + "/" + "PCoA_plots" + "/" +
                        TaXon_table_xlsx.stem + "/")
                    if not os.path.exists(dirName):
                        os.mkdir(dirName)

                    df_pcoa = ordination_result.samples[[
                        "PC1", "PC2", "PC3", "PC4"
                    ]]
                    df_pcoa.insert(
                        4, "Metadata",
                        Meta_data_table_df[meta_data_to_test].values.tolist(),
                        True)
                    df_pcoa.insert(
                        5, "Sample",
                        Meta_data_table_df["Samples"].values.tolist(), True)

                    fig = make_subplots(rows=4, cols=4)
                    ########### 1 ###########
                    fig.add_trace(go.Scatter(), row=1, col=1)
                    fig.update_layout(template=template,
                                      font_size=font_size,
                                      title_font_size=font_size)
                    text = "PC1 (" + str(
                        round(
                            ordination_result.proportion_explained["PC1"] *
                            100, 2)) + " %)"
                    fig.add_annotation(text=text, showarrow=False)
                    fig.update_xaxes(showticklabels=False, showgrid=False)
                    fig.update_yaxes(showticklabels=False, showgrid=False)
                    ########### 2 ###########
                    df = df_pcoa[["PC1", "PC2", "Metadata", "Sample"]]
                    for metadata in set(metadata_list):
                        df_metadata = df[df['Metadata'] == metadata]
                        #fig = px.scatter(df_pcoa, x="PC1", y="PC2", , )
                        fig.add_trace(go.Scatter(
                            x=df_metadata["PC1"].values.tolist(),
                            y=df_metadata["PC2"].values.tolist(),
                            mode='markers',
                            name=metadata,
                            text=df_metadata["Sample"].values.tolist()),
                                      row=1,
                                      col=2)
                    ########### 3 ###########
                    df = df_pcoa[["PC1", "PC3", "Metadata", "Sample"]]
                    for metadata in set(metadata_list):
                        df_metadata = df[df['Metadata'] == metadata]
                        #fig = px.scatter(df_pcoa, x="PC1", y="PC2", , )
                        fig.add_trace(go.Scatter(
                            x=df_metadata["PC1"].values.tolist(),
                            y=df_metadata["PC3"].values.tolist(),
                            mode='markers',
                            name=metadata,
                            showlegend=False,
                            text=df_metadata["Sample"].values.tolist()),
                                      row=1,
                                      col=3)
                    ########### 4 ###########
                    df = df_pcoa[["PC1", "PC4", "Metadata", "Sample"]]
                    for metadata in set(metadata_list):
                        df_metadata = df[df['Metadata'] == metadata]
                        fig.add_trace(go.Scatter(
                            x=df_metadata["PC1"].values.tolist(),
                            y=df_metadata["PC4"].values.tolist(),
                            mode='markers',
                            name=metadata,
                            showlegend=False,
                            text=df_metadata["Sample"].values.tolist()),
                                      row=1,
                                      col=4)
                        fig.update_traces(marker_size=int(pcoa_s),
                                          mode="markers")
                        fig.update_xaxes(showgrid=False, row=1, col=4)
                        fig.update_yaxes(showgrid=False, row=1, col=4)
                    ########### 5 ###########
                    fig.add_trace(go.Scatter(), row=2, col=2)
                    fig.update_layout(template=template,
                                      font_size=font_size,
                                      title_font_size=font_size)
                    text = "PC2 (" + str(
                        round(
                            ordination_result.proportion_explained["PC2"] *
                            100, 2)) + " %)"
                    fig.add_annotation(text=text,
                                       showarrow=False,
                                       row=2,
                                       col=2)
                    ########### 6 ###########
                    df = df_pcoa[["PC2", "PC3", "Metadata", "Sample"]]
                    for metadata in set(metadata_list):
                        df_metadata = df[df['Metadata'] == metadata]
                        #fig = px.scatter(df_pcoa, x="PC1", y="PC2", , )
                        fig.add_trace(go.Scatter(
                            x=df_metadata["PC2"].values.tolist(),
                            y=df_metadata["PC3"].values.tolist(),
                            mode='markers',
                            name=metadata,
                            showlegend=False,
                            text=df_metadata["Sample"].values.tolist()),
                                      row=2,
                                      col=3)
                    ########### 7 ###########
                    df = df_pcoa[["PC2", "PC4", "Metadata", "Sample"]]
                    for metadata in set(metadata_list):
                        df_metadata = df[df['Metadata'] == metadata]
                        fig.add_trace(go.Scatter(
                            x=df_metadata["PC2"].values.tolist(),
                            y=df_metadata["PC4"].values.tolist(),
                            mode='markers',
                            name=metadata,
                            showlegend=False,
                            text=df_metadata["Sample"].values.tolist()),
                                      row=2,
                                      col=4)
                    ########### 8 ###########
                    fig.add_trace(go.Scatter(), row=3, col=3)
                    fig.update_layout(template=template,
                                      font_size=font_size,
                                      title_font_size=font_size)
                    text = "PC3 (" + str(
                        round(
                            ordination_result.proportion_explained["PC3"] *
                            100, 2)) + " %)"
                    fig.add_annotation(text=text,
                                       showarrow=False,
                                       row=3,
                                       col=3)
                    ########### 9 ###########
                    df = df_pcoa[["PC3", "PC4", "Metadata", "Sample"]]
                    for metadata in set(metadata_list):
                        df_metadata = df[df['Metadata'] == metadata]
                        #fig = px.scatter(df_pcoa, x="PC1", y="PC2", , )
                        fig.add_trace(go.Scatter(
                            x=df_metadata["PC3"].values.tolist(),
                            y=df_metadata["PC4"].values.tolist(),
                            mode='markers',
                            name=metadata,
                            showlegend=False,
                            text=df_metadata["Sample"].values.tolist()),
                                      row=3,
                                      col=4)
                    ########### 5 ###########
                    fig.add_trace(go.Scatter(), row=4, col=4)
                    fig.update_layout(template=template,
                                      font_size=font_size,
                                      title_font_size=font_size)
                    text = "PC4 (" + str(
                        round(
                            ordination_result.proportion_explained["PC4"] *
                            100, 2)) + " %)"
                    fig.add_annotation(text=text,
                                       showarrow=False,
                                       row=4,
                                       col=4)

                    ######################
                    fig.update_xaxes(showline=True,
                                     mirror=True,
                                     linewidth=1,
                                     linecolor='black')
                    fig.update_yaxes(showline=True,
                                     mirror=True,
                                     linewidth=1,
                                     linecolor='black')
                    fig.update_traces(marker_size=int(pcoa_s), mode="markers")
                    # finish plot matrix
                    fig.update_layout(height=1000,
                                      width=1000,
                                      title_text=textbox)

                    ## define output files
                    output_pdf = Path(
                        str(dirName) + "/" + meta_data_to_test + "_" +
                        taxon_title + "_matrix.pdf")
                    output_html = Path(
                        str(dirName) + "/" + meta_data_to_test + "_" +
                        taxon_title + "_matrix.html")

                    ## write output files
                    fig.write_image(str(output_pdf))
                    fig.write_html(str(output_html))

                    ## ask to show file
                    answer = sg.PopupYesNo('Show plot?', keep_on_top=True)
                    if answer == "Yes":
                        webbrowser.open('file://' + str(output_html))

                    ## print closing text
                    closing_text = "\n" + "PCoA plots are found in: " + str(
                        path_to_outdirs) + "/PCoA_plots/"
                    sg.Popup(closing_text, title="Finished", keep_on_top=True)

                    ## write to log file
                    from taxontabletools.create_log import ttt_log
                    ttt_log("pcoa analysis", "analysis", TaXon_table_xlsx.name,
                            output_pdf.name, meta_data_to_test,
                            path_to_outdirs)
                    break
                else:
                    sg.Popup(
                        "There must be at least 4 PCoA axis available to plot the matrix!"
                    )

        pcoa_window.close()

    else:
        sg.PopupError(
            "The sample of both the TaXon table and the metadata table have to match!"
        )
コード例 #21
0
        title="CoMA",
        text=
        "ATTENTION: At least 1 of your eigenvalues is negative, potentially leading to problems! You may want to choose another metric for distance calculation or apply data transformation on the distance matrix (e.g. square root) to get rid of this problem."
    )

# Collect the eigenvalues held by `pc` next to the proportion explained and
# the cumulative sum of that proportion, one row per eigenvalue.
eig_dm = pd.DataFrame(pc.eigvals, columns=["Eigenvalue"])
eig_dm["Explained"] = pc.proportion_explained
eig_dm["Summed_explanation"] = pc.proportion_explained.cumsum()

# Both output files share the same metric-specific name part; for the
# minkowski metric it additionally carries the value of `p`.
if metric == "minkowski":
    metric_tag = mname + "_p" + str(p)
else:
    metric_tag = mname

eig_dm.to_csv("eigenvalues_" + metric_tag + ".txt", sep="\t")

#Statistics

# Single source for the permutation count so the number written to the report
# header can never disagree with the number actually passed to the tests.
n_permutations = 999

anos = anosim(div, map_DF, column=var, permutations=n_permutations)
perm = permanova(div, map_DF, column=var, permutations=n_permutations)

stat_file = "statistics_" + metric_tag + "_" + var + ".txt"

# Each test contributes one section: a header line with the permutation
# count, the test statistic under its conventional symbol, and the p-value.
report_sections = (
    ("ANOSIM", "R", anos),
    ("PERMANOVA", "F", perm),
)

with open(stat_file, "w") as st:
    for test_name, stat_symbol, result in report_sections:
        st.write(test_name + "\tPermutations: " + str(n_permutations) +
                 "\n\n")
        st.write(stat_symbol + "\t" + str(result["test statistic"]) + "\n")
        st.write("p-value\t" + str(result["p-value"]) + "\n\n")