Ejemplo n.º 1
0
    def test_unequal_group_sizes(self):
        # The same expected result must be produced for the original grouping
        # and for the relabeled grouping.
        expected = pd.Series(['ANOSIM', 'R', 6, 3, -0.363636, 0.878, 999],
                             index=self.exp_index)

        groupings = (self.grouping_unequal, self.grouping_unequal_relabeled)
        for grouping in groupings:
            # Reseed before each run so both runs start from the same RNG
            # state.
            np.random.seed(0)
            observed = anosim(self.dm_unequal, grouping)
            self.assert_series_equal(observed, expected)
Ejemplo n.º 2
0
    def test_unequal_group_sizes(self):
        # The same expected result must be produced for the original grouping
        # and for the relabeled grouping.
        expected = pd.Series(['ANOSIM', 'R', 6, 3, -0.363636, 0.878, 999],
                             index=self.exp_index)

        groupings = (self.grouping_unequal, self.grouping_unequal_relabeled)
        for grouping in groupings:
            # Reseed before each run so both runs start from the same RNG
            # state.
            np.random.seed(0)
            observed = anosim(self.dm_unequal, grouping)
            self.assert_series_equal(observed, expected)
Ejemplo n.º 3
0
 def test_no_ties(self):
     # Fix the RNG seed so the permutation-based p-value is reproducible.
     np.random.seed(0)
     observed = anosim(self.dm_no_ties, self.grouping_equal)

     expected = pd.Series(['ANOSIM', 'R', 4, 2, 0.625, 0.332, 999],
                          index=self.exp_index,
                          name='ANOSIM results')
     self.assert_series_equal(observed, expected)
Ejemplo n.º 4
0
    def test_ties(self):
        # Rerunning the method with identical inputs must reproduce the same
        # result, and a grouping vector must give the same result as a data
        # frame holding an equivalent grouping column.
        expected = pd.Series(['ANOSIM', 'R', 4, 2, 0.25, 0.671, 999],
                             index=self.exp_index)

        call_variants = (
            ((self.dm_ties, self.grouping_equal), {}),
            ((self.dm_ties, self.df), {'column': 'Group'}),
        )
        for call_args, call_kwargs in call_variants:
            for _ in range(2):
                # Same seed before every call so reruns are comparable.
                np.random.seed(0)
                observed = anosim(*call_args, **call_kwargs)
                self.assert_series_equal(observed, expected)
Ejemplo n.º 5
0
 def test_no_ties(self):
     # Fix the RNG seed so the permutation-based p-value is reproducible.
     np.random.seed(0)
     observed = anosim(self.dm_no_ties, self.grouping_equal)

     expected = pd.Series(['ANOSIM', 'R', 4, 2, 0.625, 0.332, 999],
                          index=self.exp_index,
                          name='ANOSIM results')
     self.assert_series_equal(observed, expected)
Ejemplo n.º 6
0
    def test_ties(self):
        # Rerunning the method with identical inputs must reproduce the same
        # result, and a grouping vector must give the same result as a data
        # frame holding an equivalent grouping column.
        expected = pd.Series(['ANOSIM', 'R', 4, 2, 0.25, 0.671, 999],
                             index=self.exp_index)

        call_variants = (
            ((self.dm_ties, self.grouping_equal), {}),
            ((self.dm_ties, self.df), {'column': 'Group'}),
        )
        for call_args, call_kwargs in call_variants:
            for _ in range(2):
                # Same seed before every call so reruns are comparable.
                np.random.seed(0)
                observed = anosim(*call_args, **call_kwargs)
                self.assert_series_equal(observed, expected)
Ejemplo n.º 7
0
def beta_diversity_pcoa(biom_fp, method="braycurtis", permutations=99, dim=2,
                        col='method', colormap=None):

    '''From a biom table, compute a distance matrix, run PCoA and an ANOSIM
    test on the groups in ``col``, and draw a PCoA plot.

    biom_fp: path
        Path to biom.Table containing sample metadata.
    method: str
        skbio.Diversity method used to build the distance matrix
        (Bray-Curtis by default).
    permutations: int
        Number of permutations to perform for anosim tests.
    dim: int
        Number of dimensions to plot. ``2`` draws the 2D scatter via
        ``circle_plot_from_dataframe``; any other value falls through to
        ``pcoa_plot_skbio``.
    col: str
        metadata name to use for distinguishing groups for anosim tests and
        pcoa plots.
    colormap: dict
        map groups names (must be group names in col) to colors used for plots.
        When omitted, a default palette keyed by 'expected', 'rdp',
        'sortmerna', 'uclust' and 'blast' is used.

    Returns the tuple ``(s_md, results, pc, dm)``: sample metadata, ANOSIM
    results, PCoA results and the distance matrix.
    '''

    # Build the default palette per call instead of sharing one mutable
    # default dict between all calls.
    if colormap is None:
        colormap = {'expected': 'red',
                    'rdp': 'seagreen',
                    'sortmerna': 'gray',
                    'uclust': 'blue',
                    'blast': 'purple'}

    dm, s_md = make_distance_matrix(biom_fp, method=method)

    # pcoa
    pc = pcoa(dm)

    # anosim tests
    results = anosim(dm, s_md, column=col, permutations=permutations)
    print('R = ', results['test statistic'], '; P = ', results['p-value'])

    if dim == 2:
        # 2D scatter plot of the first two principal coordinates.
        # ``.ix`` no longer exists in pandas; ``.loc`` is the label-based
        # equivalent.
        pc123 = pc.samples.loc[:, ["PC1", "PC2", "PC3"]]
        smd_merge = s_md.merge(pc123, left_index=True, right_index=True)
        # Colour by the same metadata column that defines the ANOSIM groups.
        smd_merge['Color'] = [colormap[x] for x in smd_merge[col]]
        # Positional access: the merged frame is indexed by sample id, not by
        # integers.
        title = smd_merge['reference'].iloc[0]
        # NOTE(review): assumes pc.proportion_explained is a pandas Series
        # (as in scikit-bio OrdinationResults) -- confirm.
        labels = ['PC {0} ({1:.2f})'.format(d + 1,
                                            pc.proportion_explained.iloc[d])
                  for d in range(2)]
        circle_plot_from_dataframe(smd_merge, "PC1", "PC2", title,
                                   columns=["method", "sample_id", "params"],
                                   color="Color", labels=labels)

    else:
        # skbio pcoa plots
        pcoa_plot_skbio(pc, s_md, col=col)

    return s_md, results, pc, dm
Ejemplo n.º 8
0
def main(args):
    """Run ANOSIM on a table grouped by its 'taxon' column and print one CSV row.

    Reads the tab-separated table at ``args.data`` (first column as index),
    keeps the rows whose 'taxon' value is not null, builds a Euclidean
    distance matrix from all remaining columns and runs ``anosim`` with
    ``permutations=0``. The result is written to stdout as a single
    header-less CSV row labelled ``args.data_name``.
    """
    data_df = pd.read_table(args.data, index_col=0)
    data_df_nonnull = data_df[data_df['taxon'].notnull()]

    # A pandas Index is immutable and has no ``remove`` method; ``drop``
    # returns a new Index without the grouping column.
    val_cols = data_df_nonnull.columns.drop('taxon')

    distances = pdist(data_df_nonnull[val_cols], metric='euclidean')
    dm = DistanceMatrix(squareform(distances))
    a = anosim(dm, data_df_nonnull['taxon'], permutations=0)

    # Turn the result Series into a one-row frame labelled with the data name.
    a_df = pd.DataFrame(a).T
    a_df.index = [args.data_name]

    a_df.to_csv(sys.stdout, header=False)
Ejemplo n.º 9
0
eigen3 = eigen['PC3'].values
print(eigen)
print(eigen1)

# Sample metadata; the first TSV column becomes the index.
df_fins = pd.read_csv("samples_id_all.tsv", sep="\t", header=0, index_col=0)
print(df_fins)

# Keep only the grouping column. The explicit copy makes the column
# assignments below act on an independent frame instead of a selection of
# the frame read above (avoids pandas' SettingWithCopyWarning). The index
# is deliberately left untouched.
df_fins = df_fins[['sal']].copy()
print(df_fins)
#df_fin.to_csv("test6.tsv", sep="\t", header=1)

df_fins['Observed OTUs'] = adiv_obs_otuss
df_fins['Faith PD'] = adiv_faith_pds

# ANOSIM on the 'sal' groups with 999 permutations.
anosims = anosim(wu_dms, df_fins, column='sal', permutations=999)
print(anosims['test statistic'])
print(anosims['p-value'])

print(df_fins.corr(method="spearman"))

print(adiv_obs_otuss)

figs = plt.figure()
#plt.close('all')
#plt.subplot(1,3,1)
figs = wu_pcs.plot(df_fins,
                   'sal',
                   axis_labels=('PC1' + str(eigen1) + '%',
                                'PC2' + str(eigen2) + '%',
                                'PC3' + str(eigen3) + '%'),
Ejemplo n.º 10
0
def beta_diversity(TaXon_table_xlsx, width, heigth, cmap, meta_data_to_test,
                   taxonomic_level, path_to_outdirs, template, font_size,
                   diss_metric):
    """Draw a sample-by-sample dissimilarity heatmap and run ANOSIM.

    Reads the TaXon table and its metadata table
    (``<path_to_outdirs>/Meta_data_table/<stem>_metadata.xlsx``), drops the
    samples whose ``meta_data_to_test`` value is empty, sums the reads per
    taxon at ``taxonomic_level``, computes the ``diss_metric`` distance matrix
    and tests the metadata groups with ANOSIM (999 permutations). The heatmap
    is written as PDF and HTML and the matrix as XLSX into
    ``<path_to_outdirs>/Beta_diversity``. Feedback is given through
    PySimpleGUI popups.

    Parameters
    ----------
    TaXon_table_xlsx : path-like
        Excel file holding the TaXon table. Columns from position 10 onwards
        are treated as sample columns.
    width, heigth : int or str
        Plot size; both are passed through ``int()``.
    cmap
        Continuous color scale handed to ``plotly.express.imshow``.
    meta_data_to_test : str
        Metadata column whose values define the ANOSIM groups.
    taxonomic_level : str
        Column to aggregate on. "ASVs", "ESVs", "OTUs" and "zOTUs" are mapped
        to the "ID" column.
    path_to_outdirs : path-like
        Project directory containing the input and output sub-directories.
    template
        Plotly layout template.
    font_size
        Font size used for the plot and its title.
    diss_metric : str
        Metric name passed to ``skbio.diversity.beta_diversity``; also used
        in the output file names.

    Raises
    ------
    RuntimeError
        If the metadata value is different for every sample or identical for
        all samples (a popup is shown first).
    """

    import pandas as pd
    # Inside this function the name ``beta_diversity`` refers to the skbio
    # function, not to this wrapper.
    from skbio.diversity import beta_diversity
    from skbio.stats.distance import anosim
    import plotly.express as px
    from pathlib import Path
    import PySimpleGUI as sg
    import webbrowser

    TaXon_table_xlsx = Path(TaXon_table_xlsx)
    Meta_data_table_xlsx = Path(
        str(path_to_outdirs) + "/" + "Meta_data_table" + "/" +
        TaXon_table_xlsx.stem + "_metadata.xlsx")
    TaXon_table_df = pd.read_excel(TaXon_table_xlsx,
                                   header=0).fillna("unidentified")
    TaXon_table_samples = TaXon_table_df.columns.tolist()[10:]
    # Empty metadata cells are replaced by the string "nan" so they can be
    # detected below.
    Meta_data_table_df = pd.read_excel(Meta_data_table_xlsx,
                                       header=0).fillna("nan")
    Meta_data_table_samples = Meta_data_table_df['Samples'].tolist()

    metadata_list = Meta_data_table_df[meta_data_to_test].values.tolist()
    metadata_loc = Meta_data_table_df.columns.tolist().index(meta_data_to_test)

    ## drop samples with metadata called nan (= empty)
    # NOTE(review): the sample name is taken from the first metadata column;
    # presumably that is the 'Samples' column -- confirm.
    drop_samples = [
        i[0] for i in Meta_data_table_df.values.tolist()
        if i[metadata_loc] == "nan"
    ]

    if drop_samples:
        ## filter the TaXon table
        TaXon_table_df = TaXon_table_df.drop(drop_samples, axis=1)
        TaXon_table_samples = TaXon_table_df.columns.tolist()[10:]
        ## also remove empty OTUs (rows whose remaining read counts are all 0)
        row_filter_list = [
            row for row in TaXon_table_df.values.tolist()
            if set(row[10:]) != {0}
        ]
        columns = TaXon_table_df.columns.tolist()
        TaXon_table_df = pd.DataFrame(row_filter_list, columns=columns)
        Meta_data_table_df = pd.DataFrame(
            [
                i for i in Meta_data_table_df.values.tolist()
                if i[0] not in drop_samples
            ],
            columns=Meta_data_table_df.columns.tolist())
        Meta_data_table_samples = Meta_data_table_df['Samples'].tolist()
        ## the metadata values only change when samples were dropped
        metadata_list = Meta_data_table_df[meta_data_to_test].values.tolist()

    ## title text keeps the level name as given by the caller
    taxon_title = taxonomic_level

    ## adjust taxonomic level if neccessary
    if taxonomic_level in ["ASVs", "ESVs", "OTUs", "zOTUs"]:
        taxonomic_level = "ID"

    metadata_categories = set(Meta_data_table_df[meta_data_to_test])

    # every sample has its own metadata value: no groups to compare
    if len(metadata_categories) == len(
            Meta_data_table_df['Samples'].tolist()):
        sg.Popup(
            "The meta data is unique for all samples. Please adjust the meta data table!",
            title=("Error"))
        raise RuntimeError("The meta data is unique for all samples.")

    # all samples share one metadata value: only a single group
    if len(metadata_categories) == 1:
        sg.Popup(
            "The meta data is similar for all samples. Please adjust the meta data table!",
            title=("Error"))
        raise RuntimeError("The meta data is similar for all samples.")

    # both tables must describe exactly the same set of samples
    if sorted(TaXon_table_samples) != sorted(Meta_data_table_samples):
        sg.PopupError(
            "Error: The samples between the taxon table and meta table do not match!",
            keep_on_top=True)
        return

    ## collect samples for plot (metadata table order)
    samples = Meta_data_table_samples

    ## extract the relevant data
    TaXon_table_df = TaXon_table_df[[taxonomic_level] + samples]
    ## aggregate multiple hits of one taxonomic level: sum the reads of each
    ## sample and keep the first taxon label
    aggregation_functions = {sample: 'sum' for sample in samples}
    aggregation_functions[taxonomic_level] = 'first'
    ## create condensed dataframe
    df_new = TaXon_table_df.groupby(
        TaXon_table_df[taxonomic_level]).aggregate(aggregation_functions)
    if 'unidentified' in df_new.index:
        df_new = df_new.drop('unidentified')

    ## collect reads: one row per sample
    data = df_new[samples].transpose().values.tolist()
    ## calculate dissimilarity distances
    dissimilarity_dm = beta_diversity(diss_metric, data, samples)

    anosim_results = anosim(dissimilarity_dm,
                            metadata_list,
                            permutations=999)
    anosim_r = round(anosim_results['test statistic'], 5)
    anosim_p = anosim_results['p-value']
    textbox = "Anosim (" + meta_data_to_test + ", " + taxon_title + ")<br>" + "R = " + str(
        anosim_r) + "<br>" + "p = " + str(anosim_p)

    matrix = dissimilarity_dm.data
    matrix_df = pd.DataFrame(matrix)
    matrix_df.columns = samples
    matrix_df.index = samples

    # create plot
    color_label = diss_metric + " distance"
    fig = px.imshow(matrix,
                    x=samples,
                    y=samples,
                    color_continuous_scale=cmap,
                    labels=dict(color=color_label))
    fig.update_layout(height=int(heigth),
                      width=int(width),
                      template=template,
                      showlegend=True,
                      title=textbox,
                      font_size=font_size,
                      title_font_size=font_size)

    # all three output files share one directory and one base name
    output_base = (str(path_to_outdirs) + "/" + "Beta_diversity" + "/" +
                   TaXon_table_xlsx.stem + "_" + meta_data_to_test + "_" +
                   taxon_title + "_" + diss_metric)
    output_pdf = Path(output_base + ".pdf")
    output_html = Path(output_base + ".html")
    output_xlsx = Path(output_base + ".xlsx")
    fig.write_image(str(output_pdf))
    fig.write_html(str(output_html))
    matrix_df.to_excel(output_xlsx)

    ## ask to show plot
    answer = sg.PopupYesNo('Show plot?', keep_on_top=True)
    if answer == "Yes":
        # Build a proper file URI; plain "file://" + path is only valid for
        # absolute POSIX paths.
        webbrowser.open(output_html.resolve().as_uri())

    ## write to log file
    sg.Popup("Beta diversity estimate are found in",
             path_to_outdirs,
             "/Beta_diversity/",
             title="Finished",
             keep_on_top=True)
    from taxontabletools.create_log import ttt_log
    ttt_log("beta diversity", "analysis", TaXon_table_xlsx.name,
            output_pdf.name, meta_data_to_test, path_to_outdirs)
Ejemplo n.º 11
0
eigen3 = eigen['PC3'].values
print(eigen)
print(eigen1)

# Sample metadata; the first TSV column becomes the index.
df_fin = pd.read_csv("samples_id_all.tsv", sep="\t", header=0, index_col=0)
print(df_fin)

# Keep only the grouping column. The explicit copy makes the column
# assignments below act on an independent frame instead of a selection of
# the frame read above (avoids pandas' SettingWithCopyWarning). The index
# is deliberately left untouched.
df_fin = df_fin[['true_lat']].copy()
print(df_fin)
#df_fin.to_csv("test6.tsv", sep="\t", header=1)

df_fin['Observed OTUs'] = adiv_obs_otus
df_fin['Faith PD'] = adiv_faith_pd

# ANOSIM on the 'true_lat' groups with 999 permutations.
anosim_lat = anosim(wu_dm, df_fin, column='true_lat', permutations=999)
print(anosim_lat['test statistic'])
print(anosim_lat['p-value'])

print(df_fin.corr(method="spearman"))

print(adiv_obs_otus)

fig = plt.figure()
#plt.close('all')
#plt.subplot(1,3,1)
fig = wu_pc.plot(df_fin,
                 'true_lat',
                 axis_labels=('PC1' + str(eigen1) + '%',
                              'PC2' + str(eigen2) + '%',
                              'PC3' + str(eigen3) + '%'),
                its - 1][k].cluster_label
        field_plot = np.ma.masked_array(field_plot, field_plot == -10000)

        #%% Determine from which clusters the data is part of:
        nomask = np.where(~field_plot.mask)
        field_plot = field_plot[nomask]
        args = nwf.find_nearest_args(vLons[nomask], vLats[nomask], Flats,
                                     Flons)
        Flabels = field_plot[args]
        args = nwf.find_nearest_args(vLons[nomask], vLats[nomask], FlatsDino,
                                     FlonsDino)
        Dinolabels = field_plot[args]
        #%%
        if (len(np.unique(Dinolabels)) > 1):
            Dano = anosim(DistanceMatrix(Dinotaxdist),
                          Dinolabels.astype(str),
                          permutations=perm)
            DinoP[its] = list(Dano)[5]
            DinoR[its] = list(Dano)[4]
        if (len(np.unique(Flabels)) > 1):
            Fano = anosim(DistanceMatrix(Ftaxdist),
                          Flabels.astype(str),
                          permutations=perm)
            FP[its] = list(Fano)[5]
            FR[its] = list(Fano)[4]

    #%% Save file with ANOSIM results
    np.savez('ANOSIM_hierarchicalclus%s_sp%d_perm%d_its%d_mlat%d.npz' %
             (season, sp, perm, iterations, maxlat),
             ForamP=FP,
             DinoP=DinoP,
Ejemplo n.º 13
0
        sample_id = each_sample_split[0]
        sample_group = each_sample_split[1]
        sample_id_list.append(sample_id)
        sample_group_list.append(sample_group)

# Load the tab-separated input table.
df = pd.read_csv(infile_data, sep='\t')

# One list of values per sample column, in sample_id_list order.
lol_data_in = [df[col_id].values.tolist() for col_id in sample_id_list]

# Pairwise distances between all samples.
dist_arrary = pairwise_distances(
    lol_data_in, lol_data_in, metric=distance_metric)

# Label the rows/columns of the distance matrix with the sample ids.
dist_matrix = DistanceMatrix(dist_arrary, sample_id_list)

# ANOSIM on the sample groups, followed by a blank separator line.
anosim_test = anosim(
    dist_matrix, sample_group_list, permutations=999)
print(anosim_test)
print()

# PERMANOVA on the same distance matrix and groups.
permanova_test = permanova(
    dist_matrix, sample_group_list, permutations=999)
print(permanova_test)
Ejemplo n.º 14
0
eigen3 = eigen['PC3'].values
print(eigen)
print(eigen1)

# Sample metadata; the first TSV column becomes the index.
df_fin = pd.read_csv("samples_id_all.tsv", sep="\t", header=0, index_col=0)
print(df_fin)

# Keep only the grouping column. The explicit copy makes the column
# assignments below act on an independent frame instead of a selection of
# the frame read above (avoids pandas' SettingWithCopyWarning). The index
# is deliberately left untouched.
df_fin = df_fin[['region']].copy()
print(df_fin)
#df_fin.to_csv("test6.tsv", sep="\t", header=1)

df_fin['Observed OTUs'] = adiv_obs_otus
df_fin['Faith PD'] = adiv_faith_pd

# ANOSIM on the 'region' groups with 999 permutations. The result gets its
# own name so the ``anosim`` function stays callable afterwards.
anosim_region = anosim(wu_dm, df_fin, column='region', permutations=999)
print(anosim_region['test statistic'])
print(anosim_region['p-value'])

print(df_fin.corr(method="spearman"))

#print(adiv_obs_otus)

fig = plt.figure()
#plt.close('all')
#plt.subplot(1,3,1)
fig = wu_pc.plot(df_fin,
                 'region',
                 axis_labels=('PC1' + str(eigen1) + '%',
                              'PC2' + str(eigen2) + '%',
                              'PC3' + str(eigen3) + '%'),
Ejemplo n.º 15
0
 def test_no_permutations(self):
     # With permutations=0 the expected p-value is NaN and the reported
     # permutation count is 0.
     observed = anosim(self.dm_no_ties, self.grouping_equal, permutations=0)

     expected = pd.Series(['ANOSIM', 'R', 4, 2, 0.625, np.nan, 0],
                          index=self.exp_index,
                          name='ANOSIM results')
     self.assert_series_equal(observed, expected)
Ejemplo n.º 16
0
eigen3 = eigen['PC3'].values
print(eigen)
print(eigen1)

# Sample metadata; the first TSV column becomes the index.
df_fint = pd.read_csv("samples_id_all.tsv", sep="\t", header=0, index_col=0)
print(df_fint)

# Keep only the grouping column. The explicit copy makes the column
# assignments below act on an independent frame instead of a selection of
# the frame read above (avoids pandas' SettingWithCopyWarning). The index
# is deliberately left untouched.
df_fint = df_fint[['temp']].copy()
print(df_fint)
#df_fin.to_csv("test6.tsv", sep="\t", header=1)

df_fint['Observed OTUs'] = adiv_obs_otust
df_fint['Faith PD'] = adiv_faith_pdt

# ANOSIM on the 'temp' groups with 999 permutations.
anosimt = anosim(wu_dmt, df_fint, column='temp', permutations=999)
print(anosimt['test statistic'])
print(anosimt['p-value'])

print(df_fint.corr(method="spearman"))

print(adiv_obs_otust)

figt = plt.figure()
#plt.close('all')
#plt.subplot(1,3,1)
figt = wu_pct.plot(df_fint,
                   'temp',
                   axis_labels=('PC1' + str(eigen1) + '%',
                                'PC2' + str(eigen2) + '%',
                                'PC3' + str(eigen3) + '%'),
Ejemplo n.º 17
0
# Build one list of floats per column of ``rows``, skipping the first row and
# the first column (presumably header labels -- confirm against the loader).
for col in range(1, len(rows[0])):
    samples.append([float(row[col]) for row in rows[1:]])
"""
only_samples = ['LR', 'SR']
new_samples, new_names = [], []
for a in range(len(sample_names)):
    for b in range(len(only_samples)):
        if sample_names[a] == only_samples[b]:
            new_samples.append(samples[a])
            new_names.append(sample_names[a])
samples = new_samples
sample_names = new_names
print(len(samples), len(sample_names))
"""

# Bray-Curtis distance matrix over the collected samples.
sam_dm = dm.from_iterable(samples, metric=braycurtis)

# Run PERMDISP, ANOSIM and PERMANOVA on the same matrix and grouping,
# printing each result as soon as it is available.
pdisp = permdisp(
    sam_dm, sample_names, column=None, test='median', permutations=999)
print(pdisp)

asim = anosim(
    sam_dm, sample_names, column=None, permutations=999)
print(asim)

perm = permanova(
    sam_dm, sample_names, column=None, permutations=999)
print(perm)
Ejemplo n.º 18
0
 def test_no_permutations(self):
     # With permutations=0 the expected p-value is NaN and the reported
     # permutation count is 0.
     observed = anosim(self.dm_no_ties, self.grouping_equal, permutations=0)

     expected = pd.Series(['ANOSIM', 'R', 4, 2, 0.625, np.nan, 0],
                          index=self.exp_index,
                          name='ANOSIM results')
     self.assert_series_equal(observed, expected)
Ejemplo n.º 19
0
eigen3 = eigen['PC3'].values
print(eigen)
print(eigen1)

# Sample metadata; the first TSV column becomes the index.
df_find = pd.read_csv("samples_id_all.tsv", sep="\t", header=0, index_col=0)
print(df_find)

# Keep only the grouping column. The explicit copy makes the column
# assignments below act on an independent frame instead of a selection of
# the frame read above (avoids pandas' SettingWithCopyWarning). The index
# is deliberately left untouched.
df_find = df_find[['depth_group_50']].copy()
print(df_find)
#df_fin.to_csv("test6.tsv", sep="\t", header=1)

df_find['Observed OTUs'] = adiv_obs_otusd
df_find['Faith PD'] = adiv_faith_pdd

# ANOSIM on the 'depth_group_50' groups with 999 permutations.
anosimd = anosim(wu_dmd, df_find, column='depth_group_50', permutations=999)
print(anosimd['test statistic'])
print(anosimd['p-value'])

print(df_find.corr(method="spearman"))

print(adiv_obs_otusd)

figd = plt.figure()
#plt.close('all')
#plt.subplot(1,3,1)
figd = wu_pcd.plot(df_find,
                   'depth_group_50',
                   axis_labels=('PC1' + str(eigen1) + '%',
                                'PC2' + str(eigen2) + '%',
                                'PC3' + str(eigen3) + '%'),
Ejemplo n.º 20
0
def PCoA_analysis(TaXon_table_xlsx, meta_data_to_test, taxonomic_level, width,
                  height, pcoa_s, path_to_outdirs, template, font_size,
                  color_discrete_sequence, pcoa_dissimilarity):
    import pandas as pd
    import numpy as np
    from skbio.diversity import beta_diversity
    from skbio.stats.ordination import pcoa
    from skbio.stats.distance import anosim
    import plotly.graph_objects as go
    from plotly.subplots import make_subplots
    import plotly.express as px
    from pathlib import Path
    import PySimpleGUI as sg
    import os, webbrowser
    from itertools import combinations

    TaXon_table_xlsx = Path(TaXon_table_xlsx)
    Meta_data_table_xlsx = Path(
        str(path_to_outdirs) + "/" + "Meta_data_table" + "/" +
        TaXon_table_xlsx.stem + "_metadata.xlsx")
    TaXon_table_df = pd.read_excel(TaXon_table_xlsx,
                                   header=0).fillna("unidentified")
    TaXon_table_samples = TaXon_table_df.columns.tolist()[10:]
    Meta_data_table_df = pd.read_excel(Meta_data_table_xlsx,
                                       header=0).fillna("nan")
    Meta_data_table_samples = Meta_data_table_df['Samples'].tolist()

    metadata_list = Meta_data_table_df[meta_data_to_test].values.tolist()
    metadata_loc = Meta_data_table_df.columns.tolist().index(meta_data_to_test)

    ## drop samples with metadata called nan (= empty)
    drop_samples = [
        i[0] for i in Meta_data_table_df.values.tolist()
        if i[metadata_loc] == "nan"
    ]

    if drop_samples != []:
        ## filter the TaXon table
        TaXon_table_df = TaXon_table_df.drop(drop_samples, axis=1)
        TaXon_table_samples = TaXon_table_df.columns.tolist()[10:]
        ## also remove empty OTUs
        row_filter_list = []
        for row in TaXon_table_df.values.tolist():
            reads = set(row[10:])
            if reads != {0}:
                row_filter_list.append(row)
        columns = TaXon_table_df.columns.tolist()
        TaXon_table_df = pd.DataFrame(row_filter_list, columns=columns)
        Meta_data_table_df = pd.DataFrame(
            [
                i for i in Meta_data_table_df.values.tolist()
                if i[0] not in drop_samples
            ],
            columns=Meta_data_table_df.columns.tolist())
        Meta_data_table_samples = Meta_data_table_df['Samples'].tolist()

    ## create a y axis title text
    taxon_title = taxonomic_level.lower()

    ## adjust taxonomic level if neccessary
    if taxonomic_level in ["ASVs", "ESVs", "OTUs", "zOTUs"]:
        taxon_title = taxonomic_level
        taxonomic_level = "ID"

    # check if the meta data differs
    if len(set(Meta_data_table_df[meta_data_to_test])) == len(
            Meta_data_table_df['Samples'].tolist()):
        sg.Popup(
            "The meta data is unique for all samples. Please adjust the meta data table!",
            title=("Error"))
        raise RuntimeError

    # check if the meta data differs
    if len(set(Meta_data_table_df[meta_data_to_test])) == 1:
        sg.Popup(
            "The meta data is similar for all samples. Please adjust the meta data table!",
            title=("Error"))
        raise RuntimeError

    if sorted(TaXon_table_samples) == sorted(Meta_data_table_samples):

        samples = Meta_data_table_samples

        ## extract the relevant data
        TaXon_table_df = TaXon_table_df[[taxonomic_level] + samples]
        ## define an aggregation function to combine multiple hit of one taxonimic level
        aggregation_functions = {}
        ## define samples functions
        for sample in samples:
            ## 'sum' will calculate the sum of p/a data
            aggregation_functions[sample] = 'sum'
        ## define taxon level function
        aggregation_functions[taxonomic_level] = 'first'
        ## create condensed dataframe
        TaXon_table_df = TaXon_table_df.groupby(
            TaXon_table_df[taxonomic_level]).aggregate(aggregation_functions)
        if 'unidentified' in TaXon_table_df.index:
            TaXon_table_df = TaXon_table_df.drop('unidentified')

        data = TaXon_table_df[samples].transpose().values.tolist()
        jc_dm = beta_diversity(pcoa_dissimilarity, data, samples)
        ordination_result = pcoa(jc_dm)
        metadata_list = Meta_data_table_df[meta_data_to_test].values.tolist()

        anosim_results = anosim(jc_dm, metadata_list, permutations=999)
        anosim_r = round(anosim_results['test statistic'], 5)
        anosim_p = anosim_results['p-value']
        textbox = meta_data_to_test + ", " + taxon_title + "<br>Anosim " + "R = " + str(
            anosim_r) + " " + "p = " + str(anosim_p)

        #######################################################################################
        # create window to ask for PCoA axis to test
        def slices(list, slice):
            for i in range(0, len(list), slice):
                yield list[i:i + slice]

        # collect the PCoA proportion explained values
        proportion_explained_list = []
        for i, pcoa_axis in enumerate(ordination_result.proportion_explained):
            if round(pcoa_axis * 100, 2) >= 1:
                proportion_explained_list.append("PC" + str(i + 1) + " (" +
                                                 str(round(pcoa_axis *
                                                           100, 2)) + " %)")

        pcoa_axis_checkboxes = list(
            slices([
                sg.Checkbox(name, key=name, size=(15, 1))
                for name in proportion_explained_list
            ], 10))

        pcoa_window_layout = [
            [sg.Text('Check up to four axes to be displayed')],
            [sg.Frame(layout=pcoa_axis_checkboxes, title='')],
            [sg.Text('Only axes >= 1 % explained variance are shown')],
            [sg.CB("Connect categories", default=True, key="draw_mesh")],
            [sg.Text('')],
            [sg.Button('Plot', key='Plot')],
            [sg.Button('Back')],
        ]

        pcoa_window = sg.Window('PCoA axis',
                                pcoa_window_layout,
                                keep_on_top=True)

        while True:
            event, values = pcoa_window.read()

            draw_mesh = values["draw_mesh"]

            if event is None or event == 'Back':
                break

            if event == 'Plot':

                ## create a subfolder for better sorting and overview
                dirName = Path(
                    str(path_to_outdirs) + "/" + "PCoA_plots" + "/" +
                    TaXon_table_xlsx.stem + "/")
                if not os.path.exists(dirName):
                    os.mkdir(dirName)

                # collect the pcoa axis values
                axis_to_plot = [
                    key for key, value in values.items()
                    if value == True and "PC" in key
                ]
                # pass on only if two pcoa axes were checked
                if len(axis_to_plot) == 2:
                    cat1 = axis_to_plot[1].split()[0]
                    cat2 = axis_to_plot[0].split()[0]

                    df_pcoa = ordination_result.samples[[cat1, cat2]]
                    df_pcoa.insert(
                        2, "Metadata",
                        Meta_data_table_df[meta_data_to_test].values.tolist(),
                        True)
                    df_pcoa.insert(
                        3, "Samples",
                        Meta_data_table_df["Samples"].values.tolist(), True)

                    if draw_mesh == True:
                        combinations_list = []
                        for metadata in df_pcoa["Metadata"]:
                            ## collect all entries for the respective metadata
                            arr = df_pcoa.loc[df_pcoa['Metadata'] == metadata][
                                [cat1, cat2, "Metadata",
                                 "Samples"]].to_numpy()
                            ## create a df for all possible combinations using itertools combinations
                            for entry in list(combinations(arr, 2)):
                                combinations_list.append(list(entry[0]))
                                combinations_list.append(list(entry[1]))
                        ## create a dataframe to draw the plot from
                        df = pd.DataFrame(combinations_list)
                        df.columns = [cat1, cat2, "Metadata", "Samples"]

                        fig = px.scatter(
                            df,
                            x=cat1,
                            y=cat2,
                            color="Metadata",
                            text="Samples",
                            title=textbox,
                            color_discrete_sequence=color_discrete_sequence)
                        fig.update_traces(marker_size=int(pcoa_s),
                                          mode="markers+lines")
                        fig.update_layout(height=int(height),
                                          width=int(width),
                                          template=template,
                                          showlegend=True,
                                          font_size=font_size,
                                          title_font_size=font_size)
                        fig.update_xaxes(title=axis_to_plot[1])
                        fig.update_yaxes(title=axis_to_plot[0])

                    else:
                        fig = px.scatter(
                            df_pcoa,
                            x=cat1,
                            y=cat2,
                            color="Metadata",
                            text="Samples",
                            title=textbox,
                            color_discrete_sequence=color_discrete_sequence)
                        fig.update_traces(marker_size=int(pcoa_s),
                                          mode="markers")
                        fig.update_layout(height=int(height),
                                          width=int(width),
                                          template=template,
                                          showlegend=True,
                                          font_size=font_size,
                                          title_font_size=font_size)
                        fig.update_xaxes(title=axis_to_plot[1])
                        fig.update_yaxes(title=axis_to_plot[0])

                    ## define output files
                    output_pdf = Path(
                        str(dirName) + "/" + meta_data_to_test + "_" +
                        taxon_title + ".pdf")
                    output_html = Path(
                        str(dirName) + "/" + meta_data_to_test + "_" +
                        taxon_title + ".html")
                    output_xlsx = Path(
                        str(dirName) + "/" + meta_data_to_test + "_" +
                        taxon_title + ".xlsx")

                    ## write files
                    fig.write_image(str(output_pdf))
                    fig.write_html(str(output_html))
                    ordination_result.samples[[cat1,
                                               cat2]].to_excel(output_xlsx)

                    ## ask to show file
                    answer = sg.PopupYesNo('Show plot?', keep_on_top=True)
                    if answer == "Yes":
                        webbrowser.open('file://' + str(output_html))

                    ## print closing text
                    closing_text = "\n" + "PCoA plots are found in: " + str(
                        path_to_outdirs) + "/PCoA_plots/"
                    sg.Popup(closing_text, title="Finished", keep_on_top=True)

                    ## write to log
                    from taxontabletools.create_log import ttt_log
                    ttt_log("pcoa analysis", "analysis", TaXon_table_xlsx.name,
                            output_pdf.name, meta_data_to_test,
                            path_to_outdirs)
                    break

                elif len(axis_to_plot) == 3:
                    cat1 = axis_to_plot[0].split()[0]
                    cat2 = axis_to_plot[1].split()[0]
                    cat3 = axis_to_plot[2].split()[0]

                    df_pcoa = ordination_result.samples[[cat1, cat2, cat3]]
                    df_pcoa.insert(
                        3, "Metadata",
                        Meta_data_table_df[meta_data_to_test].values.tolist(),
                        True)
                    df_pcoa.insert(
                        4, "Samples",
                        Meta_data_table_df["Samples"].values.tolist(), True)

                    ## check if lines are to be drawn between the dots
                    if draw_mesh == True:
                        combinations_list = []
                        for metadata in df_pcoa["Metadata"]:
                            ## collect all entries for the respective metadata
                            arr = df_pcoa.loc[df_pcoa['Metadata'] == metadata][
                                [cat1, cat2, cat3, "Metadata",
                                 "Samples"]].to_numpy()
                            ## create a df for all possible combinations using itertools combinations
                            for entry in list(combinations(arr, 2)):
                                combinations_list.append(list(entry[0]))
                                combinations_list.append(list(entry[1]))
                        ## create a dataframe to draw the plot from
                        df = pd.DataFrame(combinations_list)
                        df.columns = [cat1, cat2, cat3, "Metadata", "Samples"]
                        ## draw the plot
                        fig = px.scatter_3d(
                            df,
                            x=cat1,
                            y=cat2,
                            z=cat3,
                            color="Metadata",
                            text="Samples",
                            title=textbox,
                            color_discrete_sequence=color_discrete_sequence)
                        fig.update_traces(marker_size=int(pcoa_s),
                                          mode="markers+lines",
                                          line=dict(width=0.5))
                        fig.update_layout(height=int(height),
                                          width=int(width),
                                          template=template,
                                          title=textbox,
                                          showlegend=True,
                                          font_size=font_size,
                                          title_font_size=font_size)
                        fig.update_layout(
                            scene=dict(xaxis_title=axis_to_plot[0],
                                       yaxis_title=axis_to_plot[1],
                                       zaxis_title=axis_to_plot[2]))
                    else:
                        fig = px.scatter_3d(
                            df_pcoa,
                            x=cat1,
                            y=cat2,
                            z=cat3,
                            color="Metadata",
                            text="Samples",
                            color_discrete_sequence=color_discrete_sequence)
                        fig.update_traces(marker_size=int(pcoa_s),
                                          mode="markers")
                        fig.update_layout(height=int(height),
                                          width=int(width),
                                          template=template,
                                          showlegend=True,
                                          title=textbox,
                                          font_size=font_size,
                                          title_font_size=font_size)
                        fig.update_layout(
                            scene=dict(xaxis_title=axis_to_plot[0],
                                       yaxis_title=axis_to_plot[1],
                                       zaxis_title=axis_to_plot[2]))

                    ## define output files
                    output_pdf = Path(
                        str(dirName) + "/" + meta_data_to_test + "_" +
                        taxon_title + "_3d.pdf")
                    output_html = Path(
                        str(dirName) + "/" + meta_data_to_test + "_" +
                        taxon_title + "_3d.html")
                    output_xlsx = Path(
                        str(dirName) + "/" + meta_data_to_test + "_" +
                        taxon_title + "_3d.xlsx")

                    ## write output files
                    fig.write_image(str(output_pdf))
                    fig.write_html(str(output_html))
                    ordination_result.samples[[cat1,
                                               cat2]].to_excel(output_xlsx)

                    ## ask to show file
                    answer = sg.PopupYesNo('Show plot?', keep_on_top=True)
                    if answer == "Yes":
                        webbrowser.open('file://' + str(output_html))

                    ## print closing text
                    closing_text = "PCoA plots are found in: " + str(
                        path_to_outdirs) + "/PCoA_plots/"
                    sg.Popup(closing_text, title="Finished", keep_on_top=True)

                    ## write log file
                    from taxontabletools.create_log import ttt_log
                    ttt_log("pcoa analysis", "analysis", TaXon_table_xlsx.name,
                            output_pdf.name, meta_data_to_test,
                            path_to_outdirs)
                    break

                else:
                    sg.Popup("Please choose not more than 3 PCoA axes",
                             title="Error",
                             keep_on_top=True)

            if event == 'Plot matrix':
                if len(proportion_explained_list) >= 4:

                    ## create a subfolder for better sorting and overview
                    dirName = Path(
                        str(path_to_outdirs) + "/" + "PCoA_plots" + "/" +
                        TaXon_table_xlsx.stem + "/")
                    if not os.path.exists(dirName):
                        os.mkdir(dirName)

                    df_pcoa = ordination_result.samples[[
                        "PC1", "PC2", "PC3", "PC4"
                    ]]
                    df_pcoa.insert(
                        4, "Metadata",
                        Meta_data_table_df[meta_data_to_test].values.tolist(),
                        True)
                    df_pcoa.insert(
                        5, "Sample",
                        Meta_data_table_df["Samples"].values.tolist(), True)

                    fig = make_subplots(rows=4, cols=4)
                    ########### 1 ###########
                    fig.add_trace(go.Scatter(), row=1, col=1)
                    fig.update_layout(template=template,
                                      font_size=font_size,
                                      title_font_size=font_size)
                    text = "PC1 (" + str(
                        round(
                            ordination_result.proportion_explained["PC1"] *
                            100, 2)) + " %)"
                    fig.add_annotation(text=text, showarrow=False)
                    fig.update_xaxes(showticklabels=False, showgrid=False)
                    fig.update_yaxes(showticklabels=False, showgrid=False)
                    ########### 2 ###########
                    df = df_pcoa[["PC1", "PC2", "Metadata", "Sample"]]
                    for metadata in set(metadata_list):
                        df_metadata = df[df['Metadata'] == metadata]
                        #fig = px.scatter(df_pcoa, x="PC1", y="PC2", , )
                        fig.add_trace(go.Scatter(
                            x=df_metadata["PC1"].values.tolist(),
                            y=df_metadata["PC2"].values.tolist(),
                            mode='markers',
                            name=metadata,
                            text=df_metadata["Sample"].values.tolist()),
                                      row=1,
                                      col=2)
                    ########### 3 ###########
                    df = df_pcoa[["PC1", "PC3", "Metadata", "Sample"]]
                    for metadata in set(metadata_list):
                        df_metadata = df[df['Metadata'] == metadata]
                        #fig = px.scatter(df_pcoa, x="PC1", y="PC2", , )
                        fig.add_trace(go.Scatter(
                            x=df_metadata["PC1"].values.tolist(),
                            y=df_metadata["PC3"].values.tolist(),
                            mode='markers',
                            name=metadata,
                            showlegend=False,
                            text=df_metadata["Sample"].values.tolist()),
                                      row=1,
                                      col=3)
                    ########### 4 ###########
                    df = df_pcoa[["PC1", "PC4", "Metadata", "Sample"]]
                    for metadata in set(metadata_list):
                        df_metadata = df[df['Metadata'] == metadata]
                        fig.add_trace(go.Scatter(
                            x=df_metadata["PC1"].values.tolist(),
                            y=df_metadata["PC4"].values.tolist(),
                            mode='markers',
                            name=metadata,
                            showlegend=False,
                            text=df_metadata["Sample"].values.tolist()),
                                      row=1,
                                      col=4)
                        fig.update_traces(marker_size=int(pcoa_s),
                                          mode="markers")
                        fig.update_xaxes(showgrid=False, row=1, col=4)
                        fig.update_yaxes(showgrid=False, row=1, col=4)
                    ########### 5 ###########
                    fig.add_trace(go.Scatter(), row=2, col=2)
                    fig.update_layout(template=template,
                                      font_size=font_size,
                                      title_font_size=font_size)
                    text = "PC2 (" + str(
                        round(
                            ordination_result.proportion_explained["PC2"] *
                            100, 2)) + " %)"
                    fig.add_annotation(text=text,
                                       showarrow=False,
                                       row=2,
                                       col=2)
                    ########### 6 ###########
                    df = df_pcoa[["PC2", "PC3", "Metadata", "Sample"]]
                    for metadata in set(metadata_list):
                        df_metadata = df[df['Metadata'] == metadata]
                        #fig = px.scatter(df_pcoa, x="PC1", y="PC2", , )
                        fig.add_trace(go.Scatter(
                            x=df_metadata["PC2"].values.tolist(),
                            y=df_metadata["PC3"].values.tolist(),
                            mode='markers',
                            name=metadata,
                            showlegend=False,
                            text=df_metadata["Sample"].values.tolist()),
                                      row=2,
                                      col=3)
                    ########### 7 ###########
                    df = df_pcoa[["PC2", "PC4", "Metadata", "Sample"]]
                    for metadata in set(metadata_list):
                        df_metadata = df[df['Metadata'] == metadata]
                        fig.add_trace(go.Scatter(
                            x=df_metadata["PC2"].values.tolist(),
                            y=df_metadata["PC4"].values.tolist(),
                            mode='markers',
                            name=metadata,
                            showlegend=False,
                            text=df_metadata["Sample"].values.tolist()),
                                      row=2,
                                      col=4)
                    ########### 8 ###########
                    fig.add_trace(go.Scatter(), row=3, col=3)
                    fig.update_layout(template=template,
                                      font_size=font_size,
                                      title_font_size=font_size)
                    text = "PC3 (" + str(
                        round(
                            ordination_result.proportion_explained["PC3"] *
                            100, 2)) + " %)"
                    fig.add_annotation(text=text,
                                       showarrow=False,
                                       row=3,
                                       col=3)
                    ########### 9 ###########
                    df = df_pcoa[["PC3", "PC4", "Metadata", "Sample"]]
                    for metadata in set(metadata_list):
                        df_metadata = df[df['Metadata'] == metadata]
                        #fig = px.scatter(df_pcoa, x="PC1", y="PC2", , )
                        fig.add_trace(go.Scatter(
                            x=df_metadata["PC3"].values.tolist(),
                            y=df_metadata["PC4"].values.tolist(),
                            mode='markers',
                            name=metadata,
                            showlegend=False,
                            text=df_metadata["Sample"].values.tolist()),
                                      row=3,
                                      col=4)
                    ########### 5 ###########
                    fig.add_trace(go.Scatter(), row=4, col=4)
                    fig.update_layout(template=template,
                                      font_size=font_size,
                                      title_font_size=font_size)
                    text = "PC4 (" + str(
                        round(
                            ordination_result.proportion_explained["PC4"] *
                            100, 2)) + " %)"
                    fig.add_annotation(text=text,
                                       showarrow=False,
                                       row=4,
                                       col=4)

                    ######################
                    fig.update_xaxes(showline=True,
                                     mirror=True,
                                     linewidth=1,
                                     linecolor='black')
                    fig.update_yaxes(showline=True,
                                     mirror=True,
                                     linewidth=1,
                                     linecolor='black')
                    fig.update_traces(marker_size=int(pcoa_s), mode="markers")
                    # finish plot matrix
                    fig.update_layout(height=1000,
                                      width=1000,
                                      title_text=textbox)

                    ## define output files
                    output_pdf = Path(
                        str(dirName) + "/" + meta_data_to_test + "_" +
                        taxon_title + "_matrix.pdf")
                    output_html = Path(
                        str(dirName) + "/" + meta_data_to_test + "_" +
                        taxon_title + "_matrix.html")

                    ## write output files
                    fig.write_image(str(output_pdf))
                    fig.write_html(str(output_html))

                    ## ask to show file
                    answer = sg.PopupYesNo('Show plot?', keep_on_top=True)
                    if answer == "Yes":
                        webbrowser.open('file://' + str(output_html))

                    ## print closing text
                    closing_text = "\n" + "PCoA plots are found in: " + str(
                        path_to_outdirs) + "/PCoA_plots/"
                    sg.Popup(closing_text, title="Finished", keep_on_top=True)

                    ## write to log file
                    from taxontabletools.create_log import ttt_log
                    ttt_log("pcoa analysis", "analysis", TaXon_table_xlsx.name,
                            output_pdf.name, meta_data_to_test,
                            path_to_outdirs)
                    break
                else:
                    sg.Popup(
                        "There must be at least 4 PCoA axis available to plot the matrix!"
                    )

        pcoa_window.close()

    else:
        sg.PopupError(
            "The sample of both the TaXon table and the metadata table have to match!"
        )
Ejemplo n.º 21
0
        title="CoMA",
        text=
        "ATTENTION: At least 1 of your eigenvalues is negative, potentially leading to problems! You may want to choose another metric for distance calculation or apply data transformation on the distance matrix (e.g. square root) to get rid of this problem."
    )

# Eigenvalue table: the eigenvalues of `pc` next to the per-axis and the
# cumulative proportion explained.
eig_dm = pd.DataFrame(pc.eigvals, columns=["Eigenvalue"])
eig_dm["Explained"] = pc.proportion_explained
eig_dm["Summed_explanation"] = pc.proportion_explained.cumsum()

# Minkowski runs carry "_p<p>" in every output file name; `p` is only
# touched in that case, so it may stay undefined for other metrics.
file_tag = (mname + "_p" + str(p)) if metric == "minkowski" else mname
eig_dm.to_csv("eigenvalues_" + file_tag + ".txt", sep="\t")

#Statistics

# Both tests run on the same inputs (`div`, grouped by column `var` of
# `map_DF`) with 999 permutations each.
anos = anosim(div, map_DF, column=var, permutations=999)
perm = permanova(div, map_DF, column=var, permutations=999)

stat_file = "statistics_" + file_tag + "_" + var + ".txt"

# One section per test: header line, test statistic, p-value.
report_sections = (("ANOSIM", "R", anos), ("PERMANOVA", "F", perm))
with open(stat_file, "w") as st:
    for test_name, stat_label, result in report_sections:
        st.write(test_name + "\tPermutations: 999\n\n")
        st.write(stat_label + "\t" + str(result["test statistic"]) + "\n")
        st.write("p-value\t" + str(result["p-value"]) + "\n\n")