コード例 #1
0
    def test_call_unequal_group_sizes(self):
        """Check the PERMANOVA result on the unequal-group-size fixtures.

        The same expected series must be produced for both
        ``grouping_unequal`` and ``grouping_unequal_relabeled`` when the
        NumPy RNG is seeded identically before each call.
        """
        expected = pd.Series(
            data=['PERMANOVA', 'pseudo-F', 6, 3, 0.578848, 0.645, 999],
            index=self.exp_index)

        for grouping in (self.grouping_unequal,
                         self.grouping_unequal_relabeled):
            # Reseed before every run so the permutations are reproducible.
            np.random.seed(0)
            observed = permanova(self.dm_unequal, grouping)
            self.assert_series_equal(observed, expected)
コード例 #2
0
    def test_call_unequal_group_sizes(self):
        """PERMANOVA on ``dm_unequal`` yields one fixed result per grouping.

        Runs the test once with ``grouping_unequal`` and once with
        ``grouping_unequal_relabeled``; both runs are compared against the
        same expected series.
        """
        expected_values = ['PERMANOVA', 'pseudo-F', 6, 3, 0.578848, 0.645,
                           999]
        expected = pd.Series(index=self.exp_index, data=expected_values)

        groupings = [self.grouping_unequal, self.grouping_unequal_relabeled]
        for current_grouping in groupings:
            # Fixed seed: the p-value comes from random permutations.
            np.random.seed(0)
            result = permanova(self.dm_unequal, current_grouping)
            self.assert_series_equal(result, expected)
コード例 #3
0
ファイル: test_permanova.py プロジェクト: RNAer/scikit-bio
 def test_call_no_ties(self):
     """Compare PERMANOVA on ``dm_no_ties`` against a fixed, named series."""
     expected = pd.Series(
         data=["PERMANOVA", "pseudo-F", 4, 2, 4.4, 0.332, 999],
         index=self.exp_index,
         name="PERMANOVA results",
     )
     # Seed the global NumPy RNG so the permutation p-value is reproducible.
     np.random.seed(0)
     observed = permanova(self.dm_no_ties, self.grouping_equal)
     self.assert_series_equal(observed, expected)
コード例 #4
0
    def test_call_ties(self):
        """Check PERMANOVA on ``dm_ties`` for repeatability and input form.

        Rerunning with the same inputs must give the same result, and so
        must passing the grouping as a vector or as a data frame plus a
        column name.
        """
        expected = pd.Series(
            data=['PERMANOVA', 'pseudo-F', 4, 2, 2.0, 0.671, 999],
            index=self.exp_index)

        # (positional args, keyword args) for each way of giving the grouping.
        variants = (
            ((self.dm_ties, self.grouping_equal), {}),
            ((self.dm_ties, self.df), {'column': 'Group'}),
        )
        for call_args, call_kwargs in variants:
            # Run each variant twice to confirm the result is repeatable.
            for _ in range(2):
                np.random.seed(0)
                observed = permanova(*call_args, **call_kwargs)
                self.assert_series_equal(observed, expected)
コード例 #5
0
    def test_call_ties(self):
        """PERMANOVA on ``dm_ties`` is stable across reruns and input forms.

        The grouping is supplied first as a vector and then as a data frame
        with ``column='Group'``; every run is expected to match the same
        series.
        """
        expected = pd.Series(
            index=self.exp_index,
            data=['PERMANOVA', 'pseudo-F', 4, 2, 2.0, 0.671, 999],
        )

        # Zero-argument callables, one per way of supplying the grouping.
        runners = (
            lambda: permanova(self.dm_ties, self.grouping_equal),
            lambda: permanova(self.dm_ties, self.df, column='Group'),
        )
        for run in runners:
            for _attempt in range(2):
                # Same seed on every attempt, so the results must be equal.
                np.random.seed(0)
                result = run()
                self.assert_series_equal(result, expected)
コード例 #6
0
 def test_call_no_ties(self):
     """PERMANOVA on ``dm_no_ties`` must match the stored, named result."""
     expected_values = ['PERMANOVA', 'pseudo-F', 4, 2, 4.4, 0.332, 999]
     expected = pd.Series(data=expected_values,
                          index=self.exp_index,
                          name='PERMANOVA results')
     # A fixed seed keeps the permutation-based p-value deterministic.
     np.random.seed(0)
     result = permanova(self.dm_no_ties, self.grouping_equal)
     self.assert_series_equal(result, expected)
コード例 #7
0
ファイル: permanova.py プロジェクト: HWChang/emmer
def permanovaResult(args, current_wd, retrospect_dir, output_file_tag,
                    notebook_name, suppress, silence, neglect):
    """Run PERMANOVA, record it in the notebook, and save the parameters.

    The function builds a ``PermanovaArgs`` from the inputs, seeds the
    global NumPy RNG, runs ``permanova`` with 999 permutations, prints the
    result, passes it to ``UpdateNoteBook.updatePermanovaResult`` and writes
    the individual/cluster table to a CSV file.

    Parameters
    ----------
    args, current_wd, suppress, silence
        Forwarded unchanged to ``PermanovaArgs``.
    retrospect_dir
        Directory in which the parameter CSV is written.
    output_file_tag
        Prefix of the CSV file name; the suffix
        ``'_retrospect_permanova_parameter.csv'`` is appended to it.
    notebook_name, neglect
        Forwarded unchanged to ``UpdateNoteBook``.

    Returns
    -------
    None
    """
    # Example invocation:
    # python3 -m emmer.bake -m 'Permanova' -i emmer/data/bake_data_dir_6/filtered_infoRich__PCA_coordinates.csv

    permanova_args = PermanovaArgs(args=args,
                                   current_wd=current_wd,
                                   suppress=suppress,
                                   silence=silence)

    # Single source of truth for the seed: the value used to seed the RNG
    # and the value recorded in the notebook can no longer drift apart.
    seed = 0
    numpy.random.seed(seed)

    # TODO: allow user-defined permutations and seed
    result = permanova(permanova_args.dist_matrix,
                       permanova_args.cluster,
                       permutations=999)
    print(result)

    # Called for its effect only; the return value was never used.
    UpdateNoteBook(notebook_name=notebook_name,
                   neglect=neglect).updatePermanovaResult(
                       set_seed=str(seed),
                       set_cluster=permanova_args.cluster,
                       test_result=result)

    parameter_df = pandas.DataFrame({
        'individual': permanova_args.individual,
        'cluster': permanova_args.cluster
    })
    output_file_name = os.path.join(
        retrospect_dir,
        output_file_tag + '_retrospect_permanova_parameter.csv')
    parameter_df.to_csv(output_file_name)
コード例 #8
0
 def permanova_permdisp(self):
     """Print PERMDISP (999 permutations) and PERMANOVA (9999) results.

     Both tests use the same distance matrix, built from ``self.dist_df``,
     and the same grouping: the part of each label before the first ``'_'``,
     taken over the labels obtained by iterating ``self.dist_df``.
     """
     print('running permdisp\n\n')
     # Build the shared inputs once instead of once per test.
     dist_matrix = DistanceMatrix(self.dist_df)
     grouping = [label.split('_')[0] for label in self.dist_df]
     print(permdisp(distance_matrix=dist_matrix,
                    grouping=grouping, permutations=999))
     print('running permanova\n\n')
     print(permanova(distance_matrix=dist_matrix,
                     grouping=grouping, permutations=9999))
コード例 #9
0
ファイル: Diversity.py プロジェクト: lgguzman/biodiversity
 def testPer(self, dist, group):
     """Print this object's PERMANOVA output next to the module-level one.

     Items 0 and 2 of ``self.permanova(dist, group)`` are printed first.
     The module-level ``permanova`` result follows, computed on a
     ``DistanceMatrix`` whose ids are ``range(len(group))``.
     """
     own_result = self.permanova(dist, group)
     print(own_result[0])
     print(own_result[2])

     sample_ids = range(len(group))
     reference = permanova(DistanceMatrix(dist, sample_ids),
                           group,
                           column=None,
                           permutations=999)
     print(reference)
コード例 #10
0
ファイル: effect_size.py プロジェクト: serenejiang/evident
def _beta(permutations, data, xvalues, yvalues):
    """Run PERMANOVA on the ids of two groups and collect their distances.

    ``data`` is filtered to the ids in the indexes of ``xvalues`` and
    ``yvalues``; the filtered object is tested with ``permanova``, using the
    concatenation of both series (as a frame) for the grouping.

    Returns
    -------
    tuple
        ``(p-value, test statistic, xvals, yvals)``, where ``xvals`` and
        ``yvals`` are the non-NaN values of the filtered data restricted to
        each group's ids and converted with ``to_series()``.
    """
    ids = list(xvalues.index.values) + list(yvalues.index.values)
    data_test = data.filter(ids)

    grouping = pd.concat([xvalues, yvalues]).to_frame()
    # Either series can name the column: per the original author, x and y
    # carry the same name.
    column = xvalues.name
    result = permanova(distance_matrix=data_test,
                       column=column,
                       grouping=grouping,
                       permutations=permutations)
    permanova_result = result.to_dict()

    x_subset = data_test.filter(xvalues.index.values)
    xvals = list(x_subset.to_series().dropna().values)
    y_subset = data_test.filter(yvalues.index.values)
    yvals = list(y_subset.to_series().dropna().values)

    p_value = permanova_result['p-value']
    statistic = permanova_result['test statistic']
    return (p_value, statistic, xvals, yvals)
コード例 #11
0
ファイル: effect_size.py プロジェクト: antgonza/evident
def _beta(permutations, data, xvalues, yvalues):
    """PERMANOVA between two groups, plus each group's own distances.

    Parameters
    ----------
    permutations
        Passed through to ``permanova``.
    data
        Object supporting ``filter(ids)``; the filtered result is passed to
        ``permanova`` as ``distance_matrix``.
    xvalues, yvalues
        Series whose indexes hold the ids of each group and whose ``name``
        is used as the grouping column.

    Returns
    -------
    tuple
        ``(p-value, test statistic, xvals, yvals)``.
    """

    def _distances(series):
        # Non-NaN values of the data restricted to this group's ids.
        subset = data_test.filter(series.index.values)
        return list(subset.to_series().dropna().values)

    all_ids = [*xvalues.index.values, *yvalues.index.values]
    data_test = data.filter(all_ids)

    permanova_result = permanova(
        distance_matrix=data_test,
        # x and y share a name (per the original author), so either works.
        column=xvalues.name,
        grouping=pd.concat([xvalues, yvalues]).to_frame(),
        permutations=permutations,
    ).to_dict()

    xvals = _distances(xvalues)
    yvals = _distances(yvalues)
    return (permanova_result['p-value'], permanova_result['test statistic'],
            xvals, yvals)
コード例 #12
0
def get_permanova_ranked_list(x, y, feature_list, label_set):
    """Run a per-feature PERMANOVA and return raw and FDR-adjusted p-values.

    For every position in ``feature_list`` the matching column of
    ``x.transpose().values`` is turned into a pairwise city-block distance
    matrix, which is tested against ``y`` with ``permanova``. The p-values
    are then adjusted with ``multipletests(method="fdr_bh")``.

    ``label_set`` is accepted but not used.

    Returns
    -------
    pd.DataFrame
        Indexed by ``feature_list``, with columns ``"p-value"`` and
        ``"Adj p-value"``.
    """
    matrix = x.transpose().values

    p_values = []
    for col in range(len(feature_list)):
        feature = matrix[:, col]
        # pairwise_distances expects 2-D input: one row per sample.
        distances = pairwise_distances(feature.reshape(-1, 1),
                                       feature.reshape(-1, 1),
                                       metric="cityblock")
        result = permanova(DistanceMatrix(data=distances), y)
        p_values.append(result.loc["p-value"])

    # Index 1 of the multipletests output holds the corrected p-values.
    adjusted = multipletests(p_values, method="fdr_bh")[1]

    columns = {
        "p-value": np.array(p_values).reshape(-1),
        "Adj p-value": np.array(adjusted).reshape(-1),
    }
    return pd.DataFrame(index=feature_list, data=columns)
コード例 #13
0
def pseudoF_permanova(points, labels):
    """Run a PERMANOVA test on ``points`` grouped by ``labels``.

    Statistical significance is assessed via a permutation test. The
    assignment of objects to groups (grouping) is randomly permuted a number
    of times (controlled via permutations). A pseudo-F statistic is computed
    for each permutation and the p-value is the proportion of permuted
    pseudo-F statistics that are equal to or greater than the original
    (unpermuted) pseudo-F statistic.

    Parameters
    ----------
    points : pandas.DataFrame
        Its ``.values`` array is passed directly to
        ``skbio.DistanceMatrix``.
        NOTE(review): the earlier docstring described this as an ``[N, p]``
        array of points, but no pairwise distances are computed here, so
        the input presumably has to be a distance matrix already. Confirm
        against callers.
    labels : np.array
        np.array([N]) labels of all points, used as the grouping.

    Returns
    -------
    The object returned by ``permanova``; it is also printed.
    """
    # DataFrame.as_matrix() was removed in pandas 1.0; .values returns the
    # same array and works on old and new pandas alike.
    distances = skbio.DistanceMatrix(points.values)

    pseudo_f = permanova(distances, labels)
    print(pseudo_f)
    return pseudo_f
コード例 #14
0
# Scatter plot of the t-SNE embedding, colored by taxa, with one text label
# per taxa placed at the median position.
sns.set(font_scale=1.5, style="ticks")
g = sns.FacetGrid(tsne, hue="taxa", height=10, aspect=16 / 10)
gm = g.map(plt.scatter, "x", "y", alpha=0.25)
means = tsne.groupby(taxa).agg("median").reset_index()
texts = means.apply(lambda df: plt.text(df.x, df.y, df.taxa, alpha=0.65),
                    axis=1)
texts = adjust_text(
    texts,
    force_text=(0.02, 0.5),
    arrowprops=dict(arrowstyle="-|>", alpha=0.5, color="k"),
)
plt.savefig("figures/individual_media.png", dpi=200)
plt.close()

# Some statistics about metabolite usage
# indicator matrix 0 = metabolite not consumed, 1 = metabolite consumed
binary = mat.where(mat < -1e-6, 0).where(mat > -1e-6, 1)

# Jaccard distances = 1 - percent overlap
J = pdist(binary, "jaccard")
print("Jaccard distances:", pd.Series(J).describe(), sep="\n")

# euclidean distances
E = pdist(mat, "euclidean")

# Test whether genus explains a good amount of that variation
p = permanova(DistanceMatrix(E), taxa)

# Read the result by label: integer keys on a string-indexed Series rely on
# a deprecated positional fallback in pandas.
n_samples = p["sample size"]
n_groups = p["number of groups"]

# R2 = SS_among / SS_total. With
#   pseudo-F = (SS_among / (k - 1)) / (SS_within / (n - k))
# this gives R2 = 1 - 1 / (1 + F * (k - 1) / (n - k)).
# The result holds the number of groups k, not the degrees of freedom, so
# the factor is (k - 1) / (n - k) rather than k / (n - k - 1).
r2 = 1 - 1 / (1 + p["test statistic"] * (n_groups - 1) /
              (n_samples - n_groups))
p["R2"] = r2
print("PERMANOVA on euclidean distances:", p, sep="\n")
コード例 #15
0
 def test_call_no_permutations(self):
     """With ``permutations=0`` the expected p-value entry is NaN."""
     expected = pd.Series(
         data=['PERMANOVA', 'pseudo-F', 4, 2, 4.4, np.nan, 0],
         index=self.exp_index)
     observed = permanova(self.dm_no_ties,
                          self.grouping_equal,
                          permutations=0)
     self.assert_series_equal(observed, expected)
コード例 #16
0
 def test_call_no_ties(self):
     """PERMANOVA on ``dm_no_ties`` must match the stored result."""
     expected = pd.Series(
         data=['PERMANOVA', 'pseudo-F', 4, 2, 4.4, 0.332, 999],
         index=self.exp_index)
     # Fixed seed so the permutation-based p-value is reproducible.
     np.random.seed(0)
     observed = permanova(self.dm_no_ties, self.grouping_equal)
     self.assert_series_equal(observed, expected)
コード例 #17
0
            clr_res = clr_inv(np.dot(np.dot(U, s), V.T))
            # use just kl_div here because already closed
            kl_clr = entropy(closure(basetmp_sub).T, clr_res.T).mean()
            results[(rank_, power_, depth_, 'rclr', 'KL-Div')] = [kl_clr]

            # test KL without rclr
            X_spn = np.array(subtmp_sub.copy()).astype(float)
            X_spn[X_spn == 0] = np.nan
            U_, s_, V_ = OptSpace(iteration=1000).fit_transform(X_spn)
            res_raw = np.dot(np.dot(U_, s_), V_.T)
            res_raw[res_raw <= 0] = 1
            kl_raw = entropy(closure(basetmp_sub).T, closure(res_raw).T).mean()
            results[(rank_, power_, depth_, 'Raw Counts', 'KL-Div')] = [kl_raw]

            # f-stat
            resfclr = permanova(DistanceMatrix(distance.cdist(U, U)),
                                meta['group'])['test statistic']
            rawfres = permanova(DistanceMatrix(distance.cdist(U_, U_)),
                                meta['group'])['test statistic']
            results[(rank_, power_, depth_, 'rclr', 'F-Statistic')] = [resfclr]
            results[(rank_, power_, depth_, 'Raw Counts',
                     'F-Statistic')] = [rawfres]

            # KNN
            for U_tmp, method in zip([U, U_], ['rclr', 'Raw Counts']):
                pcoa_tmp = pcoa(DistanceMatrix(distance.cdist(U_tmp,
                                                              U_tmp))).samples
                pcoa_tmp.index = subtmp_sub.index
                # split
                X_train, X_test, y_train, y_test = train_test_split(
                    pcoa_tmp,
                    meta['group'].ravel(),
コード例 #18
0
# PERMANOVA test statistic for every dataset / (fold, sample-count) / method.
# perm_res holds nested dicts; perm_res_tmp holds one DataFrame per
# (fold, sample-count) key, ready for concatenation.
perm_res = {}
perm_res_tmp = {}
for dataset_, subs in distances.items():
    perm_res[dataset_] = {}
    perm_res_tmp[dataset_] = {}
    for (fold_, Nsamp_), methods_ in subs.items():
        fold_key = (fold_, Nsamp_)
        meta_ = meta[dataset_][fold_key]['metadata']
        # Skip subsets with fewer metadata rows than the requested count.
        if len(meta_.index) < Nsamp_:
            continue
        perm_res[dataset_][fold_key] = {}
        perm_res_tmp[dataset_][fold_key] = {}
        for method, dist_tmp in methods_.items():
            dist_tmp = DistanceMatrix(dist_tmp)
            perm_tmp = permanova(
                dist_tmp, meta_[case_study[dataset_]['factor']].values)
            perm_res[dataset_][fold_key][method] = {
                'test statistic': perm_tmp['test statistic']
            }
            # Rebuilt after every method so it always reflects perm_res.
            perm_res_tmp[dataset_][fold_key] = pd.DataFrame(
                perm_res[dataset_][fold_key])

    both_perm_res[dataset_] = pd.concat(perm_res_tmp[dataset_])

# run classification
import warnings
warnings.simplefilter('ignore')  #for PCoA warnings
from skbio.stats.ordination import pcoa
from sklearn import metrics
from sklearn.cluster import KMeans
コード例 #19
0
 def test_call_no_permutations(self):
     """Zero permutations: the expected p-value entry is NaN."""
     expected_values = ['PERMANOVA', 'pseudo-F', 4, 2, 4.4, np.nan, 0]
     expected = pd.Series(data=expected_values, index=self.exp_index)
     result = permanova(self.dm_no_ties, self.grouping_equal,
                        permutations=0)
     self.assert_series_equal(result, expected)
コード例 #20
0
        sample_id = each_sample_split[0]
        sample_group = each_sample_split[1]
        sample_id_list.append(sample_id)
        sample_group_list.append(sample_group)

# load the tab-separated input table into a dataframe
df = pd.read_csv(infile_data, sep='\t')

# one list of values per sample column, in sample_id_list order
lol_data_in = [df[col_id].values.tolist() for col_id in sample_id_list]

# pairwise distances between all samples
dist_arrary = pairwise_distances(lol_data_in,
                                 lol_data_in,
                                 metric=distance_metric)

# attach the sample ids to the distance matrix
dist_matrix = DistanceMatrix(dist_arrary, sample_id_list)

# ANOSIM test, followed by an empty line
anosim_test = anosim(dist_matrix, sample_group_list, permutations=999)
print(anosim_test)
print()

# PERMANOVA test
permanova_test = permanova(dist_matrix, sample_group_list, permutations=999)
print(permanova_test)
コード例 #21
0
        text=
        "ATTENTION: At least 1 of your eigenvalues is negative, potentially leading to problems! You may want to choose another metric for distance calculation or apply data transformation on the distance matrix (e.g. square root) to get rid of this problem."
    )

# Eigenvalue table: value, proportion explained, cumulative proportion.
eig_dm = pd.DataFrame(pc.eigvals, columns=["Eigenvalue"])
eig_dm["Explained"] = pc.proportion_explained
eig_dm["Summed_explanation"] = pc.proportion_explained.cumsum()

# Minkowski runs carry the exponent p in every output file name.
name_tag = (mname + "_p" + str(p)) if metric == "minkowski" else mname
eig_dm.to_csv("eigenvalues_" + name_tag + ".txt", sep="\t")

# Statistics

anos = anosim(div, map_DF, column=var, permutations=999)
perm = permanova(div, map_DF, column=var, permutations=999)

stat_file = "statistics_" + name_tag + "_" + var + ".txt"

with open(stat_file, "w") as st:
    st.writelines([
        "ANOSIM\tPermutations: 999\n\n",
        "R\t" + str(anos["test statistic"]) + "\n",
        "p-value\t" + str(anos["p-value"]) + "\n\n",
        "PERMANOVA\tPermutations: 999\n\n",
        "F\t" + str(perm["test statistic"]) + "\n",
        "p-value\t" + str(perm["p-value"]) + "\n\n",
    ])

end = time.time()
コード例 #22
0
ファイル: test_permanova.py プロジェクト: RNAer/scikit-bio
 def test_call_no_permutations(self):
     """Zero permutations: NaN p-value entry in the named result series."""
     expected = pd.Series(
         data=["PERMANOVA", "pseudo-F", 4, 2, 4.4, np.nan, 0],
         index=self.exp_index,
         name="PERMANOVA results",
     )
     observed = permanova(self.dm_no_ties,
                          self.grouping_equal,
                          permutations=0)
     self.assert_series_equal(observed, expected)
コード例 #23
0
# Append one list of floats per column of ``rows``, skipping row 0 and
# column 0.
for a in range(1, len(rows[0])):
    this_sample = [float(row[a]) for row in rows[1:]]
    samples.append(this_sample)
"""
only_samples = ['LR', 'SR']
new_samples, new_names = [], []
for a in range(len(sample_names)):
    for b in range(len(only_samples)):
        if sample_names[a] == only_samples[b]:
            new_samples.append(samples[a])
            new_names.append(sample_names[a])
samples = new_samples
sample_names = new_names
print(len(samples), len(sample_names))
"""

# Distance matrix over all samples, using the braycurtis metric.
sam_dm = dm.from_iterable(samples, metric=braycurtis)

# Settings shared by all three tests: the grouping is passed as a plain
# vector (no column lookup) and each test runs 999 permutations.
shared_kwargs = dict(column=None, permutations=999)

pdisp = permdisp(sam_dm, sample_names, test='median', **shared_kwargs)
print(pdisp)
asim = anosim(sam_dm, sample_names, **shared_kwargs)
print(asim)
perm = permanova(sam_dm, sample_names, **shared_kwargs)
print(perm)