Ejemplos de permanova en Python, ejemplos de skbio.stats.distance.permanova en Python

Ejemplo n.º 1

0

Mostrar archivo

Archivo: test_permanova.py Proyecto: squarednob/scikit-bio

    def test_call_unequal_group_sizes(self):
        """Check PERMANOVA on the unequal-group-size fixtures.

        The same expected series must be produced for the original grouping
        and for the relabeled grouping when the RNG is reseeded with 0
        before each call.
        """
        expected = pd.Series(
            data=['PERMANOVA', 'pseudo-F', 6, 3, 0.578848, 0.645, 999],
            index=self.exp_index)

        for grouping in (self.grouping_unequal,
                         self.grouping_unequal_relabeled):
            # Reseed before each call so both runs start from the same
            # RNG state.
            np.random.seed(0)
            observed = permanova(self.dm_unequal, grouping)
            self.assert_series_equal(observed, expected)

Ejemplo n.º 2

0

Mostrar archivo

Archivo: test_permanova.py Proyecto: AndreaEdwards/scikit-bio

    def test_call_unequal_group_sizes(self):
        """The original and the relabeled unequal groupings must both
        reproduce the same expected PERMANOVA series."""
        expected_values = ['PERMANOVA', 'pseudo-F', 6, 3, 0.578848, 0.645, 999]
        expected = pd.Series(index=self.exp_index, data=expected_values)

        groupings = [self.grouping_unequal, self.grouping_unequal_relabeled]
        for grouping in groupings:
            np.random.seed(0)  # identical RNG state for every run
            result = permanova(self.dm_unequal, grouping)
            self.assert_series_equal(result, expected)

Ejemplo n.º 3

0

Mostrar archivo

Archivo: test_permanova.py Proyecto: RNAer/scikit-bio

 def test_call_no_ties(self):
     """Run PERMANOVA on ``dm_no_ties`` / ``grouping_equal`` with the RNG
     seeded to 0 and compare against the expected named series."""
     expected = pd.Series(
         data=["PERMANOVA", "pseudo-F", 4, 2, 4.4, 0.332, 999],
         index=self.exp_index,
         name="PERMANOVA results",
     )

     # Fix the RNG state right before the call under test.
     np.random.seed(0)
     observed = permanova(self.dm_no_ties, self.grouping_equal)

     self.assert_series_equal(observed, expected)

Ejemplo n.º 4

0

Mostrar archivo

Archivo: test_permanova.py Proyecto: squarednob/scikit-bio

    def test_call_ties(self):
        """Check repeatability and grouping-vector / data-frame equivalence.

        Each way of supplying the grouping (a grouping vector, then a data
        frame plus column name) is run twice with the RNG reseeded to 0;
        every run must match the same expected series.
        """
        expected = pd.Series(
            data=['PERMANOVA', 'pseudo-F', 4, 2, 2.0, 0.671, 999],
            index=self.exp_index)

        # (positional args, keyword args) for each way of calling permanova.
        call_variants = (
            ((self.dm_ties, self.grouping_equal), {}),
            ((self.dm_ties, self.df), {'column': 'Group'}),
        )
        for args, kwargs in call_variants:
            for _ in range(2):
                np.random.seed(0)
                observed = permanova(*args, **kwargs)
                self.assert_series_equal(observed, expected)

Ejemplo n.º 5

0

Mostrar archivo

Archivo: test_permanova.py Proyecto: AndreaEdwards/scikit-bio

    def test_call_ties(self):
        """Rerunning with the same inputs, and switching between a grouping
        vector and an equivalent data-frame column, must give one result."""
        expected = pd.Series(
            index=self.exp_index,
            data=['PERMANOVA', 'pseudo-F', 4, 2, 2.0, 0.671, 999],
        )

        # One callable per way of supplying the grouping.
        runs = [
            lambda: permanova(self.dm_ties, self.grouping_equal),
            lambda: permanova(self.dm_ties, self.df, column='Group'),
        ]
        for run in runs:
            # Two passes per variant, each from the same RNG state.
            for _ in range(2):
                np.random.seed(0)
                obs = run()
                self.assert_series_equal(obs, expected)

Ejemplo n.º 6

0

Mostrar archivo

 def test_call_no_ties(self):
     """Seeded PERMANOVA on ``dm_no_ties`` must equal the expected series
     named 'PERMANOVA results'."""
     expected_values = ['PERMANOVA', 'pseudo-F', 4, 2, 4.4, 0.332, 999]
     expected = pd.Series(data=expected_values,
                          index=self.exp_index,
                          name='PERMANOVA results')

     np.random.seed(0)  # fixed RNG state for the call below
     result = permanova(self.dm_no_ties, self.grouping_equal)
     self.assert_series_equal(result, expected)

Ejemplo n.º 7

0

Mostrar archivo

Archivo: permanova.py Proyecto: HWChang/emmer

def permanovaResult(args, current_wd, retrospect_dir, output_file_tag,
                    notebook_name, suppress, silence, neglect,
                    permutations=999, seed=0):
    """Run PERMANOVA, print it, log it to the notebook and save parameters.

    Example invocation:
    python3 -m emmer.bake -m 'Permanova' -i emmer/data/bake_data_dir_6/filtered_infoRich__PCA_coordinates.csv

    Parameters
    ----------
    args, current_wd, suppress, silence
        Forwarded unchanged to ``PermanovaArgs``.
    retrospect_dir
        Directory that receives the parameter CSV.
    output_file_tag
        Prefix of the parameter CSV file name.
    notebook_name, neglect
        Forwarded unchanged to ``UpdateNoteBook``.
    permutations : int, optional
        Number of permutations passed to ``permanova``. Defaults to 999,
        the value that was previously hard-coded.
    seed : int, optional
        Seed given to ``numpy.random.seed`` before the test. Defaults to 0,
        the value that was previously hard-coded.

    Returns
    -------
    None
        The result is printed, handed to the notebook updater, and the
        individual/cluster table is written to
        ``<retrospect_dir>/<output_file_tag>_retrospect_permanova_parameter.csv``.
    """
    permanova_args = PermanovaArgs(args=args,
                                   current_wd=current_wd,
                                   suppress=suppress,
                                   silence=silence)

    # Conduct PERMANOVA after seeding NumPy's global RNG.
    numpy.random.seed(seed)
    result = permanova(permanova_args.dist_matrix,
                       permanova_args.cluster,
                       permutations=permutations)
    print(result)

    # The seed is passed as a string, exactly as the hard-coded '0' was.
    # NOTE(review): the permutation count is not forwarded to the notebook;
    # check whether updatePermanovaResult should record it as well.
    UpdateNoteBook(notebook_name=notebook_name,
                   neglect=neglect).updatePermanovaResult(
                       set_seed=str(seed),
                       set_cluster=permanova_args.cluster,
                       test_result=result)

    # Persist which individual was assigned to which cluster.
    parameter_df = pandas.DataFrame({
        'individual': permanova_args.individual,
        'cluster': permanova_args.cluster
    })
    output_file_name = os.path.join(
        retrospect_dir,
        (output_file_tag + '_retrospect_permanova_parameter.csv'))
    parameter_df.to_csv(output_file_name)

Ejemplo n.º 8

0

Mostrar archivo

 def permanova_permdisp(self):
     """Print a PERMDISP result (999 permutations) followed by a PERMANOVA
     result (9999 permutations).

     Both tests get a fresh ``DistanceMatrix`` built from ``self.dist_df``
     and a grouping made of the text before the first underscore of every
     entry of ``list(self.dist_df)``.
     """
     def build_inputs():
         # Same construction order as the inline form: matrix, then labels.
         matrix = DistanceMatrix(self.dist_df)
         labels = [name.split('_')[0] for name in list(self.dist_df)]
         return matrix, labels

     print('running permdisp\n\n')
     matrix, labels = build_inputs()
     permdisp_result = permdisp(distance_matrix=matrix, grouping=labels,
                                permutations=999)
     print(permdisp_result)

     print('running permanova\n\n')
     matrix, labels = build_inputs()
     permanova_result = permanova(distance_matrix=matrix, grouping=labels,
                                  permutations=9999)
     print(permanova_result)

Ejemplo n.º 9

0

Mostrar archivo

Archivo: Diversity.py Proyecto: lgguzman/biodiversity

 def testPer(self, dist, group):
     """Print this object's PERMANOVA output next to the module-level one.

     Prints elements 0 and 2 of ``self.permanova(dist, group)``, then the
     result of the module-level ``permanova`` run with 999 permutations on
     a ``DistanceMatrix`` built from ``dist`` with ids
     ``range(len(group))``.
     """
     own_result = self.permanova(dist, group)
     for position in (0, 2):
         print(own_result[position])

     reference_matrix = DistanceMatrix(dist, range(len(group)))
     reference = permanova(reference_matrix,
                           group,
                           column=None,
                           permutations=999)
     print(reference)

Ejemplo n.º 10

0

Mostrar archivo

Archivo: effect_size.py Proyecto: serenejiang/evident

def _beta(permutations, data, xvalues, yvalues):
    """Run PERMANOVA on two groups of samples and collect their distances.

    ``data`` is filtered down to the ids found in the indexes of ``xvalues``
    and ``yvalues``; the grouping is the concatenation of both as a frame,
    using ``xvalues.name`` as the column.

    Returns
    -------
    tuple
        ``(p-value, test statistic, xvals, yvals)`` where ``xvals`` and
        ``yvals`` are the non-NaN values of ``to_series()`` on the filtered
        data restricted to each group's ids.
    """
    sample_ids = list(xvalues.index.values) + list(yvalues.index.values)
    data_test = data.filter(sample_ids)

    grouping = pd.concat([xvalues, yvalues]).to_frame()
    result = permanova(
        distance_matrix=data_test,
        # Either series' name would do: per the original author, x and y
        # share the same name.
        column=xvalues.name,
        grouping=grouping,
        permutations=permutations,
    ).to_dict()

    def group_distances(values):
        subset = data_test.filter(values.index.values)
        return list(subset.to_series().dropna().values)

    xvals = group_distances(xvalues)
    yvals = group_distances(yvalues)
    return (result['p-value'], result['test statistic'], xvals, yvals)

Ejemplo n.º 11

0

Mostrar archivo

Archivo: effect_size.py Proyecto: antgonza/evident

def _beta(permutations, data, xvalues, yvalues):
    """PERMANOVA between the x and y samples, plus each group's distances.

    The ids in the indexes of ``xvalues`` and ``yvalues`` select the part of
    ``data`` that is tested. The function returns the PERMANOVA p-value and
    test statistic, followed by the list of non-NaN ``to_series()`` values
    of the tested data restricted to the x ids and to the y ids.
    """
    ids = [*list(xvalues.index.values), *list(yvalues.index.values)]
    data_test = data.filter(ids)

    # Both series are expected to carry the same name (per the original
    # author), so xvalues.name identifies the grouping column.
    stats = permanova(distance_matrix=data_test,
                      column=xvalues.name,
                      grouping=pd.concat([xvalues, yvalues]).to_frame(),
                      permutations=permutations).to_dict()

    xvals, yvals = (
        list(data_test.filter(group.index.values).to_series().dropna().values)
        for group in (xvalues, yvalues)
    )
    p_value = stats['p-value']
    statistic = stats['test statistic']
    return (p_value, statistic, xvals, yvals)

Ejemplo n.º 12

0

Mostrar archivo

Archivo: permanova_test.py Proyecto: derekreiman/Meta-Signer

def get_permanova_ranked_list(x, y, feature_list, label_set):
    """Compute a per-feature PERMANOVA p-value table with FDR correction.

    For every position in ``feature_list`` the matching column of
    ``x.transpose().values`` is turned into a cityblock distance matrix and
    tested against ``y``. ``label_set`` is accepted but not used.

    Returns
    -------
    pd.DataFrame
        Indexed by ``feature_list`` with columns ``"p-value"`` and
        ``"Adj p-value"`` (Benjamini-Hochberg, ``method="fdr_bh"``).
    """
    values_by_feature = x.transpose().values

    p_values = []
    for col in range(len(feature_list)):
        # Single feature as an (n, 1) column so distances are 1-D cityblock.
        column = values_by_feature[:, col].reshape(-1, 1)
        distances = pairwise_distances(column, column, metric="cityblock")
        result = permanova(DistanceMatrix(data=distances), y)
        p_values.append(result.loc["p-value"])

    adjusted = multipletests(p_values, method="fdr_bh")[1]
    table = {
        "p-value": np.array(p_values).reshape(-1),
        "Adj p-value": np.array(adjusted).reshape(-1),
    }
    return pd.DataFrame(index=feature_list, data=table)

Ejemplo n.º 13

0

Mostrar archivo

def pseudoF_permanova(points, labels):
    """Run a PERMANOVA (pseudo-F) test of ``points`` against ``labels``.

    Statistical significance is assessed via a permutation test. The
    assignment of objects to groups (grouping) is randomly permuted a number
    of times (controlled via permutations). A pseudo-F statistic is computed
    for each permutation and the p-value is the proportion of permuted
    pseudo-F statistics that are equal to or greater than the original
    (unpermuted) pseudo-F statistic.

    Parameters
    ----------
    points : np.array or pandas.DataFrame
        np.array([N, p]) of all points
    labels: np.array
        np.array([N]) labels of all points

    Returns
    -------
    The object returned by ``permanova``; it is also printed.
    """
    # np.asarray replaces DataFrame.as_matrix(), which was removed in
    # pandas 1.0 and never existed on plain arrays; both input kinds now work.
    # NOTE(review): the values are handed to DistanceMatrix as-is. No
    # pairwise (euclidean) distances are computed here even though the
    # parameters describe raw [N, p] points. Confirm whether callers pass a
    # precomputed square distance matrix or whether a distance step is
    # missing.
    distances = skbio.DistanceMatrix(np.asarray(points))

    pseudo_f = permanova(distances, labels)
    print(pseudo_f)
    return pseudo_f

Ejemplo n.º 14

0

Mostrar archivo

# Scatter plot of the "x"/"y" columns of `tsne`, colored by "taxa".
sns.set(font_scale=1.5, style="ticks")
g = sns.FacetGrid(tsne, hue="taxa", height=10, aspect=16 / 10)
gm = g.map(plt.scatter, "x", "y", alpha=0.25)
# One text label per group, placed at the group's median x/y position.
means = tsne.groupby(taxa).agg("median").reset_index()
texts = means.apply(lambda df: plt.text(df.x, df.y, df.taxa, alpha=0.65),
                    axis=1)
# NOTE(review): adjust_text presumably repositions the labels to reduce
# overlap and draws arrows back to the anchor points -- confirm.
texts = adjust_text(
    texts,
    force_text=(0.02, 0.5),
    arrowprops=dict(arrowstyle="-|>", alpha=0.5, color="k"),
)
# Write the figure to disk and release it.
plt.savefig("figures/individual_media.png", dpi=200)
plt.close()

# Some statistics about metabolite usage
# indicator matrix 0 = metabolite not consumed, 1 = metabolite consumed
# (entries of `mat` below -1e-6 are the ones marked as consumed)
binary = mat.where(mat < -1e-6, 0).where(mat > -1e-6, 1)

# Jaccard distances = 1 - percent overlap
J = pdist(binary, "jaccard")
print("Jaccard distances:", pd.Series(J).describe(), sep="\n")

# euclidean distances
E = pdist(mat, "euclidean")

# Test whether genus explains a good amount of that variation
p = permanova(DistanceMatrix(E), taxa)
# R2 = SS_between / SS_total, recovered from the pseudo-F statistic:
#   F = (SS_between / (k - 1)) / (SS_within / (N - k))
#   =>  R2 = 1 - 1 / (1 + F * (k - 1) / (N - k))
# with N = sample size and k = number of groups. Entries are read by label
# rather than by position so the lookup does not depend on positional
# fallback for a label-indexed Series.
n_samples = p["sample size"]
n_groups = p["number of groups"]
f_stat = p["test statistic"]
r2 = 1 - 1 / (1 + f_stat * (n_groups - 1) / (n_samples - n_groups))
p["R2"] = r2
print("PERMANOVA on euclidean distances:", p, sep="\n")

Ejemplo n.º 15

0

Mostrar archivo

Archivo: test_permanova.py Proyecto: AndreaEdwards/scikit-bio

 def test_call_no_permutations(self):
     """With ``permutations=0`` the result must match the expected series,
     whose last two entries are NaN and 0."""
     expected = pd.Series(
         data=['PERMANOVA', 'pseudo-F', 4, 2, 4.4, np.nan, 0],
         index=self.exp_index)

     observed = permanova(self.dm_no_ties,
                          self.grouping_equal,
                          permutations=0)
     self.assert_series_equal(observed, expected)

Ejemplo n.º 16

0

Mostrar archivo

Archivo: test_permanova.py Proyecto: AndreaEdwards/scikit-bio

 def test_call_no_ties(self):
     """Seeded PERMANOVA on ``dm_no_ties`` with ``grouping_equal`` must
     reproduce the expected series."""
     expected = pd.Series(
         data=['PERMANOVA', 'pseudo-F', 4, 2, 4.4, 0.332, 999],
         index=self.exp_index)

     # Seed immediately before the call under test.
     np.random.seed(0)
     observed = permanova(self.dm_no_ties, self.grouping_equal)
     self.assert_series_equal(observed, expected)

Ejemplo n.º 17

0

Mostrar archivo

            clr_res = clr_inv(np.dot(np.dot(U, s), V.T))
            # use just kl_div here because already closed
            kl_clr = entropy(closure(basetmp_sub).T, clr_res.T).mean()
            results[(rank_, power_, depth_, 'rclr', 'KL-Div')] = [kl_clr]

            # test KL without rclr
            X_spn = np.array(subtmp_sub.copy()).astype(float)
            X_spn[X_spn == 0] = np.nan
            U_, s_, V_ = OptSpace(iteration=1000).fit_transform(X_spn)
            res_raw = np.dot(np.dot(U_, s_), V_.T)
            res_raw[res_raw <= 0] = 1
            kl_raw = entropy(closure(basetmp_sub).T, closure(res_raw).T).mean()
            results[(rank_, power_, depth_, 'Raw Counts', 'KL-Div')] = [kl_raw]

            # f-stat
            resfclr = permanova(DistanceMatrix(distance.cdist(U, U)),
                                meta['group'])['test statistic']
            rawfres = permanova(DistanceMatrix(distance.cdist(U_, U_)),
                                meta['group'])['test statistic']
            results[(rank_, power_, depth_, 'rclr', 'F-Statistic')] = [resfclr]
            results[(rank_, power_, depth_, 'Raw Counts',
                     'F-Statistic')] = [rawfres]

            # KNN
            for U_tmp, method in zip([U, U_], ['rclr', 'Raw Counts']):
                pcoa_tmp = pcoa(DistanceMatrix(distance.cdist(U_tmp,
                                                              U_tmp))).samples
                pcoa_tmp.index = subtmp_sub.index
                # split
                X_train, X_test, y_train, y_test = train_test_split(
                    pcoa_tmp,
                    meta['group'].ravel(),

Ejemplo n.º 18

0

Mostrar archivo

# Collect the PERMANOVA test statistic for every dataset, every
# (fold_, Nsamp_) subset and every method found in `distances`.
# perm_res holds nested dicts; perm_res_tmp holds one DataFrame per subset.
perm_res = {}
perm_res_tmp = {}
for dataset_, subs in distances.items():
    perm_res[dataset_] = {}
    perm_res_tmp[dataset_] = {}
    for (fold_, Nsamp_), methods_ in subs.items():
        meta_ = meta[dataset_][(fold_, Nsamp_)]['metadata']
        # Skip subsets whose metadata has fewer rows than Nsamp_.
        if len(meta_.index) < Nsamp_:
            continue
        perm_res[dataset_][(fold_, Nsamp_)] = {}
        perm_res_tmp[dataset_][(fold_, Nsamp_)] = {}
        for method, dist_tmp in methods_.items():
            perm_res[dataset_][(fold_, Nsamp_)][method] = {}
            dist_tmp = DistanceMatrix(dist_tmp)
            # Grouping = values of the metadata column named by this
            # dataset's 'factor' entry in case_study.
            perm_tmp = permanova(
                dist_tmp, meta[dataset_][(fold_, Nsamp_)]['metadata'][
                    case_study[dataset_]['factor']].values)
            perm_res[dataset_][(
                fold_,
                Nsamp_)][method]['test statistic'] = perm_tmp['test statistic']
            # Rebuilt after every method, so the frame left after the loop
            # covers all methods of this subset.
            perm_res_tmp[dataset_][(fold_, Nsamp_)] = pd.DataFrame(
                perm_res[dataset_][(fold_, Nsamp_)])

    # Concatenate this dataset's per-subset frames (keyed by subset).
    both_perm_res[dataset_] = pd.concat(perm_res_tmp[dataset_])

# run classification
import warnings
warnings.simplefilter('ignore')  #for PCoA warnings
from skbio.stats.ordination import pcoa
from sklearn import metrics
from sklearn.cluster import KMeans

Ejemplo n.º 19

0

Mostrar archivo

Archivo: test_permanova.py Proyecto: squarednob/scikit-bio

 def test_call_no_permutations(self):
     """Calling with ``permutations=0`` must yield the expected series,
     which ends with NaN followed by 0."""
     expected_values = ['PERMANOVA', 'pseudo-F', 4, 2, 4.4, np.nan, 0]
     expected = pd.Series(data=expected_values, index=self.exp_index)

     result = permanova(self.dm_no_ties, self.grouping_equal,
                        permutations=0)
     self.assert_series_equal(result, expected)

Ejemplo n.º 20

0

Mostrar archivo

Archivo: Test_6_permanova.py Proyecto: songweizhi/FlowCellBiofilm

        sample_id = each_sample_split[0]
        sample_group = each_sample_split[1]
        sample_id_list.append(sample_id)
        sample_group_list.append(sample_group)

# read in data as dataframe (tab-separated file at infile_data)
df = pd.read_csv(infile_data, sep='\t')

# get list of list from dataframe
# (one inner list per id in sample_id_list, holding that column of df)
lol_data_in = []
for col_id in sample_id_list:
    column_num_list = (df[col_id].values).tolist()
    lol_data_in.append(column_num_list)

# calculate distance matrix
# (all-against-all distances between the per-sample lists, using the metric
# named by distance_metric)
dist_arrary = pairwise_distances(lol_data_in,
                                 lol_data_in,
                                 metric=distance_metric)

# add sample id to distance matrix
dist_matrix = DistanceMatrix(dist_arrary, sample_id_list)

# perform anosim test (999 permutations, grouped by sample_group_list)
anosim_test = anosim(dist_matrix, sample_group_list, permutations=999)
print(anosim_test)
print()

# perform permanova test (same matrix, grouping and permutation count)
permanova_test = permanova(dist_matrix, sample_group_list, permutations=999)
print(permanova_test)

Ejemplo n.º 21

0

Mostrar archivo

        text=
        "ATTENTION: At least 1 of your eigenvalues is negative, potentially leading to problems! You may want to choose another metric for distance calculation or apply data transformation on the distance matrix (e.g. square root) to get rid of this problem."
    )

# Table of eigenvalues with their explained and cumulative explained
# proportions, taken from `pc`.
eig_dm = pd.DataFrame(pc.eigvals, columns=["Eigenvalue"])
eig_dm["Explained"] = pc.proportion_explained
eig_dm["Summed_explanation"] = pc.proportion_explained.cumsum()
# For the minkowski metric the value of p becomes part of the file name.
if metric == "minkowski":
    eig_dm.to_csv("eigenvalues_" + mname + "_p" + str(p) + ".txt", sep="\t")
else:
    eig_dm.to_csv("eigenvalues_" + mname + ".txt", sep="\t")

#Statistics
# ANOSIM and PERMANOVA on `div`, grouped by column `var` of map_DF.

anos = anosim(div, map_DF, column=var, permutations=999)
perm = permanova(div, map_DF, column=var, permutations=999)

# Same naming scheme as the eigenvalue file, with the variable appended.
if metric == "minkowski":
    stat_file = "statistics_" + mname + "_p" + str(p) + "_" + var + ".txt"
else:
    stat_file = "statistics_" + mname + "_" + var + ".txt"

# NOTE: the "Permutations: 999" text below is a literal; keep it in sync
# with the permutations arguments passed to anosim/permanova above.
with open(stat_file, "w") as st:
    st.write("ANOSIM\tPermutations: 999\n\n")
    st.write("R\t" + str(anos["test statistic"]) + "\n")
    st.write("p-value\t" + str(anos["p-value"]) + "\n\n")
    st.write("PERMANOVA\tPermutations: 999\n\n")
    st.write("F\t" + str(perm["test statistic"]) + "\n")
    st.write("p-value\t" + str(perm["p-value"]) + "\n\n")

# Timestamp taken after the statistics file has been written.
end = time.time()

Ejemplo n.º 22

0

Mostrar archivo

Archivo: test_permanova.py Proyecto: RNAer/scikit-bio

 def test_call_no_permutations(self):
     """``permutations=0`` must give the expected named series, whose last
     two entries are NaN and 0."""
     expected = pd.Series(
         data=["PERMANOVA", "pseudo-F", 4, 2, 4.4, np.nan, 0],
         index=self.exp_index,
         name="PERMANOVA results",
     )

     observed = permanova(self.dm_no_ties,
                          self.grouping_equal,
                          permutations=0)
     self.assert_series_equal(observed, expected)

Ejemplo n.º 23

0

Mostrar archivo

# Append one list of floats per column of `rows` to `samples`, skipping the
# first row and the first column.
for a in range(1, len(rows[0])):
    this_sample = [float(rows[b][a]) for b in range(1, len(rows))]
    samples.append(this_sample)
"""
only_samples = ['LR', 'SR']
new_samples, new_names = [], []
for a in range(len(sample_names)):
    for b in range(len(only_samples)):
        if sample_names[a] == only_samples[b]:
            new_samples.append(samples[a])
            new_names.append(sample_names[a])
samples = new_samples
sample_names = new_names
print(len(samples), len(sample_names))
"""

# Distance matrix over `samples`, built with the braycurtis metric.
sam_dm = dm.from_iterable(samples, metric=braycurtis)
# PERMDISP (median test), ANOSIM and PERMANOVA on the same matrix, each
# grouped by sample_names with 999 permutations; every result is printed.
pdisp = permdisp(sam_dm,
                 sample_names,
                 column=None,
                 test='median',
                 permutations=999)
print(pdisp)
asim = anosim(sam_dm, sample_names, column=None, permutations=999)
print(asim)
perm = permanova(sam_dm, sample_names, column=None, permutations=999)
print(perm)