def test_scaling2(self):
    """Compare ``ca(self.contingency, 2)`` with hard-coded reference scores.

    The expected eigenvalues, feature scores and sample scores are
    written out below. The comparison uses ``decimal=5`` and
    ``ignore_directionality=True``.
    """
    expected_eigvals = pd.Series(
        np.array([0.09613302, 0.04094181]), index=self.pc_ids)

    # p. 460 L&L 1998
    f_hat = np.array([
        [0.40887, -0.06955],
        [-0.11539, 0.29977],
        [-0.30997, -0.18739],
    ])
    expected_features = pd.DataFrame(
        f_hat, index=self.feature_ids, columns=self.pc_ids)

    v_hat = np.array([
        [-0.84896, -0.88276],
        [-0.22046, 1.34482],
        [1.66697, -0.47032],
    ])
    expected_samples = pd.DataFrame(
        v_hat, index=self.sample_ids, columns=self.pc_ids)

    # NOTE(review): the method-name spelling is kept verbatim because the
    # string is part of the object handed to the comparison helper --
    # confirm against what ``ca`` reports before changing it.
    expected = OrdinationResults(
        'CA', 'Correspondance Analysis',
        eigvals=expected_eigvals,
        features=expected_features,
        samples=expected_samples,
    )

    observed = ca(self.contingency, 2)
    assert_ordination_results_equal(
        expected, observed, decimal=5, ignore_directionality=True)
def test_scaling1(self):
    """Compare ``ca(self.contingency, 1)`` with hard-coded reference scores.

    The expected eigenvalues, feature scores (V) and sample scores (F)
    are written out below. The comparison uses ``decimal=5`` and
    ``ignore_directionality=True``.
    """
    expected_eigvals = pd.Series(
        np.array([0.09613302, 0.04094181]), index=self.pc_ids)

    # p. 458
    v_matrix = np.array([
        [1.31871, -0.34374],
        [-0.37215, 1.48150],
        [-0.99972, -0.92612],
    ])
    expected_features = pd.DataFrame(
        v_matrix, index=self.feature_ids, columns=self.pc_ids)

    f_matrix = np.array([
        [-0.26322, -0.17862],
        [-0.06835, 0.27211],
        [0.51685, -0.09517],
    ])
    expected_samples = pd.DataFrame(
        f_matrix, index=self.sample_ids, columns=self.pc_ids)

    # NOTE(review): the method-name spelling is kept verbatim because the
    # string is part of the object handed to the comparison helper --
    # confirm against what ``ca`` reports before changing it.
    expected = OrdinationResults(
        'CA', 'Correspondance Analysis',
        eigvals=expected_eigvals,
        features=expected_features,
        samples=expected_samples,
    )

    observed = ca(self.contingency, 1)
    assert_ordination_results_equal(
        expected, observed, decimal=5, ignore_directionality=True)
def test_scaling2(self):
    """Compare ``ca(self.contingency, 2)`` with hard-coded reference values.

    Besides eigenvalues, feature scores and sample scores, this variant
    also supplies the expected ``proportion_explained`` series. The
    comparison uses ``decimal=5`` and ``ignore_directionality=True``.
    """
    axes = self.pc_ids

    reference = {}
    reference['eigvals'] = pd.Series(
        np.array([0.09613302, 0.04094181]), index=axes)

    # p. 460 L&L 1998
    reference['features'] = pd.DataFrame(
        np.array([[0.40887, -0.06955],  # F_hat
                  [-0.11539, 0.29977],
                  [-0.30997, -0.18739]]),
        index=self.feature_ids,
        columns=axes)
    reference['samples'] = pd.DataFrame(
        np.array([[-0.84896, -0.88276],  # V_hat
                  [-0.22046, 1.34482],
                  [1.66697, -0.47032]]),
        index=self.sample_ids,
        columns=axes)

    reference['proportion_explained'] = pd.Series(
        np.array([0.701318, 0.298682]), index=self.pc_ids)

    # NOTE(review): the method-name spelling is kept verbatim because the
    # string is part of the object handed to the comparison helper --
    # confirm against what ``ca`` reports before changing it.
    expected = OrdinationResults(
        'CA', 'Correspondance Analysis', **reference)

    observed = ca(self.contingency, 2)
    assert_ordination_results_equal(
        expected, observed, decimal=5, ignore_directionality=True)
def test_maintain_chi_square_distance_scaling2(self):
    """Scaling 2 keeps chi^2 distances among columns (features).

    The chi^2 distance between columns of the frequency table should
    equal the euclidean distance between the corresponding feature
    scores in the transformed space.
    """
    counts = self.X
    relative_freqs = counts / counts.sum()
    expected = chi_square_distance(relative_freqs, between_rows=False)

    feature_scores = ca(self.contingency, 2).features.values
    observed = pdist(feature_scores, metric='euclidean')

    npt.assert_almost_equal(expected, observed)
def test_maintain_chi_square_distance_scaling1(self):
    """Scaling 1 keeps chi^2 distances among rows (samples).

    The chi^2 distance between rows of the frequency table should equal
    the euclidean distance between the corresponding sample scores in
    the transformed space.
    """
    counts = self.X
    relative_freqs = counts / counts.sum()
    expected = chi_square_distance(relative_freqs)

    sample_scores = ca(self.contingency, 1).samples.values
    observed = pdist(sample_scores, metric='euclidean')

    npt.assert_almost_equal(expected, observed)
def test_scaling1(self):
    """Compare ``ca(self.contingency, 1)`` with hard-coded reference scores.

    Expected eigenvalues, feature scores (V) and sample scores (F) are
    spelled out below and compared with ``decimal=5`` and
    ``ignore_directionality=True``.
    """
    axes = self.pc_ids

    reference = {}
    reference['eigvals'] = pd.Series(
        np.array([0.09613302, 0.04094181]), index=axes)

    # p. 458
    reference['features'] = pd.DataFrame(
        np.array([[1.31871, -0.34374],  # V
                  [-0.37215, 1.48150],
                  [-0.99972, -0.92612]]),
        index=self.feature_ids,
        columns=axes)
    reference['samples'] = pd.DataFrame(
        np.array([[-0.26322, -0.17862],  # F
                  [-0.06835, 0.27211],
                  [0.51685, -0.09517]]),
        index=self.sample_ids,
        columns=axes)

    # NOTE(review): the method-name spelling is kept verbatim because the
    # string is part of the object handed to the comparison helper --
    # confirm against what ``ca`` reports before changing it.
    expected = OrdinationResults(
        'CA', 'Correspondance Analysis', **reference)

    observed = ca(self.contingency, 1)
    assert_ordination_results_equal(
        expected, observed, decimal=5, ignore_directionality=True)
alabels = [ "%4.2f" % pca.explained_variance_[0], "%4.2f" % pca.explained_variance_[1] ] lcoords = [coords[:, 0].tolist(), coords[:, 1].tolist()] elif fmethod.find("PCoA") != -1: M = pd.DataFrame(np.array(cdata), site_ids, site_ids) ordres = ordination.pcoa(M) alabels = [ "%5.3f" % ordres.eigvals["PC1"], "%5.3f" % ordres.eigvals["PC2"] ] lcoords = [ordres.samples["PC1"].tolist(), ordres.samples["PC2"].tolist()] elif fmethod == "CA (skbio)": M = pd.DataFrame(np.array(cdata), site_ids, site_ids) ordres = ordination.ca(M) alabels = [ "%5.3f" % ordres.eigvals["CA1"], "%5.3f" % ordres.eigvals["CA2"] ] lcoords = [ordres.samples["CA1"].tolist(), ordres.samples["CA2"].tolist()] print """ <style> .axis line, .axis path { shape-rendering: crispEdges; stroke: black; fill: none; } circle {
def test_negative(self):
    """``ca`` must raise ``ValueError`` for a table with negative entries."""
    table_with_negatives = np.array([
        [1, 2],
        [-0.1, -2],
    ])
    with npt.assert_raises(ValueError):
        # The frame is built inside the context on purpose, exactly where
        # the original built it.
        ca(pd.DataFrame(table_with_negatives))