def test_not_a_pcoa(self):
    """pcoa_biplot raises ValueError for an ordination not labelled PCoA."""
    # relabel the fixture so that it claims to come from another method
    self.ordination.short_method_name = 'RDA'
    self.ordination.long_method_name = 'Redundancy Analysis'

    expected_msg = ('This biplot computation can only be performed in a '
                    'PCoA matrix.')
    with self.assertRaisesRegex(ValueError, expected_msg):
        pcoa_biplot(self.ordination, self.descriptors)
 def test_not_a_pcoa(self):
     """A non-PCoA ordination must be rejected with a ValueError."""
     ordination = self.ordination
     # overwrite the method names so the results no longer look like a PCoA
     ordination.short_method_name = 'RDA'
     ordination.long_method_name = 'Redundancy Analysis'

     pattern = ('This biplot computation can only be performed in a PCoA '
                'matrix.')
     with self.assertRaisesRegex(ValueError, pattern):
         pcoa_biplot(ordination, self.descriptors)
    def test_mismatching_samples(self):
        """pcoa_biplot raises ValueError when a descriptor id is unknown."""
        # replace one of the descriptor ids with a label that the test
        # expects to be rejected
        sample_ids = self.descriptors.index.tolist()
        sample_ids[3] = 'Not.an.id'
        self.descriptors.index = pd.Index(sample_ids)

        expected_msg = ('The eigenvectors and the descriptors must describe '
                        'the same samples.')
        with self.assertRaisesRegex(ValueError, expected_msg):
            pcoa_biplot(self.ordination, self.descriptors)
    def test_mismatching_samples(self):
        """Descriptors whose index was altered must trigger a ValueError."""
        descriptors = self.descriptors

        # rename the fourth sample so the descriptor index is changed
        altered_ids = descriptors.index.tolist()
        altered_ids[3] = 'Not.an.id'
        descriptors.index = pd.Index(altered_ids)

        pattern = ('The eigenvectors and the descriptors must describe the '
                   'same samples.')
        with self.assertRaisesRegex(ValueError, pattern):
            pcoa_biplot(self.ordination, descriptors)
Example #5
0
    def test_from_seralized_results(self):
        """Biplots from file-loaded and in-memory ordinations must agree."""
        # ordination results currently lose some information when they are
        # serialized; pcoa_biplot is expected to work fine regardless
        from_file = OrdinationResults.read(get_data_path('PCoA_skbio'))

        serialized = pcoa_biplot(from_file, self.descriptors)
        in_memory = pcoa_biplot(self.ordination, self.descriptors)

        # relax the comparison on directionality, axis labels and method
        # names
        relaxed = {
            'ignore_directionality': True,
            'ignore_axis_labels': True,
            'ignore_method_names': True,
        }
        assert_ordination_results_equal(serialized, in_memory, **relaxed)
    def test_pcoa_biplot_subset_input(self):
        """The projected features must not depend on how many axes are used.

        A copy of the ordination truncated to its first two axes is run
        through ``pcoa_biplot`` and its features are compared with the first
        two columns obtained from the full ordination.
        """
        # create a 2D copy of the full ordination
        two_dims = deepcopy(self.ordination)
        two_dims.eigvals = two_dims.eigvals[:2]
        two_dims.samples = two_dims.samples.iloc[:, :2]
        two_dims.proportion_explained = two_dims.proportion_explained[:2]

        # only look at the features
        subset = pcoa_biplot(two_dims, self.descriptors).features
        full = pcoa_biplot(self.ordination, self.descriptors).features

        # the biplot should be identical regardless of the number of axes used.
        # pandas.util.testing was deprecated in pandas 1.0 and removed in 2.0;
        # pandas.testing is the public replacement, and assert_frame_equal
        # compares floats with a tolerance by default (check_exact=False)
        pd.testing.assert_frame_equal(subset, full.iloc[:, :2])
    def test_pcoa_biplot_subset_input(self):
        """Check that a two-axis ordination yields the same leading features.

        The ordination fixture is copied and cut down to two axes; the
        features computed from that copy must match the first two feature
        columns computed from the untouched ordination.
        """
        # create a 2D copy of the full ordination
        two_dims = deepcopy(self.ordination)
        two_dims.eigvals = two_dims.eigvals[:2]
        two_dims.samples = two_dims.samples.iloc[:, :2]
        two_dims.proportion_explained = two_dims.proportion_explained[:2]

        # only look at the features
        subset = pcoa_biplot(two_dims, self.descriptors).features
        full = pcoa_biplot(self.ordination, self.descriptors).features

        # the biplot should be identical regardless of the number of axes used.
        # Use the public pandas.testing API: the pandas.util.testing module
        # was deprecated in pandas 1.0 and removed in pandas 2.0. The default
        # check_exact=False keeps the comparison approximate.
        pd.testing.assert_frame_equal(subset, full.iloc[:, :2])
    def test_pcoa_biplot_from_ape(self):
        """Compare pcoa_biplot with the reference output of R's ape package

        The expected values were produced with the R script below, using a
        modified version of pcoa.biplot that returns the U matrix.

        library(ape)
        # files can be found in the test data folder of the ordination module
        y = t(read.table('PCoA_biplot_descriptors', row.names = 1, header = 1))
        dm = read.table('PCoA_sample_data_3', row.names = 1, header = 1)

        h = pcoa(dm)

        # biplot.pcoa will only calculate the biplot for two axes at a time
        acc = NULL
        for (axes in c(1, 3, 5, 7)) {
            new = biplot.pcoa(h, y, plot.axes=c(axes, axes+1),
                              rn = rep('.', length(colnames(dm))) )

            if(is.null(acc)) {
                acc = new
            }
            else {
                b = acc
                acc <- cbind(acc, new)
            }
        }
        write.csv(acc, file='PCoA_biplot_projected_descriptors')
        """
        obs = pcoa_biplot(self.ordination, self.descriptors)

        # the expected object starts as a copy of the observed one; only its
        # features dataframe is replaced with the reference values
        exp = deepcopy(obs)

        reference_path = get_data_path('PCoA_biplot_projected_descriptors')
        exp.features = pd.read_table(reference_path, sep=',', index_col=0)

        # R won't calculate the last dimension, so a column of zeros is added
        # to make the arrays comparable
        exp.features['Axis.9'] = np.zeros_like(exp.features['Axis.8'])

        # put the expected rows in the same order as the observed ones
        exp.features = exp.features.reindex(obs.features.index)

        comparison_options = {
            'ignore_directionality': True,
            'ignore_axis_labels': True,
        }
        assert_ordination_results_equal(obs, exp, **comparison_options)
    def test_pcoa_biplot_from_ape(self):
        """Validate pcoa_biplot against values computed with R's ape package

        The reference file was written by the R script below, run with a
        modified version of pcoa.biplot that returns the U matrix.

        library(ape)
        # files can be found in the test data folder of the ordination module
        y = t(read.table('PCoA_biplot_descriptors', row.names = 1, header = 1))
        dm = read.table('PCoA_sample_data_3', row.names = 1, header = 1)

        h = pcoa(dm)

        # biplot.pcoa will only calculate the biplot for two axes at a time
        acc = NULL
        for (axes in c(1, 3, 5, 7)) {
            new = biplot.pcoa(h, y, plot.axes=c(axes, axes+1),
                              rn = rep('.', length(colnames(dm))) )

            if(is.null(acc)) {
                acc = new
            }
            else {
                b = acc
                acc <- cbind(acc, new)
            }
        }
        write.csv(acc, file='PCoA_biplot_projected_descriptors')
        """
        observed = pcoa_biplot(self.ordination, self.descriptors)

        # build a dummy ordination results object from the observed one; the
        # features dataframe is the only part that gets swapped out
        expected = deepcopy(observed)

        expected.features = pd.read_table(
            get_data_path('PCoA_biplot_projected_descriptors'),
            sep=',',
            index_col=0,
        )
        # R won't calculate the last dimension, so pad with zeros to make the
        # arrays comparable
        expected.features['Axis.9'] = np.zeros_like(
            expected.features['Axis.8'])

        # make the order comparable
        expected.features = expected.features.reindex(observed.features.index)

        assert_ordination_results_equal(
            observed,
            expected,
            ignore_directionality=True,
            ignore_axis_labels=True,
        )
 def _build_model(self, dism, data, base_model):
     """Run ``base_model`` on a distance matrix and optionally add a biplot.

     A ``DistanceMatrix`` is built from ``dism.values`` and ``dism.nodes``
     and handed to ``base_model``. When ``data`` is not ``None`` the result
     is additionally passed through ``pcoa_biplot`` together with ``data``.

     Parameters
     ----------
     dism
         Object exposing ``values`` and ``nodes`` attributes, used as the
         matrix data and the ids respectively.
     data
         Descriptors forwarded to ``pcoa_biplot``, or ``None`` to skip the
         biplot step.
     base_model
         Callable that receives the ``DistanceMatrix`` and returns the
         ordination results.

     Returns
     -------
     The output of ``base_model``, or of ``pcoa_biplot`` when ``data`` is
     provided.
     """
     ordination = base_model(DistanceMatrix(data=dism.values, ids=dism.nodes))
     if data is None:
         return ordination
     return pcoa_biplot(ordination, data)