Ejemplo n.º 1
0
def main():  # pragma: no cover
    """Open an OWVennDiagram preview fed with datasets built from one table.

    The "brown-selected" table is turned into several input datasets in one
    of two ways, chosen by the hard-coded ``test_rows`` flag below:

    * rows: five shuffled row samples of the table, named "B" to "F" and
      numbered 1 to 5, after a string meta column "Test" has been appended;
    * columns (the active default): two datasets that keep the class
      variable and, respectively, the first 15 attributes and the
      attributes from index 10 onward.
    """
    # pylint: disable=import-outside-toplevel
    from Orange.evaluation import ShuffleSplit

    table = Table("brown-selected")

    # Manual switch for interactive testing: set to True to feed the widget
    # with row samples instead of column subsets.
    test_rows = False

    if test_rows:
        meta_var = StringVariable("Test")
        # One string label per row: the row number modulo 30, as a column.
        labels = (np.arange(len(table)).reshape(-1, 1) % 30).astype(str)
        table = append_column(table, "M", meta_var, labels)

        splitter = ShuffleSplit(n_resamples=5, test_size=0.7, stratified=False)
        splits = iter(splitter.get_indices(table))
        datasets = []
        for number in range(1, 6):
            # Only the first element of each split is used.
            rows, _ = next(splits)
            subset = table[rows]
            subset.name = chr(ord("A") + number)
            datasets.append((subset, number))
    else:
        attributes = table.domain.attributes
        class_var = table.domain.class_var
        # The two attribute ranges overlap on indices 10 to 14.
        leading = table.transform(Domain(attributes[:15], class_var))
        trailing = table.transform(Domain(attributes[10:], class_var))
        datasets = [(leading, 1), (trailing, 2)]

    WidgetPreview(OWVennDiagram).run(setData=datasets)
Ejemplo n.º 2
0
 def test_results(self):
     # The predictions stored for the single learner must number
     # n_resamples * nrows * test fraction.
     table = self.random_table
     train_fraction, repeats = 0.6, 10
     test_fraction = 1 - train_fraction
     res = ShuffleSplit(table, [NaiveBayesLearner()],
                        train_size=train_fraction,
                        test_size=test_fraction,
                        n_resamples=repeats)
     n_predictions = len(res.predicted[0])
     self.assertEqual(n_predictions, repeats * self.nrows * test_fraction)
 def test_init(self):
     # Every constructor argument must be readable back from the
     # attribute of the same name.
     kwargs = dict(n_resamples=1, train_size=0.1, test_size=0.2,
                   stratified=False, random_state=42)
     splitter = ShuffleSplit(**kwargs)
     self.assertEqual(splitter.n_resamples, kwargs["n_resamples"])
     self.assertEqual(splitter.train_size, kwargs["train_size"])
     self.assertEqual(splitter.test_size, kwargs["test_size"])
     self.assertFalse(splitter.stratified)
     self.assertEqual(splitter.random_state, kwargs["random_state"])
Ejemplo n.º 4
0
 def test_results(self):
     # Build a 100 x 10 random table and check that the predictions stored
     # for the single learner number n_resamples * rows * test fraction.
     n_rows, n_cols = 100, 10
     table = random_data(n_rows, n_cols)
     train_fraction, repeats = 0.6, 10
     test_fraction = 1 - train_fraction
     res = ShuffleSplit(table, [NaiveBayesLearner()],
                        train_size=train_fraction,
                        test_size=test_fraction,
                        n_resamples=repeats)
     n_predictions = len(res.predicted[0])
     self.assertEqual(n_predictions, repeats * n_rows * test_fraction)
    def test_stratified(self):
        # strata size
        # NOTE(review): presumably self.iris stores its classes as
        # consecutive blocks of this many rows -- confirm against the fixture.
        stratum = 50
        splitter = ShuffleSplit(
            train_size=.5, test_size=.5, n_resamples=3, stratified=True,
            random_state=0)
        res = splitter(self.iris, [NaiveBayesLearner()])

        # With a stratified 50/50 split, every fold must hold exactly half
        # of the rows below index `stratum` and half of those below
        # 2 * `stratum`.
        for fold in res.folds:
            rows = res.row_indices[fold]
            self.assertEqual(np.count_nonzero(rows < stratum), stratum // 2)
            self.assertEqual(np.count_nonzero(rows < 2 * stratum), stratum)
Ejemplo n.º 6
0
    def test_not_stratified(self):
        # strata size
        stratum = 50
        res = ShuffleSplit(self.iris, [NaiveBayesLearner()],
                           train_size=.5, test_size=.5,
                           n_resamples=3, stratified=False, random_state=0)

        # Record, for each resample, whether the training indices happen to
        # be split exactly as a stratified sampler would split them.
        balanced = []
        for train_rows, _ in res.indices:
            balanced.extend((
                np.count_nonzero(train_rows < stratum) == stratum / 2,
                np.count_nonzero(train_rows < 2 * stratum) == stratum,
            ))

        # Without stratification at least one of the checks must fail.
        self.assertFalse(all(balanced))
    def test_not_stratified(self):
        # strata size
        stratum = 50
        splitter = ShuffleSplit(
            train_size=.5, test_size=.5, n_resamples=3, stratified=False,
            random_state=0)
        res = splitter(self.iris, [NaiveBayesLearner()])

        # Record, for each fold, whether its row indices happen to be split
        # exactly as a stratified sampler would split them.
        balanced = []
        for fold in res.folds:
            rows = res.row_indices[fold]
            balanced.append(np.count_nonzero(rows < stratum) == stratum // 2)
            balanced.append(np.count_nonzero(rows < 2 * stratum) == stratum)

        # Without stratification at least one of the checks must fail.
        self.assertFalse(all(balanced))