예제 #1
0
    def test_evaluate_multilabel_classification(self):
        """Check holdout evaluation on iris recast as a multilabel task.

        Samples ``N_TEST_RUNS`` configurations (extra trees, no
        preprocessing), fits a ``HoldoutEvaluator`` for each and asserts
        that the value returned by ``predict()`` is finite and
        non-negative. Runs for which ``self._fit`` returns a falsy value
        are skipped; their slot in the error array stays at 0.
        """
        X_train, Y_train, X_test, Y_test = get_dataset('iris')

        # Convert the labels with convert_to_bin and then set the last
        # column to 1 for every row.
        # NOTE(review): presumably this makes several labels active per
        # sample so the problem is truly multilabel -- confirm.
        Y_train = np.array(convert_to_bin(Y_train, 3))
        Y_train[:, -1] = 1
        Y_test = np.array(convert_to_bin(Y_test, 3))
        Y_test[:, -1] = 1

        # The first 25 test rows become the validation split, the
        # remainder stays as the test split.
        X_valid, X_test = X_test[:25, ], X_test[25:, ]
        # NOTE(review): Y_valid and the sliced Y_test are not stored in
        # the data manager below -- confirm whether that is intended.
        Y_valid, Y_test = Y_test[:25, ], Y_test[25:, ]

        D = Dummy()
        D.info = {'metric': F1_METRIC,
                  'task': MULTILABEL_CLASSIFICATION,
                  'is_sparse': False,
                  'label_num': 3}
        D.data = {'X_train': X_train,
                  'Y_train': Y_train,
                  'X_valid': X_valid,
                  'X_test': X_test}
        # NOTE(review): the capitalised 'Numerical' entry is reproduced
        # as-is; presumably it exercises case handling -- confirm.
        D.feat_type = ['numerical', 'Numerical', 'numerical', 'numerical']

        configuration_space = get_configuration_space(
            D.info,
            include_estimators=['extra_trees'],
            include_preprocessors=['no_preprocessing'],
        )

        errors = np.zeros((N_TEST_RUNS,))
        for run in range(N_TEST_RUNS):
            print('Evaluate configuration: %d; result:' % run)
            sampled = configuration_space.sample_configuration()
            # Each run gets its own deep copy of the data manager.
            evaluator = HoldoutEvaluator(copy.deepcopy(D), sampled)
            if self._fit(evaluator):
                errors[run] = evaluator.predict()
                print(errors[run])

                self.assertTrue(np.isfinite(errors[run]))
                self.assertGreaterEqual(errors[run], 0.0)
예제 #2
0
    def test_evaluate_multilabel_classification(self):
        """Evaluate sampled configurations on a multilabel iris data set.

        For each of ``N_TEST_RUNS`` iterations a configuration is sampled
        from a space restricted to extra trees without preprocessing, a
        ``HoldoutEvaluator`` is built on a deep copy of the data manager,
        and the result of ``predict()`` is asserted to be finite and
        ``>= 0``. Iterations where ``self._fit`` reports failure are
        skipped and keep an error value of 0.
        """
        X_train, Y_train, X_test, Y_test = get_dataset('iris')

        # Both label sets go through convert_to_bin; afterwards the last
        # column is forced to 1 on every row.
        # NOTE(review): presumably to obtain more than one active label
        # per sample -- confirm.
        Y_train = np.array(convert_to_bin(Y_train, 3))
        Y_test = np.array(convert_to_bin(Y_test, 3))
        Y_train[:, -1] = 1
        Y_test[:, -1] = 1

        # Split the original test portion: rows 0..24 for validation,
        # everything after that for testing.
        X_valid = X_test[:25, ]
        X_test = X_test[25:, ]
        # NOTE(review): neither Y_valid nor the sliced Y_test is passed
        # to the data manager -- confirm this is deliberate.
        Y_valid = Y_test[:25, ]
        Y_test = Y_test[25:, ]

        D = Dummy()
        D.info = {
            'metric': F1_METRIC, 'task': MULTILABEL_CLASSIFICATION,
            'is_sparse': False, 'label_num': 3,
        }
        D.data = {
            'X_train': X_train, 'Y_train': Y_train,
            'X_valid': X_valid, 'X_test': X_test,
        }
        # NOTE(review): mixed-case 'Numerical' kept verbatim; presumably
        # intentional -- confirm.
        D.feat_type = ['numerical', 'Numerical', 'numerical', 'numerical']

        configuration_space = get_configuration_space(
            D.info,
            include_estimators=['extra_trees'],
            include_preprocessors=['no_preprocessing'],
        )

        err = np.zeros((N_TEST_RUNS,))
        for idx in range(N_TEST_RUNS):
            print('Evaluate configuration: %d; result:' % idx)
            config = configuration_space.sample_configuration()
            data_copy = copy.deepcopy(D)
            evaluator = HoldoutEvaluator(data_copy, config)
            fitted = self._fit(evaluator)
            if not fitted:
                # Fitting failed for this configuration; try the next one.
                continue

            err[idx] = evaluator.predict()
            print(err[idx])
            self.assertTrue(np.isfinite(err[idx]))
            self.assertGreaterEqual(err[idx], 0.0)
def get_multilabel_classification_datamanager():
    """Build a ``Dummy`` data manager for multilabel classification on iris.

    The training rows are shuffled with a fixed NumPy seed, the labels of
    both splits are converted with ``convert_to_bin(..., 3)``, and the
    first 25 rows of the original test split are used as validation data.

    Returns:
        The ``Dummy`` instance with ``info``, ``data`` and ``feat_type``
        attributes populated.
    """
    X_train, Y_train, X_test, Y_test = get_dataset('iris')

    # Shuffle the training rows; seeding the global NumPy RNG keeps the
    # resulting order reproducible.
    row_order = list(range(X_train.shape[0]))
    np.random.seed(1)
    np.random.shuffle(row_order)
    X_train, Y_train = X_train[row_order], Y_train[row_order]

    Y_train = np.array(convert_to_bin(Y_train, 3))
    Y_test = np.array(convert_to_bin(Y_test, 3))

    # Rows 0..24 of the test portion form the validation split, the
    # remaining rows stay as the test split.
    X_valid, X_test = X_test[:25, ], X_test[25:, ]
    Y_valid, Y_test = Y_test[:25, ], Y_test[25:, ]

    manager = Dummy()
    manager.info = {'task': MULTILABEL_CLASSIFICATION,
                    'is_sparse': False,
                    'label_num': 3}
    manager.data = {'X_train': X_train, 'Y_train': Y_train,
                    'X_valid': X_valid, 'Y_valid': Y_valid,
                    'X_test': X_test, 'Y_test': Y_test}
    # Feature types are keyed by column index.
    # NOTE(review): the capitalised 'Numerical' for column 1 is kept
    # verbatim; presumably intentional -- confirm.
    manager.feat_type = dict(enumerate(
        ['numerical', 'Numerical', 'numerical', 'numerical']))
    return manager
예제 #4
0
def get_multilabel_classification_datamanager():
    """Create a ``Dummy`` data manager describing iris as a multilabel task.

    Training rows are shuffled using a fixed NumPy seed, labels of both
    splits are passed through ``convert_to_bin(..., 3)``, and the
    original test split is divided into a 25-row validation part and a
    test part holding the rest. ``info['metric']`` is ``ACC_METRIC``.

    Returns:
        The populated ``Dummy`` instance (``info``, ``data``,
        ``feat_type``).
    """
    X_train, Y_train, X_test, Y_test = get_dataset('iris')

    # Reproducible shuffle of the training rows via the seeded global RNG.
    shuffled = list(range(X_train.shape[0]))
    np.random.seed(1)
    np.random.shuffle(shuffled)
    X_train = X_train[shuffled]
    Y_train = Y_train[shuffled]

    Y_train = np.array(convert_to_bin(Y_train, 3))
    Y_test = np.array(convert_to_bin(Y_test, 3))

    # Validation split: first 25 rows; test split: everything after.
    X_valid, X_test = X_test[:25, ], X_test[25:, ]
    Y_valid, Y_test = Y_test[:25, ], Y_test[25:, ]

    info = {
        'metric': ACC_METRIC,
        'task': MULTILABEL_CLASSIFICATION,
        'is_sparse': False,
        'label_num': 3,
    }
    data = {
        'X_train': X_train, 'Y_train': Y_train,
        'X_valid': X_valid, 'Y_valid': Y_valid,
        'X_test': X_test, 'Y_test': Y_test,
    }

    D = Dummy()
    D.info = info
    D.data = data
    # NOTE(review): the capitalised 'Numerical' entry is kept verbatim;
    # presumably intentional -- confirm.
    D.feat_type = ['numerical', 'Numerical', 'numerical', 'numerical']
    return D
    def test_evaluate_multilabel_classification(self):
        """Run holdout evaluation on a multilabel version of iris.

        ``N_TEST_RUNS`` configurations are sampled (extra trees, no
        preprocessing). Each is fitted through ``self._fit`` on a deep
        copy of the data manager; when fitting succeeds the value from
        ``predict()`` must be finite and non-negative. Failed fits are
        skipped and leave 0 in the error array. Finally the number of
        runs with an error above 1 is printed.
        """
        X_train, Y_train, X_test, Y_test = get_dataset("iris")

        # Labels are converted with convert_to_bin, then the last column
        # is set to 1 on every row.
        # NOTE(review): presumably to guarantee multiple active labels
        # per sample -- confirm.
        Y_train = np.array(convert_to_bin(Y_train, 3))
        Y_train[:, -1] = 1
        Y_test = np.array(convert_to_bin(Y_test, 3))
        Y_test[:, -1] = 1

        # First 25 test rows -> validation; the rest remains test data.
        X_valid, X_test = X_test[:25,], X_test[25:,]
        # NOTE(review): Y_valid and the sliced Y_test never reach the
        # data manager -- confirm this is intended.
        Y_valid, Y_test = Y_test[:25,], Y_test[25:,]

        D = Dummy()
        D.info = {
            "metric": F1_METRIC,
            "task": MULTILABEL_CLASSIFICATION,
            "is_sparse": False,
            "label_num": 3,
        }
        D.data = {
            "X_train": X_train,
            "Y_train": Y_train,
            "X_valid": X_valid,
            "X_test": X_test,
        }
        # NOTE(review): mixed-case "Numerical" kept verbatim; presumably
        # intentional -- confirm.
        D.feat_type = ["numerical", "Numerical", "numerical", "numerical"]

        configuration_space = get_configuration_space(
            D.info,
            include_estimators=["extra_trees"],
            include_preprocessors=["no_preprocessing"],
        )

        errors = np.zeros((N_TEST_RUNS,))
        for run in range(N_TEST_RUNS):
            print("Evaluate configuration: %d; result:" % run)
            sampled = configuration_space.sample_configuration()
            evaluator = HoldoutEvaluator(copy.deepcopy(D), sampled)
            if self._fit(evaluator):
                errors[run] = evaluator.predict()
                print(errors[run])

                self.assertTrue(np.isfinite(errors[run]))
                self.assertGreaterEqual(errors[run], 0.0)

        # Count the runs whose error exceeded 1 and report them.
        worse_than_random = np.sum(errors > 1)
        print("Number of times it was worse than random guessing:" + str(worse_than_random))