Example #1
0
    def _train_classifier(self, X, Y, num_splits=5):
        """
        Fits one additional classifier as long as fewer than `total_classifiers`
        models are currently held in `self.classifiers`; otherwise does nothing.

        # Arguments:
            X (np.ndarray): Encoded samples used as the training inputs.
            Y (np.ndarray): Encoded evaluations used as the training targets.
            num_splits (int): Number of cross validation splits forwarded to
                `xgb_utils.train_single_model`. Set to 1 to skip cross
                validation when there are too few samples per batch.

        # Returns:
            The model returned by `xgb_utils.train_single_model`, or None when
            the classifier budget has already been reached.
        """
        has_capacity = len(self.classifiers) < self.total_classifiers

        # Guard clause: the budget of classifiers is used up, nothing to train.
        if not has_capacity:
            return None

        # Training is delegated entirely to the shared xgboost helper; the
        # worker count configured on this instance is passed as `n_jobs`.
        return xgb_utils.train_single_model(
            X,
            Y,
            num_splits=num_splits,
            n_jobs=self.num_workers,
        )
Example #2
0
def test_evaluate_train_evaluate():
    """
    Train three models on freshly sampled batches, then evaluate them together
    on 100 new samples and check the prediction shape and that the summed
    predictions are positive.
    """
    space = hp.HyperParameterList(get_hyperparameter_list())
    dataset = data.Dataset(space)

    # Each round draws 16 samples and labels every sample with the sum of its
    # values before encoding and fitting a single model.
    train_batch_size = 16
    clfs = []
    for _round in range(3):
        batch = [space.sample() for _draw in range(train_batch_size)]
        targets = [np.sum(entry) for entry in batch]
        encoded_x, encoded_y = dataset.encode_dataset(batch, targets)
        clfs.append(xgb_utils.train_single_model(encoded_x, encoded_y))

    # Evaluation set: samples only, no labels are supplied to the encoder.
    num_samples = 100
    eval_batch = [space.sample() for _draw in range(num_samples)]
    encoded_eval, _ = dataset.encode_dataset(eval_batch, None)

    preds = xgb_utils.evaluate_models(encoded_eval, clfs)
    count = np.sum(preds)

    print(count)
    assert preds.shape == (num_samples,)
    assert count > 0
Example #3
0
def test_serialization_deserialization():
    """
    Train three models, save them under the `shac` base path, and check that
    the pickle file exists and that restoring yields the same number of models.
    Restoring from a base path named `none` must raise FileNotFoundError.
    """
    basepath = 'shac'

    space = hp.HyperParameterList(get_hyperparameter_list())
    dataset = data.Dataset(space)

    # Each round draws 16 samples and labels every sample with the sum of its
    # values before encoding and fitting a single model.
    train_batch_size = 16
    clfs = []
    for _round in range(3):
        batch = [space.sample() for _draw in range(train_batch_size)]
        targets = [np.sum(entry) for entry in batch]
        encoded_x, encoded_y = dataset.encode_dataset(batch, targets)
        clfs.append(xgb_utils.train_single_model(encoded_x, encoded_y))

    # Round trip: write the classifiers, confirm the file is on disk, read back.
    xgb_utils.save_classifiers(clfs, basepath)
    pickle_path = os.path.join(basepath, 'classifiers', 'classifiers.pkl')
    assert os.path.exists(pickle_path)

    restored = xgb_utils.restore_classifiers(basepath)
    assert len(restored) == len(clfs)

    # A base path that was never written to is expected to fail loudly.
    with pytest.raises(FileNotFoundError):
        xgb_utils.restore_classifiers('none')
Example #4
0
def test_evaluate_single_sample():
    """
    Train three models, then encode and evaluate exactly one sample, checking
    that the encoded input has shape (1, 3) and the prediction has shape (1,).
    """
    space = hp.HyperParameterList(get_hyperparameter_list())
    dataset = data.Dataset(space)

    # Each round draws 16 samples and labels every sample with the sum of its
    # values before encoding and fitting a single model.
    train_batch_size = 16
    clfs = []
    for _round in range(3):
        batch = [space.sample() for _draw in range(train_batch_size)]
        targets = [np.sum(entry) for entry in batch]
        encoded_x, encoded_y = dataset.encode_dataset(batch, targets)
        clfs.append(xgb_utils.train_single_model(encoded_x, encoded_y))

    # A lone sample is wrapped in a list so the encoder receives a batch of one.
    lone_sample = space.sample()
    encoded_single, _ = dataset.encode_dataset([lone_sample])
    assert encoded_single.shape == (1, 3)

    pred = xgb_utils.evaluate_models(encoded_single, clfs)
    assert pred.shape == (1,)
Example #5
0
def test_evaluate_train_evaluate_failure():
    """
    Build a search space of three discrete hyperparameters whose only allowed
    value is 0, attempt to train three models on it, and check that every
    training call returned None.
    """
    # NOTE(review): presumably training yields None because every sample (and
    # hence every label) is identical -- confirm against xgb_utils.
    degenerate_params = [hp.DiscreteHyperParameter('h%d' % idx, [0]) for idx in range(3)]
    space = hp.HyperParameterList(degenerate_params)
    dataset = data.Dataset(space)

    # Each round draws 16 samples and labels every sample with the sum of its
    # values before encoding and attempting to fit a single model.
    train_batch_size = 16
    clfs = []
    for _round in range(3):
        batch = [space.sample() for _draw in range(train_batch_size)]
        targets = [np.sum(entry) for entry in batch]
        encoded_x, encoded_y = dataset.encode_dataset(batch, targets)
        clfs.append(xgb_utils.train_single_model(encoded_x, encoded_y))

    # No round should have produced a usable model.
    for trained in clfs:
        assert trained is None