Example #1
import numpy as np
from sklearn import preprocessing

# NOTE: assumed imports for this snippet: "airlines" is the DeepHyper benchmark
# dataset loader, and the module path providing "minmaxstdscaler" may differ
# between DeepHyper versions.
from deephyper.benchmark.datasets import airlines
from deephyper.nas.preprocessing import minmaxstdscaler


def load_data_cache(use_test=False):
    # Fixed random state so the train/valid/test split is reproducible
    random_state = np.random.RandomState(seed=42)

    if use_test:
        print("!!! USING TEST DATA !!!")
        (X_train, y_train), (X_valid,
                             y_valid), (X_test, y_test) = airlines.load_data(
                                 random_state=random_state,
                                 test_size=0.33,
                                 valid_size=0.33 * (1 - 0.33))
        X_train = np.concatenate([X_train, X_valid])
        y_train = np.concatenate([y_train, y_valid])
        X_valid, y_valid = X_test, y_test
    else:
        (X_train, y_train), (X_valid, y_valid), _ = airlines.load_data(
            random_state=random_state,
            test_size=0.33,
            valid_size=0.33 * (1 - 0.33))

    prepro_output = preprocessing.OneHotEncoder()
    y_train = y_train.reshape(-1, 1)
    y_valid = y_valid.reshape(-1, 1)
    y_train = prepro_output.fit_transform(y_train).toarray()
    y_valid = prepro_output.transform(y_valid).toarray()

    prepro_input = minmaxstdscaler()
    X_train = prepro_input.fit_transform(X_train)
    X_valid = prepro_input.transform(X_valid)

    print(f"X_train shape: {np.shape(X_train)}")
    print(f"y_train shape: {np.shape(y_train)}")
    print(f"X_valid shape: {np.shape(X_valid)}")
    print(f"y_valid shape: {np.shape(y_valid)}")
    return (X_train, y_train), (X_valid, y_valid)
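The minmaxstdscaler helper above is a DeepHyper preprocessing utility. As an assumption about its behavior (scale inputs to [0, 1], then standardize them), an illustrative stand-in can be built from scikit-learn if the helper is unavailable:

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler, StandardScaler


def minmaxstdscaler():
    # Illustrative stand-in (assumed behavior): min-max scaling to [0, 1]
    # followed by standardization.
    return Pipeline([
        ("minmaxscaler", MinMaxScaler()),
        ("stdscaler", StandardScaler()),
    ])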
Example #2
def test_config(config):
    import numpy as np
    from sklearn.utils import check_random_state
    from sklearn.ensemble import RandomForestClassifier
    from deephyper.benchmark.datasets import airlines as dataset

    rs_data = np.random.RandomState(seed=42)

    ratio_test = 0.33
    ratio_valid = (1 - ratio_test) * 0.33

    train, valid, test = dataset.load_data(random_state=rs_data,
                                           test_size=ratio_test,
                                           valid_size=ratio_valid)

    rs_classifier = check_random_state(42)

    classifier = RandomForestClassifier(n_jobs=8,
                                        random_state=rs_classifier,
                                        **config)
    classifier.fit(*train)

    acc_train = classifier.score(*train)
    acc_valid = classifier.score(*valid)
    acc_test = classifier.score(*test)

    print(f"Accuracy on Training: {acc_train:.3f}")
    print(f"Accuracy on Validation: {acc_valid:.3f}")
    print(f"Accuracy on Testing: {acc_test:.3f}")
Example #3
import numpy as np

# NOTE: assumed import for this snippet; the "dataset" alias matches its use
# in Example #2.
from deephyper.benchmark.datasets import airlines as dataset


def load_data():

    # Passing a random state is critical to make sure that the same split is
    # produced every time and that the test set is never mixed with the
    # training or validation set. It is also safer to use a local RandomState
    # than to set a global seed.
    random_state = np.random.RandomState(seed=42)

    # Proportion of the test set in the full dataset
    ratio_test = 0.33

    # Proportion of the validation set within "dataset \ test set";
    # here we want the test and validation sets to have the same number of elements
    ratio_valid = (1 - ratio_test) * 0.33

    # The 3rd result is ignored with "_" because it corresponds to the test set,
    # which we do not need here.
    (X_train, y_train), (X_valid, y_valid), _ = dataset.load_data(
        random_state=random_state,
        test_size=ratio_test,
        valid_size=ratio_valid,
    )

    # Uncomment the next line if you want to sub-sample the training data to speed up
    # the search; "n_samples" controls the size of the new training data
    # (see the sub-sampling sketch after this function).
    # X_train, y_train = resample(X_train, y_train, n_samples=int(1e4))

    print(f"X_train shape: {np.shape(X_train)}")
    print(f"y_train shape: {np.shape(y_train)}")
    print(f"X_valid shape: {np.shape(X_valid)}")
    print(f"y_valid shape: {np.shape(y_valid)}")
    return (X_train, y_train), (X_valid, y_valid)
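A sketch of the optional sub-sampling step referenced in the comment above, using sklearn.utils.resample; the 10,000-sample size and the random_state value are illustrative choices, not taken from the original example.

from sklearn.utils import resample

(X_train, y_train), (X_valid, y_valid) = load_data()

# Draw a 10,000-sample training subset without replacement (illustrative size
# and seed).
X_train, y_train = resample(
    X_train, y_train, replace=False, n_samples=10_000, random_state=42
)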
Example #4
def test_load_data_airlines():
    from deephyper.benchmark.datasets import airlines
    import numpy as np

    names = ["train", "valid", "test "]
    data = airlines.load_data(random_state=42, summary=True)
    for (X, y), subset_name in zip(data, names):
        print(
            f"X_{subset_name} shape: ",
            np.shape(X),
            f", y_{subset_name} shape: ",
            np.shape(y),
        )
Example #5
from dhproj.advanced_hpo.mapping import CLASSIFIERS
from deephyper.benchmark.datasets import airlines as dataset
from sklearn.utils import check_random_state

rs_clf = check_random_state(42)

rs_data = check_random_state(42)

ratio_test = 0.33
ratio_valid = (1 - ratio_test) * 0.33

train, valid, test = dataset.load_data(
    random_state=rs_data,
    test_size=ratio_test,
    valid_size=ratio_valid,
)

for clf_name, clf_class in CLASSIFIERS.items():
    print(clf_name)

    clf = clf_class(random_state=rs_clf)

    clf.fit(*train)

    acc_train = clf.score(*train)
    acc_valid = clf.score(*valid)
    acc_test = clf.score(*test)

    print(f"Accuracy on Training: {acc_train:.3f}")
    print(f"Accuracy on Validation: {acc_valid:.3f}")
    print(f"Accuracy on Testing: {acc_test:.3f}\n")
Example #6
# NOTE: assumed import for this snippet (same dataset module as in the
# examples above).
from deephyper.benchmark.datasets import airlines


def load_data():
    train, valid, _ = airlines.load_data(random_state=42)
    return train, valid