def test_default_works(self):
    """
    Fitting an `SKPlumber` that was given no explicit evaluator should
    succeed, i.e. the default evaluator of `SKPlumber.fit` works as-is.
    """
    plumber = SKPlumber("classification", 1)
    features, targets = load_dataset("iris")
    plumber.fit(features, targets)
def load_diabetes_dataset():
    """
    Yield the Diabetes dataset as ``(x_train, y_train), (x_test, y_test)``.

    Only the first two items returned by ``load_dataset("diabetes")`` are
    used; the remaining two are discarded.
    """
    logging.info("Loading Diabetes dataset")
    train_split, test_split, _, _ = load_dataset("diabetes")
    x_train, y_train = train_split
    x_test, y_test = test_split
    yield (x_train, y_train), (x_test, y_test)
def test_can_do_train_test(self):
    """
    The evaluator returned by `make_train_test_evaluator` should work with
    `SKPlumber.fit`, and should support a custom test size (0.2 here).
    """
    evaluator = make_train_test_evaluator(0.2)
    plumber = SKPlumber("classification", 1, evaluator=evaluator)
    features, targets = load_dataset("iris")
    plumber.fit(features, targets)
def test_can_run(self) -> None:
    """
    Smoke test: `SKPlumber.fit` should complete on the iris data, both with
    the bare minimum of arguments and with a non-default metric.
    """
    X, y = load_dataset("iris")

    keyword_sets = (
        {},  # the most basic configuration
        {"metric": "f1macro"},  # a non-default metric
    )
    for extra_kwargs in keyword_sets:
        plumber = SKPlumber("classification", 1, **extra_kwargs)
        plumber.fit(X, y)
def test_can_do_down_sample_evaluation(self):
    """
    The evaluator returned by `make_down_sample_evaluator` should work with
    `SKPlumber.fit` when built from custom arguments (0.8 and 0.2 here).
    """
    evaluator = make_down_sample_evaluator(0.8, 0.2)
    plumber = SKPlumber("classification", 1, evaluator=evaluator)
    features, targets = load_dataset("iris")
    # Fitting exercises the down-sampled train/test validation path.
    plumber.fit(features, targets)
def test_can_do_k_fold_cv(self):
    """
    The evaluator returned by `make_kfold_evaluator` should work with
    `SKPlumber.fit`, and should support a custom number of folds (3 here).
    """
    evaluator = make_kfold_evaluator(3)
    plumber = SKPlumber("classification", 1, evaluator=evaluator)
    features, targets = load_dataset("iris")
    # Fitting exercises the k-fold cross validation path.
    plumber.fit(features, targets)
def create_scikit_model_weights():
    """
    Fit a fixed set of scikit-learn classifiers on the Iris training split
    and pickle every fitted wrapper to disk.

    Each estimator is wrapped twice in ``SklearnClassifier``: once with
    ``clip_values=(0, 1)`` and once without clip values. The pickles are
    written to ``resources/models/scikit/`` under the parent of this file's
    directory, named ``<model_name>iris_clipped.sav`` and
    ``<model_name>iris_unclipped.sav`` respectively.
    """
    master_seed(1234)

    model_list = {
        "decisionTreeClassifier": DecisionTreeClassifier(),
        "extraTreeClassifier": ExtraTreeClassifier(),
        "adaBoostClassifier": AdaBoostClassifier(),
        "baggingClassifier": BaggingClassifier(),
        "extraTreesClassifier": ExtraTreesClassifier(n_estimators=10),
        "gradientBoostingClassifier": GradientBoostingClassifier(n_estimators=10),
        "randomForestClassifier": RandomForestClassifier(n_estimators=10),
        "logisticRegression": LogisticRegression(solver='lbfgs', multi_class='auto'),
        "svc": SVC(gamma='auto'),
        "linearSVC": LinearSVC()
    }

    clipped_models = {
        model_name: SklearnClassifier(model=model, clip_values=(0, 1))
        for model_name, model in model_list.items()
    }
    unclipped_models = {
        model_name: SklearnClassifier(model=model)
        for model_name, model in model_list.items()
    }

    (x_train_iris, y_train_iris), (_, _), _, _ = load_dataset('iris')

    output_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), "resources/models/scikit/")

    # Both wrapper dicts are built from the same estimator objects in
    # `model_list`, so the original order is kept: every clipped model is
    # fitted and saved before any unclipped model is fitted.
    for suffix, models in (
        ("iris_clipped.sav", clipped_models),
        ("iris_unclipped.sav", unclipped_models),
    ):
        for model_name, model in models.items():
            model.fit(x=x_train_iris, y=y_train_iris)
            # The context manager closes (and flushes) the file handle even
            # if pickling fails; the bare `open(...)` used before leaked it.
            with open(os.path.join(output_dir, model_name + suffix), 'wb') as model_file:
                pickle.dump(model, model_file)
def test_can_take_callback(self) -> None:
    """
    `SKPlumber` should accept a `callback` and invoke it during `fit`.

    The callback records the iteration count it is handed and returns True
    once that count equals 2; afterwards the recorded count is expected to
    be 1 or 2 (presumably a True return asks the search to stop).
    """
    self.n_iters = 0
    X, y = load_dataset("iris")

    def stop_at_second_iteration(state) -> bool:
        self.n_iters = state.n_iters
        if state.n_iters == 2:
            return True
        return False

    plumber = SKPlumber("classification", 100, callback=stop_at_second_iteration)
    plumber.fit(X, y)
    assert 0 < self.n_iters < 3
def main_mnist_binary():
    """
    Train a small Keras CNN on a two-class version of MNIST and save the
    weights of its convolutional and dense layers with ``np.save``.

    Class indices below 5 are relabelled 0 and the rest 1. The four arrays
    (kernel and bias of layer 0, kernel and bias of layer 3) are written to
    ``utils/resources/models/scikit/`` under the parent of this file's
    directory.
    """
    master_seed(1234)

    model = Sequential()
    model.add(Conv2D(1, kernel_size=(7, 7), activation="relu", input_shape=(28, 28, 1)))
    model.add(MaxPooling2D(pool_size=(4, 4)))
    model.add(Flatten())
    model.add(Dense(1, activation="sigmoid"))

    # NOTE(review): newer Keras releases spell this argument `learning_rate`;
    # confirm `lr` is still accepted by the TensorFlow version in use.
    model.compile(loss="binary_crossentropy", optimizer=tf.keras.optimizers.Adam(lr=0.01), metrics=["accuracy"])

    (x_train, y_train), (_, _), _, _ = load_dataset("mnist")

    # Collapse the per-class label columns to a class index, then binarise.
    # The "< 5" assignment must run first so the zeros it writes are not
    # caught by the ">= 5" mask.
    y_train = np.argmax(y_train, axis=1)
    y_train[y_train < 5] = 0
    y_train[y_train >= 5] = 1

    model.fit(x_train, y_train, batch_size=128, epochs=10)

    # Layer 0 is the Conv2D layer and layer 3 the Dense layer added above.
    w_0, b_0 = model.layers[0].get_weights()
    w_3, b_3 = model.layers[3].get_weights()

    # Build the target directory once. Three of the four original calls were
    # missing a comma and relied on implicit string concatenation to form the
    # same paths; joining explicitly keeps the paths identical.
    output_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), "utils/resources/models/scikit/")
    arrays_by_file_name = (
        ("W_CONV2D_MNIST_BINARY", w_0),
        ("B_CONV2D_MNIST_BINARY", b_0),
        ("W_DENSE_MNIST_BINARY", w_3),
        ("B_DENSE_MNIST_BINARY", b_3),
    )
    for file_name, array in arrays_by_file_name:
        np.save(os.path.join(output_dir, file_name), array)
def test_can_sample_for_classification(self) -> None:
    """
    An `SKPlumber` driven by `OneStackPipelineSampler` should be able to
    fit a classification problem (the titanic dataset).
    """
    plumber = SKPlumber(
        "classification",
        1,
        sampler=OneStackPipelineSampler(),
    )
    features, targets = load_dataset("titanic")
    plumber.fit(features, targets)
def test_can_sample_for_regression(self) -> None:
    """
    An `SKPlumber` driven by `OneStackPipelineSampler` should be able to
    fit a regression problem (the boston dataset).
    """
    plumber = SKPlumber(
        "regression",
        1,
        sampler=OneStackPipelineSampler(),
    )
    features, targets = load_dataset("boston")
    plumber.fit(features, targets)
def load_mnist_dataset():
    """
    Yield the mnist dataset as ``(x_train, y_train), (x_test, y_test)``.

    Only the first two items returned by ``load_dataset("mnist")`` are used;
    the remaining two are discarded.
    """
    logging.info("Loading mnist")
    train_split, test_split, _, _ = load_dataset("mnist")
    x_train, y_train = train_split
    x_test, y_test = test_split
    yield (x_train, y_train), (x_test, y_test)
def load_iris_dataset():
    """
    Yield the Iris dataset as ``(x_train, y_train), (x_test, y_test)``.

    Only the first two items returned by ``load_dataset("iris")`` are used;
    the remaining two are discarded.
    """
    logging.info("Loading Iris dataset")
    train_split, test_split, _, _ = load_dataset("iris")
    x_train, y_train = train_split
    x_test, y_test = test_split
    yield (x_train, y_train), (x_test, y_test)
def setUpClass(cls):
    """
    Load the iris dataset once and store it on the class as ``cls.X`` and
    ``cls.y`` so that the tests can share it.
    """
    # The original read `X, y = X, y = load_dataset(...)`: the target list was
    # duplicated, so the same result was unpacked twice into throwaway locals.
    cls.X, cls.y = load_dataset("iris")
def test_can_sample_multiple_preprocessors(self) -> None:
    """
    With `StraightPipelineSampler(preprocessors=2)`, the best pipeline found
    by `SKPlumber.fit` on the boston data is expected to have five steps.
    """
    sampler = StraightPipelineSampler(preprocessors=2)
    features, targets = load_dataset("boston")

    plumber = SKPlumber("regression", 1, sampler=sampler)
    plumber.fit(features, targets)

    step_count = len(plumber.best_pipeline.steps)
    self.assertEqual(step_count, 5)