예제 #1
0
    def test_comparison_with_scikit(self):
        """Cross-validation scores of the lale pipeline
        (PCA & Nystroem) >> ConcatFeatures >> LogisticRegression must equal
        those of the equivalent hand-built scikit-learn FeatureUnion pipeline.
        """
        import warnings
        warnings.filterwarnings("ignore")
        from lale.lib.sklearn import PCA
        import sklearn.datasets
        # lale's cross_val_score, which accepts a trainable lale pipeline.
        # NOTE: this name is intentionally shadowed further below by
        # sklearn's cross_val_score; keep the call at the first use site.
        from lale.helpers import cross_val_score
        pca = PCA(n_components=3, random_state=42, svd_solver='arpack')
        nys = Nystroem(n_components=10, random_state=42)
        concat = ConcatFeatures()
        lr = LogisticRegression(random_state=42, C=0.1)
        # (pca & nys) runs both transformers in parallel; ConcatFeatures
        # joins their outputs, which then feed the logistic regression.
        trainable = (pca & nys) >> concat >> lr
        digits = sklearn.datasets.load_digits()
        X, y = sklearn.utils.shuffle(digits.data, digits.target, random_state=42)

        cv_results = cross_val_score(trainable, X, y)
        # Format to one decimal of a percent so tiny floating-point noise
        # cannot fail the exact equality assertion below.
        cv_results = ['{0:.1%}'.format(score) for score in cv_results]

        from sklearn.pipeline import make_pipeline, FeatureUnion
        from sklearn.decomposition import PCA as SklearnPCA
        from sklearn.kernel_approximation import Nystroem as SklearnNystroem
        from sklearn.linear_model import LogisticRegression as SklearnLR
        # Shadows the lale cross_val_score imported above.
        from sklearn.model_selection import cross_val_score
        union = FeatureUnion([("pca", SklearnPCA(n_components=3, random_state=42, svd_solver='arpack')),
                            ("nys", SklearnNystroem(n_components=10, random_state=42))])
        lr = SklearnLR(random_state=42, C=0.1)
        pipeline = make_pipeline(union, lr)

        # cv=5 — presumably matching lale's cross_val_score fold count;
        # TODO confirm against lale.helpers.cross_val_score's default.
        scikit_cv_results = cross_val_score(pipeline, X, y, cv = 5)
        scikit_cv_results = ['{0:.1%}'.format(score) for score in scikit_cv_results]
        self.assertEqual(cv_results, scikit_cv_results)
        warnings.resetwarnings()
예제 #2
0
    def test_invalid_input(self):
        """to_graphviz must reject a raw scikit-learn estimator with TypeError."""
        from lale.helpers import to_graphviz
        from sklearn.linear_model import LogisticRegression as SklearnLR

        # A plain scikit-learn estimator is not a lale operator, so asking
        # for its graphviz rendering is expected to fail.
        estimator = SklearnLR()
        with self.assertRaises(TypeError):
            to_graphviz(estimator)
예제 #3
0
    def dont_test_with_gridsearchcv2_auto(self):
        """Disabled test (``dont_`` prefix keeps unittest from collecting it):
        GridSearchCV over an auto-generated lale parameter grid must yield the
        same accuracy whether the sklearn Pipeline is assembled from lale
        operators or from plain scikit-learn estimators.
        """
        from sklearn.datasets import load_iris
        from sklearn.metrics import accuracy_score, make_scorer
        from sklearn.model_selection import GridSearchCV

        lr = LogisticRegression(random_state=42)
        pca = PCA(random_state=42, svd_solver="arpack")
        trainable = pca >> lr
        from sklearn.pipeline import Pipeline

        # First pipeline: lale operators placed directly inside a raw sklearn
        # Pipeline (lale operators are sklearn-compatible estimators).
        scikit_pipeline = Pipeline([
            (pca.name(), PCA(random_state=42, svd_solver="arpack")),
            (lr.name(), LogisticRegression(random_state=42)),
        ])
        # Parameter grids derived automatically from the lale pipeline's
        # hyperparameter schemas.
        all_parameters = get_grid_search_parameter_grids(trainable,
                                                         num_samples=1)
        # Search only 2 randomly chosen grids; otherwise the test takes too long
        parameters = random.sample(all_parameters, 2)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            clf = GridSearchCV(scikit_pipeline,
                               parameters,
                               cv=2,
                               scoring=make_scorer(accuracy_score))
            iris = load_iris()
            clf.fit(iris.data, iris.target)
            predicted = clf.predict(iris.data)
            accuracy_with_lale_operators = accuracy_score(
                iris.target, predicted)

        from sklearn.decomposition import PCA as SklearnPCA
        from sklearn.linear_model import LogisticRegression as SklearnLR
        from sklearn.pipeline import Pipeline

        # Second pipeline: same structure and hyperparameters, but built from
        # plain scikit-learn estimators; searched over the SAME sampled grids.
        scikit_pipeline = Pipeline([
            (pca.name(), SklearnPCA(random_state=42, svd_solver="arpack")),
            (lr.name(), SklearnLR(random_state=42)),
        ])
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            clf = GridSearchCV(scikit_pipeline,
                               parameters,
                               cv=2,
                               scoring=make_scorer(accuracy_score))
            iris = load_iris()
            clf.fit(iris.data, iris.target)
            predicted = clf.predict(iris.data)
            accuracy_with_scikit_operators = accuracy_score(
                iris.target, predicted)
        self.assertEqual(accuracy_with_lale_operators,
                         accuracy_with_scikit_operators)
예제 #4
0
    def test_compare_with_sklearn(self):
        """A lale PCA >> LogisticRegression pipeline must score exactly the
        same on the digits dataset as the equivalent pipeline built directly
        from scikit-learn estimators.
        """
        from sklearn.decomposition import PCA as SklearnPCA
        from sklearn.linear_model import LogisticRegression as SklearnLR

        digits = sklearn.datasets.load_digits()

        # lale version of the pipeline.
        transformer = PCA()
        classifier = LogisticRegression(LogisticRegression.solver.lbfgs,
                                        LogisticRegression.multi_class.auto)
        lale_pipeline = lale.operators.make_pipeline(transformer, classifier)
        trained_pipeline = lale_pipeline.fit(digits.data, digits.target)
        lale_predictions = trained_pipeline.predict(digits.data)

        # Plain scikit-learn version with the same hyperparameters.
        sk_pipeline = sklearn.pipeline.make_pipeline(
            SklearnPCA(), SklearnLR(solver="lbfgs", multi_class="auto"))
        sk_pipeline.fit(digits.data, digits.target)
        sk_predictions = sk_pipeline.predict(digits.data)

        lale_score = accuracy_score(digits.target, lale_predictions)
        scikit_score = accuracy_score(digits.target, sk_predictions)
        self.assertEqual(lale_score, scikit_score)
예제 #5
0
    def setUpClass(cls) -> None:
        """Precompute fixtures shared by the tests: a linear model, the
        gradient of the loss at one test point, the training-loss Hessian
        (averaged over batches), and the ground-truth inverse-Hessian-vector
        product ``cls.real_ihvp = H^{-1} @ grad``.
        """
        # Make weight init and data order deterministic across runs.
        pl.seed_everything(0)

        # Input dimensionality of the synthetic regression problem.
        cls.n_features = 10

        # NOTE(review): 2 * n_features — presumably the total parameter count
        # used by the tests; confirm against the model definition.
        cls.n_params = 2 * cls.n_features

        cls.model = LinearRegression(cls.n_features)

        gpus = 1 if torch.cuda.is_available() else 0
        trainer = pl.Trainer(gpus=gpus, max_epochs=10)
        # trainer.fit(cls.model)

        print(tuple(cls.model.parameters()))
        # Instead of training with the Trainer above, fit a closed-form
        # sklearn LinearRegression and copy its solution into the torch model.
        use_sklearn = True
        if use_sklearn:
            train_dataset = DummyDataset(cls.n_features)
            clf = SklearnLR()
            clf.fit(train_dataset.data, train_dataset.targets)

            # Copy sklearn's coefficients/intercept into the torch layer;
            # no_grad so the assignment is not tracked by autograd.
            with torch.no_grad():
                cls.model.linear.weight = torch.nn.Parameter(
                    torch.tensor([clf.coef_], dtype=torch.float))
                cls.model.linear.bias = torch.nn.Parameter(
                    torch.tensor([clf.intercept_], dtype=torch.float))

        # Very large batch size — presumably yields the whole training set in
        # one batch; TODO confirm dataset size.
        cls.train_loader = cls.model.train_dataloader(batch_size=40000)

        # Setup test point data: a single sample (index 8) from the test set.
        cls.test_idx = 8
        cls.x_test = torch.tensor([cls.model.test_set.data[[cls.test_idx]]],
                                  dtype=torch.float)
        cls.y_test = torch.tensor([cls.model.test_set.targets[[cls.test_idx]]],
                                  dtype=torch.float)

        # Device selector used by the helpers below: GPU 1 ↔ CUDA, -1 ↔ CPU.
        cls.gpu = 1 if torch.cuda.is_available() else -1

        # Compute and flatten the gradient of the loss at the test point.
        grads = grad_z(cls.x_test, cls.y_test, cls.model, gpu=cls.gpu)
        flat_grads = parameters_to_vector(grads)

        print("Grads:")
        print(flat_grads)

        # Make model functional (parameters extracted, names recorded).
        params, names = make_functional(cls.model)
        # Make params regular Tensors instead of nn.Parameter
        params = tuple(p.detach().requires_grad_() for p in params)
        flat_params = parameters_to_vector(params)

        # Initialize Hessian accumulator (n_params x n_params).
        h = torch.zeros([flat_params.shape[0], flat_params.shape[0]])

        # Accumulate the exact Hessian, batch by batch.
        for x_train, y_train in cls.train_loader:

            if cls.gpu >= 0:
                x_train, y_train = x_train.cuda(), y_train.cuda()

            # Loss as a function of the flat parameter vector, so that
            # torch.autograd.functional.hessian can differentiate through it.
            def f(flat_params_):
                split_params = tensor_to_tuple(flat_params_, params)
                load_weights(cls.model, names, split_params)
                out = cls.model(x_train)
                loss = calc_loss(out, y_train)
                return loss

            batch_h = hessian(f, flat_params, strict=True)

            # Average the per-batch Hessians over the number of batches.
            with torch.no_grad():
                h += batch_h / float(len(cls.train_loader))

        print("Hessian:")
        print(h)

        complete_x_train = cls.train_loader.dataset.data

        # Analytic Hessian for linear regression: 2/N * X^T X.
        # NOTE(review): assumes calc_loss is mean squared error — confirm.
        real_hessian = complete_x_train.T @ complete_x_train / complete_x_train.shape[
            0] * 2

        print(real_hessian)

        # [:10, :10] compares only the weight block (n_features = 10) of the
        # computed Hessian against the analytic one — TODO confirm parameter
        # ordering puts weights first.
        print(np.linalg.norm(real_hessian - h.cpu().numpy()[:10, :10]))

        np.save("hessian_pytorch.npy", h.cpu().numpy())

        # Restore the model to a regular `nn` module (params as nn.Parameter).

        with torch.no_grad():
            load_weights(cls.model, names, params, as_params=True)
            inv_h = torch.inverse(h)
            print("Inverse Hessian")
            print(inv_h)
            cls.real_ihvp = inv_h @ flat_grads

        print("Real IHVP")
        print(cls.real_ihvp)