Beispiel #1
0
class NOGDClassifier(object):
    """Linear SGD classifier trained on Nystroem-approximated kernel features.

    The Nystroem feature map is fitted on the first batch that reaches
    ``fit`` or ``partial_fit`` and is reused unchanged for later batches.

    Parameters
    ----------
    n_components : int
        Number of Nystroem components (dimension of the feature map).
    n_iter : int
        Forwarded to ``SGDClassifier`` as ``n_iter``.
    """

    def __init__(self, n_components=100, n_iter=1):
        self.nys = Nystroem(n_components=n_components)
        self.clf = SGDClassifier(loss='hinge',
                                 penalty='l2',
                                 shuffle=True,
                                 n_iter=n_iter)
        # Number of batches seen so far; 0 means the feature map is unfitted.
        self.count = 0

    def _featurize(self, X):
        """Map a training batch to Nystroem features, fitting the map on the first batch."""
        if self.count == 0:
            X_tran = self.nys.fit_transform(X)
        else:
            X_tran = self.nys.transform(X)
        self.count += 1
        return X_tran

    def fit(self, X, y):
        """Fit the linear classifier from scratch on the transformed batch."""
        self.clf.fit(self._featurize(X), y)

    def partial_fit(self, X, y, classes=None):
        """Update the linear classifier incrementally with one batch.

        ``classes`` is forwarded to ``SGDClassifier.partial_fit``, which
        needs the full list of labels on its first call; it may be omitted
        on later calls.
        """
        self.clf.partial_fit(self._featurize(X), y, classes=classes)

    def predict(self, X):
        """Return predicted labels for ``X`` using the fitted feature map."""
        return self.clf.predict(self.nys.transform(X))
Beispiel #2
0
class Kernel_tica(object):
    """tICA computed on Nystroem-approximated RBF kernel features.

    Parameters
    ----------
    n_components : int
        Number of tICA dimensions (passed as ``dim``).
    lag_time : int
        Lag time (passed as ``lag``).
    gamma : float
        Gamma value of the RBF kernel used by the Nystroem approximation.
    n_components_nystroem : int
        Number of components of the Nystroem kernel approximation.
    landmarks : array-like or None
        If given, the Nystroem map is fitted on these points instead of on
        the concatenated training sequences.
    shrinkage : object or None
        Stored and propagated by ``score``; not otherwise used in this class.
    weights : str
        Passed as ``weights`` to the tICA estimator. With 'koopman', Koopman
        reweighting is used (see Wu, Hao, et al. "Variational Koopman models:
        slow collective variables and molecular kinetics from short
        off-equilibrium simulations." The Journal of Chemical Physics 146.15
        (2017): 154104.)
    """

    def __init__(
        self,
        n_components,
        lag_time,
        gamma,
        n_components_nystroem=100,
        landmarks=None,
        shrinkage=None,
        weights='empirical'
    ):
        self._n_components = n_components
        self._lag_time = lag_time
        self._n_components_nystroem = n_components_nystroem
        self._landmarks = landmarks
        self._gamma = gamma
        self._nystroem = Nystroem(gamma=gamma,
                                  n_components=n_components_nystroem)
        self._weights = weights
        self._shrinkage = shrinkage

    def fit(self, sequence_list):
        """Fit the Nystroem map, then tICA on the transformed sequences."""
        if self._landmarks is None:
            self._nystroem.fit(np.concatenate(sequence_list))
        else:
            print("using landmarks")
            self._nystroem.fit(self._landmarks)
        sequence_transformed = [
            self._nystroem.transform(item) for item in sequence_list
        ]
        # The tICA object is created here, with the data supplied at
        # construction, because Koopman reweighting requires it.
        self._tica = py.coordinates.tica(sequence_transformed,
                                         lag=self._lag_time,
                                         dim=self._n_components,
                                         kinetic_map=True,
                                         weights=self._weights)

    def transform(self, sequence_list):
        """Project each sequence through the Nystroem map and the fitted tICA."""
        return self._tica.transform(
            [self._nystroem.transform(item) for item in sequence_list])

    def fit_transform(self, sequence_list):
        """Fit on ``sequence_list`` and return its projection."""
        self.fit(sequence_list)
        return self.transform(sequence_list)

    def score(self, sequence_list):
        """Fit a fresh model with the same hyperparameters and sum its eigenvalues.

        The fresh model uses this instance's ``weights`` setting as well, so
        the score is computed under the same estimator configuration.
        """
        model = self.__class__(
            n_components=self._n_components,
            lag_time=self._lag_time,
            gamma=self._gamma,
            n_components_nystroem=self._n_components_nystroem,
            landmarks=self._landmarks,
            shrinkage=self._shrinkage,
            weights=self._weights)
        model.fit(sequence_list)
        return np.sum(model._tica.eigenvalues)
Beispiel #3
0
class NystroemImpl():
    """Thin wrapper that builds an ``Op`` from explicit hyperparameters and delegates to it."""

    def __init__(self, kernel='rbf', gamma=None, coef0=None, degree=None,
                 kernel_params=None, n_components=100, random_state=None):
        # Keep the hyperparameters so the wrapped operator can be built from them.
        self._hyperparams = dict(
            kernel=kernel,
            gamma=gamma,
            coef0=coef0,
            degree=degree,
            kernel_params=kernel_params,
            n_components=n_components,
            random_state=random_state,
        )
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        """Fit the wrapped operator; ``y`` is forwarded only when provided."""
        fit_args = (X,) if y is None else (X, y)
        self._wrapped_model.fit(*fit_args)
        return self

    def transform(self, X):
        """Return the wrapped operator's transform of ``X``."""
        wrapped = self._wrapped_model
        return wrapped.transform(X)
Beispiel #4
0
def faster_svm_eval(X, y, n_splits=5, **kwargs):
    '''
    This is an accelerated version of the svm_eval function.
    An accuracy is calculated by a linear SVM trained on a Nystroem
    approximation of the rbf kernel, averaged over stratified folds.
    Input:
        X: A numpy array with the shape [N, k]. The lower dimension embedding
           of some dataset. Expected to have some clusters.
        y: A numpy array with the shape [N, 1]. The labels of the original
           dataset.
        n_splits: Number of stratified cross-validation folds.
        kwargs: Any keyword argument that is sent into the LinearSVC.
    Output:
        acc: The (avg) accuracy over the folds.
    '''

    # The builtin float replaces the np.float alias, which NumPy 1.24 removed.
    X = scale(X.astype(float))
    # gamma depends only on the scaled data, so compute it once for all folds.
    gamma = 1 / (X.var() * X.shape[1])
    skf = StratifiedKFold(n_splits=n_splits)
    sum_acc = 0
    for train_index, test_index in skf.split(X, y):
        # The feature map is fitted on the training fold only.
        feature_map_nystroem = Nystroem(gamma=gamma,
                                        random_state=1,
                                        n_components=300)
        data_transformed = feature_map_nystroem.fit_transform(X[train_index])
        clf = LinearSVC(random_state=0, tol=1e-5, **kwargs)
        clf.fit(data_transformed, y[train_index])
        test_transformed = feature_map_nystroem.transform(X[test_index])
        sum_acc += clf.score(test_transformed, y[test_index])
    return sum_acc / n_splits
Beispiel #5
0
    def construct_Q(self, feature_normalization=True):
        r""" Step 1 and 2 in Algorithm 1 in MSc. Thesis.
        Computes Q, such that QQ^T \approx K, where K is the RBF kernel matrix.
        Also applies normalization to the features by default. This is carried
        over by example from the CCNN code of Zhang et. al

        Input
        self.Z: array whose first n_train entries are the training patches and
                whose entries n_train..n are the test patches; each split is
                reshaped to (N*P, d1) before the Nystroem transform.
        feature_normalization: if truthy, each split is centered per feature
                and rescaled so that its Frobenius norm equals sqrt(rows).

        Output
        self.Q_train, self.Q_test: (N,P,m) arrays with m = self.nystrom_dim.
                                   Each Q[i,:,:] is one Q(x_i).
        """
        from sklearn.kernel_approximation import Nystroem
        import numpy as np
        import math
        tprint("Using Scikitlearn Nystroem function")
        tprint("Creating Q...")

        def _center_and_rescale(Q):
            # Center every feature, then rescale so the Frobenius norm
            # equals sqrt(number of rows) of this split.
            Q -= np.mean(Q, axis=0)
            Q /= LA.norm(Q) / math.sqrt(Q.shape[0])
            return Q

        Z_train = self.Z[0:self.n_train].reshape(
            (self.n_train * self.P, self.d1))
        Z_test = self.Z[self.n_train:self.n].reshape(
            (self.n_test * self.P, self.d1))
        transformer = Nystroem(gamma=self.gamma, n_components=self.nystrom_dim)
        transformer = transformer.fit(X=Z_train)
        Q_train = transformer.transform(Z_train)
        Q_test = transformer.transform(Z_test)

        if feature_normalization:
            Q_train = _center_and_rescale(Q_train)
            # The test split is scaled by its own row count (n_test * P);
            # it was previously scaled with the training row count.
            Q_test = _center_and_rescale(Q_test)

        self.Q_train = Q_train.reshape(
            (self.n_train, self.P, self.nystrom_dim))
        self.Q_test = Q_test.reshape((self.n_test, self.P, self.nystrom_dim))
Beispiel #6
0
class SparseKernelClassifier(CDClassifier):
    """L1-penalised squared-hinge classifier on exact or Nystroem kernel features.

    With ``mode='exact'`` the full pairwise kernel matrix against the training
    set is used as the design matrix; with any other mode a Nystroem
    approximation with ``n_components`` components is used instead.
    """

    def __init__(self, mode='exact', kernel='rbf', gamma=1e-3, C=1, alpha=1,
                 n_components=500, n_jobs=1, verbose=False):
        self.mode = mode
        self.kernel = kernel
        self.gamma = gamma
        self.C = C
        self.alpha = alpha
        self.n_components = n_components
        self.n_jobs = n_jobs
        self.verbose = verbose
        # NOTE(review): the parent is always given verbose=False, regardless
        # of the ``verbose`` argument - confirm this is intended.
        parent_settings = dict(
            C=C,
            alpha=alpha,
            loss='squared_hinge',
            penalty='l1',
            multiclass=False,
            debiasing=True,
            Cd=C,
            warm_debiasing=True,
            n_jobs=n_jobs,
            verbose=False,
        )
        super(SparseKernelClassifier, self).__init__(**parent_settings)

    def fit(self, X, y):
        """Build the kernel design matrix for ``X`` and fit the parent classifier on it."""
        if self.mode != 'exact':
            self.kernel_sampler_ = Nystroem(kernel=self.kernel,
                                            gamma=self.gamma,
                                            n_components=self.n_components)
            design = self.kernel_sampler_.fit_transform(X)
        else:
            design = pairwise_kernels(X,
                                      metric=self.kernel,
                                      filter_params=True,
                                      gamma=self.gamma)
            # Kept so decision_function can compute kernels against it.
            self.X_train_ = X
        super(SparseKernelClassifier, self).fit(design, y)
        return self

    def decision_function(self, X):
        """Map ``X`` into the representation used at fit time and score it."""
        if self.mode != 'exact':
            design = self.kernel_sampler_.transform(X)
        else:
            design = pairwise_kernels(X,
                                      self.X_train_,
                                      metric=self.kernel,
                                      filter_params=True,
                                      gamma=self.gamma)
        return super(SparseKernelClassifier, self).decision_function(design)
Beispiel #7
0
class SparseKernelClassifier(CDClassifier):
    """Sparse kernel classifier: CDClassifier fitted on kernel features.

    ``mode='exact'`` uses the pairwise kernel matrix against the training
    data; any other value uses a Nystroem feature map instead.
    """

    def __init__(self,
                 mode='exact',
                 kernel='rbf',
                 gamma=1e-3,
                 C=1,
                 alpha=1,
                 n_components=500,
                 n_jobs=1,
                 verbose=False):
        self.mode = mode
        self.kernel = kernel
        self.gamma = gamma
        self.C = C
        self.alpha = alpha
        self.n_components = n_components
        self.n_jobs = n_jobs
        self.verbose = verbose
        # NOTE(review): verbose=False is hard-coded for the parent - confirm.
        super(SparseKernelClassifier, self).__init__(
            C=C, alpha=alpha, loss='squared_hinge', penalty='l1',
            multiclass=False, debiasing=True, Cd=C, warm_debiasing=True,
            n_jobs=n_jobs, verbose=False,
        )

    def fit(self, X, y):
        """Compute kernel features for ``X`` and train the parent classifier."""
        exact = self.mode == 'exact'
        if exact:
            kernel_options = dict(metric=self.kernel,
                                  filter_params=True,
                                  gamma=self.gamma)
            features = pairwise_kernels(X, **kernel_options)
            # Training data is needed again at prediction time.
            self.X_train_ = X
        else:
            self.kernel_sampler_ = Nystroem(kernel=self.kernel,
                                            gamma=self.gamma,
                                            n_components=self.n_components)
            features = self.kernel_sampler_.fit_transform(X)
        super(SparseKernelClassifier, self).fit(features, y)
        return self

    def decision_function(self, X):
        """Return the parent's decision values on the kernel features of ``X``."""
        exact = self.mode == 'exact'
        if exact:
            kernel_options = dict(metric=self.kernel,
                                  filter_params=True,
                                  gamma=self.gamma)
            features = pairwise_kernels(X, self.X_train_, **kernel_options)
        else:
            features = self.kernel_sampler_.transform(X)
        return super(SparseKernelClassifier, self).decision_function(features)
    def kernel_transformation_using_nystroem_rbf(self, df, cat):
        """Pick an RBF gamma by training-set F1 and return 10 kernel features.

        NaN and +/-inf are replaced by 0. The feature columns are the columns
        that are not of object or datetime64[ns] dtype, are not matched by
        ``self.target`` and are not listed in ``cat``. For 50 gamma values
        between 1e-2 and 1e2 a Nystroem map plus default ``SGDClassifier`` is
        fitted and scored (micro F1) on the same data; the best gamma is then
        used to build the returned features.

        Parameters
        ----------
        df : pandas.DataFrame
            Input data containing the feature columns and ``self.target``.
        cat : collection
            Column names to exclude from the features.

        Returns
        -------
        pandas.DataFrame
            Columns ``k_0`` .. ``k_9`` with the kernel features.
        """
        n_kernel_features = 10
        # Clean once; the same cleaning was previously applied twice.
        df = df.fillna(0)
        df = df.replace([np.inf, -np.inf], 0)
        datecol = [x for x in df.columns if df[x].dtypes == 'datetime64[ns]']
        numeric_cols = [
            x for x in df.columns
            if df[x].dtypes != 'object' and x not in datecol
            and x not in self.target
        ]
        X = [x for x in numeric_cols if x not in cat]
        y = self.target

        best_gamma = 0
        best_score = 0
        for gamma in np.linspace((10**-2), (10**2), 50):
            rbf_feature = Nystroem(kernel='rbf', gamma=gamma, random_state=2,
                                   n_components=n_kernel_features)
            rbf_feature.fit(df[X])
            X_features = np.nan_to_num(rbf_feature.transform(df[X]))
            clf = SGDClassifier()
            clf.fit(X_features, df[y])
            y_pred = clf.predict(X_features)
            score = f1_score(df[y], y_pred, average='micro')
            if score > best_score:
                best_score = score
                best_gamma = gamma

        # NOTE(review): gamma is tuned with Nystroem above, but the returned
        # features come from RBFSampler - confirm which map is intended.
        rbf_feature = RBFSampler(gamma=best_gamma, random_state=2,
                                 n_components=n_kernel_features)
        rbf_feature.fit(df[X])
        X_features = rbf_feature.transform(df[X])
        columns = ['k_' + str(r) for r in range(n_kernel_features)]
        X_features = pd.DataFrame(data=X_features, columns=columns)
        clf = SGDClassifier()
        clf.fit(X_features, df[y])
        # Score the final classifier; the stale predictions of the last
        # search iteration were previously reused here.
        y_pred = clf.predict(X_features)
        score = f1_score(df[y], y_pred, average='micro')
        print("Score is")
        print(score)
        print(best_gamma)
        return X_features
Beispiel #9
0
class WeightedSparseKernelClassifier(LinearSVC):
    """L1-penalised LinearSVC on exact or Nystroem-approximated kernel features.

    ``mode='exact'`` trains on the pairwise kernel matrix against the
    training set; any other mode trains on a Nystroem feature map.
    """

    def __init__(self,
                 mode='exact',
                 kernel='rbf',
                 gamma=1e-3,
                 C=1,
                 multi_class='ovr',
                 class_weight='auto',
                 n_components=5000,
                 verbose=False):
        self.mode = mode
        self.kernel = kernel
        self.gamma = gamma
        self.C = C
        self.multi_class = multi_class
        self.class_weight = class_weight
        self.n_components = n_components
        self.verbose = verbose

        # NOTE(review): multi_class and class_weight are not forwarded to the
        # parent constructor - confirm whether the parent resets them.
        parent_settings = dict(C=C,
                               loss='squared_hinge',
                               penalty='l1',
                               dual=False,
                               verbose=verbose)
        super(WeightedSparseKernelClassifier, self).__init__(**parent_settings)

    def fit(self, X, y):
        """Build kernel features for ``X`` and fit the parent LinearSVC on them."""
        if self.mode != 'exact':
            self.kernel_sampler_ = Nystroem(kernel=self.kernel,
                                            gamma=self.gamma,
                                            n_components=self.n_components)
            features = self.kernel_sampler_.fit_transform(X)
        else:
            features = pairwise_kernels(X,
                                        metric=self.kernel,
                                        filter_params=True,
                                        gamma=self.gamma)
            # Needed later to compute kernels against the training set.
            self.X_train_ = X
        return super(WeightedSparseKernelClassifier, self).fit(features, y)

    def decision_function(self, X):
        """Return the parent's decision values on the kernel features of ``X``."""
        if self.mode != 'exact':
            features = self.kernel_sampler_.transform(X)
        else:
            features = pairwise_kernels(X,
                                        self.X_train_,
                                        metric=self.kernel,
                                        filter_params=True,
                                        gamma=self.gamma)
        parent = super(WeightedSparseKernelClassifier, self)
        return parent.decision_function(features)
Beispiel #10
0
class NystroemImpl:
    """Wrapper that forwards arbitrary hyperparameters to ``Op`` and delegates to it."""

    def __init__(self, **hyperparams):
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        """Fit the wrapped operator, passing ``y`` only when it is given."""
        fit_args = (X,) if y is None else (X, y)
        self._wrapped_model.fit(*fit_args)
        return self

    def transform(self, X):
        """Delegate the transformation of ``X`` to the wrapped operator."""
        wrapped = self._wrapped_model
        return wrapped.transform(X)
def test_nystroem_singular_kernel():
    """Nystroem must reproduce a singular kernel matrix with finite features."""
    rng = np.random.RandomState(0)
    X = rng.rand(10, 20)
    X = np.vstack([X] * 2)  # duplicate samples -> singular kernel matrix

    gamma = 100
    N = Nystroem(gamma=gamma, n_components=X.shape[0]).fit(X)
    X_transformed = N.transform(X)

    K = rbf_kernel(X, gamma=gamma)

    assert_array_almost_equal(K, np.dot(X_transformed, X_transformed.T))
    # Check the transformed output itself; the name Y is not defined here.
    assert_true(np.all(np.isfinite(X_transformed)))
Beispiel #12
0
class WeightedSparseKernelClassifier(LinearSVC):
    """LinearSVC (L1, squared hinge) trained on kernel features.

    In ``'exact'`` mode the design matrix is the pairwise kernel against the
    training data; otherwise it is a Nystroem approximation.
    """

    def __init__(
            self, mode='exact', kernel='rbf', gamma=1e-3, C=1,
            multi_class='ovr', class_weight='auto', n_components=5000,
            verbose=False
    ):
        self.mode = mode
        self.kernel = kernel
        self.gamma = gamma
        self.C = C
        self.multi_class = multi_class
        self.class_weight = class_weight
        self.n_components = n_components
        self.verbose = verbose

        # NOTE(review): multi_class / class_weight are not passed to the
        # parent constructor - confirm this is intended.
        super(WeightedSparseKernelClassifier, self).__init__(
            C=C,
            loss='squared_hinge',
            penalty='l1',
            dual=False,
            verbose=verbose,
        )

    def fit(self, X, y):
        """Compute the kernel representation of ``X`` and fit on it."""
        exact = self.mode == 'exact'
        if exact:
            kernel_options = dict(metric=self.kernel,
                                  filter_params=True,
                                  gamma=self.gamma)
            design = pairwise_kernels(X, **kernel_options)
            # Remember the training data for prediction-time kernels.
            self.X_train_ = X
        else:
            self.kernel_sampler_ = Nystroem(
                kernel=self.kernel,
                gamma=self.gamma,
                n_components=self.n_components,
            )
            design = self.kernel_sampler_.fit_transform(X)
        return super(WeightedSparseKernelClassifier, self).fit(design, y)

    def decision_function(self, X):
        """Score ``X`` in the same representation that was used by ``fit``."""
        exact = self.mode == 'exact'
        if exact:
            kernel_options = dict(metric=self.kernel,
                                  filter_params=True,
                                  gamma=self.gamma)
            design = pairwise_kernels(X, self.X_train_, **kernel_options)
        else:
            design = self.kernel_sampler_.transform(X)
        return super(WeightedSparseKernelClassifier, self).decision_function(design)
Beispiel #13
0
def test_nystroem_singular_kernel():
    """Nystroem must reproduce a singular kernel matrix with finite features."""
    rng = np.random.RandomState(0)
    X = rng.rand(10, 20)
    X = np.vstack([X] * 2)  # duplicate samples -> singular kernel matrix

    gamma = 100
    N = Nystroem(gamma=gamma, n_components=X.shape[0]).fit(X)
    X_transformed = N.transform(X)

    K = rbf_kernel(X, gamma=gamma)

    assert_array_almost_equal(K, np.dot(X_transformed, X_transformed.T))
    # Check the transformed output itself; the name Y is not defined here.
    assert_true(np.all(np.isfinite(X_transformed)))
Beispiel #14
0
def main(C, gamma):
    """Train a LinearSVC on Nystroem features of MNIST and print the test accuracy.

    MNIST is read (without downloading) from the ``data`` directory next to
    this script. Images are flattened, min-max scaled, mapped through a
    300-component Nystroem map with the given ``gamma`` and classified with
    ``LinearSVC(C=C)``.
    """
    script_dir = os.path.dirname(os.path.realpath(__file__))
    data_path = os.path.join(script_dir, 'data')
    train_data = datasets.MNIST(data_path, train=True, download=False)
    test_data = datasets.MNIST(data_path, train=False, download=False)

    train_X = train_data.data.numpy()
    train_y = train_data.targets.numpy()
    test_X = test_data.data.numpy()
    test_y = test_data.targets.numpy()

    # Flatten every image into one row vector.
    train_X = train_X.reshape(-1, train_X.shape[1] * train_X.shape[2])
    test_X = test_X.reshape(-1, test_X.shape[1] * test_X.shape[2])

    # Scaler and feature map are fitted on the training split only.
    scaler = MinMaxScaler()
    train_X = scaler.fit_transform(train_X)
    test_X = scaler.transform(test_X)

    feature_map_nystroem = Nystroem(gamma=gamma, random_state=23,
                                    n_components=300)
    train_X = feature_map_nystroem.fit_transform(train_X)
    test_X = feature_map_nystroem.transform(test_X)

    svc = LinearSVC(C=C, random_state=23)
    svc.fit(train_X, train_y)
    accuracy = svc.score(test_X, test_y)
    print(accuracy)
Beispiel #15
0
def nystroem_pca(X, nystroem_kwargs, pca_kwargs, verbose=1):
    """
    Apply a Nystroem kernel approximation followed by PCA.

    The Nystroem method builds an approximate feature map for an arbitrary
    kernel using a subset of the data as basis. X is mapped through it and
    PCA is then fitted and applied in that feature space.
    :param X: Values of our data
    :param nystroem_kwargs: (dict) Keyword arguments for the constructor of
            `sklearn.kernel_approximation.Nystroem`.
    :param pca_kwargs: (dict) Keyword arguments for `sklearn.decomposition.PCA`.
    :param verbose: (int) If truthy, print information about the PCA
            decomposition. Set verbose=0 for no output.
    :return: `numpy.ndarray`: The PCA decomposition of the data set in the
            feature space approximated via the Nystroem method.
    """

    # Nystroem feature map, fitted and applied on the same data.
    feature_map = Nystroem(**nystroem_kwargs)
    feature_map.fit(X)
    X_nystroem = feature_map.transform(X)

    # PCA in the approximated feature space.
    pca_nystroem = PCA(**pca_kwargs)
    pca_nystroem.fit(X_nystroem)
    X_nystroem_pca = pca_nystroem.transform(X_nystroem)

    if not verbose:
        return X_nystroem_pca

    # Report the reduction and the explained variance.
    n_in = X_nystroem.shape[1]
    n_out = X_nystroem_pca.shape[1]
    ratios = pca_nystroem.explained_variance_ratio_
    print("Dimensionality reduction: {} -> {}".format(n_in, n_out))
    # Truncate (not round) each ratio to three decimals for display.
    print("Variance explained by each component:",
          (ratios * 1000).astype(int) / 1000)
    print("Total variance explained by those {} components:".format(n_out),
          format(ratios.sum(), ".4f"))

    return X_nystroem_pca
Beispiel #16
0
def main():
    """Plot the first Nystroem feature over a regular 128x128 grid.

    A Nystroem map is fitted on 3000 points from ``circle``, evaluated on a
    grid spanning [-20, 20] in both coordinates, and its first component is
    shown as a heatmap.
    """
    data = pd.DataFrame(circle(3000), columns=['x', 'y'])
    ns = Nystroem(n_components=3000, gamma=0.15)
    ns.fit(data)

    # Both axes share the same rounded coordinates; compute them once.
    axis = np.around(np.linspace(-20.0, 20.0, 128), decimals=2)
    fullsample = pd.DataFrame(
        [(x, y) for x in axis for y in axis],
        columns=['x', 'y'],
    )

    transformed = ns.transform(fullsample)

    fullsample['c'] = pd.Series(transformed[:, 0])
    # Keyword arguments: pandas 2.x no longer accepts positional pivot args.
    grid = fullsample.pivot(index='x', columns='y', values='c')
    sns.heatmap(grid,
                xticklabels=32,
                yticklabels=32)
    pyplot.show()
Beispiel #17
0
# For data that is not linearly separable, use an RBF kernel to map it into a higher-dimensional space

# In[ ]:

from sklearn.kernel_approximation import RBFSampler, Nystroem

# Select the approximate RBF feature map: True -> RBFSampler, False -> Nystroem.
USE_RBF = False   # True: RBFSampler, False: Nystroem
if USE_RBF:
    # Fit on the training data only, then apply the same map to the test data.
    rbf_feature = RBFSampler(gamma=1, random_state=1)
    train_SGD = rbf_feature.fit_transform(train)
    test_SGD = rbf_feature.transform(test)
else:
    # Fit on the training data only, then apply the same map to the test data.
    Nys_feature = Nystroem(gamma=1, random_state=1)
    train_SGD = Nys_feature.fit_transform(train)
    test_SGD = Nys_feature.transform(test)



# In[ ]:

from sklearn.linear_model import SGDClassifier

USE_GridSearch = False
clf = SGDClassifier(loss='modified_huber', alpha=0.01, n_iter=100, class_weight="balanced", random_state=27)
if USE_GridSearch:
    param_test1 = {'loss':['hinge', 'log','modified_huber', 'squared_hinge', 'perceptron'], 'alpha':[0.1, 0.01, 0.01, 0.0001]}
    gsearch1 = GridSearchCV(estimator=clf, param_grid = param_test1, scoring='accuracy', n_jobs=-1,cv=2, verbose=True)
    gsearch1.fit(train_SGD, train_y)
    print gsearch1.grid_scores_, gsearch1.best_params_, gsearch1.best_score_
    clf = gsearch1
Beispiel #18
0
    del lka, lowrank_feats

    # # # # # # # # # # # # # # # #
    # Nyström again!
    # # # # # # # # # # # # # # # #
    print("Running Nystroem Approximation")
    factor = np.max(np.linalg.norm(train_features, axis=1))
    train_features /= factor
    test_features /= factor
    dim = min(train_features.shape[1] * 2, train_features.shape[0])
    print("Nystroem dim is {}".format(dim))

    # Use the Nyström approximation in sklearn
    approx = Nystroem(kernel='rbf', gamma=1., n_components=dim)
    approx.fit(train_features)
    train_features = approx.transform(train_features)
    test_features = approx.transform(test_features)

    # # # # # # # # # # # # # # # #
    # Ridge regression with cross validation
    # # # # # # # # # # # # # # # #

    style = 'c' if train_features.shape[0] > train_features.shape[1] else 'k'
    clf = GridSearchCV(RidgeRegression(), {
        'alpha': [(10**i) for i in range(-7, 0)],
        'style': [style]
    },
                       n_jobs=4)

    clf.fit(train_features, train_onehot)
    y_pred_ = np.argmax(clf.predict(test_features), axis=-1)
Beispiel #19
0
class KANNR(MLPRegressor):
    """MLPRegressor trained on Nystroem features of a configurable kernel.

    With ``kernel='linear'`` the network is fitted on the raw inputs. For any
    other supported kernel name, the inputs are first mapped through a
    ``Nystroem`` approximation of that kernel, both in ``fit`` and ``predict``.

    Extra parameters (all others are forwarded to ``MLPRegressor``)
    ----------
    kernel : str
        One of 'linear', 'poly', 'rbf', 'hyperbolic', 'triangle',
        'radial_basic', 'rquadratic', 'can', 'tru'.
    degree, gamma, coef0 :
        Passed to the kernel factories that accept them.
    """

    def __init__(self, hidden_layer_sizes=(100,), activation="identity",
                 solver='adam', alpha=0.0001,
                 batch_size='auto', learning_rate="constant",
                 learning_rate_init=0.001,
                 power_t=0.5, max_iter=200, shuffle=True,
                 random_state=None, tol=1e-4,
                 verbose=False, warm_start=False, momentum=0.9,
                 nesterovs_momentum=True, early_stopping=False,
                 validation_fraction=0.1, beta_1=0.9, beta_2=0.999,
                 epsilon=1e-8, n_iter_no_change=10, max_fun=15000,kernel='rbf', degree=2, gamma=0.1,
                     coef0=0.0):
        super().__init__(
            hidden_layer_sizes=hidden_layer_sizes,
            activation=activation, solver=solver, alpha=alpha,
            batch_size=batch_size, learning_rate=learning_rate,
            learning_rate_init=learning_rate_init, power_t=power_t,
            max_iter=max_iter, shuffle=shuffle,
            random_state=random_state, tol=tol, verbose=verbose,
            warm_start=warm_start, momentum=momentum,
            nesterovs_momentum=nesterovs_momentum,
            early_stopping=early_stopping,
            validation_fraction=validation_fraction,
            beta_1=beta_1, beta_2=beta_2, epsilon=epsilon,
            n_iter_no_change=n_iter_no_change,
            # Forward max_fun so the caller's value is no longer dropped.
            max_fun=max_fun)

        self.kernel = kernel
        self.gamma = gamma
        self.degree = degree
        self.coef0 = coef0

    def _make_kernel(self):
        """Return the kernel callable for ``self.kernel``; raise ValueError if unknown."""
        # Lambdas defer the factory lookup until the selected kernel is built.
        factories = {
            "poly": lambda: c_polynomial_kernel(degree=self.degree, gamma=self.gamma),
            "rbf": lambda: c_mrbf_kernel(degree=self.degree, gamma=self.gamma),
            "hyperbolic": lambda: c_hyperbolic_kernel(gamma=self.gamma, coef0=self.coef0),
            "triangle": lambda: c_triangle_kernel(gamma=self.gamma),
            "radial_basic": lambda: c_radial_basic_kernel(degree=self.degree, gamma=self.gamma),
            "rquadratic": lambda: c_rquadratic_kernel(degree=self.degree, gamma=self.gamma),
            "can": lambda: c_canberra_kernel(gamma=self.gamma),
            "tru": lambda: c_truncated_kernel(gamma=self.gamma),
        }
        try:
            factory = factories[self.kernel]
        except (KeyError, TypeError):
            raise ValueError(
                "Unknown kernel %r; expected 'linear' or one of %s"
                % (self.kernel, sorted(factories)))
        return factory()

    def fit(self, X, y):
        """Fit the network on ``X`` (linear) or on its Nystroem kernel features.

        Raises
        ------
        ValueError
            If ``self.kernel`` is not a supported kernel name. Previously
            this case returned ``None`` without fitting anything.
        """
        self.feature_map_nystroem = None
        self.isfit = False
        if self.kernel == "linear":
            super().fit(X, y)
            return self

        self.feature_map_nystroem = Nystroem(kernel=self._make_kernel())
        data_transformed = self.feature_map_nystroem.fit_transform(X)
        super().fit(data_transformed, y)
        # Marks that predict must apply the fitted feature map.
        self.isfit = True
        return self

    def set_params(self, **parameters):
        """Set each given parameter as an attribute and return ``self``."""
        for parameter, value in parameters.items():
            setattr(self, parameter, value)
        return self

    def predict(self, X):
        """Predict for ``X``, applying the kernel feature map when one was fitted."""
        # getattr: before fit() the flag does not exist; let the parent
        # report the unfitted state instead of raising AttributeError.
        if getattr(self, "isfit", False):
            X = self.mtransform(X)
        return super().predict(X)

    def mtransform(self, X):
        """Return the Nystroem features of ``X``, or ``X`` unchanged if no map is fitted."""
        feature_map = getattr(self, "feature_map_nystroem", None)
        if feature_map is not None and getattr(self, "isfit", False):
            return feature_map.transform(X)
        return X
Beispiel #20
0
import numpy as np
from params import *
from sklearn.kernel_approximation import RBFSampler, Nystroem
from sklearn.ensemble import RandomTreesEmbedding

# Load the space-delimited fabric table as float32, skipping its header row.
# fabric_file, gamma and n_fea are presumably provided by `params` - confirm.
fabric = np.genfromtxt(fabric_file, delimiter=' ',
                       skip_header=True).astype('float32')

# First column as an (N, 1) column vector, standardised to zero mean / unit std.
depths = fabric[:, 0].reshape(fabric[:, 0].size, 1)
depths_sd = depths.std()
depths_mean = depths.mean()
depths_norm = (depths - depths_mean) / depths_sd

# Approximate RBF feature map fitted on the standardised depths.
transformer = Nystroem(kernel='rbf', gamma=gamma, n_components=n_fea)
depths_tr = transformer.fit_transform(depths_norm).astype('float32')
# Rebind n_fea to the number of features actually produced by the transformer.
n_fea = depths_tr.shape[1]
# Columns 1..3 of the table.
wais_cs = fabric[:, 1:4]
# 1000 evenly spaced test points in [-1, 1], mapped with the fitted transformer.
# NOTE(review): these are passed directly, without the mean/std normalisation
# applied to `depths` - confirm they are meant to be in normalised units.
depths_test = np.linspace(-1, 1, 1000).reshape(1000, 1)
depths_test_tr = transformer.transform(depths_test).astype('float32')
class KernelLogisticRegression(LogisticRegression):
    """Logistic regression on Nystroem-approximated kernel features.

    Parameters
    ----------
    kernel : str, optional
        The kernel function handed to ``Nystroem``. If not set, 'rbf' is used.

    kernel_params : dict, optional
        Parameters to the kernel function.

    n_components : int, optional
        Number of features to construct; capped at the number of training
        samples when fitting.

    All remaining parameters are forwarded to ``LogisticRegression``.

    Warnings
    --------
    This kernel method is not specialized for temporal classification.
    """

    def __init__(self, kernel=None, *, kernel_params=None, n_components=100,
                 penalty="l2", dual=False, tol=1e-4, C=1.0,
                 fit_intercept=True, intercept_scaling=1, class_weight=None,
                 random_state=None, solver="lbfgs", max_iter=100,
                 multi_class="auto", verbose=0, warm_start=False,
                 n_jobs=None, l1_ratio=None):
        """Create a new kernel logistic regression."""
        super().__init__(penalty=penalty, dual=dual, tol=tol, C=C,
                         fit_intercept=fit_intercept,
                         intercept_scaling=intercept_scaling,
                         class_weight=class_weight,
                         random_state=random_state, solver=solver,
                         max_iter=max_iter, multi_class=multi_class,
                         verbose=verbose, warm_start=warm_start,
                         n_jobs=n_jobs, l1_ratio=l1_ratio)
        self.kernel = kernel
        self.kernel_params = kernel_params
        self.n_components = n_components

    def fit(self, x, y, sample_weight=None):
        """Fit the Nystroem map on ``x``, then the logistic regression on its features."""
        rng = check_random_state(self.random_state)
        kernel_name = self.kernel if self.kernel else "rbf"
        # Never ask for more components than there are samples.
        n_features = min(x.shape[0], self.n_components)
        # Draw an integer seed from the estimator's random state.
        seed = rng.randint(np.iinfo(np.int32).max)
        self.nystroem_ = Nystroem(
            kernel=kernel_name,
            kernel_params=self.kernel_params,
            n_components=n_features,
            random_state=seed,
        )
        self.nystroem_.fit(x)
        features = self.nystroem_.transform(x)
        super().fit(features, y, sample_weight=sample_weight)
        return self

    def decision_function(self, x):
        """Return decision values for ``x`` after mapping it through the fitted Nystroem map."""
        check_is_fitted(self)
        features = self.nystroem_.transform(x)
        return super().decision_function(features)
#%%timeit
# NOTE(review): `length` is not used below; the reshapes use 1000 rows - confirm.
length = 50000
# Number of Nystroem components used by both implementations.
num_sampling = 100

# Swap the first two axes of the example batch and wrap a (1, 1000, 784) view
# as a lazy tensor.
x = example_data.permute(1, 0, 2, 3)
X_i = LazyTensor(x.view(1, 1000, 784))

# Instantiate & fit on lazy tensor version
LN_test = LazyNystrom_T(num_sampling, kernel='rbf', random_state=0).fit(X_i)
X_new_i = LN_test.transform(X_i)

#NUMPY on MNIST
#%%timeit
# Reshape the batch to (1, 1000, 784) for the lazy-tensor path
x_ = x.reshape(1, 1000, 784)

X_i = LazyTensor(x_)

# Instantiate & fit sklearn's Nystroem for comparison
sk_N = Nystroem(kernel='rbf', n_components=num_sampling,
                random_state=0).fit(x[0].numpy())
x_new = sk_N.transform(x[0].numpy())  # (length, num_sampling) array

# Instantiate & fit on lazy tensor version
LN_test = LazyNystrom_T(num_sampling, kernel='rbf', random_state=0).fit(X_i)
X_new_i = LN_test.transform(X_i)  # (1,length,num_sampling) lazy tensor

# Print the norm of the difference between both results, divided by the
# number of elements
err = np.linalg.norm(x_new - X_new_i.sum(dim=0).numpy()) / x_new.size
print(f'Error when compared to sklearn = {err}')
#>>Error when compared to sklearn = 0.00022449403762817382
# 3-D scatter of the first three columns of X_skernpca, coloured by class label
# (red: y_tr == 0, blue: y_tr == 1). X_skernpca and y_tr are defined outside this snippet.
ax = plt.axes(projection='3d')
ax.scatter(X_skernpca[y_tr==0, 0], X_skernpca[y_tr==0, 1], X_skernpca[y_tr==0, 2], color='red', alpha=0.5)
ax.scatter(X_skernpca[y_tr==1, 0], X_skernpca[y_tr==1, 1], X_skernpca[y_tr==1, 2], color='blue', alpha=0.5)
plt.show()



# Linear PCA baseline: fit on the training set, reuse the fitted projection for the test set.
scikit_pca = PCA(n_components=9)        
X_pca = scikit_pca.fit_transform(train_x)
X_pca_t =scikit_pca.transform(test_x)


# Nystroem approximation of an RBF kernel feature map with 50 components.
# NOTE(review): X_pc_1 and X_pc_t are not used in the visible part of this script.
X_effi=Nystroem(kernel="rbf", gamma=2**(-30), n_components=50)
X_pc=X_effi.fit_transform(train_x)
X_pc_1=X_pc[:,0:9]
X_pc_t =X_effi.transform(test_x)



# LDA on the X_skernpca features; error1 is the test misclassification rate (1 - accuracy).
clf= LinearDiscriminantAnalysis()
clf.fit(X_skernpca, y_tr)
error1=1-accuracy_score(y_t,clf.predict(X_skernpca_t))

print(error1)


# LDA on the linear-PCA features; error2 is the test misclassification rate (1 - accuracy).
clf2 = LinearDiscriminantAnalysis()
clf2.fit(X_pca, y_tr)
error2=1-accuracy_score(y_t,clf2.predict(X_pca_t))

print(error2)
 # Project the test kernel with the fitted kCCA model and score the first n
 # projected dimensions for every n in nComponents.
 XtestT = kcca.transform(ktest)
 # len() replaces np.alen(), which is deprecated and removed from recent NumPy.
 kccaScores = np.zeros((2, len(nComponents)))
 for i, n in enumerate(nComponents):
     kccaScores[:, i] = util.classify(XtrainT[:, 0:n], XtestT[:, 0:n], labelsTrain, labelsTest)

 #%% Subsampling methods
 kpls = PLSRegression(n_components=150)
 nComponents = np.arange(173, 2173, 100)

 # Nystroem method
 elapTimeNys = np.zeros(np.shape(nComponents))
 kplsScoresNys = np.zeros((2, 3))
 # Only these three component counts are scored; maps count -> column of kplsScoresNys.
 scoredComponents = {573: 0, 1073: 1, 1573: 2}
 for i, n in enumerate(nComponents):
     nys = Nystroem(n_components=n, gamma=gamma)
     nys.fit(Xtrain)
     ktrain = nys.transform(Xtrain)
     ktest = nys.transform(Xtest)
     # Only the PLS fit is timed, not the Nystroem feature construction.
     startTime = timeit.default_timer()
     kpls.fit(ktrain, Ytrain)
     elapTimeNys[i] = timeit.default_timer() - startTime
     XtrainT = kpls.transform(ktrain)
     XtestT = kpls.transform(ktest)

     if n in scoredComponents:
         kplsScoresNys[:, scoredComponents[n]] = util.classify(XtrainT, XtestT, labelsTrain, labelsTest)
 # RBF sampler method
Beispiel #25
0
class SkClassif(Model):
    """Model wrapper that trains a scikit-learn style classifier on Nystroem features.

    Training batches are buffered until enough samples are available, first to
    fit the Nystroem map and then to fit ``self.clf``. ``self.clf`` starts as
    ``None`` here, so it has to be assigned elsewhere before the classifier is
    fitted.
    """
    # Layer names
    CLF = 'clf'
    CLASS_SCORES = CLF  # Symbolic name of the layer providing the class scores as output

    def __init__(self, config, input_shape=None):
        super(SkClassif, self).__init__(config, input_shape)

        self.INPUT_SIZE = utils.shape2size(self.get_input_shape())

        options = config.CONFIG_OPTIONS
        dataset_meta = P.GLB_PARAMS[P.KEY_DATASET_METADATA]
        self.NUM_CLASSES = dataset_meta[P.KEY_DS_NUM_CLASSES]

        # Resolve the number of samples used to fit the classifier: start from
        # the dataset's training-set size and let progressively more specific
        # config options override it.
        num_samples = dataset_meta[P.KEY_DS_TRN_SET_SIZE]
        for option_key in (P.KEY_TOT_TRN_SAMPLES, P.KEY_NUM_TRN_SAMPLES,
                           P.KEY_SKCLF_NUM_SAMPLES):
            num_samples = options.get(option_key, num_samples)
        self.NUM_SAMPLES = num_samples

        # The Nystroem map cannot use more components than samples collected.
        requested_components = options.get(P.KEY_NYSTROEM_N_COMPONENTS, 100)
        self.N_COMPONENTS = min(requested_components, self.NUM_SAMPLES)
        self.nystroem = Nystroem(n_components=self.N_COMPONENTS)

        self.clf = None
        self.nystroem_fitted = False
        self.clf_fitted = False
        # Buffers: raw inputs, Nystroem-transformed inputs, and labels.
        self.X = []
        self.X_transformed = []
        self.y = []

    def state_dict(self):
        """Return the parent state dict extended with the sklearn objects and fit flags."""
        state = super(SkClassif, self).state_dict()
        state['nystroem'] = self.nystroem
        state['clf'] = self.clf
        state['nystroem_fitted'] = self.nystroem_fitted
        state['clf_fitted'] = self.clf_fitted
        return state

    def load_state_dict(self, state_dict):
        """Restore the sklearn objects and fit flags, then delegate the rest to the parent.

        The four extra entries are popped from ``state_dict`` before the parent
        sees it.
        """
        for key in ('nystroem', 'clf', 'nystroem_fitted', 'clf_fitted'):
            setattr(self, key, state_dict.pop(key))
        super(SkClassif, self).load_state_dict(state_dict)

    # Returns classifier predictions for a given input batch
    def compute_output(self, x):
        flat_batch = x.view(x.size(0), -1).tolist()
        features = self.nystroem.transform(flat_batch)
        predictions = torch.tensor(self.clf.predict(features), device=P.DEVICE)
        return utils.dense2onehot(predictions, self.NUM_CLASSES)

    # Here we define the flow of information through the network
    def forward(self, x):
        if self.training and not self.clf_fitted:
            flat_batch = x.view(x.size(0), -1).tolist()

            # Here we use just the first NUM_SAMPLES samples to do a Nystroem approximation, because they are already
            # a random subset of the dataset. This allows to save memory by avoiding to store the whole dataset.
            if self.nystroem_fitted:
                self.X_transformed += self.nystroem.transform(
                    flat_batch).tolist()
            else:
                self.X += flat_batch
                if len(self.X) >= self.N_COMPONENTS:
                    self.X_transformed = self.nystroem.fit_transform(
                        self.X).tolist()
                    self.nystroem_fitted = True
                    self.X = []

            # Here we fit the actual classifier
            if len(self.X_transformed) >= self.NUM_SAMPLES:
                self.clf.fit(self.X_transformed, self.y)
                self.clf_fitted = True
                self.X_transformed = []
                self.y = []

        # Until the classifier is fitted, emit random scores of the right shape.
        if self.clf_fitted:
            scores = self.compute_output(x)
        else:
            scores = torch.rand((x.size(0), self.NUM_CLASSES),
                                device=P.DEVICE)
        return {self.CLF: scores}

    def set_teacher_signal(self, y):
        """Buffer the labels of the current batch while the classifier is still being trained."""
        if y is None or not self.training or self.clf_fitted:
            return
        if len(self.y) < self.NUM_SAMPLES:
            self.y += y.tolist()
Beispiel #26
0
# Project the test kernel with the fitted kCCA model and score the first n
# projected dimensions for every n in nComponents.
dataTestT = kcca.transform(ktest)
# len() replaces np.alen(), which is deprecated and removed from recent NumPy.
kccaScores = np.zeros((2, len(nComponents)))
for i, n in enumerate(nComponents):
    kccaScores[:, i] = util.classify(dataTrainT[:, 0:n], dataTestT[:, 0:n], labelsTrain, labelsTest)

#%% Subsampling methods
kpls = PLSRegression(n_components=150)
nComponents = np.arange(173, 2173, 100)

# Nystroem method
elapTimeNys = np.zeros(np.shape(nComponents))
kplsScoresNys = np.zeros((2, 3))
# Only these three component counts are scored; maps count -> column of kplsScoresNys.
scoredComponents = {573: 0, 1073: 1, 1573: 2}
for i, n in enumerate(nComponents):
    nys = Nystroem(n_components=n, gamma=gamma)
    nys.fit(dataTrain)
    ktrain = nys.transform(dataTrain)
    ktest = nys.transform(dataTest)
    # Only the PLS fit is timed, not the Nystroem feature construction.
    startTime = timeit.default_timer()
    kpls.fit(ktrain, Ytrain)
    elapTimeNys[i] = timeit.default_timer() - startTime
    dataTrainT = kpls.transform(ktrain)
    dataTestT = kpls.transform(ktest)

    if n in scoredComponents:
        kplsScoresNys[:, scoredComponents[n]] = util.classify(dataTrainT, dataTestT, labelsTrain, labelsTest)

# RBF sampler method
Beispiel #27
0
def permute_dictionaries(training_data, training_labels):
    """Shuffle data and labels with one shared, fixed-seed permutation.

    Despite the name, both arguments must support indexing with an integer
    array (as NumPy arrays do). The permutation is generated from a
    ``RandomState`` seeded with 70, so the result is the same on every call,
    and row ``i`` of the data stays paired with row ``i`` of the labels.

    Returns a ``(data, labels)`` tuple.
    """
    row_count = len(training_data)
    rng = np.random.RandomState(seed=70)
    order = rng.permutation(row_count)
    shuffled_data = training_data[order]
    shuffled_labels = training_labels[order]
    return (shuffled_data, shuffled_labels)

# Load the MNIST .mat file from the local data/ directory.
total_data = io.loadmat("data/%s_data.mat" % "mnist")

# NOTE(review): `index` is not used in the visible part of this script.
index = 60000
# Scale the raw values by 1/255 (presumably 8-bit pixel intensities -- confirm).
total_training_data = total_data["training_data"] / float(255)
total_training_data_labels = total_data["training_labels"]
# Shuffle data and labels together with the fixed-seed permutation.
total_training_data, total_training_data_labels = permute_dictionaries(total_training_data, total_training_data_labels)
test_data = total_data["test_data"] / float(255)

# Fit the Nystroem feature map on the training data and reuse it for the test data.
# NOTE(review): n_components = 25000 makes the transformed matrices very wide; check memory use.
feature_map_nystroem = Nystroem(gamma = .05, n_components = 25000)
features_training = feature_map_nystroem.fit_transform(total_training_data)
features_test = feature_map_nystroem.transform(test_data)

print("mnist_training_data", features_training.shape)
print("mnist_training_data_labels", total_training_data_labels.shape)
print("mnist_test_data", features_test.shape)

def problem5(training_data, training_data_labels, test_data, C_value):
    """Train a linear SVM, print its training accuracy and export test predictions.

    A ``LinearSVC`` (primal form, fixed random state 10, regularisation
    ``C_value``) is fitted on ``training_data`` with the flattened labels.
    The training accuracy is printed, and the predictions for ``test_data``
    are handed to ``results_to_csv``.
    """
    model = svm.LinearSVC(dual=False, random_state=10, C=C_value)
    flat_labels = np.ravel(training_data_labels)
    model.fit(training_data, flat_labels)

    train_predictions = np.ravel(model.predict(training_data))
    print(accuracy_score(np.ravel(training_data_labels), train_predictions))

    test_predictions = model.predict(test_data)
    results_to_csv(test_predictions)
Beispiel #28
0
class SkClassif(Model):
    """Model wrapper that trains a scikit-learn style classifier on (optionally Nystroem) features.

    Training batches are flattened, optionally L2-normalised and buffered.
    When Nystroem is enabled, the first buffered inputs fit the feature map.
    Once ``NUM_SAMPLES`` feature rows are available, ``self.clf`` is fitted on
    them. ``self.clf`` starts as ``None`` here, so it has to be assigned
    elsewhere before the classifier is fitted.

    Setting the Nystroem component count to ``None`` or ``0`` disables the
    feature map, and the classifier then works on the (normalised) raw inputs.
    """
    # Layer names
    CLF = 'clf'
    CLASS_SCORES = 'class_scores'  # Name of the classification output providing the class scores

    def __init__(self, config, input_shape=None):
        super(SkClassif, self).__init__(config, input_shape)

        self.INPUT_SIZE = utils.shape2size(self.get_input_shape())
        self.NUM_CLASSES = P.GLB_PARAMS[P.KEY_DATASET_METADATA][
            P.KEY_DS_NUM_CLASSES]

        # Number of samples used to fit the classifier: the most specific
        # config option wins, falling back to the dataset's training-set size.
        self.NUM_SAMPLES = config.CONFIG_OPTIONS.get(
            P.KEY_SKCLF_NUM_SAMPLES,
            config.CONFIG_OPTIONS.get(
                P.KEY_NUM_TRN_SAMPLES,
                config.CONFIG_OPTIONS.get(
                    P.KEY_TOT_TRN_SAMPLES, P.GLB_PARAMS[P.KEY_DATASET_METADATA]
                    [P.KEY_DS_TRN_SET_SIZE])))
        self.N_COMPONENTS = config.CONFIG_OPTIONS.get(
            P.KEY_NYSTROEM_N_COMPONENTS, 100)
        if self.N_COMPONENTS is None:
            self.N_COMPONENTS = 0
        # The Nystroem map cannot use more components than samples collected.
        self.N_COMPONENTS = min(self.N_COMPONENTS, self.NUM_SAMPLES)
        # A component count of 0 disables the Nystroem feature map entirely.
        self.nystroem = Nystroem(
            n_components=self.N_COMPONENTS) if self.N_COMPONENTS > 0 else None
        self.clf = None
        self.nystroem_fitted = False
        self.clf_fitted = False
        # Buffers: raw inputs, classifier features, and labels.
        self.X = []
        self.X_transformed = []
        self.y = []
        self.normalize = config.CONFIG_OPTIONS.get(P.KEY_SKCLF_NORM, False)

    def state_dict(self):
        """Return the parent state dict extended with the sklearn objects and fit flags."""
        d = super(SkClassif, self).state_dict()
        d['nystroem'] = self.nystroem
        d['clf'] = self.clf
        d['nystroem_fitted'] = self.nystroem_fitted
        d['clf_fitted'] = self.clf_fitted
        return d

    def load_state_dict(self, state_dict, strict=True):
        """Restore the sklearn objects and fit flags, then delegate the rest to the parent.

        The four extra entries are popped from ``state_dict`` before the parent
        sees it. ``strict`` is forwarded unchanged; its default used to be an
        Ellipsis placeholder, which is truthy, so ``True`` keeps the same
        behaviour for truthiness checks.
        """
        self.nystroem = state_dict.pop('nystroem')
        self.clf = state_dict.pop('clf')
        self.nystroem_fitted = state_dict.pop('nystroem_fitted')
        self.clf_fitted = state_dict.pop('clf_fitted')
        super(SkClassif, self).load_state_dict(state_dict, strict)

    def norm_if_needed(self, x):
        """Return ``x`` with rows scaled to unit L2 norm when normalisation is enabled."""
        if not self.normalize:
            return x
        norm_x = x.norm(p=2, dim=1, keepdim=True)
        norm_x += (norm_x == 0).float()  # Prevent divisions by zero
        return x / norm_x

    def _featurize(self, x):
        """Turn a flattened batch (list of rows) into classifier features.

        Applies the Nystroem map when it is enabled and then the optional
        normalisation. Training and prediction both go through this helper so
        the classifier always sees features built the same way.
        """
        feats = x if self.nystroem is None else self.nystroem.transform(x)
        return self.norm_if_needed(torch.tensor(feats,
                                                device=P.DEVICE)).tolist()

    # Here we define the flow of information through the network
    def forward(self, x):
        x = self.norm_if_needed(x.view(
            x.size(0), -1)).tolist()  # Normalize input if needed

        # Here we append inputs to training pipeline if we are in training mode
        if self.training and not self.clf_fitted:
            # Here we use just the first NUM_SAMPLES samples to do a Nystroem approximation, because they are already
            # a random subset of the dataset. This allows to save memory by avoiding to store the whole dataset.
            if self.nystroem is not None and not self.nystroem_fitted:
                self.X += x
                if len(self.X) >= self.N_COMPONENTS:
                    self.X_transformed = self.norm_if_needed(
                        torch.tensor(self.nystroem.fit_transform(self.X),
                                     device=P.DEVICE)).tolist()
                    self.nystroem_fitted = True
                    self.X = []
            else:
                # Nystroem already fitted, or disabled: featurize directly.
                self.X_transformed += self._featurize(x)

            # Here we fit the actual classifier
            if len(self.X_transformed) >= self.NUM_SAMPLES:
                self.clf.fit(self.X_transformed, self.y)
                self.clf_fitted = True
                self.X_transformed = []
                self.y = []

        return self.compute_output(x)

    # Process input batch and compute output dictionary
    def compute_output(self, x):
        out = {}

        clf_out = self.get_clf_pred(x)

        out[self.CLF] = clf_out
        out[self.CLASS_SCORES] = {P.KEY_CLASS_SCORES: clf_out}

        return out

    # Returns classifier predictions for a given input batch
    def get_clf_pred(self, x):
        # Until the classifier is fitted, emit random scores of the right shape.
        if not self.clf_fitted:
            return torch.rand((len(x), self.NUM_CLASSES), device=P.DEVICE)
        # Build features exactly as during training. Previously this called
        # self.nystroem.transform unconditionally (crashing when Nystroem is
        # disabled) and skipped the post-transform normalisation.
        return utils.dense2onehot(
            torch.tensor(self.clf.predict(self._featurize(x)),
                         device=P.DEVICE), self.NUM_CLASSES)

    # Set label info for current batch
    def set_teacher_signal(self, y):
        if y is not None and len(
                self.y
        ) < self.NUM_SAMPLES and self.training and not self.clf_fitted:
            self.y += y.tolist()
Beispiel #29
0
    def estimateAll(self, loggedData):
        """Estimate a weighted average of the logged targets using kernel-based weights.

        Each entry of ``loggedData`` is indexed as ``[0]`` query id, ``[1]``
        logged ranking, ``[2]`` logged target value and ``[3]`` new ranking.
        For every entry, the features of the logged ranking ("null") and of the
        new ranking ("target") are flattened into one covariate row each. A
        weight vector ``beta`` is then obtained by solving a regularised linear
        system with conjugate gradients, either on Nystroem features (when
        ``self.approx`` is set and there are more than ``self.p`` instances) or
        on the full kernel matrices.

        Returns ``dot(beta, targets) / beta.sum()``; in the Nystroem branch this
        is averaged over ``self.n_approx`` independent approximations.
        """
        numInstances = len(loggedData)
        targets = numpy.zeros(numInstances, order='C', dtype=numpy.float64)
        # One row per logged instance, one block of numFeatures columns per ranking slot.
        null_covariates = scipy.sparse.lil_matrix(
            (numInstances, self.numFeatures * self.rankingSize),
            dtype=numpy.float64)
        target_covariates = scipy.sparse.lil_matrix(
            (numInstances, self.numFeatures * self.rankingSize),
            dtype=numpy.float64)
        print("Starting to create covariates", flush=True)
        for j in range(numInstances):
            currentDatapoint = loggedData[j]

            targets[j] = currentDatapoint[2]

            currentQuery = currentDatapoint[0]
            currentRanking = currentDatapoint[1]
            newRanking = currentDatapoint[3]

            # Feature rows of the documents in the logged ranking, in ranking order.
            # NOTE(review): the indptr/indices usage below assumes CSR matrices -- confirm.
            nullFeatures = self.loggingPolicy.dataset.features[currentQuery][
                currentRanking, :]
            nullFeatures.eliminate_zeros()

            # Feature rows of the documents in the new ranking.
            targetFeatures = self.loggingPolicy.dataset.features[currentQuery][
                newRanking, :]
            targetFeatures.eliminate_zeros()

            # Fill row j of the LIL matrices directly through their data/rows arrays.
            null_covariates.data[j] = nullFeatures.data
            target_covariates.data[j] = targetFeatures.data
            nullIndices = nullFeatures.indices
            targetIndices = targetFeatures.indices
            # Shift the column indices of slot k by k * numFeatures (in place) so that
            # each ranking slot lands in its own block of the flattened row.
            # NOTE(review): the loop bound comes from nullFeatures only; this assumes
            # both rankings have the same number of rows -- confirm.
            for k in range(nullFeatures.shape[0]):
                nullIndices[nullFeatures.indptr[k]:nullFeatures.
                            indptr[k + 1]] += k * self.numFeatures
                targetIndices[targetFeatures.indptr[k]:targetFeatures.
                              indptr[k + 1]] += k * self.numFeatures

            null_covariates.rows[j] = nullIndices
            target_covariates.rows[j] = targetIndices

            # Progress indicator.
            if j % 1000 == 0:
                print(".", end='', flush=True)
            del currentDatapoint
            del nullFeatures
            del targetFeatures

        print("Converting covariates", flush=True)
        # Densify both covariate matrices.
        null_covariates = null_covariates.toarray()
        target_covariates = target_covariates.toarray()

        # The min-max scaling is fitted on the null covariates only and applied to both.
        scaler = sklearn.preprocessing.MinMaxScaler()
        scaler.fit(null_covariates)

        s_null_covariates = scaler.transform(null_covariates)
        s_target_covariates = scaler.transform(target_covariates)
        print("Finished conversion", flush=True)

        print("Calculating heuristic kernel param", flush=True)

        random_indices = numpy.arange(
            0, s_null_covariates.shape[0])  # array of all indices
        numpy.random.shuffle(random_indices)  # shuffle the array

        sample_null_covar = s_null_covariates[
            random_indices[:2000]]  # get N samples without replacement

        sample_target_covar = s_target_covariates[
            random_indices[:2000]]  # get N samples without replacement

        # Kernel parameter: 0.5 * kernel_param divided by the median pairwise squared
        # Euclidean distance over the sampled null and target rows.
        recom_param = (0.5 * self.kernel_param) / numpy.median(
            pdist(numpy.vstack([sample_null_covar, sample_target_covar]),
                  'sqeuclidean'))

        print("Computing kernel matrix", flush=True)

        # m and n are both the number of logged instances.
        m = null_covariates.shape[0]
        n = target_covariates.shape[0]
        reg_params = self.reg_param / n

        if self.approx and m > self.p:
            # Nystroem branch: repeat the low-rank estimate n_approx times and average.
            p = self.p
            rets = []
            for i in range(self.n_approx):
                # The feature map is fitted on the scaled null covariates and applied to both sets.
                nystroem = Nystroem(gamma=recom_param, n_components=p)
                nystroem.fit(s_null_covariates)

                nullPhi = nystroem.transform(s_null_covariates)
                targetPhi = nystroem.transform(s_target_covariates)

                # b: mean of the target feature rows (uniform weights 1/m).
                b = numpy.dot(targetPhi.T, numpy.repeat(1.0 / m, m, axis=0))
                # A: Gram matrix of the null features plus a ridge term p * reg_params on the diagonal.
                A = nullPhi.T.dot(nullPhi) + numpy.diag(
                    numpy.repeat(p * reg_params, p))
                # Solve A w = b with conjugate gradients, then map w back to one weight per instance.
                beta_vec_approx = nullPhi.dot(
                    scipy.sparse.linalg.cg(A, b, tol=1e-08, maxiter=5000)[0])

                # Weighted average of the targets, normalised by the sum of the weights.
                ret = numpy.dot(beta_vec_approx,
                                targets) / beta_vec_approx.sum()
                rets.append(ret)

            return numpy.mean(rets)
        else:
            # Exact branch: full kernel matrices null-vs-null and null-vs-target.
            nullRecomMatrix = self.kernel(s_null_covariates, s_null_covariates,
                                          recom_param)
            targetRecomMatrix = self.kernel(s_null_covariates,
                                            s_target_covariates, recom_param)

            # b: row-wise mean of the null-vs-target kernel matrix (uniform weights 1/m).
            b = numpy.dot(targetRecomMatrix, numpy.repeat(1.0 / m, m, axis=0))
            # A: null kernel matrix plus a ridge term n * reg_params on the diagonal.
            A = nullRecomMatrix + numpy.diag(numpy.repeat(n * reg_params, n))

            print("Finding beta_vec", flush=True)
            # The second return value of cg (the solver status) is ignored.
            beta_vec, _ = scipy.sparse.linalg.cg(A, b, tol=1e-08, maxiter=5000)

            # Weighted average of the targets, normalised by the sum of the weights.
            return numpy.dot(beta_vec, targets) / beta_vec.sum()
Beispiel #30
0
from params import ts_depths,n_fea,gamma, np, sp
class Whitener:
    """Standardise data with the per-column mean and standard deviation of a reference array."""

    def __init__(self, X):
        # Statistics are taken along axis 0 of the reference data.
        self.Xmean = X.mean(0)
        self.Xstd = X.std(0)

    def whiten(self, Z):
        """Return ``Z`` centred by the stored mean and divided by the stored standard deviation."""
        centred = Z - self.Xmean
        return centred / self.Xstd

    def unwhiten(self, Zw):
        """Invert :meth:`whiten`: rescale by the stored standard deviation and add the mean back."""
        rescaled = Zw * self.Xstd
        return rescaled + self.Xmean

def expkern(x, y):
    """Exponential kernel ``exp(-gamma * ||x - y||)`` using the module-level ``gamma``."""
    distance = la.norm(x - y)
    return np.exp(-gamma * distance)

# Standardise the depths, then place n_fea evenly spaced landmark points over
# the range of the standardised values (as a column vector).
wh = Whitener(ts_depths)
ts_depths_w = wh.whiten(ts_depths)
xx = np.linspace(ts_depths_w.min(), ts_depths_w.max(), n_fea)[:, np.newaxis]

# Nystroem map driven by the custom exponential kernel. `gamma` must not be
# passed here: scikit-learn rejects gamma/coef0/degree for a callable kernel,
# and expkern already reads the module-level `gamma` itself.
# Built-in RBF alternative: Nystroem(gamma=gamma, n_components=n_fea)
rbf_tr = Nystroem(expkern, n_components=n_fea)

# Fit on the landmark grid, then embed the standardised depths.
rbf_tr.fit(xx)
ts_depths_tr = rbf_tr.transform(ts_depths_w)

Beispiel #31
0
    kccaScores = np.zeros((2, np.alen(nComponents)))
    for i, n in enumerate(nComponents):
        kccaScores[:, i] = util.classify(XtrainT[:, 0:n], XtestT[:, 0:n],
                                         labelsTrain, labelsTest)

    #%% Subsampling methods
    kpls = PLSRegression(n_components=150)
    nComponents = np.arange(173, 2173, 100)

    # Nystroem method
    elapTimeNys = np.zeros(np.shape(nComponents))
    kplsScoresNys = np.zeros((2, 3))
    for i, n in enumerate(nComponents):
        nys = Nystroem(n_components=n, gamma=gamma)
        nys.fit(Xtrain)
        ktrain = nys.transform(Xtrain)
        ktest = nys.transform(Xtest)
        startTime = timeit.default_timer()
        kpls.fit(ktrain, Ytrain)
        elapTimeNys[i] = timeit.default_timer() - startTime
        XtrainT = kpls.transform(ktrain)
        XtestT = kpls.transform(ktest)

        if n == 573:
            kplsScoresNys[:, 0] = util.classify(XtrainT, XtestT, labelsTrain,
                                                labelsTest)
        elif n == 1073:
            kplsScoresNys[:, 1] = util.classify(XtrainT, XtestT, labelsTrain,
                                                labelsTest)
        elif n == 1573:
            kplsScoresNys[:, 2] = util.classify(XtrainT, XtestT, labelsTrain,
Beispiel #32
0
        print "mean accuracy  :", np.mean(Acc)

    # train Nystroem linear-svm
    if 0:
        T1 = []
        T2 = []
        Acc = []
        for i in range(500):
            X_train, X_test, y_train, y_test = train_test_split(X, y)
            sampler = Nystroem(n_components=100)
            clf = svm.LinearSVC()
            t1 = time()
            X_train_new = sampler.fit_transform(X_train)
            clf.fit(X_train_new, y_train)
            t2 = time()
            X_test_new = sampler.transform(X_test)
            y_pred = clf.predict(X_test_new)
            print_stats('Nys-SVM {}'.format(i), t1, t2, time(), y_test, y_pred)
        print "mean train time:", np.mean(T1)
        print "mean test  time:", np.mean(T2)
        print "mean accuracy  :", np.mean(Acc)

    # train a linear-svm
    if 0:
        T1 = []
        T2 = []
        Acc = []
        for i in range(20):
            X_train, X_test, y_train, y_test = train_test_split(X, y)
            clf = svm.LinearSVC()
            t1 = time()
# rbf2 is a feature map created outside the visible part of this script.
rbf2.fit(Xtr)

# the random state needs to be specified so that the test and train features are the same
# Linear SVM (hinge loss) trained on the rbf2 features.
clfa2 = svm.LinearSVC(C=C, loss='hinge')
clfa2.fit(rbf2.transform(Xtr), Ytr)

# Nystroem approximation of the RBF kernel with nc components.
# NOTE(review): coef0 and degree are passed although kernel='rbf'; presumably
# they have no effect for this kernel -- confirm against the installed scikit-learn.
nys = Nystroem(kernel='rbf',
               gamma=gamma,
               coef0=1,
               degree=3,
               kernel_params=None,
               n_components=nc,
               random_state=None)
nys.fit(Xtr)
# Linear SVM (hinge loss) trained on the Nystroem features.
clf3 = svm.LinearSVC(C=C, loss='hinge')
clf3.fit(nys.transform(Xtr), Ytr)

# Exact RBF-kernel SVM with the same C and gamma, for reference.
clf = svm.SVC(kernel='rbf', C=C, gamma=gamma)
clf.fit(Xtr, Ytr)

plt.close('all')
# Square evaluation grid: npts x npts points spanning [min(Xtr), max(Xtr)] on both axes.
npts = 50
xm, xM = np.min(Xtr), np.max(Xtr)
x = np.linspace(xm, xM, npts)
y = np.linspace(xm, xM, npts)
t = np.array(list(itertools.product(x, y)))


def plotit(z, title):
    plt.figure()
    z = np.reshape(z, (npts, npts))
Beispiel #34
0
class NystronemSampler(Transformer):
    """Feature transformer that replaces the selected columns with Nystroem kernel features."""

    def __init__(self,
                 kernel='rbf',
                 n_components=100,
                 gamma=1.0,
                 degree=3,
                 coef0=1,
                 random_state=None):
        super().__init__("nystronem_sampler", 15, random_state=random_state)
        self.input_type = [NUMERICAL, DISCRETE, CATEGORICAL]
        self.compound_mode = 'only_new'
        self.output_type = NUMERICAL

        # Kernel approximation hyperparameters, forwarded to Nystroem on first use.
        self.kernel = kernel
        self.n_components = n_components
        self.gamma = gamma
        self.degree = degree
        self.coef0 = coef0
        self.random_state = random_state

    @ease_trans
    def operate(self, input_datanode, target_fields=None):
        """Return the Nystroem features of the target columns, fitting the map on first use."""
        X, _ = input_datanode.data
        X_new = X[:, target_fields].astype(np.float64)

        # Because the pipeline guarantees that each feature is positive,
        # clip all values below zero to zero
        if self.kernel == 'chi2':
            # For sparse input only the stored values need clipping.
            values = X_new.data if scipy.sparse.issparse(X_new) else X_new
            values[values < 0] = 0.0

        if not self.model:
            # Never request more components than there are rows.
            n_components = min(X.shape[0], self.n_components)

            # Coerce hyperparameters to the numeric types Nystroem expects.
            self.gamma = float(self.gamma)
            self.degree = int(self.degree)
            self.coef0 = float(self.coef0)

            self.model = Nystroem(kernel=self.kernel,
                                  n_components=n_components,
                                  gamma=self.gamma,
                                  degree=self.degree,
                                  coef0=self.coef0,
                                  random_state=self.random_state)
            self.model.fit(X_new.astype(np.float64))

        return self.model.transform(X_new)

    @staticmethod
    def get_hyperparameter_search_space(dataset_properties=None):
        """Build the configuration space for kernel, n_components, gamma, degree and coef0.

        The chi2 kernel is excluded when ``dataset_properties`` marks the data
        as sparse or as not signed.
        """
        chi2_excluded = dataset_properties is not None and (
            dataset_properties.get("sparse") is True
            or dataset_properties.get("signed") is False)
        allow_chi2 = not chi2_excluded

        kernel_choices = ['poly', 'rbf', 'sigmoid', 'cosine']
        gamma_kernels = ["poly", "rbf", "sigmoid"]
        if allow_chi2:
            kernel_choices.append("chi2")
            gamma_kernels.append("chi2")

        kernel = CategoricalHyperparameter('kernel', kernel_choices, 'rbf')
        n_components = UniformIntegerHyperparameter("n_components",
                                                    10,
                                                    2000,
                                                    default_value=100,
                                                    log=True)
        gamma = UniformFloatHyperparameter("gamma",
                                           3.0517578125e-05,
                                           8,
                                           log=True,
                                           default_value=0.1)
        degree = UniformIntegerHyperparameter('degree', 2, 5, 3)
        coef0 = UniformFloatHyperparameter("coef0", -1, 1, default_value=0)

        cs = ConfigurationSpace()
        cs.add_hyperparameters([kernel, degree, gamma, coef0, n_components])

        # Each kernel-specific hyperparameter is only active for the kernels that use it.
        conditions = [
            EqualsCondition(degree, kernel, "poly"),
            InCondition(coef0, kernel, ["poly", "sigmoid"]),
            InCondition(gamma, kernel, gamma_kernels),
        ]
        cs.add_conditions(conditions)
        return cs
Beispiel #35
0
class ParametricModelApproximation(object):
    """Approximate a Gaussian Process by a parametric model.

    Approximating a Gaussian Process by a parametric model can be useful if
    one has to evaluate a sample function from the GP repeatedly or on many
    evaluation points as this would become computationally very expensive
    with a GP.

    Parameters
    ----------
    model : GaussianProcessRegressor
        The Gaussian Process which is to be approximated

    bounds: list of pair of floats
        The boundaries of the data space. This is used when determining the
        features of the parametric approximation (they are centered at random
        points in the data space)

    n_components: int
        The number of features/parameters of the parametric model

    seed: int
        The seed of the random number generator
    """
    def __init__(self, model, bounds, n_components, seed):
        self.gp = model
        # Accept a plain list of (low, high) pairs as documented; an ndarray
        # is passed through unchanged.
        self.bounds = np.asarray(bounds)
        self.n_components = n_components
        self.rng = np.random.RandomState(seed)

        # 1000 points drawn uniformly inside the bounds, one column per dimension.
        self.X_space = self.rng.uniform(self.bounds[:, 0], self.bounds[:, 1],
                                        (1000, self.bounds.shape[0]))

        assert self.gp.X_fit_.shape[1] == self.X_space.shape[1]

        self.kernel = self.gp.kernel_
        # The feature count is capped at the number of sampled points.
        self.nystr = Nystroem(
            n_components=min(self.n_components, self.X_space.shape[0]),
            kernel='precomputed', random_state=self.rng)
        self.nystr.fit(self.kernel(self.X_space))

    def determine_coefs(self, X_query=None, y_query_samples=None, n_samples=1):
        """ Determine coefficients of parametric model.

        Simulate an evaluation at X_query with outcomes y_query_samples.
        Determine coefficients of parametric model the updated GP.

        Parameters
        ----------
        X_query : ndarray-like, default: None
            The query point at which an additional evaluation is simulated.
            If None, a parametric approximation of the unmodified GP is
            returned.

        y_query_samples: ndarray-like, default: None
            The possible outcomes of a query at X_query. Required when
            X_query is given, ignored otherwise.

        n_samples: int
            The number of independent samples of model coefficients from the
            Bayesian posterior over model coefficients

        Returns
        -------
        coefs : ndarray of shape (n_samples, n_features, n_outcomes)
            Sampled coefficients; ``n_outcomes`` is the number of entries in
            y_query_samples, or 1 when X_query is None.

        Raises
        ------
        ValueError
            If X_query is given without y_query_samples.
        """
        if X_query is not None:
            if y_query_samples is None:
                raise ValueError(
                    "y_query_samples is required when X_query is given")
            X_query = np.asarray(X_query)
            X_queried = np.vstack((self.gp.X_fit_, X_query))
            # One target vector per simulated outcome of the query.
            targets = [np.hstack((self.gp.y_fit_, y_sample))
                       for y_sample in np.atleast_1d(
                           np.asarray(y_query_samples))]
        else:
            # No simulated query: approximate the GP as it currently is.
            X_queried = self.gp.X_fit_
            targets = [self.gp.y_fit_]

        Phi = self.nystr.transform(self.kernel(self.X_space, X_queried))
        # Regularised normal equations; gp.alpha acts as the ridge term.
        A = Phi.T.dot(Phi) + self.gp.alpha * np.eye(Phi.shape[1])
        A_inv = np.linalg.inv(A)

        cov = self.gp.alpha * A_inv
        # Loop-invariant projection from targets to the coefficient mean.
        projection = A_inv.dot(Phi.T)

        # Size by the actual feature count, which can be smaller than
        # n_components because of the cap applied in __init__.
        coefs = np.empty((n_samples, Phi.shape[1], len(targets)))
        for i, y_queried in enumerate(targets):  # XXX: Vectorize
            mean = projection.dot(y_queried)
            coefs[:, :, i] = self.rng.multivariate_normal(mean, cov, n_samples)
        return coefs

    def __call__(self, X, coefs):
        """ Evaluate parametric model at X for the given sampled coefficients.

        Parameters
        ----------
        X : ndarray-like
            The points at which the parametric model is to be evaluated

        coefs: ndarray-like
            The coefficients of the parametric model.
        """
        X = np.atleast_2d(X)

        Phi = self.nystr.transform(self.kernel(self.X_space, X))
        f = Phi.dot(coefs)
        return f