예제 #1
0
    def predict(self, X):
        """
        Predict targets for the samples in X with the fitted estimator.

        A kernel matrix between the rows of X and the stored training
        samples is built and handed over to the underlying estimator.

        Parameters
        ----------
        X : array-like
            Samples to predict on. They are validated with ``check_array``
            before use.

        Returns
        -------
        y_pred : ndarray
            Output of the underlying estimator's ``predict``, converted to
            an array with the dtype of the stored training targets.

        Raises
        ------
        NotFittedError
            If this object has no ``_km`` attribute yet, i.e. ``fit`` has
            not been run.
        """

        already_fitted = hasattr(self, '_km')
        if not already_fitted:
            raise NotFittedError("Can't predict. Not fitted yet. Run .fit() first!")

        new_samples = check_array(X)

        # the stored kernel matrix is replaced by a fresh one for the test data
        test_km = KernelMatrix(self.k_func, name='test_km',
                               normalized=self.normalized)
        self._km = test_km

        # the test data must be sample_one to get the right shape for the
        # X that is passed to the sklearn estimator
        test_km.attach_to(sample_one=new_samples, sample_two=self._train_X)

        raw_predictions = self._estimator.predict(test_km.full)
        return np.asarray(raw_predictions, dtype=self._train_y.dtype)
예제 #2
0
def test_size_property_mismatch():
    """Appending a kernel matrix to a set declared with another size must fail."""

    # declare the set with one sample more than the data actually has
    declared_size = sample_data.shape[0] + 1
    kernel_set = KernelSet(num_samples=declared_size)

    linear_km = KernelMatrix(LinearKernel(skip_input_checks=True))
    linear_km.attach_to(sample_data)

    with raises(KMSetAdditionError):
        kernel_set.append(linear_km)
예제 #3
0
def _test_func_is_valid_kernel(kernel, sample_dim, num_samples):
    """Check that the kernel matrix generated by ``kernel`` is PSD.

    A kernel matrix is built on a random sample of ``num_samples`` points
    with ``sample_dim`` dimensions, and a ValueError is raised when that
    matrix is not positive semi-definite.

    This check is kept out of the tests that run for all kernels, to allow
    for non-PSD kernels in the future.

    """

    kernel_matrix = KernelMatrix(kernel, name='TestKM')
    random_sample = gen_random_sample(num_samples, sample_dim)
    kernel_matrix.attach_to(random_sample)

    if is_positive_semidefinite(kernel_matrix.full, verbose=True):
        return

    raise ValueError('{} is not PSD'.format(str(kernel_matrix)))
예제 #4
0
    def fit(self, X, y, sample_weight=None):
        """Fit the chosen Estimator based on the user-defined kernel.

        The training data is validated and stored, a kernel matrix is
        computed on it, and the estimator identified by ``learner_id`` is
        created and fitted on that kernel matrix.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training vectors, where n_samples is the number of samples
            and n_features is the number of features.

        y : array-like, shape (n_samples,)
            Target values (class labels in classification, real numbers in
            regression)

        sample_weight : array-like, shape (n_samples,)
            Per-sample weights, passed on unchanged to the ``fit`` method of
            the underlying estimator.

        Returns
        -------
        self : object

        Notes
        ------
        X and y are run through ``check_X_y`` and the validated versions are
        kept on the object for use at prediction time. For regressors, the
        targets are additionally converted to 64-bit floats.

        """

        if is_regressor(self):
            self._train_X, self._train_y = check_X_y(X, y, y_numeric=True)
            # np.float_ was removed in NumPy 2.0; np.float64 is the very
            # same type and is available in every NumPy version
            self._train_y = self._train_y.astype(np.float64)
        else:
            self._train_X, self._train_y = check_X_y(X, y)

        self._km = KernelMatrix(self.k_func,
                                name='train_km',
                                normalized=self.normalized)
        self._km.attach_to(self._train_X)

        # the estimator is looked up here, at fit time, from learner_id
        self._estimator, self.param_grid = get_estimator(self.learner_id)
        self._estimator.fit(X=self._km.full,
                            y=self._train_y,
                            sample_weight=sample_weight)

        if is_classifier(self):
            self.classes_ = self._estimator.classes_

        return self
예제 #5
0
def test_KernelMatrix_design():
    """Exercise construction checks, length and indexing of KernelMatrix.

    Construction is expected to raise TypeError for the two invalid calls
    below, ``len`` is expected to equal ``num_samples**2`` for the
    module-level ``km_lin``, and invalid column indices are expected to
    raise KMAccessError.
    """

    with raises(TypeError):
        KernelMatrix(kernel=simple_callable)

    with raises(TypeError):
        KernelMatrix(kernel=LinearKernel, normalized='True')

    assert len(km_lin) == num_samples**2

    colon_access = km_lin[:, :]
    if colon_access.size != km_lin.size:
        raise ValueError('error in getitem implementation when using [:, :]')

    # row and column access must simply work
    _ = km_lin[1, :]
    _ = km_lin[:, 1]

    # np.Inf and np.NaN were removed in NumPy 2.0; the lowercase names are
    # the same values and exist in every NumPy version
    for invalid_index in (-1, np.inf, np.nan):
        with raises(KMAccessError):
            _ = km_lin[:, invalid_index]
예제 #6
0
    def add_parametrized_kernels(self, kernel_func, param, values):
        """
        Adds a list of kernels parametrized by various values for a given param

        One KernelMatrix is appended per value. A value for which the kernel
        cannot be created or appended is skipped with a KernelMethodsWarning
        instead of aborting the whole call.

        Parameters
        ----------
        kernel_func : BaseKernelFunction
            Kernel function to be added (not an instance, but callable class)

        param : str
            Name of the parameter to the above kernel function

        values : Iterable
            List of parameter values. One kernel will be added for each value.
            When None, nothing is added and the method returns silently.

        Raises
        ------
        KernelMethodsException
            If kernel_func is not a class derived from BaseKernelFunction.

        ValueError
            If values is not a non-string iterable with at least one element.

        """

        is_kernel_class = isinstance(kernel_func, type) and \
                          issubclass(kernel_func, BaseKernelFunction)
        if not is_kernel_class:
            raise KernelMethodsException(
                'Input {} is not a valid kernel func!'
                ' Must be derived from BaseKernelFunction'
                ''.format(kernel_func))

        if values is None:
            # warn('No values provided for {}. Doing nothing!'.format(param))
            return

        if not is_iterable_but_not_str(values, min_length=1):
            raise ValueError(
                'values must be an iterable set of param values (n>=1)')

        for val in values:
            try:
                param_dict = {
                    param: val,
                    'skip_input_checks': self._skip_input_checks
                }
                self.append(
                    KernelMatrix(kernel_func(**param_dict),
                                 normalized=self._norm_kernels))
            except Exception:
                # best-effort: one bad value must not stop the others. Only
                # Exception is caught, so that KeyboardInterrupt and
                # SystemExit still propagate (a bare except swallowed them).
                warn(
                    'Unable to add {} to the bucket for {}={}. Skipping it.'
                    ''.format(kernel_func, param, val), KernelMethodsWarning)
예제 #7
0
def test_alignment_centered():
    """Exercise the input checks and zero-division handling of alignment_centered."""

    def _linear_km_on_random_sample(sample_count):
        """Return a linear kernel matrix attached to a fresh random sample."""
        attached_km = KernelMatrix(kernel=LinearKernel())
        attached_km.attach_to(gen_random_sample(sample_count, sample_dim))
        return attached_km

    km_a = _linear_km_on_random_sample(num_samples)
    km_b = _linear_km_on_random_sample(num_samples)
    km_wrong_size = _linear_km_on_random_sample(num_samples + 2)

    # matrices of different sizes
    with raises(ValueError):
        alignment_centered(km_a.full, km_wrong_size.full)

    # bad type for the first argument : must be ndarray
    with raises(TypeError):
        alignment_centered(km_a, km_b.full)

    # bad type for the second argument : must be ndarray
    with raises(TypeError):
        alignment_centered(km_a.full, km_b)

    # both settings of the flag must run through
    for centered_flag in (True, False):
        alignment_centered(km_a.full, km_b.full,
                           centered_already=centered_flag)

    # an all-zero matrix with 'raise' requested
    with raises(ValueError):
        alignment_centered(np.zeros((10, 10)),
                           randn(10, 10),
                           value_if_zero_division='raise')

    # an all-zero matrix with a fallback value requested
    fallback_value = 'random_set_value'
    with warns(UserWarning):
        returned = alignment_centered(randn(10, 10),
                                      np.zeros((10, 10)),
                                      value_if_zero_division=fallback_value)
    if returned != fallback_value:
        raise ValueError('Not returning the value requested in case of error!')
예제 #8
0
from kernelmethods.base import KMSetAdditionError, KernelMatrix, KernelSet, \
    BaseKernelFunction
from kernelmethods.numeric_kernels import GaussianKernel, LinearKernel, PolyKernel
from kernelmethods.sampling import make_kernel_bucket

# Module-level fixtures: sizes, random data and kernel matrices.
num_samples = 50  # 9
sample_dim = 3  # 2
target_label_set = [1, 2]

# random feature matrix of shape (num_samples, sample_dim)
sample_data = np.random.rand(num_samples, sample_dim)
# one label per sample, drawn from target_label_set, as a column vector
target_labels = np.random.choice(target_label_set, (num_samples, 1))

# product of the label column with its transpose:
# a (num_samples, num_samples) matrix
IdealKM = target_labels.dot(target_labels.T)

# one kernel matrix per kernel type, each created with input checks skipped
rbf = KernelMatrix(GaussianKernel(sigma=10, skip_input_checks=True))
lin = KernelMatrix(LinearKernel(skip_input_checks=True))
poly = KernelMatrix(PolyKernel(degree=2, skip_input_checks=True))

# attaching is left disabled here, so the kernel matrices go into the set
# below without having been attached to sample_data
# lin.attach_to(sample_data)
# rbf.attach_to(sample_data)
# poly.attach_to(sample_data)

# NOTE: the print below runs at import time of this module
kset = KernelSet([lin, poly, rbf])
print(kset)


def test_creation():

    try:
        ks = KernelSet()
예제 #9
0
class BaseKernelMachine(BaseEstimator):
    """Generic class to return a drop-in sklearn estimator.

    The kernel matrix is computed from ``k_func`` and handed over to the
    estimator obtained from ``get_estimator(learner_id)``.

    Parameters
    ----------
    k_func : KernelFunction
        The kernel function the kernel machine bases itself on

    learner_id : str
        Identifier for the estimator to be built based on the kernel function.
        Options: ``SVC`` and ``SVR``.
        Default: ``SVC`` (classifier version of SVM)

    normalized : flag
        Flag to indicate whether to keep the kernel matrix normalized
        Default: False

    """

    def __init__(self,
                 k_func=GaussianKernel(),
                 learner_id='SVC',
                 normalized=False):
        """
        Constructor for the KernelMachine class.

        Parameters
        ----------
        k_func : KernelFunction
            The kernel function the kernel machine bases itself on

        learner_id : str
            Identifier for the estimator to be built based on the kernel function.
            Options: ``SVC`` and ``SVR``.
            Default: ``SVC`` (classifier version of SVM)

        normalized : flag
            Flag to indicate whether to keep the kernel matrix normalized.
            Default: False
        """

        self.k_func = k_func
        self.learner_id = learner_id
        self.normalized = normalized
        # kept here so that param_grid is available right after construction;
        # fit() looks the estimator up again from the current learner_id
        self._estimator, self.param_grid = get_estimator(self.learner_id)

    def fit(self, X, y, sample_weight=None):
        """Fit the chosen Estimator based on the user-defined kernel.

        The training data is validated and stored, a kernel matrix is
        computed on it, and the estimator identified by the current
        ``learner_id`` is created and fitted on that kernel matrix.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training vectors, where n_samples is the number of samples
            and n_features is the number of features.

        y : array-like, shape (n_samples,)
            Target values (class labels in classification, real numbers in
            regression)

        sample_weight : array-like, shape (n_samples,)
            Per-sample weights, passed on unchanged to the ``fit`` method of
            the underlying estimator.

        Returns
        -------
        self : object

        Notes
        ------
        X and y are run through ``check_X_y`` and the validated versions are
        kept on the object for use at prediction time. For regressors, the
        targets are additionally converted to 64-bit floats.

        """

        if is_regressor(self):
            self._train_X, self._train_y = check_X_y(X, y, y_numeric=True)
            # np.float_ was removed in NumPy 2.0; np.float64 is the very
            # same type and is available in every NumPy version
            self._train_y = self._train_y.astype(np.float64)
        else:
            self._train_X, self._train_y = check_X_y(X, y)

        self._km = KernelMatrix(self.k_func, name='train_km',
                                normalized=self.normalized)
        self._km.attach_to(self._train_X)

        # look the estimator up again: learner_id may have been changed via
        # set_params() after construction, which would otherwise leave the
        # estimator created in __init__ out of date
        self._estimator, self.param_grid = get_estimator(self.learner_id)
        self._estimator.fit(X=self._km.full, y=self._train_y,
                            sample_weight=sample_weight)

        if is_classifier(self):
            self.classes_ = self._estimator.classes_

        return self

    def predict(self, X):
        """
        Make predictions on the new samples in X.

        A kernel matrix between the rows of X and the stored training
        samples is built and handed over to the fitted estimator.

        Parameters
        ----------
        X : array-like
            Samples to predict on. They are validated with ``check_array``
            before use.

        Returns
        -------
        y_pred : ndarray
            Output of the underlying estimator's ``predict``, converted to
            an array with the dtype of the stored training targets.

        Raises
        ------
        NotFittedError
            If this object has no ``_km`` attribute yet, i.e. ``fit`` has
            not been run.
        """

        if not hasattr(self, '_km'):
            raise NotFittedError("Can't predict. Not fitted yet. Run .fit() first!")

        test_X = check_array(X)

        # this is a fresh new KM, which replaces the stored one
        self._km = KernelMatrix(self.k_func, name='test_km',
                                normalized=self.normalized)

        # sample_one must be test data to get the right shape for sklearn X
        self._km.attach_to(sample_one=test_X, sample_two=self._train_X)

        predicted_y = self._estimator.predict(self._km.full)

        return np.asarray(predicted_y, dtype=self._train_y.dtype)

    def get_params(self, deep=True):
        """Return the constructor parameters of this estimator as a dict.

        ``deep`` is accepted for compatibility with the sklearn signature
        but is not used.
        """

        return {'k_func'    : self.k_func,
                'normalized': self.normalized,
                'learner_id': self.learner_id}

    def set_params(self, **parameters):
        """Set ``k_func``, ``learner_id`` and/or ``normalized``.

        Any other keyword is silently ignored. Returns self.
        """

        for parameter, value in parameters.items():
            if parameter in ('k_func', 'learner_id', 'normalized'):
                setattr(self, parameter, value)

        return self
예제 #10
0
def test_normalize():
    """Normalizing an attached linear kernel matrix must run without error."""

    linear_km = KernelMatrix(kernel=LinearKernel())
    random_sample = gen_random_sample(num_samples, sample_dim)
    linear_km.attach_to(random_sample)

    linear_km.normalize()
예제 #11
0
def test_centering():
    """Centering an attached linear kernel matrix must run without error."""

    linear_km = KernelMatrix(kernel=LinearKernel())
    random_sample = gen_random_sample(num_samples, sample_dim)
    linear_km.attach_to(random_sample)

    linear_km.center()
예제 #12
0
def gen_random_array(dim):
    """Return a 1-D array of ``dim`` random floats from ``np.random.rand``.

    Kept as a helper to better control the precision and type of the floats.
    """

    # TODO input sparse arrays for test
    random_vector = np.random.rand(dim)
    return random_vector


def gen_random_sample(num_samples, sample_dim):
    """Return a (num_samples, sample_dim) array of floats from ``np.random.rand``.

    Kept as a helper to better control the precision and type of the floats.
    """

    # TODO input sparse arrays for test
    random_matrix = np.random.rand(num_samples, sample_dim)
    return random_matrix


# module-level linear kernel matrix, attached to one random sample of
# shape (num_samples, sample_dim)
# NOTE(review): presumably shared by the tests in this module — confirm
km_lin = KernelMatrix(kernel=LinearKernel())
km_lin.attach_to(gen_random_sample(num_samples, sample_dim))


def simple_callable(x, y):
    """Plain function returning ``np.dot`` of its two inputs."""
    dot_product = np.dot(x, y)
    return dot_product


def test_kernel_from_callable():
    """A kernel built from a plain callable must be a BaseKernelFunction."""

    wrapped_kernel = KernelFromCallable(simple_callable)

    is_kernel_function = isinstance(wrapped_kernel, BaseKernelFunction)
    if not is_kernel_function:
        raise TypeError('Error in implementation of KernelFromCallable')

    # then run it through the checks applied to all kernels
    _test_for_all_kernels(wrapped_kernel, 5)
예제 #13
0
    def __init__(
        self,
        poly_degree_values=cfg.default_degree_values_poly_kernel,
        rbf_sigma_values=cfg.default_sigma_values_gaussian_kernel,
        laplace_gamma_values=cfg.default_gamma_values_laplacian_kernel,
        sigmoid_gamma_values=cfg.default_gamma_values_sigmoid_kernel,
        sigmoid_offset_values=cfg.default_offset_values_sigmoid_kernel,
        name='KernelBucket',
        normalize_kernels=True,
        skip_input_checks=False,
    ):
        """
        Constructor.

        The bucket starts with the kernel matrix of a linear kernel, and is
        then filled with one KernelMatrix per value in each of the lists
        below.

        Parameters
        ----------
        poly_degree_values : Iterable
            Values for the degree parameter of the PolyKernel.

        rbf_sigma_values : Iterable
            Values for the sigma parameter of the GaussianKernel.

        laplace_gamma_values : Iterable
            Values for the gamma parameter of the LaplacianKernel.

        sigmoid_gamma_values : Iterable
            Values for the gamma parameter of the SigmoidKernel.

        sigmoid_offset_values : Iterable
            Values for the offset parameter of the SigmoidKernel.

        name : str
            String to identify the purpose or type of the bucket of kernels.
            Also helps to tell it apart from other buckets.

        normalize_kernels : bool
            Flag to indicate whether the kernel matrices need to be normalized

        skip_input_checks : bool
            Flag to indicate whether checks on input data (type, format etc)
            can be skipped. This helps save a tiny bit of runtime for expert
            uses when data types and formats are managed thoroughly in numpy.
            Default: False. Disable this only when you know exactly what
            you're doing!

        Raises
        ------
        TypeError
            If normalize_kernels or skip_input_checks is not a bool.

        """

        if not isinstance(normalize_kernels, bool):
            raise TypeError('normalize_kernels must be bool')
        self._norm_kernels = normalize_kernels

        if not isinstance(skip_input_checks, bool):
            raise TypeError('skip_input_checks must be bool')
        self._skip_input_checks = skip_input_checks

        # every bucket starts with the kernel matrix for the linear kernel
        linear_km = KernelMatrix(LinearKernel(), normalized=self._norm_kernels)
        super().__init__(km_list=[linear_km, ], name=name)
        # not attached to a sample yet
        self._num_samples = None

        # (kernel class, parameter name, values), added in this order
        parametrized_families = (
            (PolyKernel, 'degree', poly_degree_values),
            (GaussianKernel, 'sigma', rbf_sigma_values),
            (LaplacianKernel, 'gamma', laplace_gamma_values),
            (SigmoidKernel, 'gamma', sigmoid_gamma_values),
            (SigmoidKernel, 'offset', sigmoid_offset_values),
        )
        for kernel_class, param_name, param_values in parametrized_families:
            self.add_parametrized_kernels(kernel_class, param_name,
                                          param_values)