def predict(self, X):
    """
    Predict targets for the new samples in X with the fitted estimator.

    For a one-class model, +1 or -1 is returned.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape (n_samples, n_features)
        New samples. They are validated with ``check_array`` and a kernel
        matrix between them (rows) and the stored training samples
        (columns) is computed and handed to the underlying estimator.

    Returns
    -------
    y_pred : array, shape (n_samples,)
        Predictions for the samples in X, cast to the dtype of the
        training targets.

    Raises
    ------
    NotFittedError
        If the instance has no ``_km`` attribute, i.e. ``fit`` has not run.

    """

    is_fitted = hasattr(self, '_km')
    if not is_fitted:
        raise NotFittedError("Can't predict. Not fitted yet. Run .fit() first!")

    new_samples = check_array(X)

    # A fresh kernel matrix is built for every call.
    # NOTE(review): it is stored on the instance, replacing the kernel matrix
    # that was there before this call -- confirm nothing relies on the old one.
    test_km = KernelMatrix(self.k_func,
                           name='test_km',
                           normalized=self.normalized)
    self._km = test_km

    # sample_one must be the test data so that the rows of the kernel matrix
    # line up with the samples being predicted
    test_km.attach_to(sample_one=new_samples, sample_two=self._train_X)

    raw_predictions = self._estimator.predict(test_km.full)
    return np.asarray(raw_predictions, dtype=self._train_y.dtype)
def test_size_property_mismatch():
    """Appending a kernel matrix to a set declared with another size must fail.

    The set is created expecting one sample more than ``sample_data`` holds,
    so appending a kernel matrix attached to ``sample_data`` is expected to
    raise ``KMSetAdditionError``.
    """

    declared_size = sample_data.shape[0] + 1
    mismatched_set = KernelSet(num_samples=declared_size)

    linear_km = KernelMatrix(LinearKernel(skip_input_checks=True))
    linear_km.attach_to(sample_data)

    with raises(KMSetAdditionError):
        mismatched_set.append(linear_km)
def _test_func_is_valid_kernel(kernel, sample_dim, num_samples):
    """Check that the kernel matrix produced by `kernel` is PSD.

    A kernel matrix is computed on a random sample of shape
    (num_samples, sample_dim) and tested for positive semi-definiteness.

    This check is kept separate from the tests run on all kernels, to allow
    for non-PSD kernels in the future.

    Raises
    ------
    ValueError
        If the resulting kernel matrix is not positive semi-definite.
    """

    candidate_km = KernelMatrix(kernel, name='TestKM')
    random_sample = gen_random_sample(num_samples, sample_dim)
    candidate_km.attach_to(random_sample)

    if is_positive_semidefinite(candidate_km.full, verbose=True):
        return

    raise ValueError('{} is not PSD'.format(str(candidate_km)))
def fit(self, X, y, sample_weight=None):
    """Fit the chosen estimator on the kernel matrix of the training data.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape (n_samples, n_features)
        Training vectors, where n_samples is the number of samples and
        n_features is the number of features.

    y : array-like, shape (n_samples,)
        Target values (class labels in classification, real numbers in
        regression)

    sample_weight : array-like, shape (n_samples,)
        Per-sample weights, forwarded unchanged to the ``fit`` method of
        the underlying estimator.

    Returns
    -------
    self : object

    Notes
    ------
    X and y are validated with ``check_X_y`` and the validated copies are
    stored on the instance for use at prediction time. For regressors, y
    is additionally required to be numeric and is cast to ``np.float64``.

    The underlying estimator (and its ``param_grid``) is rebuilt from
    ``learner_id`` on every call, so each fit starts from a fresh estimator.

    """

    if is_regressor(self):
        self._train_X, self._train_y = check_X_y(X, y, y_numeric=True)
        # np.float64 rather than the np.float_ alias: the alias was removed
        # in NumPy 2.0, while np.float64 is the same dtype on every version
        self._train_y = self._train_y.astype(np.float64)
    else:
        self._train_X, self._train_y = check_X_y(X, y)

    self._km = KernelMatrix(self.k_func,
                            name='train_km',
                            normalized=self.normalized)
    self._km.attach_to(self._train_X)

    self._estimator, self.param_grid = get_estimator(self.learner_id)
    self._estimator.fit(X=self._km.full,
                        y=self._train_y,
                        sample_weight=sample_weight)

    if is_classifier(self):
        # expose the class labels learnt by the underlying estimator
        self.classes_ = self._estimator.classes_

    return self
def test_KernelMatrix_design():
    """Exercise constructor validation and indexing of KernelMatrix.

    Covers: rejection of a plain callable as kernel, rejection of a
    non-bool ``normalized`` flag together with a kernel class (not an
    instance), the reported length of the module-level ``km_lin``, full
    slicing with ``[:, :]``, row/column access, and rejection of invalid
    indices.
    """

    # a bare callable is not an accepted kernel
    with raises(TypeError):
        KernelMatrix(kernel=simple_callable)

    # neither a kernel class (uninstantiated) nor a str flag is acceptable
    with raises(TypeError):
        KernelMatrix(kernel=LinearKernel, normalized='True')

    assert len(km_lin) == num_samples**2

    colon_access = km_lin[:, :]
    if colon_access.size != km_lin.size:
        raise ValueError('error in getitem implementation when using [:, :]')

    _ = km_lin[1, :]
    _ = km_lin[:, 1]

    # np.inf / np.nan instead of np.Inf / np.NaN: the capitalised aliases
    # were removed in NumPy 2.0 and would raise AttributeError there
    for invalid_index in (-1, np.inf, np.nan):
        with raises(KMAccessError):
            _ = km_lin[:, invalid_index]
def add_parametrized_kernels(self, kernel_func, param, values):
    """
    Adds a list of kernels parametrized by various values for a given param

    Parameters
    ----------
    kernel_func : BaseKernelFunction
        Kernel function to be added (not an instance, but callable class)

    param : str
        Name of the parameter to the above kernel function

    values : Iterable
        List of parameter values. One kernel will be added for each value.
        If None, nothing is added and the method returns silently.

    Raises
    ------
    KernelMethodsException
        If `kernel_func` is not a class derived from BaseKernelFunction.

    ValueError
        If `values` is neither None nor a non-string iterable with at
        least one element.

    Warns
    -----
    KernelMethodsWarning
        For each value whose kernel could not be created or appended;
        that value is skipped and the remaining ones are still processed.

    """

    is_kernel_class = isinstance(kernel_func, type) and \
                      issubclass(kernel_func, BaseKernelFunction)
    if not is_kernel_class:
        raise KernelMethodsException(
            'Input {} is not a valid kernel func!'
            ' Must be derived from BaseKernelFunction'
            ''.format(kernel_func))

    if values is None:
        # no values given for this parameter: deliberately a silent no-op
        return

    if not is_iterable_but_not_str(values, min_length=1):
        raise ValueError(
            'values must be an iterable set of param values (n>=1)')

    for val in values:
        try:
            param_dict = {
                param: val,
                'skip_input_checks': self._skip_input_checks
            }
            self.append(
                KernelMatrix(kernel_func(**param_dict),
                             normalized=self._norm_kernels))
        except Exception:
            # Best-effort: a failure for one value must not prevent the
            # others from being added. Exception (rather than a bare
            # except) lets KeyboardInterrupt and SystemExit propagate.
            warn(
                'Unable to add {} to the bucket for {}={}. Skipping it.'
                ''.format(kernel_func, param, val), KernelMethodsWarning)
def test_alignment_centered():
    """Exercise input validation and zero-division handling of alignment_centered.

    Covers: kernel matrices of differing sizes, KernelMatrix objects passed
    in place of arrays, both values of ``centered_already``, and both ways of
    handling an all-zero input (raising, or returning a requested value
    together with a warning).
    """

    def _attached_linear_km(n_samples):
        """Linear kernel matrix attached to a fresh random sample."""
        new_km = KernelMatrix(kernel=LinearKernel())
        new_km.attach_to(gen_random_sample(n_samples, sample_dim))
        return new_km

    first_km = _attached_linear_km(num_samples)
    second_km = _attached_linear_km(num_samples)
    oversized_km = _attached_linear_km(num_samples + 2)

    # sizes of the two matrices must agree
    with raises(ValueError):
        alignment_centered(first_km.full, oversized_km.full)

    # bad type : must be ndarray (first argument)
    with raises(TypeError):
        alignment_centered(first_km, second_km.full)

    # bad type : must be ndarray (second argument)
    with raises(TypeError):
        alignment_centered(first_km.full, second_km)

    for already_centered in (True, False):
        _ = alignment_centered(first_km.full, second_km.full,
                               centered_already=already_centered)

    # all-zero input with an explicit request to raise
    with raises(ValueError):
        _ = alignment_centered(np.zeros((10, 10)),
                               randn(10, 10),
                               value_if_zero_division='raise')

    # all-zero input with a fallback value: warn and hand that value back
    fallback = 'random_set_value'
    with warns(UserWarning):
        returned = alignment_centered(randn(10, 10),
                                      np.zeros((10, 10)),
                                      value_if_zero_division=fallback)

    if returned != fallback:
        raise ValueError('Not returning the value requested in case of error!')
from kernelmethods.base import KMSetAdditionError, KernelMatrix, KernelSet, \ BaseKernelFunction from kernelmethods.numeric_kernels import GaussianKernel, LinearKernel, PolyKernel from kernelmethods.sampling import make_kernel_bucket num_samples = 50 # 9 sample_dim = 3 # 2 target_label_set = [1, 2] sample_data = np.random.rand(num_samples, sample_dim) target_labels = np.random.choice(target_label_set, (num_samples, 1)) IdealKM = target_labels.dot(target_labels.T) rbf = KernelMatrix(GaussianKernel(sigma=10, skip_input_checks=True)) lin = KernelMatrix(LinearKernel(skip_input_checks=True)) poly = KernelMatrix(PolyKernel(degree=2, skip_input_checks=True)) # lin.attach_to(sample_data) # rbf.attach_to(sample_data) # poly.attach_to(sample_data) kset = KernelSet([lin, poly, rbf]) print(kset) def test_creation(): try: ks = KernelSet()
class BaseKernelMachine(BaseEstimator):
    """Generic class to return a drop-in sklearn estimator.

    Parameters
    ----------
    k_func : KernelFunction
        The kernel function the kernel machine bases itself on

    learner_id : str
        Identifier for the estimator to be built based on the kernel function.
        Options: ``SVC`` and ``SVR``.
        Default: ``SVC`` (classifier version of SVM)

    normalized : flag
        Flag to indicate whether to keep the kernel matrix normalized
        Default: False

    """

    def __init__(self,
                 k_func=GaussianKernel(),
                 learner_id='SVC',
                 normalized=False):
        """
        Constructor for the KernelMachine class.

        Parameters
        ----------
        k_func : KernelFunction
            The kernel function the kernel machine bases itself on

        learner_id : str
            Identifier for the estimator to be built based on the kernel
            function. Options: ``SVC`` and ``SVR``.
            Default: ``SVC`` (classifier version of SVM)

        normalized : flag
            Flag to indicate whether to keep the kernel matrix normalized.
            Default: False
        """

        self.k_func = k_func
        self.learner_id = learner_id
        self.normalized = normalized

        # built here so that param_grid is available before fitting;
        # fit() rebuilds both from the then-current learner_id
        self._estimator, self.param_grid = get_estimator(self.learner_id)

    def fit(self, X, y, sample_weight=None):
        """Fit the chosen estimator on the kernel matrix of the training data.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape (n_samples, n_features)
            Training vectors, where n_samples is the number of samples
            and n_features is the number of features.

        y : array-like, shape (n_samples,)
            Target values (class labels in classification, real numbers in
            regression)

        sample_weight : array-like, shape (n_samples,)
            Per-sample weights, forwarded unchanged to the ``fit`` method of
            the underlying estimator.

        Returns
        -------
        self : object

        Notes
        ------
        X and y are validated with ``check_X_y`` and the validated copies are
        stored on the instance for use at prediction time. For regressors, y
        is additionally required to be numeric and is cast to ``np.float64``.

        The underlying estimator (and its ``param_grid``) is rebuilt from
        ``learner_id`` on every call, so a ``learner_id`` changed through
        ``set_params`` takes effect at the next fit.

        """

        if is_regressor(self):
            self._train_X, self._train_y = check_X_y(X, y, y_numeric=True)
            # np.float64 rather than the np.float_ alias: the alias was
            # removed in NumPy 2.0, np.float64 is the same dtype everywhere
            self._train_y = self._train_y.astype(np.float64)
        else:
            self._train_X, self._train_y = check_X_y(X, y)

        self._km = KernelMatrix(self.k_func,
                                name='train_km',
                                normalized=self.normalized)
        self._km.attach_to(self._train_X)

        # The estimator created in __init__ reflects the learner_id given at
        # construction only; rebuilding it here keeps the fitted model in
        # sync with set_params(learner_id=...).
        self._estimator, self.param_grid = get_estimator(self.learner_id)
        self._estimator.fit(X=self._km.full,
                            y=self._train_y,
                            sample_weight=sample_weight)

        if is_classifier(self):
            # expose the class labels learnt by the underlying estimator
            self.classes_ = self._estimator.classes_

        return self

    def predict(self, X):
        """
        Predict targets for the new samples in X with the fitted estimator.

        For a one-class model, +1 or -1 is returned.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape (n_samples, n_features)
            New samples. They are validated with ``check_array`` and a kernel
            matrix between them (rows) and the stored training samples
            (columns) is computed and handed to the underlying estimator.

        Returns
        -------
        y_pred : array, shape (n_samples,)
            Predictions for the samples in X, cast to the dtype of the
            training targets.

        Raises
        ------
        NotFittedError
            If the instance has no ``_km`` attribute, i.e. ``fit`` has not
            been run yet.

        """

        if not hasattr(self, '_km'):
            raise NotFittedError("Can't predict. Not fitted yet. Run .fit() first!")

        test_X = check_array(X)

        # this is a fresh new KM
        # NOTE(review): it replaces the kernel matrix held on the instance
        # before this call -- confirm nothing relies on the old one.
        self._km = KernelMatrix(self.k_func,
                                name='test_km',
                                normalized=self.normalized)

        # sample_one must be test data to get the right shape for sklearn X
        self._km.attach_to(sample_one=test_X, sample_two=self._train_X)

        predicted_y = self._estimator.predict(self._km.full)

        return np.asarray(predicted_y, dtype=self._train_y.dtype)

    def get_params(self, deep=True):
        """Return the constructor parameters of this estimator as a dict.

        `deep` is accepted for compatibility with the sklearn signature but
        is not used.
        """

        return {'k_func': self.k_func,
                'normalized': self.normalized,
                'learner_id': self.learner_id}

    def set_params(self, **parameters):
        """Set constructor parameters; returns self.

        Only ``k_func``, ``learner_id`` and ``normalized`` are applied; any
        other keyword is ignored without an error.
        """

        for parameter, value in parameters.items():
            if parameter in ('k_func', 'learner_id', 'normalized'):
                setattr(self, parameter, value)

        return self
def test_normalize():
    """Smoke test: normalize() runs on a linear kernel matrix of random data."""

    linear_km = KernelMatrix(kernel=LinearKernel())
    random_sample = gen_random_sample(num_samples, sample_dim)
    linear_km.attach_to(random_sample)

    linear_km.normalize()
def test_centering():
    """Smoke test: center() runs on a linear kernel matrix of random data."""

    linear_km = KernelMatrix(kernel=LinearKernel())
    random_sample = gen_random_sample(num_samples, sample_dim)
    linear_km.attach_to(random_sample)

    linear_km.center()
def gen_random_array(dim):
    """Random 1-D array with `dim` elements, drawn with np.random.rand.

    Kept as a helper to better control precision and type of floats.
    """

    # TODO input sparse arrays for test
    random_vector = np.random.rand(dim)
    return random_vector


def gen_random_sample(num_samples, sample_dim):
    """Random 2-D array of shape (num_samples, sample_dim), via np.random.rand.

    Kept as a helper to better control precision and type of floats.
    """

    # TODO input sparse arrays for test
    random_matrix = np.random.rand(num_samples, sample_dim)
    return random_matrix


# module-level linear kernel matrix, attached to a random sample at import
km_lin = KernelMatrix(kernel=LinearKernel())
km_lin.attach_to(gen_random_sample(num_samples, sample_dim))


def simple_callable(x, y):
    """Plain function computing np.dot(x, y); used as a raw callable kernel."""

    dot_product = np.dot(x, y)
    return dot_product


def test_kernel_from_callable():
    """KernelFromCallable must yield a BaseKernelFunction passing the common tests."""

    wrapped_kernel = KernelFromCallable(simple_callable)

    derives_from_base = isinstance(wrapped_kernel, BaseKernelFunction)
    if not derives_from_base:
        raise TypeError('Error in implementation of KernelFromCallable')

    _test_for_all_kernels(wrapped_kernel, 5)
def __init__(
        self,
        poly_degree_values=cfg.default_degree_values_poly_kernel,
        rbf_sigma_values=cfg.default_sigma_values_gaussian_kernel,
        laplace_gamma_values=cfg.default_gamma_values_laplacian_kernel,
        sigmoid_gamma_values=cfg.default_gamma_values_sigmoid_kernel,
        sigmoid_offset_values=cfg.default_offset_values_sigmoid_kernel,
        name='KernelBucket',
        normalize_kernels=True,
        skip_input_checks=False,
):
    """
    Constructor.

    The bucket starts with a kernel matrix for the linear kernel, to which
    one kernel matrix is added for each value of each parameter list below.

    Parameters
    ----------
    poly_degree_values : Iterable
        Values for the degree parameter of the PolyKernel.
        One KernelMatrix will be added to the bucket for each value.

    rbf_sigma_values : Iterable
        Values for the sigma parameter of the GaussianKernel.
        One KernelMatrix will be added to the bucket for each value.

    laplace_gamma_values : Iterable
        Values for the gamma parameter of the LaplacianKernel.
        One KernelMatrix will be added to the bucket for each value.

    sigmoid_gamma_values : Iterable
        Values for the gamma parameter of the SigmoidKernel.
        One KernelMatrix will be added to the bucket for each value.

    sigmoid_offset_values : Iterable
        Values for the offset parameter of the SigmoidKernel.
        One KernelMatrix will be added to the bucket for each value.

    name : str
        String to identify the purpose or type of the bucket of kernels.
        Also helps easily distinguishing it from other buckets.

    normalize_kernels : bool
        Flag to indicate whether the kernel matrices need to be normalized

    skip_input_checks : bool
        Flag to indicate whether checks on input data (type, format etc) can
        be skipped. This helps save a tiny bit of runtime for expert uses when
        data types and formats are managed thoroughly in numpy. Default:
        False. Disable this only when you know exactly what you're doing!

    Raises
    ------
    TypeError
        If `normalize_kernels` or `skip_input_checks` is not a bool.

    """

    if not isinstance(normalize_kernels, bool):
        raise TypeError('normalize_kernels must be bool')
    self._norm_kernels = normalize_kernels

    if not isinstance(skip_input_checks, bool):
        raise TypeError('skip_input_checks must be bool')
    self._skip_input_checks = skip_input_checks

    # the bucket always begins with the kernel matrix for the linear kernel
    linear_km = KernelMatrix(LinearKernel(), normalized=self._norm_kernels)
    super().__init__(km_list=[linear_km, ], name=name)

    # not attached to a sample yet
    self._num_samples = None

    # (kernel class, parameter name, values), in the order they get added
    parametrized_families = (
        (PolyKernel, 'degree', poly_degree_values),
        (GaussianKernel, 'sigma', rbf_sigma_values),
        (LaplacianKernel, 'gamma', laplace_gamma_values),
        (SigmoidKernel, 'gamma', sigmoid_gamma_values),
        (SigmoidKernel, 'offset', sigmoid_offset_values),
    )
    for kernel_class, param_name, param_values in parametrized_families:
        self.add_parametrized_kernels(kernel_class, param_name, param_values)