def test_multinomial_loss():
    """Check that the two multinomial loss/gradient code paths agree.

    The loss and gradient are computed once with the helper used by the
    multinomial SAG solver and once with the helper used by multinomial
    LogisticRegression, on the iris data with random weights, intercepts
    and non-negative sample weights. Both results must match.
    """
    X = iris.data
    y = iris.target.astype(np.float64)
    n_samples, n_features = X.shape
    n_classes = len(np.unique(y))

    # Draw order matters for reproducibility: weights, intercept, then
    # sample weights, all from the same seeded generator.
    rng = check_random_state(42)
    weights = rng.randn(n_features, n_classes)
    intercept = rng.randn(n_classes)
    sample_weights = rng.randn(n_samples)
    # Take the absolute value in place so every sample weight is >= 0.
    np.abs(sample_weights, out=sample_weights)

    # Loss and gradient as computed in multinomial SAG.
    dataset, _ = make_dataset(X, y, sample_weights, random_state=42)
    sag_loss, sag_grad = _multinomial_grad_loss_all_samples(
        dataset, weights, intercept, n_samples, n_features, n_classes
    )

    # Loss and gradient as computed in multinomial LogisticRegression.
    Y_bin = LabelBinarizer().fit_transform(y)
    # Stack the intercept under the weights, then flatten class by class.
    stacked = np.vstack((weights, intercept))
    weights_intercept = stacked.T.ravel()
    logreg_loss, logreg_grad, _ = _multinomial_loss_grad(
        weights_intercept, X, Y_bin, 0.0, sample_weights
    )
    # Back to (n_classes, n_features + 1), drop the intercept column and
    # transpose so the layout matches the SAG gradient.
    logreg_grad = logreg_grad.reshape(n_classes, -1)[:, :-1].T

    # Comparison.
    assert_array_almost_equal(sag_grad, logreg_grad)
    assert_almost_equal(sag_loss, logreg_loss)
def test_multinomial_loss():
    """Multinomial loss and gradient must be consistent across solvers.

    Compares ``_multinomial_grad_loss_all_samples`` (the multinomial SAG
    computation) against ``_multinomial_loss_grad`` (the multinomial
    LogisticRegression computation) on the iris data.
    """
    features, labels = iris.data, iris.target.astype(np.float64)
    n_samples, n_features = features.shape
    n_classes = len(np.unique(labels))

    # Random model parameters and sample weights from one seeded generator;
    # the three draws must stay in this order.
    random_gen = check_random_state(42)
    coef = random_gen.randn(n_features, n_classes)
    bias = random_gen.randn(n_classes)
    sample_weights = random_gen.randn(n_samples)
    # In-place absolute value: the weights become non-negative.
    np.abs(sample_weights, sample_weights)

    # --- multinomial SAG path ---
    dataset, _ = make_dataset(features, labels, sample_weights,
                              random_state=42)
    loss_sag, grad_sag = _multinomial_grad_loss_all_samples(
        dataset, coef, bias, n_samples, n_features, n_classes)

    # --- multinomial LogisticRegression path ---
    binarizer = LabelBinarizer()
    one_hot = binarizer.fit_transform(labels)
    # Flat parameter vector: per class, the feature weights then the bias.
    flat_params = np.vstack((coef, bias)).T.ravel()
    loss_lr, grad_lr, _ = _multinomial_loss_grad(
        flat_params, features, one_hot, 0.0, sample_weights)
    # Reshape to one row per class, strip the trailing intercept entry and
    # transpose to the (n_features, n_classes) layout used by SAG.
    grad_lr = grad_lr.reshape(n_classes, -1)
    grad_lr = grad_lr[:, :-1].T

    # --- comparison ---
    assert_array_almost_equal(grad_sag, grad_lr)
    assert_almost_equal(loss_sag, loss_lr)
def test_fused_types_make_dataset():
    """Check ``make_dataset`` for float32/float64, dense and CSR inputs.

    For each precision a dense and a CSR dataset are built from the iris
    data, one sample is pulled with ``_next_py()`` and the test verifies
    that:

    * the sample's data array keeps the dtype of the input matrix;
    * the sample's target is a Python ``float``;
    * float32 and float64 results agree to 5 decimals;
    * dense and CSR datasets yield exactly the same sample.
    """
    iris = load_iris()

    X_32 = iris.data.astype(np.float32)
    y_32 = iris.target.astype(np.float32)
    X_csr_32 = sparse.csr_matrix(X_32)
    sample_weight_32 = np.arange(y_32.size, dtype=np.float32)

    X_64 = iris.data.astype(np.float64)
    y_64 = iris.target.astype(np.float64)
    X_csr_64 = sparse.csr_matrix(X_64)
    sample_weight_64 = np.arange(y_64.size, dtype=np.float64)

    # array
    dataset_32, _ = make_dataset(X_32, y_32, sample_weight_32)
    dataset_64, _ = make_dataset(X_64, y_64, sample_weight_64)
    xi_32, yi_32, _, _ = dataset_32._next_py()
    xi_64, yi_64, _, _ = dataset_64._next_py()
    xi_data_32, _, _ = xi_32
    xi_data_64, _, _ = xi_64

    assert xi_data_32.dtype == np.float32
    assert xi_data_64.dtype == np.float64
    assert isinstance(yi_32, float)
    assert isinstance(yi_64, float)
    # Same cross-precision check as performed for the CSR datasets below.
    assert_array_almost_equal(yi_64, yi_32, decimal=5)

    # csr
    datasetcsr_32, _ = make_dataset(X_csr_32, y_32, sample_weight_32)
    datasetcsr_64, _ = make_dataset(X_csr_64, y_64, sample_weight_64)
    xicsr_32, yicsr_32, _, _ = datasetcsr_32._next_py()
    xicsr_64, yicsr_64, _, _ = datasetcsr_64._next_py()
    xicsr_data_32, _, _ = xicsr_32
    xicsr_data_64, _, _ = xicsr_64

    assert xicsr_data_32.dtype == np.float32
    assert xicsr_data_64.dtype == np.float64
    assert isinstance(yicsr_32, float)
    assert isinstance(yicsr_64, float)

    assert_array_almost_equal(xicsr_data_64, xicsr_data_32, decimal=5)
    assert_array_almost_equal(yicsr_64, yicsr_32, decimal=5)

    # Dense and sparse containers must expose identical samples.
    assert_array_equal(xi_data_32, xicsr_data_32)
    assert_array_equal(xi_data_64, xicsr_data_64)
    assert_array_equal(yi_32, yicsr_32)
    assert_array_equal(yi_64, yicsr_64)
def test_fused_types_make_dataset():
    """``make_dataset`` must preserve float32/float64 for dense and CSR.

    One sample is drawn with ``_next_py()`` from each of four datasets
    (dense/CSR x float32/float64) built on the iris data. The sample's data
    array must keep the input dtype, the two precisions must agree within
    ``rtol``, and the dense and CSR datasets must return identical samples.
    """
    iris = load_iris()
    data, target = iris.data, iris.target

    # float32 fixtures
    X_32 = data.astype(np.float32)
    y_32 = target.astype(np.float32)
    X_csr_32 = sparse.csr_matrix(X_32)
    sample_weight_32 = np.arange(y_32.size, dtype=np.float32)

    # float64 fixtures
    X_64 = data.astype(np.float64)
    y_64 = target.astype(np.float64)
    X_csr_64 = sparse.csr_matrix(X_64)
    sample_weight_64 = np.arange(y_64.size, dtype=np.float64)

    # Dense arrays: build both datasets, then pull one sample from each.
    dense_32, _ = make_dataset(X_32, y_32, sample_weight_32)
    dense_64, _ = make_dataset(X_64, y_64, sample_weight_64)
    dense_sample_32, dense_y_32, _, _ = dense_32._next_py()
    dense_sample_64, dense_y_64, _, _ = dense_64._next_py()
    # Each sample is a 3-tuple; only its first element (the data) is used.
    dense_values_32, _, _ = dense_sample_32
    dense_values_64, _, _ = dense_sample_64

    assert dense_values_32.dtype == np.float32
    assert dense_values_64.dtype == np.float64
    assert_allclose(dense_y_64, dense_y_32, rtol=rtol)

    # CSR matrices: same procedure.
    csr_32, _ = make_dataset(X_csr_32, y_32, sample_weight_32)
    csr_64, _ = make_dataset(X_csr_64, y_64, sample_weight_64)
    csr_sample_32, csr_y_32, _, _ = csr_32._next_py()
    csr_sample_64, csr_y_64, _, _ = csr_64._next_py()
    csr_values_32, _, _ = csr_sample_32
    csr_values_64, _, _ = csr_sample_64

    assert csr_values_32.dtype == np.float32
    assert csr_values_64.dtype == np.float64

    # Precision must not change the values beyond the tolerance.
    assert_allclose(csr_values_64, csr_values_32, rtol=rtol)
    assert_allclose(csr_y_64, csr_y_32, rtol=rtol)

    # Dense and CSR datasets must yield exactly the same sample.
    assert_array_equal(dense_values_32, csr_values_32)
    assert_array_equal(dense_values_64, csr_values_64)
    assert_array_equal(dense_y_32, csr_y_32)
    assert_array_equal(dense_y_64, csr_y_64)
def _fit_regressor(self, X, y, alpha, C, loss, learning_rate,
                   sample_weight, n_iter):
    """Run the SGD solver on ``(X, y)`` and store the fitted parameters.

    ``self.coef_`` is re-initialised to zeros (one entry per column of
    ``X``) before the solver runs. When ``self.average > 0`` the averaged
    solver ``average_sgd`` is used and the standard and averaged
    parameters are both kept; otherwise ``self.parallelizer`` is called,
    starting from the zero coefficients and a ``0.0`` intercept.

    Parameters
    ----------
    X : array-like or sparse matrix with a ``shape`` attribute
        Training data; ``X.shape[0]`` is used to advance ``self.t_`` and
        ``X.shape[1]`` to size ``self.coef_``.
    y : array-like
        Targets, passed to ``make_dataset``.
    alpha, C : float
        Forwarded unchanged to the solver.
    loss : str
        Loss identifier resolved with ``self._get_loss_function``.
    learning_rate : str
        Schedule identifier resolved with ``self._get_learning_rate_type``.
    sample_weight : array-like
        Per-sample weights, passed to ``make_dataset``.
    n_iter : int
        Number of passes given to the solver; also used to advance
        ``self.t_`` by ``n_iter * X.shape[0]``.

    Returns
    -------
    None
        Results are stored on ``self`` (``coef_``, ``intercept_``, ``t_``
        and, when averaging, the ``standard_*`` / ``average_*`` attributes).
    """
    dataset, intercept_decay = make_dataset(X, y, sample_weight)

    # One weight per feature column, instead of a fixed length of 3.
    self.coef_ = np.zeros((X.shape[1],), dtype=np.float64, order="C")

    loss_function = self._get_loss_function(loss)
    penalty_type = self._get_penalty_type(self.penalty)
    learning_rate_type = self._get_learning_rate_type(learning_rate)

    if self.t_ is None:
        self.t_ = 1.0

    random_state = check_random_state(self.random_state)
    # numpy mtrand expects a C long which is a signed 32 bit integer under
    # Windows
    seed = random_state.randint(0, np.iinfo(np.int32).max)

    if self.average > 0:
        # NOTE(review): the two 1.0 literals after ``seed`` are presumably
        # class weights that are irrelevant for regression - confirm
        # against the solver signature.
        (self.standard_coef_, self.standard_intercept_,
         self.average_coef_, self.average_intercept_) = average_sgd(
            self.standard_coef_,
            self.standard_intercept_[0],
            self.average_coef_,
            self.average_intercept_[0],
            loss_function,
            penalty_type,
            alpha, C,
            self.l1_ratio,
            dataset,
            n_iter,
            int(self.fit_intercept),
            int(self.verbose),
            int(self.shuffle),
            seed,
            1.0, 1.0,
            learning_rate_type,
            self.eta0, self.power_t, self.t_,
            intercept_decay,
            self.average)

        self.average_intercept_ = np.atleast_1d(self.average_intercept_)
        self.standard_intercept_ = np.atleast_1d(self.standard_intercept_)
        self.t_ += n_iter * X.shape[0]

        # Expose the averaged parameters only once at least
        # ``self.average`` updates have been performed.
        if self.average <= self.t_ - 1.0:
            self.coef_ = self.average_coef_
            self.intercept_ = self.average_intercept_
        else:
            self.coef_ = self.standard_coef_
            self.intercept_ = self.standard_intercept_
    else:
        self.coef_, self.intercept_ = self.parallelizer(
            self.coef_,
            0.0,
            loss_function,
            penalty_type,
            alpha, C,
            self.l1_ratio,
            dataset,
            n_iter,
            int(self.fit_intercept),
            int(self.verbose),
            int(self.shuffle),
            seed,
            1.0, 1.0,
            learning_rate_type,
            self.eta0, self.power_t, self.t_,
            intercept_decay)

        self.t_ += n_iter * X.shape[0]
        self.intercept_ = np.atleast_1d(self.intercept_)
def _fit_regressor(self, X, y, alpha, C, loss, learning_rate,
                   sample_weight, n_iter):
    """Fit the regressor with SGD and record the result on ``self``.

    The coefficient vector is reset to zeros with one entry per column of
    ``X``. Depending on ``self.average`` the method then delegates either
    to ``average_sgd`` (``self.average > 0``) or to ``self.parallelizer``,
    and finally advances the update counter ``self.t_`` by
    ``n_iter * X.shape[0]``.

    Parameters
    ----------
    X : array-like or sparse matrix with a ``shape`` attribute
        Training data.
    y : array-like
        Targets, passed to ``make_dataset``.
    alpha, C : float
        Forwarded unchanged to the solver.
    loss : str
        Loss identifier resolved with ``self._get_loss_function``.
    learning_rate : str
        Schedule identifier resolved with ``self._get_learning_rate_type``.
    sample_weight : array-like
        Per-sample weights, passed to ``make_dataset``.
    n_iter : int
        Number of passes given to the solver.

    Returns
    -------
    None
        ``coef_``, ``intercept_`` and ``t_`` are updated in place; with
        averaging enabled the ``standard_*`` and ``average_*`` attributes
        are updated as well.
    """
    dataset, intercept_decay = make_dataset(X, y, sample_weight)

    # Size the weight vector from the data rather than assuming exactly
    # three features.
    n_features = X.shape[1]
    self.coef_ = np.zeros((n_features,), dtype=np.float64, order="C")

    loss_function = self._get_loss_function(loss)
    penalty_type = self._get_penalty_type(self.penalty)
    learning_rate_type = self._get_learning_rate_type(learning_rate)

    if self.t_ is None:
        self.t_ = 1.0

    random_state = check_random_state(self.random_state)
    # numpy mtrand expects a C long which is a signed 32 bit integer under
    # Windows
    seed = random_state.randint(0, np.iinfo(np.int32).max)

    if self.average > 0:
        # NOTE(review): the pair of 1.0 literals following ``seed`` looks
        # like neutral class weights - confirm against average_sgd.
        (self.standard_coef_, self.standard_intercept_,
         self.average_coef_, self.average_intercept_) = average_sgd(
            self.standard_coef_,
            self.standard_intercept_[0],
            self.average_coef_,
            self.average_intercept_[0],
            loss_function,
            penalty_type,
            alpha, C,
            self.l1_ratio,
            dataset,
            n_iter,
            int(self.fit_intercept),
            int(self.verbose),
            int(self.shuffle),
            seed,
            1.0, 1.0,
            learning_rate_type,
            self.eta0, self.power_t, self.t_,
            intercept_decay,
            self.average)

        self.average_intercept_ = np.atleast_1d(self.average_intercept_)
        self.standard_intercept_ = np.atleast_1d(self.standard_intercept_)
        self.t_ += n_iter * X.shape[0]

        # The averaged solution is used only after at least
        # ``self.average`` updates; before that the plain one is exposed.
        if self.average <= self.t_ - 1.0:
            self.coef_ = self.average_coef_
            self.intercept_ = self.average_intercept_
        else:
            self.coef_ = self.standard_coef_
            self.intercept_ = self.standard_intercept_
    else:
        # The solver starts from the zero coefficients allocated above and
        # a 0.0 intercept.
        self.coef_, self.intercept_ = self.parallelizer(
            self.coef_,
            0.0,
            loss_function,
            penalty_type,
            alpha, C,
            self.l1_ratio,
            dataset,
            n_iter,
            int(self.fit_intercept),
            int(self.verbose),
            int(self.shuffle),
            seed,
            1.0, 1.0,
            learning_rate_type,
            self.eta0, self.power_t, self.t_,
            intercept_decay)

        self.t_ += n_iter * X.shape[0]
        self.intercept_ = np.atleast_1d(self.intercept_)