def test_multiple_outputs_balanced_weights_cv(self):
    """Test MultiTaskWeightedLassoCV with weights.

    The first half of the samples gets weight 1 and the second half weight 2.
    The reference ("expanded") dataset appends the second half of the data a
    second time, and the weighted CV folds are translated to folds over the
    expanded dataset before both are handed to ``_compare_with_lasso_cv``.
    """
    n_samples = TestLassoExtensions.n_samples
    half = n_samples // 2
    X = TestLassoExtensions.X
    y = TestLassoExtensions.y_2D

    # Weights: 1 for the first half, 2 for the second half
    sample_weight = np.concatenate((np.ones(half), np.ones(half) * 2))

    # Expanded datasets: the original rows followed by the second half again
    X_expanded = np.concatenate((X, X[half:]))
    y_expanded = np.concatenate((y, y[half:]))

    # Folds produced by the weighted splitter on the original data
    weighted_splitter = WeightedKFold(n_splits=3)
    wlasso_cv = list(
        weighted_splitter.split(X, y, sample_weight=sample_weight))

    # Each original index maps to itself; indices in the second half also
    # map to the position of their appended copy in the expanded dataset
    index_mapper = {
        i: [i] if i < half else [i, i + half]
        for i in range(n_samples)
    }
    lasso_cv = self._map_splitter(wlasso_cv, n_samples + half, index_mapper)

    # Regularization strengths to test
    alphas = np.logspace(-4, -1, num=10)

    # Run the comparison without an intercept first, then with one
    for fit_intercept in (False, True):
        params = {'fit_intercept': fit_intercept}
        self._compare_with_lasso_cv(X_expanded, y_expanded, X, y,
                                    sample_weight=sample_weight,
                                    alphas=alphas,
                                    lasso_cv=lasso_cv,
                                    wlasso_cv=wlasso_cv,
                                    params=params)
def test_weighted_KFold(self):
    """Test WeightedKFold used in WeightedLassoCV.

    Draws random integer weights and checks that every test fold holds
    roughly ``1 / n_splits`` of the total weight (absolute tolerance 0.05).
    """
    # A small subset of the data keeps the test fast
    n = 100
    sample_weight = np.random.choice(10, size=n)
    n_splits = 3
    expected_share = 1 / n_splits
    splitter = WeightedKFold(n_splits=n_splits)
    total_weight = np.sum(sample_weight)
    folds = splitter.split(TestLassoExtensions.X[:n],
                           TestLassoExtensions.y_simple[:n],
                           sample_weight=sample_weight)
    for _, test_index in folds:
        # Share of the total weight that landed in this test fold
        fold_share = np.sum(sample_weight[test_index]) / total_weight
        self.assertAlmostEqual(fold_share, expected_share, delta=5e-2)
def test_multiple_outputs_no_weights_cv(self):
    """Test MultiTaskWeightedLassoCV with no weights.

    The same data and the same ``WeightedKFold`` splitter are passed for both
    sides of the comparison, once without and once with an intercept.
    """
    X = TestLassoExtensions.X
    y = TestLassoExtensions.y_2D

    # Regularization strengths to test
    alphas = np.logspace(-4, -1, num=10)

    # One splitter shared by both estimators
    cv = WeightedKFold(n_splits=3)

    # Compare with MultiTaskLassoCV: no intercept first, then with intercept
    for fit_intercept in (False, True):
        params = {'fit_intercept': fit_intercept}
        self._compare_with_lasso_cv(X, y, X, y,
                                    sample_weight=None,
                                    alphas=alphas,
                                    lasso_cv=cv,
                                    wlasso_cv=cv,
                                    params=params)
def _weighted_check_cv(cv='warn', y=None, classifier=False): if cv is None or cv == 'warn': warnings.warn(CV_WARNING, FutureWarning) cv = 3 if isinstance(cv, numbers.Integral): if (classifier and (y is not None) and (type_of_target(y) in ('binary', 'multiclass'))): return WeightedStratifiedKFold(cv) else: return WeightedKFold(cv) if not hasattr(cv, 'split') or isinstance(cv, str): if not isinstance(cv, Iterable) or isinstance(cv, str): raise ValueError("Expected cv as an integer, cross-validation " "object (from sklearn.model_selection) " "or an iterable. Got %s." % cv) return _WeightedCVIterableWrapper(cv) return cv # New style cv objects are passed without any modification