# Assumed import: the wrapper delegates to scikit-learn's SGDRegressor under
# the alias SKLModel used below.
from sklearn.linear_model import SGDRegressor as SKLModel


class SGDRegressorImpl():
    def __init__(self, loss='squared_loss', penalty='l2', alpha=0.0001,
                 l1_ratio=0.15, fit_intercept=True, max_iter=None, tol=None,
                 shuffle=True, verbose=0, epsilon=0.1, random_state=None,
                 learning_rate='invscaling', eta0=0.01, power_t=0.25,
                 early_stopping=False, validation_fraction=0.1,
                 n_iter_no_change=5, warm_start=False, average=False):
        self._hyperparams = {
            'loss': loss,
            'penalty': penalty,
            'alpha': alpha,
            'l1_ratio': l1_ratio,
            'fit_intercept': fit_intercept,
            'max_iter': max_iter,
            'tol': tol,
            'shuffle': shuffle,
            'verbose': verbose,
            'epsilon': epsilon,
            'random_state': random_state,
            'learning_rate': learning_rate,
            'eta0': eta0,
            'power_t': power_t,
            'early_stopping': early_stopping,
            'validation_fraction': validation_fraction,
            'n_iter_no_change': n_iter_no_change,
            'warm_start': warm_start,
            'average': average}
        self._wrapped_model = SKLModel(**self._hyperparams)

    def fit(self, X, y=None):
        if y is not None:
            self._wrapped_model.fit(X, y)
        else:
            self._wrapped_model.fit(X)
        return self

    def predict(self, X):
        return self._wrapped_model.predict(X)
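# A minimal usage sketch of the wrapper above, assuming the SKLModel import
# shown there; the data is synthetic and purely illustrative.
import numpy as np

rng = np.random.RandomState(0)
X = rng.rand(100, 3)
y = X @ np.array([1.0, -2.0, 0.5]) + 0.1 * rng.rand(100)
reg = SGDRegressorImpl(max_iter=1000, tol=1e-3).fit(X, y)
print(reg.predict(X[:5]))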
class SGD(AutoSklearnRegressionAlgorithm):
    def __init__(self, loss, penalty, alpha, fit_intercept, n_iter,
                 learning_rate, l1_ratio=0.15, epsilon=0.1,
                 eta0=0.01, power_t=0.5, average=False, random_state=None):
        self.loss = loss
        self.penalty = penalty
        self.alpha = alpha
        self.fit_intercept = fit_intercept
        self.n_iter = n_iter
        self.learning_rate = learning_rate
        self.l1_ratio = l1_ratio
        self.epsilon = epsilon
        self.eta0 = eta0
        self.power_t = power_t
        self.random_state = random_state
        self.average = average
        self.estimator = None
        self.scaler = None

    def fit(self, X, y):
        while not self.configuration_fully_fitted():
            self.iterative_fit(X, y, n_iter=1)
        return self

    def iterative_fit(self, X, y, n_iter=1, refit=False):
        from sklearn.linear_model.stochastic_gradient import SGDRegressor
        import sklearn.preprocessing

        if refit:
            self.estimator = None
            self.scaler = None

        if self.estimator is None:
            self.alpha = float(self.alpha)
            self.fit_intercept = self.fit_intercept == 'True'
            self.n_iter = int(self.n_iter)
            self.l1_ratio = float(
                self.l1_ratio) if self.l1_ratio is not None else 0.15
            self.epsilon = float(
                self.epsilon) if self.epsilon is not None else 0.1
            self.eta0 = float(self.eta0)
            self.power_t = float(
                self.power_t) if self.power_t is not None else 0.25
            self.average = self.average == 'True'
            # Start with the per-call iteration budget; it grows by n_iter on
            # every subsequent call until configuration_fully_fitted() is True.
            self.estimator = SGDRegressor(loss=self.loss,
                                          penalty=self.penalty,
                                          alpha=self.alpha,
                                          fit_intercept=self.fit_intercept,
                                          n_iter=n_iter,
                                          learning_rate=self.learning_rate,
                                          l1_ratio=self.l1_ratio,
                                          epsilon=self.epsilon,
                                          eta0=self.eta0,
                                          power_t=self.power_t,
                                          shuffle=True,
                                          average=self.average,
                                          random_state=self.random_state)
            # The target is standardized for fitting; predict() inverts the
            # transformation.
            self.scaler = sklearn.preprocessing.StandardScaler(copy=True)
            self.scaler.fit(y)
        else:
            self.estimator.n_iter += n_iter

        Y_scaled = self.scaler.transform(y)
        self.estimator.fit(X, Y_scaled)
        return self

    def configuration_fully_fitted(self):
        if self.estimator is None:
            return False
        return not self.estimator.n_iter < self.n_iter

    def predict(self, X):
        if self.estimator is None:
            raise NotImplementedError()
        Y_pred = self.estimator.predict(X)
        return self.scaler.inverse_transform(Y_pred)

    @staticmethod
    def get_properties(dataset_properties=None):
        return {'shortname': 'SGD Regressor',
                'name': 'Stochastic Gradient Descent Regressor',
                'handles_missing_values': False,
                'handles_nominal_values': False,
                'handles_numerical_features': True,
                'prefers_data_scaled': True,
                'prefers_data_normalized': True,
                'handles_regression': True,
                'handles_classification': False,
                'handles_multiclass': False,
                'handles_multilabel': False,
                'is_deterministic': True,
                'handles_sparse': True,
                'input': (DENSE, SPARSE, UNSIGNED_DATA),
                'output': (PREDICTIONS,),
                # TODO find out what is best used here!
                'preferred_dtype': None}

    @staticmethod
    def get_hyperparameter_search_space(dataset_properties=None):
        cs = ConfigurationSpace()
        loss = cs.add_hyperparameter(CategoricalHyperparameter(
            "loss", ["squared_loss", "huber", "epsilon_insensitive",
                     "squared_epsilon_insensitive"],
            default="squared_loss"))
        penalty = cs.add_hyperparameter(CategoricalHyperparameter(
            "penalty", ["l1", "l2", "elasticnet"], default="l2"))
        alpha = cs.add_hyperparameter(UniformFloatHyperparameter(
            "alpha", 10e-7, 1e-1, log=True, default=0.01))
        l1_ratio = cs.add_hyperparameter(UniformFloatHyperparameter(
            "l1_ratio", 1e-9, 1., log=True, default=0.15))
        fit_intercept = cs.add_hyperparameter(UnParametrizedHyperparameter(
            "fit_intercept", "True"))
        n_iter = cs.add_hyperparameter(UniformIntegerHyperparameter(
            "n_iter", 5, 1000, log=True, default=20))
        epsilon = cs.add_hyperparameter(UniformFloatHyperparameter(
            "epsilon", 1e-5, 1e-1, default=1e-4, log=True))
        learning_rate = cs.add_hyperparameter(CategoricalHyperparameter(
            "learning_rate", ["optimal", "invscaling", "constant"],
            default="optimal"))
        eta0 = cs.add_hyperparameter(UniformFloatHyperparameter(
            "eta0", 10 ** -7, 0.1, default=0.01))
        power_t = cs.add_hyperparameter(UniformFloatHyperparameter(
            "power_t", 1e-5, 1, default=0.5))
        average = cs.add_hyperparameter(CategoricalHyperparameter(
            "average", ["False", "True"], default="False"))

        # TODO add passive/aggressive here, although not properly documented?
        elasticnet = EqualsCondition(l1_ratio, penalty, "elasticnet")
        epsilon_condition = InCondition(epsilon, loss,
                                        ["huber", "epsilon_insensitive",
                                         "squared_epsilon_insensitive"])
        # eta0 seems to be always active according to the source code; when
        # learning_rate is set to optimal, eta0 is the starting value:
        # https://github.com/scikit-learn/scikit-learn/blob/0.15.X/sklearn/linear_model/sgd_fast.pyx
        # eta0_and_inv = EqualsCondition(eta0, learning_rate, "invscaling")
        # eta0_and_constant = EqualsCondition(eta0, learning_rate, "constant")
        # eta0_condition = OrConjunction(eta0_and_inv, eta0_and_constant)
        power_t_condition = EqualsCondition(power_t, learning_rate,
                                            "invscaling")

        cs.add_condition(elasticnet)
        cs.add_condition(epsilon_condition)
        cs.add_condition(power_t_condition)

        return cs
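# Usage sketch for the component above (assumptions: the configuration-space
# package in use exposes sample_configuration() and get_dictionary(), and
# X_train/y_train are hypothetical training arrays).
cs = SGD.get_hyperparameter_search_space()
config = cs.sample_configuration()
regressor = SGD(random_state=1, **config.get_dictionary())
regressor.fit(X_train, y_train)
print(regressor.predict(X_train[:5]))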
coef[inds[n_features // 2:]] = 0  # sparsify coef
print("true coef sparsity: %f" % sparsity_ratio(coef))
y = np.dot(X, coef)

# add noise (size=, not loc=: we want n_samples zero-mean draws)
y += 0.01 * np.random.normal(size=(n_samples,))

# Split data in train set and test set
n_samples = X.shape[0]
X_train, y_train = X[:n_samples // 2], y[:n_samples // 2]
X_test, y_test = X[n_samples // 2:], y[n_samples // 2:]
print("test data sparsity: %f" % sparsity_ratio(X_test))

###############################################################################
clf = SGDRegressor(penalty='l1', alpha=.2, max_iter=2000, tol=None)
clf.fit(X_train, y_train)
print("model sparsity: %f" % sparsity_ratio(clf.coef_))


def benchmark_dense_predict():
    for _ in range(300):
        clf.predict(X_test)


def benchmark_sparse_predict():
    X_test_sparse = csr_matrix(X_test)
    for _ in range(300):
        clf.predict(X_test_sparse)


def score(y_test, y_pred, case):
    # body assumed: report the prediction error for the given benchmark case
    print("mse on test data (%s): %f" % (case, np.mean((y_test - y_pred) ** 2)))
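# A minimal way to time the two benchmark functions above (assumption: the
# original script's timing harness is not shown here).
from time import time

for case, bench in [("dense", benchmark_dense_predict),
                    ("sparse", benchmark_sparse_predict)]:
    t0 = time()
    bench()
    print("%s prediction: %.3f s" % (case, time() - t0))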
class SgdLibraryLinearRegression:
    def __init__(self, data):
        self.df = pd.read_csv(data)
        self.regressor = SGDRegressor(max_iter=40, tol=1e-5,
                                      learning_rate='constant', eta0=0.06)

    def preprocess(self):
        # Removing null values and duplicates (dropna/drop_duplicates return
        # new frames, so the results must be assigned back)
        self.df = self.df.dropna()
        self.df = self.df.drop_duplicates()
        # horsepower is of object dtype; inspecting its unique values shows
        # '?' mixed in with numbers, so we disregard those instances
        self.df = self.df[self.df.horsepower != '?']
        # Cast the column to float for further processing
        self.df['horsepower'] = self.df['horsepower'].astype('float')
        # Remove the car name attribute, since it does not correlate with the
        # mpg of the car
        self.df.drop(['car name'], axis=1, inplace=True)
        # Attributes start at column 1; the target (mpg) is column 0
        self.X = self.df.iloc[:, 1:].values
        self.Y = self.df.iloc[:, 0].values
        # Scale the input attributes
        self.X = StandardScaler().fit_transform(self.X)
        # Split the data into training and test sets in a 70:30 proportion
        self.X_train, self.X_test, self.Y_train, self.Y_test = train_test_split(
            self.X, self.Y, test_size=0.3, random_state=1)

    def train(self, epoch_count=40, learning_rate=0.06):
        self.regressor = SGDRegressor(max_iter=epoch_count, tol=1e-5,
                                      learning_rate='constant',
                                      eta0=learning_rate)
        # Run the training by calling the library method
        self.regressor.fit(self.X_train, self.Y_train)

    def predictTrain(self):
        # Predict values for the training data
        self.Y_pred = self.regressor.predict(self.X_train)
        # Get the score from the library method (R^2 for regressors)
        self.accuracy_score = self.regressor.score(self.X_train, self.Y_train)
        # Get the mean squared error by comparing the predicted values with
        # the actual training values
        self.calculated_mse = mean_squared_error(self.Y_train, self.Y_pred)
        # Get the r2 score by comparing the predicted values with the actual
        # training values
        self.r2_scor = r2_score(self.Y_train, self.Y_pred)
        return self.calculated_mse

    def print(self):
        print("accuracy score ", self.accuracy_score)
        print("mean square error ", self.calculated_mse)
        print("r2_score ", self.r2_scor)

    def predictTest(self):
        # Predict values for the test data
        self.Y_pred = self.regressor.predict(self.X_test)
        # Get the score from the library method (R^2 for regressors)
        self.accuracy_score = self.regressor.score(self.X_test, self.Y_test)
        # Get the mean squared error by comparing the predicted values with
        # the actual test values
        self.calculated_mse = mean_squared_error(self.Y_test, self.Y_pred)
        # Get the r2 score by comparing the predicted values with the actual
        # test values
        self.r2_scor = r2_score(self.Y_test, self.Y_pred)
        return self.calculated_mse

    def plotLearningRate(self, epoch_count, min_lr, max_lr, step, color):
        # Sweep the learning rate from max_lr down to min_lr and record the
        # test MSE at each setting
        mse_error = list()
        x_scale = list()
        label = "epoch = " + str(epoch_count)
        step_size = max_lr
        while step_size >= min_lr:
            self.train(epoch_count, step_size)
            mse_error.append(self.predictTest())
            x_scale.append(step_size)
            step_size = step_size - step
        return plt.scatter(x_scale, mse_error, color=color, label=label)
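# A minimal usage sketch; "auto-mpg.csv" is a hypothetical path to the Auto
# MPG dataset this class expects (mpg in column 0, 'horsepower' and
# 'car name' columns present).
model = SgdLibraryLinearRegression("auto-mpg.csv")
model.preprocess()
model.train(epoch_count=40, learning_rate=0.06)
model.predictTest()
model.print()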
data1 = pd.read_csv("data/ex1data1.txt", names=["X", "y"])
x = data1.X.values[:, None]
y = data1.y.values
poly = PolynomialFeatures(1)  # degree 1: prepends a bias column to x
X = poly.fit_transform(x)

#%% use sklearn
# pick models
regr_gd = SGDRegressor(fit_intercept=False, alpha=0.00001, max_iter=10000)
regr_lr = LinearRegression(fit_intercept=False)
# feed data
regr_gd.fit(X, y)
regr_lr.fit(X, y)

#%% plot the solution via gradient descent
ind = x.argsort(axis=0).flatten()
fig, ax = plt.subplots()  # create empty figure
plt.plot(x, y, 'rx', label='Training data')
plt.plot(x[ind], X[ind, :].dot(regr_lr.coef_), '-k',
         label='lin. reg. (sklearn)')
plt.plot(x[ind], X[ind, :].dot(regr_gd.coef_), '-b',
         label='stoch. grad. descent (sklearn)')
ax.set_xlabel("Population of City in 10,000s")
ax.set_ylabel("Profit in $10,000s")
ax.legend()
plt.show()
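#%% sanity-check sketch (not part of the original script; assumes numpy):
# PolynomialFeatures(1) prepends a bias column, so fitting with
# fit_intercept=False here is the same model as fitting x with an intercept,
# and the normal equation should reproduce LinearRegression's coefficients.
import numpy as np

theta, *_ = np.linalg.lstsq(X, y, rcond=None)
print("normal equation:  ", theta)
print("LinearRegression: ", regr_lr.coef_)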
class SGD(AutoSklearnRegressionAlgorithm):
    def __init__(self, loss, penalty, alpha, fit_intercept, tol,
                 learning_rate, l1_ratio=0.15, epsilon=0.1,
                 eta0=0.01, power_t=0.5, average=False, random_state=None):
        self.loss = loss
        self.penalty = penalty
        self.alpha = alpha
        self.fit_intercept = fit_intercept
        self.tol = tol
        self.learning_rate = learning_rate
        self.l1_ratio = l1_ratio
        self.epsilon = epsilon
        self.eta0 = eta0
        self.power_t = power_t
        self.random_state = random_state
        self.average = average
        self.estimator = None
        self.scaler = None

    def fit(self, X, y):
        self.iterative_fit(X, y, n_iter=2, refit=True)
        while not self.configuration_fully_fitted():
            self.iterative_fit(X, y, n_iter=2)
        return self

    def iterative_fit(self, X, y, n_iter=2, refit=False):
        from sklearn.linear_model.stochastic_gradient import SGDRegressor
        import sklearn.preprocessing

        # Need to fit at least two iterations, otherwise early stopping will
        # not work because we cannot determine whether the algorithm actually
        # converged. The only way of finding this out is if the sgd spends
        # fewer iterations than max_iter. If max_iter == 1, it has to spend at
        # least one iteration and will always spend at least one iteration,
        # so we cannot know about convergence.
        n_iter = max(n_iter, 2)

        if refit:
            self.estimator = None
            self.scaler = None

        if self.estimator is None:
            self.alpha = float(self.alpha)
            self.fit_intercept = self.fit_intercept == 'True'
            self.tol = float(self.tol)
            self.l1_ratio = float(
                self.l1_ratio) if self.l1_ratio is not None else 0.15
            self.epsilon = float(
                self.epsilon) if self.epsilon is not None else 0.1
            self.eta0 = float(self.eta0)
            self.power_t = float(
                self.power_t) if self.power_t is not None else 0.25
            self.average = self.average == 'True'
            self.estimator = SGDRegressor(loss=self.loss,
                                          penalty=self.penalty,
                                          alpha=self.alpha,
                                          fit_intercept=self.fit_intercept,
                                          max_iter=n_iter,
                                          tol=self.tol,
                                          learning_rate=self.learning_rate,
                                          l1_ratio=self.l1_ratio,
                                          epsilon=self.epsilon,
                                          eta0=self.eta0,
                                          power_t=self.power_t,
                                          shuffle=True,
                                          average=self.average,
                                          random_state=self.random_state,
                                          warm_start=True)
            # The target is standardized for fitting; predict() inverts the
            # transformation.
            self.scaler = sklearn.preprocessing.StandardScaler(copy=True)
            self.scaler.fit(y.reshape((-1, 1)))
            Y_scaled = self.scaler.transform(y.reshape((-1, 1))).ravel()
            self.estimator.fit(X, Y_scaled)
        else:
            self.estimator.max_iter += n_iter
            self.estimator.max_iter = min(self.estimator.max_iter, 1000)
            Y_scaled = self.scaler.transform(y.reshape((-1, 1))).ravel()
            self.estimator._validate_params()
            self.estimator._partial_fit(
                X, Y_scaled,
                alpha=self.estimator.alpha,
                C=1.0,
                loss=self.estimator.loss,
                learning_rate=self.estimator.learning_rate,
                max_iter=n_iter,
                sample_weight=None,
                coef_init=None,
                intercept_init=None
            )

        if self.estimator._max_iter >= 1000 or n_iter > self.estimator.n_iter_:
            self.fully_fit_ = True

        return self

    def configuration_fully_fitted(self):
        if self.estimator is None:
            return False
        elif not hasattr(self, 'fully_fit_'):
            return False
        else:
            return self.fully_fit_

    def predict(self, X):
        if self.estimator is None:
            raise NotImplementedError()
        Y_pred = self.estimator.predict(X)
        return self.scaler.inverse_transform(Y_pred)

    @staticmethod
    def get_properties(dataset_properties=None):
        return {'shortname': 'SGD Regressor',
                'name': 'Stochastic Gradient Descent Regressor',
                'handles_regression': True,
                'handles_classification': False,
                'handles_multiclass': False,
                'handles_multilabel': False,
                'is_deterministic': True,
                'handles_sparse': True,
                'input': (DENSE, SPARSE, UNSIGNED_DATA),
                'output': (PREDICTIONS,),
                }

    @staticmethod
    def get_hyperparameter_search_space(dataset_properties=None):
        cs = ConfigurationSpace()
        loss = CategoricalHyperparameter(
            "loss", ["squared_loss", "huber", "epsilon_insensitive",
                     "squared_epsilon_insensitive"],
            default_value="squared_loss")
        penalty = CategoricalHyperparameter(
            "penalty", ["l1", "l2", "elasticnet"], default_value="l2")
        alpha = UniformFloatHyperparameter(
            "alpha", 1e-7, 1e-1, log=True, default_value=0.0001)
        l1_ratio = UniformFloatHyperparameter(
            "l1_ratio", 1e-9, 1., log=True, default_value=0.15)
        fit_intercept = UnParametrizedHyperparameter("fit_intercept", "True")
        tol = UniformFloatHyperparameter(
            "tol", 1e-4, 1e-1, default_value=1e-3, log=True)
        epsilon = UniformFloatHyperparameter(
            "epsilon", 1e-5, 1e-1, default_value=0.1, log=True)
        learning_rate = CategoricalHyperparameter(
            "learning_rate", ["optimal", "invscaling", "constant"],
            default_value="invscaling")
        eta0 = UniformFloatHyperparameter(
            "eta0", 1e-7, 1e-1, default_value=0.01)
        power_t = UniformFloatHyperparameter(
            "power_t", 1e-5, 1, default_value=0.25)
        average = CategoricalHyperparameter(
            "average", ["False", "True"], default_value="False")
        cs.add_hyperparameters([loss, penalty, alpha, l1_ratio, fit_intercept,
                                tol, epsilon, learning_rate, eta0, power_t,
                                average])

        # TODO add passive/aggressive here, although not properly documented?
        elasticnet = EqualsCondition(l1_ratio, penalty, "elasticnet")
        epsilon_condition = InCondition(epsilon, loss,
                                        ["huber", "epsilon_insensitive",
                                         "squared_epsilon_insensitive"])
        # eta0 seems to be always active according to the source code; when
        # learning_rate is set to optimal, eta0 is the starting value:
        # https://github.com/scikit-learn/scikit-learn/blob/0.15.X/sklearn/linear_model/sgd_fast.pyx
        # eta0_and_inv = EqualsCondition(eta0, learning_rate, "invscaling")
        # eta0_and_constant = EqualsCondition(eta0, learning_rate, "constant")
        # eta0_condition = OrConjunction(eta0_and_inv, eta0_and_constant)
        power_t_condition = EqualsCondition(power_t, learning_rate,
                                            "invscaling")

        cs.add_conditions([elasticnet, epsilon_condition, power_t_condition])

        return cs
coef[inds[n_features // 2:]] = 0  # sparsify coef
print("true coef sparsity: %f" % sparsity_ratio(coef))
y = np.dot(X, coef)

# add noise (size=, not loc=: we want n_samples zero-mean draws)
y += 0.01 * np.random.normal(size=(n_samples,))

# Split data in train set and test set
n_samples = X.shape[0]
X_train, y_train = X[:n_samples // 2], y[:n_samples // 2]
X_test, y_test = X[n_samples // 2:], y[n_samples // 2:]
print("test data sparsity: %f" % sparsity_ratio(X_test))

###############################################################################
# n_iter is the pre-0.19 scikit-learn spelling of the iteration budget
clf = SGDRegressor(penalty='l1', alpha=.2, fit_intercept=True, n_iter=2000)
clf.fit(X_train, y_train)
print("model sparsity: %f" % sparsity_ratio(clf.coef_))


def benchmark_dense_predict():
    for _ in range(300):
        clf.predict(X_test)


def benchmark_sparse_predict():
    X_test_sparse = csr_matrix(X_test)
    for _ in range(300):
        clf.predict(X_test_sparse)


def score(y_test, y_pred, case):
    # body assumed: report the prediction error for the given benchmark case
    print("mse on test data (%s): %f" % (case, np.mean((y_test - y_pred) ** 2)))
class SGD(
    IterativeComponentWithSampleWeight,
    BaseRegressionModel,
):
    def __init__(self, loss, penalty, alpha, fit_intercept, tol,
                 learning_rate, epsilon_insensitive, l1_ratio=0.15,
                 epsilon_huber=0.1, eta0=0.01, power_t=0.5, average=False,
                 random_state=None):
        self.loss = loss
        self.penalty = penalty
        self.alpha = alpha
        self.fit_intercept = fit_intercept
        self.tol = tol
        self.learning_rate = learning_rate
        self.l1_ratio = l1_ratio
        self.epsilon_huber = epsilon_huber
        self.epsilon_insensitive = epsilon_insensitive
        self.eta0 = eta0
        self.power_t = power_t
        self.random_state = random_state
        self.average = average
        self.estimator = None
        self.start_time = time.time()
        self.time_limit = None

    def iterative_fit(self, X, y, n_iter=2, refit=False, sample_weight=None):
        from sklearn.linear_model.stochastic_gradient import SGDRegressor

        # Need to fit at least two iterations, otherwise early stopping will
        # not work because we cannot determine whether the algorithm actually
        # converged. The only way of finding this out is if the sgd spends
        # fewer iterations than max_iter. If max_iter == 1, it has to spend at
        # least one iteration and will always spend at least one iteration,
        # so we cannot know about convergence.

        if refit:
            self.estimator = None

        if self.estimator is None:
            self.fully_fit_ = False

            self.alpha = float(self.alpha)
            if not check_none(self.epsilon_insensitive):
                self.epsilon_insensitive = float(self.epsilon_insensitive)
            self.l1_ratio = float(self.l1_ratio) \
                if self.l1_ratio is not None else 0.15
            self.epsilon_huber = float(self.epsilon_huber) \
                if self.epsilon_huber is not None else 0.1
            self.eta0 = float(self.eta0) if self.eta0 is not None else 0.01
            self.power_t = float(self.power_t) \
                if self.power_t is not None else 0.5
            self.average = check_for_bool(self.average)
            self.fit_intercept = check_for_bool(self.fit_intercept)
            self.tol = float(self.tol)

            # The two epsilon hyperparameters are mutually exclusive; pick
            # the one that matches the chosen loss.
            if self.loss == "huber":
                epsilon = self.epsilon_huber
            elif self.loss in ["epsilon_insensitive",
                               "squared_epsilon_insensitive"]:
                epsilon = self.epsilon_insensitive
            else:
                epsilon = None

            self.estimator = SGDRegressor(loss=self.loss,
                                          penalty=self.penalty,
                                          alpha=self.alpha,
                                          fit_intercept=self.fit_intercept,
                                          max_iter=n_iter,
                                          tol=self.tol,
                                          learning_rate=self.learning_rate,
                                          l1_ratio=self.l1_ratio,
                                          epsilon=epsilon,
                                          eta0=self.eta0,
                                          power_t=self.power_t,
                                          shuffle=True,
                                          average=self.average,
                                          random_state=self.random_state,
                                          warm_start=True)
            self.estimator.fit(X, y, sample_weight=sample_weight)
        else:
            self.estimator.max_iter += n_iter
            self.estimator.max_iter = min(self.estimator.max_iter, 512)
            self.estimator._validate_params()
            self.estimator._partial_fit(
                X, y,
                alpha=self.estimator.alpha,
                C=1.0,
                loss=self.estimator.loss,
                learning_rate=self.estimator.learning_rate,
                max_iter=n_iter,
                sample_weight=sample_weight,
                coef_init=None,
                intercept_init=None)

        if self.estimator.max_iter >= 512 or n_iter > self.estimator.n_iter_:
            self.fully_fit_ = True

        return self

    def configuration_fully_fitted(self):
        if self.estimator is None:
            return False
        elif not hasattr(self, 'fully_fit_'):
            return False
        else:
            return self.fully_fit_

    def predict(self, X):
        if self.estimator is None:
            raise NotImplementedError()
        return self.estimator.predict(X)

    @staticmethod
    def get_properties(dataset_properties=None):
        return {'shortname': 'SGD Regressor',
                'name': 'Stochastic Gradient Descent Regressor',
                'handles_regression': True,
                'handles_classification': False,
                'handles_multiclass': False,
                'handles_multilabel': False,
                'is_deterministic': True,
                'input': (DENSE, SPARSE, UNSIGNED_DATA),
                'output': (PREDICTIONS,)}

    @staticmethod
    def get_hyperparameter_search_space(dataset_properties=None):
        cs = ConfigurationSpace()
        loss = CategoricalHyperparameter(
            "loss", ["squared_loss", "huber", "epsilon_insensitive",
                     "squared_epsilon_insensitive"],
            default_value="squared_loss")
        penalty = CategoricalHyperparameter(
            "penalty", ["l1", "l2", "elasticnet"], default_value="l2")
        alpha = UniformFloatHyperparameter(
            "alpha", 1e-7, 1e-1, log=True, default_value=0.0001)
        l1_ratio = UniformFloatHyperparameter(
            "l1_ratio", 1e-9, 1, log=True, default_value=0.15)
        fit_intercept = UnParametrizedHyperparameter("fit_intercept", "True")
        tol = UniformFloatHyperparameter(
            "tol", 1e-5, 1e-1, log=True, default_value=1e-4)
        epsilon_huber = UniformFloatHyperparameter(
            "epsilon_huber", 1e-5, 1e-1, default_value=1e-4, log=True)
        epsilon_insensitive = UniformFloatHyperparameter(
            "epsilon_insensitive", 1e-5, 1e-1, default_value=1e-4, log=True)
        learning_rate = CategoricalHyperparameter(
            "learning_rate", ["optimal", "invscaling", "constant"],
            default_value="invscaling")
        eta0 = UniformFloatHyperparameter(
            "eta0", 1e-7, 1e-1, default_value=0.01, log=True)
        power_t = UniformFloatHyperparameter(
            "power_t", 1e-5, 1, log=True, default_value=0.5)
        average = CategoricalHyperparameter(
            "average", ["False", "True"], default_value="False")
        cs.add_hyperparameters([loss, penalty, alpha, l1_ratio, fit_intercept,
                                tol, epsilon_huber, epsilon_insensitive,
                                learning_rate, eta0, power_t, average])

        elasticnet = EqualsCondition(l1_ratio, penalty, "elasticnet")
        epsilon_huber_condition = EqualsCondition(epsilon_huber, loss, "huber")
        epsilon_insensitive_condition = InCondition(
            epsilon_insensitive, loss,
            ["epsilon_insensitive", "squared_epsilon_insensitive"])
        power_t_condition = EqualsCondition(power_t, learning_rate,
                                            "invscaling")
        # eta0 is only relevant if learning_rate != 'optimal' according to the
        # code:
        # https://github.com/scikit-learn/scikit-learn/blob/0.19.X/sklearn/
        # linear_model/sgd_fast.pyx#L603
        eta0_in_inv_con = InCondition(eta0, learning_rate,
                                      ["invscaling", "constant"])
        cs.add_conditions([elasticnet, epsilon_huber_condition,
                           epsilon_insensitive_condition, power_t_condition,
                           eta0_in_inv_con])

        return cs
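# Sketch: conditional hyperparameters are active only when their parent takes
# a matching value, so a sampled configuration omits inactive ones, e.g.
# l1_ratio appears only when penalty == "elasticnet" and epsilon_huber only
# when loss == "huber" (assumes the ConfigSpace package's get_dictionary()).
cs = SGD.get_hyperparameter_search_space()
for _ in range(3):
    print(cs.sample_configuration().get_dictionary())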
standardized_X_test = X_scaler.transform(X_test)
standardized_y_test = y_scaler.transform(y_test.values.reshape(-1, 1)).ravel()

# Check
print("mean:", np.mean(standardized_X_train, axis=0),
      np.mean(standardized_y_train, axis=0))  # mean should be ~0
print("std:", np.std(standardized_X_train, axis=0),
      np.std(standardized_y_train, axis=0))   # std should be ~1

# Initialize the model
lm = SGDRegressor(loss="squared_loss", penalty="none",
                  max_iter=args.num_epochs)

# Train
lm.fit(X=standardized_X_train, y=standardized_y_train)

# Predictions (unstandardize them)
pred_train = (lm.predict(standardized_X_train) * np.sqrt(y_scaler.var_)) \
    + y_scaler.mean_
pred_test = (lm.predict(standardized_X_test) * np.sqrt(y_scaler.var_)) \
    + y_scaler.mean_

# Train and test MSE
train_mse = np.mean((y_train - pred_train) ** 2)
test_mse = np.mean((y_test - pred_test) ** 2)
print("train_MSE: {0:.2f}, test_MSE: {1:.2f}".format(train_mse, test_mse))

# Figure size
plt.figure(figsize=(15, 5))
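# Equivalent unstandardization via the scaler itself (a sketch, not part of
# the original script): for StandardScaler, inverse_transform multiplies by
# sqrt(var_) and adds mean_, matching the manual computation above.
pred_test_alt = y_scaler.inverse_transform(
    lm.predict(standardized_X_test).reshape(-1, 1)).ravel()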
E(w, b) = 1/n * sum(L(yi, f(xi))) + alpha * R(w)

Note: L is the loss function, R(w) is the regularization term (penalty).

For the Elastic Net penalty:
    R(w) = p/2 * sum(wi^2) + (1 - p) * sum(|wi|)
where p is given by 1 - l1_ratio.

For the inverse scaling learning_rate:
    lr = eta0 / t^power_t
'''
regr = SGDRegressor(penalty='elasticnet',
                    alpha=0.0001,
                    l1_ratio=0.25,
                    learning_rate='invscaling',
                    eta0=0.01,
                    power_t=0.25,
                    loss='epsilon_insensitive',
                    epsilon=0.1,
                    shuffle=True,
                    fit_intercept=True,
                    n_iter=1000000,
                    average=False,
                    verbose=0)
regr.fit(x, y)
data_pred = regr.predict(x)
y_pred = scaler.inverse_transform(data_pred)

print('coefficients: \n', regr.coef_)
# if the data is expected to be already centered, intercept_ is not needed
print('intercept: \n', regr.intercept_)

# Calculate mean squared error
print('Mean Squared Error: %.4f' % mean_squared_error(y, data_pred))
# Calculate R^2 (regression score function)
print('Variance score: %.2f' % r2_score(y, data_pred))
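# Illustration of the inverse-scaling schedule from the docstring above: with
# the eta0=0.01 and power_t=0.25 used here, the step size decays as
# eta0 / t**power_t over update steps t.
for t in (1, 10, 100, 1000):
    print("t=%d  lr=%.5f" % (t, 0.01 / t ** 0.25))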
class SGD(
    IterativeComponent,
    AutoSklearnRegressionAlgorithm,
):
    def __init__(self, loss, penalty, alpha, fit_intercept, tol,
                 learning_rate, l1_ratio=0.15, epsilon=0.1,
                 eta0=0.01, power_t=0.5, average=False, random_state=None):
        self.loss = loss
        self.penalty = penalty
        self.alpha = alpha
        self.fit_intercept = fit_intercept
        self.tol = tol
        self.learning_rate = learning_rate
        self.l1_ratio = l1_ratio
        self.epsilon = epsilon
        self.eta0 = eta0
        self.power_t = power_t
        self.random_state = random_state
        self.average = average
        self.estimator = None
        self.scaler = None

    def iterative_fit(self, X, y, n_iter=2, refit=False):
        from sklearn.linear_model.stochastic_gradient import SGDRegressor
        import sklearn.preprocessing

        # Need to fit at least two iterations, otherwise early stopping will
        # not work because we cannot determine whether the algorithm actually
        # converged. The only way of finding this out is if the sgd spends
        # fewer iterations than max_iter. If max_iter == 1, it has to spend at
        # least one iteration and will always spend at least one iteration,
        # so we cannot know about convergence.
        n_iter = max(n_iter, 2)

        if refit:
            self.estimator = None
            self.scaler = None

        if self.estimator is None:
            self.alpha = float(self.alpha)
            self.fit_intercept = check_for_bool(self.fit_intercept)
            self.tol = float(self.tol)
            self.l1_ratio = float(
                self.l1_ratio) if self.l1_ratio is not None else 0.15
            self.epsilon = float(
                self.epsilon) if self.epsilon is not None else 0.1
            self.eta0 = float(self.eta0)
            self.power_t = float(
                self.power_t) if self.power_t is not None else 0.25
            self.average = check_for_bool(self.average)
            self.estimator = SGDRegressor(loss=self.loss,
                                          penalty=self.penalty,
                                          alpha=self.alpha,
                                          fit_intercept=self.fit_intercept,
                                          max_iter=n_iter,
                                          tol=self.tol,
                                          learning_rate=self.learning_rate,
                                          l1_ratio=self.l1_ratio,
                                          epsilon=self.epsilon,
                                          eta0=self.eta0,
                                          power_t=self.power_t,
                                          shuffle=True,
                                          average=self.average,
                                          random_state=self.random_state,
                                          warm_start=True)
            # The target is standardized for fitting; predict() inverts the
            # transformation.
            self.scaler = sklearn.preprocessing.StandardScaler(copy=True)
            self.scaler.fit(y.reshape((-1, 1)))
            Y_scaled = self.scaler.transform(y.reshape((-1, 1))).ravel()
            self.estimator.fit(X, Y_scaled)
        else:
            self.estimator.max_iter += n_iter
            self.estimator.max_iter = min(self.estimator.max_iter, 512)
            Y_scaled = self.scaler.transform(y.reshape((-1, 1))).ravel()
            self.estimator._validate_params()
            self.estimator._partial_fit(
                X, Y_scaled,
                alpha=self.estimator.alpha,
                C=1.0,
                loss=self.estimator.loss,
                learning_rate=self.estimator.learning_rate,
                max_iter=n_iter,
                sample_weight=None,
                coef_init=None,
                intercept_init=None
            )

        if self.estimator.max_iter >= 512 or n_iter > self.estimator.n_iter_:
            self.fully_fit_ = True

        return self

    def configuration_fully_fitted(self):
        if self.estimator is None:
            return False
        elif not hasattr(self, 'fully_fit_'):
            return False
        else:
            return self.fully_fit_

    def predict(self, X):
        if self.estimator is None:
            raise NotImplementedError()
        Y_pred = self.estimator.predict(X)
        return self.scaler.inverse_transform(Y_pred)

    @staticmethod
    def get_properties(dataset_properties=None):
        return {'shortname': 'SGD Regressor',
                'name': 'Stochastic Gradient Descent Regressor',
                'handles_regression': True,
                'handles_classification': False,
                'handles_multiclass': False,
                'handles_multilabel': False,
                'is_deterministic': True,
                'handles_sparse': True,
                'input': (DENSE, SPARSE, UNSIGNED_DATA),
                'output': (PREDICTIONS,),
                }

    @staticmethod
    def get_hyperparameter_search_space(dataset_properties=None):
        cs = ConfigurationSpace()
        loss = CategoricalHyperparameter(
            "loss", ["squared_loss", "huber", "epsilon_insensitive",
                     "squared_epsilon_insensitive"],
            default_value="squared_loss")
        penalty = CategoricalHyperparameter(
            "penalty", ["l1", "l2", "elasticnet"], default_value="l2")
        alpha = UniformFloatHyperparameter(
            "alpha", 1e-7, 1e-1, log=True, default_value=0.0001)
        l1_ratio = UniformFloatHyperparameter(
            "l1_ratio", 1e-9, 1., log=True, default_value=0.15)
        fit_intercept = UnParametrizedHyperparameter("fit_intercept", "True")
        tol = UniformFloatHyperparameter(
            "tol", 1e-5, 1e-1, default_value=1e-4, log=True)
        epsilon = UniformFloatHyperparameter(
            "epsilon", 1e-5, 1e-1, default_value=0.1, log=True)
        learning_rate = CategoricalHyperparameter(
            "learning_rate", ["optimal", "invscaling", "constant"],
            default_value="invscaling")
        eta0 = UniformFloatHyperparameter(
            "eta0", 1e-7, 1e-1, default_value=0.01, log=True)
        power_t = UniformFloatHyperparameter(
            "power_t", 1e-5, 1, default_value=0.25)
        average = CategoricalHyperparameter(
            "average", ["False", "True"], default_value="False")
        cs.add_hyperparameters([loss, penalty, alpha, l1_ratio, fit_intercept,
                                tol, epsilon, learning_rate, eta0, power_t,
                                average])

        # TODO add passive/aggressive here, although not properly documented?
        elasticnet = EqualsCondition(l1_ratio, penalty, "elasticnet")
        epsilon_condition = InCondition(epsilon, loss,
                                        ["huber", "epsilon_insensitive",
                                         "squared_epsilon_insensitive"])
        # eta0 is only relevant if learning_rate != 'optimal' according to the
        # code:
        # https://github.com/scikit-learn/scikit-learn/blob/0.19.X/sklearn/
        # linear_model/sgd_fast.pyx#L603
        eta0_in_inv_con = InCondition(eta0, learning_rate,
                                      ["invscaling", "constant"])
        power_t_condition = EqualsCondition(power_t, learning_rate,
                                            "invscaling")
        cs.add_conditions([elasticnet, epsilon_condition, power_t_condition,
                           eta0_in_inv_con])

        return cs
from sklearn.linear_model.stochastic_gradient import SGDRegressor

x_train = [[1, 0., 3], [1, 1., 3], [1, 2., 3], [1, 3., 2], [1, 4., 4]]
y_train = [95.364, 97.217205, 75.195834, 60.105519, 49.342380]

model = SGDRegressor(max_iter=5000000, alpha=0.00001)
model.fit(x_train, y_train)
# coefficients from a previous run: [ 45.71878249 -13.02758034   1.14608487]
print(model.coef_)
print(model.intercept_)
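# Usage sketch: predict for a new (hypothetical) input row in the same
# [bias, x1, x2] layout used by x_train above.
print(model.predict([[1, 2.5, 3.]]))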
# Define a plotting helper function
def plt_helper(label, title, xlabel='x axis', ylabel='y axis'):
    fig = plt.figure()
    ax = fig.add_subplot(111, label=label)
    ax.set_title(title, fontproperties=myfont)
    ax.set_xlabel(xlabel, fontproperties=myfont)
    ax.set_ylabel(ylabel, fontproperties=myfont)
    ax.grid(True)
    return ax


ax1 = plt_helper('ax1', 'Distribution of the simulated data')
ax1.plot(X[:, 0], y, 'r*')

#%%
linear_SGD = SGDRegressor(loss='squared_loss', max_iter=100)
linear_SGD.fit(train_x, train_y)
y_SGD = linear_SGD.predict(test_x)

linear_rg = LinearRegression(
    fit_intercept=True,  # fit the intercept
    normalize=False,     # do not normalize the data before regression
    copy_X=True,         # copy X so the original values are not modified
    n_jobs=-1)           # use all CPUs
linear_rg.fit(train_x, train_y)
y_rg = linear_rg.predict(test_x)

print('True parameters of the simulated data', coef)
print('SGDRegressor model parameters', linear_SGD.coef_)
print('LinearRegression model parameters', linear_rg.coef_)

scores = cross_val_score(linear_SGD, train_x, train_y, cv=5)