def get_scaler_time(self):
    """Build and return the scaler used for the time feature.

    Reads the scaler class name from ``self.kwargs["scaler_time"]``
    (default ``self.SCALER_TIME``), applies any extra params from
    ``self.kwargs["scaler_time_params"]``, and logs the choice to MLflow.

    Returns:
        A configured sklearn scaler instance.

    Raises:
        ValueError: if the configured name is not a supported scaler.
    """
    scaler_time = self.kwargs.get("scaler_time", self.SCALER_TIME)
    # Dispatch table instead of if/elif: an unknown name now raises a
    # clear ValueError rather than an UnboundLocalError further down.
    scaler_classes = {
        "RobustScaler": RobustScaler,
        "StandardScaler": StandardScaler,
        "MinMaxScaler": MinMaxScaler,
    }
    if scaler_time not in scaler_classes:
        raise ValueError(
            "Unsupported scaler_time %r; expected one of %s"
            % (scaler_time, sorted(scaler_classes))
        )
    scaler_use = scaler_classes[scaler_time]()
    scaler_time_params = self.kwargs.get("scaler_time_params", {})
    self.mlflow_log_param("scaler_time", scaler_time)
    scaler_use.set_params(**scaler_time_params)
    print(colored(scaler_use.__class__.__name__, "blue"))
    return scaler_use
def get_scaler_raised_amount(self):
    """Build and return the scaler used for the raised-amount feature.

    Reads the scaler class name from ``self.kwargs["scaler_amount"]``
    (default ``self.SCALER_AMOUNT``), applies any extra params from
    ``self.kwargs["scaler_amount_params"]``, and logs the choice to MLflow.

    Returns:
        A configured sklearn scaler instance.

    Raises:
        ValueError: if the configured name is not a supported scaler.
    """
    scaler_amount = self.kwargs.get("scaler_amount", self.SCALER_AMOUNT)
    # Dispatch table instead of if/elif: an unknown name now raises a
    # clear ValueError rather than an UnboundLocalError further down.
    scaler_classes = {
        "RobustScaler": RobustScaler,
        "StandardScaler": StandardScaler,
        "MinMaxScaler": MinMaxScaler,
    }
    if scaler_amount not in scaler_classes:
        raise ValueError(
            "Unsupported scaler_amount %r; expected one of %s"
            % (scaler_amount, sorted(scaler_classes))
        )
    scaler_use = scaler_classes[scaler_amount]()
    scaler_amount_params = self.kwargs.get("scaler_amount_params", {})
    self.mlflow_log_param("scaler_amount", scaler_amount)
    scaler_use.set_params(**scaler_amount_params)
    print(colored(scaler_use.__class__.__name__, "blue"))
    return scaler_use
def get_scaler_participant(self):
    """Build and return the scaler used for the participants feature.

    Reads the scaler class name from ``self.kwargs["scaler_participants"]``
    (default ``self.SCALER_PARTICIPANTS``), applies any extra params from
    ``self.kwargs["scaler_participant_params"]``, and logs the choice to
    MLflow.

    Returns:
        A configured sklearn scaler instance.

    Raises:
        ValueError: if the configured name is not a supported scaler.
    """
    scaler_participants = self.kwargs.get("scaler_participants", self.SCALER_PARTICIPANTS)
    # Dispatch table instead of if/elif: an unknown name now raises a
    # clear ValueError rather than an UnboundLocalError further down.
    scaler_classes = {
        "RobustScaler": RobustScaler,
        "StandardScaler": StandardScaler,
        "MinMaxScaler": MinMaxScaler,
    }
    if scaler_participants not in scaler_classes:
        raise ValueError(
            "Unsupported scaler_participants %r; expected one of %s"
            % (scaler_participants, sorted(scaler_classes))
        )
    scaler_use = scaler_classes[scaler_participants]()
    scaler_participant_params = self.kwargs.get("scaler_participant_params", {})
    self.mlflow_log_param("scaler_participants", scaler_participants)
    scaler_use.set_params(**scaler_participant_params)
    print(colored(scaler_use.__class__.__name__, "blue"))
    return scaler_use
def get_scaler_professionals(self):
    """Build and return the scaler used for the professionals feature.

    Reads the scaler class name from ``self.kwargs["scaler_professionals"]``
    (default ``self.SCALER_PROFESSIONALS``), applies any extra params from
    ``self.kwargs["scaler_professionals_params"]``, and logs the choice to
    MLflow.

    Returns:
        A configured sklearn scaler instance.

    Raises:
        ValueError: if the configured name is not a supported scaler.
    """
    scaler_professionals = self.kwargs.get("scaler_professionals", self.SCALER_PROFESSIONALS)
    # Dispatch table instead of if/elif: an unknown name now raises a
    # clear ValueError rather than an UnboundLocalError further down.
    scaler_classes = {
        "RobustScaler": RobustScaler,
        "StandardScaler": StandardScaler,
        "MinMaxScaler": MinMaxScaler,
    }
    if scaler_professionals not in scaler_classes:
        raise ValueError(
            "Unsupported scaler_professionals %r; expected one of %s"
            % (scaler_professionals, sorted(scaler_classes))
        )
    scaler_use = scaler_classes[scaler_professionals]()
    scaler_professionals_params = self.kwargs.get("scaler_professionals_params", {})
    self.mlflow_log_param("scaler_professionals", scaler_professionals)
    scaler_use.set_params(**scaler_professionals_params)
    print(colored(scaler_use.__class__.__name__, "blue"))
    return scaler_use
class RobustScalerComponent(Rescaling, AutoSklearnPreprocessingAlgorithm):
    """Rescaling component wrapping sklearn's RobustScaler.

    Centers on the median and scales by the (q_min, q_max) quantile
    range, which is robust to outliers.
    """

    def __init__(self, q_min, q_max, random_state):
        from sklearn.preprocessing import RobustScaler
        self.q_min = q_min
        self.q_max = q_max
        # copy=False scales in place to avoid duplicating the data matrix.
        self.preprocessor = RobustScaler(
            quantile_range=(self.q_min, self.q_max), copy=False,
        )

    @staticmethod
    def get_properties(dataset_properties=None):
        return {
            'shortname': 'RobustScaler',
            'name': 'RobustScaler',
            'handles_regression': True,
            'handles_classification': True,
            'handles_multiclass': True,
            'handles_multilabel': True,
            'handles_multioutput': True,
            'is_deterministic': True,
            # TODO find out if this is right!
            'handles_sparse': True,
            'handles_dense': True,
            'input': (SPARSE, DENSE, UNSIGNED_DATA),
            'output': (INPUT, SIGNED_DATA),
            'preferred_dtype': None,
        }

    # BUG FIX: @staticmethod was missing; an instance call would have
    # passed `self` as dataset_properties. Matches the typed variant of
    # this component elsewhere in the project.
    @staticmethod
    def get_hyperparameter_search_space(dataset_properties=None):
        cs = ConfigurationSpace()
        q_min = UniformFloatHyperparameter('q_min', 0.001, 0.3, default_value=0.25)
        q_max = UniformFloatHyperparameter('q_max', 0.7, 0.999, default_value=0.75)
        cs.add_hyperparameters((q_min, q_max))
        return cs

    def fit(self, X, y=None):
        # Sparse matrices cannot be centered without densifying them.
        if sparse.isspmatrix(X):
            self.preprocessor.set_params(with_centering=False)
        return super(RobustScalerComponent, self).fit(X, y)
def classify(datapath, v, normalize=True):
    """Train and print metrics for regression and classification models
    on the combined wine datasets.

    Args:
        datapath: directory name of the datasets.
        v: verbosity flag forwarded to print_metrics.
        normalize: if True, scale features with a RobustScaler fitted on
            the training split. Only the regression section uses the
            scaled data; the classification section below deliberately
            restores the unscaled copies.
    """
    # Grab both wine datasets in one dataset
    concat_data = get_data(datapath)
    # Bag quality scores (3..9) down to 5 classes
    recode = {3: 0, 4: 0, 5: 1, 6: 2, 7: 3, 8: 4, 9: 4}
    concat_data['quality_c'] = bag_data(recode, concat_data, 'quality')
    # Split up dataset 70/30 training,testing
    y_wine = concat_data['quality_c']
    X_wine = concat_data.drop(['quality_c', 'quality'], axis=1)
    X_train, X_test, y_train, y_test = train_test_split(
        X_wine, y_wine, test_size=0.3, random_state=420)
    # Save unscaled copies for the classification section
    X_train_c, X_test_c = X_train.copy(), X_test.copy()

    if normalize:
        # Remove the median and scale by the interquartile range
        # (more robust to outliers than unit-variance scaling).
        sclr = RobustScaler()
        X_train = sclr.fit_transform(X_train)
        # BUG FIX: the original get_params()/set_params() round-trip was a
        # no-op — get_params returns hyperparameters, not fitted statistics.
        # fit_transform above already stored the training median/IQR, so
        # transform() applies the training-set scaling to the test set.
        X_test = sclr.transform(X_test)

    # ==========================================================================
    # REGRESSION PROBLEM
    # ==========================================================================
    # Multivariate Linear Regression.
    # BUG FIX: dropped normalize=True — removed in scikit-learn 1.2 and,
    # for plain OLS, it never changed the predictions.
    clf = LinearRegression(fit_intercept=True, copy_X=True)
    clf.fit(X_train, y_train)
    pred_train = clf.predict(X_train)
    pred_test = clf.predict(X_test)
    print('='*100+"\nLinear Regression:\n")
    print_metrics(clf, X_train, y_train, X_test, y_test, pred_train, pred_test,
                  verbose=v, problem_type=1)
    # ==========================================================================
    # Support Vector Machine (kernel=rbf), regression
    clf = svm.SVR(C=3, kernel='rbf')
    clf.fit(X_train, y_train)
    pred_train = clf.predict(X_train)
    pred_test = clf.predict(X_test)
    print('='*100+"\nSVR :\n")
    print_metrics(clf, X_train, y_train, X_test, y_test, pred_train, pred_test,
                  verbose=v, problem_type=1)
    # ==========================================================================
    # NN regression: grid search over hidden-layer layouts
    h_max = 2  # maximum number of hidden layers explored by build_grid
    hidden_layer_sizes = build_grid(h_max)
    tuned_param = {'hidden_layer_sizes': hidden_layer_sizes}
    clf = GridSearchCV(neural_network.MLPRegressor(), tuned_param, cv=3)
    clf.fit(X_train, y_train)
    pred_train = clf.predict(X_train)
    pred_test = clf.predict(X_test)
    print('='*100+"\nNNs :\n")
    print_metrics(clf, X_train, y_train, X_test, y_test, pred_train, pred_test,
                  verbose=v, problem_type=1)
    print("Best params:", clf.best_params_)
    # ==========================================================================
    # CLASSIFICATION PROBLEM
    # ==========================================================================
    # Restore the unscaled feature matrices
    X_train, X_test = X_train_c, X_test_c
    # Support Vector Machine (kernel=rbf), classification
    clf = svm.SVC(C=3, kernel='rbf', random_state=0)
    clf.fit(X_train, y_train)
    pred_train = clf.predict(X_train)
    pred_test = clf.predict(X_test)
    print('='*100+"\nSVC :\n")
    print_metrics(clf, X_train, y_train, X_test, y_test, pred_train, pred_test,
                  verbose=v)
    # ==========================================================================
    # Support Vector Machine (kernel=rbf), one-vs-rest classification
    clf = OneVsRestClassifier(estimator=svm.SVC(C=3, kernel='rbf', random_state=1))
    clf.fit(X_train, y_train)
    pred_train = clf.predict(X_train)
    pred_test = clf.predict(X_test)
    print('='*100+"\nSVC(OneVsRest):\n")
    print_metrics(clf, X_train, y_train, X_test, y_test, pred_train, pred_test,
                  verbose=v)
    # ==========================================================================
    # NN classification: same grid search as the regression section
    h_max = 2
    hidden_layer_sizes = build_grid(h_max)
    tuned_param = {'hidden_layer_sizes': hidden_layer_sizes}
    clf = GridSearchCV(neural_network.MLPClassifier(), tuned_param, cv=3)
    clf.fit(X_train, y_train)
    pred_train = clf.predict(X_train)
    pred_test = clf.predict(X_test)
    print('='*100+"\nNNs :\n")
    print_metrics(clf, X_train, y_train, X_test, y_test, pred_train, pred_test,
                  verbose=v)
    print("Best params:", clf.best_params_)
class RobustScalerComponent(Rescaling, AutoSklearnPreprocessingAlgorithm):
    """Rescaling component backed by sklearn's RobustScaler.

    Centers on the median and scales by the configured quantile range,
    making the transform robust to outliers.
    """

    def __init__(
        self,
        q_min: float,
        q_max: float,
        random_state: Optional[Union[int, np.random.RandomState]] = None
    ) -> None:
        from sklearn.preprocessing import RobustScaler
        self.q_min = q_min
        self.q_max = q_max
        # copy=False rescales the data in place instead of duplicating it.
        quantiles = (self.q_min, self.q_max)
        self.preprocessor = RobustScaler(quantile_range=quantiles, copy=False)

    @staticmethod
    def get_properties(
        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None
    ) -> Dict[str, Optional[Union[str, int, bool, Tuple]]]:
        # Static capability description consumed by the pipeline builder.
        return dict(
            shortname='RobustScaler',
            name='RobustScaler',
            handles_regression=True,
            handles_classification=True,
            handles_multiclass=True,
            handles_multilabel=True,
            handles_multioutput=True,
            is_deterministic=True,
            # TODO find out if this is right!
            handles_sparse=True,
            handles_dense=True,
            input=(SPARSE, DENSE, UNSIGNED_DATA),
            output=(INPUT, SIGNED_DATA),
            preferred_dtype=None,
        )

    @staticmethod
    def get_hyperparameter_search_space(
        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None
    ) -> ConfigurationSpace:
        # Two tunables: the lower and upper quantiles of the scaling range.
        space = ConfigurationSpace()
        lower = UniformFloatHyperparameter('q_min', 0.001, 0.3, default_value=0.25)
        upper = UniformFloatHyperparameter('q_max', 0.7, 0.999, default_value=0.75)
        space.add_hyperparameters((lower, upper))
        return space

    def fit(
        self, X: PIPELINE_DATA_DTYPE, y: Optional[PIPELINE_DATA_DTYPE] = None
    ) -> 'AutoSklearnPreprocessingAlgorithm':
        # Guard: the component must have been constructed first.
        if self.preprocessor is None:
            raise NotFittedError()
        # Sparse input cannot be centered without densifying it.
        if sparse.isspmatrix(X):
            self.preprocessor.set_params(with_centering=False)
        return super(RobustScalerComponent, self).fit(X, y)