Example #1
    def get_scaler_time(self):
        scaler_time = self.kwargs.get("scaler_time", self.SCALER_TIME)
        if scaler_time == "RobustScaler":
            scaler_use = RobustScaler()
        elif scaler_time == "StandardScaler":
            scaler_use = StandardScaler()
        elif scaler_time == "MinMaxScaler":
            scaler_use = MinMaxScaler()
        else:
            raise ValueError(f"Unknown scaler_time: {scaler_time}")

        scaler_time_params = self.kwargs.get("scaler_time_params", {})
        self.mlflow_log_param("scaler_time", scaler_time)
        scaler_use.set_params(**scaler_time_params)
        print(colored(scaler_use.__class__.__name__, "blue"))

        return scaler_use
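
The getter resolves the scaler class from a string in `kwargs`, logs the choice to MLflow, and forwards any extra constructor parameters via `set_params`. A minimal usage sketch, assuming a trainer class built around this pattern (the class name and kwargs values here are hypothetical):

    trainer = Trainer(
        scaler_time="MinMaxScaler",                    # selects the branch above
        scaler_time_params={"feature_range": (0, 1)},  # forwarded to set_params
    )
    scaler = trainer.get_scaler_time()  # returns a configured MinMaxScaler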
Example #2
    def get_scaler_raised_amount(self):
        scaler_amount = self.kwargs.get("scaler_amount", self.SCALER_AMOUNT)
        if scaler_amount == "RobustScaler":
            scaler_use = RobustScaler()
        elif scaler_amount == "StandardScaler":
            scaler_use = StandardScaler()
        elif scaler_amount == "MinMaxScaler":
            scaler_use = MinMaxScaler()
        else:
            raise ValueError(f"Unknown scaler_amount: {scaler_amount}")

        scaler_amount_params = self.kwargs.get("scaler_amount_params", {})
        self.mlflow_log_param("scaler_amount", scaler_amount)
        scaler_use.set_params(**scaler_amount_params)
        print(colored(scaler_use.__class__.__name__, "blue"))

        return scaler_use
Example #3
    def get_scaler_participant(self):
        scaler_participants = self.kwargs.get("scaler_participants", self.SCALER_PARTICIPANTS)
        if scaler_participants == "RobustScaler":
            scaler_use = RobustScaler()
        elif scaler_participants == "StandardScaler":
            scaler_use = StandardScaler()
        elif scaler_participants == "MinMaxScaler":
            scaler_use = MinMaxScaler()
        else:
            raise ValueError(f"Unknown scaler_participants: {scaler_participants}")

        scaler_participant_params = self.kwargs.get("scaler_participant_params", {})
        self.mlflow_log_param("scaler_participants", scaler_participants)
        scaler_use.set_params(**scaler_participant_params)
        print(colored(scaler_use.__class__.__name__, "blue"))

        return scaler_use
Example #4
    def get_scaler_professionals(self):
        scaler_professionals = self.kwargs.get("scaler_professionals", self.SCALER_PROFESSIONALS)
        if scaler_professionals == "RobustScaler":
            scaler_use = RobustScaler()
        elif scaler_professionals == "StandardScaler":
            scaler_use = StandardScaler()
        elif scaler_professionals == "MinMaxScaler":
            scaler_use = MinMaxScaler()
        else:
            raise ValueError(f"Unknown scaler_professionals: {scaler_professionals}")

        scaler_professionals_params = self.kwargs.get("scaler_professionals_params", {})
        self.mlflow_log_param("scaler_professionals", scaler_professionals)
        scaler_use.set_params(**scaler_professionals_params)
        print(colored(scaler_use.__class__.__name__, "blue"))

        return scaler_use
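
Examples #1 through #4 repeat the same name-to-class dispatch. A single parameterized helper could replace all four getters; a minimal sketch under that assumption (the helper name and key scheme are hypothetical):

    from sklearn.preprocessing import MinMaxScaler, RobustScaler, StandardScaler

    _SCALERS = {"RobustScaler": RobustScaler,
                "StandardScaler": StandardScaler,
                "MinMaxScaler": MinMaxScaler}

    def get_scaler(self, name_key, default):
        # e.g. name_key="scaler_time" reads "scaler_time" and "scaler_time_params"
        scaler_name = self.kwargs.get(name_key, default)
        scaler_use = _SCALERS[scaler_name]()  # raises KeyError for unknown names
        scaler_use.set_params(**self.kwargs.get(name_key + "_params", {}))
        self.mlflow_log_param(name_key, scaler_name)
        return scaler_use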
Example #5
class RobustScalerComponent(Rescaling, AutoSklearnPreprocessingAlgorithm):
    def __init__(self, q_min, q_max, random_state):
        from sklearn.preprocessing import RobustScaler
        self.q_min = q_min
        self.q_max = q_max
        self.preprocessor = RobustScaler(
            quantile_range=(self.q_min, self.q_max),
            copy=False,
        )

    @staticmethod
    def get_properties(dataset_properties=None):
        return {
            'shortname': 'RobustScaler',
            'name': 'RobustScaler',
            'handles_regression': True,
            'handles_classification': True,
            'handles_multiclass': True,
            'handles_multilabel': True,
            'handles_multioutput': True,
            'is_deterministic': True,
            # TODO find out if this is right!
            'handles_sparse': True,
            'handles_dense': True,
            'input': (SPARSE, DENSE, UNSIGNED_DATA),
            'output': (INPUT, SIGNED_DATA),
            'preferred_dtype': None
        }

    @staticmethod
    def get_hyperparameter_search_space(dataset_properties=None):
        cs = ConfigurationSpace()
        q_min = UniformFloatHyperparameter('q_min',
                                           0.001,
                                           0.3,
                                           default_value=0.25)
        q_max = UniformFloatHyperparameter('q_max',
                                           0.7,
                                           0.999,
                                           default_value=0.75)
        cs.add_hyperparameters((q_min, q_max))
        return cs

    def fit(self, X, y=None):
        if sparse.isspmatrix(X):
            self.preprocessor.set_params(with_centering=False)

        return super(RobustScalerComponent, self).fit(X, y)
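
A sketch of how such auto-sklearn components are typically driven from their search space, assuming the ConfigSpace package used above:

    cs = RobustScalerComponent.get_hyperparameter_search_space()
    config = cs.sample_configuration()  # draws q_min from [0.001, 0.3] and q_max from [0.7, 0.999]
    component = RobustScalerComponent(q_min=config["q_min"],
                                      q_max=config["q_max"],
                                      random_state=None)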
Example #6
def classify(datapath, v, normalize=True):
	# datapath: directory containing the datasets; (v)erbose: True or False; normalize=True scales the training data
	# Grab both wine datasets in one dataset
	concat_data = get_data(datapath)
	# Bin the quality scores into 5 classes
	recode = {3: 0, 4: 0, 5: 1, 6: 2, 7: 3, 8: 4, 9: 4}
	concat_data['quality_c'] = bag_data(recode,concat_data,'quality')

	# Split the dataset 70/30 into training and testing sets
	y_wine = concat_data['quality_c']
	X_wine = concat_data.drop(['quality_c','quality'], axis=1)
	X_train, X_test, y_train, y_test = train_test_split(X_wine, y_wine, test_size=0.3, random_state=420)

	X_train_c, X_test_c = X_train.copy(), X_test.copy()  # keep unscaled copies for the classification section

	if normalize:
		# Scale training examples by removing the median and dividing by the interquartile range
		# (more robust to outliers than standardizing to zero mean and unit variance)
		sclr = RobustScaler()
		X_train = sclr.fit_transform(X_train)
		# The fitted scaler retains the training statistics (center_, scale_), so the test set is
		# transformed with the same parameters; a get_params()/set_params() round-trip would be a
		# no-op, since it only copies constructor arguments, not fitted state
		X_test = sclr.transform(X_test)

	# Set parameters by cross validation
	#==========================================================================================
	# REGRESSION PROBLEM
	#==========================================================================================
	# Multivariate Linear Regression
	clf = LinearRegression(fit_intercept=True, copy_X=True)  # the `normalize` argument was removed in scikit-learn 1.2
	clf.fit(X_train, y_train)
	# Make Predictions for both sets
	pred_train = clf.predict(X_train)
	pred_test = clf.predict(X_test)
	print('='*100+"\nLinear Regression:\n")
	print_metrics(clf,X_train,y_train,X_test,y_test,pred_train,pred_test,verbose=v,problem_type=1)
	#==========================================================================================
	# Support Vector Machine(kernel=rbf), Regression
	clf = svm.SVR(C=3,kernel='rbf')
	clf.fit(X_train, y_train)
	# Make Predictions for both sets
	pred_train = clf.predict(X_train)
	pred_test = clf.predict(X_test)
	print('='*100+"\nSVR :\n")
	print_metrics(clf,X_train,y_train,X_test,y_test,pred_train,pred_test,verbose=v,problem_type=1)
	#==========================================================================================
	# NN Regression, default params
	# Grid Search
	h_max = 2 #specify maximum number of hidden layers
	hidden_layer_sizes = build_grid(h_max)
	tuned_param = {'hidden_layer_sizes': hidden_layer_sizes}
	clf = GridSearchCV(neural_network.MLPRegressor(),tuned_param,cv=3) 
	clf.fit(X_train,y_train)
	# Make Predictions for both sets
	pred_train = clf.predict(X_train)
	pred_test = clf.predict(X_test)
	print('='*100+"\nNNs :\n")
	print_metrics(clf,X_train,y_train,X_test,y_test,pred_train,pred_test,verbose=v,problem_type=1)
	print("Best params:", clf.best_params_)
	#==========================================================================================
	# CLASSIFICATION PROBLEM
	#==========================================================================================
	# Restore the unscaled training and test sets
	X_train, X_test = X_train_c, X_test_c
	# Support Vector Machine(Kernel=rbf), Classification
	clf = svm.SVC(C=3,kernel='rbf',random_state=0)
	clf.fit(X_train, y_train)
	# Make Predictions for both sets
	pred_train = clf.predict(X_train)
	pred_test = clf.predict(X_test)
	print('='*100+"\nSVC :\n")
	print_metrics(clf,X_train,y_train,X_test,y_test,pred_train,pred_test,verbose=v)
	#==========================================================================================
	# Support Vector Machine(Kernel=rbf), One vs Rest Classification
	clf = OneVsRestClassifier(estimator=svm.SVC(C=3,kernel='rbf', random_state=1))
	clf.fit(X_train, y_train)
	# Make Predictions for both sets
	pred_train = clf.predict(X_train)
	pred_test = clf.predict(X_test)
	print('='*100+"\nSVC(OneVsRest):\n")
	print_metrics(clf,X_train,y_train,X_test,y_test,pred_train,pred_test,verbose=v)
	#==========================================================================================
	# NN Classification
	# Grid Search
	h_max = 2 #specify maximum number of hidden layers
	hidden_layer_sizes = build_grid(h_max)
	tuned_param = {'hidden_layer_sizes': hidden_layer_sizes}
	clf = GridSearchCV(neural_network.MLPClassifier(),tuned_param,cv=3)
	clf.fit(X_train,y_train)
	# Make Predictions for both sets
	pred_train = clf.predict(X_train)
	pred_test = clf.predict(X_test)
	print('='*100+"\nNNs :\n")
	print_metrics(clf,X_train,y_train,X_test,y_test,pred_train,pred_test,verbose=v)
	print("Best params:", clf.best_params_)
Example #7
class RobustScalerComponent(Rescaling, AutoSklearnPreprocessingAlgorithm):
    def __init__(
        self,
        q_min: float,
        q_max: float,
        random_state: Optional[Union[int,
                                     np.random.RandomState]] = None) -> None:
        from sklearn.preprocessing import RobustScaler
        self.q_min = q_min
        self.q_max = q_max
        self.preprocessor = RobustScaler(
            quantile_range=(self.q_min, self.q_max),
            copy=False,
        )

    @staticmethod
    def get_properties(
        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None
    ) -> Dict[str, Optional[Union[str, int, bool, Tuple]]]:
        return {
            'shortname': 'RobustScaler',
            'name': 'RobustScaler',
            'handles_regression': True,
            'handles_classification': True,
            'handles_multiclass': True,
            'handles_multilabel': True,
            'handles_multioutput': True,
            'is_deterministic': True,
            # TODO find out if this is right!
            'handles_sparse': True,
            'handles_dense': True,
            'input': (SPARSE, DENSE, UNSIGNED_DATA),
            'output': (INPUT, SIGNED_DATA),
            'preferred_dtype': None
        }

    @staticmethod
    def get_hyperparameter_search_space(
        dataset_properties: Optional[DATASET_PROPERTIES_TYPE] = None
    ) -> ConfigurationSpace:
        cs = ConfigurationSpace()
        q_min = UniformFloatHyperparameter('q_min',
                                           0.001,
                                           0.3,
                                           default_value=0.25)
        q_max = UniformFloatHyperparameter('q_max',
                                           0.7,
                                           0.999,
                                           default_value=0.75)
        cs.add_hyperparameters((q_min, q_max))
        return cs

    def fit(
        self,
        X: PIPELINE_DATA_DTYPE,
        y: Optional[PIPELINE_DATA_DTYPE] = None
    ) -> 'AutoSklearnPreprocessingAlgorithm':
        if self.preprocessor is None:
            raise NotFittedError()
        if sparse.isspmatrix(X):
            self.preprocessor.set_params(with_centering=False)

        return super(RobustScalerComponent, self).fit(X, y)
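
The sparse branch in fit() exists because centering would densify a sparse matrix. Plain scikit-learn shows the same constraint; a standalone sketch, independent of the component above:

    from scipy import sparse
    from sklearn.preprocessing import RobustScaler

    X = sparse.random(100, 5, density=0.3, format="csr")
    scaler = RobustScaler(with_centering=False)  # centering is unsupported for sparse input
    Xt = scaler.fit_transform(X)                 # scales by the IQR and preserves sparsity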