def pipeline(x_train, y_train, x_test, y_test, param_dict=None,
             problem='classification'):
    """Fits a random forest on the training split and scores it on the test split.

    Depending on `problem`, either a RandomForestClassifier or a
    RandomForestRegressor is built; training and evaluation are delegated to
    `generic_pipeline`.

    Args:
      x_train: np.array or scipy.sparse.*matrix
        array of features of training data
      y_train: np.array
        1-D array of targets (class labels or regression values) of training
        data
      x_test: np.array or scipy.sparse.*matrix
        array of features of test data
      y_test: np.array
        1-D array of targets of the test data
      param_dict: {string: ?}
        keyword arguments forwarded unchanged to the random forest
        constructor; None means "use the estimator defaults"
      problem: string
        type of learning problem; values = 'classification', 'regression'

    Returns:
      The result of `generic_pipeline`; per the original contract this is
        model: the trained random forest (classifier or regressor)
        metrics: {str: float}
          dictionary of metric scores
    """
    assert problem in ['classification', 'regression']

    hyperparams = {} if param_dict is None else param_dict

    # Select the estimator class first, then instantiate it once.
    forest_cls = (ensemble.RandomForestRegressor if problem == 'regression'
                  else ensemble.RandomForestClassifier)
    forest = forest_cls(**hyperparams)

    return generic_pipeline(
        forest, x_train, y_train, x_test, y_test, problem=problem)
def pipeline(x_train, y_train, x_test, y_test, param_dict=None,
             problem='classification'):
    """Trains and evaluates a linear model (classifier or regressor).

    For classification the features are rescaled with MaxAbsScaler (fit on the
    training data only) before training. The concrete model class is picked by
    `choose_linear_model` from the problem type and the 'penalty' parameter.

    Args:
      x_train: np.array or scipy.sparse.*matrix
        array of features of training data
      y_train: np.array
        1-D array of class labels (or regression targets) of training data
      x_test: np.array or scipy.sparse.*matrix
        array of features of test data
      y_test: np.array
        1-D array of class labels (or regression targets) of the test data
      param_dict: {string: ?}
        dictionary of parameters and their values; may contain 'penalty'
        (defaults to 'l2'). The caller's dictionary is never modified.
      problem: string
        type of learning problem; values = 'classification', 'regression'

    Returns:
      model: sklearn.linear_model.*
        trained linear model
      metrics: {str: float}
        dictionary of metric scores
    """
    assert problem in ['classification', 'regression']

    # Work on a shallow copy: 'penalty' is popped below for regression, and
    # doing that on the caller's dict would make a second call with the same
    # dict silently fall back to the default penalty.
    param_dict = {} if param_dict is None else dict(param_dict)

    if problem == 'classification':
        # The scaler is fit on the training split only and then applied to the
        # test split, so no test statistics leak into training.
        scaler = preprocessing.MaxAbsScaler()
        x_train = scaler.fit_transform(x_train)
        x_test = scaler.transform(x_test)

    if problem == 'regression':
        # For regression 'penalty' only selects the model class and is removed
        # from the constructor kwargs (presumably because the selected
        # regressors do not accept a `penalty` argument -- confirm against
        # choose_linear_model).
        penalty = param_dict.pop('penalty', 'l2')
    else:
        # For classification 'penalty' stays in the kwargs passed to the model.
        penalty = param_dict.get('penalty', 'l2')

    model_init = choose_linear_model(problem, penalty)
    model = model_init(**param_dict)

    return generic_pipeline(
        model, x_train, y_train, x_test, y_test, problem=problem)
def pipeline(x_train, y_train, x_test, y_test, param_dict=None,
             problem='classification'):
    """Runs a pipeline to train and evaluate gradient boosted tree models.

    Builds an xgb.XGBRegressor for regression or an xgb.XGBClassifier for
    classification, then delegates training and evaluation to
    `generic_pipeline`.

    Args:
      x_train: np.array or scipy.sparse.*matrix
        array of features of training data
      y_train: np.array
        1-D array of class labels (or regression targets) of training data
      x_test: np.array or scipy.sparse.*matrix
        array of features of test data
      y_test: np.array
        1-D array of class labels (or regression targets) of the test data
      param_dict: {string: ?}
        dictionary of parameters and their values, forwarded to the XGBoost
        estimator constructor. The caller's dictionary is never modified.
      problem: string
        type of learning problem; values = 'classification', 'regression'

    Returns:
      model: trained XGBoost gradient boosted trees model
      metrics: {str: float}
        dictionary of metric scores
    """
    assert problem in ['classification', 'regression']

    # Shallow copy so that the inferred 'objective' written below does not
    # leak into the caller's dict (and from there into later calls that may
    # use a different label set).
    param_dict = {} if param_dict is None else dict(param_dict)

    if problem == 'regression':
        model = xgb.XGBRegressor(**param_dict)
    else:
        if 'objective' not in param_dict:
            # Infer binary vs. multi-class from the largest label; this
            # assumes labels are encoded as 0..K-1 -- TODO confirm with
            # callers.
            is_binary = max(y_train) + 1 == 2
            param_dict['objective'] = get_objective(is_binary)
        model = xgb.XGBClassifier(**param_dict)

    return generic_pipeline(
        model, x_train, y_train, x_test, y_test, problem=problem)
def pipeline(x_train, y_train, x_test, y_test, param_dict=None,
             problem='classification'):
    """Trains and evaluates a DNN classifier or regressor.

    'epochs' and 'batch_size' are taken out of `param_dict` and given to the
    Keras wrapper; every remaining entry is forwarded to `keras_build_fn`.

    Args:
      x_train: np.array or scipy.sparse.*matrix
        array of features of training data
      y_train: np.array
        1-D array of class labels (or regression targets) of training data
      x_test: np.array or scipy.sparse.*matrix
        array of features of test data
      y_test: np.array
        1-D array of class labels (or regression targets) of the test data
      param_dict: {string: ?}
        dictionary of parameters and their values. 'epochs' defaults to 10
        and 'batch_size' to 256 when absent. The caller's dictionary is not
        modified.
      problem: string
        type of learning problem; values = 'classification', 'regression'

    Returns:
      model: Keras.models.Model
        trained Keras model
      metrics: {str: float}
        dictionary of metric scores
    """
    assert problem in ['classification', 'regression']

    # Copy before popping so the caller's dict is left intact.
    param_dict = {} if param_dict is None else param_dict.copy()

    # Fall back to the same values used when no param_dict is given, instead
    # of raising KeyError for a dict that only sets architecture parameters.
    num_epoch = param_dict.pop('epochs', 10)
    batch_size = param_dict.pop('batch_size', 256)

    num_feature = x_train.shape[1]
    is_sparse = sparse.issparse(x_train)

    if problem == 'regression':
        num_output = 1
        loss = 'mean_squared_error'
        model_init = KerasRegressor
    else:
        # One output unit per distinct label seen in the training data.
        num_output = len(set(y_train))
        loss = 'categorical_crossentropy'
        model_init = FunctionalKerasClassifier

    # Whatever is left in param_dict is passed through to the build function.
    build_fn = pseudo_partial(
        keras_build_fn,
        num_feature=num_feature,
        num_output=num_output,
        is_sparse=is_sparse,
        loss=loss,
        **param_dict)

    model = model_init(
        build_fn=build_fn,
        epochs=num_epoch,
        batch_size=batch_size,
        shuffle=True,
        verbose=False)

    return generic_pipeline(
        model, x_train, y_train, x_test, y_test, problem=problem)