def fetch_predict_estimator(task_type, config, X_train, y_train, weight_balance=0, data_balance=0, combined=False):
    """Build the estimator described by ``config`` and fit it on the training data.

    Args:
        task_type: Task identifier. Membership in ``CLS_TASKS`` selects a
            classification estimator factory; anything else selects the
            regression factory.
        config: Configuration object exposing ``get_dictionary()`` and item
            access via ``config['estimator']``.
        X_train: Training features.
        y_train: Training targets.
        weight_balance: When equal to 1, ``get_weights`` is called and the
            init parameters it returns are merged into the hyperparameters,
            while the fit parameters it returns are forwarded to ``fit``.
        data_balance: When equal to 1, the training data is replaced by the
            output of ``smote(X_train, y_train)``.
        combined: For classification tasks only, use the combined evaluator's
            estimator factory instead of the plain classification one.

    Returns:
        The fitted estimator.
    """
    # Project imports are resolved lazily, at call time.
    from solnml.components.utils.balancing import get_weights, smote

    fit_kwargs = {}
    hyperparams = config.get_dictionary().copy()

    if weight_balance == 1:
        init_overrides, fit_kwargs = get_weights(y_train, config['estimator'], None, {}, {})
        hyperparams.update(init_overrides)

    if data_balance == 1:
        X_train, y_train = smote(X_train, y_train)

    # Pick the estimator factory that matches the task.
    if task_type not in CLS_TASKS:
        from solnml.components.evaluators.reg_evaluator import get_estimator
    elif combined:
        from solnml.utils.combined_evaluator import get_estimator
    else:
        from solnml.components.evaluators.cls_evaluator import get_estimator

    # The factory returns a pair; only the second element (the estimator) is used.
    _, model = get_estimator(hyperparams)
    model.fit(X_train, y_train, **fit_kwargs)
    return model
def holdout_validation(estimator, scorer, X, y, test_size=0.33, fit_params=None, if_stratify=True, onehot=None,
                       random_state=1):
    """Fit ``estimator`` on a single random train split and score it on the held-out part.

    Args:
        estimator: Object with a ``fit(X, y, **fit_params)`` method.
        scorer: Callable invoked as ``scorer(estimator, X_test, y_test)``.
        X: Features; indexed with the index arrays yielded by the splitter.
        y: Targets; indexed the same way as ``X``.
        test_size: Forwarded to the splitter as ``test_size``.
        fit_params: Optional mapping. If it contains ``'sample_weight'``, the
            weights restricted to the training indices are passed to ``fit``;
            otherwise, if it contains ``'data_balance'``, the training split
            is replaced by the output of ``smote``.
        if_stratify: Use ``StratifiedShuffleSplit`` when true, ``ShuffleSplit``
            otherwise.
        onehot: When not ``None``, the held-out targets are converted with
            ``get_onehot_y(onehot, y_test)`` before scoring.
        random_state: Seed forwarded to the splitter.

    Returns:
        Whatever ``scorer`` returns for the held-out split.
    """
    with warnings.catch_warnings():
        # Suppress every warning raised while splitting, fitting and scoring.
        warnings.filterwarnings("ignore")

        splitter_cls = StratifiedShuffleSplit if if_stratify else ShuffleSplit
        splitter = splitter_cls(n_splits=1, test_size=test_size, random_state=random_state)

        # The splitter is built with n_splits=1, and the body returns on its first pass.
        for fit_idx, holdout_idx in splitter.split(X, y):
            X_fit, X_holdout = X[fit_idx], X[holdout_idx]
            y_fit, y_holdout = y[fit_idx], y[holdout_idx]

            fit_kwargs = {}
            if fit_params and 'sample_weight' in fit_params:
                # Keep only the weights of the samples used for fitting.
                fit_kwargs['sample_weight'] = fit_params['sample_weight'][fit_idx]
            elif fit_params and 'data_balance' in fit_params:
                X_fit, y_fit = smote(X_fit, y_fit)

            estimator.fit(X_fit, y_fit, **fit_kwargs)

            if onehot is not None:
                y_holdout = get_onehot_y(onehot, y_holdout)
            return scorer(estimator, X_holdout, y_holdout)
def fetch_predict_estimator(task_type, estimator_id, config, X_train, y_train, weight_balance=0, data_balance=0):
    """Build the estimator identified by ``estimator_id`` and fit it on the training data.

    Args:
        task_type: Task identifier; must be a member of ``CLS_TASKS`` or
            ``RGS_TASKS``.
        estimator_id: Identifier passed to ``get_weights`` and to the
            estimator factory.
        config: Hyperparameter mapping supporting ``copy()`` and item
            assignment; it is copied, never modified in place.
        X_train: Training features.
        y_train: Training targets.
        weight_balance: When equal to 1, ``get_weights`` is called and the
            init parameters it returns are merged into the hyperparameters,
            while the fit parameters it returns are forwarded to ``fit``.
        data_balance: When equal to 1, the training data is replaced by the
            output of ``smote(X_train, y_train)``.

    Returns:
        The fitted estimator.

    Raises:
        ValueError: If ``task_type`` is neither a classification nor a
            regression task.
    """
    # Resolve the estimator factory first: an unsupported task type then fails
    # immediately with a clear message, instead of surfacing as an
    # UnboundLocalError on ``get_estimator`` after the balancing work is done.
    if task_type in CLS_TASKS:
        from solnml.components.evaluators.cls_evaluator import get_estimator
    elif task_type in RGS_TASKS:
        from solnml.components.evaluators.rgs_evaluator import get_estimator
    else:
        raise ValueError('Unsupported task type: %r' % (task_type,))

    from solnml.components.utils.balancing import get_weights, smote

    _fit_params = {}
    config_dict = config.copy()

    if weight_balance == 1:
        _init_params, _fit_params = get_weights(y_train, estimator_id, None, {}, {})
        for key, val in _init_params.items():
            config_dict[key] = val

    if data_balance == 1:
        X_train, y_train = smote(X_train, y_train)

    # The factory returns a pair; only the second element (the estimator) is used.
    _, estimator = get_estimator(config_dict, estimator_id)
    estimator.fit(X_train, y_train, **_fit_params)
    return estimator
def validation(estimator, scorer, X_train, y_train, X_val, y_val, fit_params=None, onehot=None, random_state=1):
    """Fit ``estimator`` on the given training set and score it on the validation set.

    Args:
        estimator: Object with a ``fit(X, y, **fit_params)`` method.
        scorer: Callable invoked as ``scorer(estimator, X_val, y_val)``.
        X_train: Training features.
        y_train: Training targets.
        X_val: Validation features.
        y_val: Validation targets.
        fit_params: Optional mapping. If it contains ``'sample_weight'``, that
            value is passed to ``fit`` unchanged; otherwise, if it contains
            ``'data_balance'``, the training data is replaced by the output
            of ``smote``.
        onehot: When not ``None``, the validation targets are converted with
            ``get_onehot_y(onehot, y_val)`` before scoring.
        random_state: Not used by this function.

    Returns:
        Whatever ``scorer`` returns for the validation set.
    """
    with warnings.catch_warnings():
        # Suppress every warning raised while fitting and scoring.
        warnings.filterwarnings("ignore")

        fit_kwargs = {}
        if fit_params and 'sample_weight' in fit_params:
            fit_kwargs['sample_weight'] = fit_params['sample_weight']
        elif fit_params and 'data_balance' in fit_params:
            X_train, y_train = smote(X_train, y_train)

        estimator.fit(X_train, y_train, **fit_kwargs)

        if onehot is not None:
            y_val = get_onehot_y(onehot, y_val)
        return scorer(estimator, X_val, y_val)
def cross_validation(estimator, scorer, X, y, n_fold=5, shuffle=True, fit_params=None, if_stratify=True, onehot=None,
                     random_state=1):
    """Score ``estimator`` with k-fold cross-validation and return the mean fold score.

    Args:
        estimator: Object with a ``fit(X, y, **fit_params)`` method; it is
            refitted on every fold.
        scorer: Callable invoked as ``scorer(estimator, valid_x, valid_y)``.
        X: Features; indexed with the index arrays yielded by the splitter.
        y: Targets; indexed the same way as ``X``.
        n_fold: Number of folds.
        shuffle: Whether the splitter shuffles the data before splitting.
        fit_params: Optional mapping. If it contains ``'sample_weight'``, the
            weights restricted to each fold's training indices are passed to
            ``fit``; otherwise, if it contains ``'data_balance'``, each
            training fold is replaced by the output of ``smote``.
        if_stratify: Use ``StratifiedKFold`` when true, ``KFold`` otherwise.
        onehot: When not ``None``, each validation fold's targets are
            converted with ``get_onehot_y(onehot, valid_y)`` before scoring.
        random_state: Seed for the splitter; only forwarded when ``shuffle``
            is true.

    Returns:
        ``np.mean`` of the per-fold scores.
    """
    with warnings.catch_warnings():
        # Suppress every warning raised while splitting, fitting and scoring.
        warnings.filterwarnings("ignore")

        # A seed has no effect without shuffling, and recent scikit-learn
        # versions raise ValueError if one is supplied with shuffle=False.
        seed = random_state if shuffle else None
        splitter_cls = StratifiedKFold if if_stratify else KFold
        kfold = splitter_cls(n_splits=n_fold, random_state=seed, shuffle=shuffle)

        scores = []
        for train_idx, valid_idx in kfold.split(X, y):
            train_x, valid_x = X[train_idx], X[valid_idx]
            train_y, valid_y = y[train_idx], y[valid_idx]

            _fit_params = {}
            if fit_params:
                if 'sample_weight' in fit_params:
                    # Keep only the weights of the samples in this training fold.
                    _fit_params['sample_weight'] = fit_params['sample_weight'][train_idx]
                elif 'data_balance' in fit_params:
                    # Rebalance this fold's training data. It must be rebound to
                    # train_x/train_y so the resampled data is what gets fitted.
                    train_x, train_y = smote(train_x, train_y)

            estimator.fit(train_x, train_y, **_fit_params)

            if onehot is not None:
                valid_y = get_onehot_y(onehot, valid_y)
            scores.append(scorer(estimator, valid_x, valid_y))

        return np.mean(scores)