Example #1
 def train_ensemble_predictor(self, data: np.ndarray,
                              labels: np.ndarray,
                              predictor: str = None,
                              model_params: str = None):
     try:
         model_params = json.loads(model_params)
     except json.decoder.JSONDecodeError:
         # safe_load avoids executing arbitrary YAML tags; plain yaml.load
         # without a Loader is also deprecated in PyYAML >= 5.1
         model_params = yaml.safe_load(model_params)
     model = self.MODELS[predictor](**model_params)
     if predictor == 'SVR':
         # If the model is an SVR, extend its functionality
         # to multi-target regression:
         model = MultiOutputRegressor(model)
     models_count, samples, classes = data.shape
     data = data.swapaxes(0, 1).reshape(samples, models_count * classes)
     self.predictor = model.fit(data, labels)
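A minimal standalone sketch of the same wrap-and-reshape pattern (the synthetic data and the bare SVR base model are assumptions, not the original `self.MODELS` setup):

import numpy as np
from sklearn.svm import SVR
from sklearn.multioutput import MultiOutputRegressor

# Hypothetical stacked predictions: 4 base models x 100 samples x 3 classes
data = np.random.random((4, 100, 3))
labels = np.random.random((100, 2))

models_count, samples, classes = data.shape
# Flatten the per-model class scores into one feature row per sample
features = data.swapaxes(0, 1).reshape(samples, models_count * classes)

# SVR is single-output, so wrap it to handle the two regression targets
model = MultiOutputRegressor(SVR()).fit(features, labels)
print(model.predict(features[:2]).shape)  # (2, 2)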
Example #2
def test_multi_target_regression():
    X, y = datasets.make_regression(n_targets=3)
    X_train, y_train = X[:50], y[:50]
    X_test, y_test = X[50:], y[50:]

    references = np.zeros_like(y_test)
    for n in range(3):
        rgr = GradientBoostingRegressor(random_state=0)
        rgr.fit(X_train, y_train[:, n])
        references[:, n] = rgr.predict(X_test)

    rgr = MultiOutputRegressor(GradientBoostingRegressor(random_state=0))
    rgr.fit(X_train, y_train)
    y_pred = rgr.predict(X_test)

    assert_almost_equal(references, y_pred)
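The test above asserts that MultiOutputRegressor is equivalent to fitting one clone of the base estimator per target column; a quick sketch of inspecting those clones (dataset values are arbitrary):

from sklearn.datasets import make_regression
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.multioutput import MultiOutputRegressor

X, y = make_regression(n_targets=3, random_state=0)
rgr = MultiOutputRegressor(GradientBoostingRegressor(random_state=0)).fit(X, y)
print(len(rgr.estimators_))  # 3: one fitted clone per target column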
Example #3
def test_diff_detector_threshold(mode: str, n_features_x: int,
                                 n_features_y: int):
    """
    Basic construction logic of thresholds_ attribute in the
    DiffBasedAnomalyDetector and DiffBasedKFCVAnomalyDetector
    """
    X = np.random.random((300, n_features_x))
    y = np.random.random((300, n_features_y))

    base_estimator = MultiOutputRegressor(estimator=LinearRegression())
    if mode == "tscv":
        model = DiffBasedAnomalyDetector(base_estimator=base_estimator)
    elif mode == "kfcv":
        model = DiffBasedKFCVAnomalyDetector(base_estimator=base_estimator)

    # Model has own implementation of cross_validate
    assert hasattr(model, "cross_validate")

    # When initialized it should not have a threshold calculated.
    assert not hasattr(model, "feature_thresholds_")
    assert not hasattr(model, "aggregate_threshold_")
    assert not hasattr(model, "feature_thresholds_per_fold_")
    assert not hasattr(model, "aggregate_thresholds_per_fold_")

    model.fit(X, y)

    # Until it has done cross validation, it has no threshold.
    assert not hasattr(model, "feature_thresholds_")
    assert not hasattr(model, "aggregate_threshold_")
    assert not hasattr(model, "feature_thresholds_per_fold_")
    assert not hasattr(model, "aggregate_thresholds_per_fold_")

    # Calling cross validate should set the threshold for it.
    model.cross_validate(X=X, y=y)

    # Now we have calculated thresholds based on cross validation folds
    assert hasattr(model, "feature_thresholds_")
    assert hasattr(model, "aggregate_threshold_")
    assert isinstance(model.feature_thresholds_, pd.Series)
    assert len(model.feature_thresholds_) == y.shape[1]
    assert all(model.feature_thresholds_.notna())

    if not isinstance(model, DiffBasedKFCVAnomalyDetector):
        assert hasattr(model, "feature_thresholds_per_fold_")
        assert hasattr(model, "aggregate_thresholds_per_fold_")
        assert isinstance(model.feature_thresholds_per_fold_, pd.DataFrame)
        assert isinstance(model.aggregate_thresholds_per_fold_, dict)
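The `mode` and `n_features_*` arguments suggest this test's pytest parametrize decorators were dropped during extraction; a plausible reconstruction (the parameter values are assumptions):

import pytest

@pytest.mark.parametrize("mode", ["tscv", "kfcv"])    # assumed values
@pytest.mark.parametrize("n_features_x", [1, 2, 10])  # assumed values
@pytest.mark.parametrize("n_features_y", [1, 2, 10])  # assumed values
def test_diff_detector_threshold(mode: str, n_features_x: int,
                                 n_features_y: int):
    ...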
Example #4
def find_best_params(train_data, train_labels, test_data, test_labels):
    test_len = len(test_data)

    # Search space where the best params will be chosen
    c_values = [
        0.0000001, 0.0000005, 0.000001, 0.000005, 0.00001, 0.00005, 0.0001,
        0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 10.0, 50.0, 100.0,
        500.0, 1000.0, 5000.0, 10000.0, 50000.0
    ]
    c_val_len = len(c_values)

    eps_values = [
        0.0000001, 0.0000005, 0.000001, 0.000005, 0.00001, 0.00005, 0.0001,
        0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 10.0
    ]
    eps_val_len = len(eps_values)

    # Variables to be set according to the RMSEs to be calculated
    min_rmse_sum = 1e10
    c_idx = -1
    eps_idx = -1

    for i in range(c_val_len):
        for j in range(eps_val_len):
            svm_reg = svm.SVR(C=c_values[i], epsilon=eps_values[j])

            pred_labels = MultiOutputRegressor(svm_reg).fit(
                train_data, train_labels).predict(test_data)
            rmse_lat = 0.0
            rmse_long = 0.0

            for k in range(test_len):
                rmse_lat = rmse_lat + (pred_labels[k][0] -
                                       test_labels.iloc[k, 0])**2
                rmse_long = rmse_long + (pred_labels[k][1] -
                                         test_labels.iloc[k, 1])**2

            rmse_lat = math.sqrt(rmse_lat / test_len)
            rmse_long = math.sqrt(rmse_long / test_len)

            if (rmse_lat + rmse_long < min_rmse_sum):
                min_rmse_sum = rmse_lat + rmse_long
                c_idx = i
                eps_idx = j

    print('Best C', c_values[c_idx])
    print('Best EPS', eps_values[eps_idx])
Example #5
def baselineModels(model_name):
    if model_name == 'REG':
        model = LinearRegression()
    elif model_name == 'SVR':
        model = SVR(cache_size=1000)
    elif model_name == 'TREE':
        model = RandomForestRegressor()
    elif model_name == 'ENSEMBLE':  # ensemble of linear regression, SVR and random forest
        model = [LinearRegression(), SVR(), RandomForestRegressor()]

    if prediction_type == 'multi':  # `prediction_type` is assumed to be a module-level global
        # MultiOutputRegressor wraps a single estimator, so wrap each
        # member of an ensemble individually rather than the whole list.
        if isinstance(model, list):
            model = [MultiOutputRegressor(m, n_jobs=-1) for m in model]
        else:
            model = MultiOutputRegressor(model, n_jobs=-1)

    return model
Example #6
def adaMultiple(X, y):
    temp_cls_ = AdaBoostRegressor()

    # Parameters of the wrapped AdaBoostRegressor are addressed through
    # the `estimator__` prefix added by MultiOutputRegressor.
    parameters = {
        'estimator__n_estimators': [50, 60, 70, 80],
        'estimator__learning_rate': [0.01, 0.1, 1],
    }

    param_tuner_ = GridSearchCV(MultiOutputRegressor(temp_cls_),
                                param_grid=parameters)
    param_tuner_.fit(X, y)
    # GridSearchCV refits the best configuration on the full data by
    # default (refit=True), so best_estimator_ is already fitted.
    cls = param_tuner_.best_estimator_
    return cls
Example #7
    def base_estimator(self, value):
        # Build `base_estimator` if string given
        if isinstance(value, str):
            value = cook_estimator(
                value, space=self.space, random_state=self.rng.randint(0, np.iinfo(np.int32).max)
            )

        # Check if regressor
        if not is_regressor(value) and value is not None:
            raise ValueError(f"`base_estimator` must be a regressor. Got {value}")

        # Treat per second acquisition function specially
        is_multi_regressor = isinstance(value, MultiOutputRegressor)
        if self.acq_func.endswith("ps") and not is_multi_regressor:
            value = MultiOutputRegressor(value)

        self._base_estimator = value
Example #8
 def fit(self, X, y):
     X, y = np.array(X), np.array(y)
     for i, (train_idx, test_idx) in enumerate(self.folds.split(X)):
         # print("Fold #%u" % (i + 1))
         # print("=========================================")
         X_train, y_train = X[train_idx], y[train_idx]
         best = (float('inf'), None)
         X_test, y_test = X[test_idx], y[test_idx]
         for num_features in self.FEATURES:
             cf = MultiOutputRegressor(RandomForestRegressor(max_features=num_features, n_estimators=100, n_jobs=-1))
             cf.fit(X_train, y_train)
             y_pred = cf.predict(X_test)
             error = mean_absolute_error(y_test, y_pred)
             if error < best[0]:
                 best = (error, cf)
         self.models.append(best[1])
     return self
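The fit above keeps the best model of each fold in self.models; a hypothetical companion predict (not part of the original class) could average their outputs:

 def predict(self, X):
     # Average the predictions of the per-fold models selected in fit()
     return np.mean([m.predict(np.array(X)) for m in self.models], axis=0)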
Example #9
def regression(train_x, train_label, test_x, test_label):
    clf = MultiOutputRegressor(svm.SVR(gamma='scale'))
    clf.fit(train_x, train_label)
    y_pred = pd.DataFrame(clf.predict(test_x))

    category = y_pred.shape[1]
    # Pearson = np.corrcoef(test_label, y_pred, rowvar=False)
    RMSE = np.sqrt(mean_squared_error(test_label, y_pred, multioutput='raw_values'))

    result = []
    for i in range(category):
        result.append(RMSE[i])
    return result
Example #10
    def __init__(
        self,
        tracker: ModelTracker,
        objective: Literal["regression", "ranking"] = "regression",
        use_simple_dataset_features: bool = False,
        use_seasonal_naive_performance: bool = False,
        use_catch22_features: bool = False,
        predict: Optional[List[str]] = None,
        output_normalization: OutputNormalization = None,
        impute_simulatable: bool = False,
    ):
        """
        Args:
            tracker: A tracker that can be used to impute latency and number of model parameters
                into model performances. Also, it is required for some input features.
            objective: The optimization objective for the XGBoost estimators.
            use_simple_dataset_features: Whether to use dataset features to predict using a
                weighted average.
            use_seasonal_naive_performance: Whether to use the Seasonal Naïve nCRPS as dataset
                features. Requires the cacher to be set.
            use_catch22_features: Whether to use catch22 features as dataset statistics. Ignored
                if `use_simple_dataset_features` is not set.
            predict: The metrics to predict. All if not provided.
            output_normalization: The type of normalization to apply to the features of each
                dataset independently. `None` applies no normalization, "quantile" applies quantile
                normalization, and "standard" transforms data to have zero mean and unit variance.
            impute_simulatable: Whether the tracker should impute latency and number of model
                parameters into the returned performance object.
        """
        super().__init__(tracker, predict, output_normalization,
                         impute_simulatable)

        self.use_ranking = objective == "ranking"
        self.config_transformer = ConfigTransformer(
            add_model_features=True,
            add_dataset_statistics=use_simple_dataset_features,
            add_seasonal_naive_performance=use_seasonal_naive_performance,
            add_catch22_features=use_catch22_features,
            tracker=tracker,
        )

        if self.use_ranking:
            base_estimator = XGBRanker(objective="rank:pairwise", nthread=4)
        else:
            base_estimator = XGBRegressor(nthread=4)
        self.estimator = MultiOutputRegressor(base_estimator)
Example #11
def train_right_eye_cyl_axis_model(config):
    try:
        print("Model training started...")

        # Import the dataset
        bucket_file = get_training_data(config)
        dataset = pd.read_csv(io.BytesIO(bucket_file['Body'].read()))

        # Extract data for the right eye - cyl/axis
        columns = config["data_set_columns"]["right_eye_cyl_axis"]

        right_eye_dataset = pd.DataFrame(dataset, columns=columns)

        # Check for duplicates and remove if exists
        duplicates_exists = right_eye_dataset.duplicated().any()
        if duplicates_exists:
            right_eye_dataset = right_eye_dataset.drop_duplicates()

        # map categorical data
        notes_map = {"happy": 1, "unhappy": 0}
        right_eye_dataset["notes"] = right_eye_dataset["notes"].map(notes_map)

        # Create feature matrix
        X = right_eye_dataset.iloc[:, :-3]

        # Create predicted matrix
        y = right_eye_dataset.iloc[:, 7:9]

        # Split dataset to train and test set
        from sklearn.model_selection import train_test_split
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.3, random_state=42)

        # SVR - Train the model
        from sklearn.svm import SVR
        from sklearn.multioutput import MultiOutputRegressor
        regressor = MultiOutputRegressor(SVR(kernel="linear"), n_jobs=-1)
        regressor.fit(X_train, y_train)

        print("Model training done.")

        return list(X.columns), regressor
    except Exception as e:
        print(str(e))
        return None, None
Example #12
def run_one_configuration(
    full_train_covariate_matrix,
    complete_target,
    new_valid_covariate_data_frames,
    new_valid_target_data_frame,
    std_data_frame,
    target_clusters,
    featurizer,
    model_name,
    parameters,
    log_file,
):
    model_baseline = dict()
    model_baseline["type"] = model_name
    model_baseline["target_clusters"] = target_clusters

    if model_name == "multi_task_lasso":
        model = MultiTaskLasso(max_iter=5000, **parameters)
    elif model_name == "xgboost":
        model = MultiOutputRegressor(
            XGBRegressor(n_jobs=10,
                         objective="reg:squarederror",
                         verbosity=0,
                         **parameters))

    model.fit(featurizer(full_train_covariate_matrix),
              complete_target.to_numpy(copy=True))
    model_baseline["model"] = lambda x: model.predict(featurizer(x))

    skill, _, _, _ = location_wise_metric(
        new_valid_target_data_frame,
        new_valid_covariate_data_frames,
        std_data_frame,
        model_baseline,
        "skill",
    )
    cos_sim, _, _, _ = location_wise_metric(
        new_valid_target_data_frame,
        new_valid_covariate_data_frames,
        std_data_frame,
        model_baseline,
        "cosine-sim",
    )
    with open(log_file, "a") as f:
        f.write(f"{len(target_clusters)} {parameters} {skill} {cos_sim}\n")
Example #13
 def first_stage():
     return GridSearchCVList([
         LinearRegression(),
         WeightedMultiTaskLasso(
             alpha=0.05, fit_intercept=True, tol=1e-6, random_state=123),
         RandomForestRegressor(n_estimators=100,
                               max_depth=3,
                               min_samples_leaf=10,
                               random_state=123),
         MultiOutputRegressor(
             GradientBoostingRegressor(n_estimators=20,
                                       max_depth=3,
                                       min_samples_leaf=10,
                                       random_state=123))
     ],
                             param_grid_list=[{}, {}, {}, {}],
                             cv=3,
                             iid=True)
Example #14
    def XGBoost_mod(self, daily_df, interval_forecast):
        test_df = daily_df.loc['Total'].T
        final_df = test_df.copy()
        fixed_interval = 5
        for i in range(fixed_interval + interval_forecast):
            final_df = pd.concat([test_df.shift(i + 1), final_df], axis=1)
        final_df = final_df.iloc[fixed_interval + interval_forecast:, 1:]
        final_df.columns = [i for i in range(fixed_interval + interval_forecast)]

        # NB: early_stopping_rounds requires an eval_set at fit time in
        # recent xgboost releases; none is provided below.
        model = xgb.XGBRegressor(n_estimators=300, early_stopping_rounds=50, verbosity=0)
        x, y = final_df.iloc[:, :-interval_forecast], final_df.iloc[:, -interval_forecast:]
        x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.1)

        multi_model = MultiOutputRegressor(model).fit(x_train, y_train)

        x_forecast = pd.DataFrame(final_df.iloc[-1, interval_forecast:].tolist(), index=x_train.columns).T
        pred = multi_model.predict(x_forecast)
        return pred[0]
Example #15
def generate_joint_model(single_model):
    model = MultiOutputRegressor(single_model)
    model.fit(X_train, Y_train)
    
    score_train = model.score(X_train, Y_train)
    print('Score of train', round(score_train * 100, 1), "%")
    
    score = model.score(X_test, Y_test)
    print('Score of test', round(score * 100, 1), "%")
    
    model_path = (model_folder + "/" +
                  str(round(score, 3)).replace('.', '_') + "_" +
                  str(model.get_params()['estimator']).split('(')[0] +
                  '.joblib')
    joblib.dump(model, model_path)
    print("Save model file", model_path)
    
    return model, model_path
Example #16
def objective(space):

    global X, Xt, y, yt

    clf = MultiOutputRegressor(
        XGBRegressor(n_estimators=int(space['n_estimators']),
                     max_depth=int(space['max_depth']),
                     gamma=space['gamma'],
                     reg_alpha=space['reg_alpha'],
                     reg_lambda=space['reg_lambda'],
                     min_child_weight=space['min_child_weight']))

    # MultiOutputRegressor.fit only forwards extra fit parameters on recent
    # scikit-learn versions; without an eval_set there is nothing for
    # XGBoost's `verbose` flag to print anyway, so keep the call plain.
    clf.fit(X, y)

    pred = clf.predict(Xt)
    mse = mean_squared_error(yt, pred)  # despite the "SCORE" label, lower is better
    print("SCORE:", mse)
    return {'loss': mse, 'status': STATUS_OK}
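The space[...] lookups above suggest a hyperopt search; a minimal driver sketch under that assumption (the search ranges are made up):

from hyperopt import Trials, fmin, hp, tpe

space = {
    'n_estimators': hp.quniform('n_estimators', 100, 1000, 50),
    'max_depth': hp.quniform('max_depth', 3, 12, 1),
    'gamma': hp.uniform('gamma', 0, 5),
    'reg_alpha': hp.uniform('reg_alpha', 0, 1),
    'reg_lambda': hp.uniform('reg_lambda', 0, 1),
    'min_child_weight': hp.quniform('min_child_weight', 1, 10, 1),
}
best = fmin(fn=objective, space=space, algo=tpe.suggest,
            max_evals=50, trials=Trials())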
Example #17
def train_diff_levels(noise, size):
    # Load data with specified amount of noise and number of examples.
    data = Data(noise,
                size,
                imageFiles='./datasets/noise_0_alt/train_data/regular/*.png',
                labelFiles='./datasets/noise_0_alt/train_data/regular/*.npy')

    # Train the SVR.
    svr = LinearSVR(tol=0.1, verbose=10)
    multi_svr = MultiOutputRegressor(svr, n_jobs=-1)
    multi_svr.fit(data.x / 255.0, data.y)

    # Save trained model.
    pickle.dump(
        multi_svr,
        open(
            "saved_models/svr/noise_{0}_training_{1}.ckpt".format(noise, size),
            'wb'))
Example #18
def load_SVM():
    '''
    Loads the Support Vector Machine models and gives a name for the output files.

    Parameters : None

    Returns    : model_name : (str) Name of the model for the output file.
                 clf        : (Classifier) Building and floor classifier.
                 regr       : (Regressor) Longitude and latitude regressor.
    '''
    model_name = "Support Vector Machine"
    clf = SVC(C=100, kernel="linear", max_iter=1000)
    clf = MultiOutputClassifier(clf)

    regr = SVR(C=100, kernel="linear", max_iter=1000)
    regr = MultiOutputRegressor(regr)

    return model_name, clf, regr
Example #19
def randomSearch(base_model, random_grid):
    random = RandomizedSearchCV(MultiOutputRegressor(base_model),
                                param_distributions=random_grid,
                                n_iter=100,
                                cv=3,
                                verbose=2,
                                random_state=42,
                                n_jobs=-1)

    random.fit(train_X, train_y)
    print(random.best_params_)
    best_random = random.best_estimator_
    pred_y_train = best_random.predict(train_X)
    print_scores(train_y_array, pred_y_train)
    pred_y_test = best_random.predict(test_X)
    print_scores(test_y_array, pred_y_test)
    pred_y_dev = best_random.predict(dev_X)
    print_scores(dev_y_array, pred_y_dev)
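Keys in random_grid must carry the estimator__ prefix to reach the wrapped model; a usage sketch assuming a random forest base model (the grid values are made up):

from sklearn.ensemble import RandomForestRegressor

random_grid = {
    'estimator__n_estimators': [100, 200, 500],
    'estimator__max_depth': [None, 10, 30],
    'estimator__min_samples_leaf': [1, 2, 4],
}
randomSearch(RandomForestRegressor(), random_grid)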
Example #20
def crossValidationMLPR(X, Y):
    """Tries several network configurations and reports their scores."""

    # Split the set into a training set and a validation set
    print("***Splitting off the validation set***")
    x_train, x_validation, y_train, y_validation_txt = train_test_split(
        X, Y, stratify=Y, test_size=0.2, shuffle=True)
    y_train, y_validation = transformerGranuArgi(
        y_train), transformerGranuArgi(y_validation_txt)

    print('***Defining the parameters to test***')
    param = {
        'hidden_layer_sizes': [
            tuple(np.random.randint(20, 35, np.random.randint(3, 5, 1)))
            for _ in range(5)
        ]
    }

    print('***Defining the models to train***')
    mlpr = [
        MLPRegressor(solver='adam',
                     max_iter=1000,
                     alpha=1e-5,
                     activation='tanh',
                     hidden_layer_sizes=param['hidden_layer_sizes'][i])
        for i in range(len(param['hidden_layer_sizes']))
    ]
    multioutput_rna = [MultiOutputRegressor(modele) for modele in mlpr]

    # Score of correct results on the validation set
    resultat_sur_validation = [
        0 for _ in range(len(param['hidden_layer_sizes']))
    ]

    for i, modele in enumerate(multioutput_rna):
        print(
            f"[Training model {i}] Hidden layer sizes: {param['hidden_layer_sizes'][i]}"
        )
        modele.fit(x_train, y_train)
        print(modele.score(x_validation, y_validation))
        y_res = modele.predict(x_validation)
        y_res = conversionPredictionSol(y_res)
        print(scorePrediction(y_res, np.array(y_validation_txt)))
        print('\n')
Example #21
def runBaseLineRegression(model_params,data,estimator):

	#regr = MultiOutputRegressor(sklearn.linear_model.LinearRegression())
	regr = MultiOutputRegressor(estimator)
	#regr = MultiOutputRegressor(sklearn.linear_model.BayesianRidge())
	#regr = MultiOutputRegressor(sklearn.linear_model.Lasso())

	#data
	AP_train,TRP_train = data[0]
	AP_dev,TRP_dev = data[1]

	if model_params["DirectionForward"]:
		X_train,Y_train,X_dev,Y_dev = TRP_train,AP_train,TRP_dev,AP_dev
	else:
		X_train,Y_train,X_dev,Y_dev = AP_train,TRP_train,AP_dev,TRP_dev
		model_params["OutputNames"],model_params["InputNames"] = model_params["InputNames"],model_params["OutputNames"]

	regr.fit(X_train,Y_train)
	Y_dev_pred = regr.predict(X_dev)
	Y_train_pred = regr.predict(X_train)

	if model_params["DirectionForward"]:
		#train
		mse_total_train = customUtils.mse_p(ix = (3,6),Y_pred = Y_train_pred,Y_true = Y_train)
		#dev
		mse_total_dev = customUtils.mse_p(ix = (3,6),Y_pred = Y_dev_pred,Y_true = Y_dev)

	else:
		mse_total_train = mse(Y_train,Y_train_pred,multioutput = 'raw_values')
		mse_total_dev = mse(Y_dev,Y_dev_pred,multioutput = 'raw_values')

	
	model_location = os.path.join('models',model_params["model_name"] +  '.json')


	with open(os.path.join('model_params',model_params["model_name"] +  '.json'), 'w') as fp:
		json.dump(model_params, fp, sort_keys=True)

	_ = run_eval_base(model_location,dataset = "train",email = model_params["email"])
	_ = run_eval_base(model_location,dataset = "test",email = model_params["email"])
	mse_total = run_eval_base(model_location,dataset = "dev",email = model_params["email"])

	
	return (mse_total_train.tolist(),mse_total_dev.tolist(),mse_total_train.sum(),mse_total_dev.sum())
Example #22
    def decision_function(self, X):
        X = X.copy()
        X.iloc[:, :-2] *= 1e12

        L, parcel_indices_L, subj_dict = self._get_lead_field_info()
        # use only Lead Fields of the subjects found in X
        subj_dict = dict((k, subj_dict[k]) for k in np.unique(X['subject']))
        self.lead_field, self.parcel_indices = [], []
        subj_dict_x = {}
        for idx, s_key in enumerate(subj_dict.keys()):
            subj_dict_x[s_key] = idx
            self.lead_field.append(L[subj_dict[s_key]])
            self.parcel_indices.append(parcel_indices_L[subj_dict[s_key]])

        X['subject_id'] = X['subject'].map(subj_dict_x)
        X = X.astype({'subject_id': 'int32'})  # astype returns a copy, so reassign
        model = MultiOutputRegressor(self.model, n_jobs=self.n_jobs)
        X = X.reset_index(drop=True)

        betas = np.empty((len(X), 0)).tolist()
        for subj_idx in np.unique(X['subject_id']):
            l_used = self.lead_field[subj_idx]

            X_used = X[X['subject_id'] == subj_idx]
            X_used = X_used.iloc[:, :-2]

            norms = l_used.std(axis=0)
            l_used = l_used / norms[None, :]

            alpha_max = abs(l_used.T.dot(X_used.T)).max() / len(l_used)
            alpha = 0.2 * alpha_max
            model.estimator.alpha = alpha
            model.fit(l_used, X_used.T)  # cross validation done here

            for idx, idx_used in enumerate(X_used.index.values):
                est_coef = np.abs(_get_coef(model.estimators_[idx]))
                est_coef /= norms
                beta = pd.DataFrame(
                        np.abs(est_coef)
                        ).groupby(
                        self.parcel_indices[subj_idx]).max().transpose()
                betas[idx_used] = np.array(beta).ravel()
        betas = np.array(betas)
        return betas
Example #23
def train_consumer():
    cdf = pd.read_csv(CONSUMER_TRAINING)

    xs = ['risk', 'delta_risk', 'grat_payoff', 'delta_grat_payoff',\
        'inv_payoff', 'delta_inv_payoff', 'surface_area_risk_factor',\
        'delta_surface_area_risk_factor']

    ys = ['GREED', 'FOCUS', 'SPEND', 'INVEST']

    cx, cy = cdf[xs], cdf[ys]
    # will use a multi-output regressor
    model = MultiOutputRegressor(
        GradientBoostingRegressor(random_state=0)).fit(cx, cy)

    # opening with 'wb' truncates CMODEL_FILE, so no separate clearing step is needed
    with open(CMODEL_FILE, 'wb') as f:
        pickle.dump(model, f)
Example #24
def evaluate(individual):
    C = 1 + 2 * abs(individual[0]) * 1.00e03
    epsilon = 0.1 + abs(individual[1]) * 0.1 + 0.02
    gamma = abs(individual[2]) * 0.1 + 0.02
    multi_regr_rbf = MultiOutputRegressor(
        SVR(kernel='rbf', C=C, epsilon=epsilon, gamma=gamma))
    model = multi_regr_rbf.fit(x_train, y_train)
    output = multi_regr_rbf.predict(x_test)
    r_squared = abs(multi_regr_rbf.score(x_test, y_test))
    if r_squared > 1:
        r_squared = 0
    params = (r_squared, e.AkaikeInformationCriterion_c(output),
              e.BayesianInformationCriterion(output), e.PRESS(output, y_test),
              e.MAPE(output,
                     y_test), e.StructuralRiskMinimisation(output, y_test),
              e.FinalPredictionError(output,
                                     y_test), e.RMSErrors(output, y_test))
    print("The parameters are: ", params)
    return params
Example #25
def gbr_model(yvar, n_estimators, max_depth, min_samples_leaf,
              min_samples_split, max_features, loss):
    if max_features != 'auto':
        max_features = int(max_features)
    n_estimators, min_samples_leaf, min_samples_split, max_depth = \
        int(n_estimators), int(min_samples_leaf), int(min_samples_split), int(max_depth)

    reg = GradientBoostingRegressor(random_state=42,
                                    max_depth=max_depth,
                                    n_estimators=n_estimators,
                                    max_features=max_features,
                                    min_samples_leaf=min_samples_leaf,
                                    loss=loss,
                                    min_samples_split=min_samples_split)
    if yvar.shape[1] == 1:
        reg_trans = reg
    else:
        reg_trans = MultiOutputRegressor(reg, n_jobs=-1)
    return reg_trans
Example #26
def make_bayesian_pred(df, next_week, debug=0):
    """
    This method creates predictions using bayesian regression.
    """
    space = {
        'estimator__alpha_1': [1e-10, 1e-5, 1],
        'estimator__alpha_2': [1e-10, 1e-5, 1],
        'estimator__lambda_1': [1e-10, 1e-5, 1],
        'estimator__lambda_2': [1e-10, 1e-5, 1],
        'estimator__n_iter': [10, 300, 1000],
        'estimator__normalize': [True, False],
        'estimator__fit_intercept': [True, False]
    }
    params = {
        'estimator__alpha_1': [1e-10, 1e-5, 1, 5],
        'estimator__alpha_2': [1e-10, 1e-5, 1, 5],
        'estimator__lambda_1': [1e-10, 1e-5, 1, 5],
        'estimator__lambda_2': [1e-10, 1e-5, 1, 5],
        'estimator__n_iter': [10, 300, 1000],
        'estimator__normalize': [True, False],
        'estimator__n_jobs': -1,
        'n_jobs': -1,
        'estimator__fit_intercept': [True, False]
    }
    X_train, X_test, Y_train, Y_test = process_data(df, next_week)
    multi_bay = MultiOutputRegressor(BayesianRidge())
    #multi_bay.set_params(**params)
    #best_random = grid_search(multi_bay, space, next_week, 3, X_train, Y_train)
    multi_bay.fit(X_train, Y_train)
    next_week[Y_train.columns] = multi_bay.predict(next_week[X_train.columns])
    if debug:
        y_pred_untrain = multi_bay.predict(X_train)
        print(next_week)
        print("Score: ", multi_bay.score(X_train, Y_train) * 100)
        print("MSE: ", metrics.mean_squared_error(Y_train, y_pred_untrain))
        print(
            "CV: ",
            # cross_val_score expects (estimator, X, y), not targets and predictions
            ms.cross_val_score(multi_bay,
                               X_train,
                               Y_train,
                               cv=10,
                               scoring='neg_mean_squared_error'))
    return next_week
Example #27
def train_model(X, Y, layers=None, weight=False, with_onsets=False):
    """
    Create and train a model from the given data.
    
    Parameters
    ==========
    X : np.ndarray
        An N x (history + num_features + 2) ndarray containing the N data points on which to train.
        
    Y : np.ndarray
        A length-N array, containing the targets for each data point. Or, an (N,2) target ndarray,
        if with_onsets is True.
        
    layers : list(int)
        The hidden layer sizes for the trained network. Defaults to None, which is logistic regression.
        
    weight : boolean
        True to have the model output prior weights, and False to have it output the prior directly.
        Defaults to False.
        
    with_onsets : boolean
        True to output presence and onset values. False for only presence.
        
    Returns
    =======
    model : sklearn classifier
        A trained model.
    """
    if weight:
        convert_targets_to_weight(X, Y, with_onsets=with_onsets)

    if layers is None or len(layers) == 0:
        la = -1
        ac = -2
        strengths = np.abs(X[:, la] - X[:, ac])
        regressor = MultiOutputRegressor(
            LogisticRegression()) if with_onsets else LogisticRegression()
        model = regressor.fit(X, Y, sample_weight=strengths)
    else:
        model = MLPClassifier(max_iter=1000,
                              hidden_layer_sizes=layers).fit(X, Y)

    return model
Example #28
    def create_model(self, C=-1, gamma=-1, epsilon=-1):
        # By convention, -1 means "use the default values chosen in the
        # constructor"; otherwise use the values passed as parameters.
        if C == -1:
            C = self.C
        if gamma == -1:
            gamma = self.gamma
        if epsilon == -1:
            epsilon = self.epsilon

        self.model = SVR(C=C, gamma=gamma, epsilon=epsilon)
        # With multiple y targets, wrap the SVR so it can handle them.
        if self.output_multi:
            multi_output_model = MultiOutputRegressor(estimator=self.model)
            self.model = multi_output_model

        print(self.model)
        return self.model
Example #29
 def __init__(self, fl, max_depth=8, num_est=300, chain=False):
     """
     Initialises new DTR model
     :param fl: fl class
     :param max_depth: max depth of each tree
     :param num_est: Number of estimators in the ensemble of trees
     :param chain: regressor chain (True) or independent multi-output (False)
     """
     self.labels_dim = fl.labels_dim
     self.labels_scaler = fl.labels_scaler
     if chain:
         self.model = RegressorChain(
             AdaBoostRegressor(DecisionTreeRegressor(max_depth=max_depth),
                               n_estimators=num_est))
     else:
         self.model = MultiOutputRegressor(
             AdaBoostRegressor(DecisionTreeRegressor(max_depth=max_depth),
                               n_estimators=num_est))
     self.normalise_labels = fl.normalise_labels
Example #30
    def _check_arguments(self, base_estimator, n_initial_points, acq_optimizer,
                         dimensions):
        """Check arguments for sanity."""

        if isinstance(base_estimator, str):
            base_estimator = cook_estimator(base_estimator,
                                            space=dimensions,
                                            random_state=self.rng.randint(
                                                0,
                                                np.iinfo(np.int32).max))

        if not is_regressor(base_estimator) and base_estimator is not None:
            raise ValueError("%s has to be a regressor." % base_estimator)

        is_multi_regressor = isinstance(base_estimator, MultiOutputRegressor)
        if "ps" in self.acq_func and not is_multi_regressor:
            self.base_estimator_ = MultiOutputRegressor(base_estimator)
        else:
            self.base_estimator_ = base_estimator

        if n_initial_points < 0:
            raise ValueError("Expected `n_initial_points` >= 0, got %d" %
                             n_initial_points)
        self._n_initial_points = n_initial_points
        self.n_initial_points_ = n_initial_points

        if acq_optimizer == "auto":
            if has_gradients(self.base_estimator_):
                acq_optimizer = "lbfgs"
            else:
                acq_optimizer = "sampling"

        if acq_optimizer not in ["lbfgs", "sampling"]:
            raise ValueError("Expected acq_optimizer to be 'lbfgs' or "
                             "'sampling', got {0}".format(acq_optimizer))

        if (not has_gradients(self.base_estimator_)
                and acq_optimizer != "sampling"):
            raise ValueError("The regressor {0} should run with "
                             "acq_optimizer"
                             "='sampling'.".format(type(base_estimator)))

        self.acq_optimizer = acq_optimizer