Code Example #1
File: test_multioutput.py Project: sinhrks/pandas-ml
    def test_multioutput(self):

        # http://scikit-learn.org/stable/auto_examples/ensemble/plot_random_forest_regression_multioutput.html#sphx-glr-auto-examples-ensemble-plot-random-forest-regression-multioutput-py

        from sklearn.multioutput import MultiOutputRegressor
        from sklearn.ensemble import RandomForestRegressor

        # Create a random dataset
        rng = np.random.RandomState(1)
        X = np.sort(200 * rng.rand(600, 1) - 100, axis=0)
        y = np.array([np.pi * np.sin(X).ravel(), np.pi * np.cos(X).ravel()]).T
        y += (0.5 - rng.rand(*y.shape))

        df = pdml.ModelFrame(X, target=y)

        max_depth = 30

        rf1 = df.ensemble.RandomForestRegressor(max_depth=max_depth,
                                                random_state=self.random_state)
        reg1 = df.multioutput.MultiOutputRegressor(rf1)

        rf2 = RandomForestRegressor(max_depth=max_depth,
                                    random_state=self.random_state)
        reg2 = MultiOutputRegressor(rf2)

        df.fit(reg1)
        reg2.fit(X, y)

        result = df.predict(reg2)
        expected = pd.DataFrame(reg2.predict(X))
        tm.assert_frame_equal(result, expected)
Code Example #2
def test_multi_target_sample_weights_api():
    X = [[1, 2, 3], [4, 5, 6]]
    y = [[3.141, 2.718], [2.718, 3.141]]
    w = [0.8, 0.6]

    rgr = MultiOutputRegressor(Lasso())
    assert_raises_regex(ValueError, "does not support sample weights", rgr.fit, X, y, w)

    # no exception should be raised if the base estimator supports weights
    rgr = MultiOutputRegressor(GradientBoostingRegressor(random_state=0))
    rgr.fit(X, y, w)
Code Example #3
def test_acquisition_per_second_gradient(acq_func):
    rng = np.random.RandomState(0)
    X = rng.randn(20, 10)
    # Make the second component large, so that mean_grad and std_grad
    # do not become zero.
    y = np.vstack((X[:, 0], np.abs(X[:, 0])**3)).T

    for X_new in [rng.randn(10), rng.randn(10)]:
        gpr = cook_estimator("GP", Space(((-5.0, 5.0),)), random_state=0)
        mor = MultiOutputRegressor(gpr)
        mor.fit(X, y)
        check_gradient_correctness(X_new, mor, acq_func, 1.5)
Code Example #4
def test_multi_target_sparse_regression():
    X, y = datasets.make_regression(n_targets=3)
    X_train, y_train = X[:50], y[:50]
    X_test = X[50:]

    for sparse in [sp.csr_matrix, sp.csc_matrix, sp.coo_matrix, sp.dok_matrix, sp.lil_matrix]:
        rgr = MultiOutputRegressor(Lasso(random_state=0))
        rgr_sparse = MultiOutputRegressor(Lasso(random_state=0))

        rgr.fit(X_train, y_train)
        rgr_sparse.fit(sparse(X_train), y_train)

        assert_almost_equal(rgr.predict(X_test), rgr_sparse.predict(sparse(X_test)))
Code Example #5
def test_multi_target_sample_weights():
    # weighted regressor
    Xw = [[1, 2, 3], [4, 5, 6]]
    yw = [[3.141, 2.718], [2.718, 3.141]]
    w = [2., 1.]
    rgr_w = MultiOutputRegressor(GradientBoostingRegressor(random_state=0))
    rgr_w.fit(Xw, yw, w)

    # unweighted, but with repeated samples
    X = [[1, 2, 3], [1, 2, 3], [4, 5, 6]]
    y = [[3.141, 2.718], [3.141, 2.718], [2.718, 3.141]]
    rgr = MultiOutputRegressor(GradientBoostingRegressor(random_state=0))
    rgr.fit(X, y)

    X_test = [[1.5, 2.5, 3.5], [3.5, 4.5, 5.5]]
    assert_almost_equal(rgr.predict(X_test), rgr_w.predict(X_test))
Code Example #6
File: solver.py Project: longyangking/Gandalf
class Solver:
    def __init__(self, func, scopes):
        self.func = func
        self.scopes = np.array(scopes)

        self.model = None

    def train(self, epochs=1e3, verbose=False):
        self.model = MultiOutputRegressor(
            MLPRegressor(solver='lbfgs',
                         alpha=1e-5,
                         hidden_layer_sizes=(100, 30),
                         random_state=1))

        n_variables = len(self.scopes)
        xmin = self.scopes[:, 0]
        xmax = self.scopes[:, 1]

        Xs = list()
        Ys = list()
        if verbose:
            print("Generating training data...", end="")
        for i in range(int(epochs)):
            x = xmin + (xmax - xmin) * np.random.random(n_variables)
            Xs.append(self.func(x))
            Ys.append(x)
            if (i + 1) % int(epochs / 10) == 0 and verbose:
                print(" {value:0.0f}% ".format(value=(i + 1) / int(epochs) *
                                               100),
                      end="")

        if verbose:
            print("Complete!")
        #Xs = np.array(Xs)
        #Ys = np.array(Ys)
        if verbose:
            print("Training model...", end='')
        self.model.fit(Xs, Ys)
        if verbose:
            print("End with R^2: {value:0.4f}".format(
                value=self.model.score(Xs, Ys)))

    def evaluate(self, bs):
        return self.model.predict(bs)

    def evaluate_single(self, b):
        return self.model.predict([b])[0]
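A minimal usage sketch for the Solver class above. Note that train() stores func(x) as the inputs and x as the targets, so the model learns the inverse mapping from observables back to parameters; the toy function below is an illustrative assumption, not part of the source:

import numpy as np

# Hypothetical two-parameter function whose inverse we want to learn.
def toy_func(x):
    return np.array([x[0] + x[1], x[0] * x[1]])

solver = Solver(toy_func, scopes=[(0.0, 1.0), (0.0, 1.0)])
solver.train(epochs=1e3, verbose=True)
# Estimate the parameters that would produce the observation [0.9, 0.2].
print(solver.evaluate_single([0.9, 0.2]))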
Code Example #7
    def fit(self, X, y):
        X, y = np.array(X), np.array(y)
        for i, (train_idx, test_idx) in enumerate(self.folds.split(X)):
            # print("Fold #%u" % (i + 1))
            # print("=========================================")
            X_train, y_train = X[train_idx], y[train_idx]
            best = (float('inf'), None)
            X_test, y_test = X[test_idx], y[test_idx]
            for num_features in self.FEATURES:
                cf = MultiOutputRegressor(RandomForestRegressor(max_features=num_features, n_estimators=100, n_jobs=-1))
                cf.fit(X_train, y_train)
                y_pred = cf.predict(X_test)
                error = mean_absolute_error(y_test, y_pred)
                if error < best[0]:
                    best = (error, cf)
            self.models.append(best[1])
        return self
Code Example #8
def regression(train_x, train_label, test_x, test_label):
    clf = MultiOutputRegressor(svm.SVR(gamma='scale'))
    clf.fit(train_x, train_label)
    y_pred = pd.DataFrame(clf.predict(test_x))

    n_targets = y_pred.shape[1]
    # Pearson = np.corrcoef(test_label.iloc[:, ], y_pred, rowvar=False)
    # print(test_label.iloc[:, 0])
    # print(test_label.shape)
    # print("Pearson: ")
    # print(Pearson.shape)
    RMSE = np.sqrt(mean_squared_error(test_label, y_pred, multioutput='raw_values'))

    result = []
    for i in range(0, n_targets):
        result.append(RMSE[i])
    return result
Code Example #9
File: SERVER.py Project: matheusfsa/jMetal-DVL
def train_model(x, y, n):
    x = x.iloc[:n, :]
    y = y.iloc[:n, :]
    est = linear_model.RidgeCV(
        alphas=[0.05, 0.1, 0.3, 1, 3, 5, 10, 15, 30, 50, 75])
    model = MultiOutputRegressor(est)
    model = model.fit(x, y)
    return model
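A usage sketch for train_model above with synthetic DataFrames; the shapes and the 80-row cutoff are illustrative assumptions:

import numpy as np
import pandas as pd

x = pd.DataFrame(np.random.rand(100, 4))
y = pd.DataFrame(np.random.rand(100, 2))

model = train_model(x, y, n=80)     # fit on the first 80 rows only
preds = model.predict(x.iloc[80:])  # predict both targets for the remaining rows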
Code Example #10
File: run_lightgbm.py Project: davidtangGT/MOF_color
def fit(x_train, y_train, parameters_01, parameters_median, parameters_09):
    regressor_median = BaggingRegressor(MultiOutputRegressor(
        LGBMRegressor(objective='quantile', alpha=0.5, **parameters_median)),
                                        n_jobs=-1,
                                        n_estimators=15)
    regressor_median.fit(x_train, y_train)

    regressor_0_1 = MultiOutputRegressor(
        LGBMRegressor(objective='quantile', alpha=0.1, **parameters_01))
    regressor_0_1.fit(x_train, y_train)

    regressor_0_9 = MultiOutputRegressor(
        LGBMRegressor(objective='quantile', alpha=0.9, **parameters_09))

    regressor_0_9.fit(x_train, y_train)

    return regressor_median, regressor_0_1, regressor_0_9
Code Example #11
def train_right_eye_cyl_axis_model(config):
    try:
        print("Model training started...")

        # Import the dataset
        bucket_file = get_training_data(config)
        dataset = pd.read_csv(io.BytesIO(bucket_file['Body'].read()))

        # Extract data for the right eye - cyl/axis
        columns = config["data_set_columns"]["right_eye_cyl_axis"]

        right_eye_dataset = pd.DataFrame(dataset, columns=columns)

        # Check for duplicates and remove if exists
        duplicates_exists = right_eye_dataset.duplicated().any()
        if duplicates_exists:
            right_eye_dataset = right_eye_dataset.drop_duplicates()

        # map categorical data
        notes_map = {"happy": 1, "unhappy": 0}
        right_eye_dataset["notes"] = right_eye_dataset["notes"].map(notes_map)

        # Create feature matrix
        X = right_eye_dataset.iloc[:, :-3]

        # Create predicted matrix
        y = right_eye_dataset.iloc[:, 7:9]

        # Split dataset to train and test set
        from sklearn.model_selection import train_test_split
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.3, random_state=42)

        # SVR - Train the model
        from sklearn.svm import SVR
        from sklearn.multioutput import MultiOutputRegressor
        regressor = MultiOutputRegressor(SVR(kernel="linear"), n_jobs=-1)
        regressor.fit(X_train, y_train)

        print("Model training done.")

        return list(X.columns), regressor
    except Exception as e:
        print(str(e))
        return None, None
Code Example #12
def objective(space):

    global X, Xt, y, yt

    clf = MultiOutputRegressor(
        XGBRegressor(n_estimators=int(space['n_estimators']),
                     max_depth=int(space['max_depth']),
                     gamma=space['gamma'],
                     reg_alpha=space['reg_alpha'],
                     reg_lambda=space['reg_lambda'],
                     min_child_weight=space['min_child_weight']))

    clf.fit(X, y, verbose=False)

    pred = clf.predict(Xt)
    accuracy = mean_squared_error(yt, pred)
    print("SCORE:", accuracy)
    return {'loss': accuracy, 'status': STATUS_OK}
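A sketch of how this objective could be driven by hyperopt's fmin; the search-space bounds below are illustrative assumptions, not values from the source:

from hyperopt import Trials, fmin, hp, tpe

# Assumes the globals X, y, Xt, yt used by objective() are already populated.
space = {
    'n_estimators': hp.quniform('n_estimators', 50, 500, 25),
    'max_depth': hp.quniform('max_depth', 3, 12, 1),
    'gamma': hp.uniform('gamma', 0.0, 5.0),
    'reg_alpha': hp.uniform('reg_alpha', 0.0, 1.0),
    'reg_lambda': hp.uniform('reg_lambda', 0.0, 1.0),
    'min_child_weight': hp.quniform('min_child_weight', 1, 10, 1),
}

trials = Trials()
best = fmin(fn=objective, space=space, algo=tpe.suggest,
            max_evals=50, trials=trials)
print(best)  # best hyperparameters found (floats; cast to int as objective() does)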
Code Example #13
    def test_sklearn_multioutput_regressor(self):
        for n_targets in [2, 3, 4]:
            for model_class in [DecisionTreeRegressor, ExtraTreesRegressor, RandomForestRegressor, LinearRegression]:
                seed = random.randint(0, 2**32 - 1)
                if model_class != LinearRegression:
                    model = MultiOutputRegressor(model_class(random_state=seed))
                else:
                    model = MultiOutputRegressor(model_class())
                X, y = datasets.make_regression(
                    n_samples=50, n_features=10, n_informative=5, n_targets=n_targets, random_state=seed
                )
                X = X.astype("float32")
                y = y.astype("float32")
                model.fit(X, y)

                torch_model = hummingbird.ml.convert(model, "torch", extra_config={constants.TREE_OP_PRECISION_DTYPE: "float64"})
                self.assertTrue(torch_model is not None)
                np.testing.assert_allclose(model.predict(X), torch_model.predict(X), rtol=1e-5, atol=1e-4, err_msg="{}/{}/{}".format(n_targets, model_class, seed))
Code Example #14
def multir(request, model):
    bolsa = pd.read_csv("app/data/bolsa.csv",
                        index_col='Date').groupby('Codigo')
    lista = [
        'B3SA3', 'BBDC4', 'BRAP4', 'BRFS3', 'BRKM5', 'BRML3', 'BTOW3', 'CCRO3',
        'CIEL3', 'CMIG4', 'CSAN3', 'CSNA3', 'CYRE3', 'ECOR3', 'EGIE3', 'ELET3',
        'ELET6', 'EMBR3', 'ENBR3', 'EQTL3', 'ESTC3', 'FLRY3', 'GGBR4', 'GOAU4',
        'GOLL4', 'HYPE3', 'IGTA3', 'KROT3', 'ITSA4', 'ITUB4', 'LAME4', 'LREN3',
        'MGLU3', 'MRFG3', 'MRVE3', 'MULT3', 'NATU3', 'PCAR4', 'PETR3', 'PETR4',
        'QUAL3', 'RADL3', 'RENT3', 'SANB11', 'SBSP3', 'TAEE11', 'TIMP3',
        'UGPA3', 'USIM5', 'VALE3', 'VIVT4', 'WEGE3'
    ]

    resultado = []
    for item in lista:
        bolsa = pd.read_csv("app/data/bolsa.csv",
                            index_col='Date').groupby('Codigo')
        dados = bolsa.get_group(item)
        X = dados[['Open', 'High', 'Low', 'Close', 'Volume']]
        y = pd.DataFrame({
            'Alta_real':
            dados['High'].shift(-1).fillna(method='pad'),
            'Baixa_real':
            dados['Low'].shift(-1).fillna(method='pad')
        })
        X_train, X_test, y_train, y_test = train_test_split(X,
                                                            y,
                                                            test_size=0.20,
                                                            shuffle=False,
                                                            random_state=0)
        if (model == 'adr'):
            modelo = "Automatic Relevance Determination Regression"
            #regr_multi = MultiOutputRegressor(svm.SVR())
            regr_multi = MultiOutputRegressor(
                linear_model.ARDRegression(compute_score=True))
        elif (model == 'ada'):
            modelo = "Ada Regressor"
            regr_multi = MultiOutputRegressor(
                AdaBoostRegressor(random_state=0, n_estimators=100))
        elif (model == 'GB'):
            modelo = "GradientBoostingRegressor"
            regr_multi = MultiOutputRegressor(
                GradientBoostingRegressor(random_state=1, n_estimators=10))
        else:
            modelo = "LinerRegression com Bayesian Ridge"
            regr_multi = MultiOutputRegressor(linear_model.BayesianRidge())
        regr_multi = regr_multi.fit(X_train, y_train)
        y_pred = regr_multi.predict(X_test)
        #print(item)
        #print(": ")
        #print(r2_score(y_test, y_pred))
        #print(item,": ", r2_score(y_test, y_pred))
        r = r2_score(y_test, y_pred)
        resultado.append([item, r])
    resultado_geral = pd.DataFrame(resultado).to_html()
    context = {'modelo': modelo, 'resultado': resultado_geral}
    return render(request, 'app/multi.html', context)
Code Example #15
def generate_joint_model(single_model):
    model = MultiOutputRegressor(single_model)
    model.fit(X_train, Y_train)
    
    score_train = model.score(X_train, Y_train)
    print('Score of train', round(score_train * 100, 1), "%")
    
    score = model.score(X_test, Y_test)
    print('Score of test', round(score * 100, 1), "%")
    
    model_path = model_folder + r"/" +  \
                    str(round(score, 3)).replace('.', '_') + r"_" +  \
                    str(model.get_params()['estimator']).split('(')[0] + \
                    '.joblib'
    joblib.dump(model, model_path)
    print("Save model file", model_path)
    
    return model, model_path
Code Example #16
File: svr.py Project: dtseng/RAPID_deep_learning
def train_diff_levels(noise, size):
    # Load data with specified amount of noise and number of examples.
    data = Data(noise,
                size,
                imageFiles='./datasets/noise_0_alt/train_data/regular/*.png',
                labelFiles='./datasets/noise_0_alt/train_data/regular/*.npy')

    # Train the SVR.
    svr = LinearSVR(tol=0.1, verbose=10)
    multi_svr = MultiOutputRegressor(svr, n_jobs=-1)
    multi_svr.fit(data.x / 255.0, data.y)

    # Save trained model.
    pickle.dump(
        multi_svr,
        open(
            "saved_models/svr/noise_{0}_training_{1}.ckpt".format(noise, size),
            'wb'))
Code Example #17
File: Multi_output.py Project: diarra2/Projet
def multi_reg(data, out, saison):

    cols = [
        'temp_1', 'temp_2', 'mean_national_temp', 'humidity_1', 'humidity_2',
        'consumption_secondary_1', 'consumption_secondary_2',
        'consumption_secondary_3'
    ]

    output_col = ['consumption_1', 'consumption_2']

    X_week, X_week_end = sub_data(data, cols, saison)
    Y_week, Y_week_end = sub_data(out, output_col, saison)

    from sklearn.multioutput import MultiOutputRegressor
    from sklearn.ensemble import RandomForestRegressor
    from sklearn.svm import SVR

    if saison == 'ete':
        clf_week = RandomForestRegressor(n_estimators=100,
                                         criterion='mae',
                                         random_state=0)
        clf_week.fit(X_week, Y_week)

        clf_week_end = MultiOutputRegressor(
            RandomForestRegressor(n_estimators=100,
                                  criterion='mae',
                                  random_state=0)
        )  #SVR(kernel ='rbf', gamma = 'scale', tol = 10e-5))
        clf_week_end.fit(X_week_end, Y_week_end)
    else:
        clf_week = MultiOutputRegressor(
            RandomForestRegressor(n_estimators=100,
                                  criterion='mae',
                                  random_state=0)
        )  #SVR(kernel ='rbf', gamma = 'scale', tol = 10e-5))
        clf_week.fit(X_week, Y_week)

        clf_week_end = RandomForestRegressor(n_estimators=100,
                                             criterion='mae',
                                             random_state=0)
        clf_week_end.fit(X_week_end, Y_week_end)
    print("training score {} : {}".format(saison, (clf_week.score(
        X_week, Y_week), clf_week_end.score(X_week_end, Y_week_end))))
    return (clf_week, clf_week_end)
Code Example #18
def runBaseLineRegression(model_params, data, estimator):

    # regr = MultiOutputRegressor(sklearn.linear_model.LinearRegression())
    regr = MultiOutputRegressor(estimator)
    # regr = MultiOutputRegressor(sklearn.linear_model.BayesianRidge())
    # regr = MultiOutputRegressor(sklearn.linear_model.Lasso())

    # data
    AP_train, TRP_train = data[0]
    AP_dev, TRP_dev = data[1]

    if model_params["DirectionForward"]:
        X_train, Y_train, X_dev, Y_dev = TRP_train, AP_train, TRP_dev, AP_dev
    else:
        X_train, Y_train, X_dev, Y_dev = AP_train, TRP_train, AP_dev, TRP_dev
        model_params["OutputNames"], model_params["InputNames"] = model_params["InputNames"], model_params["OutputNames"]

    regr.fit(X_train, Y_train)
    Y_dev_pred = regr.predict(X_dev)
    Y_train_pred = regr.predict(X_train)

    if model_params["DirectionForward"]:
        # train
        mse_total_train = customUtils.mse_p(ix=(3, 6), Y_pred=Y_train_pred, Y_true=Y_train)
        # dev
        mse_total_dev = customUtils.mse_p(ix=(3, 6), Y_pred=Y_dev_pred, Y_true=Y_dev)
    else:
        mse_total_train = mse(Y_train, Y_train_pred, multioutput='raw_values')
        mse_total_dev = mse(Y_dev, Y_dev_pred, multioutput='raw_values')

    model_location = os.path.join('models', model_params["model_name"] + '.json')

    with open(os.path.join('model_params', model_params["model_name"] + '.json'), 'w') as fp:
        json.dump(model_params, fp, sort_keys=True)

    _ = run_eval_base(model_location, dataset="train", email=model_params["email"])
    _ = run_eval_base(model_location, dataset="test", email=model_params["email"])
    mse_total = run_eval_base(model_location, dataset="dev", email=model_params["email"])

    return (mse_total_train.tolist(), mse_total_dev.tolist(), mse_total_train.sum(), mse_total_dev.sum())
Code Example #19
    def decision_function(self, X):
        X = X.copy()
        X.iloc[:, :-2] *= 1e12

        L, parcel_indices_L, subj_dict = self._get_lead_field_info()
        # use only Lead Fields of the subjects found in X
        subj_dict = dict((k, subj_dict[k]) for k in np.unique(X['subject']))
        self.lead_field, self.parcel_indices = [], []
        subj_dict_x = {}
        for idx, s_key in enumerate(subj_dict.keys()):
            subj_dict_x[s_key] = idx
            self.lead_field.append(L[subj_dict[s_key]])
            self.parcel_indices.append(parcel_indices_L[subj_dict[s_key]])

        X['subject_id'] = X['subject'].map(subj_dict_x)
        X = X.astype({'subject_id': 'int32'})
        model = MultiOutputRegressor(self.model, n_jobs=self.n_jobs)
        X = X.reset_index(drop=True)

        betas = np.empty((len(X), 0)).tolist()
        for subj_idx in np.unique(X['subject_id']):
            l_used = self.lead_field[subj_idx]

            X_used = X[X['subject_id'] == subj_idx]
            X_used = X_used.iloc[:, :-2]

            norms = l_used.std(axis=0)
            l_used = l_used / norms[None, :]

            alpha_max = abs(l_used.T.dot(X_used.T)).max() / len(l_used)
            alpha = 0.2 * alpha_max
            model.estimator.alpha = alpha
            model.fit(l_used, X_used.T)  # cross validation done here

            for idx, idx_used in enumerate(X_used.index.values):
                est_coef = np.abs(_get_coef(model.estimators_[idx]))
                est_coef /= norms
                beta = pd.DataFrame(
                        np.abs(est_coef)
                        ).groupby(
                        self.parcel_indices[subj_idx]).max().transpose()
                betas[idx_used] = np.array(beta).ravel()
        betas = np.array(betas)
        return betas
Code Example #20
def baseline(X_train, y_train, X_test, model_name):
    if model_name == 'linear':
        regr_multirf = MultiOutputRegressor(LinearRegression())
    elif model_name == 'ridge':
        regr_multirf = MultiOutputRegressor(Ridge())
    elif model_name == 'lasso':
        regr_multirf = MultiOutputRegressor(Lasso())
    elif model_name == 'xgb':
        regr_multirf = MultiOutputRegressor(RandomForestRegressor(n_estimators=100,
                                                                  max_depth=2,
                                                                  random_state=0))
        # first run local mean smape 0.84345, public 17.47
        # too long
    else:
        raise Exception('unknown model', model_name)

    regr_multirf.fit(X_train, y_train)
    y_pred = regr_multirf.predict(X_test)
    return regr_multirf, y_pred
Code Example #21
def score(params):
    params['n_estimators'] = int(params['n_estimators'])
    print("Training with params: ")
    print(params)
    sys.stdout.flush()

    gbm_model = MultiOutputRegressor(XGBRegressor(**params))
    gbm_model.fit(DanQ_train, scores_train)

    predictions = gbm_model.predict(kmer_val)

    #getting score, MSE
    total_se = (scores_val - predictions)**2
    mse = []
    for i in range(4):
        mse.append(np.mean(total_se[:, i]))
    score = np.mean(mse)
    print("\tScore {0}\n\n".format(score))
    return {'loss': score, 'status': STATUS_OK}
Code Example #22
File: SVM.py Project: bellineq/ETF
class SVM():
    def __init__(self):
        self.model = MultiOutputRegressor(SVR(kernel='rbf', C=1e3, gamma=0.1))

    def fit(self, train_input, train_target):
        self.model.fit(train_input, train_target)

    def predict(self, test_input):
        return self.model.predict(test_input)

    def save(self, code=50):
        filename = 'SVM' + str(code) + '.pkl'
        filepath = './model/' + filename
        joblib.dump(self.model, filepath)

    def load(self, code=50):
        filename = 'SVM' + str(code) + '.pkl'
        filepath = './model/' + filename
        self.model = joblib.load(filepath)
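A minimal usage sketch for the SVM wrapper above, assuming small synthetic arrays and an existing ./model/ directory for save():

import numpy as np

X_train = np.random.rand(100, 5)
y_train = np.random.rand(100, 2)  # two regression targets

svm_model = SVM()
svm_model.fit(X_train, y_train)
print(svm_model.predict(X_train[:3]))
svm_model.save(code=50)  # writes ./model/SVM50.pkl (the directory must already exist)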
Code Example #23
def make_bayesian_pred(df, next_week, debug=0):
    """
    This method creates predictions using bayesian regression.
    """
    space = {
        'estimator__alpha_1': [1e-10, 1e-5, 1],
        'estimator__alpha_2': [1e-10, 1e-5, 1],
        'estimator__lambda_1': [1e-10, 1e-5, 1],
        'estimator__lambda_2': [1e-10, 1e-5, 1],
        'estimator__n_iter': [10, 300, 1000],
        'estimator__normalize': [True, False],
        'estimator__fit_intercept': [True, False]
    }
    params = {
        'estimator__alpha_1': [1e-10, 1e-5, 1, 5],
        'estimator__alpha_2': [1e-10, 1e-5, 1, 5],
        'estimator__lambda_1': [1e-10, 1e-5, 1, 5],
        'estimator__lambda_2': [1e-10, 1e-5, 1, 5],
        'estimator__n_iter': [10, 300, 1000],
        'estimator__normalize': [True, False],
        'estimator__n_jobs': -1,
        'n_jobs': -1,
        'estimator__fit_intercept': [True, False]
    }
    X_train, X_test, Y_train, Y_test = process_data(df, next_week)
    multi_bay = MultiOutputRegressor(BayesianRidge())
    #multi_bay.set_params(**params)
    #best_random = grid_search(multi_bay, space, next_week, 3, X_train, Y_train)
    multi_bay.fit(X_train, Y_train)
    next_week[Y_train.columns] = multi_bay.predict(next_week[X_train.columns])
    if debug:
        y_pred_untrain = multi_bay.predict(X_train)
        print(next_week)
        print("Score: ", multi_bay.score(X_train, Y_train) * 100)
        print("MSE: ", metrics.mean_squared_error(Y_train, y_pred_untrain))
        print(
            "CV: ",
            ms.cross_val_score(multi_bay,
                               X_train,
                               Y_train,
                               cv=10,
                               scoring='neg_mean_squared_error'))
    return next_week
Code Example #24
class DTRmodel:
    def __init__(self, fl, max_depth=8, num_est=300):
        """
        Initialises new DNN model based on input features_dim, labels_dim, hparams
        :param features_dim: Number of input feature nodes. Integer
        :param labels_dim: Number of output label nodes. Integer
        :param hparams: Dict containing hyperparameter information. Dict can be created using create_hparams() function.
        hparams includes: hidden_layers: List containing number of nodes in each hidden layer. [10, 20] means 10 then 20 nodes.
        """
        self.labels_dim = fl.labels_dim  # Assuming that each task has only 1 dimensional output
        self.labels_scaler = fl.labels_scaler
        self.model = MultiOutputRegressor(
            AdaBoostRegressor(DecisionTreeRegressor(max_depth=max_depth),
                              n_estimators=num_est))
        self.normalise_labels = fl.normalise_labels

    def train_model(self, fl, save_mode=False, plot_name=None):
        training_features = fl.features_c_norm
        if self.normalise_labels:
            training_labels = fl.labels_norm
        else:
            training_labels = fl.labels

        self.model.fit(training_features, training_labels)

        return self.model

    def eval(self, eval_fl):
        features = eval_fl.features_c_norm
        if self.labels_dim == 1:
            y_pred = self.model.predict(features)[:, None]
        else:
            y_pred = self.model.predict(features)
        if self.normalise_labels:
            mse_norm = mean_squared_error(eval_fl.labels_norm, y_pred)
            mse = mean_squared_error(
                eval_fl.labels, self.labels_scaler.inverse_transform(y_pred))
        else:
            mse_norm = -1
            mse = mean_squared_error(eval_fl.labels, y_pred)

        return y_pred, mse, mse_norm
Code Example #25
File: gbr.py Project: a-jd/npsn
    def train_model(self, params):
        '''
        Input a dict, params, containing:
            loss_type: String, 'ls', 'lad', or 'huber'
            learning_rate: Float, ~0.1
            n_estimators: Int, boosting stages, ~100
            criterion: String, split quality, 'friedman_mse', 'mse', 'mae'
            max_depth: Int, depth of regressors, ~3
            max_features: String, method, 'auto', 'sqrt', 'log2'
        Returns:
            Dict containing info on combination
        '''
        loss_type = params['loss']
        learning_rate = params['learning_rate']
        n_estimators = int(params['n_estimators'])
        criterion = params['criterion']
        max_depth = int(params['max_depth'])
        max_features = params['max_features']

        model = MOR(
            skGBR(loss=loss_type,
                  learning_rate=learning_rate,
                  n_estimators=n_estimators,
                  criterion=criterion,
                  max_depth=max_depth,
                  max_features=max_features))

        # Print current combination
        print('Current GBR combination: {}'.format(params))

        # Flat versions of y (power/flux distribution)
        y_tr_fl, y_te_fl = self.flat_y()

        # Fit
        model.fit(self.x_train, y_tr_fl)

        # Hyperopt loss for each combination
        y_predict = model.predict(self.x_test)
        hyp_loss = sklmse(y_te_fl, y_predict)
        self.tr_hist.update_history(params, hyp_loss, model)

        return {'loss': hyp_loss, 'status': STATUS_OK}
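A hypothetical params dict matching the docstring above; the values are illustrative, and trainer stands in for an instance of the surrounding class:

params = {
    'loss': 'huber',
    'learning_rate': 0.1,
    'n_estimators': 100,
    'criterion': 'friedman_mse',
    'max_depth': 3,
    'max_features': 'sqrt',
}
result = trainer.train_model(params)  # returns {'loss': <test-set MSE>, 'status': STATUS_OK}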
Code Example #26
File: svr.py Project: a-jd/npsn
    def train_model(self, params):
        '''
        Input a dict, params, containing:
            nu: Float, fraction of support vectors (0,1]
            C: Float, penalty parameter of error (~1.0)
            kernel: String, 'linear', 'poly', 'rbf', sigmoid'
            degree: Int, degree of polynomial for poly
            gamma: String, 'scale'/'auto' for 'rbf', 'poly', 'sigmoid'
        Returns:
            Dict containing info on combination
        '''
        kernel = params['kernel']
        nu = params['nu']
        C = params['C']

        # Instantiate SVR
        if kernel in ['linear']:
            model = MOR(NuSVR(C=C, nu=nu, kernel=kernel))
        elif kernel in ['rbf', 'sigmoid']:
            gamma = params['gamma']
            model = MOR(NuSVR(C=C, nu=nu, kernel=kernel, gamma=gamma))
        elif kernel in ['poly']:
            gamma = params['gamma']
            degree = params['degree']
            model = MOR(
                NuSVR(C=C, nu=nu, kernel=kernel, degree=degree, gamma=gamma))

        # Print current combination
        print('Current SVR combination: {}'.format(params))

        # Flat versions of y (power/flux distribution)
        y_tr_fl, y_te_fl = self.flat_y()

        # Fit
        model.fit(self.x_train, y_tr_fl)

        # Hyperopt loss for each combination
        y_predict = model.predict(self.x_test)
        hyp_loss = sklmse(y_te_fl, y_predict)
        self.tr_hist.update_history(params, hyp_loss, model)

        return {'loss': hyp_loss, 'status': STATUS_OK}
Code Example #27
File: train_test.py Project: Suyi32/George-private
def training_with_cross_validation(npzfile_path='datasets/0507-all-110-results.npz', verbose=0):

    npzfile = np.load('./simulator/' + npzfile_path, allow_pickle=True)
    alloc, rt_50, rt_99, rps = npzfile['alloc'], npzfile['rt_50'], npzfile['rt_99'], npzfile['rps']
    length = len(rps)
    # for i in range(length):
    #     if alloc[i,0] * alloc[i,1] > 0:
    #         rps[i, 0] *= 0.5
    #     if alloc[i,5] * alloc[i,4] > 0:
    #         rps[i, 5] *= 2
    # for i in range(length):
    #     for j in range(6):
    #         # if rps[i, j] > 1:
    #         #     rps[i, j] *= 2.0
    #         if (rps[i, j] > 0) & (rps[i, j] < 1):
    #             rps[i, j] *= 0.5
    # for i in range(length):
    #     for j in range(6):
    #         if rps[i, j] > 1:
    #             if (j == 1) & (alloc[i, 0] * alloc[i, 1] > 0):
    #                 rps[i, j] *= 2.0
    #         if (rps[i, j] > 0) & (rps[i, j] < 1):
    #             rps[i, j] *= 0.5
    #: pre-processing
    rps = np.nan_to_num(rps.astype(float))
    X_train, X_test, y_train, y_test = train_test_split(alloc, rps, test_size=0.1, random_state=42)  # this random_state is not a hyper-parameter of Regressor
    if verbose:
        print("X_train {} => y_train {}".format(X_train.shape, y_train.shape))
        print("X_test  {} => y_test  {}".format(X_test.shape, y_test.shape))

    # n_estimators, max_depth and random_state are module-level hyperparameters defined elsewhere in this file
    regr = MultiOutputRegressor(RandomForestRegressor(n_estimators=n_estimators, max_depth=max_depth, random_state=random_state))

    cv_scores = cross_val_score(regr, X_train, y_train, cv=5, n_jobs=4)
    np.set_printoptions(precision=4, suppress=True)
    if verbose:
        print("5-fold cross validation scores:\n", cv_scores)

    regr.fit(X_train[:,0:7], y_train[:,0:7])
    score = regr.score(X_test[:,0:7], y_test[:,0:7])
    if verbose:
        print("R^2 score of regressor: %.4f" % score)
    return regr
Code Example #28
def grant_predictor(onu_id,onu_df,window,predict,features,model,metric):
    index=0 # window start
    index_max = 0 # prediction end

    # list with metrics of each prediction in different observation windows
    metric_list = []
    reg = MultiOutputRegressor(model)  # wrap the base model for multi-output prediction

    while index+window < len(onu_df):
        interval=index+window # window final position

        df_tmp = onu_df.iloc[index:interval] # training dataset
        if interval+predict < len(onu_df): # check that the prediction window doesn't overflow the input data
            index_max = interval+predict
        else:
            index_max = len(onu_df)-1

        # check if features evaluated is simple(counter) else counter+timestamp
        if len(features) == 1:
            X_pred = np.array(onu_df[features].iloc[interval:index_max]).reshape(-1,1)
            if len(X_pred) == 0:
                break
            # fitting the model
            reg.fit(np.array( df_tmp[features] ).reshape(-1,1) , df_tmp[['start','end']])
        else:
            X_pred = onu_df[features].iloc[interval:index_max]
            if len(X_pred) == 0:
                break
            # fitting the model
            reg.fit(df_tmp[features] , df_tmp[['start','end']])

        # make prediction
        pred = reg.predict(X_pred)
        # real values to compare with prediction
        Y_true = onu_df[['start','end']].iloc[interval:index_max]
        # metric calculation
        metric_list.append(metric(Y_true, pred,multioutput='uniform_average'))

        # shift past observations window in p positions
        index += predict

    return metric_list
Code Example #29
def run_multi_output_regressor(X, y):
    total_acc = np.zeros(shape=(y.shape[1]))

    kf = KFold(n_splits=5)
    for i, (train_index, valid_index) in enumerate(kf.split(X)):
        x_train, x_valid = X[train_index], X[valid_index]
        y_train, y_valid = y[train_index], y[valid_index]

        # Train classifier
        lr = LinearRegression()
        mor = MultiOutputRegressor(lr)
        mor.fit(x_train, y_train)
        y_pred = np.rint(mor.predict(x_valid))

        acc = accuracy_score(y_valid, y_pred)
        print(f"Iteration {i+1}: L1 = {acc}")
        total_acc = total_acc + acc

    print(f"Average accuracy = {total_acc/kf.get_n_splits()}")
    return total_acc
Code Example #30
def stratCV(model, nfolds, train_X, train_Y, output_name, **params):
    mskf = MultilabelStratifiedKFold(n_splits=nfolds, shuffle=True)
    scores = []
    for train_index, valid_index in mskf.split(train_X, train_Y):
        print("TRAIN:", train_index, "VALID:", valid_index)
        X_train, X_valid = train_X[train_index], train_X[valid_index]
        Y_train, Y_valid = train_Y[train_index], train_Y[valid_index]

        m = MultiOutputRegressor(model(**params))
        m.fit(X_train, Y_train)
        y_preds = m.predict(X_valid)
        y_score = log_loss_metric(Y_valid, y_preds)
        print(y_score)
        scores.append(y_score)

    # Save to file in the current working directory
    joblib_file = "joblib_model_{}.pkl".format(output_name)
    joblib.dump((model, scores), joblib_file)

    return scores
Code Example #31
class ML:
    def __init__(self):
        self.model = GradientBoostingRegressor()
        self.model = MultiOutputRegressor(self.model)

    def train(self, x, y):
        self.model.fit(x, y)

    def predict(self, x):
        return self.model.predict(x)

    @staticmethod
    def mse(x, y):
        return mean_squared_error(x, y)

    def save(self, model_file):
        joblib.dump(self.model, model_file)

    def load(self, model_file):
        self.model = joblib.load(model_file)
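A short usage sketch for the ML class above with synthetic two-target data; shapes are illustrative:

import numpy as np

X = np.random.rand(50, 4)
y = np.random.rand(50, 2)

ml = ML()
ml.train(X, y)
preds = ml.predict(X[:5])
print(ML.mse(y[:5], preds))  # mean squared error of the first five predictions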
Code Example #32
def train_stack_model(
    xtrain: Union[np.ndarray, pd.DataFrame],
    ytrain: Union[np.ndarray, pd.DataFrame],
    verbose: int = 0,
    n_jobs: int = 1,
    order: Tuple[str, str] = ("rf", "lr"),
    lr_params: Optional[Dict]=None,
    rf_params: Optional[Dict]=None
) -> BaseEstimator:

    rf_estimator = RandomForestRegressor(
        n_estimators=1_000,
        criterion="mse",
        n_jobs=n_jobs,
        random_state=123,
        warm_start=False,
        verbose=verbose,
    )
    lr_estimator = LinearRegression()

    # Initialize GLM
    if order == ("rf", "lr"):
        stacking_regressor = StackingRegressor(
            estimators=[("Random Forest", rf_estimator)], final_estimator=lr_estimator
        )
    elif order == ("lr", "rf"):
        stacking_regressor = StackingRegressor(
            estimators=[("Linear Regression", lr_estimator)],
            final_estimator=rf_estimator,
        )
    else:
        raise ValueError()

    mo_regressor = MultiOutputRegressor(stacking_regressor, n_jobs=1)
    # train GLM
    t0 = time.time()
    mo_regressor.fit(xtrain, ytrain)
    t1 = time.time() - t0
    if verbose > 0:
        print(f"Training time: {t1:.3f} secs.")
    return mo_regressor
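A usage sketch for train_stack_model with synthetic data; the array shapes are illustrative, the 1,000-tree forest makes even this toy fit take a few seconds, and criterion="mse" above targets older scikit-learn releases (renamed to "squared_error" in 1.0):

import numpy as np

xtrain = np.random.randn(200, 10)
ytrain = np.random.randn(200, 3)  # three targets, one stacked regressor per target

model = train_stack_model(xtrain, ytrain, verbose=1, n_jobs=2, order=("rf", "lr"))
preds = model.predict(xtrain[:5])  # shape (5, 3)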
Code Example #33
File: processing.py Project: oscartorres098/amas
def train_nmodel(data, labels, model, is_std, names):
    x_train, x_test, y_train, y_test = train_test_split(data,
                                                        labels,
                                                        test_size=0.25,
                                                        random_state=42)
    mor = MultiOutputRegressor(model)
    mor.fit(x_train, y_train)
    y_pred = mor.predict(x_test)
    mse, r2 = get_metrics(y_test, y_pred, labels, is_std)
    cvs = cross_val_score(mor,
                          data,
                          labels,
                          cv=4,
                          scoring='neg_mean_squared_error')
    print(y_pred)
    print(x_train)
    images = []
    for i in range(0, len(names)):
        images.append(create_graph(y_test[i], y_pred[i], names[i]))

    return mor, mse, r2, cvs, images
Code Example #34

# Create a random dataset
rng = np.random.RandomState(1)
X = np.sort(200 * rng.rand(600, 1) - 100, axis=0)
y = np.array([np.pi * np.sin(X).ravel(), np.pi * np.cos(X).ravel()]).T
y += (0.5 - rng.rand(*y.shape))

X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    train_size=400,
                                                    random_state=4)

max_depth = 30
regr_multirf = MultiOutputRegressor(RandomForestRegressor(max_depth=max_depth,
                                                          random_state=0))
regr_multirf.fit(X_train, y_train)

regr_rf = RandomForestRegressor(max_depth=max_depth, random_state=2)
regr_rf.fit(X_train, y_train)

# Predict on new data
y_multirf = regr_multirf.predict(X_test)
y_rf = regr_rf.predict(X_test)

# Plot the results
plt.figure()
s = 50
a = 0.4
plt.scatter(y_test[:, 0], y_test[:, 1], edgecolor='k',
            c="navy", s=s, marker="s", alpha=a, label="Data")
plt.scatter(y_multirf[:, 0], y_multirf[:, 1], edgecolor='k',