Python LinearSVR.predictの例、sklearn.svm.LinearSVR.predict Pythonの例

コード例 #1

0

ファイルを表示

    def train_SVM(self, data):
        """Fit a LinearSVR, report validation MAE, pickle the model and predict the test set.

        Args:
            data: pair ``(train, validation)``; each element is itself an
                ``(x, y)`` pair.

        Returns:
            The raw output of ``svr.predict(self.x_test)`` (no ``expm1`` is
            applied to it here).
        """
        train, validacion = data
        x_tr, y_tr = train
        x_val, y_val = validacion

        print('Start training LinearSVR...')
        start_time = self.timer()

        svr = LinearSVR()
        svr.fit(x_tr, y_tr)
        print("The R2 is: {}".format(svr.score(x_tr, y_tr)))
        self.timer(start_time)

        print("Making prediction on validation data")
        # Both truth and prediction go through expm1 before the MAE
        # (assumes the targets were log1p-transformed upstream -- confirm).
        y_val = np.expm1(y_val)
        y_val_pred = np.expm1(svr.predict(x_val))
        mae = mean_absolute_error(y_val, y_val_pred)
        print("El mean absolute error de es {}".format(mae))

        print('Saving model into a pickle')
        # Only "directory already exists" is tolerated; any other failure
        # (e.g. permissions) now surfaces instead of being silently swallowed.
        os.makedirs('pickles', exist_ok=True)

        with open('pickles/svrCV.pkl', 'wb') as f:
            pickle.dump(svr, f)

        print('Making prediction and saving into a csv')
        y_test = svr.predict(self.x_test)

        return y_test

コード例 #2

0

ファイルを表示

class SVMWrapper:
    """Thin wrapper around ``LinearSVR`` that also records the training time."""

    def __init__(self,
                 c=1.0,
                 e=0.0,
                 loss="epsilon_insensitive",
                 dual=True,
                 max_iter=1000):
        """Create the underlying regressor.

        Args:
            c: forwarded to ``LinearSVR`` as ``C``.
            e: forwarded to ``LinearSVR`` as ``epsilon``.
            loss: forwarded to ``LinearSVR`` as ``loss``.
            dual: forwarded to ``LinearSVR`` as ``dual``.
            max_iter: forwarded to ``LinearSVR`` as ``max_iter``.
        """
        self.regressor = LinearSVR(C=c,
                                   epsilon=e,
                                   loss=loss,
                                   dual=dual,
                                   max_iter=max_iter)
        # Seconds spent in the last ``train`` call; None until trained.
        self.training_time = None

    def train(self, x_train, y_train):
        """Fit the regressor and store the elapsed wall-clock time."""
        start = time.perf_counter()
        self.regressor.fit(x_train, y_train)
        self.training_time = time.perf_counter() - start

    def score(self, x_test, y_test):
        """Return ``self.regressor.score(x_test, y_test)``."""
        return self.regressor.score(x_test, y_test)

    def predict(self, x_test):
        """Return predictions for a batch of samples."""
        return self.regressor.predict(x_test)

    def predict_one(self, x_single):
        """Return the prediction for a single sample.

        A 1-D sample is reshaped to one row before it is handed to the
        regressor; input that is already 2-D is passed through untouched,
        so existing callers keep working.
        """
        import numpy as np

        if np.ndim(x_single) == 1:
            x_single = np.reshape(x_single, (1, -1))
        return self.regressor.predict(x_single)

    def get_training_time(self):
        """Return the duration of the last ``train`` call in seconds.

        Raises:
            ValueError: if ``train`` has not been called yet.
        """
        if self.training_time is None:
            raise ValueError("training time is not available: call train() first")
        return self.training_time

コード例 #3

0

ファイルを表示

ファイル: LinearSVR.py プロジェクト: CyberCrack/TestAing-Regression

def LinearSVRRegressor(X_train, X_test, y_train, y_test):
    """Fit one LinearSVR per target column (0 and 1), then report and log metrics.

    Test-set metrics are printed and collected first, followed by the
    training-set metrics; both regressors and both metric sets are handed to
    ``logSave``.
    """
    svr_kwargs = dict(epsilon=0.001,
                      max_iter=5000,
                      C=3,
                      loss='squared_epsilon_insensitive')
    targets = [y_train[:, 0], y_train[:, 1]]

    # Independent regressor for each of the two outputs, fitted in column order.
    regressors = []
    for target in targets:
        regressor = LinearSVR(**svr_kwargs)
        regressor.fit(X_train, target)
        regressors.append(regressor)

    def stacked_predictions(features):
        """Predict with both regressors and join the results column-wise."""
        raw = [regressor.predict(X=features) for regressor in regressors]
        return np.hstack([column.reshape(-1, 1) for column in raw])

    test_pred = stacked_predictions(X_test)
    printMetrics(y_true=y_test, y_pred=test_pred)
    val_metrics = getMetrics(y_true=y_test, y_pred=test_pred)

    train_pred = stacked_predictions(X_train)
    metrics = getMetrics(y_true=y_train, y_pred=train_pred)
    printMetrics(y_true=y_train, y_pred=train_pred)

    logSave(nameOfModel="LinearSVRRegressor",
            reg=regressors,
            metrics=metrics,
            val_metrics=val_metrics)

コード例 #4

0

ファイルを表示

class LSVR:
    """LinearSVR baseline operating on windows of ``n_time`` values."""

    def __init__(self):
        super(LSVR, self).__init__()
        self.C = 0.1
        # Window length: used both when retrieving data and when reshaping it.
        self.n_time = 5
        self.model = LinearSVR(C=self.C)

    def fit(self, train_x, train_y):
        """Fit the underlying LinearSVR."""
        self.model.fit(train_x, train_y)

    def predict(self, test_x):
        """Return the model's predictions for ``test_x``."""
        return self.model.predict(test_x)

    def eval(self, out_time, v_path, w_path):
        """Load data via ``Helper.retrieve_data``, fit, predict and report metrics.

        Args:
            out_time: forwarded to ``Helper.retrieve_data``.
            v_path: forwarded to ``Helper.retrieve_data``.
            w_path: forwarded to ``Helper.retrieve_data``.
        """
        # Use self.n_time here as well: the reshape below relies on the same
        # window length, so a separate hard-coded literal could drift out of
        # sync with it.
        train_x, train_y, test_x, test_y = Helper.retrieve_data(
            n_time=self.n_time,
            out_time=out_time,
            train_pct=0.7,
            test_pct=0.2,
            v_path=v_path,
            w_path=w_path)
        # Swap axes 1 and 2, drop singleton axes, then flatten to rows of
        # n_time values (input layout is defined by Helper -- not visible here).
        train_x = np.squeeze(train_x.transpose(
            (0, 2, 1, 3))).reshape(-1, self.n_time)
        test_x = np.squeeze(test_x.transpose(
            (0, 2, 1, 3))).reshape(-1, self.n_time)
        train_y = train_y.reshape(-1)
        test_y = test_y.reshape(-1)
        print("LSVR Fitting...")
        self.fit(train_x, train_y)
        print("LSVR Fitted!")
        y_pred = self.predict(test_x)
        Helper.metrics(y_pred, test_y)

コード例 #5

0

ファイルを表示

ファイル: LinearSVR.py プロジェクト: CyberCrack/TestAing-Regression

def LinearSVRRegressorGS(X_train, X_test, y_train, y_test):
    """Grid-search one LinearSVR per target column, then report and log metrics.

    Each of the two target columns gets its own ``GridSearchCV`` run (refit
    on r2). The best parameters of both searches are saved via
    ``saveBestParams`` and both fitted regressors are passed to ``logSave``.
    """
    grid_values = {
        'epsilon': list(range(1, 3)) + [value * 0.01 for value in range(1, 3)],
        'C': [value * 0.01 for value in range(1, 3)],
        'loss': ['epsilon_insensitive', 'squared_epsilon_insensitive']
    }

    def search(target):
        """Run the grid search for one target; return (search, best estimator)."""
        grid = GridSearchCV(
            LinearSVR(),
            param_grid=grid_values,
            scoring=['neg_mean_squared_error', 'neg_mean_absolute_error', 'r2'],
            refit='r2',
            n_jobs=-1,
            cv=2,
            verbose=100)
        grid.fit(X_train, target)
        best = grid.best_estimator_
        # Kept from the original flow: the winner is fitted once more on the
        # full training data.
        best.fit(X_train, target)
        return grid, best

    grid_reg1, reg1 = search(y_train[:, 0])
    # Fix: the second regressor must come from its own search. Previously it
    # was taken from grid_reg1, so reg1 and reg2 were the same object and the
    # second target's tuned parameters were never used.
    grid_reg2, reg2 = search(y_train[:, 1])

    def stacked_predictions(features):
        """Predict both targets and join them column-wise."""
        y_pred1 = reg1.predict(X=features)
        y_pred2 = reg2.predict(X=features)
        return np.hstack((y_pred1.reshape(-1, 1), y_pred2.reshape(-1, 1)))

    y_pred = stacked_predictions(X_test)
    printMetrics(y_true=y_test, y_pred=y_pred)
    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred)

    y_pred = stacked_predictions(X_train)
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)
    printMetrics(y_true=y_train, y_pred=y_pred)

    best_params1: dict = grid_reg1.best_params_
    best_params2: dict = grid_reg2.best_params_
    # Per-parameter pair: [value for target 0, value for target 1].
    best_params = {
        key: [best_params1[key], best_params2[key]] for key in best_params1
    }
    saveBestParams(nameOfModel="LinearSVRRegressorGS", best_params=best_params)
    logSave(nameOfModel="LinearSVRRegressorGS",
            reg=[reg1, reg2],
            metrics=metrics,
            val_metrics=val_metrics)

コード例 #6

0

ファイルを表示

	def fitSVR(self, X, Y, name, lastX = None):
		"""Pick (epsilon, C) for LinearSVR on one hold-out split, then train the remaining models.

		The list stored on ``self`` under ``name`` is created if missing and
		extended in place; ``self.saveModule(name, False)`` is called after
		every group.

		Args:
			X: feature array, indexed by row with integer index arrays.
			Y: 2-D target array; only column 24 is used for the search.
			name: attribute name on ``self`` that holds the list of models.
			lastX: optional array; when given, its column 0 is added to both
				the predictions and the targets before scoring.
		"""
		# Reuse the model list from an earlier call if there is one.
		if not hasattr(self, name):
			SVR = []
			setattr(self, name, SVR)
		else:
			SVR = getattr(self, name)

#		if "ridge_alpha" in self.args:
#			alpha = self.args['ridge_alpha']
#		else:
		# Candidate hyper-parameters for the exhaustive search below.
		epsilon_options = [0, 0.1, 10, 100]
		C_options = [0.1, 10, 100]

		# Upper bound on the number of training rows used.
		Xselect = 30000

		# Only the first shuffled fold is used: a single train/test split.
		kf = KFold(n_splits=5, shuffle=True)
		for i1, i2 in kf.split(X):
			train_index, test_index = i1[:Xselect], i2
			break

		# Lower score wins. A candidate is accepted only if it scores below 3;
		# NOTE(review): if none does, bestarg stays None and the unpacking
		# after the loops raises TypeError.
		bestscore = 3
		bestarg = None
		for epsilon in epsilon_options:
			for C in C_options:
				logging.info("SVR trying %f %f", epsilon, C)
				model = LinearSVR(epsilon=epsilon, C=C)
				model.fit(X[train_index], Y[train_index][:, 24])
				if lastX is None:
					predY = model.predict(X[test_index])
					score = calSMAPE1(Y[test_index][:, 24], predY)
				else:
					# Score on lastX[:, 0] + value for both prediction and truth.
					predY = lastX[test_index][:, 0] + model.predict(X[test_index])
					score = calSMAPE1(lastX[test_index][:, 0] + Y[test_index][:, 24], predY)
				if score < bestscore:
					bestscore = score
					bestarg = (epsilon, C)
				logging.info("SVR try %f %f, score %f", epsilon, C, score)
		epsilon, C = bestarg
		logging.info("SVR best %f %f, bestscore %f", epsilon, C, bestscore)

		# Published as a module-level global; presumably read by train_SVR in
		# the worker processes (train_SVR is not visible here -- confirm).
		global SVRargs
		SVRargs = (X[train_index], Y[train_index], epsilon, C)

		# Continue after the models already present: indices len(SVR)..Y.shape[1]-1,
		# grouped by self.divide(..., 18), each group mapped over a 6-process pool.
		for idx in self.divide(list(range(len(SVR), Y.shape[1])), 18):
			with mp.Pool(6) as pool:
				SVR += pool.map(train_SVR, idx)
			logging.info("SVR group %d", idx[0])
			self.saveModule(name, False)

		logging.info("SVR ok")

コード例 #7

0

ファイルを表示

    def test_linear_svr_evaluation(self):
        """
        Check that the evaluation results are the same in scikit learn and coremltools
        """
        # One converted model is checked per keyword-argument set.
        ARGS = [
            {},
            {"C": 0.5, "epsilon": 0.25},
            {"dual": False, "loss": "squared_epsilon_insensitive"},
            {"tol": 0.005},
            {"fit_intercept": False},
            {"intercept_scaling": 1.5},
        ]

        input_names = self.scikit_data.feature_names
        df = pd.DataFrame(self.scikit_data.data, columns=input_names)

        for cur_args in ARGS:
            print(cur_args)
            cur_model = LinearSVR(**cur_args)
            cur_model.fit(self.scikit_data["data"], self.scikit_data["target"])
            spec = convert(cur_model, input_names, "target")

            # The scikit-learn prediction is the reference the spec is compared to.
            df["prediction"] = cur_model.predict(self.scikit_data.data)

            metrics = evaluate_regressor(spec, df)
            # assertAlmostEquals is a deprecated alias that was removed in
            # Python 3.12; assertAlmostEqual is the supported spelling.
            self.assertAlmostEqual(metrics["max_error"], 0)

コード例 #8

0

ファイルを表示

ファイル: main.py プロジェクト: kltdhc/algorithm_lab1

def test_svm_model(train_X, train_y, dev_X, dev_y):
    """Fit a default LinearSVR on the training split and print the dev-set RMSE."""
    print('Testing svm model...')
    from sklearn.svm import LinearSVR

    regressor = LinearSVR()
    regressor.fit(train_X, train_y)
    dev_mse = mean_squared_error(dev_y, regressor.predict(dev_X))
    print('RMSE: {}'.format(math.sqrt(dev_mse)))

コード例 #9

0

ファイルを表示

def try_Cs(X, y, cv, Cs):
    """Return the ``C`` with the lowest mean cross-validated MSE.

    Predictions are clipped to the [0, 1] interval before scoring.

    Args:
        X: feature array indexable by the index arrays in ``cv``.
        y: target array indexable by the index arrays in ``cv``.
        cv: iterable of ``(train_idx, val_idx)`` pairs. It is materialised
            once, so a one-shot generator works for every value of ``C``.
        Cs: iterable of candidate ``C`` values.

    Returns:
        The candidate whose rounded mean MSE is smallest; ties fall back to
        the standard deviation and then to ``C`` itself.

    Raises:
        ValueError: if ``Cs`` is empty.
    """
    # Fix: the folds are reused for every C. Iterating a generator directly
    # would exhaust it after the first candidate and leave the rest with no
    # scores at all.
    folds = list(cv)
    results = []

    for C in Cs:
        t0 = time()
        scores = []

        for train_idx, val_idx in folds:
            svm = LinearSVR(C=C, loss='squared_epsilon_insensitive', dual=False, random_state=1)
            svm.fit(X[train_idx], y[train_idx])

            y_pred = np.clip(svm.predict(X[val_idx]), 0.0, 1.0)

            # This is the mean squared error (no square root is taken).
            scores.append(mean_squared_error(y[val_idx], y_pred))

        m = np.mean(scores)
        s = np.std(scores)

        print('C=%s, took %.3fs, mse=%.3f+-%.3f' % (C, time() - t0, m, s))

        results.append((m.round(3), s, C))

    if not results:
        raise ValueError("Cs must contain at least one candidate value")

    _, _, best_C = min(results)
    return best_C

コード例 #10

0

ファイルを表示

class LinearSVRPrim(primitive):
    """Regressor primitive wrapping scikit-learn's ``LinearSVR``."""

    def __init__(self, random_state=0):
        super(LinearSVRPrim, self).__init__(name='LinearSVR')
        self.hyperparams = []
        self.type = 'Regressor'
        self.description = "We make use of the epsilon-insensitive loss, i.e. errors of less than epsilon are ignored. This is the form that is directly optimized by LinearSVR."
        self.hyperparams_run = {'default': True}
        self.random_state = random_state
        # Fix: the seed was stored but never handed to the estimator, so the
        # random_state argument had no effect on the fitted model.
        self.model = LinearSVR(random_state=self.random_state)
        self.accept_type = 'c_r'

    def can_accept(self, data):
        """Delegate to the base class check for 'Regression' data."""
        return self.can_accept_c(data, 'Regression')

    def is_needed(self, data):
        """Always report this primitive as applicable."""
        # data = handle_data(data)
        return True

    def fit(self, data):
        """Fit the model on ``data['X']`` / ``data['Y']`` after ``handle_data``."""
        data = handle_data(data)
        self.model.fit(data['X'], data['Y'])

    def produce(self, data):
        """Predict on ``data['X']`` and return the result as ``{0: output}``.

        ``output['predictions']`` holds the raw predictions; ``output['X']``
        is replaced by a one-column DataFrame named ``<name>Pred``.
        """
        output = handle_data(data)
        output['predictions'] = self.model.predict(output['X'])
        output['X'] = pd.DataFrame(output['predictions'], columns=[self.name+"Pred"])
        final_output = {0: output}
        return final_output

コード例 #11

0

ファイルを表示

ファイル: cw2_7.py プロジェクト: Eelin-xyl/Python

def mse_of_linear_svr(X, y, epsilon):
    """Return the test-set mean squared error of a linear SVR with the given epsilon.

    The data is split 80/20 (``random_state=5``), both parts are scaled with a
    ``StandardScaler`` fitted on the training part, and a
    ``LinearSVR(epsilon=epsilon, random_state=5)`` is trained and evaluated.

    Args:
        X: (n, d) numpy array, n sample points with d features each.
        y: (n, ) numpy array of label values.
        epsilon: scalar epsilon hyperparameter of the linear SVR.

    Returns:
        Scalar mean squared error on the test part.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=5)

    # Scaling statistics come from the training part only.
    scaler = StandardScaler()
    train_scaled = scaler.fit_transform(X_train)
    test_scaled = scaler.transform(X_test)

    regressor = LinearSVR(epsilon=epsilon, random_state=5)
    regressor.fit(train_scaled, y_train)
    return mean_squared_error(y_test, regressor.predict(test_scaled))

コード例 #12

0

ファイルを表示

def train_svr(X, y, plot=False, linear=False):
    """
    Train an SVR model on the full dataset and report its errors.
    :param X: X of the current dataset
    :param y: target of the current dataset
    :param plot: when true, residuals of the training predictions are plotted.
    :param linear: when true a LinearSVR is trained, otherwise an Epsilon-SVR.
    :return: trained SVR model
    """
    print("Training SVR Model")
    model_class = LinearSVR if linear else SVR
    # Two separate instances: one only for the error estimate, one returned.
    estimator = model_class()
    model = model_class()
    model_name = type(estimator).__name__

    estimated_test_error = estimate_test_error(estimator, X, y)
    print("Estimated test error for {} model : {}".format(
        model_name, estimated_test_error))

    model.fit(X, y)
    train_pred = model.predict(X)
    train_rmse = np.sqrt(mean_squared_error(y, train_pred))
    print("Training error for {} model : {}".format(model_name, train_rmse))

    if plot:
        plot_residuals(train_pred, y, model_name)
    return model

コード例 #13

0

ファイルを表示

ファイル: FaceAlignment.py プロジェクト: lrghust/face-alignment-3000

def GlobalRegression(local_binary_features, targets):
    """Fit one LinearSVR per landmark coordinate and return fitted updates and models.

    Returns:
        ``(updates, svrs)``: ``updates`` has shape
        ``(len(targets), param_landmark_num, 2)`` and holds the training-set
        predictions; ``svrs`` is a list of ``[svr_x, svr_y]`` pairs.
    """
    started = time.time()
    n_samples = len(targets)
    updates = np.zeros((n_samples, param_landmark_num, 2))
    svrs = []
    for landmark in range(param_landmark_num):
        pair = []
        # axis 0 is dx, axis 1 is dy
        for axis in (0, 1):
            svr = LinearSVR(C=1./n_samples, dual=True, loss='squared_epsilon_insensitive', epsilon=0.0001)
            svr.fit(local_binary_features, targets[:, landmark, axis])
            updates[:, landmark, axis] = svr.predict(local_binary_features)
            pair.append(svr)
        svrs.append(pair)
    print('Global Regression use:', time.time()-started, 's')
    return updates, svrs

コード例 #14

0

ファイルを表示

ファイル: test_toydata.py プロジェクト: timothycrosley/fri

def test_data_truth():
    """A default LinearSVR must reach r2 > 0.9 on noise-free generated regression data."""
    rng = check_random_state(1337)
    features, labels = genRegressionData(
        n_samples=100,
        n_features=10,
        n_redundant=0,
        n_strel=2,
        n_repeated=0,
        random_state=rng,
        noise=0,
    )
    features = StandardScaler().fit_transform(features)
    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, random_state=rng)

    model = LinearSVR()
    model.fit(X_train, y_train)
    r2 = r2_score(y_test, model.predict(X_test))

    assert r2 > 0.9

コード例 #15

0

ファイルを表示

    def test_linear_svr_evaluation(self):
        """
        Check that the evaluation results are the same in scikit learn and coremltools
        """
        # One converted model is checked per keyword-argument set.
        ARGS = [{}, {
            'C': 0.5,
            'epsilon': 0.25
        }, {
            'dual': False,
            'loss': 'squared_epsilon_insensitive'
        }, {
            'tol': 0.005
        }, {
            'fit_intercept': False
        }, {
            'intercept_scaling': 1.5
        }]

        input_names = self.scikit_data.feature_names
        df = pd.DataFrame(self.scikit_data.data, columns=input_names)

        for cur_args in ARGS:
            print(cur_args)
            cur_model = LinearSVR(**cur_args)
            cur_model.fit(self.scikit_data['data'], self.scikit_data['target'])
            spec = convert(cur_model, input_names, 'target')

            # The scikit-learn prediction is the reference the spec is compared to.
            df['prediction'] = cur_model.predict(self.scikit_data.data)

            metrics = evaluate_regressor(spec, df)
            # assertAlmostEquals is a deprecated alias that was removed in
            # Python 3.12; assertAlmostEqual is the supported spelling.
            self.assertAlmostEqual(metrics['max_error'], 0)

コード例 #16

0

ファイルを表示

ファイル: test_svm.py プロジェクト: iamDecode/sklearn-pmml-model

class TestLinearSVRIntegration(TestCase):
    """Integration tests that compare PMMLLinearSVR with scikit-learn's LinearSVR."""

    def setUp(self):
        """Load the categorical fixture, the PMML model and a fitted reference model."""
        frame = pd.read_csv(path.join(BASE_DIR, '../models/categorical-test.csv'))
        raw_features = frame.iloc[:, 1:]
        encoded_features = pd.get_dummies(raw_features, prefix_sep='')
        labels = frame.iloc[:, 0]
        self.test = (raw_features, labels)
        self.enc = (encoded_features, labels)

        self.clf = PMMLLinearSVR(path.join(BASE_DIR, '../models/linear-model-lm.pmml'))

        # Reference regressor trained on the boolean "label is 'Yes'" target.
        reference = LinearSVR()
        reference.fit(encoded_features, labels == 'Yes')
        self.ref = reference

    def test_invalid_model(self):
        """A PMML document without a RegressionModel must be rejected."""
        with self.assertRaises(Exception) as cm:
            PMMLLinearSVR(pmml=StringIO("""
              <PMML xmlns="http://www.dmg.org/PMML-4_3" version="4.3">
                <DataDictionary>
                  <DataField name="Class" optype="categorical" dataType="string">
                    <Value value="setosa"/>
                    <Value value="versicolor"/>
                    <Value value="virginica"/>
                  </DataField>
                </DataDictionary>
                <MiningSchema>
                  <MiningField name="Class" usageType="target"/>
                </MiningSchema>
              </PMML>
              """))

        message = str(cm.exception)
        assert message == 'PMML model does not contain RegressionModel.'

    def test_fit_exception(self):
        """Calling fit on the PMML-backed model is not supported."""
        empty_X = np.array([[]])
        empty_y = np.array([])
        with self.assertRaises(Exception) as cm:
            self.clf.fit(empty_X, empty_y)

        message = str(cm.exception)
        assert message == 'Not supported.'

    def test_more_tags(self):
        """The estimator tags must match those of a plain LinearSVR."""
        actual = self.clf._more_tags()
        expected = LinearSVR()._more_tags()
        assert actual == expected

    def test_sklearn2pmml(self):
        """Round-trip through sklearn2pmml and compare predictions."""
        exported = "svm-sklearn2pmml.pmml"
        encoded_features, labels = self.enc

        # Export to PMML
        pipeline = PMMLPipeline([("classifier", self.ref)])
        pipeline.fit(encoded_features, labels == 'Yes')
        sklearn2pmml(pipeline, exported, with_repr=True)

        try:
            # Import PMML and verify that both models predict the same values
            model = PMMLLinearSVR(pmml=exported)
            expected = self.ref.predict(encoded_features)
            assert np.allclose(expected, model.predict(encoded_features))
        finally:
            remove(exported)

コード例 #17

0

ファイルを表示

def linear_svm_regression():
    """Plot two LinearSVR fits (epsilon 1.5 and 0.5) of noisy linear data side by side."""
    np.random.seed(42)
    n_points = 50
    X = 2 * np.random.rand(n_points, 1)
    y = (4 + 3 * X + np.random.randn(n_points, 1)).ravel()

    wide_margin = LinearSVR(epsilon=1.5, random_state=42)
    narrow_margin = LinearSVR(epsilon=0.5, random_state=42)
    models = (wide_margin, narrow_margin)
    for model in models:
        model.fit(X, y)
    for model in models:
        model.support_ = find_support_vectors(model, X, y)

    # Point at which the epsilon arrow is drawn in the left panel.
    eps_x1 = 1
    eps_y_pred = wide_margin.predict([[eps_x1]])
    plot_axes = [0, 2, 3, 11]

    plt.figure(figsize=(9, 4))

    plt.subplot(121)
    plot_svm_regression(wide_margin, X, y, plot_axes)
    plt.title(r"$\epsilon = {}$".format(wide_margin.epsilon), fontsize=18)
    plt.ylabel(r"$y$", fontsize=18, rotation=0)
    arrow_tip = (eps_x1, eps_y_pred)
    arrow_tail = (eps_x1, eps_y_pred - wide_margin.epsilon)
    plt.annotate(
        '', xy=arrow_tip, xycoords='data',
        xytext=arrow_tail,
        textcoords='data', arrowprops={'arrowstyle': '<->', 'linewidth': 1.5}
    )
    plt.text(0.91, 5.6, r"$\epsilon$", fontsize=20)

    plt.subplot(122)
    plot_svm_regression(narrow_margin, X, y, plot_axes)
    plt.title(r"$\epsilon = {}$".format(narrow_margin.epsilon), fontsize=18)
    plt.show()

コード例 #18

0

ファイルを表示

def outlier_linearSVR_detector(feature, target, residual_threshold, return_index = False):
    """
    Detect outliers as points whose residual against a fitted LinearSVR exceeds
    ``residual_threshold`` times the range (max - min) of the target.

    Returns ``(number of outliers, first fitted coefficient)`` when
    ``return_index`` is exactly ``False``; otherwise the list of outlier
    positions.
    """
    target = np.array(target).flatten()
    target_range = np.max(target) - np.min(target)
    residual_threshold = target_range * residual_threshold

    regr = LinearSVR(random_state=1, dual=True, epsilon=0.0)
    regr.fit(feature, target)

    outlier_index = [
        position
        for position, predicted in enumerate(regr.predict(feature))
        if abs(predicted - target[position]) > residual_threshold
    ]
    slope = regr.coef_[0]

    if return_index is False:
        return (len(outlier_index), slope)
    return outlier_index

コード例 #19

0

ファイルを表示

    async def do_run_async(self):
        """Fit a polynomial-kernel SVR and a LinearSVR on polynomial features; print one random sample for each."""
        # Generate some non-linear data based on a quadratic equation
        n_samples = 100
        X = 6 * np.random.uniform(1, 5, (n_samples, 1)) - 3
        y = 0.5 * X**2 + X + 2 + np.random.uniform(1, 5, (n_samples, 1))

        plt.plot(X, y, ".")
        plt.show()

        def show_random_sample(model, features):
            """Print one randomly picked row, the model's prediction and its label."""
            picked = np.random.randint(0, 99)
            sample = features[picked]
            print("Prediction for:", sample)
            print(model.predict([sample]))
            print("Label:", y[picked])

        # To tackle nonlinear regression tasks, you can use a kernelized SVM model
        svm_poly_reg = SVR(kernel="poly", degree=2, C=100, epsilon=0.1)
        svm_poly_reg.fit(X, y)
        show_random_sample(svm_poly_reg, X)

        # ... or just use the Linear SVR algorithm with polynomial features
        # (original author's open question: is the degree usually
        # number of features + 1?)
        expander = PolynomialFeatures(degree=2)
        X_tr = expander.fit_transform(X)

        svm_reg = LinearSVR(epsilon=1.5)
        svm_reg.fit(X_tr, y)
        show_random_sample(svm_reg, X_tr)

コード例 #20

0

ファイルを表示

def build_svr(params=None):
    """Train a LinearSVR on the engineered features and write the submission CSV.

    :param params: keyword arguments for ``LinearSVR``; when ``None`` they are
        obtained from ``tuning(X_train, y)``.
    """
    train_df, test_df = load_data()
    feature_columns = slice('MSSubClass', 'SaleCondition')
    combined_df = pd.concat((train_df.loc[:, feature_columns],
                             test_df.loc[:, feature_columns]))

    # feature engineering (some steps work in place, others return a new frame)
    config_categorical_features(combined_df)
    log_transform_features(combined_df)
    combined_df = normalize_numerical_features(combined_df)
    combined_df = one_hot_encoding(combined_df)
    missing_value_fill(combined_df)

    # The first rows belong to the training set, the remainder to the test set.
    n_train = train_df.shape[0]
    X_train = combined_df[:n_train]
    X_test = combined_df[n_train:]
    y = np.log1p(train_df["SalePrice"])

    if params is None:
        params = tuning(X_train, y)

    # model training
    model = LinearSVR(**params)
    model.fit(X_train, y)
    neg_mse = cross_val_score(model, X_train, y, cv=3, scoring="neg_mean_squared_error")
    print("cross_validation_rmse:", np.mean(np.sqrt(-neg_mse)))

    # model prediction, mapped back from log1p space
    predicted_prices = np.expm1(model.predict(X_test))
    solution = pd.DataFrame({"id": test_df.Id, "SalePrice": predicted_prices})
    solution.to_csv("./house_price/submission_svr_v1.csv", index=False)

コード例 #21

0

ファイルを表示

def svr_C(train_features, train_labels, test_features, test_labels, name):
    """
    Plot the train and test accuracy of a LinearSVR for ten values of C.

    The figure is saved as ``./Figures/C_<name>.pdf`` when that directory
    exists; otherwise a message is printed.
    """
    sns.set()
    sns.set_style("ticks")

    c_values = np.linspace(1e-4, 1, 10)

    train_scaled, scaler = ml_funcs.apply_scaling(train_features, 'SVR', name, save_scaler=False)
    test_scaled = scaler.transform(test_features)

    train_results = []
    test_results = []
    # (features, labels, result list), evaluated in this order for every C.
    evaluations = (
        (train_scaled, train_labels, train_results),
        (test_scaled, test_labels, test_results),
    )

    for c_val in c_values:
        print("C:", c_val)

        svr = LinearSVR(C=c_val, max_iter=2000, random_state=0)
        svr.fit(train_scaled, train_labels)

        for features, labels, results in evaluations:
            results.append(compute_accuracy(svr.predict(features), labels))

    fig = plt.figure(figsize=(10, 6))
    for results, label in ((train_results, 'Train'), (test_results, 'Test')):
        sns.lineplot(x=c_values, y=results, label=label)
    plt.legend(frameon=False, loc='lower right')
    plt.xlabel('C')
    plt.ylabel('Accuracy score [%]')

    fig.tight_layout()
    sns.despine()

    if not generate_plots.directory_exists("./Figures"):
        print("Directory: ./Figures does not exist!")
        return
    plt.savefig("./Figures/C_" + name + ".pdf", bbox_inches="tight", dpi=300,
                transparent=True)

コード例 #22

0

ファイルを表示

ファイル: runs.py プロジェクト: Sourge/udacity

def linearSVR(data):
    """Fit a LinearSVR on a 90/10 split of ``data`` and print the test r2 score.

    ``price`` is the target; the identifier, date, location and the other
    columns listed below are dropped from the features.
    """
    X = data.drop(["id", "date", "price","long","lat", "zipcode","yr_renovated", "sqft_above", "sqft_basement"], axis=1)
    y = data["price"]
    X_train, X_test, y_train, y_test = tts(X, y, test_size=0.10, random_state=42)
    svr = LinearSVR(random_state=42)
    svr.fit(X_train, y_train)
    y_predict = svr.predict(X_test)
    # print as a function call: valid on Python 3 and, with a single
    # argument, prints the same text on Python 2.
    print("r2-score for LinearSVR: %f" % r2_score(y_test, y_predict))

コード例 #23

0

ファイルを表示

ファイル: parkinsons.svr.py プロジェクト: timilsinamohan/SSR

def innerfold_svr(x_test, y_test, x_train, y_train):
    """Fit a LinearSVR on the training fold; print and return the test RMSE.

    Note the argument order: the test fold comes before the training fold.
    """
    # The model is a linear SVR (the original local name suggested an RBF kernel).
    linear_svr = LinearSVR(random_state=4)
    linear_svr.fit(x_train, y_train)
    pred_y = linear_svr.predict(x_test)
    rmse = math.sqrt(mean_squared_error(y_test, pred_y))
    # print as a function call so the module also parses on Python 3.
    print(rmse)
    return rmse

コード例 #24

0

ファイルを表示

def regressor_test(complete, incomplete, years):
    """Return the name of the regressor that most often has the lowest test MSE.

    For every column in ``years[0]`` that column of ``complete`` is predicted
    from all other columns with KNN, linear regression and LinearSVR on an
    80/20 split (``random_state=0``). The winner per column is tallied and
    printed.

    Args:
        complete: DataFrame without missing values.
        incomplete: unused. The original only built a mean-imputed copy of it
            that was never read; the parameter is kept for interface
            compatibility.
        years: sequence whose first element lists the target columns.

    Returns:
        ``"KNN"``, ``"Linear"`` or ``"SVR"``.

    Raises:
        ValueError: if no column was evaluated.
    """
    kn_errors = []
    linear_errors = []
    svr_errors = []

    for year in years[0]:
        X_train, X_test, y_train, y_test = train_test_split(
            complete.loc[:, complete.columns != year].values,
            complete.loc[:, year].values, test_size=0.2, random_state=0)

        candidates = (
            (KNeighborsRegressor(2, weights='distance', metric='euclidean'), kn_errors),
            (LinearRegression(), linear_errors),
            (LinearSVR(), svr_errors),
        )
        for regressor, errors in candidates:
            regressor.fit(X_train, y_train)
            errors.append(mean_squared_error(y_test, regressor.predict(X_test)))

    # Tally the winner of every evaluated column. Iterating the collected
    # errors (instead of a hard-coded '2007':'2017' column range) keeps the
    # tally in step with `years`, whatever its length.
    winners = []
    for kn_error, linear_error, svr_error in zip(kn_errors, linear_errors, svr_errors):
        lowest = min(kn_error, linear_error, svr_error)
        if lowest == kn_error:
            winners.append("KNN")
        elif lowest == linear_error:
            winners.append("Linear")
        elif lowest == svr_error:
            winners.append("SVR")

    print("KNN =",winners.count("KNN"),'\nLinear =',winners.count("Linear") ,'\nSVR =',winners.count("SVR"))

    return max(set(winners), key=winners.count)

コード例 #25

0

ファイルを表示

ファイル: Regressors.py プロジェクト: primebuilder/Algorithmic-Trading

def svm_regressor(train_data, train_label, test_data, test_label, parameters):
    """Fit a LinearSVR, print its test MSE and save an actual-vs-predicted plot.

    The figure is written to ``./svm/<parameters>.png``.

    Args:
        train_data: training features.
        train_label: training targets (pandas object; concatenated with
            ``test_label`` for the "actual price" curve).
        test_data: test features.
        test_label: test targets; its index is reused for the predictions.
        parameters: label used in the printed message, the plot title and the
            output file name.

    Returns:
        None.
    """
    regr = LinearSVR(C=0.001, epsilon=1, random_state=random_state)
    regr.fit(train_data, train_label)

    # One prediction per row, as a column vector. (np.array(map(...)) only
    # produced this on Python 2, where map returns a list.)
    predict = np.asarray(regr.predict(test_data)).reshape(-1, 1)
    mse = MSE(predict, test_label)

    print('MSE ' + parameters + ' ' + str(mse[0]))

    df = pd.Series(predict.flatten(), index=test_label.index)
    # pd.concat replaces Series/DataFrame.append, which was removed in pandas 2.0.
    price = pd.concat([train_label, test_label])
    plt.title('SVM Regression on ' + parameters)
    plt.plot(price[1000:-1], label='actual price')
    plt.plot(df, label='predicted price')
    plt.legend(loc='lower right')
    plt.xlabel('Dates')
    plt.ylabel('Price')
    directory = './svm/'
    if not os.path.exists(directory):
        os.makedirs(directory)
    plt.savefig(directory + parameters + '.png')
    plt.close()
    return

コード例 #26

0

ファイルを表示

ファイル: models.py プロジェクト: weallwegot/poets_quants_handicap

def linear_svr_pred(X_train, Y_train):
    """
    Fit a LinearSVR on the training data and return its predictions for that same data.
    """
    regressor = LinearSVR(random_state=RANDOM_STATE)
    regressor.fit(X_train, Y_train)
    return regressor.predict(X_train)

コード例 #27

0

ファイルを表示

class LibLinear_SVR:
    """Wrapper that coerces hyperparameters and delegates to sklearn's LinearSVR."""

    # Liblinear is not deterministic as it uses a RNG inside
    def __init__(self,
                 epsilon,
                 loss,
                 dual,
                 tol,
                 C,
                 fit_intercept,
                 intercept_scaling,
                 random_state=None):
        """Store the hyperparameters as given; nothing is converted until ``fit``."""
        self.epsilon, self.loss, self.dual = epsilon, loss, dual
        self.tol, self.C = tol, C
        self.fit_intercept = fit_intercept
        self.intercept_scaling = intercept_scaling
        self.random_state = random_state
        # Set by fit(); None marks the model as not yet fitted.
        self.estimator = None

    def fit(self, X, Y):
        """Coerce the stored hyperparameters, build the LinearSVR and fit it.

        Returns:
            ``self``.
        """
        from sklearn.svm import LinearSVR

        # In case of nested loss: a dict carrying both 'loss' and 'dual'
        if isinstance(self.loss, dict):
            nested = self.loss
            self.loss = nested['loss']
            self.dual = nested['dual']

        self.epsilon = float(self.epsilon)
        self.C = float(self.C)
        self.tol = float(self.tol)
        self.dual = check_for_bool(self.dual)
        self.fit_intercept = check_for_bool(self.fit_intercept)
        self.intercept_scaling = float(self.intercept_scaling)

        estimator_kwargs = {
            'epsilon': self.epsilon,
            'loss': self.loss,
            'dual': self.dual,
            'tol': self.tol,
            'C': self.C,
            'fit_intercept': self.fit_intercept,
            'intercept_scaling': self.intercept_scaling,
            'random_state': self.random_state,
        }
        self.estimator = LinearSVR(**estimator_kwargs)
        self.estimator.fit(X, Y)
        return self

    def predict(self, X):
        """Return the fitted estimator's predictions.

        Raises:
            NotImplementedError: if ``fit`` has not been called.
        """
        fitted = self.estimator
        if fitted is None:
            raise NotImplementedError()
        return fitted.predict(X)

コード例 #28

0

ファイルを表示

class SVMRegression(object):
    """LinearSVR fitted on a fixed (X, y) pair, with helpers to inspect and plot it.

    ``train_model`` must run before ``get_support_vectors`` (it sets
    ``y_pred``), and ``get_support_vectors`` must run before
    ``plot_svm_regression`` (it sets ``idx_support_``).
    """

    def __init__(self, X, y, epsilon, **kwargs):
        self.X, self.y = X, y
        self.epsilon = epsilon
        # Extra keyword arguments are forwarded untouched to LinearSVR.
        self.model = LinearSVR(epsilon=epsilon, **kwargs)

    def train_model(self):
        """Fit the model on the stored data and cache its in-sample predictions."""
        regressor = self.model
        regressor.fit(self.X, self.y)
        self.epsilon = regressor.epsilon
        self.y_pred = regressor.predict(self.X)

    def get_support_vectors(self):
        """
        Return the indices of the points lying on or outside the epsilon tube.
        """
        residual = np.abs(self.y - self.y_pred)
        self.if_off_margin = residual >= self.epsilon
        self.idx_support_ = np.argwhere(self.if_off_margin)
        return self.idx_support_

    def model_predict(self, x_new):
        """Return the model's predictions for ``x_new``."""
        return self.model.predict(x_new)

    def plot_svm_regression(self, axes):
        """
        Draw the fitted line, its +/- epsilon bounds, the off-margin points
        and the training data on the current matplotlib figure.

        ``axes`` is passed to ``plt.axis``; its first two entries are also
        used as the x-range of the prediction grid.
        """
        grid = np.linspace(axes[0], axes[1], 100).reshape(100, 1)
        centre = self.model.predict(grid)

        plt.plot(grid, centre, "k-", linewidth=2, label="Prediction of y")
        bounds = (
            (self.epsilon, "r--", "Upper Bound"),
            (-self.epsilon, "g--", "Lower Bound"),
        )
        for shift, line_style, caption in bounds:
            plt.plot(grid, centre + shift, line_style, label=caption)

        marked = self.idx_support_
        plt.scatter(self.X[marked], self.y[marked], s=180, facecolors='#FFAAAA')
        plt.plot(self.X, self.y, "bo")
        plt.xlabel(r"$x_1$", fontsize=18)
        plt.ylabel(r"$y$", fontsize=18, rotation=0)
        plt.legend(loc="best", fontsize=18)
        plt.axis(axes)

コード例 #29

0

ファイルを表示

ファイル: Hotel_Rating_Prediction.py プロジェクト: Anamika0601/Hotel-Review-Rating-Prediction

def predict_SVM():
    """Fit a LinearSVR on the module-level training split and print test metrics.

    Reads the globals ``X_train_csr``, ``y_train_1``, ``X_test_csr`` and
    ``y_test_1``. Prints the raw predictions, a classification report and the
    RMSE, both computed on the predictions rounded to the nearest integer.
    Returns ``None``.
    """
    svclassifier = LinearSVR(random_state=50,
                             max_iter=100000,
                             epsilon=0,
                             tol=1e-9)
    # ``toarray()`` gives a plain ndarray. ``todense()`` returns ``np.matrix``,
    # which recent scikit-learn releases refuse as estimator input.
    # NOTE(review): assumes both feature sets are SciPy sparse matrices.
    svclassifier.fit(X_train_csr.toarray(), y_train_1)
    scv_test_predict = svclassifier.predict(X_test_csr.toarray())
    print(scv_test_predict)

    # Round once and reuse for both metrics.
    rounded_predict = np.rint(scv_test_predict)
    print(classification_report(y_test_1, rounded_predict))
    print("RMSE for Neural Random SVR Classifier",
          sqrt(mean_squared_error(y_test_1, rounded_predict)))

コード例 #30

0

ファイルを表示

ファイル: models_additional.py プロジェクト: HuseyinAltnsk/article-popularity-prediction-ML

def Linear_SVR(Xtrain, Xtest, ytrain, ytest):
    """Fit LinearSVR for a fixed grid of C values and print the mean test R^2.

    For every C in the grid a model with the squared epsilon-insensitive loss
    is fitted on ``(Xtrain, ytrain)`` and scored with ``metrics.r2_score`` on
    ``(Xtest, ytest)``. The label "LinearSVR" and the average of those scores
    are printed. Returns ``None``.
    """
    parameters = [0.25, 0.5, 0.75, 1, 1.25, 1.5, 1.75, 2, 2.25, 2.5]
    cv_scores = []
    for c_value in parameters:
        clf = LinearSVR(loss='squared_epsilon_insensitive', C=c_value)
        clf.fit(Xtrain, ytrain)
        cv_scores.append(metrics.r2_score(ytest, clf.predict(Xtest)))
    print("LinearSVR")
    # print() call form: the original print statement is a SyntaxError on
    # Python 3, and a single-argument call behaves the same on Python 2.
    print(sum(cv_scores) / float(len(cv_scores)))

コード例 #31

0

ファイルを表示

ファイル: xielaoban.py プロジェクト: nbutacm/NBUTACM

def main():
    """Train a LinearSVR and a randomized-search SVR on the blood-glucose data.

    Reads the train, test and answer CSV files from the working directory,
    preprocesses the features, then prints:

    * MSE and RMSE of a default ``LinearSVR`` on the test set;
    * train RMSE and test RMSE of the best ``SVR`` found by
      ``RandomizedSearchCV``.

    All preprocessing statistics (label encoding, imputation means, scaling)
    are learned on the training set only and then applied to the test set, so
    both sets are encoded and scaled identically.
    """
    # Load the data.
    train_data = pd.read_csv('d_train_20180102.csv', encoding='GBK')
    train_bloods = train_data['血糖'].astype(float)
    test_data = pd.read_csv('d_test_A_20180102.csv', encoding='GBK')
    test_bloods = pd.read_csv('d_answer_a_20180128.csv',
                              encoding='GBK').astype(float)

    # Drop identifiers, the exam date and the hepatitis-B columns; the target
    # column additionally leaves the training features.
    train_data = train_data.drop(['id', '体检日期'], axis=1)
    test_data = test_data.drop(['id', '体检日期'], axis=1)
    train_data = train_data.drop(
        ['乙肝表面抗原', '乙肝表面抗体', '乙肝e抗原', '乙肝e抗体', '乙肝核心抗体', '血糖'], axis=1)
    test_data = test_data.drop(
        ['乙肝表面抗原', '乙肝表面抗体', '乙肝e抗原', '乙肝e抗体', '乙肝核心抗体'], axis=1)

    label = train_data.columns
    # Same column order for both frames, so the fitted scaler lines up.
    test_data = test_data[label]

    # Fit the encoder on the training labels only. Re-fitting on the test set
    # can map the same category to a different integer.
    encoder = LabelEncoder()
    train_data['性别'] = encoder.fit_transform(train_data['性别'])
    test_data['性别'] = encoder.transform(test_data['性别'])

    # Impute with the training means. Plain assignment is used because
    # ``fillna(inplace=True)`` on a column selection is not guaranteed to write
    # back to the frame. No explicit float cast is needed: the scaler below
    # converts to float itself.
    train_means = train_data.mean()
    train_data = train_data.fillna(train_means)
    test_data = test_data.fillna(train_means)

    # Standardize with statistics learned on the training set only.
    scaler = StandardScaler()
    train_data = pd.DataFrame(scaler.fit_transform(train_data))
    test_data = pd.DataFrame(scaler.transform(test_data))

    # Regression, so use the linear SVR.
    lin_svr = LinearSVR(random_state=42)
    lin_svr.fit(train_data, train_bloods)
    predict_bloods = lin_svr.predict(test_data)
    mse = mean_squared_error(test_bloods, predict_bloods)
    print(mse)
    print(np.sqrt(mse))

    # ``reciprocal`` takes its two bounds as separate arguments, and each
    # entry must be a distribution (or a list of concrete values), not a list
    # of distribution objects.
    param_distributions = {
        'gamma': reciprocal(0.001, 0.1),
        'C': uniform(1, 10),
    }
    rnd_search_cv = RandomizedSearchCV(SVR(),
                                       param_distributions,
                                       n_iter=4,
                                       verbose=2,
                                       cv=3,
                                       random_state=42)
    # The target stays a 1-D Series; a one-column DataFrame would have to be
    # flattened by scikit-learn.
    rnd_search_cv.fit(train_data, train_bloods)
    best_svr = rnd_search_cv.best_estimator_

    y_pred = best_svr.predict(train_data)
    mse = mean_squared_error(train_bloods, y_pred)
    print(np.sqrt(mse))  # train RMSE
    y_pred = best_svr.predict(test_data)
    mse = mean_squared_error(test_bloods, y_pred)
    print(np.sqrt(mse))  # test RMSE

コード例 #32

0

ファイルを表示

ファイル: predict.py プロジェクト: plumiron/weibo_prediction

class SVRR(object):
    """LinearSVR adapter that accepts pandas objects.

    Features are unwrapped through ``.values``; the training target is read
    from the ``'y'`` entry of ``ys``.
    """

    def __init__(self, C):
        self.regression = LinearSVR(C=C)

    def fit(self, xs, ys):
        """Fit the regressor on ``xs.values`` against ``ys['y']``."""
        features = xs.values
        target = ys['y']
        self.regression.fit(features, target)

    def predict(self, xs):
        """Return the regressor's predictions for ``xs.values``."""
        return self.regression.predict(xs.values)

コード例 #33

0

ファイルを表示

ファイル: entrainment_pred.py プロジェクト: bonilhamusclab-projects/behav_connectome_prediction

    class LinearSVRPermuteCoef:
        """LinearSVR wrapper that logs the extreme coefficients of every fit.

        Each call to ``fit`` appends the largest and smallest fitted
        coefficient to ``coeffs_state['max']`` / ``coeffs_state['min']``,
        a mapping taken from the enclosing scope. The remaining methods
        delegate to the wrapped model.
        """

        def __init__(self, **kwargs):
            self.model = LinearSVR(**kwargs)

        def fit(self, X, y):
            """Fit the wrapped model, mirror its coefficients and record their extrema."""
            wrapped = self.model
            wrapped.fit(X, y)

            self.coef_ = wrapped.coef_
            self.intercept_ = wrapped.intercept_

            # Same order as before: the maximum is recorded first, then the minimum.
            for bucket, reducer in (('max', np.max), ('min', np.min)):
                coeffs_state[bucket].append(reducer(self.coef_))

            return self

        def get_params(self, deep=True):
            """Return the wrapped model's parameters."""
            return self.model.get_params(deep)

        def set_params(self, **kwargs):
            """Forward parameters to the wrapped model and return ``self``."""
            self.model.set_params(**kwargs)
            return self

        def predict(self, X):
            """Return the wrapped model's predictions for ``X``."""
            return self.model.predict(X)

        def score(self, X, y, sample_weight=None):
            """Return the wrapped model's score, passing ``sample_weight`` only when given."""
            if sample_weight is None:
                return self.model.score(X, y)
            return self.model.score(X, y, sample_weight)

        @staticmethod
        def permute_min_coefs():
            """Return the list of recorded minimum coefficients."""
            return coeffs_state['min']

        @staticmethod
        def permute_max_coefs():
            """Return the list of recorded maximum coefficients."""
            return coeffs_state['max']

        @staticmethod
        def reset_perm_coefs():
            """Replace both recorded lists with fresh empty lists."""
            coeffs_state['min'] = []
            coeffs_state['max'] = []

コード例 #34

0

ファイルを表示

ファイル: models.py プロジェクト: pearlphilip/USP-inhibition

def build_svm(x_train, y_train, x_test, y_test, n_features):
    """
    Fit a linear support vector regression model and pickle it with its scores.

    The output file receives, as consecutive pickles in this order: the fitted
    model, mean absolute error, mean squared error, median absolute error,
    R^2, explained variance and the test predictions.

    :param x_train: features dataframe for model training
    :param y_train: target dataframe for model training
    :param x_test: features dataframe for model testing
    :param y_test: target dataframe for model testing
    :param n_features: integer inserted into the output file name
    :return: None
    """
    # An integer random_state makes the sampling repeatable.
    clf = LinearSVR(random_state=1, dual=False, epsilon=0,
                    loss='squared_epsilon_insensitive')
    clf.fit(x_train, y_train)
    y_pred = clf.predict(x_test)

    # Regression metrics, listed in the order they are written to disk.
    scorers = (
        sklearn.metrics.mean_absolute_error,
        sklearn.metrics.mean_squared_error,
        sklearn.metrics.median_absolute_error,
        sklearn.metrics.r2_score,
        sklearn.metrics.explained_variance_score,
    )
    scores = [scorer(y_test, y_pred) for scorer in scorers]

    with open('../trained_networks/svm_%d_data.pkl' % n_features, 'wb') as results:
        for payload in [clf] + scores + [y_pred]:
            pickle.dump(payload, results, pickle.HIGHEST_PROTOCOL)

    return

コード例 #35

0

ファイルを表示

ファイル: Rossmann_Stacking.py プロジェクト: gunnarklee/kaggle

# Feature columns fed to the stage-1 models.
cat_vars = ['DayOfWeek','Promo','StateHoliday','SchoolHoliday','StoreType','Assortment','CompetitionOpenSinceMonth',
            'CompetitionOpenSinceYear','Promo2','Promo2SinceWeek','Promo2SinceYear','PromoInterval','Day','Month','Year']


num_vars = ['Open','Store','CompetitionDistance','ratio1','ratio2']



# Hold out 1.2% of the training rows for validation.
X_trn, X_val = train_test_split(train, test_size=0.012, random_state=10)

# print() calls replace the Python 2 print statements, which are a
# SyntaxError on Python 3.
print('Training Stage 1 Models')

#train svm
svm1 = LinearSVR(verbose=True)
svm1.fit(X_trn[cat_vars+num_vars],X_trn['Sales'])
# Predictions on the full training frame, plus the validation predictions
# that are scored right below.
svm1_feature = svm1.predict(train[cat_vars+num_vars])
preds = svm1.predict(X_val[cat_vars+num_vars])
# Root mean squared relative error after exponentiating both sides.
# NOTE(review): presumably 'Sales' is log-transformed upstream; confirm.
print('svm ',
      (np.mean(((np.exp(preds)-np.exp(X_val['Sales']))/(np.exp(X_val['Sales'])+1))**2))**0.5)


#train xgb
dtrain = xgb.DMatrix(X_trn[cat_vars+num_vars],X_trn['Sales'])
dvalid = xgb.DMatrix(X_val[cat_vars+num_vars],X_val['Sales'])
watchlist = [(dtrain, 'train'), (dvalid, 'eval')]

num_boost_round = 50
params1 = {"objective": "reg:linear","booster" : "gbtree",
"eta": 0.5,"max_depth": 2,"subsample": 0.5,"colsample_bytree": 0.4,
"nthread":4,"silent": 1,"seed": 1301}
gbm1 = xgb.train(params1, dtrain, num_boost_round, evals=watchlist,early_stopping_rounds=50, feval=rmspe_xg, verbose_eval=True)

コード例 #36

0

ファイルを表示

ファイル: project_nextsteps.py プロジェクト: QLGu/flickrtravel

    # Fit a linear SVR with the squared epsilon-insensitive loss for cluster `c`.
    linsvr = LinearSVR(epsilon=0.1, tol=1e-4, C=1.0, loss='squared_epsilon_insensitive')
    linsvr.fit(explanatory_df, response_series)
    # Score the model that was just fitted; the original scored `svr`, a
    # different object, so the stored R^2 did not belong to this model.
    linsvr_rsq[c] = linsvr.score(explanatory_df, response_series)

    # prediction and linear extrapolation of training data set to get further predictions.
    test_cluster = train_cluster.copy()

    explanatory_testdf = test_cluster[explanatory_features]
    response_testseries = test_cluster.y

    # The in-sample predictions do not change inside the loop, so compute them once.
    train_predictions = linsvr.predict(explanatory_df)
    for i in range(0, (len(cluster_i) - 5)):
        # Five consecutive observations followed by the fitted value for that row.
        test_cluster.loc[i] = list(cluster_i.iloc[i:i + 5]) + [train_predictions[i]]

    # further running time series to predict into the future
    j = len(test_cluster) - 1
    for i in range(j, j + forecast_years):
        explanatory_testdf = test_cluster[explanatory_features]
        # `.ix` no longer exists in current pandas. Positional access matches
        # the positional `y_est[i]` lookup below.
        # NOTE(review): assumes row label i equals row position i.
        test_list = test_cluster.iloc[i, 1:6].tolist()
        y_est = linsvr.predict(explanatory_testdf)
        test_list.append(y_est[i])
        test_series = pd.Series(test_list, index=train_cluster.columns)
        # `DataFrame.append` was removed from pandas; concat is the equivalent.
        test_cluster = pd.concat([test_cluster, test_series.to_frame().T],
                                 ignore_index=True)

    linsvr_test_clustery[c] = test_cluster['y']
    linsvr_residuals = test_cluster['y'][0:len(train_cluster)] - train_cluster['y']

    linsvr_RMSE[c] = (((linsvr_residuals)**2).mean())**(0.5)

コード例 #37

0

ファイルを表示

ファイル: learner.py プロジェクト: tpsatish95/Universal-MultiDomain-Sentiment-Classifier

class TextLearner(object):
    """TF-IDF + chi-squared selection + linear model pipeline for pickled text data.

    The call order used by ``crossVal`` is ``featurizeXY`` ->
    ``reduceDimension`` -> ``trainModel`` -> ``testModel`` -> ``getReport``.
    Instances work as context managers; leaving the ``with`` block drops every
    data and model reference held by the instance.
    """

    def __init__(self, data_path, model_path="./", name=""):
        self.name = name
        self.data_path = data_path
        self.model_path = model_path
        self.DesignMatrix = []
        self.TestMatrix = []
        self.X_train = []
        self.y_train = []  # not only train but general purpose too
        self.X_test = []
        self.y_test = []
        self.y_pred = []
        self.vectorizer = None
        self.feature_names = None
        self.chi2 = None
        self.mlModel = None
        self.F = Filter()

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        # Release everything, including the loaded matrices and the filter.
        self.DesignMatrix = []
        self.TestMatrix = []
        self.clearOld()
        self.F = None

    def addModelDetails(self, model_p, name=""):
        """Set the output directory prefix and the model name."""
        self.name = name
        self.model_path = model_p

    def load_data(self, TrTe=0):
        """Unpickle ``data_path`` into the train (0) or test (1) matrix.

        Returns the length of the second entry of the loaded matrix, or
        ``None`` when ``TrTe`` is neither 0 nor 1.
        """
        # NOTE(review): pickle.load executes arbitrary code from the file;
        # only load data files from a trusted source.
        with open(self.data_path, 'rb') as f:
            if TrTe == 0:
                self.DesignMatrix = pickle.load(f)
                return len(self.DesignMatrix[1])
            if TrTe == 1:
                self.TestMatrix = pickle.load(f)
                return len(self.TestMatrix[1])

    def clearOld(self):
        """Reset the train/test splits, predictions and all fitted components."""
        self.X_train = []
        self.y_train = []
        self.X_test = []
        self.y_test = []
        self.y_pred = []
        self.vectorizer = None
        self.feature_names = None
        self.chi2 = None
        self.mlModel = None

    def process(self, text, default=0):
        """Normalize one document.

        With ``default == 0`` the text is stripped, lower-cased and encoded as
        UTF-8; otherwise it is delegated to the ``Filter`` instance.
        """
        if default == 0:
            return text.strip().lower().encode("utf-8")
        return self.F.process(text)

    def loadXY(self, TrTe=0, feature_index=0, label_index=1):
        """Split the loaded matrix rows into processed features and labels.

        ``TrTe`` selects train (0) or test (1). The results replace the
        corresponding ``X_*`` / ``y_*`` attributes as NumPy arrays.
        """
        if TrTe == 0:
            for row in self.DesignMatrix:
                self.X_train.append(self.process(row[feature_index]))
                self.y_train.append(row[label_index])
            self.X_train = np.array(self.X_train)
            self.y_train = np.array(self.y_train)

        elif TrTe == 1:
            for row in self.TestMatrix:
                self.X_test.append(self.process(row[feature_index]))
                self.y_test.append(row[label_index])
            self.X_test = np.array(self.X_test)
            self.y_test = np.array(self.y_test)

    def featurizeXY(self, only_train=1):
        """Fit a TF-IDF vectorizer on ``X_train``; also transform ``X_test`` when ``only_train == 0``."""
        sw = ['a', 'across', 'am', 'an', 'and', 'any', 'are', 'as', 'at', 'be',
              'been', 'being', 'but', 'by', 'can', 'could', 'did', 'do', 'does',
              'each', 'for', 'from', 'had', 'has', 'have', 'in', 'into', 'is',
              "isn't", 'it', "it'd", "it'll", "it's", 'its', 'of', 'on', 'or',
              'that', "that's", 'thats', 'the', 'there', "there's", 'theres',
              'these', 'this', 'those', 'to', 'under', 'until', 'up', 'were',
              'will', 'with', 'would']
        self.vectorizer = TfidfVectorizer(sublinear_tf=True, max_df=0.5, stop_words=sw)
        self.X_train = self.vectorizer.fit_transform(self.X_train)
        self.feature_names = self.vectorizer.get_feature_names()
        if only_train == 0:
            self.X_test = self.vectorizer.transform(self.X_test)

    def reduceDimension(self, only_train=1, percent=50):
        """Keep the best ``percent`` % of features according to the chi-squared test."""
        n_samples, n_features = self.X_train.shape
        # Divide by a float: on Python 2 ``percent/100`` is integer division
        # and yields k == 0 for any percent below 100.
        k = int(n_features * (percent / 100.0))

        self.chi2 = SelectKBest(chi2, k=k)
        self.X_train = self.chi2.fit_transform(self.X_train, self.y_train)
        self.feature_names = [self.feature_names[i] for i in self.chi2.get_support(indices=True)]
        self.feature_names = np.asarray(self.feature_names)
        if only_train == 0:
            self.X_test = self.chi2.transform(self.X_test)

    def trainModel(self, Model="default"):
        """Fit ``Model`` on the training split; ``"default"`` selects a LinearSVR."""
        if Model == "default":
            self.mlModel = LinearSVR(loss='squared_epsilon_insensitive', dual=False, tol=1e-3)
        else:
            self.mlModel = Model
        self.mlModel.fit(self.X_train, self.y_train)

    def testModel(self, approx=1):
        """Predict on ``X_test`` and return the accuracy score only.

        With ``approx == 1`` the real-valued predictions are first mapped to
        1.0 (strictly positive) or -1.0 (everything else).
        """
        self.y_pred = np.array(self.mlModel.predict(self.X_test))

        if approx == 1:
            # Convert real-valued results to binary labels for scoring.
            self.y_pred = [1.0 if y > 0.0 else -1.0 for y in self.y_pred]

        return metrics.accuracy_score(self.y_test, self.y_pred)

    def getReport(self, save=1, get_top_words=0):
        """Build a text report of the last ``testModel`` run and return it.

        ``get_top_words == 1`` adds coefficient statistics and the 20 highest
        and 20 lowest weighted feature names when the model has ``coef_``.
        ``save == 1`` also writes the report to ``model_path + "report.txt"``.
        """
        report = ""
        if get_top_words == 1 and hasattr(self.mlModel, 'coef_'):
            coef = self.mlModel.coef_
            # LinearSVR (the default model) exposes a 1-D ``coef_``. View it
            # as a single row so the 2-D indexing below works for both layouts.
            if getattr(coef, 'ndim', 2) == 1:
                coef = coef.reshape(1, -1)
            report += "Dimensionality: " + str(coef.shape[1])
            report += "\nDensity: " + str(density(coef))

            rank = np.argsort(coef[0])
            strongest = rank[-20:]
            weakest = rank[:20]
            # feature_names is a plain list unless reduceDimension ran.
            names = np.asarray(self.feature_names)
            report += "\n\nTop 10 keywords: "
            report += "\nPositive: " + (" ".join(names[strongest]))
            report += "\nNegative: " + (" ".join(names[weakest]))

        score = metrics.accuracy_score(self.y_test, self.y_pred)
        report += "\n\nAccuracy: " + str(score)
        report += "\nClassification report: "
        report += "\n\n" + str(metrics.classification_report(self.y_test, self.y_pred, target_names=["Negative", "Positive"]))
        report += "\nConfusion matrix: "
        report += "\n\n" + str(metrics.confusion_matrix(self.y_test, self.y_pred)) + "\n\n"

        if save == 1:
            with open(self.model_path + "report.txt", "w") as text_file:
                text_file.write(report)

        return report

    def crossVal(self, folds=5, dim_red=50, full_iter=0, save=1):
        """Run stratified cross-validation and return the concatenated reports.

        Only the first fold is evaluated unless ``full_iter == 1``.
        Caution: this overwrites the train and test X, y attributes.
        """
        # NOTE(review): ``cross_validation.StratifiedKFold(y, n_folds=...)`` is
        # the legacy scikit-learn API; kept as-is to match the file's imports.
        skf = cross_validation.StratifiedKFold(self.y_train, n_folds=folds, shuffle=True)
        print(skf)
        master_report = ""

        X_copy = self.X_train
        y_copy = self.y_train

        for train_index, test_index in skf:
            self.X_train, self.X_test = X_copy[train_index], X_copy[test_index]
            self.y_train, self.y_test = y_copy[train_index], y_copy[test_index]
            self.featurizeXY(0)
            self.reduceDimension(0, dim_red)
            self.trainModel()
            self.testModel()
            master_report += self.getReport(save=0, get_top_words=0)
            if full_iter != 1:
                break

        if save == 1:
            with open(self.model_path + "master_report.txt", "w") as text_file:
                text_file.write(master_report)

        return master_report

    def save_obj(self, obj, name):
        """Pickle ``obj`` to ``model_path + name + '.pkl'`` using protocol 2."""
        with open(self.model_path + name + '.pkl', 'wb') as f:
            pickle.dump(obj, f, protocol=2)

    def saveModel(self):
        """Pickle the model, the vectorizer and the feature selector into ``model_path``."""
        self.save_obj(self.mlModel, self.name + "_model")
        self.save_obj(self.vectorizer, self.name + "_vectorizer")
        self.save_obj(self.chi2, self.name + "_feature_selector")

    def plot(self):
        '''
        beta (Just plotting the model) (Not working)

        Contours the model's predictions over a mesh spanned by the first two
        feature columns and saves the figure to ``model_path + 'plot.png'``.
        '''

        h = .02  # step size in the mesh
        # create a mesh to plot in
        x_min, x_max = self.X_train[:, 0].min() - 1, self.X_train[:, 0].max() + 1
        y_min, y_max = self.X_train[:, 1].min() - 1, self.X_train[:, 1].max() + 1
        xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

        # Plot the decision boundary. For that, we will assign a color to each
        # point in the mesh [x_min, m_max]x[y_min, y_max].
        Z = self.mlModel.predict(np.c_[xx.ravel(), yy.ravel()])

        # Put the result into a color plot
        Z = Z.reshape(xx.shape)
        plt.contour(xx, yy, Z, cmap=plt.cm.Paired)

        plt.xlim(xx.min(), xx.max())
        plt.ylim(yy.min(), yy.max())
        plt.xticks(())
        plt.yticks(())
        plt.title(self.name)
        plt.savefig(self.model_path + 'plot.png')

コード例 #38

0

ファイルを表示

ファイル: dfs.py プロジェクト: ruige123456/dataMining

def linearSVR(train, trainLable, testData):
    """Fit a default LinearSVR on (train, trainLable) and return its predictions for testData."""
    model = LinearSVR()
    model.fit(train, trainLable)
    return model.predict(testData)

コード例 #39

0

ファイルを表示

ファイル: expire_feature_selection.py プロジェクト: SimonHL/Feature-selection

# Select C by cross-validation.
best_cv_score = -1e+30
for log2c in np.arange(-10, 30, 1):
    # Float base on purpose: raising the integer 2 to a negative NumPy
    # integer raises "Integers to negative integer powers are not allowed".
    c_value = 2.0 ** log2c
    # No fit is needed here; cross_val_score fits its own clones per split.
    clf = LinearSVR(C=c_value, epsilon=0.0001)
    # NOTE(review): presumably leave-one-out, i.e. sample_num equals the
    # number of samples; confirm. Newer scikit-learn releases name this scorer
    # 'neg_mean_squared_error'.
    cv_score = cross_val_score(cv=sample_num, estimator=clf, X=x_input_minmax, y=y_input, scoring= 'mean_squared_error').mean()
    print(cv_score)
    if cv_score > best_cv_score:
        best_cv_score = cv_score
        bestc = c_value


# Refit with the selected C and predict on the training inputs.
clf = LinearSVR(C=bestc, epsilon=0.0001)
clf.fit(x_input_minmax, y_input)
y_pred = clf.predict(x_input_minmax)
# y_pred = y_scaler.inverse_transform(y_pred.reshape(-1,1))

# Plot target and prediction against one input column.
view_point = 5
plt.plot(x_input[:,view_point], y_input, 'bo-', x_input[:,view_point], y_pred, 'rs-')
plt.grid(True)
plt.legend(['y', 'y_pred'])
plt.show()

コード例 #40

0

ファイルを表示

ファイル: author_based_year_classifier.py プロジェクト: ankit141189/bing

	combined = np.append(X, np.matrix(Y).T, axis=1) 
	np.random.shuffle(combined)
	tail_size = -1 * size
	last_column = X.shape[1]
	training_labels = combined[:tail_size, last_column]
	training_data = combined[:tail_size, :-2]
	test_data = combined[tail_size:, :-2]
	actual_labels = combined[tail_size:, last_column]
	return training_data, np.ravel(training_labels), test_data, np.ravel(actual_labels)

# Build a sparse sample-by-author indicator matrix from "row column" pairs.
NO_TRAINING_SAMPLES = 6000
NO_OF_AUTHORS = 10000
# Builtin ``int`` is what the removed alias ``np.int`` used to point to.
matrix = dok_matrix((NO_TRAINING_SAMPLES, NO_OF_AUTHORS), dtype=int)
# Context managers close both input files even if parsing fails.
with open('author_features') as training:
    for line in training:
        values = line.rstrip().split()
        matrix[int(values[0]), int(values[1])] = 1

# All labels are read from the first line of the labels file.
with open('year_training_labels') as labels_file:
    labels = [int(x) for x in labels_file.readline().rstrip().split()]

training_matrix = matrix[:4498]
training_data, training_labels, test_data, actual_labels = sample(training_matrix, labels)
classifier = LinearSVR()
classifier.fit(training_data, training_labels)
output = classifier.predict(test_data)
# print() calls replace the Python 2 print statements.
for index, predicted in enumerate(output):
    print('%s %s' % (predicted, actual_labels[index]))

print(metrics.explained_variance_score(actual_labels, output))

コード例 #41

0

ファイルを表示

ファイル: svr_layer1.py プロジェクト: bishwarup307/Prudential_Life_Insurance

    print "----------- Fold %d -----------------------" %i
    print "--------------------------------------------"
    
    val_id = fold_ids.ix[:, i].dropna()
    idx = train["Id"].isin(list(val_id))
    
    trainingSet = train[~idx]
    validationSet = train[idx]
    
    tr_X = np.matrix(trainingSet[feature_names])
    tr_Y = np.array(trainingSet["Response"])
    val_X = np.matrix(validationSet[feature_names])
    val_Y = np.array(validationSet["Response"])
    
    regm = LinearSVR(C = 0.06, epsilon = 0.45, tol = 1e-5,
                     dual = True, verbose = True, random_state = 133)
                     
    regm.fit(tr_X, tr_Y)    
    preds = regm.predict(val_X)
    
    df = pd.DataFrame(dict({"Id" : validationSet["Id"], "ground_truth" : validationSet["Response"], 
                            "linsvr_preds" : preds}))
    
    linsvr_val = linsvr_val.append(df, ignore_index = True)
    
    tpreds = regm.predict(test_X)
    cname = "Fold" + `i`
    linsvr_test[cname] = tpreds
    
linsvr_val.to_csv("ensemble2/linsvr_val.csv")
linsvr_test.to_csv("ensemble2/linsvr_test.csv")

コード例 #42

0

ファイルを表示

ファイル: pca_svr.py プロジェクト: abrinkmacmu/ML_Project2

    # Held-out part of the current fold.
    X2 = X_train_reduced[test]
    Y2 = Y_train_raw[test]

    ## Train Classifiers on fold
    rdg_clf = Ridge(alpha=0.5)
    rdg_clf.fit(X1, Y1)
    lso_clf = Lasso(alpha=0.6257)
    lso_clf.fit(X1, Y1)
    svr_clf = LinearSVR(C=1e3)
    svr_clf.fit(X1, Y1)

    ## Score Classifiers on fold
    rdg_clf_score = rdg_clf.score(X2, Y2)
    lso_clf_score = lso_clf.score(X2, Y2)
    svr_clf_score = svr_clf.score(X2, Y2)

    # print() calls replace the Python 2 print statements; the printed text
    # is unchanged. The "SVR_RBF" label reports the LinearSVR score.
    print("Ridge:  ", rdg_clf_score)
    print("Lasso:  ", lso_clf_score)
    print("SVR_RBF:  ", svr_clf_score)


## Train final Classifiers
# clf = Ridge(alpha=.5)
# LinearSVR has no ``gamma`` parameter (that belongs to kernel SVR), so
# passing it raised TypeError before any training happened.
clf = LinearSVR(C=1e3)
clf.fit(X_train_reduced, Y_train_raw)
Y_predicted = clf.predict(X_test_reduced)

## Save results to csv
np.savetxt("prediction.csv", Y_predicted, fmt="%.5f", delimiter=",")

コード例 #43

0

ファイルを表示

ファイル: ch05_support_vector_machine.py プロジェクト: stonecoder19/machine_learning

# Second regressor, with epsilon=0.5; both regressors are then fitted on the
# same (X, y). `svm_reg1` is created earlier in the script.
svm_reg2 =LinearSVR(epsilon=0.5)
svm_reg1.fit(X, y)
svm_reg2.fit(X, y)



def find_support_vectors(svm_reg, X, y):
    """Return the indices of the samples whose absolute residual is at least ``svm_reg.epsilon``."""
    residuals = np.abs(y - svm_reg.predict(X))
    return np.argwhere(residuals >= svm_reg.epsilon)

# Attach the off-margin sample indices to each regressor; the plotting
# function reads them back from `support_`.
svm_reg1.support_ = find_support_vectors(svm_reg1, X, y)
svm_reg2.support_ = find_support_vectors(svm_reg2, X, y)

# Prediction of the first regressor at x1 = 1.
eps_x1 = 1
eps_y_pred = svm_reg1.predict([[eps_x1]])


def plot_svm_regression(svm_reg, X, y, axes):
    """Plot a fitted regressor with its +/- epsilon band and the training points.

    ``svm_reg`` must provide ``predict``, ``epsilon`` and ``support_``; the
    samples indexed by ``support_`` are highlighted. ``axes`` is passed to
    ``plt.axis`` and its first two entries bound the prediction grid.
    """
    grid = np.linspace(axes[0], axes[1], 100).reshape(100, 1)
    fitted = svm_reg.predict(grid)
    margin = svm_reg.epsilon

    plt.plot(grid, fitted, "k-", linewidth=2, label=r"$\hat{y}$")
    # Upper bound first, then lower bound.
    for shift in (margin, -margin):
        plt.plot(grid, fitted + shift, "k--")

    highlighted = svm_reg.support_
    plt.scatter(X[highlighted], y[highlighted], s=180, facecolors="#FFAAAA")
    plt.plot(X, y, "bo")
    plt.xlabel(r"$x_1$", fontsize=18)
    plt.legend(loc="upper left", fontsize=18)
    plt.axis(axes)

# Open a 9x4 inch figure.
plt.figure(figsize=(9, 4))

コード例 #44

0

ファイルを表示

ファイル: LinearSVR.py プロジェクト: bomdicScott/pub_SVR_analysis

    # CSV columns 0, 4, 6 and 10 form one feature row; column 9 is the target.
    for row in csv.reader(data_file):
        data.append([row[0], row[4], row[6], row[10]])
        target.append(row[9])

# Drop the header row (index 0) and clean the remaining rows.
data,target = Lin_clean_data(data[1:],target[1:],2)

# The first `point` samples train the model; the following 20% of `point`
# samples are used for testing. The training slices end at `point` (not
# `point-1`) so the sample at index point-1 is no longer left out of both splits.
point = 2000
X_train = data[:point]
X_test = data[point:point+int(point*0.2)]
y_train = target[:point]
y_test = target[point:point+int(point*0.2)]


svr = LinearSVR(C=0.1)
svr_model = svr.fit(X_train,y_train)
lin = svr.predict(X_train)
lin_test = svr.predict(X_test)

lin,lin_test = data_normalize(y_train,y_test,lin,lin_test)

# Score and mean absolute error on the training and test splits.
print("Train score : ",score(y_train,lin))
print("Train average error : ",sum(abs(y_train-lin)) / float(len(y_train)))

print("Fit score : ",score(y_test,lin_test))
print("Fit average error : ",sum(abs(y_test-lin_test)) / float(len(y_test)))

# Save the comparison figure.
# NOTE(review): the output path is a hard-coded local Windows directory.
figure1 = plt.figure(1,figsize=[20,10])
draw_pic(range(len(X_train)),range(len(X_test)),lin,lin_test,y_train,y_test,label='lin',figure=figure1)
figure1.savefig("C:/Users/sean/Desktop/SVR_DATA/linSVR.png",dpi=300,format="png")
plt.close(1)