Example #1
def test_linearsvx_loss_penalty_deprecations():
    X, y = [[0.0], [1.0]], [0, 1]

    msg = ("loss='%s' has been deprecated in favor of "
           "loss='%s' as of 0.16. Backward compatibility"
           " for the %s will be removed in %s")

    # LinearSVC
    # loss l1 --> hinge
    assert_warns_message(DeprecationWarning,
                         msg % ("l1", "hinge", "loss='l1'", "1.0"),
                         svm.LinearSVC(loss="l1").fit, X, y)

    # loss l2 --> squared_hinge
    assert_warns_message(DeprecationWarning,
                         msg % ("l2", "squared_hinge", "loss='l2'", "1.0"),
                         svm.LinearSVC(loss="l2").fit, X, y)

    # LinearSVR
    # loss l1 --> epsilon_insensitive
    assert_warns_message(
        DeprecationWarning,
        msg % ("l1", "epsilon_insensitive", "loss='l1'", "1.0"),
        svm.LinearSVR(loss="l1").fit, X, y)

    # loss l2 --> squared_epsilon_insensitive
    assert_warns_message(
        DeprecationWarning,
        msg % ("l2", "squared_epsilon_insensitive", "loss='l2'", "1.0"),
        svm.LinearSVR(loss="l2").fit, X, y)
Example #2
def test_linearsvr_fit_sampleweight():
    # check that fitting with unit sample weights gives the same result
    # as fitting without sample weights
    diabetes = datasets.load_diabetes()
    n_samples = len(diabetes.target)
    unit_weight = np.ones(n_samples)
    lsvr = svm.LinearSVR(C=1e3).fit(diabetes.data, diabetes.target,
                                    sample_weight=unit_weight)
    score1 = lsvr.score(diabetes.data, diabetes.target)

    lsvr_no_weight = svm.LinearSVR(C=1e3).fit(diabetes.data, diabetes.target)
    score2 = lsvr_no_weight.score(diabetes.data, diabetes.target)

    assert_allclose(np.linalg.norm(lsvr.coef_),
                    np.linalg.norm(lsvr_no_weight.coef_), rtol=1, atol=0.0001)
    assert_almost_equal(score1, score2, 2)

    # check that fit(X) = fit([X1, X2, X3], sample_weight=[n1, n2, n3]) where
    # X = X1 repeated n1 times, X2 repeated n2 times and so forth
    random_state = check_random_state(0)
    random_weight = random_state.randint(0, 10, n_samples)
    lsvr_unflat = svm.LinearSVR(C=1e3).fit(diabetes.data, diabetes.target,
                                           sample_weight=random_weight)
    score3 = lsvr_unflat.score(diabetes.data, diabetes.target,
                               sample_weight=random_weight)

    X_flat = np.repeat(diabetes.data, random_weight, axis=0)
    y_flat = np.repeat(diabetes.target, random_weight, axis=0)
    lsvr_flat = svm.LinearSVR(C=1e3).fit(X_flat, y_flat)
    score4 = lsvr_flat.score(X_flat, y_flat)

    assert_almost_equal(score3, score4, 2)
Example #3
def test_svr():
    # Test Support Vector Regression

    diabetes = datasets.load_diabetes()
    for clf in (svm.NuSVR(kernel='linear', nu=.4, C=1.0),
                svm.NuSVR(kernel='linear', nu=.4, C=10.),
                svm.SVR(kernel='linear', C=10.),
                svm.LinearSVR(C=1.0),
                svm.LinearSVR(C=10.)):
        clf.fit(diabetes.data, diabetes.target)
        assert clf.score(diabetes.data, diabetes.target) > 0.02

    # non-regression test; previously, BaseLibSVM would check that
    # len(np.unique(y)) < 2, which must only be done for SVC
    svm.SVR().fit(diabetes.data, np.ones(len(diabetes.data)))
    svm.LinearSVR().fit(diabetes.data, np.ones(len(diabetes.data)))
Example #4
 def __init__(self):
     self.clf1 = [
         make_pipeline(
             Imputer(),
             GradientBoostingRegressor(n_estimators=5000, max_depth=8))
         for _ in range(5)
     ]
     self.clf2 = [
         make_pipeline(
             Imputer(strategy='median'),
             ExtraTreesRegressor(n_estimators=5000,
                                 criterion='mse',
                                 max_depth=8,
                                 min_samples_split=10,
                                 min_samples_leaf=1,
                                 min_weight_fraction_leaf=0.0,
                                 max_features='auto',
                                 max_leaf_nodes=None,
                                 bootstrap=False,
                                 oob_score=False,
                                 n_jobs=1,
                                 random_state=42,
                                 verbose=0,
                                 warm_start=True)) for _ in range(5)
     ]
     self.clf3 = [
         make_pipeline(Imputer(), svm.LinearSVR()) for _ in range(5)
     ]
     self.clf = [linear_model.LinearRegression() for _ in range(5)]
Example #5
def test_LinearSVR_C(*data):
    '''
    test the performance with different C
    :param data:  train_data,test_data, train_target, test_target
    :return: None
    '''
    X_train, X_test, y_train, y_test = data
    Cs = np.logspace(-1, 2)
    train_scores = []
    test_scores = []
    for C in Cs:
        regr = svm.LinearSVR(epsilon=0.1,
                             loss='squared_epsilon_insensitive',
                             C=C)
        regr.fit(X_train, y_train)
        train_scores.append(regr.score(X_train, y_train))
        test_scores.append(regr.score(X_test, y_test))
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(Cs, train_scores, label="Training score", marker='+')
    ax.plot(Cs, test_scores, label="Testing score", marker='o')
    ax.set_title("LinearSVR_C")
    ax.set_xscale("log")
    ax.set_xlabel(r"C")
    ax.set_ylabel("score")
    ax.set_ylim(-1, 1.05)
    ax.legend(loc="best", framealpha=0.5)
    plt.show()
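
The function above expects an already-split dataset; a minimal driver, assuming scikit-learn's diabetes data (the split parameters are illustrative), could be:

from sklearn import datasets, model_selection

diabetes = datasets.load_diabetes()
data = model_selection.train_test_split(diabetes.data, diabetes.target,
                                        test_size=0.25, random_state=0)
test_LinearSVR_C(*data)  # plots train/test R^2 against C on a log axis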
Example #6
def test_LinearSVR_C(*data):
    """
    测试LinearSVR的预测性能随罚项系数C的变化情况
    """
    train_x, test_x, train_y, test_y = data
    Cs = np.logspace(-1, 2)
    train_scores = []
    test_scores = []
    for C in Cs:
        model = svm.LinearSVR(epsilon=0.1,
                              loss="squared_epsilon_insensitive",
                              C=C)
        model.fit(train_x, train_y)
        train_scores.append(model.score(train_x, train_y))
        test_scores.append(model.score(test_x, test_y))
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(Cs, train_scores, label="Training Score", marker="+")
    ax.plot(Cs, test_scores, label="Testing Score", marker="o")
    ax.set_title("LinearSVR_C")
    ax.set_xscale("log")
    ax.set_xlabel(r"C")
    ax.set_ylabel("score")
    ax.set_ylim(-1, 1.05)
    ax.legend(loc="best")
    plt.show()
Example #7
def test_LinearSVR_epsilon(*data):
    """
    测试LinearSVR的预测性能随eposilon参数的影响
    """
    train_x, test_x, train_y, test_y = data
    epsilons = np.logspace(-2, 2)
    train_scores = []
    test_scores = []
    for epsilon in epsilons:
        model = svm.LinearSVR(epsilon=epsilon,
                              loss="squared_epsilon_insensitive")
        model.fit(train_x, train_y)
        train_scores.append(model.score(train_x, train_y))
        test_scores.append(model.score(test_x, test_y))
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(epsilons, train_scores, label="Training Score", marker="+")
    ax.plot(epsilons, test_scores, label="Testing Score", marker="o")
    ax.set_title("LinearSVR_epsilon")
    ax.set_xscale("log")
    ax.set_xlabel(r"$\epsilon$")
    ax.set_ylabel("score")
    ax.set_ylim(0, 1.05)
    ax.legend(loc="best")
    plt.show()
Example #8
 def __init__(self, modeltype, PCA=None, modelparams=None):
     self.information = {}
     if PCA:
         fitters = load(PCA)
         self.pca = fitters[0]
         self.scaler = fitters[1]
     else:
         self.pca = None
     if modeltype == 'Linear': #Linear Regression
         self.model = LinearRegression(n_jobs=-1)
     elif modeltype == 'SVM': #Support Vector Machine
         self.model = svm.SVR(cache_size=750, C=200)
     elif modeltype == 'LinearSVM': #Linear SVM
         self.model = svm.LinearSVR()
     elif modeltype == 'SGD': #Stochastic Gradient Descent
         self.model = SGDRegressor()
     elif modeltype == 'MLP': #Multi-layer Perceptron
         self.model = MLPRegressor(learning_rate='adaptive', max_iter=1000)
     elif modeltype == 'KNN': #K Nearest Neighbour
         self.model = KNeighborsRegressor(n_neighbors=2, n_jobs=-1)
     elif modeltype == 'Tree': #Decision Tree
         self.model = DecisionTreeRegressor()
     elif modeltype == 'load': #Load a pre-existing model
         pass
     else: #Not supported
         raise ValueError('Model type not recognised: %s' % modeltype)
     if modelparams:
         self.model.set_params(**modelparams)
Example #9
def _get_base_ml_model(method):
    regressor = None
    if method == 'lr':
        regressor = linear_model.LinearRegression()
    if method == 'huber':
        regressor = linear_model.HuberRegressor(max_iter=50)
        regressor = multioutput.MultiOutputRegressor(regressor)
    if method == 'svr':
        regressor = svm.LinearSVR()
        regressor = multioutput.MultiOutputRegressor(regressor)
    if method == 'kr':
        regressor = kernel_ridge.KernelRidge(kernel='rbf')
    if method == 'rf':
        regressor = ensemble.RandomForestRegressor(n_estimators=50, n_jobs=8)
    if method == 'gbm':
        regressor = lgb.LGBMRegressor(max_depth=20,
                                      num_leaves=1000,
                                      n_estimators=100,
                                      min_child_samples=5,
                                      random_state=42)
        regressor = multioutput.MultiOutputRegressor(regressor)
    if method == 'nn':
        regressor = neural_network.MLPRegressor(hidden_layer_sizes=(25, 25),
                                                early_stopping=True,
                                                max_iter=1000000,
                                                alpha=0.01)

    return regressor
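
A sketch of how the factory might be called, on toy multi-output data (shapes and names are illustrative):

import numpy as np

rng = np.random.RandomState(0)
X = rng.rand(100, 4)
Y = rng.rand(100, 2)                 # two regression targets
reg = _get_base_ml_model('svr')      # LinearSVR wrapped in a MultiOutputRegressor
reg.fit(X, Y)
print(reg.predict(X[:3]).shape)      # (3, 2)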
Example #10
def model(train_x, train_y, test_X, flags='linear'):
    if flags == 'linear':
        clf = linear_model.LinearRegression()

    elif flags == 'LSVR':
        clf = svm.LinearSVR()

    elif flags == 'SVR':
        clf = svm.SVR()

    elif flags == 'Ridge':
        clf = linear_model.Ridge()

    elif flags == 'TreeR':
        clf = tree.DecisionTreeRegressor()

    # KNN does not work well for this task
    elif flags == 'Knn':
        clf = neighbors.KNeighborsRegressor()

    elif flags == 'RandomForest':
        clf = ensemble.RandomForestRegressor(n_estimators=20)

    elif flags == 'Adaboost':
        clf = ensemble.AdaBoostRegressor(n_estimators=50)

    elif flags == 'GBRT':
        clf = ensemble.GradientBoostingRegressor(n_estimators=100)

    else:
        raise ValueError('unknown flags value: %s' % flags)
    clf.fit(train_x, train_y)
    y = clf.predict(test_X)
    return y
Example #11
    def _estimate_model(self):
        """Estimates SVR model.

        Returns
        -------
        model : sklearn LinearSVR or SVR model or grid search cv object
            Fitted object.
        """
        if self.kernel == 'linear':
            self.underlying = svm.LinearSVR(**self.kwargs)
        else:
            if self.type == 'eps':
                self.underlying = svm.SVR(kernel=self.kernel, **self.kwargs)
            elif self.type == 'nu':
                self.underlying = svm.NuSVR(kernel=self.kernel, **self.kwargs)
            else:
                raise NotImplementedError(
                    'Type not implemented. Choices are eps or nu.')
        if self.cv_folds is not None:
            model = model_selection.GridSearchCV(self.underlying,
                                                 self.parameters,
                                                 cv=self.cv_folds,
                                                 scoring=self.score)
        else:
            model = self.underlying
        model.fit(self.x_train, self.y_train)
        return model
Example #12
def linear_svr_c(*data):
    x_train, x_test, y_train, y_test = data
    cs = np.logspace(-1, 3)
    train_scores = []
    test_scores = []
    for c in cs:
        svr = svm.LinearSVR(epsilon=0.1,
                            loss="squared_epsilon_insensitive",
                            C=c)
        svr.fit(x_train, y_train)
        train_scores.append(svr.score(x_train, y_train))
        test_scores.append(svr.score(x_test, y_test))

    fig = plt.figure()

    ax = fig.add_subplot(1, 1, 1)
    ax.plot(cs, train_scores, label="training score", marker="+")
    ax.plot(cs, test_scores, label="testing score", marker="o")

    ax.set_title("svr c")
    ax.set_xscale("log")
    ax.set_xlabel(r"C")
    ax.set_ylabel("score")
    ax.set_ylim(-1, 1.05)
    ax.legend(loc="best", framealpha=0.5)

    plt.show()
Example #13
def regression():
    new_url = 'https://goo.gl/sXleFv'
    new_columns = np.array([
        'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX',
        'PTRATIO', 'B', 'LSTAT', 'MEDV'
    ])

    dataframe = pd.read_csv(new_url, delim_whitespace=True, names=new_columns)
    array = dataframe.values
    X = array[:, 0:13]
    Y = array[:, 13]

    k_fold = model_selection.KFold(n_splits=10, shuffle=True, random_state=7)
    models = np.empty([6, 2], dtype='object')
    models[0] = ['K Nearest Neighbors', neighbors.KNeighborsRegressor()]
    models[1] = ['Linear Regression', linear_model.LinearRegression()]
    models[2] = ['Ridge Regression', linear_model.Ridge()]
    models[3] = ['Support Vector Regressor', svm.LinearSVR()]
    models[4] = ['Random Forest Regressor', ensemble.RandomForestRegressor()]
    models[5] = [
        'Gradient Boosted Trees',
        ensemble.GradientBoostingRegressor()
    ]

    for name, model in models:
        # Different model metrics
        for scoring in ('neg_mean_squared_error', 'explained_variance'):
            cross_validation(name, model, X, Y, scoring)
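
regression() calls a cross_validation helper that is not shown (and the k_fold it builds is never passed along); a plausible stand-in based on cross_val_score, an assumption rather than the original helper, is:

def cross_validation(name, model, X, Y, scoring):
    # hypothetical reconstruction: print the mean and spread of 10-fold CV scores
    k_fold = model_selection.KFold(n_splits=10, shuffle=True, random_state=7)
    results = model_selection.cross_val_score(model, X, Y, cv=k_fold,
                                              scoring=scoring)
    print('%s (%s): %.3f (+/- %.3f)' % (name, scoring, results.mean(),
                                        results.std()))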
Example #14
def test_LinearSVR_C(*data):
    '''
    Test how LinearSVR's predictive performance varies with the C parameter.
    :param data: variadic argument; a tuple whose elements are, in order:
        training samples, test samples, training targets, test targets
    :return: None
    '''
    X_train, X_test, y_train, y_test = data
    Cs = np.logspace(-1, 2)
    train_scores = []
    test_scores = []
    for C in Cs:
        regr = svm.LinearSVR(epsilon=0.1,
                             loss='squared_epsilon_insensitive',
                             C=C)
        regr.fit(X_train, y_train)
        train_scores.append(regr.score(X_train, y_train))
        test_scores.append(regr.score(X_test, y_test))
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(Cs, train_scores, label="Training score", marker='+')
    ax.plot(Cs, test_scores, label="Testing score", marker='o')
    ax.set_title("LinearSVR_C")
    ax.set_xscale("log")
    ax.set_xlabel(r"C")
    ax.set_ylabel("score")
    ax.set_ylim(-1, 1.05)
    ax.legend(loc="best", framealpha=0.5)
    plt.show()
Example #15
def predict(data, priceToPredict):

    openingPriceTrain, openingPriceTest, closingPriceTrain, closingPriceTest = \
        data["openingPriceTrain"], data["openingPriceTest"], data["closingPriceTrain"], data["closingPriceTest"]
    clf = svm.LinearSVR()
    clf.fit(openingPriceTrain, closingPriceTrain)
    predicted2 = clf.predict(openingPriceTest)
    score = clf.score(openingPriceTest, closingPriceTest)
    # print(score)

    fig, ax = plotter.subplots()
    ax.scatter(openingPriceTrain, closingPriceTrain)
    ax.scatter(closingPriceTest, clf.predict(openingPriceTest))
    ax.set_xlabel('Measured')
    ax.set_ylabel('Predicted')
    # plotter.show()

    closingPriceTestArray = np.reshape(closingPriceTest, -1)
    predictedArray = np.reshape(predicted2, -1)
    print(pearsonr(closingPriceTestArray, predictedArray))

    openingPriceToPredict = np.array([priceToPredict])
    print(clf.predict(openingPriceToPredict))
    return clf.predict(openingPriceToPredict)
Example #16
def multiple_comparision():
    ln_predictor = linear_model.LinearRegression()
    svm_predictor = svm.LinearSVR()
    tree_predictor = tree.DecisionTreeRegressor()
    kernel_predictor = KernelRidge(alpha=1.0, gamma=1.0, kernel='rbf')
    C = 15
    printflag = False
    predictor_dict = {
        'Linear': ln_predictor,
        'SVR': svm_predictor,
        'DT': tree_predictor,
        'RBF Kernel': kernel_predictor
    }
    gamma_list = [0.01]
    max_iter_list = [5]
    results_dict = {}
    # For each model, train a Pareto curve
    for max_iter in max_iter_list:
        for curr_predictor in predictor_dict.keys():
            print('Curr Predictor: ')
            print(curr_predictor)
            predictor = predictor_dict[curr_predictor]
            fair_clf = Model(C=C,
                             printflag=printflag,
                             gamma=1,
                             predictor=predictor,
                             max_iters=max_iter)
            print(fair_clf.predictor)
            all_errors, all_fp = fair_clf.pareto(X, X_prime, y, gamma_list)
            results_dict[curr_predictor] = {
                'Errors': all_errors,
                'FP_disp': all_fp
            }

    print(results_dict)
Example #17
 def run_sklearn():
     skm = sk.LinearSVR(loss=loss,
                        epsilon=eps,
                        max_iter=skit,
                        dual=skdual)
     skm.fit(X_train, y_train)
     return skm.score(X_test, y_test)
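
run_sklearn reads loss, eps, skit, skdual and the train/test arrays from its enclosing scope; a self-contained setup (every value below is an illustrative assumption) might look like:

import numpy as np
from sklearn import svm as sk
from sklearn.model_selection import train_test_split

rng = np.random.RandomState(0)
X = rng.rand(200, 3)
y = X @ np.array([1.0, -2.0, 0.5]) + 0.1 * rng.randn(200)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

loss, eps = 'epsilon_insensitive', 0.1   # LinearSVR defaults
skit, skdual = 1000, True                # max_iter and the dual-formulation flag
print(run_sklearn())                     # R^2 on the held-out split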
Example #18
 def svm(self, type_, epsilon=0.0, penalty=1.0, tol=0.0001):
     """
     ---------------------------------------------
     Regression using Support Vector Machines (SVM)
     ---------------------------------------------
     Parameters:
     epsilon: Parameter in loss function. Defines margin where no penalty is given to errors.
     penalty: L2-penalty for error term. The larger, the less regularisation is used.
     tol: Tolerance for stopping criteria
     loss: Set to epsilon_insensitive, standard SVR
     """
     self.penalty = penalty
     self.eps = epsilon
     self.tol = tol
     self.clf = svm.LinearSVR(epsilon=self.eps,
                              tol=self.tol,
                              C=self.penalty,
                              loss='epsilon_insensitive',
                              fit_intercept=False,
                              max_iter=1000000)
     self.clf.fit(self.X, self.y)
     self.weights = self.clf.coef_
     pred = Regression.predict(self, self.X)
     MSE = mean_squared_error(self.y, pred)
     return MSE, self.clf.score(self.X, self.y)
Example #19
def test_linear_svx_uppercase_loss_penalty():
    # Check if Upper case notation is supported by _fit_liblinear
    # which is called by fit
    X, y = [[0.0], [1.0]], [0, 1]

    msg = ("loss='%s' has been deprecated in favor of "
           "loss='%s' as of 0.16. Backward compatibility"
           " for the uppercase notation will be removed in %s")

    # loss SQUARED_hinge --> squared_hinge
    assert_warns_message(DeprecationWarning,
                         msg % ("SQUARED_hinge", "squared_hinge", "0.18"),
                         svm.LinearSVC(loss="SQUARED_hinge").fit, X, y)

    # penalty L2 --> l2
    assert_warns_message(DeprecationWarning,
                         msg.replace("loss", "penalty")
                         % ("L2", "l2", "0.18"),
                         svm.LinearSVC(penalty="L2").fit, X, y)

    # loss EPSILON_INSENSITIVE --> epsilon_insensitive
    assert_warns_message(DeprecationWarning,
                         msg % ("EPSILON_INSENSITIVE", "epsilon_insensitive",
                                "0.18"),
                         svm.LinearSVR(loss="EPSILON_INSENSITIVE").fit, X, y)
Example #20
    def __init__(self, trainer=svr.LinearSVR(), error_fx=sklm.r2_score):

        #super(RegressionMeasure, self).__init__()
        Measure.__init__(self)
        self.trainer = trainer
        self.fx = error_fx
        self.mse = sklm.mean_squared_error
Example #21
def multiple_pareto():
    gamma_list = [0.002, 0.005, 0.01, 0.02, 0.05, 0.1]

    train_size = X.shape[0]
    X_train = X.iloc[:train_size]
    X_prime_train = X_prime.iloc[:train_size]
    y_train = y.iloc[:train_size]

    ln_predictor = linear_model.LinearRegression()
    svm_predictor = svm.LinearSVR()
    tree_predictor = tree.DecisionTreeRegressor(max_depth=3)
    kernel_predictor = KernelRidge(alpha=1.0, gamma=1.0, kernel='rbf')
    predictor_dict = {
        'Linear': {
            'predictor': ln_predictor,
            'iters': 100
        },
        'SVR': {
            'predictor': svm_predictor,
            'iters': 10
        },
        'DT': {
            'predictor': tree_predictor,
            'iters': 100
        }
    }

    results_dict = {}

    for pred in predictor_dict:
        print('Curr Predictor: {}'.format(pred))
        predictor = predictor_dict[pred]['predictor']
        max_iters = predictor_dict[pred]['iters']
        fair_clf = Model(C=100,
                         printflag=True,
                         gamma=1,
                         predictor=predictor,
                         max_iters=max_iters)
        fair_clf.set_options(max_iters=max_iters)
        errors, fp_violations, fn_violations = fair_clf.pareto(
            X_train, X_prime_train, y_train, gamma_list)
        results_dict[pred] = {
            'errors': errors,
            'fp_violations': fp_violations,
            'fn_violations': fn_violations
        }
        plt.plot(errors, fp_violations, label=pred)

    pickle.dump(
        results_dict,
        open('results_dict_' + str(gamma_list) + '_gammas.pkl', 'wb'))

    plt.xlabel('Error')
    plt.ylabel('Unfairness')
    plt.legend()
    plt.title('Error vs. Unfairness\n(Communities & Crime Dataset)')
    plt.show()
Example #22
 def train(self, results, features, hyperparams, feature_names):
     if self.is_classifier:
         model = svm.LinearSVC(C=hyperparams.get(self.C_VAL))
     else:
         model = svm.LinearSVR(C=hyperparams.get(self.C_VAL),
                               epsilon=hyperparams.get(self.EPSILON))
     model.fit(features, results)
     self.log.debug("Successful creation of Linear Support Vector Machine model: %s\n", model)
     return model
Example #23
def test_sk_LinearSVR():
    print("Testing sklearn, LinearSVR...")
    mod = svm.LinearSVR()
    X, y = iris_data
    mod.fit(X, y)
    docs = {'name': "LinearSVR test"}
    fv = X[0, :]
    upload(mod, fv, docs)
Example #24
def Train(complexity, trainData, trainLabels):
    seed = 1
    reg = svm.LinearSVR(C=complexity,
                        epsilon=0.1,
                        random_state=seed,
                        loss='squared_epsilon_insensitive')
    reg.fit(trainData, trainLabels)
    return reg
Example #25
def linear_svr(*data):
    x_train, x_test, y_train, y_test = data
    svr = svm.LinearSVR()
    svr.fit(x_train, y_train)
    print(svr.coef_)
    print(svr.intercept_)
    print(svr.score(x_train, y_train))
    print(svr.score(x_test, y_test))
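
A quick driver for the function above, on synthetic data (the make_regression parameters are illustrative):

from sklearn import model_selection
from sklearn.datasets import make_regression

X, y = make_regression(n_samples=200, n_features=5, noise=10.0, random_state=0)
data = model_selection.train_test_split(X, y, test_size=0.25, random_state=0)
linear_svr(*data)  # prints coefficients, intercept, and train/test R^2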
Example #26
 def reset(self):
     if self.kernel == 'linear':
         self.clf = svm.LinearSVR(random_state=self.seed)
     else:
         # note: sklearn's SVR does not accept random_state, so only the
         # LinearSVR branch is seeded
         self.clf = svm.SVR(gamma='auto',
                            kernel=self.kernel,
                            degree=self.poly_degree)
Example #27
def calculate_compartment_fraction(structure1,
                                   structure2,
                                   path1,
                                   path2,
                                   size1=None,
                                   size2=None):
    #compartments
    contacts1 = matFromBed(path1, size1, structure1)
    contacts2 = matFromBed(path2, size2, structure2)

    compartments1 = np.array(get_compartments(contacts1, structure1))
    compartments2 = np.array(get_compartments(contacts2, structure2))

    r, p = st.pearsonr(compartments1, compartments2)
    if r < 0:
        compartments2 = -compartments2

    # SVR: fit a linear SVR mapping 3D coordinates to compartment score;
    # its coefficient vector approximates the compartment axis
    coords1 = structure1.getCoords()
    coords2 = structure2.getCoords()
    coords = np.concatenate((coords1, coords2))
    compartments = np.concatenate((compartments1, compartments2))
    clf = svm.LinearSVR()
    clf.fit(coords, compartments)
    coef = clf.coef_

    transformed_coords1 = np.array(change_coordinate_system(coef, coords1))
    transformed_coords2 = np.array(change_coordinate_system(coef, coords2))

    x_diffs = transformed_coords1[:, 0] - transformed_coords2[:, 0]
    y_diffs = transformed_coords1[:, 1] - transformed_coords2[:, 1]
    z_diffs = transformed_coords1[:, 2] - transformed_coords2[:, 2]

    #axis lengths
    centroid1 = np.mean(transformed_coords1, axis=0)
    centroid2 = np.mean(transformed_coords2, axis=0)
    x_length1 = np.mean(
        [np.abs(coord1[0] - centroid1[0]) for coord1 in transformed_coords1])
    y_length1 = np.mean(
        [np.abs(coord1[1] - centroid1[1]) for coord1 in transformed_coords1])
    z_length1 = np.mean(
        [np.abs(coord1[2] - centroid1[2]) for coord1 in transformed_coords1])
    x_length2 = np.mean(
        [np.abs(coord2[0] - centroid2[0]) for coord2 in transformed_coords2])
    y_length2 = np.mean(
        [np.abs(coord2[1] - centroid2[1]) for coord2 in transformed_coords2])
    z_length2 = np.mean(
        [np.abs(coord2[2] - centroid2[2]) for coord2 in transformed_coords2])

    x_length = np.mean((x_length1, x_length2))
    y_length = np.mean((y_length1, y_length2))
    z_length = np.mean((z_length1, z_length2))

    x_mean = np.mean(np.abs(x_diffs)) / x_length
    y_mean = np.mean(np.abs(y_diffs)) / y_length
    z_mean = np.mean(np.abs(z_diffs)) / z_length

    return z_mean / (x_mean + y_mean + z_mean)
Example #28
    def LinearSVR(self,
                  optimizer=None,
                  param_grid=None,
                  scoring=None,
                  fit_params=None,
                  cv=None):
        """Creates a linear support vector machine regression model estimator.
        :param optimizer: the parameter search and optimization method. default is a sklearn.model_selection.GridSearchCV instance
        :param param_grid: dictionary with hyperparameter names as keys and lists of hyperparameter settings to try in grid search
        :param scoring: the model performance metric to optimize. accepts string values and sklearn.metrics.* instances
        :param fit_params: parameters to pass to the `fit` method of the estimator
        :param cv: determines the cross-validation splitting strategy. accepts inputs to sklearn.model_selection.GridSearchCV
        :return None: updates the `tuner` object with the passed parameters and runs a grid search using the LinearSVR estimator
        """

        # check if the optimizer has changed, otherwise use default
        if optimizer is not None:
            self.optimizer = optimizer

        # check if the parameter grid has been set, otherwise set defaults
        if param_grid is None:
            if self.param_grid is None:
                param_grid = {
                    "C": (1e-2, 1e-1, 1e0, 1e1),
                    "loss": ("epsilon_insensitive",
                             "squared_epsilon_insensitive"),
                    "epsilon": (0, 0.01, 0.1),
                    "dual": (False,),
                    "tol": (1e-3, 1e-4, 1e-5),
                    "fit_intercept": (True, False),
                }
                self.param_grid = param_grid
        else:
            self.param_grid = param_grid

        # set the scoring function
        if scoring is None:
            if self.scoring is None:
                scoring = _metrics.explained_variance_score
                self.scoring = scoring
        else:
            self.scoring = scoring

        # set the default fit parameters
        if fit_params is not None:
            self.fit_params = fit_params

        # set the cross validation strategy
        if cv is None:
            if self.cv is None:
                # KFold rather than StratifiedKFold: regression targets are
                # continuous and cannot be stratified
                cv = _model_selection.KFold(n_splits=self.n_splits)
                self.cv = cv
        else:
            self.cv = cv

        # create the estimator and run the grid search
        estimator = _svm.LinearSVR()
        self.run_gs(estimator)
Example #29
def test_LinearSVR_loss(*data):
    x_train, x_test, y_train, y_test = data
    losses = ['epsilon_insensitive', 'squared_epsilon_insensitive']
    for loss in losses:
        regr = svm.LinearSVR(loss=loss)
        regr.fit(x_train, y_train)
        print("loss: %s" % loss)
        print("Coefficients:%s, intercept:%s" % (regr.coef_, regr.intercept_))
        print("Score: %.2f" % regr.score(x_test, y_test))
def build_model(args, C, seed):
    if args.dc_tree:
        model = DecisionTreeRegressor(random_state=seed)
    elif args.nn_radius:
        model = RadiusNeighborsRegressor(radius=1.0)
    else:
        model = svm.LinearSVR(C=C, random_state=seed)

    return model
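
build_model expects an argparse-style namespace; a hypothetical invocation, with SimpleNamespace standing in for parsed CLI arguments:

from types import SimpleNamespace

args = SimpleNamespace(dc_tree=False, nn_radius=False)
model = build_model(args, C=1.0, seed=42)   # -> LinearSVR(C=1.0, random_state=42)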