Example #1
def separate_coord_nu_svr(train_sequences_lat=train_sequences,
                          train_sequences_long=train_sequences,
                          val_sequences_lat=val_sequences,
                          val_sequences_long=val_sequences,
                          training_latitudes=training_latitudes,
                          training_longitudes=training_longitudes,
                          val_latitudes=val_latitudes,
                          val_longitudes=val_longitudes,
                          test_sequences=test_sequences,
                          sub_name='separate_coord_nu_svr'):
    # separate svr for each coordinate
    svr_lat = svm.NuSVR(C=0.1, nu=0.3, verbose=10)
    svr_lat.fit(train_sequences_lat, training_latitudes)
    mse_lat = get_mse(svr_lat,
                      val_sequences_lat,
                      val_latitudes,
                      is_multioutput=False)

    svr_long = svm.NuSVR(C=0.001, nu=0.7, verbose=10)
    svr_long.fit(train_sequences_long, training_longitudes)
    mse_long = get_mse(svr_long,
                       val_sequences_long,
                       val_longitudes,
                       is_multioutput=False)

    print(mse_lat)
    print(mse_long)
    print((mse_lat + mse_long) / 2)

    return svr_lat, svr_long
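The helper get_mse used above is not shown in this example. A minimal sketch of what it presumably computes (hypothetical; the is_multioutput flag is assumed to switch between per-column and single-output scoring):

from sklearn import metrics

def get_mse(model, sequences, labels, is_multioutput=True):
    # Predict on the held-out sequences and score against the labels.
    predictions = model.predict(sequences)
    if is_multioutput:
        # One MSE per output column (e.g. latitude and longitude).
        return metrics.mean_squared_error(labels, predictions,
                                          multioutput='raw_values')
    return metrics.mean_squared_error(labels, predictions)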
Example #2
def separate_coord_grid_search():
    # gridsearch on 2 single output models
    from sklearn import metrics
    mse = metrics.make_scorer(metrics.mean_squared_error,
                              greater_is_better=False)

    Cs = [0.1, 0.01, 0.001]
    nus = [0.3, 0.7, 0.9, 1]
    params = {'C': Cs, 'nu': nus}

    from sklearn.model_selection import GridSearchCV
    svr_lat_grid = GridSearchCV(svm.NuSVR(),
                                params,
                                cv=5,
                                scoring=mse,
                                n_jobs=-1,
                                verbose=10)

    svr_lat_grid.fit(train_sequences, training_latitudes)

    print(svr_lat_grid.best_params_)

    svr_long_grid = GridSearchCV(svm.NuSVR(),
                                 params,
                                 cv=5,
                                 scoring=mse,
                                 n_jobs=-1,
                                 verbose=10)

    svr_long_grid.fit(train_sequences, training_longitudes)

    print(svr_long_grid.best_params_)
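Both grid searches keep their winning model in best_estimator_, refit on the full training data (refit=True is the GridSearchCV default). A short follow-up sketch, assuming the same arrays as Example #1:

svr_lat = svr_lat_grid.best_estimator_
svr_long = svr_long_grid.best_estimator_

# best_score_ is negated MSE because the scorer was built with
# greater_is_better=False, so flip the sign to report plain MSE.
print(-svr_lat_grid.best_score_)
print(-svr_long_grid.best_score_)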
Example #3
def test_svr():
    """
    Test Support Vector Regression
    """

    diabetes = datasets.load_diabetes()
    for clf in (svm.NuSVR(kernel='linear', nu=.4,
                          C=1.0), svm.NuSVR(kernel='linear', nu=.4, C=10.),
                svm.SVR(kernel='linear', C=10.)):
        clf.fit(diabetes.data, diabetes.target)
        assert_greater(clf.score(diabetes.data, diabetes.target), 0.02)
Example #4
def test_SVR():
    """
    Test Support Vector Regression
    """

    diabetes = datasets.load_diabetes()
    for clf in (svm.NuSVR(kernel='linear', nu=.4),
                svm.NuSVR(kernel='linear', nu=.4, C=10.),
                svm.SVR(kernel='linear', C=10.),
                svm.sparse.NuSVR(kernel='linear', nu=.4),
                svm.sparse.NuSVR(kernel='linear', nu=.4, C=10.),
                svm.sparse.SVR(kernel='linear', C=10.)):
        clf.fit(diabetes.data, diabetes.target)
        assert clf.score(diabetes.data, diabetes.target) > 0.02
Example #5
def test_svr():
    # Test Support Vector Regression

    diabetes = datasets.load_diabetes()
    for clf in (svm.NuSVR(kernel='linear', nu=.4,
                          C=1.0), svm.NuSVR(kernel='linear', nu=.4, C=10.),
                svm.SVR(kernel='linear',
                        C=10.), svm.LinearSVR(C=1.0), svm.LinearSVR(C=10.)):
        clf.fit(diabetes.data, diabetes.target)
        assert clf.score(diabetes.data, diabetes.target) > 0.02

    # non-regression test; previously, BaseLibSVM would check that
    # len(np.unique(y)) < 2, which must only be done for SVC
    svm.SVR().fit(diabetes.data, np.ones(len(diabetes.data)))
    svm.LinearSVR().fit(diabetes.data, np.ones(len(diabetes.data)))
Example #6
def trainFixed():
    '''
    Train a machine learner based on data from some fixed parameter point.
    Save to fixed.pkl.
    '''
    print("Entering train fixed")
    trainAndTarget = np.loadtxt('traindata.dat')
    traindata = trainAndTarget[:, 0:2]
    targetdata = trainAndTarget[:, 2]

    massPoints = np.unique(traindata[:, 1])
    # integer division: chunk and shift are used as slice bounds below
    chunk = len(traindata) // len(massPoints) // 2
    shift = len(traindata) // 2

    # plot for fixed mu=0 training
    print("training fixed")
    clf = svm.NuSVR()
    reducedtrain = np.concatenate(
        (traindata[4 * chunk:5 * chunk,
                   0], traindata[4 * chunk + shift:5 * chunk + shift, 0]))
    reducedtarget = np.concatenate(
        (targetdata[4 * chunk:5 * chunk],
         targetdata[4 * chunk + shift:5 * chunk + shift]))

    clf.fit(reducedtrain.reshape((len(reducedtrain), 1)), reducedtarget)
    joblib.dump(clf, 'fixed.pkl')
Example #7
def wardCV(data, labels, cut_level, connect):
    '''calculate cross-validated amount of ward-clusters'''
    #loop for list
    accuracies = np.zeros(len(cut_level))
    for i in cut_level:
        #reduce to set amount of clusters
        agglo = sklcl.WardAgglomeration(connectivity=connect, n_clusters=i)
        cross = sklcv.KFold(n=len(labels), n_folds=len(labels))
        pred_vec = np.zeros_like(labels)
        for train_i, test_i in cross:
            use_train = agglo.fit_transform(data[train_i])
            use_test = agglo.transform(data[test_i])

            scaler = sklpre.StandardScaler()
            use_train = scaler.fit_transform(use_train)
            use_test = scaler.transform(use_test)

            model = sklsvm.NuSVR(kernel='linear', nu=1, C=100)
            model.fit(use_train, labels[train_i])
            pr = model.predict(use_test)
            pred_vec[test_i] = pr
        #save accuracy
        accuracies[cut_level == i], _ = ss.spearmanr(pred_vec, labels)
    #based on loo-accuracy, select the optimal number of features
    #TODO -smooth this?
    accuracies = ssig.medfilt(accuracies)
    best_model = cut_level[accuracies.argmax()]
    return best_model
Example #8
    def _estimate_model(self):
        """Estimates SVR model.

        Returns
        -------
        model : sklearn LinearSVR or SVR model or grid search cv object
            Fitted object.
        """
        if self.kernel == 'linear':
            self.underlying = svm.LinearSVR(**self.kwargs)
        else:
            if self.type == 'eps':
                self.underlying = svm.SVR(kernel=self.kernel, **self.kwargs)
            elif self.type == 'nu':
                self.underlying = svm.NuSVR(kernel=self.kernel, **self.kwargs)
            else:
                raise NotImplementedError(
                    'Type not implemented. Choices are eps or nu.')
        if self.cv_folds is not None:
            model = model_selection.GridSearchCV(self.underlying,
                                                 self.parameters,
                                                 cv=self.cv_folds,
                                                 scoring=self.score)
        else:
            model = self.underlying
        model.fit(self.x_train, self.y_train)
        return model
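For context on the 'eps'/'nu' branch above: SVR fixes the width of the epsilon-insensitive tube directly, while NuSVR re-parametrizes it through nu, which upper-bounds the fraction of training points falling outside the tube and lower-bounds the fraction of support vectors. A minimal sketch of the two parametrizations on synthetic data (names are illustrative, not from this wrapper):

import numpy as np
from sklearn import svm

rng = np.random.RandomState(0)
X = rng.randn(200, 3)
y = X[:, 0] + 0.1 * rng.randn(200)

eps_model = svm.SVR(kernel='rbf', epsilon=0.1).fit(X, y)  # type == 'eps'
nu_model = svm.NuSVR(kernel='rbf', nu=0.5).fit(X, y)      # type == 'nu'

# nu lower-bounds the support-vector fraction, so this prints roughly >= 0.5
print(len(nu_model.support_) / len(X))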
Example #9
def test_c_samples_scaling():
    """Test C scaling by n_samples
    """
    X = iris.data[iris.target != 2]
    y = iris.target[iris.target != 2]
    X2 = np.r_[X, X]
    y2 = np.r_[y, y]

    clfs = [
        svm.SVC(tol=1e-6, kernel='linear', C=0.1),
        svm.SVR(tol=1e-6, kernel='linear', C=100),
        svm.LinearSVC(tol=1e-6, C=0.1),
        linear_model.LogisticRegression(penalty='l1', tol=1e-6, C=100),
        linear_model.LogisticRegression(penalty='l2', tol=1e-6),
        svm.NuSVR(tol=1e-6, kernel='linear')
    ]

    for clf in clfs:
        clf.set_params(scale_C=False)
        coef_ = clf.fit(X, y).coef_
        coef2_ = clf.fit(X2, y2).coef_
        error_no_scale = linalg.norm(coef2_ - coef_) / linalg.norm(coef_)
        assert_true(error_no_scale > 1e-3)

        clf.set_params(scale_C=True)
        coef_ = clf.fit(X, y).coef_
        coef2_ = clf.fit(X2, y2).coef_
        error_with_scale = linalg.norm(coef2_ - coef_) / linalg.norm(coef_)
        assert_true(error_with_scale < 1e-5)
Example #10
def multioutput_model():
    # labels for multioutput model
    train_labels = np.zeros((training_latitudes.shape[0], 2))
    train_labels[:, 0] = np.array(training_latitudes)
    train_labels[:, 1] = np.array(training_longitudes)

    #labels for multioutput models
    val_labels = np.zeros((val_latitudes.shape[0], 2))
    val_labels[:, 0] = np.array(val_latitudes)
    val_labels[:, 1] = np.array(val_longitudes)

    # multi output model
    from sklearn.multioutput import MultiOutputRegressor
    from sklearn.linear_model import BayesianRidge
    from sklearn.linear_model import LogisticRegression
    from sklearn.ensemble import RandomForestRegressor
    from sklearn.kernel_ridge import KernelRidge

    # define and fit
    multi_output = MultiOutputRegressor(svm.NuSVR(), n_jobs=-1)
    multi_output.fit(train_sequences, train_labels)

    # get error
    from sklearn import metrics
    predictions = multi_output.predict(val_sequences)
    mse_1 = metrics.mean_squared_error(val_labels[:, 0], predictions[:, 0])
    mse_2 = metrics.mean_squared_error(val_labels[:, 1], predictions[:, 1])
    print(mse_1)
    print(mse_2)

    return multi_output
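MultiOutputRegressor clones the NuSVR and fits one independent copy per label column, so this is Example #1's two separate models with shared hyperparameters. A self-contained sketch of that behavior on synthetic data:

import numpy as np
from sklearn import svm
from sklearn.multioutput import MultiOutputRegressor

rng = np.random.RandomState(0)
X = rng.randn(50, 4)
Y = np.column_stack([X[:, 0], 2 * X[:, 1]])  # two targets

model = MultiOutputRegressor(svm.NuSVR()).fit(X, Y)
print(model.predict(X[:5]).shape)  # (5, 2): one column per target
print(len(model.estimators_))      # 2 independently fitted NuSVR copies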
Example #11
def main(args):
    train_file = os.path.join(args.data_dir, 'train.csv')
    train_df = pd.read_csv(train_file)
    train_df = clean_data(train_df)
    print(train_df.info())

    feature_columns = [
        'MSSubClass', 'LotArea', 'OverallQual', 'OverallCond', 'YearBuilt'
    ]

    features = train_df[feature_columns].values
    targets = train_df['SalePrice'].values

    clf = svm.NuSVR()
    clf.fit(features, targets)

    test_file = os.path.join(args.data_dir, 'test.csv')
    test_df = pd.read_csv(test_file)
    test_df = clean_data(test_df)
    print(test_df.info())

    features = test_df[feature_columns].values
    predicts = clf.predict(features)
    ids = test_df['Id'].values

    with open('/tmp/kaggle_submit.csv', 'w') as fileobj:
        writer = csv.writer(fileobj)
        writer.writerow(['Id', 'SalePrice'])
        for id, price in zip(ids, predicts):
            writer.writerow([id, price])
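One caveat with this script: NuSVR's default RBF kernel is sensitive to feature scale, and the selected columns span very different ranges (LotArea in the thousands vs OverallQual in single digits). A hedged variant that standardizes inside a pipeline, reusing the same training features/targets arrays:

from sklearn import svm
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

clf = make_pipeline(StandardScaler(), svm.NuSVR())
clf.fit(features, targets)  # scaler statistics come from the training set only
predicts = clf.predict(test_df[feature_columns].values)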
Example #12
def main():
    id2year2stats = load_files(
        {year: 'fant%d.csv' % year
         for year in range(2008, 2013)}, SPECIAL_CASE_TRADES)

    def id_to_useful_name(id):
        year2stats = id2year2stats[id]
        any_year = year2stats[next(iter(year2stats))]
        return (any_year['Name'], any_year['Tm'], any_year['FantasyFantPos'])

    current_players = set(id for id in id2year2stats
                          if BASE_YEAR - 1 in id2year2stats[id])

    matrix, identifiers, features = construct_feature_matrix(id2year2stats)
    id2name = {
        ident[ID]: id_to_useful_name(ident[ID])
        for ident in identifiers
    }

    from sklearn import linear_model
    from sklearn import ensemble
    from sklearn import svm

    seed = randint(0, 2**32 - 1)
    for model in [
            linear_model.LinearRegression(),
            linear_model.Ridge(),
            ensemble.RandomForestRegressor(),
            ensemble.ExtraTreesRegressor(),
            ensemble.AdaBoostRegressor(),
            ensemble.GradientBoostingRegressor(),
            svm.SVR(),
            svm.NuSVR(),
    ]:
        print(str(model).split('(')[0])
        cross_validate(matrix,
                       identifiers,
                       features,
                       id2name,
                       model,
                       n_folds=10,
                       seed=seed)
        print()

    model = ensemble.RandomForestRegressor()
    current_predictions, current_ids = \
        predict_current_year(matrix, identifiers, features, id2name, model)

    current_predictions, current_ids = zip(
        *[(pred, ident)
          for pred, ident in zip(current_predictions, current_ids)
          if ident[ID] in current_players])

    current_predicted_ranks = position_ranking_lists(current_ids,
                                                     current_predictions,
                                                     id2name)

    dump_predictions(current_predicted_ranks)

    return
Example #13
def test_sk_NuSVR():
    print("Testing sklearn, NuSVR...")
    mod = svm.NuSVR()
    X, y = iris_data
    mod.fit(X, y)
    docs = {'name': "NuSVR test"}
    fv = X[0, :]
    upload(mod, fv, docs)
Example #14
def test_unfitted():
    X = "foo!"  # input validation not required when SVM not fitted

    clf = svm.SVC()
    with pytest.raises(Exception, match=r".*\bSVC\b.*\bnot\b.*\bfitted\b"):
        clf.predict(X)

    clf = svm.NuSVR()
    with pytest.raises(Exception, match=r".*\bNuSVR\b.*\bnot\b.*\bfitted\b"):
        clf.predict(X)
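In current scikit-learn the exception raised here is the dedicated NotFittedError, so the match can be tightened beyond a generic Exception. A small equivalent sketch, assuming a recent sklearn:

import pytest
from sklearn import svm
from sklearn.exceptions import NotFittedError

def test_unfitted_nusvr_typed():
    clf = svm.NuSVR()
    with pytest.raises(NotFittedError):
        clf.predict([[0.0, 0.0]])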
Example #15
    def __init__(self, provider):
        self.provider = provider
        self.mult = self.provider.multiplier
        input = []
        target = []
        for d in self.provider.getLearnData():
            input.append(d[0])
            target.append(d[1][0] / self.mult)
        self.regressor = svm.NuSVR()
        self.regressor.fit(input, target)
Example #16
def test_unfitted():
    X = "foo!"  # input validation not required when SVM not fitted

    clf = svm.SVC(gamma="scale")
    assert_raises_regexp(Exception, r".*\bSVC\b.*\bnot\b.*\bfitted\b",
                         clf.predict, X)

    clf = svm.NuSVR(gamma='scale')
    assert_raises_regexp(Exception, r".*\bNuSVR\b.*\bnot\b.*\bfitted\b",
                         clf.predict, X)
Example #17
def validation(data, target, constant):
    score = 0

    regressor = svm.NuSVR(kernel="poly")

    param_grid = {
        'C': np.linspace(20.0, 40.0, 10),
        'nu': np.linspace(0.0001, 1, 5)
    }

    grid_search = sklearn.grid_search.GridSearchCV(
        regressor,
        param_grid,
        scoring=sklearn.metrics.make_scorer(sklearn.metrics.mean_squared_error,
                                            greater_is_better=False),
        cv=5,
        n_jobs=-1)
    grid_search.fit(data, target)
    clf = grid_search.best_estimator_
    print(clf)

    chunk_size = len(data) // CVSize  # integer division: used in slices below
    for x in range(CVSize):

        # These describe where to cut to get our cross-validation folds
        first_step = x * chunk_size
        second_step = (x + 1) * chunk_size

        # Get the data parts we train on
        cross_data = np.vstack((data[:first_step], data[second_step:]))
        cross_target = np.append(target[:first_step], target[second_step:])

        # fit and save the coef
        clf.fit(cross_data, cross_target)

        # Find mean squared error and print it
        sample_data = data[first_step:second_step]
        sample_target = target[first_step:second_step]

        # Get scores for our model
        pred = clf.predict(sample_data)
        RMSE = mean_squared_error(sample_target, pred)**0.5
        score += RMSE

    score = score / CVSize

    print("Cross-Validation RMSE: {} ".format(score))

    # Get global score
    clf.fit(data, target)
    pred = clf.predict(data)
    RMSE = mean_squared_error(target, pred)**0.5
    print("RMSE on whole dataset {}".format(RMSE))

    return score
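The hand-rolled chunking above (and sklearn.grid_search, the legacy module name) predates the model_selection API. The same cross-validated RMSE in one call, assuming the same clf, data, target, and CVSize:

from sklearn.model_selection import cross_val_score

# 'neg_root_mean_squared_error' returns negated RMSE per fold.
scores = cross_val_score(clf, data, target, cv=CVSize,
                         scoring='neg_root_mean_squared_error')
print("Cross-Validation RMSE: {}".format(-scores.mean()))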
Example #18
def test_svr_coef_sign():
    # Test that SVR(kernel="linear") has coef_ with the right sign.
    # Non-regression test for #2933.
    X = np.random.RandomState(21).randn(10, 3)
    y = np.random.RandomState(12).randn(10)

    for svr in [svm.SVR(kernel='linear'), svm.NuSVR(kernel='linear'),
                svm.LinearSVR()]:
        svr.fit(X, y)
        assert_array_almost_equal(svr.predict(X),
                                  np.dot(X, svr.coef_.ravel()) + svr.intercept_)
Example #19
def SelectModel(regressor):

    if (regressor == 'svr'):
        model = svm.SVR()
    elif (regressor == 'nusvr'):
        model = svm.NuSVR()
    elif (regressor == 'linear'):
        model = LinearRegression()
    elif (regressor == 'RF'):
        model = RandomForestRegressor(n_estimators=1500, n_jobs=-1)

    return model
Example #20
def manualGridSearch(train, test, dict_names, y, X, Y, tuning_parameters):
    gridSearch = defaultdict(dict)
    for i in range(len(X)):
        nX = X[i]
        nY = Y[i]
        for k in tuning_parameters['kernel']:
            d = 3
            print(k)
            listK = []
            if 'poly' in k and k != 'poly':
                # kernel names like 'poly2'/'poly3' carry the degree digit
                d = int(k[-1])
                k = 'poly'
            for g in tuning_parameters['gamma']:
                print("   "+str(g))
                col = []
                if 'poly' in k and type(g) != float:
                    for c in tuning_parameters['C']:
                        for name in dict_names:
                            col.append(0)
                else:
                    for c in tuning_parameters['C']:
                        if 'poly' in k:
                            k = 'poly'
                        print("      "+str(c))
                        for name in dict_names:
                            print("      "+name)
                            X_train, X_test, y_train, y_test = getTrainTest(train, test, name, nX, nY, y)
                            if y==1 or y==2:
                                clf = svm.NuSVR(kernel=k, C=c, gamma=g, degree=d)
                                clf.fit(X_train, y_train)
                                y_pred = clf.predict(X_test)
                                y_pred = y_pred.astype(float)
                                y_test = y_test.astype(float)
                                rmse = np.sqrt((np.square(y_pred - y_test)).mean())
                                col.append(rmse)
                            elif y==0 or y==3:
                                clf = svm.SVC(kernel=k, C=c, gamma=g, degree=d)
                                clf.fit(X_train, y_train)
                                y_pred = clf.predict(X_test)
                                y_pred = y_pred.astype(int)
                                y_test = y_test.astype(int)
                                precision = metrics.precision_score(y_true=y_test,y_pred=y_pred,pos_label=0)
                                col.append(precision)
                listK.append(col)
            A = np.column_stack(listK)
            row_names = len(tuning_parameters['C'])*dict_names
            col_names = tuning_parameters['gamma']
            A = pd.DataFrame(A)
            A.index = row_names
            A.columns = col_names
            gridSearch[k][nX] = A
    return gridSearch
Example #21
def test_immutable_coef_property():
    # Check that primal coef modification are not silently ignored
    svms = [
        svm.SVC(kernel='linear').fit(iris.data, iris.target),
        svm.NuSVC(kernel='linear').fit(iris.data, iris.target),
        svm.SVR(kernel='linear').fit(iris.data, iris.target),
        svm.NuSVR(kernel='linear').fit(iris.data, iris.target),
        svm.OneClassSVM(kernel='linear').fit(iris.data),
    ]
    for clf in svms:
        assert_raises(AttributeError, clf.__setattr__, 'coef_', np.arange(3))
        assert_raises((RuntimeError, ValueError),
                      clf.coef_.__setitem__, (0, 0), 0)
Example #22
def test_immutable_coef_property():
    # Check that primal coef modification are not silently ignored
    svms = [
        svm.SVC(kernel="linear").fit(iris.data, iris.target),
        svm.NuSVC(kernel="linear").fit(iris.data, iris.target),
        svm.SVR(kernel="linear").fit(iris.data, iris.target),
        svm.NuSVR(kernel="linear").fit(iris.data, iris.target),
        svm.OneClassSVM(kernel="linear").fit(iris.data),
    ]
    for clf in svms:
        with pytest.raises(AttributeError):
            clf.__setattr__("coef_", np.arange(3))
        with pytest.raises((RuntimeError, ValueError)):
            clf.coef_.__setitem__((0, 0), 0)
Example #23
def nu_svr_example():
    n_samples, n_features = 10, 5
    np.random.seed(0)
    X, Y = np.random.randn(n_samples, n_features), np.random.randn(n_samples)
    #iris = datasets.load_iris()
    #X, Y = iris.data, iris.target

    regressor = svm.NuSVR(nu=0.5, kernel='rbf', degree=3, max_iter=-1)
    regressor.fit(X, Y)

    X_test = np.random.randn(5, n_features)
    #X_test = X
    print('Prediction =', regressor.predict(X_test))
    print('Score =', regressor.score(X, Y))
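A quick way to see what nu controls in this example: it lower-bounds the fraction of training points kept as support vectors. A short sketch reusing the X, Y, and n_samples defined above:

for nu in (0.1, 0.5, 0.9):
    reg = svm.NuSVR(nu=nu, kernel='rbf').fit(X, Y)
    print(nu, len(reg.support_) / n_samples)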
Example #24
def SelectModel(regressor):

    if(regressor == 'svr'):
        model = svm.SVR()
    elif (regressor == 'nusvr'):
        model = svm.NuSVR()
    elif (regressor == 'Gausian'):
        model = GaussianProcess(corr='cubic', theta0=1e-2, thetaL=1e-4, thetaU=1e-1,random_start=100)
    elif (regressor == 'Nearest_Neighbors_uniform'):
        model = neighbors.KNeighborsRegressor(n_neighbors, weights='uniform')
    elif (regressor == 'Nearest_Neighbors_distance'):
        model = neighbors.KNeighborsRegressor(n_neighbors, weights='distance')
    
    return model
Example #25
def normalCV_NuSVR_cpu(X, Y, n_folds, c, kernel):
    svc = svm.NuSVR(kernel=kernel, C=c, verbose=0, max_iter=100000)
    kf = KFold(n_splits=n_folds, random_state=None)

    array_preds = np.zeros((len(Y),))
    list_trues = np.zeros((len(Y),))

    for train_index, test_index in kf.split(X=X):
        x_train, x_test = X[train_index], X[test_index]
        y_train, y_test = Y[train_index], Y[test_index]
        svc.fit(x_train, y_train)
        pred = svc.predict(x_test)
        array_preds[test_index] = pred
        list_trues[test_index] = y_test

    return array_preds, list_trues
Example #26
def loocv_NuSVR_cpu(X, Y, c, kernel):
    svc = svm.NuSVR(kernel=kernel, C=c, verbose=0, max_iter=100000)
    loo = LeaveOneOut()

    array_preds = np.zeros((len(Y),))
    list_trues = np.zeros((len(Y),))

    for train_index, test_index in loo.split(X=X):
        x_train, x_test = X[train_index], X[test_index]
        y_train, y_test = Y[train_index], Y[test_index]
        svc.fit(x_train, y_train)
        pred = svc.predict(x_test)
        array_preds[test_index] = pred
        list_trues[test_index] = y_test

    return array_preds, list_trues
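Both this helper and the K-fold variant in Example #25 reimplement out-of-fold prediction by hand; sklearn's cross_val_predict does the same bookkeeping. A hedged one-call equivalent of the LOOCV version, with the same X, Y, c, and kernel arguments:

from sklearn import svm
from sklearn.model_selection import LeaveOneOut, cross_val_predict

svc = svm.NuSVR(kernel=kernel, C=c, max_iter=100000)
array_preds = cross_val_predict(svc, X, Y, cv=LeaveOneOut())
# The true values are simply Y itself, already in the original row order.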
Example #27
def trainAdaptive():
    '''
    Train a machine learner on parametrized data examples.
    Save to adaptive.pkl.
    '''
    print("Entering train adaptive")
    trainAndTarget = np.loadtxt('traindata.dat')
    traindata = trainAndTarget[:, 0:2]
    targetdata = trainAndTarget[:, 2]

    massPoints = np.unique(traindata[:, 1])
    # integer division preserves the original Python 2 floor semantics
    chunk = len(traindata) // len(massPoints) // 2
    shift = len(traindata) // 2

    print("training adaptive")
    clf = svm.NuSVR()
    clf.fit(traindata, targetdata)
    joblib.dump(clf, 'adaptive.pkl')
Example #28
def model_svm(s, t, s_, t_, flagLinear):
    # bad r2
    if flagLinear == 0:
        #http://scikit-learn.org/stable/modules/generated/sklearn.svm.NuSVR.html#sklearn.svm.NuSVR
        clf = sksvm.NuSVR(nu=0.5, C=1.0, kernel='rbf', degree=5, gamma='auto', coef0=0.0, shrinking=True, \
                    tol=0.001, cache_size=200, verbose=False, max_iter=1000)
        clf.fit(s, t)
    else:
        # http://scikit-learn.org/stable/modules/generated/sklearn.svm.LinearSVR.html#sklearn.svm.LinearSVR
        # this loss function is L1, thus insensitive to outliers
        clf = sksvm.LinearSVR(epsilon=0.0, tol=0.0001, C=1.0, loss='epsilon_insensitive', \
                              fit_intercept=True, intercept_scaling=1.0, dual=True, verbose=0, \
                              random_state=None, max_iter=1000)
        clf.fit(s, t)
        print('coeffs =', clf.coef_, ' intercept =', clf.intercept_)
    r2_train = clf.score(s, t)
    r2_test = clf.score(s_, t_)
    print('r2_train=', r2_train, ' r2_test=', r2_test)
Example #29
def test_immutable_coef_property():
    """Check that primal coef modification are not silently ignored"""
    svms = [
        svm.SVC(kernel='linear').fit(iris.data, iris.target),
        svm.NuSVC(kernel='linear').fit(iris.data, iris.target),
        svm.SVR(kernel='linear').fit(iris.data, iris.target),
        svm.NuSVR(kernel='linear').fit(iris.data, iris.target),
        svm.OneClassSVM(kernel='linear').fit(iris.data),
        svm.sparse.SVC(kernel='linear').fit(iris.data, iris.target),
        svm.sparse.NuSVC(kernel='linear').fit(iris.data, iris.target),
        svm.sparse.SVR(kernel='linear').fit(iris.data, iris.target),
        svm.sparse.NuSVR(kernel='linear').fit(iris.data, iris.target),
        svm.LinearSVC().fit(iris.data, iris.target),
        linear_model.LogisticRegression().fit(iris.data, iris.target),
    ]
    for clf in svms:
        assert_raises(AttributeError, clf.__setattr__, 'coef_', np.arange(3))
        assert_raises(RuntimeError, clf.coef_.__setitem__, (0, 0), 0)
Example #30
    def svrmodel(self, testlen, ntrain, kernel='linear', batch=10000):
        hsmadata = self.hsmadata
        dates = pd.Series(hsmadata['date'].unique()).sort_values()
        dates.index = range(0, len(dates))
        ntest = len(dates) // testlen

        hsma = pd.DataFrame()
        for i in range(ntrain, ntest):
            traindata = hsmadata[
                (hsmadata['date'] >= dates[(i - ntrain) * testlen])
                & (hsmadata['date'] < dates[i * testlen - self.day])].copy()
            testdata = hsmadata[(hsmadata['date'] >= dates[i * testlen]) & (
                hsmadata['date'] < dates[(i + 1) * testlen])].copy()
            traindata.index = range(0, traindata.shape[0])
            testdata['predratio'] = 0

            traindata = traindata.iloc[:, 2:]
            traindatax = traindata.drop(['closeratio'], axis=1)
            traindatay = traindata['closeratio']
            testdatax = testdata[traindatax.columns]

            scaler = preprocessing.StandardScaler().fit(traindatax)
            traindatas = scaler.transform(traindatax)
            testdatas = scaler.transform(testdatax)

            n1 = traindatas.shape[0]
            nbatch = n1 // batch
            for j in range(0, nbatch):
                traindataxb = pd.DataFrame(traindatas).iloc[
                    list(range(j, n1, nbatch))]
                traindatayb = traindata['closeratio'].iloc[
                    list(range(j, n1, nbatch))]
                svrmodel = svm.NuSVR(kernel=kernel)
                svrmodel.fit(traindataxb, traindatayb)
                testdata['predratio'] = testdata[
                    'predratio'] + svrmodel.predict(testdatas)

            testdata['predratio'] = testdata['predratio'] / nbatch

            hsma = pd.concat([hsma, testdata], ignore_index=True)

        return (hsma)
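The inner batch loop is effectively a small bagging ensemble: the training rows are split into nbatch interleaved strided subsets, one NuSVR is fitted per subset, and their predictions are averaged. sklearn's BaggingRegressor expresses the same idea directly; a minimal sketch, assuming the scaled traindatas/testdatas and traindatay from one iteration above:

from sklearn import svm
from sklearn.ensemble import BaggingRegressor

bagged = BaggingRegressor(svm.NuSVR(kernel=kernel),
                          n_estimators=nbatch, max_samples=1.0 / nbatch)
bagged.fit(traindatas, traindatay)
pred = bagged.predict(testdatas)  # already the across-estimator average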