# imports used by this snippet
import math
import numpy as np
from sklearn import preprocessing, model_selection, svm
from sklearn.linear_model import LinearRegression

predict_out = int(math.ceil(0.01 * len(data)))
data['label'] = data[predict_col].shift(-predict_out)

#%% splitting the data into Features X and labels y

data.dropna(inplace=True)
X = np.array(data.drop(['label'], axis=1))
X = preprocessing.scale(X)
y = np.array(data['label'])
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.2)

#%% training and trying different kernels for the SVM model
for k in ['linear', 'poly', 'rbf', 'sigmoid']:
    model = svm.SVR(kernel=k, gamma='scale')
    model.fit(X_train, y_train)
    accuracy = model.score(X_test, y_test)
    print(k, accuracy)

#%% training a LinearRegression model
model = LinearRegression()
model.fit(X_train, y_train)
accuracy = model.score(X_test, y_test)
print('accuracy ', accuracy)

#%% choosing the SVM linear kernel for best accuracy

model = svm.SVR(kernel='linear', gamma='scale')
model.fit(X_train, y_train)
accuracy = model.score(X_test, y_test)
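
#%% a hedged extra check: cross-validate each kernel rather than trusting one split
# (a sketch; reuses the X and y built above)
from sklearn.model_selection import cross_val_score
for k in ['linear', 'poly', 'rbf', 'sigmoid']:
    scores = cross_val_score(svm.SVR(kernel=k, gamma='scale'), X, y, cv=5)
    print(k, scores.mean())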
Example #2
def modelSVM(X_train, y_train):
    model = svm.SVR(kernel="poly")
    model.fit(X_train, y_train)
    return model
Example #3
def getData():

    q.put("starting data query...")

    # parse as floats so the range filters and the remap() calls below work numerically
    lat1 = float(request.args.get('lat1'))
    lng1 = float(request.args.get('lng1'))
    lat2 = float(request.args.get('lat2'))
    lng2 = float(request.args.get('lng2'))

    w = float(request.args.get('w'))
    h = float(request.args.get('h'))
    cell_size = float(request.args.get('cell_size'))

    analysis = request.args.get('analysis')
    heatmap = request.args.get('heatmap')
    spread = request.args.get('spread')
    if spread in ("", None):
        spread = 12
    else:
        try:
            spread = int(spread)
        except ValueError:
            spread = 12
    #CAPTURE ANY ADDITIONAL ARGUMENTS SENT FROM THE CLIENT HERE

    engine = create_engine(
        'sqlite:////var/www/mywebsite/mywebsite/database/datamining.db')
    Base.metadata.bind = engine

    DBSession = sessionmaker(bind=engine)
    session = DBSession()
    records = session.query(RealEstate).filter(
        RealEstate.latitude > lat1, RealEstate.latitude < lat2,
        RealEstate.longitude > lng1, RealEstate.longitude < lng2).all()

    #USE INFORMATION RECEIVED FROM CLIENT TO CONTROL
    #HOW MANY RECORDS ARE CONSIDERED IN THE ANALYSIS
    if heatmap == "true":
        random.shuffle(records)
        records = records[:100]
    if analysis == "true":
        random.shuffle(records)
        records = records[:80]
    numListings = len(records)

    # iterate through data to find minimum and maximum price
    minPrice = 1000000000
    maxPrice = 0

    for record in records:
        price = record.price

        if price > maxPrice:
            maxPrice = price
        if price < minPrice:
            minPrice = price

    output = {"type": "FeatureCollection", "features": []}

    for record in records:
        feature = {
            "type": "Feature",
            "properties": {},
            "geometry": {
                "type": "Point"
            }
        }
        feature["id"] = record.id
        feature["properties"]["name"] = record.title
        feature["properties"]["price"] = record.price
        feature["properties"]["priceNorm"] = remap(record.price, minPrice,
                                                   maxPrice, 0, 1)
        feature["geometry"]["coordinates"] = [
            record.latitude, record.longitude
        ]

        output["features"].append(feature)

    if heatmap == "false":
        if analysis == "false":
            q.put('idle')
            return json.dumps(output)

    output["analysis"] = []

    numW = int(math.floor(w / cell_size))
    numH = int(math.floor(h / cell_size))

    grid = []

    for j in range(numH):
        grid.append([])
        for i in range(numW):
            grid[j].append(0)

    #USE CONDITIONAL ALONG WITH UI INFORMATION RECEIVED FROM THE CLIENT TO SWITCH
    #BETWEEN HEAT MAP AND INTERPOLATION ANALYSIS
    if heatmap == "true":
        ## HEAT MAP IMPLEMENTATION
        q.put('starting heatmap analysis...')
        for record in records:

            pos_x = int(remap(record.longitude, lng1, lng2, 0, numW))
            pos_y = int(remap(record.latitude, lat1, lat2, numH, 0))

            #USE INFORMATION RECEIVED FROM CLIENT TO CONTROL SPREAD OF HEAT MAP
            if not (0 < spread < 20):
                spread = 12
                print("spread = default value")

            for j in range(max(0, (pos_y - spread)),
                           min(numH, (pos_y + spread))):
                for i in range(max(0, (pos_x - spread)),
                               min(numW, (pos_x + spread))):
                    grid[j][i] += 2 * math.exp(
                        (-point_distance(i, j, pos_x, pos_y)**2) /
                        (2 * (spread / 2)**2))
        grid = normalizeArray(grid)

        offsetLeft = (w - numW * cell_size) / 2.0
        offsetTop = (h - numH * cell_size) / 2.0

        for j in range(numH):
            for i in range(numW):
                newItem = {}

                newItem['x'] = offsetLeft + i * cell_size
                newItem['y'] = offsetTop + j * cell_size
                newItem['width'] = cell_size - 1
                newItem['height'] = cell_size - 1
                newItem['value'] = grid[j][i]

                output["analysis"].append(newItem)
        if analysis == "false":
            q.put('idle')
        if analysis == "true":
            q.put('cannot run both, run as heatmap')
        return json.dumps(output)

    ## MACHINE LEARNING IMPLEMENTATION
    if ((heatmap == "false") and (analysis == "true")):
        q.put('starting interpolation analysis...')
        featureData = []
        targetData = []

        for record in records:
            featureData.append([record.latitude, record.longitude])
            targetData.append(record.price)

        X = np.asarray(featureData, dtype='float')
        y = np.asarray(targetData, dtype='float')

        split_idx = int(numListings * .7)

        # create training and validation set
        X_train = X[:split_idx]
        X_val = X[split_idx:]

        y_train = y[:split_idx]
        y_val = y[split_idx:]

        #mean 0, variance 1
        scaler = preprocessing.StandardScaler().fit(X_train)
        X_train_scaled = scaler.transform(X_train)

        mse_min = float('inf')

        for C in [.01, 1, 100, 10000, 1000000]:

            for e in [.01, 1, 100, 10000, 1000000]:

                for g in [.01, 1, 100, 10000, 1000000]:

                    q.put("training model: C[" + str(C) + "], e[" + str(e) +
                          "], g[" + str(g) + "]")

                    model = svm.SVR(C=C,
                                    epsilon=e,
                                    gamma=g,
                                    kernel='rbf',
                                    cache_size=2000)
                    model.fit(X_train_scaled, y_train)

                    y_val_p = model.predict(X_val)  # predict the whole validation set at once

                    mse = 0
                    for i in range(len(y_val_p)):
                        mse += (y_val_p[i] - y_val[i])**2
                    mse /= len(y_val_p)

                    if mse < mse_min:
                        mse_min = mse
                        model_best = model
                        C_best = C
                        e_best = e
                        g_best = g

        q.put("best model: C[" + str(C_best) + "], e[" + str(e_best) +
              "], g[" + str(g_best) + "]")

        for j in range(numH):
            for i in range(numW):
                lat = remap(j, numH, 0, lat1, lat2)
                lng = remap(i, 0, numW, lng1, lng2)

                testData = [[lat, lng]]
                X_test = np.asarray(testData, dtype='float')
                X_test_scaled = scaler.transform(X_test)
                grid[j][i] = model_best.predict(X_test_scaled)[0]  # scalar, so json.dumps works later
        grid = normalizeArray(grid)

        offsetLeft = (w - numW * cell_size) / 2.0
        offsetTop = (h - numH * cell_size) / 2.0

        for j in range(numH):
            for i in range(numW):
                newItem = {}

                newItem['x'] = offsetLeft + i * cell_size
                newItem['y'] = offsetTop + j * cell_size
                newItem['width'] = cell_size - 1
                newItem['height'] = cell_size - 1
                newItem['value'] = grid[j][i]

                output["analysis"].append(newItem)

        q.put('idle')

        return json.dumps(output)
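
# The endpoint above calls remap() and normalizeArray() without defining them;
# a minimal sketch consistent with how they are invoked (an assumption, not
# the project's original helpers):
def remap(value, in_min, in_max, out_min, out_max):
    # linearly rescale value from [in_min, in_max] onto [out_min, out_max]
    return out_min + (float(value) - in_min) * (out_max - out_min) / (in_max - in_min)


def normalizeArray(grid):
    # rescale a 2-D list of numbers into [0, 1]
    flat = [float(v) for row in grid for v in row]
    lo, hi = min(flat), max(flat)
    span = (hi - lo) or 1.0
    return [[(float(v) - lo) / span for v in row] for row in grid]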
Example #4
from sklearn import linear_model
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn import svm
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import KFold
from sklearn.metrics import r2_score
import datetime

algorithm_name = [
    'Regression(Lasso)', 'KNN', 'Decision Tree', 'SVM(Linear)', 'AdaBoost',
    'Random Forest'
]
algorithm = []
algorithm.append(linear_model.Lasso(alpha=0.00114115))
algorithm.append(KNeighborsRegressor(n_neighbors=31))
algorithm.append(DecisionTreeRegressor(max_depth=4))
algorithm.append(svm.SVR(kernel='linear', C=0.0918484848484))
algorithm.append(AdaBoostRegressor(n_estimators=10))
algorithm.append(RandomForestRegressor(n_estimators=13, max_depth=4))

for i in range(len(algorithm_name)):
    kfold = KFold(n_splits=5, shuffle=False)
    index = kfold.split(X=x, y=y)
    for train_index, val_index in index:
        starttime = datetime.datetime.now()
        algorithm[i].fit(x[train_index], y[train_index])  # train
        y_pred = algorithm[i].predict(x[val_index])  # predict
        accuracy1 = r2_score(y[val_index], y_pred)  # r2_score(y_true, y_pred)
        y_pred = algorithm[i].predict(x_test)  # predict
        accuracy = r2_score(y_test, y_pred)
        endtime = datetime.datetime.now()
        time = (endtime - starttime).microseconds
Example #5
data_train = data[:int(.9 * n_samples)]
target_train = target[:int(.9 * n_samples)]
data_test = data[int(.9 * n_samples):]
target_test = target[int(.9 * n_samples):]

# classification scores
print('# Classification scores:')
print('KNN: %f' % neighbors.KNeighborsClassifier().fit(data_train, target_train).score(data_test, target_test))
print('linear_model.ElasticNet: %f' % linear_model.ElasticNet().fit(data_train, target_train).score(data_test, target_test))
print('linear_model.ElasticNetCV: %f' % linear_model.ElasticNetCV().fit(data_train, target_train).score(data_test, target_test))
print('linear_model.Lars: %f' % linear_model.Lars().fit(data_train, target_train).score(data_test, target_test))
print('linear_model.Lasso: %f' % linear_model.Lasso().fit(data_train, target_train).score(data_test, target_test))
print('linear_model.LassoCV: %f' % linear_model.LassoCV().fit(data_train, target_train).score(data_test, target_test))
print('linear_model.LassoLars: %f' % linear_model.LassoLars().fit(data_train, target_train).score(data_test, target_test))
print('linear_model.LassoLarsIC: %f' % linear_model.LassoLarsIC().fit(data_train, target_train).score(data_test, target_test))
print('linear_model.LinearRegression: %f' % linear_model.LinearRegression().fit(data_train, target_train).score(data_test, target_test))
print('linear_model.LogisticRegression: %f' % linear_model.LogisticRegression().fit(data_train, target_train).score(data_test, target_test))
print('linear_model.OrthogonalMatchingPursuit: %f' % linear_model.OrthogonalMatchingPursuit().fit(data_train, target_train).score(data_test, target_test))
print('linear_model.PassiveAggressiveClassifier: %f' % linear_model.PassiveAggressiveClassifier().fit(data_train, target_train).score(data_test, target_test))
print('linear_model.PassiveAggressiveRegressor: %f' % linear_model.PassiveAggressiveRegressor().fit(data_train, target_train).score(data_test, target_test))
print('linear_model.Perceptron: %f' % linear_model.Perceptron().fit(data_train, target_train).score(data_test, target_test))
print('linear_model.Ridge: %f' % linear_model.Ridge().fit(data_train, target_train).score(data_test, target_test))
print('linear_model.RidgeClassifier: %f' % linear_model.RidgeClassifier().fit(data_train, target_train).score(data_test, target_test))
print('linear_model.RidgeClassifierCV: %f' % linear_model.RidgeClassifierCV().fit(data_train, target_train).score(data_test, target_test))
print('linear_model.RidgeCV: %f' % linear_model.RidgeCV().fit(data_train, target_train).score(data_test, target_test))
print('linear_model.SGDClassifier: %f' % linear_model.SGDClassifier().fit(data_train, target_train).score(data_test, target_test))
print('linear_model.SGDRegressor: %f' % linear_model.SGDRegressor().fit(data_train, target_train).score(data_test, target_test))
print('naive_bayes.MultinomialNB: %f' % naive_bayes.MultinomialNB().fit(data_train, target_train).score(data_test, target_test))
print('lda.LDA: %f' % lda.LDA().fit(data_train, target_train).score(data_test, target_test))
print('svm.SVR: %f' % svm.SVR().fit(data_train, target_train).score(data_test, target_test))
print('svm.SVC: %f' % svm.SVC(kernel='linear').fit(data_train, target_train).score(data_test, target_test))
print('svm.LinearSVC: %f' % svm.LinearSVC().fit(data_train, target_train).score(data_test, target_test))
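
# The repeated prints above could be collapsed into a loop; a sketch (note that
# .score() is accuracy for classifiers but R^2 for regressors, so the numbers
# are not directly comparable across the two kinds of estimator):
for est in [neighbors.KNeighborsClassifier(), linear_model.Ridge(), svm.SVR()]:
    print('%s: %f' % (type(est).__name__,
                      est.fit(data_train, target_train).score(data_test, target_test)))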
Example #6
import numpy as np
from sklearn import svm
import matplotlib.pyplot as plt


if __name__ == "__main__":
    N = 50
    np.random.seed(0)
    x = np.sort(np.random.uniform(0, 6, N), axis=0)  # uniform() draws samples uniformly from [0, 6)
    y = 2*np.sin(x) + 0.1*np.random.randn(N)
    x = x.reshape(-1, 1)
    print('x =\n', x)
    print('y =\n', y)

    print('SVR - RBF')
    svr_rbf = svm.SVR(kernel='rbf', gamma=0.2, C=100)
    svr_rbf.fit(x, y)
    print('SVR - Linear')
    svr_linear = svm.SVR(kernel='linear', C=100)
    svr_linear.fit(x, y)
    print('SVR - Polynomial')
    svr_poly = svm.SVR(kernel='poly', degree=3, C=100)
    svr_poly.fit(x, y)
    print('Fit OK.')

    # Exercise: change the factor 1.1 below to 1.5 and observe the extrapolation
    x_test = np.linspace(x.min(), 1.1*x.max(), 100).reshape(-1, 1)
    y_rbf = svr_rbf.predict(x_test)
    y_linear = svr_linear.predict(x_test)
    y_poly = svr_poly.predict(x_test)
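
    # A plotting sketch for the three fits above (matplotlib is already imported):
    plt.figure(figsize=(8, 5))
    plt.scatter(x, y, s=10, c='k', label='data')
    plt.plot(x_test, y_rbf, 'r-', label='RBF')
    plt.plot(x_test, y_linear, 'g-', label='linear')
    plt.plot(x_test, y_poly, 'b-', label='poly')
    plt.legend(loc='best')
    plt.show()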
Example #7
def svrPredictions(xTrain, yTrain, xTest, k):
    clf = svm.SVR(C=2.0, kernel=k)
    clf.fit(xTrain, yTrain)
    return clf.predict(xTest)
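
# Hypothetical usage with synthetic data (all names below are illustrative):
import numpy as np
from sklearn import svm
rng = np.random.RandomState(0)
xTrain, xTest = rng.rand(80, 3), rng.rand(20, 3)
yTrain = xTrain.sum(axis=1)
preds = svrPredictions(xTrain, yTrain, xTest, 'rbf')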
Example #8
hyper_params = [{  # name taken from the GridSearchCV call below; the grid's earlier entries are truncated in the source
    'epsilon': (
        1e-2,
        1e-1,
        1e0,
        1e1,
    ),
    'coef0': (
        0.0,
        0.1,
        0.2,
    ),
}]

# Exhaustive search over specified parameter values for the estimator
# https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html
est = svm.SVR()
gs = GridSearchCV(est,
                  cv=10,
                  param_grid=hyper_params,
                  verbose=2,
                  n_jobs=n_jobs,
                  scoring='r2',
                  refit=True,
                  pre_dispatch='n_jobs',
                  error_score=np.nan,
                  return_train_score=True)

t0 = time.time()
gs.fit(x_train, y_train)
runtime = time.time() - t0
print("Training time: %.6f s" % runtime)
Example #9
def baggingMySVM(trainX,
                 trainY,
                 train_prediction_start,
                 testX,
                 testY,
                 test_prediction_start,
                 look_ahead,
                 bag_size=47,
                 Nestimators=50,
                 samp_size=0.95,
                 sampleModels=50,
                 kernel='sigmoid'):
    cRange = scipy.stats.expon(scale=5)
    #gammaRange = scipy.stats.expon(scale=0.1)
    #parameter_dist = {'C': cRange, 'gamma': gammaRange}
    parameter_dist = {'C': cRange}
    clf = RandomizedSearchCV(estimator=svm.SVR(kernel=kernel),
                             param_distributions=parameter_dist,
                             n_iter=50,
                             cv=10,
                             n_jobs=-1)
    clf.fit(trainX, trainY)
    print('Best C:', clf.best_estimator_.C)
    #print('Best Gamma:', clf.best_estimator_.gamma)

    svr = BaggingRegressor(svm.SVR(kernel=kernel, C=clf.best_estimator_.C),
                           n_estimators=Nestimators,
                           max_samples=samp_size,
                           bootstrap=False,
                           random_state=123)
    svr = svr.fit(trainX, trainY)

    colnames = ['dtStart']
    cln = [i for i in range(1, Nestimators * 2 + 3, 1)]
    colnames.extend(cln)
    # (date, trainY, true_lab, pred_labs...)
    trainRs = np.zeros((trainX.shape[0], sampleModels * 2 + 3))
    trainRs_raw = np.zeros((trainX.shape[0], Nestimators * 2 + 3))
    trainRs[:, 0] = train_prediction_start
    trainRs_raw[:, 0] = train_prediction_start
    trainRs[:, 1] = trainY
    trainRs_raw[:, 1] = trainY
    trainRs[:, 2] = [1 if trainY[i] > 0 else 0 for i in range(len(trainY))]
    trainRs_raw[:, 2] = [1 if trainY[i] > 0 else 0 for i in range(len(trainY))]
    #
    testRs = np.zeros((testX.shape[0], sampleModels * 2 + 3))
    testRs_raw = np.zeros((testX.shape[0], Nestimators * 2 + 3))
    testRs[:, 0] = test_prediction_start
    testRs_raw[:, 0] = test_prediction_start
    testRs[:, 1] = testY
    testRs_raw[:, 1] = testY
    testRs[:, 2] = [1 if testY[i] > 0 else 0 for i in range(len(testY))]
    testRs_raw[:, 2] = [1 if testY[i] > 0 else 0 for i in range(len(testY))]
    for i in range(sampleModels):
        trainRs_raw[:, i + 3] = svr.estimators_[i].predict(trainX)

        testRs_raw[:, i + 3] = svr.estimators_[i].predict(testX)

        trainRs_raw[:, i + Nestimators + 3] = \
            [1 if trainRs_raw[j, i + 3] > 0 else 0 for j in range(len(trainRs_raw[:, i + 3]))]
        testRs_raw[:, i + Nestimators + 3] = \
            [1 if testRs_raw[j, i + 3] > 0 else 0 for j in range(len(testRs_raw[:, i + 3]))]

    # aggregating results!
    model_inds = [j for j in range(3, Nestimators + 3)]
    # print(model_inds)
    for i in range(len(model_inds)):
        index_modelstoUse = np.random.choice(model_inds,
                                             bag_size,
                                             replace=False)
        tmp_train = trainRs_raw[:, index_modelstoUse]
        tmp_test = testRs_raw[:, index_modelstoUse]
        trainRs[:, i + 3] = np.sum(tmp_train, axis=1)
        testRs[:, i + 3] = np.sum(tmp_test, axis=1)

        trainRs[:, i + Nestimators + 3] = \
            [1 if trainRs[j, i + 3] > 0 else 0 for j in range(len(trainRs[:, i + 3]))]
        testRs[:, i + Nestimators + 3] = \
            [1 if testRs[j, i + 3] > 0 else 0 for j in range(len(testRs[:, i + 3]))]

    trainRs = pd.DataFrame(trainRs, columns=colnames)
    trainRs.to_csv('train_SQ_results_la%d.csv' % look_ahead, index=False)

    trainRs_raw = pd.DataFrame(trainRs_raw, columns=colnames)
    trainRs_raw.to_csv('train_SQ_Raw_results_la%d.csv' % look_ahead,
                       index=False)

    testRs = pd.DataFrame(testRs, columns=colnames)
    testRs.to_csv('test_SQ_results_la%d.csv' % look_ahead, index=False)

    testRs_raw = pd.DataFrame(testRs_raw, columns=colnames)
    testRs_raw.to_csv('test_SQ_Raw_results_la%d.csv' % look_ahead, index=False)
Example #10
def regressors(regrs):
    if (regrs == 'lin'):
        reg = LinearRegression(n_jobs=-1)
    elif (regrs == 'svm-lin'):
        reg = svm.SVR(kernel='linear', gamma='auto')
    elif (regrs == 'svm-poly'):
        reg = svm.SVR(kernel='poly', gamma='auto')
    elif (regrs == 'lasso'):
        reg = make_pipeline(PolynomialFeatures(params['deg_poly'], interaction_only=False), LassoCV(eps=params['lasso_eps'],\
                            n_alphas=params['lasso_nalpha'],max_iter=params['lasso_iter'], normalize=False,cv=5))
    elif (regrs == 'tree'):
        reg = DecisionTreeRegressor(random_state=24361)
    elif (regrs == 'forest'):
        reg = RandomForestRegressor(n_estimators=20,
                                    max_depth=2,
                                    min_samples_split=4,
                                    min_samples_leaf=1,
                                    random_state=24361,
                                    n_jobs=-1)
    elif (regrs == 'xgbr'):
        reg=XGBRegressor(learning_rate=0.10, max_depth=2, min_child_weight=1, \
                         n_estimators=100, subsample=0.25)
        # reg = XGBRegressor(learning_rate=0.045, max_depth=2, min_child_weight=1, \
        #                    n_estimators=100, subsample=0.15
        # eta=0.2, gamma=0.9, reg_lambda=0.1, reg_alpha=0.3, n_jobs=-1
    elif (regrs == 'ada'):
        nn = MLPRegressor(hidden_layer_sizes=(32, 1),
                          activation='relu',
                          solver='adam',
                          random_state=24361)
        xgbr=XGBRegressor(learning_rate=0.10, max_depth=2, min_child_weight=1, \
                         n_estimators=100, subsample=0.25, random_state=24361)
        # xgbr = XGBRegressor(learning_rate=0.045, max_depth=2, min_child_weight=1, \
        #                     n_estimators=100, subsample=0.15, gamma=0.3, reg_lambda=0.5, reg_alpha=0.4, n_jobs=-1)
        reg = AdaBoostRegressor(base_estimator=xgbr, learning_rate=0.1, loss='square', \
                                n_estimators=100, random_state=24361)
    elif (regrs == 'nn'):
        reg = MLPRegressor(hidden_layer_sizes=(32, 1),
                           activation='relu',
                           solver='adam',
                           random_state=24361)
        # learning_rate='constant', learning_rate_init=0.01, alpha=0.001, power_t=0.5, max_iter=50, \
        # tol=0.0001, momentum=0.5, nesterovs_momentum=True, validation_fraction=0.1, \
        # beta_1=0.1, beta_2=0.555, epsilon=1e-08, n_iter_no_change=50, random_state=24361)
    elif (regrs == 'comb'):
        xgbr = XGBRegressor(learning_rate=0.045, max_depth=2, min_child_weight=1, \
                            n_estimators=100, subsample=0.15, n_jobs=-1)
        xgbr1 = XGBRegressor(learning_rate=0.035, max_depth=3, min_child_weight=1, \
                            n_estimators=50, subsample=0.15, n_jobs=-1)
        # xgbr2 = XGBRegressor(learning_rate=0.025, max_depth=2, min_child_weight=1, \
        #                     n_estimators=50, subsample=0.15, n_jobs=-1)
        frst = RandomForestRegressor(max_depth=2,
                                     max_leaf_nodes=2,
                                     n_estimators=3,
                                     n_jobs=-1)
        dtr = DecisionTreeRegressor(max_depth=2, max_leaf_nodes=2)
        nn = MLPRegressor(hidden_layer_sizes=(32, 1),
                          activation='tanh',
                          solver='adam',
                          learning_rate_init=0.15)
        reg = StackingRegressor(regressors=[xgbr, xgbr1, frst, nn],
                                meta_regressor=frst)
    elif (regrs == 'tpot'):
        reg = TPOTRegressor(generations=10,
                            verbosity=2,
                            scoring='r2',
                            n_jobs=-1,
                            random_state=23)
    elif (regrs == 'voting'):
        frst = RandomForestRegressor(n_estimators=100,
                                     random_state=24361,
                                     n_jobs=-1)
        dtr = DecisionTreeRegressor(random_state=24361)
        reg = VotingClassifier(estimators=[('frst', frst), ('dtr', dtr)],
                               voting='hard')
    return (reg)
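
# Hypothetical usage of the dispatcher above (split names are illustrative):
reg = regressors('svm-lin')
# reg.fit(X_train, y_train)
# print(reg.score(X_test, y_test))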
Example #11
import pandas as pd
from sklearn import svm
from sklearn.metrics import mean_squared_error, make_scorer
from sklearn.model_selection import GridSearchCV


def rmse(y, y_pred):
    return mean_squared_error(y, y_pred)**0.5


train_dataset = pd.read_csv('train.csv', header=0)
x_train = train_dataset.iloc[:, 2:]
y_train = train_dataset.iloc[:, 1]
svc = svm.SVR(kernel='linear')

param_grid = [
    {
        'C': [1, 10, 100, 1000],
        'kernel': ['linear']
    },
    {
        'C': [1, 10, 100, 1000],
        'gamma': [0.001, 0.0001],
        'kernel': ['rbf']
    },
]
rmse_scorer = make_scorer(rmse, greater_is_better=False)
model = GridSearchCV(estimator=svc,
                     param_grid=param_grid,
                     scoring=rmse_scorer,
                     cv=3)
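
# The search above is configured but never run in the snippet; a sketch:
model.fit(x_train, y_train)
print(model.best_params_)
print('best CV RMSE: %.4f' % -model.best_score_)  # the scorer is negated, so flip the sign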
Example #12
'''
plt.plot(xx, yy_down, 'k--')
plt.plot(xx, yy_up, 'k--')

plt.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1],
            s=80, facecolors='none')
plt.scatter(X[:, 0], X[:, 1], c=Y, cmap=plt.cm.Paired)

plt.axis('tight')
plt.show()
'''

# Regression

from sklearn import svm
X = [[0, 0], [2, 2]]
y = [0.5, 2.5]
clf = svm.SVR()
clf.fit(X, y)
# repr of the fitted estimator, as echoed by an interactive session:
# SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='auto',
#     kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)
print(clf.predict([[1, 1]]))

from sklearn import datasets
iris = datasets.load_iris()
X = iris.data
print(X.shape)
y = iris.target
print(y.shape)
X = X[y != 0, :2]
y = y[y != 0]
print(X.shape)
Example #13
# Classification
# X=[[0,0],[1,1]]
# y=[0,1]
# clf=svm.SVC()
# clf.fit(X,y)
# res_predicted=clf.predict([[2.,2.]])
# # get the support vectors
# res_support_vector=clf.support_vectors_
# # get the indices of the support vectors
# res_support=clf.support_
# # number of support vectors for each class
# res_nSupport=clf.n_support_
# print(res_nSupport)

# Multi-class classification
# X=[[0],[1],[2],[3]]
# Y=[0,1,2,3]
# clf=svm.SVC(decision_function_shape='ovo')
# clf.fit(X,Y)
# dec=clf.decision_function([[1]])
# print(dec.shape[1])

# Regression
from sklearn import svm
X = [[0, 0], [2, 2]]
y = [0.5, 2.5]
clf = svm.SVR()
clf.fit(X, y)
res = clf.predict([[1, 1]])
print(res)
Example #14
rows = datasetX.shape[0]
#print(rows)

end = int(round(rows * 0.7, 0))  # choose what percentage of the data is used for training
trainX = datasetX[0:end]
trainY = datasetY[0:end]
testX = datasetX[end:rows]
testY = datasetY[end:rows]

print("_______________________\n SVM - SVR \n_______________________")
clf = svm.SVR(C=100000,
              degree=3,
              kernel='rbf',
              gamma=1.9,
              shrinking=True,
              tol=1e-9,
              cache_size=500,
              verbose=True,
              max_iter=-1)

# normalization

#X_normalized = preprocessing.normalize(trainX, norm='l2')
#X_test_normalized = preprocessing.normalize(testX, norm='l2')

# normalize
if normalize:
    trainX = preprocessing.normalize(trainX)
    testX = preprocessing.normalize(testX)
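
# Fitting and evaluating the configured SVR (a sketch using the split above):
clf.fit(trainX, trainY)
print('R^2 on the test split:', clf.score(testX, testY))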
Example #15
MAE_train_LR = []
MSE_train_LR = []
RMSE_train_LR = []
APE_test_LR = []
MAE_test_LR = []
MSE_test_LR = []
RMSE_test_LR = []
i = 0
for train_index, test_index in loo.split(scaled):
    trainSet = scaled[train_index]
    testSet = scaled[test_index]

    train_X, train_y = trainSet[:, 0:4], trainSet[:, -1]
    test_X, test_y = testSet[:, 0:4], testSet[:, -1]

    clf_SVR = svm.SVR(kernel='rbf', C=1000, gamma=15).fit(train_X, train_y)
    #clf_SVR = svm.SVR(kernel='linear',C=20).fit(train_X,train_y)
    #clf_SVR = svm.SVR(kernel='poly',C=1000, degree=3).fit(train_X,train_y)

    clf_RFR = RandomForestRegressor().fit(train_X, train_y)
    #clf_RFR = RandomForestRegressor(n_estimators=100,max_features=2).fit(train_X,train_y)

    clf_LR = linear_model.LinearRegression().fit(train_X, train_y)

    #joblib.dump(clf_SVR, '../results/SVR_train_model_'+str(i+1)+'.m')
    #joblib.dump(clf_RFR, '../results/RFR_train_model_'+str(i+1)+'.m')
    #joblib.dump(clf_LR, '../results/LR_train_model_'+str(i+1)+'.m')

    #inverse dataset of SVR
    train_pred_SVR = clf_SVR.predict(train_X)
    test_pred_SVR = clf_SVR.predict(test_X)
Example #16
divisao = 0.75

embaralhar = np.random.permutation(amostras)

x = x[embaralhar]
y= y[embaralhar]

x_treino = x [:int(amostras*divisao)]
y_treino = y [:int(amostras*divisao)]

x_teste = x [int(amostras*divisao):]
y_teste = y [int(amostras*divisao):]

parametros_svr = {'kernel':('linear','poly','sigmoid','rbf'),'C':[1,2,3,4,5]}

svr = svm.SVR()

clf = GridSearchCV(svr, parametros_svr, n_jobs=10)

clf.fit(x_treino, y_treino)  # the search must be fitted before best_params_ exists

print(clf.best_params_)

clf = svm.SVR(kernel='linear')

clf.fit(x_treino, y_treino)

predicao = clf.predict(x_teste)

mse = metrics.mean_squared_error(y_teste, predicao)

r2 = metrics.r2_score(y_teste, predicao)
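
# Reporting the metrics computed above (a sketch):
print('MSE:', mse)
print('R2:', r2)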
Example #17
        classification(xgboost.XGBClassifier(**XGBOOST_PARAMS)),
        classification_binary(xgboost.XGBClassifier(**XGBOOST_PARAMS)),

        # XGBoost (Large Trees)
        regression_random(xgboost.XGBRegressor(**XGBOOST_PARAMS_LARGE)),
        classification_random(xgboost.XGBClassifier(**XGBOOST_PARAMS_LARGE)),
        classification_binary_random(
            xgboost.XGBClassifier(**XGBOOST_PARAMS_LARGE)),

        # Linear SVM
        regression(svm.LinearSVR(random_state=RANDOM_SEED)),
        classification(svm.LinearSVC(random_state=RANDOM_SEED)),
        classification_binary(svm.LinearSVC(random_state=RANDOM_SEED)),

        # SVM
        regression(svm.SVR(kernel="rbf")),
        regression(svm.NuSVR(kernel="rbf")),
        classification_binary(svm.SVC(kernel="rbf", **SVC_PARAMS)),
        classification_binary(svm.SVC(kernel="linear", **SVC_PARAMS)),
        classification_binary(svm.SVC(kernel="poly", degree=2, **SVC_PARAMS)),
        classification_binary(svm.SVC(kernel="sigmoid", **SVC_PARAMS)),
        classification_binary(svm.NuSVC(kernel="rbf", **SVC_PARAMS)),
        classification(svm.SVC(kernel="rbf", **SVC_PARAMS)),
        classification(svm.NuSVC(kernel="rbf", **SVC_PARAMS)),

        # Linear Regression
        regression(linear_model.LinearRegression()),
        regression(linear_model.HuberRegressor()),
        regression(linear_model.ElasticNet(random_state=RANDOM_SEED)),
        regression(linear_model.ElasticNetCV(random_state=RANDOM_SEED)),
        regression(linear_model.TheilSenRegressor(random_state=RANDOM_SEED)),
Example #18
print(X_test.shape)
print(X_test)
print('Training set - y')
print(y_train.shape)
print(y_train)
print('Test set - y')
print(y_test.shape)
print(y_test)

X_train = X_train.astype('double')
y_train = y_train.astype('double')
X_test = X_test.astype('double')
y_test = y_test.astype('double')

print('Started Model training')
clf = svm.SVR(kernel=kernel).fit(X_train, y_train)
print(clf)
print('Model trained')

#make predictions
pred = clf.predict(X_test)

#print('Here is the mean squared error -')
#print(mean_squared_error(pred, y_test))

fig = plt.figure()
#print(X.reshape(1, -1))
#print(y)
plt.scatter(X.reshape(1, -1), y.ravel(), color='blue', label='original data')
#plt.scatter(val_x, val_y, color='pink', label='mean data')
#plt.plot(means, y_compressed, color='red', label='connected mean data')
Example #19
    def attack_svr(self, server, predictor_name, kernel_type, attack_type, dimension, query_budget, dataset=None, roundsize=5):
        if attack_type != "extraction" and (dataset is None or len(dataset) < 2):
            print("[!] Dataset too small")
            print("[*] Aborting attack...")
            raise ValueError

        if dataset is not None and not isinstance(dataset, list):
            dataset = dataset.tolist()
        if attack_type == "retraining":
            X = []
            y = []
            for datum in random.sample(dataset, query_budget):
                b = self.client.poll_server(server, predictor_name, [datum])
                X.append(datum)
                y.append(b)
            if kernel_type == "quadratic":
                my_model = svm.SVR(kernel="poly", degree=2)
            else:
                my_model = svm.SVR(kernel=kernel_type)


            my_model.fit(X, numpy.ravel(y))
            return my_model

        elif attack_type == "adaptive retraining":
            if len(dataset) >= query_budget > roundsize:

                pool = random.sample(dataset, query_budget)
                X = []
                y = []
                n = roundsize
                t = math.ceil(query_budget / n)

                for i in range(0, n):  # Initial training data for a basic start to train upon
                    a = pool.pop(0)
                    b = self.client.poll_server(server, predictor_name, [a])
                    X.append(a)
                    y.append(b)

                if kernel_type == "quadratic":
                    my_model = svm.NuSVR(kernel="poly", degree=2)
                else:
                    my_model = svm.NuSVR(kernel=kernel_type)
                for i in range(0, t - 1):  # perform t rounds minus the initial round.
                    #print(numpy.ravel(y))
                    my_model.fit(X, numpy.ravel(y))

                    if len(my_model.support_vectors_) == 0:
                        print("[!] NO SUPPORTVECTORS IN ROUND", i)
                        print("[*] Adding another round of random samples")
                        #print(my_model.support_)
                        #print(my_model.support_vectors_)
                        #print(my_model.dual_coef_)
                        for q in range(0, n):  # Initial training data for a basic start to train upon
                            if len(pool) == 0:
                                print("[!] Error: Not enough data")
                                raise IndexError
                            a = pool.pop(0)
                            b = self.client.poll_server(server, predictor_name, [a])
                            X.append(a)
                            y.append(b)
                        continue
                    print("Training Round", i, " of ", t-1)
                    pool, samples = self.get_furthest_samples(pool,
                                                              my_model.support_vectors_,
                                                              kernel_type,
                                                              my_model.coef0,
                                                              my_model.get_params()["gamma"],
                                                              my_model.get_params()["C"],
                                                              n,
                                                              my_model.dual_coef_)

                    for j in samples:
                        X.append(j)
                        y.append(self.client.poll_server(server, predictor_name, [j]))
                my_model.fit(X, numpy.ravel(y))
                return my_model
            else:
                print("[!] Error: either not enough data in data set, or query budget not bigger than round size.")
                print("[*] Aborting attack...")
                raise ValueError
        elif attack_type == "extraction":
            if kernel_type == "quadratic":
                # NOTE: KEEP IN MIND, IN THE IMPLEMENTATION THE VECTOR INDICES START AT 0, INSTEAD OF 1
                # Also DIMENSION - 1 is the max index, not the dimension itself.
                d_ = self.nCr(dimension, 2) + 2*dimension + 1  # d := Projection dimension
                if d_ > query_budget:
                    print("[!] Error: This algorithm will need", d_ ," queries.")
                    raise ValueError
                w_ = [0] * d_  # extracted weight vectors

                null_vector = [0] * dimension
                b_ = self.client.poll_server(server, predictor_name, [null_vector])[0]  # b' = w_d c +b
                for dim in range(dimension):
                    v_p = dim * [0] + [1] + (dimension - 1 - dim) * [0]
                    v_n = dim * [0] + [-1] + (dimension - 1 - dim) * [0]
                    f_v_p = self.client.poll_server(server, predictor_name, [v_p])[0] - b_
                    f_v_n = self.client.poll_server(server, predictor_name, [v_n])[0] - b_
                    w_[dimension - dim + 1 - 2] = (f_v_p + f_v_n) / 2
                    w_[d_ - dim - 2] = (f_v_p - f_v_n) / 2

                class QuadraticMockModel:
                    def __init__(self, d__, w__, b__):
                        self.dim = d__
                        self.w = w__
                        self.b = b__

                    def phi(self, x__):
                        vec = []
                        for i__ in x__[::-1]:
                            vec.append(i__**2)
                        for i__ in reversed(range(len(x__))):
                            for j__ in reversed(range(i__)):
                                vec.append(math.sqrt(2)*x__[i__]*x__[j__])
                        for i__ in x__[::-1]:
                            vec.append(i__)
                        vec.append(0)
                        return vec

                    def predict(self, arr):
                        rv = []
                        for v__ in arr:
                            val = numpy.dot(self.w, self.phi(v__)) + self.b
                            rv.append(val)
                        return rv

                if dimension <= 2:
                    return QuadraticMockModel(d_, w_, b_)
                for dim_i in range(dimension):
                    for dim_j in range(dim_i + 1, dimension):
                        #print(dim_i, dim_j)
                        v = dimension*[0]
                        v[dim_i], v[dim_j] = 1, 1
                        f_v = self.client.poll_server(server, predictor_name, [v])[0]
                        r = self.r_index(dim_i + 1, dim_j + 1, dimension) - 1
                        w_[r] = (f_v - w_[dimension - dim_i + 1 - 2] - w_[dimension - dim_j + 1 - 2] - w_[d_ - dim_i - 2] - w_[d_ - dim_j - 2] - b_) / math.sqrt(2)
                print("[+] w' extrahiert:", w_)

                return QuadraticMockModel(d_, w_, b_)

            if kernel_type != "linear":
                print("[!] Error: Unsupported Kernel for extraction attack.")
                raise ValueError
            d = [0] * dimension
            b = self.client.poll_server(server, predictor_name, [d])[0]
            w = []
            for j in range(0, dimension):
                x = j * [0] + [1] + (dimension - 1 - j) * [0]
                w.append(self.client.poll_server(server, predictor_name, [x])[0]-b)
            print("[+] Model parameters have been successfully extracted")
            print("[*] weight (w):", w)
            print("[*] bias   (b):", b)
            print("[*] Building mock model...")

            class LinearMockModel:
                def __init__(self, d__, w__, b__):
                    self.dim = d__
                    self.w = w__
                    self.b = b__

                def predict(self, arr):
                    rv = []
                    for v__ in arr:
                        val = numpy.dot(self.w, v__) + self.b
                        rv.append(val)
                    return rv

            return LinearMockModel(dimension, w, b)
        else:
            print("[!] Error: unknown attack type for svr")
            print("[*] Aborting attack...")
            raise ValueError
Example #20
'''
    # Put the result into a color plot
    Z = Z.reshape(XX.shape)
    plt.pcolormesh(XX, YY, Z > 0, cmap=plt.cm.Paired)
    plt.contour(XX, YY, Z, colors=['k', 'k', 'k'],
                linestyles=['--', '-', '--'], levels=[-.5, 0, .5])

    plt.title(kernel)
plt.show()
'''

# Find the optimized model 
from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV
import numpy as np
parameters = {'kernel': ('linear', 'rbf','poly'), 'C':[1.5, 10],'gamma': [1e-7, 1e-4],'epsilon':[0.1,0.2,0.5,0.3]}
svr = SVR()
clf = GridSearchCV(svr, parameters)
clf.fit(X_train, y_train)

print("Best estimator found by grid search:", clf.best_estimator_)
print("Best parameters found by grid search:", clf.best_params_)
print(clf.best_score_)

clf_best = SVR(**clf.best_params_)  # the searched parameters are SVR parameters
y_pred = clf_best.fit(X_train, y_train).predict(X_test)
score = clf_best.score(X_test, y_test)

# output prediction accuracy
print('Accuracy:', score)
# output confusion matrix
Example #21
os.chdir(r'D:\desktop\data mining\ML\LinearReg_ML')
df = pd.read_csv('HousePrices.csv')
df = df.head(50000)
X = df.drop(['Prices'], axis=1)
y = df.Prices
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_scaled =pd.DataFrame(sc.fit_transform(X), columns=X.columns)
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=1/3, random_state=0)   
from sklearn import svm
from sklearn.svm import SVR 
from sklearn.model_selection import GridSearchCV  
# gamma options for SVR: 'auto' or 'scale'
parameters = {'kernel':('linear', 'rbf'), 'C':[1, 10]}
svr = svm.SVR()
gr = GridSearchCV(svr, parameters, cv=5)
gr.fit(X_train, y_train)
gr.score(X_test,y_test)

#***********************************************************
from sklearn.model_selection import GridSearchCV

param_grid = [{'n_estimators': [3, 10, 30], 'max_features': [2, 4, 6, 8]},
{'bootstrap': [False], 'n_estimators': [3, 10], 'max_features': [2, 3, 4]},]

from sklearn.ensemble import RandomForestRegressor
forest_reg = RandomForestRegressor()
grd = GridSearchCV(forest_reg, param_grid, cv=5, scoring='neg_mean_squared_error')
grd.fit(X_train, y_train)  # fit on the training split, not the test split
grd.score(X_test, y_test)
Example #22
    def build_surrogate(self):
        """ Build a surrogate. Multiple options for models are available including:
            -Gaussian Processes
            -KNN
            -SVR
            
            Assumptions:
            None
            
            Source:
            N/A
            
            Inputs:
            state [state()]
            
            Outputs:
            self.sfc_surrogate    [fun()]
            self.thrust_surrogate [fun()]
            
            Properties Used:
            Defaulted values
        """     
        
        # unpack
        pycycle_problem = self.model
        
        
        pycycle_problem.set_solver_print(level=-1)
        pycycle_problem.set_solver_print(level=2, depth=0)        
        
        
        # Extract the data
        # Create lists that will turn into arrays
        Altitudes = []
        Machs     = []
        PCs       = []
        Thrust    = []
        TSFC      = []
        
        
        # if we added fc.dTS this would handle the deltaISA
        
        throttles = self.evaluation_throttles*1.

        for MN, alt in self.evaluation_mach_alt: 
    
            print('***'*10)
            print(f'* MN: {MN}, alt: {alt}')
            print('***'*10)
            pycycle_problem['OD_full_pwr.fc.MN'] = MN
            pycycle_problem['OD_full_pwr.fc.alt'] = alt
            pycycle_problem['OD_part_pwr.fc.MN'] = MN
            pycycle_problem['OD_part_pwr.fc.alt'] = alt
    
            for PC in throttles: 
                print(f'## PC = {PC}')
                pycycle_problem['OD_part_pwr.PC']  = PC
                pycycle_problem.run_model()
                #Save to our list for SUAVE
                Altitudes.append(alt)
                Machs.append(MN)
                PCs.append(PC)
                TSFC.append(pycycle_problem['OD_part_pwr.perf.TSFC'][0])
                Thrust.append(pycycle_problem['OD_part_pwr.perf.Fn'][0])

            throttles = np.flip(throttles)

        # Now setup into vectors
        Altitudes = np.atleast_2d(np.array(Altitudes)).T * Units.feet
        Mach      = np.atleast_2d(np.array(Machs)).T
        Throttle  = np.atleast_2d(np.array(PCs)).T
        thr       = np.atleast_2d(np.array(Thrust)).T * Units.lbf
        sfc       = np.atleast_2d(np.array(TSFC)).T   * Units['lbm/hr/lbf'] # lbm/hr/lbf converted to (kg/N/s)
        
        
        # Once we have the data the model must be deleted because pycycle models can't be deepcopied
        self.pop('model')
        
        # Concatenate all together and things will start to look like the propulsor surrogate soon
        my_data = np.concatenate([Altitudes,Mach,Throttle,thr,sfc],axis=1)
        
        if self.save_deck :
            # Write an engine deck
            np.savetxt("pyCycle_deck.csv", my_data, delimiter=",")
        
        print(my_data)
        
        # Clean up to remove redundant lines
        b = np.ascontiguousarray(my_data).view(np.dtype((np.void, my_data.dtype.itemsize * my_data.shape[1])))
        _, idx = np.unique(b, return_index=True)
       
        my_data = my_data[idx]                
   
        xy  = my_data[:,:3] # Altitude, Mach, Throttle
        thr = np.transpose(np.atleast_2d(my_data[:,3])) # Thrust
        sfc = np.transpose(np.atleast_2d(my_data[:,4]))  # SFC        
        
        self.altitude_input_scale = np.max(xy[:,0])
        self.thrust_input_scale   = np.max(thr)
        self.sfc_input_scale      = np.max(sfc)
        
        # normalize for better surrogate performance
        xy[:,0] /= self.altitude_input_scale
        thr     /= self.thrust_input_scale
        sfc     /= self.sfc_input_scale
       
       
        # Pick the type of process
        if self.surrogate_type  == 'gaussian':
            gp_kernel = Matern()
            regr_sfc = gaussian_process.GaussianProcessRegressor(kernel=gp_kernel)
            regr_thr = gaussian_process.GaussianProcessRegressor(kernel=gp_kernel)      
            thr_surrogate = regr_thr.fit(xy, thr)
            sfc_surrogate = regr_sfc.fit(xy, sfc)  
           
        elif self.surrogate_type  == 'knn':
            regr_sfc = neighbors.KNeighborsRegressor(n_neighbors=1,weights='distance')
            regr_thr = neighbors.KNeighborsRegressor(n_neighbors=1,weights='distance')
            sfc_surrogate = regr_sfc.fit(xy, sfc)
            thr_surrogate = regr_thr.fit(xy, thr)  
   
        elif self.surrogate_type  == 'svr':
            regr_thr = svm.SVR(C=500.)
            regr_sfc = svm.SVR(C=500.)
            sfc_surrogate  = regr_sfc.fit(xy, sfc)
            thr_surrogate  = regr_thr.fit(xy, thr)    
           
        elif self.surrogate_type == 'linear':
            regr_thr = linear_model.LinearRegression()
            regr_sfc = linear_model.LinearRegression()          
            sfc_surrogate  = regr_sfc.fit(xy, sfc)
            thr_surrogate  = regr_thr.fit(xy, thr)
            
        else:
            raise NotImplementedError('Selected surrogate method has not been implemented')
       
       
        if self.thrust_anchor is not None:
            cons = deepcopy(self.thrust_anchor_conditions)
            cons[0,0] /= self.altitude_input_scale
            base_thrust_at_anchor = thr_surrogate.predict(cons)
            self.thrust_anchor_scale = self.thrust_anchor/(base_thrust_at_anchor*self.thrust_input_scale)
            
        if self.sfc_anchor is not None:
            cons = deepcopy(self.sfc_anchor_conditions)
            cons[0,0] /= self.altitude_input_scale
            base_sfc_at_anchor = sfc_surrogate.predict(cons)
            self.sfc_anchor_scale = self.sfc_anchor/(base_sfc_at_anchor*self.sfc_input_scale)
       
        # Save the output
        self.sfc_surrogate    = sfc_surrogate
        self.thrust_surrogate = thr_surrogate   
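
        # Querying the stored surrogates later (a hedged sketch; inputs must be
        # normalized with the scales saved above):
        # point = np.array([[alt / self.altitude_input_scale, mach, throttle]])
        # thrust = self.thrust_surrogate.predict(point) * self.thrust_input_scale
        # sfc = self.sfc_surrogate.predict(point) * self.sfc_input_scale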
Example #23
        data.append(line[:-1].split(','))
data = np.array(data).T
encoders, x = [], []
for row in range(len(data)):
    if data[row, 0].isdigit():
        encoder = DigitEncoder()
    else:
        encoder = sp.LabelEncoder()
    if row < len(data) - 1:
        x.append(encoder.fit_transform(data[row]))
    else:
        y = encoder.fit_transform(data[row])
    encoders.append(encoder)
x = np.array(x).T
train_x, test_x, train_y, test_y = \
    ms.train_test_split(x, y, test_size=0.25,
                        random_state=5)
model = svm.SVR(kernel='rbf', C=10, epsilon=0.2)
model.fit(train_x, train_y)
pred_test_y = model.predict(test_x)
print(sm.r2_score(test_y, pred_test_y))
data = [['Tuesday', '13:35', 'San Francisco', 'yes']]
data = np.array(data).T
x = []
for row in range(len(data)):
    encoder = encoders[row]
    x.append(encoder.transform(data[row]))
x = np.array(x).T
pred_y = model.predict(x)
print(int(pred_y))
Example #24
model1 = svm.LinearSVR()
model1.fit(x_train, y_train)
confidence1 = model1.score(x_test, y_test)
predict_1 = model1.predict(x_small)
dataset['Predict_Linear'] = np.nan
print('Score for Linear Reg: :',confidence1)
print('\n')

for i in predict_1:
    next_date = datetime.datetime.fromtimestamp(next_unix)
    next_unix += one_day
    dataset.loc[next_date] = [np.nan for _ in range(len(dataset.columns)-1)]+[i]
####################################################################################

model2 = svm.SVR(kernel='rbf', C=100, gamma=0.06)
model2.fit(x_train, y_train)
confidence2 = model2.score(x_test, y_test)
predict_2 = model2.predict(x_small)
dataset['Predict_RBF'] = np.nan
print('Score for RBF Reg: :',confidence2)
print('\n')


for i in predict_2:
    next_date = datetime.datetime.fromtimestamp(next_unix)
    next_unix += one_day
    dataset.loc[next_date] = [np.nan for _ in range(len(dataset.columns)-1)]+[i]

####################################################################################
Example #25
from sklearn import svm
import pickle


def fun(line):
    a = line.strip().split()
    for i in range(len(a)):
        a[i] = float(a[i])
    return a[3:]



f = open('Train','r')
g = open('TrainTrue','r')
X = []
Y = []
for line in f.readlines():
        line2 = g.readline()
        if (('--' not in line) and ('--' not in line2)):
                X.append(fun(line))
                Y.append(float(line2.strip().split()[-1])+273.16)
                #print fun(line)


print ('Reading Done')
f.close()

clf = svm.SVR(cache_size=7000) 
clf.fit(X, Y)

pickle.dump(clf, open('Model','wb'))
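
# Loading the pickled model back (a sketch mirroring the dump above):
with open('Model', 'rb') as fh:
    clf2 = pickle.load(fh)
print(clf2.predict(X[:1]))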
Example #26
def test_SVR_poly(*data):
    '''
    Test how the polynomial-kernel SVR's predictive performance varies with degree, gamma, and coef0.

    :param data: variadic argument; a tuple whose elements are, in order:
                 the training samples, test samples, training targets, and test targets
    :return: None
    '''
    X_train, X_test, y_train, y_test = data
    fig = plt.figure()
    ### test degree ###
    degrees = range(1, 20)
    train_scores = []
    test_scores = []
    for degree in degrees:
        regr = svm.SVR(kernel='poly', degree=degree, coef0=1)
        regr.fit(X_train, y_train)
        train_scores.append(regr.score(X_train, y_train))
        test_scores.append(regr.score(X_test, y_test))
    ax = fig.add_subplot(1, 3, 1)
    ax.plot(degrees, train_scores, label="Training score ", marker='+')
    ax.plot(degrees, test_scores, label=" Testing  score ", marker='o')
    ax.set_title("SVR_poly_degree r=1")
    ax.set_xlabel("p")
    ax.set_ylabel("score")
    ax.set_ylim(-1, 1.)
    ax.legend(loc="best", framealpha=0.5)

    ### test gamma, with degree fixed at 3 and coef0 at 1 ###
    gammas = range(1, 40)
    train_scores = []
    test_scores = []
    for gamma in gammas:
        regr = svm.SVR(kernel='poly', gamma=gamma, degree=3, coef0=1)
        regr.fit(X_train, y_train)
        train_scores.append(regr.score(X_train, y_train))
        test_scores.append(regr.score(X_test, y_test))
    ax = fig.add_subplot(1, 3, 2)
    ax.plot(gammas, train_scores, label="Training score ", marker='+')
    ax.plot(gammas, test_scores, label=" Testing  score ", marker='o')
    ax.set_title("SVR_poly_gamma  r=1")
    ax.set_xlabel(r"$\gamma$")
    ax.set_ylabel("score")
    ax.set_ylim(-1, 1)
    ax.legend(loc="best", framealpha=0.5)
    ### test r (coef0), with gamma fixed at 20 and degree at 3 ###
    rs = range(0, 20)
    train_scores = []
    test_scores = []
    for r in rs:
        regr = svm.SVR(kernel='poly', gamma=20, degree=3, coef0=r)
        regr.fit(X_train, y_train)
        train_scores.append(regr.score(X_train, y_train))
        test_scores.append(regr.score(X_test, y_test))
    ax = fig.add_subplot(1, 3, 3)
    ax.plot(rs, train_scores, label="Training score ", marker='+')
    ax.plot(rs, test_scores, label=" Testing  score ", marker='o')
    ax.set_title("SVR_poly_r gamma=20 degree=3")
    ax.set_xlabel(r"r")
    ax.set_ylabel("score")
    ax.set_ylim(-1, 1.)
    ax.legend(loc="best", framealpha=0.5)
    plt.show()
Example #27
    def getbest(self):
        data = self.X
        data1 = data
        # data1[c] = data1[c].append(len(data1[c]),self.y[c])
        # data = data1
        # print(data)
        '''
        data = []
        for c in range(self.length):
            p = ()
            p += self.X_te[c]
            p += self.y_te[c]
            data.append(p)
        nbc = nltk.NaiveBayesClassifier.train(data[:self.length * 0.7])
        nbcacc = nltk.classify.accuracy(nbc, data[self.length * 0.7:])
        # nbcacc = accuracy_score(self.y_te, ynbc)
        self.acc.append(("NaiveBayes", nbcacc))
        '''

        knn = KNeighborsClassifier(n_neighbors=3)
        # score = cross_val_score(knn, self.X_tr, self.y_tr, cv=3, scoring='accuracy')
        # print("scores ", score)
        knn.fit(self.X_tr, self.y_tr)
        yknn = knn.predict(self.X_te)
        knnacc = accuracy_score(self.y_te, yknn)
        self.acc.append(("knn", knnacc))

        clf = svm.SVR()
        clf.fit(self.X_tr, self.y_tr)
        svr = clf.score(self.X_te, self.y_te)
        self.acc.append(("SVR", svr))

        clf = LinearDiscriminantAnalysis()
        clf.fit(self.X_tr, self.y_tr)
        lda = clf.score(self.X_te, self.y_te)
        self.acc.append(("LDA", lda))

        clf = GaussianNB()
        clf.fit(self.X_tr, self.y_tr)
        xx = clf.predict(self.X_te)
        gnb = accuracy_score(self.y_te, xx)
        self.acc.append(("GaussianNB", gnb))

        clf = BernoulliNB()
        clf.fit(self.X_tr, self.y_tr)
        xx = clf.predict(self.X_te)
        gnb = accuracy_score(self.y_te, xx)
        self.acc.append(("BernoulliNB", gnb))

        clf = MultinomialNB()
        clf.fit(self.X_tr, self.y_tr)
        xx = clf.predict(self.X_te)
        gnb = accuracy_score(self.y_te, xx)
        self.acc.append(("MultinomialNB", gnb))

        clf = linear_model.LinearRegression()
        clf.fit(self.X_tr, self.y_tr)
        lrgacc = clf.score(self.X_te, self.y_te)
        self.acc.append(("LinearReg", lrgacc))

        clf = linear_model.LogisticRegression()
        clf.fit(self.X_tr.astype('int'), self.y_tr.astype('int'))
        logacc = clf.score(self.X_te.astype('int'), self.y_te.astype('int'))
        self.acc.append(("LogisticReg", logacc))

        clf = linear_model.SGDClassifier()
        clf.fit(self.X_tr, self.y_tr)
        ysgd = clf.predict(self.X_te)
        sgdacc = accuracy_score(self.y_te, ysgd)
        self.acc.append(("SGDC", sgdacc))

        clf = DecisionTreeClassifier()
        clf.fit(self.X_tr, self.y_tr)
        dtc = clf.score(self.X_te, self.y_te)
        self.acc.append(("DecisionTree", dtc))

        clf = SVC(kernel='rbf')
        clf.fit(self.X_tr, self.y_tr)
        ysvc = clf.predict(self.X_te)
        svcaccr = accuracy_score(self.y_te, ysvc)
        self.acc.append(("SVC-rbf", svcaccr))

        clf = SVC(kernel='linear')
        clf.fit(self.X_tr, self.y_tr)
        ysvc = clf.predict(self.X_te)
        svcaccl = accuracy_score(self.y_te, ysvc)
        self.acc.append(("SVC-linear", svcaccl))
        '''clf = SVC(kernel='poly', degree=5)
        clf.fit(self.X_tr, self.y_tr)
        ysvc = clf.predict(self.X_te)
        print("svcp", ysvc)
        svcaccp = accuracy_score(self.y_te, ysvc)
        self.acc.append(("SVC-poly", svcaccp))'''

        self.acc.sort(key=lambda tup: tup[1], reverse=True)
        for i in self.acc:
            print(i[0], " ", i[1] * 100)
Example #28
boston_x = scale.transform(boston_X)
pca = PCA(n_components=3)
boston_x = pca.fit_transform(boston_x)
fig = plt.figure()
ax = fig.add_subplot(projection='3d')  # plt.gca(projection=...) was removed in newer matplotlib
# ax.scatter(boston_x[:, 0], boston_x[:, 1], boston_x[:, 2], marker='o', c=boston_y)
x_tran, x_test, y_tran, y_test = train_test_split(boston_x,
                                                  boston_y,
                                                  test_size=0.3,
                                                  random_state=42)
result = []
z = np.zeros(shape=(10, 10))
test_number = len(y_test)
for i in range(1, 11, 1):
    for j in range(1, 11, 1):
        clf = svm.SVR(C=i / 10, epsilon=j / 10,
                      gamma='auto').fit(x_tran, y_tran)
        y_pre = clf.predict(x_test)
        result.append([i, j, clf.score(x_test, y_test)])
        z[i - 1, j - 1] = clf.score(x_test, y_test)
print(result)
x = np.linspace(1, 10, 10)
y = np.linspace(1, 10, 10)
X, Y = np.meshgrid(x / 10, y / 10)
ax.plot_surface(X, Y, z, cmap=cm.coolwarm)
ax.set_zlim(0, 1)
ax.zaxis.set_major_locator(LinearLocator(5))
ax.set_xlabel('C')
ax.set_ylabel('epsilon')
ax.set_zlabel('score')
plt.title('Boston_SVR')
plt.show()
Example #29
from sklearn import svm
from sklearn import preprocessing

scaler = preprocessing.StandardScaler().fit(X)
X_scaled = scaler.transform(X)

count = 1
mse_val = []

for C_var in [.001, 50, 500]:

    for e_var in [.001, 1, 10]:

        figure(num=count, figsize=(10, 8), dpi=150)

        svr = svm.SVR(kernel='rbf', C=C_var, epsilon=e_var, cache_size=2000)

        svr.fit(X_train, y_train)

        h = .02

        x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
        y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5

        xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                             np.arange(y_min, y_max, h))

        Z = svr.predict(np.c_[xx.ravel(), yy.ravel()])
        Z = Z.reshape(xx.shape)

        plt.contourf(xx, yy, Z, 8, cmap=cm, alpha=.75)
Example #30
MLA = [
    #Ensemble Methods
    ensemble.AdaBoostRegressor(n_estimators=100, loss='exponential'),
    ensemble.ExtraTreesRegressor(),
    ensemble.GradientBoostingRegressor(n_estimators=100),
    ensemble.RandomForestRegressor(),

    #GLM
    linear_model.LinearRegression(),
    linear_model.SGDRegressor(),
    linear_model.Ridge(),
    linear_model.Lasso(),
    
    #SVM
    svm.SVR(),
    svm.LinearSVR(),
    
    #
    ]


# In[31]:


np.array(top_importance[0:15])


# In[32]: