def preprocess(filename):
    """Load a data set from *filename* and return it in shuffled order.

    The file is read with ``load_X_Y.load_X_Y``, both results are converted
    to NumPy arrays, and the two arrays are shuffled together with a fixed
    seed (20), so the same file always produces the same ordering.

    Returns:
        A ``(X, y)`` tuple holding the shuffled arrays.
    """
    seed = 20
    raw_features, raw_targets = load_X_Y.load_X_Y(filename)
    shuffled_features, shuffled_targets = shuffle(
        np.array(raw_features), np.array(raw_targets), random_state=seed)
    return shuffled_features, shuffled_targets
def preprocess(filename, random_state=20):
    """Load a data set from *filename* and return it in shuffled order.

    Args:
        filename: Path handed unchanged to ``load_X_Y.load_X_Y``.
        random_state: Seed forwarded to ``sklearn.utils.shuffle``. The
            default (20) is the value that used to be hard-coded, so
            existing callers get exactly the same ordering as before.

    Returns:
        A ``(X, y)`` tuple of NumPy arrays, shuffled together so that row
        ``i`` of ``X`` still corresponds to element ``i`` of ``y``.
    """
    X, y = load_X_Y.load_X_Y(filename)
    X = np.array(X)
    y = np.array(y)
    X, y = shuffle(X, y, random_state=random_state)
    return X, y
#!/usr/bin/python
# -*- coding: utf-8 -*-
import numpy as np
from sklearn import tree
from sklearn.utils import shuffle
from sklearn.metrics import mean_squared_error
import load_X_Y
import math
import os

###############################################################################
# Load data
# The input file is looked up in a 'train_data' directory located under the
# parent of the current working directory.
parent_path = os.path.dirname(os.getcwd())
data_path = os.path.join(parent_path, 'train_data')
file_in = os.path.join(data_path, 'featureMat_merge_rest_line11.txt')
X, y = load_X_Y.load_X_Y(file_in)
X = np.array(X)
y = np.array(y)

# Results file: a tab-separated table whose header row is written here.
# NOTE(review): no matching fout.close() is visible in this excerpt -- confirm
# that the handle is closed once the loop below has finished.
fout = open('iteration.txt', 'w')
fout.write(
    "min_samples_leaf\troot_of_mse_of_testset\ttrain_error_rate\ttest_error_rate\n"
)
# m takes the values 1..100; presumably it is the min_samples_leaf setting
# reported in the first output column -- TODO confirm against the loop body.
for m in range(1, 101):
    # Start every value of m with fresh, empty accumulators.
    X_train_array = list()
    y_train_array = list()
    X_test_array = list()
    y_test_array = list()
    mse_s = list()
    train_error_rates = list()
    test_error_rates = list()
#!/usr/bin/python
# -*- coding: utf-8 -*-


from sklearn.linear_model import Ridge
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline
import load_X_Y
from sklearn import preprocessing
import numpy as np

# Load the feature rows (X) and target values (y) from the data file.
(X, y) = load_X_Y.load_X_Y('featureMat_restday_line10.txt_checked')

# Convert every feature value to float, in place.
for item in X:
    for i, value in enumerate(item):
        item[i] = float(value)

# Standardise the features, then collapse them into one weighted-sum feature:
# (n_samples x 5) * (5 x 1) -> one column per sample.
X_scaled = preprocessing.scale(X)
mat_X_scaled = np.matrix(X_scaled)
weights = np.matrix([0.88181899, 0.03112526, 0.04583346, 0.02779611, 0.01342619])
weights = weights.transpose()
ulti_X = np.array(mat_X_scaled * weights)

# Report a sample-count mismatch BEFORE fitting: fit() itself rejects inputs
# with inconsistent lengths, so a check placed after it could never fire.
# The message reads "error reading X and Y".
n1 = len(ulti_X)
n2 = len(y)
if n1 != n2:
    # Parenthesised form prints the same text on Python 2 and also parses on
    # Python 3.
    print('读取X和Y错误')

# Degree-4 polynomial expansion of the combined feature, followed by ridge
# regression.
clf = make_pipeline(PolynomialFeatures(4), Ridge())
clf.fit(ulti_X, y)
# Example #5
# 0
#!/usr/bin/python
# -*- coding: utf-8 -*-

from sklearn import neighbors
import load_X_Y
from sklearn import preprocessing
import numpy as np

# Load the feature rows (X) and target values (y) from the data file.
(X, y) = load_X_Y.load_X_Y('featureMat_restday_line10.txt_checked')

# Convert every feature value to float, in place.
for item in X:
    for i, value in enumerate(item):
        item[i] = float(value)

# Standardise the features, then collapse them into one weighted-sum feature:
# (n_samples x 5) * (5 x 1) -> one column per sample.
X_scaled = preprocessing.scale(X)
mat_X_scaled = np.matrix(X_scaled)
weights = np.matrix(
    [0.88181899, 0.03112526, 0.04583346, 0.02779611, 0.01342619])
weights = weights.transpose()
ulti_X = np.array(mat_X_scaled * weights)

# Report a sample-count mismatch BEFORE fitting: fit() itself rejects inputs
# with inconsistent lengths, so a check placed after it could never fire.
# The message reads "error reading X and Y".
n1 = len(ulti_X)
n2 = len(y)
if n1 != n2:
    # Parenthesised form prints the same text on Python 2 and also parses on
    # Python 3.
    print('读取X和Y错误')

# 1-nearest-neighbour regression on the combined feature.
clf = neighbors.KNeighborsRegressor(1)
clf.fit(ulti_X, y)

# In-sample error rate obtained when all samples are used for training.
numberOfError = 0
import numpy as np
import matplotlib.pyplot as plt

from sklearn import ensemble
from sklearn.utils import shuffle
from sklearn.metrics import mean_squared_error

import load_X_Y
import math
import os
###############################################################################
# Load data
# The input file is looked up in a 'train_data' directory located under the
# parent of the current working directory.
parent_path = os.path.dirname(os.getcwd())
data_path = os.path.join(parent_path, 'train_data')
file_in = os.path.join(data_path, 'featureMat_merge_rest_line11.txt')
X,y = load_X_Y.load_X_Y(file_in)
X = np.array(X)
y = np.array(y)


# Empty module-level accumulators; presumably filled once per random seed by
# the loop that follows -- TODO confirm against the loop body.
X_train_array = list()
y_train_array = list()
X_test_array = list()
y_test_array = list()
mse_s = list()
train_error_rates = list()
test_error_rates = list()
feature_importances = list()
# One 30 x 12 inch figure is created up front.
plt.figure(figsize=(30, 12))
# NOTE(review): looks like a running plot/subplot counter -- confirm where it
# is incremented.
frame = 0
for rand_s in [10, 50, 100, 300, 500, 900, 1400, 4500, 3400, 22] :
#!/usr/bin/python
# -*- coding: utf-8 -*-

import numpy as np
import matplotlib.pyplot as plt

from sklearn import tree
from sklearn.utils import shuffle
from sklearn.metrics import mean_squared_error

import load_X_Y
import math

###############################################################################
# Load the data, shuffle it with a fixed seed and hold out the last 10 %.
rand_s = 20
raw_X, raw_y = load_X_Y.load_X_Y('featureMat_workday_line10.txt_checked')
X, y = shuffle(np.array(raw_X), np.array(raw_y), random_state=rand_s)
X = X.astype(np.float32)

# The first 90 % of the shuffled rows are used for training, the rest for
# testing.
offset = int(X.shape[0] * 0.9)
X_train, X_test = X[:offset], X[offset:]
y_train, y_test = y[:offset], y[offset:]

###############################################################################
# Fit a decision-tree regressor and score it on the held-out rows.
clf = tree.DecisionTreeRegressor()
clf = clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)
mse = mean_squared_error(y_test, y_pred)