def preprocess(filename, random_state=20):
    """Load a feature file and return its samples in shuffled order.

    Args:
        filename: Path passed unchanged to ``load_X_Y.load_X_Y``, which must
            return an ``(X, y)`` pair.
        random_state: Seed forwarded to ``shuffle``.  Defaults to 20, the
            value that used to be hard-coded, so existing callers get the
            same ordering as before.

    Returns:
        Tuple ``(X, y)``: both inputs converted with ``np.array`` and then
        passed together through ``shuffle`` (presumably
        ``sklearn.utils.shuffle`` -- confirm against the file's imports).
    """
    X, y = load_X_Y.load_X_Y(filename)
    X = np.array(X)
    y = np.array(y)
    # Both arrays go through a single shuffle call with one seed.
    X, y = shuffle(X, y, random_state=random_state)
    return (X, y)
def preprocess(filename, random_state=20):
    """Read ``(X, y)`` from *filename* and return them shuffled.

    Args:
        filename: Path passed unchanged to ``load_X_Y.load_X_Y``, which must
            return an ``(X, y)`` pair.
        random_state: Seed handed to ``shuffle``.  The default of 20 is the
            previously hard-coded value, so calls that omit it behave exactly
            as before.

    Returns:
        ``X, y`` as NumPy arrays after one joint ``shuffle`` call
        (presumably ``sklearn.utils.shuffle`` -- confirm against the file's
        imports).
    """
    raw_X, raw_y = load_X_Y.load_X_Y(filename)
    # Convert first so shuffle receives arrays, as in the original code.
    X, y = shuffle(np.array(raw_X), np.array(raw_y),
                   random_state=random_state)
    return X, y
#!/usr/bin/python
# -*- coding: utf-8 -*-
import numpy as np
from sklearn import tree
from sklearn.utils import shuffle
from sklearn.metrics import mean_squared_error
import load_X_Y
import math
import os

###############################################################################
# Load data
# The feature file is looked up in the 'train_data' directory that sits next
# to (i.e. under the parent of) the current working directory.
parent_path = os.path.dirname(os.getcwd())
data_path = os.path.join(parent_path, 'train_data')
file_in = os.path.join(data_path, 'featureMat_merge_rest_line11.txt')

X, y = load_X_Y.load_X_Y(file_in)
X, y = np.array(X), np.array(y)

# Results table: the header row is written once, before the sweep starts.
fout = open('iteration.txt', 'w')
fout.write(
    "min_samples_leaf\troot_of_mse_of_testset\ttrain_error_rate\ttest_error_rate\n"
)

# One pass per value of m in 1..100 (presumably the min_samples_leaf setting
# named in the header -- confirm against the rest of the loop body).
for m in range(1, 101):
    # Fresh accumulators for every m.
    X_train_array, y_train_array = [], []
    X_test_array, y_test_array = [], []
    mse_s = []
    train_error_rates, test_error_rates = [], []
#!/usr/bin/python
# -*- coding: utf-8 -*-
from sklearn.linear_model import Ridge
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline
import load_X_Y
from sklearn import preprocessing
import numpy as np

# Load the feature rows and targets from the checked rest-day file.
(X, y) = load_X_Y.load_X_Y('featureMat_restday_line10.txt_checked')

# Convert every feature value to float, in place, so the rows of X are
# numeric before scaling.
for item in X:
    for i, value in enumerate(item):
        item[i] = float(value)

X_scaled = preprocessing.scale(X)
mat_X_scaled = np.matrix(X_scaled)

# Fixed weight per feature; transposed into a column so the matrix product
# below collapses each scaled row into one weighted sum.  The product only
# works if X has exactly five columns.
weights = np.matrix([0.88181899, 0.03112526, 0.04583346, 0.02779611,
                     0.01342619])
weights = weights.transpose()
ulti_X = np.array(mat_X_scaled * weights)

# PolynomialFeatures(4) feeding a Ridge regressor, fitted on the
# single weighted feature.
clf = make_pipeline(PolynomialFeatures(4), Ridge())
clf.fit(ulti_X, y)

# Sanity check that X and y have the same number of samples.
# NOTE(review): this runs after fit(); consider moving it before the fit so a
# mismatch is reported before training is attempted.
n1 = len(ulti_X)
n2 = len(y)
if n1 != n2:
    # Message reads "error reading X and Y".  The call form of print works
    # on both Python 2 and Python 3; the statement form was Python 2 only.
    print('读取X和Y错误')
#!/usr/bin/python
# -*- coding: utf-8 -*-
from sklearn import neighbors
import load_X_Y
from sklearn import preprocessing
import numpy as np

# Load the feature rows and targets from the checked rest-day file.
(X, y) = load_X_Y.load_X_Y('featureMat_restday_line10.txt_checked')

# Convert every feature value to float, in place, so the rows of X are
# numeric before scaling.
for item in X:
    for i, value in enumerate(item):
        item[i] = float(value)

X_scaled = preprocessing.scale(X)
mat_X_scaled = np.matrix(X_scaled)

# Fixed weight per feature; transposed into a column so the matrix product
# below collapses each scaled row into one weighted sum.  The product only
# works if X has exactly five columns.
weights = np.matrix(
    [0.88181899, 0.03112526, 0.04583346, 0.02779611, 0.01342619])
weights = weights.transpose()
ulti_X = np.array(mat_X_scaled * weights)

# KNeighborsRegressor(1) fitted on the single weighted feature.
clf = neighbors.KNeighborsRegressor(1)
clf.fit(ulti_X, y)

# Sanity check that X and y have the same number of samples.
# NOTE(review): this runs after fit(); consider moving it before the fit so a
# mismatch is reported before training is attempted.
n1 = len(ulti_X)
n2 = len(y)
if n1 != n2:
    # Message reads "error reading X and Y".  The call form of print works
    # on both Python 2 and Python 3; the statement form was Python 2 only.
    print('读取X和Y错误')

# Training-set error rate obtained when all samples are used for training.
numberOfError = 0
import numpy as np
import matplotlib.pyplot as plt
from sklearn import ensemble
from sklearn.utils import shuffle
from sklearn.metrics import mean_squared_error
import load_X_Y
import math
import os

###############################################################################
# Load data
# The feature file is looked up in the 'train_data' directory under the parent
# of the current working directory.
parent_path = os.path.dirname(os.getcwd())
data_path = os.path.join(parent_path, 'train_data')
file_in = os.path.join(data_path, 'featureMat_merge_rest_line11.txt')
X,y = load_X_Y.load_X_Y(file_in)
X = np.array(X)
y = np.array(y)

# Accumulators created once, before the seed loop below.  Presumably each
# collects one entry per seed -- confirm against the loop body, which is not
# visible in this part of the script.
X_train_array = list()
y_train_array = list()
X_test_array = list()
y_test_array = list()
mse_s = list()
train_error_rates = list()
test_error_rates = list()
feature_importances = list()

# One large figure is created up front; `frame` starts at 0 (presumably a
# subplot/frame counter advanced inside the loop -- TODO confirm).
plt.figure(figsize=(30, 12))
frame = 0

# Iterate over ten fixed seed values.  The loop body lies beyond the visible
# portion of this script.
for rand_s in [10, 50, 100, 300, 500, 900, 1400, 4500, 3400, 22] :
#!/usr/bin/python
# -*- coding: utf-8 -*-
import numpy as np
import matplotlib.pyplot as plt
from sklearn import tree
from sklearn.utils import shuffle
from sklearn.metrics import mean_squared_error
import load_X_Y
import math

###############################################################################
# Load data
X, y = load_X_Y.load_X_Y('featureMat_workday_line10.txt_checked')
X, y = np.array(X), np.array(y)

# Shuffle features and targets together with a fixed seed, then cast the
# features to 32-bit floats.
rand_s = 20
X, y = shuffle(X, y, random_state=rand_s)
X = X.astype(np.float32)

# Hold-out split: the first 90% of the shuffled rows train the model, the
# remaining rows are kept for testing.
offset = int(X.shape[0] * 0.9)
X_train = X[:offset]
y_train = y[:offset]
X_test = X[offset:]
y_test = y[offset:]

###############################################################################
# Fit regression model
# fit() returns the estimator itself, so construction and fitting are chained.
clf = tree.DecisionTreeRegressor().fit(X_train, y_train)

# Mean squared error of the fitted tree on the held-out rows.
mse = mean_squared_error(y_test, clf.predict(X_test))