Created on Mon Apr 24 21:45:36 2017 @author: Vne """ import re import pandas as pd #数据分析 import numpy as np #科学计算 from sklearn.model_selection import train_test_split from basic_tool import scaler_attr from basic_tool import describe_factor from basic_tool import cvBestScore data = pd.read_csv('E:\machine_learing\DateSet\Eye.csv') scaler_attr(data,'AF3') scaler_attr(data,'F7') scaler_attr(data,'F3') scaler_attr(data,'FC5') scaler_attr(data,'T7') scaler_attr(data,'P7') scaler_attr(data,'O1') scaler_attr(data,'O2') scaler_attr(data,'P8') scaler_attr(data,'T8') scaler_attr(data,'FC6') scaler_attr(data,'F4') scaler_attr(data,'F8') scaler_attr(data,'AF4') X = data.iloc[:, 15:29]
@author: Vne """ import re import pandas as pd #数据分析 import numpy as np #科学计算 from sklearn.model_selection import train_test_split from basic_tool import scaler_attr from basic_tool import describe_factor from basic_tool import cvBestScore data = pd.read_csv("E:\machine_learing\DateSet\magic04.csv") scaler_attr(data,'fLength') scaler_attr(data,'fWidth') scaler_attr(data,'fAsym') scaler_attr(data,'fM3Long') scaler_attr(data,'fM3Trans') scaler_attr(data,'fAlpha') scaler_attr(data,'fDist') X = data[[2,3,4,11,12,13,14,15,16,17]] data['class'] = data['class'].apply(lambda x: 1 if x == 'g' else 0) y = data['class'] ##将样本划分为训练集和测试集 X_raw_train, X_test, y_raw_train, y_test = train_test_split(X, y, test_size=(len(data)-2000), random_state=1)
@author: Vne """ import re import pandas as pd #数据分析 import numpy as np #科学计算 from sklearn.model_selection import train_test_split from basic_tool import scaler_attr from basic_tool import describe_factor from basic_tool import cvBestScore data = pd.read_csv("E:\machine_learing\DateSet\Credit_Card.csv") scaler_attr(data,'LIMIT_BAL') scaler_attr(data,'BILL_AMT1') scaler_attr(data,'PAY_AMT1') X = data[[25,2,3,4,5,6,7,8,9,10,26,27]] y = data['default.payment.next.month'] ##将样本划分为训练集和测试集 X_raw_train, X_test, y_raw_train, y_test = train_test_split(X, y, test_size=(len(data)-2000), random_state=1) X_raw_train = X_raw_train.as_matrix() y_raw_train = y_raw_train.as_matrix() X_test = X_test.as_matrix() y_test = y_test.as_matrix() from binary_Classifier import RandomForest_Classifier
""" import re import pandas as pd #数据分析 import numpy as np #科学计算 from sklearn.model_selection import train_test_split from basic_tool import scaler_attr from basic_tool import describe_factor from basic_tool import cvBestScore data = pd.read_csv("E:\machine_learing\DateSet\covtype.csv") data = data.iloc[0:30000,:] scaler_attr(data,'Elevation') scaler_attr(data,'Aspect') scaler_attr(data,'Horizontal_Distance_To_Hydrology') scaler_attr(data,'Vertical_Distance_To_Hydrology') scaler_attr(data,'Horizontal_Distance_To_Roadways') scaler_attr(data,'Hillshade_9am') scaler_attr(data,'Hillshade_3pm') scaler_attr(data,'Horizontal_Distance_To_Fire_Points') #X = data[[2,55,56,57,58,59,60,61,62,10,14,24,34,44]] X = data[[2,55,56,57,58,59,60,61,62,10,14,24,34,44]] y = data['Cover_Type'] ##将样本划分为训练集和测试集 X_raw_train, X_test, y_raw_train, y_test = train_test_split(X, y, test_size=(len(data)-2000), random_state=1)
'Black': 1, 'Asian-Pac-Islander': 2, 'Amer-Indian-Eskimo': 3, 'Other': 4 }) data['sex_num'] = np.where(data.sex == 'Female', 0, 1) data['rel_num'] = data.relationship.map({ 'Not-in-family': 0, 'Unmarried': 0, 'Own-child': 0, 'Other-relative': 0, 'Husband': 1, 'Wife': 1 }) scaler_attr(data, 'capital.gain') scaler_attr(data, 'capital.loss') X = data[[ 'workclass_num', 'education.num', 'marital_num', 'race_num', 'sex_num', 'rel_num', 'capital.gain_scaled', 'capital.loss_scaled' ]] y = data.over50K ##将样本划分为训练集和测试集 X_raw_train, X_test, y_raw_train, y_test = train_test_split( X, y, test_size=(len(data) - 4000), random_state=1) X_raw_train = X_raw_train.as_matrix() y_raw_train = y_raw_train.as_matrix() X_test = X_test.as_matrix()
@author: Vne """ import re import pandas as pd #数据分析 import numpy as np #科学计算 from sklearn.model_selection import train_test_split from basic_tool import scaler_attr from basic_tool import describe_factor from basic_tool import cvBestScore data = pd.read_csv('E:\machine_learing\DateSet\Occupancy.csv', index_col='index') scaler_attr(data, 'Light') scaler_attr(data, 'CO2') X = data[[1, 2, 5, 7, 8]] y = data['Occupancy'] ##将样本划分为训练集和测试集 X_raw_train, X_test, y_raw_train, y_test = train_test_split( X, y, test_size=(len(data) - 2000), random_state=1) X_raw_train = X_raw_train.as_matrix() y_raw_train = y_raw_train.as_matrix() X_test = X_test.as_matrix() y_test = y_test.as_matrix() from binary_Classifier import RandomForest_Classifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from basic_tool import scaler_attr
from basic_tool import describe_factor
from basic_tool import cvBestScore

# Raw string: keeps the backslashes of the Windows path literal instead of
# relying on invalid escape sequences ("\m", "\D", "\H") being passed through.
data = pd.read_csv(r"E:\machine_learing\DateSet\Horse_Racing.csv",
                   index_col='UID', encoding="ISO-8859-1")

# Replace every object-dtype column with integer codes. The single encoder is
# re-fitted for each column, so codes are independent between columns.
cat_var = data.dtypes.loc[data.dtypes == 'object'].index
le = LabelEncoder()
for var in cat_var:
    data[var] = le.fit_transform(data[var])

scaler_attr(data, 'Date')
scaler_attr(data, 'Horse')

# Select feature columns by position. The frame is addressed by string labels
# elsewhere (e.g. 'TipsterActive'), so a bare data[[1, 3, ...]] would be a
# label lookup and fail on current pandas; .iloc makes the positional intent
# explicit.
# NOTE(review): the highest positions presumably refer to columns appended by
# scaler_attr above - confirm against basic_tool.
X = data.iloc[:, [1, 3, 5, 6, 7, 9, 10]]
y = data['TipsterActive']

# Split the samples into a training set and a test set; the test set takes
# all rows except 2000, which remain for training.
X_raw_train, X_test, y_raw_train, y_test = train_test_split(
    X, y, test_size=(len(data) - 2000), random_state=1)

# Convert to plain NumPy arrays. as_matrix() was removed in pandas 1.0;
# to_numpy() is its replacement.
X_raw_train = X_raw_train.to_numpy()
y_raw_train = y_raw_train.to_numpy()
X_test = X_test.to_numpy()
y_test = y_test.to_numpy()

from binary_Classifier import RandomForest_Classifier