# Example no. 1
# 0
Created on Mon Apr 24 21:45:36 2017

@author: Vne
"""
import re
import pandas as pd #数据分析
import numpy as np #科学计算

from sklearn.model_selection import train_test_split

from basic_tool import scaler_attr
from basic_tool import describe_factor
from basic_tool import cvBestScore

# Load the Eye dataset. The raw string keeps the Windows path byte-for-byte
# identical while avoiding unrecognised escape sequences such as "\m" and
# "\D", which newer Python versions warn about.
data = pd.read_csv(r'E:\machine_learing\DateSet\Eye.csv')

# Columns handed to scaler_attr, in the original call order.
# NOTE(review): scaler_attr presumably appends a scaled copy of each column
# to `data` in place -- confirm against basic_tool.
EEG_CHANNELS = (
    'AF3', 'F7', 'F3', 'FC5', 'T7', 'P7', 'O1',
    'O2', 'P8', 'T8', 'FC6', 'F4', 'F8', 'AF4',
)
for channel in EEG_CHANNELS:
    scaler_attr(data, channel)

# Feature matrix: the 14 columns at positions 15..28.
X = data.iloc[:, 15:29]
# Example no. 2
# 0
@author: Vne
"""

import re
import pandas as pd #数据分析
import numpy as np #科学计算

from sklearn.model_selection import train_test_split

from basic_tool import scaler_attr
from basic_tool import describe_factor
from basic_tool import cvBestScore

# Load the magic04 dataset. The raw string keeps the Windows path unchanged
# while avoiding unrecognised escape sequences ("\m", "\D").
data = pd.read_csv(r"E:\machine_learing\DateSet\magic04.csv")

# NOTE(review): scaler_attr presumably appends a scaled copy of each column
# to `data` in place -- confirm against basic_tool.
for column in ('fLength', 'fWidth', 'fAsym', 'fM3Long', 'fM3Trans',
               'fAlpha', 'fDist'):
    scaler_attr(data, column)

# Select the feature columns by position. Plain data[[...]] with integers is
# a label lookup and raises KeyError when the columns are named by strings.
X = data.iloc[:, [2, 3, 4, 11, 12, 13, 14, 15, 16, 17]]

# Encode the target: 'g' -> 1, anything else -> 0.
data['class'] = (data['class'] == 'g').astype(int)
y = data['class']

# Split the samples into a training set and a test set; everything except
# 2000 rows goes to the test set.
X_raw_train, X_test, y_raw_train, y_test = train_test_split(
    X, y, test_size=(len(data) - 2000), random_state=1)

@author: Vne
"""

import re
import pandas as pd #数据分析
import numpy as np #科学计算

from sklearn.model_selection import train_test_split

from basic_tool import scaler_attr
from basic_tool import describe_factor
from basic_tool import cvBestScore

# Load the Credit_Card dataset. The raw string keeps the Windows path
# unchanged while avoiding unrecognised escape sequences ("\m", "\D", "\C").
data = pd.read_csv(r"E:\machine_learing\DateSet\Credit_Card.csv")

# NOTE(review): scaler_attr presumably appends a scaled copy of each column
# to `data` in place -- confirm against basic_tool.
for column in ('LIMIT_BAL', 'BILL_AMT1', 'PAY_AMT1'):
    scaler_attr(data, column)

# Select the feature columns by position. Plain data[[...]] with integers is
# a label lookup and raises KeyError when the columns are named by strings.
X = data.iloc[:, [25, 2, 3, 4, 5, 6, 7, 8, 9, 10, 26, 27]]
y = data['default.payment.next.month']

# Split the samples into a training set and a test set; everything except
# 2000 rows goes to the test set.
X_raw_train, X_test, y_raw_train, y_test = train_test_split(
    X, y, test_size=(len(data) - 2000), random_state=1)

# Convert to NumPy arrays. `.values` replaces `.as_matrix()`, which was
# removed in pandas 1.0.
X_raw_train = X_raw_train.values
y_raw_train = y_raw_train.values
X_test = X_test.values
y_test = y_test.values

from binary_Classifier import RandomForest_Classifier
"""

import re
import pandas as pd #数据分析
import numpy as np #科学计算

from sklearn.model_selection import train_test_split

from basic_tool import scaler_attr
from basic_tool import describe_factor
from basic_tool import cvBestScore


# Load the covtype dataset. The raw string keeps the Windows path unchanged
# while avoiding unrecognised escape sequences ("\m", "\D", "\c").
data = pd.read_csv(r"E:\machine_learing\DateSet\covtype.csv")

# Keep only the first 30000 rows. The explicit copy makes `data` an
# independent frame, so later column additions do not write into a slice of
# the full frame (SettingWithCopyWarning).
data = data.iloc[:30000, :].copy()

# NOTE(review): scaler_attr presumably appends a scaled copy of each column
# to `data` in place -- confirm against basic_tool.
for column in (
        'Elevation',
        'Aspect',
        'Horizontal_Distance_To_Hydrology',
        'Vertical_Distance_To_Hydrology',
        'Horizontal_Distance_To_Roadways',
        'Hillshade_9am',
        'Hillshade_3pm',
        'Horizontal_Distance_To_Fire_Points',
):
    scaler_attr(data, column)

# Select the feature columns by position. Plain data[[...]] with integers is
# a label lookup and raises KeyError when the columns are named by strings.
X = data.iloc[:, [2, 55, 56, 57, 58, 59, 60, 61, 62, 10, 14, 24, 34, 44]]
y = data['Cover_Type']

# Split the samples into a training set and a test set; everything except
# 2000 rows goes to the test set.
X_raw_train, X_test, y_raw_train, y_test = train_test_split(
    X, y, test_size=(len(data) - 2000), random_state=1)

    'Black': 1,
    'Asian-Pac-Islander': 2,
    'Amer-Indian-Eskimo': 3,
    'Other': 4
})
# Binary sex indicator: 'Female' -> 0, anything else -> 1.
data['sex_num'] = np.where(data.sex == 'Female', 0, 1)

# Collapse the relationship categories into a binary indicator:
# 'Husband' / 'Wife' -> 1, every other listed category -> 0.
# Values missing from the mapping become NaN (Series.map behaviour).
data['rel_num'] = data.relationship.map({
    'Not-in-family': 0,
    'Unmarried': 0,
    'Own-child': 0,
    'Other-relative': 0,
    'Husband': 1,
    'Wife': 1
})

# The '<name>_scaled' columns selected below are expected to be produced by
# these calls -- NOTE(review): confirm against basic_tool.scaler_attr.
scaler_attr(data, 'capital.gain')
scaler_attr(data, 'capital.loss')

# Feature matrix (selected by column name) and target.
X = data[[
    'workclass_num', 'education.num', 'marital_num', 'race_num', 'sex_num',
    'rel_num', 'capital.gain_scaled', 'capital.loss_scaled'
]]
y = data.over50K

# Split the samples into a training set and a test set; everything except
# 4000 rows goes to the test set.
X_raw_train, X_test, y_raw_train, y_test = train_test_split(
    X, y, test_size=(len(data) - 4000), random_state=1)

# Convert to NumPy arrays. `.values` replaces `.as_matrix()`, which was
# removed in pandas 1.0.
# NOTE(review): y_test is not converted within the visible lines.
X_raw_train = X_raw_train.values
y_raw_train = y_raw_train.values
X_test = X_test.values
@author: Vne
"""

import re
import pandas as pd  #数据分析
import numpy as np  #科学计算

from sklearn.model_selection import train_test_split

from basic_tool import scaler_attr
from basic_tool import describe_factor
from basic_tool import cvBestScore

# Load the Occupancy dataset, using its 'index' column as the row index.
# The raw string keeps the Windows path unchanged while avoiding
# unrecognised escape sequences ("\m", "\D", "\O").
data = pd.read_csv(r'E:\machine_learing\DateSet\Occupancy.csv',
                   index_col='index')

# NOTE(review): scaler_attr presumably appends a scaled copy of each column
# to `data` in place -- confirm against basic_tool.
for column in ('Light', 'CO2'):
    scaler_attr(data, column)

# Select the feature columns by position. Plain data[[...]] with integers is
# a label lookup and raises KeyError when the columns are named by strings.
X = data.iloc[:, [1, 2, 5, 7, 8]]
y = data['Occupancy']

# Split the samples into a training set and a test set; everything except
# 2000 rows goes to the test set.
X_raw_train, X_test, y_raw_train, y_test = train_test_split(
    X, y, test_size=(len(data) - 2000), random_state=1)

# Convert to NumPy arrays. `.values` replaces `.as_matrix()`, which was
# removed in pandas 1.0.
X_raw_train = X_raw_train.values
y_raw_train = y_raw_train.values
X_test = X_test.values
y_test = y_test.values

from binary_Classifier import RandomForest_Classifier
# Example no. 7
# 0
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

from basic_tool import scaler_attr
from basic_tool import describe_factor
from basic_tool import cvBestScore

# Load the Horse_Racing dataset, using 'UID' as the row index. The raw
# string keeps the Windows path unchanged while avoiding unrecognised escape
# sequences ("\m", "\D", "\H").
data = pd.read_csv(r"E:\machine_learing\DateSet\Horse_Racing.csv",
                   index_col='UID',
                   encoding="ISO-8859-1")

# Label-encode every object-dtype column in place. The encoder is re-fitted
# for each column, so the integer codes are independent per column.
cat_var = data.dtypes.loc[data.dtypes == 'object'].index
le = LabelEncoder()
for var in cat_var:
    data[var] = le.fit_transform(data[var])

# NOTE(review): scaler_attr presumably appends a scaled copy of each column
# to `data` in place -- confirm against basic_tool.
for column in ('Date', 'Horse'):
    scaler_attr(data, column)

# Select the feature columns by position. Plain data[[...]] with integers is
# a label lookup and raises KeyError when the columns are named by strings.
X = data.iloc[:, [1, 3, 5, 6, 7, 9, 10]]
y = data['TipsterActive']

# Split the samples into a training set and a test set; everything except
# 2000 rows goes to the test set.
X_raw_train, X_test, y_raw_train, y_test = train_test_split(
    X, y, test_size=(len(data) - 2000), random_state=1)

# Convert to NumPy arrays. `.values` replaces `.as_matrix()`, which was
# removed in pandas 1.0.
X_raw_train = X_raw_train.values
y_raw_train = y_raw_train.values
X_test = X_test.values
y_test = y_test.values

from binary_Classifier import RandomForest_Classifier