import xgboost as xgb
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn.metrics as metrics
import joblib
from utils.dataprocess import preProc, preProcTest, toJson
from sklearn.model_selection import train_test_split, GridSearchCV

# XGBoost native API
dataTrain = pd.read_csv(r'C:\Users\ZY\Desktop\ML\VI_train.csv')
xTrain, yTrain, scaler = preProc(dataTrain, toNumpy=True)
# weight = number of negatives / number of positives
weight = (yTrain.shape[0] - sum(yTrain)) * 1.0 / sum(yTrain)
xTrain, xValidation, yTrain, yValidation = train_test_split(xTrain, yTrain, test_size=0.2)
dtrain = xgb.DMatrix(xTrain, label=yTrain)
dvalidation = xgb.DMatrix(xValidation, label=yValidation)
params = {
    # General XGBoost parameters
    'booster': 'gbtree',
    'nthread': 5,    # number of threads
    'silent': 0,     # set to 1 for silent mode
    # Booster parameters
    'eta': 0.1,      # learning rate; shrinking each step's weights improves robustness
    'gamma': 0.1,    # minimum loss reduction required to split a node
    'max_depth': 9,  # maximum tree depth, limits overfitting
    'lambda': 2,     # L2 regularization term on weights
    'alpha': 1,      # L1 regularization term on weights
}
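# A minimal sketch of how training could proceed from here; the objective,
# scale_pos_weight, round counts, and model file name below are assumptions,
# not the original script's settings.
params.setdefault('objective', 'binary:logistic')  # assumed binary objective
params.setdefault('scale_pos_weight', weight)      # assumed use of the class ratio computed above
watchlist = [(dtrain, 'train'), (dvalidation, 'validation')]
bst = xgb.train(params, dtrain, num_boost_round=500, evals=watchlist,
                early_stopping_rounds=20)
predProb = bst.predict(dvalidation)            # predicted positive-class probabilities
predLabel = (predProb >= 0.5).astype(int)      # threshold probabilities at 0.5
print('Validation F1:', metrics.f1_score(yValidation, predLabel))
bst.save_model('carInsurancePredXGB.model')    # hypothetical file name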
        '''
        for i in range(self.layer_num):
            x = self.relu[i](self.bns[i](self.hiddens[i](x)))
        x = self.predict(x)
        return x


# Read in the data
dataTrain = pd.read_csv(r'C:\Users\ZY\Desktop\ML\VI_train.csv')
weight_negative = sum(dataTrain['Response']) * 1.0 / dataTrain.shape[0]
# The classes are imbalanced, so handle it with class weights:
# set separate weights for the negative and positive classes
weights = [weight_negative, 1 - weight_negative]  # [0.12293666666666667, 0.8770633333333333]
# PyTorch requires the weights to be passed as a tensor
weights = torch.from_numpy(np.array(weights)).type(torch.FloatTensor)
xTrain, yTrain, scaler = preProc(dataTrain, toTensor=True)
torch.save(xTrain, 'xTrain.pt')
torch.save(yTrain, 'yTrain.pt')
xTrain, xValidation, yTrain, yValidation = train_test_split(xTrain, yTrain, test_size=0.2, random_state=1)
torch_dataset = Data.TensorDataset(xTrain, yTrain)
loader = Data.DataLoader(
    dataset=torch_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
)
net = Net(14, 15, 2, 3)
print(net)
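# A minimal sketch of the training loop this setup implies; the optimizer,
# learning rate, and epoch count are assumptions, not the original
# hyperparameters. nn.CrossEntropyLoss(weight=weights) applies the per-class
# weights computed above, which is the usual way to consume them.
import torch.nn as nn

EPOCHS = 50  # assumed value
optimizer = torch.optim.Adam(net.parameters(), lr=1e-3)  # assumed optimizer and lr
loss_func = nn.CrossEntropyLoss(weight=weights)
for epoch in range(EPOCHS):
    for step, (batch_x, batch_y) in enumerate(loader):
        out = net(batch_x)                     # logits of shape (batch, 2)
        loss = loss_func(out, batch_y.long())  # targets must be class indices (LongTensor)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()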
from sklearn.svm import SVC
import numpy as np
import pandas as pd
import joblib
import time
from utils.dataprocess import preProc, preProcTest, toJson
import sklearn.metrics as metrics

t1 = time.time()
# Preprocess the training data
dataTrain = pd.read_csv(r'C:\Users\ZY\Desktop\ML\VI_train.csv')
xTrain, yTrain, scaler = preProc(dataTrain)
# Build the model
max_iter = 100000000
# Passing class_weight='balanced' to the model is equivalent to weighting the
# positive and negative examples by the number of negatives and positives, respectively.
# fit() also takes a sample_weight parameter, an array with one weight per sample.
# The two achieve the same effect, so use only one of them.
model = SVC(C=1.0, kernel='rbf', gamma='auto', tol=0.2, cache_size=1024,
            class_weight='balanced', max_iter=max_iter)
model.fit(xTrain, yTrain, sample_weight=None)
score = model.score(xTrain, yTrain)
print('Score:', score)
pred_y = model.predict(xTrain)
fscore = metrics.f1_score(yTrain, pred_y)
print('Fvalue:', fscore)
equal1count = sum(pred_y == 1)
print('Number of samples predicted as 1:', equal1count)
print(pred_y)
joblib.dump(model, 'carInsurancePredSVM.model')
print('max_iter:', max_iter, 'elapsed:', time.time() - t1)
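# A minimal sketch of reusing the saved model on held-out data. The test file
# name and the preProcTest signature (test frame plus the fitted scaler,
# returning a feature matrix) are assumptions about utils.dataprocess, not its
# documented API.
model = joblib.load('carInsurancePredSVM.model')
dataTest = pd.read_csv(r'C:\Users\ZY\Desktop\ML\VI_test.csv')  # assumed test file name
xTest = preProcTest(dataTest, scaler)                          # assumed helper signature
predTest = model.predict(xTest)
print('Predicted positives on the test set:', sum(predTest == 1))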