# Feature preprocessing and cross-validation snippets.
# NOTE(review): X, y, model and iris are not defined in this section; they are
# presumably created elsewhere in the file -- confirm before running.
from numpy import log1p
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import (
    Binarizer,
    FunctionTransformer,
    MinMaxScaler,
    Normalizer,
    StandardScaler,
)

# Normalize the features (X is cast to float first).
X = Normalizer().fit_transform(X.astype(float))

# Log-transform the features with log1p.
X = FunctionTransformer(log1p).fit_transform(X)

# Standardize the features.
# NOTE(review): the result is not assigned, so X is left as it was.
StandardScaler().fit_transform(X)

# Rescale the features to an interval (min-max scaling).
# NOTE(review): the result is not assigned, so X is left as it was.
MinMaxScaler().fit_transform(X)

# Binarize quantitative features using a threshold of 3.
# NOTE(review): this runs on iris.data rather than X and the result is
# discarded -- confirm that is intended.
Binarizer(threshold=3).fit_transform(iris.data)

# K-fold cross-validation (cv=5).
score = cross_val_score(model, X, y, cv=5)
print("score=", score)
ct_pred = clf.predict(Normalizer().fit_transform(test_ct)) res = pd.DataFrame({'ID': kid, 'Expected': ct_pred}) res.to_csv('outcome.csv', index=False) # ===============LGBM count (more tuning needed)======================================================== star = star.astype(int) - 1 xtrain_ct, xvalid_ct, ytrain, yvalid = train_test_split(train_ct, star, stratify=star, random_state=960724, test_size=0.2, shuffle=True) lg_train = lgb.Dataset(xtrain_ct.astype(np.float64), label=ytrain) lg_val = lg_train.create_valid(xvalid_ct.astype(np.float64), label=yvalid) param = { 'objective': 'multiclass', 'task': 'train', 'num_threads': 4, 'seed': 960724, 'early_stopping_round': 50, 'verbosity': 1, 'num_class': 5, 'learning_rate': 0.1, 'num_leaves': 31, 'max_depth': -1, 'feature_fraction': 0.8, 'bagging_fraction': 0.8