# Name of the algorithm to train, taken from the second command-line argument.
algo = sys.argv[2]

if algo == 'lr':
    # Hyper-parameters forwarded as keyword arguments to the LR constructor.
    lr_params = {
        "feature_size": feature_size,
        "field_size": field_size,
        "epoch": 10,
        "batch_size": 1024,
        "learning_rate": 0.001,
        "optimizer_type": "adam",
        "l2_reg": 0.01,
        "verbose": True,
    }
    lr = LR(**lr_params)
    # Fit on the training split; the validation split is passed alongside it.
    lr.fit(
        Xi_train, Xv_train, y_train,
        Xi_valid, Xv_valid, y_valid,
    )
elif algo == 'fm':
    # Hyper-parameters forwarded as keyword arguments to the FM constructor.
    # Compared with the LR settings this adds an embedding size, trains for
    # more epochs and uses two separate L2 terms (l2_w_reg / l2_v_reg).
    fm_params = {
        "feature_size": feature_size,
        "field_size": field_size,
        "embedding_size": 15,
        "epoch": 20,
        "batch_size": 1024,
        "learning_rate": 0.001,
        "optimizer_type": "adam",
        "l2_w_reg": 0.01,
        "l2_v_reg": 0.01,
        "verbose": True,
    }
    fm = FM(**fm_params)
    fm.fit(
        Xi_train, Xv_train, y_train,
        Xi_valid, Xv_valid, y_valid,
    )
# Validation labels reshaped into a single column (n_rows, 1).
valid_labels = X_valid['isFraud'].values
y_valid = valid_labels.reshape((-1, 1))

# Model under test. The commented-out constructors below are alternative
# models kept for quick switching.
model = LR(features_sizes, loss_type='binary', metric_type='auc')
#model=FM(features_sizes,k=8,loss_type='binary',metric_type='auc')
#model=MLP(features_sizes,k=8,loss_type='binary',metric_type='auc',deep_layers=(32,32))
#model=BiFM(features_sizes,k=8,loss_type='binary',metric_type='auc')
#model=DeepFM(features_sizes,k=8,loss_type='binary',metric_type='auc',deep_layers=(32,32))
#model=AFM(features_sizes,loss_type='binary',metric_type='auc',attention_FM=8)
#model=CFM(features_sizes,loss_type='binary',metric_type='auc')
#model=MLR(features_sizes,loss_type='binary',metric_type='auc',MLR_m=16)
#model=MFM(features_sizes,k=8,loss_type='binary',metric_type='auc',MFM_m=2)

# Training settings. Author's tuning note: 0.0005->0.001(1e-3 bs=1000)
fit_options = dict(
    lr=0.0005,
    N_EPOCH=50,
    batch_size=500,
    early_stopping_rounds=3,
)
best_score = model.fit(
    X_train[cate_features],
    X_valid[cate_features],
    y_train,
    y_valid,
    **fit_options
)

# Flip to True to fill the submission frame with test-set predictions.
SUBMIT = False
if SUBMIT:
    raw_scores = model.predict(test[cate_features])
    # Logistic sigmoid applied to the model output
    # (presumably raw scores/logits -- TODO confirm against model.predict).
    y_pred = 1. / (1. + np.exp(-1. * raw_scores))
    sample_submission['isFraud'] = y_pred
    #sample_submission.to_csv(data_path+'sub/sub01_LR_F49_timeSF_0.8154.csv',index=False)
    #sample_submission.to_csv(data_path+'sub/sub05_MLR_m=15_nosig_F49_timeSF_0.8154.csv',index=False)

# Experiment log left by the author (abbreviations are not defined in this
# fragment):
#   LR:0.8774 KG:0.8261
#   TIMESF
#   LGB:0.8442@90 KG=0.8549
# Model under test: LR with a hash size of one million.
model = LR(features_sizes, hash_size=int(1e6))
# Author's score log for this model:
#   valid score 1e5:0.82 3e5:0.79 6e5:0.773 1e6:0.766 | proto test score:0.852
#
# Alternative models tried earlier (kept commented out for quick switching):
#model=FM(features_sizes,k=256)#0.474 hash->
#model = FM(features_sizes, k=24,hash_size=int(1e6)) #protoscore:k=24+h1e6=0.692 0.693 (valid is better than test: 0.631)
#model=MLP(features_sizes,deep_layers=(256,256),k=256)
# Note: small batch=1024; LR need not be small, same 1e-3:
#   valid_score=model.fit(train[features],valid[features],y_train,y_valid,lr=0.001,N_EPOCH=100,batch_size=1024,early_stopping_rounds=15)
# NOTE(review): the batch_size=1024 fit call above is read as commented-out
# text (it follows a '#' and is unformatted) -- confirm against the original.
#model = DeepFM(features_sizes, deep_layers=(256, 256), k=256)
#model = NFM(features_sizes, k=256)
#model = AFM(features_sizes,k=256,attention_FM=256)
#model = AFM(features_sizes, k=256, attention_FM=8,dropout_keeprate=0.9,lambda_l2=0.001)
#model = MLP(features_sizes, deep_layers=(1,), k=256)
#model=AutoInt(features_sizes,k=8)

fit_options = dict(
    lr=0.001,
    N_EPOCH=100,
    batch_size=4096,
    early_stopping_rounds=15,
)
valid_score = model.fit(
    train[features],
    valid[features],
    y_train,
    y_valid,
    **fit_options
)

# Flatten the test predictions to one dimension.
y_pred = model.predict(test[features]).reshape(-1)

# Bound the predictions to [-1, 1] element-wise before scoring.
n_pred = len(y_pred)
lower_bound = np.full(n_pred, -1.0)  # bound the lower values
upper_bound = np.ones(n_pred)  # bound the higher values
predictions_bounded = np.minimum(np.maximum(y_pred, lower_bound), upper_bound)

# Root-mean-squared error between the test targets and bounded predictions.
residual = y_test.reshape(predictions_bounded.shape) - predictions_bounded
test_loss = np.sqrt(np.mean(np.square(residual)))
data.to_hdf(data_dir+'train.hdf', 'w',complib='blosc', complevel=5) ''' data = pd.read_hdf(data_dir + 'train.hdf').sample(frac=1.0, random_state=42) features_sizes = [data[c].nunique() for c in features] #data=data.sample(frac=0.1,random_state=42) print("Data Prepared.") from sklearn.model_selection import train_test_split X_train, X_test, y_train, y_test = train_test_split(data[features], data['click'], test_size=0.2, random_state=42) y_train = y_train.values.reshape((-1, 1)) y_test = y_test.values.reshape((-1, 1)) model = LR(features_sizes, loss_type='binary') #bs=1000 #model=FM(features_sizes,k=8)#bs=500 # model=MLP(features_sizes,deep_layers=(16,16),k=16) print(model) best_score = model.fit(X_train, X_test, y_train, y_test, lr=0.001, N_EPOCH=50, batch_size=500, early_stopping_rounds=1) #0.0005->0.001(1e-3 bs=1000) #best_score = model.fit(X_train, X_test, y_train, y_test, lr=0.0002, N_EPOCH=50, batch_size=500,early_stopping_rounds=3)#0.0005->0.001(1e-3 bs=1000)