Esempio n. 1
0
    
    print('FINAL SHAPE')
    print('dbuild.shape:{}  dvalid.shape:{}\n'.format((dbuild.num_row(), dbuild.num_col()),
                                                      (dvalid.num_row(), dvalid.num_col())))

    return dbuild, dvalid, watchlist, sub_build, sub_valid

#==============================================================================
# cv
#==============================================================================
# Train LOOP models, calling split_build_valid() anew for each one, and keep
# every model so feature importance can be computed over the whole list.
models = []
for _ in range(LOOP):
    dbuild, dvalid, watchlist, sub_build, sub_valid = split_build_valid()
    # `evals` is keyword-only in current xgboost signatures, so pass the
    # watchlist (and the round count) by name instead of by position.
    model = xgb.train(param, dbuild, num_boost_round=nround, evals=watchlist,
                      early_stopping_rounds=ESR, verbose_eval=5)
    models.append(model)
    # Drop this iteration's matrices before the next split is built.
    del dbuild, dvalid, watchlist, sub_build, sub_valid
    gc.collect()

# Write the importance table produced by ex.getImp without the index column.
imp = ex.getImp(models)
imp.to_csv('imp_None.csv', index=False)




#==============================================================================
# NOTE(review): presumably a project helper that marks the end of the script
# run; confirm against utils.
utils.end(__file__)



Esempio n. 2
0
         'eval_metric': 'auc',
         'max_depth': 4,
         'objective': 'binary:logistic',
         'silent': 1,
         'tree_method': 'hist',
         'nthread': 64,
         'seed': SEED}


gc.collect()


# Fit on dtrain for exactly NROUND boosting rounds; no evaluation set and no
# early stopping are passed here.
model = xgb.train(param, dtrain, num_boost_round=NROUND)

# dtrain is not needed after fitting, so release it right away.
del dtrain
gc.collect()

# Write the importance table produced by ex.getImp without the index column.
imp = ex.getImp(model)
imp.to_csv('imp.csv', index=False)

# =============================================================================
# test
# =============================================================================

X = pd.concat([utils.read_pickles('../data/test_old'),
               utils.read_pickles('../data/002_test'),
               utils.read_pickles('../data/003_test'),
               utils.read_pickles('../data/004_test'),
               utils.read_pickles('../data/005_test'),
               utils.read_pickles('../data/101_test'),
#               utils.read_pickles('../data/102_test'),
#               utils.read_pickles('../data/103-1_test'),
#               utils.read_pickles('../data/103-2_test'),
Esempio n. 3
0
# Train for at most 9999 rounds with early stopping (patience of 50 rounds),
# logging evaluation results every 10 rounds.
# `evals` is keyword-only in current xgboost signatures, so pass the watchlist
# (and the round count) by name instead of by position.
model = xgb.train(params, dtrain, num_boost_round=9999, evals=watchlist,
                  early_stopping_rounds=50, verbose_eval=10)



# Report the best score recorded on the booster.
result = f"CV valid-auc: { model.best_score }"
print(result)

# Forward the same summary line through the project's send_line helper.
utils.send_line(result)


# =============================================================================
# imp
# =============================================================================
import os

# Order features by descending gain, breaking ties by feature name.
imp = ex.getImp(model).sort_values(['gain', 'feature'], ascending=[False, True])

# Since Python 3.9 the main script's __file__ is an absolute path, so
# interpolating it directly would point into a non-existent directory under
# LOG/. Use only the file name; the output name is unchanged when the script
# is launched by its bare file name.
_script_name = os.path.basename(__file__)
imp.to_csv(f'LOG/imp_{_script_name}.csv', index=False)


#def multi_touch(arg):
#    os.system(f'touch "../feature_unused/{arg}.f"')
#
#
#col = imp[imp['split']==0]['feature'].tolist()
#pool = Pool(cpu_count())
#pool.map(multi_touch, col)
#pool.close()