def handler(context):
    print(
        f'start training with parameters : {Parameters.as_dict()}, context : {context}'
    )

    X_train, y_train, cols_train = train_data_loader(
        DATALAKE_CHANNEL_ID, DATALAKE_TRAIN_FILE_ID, LABEL_FIELD, INPUT_FIELDS)
    dtrain = lgb.Dataset(X_train, y_train)

    if DATALAKE_VAL_FILE_ID:
        X_val, y_val, _ = train_data_loader(
            DATALAKE_CHANNEL_ID, DATALAKE_VAL_FILE_ID, LABEL_FIELD, INPUT_FIELDS)
    else:
        X_val, y_val = None, None

    # Callbacks: pull the trained boosters out of lgb.cv() and report metrics
    # while training runs.
    extraction_cb = ModelExtractionCallback()
    tensorboard_cb = TensorBoardCallback(statistics, writer)
    tensorboard_cb.set_valid(X_val, y_val, Parameters.IS_CLASSIFICATION,
                             IS_MULTI, Parameters.NUM_CLASS)
    callbacks = [
        extraction_cb,
        tensorboard_cb,
    ]

    lgb.cv(PARAMS,
           dtrain,
           nfold=Parameters.NFOLD,
           early_stopping_rounds=Parameters.EARLY_STOPPING_ROUNDS,
           verbose_eval=Parameters.VERBOSE_EVAL,
           stratified=STRATIFIED,
           callbacks=callbacks,
           metrics=Parameters.METRIC,
           seed=Parameters.SEED)

    # Save one booster per CV fold.
    models = extraction_cb.raw_boosters
    for i, model in enumerate(models):
        model.save_model(
            os.path.join(ABEJA_TRAINING_RESULT_DIR, f'model_{i}.txt'))

    # Persist the training parameters together with the training columns so that
    # inference can rebuild the same feature layout.
    di = {**(Parameters.as_dict()), 'cols_train': cols_train}
    with open(os.path.join(ABEJA_TRAINING_RESULT_DIR, 'lgb_env.json'), 'w') as lgb_env:
        json.dump(di, lgb_env)

    writer.close()
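
# ModelExtractionCallback is used above but not defined in this section. It is assumed
# to follow the common pattern of capturing the CVBooster that lgb.cv() trains from the
# callback environment; the class below is an illustrative sketch under that assumption
# (it is not used anywhere), and the template's own implementation may differ.
class _ExampleModelExtractionCallback:

    def __init__(self):
        self._model = None

    def __call__(self, env):
        # lgb.cv() invokes callbacks with an environment whose .model attribute is the
        # CVBooster holding one Booster per fold.
        self._model = env.model

    @property
    def raw_boosters(self):
        return self._model.boosters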
# In[ ]:


def handler(context):
    print("finish.")


# In[ ]:


parameters = Parameters.as_env()
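
# Illustrative only (not called by the template): a minimal sketch of how the per-fold
# boosters saved by handler() above could be restored and combined at inference time.
# The model_{i}.txt names and ABEJA_TRAINING_RESULT_DIR come from the code above; the
# helper names and the choice to average fold predictions are assumptions.
def _load_cv_boosters(result_dir, nfold):
    # lgb.Booster(model_file=...) restores a booster written with save_model().
    return [
        lgb.Booster(model_file=os.path.join(result_dir, f'model_{i}.txt'))
        for i in range(nfold)
    ]


def _predict_cv_average(boosters, X):
    # e.g. _predict_cv_average(_load_cv_boosters(ABEJA_TRAINING_RESULT_DIR, Parameters.NFOLD), X_new)
    preds = [booster.predict(X) for booster in boosters]
    return sum(preds) / len(preds)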
def handler(context):
    print(
        f'start training with parameters : {Parameters.as_dict()}, context : {context}'
    )

    X_train, y_train, cols_train = train_data_loader(
        DATALAKE_CHANNEL_ID, DATALAKE_TRAIN_FILE_ID, LABEL_FIELD, INPUT_FIELDS)
    models = []
    # Out-of-fold predictions over the training data.
    pred = np.zeros(len(X_train))

    if DATALAKE_VAL_FILE_ID:
        X_val, y_val, _ = train_data_loader(
            DATALAKE_CHANNEL_ID, DATALAKE_VAL_FILE_ID, LABEL_FIELD, INPUT_FIELDS)
        if IS_MULTI:
            pred_val = np.zeros((len(X_val), NUM_CLASS))
        else:
            pred_val = np.zeros(len(X_val))
    else:
        X_val, y_val, pred_val = None, None, None

    # Train one model per CV fold, score it on its held-out fold, and save it.
    for i, (train_index, valid_index) in enumerate(skf.split(X_train, y_train)):
        model = classifier(**PARAMS)
        model.fit(X_train.iloc[train_index], y_train[train_index])
        pred[valid_index] = model.predict(X_train.iloc[valid_index])
        score, loss = evaluator(y_train[valid_index], pred[valid_index])

        score_val = 0.0
        loss_val = 0.0

        filename = os.path.join(ABEJA_TRAINING_RESULT_DIR, f'model_{i}.pkl')
        with open(filename, 'wb') as f:
            pickle.dump(model, f)
        models.append(model)

        if DATALAKE_VAL_FILE_ID:
            # Accumulate per-fold validation predictions
            # (class votes for multi-class, averaged scores otherwise).
            pred_val_cv = model.predict(X_val)
            if IS_MULTI:
                pred_val += np.identity(NUM_CLASS)[pred_val_cv]
            else:
                pred_val += pred_val_cv
            score_val, loss_val = evaluator(y_val, pred_val_cv)

        print('-------------')
        print(
            'cv {} || score:{:.4f} || loss:{:.4f} || val_score:{:.4f} || val_loss:{:.4f}'
            .format(i + 1, score, loss, score_val, loss_val))

        writer.add_scalar('main/acc', score, i + 1)
        writer.add_scalar('main/loss', loss, i + 1)
        writer.add_scalar('test/acc', score_val, i + 1)
        writer.add_scalar('test/loss', loss_val, i + 1)
        statistics(i + 1, loss, score, loss_val, score_val)
        writer.flush()

    # Overall out-of-fold score, plus the combined validation score across folds.
    score, loss = evaluator(y_train, pred)
    score_val = 0.0
    loss_val = 0.0

    if DATALAKE_VAL_FILE_ID:
        if IS_MULTI:
            pred_val = np.argmax(pred_val, axis=1)
        else:
            pred_val /= len(models)
        score_val, loss_val = evaluator(y_val, pred_val)

    print('-------------')
    print(
        'cv total score:{:.4f} || cv total loss:{:.4f} || cv total val_score:{:.4f} || cv total val_loss:{:.4f}'
        .format(score, loss, score_val, loss_val))

    statistics(Parameters.NFOLD, None, score, None, score_val)
    writer.add_scalar('main/acc', score, Parameters.NFOLD)
    writer.add_scalar('main/loss', loss, Parameters.NFOLD)
    writer.add_scalar('test/acc', score_val, Parameters.NFOLD)
    writer.add_scalar('test/loss', loss_val, Parameters.NFOLD)
    writer.close()

    # Persist the training parameters and the training columns for inference.
    di = {**(Parameters.as_dict()), 'cols_train': cols_train}
    with open(os.path.join(ABEJA_TRAINING_RESULT_DIR, 'skf_env.json'), 'w') as skf_env:
        json.dump(di, skf_env)

    return
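
# Illustrative only (not part of the template): a minimal sketch of how the pickled fold
# models and skf_env.json written by handler() above could be loaded back for inference.
# The file names match the code above; the helper itself and its name are assumptions.
def _load_skf_models(result_dir, nfold):
    with open(os.path.join(result_dir, 'skf_env.json')) as f:
        env = json.load(f)
    models = []
    for i in range(nfold):
        with open(os.path.join(result_dir, f'model_{i}.pkl'), 'rb') as f:
            models.append(pickle.load(f))
    # env['cols_train'] lists the training columns, so inference data can be
    # reordered to the same layout before calling model.predict().
    return models, env['cols_train']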