# LightGBM hyper-parameters for the binary classification run.
# All upper-case names are configuration constants defined elsewhere in this file.
lgb_params = {
    'objective': 'binary',
    'boosting_type': 'gbdt',
    'metric': METRIC,
    'num_threads': N_THREADS,
    'verbose': VERBOSE,
    'seed': SEED,
    'n_estimators': N_ESTIMATORS,
    'early_stopping_rounds': EARLY_STOPPING_ROUNDS,
}

# Create the run logger first, then seed, so the start-up message is logged
# in the same order as before.
logger = utility.get_logger(LOGGER_NAME, MODEL_NUMBER, run_id, LOG_DIR)
utility.set_seed(SEED)
logger.info(f'Running for Model Number {MODEL_NUMBER}')

# The first tracking call is the only one that passes drop_incomplete_rows=True,
# so it is kept apart from the uniform calls below.
utility.update_tracking(
    run_id,
    "model_number",
    MODEL_NUMBER,
    drop_incomplete_rows=True,
)

# Remaining run metadata, recorded in this exact order.
for _tracked_field, _tracked_value in (
    ("model_type", MODEL_TYPE),
    ("is_test", IS_TEST),
    ("n_estimators", N_ESTIMATORS),
    ("early_stopping_rounds", EARLY_STOPPING_ROUNDS),
    ("random_state", SEED),
    ("n_threads", N_THREADS),
    # Tracking of the learning rate is currently disabled:
    # ("learning_rate", LEARNING_RATE),
    ("n_fold", N_FOLDS),
):
    utility.update_tracking(run_id, _tracked_field, _tracked_value)
import feather
import sys
import pandas as pd
import numpy as np

# The project's source directory is put at the front of sys.path before
# `utility` is imported from it.
sys.path.insert(0, "/home/jupyter/kaggle/energy/src")
import utility

utility.set_seed(42)

# Read weather_train, weather_test and building data. The train/test tables
# are not requested, so the first two returned slots are discarded.
(
    _,
    _,
    weather_train_df,
    weather_test_df,
    building_df,
) = utility.read_data(
    utility.CREATED_DATA_DIR,
    train=False,
    test=False,
    weather_train=True,
    weather_test=True,
    building=True,
)

print(f'Shape of weather_train_df : {weather_train_df.shape}')
print(f'Shape of weather_test_df : {weather_test_df.shape}')

# Full list of weather attributes that was originally intended:
# columns_name = ['air_temperature', 'cloud_coverage', 'dew_temperature',
#                 'precip_depth_1_hr', 'sea_level_pressure', 'wind_direction',
#                 'wind_speed']
# cloud_coverage, precip_depth_1_hr, sea_level_pressure and wind_direction
# are failing now for some reason, so only three weather attributes are
# filled.
columns_name = [
    'air_temperature',
    'dew_temperature',
    'wind_speed',
]

# Report how many values are missing per selected column before any filling.
print('Null distribution before filling')
_null_counts_before = weather_train_df[columns_name].isna().sum()
print(_null_counts_before)
# Training/testing entry script: builds the data loader, model, loss and
# trainer from the parsed options, then alternates train and test passes
# until the trainer reports termination.
import utility
import data
import model
import loss
from option import args
from checkpoint import Checkpoint
from trainer import Trainer

# Set the random seed so that results can be reproduced.
utility.set_seed(args.seed)
checkpoint = Checkpoint(args)

if checkpoint.ok:
    loader = data.Data(args)

    # NOTE: `model` and `loss` are rebound from the imported modules to the
    # constructed objects, exactly as before; the modules themselves are no
    # longer reachable under these names afterwards.
    model = model.Model(args, checkpoint)
    if args.test_only:
        # No loss object is built for a test-only run.
        loss = None
    else:
        loss = loss.Loss(args, checkpoint)

    t = Trainer(args, loader, model, loss, checkpoint)

    # Termination is checked before every train/test round.
    while True:
        if t.terminate():
            break
        t.train()
        t.test()

    checkpoint.done()
# Training/testing entry script: builds the data loader, model, loss and
# trainer from the parsed options, then alternates train and test passes
# until the trainer reports termination.
import utility
import data
import model
import loss
from option import args
from checkpoint import Checkpoint
from trainer import Trainer

# Seed from the parsed options before any other object is constructed.
utility.set_seed(args.seed)
checkpoint = Checkpoint(args)

if checkpoint.ok:
    loader = data.Data(args)

    # NOTE: `model` and `loss` are rebound from the imported modules to the
    # constructed objects, exactly as before; the modules themselves are no
    # longer reachable under these names afterwards.
    model = model.Model(args, checkpoint)
    if args.test_only:
        # No loss object is built for a test-only run.
        loss = None
    else:
        loss = loss.Loss(args, checkpoint)

    t = Trainer(args, loader, model, loss, checkpoint)

    # Termination is checked before every train/test round.
    while True:
        if t.terminate():
            break
        t.train()
        t.test()

    checkpoint.done()