def main():
    """Load the data, run the feature pipeline, and fit the model (FFM-style params)."""
    # Raw competition data.
    train = pd.read_csv('input/train.csv')
    test = pd.read_csv('input/test.csv')

    # Shapes before feature engineering — (rows, columns) per frame.
    print('original train shape: %s' % str(train.shape))
    print('original test shape: %s' % str(test.shape))

    # Feature-engineering steps; each returns the transformed (train, test) pair.
    train, test = fillna(train, test)
    train, test = count_encoding(train, test, replace=False)
    train, test = target_encoding(train, test, replace=False)
    train, test = high_diff_corr_pca(train, test, n_features=5)
    train, test = sum_of_na(train, test)
    train, test = drop_calc(train, test)

    # Shapes after feature engineering.
    print('Train shape: %s' % str(train.shape))
    print('Test shape: %s' % str(test.shape))

    # Hyperparameters (factor/eta/lambda suggest an FFM learner — TODO confirm).
    params = {
        'lambda': 1e-5,
        'factor': 6,
        'iteration': 100,
        'patience': 10,
        'eta': 0.2,
        'nr_threads': 1,
        'seed': 41,
    }
    train_predict(train, test, params)
def main():
    """Load the data, run the feature pipeline, and fit the model (RGF params)."""
    # Raw competition data.
    train = pd.read_csv('input/train.csv')
    test = pd.read_csv('input/test.csv')

    # Shapes before feature engineering — (rows, columns) per frame.
    print('original train shape: %s' % str(train.shape))
    print('original test shape: %s' % str(test.shape))

    # Feature-engineering steps; each returns the transformed (train, test) pair.
    train, test = fillna(train, test)                               # Fill NA
    train, test = count_encoding(train, test, replace=False)        # Count encoding
    train, test = target_encoding(train, test, replace=False)       # Target encoding
    train, test = dummy_encoding(train, test)                       # Dummy encoding
    train, test = high_diff_corr_pca(train, test, n_features=5)     # PCA on high-corr-diff feats
    train, test = sum_of_na(train, test)                            # Sum of NA
    train, test = drop_calc(train, test)                            # Drop *_calc columns

    # Shapes after feature engineering.
    print('Train shape: %s' % str(train.shape))
    print('Test shape: %s' % str(test.shape))

    # Hyperparameters (RGF classifier settings — TODO confirm target library).
    params = {
        'max_leaf': 1000,
        'algorithm': 'RGF',
        'loss': 'Log',
        'l2': 0.01,
        'sl2': 0.01,
        'normalize': False,
        'min_samples_leaf': 10,
        'n_iter': None,
        'opt_interval': 100,
        'learning_rate': 0.5,
        'calc_prob': 'sigmoid',
        'n_jobs': -1,
        'memory_policy': 'generous',
        'verbose': True,
    }
    train_predict(train, test, params)
def main():
    """Load the data, run the feature pipeline, and fit the model (CatBoost params)."""
    # Raw competition data.
    train = pd.read_csv('input/train.csv')
    test = pd.read_csv('input/test.csv')

    # Shapes before feature engineering — (rows, columns) per frame.
    print('original train shape: %s' % str(train.shape))
    print('original test shape: %s' % str(test.shape))

    # Feature-engineering steps; each returns the transformed (train, test) pair.
    train, test = fillna(train, test)
    train, test = count_encoding(train, test, replace=False)
    train, test = target_encoding(train, test, replace=False)
    train, test = dummy_encoding(train, test)
    train, test = high_diff_corr_pca(train, test, n_features=5)
    train, test = sum_of_na(train, test)
    train, test = drop_calc(train, test)

    # Shapes after feature engineering.
    print('Train shape: %s' % str(train.shape))
    print('Test shape: %s' % str(test.shape))

    # Hyperparameters (CatBoost-style keys: od_*, ctr, leaf_estimation — TODO confirm).
    params = {
        'bagging_temperature': 1,
        'name': 'experiment',
        'random_strength': 1,
        'has_time': False,
        'store_all_simple_ctr': False,
        'verbose': True,
        'use_best_model': True,
        'random_seed': 41,
        'thread_count': 2,
        'od_type': 'IncToDec',
        'od_wait': 20,
        'od_pval': 0.01,
        'feature_border_type': 'MinEntropy',
        'loss_function': 'Logloss',
        'rsm': 1,
        'l2_leaf_reg': 23,
        'depth': 6,
        'learning_rate': 0.057,
        'iterations': 10000,
        'leaf_estimation_method': 'Newton',
    }
    train_predict(train, test, params)
def main():
    """Load the data, run the feature pipeline, and fit the level-2 model (LightGBM params).

    Unlike the single-model scripts, this also passes the level-1 prediction
    files to ``train_predict`` for stacking.
    """
    # Raw competition data.
    train = pd.read_csv('input/train.csv')
    test = pd.read_csv('input/test.csv')

    # Shapes before feature engineering — (rows, columns) per frame.
    print('original train shape: %s' % str(train.shape))
    print('original test shape: %s' % str(test.shape))

    # Feature-engineering steps; each returns the transformed (train, test) pair.
    train, test = fillna(train, test)
    train, test = count_encoding(train, test, replace=False)
    train, test = target_encoding(train, test, replace=False)
    train, test = dummy_encoding(train, test)
    train, test = high_diff_corr_pca(train, test, n_features=5)
    train, test = sum_of_na(train, test)
    train, test = drop_calc(train, test)

    # Level-1 out-of-fold prediction files used as stacking features.
    l1_models = ['l1_rgf.csv.gz', 'l1_ffm.csv.gz', 'l1_catb.csv.gz']

    # Shapes after feature engineering.
    print('Train shape: %s' % str(train.shape))
    print('Test shape: %s' % str(test.shape))

    # Hyperparameters (LightGBM-style keys — TODO confirm target library).
    params = {
        'application': 'binary',
        'num_threads': 2,
        'boosting': 'gbdt',
        'max_bin': 16,
        'learning_rate': 0.025,
        'num_leaves': 52,
        'feature_fraction': 0.45,
        'bagging_fraction': 0.75,
        'bagging_freq': 16,
        'min_data_in_leaf': 740,
        'min_child_weight': 2.0,
    }
    train_predict(train, test, l1_models, params)
def main():
    """Load the data, run the feature pipeline, and fit the model (XGBoost params)."""
    # Raw competition data.
    train = pd.read_csv('input/train.csv')
    test = pd.read_csv('input/test.csv')

    # Shapes before feature engineering — (rows, columns) per frame.
    print('original train shape: %s' % str(train.shape))
    print('original test shape: %s' % str(test.shape))

    # Feature-engineering steps; each returns the transformed (train, test) pair.
    train, test = fillna(train, test)
    train, test = count_encoding(train, test, replace=False)
    train, test = target_encoding(train, test, replace=False)
    train, test = dummy_encoding(train, test)
    train, test = high_diff_corr_pca(train, test, n_features=5)
    train, test = sum_of_na(train, test)
    train, test = drop_calc(train, test)

    # Shapes after feature engineering.
    print('Train shape: %s' % str(train.shape))
    print('Test shape: %s' % str(test.shape))

    # Hyperparameters (XGBoost-style keys: booster/eta/gamma — TODO confirm).
    params = {
        'booster': 'gbtree',
        'objective': 'binary:logistic',
        'eval_metric': 'auc',
        'seed': 41,
        'nthread': 32,
        'silent': True,
        'eta': 0.025,
        'max_depth': 5,
        'min_child_weight': 9.15,
        'gamma': 0.59,
        'subsample': 0.8,
        'colsample_bytree': 0.8,
    }
    train_predict(train, test, params)