# Train-and-submit fragment.
# Steps, in the order they appear on the line below:
#   1. X and X_test are each passed through artgor_utils.reduce_mem_usage and
#      rebound, followed by an explicit gc.collect().
#   2. Under `if debug:` a message is printed; X.to_csv(...) writing
#      ../data/debug_data/debug_acsf.csv (index=False) follows it.
#   3. artgor_utils.train_model_regression is called with model_type='lgb',
#      eval_metric='group_mae', plot_feature_importance=True, verbose=100,
#      early_stopping_rounds=1000; its return value is bound to result_dict_lgb.
#   4. Under `if not debug:` a message is printed; np.save(result_filename,
#      result_dict_lgb) follows it.
#   5. result_dict_lgb['prediction'] is assigned to
#      sub['scalar_coupling_constant'] and sub is written to sub_filename
#      (index=False).
# NOTE(review): the original line breaks and indentation were lost, so this line
# is not valid Python as it stands and the extent of the `if debug:` /
# `if not debug:` bodies cannot be determined from here (e.g. whether the
# submission is written in debug mode). Restore the layout from the upstream
# file before running; do not infer it.
# NOTE(review): result_dict_lgb is indexed like a mapping ('prediction');
# presumably a dict, in which case np.save stores it as a pickled object -- confirm.
X = artgor_utils.reduce_mem_usage(X) X_test = artgor_utils.reduce_mem_usage(X_test) gc.collect() if debug: print('saving debug data...') X.to_csv("../data/debug_data/debug_acsf.csv", index=False) print("training models...") result_dict_lgb = artgor_utils.train_model_regression( X=X, X_test=X_test, y=y, params=params, folds=folds, model_type='lgb', eval_metric='group_mae', plot_feature_importance=True, verbose=100, early_stopping_rounds=1000, n_estimators=n_estimators, res_filename=result_filename) if not debug: print("saving results...") np.save(result_filename, result_dict_lgb) print("making submission...") sub['scalar_coupling_constant'] = result_dict_lgb['prediction'] sub.to_csv(sub_filename, index=False)
# Per-type training fragment (incomplete at both ends).
# Steps, in the order they appear on the line below:
#   1. index_type_test is a boolean mask selecting rows of `test` whose 'type'
#      column equals `type`; `index_type` is used the same way on `train` and `y`
#      but is assigned outside this view.
#   2. X_t / X_test_t are .copy() slices of train / test for that mask; y_t is
#      the matching slice of y. Their shapes are printed.
#   3. artgor_utils.train_model_regression is called on the slices with
#      columns=best_features, model_type='lgb', eval_metric='group_mae',
#      verbose=50, early_stopping_rounds=1000; the result is bound to
#      result_dict_lgb.
#   4. X_t and X_test_t are deleted and gc.collect() is called.
#   5. Under `if not debug:` the result is saved with np.save(str(result_filename), ...).
#   6. result_dict_lgb['oof'] is written into the 'oof' column of X_short for
#      rows of this type.
# NOTE(review): `type` shadows the builtin of the same name; it is bound outside
# this view (presumably a loop variable -- confirm).
# NOTE(review): original line breaks/indentation were lost, so the extent of the
# `if not debug:` body cannot be determined from here.
# NOTE(review): the final statement (X_short_test.loc[...) is cut off
# mid-expression in this view; its remainder must come from the upstream file.
index_type_test = (test['type'] == type) X_t = train.loc[index_type].copy() X_test_t = test.loc[index_type_test].copy() y_t = y[index_type] print('X_test_t.shape ', X_test_t.shape) print('X_t.shape ', X_t.shape) print('Training...') result_dict_lgb = artgor_utils.train_model_regression( X=X_t, y=y_t, X_test=X_test_t, params=params, columns=best_features, folds=folds, n_folds=n_folds, model_type='lgb', eval_metric='group_mae', verbose=50, early_stopping_rounds=1000, n_estimators=n_estimators, res_filename=result_filename) del X_t, X_test_t gc.collect() if not debug: np.save(str(result_filename), result_dict_lgb) X_short.loc[X_short['type'] == type, 'oof'] = result_dict_lgb['oof'] X_short_test.loc[X_short_test['type'] == type,
# Choose the regression target: `coupling_constant` when the requested column
# is 'scalar_coupling_constant', otherwise the `oof_colomn` column of
# folder_data.
y = (
    coupling_constant
    if oof_colomn == 'scalar_coupling_constant'
    else folder_data[oof_colomn]
)

# Cast the categorical columns to the 'category' dtype and store them back
# into folder_data.
folder_data[categorical_cols] = (
    folder_data[categorical_cols].astype('category')
)

# Report the shapes of the feature subsets that are about to be used.
print('test_shape', test[features].shape)
print('train shape', folder_data[features].shape)

# Train through the shared helper; the whole frames are passed in and
# `columns=features` is handed to the helper alongside them.
result_dict = artgor_utils.train_model_regression(
    X=folder_data,
    X_test=test,
    y=y,
    params=params,
    folds=folds,
    columns=features,
    model_type='lgb',
    eval_metric='group_mae',
    verbose=50,
    early_stopping_rounds=20,
    n_estimators=n_estimators,
    res_filename=result_filename,
)

# The message is printed unconditionally; the save itself only happens when
# result_filename is truthy.
print("saving results...")
if result_filename:
    np.save(str(result_filename), result_dict)
# Per-type "distance kernel" training fragment.
# Steps, in the order they appear on the line below:
#   1. type_test is bound to the same object as type_dist_test (plain
#      assignment, no copy), then gc.collect() is called.
#   2. params is looked up in types_config by `type`; result_filename is
#      result_filename_prefix / 'distance_kernel_<type>.npy'.
#   3. type_y is the 'scalar_coupling_constant' column of full_train filtered
#      by train_mask. The shapes of type_test and type_train are printed
#      (type_train is assigned outside this view).
#   4. artgor_utils.train_model_regression is called with columns=best_features,
#      model_type='lgb', eval_metric='mae', verbose=50,
#      early_stopping_rounds=1000. No n_estimators argument is passed here, so
#      the helper's own default (if any) applies.
#   5. `del type_train, type_test` removes those two names; type_dist_test still
#      refers to the test object, so that object is not released by this del.
#   6. Under `if not debug:` the result is saved with
#      np.save(str(result_filename), ...).
#   7. result_dict_lgb['oof'] / ['prediction'] are written into the 'oof' column
#      of X_short and the 'prediction' column of X_short_test for rows whose
#      'type' equals `type`.
# NOTE(review): `type` shadows the builtin of the same name; it is bound outside
# this view (presumably a loop variable -- confirm).
# NOTE(review): original line breaks/indentation were lost, so this line is not
# valid Python as it stands and the extent of the `if not debug:` body cannot be
# determined from here. Restore the layout from the upstream file.
type_test = type_dist_test gc.collect() params = types_config[type] result_filename = result_filename_prefix / f'distance_kernel_{type}.npy' type_y = full_train[train_mask]['scalar_coupling_constant'] print('X_test_t.shape ', type_test.shape) print('X_t.shape ', type_train.shape) print('Training...') result_dict_lgb = artgor_utils.train_model_regression( X=type_train, y=type_y, X_test=type_test, params=params, columns=best_features, folds=folds, n_folds=n_folds, model_type='lgb', eval_metric='mae', verbose=50, early_stopping_rounds=1000, res_filename=result_filename ) del type_train, type_test gc.collect() if not debug: np.save(str(result_filename), result_dict_lgb) X_short.loc[X_short['type'] == type, 'oof'] = result_dict_lgb['oof'] X_short_test.loc[X_short_test['type'] == type, 'prediction'] = result_dict_lgb['prediction']