Example #1
         'min_child_weight': 10,
         'min_data_in_leaf': 150,
         'reg_lambda': 0.5,  # L2 regularization term on weights.
         'reg_alpha': 0.5,  # L1 regularization term on weights.
         
         'colsample_bytree': 0.9,
         'subsample': 0.9,
#         'nthread': 32,
         'nthread': cpu_count(),
         'bagging_freq': 1,
         'verbose': -1,
         'seed': SEED
         }
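# --- Hypothetical usage sketch, not part of the original snippet: how a parameter
# --- dict like the one above is typically fed to LightGBM. The synthetic data and
# --- the demo_param / dtrain_demo names below are assumptions for illustration only.
import numpy as np
import lightgbm as lgb

rng = np.random.default_rng(0)
X_demo = rng.normal(size=(1000, 20))
y_demo = rng.integers(0, 2, size=1000)

demo_param = {
    'objective': 'binary',
    'learning_rate': 0.05,
    'min_child_weight': 10,
    'reg_lambda': 0.5,
    'reg_alpha': 0.5,
    'colsample_bytree': 0.9,
    'subsample': 0.9,
    'bagging_freq': 1,
    'verbose': -1,
    'seed': 71,
}

dtrain_demo = lgb.Dataset(X_demo, label=y_demo)
booster = lgb.train(demo_param, dtrain_demo, num_boost_round=100)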


loader805 = utils_best.Loader('CV805_LB803')
loader804 = utils_best.Loader('LB804')

# =============================================================================
# load
# =============================================================================
X_805 = loader805.train()
X_804 = loader804.train()

col = X_804.columns.difference(X_805.columns)
X = pd.concat([X_805, X_804[col]], axis=1)
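# Toy illustration (hypothetical frames, not the competition data) of the merge above:
# only the columns missing from the first frame are taken from the second, so columns
# shared by both loaders are not duplicated.
import pandas as pd

a = pd.DataFrame({'f1': [1, 2], 'f2': [3, 4]})
b = pd.DataFrame({'f2': [3, 4], 'f3': [5, 6]})
new_cols = b.columns.difference(a.columns)    # Index(['f3'])
merged = pd.concat([a, b[new_cols]], axis=1)  # columns: f1, f2, f3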

y = utils.read_pickles('../data/label').TARGET


Example #2
#col = [c for c in X.columns if new_feature in c]
    'min_child_weight': 10,
    'min_data_in_leaf': 150,
    'reg_lambda': 0.5,  # L2 regularization term on weights.
    'reg_alpha': 0.5,  # L1 regularization term on weights.
    'colsample_bytree': 0.9,
    'subsample': 0.9,
    #         'nthread': 32,
    'nthread': cpu_count(),
    'bagging_freq': 1,
    'verbose': -1,
    #         'seed': SEED
}

np.random.seed(SEED)

loader = utils_best.Loader('LB804')

# =============================================================================
# load
# =============================================================================
# train
X_train = loader.train()
y_train = utils.read_pickles('../data/label').TARGET

files_tr = utils.get_use_files(new_features, True)

X_ = pd.concat([pd.read_feather(f) for f in tqdm(files_tr, mininterval=60)],
               axis=1)
X_train = pd.concat([X_train, X_], axis=1)

if X_train.columns.duplicated().sum() > 0:
    raise Exception(f'duplicated!: { X_train.columns[X_train.columns.duplicated()] }')
Example #3
    'num_leaves': 63,
    'max_bin': 255,
    'min_child_weight': 10,
    'min_data_in_leaf': 150,
    'reg_lambda': 0.5,  # L2 regularization term on weights.
    'reg_alpha': 0.5,  # L1 regularization term on weights.
    'colsample_bytree': 0.9,
    'subsample': 0.9,
    #         'nthread': 32,
    'nthread': cpu_count(),
    'bagging_freq': 1,
    'verbose': -1,
    'seed': SEED
}

loader = utils_best.Loader('CV805_LB803')

# =============================================================================
# load
# =============================================================================
X = loader.train()
y = utils.read_pickles('../data/label').TARGET

if X.columns.duplicated().sum() > 0:
    raise Exception(f'duplicated!: { X.columns[X.columns.duplicated()] }')
print('no dup :) ')
print(f'X.shape {X.shape}')
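# Hypothetical remedy, not part of the original script: if duplicated columns were to
# be tolerated rather than raised on, keeping the first occurrence of each name works.
X_dedup = X.loc[:, ~X.columns.duplicated()]
assert not X_dedup.columns.duplicated().any()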

gc.collect()

CAT = list(set(X.columns) & set(loader.category()))
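# Hedged sketch (illustrative assumption, not shown in the original excerpt): the CAT
# list of categorical column names is what would normally be handed to LightGBM.
import lightgbm as lgb

dtrain = lgb.Dataset(X, label=y, categorical_feature=CAT)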