コード例 #1
0
# ========= simple average
# Row-wise, unweighted mean of every column except the id and the label.
# NOTE(review): presumably the remaining columns are base-model predictions
# - confirm against the code that builds `raw`.
raw['oof'] = raw.drop(columns=['card_id', 'target']).mean(axis=1)

# Only the first len_train rows are scored.
# NOTE(review): presumably these are the labelled training rows - confirm.
train = raw[:len_train]

# mean_squared_error is documented as (y_true, y_pred): the label goes first.
# The square root turns the MSE into an RMSE.
cv_score = mean_squared_error(train['target'], train['oof'])**0.5
print("CV score: {:<8.5f}".format(cv_score))

# The output folder name embeds the score rounded to 5 decimals and `now`.
sub_folder = path.join(top_folder,
                       'average-CV-' + str(np.round(cv_score, 5)) + '_' + now)
makedirs(sub_folder, exist_ok=True)
raw[['card_id', 'oof']].to_csv(path.join(sub_folder, 'oof_average.csv'),
                               index=False)

# Remove the helper column again so `raw` is left without it.
del raw['oof']

# ========= stacking
# Cut the combined frame into its two parts at row len_train.
train, test = raw[:len_train], raw[len_train:]

# Only the first part goes through uni_distribution, keyed on 'target'.
train = uni_distribution(train, 'target')

# The label is kept apart; feature frames drop the id and label columns.
y_train = train['target']
x_train = train.drop(columns=['card_id', 'target'])
x_test = test.drop(columns=['card_id', 'target'])

# Six-way splitter without shuffling.
folds = KFold(n_splits=6, shuffle=False, random_state=None)

# Zero-filled buffers: one slot per test row and one per train row.
predictions = np.zeros(test.shape[0])
oof = np.zeros(train.shape[0])

# Walk the folds, slicing out the fitting rows and the held-out rows.
for fold_, (trn_idx, val_idx) in enumerate(
        folds.split(x_train.values, y_train.values)):
    print("fold n°{}".format(fold_))
    # Rows selected by the training indices of this fold.
    trn_data = x_train.iloc[trn_idx]
    trn_y = y_train.iloc[trn_idx]
    # Rows selected by the validation indices of this fold.
    val_data = x_train.iloc[val_idx]
    val_y = y_train.iloc[val_idx]
コード例 #2
0
# Cut the combined frame at row len_train, then drop the reference to it.
train, test = raw[:len_train], raw[len_train:]
del raw

# === remove imbalance feature
# Two-sample Kolmogorov-Smirnov test per feature (test part vs. train part);
# only the p-value of each test is kept.
list_p_value = [ks_2samp(test[i], train[i]).pvalue for i in tqdm(feats)]

# Sort the p-values ascending and discard every feature below 0.1.
Se = pd.Series(list_p_value, index=feats).sort_values()
list_discarded = list(Se.loc[Se < .1].index)
print(list_discarded)

# Prune by mutating `feats` in place rather than rebinding the name.
for col in tqdm(list_discarded):
    feats.remove(col)

# === uniform distribution
# uni_distribution is applied to the train part only, keyed on 'target';
# the label is then read from its result.
train = uni_distribution(key='target', df=train)
y_train = train['target']

# Narrow both parts to the feature columns left in `feats`.
train, test = train[feats], test[feats]
gc.collect()

# Six-way unshuffled splitter and a Ridge model with alpha=1.
folds = KFold(n_splits=6, shuffle=False, random_state=None)
model = ridge.Ridge(alpha=1)

# col_all is a separate list holding the same entries as feats.
col_all = list(feats)

# col_use is 'oof_0' followed by the entries of feats_old, then feats_ds2.
col_use = ['oof_0'] + list(feats_old) + list(feats_ds2)