# Shared imports for the snippets below; SEED, new_features, label_name, and
# similar module-level names are assumed to be defined elsewhere in the project.
import gc
import numpy as np
import pandas as pd
from tqdm import tqdm
from multiprocessing import cpu_count
import utils
import utils_best
import utils_cat

param = {  # (name assumed) LightGBM-style parameters; dict head reconstructed
    'verbose': -1,
    # 'seed': SEED
}

np.random.seed(SEED)

loader = utils_best.Loader('LB804')

# =============================================================================
# load
# =============================================================================
# train
X_train = loader.train()
y_train = utils.read_pickles('../data/label').TARGET

files_tr = utils.get_use_files(new_features, True)

X_ = pd.concat([pd.read_feather(f) for f in tqdm(files_tr, mininterval=60)],
               axis=1)
X_train = pd.concat([X_train, X_], axis=1)

if X_train.columns.duplicated().sum() > 0:
    raise Exception(
        f'duplicated!: { X_train.columns[X_train.columns.duplicated()] }')
print('no dup :) ')
print(f'X_train.shape {X_train.shape}')

gc.collect()

CAT = list(set(X_train.columns) & set(loader.category()))
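
# Minimal sketch (not part of the original script) of how `param`, `X_train`,
# `y_train`, and `CAT` would typically feed cross-validated LightGBM training;
# `nfold=5` is an assumed placeholder, lightgbm must be installed, and the
# CAT columns are assumed to be integer- or category-encoded.
import lightgbm as lgb

dtrain = lgb.Dataset(X_train, y_train, categorical_feature=CAT)
ret = lgb.cv(param, dtrain, num_boost_round=1000, nfold=5)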

param = {  # (name assumed) LightGBM-style parameters; dict head reconstructed
    'bagging_freq': 1,
    'verbose': -1,
    'seed': SEED
}

use_files = [
    'train_f001',
    #             'train_f002_WEEKDAY_APPR_PROCESS_START-ORGANIZATION_TYPE',
    #             'train_f002_OCCUPATION_TYPE-ORGANIZATION_TYPE'
]

# =============================================================================
# load
# =============================================================================

files = utils.get_use_files(use_files, True)

X = pd.concat([pd.read_feather(f) for f in tqdm(files, mininterval=60)],
              axis=1)
y = utils.read_pickles('../data/label').TARGET

CAT = list(set(X.columns) & set(utils_cat.ALL))

if X.columns.duplicated().sum() > 0:
    raise Exception(f'duplicated!: { X.columns[X.columns.duplicated()] }')
print('no dup :) ')
print(f'X.shape {X.shape}')

gc.collect()

# =============================================================================
param = {  # (name assumed) XGBoost-style parameters; dict head reconstructed
    # 'max_leaves': 0,
    'max_bin': 256,
    # 'predictor': 'cpu_predictor',
    'objective': 'binary:logistic',
    'eval_metric': 'auc',
    # 'seed': SEED
}

use_files = []
np.random.seed(SEED)

# =============================================================================
# load train, test
# =============================================================================

files = utils.get_use_files(use_files, True)

X_train = pd.concat([pd.read_feather(f) for f in tqdm(files, mininterval=60)],
                    axis=1)
y = utils.read_pickles('../data/label').TARGET

# maxwell
maxwell = pd.read_feather('../feature_someone/Maxwell_train.f')
X_train = pd.concat([X_train, maxwell], axis=1)
del maxwell
gc.collect()

if X_train.columns.duplicated().sum() > 0:
    raise Exception(
        f'duplicated!: { X_train.columns[X_train.columns.duplicated()] }')
print('no dup :) ')
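
# Minimal sketch (not from the original) of feeding the XGBoost-style `param`
# dict above into cross-validation; assumes xgboost is installed, X_train is
# fully numeric, and the fold count is a placeholder.
import xgboost as xgb

dtrain = xgb.DMatrix(X_train, label=y)
ret = xgb.cv(param, dtrain, num_boost_round=1000, nfold=5)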

param = {  # (name assumed) LightGBM-style parameters; dict head reconstructed
    'bagging_freq': 1,
    'verbose': -1,
    'seed': SEED
}

np.random.seed(SEED)
# =============================================================================
# load
# =============================================================================

prefixes = [
    'f001',
    'f002',
]

files = utils.get_use_files(prefixes, True)

X_train = pd.concat([pd.read_feather(f) for f in tqdm(files, mininterval=60)],
                    axis=1)
y_train = X_train[label_name]
X_train.drop(label_name, axis=1, inplace=True)

CAT = list(set(X_train.columns) & set(utils_cat.ALL))

if X_train.columns.duplicated().sum() > 0:
    raise Exception(
        f'duplicated!: { X_train.columns[X_train.columns.duplicated()] }')
print('no dup :) ')
print(f'X_train.shape {X_train.shape}')

gc.collect()

param = {  # (name assumed) LightGBM-style parameters; dict head reconstructed
    'nthread': cpu_count(),
    'bagging_freq': 1,
    'verbose': -1,
    'seed': SEED
}

# =============================================================================
# load
# =============================================================================
X = pd.read_csv(
    '../data/Valid_stochastic_blending_v3-2_valid_0.308rk_0.444mw1_0.248t1_0.81050CV_0.3Adv.csv'
)
X.drop('SK_ID_CURR', axis=1, inplace=True)
y = utils.read_pickles('../data/label').TARGET
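
# Diagnostic sketch (not in the original): univariate ROC AUC of each loaded
# blend column against the target, assuming the remaining columns are
# prediction scores without NaNs.
from sklearn.metrics import roc_auc_score

for c in X.columns:
    print(c, roc_auc_score(y, X[c]))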

files = utils.get_use_files(new_features)
#for new_feature in new_features:
#    files += glob(f'../feature/train_{new_feature}*')

print('files:', len(files))

X_ = pd.concat([pd.read_feather(f) for f in tqdm(files, mininterval=60)],
               axis=1)
X = pd.concat([X, X_], axis=1)
del X_

if X.columns.duplicated().sum() > 0:
    raise Exception(f'duplicated!: { X.columns[X.columns.duplicated()] }')
print('no dup :) ')
print(f'X.shape {X.shape}')

param = {  # (name assumed) LightGBM-style parameters; dict head reconstructed
    'nthread': cpu_count(),
    'bagging_freq': 1,
    'verbose': -1,
    'seed': SEED
}


loader = utils_best.Loader('LB804')

# =============================================================================
# load
# =============================================================================
X_old = loader.train()
y = utils.read_pickles('../data/label').TARGET

files_tr = utils.get_use_files(new_features, True)


X_ = pd.concat([pd.read_feather(f) for f in tqdm(files_tr, mininterval=60)],
               axis=1)
X_new = pd.concat([X_old, X_], axis=1).drop(
    ['f001_EXT_SOURCE_1', 'f001_EXT_SOURCE_2', 'f001_EXT_SOURCE_3'], axis=1)
del X_

if X_new.columns.duplicated().sum() > 0:
    raise Exception(
        f'duplicated!: { X_new.columns[X_new.columns.duplicated()] }')
print('no dup :) ')
print(f'X_new.shape {X_new.shape}')

gc.collect()

CAT = list(set(X_new.columns) & set(loader.category()))
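
# Quick sanity sketch (not in the original): confirm the three EXT_SOURCE
# columns were actually removed from X_new before training.
dropped = ['f001_EXT_SOURCE_1', 'f001_EXT_SOURCE_2', 'f001_EXT_SOURCE_3']
assert not set(dropped) & set(X_new.columns), 'EXT_SOURCE columns still present'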
"""
@author: Kazuki
"""

import numpy as np
import pandas as pd
from tqdm import tqdm
import gc
import os
from multiprocessing import Pool, cpu_count
NTHREAD = cpu_count()
#import utils_agg
import utils
#utils.start(__file__)
#==============================================================================

files = utils.get_use_files([], True)
X = pd.concat([pd.read_feather(f) for f in tqdm(files, mininterval=60)],
              axis=1)

# =============================================================================
# var0
# =============================================================================
col_var0 = utils.check_var(X)


def multi_touch_var0(arg):
    # mark a zero-variance feature by touching an empty .f placeholder file
    os.system(f'touch "../feature_var0/{arg}.f"')


pool = Pool(cpu_count())
pool.map(multi_touch_var0, col_var0)
pool.close()
pool.join()
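
# Portable alternative sketch to shelling out with `touch`: pathlib creates
# the same empty marker files without spawning a subprocess per column.
from pathlib import Path

for c in col_var0:
    Path(f'../feature_var0/{c}.f').touch()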