Example #1
0
def concat_pred_item(T, dryrun=False):
    """Attach the f317 user-product features to one prediction-item frame.

    The dataset name is 'test' when ``T`` equals -1 and 'trainT-<T>'
    otherwise. The frame returned by ``utils.load_pred_item(name)`` is
    left-joined on ``user_id`` / ``product_id`` with the pickle stored at
    ``../feature/<name>/f317_user-product.p``.

    Args:
        T: -1 selects the test set; any other value is stringified into the
            'trainT-<T>' dataset name.
        dryrun: when it compares equal to ``True`` the merged frame is
            returned and nothing is written.

    Returns:
        The merged DataFrame on a dry run; otherwise ``None``, after the
        frame has been handed to ``utils.to_pickles``.
    """
    name = 'test' if T == -1 else 'trainT-' + str(T)

    df = utils.load_pred_item(name)
    extra = pd.read_pickle('../feature/{}/f317_user-product.p'.format(name))
    df = pd.merge(df, extra, on=['user_id', 'product_id'], how='left')

    # Release the feature table before collecting so only the merged frame
    # stays referenced.
    del extra
    gc.collect()

    #==============================================================================
    print('output')
    #==============================================================================
    # Equality test (not plain truthiness) is kept deliberately.
    if dryrun == True:
        return df

    # NOTE(review): 20 is presumably the number of output pieces — confirm
    # against utils.to_pickles.
    utils.to_pickles(df, '../feature/{}/all_apdx'.format(name), 20, inplace=True)
Example #2
0
         'silent':1, 
         'nthread':27,
#         'scale_pos_weight':y_build.mean(),
#         'eval_metric':'auc',
         'eval_metric':'logloss',
         'objective':'binary:logistic',
         'tree_method':'hist'}

print("""#==== print param ======""")
print('seed:', seed)


#==============================================================================
# prepare
#==============================================================================
# Training frame = a 10% sample of each of the two 'trainT-*' sets, stacked
# with a fresh index; join='inner' keeps only the columns both samples share.
# NOTE(review): sample() is called without random_state, so the subsample is
# not tied to `seed` — confirm that is intended.
# NOTE(review): the meaning of load_pred_item's second positional argument
# (True) is not visible here — TODO confirm.
# A disabled variant in the original loaded only 'trainT-0', unsampled.
train = pd.concat(
    [utils.load_pred_item(name, True).sample(frac=.1)
     for name in ('trainT-0', 'trainT-1')],
    ignore_index=True,
    join='inner')

# Keep an id/label view, the target, and the feature matrix, then release the
# combined frame.
sub_train = train[['order_id', 'product_id', 'y']]
y_train = train['y']
X_train = train.drop('y', axis=1)
del train
gc.collect()

# Drop every '*_id*' column plus 'is_train'; 'user_id' is taken off the drop
# list so it stays in X_train.
col = [c for c in X_train.columns if '_id' in c]
col.append('is_train')
col.remove('user_id')
print('drop1', col)
X_train.drop(col, axis=1, inplace=True)
Example #3
0
    'silent': 1,
    'nthread': 27,
    'eval_metric': 'logloss',
    'objective': 'binary:logistic',
    'tree_method': 'hist'
}

print("""#==== print param ======""")
print('DATE:', DATE)
print('seed:', seed)

#==============================================================================
# prepare
#==============================================================================
# Stack the three 'trainT-*' sets into one frame with a fresh index.
train = pd.concat(
    [utils.load_pred_item(name)
     for name in ('trainT-0', 'trainT-1', 'trainT-2')],
    ignore_index=True)

# Separate the target from the features, then release the combined frame.
y_train = train['y']
X_train = train.drop('y', axis=1)
del train
gc.collect()

# Drop every '*_id*' column plus 'is_train'; 'user_id' is taken off the drop
# list so it stays in X_train.
col = [c for c in X_train.columns if '_id' in c]
col.append('is_train')
col.remove('user_id')
print('drop1', col)
X_train.drop(col, axis=1, inplace=True)
Example #4
0
    'nthread': 27,
    #         'scale_pos_weight':y_build.mean(),
    #         'eval_metric':'auc',
    'eval_metric': 'logloss',
    'objective': 'binary:logistic',
    'tree_method': 'hist'
}

print("""#==== print param ======""")
print('seed:', seed)

#==============================================================================
# prepare
#==============================================================================
# Training frame = a 10% sample of each of the two 'trainT-*' sets, stacked
# with a fresh index; join='inner' keeps only the columns both samples share.
# NOTE(review): sample() is called without random_state, so the subsample is
# not tied to `seed` — confirm that is intended.
# NOTE(review): the meaning of load_pred_item's second positional argument
# (True) is not visible here — TODO confirm.
# A disabled variant in the original loaded only 'trainT-0', unsampled.
train = pd.concat(
    [utils.load_pred_item(name, True).sample(frac=.1)
     for name in ('trainT-0', 'trainT-1')],
    ignore_index=True,
    join='inner')

# Keep an id/label view, the target, and the feature matrix, then release the
# combined frame.
sub_train = train[['order_id', 'product_id', 'y']]
y_train = train['y']
X_train = train.drop('y', axis=1)
del train
gc.collect()

# Build the list of columns to drop: every '*_id*' column plus 'is_train',
# with 'user_id' taken off the list so it is not dropped.
col = [c for c in X_train.columns if '_id' in c]
col.append('is_train')
col.remove('user_id')
         'subsample':0.75,
         'silent':1,
         'nthread':27,
         'eval_metric':'logloss',
         'objective':'binary:logistic',
         'tree_method':'hist'
         }

print("""#==== print param ======""")
print('DATE:', DATE)
print('seed:', seed)

#==============================================================================
# prepare
#==============================================================================
# Stack the three 'trainT-*' sets into one frame with a fresh index.
train = pd.concat(
    [utils.load_pred_item(name)
     for name in ('trainT-0', 'trainT-1', 'trainT-2')],
    ignore_index=True)

# Separate the target from the features, then release the combined frame.
# A disabled variant in the original built X_train by left-merging
# '../output/sub/811_1/train_stack_item.p' (minus its 'y' column) on
# order_id / product_id.
y_train = train['y']
X_train = train.drop('y', axis=1)
del train
gc.collect()

# Build the list of columns to drop: every '*_id*' column plus 'is_train',
# with 'user_id' taken off the list so it is not dropped.
col = [c for c in X_train.columns if '_id' in c]
col.append('is_train')
col.remove('user_id')