def concat_pred_item(T, dryrun=False):
    """Attach the f317 user-product features to one prediction-item table.

    Parameters
    ----------
    T : int
        Dataset selector. ``-1`` selects the ``'test'`` set; any other value
        selects ``'trainT-<T>'``.
    dryrun : bool, default False
        When truthy, the merged frame is returned instead of being written
        to disk.

    Returns
    -------
    pandas.DataFrame or None
        The merged frame when ``dryrun`` is truthy; otherwise ``None`` once
        the frame has been handed to ``utils.to_pickles``.
    """
    name = 'test' if T == -1 else 'trainT-' + str(T)

    df = utils.load_pred_item(name)
    # Left join: every row of the base table is kept; rows without a match
    # in the f317 file get NaN in the added columns.
    df = pd.merge(
        df,
        pd.read_pickle('../feature/{}/f317_user-product.p'.format(name)),
        on=['user_id', 'product_id'],
        how='left',
    )
    gc.collect()

    #==============================================================================
    print('output')
    #==============================================================================
    # Truthiness test instead of ``== True`` (PEP 8): a flag should not be
    # compared to the True singleton with ``==``.
    if dryrun:
        return df

    # NOTE(review): 20 is presumably the number of pickle chunks to write --
    # confirm against utils.to_pickles.
    utils.to_pickles(df, '../feature/{}/all_apdx'.format(name), 20, inplace=True)
    return None
'silent':1, 'nthread':27, # 'scale_pos_weight':y_build.mean(), # 'eval_metric':'auc', 'eval_metric':'logloss', 'objective':'binary:logistic', 'tree_method':'hist'} print("""#==== print param ======""") print('seed:', seed) #============================================================================== # prepare #============================================================================== train = pd.concat([utils.load_pred_item('trainT-0', True).sample(frac=.1), utils.load_pred_item('trainT-1', True).sample(frac=.1) ], ignore_index=True, join='inner') #train = utils.load_pred_item('trainT-0') sub_train = train[['order_id', 'product_id', 'y']] y_train = train['y'] X_train = train.drop('y', axis=1) del train gc.collect() # drop id col = [c for c in X_train.columns if '_id' in c] + ['is_train'] col.remove('user_id') print('drop1',col) X_train.drop(col, axis=1, inplace=True) # keep user_id
'silent': 1, 'nthread': 27, 'eval_metric': 'logloss', 'objective': 'binary:logistic', 'tree_method': 'hist' } print("""#==== print param ======""") print('DATE:', DATE) print('seed:', seed) #============================================================================== # prepare #============================================================================== train = pd.concat([ utils.load_pred_item('trainT-0'), utils.load_pred_item('trainT-1'), utils.load_pred_item('trainT-2') ], ignore_index=True) y_train = train['y'] X_train = train.drop('y', axis=1) del train gc.collect() # drop id col = [c for c in X_train.columns if '_id' in c] + ['is_train'] col.remove('user_id') print('drop1', col) X_train.drop(col, axis=1, inplace=True) # keep user_id
'nthread': 27, # 'scale_pos_weight':y_build.mean(), # 'eval_metric':'auc', 'eval_metric': 'logloss', 'objective': 'binary:logistic', 'tree_method': 'hist' } print("""#==== print param ======""") print('seed:', seed) #============================================================================== # prepare #============================================================================== train = pd.concat([ utils.load_pred_item('trainT-0', True).sample(frac=.1), utils.load_pred_item('trainT-1', True).sample(frac=.1) ], ignore_index=True, join='inner') #train = utils.load_pred_item('trainT-0') sub_train = train[['order_id', 'product_id', 'y']] y_train = train['y'] X_train = train.drop('y', axis=1) del train gc.collect() # drop id col = [c for c in X_train.columns if '_id' in c] + ['is_train'] col.remove('user_id')
'subsample':0.75, 'silent':1, 'nthread':27, 'eval_metric':'logloss', 'objective':'binary:logistic', 'tree_method':'hist' } print("""#==== print param ======""") print('DATE:', DATE) print('seed:', seed) #============================================================================== # prepare #============================================================================== train = pd.concat([utils.load_pred_item('trainT-0'), utils.load_pred_item('trainT-1'), utils.load_pred_item('trainT-2') ], ignore_index=True) y_train = train['y'] X_train = train.drop('y', axis=1) #X_train = pd.merge(train.drop('y', axis=1), # pd.read_pickle('../output/sub/811_1/train_stack_item.p').drop('y', axis=1), # on=['order_id', 'product_id'], how='left') del train gc.collect() # drop id col = [c for c in X_train.columns if '_id' in c] + ['is_train'] col.remove('user_id')