import numpy as np
import pandas as pd
import multiprocessing
from functools import partial
from sklearn.preprocessing import LabelEncoder
from kairbnb.preprocessing import process_user_actions
from kairbnb.preprocessing import process_user_secs_elapsed
from kairbnb.io import load_users

# NROWS = None reads every row; set a small int to smoke-test the pipeline.
NROWS = None
VERSION = '5'

if __name__ == '__main__':
    # Load raw data; '-unknown-' is the dataset's missing-value marker.
    train_users, test_users = load_users(nrows=NROWS, na_values='-unknown-')
    sessions = pd.read_csv('../data/sessions.csv', nrows=NROWS,
                           na_values='-unknown-')

    # Stack train and test users into a single frame keyed by user id, so
    # the same cleaning is applied to both splits.
    users = pd.concat([train_users, test_users],
                      ignore_index=True).set_index('id')

    # Drop date_first_booking column (empty since competition's restart)
    users = users.drop(columns='date_first_booking')

    # Null out implausible ages: anything outside the inclusive 13..100
    # range (NaN ages are untouched — they simply get reassigned NaN).
    users.loc[~users['age'].between(13, 100), 'age'] = np.nan

    # Change type to date
import numpy as np
import pandas as pd
# NOTE(review): multiprocessing, partial, LabelEncoder and the two
# process_user_* helpers are unused in the visible portion — presumably the
# session-processing code below this chunk uses them; confirm before removing.
import multiprocessing
from functools import partial
from sklearn.preprocessing import LabelEncoder
from kairbnb.preprocessing import process_user_actions
from kairbnb.preprocessing import process_user_secs_elapsed
from kairbnb.io import load_users

# NROWS = None reads every row; set a small int to smoke-test the pipeline.
NROWS = None
VERSION = '5'

if __name__ == '__main__':
    # Load raw data; '-unknown-' is the dataset's missing-value marker.
    train_users, test_users = load_users(nrows=NROWS, na_values='-unknown-')
    sessions = pd.read_csv('../data/sessions.csv', nrows=NROWS,
                           na_values='-unknown-')

    # Join train and test users so the same cleaning applies to both splits.
    users = pd.concat((train_users, test_users), axis=0, ignore_index=True)
    users = users.set_index('id')

    # Drop date_first_booking column (empty since competition's restart)
    users = users.drop('date_first_booking', axis=1)

    # Remove weird age values: only the inclusive 13..100 range is kept.
    users.loc[users['age'] > 100, 'age'] = np.nan
    users.loc[users['age'] < 13, 'age'] = np.nan
#!/usr/bin/env python from sklearn.preprocessing import LabelEncoder from xgboost.sklearn import XGBClassifier from kairbnb.io import generate_submission, load_users VERSION = '5' NAME = 'gb_' + VERSION if __name__ == '__main__': train_users, test_users = load_users(version=VERSION) y_train = train_users['country_destination'] train_users.drop(['country_destination', 'id'], axis=1, inplace=True) train_users = train_users.fillna(-1) x_train = train_users.values label_encoder = LabelEncoder() encoded_y_train = label_encoder.fit_transform(y_train) test_users_ids = test_users['id'] test_users.drop('id', axis=1, inplace=True) test_users = test_users.fillna(-1) x_test = test_users.values clf = XGBClassifier(max_depth=7, learning_rate=0.18, n_estimators=80, objective="rank:pairwise", gamma=0, min_child_weight=1,
import pandas as pd
from kairbnb.preprocessing import one_hot_encoding
from kairbnb.io import load_users

VERSION = '5'

if __name__ == '__main__':
    # Load the already-preprocessed users for this feature version.
    train_users, test_users = load_users(version=VERSION)

    # Build a combined frame, then re-key all three frames by user id.
    users = pd.concat([train_users, test_users],
                      ignore_index=True).set_index('id')
    train_users = train_users.set_index('id')
    test_users = test_users.set_index('id')

    # Raw date/timestamp columns are dropped from the feature set.
    drop_list = [
        'date_account_created',
        'date_first_active',
        'timestamp_first_active',
    ]
    users.drop(columns=drop_list, inplace=True)

    # IDEA: Add interaction features
# NOTE(review): this chunk starts mid-file — the imports for argparse,
# LabelEncoder and XGBClassifier used below are above the visible region.
from kairbnb.io import load_users

VERSION = '1'

if __name__ == '__main__':
    # Hyper-parameters come from the command line, with defaults as fallback.
    parser = argparse.ArgumentParser()
    parser.add_argument('-d', '--max_depth', default=7, type=int)
    parser.add_argument('-lr', '--learning_rate', default=0.18, type=float)
    parser.add_argument('-n', '--n_estimators', default=80, type=int)
    parser.add_argument('-ct', '--colsample_bytree', default=1, type=float)
    parser.add_argument('-cl', '--colsample_bylevel', default=1, type=float)
    parser.add_argument('-sub', '--subsample', default=1, type=float)
    parser.add_argument('-md', '--max_delta', default=0, type=float)
    args = parser.parse_args()

    # Training data: fill missing values with -1, split target from features.
    train_users, _ = load_users(version=VERSION)
    train_users.fillna(-1, inplace=True)
    y_train = train_users['country_destination']
    train_users.drop(['country_destination', 'id'], axis=1, inplace=True)
    x_train = train_users.values

    # Integer-encode the destination labels for the classifier.
    label_encoder = LabelEncoder()
    encoded_y_train = label_encoder.fit_transform(y_train)

    # NOTE(review): chunk truncated mid-call; the remaining keyword arguments
    # and the rest of the script are not visible here.
    xgb = XGBClassifier(
        max_depth=args.max_depth,
        learning_rate=args.learning_rate,
        n_estimators=args.n_estimators,
        objective="multi:softprob",
        gamma=0,
        min_child_weight=1,
# NOTE(review): this chunk starts mid-file — the imports for argparse,
# LabelEncoder and XGBClassifier used below are above the visible region.
from kairbnb.io import load_users

VERSION = '1'

if __name__ == '__main__':
    # Hyper-parameters come from the command line, with defaults as fallback.
    parser = argparse.ArgumentParser()
    parser.add_argument('-d', '--max_depth', default=7, type=int)
    parser.add_argument('-lr', '--learning_rate', default=0.18, type=float)
    parser.add_argument('-n', '--n_estimators', default=80, type=int)
    parser.add_argument('-ct', '--colsample_bytree', default=1, type=float)
    parser.add_argument('-cl', '--colsample_bylevel', default=1, type=float)
    parser.add_argument('-sub', '--subsample', default=1, type=float)
    parser.add_argument('-md', '--max_delta', default=0, type=float)
    args = parser.parse_args()

    # Training data: fill missing values with -1, split target from features.
    train_users, _ = load_users(version=VERSION)
    train_users.fillna(-1, inplace=True)
    y_train = train_users['country_destination']
    train_users.drop(['country_destination', 'id'], axis=1, inplace=True)
    x_train = train_users.values

    # Integer-encode the destination labels for the classifier.
    label_encoder = LabelEncoder()
    encoded_y_train = label_encoder.fit_transform(y_train)

    # NOTE(review): the parsed --max_delta argument appears intended for
    # max_delta_step, yet 0 is hard-coded here — verify against the full file.
    # NOTE(review): chunk truncated mid-call; the remaining keyword arguments
    # and the rest of the script are not visible here.
    xgb = XGBClassifier(max_depth=args.max_depth,
                        learning_rate=args.learning_rate,
                        n_estimators=args.n_estimators,
                        objective="multi:softprob",
                        gamma=0,
                        min_child_weight=1,
                        max_delta_step=0,