def data_v6(data):
    """Train an ExtraTrees classifier on the draft feature groups.

    Features are the 'nums', 'cats' and 'nans' groups returned by
    ``data.extract``, with missing values filled with -999.

    Writes two columns named 'draft_ext':
      * train 'models' frame: the 'predict' entry of the
        ``model_train_cv_parallel`` result (16 stratified shuffled folds),
        shifted by -0.5;
      * test 'models' frame: class-1 probability from the model refitted on
        all training rows, shifted by -0.5.
    """
    def load_features(dset):
        groups = [
            ('nums', None),
            ('cats', None),
            ('nans', None),
        ]
        # Missing values are replaced by the sentinel -999.
        return data.extract(dset, groups).fillna(-999)

    # 0.46007 (figure noted by the original author; presumably a validation
    # score -- TODO confirm which metric)
    clf = skl.ensemble.ExtraTreesClassifier(
        n_estimators=1000,
        criterion='entropy',
        min_samples_leaf=5,
        max_features=0.8,
        n_jobs=8,
        random_state=42,
    )

    train_X = load_features('train')
    train_y = data.get('train', 'y')
    test_X = load_features('test')

    folds = skl.cross_validation.StratifiedKFold(
        train_y, n_folds=16, shuffle=True, random_state=1234)

    cv_result = model_train_cv_parallel(
        clf, train_X, train_y, n_jobs=1, cv=folds)
    data.get('train', 'models')['draft_ext'] = cv_result['predict'] - 0.5

    # Refit on the full training set before scoring the test set.
    clf.fit(train_X, train_y)
    test_proba = clf.predict_proba(test_X)
    data.get('test', 'models')['draft_ext'] = test_proba[:, 1] - 0.5
def data_v30(data):
    """Train a logistic regression on the in-game feature groups.

    The feature frame is extracted via ``data.extract`` and converted with
    ``to_sparse(0)``.  The columns 'time_xgb', 'anti_synergy' and 'synergy'
    are standardised with one shared ``StandardScaler``: it is fitted on
    the train frame and only applied to any other dataset.

    Writes a 'common_linear' column to the train and test 'models' frames
    (``model_train_cv_parallel``'s 'predict' entry and the refitted model's
    class-1 probability respectively, both shifted by -0.5).
    """
    scaler = skl.preprocessing.StandardScaler()

    def build_features(dset):
        frame = data.extract(dset, [
            ('match', ['lobby_type_practice']),
            ('models', ['time_xgb']),
            ('synergy_sum', None),
            ('item_counts', None),
            ('ability_counts', None),
            ('team_diffs', ['first_ward']),
            ('fb', ['first_blood_team']),
            ('gold_counts', None),
            ('xp_counts', None),
            ('lh_counts', None),
            ('kills_counts', None),
        ]).to_sparse(0)
        to_scale = ['time_xgb', 'anti_synergy', 'synergy']
        if dset == 'train':
            # The scaler statistics come from the training frame only.
            rescaled = scaler.fit_transform(frame[to_scale])
        else:
            rescaled = scaler.transform(frame[to_scale])
        frame[to_scale] = rescaled
        return frame

    clf = skl.linear_model.LogisticRegression(random_state=1234, C=0.005)

    # The train frame must be built first: that call fits the scaler.
    train_X = build_features('train')
    train_y = data.get('train', df='y')

    cv_result = model_train_cv_parallel(clf, train_X, train_y, n_jobs=1)
    data.get('train', 'models')['common_linear'] = cv_result['predict'] - 0.5

    clf.fit(train_X, train_y)
    test_proba = clf.predict_proba(build_features('test'))
    data.get('test', 'models')['common_linear'] = test_proba[:, 1] - 0.5
def data_v5(data):
    """Train a LogisticXGB model on the draft feature groups.

    Features are the 'nums', 'cats' and 'nans' groups returned by
    ``data.extract``, with missing values filled with -999.

    A new empty 'models' frame, indexed like the corresponding feature
    frame, is assigned for both the train and the test set before the
    'draft_xgb' column is written into it.  The train column holds the
    'predict' entry of ``model_train_cv_parallel`` (16 stratified shuffled
    folds) minus 0.5; the test column holds the refitted model's class-1
    probability minus 0.5.
    """
    def load_features(dset):
        groups = [
            ('nums', None),
            ('cats', None),
            ('nans', None),
        ]
        # Missing values are replaced by the sentinel -999.
        return data.extract(dset, groups).fillna(-999)

    # 0.46394 (figure noted by the original author; presumably a validation
    # score -- TODO confirm which metric)
    booster = LogisticXGB(
        n_estimators=350,
        learning_rate=0.05,
        max_depth=7,
        seed=42,
    )

    train_X = load_features('train')
    train_y = data.get('train', 'y')
    test_X = load_features('test')

    folds = skl.cross_validation.StratifiedKFold(
        train_y, n_folds=16, shuffle=True, random_state=1234)

    # Start the train 'models' frame from scratch.
    data.dset('train')['models'] = pd.DataFrame(index=train_X.index)
    cv_result = model_train_cv_parallel(
        booster, train_X, train_y, n_jobs=1, cv=folds)
    data.get('train', 'models')['draft_xgb'] = cv_result['predict'] - 0.5

    booster.fit(train_X, train_y)

    # Likewise for the test 'models' frame.
    data.dset('test')['models'] = pd.DataFrame(index=test_X.index)
    test_proba = booster.predict_proba(test_X)
    data.get('test', 'models')['draft_xgb'] = test_proba[:, 1] - 0.5
def data_v31(data):
    """Train a LogisticXGB model on the in-game feature groups.

    The feature frame comes from ``data.extract`` (match lobby flag, the
    'time_xgb' model column, synergy sums, the various per-match count
    groups and the hero-roles bag) and is converted with ``to_sparse(0)``.

    Writes a 'common_xgb' column to the train and test 'models' frames
    (``model_train_cv_parallel``'s 'predict' entry and the refitted model's
    class-1 probability respectively, both shifted by -0.5).
    """
    def build_features(dset):
        groups = [
            ('match', ['lobby_type_practice']),
            ('models', ['time_xgb']),
            ('synergy_sum', None),
            ('item_counts', None),
            ('ability_counts', None),
            ('team_diffs', ['first_ward']),
            ('fb', ['first_blood_team']),
            ('gold_counts', None),
            ('xp_counts', None),
            ('lh_counts', None),
            ('kills_counts', None),
            ('hero_roles_bag', None),
        ]
        extracted = data.extract(dset, groups)
        return extracted.to_sparse(0)

    booster = LogisticXGB(
        n_estimators=500,
        learning_rate=0.1,
        max_depth=4,
        subsample=0.8,
        colsample_bytree=0.6,
        max_delta_step=1,
        seed=1234,
    )

    train_X = build_features('train')
    train_y = data.get('train', df='y')

    cv_result = model_train_cv_parallel(booster, train_X, train_y, n_jobs=1)
    data.get('train', 'models')['common_xgb'] = cv_result['predict'] - 0.5

    # Refit on the full training set before scoring the test set.
    booster.fit(train_X, train_y)
    test_proba = booster.predict_proba(build_features('test'))
    data.get('test', 'models')['common_xgb'] = test_proba[:, 1] - 0.5
def data_v32(data):
    """Train a LogisticXGB model on top of previously stored model columns.

    Inputs are the 'lobby_type_practice' match column plus the 'models'
    columns 'time_xgb', 'pre_match_linear', 'pre_match_xgb',
    'common_linear' and 'common_xgb', converted with ``to_sparse(0)``.
    Those model columns must already exist in the 'models' frames.

    Writes an 'ensemble1_xgb' column to the train and test 'models' frames
    (``model_train_cv_parallel``'s 'predict' entry and the refitted model's
    class-1 probability respectively, both shifted by -0.5).
    """
    def build_features(dset):
        groups = [
            ('match', ['lobby_type_practice']),
            ('models', [
                'time_xgb',
                'pre_match_linear',
                'pre_match_xgb',
                'common_linear',
                'common_xgb',
            ]),
        ]
        extracted = data.extract(dset, groups)
        return extracted.to_sparse(0)

    booster = LogisticXGB(
        n_estimators=300,
        learning_rate=0.01,
        max_depth=4,
        max_delta_step=1,
        seed=1234,
    )

    train_X = build_features('train')
    train_y = data.get('train', df='y')

    cv_result = model_train_cv_parallel(booster, train_X, train_y, n_jobs=1)
    data.get('train', 'models')['ensemble1_xgb'] = cv_result['predict'] - 0.5

    # Refit on the full training set before scoring the test set.
    booster.fit(train_X, train_y)
    test_proba = booster.predict_proba(build_features('test'))
    data.get('test', 'models')['ensemble1_xgb'] = test_proba[:, 1] - 0.5
def data_v31(data):
    """Fit LogisticXGB on in-game features and store 'common_xgb'.

    Feature groups are pulled with ``data.extract`` and the resulting frame
    is converted with ``to_sparse(0)``.  The train 'models' frame receives
    the 'predict' entry of ``model_train_cv_parallel`` minus 0.5; the test
    'models' frame receives the class-1 probability of the model refitted
    on all training rows, minus 0.5.
    """
    def features_for(dset):
        return data.extract(dset, [
            ('match', ['lobby_type_practice']),
            ('models', ['time_xgb']),
            ('synergy_sum', None),
            ('item_counts', None),
            ('ability_counts', None),
            ('team_diffs', ['first_ward']),
            ('fb', ['first_blood_team']),
            ('gold_counts', None),
            ('xp_counts', None),
            ('lh_counts', None),
            ('kills_counts', None),
            ('hero_roles_bag', None),
        ]).to_sparse(0)

    xgb_params = dict(
        n_estimators=500,
        learning_rate=0.1,
        max_depth=4,
        subsample=0.8,
        colsample_bytree=0.6,
        max_delta_step=1,
        seed=1234,
    )
    estimator = LogisticXGB(**xgb_params)

    features = features_for('train')
    target = data.get('train', df='y')

    train_scores = model_train_cv_parallel(
        estimator, features, target, n_jobs=1)['predict']
    data.get('train', 'models')['common_xgb'] = train_scores - 0.5

    estimator.fit(features, target)
    # Column 1 of predict_proba is the probability of the positive class.
    test_scores = estimator.predict_proba(features_for('test'))[:, 1]
    data.get('test', 'models')['common_xgb'] = test_scores - 0.5
def data_v25(data):
    """Train a LogisticXGB model on the pre-match feature groups.

    Features: the 'time_xgb' model column, the synergy sums, the
    hero-roles bag and the three lobby-type match columns, as returned by
    ``data.extract``.

    Writes a 'pre_match_xgb' column to the train and test 'models' frames
    (``model_train_cv_parallel``'s 'predict' entry and the refitted model's
    class-1 probability respectively, both shifted by -0.5).
    """
    def build_features(dset):
        groups = [
            ('models', ['time_xgb']),
            ('synergy_sum', None),
            ('hero_roles_bag', None),
            ('match', [
                'lobby_type_practice',
                'lobby_type_public',
                'lobby_type_ranked',
            ]),
        ]
        return data.extract(dset, groups)

    booster = LogisticXGB(
        n_estimators=100,
        learning_rate=0.03,
        max_depth=4,
        subsample=0.8,
        colsample_bytree=0.8,
        seed=1234,
        nthread=1,
    )

    train_X = build_features('train')
    train_y = data.get('train', df='y')

    # No n_jobs/cv given: model_train_cv_parallel uses its own defaults.
    cv_result = model_train_cv_parallel(booster, train_X, train_y)
    data.get('train', 'models')['pre_match_xgb'] = cv_result['predict'] - 0.5

    booster.fit(train_X, train_y)
    test_proba = booster.predict_proba(build_features('test'))
    data.get('test', 'models')['pre_match_xgb'] = test_proba[:, 1] - 0.5
def data_v23(data):
    """Create the 'models' frames with a 'time_xgb' column.

    A LogisticXGB model is trained on the single 'start_time' column of the
    'match' frame.  A new 'models' frame is assigned for the train set
    (holding ``model_train_cv_parallel``'s 'predict' entry minus 0.5) and
    for the test set (holding the refitted model's class-1 probability
    minus 0.5).
    """
    booster = LogisticXGB(
        n_estimators=2,
        max_depth=6,
        learning_rate=0.001,
        nthread=1,
    )

    # Double brackets keep the selection a one-column frame.
    train_X = data.get('train', 'match')[['start_time']]
    train_y = data.get('train', 'y')

    train_frame = pd.DataFrame(index=train_X.index)
    cv_result = model_train_cv_parallel(booster, train_X, train_y)
    train_frame['time_xgb'] = cv_result['predict'] - 0.5
    data.dset('train')['models'] = train_frame

    test_X = data.get('test', 'match')[['start_time']]
    test_frame = pd.DataFrame(index=test_X.index)

    # Refit on the full training set before scoring the test set.
    booster.fit(train_X, train_y)
    test_proba = booster.predict_proba(test_X)
    test_frame['time_xgb'] = test_proba[:, 1] - 0.5
    data.dset('test')['models'] = test_frame
def data_v32(data):
    """Fit LogisticXGB over earlier model outputs; store 'ensemble1_xgb'.

    The feature frame combines the 'lobby_type_practice' match column with
    five columns of the 'models' frame and is converted with
    ``to_sparse(0)``.  The train 'models' frame receives the 'predict'
    entry of ``model_train_cv_parallel`` minus 0.5; the test 'models' frame
    receives the class-1 probability of the model refitted on all training
    rows, minus 0.5.
    """
    def features_for(dset):
        base_models = [
            'time_xgb',
            'pre_match_linear',
            'pre_match_xgb',
            'common_linear',
            'common_xgb',
        ]
        return data.extract(dset, [
            ('match', ['lobby_type_practice']),
            ('models', base_models),
        ]).to_sparse(0)

    estimator = LogisticXGB(n_estimators=300,
                            learning_rate=0.01,
                            max_depth=4,
                            max_delta_step=1,
                            seed=1234)

    features = features_for('train')
    target = data.get('train', df='y')

    train_scores = model_train_cv_parallel(
        estimator, features, target, n_jobs=1)['predict']
    data.get('train', 'models')['ensemble1_xgb'] = train_scores - 0.5

    estimator.fit(features, target)
    # Column 1 of predict_proba is the probability of the positive class.
    test_scores = estimator.predict_proba(features_for('test'))[:, 1]
    data.get('test', 'models')['ensemble1_xgb'] = test_scores - 0.5
def data_v25(data):
    """Fit LogisticXGB on pre-match features and store 'pre_match_xgb'.

    Feature groups ('time_xgb' model column, synergy sums, hero-roles bag,
    three lobby-type flags) are pulled with ``data.extract``.  The train
    'models' frame receives the 'predict' entry of
    ``model_train_cv_parallel`` minus 0.5; the test 'models' frame receives
    the class-1 probability of the model refitted on all training rows,
    minus 0.5.
    """
    def features_for(dset):
        lobby_columns = [
            'lobby_type_practice',
            'lobby_type_public',
            'lobby_type_ranked',
        ]
        return data.extract(dset, [
            ('models', ['time_xgb']),
            ('synergy_sum', None),
            ('hero_roles_bag', None),
            ('match', lobby_columns),
        ])

    estimator = LogisticXGB(n_estimators=100,
                            learning_rate=0.03,
                            max_depth=4,
                            subsample=0.8,
                            colsample_bytree=0.8,
                            seed=1234,
                            nthread=1)

    features = features_for('train')
    target = data.get('train', df='y')

    train_scores = model_train_cv_parallel(
        estimator, features, target)['predict']
    data.get('train', 'models')['pre_match_xgb'] = train_scores - 0.5

    estimator.fit(features, target)
    # Column 1 of predict_proba is the probability of the positive class.
    test_scores = estimator.predict_proba(features_for('test'))[:, 1]
    data.get('test', 'models')['pre_match_xgb'] = test_scores - 0.5
def data_v24(data):
    """Train a logistic regression on the pre-match feature groups.

    Features: the 'time_xgb' model column, the synergy sums, the
    hero-roles bag and the three lobby-type match columns.  A single
    ``StandardScaler`` instance is handed to every ``data.extract`` call
    via its ``scaler`` argument (how extract uses it is not visible here;
    presumably it fits on train and reuses on test -- TODO confirm).

    Writes a 'pre_match_linear' column to the train and test 'models'
    frames (``model_train_cv_parallel``'s 'predict' entry and the refitted
    model's class-1 probability respectively, both shifted by -0.5).
    """
    scaler = skl.preprocessing.StandardScaler()

    def build_features(dset):
        groups = [
            ('models', ['time_xgb']),
            ('synergy_sum', None),
            ('hero_roles_bag', None),
            ('match', [
                'lobby_type_practice',
                'lobby_type_public',
                'lobby_type_ranked',
            ]),
        ]
        return data.extract(dset, groups, scaler=scaler)

    clf = skl.linear_model.LogisticRegression(random_state=123, C=0.001)

    # Train features are extracted before test features, as in the
    # original call order, so the shared scaler sees the train set first.
    train_X = build_features('train')
    train_y = data.get('train', df='y')

    cv_result = model_train_cv_parallel(clf, train_X, train_y)
    data.get('train', 'models')['pre_match_linear'] = (
        cv_result['predict'] - 0.5)

    clf.fit(train_X, train_y)
    test_proba = clf.predict_proba(build_features('test'))
    data.get('test', 'models')['pre_match_linear'] = test_proba[:, 1] - 0.5
def data_v6(data):
    """Fit ExtraTrees on the draft features and store 'draft_ext'.

    The 'nums', 'cats' and 'nans' groups are pulled with ``data.extract``
    and NaNs are replaced by -999.  The train 'models' frame receives the
    'predict' entry of ``model_train_cv_parallel`` (run over 16 stratified,
    shuffled folds) minus 0.5; the test 'models' frame receives the class-1
    probability of the model refitted on all training rows, minus 0.5.
    """
    def features_for(dset):
        extracted = data.extract(dset, [
            ('nums', None),
            ('cats', None),
            ('nans', None),
        ])
        return extracted.fillna(-999)

    # 0.46007 (value recorded by the original author; presumably a
    # validation score -- TODO confirm which metric)
    forest_params = dict(
        n_estimators=1000,
        criterion='entropy',
        min_samples_leaf=5,
        max_features=0.8,
        n_jobs=8,
        random_state=42,
    )
    estimator = skl.ensemble.ExtraTreesClassifier(**forest_params)

    features = features_for('train')
    target = data.get('train', 'y')
    test_features = features_for('test')

    splitter = skl.cross_validation.StratifiedKFold(
        target, n_folds=16, shuffle=True, random_state=1234)

    train_scores = model_train_cv_parallel(
        estimator, features, target, n_jobs=1, cv=splitter)['predict']
    data.get('train', 'models')['draft_ext'] = train_scores - 0.5

    estimator.fit(features, target)
    # Column 1 of predict_proba is the probability of the positive class.
    test_scores = estimator.predict_proba(test_features)[:, 1]
    data.get('test', 'models')['draft_ext'] = test_scores - 0.5
def data_v5(data):
    """Fit LogisticXGB on the draft features and store 'draft_xgb'.

    The 'nums', 'cats' and 'nans' groups are pulled with ``data.extract``
    and NaNs are replaced by -999.  For both train and test a new empty
    'models' frame (sharing the feature frame's index) is assigned first.
    The train column is the 'predict' entry of ``model_train_cv_parallel``
    (16 stratified, shuffled folds) minus 0.5; the test column is the
    class-1 probability of the model refitted on all training rows, minus
    0.5.
    """
    def features_for(dset):
        extracted = data.extract(dset, [
            ('nums', None),
            ('cats', None),
            ('nans', None),
        ])
        return extracted.fillna(-999)

    # 0.46394 (value recorded by the original author; presumably a
    # validation score -- TODO confirm which metric)
    estimator = LogisticXGB(n_estimators=350,
                            learning_rate=0.05,
                            max_depth=7,
                            seed=42)

    features = features_for('train')
    target = data.get('train', 'y')
    test_features = features_for('test')

    splitter = skl.cross_validation.StratifiedKFold(
        target, n_folds=16, shuffle=True, random_state=1234)

    # A new, column-less frame becomes the train 'models' entry.
    data.dset('train')['models'] = pd.DataFrame(index=features.index)
    train_scores = model_train_cv_parallel(
        estimator, features, target, n_jobs=1, cv=splitter)['predict']
    data.get('train', 'models')['draft_xgb'] = train_scores - 0.5

    estimator.fit(features, target)

    # Same for the test 'models' entry.
    data.dset('test')['models'] = pd.DataFrame(index=test_features.index)
    test_scores = estimator.predict_proba(test_features)[:, 1]
    data.get('test', 'models')['draft_xgb'] = test_scores - 0.5
def data_v30(data):
    """Fit logistic regression on in-game features; store 'common_linear'.

    Feature groups are pulled with ``data.extract`` and converted with
    ``to_sparse(0)``.  Three columns ('time_xgb', 'anti_synergy',
    'synergy') are standardised: the shared scaler is fitted when the
    train frame is built and only applied for any other dataset.

    The train 'models' frame receives the 'predict' entry of
    ``model_train_cv_parallel`` minus 0.5; the test 'models' frame receives
    the class-1 probability of the model refitted on all training rows,
    minus 0.5.
    """
    standardizer = skl.preprocessing.StandardScaler()

    def features_for(dset):
        table = data.extract(dset, [
            ('match', ['lobby_type_practice']),
            ('models', ['time_xgb']),
            ('synergy_sum', None),
            ('item_counts', None),
            ('ability_counts', None),
            ('team_diffs', ['first_ward']),
            ('fb', ['first_blood_team']),
            ('gold_counts', None),
            ('xp_counts', None),
            ('lh_counts', None),
            ('kills_counts', None),
        ]).to_sparse(0)
        columns = ['time_xgb', 'anti_synergy', 'synergy']
        # Fit the scaler only on the training data; reuse it elsewhere.
        if dset == 'train':
            table[columns] = standardizer.fit_transform(table[columns])
        else:
            table[columns] = standardizer.transform(table[columns])
        return table

    estimator = skl.linear_model.LogisticRegression(
        random_state=1234, C=0.005)

    # Building the train frame first is required: it fits the scaler.
    features = features_for('train')
    target = data.get('train', df='y')

    train_scores = model_train_cv_parallel(
        estimator, features, target, n_jobs=1)['predict']
    data.get('train', 'models')['common_linear'] = train_scores - 0.5

    estimator.fit(features, target)
    # Column 1 of predict_proba is the probability of the positive class.
    test_scores = estimator.predict_proba(features_for('test'))[:, 1]
    data.get('test', 'models')['common_linear'] = test_scores - 0.5
def data_v24(data):
    """Fit logistic regression on pre-match features; store 'pre_match_linear'.

    Feature groups ('time_xgb' model column, synergy sums, hero-roles bag,
    three lobby-type flags) are pulled with ``data.extract``, which is also
    given one shared ``StandardScaler`` through its ``scaler`` argument
    (its handling inside extract is not visible here -- TODO confirm).

    The train 'models' frame receives the 'predict' entry of
    ``model_train_cv_parallel`` minus 0.5; the test 'models' frame receives
    the class-1 probability of the model refitted on all training rows,
    minus 0.5.
    """
    standardizer = skl.preprocessing.StandardScaler()

    def features_for(dset):
        lobby_columns = [
            'lobby_type_practice',
            'lobby_type_public',
            'lobby_type_ranked',
        ]
        return data.extract(dset, [
            ('models', ['time_xgb']),
            ('synergy_sum', None),
            ('hero_roles_bag', None),
            ('match', lobby_columns),
        ], scaler=standardizer)

    estimator = skl.linear_model.LogisticRegression(
        random_state=123, C=0.001)

    # Train features are requested before test features, matching the
    # original call order for the shared scaler.
    features = features_for('train')
    target = data.get('train', df='y')

    train_scores = model_train_cv_parallel(
        estimator, features, target)['predict']
    data.get('train', 'models')['pre_match_linear'] = train_scores - 0.5

    estimator.fit(features, target)
    # Column 1 of predict_proba is the probability of the positive class.
    test_scores = estimator.predict_proba(features_for('test'))[:, 1]
    data.get('test', 'models')['pre_match_linear'] = test_scores - 0.5