コード例 #1
0
ファイル: dataset.py プロジェクト: blagoffvyacheslav/kaggle
def data_v6(data):
    """Fit an ExtraTreesClassifier and store its outputs as 'draft_ext'.

    Features come from the 'nums', 'cats' and 'nans' groups returned by
    ``data.extract`` with missing values replaced by -999. The 'predict'
    entry returned by ``model_train_cv_parallel`` (presumably cross-validated
    predictions -- confirm against that helper) and the second
    ``predict_proba`` column for the test set are both shifted by -0.5 and
    written into the respective 'models' frames.
    """
    def _features(dset='train'):
        # The spec is rebuilt on every call; NaNs are replaced with -999.
        groups = [(name, None) for name in ('nums', 'cats', 'nans')]
        extracted = data.extract(dset, groups)
        return extracted.fillna(-999)

    # 0.46007 (figure left by the original author; presumably a score)
    params = dict(
        n_estimators=1000,
        criterion='entropy',
        min_samples_leaf=5,
        max_features=0.8,
        n_jobs=8,
        random_state=42,
    )
    model = skl.ensemble.ExtraTreesClassifier(**params)

    X = _features()
    y = data.get('train', 'y')
    X_test = _features('test')

    cv = skl.cross_validation.StratifiedKFold(
        y, n_folds=16, shuffle=True, random_state=1234)

    # Compute the values first, then look up the target frame.
    cv_result = model_train_cv_parallel(model, X, y, n_jobs=1, cv=cv)
    train_pred = cv_result['predict'] - 0.5
    data.get('train', 'models')['draft_ext'] = train_pred

    model.fit(X, y)
    test_pred = model.predict_proba(X_test)[:, 1] - 0.5
    data.get('test', 'models')['draft_ext'] = test_pred
コード例 #2
0
ファイル: dataset.py プロジェクト: tyz910/kaggle
def data_v30(data):
    """Fit a LogisticRegression and store its outputs as 'common_linear'.

    The columns 'time_xgb', 'anti_synergy' and 'synergy' are standardised
    with a StandardScaler that is fitted on the train features and reused
    for the test features. Train and test outputs are shifted by -0.5 and
    written into the respective 'models' frames.
    """
    scaler = skl.preprocessing.StandardScaler()

    def _features(dset='train'):
        spec = [
            ('match', ['lobby_type_practice']),
            ('models', ['time_xgb']),
        ]
        spec += [(g, None) for g in ('synergy_sum', 'item_counts',
                                     'ability_counts')]
        spec += [
            ('team_diffs', ['first_ward']),
            ('fb', ['first_blood_team']),
        ]
        spec += [(g, None) for g in ('gold_counts', 'xp_counts',
                                     'lh_counts', 'kills_counts')]
        X = data.extract(dset, spec).to_sparse(0)

        cols = ['time_xgb', 'anti_synergy', 'synergy']
        if dset == 'train':
            # Fit the scaler only on the train features.
            X[cols] = scaler.fit_transform(X[cols])
        else:
            X[cols] = scaler.transform(X[cols])
        return X

    model = skl.linear_model.LogisticRegression(random_state=1234, C=0.005)
    X = _features()
    y = data.get('train', df='y')

    cv_result = model_train_cv_parallel(model, X, y, n_jobs=1)
    train_pred = cv_result['predict'] - 0.5
    data.get('train', 'models')['common_linear'] = train_pred

    model.fit(X, y)
    # Test features are extracted only after the scaler has been fitted.
    X_test = _features('test')
    test_pred = model.predict_proba(X_test)[:, 1] - 0.5
    data.get('test', 'models')['common_linear'] = test_pred
コード例 #3
0
ファイル: dataset.py プロジェクト: blagoffvyacheslav/kaggle
def data_v5(data):
    """Fit a LogisticXGB model and store its outputs as 'draft_xgb'.

    A new 'models' DataFrame, indexed like the extracted features, is
    installed on both the train and test datasets before the column is
    written. Features are the 'nums', 'cats' and 'nans' groups with NaNs
    replaced by -999; both outputs are shifted by -0.5.
    """
    def _features(dset='train'):
        groups = [(name, None) for name in ('nums', 'cats', 'nans')]
        extracted = data.extract(dset, groups)
        return extracted.fillna(-999)

    # 0.46394 (figure left by the original author; presumably a score)
    params = dict(n_estimators=350, learning_rate=0.05, max_depth=7, seed=42)
    model = LogisticXGB(**params)

    X = _features()
    y = data.get('train', 'y')
    X_test = _features('test')

    cv = skl.cross_validation.StratifiedKFold(
        y, n_folds=16, shuffle=True, random_state=1234)

    # Start the train 'models' frame from scratch, aligned to X.
    empty_train = pd.DataFrame(index=X.index)
    data.dset('train')['models'] = empty_train
    cv_result = model_train_cv_parallel(model, X, y, n_jobs=1, cv=cv)
    train_pred = cv_result['predict'] - 0.5
    data.get('train', 'models')['draft_xgb'] = train_pred

    model.fit(X, y)
    # Same for the test 'models' frame, aligned to X_test.
    empty_test = pd.DataFrame(index=X_test.index)
    data.dset('test')['models'] = empty_test
    test_pred = model.predict_proba(X_test)[:, 1] - 0.5
    data.get('test', 'models')['draft_xgb'] = test_pred
コード例 #4
0
ファイル: dataset.py プロジェクト: tyz910/kaggle
def data_v31(data):
    """Fit a LogisticXGB model and store its outputs as 'common_xgb'.

    Features are extracted with ``data.extract`` and converted with
    ``to_sparse(0)``. The 'predict' entry from ``model_train_cv_parallel``
    and the second ``predict_proba`` column for the test set are shifted by
    -0.5 and written into the respective 'models' frames.
    """
    def _features(dset='train'):
        spec = [
            ('match', ['lobby_type_practice']),
            ('models', ['time_xgb']),
        ]
        spec += [(g, None) for g in ('synergy_sum', 'item_counts',
                                     'ability_counts')]
        spec += [
            ('team_diffs', ['first_ward']),
            ('fb', ['first_blood_team']),
        ]
        spec += [(g, None) for g in ('gold_counts', 'xp_counts', 'lh_counts',
                                     'kills_counts', 'hero_roles_bag')]
        return data.extract(dset, spec).to_sparse(0)

    params = dict(
        n_estimators=500,
        learning_rate=0.1,
        max_depth=4,
        subsample=0.8,
        colsample_bytree=0.6,
        max_delta_step=1,
        seed=1234,
    )
    model = LogisticXGB(**params)
    X = _features()
    y = data.get('train', df='y')

    cv_result = model_train_cv_parallel(model, X, y, n_jobs=1)
    train_pred = cv_result['predict'] - 0.5
    data.get('train', 'models')['common_xgb'] = train_pred

    model.fit(X, y)
    X_test = _features('test')
    test_pred = model.predict_proba(X_test)[:, 1] - 0.5
    data.get('test', 'models')['common_xgb'] = test_pred
コード例 #5
0
def data_v32(data):
    """Fit a LogisticXGB model and store its outputs as 'ensemble1_xgb'.

    The features are 'lobby_type_practice' from 'match' plus five existing
    columns of the 'models' frame, converted with ``to_sparse(0)``. Train
    and test outputs are shifted by -0.5 before being stored.
    """
    def _features(dset='train'):
        model_columns = [
            'time_xgb',
            'pre_match_linear',
            'pre_match_xgb',
            'common_linear',
            'common_xgb',
        ]
        spec = [
            ('match', ['lobby_type_practice']),
            ('models', model_columns),
        ]
        return data.extract(dset, spec).to_sparse(0)

    params = dict(n_estimators=300, learning_rate=0.01, max_depth=4,
                  max_delta_step=1, seed=1234)
    model = LogisticXGB(**params)
    X = _features()
    y = data.get('train', df='y')

    cv_result = model_train_cv_parallel(model, X, y, n_jobs=1)
    train_pred = cv_result['predict'] - 0.5
    data.get('train', 'models')['ensemble1_xgb'] = train_pred

    model.fit(X, y)
    X_test = _features('test')
    test_pred = model.predict_proba(X_test)[:, 1] - 0.5
    data.get('test', 'models')['ensemble1_xgb'] = test_pred
コード例 #6
0
def data_v31(data):
    """Train LogisticXGB on the combined feature groups ('common_xgb').

    Writes two columns named 'common_xgb': one into the train 'models'
    frame (the 'predict' entry of ``model_train_cv_parallel`` minus 0.5)
    and one into the test 'models' frame (second ``predict_proba`` column
    of the model refitted on all train rows, minus 0.5).
    """
    def _build_matrix(dset='train'):
        # Entries with None pass no column list to data.extract.
        feature_spec = [
            ('match', ['lobby_type_practice']),
            ('models', ['time_xgb']),
            ('synergy_sum', None),
            ('item_counts', None),
            ('ability_counts', None),
            ('team_diffs', ['first_ward']),
            ('fb', ['first_blood_team']),
            ('gold_counts', None),
            ('xp_counts', None),
            ('lh_counts', None),
            ('kills_counts', None),
            ('hero_roles_bag', None),
        ]
        frame = data.extract(dset, feature_spec)
        return frame.to_sparse(0)

    model = LogisticXGB(
        n_estimators=500, learning_rate=0.1, max_depth=4, subsample=0.8,
        colsample_bytree=0.6, max_delta_step=1, seed=1234)

    X = _build_matrix()
    y = data.get('train', df='y')

    train_scores = model_train_cv_parallel(model, X, y, n_jobs=1)['predict']
    train_scores = train_scores - 0.5
    data.get('train', 'models')['common_xgb'] = train_scores

    model.fit(X, y)
    test_proba = model.predict_proba(_build_matrix('test'))
    test_scores = test_proba[:, 1] - 0.5
    data.get('test', 'models')['common_xgb'] = test_scores
コード例 #7
0
def data_v25(data):
    """Fit a LogisticXGB model and store its outputs as 'pre_match_xgb'.

    Features are 'time_xgb' from 'models', the 'synergy_sum' and
    'hero_roles_bag' groups, and three lobby-type columns from 'match'.
    Train and test outputs are shifted by -0.5 before being stored in the
    respective 'models' frames.
    """
    def _features(dset='train'):
        lobby_columns = [
            'lobby_type_practice',
            'lobby_type_public',
            'lobby_type_ranked',
        ]
        spec = [
            ('models', ['time_xgb']),
            ('synergy_sum', None),
            ('hero_roles_bag', None),
            ('match', lobby_columns),
        ]
        return data.extract(dset, spec)

    params = dict(
        n_estimators=100,
        learning_rate=0.03,
        max_depth=4,
        subsample=0.8,
        colsample_bytree=0.8,
        seed=1234,
        nthread=1,
    )
    model = LogisticXGB(**params)
    X = _features()
    y = data.get('train', df='y')

    cv_result = model_train_cv_parallel(model, X, y)
    train_pred = cv_result['predict'] - 0.5
    data.get('train', 'models')['pre_match_xgb'] = train_pred

    model.fit(X, y)
    X_test = _features('test')
    test_pred = model.predict_proba(X_test)[:, 1] - 0.5
    data.get('test', 'models')['pre_match_xgb'] = test_pred
コード例 #8
0
ファイル: dataset.py プロジェクト: tyz910/kaggle
def data_v23(data):
    """Create the 'models' frames and fill their 'time_xgb' column.

    A LogisticXGB model is trained on the single 'start_time' column of the
    'match' frame. New DataFrames, indexed like the train and test feature
    frames, are installed as ``data.dset(...)['models']`` holding the
    model outputs shifted by -0.5.
    """
    def _start_time(dset):
        # Double brackets keep the result a one-column DataFrame.
        return data.get(dset, 'match')[['start_time']]

    params = dict(n_estimators=2, max_depth=6, learning_rate=0.001, nthread=1)
    model = LogisticXGB(**params)
    X = _start_time('train')
    y = data.get('train', 'y')

    train_models = pd.DataFrame(index=X.index)
    cv_result = model_train_cv_parallel(model, X, y)
    train_models['time_xgb'] = cv_result['predict'] - 0.5
    data.dset('train')['models'] = train_models

    X_test = _start_time('test')
    test_models = pd.DataFrame(index=X_test.index)
    model.fit(X, y)
    test_proba = model.predict_proba(X_test)
    test_models['time_xgb'] = test_proba[:, 1] - 0.5
    data.dset('test')['models'] = test_models
コード例 #9
0
ファイル: dataset.py プロジェクト: tyz910/kaggle
def data_v32(data):
    """Train LogisticXGB on existing 'models' columns ('ensemble1_xgb').

    Inputs are 'lobby_type_practice' from 'match' and the 'models' columns
    'time_xgb', 'pre_match_linear', 'pre_match_xgb', 'common_linear' and
    'common_xgb'. The resulting train and test outputs, each shifted by
    -0.5, are stored as 'ensemble1_xgb' in the respective 'models' frames.
    """
    def _build_matrix(dset='train'):
        feature_spec = [
            ('match', ['lobby_type_practice']),
            ('models', ['time_xgb', 'pre_match_linear', 'pre_match_xgb',
                        'common_linear', 'common_xgb']),
        ]
        frame = data.extract(dset, feature_spec)
        return frame.to_sparse(0)

    model = LogisticXGB(
        n_estimators=300,
        learning_rate=0.01,
        max_depth=4,
        max_delta_step=1,
        seed=1234,
    )
    X = _build_matrix()
    y = data.get('train', df='y')

    train_scores = model_train_cv_parallel(model, X, y, n_jobs=1)['predict']
    train_scores = train_scores - 0.5
    data.get('train', 'models')['ensemble1_xgb'] = train_scores

    model.fit(X, y)
    test_proba = model.predict_proba(_build_matrix('test'))
    test_scores = test_proba[:, 1] - 0.5
    data.get('test', 'models')['ensemble1_xgb'] = test_scores
コード例 #10
0
def data_v23(data):
    """Train LogisticXGB on 'start_time' and install fresh 'models' frames.

    For both train and test, a new DataFrame sharing the index of the
    'start_time' feature frame is created, given a 'time_xgb' column
    (model output minus 0.5) and assigned to ``data.dset(...)['models']``.
    """
    model = LogisticXGB(
        n_estimators=2, max_depth=6, learning_rate=0.001, nthread=1)

    # --- train part -------------------------------------------------------
    train_match = data.get('train', 'match')
    X = train_match[['start_time']]
    y = data.get('train', 'y')

    train_models = pd.DataFrame(index=X.index)
    train_scores = model_train_cv_parallel(model, X, y)['predict']
    train_models['time_xgb'] = train_scores - 0.5
    data.dset('train')['models'] = train_models

    # --- test part --------------------------------------------------------
    test_match = data.get('test', 'match')
    X_test = test_match[['start_time']]
    test_models = pd.DataFrame(index=X_test.index)

    model.fit(X, y)
    positive_proba = model.predict_proba(X_test)[:, 1]
    test_models['time_xgb'] = positive_proba - 0.5
    data.dset('test')['models'] = test_models
コード例 #11
0
ファイル: dataset.py プロジェクト: tyz910/kaggle
def data_v25(data):
    """Train LogisticXGB and write the 'pre_match_xgb' model columns.

    The train column is the 'predict' entry of ``model_train_cv_parallel``
    minus 0.5; the test column is the second ``predict_proba`` column of
    the model refitted on all train rows, minus 0.5.
    """
    def _build_matrix(dset='train'):
        feature_spec = [
            ('models', ['time_xgb']),
            ('synergy_sum', None),
            ('hero_roles_bag', None),
            ('match', ['lobby_type_practice',
                       'lobby_type_public',
                       'lobby_type_ranked']),
        ]
        return data.extract(dset, feature_spec)

    model = LogisticXGB(
        n_estimators=100,
        learning_rate=0.03,
        max_depth=4,
        subsample=0.8,
        colsample_bytree=0.8,
        seed=1234,
        nthread=1,
    )
    X = _build_matrix()
    y = data.get('train', df='y')

    train_scores = model_train_cv_parallel(model, X, y)['predict']
    train_scores = train_scores - 0.5
    data.get('train', 'models')['pre_match_xgb'] = train_scores

    model.fit(X, y)
    test_proba = model.predict_proba(_build_matrix('test'))
    test_scores = test_proba[:, 1] - 0.5
    data.get('test', 'models')['pre_match_xgb'] = test_scores
コード例 #12
0
ファイル: dataset.py プロジェクト: tyz910/kaggle
def data_v24(data):
    """Fit a LogisticRegression and store its outputs as 'pre_match_linear'.

    A single StandardScaler instance is handed to ``data.extract`` for both
    the train and the test extraction (how ``extract`` uses it is not
    visible here -- presumably fit on train, reuse on test; confirm).
    Train and test outputs are shifted by -0.5 before being stored.
    """
    scaler = skl.preprocessing.StandardScaler()

    def _features(dset='train'):
        lobby_columns = [
            'lobby_type_practice',
            'lobby_type_public',
            'lobby_type_ranked',
        ]
        spec = [
            ('models', ['time_xgb']),
            ('synergy_sum', None),
            ('hero_roles_bag', None),
            ('match', lobby_columns),
        ]
        return data.extract(dset, spec, scaler=scaler)

    model = skl.linear_model.LogisticRegression(random_state=123, C=0.001)
    X = _features()
    y = data.get('train', df='y')

    cv_result = model_train_cv_parallel(model, X, y)
    train_pred = cv_result['predict'] - 0.5
    data.get('train', 'models')['pre_match_linear'] = train_pred

    model.fit(X, y)
    # Test extraction happens after the train extraction, as before.
    X_test = _features('test')
    test_pred = model.predict_proba(X_test)[:, 1] - 0.5
    data.get('test', 'models')['pre_match_linear'] = test_pred
コード例 #13
0
ファイル: dataset.py プロジェクト: tyz910/kaggle
def data_v6(data):
    """Train ExtraTreesClassifier and write the 'draft_ext' model columns.

    Uses the 'nums', 'cats' and 'nans' feature groups (NaNs filled with
    -999) and a 16-fold shuffled StratifiedKFold passed to
    ``model_train_cv_parallel``. Both stored columns are shifted by -0.5.
    """
    def _build_matrix(dset='train'):
        feature_spec = [
            ('nums', None),
            ('cats', None),
            ('nans', None),
        ]
        frame = data.extract(dset, feature_spec)
        return frame.fillna(-999)

    # 0.46007 (figure left by the original author; presumably a score)
    model = skl.ensemble.ExtraTreesClassifier(
        n_estimators=1000,
        criterion='entropy',
        min_samples_leaf=5,
        max_features=0.8,
        n_jobs=8,
        random_state=42,
    )
    X = _build_matrix()
    y = data.get('train', 'y')
    X_test = _build_matrix('test')

    folds = skl.cross_validation.StratifiedKFold(
        y, n_folds=16, shuffle=True, random_state=1234)
    train_scores = model_train_cv_parallel(
        model, X, y, n_jobs=1, cv=folds)['predict']
    train_scores = train_scores - 0.5
    data.get('train', 'models')['draft_ext'] = train_scores

    model.fit(X, y)
    positive_proba = model.predict_proba(X_test)[:, 1]
    test_scores = positive_proba - 0.5
    data.get('test', 'models')['draft_ext'] = test_scores
コード例 #14
0
ファイル: dataset.py プロジェクト: tyz910/kaggle
def data_v5(data):
    """Train LogisticXGB and write 'draft_xgb' into fresh 'models' frames.

    The train and test 'models' entries are each replaced by an empty
    DataFrame sharing the index of the corresponding feature frame before
    the 'draft_xgb' column (model output minus 0.5) is written.
    """
    def _build_matrix(dset='train'):
        feature_spec = [
            ('nums', None),
            ('cats', None),
            ('nans', None),
        ]
        frame = data.extract(dset, feature_spec)
        return frame.fillna(-999)

    # 0.46394 (figure left by the original author; presumably a score)
    model = LogisticXGB(
        n_estimators=350,
        learning_rate=0.05,
        max_depth=7,
        seed=42,
    )
    X = _build_matrix()
    y = data.get('train', 'y')
    X_test = _build_matrix('test')

    folds = skl.cross_validation.StratifiedKFold(
        y, n_folds=16, shuffle=True, random_state=1234)

    train_frame = pd.DataFrame(index=X.index)
    data.dset('train')['models'] = train_frame
    train_scores = model_train_cv_parallel(
        model, X, y, n_jobs=1, cv=folds)['predict']
    train_scores = train_scores - 0.5
    data.get('train', 'models')['draft_xgb'] = train_scores

    model.fit(X, y)
    test_frame = pd.DataFrame(index=X_test.index)
    data.dset('test')['models'] = test_frame
    positive_proba = model.predict_proba(X_test)[:, 1]
    test_scores = positive_proba - 0.5
    data.get('test', 'models')['draft_xgb'] = test_scores
コード例 #15
0
def data_v30(data):
    """Train LogisticRegression and write the 'common_linear' model columns.

    Three columns ('time_xgb', 'anti_synergy', 'synergy') are standardised:
    the scaler is fitted when the train set is requested and only applied
    for any other dataset name. Stored outputs are shifted by -0.5.
    """
    scaler = skl.preprocessing.StandardScaler()

    def _build_matrix(dset='train'):
        feature_spec = [
            ('match', ['lobby_type_practice']),
            ('models', ['time_xgb']),
            ('synergy_sum', None),
            ('item_counts', None),
            ('ability_counts', None),
            ('team_diffs', ['first_ward']),
            ('fb', ['first_blood_team']),
            ('gold_counts', None),
            ('xp_counts', None),
            ('lh_counts', None),
            ('kills_counts', None),
        ]
        X = data.extract(dset, feature_spec).to_sparse(0)

        to_scale = ['time_xgb', 'anti_synergy', 'synergy']
        if dset == 'train':
            scaled = scaler.fit_transform(X[to_scale])
        else:
            # Reuse the statistics learned from the train set.
            scaled = scaler.transform(X[to_scale])
        X[to_scale] = scaled
        return X

    model = skl.linear_model.LogisticRegression(random_state=1234, C=0.005)
    X = _build_matrix()
    y = data.get('train', df='y')

    train_scores = model_train_cv_parallel(model, X, y, n_jobs=1)['predict']
    train_scores = train_scores - 0.5
    data.get('train', 'models')['common_linear'] = train_scores

    model.fit(X, y)
    test_proba = model.predict_proba(_build_matrix('test'))
    test_scores = test_proba[:, 1] - 0.5
    data.get('test', 'models')['common_linear'] = test_scores
コード例 #16
0
def data_v24(data):
    """Train LogisticRegression and write 'pre_match_linear' model columns.

    The same StandardScaler object is passed as ``scaler=`` to every
    ``data.extract`` call (its handling inside ``extract`` is not visible
    here -- TODO confirm). The train column is the 'predict' entry of
    ``model_train_cv_parallel`` minus 0.5; the test column is the second
    ``predict_proba`` column minus 0.5.
    """
    scaler = skl.preprocessing.StandardScaler()

    def _build_matrix(dset='train'):
        feature_spec = [
            ('models', ['time_xgb']),
            ('synergy_sum', None),
            ('hero_roles_bag', None),
            ('match', ['lobby_type_practice',
                       'lobby_type_public',
                       'lobby_type_ranked']),
        ]
        return data.extract(dset, feature_spec, scaler=scaler)

    model = skl.linear_model.LogisticRegression(random_state=123, C=0.001)
    X = _build_matrix()
    y = data.get('train', df='y')

    train_scores = model_train_cv_parallel(model, X, y)['predict']
    train_scores = train_scores - 0.5
    data.get('train', 'models')['pre_match_linear'] = train_scores

    model.fit(X, y)
    test_proba = model.predict_proba(_build_matrix('test'))
    test_scores = test_proba[:, 1] - 0.5
    data.get('test', 'models')['pre_match_linear'] = test_scores