Example 1

"""
Here I will try to use xgb.
"""
import pandas as pd
import xgboost as xgb
from sklearn.cross_validation import ShuffleSplit
import numpy as np
from gini_normalized import normalized_gini
from preprocessing.to_labels import to_labels
import math

train = pd.read_csv("../data/train_new.csv")
hold = pd.read_csv("../data/hold_new.csv")
test = pd.read_csv("../data/test.csv")

par = (train, hold, test)
train, hold, test = to_labels(par)

y = train["Hazard"]
# X = train.drop(['Hazard', 'Id'], 1)
# X = train.drop(['Hazard', 'Id', 'T2_V10', 'T2_V7', 'T1_V13', 'T1_V10', 'tp_59', 'tp_84', 'global_mean', 'global_median', 'global_std'], 1)
# X_test = test.drop(['Hazard', 'Id', 'T2_V10', 'T2_V7', 'T1_V13', 'T1_V10', 'tp_59', 'tp_84', 'global_mean', 'global_median', 'global_std'], 1)
X = train.drop(["Hazard", "Id", "T2_V10", "T2_V7", "T1_V13", "T1_V10"], 1)
X_hold = hold.drop(["Hazard", "Id", "T2_V10", "T2_V7", "T1_V13", "T1_V10"], 1)
X_test = test.drop(["Id", "T2_V10", "T2_V7", "T1_V13", "T1_V10"], 1)

params = {
    # 'objective': 'reg:linear',
    "objective": "count:poisson",
Example 2
import numpy as np
import pandas as pd
from sklearn.cross_validation import ShuffleSplit
from sklearn.ensemble import RandomForestRegressor
from preprocessing.to_labels import to_labels
from gini_normalized import normalized_gini

# joined = pd.read_csv('../data/joined.csv')
#
# train = joined[joined['Hazard'] != -1]
# test = joined[joined['Hazard'] == -1]

train = pd.read_csv('../data/train_new.csv')
hold = pd.read_csv('../data/hold_new.csv')
test = pd.read_csv('../data/test.csv')
# hold = pd.read_csv('../data/hold_new.csv')

train, hold, test = to_labels((train, hold, test))

y = train['Hazard']
X = train.drop(['Hazard', 'Id', 'T2_V10', 'T2_V7', 'T1_V13', 'T1_V10'], 1)
X_hold = hold.drop(['Hazard', 'Id', 'T2_V10', 'T2_V7', 'T1_V13', 'T1_V10'], 1)
X_test = test.drop(['Id', 'T2_V10', 'T2_V7', 'T1_V13', 'T1_V10'], 1)

random_state = 42

ind = 1

if ind == 1:
    rs = ShuffleSplit(len(y),
                      n_iter=10,
                      test_size=0.5,
                      random_state=random_state)
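normalized_gini is imported in every example but its implementation is not shown. Here is a plausible sketch of that helper, assuming it computes the standard normalized Gini coefficient used for this kind of ranking problem (only the ordering of predictions matters; 1.0 corresponds to a perfect ordering).

import numpy as np

def gini(actual, predicted):
    # Sort actual losses by predicted value, descending, breaking ties by
    # original position so the result is deterministic.
    actual = np.asarray(actual, dtype=float)
    n = len(actual)
    order = np.lexsort((np.arange(n), -np.asarray(predicted, dtype=float)))
    cum_losses = np.cumsum(actual[order]) / actual.sum()
    # Area between the Lorenz curve of this ordering and the diagonal.
    return cum_losses.sum() / n - (n + 1.0) / (2.0 * n)

def normalized_gini(actual, predicted):
    # Scale by the Gini of a perfect ordering so the score tops out at 1.
    return gini(actual, predicted) / gini(actual, actual)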
Example 3
import numpy as np
import pandas as pd
from sklearn.cross_validation import ShuffleSplit
from sklearn.ensemble import RandomForestRegressor
from preprocessing.to_labels import to_labels
from gini_normalized import normalized_gini

# joined = pd.read_csv('../data/joined.csv')
#
# train = joined[joined['Hazard'] != -1]
# test = joined[joined['Hazard'] == -1]

train = pd.read_csv("../data/train_new.csv")
hold = pd.read_csv("../data/hold_new.csv")
test = pd.read_csv("../data/test.csv")
# hold = pd.read_csv('../data/hold_new.csv')

train, hold, test = to_labels((train, hold, test))

y = train["Hazard"]
X = train.drop(["Hazard", "Id", "T2_V10", "T2_V7", "T1_V13", "T1_V10"], 1)
X_hold = hold.drop(["Hazard", "Id", "T2_V10", "T2_V7", "T1_V13", "T1_V10"], 1)
X_test = test.drop(["Id", "T2_V10", "T2_V7", "T1_V13", "T1_V10"], 1)

random_state = 42

ind = 1

if ind == 1:
    rs = ShuffleSplit(len(y), n_iter=10, test_size=0.5, random_state=random_state)

    result = []
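The script breaks off right after result = []. A hedged sketch of how the cross-validation loop might continue from there, assuming the old sklearn.cross_validation ShuffleSplit iteration protocol; the RandomForestRegressor settings are illustrative, not the original ones.

    for train_index, test_index in rs:
        X_tr, X_te = X.values[train_index], X.values[test_index]
        y_tr, y_te = y.values[train_index], y.values[test_index]

        # Illustrative forest size; the original setting is not shown.
        clf = RandomForestRegressor(n_estimators=100,
                                    n_jobs=-1,
                                    random_state=random_state)
        clf.fit(X_tr, y_tr)

        # Score each split with the competition metric.
        result.append(normalized_gini(y_te, clf.predict(X_te)))

    print("mean normalized gini: %.5f (+/- %.5f)" % (np.mean(result), np.std(result)))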
Example 4

'''
Here I will try to use xgb to predict log(1 + x).
'''
import pandas as pd
import xgboost as xgb
from sklearn.cross_validation import ShuffleSplit
import numpy as np
from gini_normalized import normalized_gini
from preprocessing.to_labels import to_labels
import math

train = pd.read_csv('../data/train_new.csv')
hold = pd.read_csv('../data/hold_new.csv')
test = pd.read_csv('../data/test.csv')

par = (train, hold, test)
train, hold, test = to_labels(par)

y = train['Hazard'].apply(lambda x: math.log(1 + x))

# X = train.drop(['Hazard', 'Id'], 1)
# X = train.drop(['Hazard', 'Id', 'T2_V10', 'T2_V7', 'T1_V13', 'T1_V10', 'tp_59', 'tp_84', 'global_mean', 'global_median', 'global_std'], 1)
# X_test = test.drop(['Hazard', 'Id', 'T2_V10', 'T2_V7', 'T1_V13', 'T1_V10', 'tp_59', 'tp_84', 'global_mean', 'global_median', 'global_std'], 1)
X = train.drop(['Hazard', 'Id', 'T2_V10', 'T2_V7', 'T1_V13', 'T1_V10'], 1)
X_hold = hold.drop(['Hazard', 'Id', 'T2_V10', 'T2_V7', 'T1_V13', 'T1_V10'], 1)
X_test = test.drop(['Id', 'T2_V10', 'T2_V7', 'T1_V13', 'T1_V10'], 1)

params = {
    # 'objective': 'reg:linear',
    'objective': 'count:poisson',
    # 'eta': 0.005,
    # 'min_child_weight': 6,
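Example 4 targets log(1 + Hazard), so its predictions have to be mapped back to the raw scale before scoring. A minimal sketch of that back-transform step, assuming a training flow like the one sketched for Example 1; the params dict below and the round count are illustrative stand-ins, since the original dict is truncated above.

params = {'objective': 'count:poisson'}   # minimal illustrative params

dtrain = xgb.DMatrix(X, label=y)          # y is already log(1 + Hazard)
dhold = xgb.DMatrix(X_hold)

model = xgb.train(params, dtrain, num_boost_round=1000)   # round count is an assumption

# Predictions come back on the log scale; invert the transform with
# expm1 (exp(x) - 1) before scoring against the raw Hazard values.
preds = np.expm1(model.predict(dhold))
print('hold-out normalized gini: %.5f' % normalized_gini(hold['Hazard'], preds))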