-
Notifications
You must be signed in to change notification settings - Fork 0
/
learn.py
63 lines (42 loc) · 1.59 KB
/
learn.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
__author__ = 'giulio'
import binascii
import os

import numpy as np
from sklearn.ensemble import GradientBoostingClassifier, ExtraTreesClassifier
from sklearn.preprocessing import scale, MinMaxScaler
from sklearn.svm import SVC

import utils as ut
# ---------------------------------------------------------------------------
# Script body: load the feature matrices through ``utils``, standardise them,
# fit an RBF SVC and a gradient-boosting classifier, blend the column-1
# probabilities of both models, zero the predictions of the rows listed in
# data/emptyfiles.txt and write a ``bidder_id,prediction`` CSV under
# data/subm/ with a random file name.
# ---------------------------------------------------------------------------

# ``reload`` is a builtin on Python 2 only; Python 3 provides it in importlib.
try:
    reload
except NameError:
    from importlib import reload

# Re-execute the helper module (presumably so that edits to utils.py are
# picked up when this script is re-run inside a live interpreter session).
reload(ut)

X_train, y_train = ut.load_X(), ut.load_y()
X_test = ut.load_X_test()

# Standardise train and test rows in a single pass so both parts are scaled
# with the same per-feature statistics, then split them back apart.
# The boundary is captured once, before X_train is rebound, so the split does
# not depend on the order of the two slice assignments.
n_train = X_train.shape[0]
X = np.vstack((X_train, X_test))
X = scale(X)
X_train = X[:n_train]
X_test = X[n_train:]

# Alternative scaling left disabled by the author:
# X = MinMaxScaler().fit_transform(X)

# NOTE(review): gamma=0.0 relies on the legacy scikit-learn meaning of that
# value (1 / n_features); newer releases treat it differently - confirm
# against the scikit-learn version this project pins.
# probability=True is required for the predict_proba call below.
clf1 = SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, degree=3,
           gamma=0.0, kernel='rbf', max_iter=-1, probability=True,
           random_state=None, shrinking=True, tol=0.001, verbose=False)

clf2 = GradientBoostingClassifier(init=None, learning_rate=0.1,
                                  loss='deviance', max_depth=3,
                                  max_features=None, max_leaf_nodes=None,
                                  min_samples_leaf=1, min_samples_split=2,
                                  n_estimators=100, random_state=None,
                                  subsample=1.0, verbose=0, warm_start=False)

clf1.fit(X_train, y_train)
clf2.fit(X_train, y_train)

# Column 1 of predict_proba is the probability of the second class in
# ``classes_`` order (presumably the positive label - confirm with load_y).
clf1_y = clf1.predict_proba(X_test)[:, 1]
clf2_y = clf2.predict_proba(X_test)[:, 1]

# Linear blend of the two models: alpha weights the SVC, (1 - alpha) the
# gradient-boosting model. With alpha = 0.0 the SVC output is multiplied by
# zero, so the blend equals the gradient-boosting prediction.
alpha = 0.0
tot = alpha * clf1_y + (1 - alpha) * clf2_y

# Rows listed in emptyfiles.txt get their prediction forced to zero.
# NOTE(review): 2013 looks like the offset between the ids stored in that
# file and row positions in the test matrix - confirm against the data prep.
# Ids that fall below zero after the shift are dropped.
empty = np.loadtxt("data/emptyfiles.txt", dtype='int')
empty -= 2013
empty = empty[empty >= 0]
# Single-argument print(...) produces the same output on Python 2 and 3.
print(empty)
tot[empty] = 0.0000000000

# Debug spot-check of one prediction. The index is hard-coded, so skip it
# when the test set is too small instead of crashing after training.
_debug_row = 4686
if _debug_row < tot.shape[0]:
    print(tot[_debug_row])

_test_ids = np.loadtxt("./data/test_ids.csv", dtype='str', usecols=(1, ))
# Two-column table: one (id, prediction-as-text) pair per row.
prediction = np.vstack((_test_ids, tot.astype(str))).T

# 16 hex characters from 8 random bytes, used as the submission file name.
# hexlify works on both Python 2 and 3 (``bytes.encode('hex')`` is Python 2
# only); str(...) keeps the result a native string on either version.
hash_id = str(binascii.hexlify(os.urandom(8)).decode('ascii'))

# Make sure the output directory exists so the run is not lost at the very
# last step (os.makedirs has no exist_ok argument on Python 2).
if not os.path.isdir("data/subm"):
    os.makedirs("data/subm")

np.savetxt(
    "data/subm/%s.csv" % hash_id, prediction, fmt="%s,%s", delimiter=",",
    header="bidder_id,prediction", comments=''
)