Example no. 1
# Support Vector Machines
import sys, os  # needed for the relative imports below
sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'components'))  # make the shared components importable
from reading_train_data import txs, tys
from reading_test_data import tds, tis
from write_results import write_predictions

from sklearn import metrics
from sklearn.svm import SVC
# fit an SVM model to the data
model = SVC(probability=True)  # probability=True is required for predict_proba below
model.fit(txs, tys[:, 1])

predicted = model.predict_proba(tds)

write_predictions("cart.csv", tis[:,0], predicted)
Example no. 2
#watchlist = [ (xgmat,'train') ]
##bst = xgb.train( plst, xgmat, num_round, watchlist );
#matrix = xgb.DMatrix(train_x,labels,weight = weights)


#100 features maximum :(
if get_feature_importance:
    bst = gbm.booster()  # gbm.get_booster() on newer xgboost versions
    prev_fn = bst.feature_names  # remember the original names so they can be restored below
    bst.feature_names = list(train_x.columns.values)  # args_x[1:]

    imps = bst.get_fscore()
    #print (len(imps))
    if configure_verbose_mode:
        print(imps)
    # features never used in a split are missing from get_fscore(), hence "NaN"
    imps = np.array([[str(imps[i]) if i in imps else "NaN" for i in bst.feature_names]])
    imps = np.append([train_x.columns.values], imps, axis=0)
    np.savetxt(feature_importance_file, imps, fmt='%s', delimiter=',')
    # restore the original feature names
    bst.feature_names = prev_fn


predictions = gbm.predict_proba(test_x)
arr = dtest[args_x[0]].values  # .as_matrix() was removed in newer pandas; .values is equivalent
write_predictions("1boost.csv", arr, predictions)

#import matplotlib.pyplot as plt
#xgb.plot_importance(gbm)
#plt.show()
#bad plot :(
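# A Python 3-friendly sketch of the same feature-importance export, assuming gbm is
# the trained xgboost model and train_x the training DataFrame from this script; the
# output file name is illustrative. Newer xgboost exposes the booster through
# gbm.get_booster() instead of gbm.booster().
booster = gbm.booster()
booster.feature_names = list(train_x.columns.values)
scores = booster.get_fscore()  # dict: feature name -> number of splits using it
rows = sorted(((name, scores.get(name, 0)) for name in booster.feature_names),
              key=lambda pair: pair[1], reverse=True)
with open("feature_importance_sorted.csv", "w") as out:
    out.write("feature,fscore\n")
    for name, count in rows:
        out.write("{},{}\n".format(name, count))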
Example no. 3
import sys, os  # needed for the relative imports below
sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'components'))  # make the shared components importable
from reading_train_data import train_data_x, train_data_y
from reading_test_data import test_data
from write_results import write_predictions

from sklearn import metrics
from sklearn.naive_bayes import GaussianNB

model = GaussianNB()
model.fit(train_data_x[:, 1:], train_data_y[:, 1])

predicted = model.predict_proba(test_data[:, 1:])

write_predictions("nb_submission.csv", test_data[:, 0], predicted)
Example no. 4
import sys, os  # needed for the relative imports below
sys.path.append(os.path.join(os.path.dirname(__file__), '..',
                             'components'))  # make the shared components importable
from reading_train_data import txs, tys
from reading_test_data import tds, tis
from write_results import write_predictions

from sklearn import metrics
from sklearn.neighbors import KNeighborsClassifier
# fit a k-nearest neighbor model to the data

model = KNeighborsClassifier(n_neighbors=5, weights='distance')  # keyword arguments; newer scikit-learn rejects these as positional
model.fit(txs, tys[:, 1])

predicted = model.predict_proba(tds)

write_predictions("k_nearest.csv", tis, predicted)
Example no. 5
import sys, os  # needed for the relative imports below
sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'components'))  # make the shared components importable
from reading_train_data import txs, tys
from reading_test_data import tds, tis
from write_results import write_predictions

from sklearn import metrics
from sklearn.neighbors import KNeighborsClassifier
# fit a k-nearest neighbor model to the data

model = KNeighborsClassifier(n_neighbors=5, weights='distance')  # keyword arguments; newer scikit-learn rejects these as positional
model.fit(txs, tys[:, 1])

predicted = model.predict_proba(tds)

write_predictions("k_nearest.csv", tis, predicted)
Example no. 6
# Classification and Regression Trees
import sys, os  # needed for the relative imports below
sys.path.append(os.path.join(os.path.dirname(__file__), '..',
                             'components'))  # make the shared components importable
from reading_train_data import txs, tys
from reading_test_data import tds, tis
from write_results import write_predictions

from sklearn import metrics
from sklearn.tree import DecisionTreeClassifier
# fit a CART model to the data
model = DecisionTreeClassifier()
model.fit(txs, tys[:, 1])

predicted = model.predict_proba(tds)

write_predictions("cart.csv", tis, predicted)
Example no. 7
import sys, os  # needed for the relative imports below
sys.path.append(os.path.join(os.path.dirname(__file__), '..',
                             'components'))  # make the shared components importable
from reading_train_data import train_data_x, train_data_y
from reading_test_data import test_data
from write_results import write_predictions

from sklearn import metrics
from sklearn.naive_bayes import GaussianNB

model = GaussianNB()
model.fit(train_data_x[:, 1:], train_data_y[:, 1])

predicted = model.predict_proba(test_data[:, 1:])

write_predictions("nb_submission.csv", test_data[:, 0], predicted)
Example no. 8
# imports and data loading assumed to match the other scripts in this set
import sys, os  # needed for the relative imports below
sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'components'))
import numpy as np
from sklearn.decomposition import PCA
from reading_train_data import txs, tys
from reading_test_data import tds, tis

num_components = 25  # assumed value; the "regression_pca25.csv" output name below suggests 25
pca = PCA(num_components)

x_transformed = pca.fit_transform(txs)
ts_transformed = pca.transform(tds)


from write_results import write_predictions
from sklearn import metrics
from sklearn.linear_model import LogisticRegression

model = LogisticRegression()
model.fit(x_transformed, tys[:, 1])

predicted = model.predict_proba(ts_transformed)

write_predictions("regression_pca25.csv", tis, predicted)
"""



writing_parts = [list(map(str,rw)) for rw in x_transformed]
labels = [ "feature" + str(i) for i in range(1,num_components+1)]
writing_parts = np.append([labels], writing_parts, axis=0)
np.savetxt("../../data/pca/training_pca" + str(num_components) + ".csv", writing_parts, fmt='%s', delimiter=',')

writing_parts = [list(map(str,rw)) for rw in ts_transformed]
labels = [ "feature" + str(i) for i in range(1,num_components+1)]
writing_parts = np.append([labels], writing_parts, axis=0)
np.savetxt("../../data/pca/test_pca" + str(num_components) + ".csv", writing_parts, fmt='%s', delimiter=',',newline='\n')
"""
#watchlist = [ (xgmat,'train') ]
##bst = xgb.train( plst, xgmat, num_round, watchlist );
#matrix = xgb.DMatrix(train_x,labels,weight = weights)


#100 features maximum :(
if get_feature_importance:
    bst = gbm.booster()  # gbm.get_booster() on newer xgboost versions
    prev_fn = bst.feature_names  # remember the original names so they can be restored below
    bst.feature_names = list(train_x.columns.values)  # args_x[1:]

    imps = bst.get_fscore()
    #print (len(imps))
    if configure_verbose_mode:
        print(imps)
    # features never used in a split are missing from get_fscore(), hence "NaN"
    imps = np.array([[str(imps[i]) if i in imps else "NaN" for i in bst.feature_names]])
    imps = np.append([train_x.columns.values], imps, axis=0)
    np.savetxt(feature_importance_file, imps, fmt='%s', delimiter=',')
    # restore the original feature names
    bst.feature_names = prev_fn


predictions = gbm.predict_proba(test_x)
arr = dtest[args_x[0]].values  # .as_matrix() was removed in newer pandas; .values is equivalent
write_predictions("1boost.csv", arr, predictions)

#import matplotlib.pyplot as plt
#xgb.plot_importance(gbm)
#plt.show()
#bad plot :(
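# The commented-out plot above is hard to read with many features. A sketch that
# plots only the top features on a larger figure; max_num_features is available in
# reasonably recent xgboost versions, and the figure size and top-25 cut are
# illustrative choices.
import matplotlib.pyplot as plt
import xgboost as xgb

fig, ax = plt.subplots(figsize=(8, 10))
xgb.plot_importance(gbm, ax=ax, max_num_features=25)
plt.tight_layout()
plt.show()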