예제 #1
0
    
    for i in pdata_nearest_neigh:
        (kx, kd) = k_nearest_cluster(nxs, nds, i)
        train_x = train_x.join(kx)
        test_x = test_x.join(kd)
    if configure_verbose_mode:
        print("Finished: algorithm nearest")


if pdata_regression_sgd:
    # Append a column "Mnew_sgd" holding the output of regression_sgd to both
    # the training and the test feature frames.
    #
    # `.values` replaces `.as_matrix()`, which was deprecated in pandas 0.23
    # and removed in pandas 1.0; for these calls it yields the same ndarray.
    # args_x[1:] leaves out the first listed column (presumably an id column
    # -- TODO confirm against where args_x is defined).
    nxs = dtrain[args_x[1:]].values
    nds = dtest[args_x[1:]].values

    # Column named by args_y[0]; passed as the second argument of
    # regression_sgd (presumably the regression target -- confirm).
    weights = dtrain[args_y[0]].values

    # For the training-set column only the leading 90% of the rows are passed
    # as the first two arguments, while all rows are passed as the third.
    # 0.9 is the best value found so far.
    aux_end = int(len(nxs) * 0.9)
    new_feature_train = regression_sgd(nxs[:aux_end, :], weights[:aux_end], nxs)
    # For the test-set column every training row is passed.
    new_feature_test = regression_sgd(nxs, weights, nds)

    new_feature_train = ps.DataFrame(list(new_feature_train), columns=["Mnew_sgd"])
    new_feature_test = ps.DataFrame(list(new_feature_test), columns=["Mnew_sgd"])
    # DataFrame.join aligns on the index; the frames built above carry the
    # default positional index.
    train_x = train_x.join(new_feature_train)
    test_x = test_x.join(new_feature_test)
    if configure_verbose_mode:
        print("Finished: regression sgd")

if pdata_additional:
    # Optional step: pass the training frame and then the test frame through
    # add_features (defined elsewhere) and rebind each name to the result.
    train_x = add_features(train_x)
    test_x = add_features(test_x)
    if configure_verbose_mode:
        # Progress message, only emitted in verbose mode.
        print("Finished: feature engeneering")

#if exclude_sth:
예제 #2
0
###
if pdata_add_features:
    # Optional step: pass the training frame and then the test frame through
    # add_features (defined elsewhere) and rebind each name to the result.
    train_x = add_features(train_x)
    test_x = add_features(test_x)

if add_column_as_regression_sgd:
    # Append a column "Mnew_sgd" holding the output of regression_sgd to both
    # the training and the test feature frames.
    #
    # `.values` replaces `.as_matrix()`, which was deprecated in pandas 0.23
    # and removed in pandas 1.0; for these calls it yields the same ndarray.
    #
    # Alternative that was tried: use all current columns instead
    #nxs = train_x.values
    #nds = test_x.values
    # Current choice: the raw columns args_x[1:] (the first listed column is
    # left out -- presumably an id column, TODO confirm).
    nxs = dtrain[args_x[1:]].values
    nds = dtest[args_x[1:]].values

    # Column named by args_y[0]; passed as the second argument of
    # regression_sgd (presumably the regression target -- confirm).
    weights = dtrain[args_y[0]].values

    # pdata_sgd is the fraction of leading training rows passed as the first
    # two arguments when producing the training-set column; all rows are
    # passed as the third argument.
    aux_end = int(len(nxs) * pdata_sgd)
    new_feature_train = regression_sgd(nxs[:aux_end, :], weights[:aux_end], nxs)
    # For the test-set column every training row is passed.
    new_feature_test = regression_sgd(nxs, weights, nds)

    new_feature_train = ps.DataFrame(list(new_feature_train), columns=["Mnew_sgd"])
    new_feature_test = ps.DataFrame(list(new_feature_test), columns=["Mnew_sgd"])
    # DataFrame.join aligns on the index; the frames built above carry the
    # default positional index.
    train_x = train_x.join(new_feature_train)
    test_x = test_x.join(new_feature_test)
    #xs = np.append(xs, new_feature_train, 1)
    #ds = np.append(ds, new_feature_test, 1)

#Jet_num
#New_sum_jet_pt
#new_frac_lep_pt
#if threshold_filter_features>0:
#    fs = ps.read_csv(feature_importance_file,',',header=0)
#    print(len(fs.columns.values))
#    fs = fs.dropna(axis=1,how='all')
예제 #3
0
import sys, os  #is necessary for relative import
sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'components')) #is necessary for relative import
from reading_train_data import txs, tys
from reading_test_data import tds, tis
from write_results import write_predictions,write_predictions2
from funcs import get_threshold, regression_sgd

from sklearn import metrics
from sklearn.linear_model import *
import numpy as np


# Run regression_sgd with txs and the first column of tys as its first two
# arguments and tds as the third; the meaning of the trailing False flag is
# defined in funcs.regression_sgd (not visible here).
predicted = regression_sgd(txs, tys[:, 0], tds, False)
# Threshold derived from the two columns of tys by get_threshold.
threshold = get_threshold(tys[:, 0], tys[:, 1])
# Write the result file, passing tis alongside the predictions and threshold.
write_predictions2("regression_sgd.csv", tis, predicted, threshold)

#############################
# Stop here on purpose. sys.exit() is used instead of the bare exit(): the
# latter is a helper injected by the `site` module for interactive sessions
# and is not guaranteed to exist (e.g. under `python -S`).
sys.exit()
#############################
#logical EOF

# NOTE: nothing from here on is executed -- the script terminates at the exit
# call above (marked "logical EOF"). This appears to be a leftover
# LogisticRegression experiment.
model = LogisticRegression()
# Fit on txs against the second column of tys.
model.fit(txs, tys[:,1])
#print(model)
# make predictions
#expected = train_data_y[:,1]
#predicted = model.predict(test_data[:,1:])
#probability = model.decision_function(test_data[:,1:])

## Recorded outcome: with all features the result was 2.01342 (~1508th place)
#write_predictions("regression.csv",test_data[:,0],probability,predicted)