# NOTE(review): this chunk opens on the loop itself. `nxs` / `nds` passed to
# k_nearest_cluster are assigned above the visible region, and the nesting
# below was rebuilt from whitespace-collapsed source -- confirm it against the
# original layout.

# Nearest-neighbour stage: one k_nearest_cluster call per entry of
# pdata_nearest_neigh; whatever it returns is joined onto the train and test
# frames.
for i in pdata_nearest_neigh:
    kx, kd = k_nearest_cluster(nxs, nds, i)
    train_x = train_x.join(kx)
    test_x = test_x.join(kd)
if configure_verbose_mode:
    print("Finished: algorithm nearest")

# SGD-regression stage: appends a "Mnew_sgd" column built from the output of
# regression_sgd (helper defined elsewhere; presumably it fits on its first two
# arguments and predicts for the third -- TODO confirm).
if pdata_regression_sgd:
    # `.values` replaces `.as_matrix()`, which pandas deprecated in 0.23 and
    # removed in 1.0; both return the same ndarray.
    nxs = dtrain[args_x[1:]].values
    nds = dtest[args_x[1:]].values
    # Despite the name, this is the column args_y[0] of the training frame.
    weights = dtrain[args_y[0]].values

    # The training-side feature is produced from only the leading 90% of the
    # rows but evaluated on all of them. Original author's note:
    # "0.9 is the best value!"
    aux_end = int(len(nxs) * 0.9)
    new_feature_train = regression_sgd(nxs[:aux_end, :], weights[:aux_end], nxs)
    new_feature_test = regression_sgd(nxs, weights, nds)

    new_feature_train = ps.DataFrame(list(new_feature_train), columns=["Mnew_sgd"])
    new_feature_test = ps.DataFrame(list(new_feature_test), columns=["Mnew_sgd"])

    # join() aligns on the index; the new frames carry a default 0..n-1 index.
    train_x = train_x.join(new_feature_train)
    test_x = test_x.join(new_feature_test)
    if configure_verbose_mode:
        print("Finished: regression sgd")

# Extra hand-made features, applied identically to train and test.
if pdata_additional:
    train_x = add_features(train_x)
    test_x = add_features(test_x)
    if configure_verbose_mode:
        print("Finished: feature engeneering")

#if exclude_sth:
###
# Extra hand-made features, applied identically to train and test.
if pdata_add_features:
    train_x = add_features(train_x)
    test_x = add_features(test_x)

# SGD-regression stage: appends a "Mnew_sgd" column built from the output of
# regression_sgd (helper defined elsewhere; presumably it fits on its first two
# arguments and predicts for the third -- TODO confirm).
if add_column_as_regression_sgd:
    # Alternative kept from the original: feed every column of the current
    # frames instead of the raw args_x columns.
    #with all data
    #nxs = train_x.as_matrix()
    #nds = test_x.as_matrix()
    #or not
    # `.values` replaces `.as_matrix()`, which pandas deprecated in 0.23 and
    # removed in 1.0; both return the same ndarray.
    nxs = dtrain[args_x[1:]].values
    nds = dtest[args_x[1:]].values
    # Despite the name, this is the column args_y[0] of the training frame.
    weights = dtrain[args_y[0]].values

    # pdata_sgd is the fraction of leading training rows used to produce the
    # training-side feature; the feature is still evaluated on every row.
    aux_end = int(len(nxs) * pdata_sgd)
    new_feature_train = regression_sgd(nxs[:aux_end, :], weights[:aux_end], nxs)
    new_feature_test = regression_sgd(nxs, weights, nds)

    new_feature_train = ps.DataFrame(list(new_feature_train), columns=["Mnew_sgd"])
    new_feature_test = ps.DataFrame(list(new_feature_test), columns=["Mnew_sgd"])

    # join() aligns on the index; the new frames carry a default 0..n-1 index.
    train_x = train_x.join(new_feature_train)
    test_x = test_x.join(new_feature_test)
    #xs = np.append(xs, new_feature_train, 1)
    #ds = np.append(ds, new_feature_test, 1)

#Jet_num
#New_sum_jet_pt
#new_frac_lep_pt

#if threshold_filter_features>0:
#    fs = ps.read_csv(feature_importance_file,',',header=0)
#    print(len(fs.columns.values))
#    fs = fs.dropna(axis=1,how='all')
import os  # is necessary for relative import
import sys  # is necessary for relative import

# The helper modules imported below live in ../components relative to this
# file, so the search path has to be extended before those imports run.
sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'components'))

from reading_train_data import txs, tys
from reading_test_data import tds, tis
from write_results import write_predictions, write_predictions2
from funcs import get_threshold, regression_sgd
from sklearn import metrics
from sklearn.linear_model import *  # NOTE(review): only LogisticRegression is used below
import numpy as np

# Run the SGD regression on the training data (column 0 of tys as the target)
# and get predictions for the test rows. The trailing False is an option flag
# of regression_sgd -- its meaning is defined in funcs; TODO confirm.
predicted = regression_sgd(txs, tys[:, 0], tds, False)
# The cut-off is derived from both columns of tys by get_threshold.
threshold = get_threshold(tys[:, 0], tys[:, 1])
write_predictions2("regression_sgd.csv", tis, predicted, threshold)

#############################
# sys.exit() instead of the bare exit(): the latter is an interactive helper
# injected by the `site` module and is missing under `python -S` or in frozen
# builds. Both raise SystemExit with a zero status.
sys.exit()
#############################
#logical EOF

# Everything below is unreachable; it is an earlier logistic-regression
# experiment kept for reference.
model = LogisticRegression()
model.fit(txs, tys[:, 1])
#print(model)
# make predictions
#expected = train_data_y[:,1]
#predicted = model.predict(test_data[:,1:])
#probability = model.decision_function(test_data[:,1:])
##with all features result is 2.01342 ~ 1508 place
#write_predictions("regression.csv",test_data[:,0],probability,predicted)