#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Loads every raw CSV table used by the XGBoost experiments into module-level
# arrays: enrollment lists, training labels and the assorted feature sets.
import xgboost as xgb
import numpy as np
import myparse as mp
import sys
from sklearn.preprocessing import StandardScaler, LabelEncoder, RobustScaler
from sklearn.decomposition import PCA
from sklearn.cross_validation import StratifiedKFold, KFold
from sklearn.metrics import roc_auc_score
import matplotlib.pyplot as plt

# Per the original author's note, the arrays returned by readcsv include the
# first line of each file.
# Each right-hand tuple is evaluated left to right, so the files are read in
# the same order as separate statements would read them (train, then test).

# Enrollment tables.
enroll_train, enroll_test = (
    mp.readcsv("enrollment_train.csv"),
    mp.readcsv("enrollment_test.csv"),
)

# Ground-truth labels (training set only).
truth_train = mp.readcsv("truth_train.csv")

# Sample feature tables.
sample_train_x, sample_test_x = (
    mp.readcsv("sample_train_x.csv"),
    mp.readcsv("sample_test_x.csv"),
)

# "augmentGraph" feature tables.
aug_graph_train, aug_graph_test = (
    mp.readcsv("augmentGraph_train.csv"),
    mp.readcsv("augmentGraph_test.csv"),
)

# "feat" feature tables.
all_feat_train, all_feat_test = (
    mp.readcsv("feat_train.csv"),
    mp.readcsv("feat_test.csv"),
)

# "azure" feature tables, first and second variant.
all_azure_train, all_azure_test = (
    mp.readcsv("azure_train.csv"),
    mp.readcsv("azure_test.csv"),
)
all_azure2_train, all_azure2_test = (
    mp.readcsv("azure2_train.csv"),
    mp.readcsv("azure2_test.csv"),
)
import numpy as np
import myparse as mp
import matplotlib.pyplot as plt
from sklearn.metrics import roc_auc_score


def _second_column(csv_name):
    """Read *csv_name* via mp.readcsv and return its column 1 cast to float.

    All rows are kept (the slice starts at row 0).
    """
    table = mp.readcsv(csv_name)
    return table[0:, 1].astype(float)


# Training labels: column 1 of the truth file.
label_train = _second_column('truth_train.csv')

# Column 1 of each saved model-output file, one train/test pair per model.
feat1_train = _second_column(
    'XGB_5_005_08_07_450_all0111_reg_train968152.csv')
feat1_test = _second_column(
    'XGB_5_005_08_07_450_all0111_reg_test968152.csv')

feat2_train = _second_column(
    'XGB_5_005_08_07_500_all0110_reg_train967211.csv')
feat2_test = _second_column(
    'XGB_5_005_08_07_500_all0110_reg_test967211.csv')

feat3_train = _second_column(
    'XGB_5_005_08_07_400_all0110_reg_train967298.csv')
feat3_test = _second_column(
    'XGB_5_005_08_07_400_all0110_reg_test967298.csv')

feat4_train = _second_column(
    'XGB_5_005_08_07_300_all0111_reg_rank_train967192.csv')
feat4_test = _second_column(
    'XGB_5_005_08_07_300_all0111_reg_rank_test967192.csv')

feat5_train = _second_column(
    'XGB_5_005_08_09_500_all_0111_impor_reg_train966810.csv')
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Sanity check of a saved training-set prediction file against the truth
# labels: prints the accuracy at a 0.5 threshold, then an AUC score.
import xgboost as xgb
import numpy as np
import myparse as mp
from sklearn.cross_validation import StratifiedKFold
from sklearn.metrics import roc_auc_score

mypred = mp.readcsv(
    "XGB_5_005_08_07_450_all0111_reg_train968152.csv").astype(float)
truth_train = mp.readcsv("truth_train.csv").astype(float)

# Binarise in place at 0.5. The masks cover the whole array, so column 0 is
# thresholded as well; only column 1 is read below.
mypred[mypred < 0.5] = 0
mypred[mypred >= 0.5] = 1

truth_col = truth_train[0:, 1]
pred_col = mypred[0:, 1]

# Accuracy: fraction of rows whose thresholded prediction equals the label.
# The single-argument parenthesised print produces the same output on
# Python 2 and also parses on Python 3.
print(float(np.count_nonzero(truth_col == pred_col)) / len(pred_col))

# NOTE(review): the AUC below is computed on the already-binarised
# predictions, not on the raw scores. If the AUC of the model's scores is
# wanted, compute it before the thresholding step -- confirm intent.
print(roc_auc_score(truth_col.astype(float), pred_col.astype(float)))
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Loads the raw CSV tables and assembles the training feature matrix
# (sample features + augmentGraph features + azure features).
import numpy as np
import myparse as mp
from sklearn.linear_model import LogisticRegression
from sklearn import cross_validation, grid_search
from sklearn.preprocessing import StandardScaler, RobustScaler
from sklearn.decomposition import PCA, RandomizedPCA

# Per the original author's note, the arrays returned by readcsv include the
# first line of each file; the [1:, ...] slices below drop row 0.
enroll_train = mp.readcsv("enrollment_train.csv")
truth_train = mp.readcsv("truth_train.csv")
sample_train_x, sample_test_x = (
    mp.readcsv("sample_train_x.csv"),
    mp.readcsv("sample_test_x.csv"),
)
aug_graph_train, aug_graph_test = (
    mp.readcsv("augmentGraph_train.csv"),
    mp.readcsv("augmentGraph_test.csv"),
)
all_azure_train, all_azure_test = (
    mp.readcsv("azure_train.csv"),
    mp.readcsv("azure_test.csv"),
)
# feat.csv is loaded, but its features are not stacked into data_train here:
# the slice that would extract them is disabled.
all_feat_train = mp.readcsv("feat.csv")
#feat_train = all_feat_train[0:,1:].astype(float)

# Numeric feature blocks: row 0 and column 0 are dropped from the augmentGraph
# table; row 0 and the first two columns are dropped from the azure table.
aug_train = aug_graph_train[1:, 1:].astype(float)
azure_train = all_azure_train[1:, 2:].astype(float)

# Column-wise concatenation in the order sample | augmentGraph | azure.
# (A disabled variant stacked an `aug_feat` block instead:)
#data_train = np.hstack((data_train,aug_feat))
data_train = np.hstack(
    (sample_train_x[1:, 1:].astype(float), aug_train, azure_train))
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Prepares the merge of two saved prediction files: rows whose feature
# column 2 equals 1 ("single") are separated from all other rows ("multiple").
import numpy as np
import myparse as mp

# Input files. The disabled names are the test-set counterparts; enable them
# (and disable the three below) to process the test set instead.
#aaa = "sample_test_x.csv"
#bbb = "XGB_5_005_08_09_400_all_0111_cla_single_test.csv"
#ccc = "XGB_5_005_08_07_500_all_0110_cla_test884551.csv"
aaa = "sample_train_x.csv"
bbb = "XGB_5_005_08_09_350_all_0111_cla_single_train.csv"
ccc = "XGB_5_005_08_07_500_all_0110_cla_train884551.csv"

truth_train = mp.readcsv("truth_train.csv")
label_train = truth_train[:, 1].astype(float)

# NOTE: the *_test names are kept although `aaa` currently points at the
# training sample file.
sample_test_x = mp.readcsv(aaa)
# Drop row 0 and column 0, so data_test column 2 is sample_test_x column 3.
data_test = sample_test_x[1:, 1:].astype(float)

# Debug output: row 0 of the splitting column, its first six raw entries and
# the first five numeric values. The single-argument parenthesised print
# produces the same output on Python 2 and also parses on Python 3.
print(sample_test_x[0, 3])
print(sample_test_x[0:6, 3])
print(data_test[0:5, 2])

# Boolean row masks: value == 1 versus everything else.
index_test_single = data_test[:, 2] == 1
index_test_multiple = data_test[:, 2] != 1
print(np.count_nonzero(index_test_single))
print(np.count_nonzero(index_test_multiple))

# Column 1 of the two saved prediction files, plus a zero-filled output
# buffer with one slot per row of the "single" prediction file.
single_pred = mp.readcsv(bbb)[:, 1].astype(float)
nice_pred = mp.readcsv(ccc)[:, 1].astype(float)
merge_pred = np.zeros(len(single_pred))