예제 #1
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import xgboost as xgb
import numpy as np
import myparse as mp
import sys
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.preprocessing import RobustScaler
from sklearn.decomposition import PCA
from sklearn.cross_validation import StratifiedKFold, KFold
from sklearn.metrics import roc_auc_score
import matplotlib.pyplot as plt

# Load every input table up front through mp.readcsv.  Per the original note,
# the returned data includes each file's first line.
enroll_train, enroll_test = map(
    mp.readcsv, ("enrollment_train.csv", "enrollment_test.csv"))

truth_train = mp.readcsv("truth_train.csv")
sample_train_x, sample_test_x = map(
    mp.readcsv, ("sample_train_x.csv", "sample_test_x.csv"))
aug_graph_train, aug_graph_test = map(
    mp.readcsv, ("augmentGraph_train.csv", "augmentGraph_test.csv"))

# Additional feature tables, each as a train/test pair.
all_feat_train, all_feat_test = map(
    mp.readcsv, ("feat_train.csv", "feat_test.csv"))

all_azure_train, all_azure_test = map(
    mp.readcsv, ("azure_train.csv", "azure_test.csv"))

all_azure2_train, all_azure2_test = map(
    mp.readcsv, ("azure2_train.csv", "azure2_test.csv"))
예제 #2
0
import numpy as np
import myparse as mp
import matplotlib.pyplot as plt
from sklearn.metrics import roc_auc_score

def _float_column_one(csv_name):
    """Load *csv_name* with mp.readcsv and return column 1 of every row as float."""
    return mp.readcsv(csv_name)[:, 1].astype(float)


# Training labels: column 1 of the truth file.
label_train = _float_column_one('truth_train.csv')

# Column 1 of each saved XGB output file, loaded as train/test pairs.
feat1_train = _float_column_one(
    'XGB_5_005_08_07_450_all0111_reg_train968152.csv')
feat1_test = _float_column_one(
    'XGB_5_005_08_07_450_all0111_reg_test968152.csv')

feat2_train = _float_column_one(
    'XGB_5_005_08_07_500_all0110_reg_train967211.csv')
feat2_test = _float_column_one(
    'XGB_5_005_08_07_500_all0110_reg_test967211.csv')

feat3_train = _float_column_one(
    'XGB_5_005_08_07_400_all0110_reg_train967298.csv')
feat3_test = _float_column_one(
    'XGB_5_005_08_07_400_all0110_reg_test967298.csv')

feat4_train = _float_column_one(
    'XGB_5_005_08_07_300_all0111_reg_rank_train967192.csv')
feat4_test = _float_column_one(
    'XGB_5_005_08_07_300_all0111_reg_rank_test967192.csv')

feat5_train = _float_column_one(
    'XGB_5_005_08_09_500_all_0111_impor_reg_train966810.csv')
예제 #3
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import xgboost as xgb
import numpy as np
import myparse as mp

from sklearn.cross_validation import StratifiedKFold
from sklearn.metrics import roc_auc_score

# Score a saved set of training predictions against the ground truth:
# print the accuracy at a 0.5 threshold, then the ROC AUC of the thresholded
# labels.
mypred = mp.readcsv("XGB_5_005_08_07_450_all0111_reg_train968152.csv").astype(
    float)
truth_train = mp.readcsv("truth_train.csv").astype(float)

# Binarise in place at 0.5.  The masks cover the whole array, so every column
# (not just the prediction column) is overwritten with 0/1.
mypred[mypred < 0.5] = 0
mypred[mypred >= 0.5] = 1

# Column 1 of each table is compared; both arrays are already float, so no
# further casting is needed.
truth_labels = truth_train[:, 1]
hard_labels = mypred[:, 1]

# np.sum counts the matches in native code instead of iterating the array
# element by element with the builtin sum().
accuracy = float(np.sum(truth_labels == hard_labels)) / len(hard_labels)

# Single-argument print(...) is valid under both Python 2 and Python 3.
print(accuracy)
# NOTE(review): the AUC here is computed on the thresholded 0/1 labels, not on
# the raw scores -- confirm that this is intended.
print(roc_auc_score(truth_labels, hard_labels))
예제 #4
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import numpy as np
import myparse as mp
from sklearn.linear_model import LogisticRegression
from sklearn import cross_validation, grid_search
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import RobustScaler
from sklearn.decomposition import PCA
from sklearn.decomposition import RandomizedPCA

# Load the input tables through mp.readcsv.  Per the original note, the
# returned data includes each file's first line.
enroll_train = mp.readcsv("enrollment_train.csv")
truth_train = mp.readcsv("truth_train.csv")
sample_train_x, sample_test_x = map(
    mp.readcsv, ("sample_train_x.csv", "sample_test_x.csv"))
aug_graph_train, aug_graph_test = map(
    mp.readcsv, ("augmentGraph_train.csv", "augmentGraph_test.csv"))
all_azure_train, all_azure_test = map(
    mp.readcsv, ("azure_train.csv", "azure_test.csv"))

all_feat_train = mp.readcsv("feat.csv")

# Numeric blocks: skip row 0 of each table and drop the leading column
# (the leading two columns for the azure table).
data_train = sample_train_x[1:, 1:].astype(float)
aug_train = aug_graph_train[1:, 1:].astype(float)
azure_train = all_azure_train[1:, 2:].astype(float)

# Concatenate column-wise: sample features, then graph features, then azure
# features.  feat.csv is loaded above but is not stacked here; the original
# kept that step commented out.
data_train = np.hstack((data_train, aug_train, azure_train))
예제 #5
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import numpy as np
import myparse as mp

# File selection for this run.  The active assignments point at training
# files; the commented-out trio names the corresponding test files
# (presumably swapped in by hand to process the test split).
#aaa = "sample_test_x.csv"
#bbb = "XGB_5_005_08_09_400_all_0111_cla_single_test.csv"
#ccc = "XGB_5_005_08_07_500_all_0110_cla_test884551.csv"
aaa = "sample_train_x.csv"
bbb = "XGB_5_005_08_09_350_all_0111_cla_single_train.csv"
ccc = "XGB_5_005_08_07_500_all_0110_cla_train884551.csv"

truth_train = mp.readcsv("truth_train.csv")
label_train = truth_train[:, 1].astype(float)

# NOTE: despite the "*_test" names, these hold whatever file `aaa` selects
# (the training sample in the active configuration).
sample_test_x = mp.readcsv(aaa)
# Skip row 0 (presumably a header row) and the first column before casting.
data_test = sample_test_x[1:, 1:].astype(float)

# Sanity-check output.  Single-argument print(...) is valid under both
# Python 2 and Python 3.
print(sample_test_x[0, 3])
print(sample_test_x[0:6, 3])
print(data_test[0:5, 2])

# Split rows by whether column 2 of the numeric data equals 1.
index_test_single = data_test[:, 2] == 1
index_test_multiple = data_test[:, 2] != 1

# Count the True entries in native code rather than with the builtin sum().
print(np.count_nonzero(index_test_single))
print(np.count_nonzero(index_test_multiple))

# Column 1 of each prediction file, as float.
single_pred = mp.readcsv(bbb)[:, 1].astype(float)
nice_pred = mp.readcsv(ccc)[:, 1].astype(float)

# Output buffer with one slot per row of single_pred.
merge_pred = np.zeros(len(single_pred))
예제 #6
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import xgboost as xgb
import numpy as np
import myparse as mp
import sys
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.preprocessing import RobustScaler
from sklearn.decomposition import PCA
from sklearn.cross_validation import StratifiedKFold, KFold
from sklearn.metrics import roc_auc_score
import matplotlib.pyplot as plt

# Load every input table up front through mp.readcsv.  Per the original note,
# the returned data includes each file's first line.
enroll_train, enroll_test = map(
    mp.readcsv, ("enrollment_train.csv", "enrollment_test.csv"))

truth_train = mp.readcsv("truth_train.csv")
sample_train_x, sample_test_x = map(
    mp.readcsv, ("sample_train_x.csv", "sample_test_x.csv"))
aug_graph_train, aug_graph_test = map(
    mp.readcsv, ("augmentGraph_train.csv", "augmentGraph_test.csv"))

# Additional feature tables, each as a train/test pair.
all_feat_train, all_feat_test = map(
    mp.readcsv, ("feat_train.csv", "feat_test.csv"))

all_azure_train, all_azure_test = map(
    mp.readcsv, ("azure_train.csv", "azure_test.csv"))

all_azure2_train, all_azure2_test = map(
    mp.readcsv, ("azure2_train.csv", "azure2_test.csv"))