Example #1
# NOTE: the earlier keyword arguments of this XGBClassifier(...) call, the data
# (X, y) and the project-local `utils` helpers (to_categorical,
# categorical_probas_to_classes, calculate_performace) are not shown in this excerpt.
import numpy as np
from sklearn.metrics import auc, roc_curve
from sklearn.model_selection import StratifiedKFold
from xgboost import XGBClassifier

sepscores = []
cv_clf = XGBClassifier(
    min_child_weight=1,
    missing=None,
    n_estimators=100,
    nthread=-1,
    objective='binary:logistic',
    reg_alpha=1,
    reg_lambda=1,
    scale_pos_weight=1,
    seed=1234,
    silent=True,
    subsample=1)
skf = StratifiedKFold(n_splits=5)
ytest = np.ones((1, 2)) * 0.5   # placeholder first row; per-fold labels are stacked below
yscore = np.ones((1, 2)) * 0.5  # placeholder first row; per-fold scores are stacked below
for train, test in skf.split(X, y):
    y_train = utils.to_categorical(y[train])
    hist = cv_clf.fit(X[train], y[train])
    y_score = cv_clf.predict_proba(X[test])
    yscore = np.vstack((yscore, y_score))
    y_test = utils.to_categorical(y[test])
    ytest = np.vstack((ytest, y_test))
    fpr, tpr, _ = roc_curve(y_test[:, 0], y_score[:, 0])
    roc_auc = auc(fpr, tpr)
    y_class = utils.categorical_probas_to_classes(y_score)
    y_test_tmp = y[test]
    acc, precision, npv, sensitivity, specificity, mcc, f1 = utils.calculate_performace(
        len(y_class), y_class, y_test_tmp)
    sepscores.append(
        [acc, precision, npv, sensitivity, specificity, mcc, f1, roc_auc])
    print(
        'XGBoost:acc=%f,precision=%f,npv=%f,sensitivity=%f,specificity=%f,mcc=%f,f1=%f,roc_auc=%f'
        % (acc, precision, npv, sensitivity, specificity, mcc, f1, roc_auc))
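
# The `utils.calculate_performace` helper called above is project-local and not
# shown in this excerpt. A minimal sketch of what such a function typically
# computes from a binary confusion matrix (the name, argument order and returned
# metrics follow the call above; the implementation itself is an assumption):
import math

def calculate_performace(num, y_pred, y_true):
    tp = fp = tn = fn = 0
    for i in range(num):
        if y_true[i] == 1:
            if y_pred[i] == 1:
                tp += 1
            else:
                fn += 1
        else:
            if y_pred[i] == 1:
                fp += 1
            else:
                tn += 1
    acc = (tp + tn) / float(num)
    precision = tp / float(tp + fp) if (tp + fp) else 0.0
    npv = tn / float(tn + fn) if (tn + fn) else 0.0
    sensitivity = tp / float(tp + fn) if (tp + fn) else 0.0
    specificity = tn / float(tn + fp) if (tn + fp) else 0.0
    mcc_den = math.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn))
    mcc = (tp * tn - fp * fn) / mcc_den if mcc_den else 0.0
    f1 = (2 * precision * sensitivity / (precision + sensitivity)
          if (precision + sensitivity) else 0.0)
    return acc, precision, npv, sensitivity, specificity, mcc, f1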
Example #2
#        stuff = {"x": trainx_tmp2, "y": trainy_tmp2}
#        print k
#        with open("data/"+str(nbr_sup) + "/" + str(i) + ".pkl", 'w') as f:
#            pkl.dump(stuff, f, protocol=pkl.HIGHEST_PROTOCOL)
#        i += 1

#with open("data/"+str(nbr_sup) + "/0.pkl") as f:
#    stuff = pkl.load(f)
#    trainx, trainy = stuff["x"], stuff["y"]
# Share over the GPU: the whole MNIST dataset fits in GPU memory, so keep it in Theano shared variables.
# Train
trainx_sh = theano.shared(trainx.astype(theano.config.floatX),
                          name="trainx", borrow=True)
trainlabels_sh = theano.shared(trainy.astype(theano.config.floatX),
                               name="trainlabels", borrow=True)
trainy_sh = theano.shared(to_categorical(trainy, x_classes).astype(
    theano.config.floatX),  name="trainy", borrow=True)

# valid
validx_sh = theano.shared(validx.astype(theano.config.floatX),
                          name="validx", borrow=True)
validlabels_sh = theano.shared(validy.astype(theano.config.floatX),
                               name="validlabels", borrow=True)
# Symbolic inputs (flattened feature matrices).
input = T.fmatrix("x")
input1 = T.fmatrix("x1")
input2 = T.fmatrix("x2")
rng = np.random.RandomState(23455)
# Architecture
nhid_l0 = 1200
nhid_l1 = 1200
nhid_l2 = 200
# train models
i = 0
# nbags = 10
nepochs = 1
# y = y[0:512]
number_class = y.nunique()

print('xtrain.shape[0]:', xtrain.shape[0])
print('xtest.shape[0]:', xtest.shape[0])

pred_oob = np.zeros((xtrain.shape[0], number_class))  # out-of-fold predictions for the training set (2010000*6=>100000*6)
pred_test = np.zeros((xtest.shape[0], number_class))  # accumulated predictions for the test set (502500*6=>20000*6)

# print('y:\n', y)
y_onehot = to_categorical(y.values)  # one-hot encode the integer labels 0-5
print('y_onehot:\n', y_onehot)
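
# For reference, to_categorical maps integer labels to one-hot rows (assuming
# the Keras helper keras.utils.to_categorical is what is imported here), e.g.:
#
#   to_categorical([0, 2, 1], num_classes=3)
#   -> [[1., 0., 0.],
#       [0., 0., 1.],
#       [0., 1., 0.]]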
'''
# start training
for (indexTr, indexTe) in cv.split(xtrain, y):        # indexTr, indexTe are the sample indices of each fold; 5-fold iteration
    xtr = xtrain[indexTr]                             # training data for this fold
    ytr = y_onehot[indexTr]                           # training labels for this fold
    xval = xtrain[indexTe]                            # validation data for this fold
    yval = y_onehot[indexTe]                          # validation labels for this fold
    pred = np.zeros((xval.shape[0], number_class))    # predictions for the validation data, 20000*6
    for j in range(nbags):                            # train several bagged models
        model = nn_model()
        fit = model.fit_generator(generator=batch_generator(xtr, ytr, 128, True),       # run for nepochs epochs
                                  nb_epoch=nepochs,
                                  samples_per_epoch=xtr.shape[0],
                                  verbose=1,
                                  metrics=['accuracy'])
    return (model)


# cv-folds
nfolds = 4
cv = StratifiedKFold(n_splits=nfolds, random_state=0)

# train models
str = 0
nbags = 1
nepochs = 1
number_class = label.nunique()

# one-hot encode the labels
y_onehot = to_categorical(label.values)
'''
# start training 
for (index_train, index_val) in cv.split(xtrain, y_cat):
    xtr = xtrain[index_train]
    ytr = y_cat[index_train]
    xval = xtrain[index_val]
    yval = y_cat[index_val]
    pred = np.zeros((xval.shape[0], number_class))
    for j in range(nbags):
        model = nn_model()
        fit = model.fit_generator(generator=batch_generator(xtr, ytr, 256, True),
                                  nb_epoch=nepochs,
                                  samples_per_epoch=xtr.shape[0],
                                  verbose=1,
                                  validation_data=(xval.todense(), yval))
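
# `nn_model` and `batch_generator` are defined elsewhere in the original script.
# A minimal sketch of a shuffling batch generator for a scipy.sparse design
# matrix (an assumption suggested by the xval.todense() call above):
import numpy as np

def batch_generator_sketch(X, y, batch_size, shuffle):
    # Yields dense (X_batch, y_batch) pairs forever, reshuffling each epoch.
    n_batches = int(np.ceil(X.shape[0] / float(batch_size)))
    sample_index = np.arange(X.shape[0])
    if shuffle:
        np.random.shuffle(sample_index)
    counter = 0
    while True:
        batch_index = sample_index[batch_size * counter:batch_size * (counter + 1)]
        yield X[batch_index, :].toarray(), y[batch_index]
        counter += 1
        if counter == n_batches:
            if shuffle:
                np.random.shuffle(sample_index)
            counter = 0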
#            pkl.dump(stuff, f, protocol=pkl.HIGHEST_PROTOCOL)
#        i += 1

#with open("data/"+str(nbr_sup) + "/0.pkl") as f:
#    stuff = pkl.load(f)
#    trainx, trainy = stuff["x"], stuff["y"]
# Share over the GPU: the whole MNIST dataset fits in GPU memory, so keep it in Theano shared variables.
# Train
trainx = trainx.reshape((trainx.shape[0], 1, 28, 28))  # reshape to (batch, channel, height, width) for the conv net
trainx_sh = theano.shared(trainx.astype(theano.config.floatX),
                          name="trainx",
                          borrow=True)
trainlabels_sh = theano.shared(trainy.astype(theano.config.floatX),
                               name="trainlabels",
                               borrow=True)
trainy_sh = theano.shared(to_categorical(trainy,
                                         10).astype(theano.config.floatX),
                          name="trainy",
                          borrow=True)
# trainy_sh = T.cast(trainy_sh, 'int32')

# valid
validx_sh = theano.shared(validx.astype(theano.config.floatX),
                          name="validx",
                          borrow=True)
validlabels_sh = theano.shared(validy.astype(theano.config.floatX),
                               name="validlabels",
                               borrow=True)
# Symbolic 4-D inputs: (batch, channel, height, width).
input = T.tensor4("x")
input1 = T.tensor4("x1")
input2 = T.tensor4("x2")
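
# The reshape to (N, 1, 28, 28) and the tensor4 inputs above feed a
# convolutional network, which consumes (batch, channel, height, width)
# minibatches. A self-contained sketch of that input contract (the filter
# count and kernel size are assumptions, not values from this script):
import numpy as np
import theano
import theano.tensor as T
from theano.tensor.nnet import conv2d

_x4 = T.tensor4("x4")
_rng = np.random.RandomState(0)
_W = theano.shared(
    _rng.uniform(-0.1, 0.1, size=(8, 1, 5, 5)).astype(theano.config.floatX),
    name="W", borrow=True)          # 8 filters, 1 input channel, 5x5 kernels

_feature_maps = conv2d(_x4, _W)     # 'valid' mode: 28x28 images -> 24x24 maps
_conv_fn = theano.function([_x4], _feature_maps)

_images = _rng.rand(2, 1, 28, 28).astype(theano.config.floatX)
print(_conv_fn(_images).shape)      # (2, 8, 24, 24)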