min_child_weight=1, missing=None, n_estimators=100, # nthread=-1, objective='binary:logistic', # reg_alpha=1, reg_lambda=1, scale_pos_weight=1, seed=1234, # silent=True, subsample=1) skf = StratifiedKFold(n_splits=5) ytest = np.ones((1, 2)) * 0.5 yscore = np.ones((1, 2)) * 0.5 for train, test in skf.split(X, y): y_train = utils.to_categorical(y[train]) hist = cv_clf.fit(X[train], y[train]) y_score = cv_clf.predict_proba(X[test]) yscore = np.vstack((yscore, y_score)) y_test = utils.to_categorical(y[test]) ytest = np.vstack((ytest, y_test)) fpr, tpr, _ = roc_curve(y_test[:, 0], y_score[:, 0]) roc_auc = auc(fpr, tpr) y_class = utils.categorical_probas_to_classes(y_score) y_test_tmp = y[test] acc, precision, npv, sensitivity, specificity, mcc, f1 = utils.calculate_performace( len(y_class), y_class, y_test_tmp) sepscores.append( [acc, precision, npv, sensitivity, specificity, mcc, f1, roc_auc]) print( 'XGBoost:acc=%f,precision=%f,npv=%f,sensitivity=%f,specificity=%f,mcc=%f,f1=%f,roc_auc=%f'
# Disabled caching step, kept for reference: it pickled a {"x", "y"} dict to
# data/<nbr_sup>/<i>.pkl and reloaded trainx/trainy from data/<nbr_sup>/0.pkl.
# NOTE(review): if this is ever re-enabled on Python 3, open the files in
# binary mode ('wb' / 'rb'); pickle does not work on text-mode handles.
# stuff = {"x": trainx_tmp2, "y": trainy_tmp2}
# print k
# with open("data/"+str(nbr_sup) + "/" + str(i) + ".pkl", 'w') as f:
#     pkl.dump(stuff, f, protocol=pkl.HIGHEST_PROTOCOL)
# i += 1
# with open("data/"+str(nbr_sup) + "/0.pkl") as f:
#     stuff = pkl.load(f)
#     trainx, trainy = stuff["x"], stuff["y"]

# Wrap the datasets in Theano shared variables; the original note says the
# whole of MNIST can be kept on the GPU this way.

# --- Training set ---
trainx_sh = theano.shared(
    trainx.astype(theano.config.floatX),
    name="trainx",
    borrow=True,
)
# Raw labels, cast to floatX like the inputs.
trainlabels_sh = theano.shared(
    trainy.astype(theano.config.floatX),
    name="trainlabels",
    borrow=True,
)
# Labels passed through to_categorical with x_classes classes
# (presumably a one-hot encoding -- confirm against to_categorical).
trainy_sh = theano.shared(
    to_categorical(trainy, x_classes).astype(theano.config.floatX),
    name="trainy",
    borrow=True,
)

# --- Validation set ---
validx_sh = theano.shared(
    validx.astype(theano.config.floatX),
    name="validx",
    borrow=True,
)
validlabels_sh = theano.shared(
    validy.astype(theano.config.floatX),
    name="validlabels",
    borrow=True,
)

# Two symbolic matrix inputs named "x1" and "x2"; the single-input variant
# is left disabled below.
# input = T.fmatrix("x")
input1 = T.fmatrix("x1")
input2 = T.fmatrix("x2")

# NumPy random state with a fixed seed.
rng = np.random.RandomState(23455)

# Architecture (presumably the widths of three hidden layers -- confirm
# where nhid_l0..nhid_l2 are consumed).
nhid_l0 = 1200
nhid_l1 = 1200
nhid_l2 = 200
# train models i = 0 # nbags = 10 nepochs = 1 # y = y[0:512] number_class = y.nunique() print('xtrain.shape[0]:', xtrain.shape[0]) print('xtest.shape[0]:', xtest.shape[0]) pred_oob = np.zeros((xtrain.shape[0], number_class)) # 2010000*6=>100000*6 pred_test = np.zeros((xtest.shape[0], number_class)) # 502500*6=>20000*6 # print('y:\n', y) y_onehot = to_categorical(y.values) # 0-5进行独热编码 print('y_onehot:\n', y_onehot) ''' # start training for (indexTr, indexTe) in cv.split(xtrain, y): # indexTr, indexTe是每一折所选数据的序号,5折迭代 xtr = xtrain[indexTr] # 本折训练数据 ytr = y_onehot[indexTr] # 本折训练数据标签 xval = xtrain[indexTe] # 本折验证数据 yval = y_onehot[indexTe] # 本折验证数据标签 pred = np.zeros((xval.shape[0], number_class)) # 验证数据的预测值数组20000*6 for j in range(nbags): # 创建多次模型 model = nn_model() fit = model.fit_generator(generator=batch_generator(xtr, ytr, 128, True), # 运行nepochs轮 nb_epoch=nepochs, samples_per_epoch=xtr.shape[0], verbose=1,
metrics=['accuracy']) return (model) # cv-folds nfolds = 4 cv = StratifiedKFold(n_splits=nfolds, random_state=0) # train models str = 0 nbags = 1 nepochs = 1 number_class = label.nunique() # 编码 y_onehot = to_categorical(label.values) ''' # start training for (index_train, index_val) in cv.split(xtrain, y_cat): xtr = xtrain[index_train] ytr = y_cat[index_train] xval = xtrain[index_val] yval = y_cat[index_val] pred = np.zeros((xval.shape[0], number_class)) for j in range(nbags): model = nn_model() fit = model.fit_generator(generator=batch_generator(xtr, ytr, 256, True), nb_epoch=nepochs, samples_per_epoch=xtr.shape[0], verbose=1, validation_data=(xval.todense(), yval))
# Tail of a disabled caching step (its opening lines are outside this
# excerpt), followed by the disabled reload of trainx/trainy from
# data/<nbr_sup>/0.pkl.
# NOTE(review): if the reload is ever re-enabled on Python 3, open the file
# in binary mode ('rb'); pickle does not work on text-mode handles.
# pkl.dump(stuff, f, protocol=pkl.HIGHEST_PROTOCOL)
# i += 1
# with open("data/"+str(nbr_sup) + "/0.pkl") as f:
#     stuff = pkl.load(f)
#     trainx, trainy = stuff["x"], stuff["y"]

# Wrap the datasets in Theano shared variables; the original note says the
# whole of MNIST can be kept on the GPU this way.

# --- Training set ---
# Reshape the training inputs to 4-D: (n_samples, 1, 28, 28).
trainx = trainx.reshape((trainx.shape[0], 1, 28, 28))
trainx_sh = theano.shared(
    trainx.astype(theano.config.floatX),
    name="trainx",
    borrow=True,
)
# Raw labels, cast to floatX like the inputs.
trainlabels_sh = theano.shared(
    trainy.astype(theano.config.floatX),
    name="trainlabels",
    borrow=True,
)
# Labels passed through to_categorical with 10 classes
# (presumably a one-hot encoding -- confirm against to_categorical).
trainy_sh = theano.shared(
    to_categorical(trainy, 10).astype(theano.config.floatX),
    name="trainy",
    borrow=True,
)
# trainy_sh = T.cast(trainy_sh, 'int32')

# --- Validation set ---
# NOTE(review): unlike trainx, validx is not reshaped in this excerpt;
# confirm it is already 4-D before it is fed to the tensor4 inputs below.
validx_sh = theano.shared(
    validx.astype(theano.config.floatX),
    name="validx",
    borrow=True,
)
validlabels_sh = theano.shared(
    validy.astype(theano.config.floatX),
    name="validlabels",
    borrow=True,
)

# Two symbolic 4-D tensor inputs named "x1" and "x2"; the single-input
# variant is left disabled below.
# input = T.tensor4("x")
input1 = T.tensor4("x1")
input2 = T.tensor4("x2")