# Implement unoptimized models
# ----------------------------
# NOTE(review): this whole section is disabled via the `if False:` toggle and
# kept for reference only; nothing below executes.
if False:
    model = second_model()
    num_epochs = 100
    batch_size = 32

    # Fit with a held-out validation split and plot the training curves.
    hm = model.fit(X_train, y_train,
                   validation_data=(X_test, y_test),
                   epochs=num_epochs, batch_size=batch_size)
    gut.plot_KerasHistory_metrics(hm, 'nhanes_keras_model_metrics')

    # NOTE(review): X and Y are not defined anywhere in this chunk — probably
    # meant X_test/y_test (or the full data set); confirm before re-enabling.
    scores = model.evaluate(X, Y)
    print("\n%s: %.2f%%" % (model.metrics_names[1], scores[1] * 100))

    # Hard 0/1 labels from the sigmoid probabilities on both splits.
    prob_train = model.predict(X_train)
    predictions_train = [round(x[0]) for x in prob_train]
    prob_test = model.predict(X_test)
    predictions_test = [round(x[0]) for x in prob_test]

    pred_train_df = pd.concat([
        pd.DataFrame(prob_train, columns=['PROB']),
        pd.DataFrame(predictions_train, columns=['Y_PRED_TRAIN'])
    ], axis=1)
    pred_test_df = pd.concat([
        pd.DataFrame(prob_test, columns=['PROB']),
        # NOTE(review): the paste was truncated mid-statement here; completed
        # symmetrically with pred_train_df above — verify against the original.
        pd.DataFrame(predictions_test, columns=['Y_PRED_TEST'])
    ], axis=1)
model.predict_classes(X_test)  # model.predict(X_test)
# NOTE(review): Sequential.predict_classes was removed in TF 2.6; on current
# versions use (model.predict(X_test) > 0.5).astype(int) for binary outputs.


def _plot_history_curve(train_key, val_key, title, ylabel):
    """Plot one train/validation metric pair from the global Keras History."""
    plt.plot(History.history[train_key])
    plt.plot(History.history[val_key])
    plt.title(title)
    plt.ylabel(ylabel)
    plt.xlabel('Epochs')
    plt.legend(['train', 'test'])
    plt.show()


# Log loss over time
_plot_history_curve('loss', 'val_loss', 'Model Loss', 'Loss')

# Model accuracy over time
# NOTE(review): 'acc'/'val_acc' are the legacy history keys; newer Keras
# records 'accuracy'/'val_accuracy' — confirm against the fit() output.
_plot_history_curve('acc', 'val_acc', 'Model Accuracy', 'Accuracy')

print(model.metrics_names)
model.evaluate(X_train, y_train)  # averaged loss and accuracy on train
model.evaluate(X_test, y_test)   # averaged loss and accuracy on test
# Split off a validation set from the training data (80/20, fixed seed).
x_train, x_val, y_train, y_val = train_test_split(
    x_train, y_train, train_size=0.8, random_state=55)
# print(x_train.max)
# print(x_train.min)

# 2. Modeling
# NOTE(review): the keyword is `n_jobs`; the original `n_job=-1` was not a
# recognized sklearn-API parameter and was silently passed through.
model = XGBClassifier(n_jobs=-1, use_label_encoder=False)

# 3. Compile / train
model.fit(x_train, y_train)

# 4. Evaluate / predict
# NOTE(review): XGBClassifier has no Keras-style evaluate(); score() returns
# mean accuracy on the held-out test set. Variable name kept for the print.
loss = model.score(x_test, y_test)
print('loss : ', loss)

y_pred = model.predict(x_test[:10])
# print(y_pred)
print(y_test[:10])
# NOTE(review): argmax only makes sense if y_test is one-hot; if it holds
# integer labels this prints zeros — confirm the label encoding.
print(np.argmax(y_test[:10], axis=-1))

# DNN
# (784, )
# loss : [0.09116600453853607, 0.9779000282287598]
# [7 2 1 0 4 1 4 9 5 9]
# PCA 154
# loss : [0.13378241658210754, 0.9748571515083313]
# [9 4 5 3 8 8 8 1 6 4]
# Oversampling: SMOTE-resample the current CV training fold.
# NOTE(review): fit_sample() was renamed fit_resample() in imbalanced-learn
# 0.4 and removed in 0.8 — switch to fit_resample() on current versions.
x_train_res_train, y_train_res_train = sm.fit_sample(
    x_train[train], y_train[train])

# Model fitting with the resampled train fold; validation on the held-out fold.
history = model.fit(x_train_res_train, y_train_res_train,
                    validation_data=(x_train[test], y_train[test]),
                    epochs=2000, batch_size=300, verbose=0)
historySet.append(history)
scores = model.evaluate(x_train[test], y_train[test], verbose=0)

# Predict the external validation set, thresholded at 0.5 into hard labels.
predicted = model.predict(x_val)
predicted = (predicted > 0.5)

# Confusion matrix and ROC for this fold.
# NOTE(review): roc_curve is fed hard 0/1 predictions, so the "curve" has a
# single operating point; pass the raw probabilities for a meaningful AUC.
cm = confusion_matrix(y_val, predicted)
fpr, tpr, thresholds = metrics.roc_curve(y_val, predicted)
tprs.append(interp(mean_fpr, fpr, tpr))  # scipy.interp is deprecated; np.interp is the drop-in
roc_auc = auc(fpr, tpr)
print('ROC_AUC')
print(roc_auc)
print('TPR')
print(tpr[1])
print('FPR')
# NOTE(review): the paste appears truncated here (the FPR value print is missing).
# NOTE(review): the opening of Read_data() (data loading and earlier feature
# engineering) was lost when this file was pasted; the header below is
# reconstructed so the surviving tail parses. Restore the original body —
# which must define X_train, Y_train, X_test — when merging.
def Read_data():
    # Disabled higher-order polynomial feature expansion:
    # X_test = np.hstack((X_test, X_test[:, :5]**5))
    # X_test = np.hstack((X_test, X_test[:, :5]**6))

    # Standardize train and test using statistics of the COMBINED data.
    # NOTE(review): using test rows for mu/sigma leaks test information into
    # preprocessing; prefer statistics computed from X_train only.
    X_all = np.concatenate((X_train, X_test))
    mu = np.mean(X_all, axis=0)  # was sum(X_all) / n_rows — same result, clearer
    sigma = np.std(X_all, axis=0)
    return (X_train - mu) / sigma, Y_train, (X_test - mu) / sigma


def Test(model, X_test):
    """Predict labels for X_test and write a Kaggle-style output.csv.

    Columns: 'id' (1-based row index) and 'label' (integer prediction).
    """
    # NOTE(review): XGBClassifier.predict takes no batch_size — the Keras-era
    # keyword was removed.
    result = model.predict(X_test)
    index_name = list(range(1, X_test.shape[0] + 1))
    df = pd.DataFrame(index_name, columns=['id'])
    df['label'] = pd.DataFrame(result.astype(int))
    df.to_csv('output.csv', index=False)


if __name__ == '__main__':
    X_train, Y_train, X_test = Read_data()

    model = XGBClassifier()
    # model.compile(loss='binary_crossentropy', optimizer=Adagrad(lr=0.01), metrics=['accuracy'])
    # NOTE(review): the original passed Keras-only kwargs (batch_size, epochs,
    # validation_split, shuffle) to XGBClassifier.fit and then called a
    # nonexistent evaluate(); replaced with the sklearn-style API.
    model.fit(X_train, Y_train)
    print('Accuracy: ', model.score(X_train, Y_train))
    Test(model, X_test)