# Tail of a cell-based ("#%%") script: finishes a scatter plot, then builds,
# gradient-checks, trains and evaluates a LogisticRegression model.
#
# NOTE(review): the next two statements read `i`, `markers`, `trainy`,
# `Xtsne` and `color`, none of which are defined in the visible part of the
# file. They look like the body of a per-sample plotting loop whose header
# lies above this excerpt -- re-indent them under that loop when merging.
# NOTE(review): the third positional parameter of pyplot.scatter is the
# marker size `s`, not a z coordinate, so Xtsne[i, 2] is used as the point
# size here. Confirm whether a 3-D plot was intended.
mk = markers[int(trainy[i, 0])]
plt.scatter(Xtsne[i, 0], Xtsne[i, 1], Xtsne[i, 2], c=color, marker=mk)
plt.show()

#%% Create the model object
# One weight per input column of the training matrix.
n_feature = trainX.shape[1]
model = LogisticRegression(n_feature, lr)

#%% Gradient check
# Task 1: implement the get_grad method of LogisticRegression.
print(model.check_grad(validX, validy))

#%% Training
# NOTE(review): evaluate() is assumed to run once per iteration so that
# model.validloss / model.snapshot (indexed below) have one entry per step --
# confirm against the LogisticRegression implementation.
for i in range(n_iter):
    model.update(trainX, trainy)
    model.evaluate(validX, validy)

utils.plot_loss(model, n_iter)
utils.plot_F1(model, n_iter)

#%% Evaluate the model
# Task 2: implement the confusion_matrix function in utils.
# Try choosing the best model either by F1 score or by validation loss.
#idx = np.argmax(model.validF1)
idx = np.argmin(model.validloss)
# Restore the weights recorded at the selected iteration before predicting.
model.W = model.snapshot[idx]
y_hat = model.predict(testX)
threshold = 0.5
TP, FP, FN, TN = utils.confusion_matrix(threshold, y_hat, testy)
if __name__ == "__main__":
    # Script entry point: train and evaluate a logistic-regression model on
    # the banknote-authentication data set, then plot the training cost.

    #################################### LOGISTIC REGRESSION #####################################
    print("\n\nLOGISTIC:\n")

    # The data file has no header row, so columns are addressed by position.
    data = pd.read_csv('data/data_banknote_authentication.txt', header=None)

    # NOTE(review): the unpacking order here is train, test, val -- confirm
    # it matches what preprocess_for_gradient_descent returns for the
    # (0.7, 0.15, 0.15) split.
    X_train, X_test, X_val, Y_train, Y_test, Y_val = preprocess_for_gradient_descent(
        data, 'rowwise', 'standardization', (0.7, 0.15, 0.15))
    # print(X_train.shape, X_val.shape, X_test.shape, Y_train.shape, Y_val.shape, Y_test.shape)

    model = LogisticRegression(
        learning_rate=0.005,
        initialisation='gaussian',
        regularisation='None',
        lambda_reg=0.01,
    )
    model.fit(X_train, Y_train, 5000, 'BGD')

    # Score the held-out splits with the fitted model.
    output_val = model.predict(X_val)
    output_test = model.predict(X_test)
    val_accuracy, val_f_score = model.evaluate(output_val, Y_val)
    test_accuracy, test_f_score = model.evaluate(output_test, Y_test)

    print("Train accuracy: ", model.train_accuracy)
    print("Val accuracy: ", val_accuracy)
    print("Test accuracy: ", test_accuracy)
    print("\nVal F1-Score: ", val_f_score)
    print("Test F1-Score: ", test_f_score)

    # print(model._weights)
    # print("\n\n", model.costs)
    plot_loss(model.costs, 'logistic_5000_epochs')