def MFE(self, X_split, y_split, model):
    if model == 'SVM':
        # SVMs are scale-sensitive, so standardize the features first
        X_split_scaled = standard_scale(X_split)
        Model = SVM()
        Model.fit(X_split_scaled[0], y_split[0])
        y_hat = Model.predict(X_split_scaled[2])
    elif model == 'RF':
        # random forests need no scaling; fold the validation split into training
        Model = RF()
        Model.fit(np.concatenate([X_split[0], X_split[1]]),
                  np.concatenate([y_split[0], y_split[1]]))
        y_hat = Model.predict(X_split[2])
    elif model == 'FNN':
        X_split_scaled = standard_scale(X_split)
        Model = FNN(model)
        Model.fit(X_split_scaled[0], y_split[0],
                  validation_data=(X_split_scaled[1], y_split[1]),
                  epochs=self.MAX_EPOCH, batch_size=self.BATCH_SIZE,
                  callbacks=[self.es])
        y_hat = Model.predict_classes(X_split_scaled[2])
    else:
        print('model undefined')
        return None  # avoid evaluating an undefined y_hat
    return self.evaluate(y_split[2], y_hat)
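# `standard_scale` is not defined in this snippet; presumably it fits a scaler on
# the training split only and applies it to every split. A minimal sketch with
# scikit-learn's StandardScaler (names and split order assumed from the calls above):
def standard_scale(X_split):
    from sklearn.preprocessing import StandardScaler
    scaler = StandardScaler().fit(X_split[0])  # fit on the training split only, to avoid leakage
    return [scaler.transform(X) for X in X_split]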
def svm():
    # ********************* load the dataset and divide to X&y ***********************
    from sklearn.datasets import make_blobs
    X, Y = make_blobs(cluster_std=0.9, random_state=20, n_samples=1000, centers=10, n_features=10)
    from Algorithms.ML_.helper.data_helper import split_train_val_test
    X, Xv, y, Yv, Xt, Yt = split_train_val_test(X, Y)
    print(X.shape, y.shape, Xv.shape, Yv.shape, Xt.shape, Yt.shape)

    # ********************* build model ***********************
    from model import SVM
    from activation import Activation, Softmax, Hinge
    from regularization import Regularization, L1, L2, L12
    from optimizer import Vanilla
    model = SVM()
    learning_rate, reg_rate = 1e-3, 5e-1
    model.compile(alpha=learning_rate, lambda_=reg_rate, activation=Softmax(), reg=L2(), opt=Vanilla())
    model.describe()

    # ********************* train ***********************
    loss_train, loss_val = model.train(X, y, val=(Xv, Yv), iter_=1000, return_loss=True, verbose=True, eps=1e-3)
    import matplotlib.pyplot as plt
    plt.plot(range(len(loss_train)), loss_train)
    plt.plot(range(len(loss_val)), loss_val)
    plt.legend(['train', 'val'])
    plt.xlabel('Iteration')
    plt.ylabel('Loss')
    plt.title('Loss history')
    plt.show()

    # ********************* predict ***********************
    pred_train = model.predict(X)
    pred_val = model.predict(Xv)
    pred_test = model.predict(Xt)
    import metrics
    print('train accuracy=', metrics.accuracy(y, pred_train))
    print('val accuracy=', metrics.accuracy(Yv, pred_val))
    print('test accuracy=', metrics.accuracy(Yt, pred_test))
    print('null accuracy=', metrics.null_accuracy(y))
    metrics.print_metrics(Yt, pred_test)
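# `split_train_val_test` is project-specific; the return order above suggests
# (X_train, X_val, y_train, y_val, X_test, y_test). A minimal sketch built on
# scikit-learn, with an assumed 60/20/20 split:
def split_train_val_test(X, Y, val_size=0.2, test_size=0.2):
    from sklearn.model_selection import train_test_split
    X_rest, Xt, Y_rest, Yt = train_test_split(X, Y, test_size=test_size, random_state=0)
    # rescale the validation fraction to the data remaining after the test split
    X_train, Xv, y, Yv = train_test_split(X_rest, Y_rest, test_size=val_size / (1 - test_size), random_state=0)
    return X_train, Xv, y, Yv, Xt, Yt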
# load the model if one was saved previously, otherwise train and save it
try:
    with open("../Resources/models/model", "rb") as model_file:
        model = pickle.load(model_file)
except IOError:
    # load training reviews from file
    train_review = utils.load_reviews("../Resources/samples/train_data")
    # extract features from the training data
    train_data, train_label = feature_data(tagger, exp, bag, train_review)
    # initialize the classifier
    model = SVM()
    # train the model
    model.train(train_data, train_label)
    # save the model
    with open("../Resources/models/model", "wb") as model_file:
        pickle.dump(model, model_file)
else:
    print("use saved model..")

# load test reviews from file
test_review = utils.load_reviews("../Resources/samples/test_data")
# extract features from the test data
test_data, test_label = feature_data(tagger, exp, bag, test_review)
# predict
result = model.predict(test_data)
# evaluate accuracy
evaluate_model(result, test_label)
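# `evaluate_model` is not shown here; presumably it compares predictions against
# the gold labels and reports accuracy. A minimal sketch:
def evaluate_model(result, labels):
    correct = sum(1 for pred, gold in zip(result, labels) if pred == gold)
    print("accuracy: {0:.4f} ({1}/{2})".format(correct / len(labels), correct, len(labels)))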
model.compile(alpha=1e-7, lambda_=2, activation=Softmax(), reg=L2())
# model.compile(alpha=0, lambda_=0, activation=Hinge(), reg=L2())
# iter_=0 initializes the model without weight updates, so loss/grad can be inspected
history = model.train(Xd, Yd, iter_=0, eps=0.0001)
print(model.loss(model.X, model.y, add_ones=False),
      np.sum(model.grad(model.X, model.y, False)))
L, dW = model.grad(model.X, model.y, True)
print(L, np.sum(dW))
# alternative checks against the loop-based implementations:
# print(np.sum(model.W))
# print(np.sum(model.grad(model.X, model.y, loss_=False)))
# print(np.sum(model.grad1(model.X, model.y)))
# L, dW = model.activation.loss_grad_loop(model.X, model.W, model.y)
# print(L, np.sum(dW))

loss_history = model.train(X, y, eps=0.0001, batch=200, iter_=1500)
plt.plot(range(len(loss_history)), loss_history)
plt.xlabel('Iteration number')
plt.ylabel('Loss value')
plt.show()
print(loss_history[::100])

# ********************* metrics ***********************
pred = np.argmax(model.predict(model.X, add_ones=False), axis=1)
pred_v = np.argmax(model.predict(Xv), axis=1)
pred_te = np.argmax(model.predict(Xte), axis=1)
print(metrics.accuracy(y, pred))
print(metrics.accuracy(Yv, pred_v))
print(metrics.accuracy(Yte, pred_te))
# print(np.mean(metrics.null_accuracy(Yte)))
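# A centered-difference numeric check is a quick way to validate the analytic
# gradient printed above; this sketch assumes model.W holds the weight matrix and
# that model.loss / model.grad keep the signatures used in this script:
def numeric_grad_check(model, h=1e-5, trials=5):
    rng = np.random.default_rng(0)
    _, dW = model.grad(model.X, model.y, True)
    for _ in range(trials):
        idx = tuple(rng.integers(s) for s in model.W.shape)  # random weight entry
        old = model.W[idx]
        model.W[idx] = old + h
        loss_plus = model.loss(model.X, model.y, add_ones=False)
        model.W[idx] = old - h
        loss_minus = model.loss(model.X, model.y, add_ones=False)
        model.W[idx] = old  # restore the weight
        numeric = (loss_plus - loss_minus) / (2 * h)
        print(idx, 'analytic=', dW[idx], 'numeric=', numeric)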
# load training reviews and extract features up front; the fault dump below needs
# them even when a saved model is used
train_review = utils.load_reviews("./samples/train_data")  # path assumed, mirroring the commented test path below
# get feature from train data
train_data, train_label = feature_data(tagger, exp, bag, train_review)

# load the model if one was saved previously, otherwise train and save it
try:
    with open("./models/model", "rb") as model_file:
        model = pickle.load(model_file)
except IOError:
    # initialize the classifier
    model = SVM()
    # train the model
    model.train(train_data, train_label)
    # save the model
    with open("./models/model", "wb") as model_file:
        pickle.dump(model, model_file)
else:
    print("use saved model..")

# test-data evaluation is disabled; the model is checked against its own training data
# test_review = utils.load_reviews("./samples/test_data")
# test_data, test_label = feature_data(tagger, exp, bag, test_review)

# predict on the training data
result = model.predict(train_data)
# evaluate accuracy
evaluate_model(result, train_label)

# dump misclassified reviews and their feature vectors for inspection
with open("fault", "w") as ff:
    for i, v in enumerate(train_data):
        if result[i] != train_label[i]:
            ff.write("real:{0!s:s}: {1!s:s}\n"
                     .format(train_review[i][0], train_review[i][1]))
            ff.write("[{0:f} {1:f} {2:f} {3:f} {4:f} {5:f} {6:f} {7:f}]\n"
                     .format(v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]))
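# `utils.load_reviews` is not shown; the train_review[i][0] / train_review[i][1]
# accesses above suggest it yields (label, text) pairs. A minimal sketch assuming
# a tab-separated "label<TAB>text" file (the real format may differ):
def load_reviews(path):
    reviews = []
    with open(path, encoding="utf-8") as f:
        for line in f:
            label, text = line.rstrip("\n").split("\t", 1)
            reviews.append((label, text))
    return reviews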