def baseline(self, Event_num, read_from_file=True, include_MANRS_data=True, baseline_Feature=False):
    """Tune and evaluate the SVM and random-forest baselines for one event.

    For each scoring metric (macro precision, then macro F1) an ``SVC`` and a
    ``RandomForestClassifier`` are tuned with ``GridSearchCV`` on the training
    split and used to predict the test split.  The grid scores and a
    classification report are printed and appended to
    ``../result_doc/baseline.txt`` (one entry per metric/classifier pair).

    Reads ``self.Event_list`` and ``self.WINDOW_SIZE``; sets
    ``self.INPUT_SIZE`` from the data loader.

    :param Event_num: 1-based index of the anomaly type in ``self.Event_list``
    :param read_from_file: forwarded to ``Data_Loader.loadDataSet``
    :param include_MANRS_data: forwarded to ``Data_Loader.loadDataSet``
    :param baseline_Feature: forwarded to ``loadDataSet`` as ``baseline``
    :return: None
    """
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.metrics import classification_report
    from sklearn.model_selection import GridSearchCV
    from sklearn.svm import SVC

    Event_name = self.Event_list[Event_num - 1]
    target_list = ['normal', Event_name]

    loader = Data_Loader(Event_name=Event_name,
                         Event_num=Event_num,
                         TIME_STEP=1,
                         WINDOW_SIZE=self.WINDOW_SIZE)
    # The evaluation split is returned by the loader but not used here.
    train_x, train_y, test_x, test_y, _eval_x, _eval_y = loader.loadDataSet(
        read_from_file=read_from_file,
        include_MANRS_data=include_MANRS_data,
        baseline=baseline_Feature)
    self.INPUT_SIZE = loader.INPUT_SIZE

    # Flatten every window into one feature vector per sample.
    flat_width = self.INPUT_SIZE * self.WINDOW_SIZE
    train_x = train_x.reshape([-1, flat_width])
    test_x = test_x.reshape([-1, flat_width])

    tuned_param_SVC = [{
        'kernel': ['rbf'],
        'gamma': [1e-3, 1e-4],
        'C': [1, 10, 100, 1000]
    }, {
        'kernel': ['linear'],
        'C': [1, 10, 100, 1000]
    }]
    tuned_param_RT = [{
        'n_estimators': range(20, 500, 30),
    }]
    scores = ['precision', 'f1']
    baseline_txt_path = '../result_doc/baseline.txt'

    for score in scores:
        scoring = '%s_macro' % score
        svc = GridSearchCV(SVC(), tuned_param_SVC, scoring=scoring, n_jobs=8)
        rf = GridSearchCV(RandomForestClassifier(bootstrap=True, oob_score=True),
                          tuned_param_RT,
                          scoring=scoring,
                          n_jobs=8)

        for clf in (svc, rf):
            clf.fit(train_x, train_y)
            pred_y = clf.predict(test_x)

            # The metric header used to be overwritten by the next assignment
            # and never reached the log; keep it as the first part instead.
            header = 'Tuning hyper-parameters for %s' % score
            parts = [
                header + '\n\n',
                "Best parameters set found on development set:\n\n",
                str(clf.best_params_),
                '\n\n',
                "Grid scores on development set:\n\n",
            ]

            print(header)
            print()
            print("Best parameters set found on development set:")
            print()
            print(clf.best_params_)
            print()
            print("Grid scores on development set:")
            print()

            means = clf.cv_results_['mean_test_score']
            stds = clf.cv_results_['std_test_score']
            for mean, std, params in zip(means, stds, clf.cv_results_['params']):
                print("%0.3f (+/-%0.03f) for %r" % (mean, std * 2, params))
                parts.append("%0.3f (+/-%0.03f) for %r \n" % (mean, std * 2, params))

            parts.append(
                "\nDetailed classification report:\n\nThe model is trained on the full development set.\nThe scores are computed on the full evaluation set.\n\n"
            )
            print()
            print("Detailed classification report:")
            print()
            print("The model is trained on the full development set.")
            print("The scores are computed on the full evaluation set.")
            print()

            # Build the report once and reuse it for the log and the console.
            report = classification_report(y_true=test_y,
                                           y_pred=pred_y,
                                           target_names=target_list)
            parts.append(str(report))
            print(report)

            # Append mode: every metric/classifier pair adds its own entry.
            with open(baseline_txt_path, 'a') as f:
                f.write(''.join(parts))
def train(self, Event_num, read_from_file=True, include_MANRS_data=True, WINDOW_SIZE=30, HIDDEN_SIZE=128, save_epoch=10):
    """Train the LSTM (or SA-LSTM) classifier for one event type on the GPU.

    Every 10000 training steps (including step 0 of each epoch) the model is
    evaluated on the test split.  When that happens during epoch
    ``save_epoch`` the state dict is written to
    ``../params/best_lstm_params_<Event_name>.pkl`` and a line is appended to
    ``../result_doc/test_best_f1<Event_name>.txt``.  After training, the
    checkpoint written by this run is reloaded (if there is one), a summary
    is appended to ``../result_doc/test_parameter_<Event_name>.txt``, the
    model is appended to ``self.models`` and the whole module is saved to
    ``../params/lstm<Event_name>.pkl``.

    Reads ``self.Event_list``, ``self.BATCH_SIZE``, ``self.LSTM_layer_NUM``,
    ``self.LR``, ``self.EPOCH``, ``self.TIME_STEP`` and ``self.LSTM_NUM``, and
    the module-level ``SA_LSTM_flag`` that selects the model class.

    :param Event_num: 1-based index of the anomaly type in ``self.Event_list``
    :param read_from_file: forwarded to ``Data_Loader.loadDataSet``
    :param include_MANRS_data: forwarded to ``Data_Loader.loadDataSet``
    :param WINDOW_SIZE: the sliding window size (stored on ``self``)
    :param HIDDEN_SIZE: the hidden size of the LSTM model (stored on ``self``)
    :param save_epoch: epoch index at which the checkpoint is written
    :return: None
    """
    from sklearn.metrics import classification_report, f1_score

    self.WINDOW_SIZE = WINDOW_SIZE
    self.Hidden_SIZE = HIDDEN_SIZE
    Event_name = self.Event_list[Event_num - 1]
    Path = '../params/best_lstm_params_' + Event_name + '.pkl'
    target_list = ['normal', Event_name]

    loader = Data_Loader(Event_name=Event_name,
                         Event_num=Event_num,
                         TIME_STEP=1,
                         WINDOW_SIZE=self.WINDOW_SIZE)
    # The evaluation split is never used below, so it is not moved to the GPU.
    train_x, train_y, test_x, test_y, _eval_x, _eval_y = loader.loadDataSet(
        read_from_file=read_from_file,
        include_MANRS_data=include_MANRS_data)
    self.INPUT_SIZE = loader.INPUT_SIZE

    datasets = Data.TensorDataset(train_x, train_y)
    train_loader = Data.DataLoader(dataset=datasets,
                                   batch_size=self.BATCH_SIZE,
                                   shuffle=True,
                                   num_workers=2)
    test_x = test_x.cuda()
    test_y = test_y.numpy()

    model_cls = SA_LSTM if SA_LSTM_flag else LSTM
    lstm = model_cls(WINDOW_SIZE=self.WINDOW_SIZE,
                     INPUT_SIZE=self.INPUT_SIZE,
                     Hidden_SIZE=self.Hidden_SIZE,
                     LSTM_layer_NUM=self.LSTM_layer_NUM)
    lstm = lstm.cuda()
    optimizer = torch.optim.Adam(lstm.parameters(), lr=self.LR)
    loss_func = nn.CrossEntropyLoss()

    def predict_labels(inputs):
        # Inference only: eval mode plus no_grad keeps the pass deterministic
        # and avoids building an autograd graph over the whole test set.
        lstm.eval()
        try:
            with torch.no_grad():
                if SA_LSTM_flag:
                    scores, _attn_weights = lstm(inputs)
                else:
                    scores = lstm(inputs)
        finally:
            lstm.train()
        return torch.max(scores, 1)[1].cpu().numpy()

    best_f1_score = 0.0
    best_epoch = 0
    checkpoint_saved = False

    for epoch in range(self.EPOCH):
        # Wrap the loader itself so tqdm knows its length.
        for step, (x, y) in enumerate(tqdm(train_loader)):
            x = x.cuda()
            y = y.cuda()
            if SA_LSTM_flag:
                output, _attn_weights = lstm(x)
            else:
                output = lstm(x)
            loss = loss_func(output, y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if step % 10000 != 0:
                continue

            pred_y = predict_labels(test_x)
            accuracy = float(np.sum(pred_y == test_y)) / float(test_y.size)
            print('Epoch: ', epoch,
                  '| train loss: %.4f' % loss.item(),
                  '| test accuracy: %.2f' % accuracy)
            temp_f1 = f1_score(y_pred=pred_y, y_true=test_y, average='macro')
            print('temp_f1', temp_f1)

            # The checkpoint is taken at a fixed epoch, not at the best F1.
            if epoch == save_epoch:
                temp_str = classification_report(y_true=test_y,
                                                 y_pred=pred_y,
                                                 target_names=target_list)
                print(temp_str + '\n' + str(temp_f1))
                with open('../result_doc/test_best_f1' + Event_name + '.txt', 'a') as f:
                    message = 'epoch:' + str(epoch) + ' f1_score:' + str(temp_f1) + '\n'
                    f.write(message)
                best_f1_score = temp_f1
                best_epoch = epoch
                torch.save(lstm.state_dict(), Path)
                checkpoint_saved = True

    if checkpoint_saved:
        lstm.load_state_dict(torch.load(Path))
    else:
        # save_epoch was never reached: loading Path would either fail or
        # silently restore a checkpoint from an earlier run.
        print('No checkpoint written (save_epoch=' + str(save_epoch) +
              '); evaluating the final weights instead.')

    pred_y = predict_labels(test_x)
    if not checkpoint_saved:
        # Make the summary describe the weights that were actually evaluated.
        best_f1_score = f1_score(y_pred=pred_y, y_true=test_y, average='macro')
        best_epoch = max(self.EPOCH - 1, 0)
    test_report = classification_report(y_true=test_y,
                                        y_pred=pred_y,
                                        target_names=target_list)

    test_parameter_path = '../result_doc/test_parameter' + '_' + Event_name + '.txt'
    message = ("TimeStep:" + str(self.TIME_STEP)
               + '\tWINDOW_SIZE:' + str(self.WINDOW_SIZE)
               + "\tLSTM_NUM: " + str(self.LSTM_NUM)
               + '\tLayer num: ' + str(self.LSTM_layer_NUM)
               + '\tLR:' + str(self.LR)
               + '\tBatch_size: ' + str(self.BATCH_SIZE)
               + '\tHidden_size: ' + str(self.Hidden_SIZE)
               + '\tNormalizer:MinMaxScaler'
               + '\t epoch:' + str(best_epoch)
               + '\tf1_score:' + str(best_f1_score) + '\n'
               + 'include_MANRS_data:' + str(include_MANRS_data)
               + '\t time_bins:60s' + '\n'
               + test_report + '\n\n')
    with open(test_parameter_path, 'a') as f:
        print(message)
        f.write(message)

    self.models.append(lstm)
    # Persist the whole module (not just the state dict) for later reuse.
    torch.save(lstm, '../params/lstm' + Event_name + '.pkl')