def train_val(self, allData_raw, allLabel_raw, iter, weighted):
    # Train each base model; if weighted, hold out 10% of the data as a
    # validation set so the ensemble can later weight models by validation accuracy.
    if weighted:
        print('\nif weighted, further split val set\n')
        X_train_raw, X_test_raw, y_train_raw, y_test_raw = train_test_split(
            allData_raw, allLabel_raw, test_size=0.1, shuffle=True, random_state=74)
    else:
        X_train_raw, y_train_raw = allData_raw, allLabel_raw

    modelNameList = ['sae', 'cnn', 'cudnnLstm']
    acc_dict = defaultdict()
    model_dict = defaultdict()
    self.NUM_CLASS = len(set(allLabel_raw))

    for modelName in modelNameList:
        print('now training model: {}'.format(modelName))
        self.opts.model = modelName
        model, params = chooseModel(self.opts, ifTest=False, NUM_CLASS=self.NUM_CLASS)

        # Cut traces to this model's input length and format data/labels per model type.
        X_train = prepareData.cutData(params['data_dim'], X_train_raw)
        X_train, y_train = processData(modelName, X_train, y_train_raw, self.NUM_CLASS)
        if weighted:
            X_test = prepareData.cutData(params['data_dim'], X_test_raw)
            X_test, y_test = processData(modelName, X_test, y_test_raw, self.NUM_CLASS)

        # Train, then rename the checkpoint so each iteration keeps its own copy.
        modelPath = model.train(X_train, y_train, self.NUM_CLASS)
        modelPath_prefix, modelPath_suffix = os.path.splitext(modelPath)
        newPath = modelPath_prefix + '_' + str(iter) + '_' + self.dataType + modelPath_suffix
        print('save model {} of iter {} to {}'.format(modelName, str(iter), newPath))
        os.rename(modelPath, newPath)

        if weighted:
            tmp_acc = model.test(X_test, y_test, self.NUM_CLASS, newPath)
            acc_dict[modelName] = tmp_acc
            print('model {} acc is: {:f}'.format(modelName, tmp_acc))
        else:
            acc_dict = {}
            print('average model, no val acc')
        model_dict[modelName] = (model, params, newPath)
    return acc_dict, model_dict
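
# Illustrative usage (not from this repo): train_val and test might be chained
# per iteration roughly as below. Ensemble, loadData, and n_iters are
# hypothetical placeholder names introduced only for this sketch.
#
#   ensembleObj = Ensemble(opts)                                        # hypothetical
#   allData_raw, allLabel_raw, X_test_raw, y_test_raw = loadData(opts)  # hypothetical
#   for i in range(n_iters):
#       acc_dict, model_dict = ensembleObj.train_val(allData_raw, allLabel_raw,
#                                                    iter=i, weighted=True)
#       ensembleObj.test(model_dict, acc_dict, X_test_raw, y_test_raw, i)
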
def doExperiment(X_train_raw, y_train_raw, X_test_raw, y_test_raw, params, modelObj):
    # Train a single model and report accuracy plus true/false positive rates.
    X_train = prepareData.cutData(params['data_dim'], X_train_raw)
    X_test = prepareData.cutData(params['data_dim'], X_test_raw)
    NUM_CLASS = params['NUM_CLASS']
    X_train, y_train, X_test, y_test = nFold.processData(
        modelObj.name, X_train, y_train_raw, X_test, y_test_raw, NUM_CLASS)
    modelPath = modelObj.train(X_train, y_train, NUM_CLASS)
    y_pred_tmp = modelObj.prediction(X_test, NUM_CLASS, modelPath)
    y_pred = np.argmax(y_pred_tmp, 1)
    acc = accuracy_score(y_test_raw, y_pred)
    # true positive rate and false positive rate
    tp, fp = compute_tp_and_fp(y_test_raw, y_pred)
    return acc, fp, tp, modelPath
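
# A minimal sketch of what compute_tp_and_fp could look like, assuming an
# open-world style evaluation in which label 0 marks the negative (unmonitored)
# class and every other label is positive. The helper actually used by this
# repo may define the rates differently; this is illustrative only.
def compute_tp_and_fp_sketch(y_true, y_pred):
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    pos = y_true != 0   # monitored samples
    neg = ~pos          # unmonitored samples
    # TPR: fraction of positives predicted as any positive class
    tp = float(np.mean(y_pred[pos] != 0)) if pos.any() else 0.0
    # FPR: fraction of negatives predicted as some positive class
    fp = float(np.mean(y_pred[neg] != 0)) if neg.any() else 0.0
    return tp, fp
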
def doEnsembleTest(modelPathList, X_test_raw, y_test_raw, NUM_CLASS, dataType):
    # Sum the softmax outputs of every saved model, then take the argmax of the
    # summed scores as the ensemble prediction.
    ensemble_pred = 0
    for modelPath in modelPathList:
        # Infer the model type from the checkpoint file name.
        if re.search('cnn', modelPath):
            opts = MyOptions('cnn', dataType)
        elif re.search('sae', modelPath):
            opts = MyOptions('sae', dataType)
        elif re.search('Lstm', modelPath) or re.search('lstm', modelPath):
            opts = MyOptions('cudnnLstm', dataType)
        else:
            raise ValueError('cannot infer model type from path: {}'.format(modelPath))
        modelObj, params = ensemble.chooseModel(opts, False)
        X_test = prepareData.cutData(params['data_dim'], X_test_raw)
        X_test, y_test = ensemble.processData(opts.model, X_test, y_test_raw, NUM_CLASS)
        model = modelObj.create_model(NUM_CLASS)
        model.load_weights(modelPath)
        pred = model.predict(X_test)
        ensemble_pred = ensemble_pred + pred

    y_pred = np.argmax(ensemble_pred, 1)
    acc = accuracy_score(y_test_raw, y_pred)
    print('ensemble acc of defense test is: {:f}'.format(acc))
    tp, fp = compute_tp_and_fp(y_test_raw, y_pred)
    print('ensemble false positive rate is: {:f}, true positive rate is: {:f}'.format(fp, tp))
    return acc, fp, tp
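
# Illustrative usage (not from this repo): doEnsembleTest expects the renamed
# checkpoints produced by train_val; the file names and dataType below are
# hypothetical examples, not paths this repo actually writes.
#
#   modelPathList = ['saved/cnn_0_defense.h5',
#                    'saved/sae_0_defense.h5',
#                    'saved/cudnnLstm_0_defense.h5']
#   acc, fp, tp = doEnsembleTest(modelPathList, X_test_raw, y_test_raw,
#                                NUM_CLASS, dataType='defense')
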
def test(self, model_dict, acc_dict, X_test_raw, y_test_raw, i):
    # Evaluate every trained model on the test set, then merge their per-class
    # predictions into a single ensemble decision via merge_res.
    NUM_CLASS = len(set(y_test_raw))
    acc_list = []
    pred_res_dict = defaultdict()
    for key in model_dict.keys():
        (model, params, modelPath) = model_dict[key]
        X_test = prepareData.cutData(params['data_dim'], X_test_raw)
        X_test, y_test = processData(key, X_test, y_test_raw, NUM_CLASS)
        model_pred = model.prediction(X_test, NUM_CLASS, modelPath)
        tmp = np.argmax(model_pred, 1)
        tmpAcc = accuracy_score(y_test_raw, tmp)
        acc_list.append(tmpAcc)
        print('{} model acc is: {:f}'.format(key, tmpAcc))
        pred_res_dict[key] = model_pred
    # assert(len(pred_res_dict.keys()) == 3)
    acc = self.merge_res(pred_res_dict, NUM_CLASS, y_test_raw, acc_dict)
    return acc
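
# A minimal sketch of what self.merge_res could do, assuming the ensemble
# combines per-model softmax outputs by accuracy-weighted averaging when
# acc_dict holds validation accuracies and by a plain average otherwise
# (the 'average model' case in train_val). The real merge_res may differ.
def merge_res_sketch(pred_res_dict, NUM_CLASS, y_test_raw, acc_dict):
    merged = np.zeros((len(y_test_raw), NUM_CLASS))
    for key, pred in pred_res_dict.items():
        # weight each model by its validation accuracy if one is available
        weight = acc_dict.get(key, 1.0) if acc_dict else 1.0
        merged += weight * pred
    y_pred = np.argmax(merged, 1)
    acc = accuracy_score(y_test_raw, y_pred)
    print('ensemble acc is: {:f}'.format(acc))
    return acc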