# Shared imports assumed by these snippets; the project-local helpers
# (prepareData, nFold, ensemble, MyOptions, chooseModel, processData,
# compute_tp_and_fp) come from the surrounding repository and are not shown.
# train_val and test below are methods of the ensemble-training class, shown
# here without their class statement.
import os
import re

import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split


    def train_val(self, allData_raw, allLabel_raw, iter, weighted):
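        """Train one model per architecture in ``modelNameList``.

        With ``weighted`` set, 10% of the data is held out and each model's
        accuracy on it is stored in ``acc_dict`` (presumably used later by
        ``merge_res`` to weight the ensemble vote); otherwise all data is used
        for training and ``acc_dict`` stays empty. Returns
        ``(acc_dict, model_dict)``, where ``model_dict`` maps model name to
        (model, params, saved weights path).
        """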
        if weighted:
            print('\nweighted: splitting off an extra validation set\n')
            X_train_raw, X_test_raw, y_train_raw, y_test_raw = train_test_split(
                allData_raw,
                allLabel_raw,
                test_size=0.1,
                shuffle=True,
                random_state=74)
        else:
            X_train_raw, y_train_raw = allData_raw, allLabel_raw

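        # the three ensemble members: 'sae' (presumably a stacked autoencoder),
        # a CNN, and a CuDNN-backed LSTM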
        modelNameList = ['sae', 'cnn', 'cudnnLstm']
        acc_dict = {}
        self.NUM_CLASS = len(set(allLabel_raw))
        model_dict = {}
        for modelName in modelNameList:
            print('now training model: {}'.format(modelName))
            self.opts.model = modelName
            model, params = chooseModel(self.opts,
                                        ifTest=False,
                                        NUM_CLASS=self.NUM_CLASS)
            X_train = prepareData.cutData(params['data_dim'], X_train_raw)
            X_train, y_train = processData(modelName, X_train, y_train_raw,
                                           self.NUM_CLASS)
            if weighted:
                X_test = prepareData.cutData(params['data_dim'], X_test_raw)
                X_test, y_test = processData(modelName, X_test, y_test_raw,
                                             self.NUM_CLASS)

            modelPath = model.train(X_train, y_train, self.NUM_CLASS)

            modelPath_prefix, modelPath_suffix = os.path.splitext(modelPath)
            newPath = modelPath_prefix + '_' + str(
                iter) + '_' + self.dataType + modelPath_suffix
            print('save model {} of iter {} to {}'.format(
                modelName, str(iter), newPath))
            os.rename(modelPath, newPath)

            if weighted:
                tmp_acc = model.test(X_test, y_test, self.NUM_CLASS, newPath)
                acc_dict[modelName] = tmp_acc
                print('model {} acc is: {:f}'.format(modelName, tmp_acc))
            else:
                print('averaging ensemble: no validation accuracy to report')

            model_dict[modelName] = (model, params, newPath)

        return acc_dict, model_dict


def doExperiment(X_train_raw, y_train_raw, X_test_raw, y_test_raw, params,
                 modelObj):
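    """Train ``modelObj`` on the (cut and preprocessed) training data, predict
    on the test set, and return accuracy, false positive rate, true positive
    rate, and the saved model path.
    """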
    X_train = prepareData.cutData(params['data_dim'], X_train_raw)
    X_test = prepareData.cutData(params['data_dim'], X_test_raw)
    NUM_CLASS = params['NUM_CLASS']
    X_train, y_train, X_test, y_test = nFold.processData(
        modelObj.name, X_train, y_train_raw, X_test, y_test_raw, NUM_CLASS)

    modelPath = modelObj.train(X_train, y_train, NUM_CLASS)
    y_pred_tmp = modelObj.prediction(X_test, NUM_CLASS, modelPath)
    y_pred = np.argmax(y_pred_tmp, axis=1)
    acc = accuracy_score(y_test_raw, y_pred)
    # true positive rate and false positive rate
    tp, fp = compute_tp_and_fp(y_test_raw, y_pred)
    return acc, fp, tp, modelPath


def doEnsembleTest(modelPathList, X_test_raw, y_test_raw, NUM_CLASS, dataType):
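    """Load each saved model, sum their per-class prediction scores (argmax of
    the sum equals argmax of the average), and report ensemble accuracy, false
    positive rate, and true positive rate.
    """
    # Score accumulator; starting at 0 works because 0 + ndarray broadcasts.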
    ensemble_pred = 0
    for modelPath in modelPathList:
        # infer the architecture from the saved weights' filename
        if re.search('cnn', modelPath):
            opts = MyOptions('cnn', dataType)
        elif re.search('sae', modelPath):
            opts = MyOptions('sae', dataType)
        elif re.search('[Ll]stm', modelPath):
            opts = MyOptions('cudnnLstm', dataType)
        else:
            raise ValueError(
                'cannot infer model type from path: {}'.format(modelPath))
        modelObj, params = ensemble.chooseModel(opts, False)
        X_test = prepareData.cutData(params['data_dim'], X_test_raw)
        X_test, y_test = ensemble.processData(opts.model, X_test, y_test_raw,
                                              NUM_CLASS)
        model = modelObj.create_model(NUM_CLASS)
        model.load_weights(modelPath)

        pred = model.predict(X_test)
        ensemble_pred = ensemble_pred + pred

    y_pred = np.argmax(ensemble_pred, axis=1)
    acc = accuracy_score(y_test_raw, y_pred)
    print('ensemble acc of defense test is: {:f}'.format(acc))
    tp, fp = compute_tp_and_fp(y_test_raw, y_pred)
    print('ensemble false positive rate is: {:f}, true positive rate is: {:f}'.
          format(fp, tp))
    return acc, fp, tp
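
# (continuing the ensemble-training class from train_val above; this method
# consumes that method's model_dict and acc_dict)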
    def test(self, model_dict, acc_dict, X_test_raw, y_test_raw, i):
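        """Evaluate each trained model on the test set, then merge their raw
        predictions via ``self.merge_res`` into one ensemble accuracy.
        """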
        NUM_CLASS = len(set(y_test_raw))
        acc_list = []

        pred_res_dict = {}
        for key in model_dict.keys():
            (model, params, modelPath) = model_dict[key]

            X_test = prepareData.cutData(params['data_dim'], X_test_raw)
            X_test, y_test = processData(key, X_test, y_test_raw, NUM_CLASS)
            model_pred = model.prediction(X_test, NUM_CLASS, modelPath)
            tmp = np.argmax(model_pred, axis=1)
            tmpAcc = accuracy_score(y_test_raw, tmp)
            acc_list.append(tmpAcc)
            print('{} model acc is: {:f}'.format(key, tmpAcc))

            pred_res_dict[key] = model_pred

        # assert len(pred_res_dict) == 3

        acc = self.merge_res(pred_res_dict, NUM_CLASS, y_test_raw, acc_dict)

        return acc
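

# --- Usage sketch (hypothetical; the class name and file paths below are
# assumptions for illustration, not part of the original repository) ---
#
#   trainer = EnsembleTrainer(opts, dataType='defense')    # hypothetical class
#   acc_dict, model_dict = trainer.train_val(X, y, iter=0, weighted=True)
#   ensemble_acc = trainer.test(model_dict, acc_dict, X_test, y_test, 0)
#
#   # Or evaluate previously saved weights directly:
#   paths = ['cnn_0_defense.h5', 'sae_0_defense.h5', 'cudnnLstm_0_defense.h5']
#   acc, fp, tp = doEnsembleTest(paths, X_test, y_test, NUM_CLASS, 'defense')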