Example 1
    def run_train_test(self):
        acc_list = []
        allData, allLabel, labelMap = prepareData.loadData(
            self.input, 700, self.dataType, self.mode)
        NUM_CLASS = len(set(allLabel))  # number of distinct classes (not used further in this snippet)

        for i in range(self.nFold):
            X_train, X_test, y_train, y_test = train_test_split(
                allData,
                allLabel,
                test_size=self.test_size,
                shuffle=True,
                random_state=i**i)  # NOTE: 0**0 == 1**1 == 1, so folds 0 and 1 get identical splits

            acc_dict, model_dict = self.train_val(X_train, y_train, i,
                                                  self.weighted)

            # An empty acc_dict tells self.test() to skip weighting:
            # acc_dict == {} means the models are combined unweighted.
            if not self.weighted:
                acc_dict = {}
            acc = self.test(model_dict, acc_dict, X_test, y_test, i)
            acc_list.append(acc)

        print('the weights used here are: ', acc_dict)  # acc_dict comes from the last fold
        avg_acc = mean(acc_list)
        tmp = 'acc list is: ' + str(acc_list)
        tmp2 = 'average acc is: ' + str(avg_acc)
        contents = '\n'.join([tmp, tmp2])
        write2file(contents, self.output)

        return acc_list, avg_acc
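For context, a minimal sketch of the weighted combination that acc_dict appears to drive. This is an assumption for illustration, not the author's actual test(): combine_predictions and the per-model softmax arrays are hypothetical.

import numpy as np

def combine_predictions(prob_by_model, acc_dict=None):
    # Hypothetical sketch: average per-model softmax outputs, optionally
    # weighted by each model's validation accuracy (acc_dict == {} or None
    # means an unweighted average, mirroring run_train_test above).
    names = sorted(prob_by_model)
    if acc_dict:
        weights = np.array([acc_dict[n] for n in names], dtype=float)
        weights /= weights.sum()
    else:
        weights = np.full(len(names), 1.0 / len(names))
    stacked = np.stack([prob_by_model[n] for n in names])  # (models, N, C)
    return np.tensordot(weights, stacked, axes=1)          # (N, C)

# usage: two models, three samples, two classes
probs = {'cnn': np.full((3, 2), 0.5), 'sae': np.full((3, 2), 0.5)}
print(combine_predictions(probs, acc_dict={'cnn': 0.9, 'sae': 0.8}))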
Example 2
def generateNotMonitoredData(opts, unmonitoredDir, mode):
    print('generate unmonitored onlyOrder data now...')
    allData, _, _ = prepareData.loadData(opts.input, 800, 'onlyOrder', mode)
    data = pd.DataFrame(allData)
    fname = 'not_onlyOrder.csv'
    fpath = os.path.join(unmonitoredDir, fname)
    print('data saved to {}'.format(fpath))
    data.to_csv(fpath, sep=',')  # note: to_csv also writes the row index column by default

    print('generate unmonitored both data now...')
    allData, _, _ = prepareData.loadData(opts.input, 800, 'both', mode)
    data = pd.DataFrame(allData)
    fname = 'not_both.csv'
    fpath = os.path.join(unmonitoredDir, fname)
    print('data saved to {}'.format(fpath))
    data.to_csv(fpath, sep=',')
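A minimal usage sketch; the paths, mode value, and opts layout below are placeholders, not the author's actual configuration:

from types import SimpleNamespace

# Hypothetical invocation: opts only needs an 'input' attribute here.
opts = SimpleNamespace(input='/path/to/unmonitored/traces')
generateNotMonitoredData(opts, unmonitoredDir='/tmp/unmonitored', mode='train')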
Example 3
def runTests(opts):
    modelList = ['cnn', 'sae', 'cudnnLstm']
    modelPathList = []
    report_list = []

    # load original data
    allData, allLabel, _ = prepareData.loadData(opts.input, 600, opts.dataType)
    print('finish loading original data')
    X_train_raw, X_test_raw_1, y_train_raw, y_test_raw_1 = train_test_split(allData, allLabel, test_size=0.2, shuffle=True, random_state=77)
    NUM_CLASS = len(set(y_test_raw_1))

    # load batch data
    batch1_dir = '/home/carl/work_dir/data/batches/2nd_round'
    batch2_dir = '/home/carl/work_dir/data/batches/3rd_round'
    X_test_raw_2, y_test_raw_2, _ = prepareData.loadData(batch1_dir, 600, opts.dataType)
    print('finish loading batch1 data')
    X_test_raw_3, y_test_raw_3, _ = prepareData.loadData(batch2_dir, 600, opts.dataType)
    print('finish loading batch2 data')

    testDataList = [(X_test_raw_1, y_test_raw_1), (X_test_raw_2, y_test_raw_2), (X_test_raw_3, y_test_raw_3)]


    for model in modelList:
        print('select model {} with dataType {} to run defense testing...'.format(model, opts.dataType))
        myOpts = MyOptions(model, opts.dataType)
        params, modelObj = nFold.chooseModel(myOpts)
        params['NUM_CLASS'] = NUM_CLASS

        print('start experiment with model {}'.format(model))
        acc_list = doExperiment(X_train_raw, y_train_raw, testDataList, params, modelObj)
        tmp = 'model {} with dataType {} has an accuracy list {}'.format(model, opts.dataType, acc_list)
        print(tmp)
        report_list.append(tmp)

    # Disabled: references X_test_raw_d / y_test_raw_d, which are not defined in this version.
    '''
    ensemble_acc, ensemble_fp, ensemble_tp = doEnsembleTest(modelPathList, X_test_raw_d, y_test_raw_d, NUM_CLASS, opts.dataType)
    tmp = 'ensemble results with dataType {} and train with Original Data and Test with Defense data to run defense testing has an accuracy {:f}'.format(opts.dataType, ensemble_acc)
    report_list.append(tmp)
    tmp = 'ensemble false positive rate is: {:f}, true positive rate is: {:f}'.format(ensemble_fp, ensemble_tp)
    report_list.append(tmp)
    '''

    contents = '\n#########################\n'.join(report_list)
    contents = contents + '\n'
    nFold.write2file(contents, opts.output)
Example 4
def loadTestData(opts, PARAMS):
    allData, allLabel, labelMap = prepareData.loadData(opts.input,
                                                       PARAMS['data_dim'],
                                                       opts.dataType,
                                                       opts.mode)
    NUM_CLASS = len(set(allLabel))
    allLabel = np_utils.to_categorical(allLabel, NUM_CLASS)

    return allData, allLabel, labelMap, NUM_CLASS
Example 5
def loadTestData(opts, params):
    allData_raw, allLabel_raw, labelMap = prepareData.loadData(
        opts.input, params['data_dim'], opts.dataType, mode=opts.mode)
    NUM_CLASS = len(set(allLabel_raw))
    # add a trailing channel axis, (N, L) -> (N, L, 1), e.g. for Conv1D/LSTM input
    allData = allData_raw.reshape(allData_raw.shape[0], allData_raw.shape[1], 1)
    allLabel = np_utils.to_categorical(allLabel_raw, NUM_CLASS)

    return allData, allLabel, labelMap, NUM_CLASS
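The only difference from Example 4 is the reshape: Example 5 adds a trailing channel axis so the data fits sequence models such as Conv1D or LSTM layers. A minimal shape sketch (the sizes are arbitrary; to_categorical lives in tensorflow.keras.utils in current Keras, which is where the legacy np_utils helper came from):

import numpy as np
from tensorflow.keras.utils import to_categorical

N, L, NUM_CLASS = 8, 700, 3
allData_raw = np.zeros((N, L), dtype=np.float32)
allLabel_raw = np.arange(N) % NUM_CLASS

allData = allData_raw.reshape(N, L, 1)              # Example 5's extra channel axis
allLabel = to_categorical(allLabel_raw, NUM_CLASS)  # one-hot labels
print(allData.shape, allLabel.shape)                # (8, 700, 1) (8, 3)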
Example 6
def runNormalTest(opts):
    PARAMS, modelObj = chooseModel(opts)
    if opts.model == 'ensemble':
        ensembleModel = ensemble.EnsembleModel(opts)
        acc_list, acc = ensembleModel.run()
        print(acc_list)
        print(acc)
    else:
        allData, allLabel, labelMap = prepareData.loadData(opts.input, PARAMS['data_dim'], opts.dataType, opts.mode)
        acc_list, trainTimeList, testTimeList = runTestOnce(opts, allData, allLabel, PARAMS)
        contents = resReport(opts.model, acc_list, trainTimeList, testTimeList)
        print(contents)
        write2file(contents, opts.output)
Example 7
                                    name="Y")
        self.keepProb = tf.placeholder(tf.float32)
        self.defineCNN()
        self.defineLoss()
        self.SGD(X, Y, startLearningRate, miniBatchFraction, epoch, keepProb)

    def predict_proba(self, X):
        """
        使用神经网络对未知数据进行预测
        """
        sess = self.sess
        pred = tf.nn.softmax(logits=self.out, name="pred")  # note: adds a new graph node on every call
        prob = sess.run(pred, feed_dict={self.input: X, self.keepProb: 1.0})
        return prob
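predict_proba returns one softmax row per sample, so each row sums to 1 and argmax gives the predicted class. A minimal sketch of consuming such output (the probability values are made up):

import numpy as np

prob = np.array([[0.05] * 9 + [0.55],    # hypothetical output for 2 samples,
                 [0.91] + [0.01] * 9])   # 10 classes
assert np.allclose(prob.sum(axis=1), 1.0)  # softmax rows sum to 1
print(prob.argmax(axis=1))                 # [9 0]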


if __name__ == "__main__":
    data = loadData()
    print(data[0].shape)
    trainData, validationData, trainLabel, validationLabel = train_test_split(
        data[0], data[1], test_size=0.3, random_state=1001)
    trainSet = {"X": trainData, "Y": trainLabel}
    validationSet = {"X": validationData, "Y": validationLabel}
    testSet = {"X": data[2]}
    # storage paths differ between Windows and Linux
    if os.name == "nt":
        ann = CNN("logs\\mnist_cnn", trainSet, validationSet, testSet)
    else:
        ann = CNN("logs/mnist_cnn", trainSet, validationSet, testSet)
    ann.fit()
Example 8
def runTests(opts):
    epsilon = os.path.basename(opts.defense)
    modelList = ['sae', 'cnn', 'cudnnLstm']
    #modelList = ['cnn']
    modelPathList = []
    report_list = []

    print('load original data...')
    o_allData, o_allLabel, _ = prepareData.loadData(opts.original, 600,
                                                    opts.dataType)
    X_train_raw_o, X_test_raw_o, y_train_raw_o, y_test_raw_o = train_test_split(
        o_allData, o_allLabel, test_size=0.2, shuffle=True, random_state=77)

    print('\nload defense data...')
    d_allData, d_allLabel, _ = prepareData.loadData(opts.defense, 600,
                                                    opts.dataType)
    X_train_raw_d, X_test_raw_d, y_train_raw_d, y_test_raw_d = train_test_split(
        d_allData, d_allLabel, test_size=0.2, shuffle=True, random_state=44)

    if len(set(y_test_raw_d)) != len(set(y_test_raw_o)):
        print('y_test_raw len is {:d}, y_test_defense len is {:d}'.format(
            len(set(y_test_raw_o)), len(set(y_test_raw_d))))

    NUM_CLASS = len(set(y_test_raw_o))
    for model in modelList:
        print(
            'select model {} with dataType {} and epsilon={} to run defense testing...'
            .format(model, opts.dataType, epsilon))
        myOpts = MyOptions(model, opts.dataType)
        params, modelObj = nFold.chooseModel(myOpts)
        params['NUM_CLASS'] = NUM_CLASS

        print('train with original data and test with defense data...')
        acc, fp, tp, modelPath = doExperiment(X_train_raw_o, y_train_raw_o,
                                              X_test_raw_d, y_test_raw_d,
                                              params, modelObj)
        modelPathList.append(modelPath)
        tmp = 'model {} with dataType {} and epsilon={}, trained with Original Data and tested with Defense data, has a defense testing accuracy {:f}'.format(
            model, opts.dataType, epsilon, acc)
        print(tmp)
        report_list.append(tmp)
        tmp = 'model {} false positive rate is: {:f} and its true positive rate is: {:f}'.format(
            model, fp, tp)
        report_list.append(tmp)

    ensemble_acc, ensemble_fp, ensemble_tp = doEnsembleTest(
        modelPathList, X_test_raw_d, y_test_raw_d, NUM_CLASS, opts.dataType)
    tmp = 'ensemble with dataType {} and epsilon={}, trained with Original Data and tested with Defense data, has a defense testing accuracy {:f}'.format(
        opts.dataType, epsilon, ensemble_acc)
    report_list.append(tmp)
    tmp = 'ensemble false positive rate is: {:f}, true positive rate is: {:f}'.format(
        ensemble_fp, ensemble_tp)
    report_list.append(tmp)

    contents = '\n#########################\n'.join(report_list)
    contents = contents + '\n'
    nFold.write2file(contents, opts.output + '_train_with_origin')

    report_list = []
    modelPathList = []  # reset so the second ensemble uses only defense-trained models; the original kept accumulating, which looks unintended
    NUM_CLASS = len(set(y_test_raw_d))
    for model in modelList:
        print('select model {} with dataType {} to run defense testing...'.
              format(model, opts.dataType))
        myOpts = MyOptions(model, opts.dataType)
        params, modelObj = nFold.chooseModel(myOpts)
        params['NUM_CLASS'] = NUM_CLASS

        print('train with defense data and test with defense data...')
        acc, fp, tp, modelPath = doExperiment(X_train_raw_d, y_train_raw_d,
                                              X_test_raw_d, y_test_raw_d,
                                              params, modelObj)
        modelPathList.append(modelPath)
        tmp = 'model {} with dataType {} and epsilon={}, trained with Defense Data and tested with Defense data, has a defense testing accuracy {:f}'.format(
            model, opts.dataType, epsilon, acc)
        print(tmp)
        report_list.append(tmp)
        tmp = 'model {} false positive rate is: {:f} and its true positive rate is: {:f}'.format(
            model, fp, tp)
        report_list.append(tmp)

    ensemble_acc, ensemble_fp, ensemble_tp = doEnsembleTest(
        modelPathList, X_test_raw_d, y_test_raw_d, NUM_CLASS, opts.dataType)
    tmp = 'ensemble with dataType {} and epsilon={}, trained with Defense Data and tested with Defense data, has a defense testing accuracy {:f}'.format(
        opts.dataType, epsilon, ensemble_acc)
    report_list.append(tmp)
    tmp = 'ensemble false positive rate is: {:f}, true positive rate is: {:f}'.format(
        ensemble_fp, ensemble_tp)
    report_list.append(tmp)

    contents = '\n#########################\n'.join(report_list)
    contents = contents + '\n'
    nFold.write2file(contents, opts.output + '_train_with_defense')
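For reference, a minimal sketch of how false positive and true positive rates are commonly computed in this open-world setting. This is an assumption about the metric, not the author's doEnsembleTest: the helper fp_tp_rates and the reserved unmonitored label are hypothetical.

import numpy as np

def fp_tp_rates(y_true, y_pred, unmonitored_label=-1):
    # TPR: fraction of monitored samples classified with their correct label.
    # FPR: fraction of unmonitored samples flagged as some monitored class.
    y_true, y_pred = np.asarray(y_true), np.asarray(y_pred)
    monitored = y_true != unmonitored_label
    tp = np.mean(y_pred[monitored] == y_true[monitored])
    fp = np.mean(y_pred[~monitored] != unmonitored_label)
    return fp, tp

print(fp_tp_rates([0, 1, -1, -1], [0, 2, -1, 0]))  # (0.5, 0.5)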
Example 9
    ]
    stateSpace = [state for state in it.product(agentStates, targetStates)]

    reset = GE.Reset(actionSpace, agentStates, targetStates)
    print('Generating Optimal Policy...')
    optimalPolicy = PD.generateOptimalPolicy(stateSpace, actionSpace)
    print('Optimal Policy Generated.')

    maxTimeStep = int(gridSize * gridSize / 2)
    sampleTrajectory = PD.SampleTrajectory(maxTimeStep, transitionFunction,
                                           isTerminal, reset)

    trajNum = 5000
    dataSetPath = "all_data.pkl"
    #PD.generateData(sampleTrajectory, optimalPolicy, trajNum, dataSetPath, actionSpace)
    dataSet = PD.loadData(dataSetPath)
    random.shuffle(dataSet)

    trainingDataSizes = list(range(1000, 9900, 1000))
    # growing prefixes of the shuffled data set, as (states, labels) pairs
    trainingDataList = [([state for state, _ in dataSet[:size]],
                         [label for _, label in dataSet[:size]])
                        for size in trainingDataSizes]
    testDataSize = 5000
    testData = PD.sampleData(dataSet, testDataSize)

    learningRate = 0.001
    regularizationFactor = 1e-4
    generatePolicyNet = NN.GeneratePolicyNet(4, 8, learningRate,
                                             regularizationFactor)
    models = [generatePolicyNet(3, 32) for _ in range(len(trainingDataSizes))]