Code example #1
    def classify(runID, trainingSet, testingSet):
        [trainingFile,
         testingFile] = arffWriter.writeArffFiles(runID, trainingSet,
                                                  testingSet)

        # deep learning
        if config.DEEP_LEARNING_METHOD != -1:
            if config.DEEP_LEARNING_METHOD == 1:
                logistic_sgd_2.runDL([trainingFile, testingFile])
            elif config.DEEP_LEARNING_METHOD == 2:
                [trainingFile,
                 testingFile] = dA_2.runDL([trainingFile, testingFile])
            elif config.DEEP_LEARNING_METHOD == 3:
                mlp_2.runDL([trainingFile, testingFile])
            elif config.DEEP_LEARNING_METHOD == 4:
                SdA_2.runDL([trainingFile, testingFile])

        if config.NUM_MONITORED_SITES != -1:  # open-world dataset generation only; no classification needed
            [accuracy, debugInfo] = ['NA', []]
            return [accuracy, debugInfo]
        '''
        return wekaAPI.execute( trainingFile,
                             testingFile,
                             "weka.Run weka.classifiers.functions.LibSVM",
                             ['-K','2', # RBF kernel
                              '-G','0.0000019073486328125', # Gamma
                              '-C','131072'] ) # Cost
        '''

        if config.CROSS_VALIDATION == 0:
            return wekaAPI.execute(
                trainingFile,
                testingFile,
                "weka.Run weka.classifiers.functions.LibSVM",
                [
                    '-K',
                    '2',  # RBF kernel
                    '-G',
                    '0.0000019073486328125',  # Gamma
                    ##May20 '-Z', # normalization 18 May 2015
                    '-C',
                    '131072'
                ])  # Cost
        else:
            file = Utils.joinTrainingTestingFiles(
                trainingFile, testingFile)  # join and shuffle
            return wekaAPI.executeCrossValidation(
                file,
                "weka.Run weka.classifiers.functions.LibSVM",
                [
                    '-x',
                    str(config.CROSS_VALIDATION),  # number of folds
                    '-K',
                    '2',  # RBF kernel
                    '-G',
                    '0.0000019073486328125',  # Gamma
                    ##May20 '-Z', # normalization 18 May 2015
                    '-C',
                    '131072'
                ])  # Cost
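The LibSVM flags above decode as: '-K 2' for an RBF kernel, '-G 0.0000019073486328125' for gamma = 2**-19, and '-C 131072' for cost C = 2**17. As a point of reference only (not this project's wekaAPI wrapper), a minimal sketch of the same hyperparameters expressed with scikit-learn's SVC, on placeholder data:

import numpy as np
from sklearn.svm import SVC

# Toy stand-ins for the feature matrices the real code reads from the ARFF files.
rng = np.random.default_rng(0)
X_train, y_train = rng.normal(size=(100, 20)), rng.integers(0, 2, 100)
X_test, y_test = rng.normal(size=(40, 20)), rng.integers(0, 2, 40)

clf = SVC(kernel='rbf', gamma=2**-19, C=2**17)  # mirrors -K 2, -G, -C above
clf.fit(X_train, y_train)
print(clf.score(X_test, y_test))  # fraction of correct test predictions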
Code example #2
File: Cumul.py Project: shibz-islam/BiMorphing
    def classify(runID, trainingSet, testingSet):
        [trainingFile,
         testingFile] = arffWriter.writeArffFiles(runID, trainingSet,
                                                  testingSet)

        if config.n_components_PCA != 0:
            [trainingFile,
             testingFile] = Utils.calcPCA2([trainingFile, testingFile])

        if config.n_components_LDA != 0:
            [trainingFile,
             testingFile] = Utils.calcLDA6([trainingFile, testingFile])

        if config.n_components_QDA != 0:
            [trainingFile,
             testingFile] = Utils.calcQDA([trainingFile, testingFile])

        classifier = "svm"
        kwargs = {}
        kwargs['C'] = 2**11
        kwargs['kernel'] = 'rbf'
        kwargs['gamma'] = 2

        if config.CROSS_VALIDATION == 0:
            return wekaAPI.executeSklearn(trainingFile, testingFile,
                                          classifier, **kwargs)
        else:
            file = Utils.joinTrainingTestingFiles(
                trainingFile, testingFile)  # join and shuffle
            return wekaAPI.executeSklearnCrossValidationScaleWithRange(
                file, classifier, config.CROSS_VALIDATION, (-1, 1),
                **kwargs)  # CV with normalization
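Note the hyperparameters here (C = 2**11, gamma = 2, RBF kernel) and, in the cross-validation branch, scaling into the range (-1, 1). A minimal scikit-learn sketch of that scale-then-classify combination on placeholder data (the pipeline and fold count are illustrative, not the project's wekaAPI wrapper):

import numpy as np
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVC

rng = np.random.default_rng(0)
X, y = rng.normal(size=(120, 10)), rng.integers(0, 3, 120)

# Scale every feature into (-1, 1), then apply the RBF SVM from the kwargs above.
model = make_pipeline(MinMaxScaler(feature_range=(-1, 1)),
                      SVC(kernel='rbf', C=2**11, gamma=2))
print(cross_val_score(model, X, y, cv=5).mean())  # 5 folds, illustrative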
Code example #3
File: HerrmannClassifier.py Project: ruur/deepdig
 def classify(runID, trainingSet, testingSet):
     [trainingFile,
      testingFile] = arffWriter.writeArffFiles(runID, trainingSet,
                                               testingSet)
     return wekaAPI.execute(trainingFile, testingFile,
                            "weka.classifiers.bayes.NaiveBayesMultinomial",
                            [])
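This example runs Weka's multinomial naive Bayes with no extra options. For comparison, a self-contained sketch of the scikit-learn analogue, MultinomialNB, which expects non-negative count-like features (the data here is synthetic):

import numpy as np
from sklearn.naive_bayes import MultinomialNB

rng = np.random.default_rng(0)
X_train = rng.integers(0, 10, size=(50, 12))  # non-negative counts
y_train = rng.integers(0, 3, 50)
X_test = rng.integers(0, 10, size=(10, 12))

nb = MultinomialNB()
nb.fit(X_train, y_train)
print(nb.predict(X_test))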
Code example #4
File: HpNGramClustering.py Project: ruur/deepdig
    def classify(runID, trainingSet, testingSet):

        [trainingFile,
         testingFile] = arffWriter.writeArffFiles(runID, trainingSet,
                                                  testingSet)

        if config.CLUSTERING_METHOD == 1:
            clusteringAPI.calcKmeans([trainingFile, testingFile],
                                     "Description goes here!")
        elif config.CLUSTERING_METHOD == 2:
            [trainingFile,
             testingFile] = Utils.calcPCA2([trainingFile, testingFile])
            clusteringAPI.calcKmeans([trainingFile, testingFile],
                                     "Description goes here!")
        elif config.CLUSTERING_METHOD == 3:
            [trainingFile,
             testingFile] = Utils.calcPCA2([trainingFile, testingFile])
            clusteringAPI.calcKmeansCvxHullDelaunay(
                [trainingFile, testingFile], "Description goes here!")
        elif config.CLUSTERING_METHOD == 4:
            [trainingFile,
             testingFile] = Utils.calcPCA2([trainingFile, testingFile])
            clusteringAPI.calcKmeansCvxHullDelaunay_Mixed(
                [trainingFile, testingFile], "Description goes here!")
        elif config.CLUSTERING_METHOD == 5:
            [trainingFile,
             testingFile] = Utils.calcPCA2([trainingFile, testingFile])
            clusteringAPI.calcKmeansCvxHullDelaunay_Mixed_KNN(
                [trainingFile, testingFile],
                "Description goes here!",
                threshold=3)

        return ['NA', []]
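Every branch from method 2 onward first reduces the feature space with PCA and then clusters. A compact sketch of that PCA-then-k-means pattern with scikit-learn (component and cluster counts are illustrative; the project's Utils.calcPCA2 and clusteringAPI wrappers are not reproduced here):

import numpy as np
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA

rng = np.random.default_rng(0)
X = rng.normal(size=(100, 30))

X_red = PCA(n_components=5).fit_transform(X)  # dimensionality reduction first
labels = KMeans(n_clusters=4, n_init=10).fit_predict(X_red)  # then cluster
print(labels[:10])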
Code example #5
 def classify( runID, trainingSet, testingSet ):
     [trainingFile,testingFile] = arffWriter.writeArffFiles( runID, trainingSet, testingSet )
     return wekaAPI.execute( trainingFile,
                          testingFile,
                          "weka.Run weka.classifiers.functions.LibSVM",
                          ['-K','2', # RBF kernel
                           '-G','0.0000019073486328125', # Gamma
                           '-C','131072'] ) # Cost
Code example #6
 def classify(runID, trainingSet, testingSet):
     [trainingFile,
      testingFile] = arffWriter.writeArffFiles(runID, trainingSet,
                                               testingSet)
     # deep learning (AE)
     if config.AE != -1:
         [trainingFile,
          testingFile] = dA_2.calcAE([trainingFile, testingFile])
     return wekaAPI.execute(trainingFile, testingFile,
                            "weka.classifiers.bayes.NaiveBayes", ['-K'])
Code example #7
    def classify(runID, trainingSet, testingSet):
        [trainingFile,
         testingFile] = arffWriter.writeArffFiles(runID, trainingSet,
                                                  testingSet)

        if config.n_components_PCA != 0:
            [trainingFile,
             testingFile] = Utils.calcPCA2([trainingFile, testingFile])

        if config.n_components_LDA != 0:
            [trainingFile,
             testingFile] = Utils.calcLDA6([trainingFile, testingFile])

        if config.n_components_QDA != 0:
            [trainingFile,
             testingFile] = Utils.calcQDA([trainingFile, testingFile])

        return wekaAPI.execute(trainingFile, testingFile,
                               "weka.classifiers.bayes.NaiveBayes", ['-K'])
Code example #8
    def classify( runID, trainingSet, testingSet ):
        [trainingFile,testingFile] = arffWriter.writeArffFiles( runID, trainingSet, testingSet )

        if config.n_components_PCA != 0:
            [trainingFile,testingFile] = Utils.calcPCA2([trainingFile,testingFile])

        if config.n_components_LDA != 0:
            [trainingFile,testingFile] = Utils.calcLDA6([trainingFile,testingFile])

        if config.n_components_QDA != 0:
            [trainingFile,testingFile] = Utils.calcQDA([trainingFile,testingFile])

        return wekaAPI.execute( trainingFile,
                             testingFile,
                             "weka.Run weka.classifiers.functions.LibSVM",
                             ['-K','2', # RBF kernel
                              '-G','0.0000019073486328125', # Gamma
                              ##May20 '-Z', # normalization 18 May 2015
                              '-C','131072'] ) # Cost
Code example #9
 def classify(runID, trainingSet, testingSet):
     [trainingFile,
      testingFile] = arffWriter.writeArffFiles(runID, trainingSet,
                                               testingSet)
     # return wekaAPI.execute( trainingFile, testingFile, "weka.classifiers.bayes.NaiveBayes", ['-K'] )
     if config.CROSS_VALIDATION == 0:
         return wekaAPI.execute(trainingFile, testingFile,
                                "weka.classifiers.bayes.NaiveBayes", ['-K'])
     else:
         file = Utils.joinTrainingTestingFiles(
             trainingFile, testingFile)  # join and shuffle
         return wekaAPI.executeCrossValidation(
             file,
             "weka.classifiers.bayes.NaiveBayes",
             [
                 '-x',
                 str(config.CROSS_VALIDATION),  # number of folds
                 '-K'
             ])
Code example #10
    def classify(runID, trainingSet, testingSet):
        [trainingFile,
         testingFile] = arffWriter.writeArffFiles(runID, trainingSet,
                                                  testingSet)
        # return wekaAPI.execute( trainingFile, testingFile, "weka.classifiers.bayes.NaiveBayes", ['-K'] )

        # deep learning
        if config.DEEP_LEARNING_METHOD != -1:
            #DLMethod = Utils.intToDL(config.DEEP_LEARNING_METHOD)
            #print 'Deep Learning Method: ' + DLMethod
            #[trainingFile, testingFile] = DLMethod.runDL([trainingFile, testingFile])
            #[trainingFile, testingFile] = dA_2.calcAE([trainingFile, testingFile])
            #SdA_2.calcSdA([trainingFile, testingFile])
            #logistic_sgd_2.calcLog_sgd([trainingFile, testingFile])
            if config.DEEP_LEARNING_METHOD == 1:
                logistic_sgd_2.runDL([trainingFile, testingFile])
            elif config.DEEP_LEARNING_METHOD == 2:
                [trainingFile,
                 testingFile] = dA_2.runDL([trainingFile, testingFile])
            elif config.DEEP_LEARNING_METHOD == 3:
                mlp_2.runDL([trainingFile, testingFile])
            elif config.DEEP_LEARNING_METHOD == 4:
                SdA_2.runDL([trainingFile, testingFile])

        if config.CROSS_VALIDATION == 0:
            return wekaAPI.execute(trainingFile, testingFile,
                                   "weka.classifiers.bayes.NaiveBayes", ['-K'])
        else:
            file = Utils.joinTrainingTestingFiles(
                trainingFile, testingFile)  # join and shuffle
            return wekaAPI.executeCrossValidation(
                file,
                "weka.classifiers.bayes.NaiveBayes",
                [
                    '-x',
                    str(config.CROSS_VALIDATION),  # number of folds
                    '-K'
                ])
Code example #11
    def classify(runID, trainingSet, testingSet):
        [trainingFile,
         testingFile] = arffWriter.writeArffFiles(runID, trainingSet,
                                                  testingSet)

        # deep learning (AE)
        if config.DEEP_LEARNING_METHOD != -1:
            #[trainingFile, testingFile] = dA_2.calcAE([trainingFile, testingFile]) # one layer dA
            #[trainingFile, testingFile] = dA_2.calcAE([trainingFile, testingFile]) # two layers dA
            #[trainingFile, testingFile] = dA_2.calcAE([trainingFile, testingFile])
            #SdA_2.calcSdA([trainingFile, testingFile])
            if config.DEEP_LEARNING_METHOD == 1:
                [trainingFile, testingFile
                 ] = logistic_sgd_2.runDL([trainingFile, testingFile])
            elif config.DEEP_LEARNING_METHOD == 2:
                [trainingFile,
                 testingFile] = dA_2.runDL([trainingFile, testingFile])
                [trainingFile,
                 testingFile] = dA_2.runDL([trainingFile, testingFile])
                #[trainingFile, testingFile] = dA_2.runDL([trainingFile, testingFile])
                #[trainingFile, testingFile] = dA_2.runDL([trainingFile, testingFile])
                #[trainingFile, testingFile] = dA_2.runDL([trainingFile, testingFile])
            elif config.DEEP_LEARNING_METHOD == 3:
                # DL classifier
                return mlp_2.runDL([trainingFile, testingFile])
            elif config.DEEP_LEARNING_METHOD == 4:
                return SdA_2.runDL([trainingFile, testingFile])
            elif config.DEEP_LEARNING_METHOD == 5:
                return mlp_3.runDL([trainingFile, testingFile])
            elif config.DEEP_LEARNING_METHOD == 6:
                return SdA_3.runDL([trainingFile, testingFile])
            elif config.DEEP_LEARNING_METHOD == 7:
                return LeNetConvPoolLayer_2.runDL([trainingFile, testingFile])

        return wekaAPI.execute(trainingFile, testingFile,
                               "weka.classifiers.bayes.NaiveBayes", ['-K'])
Code example #12
    def classify(runID, trainingSet, testingSet):
        [trainingFile,
         testingFile] = arffWriter.writeArffFiles(runID, trainingSet,
                                                  testingSet)

        if config.NUM_MONITORED_SITES != -1:  # no need to classify; this run only generates open-world datasets (see the arffWriter call above)
            [accuracy, debugInfo] = ['NA', []]
            return [accuracy, debugInfo]

        if config.n_components_PCA != 0:
            [trainingFile,
             testingFile] = Utils.calcPCA2([trainingFile, testingFile])

        if config.n_components_LDA != 0:
            [trainingFile,
             testingFile] = Utils.calcLDA4([trainingFile, testingFile])

        if config.n_components_QDA != 0:
            [trainingFile,
             testingFile] = Utils.calcQDA([trainingFile, testingFile])

        if config.lasso != 0:
            #[trainingFile,testingFile] = Utils.calcLasso3([trainingFile,testingFile])
            #[trainingFile,testingFile] = Utils.calcLogisticRegression([trainingFile,testingFile])
            Utils.calcLogisticRegression([trainingFile, testingFile])

        #Utils.plotDensity([trainingFile,testingFile])
        #Utils.plot([trainingFile,testingFile])
        '''
        if (config.DATA_SOURCE == 62 or config.DATA_SOURCE == 63 or config.DATA_SOURCE == 64 or config.DATA_SOURCE == 65):
            if config.LABEL_NOISE_RATIO != 0:
                [trainingFile,testingFile] = Utils.makeLabelNoise([trainingFile,testingFile],config.LABEL_NOISE_RATIO)
        '''
        if config.CROSS_VALIDATION == 0:
            return wekaAPI.execute(
                trainingFile,
                testingFile,
                "weka.Run weka.classifiers.functions.LibSVM",
                [
                    '-K',
                    '2',  # RBF kernel
                    '-G',
                    '0.0000019073486328125',  # Gamma
                    ##May20 '-Z', # normalization 18 May 2015
                    '-C',
                    '131072',  # Cost
                    #'-S','2', # one-class svm
                    '-B'
                ])  # confidence
        else:
            file = Utils.joinTrainingTestingFiles(
                trainingFile, testingFile)  # join and shuffle
            return wekaAPI.executeCrossValidation(
                file,
                "weka.Run weka.classifiers.functions.LibSVM",
                [
                    '-x',
                    str(config.CROSS_VALIDATION),  # number of folds
                    '-K',
                    '2',  # RBF kernel
                    '-G',
                    '0.0000019073486328125',  # Gamma
                    ##May20 '-Z', # normalization 18 May 2015
                    '-C',
                    '131072',  # Cost
                    '-B'
                ])  # confidence
Code example #13
 def classify( runID, trainingSet, testingSet ):
     [trainingFile,testingFile] = arffWriter.writeArffFiles( runID, trainingSet, testingSet )
     return wekaAPI.execute( trainingFile, testingFile, "weka.classifiers.bayes.NaiveBayes", ['-K'] )
Code example #14
    def classify(runID, trainingSet, testingSet):
        [trainingFile,
         testingFile] = arffWriter.writeArffFiles(runID, trainingSet,
                                                  testingSet)

        if config.n_components_PCA != 0:
            [trainingFile,
             testingFile] = Utils.calcPCA2([trainingFile, testingFile])

        if config.n_components_LDA != 0:
            [trainingFile,
             testingFile] = Utils.calcLDA6([trainingFile, testingFile])

        if config.n_components_QDA != 0:
            [trainingFile,
             testingFile] = Utils.calcQDA([trainingFile, testingFile])

        classifier = "RF"
        kwargs = {}
        kwargs['n_estimators'] = 500  # number of trees / length of the fingerprint
        kwargs['criterion'] = "gini"
        kwargs['oob_score'] = True
        kwargs['n_jobs'] = 3

        if config.NUM_MONITORED_SITES == -1 and config.NUM_NON_MONITORED_SITES == -1:
            # closed world
            if config.CROSS_VALIDATION == 0:
                return wekaAPI.executeSklearn(trainingFile, testingFile,
                                              classifier, **kwargs)
            else:
                file = Utils.joinTrainingTestingFiles(
                    trainingFile, testingFile)  # join and shuffle
                return wekaAPI.executeSklearnCrossValidation(
                    file, classifier, config.CROSS_VALIDATION,
                    **kwargs)  # CV with normalization
        else:
            # open world
            trainList = wekaAPI.readFile(trainingFile)
            testList = wekaAPI.readFile(testingFile)
            trainInstancesList = []
            testInstancesList = []
            classes = ""
            yTrain = []
            yTest = []

            for line in trainList:
                if line[0] == '@':
                    if line.lower().startswith("@attribute class"):
                        classes = line.split(" ")[2]
                else:
                    # instancesList.append(float(line.split(",")[:-1]))
                    trainInstancesList.append(
                        [float(i) for i in line.split(",")[:-1]])
                    yTrain.append(line.split(",")[-1])

            for line in testList:
                if line[0] != '@':
                    testInstancesList.append(
                        [float(i) for i in line.split(",")[:-1]])
                    yTest.append(line.split(",")[-1])

            XTr = numpy.array(trainInstancesList)
            yTr = numpy.array(yTrain)
            XTe = numpy.array(testInstancesList)
            yTe = numpy.array(yTest)

            clf = RandomForestClassifier(**kwargs)
            clf.fit(XTr, yTr)  # the forest must be fitted before apply() can be called

            print('Generating Leaves...')
            training_leaves = clf.apply(XTr)
            test_leaves = clf.apply(XTe)
            training_leaves = [
                numpy.array(training_leaf, dtype=int)
                for training_leaf in training_leaves
            ]
            test_leaves = [
                numpy.array(test_leaf, dtype=int) for test_leaf in test_leaves
            ]
            true_positive = 0
            false_positive = 0
            knn = 3  # k value
            debugInfo = []

            print('Calculating Distances...')
            for test_leaf_idx in range(len(test_leaves)):
                test_leaf = test_leaves[test_leaf_idx]  #array of leaf values
                true_label = yTe[test_leaf_idx]
                dist_predicted_labels = []  # list of (distance, predicted_label) pairs

                for training_leaf_idx in range(len(training_leaves)):
                    training_leaf = training_leaves[
                        training_leaf_idx]  #array of leaf values
                    predicted_label = yTr[training_leaf_idx]

                    distance = numpy.sum(training_leaf != test_leaf) / float(
                        training_leaf.size)
                    if distance == 1.0:
                        continue
                    dist_predicted_labels.append(
                        (distance,
                         predicted_label))  # tuple(distance, predicted_label)

                closest_distances_labels = sorted(
                    dist_predicted_labels
                )[:knn]  #array of tuples (distance, predicted_label)
                # vote function
                labels = [label for _, label in closest_distances_labels]
                if len(set(labels)) == 1:
                    classified_label = labels[0]
                else:
                    classified_label = config.binaryLabels[1]  #webpageNonMon

                debugInfo.append([true_label,
                                  classified_label])  # for debug purposes

                if true_label != config.binaryLabels[
                        1] and true_label == classified_label:
                    true_positive += 1

                if true_label == config.binaryLabels[
                        1] and true_label != classified_label:
                    false_positive += 1

            num_unmonitored_test_instances = int(
                numpy.sum(yTe == config.binaryLabels[1]))  # yTe is a numpy array; list.count() is not available
            num_monitored_test_instances = len(
                yTe) - num_unmonitored_test_instances

            true_positive_rate = true_positive / float(
                num_monitored_test_instances)
            false_positive_rate = false_positive / float(
                num_unmonitored_test_instances)

            print("True Positive Count = %d / %d" %
                  (true_positive, num_monitored_test_instances))
            print("False Positive Count = %d / %d" %
                  (false_positive, num_unmonitored_test_instances))
            print("True Positive Rate: ", true_positive_rate)
            print("False Positive Rate: ", false_positive_rate)

            result = [true_positive_rate, false_positive_rate]

            return [result, debugInfo]
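The open-world branch above is a leaf-vector kNN in the style of k-fingerprinting: clf.apply maps each instance to the vector of leaf indices it reaches across the forest, the distance between two instances is the fraction of trees whose leaves disagree (a normalized Hamming distance), and a test instance keeps a monitored label only when all k nearest training instances agree. A condensed sketch of that distance-and-vote step on toy data (UNMON stands in for config.binaryLabels[1]; the full code above additionally skips training points at distance 1.0):

import numpy as np
from sklearn.ensemble import RandomForestClassifier

UNMON = 'webpageNonMon'  # stand-in for config.binaryLabels[1]
rng = np.random.default_rng(0)
X_train = rng.normal(size=(60, 8))
y_train = np.array(['webpageMon', UNMON] * 30)
x_test = rng.normal(size=(1, 8))

clf = RandomForestClassifier(n_estimators=500, oob_score=True, n_jobs=3)
clf.fit(X_train, y_train)
train_leaves = clf.apply(X_train)  # shape (n_samples, n_trees): leaf indices
test_leaf = clf.apply(x_test)[0]

dists = (train_leaves != test_leaf).mean(axis=1)  # normalized Hamming distance
nearest = y_train[np.argsort(dists)[:3]]  # k = 3, as in the code above
# Unanimous vote keeps the label; any disagreement falls back to non-monitored.
label = nearest[0] if len(set(nearest)) == 1 else UNMON
print(label)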
Code example #15
    def classify(runID, trainingSet, testingSet):
        [trainingFile,
         testingFile] = arffWriter.writeArffFiles(runID, trainingSet,
                                                  testingSet)
        [trainingFileOrig, testingFileOrig] = [trainingFile, testingFile]

        if config.NUM_MONITORED_SITES != -1:  # no need to classify; this run only generates open-world datasets (see the arffWriter call above)
            [accuracy, debugInfo] = ['NA', []]
            return [accuracy, debugInfo]

        if config.n_components_PCA != 0:
            [trainingFile,
             testingFile] = Utils.calcPCA2([trainingFile, testingFile])

        if config.n_components_LDA != 0:
            [trainingFile,
             testingFile] = Utils.calcLDA4([trainingFile, testingFile])

        if config.n_components_QDA != 0:
            [trainingFile,
             testingFile] = Utils.calcQDA([trainingFile, testingFile])

        if config.lasso != 0:
            #[trainingFile,testingFile] = Utils.calcLasso3([trainingFile,testingFile])
            #[trainingFile,testingFile] = Utils.calcLogisticRegression([trainingFile,testingFile])
            Utils.calcLogisticRegression([trainingFile, testingFile])

        # deep learning
        if config.DEEP_LEARNING_METHOD != -1:
            #[trainingFile, testingFile] = dA_2.calcAE([trainingFile, testingFile]) # one layer dA
            #[trainingFile, testingFile] = dA_2.calcAE([trainingFile, testingFile]) # two layers dA
            #[trainingFile, testingFile] = dA_2.calcAE([trainingFile, testingFile])
            #SdA_2.calcSdA([trainingFile, testingFile])
            if config.DEEP_LEARNING_METHOD == 1:
                [trainingFile, testingFile
                 ] = logistic_sgd_2.runDL([trainingFile, testingFile])
            elif config.DEEP_LEARNING_METHOD == 2:
                [trainingFile,
                 testingFile] = dA_2.runDL([trainingFile, testingFile])
                [trainingFile,
                 testingFile] = dA_2.runDL([trainingFile, testingFile])
                #[trainingFile, testingFile] = dA_2.runDL([trainingFile, testingFile])
                #[trainingFile, testingFile] = dA_2.runDL([trainingFile, testingFile])
                #[trainingFile, testingFile] = dA_2.runDL([trainingFile, testingFile])
            elif config.DEEP_LEARNING_METHOD == 3:
                # DL classifier
                return mlp_2.runDL([trainingFile, testingFile])
            elif config.DEEP_LEARNING_METHOD == 4:
                return SdA_2.runDL([trainingFile, testingFile])
            elif config.DEEP_LEARNING_METHOD == 5:
                return mlp_3.runDL([trainingFile, testingFile])
            elif config.DEEP_LEARNING_METHOD == 6:
                return SdA_3.runDL([trainingFile, testingFile])
            elif config.DEEP_LEARNING_METHOD == 7:
                return LeNetConvPoolLayer_2.runDL([trainingFile, testingFile])

        #Utils.plotDensity([trainingFile,testingFile])
        #Utils.plot([trainingFile,testingFile])

        if config.OC_SVM == 0:  # multi-class svm
            if config.CROSS_VALIDATION == 0:
                #print 'WARNING: NB classifier with Bi-Di. ###########///////////XXXXXX???????? '
                #return wekaAPI.execute(trainingFile, testingFile, "weka.classifiers.bayes.NaiveBayes", ['-K'])

                return wekaAPI.execute(
                    trainingFile,
                    testingFile,
                    "weka.Run weka.classifiers.functions.LibSVM",
                    [
                        '-K',
                        '2',  # RBF kernel
                        '-G',
                        '0.0000019073486328125',  # Gamma
                        ##May20 '-Z', # normalization 18 May 2015
                        '-C',
                        '131072',  # Cost
                        #'-S','2', # one-class svm
                        '-B'
                    ])  # confidence

            else:
                file = Utils.joinTrainingTestingFiles(
                    trainingFile, testingFile)  # join and shuffle
                return wekaAPI.executeCrossValidation(
                    file,
                    "weka.Run weka.classifiers.functions.LibSVM",
                    [
                        '-x',
                        str(config.CROSS_VALIDATION),  # number of folds
                        '-K',
                        '2',  # RBF kernel
                        '-G',
                        '0.0000019073486328125',  # Gamma
                        ##May20 '-Z', # normalization 18 May 2015
                        '-C',
                        '131072',  # Cost
                        '-B'
                    ])  # confidence
        else:  # one-class svm
            if config.CROSS_VALIDATION == 0:
                print(config.SVM_KERNEL)
                print(config.OC_SVM_Nu)
                return wekaAPI.executeOneClassSVM(
                    trainingFile,
                    testingFile,
                    "weka.Run weka.classifiers.functions.LibSVM",
                    [
                        '-K',
                        str(config.SVM_KERNEL),
                        #'-K','2', # RBF kernel
                        #'-G','0.0000019073486328125', # Gamma
                        ##May20 '-Z', # normalization 18 May 2015
                        #'-C','131072', # Cost
                        #'-N','0.001', # nu
                        '-N',
                        str(config.OC_SVM_Nu),  # nu
                        '-S',
                        '2'  # -S 2 selects one-class SVM
                    ])
                #'-B'] )  # confidence
            else:
                file = Utils.joinTrainingTestingFiles(
                    trainingFile, testingFile)  # join and shuffle
                return wekaAPI.executeCrossValidation(
                    file,
                    "weka.Run weka.classifiers.functions.LibSVM",
                    [
                        '-x',
                        str(config.CROSS_VALIDATION),  # number of folds
                        '-K',
                        '2',  # RBF kernel
                        '-G',
                        '0.0000019073486328125',  # Gamma
                        ##May20 '-Z', # normalization 18 May 2015
                        '-C',
                        '131072',  # Cost
                        '-B'
                    ])  # confidence
Code example #16
    def classify(runID, trainingSet, testingSet):
        [trainingFile,
         testingFile] = arffWriter.writeArffFiles(runID, trainingSet,
                                                  testingSet)

        if config.CROSS_VALIDATION == 0:
            trainList = wekaAPI.readFile(trainingFile)
            testList = wekaAPI.readFile(testingFile)
            trainInstancesList = []
            testInstancesList = []
            classes = ""
            yTrain = []
            yTest = []

            for line in trainList:
                if line[0] == '@':
                    if line.lower().startswith("@attribute class"):
                        classes = line.split(" ")[2]
                else:
                    # instancesList.append(float(line.split(",")[:-1]))
                    trainInstancesList.append(
                        [float(i) for i in line.split(",")[:-1]])
                    yTrain.append(line.split(",")[-1])

            for line in testList:
                if line[0] != '@':
                    testInstancesList.append(
                        [float(i) for i in line.split(",")[:-1]])
                    yTest.append(line.split(",")[-1])
        else:
            file = Utils.joinTrainingTestingFiles(
                trainingFile, testingFile)  # join and shuffle
            fileList = wekaAPI.readFile(file)

            fileInstancesList = []
            y = []

            for line in fileList:
                if line[0] == '@':
                    if line.lower().startswith("@attribute class"):
                        classes = line.split(" ")[2]
                else:
                    fileInstancesList.append(
                        [float(i) for i in line.split(",")[:-1]])
                    y.append(line.split(",")[-1])
            trainInstancesList, testInstancesList, yTrain, yTest = train_test_split(
                fileInstancesList, y, test_size=6000, random_state=42)

        XTr = numpy.array(trainInstancesList)
        yTr = numpy.array(yTrain)
        XTe = numpy.array(testInstancesList)
        yTe = numpy.array(yTest)

        # print ("Scaling data...")
        # scaler = StandardScaler()
        # XTr = scaler.fit_transform(XTr)
        # XTe = scaler.fit_transform(XTe)
        if config.NUM_MONITORED_SITES == -1 and config.NUM_NON_MONITORED_SITES == -1:
            print("Closed-world")
        else:
            print("Open-world")

        print("Classification...")
        classifier = kNN()

        classifier.fit(XTr, yTr)
        prediction = classifier.predict(XTe)

        totalPredictions = 0
        totalCorrectPredictions = 0
        debugInfo = []
        for i in range(0, len(yTe)):
            actualClass = yTe[i]
            predictedClass = prediction[i]
            probEstimate = 'NA'
            # debugInfo.append([actualClass,predictedClass])
            debugInfo.append([actualClass, predictedClass, probEstimate])
            totalPredictions += 1.0
            if actualClass == predictedClass:
                totalCorrectPredictions += 1.0

        accuracy = totalCorrectPredictions / totalPredictions * 100.0
        print("Accuracy = ", accuracy)

        positive = []  # monitored
        negative = []  # non-monitored
        positive.append(config.binaryLabels[0])  # 'webpageMon'
        negative.append(config.binaryLabels[1])  # 'webpageNonMon'
        tp = 0
        tn = 0
        fp = 0
        fn = 0

        for entry in debugInfo:
            if entry[0] in positive:  # actual is positive
                if entry[1] in positive:  # predicted is positive too
                    tp += 1
                else:  # predicted is negative
                    fn += 1
            elif entry[0] in negative:  # actual is negative
                if entry[1] in positive:  # predicted is positive
                    fp += 1
                else:  # predicted is negative too
                    tn += 1

        tpr = str("%.4f" % (float(tp) / float(tp + fn)))
        fpr = str("%.4f" % (float(fp) / float(fp + tn)))
        Acc = str("%.4f" % (float(tp + tn) / float(tp + tn + fp + fn)))
        F1 = str("%.4f" % (float(2 * tp) / float((2 * tp) + (fn) + (fp))))
        F2 = str("%.4f" % (float(5 * tp) / float((5 * tp) + (4 * fn) +
                                                 (fp))))  # beta = 2
        print("TPR, FPR, ACC, tp, tn, fp, fn, F1, F2")
        print(tpr, fpr, Acc, tp, tn, fp, fn, F1, F2)

        return [accuracy, debugInfo]
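The hand-rolled TPR/FPR/accuracy/F-score bookkeeping above can be cross-checked with sklearn.metrics. A small sketch under the same binary labeling (y_true and y_pred are placeholders for yTe and prediction):

from sklearn.metrics import confusion_matrix, fbeta_score

y_true = ['webpageMon', 'webpageMon', 'webpageNonMon', 'webpageNonMon']
y_pred = ['webpageMon', 'webpageNonMon', 'webpageMon', 'webpageNonMon']

# Order the labels so the monitored class is the positive one.
tn, fp, fn, tp = confusion_matrix(
    y_true, y_pred, labels=['webpageNonMon', 'webpageMon']).ravel()
print(tp / (tp + fn), fp / (fp + tn))  # TPR, FPR
print(fbeta_score(y_true, y_pred, beta=1, pos_label='webpageMon'))  # F1
print(fbeta_score(y_true, y_pred, beta=2, pos_label='webpageMon'))  # F2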
Code example #17
    def classify(runID, trainingSet, testingSet):
        [trainingFile,
         testingFile] = arffWriter.writeArffFiles(runID, trainingSet,
                                                  testingSet)

        if config.NUM_MONITORED_SITES != -1:  # no need to classify; this run only generates open-world datasets (see the arffWriter call above)
            [accuracy, debugInfo] = ['NA', []]
            return [accuracy, debugInfo]

        if config.n_components_PCA != 0:
            [trainingFile,
             testingFile] = Utils.calcPCA2([trainingFile, testingFile])

        if config.n_components_LDA != 0:
            [trainingFile,
             testingFile] = Utils.calcLDA4([trainingFile, testingFile])

        if config.n_components_QDA != 0:
            [trainingFile,
             testingFile] = Utils.calcQDA([trainingFile, testingFile])

        if config.lasso != 0:
            #[trainingFile,testingFile] = Utils.calcLasso3([trainingFile,testingFile])
            #[trainingFile,testingFile] = Utils.calcLogisticRegression([trainingFile,testingFile])
            Utils.calcLogisticRegression([trainingFile, testingFile])

        #Utils.plotDensity([trainingFile,testingFile])
        #Utils.plot([trainingFile,testingFile])

        if config.NUM_FEATURES_RF != 0:
            [trainingFile,
             testingFile] = Utils.calcTreeBaseRF([trainingFile, testingFile],
                                                 config.NUM_FEATURES_RF)

        if config.OC_SVM == 0:  # multi-class svm
            if config.CROSS_VALIDATION == 0:
                return wekaAPI.execute(
                    trainingFile,
                    testingFile,
                    "weka.Run weka.classifiers.functions.LibSVM",
                    [
                        '-K',
                        '2',  # RBF kernel
                        '-G',
                        '0.0000019073486328125',  # Gamma
                        ##May20 '-Z', # normalization 18 May 2015
                        '-C',
                        '131072',  # Cost
                        '-B'
                    ])  # confidence
            else:
                file = Utils.joinTrainingTestingFiles(
                    trainingFile, testingFile)  # join and shuffle
                return wekaAPI.executeCrossValidation(
                    file,
                    "weka.Run weka.classifiers.functions.LibSVM",
                    [
                        '-x',
                        str(config.CROSS_VALIDATION),  # number of folds
                        '-K',
                        '2',  # RBF kernel
                        '-G',
                        '0.0000019073486328125',  # Gamma
                        ##May20 '-Z', # normalization 18 May 2015
                        '-C',
                        '131072',  # Cost
                        '-B'
                    ])  # confidence
        else:  # one-class svm
            if config.CROSS_VALIDATION == 0:
                print(config.SVM_KERNEL)
                print(config.OC_SVM_Nu)
                return wekaAPI.executeOneClassSVM(
                    trainingFile,
                    testingFile,
                    "weka.Run weka.classifiers.functions.LibSVM",
                    [
                        '-K',
                        str(config.SVM_KERNEL),
                        #'-K','0', # kernel
                        #'-G','0.0000019073486328125', # Gamma
                        ##May20 '-Z', # normalization 18 May 2015
                        #'-C','131072', # Cost
                        #'-N','0.01', # nu
                        '-N',
                        str(config.OC_SVM_Nu),  # nu
                        '-S',
                        '2'  # -S 2 selects one-class SVM
                    ])
                #'-B'] )  # confidence
            else:
                file = Utils.joinTrainingTestingFiles(
                    trainingFile, testingFile)  # join and shuffle
                return wekaAPI.executeCrossValidation(
                    file,
                    "weka.Run weka.classifiers.functions.LibSVM",
                    [
                        '-x',
                        str(config.CROSS_VALIDATION),  # number of folds
                        '-K',
                        '2',  # RBF kernel
                        '-G',
                        '0.0000019073486328125',  # Gamma
                        ##May20 '-Z', # normalization 18 May 2015
                        '-C',
                        '131072',  # Cost
                        '-B'
                    ])  # confidence
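For the one-class branches above: '-S 2' selects LibSVM's one-class SVM and '-N' sets nu, an upper bound on the fraction of training errors. The scikit-learn counterpart is OneClassSVM, trained on one class only and used to flag outliers; a minimal sketch with toy data (the nu value is illustrative, not config.OC_SVM_Nu):

import numpy as np
from sklearn.svm import OneClassSVM

rng = np.random.default_rng(0)
X_mon = rng.normal(size=(80, 10))  # train on the monitored class only
X_test = rng.normal(size=(20, 10))

ocsvm = OneClassSVM(kernel='rbf', nu=0.01)  # kernel ~ -K, nu ~ -N
ocsvm.fit(X_mon)
print(ocsvm.predict(X_test))  # +1 = in-class, -1 = outlier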