def main():
    """Train a MultinomialNB text classifier on the normalized payload
    dataset and persist both the fitted vocabulary and the model to disk.
    """

    # Load the dataset from the CSV file.
    dataset = load_cvs_dataset("../normdatapayload.csv")

    # Text preprocessing: separate labels from the raw payload strings.
    labels = dataset['label']
    payloads = dataset['payload']

    # Text feature engineering: vectorize the payloads and split
    # into train/test partitions (indices 0-3) plus the fitted
    # vectorizer/vocabulary (index 4).
    features = count_vectorizer(payloads, labels)

    # Build the text classification model and evaluate it.
    classifier = naive_bayes.MultinomialNB()
    score = train_model(classifier, features[0], features[1],
                        features[2], features[3])
    print("NB, word_tf_idf accuracy is : ", score * 100)

    # Persist the fitted vocabulary and the trained classifier so a
    # live detector can reload them later.
    with open('../vocabulary_file', 'wb') as vocabulary_file:
        pickle.dump(features[4], vocabulary_file)

    with open('../text_classifier', 'wb') as picklefile:
        pickle.dump(classifier, picklefile)
def main():
    """Benchmark RandomForest on the corpus with two feature
    representations: raw token counts and word-level TF-IDF.
    """

    # Load the dataset from the CSV file.
    corpus = load_cvs_dataset("../corpus.csv")

    # Text preprocessing: labels and raw document text.
    labels = corpus['label']
    documents = corpus['text']

    # Feature set 1: bag-of-words counts with train/test split.
    features = count_vectorizer(documents, labels)

    # Train and evaluate a RandomForest on the count features.
    forest = ensemble.RandomForestClassifier()
    score = train_model(forest, features[0], features[1],
                        features[2], features[3])
    print("RandomForest_Clf, count_vectorizer accuracy is : ", score * 100)

    # Feature set 2: word-level TF-IDF weights.
    features = word_tf_idf(documents, labels)

    # Train and evaluate a fresh RandomForest on the TF-IDF features.
    forest = ensemble.RandomForestClassifier()
    score = train_model(forest, features[0], features[1],
                        features[2], features[3])
    print("RandomForest_Clf, word_tf_idf accuracy is : ", score * 100)
def main():
    """Train and evaluate a MultinomialNB classifier on the XSS test set
    using word-level TF-IDF features.

    NOTE: a large disabled experiment block (char_tf_idf,
    count_vectorizer and ngram_tf_idf variants, previously kept inside a
    triple-quoted string) was removed as dead code; only the word_tf_idf
    pipeline below was ever executed.
    """

    # Load the dataset from the CSV file.
    trainDF = load_cvs_dataset("../xss_test.csv")

    # Text preprocessing: labels and raw payload strings.
    txt_label = trainDF['label']
    txt_text = trainDF['payload']

    # Text feature engineering with word_tf_idf: train/test splits in
    # indices 0-3 of the returned sequence.
    model_input = word_tf_idf(txt_text, txt_label)

    # Build the text classification model and evaluate it.
    naive = naive_bayes.MultinomialNB()
    accuracy = train_model(naive, model_input[0], model_input[1],
                           model_input[2], model_input[3])
    print ("NB, word_tf_idf accuracy is : ", accuracy * 100)
# Esempio n. 4
# 0
def bulk_live_verna_detection(input_dataset, context_path, payload, label):
    """Run the single-payload verna detector over every payload in a CSV
    dataset and collect 'anom'/'norm' verdicts.

    Args:
        input_dataset: path to the CSV file to scan.
        context_path: model/context path forwarded to the detector.
        payload: name of the column holding the payload text.
        label: name of the label column (currently unused here).

    Returns:
        List of 'anom'/'norm' strings, one per payload whose detector
        result compared equal to True/False; other results are skipped.
    """
    detections = []

    frame = load_cvs_dataset(input_dataset)
    # Only the payload column is consumed; the label column is ignored.
    for payload_text in frame[payload]:
        verdict = live_verna_single_detection(context_path, payload_text)
        # Equality (not identity) comparisons are kept deliberately so
        # results that merely compare equal to True/False (e.g. numpy
        # bools) behave exactly as before; anything else is dropped.
        if verdict == True:
            detections.append('anom')
        elif verdict == False:
            detections.append('norm')
    return detections
# Esempio n. 5
# 0
def live_verna_detection(model_path, input_web_param_path, payload_col_name):
    """Classify every web parameter in a CSV file with the single-payload
    detector, mapping boolean results to 'true'/'false' strings.

    Args:
        model_path: path to the trained model handed to the detector.
        input_web_param_path: CSV file with the web parameters to check.
        payload_col_name: column holding the parameter text.

    Returns:
        List of 'true'/'false' strings, one per parameter whose detector
        result compared equal to True/False; other results are skipped.
    """
    outcomes = []

    frame = load_cvs_dataset(input_web_param_path)
    for web_param in frame[payload_col_name]:
        flag = live_verna_single_detection(model_path, web_param)
        # Equality comparisons kept on purpose: non-bool results are
        # silently skipped, matching the original behavior.
        if flag == True:
            outcomes.append('true')
        elif flag == False:
            outcomes.append('false')

    return outcomes
# Esempio n. 6
# 0
def main():
    """Train and evaluate an SVM classifier on the corpus using
    n-gram-level TF-IDF features.
    """

    # Load the dataset from the CSV file.
    corpus = load_cvs_dataset("../corpus.csv")

    # Text preprocessing: labels and raw document text.
    labels = corpus['label']
    documents = corpus['text']

    # Text feature engineering: n-gram TF-IDF with train/test split.
    features = ngram_tf_idf(documents, labels)

    # Build the text classification model and evaluate it.
    classifier = svm.SVC()
    score = train_model(classifier, features[0], features[1],
                        features[2], features[3])
    print ("Svm_clf, ngram_tf_idf accuracy is : ", score * 100)
# Esempio n. 7
# 0
def train_model_write(input_dataset, train_model_path, payload_col_name,
                      payload_label):
    """Train a MultinomialNB classifier on a CSV dataset and persist both
    the model and its fitted vectorizer under ``train_model_path``.

    Args:
        input_dataset: path to the training CSV file.
        train_model_path: directory for the pickled artifacts; the number
            of entries already present is used as a version suffix.
            NOTE(review): paths are built by plain concatenation, so this
            must end with a path separator — confirm against callers.
        payload_col_name: column holding the text payloads.
        payload_label: column holding the class labels.

    Returns:
        Test accuracy of the trained model as a percentage.
    """
    trainDF = load_cvs_dataset(input_dataset)
    txt_label = trainDF[payload_label]
    txt_text = trainDF[payload_col_name]

    # Bag-of-words features plus train/test split (indices 0-3);
    # index 4 holds the fitted vectorizer.
    model_input = count_vectorizer(txt_text, txt_label)

    naive = naive_bayes.MultinomialNB()
    accuracy = train_model(naive, model_input[0], model_input[1],
                           model_input[2], model_input[3])

    # Version artifacts by how many files are already in the directory.
    dirs = os.listdir(train_model_path)
    file_no = len(dirs)

    # FIX: the original passed open(...) directly to pickle.dump and
    # never closed the handles; use context managers so the files are
    # always flushed and closed.
    with open(str(train_model_path) + "text_classifier-" + str(file_no) +
              ".pickle", "wb") as model_file:
        pickle.dump(naive, model_file)
    with open(str(train_model_path) + "tfidf-" + str(file_no) + ".pickle",
              "wb") as vectorizer_file:
        pickle.dump(model_input[4], vectorizer_file)

    return accuracy * 100