def main():
    """Train a Multinomial Naive Bayes classifier on the normal-payload
    dataset and persist both the fitted vocabulary and the trained model.
    """
    # Load the dataset.
    trainDF = load_cvs_dataset("../normdatapayload.csv")

    # Text preprocessing: split the frame into labels and raw payload text.
    txt_label = trainDF['label']
    txt_text = trainDF['payload']

    # Text feature engineering (bag-of-words counts).
    model_input = count_vectorizer(txt_text, txt_label)

    # Build the text-classification model and evaluate it.
    naive = naive_bayes.MultinomialNB()
    accuracy = train_model(naive, model_input[0], model_input[1],
                           model_input[2], model_input[3])
    # BUG FIX: the features come from count_vectorizer, not word_tf_idf,
    # so the report message now names the correct feature method.
    print("NB, count_vectorizer accuracy is : ", accuracy * 100)

    # Persist the vectorizer vocabulary and the trained classifier.
    # (model_input[4] is whatever count_vectorizer returns in slot 4 —
    # presumably the fitted vectorizer/vocabulary; confirm against its def.)
    with open('../vocabulary_file', 'wb') as vocabulary_file:
        pickle.dump(model_input[4], vocabulary_file)
    with open('../text_classifier', 'wb') as picklefile:
        pickle.dump(naive, picklefile)
def main():
    """Evaluate a RandomForest classifier on the corpus under two
    feature encodings (bag-of-words counts, then word-level TF-IDF)
    and print the accuracy of each.
    """
    # Load the dataset once and split into labels and raw text.
    trainDF = load_cvs_dataset("../corpus.csv")
    txt_label = trainDF['label']
    txt_text = trainDF['text']

    # Run the same classifier family under each featurizer, in order.
    for featurize, tag in ((count_vectorizer, "count_vectorizer"),
                           (word_tf_idf, "word_tf_idf")):
        features = featurize(txt_text, txt_label)
        clf = ensemble.RandomForestClassifier()
        accuracy = train_model(clf, features[0], features[1],
                               features[2], features[3])
        print("RandomForest_Clf, " + tag + " accuracy is : ", accuracy * 100)
def main():
    """Train and evaluate a Multinomial Naive Bayes classifier on the
    XSS test set using word-level TF-IDF features.
    """
    # Load the dataset.
    trainDF = load_cvs_dataset("../xss_test.csv")

    # Text preprocessing: labels and raw payload strings.
    txt_label = trainDF['label']
    txt_text = trainDF['payload']

    # NOTE(review): a large dead (triple-quoted) experiment block comparing
    # char_tf_idf, count_vectorizer and ngram_tf_idf variants was removed,
    # along with a commented-out prepare_dataset() cleaning step; recover
    # them from version control if those comparisons are needed again.

    # Text feature engineering with word-level TF-IDF.
    model_input = word_tf_idf(txt_text, txt_label)

    # Build the text-classification model and evaluate it.
    naive = naive_bayes.MultinomialNB()
    accuracy = train_model(naive, model_input[0], model_input[1],
                           model_input[2], model_input[3])
    print ("NB, word_tf_idf accuracy is : ", accuracy * 100)
def bulk_live_verna_detection(input_dataset, context_path, payload, label):
    """Run single-payload detection over every payload in a CSV dataset.

    Loads *input_dataset*, reads the column named by *payload*, and runs
    live_verna_single_detection on each entry. Returns a list holding
    'anom' for a True verdict and 'norm' for a False one; entries with
    any other verdict are skipped. *label* is accepted for interface
    compatibility but is not used here.
    """
    verdicts = []
    dataset = load_cvs_dataset(input_dataset)
    for document in dataset[payload]:
        outcome = live_verna_single_detection(context_path, document)
        # Deliberate equality (not identity/truthiness) checks, to keep
        # the original matching semantics (e.g. 1 == True).
        if outcome == True:
            verdicts.append('anom')
        elif outcome == False:
            verdicts.append('norm')
    return verdicts
def live_verna_detection(model_path, input_web_param_path, payload_col_name):
    """Classify every web parameter in a CSV file with the saved model.

    Returns a list of the strings 'true' / 'false' mirroring the boolean
    verdict of live_verna_single_detection for each parameter; parameters
    yielding any other verdict are skipped.
    """
    # Map boolean verdicts to their textual form; non-boolean verdicts
    # fall through and are dropped, matching the original if/elif chain.
    names = {True: 'true', False: 'false'}
    outcomes = []
    frame = load_cvs_dataset(input_web_param_path)
    for web_param in frame[payload_col_name]:
        verdict = live_verna_single_detection(model_path, web_param)
        if verdict in names:
            outcomes.append(names[verdict])
    return outcomes
def main():
    """Evaluate an SVM classifier on the corpus using n-gram TF-IDF
    features and print its accuracy.
    """
    # Load the dataset and split into labels and raw text.
    corpus = load_cvs_dataset("../corpus.csv")
    labels = corpus['label']
    texts = corpus['text']

    # N-gram TF-IDF feature engineering.
    features = ngram_tf_idf(texts, labels)

    # Train the SVM and report its accuracy as a percentage.
    classifier = svm.SVC()
    accuracy = train_model(classifier, features[0], features[1],
                           features[2], features[3])
    print ("Svm_clf, ngram_tf_idf accuracy is : ", accuracy * 100)
def train_model_write(input_dataset, train_model_path, payload_col_name, payload_label):
    """Train a Multinomial Naive Bayes classifier on a CSV dataset and
    persist the model and its vectorizer as numbered pickle files.

    Parameters:
        input_dataset: path of the CSV file to train on.
        train_model_path: directory (expected to end with a path
            separator — confirm with callers) where pickles are written.
        payload_col_name: name of the text/payload column.
        payload_label: name of the label column.

    Returns the evaluation accuracy as a percentage.
    """
    trainDF = load_cvs_dataset(input_dataset)
    txt_label = trainDF[payload_label]
    txt_text = trainDF[payload_col_name]

    # Bag-of-words feature engineering + train/test split.
    model_input = count_vectorizer(txt_text, txt_label)

    naive = naive_bayes.MultinomialNB()
    accuracy = train_model(naive, model_input[0], model_input[1],
                           model_input[2], model_input[3])

    # Number the output files after the current count of entries in the
    # model directory so successive runs do not overwrite earlier models.
    file_no = len(os.listdir(train_model_path))

    # BUG FIX: the original passed bare open(...) handles to pickle.dump
    # and never closed them; context managers guarantee the files are
    # flushed and closed even if dump raises.
    # NOTE(review): model_input[4] comes from count_vectorizer yet the
    # file is named "tfidf-..." — confirm the naming is intentional;
    # the filenames are kept byte-identical so existing loaders still work.
    classifier_path = str(train_model_path) + "text_classifier-" + str(file_no) + ".pickle"
    vectorizer_path = str(train_model_path) + "tfidf-" + str(file_no) + ".pickle"
    with open(classifier_path, "wb") as model_file:
        pickle.dump(naive, model_file)
    with open(vectorizer_path, "wb") as vectorizer_file:
        pickle.dump(model_input[4], vectorizer_file)
    return accuracy * 100