def feature(PATH):
    """Compute the feature list for the test cases stored in the file at PATH.

    The file is read twice through ``file_operation`` (``fileopt_line`` and
    ``fileopt_all``).  The results are run through the ``testcase_stat``
    helpers ``stat`` / ``stat_all``, ``compute_tf``, ``compute_idf`` and
    ``compute_tf_idf``, and the combined values are finally handed to
    ``feature_vector_quantify``.

    Args:
        PATH: Path of the input file; passed unchanged to both
            ``file_operation`` readers.

    Returns:
        The value returned by ``testcase_stat.feature_vector_quantify`` for
        the TF-IDF values and the whole-file statistics.
    """
    # Two reads of the same file: one via fileopt_line, one via fileopt_all.
    cases = file_operation.fileopt_line(PATH)
    whole_file = file_operation.fileopt_all(PATH)
    whole_file_stats = testcase_stat.stat_all(whole_file)

    # TF values are computed from a first set of per-test-case statistics.
    tf_values = testcase_stat.compute_tf(testcase_stat.stat(cases))

    # NOTE(review): stat() is deliberately called a second time instead of
    # reusing the value that was given to compute_tf -- presumably compute_tf
    # modifies its argument.  Confirm before merging the two calls.
    idf_values = testcase_stat.compute_idf(
        testcase_stat.stat(cases),
        whole_file_stats,
    )

    return testcase_stat.feature_vector_quantify(
        testcase_stat.compute_tf_idf(tf_values, idf_values),
        whole_file_stats,
    )
# Persist the per-test-case statistics as JSON.  The file is opened in a
# ``with`` block so the handle is flushed and closed deterministically instead
# of being left to the garbage collector.
with open('./statistics/test_case_stat_list.json', 'w') as stat_file:
    json.dump(test_case_stat_list, stat_file)

#-------------------------------------------------------------------
# test_case_list      : the test cases read from the file, one list per case.
#   Sample : [
#       ['alice', 'dod', 'alice', 'bob', 'alice', 'tom'],
#       ['alice', 'dod', 'alice', 'bob', 'alice', 'tom']
#   ]
#
# full_list           : the complete list, not split into test cases.
# full_stat_list      : the statistics computed over full_list.
# test_case_stat_list : the statistics computed over the split test cases.
#-------------------------------------------------------------------

# Compute the TF values and store them in tf_matrix.
tf_matrix = testcase_stat.compute_tf(test_case_stat_list)

# IMPORTANT: test_case_stat_list must be recomputed at this point; reusing the
# value that was passed to compute_tf gives wrong results further on.
test_case_stat_list = testcase_stat.stat(test_case_list)

# With the freshly recomputed statistics, compute the IDF values.
idf_matrix = testcase_stat.compute_idf(test_case_stat_list, full_stat_list)
open('./statistics/test_case_stat_list.json', 'w')) #------------------------------------------------------------------- # test_case_list is the list stored the test cases read from files # Sample : [ # ['alice', 'dod', 'alice', 'bob', 'alice', 'tom'], # ['alice', 'dod', 'alice', 'bob', 'alice', 'tom'] # ] # # full_list is the total list without split # full_stat_list is used to stat full list # test_case_stat_list is the split test cases list statistic list # #------------------------------------------------------------------- #print test_case_stat_list tf_matrix = testcase_stat.compute_tf(test_case_stat_list) #computing tf value and store in tf_matrix #print tf_matrix #Important this place # In this place we should recompute the test_case_stat_list value # If not the consequence will go wrong test_case_stat_list = testcase_stat.stat(test_case_list) #print test_case_stat_list # Recompute the test_case_stat_list and then compute the idf value idf_matrix = testcase_stat.compute_idf(test_case_stat_list, full_stat_list) #print idf_matrix