def ngram_size_experiment(trigram_tree, quadgram_tree, pentagram_tree, hexagram_tree, test_set_benign, test_set_malicious):
    """Run the 'n-gram length' experiment over four trees.

    One ``Experiment`` is created and each tree is registered with it, in
    order from the 3-gram to the 6-gram tree, always against the same
    benign and malicious test sets.

    :param trigram_tree: tree registered under the label '3-gram tree'
    :param quadgram_tree: tree registered under the label '4-gram tree'
    :param pentagram_tree: tree registered under the label '5-gram tree'
    :param hexagram_tree: tree registered under the label '6-gram tree'
    :param test_set_benign: benign test set handed to every registration
    :param test_set_malicious: malicious test set handed to every registration
    :return: the value of ``Experiment.show_description()``
    """
    labelled_trees = (
        (trigram_tree, '3-gram tree'),
        (quadgram_tree, '4-gram tree'),
        (pentagram_tree, '5-gram tree'),
        (hexagram_tree, '6-gram tree'),
    )
    comparison = Experiment('altering the length of the n-gram')
    for tree, label in labelled_trees:
        comparison.add_individual_experiment(tree, test_set_benign, test_set_malicious, label)
    return comparison.show_description()
def calc_stat(self, description="Default description"):
    """Evaluate the stored tree on the test directories and save the report.

    The tree is loaded from ``self._tree`` via ``self._load_tree_from_file``.
    Every entry of ``self.benign_test_set`` and ``self.malware_test_set``
    (as returned by ``os.listdir``) is turned into a full path and passed to
    a single ``Experiment`` registration. The experiment's description is
    then written, followed by a newline, to ``self.STATISTICS_FILE_NAME``
    in the same directory as the tree file; an existing file is overwritten.

    :param description: used both as the experiment title and as the label
        of the single individual experiment
    :return: None
    """
    experiment = Experiment(description)
    tree = self._load_tree_from_file(self._tree)

    benign = [os.path.join(self.benign_test_set, entry)
              for entry in os.listdir(self.benign_test_set)]
    malware = [os.path.join(self.malware_test_set, entry)
               for entry in os.listdir(self.malware_test_set)]
    experiment.add_individual_experiment(tree, benign, malware, description)

    stat_path = os.path.join(os.path.dirname(self._tree), self.STATISTICS_FILE_NAME)
    with open(stat_path, "w") as stat_file:
        # An explicit write replaces the Python-2-only ``print >> file``
        # statement, which raises TypeError under Python 3. The output is
        # the same: the description text followed by one newline.
        stat_file.write("%s\n" % (experiment.show_description(),))
def training_set_size_experiment(tree_training_50, tree_training_100, tree_training_150, tree_training_200, tree_training_250, test_set_benign, test_set_malicious):
    """Run the 'training set size' experiment over five trees.

    Each tree is registered with one ``Experiment`` under the label
    'Training set N-N', where N is the size in the tree's parameter name
    (50, 100, 150, 200, 250), in ascending order. All registrations use
    the same benign and malicious test sets.

    :return: the value of ``Experiment.show_description()``
    """
    trees_by_size = (
        (50, tree_training_50),
        (100, tree_training_100),
        (150, tree_training_150),
        (200, tree_training_200),
        (250, tree_training_250),
    )
    comparison = Experiment('altering the size of training set (maintaining ratio of benign/malicious)')
    for size, tree in trees_by_size:
        # Produces 'Training set 50-50', 'Training set 100-100', ...
        label = 'Training set %d-%d' % (size, size)
        comparison.add_individual_experiment(tree, test_set_benign, test_set_malicious, label)
    return comparison.show_description()
def top_ngrams_experiment(top_100_tree, top_250_tree, top_500_tree, top_750_tree, top_1000_tree, top_2000_tree, test_set_benign, test_set_malicious):
    """Run the 'amount of attributes' experiment over six trees.

    Each tree is registered with one ``Experiment`` under its 'Top N tree'
    label, from the top-100 tree up to the top-2,000 tree, always against
    the same benign and malicious test sets.

    :return: the value of ``Experiment.show_description()``
    """
    labelled_trees = (
        (top_100_tree, 'Top 100 tree'),
        (top_250_tree, 'Top 250 tree'),
        (top_500_tree, 'Top 500 tree'),
        (top_750_tree, 'Top 750 tree'),
        (top_1000_tree, 'Top 1,000 tree'),
        (top_2000_tree, 'Top 2,000 tree'),
    )
    comparison = Experiment('altering the amount of attributes in the tree')
    for tree, label in labelled_trees:
        comparison.add_individual_experiment(tree, test_set_benign, test_set_malicious, label)
    return comparison.show_description()
def ngrams_selection_experiment(normal_tree, random_tree, most_frequent_tree, information_gain_tree, gain_ratio_tree, test_set_benign, test_set_malicious):
    """Run the 'top-k n-gram selection method' experiment over five trees.

    Each tree is registered with one ``Experiment`` under a label naming
    its selection method, always against the same benign and malicious
    test sets.

    :param normal_tree: registered as 'Kolter & Maloof tree'
    :param random_tree: registered as 'Random Selection'
    :param most_frequent_tree: registered as 'Most frequent ngrams'
    :param information_gain_tree: registered as 'Information Gain'
    :param gain_ratio_tree: registered as 'Gain Ratio'
    :param test_set_benign: benign test set handed to every registration
    :param test_set_malicious: malicious test set handed to every registration
    :return: the value of ``Experiment.show_description()``
    """
    labelled_trees = (
        (normal_tree, 'Kolter & Maloof tree'),
        (random_tree, 'Random Selection'),
        (most_frequent_tree, 'Most frequent ngrams'),
        (information_gain_tree, 'Information Gain'),
        # Previously mislabelled 'Information Gain' (copy-paste slip), which
        # made the last two results indistinguishable in the report.
        (gain_ratio_tree, 'Gain Ratio'),
    )
    experiment = Experiment('changing method of top-k ngram selection')
    for tree, label in labelled_trees:
        experiment.add_individual_experiment(tree, test_set_benign, test_set_malicious, label)
    return experiment.show_description()