def ngram_size_experiment(trigram_tree, quadgram_tree, pentagram_tree, hexagram_tree, test_set_benign, test_set_malicious):
    """Run the 'altering the length of the n-gram' experiment.

    Each of the four trees is registered as an individual run against the
    same benign and malicious test sets, in the order 3-gram, 4-gram,
    5-gram, 6-gram (as labelled in the report).

    Args:
        trigram_tree: Tree reported under the label '3-gram tree'.
        quadgram_tree: Tree reported under the label '4-gram tree'.
        pentagram_tree: Tree reported under the label '5-gram tree'.
        hexagram_tree: Tree reported under the label '6-gram tree'.
        test_set_benign: Benign test set, forwarded unchanged to every run.
        test_set_malicious: Malicious test set, forwarded unchanged to every run.

    Returns:
        Whatever ``Experiment.show_description()`` returns once all four
        runs have been added.
    """
    experiment = Experiment('altering the length of the n-gram')

    labelled_trees = (
        (trigram_tree, '3-gram tree'),
        (quadgram_tree, '4-gram tree'),
        (pentagram_tree, '5-gram tree'),
        (hexagram_tree, '6-gram tree'),
    )
    for tree, label in labelled_trees:
        experiment.add_individual_experiment(tree, test_set_benign, test_set_malicious, label)

    return experiment.show_description()
    def calc_stat(self, description="Default description"):
        """Evaluate the stored tree on both test sets and write the report to disk.

        The tree is loaded with ``self._load_tree_from_file(self._tree)``.
        Every directory entry of ``self.benign_test_set`` and
        ``self.malware_test_set`` is joined with its directory and passed on
        as the benign / malicious test set of a single run. The report
        produced by ``experiment.show_description()`` is written to
        ``self.STATISTICS_FILE_NAME`` in the directory that contains
        ``self._tree``; an existing file of that name is overwritten.

        Args:
            description: Passed to the ``Experiment`` constructor and also
                used as the label of the single run that is added.
        """
        experiment = Experiment(description)
        tree = self._load_tree_from_file(self._tree)

        # os.listdir returns bare entry names; prefix them with the directory.
        benign = [os.path.join(self.benign_test_set, f)
                  for f in os.listdir(self.benign_test_set)]
        malware = [os.path.join(self.malware_test_set, f)
                   for f in os.listdir(self.malware_test_set)]

        experiment.add_individual_experiment(tree, benign, malware, description)

        tree_dir = os.path.dirname(self._tree)

        with open(os.path.join(tree_dir, self.STATISTICS_FILE_NAME), "w") as stat_file:
            # ``print >> stat_file, ...`` only works on Python 2 (on Python 3 it
            # raises TypeError at runtime); write() behaves the same on both.
            # The trailing newline mirrors what print appended.
            stat_file.write("%s\n" % (experiment.show_description(),))
def training_set_size_experiment(tree_training_50, tree_training_100, tree_training_150, tree_training_200,
                                 tree_training_250, test_set_benign, test_set_malicious):
    """Run the training-set-size experiment.

    The five trees are registered as individual runs, in ascending order of
    the size named in their label, all against the same benign and malicious
    test sets.

    Args:
        tree_training_50: Tree reported as 'Training set 50-50'.
        tree_training_100: Tree reported as 'Training set 100-100'.
        tree_training_150: Tree reported as 'Training set 150-150'.
        tree_training_200: Tree reported as 'Training set 200-200'.
        tree_training_250: Tree reported as 'Training set 250-250'.
        test_set_benign: Benign test set, forwarded unchanged to every run.
        test_set_malicious: Malicious test set, forwarded unchanged to every run.

    Returns:
        Whatever ``Experiment.show_description()`` returns once all five
        runs have been added.
    """
    experiment = Experiment('altering the size of training set (maintaining ratio of benign/malicious)')

    trees = (tree_training_50, tree_training_100, tree_training_150,
             tree_training_200, tree_training_250)
    labels = ('Training set 50-50', 'Training set 100-100', 'Training set 150-150',
              'Training set 200-200', 'Training set 250-250')

    for tree, label in zip(trees, labels):
        experiment.add_individual_experiment(tree, test_set_benign, test_set_malicious, label)

    return experiment.show_description()
def top_ngrams_experiment(top_100_tree, top_250_tree, top_500_tree, top_750_tree,
                          top_1000_tree, top_2000_tree, test_set_benign, test_set_malicious):
    """Run the 'altering the amount of attributes in the tree' experiment.

    The six trees are registered as individual runs, from 'Top 100 tree' up
    to 'Top 2,000 tree', all against the same benign and malicious test sets.

    Args:
        top_100_tree: Tree reported as 'Top 100 tree'.
        top_250_tree: Tree reported as 'Top 250 tree'.
        top_500_tree: Tree reported as 'Top 500 tree'.
        top_750_tree: Tree reported as 'Top 750 tree'.
        top_1000_tree: Tree reported as 'Top 1,000 tree'.
        top_2000_tree: Tree reported as 'Top 2,000 tree'.
        test_set_benign: Benign test set, forwarded unchanged to every run.
        test_set_malicious: Malicious test set, forwarded unchanged to every run.

    Returns:
        Whatever ``Experiment.show_description()`` returns once all six
        runs have been added.
    """
    experiment = Experiment('altering the amount of attributes in the tree')

    runs = [
        ('Top 100 tree', top_100_tree),
        ('Top 250 tree', top_250_tree),
        ('Top 500 tree', top_500_tree),
        ('Top 750 tree', top_750_tree),
        ('Top 1,000 tree', top_1000_tree),
        ('Top 2,000 tree', top_2000_tree),
    ]
    for run_label, run_tree in runs:
        experiment.add_individual_experiment(run_tree, test_set_benign, test_set_malicious, run_label)

    return experiment.show_description()
def ngrams_selection_experiment(normal_tree, random_tree, most_frequent_tree, information_gain_tree,
                                gain_ratio_tree, test_set_benign, test_set_malicious):
    """Run the 'changing method of top-k ngram selection' experiment.

    Each tree is registered as an individual run under its own label, all
    against the same benign and malicious test sets.

    Args:
        normal_tree: Tree reported as 'Kolter & Maloof tree'.
        random_tree: Tree reported as 'Random Selection'.
        most_frequent_tree: Tree reported as 'Most frequent ngrams'.
        information_gain_tree: Tree reported as 'Information Gain'.
        gain_ratio_tree: Tree reported as 'Gain Ratio'.
        test_set_benign: Benign test set, forwarded unchanged to every run.
        test_set_malicious: Malicious test set, forwarded unchanged to every run.

    Returns:
        Whatever ``Experiment.show_description()`` returns once all five
        runs have been added.
    """
    experiment = Experiment('changing method of top-k ngram selection')

    experiment.add_individual_experiment(normal_tree, test_set_benign, test_set_malicious, 'Kolter & Maloof tree')
    experiment.add_individual_experiment(random_tree, test_set_benign, test_set_malicious, 'Random Selection')
    experiment.add_individual_experiment(most_frequent_tree, test_set_benign, test_set_malicious, 'Most frequent ngrams')
    experiment.add_individual_experiment(information_gain_tree, test_set_benign, test_set_malicious, 'Information Gain')
    # The gain-ratio run previously reused the 'Information Gain' label, so
    # the two runs could not be told apart in the report.
    experiment.add_individual_experiment(gain_ratio_tree, test_set_benign, test_set_malicious, 'Gain Ratio')

    return experiment.show_description()