Ejemplo n.º 1
0
    def apply_mutation(self, ch: Chromosome):
        """Mutate the templates of one randomly chosen cluster of a chromosome.

        Every template of the chosen cluster is changed with probability
        ``1 / cluster_size``.  Afterwards sub templates are removed from the
        cluster and new templates are added until the cluster is fully
        covered again.

        The chromosome is modified in place; nothing is returned.

        :param ch: chromosome to mutate
        """
        cluster_ids = list(ch.templates.keys())
        if not cluster_ids:
            # A chromosome without clusters has nothing to mutate.
            return

        cluster_id = random.choice(cluster_ids)
        cluster_size = ch.cluster_size(cluster_id)

        # An empty cluster has no template to change (and would otherwise
        # cause a division by zero); it is only refilled further below.
        if cluster_size > 0:
            prob = 1.0 / cluster_size
            for template_index in range(cluster_size):
                if random.random() <= prob:
                    self.change_template(ch, cluster_id, template_index)
                    # The template was just changed: repair it if needed and
                    # recompute the lines it matches.
                    fix_all_star_template(ch, cluster_id, template_index,
                                          self.chGenerator.messages)
                    compute_matched_lines(
                        self.chGenerator.messages,
                        ch.templates[cluster_id][template_index])

        # Clean up the modified cluster, then add templates until it reaches
        # 100% coverage again.
        remove_sub_templates(ch, cluster_id)
        self.add_template_to_reach_100cov(ch, cluster_id)
Ejemplo n.º 2
0
 def test_remove_all_stars_template(self):
     """Only the template made exclusively of ``*`` is removed from cluster 3."""
     partly_fixed = Template(['*', '*', '2000'])
     fully_fixed = Template(['file', 'config', '1000'])
     only_stars = Template(['*', '*', '*'])
     chromosome = Chromosome({3: [partly_fixed, fully_fixed, only_stars]})

     remove_all_stars_template(chromosome, 3)

     expected = "[ * * 2000 ]\n[ file config 1000 ]\n"
     self.assertEqual(chromosome.to_string(), expected)
Ejemplo n.º 3
0
def validate_chromosome(templates: dict,
                        chromosome: Chromosome,
                        out_file,
                        fix=None):
    """Compare the templates of a chromosome with an oracle and report metrics.

    A two-column report (correct / incorrect templates) followed by the
    totals is written to ``out_file``.

    Note that the templates of ``chromosome`` are modified in place: every
    ``'#spec#'`` token is replaced by ``'*'`` and, when ``fix`` is set, all
    ``'*'`` and ``'#spec#'`` tokens are removed beforehand.

    :param templates: oracle, mapping a cluster key to its expected templates
    :param chromosome: chromosome whose templates are validated
    :param out_file: writable text stream receiving the report
    :param fix: when truthy, compare only the fixed (non variable) tokens
    :return: tuple ``(correct, incorrect, precision, recall, accuracy,
        f_measure)``; a ratio whose denominator is zero is reported as 0
    """
    number_templates_oracle = sum(
        len(cluster) for cluster in templates.values())

    if fix:
        # Keep only the fixed part of each template.  Slice assignment keeps
        # the same list object, as the former in-place removals did.
        for cluster in chromosome.templates.values():
            for tmp in cluster:
                tmp.token[:] = [
                    word for word in tmp.token
                    if word not in ('*', '#spec#')
                ]

    correct = 0
    incorrect = 0
    out_file.write(
        "\t\t\t\t\t Correct templates  \t\t\t\t\t\t\t\t\t\t\t Incorrect templates \n\n"
    )
    for key, cluster in chromosome.templates.items():
        if key not in templates:
            # The whole cluster is unknown to the oracle.
            incorrect += chromosome.cluster_size(key)
            continue

        for template in cluster:
            # Normalize the '#spec#' marker to the plain wildcard.
            for position, word in enumerate(template.token):
                if word == '#spec#':
                    template.token[position] = "*"

            if is_correct(templates[key], template):
                out_file.write("\t\t\t %r\n" % ((template.to_string())))
                correct += 1
            else:
                incorrect += 1
                out_file.write(
                    "\t\t\t\t\t                    \t\t\t\t\t\t\t\t %r\n" %
                    ((template.to_string())))

    out_file.write("\n\n\t\t\t Correct:   \t\t %r\n" % correct)
    out_file.write("\t\t\t Incorrect: \t\t %r\n" % incorrect)

    # Compute precision, recall, accuracy and F-measure; an empty chromosome
    # or an empty oracle yields 0 instead of a ZeroDivisionError.
    total_templates = chromosome.all_templates()
    precision = correct / total_templates if total_templates else 0.0
    recall = (correct / number_templates_oracle
              if number_templates_oracle else 0.0)
    accuracy_denominator = incorrect + number_templates_oracle
    accuracy = (correct / accuracy_denominator
                if accuracy_denominator else 0.0)
    if precision != 0 or recall != 0:
        f_measure = (2 * (precision * recall)) / (precision + recall)
    else:
        f_measure = 0
    return correct, incorrect, precision, recall, accuracy, f_measure
Ejemplo n.º 4
0
def remove_all_stars_template(chromosome: Chromosome, cluster_id: int):
    """Delete from a cluster every template made only of ``*`` tokens.

    The templates to delete are collected first, so the cluster is not
    changed while it is being scanned; each one is then handed to
    ``chromosome.delete_template``.  A template without any token also
    counts as all-star.

    :param chromosome: chromosome holding the cluster
    :param cluster_id: key of the cluster to clean
    """
    only_wildcards = {
        candidate
        for candidate in chromosome.templates[cluster_id]
        if all(word == "*" for word in candidate.token)
    }

    for candidate in only_wildcards:
        chromosome.delete_template(candidate)
Ejemplo n.º 5
0
 def add_template_to_reach_100cov(self, ch: Chromosome, cluster_id: int):
     """Add templates to a cluster until all of its messages are matched.

     The message indices not matched by any current template of the cluster
     are determined first.  While some remain, one of them is picked at
     random, a template is generated from that message and added to the
     chromosome, and the lines matched by the new template are discarded
     from the remaining ones.

     :param ch: chromosome to complete (modified in place)
     :param cluster_id: key of the cluster to complete
     """
     generator = self.chGenerator
     pending = set(range(len(generator.messages[cluster_id])))
     for existing in ch.templates[cluster_id]:
         pending = pending.difference(existing.matched_lines)

     while pending:
         picked_line = random.choice(list(pending))
         new_template = generator.generate_template_from_line(
             cluster_id, picked_line)
         ch.add_template(new_template)
         compute_matched_lines(generator.messages, new_template)
         pending = pending.difference(new_template.matched_lines)
Ejemplo n.º 6
0
 def test_check_variable_parts(self):
     """A ``*`` whose matched messages all share the same word becomes fixed."""
     # Three messages of length 5 that differ only in their last word.
     message1 = Message(['read', 'text', 'file', 'from', 'ABC1'])
     message2 = Message(['read', 'text', 'file', 'from', 'ABC2'])
     message3 = Message(['read', 'text', 'file', 'from', 'ABC3'])
     messages = {5: [message1, message2, message3]}
     # A chromosome whose single template matches all three messages.
     template = Template(['read', 'text', '*', 'from', '*'])
     template.matched_lines = [0, 1, 2]
     ch = Chromosome({5: [template]})
     # Code under test.
     check_variable_parts(ch, messages)
     # The third token is constant across the messages, the last one is not.
     self.assertEqual(ch.templates[5][0].to_string(),
                      "[ read text file from * ]")
Ejemplo n.º 7
0
def multipoint_cx(ch1: Chromosome, ch2: Chromosome):
    """Cross two chromosomes by exchanging whole clusters between them.

    Every cluster key of ``ch1`` is visited and, with probability 0.5, the
    template lists stored under that key in both chromosomes are exchanged.
    Shallow copies of the lists are exchanged, not the list objects.

    :param ch1: first Chromosome (modified in place)
    :param ch2: second Chromosome (modified in place)
    :return: the two modified chromosomes, as ``(ch1, ch2)``
    """
    first = ch1.templates
    for cluster_key in first:
        if random.random() > 0.5:
            # This cluster stays where it is.
            continue
        second = ch2.templates
        first[cluster_key], second[cluster_key] = (
            second[cluster_key][:],
            first[cluster_key][:],
        )
    return ch1, ch2
Ejemplo n.º 8
0
    def test_is_all_star_template(self):
        """Only a template made exclusively of ``*`` is reported as all-star."""
        only_stars = Template(['*', '*', '*', '*'])
        self.assertTrue(is_all_star_template(only_stars))

        # Same check on a template read back from a chromosome.
        with_fixed_word = Template(['*', 'message', '*', '*'])
        chromosome = Chromosome({4: [with_fixed_word]})
        stored_template = chromosome.templates[4][0]
        self.assertFalse(is_all_star_template(stored_template))
Ejemplo n.º 9
0
def remove_super_templates(chromosome: Chromosome, cluster_id: int):
    """Deduplicate a cluster, then drop the templates flagged as super templates.

    The cluster is rebuilt from a set, so duplicates disappear and the
    original order of the templates is not kept.  ``derive_super_template``
    fills the set of templates to drop, which are then removed from the
    cluster.

    :param chromosome: chromosome holding the cluster (modified in place)
    :param cluster_id: key of the cluster to clean
    """
    unique_templates = list(set(chromosome.templates[cluster_id]))
    chromosome.templates[cluster_id] = unique_templates

    flagged = set()
    derive_super_template(unique_templates, flagged)

    for super_template in flagged:
        unique_templates.remove(super_template)
Ejemplo n.º 10
0
    def test_fix_all_star_template(self):
        """An all-star template of cluster 6 is no longer all-star once fixed."""
        short_messages = [
            Message(['message', 'sent', 'A1']),
            Message(['message', 'sent', 'A2']),
        ]
        long_messages = [
            Message(['message', 'sent', 'A2', 'from', ':', 'B1']),
            Message(['message', 'sent', 'A2', 'from', ':', 'B2']),
        ]
        messages = {3: short_messages, 6: long_messages}

        all_stars = Template(['*', '*', '*', '*', '*', '*'])
        all_stars.matched_lines = [0, 1]
        ch = Chromosome({6: [all_stars]})

        fix_all_star_template(ch, 6, 0, messages)

        fixed_template = ch.templates[6][0]
        self.assertFalse(is_all_star_template(fixed_template))
Ejemplo n.º 11
0
    def test_remove_super_templates(self):
        """Cluster 7 shrinks to 2 templates and cluster 3 keeps only its specific one."""
        # Cluster 7: templates of length 7, some of them listed twice below.
        abc_any = Template(['read', 'text', 'file', 'ABC', 'from', 'DB', '*'])
        abc_any.matched_lines = [3]
        any_any = Template(['read', 'text', 'file', '*', 'from', 'DB', '*'])
        any_any.matched_lines = [0, 1]
        any_number = Template(
            ['read', 'text', 'file', '*', 'from', 'DB', '2323232'])
        any_number.matched_lines = [0]
        any_any_twin = Template(
            ['read', 'text', 'file', '*', 'from', 'DB', '*'])
        any_any_twin.matched_lines = [0, 1]

        # Cluster 3: a general template and a more specific one.
        config_any = Template(['file', 'configuration', '*'])
        config_any.matched_lines = [2, 3, 4]
        config_a = Template(['file', 'configuration', 'A'])
        config_a.matched_lines = [2]

        cluster_7 = [abc_any, any_any, any_number, any_any_twin,
                     any_number, any_any_twin]
        cluster_3 = [config_any, config_a]
        ch = Chromosome({7: cluster_7, 3: cluster_3})

        remove_super_templates(ch, 7)
        self.assertEqual(ch.cluster_size(7), 2)

        remove_super_templates(ch, 3)
        self.assertEqual(ch.cluster_size(3), 1)
        self.assertEqual(ch.templates[3][0].token,
                         ['file', 'configuration', 'A'])
Ejemplo n.º 12
0
 def generate_100cov_chromosome(self):
     """Create a chromosome with 100% coverage for each cluster.

     For every key of the generator's messages except key 0 (which is
     skipped), templates are generated from randomly picked uncovered
     messages and added to the chromosome until every message of that
     cluster is matched by at least one of the added templates.

     :return: the created chromosome
     """
     chromosome = Chromosome({})
     for key, cluster_messages in self.messages.items():
         if key == 0:
             continue
         uncovered_lines = list(range(len(cluster_messages)))
         while uncovered_lines:
             message_index = random.choice(uncovered_lines)
             template = self.generate_template_from_line(key, message_index)
             chromosome.add_template(template)
             compute_matched_lines(self.messages, template)
             # Drop every newly matched line in one pass; the remaining
             # lines keep their relative order.
             matched = set(template.matched_lines)
             uncovered_lines = [
                 line for line in uncovered_lines if line not in matched
             ]
     return chromosome
    def test_add_template_to_reach_100cov(self):
        """After completion, the templates of cluster 8 match all of its messages."""
        logfile = ROOT_DIR + '/test/resources/File.log'
        # Raw string: the pattern holds regex escapes that are not valid
        # Python string escapes.
        chrom_gen = ChromosomeGenerator(
            logfile, 0, '\n', [r"'[\w\d\$\-:,\./_ ><\|]*'"])
        chrom_mutator_100 = ChromosomeMutator100cov(chrom_gen)
        template = Template(
            ['Message', 'sent', 'by', 'EEE', ',', 'at', 'port', '1'])
        template.matched_lines = [0]
        chromosome = Chromosome({8: [template]})

        chrom_mutator_100.add_template_to_reach_100cov(chromosome, 8)

        # Distinct message indices matched by the templates of cluster 8.
        covered_lines = {
            line
            for t in chromosome.templates[8]
            for line in t.matched_lines
        }
        self.assertEqual(len(covered_lines), len(chrom_gen.messages[8]))
    def test_apply_mutation(self):
        """After a mutation, the templates of cluster 3 match all of its messages."""
        logfile = ROOT_DIR + '/test/resources/File.log'
        # Raw string: the pattern holds regex escapes that are not valid
        # Python string escapes.
        chrom_gen = ChromosomeGenerator(
            logfile, 0, '\n', [r"'[\w\d\$\-:,\./_ ><\|]*'"])
        chrom_mutator_100 = ChromosomeMutator100cov(chrom_gen)
        template = Template(['Driver', ':', '*'])
        template.matched_lines = [5]
        chromosome = Chromosome({3: [template]})

        chrom_mutator_100.apply_mutation(chromosome)

        # Distinct message indices matched by the templates of cluster 3.
        covered_lines = {
            line
            for t in chromosome.templates[3]
            for line in t.matched_lines
        }
        self.assertEqual(len(covered_lines), len(chrom_gen.messages[3]))
Ejemplo n.º 15
0
    def compute_objective(self, chromosome: Chromosome):
        """Evaluate a chromosome on two objectives.

        - specificity: fixed words / #words in a template, where ``*`` and
          ``#spec#`` are the non-fixed words
        - frequency: #matched log messages / #log messages of the cluster;
          counted as 0 for a template without any fixed word

        As a side effect, the specificity is stored on every template and
        ``chromosome.coverage`` is set to 1.

        :param chromosome: the chromosome whose templates are evaluated
        :return: ``[average specificity, average frequency]``, both averaged
            over all templates of the chromosome
        """
        specificities = []
        frequencies = []

        # compute specificity and frequency for each template
        for key, template_cluster in chromosome.templates.items():
            for template in template_cluster:
                fixed_words = sum(
                    1.0 for word in template.token
                    if word != "*" and word != "#spec#")
                # A template without tokens has no fixed word; this also
                # avoids a division by zero.
                if template.token:
                    template.specificity = fixed_words / len(template.token)
                else:
                    template.specificity = 0.0
                specificities.append(template.specificity)

                if template.specificity > 0:
                    frequencies.append(1.0 * len(template.matched_lines) /
                                       len(self.generator.messages[key]))
                else:
                    frequencies.append(0.0)

        # averages across all templates
        average_specificity = mean(specificities)
        average_frequency = mean(frequencies)

        chromosome.coverage = 1
        return [average_specificity, average_frequency]
Ejemplo n.º 16
0
    def test_check_variable_parts_2templates(self):
        """Variable parts are checked independently in clusters 5 and 7."""
        # Cluster 5: the second word is constant, the third and last are not.
        five_word_messages = [
            Message(['read', 'text', 'file1', 'from', 'ABC1']),
            Message(['read', 'text', 'file2', 'from', 'ABC3']),
            Message(['read', 'text', 'file2', 'from', 'ABC4']),
        ]
        # Cluster 7: the second word is constant, the fourth and last are not.
        seven_word_messages = [
            Message(['read', 'text', 'file', 'ABC', 'from', 'DB', '98765']),
            Message(['read', 'text', 'file', 'DSE', 'from', 'DB', '7654']),
        ]
        messages = {5: five_word_messages, 7: seven_word_messages}

        # One template per cluster, each matching every message of it.
        short_template = Template(['read', '*', '*', 'from', '*'])
        short_template.matched_lines = [0, 1, 2]
        long_template = Template(
            ['read', '*', 'file', '*', 'from', 'DB', '*'])
        long_template.matched_lines = [0, 1]
        ch = Chromosome({5: [short_template], 7: [long_template]})

        # Code under test.
        check_variable_parts(ch, messages)

        self.assertEqual(ch.templates[5][0].to_string(),
                         "[ read text * from * ]")
        self.assertEqual(ch.templates[7][0].to_string(),
                         "[ read text file * from DB * ]")