Ejemplo n.º 1
0
 def format_program_vector(self, program_vector):
     """Render a program vector as a multi-line, human-readable string.

     The vector is first passed through ``self.filter_program_vector`` and
     then split on ``self.new_line`` with the delimiter kept in each row.
     Every row becomes one output line of the form
     ``[<row index>, <row length>]`` immediately followed by the row's
     tokens joined with ``', '``.

     Returns:
         The rendered rows joined with newline characters.
     """
     filtered = self.filter_program_vector(program_vector)
     rows = split_list(filtered,
                       delimiter=self.new_line,
                       keep_delimiter=True)
     # The %-formatting binds tighter than ``+``, so the bracketed header is
     # built first and the token list is appended directly after it.
     rendered = ('[%-2d, %-3d]' % (row_index, len(row)) +
                 ', '.join(str(token) for token in row)
                 for row_index, row in enumerate(rows))
     return '\n'.join(rendered)
Ejemplo n.º 2
0
 def get_errors_from_ground_truth(self, program_vector,
                                  original_program_vector):
     """List the indices of lines that differ from the ground truth.

     Both vectors are filtered with ``self.filter_program_vector`` and split
     on ``self.new_line`` (delimiter kept). Lines are compared pairwise; for
     each differing pair, ``self.fix_line`` is called on deep copies of the
     two lines and the line index is recorded once per unit of the value it
     returns.

     Returns:
         A list of line indices, in ascending order, where an index may
         appear several times.

     Raises:
         ValueError: if the two programs split into a different number of
             lines.
     """
     candidate_lines = split_list(
         self.filter_program_vector(program_vector),
         delimiter=self.new_line,
         keep_delimiter=True)
     reference_lines = split_list(
         self.filter_program_vector(original_program_vector),
         delimiter=self.new_line,
         keep_delimiter=True)

     candidate_count = len(candidate_lines)
     reference_count = len(reference_lines)
     if candidate_count != reference_count:
         raise ValueError('Program are of different lengths: %d and %d!' %
                          (candidate_count, reference_count))

     faulty_lines = []
     paired_lines = zip(candidate_lines, reference_lines)
     for line_index, (candidate, reference) in enumerate(paired_lines):
         if candidate != reference:
             # Deep copies are handed over so the stored lines stay untouched
             # (presumably fix_line may modify its arguments -- TODO confirm).
             edits_needed = self.fix_line(copy.deepcopy(candidate),
                                          copy.deepcopy(reference))
             faulty_lines.extend(line_index for _ in range(edits_needed))
     return faulty_lines
Ejemplo n.º 3
0
 def cursor_of_line(self, program, line_number, position='init'):
     """Return the token offset in ``program`` of a line's start or end.

     Args:
         program: sequence of tokens in which ``self.new_line`` separates
             lines.
         line_number: line index; asserted to lie between 0 and
             ``self.get_line_count(program)`` inclusive.
         position: ``'init'`` for the offset of the line's first token,
             ``'end'`` for the offset of its last token.

     Returns:
         The offset as an int, or ``None`` when the requested line is never
         reached while walking the split lines.

     Raises:
         ValueError: if the line is reached and ``position`` is neither
             ``'init'`` nor ``'end'``.

     NOTE(review): the offsets are sums of the lengths of the pieces returned
     by ``split_list(program, self.new_line)``; this only matches positions
     in ``program`` if ``split_list`` keeps the delimiter by default --
     confirm against its definition.
     """
     total_lines = self.get_line_count(program)
     assert line_number >= 0
     assert line_number <= total_lines

     # Start of the very first line: nothing precedes it.
     if line_number == 0 and position == 'init':
         return 0

     # End of the very last line: the final token, which must be a newline.
     if line_number == total_lines and position == 'end':
         last_index = len(program) - 1
         assert program[last_index] == self.new_line
         return last_index

     offset = 0
     for index, tokens in enumerate(split_list(program, self.new_line)):
         if index < line_number:
             # Still before the target line: skip over all of its tokens.
             offset += len(tokens)
             continue
         if index == line_number:
             if position == 'init':
                 return offset
             if position == 'end':
                 return offset + len(tokens) - 1
             raise ValueError(
                 'position could be either "init" or "end",  not "%s"' %
                 position)
     return None
Ejemplo n.º 4
0
    def localize_error(self, programA, programB):
        """Locate the first token where programA diverges from programB.

        programA is the mutated program while programB is the original one.
        Both are filtered with ``self.filter_program_vector`` and split on
        ``self.new_line`` (delimiter kept); the first differing line pair and
        the first differing token pair inside it are then identified.

        Returns:
            A tuple ``(different_line_num, error_at, fix_action)`` where
            ``different_line_num`` is the index of the first differing line,
            ``error_at`` is the index of the first differing token in the
            filtered programs, and ``fix_action`` is ``'insert <tok>'`` or
            ``'delete <tok>'`` with ``<tok>`` looked up in
            ``self.rev_tl_dict``.

        Raises:
            ValueError: if the programs split into a different number of
                lines.
            AssertionError: if an input is not a list, ``self.new_line`` is
                not an int, no differing line exists, or neither differing
                token is in ``self.mutables``.
        """
        assert type(programA) == list and type(
            programB) == list, 'types:{}, {}, programs:\n{}\n{}'.format(
                type(programA), type(programB), programA, programB)
        assert type(self.new_line) == int
        programA = self.filter_program_vector(programA)
        programB = self.filter_program_vector(programB)
        programA_lines = split_list(programA,
                                    delimiter=self.new_line,
                                    keep_delimiter=True)
        programB_lines = split_list(programB,
                                    delimiter=self.new_line,
                                    keep_delimiter=True)

        if len(programA_lines) != len(programB_lines):
            raise ValueError('Program are of different lengths: %d and %d!' %
                             (len(programA_lines), len(programB_lines)))

        # Find the first line pair that differs. A_line and B_line are used
        # after the loop: following the break they hold that differing pair.
        different_line_num = None
        for j, (A_line,
                B_line) in enumerate(zip(programA_lines, programB_lines)):
            if A_line != B_line:
                different_line_num = j
                break

        error_at = None
        if different_line_num is not None:
            # Offset of the differing line's first token: the total length of
            # all preceding lines (delimiters were kept, so they are counted).
            error_at = sum(
                [len(line) for line in programA_lines[:different_line_num]])
            # i, a and b are used after this loop: they identify the first
            # mismatching token pair. If zip is exhausted without a mismatch
            # they hold the last pair that was compared.
            for i, (a, b) in enumerate(zip(A_line, B_line)):
                if a != b:
                    break
            error_at += i
        # Fails when the two programs have no differing line.
        assert error_at is not None
        # Sanity checks: error_at must index token a in programA and token b
        # in programB (all preceding lines are equal, so one offset fits both).
        assert programA[error_at] == a, '{},{},{},{}'.format(
            self.format_program_vector(programA), '\na:', a,
            programA[error_at])
        assert programB[error_at] == b, '{},{},{},{}'.format(
            self.format_program_vector(programB), '\nb:', b,
            programB[error_at])

        fix_action = None
        if a in self.mutables and b in self.mutables:
            # Both tokens are mutable, so either edit is possible. Compare the
            # result of fix_line on the remainders after dropping a from A
            # (delete) and after skipping b in B (insert); ties pick delete.
            # NOTE(review): presumably fix_line returns an edit cost/count --
            # confirm against its definition.
            del_cost = self.fix_line(A_line[i + 1:],
                                     B_line[i:],
                                     recursive=True)
            ins_cost = self.fix_line(A_line[i:],
                                     B_line[i + 1:],
                                     recursive=True)
            fix_action = 'delete' if del_cost <= ins_cost else 'insert'

        # b has been deleted from original line
        if fix_action == 'insert' or (a not in self.mutables
                                      and b in self.mutables):
            fix_action = 'insert' + ' ' + self.rev_tl_dict[b]
            # This used to be self.rev_tl_dict[b][-1]; it is unclear why it
            # was written that way.
        # a is a duplicated token
        elif fix_action == 'delete' or (a in self.mutables
                                        and b not in self.mutables):
            fix_action = 'delete' + ' ' + self.rev_tl_dict[a]
        else:
            # Neither token is mutable: no single insert/delete explains it.
            err_str = 'Should not happen:' + self.devectorize(
                A_line) + '\n' + self.devectorize(B_line)
            assert False, err_str

        return different_line_num, error_at, fix_action
Ejemplo n.º 5
0
 def get_line(self, program, line_number):
     """Return the ``line_number``-th piece of ``program``.

     The program is split on ``self.new_line`` via ``split_list`` and the
     result is indexed with ``line_number``.
     """
     lines = split_list(program, self.new_line)
     return lines[line_number]