Beispiel #1
0
 def get_compiler_errors_from_source(self, program_id, program_source):
     """Return the list of compiler errors for ``program_source``.

     When ``self.compilation_error_store`` is set, the lookup is delegated
     to its ``get_errors(program_id, program_source)`` method. Otherwise
     ``compilation_errors`` is called on the source directly and only the
     first element of its result is returned.
     """
     store = self.compilation_error_store

     # No store configured: compile directly and drop the second result.
     if store is None:
         found_errors, _unused = compilation_errors(program_source)
         return found_errors

     return store.get_errors(program_id, program_source)
Beispiel #2
0
 def get_compiler_errors(self, program_id, program, name_dict, name_seq):
     """Return the list of compiler errors for a vectorized program.

     The source text is first rebuilt with
     ``self.get_program_source_from_vector(program, name_dict, name_seq)``.
     The errors then come from ``self.compilation_error_store.get_errors``
     when a store is set, and from ``compilation_errors`` (first element of
     its result) otherwise.
     """
     source_text = self.get_program_source_from_vector(
         program, name_dict, name_seq)
     store = self.compilation_error_store

     # No store configured: compile directly and drop the second result.
     if store is None:
         found_errors, _unused = compilation_errors(source_text)
         return found_errors

     return store.get_errors(program_id, source_text)
Beispiel #3
0
    def process(self, source_code_array, max_attempts=6):
        """Iteratively apply network-suggested fixes to a batch of programs.

        Each source string is tokenized, then for up to ``max_attempts``
        rounds the latest token sequence of every still-active program is
        vectorized, passed through ``self.network`` and the suggested fix is
        applied. A program stops being processed as soon as vectorization
        fails, applying the fix raises, or the fix increases the number of
        compiler errors.

        ``self.task`` selects the fix mode: ``'ids'`` applies ``'insert'``
        fixes without name normalization, ``'typo'`` applies ``'replace'``
        fixes with name normalization.

        Args:
            source_code_array: iterable of program source strings.
            max_attempts: maximum number of fix rounds.

        Returns:
            A tuple ``(fixes_to_return, repaired_programs, error_messages)``
            of dicts keyed by the program's position in
            ``source_code_array``:
            * ``fixes_to_return[idx]`` - list of ``'<kind> at line N: ...'``
              descriptions of the applied fixes;
            * ``repaired_programs[idx]`` - stripped source regenerated from
              the last token sequence of the program;
            * ``error_messages[idx]`` - list of status strings recorded when
              the program was dropped from processing.
        """
        sequences_of_programs = {}
        fixes_suggested_by_network = {}
        entries = []
        errors = {}
        fixes_to_return = {}
        error_messages = {}
        # Per-program (name_dict, literal_sequence), needed to regenerate the
        # final source of *each* program with its own identifier/literal
        # tables rather than with whatever a loop variable last held.
        source_metadata = {}

        # Wrap it up into a nice box
        for idx, source_code in enumerate(source_code_array):
            program, name_dict, name_sequence, literal_sequence = C_Tokenizer().tokenize(source_code)
            entries.append((idx, program, name_dict, name_sequence, literal_sequence))
            source_metadata[idx] = (name_dict, literal_sequence)
            sequences_of_programs[idx] = [program]
            fixes_suggested_by_network[idx] = []
            errors[idx], _ = compilation_errors(source_code)
            error_messages[idx] = []
            fixes_to_return[idx] = []

        network = self.network

        if self.task == 'ids':
            normalize_names = False
            fix_kind = 'insert'
        else:
            assert self.task == 'typo'
            normalize_names = True
            fix_kind = 'replace'

        try:
            for _attempt in range(max_attempts):
                input_ = []
                still_active = []

                # Vectorize the latest version of every active program;
                # programs that cannot be vectorized are dropped.
                for entry in entries:
                    idx = entry[0]

                    try:
                        program_vector = vectorize(sequences_of_programs[idx][-1], network['in_seq_length'], network['dictionary'], normalize_names=normalize_names, reverse=True, append_eos=False)
                    except VectorizationFailedException:
                        program_vector = None

                    if program_vector is not None:
                        input_.append(program_vector)
                        still_active.append(entry)
                    else:
                        error_messages[idx].append('VectorizationFailed')

                entries = still_active

                if len(input_) == 0:
                    break

                # Pass it through the network
                fix_vectors = get_fixes(network['session'], input_, network)
                fixes = []

                # Devectorize them
                for i, fix_vector in enumerate(fix_vectors):
                    idx = entries[i][0]

                    fix = devectorize(fix_vector, network['dictionary'])
                    fixes_suggested_by_network[idx].append(fix)
                    fixes.append(fix)

                # Positions (within `entries`) of programs that must not be
                # processed in the next round.
                dropped = set()

                # Apply fixes
                for i, (entry, fix) in enumerate(zip(entries, fixes)):
                    idx, _, name_dict, name_sequence, literal_sequence = entry
                    fix_tokens = fix.split()

                    try:
                        program = sequences_of_programs[idx][-1]
                        program = apply_fix(program, fix, kind=fix_kind, check_literals=True)
                        # NOTE(review): the patched program is recorded before
                        # the error check below, so a fix rejected with
                        # 'ErrorsIncreased' still ends up in the repaired
                        # program and in `fixes_to_return` - confirm this is
                        # intended.
                        sequences_of_programs[idx].append(program)
                        regen_source_code = tokens_to_source(program, name_dict, clang_format=True, literal_seq=literal_sequence)
                        this_errors, _ = compilation_errors(regen_source_code)

                        if len(fix_tokens) > 0 and len(this_errors) > len(errors[idx]):
                            dropped.add(i)
                            error_messages[idx].append('ErrorsIncreased')
                        else:
                            errors[idx] = this_errors
                    except IndexError:
                        dropped.add(i)
                        error_messages[idx].append('IndexError')
                    except VectorizationFailedException:
                        dropped.add(i)
                        error_messages[idx].append('VectorizationFailed')
                    except InvalidFixLocationException:
                        dropped.add(i)

                        # An '_eos_' fix means the network proposes no further
                        # change. Guard against an empty fix so that this
                        # handler cannot itself raise IndexError.
                        if fix_tokens and fix_tokens[0] == '_eos_':
                            error_messages[idx].append('OK')
                        else:
                            error_messages[idx].append('InvalidFixLocation')
                    except SubstitutionFailedException:
                        dropped.add(i)
                        error_messages[idx].append('SubstitutionFailed')
                    else:
                        assert len(fix_tokens) == 0 or fix_tokens[0] != '_eos_'

                        if fix_kind == 'insert':
                            # The first token of an insert fix is skipped
                            # before the line number is extracted.
                            fix_ = ' '.join(fix_tokens[1:])
                            fix_line = extract_line_number(fix_) + 1
                            fix_source = fix_to_source(fix_, program, name_dict, clang_format=True)
                        else:
                            fix_line = extract_line_number(fix) + 1
                            fix_source = fix_to_source(fix, program, name_dict, name_seq=name_sequence, literal_seq=literal_sequence, clang_format=True)

                        fixes_to_return[idx].append('%s at line %d: %s' % (fix_kind, fix_line, ''.join(fix_source.split('\n'))))

                # Delete
                entries = [entry for i, entry in enumerate(entries) if i not in dropped]

        except (KeyError, InvalidFixLocationException, SubstitutionFailedException):
            # Best effort: stop iterating, but still return whatever has been
            # repaired so far.
            pass
        # -----------

        repaired_programs = {}

        for idx, history in sequences_of_programs.items():
            # Regenerate each program with its own name dictionary and
            # literal sequence.
            name_dict, literal_sequence = source_metadata[idx]
            repaired_source = tokens_to_source(history[-1], name_dict, clang_format=True, literal_seq=literal_sequence)
            repaired_programs[idx] = repaired_source.strip()

        return fixes_to_return, repaired_programs, error_messages
Beispiel #4
0
def do_problem(problem_id):
    """Replay the stored network fixes for every program of one problem.

    For each row of ``programs`` with ``prob_id = problem_id`` the fixes
    recorded in ``iterations`` are re-applied: first the ``'typo'`` fixes
    (``'replace'`` mode), then the ``'ids'`` fixes (``'insert'`` mode). After
    every accepted fix the program is recompiled and the program and its
    compiler errors are appended to the module-level ``reconstruction``,
    ``errors`` and ``errors_full`` histories. A phase stops at the first fix
    that increases the number of errors (or, for typo fixes, that does not
    meet ``meets_criterion``). The histories are padded by repeating their
    last state to at least 6 entries after the typo phase and at least 11
    entries after the ids phase.

    Unless ``args.is_timing_experiment`` is set, the error histories are
    written to the ``error_message_strings`` and ``error_messages`` tables
    and the transaction is committed.

    Side effects: updates the globals ``reconstruction``, ``errors``,
    ``errors_full``, ``errors_test`` and ``total_count``; prints progress
    messages.
    """
    global reconstruction, errors, errors_full, total_count, errors_test

    c = conn.cursor()

    # try/finally so the cursor is released even when a query, a fix or a
    # compilation raises.
    try:
        reconstruction[problem_id] = {}
        errors[problem_id] = {}
        errors_full[problem_id] = {}
        errors_test[problem_id] = []
        candidate_programs = []

        # Materialize the rows first: the same cursor is reused for the
        # per-program queries below.
        for row in c.execute('SELECT user_id, prog_id, code, name_dict, name_seq FROM programs WHERE prob_id = ?', (problem_id,)):
            user_id, prog_id, initial = row[0], row[1], row[2]
            name_dict = json.loads(row[3])
            name_seq = json.loads(row[4])

            candidate_programs.append(
                (user_id, prog_id, initial, name_dict, name_seq,))

        for _, prog_id, initial, name_dict, name_seq in candidate_programs:
            fixes_suggested_by_typo_network = [
                row[0] for row in c.execute('SELECT fix FROM iterations WHERE prog_id=? AND network = \'typo\' ORDER BY iteration', (prog_id,))]
            fixes_suggested_by_undeclared_network = [
                row[0] for row in c.execute('SELECT fix FROM iterations WHERE prog_id=? AND network = \'ids\' ORDER BY iteration', (prog_id,))]

            # Local aliases of the three per-program histories; they are the
            # very list objects stored in the global dicts.
            programs = [initial]
            reconstruction[problem_id][prog_id] = programs
            temp_errors, temp_errors_full = compilation_errors(
                tokens_to_source(initial, name_dict, False))
            error_history = [temp_errors]
            full_error_history = [temp_errors_full]
            errors[problem_id][prog_id] = error_history
            errors_full[problem_id][prog_id] = full_error_history

            # Phase 1: typo fixes, applied in 'replace' mode.
            try:
                for fix in fixes_suggested_by_typo_network:
                    if not meets_criterion(programs[-1], fix, 'replace'):
                        break

                    temp_prog = apply_fix(programs[-1], fix, 'replace')
                    temp_errors, temp_errors_full = compilation_errors(
                        tokens_to_source(temp_prog, name_dict, False))

                    # Reject the fix (and all later ones) if it made things
                    # worse.
                    if len(temp_errors) > len(error_history[-1]):
                        break

                    programs.append(temp_prog)
                    error_history.append(temp_errors)
                    full_error_history.append(temp_errors_full)

            except InvalidFixLocationException:
                # Single-argument parenthesized form: same output as the
                # Python 2 print statement, and also valid Python 3 syntax.
                print('Localization failed')

            # Pad to at least 6 entries by repeating the last state.
            while len(programs) <= 5:
                programs.append(programs[-1])
                error_history.append(error_history[-1])
                full_error_history.append(full_error_history[-1])

            already_fixed = []

            # Phase 2: undeclared-identifier fixes, applied in 'insert' mode;
            # a fix that was already applied is skipped.
            try:
                for fix in fixes_suggested_by_undeclared_network:
                    if fix in already_fixed:
                        continue

                    temp_prog = apply_fix(programs[-1], fix, 'insert')
                    already_fixed.append(fix)
                    temp_errors, temp_errors_full = compilation_errors(
                        tokens_to_source(temp_prog, name_dict, False))

                    if len(temp_errors) > len(error_history[-1]):
                        break

                    programs.append(temp_prog)
                    error_history.append(temp_errors)
                    full_error_history.append(temp_errors_full)

            except InvalidFixLocationException:
                print('Localization failed')

            # Pad to at least 11 entries by repeating the last state.
            while len(programs) <= 10:
                programs.append(programs[-1])
                error_history.append(error_history[-1])
                full_error_history.append(full_error_history[-1])

            errors_test[problem_id].append(error_history)

            if not args.is_timing_experiment:
                for k, (errors_t, errors_full_t) in enumerate(zip(error_history, full_error_history)):
                    c.execute("INSERT INTO error_message_strings VALUES(?, ?, ?, ?, ?)", (
                        prog_id, k, 'typo', errors_full_t.decode('utf-8', 'ignore'), len(errors_t)))

                    for error_ in errors_t:
                        c.execute("INSERT INTO error_messages VALUES(?, ?, ?, ?)",
                                  (prog_id, k, 'typo', error_.decode('utf-8', 'ignore'),))

        count_t = len(candidate_programs)
        total_count += count_t

        if not args.is_timing_experiment:
            print('Committing changes to database...')
            conn.commit()
            print('Done!')
        else:
            # Formats to exactly what `print 'Done problem with', count_t,
            # 'programs'` emitted.
            print('Done problem with %d programs' % count_t)
    finally:
        c.close()