def get_compiler_errors_from_source(self, program_id, program_source):
    """Return the compiler error list for ``program_source``.

    If ``self.compilation_error_store`` is set, the lookup is delegated to
    its ``get_errors(program_id, program_source)``.  Otherwise the source is
    handed to ``compilation_errors`` and the first element of the pair it
    returns is used.
    """
    store = self.compilation_error_store
    if store is None:
        # No store configured: compile directly and drop the second
        # element of the returned pair.
        found_errors, _ = compilation_errors(program_source)
        return found_errors
    return store.get_errors(program_id, program_source)
def get_compiler_errors(self, program_id, program, name_dict, name_seq):
    """Return the compiler error list for a tokenized ``program``.

    The program is first turned back into source text with
    ``self.get_program_source_from_vector(program, name_dict, name_seq)``;
    the error lookup itself is delegated to
    ``self.get_compiler_errors_from_source`` so that the "error store if
    configured, otherwise compile" policy lives in exactly one place.
    """
    program_source = self.get_program_source_from_vector(
        program, name_dict, name_seq)
    return self.get_compiler_errors_from_source(program_id, program_source)
def process(self, source_code_array, max_attempts=6):
    """Iteratively apply network-suggested fixes to each given source program.

    Every program in ``source_code_array`` is tokenized with ``C_Tokenizer``
    and then, for up to ``max_attempts`` rounds, vectorized, passed through
    ``get_fixes`` and patched with ``apply_fix``.  A program stops taking
    part in further rounds as soon as vectorization or fix application
    fails for it, or a non-empty fix increases its compiler error count.

    Args:
        source_code_array: iterable of source strings; results are keyed by
            each program's position in this iterable.
        max_attempts: maximum number of fix rounds.

    Returns:
        A tuple ``(fixes_to_return, repaired_programs, error_messages)`` of
        dicts keyed by program index:
        ``fixes_to_return[idx]`` is a list of ``'<kind> at line <n>: <src>'``
        strings, ``repaired_programs[idx]`` is the stripped source rebuilt
        from the last token sequence recorded for that program, and
        ``error_messages[idx]`` is a list of status strings
        (``'VectorizationFailed'``, ``'ErrorsIncreased'``, ``'IndexError'``,
        ``'InvalidFixLocation'``, ``'SubstitutionFailed'`` or ``'OK'``).
    """
    sequences_of_programs = {}
    entries = []
    # idx -> (name_dict, literal_sequence); needed to rebuild every program
    # with its *own* tables once the rounds are over.
    token_tables = {}
    errors = {}
    fixes_to_return = {}
    error_messages = {}

    for idx, source_code in enumerate(source_code_array):
        program, name_dict, name_sequence, literal_sequence = \
            C_Tokenizer().tokenize(source_code)
        entries.append(
            (idx, program, name_dict, name_sequence, literal_sequence))
        token_tables[idx] = (name_dict, literal_sequence)
        sequences_of_programs[idx] = [program]
        errors[idx], _ = compilation_errors(source_code)
        error_messages[idx] = []
        fixes_to_return[idx] = []

    network = self.network

    if self.task == 'ids':
        normalize_names = False
        fix_kind = 'insert'
    else:
        assert self.task == 'typo'
        normalize_names = True
        fix_kind = 'replace'

    try:
        for _ in range(max_attempts):
            to_delete = []
            input_ = []

            # Vectorize the latest version of every still-active program.
            for i, entry in enumerate(entries):
                idx = entry[0]
                try:
                    program_vector = vectorize(
                        sequences_of_programs[idx][-1],
                        network['in_seq_length'], network['dictionary'],
                        normalize_names=normalize_names, reverse=True,
                        append_eos=False)
                except VectorizationFailedException:
                    program_vector = None

                if program_vector is not None:
                    input_.append(program_vector)
                else:
                    to_delete.append(i)
                    error_messages[idx].append('VectorizationFailed')

            # Delete from the back so the remaining indices stay valid.
            for i in sorted(to_delete, reverse=True):
                del entries[i]

            assert len(input_) == len(entries)

            if not input_:
                break

            # Pass the batch through the network and devectorize the result;
            # fixes[i] belongs to entries[i].
            fix_vectors = get_fixes(network['session'], input_, network)
            fixes = []
            for i, fix_vector in enumerate(fix_vectors):
                entries[i]  # keeps the original IndexError on a size mismatch
                fixes.append(devectorize(fix_vector, network['dictionary']))

            to_delete = []

            # Apply fixes
            for i, (entry, fix) in enumerate(zip(entries, fixes)):
                idx, _, name_dict, name_sequence, literal_sequence = entry
                fix_tokens = fix.strip().split()

                try:
                    program = apply_fix(
                        sequences_of_programs[idx][-1], fix,
                        kind=fix_kind, check_literals=True)
                    sequences_of_programs[idx].append(program)
                    regen_source_code = tokens_to_source(
                        program, name_dict, clang_format=True,
                        literal_seq=literal_sequence)
                    this_errors, _ = compilation_errors(regen_source_code)

                    if fix_tokens and len(this_errors) > len(errors[idx]):
                        # NOTE(review): the rejected program has already been
                        # appended to the history above and the fix is still
                        # reported through the ``else`` branch below; confirm
                        # whether that is intended.
                        to_delete.append(i)
                        error_messages[idx].append('ErrorsIncreased')
                    else:
                        errors[idx] = this_errors
                except IndexError:
                    to_delete.append(i)
                    error_messages[idx].append('IndexError')
                except VectorizationFailedException:
                    to_delete.append(i)
                    error_messages[idx].append('VectorizationFailed')
                except InvalidFixLocationException:
                    to_delete.append(i)
                    # Guard against an empty fix: indexing its first token
                    # unconditionally would raise an uncaught IndexError here.
                    if fix_tokens and fix_tokens[0] == '_eos_':
                        error_messages[idx].append('OK')
                    else:
                        error_messages[idx].append('InvalidFixLocation')
                except SubstitutionFailedException:
                    to_delete.append(i)
                    error_messages[idx].append('SubstitutionFailed')
                else:
                    assert not fix_tokens or fix_tokens[0] != '_eos_'

                    if fix_kind == 'insert':
                        # The first token of an insert fix is dropped before
                        # the line number is extracted and the fix rendered.
                        fix_ = ' '.join(fix_tokens[1:])
                        fix_line = extract_line_number(fix_) + 1
                        fix_source = fix_to_source(
                            fix_, program, name_dict, clang_format=True)
                    else:
                        fix_line = extract_line_number(fix) + 1
                        fix_source = fix_to_source(
                            fix, program, name_dict, name_seq=name_sequence,
                            literal_seq=literal_sequence, clang_format=True)

                    fixes_to_return[idx].append(
                        '%s at line %d: %s' % (
                            fix_kind, fix_line,
                            ''.join(fix_source.split('\n'))))

            for i in sorted(to_delete, reverse=True):
                del entries[i]
    except (KeyError, InvalidFixLocationException,
            SubstitutionFailedException):
        # Best effort: stop iterating and return whatever was repaired so far.
        pass

    repaired_programs = {}
    for idx, history in sequences_of_programs.items():
        # Use this program's own name/literal tables rather than whichever
        # ones happened to be bound last by the loops above.
        name_dict, literal_sequence = token_tables[idx]
        repaired_programs[idx] = tokens_to_source(
            history[-1], name_dict, clang_format=True,
            literal_seq=literal_sequence).strip()

    return fixes_to_return, repaired_programs, error_messages
def _pad_histories(histories, min_length):
    """Repeat each list's last item until the first list has ``min_length`` items."""
    while len(histories[0]) < min_length:
        for history in histories:
            history.append(history[-1])


def do_problem(problem_id):
    """Replay the stored fixes for every program of ``problem_id``.

    For each program the recorded 'typo' fixes are applied first (as
    'replace' fixes), then the recorded 'ids' fixes (as 'insert' fixes).
    After every accepted fix the program, its compiler error list and the
    full compiler output are appended to the module-level
    ``reconstruction``, ``errors`` and ``errors_full`` tables.  The
    histories are padded to 6 entries after the typo phase and to 11
    entries after the ids phase.  Unless ``args.is_timing_experiment`` is
    set, the error histories are also written to the database and
    committed.

    The cursor is closed even when an exception escapes.
    """
    global reconstruction, errors, errors_full, total_count, errors_test

    c = conn.cursor()
    try:
        reconstruction[problem_id] = {}
        errors[problem_id] = {}
        errors_full[problem_id] = {}
        errors_test[problem_id] = []

        # Materialize the rows first: the same cursor is reused for further
        # queries inside the per-program loop below.
        candidate_programs = [
            (row[0], row[1], row[2], json.loads(row[3]), json.loads(row[4]))
            for row in c.execute(
                'SELECT user_id, prog_id, code, name_dict, name_seq FROM programs WHERE prob_id = ?',
                (problem_id,))
        ]

        for _, prog_id, initial, name_dict, _ in candidate_programs:
            fixes_suggested_by_typo_network = [
                row[0] for row in c.execute(
                    'SELECT fix FROM iterations WHERE prog_id=? AND network = \'typo\' ORDER BY iteration',
                    (prog_id,))
            ]
            fixes_suggested_by_undeclared_network = [
                row[0] for row in c.execute(
                    'SELECT fix FROM iterations WHERE prog_id=? AND network = \'ids\' ORDER BY iteration',
                    (prog_id,))
            ]

            # The three histories always grow in lockstep.
            programs = [initial]
            reconstruction[problem_id][prog_id] = programs
            temp_errors, temp_errors_full = compilation_errors(
                tokens_to_source(initial, name_dict, False))
            error_lists = [temp_errors]
            error_texts = [temp_errors_full]
            errors[problem_id][prog_id] = error_lists
            errors_full[problem_id][prog_id] = error_texts
            histories = (programs, error_lists, error_texts)

            # Typo fixes: stop at the first fix that fails the criterion or
            # increases the number of compiler errors.
            try:
                for fix in fixes_suggested_by_typo_network:
                    if not meets_criterion(programs[-1], fix, 'replace'):
                        break
                    temp_prog = apply_fix(programs[-1], fix, 'replace')
                    temp_errors, temp_errors_full = compilation_errors(
                        tokens_to_source(temp_prog, name_dict, False))
                    if len(temp_errors) > len(error_lists[-1]):
                        break
                    programs.append(temp_prog)
                    error_lists.append(temp_errors)
                    error_texts.append(temp_errors_full)
            except InvalidFixLocationException:
                print('Localization failed')

            _pad_histories(histories, 6)

            # Undeclared-identifier fixes: each distinct fix is tried at most
            # once; stop at the first one that increases the error count.
            already_fixed = set()
            try:
                for fix in fixes_suggested_by_undeclared_network:
                    if fix in already_fixed:
                        continue
                    temp_prog = apply_fix(programs[-1], fix, 'insert')
                    already_fixed.add(fix)
                    temp_errors, temp_errors_full = compilation_errors(
                        tokens_to_source(temp_prog, name_dict, False))
                    if len(temp_errors) > len(error_lists[-1]):
                        break
                    programs.append(temp_prog)
                    error_lists.append(temp_errors)
                    error_texts.append(temp_errors_full)
            except InvalidFixLocationException:
                print('Localization failed')

            _pad_histories(histories, 11)

            errors_test[problem_id].append(error_lists)

            if not args.is_timing_experiment:
                # NOTE(review): every row is labelled 'typo', including the
                # iterations produced by the 'ids' fixes -- confirm against
                # the consumers of these tables.
                for k, (errors_t, errors_full_t) in enumerate(
                        zip(error_lists, error_texts)):
                    c.execute(
                        "INSERT INTO error_message_strings VALUES(?, ?, ?, ?, ?)",
                        (prog_id, k, 'typo',
                         errors_full_t.decode('utf-8', 'ignore'),
                         len(errors_t)))
                    for error_ in errors_t:
                        c.execute(
                            "INSERT INTO error_messages VALUES(?, ?, ?, ?)",
                            (prog_id, k, 'typo',
                             error_.decode('utf-8', 'ignore'),))

        count_t = len(candidate_programs)
        total_count += count_t

        # Single-argument parenthesised prints emit the same text under the
        # Python 2 print statement and the Python 3 print function.
        if not args.is_timing_experiment:
            print('Committing changes to database...')
            conn.commit()
            print('Done!')
        else:
            print('Done problem with %s programs' % (count_t,))
    finally:
        c.close()