def evaluate(self, src):
    """Run the validation test suite on ``src`` and split it by outcome.

    Every test in ``self.validation_test_suite`` is executed once through
    ``self.run_test``.  When ``self.config['redundant_test']`` is set, each
    failing test is executed a second time and moved to the passing list
    if that second run succeeds.

    The wall-clock time spent here is added to
    ``statistics.data['time']['testing']`` and the statistics are saved.

    Returns:
        A ``(positive, negative)`` pair of lists: tests that passed and
        tests that failed.
    """
    started_at = time.time()

    passing = []
    failing = []
    for case in self.validation_test_suite:
        bucket = passing if self.run_test(src, case) else failing
        bucket.append(case)

    # make sure if failing tests really fail
    if self.config['redundant_test']:
        # Walk a snapshot because `failing` is edited inside the loop.
        for case in list(failing):
            if not self.run_test(src, case):
                continue
            failing.remove(case)
            passing.append(case)

    statistics.data['time']['testing'] += time.time() - started_at
    statistics.save()

    return passing, failing
patch = repair() else: patch = repair() except TimeoutException: logger.info("failed to generate patch (timeout)") print('TIMEOUT') exit(0) except (CompilationError, InferenceError, TransformationError): logger.info("failed to generate patch") print('FAIL') exit(1) end = time.time() elapsed = format_time(end - start) statistics.data['time']['total'] = end - start statistics.save() if patch is None: logger.info("no patch generated in {}".format(elapsed)) print('FAIL') exit(0) else: patch_file = basename(abspath( args.src)) + '-' + time.strftime("%Y-%b%d-%H%M%S") + '.patch' logger.info("patch successfully generated in {} (see {})".format( elapsed, patch_file)) print('SUCCESS') with open(patch_file, 'w+') as file: for line in patch: file.write(line) exit(0)
def __call__(self, angelic_forest):
    """Synthesize a patch from an angelic forest with the external jar.

    ``angelic_forest`` is either a path (``str``) to an already serialized
    forest, which is copied to ``self.angelic_forest_file``, or a data
    structure that is serialized with ``self.dump_angelic_forest``.

    For every component level in ``self.config['synthesis_levels']`` the
    synthesizer jar is run via ``java -jar``.  Its stdout is expected to be
    one of ``TIMEOUT``, ``FAIL`` or ``SUCCESS``; on the first two the next
    level is tried.  Timing and per-iteration statistics are recorded for
    every run, including runs that exit with a non-zero code.

    Returns:
        A dict mapping expression id tuples (ints parsed from the
        ``a-b-c-d`` header line of the patch file) to the fixed expression
        string, or ``None`` if no level produced a patch or the produced
        patch is empty.

    Raises:
        Exception: if the synthesizer prints an unrecognised status.
    """
    if isinstance(angelic_forest, str):
        # angelic_forest is a file
        shutil.copyfile(angelic_forest, self.angelic_forest_file)
    else:
        # angelic_forest is a data structure
        self.dump_angelic_forest(angelic_forest)

    def convert_to_c(s):
        # The patch file encodes square brackets as placeholders.
        return s.replace('_LBRSQR_', '[').replace('_RBRSQR_', ']')

    dirpath = tempfile.mkdtemp()
    # The scratch directory must be removed on every exit path (success,
    # empty patch, unexpected output, exception), not only when all levels
    # have failed.
    try:
        patch_file = join(dirpath, 'patch')
        config_file = join(dirpath, 'config.json')

        for level in self.config['synthesis_levels']:
            logger.info(
                'synthesizing patch with component level \'{}\''.format(level))

            config = {
                "encodingConfig": {
                    # better if false, if not enough primitive components,
                    # synthesis can fail
                    "componentsMultipleOccurrences": True,
                    "phantomComponents": True,
                    "repairBooleanConst": False,
                    "repairIntegerConst": False,
                    "level": "linear"
                },
                "simplification": False,
                "reuseStructure": not self.config['semfix'],
                "spaceReduction": True,
                "componentLevel": level,
                "solverBound": 3,
                "solverTimeout": self.config['synthesis_timeout']
            }

            with open(config_file, 'w') as file:
                json.dump(config, file)

            if self.config['use_nsynth']:
                jar = os.environ['NSYNTH_JAR']
            else:
                jar = os.environ['SYNTHESIS_JAR']

            if self.config['verbose']:
                stderr = None
            else:
                stderr = subprocess.DEVNULL

            args = [
                self.angelic_forest_file, self.extracted, patch_file,
                config_file
            ]

            synthesis_start_time = time.time()
            try:
                result = subprocess.check_output(['java', '-jar', jar] + args,
                                                 stderr=stderr)
            except subprocess.CalledProcessError:
                logger.warning("synthesis returned non-zero code")
                continue
            finally:
                # Runs for failed invocations too, so every attempt is
                # accounted for in the statistics.
                synthesis_end_time = time.time()
                synthesis_elapsed = synthesis_end_time - synthesis_start_time
                statistics.data['time']['synthesis'] += synthesis_elapsed
                iter_stat = dict()
                iter_stat['tests'] = len(angelic_forest)
                iter_stat['level'] = level
                iter_stat['time'] = synthesis_elapsed
                statistics.data['iterations']['synthesis'].append(iter_stat)
                statistics.save()

            output = str(result, 'UTF-8').strip()

            if output == 'TIMEOUT':
                logger.warning('timeout when synthesizing fix')
            elif output == 'FAIL':
                logger.info('synthesis failed')
            elif output == 'SUCCESS':
                with open(patch_file) as file:
                    content = file.readlines()

                # The patch file is a sequence of three-line records:
                # expression id, original expression, fixed expression.
                patch = dict()
                idx = 0
                while idx < len(content):
                    header = content[idx].strip()
                    idx += 1
                    if not header:
                        # readlines() keeps the newline, so a blank line
                        # has to be detected after stripping.
                        continue
                    expr = tuple(map(int, header.split('-')))
                    original = convert_to_c(content[idx].strip())
                    fixed = convert_to_c(content[idx + 1].strip())
                    idx += 2
                    if self.config['semfix']:
                        logger.info('synthesized expression {}: {}'.format(
                            expr, fixed))
                    else:
                        logger.info('fixing expression {}: {} ---> {}'.format(
                            expr, original, fixed))
                    patch[expr] = fixed

                if len(patch) == 0:
                    logger.warning('patch contains no changes')
                    return None
                return patch
            else:
                raise Exception('result: ' + str(result, 'UTF-8'))
    finally:
        # ignore_errors: a failed cleanup must not mask the real outcome.
        shutil.rmtree(dirpath, ignore_errors=True)

    return None
def generate_patch(self):
    """Localize suspicious expressions and try to synthesize patches.

    Steps, as visible in this method:

    1. Evaluate the validation source to split tests into positive and
       negative ones.
    2. Build the instrumented frontend source and run every test on it to
       collect traces (and dumps for tests that do not have one yet).
       Tests that are not instrumented are dropped from the repair test
       suite.  Negative tests without a dump are run on the golden source
       to obtain it; tests that fail there are excluded from all suites.
    3. Compute suspicious expression groups and, for each group, infer an
       angelic forest, synthesize a candidate fix, apply it to the
       validation source and re-evaluate.  While failing tests remain, a
       counterexample test is added and the fix is refined.

    Returns:
        A list of diffs (``self.validation_src.diff_buggy()``), one per
        fix that made all validation tests pass.  An empty list is
        returned when no fix was found, or when a negative test has no
        dump and no golden source is available.

    Side effects:
        Calls ``exit(0)`` after logging the groups when
        ``self.config['localize_only']`` is set.
    """
    # NOTE(review): this method reads both the free name `config` and
    # `self.config`, as the original did; presumably they refer to the same
    # settings -- confirm before unifying.
    positive, negative = self.evaluate(self.validation_src)

    self.frontend_src.configure()
    if config['build_before_instr']:
        self.frontend_src.build()
    self.instrument_for_localization(self.frontend_src)
    self.frontend_src.build()

    def drop_from_repair_suite(test):
        # Guarded removal, matching the guard already used for excluded
        # tests below, so a test that is absent does not raise ValueError.
        if test in self.repair_test_suite:
            self.repair_test_suite.remove(test)

    testing_start_time = time.time()

    if len(positive) > 0:
        logger.info('running positive tests for debugging')
    for test in positive:
        self.trace += test
        if test not in self.dump:
            # No expected output recorded yet: record it during this run.
            self.dump += test
            _, instrumented = self.run_test(self.frontend_src, test,
                                            dump=self.dump[test],
                                            trace=self.trace[test],
                                            check_instrumented=True)
        else:
            _, instrumented = self.run_test(self.frontend_src, test,
                                            trace=self.trace[test],
                                            check_instrumented=True)
        if not instrumented:
            drop_from_repair_suite(test)

    golden_is_built = False
    excluded = []

    if len(negative) > 0:
        logger.info('running negative tests for debugging')
    for test in negative:
        self.trace += test
        _, instrumented = self.run_test(self.frontend_src, test,
                                        trace=self.trace[test],
                                        check_instrumented=True)
        if not instrumented:
            drop_from_repair_suite(test)
        if test not in self.dump:
            if self.golden_src is None:
                logger.error("golden version or assert file needed for test {}".format(test))
                return []
            if not golden_is_built:
                # Build the golden version lazily, at most once.
                self.golden_src.configure()
                self.golden_src.build()
                golden_is_built = True
            self.dump += test
            result = self.run_test(self.golden_src, test, dump=self.dump[test])
            if not result:
                excluded.append(test)

    for test in excluded:
        if not self.config['mute_test_message']:
            logger.warning('excluding test {} because it fails in golden version'.format(test))
        negative.remove(test)
        drop_from_repair_suite(test)
        self.validation_test_suite.remove(test)

    testing_end_time = time.time()
    testing_elapsed = testing_end_time - testing_start_time
    statistics.data['time']['testing'] += testing_elapsed
    statistics.save()

    logger.info("repair test suite: {}".format(self.repair_test_suite))
    logger.info("validation test suite: {}".format(self.validation_test_suite))

    positive_traces = [(test, self.trace.parse(test)) for test in positive]
    negative_traces = [(test, self.trace.parse(test)) for test in negative]
    suspicious = self.get_suspicious_groups(self.validation_test_suite,
                                            positive_traces,
                                            negative_traces)

    if self.config['localize_only']:
        for idx, (group, score) in enumerate(suspicious):
            logger.info('group {}: {} ({})'.format(idx+1, group, score))
        exit(0)

    if len(suspicious) == 0:
        logger.warning('no suspicious expressions localized')

    repaired = len(negative) == 0

    patches = []
    while (config['generate_all'] or not repaired) and len(suspicious) > 0:
        if self.config['use_semfix_syn']:
            # prepare a clean directory
            shutil.rmtree(join(self.working_dir, 'semfix-syn-input'),
                          ignore_errors=True)

        expressions = suspicious.pop(0)
        logger.info('considering suspicious expressions {}'.format(expressions))
        current_repair_suite = self.reduce(self.repair_test_suite,
                                           positive_traces,
                                           negative_traces,
                                           expressions)

        self.backend_src.restore_buggy()
        self.backend_src.configure()
        if config['build_before_instr']:
            self.backend_src.build()
        self.instrument_for_inference(self.backend_src, expressions)
        self.backend_src.build()

        angelic_forest = dict()
        inference_failed = False
        # Iterate over a snapshot: positive tests without a usable
        # specification are removed from current_repair_suite inside the
        # loop, and removing from the list being iterated would silently
        # skip the test that follows the removed one.
        for test in list(current_repair_suite):
            try:
                angelic_forest[test] = self.infer_spec(self.backend_src,
                                                       test,
                                                       self.dump[test],
                                                       self.frontend_src)
                if len(angelic_forest[test]) == 0:
                    if test in positive:
                        logger.warning('angelic forest for positive test {} not found'.format(test))
                        current_repair_suite.remove(test)
                        del angelic_forest[test]
                        continue
                    inference_failed = True
                    break
            except InferenceError:
                logger.warning('inference failed (error was raised)')
                inference_failed = True
                break
            except NoSmtError:
                if test in positive:
                    current_repair_suite.remove(test)
                    continue
                inference_failed = True
                break
        if inference_failed:
            continue

        initial_fix = self.synthesize_fix(angelic_forest)
        if initial_fix is None:
            logger.info('cannot synthesize fix')
            continue
        logger.info('candidate fix synthesized')

        self.validation_src.restore_buggy()
        try:
            self.apply_patch(self.validation_src, initial_fix)
        except TransformationError:
            logger.info('cannot apply fix')
            continue
        self.validation_src.build()

        pos, neg = self.evaluate(self.validation_src)
        if not set(neg).isdisjoint(set(current_repair_suite)):
            not_repaired = list(set(current_repair_suite) & set(neg))
            logger.warning("generated invalid fix (tests {} not repaired)".format(not_repaired))
            continue
        repaired = len(neg) == 0
        if repaired:
            patches.append(self.validation_src.diff_buggy())
        # Only tests from the repair suite can serve as counterexamples.
        current_negative = list(set(neg) & set(self.repair_test_suite))

        if len(current_negative) == 0 and not repaired:
            logger.warning("cannot repair using instrumented tests")
            continue

        negative_idx = 0
        while not repaired:
            counterexample = current_negative[negative_idx]

            logger.info('counterexample test is {}'.format(counterexample))
            current_repair_suite.append(counterexample)
            try:
                angelic_forest[counterexample] = self.infer_spec(self.backend_src,
                                                                 counterexample,
                                                                 self.dump[counterexample],
                                                                 self.frontend_src)
            except NoSmtError:
                logger.warning("no smt file for test {}".format(counterexample))
                # Try the next failing test, if there is one left.
                negative_idx = negative_idx + 1
                if len(current_negative) - negative_idx > 0:
                    continue
                break
            if len(angelic_forest[counterexample]) == 0:
                break
            fix = self.synthesize_fix(angelic_forest)
            if fix is None:
                logger.info('cannot refine fix')
                break
            logger.info('refined fix is synthesized')
            self.validation_src.restore_buggy()
            self.apply_patch(self.validation_src, fix)
            self.validation_src.build()
            pos, neg = self.evaluate(self.validation_src)
            repaired = len(neg) == 0
            if repaired:
                patches.append(self.validation_src.diff_buggy())
            current_negative = list(set(neg) & set(self.repair_test_suite))

            if not set(current_negative).isdisjoint(set(current_repair_suite)):
                not_repaired = list(set(current_repair_suite) & set(current_negative))
                logger.warning("generated invalid fix (tests {} not repaired)".format(not_repaired))
                break
            if not repaired and len(current_negative) == 0:
                # The remaining failures are outside the repair suite, so
                # there is no counterexample to index on the next
                # iteration (previously an IndexError).
                logger.warning("cannot repair using instrumented tests")
                break
            negative_idx = 0

    return patches
patches = repair() else: patches = repair() except TimeoutException: logger.info("failed to generate patch (timeout)") print('TIMEOUT') exit(0) except (CompilationError, InferenceError, TransformationError): logger.info("failed to generate patch") print('FAIL') exit(1) end = time.time() elapsed = format_time(end - start) statistics.data['time']['total'] = end - start statistics.save() if not patches: logger.info("no patch generated in {}".format(elapsed)) print('FAIL') exit(0) else: if config['generate_all']: patch_dir = basename(abspath(args.src)) + '-' + time.strftime("%Y-%b%d-%H%M%S") if not exists(patch_dir): os.mkdir(patch_dir) for idx, patch in enumerate(patches): patch_file = os.path.join(patch_dir, str(idx) + '.patch') with open(patch_file, 'w+') as file: for line in patch: file.write(line)
def __call__(self, project, test, dump, validation_project):
    """Infer angelic paths for ``test`` from KLEE path constraints.

    The test is run on ``project`` with ``klee=True``; the resulting
    ``klee-out-0/*.smt2`` path conditions are each conjoined with the
    expected outputs read from the ``dump`` directory and solved with z3.
    For every satisfiable path the chosen (angelic) values are written
    under ``self.load[test]`` and the test is re-run on
    ``validation_project`` with ``load=...``; only paths that pass this
    re-run are kept.

    Args:
        project: instrumented project; ``project.dir`` is used as the KLEE
            working directory.
        test: identifier of the test to run.
        dump: directory with one sub-directory per output variable, each
            containing files ``0 .. n-1`` with the expected values.
        validation_project: project used to validate each angelic path.

    Returns:
        A list of angelic paths.  Each path maps an expression id to a
        list of ``(angelic, original_or_None, env_values)`` tuples, one per
        executed instance.

    Raises:
        NoSmtError: if no usable ``.smt2`` file was produced.
        InferenceError: if the dump is corrupted or a dumped value cannot
            be parsed as the type of the corresponding output.
    """
    logger.info('inferring specification for test \'{}\''.format(test))

    environment = dict(os.environ)
    numeric_settings = (
        ('klee_max_forks', 'ANGELIX_KLEE_MAX_FORKS'),
        ('klee_max_depth', 'ANGELIX_KLEE_MAX_DEPTH'),
        ('klee_timeout', 'ANGELIX_KLEE_MAX_TIME'),
        ('klee_solver_timeout', 'ANGELIX_KLEE_MAX_SOLVER_TIME'),
    )
    for config_key, env_key in numeric_settings:
        if self.config[config_key] is not None:
            environment[env_key] = str(self.config[config_key])
    if self.config['klee_search'] is not None:
        environment['ANGELIX_KLEE_SEARCH'] = self.config['klee_search']
    if self.config['klee_debug']:
        environment['ANGELIX_KLEE_DEBUG'] = 'YES'
    if self.config['klee_ignore_errors']:
        environment['KLEE_DISABLE_MEMORY_ERROR'] = 'YES'
    if self.config['use_semfix_syn']:
        environment['ANGELIX_USE_SEMFIX_SYN'] = 'YES'
    environment['ANGELIX_KLEE_WORKDIR'] = project.dir

    klee_start_time = time.time()
    self.run_test(project, test, klee=True, env=environment)
    klee_end_time = time.time()
    klee_elapsed = klee_end_time - klee_start_time
    statistics.data['time']['klee'] += klee_elapsed
    statistics.save()

    logger.info('sleeping for 1 second...')
    time.sleep(1)

    smt_glob = join(project.dir, 'klee-out-0', '*.smt2')
    smt_files = glob(smt_glob)

    err_glob = join(project.dir, 'klee-out-0', '*.err')
    err_files = glob(err_glob)

    # Files are matched by the part of the base name before the first dot.
    err_list = [os.path.basename(err).split('.')[0] for err in err_files]

    non_error_smt_files = [
        smt for smt in smt_files
        if os.path.basename(smt).split('.')[0] not in err_list
    ]

    if not self.config['ignore_infer_errors']:
        smt_files = non_error_smt_files

    if len(smt_files) == 0 and len(err_list) == 0:
        logger.warning('No paths explored')
        raise NoSmtError()

    if len(smt_files) == 0:
        logger.warning('No non-error paths explored')
        raise NoSmtError()

    # loading dump

    # name -> value list
    oracle = dict()

    for var in os.listdir(dump):
        instances = os.listdir(join(dump, var))
        # Instance files must be numbered 0 .. n-1 without gaps.
        for i in range(0, len(instances)):
            if str(i) not in instances:
                logger.error('corrupted dump for test \'{}\''.format(test))
                raise InferenceError()
        oracle[var] = []
        for i in range(0, len(instances)):
            with open(join(dump, var, str(i))) as f:
                oracle[var].append(f.read())

    # The helpers and lookup tables below do not depend on the path being
    # solved, so they are built once instead of once per SMT file.

    def str_to_int(s):
        return int(s)

    def str_to_long(s):
        return int(s)

    def str_to_bool(s):
        if s == 'false':
            return False
        if s == 'true':
            return True
        raise InferenceError()

    def str_to_char(s):
        if len(s) != 1:
            raise InferenceError()
        return s[0]

    dump_parser_by_type = {
        'int': str_to_int,
        'long': str_to_long,
        'bool': str_to_bool,
        'char': str_to_char,
    }

    def bool_to_bv(b):
        if b:
            return BitVecVal(1, 32)
        else:
            return BitVecVal(0, 32)

    def int_to_bv(i):
        return BitVecVal(i, 32)

    def long_to_bv(i):
        return BitVecVal(i, 64)

    def char_to_bv(c):
        return BitVecVal(ord(c), 32)

    to_bv_converter_by_type = {
        'bool': bool_to_bv,
        'int': int_to_bv,
        'long': long_to_bv,
        'char': char_to_bv,
    }

    def bv_to_bool(bv):
        return bv.as_long() != 0

    def bv_to_int(bv):
        l = bv.as_long()
        if l >> 31 == 1:  # negative
            l -= pow(2, 32)
        return l

    def bv_to_long(bv):
        l = bv.as_long()
        if l >> 63 == 1:  # negative
            l -= pow(2, 64)
        return l

    def bv_to_char(bv):
        l = bv.as_long()
        return chr(l)

    from_bv_converter_by_type = {
        'bool': bv_to_bool,
        'int': bv_to_int,
        'long': bv_to_long,
        'char': bv_to_char,
    }

    def array_to_bv32(array):
        # Bytes 3..0 of the array, with byte 3 as the most significant.
        return Concat(Select(array, BitVecVal(3, 32)),
                      Select(array, BitVecVal(2, 32)),
                      Select(array, BitVecVal(1, 32)),
                      Select(array, BitVecVal(0, 32)))

    def array_to_bv64(array):
        # Bytes 7..0 of the array, with byte 7 as the most significant.
        return Concat(Select(array, BitVecVal(7, 32)),
                      Select(array, BitVecVal(6, 32)),
                      Select(array, BitVecVal(5, 32)),
                      Select(array, BitVecVal(4, 32)),
                      Select(array, BitVecVal(3, 32)),
                      Select(array, BitVecVal(2, 32)),
                      Select(array, BitVecVal(1, 32)),
                      Select(array, BitVecVal(0, 32)))

    def angelic_variable(kind, expr, instance):
        pattern = '{}!choice!{}!{}!{}!{}!{}!angelic'
        s = pattern.format(kind, expr[0], expr[1], expr[2], expr[3], instance)
        return Array(s, BitVecSort(32), BitVecSort(8))

    def original_variable(kind, expr, instance):
        pattern = '{}!choice!{}!{}!{}!{}!{}!original'
        s = pattern.format(kind, expr[0], expr[1], expr[2], expr[3], instance)
        return Array(s, BitVecSort(32), BitVecSort(8))

    def env_variable(expr, instance, name):
        pattern = 'int!choice!{}!{}!{}!{}!{}!env!{}'
        s = pattern.format(expr[0], expr[1], expr[2], expr[3], instance, name)
        return Array(s, BitVecSort(32), BitVecSort(8))

    def output_variable(kind, name, instance):
        # The array sort is the same for every type, including 'long';
        # only the number of bytes read back (array_to_bv64) differs.
        s = '{}!output!{}!{}'.format(kind, name, instance)
        return Array(s, BitVecSort(32), BitVecSort(8))

    def angelic_selector(expr, instance):
        s = 'angelic!{}!{}!{}!{}!{}'.format(expr[0], expr[1], expr[2], expr[3], instance)
        return BitVec(s, 32)

    def original_selector(expr, instance):
        s = 'original!{}!{}!{}!{}!{}'.format(expr[0], expr[1], expr[2], expr[3], instance)
        return BitVec(s, 32)

    def env_selector(expr, instance, name):
        s = 'env!{}!{}!{}!{}!{}!{}'.format(name, expr[0], expr[1], expr[2], expr[3], instance)
        return BitVec(s, 32)

    tracked_prefixes = ('int!', 'long!', 'bool!', 'char!', 'reachable!')

    # solving path constraints

    inference_start_time = time.time()

    angelic_paths = []

    z3.set_param("timeout", self.config['path_solving_timeout'])

    solver = Solver()

    for smt in smt_files:
        logger.info('solving path {}'.format(relpath(smt)))

        try:
            path = z3.parse_smt2_file(smt)
        except Exception:
            logger.warning('failed to parse {}'.format(smt))
            continue

        variables = [name for name in map(str, get_vars(path))
                     if name.startswith(tracked_prefixes)]

        try:
            outputs, choices, constants, reachable, original_available = parse_variables(variables)
        except Exception:
            logger.warning('failed to parse variables of {}'.format(smt))
            continue

        # name -> value list (parsed)
        oracle_constraints = dict()

        matching_path = True

        for expected_variable, expected_values in oracle.items():
            if expected_variable == 'reachable':
                expected_reachable = set(expected_values)
                if not (expected_reachable == reachable):
                    logger.info('labels \'{}\' executed while {} required'.format(
                        list(reachable),
                        list(expected_reachable)))
                    matching_path = False
                    break
                continue
            if expected_variable not in outputs.keys():
                outputs[expected_variable] = (None, 0)  # unconstraint does not mean wrong
            required_executions = len(expected_values)
            actual_executions = outputs[expected_variable][1]
            if required_executions != actual_executions:
                logger.info('value \'{}\' executed {} times while {} required'.format(
                    expected_variable,
                    actual_executions,
                    required_executions))
                matching_path = False
                break
            oracle_constraints[expected_variable] = []
            for i in range(0, required_executions):
                kind = outputs[expected_variable][0]
                try:
                    value = dump_parser_by_type[kind](expected_values[i])
                except Exception:
                    logger.error('variable \'{}\' has incompatible type {}'.format(expected_variable,
                                                                                   kind))
                    raise InferenceError()
                oracle_constraints[expected_variable].append(value)

        if not matching_path:
            continue

        solver.reset()
        solver.add(path)

        # Tie every expected output value to the bytes of its output array.
        for name, values in oracle_constraints.items():
            kind, _ = outputs[name]
            for i, value in enumerate(values):
                array = output_variable(kind, name, i)
                bv_value = to_bv_converter_by_type[kind](value)
                if kind == 'long':
                    solver.add(bv_value == array_to_bv64(array))
                else:
                    solver.add(bv_value == array_to_bv32(array))

        # Introduce one 32-bit selector per choice array so the values can
        # be read back from the model.
        for (expr, item) in choices.items():
            kind, instances, env = item
            for instance in range(0, instances):
                selector = angelic_selector(expr, instance)
                array = angelic_variable(kind, expr, instance)
                solver.add(selector == array_to_bv32(array))

                selector = original_selector(expr, instance)
                array = original_variable(kind, expr, instance)
                solver.add(selector == array_to_bv32(array))

                for name in env:
                    selector = env_selector(expr, instance, name)
                    array = env_variable(expr, instance, name)
                    solver.add(selector == array_to_bv32(array))

        result = solver.check()
        if result != z3.sat:
            logger.info('UNSAT')  # TODO: can be timeout
            continue
        model = solver.model()

        # expr -> (angelic * original * env) list
        angelic_path = dict()

        # Start from an empty load directory for this path.
        if os.path.exists(self.load[test]):
            shutil.rmtree(self.load[test])
        os.mkdir(self.load[test])

        for (expr, item) in choices.items():
            angelic_path[expr] = []
            kind, instances, env = item

            expr_str = '{}-{}-{}-{}'.format(expr[0], expr[1], expr[2], expr[3])
            expression_dir = join(self.load[test], expr_str)
            if not os.path.exists(expression_dir):
                os.mkdir(expression_dir)

            for instance in range(0, instances):
                bv_angelic = model[angelic_selector(expr, instance)]
                angelic = from_bv_converter_by_type[kind](bv_angelic)
                bv_original = model[original_selector(expr, instance)]
                original = from_bv_converter_by_type[kind](bv_original)
                if original_available:
                    logger.info('expression {}[{}]: angelic = {}, original = {}'.format(expr,
                                                                                        instance,
                                                                                        angelic,
                                                                                        original))
                else:
                    logger.info('expression {}[{}]: angelic = {}'.format(expr,
                                                                         instance,
                                                                         angelic))
                env_values = dict()
                for name in env:
                    bv_env = model[env_selector(expr, instance, name)]
                    value = from_bv_converter_by_type['int'](bv_env)
                    env_values[name] = value

                if original_available:
                    angelic_path[expr].append((angelic, original, env_values))
                else:
                    angelic_path[expr].append((angelic, None, env_values))

                # Dump angelic path to dump folder
                instance_file = join(expression_dir, str(instance))
                with open(instance_file, 'w') as out:
                    if isinstance(angelic, bool):
                        # Booleans are stored as 1/0 rather than True/False.
                        if angelic:
                            out.write('1')
                        else:
                            out.write('0')
                    else:
                        out.write(str(angelic))

        # Run Tester to validate the dumped values
        validated = self.run_test(validation_project, test, load=self.load[test])
        if validated:
            angelic_paths.append(angelic_path)
        else:
            logger.info('spurious angelic path')

    if self.config['synthesis_bool_only']:
        angelic_paths = self._boolean_angelic_forest(angelic_paths)

    if self.config['max_angelic_paths'] is not None and \
       len(angelic_paths) > self.config['max_angelic_paths']:
        angelic_paths = self._reduce_angelic_forest(angelic_paths)
    else:
        logger.info('found {} angelic paths for test \'{}\''.format(len(angelic_paths), test))

    inference_end_time = time.time()
    inference_elapsed = inference_end_time - inference_start_time
    statistics.data['time']['inference'] += inference_elapsed

    iter_stat = dict()
    iter_stat['time'] = dict()
    iter_stat['time']['klee'] = klee_elapsed
    iter_stat['time']['inference'] = inference_elapsed
    iter_stat['paths'] = dict()
    iter_stat['paths']['explored'] = len(smt_files)
    iter_stat['paths']['angelic'] = len(angelic_paths)
    statistics.data['iterations']['klee'].append(iter_stat)
    statistics.save()

    return angelic_paths
def __call__(self, project, test, dump, validation_project):
    """Infer angelic paths for ``test`` from KLEE path constraints.

    The test is run on ``project`` with ``klee=True``; the resulting
    ``klee-out-0/*.smt2`` path conditions are each conjoined with the
    expected outputs read from the ``dump`` directory and solved with z3.
    For every satisfiable path the chosen (angelic) values are written
    under ``self.load[test]`` and the test is re-run on
    ``validation_project`` with ``load=...``; only paths that pass this
    re-run are kept.

    Args:
        project: instrumented project; ``project.dir`` is used as the KLEE
            working directory.
        test: identifier of the test to run.
        dump: directory with one sub-directory per output variable, each
            containing files ``0 .. n-1`` with the expected values.
        validation_project: project used to validate each angelic path.

    Returns:
        A list of angelic paths.  Each path maps an expression id to a
        list of ``(angelic, original_or_None, env_values)`` tuples, one per
        executed instance.

    Raises:
        NoSmtError: if no usable ``.smt2`` file was produced.
        InferenceError: if the dump is corrupted or a dumped value cannot
            be parsed as the type of the corresponding output.
    """
    logger.info('inferring specification for test \'{}\''.format(test))

    environment = dict(os.environ)
    numeric_settings = (
        ('klee_max_forks', 'ANGELIX_KLEE_MAX_FORKS'),
        ('klee_max_depth', 'ANGELIX_KLEE_MAX_DEPTH'),
        ('klee_timeout', 'ANGELIX_KLEE_MAX_TIME'),
        ('klee_solver_timeout', 'ANGELIX_KLEE_MAX_SOLVER_TIME'),
    )
    for config_key, env_key in numeric_settings:
        if self.config[config_key] is not None:
            environment[env_key] = str(self.config[config_key])
    if self.config['klee_search'] is not None:
        environment['ANGELIX_KLEE_SEARCH'] = self.config['klee_search']
    if self.config['klee_debug']:
        environment['ANGELIX_KLEE_DEBUG'] = 'YES'
    if self.config['klee_ignore_errors']:
        environment['KLEE_DISABLE_MEMORY_ERROR'] = 'YES'
    if self.config['use_semfix_syn']:
        environment['ANGELIX_USE_SEMFIX_SYN'] = 'YES'
    environment['ANGELIX_KLEE_WORKDIR'] = project.dir

    klee_start_time = time.time()
    self.run_test(project, test, klee=True, env=environment)
    klee_end_time = time.time()
    klee_elapsed = klee_end_time - klee_start_time
    statistics.data['time']['klee'] += klee_elapsed
    statistics.save()

    logger.info('sleeping for 1 second...')
    time.sleep(1)

    smt_glob = join(project.dir, 'klee-out-0', '*.smt2')
    smt_files = glob(smt_glob)

    err_glob = join(project.dir, 'klee-out-0', '*.err')
    err_files = glob(err_glob)

    # Files are matched by the part of the base name before the first dot.
    err_list = [os.path.basename(err).split('.')[0] for err in err_files]

    non_error_smt_files = [
        smt for smt in smt_files
        if os.path.basename(smt).split('.')[0] not in err_list
    ]

    if not self.config['ignore_infer_errors']:
        smt_files = non_error_smt_files

    if len(smt_files) == 0 and len(err_list) == 0:
        logger.warning('No paths explored')
        raise NoSmtError()

    if len(smt_files) == 0:
        logger.warning('No non-error paths explored')
        raise NoSmtError()

    # loading dump

    # name -> value list
    oracle = dict()

    for var in os.listdir(dump):
        instances = os.listdir(join(dump, var))
        # Instance files must be numbered 0 .. n-1 without gaps.
        for i in range(0, len(instances)):
            if str(i) not in instances:
                logger.error('corrupted dump for test \'{}\''.format(test))
                raise InferenceError()
        oracle[var] = []
        for i in range(0, len(instances)):
            with open(join(dump, var, str(i))) as f:
                oracle[var].append(f.read())

    # The helpers and lookup tables below do not depend on the path being
    # solved, so they are built once instead of once per SMT file.

    def str_to_int(s):
        return int(s)

    def str_to_long(s):
        return int(s)

    def str_to_bool(s):
        if s == 'false':
            return False
        if s == 'true':
            return True
        raise InferenceError()

    def str_to_char(s):
        if len(s) != 1:
            raise InferenceError()
        return s[0]

    dump_parser_by_type = {
        'int': str_to_int,
        'long': str_to_long,
        'bool': str_to_bool,
        'char': str_to_char,
    }

    def bool_to_bv(b):
        if b:
            return BitVecVal(1, 32)
        else:
            return BitVecVal(0, 32)

    def int_to_bv(i):
        return BitVecVal(i, 32)

    def long_to_bv(i):
        return BitVecVal(i, 64)

    def char_to_bv(c):
        return BitVecVal(ord(c), 32)

    to_bv_converter_by_type = {
        'bool': bool_to_bv,
        'int': int_to_bv,
        'long': long_to_bv,
        'char': char_to_bv,
    }

    def bv_to_bool(bv):
        return bv.as_long() != 0

    def bv_to_int(bv):
        l = bv.as_long()
        if l >> 31 == 1:  # negative
            l -= pow(2, 32)
        return l

    def bv_to_long(bv):
        l = bv.as_long()
        if l >> 63 == 1:  # negative
            l -= pow(2, 64)
        return l

    def bv_to_char(bv):
        l = bv.as_long()
        return chr(l)

    from_bv_converter_by_type = {
        'bool': bv_to_bool,
        'int': bv_to_int,
        'long': bv_to_long,
        'char': bv_to_char,
    }

    def array_to_bv32(array):
        # Bytes 3..0 of the array, with byte 3 as the most significant.
        return Concat(Select(array, BitVecVal(3, 32)),
                      Select(array, BitVecVal(2, 32)),
                      Select(array, BitVecVal(1, 32)),
                      Select(array, BitVecVal(0, 32)))

    def array_to_bv64(array):
        # Bytes 7..0 of the array, with byte 7 as the most significant.
        return Concat(Select(array, BitVecVal(7, 32)),
                      Select(array, BitVecVal(6, 32)),
                      Select(array, BitVecVal(5, 32)),
                      Select(array, BitVecVal(4, 32)),
                      Select(array, BitVecVal(3, 32)),
                      Select(array, BitVecVal(2, 32)),
                      Select(array, BitVecVal(1, 32)),
                      Select(array, BitVecVal(0, 32)))

    def angelic_variable(kind, expr, instance):
        pattern = '{}!choice!{}!{}!{}!{}!{}!angelic'
        s = pattern.format(kind, expr[0], expr[1], expr[2], expr[3], instance)
        return Array(s, BitVecSort(32), BitVecSort(8))

    def original_variable(kind, expr, instance):
        pattern = '{}!choice!{}!{}!{}!{}!{}!original'
        s = pattern.format(kind, expr[0], expr[1], expr[2], expr[3], instance)
        return Array(s, BitVecSort(32), BitVecSort(8))

    def env_variable(expr, instance, name):
        pattern = 'int!choice!{}!{}!{}!{}!{}!env!{}'
        s = pattern.format(expr[0], expr[1], expr[2], expr[3], instance, name)
        return Array(s, BitVecSort(32), BitVecSort(8))

    def output_variable(kind, name, instance):
        # The array sort is the same for every type, including 'long';
        # only the number of bytes read back (array_to_bv64) differs.
        s = '{}!output!{}!{}'.format(kind, name, instance)
        return Array(s, BitVecSort(32), BitVecSort(8))

    def angelic_selector(expr, instance):
        s = 'angelic!{}!{}!{}!{}!{}'.format(expr[0], expr[1], expr[2], expr[3], instance)
        return BitVec(s, 32)

    def original_selector(expr, instance):
        s = 'original!{}!{}!{}!{}!{}'.format(expr[0], expr[1], expr[2], expr[3], instance)
        return BitVec(s, 32)

    def env_selector(expr, instance, name):
        s = 'env!{}!{}!{}!{}!{}!{}'.format(name, expr[0], expr[1], expr[2], expr[3], instance)
        return BitVec(s, 32)

    tracked_prefixes = ('int!', 'long!', 'bool!', 'char!', 'reachable!')

    # solving path constraints

    inference_start_time = time.time()

    angelic_paths = []

    z3.set_param("timeout", self.config['path_solving_timeout'])

    solver = Solver()

    for smt in smt_files:
        logger.info('solving path {}'.format(relpath(smt)))

        try:
            path = z3.parse_smt2_file(smt)
        except Exception:
            logger.warning('failed to parse {}'.format(smt))
            continue

        variables = [name for name in map(str, get_vars(path))
                     if name.startswith(tracked_prefixes)]

        try:
            outputs, choices, constants, reachable, original_available = parse_variables(variables)
        except Exception:
            logger.warning('failed to parse variables of {}'.format(smt))
            continue

        # name -> value list (parsed)
        oracle_constraints = dict()

        matching_path = True

        for expected_variable, expected_values in oracle.items():
            if expected_variable == 'reachable':
                expected_reachable = set(expected_values)
                if not (expected_reachable == reachable):
                    logger.info('labels \'{}\' executed while {} required'.format(
                        list(reachable),
                        list(expected_reachable)))
                    matching_path = False
                    break
                continue
            if expected_variable not in outputs.keys():
                outputs[expected_variable] = (None, 0)  # unconstraint does not mean wrong
            required_executions = len(expected_values)
            actual_executions = outputs[expected_variable][1]
            if required_executions != actual_executions:
                logger.info('value \'{}\' executed {} times while {} required'.format(
                    expected_variable,
                    actual_executions,
                    required_executions))
                matching_path = False
                break
            oracle_constraints[expected_variable] = []
            for i in range(0, required_executions):
                kind = outputs[expected_variable][0]
                try:
                    value = dump_parser_by_type[kind](expected_values[i])
                except Exception:
                    logger.error('variable \'{}\' has incompatible type {}'.format(expected_variable,
                                                                                   kind))
                    raise InferenceError()
                oracle_constraints[expected_variable].append(value)

        if not matching_path:
            continue

        solver.reset()
        solver.add(path)

        # Tie every expected output value to the bytes of its output array.
        for name, values in oracle_constraints.items():
            kind, _ = outputs[name]
            for i, value in enumerate(values):
                array = output_variable(kind, name, i)
                bv_value = to_bv_converter_by_type[kind](value)
                if kind == 'long':
                    solver.add(bv_value == array_to_bv64(array))
                else:
                    solver.add(bv_value == array_to_bv32(array))

        # Introduce one 32-bit selector per choice array so the values can
        # be read back from the model.
        for (expr, item) in choices.items():
            kind, instances, env = item
            for instance in range(0, instances):
                selector = angelic_selector(expr, instance)
                array = angelic_variable(kind, expr, instance)
                solver.add(selector == array_to_bv32(array))

                selector = original_selector(expr, instance)
                array = original_variable(kind, expr, instance)
                solver.add(selector == array_to_bv32(array))

                for name in env:
                    selector = env_selector(expr, instance, name)
                    array = env_variable(expr, instance, name)
                    solver.add(selector == array_to_bv32(array))

        result = solver.check()
        if result != z3.sat:
            logger.info('UNSAT')  # TODO: can be timeout
            continue
        model = solver.model()

        # expr -> (angelic * original * env) list
        angelic_path = dict()

        # Start from an empty load directory for this path.
        if os.path.exists(self.load[test]):
            shutil.rmtree(self.load[test])
        os.mkdir(self.load[test])

        for (expr, item) in choices.items():
            angelic_path[expr] = []
            kind, instances, env = item

            expr_str = '{}-{}-{}-{}'.format(expr[0], expr[1], expr[2], expr[3])
            expression_dir = join(self.load[test], expr_str)
            if not os.path.exists(expression_dir):
                os.mkdir(expression_dir)

            for instance in range(0, instances):
                bv_angelic = model[angelic_selector(expr, instance)]
                angelic = from_bv_converter_by_type[kind](bv_angelic)
                bv_original = model[original_selector(expr, instance)]
                original = from_bv_converter_by_type[kind](bv_original)
                if original_available:
                    logger.info('expression {}[{}]: angelic = {}, original = {}'.format(expr,
                                                                                        instance,
                                                                                        angelic,
                                                                                        original))
                else:
                    logger.info('expression {}[{}]: angelic = {}'.format(expr,
                                                                         instance,
                                                                         angelic))
                env_values = dict()
                for name in env:
                    bv_env = model[env_selector(expr, instance, name)]
                    value = from_bv_converter_by_type['int'](bv_env)
                    env_values[name] = value

                if original_available:
                    angelic_path[expr].append((angelic, original, env_values))
                else:
                    angelic_path[expr].append((angelic, None, env_values))

                # Dump angelic path to dump folder
                instance_file = join(expression_dir, str(instance))
                with open(instance_file, 'w') as out:
                    if isinstance(angelic, bool):
                        # Booleans are stored as 1/0 rather than True/False.
                        if angelic:
                            out.write('1')
                        else:
                            out.write('0')
                    else:
                        out.write(str(angelic))

        # Run Tester to validate the dumped values
        validated = self.run_test(validation_project, test, load=self.load[test])
        if validated:
            angelic_paths.append(angelic_path)
        else:
            logger.info('spurious angelic path')

    if self.config['synthesis_bool_only']:
        angelic_paths = self._boolean_angelic_forest(angelic_paths)

    if self.config['max_angelic_paths'] is not None and \
       len(angelic_paths) > self.config['max_angelic_paths']:
        angelic_paths = self._reduce_angelic_forest(angelic_paths)
    else:
        logger.info('found {} angelic paths for test \'{}\''.format(len(angelic_paths), test))

    inference_end_time = time.time()
    inference_elapsed = inference_end_time - inference_start_time
    statistics.data['time']['inference'] += inference_elapsed

    iter_stat = dict()
    iter_stat['time'] = dict()
    iter_stat['time']['klee'] = klee_elapsed
    iter_stat['time']['inference'] = inference_elapsed
    iter_stat['paths'] = dict()
    iter_stat['paths']['explored'] = len(smt_files)
    iter_stat['paths']['angelic'] = len(angelic_paths)
    statistics.data['iterations']['klee'].append(iter_stat)
    statistics.save()

    return angelic_paths
def __call__(self, angelic_forest):
    """Synthesize a patch from an angelic forest using the external synthesizer.

    The forest is first materialised at ``self.angelic_forest_file`` (copied
    when a path is given, dumped via ``self.dump_angelic_forest`` otherwise).
    Then, for every component level in ``self.config['synthesis_levels']``, a
    JSON configuration is written, the synthesizer jar is run with
    ``java -jar`` and its standard output is interpreted:

    * ``TIMEOUT`` / ``FAIL`` -- logged, the next level is tried;
    * ``SUCCESS`` -- the generated patch file is parsed and returned;
    * anything else -- an ``Exception`` is raised.

    Timing and per-iteration statistics are recorded for every invocation,
    including the ones that exit with a non-zero code.

    :param angelic_forest: path to an angelic forest file (``str``) or the
        angelic forest data structure itself.
    :return: ``dict`` mapping an expression location (tuple of ints) to the
        fixed expression string, or ``None`` when no level produced a patch
        or the produced patch contains no changes.
    :raises NameError: if ``self.config['synthesis_other_solver']`` names an
        unsupported solver.
    :raises KeyError: if a required environment variable is not set.
    :raises Exception: if the synthesizer prints an unexpected result.
    """
    if isinstance(angelic_forest, str):
        # angelic_forest is a file
        shutil.copyfile(angelic_forest, self.angelic_forest_file)
    else:
        # angelic_forest is a data structure
        self.dump_angelic_forest(angelic_forest)

    # Environment variable holding the executable path of each supported
    # external solver.
    solver_path_vars = {
        "Enum": 'ENUM_SOLVER_PATH',
        "Symbolic": 'SYMBOLIC_SOLVER_PATH',
        "CVC4": 'CVC4_SOLVER_PATH',
        "Stoc": 'STOC_SOLVER_PATH',
    }

    def convert_to_c(s):
        # The synthesizer encodes square brackets; restore the C syntax.
        return s.replace('_LBRSQR_', '[').replace('_RBRSQR_', ']')

    dirpath = tempfile.mkdtemp()
    try:
        patch_file = join(dirpath, 'patch')
        config_file = join(dirpath, 'config.json')

        for level in self.config['synthesis_levels']:
            logger.info('synthesizing patch with component level \'{}\''.format(level))

            config = {
                "encodingConfig": {
                    # better if false, if not enough primitive components, synthesis can fail
                    "componentsMultipleOccurrences": True,
                    "phantomComponents": True,
                    "repairBooleanConst": False,
                    "repairIntegerConst": False,
                    "level": "linear"
                },
                "simplification": False,
                "reuseStructure": not self.config['semfix'],
                "spaceReduction": True,
                "componentLevel": level,
                "solverBound": 3,
                "solverTimeout": self.config['synthesis_timeout']
            }

            with open(config_file, 'w') as file:
                json.dump(config, file)

            other_solver = self.config['synthesis_other_solver']

            if self.config['use_nsynth']:
                jar = os.environ['NSYNTH_JAR']
            elif other_solver is None:
                jar = os.environ['SYNTHESIS_JAR']
            else:
                jar = os.environ['SYNTHESIS_OTHER_JAR']

            stderr = None if self.config['verbose'] else subprocess.DEVNULL

            args = [self.angelic_forest_file, self.extracted, patch_file, config_file]

            # A copy of the configuration is also placed at a fixed location.
            shutil.copyfile(config_file, "/angelix/config.json")

            if other_solver is not None:
                if other_solver not in solver_path_vars:
                    raise NameError("Not supported solver: " + other_solver)
                args += [other_solver, os.environ[solver_path_vars[other_solver]]]
                # NOTE(review): the two trailing arguments are assumed to be
                # passed only together with an external solver -- confirm.
                args += [os.environ["RESULT_BEAUTIFIER_PATH"]]
                args += ["/angelix/additionalConfig.txt"]

            synthesis_start_time = time.time()
            logger.info("-------")
            logger.info(args)

            try:
                result = subprocess.check_output(['java', '-jar', jar] + args, stderr=stderr)
            except subprocess.CalledProcessError:
                logger.warning("synthesis returned non-zero code")
                continue
            finally:
                # Statistics are recorded for failed invocations as well.
                synthesis_end_time = time.time()
                synthesis_elapsed = synthesis_end_time - synthesis_start_time
                statistics.data['time']['synthesis'] += synthesis_elapsed
                iter_stat = dict()
                # NOTE(review): when a file path was passed this is the length
                # of the path string, not a number of tests -- confirm intent.
                iter_stat['tests'] = len(angelic_forest)
                iter_stat['level'] = level
                iter_stat['time'] = synthesis_elapsed
                statistics.data['iterations']['synthesis'].append(iter_stat)
                statistics.save()

            output = str(result, 'UTF-8')
            outcome = output.strip()

            if outcome == 'TIMEOUT':
                logger.warning('timeout when synthesizing fix')
            elif outcome == 'FAIL':
                logger.info('synthesis failed')
            elif outcome == 'SUCCESS':
                with open(patch_file) as file:
                    content = file.readlines()

                # The patch file is a sequence of three-line records:
                # location "a-b-c-d", original expression, fixed expression.
                patch = dict()
                index = 0
                while index < len(content):
                    location = content[index].strip()
                    index += 1
                    if not location:
                        # readlines() keeps the newline, so blank lines must
                        # be detected after stripping.
                        continue
                    expr = tuple(map(int, location.split('-')))
                    original = convert_to_c(content[index].strip())
                    fixed = convert_to_c(content[index + 1].strip())
                    index += 2
                    if self.config['semfix']:
                        logger.info('synthesized expression {}: {}'.format(expr, fixed))
                    else:
                        logger.info('fixing expression {}: {} ---> {}'.format(expr, original, fixed))
                    patch[expr] = fixed

                if len(patch) == 0:
                    logger.warning('patch contains no changes')
                    return None
                return patch
            else:
                raise Exception('result: ' + output)
    finally:
        # Remove the scratch directory on every exit path (success, failure
        # or exception); best effort so cleanup never masks the real outcome.
        shutil.rmtree(dirpath, ignore_errors=True)

    return None