def evaluate(self, src):
        """Run the validation test suite on `src` and split it by outcome.

        Every test in ``self.validation_test_suite`` is executed through
        ``self.run_test``. When ``self.config['redundant_test']`` is set, each
        failing test is executed a second time and moved to the passing list
        if that second run succeeds. The elapsed wall-clock time is added to
        ``statistics.data['time']['testing']`` and the statistics are saved.

        Returns:
            A ``(positive, negative)`` pair of lists: passing tests and
            failing tests.
        """
        started_at = time.time()

        passing = []
        failing = []
        for case in self.validation_test_suite:
            bucket = passing if self.run_test(src, case) else failing
            bucket.append(case)

        if self.config['redundant_test']:
            # Second chance: a test that passes on the re-run is not treated
            # as failing. Iterate over a snapshot because `failing` shrinks.
            for case in list(failing):
                if not self.run_test(src, case):
                    continue
                failing.remove(case)
                passing.append(case)

        elapsed = time.time() - started_at
        statistics.data['time']['testing'] += elapsed
        statistics.save()

        return passing, failing
Ejemplo n.º 2
0
    def evaluate(self, src):
        """Classify the validation tests as passing or failing on `src`.

        Runs ``self.run_test(src, test)`` for every test in
        ``self.validation_test_suite``. If ``self.config['redundant_test']``
        is truthy, failing tests are re-run once and reclassified as passing
        when the re-run succeeds. Time spent is accumulated in
        ``statistics.data['time']['testing']`` and persisted with
        ``statistics.save()``.

        Returns:
            Tuple ``(positive, negative)`` of test lists.
        """
        clock_start = time.time()

        outcomes = {True: [], False: []}
        for candidate in self.validation_test_suite:
            verdict = True if self.run_test(src, candidate) else False
            outcomes[verdict].append(candidate)
        positive, negative = outcomes[True], outcomes[False]

        # Re-check the failures so that only tests failing twice stay negative.
        if self.config['redundant_test']:
            for candidate in tuple(negative):
                if self.run_test(src, candidate):
                    negative.remove(candidate)
                    positive.append(candidate)

        clock_stop = time.time()
        statistics.data['time']['testing'] += clock_stop - clock_start
        statistics.save()

        return positive, negative
Ejemplo n.º 3
0
                patch = repair()
        else:
            patch = repair()
    except TimeoutException:
        logger.info("failed to generate patch (timeout)")
        print('TIMEOUT')
        exit(0)
    except (CompilationError, InferenceError, TransformationError):
        logger.info("failed to generate patch")
        print('FAIL')
        exit(1)

    end = time.time()
    elapsed = format_time(end - start)
    statistics.data['time']['total'] = end - start
    statistics.save()

    if patch is None:
        logger.info("no patch generated in {}".format(elapsed))
        print('FAIL')
        exit(0)
    else:
        patch_file = basename(abspath(
            args.src)) + '-' + time.strftime("%Y-%b%d-%H%M%S") + '.patch'
        logger.info("patch successfully generated in {} (see {})".format(
            elapsed, patch_file))
        print('SUCCESS')
        with open(patch_file, 'w+') as file:
            for line in patch:
                file.write(line)
        exit(0)
Ejemplo n.º 4
0
    def __call__(self, angelic_forest):
        """Synthesize a patch for `angelic_forest` with the external synthesizer.

        The forest is first materialised at ``self.angelic_forest_file`` (copied
        when `angelic_forest` is a path string, dumped otherwise). The Java
        synthesizer is then invoked once per entry of
        ``self.config['synthesis_levels']`` until one level reports ``SUCCESS``.

        Args:
            angelic_forest: either a path to an angelic forest file (``str``)
                or a data structure accepted by ``self.dump_angelic_forest``.

        Returns:
            A dict mapping an expression id (tuple of ints) to the fixed
            expression text, or ``None`` when no level produced a patch or the
            produced patch is empty.

        Raises:
            Exception: if the synthesizer prints something other than
                ``TIMEOUT``, ``FAIL`` or ``SUCCESS``.
        """
        if isinstance(angelic_forest, str):
            # angelic_forest is a file
            shutil.copyfile(angelic_forest, self.angelic_forest_file)
        else:
            # angelic_forest is a data structure
            self.dump_angelic_forest(angelic_forest)

        dirpath = tempfile.mkdtemp()
        try:
            return self._synthesize_in(dirpath, angelic_forest)
        finally:
            # Always drop the scratch directory: the original only removed it
            # when every level failed and leaked it on success or on error.
            shutil.rmtree(dirpath, ignore_errors=True)

    def _synthesize_in(self, dirpath, angelic_forest):
        """Run the synthesizer for each component level, using `dirpath` as scratch space."""
        patch_file = join(dirpath, 'patch')
        config_file = join(dirpath, 'config.json')

        for level in self.config['synthesis_levels']:

            logger.info(
                'synthesizing patch with component level \'{}\''.format(level))

            config = {
                "encodingConfig": {
                    "componentsMultipleOccurrences": True,
                    # better if false, if not enough primitive components, synthesis can fail
                    "phantomComponents": True,
                    "repairBooleanConst": False,
                    "repairIntegerConst": False,
                    "level": "linear"
                },
                "simplification": False,
                "reuseStructure": not self.config['semfix'],
                "spaceReduction": True,
                "componentLevel": level,
                "solverBound": 3,
                "solverTimeout": self.config['synthesis_timeout']
            }

            with open(config_file, 'w') as file:
                json.dump(config, file)

            if self.config['use_nsynth']:
                jar = os.environ['NSYNTH_JAR']
            else:
                jar = os.environ['SYNTHESIS_JAR']

            # In verbose mode the synthesizer's stderr is inherited.
            stderr = None if self.config['verbose'] else subprocess.DEVNULL

            args = [
                self.angelic_forest_file, self.extracted, patch_file,
                config_file
            ]

            synthesis_start_time = time.time()

            try:
                result = subprocess.check_output(['java', '-jar', jar] + args,
                                                 stderr=stderr)
            except subprocess.CalledProcessError:
                logger.warning("synthesis returned non-zero code")
                continue
            finally:
                # Statistics are recorded for failed invocations as well.
                synthesis_elapsed = time.time() - synthesis_start_time
                statistics.data['time']['synthesis'] += synthesis_elapsed
                iter_stat = dict()
                iter_stat['tests'] = len(angelic_forest)
                iter_stat['level'] = level
                iter_stat['time'] = synthesis_elapsed
                statistics.data['iterations']['synthesis'].append(iter_stat)
                statistics.save()

            verdict = str(result, 'UTF-8').strip()
            if verdict == 'TIMEOUT':
                logger.warning('timeout when synthesizing fix')
            elif verdict == 'FAIL':
                logger.info('synthesis failed')
            elif verdict == 'SUCCESS':
                patch = self._load_patch(patch_file)
                if len(patch) == 0:
                    logger.warning('patch contains no changes')
                    return None
                return patch
            else:
                raise Exception('result: ' + str(result, 'UTF-8'))

        return None

    def _load_patch(self, patch_file):
        """Parse the synthesizer output: records of (expression id, original, fixed) lines."""

        def convert_to_c(s):
            return s.replace('_LBRSQR_', '[').replace('_RBRSQR_', ']')

        with open(patch_file) as file:
            rows = [row.strip() for row in file]

        patch = dict()
        cursor = 0
        while cursor < len(rows):
            header = rows[cursor]
            cursor += 1
            if not header:
                # Skip blank separator lines (the original length check could
                # never match because lines kept their trailing newline).
                continue
            expr = tuple(map(int, header.split('-')))
            # A truncated record raises IndexError, as the original pops did.
            original = convert_to_c(rows[cursor])
            fixed = convert_to_c(rows[cursor + 1])
            cursor += 2
            if self.config['semfix']:
                logger.info('synthesized expression {}: {}'.format(
                    expr, fixed))
            else:
                logger.info('fixing expression {}: {} ---> {}'.format(
                    expr, original, fixed))
            patch[expr] = fixed
        return patch
    def generate_patch(self):
        """Localize suspicious expressions and synthesize validated patches.

        Steps, in order:
          1. Evaluate the validation source to split tests into passing
             (positive) and failing (negative).
          2. Instrument and build the frontend source, then run every test on
             it to collect traces; dumps are collected for tests that have
             none (from the frontend for positive tests, from the golden
             source for negative ones). Negative tests that also fail on the
             golden source are excluded from all suites.
          3. Compute suspicious expression groups.
          4. For each group: infer an angelic forest on the backend source,
             synthesize a fix, apply it to the validation source, evaluate it,
             and refine it with counterexample tests until all validation
             tests pass or refinement gives up.

        Returns:
            A list holding ``self.validation_src.diff_buggy()`` for every fix
            that made all validation tests pass. The list is empty when
            nothing was repaired, or when a failing test has no dump and no
            golden source is configured.

        Note:
            With ``self.config['localize_only']`` set, the method logs the
            suspicious groups and terminates the process via ``exit(0)``.
        """

        def drop_from_repair_suite(test):
            # The repair suite may not contain every validation test, so an
            # unguarded remove() could raise ValueError.
            if test in self.repair_test_suite:
                self.repair_test_suite.remove(test)

        positive, negative = self.evaluate(self.validation_src)

        self.frontend_src.configure()
        # NOTE(review): the original read a module-level `config` here and in
        # two places below; this assumes it is the same mapping as
        # self.config, which the rest of the method uses - confirm.
        if self.config['build_before_instr']:
            self.frontend_src.build()
        self.instrument_for_localization(self.frontend_src)
        self.frontend_src.build()

        testing_start_time = time.time()
        if len(positive) > 0:
            logger.info('running positive tests for debugging')
        for test in positive:
            self.trace += test
            if test not in self.dump:
                # No expected outputs recorded yet: collect them in this run.
                self.dump += test
                _, instrumented = self.run_test(self.frontend_src, test, dump=self.dump[test], trace=self.trace[test], check_instrumented=True)
            else:
                _, instrumented = self.run_test(self.frontend_src, test, trace=self.trace[test], check_instrumented=True)
            if not instrumented:
                drop_from_repair_suite(test)

        golden_is_built = False
        excluded = []

        if len(negative) > 0:
            logger.info('running negative tests for debugging')
        for test in negative:
            self.trace += test
            _, instrumented = self.run_test(self.frontend_src, test, trace=self.trace[test], check_instrumented=True)
            if not instrumented:
                drop_from_repair_suite(test)
            if test not in self.dump:
                if self.golden_src is None:
                    logger.error("golden version or assert file needed for test {}".format(test))
                    return []
                if not golden_is_built:
                    # Build the golden version lazily, at most once.
                    self.golden_src.configure()
                    self.golden_src.build()
                    golden_is_built = True
                self.dump += test
                result = self.run_test(self.golden_src, test, dump=self.dump[test])
                if not result:
                    excluded.append(test)

        for test in excluded:
            if not self.config['mute_test_message']:
                logger.warning('excluding test {} because it fails in golden version'.format(test))
            negative.remove(test)
            drop_from_repair_suite(test)
            self.validation_test_suite.remove(test)

        testing_end_time = time.time()
        testing_elapsed = testing_end_time - testing_start_time
        statistics.data['time']['testing'] += testing_elapsed
        statistics.save()

        logger.info("repair test suite: {}".format(self.repair_test_suite))
        logger.info("validation test suite: {}".format(self.validation_test_suite))

        positive_traces = [(test, self.trace.parse(test)) for test in positive]
        negative_traces = [(test, self.trace.parse(test)) for test in negative]
        suspicious = self.get_suspicious_groups(self.validation_test_suite, positive_traces, negative_traces)

        if self.config['localize_only']:
            for idx, (group, score) in enumerate(suspicious):
                logger.info('group {}: {} ({})'.format(idx+1, group, score))
            exit(0)

        if len(suspicious) == 0:
            logger.warning('no suspicious expressions localized')

        repaired = len(negative) == 0

        patches = []

        while (self.config['generate_all'] or not repaired) and len(suspicious) > 0:
            if self.config['use_semfix_syn']:
                # prepare a clean directory
                shutil.rmtree(join(self.working_dir, 'semfix-syn-input'),
                              ignore_errors=True)

            expressions = suspicious.pop(0)
            logger.info('considering suspicious expressions {}'.format(expressions))
            current_repair_suite = self.reduce(self.repair_test_suite, positive_traces, negative_traces, expressions)

            self.backend_src.restore_buggy()
            self.backend_src.configure()
            if self.config['build_before_instr']:
                self.backend_src.build()
            self.instrument_for_inference(self.backend_src, expressions)
            self.backend_src.build()

            angelic_forest = dict()
            inference_failed = False
            # Iterate over a snapshot: positive tests without a usable forest
            # are removed from current_repair_suite inside the loop, and
            # removing from the list being iterated would skip the next test.
            for test in list(current_repair_suite):
                try:
                    angelic_forest[test] = self.infer_spec(self.backend_src, test, self.dump[test], self.frontend_src)
                    if len(angelic_forest[test]) == 0:
                        if test in positive:
                            logger.warning('angelic forest for positive test {} not found'.format(test))
                            current_repair_suite.remove(test)
                            del angelic_forest[test]
                            continue
                        inference_failed = True
                        break
                except InferenceError:
                    logger.warning('inference failed (error was raised)')
                    inference_failed = True
                    break
                except NoSmtError:
                    if test in positive:
                        current_repair_suite.remove(test)
                        continue
                    inference_failed = True
                    break
            if inference_failed:
                continue
            initial_fix = self.synthesize_fix(angelic_forest)
            if initial_fix is None:
                logger.info('cannot synthesize fix')
                continue
            logger.info('candidate fix synthesized')

            self.validation_src.restore_buggy()
            try:
                self.apply_patch(self.validation_src, initial_fix)
            except TransformationError:
                logger.info('cannot apply fix')
                continue
            self.validation_src.build()

            pos, neg = self.evaluate(self.validation_src)
            if not set(neg).isdisjoint(set(current_repair_suite)):
                not_repaired = list(set(current_repair_suite) & set(neg))
                logger.warning("generated invalid fix (tests {} not repaired)".format(not_repaired))
                continue
            repaired = len(neg) == 0
            if repaired:
                patches.append(self.validation_src.diff_buggy())
            # Only tests from the repair suite can serve as counterexamples.
            current_negative = list(set(neg) & set(self.repair_test_suite))

            if len(current_negative) == 0 and not repaired:
                logger.warning("cannot repair using instrumented tests")
                continue

            negative_idx = 0
            while not repaired:
                counterexample = current_negative[negative_idx]

                logger.info('counterexample test is {}'.format(counterexample))
                current_repair_suite.append(counterexample)
                try:
                    angelic_forest[counterexample] = self.infer_spec(self.backend_src,
                                                                     counterexample,
                                                                     self.dump[counterexample],
                                                                     self.frontend_src)
                except NoSmtError:
                    logger.warning("no smt file for test {}".format(counterexample))
                    # Try the next failing test, if any is left.
                    negative_idx = negative_idx + 1
                    if len(current_negative) - negative_idx > 0:
                        continue
                    break
                if len(angelic_forest[counterexample]) == 0:
                    break
                fix = self.synthesize_fix(angelic_forest)
                if fix is None:
                    logger.info('cannot refine fix')
                    break
                logger.info('refined fix is synthesized')
                self.validation_src.restore_buggy()
                self.apply_patch(self.validation_src, fix)
                self.validation_src.build()
                pos, neg = self.evaluate(self.validation_src)
                repaired = len(neg) == 0
                if repaired:
                    patches.append(self.validation_src.diff_buggy())
                current_negative = list(set(neg) & set(self.repair_test_suite))

                if not set(current_negative).isdisjoint(set(current_repair_suite)):
                    not_repaired = list(set(current_repair_suite) & set(current_negative))
                    logger.warning("generated invalid fix (tests {} not repaired)".format(not_repaired))
                    break
                if not repaired and len(current_negative) == 0:
                    # Remaining failures are outside the repair suite, so there
                    # is no counterexample to pick; indexing an empty list
                    # here used to raise IndexError.
                    logger.warning("cannot repair using instrumented tests")
                    break
                negative_idx = 0

        return patches
                patches = repair()
        else:
            patches = repair()
    except TimeoutException:
        logger.info("failed to generate patch (timeout)")
        print('TIMEOUT')
        exit(0)
    except (CompilationError, InferenceError, TransformationError):
        logger.info("failed to generate patch")
        print('FAIL')
        exit(1)

    end = time.time()
    elapsed = format_time(end - start)
    statistics.data['time']['total'] = end - start
    statistics.save()

    if not patches:
        logger.info("no patch generated in {}".format(elapsed))
        print('FAIL')
        exit(0)
    else:
        if config['generate_all']:
            patch_dir = basename(abspath(args.src)) + '-' + time.strftime("%Y-%b%d-%H%M%S")
            if not exists(patch_dir):
                os.mkdir(patch_dir)
            for idx, patch in enumerate(patches):
                patch_file = os.path.join(patch_dir, str(idx) + '.patch')
                with open(patch_file, 'w+') as file:
                    for line in patch:
                        file.write(line)
Ejemplo n.º 7
0
    def __call__(self, project, test, dump, validation_project):
        """Infer angelic paths for `test` from KLEE path conditions.

        The test is run with ``klee=True`` through ``self.run_test``; the
        ``*.smt2`` files found in ``<project.dir>/klee-out-0`` are then each
        combined with the expected outputs read from `dump` and handed to Z3.
        For every satisfiable path the model values of the suspicious
        expressions are written under ``self.load[test]`` and the test is
        re-run on `validation_project` with those values loaded; only paths
        that pass this re-run are kept.

        Args:
            project: project executed under KLEE; its ``dir`` attribute is
                used as the KLEE working directory.
            test: identifier of the test to run.
            dump: directory laid out as ``<dump>/<variable>/<index>`` whose
                files hold the expected value of each variable instance.
            validation_project: project used to validate each candidate path.

        Returns:
            A list of angelic paths. Each path is a dict mapping an expression
            id to a list (one entry per instance) of
            ``(angelic value, original value or None, env values dict)``.

        Raises:
            NoSmtError: when no usable ``*.smt2`` file was produced.
            InferenceError: when the dump is corrupted or a dumped value
                cannot be parsed as the type of its output variable.
        """
        logger.info('inferring specification for test \'{}\''.format(test))

        environment = dict(os.environ)
        if self.config['klee_max_forks'] is not None:
            environment['ANGELIX_KLEE_MAX_FORKS'] = str(
                self.config['klee_max_forks'])
        if self.config['klee_max_depth'] is not None:
            environment['ANGELIX_KLEE_MAX_DEPTH'] = str(
                self.config['klee_max_depth'])
        if self.config['klee_search'] is not None:
            environment['ANGELIX_KLEE_SEARCH'] = self.config['klee_search']
        if self.config['klee_timeout'] is not None:
            environment['ANGELIX_KLEE_MAX_TIME'] = str(
                self.config['klee_timeout'])
        if self.config['klee_solver_timeout'] is not None:
            environment['ANGELIX_KLEE_MAX_SOLVER_TIME'] = str(
                self.config['klee_solver_timeout'])
        if self.config['klee_debug']:
            environment['ANGELIX_KLEE_DEBUG'] = 'YES'
        if self.config['klee_ignore_errors']:
            environment['KLEE_DISABLE_MEMORY_ERROR'] = 'YES'
        if self.config['use_semfix_syn']:
            environment['ANGELIX_USE_SEMFIX_SYN'] = 'YES'
        environment['ANGELIX_KLEE_WORKDIR'] = project.dir

        klee_start_time = time.time()
        self.run_test(project, test, klee=True, env=environment)
        klee_end_time = time.time()
        klee_elapsed = klee_end_time - klee_start_time
        statistics.data['time']['klee'] += klee_elapsed
        statistics.save()

        logger.info('sleeping for 1 second...')
        time.sleep(1)

        smt_glob = join(project.dir, 'klee-out-0', '*.smt2')
        smt_files = glob(smt_glob)

        err_glob = join(project.dir, 'klee-out-0', '*.err')
        err_files = glob(err_glob)

        # Ids (file name up to the first dot) of paths that ended in an error.
        err_list = [os.path.basename(err).split('.')[0] for err in err_files]

        non_error_smt_files = [
            smt for smt in smt_files
            if os.path.basename(smt).split('.')[0] not in err_list
        ]

        if not self.config['ignore_infer_errors']:
            smt_files = non_error_smt_files

        if len(smt_files) == 0 and len(err_list) == 0:
            logger.warning('No paths explored')
            raise NoSmtError()

        if len(smt_files) == 0:
            logger.warning('No non-error paths explored')
            raise NoSmtError()

        # loading dump

        # name -> value list
        oracle = dict()

        for var in os.listdir(dump):
            instances = os.listdir(join(dump, var))
            # Instance files must be named 0..n-1 without gaps.
            for i in range(0, len(instances)):
                if str(i) not in instances:
                    logger.error('corrupted dump for test \'{}\''.format(test))
                    raise InferenceError()
            oracle[var] = []
            for i in range(0, len(instances)):
                with open(join(dump, var, str(i))) as f:
                    content = f.read()
                oracle[var].append(content)

        # The conversion helpers below do not depend on the path being
        # solved, so they are defined once instead of once per path.

        def str_to_int(s):
            return int(s)

        def str_to_long(s):
            return int(s)

        def str_to_bool(s):
            if s == 'false':
                return False
            if s == 'true':
                return True
            raise InferenceError()

        def str_to_char(s):
            if len(s) != 1:
                raise InferenceError()
            return s[0]

        dump_parser_by_type = {
            'int': str_to_int,
            'long': str_to_long,
            'bool': str_to_bool,
            'char': str_to_char,
        }

        def bool_to_bv(b):
            if b:
                return BitVecVal(1, 32)
            else:
                return BitVecVal(0, 32)

        def int_to_bv(i):
            return BitVecVal(i, 32)

        def long_to_bv(i):
            return BitVecVal(i, 64)

        def char_to_bv(c):
            return BitVecVal(ord(c), 32)

        to_bv_converter_by_type = {
            'bool': bool_to_bv,
            'int': int_to_bv,
            'long': long_to_bv,
            'char': char_to_bv,
        }

        def bv_to_bool(bv):
            return bv.as_long() != 0

        def bv_to_int(bv):
            l = bv.as_long()
            if l >> 31 == 1:  # negative
                l -= pow(2, 32)
            return l

        def bv_to_long(bv):
            l = bv.as_long()
            if l >> 63 == 1:  # negative
                l -= pow(2, 64)
            return l

        def bv_to_char(bv):
            l = bv.as_long()
            return chr(l)

        from_bv_converter_by_type = {
            'bool': bv_to_bool,
            'int': bv_to_int,
            'long': bv_to_long,
            'char': bv_to_char,
        }

        def array_to_bv32(array):
            # Byte at index 0 ends up as the lowest byte of the result.
            return Concat(Select(array, BitVecVal(3, 32)),
                          Select(array, BitVecVal(2, 32)),
                          Select(array, BitVecVal(1, 32)),
                          Select(array, BitVecVal(0, 32)))

        def array_to_bv64(array):
            return Concat(Select(array, BitVecVal(7, 32)),
                          Select(array, BitVecVal(6, 32)),
                          Select(array, BitVecVal(5, 32)),
                          Select(array, BitVecVal(4, 32)),
                          Select(array, BitVecVal(3, 32)),
                          Select(array, BitVecVal(2, 32)),
                          Select(array, BitVecVal(1, 32)),
                          Select(array, BitVecVal(0, 32)))

        def angelic_variable(kind, expr, instance):
            pattern = '{}!choice!{}!{}!{}!{}!{}!angelic'
            s = pattern.format(kind, expr[0], expr[1], expr[2], expr[3],
                               instance)
            return Array(s, BitVecSort(32), BitVecSort(8))

        def original_variable(kind, expr, instance):
            pattern = '{}!choice!{}!{}!{}!{}!{}!original'
            s = pattern.format(kind, expr[0], expr[1], expr[2], expr[3],
                               instance)
            return Array(s, BitVecSort(32), BitVecSort(8))

        def env_variable(expr, instance, name):
            pattern = 'int!choice!{}!{}!{}!{}!{}!env!{}'
            s = pattern.format(expr[0], expr[1], expr[2], expr[3],
                               instance, name)
            return Array(s, BitVecSort(32), BitVecSort(8))

        def output_variable(kind, name, instance):
            # The original branched on 'long' but built the same array sort in
            # both branches; the width difference is handled by
            # array_to_bv32/array_to_bv64 at the use site.
            s = '{}!output!{}!{}'.format(kind, name, instance)
            return Array(s, BitVecSort(32), BitVecSort(8))

        def angelic_selector(expr, instance):
            s = 'angelic!{}!{}!{}!{}!{}'.format(expr[0], expr[1], expr[2],
                                                expr[3], instance)
            return BitVec(s, 32)

        def original_selector(expr, instance):
            s = 'original!{}!{}!{}!{}!{}'.format(expr[0], expr[1], expr[2],
                                                 expr[3], instance)
            return BitVec(s, 32)

        def env_selector(expr, instance, name):
            s = 'env!{}!{}!{}!{}!{}!{}'.format(name, expr[0], expr[1],
                                               expr[2], expr[3], instance)
            return BitVec(s, 32)

        # Only variables with one of these name prefixes are considered.
        tracked_prefixes = ('int!', 'long!', 'bool!', 'char!', 'reachable!')

        # solving path constraints
        inference_start_time = time.time()

        angelic_paths = []

        z3.set_param("timeout", self.config['path_solving_timeout'])

        solver = Solver()

        for smt in smt_files:
            logger.info('solving path {}'.format(relpath(smt)))

            # `except Exception` instead of a bare except, so that
            # KeyboardInterrupt/SystemExit are no longer swallowed here.
            try:
                path = z3.parse_smt2_file(smt)
            except Exception:
                logger.warning('failed to parse {}'.format(smt))
                continue

            variable_names = (str(var) for var in get_vars(path))
            variables = [
                name for name in variable_names
                if name.startswith(tracked_prefixes)
            ]

            try:
                outputs, choices, constants, reachable, original_available = parse_variables(
                    variables)
            except Exception:
                # Previously skipped without any trace; keep skipping the
                # path but make the reason visible.
                logger.warning(
                    'failed to parse variables of {}'.format(smt))
                continue

            # name -> value list (parsed)
            oracle_constraints = dict()

            matching_path = True

            for expected_variable, expected_values in oracle.items():
                if expected_variable == 'reachable':
                    expected_reachable = set(expected_values)
                    if not (expected_reachable == reachable):
                        logger.info(
                            'labels \'{}\' executed while {} required'.format(
                                list(reachable), list(expected_reachable)))
                        matching_path = False
                        break
                    continue
                if expected_variable not in outputs.keys():
                    outputs[expected_variable] = (
                        None, 0)  # unconstraint does not mean wrong
                required_executions = len(expected_values)
                actual_executions = outputs[expected_variable][1]
                if required_executions != actual_executions:
                    logger.info(
                        'value \'{}\' executed {} times while {} required'.
                        format(expected_variable, actual_executions,
                               required_executions))
                    matching_path = False
                    break
                oracle_constraints[expected_variable] = []
                kind = outputs[expected_variable][0]
                for i in range(0, required_executions):
                    try:
                        value = dump_parser_by_type[kind](expected_values[i])
                    except Exception:
                        logger.error(
                            'variable \'{}\' has incompatible type {}'.format(
                                expected_variable, kind))
                        raise InferenceError()
                    oracle_constraints[expected_variable].append(value)

            if not matching_path:
                continue

            solver.reset()
            solver.add(path)

            # Pin every output instance to its expected value.
            for name, values in oracle_constraints.items():
                kind, _ = outputs[name]
                for i, value in enumerate(values):
                    array = output_variable(kind, name, i)
                    bv_value = to_bv_converter_by_type[kind](value)
                    if kind == 'long':
                        solver.add(bv_value == array_to_bv64(array))
                    else:
                        solver.add(bv_value == array_to_bv32(array))

            # Bind a 32-bit selector to each angelic/original/env array so
            # that its value can be read back from the model.
            for (expr, item) in choices.items():
                kind, instances, env = item
                for instance in range(0, instances):
                    selector = angelic_selector(expr, instance)
                    array = angelic_variable(kind, expr, instance)
                    solver.add(selector == array_to_bv32(array))

                    selector = original_selector(expr, instance)
                    array = original_variable(kind, expr, instance)
                    solver.add(selector == array_to_bv32(array))

                    for name in env:
                        selector = env_selector(expr, instance, name)
                        array = env_variable(expr, instance, name)
                        solver.add(selector == array_to_bv32(array))

            result = solver.check()
            if result != z3.sat:
                logger.info('UNSAT')  # TODO: can be timeout
                continue
            model = solver.model()

            # expr -> (angelic * original * env) list
            angelic_path = dict()

            # Start from an empty load directory for this test.
            if os.path.exists(self.load[test]):
                shutil.rmtree(self.load[test])
            os.mkdir(self.load[test])

            for (expr, item) in choices.items():
                angelic_path[expr] = []
                kind, instances, env = item

                expr_str = '{}-{}-{}-{}'.format(expr[0], expr[1], expr[2],
                                                expr[3])
                expression_dir = join(self.load[test], expr_str)
                if not os.path.exists(expression_dir):
                    os.mkdir(expression_dir)

                for instance in range(0, instances):
                    bv_angelic = model[angelic_selector(expr, instance)]
                    angelic = from_bv_converter_by_type[kind](bv_angelic)
                    bv_original = model[original_selector(expr, instance)]
                    original = from_bv_converter_by_type[kind](bv_original)
                    if original_available:
                        logger.info(
                            'expression {}[{}]: angelic = {}, original = {}'.
                            format(expr, instance, angelic, original))
                    else:
                        logger.info('expression {}[{}]: angelic = {}'.format(
                            expr, instance, angelic))
                    env_values = dict()
                    for name in env:
                        bv_env = model[env_selector(expr, instance, name)]
                        value = from_bv_converter_by_type['int'](bv_env)
                        env_values[name] = value

                    if original_available:
                        angelic_path[expr].append(
                            (angelic, original, env_values))
                    else:
                        angelic_path[expr].append((angelic, None, env_values))

                    # Dump angelic path to dump folder
                    instance_file = join(expression_dir, str(instance))
                    with open(instance_file, 'w') as handle:
                        if isinstance(angelic, bool):
                            # Booleans are stored as 1/0.
                            handle.write('1' if angelic else '0')
                        else:
                            handle.write(str(angelic))

            # Run Tester to validate the dumped values
            validated = self.run_test(validation_project,
                                      test,
                                      load=self.load[test])
            if validated:
                angelic_paths.append(angelic_path)
            else:
                logger.info('spurious angelic path')

        if self.config['synthesis_bool_only']:
            angelic_paths = self._boolean_angelic_forest(angelic_paths)

        if self.config['max_angelic_paths'] is not None and \
           len(angelic_paths) > self.config['max_angelic_paths']:
            angelic_paths = self._reduce_angelic_forest(angelic_paths)
        else:
            logger.info('found {} angelic paths for test \'{}\''.format(
                len(angelic_paths), test))

        inference_end_time = time.time()
        inference_elapsed = inference_end_time - inference_start_time
        statistics.data['time']['inference'] += inference_elapsed

        iter_stat = dict()
        iter_stat['time'] = dict()
        iter_stat['time']['klee'] = klee_elapsed
        iter_stat['time']['inference'] = inference_elapsed
        iter_stat['paths'] = dict()
        iter_stat['paths']['explored'] = len(smt_files)
        iter_stat['paths']['angelic'] = len(angelic_paths)
        statistics.data['iterations']['klee'].append(iter_stat)
        statistics.save()

        return angelic_paths
Ejemplo n.º 8
0
    def __call__(self, project, test, dump, validation_project):
        """Infer angelic paths for ``test`` from KLEE path constraints.

        The method proceeds in the following stages:

        1. Export the KLEE-related settings from ``self.config`` as
           ``ANGELIX_KLEE_*`` environment variables and run the test through
           ``self.run_test(project, test, klee=True, env=...)``.
        2. Collect the ``*.smt2`` and ``*.err`` files found in
           ``<project.dir>/klee-out-0``.
        3. Load the oracle (expected values) from the ``dump`` directory.
        4. For every collected SMT file: parse it with z3, check that the
           number of executions of each output matches the oracle, constrain
           the outputs to the oracle values, solve, read the angelic/original/
           environment values from the model, write the angelic values under
           ``self.load[test]`` and re-run the test on ``validation_project``
           with ``load=self.load[test]`` to filter out spurious paths.
        5. Optionally post-process the collected paths, record timing
           statistics and return.

        Args:
            project: object exposing ``dir``; it is passed to
                ``self.run_test`` for the KLEE run.
            test: test identifier; also used as a key into ``self.load``.
            dump: path of a directory with one sub-directory per oracle
                variable, each holding files named ``0``, ``1``, ...
            validation_project: passed to ``self.run_test`` when validating
                the values extracted from a model.

        Returns:
            The list of validated angelic paths. Each path built here is a
            dict mapping an expression key to a list of
            ``(angelic, original_or_None, env_values)`` tuples, one per
            instance. When ``synthesis_bool_only`` or ``max_angelic_paths``
            is configured the list is whatever ``_boolean_angelic_forest`` /
            ``_reduce_angelic_forest`` return.

        Raises:
            NoSmtError: no SMT file is left to analyse.
            InferenceError: the dump directory is not contiguous
                (``0..n-1``) or an oracle value cannot be parsed for the
                type of the corresponding output.
        """
        logger.info('inferring specification for test \'{}\''.format(test))

        # Start from a copy of the current environment and add one variable
        # per configured KLEE option; options set to None are not exported.
        environment = dict(os.environ)
        if self.config['klee_max_forks'] is not None:
            environment['ANGELIX_KLEE_MAX_FORKS'] = str(self.config['klee_max_forks'])
        if self.config['klee_max_depth'] is not None:
            environment['ANGELIX_KLEE_MAX_DEPTH'] = str(self.config['klee_max_depth'])
        if self.config['klee_search'] is not None:
            environment['ANGELIX_KLEE_SEARCH'] = self.config['klee_search']
        if self.config['klee_timeout'] is not None:
            environment['ANGELIX_KLEE_MAX_TIME'] = str(self.config['klee_timeout'])
        if self.config['klee_solver_timeout'] is not None:
            environment['ANGELIX_KLEE_MAX_SOLVER_TIME'] = str(self.config['klee_solver_timeout'])
        if self.config['klee_debug']:
            environment['ANGELIX_KLEE_DEBUG'] = 'YES'
        if self.config['klee_ignore_errors']:
            environment['KLEE_DISABLE_MEMORY_ERROR'] = 'YES'
        if self.config['use_semfix_syn']:
            environment['ANGELIX_USE_SEMFIX_SYN'] = 'YES'
        environment['ANGELIX_KLEE_WORKDIR'] = project.dir

        # Run the test in KLEE mode; only the elapsed time is recorded here,
        # the return value of run_test is not inspected.
        klee_start_time = time.time()
        self.run_test(project, test, klee=True, env=environment)
        klee_end_time = time.time()
        klee_elapsed = klee_end_time - klee_start_time
        statistics.data['time']['klee'] += klee_elapsed
        statistics.save()

        # NOTE(review): presumably gives the KLEE output files time to be
        # fully written before they are globbed - confirm.
        logger.info('sleeping for 1 second...')
        time.sleep(1)

        smt_glob = join(project.dir, 'klee-out-0', '*.smt2')
        smt_files = glob(smt_glob)

        err_glob = join(project.dir, 'klee-out-0', '*.err')
        err_files = glob(err_glob)

        # Identifier of an error file = its base name up to the first '.'.
        err_list = []
        for err in err_files:
            err_list.append(os.path.basename(err).split('.')[0])

        # SMT files whose identifier has no matching .err file.
        non_error_smt_files = []
        for smt in smt_files:
            smt_id = os.path.basename(smt).split('.')[0]
            if not smt_id in err_list:
                non_error_smt_files.append(smt)

        # Paths that ended in an error are kept only when explicitly requested.
        if not self.config['ignore_infer_errors']:
            smt_files = non_error_smt_files

        # Both situations raise the same exception; they differ only in the
        # warning that is logged.
        if len(smt_files) == 0 and len(err_list) == 0:
            logger.warning('No paths explored')
            raise NoSmtError()

        if len(smt_files) == 0:
            logger.warning('No non-error paths explored')
            raise NoSmtError()

        # loading dump

        # name -> value list
        oracle = dict()

        vars = os.listdir(dump)
        for var in vars:
            instances = os.listdir(join(dump, var))
            # The instance files must be named exactly '0' .. 'n-1'.
            for i in range(0, len(instances)):
                if str(i) not in instances:
                    logger.error('corrupted dump for test \'{}\''.format(test))
                    raise InferenceError()
            oracle[var] = []
            # Values are stored as the raw file content, in instance order.
            for i in range(0, len(instances)):
                file = join(dump, var, str(i))
                with open(file) as f:
                    content = f.read()
                oracle[var].append(content)

        # solving path constraints
        inference_start_time = time.time()

        angelic_paths = []

        # Global z3 parameter, so it applies to every check() below.
        z3.set_param("timeout", self.config['path_solving_timeout'])

        # A single solver instance is reused; it is reset for each path.
        solver = Solver()

        for smt in smt_files:
            logger.info('solving path {}'.format(relpath(smt)))

            # NOTE(review): the bare except also swallows KeyboardInterrupt
            # and SystemExit; a file that fails to parse is simply skipped.
            try:
                path = z3.parse_smt2_file(smt)
            except:
                logger.warning('failed to parse {}'.format(smt))
                continue

            # Only symbols with one of the known prefixes are of interest.
            variables = [str(var) for var in get_vars(path)
                         if str(var).startswith('int!')
                         or str(var).startswith('long!')
                         or str(var).startswith('bool!')
                         or str(var).startswith('char!')
                         or str(var).startswith('reachable!')]

            # As used below: outputs maps name -> (type, execution count),
            # choices maps expr -> (type, instance count, env names) and
            # reachable is compared against a set of labels. constants is not
            # used in this method. Any failure skips the path without logging.
            try:
                outputs, choices, constants, reachable, original_available = parse_variables(variables)
            except:
                continue

            # name -> value list (parsed)
            oracle_constraints = dict()

            # --- parsers: dump file content (str) -> Python value ---------

            def str_to_int(s):
                return int(s)

            def str_to_long(s):
                return int(s)

            # Only the literals 'false' and 'true' are accepted.
            def str_to_bool(s):
                if s == 'false':
                    return False
                if s == 'true':
                    return True
                raise InferenceError()

            # The dumped value must be exactly one character long.
            def str_to_char(s):
                if len(s) != 1:
                    raise InferenceError()
                return s[0]

            dump_parser_by_type = dict()
            dump_parser_by_type['int'] = str_to_int
            dump_parser_by_type['long'] = str_to_long
            dump_parser_by_type['bool'] = str_to_bool
            dump_parser_by_type['char'] = str_to_char

            # --- converters: Python value -> z3 bit-vector constant -------
            # bool, int and char use 32 bits, long uses 64 bits.

            def bool_to_bv(b):
                if b:
                    return BitVecVal(1, 32)
                else:
                    return BitVecVal(0, 32)

            def int_to_bv(i):
                return BitVecVal(i, 32)

            def long_to_bv(i):
                return BitVecVal(i, 64)

            def char_to_bv(c):
                return BitVecVal(ord(c), 32)

            to_bv_converter_by_type = dict()
            to_bv_converter_by_type['bool'] = bool_to_bv
            to_bv_converter_by_type['int'] = int_to_bv
            to_bv_converter_by_type['long'] = long_to_bv
            to_bv_converter_by_type['char'] = char_to_bv

            # --- converters: z3 model value -> Python value ---------------

            def bv_to_bool(bv):
                return bv.as_long() != 0

            # as_long() is used as an unsigned value here; when bit 31 is set
            # the value is mapped to the corresponding negative integer.
            def bv_to_int(bv):
                l = bv.as_long()
                if l >> 31 == 1:  # negative
                    l -= pow(2, 32)
                return l

            # Same as bv_to_int, for a 64-bit value (sign bit is bit 63).
            def bv_to_long(bv):
                l = bv.as_long()
                if l >> 63 == 1:  # negative
                    l -= pow(2, 64)
                return l

            def bv_to_char(bv):
                l = bv.as_long()
                return chr(l)

            from_bv_converter_by_type = dict()
            from_bv_converter_by_type['bool'] = bv_to_bool
            from_bv_converter_by_type['int'] = bv_to_int
            from_bv_converter_by_type['long'] = bv_to_long
            from_bv_converter_by_type['char'] = bv_to_char

            # Cheap structural check before invoking the solver: the path must
            # execute the same labels and the same number of output instances
            # as recorded in the oracle.
            matching_path = True

            for expected_variable, expected_values in oracle.items():
                # 'reachable' is compared as a set of labels, not as values.
                if expected_variable == 'reachable':
                    expected_reachable = set(expected_values)
                    if not (expected_reachable == reachable):
                        logger.info('labels \'{}\' executed while {} required'.format(
                            list(reachable),
                            list(expected_reachable)))
                        matching_path = False
                        break
                    continue
                if expected_variable not in outputs.keys():
                    outputs[expected_variable] = (None, 0)  # unconstraint does not mean wrong
                required_executions = len(expected_values)
                actual_executions = outputs[expected_variable][1]
                if required_executions != actual_executions:
                    logger.info('value \'{}\' executed {} times while {} required'.format(
                        expected_variable,
                        actual_executions,
                        required_executions))
                    matching_path = False
                    break
                oracle_constraints[expected_variable] = []
                for i in range(0, required_executions):
                    type = outputs[expected_variable][0]
                    # Any parsing failure (including the InferenceError raised
                    # by the parsers above) is reported as a type mismatch.
                    try:
                        value = dump_parser_by_type[type](expected_values[i])
                    except:
                        logger.error('variable \'{}\' has incompatible type {}'.format(expected_variable,
                                                                                       type))
                        raise InferenceError()
                    oracle_constraints[expected_variable].append(value)

            if not matching_path:
                continue

            # Drop the assertions of the previous path before adding this one.
            solver.reset()
            solver.add(path)

            # Assemble a 32-bit value from the four bytes stored at indices
            # 0..3 of a byte array; index 3 is passed first to Concat and
            # index 0 last (i.e. index 0 holds the least significant byte).
            def array_to_bv32(array):
                return Concat(Select(array, BitVecVal(3, 32)),
                              Select(array, BitVecVal(2, 32)),
                              Select(array, BitVecVal(1, 32)),
                              Select(array, BitVecVal(0, 32)))

            # Same as array_to_bv32, for the eight bytes at indices 0..7.
            def array_to_bv64(array):
                return Concat(Select(array, BitVecVal(7, 32)),
                              Select(array, BitVecVal(6, 32)),
                              Select(array, BitVecVal(5, 32)),
                              Select(array, BitVecVal(4, 32)),
                              Select(array, BitVecVal(3, 32)),
                              Select(array, BitVecVal(2, 32)),
                              Select(array, BitVecVal(1, 32)),
                              Select(array, BitVecVal(0, 32)))

            # The *_variable helpers rebuild the byte-array symbols by name
            # (32-bit index, 8-bit element). expr is indexed at positions 0..3
            # (presumably a source location - confirm against parse_variables).

            def angelic_variable(type, expr, instance):
                pattern = '{}!choice!{}!{}!{}!{}!{}!angelic'
                s = pattern.format(type, expr[0], expr[1], expr[2], expr[3], instance)
                return Array(s, BitVecSort(32), BitVecSort(8))

            def original_variable(type, expr, instance):
                pattern = '{}!choice!{}!{}!{}!{}!{}!original'
                s = pattern.format(type, expr[0], expr[1], expr[2], expr[3], instance)
                return Array(s, BitVecSort(32), BitVecSort(8))

            # Environment variables always use the 'int' prefix.
            def env_variable(expr, instance, name):
                pattern = 'int!choice!{}!{}!{}!{}!{}!env!{}'
                s = pattern.format(expr[0], expr[1], expr[2], expr[3], instance, name)
                return Array(s, BitVecSort(32), BitVecSort(8))

            # NOTE(review): both branches build the same array sort, so the
            # test on 'long' currently has no effect.
            def output_variable(type, name, instance):
                s = '{}!output!{}!{}'.format(type, name, instance)
                if type == 'long':
                    return Array(s, BitVecSort(32), BitVecSort(8))
                else:
                    return Array(s, BitVecSort(32), BitVecSort(8))

            # The *_selector helpers create fresh 32-bit bit-vector symbols
            # that are tied to the arrays below, so that the values can be
            # read back from the model as plain bit-vectors.

            def angelic_selector(expr, instance):
                s = 'angelic!{}!{}!{}!{}!{}'.format(expr[0], expr[1], expr[2], expr[3], instance)
                return BitVec(s, 32)

            def original_selector(expr, instance):
                s = 'original!{}!{}!{}!{}!{}'.format(expr[0], expr[1], expr[2], expr[3], instance)
                return BitVec(s, 32)

            def env_selector(expr, instance, name):
                s = 'env!{}!{}!{}!{}!{}!{}'.format(name, expr[0], expr[1], expr[2], expr[3], instance)
                return BitVec(s, 32)

            # Force every output instance to take the value from the oracle.
            for name, values in oracle_constraints.items():
                type, _ = outputs[name]
                for i, value in enumerate(values):
                    array = output_variable(type, name, i)
                    bv_value = to_bv_converter_by_type[type](value)
                    if type == 'long':
                        solver.add(bv_value == array_to_bv64(array))
                    else:
                        solver.add(bv_value == array_to_bv32(array))


            # Tie each selector to its array.
            # NOTE(review): choice values are always read as 32 bits here,
            # whatever their declared type - confirm this is intended for
            # 'long' choices.
            for (expr, item) in choices.items():
                type, instances, env = item
                for instance in range(0, instances):
                    selector = angelic_selector(expr, instance)
                    array = angelic_variable(type, expr, instance)
                    solver.add(selector == array_to_bv32(array))

                    selector = original_selector(expr, instance)
                    array = original_variable(type, expr, instance)
                    solver.add(selector == array_to_bv32(array))

                    for name in env:
                        selector = env_selector(expr, instance, name)
                        array = env_variable(expr, instance, name)
                        solver.add(selector == array_to_bv32(array))


            # Every result other than sat is logged as 'UNSAT' and skipped.
            result = solver.check()
            if result != z3.sat:
                logger.info('UNSAT') # TODO: can be timeout
                continue
            model = solver.model()

            # expr -> (angelic * original * env) list
            angelic_path = dict()

            # The load directory of this test is recreated from scratch for
            # every satisfiable path, so it only ever holds the latest values.
            if os.path.exists(self.load[test]):
                shutil.rmtree(self.load[test])
            os.mkdir(self.load[test])

            for (expr, item) in choices.items():
                angelic_path[expr] = []
                type, instances, env = item

                # One sub-directory per expression, named after its four
                # components joined with '-'.
                expr_str = '{}-{}-{}-{}'.format(expr[0], expr[1], expr[2], expr[3])
                expression_dir = join(self.load[test], expr_str)
                if not os.path.exists(expression_dir):
                    os.mkdir(expression_dir)

                for instance in range(0, instances):
                    # The original value is always read from the model but is
                    # only reported and stored when original_available is set.
                    bv_angelic = model[angelic_selector(expr, instance)]
                    angelic = from_bv_converter_by_type[type](bv_angelic)
                    bv_original = model[original_selector(expr, instance)]
                    original = from_bv_converter_by_type[type](bv_original)
                    if original_available:
                        logger.info('expression {}[{}]: angelic = {}, original = {}'.format(expr,
                                                                                            instance,
                                                                                            angelic,
                                                                                            original))
                    else:
                        logger.info('expression {}[{}]: angelic = {}'.format(expr,
                                                                             instance,
                                                                             angelic))
                    # Environment values are always decoded as 'int'.
                    env_values = dict()
                    for name in env:
                        bv_env = model[env_selector(expr, instance, name)]
                        value = from_bv_converter_by_type['int'](bv_env)
                        env_values[name] = value

                    if original_available:
                        angelic_path[expr].append((angelic, original, env_values))
                    else:
                        angelic_path[expr].append((angelic, None, env_values))

                    # Dump angelic path to dump folder
                    # (booleans are written as '1'/'0', other values via str()).
                    instance_file = join(expression_dir, str(instance))
                    with open(instance_file, 'w') as file:
                        if isinstance(angelic, bool):
                            if angelic:
                                file.write('1')
                            else:
                                file.write('0')
                        else:
                            file.write(str(angelic))


            # Run Tester to validate the dumped values
            validated = self.run_test(validation_project, test, load=self.load[test])
            if validated:
                angelic_paths.append(angelic_path)
            else:
                logger.info('spurious angelic path')

        if self.config['synthesis_bool_only']:
            angelic_paths = self._boolean_angelic_forest(angelic_paths)

        # The number of paths found is logged only when no reduction happens.
        if self.config['max_angelic_paths'] is not None and \
           len(angelic_paths) > self.config['max_angelic_paths']:
            angelic_paths = self._reduce_angelic_forest(angelic_paths)
        else:
            logger.info('found {} angelic paths for test \'{}\''.format(len(angelic_paths), test))

        inference_end_time = time.time()
        inference_elapsed = inference_end_time - inference_start_time
        statistics.data['time']['inference'] += inference_elapsed

        # Per-call record: timings plus the number of SMT files considered
        # and the number of angelic paths returned.
        iter_stat = dict()
        iter_stat['time'] = dict()
        iter_stat['time']['klee'] = klee_elapsed
        iter_stat['time']['inference'] = inference_elapsed
        iter_stat['paths'] = dict()
        iter_stat['paths']['explored'] = len(smt_files)
        iter_stat['paths']['angelic'] = len(angelic_paths)
        statistics.data['iterations']['klee'].append(iter_stat)
        statistics.save()

        return angelic_paths
    def __call__(self, angelic_forest):
        """Run the external synthesizer on ``angelic_forest`` and parse its patch.

        The angelic forest is first made available in
        ``self.angelic_forest_file`` (copied when a path is given, dumped via
        ``self.dump_angelic_forest`` otherwise). The synthesis jar is then
        invoked once per entry of ``self.config['synthesis_levels']``, in
        order, until one run prints ``SUCCESS``.

        Args:
            angelic_forest: either a ``str`` path to an angelic forest file or
                an in-memory angelic forest accepted by
                ``self.dump_angelic_forest``.

        Returns:
            A dict mapping an expression key (tuple of ints parsed from a
            ``a-b-c-d`` line of the patch file) to the fixed expression
            string, or ``None`` when no level produced a patch or the
            produced patch is empty.

        Raises:
            NameError: ``synthesis_other_solver`` names an unsupported solver.
            KeyError: a required environment variable (jar or solver path) is
                not set.
            Exception: the synthesizer printed something other than
                ``TIMEOUT``, ``FAIL`` or ``SUCCESS``.
        """
        if isinstance(angelic_forest, str):
            # angelic_forest is a file
            shutil.copyfile(angelic_forest, self.angelic_forest_file)
        else:
            # angelic_forest is a data structure
            self.dump_angelic_forest(angelic_forest)

        def convert_to_c(s):
            # The synthesizer encodes square brackets; restore the C syntax.
            return s.replace('_LBRSQR_', '[').replace('_RBRSQR_', ']')

        dirpath = tempfile.mkdtemp()
        try:
            patch_file = join(dirpath, 'patch')
            config_file = join(dirpath, 'config.json')

            for level in self.config['synthesis_levels']:

                logger.info('synthesizing patch with component level \'{}\''.format(level))

                config = {
                    "encodingConfig": {
                        "componentsMultipleOccurrences": True,
                        # better if false, if not enough primitive components, synthesis can fail
                        "phantomComponents": True,
                        "repairBooleanConst": False,
                        "repairIntegerConst": False,
                        "level": "linear"
                    },
                    "simplification": False,
                    "reuseStructure": not self.config['semfix'],
                    "spaceReduction": True,
                    "componentLevel": level,
                    "solverBound": 3,
                    "solverTimeout": self.config['synthesis_timeout']
                }

                with open(config_file, 'w') as file:
                    json.dump(config, file)

                if self.config['use_nsynth']:
                    jar = os.environ['NSYNTH_JAR']
                elif self.config['synthesis_other_solver'] is None:
                    jar = os.environ['SYNTHESIS_JAR']
                else:
                    jar = os.environ['SYNTHESIS_OTHER_JAR']

                # Synthesizer diagnostics are shown only in verbose mode.
                stderr = None if self.config['verbose'] else subprocess.DEVNULL

                args = [self.angelic_forest_file, self.extracted, patch_file, config_file]
                # A copy of the configuration is also kept at a fixed location.
                shutil.copyfile(config_file, "/angelix/config.json")
                if self.config['synthesis_other_solver'] is not None:
                    solver_name = self.config['synthesis_other_solver']
                    args += [solver_name]
                    if solver_name == "Enum":
                        args += [os.environ['ENUM_SOLVER_PATH']]
                    elif solver_name == "Symbolic":
                        args += [os.environ['SYMBOLIC_SOLVER_PATH']]
                    elif solver_name == "CVC4":
                        args += [os.environ['CVC4_SOLVER_PATH']]
                    elif solver_name == "Stoc":
                        args += [os.environ['STOC_SOLVER_PATH']]
                    else:
                        raise NameError("Not supported solver: "+solver_name)
                    args += [os.environ["RESULT_BEAUTIFIER_PATH"]]
                    args += ["/angelix/additionalConfig.txt"]
                synthesis_start_time = time.time()
                logger.info("-------")
                logger.info(args)
                try:
                    result = subprocess.check_output(['java', '-jar', jar] + args, stderr=stderr)
                except subprocess.CalledProcessError:
                    logger.warning("synthesis returned non-zero code")
                    continue
                finally:
                    # Statistics are recorded for failed runs as well.
                    synthesis_end_time = time.time()
                    synthesis_elapsed = synthesis_end_time - synthesis_start_time
                    statistics.data['time']['synthesis'] += synthesis_elapsed
                    iter_stat = dict()
                    # NOTE(review): when angelic_forest is a file path this is
                    # the length of the path string, not a number of tests.
                    iter_stat['tests'] = len(angelic_forest)
                    iter_stat['level'] = level
                    iter_stat['time'] = synthesis_elapsed
                    statistics.data['iterations']['synthesis'].append(iter_stat)
                    statistics.save()

                status = str(result, 'UTF-8').strip()
                if status == 'TIMEOUT':
                    logger.warning('timeout when synthesizing fix')
                elif status == 'FAIL':
                    logger.info('synthesis failed')
                elif status == 'SUCCESS':
                    with open(patch_file) as file:
                        content = file.readlines()
                    # The patch file is a sequence of three-line records:
                    # expression key, original expression, fixed expression.
                    patch = dict()
                    while content:
                        line = content.pop(0).strip()
                        # readlines() keeps the newline, so emptiness has to
                        # be tested on the stripped line.
                        if not line:
                            continue
                        expr = tuple(map(int, line.split('-')))
                        original = convert_to_c(content.pop(0).strip())
                        fixed = convert_to_c(content.pop(0).strip())
                        if self.config['semfix']:
                            logger.info('synthesized expression {}: {}'.format(expr, fixed))
                        else:
                            logger.info('fixing expression {}: {} ---> {}'.format(expr, original, fixed))
                        patch[expr] = fixed
                    if len(patch) == 0:
                        logger.warning('patch contains no changes')
                        return None
                    return patch
                else:
                    raise Exception('result: ' + str(result, 'UTF-8'))

            return None
        finally:
            # mkdtemp() leaves deletion to the caller; clean up on every exit
            # path (success, empty patch, exhaustion of levels, exception).
            shutil.rmtree(dirpath, ignore_errors=True)