def _process(objs: List[Dict[str, Any]],
             inpt: str,
             dfs: bool = False) -> Tuple[List[InputValue], bool]:
    """
    Returns the next position to substitute and the possible substitutions based on a list of comparisons
    :param dfs:
    :param objs: The list of comparisons made during the execution.
    :param inpt: The input used for the execution.
    :return: the list of possible replacements and a flag if the last comparison was an assert
    """
    # keep only the input comparisons for those functions that need them
    input_comparisons = [
        obj for obj in objs if obj["type"] == "INPUT_COMPARISON"
    ]

    if not objs:
        # FIXME return proper list of corrections
        return [], False  # [InputValue(0, 0, random.choice(string.printable), "undef", inpt)]

    # get the last comparison made, then extract the character index.
    last_comparison = _get_last_comparison(objs)
    if not last_comparison:
        # raise ValueError("The extraction of the last made comparison returned incorrectly.")
        return [], False
        # return _construct_correction_list([random.choice(Utils.continuations)], dfs, inpt, Utils.min_index + len(inpt) - 1)
    # input_len = last_comparison["length"]
    Utils.max_index = last_comparison['index'][0]
    # if we can prune this branch of the search space, we do so
    # if Pruning.is_pruned(objs, input_comparisons, Utils.max_index):
    #    return []
    # if input_len <= Utils.max_index:  # EOF -- generate more data
    #     # FIXME return proper list of corrections
    #     return [InputValue(Utils.max_index, Utils.min_index, random.choice(string.printable), "undef", inpt)]

    # get all comparisons on the index of the last character comparison, then update stage, token-learning and token information
    cmps = _get_comparisons_on_idx(input_comparisons, Utils.max_index)
    ParsingStageExtractor.extract_stages(objs)
    TokenLearningHandler.find_learning_patterns(objs)
    TokenHandler.iterate_objs(input_comparisons, inpt)

    corrections = _get_corrections(cmps, Utils.continuations)

    last_obj_entry = objs[-1]
    last_was_assert = (last_obj_entry["type"] == "INPUT_COMPARISON"
                       and last_obj_entry["operator"] == "assert")

    return _construct_correction_list(corrections, dfs, inpt, Utils.max_index), last_was_assert
Example #2
def get_corrections(self):
    """
    Returns the new input strings based on the parent input and the other encapsulated parameters.
    :return: a tuple of the corrected input and, if do_append is set, the corrected input with an appended continuation
    """
    new_char = self.correction
    subst_index = self.at - self.min
    inp = self.inp
    # replace the character of the last comparison with the new continuation
    inp = inp[:subst_index] + new_char
    # append a new character, as the program might expect additional input;
    # if new_char is not a single character but a string, it is likely a keyword, so we add a whitespace first
    if self.operator == "strcmp":
        inp_rand_new = inp + " " + random.choice(Utils.continuations)
    # for token comparisons the "random" next character has to be a token itself, otherwise a lexing error
    # might occur and we would not see a token comparison; a whitespace between two tokens should also be allowed
    elif self.operator == "tokencomp":
        # for token substitutions add a whitespace, as it is generally used to separate tokens
        inp = self.inp[:subst_index] + " " + new_char if not self.inp[:subst_index].endswith(" ") else self.inp[:subst_index] + new_char
        inp_rand_new = inp + " " + TokenHandler.random_token() if not inp.endswith(" ") else inp + new_char
    elif self.operator == "strlen":
        inp_rand_new = inp
    else:
        inp_rand_new = inp + random.choice(Utils.continuations)
    if self.do_append:
        return inp, inp_rand_new
    else:
        return inp, inp
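# --- Illustrative sketch (not part of the original module) ---
# The core substitution step in isolation, using plain values instead of the
# encapsulated parameters; the names mirror the attributes used above.
def substitute(inp: str, at: int, min_index: int, correction: str) -> str:
    # drop everything from the comparison position onwards and splice in the new continuation
    return inp[:at - min_index] + correction

# e.g. substitute("(foo", 1, 0, "define") == "(define"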
Example #3
def print_stats_file():
    """
    Report stats in stats file.
    """
    Utils.stats_file.seek(0)
    Utils.stats_file.write("String to Token Mapping:\n")
    Utils.stats_file.write(TokenHandler.print_tokenmap())
    Utils.stats_file.write("\nParsing Stage Mapping:\n")
    Utils.stats_file.write(ParsingStageExtractor.print_stages())
    Utils.stats_file.write("\n")
    if Utils.inputs.heap:
        Utils.stats_file.write(
            "Smallest in Heap: %s\n\tValue: %s\n\tStack: %d\n" %
            (str(Utils.inputs.heap[0].prio_value), Utils.inputs.heap[0].val,
             Utils.inputs.heap[0].val.stack_size))
    else:
        Utils.stats_file.write("PrioQueue is currently empty.")
    Utils.stats_file.write("\n")
    Utils.stats_file.write(
        "Number of executions: %d\nApprox. Search Space Size: %d\nExecuted since last found: %d\nRuntime (seconds): %d\n"
        % (Utils.all_exec, len(Utils.inputs), Utils.current_iteration,
           time.time() - Utils.starttime))
    Utils.stats_file.flush()
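# --- Illustrative output (not part of the original module) ---
# The stats file written above looks roughly like this:
#   String to Token Mapping:
#   <token map>
#   Parsing Stage Mapping:
#   <stage map>
#   Smallest in Heap: <priority value>
#       Value: <input>
#       Stack: <stack size>
#   Number of executions: <count>
#   Approx. Search Space Size: <count>
#   Executed since last found: <count>
#   Runtime (seconds): <seconds>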
Example #4
def print_afl():
    """
    Prints the dictionary and seed to the afl folder. Mock values given as test and dict input if nothing was created.
    :return:
    """
    os.makedirs(os.path.join(g_parentdir, "afl", "dict"), exist_ok=True)
    os.makedirs(os.path.join(g_parentdir, "afl", "tests"), exist_ok=True)
    dict_counter = 0
    tokens = TokenHandler.tokens()
    if not tokens:
        with open(
                os.path.join(g_parentdir, "afl", "dict",
                             "entry%d" % dict_counter), "w") as afl_dict:
            afl_dict.write(" ")
            dict_counter += 1
    for entry in tokens:
        with open(
                os.path.join(g_parentdir, "afl", "dict",
                             "entry%d" % dict_counter), "w") as afl_dict:
            afl_dict.write(entry)
            dict_counter += 1
    # create an empty test if no seeds are available to fulfill the requirement of afl to have at least one valid test
    test_counter = 0
    if not Utils.seed_for_afl:
        with open(
                os.path.join(g_parentdir, "afl", "tests",
                             "test%d" % test_counter), "w") as test_file:
            test_file.write(" ")
            test_counter += 1
    for val in Utils.seed_for_afl:
        with open(
                os.path.join(g_parentdir, "afl", "tests",
                             "test%d" % test_counter), "w") as test_file:
            # avoid inputs that cause the afl instrumented version of lisp to crash
            if not val.startswith(" ( # "):
                test_file.write(val)
                test_counter += 1
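# --- Illustrative layout (not part of the original module) ---
# After a run, the afl folder created above is expected to look roughly like this:
#   <g_parentdir>/afl/dict/entry0, entry1, ...   one learned token per file
#   <g_parentdir>/afl/tests/test0, test1, ...    one seed input per file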
Example #5
def _add_values_to_queue(h_value,
                         inpt_counter,
                         new_covered,
                         values,
                         parent_input: "InputWrapper",
                         re_evaluation: bool = False):
    """
    The actual addition code.
    :param h_value:
    :param inpt_counter:
    :param new_covered:
    :param values:
    :return:
    """
    if h_value.cover_counter[0] >= 0:
        for val in values:
            # remove random continuations which are possibly part of the program's expected input
            if val.correction in Utils.continuations:
                Utils.continuations.remove(val.correction)
            # TODO for the moment refill the continuations if they run empty; in the future use a wider range of continuations
            if not Utils.continuations:
                Utils.continuations = [i for i in string.printable]
            inpt_counter += 1
            # for each substitution add an element to the prio queue; it will be inserted based on the
            # heuristic value of the substitution
            new_h_value: HeuristicValue = h_value.clone()
            new_h_value.set_input_counter_adapt_value(
                parent_input.prio_value.input_counter +
                1 if parent_input else 0)
            # adjustment of the heuristic value based on the operator used and the stack size
            # operator_adjustment = 10000 - (val.stack_size * 100) if val.operator == "tokencomp" and val.stack_size > 0 and h_value.cover_counter[0] > 0 else 0

            # adjustment of the heuristic value based on the token used
            # only used if a certain amount of new basic blocks is covered
            token_usage_adjustment = (
                100 * len(val.correction)
                if val.operator == "tokencomp"
                and h_value.cover_counter[0] <= individual_correction_threshold
                and not any(val.correction in value for value in Utils.valid_found)
                else 0)
            if val.operator != "tokencomp":
                new_h_value.adjust_value(len(val.correction) * 2)
            else:
                new_h_value.adjust_value(1 + token_usage_adjustment)

            if val.operator == "tokencomp" and val.stack_size > 0 and h_value.cover_counter[
                    0] > individual_correction_threshold and not re_evaluation and val.do_append:
                new_h_value.set_individual_correction(
                    TokenHandler.get_tokencompare_stack_tuple(
                        val.at, val.correction, val.stack_size, val.id,
                        h_value.cover_counter[0]))
            # if we want the value to be re-evaluated (the do_append flag is False), we have to give it a very good heuristic value
            elif not val.do_append:
                new_h_value.set_individual_correction(((-1, 0), ))
                new_h_value.same_path_taken = 0.0
            Utils.inputs.push(
                InputWrapper(new_h_value, inpt_counter, new_covered, val))
    # else:
    #     for val in values:
    #         if val.correction in Utils.continuations:
    #             Utils.continuations.remove(val.correction)
    #         # TODO for the moment refill continuations if it runs empty, in future use a wider range of cont.
    #         if not Utils.continuations:
    #             Utils.continuations = [i for i in string.printable]
    #         inpt_counter += 1
    #         # add for each substitution and element to the prio queue which will be inserted based on the
    #         # heuristic value of the substitution
    #         stack_val = Utils.map_char_stack.get(val.correction)
    #         if stack_val is not None:
    #             new_h_value = h_value.clone()
    #             new_h_value.adjust_value(-stack_val * len(val.inp))
    #             Utils.inputs.push(InputWrapper(new_h_value, inpt_counter, new_covered, val))
    #         else:
    #             Utils.inputs.push(InputWrapper(h_value.clone(), inpt_counter, new_covered, val))
    return inpt_counter
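# --- Illustrative sketch (not part of the original module) ---
# How the prioritised insertion could look with a plain heapq, assuming the queue
# pops the smallest heuristic value first; the tuples are simplified stand-ins for
# HeuristicValue/InputWrapper and the values are made up.
import heapq

queue = []
for counter, (heuristic_value, correction) in enumerate([(3.0, "a"), (1.5, "("), (2.0, ")")]):
    # the counter breaks ties so that entries with equal heuristic values keep insertion order
    heapq.heappush(queue, (heuristic_value, counter, correction))

assert heapq.heappop(queue)[2] == "("  # the best-rated substitution is explored first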

def _get_corrections(cmp_stack: List[Any], local_continuations: List[str]):
    """
    Returns the possible substitutions for the character at index at_idx
    :param cmp_stack: The comparisons made on the last character.
    :param local_continuations: The possible characters to insert if there is no useful continuation
    :return:
    """
    chars = set()
    tok_comp_values = set()
    # the comparison at whose index the same token occurred;
    # in general the value to substitute should not already be the correct one, because we might have missed some comparisons
    # TODO maybe we should remember the last set of found tokencomp values; if the last set was the same, we ignore the flag
    found_token_same_index = None
    for char in cmp_stack:
        if char["operator"] == "switch":
            chars |= _construct_continuation_set(char["operand"], char)
            TokenLearningHandler.add_rhs(char["operand"])

        elif char["operator"] == "strlen":
            length = int(char["operand"][0])
            TokenLearningHandler.add_rhs(
                ["".join(["a" for _ in range(0, length)])])
            chars |= _construct_continuation_set(
                ["".join(["a" for _ in range(0, length)])], char)

        elif char["operator"] == "conversion":
            pos_subst = ConversionHandler.get_possible_substitutions(
                char["operand"][0])
            TokenLearningHandler.add_rhs(pos_subst)
            chars |= _construct_continuation_set(pos_subst, char)

        elif char["operator"] == "tokencomp":
            # check if really the largest tokencomparison is used for calculating a substitution
            # if TokenHandler.is_largest_token(char["index"][0]):
            tok_comp_values.add(int(char["operand"][0]))
            pos_subst = set()
            if TokenHandler.get_majority_token(char["index"][0]) == int(
                    char["operand"][0]):
                found_token_same_index = char
            # only use tokencomps for which the majority vote and the lhs value are the same (so those which are likely the actual token comparisons)
            if TokenHandler.get_majority_token(char["index"][0]) == int(
                    char["value"]):
                pos_subst = TokenHandler.get_possible_substitutions(
                    char["operand"][0], char["stack"])
            # for tokencomp we might need to correct lookaheads, thus the index of char is corrected to Utils.max_index
            if char["index"][0] > Utils.max_index:
                char["index"][0] = Utils.max_index
            if pos_subst:
                chars |= _construct_continuation_set(pos_subst, char)

        elif char["operator"] == "tokenstore" or char["operator"] == "assert":
            pass

        else:
            # for less-than and greater-than comparisons the exact range is not defined, so we only learn the boundary value of the range
            if ">" in char["operator"]:
                TokenLearningHandler.add_rhs(char["operand"][0])
            elif "<" in char["operator"]:
                TokenLearningHandler.add_rhs(char["operand"][-1])
            else:
                TokenLearningHandler.add_rhs(char["operand"])

            chars |= _construct_continuation_set(
                [random.choice(char["operand"])], char)
    if not chars:
        # for cont in local_continuations:
        #     chars.add(("random"))
        return list()
    if found_token_same_index is not None:
        diff_token = TokenHandler.get_different_correct_token(tok_comp_values)
        if diff_token:
            found_token_same_index["stack"] = [
                "re-eval"
            ]  # reduce stack size to rank re-calculation higher
            return list(
                _construct_continuation_set(diff_token, found_token_same_index,
                                            False))
        else:
            return list(chars)
    return list(chars)