예제 #1
0
def SearchInputs(symbolicTracer, simpleTracer, initialSeedDict, isTraining):
    """Run the worklist search where new inputs are solved lazily.

    Every seed of ``initialSeedDict`` is wrapped in a
    ``RiverUtils.InputRLGenerational`` and queued. Inputs are then extracted
    one by one: an input whose ``buffer`` is ``None`` is first solved through
    ``solveLazyInput`` and the obtained score is reported to
    ``RLBanditsModuleInstance`` as experience. The concrete input is then
    expanded with ``Expand`` and all its children are queued. The loop ends
    when the worklist is empty or when ``solveLazyInput`` reports that the
    target was found.

    Args:
        symbolicTracer: tracer passed to ``solveLazyInput`` and ``Expand``.
        simpleTracer: tracer passed to ``solveLazyInput`` and used as the
            collector for the output statistics.
        initialSeedDict: iterable of seeds; each seed is enumerated into an
            ``index -> value`` buffer.
        isTraining: forwarded unchanged to ``solveLazyInput``.

    Returns:
        None.
    """
    # Init the worklist with the initial seed dict
    worklist = RiverUtils.InputsWorklist()

    # Put all the inputs in the seed dictionary to the worklist
    for initialSeed in initialSeedDict:
        inp = RiverUtils.InputRLGenerational()
        inp.buffer = dict(enumerate(initialSeed))
        inp.bound = 0
        inp.priority = 0

        worklist.addInput(inp)

    startTime = currTime = time.time()
    while worklist:
        # Take the first seed
        inputSeed: RiverUtils.Input = worklist.extractInput()

        # If the input was just estimated, solve it now !
        if inputSeed.buffer is None:
            # Remember what the estimation was based on before the input is replaced
            actionTaken = inputSeed.action
            bbPathInParent = inputSeed.BBPathInParentPC
            inputSeed, targetFound, score, _ = solveLazyInput(
                inputSeed, symbolicTracer, simpleTracer, isTraining)

            # A missing score is reported with the negative default score
            RLBanditsModuleInstance.addExperience(
                bb_path_state=bbPathInParent,
                action=actionTaken,
                realValue=score if score is not None else NEGATIVE_ACTION_SCORE)

            if targetFound:
                logging.critical(f"The solution to get to the target address is input {inputSeed}")
                # TODO: Bogdan put this in output somewhere , folder, visualization etc.
                # No reason to continue: leave the search loop right away
                break

            # Nothing usable came out of the solver for this entry
            if inputSeed is None:
                continue

        # Children are queued as they are; they get scored only when they
        # are solved (see the lazy branch above)
        for newInp in Expand(symbolicTracer, inputSeed):
            worklist.addInput(newInp)

        currTime = outputStats.UpdateOutputStats(startTime, currTime, collectorTracers=[simpleTracer])

    # Final statistics are always emitted, whatever ended the loop
    outputStats.UpdateOutputStats(startTime, currTime, collectorTracers=[simpleTracer], forceOutput=True)
def SearchInputs(symbolicTracer, simpleTracer, initialSeedDict):
    """Run the worklist search where every new input is scored immediately.

    Each seed of ``initialSeedDict`` becomes a ``RiverUtils.Input`` with bound
    and priority 0. Inputs are extracted one at a time and expanded with
    ``Expand``; every child is executed through ``ExecuteInputToDetectIssues``,
    scored with ``ScoreInput`` and queued. The search ends when the worklist
    is empty or as soon as ``ScoreInput`` reports that the target was found.

    Args:
        symbolicTracer: tracer passed to ``Expand``.
        simpleTracer: tracer passed to ``ScoreInput`` and used as the
            collector for the output statistics.
        initialSeedDict: iterable of seeds; each seed is enumerated into an
            ``index -> value`` buffer.

    Returns:
        None.
    """
    pending = RiverUtils.InputsWorklist()

    # Queue one input per initial seed
    for seed in initialSeedDict:
        seedInput = RiverUtils.Input()
        seedInput.buffer = dict(enumerate(seed))
        seedInput.bound = 0
        seedInput.priority = 0
        pending.addInput(seedInput)

    startTime = currTime = time.time()
    targetFound = False
    while not targetFound and pending:
        parent: RiverUtils.Input = pending.extractInput()

        for newInp in Expand(symbolicTracer, parent):
            # Execute the input to detect real issues with it
            ExecuteInputToDetectIssues(newInp)

            # The score becomes the priority of the input in the worklist
            targetFound, newInp.priority = ScoreInput(newInp, simpleTracer)
            if targetFound:
                logging.critical(
                    f"The solution to get to the target address is input {newInp}"
                )
                # TODO: Bogdan put this in output somewhere , folder, visualization etc.
                # The winning input is not queued; the outer loop stops too
                break

            pending.addInput(newInp)

        # Statistics are refreshed after every expanded parent
        currTime = outputStats.UpdateOutputStats(
            startTime, currTime, collectorTracers=[simpleTracer])

    # Final statistics are always emitted
    outputStats.UpdateOutputStats(startTime,
                                  currTime,
                                  collectorTracers=[simpleTracer],
                                  forceOutput=True)
예제 #3
0
    def reset(self):
        """Install a fresh zero-filled input, reset the tracer and return the observation.

        The new ``RiverUtils.Input`` uses a plain list buffer of
        ``self.args.maxLen`` zeros. The tracer memory and its persistent
        state are reset before the observation is built with
        ``self.fill_observation()``.
        """
        blankInput = RiverUtils.Input()
        blankInput.usePlainBuffer = True
        # Plain Python list of zeros, one entry per allowed input byte
        blankInput.buffer = [0] * self.args.maxLen
        self.input = blankInput

        tracer = self.tracer
        tracer.ResetMem()
        tracer.resetPersistentState()

        return self.fill_observation()
예제 #4
0
def testBaseEnvironment():
    """Exercise the 'RiverBinaryFuzzerBase-v0' environment with a random policy.

    Runs 10 episodes of at most 100 steps each. At every step a random action
    index is drawn from ``RiverUtils.Input.actionFunctors`` and the step is
    flagged as symbolic with probability 0.25. The accumulated reward is
    printed every 20 steps and at the end of each episode.

    Returns:
        None.
    """
    # Step 0: make environment
    args = RiverUtils.parseArgs()
    # r = RiverBinaryFuzzerBase(args)
    env = gym.make('RiverBinaryFuzzerBase-v0', args=args)

    folderPath = "./corpus"
    env.setCorpusSeed(folderPath)

    # Random policy action for testing purposes
    numEpisodes = 10
    numMaxStepsPerEpisode = 100
    for epIndex in tqdm(range(numEpisodes)):
        obs = env.reset()
        # The termination flag must start clean for every episode; otherwise
        # all episodes following the first finished one would run zero steps.
        done = False
        stepIndex = 0
        totalReward = 0
        while not done and stepIndex < numMaxStepsPerEpisode:
            # Choose an action: symbolic with probability 0.25
            isSymbolic = np.random.rand() < 0.25
            actionIndex = np.random.choice(len(
                RiverUtils.Input.actionFunctors))
            # Action index, context and parameters
            action = (actionIndex, {'isSymbolic': isSymbolic})
            # print(f"Action applied: {action}")
            obs, reward, newObs, done, info = env.step(action)
            stepIndex += 1
            totalReward += reward

            if stepIndex % 20 == 0:
                print(
                    f"Episode: {epIndex} step: {stepIndex} partial reward: {totalReward}"
                )

        # Reported after every episode
        print(f"Episode: {epIndex} totalReward:{totalReward}")
예제 #5
0
def Expand(symbolicTracer: RiverTracer, inputToTry):
    """Create the unsolved child inputs of ``inputToTry`` from one symbolic run.

    The input is executed on ``symbolicTracer`` and the path constraints of
    that run are walked starting at index ``inputToTry.bound``. For each
    branch of a multi-branch constraint that was not taken, a
    ``RiverUtils.InputRLGenerational`` with ``buffer = None`` is produced. It
    carries a copy of the parent buffer, the constraint to solve (the
    conjunction of the taken predicates so far and the untaken branch) and a
    priority given by ``RLBanditsModuleInstance.predict``. The last-run path
    constraints of the tracer are reset before returning.

    Args:
        symbolicTracer: tracer used for the symbolic run.
        inputToTry: parent input; its ``bound`` and ``buffer`` are read.

    Returns:
        The list of newly created (still unsolved) inputs.
    """
    logging.info(f"Seed injected:, {inputToTry}")

    # Only the basic block path of the run is used below
    _, _, bbPathOfRun = symbolicTracer.runInput(inputToTry,
                                                symbolized=True,
                                                countBBlocks=False)

    # Set of new inputs
    inputs: List[RiverUtils.Input] = []

    # Path constraints of the run that just finished, and the AST factory
    runConstraints = symbolicTracer.getLastRunPathConstraints()
    astCtxt = symbolicTracer.getAstContext()

    # Conjunction of the predicates taken so far; starts as a dummy "true == true"
    takenSoFar = astCtxt.equal(astCtxt.bvtrue(), astCtxt.bvtrue())

    # Start at the bound of the input (to prevent backtracking as described in the paper)
    for pcIndex in range(inputToTry.bound, len(runConstraints)):
        constraintAtIndex = runConstraints[pcIndex]
        branches = constraintAtIndex.getBranchConstraints()

        if RECONSTRUCT_BB_GRAPH:
            # Put all detected edges in the graph
            for edge in branches:
                onEdgeDetected(edge['srcAddr'], edge['dstAddr'])

        # A condition (not a direct jump): one lazy input per branch that was not taken
        if constraintAtIndex.isMultipleBranches():
            takenAddress = constraintAtIndex.getTakenAddress()
            for candidate in branches:
                if candidate['dstAddr'] != takenAddress:
                    # Same path as before up to here, but with this branch flipped
                    desiredConstrain = astCtxt.land(
                        [takenSoFar, candidate['constraint']])

                    newInput = RiverUtils.InputRLGenerational()
                    newInput.buffer = None  # Not solved yet
                    newInput.buffer_parent = copy.deepcopy(inputToTry.buffer)
                    newInput.bound = inputToTry.bound + 1
                    # Copy the path constraint of the parent input
                    newInput.PC = copy.copy(runConstraints)
                    newInput.BBPathInParentPC = copy.copy(bbPathOfRun)
                    newInput.constraint = desiredConstrain
                    newInput.action = pcIndex
                    # The priority is predicted once all other fields are set
                    newInput.priority = RLBanditsModuleInstance.predict(
                        newInput, bbPathOfRun)
                    inputs.append(newInput)
                    """
                    changes = symbolicTracer.solveInputChangesForPath(desiredConstrain)

                    # Then, if a possible change was detected => create a new input entry and add it to the output list
                    if changes:
                        newInput = copy.deepcopy(inputToTry)
                        newInput.applyChanges(changes)
                        newInput.bound = pcIndex + 1
                        inputs.append(newInput)
                    """

        # Extend the prefix with the taken branch to keep the path initially taken
        takenSoFar = astCtxt.land(
            [takenSoFar, constraintAtIndex.getTakenPredicate()])

    # Clear the path constraints to be clean at the next execution.
    symbolicTracer.resetLastRunPathConstraints()

    return inputs
예제 #6
0
def ScoreInput(newInp: RiverUtils.Input, symbolicTracer: RiverTracer,
               simpleTracer: RiverTracer):
    """Run ``newInp`` on ``simpleTracer`` and score it by its new basic blocks.

    Args:
        newInp: the input to run.
        symbolicTracer: not used by this function.
        simpleTracer: tracer used for the non-symbolic run with block counting.

    Returns:
        A ``(targetFound, score, allBBsInThisRun)`` tuple, where ``score`` is
        the number of new blocks reported by the run.
    """
    logging.info(f"--Scoring input {newInp}")

    runOutcome = simpleTracer.runInput(newInp,
                                       symbolized=False,
                                       countBBlocks=True)
    targetFound, newBlocksCount, allBBsInThisRun = runOutcome

    # Reward function: for now only the count of new basic blocks is used
    return targetFound, newBlocksCount, allBBsInThisRun


if __name__ == '__main__':

    args = RiverUtils.parseArgs()

    # Create two tracers : one symbolic used for detecting path constraints etc, and another one less heavy used only for tracing and scoring purpose
    symbolicTracer = RiverTracer(symbolized=True,
                                 architecture=args.architecture,
                                 maxInputSize=args.maxLen,
                                 targetAddressToReach=args.targetAddress)
    simpleTracer = RiverTracer(symbolized=True,
                               architecture=args.architecture,
                               maxInputSize=args.maxLen,
                               targetAddressToReach=args.targetAddress)

    # Load the binary info into the given list of tracers. We do this strage API to load only once the binary...
    RiverTracer.loadBinary([symbolicTracer, simpleTracer], args.binaryPath,
                           args.entryfuncName)
    if args.outputType == "textual":
예제 #7
0
if __name__ == "__main__":
    # Manual smoke test: feed random experiences to the bandits module and
    # print one prediction.
    rlBandits = RLBanditsModule()

    def randomExp():
        """Return a random ``(state, action, realValue)`` experience triple."""
        state = list(np.random.choice(10, size=np.random.randint(10)))
        action = np.random.randint(10)
        realValue = np.random.rand()
        return state, action, realValue

    # Create random experiences to trigger a training
    for _ in range(Batch_size):
        rlBandits.addExperience(*randomExp())

    # Predict for some values; the local is not called `input` so that the
    # builtin of the same name is not shadowed
    state, action, _ = randomExp()
    probeInput = RiverUtils.InputRLGenerational()
    probeInput.action = action
    print(rlBandits.predict(probeInput, state))
    """
	input_state = tf.random.uniform(shape=(Batch_size, 11))  # 11 is variable, can be anything !
	input_action = tf.random.uniform(shape=(Batch_size,), minval=0, maxval=action_emb_size, dtype=tf.int32)
	output_value = tf.random.uniform(shape=(Batch_size,), dtype=tf.float32)

	model = buildRLConcolicModel(Batch_size, BBlock_emb_size, action_emb_size)

	model.train(dataSet_X_state=input_state, dataSet_X_actions=input_action,
				dataSet_Y=output_value, epochs=numEpochs)

	outputs = model(input_state, input_action, training=False)  # model(input_state, input_action) #m({'state':input_state, 'action':input_action})
	"""
예제 #8
0
        ctx.setConcreteMemoryAreaValue(vaddr, phdr.content)

    logging.info(f"Findind the exported function of interest {path}..")
    res = binary.exported_functions
    for function in res:
        if ENTRY_FUNC_NAME in function.name:
            ENTRY = function.address
            logging.info(f"Function of interest found at address {ENTRY}")
            break

    assert ENTRY != None, "Exported function wasn't found"


if __name__ == '__main__':
    # Script entry point: read the arguments through RiverUtils.parseArgs(),
    # create a Triton context for the requested architecture, configure its
    # modes and load the binary given by args.binaryPath into it.

    args = RiverUtils.parseArgs()

    ctx = TritonContext(args.architecture)

    # Define symbolic optimizations
    ctx.setMode(MODE.ALIGNED_MEMORY, True)
    ctx.setMode(MODE.ONLY_ON_SYMBOLIZED, True)  # ????

    # Load the binary
    loadBinary(ctx, args.binaryPath)

    # This is a list of inputs tested before.. # TODO: remove this after impl SAGE or other techniques...
    inputsAlreadyTried = list()

    # TODO: currently random input first with random size between allowed regions
    # use corpus and dictionary
예제 #9
0
def testCustomPNGEnvironment():
    """Exercise 'RiverBinaryCustomForLibPNGEnv-v0' with a random policy and symbolic steps.

    Runs 10 episodes of at most 100 steps each. Once the accumulated reward
    has stagnated for at least 2 counted steps, a step is flagged as symbolic
    with probability 0.5. After a symbolic step the environment is asked to
    take an untaken branch from the reported path constraints; when that
    succeeds, the next step applies ``RiverUtils.Input.NO_ACTION_INDEX``
    instead of a random action. The accumulated reward is printed every 20
    steps and at the end of each episode.

    Returns:
        None.
    """
    # Step 0: make environment
    args = RiverUtils.parseArgs()
    # r = RiverBinaryFuzzerBase(args)
    env = gym.make('RiverBinaryCustomForLibPNGEnv-v0', args=args)

    folderPath = "./corpus"
    env.setCorpusSeed(folderPath)

    # Random policy action for testing purposes
    numEpisodes = 10
    numMaxStepsPerEpisode = 100
    # At how many steps to apply symbolic stuff if things stagnate
    stepsSameReward_thresholdForSymbolic = 2
    # If true, the SMT solver changed the input buffer, so the next step
    # should ideally not apply any action.
    # NOTE(review): this flag is carried over between episodes - confirm
    # whether it should be cleared after env.reset().
    isInputAlreadyModifiedBySolver = False

    for epIndex in tqdm(range(numEpisodes)):
        obs = env.reset()
        # The termination flag must start clean for every episode; otherwise
        # all episodes following the first finished one would run zero steps.
        done = False
        stepIndex = 0
        totalReward = 0

        # Keep counter if the reward has modified or not in the last T steps
        prev_reward = 0
        num_stepsSameReward = 0

        while not done and stepIndex < numMaxStepsPerEpisode:
            # Choose an action
            isSymbolicStep = False

            # Blocked in a local optimum: apply symbolic tracing half of the time
            if (num_stepsSameReward >= stepsSameReward_thresholdForSymbolic
                    and np.random.rand() < 0.5):
                # print(f"Applying a symbolic step at step {stepIndex}!")
                num_stepsSameReward = 0
                isSymbolicStep = True

            # If solver modified this last frame do not take any action in this step
            if isInputAlreadyModifiedBySolver:
                actionIndex = RiverUtils.Input.NO_ACTION_INDEX
                isInputAlreadyModifiedBySolver = False
            else:
                actionIndex = np.random.choice(
                    len(RiverUtils.Input.actionFunctors))

            # Action index, context and parameters
            action = (actionIndex, {'isSymbolic': isSymbolicStep})
            # print(f"Action applied: {action}")
            obs, reward, newObs, done, info = env.step(action)
            stepIndex += 1
            totalReward += reward

            # If a symbolic step was used, modify randomly one of the blocks to get a new unexplored branch, or maybe a low probability one
            if isSymbolicStep:
                # Take the last path constraints from the symbolic execution step
                pathConstraints = info['lastPathConstraints']

                # Modify the input buffer to take a different branch, will return True if found
                succeeded = env.symbolicTakeUntakenBranch(pathConstraints)

                if succeeded:
                    isInputAlreadyModifiedBySolver = True

                # Very important to call this to reset the last execution state of the symbolic tracer in the end !
                # OTHERWISE CONDITIONS WILL PROPAGATE BETWEEN RUNS !
                env.resetTracerSymbolicState()

            if prev_reward == totalReward:
                num_stepsSameReward += 1

            if stepIndex % 20 == 0:
                print(
                    f"Episode: {epIndex} step: {stepIndex} partial reward: {totalReward}"
                )

            prev_reward = totalReward

        # Reported after every episode
        print(f"Episode: {epIndex} totalReward:{totalReward}")
예제 #10
0
 def setCorpusSeed(self, path):
     """Store the processed seed corpus in ``self.corpus``.

     ``path`` is currently not used: the corpus is the fixed list
     ``["good"]`` passed through ``RiverUtils.processSeedDict``.
     """
     # TODO: load from folder path
     placeholderSeeds = ["good"]
     # Transform the initial seed dict to bytes instead of chars if needed
     processedSeeds = RiverUtils.processSeedDict(placeholderSeeds)
     self.corpus = processedSeeds