Example #1
0
    def ActionStep(self, action: int) -> tuple:
        """Apply the candidate pass selected by `action` and record the step.

        The passes of the steps already recorded in the current episode,
        followed by the newly selected pass, are run on the bytecode at
        `self.bytecode_path`. The result is compared with the working bytecode
        and then copied over it. The new step is appended to the current
        episode whether or not opt succeeded.

        Args:
            action: Index into `self.config.candidate_pass`. Must be contained
                in `self.action_space`.

        Returns:
            A 4-tuple `(observation, reward, done, info)`. The observation is
            a sample from `self.observation_space`, `done` is always False and
            `info` is always an empty dict.

        Raises:
            ValueError: If `action` is not contained in `self.action_space`.
        """
        if not self.action_space.contains(action):
            raise ValueError(f"Unknown action: '{action}'")
        start_ms = labdate.MillisecondsTimestamp()
        obs = self.observation_space.sample()
        opt_pass = self.config.candidate_pass[action]

        step = random_opt_pb2.DelayedRewardStep(
            start_time_epoch_ms=start_ms,
            opt_pass=opt_pass,
        )
        temp_path = self.working_dir / "temp.ll"

        # Run the full list of passes and update working_bytecode file.
        try:
            # The first entry of the step list is skipped: reset() seeds each
            # episode with a step that carries no pass.
            previous_passes = [
                s.opt_pass for s in self.episodes[-1].step[1:]
            ]
            # The new step is only appended at the end of this method, so the
            # pass chosen by this action has to be added explicitly; otherwise
            # the reward would describe the previous action.
            # NOTE(review): a pass that makes opt fail is still recorded, so it
            # is replayed (and presumably fails again) on every later step of
            # the episode - confirm this is intended.
            all_passes = previous_passes + [opt_pass]
            opt.RunOptPassOnBytecode(self.bytecode_path, temp_path, all_passes)
            # The bytecode changed exactly when the new output differs from
            # the working copy left by the previous step.
            step.bytecode_changed = not BytecodesAreEqual(
                temp_path, self.working_bytecode_path)
            shutil.copyfile(temp_path, self.working_bytecode_path)
            step.reward = (self.bytecode_changed_reward
                           if step.bytecode_changed else
                           self.bytecode_unchanged_reward)
        except llvm.LlvmError as e:
            # Opt failed, set the error message.
            step.reward = self.opt_failed_reward
            step.opt_error_msg = text.truncate(str(e), 255)

        step.total_step_runtime_ms = labdate.MillisecondsTimestamp() - start_ms
        self.episodes[-1].step.extend([step])
        return obs, step.reward, False, {}
Example #2
0
    def EndEpisodeStep(self) -> tuple:
        """Finish the current episode and compute its final reward.

        The working bytecode is compiled to `self.binary_path` with `-O0`.
        The runtimes returned by `GetRuntimes()` are then stored on the
        current episode and, if `BinaryIsValid()` holds, their mean is passed
        to `self.runtime_reward` to produce the reward. Each failure mode
        records an outcome on the episode and uses its own fixed reward:

        * `clang.ClangException` -> COMPILE_FAILED, `compile_failed_reward`.
        * `ValueError` while collecting or scoring runtimes -> EXEC_FAILED,
          `exec_failed_reward`.
        * `BinaryIsValid()` returning false -> EVAL_FAILED,
          `eval_failed_reward`.

        The final step is appended to the current episode in every case.

        Returns:
            A 4-tuple `(observation, reward, done, info)`. The observation is
            a sample from `self.observation_space`, `done` is always True and
            `info` is always an empty dict.
        """
        start_ms = labdate.MillisecondsTimestamp()
        step = random_opt_pb2.DelayedRewardStep(start_time_epoch_ms=start_ms)
        episode = self.episodes[-1]
        try:
            clang.Compile([self.working_bytecode_path],
                          self.binary_path,
                          copts=["-O0"])
            try:
                runtimes = self.GetRuntimes()
                episode.binary_runtime_ms.extend(runtimes)
                if self.BinaryIsValid():
                    step.reward = self.runtime_reward(
                        sum(runtimes) / len(runtimes))
                else:
                    episode.outcome = (
                        random_opt_pb2.DelayedRewardEpisode.EVAL_FAILED)
                    step.reward = self.eval_failed_reward
            except ValueError as e:
                # Use the episode's own outcome enum, as the other failure
                # branches do, rather than the enum of the unrelated Step
                # message.
                # NOTE(review): assumes DelayedRewardEpisode declares
                # EXEC_FAILED next to EVAL_FAILED and COMPILE_FAILED - confirm
                # against the proto definition.
                episode.outcome = (
                    random_opt_pb2.DelayedRewardEpisode.EXEC_FAILED)
                episode.outcome_error_msg = text.truncate(str(e), 255)
                step.reward = self.exec_failed_reward
        except clang.ClangException as e:
            episode.outcome = (
                random_opt_pb2.DelayedRewardEpisode.COMPILE_FAILED)
            episode.outcome_error_msg = text.truncate(str(e), 255)
            step.reward = self.compile_failed_reward

        obs = self.observation_space.sample()
        step.total_step_runtime_ms = labdate.MillisecondsTimestamp() - start_ms
        episode.step.extend([step])
        return obs, step.reward, True, {}
Example #3
0
 def reset(self):
     """Restore the unoptimized starting state and open a new episode.

     The original bytecode is copied over the working bytecode, the original
     bytecode is compiled to `self.binary_path` with `-O0`, the setup command
     is run, and a new episode holding a single timestamped step is appended
     to `self.episodes`.
     """
     source_path = self.bytecode_path
     working_path = self.working_bytecode_path

     app.Log(2, "$ cp %s %s", source_path, working_path)
     shutil.copyfile(source_path, working_path)
     clang.Compile([source_path], self.binary_path, copts=["-O0"])
     self.RunSetupCommand()

     # The timestamp is taken only after setup has finished, so the first
     # step marks the moment the episode becomes usable.
     first_step = random_opt_pb2.DelayedRewardStep(
         start_time_epoch_ms=labdate.MillisecondsTimestamp())
     new_episode = random_opt_pb2.DelayedRewardEpisode(step=[first_step])
     self.episodes.append(new_episode)