Example no. 1
0
    def wait(self) -> None:
        """Block on the configured readiness conditions.

        Optionally waits for tasks to start running and, when monitoring
        is enabled, for new files to show up.
        """

        def block_on(predicate, message: str) -> None:
            # wait() blocks until the predicate reports success, then we
            # log the corresponding milestone
            wait(predicate)
            self.logger.info(message)

        if self.wait_for_running:
            block_on(self.is_running, "tasks started")

        if self.to_monitor:
            block_on(self.has_files, "new files found")
Example no. 2
0
 def check_repro(self) -> bool:
     """Run the repro checks under wait() and report whether they passed."""
     self.logger.info("checking repros")
     outcome = wait(self.check_repro_impl)
     return outcome
Example no. 3
0
 def check_jobs(self) -> bool:
     """Poll every integration job until completion; return success."""
     self.logger.info("checking jobs")
     result = wait(self.check_jobs_impl)
     return result
Example no. 4
0
    def check_logs_for_errors(self) -> None:
        """Fail if unexpected errors appear in the Application Insights logs.

        A stop marker is injected and waited on so that pending log entries
        are flushed before scanning.  Entries between the stop and start
        markers are then inspected (newest first) and anything that is not a
        known-benign message is logged as an error; if any such error was
        seen, an Exception is raised.
        """
        # only check for errors that exist between the start and stop markers
        # also, only check errors (up to the query limit) within the last 3
        # hours. The records are scanned through in reverse chronological
        # order.

        self.inject_log(self.stop_log_marker)
        # poll until the stop marker shows up, i.e. the log pipeline flushed
        wait(self.check_log_end_marker, frequency=5.0)
        self.logger.info("application insights log flushed")

        # keyword query over the last 3 hours (ISO-8601 duration "PT3H")
        logs = self.of.debug.logs.keyword("error",
                                          limit=100000,
                                          timespan="PT3H")

        seen_errors = False  # any non-ignored error encountered so far
        seen_stop = False  # passed the stop marker yet (logs are newest-first)
        for entry in logs:
            message = entry.get("message", "")
            # skip everything newer than the stop marker
            if not seen_stop:
                if self.stop_log_marker in message:
                    seen_stop = True
                continue

            # entries older than the start marker are out of scope
            if self.start_log_marker in message:
                break

            # ignore logging.info coming from Azure Functions
            if entry.get("customDimensions",
                         {}).get("LogLevel") == "Information":
                continue

            # ignore warnings coming from the rust code, only be concerned
            # about errors
            if (entry.get("severityLevel") == 2
                    and entry.get("sdkVersion") == "rust:0.1.5"):
                continue

            # ignore resource not found warnings from azure-functions layer,
            # which relate to azure-retry issues
            if (message.startswith("Client-Request-ID=")
                    and "ResourceNotFound" in message and entry.get(
                        "sdkVersion", "").startswith("azurefunctions")):
                continue

            # ignore analyzer output, as we can't control what applications
            # being fuzzed send to stdout or stderr. (most importantly, cdb
            # prints "Symbol Loading Error Summary")
            if message.startswith(
                    "process (stdout) analyzer:") or message.startswith(
                        "process (stderr) analyzer:"):
                continue

            # TODO: ignore queue errors until tasks are shut down before
            # deleting queues https://github.com/microsoft/onefuzz/issues/141
            if ("storage queue pop failed" in message
                    or "storage queue delete failed"
                    in message) and entry.get("sdkVersion") == "rust:0.1.5":
                continue

            # NOTE(review): message defaults to "" above, so this is only
            # None when the entry stores an explicit None value — and the
            # earlier `in message` tests would already have raised on that;
            # confirm whether this branch is reachable.
            if message is None:
                self.logger.error("error log: %s", entry)
            else:
                self.logger.error("error log: %s", message)
            seen_errors = True

        if seen_errors:
            raise Exception("logs included errors")
Example no. 5
0
    def check_repro(self, repros: Dict[UUID, Tuple[Job, Repro]]) -> bool:
        """Poll the launched repro VMs until every repro has been verified.

        Each repro's state is refreshed from the service; once a repro VM is
        running it is connected to with an OS-appropriate debugger command,
        and it counts as a success when the debugger output matches the
        expected register-dump pattern.

        :param repros: map of job id to (job, repro) pairs; mutated in place
            as repros finish or fail.
        :return: the final result of wait() over the polling function.
        """
        self.logger.info("checking repros")
        # NOTE(review): self.success starts True and is never set to False in
        # check_repro_impl, even when a repro fails — confirm intended.
        self.success = True

        def check_repro_impl() -> Tuple[bool, str, bool]:
            # check all of the launched repros

            self.cleared = False

            def clear() -> None:
                # emit a newline once per poll so wait()'s progress output
                # does not run into the log lines that follow
                if not self.cleared:
                    self.cleared = True
                    print("")

            # per-OS debugger command and the regex its output must match
            # for the repro to be considered working
            commands: Dict[OS, Tuple[str, str]] = {
                OS.windows: ("r rip", r"^rip=[a-f0-9]{16}"),
                OS.linux:
                ("info reg rip", r"^rip\s+0x[a-f0-9]+\s+0x[a-f0-9]+"),
            }

            # refresh every tracked repro's state from the service
            for (job, repro) in list(repros.values()):
                repros[job.job_id] = (job, self.of.repro.get(repro.vm_id))

            for (job, repro) in list(repros.values()):
                if repro.error:
                    clear()
                    self.logger.error(
                        "repro failed: %s: %s",
                        job.config.name,
                        repro.error,
                    )
                    self.of.repro.delete(repro.vm_id)
                    del repros[job.job_id]
                elif repro.state == VmState.running:
                    # the VM is up: connect and run the debugger command
                    try:
                        result = self.of.repro.connect(
                            repro.vm_id,
                            delete_after_use=True,
                            debug_command=commands[repro.os][0],
                        )
                        if result is not None and re.search(
                                commands[repro.os][1], result, re.MULTILINE):
                            clear()
                            self.logger.info("repro succeeded: %s",
                                             job.config.name)
                        else:
                            clear()
                            self.logger.error("repro failed: %s - %s",
                                              job.config.name, result)
                    except Exception as err:
                        clear()
                        self.logger.error("repro failed: %s - %s",
                                          job.config.name, err)
                    del repros[job.job_id]
                elif repro.state not in [
                        VmState.init, VmState.extensions_launch
                ]:
                    # any state other than init/extensions_launch/running is
                    # unexpected at this point
                    self.logger.error("repro failed: %s - bad state: %s",
                                      job.config.name, repro.state)
                    del repros[job.job_id]

            # summarize the remaining repros grouped by VM state for the
            # status message
            repro_states: Dict[str, List[str]] = {}
            for (job, repro) in repros.values():
                if repro.state.name not in repro_states:
                    repro_states[repro.state.name] = []
                repro_states[repro.state.name].append(job.config.name)

            logline = []
            for state in repro_states:
                logline.append("%s:%s" %
                               (state, ",".join(repro_states[state])))

            # keep the status line short enough for wait()'s progress output
            msg = "waiting repro: %s" % " ".join(logline)
            if len(msg) > 80:
                msg = "waiting on %d repros" % len(repros)
            return (not bool(repros), msg, self.success)

        return wait(check_repro_impl)
Example no. 6
0
    def check_jobs(self,
                   poll: bool = False,
                   stop_on_complete_check: bool = False) -> bool:
        """ Check all of the integration jobs

        Builds the set of expected output containers for every known job,
        then checks (once, or repeatedly when polling) task states and
        container contents until each job has produced its expected files
        and its tasks have settled.

        :param poll: when True, keep polling via wait() until all jobs are
            done; when False, perform a single check and return.
        :param stop_on_complete_check: when True, stop each job as soon as
            it is seen to be complete.
        :return: True if every checked job succeeded.
        """
        # all currently known jobs, keyed by job id
        jobs: Dict[UUID, Job] = {x.job_id: x for x in self.get_jobs()}
        # the tasks belonging to each job
        job_tasks: Dict[UUID, List[Task]] = {}
        # per-job: container name -> (container client, required file count)
        check_containers: Dict[UUID, Dict[Container, Tuple[ContainerWrapper,
                                                           int]]] = {}

        for job in jobs.values():
            if job.config.name not in TARGETS:
                self.logger.error("unknown job target: %s", job.config.name)
                continue

            tasks = self.of.jobs.tasks.list(job.job_id)
            job_tasks[job.job_id] = tasks
            check_containers[job.job_id] = {}
            for task in tasks:
                for container in task.config.containers:
                    # only watch containers the target declares it will
                    # write files into
                    if container.type in TARGETS[
                            job.config.name].wait_for_files:
                        count = TARGETS[job.config.name].wait_for_files[
                            container.type]
                        check_containers[job.job_id][container.name] = (
                            ContainerWrapper(
                                self.of.containers.get(
                                    container.name).sas_url),
                            count,
                        )

        self.success = True
        self.logger.info("checking %d jobs", len(jobs))

        self.cleared = False

        def clear() -> None:
            # emit a newline once per poll so wait()'s progress output does
            # not run into the log lines that follow
            if not self.cleared:
                self.cleared = True
                if poll:
                    print("")

        def check_jobs_impl() -> Tuple[bool, str, bool]:
            # one polling pass: returns (done, status message, success)
            self.cleared = False
            failed_jobs: Set[UUID] = set()
            job_task_states: Dict[UUID, Set[TaskTestState]] = {}

            # check whether each watched container has enough files yet
            for job_id in check_containers:
                finished_containers: Set[Container] = set()
                for (container_name,
                     container_impl) in check_containers[job_id].items():
                    container_client, count = container_impl
                    if len(container_client.list_blobs()) >= count:
                        clear()
                        self.logger.info(
                            "found files for %s - %s",
                            jobs[job_id].config.name,
                            container_name,
                        )
                        finished_containers.add(container_name)

                for container_name in finished_containers:
                    del check_containers[job_id][container_name]

            # check the state of every task of every tracked job
            scalesets = self.of.scalesets.list()
            for job_id in job_tasks:
                finished_tasks: Set[UUID] = set()
                job_task_states[job_id] = set()

                for task in job_tasks[job_id]:
                    # skip tasks whose job has already been removed
                    if job_id not in jobs:
                        continue

                    task_result = self.check_task(jobs[job_id], task,
                                                  scalesets)
                    if task_result == TaskTestState.failed:
                        self.success = False
                        failed_jobs.add(job_id)
                    elif task_result == TaskTestState.stopped:
                        finished_tasks.add(task.task_id)
                    else:
                        job_task_states[job_id].add(task_result)
                # drop stopped tasks from further checking
                job_tasks[job_id] = [
                    x for x in job_tasks[job_id]
                    if x.task_id not in finished_tasks
                ]

            to_remove: Set[UUID] = set()
            for job in jobs.values():
                # stop tracking failed jobs
                if job.job_id in failed_jobs:
                    if job.job_id in check_containers:
                        del check_containers[job.job_id]
                    if job.job_id in job_tasks:
                        del job_tasks[job.job_id]
                    continue

                # stop checking containers once all the containers for the job
                # have checked out.
                if job.job_id in check_containers:
                    if not check_containers[job.job_id]:
                        clear()
                        self.logger.info(
                            "found files in all containers for %s",
                            job.config.name)
                        del check_containers[job.job_id]

                # once the containers are satisfied, stop tracking the job's
                # tasks when every remaining task is merely "running"
                if job.job_id not in check_containers:
                    if job.job_id in job_task_states:
                        if set([TaskTestState.running
                                ]).issuperset(job_task_states[job.job_id]):
                            del job_tasks[job.job_id]

                # nothing left to watch: the job is complete
                if job.job_id not in job_tasks and job.job_id not in check_containers:
                    clear()
                    self.logger.info("%s completed", job.config.name)
                    to_remove.add(job.job_id)

            for job_id in to_remove:
                if stop_on_complete_check:
                    self.stop_job(jobs[job_id])
                del jobs[job_id]

            # keep the status line short enough for wait()'s progress output
            msg = "waiting on: %s" % ",".join(
                sorted(x.config.name for x in jobs.values()))
            if poll and len(msg) > 80:
                msg = "waiting on %d jobs" % len(jobs)

            if not jobs:
                msg = "done all tasks"

            return (not bool(jobs), msg, self.success)

        if poll:
            return wait(check_jobs_impl)
        else:
            # single-shot mode: run one pass and report its outcome
            _, msg, result = check_jobs_impl()
            self.logger.info(msg)
            return result