def test_start_analysis_nofile(self):
    set_cwd(tempfile.mkdtemp())
    cuckoo_create()

    target = mock.MagicMock()
    target.is_file = False
    analysis = mock.MagicMock()
    analysis.status = "starting"

    gm = GuestManager(
        "cuckoo1", "1.2.3.4", "windows", 1, None, analysis, target
    )
    gm.wait_available = mock.MagicMock()

    httpresponse = mock.MagicMock()
    httpresponse.status_code = 200
    httpresponse.json.return_value = {
        "version": 0.8,
        "features": ["pinning", "execpy"]
    }
    gm.post = mock.MagicMock()
    gm.get = mock.MagicMock(return_value=httpresponse)
    gm.query_environ = mock.MagicMock()
    gm.upload_analyzer = mock.MagicMock()
    gm.add_config = mock.MagicMock()
    gm.determine_temp_path = mock.MagicMock(return_value="/tmp/sIYUbJJ")
    gm.analysis_manager = mock.MagicMock()

    gm.start_analysis({"timeout": 60, "file_name": "doge"}, None)

    assert gm.analysis.status == "starting"
    gm.get.assert_called_with("/pinning")
    gm.target.helper.get_filepointer.assert_not_called()
    gm.post.assert_called_once_with("/execpy", data=mock.ANY)
def test_start_analysis_timeout(self):
    set_cwd(tempfile.mkdtemp())
    cuckoo_create(cfg={
        "cuckoo": {
            "timeouts": {
                "critical": 123,
            },
        },
    })

    gm = GuestManager("cuckoo1", "1.2.3.4", "windows", 1, None)
    gm.wait_available = mock.MagicMock(side_effect=Exception)

    with pytest.raises(Exception):
        gm.start_analysis({"timeout": 42}, None)

    # The guest timeout is the task timeout plus the configured critical
    # timeout: 42 + 123 = 165.
    assert gm.timeout == 165
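# The two tests above repeat most of the GuestManager mocking. A possible
# consolidation is sketched below. The helper name _mocked_guest_manager is
# hypothetical and not part of the existing test suite; it assumes the same
# imports (mock, tempfile, set_cwd, cuckoo_create, GuestManager) already used
# by the tests above.
def _mocked_guest_manager(analysis_status="starting", is_file=False,
                          features=("pinning", "execpy")):
    """Build a GuestManager with all agent I/O mocked out (sketch)."""
    set_cwd(tempfile.mkdtemp())
    cuckoo_create()

    target = mock.MagicMock()
    target.is_file = is_file
    analysis = mock.MagicMock()
    analysis.status = analysis_status

    gm = GuestManager(
        "cuckoo1", "1.2.3.4", "windows", 1, None, analysis, target
    )
    gm.wait_available = mock.MagicMock()

    # Fake a modern agent that reports its version and feature set.
    httpresponse = mock.MagicMock()
    httpresponse.status_code = 200
    httpresponse.json.return_value = {
        "version": 0.8, "features": list(features)
    }
    gm.get = mock.MagicMock(return_value=httpresponse)
    gm.post = mock.MagicMock()
    gm.query_environ = mock.MagicMock()
    gm.upload_analyzer = mock.MagicMock()
    gm.add_config = mock.MagicMock()
    gm.determine_temp_path = mock.MagicMock(return_value="/tmp/sIYUbJJ")
    gm.analysis_manager = mock.MagicMock()
    return gm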
class Regular(AnalysisManager):

    supports = ["regular", "baseline", "service"]

    def init(self, db):
        """Executed by the scheduler. Prepares the analysis for starting."""
        # Used at the processing and final stage to determine if processing
        # was run and successful.
        self.processing_success = False

        # TODO Remove this when the latest RDP/VNC machine additions/changes
        # have been added. Currently, enabling them requires a db change at
        # the start of a machine.
        self.db = db

        # If for some reason the task dir does not exist, stop the analysis,
        # because it should have been created upon submission.
        if not self.task.dir_exists():
            log.error("Task directory for task #%s does not exist",
                      self.task.id)
            return False

        self.guest_manager = GuestManager(
            self.machine.name, self.machine.ip, self.machine.platform,
            self.task, self, self.analysis, self.target
        )

        self.aux = RunAuxiliary(
            self.task.task_dict, self.machine, self.guest_manager
        )
        self.rt = RealTimeHandler()

        # Check if the current task has remote control enabled before
        # starting the machine.
        self.control_enabled = (
            config("cuckoo:remotecontrol:enabled") and
            "remotecontrol" in self.task.options
        )

        # Write task to disk in json file
        self.task.write_task_json()

        if not self.target.target:
            self.build_options()
            return True

        options = {
            "category": self.target.category,
            "target": self.target.target
        }

        if self.target.is_file:
            if not self.target.copy_exists():
                log.error("The file to submit '%s' does not exist",
                          self.target.copied_binary)
                return False

            if not self.target.helper.is_readable():
                log.error(
                    "Unable to read target file %s, please check if it is"
                    " readable for the user executing Cuckoo Sandbox",
                    self.target.copied_binary
                )
                return False

            task_options = {}
            package, activity = self.target.helper.get_apk_entry()
            if package and activity:
                task_options["apk_entry"] = "%s:%s" % (package, activity)

            options.update({
                "file_name": os.path.basename(self.target.target),
                "file_type": self.target.helper.get_type(),
                "pe_exports": ",".join(
                    self.target.helper.get_exported_functions()
                ),
                "options": task_options
            })

        self.build_options(options=options)

        return True

    def run(self):
        """Starts the analysis manager thread."""
        task_log_start(self.task.id)
        analysis_success = False

        try:
            analysis_success = self.start_and_wait()

            # Check whether the analysis did not fail in the analysis manager
            # and whether the status was not set to failed by the guest
            # manager.
            if analysis_success:
                if self.analysis.status == Analysis.FAILED:
                    analysis_success = False
        except Exception as e:
            log.exception(
                "Failure during the starting of task #%s. Error: %s",
                self.task.id, e, extra={
                    "action": "task.start",
                    "status": "error"
                }
            )
        finally:
            try:
                self.stop_and_wait()
            except Exception as e:
                log.exception(
                    "Failure during the stopping of task #%s. Error: %s",
                    self.task.id, e, extra={
                        "action": "task.stop",
                        "status": "error"
                    }
                )

        if analysis_success:
            self.set_analysis_status(Analysis.STOPPED, wait=True)
        else:
            self.set_analysis_status(Analysis.FAILED, wait=True)

        if not config("cuckoo:cuckoo:process_results"):
            log.debug("Cuckoo process_results is set to 'no',"
                      " not processing results")
            return

        log.info(
            "Processing and reporting results for task #%s", self.task.id,
            extra={
                "action": "task.report",
                "status": "pending"
            }
        )

        try:
            self.processing_success = self.task.process()
        except Exception as e:
            log.exception(
                "Error during processing of task #%s. Error: %s",
                self.task.id, e, extra={
                    "action": "task.report",
                    "status": "failed"
                }
            )
            return

        log.info(
            "Task #%d: analysis procedure completed", self.task.id,
            extra={
                "action": "task.report",
                "status": "finished",
            }
        )

    def start_and_wait(self):
        """Start the analysis by running the auxiliary modules, adding the
        task to the resultserver, starting the machine and running a guest
        manager."""
        # Set guest status to starting and start the analysis machine.
        self.set_analysis_status(Analysis.STARTING)

        target = self.target.target
        if self.target.target and self.target.is_file:
            target = os.path.basename(target)

        log.info(
            "Starting analysis (task #%s, options: '%s') type '%s'."
            " Target: %s '%s'", self.task.id, self.options["options"],
            self.task.type, self.target.category, target, extra={
                "action": "task.init",
                "status": "starting",
                "task_id": self.task.id,
                "target": target,
                "category": self.target.category,
                "package": self.task.package,
                "options": self.options["options"],
                "custom": self.task.custom,
                "type": self.task.type
            }
        )

        ResultServer().add_task(self.task.db_task, self.machine, self.rt)

        # Start auxiliary modules
        self.aux.start()

        if self.control_enabled:
            try:
                self.machinery.enable_remote_control(self.machine.label)
            except NotImplementedError:
                self.control_enabled = False
                log.exception(
                    "Remote control support has not been implemented "
                    "for machinery %s.", self.machine.manager
                )

        # Json log for performance measurement purposes
        logger(
            "Starting VM", action="vm.start", status="pending",
            vmname=self.machine.name
        )

        try:
            self.machinery.start(self.machine.label, self.task.db_task)
        except CuckooMachineSnapshotError as e:
            log.error(
                "Unable to restore to the snapshot for this Virtual Machine! "
                "Does your VM have a proper Snapshot and can you revert to it "
                "manually? VM: %s, error: %s", self.machine.name, e, extra={
                    "action": "vm.resume",
                    "status": "error",
                    "vmname": self.machine.name,
                }
            )
            return False
        except CuckooMachineError as e:
            log.error(
                "Error starting Virtual Machine! VM: %s, error: %s",
                self.machine.name, e, extra={
                    "action": "vm.start",
                    "status": "error",
                    "vmname": self.machine.name,
                }
            )
            return False

        logger(
            "Started VM", action="vm.start", status="success",
            vmname=self.machine.name
        )

        # Retrieve the port used for remote control.
        if self.control_enabled:
            try:
                params = self.machinery.get_remote_control_params(
                    self.machine.label
                )
                self.db.set_machine_rcparams(self.machine.label, params)
            except NotImplementedError:
                log.exception(
                    "Remote control support has not been implemented "
                    "for machinery %s.", self.machine.manager
                )

        # Enable network routing
        self.route.route_network()

        # By the time start returns it will have fully started the Virtual
        # Machine. We can now safely release the machine lock.
        self.release_machine_lock()

        # Request scheduler action for status 'starting'
        self.request_scheduler_action(Analysis.STARTING)

        # Choose the correct way of waiting for or managing the agent and
        # execute it.
        try:
            self.manage()
        except CuckooGuestCriticalTimeout as e:
            log.error(
                "Error from machine '%s': it appears that this Virtual "
                "Machine hasn't been configured properly as the Cuckoo Host "
                "wasn't able to connect to the Guest. There could be a few "
                "reasons for this, please refer to our documentation on the "
                "matter: %s", self.machine.name,
                faq("troubleshooting-vm-network-configuration"), extra={
                    "error_action": "vmrouting",
                    "action": "guest.handle",
                    "status": "error",
                    "task_id": self.task.id,
                }
            )
        except CuckooGuestError as e:
            log.error(
                "Error from the Cuckoo Guest: %s", e, extra={
                    "action": "guest.handle",
                    "status": "error",
                    "task_id": self.task.id,
                }
            )

        return True

    def stop_and_wait(self):
        """Stop the analysis by stopping the aux modules, optionally dumping
        VM memory, stopping the VM and deleting the task from the
        resultserver."""
        self.set_analysis_status(Analysis.STOPPING)

        # Stop all Auxiliary modules
        self.aux.stop()

        # If enabled, make a full memory dump of the machine before it shuts
        # down.
        if config("cuckoo:cuckoo:memory_dump") or self.task.memory:
            logger(
                "Taking full memory dump", action="vm.memdump",
                status="pending", vmname=self.machine.name
            )
            try:
                dump_path = os.path.join(self.task.path, "memory.dmp")
                self.machinery.dump_memory(self.machine.label, dump_path)

                logger(
                    "Taken full memory dump", action="vm.memdump",
                    status="success", vmname=self.machine.name
                )
            except NotImplementedError:
                log.error(
                    "The memory dump functionality is not available for "
                    "the current machine manager.", extra={
                        "action": "vm.memdump",
                        "status": "error",
                        "vmname": self.machine.name,
                    }
                )
            except CuckooMachineError as e:
                log.error(
                    "Machinery error: %s", e, extra={
                        "action": "vm.memdump",
                        "status": "error",
                    }
                )

        logger(
            "Stopping VM", action="vm.stop", status="pending",
            vmname=self.machine.name
        )

        # Stop the analysis machine.
        try:
            self.machinery.stop(self.machine.label)
        except CuckooMachineError as e:
            log.warning(
                "Unable to stop machine %s: %s", self.machine.label, e,
                extra={
                    "action": "vm.stop",
                    "status": "error",
                    "vmname": self.machine.name,
                }
            )

        logger(
            "Stopped VM", action="vm.stop", status="success",
            vmname=self.machine.name
        )

        # Disable remote control after stopping the machine if it was
        # enabled for the task.
        if self.control_enabled:
            try:
                self.machinery.disable_remote_control(self.machine.label)
            except NotImplementedError:
                log.exception(
                    "Remote control support has not been implemented "
                    "for machinery %s.", self.machine.manager
                )

        # After all this, we can make the ResultServer forget about the
        # internal state for this analysis task.
        ResultServer().del_task(self.task.db_task, self.machine)

        # Drop the network routing rules if any.
        self.route.unroute_network()

    def manage(self):
        """Choose and use the method of waiting for or managing the further
        steps of an analysis."""
        if "noagent" in self.machine.options:
            log.debug("Using the handler for the 'noagent' option")
            self.set_analysis_status(Analysis.RUNNING)
            self.wait_finish()
        elif self.task.type == "baseline":
            log.debug("Sleeping until timeout for baseline")
            self.set_analysis_status(Analysis.RUNNING)
            time.sleep(self.options["timeout"])
        else:
            log.debug("Using guest manager")
            monitor = self.task.options.get("monitor", "latest")
            self.guest_manager.start_analysis(self.options, monitor)
            if self.analysis.status == Analysis.STARTING:
                self.set_analysis_status(Analysis.RUNNING)
                self.guest_manager.wait_for_completion()

    def set_target(self, targets):
        if targets:
            self.target = targets[0]
        else:
            self.target = Target()

    def on_status_starting(self, db):
        """Executed by the scheduler on analysis status 'starting'.
        Stores the chosen route in the db."""
        log.info("Using route '%s' for task #%s",
                 self.route.route, self.task.id)
        # Propagate the taken route to the database.
        db.set_route(self.task.id, self.route.route)

        # Store the used machine in the task.
        db.set_machine(self.task.id, self.machine.name)

    def on_status_stopped(self, db):
        """Executed by the scheduler on analysis status 'stopped'. Sets the
        task to completed, writes the task json to the analysis folder and
        releases the machine if it is locked."""
        log.debug("Setting task #%s status to %s",
                  self.task.id, TASK_COMPLETED)
        self.task.set_status(TASK_COMPLETED)

        # Update the task object and write the json to disk.
        self.task.write_task_json()

        if self.machine.locked:
            log.debug("Releasing machine lock on %s", self.machine.label)
            self.machine = self.machinery.release(self.machine.label)

    def on_status_failed(self, db):
        """Executed by the scheduler on analysis status 'failed'. Releases
        the machine if it is locked and updates the task status to analysis
        failed."""
        log.error("Analysis for task #%s failed", self.task.id)
        if self.machine.locked:
            log.debug("Releasing machine lock on %s", self.machine.label)
            self.machine = self.machinery.release(self.machine.label)

    def finalize(self, db):
        """Executed by the scheduler when the analysis manager thread exits.
        Updates the task status to the correct one and updates the
        task.json."""
        self.task.set_latest()
        self.release_machine_lock()

        # If, at this point, the analysis is not stopped, it cannot have
        # succeeded, since the manager thread has already exited. Update the
        # status to failed if the results are not going to be processed.
        if self.analysis.status != Analysis.STOPPED:
            log.warning("Analysis status is '%s' after exit.",
                        self.analysis.status)
            if not config("cuckoo:cuckoo:process_results"):
                log.debug("Setting task #%s status to %s",
                          self.task.id, TASK_FAILED_ANALYSIS)
                self.task.write_task_json(status=TASK_FAILED_ANALYSIS)
                self.task.set_status(TASK_FAILED_ANALYSIS)

        if config("cuckoo:cuckoo:process_results"):
            if self.processing_success:
                log.debug("Setting task #%s status to %s",
                          self.task.id, TASK_REPORTED)
                self.task.write_task_json(status=TASK_REPORTED)
                self.task.set_status(TASK_REPORTED)
            else:
                log.debug("Setting task #%s status to %s",
                          self.task.id, TASK_FAILED_PROCESSING)
                self.task.write_task_json(status=TASK_FAILED_PROCESSING)
                self.task.set_status(TASK_FAILED_PROCESSING)

        task_log_stop(self.task.id)
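# For reference, the final task status chosen by Regular.finalize() above
# reduces to the following decision table. This is an illustrative sketch,
# not a helper used by the codebase; the function name and the plain string
# return values are hypothetical stand-ins for the TASK_* constants.
def _final_task_status(analysis_stopped, process_results, processing_success):
    """Mirror the status picked by Regular.finalize() (sketch)."""
    if process_results:
        # With processing enabled, the outcome of task.process() decides.
        return "reported" if processing_success else "failed_processing"
    if not analysis_stopped:
        # No processing and the analysis never reached STOPPED: failure.
        return "failed_analysis"
    # No processing, but the analysis stopped cleanly: the status stays at
    # the 'completed' value already set by on_status_stopped().
    return "completed"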
class MassURL(AnalysisManager):

    supports = ["massurl"]

    URL_BLOCKSIZE = 5
    SECS_PER_BLOCK = 25

    def init(self, db):
        # If for some reason the task dir does not exist, stop the analysis,
        # because it should have been created upon submission.
        if not self.task.dir_exists():
            log.error("Task directory for task #%s does not exist",
                      self.task.id)
            return False

        if not URLDiaries.init_done:
            URLDiaries.init()

        self.curr_block = self.new_target_block()
        if not self.curr_block:
            log.error("Empty target list, cannot proceed.")
            return False

        self.rt = RealTimeHandler()
        self.ev_client = EventClient()
        self.URL_BLOCKSIZE = int(
            self.task.options.get("urlblocksize", self.URL_BLOCKSIZE)
        )
        self.SECS_PER_BLOCK = int(
            self.task.options.get("blocktime", self.SECS_PER_BLOCK)
        )
        self.aborted = False
        self.completed = False

        # Write task to disk in json file
        self.task.write_task_json()

        self.build_options(options={
            "category": "url",
            "target": ",".join(self.curr_block.keys()),
            "enforce_timeout": True,
            "timeout": len(self.task.targets) * self.SECS_PER_BLOCK * 3
        })

        self.guest_manager = GuestManager(
            self.machine.name, self.machine.ip, self.machine.platform,
            self.task, self, self.analysis,
            self.curr_block.get(
                self.curr_block.keys().pop(0)
            ).get("target_obj")
        )

        self.aux = RunAuxiliary(
            self.task.task_dict, self.machine, self.guest_manager
        )

        # The wait/agent status checking etc is run in a separate thread.
        # This allows the analysis manager to perform other actions while
        # the analysis is running.
        self.gm_wait_th = threading.Thread(
            target=self.guest_manager.wait_for_completion
        )
        self.gm_wait_th.daemon = True

        self.detection_events = []
        self.netflow_events = []
        self.js_events = []
        self.realtime_finished = set()
        self.tlskeys_response = None
        self.realtime_error = False
        self.requestfinder = RequestFinder(self.task.id)
        self.all_pids_targets = {}

        return True

    def set_target(self, targets):
        blocksize = int(
            self.task.options.get("urlblocksize", self.URL_BLOCKSIZE)
        )
        self.targets = []
        for i in range(0, len(targets), blocksize):
            self.targets.append(targets[i:i + blocksize])

    def new_target_block(self):
        block = {}
        if self.targets:
            for t in self.targets.pop(0):
                diary = URLDiary(
                    t.target, t.sha256, self.machine.name, self.task.package
                )
                block[t.target] = {"diary": diary, "target_obj": t}
            return block
        return {}

    def start_run(self):
        self.set_analysis_status(Analysis.STARTING)
        log.info(
            "Starting analysis (task #%s, options: '%s') type '%s'. %d URLs",
            self.task.id, self.options["options"], self.task.type,
            len(self.task.targets)
        )

        ResultServer().add_task(self.task.db_task, self.machine, self.rt)
        self.aux.start()

        try:
            self.machinery.start(self.machine.label, self.task.db_task)
        except CuckooMachineSnapshotError as e:
            log.error(
                "Unable to restore to the snapshot for this Virtual Machine! "
                "Does your VM have a proper Snapshot and can you revert to it "
                "manually? VM: %s, error: %s", self.machine.name, e
            )
            return False
        except CuckooMachineError as e:
            log.error(
                "Error starting Virtual Machine! VM: %s, error: %s",
                self.machine.name, e
            )
            return False

        # Enable network routing
        if not self.route.route_network():
            log.error("Failed to use chosen route for the analysis")
            self.ev_client.send_event("massurltaskfailure", {
                "taskid": self.task.id,
                "error": "Failed to use chosen route '%s'. "
                         "Inspect the log" % self.route.route,
                "status": self.analysis.status
            })
            return False

        # By the time start returns it will have fully started the Virtual
        # Machine. We can now safely release the machine lock.
        self.release_machine_lock()

        # Request scheduler action for status 'starting'
        self.request_scheduler_action(Analysis.STARTING)

        try:
            self.guest_manager.start_analysis(
                self.options, self.task.options.get("monitor", "latest")
            )
        except CuckooGuestCriticalTimeout as e:
            log.error(
                "Critical timeout reached while starting virtual"
                " machine. %s", e
            )
            return False
        except CuckooGuestError as e:
            log.error("Failed to prepare guest for analysis: %s", e)
            return False

        return self.analysis.status == Analysis.STARTING

    def stop_and_wait(self):
        if self.rt.sock:
            try:
                # Use the realtime protocol to ask the analyzer to stop, so
                # that the analysis, logs etc can be closed gracefully.
                self.rt.send_command_blocking(
                    RealTimeMessages.stop_analyzer(), maxwait=3
                )
            except RealtimeError:
                log.warning("No response from analyzer to stopping request")

        self.set_analysis_status(Analysis.STOPPING)

        # Stop all Auxiliary modules
        self.aux.stop()

        # Wait for the guest manager wait to stop before stopping the
        # machine. We want any exception messages to be retrieved from the
        # agent.
        if self.gm_wait_th.is_alive():
            self.gm_wait_th.join(timeout=6)

        # Stop the analysis machine.
        try:
            self.machinery.stop(self.machine.label)
        except CuckooMachineError as e:
            log.warning(
                "Unable to stop machine %s: %s", self.machine.label, e,
            )

        # After all this, we can make the ResultServer forget about the
        # internal state for this analysis task.
        ResultServer().del_task(self.task.db_task, self.machine)

        # Drop the network routing rules if any.
        self.route.unroute_network()

    def run_analysis(self):
        while self.curr_block:
            if not self.gm_wait_th.is_alive():
                return

            log.info("Uploaded new block of %d URLs", len(self.curr_block))

            pkg_info = {}
            tries = len(self.curr_block) * 10
            while not pkg_info:
                try:
                    pkg_info = self.rt.send_command_blocking(
                        RealTimeMessages.list_packages(),
                        maxwait=len(self.curr_block) * 2
                    )
                except RealtimeError as e:
                    log.error(
                        "No response from guest or it failed to send "
                        "analysis package information. %s", e
                    )
                    return

                tries -= 1
                if not pkg_info and tries <= 0:
                    log.error(
                        "Analyzer is not returning target PIDs. It might "
                        "have failed to start the targets."
                    )
                    return

                time.sleep(1)

            pids_targets = {
                int(pid): target
                for p in pkg_info for pid, target in p.get("pids").items()
            }
            self.all_pids_targets.update(pids_targets)

            # Give the URLs some time to load and remain opened.
            time.sleep(self.SECS_PER_BLOCK)

            # Request the analyzer to stop all running analysis packages.
            try:
                self.rt.send_command(RealTimeMessages.stop_all_packages())
            except RealtimeError as e:
                log.error(
                    "Error sending real-time package stop command. %s", e
                )

            # Ask realtime to process the generated onemon protobuf file.
            signature_events = self.handle_events(pids_targets)

            # The end of the URL block is reached, have the scheduler do the
            # database operations.
            self.request_scheduler_action(for_status="stopurlblock")

            # Store URL diaries
            for url, info in self.curr_block.iteritems():
                diary = info.get("diary")
                if not diary.stored:
                    URLDiaries.store_diary(info.get("diary"))

            if signature_events:
                self.request_scheduler_action(for_status="aborted")
                return

            # Acquire the next block of URLs according to the defined URL
            # blocksize.
            self.curr_block = self.new_target_block()
            if not self.curr_block:
                continue

            try:
                self.rt.send_command_blocking(
                    RealTimeMessages.start_package(
                        target=self.curr_block.keys(), category="url",
                        package=self.task.package, options=self.task.options,
                        respond=True
                    ), maxwait=len(self.curr_block) * 10
                )
            except RealtimeError as e:
                log.error(
                    "No response from guest or it failed to open new URLs. "
                    "Error: %s", e
                )
                return

        # The loop was broken because there are no targets left. This means
        # the analysis was completed.
        self.completed = True

    def attribute_js(self, pid_target):
        pid_js = {}
        ppid_pid = {}
        for pid, ppid, code in self.js_events:
            if pid not in pid_js:
                pid_js[pid] = []
            pid_js[pid].append(code)

            if ppid not in ppid_pid:
                ppid_pid[ppid] = set()
            ppid_pid[ppid].add(pid)

        def walk_childprocs(pid, t):
            for js in pid_js.get(pid, []):
                self.curr_block.get(t).get("diary").add_javascript(js)

            for child in ppid_pid.get(pid, []):
                walk_childprocs(child, t)

        for pid, target in pid_target.iteritems():
            walk_childprocs(pid, target)

    def extract_requests(self, pid_target):
        flow_target = {}
        flows = {}
        ppid_pid = {}
        ports = set()
        for flow, pid, ppid in self.netflow_events:
            ports.add(flow[3])
            if pid not in flows:
                flows[pid] = []
            flows[pid].append(flow)

            if ppid not in ppid_pid:
                ppid_pid[ppid] = set()
            ppid_pid[ppid].add(pid)

        def walk_childprocs(pid, t):
            for flow in flows.get(pid, []):
                flow_target[flow] = t

            for child in ppid_pid.get(pid, []):
                walk_childprocs(child, t)

        for pid, target in pid_target.iteritems():
            walk_childprocs(pid, target)

        reports = self.requestfinder.process(flow_target, ports=ports)
        for target_url, report in reports.iteritems():
            log.debug("Traffic extracted for %s", target_url)
            target_helpers = self.curr_block.get(target_url)
            diary = target_helpers.get("diary")
            diary.set_request_report(report)

    def handle_events(self, pid_target):
        # New queues for a new batch, to be sure they are empty.
        self.detection_events = []
        self.netflow_events = []
        self.realtime_finished = set()
        self.tlskeys_response = None
        self.realtime_error = False
        self.js_events = []

        wait_for = set()

        # Tell onemon to process results.
        self.ev_client.send_event(
            "massurltask", body={"taskid": self.task.id}
        )
        wait_for.add("massurltask")

        # If IE was used, TLS master secrets can be extracted. If no package
        # is supplied, the analyzer will use IE.
        if not self.task.package or self.task.package.lower() == "ie":
            if config("massurl:massurl:extract_tls"):
                lsass_pid = get_lsass_pids(self.task.id)
                if lsass_pid:
                    log.debug(
                        "Running TLS key extraction for task #%s",
                        self.task.id
                    )
                    self.ev_client.send_event("dumptls", {
                        "taskid": self.task.id,
                        "lsass_pid": lsass_pid
                    })
                    wait_for.add("dumptls")

        waited = 0
        while wait_for and not self.realtime_error:
            if waited >= 60:
                log.error(
                    "Timeout for realtime onemon processor reached. No"
                    " results received. Stopping analysis of current URL"
                    " block: %r. Was waiting for results of: %s",
                    self.curr_block.keys(), wait_for
                )
                break

            waited += 0.5
            time.sleep(0.5)
            wait_for -= self.realtime_finished

        if self.realtime_error:
            log.error(
                "Realtime processor reported an error. %s",
                self.realtime_error
            )

        if self.netflow_events:
            log.debug(
                "Running request extraction for task: #%s", self.task.id
            )
            self.extract_requests(pid_target)

        if self.js_events:
            log.debug("Running Javascript attribution")
            self.attribute_js(pid_target)

        # If no events were sent by onemon, no signatures were triggered.
        # Continue the analysis.
        if self.detection_events:
            self.handle_signature_events()
            return True

        return False

    def handle_signature_events(self):
        log.info(
            "%d realtime signatures triggered for task #%d",
            len(self.detection_events), self.task.id
        )

        # Collect all triggered signatures from the queue.
        sigs = []
        for ev in self.detection_events:
            sigs.append({
                "signature": ev.get("signature"),
                "description": ev.get("description"),
                "ioc": ev.get("ioc")
            })

        # A signature was triggered while only a single URL was opened.
        # Update and store the URL diary, and send a detection event.
        if len(self.curr_block) == 1:
            diary = self.curr_block.itervalues().next().get("diary")
            diary.add_signature(sigs)
            diary_id = URLDiaries.store_diary(diary)
            self.ev_client.send_event("massurldetection", body={
                "taskid": self.task.id,
                "status": "aborted",
                "candidates": self.curr_block.keys(),
                "signatures": sigs,
                "diary_id": diary_id
            })
        else:
            # Multiple URLs were opened while signatures were triggered.
            # Send a detection event with all URLs that were opened. The
            # massurl scheduler will create a new task with only these URLs.
            self.ev_client.send_event("massurldetection", body={
                "taskid": self.task.id,
                "status": "aborted",
                "candidates": self.curr_block.keys(),
                "signatures": sigs
            })

    def run(self):
        task_log_start(self.task.id)
        if not self.ev_client.start(maxtries=2):
            log.error(
                "Could not connect to Cuckoo event messaging client."
                " Aborting"
            )
            self.set_analysis_status(Analysis.FAILED)
            return

        # Tell the client to ask the event server to send all events of type
        # 'signature' and 'netflow'. These events will be sent by onemon.
        self.ev_client.subscribe(self.realtime_sig_cb, "signature")
        self.ev_client.subscribe(self.realtime_netflow_cb, "netflow")
        self.ev_client.subscribe(self.realtime_finished_cb, "finished")
        self.ev_client.subscribe(self.realtime_tlskeys_cb, "tlskeys")
        self.ev_client.subscribe(self.realtime_javascript_cb, "javascript")
        self.ev_client.subscribe(self.realtime_error_cb, "error")

        try:
            if self.start_run():
                self.set_analysis_status(Analysis.RUNNING)
                self.gm_wait_th.start()
                self.run_analysis()
        except Exception as e:
            log.error(
                "Failure during analysis run of task #%s. %s. %s",
                self.task.id, e, traceback.format_exc()
            )
            try:
                self.ev_client.send_event("massurltaskfailure", {
                    "taskid": self.task.id,
                    "error": "%s" % traceback.format_exc(4),
                    "status": self.analysis.status
                })
            except Exception as e:
                log.exception("Failed to send failure notification event")
        finally:
            try:
                self.stop_and_wait()
            except Exception as e:
                log.exception(
                    "Failure while stopping analysis run of task #%s: %s",
                    self.task.id, e
                )

        if self.completed or self.aborted:
            self.set_analysis_status(Analysis.STOPPED, wait=True)
        else:
            self.set_analysis_status(Analysis.FAILED, wait=True)

    def realtime_sig_cb(self, message):
        """Handle incoming signature events from the realtime processor."""
        log.debug("Signature event for task #%s", self.task.id)
        task_id = message["body"].get("taskid")
        if not task_id or task_id != self.task.id:
            return

        for k in ("description", "ioc", "signature"):
            if k not in message["body"]:
                return

        self.detection_events.append(message["body"])

    def realtime_netflow_cb(self, message):
        """Handle incoming netflow events from the realtime processor."""
        task_id = message["body"].get("taskid")
        if not task_id or task_id != self.task.id:
            return

        for k in ("srcip", "srcport", "dstip", "dstport", "pid", "ppid"):
            if k not in message["body"]:
                return

        flow = message["body"]
        self.netflow_events.append((
            (flow.get("srcip"), flow.get("srcport"), flow.get("dstip"),
             flow.get("dstport")), flow.get("pid"), flow.get("ppid")
        ))

    def realtime_error_cb(self, message):
        """Handle an error event from the realtime processor. These events
        can occur after sending it an event that triggers its processing
        routine."""
        task_id = message["body"].get("taskid")
        if not task_id or task_id != self.task.id:
            return

        self.realtime_error = message["body"].get("error", True)

    def realtime_tlskeys_cb(self, message):
        """Handle a tlskeys event containing TLS master keys extracted by
        the realtime processor."""
        task_id = message["body"].get("taskid")
        if not task_id or task_id != self.task.id:
            return

        tlskeys = message["body"].get("tlskeys")
        if not tlskeys:
            return

        if not isinstance(tlskeys, list):
            return

        with open(cwd("tlsmaster.txt", analysis=self.task.id), "wb") as fp:
            for entry in sorted(tlskeys):
                fp.write("RSA Session-ID:%s Master-Key:%s\n" % (
                    entry.get("session_id"), entry.get("master_secret")
                ))

    def realtime_javascript_cb(self, message):
        """Handle incoming javascript events from the realtime processor."""
        task_id = message["body"].get("taskid")
        if not task_id or task_id != self.task.id:
            return

        for k in ("pid", "ppid"):
            if k not in message["body"]:
                return

        js = message["body"]
        meta = js.get("meta") if js.get("meta") != "no context" else None
        code = js.get("code")
        if code:
            self.js_events.append((js.get("pid"), js.get("ppid"), code))
        if meta:
            self.js_events.append((js.get("pid"), js.get("ppid"), meta))

    def realtime_finished_cb(self, message):
        """Handle incoming finished events from the realtime processor."""
        task_id = message["body"].get("taskid")
        if not task_id or task_id != self.task.id:
            return

        action = message["body"].get("action")
        if not action:
            return

        self.realtime_finished.add(action)

    def on_status_failed(self, db):
        """The mass url analysis failed."""
        # What should we do when it fails? How can we prevent redundant work
        # and be sure the mass url scheduler knows this task failed?
        if self.machine.locked:
            log.debug("Releasing machine lock on %s", self.machine.label)
            self.machine = self.machinery.release(self.machine.label)

    def on_status_starting(self, db):
        """Executed by the scheduler on analysis status 'starting'.
        Stores the chosen route in the db."""
        log.info("Using route '%s' for task #%s",
                 self.route.route, self.task.id)
        # Propagate the taken route to the database.
        db.set_route(self.task.id, self.route.route)

        # Store the used machine in the task.
        db.set_machine(self.task.id, self.machine.name)

    def on_status_stopurlblock(self, db):
        """When a block of URLs has finished, update their rows in the
        database. This way we can keep track of which were and were not
        analyzed in case of an abort/crash/detection and a re-submit is
        required."""
        updated = []
        for t in self.curr_block:
            target_obj = self.curr_block[t].get("target_obj")
            target_obj["analyzed"] = True
            updated.append(target_obj.target_dict)

        db.update_targets(updated)

    def on_status_aborted(self, db):
        """This status is reached when a potentially malicious action is
        detected and the remaining URLs should be analyzed in a new task."""
        log.info("Task #%s aborted", self.task.id)
        self.aborted = True
        self.task.set_status(TASK_ABORTED)
        self.task.write_task_json()

    def on_status_stopped(self, db):
        """Executed by the scheduler when the analysis reaches the stopped
        status."""
        if self.machine.locked:
            log.debug("Releasing machine lock on %s", self.machine.label)
            self.machine = self.machinery.release(self.machine.label)

    def finalize(self, db):
        self.ev_client.stop()
        self.task.set_latest()
        if self.machine.locked:
            log.debug("Releasing machine lock on %s", self.machine.label)
            self.machine = self.machinery.release(self.machine.label)

        self.release_machine_lock()
        with open(cwd("pids_targets.json", analysis=self.task.id),
                  "wb") as fp:
            json.dump(self.all_pids_targets, fp, indent=2)

        if self.analysis.status != Analysis.STOPPED:
            log.warning("Analysis status is '%s' after exit.",
                        self.analysis.status)
            self.task.write_task_json(status=TASK_FAILED_ANALYSIS)
            self.task.set_status(TASK_FAILED_ANALYSIS)

        if self.completed:
            log.info("Setting task #%d to reported", self.task.id)
            self.task.write_task_json(status=TASK_REPORTED)
            self.task.set_status(TASK_REPORTED)

        task_log_stop(self.task.id)
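# MassURL.set_target() above splits the submitted targets into blocks of
# URL_BLOCKSIZE items (optionally overridden via the 'urlblocksize' task
# option), and new_target_block() consumes one block per iteration of
# run_analysis(). A minimal standalone sketch of that chunking, using plain
# values instead of Target objects; the helper name is hypothetical and not
# part of the codebase.
def _chunk_targets(targets, blocksize=5):
    """Yield consecutive blocks of at most 'blocksize' targets (sketch)."""
    for i in range(0, len(targets), blocksize):
        yield targets[i:i + blocksize]

# Example: 12 URLs with the default blocksize of 5 yield blocks of 5, 5 and
# 2 URLs, each opened and observed for SECS_PER_BLOCK seconds.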