def process_task(task):
    db = Database()

    try:
        task_log_start(task["id"])

        logger(
            "Starting task reporting",
            action="task.report", status="pending",
            target=task["target"], category=task["category"],
            package=task["package"], options=emit_options(task["options"]),
            custom=task["custom"]
        )

        if task["category"] == "file" and task.get("sample_id"):
            sample = db.view_sample(task["sample_id"])
            copy_path = cwd("storage", "binaries", sample.sha256)
        else:
            copy_path = None

        try:
            process(task["target"], copy_path, task)
            db.set_status(task["id"], TASK_REPORTED)
        except Exception as e:
            log.exception("Task #%d: error reporting: %s", task["id"], e)
            db.set_status(task["id"], TASK_FAILED_PROCESSING)
        else:
            # Only log success when reporting actually succeeded.
            log.info(
                "Task #%d: reports generation completed", task["id"], extra={
                    "action": "task.report",
                    "status": "success",
                }
            )
    except Exception as e:
        log.exception("Caught unknown exception: %s", e)
    finally:
        task_log_stop(task["id"])
def ready_for_new_run(self):
    """Perform checks to see whether Cuckoo should start a new pending
    task."""
    # Wait until the machine lock is not locked. This is only the case
    # when all machines are fully running, rather than about to start
    # or still busy starting. This way we won't have race conditions
    # with finding out there are no available machines in the analysis
    # manager or having two analyses pick the same machine.
    if not self.machine_lock.acquire(False):
        logger(
            "Could not acquire machine lock",
            action="scheduler.machine_lock", status="busy"
        )
        return False

    self.machine_lock.release()

    # Verify that the minimum amount of disk space is available.
    if config("cuckoo:cuckoo:freespace"):
        freespace = get_free_disk(cwd("storage", "analyses"))

        # If freespace is None, the check failed. Continue, since this
        # can happen when the disk check is not supported on platforms
        # other than unix and winxp+. The call might also fail on win32.
        if freespace is None:
            log.error("Error determining free disk space")
        elif freespace <= config("cuckoo:cuckoo:freespace"):
            log.error(
                "Not enough free disk space! (Only %d MB!)",
                freespace, extra={
                    "action": "scheduler.diskspace",
                    "status": "error",
                    "available": freespace,
                }
            )
            return False

    max_vm = config("cuckoo:cuckoo:max_machines_count")
    if max_vm and len(self.machinery.running()) >= max_vm:
        log.debug("Maximum amount of machines is running", extra={
            "action": "scheduler.machines",
            "status": "maxed",
        })
        return False

    if not self.machinery.availables():
        logger(
            "No available machines",
            action="scheduler.machines", status="none"
        )
        return False

    return True
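# The get_free_disk() helper used above is not shown in this section. A
# minimal sketch follows; it is an assumption, not necessarily the project's
# actual implementation, but it is consistent with the statvfs-based
# computation in the older scheduler loop further below (returns megabytes,
# or None when the check is unsupported on this platform).
import os

def get_free_disk(path):
    """Return free disk space for path in megabytes, or None on failure."""
    if not hasattr(os, "statvfs"):
        # os.statvfs is unavailable on win32; signal "check failed".
        return None
    st = os.statvfs(path.encode("utf8"))
    # f_bavail: free blocks for unprivileged users; f_frsize: block size.
    return st.f_bavail * st.f_frsize / (1024 * 1024)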
def process_results(self):
    """Process the analysis results and generate the enabled reports."""
    logger(
        "Starting task reporting",
        action="task.report", status="pending"
    )

    # TODO Refactor this function as currently "cuckoo process" has a 1:1
    # copy of its code. TODO Also remove "archive" files.
    results = RunProcessing(task=self.task).run()
    RunSignatures(results=results).run()
    RunReporting(task=self.task, results=results).run()

    # If the target is a file and the user enabled the option,
    # delete the original copy.
    if self.task.category == "file" and self.cfg.cuckoo.delete_original:
        if not os.path.exists(self.task.target):
            log.warning(
                "Original file does not exist anymore: \"%s\": "
                "File not found.", self.task.target
            )
        else:
            try:
                os.remove(self.task.target)
            except OSError as e:
                log.error(
                    "Unable to delete original file at path "
                    "\"%s\": %s", self.task.target, e
                )

    # If the target is a file and the user enabled the "delete binary
    # copy" option, delete the copy as well.
    if self.task.category == "file" and self.cfg.cuckoo.delete_bin_copy:
        if not os.path.exists(self.binary):
            log.warning(
                "Copy of the original file does not exist anymore: "
                "\"%s\": File not found.", self.binary
            )
        else:
            try:
                os.remove(self.binary)
            except OSError as e:
                log.error(
                    "Unable to delete the copy of the original file at "
                    "path \"%s\": %s", self.binary, e
                )

    # If the binary in the analysis directory is an invalid symlink,
    # delete it.
    if os.path.islink(self.storage_binary) and \
            not os.path.exists(self.storage_binary):
        try:
            os.remove(self.storage_binary)
        except OSError as e:
            log.error(
                "Unable to delete symlink to the binary copy at path "
                "\"%s\": %s", self.storage_binary, e
            )

    log.info("Task #%d: reports generation completed", self.task.id, extra={
        "action": "task.report",
        "status": "success",
    })

    return True
def process_task(task):
    db = Database()

    if not task.dir_exists():
        log.error(
            "Task #%s directory %s does not exist, cannot process it",
            task.id, task.path
        )
        db.set_status(task.id, TASK_FAILED_PROCESSING)
        return

    task_log_start(task.id)

    if task.targets:
        target = task.targets[0]
    else:
        target = Target()

    logger(
        "Starting task reporting",
        action="task.report", status="pending",
        target=target.target, category=target.category,
        package=task["package"], options=emit_options(task["options"]),
        custom=task["custom"]
    )

    success = False
    try:
        success = task.process()
    except Exception as e:
        log.error("Failed to process task #%s. Error: %s", task.id, e)
    finally:
        if success:
            log.info(
                "Task #%d: reports generation completed", task.id, extra={
                    "action": "task.report",
                    "status": "success",
                }
            )
            db.set_status(task.id, TASK_REPORTED)
        else:
            log.error("Failed to process task #%s", task.id, extra={
                "action": "task.report",
                "status": "failed",
            })
            db.set_status(task.id, TASK_FAILED_PROCESSING)

        task_log_stop(task.id)
def test_logger():
    set_cwd(tempfile.mkdtemp())
    cuckoo_create()
    init_logfile("cuckoo.json")

    with mock.patch("time.time") as p:
        p.return_value = 1484232001
        logger("test %s", "message", action="a", status="b")

    assert json.load(open(cwd("log", "cuckoo.json"), "rb")) == {
        "asctime": mock.ANY,
        "action": "a",
        "level": "info",
        "message": "test message",
        "status": "b",
        "task_id": None,
        "time": 1484232001,
    }
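# Usage note (a sketch, not additional project API): as the test above
# demonstrates, logger() formats a printf-style message and writes a JSON
# entry to log/cuckoo.json, merging in whatever keyword fields are passed
# alongside level, task_id, and a time.time() timestamp. The structured
# calls throughout this section follow the same pattern; the vmname value
# below is a hypothetical example:
#
#     init_logfile("cuckoo.json")
#     logger("Starting VM", action="vm.start", status="pending",
#            vmname="cuckoo1")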
def launch_analysis(self):
    """Start analysis."""
    succeeded = False

    if self.task.category == "file" or self.task.category == "archive":
        target = os.path.basename(self.task.target)
    else:
        target = self.task.target

    log.info(
        "Starting analysis of %s \"%s\" (task #%d, options \"%s\")",
        self.task.category.upper(), target, self.task.id,
        emit_options(self.task.options), extra={
            "action": "task.init",
            "status": "starting",
            "task_id": self.task.id,
            "target": target,
            "category": self.task.category,
            "package": self.task.package,
            "options": emit_options(self.task.options),
            "custom": self.task.custom,
        }
    )

    # Initialize the analysis.
    if not self.init():
        logger("Failed to initialize", action="task.init", status="error")
        return False

    # Acquire analysis machine.
    try:
        self.acquire_machine()
    except CuckooOperationalError as e:
        machine_lock.release()
        log.error("Cannot acquire machine: %s", e, extra={
            "action": "vm.acquire",
            "status": "error",
        })
        return False

    # At this point we can tell the ResultServer about it.
    try:
        ResultServer().add_task(self.task, self.machine)
    except Exception as e:
        machinery.release(self.machine.label)
        self.errors.put(e)

    # Initialize the guest manager.
    self.guest_manager = GuestManager(
        self.machine.name, self.machine.ip,
        self.machine.platform, self.task.id, self
    )

    self.aux = RunAuxiliary(self.task, self.machine, self.guest_manager)
    self.aux.start()

    # Generate the analysis configuration file.
    options = self.build_options()

    # Check if the current task has remotecontrol
    # enabled before starting the machine.
    control_enabled = (
        config("cuckoo:remotecontrol:enabled") and
        "remotecontrol" in self.task.options
    )
    if control_enabled:
        try:
            machinery.enable_remote_control(self.machine.label)
        except NotImplementedError:
            raise CuckooMachineError(
                "Remote control support has not been implemented "
                "for this machinery."
            )

    try:
        unlocked = False
        self.interface = None

        # Mark the selected analysis machine in the database as started.
        guest_log = self.db.guest_start(
            self.task.id, self.machine.name,
            self.machine.label, machinery.__class__.__name__
        )

        logger(
            "Starting VM",
            action="vm.start", status="pending",
            vmname=self.machine.name
        )

        # Start the machine.
        machinery.start(self.machine.label, self.task)

        logger(
            "Started VM",
            action="vm.start", status="success",
            vmname=self.machine.name
        )

        # Retrieve the port used for remote control.
        if control_enabled:
            try:
                params = machinery.get_remote_control_params(
                    self.machine.label
                )
                self.db.set_machine_rcparams(self.machine.label, params)
            except NotImplementedError:
                raise CuckooMachineError(
                    "Remote control support has not been implemented "
                    "for this machinery."
                )

        # Enable network routing.
        self.route_network()

        # By the time start returns it will have fully started the Virtual
        # Machine. We can now safely release the machine lock.
        machine_lock.release()
        unlocked = True

        # Run and manage the components inside the guest unless this
        # machine has the "noagent" option specified (please refer to the
        # wait_finish() function for more details on this function).
        if "noagent" not in self.machine.options:
            self.guest_manage(options)
        else:
            self.wait_finish()

        succeeded = True
    except CuckooMachineSnapshotError as e:
        log.error(
            "Unable to restore to the snapshot for this Virtual Machine! "
            "Does your VM have a proper Snapshot and can you revert to it "
            "manually? VM: %s, error: %s", self.machine.name, e, extra={
                "action": "vm.resume",
                "status": "error",
                "vmname": self.machine.name,
            }
        )
    except CuckooMachineError as e:
        if not unlocked:
            machine_lock.release()
        log.error(
            "Error starting Virtual Machine! VM: %s, error: %s",
            self.machine.name, e, extra={
                "action": "vm.start",
                "status": "error",
                "vmname": self.machine.name,
            }
        )
    except CuckooGuestCriticalTimeout as e:
        if not unlocked:
            machine_lock.release()
        log.error(
            "Error from machine '%s': it appears that this Virtual "
            "Machine hasn't been configured properly as the Cuckoo Host "
            "wasn't able to connect to the Guest. There could be a few "
            "reasons for this, please refer to our documentation on the "
            "matter: %s", self.machine.name,
            faq("troubleshooting-vm-network-configuration"),
            extra={
                "error_action": "vmrouting",
                "action": "guest.handle",
                "status": "error",
                "task_id": self.task.id,
            }
        )
    except CuckooGuestError as e:
        if not unlocked:
            machine_lock.release()
        log.error("Error from the Cuckoo Guest: %s", e, extra={
            "action": "guest.handle",
            "status": "error",
            "task_id": self.task.id,
        })
    finally:
        # Stop Auxiliary modules.
        self.aux.stop()

        # Take a memory dump of the machine before shutting it off.
        if self.cfg.cuckoo.memory_dump or self.task.memory:
            logger(
                "Taking full memory dump",
                action="vm.memdump", status="pending",
                vmname=self.machine.name
            )
            try:
                dump_path = os.path.join(self.storage, "memory.dmp")
                machinery.dump_memory(self.machine.label, dump_path)

                logger(
                    "Taken full memory dump",
                    action="vm.memdump", status="success",
                    vmname=self.machine.name
                )
            except NotImplementedError:
                log.error(
                    "The memory dump functionality is not available for "
                    "the current machine manager.", extra={
                        "action": "vm.memdump",
                        "status": "error",
                        "vmname": self.machine.name,
                    }
                )
            except CuckooMachineError as e:
                log.error("Machinery error: %s", e, extra={
                    "action": "vm.memdump",
                    "status": "error",
                })

        logger(
            "Stopping VM",
            action="vm.stop", status="pending",
            vmname=self.machine.name
        )

        try:
            # Stop the analysis machine.
            machinery.stop(self.machine.label)
        except CuckooMachineError as e:
            log.warning(
                "Unable to stop machine %s: %s",
                self.machine.label, e, extra={
                    "action": "vm.stop",
                    "status": "error",
                    "vmname": self.machine.name,
                }
            )

        logger(
            "Stopped VM",
            action="vm.stop", status="success",
            vmname=self.machine.name
        )

        # Disable remote control after stopping the machine
        # if it was enabled for the task.
        if control_enabled:
            try:
                machinery.disable_remote_control(self.machine.label)
            except NotImplementedError:
                raise CuckooMachineError(
                    "Remote control support has not been implemented "
                    "for this machinery."
                )

        # Mark the machine in the database as stopped. Unless this machine
        # has been marked as dead, we just keep it as "started" in the
        # database so it'll not be used later on in this session.
        self.db.guest_stop(guest_log)

        # After all this, we can make the ResultServer forget about the
        # internal state for this analysis task.
        ResultServer().del_task(self.task, self.machine)

        # Drop the network routing rules if any.
        self.unroute_network()

        try:
            # Release the analysis machine. But only if the machine has
            # not turned dead yet.
            machinery.release(self.machine.label)
        except CuckooMachineError as e:
            log.error(
                "Unable to release machine %s, reason %s. You might need "
                "to restore it manually.", self.machine.label, e, extra={
                    "action": "vm.release",
                    "status": "error",
                    "vmname": self.machine.name,
                }
            )

    return succeeded
def start_and_wait(self):
    """Start the analysis by running the auxiliary modules, adding the
    task to the resultserver, starting the machine, and running a guest
    manager."""
    # Set guest status to starting and start the analysis machine.
    self.set_analysis_status(Analysis.STARTING)

    target = self.target.target
    if self.target.target and self.target.is_file:
        target = os.path.basename(target)

    log.info(
        "Starting analysis (task #%s, options: '%s') type '%s'."
        " Target: %s '%s'", self.task.id, self.options["options"],
        self.task.type, self.target.category, target, extra={
            "action": "task.init",
            "status": "starting",
            "task_id": self.task.id,
            "target": target,
            "category": self.target.category,
            "package": self.task.package,
            "options": self.options["options"],
            "custom": self.task.custom,
            "type": self.task.type,
        }
    )

    ResultServer().add_task(self.task.db_task, self.machine, self.rt)

    # Start auxiliary modules.
    self.aux.start()

    if self.control_enabled:
        try:
            self.machinery.enable_remote_control(self.machine.label)
        except NotImplementedError:
            self.control_enabled = False
            log.exception(
                "Remote control support has not been implemented "
                "for machinery %s.", self.machine.manager
            )

    # JSON log for performance measurement purposes.
    logger(
        "Starting VM",
        action="vm.start", status="pending",
        vmname=self.machine.name
    )

    try:
        self.machinery.start(self.machine.label, self.task.db_task)
    except CuckooMachineSnapshotError as e:
        log.error(
            "Unable to restore to the snapshot for this Virtual Machine! "
            "Does your VM have a proper Snapshot and can you revert to it "
            "manually? VM: %s, error: %s", self.machine.name, e, extra={
                "action": "vm.resume",
                "status": "error",
                "vmname": self.machine.name,
            }
        )
        return False
    except CuckooMachineError as e:
        log.error(
            "Error starting Virtual Machine! VM: %s, error: %s",
            self.machine.name, e, extra={
                "action": "vm.start",
                "status": "error",
                "vmname": self.machine.name,
            }
        )
        return False

    logger(
        "Started VM",
        action="vm.start", status="success",
        vmname=self.machine.name
    )

    # Retrieve the port used for remote control.
    if self.control_enabled:
        try:
            params = self.machinery.get_remote_control_params(
                self.machine.label
            )
            self.db.set_machine_rcparams(self.machine.label, params)
        except NotImplementedError:
            log.exception(
                "Remote control support has not been implemented "
                "for machinery %s.", self.machine.manager
            )

    # Enable network routing.
    self.route.route_network()

    # By the time start returns it will have fully started the Virtual
    # Machine. We can now safely release the machine lock.
    self.release_machine_lock()

    # Request scheduler action for status 'starting'.
    self.request_scheduler_action(Analysis.STARTING)

    # Choose the correct way of waiting for or managing the agent and
    # execute it.
    try:
        self.manage()
    except CuckooGuestCriticalTimeout as e:
        log.error(
            "Error from machine '%s': it appears that this Virtual "
            "Machine hasn't been configured properly as the Cuckoo Host "
            "wasn't able to connect to the Guest. There could be a few "
            "reasons for this, please refer to our documentation on the "
            "matter: %s", self.machine.name,
            faq("troubleshooting-vm-network-configuration"),
            extra={
                "error_action": "vmrouting",
                "action": "guest.handle",
                "status": "error",
                "task_id": self.task.id,
            }
        )
    except CuckooGuestError as e:
        log.error("Error from the Cuckoo Guest: %s", e, extra={
            "action": "guest.handle",
            "status": "error",
            "task_id": self.task.id,
        })

    return True
def log_something(target, copy_path, task):
    logger("test message", action="hello.world", status="success")
def launch_analysis(self):
    """Start analysis."""
    succeeded = False

    if self.task.category == "file" or self.task.category == "archive":
        target = os.path.basename(self.task.target)
    else:
        target = self.task.target

    log.info(
        "Starting analysis of %s \"%s\" (task #%d, options \"%s\")",
        self.task.category.upper(), target, self.task.id,
        emit_options(self.task.options), extra={
            "action": "task.init",
            "status": "starting",
            "task_id": self.task.id,
            "target": target,
            "category": self.task.category,
            "package": self.task.package,
            "options": emit_options(self.task.options),
            "custom": self.task.custom,
        }
    )

    # Initialize the analysis.
    if not self.init():
        logger("Failed to initialize", action="task.init", status="error")
        return False

    # Acquire analysis machine.
    try:
        self.acquire_machine()
    except CuckooOperationalError as e:
        machine_lock.release()
        log.error("Cannot acquire machine: %s", e, extra={
            "action": "vm.acquire",
            "status": "error",
        })
        return False

    self.rs_port = self.machine.resultserver_port or ResultServer().port

    # At this point we can tell the ResultServer about it.
    try:
        ResultServer().add_task(self.task, self.machine)
    except Exception as e:
        machinery.release(self.machine.label)
        self.errors.put(e)

    # Initialize the guest manager.
    self.guest_manager = GuestManager(
        self.machine.name, self.machine.ip,
        self.machine.platform, self.task.id, self
    )

    self.aux = RunAuxiliary(self.task, self.machine, self.guest_manager)
    self.aux.start()

    # Generate the analysis configuration file.
    options = self.build_options()

    # Check if the current task has remotecontrol
    # enabled before starting the machine.
    control_enabled = (
        config("cuckoo:remotecontrol:enabled") and
        "remotecontrol" in self.task.options
    )
    if control_enabled:
        try:
            machinery.enable_remote_control(self.machine.label)
        except NotImplementedError:
            log.error(
                "Remote control support has not been implemented for the "
                "configured machinery module: %s",
                config("cuckoo:cuckoo:machinery")
            )

    try:
        unlocked = False
        self.interface = None

        # Mark the selected analysis machine in the database as started.
        guest_log = self.db.guest_start(
            self.task.id, self.machine.name,
            self.machine.label, machinery.__class__.__name__
        )

        logger(
            "Starting VM",
            action="vm.start", status="pending",
            vmname=self.machine.name
        )

        # Start the machine.
        machinery.start(self.machine.label, self.task)

        logger(
            "Started VM",
            action="vm.start", status="success",
            vmname=self.machine.name
        )

        # Retrieve the port used for remote control.
        if control_enabled:
            try:
                params = machinery.get_remote_control_params(
                    self.machine.label
                )
                self.db.set_machine_rcparams(self.machine.label, params)
            except NotImplementedError:
                log.error(
                    "Remote control support has not been implemented for "
                    "the configured machinery module: %s",
                    config("cuckoo:cuckoo:machinery")
                )

        # Enable network routing.
        self.route_network()

        # By the time start returns it will have fully started the Virtual
        # Machine. We can now safely release the machine lock.
        machine_lock.release()
        unlocked = True

        # Run and manage the components inside the guest unless this
        # machine has the "noagent" option specified (please refer to the
        # wait_finish() function for more details on this function).
        if "noagent" not in self.machine.options:
            self.guest_manage(options)
        else:
            self.wait_finish()

        succeeded = True
    except CuckooMachineSnapshotError as e:
        log.error(
            "Unable to restore to the snapshot for this Virtual Machine! "
            "Does your VM have a proper Snapshot and can you revert to it "
            "manually? VM: %s, error: %s", self.machine.name, e, extra={
                "action": "vm.resume",
                "status": "error",
                "vmname": self.machine.name,
            }
        )
    except CuckooMachineError as e:
        if not unlocked:
            machine_lock.release()
        log.error(
            "Error starting Virtual Machine! VM: %s, error: %s",
            self.machine.name, e, extra={
                "action": "vm.start",
                "status": "error",
                "vmname": self.machine.name,
            }
        )
    except CuckooGuestCriticalTimeout as e:
        if not unlocked:
            machine_lock.release()
        log.error(
            "Error from machine '%s': it appears that this Virtual "
            "Machine hasn't been configured properly as the Cuckoo Host "
            "wasn't able to connect to the Guest. There could be a few "
            "reasons for this, please refer to our documentation on the "
            "matter: %s", self.machine.name,
            faq("troubleshooting-vm-network-configuration"),
            extra={
                "error_action": "vmrouting",
                "action": "guest.handle",
                "status": "error",
                "task_id": self.task.id,
            }
        )
    except CuckooGuestError as e:
        if not unlocked:
            machine_lock.release()
        log.error("Error from the Cuckoo Guest: %s", e, extra={
            "action": "guest.handle",
            "status": "error",
            "task_id": self.task.id,
        })
    finally:
        # Stop Auxiliary modules.
        if not self.stopped_aux:
            self.stopped_aux = True
            self.aux.stop()

        # Take a memory dump of the machine before shutting it off.
        if self.cfg.cuckoo.memory_dump or self.task.memory:
            logger(
                "Taking full memory dump",
                action="vm.memdump", status="pending",
                vmname=self.machine.name
            )
            try:
                dump_path = os.path.join(self.storage, "memory.dmp")
                machinery.dump_memory(self.machine.label, dump_path)

                logger(
                    "Taken full memory dump",
                    action="vm.memdump", status="success",
                    vmname=self.machine.name
                )
            except NotImplementedError:
                log.error(
                    "The memory dump functionality is not available for "
                    "the current machine manager.", extra={
                        "action": "vm.memdump",
                        "status": "error",
                        "vmname": self.machine.name,
                    }
                )
            except CuckooMachineError as e:
                log.error("Machinery error: %s", e, extra={
                    "action": "vm.memdump",
                    "status": "error",
                })

        logger(
            "Stopping VM",
            action="vm.stop", status="pending",
            vmname=self.machine.name
        )

        try:
            # Stop the analysis machine.
            machinery.stop(self.machine.label)
        except CuckooMachineError as e:
            log.warning(
                "Unable to stop machine %s: %s",
                self.machine.label, e, extra={
                    "action": "vm.stop",
                    "status": "error",
                    "vmname": self.machine.name,
                }
            )

        logger(
            "Stopped VM",
            action="vm.stop", status="success",
            vmname=self.machine.name
        )

        # Disable remote control after stopping the machine
        # if it was enabled for the task.
        if control_enabled:
            try:
                machinery.disable_remote_control(self.machine.label)
            except NotImplementedError:
                log.error(
                    "Remote control support has not been implemented for "
                    "the configured machinery module: %s",
                    config("cuckoo:cuckoo:machinery")
                )

        # Mark the machine in the database as stopped. Unless this machine
        # has been marked as dead, we just keep it as "started" in the
        # database so it'll not be used later on in this session.
        self.db.guest_stop(guest_log)

        # After all this, we can make the ResultServer forget about the
        # internal state for this analysis task.
        ResultServer().del_task(self.task, self.machine)

        # Drop the network routing rules if any.
        if not self.unrouted_network:
            self.unroute_network()

        try:
            # Release the analysis machine. But only if the machine has
            # not turned dead yet.
            machinery.release(self.machine.label)
        except CuckooMachineError as e:
            log.error(
                "Unable to release machine %s, reason %s. You might need "
                "to restore it manually.", self.machine.label, e, extra={
                    "action": "vm.release",
                    "status": "error",
                    "vmname": self.machine.name,
                }
            )

    return succeeded
def start(self):
    """Start scheduler."""
    self.initialize()

    log.info("Waiting for analysis tasks.")

    # Message queue with threads to transmit exceptions (used as IPC).
    errors = Queue.Queue()

    # Command-line overrides the configuration file.
    if self.maxcount is None:
        self.maxcount = self.cfg.cuckoo.max_analysis_count

    launched_analysis = True

    # This loop runs forever.
    while self.running:
        if not launched_analysis:
            time.sleep(1)
        launched_analysis = False

        # Run cleanup on finished analysis managers and untrack them.
        for am in self._cleanup_managers():
            self.analysis_managers.discard(am)

        # Wait until the machine lock is not locked. This is only the case
        # when all machines are fully running, rather than about to start
        # or still busy starting. This way we won't have race conditions
        # with finding out there are no available machines in the analysis
        # manager or having two analyses pick the same machine.
        if not machine_lock.acquire(False):
            logger(
                "Could not acquire machine lock",
                action="scheduler.machine_lock", status="busy"
            )
            continue

        machine_lock.release()

        # If not enough free disk space is available, then we print an
        # error message and wait another round (this check is ignored
        # when the freespace configuration variable is set to zero).
        if self.cfg.cuckoo.freespace:
            # Resolve the full base path to the analysis folder, just in
            # case somebody decides to make a symbolic link out of it.
            dir_path = cwd("storage", "analyses")

            # TODO: Windows support
            if hasattr(os, "statvfs"):
                dir_stats = os.statvfs(dir_path.encode("utf8"))

                # Calculate the free disk space in megabytes.
                space_available = dir_stats.f_bavail * dir_stats.f_frsize
                space_available /= 1024 * 1024

                if space_available < self.cfg.cuckoo.freespace:
                    log.error(
                        "Not enough free disk space! (Only %d MB!)",
                        space_available, extra={
                            "action": "scheduler.diskspace",
                            "status": "error",
                            "available": space_available,
                        }
                    )
                    continue

        # If we have limited the number of concurrently executing
        # machines, are we currently at the maximum?
        maxvm = self.cfg.cuckoo.max_machines_count
        if maxvm and len(machinery.running()) >= maxvm:
            logger(
                "Already maxed out on running machines",
                action="scheduler.machines", status="maxed"
            )
            continue

        # If no machines are available, it's pointless to fetch
        # pending tasks. Loop over.
        if not machinery.availables():
            logger(
                "No available machines",
                action="scheduler.machines", status="none"
            )
            continue

        # Exit if max_analysis_count is defined in the configuration
        # file and has been reached.
        if self.maxcount and self.total_analysis_count >= self.maxcount:
            if active_analysis_count <= 0:
                log.debug("Reached max analysis count, exiting.", extra={
                    "action": "scheduler.max_analysis",
                    "status": "success",
                    "limit": self.total_analysis_count,
                })
                self.stop()
            else:
                logger(
                    "Maximum analyses hit, awaiting active to finish off",
                    action="scheduler.max_analysis", status="busy",
                    active=active_analysis_count
                )
            continue

        # Fetch a pending analysis task.
        # TODO This fixes only submissions by --machine, need to add
        # other attributes (tags etc).
        # TODO We should probably move the entire "acquire machine" logic
        # from the Analysis Manager to the Scheduler and then pass the
        # selected machine onto the Analysis Manager instance.
        task, available = None, False
        for machine in self.db.get_available_machines():
            task = self.db.fetch(machine=machine.name)
            if task:
                break

            if machine.is_analysis():
                available = True

        # We only fetch a new task if at least one of the available
        # machines is not a "service" machine (again, please refer to the
        # services auxiliary module for more information on service VMs).
        if not task and available:
            task = self.db.fetch(service=False)

        if task:
            start = time.clock()
            self.total_analysis_count += 1

            # Initialize and start the analysis manager.
            analysis = AnalysisManager(task.id, errors)
            analysis.daemon = True
            analysis.start()
            self.analysis_managers.add(analysis)
            launched_analysis = True
            log.debug(
                "Processing task #%s (launch took %ds)",
                task.id, time.clock() - start
            )

        # Deal with errors.
        try:
            raise errors.get(block=False)
        except Queue.Empty:
            pass

    log.debug("End of analyses.")
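# The _cleanup_managers() helper called at the top of the loop above is not
# shown in this section. A minimal sketch follows; it is an assumption, not
# necessarily the project's actual implementation. Since AnalysisManager is
# started as a thread (analysis.daemon = True; analysis.start()), finished
# managers can be detected with is_alive() and yielded for the scheduler to
# discard from self.analysis_managers.
def _cleanup_managers(self):
    """Yield analysis managers whose threads have finished running."""
    for am in list(self.analysis_managers):
        if not am.is_alive():
            # The thread has already terminated; join() returns at once.
            am.join(timeout=0)
            yield am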
def stop_and_wait(self):
    """Stop the analysis by stopping the aux modules, optionally dumping
    VM memory, stopping the VM, and deleting the task from the
    resultserver."""
    self.set_analysis_status(Analysis.STOPPING)

    # Stop all auxiliary modules.
    self.aux.stop()

    # If enabled, make a full memory dump of the machine
    # before it shuts down.
    if config("cuckoo:cuckoo:memory_dump") or self.task.memory:
        logger(
            "Taking full memory dump",
            action="vm.memdump", status="pending",
            vmname=self.machine.name
        )
        try:
            dump_path = os.path.join(self.task.path, "memory.dmp")
            self.machinery.dump_memory(self.machine.label, dump_path)

            logger(
                "Taken full memory dump",
                action="vm.memdump", status="success",
                vmname=self.machine.name
            )
        except NotImplementedError:
            log.error(
                "The memory dump functionality is not available for "
                "the current machine manager.", extra={
                    "action": "vm.memdump",
                    "status": "error",
                    "vmname": self.machine.name,
                }
            )
        except CuckooMachineError as e:
            log.error("Machinery error: %s", e, extra={
                "action": "vm.memdump",
                "status": "error",
            })

    logger(
        "Stopping VM",
        action="vm.stop", status="pending",
        vmname=self.machine.name
    )

    # Stop the analysis machine.
    try:
        self.machinery.stop(self.machine.label)
    except CuckooMachineError as e:
        log.warning(
            "Unable to stop machine %s: %s",
            self.machine.label, e, extra={
                "action": "vm.stop",
                "status": "error",
                "vmname": self.machine.name,
            }
        )

    logger(
        "Stopped VM",
        action="vm.stop", status="success",
        vmname=self.machine.name
    )

    # Disable remote control after stopping the machine
    # if it was enabled for the task.
    if self.control_enabled:
        try:
            self.machinery.disable_remote_control(self.machine.label)
        except NotImplementedError:
            log.exception(
                "Remote control support has not been implemented "
                "for machinery %s.", self.machine.manager
            )

    # After all this, we can make the ResultServer forget about the
    # internal state for this analysis task.
    ResultServer().del_task(self.task.db_task, self.machine)

    # Drop the network routing rules, if any.
    self.route.unroute_network()
def start(self):
    """Start scheduler."""
    self.initialize()

    log.info("Waiting for analysis tasks.")

    # Message queue with threads to transmit exceptions (used as IPC).
    errors = Queue.Queue()

    # Command-line overrides the configuration file.
    if self.maxcount is None:
        self.maxcount = self.cfg.cuckoo.max_analysis_count

    # This loop runs forever.
    while self.running:
        time.sleep(1)

        # Wait until the machine lock is not locked. This is only the case
        # when all machines are fully running, rather than about to start
        # or still busy starting. This way we won't have race conditions
        # with finding out there are no available machines in the analysis
        # manager or having two analyses pick the same machine.
        if not machine_lock.acquire(False):
            logger(
                "Could not acquire machine lock",
                action="scheduler.machine_lock", status="busy"
            )
            continue

        machine_lock.release()

        # If not enough free disk space is available, then we print an
        # error message and wait another round (this check is ignored
        # when the freespace configuration variable is set to zero).
        if self.cfg.cuckoo.freespace:
            # Resolve the full base path to the analysis folder, just in
            # case somebody decides to make a symbolic link out of it.
            dir_path = cwd("storage", "analyses")

            # TODO: Windows support
            if hasattr(os, "statvfs"):
                dir_stats = os.statvfs(dir_path.encode("utf8"))

                # Calculate the free disk space in megabytes.
                space_available = dir_stats.f_bavail * dir_stats.f_frsize
                space_available /= 1024 * 1024

                if space_available < self.cfg.cuckoo.freespace:
                    log.error(
                        "Not enough free disk space! (Only %d MB!)",
                        space_available, extra={
                            "action": "scheduler.diskspace",
                            "status": "error",
                            "available": space_available,
                        }
                    )
                    continue

        # If we have limited the number of concurrently executing
        # machines, are we currently at the maximum?
        maxvm = self.cfg.cuckoo.max_machines_count
        if maxvm and len(machinery.running()) >= maxvm:
            logger(
                "Already maxed out on running machines",
                action="scheduler.machines", status="maxed"
            )
            continue

        # If no machines are available, it's pointless to fetch
        # pending tasks. Loop over.
        if not machinery.availables():
            logger(
                "No available machines",
                action="scheduler.machines", status="none"
            )
            continue

        # Exit if max_analysis_count is defined in the configuration
        # file and has been reached.
        if self.maxcount and self.total_analysis_count >= self.maxcount:
            if active_analysis_count <= 0:
                log.debug("Reached max analysis count, exiting.", extra={
                    "action": "scheduler.max_analysis",
                    "status": "success",
                    "limit": self.total_analysis_count,
                })
                self.stop()
            else:
                logger(
                    "Maximum analyses hit, awaiting active to finish off",
                    action="scheduler.max_analysis", status="busy",
                    active=active_analysis_count
                )
            continue

        # Fetch a pending analysis task.
        # TODO This fixes only submissions by --machine, need to add
        # other attributes (tags etc).
        # TODO We should probably move the entire "acquire machine" logic
        # from the Analysis Manager to the Scheduler and then pass the
        # selected machine onto the Analysis Manager instance.
        task, available = None, False
        for machine in self.db.get_available_machines():
            task = self.db.fetch(machine=machine.name)
            if task:
                break

            if machine.is_analysis():
                available = True

        # We only fetch a new task if at least one of the available
        # machines is not a "service" machine (again, please refer to the
        # services auxiliary module for more information on service VMs).
        if not task and available:
            task = self.db.fetch(service=False)

        if task:
            log.debug("Processing task #%s", task.id)
            self.total_analysis_count += 1

            # Initialize and start the analysis manager.
            analysis = AnalysisManager(task.id, errors)
            analysis.daemon = True
            analysis.start()

        # Deal with errors.
        try:
            raise errors.get(block=False)
        except Queue.Empty:
            pass

    log.debug("End of analyses.")
def process_tasks(instance, maxcount):
    logger("foo bar", action="hello.world", status="success")