def start(self) -> None:
    """Start scheduler.

    Runs the main scheduling loop: waits until machines and disk space are
    available, fetches a pending task from the database and spawns one
    AnalysisManager thread per task. Loops until self.running is cleared,
    or until self.stop() is called once maxcount analyses have completed.
    """
    self.initialize()

    log.info("Waiting for analysis tasks.")

    # To handle stop analyzing when we need to restart process without break tasks
    signal.signal(signal.SIGHUP, self.set_stop_analyzing)

    # Message queue with threads to transmit exceptions (used as IPC).
    errors = queue.Queue()

    # Command-line overrides the configuration file.
    if self.maxcount is None:
        self.maxcount = self.cfg.cuckoo.max_analysis_count

    # This loop runs forever.
    while self.running:
        time.sleep(1)

        # Wait until the machine lock is not locked. This is only the case
        # when all machines are fully running, rather that about to start
        # or still busy starting. This way we won't have race conditions
        # with finding out there are no available machines in the analysis
        # manager or having two analyses pick the same machine.
        if not machine_lock.acquire(False):
            continue
        machine_lock.release()

        # If not enough free disk space is available, then we print an
        # error message and wait another round (this check is ignored
        # when the freespace configuration variable is set to zero).
        if self.cfg.cuckoo.freespace:
            # Resolve the full base path to the analysis folder, just in
            # case somebody decides to make a symbolic link out of it.
            dir_path = os.path.join(CUCKOO_ROOT, "storage", "analyses")
            need_space, space_available = free_space_monitor(
                dir_path, return_value=True)
            if need_space:
                log.error("Not enough free disk space! (Only %d MB!)",
                          space_available)
                continue

        # Have we limited the number of concurrently executing machines?
        if self.cfg.cuckoo.max_machines_count > 0:
            # Are too many running?
            if len(machinery.running()) >= self.cfg.cuckoo.max_machines_count:
                continue

        # If no machines are available, it's pointless to fetch for
        # pending tasks. Loop over.
        if not machinery.availables():
            continue

        # Exits if max_analysis_count is defined in the configuration
        # file and has been reached.
        if self.maxcount and self.total_analysis_count >= self.maxcount:
            # NOTE(review): active_analysis_count appears to be a
            # module-level counter of in-flight analyses — confirm; we
            # only stop once everything has drained.
            if active_analysis_count <= 0:
                self.stop()
        else:
            # Fetch a pending analysis task.
            # TODO: this fixes only submissions by --machine, need to add other attributes (tags etc.)
            for machine in self.db.get_available_machines():
                task = self.db.fetch(machine=machine.name)
                if task:
                    break
            else:
                # No machine-specific task was found: take any pending task.
                task = self.db.fetch()

            if task:
                log.debug("Task #{0}: Processing task".format(task.id))
                self.total_analysis_count += 1
                # Initialize and start the analysis manager.
                analysis = AnalysisManager(task, errors)
                analysis.daemon = True
                analysis.start()

        # Deal with errors.
        try:
            raise errors.get(block=False)
        except queue.Empty:
            pass
def launch_analysis(self) -> bool:
    """Start analysis.

    Prepares the analysis storage, acquires a machine, runs the guest
    analysis, and tears everything down again in the finally block.

    Returns:
        True when the analysis ran to completion (or needed no VM at all,
        e.g. pcap/static tasks), False otherwise.

    Raises:
        CuckooDeadMachine: when the machine died mid-analysis, so the
            caller can retry the task on another machine.
    """
    succeeded = False
    dead_machine = False
    self.socks5s = _load_socks5_operational()

    log.info("Task #{0}: Starting analysis of {1} '{2}'".format(
        self.task.id, self.task.category.upper(),
        convert_to_printable(self.task.target)))

    # Initialize the analysis folders.
    if not self.init_storage():
        log.debug("Failed to initialize the analysis folder")
        return False

    if self.task.category in ["file", "pcap", "static"]:
        sha256 = File(self.task.target).get_sha256()
        # Check whether the file has been changed for some unknown reason.
        # And fail this analysis if it has been modified.
        if not self.check_file(sha256):
            return False

        # Store a copy of the original file.
        if not self.store_file(sha256):
            return False

    # pcap/static tasks never need a VM: stage the input and return early.
    if self.task.category in ("pcap", "static"):
        if self.task.category == "pcap":
            # Prefer a symlink to avoid copying the capture; fall back to
            # a real copy on platforms without symlink support.
            if hasattr(os, "symlink"):
                os.symlink(self.binary,
                           os.path.join(self.storage, "dump.pcap"))
            else:
                shutil.copy(self.binary,
                            os.path.join(self.storage, "dump.pcap"))
        # create the logs/files directories as
        # normally the resultserver would do it
        dirnames = ["logs", "files", "aux"]
        for dirname in dirnames:
            try:
                os.makedirs(os.path.join(self.storage, dirname))
            except:
                # Best effort: the directory may already exist.
                pass
        return True

    # Acquire analysis machine.
    try:
        self.acquire_machine()
        self.db.set_task_vm(self.task.id, self.machine.label,
                            self.machine.id)
    # At this point we can tell the ResultServer about it.
    except CuckooOperationalError as e:
        machine_lock.release()
        log.error("Task #{0}: Cannot acquire machine: {1}".format(
            self.task.id, e), exc_info=True)
        return False

    # Generate the analysis configuration file.
    options = self.build_options()

    try:
        ResultServer().add_task(self.task, self.machine)
    except Exception as e:
        machinery.release(self.machine.label)
        log.exception(e, exc_info=True)
        self.errors.put(e)

    aux = RunAuxiliary(task=self.task, machine=self.machine)

    try:
        # Tracks whether machine_lock has been released yet, so the except
        # handlers know whether they still have to release it.
        unlocked = False

        # Mark the selected analysis machine in the database as started.
        guest_log = self.db.guest_start(self.task.id,
                                        self.machine.name,
                                        self.machine.label,
                                        machinery.__class__.__name__)
        # Start the machine.
        machinery.start(self.machine.label)

        # Enable network routing.
        self.route_network()

        # By the time start returns it will have fully started the Virtual
        # Machine. We can now safely release the machine lock.
        machine_lock.release()
        unlocked = True

        aux.start()

        # Initialize the guest manager.
        guest = GuestManager(self.machine.name, self.machine.ip,
                             self.machine.platform, self.task.id, self)

        options["clock"] = self.db.update_clock(self.task.id)
        self.db.guest_set_status(self.task.id, "starting")
        # Start the analysis.
        guest.start_analysis(options)
        if self.db.guest_get_status(self.task.id) == "starting":
            self.db.guest_set_status(self.task.id, "running")
            guest.wait_for_completion()

        self.db.guest_set_status(self.task.id, "stopping")
        succeeded = True
    except CuckooMachineError as e:
        if not unlocked:
            machine_lock.release()
        log.error(str(e), extra={"task_id": self.task.id}, exc_info=True)
        # The machine is presumed dead; the finally block below re-raises
        # as CuckooDeadMachine so the task gets retried elsewhere.
        dead_machine = True
    except CuckooGuestError as e:
        if not unlocked:
            machine_lock.release()
        log.error(str(e), extra={"task_id": self.task.id}, exc_info=True)
    finally:
        # Stop Auxiliary modules.
        aux.stop()

        # Take a memory dump of the machine before shutting it off.
        if self.cfg.cuckoo.memory_dump or self.task.memory:
            try:
                dump_path = get_memdump_path(self.task.id)
                need_space, space_available = free_space_monitor(
                    os.path.dirname(dump_path), return_value=True)
                if need_space:
                    log.error(
                        "Not enough free disk space! Could not dump ram (Only %d MB!)",
                        space_available)
                else:
                    machinery.dump_memory(self.machine.label, dump_path)
            except NotImplementedError:
                log.error("The memory dump functionality is not available "
                          "for the current machine manager.")
            except CuckooMachineError as e:
                log.error(e, exc_info=True)

        try:
            # Stop the analysis machine.
            machinery.stop(self.machine.label)
        except CuckooMachineError as e:
            log.warning(
                "Task #{0}: Unable to stop machine {1}: {2}".format(
                    self.task.id, self.machine.label, e))

        # Mark the machine in the database as stopped. Unless this machine
        # has been marked as dead, we just keep it as "started" in the
        # database so it'll not be used later on in this session.
        self.db.guest_stop(guest_log)

        # After all this, we can make the ResultServer forget about the
        # internal state for this analysis task.
        ResultServer().del_task(self.task, self.machine)

        # Drop the network routing rules if any.
        self.unroute_network()

        if dead_machine:
            # Remove the guest from the database, so that we can assign a
            # new guest when the task is being analyzed with another
            # machine.
            self.db.guest_remove(guest_log)

            # Remove the analysis directory that has been created so
            # far, as launch_analysis() is going to be doing that again.
            shutil.rmtree(self.storage)

            # This machine has turned dead, so we throw an exception here
            # which informs the AnalysisManager that it should analyze
            # this task again with another available machine.
            raise CuckooDeadMachine()

        try:
            # Release the analysis machine. But only if the machine has
            # not turned dead yet.
            machinery.release(self.machine.label)
        except CuckooMachineError as e:
            log.error("Task #{0}: Unable to release machine {1}, reason "
                      "{2}. You might need to restore it manually.".format(
                          self.task.id, self.machine.label, e))

    return succeeded
def launch_analysis(self) -> bool:
    """Start analysis.

    Prepares the analysis storage, acquires a machine, runs the guest
    analysis, and tears everything down again in the finally block.

    Returns:
        True when the analysis ran to completion; False on early failure
        (storage init, machine acquisition); category_checks() may also
        short-circuit with its own boolean for VM-less categories.

    Raises:
        CuckooDeadMachine: when the machine died mid-analysis, so the
            caller can retry the task on another machine.
    """
    succeeded = False
    dead_machine = False
    self.socks5s = _load_socks5_operational()

    log.info(
        "Task #%s: Starting analysis of %s '%s'",
        self.task.id,
        self.task.category.upper(),
        convert_to_printable(self.task.target),
    )

    # Initialize the analysis folders.
    if not self.init_storage():
        log.debug("Failed to initialize the analysis folder")
        return False

    # Categories that need no VM (e.g. pcap/static) are fully handled by
    # category_checks(); a bool result means "return it right away".
    category_early_escape = self.category_checks()
    if isinstance(category_early_escape, bool):
        return category_early_escape

    # Acquire analysis machine.
    try:
        self.acquire_machine()
        self.db.set_task_vm(self.task.id, self.machine.label,
                            self.machine.id)
    # At this point we can tell the ResultServer about it.
    except CuckooOperationalError as e:
        machine_lock.release()
        log.error("Task #%s: Cannot acquire machine: %s",
                  self.task.id, e, exc_info=True)
        return False

    # Generate the analysis configuration file.
    options = self.build_options()

    try:
        ResultServer().add_task(self.task, self.machine)
    except Exception as e:
        machinery.release(self.machine.label)
        log.exception(e, exc_info=True)
        self.errors.put(e)

    aux = RunAuxiliary(task=self.task, machine=self.machine)

    try:
        # Tracks whether machine_lock has been released yet, so the except
        # handlers know whether they still have to release it.
        unlocked = False

        # Mark the selected analysis machine in the database as started.
        guest_log = self.db.guest_start(self.task.id,
                                        self.machine.name,
                                        self.machine.label,
                                        machinery.__class__.__name__)
        # Start the machine.
        machinery.start(self.machine.label)

        # Enable network routing.
        self.route_network()

        # By the time start returns it will have fully started the Virtual
        # Machine. We can now safely release the machine lock.
        machine_lock.release()
        unlocked = True

        aux.start()

        # Initialize the guest manager.
        guest = GuestManager(self.machine.name, self.machine.ip,
                             self.machine.platform, self.task.id, self)

        options["clock"] = self.db.update_clock(self.task.id)
        self.db.guest_set_status(self.task.id, "starting")
        # Start the analysis.
        guest.start_analysis(options)
        if self.db.guest_get_status(self.task.id) == "starting":
            self.db.guest_set_status(self.task.id, "running")
            guest.wait_for_completion()

        self.db.guest_set_status(self.task.id, "stopping")
        succeeded = True
    except (CuckooMachineError, CuckooNetworkError) as e:
        if not unlocked:
            machine_lock.release()
        log.error(str(e), extra={"task_id": self.task.id}, exc_info=True)
        # The machine is presumed dead; the finally block below re-raises
        # as CuckooDeadMachine so the task gets retried elsewhere.
        dead_machine = True
    except CuckooGuestError as e:
        if not unlocked:
            machine_lock.release()
        log.error(str(e), extra={"task_id": self.task.id}, exc_info=True)
    finally:
        # Stop Auxiliary modules.
        aux.stop()

        # Take a memory dump of the machine before shutting it off.
        if self.cfg.cuckoo.memory_dump or self.task.memory:
            try:
                dump_path = get_memdump_path(self.task.id)
                need_space, space_available = free_space_monitor(
                    os.path.dirname(dump_path), return_value=True)
                if need_space:
                    log.error(
                        "Not enough free disk space! Could not dump ram (Only %d MB!)",
                        space_available)
                else:
                    machinery.dump_memory(self.machine.label, dump_path)
            except NotImplementedError:
                log.error("The memory dump functionality is not available for the current machine manager")
            except CuckooMachineError as e:
                log.error(e, exc_info=True)

        try:
            # Stop the analysis machine.
            machinery.stop(self.machine.label)
        except CuckooMachineError as e:
            log.warning("Task #%s: Unable to stop machine %s: %s",
                        self.task.id, self.machine.label, e)

        # Mark the machine in the database as stopped. Unless this machine
        # has been marked as dead, we just keep it as "started" in the
        # database so it'll not be used later on in this session.
        self.db.guest_stop(guest_log)

        # After all this, we can make the ResultServer forget about the
        # internal state for this analysis task.
        ResultServer().del_task(self.task, self.machine)

        # Drop the network routing rules if any.
        self.unroute_network()

        if dead_machine:
            # Remove the guest from the database, so that we can assign a
            # new guest when the task is being analyzed with another
            # machine.
            self.db.guest_remove(guest_log)

            # Remove the analysis directory that has been created so
            # far, as launch_analysis() is going to be doing that again.
            shutil.rmtree(self.storage)

            # This machine has turned dead, so we throw an exception here
            # which informs the AnalysisManager that it should analyze
            # this task again with another available machine.
            raise CuckooDeadMachine()

        try:
            # Release the analysis machine. But only if the machine has
            # not turned dead yet.
            machinery.release(self.machine.label)
        except CuckooMachineError as e:
            log.error(
                "Task #%s: Unable to release machine %s, reason %s. You might need to restore it manually",
                self.task.id,
                self.machine.label,
                e,
            )

    return succeeded
def test_free_space_monitor(mocker):
    """Smoke-test: return_value=True makes the monitor report instead of
    entering its wait loop, so the call must return immediately."""
    utils.free_space_monitor(return_value=True)
def autoprocess(parallel=1, failed_processing=False, maxtasksperchild=7, memory_debugging=False, processing_timeout=300):
    """Continuously feed completed tasks to a pool of processing workers.

    Polls the database for TASK_COMPLETED tasks (or TASK_FAILED_PROCESSING
    when failed_processing is set), schedules at most one new task per loop
    iteration on a pebble.ProcessPool, and throttles on free disk space and
    on the number of in-flight tasks.

    Args:
        parallel: max worker processes and max in-flight tasks.
        failed_processing: reprocess previously failed tasks instead of
            freshly completed ones.
        maxtasksperchild: recycle a worker process after this many tasks.
        memory_debugging: log GC object counts around each scheduling.
        processing_timeout: per-task timeout in seconds for a worker.
    """
    maxcount = cfg.cuckoo.max_analysis_count
    count = 0
    db = Database()
    # Bug fix: if anything raises before the pool is created (e.g.
    # memory_limit()), "pool" would be unbound and the finally block would
    # mask the real error with a NameError. Pre-bind it and guard below.
    pool = None
    try:
        memory_limit()
        log.info("Processing analysis data")
        with pebble.ProcessPool(max_workers=parallel, max_tasks=maxtasksperchild, initializer=init_worker) as pool:
            # CAUTION - big ugly loop ahead.
            while count < maxcount or not maxcount:
                # If not enough free disk space is available, then we print an
                # error message and wait another round (this check is ignored
                # when the freespace configuration variable is set to zero).
                if cfg.cuckoo.freespace:
                    # Resolve the full base path to the analysis folder, just in
                    # case somebody decides to make a symbolic link out of it.
                    dir_path = os.path.join(CUCKOO_ROOT, "storage", "analyses")
                    need_space, space_available = free_space_monitor(dir_path, return_value=True, processing=True)
                    if need_space:
                        log.error(
                            "Not enough free disk space! (Only %d MB!). You can change limits it in cuckoo.conf -> freespace",
                            space_available,
                        )
                        time.sleep(60)
                        continue

                # If still full, don't add more (necessary despite pool).
                if len(pending_task_id_map) >= parallel:
                    time.sleep(5)
                    continue
                if failed_processing:
                    tasks = db.list_tasks(status=TASK_FAILED_PROCESSING, limit=parallel, order_by=Task.completed_on.asc())
                else:
                    tasks = db.list_tasks(status=TASK_COMPLETED, limit=parallel, order_by=Task.completed_on.asc())
                added = False
                # For loop to add only one, nice. (reason is that we shouldn't overshoot maxcount)
                for task in tasks:
                    # Not-so-efficient lock.
                    if pending_task_id_map.get(task.id):
                        continue

                    log.info("Processing analysis data for Task #%d", task.id)
                    sample = None
                    if task.category != "url":
                        sample = db.view_sample(task.sample_id)
                        copy_path = os.path.join(CUCKOO_ROOT, "storage", "binaries", str(task.id), sample.sha256)
                    else:
                        copy_path = None

                    args = task.target, copy_path
                    kwargs = dict(report=True, auto=True, task=task, memory_debugging=memory_debugging)
                    if memory_debugging:
                        gc.collect()
                        log.info("[%d] (before) GC object counts: %d, %d", task.id, len(gc.get_objects()), len(gc.garbage))

                    future = pool.schedule(process, args, kwargs, timeout=processing_timeout)
                    pending_future_map[future] = task.id
                    pending_task_id_map[task.id] = future
                    future.add_done_callback(processing_finished)
                    if memory_debugging:
                        gc.collect()
                        log.info("[%d] (after) GC object counts: %d, %d", task.id, len(gc.get_objects()), len(gc.garbage))
                    count += 1
                    added = True
                    # Bug fix: only file-backed tasks have an origin binary
                    # copy to delete. Previously this ran for "url" tasks
                    # too, where "sample" was unbound (NameError) or stale
                    # from an earlier iteration.
                    if sample is not None:
                        copy_origin_path = os.path.join(CUCKOO_ROOT, "storage", "binaries", sample.sha256)
                        if cfg.cuckoo.delete_bin_copy and os.path.exists(copy_origin_path):
                            os.unlink(copy_origin_path)
                    break

                if not added:
                    # don't hog cpu
                    time.sleep(5)

    except KeyboardInterrupt:
        # ToDo verify in finally
        # pool.terminate()
        raise
    except MemoryError:
        mem = get_memory() / 1024 / 1024
        print("Remain: %.2f GB" % mem)
        sys.stderr.write("\n\nERROR: Memory Exception\n")
        sys.exit(1)
    except Exception:
        # Print the full traceback for operators (the bound exception was
        # previously captured but never used).
        import traceback

        traceback.print_exc()
    finally:
        # Only clean up if the pool actually came into existence.
        if pool is not None:
            pool.close()
            pool.join()
def start(self) -> None:
    """Start scheduler.

    Runs the main scheduling loop: waits until machines and disk space are
    available (skipping VM checks when only VM-less categories are being
    analyzed), fetches a pending task from the database and spawns one
    AnalysisManager thread per task. Loops until self.running is cleared,
    or until self.stop() is called once maxcount analyses have completed.
    """
    self.initialize()

    log.info("Waiting for analysis tasks")

    # To handle stop analyzing when we need to restart process without break tasks
    signal.signal(signal.SIGHUP, self.set_stop_analyzing)

    # Message queue with threads to transmit exceptions (used as IPC).
    errors = queue.Queue()

    # Command-line overrides the configuration file.
    if self.maxcount is None:
        self.maxcount = self.cfg.cuckoo.max_analysis_count

    # Start the logger which grabs database information
    if self.cfg.cuckoo.periodic_log:
        self._thr_periodic_log()

    # This loop runs forever.
    while self.running:
        time.sleep(1)

        # Wait until the machine lock is not locked. This is only the case
        # when all machines are fully running, rather that about to start
        # or still busy starting. This way we won't have race conditions
        # with finding out there are no available machines in the analysis
        # manager or having two analyses pick the same machine.
        if self.categories_need_VM:
            if not machine_lock.acquire(False):
                continue
            machine_lock.release()

        # If not enough free disk space is available, then we print an
        # error message and wait another round (this check is ignored
        # when the freespace configuration variable is set to zero).
        if self.cfg.cuckoo.freespace:
            # Resolve the full base path to the analysis folder, just in
            # case somebody decides to make a symbolic link out of it.
            dir_path = os.path.join(CUCKOO_ROOT, "storage", "analyses")
            need_space, space_available = free_space_monitor(
                dir_path, return_value=True, analysis=True)
            if need_space:
                log.error(
                    "Not enough free disk space! (Only %d MB!). You can change limits it in cuckoo.conf -> freespace",
                    space_available,
                )
                continue

        # Have we limited the number of concurrently executing machines?
        if self.cfg.cuckoo.max_machines_count > 0 and self.categories_need_VM:
            # Are too many running?
            if len(machinery.running()) >= self.cfg.cuckoo.max_machines_count:
                continue

        # If no machines are available, it's pointless to fetch for pending tasks. Loop over.
        # But if we analyze pcaps/static only it's fine
        # ToDo verify that it works with static and file/url
        if self.categories_need_VM and not machinery.availables():
            continue

        # Exits if max_analysis_count is defined in the configuration
        # file and has been reached.
        if self.maxcount and self.total_analysis_count >= self.maxcount:
            # NOTE(review): active_analysis_count appears to be a
            # module-level counter of in-flight analyses — confirm; we
            # only stop once everything has drained.
            if active_analysis_count <= 0:
                self.stop()
        else:
            if self.categories_need_VM:
                # First things first, are there pending tasks?
                if not self.db.count_tasks(status=TASK_PENDING):
                    continue
                relevant_machine_is_available = False
                # There are? Great, let's get them, ordered by priority and then oldest to newest
                for task in self.db.list_tasks(
                        status=TASK_PENDING,
                        order_by=(Task.priority.desc(), Task.added_on),
                        options_not_like="node="):
                    relevant_machine_is_available = self.db.is_relevant_machine_available(task)
                    if relevant_machine_is_available:
                        break
                if not relevant_machine_is_available:
                    task = None
                else:
                    # Re-read the chosen task so we work on a fresh copy.
                    task = self.db.view_task(task.id)
            else:
                # No VM needed: just take the next task in an analyzable
                # category.
                task = self.db.fetch_task(self.analyzing_categories)
            if task:
                log.debug("Task #%s: Processing task", task.id)
                self.total_analysis_count += 1
                # Initialize and start the analysis manager.
                analysis = AnalysisManager(task, errors)
                analysis.daemon = True
                analysis.start()

        # Deal with errors.
        try:
            raise errors.get(block=False)
        except queue.Empty:
            pass