Example #1
    def start(self):
        """Start scheduler."""
        self.initialize()

        log.info("Waiting for analysis tasks.")

        # Handle SIGHUP so we can stop analyzing and restart the process
        # without breaking running tasks.
        signal.signal(signal.SIGHUP, self.set_stop_analyzing)

        # Message queue shared with the analysis threads to transmit
        # exceptions back to the scheduler (simple IPC).
        errors = queue.Queue()

        # Command-line overrides the configuration file.
        if self.maxcount is None:
            self.maxcount = self.cfg.cuckoo.max_analysis_count

        # This loop runs forever.
        while self.running:
            time.sleep(1)

            # Wait until the machine lock is not locked. This is only the case
            # when all machines are fully running, rather than about to start
            # or still busy starting. This way we won't have race conditions
            # with finding out there are no available machines in the analysis
            # manager or having two analyses pick the same machine.
            if not machine_lock.acquire(False):
                continue

            machine_lock.release()

            # If not enough free disk space is available, then we print an
            # error message and wait another round (this check is ignored
            # when the freespace configuration variable is set to zero).
            if self.cfg.cuckoo.freespace:
                # Resolve the full base path to the analysis folder, just in
                # case somebody decides to make a symbolic link out of it.
                dir_path = os.path.join(CUCKOO_ROOT, "storage", "analyses")
                need_space, space_available = free_space_monitor(
                    dir_path, return_value=True)
                if need_space:
                    log.error("Not enough free disk space! (Only %d MB!)",
                              space_available)
                    continue

            # Have we limited the number of concurrently executing machines?
            if self.cfg.cuckoo.max_machines_count > 0:
                # Are too many running?
                if len(machinery.running()
                       ) >= self.cfg.cuckoo.max_machines_count:
                    continue

            # If no machines are available, it's pointless to fetch pending
            # tasks. Loop over.
            if not machinery.availables():
                continue

            # Exits if max_analysis_count is defined in the configuration
            # file and has been reached.
            if self.maxcount and self.total_analysis_count >= self.maxcount:
                if active_analysis_count <= 0:
                    self.stop()
            else:
                # Fetch a pending analysis task.
                # TODO: this only handles submissions made with --machine;
                # other attributes (tags etc.) still need to be supported.
                for machine in self.db.get_available_machines():
                    task = self.db.fetch(machine=machine.name)
                    if task:
                        break
                else:
                    task = self.db.fetch()
                if task:
                    log.debug("Task #{0}: Processing task".format(task.id))
                    self.total_analysis_count += 1
                    # Initialize and start the analysis manager.
                    analysis = AnalysisManager(task, errors)
                    analysis.daemon = True
                    analysis.start()

            # Deal with errors.
            try:
                raise errors.get(block=False)
            except queue.Empty:
                pass
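
The errors queue in this snippet is simple thread-to-main IPC: each AnalysisManager thread pushes any exception it hits onto the queue, and the scheduler re-raises the first queued one at the bottom of every loop iteration. A minimal, self-contained sketch of that pattern (the worker body is illustrative, not Cuckoo code):

import queue
import threading

errors = queue.Queue()

def worker(errors):
    """Hand exceptions to the main thread instead of dying silently."""
    try:
        raise RuntimeError("analysis failed")  # stand-in for real work
    except Exception as e:
        errors.put(e)

t = threading.Thread(target=worker, args=(errors,), daemon=True)
t.start()
t.join()

# Main-loop tail: re-raise the first queued error, or carry on if none.
try:
    raise errors.get(block=False)
except queue.Empty:
    pass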
Example #2
    def launch_analysis(self):
        """Start analysis."""
        succeeded = False
        dead_machine = False
        self.socks5s = _load_socks5_operational()

        log.info("Task #{0}: Starting analysis of {1} '{2}'".format(
            self.task.id, self.task.category.upper(),
            convert_to_printable(self.task.target)))

        # Initialize the analysis folders.
        if not self.init_storage():
            log.debug("Failed to initialize the analysis folder")
            return False

        if self.task.category in ["file", "pcap", "static"]:
            sha256 = File(self.task.target).get_sha256()
            # Check whether the file has been modified since submission and
            # fail the analysis if it has.
            if not self.check_file(sha256):
                return False

            # Store a copy of the original file.
            if not self.store_file(sha256):
                return False

        if self.task.category in ("pcap", "static"):
            if self.task.category == "pcap":
                if hasattr(os, "symlink"):
                    os.symlink(self.binary,
                               os.path.join(self.storage, "dump.pcap"))
                else:
                    shutil.copy(self.binary,
                                os.path.join(self.storage, "dump.pcap"))
            # Create the logs/files/aux directories that the resultserver
            # would normally create.
            for dirname in ("logs", "files", "aux"):
                try:
                    os.makedirs(os.path.join(self.storage, dirname))
                except OSError:
                    pass
            return True

        # Acquire analysis machine.
        try:
            self.acquire_machine()
            self.db.set_task_vm(self.task.id, self.machine.label,
                                self.machine.id)
        # At this point we can tell the ResultServer about it.
        except CuckooOperationalError as e:
            machine_lock.release()
            log.error("Task #{0}: Cannot acquire machine: {1}".format(
                self.task.id, e),
                      exc_info=True)
            return False

        # Generate the analysis configuration file.
        options = self.build_options()

        try:
            ResultServer().add_task(self.task, self.machine)
        except Exception as e:
            machinery.release(self.machine.label)
            log.exception(e, exc_info=True)
            self.errors.put(e)

        aux = RunAuxiliary(task=self.task, machine=self.machine)

        try:
            unlocked = False

            # Mark the selected analysis machine in the database as started.
            guest_log = self.db.guest_start(self.task.id, self.machine.name,
                                            self.machine.label,
                                            machinery.__class__.__name__)
            # Start the machine.
            machinery.start(self.machine.label)

            # Enable network routing.
            self.route_network()

            # By the time start returns it will have fully started the Virtual
            # Machine. We can now safely release the machine lock.
            machine_lock.release()
            unlocked = True

            aux.start()

            # Initialize the guest manager.
            guest = GuestManager(self.machine.name, self.machine.ip,
                                 self.machine.platform, self.task.id, self)

            options["clock"] = self.db.update_clock(self.task.id)
            self.db.guest_set_status(self.task.id, "starting")
            # Start the analysis.
            guest.start_analysis(options)
            if self.db.guest_get_status(self.task.id) == "starting":
                self.db.guest_set_status(self.task.id, "running")
                guest.wait_for_completion()

            self.db.guest_set_status(self.task.id, "stopping")
            succeeded = True
        except CuckooMachineError as e:
            if not unlocked:
                machine_lock.release()
            log.error(str(e), extra={"task_id": self.task.id}, exc_info=True)
            dead_machine = True
        except CuckooGuestError as e:
            if not unlocked:
                machine_lock.release()
            log.error(str(e), extra={"task_id": self.task.id}, exc_info=True)
        finally:
            # Stop Auxiliary modules.
            aux.stop()

            # Take a memory dump of the machine before shutting it off.
            if self.cfg.cuckoo.memory_dump or self.task.memory:
                try:
                    dump_path = get_memdump_path(self.task.id)
                    need_space, space_available = free_space_monitor(
                        os.path.dirname(dump_path), return_value=True)
                    if need_space:
                        log.error(
                            "Not enough free disk space! Could not dump ram (Only %d MB!)",
                            space_available)
                    else:
                        machinery.dump_memory(self.machine.label, dump_path)
                except NotImplementedError:
                    log.error("The memory dump functionality is not available "
                              "for the current machine manager.")

                except CuckooMachineError as e:
                    log.error(e, exc_info=True)

            try:
                # Stop the analysis machine.
                machinery.stop(self.machine.label)

            except CuckooMachineError as e:
                log.warning(
                    "Task #{0}: Unable to stop machine {1}: {2}".format(
                        self.task.id, self.machine.label, e))

            # Mark the machine in the database as stopped. Unless this machine
            # has been marked as dead, we just keep it as "started" in the
            # database so it'll not be used later on in this session.
            self.db.guest_stop(guest_log)

            # After all this, we can make the ResultServer forget about the
            # internal state for this analysis task.
            ResultServer().del_task(self.task, self.machine)

            # Drop the network routing rules if any.
            self.unroute_network()

            if dead_machine:
                # Remove the guest from the database, so that we can assign a
                # new guest when the task is being analyzed with another
                # machine.
                self.db.guest_remove(guest_log)

                # Remove the analysis directory that has been created so
                # far, as launch_analysis() is going to be doing that again.
                shutil.rmtree(self.storage)

                # This machine has turned dead, so we throw an exception here
                # which informs the AnalysisManager that it should analyze
                # this task again with another available machine.
                raise CuckooDeadMachine()

            try:
                # Release the analysis machine. But only if the machine has
                # not turned dead yet.
                machinery.release(self.machine.label)

            except CuckooMachineError as e:
                log.error("Task #{0}: Unable to release machine {1}, reason "
                          "{2}. You might need to restore it manually.".format(
                              self.task.id, self.machine.label, e))

        return succeeded
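
The CuckooDeadMachine raised in the finally block is a signal to the caller rather than a failure: launch_analysis() has already removed the guest row and the partial analysis directory, so the task can simply be launched again on another machine. A hedged sketch of the retry loop the caller might run (the exception class is stubbed here; the real one lives in Cuckoo's scheduler module):

class CuckooDeadMachine(Exception):
    """Stub for the scheduler's CuckooDeadMachine exception."""

def run_with_retries(launch, max_attempts=3):
    """Re-run launch() until it completes or we run out of attempts."""
    for attempt in range(1, max_attempts + 1):
        try:
            return launch()
        except CuckooDeadMachine:
            # Cleanup already happened inside launch_analysis(), so a
            # plain retry on a fresh machine is safe.
            print("Machine died on attempt %d, retrying" % attempt)
    return False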
Example #3
    def launch_analysis(self):
        """Start analysis."""
        succeeded = False
        dead_machine = False
        self.socks5s = _load_socks5_operational()

        log.info(
            "Task #%s: Starting analysis of %s '%s'",
            self.task.id,
            self.task.category.upper(),
            convert_to_printable(self.task.target),
        )

        # Initialize the analysis folders.
        if not self.init_storage():
            log.debug("Failed to initialize the analysis folder")
            return False

        category_early_escape = self.category_checks()
        if isinstance(category_early_escape, bool):
            return category_early_escape

        # Acquire analysis machine.
        try:
            self.acquire_machine()
            self.db.set_task_vm(self.task.id, self.machine.label, self.machine.id)
        # At this point we can tell the ResultServer about it.
        except CuckooOperationalError as e:
            machine_lock.release()
            log.error("Task #%s: Cannot acquire machine: %s", self.task.id, e, exc_info=True)
            return False

        # Generate the analysis configuration file.
        options = self.build_options()

        try:
            ResultServer().add_task(self.task, self.machine)
        except Exception as e:
            machinery.release(self.machine.label)
            log.exception(e, exc_info=True)
            self.errors.put(e)

        aux = RunAuxiliary(task=self.task, machine=self.machine)

        try:
            unlocked = False

            # Mark the selected analysis machine in the database as started.
            guest_log = self.db.guest_start(self.task.id, self.machine.name, self.machine.label, machinery.__class__.__name__)
            # Start the machine.
            machinery.start(self.machine.label)

            # Enable network routing.
            self.route_network()

            # By the time start returns it will have fully started the Virtual
            # Machine. We can now safely release the machine lock.
            machine_lock.release()
            unlocked = True

            aux.start()

            # Initialize the guest manager.
            guest = GuestManager(self.machine.name, self.machine.ip, self.machine.platform, self.task.id, self)

            options["clock"] = self.db.update_clock(self.task.id)
            self.db.guest_set_status(self.task.id, "starting")
            # Start the analysis.
            guest.start_analysis(options)
            if self.db.guest_get_status(self.task.id) == "starting":
                self.db.guest_set_status(self.task.id, "running")
                guest.wait_for_completion()

            self.db.guest_set_status(self.task.id, "stopping")
            succeeded = True
        except (CuckooMachineError, CuckooNetworkError) as e:
            if not unlocked:
                machine_lock.release()
            log.error(str(e), extra={"task_id": self.task.id}, exc_info=True)
            dead_machine = True
        except CuckooGuestError as e:
            if not unlocked:
                machine_lock.release()
            log.error(str(e), extra={"task_id": self.task.id}, exc_info=True)
        finally:
            # Stop Auxiliary modules.
            aux.stop()

            # Take a memory dump of the machine before shutting it off.
            if self.cfg.cuckoo.memory_dump or self.task.memory:
                try:
                    dump_path = get_memdump_path(self.task.id)
                    need_space, space_available = free_space_monitor(os.path.dirname(dump_path), return_value=True)
                    if need_space:
                        log.error("Not enough free disk space! Could not dump ram (Only %d MB!)", space_available)
                    else:
                        machinery.dump_memory(self.machine.label, dump_path)
                except NotImplementedError:
                    log.error("The memory dump functionality is not available for the current machine manager")

                except CuckooMachineError as e:
                    log.error(e, exc_info=True)

            try:
                # Stop the analysis machine.
                machinery.stop(self.machine.label)

            except CuckooMachineError as e:
                log.warning("Task #%s: Unable to stop machine %s: %s", self.task.id, self.machine.label, e)

            # Mark the machine in the database as stopped. Unless this machine
            # has been marked as dead, we just keep it as "started" in the
            # database so it'll not be used later on in this session.
            self.db.guest_stop(guest_log)

            # After all this, we can make the ResultServer forget about the
            # internal state for this analysis task.
            ResultServer().del_task(self.task, self.machine)

            # Drop the network routing rules if any.
            self.unroute_network()

            if dead_machine:
                # Remove the guest from the database, so that we can assign a
                # new guest when the task is being analyzed with another
                # machine.
                self.db.guest_remove(guest_log)

                # Remove the analysis directory that has been created so
                # far, as launch_analysis() is going to be doing that again.
                shutil.rmtree(self.storage)

                # This machine has turned dead, so we throw an exception here
                # which informs the AnalysisManager that it should analyze
                # this task again with another available machine.
                raise CuckooDeadMachine()

            try:
                # Release the analysis machine. But only if the machine has
                # not turned dead yet.
                machinery.release(self.machine.label)

            except CuckooMachineError as e:
                log.error(
                    "Task #%s: Unable to release machine %s, reason %s. You might need to restore it manually",
                    self.task.id,
                    self.machine.label,
                    e,
                )

        return succeeded
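
Compared to Example #2, this version folds the inline file/pcap/static handling into category_checks(), which returns True or False to end launch_analysis() early and None (a non-bool) to fall through to machine acquisition; that is why the caller tests isinstance(..., bool). A plausible reconstruction from Example #2's logic follows; the helper names on self are taken from that example, not verified against this codebase:

import os
import shutil

def category_checks(self):
    """Return True/False to short-circuit launch_analysis(), None to continue."""
    if self.task.category in ("file", "pcap", "static"):
        sha256 = File(self.task.target).get_sha256()  # File as in Example #2
        # Fail if the sample changed on disk, then keep an original copy.
        if not self.check_file(sha256) or not self.store_file(sha256):
            return False

    if self.task.category in ("pcap", "static"):
        if self.task.category == "pcap":
            dump = os.path.join(self.storage, "dump.pcap")
            if hasattr(os, "symlink"):
                os.symlink(self.binary, dump)
            else:
                shutil.copy(self.binary, dump)
        # Create the directories the resultserver would normally create.
        for dirname in ("logs", "files", "aux"):
            os.makedirs(os.path.join(self.storage, dirname), exist_ok=True)
        return True

    # file/url analyses need a VM: fall through to machine acquisition.
    return None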
Example #4
def test_free_space_monitor(mocker):
    # With return_value=True the function returns immediately instead of
    # entering its wait loop.
    utils.free_space_monitor(return_value=True)
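
The test only asserts that free_space_monitor(return_value=True) returns instead of blocking. From the call sites in the other examples, the contract appears to be: check the filesystem holding a given directory against the freespace threshold from cuckoo.conf, and with return_value=True report a (need_space, space_available_in_MB) tuple rather than sleeping until space frees up. A minimal sketch of that contract, with the threshold passed as a parameter because the real value comes from configuration:

import os
import shutil

def free_space_monitor(dir_path=os.curdir, return_value=False, threshold_mb=1024):
    """Check free disk space for dir_path against a threshold in MB."""
    space_available = shutil.disk_usage(dir_path).free // (1024 * 1024)
    need_space = space_available < threshold_mb
    if return_value:
        return need_space, space_available
    # The real function loops and sleeps here until enough space frees up;
    # this sketch only reports once.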
Example #5
def autoprocess(parallel=1, failed_processing=False, maxtasksperchild=7, memory_debugging=False, processing_timeout=300):
    maxcount = cfg.cuckoo.max_analysis_count
    count = 0
    db = Database()
    # pool = multiprocessing.Pool(parallel, init_worker)
    try:
        memory_limit()
        log.info("Processing analysis data")
        with pebble.ProcessPool(max_workers=parallel, max_tasks=maxtasksperchild, initializer=init_worker) as pool:
            # CAUTION - big ugly loop ahead.
            while count < maxcount or not maxcount:

                # If not enough free disk space is available, then we print an
                # error message and wait another round (this check is ignored
                # when the freespace configuration variable is set to zero).
                if cfg.cuckoo.freespace:
                    # Resolve the full base path to the analysis folder, just in
                    # case somebody decides to make a symbolic link out of it.
                    dir_path = os.path.join(CUCKOO_ROOT, "storage", "analyses")
                    need_space, space_available = free_space_monitor(dir_path, return_value=True, processing=True)
                    if need_space:
                        log.error(
                            "Not enough free disk space! (Only %d MB!). You can change limits it in cuckoo.conf -> freespace",
                            space_available,
                        )
                        time.sleep(60)
                        continue

                # If the pool is already saturated, don't schedule more tasks
                # (the pool would queue them, but we must not overshoot maxcount).
                if len(pending_task_id_map) >= parallel:
                    time.sleep(5)
                    continue
                if failed_processing:
                    tasks = db.list_tasks(status=TASK_FAILED_PROCESSING, limit=parallel, order_by=Task.completed_on.asc())
                else:
                    tasks = db.list_tasks(status=TASK_COMPLETED, limit=parallel, order_by=Task.completed_on.asc())
                added = False
                # Schedule at most one task per iteration so we never overshoot maxcount.
                for task in tasks:
                    # Not-so-efficient lock.
                    if pending_task_id_map.get(task.id):
                        continue
                    log.info("Processing analysis data for Task #%d", task.id)
                    if task.category != "url":
                        sample = db.view_sample(task.sample_id)
                        copy_path = os.path.join(CUCKOO_ROOT, "storage", "binaries", str(task.id), sample.sha256)
                    else:
                        copy_path = None
                    args = task.target, copy_path
                    kwargs = dict(report=True, auto=True, task=task, memory_debugging=memory_debugging)
                    if memory_debugging:
                        gc.collect()
                        log.info("[%d] (before) GC object counts: %d, %d", task.id, len(gc.get_objects()), len(gc.garbage))
                    # result = pool.apply_async(process, args, kwargs)
                    future = pool.schedule(process, args, kwargs, timeout=processing_timeout)
                    pending_future_map[future] = task.id
                    pending_task_id_map[task.id] = future
                    future.add_done_callback(processing_finished)
                    if memory_debugging:
                        gc.collect()
                        log.info("[%d] (after) GC object counts: %d, %d", task.id, len(gc.get_objects()), len(gc.garbage))
                    count += 1
                    added = True
                    copy_origin_path = os.path.join(CUCKOO_ROOT, "storage", "binaries", sample.sha256)
                    if cfg.cuckoo.delete_bin_copy and os.path.exists(copy_origin_path):
                        os.unlink(copy_origin_path)
                    break
                if not added:
                    # don't hog cpu
                    time.sleep(5)
    except KeyboardInterrupt:
        # ToDo verify in finally
        # pool.terminate()
        raise
    except MemoryError:
        mem = get_memory() / 1024 / 1024
        print("Remain: %.2f GB" % mem)
        sys.stderr.write("\n\nERROR: Memory Exception\n")
        sys.exit(1)
    except Exception:
        import traceback

        traceback.print_exc()
    finally:
        # The "with" block closes and joins the pool on normal exit; guard
        # against exceptions raised before the pool was created.
        if "pool" in locals():
            pool.close()
            pool.join()
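
Every scheduled future gets processing_finished as a done-callback; given the bookkeeping above, it must at least pop the task out of pending_future_map and pending_task_id_map and surface timeouts from the pebble pool. A hedged sketch of such a callback (the real one likely also updates task status in the database):

import logging
from concurrent.futures import TimeoutError
from pebble import ProcessExpired

log = logging.getLogger(__name__)
pending_future_map = {}   # future -> task id, as used above
pending_task_id_map = {}  # task id -> future

def processing_finished(future):
    """Drop the finished future from the pending maps and log the outcome."""
    task_id = pending_future_map.get(future)
    try:
        future.result()
        log.info("Task #%s: processing finished", task_id)
    except TimeoutError:
        log.error("Task #%s: processing timed out and was cancelled", task_id)
    except ProcessExpired as e:
        log.error("Task #%s: worker died with exit code %s", task_id, e.exitcode)
    except Exception as e:
        log.error("Task #%s: processing failed: %s", task_id, e)
    finally:
        pending_future_map.pop(future, None)
        pending_task_id_map.pop(task_id, None)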
Example #6
    def start(self):
        """Start scheduler."""
        self.initialize()

        log.info("Waiting for analysis tasks")

        # Handle SIGHUP so we can stop analyzing and restart the process
        # without breaking running tasks.
        signal.signal(signal.SIGHUP, self.set_stop_analyzing)

        # Message queue shared with the analysis threads to transmit
        # exceptions back to the scheduler (simple IPC).
        errors = queue.Queue()

        # Command-line overrides the configuration file.
        if self.maxcount is None:
            self.maxcount = self.cfg.cuckoo.max_analysis_count

        # Start the periodic logger thread that reports database statistics.
        if self.cfg.cuckoo.periodic_log:
            self._thr_periodic_log()

        # This loop runs forever.
        while self.running:
            time.sleep(1)
            # Wait until the machine lock is not locked. This is only the case
            # when all machines are fully running, rather than about to start
            # or still busy starting. This way we won't have race conditions
            # with finding out there are no available machines in the analysis
            # manager or having two analyses pick the same machine.
            if self.categories_need_VM:
                if not machine_lock.acquire(False):
                    continue
                machine_lock.release()

            # If not enough free disk space is available, then we print an
            # error message and wait another round (this check is ignored
            # when the freespace configuration variable is set to zero).
            if self.cfg.cuckoo.freespace:
                # Resolve the full base path to the analysis folder, just in
                # case somebody decides to make a symbolic link out of it.
                dir_path = os.path.join(CUCKOO_ROOT, "storage", "analyses")
                need_space, space_available = free_space_monitor(
                    dir_path, return_value=True, analysis=True)
                if need_space:
                    log.error(
                        "Not enough free disk space! (Only %d MB!). You can change limits it in cuckoo.conf -> freespace",
                        space_available,
                    )
                    continue

            # Have we limited the number of concurrently executing machines?
            if self.cfg.cuckoo.max_machines_count > 0 and self.categories_need_VM:
                # Are too many running?
                if len(machinery.running()
                       ) >= self.cfg.cuckoo.max_machines_count:
                    continue

            # If no machines are available, it's pointless to fetch pending
            # tasks. Loop over. If we only analyze pcaps/static tasks, no
            # machine is needed, so this check is skipped.
            # ToDo verify that it works with static and file/url
            if self.categories_need_VM and not machinery.availables():
                continue
            # Exits if max_analysis_count is defined in the configuration
            # file and has been reached.
            if self.maxcount and self.total_analysis_count >= self.maxcount:
                if active_analysis_count <= 0:
                    self.stop()
            else:
                if self.categories_need_VM:
                    # First things first, are there pending tasks?
                    if not self.db.count_tasks(status=TASK_PENDING):
                        continue
                    relevant_machine_is_available = False
                    # There are? Great, let's get them, ordered by priority and then oldest to newest
                    for task in self.db.list_tasks(
                            status=TASK_PENDING,
                            order_by=(Task.priority.desc(), Task.added_on),
                            options_not_like="node="):
                        relevant_machine_is_available = self.db.is_relevant_machine_available(
                            task)
                        if relevant_machine_is_available:
                            break
                    if not relevant_machine_is_available:
                        task = None
                    else:
                        task = self.db.view_task(task.id)
                else:
                    task = self.db.fetch_task(self.analyzing_categories)
                if task:
                    log.debug("Task #%s: Processing task", task.id)
                    self.total_analysis_count += 1
                    # Initialize and start the analysis manager.
                    analysis = AnalysisManager(task, errors)
                    analysis.daemon = True
                    analysis.start()

            # Deal with errors.
            try:
                raise errors.get(block=False)
            except queue.Empty:
                pass
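
The _thr_periodic_log() hook started near the top of this example is not shown in any of the snippets; the usual shape for such a hook is a self-rescheduling timer thread that logs a few database counters. A sketch under that assumption (the 10-second interval and the logged fields are guesses, not taken from the source):

import threading

def _thr_periodic_log(self):
    """Log queue statistics, then reschedule this method on a timer."""
    # count_tasks() and TASK_PENDING are used elsewhere in Example #6;
    # the interval here is an assumption.
    log.debug("# Pending tasks: %d", self.db.count_tasks(status=TASK_PENDING))
    timer = threading.Timer(10, self._thr_periodic_log)
    timer.daemon = True
    timer.start()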