Esempio n. 1
0
    def test_import_confirm(self, p):
        """Run the ``import`` command on a prepared setup and verify it.

        @param p: patched callable injected from outside this view
                  (presumably the confirmation prompt - TODO confirm); it
                  is forced to return True.

        If the command raises a CuckooOperationalError mentioning the SQL
        database dump command, the test only passes on non-Linux platforms.
        """
        set_cwd(tempfile.mkdtemp())
        p.return_value = True

        dirpath = init_legacy_analyses()
        common_dir = os.path.join(dirpath, "lib", "cuckoo", "common")
        os.makedirs(common_dir)

        # Context managers make sure every handle is flushed and closed
        # before the import command reads the files back.
        with open(os.path.join(common_dir, "constants.py"), "wb") as f:
            f.write(constants_11_py)

        shutil.copytree(
            "tests/files/conf/110_plain", os.path.join(dirpath, "conf")
        )

        # Fill in the database connection string of the copied configuration.
        filepath = os.path.join(dirpath, "conf", "cuckoo.conf")
        with open(filepath, "rb") as f:
            buf = f.read()
        with open(filepath, "wb") as f:
            f.write(buf.replace(
                "connection =", "connection = %s" % self.URI
            ))

        try:
            main.main(
                ("--cwd", cwd(), "import", dirpath), standalone_mode=False
            )
        except CuckooOperationalError as e:
            assert "SQL database dump as the command" in e.message
            assert not is_linux()
            return

        db = Database()
        db.connect()
        assert db.engine.name == self.ENGINE
        with open(cwd("logs", "a.txt", analysis=1), "rb") as f:
            assert f.read() == "a"
        assert config("cuckoo:database:connection") == self.URI
        assert db.count_tasks() == 2
Esempio n. 2
0
def process_task_range(tasks):
    """Process every task named in a comma-separated list of IDs and ranges.

    @param tasks: string such as "1,3,5-8"; each element is either a single
                  task ID or an inclusive "start-end" range.

    Elements that are neither are reported with a warning and skipped. A
    task that has no database entry is processed with a minimal default
    description, as long as its analysis directory exists.
    """
    db = Database()
    wanted = set()

    for chunk in tasks.split(","):
        if chunk.isdigit():
            wanted.add(int(chunk))
            continue

        if chunk.count("-") != 1:
            # Empty elements (e.g. from a trailing comma) are ignored
            # silently, anything else is malformed.
            if chunk:
                log.warning("Invalid range provided: %s", chunk)
            continue

        first, last = chunk.split("-")
        if first.isdigit() and last.isdigit():
            wanted.update(range(int(first), int(last) + 1))
        else:
            log.warning("Invalid range provided: %s", chunk)

    for task_id in sorted(wanted):
        row = db.view_task(task_id)
        if row:
            task = row.to_dict()
        else:
            # No database entry for this ID, fall back to a bare task.
            task = {
                "id": task_id,
                "category": "file",
                "target": "",
                "options": {},
                "package": None,
                "custom": None,
            }

        if os.path.isdir(cwd(analysis=task_id)):
            process_task(Dictionary(task))
Esempio n. 3
0
    def test_import_noconfirm(self, p):
        """Run the ``import`` command with the second prompt answered False.

        @param p: patched callable injected from outside this view
                  (presumably the confirmation prompt - TODO confirm); its
                  successive calls return True and then False.
        """
        set_cwd(tempfile.mkdtemp())
        p.side_effect = True, False

        dirpath = init_legacy_analyses()
        common_dir = os.path.join(dirpath, "lib", "cuckoo", "common")
        os.makedirs(common_dir)

        # Context managers make sure every handle is flushed and closed
        # before the import command reads the files back.
        with open(os.path.join(common_dir, "constants.py"), "wb") as f:
            f.write(constants_11_py)

        shutil.copytree(
            "tests/files/conf/110_plain", os.path.join(dirpath, "conf")
        )

        # Fill in the database connection string of the copied configuration.
        filepath = os.path.join(dirpath, "conf", "cuckoo.conf")
        with open(filepath, "rb") as f:
            buf = f.read()
        with open(filepath, "wb") as f:
            f.write(buf.replace(
                "connection =", "connection = %s" % self.URI
            ))

        main.main(
            ("--cwd", cwd(), "import", dirpath), standalone_mode=False
        )

        db = Database()
        db.connect()
        assert db.engine.name == self.ENGINE
        with open(cwd("logs", "a.txt", analysis=1), "rb") as f:
            assert f.read() == "a"
        assert config("cuckoo:database:connection") == self.URI
        assert db.count_tasks() == 2
Esempio n. 4
0
def process_tasks(instance, maxcount, timeout):
    """Keep fetching and processing tasks until told to stop.

    @param instance: identifier handed to the database when asking for the
                     next task to process.
    @param maxcount: forwarded to process_check_stop() together with the
                     number of tasks processed so far.
    @param timeout: number of seconds from now used to compute the end time
                    passed to process_check_stop(); a falsy value results in
                    an end time of zero.
    """
    db = Database()
    processed = 0
    endtime = int(time.time() + timeout) if timeout else 0

    try:
        while process_check_stop(processed, maxcount, endtime):
            task_id = db.processing_get_task(instance)

            if task_id is not None:
                task = db.view_task(task_id)

                log.info("Task #%d: reporting task", task.id)

                process_task(task.to_dict())
                processed += 1
            else:
                # Nothing available right now, wait a small while before
                # trying to fetch a new task.
                time.sleep(1)
    except Exception as e:
        log.exception("Caught unknown exception: %s", e)
Esempio n. 5
0
def test_connect_default(p, q):
    """Connect with the default configuration and check the SQLite setup.

    @param p: patched object injected from outside this view (unused here).
    @param q: patched callable injected from outside this view; expected to
              be called exactly once with the SQLite URI of the "cuckoo.db"
              file and the connect arguments.
    """
    set_cwd(tempfile.mkdtemp())
    cuckoo_create()

    database = Database()
    database.connect(create=False)

    expected_uri = "sqlite:///%s" % cwd("cuckoo.db")
    q.assert_called_once_with(
        expected_uri, connect_args={"check_same_thread": False}
    )
    assert database.engine.pool_timeout == 60
Esempio n. 6
0
    def __init__(self):
        """Create the database handle and empty the machine table."""
        self.db = Database()
        self.options = None

        # The machine table is filled from the configuration file at each
        # start, so it is cleaned first.
        self.db.clean_machines()
Esempio n. 7
0
def test_connect_pg(p, q):
    """Connect with a PostgreSQL URI and a custom timeout configured.

    @param p: patched object injected from outside this view (unused here).
    @param q: patched callable injected from outside this view; expected to
              be called exactly once with the configured URI and the
              connect arguments.
    """
    uri = "postgresql://*****:*****@localhost/foobar"
    database_cfg = {
        "connection": uri,
        "timeout": 120,
    }

    set_cwd(tempfile.mkdtemp())
    cuckoo_create(cfg={"cuckoo": {"database": database_cfg}})

    database = Database()
    database.connect(create=False)
    q.assert_called_once_with(uri, connect_args={"sslmode": "disable"})
    assert database.engine.pool_timeout == 120
Esempio n. 8
0
def process_tasks(instance, maxcount):
    """Keep fetching and processing tasks until maxcount has been reached.

    @param instance: identifier handed to the database when asking for the
                     next task to process.
    @param maxcount: number of tasks to process before returning; a falsy
                     value means the loop is not bounded by a count.
    """
    db = Database()
    processed = 0

    try:
        while not (maxcount and processed == maxcount):
            task_id = db.processing_get_task(instance)

            if task_id is not None:
                task = db.view_task(task_id)

                log.info("Task #%d: reporting task", task.id)

                process_task(task.to_dict())
                processed += 1
            else:
                # Nothing available right now, wait a small while before
                # trying to fetch a new task.
                time.sleep(1)
    except Exception as e:
        log.exception("Caught unknown exception: %s", e)
Esempio n. 9
0
def process_task(task):
    """Process and report a single task and store the resulting status.

    @param task: mapping with the task details. The keys "id", "target",
                 "category", "package", "options" and "custom" are read, as
                 well as "sample_id" when present.

    The task status is set to TASK_REPORTED on success and to
    TASK_FAILED_PROCESSING when processing raises. Exceptions are logged
    and never propagated to the caller.
    """
    db = Database()

    try:
        task_log_start(task["id"])

        logger(
            "Starting task reporting",
            action="task.report", status="pending",
            target=task["target"], category=task["category"],
            package=task["package"], options=emit_options(task["options"]),
            custom=task["custom"]
        )

        if task["category"] == "file" and task.get("sample_id"):
            sample = db.view_sample(task["sample_id"])
            copy_path = cwd("storage", "binaries", sample.sha256)
        else:
            copy_path = None

        try:
            process(task["target"], copy_path, task)
            db.set_status(task["id"], TASK_REPORTED)
        except Exception as e:
            log.exception("Task #%d: error reporting: %s", task["id"], e)
            db.set_status(task["id"], TASK_FAILED_PROCESSING)
        else:
            # Only announce success when processing actually succeeded; a
            # failed task must not emit a "success" record right after its
            # status has been set to failed.
            log.info(
                "Task #%d: reports generation completed", task["id"],
                extra={
                    "action": "task.report", "status": "success",
                }
            )
    except Exception as e:
        log.exception("Caught unknown exception: %s", e)
    finally:
        task_log_stop(task["id"])
Esempio n. 10
0
    def __init__(self, task_id, error_queue):
        """Set up the state for the analysis of one task.
        @param task_id: ID used to look up the task in the database.
        @param error_queue: queue object kept as self.errors.
        """
        threading.Thread.__init__(self)

        self.cfg = Config()
        self.db = Database()
        self.task = self.db.view_task(task_id)
        self.errors = error_queue

        # Initial placeholder values.
        self.storage = self.binary = self.storage_binary = ""
        self.machine = self.guest_manager = None
        self.route = self.interface = self.rt_table = None
Esempio n. 11
0
def cuckoo_machine(vmname, action, ip, platform, options, tags,
                   interface, snapshot, resultserver):
    """Add a virtual machine to, or delete one from, the configuration.

    @param vmname: name of the virtual machine (also used as its label).
    @param action: "add" or "delete"; any other value only rewrites the
                   configuration unchanged.
    @param ip: IP address of the machine, required when adding.
    @param platform: platform of the machine.
    @param options: machine options.
    @param tags: machine tags.
    @param interface: network interface of the machine.
    @param snapshot: snapshot name.
    @param resultserver: optional "ip:port" string; when it is missing or
                         not in that form, the values from the "cuckoo"
                         configuration are used instead.

    Exits the process through sys.exit() with a message on invalid input.
    """
    db = Database()

    cfg = Config.from_confdir(cwd("conf"))
    machinery = cfg["cuckoo"]["cuckoo"]["machinery"]
    machines = cfg[machinery][machinery]["machines"]

    if action == "add":
        if not ip:
            sys.exit("You have to specify a legitimate IP address for --add.")

        # The name may be taken in the database or in the configuration.
        if db.view_machine(vmname) or vmname in machines:
            sys.exit("A Virtual Machine with this name already exists!")

        if resultserver and resultserver.count(":") == 1:
            rs_ip, rs_port = resultserver.split(":")
            rs_port = int(rs_port)
        else:
            rs_cfg = cfg["cuckoo"]["resultserver"]
            rs_ip, rs_port = rs_cfg["ip"], rs_cfg["port"]

        machines.append(vmname)
        cfg[machinery][vmname] = {
            "label": vmname,
            "platform": platform,
            "ip": ip,
            "options": options,
            "snapshot": snapshot,
            "interface": interface,
            "resultserver_ip": rs_ip,
            "resultserver_port": rs_port,
            "tags": tags,
        }

        db.add_machine(
            vmname, vmname, ip, platform, options, tags, interface, snapshot,
            rs_ip, int(rs_port)
        )
        db.unlock_machine(vmname)
    elif action == "delete":
        # TODO Add a db.del_machine() function for runtime modification.
        if vmname not in machines:
            sys.exit("A Virtual Machine with this name doesn't exist!")

        machines.remove(vmname)
        cfg[machinery].pop(vmname)

    write_cuckoo_conf(cfg=cfg)
Esempio n. 12
0
def init_tasks():
    """Clean up tasks left in an unfinished state.

    Tasks with status TASK_RUNNING are rescheduled when the
    "cuckoo:cuckoo:reschedule" setting is enabled and marked as
    TASK_FAILED_ANALYSIS otherwise. Pending tasks of category "service" are
    always marked as TASK_FAILED_ANALYSIS.
    """
    db = Database()

    log.debug("Checking for locked tasks..")
    for task in db.list_tasks(status=TASK_RUNNING):
        if not config("cuckoo:cuckoo:reschedule"):
            db.set_status(task.id, TASK_FAILED_ANALYSIS)
            log.info(
                "Updated running task ID %s status to failed_analysis",
                task.id
            )
            continue

        new_task_id = db.reschedule(task.id)
        log.info(
            "Rescheduled task with ID %s and target %s: task #%s",
            task.id, task.target, new_task_id
        )

    log.debug("Checking for pending service tasks..")
    for task in db.list_tasks(status=TASK_PENDING, category="service"):
        db.set_status(task.id, TASK_FAILED_ANALYSIS)
Esempio n. 13
0
class Machinery(object):
    """Base abstract class for machinery modules.

    Besides the methods raising NotImplementedError below, this class also
    uses self._status(), self.POWEROFF and self.ABORTED, none of which are
    defined here (presumably provided by the concrete machinery modules).
    """

    # Default label used in machinery configuration file to supply virtual
    # machine name/label/vmx path. Override it if you dubbed it in another
    # way.
    LABEL = "label"

    def __init__(self):
        """Create the database handle and empty the machine table."""
        self.options = None
        self.db = Database()

        # Machine table is cleaned to be filled from configuration file
        # at each start.
        self.db.clean_machines()

    @classmethod
    def init_once(cls):
        """Hook that does nothing in this base class."""
        pass

    def pcap_path(self, task_id):
        """Returns the .pcap path for this task id."""
        return cwd("storage", "analyses", "%s" % task_id, "dump.pcap")

    def set_options(self, options):
        """Set machine manager options.
        @param options: machine manager options dict.
        """
        self.options = options

    def initialize(self, module_name):
        """Read, load, and verify machines configuration.
        @param module_name: module name.
        """
        # Load.
        self._initialize(module_name)

        # Run initialization checks.
        self._initialize_check()

    def _initialize(self, module_name):
        """Read configuration and register each machine in the database.
        @param module_name: module name.
        """
        machinery = self.options.get(module_name)
        for vmname in machinery["machines"]:
            options = self.options.get(vmname)

            # If configured, use specific network interface for this
            # machine, else use the default value.
            if options.get("interface"):
                interface = options["interface"]
            else:
                interface = machinery.get("interface")

            if options.get("resultserver_ip"):
                ip = options["resultserver_ip"]
            else:
                ip = config("cuckoo:resultserver:ip")

            if options.get("resultserver_port"):
                port = options["resultserver_port"]
            else:
                # The ResultServer port might have been dynamically changed,
                # get it from the ResultServer singleton. Also avoid import
                # recursion issues by importing ResultServer here.
                from cuckoo.core.resultserver import ResultServer
                port = ResultServer().port

            self.db.add_machine(
                name=vmname,
                label=options[self.LABEL],
                ip=options.ip,
                platform=options.platform,
                options=options.get("options", ""),
                tags=options.tags,
                interface=interface,
                snapshot=options.snapshot,
                resultserver_ip=ip,
                resultserver_port=port
            )

    def _initialize_check(self):
        """Runs checks against virtualization software when a machine manager
        is initialized.
        @note: in machine manager modules you may override or superclass
               this method.
        @raise CuckooCriticalError: if a machine cannot be brought into its
                                    proper state or the vm_state timeout is
                                    not configured.
        """
        try:
            configured_vms = self._list()
        except NotImplementedError:
            # Modules that cannot list their machines skip the checks.
            return

        for machine in self.machines():
            # If this machine is already in the "correct" state, then we
            # go on to the next machine.
            if machine.label in configured_vms and \
                    self._status(machine.label) in [self.POWEROFF, self.ABORTED]:
                continue

            # This machine is currently not in its correct state, we're going
            # to try to shut it down. If that works, then the machine is fine.
            try:
                self.stop(machine.label)
            except CuckooMachineError as e:
                raise CuckooCriticalError(
                    "Please update your configuration. Unable to shut '%s' "
                    "down or find the machine in its proper state: %s" %
                    (machine.label, e)
                )

        if not config("cuckoo:timeouts:vm_state"):
            raise CuckooCriticalError(
                "Virtual machine state change timeout has not been set "
                "properly, please update it to be non-null."
            )

    def machines(self):
        """List virtual machines.
        @return: virtual machines list
        """
        return self.db.list_machines()

    def availables(self):
        """How many machines are free.
        @return: free machines count.
        """
        return self.db.count_machines_available()

    def acquire(self, machine_id=None, platform=None, tags=None):
        """Acquire a machine to start analysis.
        @param machine_id: machine ID.
        @param platform: machine platform.
        @param tags: machine tags
        @return: machine or None.
        """
        # A specific machine takes precedence over platform and tags.
        if machine_id:
            return self.db.lock_machine(label=machine_id)
        elif platform:
            return self.db.lock_machine(platform=platform, tags=tags)
        else:
            return self.db.lock_machine(tags=tags)

    def release(self, label=None):
        """Release a machine.
        @param label: machine name.
        """
        self.db.unlock_machine(label)

    def running(self):
        """Returns running virtual machines.
        @return: running virtual machines list.
        """
        return self.db.list_machines(locked=True)

    def shutdown(self):
        """Shutdown the machine manager. Kills all alive machines.

        A machine that fails to stop is logged with a warning and does not
        prevent the remaining machines from being stopped.
        """
        # Query the running machines once, so that the logged count and the
        # machines being stopped come from the same snapshot.
        running = self.running()
        if not running:
            return

        log.info("Still %s guests alive. Shutting down...", len(running))
        for machine in running:
            try:
                self.stop(machine.label)
            except CuckooMachineError as e:
                log.warning("Unable to shutdown machine %s, please check "
                            "manually. Error: %s", machine.label, e)

    def set_status(self, label, status):
        """Set status for a virtual machine.
        @param label: virtual machine label
        @param status: new virtual machine status
        """
        self.db.set_machine_status(label, status)

    def start(self, label, task):
        """Start a machine.
        @param label: machine name.
        @param task: task object.
        @raise NotImplementedError: this method is abstract.
        """
        raise NotImplementedError

    def stop(self, label=None):
        """Stop a machine.
        @param label: machine name.
        @raise NotImplementedError: this method is abstract.
        """
        raise NotImplementedError

    def _list(self):
        """Lists virtual machines configured.
        @raise NotImplementedError: this method is abstract.
        """
        raise NotImplementedError

    def dump_memory(self, label, path):
        """Takes a memory dump of a machine.
        @param label: machine name.
        @param path: path to where to store the memory dump.
        @raise NotImplementedError: this method is abstract.
        """
        raise NotImplementedError

    def _wait_status(self, label, *states):
        """Waits for a vm status.
        @param label: virtual machine name.
        @param states: one or more acceptable virtual machine statuses.
        @raise CuckooMachineError: if default waiting timeout expire.
        """
        # This block was originally suggested by Loic Jaquemet.
        waitme = 0
        try:
            current = self._status(label)
        except NameError:
            # NOTE(review): a missing _status() method would raise
            # AttributeError rather than NameError - confirm which failure
            # this is meant to tolerate.
            return

        while current not in states:
            log.debug("Waiting %i cuckooseconds for machine %s to switch "
                      "to status %s", waitme, label, states)
            if waitme > config("cuckoo:timeouts:vm_state"):
                raise CuckooMachineError(
                    "Timeout hit while for machine %s to change status" % label
                )

            # Poll the status once per second.
            time.sleep(1)
            waitme += 1
            current = self._status(label)
Esempio n. 14
0
class Scheduler(object):
    """Tasks Scheduler.

    This class is responsible for the main execution loop of the tool. It
    prepares the analysis machines and keeps waiting for and loading new
    analysis tasks.
    Whenever a new task is available, it launches AnalysisManager which will
    take care of running the full analysis process and operating with the
    assigned analysis machine.
    """
    def __init__(self, maxcount=None):
        """@param maxcount: maximum number of analyses to start; when None,
        the max_analysis_count configuration value is used in start()."""
        self.running = True
        self.cfg = Config()
        self.db = Database()
        self.maxcount = maxcount
        self.total_analysis_count = 0

    def initialize(self):
        """Initialize the machine manager.
        @raise CuckooCriticalError: if the machines cannot be initialized
                                    or none are available.
        """
        global machinery, machine_lock

        machinery_name = self.cfg.cuckoo.machinery

        max_vmstartup_count = self.cfg.cuckoo.max_vmstartup_count
        if max_vmstartup_count:
            machine_lock = threading.Semaphore(max_vmstartup_count)
        else:
            machine_lock = threading.Lock()

        log.info("Using \"%s\" as machine manager", machinery_name, extra={
            "action": "init.machinery",
            "status": "success",
            "machinery": machinery_name,
        })

        # Initialize the machine manager.
        machinery = cuckoo.machinery.plugins[machinery_name]()

        # Provide a dictionary with the configuration options to the
        # machine manager instance.
        machinery.set_options(Config(machinery_name))

        # Initialize the machine manager.
        try:
            machinery.initialize(machinery_name)
        except CuckooMachineError as e:
            raise CuckooCriticalError("Error initializing machines: %s" % e)

        # Fetch the machine list once and reuse it for all checks below
        # instead of asking the machine manager again for every check.
        machines = machinery.machines()
        machine_count = len(machines)

        # At this point all the available machines should have been identified
        # and added to the list. If none were found, Cuckoo aborts the
        # execution. TODO In the future we'll probably want get rid of this.
        if not machines:
            raise CuckooCriticalError("No machines available.")

        log.info("Loaded %s machine/s", machine_count, extra={
            "action": "init.machines",
            "status": "success",
            "count": machine_count,
        })

        if machine_count > 1 and self.db.engine.name == "sqlite":
            log.warning("As you've configured Cuckoo to execute parallel "
                        "analyses, we recommend you to switch to a MySQL or "
                        "a PostgreSQL database as SQLite might cause some "
                        "issues.")

        if machine_count > 4 and self.cfg.cuckoo.process_results:
            log.warning("When running many virtual machines it is recommended "
                        "to process the results in separate 'cuckoo process' "
                        "instances to increase throughput and stability. "
                        "Please read the documentation about the "
                        "`Processing Utility`.")

        # Drop all existing packet forwarding rules for each VM. Just in case
        # Cuckoo was terminated for some reason and various forwarding rules
        # have thus not been dropped yet.
        for machine in machines:
            if not machine.interface:
                log.info("Unable to determine the network interface for VM "
                         "with name %s, Cuckoo will not be able to give it "
                         "full internet access or route it through a VPN! "
                         "Please define a default network interface for the "
                         "machinery or define a network interface for each "
                         "VM.", machine.name)
                continue

            # Drop forwarding rule to each VPN.
            if config("routing:vpn:enabled"):
                for vpn in config("routing:vpn:vpns"):
                    rooter(
                        "forward_disable", machine.interface,
                        config("routing:%s:interface" % vpn), machine.ip
                    )

            # Drop forwarding rule to the internet / dirty line.
            if config("routing:routing:internet") != "none":
                rooter(
                    "forward_disable", machine.interface,
                    config("routing:routing:internet"), machine.ip
                )

    def stop(self):
        """Stop scheduler."""
        self.running = False
        # Shutdown machine manager (used to kill machines that still alive).
        machinery.shutdown()

    def start(self):
        """Start scheduler.

        Runs the scheduling loop until self.running becomes False. An
        exception placed on the error queue by an analysis thread is
        re-raised from here.
        """
        self.initialize()

        log.info("Waiting for analysis tasks.")

        # Message queue with threads to transmit exceptions (used as IPC).
        errors = Queue.Queue()

        # Command-line overrides the configuration file.
        if self.maxcount is None:
            self.maxcount = self.cfg.cuckoo.max_analysis_count

        # This loop runs forever.
        while self.running:
            time.sleep(1)

            # Wait until the machine lock is not locked. This is only the case
            # when all machines are fully running, rather than about to start
            # or still busy starting. This way we won't have race conditions
            # with finding out there are no available machines in the analysis
            # manager or having two analyses pick the same machine.
            if not machine_lock.acquire(False):
                logger(
                    "Could not acquire machine lock",
                    action="scheduler.machine_lock", status="busy"
                )
                continue

            machine_lock.release()

            # If not enough free disk space is available, then we print an
            # error message and wait another round (this check is ignored
            # when the freespace configuration variable is set to zero).
            if self.cfg.cuckoo.freespace:
                # Resolve the full base path to the analysis folder, just in
                # case somebody decides to make a symbolic link out of it.
                dir_path = cwd("storage", "analyses")

                # TODO: Windows support
                if hasattr(os, "statvfs"):
                    dir_stats = os.statvfs(dir_path.encode("utf8"))

                    # Calculate the free disk space in megabytes.
                    space_available = dir_stats.f_bavail * dir_stats.f_frsize
                    space_available /= 1024 * 1024

                    if space_available < self.cfg.cuckoo.freespace:
                        log.error(
                            "Not enough free disk space! (Only %d MB!)",
                            space_available, extra={
                                "action": "scheduler.diskspace",
                                "status": "error",
                                "available": space_available,
                            }
                        )
                        continue

            # If we have limited the number of concurrently executing machines,
            # are we currently at the maximum?
            maxvm = self.cfg.cuckoo.max_machines_count
            if maxvm and len(machinery.running()) >= maxvm:
                logger(
                    "Already maxed out on running machines",
                    action="scheduler.machines", status="maxed"
                )
                continue

            # If no machines are available, it's pointless to fetch for
            # pending tasks. Loop over.
            if not machinery.availables():
                logger(
                    "No available machines",
                    action="scheduler.machines", status="none"
                )
                continue

            # Exits if max_analysis_count is defined in the configuration
            # file and has been reached.
            if self.maxcount and self.total_analysis_count >= self.maxcount:
                if active_analysis_count <= 0:
                    log.debug("Reached max analysis count, exiting.", extra={
                        "action": "scheduler.max_analysis",
                        "status": "success",
                        "limit": self.total_analysis_count,
                    })
                    self.stop()
                else:
                    logger(
                        "Maximum analyses hit, awaiting active to finish off",
                        action="scheduler.max_analysis", status="busy",
                        active=active_analysis_count
                    )
                continue

            # Fetch a pending analysis task.
            # TODO This fixes only submissions by --machine, need to add
            # other attributes (tags etc).
            # TODO We should probably move the entire "acquire machine" logic
            # from the Analysis Manager to the Scheduler and then pass the
            # selected machine onto the Analysis Manager instance.
            task, available = None, False
            for machine in self.db.get_available_machines():
                task = self.db.fetch(machine=machine.name)
                if task:
                    break

                if machine.is_analysis():
                    available = True

            # We only fetch a new task if at least one of the available
            # machines is not a "service" machine (again, please refer to the
            # services auxiliary module for more information on service VMs).
            if not task and available:
                task = self.db.fetch(service=False)

            if task:
                log.debug("Processing task #%s", task.id)
                self.total_analysis_count += 1

                # Initialize and start the analysis manager.
                analysis = AnalysisManager(task.id, errors)
                analysis.daemon = True
                analysis.start()

            # Deal with errors: re-raise whatever an analysis thread queued.
            try:
                raise errors.get(block=False)
            except Queue.Empty:
                pass

        log.debug("End of analyses.")
Esempio n. 15
0
class AnalysisManager(threading.Thread):
    """Analysis Manager.

    This class handles the full analysis process for a given task. It takes
    care of selecting the analysis machine, preparing the configuration and
    interacting with the guest agent and analyzer components to launch and
    complete the analysis and store, process and report its results.
    """

    def __init__(self, task_id, error_queue):
        """Set up the manager thread for a single task.

        @param task_id: ID of the task; the task itself is looked up through
            the database.
        @param error_queue: queue object kept as self.errors, onto which
            errors raised during the analysis are put.
        """
        super(AnalysisManager, self).__init__()

        # Collaborators and the task this thread is going to handle.
        self.errors = error_queue
        self.cfg = Config()
        self.db = Database()
        self.task = self.db.view_task(task_id)

        # Filesystem locations, filled in by init().
        self.storage = self.binary = self.storage_binary = ""

        # Machine, guest and routing state, filled in during the analysis.
        self.machine = self.guest_manager = None
        self.route = self.interface = self.rt_table = None

    def init(self):
        """Initialize the analysis.

        Creates the analysis storage folder, stores the task information in
        it and, for "file" and "archive" tasks, verifies the target and makes
        it available inside the storage folder as "binary". Per-task logging
        is started once everything else succeeded.

        @return: True if the analysis can proceed, False if it was aborted
            (the reason is logged).
        """
        self.storage = cwd(analysis=self.task.id)

        # If the analysis storage folder already exists, we need to abort the
        # analysis or previous results will be overwritten and lost.
        if os.path.exists(self.storage):
            log.error("Analysis results folder already exists at path \"%s\", "
                      "analysis aborted", self.storage)
            return False

        # If we're not able to create the analysis storage folder, we have to
        # abort the analysis.
        try:
            Folders.create(self.storage)
        except CuckooOperationalError:
            log.error("Unable to create analysis folder %s", self.storage)
            return False

        self.store_task_info()

        if self.task.category in ("file", "archive"):
            # Check if we have permissions to access the file.
            # And fail this analysis if we don't have access to the file.
            if not os.access(self.task.target, os.R_OK):
                log.error(
                    "Unable to access target file, please check if we have "
                    "permissions to access the file: \"%s\"",
                    self.task.target
                )
                return False

            # Check whether the file has been changed for some unknown reason.
            # And fail this analysis if it has been modified.
            # TODO Absorb the file upon submission.
            sample = self.db.view_sample(self.task.sample_id)
            sha256 = File(self.task.target).get_sha256()
            if sha256 != sample.sha256:
                log.error(
                    "Target file has been modified after submission: \"%s\"",
                    self.task.target
                )
                return False

            # Store a copy of the original file if does not exist already.
            # TODO This should be done at submission time.
            self.binary = cwd("storage", "binaries", sha256)
            if not os.path.exists(self.binary):
                try:
                    shutil.copy(self.task.target, self.binary)
                except (IOError, shutil.Error):
                    log.error(
                        "Unable to store file from \"%s\" to \"%s\", "
                        "analysis aborted", self.task.target, self.binary
                    )
                    return False

            # Each analysis directory contains a symlink/copy of the binary.
            try:
                self.storage_binary = os.path.join(self.storage, "binary")

                if hasattr(os, "symlink"):
                    os.symlink(self.binary, self.storage_binary)
                else:
                    shutil.copy(self.binary, self.storage_binary)
            except (AttributeError, OSError, IOError, shutil.Error) as e:
                # IOError and shutil.Error cover the shutil.copy() fallback;
                # on Python 2 IOError is not a subclass of OSError. The
                # destination that is reported is the actual link/copy path.
                log.error("Unable to create symlink/copy from \"%s\" to "
                          "\"%s\": %s", self.binary, self.storage_binary, e)
                return False

        # Initiates per-task logging.
        task_log_start(self.task.id)
        return True

    def store_task_info(self):
        """grab latest task from db (if available) and update self.task"""
        dbtask = self.db.view_task(self.task.id)
        self.task = dbtask.to_dict()

        task_info_path = os.path.join(self.storage, "task.json")
        open(task_info_path, "w").write(dbtask.to_json())

    def acquire_machine(self):
        """Block until an analysis machine has been acquired for this task.

        The acquired machine is stored in self.machine. Note that
        machine_lock is still held when this method returns.
        """
        acquired = None

        # Poll once per second until the machinery hands out a machine.
        while True:
            machine_lock.acquire()

            # It is possible to get here without any machine being available
            # at all; back off in that case, or the analysis task will fail
            # completely.
            if not machinery.availables():
                machine_lock.release()
                time.sleep(1)
                continue

            # Honor a specific machine ID, a platform or machine tags if the
            # user requested any of those.
            acquired = machinery.acquire(
                machine_id=self.task.machine,
                platform=self.task.platform,
                tags=self.task.tags
            )
            if acquired:
                log.info(
                    "Task #%d: acquired machine %s (label=%s)",
                    self.task.id, acquired.name, acquired.label, extra={
                        "action": "vm.acquire",
                        "status": "success",
                        "vmname": acquired.name,
                    }
                )
                break

            # Nothing suitable at this moment, try again in a second.
            machine_lock.release()
            log.debug("Task #%d: no machine available yet", self.task.id)
            time.sleep(1)

        self.machine = acquired

    def build_options(self):
        """Assemble the options for this analysis.
        @return: options dict.
        """
        options = {}

        # Target specific entries first.
        if self.task.category == "archive":
            options["file_name"] = File(self.task.target).get_name()
        elif self.task.category == "file":
            options["file_name"] = File(self.task.target).get_name()
            options["file_type"] = File(self.task.target).get_type()
            exports = File(self.task.target).get_exported_functions()
            options["pe_exports"] = ",".join(exports)

            # The APK entry point goes into the task options, which are
            # emitted as a whole further down.
            package, activity = File(self.task.target).get_apk_entry()
            self.task.options["apk_entry"] = "%s:%s" % (package, activity)

        # Entries shared by every kind of task. The timeout falls back to
        # the configured default when the task does not carry one.
        options.update({
            "id": self.task.id,
            "ip": self.machine.resultserver_ip,
            "port": self.machine.resultserver_port,
            "category": self.task.category,
            "target": self.task.target,
            "package": self.task.package,
            "options": emit_options(self.task.options),
            "enforce_timeout": self.task.enforce_timeout,
            "clock": self.task.clock,
            "terminate_processes": self.cfg.cuckoo.terminate_processes,
            "timeout": self.task.timeout or self.cfg.timeouts.default,
        })

        # copy in other analyzer specific options, TEMPORARY (most likely)
        vm_options = getattr(machinery.options, self.machine.name)
        options.update(
            (key, vm_options[key])
            for key in vm_options if key.startswith("analyzer_")
        )

        return options

    def route_network(self):
        """Enable network routing if desired.

        Resolves the route for this task (task option "route", falling back
        to the routing configuration), keeps the outcome in self.route,
        self.interface and self.rt_table, asks the rooter to enable the
        matching rules and records the route that was taken in the database.
        """
        def error_extra():
            # Structured logging fields shared by all routing problems.
            return {
                "action": "network.route",
                "status": "error",
                "route": self.route,
            }

        def disable_routing():
            # Fall back to route=none, both here and in the task options.
            self.route = "none"
            self.task.options["route"] = "none"
            self.interface = None
            self.rt_table = None

        # Determine the desired routing strategy (none, internet, VPN).
        self.route = self.task.options.get(
            "route", config("routing:routing:route")
        )

        if self.route in ("none", "drop"):
            self.interface = None
            self.rt_table = None
        elif self.route in ("inetsim", "tor"):
            # Nothing to select here; the matching rooter calls are made
            # further down.
            pass
        elif self.route == "internet":
            if config("routing:routing:internet") != "none":
                self.interface = config("routing:routing:internet")
                self.rt_table = config("routing:routing:rt_table")
            else:
                log.warning(
                    "Internet network routing has been specified, but not "
                    "configured, ignoring routing for this analysis",
                    extra=error_extra()
                )
                disable_routing()
        elif self.route in config("routing:vpn:vpns"):
            self.interface = config("routing:%s:interface" % self.route)
            self.rt_table = config("routing:%s:rt_table" % self.route)
        else:
            log.warning(
                "Unknown network routing destination specified, ignoring "
                "routing for this analysis: %r", self.route,
                extra=error_extra()
            )
            disable_routing()

        # Check if the network interface is still available. If a VPN dies for
        # some reason, its tunX interface will no longer be available.
        if self.interface and not rooter("nic_available", self.interface):
            log.error(
                "The network interface '%s' configured for this analysis is "
                "not available at the moment, switching to route=none mode.",
                self.interface, extra=error_extra()
            )
            disable_routing()

        # For now this doesn't work yet in combination with tor routing.
        if self.route in ("drop", "internet"):
            rooter(
                "drop_enable", self.machine.ip,
                config("cuckoo:resultserver:ip"),
                str(config("cuckoo:resultserver:port"))
            )

        if self.route == "inetsim":
            machinery_name = config("cuckoo:cuckoo:machinery")
            rooter(
                "inetsim_enable", self.machine.ip,
                config("routing:inetsim:server"),
                config("%s:%s:interface" % (machinery_name, machinery_name)),
                str(config("cuckoo:resultserver:port")),
                config("routing:inetsim:ports") or ""
            )

        if self.route == "tor":
            rooter(
                "tor_enable", self.machine.ip,
                str(config("cuckoo:resultserver:ip")),
                str(config("routing:tor:dnsport")),
                str(config("routing:tor:proxyport"))
            )

        if self.interface:
            rooter(
                "forward_enable", self.machine.interface,
                self.interface, self.machine.ip
            )

        if self.rt_table:
            rooter("srcroute_enable", self.rt_table, self.machine.ip)

        # Propagate the taken route to the database.
        self.db.set_route(self.task.id, self.route)

    def unroute_network(self):
        """Disable any enabled network routing."""
        if self.interface:
            rooter(
                "forward_disable", self.machine.interface,
                self.interface, self.machine.ip
            )

        if self.rt_table:
            rooter(
                "srcroute_disable", self.rt_table, self.machine.ip
            )

        if self.route == "drop" or self.route == "internet":
            rooter(
                "drop_disable", self.machine.ip,
                config("cuckoo:resultserver:ip"),
                str(config("cuckoo:resultserver:port"))
            )

        if self.route == "inetsim":
            machinery = config("cuckoo:cuckoo:machinery")
            rooter(
                "inetsim_disable", self.machine.ip,
                config("routing:inetsim:server"),
                config("%s:%s:interface" % (machinery, machinery)),
                str(config("cuckoo:resultserver:port")),
                config("routing:inetsim:ports") or ""
            )

        if self.route == "tor":
            rooter(
                "tor_disable", self.machine.ip,
                str(config("cuckoo:resultserver:ip")),
                str(config("routing:tor:dnsport")),
                str(config("routing:tor:proxyport"))
            )

    def wait_finish(self):
        """Some VMs don't have an actual agent. Mainly those that are used as
        assistance for an analysis through the services auxiliary module. This
        method just waits until the analysis is finished rather than actively
        trying to engage with the Cuckoo Agent."""
        self.db.guest_set_status(self.task.id, "running")
        while self.db.guest_get_status(self.task.id) == "running":
            time.sleep(1)

    def guest_manage(self, options):
        # Handle a special case where we're creating a baseline report of this
        # particular virtual machine - a report containing all the results
        # that are gathered if no additional samples are ran in the VM. These
        # results, such as loaded drivers and opened sockets in volatility, or
        # DNS requests to hostnames related to Microsoft Windows, etc may be
        # omitted or at the very least given less priority when creating a
        # report for an analysis that ran on this VM later on.
        if self.task.category == "baseline":
            time.sleep(options["timeout"])
        else:
            # Start the analysis.
            self.db.guest_set_status(self.task.id, "starting")
            monitor = self.task.options.get("monitor", "latest")
            self.guest_manager.start_analysis(options, monitor)

            # In case the Agent didn't respond and we force-quit the analysis
            # at some point while it was still starting the analysis the state
            # will be "stop" (or anything but "running", really).
            if self.db.guest_get_status(self.task.id) == "starting":
                self.db.guest_set_status(self.task.id, "running")
                self.guest_manager.wait_for_completion()

            self.db.guest_set_status(self.task.id, "stopping")

    def launch_analysis(self):
        """Start analysis.

        Initializes the analysis, acquires and starts a machine, enables
        network routing, runs the guest side of the analysis and finally
        tears everything down again (auxiliary modules, optional memory dump,
        machine, ResultServer state, routing).

        @return: True if the guest side of the analysis ran without one of
            the handled errors, False otherwise.
        @raise CuckooMachineError: if remote control was requested but is not
            implemented by the machinery.
        """
        succeeded = False

        if self.task.category == "file" or self.task.category == "archive":
            target = os.path.basename(self.task.target)
        else:
            target = self.task.target

        log.info(
            "Starting analysis of %s \"%s\" (task #%d, options \"%s\")",
            self.task.category.upper(), target, self.task.id,
            emit_options(self.task.options), extra={
                "action": "task.init",
                "status": "starting",
                "task_id": self.task.id,
                "target": target,
                "category": self.task.category,
                "package": self.task.package,
                "options": emit_options(self.task.options),
                "custom": self.task.custom,
            }
        )

        # Initialize the analysis.
        if not self.init():
            logger("Failed to initialize", action="task.init", status="error")
            return False

        # Acquire analysis machine. On success machine_lock is still held
        # and is only released further down, once the machine has started.
        try:
            self.acquire_machine()
        except CuckooOperationalError as e:
            machine_lock.release()
            log.error("Cannot acquire machine: %s", e, extra={
                "action": "vm.acquire", "status": "error",
            })
            return False

        # At this point we can tell the ResultServer about it.
        # NOTE(review): on failure the machine is released and the error is
        # queued, but execution continues below - confirm this is intended.
        try:
            ResultServer().add_task(self.task, self.machine)
        except Exception as e:
            machinery.release(self.machine.label)
            self.errors.put(e)

        # Initialize the guest manager.
        self.guest_manager = GuestManager(
            self.machine.name, self.machine.ip,
            self.machine.platform, self.task.id, self
        )

        self.aux = RunAuxiliary(self.task, self.machine, self.guest_manager)
        self.aux.start()

        # Generate the analysis configuration file.
        options = self.build_options()

        # Check if the current task has remotecontrol
        # enabled before starting the machine.
        control_enabled = (
            config("cuckoo:remotecontrol:enabled") and
            "remotecontrol" in self.task.options
        )
        if control_enabled:
            try:
                machinery.enable_remote_control(self.machine.label)
            except NotImplementedError:
                raise CuckooMachineError(
                    "Remote control support has not been implemented "
                    "for this machinery."
                )

        try:
            # Tracks whether machine_lock has been released already, so that
            # the error handlers below release it at most once.
            unlocked = False
            self.interface = None

            # Mark the selected analysis machine in the database as started.
            guest_log = self.db.guest_start(self.task.id,
                                            self.machine.name,
                                            self.machine.label,
                                            machinery.__class__.__name__)
            logger(
                "Starting VM",
                action="vm.start", status="pending",
                vmname=self.machine.name
            )

            # Start the machine.
            machinery.start(self.machine.label, self.task)

            logger(
                "Started VM",
                action="vm.start", status="success",
                vmname=self.machine.name
            )

            # retrieve the port used for remote control
            if control_enabled:
                try:
                    params = machinery.get_remote_control_params(
                        self.machine.label
                    )
                    self.db.set_machine_rcparams(self.machine.label, params)
                except NotImplementedError:
                    raise CuckooMachineError(
                        "Remote control support has not been implemented "
                        "for this machinery."
                    )

            # Enable network routing.
            self.route_network()

            # By the time start returns it will have fully started the Virtual
            # Machine. We can now safely release the machine lock.
            machine_lock.release()
            unlocked = True

            # Run and manage the components inside the guest unless this
            # machine has the "noagent" option specified (please refer to the
            # wait_finish() function for more details on this function).
            if "noagent" not in self.machine.options:
                self.guest_manage(options)
            else:
                self.wait_finish()

            succeeded = True
        except CuckooMachineSnapshotError as e:
            # This handler can run before machine_lock was released in the
            # try body; release it here as the other handlers do, otherwise
            # the lock would stay held after this analysis has ended.
            if not unlocked:
                machine_lock.release()
            log.error(
                "Unable to restore to the snapshot for this Virtual Machine! "
                "Does your VM have a proper Snapshot and can you revert to it "
                "manually? VM: %s, error: %s",
                self.machine.name, e, extra={
                    "action": "vm.resume",
                    "status": "error",
                    "vmname": self.machine.name,
                }
            )
        except CuckooMachineError as e:
            if not unlocked:
                machine_lock.release()
            log.error(
                "Error starting Virtual Machine! VM: %s, error: %s",
                self.machine.name, e, extra={
                    "action": "vm.start",
                    "status": "error",
                    "vmname": self.machine.name,
                }
            )
        except CuckooGuestCriticalTimeout:
            if not unlocked:
                machine_lock.release()
            log.error(
                "Error from machine '%s': it appears that this Virtual "
                "Machine hasn't been configured properly as the Cuckoo Host "
                "wasn't able to connect to the Guest. There could be a few "
                "reasons for this, please refer to our documentation on the "
                "matter: %s",
                self.machine.name,
                faq("troubleshooting-vm-network-configuration"),
                extra={
                    "error_action": "vmrouting",
                    "action": "guest.handle",
                    "status": "error",
                    "task_id": self.task.id,
                }
            )
        except CuckooGuestError as e:
            if not unlocked:
                machine_lock.release()
            log.error("Error from the Cuckoo Guest: %s", e, extra={
                "action": "guest.handle",
                "status": "error",
                "task_id": self.task.id,
            })
        finally:
            # Stop Auxiliary modules.
            self.aux.stop()

            # Take a memory dump of the machine before shutting it off.
            if self.cfg.cuckoo.memory_dump or self.task.memory:
                logger(
                    "Taking full memory dump",
                    action="vm.memdump", status="pending",
                    vmname=self.machine.name
                )
                try:
                    dump_path = os.path.join(self.storage, "memory.dmp")
                    machinery.dump_memory(self.machine.label, dump_path)

                    logger(
                        "Taken full memory dump",
                        action="vm.memdump", status="success",
                        vmname=self.machine.name
                    )
                except NotImplementedError:
                    log.error(
                        "The memory dump functionality is not available for "
                        "the current machine manager.", extra={
                            "action": "vm.memdump",
                            "status": "error",
                            "vmname": self.machine.name,
                        }
                    )
                except CuckooMachineError as e:
                    log.error("Machinery error: %s", e, extra={
                        "action": "vm.memdump",
                        "status": "error",
                    })

            logger(
                "Stopping VM",
                action="vm.stop", status="pending",
                vmname=self.machine.name
            )

            try:
                # Stop the analysis machine.
                machinery.stop(self.machine.label)
            except CuckooMachineError as e:
                log.warning(
                    "Unable to stop machine %s: %s",
                    self.machine.label, e, extra={
                        "action": "vm.stop",
                        "status": "error",
                        "vmname": self.machine.name,
                    }
                )

            logger(
                "Stopped VM",
                action="vm.stop", status="success",
                vmname=self.machine.name
            )

            # Disable remote control after stopping the machine
            # if it was enabled for the task.
            if control_enabled:
                try:
                    machinery.disable_remote_control(self.machine.label)
                except NotImplementedError:
                    raise CuckooMachineError(
                        "Remote control support has not been implemented "
                        "for this machinery."
                    )

            # Mark the machine in the database as stopped. Unless this machine
            # has been marked as dead, we just keep it as "started" in the
            # database so it'll not be used later on in this session.
            self.db.guest_stop(guest_log)

            # After all this, we can make the ResultServer forget about the
            # internal state for this analysis task.
            ResultServer().del_task(self.task, self.machine)

            # Drop the network routing rules if any.
            self.unroute_network()

            try:
                # Release the analysis machine. But only if the machine has
                # not turned dead yet.
                machinery.release(self.machine.label)
            except CuckooMachineError as e:
                log.error(
                    "Unable to release machine %s, reason %s. You might need "
                    "to restore it manually.", self.machine.label, e, extra={
                        "action": "vm.release",
                        "status": "error",
                        "vmname": self.machine.name,
                    }
                )

        return succeeded

    def process_results(self):
        """Process the analysis results and generate the enabled reports.

        Runs processing, signatures and reporting for the task and then, as
        configured, deletes the original file and/or the stored copy of the
        binary of a "file" task.

        @return: always True.
        """
        logger(
            "Starting task reporting", action="task.report", status="pending"
        )

        # TODO Refactor this function as currently "cuckoo process" has a 1:1
        # copy of its code. TODO Also remove "archive" files.
        results = RunProcessing(task=self.task).run()
        RunSignatures(results=results).run()
        RunReporting(task=self.task, results=results).run()

        is_file = self.task.category == "file"

        # Delete the original copy of a file target when the user enabled
        # that option.
        if is_file and self.cfg.cuckoo.delete_original:
            if os.path.exists(self.task.target):
                try:
                    os.remove(self.task.target)
                except OSError as e:
                    log.error(
                        "Unable to delete original file at path \"%s\": %s",
                        self.task.target, e
                    )
            else:
                log.warning(
                    "Original file does not exist anymore: \"%s\": "
                    "File not found.", self.task.target
                )

        # Delete the stored copy of the binary of a file target when the
        # user enabled that option.
        if is_file and self.cfg.cuckoo.delete_bin_copy:
            if os.path.exists(self.binary):
                try:
                    os.remove(self.binary)
                except OSError as e:
                    log.error(
                        "Unable to delete the copy of the original file at "
                        "path \"%s\": %s", self.binary, e
                    )
            else:
                log.warning(
                    "Copy of the original file does not exist anymore: "
                    "\"%s\": File not found", self.binary
                )

            # A symlink in the analysis directory whose target is gone is
            # of no use anymore, so delete it as well.
            dangling = (
                os.path.islink(self.storage_binary) and
                not os.path.exists(self.storage_binary)
            )
            if dangling:
                try:
                    os.remove(self.storage_binary)
                except OSError as e:
                    log.error(
                        "Unable to delete symlink to the binary copy at "
                        "path \"%s\": %s", self.storage_binary, e
                    )

        log.info(
            "Task #%d: reports generation completed", self.task.id,
            extra={"action": "task.report", "status": "success"}
        )

        return True

    def run(self):
        """Run manager thread.

        Launches the analysis, optionally processes its results, repoints
        the "latest" analysis symlink and refreshes task.json. Whatever
        happens, the final task status is set, per-task logging is stopped
        and the active analysis counter is decremented at the end.
        """
        global active_analysis_count
        active_analysis_count += 1
        try:
            self.launch_analysis()

            log.debug("Released database task #%d", self.task.id)

            if self.cfg.cuckoo.process_results:
                self.store_task_info()
                self.db.set_status(self.task.id, TASK_COMPLETED)
                # TODO If self.process_results() is unified with apps.py's
                # process() method, then ensure that TASK_FAILED_PROCESSING is
                # handled correctly and not overwritten by the db.set_status()
                # at the end of this method.
                self.process_results()

            # Maintain a symbolic link ("latest") which links to the latest
            # analysis - this is useful for debugging purposes. This is only
            # supported under systems that support symbolic links.
            if hasattr(os, "symlink"):
                latest = cwd("storage", "analyses", "latest")

                # The existing link has to be removed before the new one can
                # be created; the lock deals with race conditions.
                with latest_symlink_lock:
                    try:
                        # As per documentation, lexists() returns True for
                        # dead symbolic links.
                        if os.path.lexists(latest):
                            os.remove(latest)

                        os.symlink(self.storage, latest)
                    except OSError as e:
                        log.warning(
                            "Error pointing latest analysis symlink: %s" % e
                        )

            # overwrite task.json so we have the latest data inside
            self.store_task_info()
            log.info(
                "Task #%d: analysis procedure completed", self.task.id,
                extra={"action": "task.stop", "status": "success"}
            )
        except:
            log.exception(
                "Failure in AnalysisManager.run",
                extra={"action": "task.stop", "status": "error"}
            )
        finally:
            final_status = (
                TASK_REPORTED if self.cfg.cuckoo.process_results
                else TASK_COMPLETED
            )
            self.db.set_status(self.task.id, final_status)
            task_log_stop(self.task.id)
            active_analysis_count -= 1
Esempio n. 16
0
class TestRegular(object):
    """Unit tests for the ``Regular`` analysis manager.

    The tests share a Cuckoo working directory (CWD) that is built lazily by
    ``create_cwd()``. Every temporary directory created along the way is
    recorded in ``remove_paths`` and deleted again in ``teardown_class()``.
    """

    # True while the next create_cwd() call still has to build a fresh CWD.
    createcwd = True

    def setup_class(self):
        """Initialize the state shared by all tests of this class.

        NOTE: pytest invokes this hook with the class object, so the
        attributes set here are visible from every test instance.
        """
        self.remove_paths = []
        self.db = Database()

    def create_cwd(self, cfg=None):
        """Create a Cuckoo working directory and connect the database.

        The call is a no-op when a CWD was already created for this class
        and no custom configuration is requested.

        @param cfg: optional configuration handed to ``cuckoo_create``;
            passing one always forces a fresh CWD.
        """
        if not TestRegular.createcwd and cfg is None:
            return

        TestRegular.createcwd = False
        newcwd = tempfile.mkdtemp()
        set_cwd(newcwd)
        cuckoo_create(cfg=cfg)
        self.remove_paths.append(newcwd)
        self.db.connect()

    def teardown_class(self):
        """Delete every temporary directory registered in remove_paths."""
        for path in self.remove_paths:
            if os.path.isdir(path):
                shutil.rmtree(path)

    def get_manager(self, task=None):
        """Build a ``Regular`` manager wired to a fake machine and mocks.

        @param task: task to attach; when omitted a file task is created
            for a 32-byte dummy sample named "testanalysis.exe".
        @return: manager with its task and target already set.
        """
        if task is None:
            task = Task()
            # Give every sample its own directory, registered for removal
            # in teardown_class(): nothing is left behind in the system
            # temporary directory and repeated calls never collide on the
            # same "testanalysis.exe" path.
            sample_dir = tempfile.mkdtemp()
            self.remove_paths.append(sample_dir)
            sample_path = os.path.join(sample_dir, "testanalysis.exe")
            with open(sample_path, "wb") as sample:
                sample.write(b"\x00" * 32)
            task_id = task.add_path(sample_path)
            task.load_from_db(task_id)

        manager = Regular(FakeMachine(), mock.MagicMock(), mock.MagicMock())
        manager.set_task(task)
        manager.set_target(task.targets)
        return manager

    def test_set_task(self):
        """set_task() stores the task, an analysis object and the name."""
        self.create_cwd()
        task = Task()
        task_id = task.add_path(__file__)
        task.load_from_db(task_id)
        manager = self.get_manager()
        manager.set_task(task)

        assert manager.task == task
        assert manager.analysis is not None
        assert manager.name == "task_%s_Regular" % task.id

    def test_set_target(self):
        """set_target() selects the first target of the given list."""
        self.create_cwd()
        task = Task()
        task_id = task.add_path(__file__)
        task.load_from_db(task_id)
        manager = self.get_manager()
        manager.set_target(task.targets)
        assert manager.target == task.targets[0]

    def test_set_target_empty(self):
        """set_target() still yields a Target when the task has none."""
        self.create_cwd()
        task = Task()
        task_id = task.add_path(__file__)
        task.load_from_db(task_id)
        task.task_dict["targets"] = []
        manager = self.get_manager()
        manager.set_target(task.targets)
        assert isinstance(manager.target, Target)

    @mock.patch("cuckoo.common.abstracts.AnalysisManager.build_options")
    def test_init(self, mb):
        """init() builds the file options and prepares guest/aux/task.json."""
        self.create_cwd()
        manager = self.get_manager()
        result = manager.init(self.db)
        mb.assert_called_once_with(
            options={
                "category": "file",
                "target": manager.target.target,
                "file_type": "data",
                "file_name": "testanalysis.exe",
                "pe_exports": "",
                "options": {}
            })

        assert result
        assert isinstance(manager.guest_manager, GuestManager)
        assert isinstance(manager.aux, RunAuxiliary)
        assert os.path.isfile(os.path.join(manager.task.path, "task.json"))

    @mock.patch("cuckoo.common.abstracts.AnalysisManager.build_options")
    @mock.patch("cuckoo.core.target.File.get_apk_entry")
    def test_init_apk_options(self, mae, mb):
        """init() passes the APK entry point as the "apk_entry" option."""
        self.create_cwd()
        manager = self.get_manager()
        mae.return_value = ("package", "activity")
        result = manager.init(self.db)

        mb.assert_called_once_with(
            options={
                "category": "file",
                "target": manager.target.target,
                "file_type": "data",
                "file_name": "testanalysis.exe",
                "pe_exports": "",
                "options": {
                    "apk_entry": "package:activity"
                }
            })

        assert result
        assert isinstance(manager.guest_manager, GuestManager)
        assert isinstance(manager.aux, RunAuxiliary)
        assert os.path.isfile(os.path.join(manager.task.path, "task.json"))

    @mock.patch("cuckoo.common.abstracts.AnalysisManager.build_options")
    def test_init_non_file(self, mb):
        """init() also succeeds for a URL task."""
        self.create_cwd()
        task = Task()
        task_id = task.add_url("http://example.com/42")
        task.load_from_db(task_id)
        manager = self.get_manager(task)

        result = manager.init(self.db)
        mb.assert_called_once()
        assert result
        assert isinstance(manager.guest_manager, GuestManager)
        assert isinstance(manager.aux, RunAuxiliary)
        assert os.path.isfile(os.path.join(task.path, "task.json"))

    def test_init_remov_original(self):
        """init() succeeds when only the original submitted file is gone."""
        self.create_cwd()
        task = Task()
        fd, tmpfile = tempfile.mkstemp()
        os.write(fd, os.urandom(64))
        os.close(fd)
        task_id = task.add_path(tmpfile)
        task.load_from_db(task_id)
        tmpfile_obj = File(tmpfile)
        tmpfile_obj.calc_hashes()
        manager = self.get_manager(task)

        # Remove so init fails to find the original target
        os.remove(tmpfile)

        result = manager.init(self.db)
        assert result
        assert manager.options["target"] == tmpfile
        assert manager.options["file_name"] == tmpfile_obj.get_name()
        assert isinstance(manager.guest_manager, GuestManager)
        assert isinstance(manager.aux, RunAuxiliary)
        assert os.path.isfile(os.path.join(task.path, "task.json"))

    def test_init_fail(self):
        """init() fails when both the original and the stored copy are gone."""
        self.create_cwd()
        task = Task()
        fd, tmpfile = tempfile.mkstemp()
        os.write(fd, os.urandom(64))
        os.close(fd)
        task_id = task.add_path(tmpfile)
        task.load_from_db(task_id)
        manager = self.get_manager(task)
        copy_path = cwd("storage", "binaries", File(tmpfile).get_sha256())

        # Remove both binaries to make init fail
        os.remove(copy_path)
        os.remove(tmpfile)
        result = manager.init(self.db)

        assert not result

    def test_init_copied_bin_none(self):
        """init() fails when the target has no copied binary."""
        self.create_cwd()
        manager = self.get_manager()
        manager.target.copied_binary = None
        result = manager.init(self.db)

        assert not result

    @mock.patch("cuckoo.analysis.regular.ResultServer")
    @mock.patch("cuckoo.common.abstracts.AnalysisManager.set_analysis_status")
    @mock.patch("cuckoo.common.abstracts.AnalysisManager."
                "request_scheduler_action")
    def test_start_and_wait(self, mrsa, msas, mrs):
        """start_and_wait() drives every collaborator for a file task."""
        self.create_cwd()
        manager = self.get_manager()
        # Mock resultserver obj so we can check if add_task was called
        resultserver_obj = mock.MagicMock()
        mrs.return_value = resultserver_obj

        manager.init(self.db)
        manager.machinery = mock.MagicMock()
        manager.route = mock.MagicMock()
        manager.aux = mock.MagicMock()
        manager.guest_manager = mock.MagicMock()
        # Set status manually, because the method used is mocked
        manager.analysis.status = "starting"

        result = manager.start_and_wait()

        # Check if all required methods were called successfully
        msas.assert_has_calls([mock.call("starting"), mock.call("running")])
        resultserver_obj.add_task.assert_called_once_with(
            manager.task.db_task, manager.machine)
        manager.aux.start.assert_called_once()
        manager.machinery.start.assert_called_once_with(
            "machine1", manager.task.db_task)
        manager.route.route_network.assert_called_once()
        manager.machine_lock.release.assert_called_once()
        mrsa.assert_called_once_with("starting")
        manager.guest_manager.start_analysis.assert_called_once()
        manager.guest_manager.wait_for_completion.assert_called_once()
        assert result

    @mock.patch("cuckoo.analysis.regular.ResultServer")
    @mock.patch("cuckoo.common.abstracts.AnalysisManager.set_analysis_status")
    @mock.patch("cuckoo.common.abstracts.AnalysisManager."
                "request_scheduler_action")
    def test_start_and_wait_url(self, mrsa, msas, mrs):
        """start_and_wait() drives every collaborator for a URL task."""
        self.create_cwd()
        task = Task()
        task_id = task.add_url("http://example.com/42")
        task.load_from_db(task_id)

        # Mock resultserver obj so we can check if add_task was called
        resultserver_obj = mock.MagicMock()
        mrs.return_value = resultserver_obj

        manager = self.get_manager(task)
        manager.init(self.db)
        manager.machinery = mock.MagicMock()
        manager.route = mock.MagicMock()
        manager.aux = mock.MagicMock()
        manager.guest_manager = mock.MagicMock()
        # Set status manually, because the method used is mocked
        manager.analysis.status = "starting"

        result = manager.start_and_wait()

        # Check if all required methods were called successfully
        msas.assert_has_calls([mock.call("starting"), mock.call("running")])
        resultserver_obj.add_task.assert_called_once_with(
            task.db_task, manager.machine)
        manager.aux.start.assert_called_once()
        manager.machinery.start.assert_called_once_with(
            "machine1", task.db_task)
        manager.route.route_network.assert_called_once()
        manager.machine_lock.release.assert_called_once()
        mrsa.assert_called_once_with("starting")
        manager.guest_manager.start_analysis.assert_called_once()
        manager.guest_manager.wait_for_completion.assert_called_once()
        assert result

    @mock.patch("cuckoo.analysis.regular.ResultServer")
    @mock.patch("cuckoo.common.abstracts.AnalysisManager.set_analysis_status")
    @mock.patch("cuckoo.common.abstracts.AnalysisManager."
                "request_scheduler_action")
    @mock.patch("time.sleep")
    def test_start_and_wait_baseline(self, mts, mrsa, msas, mrs):
        """A baseline task sleeps for the configured timeout."""
        self.create_cwd()
        task = Task()
        task_id = task.add_baseline()
        task.load_from_db(task_id)

        # Mock resultserver obj so we can check if add_task was called
        resultserver_obj = mock.MagicMock()
        mrs.return_value = resultserver_obj
        manager = self.get_manager(task)
        manager.init(self.db)
        manager.machinery = mock.MagicMock()
        manager.route = mock.MagicMock()
        manager.aux = mock.MagicMock()

        result = manager.start_and_wait()

        # Check if all required methods were called successfully
        msas.assert_has_calls([mock.call("starting"), mock.call("running")])
        resultserver_obj.add_task.assert_called_once_with(
            task.db_task, manager.machine)
        manager.aux.start.assert_called_once()
        manager.machinery.start.assert_called_once_with(
            "machine1", task.db_task)
        manager.route.route_network.assert_called_once()
        manager.machine_lock.release.assert_called_once()
        mrsa.assert_called_once_with("starting")
        mts.assert_called_once_with(manager.options["timeout"])
        assert result

    @mock.patch("cuckoo.analysis.regular.ResultServer")
    @mock.patch("cuckoo.common.abstracts.AnalysisManager.set_analysis_status")
    @mock.patch("cuckoo.common.abstracts.AnalysisManager."
                "request_scheduler_action")
    @mock.patch("cuckoo.common.abstracts.AnalysisManager.wait_finish")
    def test_start_and_wait_noagent(self, mwf, mrsa, msas, mrs):
        """A "noagent" machine waits through wait_finish()."""
        self.create_cwd()
        task = Task()
        task_id = task.add_service(owner="1", tags="service,mitm", timeout=120)
        task.load_from_db(task_id)

        # Mock resultserver obj so we can check if add_task was called
        resultserver_obj = mock.MagicMock()
        mrs.return_value = resultserver_obj
        manager = self.get_manager(task)
        manager.machine.options = "noagent"
        manager.init(self.db)
        manager.machinery = mock.MagicMock()
        manager.route = mock.MagicMock()
        manager.aux = mock.MagicMock()

        result = manager.start_and_wait()

        # Check if all required methods were called successfully
        msas.assert_has_calls([mock.call("starting"), mock.call("running")])
        resultserver_obj.add_task.assert_called_once_with(
            task.db_task, manager.machine)
        manager.aux.start.assert_called_once()
        manager.machinery.start.assert_called_once_with(
            "machine1", task.db_task)
        manager.route.route_network.assert_called_once()
        manager.machine_lock.release.assert_called_once()
        mrsa.assert_called_once_with("starting")
        mwf.assert_called_once()
        assert result

    @mock.patch("cuckoo.analysis.regular.ResultServer")
    @mock.patch("cuckoo.common.abstracts.AnalysisManager.set_analysis_status")
    def test_stop_and_wait(self, msas, mrs):
        """stop_and_wait() stops aux, machine, result server and routing."""
        self.create_cwd()
        # Mock resultserver obj so we can check if del_task was called
        resultserver_obj = mock.MagicMock()
        mrs.return_value = resultserver_obj
        manager = self.get_manager()
        manager.init(self.db)
        manager.machinery = mock.MagicMock()
        manager.route = mock.MagicMock()
        manager.aux = mock.MagicMock()

        manager.stop_and_wait()

        # Check if all required methods were called successfully
        msas.assert_called_once_with("stopping")
        manager.aux.stop.assert_called_once()
        manager.machinery.stop.assert_called_once_with("machine1")

        resultserver_obj.del_task.assert_called_once_with(
            manager.task.db_task, manager.machine)
        manager.route.unroute_network.assert_called_once()

    @mock.patch("cuckoo.analysis.regular.ResultServer")
    @mock.patch("cuckoo.common.abstracts.AnalysisManager.set_analysis_status")
    def test_stop_and_wait_dump_mem(self, msas, mrs):
        """stop_and_wait() dumps memory when the task requests it."""
        self.create_cwd()
        task = Task()
        task_id = task.add_path(__file__, memory=True)
        task.load_from_db(task_id)

        # Mock resultserver obj so we can check if del_task was called
        resultserver_obj = mock.MagicMock()
        mrs.return_value = resultserver_obj
        manager = self.get_manager(task)
        manager.init(self.db)
        manager.machinery = mock.MagicMock()
        manager.route = mock.MagicMock()
        manager.aux = mock.MagicMock()

        manager.stop_and_wait()

        # Check if all required methods were called successfully
        msas.assert_called_once_with("stopping")
        manager.aux.stop.assert_called_once()
        manager.machinery.dump_memory.assert_called_once_with(
            "machine1", cwd("storage", "analyses", str(task.id), "memory.dmp"))
        manager.machinery.stop.assert_called_once_with("machine1")

        resultserver_obj.del_task.assert_called_once_with(
            task.db_task, manager.machine)
        manager.route.unroute_network.assert_called_once()

    def test_run(self):
        """run() ends in status "stopped" when start_and_wait() succeeds."""
        self.create_cwd()

        manager = self.get_manager()
        manager.init(self.db)

        manager.start_and_wait = mock.MagicMock(return_value=True)
        manager.stop_and_wait = mock.MagicMock()
        manager.task.process = mock.MagicMock(return_value=True)
        manager.set_analysis_status = mock.MagicMock()
        manager.release_machine_lock = mock.MagicMock()

        manager.run()

        manager.start_and_wait.assert_called_once()
        manager.stop_and_wait.assert_called_once()
        manager.set_analysis_status.assert_called_once_with("stopped",
                                                            wait=True)
        manager.task.process.assert_called_once()

    def test_run_fail(self):
        """run() ends in status "failed" when start_and_wait() fails."""
        self.create_cwd()
        manager = self.get_manager()
        manager.init(self.db)

        manager.start_and_wait = mock.MagicMock(return_value=False)
        manager.stop_and_wait = mock.MagicMock()
        manager.task.process = mock.MagicMock(return_value=True)
        manager.set_analysis_status = mock.MagicMock()
        manager.release_machine_lock = mock.MagicMock()

        manager.run()

        manager.start_and_wait.assert_called_once()
        manager.stop_and_wait.assert_called_once()
        manager.set_analysis_status.assert_called_once_with("failed",
                                                            wait=True)
        manager.task.process.assert_called_once()

    def test_on_status_starting(self):
        """on_status_starting() records machine and route on the task."""
        # No-op if an earlier test already built the CWD; lets this test
        # also run on its own.
        self.create_cwd()
        manager = self.get_manager()
        manager.init(self.db)
        manager.route.route = "none"

        manager.on_status_starting(self.db)

        db_task = self.db.view_task(manager.task.id)
        assert db_task.machine == "machine1"
        assert db_task.route == "none"

    def test_on_status_stopped(self):
        """on_status_stopped() completes the task and frees the machine."""
        # No-op if an earlier test already built the CWD; lets this test
        # also run on its own.
        self.create_cwd()
        manager = self.get_manager()
        task_json_path = cwd("task.json", analysis=manager.task.id)
        manager.init(self.db)
        manager.machinery = mock.MagicMock()
        # Remove because init creates it. We need to check if it was created
        # on status stopped
        os.remove(task_json_path)

        manager.on_status_stopped(self.db)

        db_task = self.db.view_task(manager.task.id)
        assert manager.task.db_task is not db_task
        assert db_task.status == "completed"
        assert os.path.isfile(task_json_path)
        manager.machinery.release.assert_called_once_with("machine1")

    def test_on_status_failed(self):
        """on_status_failed() releases the machine."""
        # No-op if an earlier test already built the CWD; lets this test
        # also run on its own.
        self.create_cwd()
        manager = self.get_manager()
        manager.init(self.db)

        manager.on_status_failed(self.db)
        manager.machinery.release.assert_called_once_with("machine1")

    def test_finalize(self):
        """finalize() marks a successfully processed task as reported."""
        # No-op if an earlier test already built the CWD; lets this test
        # also run on its own.
        self.create_cwd()
        manager = self.get_manager()
        task_json_path = cwd("task.json", analysis=manager.task.id)
        manager.init(self.db)
        manager.processing_success = True
        manager.release_machine_lock = mock.MagicMock()
        # Remove because init creates it. We need to check if it was created
        # on status stopped
        os.remove(task_json_path)

        manager.finalize(self.db)

        db_task = self.db.view_task(manager.task.id)
        assert manager.task.db_task is not db_task
        assert db_task.status == "reported"
        assert os.path.isfile(task_json_path)
        manager.release_machine_lock.assert_called_once()

    def test_finalize_analysis_failed(self):
        """finalize() flags a task still "running" as failed_analysis."""
        self.create_cwd(cfg={"cuckoo": {"cuckoo": {"process_results": False}}})
        manager = self.get_manager()
        task_json_path = cwd("task.json", analysis=manager.task.id)
        manager.init(self.db)
        manager.analysis.status = "running"
        manager.release_machine_lock = mock.MagicMock()
        # Remove because init creates it. We need to check if it was created
        # on status stopped
        os.remove(task_json_path)

        manager.finalize(self.db)

        db_task = self.db.view_task(manager.task.id)
        assert manager.task.db_task is not db_task
        assert db_task.status == "failed_analysis"
        assert os.path.isfile(task_json_path)
        manager.release_machine_lock.assert_called_once()

    def test_finalize_process_failed(self):
        """finalize() flags failed processing as failed_processing."""
        # Force a fresh CWD with the default configuration: the previous
        # test created one with a custom configuration.
        TestRegular.createcwd = True
        self.create_cwd()
        manager = self.get_manager()
        task_json_path = cwd("task.json", analysis=manager.task.id)

        manager.init(self.db)
        manager.processing_success = False
        # Remove because init creates it. We need to check if it was created
        # on status stopped
        os.remove(task_json_path)

        manager.finalize(self.db)

        db_task = self.db.view_task(manager.task.id)
        assert manager.task.db_task is not db_task
        assert db_task.status == "failed_processing"
        assert os.path.isfile(task_json_path)

    def test_finalize_process_disabled(self):
        """With processing disabled no processing status is recorded."""
        self.create_cwd(cfg={"cuckoo": {"cuckoo": {"process_results": False}}})
        manager = self.get_manager()
        task_json_path = cwd("task.json", analysis=manager.task.id)
        manager.init(self.db)
        manager.processing_success = None
        # Remove because init creates it. We need to check if it was created
        # on status stopped
        os.remove(task_json_path)

        manager.finalize(self.db)

        db_task = self.db.view_task(manager.task.id)
        assert manager.task.db_task is not db_task
        assert db_task.status != "reported"
        assert db_task.status != "failed_processing"
        assert os.path.isfile(task_json_path)

    def test_support_list(self):
        """Regular declares support for the three expected task types."""
        for tasktype in ("regular", "baseline", "service"):
            assert tasktype in Regular.supports
Esempio n. 17
0
def cuckoo_clean():
    """Clean up cuckoo setup.
    It deletes logs, all stored data from file system and configured
    databases (SQL, MongoDB and ElasticSearch).

    Failures of the individual backends are logged as warnings so that the
    remaining cleanup steps, in particular the removal of the files on
    disk, still run.
    """
    # Init logging (without writing to file).
    init_console_logging()

    try:
        # Initialize the database connection.
        db = Database()
        db.connect(schema_check=False)

        # Drop all tables.
        db.drop()
    except (CuckooDependencyError, CuckooDatabaseError) as e:
        # If something is screwed due to incorrect database migrations or bad
        # database SqlAlchemy would be unable to connect and operate.
        log.warning("Error connecting to database: it is suggested to check "
                    "the connectivity, apply all migrations if needed or purge "
                    "it manually. Error description: %s", e)

    # Check if MongoDB reporting is enabled and drop the database if it is.
    if mongo.init():
        try:
            mongo.connect()
            mongo.drop()
            mongo.close()
        except Exception as e:
            log.warning("Unable to drop MongoDB database: %s", e)

    # Check if ElasticSearch reporting is enabled and drop its data if it is.
    # Handled like the MongoDB step above: an unreachable server must not
    # abort the cleanup half-way (SQL data gone, files still on disk).
    if elastic.init():
        try:
            elastic.connect()

            # TODO This should be moved to the elastic abstract.
            # TODO We should also drop historic data, i.e., from previous
            # days, months, and years.
            date_index = datetime.datetime.utcnow().strftime({
                "yearly": "%Y",
                "monthly": "%Y-%m",
                "daily": "%Y-%m-%d",
            }[elastic.index_time_pattern])
            dated_index = "%s-%s" % (elastic.index, date_index)

            elastic.client.indices.delete(
                index=dated_index, ignore=[400, 404]
            )

            template_name = "%s_template" % dated_index
            if elastic.client.indices.exists_template(template_name):
                elastic.client.indices.delete_template(template_name)
        except Exception as e:
            log.warning("Unable to drop ElasticSearch data: %s", e)

    # Paths to clean.
    paths = [
        cwd("cuckoo.db"),
        cwd("log"),
        cwd("storage", "analyses"),
        cwd("storage", "baseline"),
        cwd("storage", "binaries"),
    ]

    # Delete the various files and directories. In case of directories, keep
    # the parent directories, so to keep the state of the CWD intact.
    for path in paths:
        if os.path.isdir(path):
            try:
                # Recreate the directory right away so it exists, empty.
                shutil.rmtree(path)
                os.mkdir(path)
            except (IOError, OSError) as e:
                log.warning("Error removing directory %s: %s", path, e)
        elif os.path.isfile(path):
            try:
                os.unlink(path)
            except (IOError, OSError) as e:
                log.warning("Error removing file %s: %s", path, e)
Esempio n. 18
0
def _delete_unshared_file(field, file_id):
    """Delete a GridFS file unless another analysis document still uses it.

    @param field: dotted MongoDB field path that holds the reference.
    @param file_id: GridFS ID as stored in the analysis document.
    """
    object_id = ObjectId(file_id)
    # The analysis being removed is still stored at this point, so a count
    # of exactly one means it is the only document referencing the file.
    if results_db.analysis.find({field: object_id}).count() == 1:
        fs.delete(object_id)


def remove(request, task_id):
    """Remove an analysis.
    @todo: remove folder from storage.

    Deletes every MongoDB analysis document with the given task ID, the
    GridFS files and behavior calls only they reference, and finally the
    task itself from the SQL database.

    @param request: incoming request, forwarded to the rendered views.
    @param task_id: ID of the task to remove.
    @return: the rendered "success.html" page, or an error view when no
        analysis with this ID exists.
    """
    analyses = results_db.analysis.find({"info.id": int(task_id)})

    # Ask the database for the count only once.
    count = analyses.count()
    if not count:
        return view_error(request, "The specified analysis does not exist")

    # Checks if more analysis found with the same ID, like if process.py
    # was run manually.
    if count > 1:
        message = (
            "Multiple tasks with this ID deleted, thanks for all the fish "
            "(the specified analysis was present multiple times in mongo).")
    else:
        message = "Task deleted, thanks for all the fish."

    for analysis in analyses:
        # Delete sample if not used.
        target = analysis.get("target", {})
        if "file_id" in target:
            _delete_unshared_file("target.file_id", target["file_id"])

        # Delete screenshots.
        for shot in analysis.get("shots", []):
            if isinstance(shot, dict):
                # Dict entries keep their IDs under "small"/"original", so
                # the lookup has to address those sub-fields; matching the
                # "shots" array itself against an ObjectId never finds them.
                for size in ("small", "original"):
                    if size in shot:
                        _delete_unshared_file("shots.%s" % size, shot[size])
                continue

            _delete_unshared_file("shots", shot)

        # Delete network pcap, sorted pcap and mitmproxy dump.
        network = analysis.get("network", {})
        for key in ("pcap_id", "sorted_pcap_id", "mitmproxy_id"):
            if key in network:
                _delete_unshared_file("network.%s" % key, network[key])

        # Delete dropped.
        for drop in analysis.get("dropped", []):
            if "object_id" in drop:
                _delete_unshared_file("dropped.object_id", drop["object_id"])

        # Delete calls.
        for process in analysis.get("behavior", {}).get("processes", []):
            for call in process["calls"]:
                results_db.calls.remove({"_id": ObjectId(call)})

        # Delete analysis data.
        results_db.analysis.remove({"_id": ObjectId(analysis["_id"])})

    # Delete from SQL db.
    db = Database()
    db.delete_task(task_id)

    return render_template(request, "success.html", **{
        "message": message,
    })
Esempio n. 19
0
    def run(self):
        """Run information gathering.

        Task details are taken from the database when the task still
        exists there, otherwise from the stored task.json file, otherwise
        from an empty task that only carries the task ID.

        @return: information dict.
        """
        self.key = "info"

        db = Database()
        dbtask = db.view_task(self.task["id"], details=True)

        dbmachine = None
        if self.machine:
            dbmachine = db.view_machine(self.machine["name"])

        # Fetch the task.
        if dbtask:
            task = dbtask.to_dict()
        else:
            # Task is gone from the database.
            if os.path.isfile(self.taskinfo_path):
                # We've got task.json, so grab info from there.
                with open(self.taskinfo_path) as taskinfo:
                    task = json_decode(taskinfo.read())
            else:
                # We don't have any info on the task :(
                emptytask = Task()
                emptytask.id = self.task["id"]
                task = emptytask.to_dict()

        if dbmachine:
            task["guest"]["platform"] = dbmachine.to_dict()["platform"]

        # Get git head. The content of the .cwd file is reported as both the
        # head and the fetch head.
        if os.path.exists(cwd(".cwd")):
            with open(cwd(".cwd"), "rb") as cwd_file:
                git_head = git_fetch_head = cwd_file.read()
        else:
            log.warning(
                "No .cwd file was found in the Cuckoo Working Directory. Did "
                "you correctly setup the CWD?")
            git_head = git_fetch_head = None

        # Monitor. The entry may be a symbolic link (report its target), a
        # regular file (report its stripped content) or a directory (report
        # its name).
        monitor = cwd("monitor", task["options"].get("monitor", "latest"))
        if os.path.islink(monitor):
            monitor = os.readlink(monitor)
        elif os.path.isfile(monitor):
            with open(monitor, "rb") as monitor_file:
                monitor = monitor_file.read().strip()
        elif os.path.isdir(monitor):
            monitor = os.path.basename(monitor)
        else:
            monitor = None

        return dict(
            version=version,
            git={
                "head": git_head,
                "fetch_head": git_fetch_head,
            },
            monitor=monitor,
            added=task.get("added_on"),
            started=task["started_on"],
            ended=task.get("completed_on", "none"),
            duration=task.get("duration", -1),
            id=int(task["id"]),
            category=task["category"],
            custom=task["custom"],
            owner=task["owner"],
            machine=task["guest"],
            package=task["package"],
            platform=task["platform"],
            options=emit_options(task["options"]),
            route=task["route"],
        )
Esempio n. 20
0
import zipfile

from flask import Flask, request, jsonify, make_response

from cuckoo.common.config import config, parse_options
from cuckoo.common.files import Files, Folders
from cuckoo.common.utils import parse_bool
from cuckoo.core.database import Database, Task
from cuckoo.core.database import TASK_REPORTED, TASK_COMPLETED, TASK_RUNNING
from cuckoo.core.rooter import rooter
from cuckoo.core.submit import SubmitManager
from cuckoo.misc import cwd, version, decide_cwd

from cuckoo.common.pcapstream import pcapstream

# Module-level database and submit-manager handles, created once at import
# time.
db = Database()
sm = SubmitManager()

# Initialize Flask app.
app = Flask(__name__)


def json_error(status_code, message):
    """Build a JSON response holding *message* under the given HTTP status.

    @param status_code: HTTP status code to set on the response.
    @param message: text stored under the "message" key of the JSON body.
    @return: the response object produced by ``jsonify``.
    """
    response = jsonify(message=message)
    response.status_code = status_code
    return response


def shutdown_server():
    """Shutdown API werkzeug server"""
Esempio n. 21
0
class DatabaseEngine(object):
    """Tests database stuff.

    Engine-independent test cases. `URI` is the DSN handed to
    Database.connect(); it is None in this base class.
    """
    URI = None

    def setup_class(self):
        """Switch to a fresh temporary CWD and connect to the database."""
        set_cwd(tempfile.mkdtemp())

        self.d = Database()
        self.d.connect(dsn=self.URI)

    def add_url(self, url, priority=1, status="pending"):
        """Add a URL task, set its status and return the new task ID."""
        task_id = self.d.add_url(url, priority=priority)
        self.d.set_status(task_id, status)
        return task_id

    def test_add_tasks(self):
        """Adding a file and adding a URL each create exactly one Task row."""
        fd, sample_path = tempfile.mkstemp()
        os.write(fd, "hehe")
        os.close(fd)

        # Add task.
        count = self.d.Session().query(Task).count()
        self.d.add_path(sample_path)
        assert self.d.Session().query(Task).count() == count + 1

        # Add url.
        self.d.add_url("http://foo.bar")
        assert self.d.Session().query(Task).count() == count + 2

    def test_processing_get_task(self):
        """Completed tasks are handed out by priority, then insertion order."""
        # First reset all existing rows so that earlier exceptions don't affect
        # this unit test run.
        null, session = None, self.d.Session()

        session.query(Task).filter(
            Task.status == "completed", Task.processing == null
        ).update({
            "processing": "something",
        })
        session.commit()

        t1 = self.add_url("http://google.com/1", priority=1, status="completed")
        t2 = self.add_url("http://google.com/2", priority=2, status="completed")
        t3 = self.add_url("http://google.com/3", priority=1, status="completed")
        t4 = self.add_url("http://google.com/4", priority=1, status="completed")
        t5 = self.add_url("http://google.com/5", priority=3, status="completed")
        t6 = self.add_url("http://google.com/6", priority=1, status="completed")
        t7 = self.add_url("http://google.com/7", priority=1, status="completed")

        assert self.d.processing_get_task("foo") == t5
        assert self.d.processing_get_task("foo") == t2
        assert self.d.processing_get_task("foo") == t1
        assert self.d.processing_get_task("foo") == t3
        assert self.d.processing_get_task("foo") == t4
        assert self.d.processing_get_task("foo") == t6
        assert self.d.processing_get_task("foo") == t7
        assert self.d.processing_get_task("foo") is None

    def test_error_exists(self):
        """Identical error messages are stored as separate rows."""
        task_id = self.add_url("http://google.com/")
        self.d.add_error("A"*1024, task_id)
        assert len(self.d.view_errors(task_id)) == 1
        self.d.add_error("A"*1024, task_id)
        assert len(self.d.view_errors(task_id)) == 2

    def test_long_error(self):
        """A 1024 character error message is stored without truncation."""
        # Use the ID of the task created here rather than a hard-coded one,
        # so the test does not depend on test order or existing rows.
        task_id = self.add_url("http://google.com/")
        self.d.add_error("A"*1024, task_id)
        err = self.d.view_errors(task_id)
        assert err and len(err[0].message) == 1024

    def test_submit(self):
        """A submit row round-trips its path, type and data."""
        dirpath = tempfile.mkdtemp()
        submit_id = self.d.add_submit(dirpath, "files", {
            "foo": "bar",
        })
        submit = self.d.view_submit(submit_id)
        assert submit.id == submit_id
        assert submit.tmp_path == dirpath
        assert submit.submit_type == "files"
        assert submit.data == {
            "foo": "bar",
        }

    def test_connect_no_create(self):
        """connect(create=False) must not recreate a dropped table."""
        AlembicVersion.__table__.drop(self.d.engine)
        self.d.connect(dsn=self.URI, create=False)
        assert "alembic_version" not in self.d.engine.table_names()
        self.d.connect(dsn=self.URI)
        assert "alembic_version" in self.d.engine.table_names()

    def test_view_submit_tasks(self):
        """submit.tasks is only usable when requested with tasks=True."""
        submit_id = self.d.add_submit(None, None, None)
        t1 = self.d.add_path(__file__, custom="1", submit_id=submit_id)
        t2 = self.d.add_path(__file__, custom="2", submit_id=submit_id)

        submit = self.d.view_submit(submit_id)
        assert submit.id == submit_id
        with pytest.raises(DetachedInstanceError):
            # Accessing the attribute is what raises; the print is never
            # reached. Parenthesised form is valid on Python 2 and 3.
            print(submit.tasks)

        submit = self.d.view_submit(submit_id, tasks=True)
        assert len(submit.tasks) == 2
        tasks = sorted((task.id, task) for task in submit.tasks)
        assert tasks[0][1].id == t1
        assert tasks[0][1].custom == "1"
        assert tasks[1][1].id == t2
        assert tasks[1][1].custom == "2"

    def test_add_reboot(self):
        """A reboot task references the original task and the submit."""
        t0 = self.d.add_path(__file__)
        s0 = self.d.add_submit(None, None, None)
        t1 = self.d.add_reboot(task_id=t0, submit_id=s0)

        t = self.d.view_task(t1)
        assert t.custom == "%s" % t0
        assert t.submit_id == s0

    def test_task_set_options(self):
        """Options given as a dict or as a string yield the same dict."""
        t0 = self.d.add_path(__file__, options={"foo": "bar"})
        t1 = self.d.add_path(__file__, options="foo=bar")
        assert self.d.view_task(t0).options == {"foo": "bar"}
        assert self.d.view_task(t1).options == {"foo": "bar"}

    def test_task_tags_str(self):
        """Empty entries in a comma-separated tag string are dropped."""
        task = self.d.add_path(__file__, tags="foo,,bar")
        tag0, tag1 = self.d.view_task(task).tags
        assert sorted((tag0.name, tag1.name)) == ["bar", "foo"]

    def test_task_tags_list(self):
        """Empty and non-string entries in a tag list are dropped."""
        task = self.d.add_path(__file__, tags=["tag1", "tag2", "", 1, "tag3"])
        tag0, tag1, tag2 = self.d.view_task(task).tags
        assert sorted((tag0.name, tag1.name, tag2.name)) == [
            "tag1", "tag2", "tag3"
        ]

    def test_error_action(self):
        """The optional action of an error is stored next to its message."""
        task_id = self.d.add_path(__file__)
        self.d.add_error("message1", task_id)
        self.d.add_error("message2", task_id, "actionhere")
        e1, e2 = self.d.view_errors(task_id)
        assert e1.message == "message1"
        assert e1.action is None
        assert e2.message == "message2"
        assert e2.action == "actionhere"

    def test_view_tasks(self):
        """view_tasks() returns the same data as per-task view_task()."""
        t1 = self.d.add_path(__file__)
        t2 = self.d.add_url("http://google.com/")
        tasks = self.d.view_tasks([t1, t2])
        assert tasks[0].to_dict() == self.d.view_task(t1).to_dict()
        assert tasks[1].to_dict() == self.d.view_task(t2).to_dict()

    def test_add_machine(self):
        """Machine options given as None, "", a string or a list all
        come back as a list."""
        self.d.add_machine(
            "name1", "label", "1.2.3.4", "windows", None,
            "tag1 tag2", "int0", "snap0", "5.6.7.8", 2043
        )
        self.d.add_machine(
            "name2", "label", "1.2.3.4", "windows", "",
            "tag1 tag2", "int0", "snap0", "5.6.7.8", 2043
        )
        self.d.add_machine(
            "name3", "label", "1.2.3.4", "windows", "opt1 opt2",
            "tag1 tag2", "int0", "snap0", "5.6.7.8", 2043
        )
        self.d.add_machine(
            "name4", "label", "1.2.3.4", "windows", ["opt3", "opt4"],
            "tag1 tag2", "int0", "snap0", "5.6.7.8", 2043
        )
        m1 = self.d.view_machine("name1")
        m2 = self.d.view_machine("name2")
        m3 = self.d.view_machine("name3")
        m4 = self.d.view_machine("name4")
        assert m1.options == []
        assert m2.options == []
        assert m3.options == ["opt1", "opt2"]
        assert m4.options == ["opt3", "opt4"]

    @mock.patch("cuckoo.common.objects.magic")
    def test_add_sample(self, p):
        """A sample can be added even when magic reports an empty type."""
        p.from_file.return_value = ""
        assert self.d.add_path(Files.temp_put(os.urandom(16))) is not None
Esempio n. 22
0
    def setup_class(self):
        """Switch to a fresh temporary CWD and connect to the database."""
        workdir = tempfile.mkdtemp()
        set_cwd(workdir)

        database = Database()
        self.d = database
        database.connect(dsn=self.URI)
Esempio n. 23
0
def test_init_tasks():
    """Check the task statuses left behind by init_tasks(), both with and
    without the "reschedule" option enabled."""
    def init(reschedule):
        # Fresh CWD with the requested reschedule setting.
        set_cwd(tempfile.mkdtemp())
        cfg = {"cuckoo": {"cuckoo": {"reschedule": reschedule}}}
        cuckoo_create(cfg=cfg)
        Database().connect()

        # One task per status, created in this order.
        created_ids = []
        for status in ("pending", "running", "completed", "reported"):
            task_id = Database().add_path(__file__)
            Database().set_status(task_id, status)
            created_ids.append(task_id)

        init_tasks()

    def check(expected):
        # Task IDs start at 1; the ID after the last expected one must
        # not exist.
        for task_id, status in enumerate(expected, 1):
            assert Database().view_task(task_id).status == status
        assert Database().view_task(len(expected) + 1) is None

    init(True)
    check(["pending", "recovered", "completed", "reported", "pending"])

    init(False)
    check(["pending", "failed_analysis", "completed", "reported"])
Esempio n. 24
0
def cuckoo_clean():
    """Clean up cuckoo setup.
    It drops the SQL tables, the MongoDB database and the current
    ElasticSearch index (when enabled), and deletes logs and stored data
    from the file system.
    """
    # Init logging (without writing to file).
    init_console_logging()

    try:
        # Connect without a schema check and drop all tables.
        database = Database()
        database.connect(schema_check=False)
        database.drop()
    except (CuckooDependencyError, CuckooDatabaseError) as e:
        # With broken migrations or a bad database SqlAlchemy is unable to
        # connect and operate; warn and carry on with the other cleanups.
        log.warning(
            "Error connecting to database: it is suggested to check "
            "the connectivity, apply all migrations if needed or purge "
            "it manually. Error description: %s", e)

    # Drop the MongoDB database if MongoDB reporting is enabled.
    if mongo.init():
        try:
            mongo.connect()
            mongo.drop()
            mongo.close()
        except Exception as e:
            log.warning("Unable to drop MongoDB database: %s", e)

    # Drop the ElasticSearch data if ElasticSearch reporting is enabled.
    if elastic.init():
        elastic.connect()

        # TODO This should be moved to the elastic abstract.
        # TODO We should also drop historic data, i.e., from previous days,
        # months, and years.
        time_formats = {
            "yearly": "%Y",
            "monthly": "%Y-%m",
            "daily": "%Y-%m-%d",
        }
        now = datetime.datetime.utcnow()
        date_index = now.strftime(time_formats[elastic.index_time_pattern])
        dated_index = "%s-%s" % (elastic.index, date_index)

        es_indices = elastic.client.indices
        es_indices.delete(index=dated_index, ignore=[400, 404])

        template_name = "%s_template" % dated_index
        if es_indices.exists_template(template_name):
            es_indices.delete_template(template_name)

    # Paths to clean.
    targets = [
        cwd("cuckoo.db"),
        cwd("log"),
        cwd("storage", "analyses"),
        cwd("storage", "baseline"),
        cwd("storage", "binaries"),
    ]

    # Delete the various files and directories. Directories are recreated
    # empty, so that the layout of the CWD stays intact.
    for target in targets:
        is_dir = os.path.isdir(target)
        if not is_dir and not os.path.isfile(target):
            continue

        try:
            if is_dir:
                shutil.rmtree(target)
                os.mkdir(target)
            else:
                os.unlink(target)
        except (IOError, OSError) as e:
            if is_dir:
                log.warning("Error removing directory %s: %s", target, e)
            else:
                log.warning("Error removing file %s: %s", target, e)
Esempio n. 25
0
def submit_tasks(target, options, package, custom, owner, timeout, priority,
                 machine, platform, memory, enforce_timeout, clock, tags,
                 remote, pattern, maxcount, is_unique, is_url, is_baseline,
                 is_shuffle):
    """Submit baseline, URL or file tasks, locally or to a remote API.

    This is a generator. It yields one (kind, target, task_id) tuple per
    submission, where kind is "Baseline", "URL" or "File". For a local
    file submission with is_unique set, task_id is None when the sample's
    SHA256 is already known to the database.

    @param target: iterable of URLs (is_url) or of paths passed to
        enumerate_files() together with pattern.
    @param remote: "host[:port]" of a remote API; when empty the tasks are
        added to the local database.
    @param maxcount: maximum number of non-empty files to submit, or None
        for no limit.
    @param is_shuffle: shuffle the enumerated files before submitting.

    The remaining parameters are forwarded as task settings. Submission
    errors against the remote API are printed and the item is skipped.
    """
    db = Database()

    data = dict(
        package=package or "",
        timeout=timeout,
        options=options,
        priority=priority,
        machine=machine,
        platform=platform,
        custom=custom,
        owner=owner,
        tags=tags,
        memory="1" if memory else "0",
        enforce_timeout="1" if enforce_timeout else "0",
        clock=clock,
        unique="1" if is_unique else "0",
    )

    if is_baseline:
        if remote:
            print("Remote baseline support has not yet been implemented.")
            return

        task_id = db.add_baseline(timeout, owner, machine, memory)
        yield "Baseline", machine, task_id
        return

    if is_url and is_unique:
        print("URL doesn't have --unique support yet.")
        return

    if is_url:
        for url in target:
            if not remote:
                # "unique" is only understood by the remote API.
                data.pop("unique", None)
                task_id = db.add_url(to_unicode(url), **data)
                yield "URL", url, task_id
                continue

            data["url"] = to_unicode(url)
            try:
                r = requests.post("http://%s/tasks/create/url" % remote,
                                  data=data)
                yield "URL", url, r.json()["task_id"]
            except Exception as e:
                print("%s: unable to submit URL: %s" % (
                    bold(red("Error")), e))
    else:
        samples = []
        for path in target:
            samples.extend(enumerate_files(os.path.abspath(path), pattern))

        if is_shuffle:
            random.shuffle(samples)

        for filepath in samples:
            if not os.path.getsize(filepath):
                print("%s: sample %s (skipping file)" % (
                    bold(yellow("Empty")), filepath))
                continue

            if maxcount is not None:
                if not maxcount:
                    break
                maxcount -= 1

            if not remote:
                if is_unique:
                    sha256 = File(filepath).get_sha256()
                    if db.find_sample(sha256=sha256):
                        yield "File", filepath, None
                        continue

                # "unique" is only understood by the remote API.
                data.pop("unique", None)
                task_id = db.add_path(file_path=filepath, **data)
                yield "File", filepath, task_id
                continue

            # Keep the sample open only for the duration of the upload, so
            # that submitting many files does not leak file descriptors.
            with open(filepath, "rb") as sample:
                upload = {
                    "file": (os.path.basename(filepath), sample),
                }
                try:
                    r = requests.post("http://%s/tasks/create/file" % remote,
                                      data=data,
                                      files=upload)
                    task_id = r.json()["task_id"]
                except Exception as e:
                    print("%s: unable to submit file: %s" % (
                        bold(red("Error")), e))
                    continue

            yield "File", filepath, task_id
Esempio n. 26
0
def submit_tasks(target, options, package, custom, owner, timeout, priority,
                 machine, platform, memory, enforce_timeout, clock, tags,
                 remote, pattern, maxcount, is_unique, is_url, is_baseline,
                 is_shuffle):
    """Submit baseline, URL or file tasks, locally or to a remote API.

    Generator yielding a (kind, target, task_id) tuple for every
    submission; kind is one of "Baseline", "URL" and "File". When
    submitting files locally with is_unique set, an already known sample
    (looked up by SHA256) is reported with a task_id of None.

    @param target: iterable of URLs when is_url is set, otherwise of paths
        that are expanded through enumerate_files() using pattern.
    @param remote: "host[:port]" of a remote API to submit to; falsy to
        add the tasks to the local database.
    @param maxcount: upper bound on the number of non-empty files that are
        submitted, None for unlimited.
    @param is_shuffle: randomize the order of the enumerated files.

    All other parameters are passed along as task settings. Failures of
    remote submissions are printed and do not stop the generator.
    """
    db = Database()

    data = dict(
        package=package or "",
        timeout=timeout,
        options=options,
        priority=priority,
        machine=machine,
        platform=platform,
        custom=custom,
        owner=owner,
        tags=tags,
        memory="1" if memory else "0",
        enforce_timeout="1" if enforce_timeout else "0",
        clock=clock,
        unique="1" if is_unique else "0",
    )

    if is_baseline:
        if remote:
            print("Remote baseline support has not yet been implemented.")
            return

        task_id = db.add_baseline(timeout, owner, machine, memory)
        yield "Baseline", machine, task_id
        return

    if is_url and is_unique:
        print("URL doesn't have --unique support yet.")
        return

    if is_url:
        for url in target:
            if not remote:
                # The "unique" flag only exists for the remote API.
                data.pop("unique", None)
                task_id = db.add_url(to_unicode(url), **data)
                yield "URL", url, task_id
                continue

            data["url"] = to_unicode(url)
            try:
                r = requests.post(
                    "http://%s/tasks/create/url" % remote, data=data
                )
                yield "URL", url, r.json()["task_id"]
            except Exception as e:
                print("%s: unable to submit URL: %s" % (
                    bold(red("Error")), e
                ))
    else:
        filepaths = []
        for path in target:
            filepaths.extend(
                enumerate_files(os.path.abspath(path), pattern)
            )

        if is_shuffle:
            random.shuffle(filepaths)

        for filepath in filepaths:
            if not os.path.getsize(filepath):
                print("%s: sample %s (skipping file)" % (
                    bold(yellow("Empty")), filepath
                ))
                continue

            if maxcount is not None:
                if not maxcount:
                    break
                maxcount -= 1

            if not remote:
                if is_unique:
                    sha256 = File(filepath).get_sha256()
                    if db.find_sample(sha256=sha256):
                        yield "File", filepath, None
                        continue

                # The "unique" flag only exists for the remote API.
                data.pop("unique", None)
                task_id = db.add_path(file_path=filepath, **data)
                yield "File", filepath, task_id
                continue

            # The file handle is closed as soon as the upload finished
            # (or failed) instead of being left open for every sample.
            with open(filepath, "rb") as fileobj:
                payload = {
                    "file": (os.path.basename(filepath), fileobj),
                }
                try:
                    r = requests.post(
                        "http://%s/tasks/create/file" % remote,
                        data=data, files=payload
                    )
                    task_id = r.json()["task_id"]
                except Exception as e:
                    print("%s: unable to submit file: %s" % (
                        bold(red("Error")), e
                    ))
                    continue

            yield "File", filepath, task_id
Esempio n. 27
0
def reboot(task_id):
    """Create a reboot task for an existing task.

    @param task_id: ID of the task to create a reboot task for.
    @return: JSON with the original and the reboot task ID, or a 404 JSON
        error when no reboot task could be created.
    """
    new_task_id = Database().add_reboot(task_id=task_id)
    if new_task_id:
        return jsonify(task_id=task_id, reboot_id=new_task_id)

    return json_error(404, "Error creating reboot task")
Esempio n. 28
0
def web(ctx, args, host, port, uwsgi, nginx):
    """Operate the Cuckoo Web Interface.

    Use "--help" to get this help message and "help" to find Django's
    manage.py potential subcommands.
    """
    username = ctx.parent.user or getuser()
    if uwsgi:
        print "[uwsgi]"
        print "plugins = python"
        if os.environ.get("VIRTUAL_ENV"):
            print "virtualenv =", os.environ["VIRTUAL_ENV"]
        print "module = cuckoo.web.web.wsgi"
        print "uid =", username
        print "gid =", username
        dirpath = os.path.join(cuckoo.__path__[0], "web", "static")
        print "static-map = /static=%s" % dirpath
        print "# If you're getting errors about the PYTHON_EGG_CACHE, then"
        print "# uncomment the following line and add some path that is"
        print "# writable from the defined user."
        print "# env = PYTHON_EGG_CACHE="
        print "env = CUCKOO_APP=web"
        print "env = CUCKOO_CWD=%s" % cwd()
        return

    if nginx:
        print "upstream _uwsgi_cuckoo_web {"
        print "    server unix:/run/uwsgi/app/cuckoo-web/socket;"
        print "}"
        print
        print "server {"
        print "    listen %s:%d;" % (host, port)
        print
        print "    # Cuckoo Web Interface"
        print "    location / {"
        print "        client_max_body_size 1G;"
        print "        proxy_redirect off;"
        print "        proxy_set_header X-Forwarded-Proto $scheme;"
        print "        uwsgi_pass  _uwsgi_cuckoo_web;"
        print "        include     uwsgi_params;"
        print "    }"
        print "}"
        return

    # Switch to cuckoo/web and add the current path to sys.path as the Web
    # Interface is using local imports here and there.
    # TODO Rename local imports to either cuckoo.web.* or relative imports.
    sys.argv[0] = os.path.abspath(sys.argv[0])
    os.chdir(os.path.join(cuckoo.__path__[0], "web"))
    sys.path.insert(0, ".")

    os.environ.setdefault("DJANGO_SETTINGS_MODULE", "cuckoo.web.web.settings")

    # The Django HTTP server also imports the WSGI module for some reason, so
    # ensure that WSGI is able to load.
    os.environ["CUCKOO_APP"] = "web"
    os.environ["CUCKOO_CWD"] = cwd()

    from django.core.management import execute_from_command_line

    init_console_logging(level=ctx.parent.level)
    Database().connect()

    try:
        execute_from_command_line(
            ("cuckoo", "runserver", "%s:%d" % (host, port))
            if not args else
            ("cuckoo",) + args
        )
    except CuckooCriticalError as e:
        message = red("{0}: {1}".format(e.__class__.__name__, e))
        if len(log.handlers):
            log.critical(message)
        else:
            sys.stderr.write("{0}\n".format(message))
        sys.exit(1)
Esempio n. 29
0
    def run(self):
        """Run debug analysis.

        Collects the analysis log, the Cuckoo log, the dbgview log, the
        errors and actions stored in the database for this task, and the
        mitm error output.
        @return: debug information dict.
        @raise CuckooProcessingError: if the analysis log cannot be opened
            or decoded.
        """
        self.key = "debug"
        debug = {
            "log": [],
            "cuckoo": [],
            "action": [],
            "dbgview": [],
            "errors": [],
        }

        if os.path.exists(self.log_path):
            try:
                # The with-block closes the log once it has been read.
                with codecs.open(self.log_path, "rb", "utf-8") as f:
                    debug["log"] = f.readlines()
            except ValueError as e:
                raise CuckooProcessingError("Error decoding %s: %s" %
                                            (self.log_path, e))
            except (IOError, OSError) as e:
                raise CuckooProcessingError("Error opening %s: %s" %
                                            (self.log_path, e))
        else:
            log.error(
                "Error processing task #%d: it appears that the Virtual "
                "Machine hasn't been able to contact back to "
                "the Cuckoo Host. There could be a few reasons for this, "
                "please refer to our documentation on the matter: %s",
                self.task.id,
                faq("troubleshooting-vm-network-configuration"),
                extra={
                    "error_action": "vmrouting",
                    "action": "guest.communication",
                    "status": "error",
                    "task_id": self.task.id,
                })

        if os.path.exists(self.cuckoolog_path):
            debug["cuckoo"] = Logfile(self.cuckoolog_path)

        dbgview_log = os.path.join(self.analysis_path, "logs", "dbgview.log")
        if os.path.exists(dbgview_log):
            with open(dbgview_log, "rb") as f:
                # Ignore the first line which identifies the machine.
                f.readline()
                for line in f:
                    # Expected layout: index, time and message separated by
                    # tabs. Skip lines that do not match (e.g. blank lines)
                    # rather than aborting the whole processing module.
                    fields = line.split("\t", 2)
                    if len(fields) != 3:
                        continue
                    debug["dbgview"].append(fields[2].strip())

        # Unique error messages and actions, in the order they were logged.
        for error in Database().view_errors(self.task["id"]):
            if error.message and error.message not in debug["errors"]:
                debug["errors"].append(error.message)

            if error.action and error.action not in debug["action"]:
                debug["action"].append(error.action)

        if os.path.exists(self.mitmerr_path):
            with open(self.mitmerr_path, "rb") as f:
                mitmerr = f.read()
            if mitmerr and mitmerr not in debug["errors"]:
                debug["errors"].append(mitmerr)

        return debug
Esempio n. 30
0
class Scheduler(object):

    def __init__(self, maxcount=None):
        """Set up the scheduler state.
        @param maxcount: maximum number of analyses; when None it is read
            from the configuration in initialize().
        """
        self.running = True
        self.db = Database()

        # Both are assigned in initialize().
        self.machinery = None
        self.machine_lock = None

        # Analysis bookkeeping.
        self.managers = []
        self.total_analysis_count = 0
        self.maxcount = maxcount

    def initialize(self):
        """Load and initialize the configured machinery.
        @raise CuckooCriticalError: if the machinery fails to initialize or
            does not provide any machine.
        """
        machinery_name = config("cuckoo:cuckoo:machinery")
        max_vmstartup = config("cuckoo:cuckoo:max_vmstartup_count")

        # A semaphore keeps too many VMs from starting at the same time.
        self.machine_lock = threading.Semaphore(max_vmstartup)

        machinery_extra = {
            "action": "init.machinery",
            "status": "success",
            "machinery": machinery_name,
        }
        log.info(
            "Using '%s' as machine manager", machinery_name,
            extra=machinery_extra
        )

        # Instantiate the machine manager and hand it its configuration.
        machinery_class = cuckoo.machinery.plugins[machinery_name]
        self.machinery = machinery_class()
        self.machinery.set_options(Config(machinery_name))

        try:
            self.machinery.initialize(machinery_name)
        except CuckooMachineError as e:
            raise CuckooCriticalError("Error initializing machines: %s" % e)

        # By now all available machines should have been identified. Cuckoo
        # aborts when there are none. TODO In the future we'll probably
        # want get rid of this.
        machines = self.machinery.machines()
        if not machines:
            raise CuckooCriticalError("No machines available.")

        machine_count = len(machines)
        log.info(
            "Loaded %s machine/s", machine_count,
            extra={
                "action": "init.machines",
                "status": "success",
                "count": machine_count
            }
        )

        if machine_count > 1 and self.db.engine.name == "sqlite":
            log.warning(
                "As you've configured Cuckoo to execute parallel "
                "analyses, we recommend you to switch to a MySQL or "
                "a PostgreSQL database as SQLite might cause some "
                "issues."
            )

        if machine_count > 4 and config("cuckoo:cuckoo:process_results"):
            log.warning(
                "When running many virtual machines it is recommended to "
                "process the results in separate 'cuckoo process' instances "
                "increase throughput and stability. Please read the "
                "documentation about the `Processing Utility`."
            )

        self.drop_forwarding_rules()

        # A maxcount given at construction time wins over the
        # configuration file.
        if self.maxcount is None:
            self.maxcount = config("cuckoo:cuckoo:max_analysis_count")

    def drop_forwarding_rules(self):
        """Drop all existing packet forwarding rules for each VM. Just in case
        Cuckoo was terminated for some reason and various forwarding rules
        have thus not been dropped yet."""
        for machine in self.machinery.machines():
            if not machine.interface:
                log.info(
                    "Unable to determine the network interface for VM "
                     "with name %s, Cuckoo will not be able to give it "
                     "full internet access or route it through a VPN! "
                     "Please define a default network interface for the "
                     "machinery or define a network interface for each "
                     "VM.", machine.name
                )
                continue

            # Drop forwarding rule to each VPN.
            if config("routing:vpn:enabled"):
                for vpn in config("routing:vpn:vpns"):
                    rooter(
                        "forward_disable", machine.interface,
                        config("routing:%s:interface" % vpn), machine.ip
                    )

            # Drop forwarding rule to the internet / dirty line.
            if config("routing:routing:internet") != "none":
                rooter(
                    "forward_disable", machine.interface,
                    config("routing:routing:internet"), machine.ip
                )

    def stop(self):
        """Stop the Cuckoo task scheduler."""
        self.running = False
        # Shutdown machine manager (used to kill machines that still alive).
        for manager in self.managers:
            manager.force_cleanup()

        self.machinery.shutdown()

    def ready_for_new_run(self):
        """Check whether Cuckoo should start a new pending task.
        @return: True if a new task may be started, False otherwise.
        """
        # The machine lock is only free when no machine is about to start
        # or still busy starting. Probing it first avoids race conditions
        # such as the analysis manager finding no available machine or two
        # analyses picking the same machine.
        lock_free = self.machine_lock.acquire(False)
        if not lock_free:
            logger(
                "Could not acquire machine lock",
                action="scheduler.machine_lock", status="busy"
            )
            return False

        # It was only a probe, release it right away.
        self.machine_lock.release()

        # Verify that the minimum amount of disk space is available.
        if config("cuckoo:cuckoo:freespace"):
            free_mb = get_free_disk(cwd("storage", "analyses"))

            # None means the check itself failed. Continue, since this
            # can happen if the disk check is not supported on others than
            # unix and winxp+. The call might also fail on win32.
            if free_mb is None:
                log.error("Error determining free disk space")
            elif free_mb <= config("cuckoo:cuckoo:freespace"):
                diskspace_extra = {
                    "action": "scheduler.diskspace",
                    "status": "error",
                    "available": free_mb,
                }
                log.error(
                    "Not enough free disk space! (Only %d MB!)",
                    free_mb, extra=diskspace_extra
                )
                return False

        # Respect the configured maximum of concurrently running machines.
        max_vm = config("cuckoo:cuckoo:max_machines_count")
        if max_vm and len(self.machinery.running()) >= max_vm:
            log.debug(
                "Maximum amount of machines is running", extra={
                    "action": "scheduler.machines",
                    "status": "maxed"
                }
            )
            return False

        if self.machinery.availables():
            return True

        logger(
            "No available machines",
            action="scheduler.machines", status="none"
        )
        return False

    def task_limit_hit(self):
        """Stops the scheduler is the maximum amount of tasks has been
        reached. This can be configured by max_analysis_count in cuckoo.conf
        or passed as an argument when starting Cuckoo."""
        if self.maxcount and self.total_analysis_count >= self.maxcount:
            if not self.managers:
                log.debug(
                    "Reached max analysis count, exiting.", extra={
                        "action": "scheduler.max_analysis",
                        "status": "success",
                        "limit": self.total_analysis_count,
                    }
                )
                self.stop()
                return True

            log.debug(
                "Maximum analyses hit, awaiting active analyses to finish. "
                "Still active: %s", len(self.managers), extra={
                    "action": "scheduler.max_analysis",
                    "status": "busy",
                    "active": len(self.managers)
                }
            )
            return True
        return False

    def handle_pending(self):
        """Try to start one pending task on an available machine.

        The machine lock is taken without blocking. A task is first searched
        for each available machine (a reserved machine only gets the task it
        is reserved by). If that yields nothing and at least one available
        machine reports is_analysis(), a task is fetched with service=False.
        When both a task and a machine were obtained, a matching analysis
        manager is looked up, initialized, started and appended to
        self.managers.

        Whenever no analysis manager gets started, the machine lock and any
        acquired machine are released again before returning. On the success
        path the machine lock is not released by this method.
        @return: None.
        """
        # Acquire machine lock non-blocking. This is because the scheduler
        # also handles requests made by analysis manager. A blocking lock
        # could cause a deadlock
        if not self.machine_lock.acquire(False):
            return

        # Select task that is specifically for one of the available machines
        # possibly a service machine or reserved machine
        machine, task, analysis = None, None, False
        for available_machine in self.db.get_available_machines():

            # If the machine has been reserved for a specific task, this
            # task should be processed first, as the machine will only be
            # released once it has finished (Example: longterm task).
            if available_machine.reserved_by:
                task = self.db.fetch(task_id=available_machine.reserved_by)
                if task:
                    machine = self.machinery.acquire(
                        machine_id=available_machine.name
                    )
                    break
                # A reserved machine without a fetchable task is skipped
                # entirely; it is not considered for any other task.
                continue

            task = self.db.fetch(machine=available_machine.name)
            if task:
                machine = self.machinery.acquire(
                    machine_id=available_machine.name
                )
                break

            # Remember that at least one free machine can be used by the
            # generic task search below.
            if available_machine.is_analysis():
                analysis = True

        # No task for a specific machine and at least one of the available
        # machines is not a service machine. Fetch task that is not
        # for a service machine
        if not task and not machine and analysis:

            # Search for a task, but don't lock it until we are sure a machine
            # for this task is available, since it might have tags or require
            # a specific platform. Ignore a task if we know a machine is not
            # available for it.
            exclude = []
            while not machine:
                task = self.db.fetch(service=False, exclude=exclude)

                # Nothing left to try.
                if task is None:
                    break

                try:
                    machine = self.machinery.acquire(
                        machine_id=task.machine, platform=task.platform,
                        tags=task.tags
                    )
                except CuckooOperationalError:
                    log.error(
                        "Task #%s cannot be started, no machine with matching "
                        "requirements for this task exists. Requirements: %s",
                        task.id, Task.requirements_str(task)
                    )
                    # No machine with required tags, name etc exists
                    # Set analysis to failed.
                    # TODO Use another status so it might be recovered
                    # on next Cuckoo startup if the machine exists by then
                    self.db.set_status(task.id, TASK_FAILED_ANALYSIS)
                    # 'task' stays set but 'machine' is still None, so the
                    # release branch below is taken.
                    break

                # A matching machine exists but could not be acquired now;
                # skip this task on the next fetch.
                if not machine:
                    exclude.append(task.id)

        # Without both a task and a machine nothing can be started. Give the
        # lock back, and the machine too if one was acquired without a task.
        if not task or not machine:
            self.machine_lock.release()
            if machine:
                self.machinery.release(label=machine.label)
            return

        log.info(
            "Task #%d: acquired machine %s (label=%s)",
            task.id, machine.name, machine.label, extra={
                "action": "vm.acquire",
                "status": "success",
                "vmname": machine.name,
            }
        )

        # Task and matching machine found. Find analysis manager
        # which supports the type of this task. Lock it when found
        analysis_manager = self.get_analysis_manager(task, machine)

        if not analysis_manager:
            # If no analysis manager is found for this task type, it
            # cannot be started, therefore we release the machine again
            self.machinery.release(label=machine.label)

            # Release machine lock as the machine will not be starting
            self.machine_lock.release()

            # Set task status to failed as it cannot be analysed if no matching
            # analysis manager for its type exists
            self.db.set_status(task.id, TASK_FAILED_ANALYSIS)
            return

        # Only lock task for running if we are sure we will try to start it
        self.db.set_status(task.id, TASK_RUNNING)

        # Increment the total amount of analyses. Note that this happens
        # before init() is attempted, so a failed init still counts.
        self.total_analysis_count += 1

        analysis_manager.daemon = True
        if not analysis_manager.init(self.db):
            self.db.set_status(task.id, TASK_FAILED_ANALYSIS)
            log.error(
                "Failed to initialize analysis manager for task #%s", task.id
            )
            self.machine_lock.release()
            self.machinery.release(label=machine.label)
            return

        # If initialization succeeded, start the analysis manager
        # and store it so we can track it
        analysis_manager.start()
        self.managers.append(analysis_manager)

    def get_analysis_manager(self, db_task, machine):
        """Build an analysis manager that can handle the given task.

        The first entry of cuckoo.analysis.plugins whose 'supports' contains
        the task type is instantiated with the machine, the machinery and the
        machine lock, and gets the task and its targets assigned.
        @param db_task: task a manager is needed for.
        @param machine: machine that was acquired for the task.
        @return: the prepared analysis manager, or None if no plugin supports
                 the task type or preparing the manager raised an exception.
        """
        for plugin in cuckoo.analysis.plugins:
            if db_task.type not in plugin.supports:
                continue

            # Only the first supporting plugin is ever tried; a failure
            # below does not fall through to later plugins.
            wrapped_task = Task(db_task)
            candidate = plugin(machine, self.machinery, self.machine_lock)
            try:
                candidate.set_task(wrapped_task)
                candidate.set_target(wrapped_task.targets)
            except Exception:
                log.exception(
                    "Failure when setting task and target for analysis"
                    " manager '%s'.", plugin
                )
                return None
            return candidate

        return None

    def handle_managers(self):
        """Serve the tracked analysis managers and collect the finished ones.

        For every manager that requested an action, the handler named
        "on_status_<status>" is looked up on the manager and called with the
        database; exceptions raised by the handler are logged and swallowed.
        The manager's action lock is released afterwards in every case.
        Managers that are no longer alive are finalized.
        @return: list of analysis managers to untrack.
        """
        remove = []
        for manager in self.managers:

            if manager.action_requested():
                status = manager.get_analysis_status()
                status_action = getattr(manager, "on_status_%s" % status, None)
                if status_action:
                    log.debug(
                        "Executing requested action by task #%s for status"
                        " '%s'", manager.task.id, status
                    )
                    try:
                        status_action(self.db)
                    except Exception as e:
                        log.exception(
                            "Error executing requested action: %s. Error: %s",
                            status_action, e
                        )
                else:
                    log.error(
                        "Analysis manager for task #%s requested action for"
                        " status '%s', but no action is implemented",
                        manager.task.id, status
                    )
                # Released whether or not a handler existed or succeeded, so
                # the requesting manager is never left waiting.
                manager.action_lock.release()

            # is_alive() exists on threads in Python 2.6+ and Python 3; the
            # camelCase isAlive() alias was removed in Python 3.9.
            # NOTE(review): assumes analysis managers are threading.Thread
            # objects (they are given .daemon and .start() by the scheduler).
            if not manager.is_alive():
                manager.finalize(self.db)
                remove.append(manager)

        return remove

    def keep_running(self):
        """Return the current value of the scheduler's 'running' attribute,
        which the main loop uses as its loop condition."""
        still_running = self.running
        return still_running

    def start(self):
        """Start the Cuckoo task scheduler.

        Initializes the scheduler and then loops once per second for as long
        as keep_running() is true.
        """
        self.initialize()

        log.info("Waiting for analysis tasks")

        while self.keep_running():
            time.sleep(1)

            # Execute the actions requested by analysis managers and stop
            # tracking the managers that have exited.
            for finished in self.handle_managers():
                self.managers.remove(finished)

            # When the maximum amount of analyses has been hit no new task
            # is picked up during this iteration.
            if not self.task_limit_hit():
                # Only look for a machine and an analysis manager if there
                # is a pending task and the scheduler is ready for a new run.
                has_pending = self.db.count_tasks(status=TASK_PENDING)
                if has_pending and self.ready_for_new_run():
                    self.handle_pending()

        log.debug("End of analyses.")
Esempio n. 31
0
 def init(self):
     """Connect the database, initialize URLDiaries and reset group state.

     The group name is set to a fresh rand_string(16) value and the group
     id is cleared.
     """
     Database().connect()
     URLDiaries.init()
     self.groupname, self.group_id = rand_string(16), None
Esempio n. 32
0
def remove(request, task_id):
    """Remove an analysis from MongoDB, GridFS and the SQL database.

    GridFS files (sample, screenshots, pcaps, mitmproxy dump, dropped
    files) are only deleted when exactly one analysis document references
    them. The behavior calls and the analysis documents themselves are
    always removed.
    @param request: request object, passed on to the rendered response.
    @param task_id: ID of the task to delete.
    @return: the rendered "success.html" response, or the error view if no
             analysis with this ID exists in MongoDB.
    @todo: remove folder from storage.
    """
    analyses = results_db.analysis.find({"info.id": int(task_id)})

    # Count once instead of issuing a separate count query for every check.
    found = analyses.count()
    if not found:
        return view_error(request, "The specified analysis does not exist")

    # More than one analysis can exist for the same ID, like if process.py
    # was run manually.
    if found > 1:
        message = (
            "Multiple tasks with this ID deleted, thanks for all the fish "
            "(the specified analysis was present multiple times in mongo)."
        )
    else:
        message = "Task deleted, thanks for all the fish."

    for analysis in analyses:
        # Delete sample if not used.
        target = analysis.get("target", {})
        if "file_id" in target:
            _delete_unshared_file("target.file_id", target["file_id"])

        # Delete screenshots. A screenshot is either a plain GridFS id or a
        # dict holding the ids of its "small" and "original" versions.
        for shot in analysis.get("shots", []):
            if isinstance(shot, dict):
                # An embedded document has to be queried through its dotted
                # path; comparing the "shots" array against a bare ObjectId
                # can never match a dict entry, so these files would never
                # be deleted.
                for size in ("small", "original"):
                    if size in shot:
                        _delete_unshared_file("shots.%s" % size, shot[size])
                continue

            _delete_unshared_file("shots", shot)

        # Delete network pcap, sorted pcap and mitmproxy dump.
        network = analysis.get("network", {})
        for key in ("pcap_id", "sorted_pcap_id", "mitmproxy_id"):
            if key in network:
                _delete_unshared_file("network.%s" % key, network[key])

        # Delete dropped.
        for drop in analysis.get("dropped", []):
            if "object_id" in drop:
                _delete_unshared_file("dropped.object_id", drop["object_id"])

        # Delete calls.
        for process in analysis.get("behavior", {}).get("processes", []):
            for call in process["calls"]:
                results_db.calls.remove({"_id": ObjectId(call)})

        # Delete analysis data.
        results_db.analysis.remove({"_id": ObjectId(analysis["_id"])})

    # Delete from SQL db.
    db = Database()
    db.delete_task(task_id)

    return render_template(request, "success.html", **{
        "message": message,
    })


def _delete_unshared_file(field, file_id):
    """Delete a GridFS file if exactly one analysis document references it.
    @param field: (dotted) analysis field that stores the GridFS object id.
    @param file_id: GridFS object id, in any form accepted by ObjectId().
    """
    object_id = ObjectId(file_id)
    if results_db.analysis.find({field: object_id}).count() == 1:
        fs.delete(object_id)
Esempio n. 33
0
class Machinery(object):
    """Base abstract class for machinery modules.

    Keeps the machine table of the database in sync with the configuration
    and offers locking/unlocking of machines. Starting, stopping and listing
    machines is left to subclasses.
    """

    # Default label used in machinery configuration file to supply virtual
    # machine name/label/vmx path. Override it if you dubbed it in another
    # way.
    LABEL = "label"

    def __init__(self):
        """Create the database handle and empty the machine table."""
        self.options = None
        self.db = Database()
        self.remote_control = False

        # Machine table is cleaned to be filled from configuration file
        # at each start.
        self.db.clean_machines()

    @classmethod
    def init_once(cls):
        """Hook that does nothing in the base class."""
        pass

    def pcap_path(self, task_id):
        """Returns the .pcap path for this task id."""
        return cwd("storage", "analyses", "%s" % task_id, "dump.pcap")

    def set_options(self, options):
        """Set machine manager options.
        @param options: machine manager options dict.
        """
        self.options = options

    def initialize(self, module_name):
        """Read, load, and verify machines configuration.
        @param module_name: module name.
        """
        # Load.
        self._initialize(module_name)

        # Run initialization checks.
        self._initialize_check()

    def _initialize(self, module_name):
        """Read configuration and add every configured machine to the
        database.
        @param module_name: module name.
        """
        machinery = self.options.get(module_name)
        for vmname in machinery["machines"]:
            options = self.options.get(vmname)

            # If configured, use specific network interface for this
            # machine, else use the default value.
            interface = options.get("interface") or machinery.get("interface")

            # Same for the ResultServer IP: a per-machine value wins over
            # the global configuration.
            ip = (
                options.get("resultserver_ip") or
                config("cuckoo:resultserver:ip")
            )

            if options.get("resultserver_port"):
                port = options["resultserver_port"]
            else:
                # The ResultServer port might have been dynamically changed,
                # get it from the ResultServer singleton. Also avoid import
                # recursion issues by importing ResultServer here.
                from cuckoo.core.resultserver import ResultServer
                port = ResultServer().port

            self.db.add_machine(
                name=vmname,
                label=options[self.LABEL],
                ip=options.ip,
                platform=options.platform,
                options=options.get("options", ""),
                tags=options.tags,
                interface=interface,
                snapshot=options.snapshot,
                resultserver_ip=ip,
                resultserver_port=port
            )

    def _initialize_check(self):
        """Runs checks against virtualization software when a machine manager
        is initialized.
        @note: in machine manager modules you may override or extend
               this method.
        @raise CuckooCriticalError: if a machine that is not powered off or
                                    aborted cannot be stopped, or if the
                                    vm_state timeout is not configured.
        """
        try:
            configured_vms = self._list()
        except NotImplementedError:
            # The module cannot list its machines, so nothing is checked.
            return

        for machine in self.machines():
            # If this machine is already in the "correct" state, then we
            # go on to the next machine.
            if machine.label in configured_vms and \
                    self._status(machine.label) in [self.POWEROFF, self.ABORTED]:
                continue

            # This machine is currently not in its correct state, we're going
            # to try to shut it down. If that works, then the machine is fine.
            try:
                self.stop(machine.label)
            except CuckooMachineError as e:
                raise CuckooCriticalError(
                    "Please update your configuration. Unable to shut '%s' "
                    "down or find the machine in its proper state: %s" %
                    (machine.label, e)
                )

        if not config("cuckoo:timeouts:vm_state"):
            raise CuckooCriticalError(
                "Virtual machine state change timeout has not been set "
                "properly, please update it to be non-null."
            )

    def machines(self):
        """List virtual machines.
        @return: virtual machines list
        """
        return self.db.list_machines()

    def availables(self):
        """How many machines are free.
        @return: free machines count.
        """
        return self.db.count_machines_available()

    def acquire(self, machine_id=None, platform=None, tags=None):
        """Acquire a machine to start analysis.

        A machine ID takes precedence over a platform; platform and tags are
        ignored when a machine ID is given.
        @param machine_id: machine ID.
        @param platform: machine platform.
        @param tags: machine tags
        @return: the value returned by the database's lock_machine().
        """
        if machine_id:
            return self.db.lock_machine(label=machine_id)
        elif platform:
            return self.db.lock_machine(platform=platform, tags=tags)
        else:
            return self.db.lock_machine(tags=tags)

    def release(self, label=None):
        """Release a machine.
        @param label: machine name.
        """
        self.db.unlock_machine(label)

    def running(self):
        """Returns running virtual machines.
        @return: running virtual machines list.
        """
        return self.db.list_machines(locked=True)

    def shutdown(self):
        """Shutdown the machine manager by stopping all running machines.

        A machine that cannot be stopped is logged as a warning; the
        remaining machines are still processed.
        """
        # Query the running machines once, so the logged count and the
        # machines that get stopped come from the same result.
        running = self.running()
        if not running:
            return

        log.info("Still %s guests alive. Shutting down...", len(running))
        for machine in running:
            try:
                self.stop(machine.label)
            except CuckooMachineError as e:
                log.warning("Unable to shutdown machine %s, please check "
                            "manually. Error: %s", machine.label, e)

    def set_status(self, label, status):
        """Set status for a virtual machine.
        @param label: virtual machine label
        @param status: new virtual machine status
        """
        self.db.set_machine_status(label, status)

    def start(self, label, task):
        """Start a machine.
        @param label: machine name.
        @param task: task object.
        @raise NotImplementedError: this method is abstract.
        """
        raise NotImplementedError

    def stop(self, label=None):
        """Stop a machine.
        @param label: machine name.
        @raise NotImplementedError: this method is abstract.
        """
        raise NotImplementedError

    def _list(self):
        """Lists virtual machines configured.
        @raise NotImplementedError: this method is abstract.
        """
        raise NotImplementedError

    def dump_memory(self, label, path):
        """Takes a memory dump of a machine.
        @param label: machine name.
        @param path: path to where to store the memory dump.
        @raise NotImplementedError: this method is abstract.
        """
        raise NotImplementedError

    def enable_remote_control(self, label):
        """Enable remote control interface (RDP/VNC/SSH).
        @param label: machine name.
        @return: None
        @raise NotImplementedError: this method is abstract.
        """
        raise NotImplementedError

    def disable_remote_control(self, label):
        """Disable remote control interface (RDP/VNC/SSH).
        @param label: machine name.
        @return: None
        @raise NotImplementedError: this method is abstract.
        """
        raise NotImplementedError

    def get_remote_control_params(self, label):
        """Return connection details for remote control.
        @param label: machine name.
        @return: dict with keys: protocol, host, port
        @raise NotImplementedError: this method is abstract.
        """
        raise NotImplementedError

    def _wait_status(self, label, *states):
        """Waits for a vm status.

        Polls _status() once per second until it returns one of the given
        states.
        @param label: virtual machine name.
        @param states: one or more acceptable virtual machine statuses,
                       passed as separate positional arguments.
        @raise CuckooMachineError: if the configured vm_state timeout expires.
        """
        # This block was originally suggested by Loic Jaquemet.
        waitme = 0
        try:
            current = self._status(label)
        except NameError:
            # NOTE(review): a NameError from _status() silently ends the
            # wait; presumably a guard for modules lacking a usable
            # _status() - confirm before changing.
            return

        while current not in states:
            log.debug("Waiting %i cuckooseconds for machine %s to switch "
                      "to status %s", waitme, label, states)
            if waitme > config("cuckoo:timeouts:vm_state"):
                raise CuckooMachineError(
                    "Timeout hit while waiting for machine %s to change "
                    "status" % label
                )

            time.sleep(1)
            waitme += 1
            current = self._status(label)
Esempio n. 34
0
    def run(self):
        """Run information gathering.

        The task is taken from the database; if it no longer exists there,
        from the task info file at self.taskinfo_path; and if that is
        missing too, from an empty Task carrying only the task ID.
        @return: information dict.
        """
        self.key = "info"

        db = Database()
        dbtask = db.view_task(self.task["id"], details=True)

        # Fetch the task.
        if dbtask:
            task = dbtask.to_dict()
        elif os.path.isfile(self.taskinfo_path):
            # Task is gone from the database, but we've got task.json, so
            # grab info from there.
            with open(self.taskinfo_path) as f:
                task = json_decode(f.read())
        else:
            # We don't have any info on the task :(
            emptytask = Task()
            emptytask.id = self.task["id"]
            task = emptytask.to_dict()

        # Get git head. The content of the .cwd file is reported as both
        # the head and the fetch head.
        if os.path.exists(cwd(".cwd")):
            with open(cwd(".cwd"), "rb") as f:
                git_head = git_fetch_head = f.read()
        else:
            log.warning(
                "No .cwd file was found in the Cuckoo Working Directory. Did "
                "you correctly setup the CWD?"
            )
            git_head = git_fetch_head = None

        # Monitor. Depending on what the monitor path is, report the link
        # target, the stripped file content or the directory name.
        monitor = cwd("monitor", task["options"].get("monitor", "latest"))
        if os.path.islink(monitor):
            monitor = os.readlink(monitor)
        elif os.path.isfile(monitor):
            with open(monitor, "rb") as f:
                monitor = f.read().strip()
        elif os.path.isdir(monitor):
            monitor = os.path.basename(monitor)
        else:
            monitor = None

        return dict(
            version=version,
            git={
                "head": git_head,
                "fetch_head": git_fetch_head,
            },
            monitor=monitor,
            added=task.get("added_on"),
            started=task["started_on"],
            ended=task.get("completed_on", "none"),
            duration=task.get("duration", -1),
            id=int(task["id"]),
            category=task["category"],
            custom=task["custom"],
            owner=task["owner"],
            machine=task["guest"],
            package=task["package"],
            platform=task["platform"],
            options=emit_options(task["options"]),
            route=task["route"],
        )
Esempio n. 35
0
class DatabaseEngine(object):
    """Tests database stuff.

    URI is the DSN that the Database is connected with in setup_class.
    """
    URI = None

    def setup_class(self):
        """Point the CWD at a new temporary directory and connect to URI."""
        set_cwd(tempfile.mkdtemp())

        self.d = Database()
        self.d.connect(dsn=self.URI)

    def add_url(self, url, priority=1, status="pending"):
        """Add a URL task and force it into the given status.
        @param url: URL to add.
        @param priority: task priority.
        @param status: status the new task is set to.
        @return: ID of the new task.
        """
        task_id = self.d.add_url(url, priority=priority)
        self.d.set_status(task_id, status)
        return task_id

    def test_add_tasks(self):
        """Adding a file and a URL each creates one Task row."""
        fd, sample_path = tempfile.mkstemp()
        # A bytes literal is the same object type on Python 2 and is what
        # os.write() requires on Python 3.
        os.write(fd, b"hehe")
        os.close(fd)

        # Add task.
        count = self.d.Session().query(Task).count()
        self.d.add_path(sample_path)
        assert self.d.Session().query(Task).count() == count + 1

        # Add url.
        self.d.add_url("http://foo.bar")
        assert self.d.Session().query(Task).count() == count + 2

    def test_processing_get_task(self):
        """processing_get_task() hands out completed tasks by descending
        priority, and in creation order within the same priority."""
        # First reset all existing rows so that earlier exceptions don't affect
        # this unit test run.
        null, session = None, self.d.Session()

        session.query(Task).filter(
            Task.status == "completed", Task.processing == null
        ).update({
            "processing": "something",
        })
        session.commit()

        t1 = self.add_url("http://google.com/1", priority=1, status="completed")
        t2 = self.add_url("http://google.com/2", priority=2, status="completed")
        t3 = self.add_url("http://google.com/3", priority=1, status="completed")
        t4 = self.add_url("http://google.com/4", priority=1, status="completed")
        t5 = self.add_url("http://google.com/5", priority=3, status="completed")
        t6 = self.add_url("http://google.com/6", priority=1, status="completed")
        t7 = self.add_url("http://google.com/7", priority=1, status="completed")

        assert self.d.processing_get_task("foo") == t5
        assert self.d.processing_get_task("foo") == t2
        assert self.d.processing_get_task("foo") == t1
        assert self.d.processing_get_task("foo") == t3
        assert self.d.processing_get_task("foo") == t4
        assert self.d.processing_get_task("foo") == t6
        assert self.d.processing_get_task("foo") == t7
        assert self.d.processing_get_task("foo") is None

    def test_error_exists(self):
        """Adding the same error message twice stores two errors."""
        task_id = self.add_url("http://google.com/")
        self.d.add_error("A"*1024, task_id)
        assert len(self.d.view_errors(task_id)) == 1
        self.d.add_error("A"*1024, task_id)
        assert len(self.d.view_errors(task_id)) == 2

    def test_long_error(self):
        """A 1024 character error message is stored at its full length."""
        # Attach the error to the task created here rather than to a
        # hard-coded task ID, so the result does not depend on which tasks
        # and errors already exist in the database.
        task_id = self.add_url("http://google.com/")
        self.d.add_error("A"*1024, task_id)
        err = self.d.view_errors(task_id)
        assert err and len(err[0].message) == 1024

    def test_submit(self):
        """A submit can be read back with the values it was added with."""
        dirpath = tempfile.mkdtemp()
        submit_id = self.d.add_submit(dirpath, "files", {
            "foo": "bar",
        })
        submit = self.d.view_submit(submit_id)
        assert submit.id == submit_id
        assert submit.tmp_path == dirpath
        assert submit.submit_type == "files"
        assert submit.data == {
            "foo": "bar",
        }

    def test_connect_no_create(self):
        """connect(create=False) does not recreate a dropped table, a
        default connect() does."""
        AlembicVersion.__table__.drop(self.d.engine)
        self.d.connect(dsn=self.URI, create=False)
        assert "alembic_version" not in self.d.engine.table_names()
        self.d.connect(dsn=self.URI)
        assert "alembic_version" in self.d.engine.table_names()

    def test_view_submit_tasks(self):
        """view_submit() only loads the submit's tasks when tasks=True."""
        submit_id = self.d.add_submit(None, None, None)
        t1 = self.d.add_path(__file__, custom="1", submit_id=submit_id)
        t2 = self.d.add_path(__file__, custom="2", submit_id=submit_id)

        submit = self.d.view_submit(submit_id)
        assert submit.id == submit_id
        with pytest.raises(DetachedInstanceError):
            # Accessing the attribute is what raises; the call form of
            # print is valid on both Python 2 and Python 3.
            print(submit.tasks)

        submit = self.d.view_submit(submit_id, tasks=True)
        assert len(submit.tasks) == 2
        tasks = sorted((task.id, task) for task in submit.tasks)
        assert tasks[0][1].id == t1
        assert tasks[0][1].custom == "1"
        assert tasks[1][1].id == t2
        assert tasks[1][1].custom == "2"

    def test_add_reboot(self):
        """A reboot task stores the original task ID in its custom field."""
        t0 = self.d.add_path(__file__)
        s0 = self.d.add_submit(None, None, None)
        t1 = self.d.add_reboot(task_id=t0, submit_id=s0)

        t = self.d.view_task(t1)
        assert t.custom == "%s" % t0
        assert t.submit_id == s0

    def test_task_set_options(self):
        """Options given as dict or as "key=value" string read back as the
        same dict."""
        t0 = self.d.add_path(__file__, options={"foo": "bar"})
        t1 = self.d.add_path(__file__, options="foo=bar")
        assert self.d.view_task(t0).options == {"foo": "bar"}
        assert self.d.view_task(t1).options == {"foo": "bar"}

    def test_task_tags_str(self):
        """Empty entries in a comma-separated tag string are dropped."""
        task = self.d.add_path(__file__, tags="foo,,bar")
        tag0, tag1 = self.d.view_task(task).tags
        assert sorted((tag0.name, tag1.name)) == ["bar", "foo"]

    def test_task_tags_list(self):
        """Empty and non-string entries in a tag list are dropped."""
        task = self.d.add_path(__file__, tags=["tag1", "tag2", "", 1, "tag3"])
        tag0, tag1, tag2 = self.d.view_task(task).tags
        assert sorted((tag0.name, tag1.name, tag2.name)) == [
            "tag1", "tag2", "tag3"
        ]

    def test_error_action(self):
        """The optional action of an error is stored; it is None if not
        given."""
        task_id = self.d.add_path(__file__)
        self.d.add_error("message1", task_id)
        self.d.add_error("message2", task_id, "actionhere")
        e1, e2 = self.d.view_errors(task_id)
        assert e1.message == "message1"
        assert e1.action is None
        assert e2.message == "message2"
        assert e2.action == "actionhere"

    def test_view_tasks(self):
        """view_tasks() returns the same data as view_task() per task ID."""
        t1 = self.d.add_path(__file__)
        t2 = self.d.add_url("http://google.com/")
        tasks = self.d.view_tasks([t1, t2])
        assert tasks[0].to_dict() == self.d.view_task(t1).to_dict()
        assert tasks[1].to_dict() == self.d.view_task(t2).to_dict()

    def test_add_machine(self):
        """Machine options given as None, empty string, space-separated
        string or list all read back as a list."""
        self.d.add_machine(
            "name1", "label", "1.2.3.4", "windows", None,
            "tag1 tag2", "int0", "snap0", "5.6.7.8", 2043
        )
        self.d.add_machine(
            "name2", "label", "1.2.3.4", "windows", "",
            "tag1 tag2", "int0", "snap0", "5.6.7.8", 2043
        )
        self.d.add_machine(
            "name3", "label", "1.2.3.4", "windows", "opt1 opt2",
            "tag1 tag2", "int0", "snap0", "5.6.7.8", 2043
        )
        self.d.add_machine(
            "name4", "label", "1.2.3.4", "windows", ["opt3", "opt4"],
            "tag1 tag2", "int0", "snap0", "5.6.7.8", 2043
        )
        m1 = self.d.view_machine("name1")
        m2 = self.d.view_machine("name2")
        m3 = self.d.view_machine("name3")
        m4 = self.d.view_machine("name4")
        assert m1.options == []
        assert m2.options == []
        assert m3.options == ["opt1", "opt2"]
        assert m4.options == ["opt3", "opt4"]

    @mock.patch("cuckoo.common.objects.magic")
    def test_add_sample(self, p):
        """A sample can be added even if magic reports an empty type."""
        p.from_file.return_value = ""
        assert self.d.add_path(Files.temp_put(os.urandom(16))) is not None
Esempio n. 36
0
class TestTask(object):
    def setup(self):
        """Create a fresh Cuckoo CWD and database connection for each test."""
        workdir = tempfile.mkdtemp()
        self.cwd = workdir
        set_cwd(workdir)
        cuckoo_create()

        database = Database()
        self.db = database
        database.connect()

        # Bookkeeping used by get_file() and teardown().
        self.tmpfile = None
        self.files = []

    def teardown(self):
        shutil.rmtree(self.cwd)
        for path in self.files:
            try:
                return
                os.remove(path)
            except OSError:
                pass

    def get_file(self):
        fd, target = tempfile.mkstemp()
        os.write(fd, os.urandom(64))
        os.close(fd)
        self.files.append(target)
        return target

    def add_task(self, category="file", url=None, **kwargs):
        """Insert a task with a single target directly into the database.

        @param category: "file" (a random temp file is created) or "url".
        @param url: URL to use when category is "url".
        @param kwargs: values for the task columns listed below; columns
                       that are not given are set to None.
        @return: two-element list [task id, target string].
        """
        if category == "file":
            db_target = create_target.create_file(self.get_file())
        elif category == "url":
            db_target = create_target.create_url(url)

        columns = (
            "type", "timeout", "priority", "custom", "owner", "machine",
            "package", "options", "platform", "memory", "enforce_timeout",
            "clock", "submit_id", "start_on", "longterm_id",
        )
        row = DbTask()
        for column in columns:
            setattr(row, column, kwargs.get(column))

        session = self.db.Session()
        try:
            # The task has to be committed first so that it gets an ID the
            # target row can refer to.
            session.add(row)
            session.commit()
            task_id = row.id

            db_target.task_id = task_id
            session.add(db_target)
            session.commit()
            target = db_target.target
        finally:
            session.close()

        return [task_id, target]

    def test_defined_task_dirs(self):
        """Task.dirs lists the expected analysis sub-directories."""
        expected = ["shots", "logs", "files", "extracted", "buffer", "memory"]
        assert Task.dirs == expected

    def test_load_from_db(self):
        """load_from_db() succeeds and fills in id, category and path."""
        task_id, _ = self.add_task()
        task = Task()
        loaded = task.load_from_db(task_id)
        assert loaded

        assert task.id == task_id
        assert task.category == "file"
        assert task.path == cwd(analysis=task_id)

    def test_set_task_constructor(self):
        """Passing a database task to the constructor initialises the Task."""
        task_id, _ = self.add_task()
        row = self.db.view_task(task_id)
        task = Task(row)

        assert task.id == task_id
        assert task.category == "file"
        assert task.path == cwd(analysis=task_id)
        assert task.db_task == row

    def test_set_task(self):
        """set_task() copies the database task and wraps its target."""
        task_id, sample_path = self.add_task()
        row = self.db.view_task(task_id)
        task = Task()
        task.set_task(row)

        assert task.id == task_id
        assert task.category == "file"
        assert task.path == cwd(analysis=task_id)
        assert task.db_task == row
        assert task.target == sample_path
        assert len(task.targets) == 1
        assert isinstance(task.targets[0], Target)

    def test_load_task_from_dict(self):
        """A plain dict is enough to populate a Task; type is "regular"."""
        task = Task()
        task.load_task_dict({
            "id": 42,
            "category": "file",
            "target": "/tmp/stuff/doge42.exe",
        })

        assert task.id == 42
        assert task.category == "file"
        assert task.target == "/tmp/stuff/doge42.exe"
        assert task.path == cwd(analysis=42)
        assert task.type == "regular"

    def test_create_dirs(self):
        """create_dirs() creates the task folder and all its sub-directories."""
        task_id, _ = self.add_task()
        task = Task()
        task.load_from_db(task_id)

        task_path = cwd(analysis=task_id)
        subdir_names = (
            "shots", "logs", "files", "extracted", "buffer", "memory"
        )
        subdirs = [cwd(task_path, name) for name in subdir_names]

        # Nothing may exist before create_dirs() has been called.
        assert not any(os.path.exists(subdir) for subdir in subdirs)

        assert task.create_dirs()
        assert os.path.exists(task_path)
        assert all(os.path.exists(subdir) for subdir in subdirs)

    def test_dir_exists(self):
        """dir_exists() follows the presence of the analysis folder."""
        task_id, _ = self.add_task()
        task = Task()
        task.load_from_db(task_id)

        assert not task.dir_exists()
        analysis_dir = cwd(analysis=task_id)
        os.mkdir(analysis_dir)
        assert task.dir_exists()

    def test_is_reported(self):
        """is_reported() turns true once reports/report.json exists."""
        task_id, _ = self.add_task()
        task = Task()
        task.load_from_db(task_id)
        task.create_dirs()

        assert not task.is_reported()

        reports_dir = os.path.join(task.path, "reports")
        os.mkdir(reports_dir)
        report_path = os.path.join(reports_dir, "report.json")
        with open(report_path, "wb") as report:
            report.write(os.urandom(64))

        assert task.is_reported()

    @mock.patch("cuckoo.core.task.RunReporting.run")
    @mock.patch("cuckoo.core.task.RunSignatures.run")
    @mock.patch("cuckoo.core.task.RunProcessing.run")
    def test_process(self, mp, ms, mr):
        """process() runs processing, signatures and reporting once each."""
        # mp, ms and mr are the patched run() methods of RunProcessing,
        # RunSignatures and RunReporting respectively.
        mp.return_value = {"x": "x"}

        task_id, _ = self.add_task()
        task = Task()
        task.load_from_db(task_id)
        task.process()

        for stage in (mp, ms, mr):
            stage.assert_called_once()

    @mock.patch("cuckoo.core.task.RunReporting")
    @mock.patch("cuckoo.core.task.RunSignatures")
    @mock.patch("cuckoo.core.task.RunProcessing")
    def test_process_nodelete(self, mp, ms, mr):
        """With both delete options off, process() keeps sample and copy."""
        keep_everything = {
            "delete_original": False,
            "delete_bin_copy": False,
        }
        set_cwd(tempfile.mkdtemp())
        cuckoo_create(cfg={"cuckoo": {"cuckoo": keep_everything}})

        task_id, sample_path = self.add_task()
        task = Task()
        task.load_from_db(task_id)
        task.create_dirs()
        sha256 = File(sample_path).get_sha256()
        copied_binary = cwd("storage", "binaries", sha256)

        task.process()
        assert os.path.exists(copied_binary)
        assert os.path.exists(sample_path)

    @mock.patch("cuckoo.core.task.RunReporting")
    @mock.patch("cuckoo.core.task.RunSignatures")
    @mock.patch("cuckoo.core.task.RunProcessing")
    def test_process_dodelete(self, mp, ms, mr):
        """With both delete options on, process() removes sample and copy."""
        delete_everything = {
            "delete_original": True,
            "delete_bin_copy": True,
        }
        set_cwd(tempfile.mkdtemp())
        cuckoo_create(cfg={"cuckoo": {"cuckoo": delete_everything}})

        task_id, sample_path = self.add_task()
        task = Task()
        task.load_from_db(task_id)
        task.create_dirs()

        # Both files are present before processing ...
        assert os.path.exists(task.target)
        assert os.path.exists(task.targets[0].copied_binary)

        task.process()

        # ... and gone afterwards.
        assert not os.path.exists(sample_path)
        assert not os.path.exists(task.targets[0].copied_binary)

    def test_get_tags_list(self):
        """get_tags_list() normalises strings, sequences and junk input."""
        task = Task()
        # (input, expected list of tags)
        cases = [
            (" doge,stuff,things", ["doge", "stuff", "things"]),
            (("doge", "things "), ["doge", "things"]),
            ("foo,,bar", ["foo", "bar"]),
            (["tag1", 1, "", "tag2"], ["tag1", "tag2"]),
            ("", []),
            ([], []),
            ((), []),
            (1, []),
        ]

        for raw, expected in cases:
            assert task.get_tags_list(raw) == expected

    def test_set_latest(self):
        """set_latest() points storage/analyses/latest at the task folder."""
        task_id, _ = self.add_task()
        task = Task()
        task.load_from_db(task_id)
        task.create_dirs()

        latest_link = cwd("storage", "analyses", "latest")
        task.set_latest()

        resolved = os.path.realpath(latest_link)
        assert resolved == task.path

    def test_set_status(self):
        """set_status() is visible through attribute and item access."""
        task_id, _ = self.add_task()
        task = Task()
        task.load_from_db(task_id)

        new_status = "reported"
        task.set_status(new_status)

        assert task.status == "reported"
        assert task["status"] == "reported"

    def test_refresh(self):
        """Database changes only show up on the Task after refresh()."""
        task_id, _ = self.add_task()
        task = Task()
        task.load_from_db(task_id)
        self.db.set_machine(task_id, "machine1")

        # Still the stale view until refresh() is called.
        assert task.machine is None
        assert task["machine"] is None

        task.refresh()

        assert task.machine == "machine1"
        assert task["machine"] == "machine1"

    def test_write_task_json(self):
        """write_task_json() output equals the reference task JSON file."""
        task_id = submit_task.add_path("tests/files/pdf0.pdf")

        # Pin every volatile column so that the output is reproducible.
        fixed_time = datetime.datetime(2017, 5, 10, 18, 0)
        session = self.db.Session()
        db_task = session.query(DbTask).filter_by(id=task_id).first()
        db_task.status = "reported"
        db_task.machine = "DogeOS1"
        db_task.start_on = fixed_time
        db_task.added_on = fixed_time
        db_task.clock = fixed_time
        session.commit()
        session.refresh(db_task)
        session.close()

        task = Task()
        task.load_from_db(task_id)
        task.write_task_json()

        # Context managers make sure both files are closed again.
        with open("tests/files/tasktest-taskjson.json", "rb") as correct:
            correct_json = json.load(correct)
        with open(os.path.join(task.path, "task.json"), "rb") as generated:
            generated_json = json.load(generated)

        assert generated_json == correct_json

    def test_get_item(self):
        """Task fields are readable with dict-style item access."""
        task_id, sample_path = self.add_task()
        task = Task()
        task.load_from_db(task_id)

        assert task["id"] == task_id
        assert task["category"] == "file"
        assert task["target"] == sample_path
        assert task["machine"] is None
        assert len(task["targets"]) == 1

    def test_get_attribute(self):
        """Task fields are readable with attribute access."""
        task_id, sample_path = self.add_task()
        task = Task()
        task.load_from_db(task_id)
        expected_path = cwd(analysis=task_id)

        assert task.id == task_id
        assert task.path == expected_path
        assert task.category == "file"
        assert task.target == sample_path

    def test_requirement_str(self):
        """requirements_str() lists machine, platform and tags of a task."""
        requirements = {
            "tags": ["doge"], "platform": "DogeOS", "machine": "Doge1",
        }
        # The result of this first task is not used: the ID inspected below
        # is the one returned by submit_task.add_path().
        self.add_task(**requirements)
        task_id = submit_task.add_path(self.get_file(), **requirements)

        task = Task()
        task.load_from_db(task_id)

        described = task.requirements_str(task.db_task)
        assert described == "machine=Doge1 platform=DogeOS tags=doge, "

    def test_reschedule_file(self):
        """Rescheduling a file task clones it and marks the old one recovered."""
        task_id, sample_path = self.add_task()
        task = Task()
        task.load_from_db(task_id)

        new_id = task.reschedule(priority=3)

        old_row = self.db.view_task(task_id)
        new_row = self.db.view_task(new_id)
        assert new_id is not None
        assert old_row.status == "recovered"
        new_target = new_row.targets[0]
        assert new_target.category == "file"
        assert new_target.target == sample_path
        assert new_row.priority == 3

    def test_reschedule_url(self):
        """Rescheduling a URL task clones it and marks the old one recovered."""
        task_id, _ = self.add_task(
            category="url", url="http://example.com/42"
        )
        task = Task()
        task.load_from_db(task_id)

        new_id = task.reschedule(priority=2)

        old_row = self.db.view_task(task_id)
        new_row = self.db.view_task(new_id)
        assert new_id is not None
        assert old_row.status == "recovered"
        assert new_row.targets[0].category == "url"
        assert new_row.priority == 2
        assert new_row.targets[0].target == "http://example.com/42"

    def test_reschedule_id(self):
        """reschedule() accepts a task ID on a Task that was never loaded."""
        task_id, _ = self.add_task()
        new_id = Task().reschedule(task_id=task_id)

        old_row = self.db.view_task(task_id)
        new_row = self.db.view_task(new_id)
        assert new_id is not None
        assert old_row.status == "recovered"
        assert new_row.targets[0].category == "file"

    def test_reschedule_fail(self):
        """reschedule() without any task returns None."""
        assert submit_task.reschedule() is None

    def test_reschedule_nonexistant(self):
        """reschedule() of an unknown task ID returns None."""
        assert submit_task.reschedule(task_id=42) is None

    def test_add_service(self):
        """add_service() creates a target-less, priority 999 service task."""
        task_id = Task().add_service(
            timeout=60, tags=["officepc"], owner="Doge"
        )
        task_path = cwd(analysis=task_id)
        row = self.db.view_task(task_id)

        assert task_id is not None
        assert os.path.exists(task_path)
        assert row.type == "service"
        assert row.owner == "Doge"
        assert row.timeout == 60
        assert row.priority == 999
        assert row.tags[0].name == "officepc"
        assert row.targets == []

    def test_add_baseline(self):
        """add_baseline() creates a target-less, priority 999 baseline task."""
        task_id = Task().add_baseline(
            timeout=60, owner="Doge", machine="machine1"
        )
        task_path = cwd(analysis=task_id)
        row = self.db.view_task(task_id)

        assert task_id is not None
        assert os.path.exists(task_path)
        assert row.type == "baseline"
        assert row.owner == "Doge"
        assert row.timeout == 60
        assert row.priority == 999
        assert row.machine == "machine1"
        assert row.memory == False
        assert row.targets == []

    def test_add_reboot(self):
        """add_reboot() creates a "reboot" package task for the same sample."""
        task_id, sample_path = self.add_task(owner="MrDoge")
        submit_id = self.db.add_submit(None, None, None)
        task = Task()
        task.load_from_db(task_id)
        task.create_empty()

        reboot_id = task.add_reboot(task_id, owner="Doge", submit_id=submit_id)
        reboot_path = cwd(analysis=reboot_id)
        row = self.db.view_task(reboot_id)

        assert reboot_id is not None
        assert os.path.exists(reboot_path)
        assert row.targets[0].category == "file"
        assert row.package == "reboot"
        assert row.owner == "Doge"
        assert row.priority == 1
        # The custom field carries the ID of the original task.
        assert row.custom == "%s" % task_id
        assert row.memory == False
        assert row.targets[0].target == sample_path
        assert row.submit_id == submit_id
        assert len(task.targets) == 1
        assert isinstance(task.targets[0], Target)

    def test_add_reboot_nonexistant(self):
        """add_reboot() for an unknown task ID returns None."""
        assert submit_task.add_reboot(42) is None

    def test_add_reboot_binary_removed(self):
        """add_reboot() returns None when the copied binary is gone."""
        task_id, _ = self.add_task()
        task = Task()
        task.load_from_db(task_id)
        task.create_empty()

        binary_copy = task.targets[0].copied_binary
        os.remove(binary_copy)

        assert task.add_reboot(task_id) is None

    def test_add_url(self):
        """add_url() stores a single URL target and creates the task folder."""
        task_id = submit_task.add_url("http://example.com/42")
        row = self.db.view_task(task_id)
        task = Task(row)
        task_path = cwd(analysis=task_id)

        assert task_id is not None
        assert os.path.exists(task_path)
        assert row.targets[0].category == "url"
        assert row.targets[0].target == "http://example.com/42"
        assert task.targets[0].target == "http://example.com/42"
        assert len(task.targets) == 1
        assert isinstance(task.targets[0], Target)

    def test_add_archive(self):
        """add_archive() stores the archive target, filename and package."""
        archive_path = self.get_file()
        task_id = submit_task.add_archive(archive_path, "file1.exe", "exe")
        task_path = cwd(analysis=task_id)
        row = self.db.view_task(task_id)
        task = Task(row)

        assert task_id is not None
        assert os.path.exists(task_path)
        assert row.targets[0].category == "archive"
        assert row.options == {"filename": "file1.exe"}
        assert row.targets[0].target == archive_path
        assert row.package == "exe"
        assert task.targets[0].target == archive_path
        assert len(task.targets) == 1
        assert isinstance(task.targets[0], Target)

    def test_add_archive_nonexistant(self):
        """add_archive() with a missing archive path returns None."""
        missing_archive = "/tmp/BfUbuYByg.zip"
        assert submit_task.add_archive(
            missing_archive, "file1.exe", "exe"
        ) is None

    def test_add_path(self):
        """add_path() stores a single file target and creates the task folder."""
        sample_path = self.get_file()
        task_id = submit_task.add_path(sample_path)
        task_path = cwd(analysis=task_id)
        row = self.db.view_task(task_id)
        task = Task(row)

        assert task_id is not None
        assert os.path.exists(task_path)
        assert row.targets[0].category == "file"
        assert row.targets[0].target == sample_path
        assert task.targets[0].target == sample_path
        assert len(task.targets) == 1
        assert isinstance(task.targets[0], Target)

    def test_add_path_nonexistant(self):
        """add_path() with a missing file returns None."""
        assert submit_task.add_path("/tmp/YtcukGBYTTBYU.exe") is None

    def test_add_path_invalid_starton(self):
        """add_path() returns None for the start_on value "13-11-2013"."""
        sample_path = self.get_file()
        result = submit_task.add_path(sample_path, start_on="13-11-2013")
        assert result is None

    def test_add_massurl(self):
        """add_massurl() creates one "massurl" task holding all 500 URLs."""
        urls = []
        for n in range(500):
            urls.append("http://example%s.com" % n)

        task_id = submit_task.add_massurl(urls)
        task = Task()
        task.load_from_db(task_id)

        analysis_dir = cwd(analysis=task_id)
        assert task_id is not None
        assert os.path.exists(analysis_dir)
        assert task.path == analysis_dir
        assert len(task.targets) == 500
        assert task.type == "massurl"

    def test_add_file(self):
        """add() stores every option that was given for a file target."""
        sample_path = self.get_file()
        file_target = create_target.create_file(sample_path)
        start_on = datetime.datetime.now()
        task_id = submit_task.add(
            [file_target], clock="5-17-2017 13:37:13",
            package="exe", owner="Doge", custom="stuff", machine="machine1",
            platform="DogeOS", tags="tag1", memory=True, enforce_timeout=True,
            submit_id=1500, start_on=start_on
        )
        task_path = cwd(analysis=task_id)
        row = self.db.view_task(task_id)
        task = Task(row)

        assert task_id is not None
        assert os.path.exists(task_path)

        # What ended up in the database.
        assert row.targets[0].category == "file"
        assert row.targets[0].target == sample_path
        expected_clock = datetime.datetime(
            year=2017, month=5, day=17, hour=13, minute=37, second=13
        )
        assert row.clock == expected_clock
        assert row.timeout == 0
        assert row.package == "exe"
        assert row.options == {}
        assert row.priority == 1
        assert row.custom == "stuff"
        assert row.owner == "Doge"
        assert row.machine == "machine1"
        assert row.platform == "DogeOS"
        assert len(row.tags) == 1
        assert row.tags[0].name == "tag1"
        assert row.memory
        assert row.enforce_timeout
        assert row.submit_id == 1500
        assert row.start_on == start_on

        # What the Task wrapper exposes.
        assert task.id == task_id
        assert task.target == sample_path
        assert task.category == "file"
        assert task.type == "regular"

    def test_add_base_url(self):
        """add() with only a URL target fills in the remaining fields."""
        address = "http://example.com/42"
        url_target = create_target.create_url(address)
        task_id = submit_task.add([url_target])
        task_path = cwd(analysis=task_id)
        row = self.db.view_task(task_id)
        task = Task(row)

        assert task_id is not None
        assert os.path.exists(task_path)
        assert row.targets[0].category == "url"
        assert row.targets[0].target == "http://example.com/42"
        assert row.clock is not None
        assert task.id == task_id
        assert task.target == "http://example.com/42"
        assert task.category == "url"

    def test_estimate_export_size(self):
        """estimate_export_size() of the sample analysis storage is 7861."""
        shutil.copytree(
            "tests/files/sample_analysis_storage", cwd(analysis=1)
        )

        estimate = Task.estimate_export_size(1, ["logs"], ["dump.pcap"])
        assert int(estimate) == 7861

    def test_get_files(self):
        """get_files() lists the dirs and files of the sample analysis."""
        shutil.copytree(
            "tests/files/sample_analysis_storage", cwd(analysis=1)
        )
        found_dirs, found_files = Task.get_files(1)

        assert len(found_dirs) == 6
        assert len(found_files) == 10
        assert "dump.pcap" in found_files
        assert ("logs", 1) in found_dirs

    def test_create_zip(self):
        """create_zip() returns an in-memory zip of the requested entries."""
        shutil.copytree(
            "tests/files/sample_analysis_storage", cwd(analysis=1)
        )
        wanted_dirs = ["logs", "report"]
        wanted_files = ["cuckoo.log", "files.json"]
        archive_io = Task.create_zip(1, wanted_dirs, wanted_files)

        assert isinstance(archive_io, io.BytesIO)

        archive = zipfile.ZipFile(archive_io)
        assert len(archive.read("files.json")) == 1856
        assert len(archive_io.getvalue()) == 13938

    def test_all_properties(self):
        """Every documented Task property can be read without an error."""
        task_id, _ = self.add_task()
        task = Task()
        task.load_from_db(task_id)

        property_names = (
            "id", "target", "category", "timeout", "priority", "custom",
            "owner", "machine", "package", "tags", "options", "platform",
            "memory", "enforce_timeout", "clock", "added_on", "start_on",
            "started_on", "completed_on", "status", "sample_id", "submit_id",
            "processing", "route", "targets", "longterm_id",
        )

        try:
            # Only the access itself matters, the values are not checked.
            for name in property_names:
                getattr(task, name)
        except Exception as err:
            pytest.fail(
                "One or more properties of Task raised an error: %s" % err
            )
Esempio n. 37
0
 def setup_class(self):
     """Start with an empty list of paths to remove and a Database handle."""
     self.remove_paths = []
     self.db = Database()
Esempio n. 38
0
 def emit(self, record):
     """Store a log record as a task error in the database.

     Records that carry no ``task_id`` attribute are ignored.
     """
     # TODO Should this also attempt to guess the task ID from _tasks?
     if hasattr(record, "task_id"):
         # "error_action" is optional on the record; None is passed to
         # add_error() when it is absent.
         Database().add_error(self.format(record), int(record.task_id),
                              getattr(record, "error_action", None))
Esempio n. 39
0
from __future__ import with_statement

from alembic import context
from sqlalchemy import create_engine, pool
from logging.config import fileConfig

# Interpret the config file for Python logging.
# This line sets up loggers basically.
fileConfig(context.config.config_file_name)

# These imports deliberately come after the logging setup above.
from cuckoo.core.database import Base, Database
from cuckoo.misc import set_cwd

# The Cuckoo working directory is read from the "cwd" key of alembic's
# -x arguments; a KeyError is raised here when it has not been provided.
set_cwd(context.get_x_argument(as_dictionary=True)["cwd"])
# Connect with both the schema check and creation switched off.
Database().connect(schema_check=False, create=False)

# Get database connection string from cuckoo configuration.
url = Database().engine.url.__to_string__(hide_password=False)
# Metadata of the declarative Base; presumably consumed by the migration
# functions further down -- confirm against the rest of this file.
target_metadata = Base.metadata


def run_migrations_offline():
    """Run migrations in 'offline' mode.
    This configures the context with just a URL
    and not an Engine, though an Engine is acceptable
    here as well.  By skipping the Engine creation
    we don't even need a DBAPI to be available.
    Calls to context.execute() here emit the given string to the
    script output.
    """
Esempio n. 40
0
 def __init__(self, maxcount=None):
     """Set up the initial state: config, database handle and counters.

     @param maxcount: stored as self.maxcount; presumably the maximum
                      number of analyses to run -- TODO confirm.
     """
     self.running = True
     self.cfg = Config()
     self.db = Database()
     self.maxcount = maxcount
     self.total_analysis_count = 0
Esempio n. 41
0
 def run(self):
     """Add one error entry per distinct action in self.actions.

     Each entry has an empty message and is attached to the task ID of
     self.analysis.
     """
     # set() collapses duplicate actions so each one is stored only once.
     for action in set(self.actions):
         Database().add_error("", self.analysis.task["id"], action)
Esempio n. 42
0
def test_machines():
    """Machines from virtualbox.conf are registered with the right
    resultserver settings.

    Machine "a" falls back to the global resultserver IP and the port of the
    (faked) ResultServer, "b" overrides the IP and "c" overrides the port.
    """
    set_cwd(tempfile.mkdtemp())
    Folders.create(cwd(), "conf")
    Files.create(
        cwd("conf"), "cuckoo.conf", """
[cuckoo]
machinery = virtualbox
[database]
connection =
timeout =
[resultserver]
ip = 9.8.7.6
port = 9876
""")
    Files.create(
        cwd("conf"), "virtualbox.conf", """
[virtualbox]
machines = a, b, c
[a]
label = a
snapshot = derpa
platform = windows
ip = 1.2.3.4

[b]
label = b
snapshot = derpb
platform = windows
ip = 5.6.7.8
resultserver_ip = 7.5.3.1

[c]
label = c
snapshot = derpc
platform = windows
ip = 1.3.5.7
resultserver_port = 4242
""")

    # Stand-in for the ResultServer singleton that only exposes a port.
    class mock(object):
        port = 9001

    Singleton._instances[ResultServer] = mock()

    try:
        db = Database()
        db.connect()
        m = Machinery()
        m.set_options(Config("virtualbox"))
        m._initialize("virtualbox")

        machines = db.list_machines()
        assert len(machines) == 3
        assert machines[0].label == "a"
        assert machines[0].snapshot == "derpa"
        assert machines[0].ip == "1.2.3.4"
        assert machines[0].resultserver_ip == "9.8.7.6"
        assert machines[0].resultserver_port == 9001
        assert machines[1].label == "b"
        assert machines[1].snapshot == "derpb"
        assert machines[1].ip == "5.6.7.8"
        assert machines[1].resultserver_ip == "7.5.3.1"
        assert machines[1].resultserver_port == 9001
        assert machines[2].label == "c"
        assert machines[2].snapshot == "derpc"
        assert machines[2].ip == "1.3.5.7"
        assert machines[2].resultserver_ip == "9.8.7.6"
        assert machines[2].resultserver_port == 4242
    finally:
        # Always drop the fake singleton again, also when an assertion
        # fails, so that it cannot leak into other tests.
        Singleton._instances.pop(ResultServer)
Esempio n. 43
0
class AnalysisManager(threading.Thread):
    """Analysis Manager.

    This class handles the full analysis process for a given task. It takes
    care of selecting the analysis machine, preparing the configuration and
    interacting with the guest agent and analyzer components to launch and
    complete the analysis and store, process and report its results.
    """

    def __init__(self, task_id, error_queue):
        """Set up the manager for a single task.

        @param task_id: ID of the task, used to look the task up in the
                        database.
        @param error_queue: object kept as self.errors.
        """
        threading.Thread.__init__(self)

        self.errors = error_queue
        self.cfg = Config()

        # Paths, filled in by init().
        self.storage = ""
        self.binary = ""
        self.storage_binary = ""

        self.machine = None
        self.db = Database()
        self.task = self.db.view_task(task_id)
        self.guest_manager = None

        # Routing state, filled in by route_network().
        self.route = None
        self.interface = None
        self.rt_table = None

        self.is_vnc = bool(self.task.options.get("vnc", False))


    def init(self):
        """Prepare the analysis folder and, for files/archives, the binary.

        @return: True when the analysis can go ahead, False when it has to
                 be aborted (the reason is logged).
        """
        self.storage = cwd(analysis=self.task.id)

        # An existing results folder means earlier results would be
        # overwritten and lost, so refuse to continue.
        if os.path.exists(self.storage):
            log.error("Analysis results folder already exists at path \"%s\", "
                      "analysis aborted", self.storage)
            return False

        # Without a storage folder there is nowhere to put the results.
        try:
            Folders.create(self.storage)
        except CuckooOperationalError:
            log.error("Unable to create analysis folder %s", self.storage)
            return False

        self.store_task_info()

        if self.task.category in ("file", "archive"):
            target = self.task.target

            # The target must be readable, otherwise this analysis fails.
            if not os.access(target, os.R_OK):
                log.error(
                    "Unable to access target file, please check if we have "
                    "permissions to access the file: \"%s\"",
                    target
                )
                return False

            # The target must still be the file that was submitted.
            # TODO Absorb the file upon submission.
            sample = self.db.view_sample(self.task.sample_id)
            sha256 = File(target).get_sha256()
            if sha256 != sample.sha256:
                log.error(
                    "Target file has been modified after submission: \"%s\"",
                    target
                )
                return False

            # Keep a copy of the original file, unless one exists already.
            # TODO This should be done at submission time.
            self.binary = cwd("storage", "binaries", sha256)
            if not os.path.exists(self.binary):
                try:
                    shutil.copy(target, self.binary)
                except (IOError, shutil.Error):
                    log.error(
                        "Unable to store file from \"%s\" to \"%s\", "
                        "analysis aborted", target, self.binary
                    )
                    return False

            # Each analysis directory contains a symlink/copy of the binary.
            try:
                self.storage_binary = os.path.join(self.storage, "binary")

                # Fall back to a plain copy where symlinks are unavailable.
                if hasattr(os, "symlink"):
                    place_binary = os.symlink
                else:
                    place_binary = shutil.copy
                place_binary(self.binary, self.storage_binary)
            except (AttributeError, OSError) as e:
                log.error("Unable to create symlink/copy from \"%s\" to "
                          "\"%s\": %s", self.binary, self.storage, e)
                return False

        # Initiates per-task logging.
        task_log_start(self.task.id)
        return True

    def store_task_info(self):
        """Reload the task from the database and dump it to task.json.

        self.task is replaced by the dict form of the freshly loaded task
        and its JSON form is written to "task.json" inside self.storage.
        """
        dbtask = self.db.view_task(self.task.id)
        self.task = dbtask.to_dict()

        task_info_path = os.path.join(self.storage, "task.json")
        # The context manager closes (and thereby flushes) the file right
        # away instead of leaving that to the garbage collector.
        with open(task_info_path, "w") as task_info:
            task_info.write(dbtask.to_json())

    def acquire_machine(self):
        """Acquire an analysis machine from the pool of available ones.

        Blocks, retrying once per second, until the machinery hands out a
        machine that matches the task's machine, platform and tags. The
        acquired machine is stored in self.machine.

        NOTE: machine_lock is released on every retry path, but it is still
        held when this method returns; releasing it is left to code outside
        this method.
        """
        machine = None

        # Start a loop to acquire a machine to run the analysis on.
        while True:
            machine_lock.acquire()

            # In some cases it's possible that we enter this loop without
            # having any available machines. We should make sure this is not
            # such case, or the analysis task will fail completely.
            if not machinery.availables():
                machine_lock.release()
                time.sleep(1)
                continue

            # If the user specified a specific machine ID, a platform to be
            # used or machine tags acquire the machine accordingly.
            machine = machinery.acquire(machine_id=self.task.machine,
                                        platform=self.task.platform,
                                        tags=self.task.tags)

            # If no machine is available at this moment, wait for one second
            # and try again.
            if not machine:
                machine_lock.release()
                log.debug("Task #%d: no machine available yet", self.task.id)
                time.sleep(1)
            else:
                log.info(
                    "Task #%d: acquired machine %s (label=%s)",
                    self.task.id, machine.name, machine.label, extra={
                        "action": "vm.acquire",
                        "status": "success",
                        "vmname": machine.name,
                    }
                )
                # Leave the loop with machine_lock still held.
                break

        self.machine = machine

    def build_options(self):
        """Assemble the options for this analysis.

        Combines details of the target, the task settings, the resultserver
        address of the acquired machine and any "analyzer_*" options that
        are configured for that machine.

        @return: options dict.
        """
        task = self.task
        options = {}

        category = task.category
        if category == "file":
            options["file_name"] = File(task.target).get_name()
            options["file_type"] = File(task.target).get_type()
            exports = File(task.target).get_exported_functions()
            options["pe_exports"] = ",".join(exports)

            # The APK entry point goes into the task options, which are
            # emitted as part of the options built below.
            package, activity = File(task.target).get_apk_entry()
            task.options["apk_entry"] = "%s:%s" % (package, activity)
        elif category == "archive":
            options["file_name"] = File(task.target).get_name()

        options["id"] = task.id
        options["ip"] = self.machine.resultserver_ip
        options["port"] = self.machine.resultserver_port
        options["category"] = task.category
        options["target"] = task.target
        options["package"] = task.package
        options["options"] = emit_options(task.options)
        options["enforce_timeout"] = task.enforce_timeout
        options["clock"] = task.clock
        options["vnc"] = task.vnc
        options["terminate_processes"] = self.cfg.cuckoo.terminate_processes

        # A task without its own timeout gets the configured default.
        options["timeout"] = task.timeout or self.cfg.timeouts.default

        # copy in other analyzer specific options, TEMPORARY (most likely)
        vm_options = getattr(machinery.options, self.machine.name)
        analyzer_keys = [k for k in vm_options if k.startswith("analyzer_")]
        for key in analyzer_keys:
            options[key] = vm_options[key]

        log.info(" [*] build_options() - options built:\n %s", str(options))
        return options

    def route_network(self):
        """Enable network routing if desired.

        The route is taken from the task options, defaulting to the
        "routing:routing:route" configuration value. Unknown, unconfigured or
        unavailable routes are downgraded to "none". The route that was
        finally taken is stored in the database.
        """
        def fall_back_to_none():
            # Give up on the requested route and record that on the task.
            self.route = "none"
            self.task.options["route"] = "none"
            self.interface = None
            self.rt_table = None

        # Determine the desired routing strategy (none, internet, VPN).
        self.route = self.task.options.get(
            "route", config("routing:routing:route")
        )

        if self.route in ("none", "drop"):
            self.interface = None
            self.rt_table = None
        elif self.route in ("inetsim", "tor"):
            # No interface or routing table is selected here for these
            # routes; they are handled by dedicated rooter calls below.
            pass
        elif self.route == "internet":
            if config("routing:routing:internet") != "none":
                self.interface = config("routing:routing:internet")
                self.rt_table = config("routing:routing:rt_table")
            else:
                log.warning(
                    "Internet network routing has been specified, but not "
                    "configured, ignoring routing for this analysis", extra={
                        "action": "network.route",
                        "status": "error",
                        "route": self.route,
                    }
                )
                fall_back_to_none()
        elif self.route in config("routing:vpn:vpns"):
            self.interface = config("routing:%s:interface" % self.route)
            self.rt_table = config("routing:%s:rt_table" % self.route)
        else:
            log.warning(
                "Unknown network routing destination specified, ignoring "
                "routing for this analysis: %r", self.route, extra={
                    "action": "network.route",
                    "status": "error",
                    "route": self.route,
                }
            )
            fall_back_to_none()

        # Check if the network interface is still available. If a VPN dies for
        # some reason, its tunX interface will no longer be available.
        if self.interface and not rooter("nic_available", self.interface):
            log.error(
                "The network interface '%s' configured for this analysis is "
                "not available at the moment, switching to route=none mode.",
                self.interface, extra={
                    "action": "network.route",
                    "status": "error",
                    "route": self.route,
                }
            )
            fall_back_to_none()

        # For now this doesn't work yet in combination with tor routing.
        if self.route in ("drop", "internet"):
            rooter(
                "drop_enable", self.machine.ip,
                config("cuckoo:resultserver:ip"),
                str(config("cuckoo:resultserver:port"))
            )

        if self.route == "inetsim":
            # Name of the configured machinery; it is both the configuration
            # file and the section holding the "interface" value.
            machinery_name = config("cuckoo:cuckoo:machinery")
            rooter(
                "inetsim_enable", self.machine.ip,
                config("routing:inetsim:server"),
                config("%s:%s:interface" % (machinery_name, machinery_name)),
                str(config("cuckoo:resultserver:port"))
            )

        if self.route == "tor":
            rooter(
                "tor_enable", self.machine.ip,
                str(config("cuckoo:resultserver:ip")),
                str(config("routing:tor:dnsport")),
                str(config("routing:tor:proxyport"))
            )

        if self.interface:
            rooter(
                "forward_enable", self.machine.interface,
                self.interface, self.machine.ip
            )

        if self.rt_table:
            rooter("srcroute_enable", self.rt_table, self.machine.ip)

        # Propagate the taken route to the database.
        self.db.set_route(self.task.id, self.route)

    def unroute_network(self):
        """Disable any enabled network routing.

        Issues the "*_disable" rooter commands matching the interface,
        routing table and route currently stored on this instance.
        """
        if self.interface:
            rooter(
                "forward_disable", self.machine.interface,
                self.interface, self.machine.ip
            )

        if self.rt_table:
            rooter("srcroute_disable", self.rt_table, self.machine.ip)

        # The drop rule is removed for every route other than "none".
        if self.route != "none":
            guest_ip = self.machine.ip
            rs_ip = config("cuckoo:resultserver:ip")
            rs_port = str(config("cuckoo:resultserver:port"))
            rooter("drop_disable", guest_ip, rs_ip, rs_port)

        if self.route == "inetsim":
            machinery_name = config("cuckoo:cuckoo:machinery")
            guest_ip = self.machine.ip
            inetsim_server = config("routing:inetsim:server")
            vm_interface = config(
                "%s:%s:interface" % (machinery_name, machinery_name)
            )
            rs_port = str(config("cuckoo:resultserver:port"))
            rooter(
                "inetsim_disable", guest_ip, inetsim_server,
                vm_interface, rs_port
            )

        if self.route == "tor":
            guest_ip = self.machine.ip
            rs_ip = str(config("cuckoo:resultserver:ip"))
            dns_port = str(config("routing:tor:dnsport"))
            proxy_port = str(config("routing:tor:proxyport"))
            rooter("tor_disable", guest_ip, rs_ip, dns_port, proxy_port)

    def wait_finish(self):
        """Wait for an agent-less analysis to finish.

        Some VMs don't have an actual agent. Mainly those that are used as
        assistance for an analysis through the services auxiliary module.
        Rather than engaging with the Cuckoo Agent, this method marks the
        guest as "running" and polls the database once per second until the
        guest status is anything else."""
        task_id = self.task.id
        self.db.guest_set_status(task_id, "running")
        while True:
            if self.db.guest_get_status(task_id) != "running":
                break
            time.sleep(1)

    def guest_manage(self, options):
        """Drive the analysis inside the guest and track its status.

        @param options: analysis options dict; options["timeout"] is used as
            the sleep duration for "baseline" tasks, and the whole dict is
            handed to the guest manager otherwise.
        """
        # Handle a special case where we're creating a baseline report of this
        # particular virtual machine - a report containing all the results
        # that are gathered if no additional samples are ran in the VM. These
        # results, such as loaded drivers and opened sockets in volatility, or
        # DNS requests to hostnames related to Microsoft Windows, etc may be
        # omitted or at the very least given less priority when creating a
        # report for an analysis that ran on this VM later on.
        if self.task.category == "baseline":
            time.sleep(options["timeout"])
            return

        task_id = self.task.id

        # Start the analysis.
        self.db.guest_set_status(task_id, "starting")
        monitor = self.task.options.get("monitor", "latest")
        self.guest_manager.start_analysis(options, monitor)

        # In case the Agent didn't respond and we force-quit the analysis
        # at some point while it was still starting the analysis the state
        # will be "stop" (or anything but "running", really).
        still_starting = self.db.guest_get_status(task_id) == "starting"
        if still_starting:
            self.db.guest_set_status(task_id, "running")
            self.guest_manager.wait_for_completion()

        if not self.is_vnc:
            self.db.guest_set_status(task_id, "stopping")
            return

        # VNC mode: hand the guest over to VNC and flag it as such.
        self.guest_manager.start_vnc(options, monitor)
        self.db.guest_set_status(task_id, "vnc")

    def launch_analysis(self):
        """Start analysis.

        Initializes the task, acquires a machine, registers the task with the
        ResultServer, starts auxiliary modules and the VM, enables network
        routing and then runs the guest part of the analysis. The cleanup in
        the finally clause (auxiliary stop, optional memory dump, VM stop or
        VNC wait, ResultServer removal, unrouting and machine release) runs
        whether or not the analysis succeeded.
        @return: True if the analysis ran to completion, False otherwise.
        """
        succeeded = False

        if self.task.category == "file" or self.task.category == "archive":
            target = os.path.basename(self.task.target)
        else:
            target = self.task.target

        log.info(
            "Starting analysis of %s \"%s\" (task #%d, options \"%s\")",
            self.task.category.upper(), target, self.task.id,
            emit_options(self.task.options), extra={
                "action": "task.init",
                "status": "starting",
                "task_id": self.task.id,
                "target": target,
                "category": self.task.category,
                "package": self.task.package,
                "options": emit_options(self.task.options),
                "custom": self.task.custom,
            }
        )

        # Initialize the analysis.
        if not self.init():
            logger("Failed to initialize", action="task.init", status="error")
            return False

        # Acquire analysis machine.
        try:
            self.acquire_machine()
        except CuckooOperationalError as e:
            machine_lock.release()
            log.error("Cannot acquire machine: %s", e, extra={
                "action": "vm.acquire", "status": "error",
            })
            return False

        # At this point we can tell the ResultServer about it.
        # NOTE(review): on failure the machine is released and the error is
        # queued, but execution still continues below - confirm intended.
        try:
            ResultServer().add_task(self.task, self.machine)
        except Exception as e:
            machinery.release(self.machine.label)
            self.errors.put(e)

        # Initialize the guest manager.
        self.guest_manager = GuestManager(
            self.machine.name, self.machine.ip,
            self.machine.platform, self.task.id, self
        )

        self.aux = RunAuxiliary(self.task, self.machine, self.guest_manager)
        self.aux.start()

        # Generate the analysis configuration file.
        options = self.build_options()

        try:
            # Tracks whether the machine lock has been released already, so
            # the error handlers below release it at most once.
            unlocked = False
            self.interface = None

            # Mark the selected analysis machine in the database as started.
            guest_log = self.db.guest_start(self.task.id,
                                            self.machine.name,
                                            self.machine.label,
                                            machinery.__class__.__name__)
            logger(
                "Starting VM",
                action="vm.start", status="pending",
                vmname=self.machine.name
            )

            # Start the Virtual Machine.
            machinery.start(self.machine.label, self.task)

            logger(
                "Started VM",
                action="vm.start", status="success",
                vmname=self.machine.name
            )

            # Enable network routing.
            self.route_network()

            # By the time start returns it will have fully started the Virtual
            # Machine. We can now safely release the machine lock.
            machine_lock.release()
            unlocked = True

            # Run and manage the components inside the guest unless this
            # machine has the "noagent" option specified (please refer to the
            # wait_finish() function for more details on this function).
            if "noagent" not in self.machine.options:
                self.guest_manage(options)
            else:
                self.wait_finish()

            succeeded = True
        except CuckooMachineSnapshotError as e:
            # Release the machine lock if it is still held, exactly like the
            # sibling handlers do. Without this a snapshot failure raised
            # before the release above would leave the lock held.
            if not unlocked:
                machine_lock.release()
            log.error(
                "Unable to restore to the snapshot for this Virtual Machine! "
                "Does your VM have a proper Snapshot and can you revert to it "
                "manually? VM: %s, error: %s",
                self.machine.name, e, extra={
                    "action": "vm.resume",
                    "status": "error",
                    "vmname": self.machine.name,
                }
            )
        except CuckooMachineError as e:
            if not unlocked:
                machine_lock.release()
            log.error(
                "Error starting Virtual Machine! VM: %s, error: %s",
                self.machine.name, e, extra={
                    "action": "vm.start",
                    "status": "error",
                    "vmname": self.machine.name,
                }
            )
        except CuckooGuestCriticalTimeout as e:
            if not unlocked:
                machine_lock.release()
            log.error(
                "Error from machine '%s': it appears that this Virtual "
                "Machine hasn't been configured properly as the Cuckoo Host "
                "wasn't able to connect to the Guest. There could be a few "
                "reasons for this, please refer to our documentation on the "
                "matter: %s",
                self.machine.name,
                faq("troubleshooting-vm-network-configuration"),
                extra={
                    "error_action": "vmrouting",
                    "action": "guest.handle",
                    "status": "error",
                    "task_id": self.task.id,
                }
            )
        except CuckooGuestError as e:
            if not unlocked:
                machine_lock.release()
            log.error("Error from the Cuckoo Guest: %s", e, extra={
                "action": "guest.handle",
                "status": "error",
                "task_id": self.task.id,
            })
        finally:
            # Stop Auxiliary modules.
            self.aux.stop()

            # Take a memory dump of the machine before shutting it off.
            if self.cfg.cuckoo.memory_dump or self.task.memory:
                logger(
                    "Taking full memory dump",
                    action="vm.memdump", status="pending",
                    vmname=self.machine.name
                )
                try:
                    dump_path = os.path.join(self.storage, "memory.dmp")
                    machinery.dump_memory(self.machine.label, dump_path)

                    logger(
                        "Taken full memory dump",
                        action="vm.memdump", status="success",
                        vmname=self.machine.name
                    )
                except NotImplementedError:
                    log.error(
                        "The memory dump functionality is not available for "
                        "the current machine manager.", extra={
                            "action": "vm.memdump",
                            "status": "error",
                            "vmname": self.machine.name,
                        }
                    )
                except CuckooMachineError as e:
                    log.error("Machinery error: %s", e, extra={
                        "action": "vm.memdump",
                        "status": "error",
                    })

            logger(
                "Stopping VM",
                action="vm.stop", status="pending",
                vmname=self.machine.name
            )

            try:
                # Stop the analysis machine.
                log.info(" [*] machinery.is_running = FALSE Done ")
                if self.is_vnc:
                    # In VNC mode the results are processed right away and
                    # the VM is left running until it stops on its own.
                    self.db.set_status(self.task.id, TASK_COMPLETED)

                    if self.cfg.cuckoo.process_results:
                        # this updates self.task so processing gets the latest and greatest
                        self.store_task_info()

                        self.process_results()
                        self.db.set_status(self.task.id, TASK_REPORTED)

                    # overwrite task.json so we have the latest data inside
                    self.store_task_info()

                    self.db.guest_set_status(self.task.id, "VNC")
                    # Poll every five seconds until the machinery reports the
                    # machine as no longer running.
                    while machinery.is_running(self.machine.label):
                        log.info(" [*] [%s] VNC mode ", self.machine.name)
                        time.sleep(5)
                    self.db.guest_set_status(self.task.id, "Done")
                    log.info(" [*] [%s] VNC mode - Done ", self.machine.name)
                else:
                    self.db.guest_set_status(self.task.id, "stopping")
                    machinery.stop(self.machine.label)
            except CuckooMachineError as e:
                log.warning(
                    "Unable to stop machine %s: %s",
                    self.machine.label, e, extra={
                        "action": "vm.stop",
                        "status": "error",
                        "vmname": self.machine.name,
                    }
                )

            logger(
                "Stopped VM",
                action="vm.stop", status="success",
                vmname=self.machine.name
            )

            # Mark the machine in the database as stopped. Unless this machine
            # has been marked as dead, we just keep it as "started" in the
            # database so it'll not be used later on in this session.
            self.db.guest_stop(guest_log)

            # After all this, we can make the ResultServer forget about the
            # internal state for this analysis task.
            ResultServer().del_task(self.task, self.machine)

            # Drop the network routing rules if any.
            self.unroute_network()

            try:
                # Release the analysis machine. But only if the machine has
                # not turned dead yet.
                machinery.release(self.machine.label)
            except CuckooMachineError as e:
                log.error(
                    "Unable to release machine %s, reason %s. You might need "
                    "to restore it manually.", self.machine.label, e, extra={
                        "action": "vm.release",
                        "status": "error",
                        "vmname": self.machine.name,
                    }
                )

        return succeeded

    def process_results(self):
        """Process the analysis results and generate the enabled reports.

        Afterwards, for "file" tasks, the original sample and/or its binary
        copy are deleted when the delete_original / delete_bin_copy
        configuration options are enabled.
        @return: always True.
        """
        logger(
            "Starting task reporting",
            action="task.report", status="pending"
        )

        # TODO Refactor this function as currently "cuckoo process" has a 1:1
        # copy of its code. TODO Also remove "archive" files.
        processed = RunProcessing(task=self.task).run()
        RunSignatures(results=processed).run()
        RunReporting(task=self.task, results=processed).run()

        is_file_task = self.task.category == "file"

        # If the target is a file and the user enabled the option,
        # delete the original copy.
        if is_file_task and self.cfg.cuckoo.delete_original:
            if os.path.exists(self.task.target):
                try:
                    os.remove(self.task.target)
                except OSError as e:
                    log.error("Unable to delete original file at path "
                              "\"%s\": %s", self.task.target, e)
            else:
                log.warning("Original file does not exist anymore: \"%s\": "
                            "File not found.", self.task.target)

        # If the target is a file and the user enabled the delete copy of
        # the binary option, then delete the copy.
        if is_file_task and self.cfg.cuckoo.delete_bin_copy:
            if os.path.exists(self.binary):
                try:
                    os.remove(self.binary)
                except OSError as e:
                    log.error(
                        "Unable to delete the copy of the original file at path \"%s\": %s",
                        self.binary, e
                    )
            else:
                log.warning(
                    "Copy of the original file does not exist anymore: \"%s\": File not found",
                    self.binary
                )

            # A symlink in the analysis directory whose target no longer
            # exists is dangling; remove it as well.
            dangling = (
                os.path.islink(self.storage_binary) and
                not os.path.exists(self.storage_binary)
            )
            if dangling:
                try:
                    os.remove(self.storage_binary)
                except OSError as e:
                    log.error(
                        "Unable to delete symlink to the binary copy at path \"%s\": %s",
                        self.storage_binary, e
                    )

        log.info(
            "Task #%d: reports generation completed",
            self.task.id, extra={
                "action": "task.report",
                "status": "success",
            }
        )

        return True

    def run(self):
        """Run manager thread.

        Launches the analysis and, unless the task is in VNC mode, marks it
        completed, optionally processes the results, refreshes the "latest"
        symlink and rewrites the task information. Any exception is logged
        rather than propagated. The task log is stopped and the global
        active analysis counter is decremented in all cases.
        """
        global active_analysis_count
        active_analysis_count += 1
        try:
            self.launch_analysis()

            if not self.is_vnc:
                self.db.set_status(self.task.id, TASK_COMPLETED)

                log.debug("Released database task #%d", self.task.id)

                if self.cfg.cuckoo.process_results:
                    # this updates self.task so processing gets the latest and greatest
                    self.store_task_info()

                    self.process_results()
                    self.db.set_status(self.task.id, TASK_REPORTED)

                # We make a symbolic link ("latest") which links to the latest
                # analysis - this is useful for debugging purposes. This is only
                # supported under systems that support symbolic links.
                if hasattr(os, "symlink"):
                    latest = cwd("storage", "analyses", "latest")

                    # First we have to remove the existing symbolic link, then we
                    # have to create the new one.
                    # Deal with race conditions using a lock.
                    latest_symlink_lock.acquire()
                    try:
                        # As per documentation, lexists() returns True for dead
                        # symbolic links.
                        if os.path.lexists(latest):
                            os.remove(latest)

                        os.symlink(self.storage, latest)
                    except OSError as e:
                        log.warning(
                            "Error pointing latest analysis symlink: %s", e
                        )
                    finally:
                        latest_symlink_lock.release()

                # overwrite task.json so we have the latest data inside
                self.store_task_info()
                log.info(
                    "Task #%d: analysis procedure completed",
                    self.task.id, extra={
                        "action": "task.stop",
                        "status": "success",
                    }
                )
        except Exception:
            # Boundary of the analysis thread: log the failure instead of
            # letting it propagate out of the thread.
            log.exception("Failure in AnalysisManager.run", extra={
                "action": "task.stop",
                "status": "error",
            })
        finally:
            # Always undo the bookkeeping done at the top, even when the
            # error handler itself fails or a non-Exception is raised, so
            # the active analysis count cannot get stuck above zero.
            task_log_stop(self.task.id)
            active_analysis_count -= 1
Esempio n. 44
0
def cuckoo_init(level, ctx, cfg=None):
    """Initialize Cuckoo configuration.
    @param level: logging level passed to the console and file loggers.
    @param ctx: context object; its "user" attribute is passed on when a new
        CWD is created and its "log" attribute enables file logging.
    @param cfg: optional configuration passed on when a new CWD is created.
    """
    logo()

    # It would appear this is the first time Cuckoo is being run (on this
    # Cuckoo Working Directory anyway).
    if not os.path.isdir(cwd()) or not os.listdir(cwd()):
        cuckoo_create(ctx.user, cfg)
        sys.exit(0)

    # Determine if this is a proper CWD.
    if not os.path.exists(cwd(".cwd")):
        sys.exit(
            "No proper Cuckoo Working Directory was identified, did you pass "
            "along the correct directory? For new installations please use a "
            "non-existant directory to build up the CWD! You can craft a CWD "
            "manually, but keep in mind that the CWD layout may change along "
            "with Cuckoo releases (and don't forget to fill out '$CWD/.cwd')!"
        )

    init_console_logging(level)

    # Only one Cuckoo process should exist per CWD. Run this check before any
    # files are possibly modified. Note that we mkdir $CWD/pidfiles/ here as
    # its CWD migration rules only kick in after the pidfile check.
    mkdir(cwd("pidfiles"))
    pidfile = Pidfile("cuckoo")
    if pidfile.exists():
        log.error(red("Cuckoo is already running. PID: %s"), pidfile.pid)
        sys.exit(1)

    pidfile.create()

    check_configs()
    check_version()

    if ctx.log:
        init_logging(level)

    # Determine if any CWD updates are required and if so, do them. The
    # files are opened through context managers so the handles are closed
    # deterministically.
    with open(cwd(".cwd"), "rb") as f:
        current = f.read().strip()
    with open(cwd(".cwd", private=True), "rb") as f:
        latest = f.read().strip()
    if current != latest:
        migrate_cwd()
        with open(cwd(".cwd"), "wb") as f:
            f.write(latest)

    # Ensure the user is able to create and read temporary files.
    if not ensure_tmpdir():
        sys.exit(1)

    Database().connect()

    # Load additional Signatures.
    load_signatures()

    init_modules()
    init_tasks()
    init_yara()
    init_binaries()
    init_rooter()
    init_routing()

    # Only whether at least one signature is enabled matters here.
    if not any(sig.enabled for sig in cuckoo.signatures):
        log.warning(
            "It appears that you haven't loaded any Cuckoo Signatures. "
            "Signatures are highly recommended and improve & enrich the "
            "information extracted during an analysis. They also make up "
            "for the analysis score that you see in the Web Interface - so, "
            "pretty important!"
        )
        log.warning(
            "You'll be able to fetch all the latest Cuckoo Signaturs, Yara "
            "rules, and more goodies by running the following command:"
        )
        # Build the suggested command, quoting the CWD when it contains a
        # space or a single quote and omitting it for the default locations.
        raw = cwd(raw=True)
        if raw == "." or raw == "~/.cuckoo":
            command = "cuckoo community"
        elif " " in raw or "'" in raw:
            command = 'cuckoo --cwd "%s" community' % raw
        else:
            command = "cuckoo --cwd %s community" % raw

        log.info("$ %s", green(command))
Esempio n. 45
0
class Scheduler(object):
    """Tasks Scheduler.

    This class is responsible for the main execution loop of the tool. It
    prepares the analysis machines and keeps waiting for and loading new
    analysis tasks.
    Whenever a new task is available, it launches AnalysisManager which will
    take care of running the full analysis process and operating with the
    assigned analysis machine.
    """
    def __init__(self, maxcount=None):
        """Create the scheduler.
        @param maxcount: maximum number of analyses to start; when None the
            max_analysis_count configuration value is used in start().
        """
        self.running = True
        self.cfg = Config()
        self.db = Database()
        self.maxcount = maxcount
        self.total_analysis_count = 0

    def initialize(self):
        """Initialize the machine manager.

        Sets the module-level machinery and machine_lock globals, loads the
        configured machinery plugin and drops stale forwarding rules.
        @raise CuckooCriticalError: if the machinery fails to initialize or
            no machines are available.
        """
        global machinery, machine_lock

        machinery_name = self.cfg.cuckoo.machinery

        # A configured max_vmstartup_count allows that many holders of the
        # machine lock at once; otherwise a plain mutual-exclusion lock.
        max_vmstartup_count = self.cfg.cuckoo.max_vmstartup_count
        if max_vmstartup_count:
            machine_lock = threading.Semaphore(max_vmstartup_count)
        else:
            machine_lock = threading.Lock()

        log.info("Using \"%s\" as machine manager", machinery_name, extra={
            "action": "init.machinery",
            "status": "success",
            "machinery": machinery_name,
        })

        # Initialize the machine manager.
        machinery = cuckoo.machinery.plugins[machinery_name]()

        # Provide a dictionary with the configuration options to the
        # machine manager instance.
        machinery.set_options(Config(machinery_name))

        # Initialize the machine manager.
        try:
            machinery.initialize(machinery_name)
        except CuckooMachineError as e:
            raise CuckooCriticalError("Error initializing machines: %s" % e)

        # At this point all the available machines should have been identified
        # and added to the list. If none were found, Cuckoo aborts the
        # execution. TODO In the future we'll probably want get rid of this.
        if not machinery.machines():
            raise CuckooCriticalError("No machines available.")

        log.info("Loaded %s machine/s", len(machinery.machines()), extra={
            "action": "init.machines",
            "status": "success",
            "count": len(machinery.machines()),
        })

        if len(machinery.machines()) > 1 and self.db.engine.name == "sqlite":
            # Each fragment ends with a space so the concatenated message
            # reads "a MySQL or a PostgreSQL database".
            log.warning("As you've configured Cuckoo to execute parallel "
                        "analyses, we recommend you to switch to a MySQL or "
                        "a PostgreSQL database as SQLite might cause some "
                        "issues.")

        if len(machinery.machines()) > 4 and self.cfg.cuckoo.process_results:
            log.warning("When running many virtual machines it is recommended "
                        "to process the results in a separate process.py to "
                        "increase throughput and stability. Please read the "
                        "documentation about the `Processing Utility`.")

        # Drop all existing packet forwarding rules for each VM. Just in case
        # Cuckoo was terminated for some reason and various forwarding rules
        # have thus not been dropped yet.
        for machine in machinery.machines():
            if not machine.interface:
                log.info("Unable to determine the network interface for VM "
                         "with name %s, Cuckoo will not be able to give it "
                         "full internet access or route it through a VPN! "
                         "Please define a default network interface for the "
                         "machinery or define a network interface for each "
                         "VM.", machine.name)
                continue

            # Drop forwarding rule to each VPN.
            if config("routing:vpn:enabled"):
                for vpn in config("routing:vpn:vpns"):
                    rooter(
                        "forward_disable", machine.interface,
                        config("routing:%s:interface" % vpn), machine.ip
                    )

            # Drop forwarding rule to the internet / dirty line.
            if config("routing:routing:internet") != "none":
                rooter(
                    "forward_disable", machine.interface,
                    config("routing:routing:internet"), machine.ip
                )

    def stop(self):
        """Stop scheduler."""
        self.running = False
        # Shutdown machine manager (used to kill machines that still alive).
        machinery.shutdown()

    def start(self):
        """Start scheduler.

        Initializes the machinery and then loops once per second while
        self.running is set, starting an AnalysisManager thread for each
        fetched task. Exceptions queued by analysis threads are re-raised
        from this loop.
        """
        self.initialize()

        log.info("Waiting for analysis tasks.")

        # Message queue with threads to transmit exceptions (used as IPC).
        errors = Queue.Queue()

        # Command-line overrides the configuration file.
        if self.maxcount is None:
            self.maxcount = self.cfg.cuckoo.max_analysis_count

        # This loop runs forever.
        while self.running:
            time.sleep(1)

            # Wait until the machine lock is not locked. This is only the case
            # when all machines are fully running, rather than about to start
            # or still busy starting. This way we won't have race conditions
            # with finding out there are no available machines in the analysis
            # manager or having two analyses pick the same machine.
            if not machine_lock.acquire(False):
                logger(
                    "Could not acquire machine lock",
                    action="scheduler.machine_lock", status="busy"
                )
                continue

            # The lock was only probed; hand it straight back.
            machine_lock.release()

            # If not enough free disk space is available, then we print an
            # error message and wait another round (this check is ignored
            # when the freespace configuration variable is set to zero).
            if self.cfg.cuckoo.freespace:
                # Resolve the full base path to the analysis folder, just in
                # case somebody decides to make a symbolic link out of it.
                dir_path = cwd("storage", "analyses")

                # TODO: Windows support
                if hasattr(os, "statvfs"):
                    dir_stats = os.statvfs(dir_path.encode("utf8"))

                    # Calculate the free disk space in megabytes.
                    space_available = dir_stats.f_bavail * dir_stats.f_frsize
                    space_available /= 1024 * 1024

                    if space_available < self.cfg.cuckoo.freespace:
                        log.error(
                            "Not enough free disk space! (Only %d MB!)",
                            space_available, extra={
                                "action": "scheduler.diskspace",
                                "status": "error",
                                "available": space_available,
                            }
                        )
                        continue

            # If we have limited the number of concurrently executing machines,
            # are we currently at the maximum?
            maxvm = self.cfg.cuckoo.max_machines_count
            if maxvm and len(machinery.running()) >= maxvm:
                logger(
                    "Already maxed out on running machines",
                    action="scheduler.machines", status="maxed"
                )
                continue

            # If no machines are available, it's pointless to fetch for
            # pending tasks. Loop over.
            if not machinery.availables():
                logger(
                    "No available machines",
                    action="scheduler.machines", status="none"
                )
                continue

            # Exits if max_analysis_count is defined in the configuration
            # file and has been reached.
            if self.maxcount and self.total_analysis_count >= self.maxcount:
                if active_analysis_count <= 0:
                    log.debug("Reached max analysis count, exiting.", extra={
                        "action": "scheduler.max_analysis",
                        "status": "success",
                        "limit": self.total_analysis_count,
                    })
                    self.stop()
                else:
                    logger(
                        "Maximum analyses hit, awaiting active to finish off",
                        action="scheduler.max_analysis", status="busy",
                        active=active_analysis_count
                    )
                continue

            # Fetch a pending analysis task.
            # TODO This fixes only submissions by --machine, need to add
            # other attributes (tags etc).
            # TODO We should probably move the entire "acquire machine" logic
            # from the Analysis Manager to the Scheduler and then pass the
            # selected machine onto the Analysis Manager instance.
            task, available = None, False
            for machine in self.db.get_available_machines():
                task = self.db.fetch(machine=machine.name)
                if task:
                    break

                if machine.is_analysis():
                    available = True

            # We only fetch a new task if at least one of the available
            # machines is not a "service" machine (again, please refer to the
            # services auxiliary module for more information on service VMs).
            if not task and available:
                task = self.db.fetch(service=False)

            if task:
                log.debug("Processing task #%s", task.id)
                self.total_analysis_count += 1

                # Initialize and start the analysis manager.
                analysis = AnalysisManager(task.id, errors)
                analysis.daemon = True
                analysis.start()

            # Deal with errors: re-raise the first queued exception, if any.
            # An empty queue raises Queue.Empty, which is simply ignored.
            try:
                raise errors.get(block=False)
            except Queue.Empty:
                pass

        log.debug("End of analyses.")
Esempio n. 46
0
def upgrade():
    """Rebuild the ``samples`` table so that ``file_type`` is a Text column.

    Every existing row is read into memory first; the table is then renamed,
    dropped and created again with the new column type, after which the rows
    are inserted back and the hash index is restored. On MySQL and PostgreSQL
    the tasks -> samples foreign key is dropped beforehand and recreated at
    the very end.
    """
    columns = (
        "id", "file_size", "file_type", "md5", "crc32",
        "sha1", "sha256", "sha512", "ssdeep",
    )

    bind = op.get_bind()
    rows = bind.execute("SELECT id, file_size, file_type, md5, crc32, "
                        "sha1, sha256, sha512, ssdeep FROM samples")

    # Snapshot each row as a plain dict (keyed by column name, in SELECT
    # order) before the table goes away.
    samples = [
        dict((name, row[position]) for position, name in enumerate(columns))
        for row in rows
    ]

    # The foreign key Task.sample_id -> Sample.id carries a different name on
    # MySQL and PostgreSQL. Any other engine (e.g. SQLite) yields None here,
    # in which case the key is neither dropped nor recreated.
    engine_name = Database(schema_check=False).engine.name
    constraint = {
        "mysql": "tasks_ibfk_1",
        "postgresql": "tasks_sample_id_fkey",
    }.get(engine_name)

    # The foreign key is removed before touching the table it points at.
    if constraint:
        op.drop_constraint(constraint, "tasks", type_="foreignkey")

    # Move the original table out of the way and then get rid of it.
    op.rename_table("samples", "old_samples")
    op.drop_table("old_samples")

    # Create the new table with 1.2 schema.
    # Changelog:
    # * file_type changed its type from String(255) to Text().
    op.create_table(
        "samples",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("file_size", sa.Integer(), nullable=False),
        sa.Column("file_type", sa.Text(), nullable=False),
        sa.Column("md5", sa.String(32), nullable=False),
        sa.Column("crc32", sa.String(8), nullable=False),
        sa.Column("sha1", sa.String(40), nullable=False),
        sa.Column("sha256", sa.String(64), nullable=False),
        sa.Column("sha512", sa.String(128), nullable=False),
        sa.Column("ssdeep", sa.Text(), nullable=True),
        sa.PrimaryKeyConstraint("id")
    )

    # Put the saved rows back.
    op.bulk_insert(Sample.__table__, samples)

    # Restore the unique index over the hash columns.
    op.create_index(
        "hash_index", "samples",
        ["md5", "crc32", "sha1", "sha256", "sha512"],
        unique=True
    )

    # Finally recreate the foreign key if one was dropped above.
    if constraint:
        op.create_foreign_key(
            constraint, "tasks", "samples", ["sample_id"], ["id"]
        )
Esempio n. 47
0
 def __init__(self, maxcount=None):
     """Initialise the instance state.

     maxcount is stored unchanged on ``self.maxcount`` (None by default);
     the analysis counter starts at zero and ``running`` starts as True.
     """
     # Plain bookkeeping values.
     self.running = True
     self.maxcount = maxcount
     self.total_analysis_count = 0

     # Collaborators: the configuration is instantiated before the database
     # handle.
     self.cfg = Config()
     self.db = Database()
Esempio n. 48
0
def test_on_yara():
    """Check that Yara matches show up in the results and reach on_yara().

    Analysis #1 is populated with a sample binary, one dropped file and one
    process memory dump. After running the processing modules, a stub
    signature whose on_yara() callback is a MagicMock is run so that the
    calls it receives can be inspected.
    """
    set_cwd(os.path.realpath(tempfile.mkdtemp()))
    cuckoo_create()
    init_modules()

    # Copy the vmdetect rules from the "binaries" Yara directory into the
    # "memory" one before (re)initializing Yara.
    shutil.copy(cwd("yara", "binaries", "vmdetect.yar"),
                cwd("yara", "memory", "vmdetect.yar"))
    init_yara()

    # Each fixture file is written inside a "with" block so that its handle
    # is flushed and closed before processing reads the file back, instead of
    # relying on the garbage collector to do so.
    mkdir(cwd(analysis=1))
    with open(cwd("binary", analysis=1), "wb") as f:
        f.write("\x0f\x3f\x07\x0b")

    mkdir(cwd("files", analysis=1))
    with open(cwd("files", "1.txt", analysis=1), "wb") as f:
        # These four bytes spell "VMXh".
        f.write("\x56\x4d\x58\x68")

    mkdir(cwd("memory", analysis=1))
    with open(cwd("memory", "1-0.dmp", analysis=1), "wb") as f:
        # A packed header (24 bytes with the usual native sizes) followed by
        # four payload bytes; the procmem match below is expected at 24.
        f.write(
            struct.pack("QIIII", 0x400000, 0x1000, 0, 0, 0) +
            "\x45\xc7\x00\x01"
        )

    Database().connect()
    results = RunProcessing(task=Dictionary({
        "id": 1,
        "category": "file",
        "target": __file__,
    })).run()
    assert results["target"]["file"]["yara"][0]["offsets"] == {
        "virtualpc": [(0, 0)],
    }
    assert results["procmemory"][0]["yara"][0]["offsets"] == {
        "vmcheckdll": [(24, 0)],
    }
    assert results["dropped"][0]["yara"][0]["offsets"] == {
        "vmware": [(0, 0)],
        "vmware1": [(0, 0)],
    }

    class sig1(object):
        # Minimal stand-in for a signature: every callback is a no-op except
        # on_yara, which is a class-level MagicMock recording its calls.
        name = "sig1"

        @property
        def matched(self):
            return False

        @matched.setter
        def matched(self, value):
            pass

        def init(self):
            pass

        def on_signature(self):
            pass

        def on_complete(self):
            pass

        def on_extract(self):
            pass

        on_yara = mock.MagicMock()

    rs = RunSignatures(results)

    # Replace the signature list with just the stub.
    rs.signatures = (sig1(),)
    rs.run()

    # One callback each for the sample, the dropped file and the memory dump.
    assert sig1.on_yara.call_count == 3
    sig1.on_yara.assert_any_call("sample", cwd("binary", analysis=1), mock.ANY)
    sig1.on_yara.assert_any_call("dropped", cwd("files", "1.txt", analysis=1),
                                 mock.ANY)
    sig1.on_yara.assert_any_call("procmem", cwd("memory",
                                                "1-0.dmp",
                                                analysis=1), mock.ANY)

    # Third positional argument of the first recorded call: the match object.
    ym = sig1.on_yara.call_args_list[0][0][2]
    assert ym.offsets == {
        "virtualpc": [(0, 0)],
    }
    assert ym.string("virtualpc", 0) == "\x0f\x3f\x07\x0b"
Esempio n. 49
0
 def test_api_status200(self, client):
     """GET /cuckoo/api/status returns HTTP 200 on a freshly created CWD."""
     # Fresh working directory with a connected database.
     set_cwd(tempfile.mkdtemp())
     cuckoo_create()
     Database().connect()

     response = client.get("/cuckoo/api/status")
     assert response.status_code == 200
Esempio n. 50
0
    def setup_class(self):
        """Point the CWD at a new temp dir and connect ``self.d`` to URI."""
        set_cwd(tempfile.mkdtemp())

        # The handle is stored on ``self.d`` first and connected afterwards,
        # using the DSN taken from ``self.URI``.
        self.d = database = Database()
        database.connect(dsn=self.URI)
Esempio n. 51
0
def test_on_extract():
    """Check that an extracted script reaches the results and on_extract().

    A parsed cmd.exe command line is pushed as a script for task #2, the
    processing modules are run, and a stub signature whose on_extract()
    callback is a MagicMock is then run to inspect what it receives.
    """
    set_cwd(tempfile.mkdtemp())
    cuckoo_create()
    init_modules()

    Database().connect()
    mkdir(cwd(analysis=2))

    cmd = Scripting().parse_command("cmd.exe /c ping 1.2.3.4")

    ex = ExtractManager.for_task(2)
    ex.push_script({
        "pid": 1,
        "first_seen": 2,
    }, cmd)

    results = RunProcessing(task=Dictionary({
        "id": 2,
        "category": "file",
        "target": __file__,
    })).run()

    assert results["extracted"] == [{
        "category": "script",
        "pid": 1,
        "first_seen": 2,
        "program": "cmd",
        "script": cwd("extracted", "0.bat", analysis=2),
        "yara": [],
    }]

    class sig1(object):
        # Minimal stand-in for a signature: every callback is a no-op except
        # on_extract, which is a class-level MagicMock recording its calls.
        name = "sig1"

        @property
        def matched(self):
            return False

        @matched.setter
        def matched(self, value):
            pass

        def init(self):
            pass

        def on_signature(self):
            pass

        def on_complete(self):
            pass

        def on_yara(self):
            pass

        on_extract = mock.MagicMock()

    rs = RunSignatures(results)

    # Replace the signature list with just the stub.
    rs.signatures = (sig1(),)
    rs.run()

    # Check call_count explicitly rather than via assert_called_once(): on
    # mock releases that lack that method, a MagicMock silently auto-creates
    # it as a child mock and the "assertion" can never fail.
    assert sig1.on_extract.call_count == 1

    # First positional argument of the only recorded call.
    em = sig1.on_extract.call_args_list[0][0][0]
    assert em.category == "script"
Esempio n. 52
0
import copy
import json
import logging
import os
import sflock
import zipfile

from cuckoo.common.exceptions import CuckooOperationalError
from cuckoo.common.files import Folders, Files, Storage
from cuckoo.common.utils import validate_url, validate_hash
from cuckoo.common.virustotal import VirusTotalAPI
from cuckoo.core.database import Database, TASK_COMPLETED
from cuckoo.misc import cwd, mkdir

# Logger named after this module.
log = logging.getLogger(__name__)
# Database handle, instantiated once when this module is imported.
db = Database()


class SubmitManager(object):
    # Names of the options listed as "known web options".
    # NOTE(review): only the list itself is visible in this excerpt; how it
    # is consumed should be confirmed against the code that reads it.
    known_web_options = [
        "enable-injection",
        "enforce-timeout",
        "full-memory-dump",
        "process-memory-dump",
        "remote-control",
        "simulated-human-interaction",
    ]

    def _handle_string(self, submit, tmppath, line):
        # An empty (falsy) line is ignored and None is returned.
        # NOTE(review): the visible body ends right after this guard, so
        # `submit` and `tmppath` are unused here; the rest of the method
        # appears to be missing from this excerpt - confirm against the
        # full source before relying on it.
        if not line:
            return
Esempio n. 53
0
def test_machines():
    """Check the result server settings stored for each configured machine.

    Three VirtualBox machines are configured: "a" has no result server
    overrides, "b" overrides resultserver_ip and "c" overrides
    resultserver_port. After initializing the machinery, the machines listed
    by the database are expected to carry their own override where one is
    given, and otherwise the ip from cuckoo.conf and the port of the
    registered ResultServer instance (9001 here).
    """
    set_cwd(tempfile.mkdtemp())
    Folders.create(cwd(), "conf")
    Files.create(cwd("conf"), "cuckoo.conf", """
[cuckoo]
machinery = virtualbox
[database]
connection =
timeout =
[resultserver]
ip = 9.8.7.6
port = 9876
""")
    Files.create(cwd("conf"), "virtualbox.conf", """
[virtualbox]
machines = a, b, c
[a]
label = a
snapshot = derpa
platform = windows
ip = 1.2.3.4

[b]
label = b
snapshot = derpb
platform = windows
ip = 5.6.7.8
resultserver_ip = 7.5.3.1

[c]
label = c
snapshot = derpc
platform = windows
ip = 1.3.5.7
resultserver_port = 4242
""")

    class FakeResultServer(object):
        # Only the attribute this test relies on.
        port = 9001

    # Register the fake as the ResultServer singleton instance. The override
    # is removed in the "finally" clause so that a failing assertion cannot
    # leak it into tests that run afterwards.
    Singleton._instances[ResultServer] = FakeResultServer()
    try:
        db = Database()
        db.connect()
        m = Machinery()
        m.set_options(Config("virtualbox"))
        m._initialize("virtualbox")

        machines = db.list_machines()
        assert len(machines) == 3

        # No overrides: ip from cuckoo.conf, port from the fake instance.
        assert machines[0].label == "a"
        assert machines[0].snapshot == "derpa"
        assert machines[0].ip == "1.2.3.4"
        assert machines[0].resultserver_ip == "9.8.7.6"
        assert machines[0].resultserver_port == 9001

        # Machine-specific resultserver_ip.
        assert machines[1].label == "b"
        assert machines[1].snapshot == "derpb"
        assert machines[1].ip == "5.6.7.8"
        assert machines[1].resultserver_ip == "7.5.3.1"
        assert machines[1].resultserver_port == 9001

        # Machine-specific resultserver_port.
        assert machines[2].label == "c"
        assert machines[2].snapshot == "derpc"
        assert machines[2].ip == "1.3.5.7"
        assert machines[2].resultserver_ip == "9.8.7.6"
        assert machines[2].resultserver_port == 4242
    finally:
        Singleton._instances.pop(ResultServer)