Example no. 1
class AnalysisManager(threading.Thread):
    """Analysis Manager.

    This class handles the full analysis process for a given task. It takes
    care of selecting the analysis machine, preparing the configuration and
    interacting with the guest agent and analyzer components to launch and
    complete the analysis and store, process and report its results.
    """
    def __init__(self, task_id, error_queue):
        """@param task: task object containing the details for the analysis."""
        threading.Thread.__init__(self)

        self.errors = error_queue
        self.cfg = Config()
        self.storage = ""
        self.binary = ""
        self.storage_binary = ""
        self.machine = None
        self.db = Database()
        self.task = self.db.view_task(task_id)
        self.guest_manager = None
        self.route = None
        self.interface = None
        self.rt_table = None
        self.unrouted_network = False
        self.stopped_aux = False
        self.rs_port = config("cuckoo:resultserver:port")

    def init(self):
        """Initialize the analysis."""
        self.storage = cwd(analysis=self.task.id)

        # If the analysis storage folder already exists, we need to abort the
        # analysis or previous results will be overwritten and lost.
        if os.path.exists(self.storage):
            log.error(
                "Analysis results folder already exists at path \"%s\", "
                "analysis aborted", self.storage)
            return False

        # If we're not able to create the analysis storage folder, we have to
        # abort the analysis.
        # Also create all directories that the ResultServer can use for file
        # uploads.
        try:
            Folders.create(self.storage, RESULT_DIRECTORIES)
        except CuckooOperationalError:
            log.error("Unable to create analysis folder %s", self.storage)
            return False

        self.store_task_info()

        if self.task.category == "file" or self.task.category == "archive":
            # Check if we have permissions to access the file.
            # And fail this analysis if we don't have access to the file.
            if not os.access(self.task.target, os.R_OK):
                log.error(
                    "Unable to access target file, please check if we have "
                    "permissions to access the file: \"%s\"", self.task.target)
                return False

            # Check whether the file has been changed for some unknown reason.
            # And fail this analysis if it has been modified.
            # TODO Absorb the file upon submission.
            sample = self.db.view_sample(self.task.sample_id)
            sha256 = File(self.task.target).get_sha256()
            if sha256 != sample.sha256:
                log.error(
                    "Target file has been modified after submission: \"%s\"",
                    self.task.target)
                return False

            # Store a copy of the original file if it does not exist already.
            # TODO This should be done at submission time.
            self.binary = cwd("storage", "binaries", sha256)
            if not os.path.exists(self.binary):
                try:
                    shutil.copy(self.task.target, self.binary)
                except (IOError, shutil.Error):
                    log.error(
                        "Unable to store file from \"%s\" to \"%s\", "
                        "analysis aborted", self.task.target, self.binary)
                    return False

            # Each analysis directory contains a symlink/copy of the binary.
            try:
                self.storage_binary = os.path.join(self.storage, "binary")

                if hasattr(os, "symlink"):
                    os.symlink(self.binary, self.storage_binary)
                else:
                    shutil.copy(self.binary, self.storage_binary)
            except (AttributeError, OSError) as e:
                log.error(
                    "Unable to create symlink/copy from \"%s\" to "
                    "\"%s\": %s", self.binary, self.storage, e)
                return False

        # Initiates per-task logging.
        task_log_start(self.task.id)
        return True

    def store_task_info(self):
        """Grab latest task from db (if available) and update self.task"""
        dbtask = self.db.view_task(self.task.id)
        self.task = dbtask.to_dict()

        task_info_path = os.path.join(self.storage, "task.json")
        with open(task_info_path, "w") as fileobj:
            fileobj.write(dbtask.to_json())

    def acquire_machine(self):
        """Acquire an analysis machine from the pool of available ones."""
        machine = None

        # Start a loop to acquire a machine to run the analysis on.
        while True:
            machine_lock.acquire()

            # In some cases it's possible that we enter this loop without
            # having any available machines. We should make sure this is not
            # the case, or the analysis task will fail completely.
            if not machinery.availables():
                machine_lock.release()
                time.sleep(1)
                continue

            # If the user specified a specific machine ID, a platform to be
            # used, or machine tags, acquire the machine accordingly.
            machine = machinery.acquire(machine_id=self.task.machine,
                                        platform=self.task.platform,
                                        tags=self.task.tags)

            # If no machine is available at this moment, wait for one second
            # and try again.
            if not machine:
                machine_lock.release()
                log.debug("Task #%d: no machine available yet", self.task.id)
                time.sleep(1)
            else:
                log.info("Task #%d: acquired machine %s (label=%s)",
                         self.task.id,
                         machine.name,
                         machine.label,
                         extra={
                             "action": "vm.acquire",
                             "status": "success",
                             "vmname": machine.name,
                         })
                break

        self.machine = machine

    def build_options(self):
        """Generate analysis options.
        @return: options dict.
        """
        options = {}

        if self.task.category == "file":
            options["file_name"] = File(self.task.target).get_name()
            options["file_type"] = File(self.task.target).get_type()
            options["pe_exports"] = \
                ",".join(File(self.task.target).get_exported_functions())

            package, activity = File(self.task.target).get_apk_entry()
            self.task.options["apk_entry"] = "%s:%s" % (package, activity)
        elif self.task.category == "archive":
            options["file_name"] = File(self.task.target).get_name()

        options["id"] = self.task.id
        options["ip"] = self.machine.resultserver_ip
        options["port"] = self.rs_port
        options["category"] = self.task.category
        options["target"] = self.task.target
        options["package"] = self.task.package
        options["options"] = emit_options(self.task.options)
        options["enforce_timeout"] = self.task.enforce_timeout
        options["clock"] = self.task.clock
        options["terminate_processes"] = self.cfg.cuckoo.terminate_processes

        if not self.task.timeout:
            options["timeout"] = self.cfg.timeouts.default
        else:
            options["timeout"] = self.task.timeout

        # copy in other analyzer specific options, TEMPORARY (most likely)
        vm_options = getattr(machinery.options, self.machine.name)
        for k in vm_options:
            if k.startswith("analyzer_"):
                options[k] = vm_options[k]

        return options
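
    # Illustrative sketch (not part of the original source) of the dict that
    # build_options() returns; the values below are hypothetical and are only
    # meant to document its expected shape:
    #
    #   {"id": 42, "ip": "192.168.56.1", "port": 2042, "category": "file",
    #    "target": "/tmp/sample.exe", "package": "exe", "options": "free=yes",
    #    "enforce_timeout": False, "clock": <datetime>, "timeout": 120,
    #    "terminate_processes": False, "file_name": "sample.exe",
    #    "file_type": "PE32 executable", "pe_exports": "", ...}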

    def route_network(self):
        """Enable network routing if desired."""
        # Determine the desired routing strategy (none, internet, VPN).
        self.route = self.task.options.get("route",
                                           config("routing:routing:route"))

        if self.route == "none" or self.route == "drop":
            self.interface = None
            self.rt_table = None
        elif self.route == "inetsim":
            pass
        elif self.route == "tor":
            pass
        elif self.route == "internet":
            if config("routing:routing:internet") == "none":
                log.warning(
                    "Internet network routing has been specified, but not "
                    "configured, ignoring routing for this analysis",
                    extra={
                        "action": "network.route",
                        "status": "error",
                        "route": self.route,
                    })
                self.route = "none"
                self.task.options["route"] = "none"
                self.interface = None
                self.rt_table = None
            else:
                self.interface = config("routing:routing:internet")
                self.rt_table = config("routing:routing:rt_table")
        elif self.route in config("routing:vpn:vpns"):
            self.interface = config("routing:%s:interface" % self.route)
            self.rt_table = config("routing:%s:rt_table" % self.route)
        else:
            log.warning(
                "Unknown network routing destination specified, ignoring "
                "routing for this analysis: %r",
                self.route,
                extra={
                    "action": "network.route",
                    "status": "error",
                    "route": self.route,
                })
            self.route = "none"
            self.task.options["route"] = "none"
            self.interface = None
            self.rt_table = None

        # Check if the network interface is still available. If a VPN dies for
        # some reason, its tunX interface will no longer be available.
        if self.interface and not rooter("nic_available", self.interface):
            log.error(
                "The network interface '%s' configured for this analysis is "
                "not available at the moment, switching to route=none mode.",
                self.interface,
                extra={
                    "action": "network.route",
                    "status": "error",
                    "route": self.route,
                })
            self.route = "none"
            self.task.options["route"] = "none"
            self.interface = None
            self.rt_table = None

        # For now this doesn't work yet in combination with tor routing.
        if self.route == "drop" or self.route == "internet":
            rooter("drop_enable", self.machine.ip,
                   config("cuckoo:resultserver:ip"), str(self.rs_port))

        if self.route == "inetsim":
            machinery = config("cuckoo:cuckoo:machinery")
            rooter("inetsim_enable", self.machine.ip,
                   config("routing:inetsim:server"),
                   config("%s:%s:interface" % (machinery, machinery)),
                   str(self.rs_port),
                   config("routing:inetsim:ports") or "")

        if self.route == "tor":
            rooter("tor_enable", self.machine.ip,
                   str(config("cuckoo:resultserver:ip")),
                   str(config("routing:tor:dnsport")),
                   str(config("routing:tor:proxyport")))

        if self.interface:
            rooter("forward_enable", self.machine.interface, self.interface,
                   self.machine.ip)

        if self.rt_table:
            rooter("srcroute_enable", self.rt_table, self.machine.ip)

        # Propagate the taken route to the database.
        self.db.set_route(self.task.id, self.route)

    def unroute_network(self):
        """Disable any enabled network routing."""
        if self.interface:
            rooter("forward_disable", self.machine.interface, self.interface,
                   self.machine.ip)

        if self.rt_table:
            rooter("srcroute_disable", self.rt_table, self.machine.ip)

        if self.route == "drop" or self.route == "internet":
            rooter("drop_disable", self.machine.ip,
                   config("cuckoo:resultserver:ip"), str(self.rs_port))

        if self.route == "inetsim":
            machinery = config("cuckoo:cuckoo:machinery")
            rooter("inetsim_disable", self.machine.ip,
                   config("routing:inetsim:server"),
                   config("%s:%s:interface" % (machinery, machinery)),
                   str(self.rs_port),
                   config("routing:inetsim:ports") or "")

        if self.route == "tor":
            rooter("tor_disable", self.machine.ip,
                   str(config("cuckoo:resultserver:ip")),
                   str(config("routing:tor:dnsport")),
                   str(config("routing:tor:proxyport")))

        self.unrouted_network = True

    def wait_finish(self):
        """Some VMs don't have an actual agent. Mainly those that are used as
        assistance for an analysis through the services auxiliary module. This
        method just waits until the analysis is finished rather than actively
        trying to engage with the Cuckoo Agent."""
        self.db.guest_set_status(self.task.id, "running")
        while self.db.guest_get_status(self.task.id) == "running":
            time.sleep(1)

    def guest_manage(self, options):
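        """Start and manage the analysis inside the guest: for "baseline"
        tasks simply sleep for the configured timeout, otherwise drive the
        guest manager through the starting/running/stopping states."""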
        # Handle a special case where we're creating a baseline report of this
        # particular virtual machine - a report containing all the results
        # that are gathered if no additional samples are run in the VM. These
        # results, such as loaded drivers and opened sockets in volatility, or
        # DNS requests to hostnames related to Microsoft Windows, etc., may be
        # omitted or at the very least given less priority when creating a
        # report for an analysis that ran on this VM later on.
        if self.task.category == "baseline":
            time.sleep(options["timeout"])
        else:
            # Start the analysis.
            self.db.guest_set_status(self.task.id, "starting")
            monitor = self.task.options.get("monitor", "latest")
            self.guest_manager.start_analysis(options, monitor)

            # If the Agent didn't respond and we force-quit the analysis at
            # some point while it was still starting, the state will be "stop"
            # (or anything but "running", really).
            if self.db.guest_get_status(self.task.id) == "starting":
                self.db.guest_set_status(self.task.id, "running")
                self.guest_manager.wait_for_completion()

            self.db.guest_set_status(self.task.id, "stopping")

    def launch_analysis(self):
        """Start analysis."""
        succeeded = False

        if self.task.category == "file" or self.task.category == "archive":
            target = os.path.basename(self.task.target)
        else:
            target = self.task.target

        log.info("Starting analysis of %s \"%s\" (task #%d, options \"%s\")",
                 self.task.category.upper(),
                 target,
                 self.task.id,
                 emit_options(self.task.options),
                 extra={
                     "action": "task.init",
                     "status": "starting",
                     "task_id": self.task.id,
                     "target": target,
                     "category": self.task.category,
                     "package": self.task.package,
                     "options": emit_options(self.task.options),
                     "custom": self.task.custom,
                 })

        # Initialize the analysis.
        if not self.init():
            logger("Failed to initialize", action="task.init", status="error")
            return False

        # Acquire analysis machine.
        try:
            self.acquire_machine()
        except CuckooOperationalError as e:
            machine_lock.release()
            log.error("Cannot acquire machine: %s",
                      e,
                      extra={
                          "action": "vm.acquire",
                          "status": "error",
                      })
            return False

        self.rs_port = self.machine.resultserver_port or ResultServer().port

        # At this point we can tell the ResultServer about the task and the
        # machine it will run on.
        try:
            ResultServer().add_task(self.task, self.machine)
        except Exception as e:
            machinery.release(self.machine.label)
            self.errors.put(e)

        # Initialize the guest manager.
        self.guest_manager = GuestManager(self.machine.name, self.machine.ip,
                                          self.machine.platform, self.task.id,
                                          self)

        self.aux = RunAuxiliary(self.task, self.machine, self.guest_manager)
        self.aux.start()

        # Generate the analysis configuration file.
        options = self.build_options()

        # Check if the current task has remotecontrol
        # enabled before starting the machine.
        control_enabled = (config("cuckoo:remotecontrol:enabled")
                           and "remotecontrol" in self.task.options)
        if control_enabled:
            try:
                machinery.enable_remote_control(self.machine.label)
            except NotImplementedError:
                log.error(
                    "Remote control support has not been implemented for the "
                    "configured machinery module: %s",
                    config("cuckoo:cuckoo:machinery"))

        try:
            unlocked = False
            self.interface = None

            # Mark the selected analysis machine in the database as started.
            guest_log = self.db.guest_start(self.task.id, self.machine.name,
                                            self.machine.label,
                                            machinery.__class__.__name__)
            logger("Starting VM",
                   action="vm.start",
                   status="pending",
                   vmname=self.machine.name)

            # Start the machine.
            machinery.start(self.machine.label, self.task)

            logger("Started VM",
                   action="vm.start",
                   status="success",
                   vmname=self.machine.name)

            # retrieve the port used for remote control
            if control_enabled:
                try:
                    params = machinery.get_remote_control_params(
                        self.machine.label)
                    self.db.set_machine_rcparams(self.machine.label, params)
                except NotImplementedError:
                    log.error(
                        "Remote control support has not been implemented for the "
                        "configured machinery module: %s",
                        config("cuckoo:cuckoo:machinery"))

            # Enable network routing.
            self.route_network()

            # By the time start returns it will have fully started the Virtual
            # Machine. We can now safely release the machine lock.
            machine_lock.release()
            unlocked = True

            # Run and manage the components inside the guest unless this
            # machine has the "noagent" option specified (please refer to the
            # wait_finish() function for more details on this function).
            if "noagent" not in self.machine.options:
                self.guest_manage(options)
            else:
                self.wait_finish()

            succeeded = True
        except CuckooMachineSnapshotError as e:
            log.error(
                "Unable to restore to the snapshot for this Virtual Machine! "
                "Does your VM have a proper Snapshot and can you revert to it "
                "manually? VM: %s, error: %s",
                self.machine.name,
                e,
                extra={
                    "action": "vm.resume",
                    "status": "error",
                    "vmname": self.machine.name,
                })
        except CuckooMachineError as e:
            if not unlocked:
                machine_lock.release()
            log.error("Error starting Virtual Machine! VM: %s, error: %s",
                      self.machine.name,
                      e,
                      extra={
                          "action": "vm.start",
                          "status": "error",
                          "vmname": self.machine.name,
                      })
        except CuckooGuestCriticalTimeout as e:
            if not unlocked:
                machine_lock.release()
            log.error(
                "Error from machine '%s': it appears that this Virtual "
                "Machine hasn't been configured properly as the Cuckoo Host "
                "wasn't able to connect to the Guest. There could be a few "
                "reasons for this, please refer to our documentation on the "
                "matter: %s",
                self.machine.name,
                faq("troubleshooting-vm-network-configuration"),
                extra={
                    "error_action": "vmrouting",
                    "action": "guest.handle",
                    "status": "error",
                    "task_id": self.task.id,
                })
        except CuckooGuestError as e:
            if not unlocked:
                machine_lock.release()
            log.error("Error from the Cuckoo Guest: %s",
                      e,
                      extra={
                          "action": "guest.handle",
                          "status": "error",
                          "task_id": self.task.id,
                      })
        finally:
            # Stop Auxiliary modules.
            if not self.stopped_aux:
                self.stopped_aux = True
                self.aux.stop()

            # Take a memory dump of the machine before shutting it off.
            if self.cfg.cuckoo.memory_dump or self.task.memory:
                logger("Taking full memory dump",
                       action="vm.memdump",
                       status="pending",
                       vmname=self.machine.name)
                try:
                    dump_path = os.path.join(self.storage, "memory.dmp")
                    machinery.dump_memory(self.machine.label, dump_path)

                    logger("Taken full memory dump",
                           action="vm.memdump",
                           status="success",
                           vmname=self.machine.name)
                except NotImplementedError:
                    log.error(
                        "The memory dump functionality is not available for "
                        "the current machine manager.",
                        extra={
                            "action": "vm.memdump",
                            "status": "error",
                            "vmname": self.machine.name,
                        })
                except CuckooMachineError as e:
                    log.error("Machinery error: %s",
                              e,
                              extra={
                                  "action": "vm.memdump",
                                  "status": "error",
                              })

            logger("Stopping VM",
                   action="vm.stop",
                   status="pending",
                   vmname=self.machine.name)

            try:
                # Stop the analysis machine.
                machinery.stop(self.machine.label)
            except CuckooMachineError as e:
                log.warning("Unable to stop machine %s: %s",
                            self.machine.label,
                            e,
                            extra={
                                "action": "vm.stop",
                                "status": "error",
                                "vmname": self.machine.name,
                            })

            logger("Stopped VM",
                   action="vm.stop",
                   status="success",
                   vmname=self.machine.name)

            # Disable remote control after stopping the machine
            # if it was enabled for the task.
            if control_enabled:
                try:
                    machinery.disable_remote_control(self.machine.label)
                except NotImplementedError:
                    log.error(
                        "Remote control support has not been implemented for the "
                        "configured machinery module: %s",
                        config("cuckoo:cuckoo:machinery"))

            # Mark the machine in the database as stopped. Unless this machine
            # has been marked as dead, we just keep it as "started" in the
            # database so it'll not be used later on in this session.
            self.db.guest_stop(guest_log)

            # After all this, we can make the ResultServer forget about the
            # internal state for this analysis task.
            ResultServer().del_task(self.task, self.machine)

            # Drop the network routing rules if any.
            if not self.unrouted_network:
                self.unroute_network()

            try:
                # Release the analysis machine. But only if the machine has
                # not turned dead yet.
                machinery.release(self.machine.label)
            except CuckooMachineError as e:
                log.error(
                    "Unable to release machine %s, reason %s. You might need "
                    "to restore it manually.",
                    self.machine.label,
                    e,
                    extra={
                        "action": "vm.release",
                        "status": "error",
                        "vmname": self.machine.name,
                    })

        return succeeded

    def process_results(self):
        """Process the analysis results and generate the enabled reports."""
        logger("Starting task reporting",
               action="task.report",
               status="pending")

        # TODO Refactor this function as currently "cuckoo process" has a 1:1
        # copy of its code. TODO Also remove "archive" files.
        results = RunProcessing(task=self.task).run()
        RunSignatures(results=results).run()
        RunReporting(task=self.task, results=results).run()

        # If the target is a file and the user enabled the option,
        # delete the original copy.
        if self.task.category == "file" and self.cfg.cuckoo.delete_original:
            if not os.path.exists(self.task.target):
                log.warning(
                    "Original file does not exist anymore: \"%s\": "
                    "File not found.", self.task.target)
            else:
                try:
                    os.remove(self.task.target)
                except OSError as e:
                    log.error(
                        "Unable to delete original file at path "
                        "\"%s\": %s", self.task.target, e)

        # If the target is a file and the user enabled the delete copy of
        # the binary option, then delete the copy.
        if self.task.category == "file" and self.cfg.cuckoo.delete_bin_copy:
            if not os.path.exists(self.binary):
                log.warning(
                    "Copy of the original file does not exist anymore: \"%s\": File not found",
                    self.binary)
            else:
                try:
                    os.remove(self.binary)
                except OSError as e:
                    log.error(
                        "Unable to delete the copy of the original file at path \"%s\": %s",
                        self.binary, e)
            # Check if the binary in the analysis directory is an invalid symlink. If it is, delete it.
            if os.path.islink(self.storage_binary) and not os.path.exists(
                    self.storage_binary):
                try:
                    os.remove(self.storage_binary)
                except OSError as e:
                    log.error(
                        "Unable to delete symlink to the binary copy at path \"%s\": %s",
                        self.storage_binary, e)

        log.info("Task #%d: reports generation completed",
                 self.task.id,
                 extra={
                     "action": "task.report",
                     "status": "success",
                 })

        return True

    def run(self):
        """Run manager thread."""
        global active_analysis_count
        active_analysis_count += 1
        try:
            self.launch_analysis()

            log.debug("Released database task #%d", self.task.id)

            if self.cfg.cuckoo.process_results:
                self.store_task_info()
                self.db.set_status(self.task.id, TASK_COMPLETED)
                # TODO If self.process_results() is unified with apps.py's
                # process() method, then ensure that TASK_FAILED_PROCESSING is
                # handled correctly and not overwritten by the db.set_status()
                # at the end of this method.
                self.process_results()

            # We make a symbolic link ("latest") which links to the latest
            # analysis - this is useful for debugging purposes. This is only
            # supported under systems that support symbolic links.
            if hasattr(os, "symlink"):
                latest = cwd("storage", "analyses", "latest")

                # First we have to remove the existing symbolic link, then we
                # have to create the new one.
                # Deal with race conditions using a lock.
                latest_symlink_lock.acquire()
                try:
                    # As per documentation, lexists() returns True for dead
                    # symbolic links.
                    if os.path.lexists(latest):
                        os.remove(latest)

                    os.symlink(self.storage, latest)
                except OSError as e:
                    log.warning("Error pointing latest analysis symlink: %s" %
                                e)
                finally:
                    latest_symlink_lock.release()

            # overwrite task.json so we have the latest data inside
            self.store_task_info()
            log.info("Task #%d: analysis procedure completed",
                     self.task.id,
                     extra={
                         "action": "task.stop",
                         "status": "success",
                     })
        except:
            log.exception("Failure in AnalysisManager.run",
                          extra={
                              "action": "task.stop",
                              "status": "error",
                          })
        finally:
            if self.cfg.cuckoo.process_results:
                self.db.set_status(self.task.id, TASK_REPORTED)
            else:
                self.db.set_status(self.task.id, TASK_COMPLETED)
            task_log_stop(self.task.id)
            active_analysis_count -= 1

    def cleanup(self):
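        """Perform any pending network and auxiliary-module cleanup, even if
        the analysis manager crashed before doing so itself."""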
        # In case the analysis manager crashes, the network cleanup
        # should still be performed.
        if not self.unrouted_network:
            self.unroute_network()

        if not self.stopped_aux:
            self.stopped_aux = True
            self.aux.stop()

    def force_stop(self):
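        """Force an ongoing analysis to stop by flipping the guest status to
        "stopping" and stopping the guest manager."""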
        # Make the guest manager stop the status checking loop and return
        # to the main analysis manager routine.
        if self.db.guest_get_status(self.task.id):
            self.db.guest_set_status(self.task.id, "stopping")

        self.guest_manager.stop()
        log.debug("Force stopping task #%s", self.task.id)
Example no. 2
class DatabaseEngine(object):
    """Tests database stuff."""
    URI = None

    def setup_class(self):
        set_cwd(tempfile.mkdtemp())

    def setup(self):
        self.d = Database()
        self.d.connect(dsn=self.URI)

    def teardown(self):
        # Clear all tables without dropping them. This is done after each test
        # to ensure a test doesn't fail because of data left behind by a
        # previous test.
        meta = MetaData()
        meta.reflect(self.d.engine)
        ses = self.d.Session()
        try:
            for t in reversed(meta.sorted_tables):
                ses.execute(t.delete())
            ses.commit()
        finally:
            ses.close()

    def test_add_target(self):
        count = self.d.Session().query(Target).count()
        add_target("http://example.com", category="url")
        assert self.d.Session().query(Target).count() == count + 1

    def test_add_task(self):
        fd, sample_path = tempfile.mkstemp()
        os.write(fd, "hehe")
        os.close(fd)

        # Add task.
        count = self.d.Session().query(Task).count()
        add_task(sample_path, category="file")
        assert self.d.Session().query(Task).count() == count + 1

    def test_processing_get_task(self):
        # First reset all existing rows so that earlier exceptions don't affect
        # this unit test run.
        null, session = None, self.d.Session()

        session.query(Task).filter(Task.status == "completed",
                                   Task.processing == null).update({
                                       "processing":
                                       "something",
                                   })
        session.commit()

        t1 = add_task("http://google.com/1",
                      priority=1,
                      status="completed",
                      category="url")
        t2 = add_task("http://google.com/2",
                      priority=2,
                      status="completed",
                      category="url")
        t3 = add_task("http://google.com/3",
                      priority=1,
                      status="completed",
                      category="url")
        t4 = add_task("http://google.com/4",
                      priority=1,
                      status="completed",
                      category="url")
        t5 = add_task("http://google.com/5",
                      priority=3,
                      status="completed",
                      category="url")
        t6 = add_task("http://google.com/6",
                      priority=1,
                      status="completed",
                      category="url")
        t7 = add_task("http://google.com/7",
                      priority=1,
                      status="completed",
                      category="url")

        assert self.d.processing_get_task("foo") == t5
        assert self.d.processing_get_task("foo") == t2
        assert self.d.processing_get_task("foo") == t1
        assert self.d.processing_get_task("foo") == t3
        assert self.d.processing_get_task("foo") == t4
        assert self.d.processing_get_task("foo") == t6
        assert self.d.processing_get_task("foo") == t7
        assert self.d.processing_get_task("foo") is None

    def test_error_exists(self):
        task_id = add_task("http://google.com/7", category="url")
        self.d.add_error("A" * 1024, task_id)
        assert len(self.d.view_errors(task_id)) == 1
        self.d.add_error("A" * 1024, task_id)
        assert len(self.d.view_errors(task_id)) == 2

    def test_long_error(self):
        add_task("http://google.com/", category="url")
        self.d.add_error("A" * 1024, 1)
        err = self.d.view_errors(1)
        assert err and len(err[0].message) == 1024

    def test_submit(self):
        dirpath = tempfile.mkdtemp()
        submit_id = self.d.add_submit(dirpath, "files", {
            "foo": "bar",
        })
        submit = self.d.view_submit(submit_id)
        assert submit.id == submit_id
        assert submit.tmp_path == dirpath
        assert submit.submit_type == "files"
        assert submit.data == {
            "foo": "bar",
        }

    def test_connect_no_create(self):
        AlembicVersion.__table__.drop(self.d.engine)
        self.d.connect(dsn=self.URI, create=False)
        assert "alembic_version" not in self.d.engine.table_names()
        self.d.connect(dsn=self.URI)
        assert "alembic_version" in self.d.engine.table_names()

    def test_view_submit_tasks(self):
        submit_id = self.d.add_submit(None, None, None)
        target_id = add_target(__file__, category="file")
        t1 = add_task(custom="1", submit_id=submit_id)
        t2 = add_task(custom="2", submit_id=submit_id)

        submit = self.d.view_submit(submit_id)
        assert submit.id == submit_id
        with pytest.raises(DetachedInstanceError):
            print submit.tasks

        submit = self.d.view_submit(submit_id, tasks=True)
        assert len(submit.tasks) == 2
        tasks = sorted((task.id, task) for task in submit.tasks)
        assert tasks[0][1].id == t1
        assert tasks[0][1].custom == "1"
        assert tasks[1][1].id == t2
        assert tasks[1][1].custom == "2"

    def test_task_set_options(self):
        t0 = add_task(__file__, options={"foo": "bar"})
        t1 = add_task(__file__, options="foo=bar")

        assert self.d.view_task(t0).options == {"foo": "bar"}
        assert self.d.view_task(t1).options == {"foo": "bar"}

    def test_error_action(self):
        task_id = add_task(__file__)
        self.d.add_error("message1", task_id)
        self.d.add_error("message2", task_id, "actionhere")
        e1, e2 = self.d.view_errors(task_id)
        assert e1.message == "message1"
        assert e1.action is None
        assert e2.message == "message2"
        assert e2.action == "actionhere"

    def test_view_tasks(self):
        t1 = add_task(__file__)
        t2 = add_task("http://example.com", category="url")
        tasks = self.d.view_tasks([t1, t2])
        assert tasks[0].to_dict() == self.d.view_task(t1).to_dict()
        assert tasks[1].to_dict() == self.d.view_task(t2).to_dict()

    def test_add_machine(self):
        self.d.add_machine("name1", "label", "1.2.3.4", "windows", None,
                           "tag1 tag2", "int0", "snap0", "5.6.7.8", 2043,
                           "virtualbox")
        self.d.add_machine("name2", "label", "1.2.3.4", "windows", "",
                           "tag1 tag2", "int0", "snap0", "5.6.7.8", 2043,
                           "virtualbox")
        self.d.add_machine("name3", "label", "1.2.3.4", "windows", "opt1 opt2",
                           "tag1 tag2", "int0", "snap0", "5.6.7.8", 2043,
                           "virtualbox")
        self.d.add_machine("name4",
                           "label",
                           "1.2.3.4",
                           "windows", ["opt3", "opt4"],
                           "tag1 tag2",
                           "int0",
                           "snap0",
                           "5.6.7.8",
                           2043,
                           "virtualbox",
                           reserved_by=1600)
        m1 = self.d.view_machine("name1")
        m2 = self.d.view_machine("name2")
        m3 = self.d.view_machine("name3")
        m4 = self.d.view_machine("name4")
        assert m1.options == []
        assert m2.options == []
        assert m3.options == ["opt1", "opt2"]
        assert m4.options == ["opt3", "opt4"]
        assert m1.manager == "virtualbox"
        assert m4.reserved_by == 1600

    def test_adding_task(self):
        now = datetime.datetime.now()
        id = add_task(__file__, "file", 0, "py", "free=yes", 3, "custom",
                      "owner", "machine1", "DogeOS", ["tag1"], False, False,
                      now, "regular", None, now)

        task = self.d.view_task(id)
        assert id is not None
        assert task.timeout == 0
        assert task.package == "py"
        assert task.options == {"free": "yes"}
        assert task.priority == 3
        assert task.custom == "custom"
        assert task.owner == "owner"
        assert task.machine == "machine1"
        assert task.platform == "DogeOS"
        assert len(task.tags) == 1
        assert task.tags[0].name == "tag1"
        assert task.memory == False
        assert task.enforce_timeout == False
        assert task.clock == now
        assert task.submit_id is None
        assert task.start_on == now
        assert len(task.targets) == 1
        assert task.targets[0].category == "file"
        assert task.targets[0].target == __file__

    def test_set_machine_rcparams(self):
        self.d.add_machine("name5", "label5", "1.2.3.4", "windows", None,
                           "tag1 tag2", "int0", "snap0", "5.6.7.8", 2043,
                           "virtualbox")

        self.d.set_machine_rcparams("label5", {
            "protocol": "rdp",
            "host": "127.0.0.1",
            "port": 3389,
        })

        m = self.d.view_machine("name5")
        assert m.rcparams == {
            "protocol": "rdp",
            "host": "127.0.0.1",
            "port": "3389",
        }

    def test_add_target_file(self):
        fd, sample_path = tempfile.mkstemp()
        os.write(fd, os.urandom(64))
        os.close(fd)
        target = File(sample_path)

        id = add_target(sample_path, "file")
        db_target = self.d.find_target(id=id)

        assert id is not None
        assert db_target.file_size == 64
        assert db_target.file_type == target.get_type()
        assert db_target.md5 == target.get_md5()
        assert db_target.crc32 == target.get_crc32()
        assert db_target.sha1 == target.get_sha1()
        assert db_target.sha256 == target.get_sha256()
        assert db_target.sha512 == target.get_sha512()
        assert db_target.ssdeep == target.get_ssdeep()
        assert db_target.category == "file"

    def test_add_target_url(self):
        target = URL("http://example.com/")

        id = add_target(target.url, "url")
        db_target = self.d.find_target(id=id)

        assert id is not None
        assert db_target.md5 == target.get_md5()
        assert db_target.crc32 == target.get_crc32()
        assert db_target.sha1 == target.get_sha1()
        assert db_target.sha256 == target.get_sha256()
        assert db_target.sha512 == target.get_sha512()
        assert db_target.ssdeep == target.get_ssdeep()
        assert db_target.category == "url"

    def test_find_target(self):
        fd, sample_path = tempfile.mkstemp()
        os.write(fd, os.urandom(64))
        os.close(fd)
        target = File(sample_path)
        id = add_target(sample_path, category="file")

        assert self.d.find_target(id=id).id == id
        assert self.d.find_target(crc32=target.get_crc32()).id == id
        assert self.d.find_target(md5=target.get_md5()).id == id
        assert self.d.find_target(sha1=target.get_sha1()).id == id
        assert self.d.find_target(sha256=target.get_sha256()).id == id
        assert self.d.find_target(sha512=target.get_sha512()).id == id

    def test_find_target_multifilter(self):
        ids = []
        paths = []
        target = None
        for x in range(2):
            fd, sample_path = tempfile.mkstemp()
            randbytes = os.urandom(64)
            paths.append(sample_path)
            os.write(fd, randbytes)
            os.close(fd)
            target = File(sample_path)
            ids.append(add_target(sample_path, category="file"))

        db_target = self.d.find_target(sha256=target.get_sha256(),
                                       target=paths[1])
        assert self.d.find_target(id=ids[0], md5=target.get_md5()) is None
        assert db_target.id == ids[1]

    def test_fetch_with_machine(self):
        future = datetime.datetime(2200, 5, 12, 12, 12)
        add_task(__file__, category="file", tags=["service"])
        t2 = add_task(__file__, category="file", machine="machine1")
        add_task(__file__, category="file", start_on=future)
        add_task(__file__, category="file")

        t = self.d.fetch(machine="machine1", service=False)

        assert t.id == t2
        assert t.status == "pending"

    def test_fetch_service_false(self):
        add_task(__file__, category="file", tags=["service"])
        t2 = add_task(__file__, category="file")

        t = self.d.fetch(service=False)
        assert t.id == t2
        assert t.status == "pending"

    def test_fetch_service_true(self):
        t1 = add_task(__file__, category="file", tags=["service"])
        add_task(__file__, category="file", machine="machine1")
        add_task(__file__)
        add_task(__file__)

        task = self.d.fetch()
        assert task.id == t1
        assert task.status == "pending"

    def test_fetch_use_start_on_true(self):
        future = datetime.datetime(2200, 5, 12, 12, 12)
        add_task(__file__, category="file", start_on=future, priority=999)
        t2 = add_task(__file__, category="file")
        t = self.d.fetch(service=False)

        assert t.id == t2
        assert t.status == "pending"

    def test_fetch_use_start_on_false(self):
        future = datetime.datetime(2200, 5, 12, 12, 12)
        t1 = add_task(__file__, category="file", start_on=future, priority=999)
        add_task(__file__, category="file")

        t = self.d.fetch(use_start_on=False, service=False)
        assert t.id == t1
        assert t.status == "pending"

    def test_fetch_use_exclude(self):
        t1 = add_task(__file__, category="file", priority=999)
        t2 = add_task(__file__, category="file", priority=999)
        t3 = add_task(__file__, category="file", priority=999)
        t4 = add_task(__file__, category="file", priority=999)

        t = self.d.fetch(service=False, exclude=[t1, t2, t3])
        assert t.id == t4
        assert t.status == "pending"

    def test_fetch_specific_task(self):
        t1 = add_task(__file__, category="file", priority=999)
        t2 = add_task(__file__, category="file", priority=999)
        t = self.d.fetch(task_id=t1)
        assert t.id == t1
        assert t.status == "pending"

    def test_lock_machine(self):
        t1 = add_task(__file__, category="file", tags=["app1", "office7"])
        t2 = add_task(__file__, category="file", tags=["app1", "office15"])

        self.d.add_machine("name1", "name1", "1.2.3.4", "windows", "",
                           "app1,office7", "int0", "snap0", "5.6.7.8", 2043,
                           "virtualbox")
        self.d.add_machine("name2", "name2", "1.2.3.4", "DogeOS", "opt1 opt2",
                           "office13", "int0", "snap0", "5.6.7.8", 2043,
                           "virtualbox")
        self.d.add_machine("name3", "name3", "1.2.3.4", "CoffeeOS",
                           ["opt3", "opt4"], "cofOS,office7", "int0", "snap0",
                           "5.6.7.8", 2043, "virtualbox")

        task1 = self.d.view_task(t1)
        task2 = self.d.view_task(t2)

        m1 = self.d.lock_machine(tags=task1.tags)
        assert m1.locked
        assert m1.name == "name1"
        with pytest.raises(CuckooOperationalError):
            self.d.lock_machine(platform="DogeOS", tags=task2.tags)
        m2 = self.d.lock_machine(platform="DogeOS")
        assert m2.name == "name2"
        m3 = self.d.lock_machine(label="name3")
        assert m3.locked
        assert m3.name == "name3"

    def test_list_tasks(self):
        t1 = add_task(__file__,
                      category="file",
                      owner="doge",
                      options={"route": "vpn511"})
        t2 = add_task(__file__, category="file")
        add_task(__file__, category="file")
        self.d.set_status(t2, "reported")
        self.d.set_status(t1, "reported")

        tasks = self.d.list_tasks(owner="doge", status="reported")
        tasks2 = self.d.list_tasks()
        tasks3 = self.d.list_tasks(status="reported")

        assert tasks[0].id == t1
        assert len(tasks2) == 3
        assert len(tasks3) == 2

    def test_list_tasks_between(self):
        for x in range(5):
            add_task(__file__, category="file")

        tasks = self.d.list_tasks(filter_by="id",
                                  operators="between",
                                  values=(1, 3))
        assert len(tasks) == 3

    def test_list_tasks_multiple_filter(self):
        ids = []
        future = None
        for x in range(10):
            id = add_task(__file__, category="file")
            ids.append(id)
            future = datetime.datetime.now() + datetime.timedelta(days=id)
            ses = self.d.Session()
            task = ses.query(Task).get(id)
            task.completed_on = future
            ses.commit()
            ses.close()

        tasks = self.d.list_tasks(filter_by=["id", "completed_on"],
                                  operators=[">", "<"],
                                  values=[4, future],
                                  order_by="id",
                                  limit=1)
        assert len(tasks) == 1
        assert tasks[0].id == 5

    def test_list_tasks_offset_limit(self):
        for x in range(10):
            add_task(__file__, category="file")

        tasks = self.d.list_tasks(offset=5, limit=10, order_by="id")
        assert len(tasks) == 5
        assert tasks[4].id == 10

    def test_list_tasks_notvalue(self):
        for x in range(10):
            id = add_task(__file__, category="file")
            if id % 2 == 0:
                self.d.set_status(id, "running")

        tasks = self.d.list_tasks(filter_by="status",
                                  operators="!=",
                                  values="running",
                                  order_by="id")
        assert len(tasks) == 5
        assert tasks[4].id == 9

    def test_list_tasks_noresults(self):
        for x in range(5):
            add_task(__file__, category="file")
        tasks = self.d.list_tasks(status="reported")
        assert tasks == []

    def test_get_available_machines(self):
        self.d.add_machine("name1", "name1", "1.2.3.4", "windows", "",
                           "app1,office7", "int0", "snap0", "5.6.7.8", 2043,
                           "virtualbox")
        self.d.add_machine("name2", "name2", "1.2.3.4", "DogeOS", "opt1 opt2",
                           "office13", "int0", "snap0", "5.6.7.8", 2043,
                           "virtualbox")
        self.d.add_machine("name3", "name3", "1.2.3.4", "CoffeeOS",
                           ["opt3", "opt4"], "cofOS,office7", "int0", "snap0",
                           "5.6.7.8", 2043, "virtualbox")
        self.d.machine_reserve(label="name2", task_id=1337)
        self.d.lock_machine(label="name3")
        available = self.d.get_available_machines()
        names = [m["name"] for m in [db_m.to_dict() for db_m in available]]

        assert len(available) == 2
        assert "name2" in names
        assert "name1" in names

    def test_unlock_machine(self):
        self.d.add_machine("name1", "name1", "1.2.3.4", "windows", "",
                           "app1,office7", "int0", "snap0", "5.6.7.8", 2043,
                           "virtualbox")
        self.d.lock_machine(label="name1")

        assert self.d.view_machine(name="name1").locked
        self.d.unlock_machine(label="name1")
        assert not self.d.view_machine(name="name1").locked

    def test_list_machines(self):
        self.d.add_machine("name1", "name1", "1.2.3.4", "windows", "",
                           "app1,office7", "int0", "snap0", "5.6.7.8", 2043,
                           "virtualbox")
        self.d.add_machine("name2", "name2", "1.2.3.4", "DogeOS", "opt1 opt2",
                           "office13", "int0", "snap0", "5.6.7.8", 2043,
                           "virtualbox")
        allmachines = self.d.list_machines()
        names = [m["name"] for m in [db_m.to_dict() for db_m in allmachines]]

        assert len(allmachines) == 2
        assert "name2" in names
        assert "name1" in names

    def test_machine_reserve(self):
        self.d.add_machine("name1", "name1", "1.2.3.4", "windows", "",
                           "app1,office7", "int0", "snap0", "5.6.7.8", 2043,
                           "virtualbox")
        assert self.d.view_machine(name="name1").reserved_by is None
        self.d.machine_reserve(label="name1", task_id=42)
        assert self.d.view_machine(name="name1").reserved_by == 42

    def test_clear_reservation(self):
        self.d.add_machine("name1", "name1", "1.2.3.4", "windows", "",
                           "app1,office7", "int0", "snap0", "5.6.7.8", 2043,
                           "virtualbox")
        self.d.machine_reserve(label="name1", task_id=42)
        assert self.d.view_machine(name="name1").reserved_by == 42
        self.d.clear_reservation(label="name1")
        assert self.d.view_machine(name="name1").reserved_by is None

    def test_clean_machines(self):
        for x in range(6):
            name = "name%s" % x
            self.d.add_machine(name, name, "1.2.3.4", "windows", "",
                               "app1,office7", "int0", "snap0", "5.6.7.8",
                               2043, "virtualbox")

        assert len(self.d.list_machines()) == 6
        self.d.clean_machines()
        assert len(self.d.list_machines()) == 0

    def test_target_to_dict(self):
        fd, sample_path = tempfile.mkstemp()
        os.write(fd, os.urandom(64))
        os.close(fd)
        target = File(sample_path)
        id = add_target(sample_path, category="file")
        db_target = self.d.find_target(id=id)
        db_target = db_target.to_dict()

        assert db_target["id"] == id
        assert db_target["file_size"] == 64
        assert db_target["file_type"] == target.get_type()
        assert db_target["md5"] == target.get_md5()
        assert db_target["crc32"] == target.get_crc32()
        assert db_target["sha1"] == target.get_sha1()
        assert db_target["sha256"] == target.get_sha256()
        assert db_target["sha512"] == target.get_sha512()
        assert db_target["ssdeep"] == target.get_ssdeep()
        assert db_target["category"] == "file"
        assert db_target["target"] == sample_path

    def test_task_multiple_targets(self):
        db_targets = []
        task_id = add_task()
        for x in range(10):
            fd, sample_path = tempfile.mkstemp()
            os.write(fd, os.urandom(64))
            os.close(fd)
            add_target(sample_path, category="file", task_id=task_id)

        task = self.d.view_task(task_id)
        assert task.id == task_id
        assert len(task.targets) == 10
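
# Hedged sketch (not from the original listing): URI is left as None above,
# which suggests that concrete test classes subclass DatabaseEngine with a
# real DSN. The class names and DSNs below are illustrative assumptions only.
class TestSQLiteDatabaseEngine(DatabaseEngine):
    URI = "sqlite:///:memory:"

class TestPostgresDatabaseEngine(DatabaseEngine):
    URI = "postgresql://cuckoo:cuckoo@localhost/cuckootest"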
Example no. 3
class DatabaseEngine(object):
    """Tests database stuff."""
    URI = None

    def setup_class(self):
        set_cwd(tempfile.mkdtemp())

        self.d = Database()
        self.d.connect(dsn=self.URI)

    def add_url(self, url, priority=1, status="pending"):
        task_id = self.d.add_url(url, priority=priority)
        self.d.set_status(task_id, status)
        return task_id

    def test_add_tasks(self):
        fd, sample_path = tempfile.mkstemp()
        os.write(fd, "hehe")
        os.close(fd)

        # Add task.
        count = self.d.Session().query(Task).count()
        self.d.add_path(sample_path)
        assert self.d.Session().query(Task).count() == count + 1

        # Add url.
        self.d.add_url("http://foo.bar")
        assert self.d.Session().query(Task).count() == count + 2

    def test_processing_get_task(self):
        # First reset all existing rows so that earlier exceptions don't affect
        # this unit test run.
        null, session = None, self.d.Session()

        session.query(Task).filter(Task.status == "completed",
                                   Task.processing == null).update({
                                       "processing":
                                       "something",
                                   })
        session.commit()

        t1 = self.add_url("http://google.com/1",
                          priority=1,
                          status="completed")
        t2 = self.add_url("http://google.com/2",
                          priority=2,
                          status="completed")
        t3 = self.add_url("http://google.com/3",
                          priority=1,
                          status="completed")
        t4 = self.add_url("http://google.com/4",
                          priority=1,
                          status="completed")
        t5 = self.add_url("http://google.com/5",
                          priority=3,
                          status="completed")
        t6 = self.add_url("http://google.com/6",
                          priority=1,
                          status="completed")
        t7 = self.add_url("http://google.com/7",
                          priority=1,
                          status="completed")

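        # processing_get_task() is expected to hand out completed tasks by
        # descending priority first (t5 has priority 3, t2 priority 2) and,
        # within the same priority, in ascending task id / submission order
        # (t1, t3, t4, t6, t7), which is what the asserts below verify.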
        assert self.d.processing_get_task("foo") == t5
        assert self.d.processing_get_task("foo") == t2
        assert self.d.processing_get_task("foo") == t1
        assert self.d.processing_get_task("foo") == t3
        assert self.d.processing_get_task("foo") == t4
        assert self.d.processing_get_task("foo") == t6
        assert self.d.processing_get_task("foo") == t7
        assert self.d.processing_get_task("foo") is None

    def test_error_exists(self):
        task_id = self.add_url("http://google.com/")
        self.d.add_error("A" * 1024, task_id)
        assert len(self.d.view_errors(task_id)) == 1
        self.d.add_error("A" * 1024, task_id)
        assert len(self.d.view_errors(task_id)) == 2

    def test_long_error(self):
        self.add_url("http://google.com/")
        self.d.add_error("A" * 1024, 1)
        err = self.d.view_errors(1)
        assert err and len(err[0].message) == 1024

    def test_submit(self):
        dirpath = tempfile.mkdtemp()
        submit_id = self.d.add_submit(dirpath, "files", {
            "foo": "bar",
        })
        submit = self.d.view_submit(submit_id)
        assert submit.id == submit_id
        assert submit.tmp_path == dirpath
        assert submit.submit_type == "files"
        assert submit.data == {
            "foo": "bar",
        }

    def test_connect_no_create(self):
        AlembicVersion.__table__.drop(self.d.engine)
        self.d.connect(dsn=self.URI, create=False)
        assert "alembic_version" not in self.d.engine.table_names()
        self.d.connect(dsn=self.URI)
        assert "alembic_version" in self.d.engine.table_names()

    def test_view_submit_tasks(self):
        submit_id = self.d.add_submit(None, None, None)
        t1 = self.d.add_path(__file__, custom="1", submit_id=submit_id)
        t2 = self.d.add_path(__file__, custom="2", submit_id=submit_id)

        submit = self.d.view_submit(submit_id)
        assert submit.id == submit_id
        with pytest.raises(DetachedInstanceError):
            print submit.tasks

        submit = self.d.view_submit(submit_id, tasks=True)
        assert len(submit.tasks) == 2
        tasks = sorted((task.id, task) for task in submit.tasks)
        assert tasks[0][1].id == t1
        assert tasks[0][1].custom == "1"
        assert tasks[1][1].id == t2
        assert tasks[1][1].custom == "2"

    def test_add_reboot(self):
        t0 = self.d.add_path(__file__)
        s0 = self.d.add_submit(None, None, None)
        t1 = self.d.add_reboot(task_id=t0, submit_id=s0)

        t = self.d.view_task(t1)
        assert t.custom == "%s" % t0
        assert t.submit_id == s0

    def test_task_set_options(self):
        t0 = self.d.add_path(__file__, options={"foo": "bar"})
        t1 = self.d.add_path(__file__, options="foo=bar")
        assert self.d.view_task(t0).options == {"foo": "bar"}
        assert self.d.view_task(t1).options == {"foo": "bar"}

    def test_task_tags_str(self):
        task = self.d.add_path(__file__, tags="foo,,bar")
        tag0, tag1 = self.d.view_task(task).tags
        assert sorted((tag0.name, tag1.name)) == ["bar", "foo"]

    def test_task_tags_list(self):
        task = self.d.add_path(__file__, tags=["tag1", "tag2", "", 1, "tag3"])
        tag0, tag1, tag2 = self.d.view_task(task).tags
        assert sorted(
            (tag0.name, tag1.name, tag2.name)) == ["tag1", "tag2", "tag3"]

    def test_error_action(self):
        task_id = self.d.add_path(__file__)
        self.d.add_error("message1", task_id)
        self.d.add_error("message2", task_id, "actionhere")
        e1, e2 = self.d.view_errors(task_id)
        assert e1.message == "message1"
        assert e1.action is None
        assert e2.message == "message2"
        assert e2.action == "actionhere"

    def test_view_tasks(self):
        t1 = self.d.add_path(__file__)
        t2 = self.d.add_url("http://google.com/")
        tasks = self.d.view_tasks([t1, t2])
        assert tasks[0].to_dict() == self.d.view_task(t1).to_dict()
        assert tasks[1].to_dict() == self.d.view_task(t2).to_dict()

    def test_add_machine(self):
        self.d.add_machine("name1", "label", "1.2.3.4", "windows", None,
                           "tag1 tag2", "int0", "snap0", "5.6.7.8", 2043)
        self.d.add_machine("name2", "label", "1.2.3.4", "windows", "",
                           "tag1 tag2", "int0", "snap0", "5.6.7.8", 2043)
        self.d.add_machine("name3", "label", "1.2.3.4", "windows", "opt1 opt2",
                           "tag1 tag2", "int0", "snap0", "5.6.7.8", 2043)
        self.d.add_machine("name4", "label", "1.2.3.4", "windows",
                           ["opt3", "opt4"], "tag1 tag2", "int0", "snap0",
                           "5.6.7.8", 2043)
        m1 = self.d.view_machine("name1")
        m2 = self.d.view_machine("name2")
        m3 = self.d.view_machine("name3")
        m4 = self.d.view_machine("name4")
        assert m1.options == []
        assert m2.options == []
        assert m3.options == ["opt1", "opt2"]
        assert m4.options == ["opt3", "opt4"]

    def test_set_machine_rcparams(self):
        self.d.add_machine("name5", "label5", "1.2.3.4", "windows", None,
                           "tag1 tag2", "int0", "snap0", "5.6.7.8", 2043)

        self.d.set_machine_rcparams("label5", {
            "protocol": "rdp",
            "host": "127.0.0.1",
            "port": 3389,
        })

        m = self.d.view_machine("name5")
        assert m.rcparams == {
            "protocol": "rdp",
            "host": "127.0.0.1",
            "port": "3389",
        }

    @mock.patch("sflock.magic")
    def test_add_sample(self, p):
        p.from_file.return_value = ""
        assert self.d.add_path(Files.temp_put(os.urandom(16))) is not None
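
The DatabaseEngine class above leaves URI unset, which suggests it is meant to be driven through subclasses that each supply a concrete DSN. A minimal sketch of that pattern (the class names and connection strings below are illustrative assumptions, not part of the listing):

class TestSqliteEngine(DatabaseEngine):
    # Keep the whole run self-contained with an in-memory SQLite database.
    URI = "sqlite:///:memory:"

class TestPostgresEngine(DatabaseEngine):
    # Assumes a locally reachable PostgreSQL instance; adjust credentials
    # and database name to the environment at hand.
    URI = "postgresql://cuckoo:cuckoo@localhost/cuckootest"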
Example no. 4
class AnalysisManager(threading.Thread):
    """Analysis Manager.

    This class handles the full analysis process for a given task. It takes
    care of selecting the analysis machine, preparing the configuration and
    interacting with the guest agent and analyzer components to launch and
    complete the analysis and store, process and report its results.
    """

    def __init__(self, task_id, error_queue):
        """@param task: task object containing the details for the analysis."""
        threading.Thread.__init__(self)

        self.errors = error_queue
        self.cfg = Config()
        self.storage = ""
        self.binary = ""
        self.storage_binary = ""
        self.machine = None
        self.db = Database()
        self.task = self.db.view_task(task_id)
        self.guest_manager = None
        self.route = None
        self.interface = None
        self.rt_table = None

    def init(self):
        """Initialize the analysis."""
        self.storage = cwd(analysis=self.task.id)

        # If the analysis storage folder already exists, we need to abort the
        # analysis or previous results will be overwritten and lost.
        if os.path.exists(self.storage):
            log.error("Analysis results folder already exists at path \"%s\", "
                      "analysis aborted", self.storage)
            return False

        # If we're not able to create the analysis storage folder, we have to
        # abort the analysis.
        try:
            Folders.create(self.storage)
        except CuckooOperationalError:
            log.error("Unable to create analysis folder %s", self.storage)
            return False

        self.store_task_info()

        if self.task.category == "file" or self.task.category == "archive":
            # Check if we have permissions to access the file.
            # And fail this analysis if we don't have access to the file.
            if not os.access(self.task.target, os.R_OK):
                log.error(
                    "Unable to access target file, please check if we have "
                    "permissions to access the file: \"%s\"",
                    self.task.target
                )
                return False

            # Check whether the file has been changed for some unknown reason.
            # And fail this analysis if it has been modified.
            # TODO Absorb the file upon submission.
            sample = self.db.view_sample(self.task.sample_id)
            sha256 = File(self.task.target).get_sha256()
            if sha256 != sample.sha256:
                log.error(
                    "Target file has been modified after submission: \"%s\"",
                    self.task.target
                )
                return False

            # Store a copy of the original file if it does not exist already.
            # TODO This should be done at submission time.
            self.binary = cwd("storage", "binaries", sha256)
            if not os.path.exists(self.binary):
                try:
                    shutil.copy(self.task.target, self.binary)
                except (IOError, shutil.Error):
                    log.error(
                        "Unable to store file from \"%s\" to \"%s\", "
                        "analysis aborted", self.task.target, self.binary
                    )
                    return False

            # Each analysis directory contains a symlink/copy of the binary.
            try:
                self.storage_binary = os.path.join(self.storage, "binary")

                if hasattr(os, "symlink"):
                    os.symlink(self.binary, self.storage_binary)
                else:
                    shutil.copy(self.binary, self.storage_binary)
            except (AttributeError, OSError) as e:
                log.error("Unable to create symlink/copy from \"%s\" to "
                          "\"%s\": %s", self.binary, self.storage, e)
                return False

        # Initiates per-task logging.
        task_log_start(self.task.id)
        return True

    def store_task_info(self):
        """Grab the latest task from the database (if available) and update
        self.task accordingly."""
        dbtask = self.db.view_task(self.task.id)
        self.task = dbtask.to_dict()

        task_info_path = os.path.join(self.storage, "task.json")
        with open(task_info_path, "w") as f:
            f.write(dbtask.to_json())
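
        # The task.json dropped here is what later (re-)processing of this
        # analysis is expected to read back when the task row is no longer
        # readily available from the database.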

    def acquire_machine(self):
        """Acquire an analysis machine from the pool of available ones."""
        machine = None

        # Start a loop to acquire a machine to run the analysis on.
        while True:
            machine_lock.acquire()

            # In some cases it's possible that we enter this loop without
            # having any available machines. We should make sure this is not
            # the case, or the analysis task will fail completely.
            if not machinery.availables():
                machine_lock.release()
                time.sleep(1)
                continue

            # If the user specified a specific machine ID, a platform to be
            # used, or machine tags, acquire the machine accordingly.
            machine = machinery.acquire(machine_id=self.task.machine,
                                        platform=self.task.platform,
                                        tags=self.task.tags)

            # If no machine is available at this moment, wait for one second
            # and try again.
            if not machine:
                machine_lock.release()
                log.debug("Task #%d: no machine available yet", self.task.id)
                time.sleep(1)
            else:
                log.info(
                    "Task #%d: acquired machine %s (label=%s)",
                    self.task.id, machine.name, machine.label, extra={
                        "action": "vm.acquire",
                        "status": "success",
                        "vmname": machine.name,
                    }
                )
                break

        self.machine = machine
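        # Note that on the success path the machine_lock acquired above is
        # deliberately kept held here; launch_analysis() releases it only once
        # the machine has actually been started (or in its error handling).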

    def build_options(self):
        """Generate analysis options.
        @return: options dict.
        """
        options = {}

        if self.task.category == "file":
            options["file_name"] = File(self.task.target).get_name()
            options["file_type"] = File(self.task.target).get_type()
            options["pe_exports"] = \
                ",".join(File(self.task.target).get_exported_functions())

            package, activity = File(self.task.target).get_apk_entry()
            self.task.options["apk_entry"] = "%s:%s" % (package, activity)
        elif self.task.category == "archive":
            options["file_name"] = File(self.task.target).get_name()

        options["id"] = self.task.id
        options["ip"] = self.machine.resultserver_ip
        options["port"] = self.machine.resultserver_port
        options["category"] = self.task.category
        options["target"] = self.task.target
        options["package"] = self.task.package
        options["options"] = emit_options(self.task.options)
        options["enforce_timeout"] = self.task.enforce_timeout
        options["clock"] = self.task.clock
        options["terminate_processes"] = self.cfg.cuckoo.terminate_processes

        if not self.task.timeout:
            options["timeout"] = self.cfg.timeouts.default
        else:
            options["timeout"] = self.task.timeout

        # Copy in other analyzer-specific options; TEMPORARY (most likely).
        vm_options = getattr(machinery.options, self.machine.name)
        for k in vm_options:
            if k.startswith("analyzer_"):
                options[k] = vm_options[k]

        return options
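
    # For reference, the dict returned by build_options() for a file task is
    # roughly of the following shape (values purely illustrative):
    #   {"id": 1, "ip": "192.168.56.1", "port": 2042, "category": "file",
    #    "target": "/tmp/sample.exe", "package": "exe", "timeout": 120,
    #    "options": "free=yes,procmemdump=yes", ...}
    # where emit_options() is assumed to flatten the task options dict into a
    # comma-separated "key=value" string for the analyzer.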

    def route_network(self):
        """Enable network routing if desired."""
        # Determine the desired routing strategy (none, internet, VPN).
        self.route = self.task.options.get(
            "route", config("routing:routing:route")
        )

        if self.route == "none" or self.route == "drop":
            self.interface = None
            self.rt_table = None
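        # The inetsim and tor routes need no dedicated interface or routing
        # table at this point; their setup happens via the rooter() calls
        # further below in this method.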
        elif self.route == "inetsim":
            pass
        elif self.route == "tor":
            pass
        elif self.route == "internet":
            if config("routing:routing:internet") == "none":
                log.warning(
                    "Internet network routing has been specified, but not "
                    "configured, ignoring routing for this analysis", extra={
                        "action": "network.route",
                        "status": "error",
                        "route": self.route,
                    }
                )
                self.route = "none"
                self.task.options["route"] = "none"
                self.interface = None
                self.rt_table = None
            else:
                self.interface = config("routing:routing:internet")
                self.rt_table = config("routing:routing:rt_table")
        elif self.route in config("routing:vpn:vpns"):
            self.interface = config("routing:%s:interface" % self.route)
            self.rt_table = config("routing:%s:rt_table" % self.route)
        else:
            log.warning(
                "Unknown network routing destination specified, ignoring "
                "routing for this analysis: %r", self.route, extra={
                    "action": "network.route",
                    "status": "error",
                    "route": self.route,
                }
            )
            self.route = "none"
            self.task.options["route"] = "none"
            self.interface = None
            self.rt_table = None

        # Check if the network interface is still available. If a VPN dies for
        # some reason, its tunX interface will no longer be available.
        if self.interface and not rooter("nic_available", self.interface):
            log.error(
                "The network interface '%s' configured for this analysis is "
                "not available at the moment, switching to route=none mode.",
                self.interface, extra={
                    "action": "network.route",
                    "status": "error",
                    "route": self.route,
                }
            )
            self.route = "none"
            self.task.options["route"] = "none"
            self.interface = None
            self.rt_table = None

        # This doesn't work yet in combination with tor routing.
        if self.route == "drop" or self.route == "internet":
            rooter(
                "drop_enable", self.machine.ip,
                config("cuckoo:resultserver:ip"),
                str(config("cuckoo:resultserver:port"))
            )

        if self.route == "inetsim":
            machinery = config("cuckoo:cuckoo:machinery")
            rooter(
                "inetsim_enable", self.machine.ip,
                config("routing:inetsim:server"),
                config("%s:%s:interface" % (machinery, machinery)),
                str(config("cuckoo:resultserver:port")),
                config("routing:inetsim:ports") or ""
            )

        if self.route == "tor":
            rooter(
                "tor_enable", self.machine.ip,
                str(config("cuckoo:resultserver:ip")),
                str(config("routing:tor:dnsport")),
                str(config("routing:tor:proxyport"))
            )

        if self.interface:
            rooter(
                "forward_enable", self.machine.interface,
                self.interface, self.machine.ip
            )

        if self.rt_table:
            rooter(
                "srcroute_enable", self.rt_table, self.machine.ip
            )

        # Propagate the taken route to the database.
        self.db.set_route(self.task.id, self.route)
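
        # rooter() is the client-side helper that hands these commands to the
        # privileged Cuckoo Rooter process, which performs the actual
        # iptables / routing table changes on the host.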

    def unroute_network(self):
        """Disable any enabled network routing."""
        if self.interface:
            rooter(
                "forward_disable", self.machine.interface,
                self.interface, self.machine.ip
            )

        if self.rt_table:
            rooter(
                "srcroute_disable", self.rt_table, self.machine.ip
            )

        if self.route == "drop" or self.route == "internet":
            rooter(
                "drop_disable", self.machine.ip,
                config("cuckoo:resultserver:ip"),
                str(config("cuckoo:resultserver:port"))
            )

        if self.route == "inetsim":
            machinery = config("cuckoo:cuckoo:machinery")
            rooter(
                "inetsim_disable", self.machine.ip,
                config("routing:inetsim:server"),
                config("%s:%s:interface" % (machinery, machinery)),
                str(config("cuckoo:resultserver:port")),
                config("routing:inetsim:ports") or ""
            )

        if self.route == "tor":
            rooter(
                "tor_disable", self.machine.ip,
                str(config("cuckoo:resultserver:ip")),
                str(config("routing:tor:dnsport")),
                str(config("routing:tor:proxyport"))
            )

    def wait_finish(self):
        """Some VMs don't have an actual agent, mainly those that are used to
        assist an analysis through the services auxiliary module. This method
        just waits until the analysis is finished rather than actively trying
        to engage with the Cuckoo Agent."""
        self.db.guest_set_status(self.task.id, "running")
        while self.db.guest_get_status(self.task.id) == "running":
            time.sleep(1)

    def guest_manage(self, options):
        # Handle a special case where we're creating a baseline report of this
        # particular virtual machine - a report containing all the results
        # that are gathered if no additional samples are run in the VM. These
        # results, such as loaded drivers and opened sockets in volatility, or
        # DNS requests to hostnames related to Microsoft Windows, etc., may be
        # omitted or at the very least given less priority when creating a
        # report for an analysis that ran on this VM later on.
        if self.task.category == "baseline":
            time.sleep(options["timeout"])
        else:
            # Start the analysis.
            self.db.guest_set_status(self.task.id, "starting")
            monitor = self.task.options.get("monitor", "latest")
            self.guest_manager.start_analysis(options, monitor)

            # In case the Agent didn't respond and we force-quit the analysis
            # at some point while it was still starting the analysis the state
            # will be "stop" (or anything but "running", really).
            if self.db.guest_get_status(self.task.id) == "starting":
                self.db.guest_set_status(self.task.id, "running")
                self.guest_manager.wait_for_completion()

            self.db.guest_set_status(self.task.id, "stopping")

    def launch_analysis(self):
        """Start analysis."""
        succeeded = False

        if self.task.category == "file" or self.task.category == "archive":
            target = os.path.basename(self.task.target)
        else:
            target = self.task.target

        log.info(
            "Starting analysis of %s \"%s\" (task #%d, options \"%s\")",
            self.task.category.upper(), target, self.task.id,
            emit_options(self.task.options), extra={
                "action": "task.init",
                "status": "starting",
                "task_id": self.task.id,
                "target": target,
                "category": self.task.category,
                "package": self.task.package,
                "options": emit_options(self.task.options),
                "custom": self.task.custom,
            }
        )

        # Initialize the analysis.
        if not self.init():
            logger("Failed to initialize", action="task.init", status="error")
            return False

        # Acquire analysis machine.
        try:
            self.acquire_machine()
        except CuckooOperationalError as e:
            machine_lock.release()
            log.error("Cannot acquire machine: %s", e, extra={
                "action": "vm.acquire", "status": "error",
            })
            return False

        # At this point we can tell the ResultServer about it.
        try:
            ResultServer().add_task(self.task, self.machine)
        except Exception as e:
            machinery.release(self.machine.label)
            self.errors.put(e)

        # Initialize the guest manager.
        self.guest_manager = GuestManager(
            self.machine.name, self.machine.ip,
            self.machine.platform, self.task.id, self
        )

        self.aux = RunAuxiliary(self.task, self.machine, self.guest_manager)
        self.aux.start()

        # Generate the analysis configuration file.
        options = self.build_options()

        # Check if the current task has remotecontrol
        # enabled before starting the machine.
        control_enabled = (
            config("cuckoo:remotecontrol:enabled") and
            "remotecontrol" in self.task.options
        )
        if control_enabled:
            try:
                machinery.enable_remote_control(self.machine.label)
            except NotImplementedError:
                raise CuckooMachineError(
                    "Remote control support has not been implemented "
                    "for this machinery."
                )

        try:
            unlocked = False
            self.interface = None

            # Mark the selected analysis machine in the database as started.
            guest_log = self.db.guest_start(self.task.id,
                                            self.machine.name,
                                            self.machine.label,
                                            machinery.__class__.__name__)
            logger(
                "Starting VM",
                action="vm.start", status="pending",
                vmname=self.machine.name
            )

            # Start the machine.
            machinery.start(self.machine.label, self.task)

            logger(
                "Started VM",
                action="vm.start", status="success",
                vmname=self.machine.name
            )

            # Retrieve the connection parameters used for remote control.
            if control_enabled:
                try:
                    params = machinery.get_remote_control_params(
                        self.machine.label
                    )
                    self.db.set_machine_rcparams(self.machine.label, params)
                except NotImplementedError:
                    raise CuckooMachineError(
                        "Remote control support has not been implemented "
                        "for this machinery."
                    )

            # Enable network routing.
            self.route_network()

            # By the time start returns it will have fully started the Virtual
            # Machine. We can now safely release the machine lock.
            machine_lock.release()
            unlocked = True

            # Run and manage the components inside the guest unless this
            # machine has the "noagent" option specified (please refer to the
            # wait_finish() method above for more details).
            if "noagent" not in self.machine.options:
                self.guest_manage(options)
            else:
                self.wait_finish()

            succeeded = True
        except CuckooMachineSnapshotError as e:
            log.error(
                "Unable to restore to the snapshot for this Virtual Machine! "
                "Does your VM have a proper Snapshot and can you revert to it "
                "manually? VM: %s, error: %s",
                self.machine.name, e, extra={
                    "action": "vm.resume",
                    "status": "error",
                    "vmname": self.machine.name,
                }
            )
        except CuckooMachineError as e:
            if not unlocked:
                machine_lock.release()
            log.error(
                "Error starting Virtual Machine! VM: %s, error: %s",
                self.machine.name, e, extra={
                    "action": "vm.start",
                    "status": "error",
                    "vmname": self.machine.name,
                }
            )
        except CuckooGuestCriticalTimeout as e:
            if not unlocked:
                machine_lock.release()
            log.error(
                "Error from machine '%s': it appears that this Virtual "
                "Machine hasn't been configured properly as the Cuckoo Host "
                "wasn't able to connect to the Guest. There could be a few "
                "reasons for this, please refer to our documentation on the "
                "matter: %s",
                self.machine.name,
                faq("troubleshooting-vm-network-configuration"),
                extra={
                    "error_action": "vmrouting",
                    "action": "guest.handle",
                    "status": "error",
                    "task_id": self.task.id,
                }
            )
        except CuckooGuestError as e:
            if not unlocked:
                machine_lock.release()
            log.error("Error from the Cuckoo Guest: %s", e, extra={
                "action": "guest.handle",
                "status": "error",
                "task_id": self.task.id,
            })
        finally:
            # Stop Auxiliary modules.
            self.aux.stop()

            # Take a memory dump of the machine before shutting it off.
            if self.cfg.cuckoo.memory_dump or self.task.memory:
                logger(
                    "Taking full memory dump",
                    action="vm.memdump", status="pending",
                    vmname=self.machine.name
                )
                try:
                    dump_path = os.path.join(self.storage, "memory.dmp")
                    machinery.dump_memory(self.machine.label, dump_path)

                    logger(
                        "Taken full memory dump",
                        action="vm.memdump", status="success",
                        vmname=self.machine.name
                    )
                except NotImplementedError:
                    log.error(
                        "The memory dump functionality is not available for "
                        "the current machine manager.", extra={
                            "action": "vm.memdump",
                            "status": "error",
                            "vmname": self.machine.name,
                        }
                    )
                except CuckooMachineError as e:
                    log.error("Machinery error: %s", e, extra={
                        "action": "vm.memdump",
                        "status": "error",
                    })

            logger(
                "Stopping VM",
                action="vm.stop", status="pending",
                vmname=self.machine.name
            )

            try:
                # Stop the analysis machine.
                machinery.stop(self.machine.label)
            except CuckooMachineError as e:
                log.warning(
                    "Unable to stop machine %s: %s",
                    self.machine.label, e, extra={
                        "action": "vm.stop",
                        "status": "error",
                        "vmname": self.machine.name,
                    }
                )

            logger(
                "Stopped VM",
                action="vm.stop", status="success",
                vmname=self.machine.name
            )

            # Disable remote control after stopping the machine
            # if it was enabled for the task.
            if control_enabled:
                try:
                    machinery.disable_remote_control(self.machine.label)
                except NotImplementedError:
                    raise CuckooMachineError(
                        "Remote control support has not been implemented "
                        "for this machinery."
                    )

            # Mark the machine in the database as stopped. Unless this machine
            # has been marked as dead, we just keep it as "started" in the
            # database so it'll not be used later on in this session.
            self.db.guest_stop(guest_log)

            # After all this, we can make the ResultServer forget about the
            # internal state for this analysis task.
            ResultServer().del_task(self.task, self.machine)

            # Drop the network routing rules if any.
            self.unroute_network()

            try:
                # Release the analysis machine. But only if the machine has
                # not turned dead yet.
                machinery.release(self.machine.label)
            except CuckooMachineError as e:
                log.error(
                    "Unable to release machine %s, reason %s. You might need "
                    "to restore it manually.", self.machine.label, e, extra={
                        "action": "vm.release",
                        "status": "error",
                        "vmname": self.machine.name,
                    }
                )

        return succeeded

    def process_results(self):
        """Process the analysis results and generate the enabled reports."""
        logger(
            "Starting task reporting",
            action="task.report", status="pending"
        )

        # TODO Refactor this function as currently "cuckoo process" has a 1:1
        # copy of its code. TODO Also remove "archive" files.
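
        # Pipeline: the processing modules build the raw results dict, the
        # signatures enrich and flag it, and the reporting modules turn it
        # into the configured report formats.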
        results = RunProcessing(task=self.task).run()
        RunSignatures(results=results).run()
        RunReporting(task=self.task, results=results).run()

        # If the target is a file and the user enabled the option,
        # delete the original copy.
        if self.task.category == "file" and self.cfg.cuckoo.delete_original:
            if not os.path.exists(self.task.target):
                log.warning("Original file does not exist anymore: \"%s\": "
                            "File not found.", self.task.target)
            else:
                try:
                    os.remove(self.task.target)
                except OSError as e:
                    log.error("Unable to delete original file at path "
                              "\"%s\": %s", self.task.target, e)

        # If the target is a file and the user enabled the delete copy of
        # the binary option, then delete the copy.
        if self.task.category == "file" and self.cfg.cuckoo.delete_bin_copy:
            if not os.path.exists(self.binary):
                log.warning(
                    "Copy of the original file does not exist anymore: "
                    "\"%s\": File not found", self.binary
                )
            else:
                try:
                    os.remove(self.binary)
                except OSError as e:
                    log.error(
                        "Unable to delete the copy of the original file at "
                        "path \"%s\": %s", self.binary, e
                    )

            # Check if the binary in the analysis directory is an invalid
            # symlink. If it is, delete it.
            if os.path.islink(self.storage_binary) and \
                    not os.path.exists(self.storage_binary):
                try:
                    os.remove(self.storage_binary)
                except OSError as e:
                    log.error(
                        "Unable to delete symlink to the binary copy at "
                        "path \"%s\": %s", self.storage_binary, e
                    )

        log.info(
            "Task #%d: reports generation completed",
            self.task.id, extra={
                "action": "task.report",
                "status": "success",
            }
        )

        return True

    def run(self):
        """Run manager thread."""
        global active_analysis_count
        active_analysis_count += 1
        try:
            self.launch_analysis()

            log.debug("Released database task #%d", self.task.id)

            if self.cfg.cuckoo.process_results:
                self.store_task_info()
                self.db.set_status(self.task.id, TASK_COMPLETED)
                # TODO If self.process_results() is unified with apps.py's
                # process() method, then ensure that TASK_FAILED_PROCESSING is
                # handled correctly and not overwritten by the db.set_status()
                # at the end of this method.
                self.process_results()

            # We make a symbolic link ("latest") which links to the latest
            # analysis - this is useful for debugging purposes. This is only
            # supported under systems that support symbolic links.
            if hasattr(os, "symlink"):
                latest = cwd("storage", "analyses", "latest")

                # First we have to remove the existing symbolic link, then we
                # have to create the new one.
                # Deal with race conditions using a lock.
                latest_symlink_lock.acquire()
                try:
                    # As per documentation, lexists() returns True for dead
                    # symbolic links.
                    if os.path.lexists(latest):
                        os.remove(latest)

                    os.symlink(self.storage, latest)
                except OSError as e:
                    log.warning("Error pointing latest analysis symlink: %s", e)
                finally:
                    latest_symlink_lock.release()

            # Overwrite task.json so we have the latest data inside.
            self.store_task_info()
            log.info(
                "Task #%d: analysis procedure completed",
                self.task.id, extra={
                    "action": "task.stop",
                    "status": "success",
                }
            )
        except:
            log.exception("Failure in AnalysisManager.run", extra={
                "action": "task.stop",
                "status": "error",
            })
        finally:
            if self.cfg.cuckoo.process_results:
                self.db.set_status(self.task.id, TASK_REPORTED)
            else:
                self.db.set_status(self.task.id, TASK_COMPLETED)
            task_log_stop(self.task.id)
            active_analysis_count -= 1
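
For context, the surrounding Scheduler (not shown in this listing) is what creates and starts these manager threads, one per task pulled from the pending queue; a minimal hedged sketch of that call site, where "task" and "self.errors" stand in for the scheduler's own task object and error queue:

    analysis = AnalysisManager(task.id, self.errors)
    analysis.start()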
Example no. 5
class DatabaseEngine(object):
    """Tests database stuff."""
    URI = None

    def setup_class(self):
        set_cwd(tempfile.mkdtemp())

        self.d = Database()
        self.d.connect(dsn=self.URI)

    def add_url(self, url, priority=1, status="pending"):
        task_id = self.d.add_url(url, priority=priority)
        self.d.set_status(task_id, status)
        return task_id

    def test_add_tasks(self):
        fd, sample_path = tempfile.mkstemp()
        os.write(fd, "hehe")
        os.close(fd)

        # Add task.
        count = self.d.Session().query(Task).count()
        self.d.add_path(sample_path)
        assert self.d.Session().query(Task).count() == count + 1

        # Add url.
        self.d.add_url("http://foo.bar")
        assert self.d.Session().query(Task).count() == count + 2

    def test_processing_get_task(self):
        # First reset all existing rows so that earlier exceptions don't affect
        # this unit test run.
        null, session = None, self.d.Session()

        session.query(Task).filter(
            Task.status == "completed", Task.processing == null
        ).update({
            "processing": "something",
        })
        session.commit()

        t1 = self.add_url("http://google.com/1", priority=1, status="completed")
        t2 = self.add_url("http://google.com/2", priority=2, status="completed")
        t3 = self.add_url("http://google.com/3", priority=1, status="completed")
        t4 = self.add_url("http://google.com/4", priority=1, status="completed")
        t5 = self.add_url("http://google.com/5", priority=3, status="completed")
        t6 = self.add_url("http://google.com/6", priority=1, status="completed")
        t7 = self.add_url("http://google.com/7", priority=1, status="completed")

        assert self.d.processing_get_task("foo") == t5
        assert self.d.processing_get_task("foo") == t2
        assert self.d.processing_get_task("foo") == t1
        assert self.d.processing_get_task("foo") == t3
        assert self.d.processing_get_task("foo") == t4
        assert self.d.processing_get_task("foo") == t6
        assert self.d.processing_get_task("foo") == t7
        assert self.d.processing_get_task("foo") is None

    def test_error_exists(self):
        task_id = self.add_url("http://google.com/")
        self.d.add_error("A"*1024, task_id)
        assert len(self.d.view_errors(task_id)) == 1
        self.d.add_error("A"*1024, task_id)
        assert len(self.d.view_errors(task_id)) == 2

    def test_long_error(self):
        self.add_url("http://google.com/")
        self.d.add_error("A"*1024, 1)
        err = self.d.view_errors(1)
        assert err and len(err[0].message) == 1024

    def test_submit(self):
        dirpath = tempfile.mkdtemp()
        submit_id = self.d.add_submit(dirpath, "files", {
            "foo": "bar",
        })
        submit = self.d.view_submit(submit_id)
        assert submit.id == submit_id
        assert submit.tmp_path == dirpath
        assert submit.submit_type == "files"
        assert submit.data == {
            "foo": "bar",
        }

    def test_connect_no_create(self):
        AlembicVersion.__table__.drop(self.d.engine)
        self.d.connect(dsn=self.URI, create=False)
        assert "alembic_version" not in self.d.engine.table_names()
        self.d.connect(dsn=self.URI)
        assert "alembic_version" in self.d.engine.table_names()

    def test_view_submit_tasks(self):
        submit_id = self.d.add_submit(None, None, None)
        t1 = self.d.add_path(__file__, custom="1", submit_id=submit_id)
        t2 = self.d.add_path(__file__, custom="2", submit_id=submit_id)

        submit = self.d.view_submit(submit_id)
        assert submit.id == submit_id
        with pytest.raises(DetachedInstanceError):
            print submit.tasks

        submit = self.d.view_submit(submit_id, tasks=True)
        assert len(submit.tasks) == 2
        tasks = sorted((task.id, task) for task in submit.tasks)
        assert tasks[0][1].id == t1
        assert tasks[0][1].custom == "1"
        assert tasks[1][1].id == t2
        assert tasks[1][1].custom == "2"

    def test_add_reboot(self):
        t0 = self.d.add_path(__file__)
        s0 = self.d.add_submit(None, None, None)
        t1 = self.d.add_reboot(task_id=t0, submit_id=s0)

        t = self.d.view_task(t1)
        assert t.custom == "%s" % t0
        assert t.submit_id == s0

    def test_task_set_options(self):
        t0 = self.d.add_path(__file__, options={"foo": "bar"})
        t1 = self.d.add_path(__file__, options="foo=bar")
        assert self.d.view_task(t0).options == {"foo": "bar"}
        assert self.d.view_task(t1).options == {"foo": "bar"}

    def test_task_tags_str(self):
        task = self.d.add_path(__file__, tags="foo,,bar")
        tag0, tag1 = self.d.view_task(task).tags
        assert sorted((tag0.name, tag1.name)) == ["bar", "foo"]

    def test_task_tags_list(self):
        task = self.d.add_path(__file__, tags=["tag1", "tag2", "", 1, "tag3"])
        tag0, tag1, tag2 = self.d.view_task(task).tags
        assert sorted((tag0.name, tag1.name, tag2.name)) == [
            "tag1", "tag2", "tag3"
        ]

    def test_error_action(self):
        task_id = self.d.add_path(__file__)
        self.d.add_error("message1", task_id)
        self.d.add_error("message2", task_id, "actionhere")
        e1, e2 = self.d.view_errors(task_id)
        assert e1.message == "message1"
        assert e1.action is None
        assert e2.message == "message2"
        assert e2.action == "actionhere"

    def test_view_tasks(self):
        t1 = self.d.add_path(__file__)
        t2 = self.d.add_url("http://google.com/")
        tasks = self.d.view_tasks([t1, t2])
        assert tasks[0].to_dict() == self.d.view_task(t1).to_dict()
        assert tasks[1].to_dict() == self.d.view_task(t2).to_dict()

    def test_add_machine(self):
        self.d.add_machine(
            "name1", "label", "1.2.3.4", "windows", None,
            "tag1 tag2", "int0", "snap0", "5.6.7.8", 2043
        )
        self.d.add_machine(
            "name2", "label", "1.2.3.4", "windows", "",
            "tag1 tag2", "int0", "snap0", "5.6.7.8", 2043
        )
        self.d.add_machine(
            "name3", "label", "1.2.3.4", "windows", "opt1 opt2",
            "tag1 tag2", "int0", "snap0", "5.6.7.8", 2043
        )
        self.d.add_machine(
            "name4", "label", "1.2.3.4", "windows", ["opt3", "opt4"],
            "tag1 tag2", "int0", "snap0", "5.6.7.8", 2043
        )
        m1 = self.d.view_machine("name1")
        m2 = self.d.view_machine("name2")
        m3 = self.d.view_machine("name3")
        m4 = self.d.view_machine("name4")
        assert m1.options == []
        assert m2.options == []
        assert m3.options == ["opt1", "opt2"]
        assert m4.options == ["opt3", "opt4"]

    def test_set_machine_rcparams(self):
        self.d.add_machine(
            "name5", "label5", "1.2.3.4", "windows", None,
            "tag1 tag2", "int0", "snap0", "5.6.7.8", 2043
        )

        self.d.set_machine_rcparams("label5", {
            "protocol": "rdp",
            "host": "127.0.0.1",
            "port": 3389,
        })

        m = self.d.view_machine("name5")
        assert m.rcparams == {
            "protocol": "rdp",
            "host": "127.0.0.1",
            "port": "3389",
        }

    @mock.patch("cuckoo.common.objects.magic")
    def test_add_sample(self, p):
        p.from_file.return_value = ""
        assert self.d.add_path(Files.temp_put(os.urandom(16))) is not None