# Assumed imports for this test; the module path for `api` may differ
# across Cuckoo versions.
import os
import tempfile

import requests
import responses

from cuckoo.distributed import api

def test_cuckoo_api():
    """Test Distributed Cuckoo's interaction with the Cuckoo API."""
    with responses.RequestsMock(assert_all_requests_are_fired=True) as rsps:
        get(rsps, "/machines/list", json={"machines": "foo"})
        assert api.list_machines("http://localhost") == "foo"

        get(rsps, ":80/cuckoo/status", json={"a": "b"})
        assert api.node_status("http://localhost:80") == {"a": "b"}

        get(rsps, ":8080/cuckoo/status", body="TIMEOUT", status=500)
        assert api.node_status("http://localhost:8080") is None

        get(rsps, "/cuckoo/status", body=requests.ConnectionError("foo"))
        assert api.node_status("http://localhost") is None

        # Write a dummy sample to disk (Python 2: a str literal doubles as bytes).
        filepath = tempfile.mktemp()
        open(filepath, "wb").write("hello")

        d = {
            "filename": "bar.exe",
            "path": filepath,
            "package": None,
            "timeout": None,
            "priority": None,
            "options": None,
            "machine": None,
            "platform": None,
            "tags": None,
            "custom": None,
            "owner": None,
            "memory": None,
            "clock": None,
            "enforce_timeout": None,
        }

        post(rsps, ":80/tasks/create/file", json={"task_id": 12345})
        assert api.submit_task("http://localhost:80", d) == 12345

        post(rsps,
             ":8080/tasks/create/file",
             body=requests.ConnectionError("a"))
        assert api.submit_task("http://localhost:8080", d) is None

        get(rsps, "/tasks/list/100", json={"tasks": ["foo"]})
        assert api.fetch_tasks("http://localhost", "finished", 100) == ["foo"]

        get(rsps, "/tasks/report/1/json", body="A" * 1024 * 1024 * 8)
        dirpath = tempfile.mkdtemp()
        r = api.store_report("http://localhost", 1, "json", dirpath)
        assert r == (1, "json")
        buf = open(os.path.join(dirpath, "report.json"), "rb").read()
        assert buf == "A" * 1024 * 1024 * 8

        get(rsps, "/tasks/delete/42")
        assert api.delete_task("http://localhost", 42)

        get(rsps, "/pcap/get/123", body="A" * 1024)
        filepath = tempfile.mktemp()
        assert api.fetch_pcap("http://localhost", 123, filepath) is None
        assert open(filepath, "rb").read() == "A" * 1024
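The get and post helpers used above are not shown on this page; they presumably just register a mocked endpoint on the RequestsMock, rooted at http://localhost with an optional leading :port fragment. A minimal hypothetical sketch using the responses API:

def get(rsps, uri, **kwargs):
    # Hypothetical helper: mock a GET endpoint on localhost. Passing an
    # Exception as body= makes responses raise it, which the connection
    # error cases above rely on.
    rsps.add(responses.GET, "http://localhost%s" % uri, **kwargs)

def post(rsps, uri, **kwargs):
    # Hypothetical helper: mock a POST endpoint on localhost.
    rsps.add(responses.POST, "http://localhost%s" % uri, **kwargs)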
Example #2
def test_cuckoo_api():
    """Test Distributed Cuckoo's interaction with the Cuckoo API."""
    with responses.RequestsMock(assert_all_requests_are_fired=True) as rsps:
        get(rsps, "/machines/list", json={"machines": "foo"})
        assert api.list_machines("http://localhost") == "foo"

        get(rsps, "/cuckoo/status", json={"a": "b"})
        assert api.node_status("http://localhost") == {"a": "b"}

        get(rsps, "/cuckoo/status", body="TIMEOUT", status=500)
        assert api.node_status("http://localhost") is None

        get(rsps, "/cuckoo/status", body=requests.ConnectionError("foo"))
        assert api.node_status("http://localhost") is None

        # Write a dummy sample to disk (Python 2: a str literal doubles as bytes).
        filepath = tempfile.mktemp()
        open(filepath, "wb").write("hello")

        d = {
            "filename": "bar.exe",
            "path": filepath,
            "package": None,
            "timeout": None,
            "priority": None,
            "options": None,
            "machine": None,
            "platform": None,
            "tags": None,
            "custom": None,
            "owner": None,
            "memory": None,
            "clock": None,
            "enforce_timeout": None,
        }

        post(rsps, "/tasks/create/file", json={"task_id": 12345})
        assert api.submit_task("http://localhost", d) == 12345

        post(rsps, "/tasks/create/file", body=requests.ConnectionError("a"))
        assert api.submit_task("http://localhost", d) is None

        get(rsps, "/tasks/list/100", json={"tasks": ["foo"]})
        assert api.fetch_tasks("http://localhost", "finished", 100) == ["foo"]

        get(rsps, "/tasks/report/1/json", body="A"*1024*1024*8)
        dirpath = tempfile.mkdtemp()
        r = api.store_report("http://localhost", 1, "json", dirpath)
        assert r == (1, "json")
        buf = open(os.path.join(dirpath, "report.json"), "rb").read()
        assert buf == "A"*1024*1024*8

        get(rsps, "/tasks/delete/42")
        assert api.delete_task("http://localhost", 42)

        get(rsps, "/pcap/get/123", body="A"*1024)
        filepath = tempfile.mktemp()
        assert api.fetch_pcap("http://localhost", 123, filepath) is None
        assert open(filepath, "rb").read() == "A"*1024
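Both variants pin down the same contract for api.node_status: a 200 response yields the decoded JSON, while an HTTP error or a connection error yields None. A minimal sketch consistent with those assertions (not necessarily the real implementation):

def node_status(url):
    # Sketch only: return the status dict on success, None on any failure.
    try:
        r = requests.get("%s/cuckoo/status" % url)
        r.raise_for_status()
        return r.json()
    except requests.RequestException:
        return None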
Example #3
def scheduler():
    while True:
        nodes = Node.query.filter_by(enabled=True, mode="normal").all()
        random.shuffle(nodes)
        for node in nodes:
            # Check how many tasks have already been assigned for this node.
            q = Task.query.filter_by(status=Task.ASSIGNED, node_id=node.id)
            if q.count() >= settings.threshold:
                continue

            # Fetch the status of this node.
            status = node_status(node.url)
            if not status:
                log.debug("Error retrieving status of node %s", node.name)
                time.sleep(settings.interval)
                continue

            # Check whether this node still has enough samples to work with.
            if status["tasks"]["pending"] >= settings.threshold:
                continue

            # Schedule new samples for this node.
            q = Task.query.filter_by(status=Task.PENDING)
            tasks = q.limit(settings.threshold).all()
            for task in tasks:
                task.assign_node(node.id)

            if tasks:
                log.debug("Assigned %d tasks to %s", len(tasks), node.name)

            db.session.commit()

        time.sleep(10)
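The scheduler loops assume the distributed app's context: the Node and Task SQLAlchemy models, the db session, a settings object, and a module-level log. This first variant also shuffles the node list so that assignment order varies between iterations, and delegates the bookkeeping to task.assign_node(). Judging from the inline version in the next example, that method presumably amounts to:

def assign_node(self, node_id):
    # Hypothetical Task method, inferred from the inline assignment in the
    # next example: mark this task as assigned to the given node.
    self.status = Task.ASSIGNED
    self.node_id = node_id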
Example #4
def scheduler():
    while True:
        for node in Node.query.filter_by(enabled=True, mode="normal").all():
            # Check how many tasks have already been assigned for this node.
            q = Task.query.filter_by(status=Task.ASSIGNED, node_id=node.id)
            if q.count() >= settings.threshold:
                continue

            # Fetch the status of this node.
            status = node_status(node.url)
            if not status:
                log.debug("Error retrieving status of node %s", node.name)
                time.sleep(settings.interval)
                continue

            # Check whether this node still has enough samples to work with.
            if status["tasks"]["pending"] >= settings.threshold:
                continue

            # Schedule new samples for this node.
            q = Task.query.filter_by(status=Task.PENDING)
            tasks = q.limit(settings.threshold).all()
            for task in tasks:
                task.status = Task.ASSIGNED
                task.node_id = node.id

            if tasks:
                log.debug("Assigned %d tasks to %s", len(tasks), node.name)

            db.session.commit()

        time.sleep(10)
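The scheduler, and the handle_node() examples that follow, read their tuning knobs from a shared settings object. Only the attribute names below are taken from the code on this page; the values are placeholders:

class settings:
    # Placeholder values; only the attribute names come from the examples.
    threshold = 500                     # max tasks assigned/pending per node
    interval = 10                       # seconds to sleep between node polls
    pcap = True                         # whether handle_node() fetches dump.pcap
    report_formats = ["json"]           # report types handle_node() stores
    reports_directory = "/tmp/reports"  # where reports are written locally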
Example #5
def handle_node(instance):
    node = Node.query.filter_by(name=instance).first()
    if not node:
        log.critical("Node not found: %s", instance)
        return

    while True:
        # Fetch the status of this node.
        status = node_status(node.url)
        if not status:
            log.debug("Error retrieving status of node %s", node.name)
            time.sleep(settings.interval)
            continue

        # Include the timestamp of when we retrieved this status.
        status["timestamp"] = int(time.time())

        # Add this node status to the database for monitoring purposes.
        ns = NodeStatus(node.name, datetime.datetime.now(), status)
        db.session.add(ns)
        db.session.commit()

        # Submission of new tasks.
        if status["tasks"]["pending"] < settings.threshold:
            q = Task.query.filter_by(node_id=node.id, status=Task.ASSIGNED)
            q = q.order_by(Task.priority.desc(), Task.id)
            tasks = q.limit(settings.threshold).all()
            for t in tasks:
                task_id = submit_task(node.url, t.to_dict())
                if not task_id:
                    continue

                t.task_id = task_id
                t.status = Task.PROCESSING
                t.delegated = datetime.datetime.now()

            log.debug("Submitted %d tasks to %s", len(tasks), node.name)
            db.session.commit()

        # Fetching of reports.
        tasks = fetch_tasks(node.url, "reported", settings.threshold)
        for task in tasks:
            # The node_id/task_id tuple isn't necessarily unique, so we also
            # filter on the status; in case older analyses never finished,
            # we take the most recent match.
            q = Task.query.filter_by(
                node_id=node.id, task_id=task["id"], status=Task.PROCESSING
            )
            t = q.order_by(Task.id.desc()).first()

            if t is None:
                log.debug("Node %s task #%d has not been submitted "
                          "by us!", instance, task["id"])

                # Should we delete this task? This could be made more robust
                # by using the "owner" parameter. TODO: reintroduce.
                # delete_task(node.url, task["id"])
                continue

            dirpath = os.path.join(settings.reports_directory, "%d" % t.id)
            if not os.path.isdir(dirpath):
                os.makedirs(dirpath)

            # Fetch each report.
            for report_format in settings.report_formats:
                try:
                    store_report(node.url, t.task_id, report_format, dirpath)
                except InvalidReport as e:
                    log.critical(
                        "Error fetching report for task #%d (%s.%d): %s", t.id,
                        node.name, t.task_id, e)

            # Fetch the pcap file.
            if settings.pcap:
                pcap_path = os.path.join(dirpath, "dump.pcap")
                try:
                    fetch_pcap(node.url, t.task_id, pcap_path)
                except InvalidPcap as e:
                    log.critical(
                        "Error fetching pcap for task #%d (%s.%d): %s", t.id,
                        node.name, t.task_id, e)

            # Delete the task and all its associated files from the
            # Cuckoo node.
            delete_task(node.url, t.task_id)

            t.status = Task.FINISHED
            t.started = datetime.datetime.strptime(task["started_on"],
                                                   "%Y-%m-%d %H:%M:%S")
            t.completed = datetime.datetime.now()

        log.debug("Fetched %d reports from %s", len(tasks), node.name)

        db.session.commit()
        time.sleep(settings.interval)
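submit_task() serializes each task with t.to_dict(); the dictionary built in the first two examples suggests the shape it must return. A hypothetical sketch:

def to_dict(self):
    # Hypothetical Task method mirroring the dict used in the API tests.
    return {
        "filename": self.filename, "path": self.path,
        "package": self.package, "timeout": self.timeout,
        "priority": self.priority, "options": self.options,
        "machine": self.machine, "platform": self.platform,
        "tags": self.tags, "custom": self.custom, "owner": self.owner,
        "memory": self.memory, "clock": self.clock,
        "enforce_timeout": self.enforce_timeout,
    }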
Example #6
def handle_node(instance):
    node = Node.query.filter_by(name=instance).first()
    if not node:
        log.critical("Node not found: %s", instance)
        return

    while True:
        # Fetch the status of this node.
        status = node_status(node.url)
        if not status:
            log.debug("Error retrieving status of node %s", node.name)
            time.sleep(settings.interval)
            continue

        # Include the timestamp of when we retrieved this status.
        status["timestamp"] = int(time.time())

        # Add this node status to the database for monitoring purposes.
        ns = NodeStatus(node.name, datetime.datetime.now(), status)
        db.session.add(ns)
        db.session.commit()

        # Submission of new tasks.
        if status["tasks"]["pending"] < settings.threshold:
            q = Task.query.filter_by(node_id=node.id, status=Task.ASSIGNED)
            q = q.order_by(Task.priority.desc(), Task.id)
            tasks = q.limit(settings.threshold).all()
            for t in tasks:
                task_id = submit_task(node.url, t.to_dict())
                if not task_id:
                    continue

                t.task_id = task_id
                t.status = Task.PROCESSING
                t.delegated = datetime.datetime.now()

            log.debug("Submitted %d tasks to %s", len(tasks), node.name)
            db.session.commit()

        # Fetching of reports.
        tasks = fetch_tasks(node.url, "reported", settings.threshold)
        for task in tasks:
            # If a Cuckoo node has been reset over time, multiple tasks may
            # share the same node_id/task_id combination; in that case we
            # take the most recent one. (This makes it possible to re-setup
            # a Cuckoo node.)
            q = Task.query.filter_by(node_id=node.id, task_id=task["id"])
            t = q.order_by(Task.id.desc()).first()

            if t is None:
                log.debug("Node %s task #%d has not been submitted "
                          "by us!", instance, task["id"])

                # Should we delete this task? This could be made more robust
                # by using the "owner" parameter.
                delete_task(node.url, task["id"])
                continue

            dirpath = os.path.join(settings.reports_directory, "%d" % t.id)
            if not os.path.isdir(dirpath):
                os.makedirs(dirpath)

            # Fetch each report.
            for report_format in settings.report_formats:
                try:
                    store_report(node.url, t.task_id, report_format, dirpath)
                except InvalidReport as e:
                    log.critical(
                        "Error fetching report for task #%d (%s.%d): %s",
                        t.id, node.name, t.task_id, e
                    )

            # Fetch the pcap file.
            if settings.pcap:
                pcap_path = os.path.join(dirpath, "dump.pcap")
                try:
                    fetch_pcap(node.url, t.task_id, pcap_path)
                except InvalidPcap as e:
                    log.critical(
                        "Error fetching pcap for task #%d (%s.%d): %s",
                        t.id, node.name, t.task_id, e
                    )

            # Delete the task and all its associated files from the
            # Cuckoo node.
            delete_task(node.url, t.task_id)

            t.status = Task.FINISHED
            t.started = datetime.datetime.strptime(task["started_on"],
                                                   "%Y-%m-%d %H:%M:%S")
            t.completed = datetime.datetime.now()

        log.debug("Fetched %d reports from %s", len(tasks), node.name)

        db.session.commit()
        time.sleep(settings.interval)
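Taken together, the examples trace a single task lifecycle through Distributed Cuckoo:

# Task lifecycle implied by the functions above:
#
#   PENDING ----(scheduler: assign to a node)------------------> ASSIGNED
#   ASSIGNED ---(handle_node: submit_task to the node)---------> PROCESSING
#   PROCESSING -(handle_node: store_report / fetch_pcap, then
#                delete_task on the node)---------------------> FINISHED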