def test_cuckoo_api():
    """Test Distributed Cuckoo's interaction with the Cuckoo API."""
    with responses.RequestsMock(assert_all_requests_are_fired=True) as rsps:
        # List the available analysis machines of a node.
        get(rsps, "/machines/list", json={"machines": "foo"})
        assert api.list_machines("http://localhost") == "foo"

        # Node status: a healthy reply, an HTTP error, and a connection error.
        get(rsps, ":80/cuckoo/status", json={"a": "b"})
        assert api.node_status("http://localhost:80") == {"a": "b"}

        get(rsps, ":8080/cuckoo/status", body="TIMEOUT", status=500)
        assert api.node_status("http://localhost:8080") is None

        get(rsps, "/cuckoo/status", body=requests.ConnectionError("foo"))
        assert api.node_status("http://localhost") is None

        # Create a small sample file and the task dictionary to submit.
        filepath = tempfile.mktemp()
        open(filepath, "wb").write("hello")

        d = {
            "filename": "bar.exe",
            "path": filepath,
            "package": None,
            "timeout": None,
            "priority": None,
            "options": None,
            "machine": None,
            "platform": None,
            "tags": None,
            "custom": None,
            "owner": None,
            "memory": None,
            "clock": None,
            "enforce_timeout": None,
        }

        # Task submission, both successful and failing.
        post(rsps, ":80/tasks/create/file", json={"task_id": 12345})
        assert api.submit_task("http://localhost:80", d) == 12345

        post(rsps, ":8080/tasks/create/file",
             body=requests.ConnectionError("a"))
        assert api.submit_task("http://localhost:8080", d) is None

        # Fetch the list of finished tasks.
        get(rsps, "/tasks/list/100", json={"tasks": ["foo"]})
        assert api.fetch_tasks("http://localhost", "finished", 100) == ["foo"]

        # Store an 8 MB report to disk.
        get(rsps, "/tasks/report/1/json", body="A" * 1024 * 1024 * 8)
        dirpath = tempfile.mkdtemp()
        r = api.store_report("http://localhost", 1, "json", dirpath)
        assert r == (1, "json")

        buf = open(os.path.join(dirpath, "report.json"), "rb").read()
        assert buf == "A" * 1024 * 1024 * 8

        # Delete a task.
        get(rsps, "/tasks/delete/42")
        assert api.delete_task("http://localhost", 42)

        # Fetch the pcap of a task.
        get(rsps, "/pcap/get/123", body="A" * 1024)
        filepath = tempfile.mktemp()
        assert api.fetch_pcap("http://localhost", 123, filepath) is None
        assert open(filepath, "rb").read() == "A" * 1024

def test_cuckoo_api():
    """Test Distributed Cuckoo's interaction with the Cuckoo API."""
    with responses.RequestsMock(assert_all_requests_are_fired=True) as rsps:
        get(rsps, "/machines/list", json={"machines": "foo"})
        assert api.list_machines("http://localhost") == "foo"

        get(rsps, "/cuckoo/status", json={"a": "b"})
        assert api.node_status("http://localhost") == {"a": "b"}

        get(rsps, "/cuckoo/status", body="TIMEOUT", status=500)
        assert api.node_status("http://localhost") is None

        get(rsps, "/cuckoo/status", body=requests.ConnectionError("foo"))
        assert api.node_status("http://localhost") is None

        filepath = tempfile.mktemp()
        open(filepath, "wb").write("hello")

        d = {
            "filename": "bar.exe",
            "path": filepath,
            "package": None,
            "timeout": None,
            "priority": None,
            "options": None,
            "machine": None,
            "platform": None,
            "tags": None,
            "custom": None,
            "owner": None,
            "memory": None,
            "clock": None,
            "enforce_timeout": None,
        }

        post(rsps, "/tasks/create/file", json={"task_id": 12345})
        assert api.submit_task("http://localhost", d) == 12345

        post(rsps, "/tasks/create/file", body=requests.ConnectionError("a"))
        assert api.submit_task("http://localhost", d) is None

        get(rsps, "/tasks/list/100", json={"tasks": ["foo"]})
        assert api.fetch_tasks("http://localhost", "finished", 100) == ["foo"]

        get(rsps, "/tasks/report/1/json", body="A" * 1024 * 1024 * 8)
        dirpath = tempfile.mkdtemp()
        r = api.store_report("http://localhost", 1, "json", dirpath)
        assert r == (1, "json")

        buf = open(os.path.join(dirpath, "report.json"), "rb").read()
        assert buf == "A" * 1024 * 1024 * 8

        get(rsps, "/tasks/delete/42")
        assert api.delete_task("http://localhost", 42)

        get(rsps, "/pcap/get/123", body="A" * 1024)
        filepath = tempfile.mktemp()
        assert api.fetch_pcap("http://localhost", 123, filepath) is None
        assert open(filepath, "rb").read() == "A" * 1024

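# The tests above rely on small get()/post() helpers to register mocked
# endpoints on the RequestsMock instance. Those helpers are not shown in
# this section; the sketch below is an assumption based on how the tests
# call them (prefixing "http://localhost" and forwarding keyword arguments
# such as json=, body= and status= to the responses library), not the
# definitive implementation.
def get(rsps, uri, **kwargs):
    rsps.add(responses.GET, "http://localhost" + uri, **kwargs)

def post(rsps, uri, **kwargs):
    rsps.add(responses.POST, "http://localhost" + uri, **kwargs)
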
def scheduler():
    while True:
        nodes = Node.query.filter_by(enabled=True, mode="normal").all()
        random.shuffle(nodes)

        for node in nodes:
            # Check how many tasks have already been assigned for this node.
            q = Task.query.filter_by(status=Task.ASSIGNED, node_id=node.id)
            if q.count() >= settings.threshold:
                continue

            # Fetch the status of this node.
            status = node_status(node.url)
            if not status:
                log.debug("Error retrieving status of node %s", node.name)
                time.sleep(settings.interval)
                continue

            # Check whether this node still has enough samples to work with.
            if status["tasks"]["pending"] >= settings.threshold:
                continue

            # Schedule new samples for this node.
            q = Task.query.filter_by(status=Task.PENDING)
            tasks = q.limit(settings.threshold).all()
            for task in tasks:
                task.assign_node(node.id)

            if tasks:
                log.debug("Assigned %d tasks to %s", len(tasks), node.name)

        db.session.commit()
        time.sleep(10)

def scheduler():
    while True:
        for node in Node.query.filter_by(enabled=True, mode="normal").all():
            # Check how many tasks have already been assigned for this node.
            q = Task.query.filter_by(status=Task.ASSIGNED, node_id=node.id)
            if q.count() >= settings.threshold:
                continue

            # Fetch the status of this node.
            status = node_status(node.url)
            if not status:
                log.debug("Error retrieving status of node %s", node.name)
                time.sleep(settings.interval)
                continue

            # Check whether this node still has enough samples to work with.
            if status["tasks"]["pending"] >= settings.threshold:
                continue

            # Schedule new samples for this node.
            q = Task.query.filter_by(status=Task.PENDING)
            tasks = q.limit(settings.threshold).all()
            for task in tasks:
                task.status = Task.ASSIGNED
                task.node_id = node.id

            if tasks:
                log.debug("Assigned %d tasks to %s", len(tasks), node.name)

        db.session.commit()
        time.sleep(10)

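# The first scheduler variant calls task.assign_node(node.id), while the
# second one sets the columns directly. The helper itself is not part of
# this section; the sketch below assumes it simply mirrors that direct
# assignment on the Task model (the real method may update additional
# bookkeeping fields).
class Task(db.Model):
    # Only the helper used by the scheduler is sketched here; the actual
    # model defines the columns (status, node_id, priority, ...) as well.
    def assign_node(self, node_id):
        self.status = Task.ASSIGNED
        self.node_id = node_id
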
def handle_node(instance):
    node = Node.query.filter_by(name=instance).first()
    if not node:
        log.critical("Node not found: %s", instance)
        return

    while True:
        # Fetch the status of this node.
        status = node_status(node.url)
        if not status:
            log.debug("Error retrieving status of node %s", node.name)
            time.sleep(settings.interval)
            continue

        # Include the timestamp of when we retrieved this status.
        status["timestamp"] = int(time.time())

        # Add this node status to the database for monitoring purposes.
        ns = NodeStatus(node.name, datetime.datetime.now(), status)
        db.session.add(ns)
        db.session.commit()

        # Submission of new tasks.
        if status["tasks"]["pending"] < settings.threshold:
            q = Task.query.filter_by(node_id=node.id, status=Task.ASSIGNED)
            q = q.order_by(Task.priority.desc(), Task.id)
            tasks = q.limit(settings.threshold).all()
            for t in tasks:
                task_id = submit_task(node.url, t.to_dict())
                if not task_id:
                    continue

                t.task_id = task_id
                t.status = Task.PROCESSING
                t.delegated = datetime.datetime.now()

            log.debug("Submitted %d tasks to %s", len(tasks), node.name)
            db.session.commit()

        # Fetching of reports.
        tasks = fetch_tasks(node.url, "reported", settings.threshold)
        for task in tasks:
            # The node_id/task_id tuple isn't necessarily unique, therefore
            # also filter on the status. Just in case older analyses didn't
            # finish, we request the last one.
            t = Task.query.filter_by(
                node_id=node.id, task_id=task["id"], status=Task.PROCESSING
            ).order_by(Task.id.desc()).first()

            if t is None:
                log.debug("Node %s task #%d has not been submitted "
                          "by us!", instance, task["id"])

                # Should we delete this task? Improve through the usage of
                # the "owner" parameter. TODO Reintroduce.
                # delete_task(node.url, task["id"])
                continue

            dirpath = os.path.join(settings.reports_directory, "%d" % t.id)
            if not os.path.isdir(dirpath):
                os.makedirs(dirpath)

            # Fetch each report.
            for report_format in settings.report_formats:
                try:
                    store_report(node.url, t.task_id, report_format, dirpath)
                except InvalidReport as e:
                    log.critical(
                        "Error fetching report for task #%d (%s.%d): %s",
                        t.id, node.name, t.task_id, e)

            # Fetch the pcap file.
            if settings.pcap:
                pcap_path = os.path.join(dirpath, "dump.pcap")
                try:
                    fetch_pcap(node.url, t.task_id, pcap_path)
                except InvalidPcap as e:
                    log.critical(
                        "Error fetching pcap for task #%d (%s.%d): %s",
                        t.id, node.name, t.task_id, e)

            # Delete the task and all its associated files from the
            # Cuckoo node.
            delete_task(node.url, t.task_id)

            t.status = Task.FINISHED
            t.started = datetime.datetime.strptime(task["started_on"],
                                                   "%Y-%m-%d %H:%M:%S")
            t.completed = datetime.datetime.now()

        log.debug("Fetched %d reports from %s", len(tasks), node.name)
        db.session.commit()
        time.sleep(settings.interval)

def handle_node(instance):
    node = Node.query.filter_by(name=instance).first()
    if not node:
        log.critical("Node not found: %s", instance)
        return

    while True:
        # Fetch the status of this node.
        status = node_status(node.url)
        if not status:
            log.debug("Error retrieving status of node %s", node.name)
            time.sleep(settings.interval)
            continue

        # Include the timestamp of when we retrieved this status.
        status["timestamp"] = int(time.time())

        # Add this node status to the database for monitoring purposes.
        ns = NodeStatus(node.name, datetime.datetime.now(), status)
        db.session.add(ns)
        db.session.commit()

        # Submission of new tasks.
        if status["tasks"]["pending"] < settings.threshold:
            q = Task.query.filter_by(node_id=node.id, status=Task.ASSIGNED)
            q = q.order_by(Task.priority.desc(), Task.id)
            tasks = q.limit(settings.threshold).all()
            for t in tasks:
                task_id = submit_task(node.url, t.to_dict())
                if not task_id:
                    continue

                t.task_id = task_id
                t.status = Task.PROCESSING
                t.delegated = datetime.datetime.now()

            log.debug("Submitted %d tasks to %s", len(tasks), node.name)
            db.session.commit()

        # Fetching of reports.
        tasks = fetch_tasks(node.url, "reported", settings.threshold)
        for task in tasks:
            # In the case that a Cuckoo node has been reset over time it's
            # possible that there are multiple combinations of
            # node-id/task-id, in this case we take the last one available.
            # (This makes it possible to re-setup a Cuckoo node).
            q = Task.query.filter_by(node_id=node.id, task_id=task["id"])
            t = q.order_by(Task.id.desc()).first()

            if t is None:
                log.debug("Node %s task #%d has not been submitted "
                          "by us!", instance, task["id"])

                # Should we delete this task? Improve through the usage of
                # the "owner" parameter.
                delete_task(node.url, task["id"])
                continue

            dirpath = os.path.join(settings.reports_directory, "%d" % t.id)
            if not os.path.isdir(dirpath):
                os.makedirs(dirpath)

            # Fetch each report.
            for report_format in settings.report_formats:
                try:
                    store_report(node.url, t.task_id, report_format, dirpath)
                except InvalidReport as e:
                    log.critical(
                        "Error fetching report for task #%d (%s.%d): %s",
                        t.id, node.name, t.task_id, e)

            # Fetch the pcap file.
            if settings.pcap:
                pcap_path = os.path.join(dirpath, "dump.pcap")
                try:
                    fetch_pcap(node.url, t.task_id, pcap_path)
                except InvalidPcap as e:
                    log.critical(
                        "Error fetching pcap for task #%d (%s.%d): %s",
                        t.id, node.name, t.task_id, e)

            # Delete the task and all its associated files from the
            # Cuckoo node.
            delete_task(node.url, t.task_id)

            t.status = Task.FINISHED
            t.started = datetime.datetime.strptime(task["started_on"],
                                                   "%Y-%m-%d %H:%M:%S")
            t.completed = datetime.datetime.now()

        log.debug("Fetched %d reports from %s", len(tasks), node.name)
        db.session.commit()
        time.sleep(settings.interval)

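# handle_node() never returns, so the dispatcher is expected to run one
# instance of it per Cuckoo node. The entry point is not part of this
# section; the sketch below is one plausible way to start the handlers,
# assuming a plain daemon thread per enabled node (the project's actual
# launcher may differ).
import threading

def start_node_handlers():
    for node in Node.query.filter_by(enabled=True).all():
        t = threading.Thread(target=handle_node, args=(node.name,))
        t.daemon = True
        t.start()
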