def run(self, results):
    """Writes report.
    @param results: analysis results dictionary.
    @raise CuckooReportError: if fails to connect or write to MongoDB.
    """
    # We put the raise here and not at the import because it would
    # otherwise trigger even if the module is not enabled in the config.
    if not HAVE_MONGO:
        raise CuckooDependencyError("Unable to import pymongo "
                                    "(install with `pip install pymongo`)")

    self.connect()

    # Set mongo schema version.
    # TODO: This is not optimal because it runs on each analysis. It needs
    # to run only once at startup.
    if "cuckoo_schema" in self.db.collection_names():
        if self.db.cuckoo_schema.find_one()["version"] != self.SCHEMA_VERSION:
            raise CuckooReportError("Mongo schema version not expected, check data migration tool")
    else:
        self.db.cuckoo_schema.save({"version": self.SCHEMA_VERSION})

    # Create a copy of the dictionary. This is done in order to not modify
    # the original dictionary and possibly compromise the following
    # reporting modules.
    report = dict(results)

    if "network" not in report:
        report["network"] = {}

    # Add screenshot paths.
    report["shots"] = []
    shots_path = os.path.join(self.analysis_path, "shots")
    if os.path.exists(shots_path):
        shots = [shot for shot in os.listdir(shots_path) if shot.endswith(".jpg")]
        for shot_file in sorted(shots):
            shot_path = os.path.join(self.analysis_path, "shots", shot_file)
            screenshot = File(shot_path)
            if screenshot.valid():
                # Strip the extension as it's added later in the Django view.
                report["shots"].append(shot_file.replace(".jpg", ""))

    # Store chunks of API calls in a different collection and reference
    # those chunks back in the report. In this way we should defeat the
    # issue with the oversized reports exceeding MongoDB's boundaries.
    # Also allows paging of the reports.
    new_processes = []
    for process in report.get("behavior", {}).get("processes", []) or []:
        new_process = dict(process)
        chunk = []
        chunks_ids = []
        # Loop on each process call.
        for index, call in enumerate(process["calls"]):
            # If the chunk size is 100 or if the loop is completed then
            # store the chunk in MongoDB.
            if len(chunk) == 100:
                to_insert = {"pid": process["process_id"], "calls": chunk}
                chunk_id = self.db.calls.insert(to_insert)
                chunks_ids.append(chunk_id)
                # Reset the chunk.
                chunk = []
            # Append call to the chunk.
            chunk.append(call)
        # Store leftovers.
        if chunk:
            to_insert = {"pid": process["process_id"], "calls": chunk}
            chunk_id = self.db.calls.insert(to_insert)
            chunks_ids.append(chunk_id)
        # Add list of chunks.
        new_process["calls"] = chunks_ids
        new_processes.append(new_process)

    # Store the results in the report.
report["behavior"] = dict(report["behavior"]) report["behavior"]["processes"] = new_processes # Calculate the mlist_cnt for display if present to reduce db load if "signatures" in results: for entry in results["signatures"]: if entry["name"] == "ie_martian_children": report["mlist_cnt"] = len(entry["data"]) if entry["name"] == "office_martian_children": report["f_mlist_cnt"] = len(entry["data"]) # Other info we want quick access to from the web UI if results.has_key("virustotal") and results["virustotal"] and results[ "virustotal"].has_key( "positives") and results["virustotal"].has_key("total"): report["virustotal_summary"] = "%s/%s" % ( results["virustotal"]["positives"], results["virustotal"]["total"]) if results.get("suricata", False): keywords = ("tls", "alerts", "files", "http", "ssh", "dns") keywords_dict = ("suri_tls_cnt", "suri_alert_cnt", "suri_file_cnt", "suri_http_cnt", "suri_ssh_cnt", "suri_dns_cnt") for keyword, keyword_value in zip(keywords, keywords_dict): if results["suricata"].get(keyword, 0): report[keyword_value] = len(results["suricata"][keyword]) # Create an index based on the info.id dict key. Increases overall scalability # with large amounts of data. # Note: Silently ignores the creation if the index already exists. self.db.analysis.create_index("info.id", background=True) #trick for distributed api if results.get("info", {}).get("options", {}).get("main_task_id", ""): report["info"]["id"] = int( results["info"]["options"]["main_task_id"]) analyses = self.db.analysis.find( {"info.id": int(report["info"]["id"])}) if analyses.count() > 0: log.debug("Deleting analysis data for Task %s" % report["info"]["id"]) for analysis in analyses: for process in analysis["behavior"]["processes"]: for call in process["calls"]: self.db.calls.remove({"_id": ObjectId(call)}) self.db.analysis.remove({"_id": ObjectId(analysis["_id"])}) log.debug("Deleted previous MongoDB data for Task %s" % report["info"]["id"]) self.ensure_valid_utf8(report) # Store the report and retrieve its object id. 
    try:
        self.db.analysis.save(report, check_keys=False)
    except InvalidDocument as e:
        parent_key, psize = self.debug_dict_size(report)[0]
        if not self.options.get("fix_large_docs", False):
            # Just log the error and problem keys.
            log.error(str(e))
            log.error("Largest parent key: %s (%d MB)" % (parent_key, int(psize) / MEGABYTE))
        else:
            # Delete the problem keys and check for more.
            error_saved = True
            size_filter = MONGOSIZELIMIT
            while error_saved:
                if type(report) == list:
                    report = report[0]
                try:
                    if type(report[parent_key]) == list:
                        for j, parent_dict in enumerate(report[parent_key]):
                            child_key, csize = self.debug_dict_size(parent_dict, parent_key)[0]
                            if csize > size_filter:
                                if parent_key == child_key:
                                    log.warn("results['%s'] deleted due to size: %s" % (parent_key, csize))
                                    del report[parent_key]
                                    break
                                else:
                                    log.warn("results['%s']['%s'] deleted due to size: %s" % (parent_key, child_key, csize))
                                    del report[parent_key][j][child_key]
                    else:
                        child_key, csize = self.debug_dict_size(report[parent_key], parent_key)[0]
                        if csize > size_filter:
                            log.warn("else - results['%s']['%s'] deleted due to size: %s" % (parent_key, child_key, csize))
                            del report[parent_key][child_key]
                    try:
                        self.db.analysis.save(report, check_keys=False)
                        error_saved = False
                    except InvalidDocument as e:
                        parent_key, psize = self.debug_dict_size(report)[0]
                        log.error(str(e))
                        log.error("Largest parent key: %s (%d MB)" % (parent_key, int(psize) / MEGABYTE))
                        size_filter = size_filter - MEGABYTE
                except Exception as e:
                    log.error("Failed to delete child key: %s" % str(e))
                    error_saved = False

    self.conn.close()
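
# A minimal sketch of the read side of the chunking scheme above: the report
# keeps only chunk ObjectIds under each process, so a consumer (e.g. a web
# view) can page through calls one 100-call chunk at a time. Assumes a local
# MongoDB with the default "cuckoo" database; fetch_call_chunk(), task_id,
# process_index and page are illustrative names, not part of the module.
from bson.objectid import ObjectId
from pymongo import MongoClient


def fetch_call_chunk(task_id, process_index=0, page=0):
    """Return one stored chunk of API calls for a process of an analysis."""
    db = MongoClient("127.0.0.1", 27017).cuckoo
    analysis = db.analysis.find_one({"info.id": int(task_id)})
    if not analysis:
        return []
    processes = analysis.get("behavior", {}).get("processes", [])
    if process_index >= len(processes):
        return []
    chunk_ids = processes[process_index]["calls"]
    if page >= len(chunk_ids):
        return []
    chunk = db.calls.find_one({"_id": ObjectId(chunk_ids[page])})
    return chunk["calls"] if chunk else []
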
def run(self, results):
    """Writes report.
    @param results: analysis results dictionary.
    @raise CuckooReportError: if fails to connect or write to MongoDB.
    """
    # We put the raise here and not at the import because it would
    # otherwise trigger even if the module is not enabled in the config.
    if not HAVE_MONGO:
        raise CuckooDependencyError("Unable to import pymongo "
                                    "(install with `pip install pymongo`)")

    self.connect()

    # Set mongo schema version.
    # TODO: This is not optimal because it runs on each analysis. It needs
    # to run only once at startup.
    if "cuckoo_schema" in self.db.collection_names():
        if self.db.cuckoo_schema.find_one()["version"] != self.SCHEMA_VERSION:
            raise CuckooReportError("Mongo schema version not expected, check data migration tool")
    else:
        self.db.cuckoo_schema.save({"version": self.SCHEMA_VERSION})

    # Set a unique index on stored files, to avoid duplicates.
    # From pymongo docs:
    #   Returns the name of the created index if an index is actually created.
    #   Returns None if the index already exists.
    # TODO: This is not optimal because it runs on each analysis. It needs
    # to run only once at startup.
    self.db.fs.files.ensure_index("sha256", unique=True, sparse=True, name="sha256_unique")

    # Create a copy of the dictionary. This is done in order to not modify
    # the original dictionary and possibly compromise the following
    # reporting modules.
    report = dict(results)

    if "network" not in report:
        report["network"] = {}

    # Store the sample in GridFS.
    if results["info"]["category"] == "file" and "target" in results:
        sample = File(self.file_path)
        if sample.valid():
            fname = results["target"]["file"]["name"]
            sample_id = self.store_file(sample, filename=fname)
            report["target"] = {"file_id": sample_id}
            report["target"].update(results["target"])

    # Store the PCAP file in GridFS and reference it back in the report.
    pcap_path = os.path.join(self.analysis_path, "dump.pcap")
    pcap = File(pcap_path)
    if pcap.valid():
        pcap_id = self.store_file(pcap)
        report["network"]["pcap_id"] = pcap_id

    sorted_pcap_path = os.path.join(self.analysis_path, "dump_sorted.pcap")
    spcap = File(sorted_pcap_path)
    if spcap.valid():
        spcap_id = self.store_file(spcap)
        report["network"]["sorted_pcap_id"] = spcap_id

    # Store the process memory dump files in GridFS and reference them back in the report.
    if "procmemory" in report and self.options.get("store_memdump", False):
        for idx, procmem in enumerate(report["procmemory"]):
            procmem_path = os.path.join(self.analysis_path, "memory", "{0}.dmp".format(procmem["pid"]))
            procmem_file = File(procmem_path)
            if procmem_file.valid():
                procmem_id = self.store_file(procmem_file)
                report["procmemory"][idx].update({"procmem_id": procmem_id})

    # Walk through the dropped files, store them in GridFS and update the
    # report with the ObjectIds.
    new_dropped = []
    if "dropped" in report:
        for dropped in report["dropped"]:
            new_drop = dict(dropped)
            drop = File(dropped["path"])
            if drop.valid():
                dropped_id = self.store_file(drop, filename=dropped["name"])
                new_drop["object_id"] = dropped_id
            new_dropped.append(new_drop)

    report["dropped"] = new_dropped

    # Add screenshots.
    report["shots"] = []
    shots_path = os.path.join(self.analysis_path, "shots")
    if os.path.exists(shots_path):
        # Walk through the files and select the JPGs.
        shots = [shot for shot in os.listdir(shots_path) if shot.endswith(".jpg")]
        for shot_file in sorted(shots):
            shot_path = os.path.join(self.analysis_path, "shots", shot_file)
            shot = File(shot_path)
            # If the screenshot path is a valid file, store it and
            # reference it back in the report.
            if shot.valid():
                shot_id = self.store_file(shot)
                report["shots"].append(shot_id)

    # Store chunks of API calls in a different collection and reference
    # those chunks back in the report. In this way we should defeat the
    # issue with the oversized reports exceeding MongoDB's boundaries.
    # Also allows paging of the reports.
    if "behavior" in report and "processes" in report["behavior"]:
        new_processes = []
        for process in report["behavior"]["processes"]:
            new_process = dict(process)
            chunk = []
            chunks_ids = []
            # Loop on each process call.
            for index, call in enumerate(process["calls"]):
                # If the chunk size is 100 or if the loop is completed then
                # store the chunk in MongoDB.
                if len(chunk) == 100:
                    to_insert = {"pid": process["process_id"], "calls": chunk}
                    chunk_id = self.db.calls.insert(to_insert)
                    chunks_ids.append(chunk_id)
                    # Reset the chunk.
                    chunk = []
                # Append call to the chunk.
                chunk.append(call)
            # Store leftovers.
            if chunk:
                to_insert = {"pid": process["process_id"], "calls": chunk}
                chunk_id = self.db.calls.insert(to_insert)
                chunks_ids.append(chunk_id)
            # Add list of chunks.
            new_process["calls"] = chunks_ids
            new_processes.append(new_process)

        # Store the results in the report.
        report["behavior"] = dict(report["behavior"])
        report["behavior"]["processes"] = new_processes

    # Store the report and retrieve its object id.
    self.db.analysis.save(report)
    # self.conn.disconnect() no longer exists, use close() instead.
    self.conn.close()
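
# A minimal sketch of retrieving a GridFS-stored artifact referenced by this
# variant of the report (e.g. network.pcap_id or a "shots" entry). Assumes the
# same local "cuckoo" database used by connect() above; dump_gridfs_file() and
# out_path are illustrative names, not part of the module.
from bson.objectid import ObjectId
from gridfs import GridFS
from pymongo import MongoClient


def dump_gridfs_file(file_id, out_path):
    """Write the GridFS file with the given ObjectId to disk."""
    db = MongoClient("127.0.0.1", 27017).cuckoo
    fs = GridFS(db)
    grid_out = fs.get(ObjectId(file_id))  # raises gridfs.errors.NoFile if unknown
    with open(out_path, "wb") as handle:
        handle.write(grid_out.read())
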
def run(self, results):
    """Writes report.
    @param results: analysis results dictionary.
    @raise CuckooReportError: if fails to connect or write to Elasticsearch.
    """
    # We put the raise here and not at the import because it would
    # otherwise trigger even if the module is not enabled in the config.
    if not HAVE_ELASTICSEARCH:
        raise CuckooDependencyError("Unable to import elasticsearch "
                                    "(install with `pip install elasticsearch`)")

    self.connect()
    index_prefix = self.options.get("index", "cuckoo")
    search_only = self.options.get("searchonly", False)

    # Create a copy of the dictionary. This is done in order to not modify
    # the original dictionary and possibly compromise the following
    # reporting modules.
    report = dict(results)

    idxdate = report["info"]["started"].split(" ")[0]
    self.index_name = "{0}-{1}".format(index_prefix, idxdate)

    if not search_only:
        if "network" not in report:
            report["network"] = {}

        # Store API calls in chunks for pagination in Django.
        if "behavior" in report and "processes" in report["behavior"]:
            new_processes = []
            for process in report["behavior"]["processes"]:
                new_process = dict(process)
                chunk = []
                chunks_ids = []
                # Loop on each process call.
                for index, call in enumerate(process["calls"]):
                    # If the chunk size is 100 or if the loop is completed then
                    # store the chunk in Elasticsearch.
                    if len(chunk) == 100:
                        to_insert = {"pid": process["process_id"], "calls": chunk}
                        pchunk = self.es.index(index=self.index_name, doc_type="calls", body=to_insert)
                        chunk_id = pchunk["_id"]
                        chunks_ids.append(chunk_id)
                        # Reset the chunk.
                        chunk = []
                    # Append call to the chunk.
                    chunk.append(call)
                # Store leftovers.
                if chunk:
                    to_insert = {"pid": process["process_id"], "calls": chunk}
                    pchunk = self.es.index(index=self.index_name, doc_type="calls", body=to_insert)
                    chunk_id = pchunk["_id"]
                    chunks_ids.append(chunk_id)
                # Add list of chunks.
                new_process["calls"] = chunks_ids
                new_processes.append(new_process)

            # Store the results in the report.
report["behavior"] = dict(report["behavior"]) report["behavior"]["processes"] = new_processes # Add screenshot paths report["shots"] = [] shots_path = os.path.join(self.analysis_path, "shots") if os.path.exists(shots_path): shots = [shot for shot in os.listdir(shots_path) if shot.endswith(".jpg")] for shot_file in sorted(shots): shot_path = os.path.join(self.analysis_path, "shots", shot_file) screenshot = File(shot_path) if screenshot.valid(): # Strip the extension as it's added later # in the Django view report["shots"].append(shot_file.replace(".jpg", "")) # Other info we want Quick access to from the web UI if results.has_key("virustotal") and results["virustotal"] and results["virustotal"].has_key("positives") and results["virustotal"].has_key("total"): report["virustotal_summary"] = "%s/%s" % (results["virustotal"]["positives"],results["virustotal"]["total"]) if results.has_key("suricata") and results["suricata"]: if results["suricata"].has_key("tls") and len(results["suricata"]["tls"]) > 0: report["suri_tls_cnt"] = len(results["suricata"]["tls"]) if results["suricata"] and results["suricata"].has_key("alerts") and len(results["suricata"]["alerts"]) > 0: report["suri_alert_cnt"] = len(results["suricata"]["alerts"]) if results["suricata"].has_key("files") and len(results["suricata"]["files"]) > 0: report["suri_file_cnt"] = len(results["suricata"]["files"]) if results["suricata"].has_key("http") and len(results["suricata"]["http"]) > 0: report["suri_http_cnt"] = len(results["suricata"]["http"]) else: report = {} report["task_id"] = results["info"]["id"] report["info"] = results.get("info") report["target"] = results.get("target") report["summary"] = results.get("behavior", {}).get("summary") report["network"] = results.get("network") report["virustotal"] = results.get("virustotal") report["virustotal_summary"] = "%s/%s" % (results["virustotal"]["positives"],results["virustotal"]["total"]) # Store the report and retrieve its object id. self.es.index(index=self.index_name, doc_type="analysis", id=results["info"]["id"], body=report)
from lib.cuckoo.common.exceptions import CuckooDatabaseError
from lib.cuckoo.common.exceptions import CuckooOperationalError
from lib.cuckoo.common.exceptions import CuckooDependencyError
from lib.cuckoo.common.objects import File, URL
from lib.cuckoo.common.utils import create_folder, Singleton, classlock, SuperLock

try:
    from sqlalchemy import create_engine, Column, not_
    from sqlalchemy import Integer, String, Boolean, DateTime, Enum
    from sqlalchemy import ForeignKey, Text, Index, Table
    from sqlalchemy.ext.declarative import declarative_base
    from sqlalchemy.exc import SQLAlchemyError, IntegrityError
    from sqlalchemy.orm import sessionmaker, relationship, joinedload, backref
    Base = declarative_base()
except ImportError:
    raise CuckooDependencyError("Unable to import sqlalchemy "
                                "(install with `pip install sqlalchemy`)")

log = logging.getLogger(__name__)

SCHEMA_VERSION = "3aa42d870199"
TASK_PENDING = "pending"
TASK_RUNNING = "running"
TASK_COMPLETED = "completed"
TASK_RECOVERED = "recovered"
TASK_REPORTED = "reported"
TASK_FAILED_ANALYSIS = "failed_analysis"
TASK_FAILED_PROCESSING = "failed_processing"
TASK_FAILED_REPORTING = "failed_reporting"

# Secondary table used in association Machine - Tag.
machines_tags = Table("machines_tags", Base.metadata,
# This file is part of Cuckoo Sandbox - http://www.cuckoosandbox.org
# See the file 'docs/LICENSE' for copying permission.

import os

from lib.cuckoo.common.abstracts import Report
from lib.cuckoo.common.exceptions import CuckooDependencyError, CuckooReportError
from lib.cuckoo.common.objects import File

try:
    from pymongo.connection import Connection
    from pymongo.errors import ConnectionFailure, InvalidDocument
    from gridfs import GridFS
    from gridfs.errors import FileExists
except ImportError:
    raise CuckooDependencyError("Unable to import pymongo")


class MongoDB(Report):
    """Stores report in MongoDB."""

    def connect(self):
        """Connects to Mongo database, loads options and sets connectors.
        @raise CuckooReportError: if unable to connect.
        """
        host = self.options.get("host", "127.0.0.1")
        port = self.options.get("port", 27017)

        try:
            self.conn = Connection(host, port)
            self.db = self.conn.cuckoo
            self.fs = GridFS(self.db)
def __init__(self):
    if not HAVE_LIBVIRT:
        raise CuckooDependencyError("Unable to import libvirt")
    super(LibVirtMachinery, self).__init__()
def run(self, results):
    """Writes report.
    @param results: analysis results dictionary.
    @raise CuckooReportError: if fails to connect or write to MongoDB.
    """
    # We put the raise here and not at the import because it would
    # otherwise trigger even if the module is not enabled in the config.
    if not HAVE_MONGO:
        raise CuckooDependencyError(
            "Unable to import pymongo (install with `pip3 install pymongo`)"
        )

    self.connect()

    # Set mongo schema version.
    # TODO: This is not optimal because it runs on each analysis. It needs to run only once at startup.
    if "cuckoo_schema" in self.db.collection_names():
        if self.db.cuckoo_schema.find_one()["version"] != self.SCHEMA_VERSION:
            raise CuckooReportError("Mongo schema version not expected, check data migration tool")
    else:
        self.db.cuckoo_schema.insert_one({"version": self.SCHEMA_VERSION})

    # Create a copy of the dictionary. This is done in order to not modify
    # the original dictionary and possibly compromise the following
    # reporting modules.
    report = get_json_document(results, self.analysis_path)

    if "network" not in report:
        report["network"] = {}

    new_processes = insert_calls(report, mongo_calls_db=self.db.calls)

    # Store the results in the report.
    report["behavior"] = dict(report["behavior"])
    report["behavior"]["processes"] = new_processes

    # Create an index based on the info.id dict key. Increases overall
    # scalability with large amounts of data.
    # Note: Silently ignores the creation if the index already exists.
    self.db.analysis.create_index("info.id", background=True)

    # We perform a lot of SHA256 hash lookups, so we need this index.
    self.db.analysis.create_index(
        [("target.file.sha256", TEXT), ("dropped.sha256", TEXT),
         ("procdump.sha256", TEXT), ("CAPE.payloads.sha256", TEXT)],
        name="ALL_SHA256",
        background=True,
    )

    # Trick for the distributed API.
    if results.get("info", {}).get("options", {}).get("main_task_id", ""):
        report["info"]["id"] = int(results["info"]["options"]["main_task_id"])

    analyses = self.db.analysis.find({"info.id": int(report["info"]["id"])})
    if analyses:
        log.debug("Deleting analysis data for Task %s", report["info"]["id"])
        for analysis in analyses:
            for process in analysis["behavior"].get("processes", []) or []:
                for call in process["calls"]:
                    self.db.calls.remove({"_id": ObjectId(call)})
            self.db.analysis.remove({"_id": ObjectId(analysis["_id"])})
        log.debug("Deleted previous MongoDB data for Task %s", report["info"]["id"])

    ensure_valid_utf8(report)
    gc.collect()

    # Store the report and retrieve its object id.
    try:
        self.db.analysis.insert_one(report)
    except InvalidDocument as e:
        if str(e).startswith("cannot encode object") or str(e).endswith("must not contain '.'"):
            self.loop_saver(report)
            return
        parent_key, psize = self.debug_dict_size(report)[0]
        if not self.options.get("fix_large_docs", False):
            # Just log the error and problem keys.
            # log.error(str(e))
            log.warning("Largest parent key: %s (%d MB)", parent_key, int(psize) // MEGABYTE)
        else:
            # Delete the problem keys and check for more.
            error_saved = True
            size_filter = MONGOSIZELIMIT
            while error_saved:
                if isinstance(report, list):
                    report = report[0]
                try:
                    if isinstance(report[parent_key], list):
                        for j, parent_dict in enumerate(report[parent_key]):
                            child_key, csize = self.debug_dict_size(parent_dict)[0]
                            if csize > size_filter:
                                log.warn("results['%s']['%s'] deleted due to size: %s", parent_key, child_key, csize)
                                del report[parent_key][j][child_key]
                    else:
                        child_key, csize = self.debug_dict_size(report[parent_key])[0]
                        if csize > size_filter:
                            log.warn("results['%s']['%s'] deleted due to size: %s", parent_key, child_key, csize)
                            del report[parent_key][child_key]
                    try:
                        self.db.analysis.insert_one(report)
                        error_saved = False
                    except InvalidDocument as e:
                        if str(e).startswith("documents must have only string keys"):
                            log.error("Search bug in your modifications - you got a dictionary key as int, it should be a string")
                            log.error(str(e))
                            return
                        else:
                            parent_key, psize = self.debug_dict_size(report)[0]
                            log.error(str(e))
                            log.warning("Largest parent key: %s (%d MB)", parent_key, int(psize) // MEGABYTE)
                            size_filter -= MEGABYTE
                except Exception as e:
                    log.error("Failed to delete child key: %s", e)
                    error_saved = False

    self.conn.close()
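
# A minimal sketch of a lookup that the ALL_SHA256 text index above enables:
# a single $text query matches the sample, dropped-file, procdump and CAPE
# payload hashes at once. Assumes the same local "cuckoo" database; the
# find_tasks_by_sha256() helper is illustrative, not part of the module.
from pymongo import MongoClient


def find_tasks_by_sha256(sha256):
    """Return task ids of analyses that reference the given SHA256."""
    db = MongoClient("127.0.0.1", 27017).cuckoo
    cursor = db.analysis.find({"$text": {"$search": sha256}}, {"info.id": 1})
    return [doc["info"]["id"] for doc in cursor]
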
from lib.cuckoo.common.config import Config
from lib.cuckoo.common.exceptions import CuckooDependencyError
from lib.cuckoo.common.objects import File, URL
from lib.cuckoo.common.utils import create_folder, Singleton

try:
    from sqlalchemy import create_engine, Column
    from sqlalchemy import Integer, String, Boolean, DateTime, Enum
    from sqlalchemy import ForeignKey, Text, Index
    from sqlalchemy.orm import sessionmaker, relationship, joinedload
    from sqlalchemy.sql import func
    from sqlalchemy.ext.declarative import declarative_base
    from sqlalchemy.exc import SQLAlchemyError, IntegrityError
    from sqlalchemy.pool import NullPool
    Base = declarative_base()
except ImportError:
    raise CuckooDependencyError("SQLAlchemy library not found, "
                                "verify your setup")


class Machine(Base):
    """Configured virtual machines to be used as guests."""
    __tablename__ = "machines"

    id = Column(Integer(), primary_key=True)
    name = Column(String(255), nullable=False)
    label = Column(String(255), nullable=False)
    ip = Column(String(255), nullable=False)
    platform = Column(String(255), nullable=False)
    locked = Column(Boolean(), nullable=False, default=False)
    locked_changed_on = Column(DateTime(timezone=False), nullable=True)
    status = Column(String(255), nullable=True)
    status_changed_on = Column(DateTime(timezone=False), nullable=True)
# Copyright (C) 2010-2012 Cuckoo Sandbox Developers.
# This file is part of Cuckoo Sandbox - http://www.cuckoosandbox.org
# See the file 'docs/LICENSE' for copying permission.

from lib.cuckoo.common.abstracts import MachineManager
from lib.cuckoo.common.exceptions import CuckooDependencyError, CuckooMachineError

try:
    import libvirt
except ImportError:
    raise CuckooDependencyError("Unable to import libvirt")


class KVM(MachineManager):
    """Virtualization layer for KVM based on python-libvirt."""

    def _initialize_check(self):
        """Runs all checks when a machine manager is initialized.
        @raise CuckooMachineError: if libvirt version is not supported.
        """
        # KVM specific checks.
        if not self._version_check():
            raise CuckooMachineError("Libvirt version is not supported, please get an updated version")

        # Base checks.
        super(KVM, self)._initialize_check()

    def start(self, label):
        """Starts a virtual machine.
        @param label: virtual machine name.
        @raise CuckooMachineError: if unable to start virtual machine.
def run(self, results):
    """Writes report.
    @param results: analysis results dictionary.
    @raise CuckooReportError: if fails to connect or write to MongoDB.
    """
    # We put the raise here and not at the import because it would
    # otherwise trigger even if the module is not enabled in the config.
    if not HAVE_MONGO:
        raise CuckooDependencyError("Unable to import pymongo "
                                    "(install with `pip install pymongo`)")

    self.connect()

    # Set mongo schema version.
    # TODO: This is not optimal because it runs on each analysis. It needs
    # to run only once at startup.
    if "cuckoo_schema" in self.db.collection_names():
        if self.db.cuckoo_schema.find_one()["version"] != self.SCHEMA_VERSION:
            raise CuckooReportError("Mongo schema version not expected, check data migration tool")
    else:
        self.db.cuckoo_schema.save({"version": self.SCHEMA_VERSION})

    # Set a unique index on stored files, to avoid duplicates.
    # From pymongo docs:
    #   Returns the name of the created index if an index is actually created.
    #   Returns None if the index already exists.
    # TODO: This is not optimal because it runs on each analysis. It needs
    # to run only once at startup.
    self.db.fs.files.ensure_index("sha256", unique=True, sparse=True, name="sha256_unique")

    # Create a copy of the dictionary. This is done in order to not modify
    # the original dictionary and possibly compromise the following
    # reporting modules.
    report = dict(results)

    if "network" not in report:
        report["network"] = {}

    # Store the sample in GridFS.
    if results["info"]["category"] == "file" and "target" in results:
        sample = File(self.file_path)
        if sample.valid():
            fname = results["target"]["file"]["name"]
            sample_id = self.store_file(sample, filename=fname)
            report["target"] = {"file_id": sample_id}
            report["target"].update(results["target"])

    # Store the PCAP file in GridFS and reference it back in the report.
    pcap_path = os.path.join(self.analysis_path, "dump.pcap")
    pcap = File(pcap_path)
    if pcap.valid():
        pcap_id = self.store_file(pcap)
        report["network"]["pcap_id"] = pcap_id

    sorted_pcap_path = os.path.join(self.analysis_path, "dump_sorted.pcap")
    spcap = File(sorted_pcap_path)
    if spcap.valid():
        spcap_id = self.store_file(spcap)
        report["network"]["sorted_pcap_id"] = spcap_id

    if "procmemory" in report:
        # Store the process memory dump files in GridFS and reference them back in the report.
        for idx, procmem in enumerate(report['procmemory']):
            procmem_path = os.path.join(self.analysis_path, "memory", "{0}.dmp".format(procmem['pid']))
            procmem_file = File(procmem_path)
            if procmem_file.valid():
                procmem_id = self.store_file(procmem_file)
                report["procmemory"][idx].update({"procmem_id": procmem_id})

    # Store the Suricata extracted files archive in GridFS and reference it back in the report.
    suri_extracted_zip_path = os.path.join(self.analysis_path, "logs/files.zip")
    suri_extracted_zip = File(suri_extracted_zip_path)
    if suri_extracted_zip.valid():
        suri_extracted_zip_id = self.store_file(suri_extracted_zip)
        report["suricata"] = {"suri_extracted_zip": suri_extracted_zip_id}
        report["suricata"].update(results["suricata"])

    # Walk through the dropped files, store them in GridFS and update the
    # report with the ObjectIds.
    new_dropped = []
    if "dropped" in report:
        for dropped in report["dropped"]:
            new_drop = dict(dropped)
            drop = File(dropped["path"])
            if drop.valid():
                dropped_id = self.store_file(drop, filename=dropped["name"])
                new_drop["object_id"] = dropped_id
            new_dropped.append(new_drop)

    report["dropped"] = new_dropped

    # Store the zipped droppings file in GridFS and reference it back in the report.
    # cuckoo_dropped_zip_path = os.path.join(self.analysis_path, "cuckoodroppings.zip")
    # cuckoo_dropped_zip = File(cuckoo_dropped_zip_path)
    # if cuckoo_dropped_zip.valid():
    #     cuckoo_droppings_id = self.store_file(cuckoo_dropped_zip)
    #     report["zippeddroppings"] = {"cuckoo_droppings_id": cuckoo_droppings_id}
    #     report["zippeddroppings"].update(results["zippeddroppings"])

    # Walk through the suricata extracted files, store them in GridFS and
    # update the report with the ObjectIds.
    new_suricata_files = []
    if results.has_key("suricata") and results["suricata"]:
        if results["suricata"].has_key("files") and results["suricata"]["files"]:
            for suricata_file_e in results["suricata"]["files"]:
                if suricata_file_e.has_key("file_info"):
                    tmp_suricata_file_d = dict(suricata_file_e)
                    suricata_file = File(suricata_file_e["file_info"]["path"])
                    if suricata_file.valid():
                        suricata_file_id = self.store_file(suricata_file,
                                                           filename=suricata_file_e["file_info"]["name"])
                        tmp_suricata_file_d["object_id"] = suricata_file_id
                        new_suricata_files.append(tmp_suricata_file_d)
            report["suricata"]["files"] = new_suricata_files

    # Add screenshots.
    report["shots"] = []
    shots_path = os.path.join(self.analysis_path, "shots")
    if os.path.exists(shots_path):
        # Walk through the files and select the JPGs.
        shots = [shot for shot in os.listdir(shots_path) if shot.endswith(".jpg")]
        for shot_file in sorted(shots):
            shot_path = os.path.join(self.analysis_path, "shots", shot_file)
            shot = File(shot_path)
            # If the screenshot path is a valid file, store it and
            # reference it back in the report.
            if shot.valid():
                shot_id = self.store_file(shot)
                report["shots"].append(shot_id)

    # Store chunks of API calls in a different collection and reference
    # those chunks back in the report. In this way we should defeat the
    # issue with the oversized reports exceeding MongoDB's boundaries.
    # Also allows paging of the reports.
    if "behavior" in report and "processes" in report["behavior"]:
        new_processes = []
        for process in report["behavior"]["processes"]:
            new_process = dict(process)
            chunk = []
            chunks_ids = []
            # Loop on each process call.
            for index, call in enumerate(process["calls"]):
                # If the chunk size is 100 or if the loop is completed then
                # store the chunk in MongoDB.
                if len(chunk) == 100:
                    to_insert = {"pid": process["process_id"], "calls": chunk}
                    chunk_id = self.db.calls.insert(to_insert)
                    chunks_ids.append(chunk_id)
                    # Reset the chunk.
                    chunk = []
                # Append call to the chunk.
                chunk.append(call)
            # Store leftovers.
            if chunk:
                to_insert = {"pid": process["process_id"], "calls": chunk}
                chunk_id = self.db.calls.insert(to_insert)
                chunks_ids.append(chunk_id)
            # Add list of chunks.
            new_process["calls"] = chunks_ids
            new_processes.append(new_process)

        # Store the results in the report.
report["behavior"] = dict(report["behavior"]) report["behavior"]["processes"] = new_processes #Other info we want Quick access to from the web UI if results.has_key("virustotal") and results["virustotal"] and results[ "virustotal"].has_key( "positives") and results["virustotal"].has_key("total"): report["virustotal_summary"] = "%s/%s" % ( results["virustotal"]["positives"], results["virustotal"]["total"]) if results.has_key("suricata") and results["suricata"]: if results["suricata"].has_key("tls") and len( results["suricata"]["tls"]) > 0: report["suri_tls_cnt"] = len(results["suricata"]["tls"]) if results["suricata"] and results["suricata"].has_key( "alerts") and len(results["suricata"]["alerts"]) > 0: report["suri_alert_cnt"] = len(results["suricata"]["alerts"]) if results["suricata"].has_key("files") and len( results["suricata"]["files"]) > 0: report["suri_file_cnt"] = len(results["suricata"]["files"]) if results["suricata"].has_key("http") and len( results["suricata"]["http"]) > 0: report["suri_http_cnt"] = len(results["suricata"]["http"]) # Create an index based on the info.id dict key. Increases overall scalability # with large amounts of data. # Note: Silently ignores the creation if the index already exists. self.db.analysis.create_index("info.id", background=True) # Store the report and retrieve its object id. try: self.db.analysis.save(report) except InvalidDocument as e: parent_key, psize = self.debug_dict_size(report)[0] child_key, csize = self.debug_dict_size(report[parent_key])[0] if not self.options.get("fix_large_docs", False): # Just log the error and problem keys log.error(str(e)) log.error("Largest parent key: %s (%d MB)" % (parent_key, int(psize) / 1048576)) log.error("Largest child key: %s (%d MB)" % (child_key, int(csize) / 1048576)) else: # Delete the problem keys and check for more error_saved = True while error_saved: log.warn( "results['%s']['%s'] deleted due to >16MB size (%dMB)" % (parent_key, child_key, int(psize) / 1048576)) del report[parent_key][child_key] try: self.db.analysis.save(report) error_saved = False except InvalidDocument as e: parent_key, psize = self.debug_dict_size(report)[0] child_key, csize = self.debug_dict_size( report[parent_key])[0] log.error(str(e)) log.error("Largest parent key: %s (%d MB)" % (parent_key, int(psize) / 1048576)) log.error("Largest child key: %s (%d MB)" % (child_key, int(csize) / 1048576)) self.conn.close()
# Copyright (C) 2010-2013 Cuckoo Sandbox Developers.
# This file is part of Cuckoo Sandbox - http://www.cuckoosandbox.org
# See the file 'docs/LICENSE' for copying permission.

import json

from lib.cuckoo.common.abstracts import Report
from lib.cuckoo.common.exceptions import CuckooDependencyError, CuckooReportError

try:
    import lib.hpfeeds as hpfeeds
except ImportError:
    raise CuckooDependencyError("Unable to import HPFeeds library")


class HPFClient(Report):
    """Publishes the results on an HPFeeds broker channel."""

    def run(self, results):
        """Sends JSON report to HPFeeds channel.
        @param results: Cuckoo results dict.
        @raise CuckooReportError: if fails to write report.
        """
        try:
            hpc = hpfeeds.HPC(self.options["host"],
                              self.options["port"],
                              self.options["ident"],
                              self.options["secret"],
                              timeout=60)
            hpc.publish(self.options["channel"],
                        json.dumps(results, sort_keys=False, indent=4))
            hpc.close()
        except hpfeeds.FeedException as e:
            raise CuckooReportError("Failed to publish on HPFeeds channel: %s" % e)