Example #1
    def run(self, results):
        """Writes report.
        @param results: analysis results dictionary.
        @raise CuckooReportError: if fails to connect or write to MongoDB.
        """
        # We put the raise here and not at the import because it would
        # otherwise trigger even if the module is not enabled in the config.
        if not HAVE_MONGO:
            raise CuckooDependencyError("Unable to import pymongo "
                                        "(install with `pip install pymongo`)")

        self.connect()

        # Set mongo schema version.
        # TODO: This is not optimal because it runs on each analysis. It should
        # run only once at startup.
        if "cuckoo_schema" in self.db.collection_names():
            if self.db.cuckoo_schema.find_one(
            )["version"] != self.SCHEMA_VERSION:
                raise CuckooReportError(
                    "Mongo schema version not expected, check data migration tool"
                )
        else:
            self.db.cuckoo_schema.save({"version": self.SCHEMA_VERSION})

        # Create a copy of the dictionary. This is done in order to not modify
        # the original dictionary and possibly compromise the following
        # reporting modules.
        report = dict(results)

        if "network" not in report:
            report["network"] = {}

        # Add screenshot paths
        report["shots"] = []
        shots_path = os.path.join(self.analysis_path, "shots")
        if os.path.exists(shots_path):
            shots = [
                shot for shot in os.listdir(shots_path)
                if shot.endswith(".jpg")
            ]
            for shot_file in sorted(shots):
                shot_path = os.path.join(self.analysis_path, "shots",
                                         shot_file)
                screenshot = File(shot_path)
                if screenshot.valid():
                    # Strip the extension as it's added later
                    # in the Django view
                    report["shots"].append(shot_file.replace(".jpg", ""))

        # Store chunks of API calls in a different collection and reference
        # those chunks back in the report. In this way we should defeat the
        # issue with the oversized reports exceeding MongoDB's boundaries.
        # Also allows paging of the reports.
        new_processes = []

        for process in report.get("behavior", {}).get("processes", []) or []:
            new_process = dict(process)
            chunk = []
            chunks_ids = []
            # Loop on each process call.
            for index, call in enumerate(process["calls"]):
                # If the chunk size is 100 or if the loop is completed then
                # store the chunk in MongoDB.
                if len(chunk) == 100:
                    to_insert = {"pid": process["process_id"], "calls": chunk}
                    chunk_id = self.db.calls.insert(to_insert)
                    chunks_ids.append(chunk_id)
                    # Reset the chunk.
                    chunk = []
                # Append call to the chunk.
                chunk.append(call)
            # Store leftovers.
            if chunk:
                to_insert = {"pid": process["process_id"], "calls": chunk}
                chunk_id = self.db.calls.insert(to_insert)
                chunks_ids.append(chunk_id)
            # Add list of chunks.
            new_process["calls"] = chunks_ids
            new_processes.append(new_process)
        # Store the results in the report.
        report["behavior"] = dict(report["behavior"])
        report["behavior"]["processes"] = new_processes
        # Calculate the mlist_cnt for display, if the signature is present, to reduce DB load.
        if "signatures" in results:
            for entry in results["signatures"]:
                if entry["name"] == "ie_martian_children":
                    report["mlist_cnt"] = len(entry["data"])
                if entry["name"] == "office_martian_children":
                    report["f_mlist_cnt"] = len(entry["data"])

        # Other info we want quick access to from the web UI
        if results.has_key("virustotal") and results["virustotal"] and results[
                "virustotal"].has_key(
                    "positives") and results["virustotal"].has_key("total"):
            report["virustotal_summary"] = "%s/%s" % (
                results["virustotal"]["positives"],
                results["virustotal"]["total"])
        if results.get("suricata", False):

            keywords = ("tls", "alerts", "files", "http", "ssh", "dns")
            keywords_dict = ("suri_tls_cnt", "suri_alert_cnt", "suri_file_cnt",
                             "suri_http_cnt", "suri_ssh_cnt", "suri_dns_cnt")
            for keyword, keyword_value in zip(keywords, keywords_dict):
                if results["suricata"].get(keyword, 0):
                    report[keyword_value] = len(results["suricata"][keyword])

        # Create an index based on the info.id dict key. Increases overall scalability
        # with large amounts of data.
        # Note: Silently ignores the creation if the index already exists.
        self.db.analysis.create_index("info.id", background=True)

        # Trick for the distributed API.
        if results.get("info", {}).get("options", {}).get("main_task_id", ""):
            report["info"]["id"] = int(
                results["info"]["options"]["main_task_id"])

        analyses = self.db.analysis.find(
            {"info.id": int(report["info"]["id"])})
        if analyses.count() > 0:
            log.debug("Deleting analysis data for Task %s" %
                      report["info"]["id"])
            for analysis in analyses:
                for process in analysis["behavior"]["processes"]:
                    for call in process["calls"]:
                        self.db.calls.remove({"_id": ObjectId(call)})
                self.db.analysis.remove({"_id": ObjectId(analysis["_id"])})
            log.debug("Deleted previous MongoDB data for Task %s" %
                      report["info"]["id"])

        self.ensure_valid_utf8(report)

        # Store the report and retrieve its object id.
        try:
            self.db.analysis.save(report, check_keys=False)
        except InvalidDocument as e:
            parent_key, psize = self.debug_dict_size(report)[0]
            if not self.options.get("fix_large_docs", False):
                # Just log the error and problem keys
                log.error(str(e))
                log.error("Largest parent key: %s (%d MB)" %
                          (parent_key, int(psize) / MEGABYTE))
            else:
                # Delete the problem keys and check for more
                error_saved = True
                size_filter = MONGOSIZELIMIT
                while error_saved:
                    if type(report) == list:
                        report = report[0]
                    try:
                        if type(report[parent_key]) == list:
                            for j, parent_dict in enumerate(
                                    report[parent_key]):
                                child_key, csize = self.debug_dict_size(
                                    parent_dict, parent_key)[0]
                                if csize > size_filter:
                                    if parent_key == child_key:
                                        log.warn(
                                            "results['%s'] deleted due to size: %s"
                                            % (parent_key, csize))
                                        del report[parent_key]
                                        break
                                    else:
                                        log.warn(
                                            "results['%s']['%s'] deleted due to size: %s"
                                            % (parent_key, child_key, csize))
                                        del report[parent_key][j][child_key]
                        else:
                            child_key, csize = self.debug_dict_size(
                                report[parent_key], parent_key)[0]
                            if csize > size_filter:
                                log.warn(
                                    "else - results['%s']['%s'] deleted due to size: %s"
                                    % (parent_key, child_key, csize))
                                del report[parent_key][child_key]
                        try:
                            self.db.analysis.save(report, check_keys=False)
                            error_saved = False
                        except InvalidDocument as e:
                            parent_key, psize = self.debug_dict_size(report)[0]
                            log.error(str(e))
                            log.error("Largest parent key: %s (%d MB)" %
                                      (parent_key, int(psize) / MEGABYTE))
                            size_filter = size_filter - MEGABYTE
                    except Exception as e:
                        log.error("Failed to delete child key: %s" % str(e))
                        error_saved = False

        self.conn.close()
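Note on reading the data back: the chunking above replaces each process's call list with a list of ObjectIds pointing into the calls collection. A minimal sketch of how a consumer (for example a paging web view) could resolve those ids again; the helper name, host, and task id are illustrative, not part of the module:

from bson.objectid import ObjectId
from pymongo import MongoClient


def load_call_chunk(db, chunk_id):
    """Fetch one 100-call chunk written by the reporter above."""
    doc = db.calls.find_one({"_id": ObjectId(chunk_id)})
    return doc["calls"] if doc else []


# Hypothetical usage: walk the calls of the first process of task 1.
db = MongoClient("127.0.0.1", 27017).cuckoo
analysis = db.analysis.find_one({"info.id": 1})
if analysis:
    for chunk_id in analysis["behavior"]["processes"][0]["calls"]:
        for call in load_call_chunk(db, chunk_id):
            print(call.get("api"))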
Example #2
    def run(self, results):
        """Writes report.
        @param results: analysis results dictionary.
        @raise CuckooReportError: if fails to connect or write to MongoDB.
        """
        # We put the raise here and not at the import because it would
        # otherwise trigger even if the module is not enabled in the config.
        if not HAVE_MONGO:
            raise CuckooDependencyError("Unable to import pymongo "
                                        "(install with `pip install pymongo`)")

        self.connect()

        # Set mongo schema version.
        # TODO: This is not optimal because it runs on each analysis. It should
        # run only once at startup.
        if "cuckoo_schema" in self.db.collection_names():
            if self.db.cuckoo_schema.find_one(
            )["version"] != self.SCHEMA_VERSION:
                raise CuckooReportError(
                    "Mongo schema version not expected, check data migration tool"
                )
        else:
            self.db.cuckoo_schema.save({"version": self.SCHEMA_VERSION})

        # Set a unique index on stored files, to avoid duplicates.
        # From pymongo docs:
        #  Returns the name of the created index if an index is actually
        #    created.
        #  Returns None if the index already exists.
        # TODO: This is not optimal because it runs on each analysis. It should
        # run only once at startup.
        self.db.fs.files.ensure_index("sha256",
                                      unique=True,
                                      sparse=True,
                                      name="sha256_unique")

        # Create a copy of the dictionary. This is done in order to not modify
        # the original dictionary and possibly compromise the following
        # reporting modules.
        report = dict(results)
        if not "network" in report:
            report["network"] = {}

        # Store the sample in GridFS.
        if results["info"]["category"] == "file" and "target" in results:
            sample = File(self.file_path)
            if sample.valid():
                fname = results["target"]["file"]["name"]
                sample_id = self.store_file(sample, filename=fname)
                report["target"] = {"file_id": sample_id}
                report["target"].update(results["target"])

        # Store the PCAP file in GridFS and reference it back in the report.
        pcap_path = os.path.join(self.analysis_path, "dump.pcap")
        pcap = File(pcap_path)
        if pcap.valid():
            pcap_id = self.store_file(pcap)
            report["network"]["pcap_id"] = pcap_id

        sorted_pcap_path = os.path.join(self.analysis_path, "dump_sorted.pcap")
        spcap = File(sorted_pcap_path)
        if spcap.valid():
            spcap_id = self.store_file(spcap)
            report["network"]["sorted_pcap_id"] = spcap_id

        # Store the process memory dump file in GridFS and reference it back in the report.
        if "procmemory" in report and self.options.get("store_memdump", False):
            for idx, procmem in enumerate(report["procmemory"]):
                procmem_path = os.path.join(self.analysis_path, "memory",
                                            "{0}.dmp".format(procmem["pid"]))
                procmem_file = File(procmem_path)
                if procmem_file.valid():
                    procmem_id = self.store_file(procmem_file)
                    report["procmemory"][idx].update(
                        {"procmem_id": procmem_id})

        # Walk through the dropped files, store them in GridFS and update the
        # report with the ObjectIds.
        new_dropped = []
        if "dropped" in report:
            for dropped in report["dropped"]:
                new_drop = dict(dropped)
                drop = File(dropped["path"])
                if drop.valid():
                    dropped_id = self.store_file(drop,
                                                 filename=dropped["name"])
                    new_drop["object_id"] = dropped_id

                new_dropped.append(new_drop)

        report["dropped"] = new_dropped

        # Add screenshots.
        report["shots"] = []
        shots_path = os.path.join(self.analysis_path, "shots")
        if os.path.exists(shots_path):
            # Walk through the files and select the JPGs.
            shots = [
                shot for shot in os.listdir(shots_path)
                if shot.endswith(".jpg")
            ]

            for shot_file in sorted(shots):
                shot_path = os.path.join(self.analysis_path, "shots",
                                         shot_file)
                shot = File(shot_path)
                # If the screenshot path is a valid file, store it and
                # reference it back in the report.
                if shot.valid():
                    shot_id = self.store_file(shot)
                    report["shots"].append(shot_id)

        # Store chunks of API calls in a different collection and reference
        # those chunks back in the report. In this way we should defeat the
        # issue with the oversized reports exceeding MongoDB's boundaries.
        # Also allows paging of the reports.
        if "behavior" in report and "processes" in report["behavior"]:
            new_processes = []
            for process in report["behavior"]["processes"]:
                new_process = dict(process)

                chunk = []
                chunks_ids = []
                # Loop on each process call.
                for index, call in enumerate(process["calls"]):
                    # If the chunk size is 100 or if the loop is completed then
                    # store the chunk in MongoDB.
                    if len(chunk) == 100:
                        to_insert = {
                            "pid": process["process_id"],
                            "calls": chunk
                        }
                        chunk_id = self.db.calls.insert(to_insert)
                        chunks_ids.append(chunk_id)
                        # Reset the chunk.
                        chunk = []

                    # Append call to the chunk.
                    chunk.append(call)

                # Store leftovers.
                if chunk:
                    to_insert = {"pid": process["process_id"], "calls": chunk}
                    chunk_id = self.db.calls.insert(to_insert)
                    chunks_ids.append(chunk_id)

                # Add list of chunks.
                new_process["calls"] = chunks_ids
                new_processes.append(new_process)

            # Store the results in the report.
            report["behavior"] = dict(report["behavior"])
            report["behavior"]["processes"] = new_processes

        # Store the report and retrieve its object id.
        self.db.analysis.save(report)
        # self.conn.disconnect() no longer exists; close() is used instead.
        self.conn.close()
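store_file() is referenced above but defined elsewhere in the module. A hedged sketch of what such a helper could look like, given the sha256_unique index and the GridFS/FileExists imports in the module header; the exact signature and the File.get_all() helper are assumptions:

    def store_file(self, file_obj, filename=""):
        """Store a file in GridFS, reusing an existing copy with the same SHA-256.
        Illustrative sketch, not the module's actual implementation.
        """
        file_info = file_obj.get_all()  # assumed to return hashes incl. sha256
        if not filename:
            filename = file_info.get("name", file_info["sha256"])
        existing = self.db.fs.files.find_one({"sha256": file_info["sha256"]})
        if existing:
            return existing["_id"]
        try:
            return self.fs.put(open(file_obj.file_path, "rb"),
                               filename=filename,
                               sha256=file_info["sha256"])
        except FileExists:
            # Another worker stored the same file first; reuse its ObjectId.
            existing = self.db.fs.files.find_one({"sha256": file_info["sha256"]})
            return existing["_id"] if existing else None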
Example #3
    def run(self, results):
        """Writes report.
        @param results: analysis results dictionary.
        @raise CuckooReportError: if fails to connect or write to Elasticsearch.
        """
        # We put the raise here and not at the import because it would
        # otherwise trigger even if the module is not enabled in the config.
        if not HAVE_ELASTICSEARCH:
            raise CuckooDependencyError("Unable to import elasticsearch "
                                        "(install with `pip install elasticsearch`)")

        self.connect()
        index_prefix  = self.options.get("index", "cuckoo")
        search_only   = self.options.get("searchonly", False)

        # Create a copy of the dictionary. This is done in order to not modify
        # the original dictionary and possibly compromise the following
        # reporting modules.
        report = dict(results)

        idxdate = report["info"]["started"].split(" ")[0]
        self.index_name = '{0}-{1}'.format(index_prefix, idxdate)

        if not search_only:
            if not "network" in report:
                report["network"] = {}

            # Store API calls in chunks for pagination in Django
            if "behavior" in report and "processes" in report["behavior"]:
                new_processes = []
                for process in report["behavior"]["processes"]:
                    new_process = dict(process)
                    chunk = []
                    chunks_ids = []
                    # Loop on each process call.
                    for index, call in enumerate(process["calls"]):
                        # If the chunk size is 100 or if the loop is completed then
                        # store the chunk in Elasticsearch.
                        if len(chunk) == 100:
                            to_insert = {"pid": process["process_id"],
                                         "calls": chunk}
                            pchunk = self.es.index(index=self.index_name,
                                                   doc_type="calls", body=to_insert)
                            chunk_id = pchunk['_id']
                            chunks_ids.append(chunk_id)
                            # Reset the chunk.
                            chunk = []

                        # Append call to the chunk.
                        chunk.append(call)

                    # Store leftovers.
                    if chunk:
                        to_insert = {"pid": process["process_id"], "calls": chunk}
                        pchunk = self.es.index(index=self.index_name, 
                                               doc_type="calls", body=to_insert)
                        chunk_id = pchunk['_id']
                        chunks_ids.append(chunk_id)

                    # Add list of chunks.
                    new_process["calls"] = chunks_ids
                    new_processes.append(new_process)

                # Store the results in the report.
                report["behavior"] = dict(report["behavior"])
                report["behavior"]["processes"] = new_processes

            # Add screenshot paths
            report["shots"] = []
            shots_path = os.path.join(self.analysis_path, "shots")
            if os.path.exists(shots_path):
                shots = [shot for shot in os.listdir(shots_path)
                         if shot.endswith(".jpg")]
                for shot_file in sorted(shots):
                    shot_path = os.path.join(self.analysis_path, "shots",
                                             shot_file)
                    screenshot = File(shot_path)
                    if screenshot.valid():
                        # Strip the extension as it's added later 
                        # in the Django view
                        report["shots"].append(shot_file.replace(".jpg", ""))

            # Other info we want quick access to from the web UI
            if results.get("virustotal") and "positives" in results["virustotal"] \
                    and "total" in results["virustotal"]:
                report["virustotal_summary"] = "%s/%s" % (
                    results["virustotal"]["positives"],
                    results["virustotal"]["total"])

            if results.get("suricata"):
                if results["suricata"].get("tls"):
                    report["suri_tls_cnt"] = len(results["suricata"]["tls"])
                if results["suricata"].get("alerts"):
                    report["suri_alert_cnt"] = len(results["suricata"]["alerts"])
                if results["suricata"].get("files"):
                    report["suri_file_cnt"] = len(results["suricata"]["files"])
                if results["suricata"].get("http"):
                    report["suri_http_cnt"] = len(results["suricata"]["http"])
        else:
            report = {}
            report["task_id"] = results["info"]["id"]
            report["info"]    = results.get("info")
            report["target"]  = results.get("target")
            report["summary"] = results.get("behavior", {}).get("summary")
            report["network"] = results.get("network")
            report["virustotal"] = results.get("virustotal")
            report["virustotal_summary"] = "%s/%s" % (results["virustotal"]["positives"],results["virustotal"]["total"])

        # Store the report and retrieve its object id.
        self.es.index(index=self.index_name, doc_type="analysis",
                      id=results["info"]["id"], body=report)
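As in the MongoDB reporter, the call chunks end up as separate "calls" documents; a small hedged sketch of reading them back from the daily index (host, index name, and task id are placeholders):

from elasticsearch import Elasticsearch

es = Elasticsearch(hosts=[{"host": "127.0.0.1", "port": 9200}])
index_name = "cuckoo-2016-01-01"  # illustrative daily index

# Load the analysis document by task id, then resolve one process's chunks.
analysis = es.get(index=index_name, doc_type="analysis", id=1)["_source"]
chunk_ids = analysis["behavior"]["processes"][0]["calls"]
resp = es.search(index=index_name, doc_type="calls",
                 body={"query": {"ids": {"values": chunk_ids}}})
for hit in resp["hits"]["hits"]:
    for call in hit["_source"]["calls"]:
        print(call.get("api"))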
Example #4
from lib.cuckoo.common.exceptions import CuckooDatabaseError
from lib.cuckoo.common.exceptions import CuckooOperationalError
from lib.cuckoo.common.exceptions import CuckooDependencyError
from lib.cuckoo.common.objects import File, URL
from lib.cuckoo.common.utils import create_folder, Singleton, classlock, SuperLock

try:
    from sqlalchemy import create_engine, Column, not_
    from sqlalchemy import Integer, String, Boolean, DateTime, Enum
    from sqlalchemy import ForeignKey, Text, Index, Table
    from sqlalchemy.ext.declarative import declarative_base
    from sqlalchemy.exc import SQLAlchemyError, IntegrityError
    from sqlalchemy.orm import sessionmaker, relationship, joinedload, backref
    Base = declarative_base()
except ImportError:
    raise CuckooDependencyError("Unable to import sqlalchemy "
                                "(install with `pip install sqlalchemy`)")

log = logging.getLogger(__name__)

SCHEMA_VERSION = "3aa42d870199"
TASK_PENDING = "pending"
TASK_RUNNING = "running"
TASK_COMPLETED = "completed"
TASK_RECOVERED = "recovered"
TASK_REPORTED = "reported"
TASK_FAILED_ANALYSIS = "failed_analysis"
TASK_FAILED_PROCESSING = "failed_processing"
TASK_FAILED_REPORTING = "failed_reporting"

# Secondary table used in association Machine - Tag.
machines_tags = Table("machines_tags", Base.metadata,
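The snippet is cut off in the middle of the machines_tags definition. For orientation, a typical SQLAlchemy many-to-many association table of this shape looks like the sketch below; the column names are assumptions, not necessarily the project's exact schema:

machines_tags = Table(
    "machines_tags", Base.metadata,
    Column("machine_id", Integer, ForeignKey("machines.id")),
    Column("tag_id", Integer, ForeignKey("tags.id")),
)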
Example #5
# This file is part of Cuckoo Sandbox - http://www.cuckoosandbox.org
# See the file 'docs/LICENSE' for copying permission.

import os

from lib.cuckoo.common.abstracts import Report
from lib.cuckoo.common.exceptions import CuckooDependencyError, CuckooReportError
from lib.cuckoo.common.objects import File

try:
    from pymongo.connection import Connection
    from pymongo.errors import ConnectionFailure, InvalidDocument
    from gridfs import GridFS
    from gridfs.errors import FileExists
except ImportError:
    raise CuckooDependencyError("Unable to import pymongo")

class MongoDB(Report):
    """Stores report in MongoDB."""

    def connect(self):
        """Connects to Mongo database, loads options and set connectors.
        @raise CuckooReportError: if unable to connect.
        """
        host = self.options.get("host", "127.0.0.1")
        port = self.options.get("port", 27017)

        try:
            self.conn = Connection(host, port)
            self.db = self.conn.cuckoo
            self.fs = GridFS(self.db)
        except ConnectionFailure:
            raise CuckooReportError("Cannot connect to MongoDB")
Example #6
    def __init__(self):
        if not HAVE_LIBVIRT:
            raise CuckooDependencyError("Unable to import libvirt")

        super(LibVirtMachinery, self).__init__()
Example #7
    def run(self, results):
        """Writes report.
        @param results: analysis results dictionary.
        @raise CuckooReportError: if fails to connect or write to MongoDB.
        """
        # We put the raise here and not at the import because it would
        # otherwise trigger even if the module is not enabled in the config.
        if not HAVE_MONGO:
            raise CuckooDependencyError(
                "Unable to import pymongo (install with `pip3 install pymongo`)"
            )

        self.connect()

        # Set mongo schema version.
        # TODO: This is not optimal because it runs on each analysis. It should run only once at startup.
        if "cuckoo_schema" in self.db.collection_names():
            if self.db.cuckoo_schema.find_one(
            )["version"] != self.SCHEMA_VERSION:
                raise CuckooReportError(
                    "Mongo schema version not expected, check data migration tool"
                )
        else:
            self.db.cuckoo_schema.insert_one({"version": self.SCHEMA_VERSION})

        # Create a copy of the dictionary. This is done in order to not modify
        # the original dictionary and possibly compromise the following
        # reporting modules.
        report = get_json_document(results, self.analysis_path)

        if "network" not in report:
            report["network"] = {}

        new_processes = insert_calls(report, mongo_calls_db=self.db.calls)
        # Store the results in the report.
        report["behavior"] = dict(report["behavior"])
        report["behavior"]["processes"] = new_processes

        # Create an index based on the info.id dict key. Increases overall scalability
        # with large amounts of data.
        # Note: Silently ignores the creation if the index already exists.
        self.db.analysis.create_index("info.id", background=True)

        # We perform a lot of SHA256 hash lookups, so we need this index
        self.db.analysis.create_index(
            [("target.file.sha256", TEXT), ("dropped.sha256", TEXT),
             ("procdump.sha256", TEXT), ("CAPE.payloads.sha256", TEXT)],
            name="ALL_SHA256",
            background=True,
        )

        # Trick for the distributed API.
        if results.get("info", {}).get("options", {}).get("main_task_id", ""):
            report["info"]["id"] = int(
                results["info"]["options"]["main_task_id"])

        analyses = self.db.analysis.find(
            {"info.id": int(report["info"]["id"])})
        if analyses:
            log.debug("Deleting analysis data for Task %s",
                      report["info"]["id"])
            for analysis in analyses:
                for process in analysis["behavior"].get("processes", []) or []:
                    for call in process["calls"]:
                        self.db.calls.remove({"_id": ObjectId(call)})
                self.db.analysis.remove({"_id": ObjectId(analysis["_id"])})
            log.debug("Deleted previous MongoDB data for Task %s",
                      report["info"]["id"])

        ensure_valid_utf8(report)
        gc.collect()

        # Store the report and retrieve its object id.
        try:
            self.db.analysis.insert_one(report)
        except InvalidDocument as e:
            if str(e).startswith("cannot encode object") or str(e).endswith(
                    "must not contain '.'"):
                self.loop_saver(report)
                return
            parent_key, psize = self.debug_dict_size(report)[0]
            if not self.options.get("fix_large_docs", False):
                # Just log the error and problem keys
                # log.error(str(e))
                log.warning("Largest parent key: %s (%d MB)", parent_key,
                            int(psize) // MEGABYTE)
            else:
                # Delete the problem keys and check for more
                error_saved = True
                size_filter = MONGOSIZELIMIT
                while error_saved:
                    if isinstance(report, list):
                        report = report[0]
                    try:
                        if isinstance(report[parent_key], list):
                            for j, parent_dict in enumerate(
                                    report[parent_key]):
                                child_key, csize = self.debug_dict_size(
                                    parent_dict)[0]
                                if csize > size_filter:
                                    log.warn(
                                        "results['%s']['%s'] deleted due to size: %s",
                                        parent_key, child_key, csize)
                                    del report[parent_key][j][child_key]
                        else:
                            child_key, csize = self.debug_dict_size(
                                report[parent_key])[0]
                            if csize > size_filter:
                                log.warn(
                                    "results['%s']['%s'] deleted due to size: %s",
                                    parent_key, child_key, csize)
                                del report[parent_key][child_key]
                        try:
                            self.db.analysis.insert_one(report)
                            error_saved = False
                        except InvalidDocument as e:
                            if str(e).startswith(
                                    "documents must have only string keys"):
                                log.error(
                                    "Search for the bug in your modifications - a dictionary key is an int but must be a string"
                                )
                                log.error(str(e))
                                return
                            else:
                                parent_key, psize = self.debug_dict_size(
                                    report)[0]
                                log.error(str(e))
                                log.warning("Largest parent key: %s (%d MB)",
                                            parent_key,
                                            int(psize) // MEGABYTE)
                                size_filter -= MEGABYTE
                    except Exception as e:
                        log.error("Failed to delete child key: %s", e)
                        error_saved = False

        self.conn.close()
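The ALL_SHA256 text index created above spans the submitted sample, dropped files, process dumps, and CAPE payloads. A hedged sketch of the kind of lookup it is meant to speed up; connection details and the hash value are placeholders:

from pymongo import MongoClient

db = MongoClient("127.0.0.1", 27017).cuckoo
sha256 = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"

# A single $text query hits the combined index, so one lookup covers
# target.file, dropped, procdump and CAPE.payloads hashes at once.
for analysis in db.analysis.find({"$text": {"$search": sha256}},
                                 {"info.id": 1}).sort("info.id", -1):
    print(analysis["info"]["id"])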
Example #8
from lib.cuckoo.common.config import Config
from lib.cuckoo.common.objects import File, URL
from lib.cuckoo.common.utils import create_folder, Singleton

try:
    from sqlalchemy import create_engine, Column
    from sqlalchemy import Integer, String, Boolean, DateTime, Enum
    from sqlalchemy import ForeignKey, Text, Index
    from sqlalchemy.orm import sessionmaker, relationship, joinedload
    from sqlalchemy.sql import func
    from sqlalchemy.ext.declarative import declarative_base
    from sqlalchemy.exc import SQLAlchemyError, IntegrityError
    from sqlalchemy.pool import NullPool
    Base = declarative_base()
except ImportError:
    raise CuckooDependencyError("SQLAlchemy library not found, "
                                "verify your setup")


class Machine(Base):
    """Configured virtual machines to be used as guests."""
    __tablename__ = "machines"

    id = Column(Integer(), primary_key=True)
    name = Column(String(255), nullable=False)
    label = Column(String(255), nullable=False)
    ip = Column(String(255), nullable=False)
    platform = Column(String(255), nullable=False)
    locked = Column(Boolean(), nullable=False, default=False)
    locked_changed_on = Column(DateTime(timezone=False), nullable=True)
    status = Column(String(255), nullable=True)
    status_changed_on = Column(DateTime(timezone=False), nullable=True)
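For context, declarative models like the one above are usually wired to a database roughly as follows; the connection string, NullPool choice, and column values are illustrative, not the project's actual Database class:

from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

# Create the schema and a session factory (SQLite only as an example).
engine = create_engine("sqlite:///cuckoo.db", poolclass=NullPool)
Base.metadata.create_all(engine)
Session = sessionmaker(bind=engine)

session = Session()
session.add(Machine(name="cuckoo1", label="cuckoo1",
                    ip="192.168.56.101", platform="windows"))
session.commit()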
Example #9
# Copyright (C) 2010-2012 Cuckoo Sandbox Developers.
# This file is part of Cuckoo Sandbox - http://www.cuckoosandbox.org
# See the file 'docs/LICENSE' for copying permission.

from lib.cuckoo.common.abstracts import MachineManager
from lib.cuckoo.common.exceptions import CuckooDependencyError, CuckooMachineError

try:
    import libvirt
except ImportError:
    raise CuckooDependencyError("Unable to import libvirt")


class KVM(MachineManager):
    """Virtualization layer for KVM based on python-libvirt."""
    def _initialize_check(self):
        """Runs all checks when a machine manager is initialized.
        @raise CuckooMachineError: if libvirt version is not supported.
        """
        # KVM specific checks.
        if not self._version_check():
            raise CuckooMachineError(
                "Libvirt version is not supported, please get an updated version"
            )
        # Base checks.
        super(KVM, self)._initialize_check()

    def start(self, label):
        """Starts a virtual machine.
        @param label: virtual machine name.
        @raise CuckooMachineError: if unable to start virtual machine.
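_version_check() is called above but not shown; a minimal sketch of what such a check could do with python-libvirt. The minimum-version constant is an assumption for illustration, not the project's actual threshold:

    def _version_check(self):
        """Checks whether the installed libvirt release is recent enough.
        Illustrative sketch only.
        """
        # libvirt encodes versions as major * 1000000 + minor * 1000 + release.
        MINIMUM_LIBVIRT_VERSION = 8000  # 0.8.0, assumed for illustration
        try:
            return libvirt.getVersion() >= MINIMUM_LIBVIRT_VERSION
        except libvirt.libvirtError:
            return False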
Example #10
    def run(self, results):
        """Writes report.
        @param results: analysis results dictionary.
        @raise CuckooReportError: if fails to connect or write to MongoDB.
        """
        # We put the raise here and not at the import because it would
        # otherwise trigger even if the module is not enabled in the config.
        if not HAVE_MONGO:
            raise CuckooDependencyError("Unable to import pymongo "
                                        "(install with `pip install pymongo`)")

        self.connect()

        # Set mongo schema version.
        # TODO: This is not optimal because it runs on each analysis. It should
        # run only once at startup.
        if "cuckoo_schema" in self.db.collection_names():
            if self.db.cuckoo_schema.find_one(
            )["version"] != self.SCHEMA_VERSION:
                raise CuckooReportError(
                    "Mongo schema version not expected, check data migration tool"
                )
        else:
            self.db.cuckoo_schema.save({"version": self.SCHEMA_VERSION})

        # Set a unique index on stored files, to avoid duplicates.
        # From pymongo docs:
        #  Returns the name of the created index if an index is actually
        #    created.
        #  Returns None if the index already exists.
        # TODO: This is not optimal because it runs on each analysis. It should
        # run only once at startup.
        self.db.fs.files.ensure_index("sha256",
                                      unique=True,
                                      sparse=True,
                                      name="sha256_unique")

        # Create a copy of the dictionary. This is done in order to not modify
        # the original dictionary and possibly compromise the following
        # reporting modules.
        report = dict(results)
        if not "network" in report:
            report["network"] = {}
        # Store the sample in GridFS.
        if results["info"]["category"] == "file" and "target" in results:
            sample = File(self.file_path)
            if sample.valid():
                fname = results["target"]["file"]["name"]
                sample_id = self.store_file(sample, filename=fname)
                report["target"] = {"file_id": sample_id}
                report["target"].update(results["target"])

        # Store the PCAP file in GridFS and reference it back in the report.
        pcap_path = os.path.join(self.analysis_path, "dump.pcap")
        pcap = File(pcap_path)
        if pcap.valid():
            pcap_id = self.store_file(pcap)
            report["network"]["pcap_id"] = pcap_id

        sorted_pcap_path = os.path.join(self.analysis_path, "dump_sorted.pcap")
        spcap = File(sorted_pcap_path)
        if spcap.valid():
            spcap_id = self.store_file(spcap)
            report["network"]["sorted_pcap_id"] = spcap_id

        if "procmemory" in report:
            # Store the process memory dump file in GridFS and reference it back in the report.
            for idx, procmem in enumerate(report['procmemory']):
                procmem_path = os.path.join(self.analysis_path, "memory",
                                            "{0}.dmp".format(procmem['pid']))
                procmem_file = File(procmem_path)
                if procmem_file.valid():
                    procmem_id = self.store_file(procmem_file)
                    report["procmemory"][idx].update(
                        {"procmem_id": procmem_id})

        # Store the suri extracted files in GridFS and reference it back in the report.
        suri_extracted_zip_path = os.path.join(self.analysis_path,
                                               "logs/files.zip")
        suri_extracted_zip = File(suri_extracted_zip_path)
        if suri_extracted_zip.valid():
            suri_extracted_zip_id = self.store_file(suri_extracted_zip)
            report["suricata"] = {"suri_extracted_zip": suri_extracted_zip_id}
            report["suricata"].update(results["suricata"])

        # Walk through the dropped files, store them in GridFS and update the
        # report with the ObjectIds.
        new_dropped = []
        if "dropped" in report:
            for dropped in report["dropped"]:
                new_drop = dict(dropped)
                drop = File(dropped["path"])
                if drop.valid():
                    dropped_id = self.store_file(drop,
                                                 filename=dropped["name"])
                    new_drop["object_id"] = dropped_id

                new_dropped.append(new_drop)

        report["dropped"] = new_dropped

        # Store the Zipped Droppings file in GridFS and reference it back in the report.
        #cuckoo_dropped_zip_path = os.path.join(self.analysis_path, "cuckoodroppings.zip")
        #cuckoo_dropped_zip = File(cuckoo_dropped_zip_path)
        #if cuckoo_dropped_zip.valid():
        #    cuckoo_droppings_id = self.store_file(cuckoo_dropped_zip)
        #    report["zippeddroppings"] = {"cuckoo_droppings_id": cuckoo_droppings_id}
        #    report["zippeddroppings"].update(results["zippeddroppings"])

        # Walk through the suricata extracted files, store them in GridFS and update the
        # report with the ObjectIds.
        new_suricata_files = []
        if results.has_key("suricata") and results["suricata"]:
            if results["suricata"].has_key(
                    "files") and results["suricata"]["files"]:
                for suricata_file_e in results["suricata"]["files"]:
                    if suricata_file_e.has_key("file_info"):
                        tmp_suricata_file_d = dict(suricata_file_e)
                        suricata_file = File(
                            suricata_file_e["file_info"]["path"])
                        if suricata_file.valid():
                            suricata_file_id = self.store_file(
                                suricata_file,
                                filename=suricata_file_e["file_info"]["name"])
                            tmp_suricata_file_d["object_id"] = suricata_file_id
                            new_suricata_files.append(tmp_suricata_file_d)

                report["suricata"]["files"] = new_suricata_files

        # Add screenshots.
        report["shots"] = []
        shots_path = os.path.join(self.analysis_path, "shots")
        if os.path.exists(shots_path):
            # Walk through the files and select the JPGs.
            shots = [
                shot for shot in os.listdir(shots_path)
                if shot.endswith(".jpg")
            ]

            for shot_file in sorted(shots):
                shot_path = os.path.join(self.analysis_path, "shots",
                                         shot_file)
                shot = File(shot_path)
                # If the screenshot path is a valid file, store it and
                # reference it back in the report.
                if shot.valid():
                    shot_id = self.store_file(shot)
                    report["shots"].append(shot_id)

        # Store chunks of API calls in a different collection and reference
        # those chunks back in the report. In this way we should defeat the
        # issue with the oversized reports exceeding MongoDB's boundaries.
        # Also allows paging of the reports.
        if "behavior" in report and "processes" in report["behavior"]:
            new_processes = []
            for process in report["behavior"]["processes"]:
                new_process = dict(process)

                chunk = []
                chunks_ids = []
                # Loop on each process call.
                for index, call in enumerate(process["calls"]):
                    # If the chunk size is 100 or if the loop is completed then
                    # store the chunk in MongoDB.
                    if len(chunk) == 100:
                        to_insert = {
                            "pid": process["process_id"],
                            "calls": chunk
                        }
                        chunk_id = self.db.calls.insert(to_insert)
                        chunks_ids.append(chunk_id)
                        # Reset the chunk.
                        chunk = []

                    # Append call to the chunk.
                    chunk.append(call)

                # Store leftovers.
                if chunk:
                    to_insert = {"pid": process["process_id"], "calls": chunk}
                    chunk_id = self.db.calls.insert(to_insert)
                    chunks_ids.append(chunk_id)

                # Add list of chunks.
                new_process["calls"] = chunks_ids
                new_processes.append(new_process)

            # Store the results in the report.
            report["behavior"] = dict(report["behavior"])
            report["behavior"]["processes"] = new_processes

        # Other info we want quick access to from the web UI
        if results.get("virustotal") and "positives" in results["virustotal"] \
                and "total" in results["virustotal"]:
            report["virustotal_summary"] = "%s/%s" % (
                results["virustotal"]["positives"],
                results["virustotal"]["total"])
        if results.get("suricata"):
            if results["suricata"].get("tls"):
                report["suri_tls_cnt"] = len(results["suricata"]["tls"])
            if results["suricata"].get("alerts"):
                report["suri_alert_cnt"] = len(results["suricata"]["alerts"])
            if results["suricata"].get("files"):
                report["suri_file_cnt"] = len(results["suricata"]["files"])
            if results["suricata"].get("http"):
                report["suri_http_cnt"] = len(results["suricata"]["http"])

        # Create an index based on the info.id dict key. Increases overall scalability
        # with large amounts of data.
        # Note: Silently ignores the creation if the index already exists.
        self.db.analysis.create_index("info.id", background=True)
        # Store the report and retrieve its object id.
        try:
            self.db.analysis.save(report)
        except InvalidDocument as e:
            parent_key, psize = self.debug_dict_size(report)[0]
            child_key, csize = self.debug_dict_size(report[parent_key])[0]
            if not self.options.get("fix_large_docs", False):
                # Just log the error and problem keys
                log.error(str(e))
                log.error("Largest parent key: %s (%d MB)" %
                          (parent_key, int(psize) / 1048576))
                log.error("Largest child key: %s (%d MB)" %
                          (child_key, int(csize) / 1048576))
            else:
                # Delete the problem keys and check for more
                error_saved = True
                while error_saved:
                    log.warn(
                        "results['%s']['%s'] deleted due to >16MB size (%dMB)"
                        % (parent_key, child_key, int(psize) / 1048576))
                    del report[parent_key][child_key]
                    try:
                        self.db.analysis.save(report)
                        error_saved = False
                    except InvalidDocument as e:
                        parent_key, psize = self.debug_dict_size(report)[0]
                        child_key, csize = self.debug_dict_size(
                            report[parent_key])[0]
                        log.error(str(e))
                        log.error("Largest parent key: %s (%d MB)" %
                                  (parent_key, int(psize) / 1048576))
                        log.error("Largest child key: %s (%d MB)" %
                                  (child_key, int(csize) / 1048576))

        self.conn.close()
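debug_dict_size() is used above to find which top-level key pushed the document over MongoDB's 16 MB limit. A hedged sketch of how such a helper could be written with the bson package that ships with pymongo; the name and the (key, size) return shape follow the usage above, the implementation itself is an assumption:

from bson import BSON


def debug_dict_size(report):
    """Return (key, encoded_size) pairs for a report dict, largest first."""
    sizes = []
    for key, value in report.items():
        try:
            # Wrap the value so lists and scalars can be BSON-encoded too.
            sizes.append((key, len(BSON.encode({key: value}))))
        except Exception:
            sizes.append((key, 0))
    return sorted(sizes, key=lambda item: item[1], reverse=True)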
Example #11
# Copyright (C) 2010-2013 Cuckoo Sandbox Developers.
# This file is part of Cuckoo Sandbox - http://www.cuckoosandbox.org
# See the file 'docs/LICENSE' for copying permission.

import json

from lib.cuckoo.common.abstracts import Report
from lib.cuckoo.common.exceptions import CuckooDependencyError, CuckooReportError

try:
    import lib.hpfeeds as hpfeeds
except ImportError:
    raise CuckooDependencyError("Unable to import HPFeeds library")

class HPFClient(Report):
    """Publishes the results on an HPFeeds broker channel."""

    def run(self, results):
        """Sends the JSON report to an HPFeeds channel.
        @param results: Cuckoo results dict.
        @raise CuckooReportError: if it fails to publish the report.
        """
        try:
            hpc = hpfeeds.HPC(self.options["host"], self.options["port"],
                              self.options["ident"], self.options["secret"],
                              timeout=60)
            hpc.publish(self.options["channel"],
                        json.dumps(results, sort_keys=False, indent=4))
            hpc.close()
        except hpfeeds.FeedException as e:
            raise CuckooReportError("Failed to publish on HPFeeds channel: %s" % e)