Ejemplo n.º 1
0
 def test_basename(self):
     """Checks Storage.get_filename_from_path() against a table of paths."""
     # Each pair is (input path, expected basename); Windows and POSIX
     # separators, relative paths, and an embedded NUL byte are all covered.
     cases = (
         ("C:\\a.txt", "a.txt"),
         ("C:/a.txt", "a.txt"),
         ("C:\\\x00a.txt", "\x00a.txt"),
         ("/tmp/a.txt", "a.txt"),
         ("../../b.txt", "b.txt"),
         ("..\\..\\c.txt", "c.txt"),
     )
     for path, expected in cases:
         assert Storage.get_filename_from_path(path) == expected
Ejemplo n.º 2
0
 def test_basename(self):
     """Checks Storage.get_filename_from_path() against a table of paths."""
     # Each pair is (input path, expected basename); Windows and POSIX
     # separators, relative paths, and an embedded NUL byte are all covered.
     cases = (
         ("C:\\a.txt", "a.txt"),
         ("C:/a.txt", "a.txt"),
         ("C:\\\x00a.txt", "\x00a.txt"),
         ("/tmp/a.txt", "a.txt"),
         ("../../b.txt", "b.txt"),
         ("..\\..\\c.txt", "c.txt"),
     )
     for path, expected in cases:
         assert Storage.get_filename_from_path(path) == expected
Ejemplo n.º 3
0
    def get_files(self, submit_id, password=None, astree=False):
        """
        Collects the files and URLs that belong to a submit.
        @param submit_id: identifier that is looked up through db.view_submit()
        @param password: The password to unlock container archives with
        @param astree: sflock option; when set, each unpacked file is returned
            as the result of its astree(sanitize=True) call
        @return: a (files, errors, options) tuple
        @raise RuntimeError: if an entry is neither of type "file" nor "url"
        """
        submit = db.view_submit(submit_id)
        duplicates = []
        files = []

        for entry in submit.data["data"]:
            kind = entry["type"]

            if kind == "url":
                # URLs have nothing to unpack, so they are described by a
                # fixed placeholder record.
                files.append({
                    "filename": entry["data"],
                    "filepath": "",
                    "relapath": "",
                    "selected": True,
                    "size": 0,
                    "type": "url",
                    "package": "ie",
                    "extrpath": [],
                    "duplicate": False,
                    "children": [],
                    "mime": "text/html",
                    "finger": {
                        "magic_human": "url",
                        "magic": "url"
                    }
                })
                continue

            if kind != "file":
                raise RuntimeError("Unknown data entry type: %s" % kind)

            # Only the basename of the recorded path is used; the file is
            # looked up inside the temporary directory of the submit.
            filename = Storage.get_filename_from_path(entry["data"])
            unpacked = sflock.unpack(
                filepath=os.path.join(submit.tmp_path, filename),
                password=password,
                duplicates=duplicates
            )
            files.append(
                unpacked.astree(sanitize=True) if astree else unpacked
            )

        return files, submit.data["errors"], submit.data["options"]
Ejemplo n.º 4
0
    def get_files(self, submit_id, password=None, astree=False):
        """
        Collects the files and URLs that belong to a submit.
        @param submit_id: identifier that is looked up through db.view_submit()
        @param password: The password to unlock container archives with
        @param astree: sflock option; when set, each unpacked file is returned
            as the result of its astree(sanitize=True) call
        @return: a (files, errors, options) tuple
        @raise RuntimeError: if an entry is neither of type "file" nor "url"
        """
        submit = db.view_submit(submit_id)
        duplicates = []
        files = []

        for entry in submit.data["data"]:
            kind = entry["type"]

            if kind == "url":
                # URLs have nothing to unpack, so they are described by a
                # fixed placeholder record.
                files.append({
                    "filename": entry["data"],
                    "filepath": "",
                    "relapath": "",
                    "selected": True,
                    "size": 0,
                    "type": "url",
                    "package": "ie",
                    "extrpath": [],
                    "duplicate": False,
                    "children": [],
                    "mime": "text/html",
                    "finger": {
                        "magic_human": "url",
                        "magic": "url"
                    }
                })
                continue

            if kind != "file":
                raise RuntimeError("Unknown data entry type: %s" % kind)

            # Only the basename of the recorded path is used; the file is
            # looked up inside the temporary directory of the submit.
            filename = Storage.get_filename_from_path(entry["data"])
            unpacked = sflock.unpack(
                filepath=os.path.join(submit.tmp_path, filename),
                password=password,
                duplicates=duplicates
            )
            files.append(
                unpacked.astree(sanitize=True) if astree else unpacked
            )

        return files, submit.data["errors"], submit.data["options"]
Ejemplo n.º 5
0
    def pre(self, submit_type, data, options=None):
        """
        The first step to submitting new analysis: gathers the submitted
        entries in a fresh temporary directory and registers the submit.
        @param submit_type: "files" or "strings"
        @param data: a list of dicts containing "name" (file name),
                "data" (file data), and optionally "options", or a list of
                strings (urls or hashes)
        @param options: submit-wide options; an empty dict is stored when
                this is omitted or empty
        @return: the value returned by db.add_submit() (submit id), or False
                when submit_type is not recognized
        """
        valid_types = ("strings", "files")
        if submit_type not in valid_types:
            log.error("Bad parameter '%s' for submit_type", submit_type)
            return False

        path_tmp = Folders.create_temp()
        submit_data = {"data": [], "errors": [], "options": options or {}}

        if submit_type == "strings":
            # Each line is stripped and handed to _handle_string() together
            # with the submit record and the temporary directory.
            for line in data:
                self._handle_string(submit_data, path_tmp, line.strip())
        else:
            # submit_type == "files": only the basename of the supplied name
            # is used when the file is created in the temporary directory.
            for entry in data:
                filepath = Files.create(
                    path_tmp,
                    Storage.get_filename_from_path(entry["name"]),
                    entry["data"]
                )
                translated = self.translate_options_to(
                    entry.get("options", {})
                )
                submit_data["data"].append({
                    "type": "file",
                    "data": filepath,
                    "options": translated,
                })

        return db.add_submit(path_tmp, submit_type, submit_data)
Ejemplo n.º 6
0
    def pre(self, submit_type, data, options=None):
        """
        The first step to submitting new analysis: gathers the submitted
        entries in a fresh temporary directory and registers the submit.
        @param submit_type: "files" or "strings"
        @param data: a list of dicts containing "name" (file name),
                "data" (file data), and optionally "options", or a list of
                strings (urls or hashes)
        @param options: submit-wide options; an empty dict is stored when
                this is omitted or empty
        @return: the value returned by db.add_submit() (submit id), or False
                when submit_type is not recognized
        """
        valid_types = ("strings", "files")
        if submit_type not in valid_types:
            log.error("Bad parameter '%s' for submit_type", submit_type)
            return False

        path_tmp = Folders.create_temp()
        submit_data = {"data": [], "errors": [], "options": options or {}}

        if submit_type == "strings":
            # Each line is stripped and handed to _handle_string() together
            # with the submit record and the temporary directory.
            for line in data:
                self._handle_string(submit_data, path_tmp, line.strip())
        else:
            # submit_type == "files": only the basename of the supplied name
            # is used when the file is created in the temporary directory.
            for entry in data:
                filepath = Files.create(
                    path_tmp,
                    Storage.get_filename_from_path(entry["name"]),
                    entry["data"]
                )
                translated = self.translate_options_to(
                    entry.get("options", {})
                )
                submit_data["data"].append({
                    "type": "file",
                    "data": filepath,
                    "options": translated,
                })

        return db.add_submit(path_tmp, submit_type, submit_data)
Ejemplo n.º 7
0
    def __iter__(self):
        """
        Iterates over the length-prefixed BSON messages in self.fd, starting
        from offset zero, and yields one parsed dictionary per debug,
        process, thread, action, or API call message.

        "info" messages only update self.infomap, self.flags_value, and
        self.flags_bitmask; "buffer" messages are written to disk and their
        sha1 is attached to the next API call. Neither is yielded. Messages
        with an unknown index or a mismatching argument count are logged and
        skipped. Iteration stops at the end of the data, on a truncated,
        undersized, or oversized message, and on a message that fails to
        decode.
        @raise CuckooResultError: if a __process__ message carries neither a
            "TimeLow" nor a "time_low" argument
        """
        self.fd.seek(0)

        while True:
            data = self.fd.read(4)
            if not data:
                return

            if len(data) != 4:
                log.critical("BsonParser lacking data.")
                return

            # The BSON length prefix is a little-endian int32, so the byte
            # order is pinned instead of following the host's native order.
            blen = struct.unpack("<I", data)[0]
            if blen > MAX_MESSAGE_LENGTH:
                log.critical(
                    "BSON message larger than MAX_MESSAGE_LENGTH, "
                    "stopping handler."
                )
                return

            # The length includes the 4-byte prefix and the terminating NUL
            # byte, so anything below 5 is corrupt. It would also turn the
            # read below into a zero or negative-size read (a negative size
            # reads until EOF on regular file objects).
            if blen < 5:
                log.critical(
                    "BSON message shorter than the minimum document length, "
                    "stopping handler."
                )
                return

            data += self.fd.read(blen-4)
            if len(data) < blen:
                log.critical("BsonParser lacking data.")
                return

            try:
                dec = bson_decode(data)
            except Exception as e:
                log.warning(
                    "BsonParser decoding problem %s on data[:50] %s",
                    e, repr(data[:50])
                )
                return

            mtype = dec.get("type", "none")
            index = dec.get("I", -1)

            if mtype == "info":
                # API call index info message, explaining the argument names, etc.
                name = dec.get("name", "NONAME")
                arginfo = dec.get("args", [])
                category = dec.get("category")

                argnames, converters = self.determine_unserializers(arginfo)
                self.infomap[index] = name, arginfo, argnames, converters, category

                if dec.get("flags_value"):
                    self.flags_value[name] = {}
                    for arg, values in dec["flags_value"].items():
                        self.flags_value[name][arg] = dict(values)

                if dec.get("flags_bitmask"):
                    self.flags_bitmask[name] = {}
                    for arg, values in dec["flags_bitmask"].items():
                        self.flags_bitmask[name][arg] = values
                continue

            # Handle dumped buffers.
            if mtype == "buffer":
                buf = dec.get("buffer")
                if buf is None:
                    # Without a payload there is nothing to hash or store;
                    # skip the message rather than abort the whole log.
                    log.warning("Buffer message without buffer data, skipping.")
                    continue

                sha1 = dec.get("checksum")
                self.buffer_sha1 = hashlib.sha1(buf).hexdigest()

                # TODO Reimplement storing of buffers. This has not been done
                # yet in the new resultserver

                # Why do we pass along a sha1 checksum again?
                if sha1 != self.buffer_sha1:
                    log.warning("Incorrect sha1 passed along for a buffer.")

                filepath = cwd("buffer", self.buffer_sha1, analysis=self.task_id)
                with open(filepath, "wb") as f:
                    f.write(buf)

                continue

            tid = dec.get("T", 0)
            time = dec.get("t", 0)

            parsed = {
                "type": mtype,
                "tid": tid,
                "time": time,
            }

            if mtype == "debug":
                parsed["message"] = dec.get("msg", "")
                log.info("Debug message from monitor: %s", parsed["message"])
            else:
                # Regular api call from monitor
                if index not in self.infomap:
                    log.warning("Got API with unknown index - monitor needs "
                                "to explain first: {0}".format(dec))
                    continue

                apiname, arginfo, argnames, converters, category = self.infomap[index]
                args = dec.get("args", [])

                if len(args) != len(argnames):
                    log.warning(
                        "Inconsistent arg count (compared to arg names) "
                        "on %s: %s names %s", dec, argnames, apiname
                    )
                    continue

                # Pair every raw argument with its name and run it through
                # the converter registered by the matching "info" message.
                argdict = {}
                for idx, value in enumerate(args):
                    argdict[argnames[idx]] = converters[idx](value)

                # Special new process message from the monitor.
                if apiname == "__process__":
                    parsed["type"] = "process"

                    # Two argument naming schemes are accepted: CamelCase
                    # and snake_case.
                    if "TimeLow" in argdict:
                        timelow = argdict["TimeLow"]
                        timehigh = argdict["TimeHigh"]

                        parsed["pid"] = pid = argdict["ProcessIdentifier"]
                        parsed["ppid"] = argdict["ParentProcessIdentifier"]
                        modulepath = argdict["ModulePath"]

                    elif "time_low" in argdict:
                        timelow = argdict["time_low"]
                        timehigh = argdict["time_high"]

                        if "pid" in argdict:
                            parsed["pid"] = pid = argdict["pid"]
                            parsed["ppid"] = argdict["ppid"]
                        else:
                            parsed["pid"] = pid = argdict["process_identifier"]
                            parsed["ppid"] = argdict["parent_process_identifier"]

                        modulepath = argdict["module_path"]

                    else:
                        raise CuckooResultError(
                            "I don't recognize the bson log contents."
                        )

                    # FILETIME is 100-nanoseconds from 1601 :/
                    vmtimeunix = (timelow + (timehigh << 32))
                    vmtimeunix = vmtimeunix / 10000000.0 - 11644473600
                    vmtime = datetime.datetime.fromtimestamp(vmtimeunix)
                    parsed["first_seen"] = vmtime

                    procname = Storage.get_filename_from_path(modulepath)
                    parsed["process_path"] = modulepath
                    parsed["process_name"] = procname
                    parsed["command_line"] = argdict.get("command_line")

                    # Is this a 64-bit process?
                    if argdict.get("is_64bit"):
                        self.is_64bit = True

                    # Is this process being "tracked"?
                    parsed["track"] = bool(argdict.get("track", 1))
                    parsed["modules"] = argdict.get("modules", {})

                    # Remembered so that later API calls can be attributed
                    # to this process.
                    self.pid = pid

                elif apiname == "__thread__":
                    parsed["pid"] = argdict["ProcessIdentifier"]

                # elif apiname == "__anomaly__":
                    # tid = argdict["ThreadIdentifier"]
                    # subcategory = argdict["Subcategory"]
                    # msg = argdict["Message"]
                    # self.handler.log_anomaly(subcategory, tid, msg)
                    # return True
                elif apiname == "__action__":
                    parsed["type"] = "action"
                    parsed["action"] = argdict["action"]
                else:
                    parsed["type"] = "apicall"
                    parsed["pid"] = self.pid
                    parsed["api"] = apiname
                    parsed["category"] = category
                    # Status and return value are taken out of the argument
                    # dictionary so they do not show up as arguments.
                    parsed["status"] = argdict.pop("is_success", 1)
                    parsed["return_value"] = argdict.pop("retval", 0)
                    parsed["arguments"] = argdict
                    parsed["flags"] = {}

                    parsed["stacktrace"] = dec.get("s", [])
                    parsed["uniqhash"] = dec.get("h", 0)

                    if "e" in dec and "E" in dec:
                        parsed["last_error"] = dec["e"]
                        parsed["nt_status"] = dec["E"]

                    if apiname in self.flags_value:
                        self.resolve_flags(apiname, argdict, parsed["flags"])

                    # A buffer dumped just before this call belongs to it;
                    # attach its sha1 once and clear it.
                    if self.buffer_sha1:
                        parsed["buffer"] = self.buffer_sha1
                        self.buffer_sha1 = None

            yield parsed
Ejemplo n.º 8
0
    def __iter__(self):
        """
        Iterates over the length-prefixed BSON messages in self.fd, starting
        from offset zero, and yields one parsed dictionary per debug,
        process, thread, action, or API call message.

        "info" messages only update self.infomap, self.flags_value, and
        self.flags_bitmask; "buffer" messages are hashed (and written to disk
        when self.fd is a ResultHandler) and their sha1 is attached to the
        next API call. Neither is yielded. Messages with an unknown index or
        a mismatching argument count are logged and skipped. Iteration stops
        at the end of the data, on a truncated, undersized, or oversized
        message, and on a message that fails to decode.
        @raise CuckooResultError: if a __process__ message carries neither a
            "TimeLow" nor a "time_low" argument
        """
        self.fd.seek(0)

        while True:
            data = self.fd.read(4)
            if not data:
                return

            if len(data) != 4:
                log.critical("BsonParser lacking data.")
                return

            # The BSON length prefix is a little-endian int32, so the byte
            # order is pinned instead of following the host's native order.
            blen = struct.unpack("<I", data)[0]
            if blen > MAX_MESSAGE_LENGTH:
                log.critical(
                    "BSON message larger than MAX_MESSAGE_LENGTH, "
                    "stopping handler."
                )
                return

            # The length includes the 4-byte prefix and the terminating NUL
            # byte, so anything below 5 is corrupt. It would also turn the
            # read below into a zero or negative-size read (a negative size
            # reads until EOF on regular file objects).
            if blen < 5:
                log.critical(
                    "BSON message shorter than the minimum document length, "
                    "stopping handler."
                )
                return

            data += self.fd.read(blen-4)
            if len(data) < blen:
                log.critical("BsonParser lacking data.")
                return

            try:
                dec = bson_decode(data)
            except Exception as e:
                log.warning(
                    "BsonParser decoding problem %s on data[:50] %s",
                    e, repr(data[:50])
                )
                return

            mtype = dec.get("type", "none")
            index = dec.get("I", -1)

            if mtype == "info":
                # API call index info message, explaining the argument names, etc.
                name = dec.get("name", "NONAME")
                arginfo = dec.get("args", [])
                category = dec.get("category")

                argnames, converters = self.determine_unserializers(arginfo)
                self.infomap[index] = name, arginfo, argnames, converters, category

                if dec.get("flags_value"):
                    self.flags_value[name] = {}
                    for arg, values in dec["flags_value"].items():
                        self.flags_value[name][arg] = dict(values)

                if dec.get("flags_bitmask"):
                    self.flags_bitmask[name] = {}
                    for arg, values in dec["flags_bitmask"].items():
                        self.flags_bitmask[name][arg] = values
                continue

            # Handle dumped buffers.
            if mtype == "buffer":
                buf = dec.get("buffer")
                if buf is None:
                    # Without a payload there is nothing to hash or store;
                    # skip the message rather than abort the whole log.
                    log.warning("Buffer message without buffer data, skipping.")
                    continue

                sha1 = dec.get("checksum")
                self.buffer_sha1 = hashlib.sha1(buf).hexdigest()

                # Why do we pass along a sha1 checksum again?
                if sha1 != self.buffer_sha1:
                    log.warning("Incorrect sha1 passed along for a buffer.")

                # If the parent is netlogs ResultHandler then we actually dump
                # it - this should only be the case during the analysis, any
                # after processing will then be ignored.
                # NOTE(review): imported here rather than at module level,
                # presumably to avoid a circular import - confirm.
                from cuckoo.core.resultserver import ResultHandler

                if isinstance(self.fd, ResultHandler):
                    filepath = os.path.join(
                        self.fd.storagepath, "buffer", self.buffer_sha1
                    )
                    with open(filepath, "wb") as f:
                        f.write(buf)

                continue

            tid = dec.get("T", 0)
            time = dec.get("t", 0)

            parsed = {
                "type": mtype,
                "tid": tid,
                "time": time,
            }

            if mtype == "debug":
                parsed["message"] = dec.get("msg", "")
                log.info("Debug message from monitor: %s", parsed["message"])
            else:
                # Regular api call from monitor
                if index not in self.infomap:
                    log.warning("Got API with unknown index - monitor needs "
                                "to explain first: {0}".format(dec))
                    continue

                apiname, arginfo, argnames, converters, category = self.infomap[index]
                args = dec.get("args", [])

                if len(args) != len(argnames):
                    log.warning(
                        "Inconsistent arg count (compared to arg names) "
                        "on %s: %s names %s", dec, argnames, apiname
                    )
                    continue

                # Pair every raw argument with its name and run it through
                # the converter registered by the matching "info" message.
                argdict = {}
                for idx, value in enumerate(args):
                    argdict[argnames[idx]] = converters[idx](value)

                # Special new process message from the monitor.
                if apiname == "__process__":
                    parsed["type"] = "process"

                    # Two argument naming schemes are accepted: CamelCase
                    # and snake_case.
                    if "TimeLow" in argdict:
                        timelow = argdict["TimeLow"]
                        timehigh = argdict["TimeHigh"]

                        parsed["pid"] = pid = argdict["ProcessIdentifier"]
                        parsed["ppid"] = argdict["ParentProcessIdentifier"]
                        modulepath = argdict["ModulePath"]

                    elif "time_low" in argdict:
                        timelow = argdict["time_low"]
                        timehigh = argdict["time_high"]

                        if "pid" in argdict:
                            parsed["pid"] = pid = argdict["pid"]
                            parsed["ppid"] = argdict["ppid"]
                        else:
                            parsed["pid"] = pid = argdict["process_identifier"]
                            parsed["ppid"] = argdict["parent_process_identifier"]

                        modulepath = argdict["module_path"]

                    else:
                        raise CuckooResultError(
                            "I don't recognize the bson log contents."
                        )

                    # FILETIME is 100-nanoseconds from 1601 :/
                    vmtimeunix = (timelow + (timehigh << 32))
                    vmtimeunix = vmtimeunix / 10000000.0 - 11644473600
                    vmtime = datetime.datetime.fromtimestamp(vmtimeunix)
                    parsed["first_seen"] = vmtime

                    procname = Storage.get_filename_from_path(modulepath)
                    parsed["process_path"] = modulepath
                    parsed["process_name"] = procname
                    parsed["command_line"] = argdict.get("command_line")

                    # Is this a 64-bit process?
                    if argdict.get("is_64bit"):
                        self.is_64bit = True

                    # Is this process being "tracked"?
                    parsed["track"] = bool(argdict.get("track", 1))
                    parsed["modules"] = argdict.get("modules", {})

                    # Remembered so that later API calls can be attributed
                    # to this process.
                    self.pid = pid

                elif apiname == "__thread__":
                    parsed["pid"] = argdict["ProcessIdentifier"]

                # elif apiname == "__anomaly__":
                    # tid = argdict["ThreadIdentifier"]
                    # subcategory = argdict["Subcategory"]
                    # msg = argdict["Message"]
                    # self.handler.log_anomaly(subcategory, tid, msg)
                    # return True
                elif apiname == "__action__":
                    parsed["type"] = "action"
                    parsed["action"] = argdict["action"]
                else:
                    parsed["type"] = "apicall"
                    parsed["pid"] = self.pid
                    parsed["api"] = apiname
                    parsed["category"] = category
                    # Status and return value are taken out of the argument
                    # dictionary so they do not show up as arguments.
                    parsed["status"] = argdict.pop("is_success", 1)
                    parsed["return_value"] = argdict.pop("retval", 0)
                    parsed["arguments"] = argdict
                    parsed["flags"] = {}

                    parsed["stacktrace"] = dec.get("s", [])
                    parsed["uniqhash"] = dec.get("h", 0)

                    if "e" in dec and "E" in dec:
                        parsed["last_error"] = dec["e"]
                        parsed["nt_status"] = dec["E"]

                    if apiname in self.flags_value:
                        self.resolve_flags(apiname, argdict, parsed["flags"])

                    # A buffer dumped just before this call belongs to it;
                    # attach its sha1 once and clear it.
                    if self.buffer_sha1:
                        parsed["buffer"] = self.buffer_sha1
                        self.buffer_sha1 = None

            yield parsed