Beispiel #1
0
    def run(self):
        """Run analysis.

        Walks the behavioral logs folder, parses every log file that is not
        over the configured size limit, and collects one summary dict per
        process for which the parser reports a process id.

        @return: processes information list, sorted by "first_seen".
        """
        results = []

        if not os.path.exists(self._logs_path):
            log.warning('Analysis results folder does not exist at path "%s".', self._logs_path)
            return results

        # List the folder once so the emptiness check and the loop below work
        # on the same snapshot of its contents.
        file_names = os.listdir(self._logs_path)

        # TODO: this should check the current analysis configuration and raise a warning
        # if injection is enabled and there is no logs folder.
        if not file_names:
            log.info("Analysis results folder does not contain any file or injection was disabled.")
            return results

        # Loop Detection can be enabled globally or locally (as an option);
        # the answer does not depend on the file, so work it out only once.
        loop_detection = cfg_process.loop_detection.enabled or self.options.get("loop_detection")

        for file_name in file_names:
            file_path = os.path.join(self._logs_path, file_name)

            # NOTE: compression is requested before the directory check, as in
            # the original ordering of this method.
            if loop_detection:
                self.compress_log_file(file_path)

            if os.path.isdir(file_path):
                continue

            # Skipping the current log file if it's too big.
            if os.stat(file_path).st_size > cfg.processing.analysis_size_limit:
                log.warning("Behavioral log %s too big to be processed, skipped.", file_name)
                continue

            # Invoke parsing of current log file (if ram_boost is enabled, otherwise parsing is done on-demand)
            current_log = ParseProcessLog(file_path)
            if current_log.process_id is None:
                continue

            # If the current log actually contains any data, add its data to
            # the results list.
            results.append(
                {
                    "process_id": current_log.process_id,
                    "process_name": bytes2str(current_log.process_name),
                    "parent_id": current_log.parent_id,
                    "module_path": bytes2str(current_log.module_path),
                    "first_seen": logtime(current_log.first_seen),
                    "calls": current_log.calls,
                    "threads": current_log.threads,
                    "environ": current_log.environdict,
                }
            )

        # Sort the items in the results list chronologically. In this way we
        # can have a sequential order of spawned processes.
        results.sort(key=lambda process: process["first_seen"])

        return results
Beispiel #2
0
    def log_environ(self, context, environdict):
        """Record user/process environment information for behavioral signatures.

        The incoming mapping is passed through bytes2str and then merged into
        the environment dict already stored on this object.

        @param context: ignored
        @param environdict: dict of the various collected information, which will expand over time
        """
        decoded_environ = bytes2str(environdict)
        self.environdict.update(decoded_environ)
Beispiel #3
0
    def run(self):
        """Run extract of printable strings.

        Only tasks whose category is "file" or "static" are scanned; any other
        category yields an empty list. Two options are read from self.options:
        "nullterminated_only" (default True) and "minchars" (default 5).

        @return: list of printable strings.
        @raise CuckooProcessingError: if the sample file does not exist or
            cannot be read.
        """
        self.key = "strings"

        if self.task["category"] not in ("file", "static"):
            return []

        if not os.path.exists(self.file_path):
            raise CuckooProcessingError("Sample file doesn't exist: \"%s\"" % self.file_path)

        try:
            # Context manager guarantees the handle is closed even if read() fails.
            with open(self.file_path, "rb") as sample:
                data = sample.read()
        except (IOError, OSError) as e:
            raise CuckooProcessingError("Error opening file %s" % e) from e

        nulltermonly = self.options.get("nullterminated_only", True)
        minchars = self.options.get("minchars", 5)

        # Without RE2 the repetition gets an explicit upper bound of 8192;
        # with RE2 the upper bound is left open.
        endlimit = b"" if HAVE_RE2 else b"8192"
        repeat = str(minchars).encode("utf-8") + b"," + endlimit

        if nulltermonly:
            # Capture the run itself and require a trailing NUL terminator
            # (two NUL bytes for the 16-bit variant).
            apat = b"([\x20-\x7e]{" + repeat + b"})\x00"
            upat = b"((?:[\x20-\x7e][\x00]){" + repeat + b"})\x00\x00"
        else:
            apat = b"[\x20-\x7e]{" + repeat + b"}"
            upat = b"(?:[\x20-\x7e][\x00]){" + repeat + b"}"

        strings = [bytes2str(string) for string in re.findall(apat, data)]
        strings.extend(str(ws.decode("utf-16le")) for ws in re.findall(upat, data))

        return strings
Beispiel #4
0
def extract_strings(path, nulltermonly, minchars):
    """Extract printable ASCII and UTF-16LE strings from a file.

    @param path: path of the file to scan.
    @param nulltermonly: when true, only runs followed by a NUL terminator
        are returned (two NUL bytes for the 16-bit variant).
    @param minchars: minimum number of characters a run must have.
    @return: list of strings; single-byte matches first, then 16-bit matches.
    @raise CuckooProcessingError: if the file cannot be opened or read.
    """
    try:
        # Context manager guarantees the handle is closed even if read() fails.
        with open(path, "rb") as sample:
            data = sample.read()
    except (IOError, OSError) as e:
        raise CuckooProcessingError(f"Error opening file {e}") from e

    # Without RE2 the repetition gets an explicit upper bound of 8192;
    # with RE2 the upper bound is left open.
    endlimit = b"" if HAVE_RE2 else b"8192"
    repeat = str(minchars).encode() + b"," + endlimit

    if nulltermonly:
        apat = b"([\x20-\x7e]{" + repeat + b"})\x00"
        upat = b"((?:[\x20-\x7e][\x00]){" + repeat + b"})\x00\x00"
    else:
        apat = b"[\x20-\x7e]{" + repeat + b"}"
        upat = b"(?:[\x20-\x7e][\x00]){" + repeat + b"}"

    strings = [bytes2str(string) for string in re.findall(apat, data)]
    strings.extend(str(ws.decode("utf-16le")) for ws in re.findall(upat, data))

    return strings
Beispiel #5
0
def recon(filename, orig_options, timeout, enforce_timeout):
    """Adjust task settings according to the submitted file name.

    The name is lower-cased (and passed through bytes2str when it is not a
    str). If it contains "name", extra options are appended and the timeout
    settings are overridden; otherwise the inputs are returned untouched.

    @param filename: name of the submitted file.
    @param orig_options: option string of the task.
    @param timeout: requested timeout.
    @param enforce_timeout: requested enforce-timeout flag.
    @return: tuple (options, timeout, enforce_timeout).
    """
    lowered = filename.lower()
    if not isinstance(lowered, str):
        lowered = bytes2str(lowered)

    # Nothing to tweak unless the marker is part of the file name.
    if "name" not in lowered:
        return orig_options, timeout, enforce_timeout

    orig_options += ",timeout=400,enforce_timeout=1,procmemdump=1,procdump=1"
    return orig_options, 400, True
Beispiel #6
0
def extract_strings(filepath: str, on_demand: bool = False):
    """Extract strings from analyzed file.

    Nothing is extracted (None is returned) when the strings feature is
    disabled, when it is configured as on-demand and this call is not an
    on-demand one, or when the file is missing or unreadable.

    @param filepath: path of the file to scan.
    @param on_demand: whether this call is an on-demand request.
    @return: list of printable strings, or None in the cases above.
    """
    strings_cfg = processing_cfg.strings
    if not strings_cfg.enabled:
        return None
    if strings_cfg.on_demand and not on_demand:
        return None

    nulltermonly = strings_cfg.nullterminated_only
    minchars = strings_cfg.minchars

    if not os.path.exists(filepath):
        log.error("Sample file doesn't exist: %s", filepath)
        return None

    try:
        with open(filepath, "rb") as handle:
            data = handle.read()
    except (IOError, OSError) as e:
        log.error("Error reading file: %s", e)
        return None

    # Without RE2 the repetition gets an explicit upper bound of 8192.
    if HAVE_RE2:
        endlimit = b""
    else:
        endlimit = b"8192"
    repeat = str(minchars).encode() + b"," + endlimit

    if nulltermonly:
        apat = b"([\x20-\x7e]{" + repeat + b"})\x00"
        upat = b"((?:[\x20-\x7e][\x00]){" + repeat + b"})\x00\x00"
    else:
        apat = b"[\x20-\x7e]{" + repeat + b"}"
        upat = b"(?:[\x20-\x7e][\x00]){" + repeat + b"}"

    # Single-byte matches first, then the 16-bit little-endian ones.
    found = []
    for narrow in re.findall(apat, data):
        found.append(bytes2str(narrow))
    for wide in re.findall(upat, data):
        found.append(str(wide.decode("utf-16le")))

    return found
Beispiel #7
0
    def _parse(self, row):
        """Parse log row.

        The first nine items of the row are fixed fields; every further item
        is expected to be a (name, value) pair describing one API argument.
        Arguments that cannot be unpacked or converted are skipped.

        @param row: row data.
        @return: parsed information dict, or None when a fixed field is missing.
        """
        try:
            # Fixed columns, in order: timestamp of the API call, thread ID,
            # non-system DLL return address, its non-system DLL parent,
            # Win32 function category, API name, times the log repeated,
            # success/failure status, value returned by the function.
            (
                timestamp,
                thread_id,
                caller,
                parentcaller,
                category,
                api_name,
                repeated,
                status_value,
                return_value,
            ) = [row[position] for position in range(9)]
        except IndexError as e:
            log.debug("Unable to parse process log row: %s", e)
            return None

        # Everything after the fixed columns is an API argument.
        arguments = []
        for api_arg in row[9:]:
            try:
                arg_name, arg_value = api_arg
            except ValueError as e:
                log.debug("Unable to parse analysis row argument (row=%s): %s", api_arg, e)
                continue

            if isinstance(arg_value, bytes):
                arg_value = bytes2str(arg_value)

            # A non-empty list whose first element is bytes is flattened into
            # one space-separated string.
            is_bytes_list = (
                arg_value and isinstance(arg_value, list) and len(arg_value) >= 1 and isinstance(arg_value[0], bytes)
            )
            if is_bytes_list:
                arg_value = " ".join(bytes2str(arg_value))

            try:
                printable = convert_to_printable(arg_value, self.conversion_cache)
            except Exception:
                log.error(arg_value, exc_info=True)
                continue

            argument = {"name": arg_name, "value": printable}
            if not self.reporting_mode:
                argument["raw_value"] = arg_value
            pretty = pretty_print_arg(category, api_name, arg_name, printable)
            if pretty:
                argument["pretty_value"] = pretty
            arguments.append(argument)

        current_thread = str(thread_id)
        call = {"timestamp": timestamp, "thread_id": current_thread}
        call["caller"] = f"0x{default_converter(caller):08x}"
        call["parentcaller"] = f"0x{default_converter(parentcaller):08x}"
        call["category"] = category
        call["api"] = api_name
        call["status"] = bool(int(status_value))

        # Integer return values are rendered as hex, anything else as text.
        if not isinstance(return_value, int):
            call["return"] = convert_to_printable(str(return_value), self.conversion_cache)
        else:
            call["return"] = f"0x{default_converter(return_value):08x}"

        prettyret = pretty_print_retval(call["status"], call["return"])
        if prettyret:
            call["pretty_return"] = prettyret

        call["arguments"] = arguments
        call["repeated"] = repeated

        # add the thread id to our thread set
        if current_thread not in self.threads:
            self.threads.append(current_thread)

        return call
Beispiel #8
0
    def _parse(self, row):
        """Parse log row.

        The first nine items of the row are fixed fields; every further item
        is expected to be a (name, value) pair describing one API argument.
        Arguments that cannot be unpacked into a pair are skipped.

        @param row: row data.
        @return: parsed information dict, or None when a fixed field is missing.
        """
        try:
            timestamp = row[0]  # Timestamp of current API call invocation.
            thread_id = row[1]  # Thread ID.
            caller = row[2]  # non-system DLL return address
            parentcaller = row[3]  # non-system DLL parent of non-system-DLL return address
            category = row[4]  # Win32 function category.
            api_name = row[5]  # Name of the Windows API.
            repeated = row[6]  # Times log repeated
            status_value = row[7]  # Success or Failure?
            return_value = row[8]  # Value returned by the function.
        except IndexError as e:
            log.debug("Unable to parse process log row: %s", e)
            return None

        # Now walk through the remaining columns, which will contain API
        # arguments.
        arguments = []
        for api_arg in row[9:]:
            # Split the argument name from its value.
            try:
                arg_name, arg_value = api_arg
            except ValueError as e:
                log.debug("Unable to parse analysis row argument (row=%s): %s", api_arg, e)
                continue

            if isinstance(arg_value, bytes):
                arg_value = bytes2str(arg_value)

            argument = {
                "name": arg_name,
                "value": convert_to_printable(str(arg_value), self.conversion_cache),
            }
            if not self.reporting_mode:
                argument["raw_value"] = arg_value
            pretty = pretty_print_arg(category, api_name, arg_name, argument["value"])
            if pretty:
                argument["pretty_value"] = pretty
            arguments.append(argument)

        # NOTE: "%.08x" is kept on purpose; it pads the digits themselves, which
        # differs from a width-based format for negative values.
        call = {
            "timestamp": timestamp,
            "thread_id": str(thread_id),
            "caller": "0x%.08x" % default_converter(caller),
            "parentcaller": "0x%.08x" % default_converter(parentcaller),
            "category": category,
            "api": api_name,
            "status": bool(int(status_value)),
        }

        # Integer return values are rendered as hex, anything else as text.
        if isinstance(return_value, int):
            call["return"] = "0x%.08x" % default_converter(return_value)
        else:
            call["return"] = convert_to_printable(str(return_value), self.conversion_cache)

        prettyret = pretty_print_retval(category, api_name, call["status"], call["return"])
        if prettyret:
            call["pretty_return"] = prettyret

        call["arguments"] = arguments
        call["repeated"] = repeated

        # add the thread id to our thread set
        if call["thread_id"] not in self.threads:
            self.threads.append(call["thread_id"])

        return call