    def get_printable_key_value(self, key, value):
        output = u""
        printkey = key

        if isinstance(value, (str, bytes)):
            output += u"{:20} {}\n".format(printkey, convert_to_unicode(value))
        else:
            for item in value:
                if isinstance(item, (str, bytes)):
                    output += u"{:20} {}\n".format(printkey, convert_to_unicode(item))
                else:
                    output += u"{:20} {}\n".format(printkey, self.format_list(item, key=key))
                printkey = u""

        return output
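For illustration, a minimal sketch of the alignment this produces, assuming a trivial stand-in for MWCP's convert_to_unicode helper: the key is left-aligned in a 20-character column, and every value after the first is printed with a blank key so multi-value fields line up.

def convert_to_unicode(value):
    # Hypothetical stand-in for MWCP's real helper.
    return value.decode('utf-8', 'replace') if isinstance(value, bytes) else u'{}'.format(value)

output, printkey = u'', u'address'
for item in [b'10.0.0.1', b'10.0.0.2']:
    output += u'{:20} {}\n'.format(printkey, convert_to_unicode(item))
    printkey = u''
print(output)
# address              10.0.0.1
#                      10.0.0.2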
    def gen_results(self, parser_name, input_file_path):
        """
        Generate JSON results for the given file using the given parser name.
        """
        self.reporter.run_parser(parser_name, input_file_path)
        self.reporter.metadata[INPUT_FILE_PATH] = convert_to_unicode(input_file_path)
        return self.reporter.metadata
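A hedged usage sketch: `tester` stands for whatever object defines gen_results, and the parser and sample names are placeholders.

results = tester.gen_results('FooParser', '/samples/abc123.bin')
# `results` is the reporter's metadata dict; the input path is recorded
# under the INPUT_FILE_PATH key shown above.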
    def add_metadata(self, key, value):
        """
        Report a metadata item

        Primary method to report metadata as a result of parsing.

        Args:
            key: string specifying the key of the metadata. Should be one of the values specified in fields.json.
            value: string specifying the value of the metadata. Should be a utf-8 encoded string or a unicode object.

        """
        keyu = convert_to_unicode(key)
        if value is None or all(not _value for _value in value):
            logger.warning("no values provided for %s, skipping" % key)
            return

        if keyu not in self.fields:
            raise KeyError('Invalid field name: {}'.format(keyu))

        fieldtype = self.fields[keyu]['type']

        try:
            if fieldtype == "listofstrings":
                self._add_metadata_listofstrings(keyu, value)
            elif fieldtype == "listofstringtuples":
                self._add_metadata_listofstringtuples(keyu, value)
            elif fieldtype == "dictofstrings":
                self._add_metadata_dictofstrings(keyu, value)
        except Exception:
            logger.exception("Error adding metadata for key: {}".format(keyu))
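A hedged sketch of calls this dispatch supports, assuming the object defining add_metadata is bound to `reporter` and that these keys are declared in fields.json with the types shown in the comments.

reporter.add_metadata('address', '10.1.2.3')            # a "listofstrings" field
reporter.add_metadata('port', ['443', 'tcp'])           # a "listofstringtuples" field
reporter.add_metadata('other', {'campaign': 'alpha'})   # a "dictofstrings" field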
def _write_csv(input_files, results, csv_path=None):
    """
    Writes out results as a csv.

    :param input_files: List of filenames for each respective metadata.
    :param results: List of metadata dictionaries.
    :param csv_path: Path to write out csv file, defaults to stdout.

    :raises IOError: If csv could not be written out.
    """
    scan_date = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    # Add/Tweak metadata.
    for inputfilename, metadata in zip(input_files, results):
        # Add scan date.
        metadata[u'scan_date'] = scan_date
        if u'inputfilename' not in metadata:
            metadata[u'inputfilename'] = inputfilename

        # Flatten 'other' entry so nested values get their own columns,
        # are more readable, and easier to individually analyze.
        #
        # Example:
        #   {'other': {"unique_entry": "value", "unique_key": "value2"}}
        #   Results in columns: other, other.unique_entry, other.unique_key
        if u'other' in metadata:
            for sub_key, sub_value in metadata[u'other'].items():
                metadata[u'other.{}'.format(convert_to_unicode(sub_key))] = sub_value
            del metadata[u'other']

        # Split outputfile into multiple fields.
        if u'outputfile' in metadata:
            value = list(zip(*metadata[u'outputfile']))
            metadata[u'outputfile.name'] = value[0]
            metadata[u'outputfile.description'] = value[1]
            metadata[u'outputfile.md5'] = value[2]
            del metadata[u'outputfile']

    # Sort columns, but with PREFIX_COLUMNS showing up first.
    column_names = set(itertools.chain(*(metadata.keys() for metadata in results)))
    column_names = sorted(
        column_names,
        key=lambda x: (0, _STD_CSV_COLUMNS.index(x)) if x in _STD_CSV_COLUMNS else (1, x))

    # Reformat metadata and write to CSV
    if csv_path is None:
        csvfile = sys.stdout
    else:
        csvfile = open(csv_path, 'wb' if six.PY2 else 'w')

    try:
        dw = csv.DictWriter(csvfile, fieldnames=column_names, lineterminator='\n')
        dw.writeheader()
        for metadata in results:
            dw.writerow({k: _format_metadata_value(v) for k, v in metadata.items()})
    finally:
        if csv_path:
            csvfile.close()
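A minimal usage sketch, assuming the module-level helpers (_STD_CSV_COLUMNS, convert_to_unicode, etc.) are in scope; the metadata content is made up.

results = [{u'address': [u'10.1.2.3'], u'other': {u'campaign': u'alpha'}}]
_write_csv([u'/samples/abc123.bin'], results)  # csv_path=None, so the CSV goes to stdout
# Yields columns like inputfilename, scan_date, address, other.campaign
# (order set by the sort above; 'other' is flattened into 'other.campaign').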
def _format_metadata_value(v):
    """Formats metadata value to a human readable unicode string."""
    if isinstance(v, (list, tuple)):
        result = u''
        for j in v:
            if not isinstance(j, (bytes, str)):
                result += u'{}\n'.format(u', '.join(map(convert_to_unicode, j)))
            else:
                result += u'{}\n'.format(convert_to_unicode(j))
        return result.rstrip()
    elif isinstance(v, dict):
        result = u''
        for field, value in iteritems(v):
            if isinstance(value, (list, tuple)):
                value = u'[{}]'.format(u', '.join(value))

            result += u'{}: {}\n'.format(field, value)
        return result.rstrip()
    else:
        return convert_to_unicode(v)
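Illustrative inputs and the strings they collapse to (results shown as comments):

_format_metadata_value([u'a', u'b'])                 # -> u'a\nb'
_format_metadata_value([(u'host', u'80', u'tcp')])   # -> u'host, 80, tcp'
_format_metadata_value({u'key': [u'x', u'y']})       # -> u'key: [x, y]'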
def _write_csv(input_files, results, csv_path, base64_outputfiles=False):
    """
    Writes out results as a csv.

    :param input_files: List of filenames for each respective metadata.
    :param results: List of metadata dictionaries.
    :param csv_path: Path to write out csv file.
    :param base64_outputfiles: Whether to include base64 outputfiles.
    :raises IOError: If csv could not be written out.
    """
    scan_date = time.ctime()

    # Add/Tweak metadata.
    for inputfilename, metadata in zip(input_files, results):
        # Add scan date.
        metadata['scan_date'] = scan_date
        if 'inputfilename' not in metadata:
            metadata['inputfilename'] = inputfilename

        # Flatten 'other' entry so nested values get their own columns,
        # are more readable, and easier to individually analyze.
        #
        # Example:
        #   {'other': {"unique_entry": "value", "unique_key": "value2"}}
        #   Results in columns: other, other.unique_entry, other.unique_key
        if 'other' in metadata:
            for sub_key, sub_value in metadata['other'].items():
                metadata['other.{}'.format(convert_to_unicode(sub_key))] = sub_value
            del metadata['other']

        # Split outputfile into multiple fields.
        if 'outputfile' in metadata:
            value = list(zip(*metadata['outputfile']))
            metadata['outputfile.name'] = value[0]
            metadata['outputfile.description'] = value[1]
            metadata['outputfile.md5'] = value[2]
            if len(value) > 3 and base64_outputfiles:
                metadata['outputfile.base64'] = value[3]
            del metadata['outputfile']

    # Sort columns, but with PREFIX_COLUMNS showing up first.
    column_names = set(itertools.chain(*(metadata.keys() for metadata in results)))
    column_names = sorted(
        column_names,
        key=lambda x: (0, _STD_CSV_COLUMNS.index(x)) if x in _STD_CSV_COLUMNS else (1, x))

    # Reformat metadata and write to CSV
    with open(csv_path, 'wb' if sys.version_info.major < 3 else 'w') as csvfile:
        dw = csv.DictWriter(csvfile, fieldnames=column_names, lineterminator='\n')
        dw.writeheader()
        for metadata in results:
            dw.writerow({k: _format_metadata_value(v) for k, v in metadata.items()})
    def _add_metadata_dictofstrings(self, key, value):
        # check for type of other?
        for subkey, subvalue in value.items():
            if isinstance(subvalue, (bytes, str)):
                subkey = convert_to_unicode(subkey)
                subvalue = convert_to_unicode(subvalue)
                obj = self.metadata.setdefault(key, {})
                if subkey in obj:
                    # this key already exists; we don't want to clobber it,
                    # so we turn the entry into a list
                    existing_value = obj[subkey]
                    if isinstance(existing_value, list):
                        if subvalue not in obj[subkey]:
                            obj[subkey].append(subvalue)
                    elif subvalue != existing_value:
                        obj[subkey] = [existing_value, subvalue]
                else:
                    # normal insert of single value
                    obj[subkey] = subvalue
            else:
                # TODO: support inserts of lists (assuming members are strings)?
                logger.warning("Could not add object of %s to metadata under other using key %s" % (
                    str(type(subvalue)), subkey))
    def get_report(self, json=False, tabs=1):
        """
        If json parameter is False, get report as a unicode string.
        If json parameter is True, get report as a dictionary.
        """

        if json:
            return self.__dict__
        else:
            tab = tabs * "\t"
            tab_1 = tab + "\t"
            tab_2 = tab_1 + "\t"
            report = tab + "{}:\n".format(self.field)
            report += tab_1 + "Passed: {}\n".format(self.passed)
            if self.missing:
                report += tab_1 + "Missing From New Results:\n"
                for item in self.missing:
                    report += tab_2 + "{}\n".format(convert_to_unicode(item))
            if self.unexpected:
                report += tab_1 + "Unexpected New Results:\n"
                for item in self.unexpected:
                    report += tab_2 + "{}\n".format(convert_to_unicode(item))

            return report
    def file_path(self):
        """
        Returns a full file path to the file object.
        This is useful for when you want to use this file on libraries which require
        a file path instead of data or file-like object (e.g. cabinet).
        A temporary file is always created; this avoids issues where the identify function requires
        the file_path and the file would be output before a description is set.
        """
        if not self._file_path:
            safe_file_name = convert_to_unicode(self.md5)
            file_path = os.path.join(self.reporter.managed_tempdir, safe_file_name)
            with open(file_path, 'wb') as file_object:
                file_object.write(self.file_data)
            self._file_path = file_path

        return self._file_path
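A hedged usage sketch; upstream this is likely exposed as a property (the decorator is not shown in this snippet), so it is accessed without parentheses. `file_object` is an existing FileObject.

path = file_object.file_path
with open(path, 'rb') as fo:
    magic = fo.read(2)   # e.g. b'MZ' for a PE payload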
    def __init__(
            self, file_data, reporter, pe=None, file_name=None, def_stub=None,
            description=None, output_file=True, use_supplied_fname=True, use_arch=False,
            ext='.bin'):
        """
        Initializes the FileObject.

        :param bytes file_data: Data for the file.
        :param mwcp.Reporter reporter: MWCP reporter.
        :param pefile.PE pe: PE object for the file.
        :param str file_name: File name to use if file is not a PE or use_supplied_fname was specified.
        :param str description: Description of the file object.
        :param bool output_file: Boolean indicating if file should be outputted when the dispatcher processes the file.
        :param bool use_supplied_fname: Boolean indicating if the file_name should be used even if the file is a PE.
        :param str def_stub: def_stub argument to pass to obtain_original_filename()
        :param bool use_arch: use_arch argument to pass to obtain_original_filename()
        :param str ext: default extension to use if not determined from pe file.
        """
        self._file_path = None
        self._md5 = None
        self._sha1 = None
        self._sha256 = None
        self._stack_strings = None
        self._resources = None
        self._elf = None
        self._elf_attempt = False
        self.output_file = output_file
        self._outputted_file = False
        self._kordesii_cache = {}
        self.parent = None   # Parent FileObject from which this FileObject was extracted (set externally).
        self.parser = None   # This will be set by the dispatcher.
        self.file_data = file_data
        self.reporter = reporter
        self.description = description
        self.knowledge_base = {}

        self.pe = pe or pefileutils.obtain_pe(file_data)

        use_supplied_fname = use_supplied_fname or not self.pe

        if file_name and use_supplied_fname:
            self._file_name = file_name
        else:
            self._file_name = pefileutils.obtain_original_filename(
                def_stub or self.md5, pe=self.pe, use_arch=use_arch, ext=ext)
        self._file_name = convert_to_unicode(self._file_name)
    def _compare_results_field(self, results_a, results_b, field_name):
        """
        Compare the values for a single results field in the two passed in results.

        Args:
            results_a (dict): MWCP generated result for a given file using a given parser.
            results_b (dict): MWCP generated result for a given file using a given parser.
            field_name (str): Name of the results field to compare (must be defined in fields.json).
        """

        # Check if provided field_name is a valid key (based on fields.json)
        try:
            field_name_u = convert_to_unicode(field_name)
        except Exception:
            raise Exception(
                "Failed to convert field name '{}' to unicode.".format(field_name))

        try:
            field_type = self._reporter.fields[field_name_u]['type']
        except KeyError:
            raise Exception(
                "Key error. Field name '{}' was not identified as a standardized field.".format(field_name))

        # Establish value to send for comparison
        value_a = None
        value_b = None
        if field_name_u in results_a:
            value_a = results_a[field_name_u]
        if field_name_u in results_b:
            value_b = results_b[field_name_u]

        # Now compare results based on field type (see "fields.json" for more
        # details)
        if field_type == "listofstrings":
            comparer = ListOfStringsComparer(field_name_u)
            comparer.compare(value_a, value_b)
        elif field_type == "listofstringtuples":
            comparer = ListOfStringTuplesComparer(field_name_u)
            comparer.compare(value_a, value_b)
        elif field_type == "dictofstrings":
            comparer = DictOfStringsComparer(field_name_u)
            comparer.compare(value_a, value_b)
        else:
            raise Exception("Unhandled field type '{}' found for field name '{}'.".format(
                field_type, field_name))

        return comparer
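A hedged sketch of driving the comparison; `tester` and the two results dicts are placeholders, and get_report comes from the comparer class shown earlier.

comparer = tester._compare_results_field(results_a, results_b, 'address')
if not comparer.passed:
    print(comparer.get_report())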
    def run(self):
        """Run test case."""
        start_time = default_timer()

        self._reporter.run_parser(self.parser, self.input_file_path)
        self._reporter.metadata[INPUT_FILE_PATH] = convert_to_unicode(self.input_file_path)
        results = self._reporter.metadata

        comparer_results = self._compare_results(self.expected_results, results)
        passed = all(comparer.passed for comparer in comparer_results)

        done_time = default_timer()
        run_time = done_time - start_time

        return TestResult(
            parser=self.parser,
            input_file_path=self.input_file_path,
            passed=passed,
            errors=self._reporter.errors,
            debug=self._reporter.metadata.get('debug', None),
            results=comparer_results,
            run_time=run_time
        )
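A hedged sketch of consuming the result, assuming TestResult exposes its keyword arguments as attributes; `test_case` is a placeholder instance.

result = test_case.run()
status = u'PASS' if result.passed else u'FAIL'
print(u'{}: {} ({:.2f}s)'.format(result.parser, status, result.run_time))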
    def _add_metadata_listofstrings(self, key, value):
        if not value:
            logger.info("no values provided for {}, skipping".format(key))
            return
        value = convert_to_unicode(value)
        obj = self.metadata.setdefault(key, [])
        if key == 'debug' or value not in obj:
            obj.append(value)

        if key == "filepath":
            # use ntpath instead of os.path so we are consistent across platforms. ntpath
            # should work for both windows and unix paths. os.path works for the platform
            # you are running on, not necessarily what the malware was written for.
            # Ex. when running mwcp on linux to process windows
            # malware, os.path will fail due to not handling
            # backslashes correctly.
            self.add_metadata("filename", ntpath.basename(value))
            self.add_metadata("directory", ntpath.dirname(value))

        if key == "c2_url":
            self.add_metadata("url", value)

        if key in ("c2_address", "proxy_address"):
            self.add_metadata("address", value)

        if key == "serviceimage":
            # we use tactic of looking for first .exe in value. This is
            # not guaranteed to be reliable
            if '.exe' in value:
                self.add_metadata("filepath", value[:value.find('.exe') + 4])

        if key == "servicedll":
            self.add_metadata("filepath", value)

        if key == "ssl_cer_sha1":
            if not self.SHA1_RE.match(value):
                logger.error("Invalid SHA1 hash found: {!r}".format(value))

        if key in ("url", "c2_url"):
            # http://[fe80::20c:1234:5678:9abc]:80/badness
            # http://bad.com:80
            # ftp://127.0.0.1/really/bad?hostname=pwned
            match = self.URL_RE.search(value)
            if not match:
                logger.error("Error parsing as url: %s" % value)
                return

            if match.group("path"):
                self.add_metadata("urlpath", match.group("path"))

            if match.group("address"):
                address = match.group("address").rstrip(': ')
                if address.startswith("["):
                    # ipv6--something like
                    # [fe80::20c:1234:5678:9abc]:80
                    domain, found, port = address[1:].partition(']:')
                else:
                    domain, found, port = address.partition(":")

                if found:
                    if port:
                        if key == "c2_url":
                            self.add_metadata("c2_socketaddress", [domain, port, "tcp"])
                        else:
                            self.add_metadata("socketaddress", [domain, port, "tcp"])
                    else:
                        logger.error("Invalid URL {!r} found ':' at end without a port.".format(address))
                else:
                    if key == "c2_url":
                        self.add_metadata("c2_address", address)
                    else:
                        self.add_metadata("address", address)
Example #14
    def _add_metadata_listofstrings(self, key, value):
        if not value:
            logger.error("no values provided for {}, skipping".format(key))
            return
        value = convert_to_unicode(value)
        obj = self.metadata.setdefault(key, [])
        if self._disable_value_dedup or key == 'debug' or value not in obj:
            obj.append(value)

        if self._disable_auto_subfield_parsing:
            return

        if key == "filepath":
            # use ntpath instead of os.path so we are consistent across platforms. ntpath
            # should work for both windows and unix paths. os.path works for the platform
            # you are running on, not necessarily what the malware was written for.
            # Ex. when running mwcp on linux to process windows
            # malware, os.path will fail due to not handling
            # backslashes correctly.
            self.add_metadata("filename", ntpath.basename(value))
            self.add_metadata("directory", ntpath.dirname(value))

        if key == "c2_url":
            self.add_metadata("url", value)

        if key in ("c2_address", "proxy_address"):
            self.add_metadata("address", value)

        if key == "serviceimage":
            # we use tactic of looking for first .exe in value. This is
            # not guaranteed to be reliable
            if '.exe' in value:
                self.add_metadata("filepath", value[:value.find('.exe') + 4])

        if key == "servicedll":
            self.add_metadata("filepath", value)

        if key == "ssl_cer_sha1":
            if not self.SHA1_RE.match(value):
                logger.error("Invalid SHA1 hash found: {!r}".format(value))

        if key in ("url", "c2_url"):
            # http://[fe80::20c:1234:5678:9abc]:80/badness
            # http://bad.com:80
            # ftp://127.0.0.1/really/bad?hostname=pwned
            match = self.URL_RE.search(value)
            if not match:
                logger.error("Error parsing as url: %s" % value)
                return

            if match.group("path"):
                self.add_metadata("urlpath", match.group("path"))

            if match.group("address"):
                address = match.group("address").rstrip(': ')
                if address.startswith("["):
                    # ipv6--something like
                    # [fe80::20c:1234:5678:9abc]:80
                    domain, found, port = address[1:].partition(']:')
                else:
                    domain, found, port = address.partition(":")

                if found:
                    if port:
                        if key == "c2_url":
                            self.add_metadata("c2_socketaddress", [domain, port, "tcp"])
                        else:
                            self.add_metadata("socketaddress", [domain, port, "tcp"])
                    else:
                        logger.error("Invalid URL {!r} found ':' at end without a port.".format(address))
                else:
                    if key == "c2_url":
                        self.add_metadata("c2_address", address)
                    else:
                        self.add_metadata("address", address)
Example #15
    def file_name(self, value):
        # If someone changes the name, record the rename.
        value = convert_to_unicode(value)
        if self._file_name != value:
            self.reporter.debug('[*] Renamed {} to {}'.format(self._file_name, value))
        self._file_name = value
    def file_name(self, value):
        # If someone changes the name, record the rename.
        value = convert_to_unicode(value)
        if self._file_name != value:
            logger.info('Renamed {} to {}'.format(self._file_name, value))
        self._file_name = value
Example #19
    def __init__(
        self,
        file_data: bytes,
        reporter,
        pe: pefile.PE = None,
        file_name=None,
        def_stub=None,
        description=None,
        output_file=True,
        use_supplied_fname=True,
        use_arch=False,
        ext=".bin",
    ):
        """
        Initializes the FileObject.

        :param bytes file_data: Data for the file.
        :param mwcp.Reporter reporter: MWCP reporter.
        :param pefile.PE pe: PE object for the file.
        :param str file_name: File name to use if file is not a PE or use_supplied_fname was specified.
        :param str description: Description of the file object.
        :param bool output_file: Boolean indicating if file should be outputted when the dispatcher processes the file.
        :param bool use_supplied_fname: Boolean indicating if the file_name should be used even if the file is a PE.
        :param str def_stub: def_stub argument to pass to obtain_original_filename()
        :param bool use_arch: use_arch argument to pass to obtain_original_filename()
        :param str ext: default extension to use if not determined from pe file.
        """
        # Ensure we are getting a bytes string. Libraries like pefile depend on this.
        if not isinstance(file_data, bytes):
            raise TypeError("file_data must be a bytes string.")

        self._file_path = None
        self._md5 = None
        self._sha1 = None
        self._sha256 = None
        self._stack_strings = None
        self._static_strings = None
        self._resources = None
        self._elf = None
        self._elf_attempt = False
        self.output_file = output_file
        self._outputted_file = False
        self._kordesii_cache = {}
        self.parent = None  # Parent FileObject from which this FileObject was extracted (set externally).
        self.parser = None  # This will be set by the dispatcher.
        self.children = []  # List of residual FileObject
        self.file_data = file_data
        self.reporter = reporter
        self.description = description
        self.knowledge_base = {}

        self.pe = pe or pefileutils.obtain_pe(file_data)

        use_supplied_fname = use_supplied_fname or not self.pe

        if file_name and use_supplied_fname:
            self._file_name = file_name
        else:
            self._file_name = pefileutils.obtain_original_filename(
                def_stub or self.md5, pe=self.pe, use_arch=use_arch, ext=ext
            )
        self._file_name = convert_to_unicode(self._file_name)
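A hedged construction sketch, assuming a matching file_name property exists alongside the setter shown earlier; `reporter` is an existing mwcp.Reporter and the bytes/description are placeholders. Since the data is not a valid PE, the name falls back to the md5-based stub.

payload = FileObject(b'not a real PE', reporter, description='Decrypted payload')
print(payload.file_name)   # md5-derived stub ending in '.bin'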