Esempio n. 1
0
def _get_meta_data(raw):
    '''
    Collect the known meta fields from a raw byte blob.

    Every name in META_FIELDS is searched for as a ``%%<name>:<value>`` entry
    (an optional third ``%`` and an optional space may precede the name).
    Only the first occurrence of each field is used.

    :param raw: bytes to scan
    :return: dict mapping each field that was found to its value; the value is
        converted to str and passed through remove_uneccessary_spaces
    '''
    found = {}
    for field_name in META_FIELDS:
        pattern = b'%%%? ?(' + make_bytes(field_name) + b'):([\\w=., -]+)'
        match = re.search(pattern, raw)
        if match is None:
            continue
        value = remove_uneccessary_spaces(make_unicode_string(match.group(2)))
        found[make_unicode_string(match.group(1))] = value
    return found
Esempio n. 2
0
def _get_type_from_magic_object(path_or_binary, magic_object, function_name, mime=True):
    '''
    Call ``function_name`` on ``magic_object`` to determine the type of a file or buffer.

    :param path_or_binary: argument handed to the magic function
    :param magic_object: a ``magic.Magic`` instance or another object offering ``function_name``
    :param function_name: name of the attribute of ``magic_object`` that is called
    :param mime: forwarded as ``mime=`` keyword unless ``magic_object`` is a ``magic.Magic``
        instance; also selects which fallback string is returned if the call fails
    :return: the result converted to str, or a fallback string if an exception occurred
    '''
    try:
        # a magic.Magic instance is called without the mime keyword
        extra_arguments = {} if isinstance(magic_object, magic.Magic) else {'mime': mime}
        detect = getattr(magic_object, function_name)
        return make_unicode_string(detect(path_or_binary, **extra_arguments))
    except FileNotFoundError as error:
        logging.error('File not found: {}'.format(error))
        return 'error/file-not-found' if mime else 'Error: File not in storage!'
    except Exception as error:
        # deliberate catch-all: any other failure is logged and mapped to a generic type
        logging.error('Could not determine file type: {} {}'.format(type(error), str(error)))
        return 'application/octet-stream' if mime else 'data'
Esempio n. 3
0
 def _app_get_binary_search_results(self):
     '''
     Fetch the result of a binary search identified by the ``request_id`` query argument.

     * no request id: the result template is rendered with an error message
     * the fetched result is a str: it is shown as error in the result template
     * the fetched result is not None: the joined results and the first yara rule
       are stored as query and the client is redirected to ``database/browse``
     * otherwise the result template is rendered without result and error
     '''
     template = 'database/database_binary_search_results.html'
     request_id = request.args.get('request_id')
     if not request_id:
         return render_template(template, result=None, error='No request ID found',
                                request_id=None, yara_rules=None)

     with ConnectTo(InterComFrontEndBinding, self._config) as connection:
         result, yara_rules = connection.get_binary_search_result(request_id)

     error = None
     if isinstance(result, str):
         error = result
     elif result is not None:
         rule_text = make_unicode_string(yara_rules[0])
         query_uid = self._store_binary_search_query(self._join_results(result), rule_text)
         browse_url = url_for('database/browse', query=query_uid,
                              only_firmwares=request.args.get('only_firmware'))
         return redirect(browse_url)
     return render_template(template, result=None, error=error,
                            request_id=request_id, yara_rules=yara_rules)
Esempio n. 4
0
 def process_object(self, file_object):
     '''
     Analyse a file for init system configuration depending on its path.

     Files that are not text files or whose name is listed in FILE_IGNORES get
     an empty result with an empty summary. For all other files the content is
     stored in ``self.content`` and every matching path check stores the result
     of its method under ``self.NAME``.

     :param file_object: the object to analyse
     :return: the same file object
     '''
     if not self._is_text_file(file_object) or file_object.file_name in FILE_IGNORES:
         file_object.processed_analysis[self.NAME] = {'summary': []}
         return file_object

     file_path = self._get_file_path(file_object)
     self.content = make_unicode_string(file_object.binary)

     # (path matches, name of the method producing the result), in evaluation order.
     # The checks are not exclusive: if several match, the last one wins.
     # NOTE(review): if no check matches, no result is stored at all -- confirm this is intended
     checks = (
         ('/inittab' in file_path, '_get_inittab_config'),
         ('systemd/system/' in file_path, '_get_systemd_config'),
         (file_path.endswith(('etc/rc', 'etc/rc.local', 'etc/rc.firsttime', 'etc/rc.securelevel')),
          '_get_rc_config'),
         (file_path.endswith('etc/initscript'), '_get_initscript_config'),
         ('etc/init/' in file_path or 'etc/event.d/' in file_path, '_get_upstart_config'),
         ('etc/service/' in file_path or 'etc/sv/' in file_path, '_get_runit_config'),
         ('etc/init.d/' in file_path or 'etc/rc.d/' in file_path, '_get_sysvinit_config'),
     )
     for is_match, method_name in checks:
         if is_match:
             file_object.processed_analysis[self.NAME] = getattr(self, method_name)(file_object)
     return file_object
Esempio n. 5
0
 def generate_and_store_file_objects(self, file_paths: List[Path],
                                     extractor_dir: str,
                                     parent: FileObject):
     '''
     Create a FileObject for every non-empty extracted file and store it.

     A file that shows up more than once (same uid) is stored only once; each
     further occurrence only adds another virtual path to the stored object.

     :param file_paths: paths of the extracted files
     :param extractor_dir: extraction directory; its ``files`` sub directory is
         handed to get_object_path_excluding_fact_dirs together with the file path
     :param parent: the object the files were extracted from
     :return: dict mapping uid to the created FileObject
     '''
     extracted_files = {}
     for item in file_paths:
         if file_is_empty(item):
             continue
         current_file = FileObject(file_path=str(item))
         root_uid = parent.get_root_uid()

         base_path = parent.get_base_of_virtual_path(parent.get_virtual_file_paths()[root_uid][0])
         parent_uid = parent.get_uid()
         relative_path = get_object_path_excluding_fact_dirs(
             make_unicode_string(str(item)), str(Path(extractor_dir, 'files')))
         current_virtual_path = '{}|{}|{}'.format(base_path, parent_uid, relative_path)

         current_file.temporary_data['parent_fo_type'] = get_file_type_from_path(parent.file_path)['mime']

         uid = current_file.get_uid()
         if uid in extracted_files:  # the same file is extracted multiple times from one archive
             extracted_files[uid].virtual_file_path[root_uid].append(current_virtual_path)
             continue

         self.db_interface.set_unpacking_lock(current_file.uid)
         self.file_storage_system.store_file(current_file)
         current_file.virtual_file_path = {root_uid: [current_virtual_path]}
         current_file.parent_firmware_uids.add(root_uid)
         extracted_files[uid] = current_file
     return extracted_files
Esempio n. 6
0
 def get_version_for_component(self, result):
     '''
     Add the versions found in the matched strings to ``result['meta']['version']``.

     The third element of every entry in ``result['strings']`` is converted to
     str and handed to ``self.get_version``; duplicate versions are dropped.

     :param result: match result with the keys ``strings`` and ``meta``
     :return: the same (modified) result
     '''
     unique_versions = {
         self.get_version(make_unicode_string(matched_string[2]))
         for matched_string in result['strings']
     }
     result['meta']['version'] = list(unique_versions)
     return result
Esempio n. 7
0
def read_ssl_cert(binary=None, start=None, end=None):
    '''
    Try to parse a PEM certificate from a slice of ``binary``.

    :param binary: data containing the candidate certificate
    :param start: index where the candidate starts
    :param end: index where the end marker of the candidate was found
    :return: text dump of the certificate as str or None if it could not be loaded
    '''
    crypto = OpenSSL.crypto
    try:
        # the slice reaches 25 bytes past ``end``
        # presumably the length of the PEM end marker line -- TODO confirm
        certificate = crypto.load_certificate(crypto.FILETYPE_PEM, binary[start:end + 25])
        text_dump = crypto.dump_certificate(crypto.FILETYPE_TEXT, certificate)
        return make_unicode_string(text_dump)
    except crypto.Error:
        logging.debug('Found SSL certificate signature, but looks false positive')
        return None
Esempio n. 8
0
def get_matched_strings_dict(matched_string_list):
    '''
    Group the offsets of all matches by the matched string.

    The third element of each match is the matched string (converted to str and
    passed through get_save_key_name), the first element is its offset.

    returns a dict {'MATCHED_STRING': [OFFSET_1, OFFSET_2]}
    '''
    offsets_by_string = {}
    for match in matched_string_list:
        key = get_save_key_name(make_unicode_string(match[2]))
        offsets_by_string.setdefault(key, []).append(match[0])
    return offsets_by_string
Esempio n. 9
0
def read_asn1_key(binary=None, offset=None):
    '''
    Try to parse an ASN.1 encoded private key located at ``offset`` in ``binary``.

    :param binary: data containing the candidate key
    :param offset: index of the first byte of the candidate
    :return: text dump of the key as str; None if the first byte is not in
        TLV_KNOWN_STARTS or the key could not be loaded
    '''
    if binary[offset] not in TLV_KNOWN_STARTS:
        return None
    start, size = _get_start_and_size_of_der_field(binary=binary, offset=offset)
    crypto = OpenSSL.crypto
    try:
        private_key = crypto.load_privatekey(crypto.FILETYPE_ASN1, binary[offset:start + size])
        return make_unicode_string(crypto.dump_privatekey(crypto.FILETYPE_TEXT, private_key))
    except crypto.Error:
        logging.debug('Found PKCS#8 key signature, but looks false positive')
        return None
Esempio n. 10
0
def read_pkcs_cert(binary=None, offset=None):
    '''
    Try to read the certificate of a PKCS#12 structure located at ``offset`` in ``binary``.

    :param binary: data containing the candidate structure
    :param offset: index of the first byte of the candidate
    :return: text dump of the certificate as str; None if the first byte is not in
        TLV_KNOWN_STARTS or the structure could not be loaded
    '''
    if binary[offset] not in TLV_KNOWN_STARTS:
        return None
    start, size = _get_start_and_size_of_der_field(binary=binary, offset=offset)
    crypto = OpenSSL.crypto
    try:
        pkcs12_container = crypto.load_pkcs12(buffer=binary[offset:start + size])
        x509_cert = pkcs12_container.get_certificate()
        text_dump = crypto.dump_certificate(type=crypto.FILETYPE_TEXT, cert=x509_cert)
        return make_unicode_string(text_dump)
    except crypto.Error:
        logging.debug('Found PKCS#12 certificate, but passphrase is missing or false positive.')
        return None
Esempio n. 11
0
 def _app_show_binary_search_results(self):
     '''
     Render the result page of a binary search identified by the ``request_id`` query argument.

     * no request id: the template is rendered with an error message
     * the fetched result is a str: it is shown as error
     * the fetched result is not None: a firmware dict is built from it and the
       yara rules are converted to str
     '''
     template = 'database/database_binary_search_results.html'
     request_id = request.args.get('request_id')
     if not request_id:
         return render_template(template, result=None, error='No request ID found',
                                request_id=None, yara_rules=None)

     with ConnectTo(InterComFrontEndBinding, self._config) as connection:
         result, yara_rules = connection.get_binary_search_result(request_id)

     firmware_dict, error = None, None
     if isinstance(result, str):
         error = result
     elif result is not None:
         yara_rules = make_unicode_string(yara_rules)
         firmware_dict = self._build_firmware_dict_for_binary_search(result)
     return render_template(template, result=firmware_dict, error=error,
                            request_id=request_id, yara_rules=yara_rules)
Esempio n. 12
0
    def process_object(self, file_object):
        '''
        Run binwalk on the file and store its output as analysis result.

        The result contains the binwalk output (``signature_analysis``), a
        de-duplicated summary extracted from it (``summary``) and the content of
        the PNG file that binwalk writes into a temporary working directory
        (``entropy_analysis_graph``).

        :param file_object: the object to analyse; ``file_path`` must point to the file on disk
        :return: the same file object with the result stored under ``self.NAME``
        '''
        from shlex import quote  # local import: only needed to build the shell command

        result = {}
        # the context manager removes the directory even if one of the steps below raises
        with TemporaryDirectory(prefix='faf_analysis_binwalk_') as dir_path:
            # quote both paths so that spaces or shell meta characters cannot break the command
            command = '(cd {} && xvfb-run -a binwalk -BEJ {})'.format(
                quote(dir_path), quote(str(file_object.file_path)))
            result['signature_analysis'] = make_unicode_string(execute_shell_command(command))

            result['summary'] = list(set(self._extract_summary(result['signature_analysis'])))

            # the graph is expected in the working directory, named after the analysed file
            pic_path = os.path.join(dir_path, '{}.png'.format(os.path.basename(file_object.file_path)))
            result['entropy_analysis_graph'] = get_binary_from_file(pic_path)

        file_object.processed_analysis[self.NAME] = result
        return file_object
Esempio n. 13
0
 def generate_and_store_file_objects(self, file_paths, tmp_dir, parent):
     '''
     Create a FileObject for every non-empty extracted file and store it.

     A file that shows up more than once (same uid) is stored only once; each
     further occurrence only adds another virtual path to the stored object.

     :param file_paths: paths of the extracted files
     :param tmp_dir: extraction directory, handed to get_chroot_path_excluding_extracted_dir
     :param parent: the object the files were extracted from
     :return: dict mapping uid to the created FileObject
     '''
     extracted_files = {}
     for item in file_paths:
         if file_is_empty(item):
             continue
         current_file = FileObject(file_path=item)
         root_uid = parent.get_root_uid()

         base_path = parent.get_base_of_virtual_path(parent.get_virtual_file_paths()[root_uid][0])
         parent_uid = parent.get_uid()
         chroot_path = get_chroot_path_excluding_extracted_dir(make_unicode_string(item), tmp_dir)
         current_virtual_path = '{}|{}|{}'.format(base_path, parent_uid, chroot_path)

         uid = current_file.get_uid()
         if uid in extracted_files:  # the same file is extracted multiple times from one archive
             extracted_files[uid].virtual_file_path[root_uid].append(current_virtual_path)
             continue

         self.file_storage_system.store_file(current_file)
         current_file.virtual_file_path = {root_uid: [current_virtual_path]}
         current_file.parent_firmware_uids.add(root_uid)
         extracted_files[uid] = current_file
     return extracted_files
Esempio n. 14
0
 def get_version_for_component(self, result, file_object: FileObject):
     '''
     Add the versions found for a matched component to ``result['meta']['version']``.

     The third element of every entry in ``result['strings']`` is converted to
     str and handed to ``self.get_version``. If the meta data has a truthy
     ``format_string`` entry, all matched strings containing ``%s`` are
     additionally handed to extract_data_from_ghidra together with the binary.
     An empty version string is dropped if at least one other version was found.

     :param result: match result with the keys ``strings`` and ``meta``
     :param file_object: the analysed object; its ``binary`` is used for the ghidra lookup
     :return: the same (modified) result
     '''
     versions = set()
     for matched_string in result['strings']:
         match = make_unicode_string(matched_string[2])
         versions.add(self.get_version(match, result['meta']))
     if result['meta'].get('format_string'):
         # Convert with make_unicode_string like above instead of bytes.decode():
         # decode() raises UnicodeDecodeError on matches that are not valid UTF-8,
         # which would abort the whole version detection for this file.
         key_strings = [
             make_unicode_string(s) for _, _, s in result['strings'] if b'%s' in s
         ]
         if key_strings:
             versions.update(
                 extract_data_from_ghidra(file_object.binary, key_strings))
     if len(versions) > 1:
         # if there are actual version results, remove the "empty" result
         versions.discard('')
     result['meta']['version'] = list(versions)
     return result
Esempio n. 15
0
def bytes_to_str_filter(string):
    '''
    Convert ``string`` to str by delegating to make_unicode_string.

    :param string: the value to convert
    :return: the result of make_unicode_string
    '''
    converted = make_unicode_string(string)
    return converted
Esempio n. 16
0
 def set_name(self, name):
     '''
     Store ``name`` as ``file_name`` after converting it with make_unicode_string.

     :param name: the new name
     '''
     unicode_name = make_unicode_string(name)
     self.file_name = unicode_name
Esempio n. 17
0
 def test_string_from_bytes_decoding_error(self):
     '''A byte that cannot be decoded is expected to come out as the replacement character.'''
     undecodable_input = b'\xf5 test string'
     self.assertEqual(make_unicode_string(undecodable_input), '� test string', 'string not correct')
Esempio n. 18
0
 def test_string_from_other_object(self):
     '''An object that is neither str nor bytes is expected to be turned into its string representation.'''
     not_a_string = ['test string']
     self.assertEqual(make_unicode_string(not_a_string), '[\'test string\']', 'string not correct')
Esempio n. 19
0
 def check_type_and_content(self, input_data):
     '''
     Assert that ``input_data`` is converted to the str ``"test string"``.

     :param input_data: the value handed to make_unicode_string
     '''
     converted = make_unicode_string(input_data)
     self.assertIsInstance(converted, str, "type is correct")
     self.assertEqual(converted, "test string", "content correct")
Esempio n. 20
0
 def test_string_from_bytes_none_unicode_character(self):
     '''The single byte 0xfc is expected to be converted to the character "ü".'''
     single_byte_input = b'\xfc test string'
     self.assertEqual(make_unicode_string(single_byte_input), "ü test string", "string not correct")
Esempio n. 21
0
 def _get_identifier(self):
     '''
     Store the leading part of the raw data as 'Extended DSK Identifier' in the meta data.

     The part reaches from the start of ``self.raw`` up to ``self.dsk_file_postion``.
     '''
     identifier_bytes = self.raw[:self.dsk_file_postion]
     self.meta['Extended DSK Identifier'] = make_unicode_string(identifier_bytes)
def test_make_unicode_string(input_data, expected):
    '''Check that make_unicode_string returns the expected str for the given input.'''
    converted = make_unicode_string(input_data)
    assert isinstance(converted, str)
    assert converted == expected
Esempio n. 23
0
 def create_binary_from_path(self) -> None:
     '''
     Fill in missing binary data and file name from ``file_path``.

     Does nothing if no file path is set. Otherwise the binary is created from
     the file if it is not set yet, and the file name defaults to the last
     component of the path.
     '''
     if self.file_path is None:
         return
     if self.binary is None:
         self._create_from_file(self.file_path)
     if self.file_name is None:
         self.file_name = make_unicode_string(Path(self.file_path).name)
Esempio n. 24
0
 def test_string_from_bytes_unicode_character(self):
     '''The two byte UTF-8 encoding of "ü" is expected to be decoded to that character.'''
     utf8_input = b'\xc3\xbc test string'
     self.assertEqual(make_unicode_string(utf8_input), 'ü test string', 'string not correct')
Esempio n. 25
0
    def __init__(
            self,
            binary: Optional[bytes] = None,
            file_name: Optional[str] = None,
            file_path: Optional[str] = None,
            scheduled_analysis: Optional[List[str]] = None
    ):
        '''
        Initialise all attributes of the object.

        :param binary: content of the file; if given, it is handed to ``set_binary``,
            otherwise ``binary``, ``sha256`` and ``size`` start out as None
        :param file_name: name of the file; converted with make_unicode_string if given
        :param file_path: path of the file; after it is stored, ``create_binary_from_path`` is called
        :param scheduled_analysis: stored unchanged as list of scheduled plugins
        '''
        self._uid = None

        #: The set of files included in this file. This is usually true for archives.
        #: Only lists the next layer, not recursively included files on lower extraction layers.
        self.files_included = set()

        #: The list of all recursively included files in this file.
        #: That means files are included that are themselves included in files contained in this file, and so on.
        #: This value is not set by default as it's expensive to aggregate and takes up a lot of memory.
        self.list_of_all_included_files = None

        #: List of parent uids.
        #: A parent in this context is the direct predecessor in a firmware tree.
        #: Not necessarily its root.
        self.parents = []

        #: UID of root (i.e. firmware) object for the given file.
        #: Useful to associate results of children with firmware.
        #: This value might not be set at all times (cf. :func:`get_root_uid`).
        self.root_uid = None

        #: Extraction depth of this object. If outer firmware file, this is 0.
        #: Every extraction increments this by one.
        #: For a file inside a squashfs, that is contained inside a tar archive this would be 1 (tar) + 1 (fs) = 2.
        self.depth = 0

        #: Analysis results for this file.
        #:
        #: Structure of results:
        #: The first level of this dict consists of ``'plugin_name': <result_dict>`` pairs.
        #: The result dict can have any content, but always has at least the fields:
        #:
        #: * analysis_date - float representing the time of analysis in unix time.
        #: * plugin_version - str defining the version of each plugin at time of analysis.
        #: * summary - list holding a summary of each file's result, that can be aggregated.
        self.processed_analysis = {}

        #: List of plugins that are scheduled to be run on this file.
        self.scheduled_analysis = scheduled_analysis

        #: List of comments that have been made on this file.
        #: Comments are dicts with the keys time (float), author (str) and comment (str).
        self.comments = []

        #: Set of parent firmware uids.
        #: Parent uids are from the root object this file belongs to, not its direct predecessor.
        #: Thus, as a file can be part of multiple firmware images, this field is a set.
        #: This field should be closely related to the keys in the virtual file path field.
        self.parent_firmware_uids = set()

        #: This field can be used for arbitrary temporary storage.
        #: It will not be persisted to the database, so it dies after the analysis cycle.
        self.temporary_data = {}

        #: Analysis tags for this file.
        #: An analysis tag has the structure
        #: ``{tag_name: {'value': value, 'color': color, 'propagate': propagate,}, 'root_uid': root uid}``
        #: while the first layer of this dict is a key for each plugin.
        #: So in total you have a dict ``{plugin: [tags, of, plugin], ..}``.
        self.analysis_tags = {}

        #: If an exception occurred during analysis, this field stores a tuple
        #: ``(<plugin name>, <error message>)``
        #: for debugging purposes and as placeholder in UI.
        self.analysis_exception = None

        if binary is not None:
            self.set_binary(binary)
        else:
            #: Binary representation of this file in bytes.
            self.binary = None

            #: SHA256 hash of this file.
            self.sha256 = None

            #: Size of this file in bytes
            self.size = None

        #: Name of this file. Similar to ``file_path``, this probably is generated for carved objects.
        self.file_name = make_unicode_string(file_name) if file_name is not None else file_name

        #: The path of this file. Has to be a local path if binary is not set.
        #: For carved objects, this will likely only be a (generated) name.
        self.file_path = file_path
        # may fill in the binary and the file name from the path set above
        self.create_binary_from_path()

        #: The virtual file path (vfp) is not a path on the analysis machine but the full path inside a firmware object.
        #: For a file inside a filesystem, that was itself packed inside an archive this might look like
        #: `firmware_uid|fs_uid|/etc/hosts` with the pipe sign ( | ) separating extraction levels.
        #: For files such as symlinks, there can be multiple paths inside a single firmware for one unique file.
        self.virtual_file_path = {}