def _get_meta_data(raw):
    '''
    Collect meta data entries for every name in ``META_FIELDS`` found in ``raw``.

    An entry is recognised as ``%%`` (optionally followed by a third ``%`` and/or
    one space), the field name, a colon and a value made of word characters,
    ``=``, ``.``, ``,``, spaces and ``-``. Only the first occurrence of each
    field is used.

    :param raw: bytes to search.
    :return: dict mapping the matched field name to its cleaned-up value.
    '''
    meta = {}
    for field_name in META_FIELDS:
        pattern = b'%%%? ?(' + make_bytes(field_name) + b'):([\\w=., -]+)'
        found = re.search(pattern, raw)
        if found is None:
            continue
        value = remove_uneccessary_spaces(make_unicode_string(found.group(2)))
        key = make_unicode_string(found.group(1))
        meta[key] = value
    return meta
def _get_type_from_magic_object(path_or_binary, magic_object, function_name, mime=True):
    '''
    Call ``function_name`` on ``magic_object`` and return the result as str.

    A ``magic.Magic`` instance is called with ``path_or_binary`` only; any other
    object additionally receives ``mime`` as keyword argument.

    :param path_or_binary: argument handed to the detection function.
    :param magic_object: object providing the detection function.
    :param function_name: name of the attribute to call on ``magic_object``.
    :param mime: selects the mime-style fallback values and is forwarded to
        non-``magic.Magic`` objects.
    :return: the detected type, or a fallback string if detection failed.
    '''
    try:
        # magic.Magic instances do not get the ``mime`` keyword passed per call
        extra_kwargs = {} if isinstance(magic_object, magic.Magic) else {'mime': mime}
        detect = getattr(magic_object, function_name)
        return make_unicode_string(detect(path_or_binary, **extra_kwargs))
    except FileNotFoundError as e:
        logging.error('File not found: {}'.format(e))
        if mime:
            return 'error/file-not-found'
        return 'Error: File not in storage!'
    except Exception as exception:
        logging.error('Could not determine file type: {} {}'.format(type(exception), str(exception)))
        if mime:
            return 'application/octet-stream'
        return 'data'
def _app_get_binary_search_results(self):
    '''
    Fetch the binary search result for the ``request_id`` query argument.

    If the result is a string it is shown as error message. If a result is
    available, it is joined, stored as a query and the client is redirected to
    ``database/browse``. In all other cases the result template is rendered.
    '''
    firmware_dict = error = yara_rules = None
    request_id = request.args.get('request_id') or None
    if request_id is None:
        error = 'No request ID found'
    else:
        with ConnectTo(InterComFrontEndBinding, self._config) as connection:
            result, yara_rules = connection.get_binary_search_result(request_id)
        if isinstance(result, str):
            # a plain string result carries an error message
            error = result
        elif result is not None:
            yara_rules = make_unicode_string(yara_rules[0])
            joined_results = self._join_results(result)
            query_uid = self._store_binary_search_query(joined_results, yara_rules)
            browse_url = url_for(
                'database/browse',
                query=query_uid,
                only_firmwares=request.args.get('only_firmware'),
            )
            return redirect(browse_url)
    return render_template(
        'database/database_binary_search_results.html',
        result=firmware_dict,
        error=error,
        request_id=request_id,
        yara_rules=yara_rules,
    )
def process_object(self, file_object):
    '''
    Store an init system configuration result for ``file_object``.

    Files that are not text files or whose name is listed in ``FILE_IGNORES``
    get an empty result with an empty summary. For all other files each path
    rule below is checked in order; every matching rule overwrites the result
    written by an earlier one.

    :param file_object: the object to analyse.
    :return: the same ``file_object``.
    '''
    if not self._is_text_file(file_object) or file_object.file_name in FILE_IGNORES:
        file_object.processed_analysis[self.NAME] = dict()
        file_object.processed_analysis[self.NAME]['summary'] = list()
        return file_object

    file_path = self._get_file_path(file_object)
    self.content = make_unicode_string(file_object.binary)

    if '/inittab' in file_path:
        file_object.processed_analysis[self.NAME] = self._get_inittab_config(file_object)
    if 'systemd/system/' in file_path:
        file_object.processed_analysis[self.NAME] = self._get_systemd_config(file_object)
    rc_suffixes = ('etc/rc', 'etc/rc.local', 'etc/rc.firsttime', 'etc/rc.securelevel')
    if file_path.endswith(rc_suffixes):
        file_object.processed_analysis[self.NAME] = self._get_rc_config(file_object)
    if file_path.endswith('etc/initscript'):
        file_object.processed_analysis[self.NAME] = self._get_initscript_config(file_object)
    if any(marker in file_path for marker in ('etc/init/', 'etc/event.d/')):
        file_object.processed_analysis[self.NAME] = self._get_upstart_config(file_object)
    if any(marker in file_path for marker in ('etc/service/', 'etc/sv/')):
        file_object.processed_analysis[self.NAME] = self._get_runit_config(file_object)
    if any(marker in file_path for marker in ('etc/init.d/', 'etc/rc.d/')):
        file_object.processed_analysis[self.NAME] = self._get_sysvinit_config(file_object)
    return file_object
def generate_and_store_file_objects(self, file_paths: List[Path], extractor_dir: str, parent: FileObject):
    '''
    Create a ``FileObject`` for every non-empty path in ``file_paths``.

    Each new object gets a virtual path of the form ``<base>|<parent uid>|<path>``
    and the mime type of the parent in ``temporary_data['parent_fo_type']``.
    Objects with a uid not seen before are locked via
    ``self.db_interface.set_unpacking_lock``, passed to
    ``self.file_storage_system.store_file`` and added to the result. For a uid
    that was already seen, only the additional virtual path is recorded.

    :param file_paths: paths of the extracted files.
    :param extractor_dir: directory whose ``files`` sub directory is handed to
        ``get_object_path_excluding_fact_dirs``.
    :param parent: the object the files were extracted from.
    :return: dict mapping uid to ``FileObject``.
    '''
    extracted_files = {}
    for extracted_path in file_paths:
        if file_is_empty(extracted_path):
            continue
        current_file = FileObject(file_path=str(extracted_path))
        virtual_path_base = parent.get_base_of_virtual_path(
            parent.get_virtual_file_paths()[parent.get_root_uid()][0]
        )
        parent_uid = parent.get_uid()
        path_inside_parent = get_object_path_excluding_fact_dirs(
            make_unicode_string(str(extracted_path)),
            str(Path(extractor_dir, 'files')),
        )
        current_virtual_path = '{}|{}|{}'.format(virtual_path_base, parent_uid, path_inside_parent)
        parent_type = get_file_type_from_path(parent.file_path)
        current_file.temporary_data['parent_fo_type'] = parent_type['mime']

        if current_file.get_uid() not in extracted_files:
            self.db_interface.set_unpacking_lock(current_file.uid)
            self.file_storage_system.store_file(current_file)
            current_file.virtual_file_path = {parent.get_root_uid(): [current_virtual_path]}
            current_file.parent_firmware_uids.add(parent.get_root_uid())
            extracted_files[current_file.get_uid()] = current_file
            continue

        # the same file is extracted multiple times from one archive
        known_file = extracted_files[current_file.get_uid()]
        known_file.virtual_file_path[parent.get_root_uid()].append(current_virtual_path)
    return extracted_files
def get_version_for_component(self, result):
    '''
    Determine the versions for all matched strings of ``result``.

    The third element of every entry in ``result['strings']`` is converted to
    str and passed to ``self.get_version``. The distinct return values are
    written to ``result['meta']['version']`` as a list.

    :param result: dict with the keys ``strings`` and ``meta``.
    :return: the same ``result`` dict.
    '''
    found_versions = {
        self.get_version(make_unicode_string(string_match[2]))
        for string_match in result['strings']
    }
    result['meta']['version'] = list(found_versions)
    return result
def read_ssl_cert(binary=None, start=None, end=None):
    '''
    Try to load a PEM certificate from ``binary`` and return its text dump.

    :param binary: bytes containing the certificate candidate.
    :param start: index of the first byte of the candidate.
    :param end: index used as end of the candidate; 25 more bytes are included
        (presumably the length of the PEM footer line -- TODO confirm).
    :return: the textual certificate dump or ``None`` if OpenSSL rejects the data.
    '''
    crypto = OpenSSL.crypto
    try:
        certificate = crypto.load_certificate(crypto.FILETYPE_PEM, binary[start:end + 25])
        text_dump = crypto.dump_certificate(crypto.FILETYPE_TEXT, certificate)
        return make_unicode_string(text_dump)
    except crypto.Error:
        logging.debug('Found SSL certificate signature, but looks false positive')
        return None
def get_matched_strings_dict(matched_string_list):
    '''
    Group the offsets of all matches by their matched string.

    For every entry, element 2 is the matched string (converted to str and
    passed through ``get_save_key_name``) and element 0 is the offset.

    :return: dict of the form ``{'MATCHED_STRING': [OFFSET_1, OFFSET_2]}``
    '''
    offsets_by_string = {}
    for entry in matched_string_list:
        key = get_save_key_name(make_unicode_string(entry[2]))
        offsets_by_string.setdefault(key, []).append(entry[0])
    return offsets_by_string
def read_asn1_key(binary=None, offset=None):
    '''
    Try to load an ASN.1 encoded private key located at ``offset`` in ``binary``.

    :param binary: bytes containing the key candidate.
    :param offset: index of the first byte of the candidate.
    :return: the textual key dump, or ``None`` if the byte at ``offset`` is not
        in ``TLV_KNOWN_STARTS`` or OpenSSL rejects the data.
    '''
    if binary[offset] not in TLV_KNOWN_STARTS:
        return None
    start, size = _get_start_and_size_of_der_field(binary=binary, offset=offset)
    crypto = OpenSSL.crypto
    try:
        private_key = crypto.load_privatekey(crypto.FILETYPE_ASN1, binary[offset:start + size])
        key_dump = crypto.dump_privatekey(crypto.FILETYPE_TEXT, private_key)
        return make_unicode_string(key_dump)
    except crypto.Error:
        logging.debug('Found PKCS#8 key signature, but looks false positive')
        return None
def read_pkcs_cert(binary=None, offset=None):
    '''
    Try to load a PKCS#12 container at ``offset`` in ``binary`` and dump its certificate.

    :param binary: bytes containing the container candidate.
    :param offset: index of the first byte of the candidate.
    :return: the textual certificate dump, or ``None`` if the byte at ``offset``
        is not in ``TLV_KNOWN_STARTS`` or OpenSSL rejects the data.
    '''
    if binary[offset] not in TLV_KNOWN_STARTS:
        return None
    start, size = _get_start_and_size_of_der_field(binary=binary, offset=offset)
    crypto = OpenSSL.crypto
    try:
        container = crypto.load_pkcs12(buffer=binary[offset:start + size])
        x509_cert = container.get_certificate()
        certificate_dump = crypto.dump_certificate(type=crypto.FILETYPE_TEXT, cert=x509_cert)
        return make_unicode_string(certificate_dump)
    except crypto.Error:
        logging.debug('Found PKCS#12 certificate, but passphrase is missing or false positive.')
        return None
def _app_show_binary_search_results(self):
    '''
    Render the binary search result page for the ``request_id`` query argument.

    If the fetched result is a string it is shown as error message. Otherwise,
    if a result is available, the firmware dict for the template is built from it.
    '''
    firmware_dict = error = yara_rules = None
    request_id = request.args.get('request_id') or None
    if request_id is None:
        error = 'No request ID found'
    else:
        with ConnectTo(InterComFrontEndBinding, self._config) as connection:
            result, yara_rules = connection.get_binary_search_result(request_id)
        if isinstance(result, str):
            # a plain string result carries an error message
            error = result
        elif result is not None:
            yara_rules = make_unicode_string(yara_rules)
            firmware_dict = self._build_firmware_dict_for_binary_search(result)
    return render_template(
        'database/database_binary_search_results.html',
        result=firmware_dict,
        error=error,
        request_id=request_id,
        yara_rules=yara_rules,
    )
def process_object(self, file_object):
    '''
    Run binwalk on the file and store its output as analysis result.

    The result contains the decoded binwalk output (``signature_analysis``),
    a de-duplicated ``summary`` derived from it by ``self._extract_summary``
    and the content of the ``<file name>.png`` file found in the working
    directory afterwards (``entropy_analysis_graph``).

    :param file_object: the object to analyse; its ``file_path`` is passed to binwalk.
    :return: the same ``file_object`` with ``processed_analysis[self.NAME]`` set.
    '''
    import shlex

    result = {}
    # The context manager removes the directory even if one of the steps below raises.
    with TemporaryDirectory(prefix='faf_analysis_binwalk_') as dir_path:
        # Quote both paths: the command is interpreted by a shell, so spaces or
        # shell meta characters in a path would otherwise change the command.
        command = '(cd {} && xvfb-run -a binwalk -BEJ {})'.format(
            shlex.quote(dir_path), shlex.quote(str(file_object.file_path))
        )
        signature_analysis_result = execute_shell_command(command)
        result['signature_analysis'] = make_unicode_string(signature_analysis_result)
        result['summary'] = list(set(self._extract_summary(result['signature_analysis'])))
        pic_path = os.path.join(dir_path, '{}.png'.format(os.path.basename(file_object.file_path)))
        result['entropy_analysis_graph'] = get_binary_from_file(pic_path)
    file_object.processed_analysis[self.NAME] = result
    return file_object
def generate_and_store_file_objects(self, file_paths, tmp_dir, parent):
    '''
    Create a ``FileObject`` for every non-empty path in ``file_paths``.

    Each new object gets a virtual path of the form ``<base>|<parent uid>|<path>``.
    Objects with a uid not seen before are passed to
    ``self.file_storage_system.store_file`` and added to the result. For a uid
    that was already seen, only the additional virtual path is recorded.

    :param file_paths: paths of the extracted files.
    :param tmp_dir: directory handed to ``get_chroot_path_excluding_extracted_dir``.
    :param parent: the object the files were extracted from.
    :return: dict mapping uid to ``FileObject``.
    '''
    extracted_files = {}
    for extracted_path in file_paths:
        if file_is_empty(extracted_path):
            continue
        current_file = FileObject(file_path=extracted_path)
        virtual_path_base = parent.get_base_of_virtual_path(
            parent.get_virtual_file_paths()[parent.get_root_uid()][0]
        )
        parent_uid = parent.get_uid()
        path_inside_parent = get_chroot_path_excluding_extracted_dir(
            make_unicode_string(extracted_path), tmp_dir
        )
        current_virtual_path = '{}|{}|{}'.format(virtual_path_base, parent_uid, path_inside_parent)

        if current_file.get_uid() not in extracted_files:
            self.file_storage_system.store_file(current_file)
            current_file.virtual_file_path = {parent.get_root_uid(): [current_virtual_path]}
            current_file.parent_firmware_uids.add(parent.get_root_uid())
            extracted_files[current_file.get_uid()] = current_file
            continue

        # the same file is extracted multiple times from one archive
        known_file = extracted_files[current_file.get_uid()]
        known_file.virtual_file_path[parent.get_root_uid()].append(current_virtual_path)
    return extracted_files
def get_version_for_component(self, result, file_object: FileObject):
    '''
    Determine the versions for all matched strings of ``result``.

    The third element of every entry in ``result['strings']`` is converted to
    str and passed to ``self.get_version`` together with ``result['meta']``.
    If ``result['meta']['format_string']`` is set, all matched strings that
    contain ``%s`` are additionally handed to ``extract_data_from_ghidra``
    together with the binary of ``file_object``.

    :param result: dict with the keys ``strings`` and ``meta``.
    :param file_object: the object the matches belong to.
    :return: the same ``result`` dict with ``result['meta']['version']`` set.
    '''
    versions = set()
    for matched_string in result['strings']:
        match = make_unicode_string(matched_string[2])
        versions.add(self.get_version(match, result['meta']))
    if result['meta'].get('format_string'):
        # Convert with the same helper as above: a bare ``bytes.decode()`` raises
        # UnicodeDecodeError for matches that are not valid UTF-8.
        key_strings = [
            make_unicode_string(s) for _, _, s in result['strings'] if b'%s' in s
        ]
        if key_strings:
            versions.update(extract_data_from_ghidra(file_object.binary, key_strings))
    if '' in versions and len(versions) > 1:  # if there are actual version results, remove the "empty" result
        versions.remove('')
    result['meta']['version'] = list(versions)
    return result
def bytes_to_str_filter(string):
    '''Return ``string`` converted by ``make_unicode_string``.'''
    converted = make_unicode_string(string)
    return converted
def set_name(self, name):
    '''Set ``self.file_name`` to ``name`` converted by ``make_unicode_string``.'''
    converted_name = make_unicode_string(name)
    self.file_name = converted_name
def test_string_from_bytes_decoding_error(self):
    '''A leading ``0xf5`` byte is expected to be converted to the character ``�``.'''
    undecodable_input = b'\xf5 test string'
    converted = make_unicode_string(undecodable_input)
    self.assertEqual(converted, '� test string', 'string not correct')
def test_string_from_other_object(self):
    '''A list is expected to be converted to its string representation.'''
    list_input = ['test string']
    converted = make_unicode_string(list_input)
    self.assertEqual(converted, '[\'test string\']', 'string not correct')
def check_type_and_content(self, input_data):
    '''Assert that ``input_data`` is converted to the str ``test string``.'''
    converted = make_unicode_string(input_data)
    self.assertIsInstance(converted, str, "type is correct")
    self.assertEqual(converted, "test string", "content correct")
def test_string_from_bytes_none_unicode_character(self):
    '''A single ``0xfc`` byte is expected to be converted to ``ü``.'''
    single_byte_input = b'\xfc test string'
    converted = make_unicode_string(single_byte_input)
    self.assertEqual(converted, "ü test string", "string not correct")
def _get_identifier(self):
    '''Store the first ``self.dsk_file_postion`` bytes of ``self.raw`` as identifier in ``self.meta``.'''
    identifier_bytes = self.raw[:self.dsk_file_postion]
    self.meta['Extended DSK Identifier'] = make_unicode_string(identifier_bytes)
def test_make_unicode_string(input_data, expected):
    '''The conversion of ``input_data`` must be a str equal to ``expected``.'''
    converted = make_unicode_string(input_data)
    assert isinstance(converted, str)
    assert converted == expected
def create_binary_from_path(self) -> None:
    '''
    Fill in missing binary and file name from ``self.file_path``.

    Does nothing if no file path is set. Otherwise ``self._create_from_file``
    is called if no binary is set yet, and ``self.file_name`` is set to the
    last component of the path if no name is set yet.
    '''
    if self.file_path is None:
        return
    if self.binary is None:
        self._create_from_file(self.file_path)
    if self.file_name is None:
        name_from_path = Path(self.file_path).name
        self.file_name = make_unicode_string(name_from_path)
def test_string_from_bytes_unicode_character(self):
    '''The UTF-8 byte sequence ``0xc3 0xbc`` is expected to be converted to ``ü``.'''
    utf8_input = b'\xc3\xbc test string'
    converted = make_unicode_string(utf8_input)
    self.assertEqual(converted, 'ü test string', 'string not correct')
def __init__( self, binary: Optional[bytes] = None, file_name: Optional[str] = None, file_path: Optional[str] = None, scheduled_analysis: List[str] = None ): self._uid = None #: The set of files included in this file. This is usually true for archives. #: Only lists the next layer, not recursively included files on lower extraction layers. self.files_included = set() #: The list of all recursively included files in this file. #: That means files are included that are themselves included in files contained in this file, and so on. #: This value is not set by default as it's expensive to aggregate and takes up a lot of memory. self.list_of_all_included_files = None #: List of parent uids. #: A parent in this context is the direct predecessor in a firmware tree. #: Not necessarily it's root. self.parents = [] #: UID of root (i.e. firmware) object for the given file. #: Useful to associate results of children with firmware. #: This value might not be set at all times (cf. :func:`get_root_uid`). self.root_uid = None #: Extraction depth of this object. If outer firmware file, this is 0. #: Every extraction increments this by one. #: For a file inside a squashfs, that is contained inside a tar archive this would be 1 (tar) + 1 (fs) = 2. self.depth = 0 #: Analysis results for this file. #: #: Structure of results: #: The first level of this dict is a pair of ``'plugin_name': <result_dict>`` pairs. #: The result dict can have any content, but always has at least the fields: #: #: * analysis_date - float representing the time of analysis in unix time. #: * plugin_version - str defining the version of each plugin at time of analysis. #: * summary - list holding a summary of each file's result, that can be aggregated. self.processed_analysis = {} #: List of plugins that are scheduled to be run on this file. self.scheduled_analysis = scheduled_analysis #: List of comments that have been made on this file. 
#: Comments are dicts with the keys time (float), author (str) and comment (str). self.comments = [] #: Set of parent firmware uids. #: Parent uids are from the root object, this file belongs to, not its direct predecessor. #: Thus, as a file can be part of multiple firmware images, this field is a set. #: This field should be closely related to the keys in the virtual file path field. self.parent_firmware_uids = set() #: This field can be used for arbitrary temporary storage. #: It will not be persisted to the database, so it dies after the analysis cycle. self.temporary_data = {} #: Analysis tags for this file. #: An analysis tag has the structure #: ``{tag_name: {'value': value, 'color': color, 'propagate': propagate,}, 'root_uid': root uid}`` #: while the first layer of this dict is a key for each plugin. #: So in total you have a dict ``{plugin: [tags, of, plugin], ..}``. self.analysis_tags = {} #: If an exception occurred during analysis, this fields stores a tuple #: ``(<plugin name>, <error message>)`` #: for debugging purposes and as placeholder in UI. self.analysis_exception = None if binary is not None: self.set_binary(binary) else: #: Binary representation of this file in bytes. self.binary = None #: SHA256 hash of this file. self.sha256 = None #: Size of this file in bytes self.size = None #: Name of this file. Similar to ``file_path``, this probably is generated for carved objects. self.file_name = make_unicode_string(file_name) if file_name is not None else file_name #: The path of this file. Has to be a local path if binary is not set. #: For carved objects, this will likely only be a (generated) name. self.file_path = file_path self.create_binary_from_path() #: The virtual file path (vfp) is not a path on the analysis machine but the full path inside a firmware object. 
#: For a file inside a filesystem, that was itself packed inside an archive this might look like #: `firmware_uid|fs_uid|/etc/hosts` with the pipe sign ( | ) separating extraction levels. #: For files such as symlinks, there can be multiple paths inside a single firmware for one unique file. self.virtual_file_path = {}