class InterComBackEndDeleteFile(InterComListener):

    CONNECTION_TYPE = "file_delete_task"

    def additional_setup(self, config=None):
        self.fs_organizer = FS_Organizer(config=config)

    def post_processing(self, task, task_id):
        if self._entry_was_removed_from_db(task['_id']):
            logging.info('remove file: {}'.format(task['_id']))
            self.fs_organizer.delete_file(task['_id'])
        return None

    def _entry_was_removed_from_db(self, uid):
        with ConnectTo(MongoInterfaceCommon, self.config) as db:
            if db.existence_quick_check(uid):
                logging.debug('file not removed, because database entry exists: {}'.format(uid))
                return False
            elif db.check_unpacking_lock(uid):
                logging.debug('file not removed, because it is processed by unpacker: {}'.format(uid))
                return False
        return True
class InterComBackEndAnalysisTask(InterComListener):

    CONNECTION_TYPE = 'analysis_task'

    def additional_setup(self, config=None):
        self.fs_organizer = FS_Organizer(config=config)

    def post_processing(self, task, task_id):
        self.fs_organizer.store_file(task)
        return task
class InterComBackEndDeleteFile(InterComListener):

    CONNECTION_TYPE = "file_delete_task"

    def additional_setup(self, config=None):
        self.fs_organizer = FS_Organizer(config=config)

    def post_processing(self, task, task_id):
        self.fs_organizer.delete_file(task)
        return None
def _delete_firmware(self):
    fs_backend = FS_Organizer(config=self.config)
    local_firmware_path = Path(fs_backend.generate_path_from_uid(self.test_fw_a.uid))
    self.assertTrue(local_firmware_path.exists(), 'file not found before delete')
    rv = self.test_client.get('/admin/delete/{}'.format(self.test_fw_a.uid))
    self.assertIn(b'Deleted 4 file(s) from database', rv.data, 'deletion success page not shown')
    rv = self.test_client.get('/analysis/{}'.format(self.test_fw_a.uid))
    self.assertIn(b'File not found in database', rv.data, 'file is still available after delete')
    time.sleep(5)  # allow the backend some time to remove the file before checking the file system
    self.assertFalse(local_firmware_path.exists(), 'file not deleted')
def test_re_analyze_task(self):
    self.backend = InterComBackEndReAnalyzeTask(config=self.config)
    fs_organizer = FS_Organizer(config=self.config)
    test_fw = create_test_firmware()
    fs_organizer.store_file(test_fw)
    original_file_path = test_fw.file_path
    original_binary = test_fw.binary
    test_fw.file_path = None
    test_fw.binary = None
    self.frontend.add_re_analyze_task(test_fw)
    task = self.backend.get_next_task()
    self.assertEqual(task.get_uid(), test_fw.get_uid(), 'uid not correct')
    self.assertIsNotNone(task.file_path, 'file path not set')
    self.assertEqual(task.file_path, original_file_path)
    self.assertIsNotNone(task.binary, 'binary not set')
    self.assertEqual(task.binary, original_binary, 'binary content not correct')
class Test_FS_Organizer(unittest.TestCase):

    def setUp(self):
        self.ds_tmp_dir = TemporaryDirectory(prefix='faf_tests_')
        config = ConfigParser()
        config.add_section('data_storage')
        config.set('data_storage', 'firmware_file_storage_directory', self.ds_tmp_dir.name)
        self.fs_organizer = FS_Organizer(config)

    def tearDown(self):
        self.ds_tmp_dir.cleanup()
        gc.collect()

    def check_file_presence_and_content(self, file_path, file_binary):
        self.assertTrue(os.path.exists(file_path), 'file exists')
        self.assertEqual(get_binary_from_file(file_path), file_binary, 'correct content')

    def test_generate_path(self):
        test_binary = b'abcde'
        file_object = FileObject(test_binary)
        file_path = self.fs_organizer.generate_path(file_object)
        # file path should be 'DATA_DIR/UID_PREFIX/UID'
        self.assertEqual(
            file_path,
            '{}/36/36bbe50ed96841d10443bcb670d6554f0a34b761be67ec9c4a8ad2c0c44ca42c_5'.format(self.ds_tmp_dir.name),
            'generate file path'
        )

    def test_store_and_delete_file(self):
        test_binary = b'abcde'
        file_object = FileObject(test_binary)
        self.fs_organizer.store_file(file_object)
        self.check_file_presence_and_content(
            '{}/36/36bbe50ed96841d10443bcb670d6554f0a34b761be67ec9c4a8ad2c0c44ca42c_5'.format(self.ds_tmp_dir.name),
            b'abcde'
        )
        self.assertEqual(
            file_object.file_path,
            '{}/36/36bbe50ed96841d10443bcb670d6554f0a34b761be67ec9c4a8ad2c0c44ca42c_5'.format(self.ds_tmp_dir.name),
            'wrong file path set in file object'
        )
        self.fs_organizer.delete_file(file_object.get_uid())
        self.assertFalse(os.path.exists(file_object.file_path), 'file not deleted')
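# --- Usage sketch (not part of the original sources) ---
# A minimal sketch of the FS_Organizer round trip exercised by the test above,
# using only calls visible in these snippets: FS_Organizer(config),
# store_file(file_object), generate_path_from_uid(uid) and delete_file(uid).
# The import paths for FS_Organizer and FileObject are assumptions; adjust them
# to the project layout.
# from storage.fs_organizer import FS_Organizer   # assumed import path
# from objects.file import FileObject             # assumed import path
from configparser import ConfigParser
from tempfile import TemporaryDirectory

tmp_dir = TemporaryDirectory(prefix='faf_tests_')
config = ConfigParser()
config.add_section('data_storage')
config.set('data_storage', 'firmware_file_storage_directory', tmp_dir.name)

fs_organizer = FS_Organizer(config)
file_object = FileObject(b'abcde')
fs_organizer.store_file(file_object)  # writes DATA_DIR/<first two uid chars>/<uid> and sets file_object.file_path
print(fs_organizer.generate_path_from_uid(file_object.get_uid()))
fs_organizer.delete_file(file_object.get_uid())
tmp_dir.cleanup()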
class InterComBackEndReAnalyzeTask(InterComListener):

    CONNECTION_TYPE = 're_analyze_task'

    def additional_setup(self, config=None):
        self.fs_organizer = FS_Organizer(config=config)

    def post_processing(self, task, task_id):
        file_path = self.fs_organizer.generate_path(task)
        task.set_file_path(file_path)
        return task
class Unpacker(UnpackBase):

    def __init__(self, config=None, worker_id=None, db_interface=None):
        super().__init__(config=config, worker_id=worker_id)
        self.file_storage_system = FS_Organizer(config=self.config)
        self.db_interface = db_interface

    def unpack(self, current_fo: FileObject):
        '''
        Recursively extract all objects included in current_fo and add them to current_fo.files_included
        '''
        logging.debug('[worker {}] Extracting {}: Depth: {}'.format(self.worker_id, current_fo.get_uid(), current_fo.depth))

        if current_fo.depth >= self.config.getint('unpack', 'max_depth'):
            logging.warning('{} is not extracted since depth limit ({}) is reached'.format(current_fo.get_uid(), self.config.get('unpack', 'max_depth')))
            return []

        tmp_dir = TemporaryDirectory(prefix='fact_unpack_')
        file_path = self._generate_local_file_path(current_fo)
        extracted_files = self.extract_files_from_file(file_path, tmp_dir.name)

        extracted_file_objects = self.generate_and_store_file_objects(extracted_files, tmp_dir.name, current_fo)
        extracted_file_objects = self.remove_duplicates(extracted_file_objects, current_fo)
        self.add_included_files_to_object(extracted_file_objects, current_fo)

        # set meta data
        current_fo.processed_analysis['unpacker'] = json.loads(Path(tmp_dir.name, 'reports', 'meta.json').read_text())

        self.cleanup(tmp_dir)
        return extracted_file_objects

    def cleanup(self, tmp_dir):
        try:
            tmp_dir.cleanup()
        except OSError as error:
            logging.error('[worker {}] Could not CleanUp tmp_dir: {} - {}'.format(self.worker_id, type(error), str(error)))

    @staticmethod
    def add_included_files_to_object(included_file_objects, root_file_object):
        for item in included_file_objects:
            root_file_object.add_included_file(item)

    def generate_and_store_file_objects(self, file_paths: List[Path], extractor_dir: str, parent: FileObject):
        extracted_files = {}
        for item in file_paths:
            if not file_is_empty(item):
                current_file = FileObject(file_path=str(item))
                current_virtual_path = '{}|{}|{}'.format(
                    parent.get_base_of_virtual_path(parent.get_virtual_file_paths()[parent.get_root_uid()][0]),
                    parent.get_uid(),
                    get_object_path_excluding_fact_dirs(make_unicode_string(str(item)), str(Path(extractor_dir, 'files')))
                )
                current_file.temporary_data['parent_fo_type'] = get_file_type_from_path(parent.file_path)['mime']
                if current_file.get_uid() in extracted_files:  # the same file is extracted multiple times from one archive
                    extracted_files[current_file.get_uid()].virtual_file_path[parent.get_root_uid()].append(current_virtual_path)
                else:
                    self.db_interface.set_unpacking_lock(current_file.uid)
                    self.file_storage_system.store_file(current_file)
                    current_file.virtual_file_path = {parent.get_root_uid(): [current_virtual_path]}
                    current_file.parent_firmware_uids.add(parent.get_root_uid())
                    extracted_files[current_file.get_uid()] = current_file
        return extracted_files

    @staticmethod
    def remove_duplicates(extracted_fo_dict, parent_fo):
        if parent_fo.get_uid() in extracted_fo_dict:
            del extracted_fo_dict[parent_fo.get_uid()]
        return make_list_from_dict(extracted_fo_dict)

    def _generate_local_file_path(self, file_object: FileObject):
        # fall back to the internal file storage if the original path no longer exists
        if not Path(file_object.file_path).exists():
            local_path = self.file_storage_system.generate_path(file_object.get_uid())
            return local_path
        return file_object.file_path
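# --- Illustration (not part of the original sources) ---
# A minimal sketch of the pipe-separated virtual file path assembled in
# generate_and_store_file_objects above. It assumes the parent object sits
# directly below the root firmware, so its virtual-path base is just the root
# UID; all UIDs and the file name are hypothetical.
root_uid = 'aaaa1111_100'          # hypothetical UID of the root firmware
parent_uid = 'bbbb2222_50'         # hypothetical UID of the archive being unpacked
path_inside_parent = '/bin/busybox'
virtual_path = '{}|{}|{}'.format(root_uid, parent_uid, path_inside_parent)
print(virtual_path)                # -> 'aaaa1111_100|bbbb2222_50|/bin/busybox'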
def get_file_paths_of_files_included_in_fo(self, fo_uid: str) -> List[str]:
    fs_organizer = FS_Organizer(self.config)
    return [
        fs_organizer.generate_path_from_uid(uid)
        for uid in self.get_uids_of_all_included_files(fo_uid)
    ]
class Unpacker(UnpackBase):

    GENERIC_FS_FALLBACK_CANDIDATES = ['SquashFS']
    GENERIC_CARVER_FALLBACK_BLACKLIST = ['generic_carver', 'NOP', 'PaTool', 'SFX']
    VALID_COMPRESSED_FILE_TYPES = [
        'application/x-shockwave-flash', 'audio/mpeg', 'audio/ogg', 'image/png',
        'image/jpeg', 'image/gif', 'video/mp4', 'video/ogg'
    ]
    HEADER_OVERHEAD = 256

    def __init__(self, config=None, worker_id=None):
        super().__init__(config=config, worker_id=worker_id)
        self.file_storage_system = FS_Organizer(config=self.config)

    def unpack(self, current_fo):
        '''
        Recursively extract all objects included in current_fo and add them to current_fo.files_included
        '''
        logging.debug('[worker {}] Extracting {}: Depth: {}'.format(self.worker_id, current_fo.get_uid(), current_fo.depth))

        tmp_dir = TemporaryDirectory(prefix='faf_unpack_')

        extracted_files, meta_data = self.extract_files_from_file(current_fo.file_path, tmp_dir.name, current_fo.depth)
        extracted_files, meta_data = self._do_fallback_if_necessary(extracted_files, meta_data, tmp_dir, current_fo)

        extracted_file_objects = self.generate_and_store_file_objects(extracted_files, tmp_dir.name, current_fo)
        extracted_file_objects = self.remove_duplicates(extracted_file_objects, current_fo)
        self.add_included_files_to_object(extracted_file_objects, current_fo)
        self.add_additional_unpacking_meta(current_fo, meta_data)

        self.get_unpack_status(current_fo, extracted_file_objects)

        self.cleanup(tmp_dir)
        return extracted_file_objects

    def _do_fallback_if_necessary(self, extracted_files, meta_data, tmp_dir, current_fo):
        if len(extracted_files) < 1 and meta_data['plugin_used'] in self.GENERIC_FS_FALLBACK_CANDIDATES:
            logging.warning('[worker {}] {} could not extract any files -> generic fs fallback'.format(self.worker_id, meta_data['plugin_used']))
            extracted_files, meta_data = self.unpacking_fallback(current_fo.file_path, tmp_dir.name, meta_data, 'generic/fs')
        if len(extracted_files) < 1 and meta_data['plugin_used'] not in self.GENERIC_CARVER_FALLBACK_BLACKLIST:
            logging.warning('[worker {}] {} could not extract any files -> generic carver fallback'.format(self.worker_id, meta_data['plugin_used']))
            extracted_files, meta_data = self.unpacking_fallback(current_fo.file_path, tmp_dir.name, meta_data, 'generic/carver')
        return extracted_files, meta_data

    def cleanup(self, tmp_dir):
        try:
            tmp_dir.cleanup()
        except Exception as e:
            logging.error('[worker {}] Could not CleanUp tmp_dir: {} - {}'.format(self.worker_id, sys.exc_info()[0].__name__, e))

    def get_unpack_status(self, fo, extracted_fos):
        fo.processed_analysis['unpacker']['summary'] = []
        fo_entropy = avg_entropy(fo.binary)
        fo.processed_analysis['unpacker']['entropy'] = fo_entropy

        if len(fo.files_included) < 1:
            if get_file_type_from_path(fo.file_path)['mime'] in self.VALID_COMPRESSED_FILE_TYPES:
                fo.processed_analysis['unpacker']['summary'] = ['unpacked']
            else:
                if is_compressed(fo.binary, compress_entropy_threshold=self.config['ExpertSettings'].getfloat('unpack_threshold', 0.7), classifier=avg_entropy):
                    fo.processed_analysis['unpacker']['summary'] = ['packed']
                else:
                    fo.processed_analysis['unpacker']['summary'] = ['unpacked']
        else:
            self._detect_unpack_loss(fo, extracted_fos)

    def _detect_unpack_loss(self, fo, extracted_fos):
        decoding_overhead = 1 - fo.processed_analysis['unpacker'].get('encoding_overhead', 0)
        cleaned_size = get_binary_size_without_padding(fo.binary) * decoding_overhead - self.HEADER_OVERHEAD
        extracted_fos_size_sum = self._get_extracted_fos_size_sum(extracted_fos)
        fo.processed_analysis['unpacker']['size packed -> unpacked'] = '{} -> {}'.format(
            human_readable_file_size(cleaned_size), human_readable_file_size(extracted_fos_size_sum))
        if cleaned_size > extracted_fos_size_sum:
            fo.processed_analysis['unpacker']['summary'] = ['data lost']
        else:
            fo.processed_analysis['unpacker']['summary'] = ['no data lost']

    @staticmethod
    def _get_extracted_fos_size_sum(extracted_fos):
        result = 0
        for item in extracted_fos:
            result += len(item.binary)
        return result

    @staticmethod
    def add_additional_unpacking_meta(current_file, meta_data):
        meta_data['number_of_unpacked_files'] = len(current_file.files_included)
        current_file.processed_analysis['unpacker'] = meta_data

    def generate_and_store_file_objects(self, file_paths, tmp_dir, parent):
        extracted_files = {}
        for item in file_paths:
            if not file_is_empty(item):
                current_file = FileObject(file_path=item)
                current_virtual_path = '{}|{}|{}'.format(
                    parent.get_base_of_virtual_path(parent.get_virtual_file_paths()[parent.get_root_uid()][0]),
                    parent.get_uid(),
                    get_chroot_path_excluding_extracted_dir(make_unicode_string(item), tmp_dir)
                )
                current_file.temporary_data['parent_fo_type'] = get_file_type_from_path(parent.file_path)['mime']
                if current_file.get_uid() in extracted_files:  # the same file is extracted multiple times from one archive
                    extracted_files[current_file.get_uid()].virtual_file_path[parent.get_root_uid()].append(current_virtual_path)
                else:
                    self.file_storage_system.store_file(current_file)
                    current_file.virtual_file_path = {parent.get_root_uid(): [current_virtual_path]}
                    current_file.parent_firmware_uids.add(parent.get_root_uid())
                    extracted_files[current_file.get_uid()] = current_file
        return extracted_files

    @staticmethod
    def remove_duplicates(extracted_fo_dict, parent_fo):
        if parent_fo.get_uid() in extracted_fo_dict:
            del extracted_fo_dict[parent_fo.get_uid()]
        return make_list_from_dict(extracted_fo_dict)

    @staticmethod
    def add_included_files_to_object(included_file_objects, root_file_object):
        for item in included_file_objects:
            root_file_object.add_included_file(item)
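# --- Worked example (not part of the original sources) ---
# A sketch of the size comparison performed in _detect_unpack_loss above, with
# hypothetical numbers; the real inputs come from the FileObject's binary, its
# reported encoding_overhead and the extracted child objects.
HEADER_OVERHEAD = 256
encoding_overhead = 0.1                   # hypothetical: 10 % inflation reported by a previous analysis
packed_size_without_padding = 1048576     # hypothetical: 1 MiB after stripping padding
extracted_size_sum = 921600               # hypothetical: 900 KiB of extracted data

cleaned_size = packed_size_without_padding * (1 - encoding_overhead) - HEADER_OVERHEAD
summary = ['data lost'] if cleaned_size > extracted_size_sum else ['no data lost']
print(cleaned_size, summary)              # 943462.4 ['data lost']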