Example #1
 def setUp(self):
     self.ds_tmp_dir = TemporaryDirectory(prefix='faf_tests_')
     config = ConfigParser()
     config.add_section('data_storage')
     config.set('data_storage', 'firmware_file_storage_directory',
                self.ds_tmp_dir.name)
     self.fs_organizer = FS_Organizer(config)
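The setUp above shows everything FS_Organizer needs from the configuration in these examples: a data_storage section with firmware_file_storage_directory. A minimal sketch of building that configuration outside a test fixture (the helper name, the storage path, and the import path are illustrative assumptions, not part of FACT):

# Hypothetical helper: builds the minimal ConfigParser that FS_Organizer reads
# in the examples above (only data_storage / firmware_file_storage_directory).
from configparser import ConfigParser

from storage.fs_organizer import FS_Organizer  # assumed FACT import path

def build_storage_config(storage_dir: str) -> ConfigParser:
    config = ConfigParser()
    config.add_section('data_storage')
    config.set('data_storage', 'firmware_file_storage_directory', storage_dir)
    return config

fs_organizer = FS_Organizer(build_storage_config('/tmp/faf_storage'))  # path is illustrative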
Example #2
class InterComBackEndDeleteFile(InterComListener):

    CONNECTION_TYPE = "file_delete_task"

    def additional_setup(self, config=None):
        self.fs_organizer = FS_Organizer(config=config)

    def post_processing(self, task, task_id):
        if self._entry_was_removed_from_db(task['_id']):
            logging.info('remove file: {}'.format(task['_id']))
            self.fs_organizer.delete_file(task['_id'])
        return None

    def _entry_was_removed_from_db(self, uid):
        with ConnectTo(MongoInterfaceCommon, self.config) as db:
            if db.existence_quick_check(uid):
                logging.debug('file not removed, because database entry exists: {}'.format(uid))
                return False
            elif db.check_unpacking_lock(uid):
                logging.debug('file not removed, because it is processed by unpacker: {}'.format(uid))
                return False
        return True
Example #3
class InterComBackEndAnalysisTask(InterComListener):

    CONNECTION_TYPE = 'analysis_task'

    def additional_setup(self, config=None):
        self.fs_organizer = FS_Organizer(config=config)

    def post_processing(self, task, task_id):
        self.fs_organizer.store_file(task)
        return task
Example #4
class InterComBackEndDeleteFile(InterComListener):

    CONNECTION_TYPE = "file_delete_task"

    def additional_setup(self, config=None):
        self.fs_organizer = FS_Organizer(config=config)

    def post_processing(self, task, task_id):
        self.fs_organizer.delete_file(task)
        return None
Example #5
 def _delete_firmware(self):
     fs_backend = FS_Organizer(config=self.config)
     local_firmware_path = Path(fs_backend.generate_path_from_uid(self.test_fw_a.uid))
     self.assertTrue(local_firmware_path.exists(), 'file not found before delete')
     rv = self.test_client.get('/admin/delete/{}'.format(self.test_fw_a.uid))
     self.assertIn(b'Deleted 4 file(s) from database', rv.data, 'deletion success page not shown')
     rv = self.test_client.get('/analysis/{}'.format(self.test_fw_a.uid))
     self.assertIn(b'File not found in database', rv.data, 'file is still available after delete')
     time.sleep(5)
     self.assertFalse(local_firmware_path.exists(), 'file not deleted')
Example #6
 def test_re_analyze_task(self):
     self.backend = InterComBackEndReAnalyzeTask(config=self.config)
     fs_organizer = FS_Organizer(config=self.config)
     test_fw = create_test_firmware()
     fs_organizer.store_file(test_fw)
     original_file_path = test_fw.file_path
     original_binary = test_fw.binary
     test_fw.file_path = None
     test_fw.binary = None
     self.frontend.add_re_analyze_task(test_fw)
     task = self.backend.get_next_task()
     self.assertEqual(task.get_uid(), test_fw.get_uid(), 'uid not correct')
     self.assertIsNotNone(task.file_path, 'file path not set')
     self.assertEqual(task.file_path, original_file_path)
     self.assertIsNotNone(task.binary, 'binary not set')
     self.assertEqual(task.binary, original_binary, 'binary content not correct')
Example #7
class Test_FS_Organizer(unittest.TestCase):
    def setUp(self):
        self.ds_tmp_dir = TemporaryDirectory(prefix='faf_tests_')
        config = ConfigParser()
        config.add_section('data_storage')
        config.set('data_storage', 'firmware_file_storage_directory',
                   self.ds_tmp_dir.name)
        self.fs_organizer = FS_Organizer(config)

    def tearDown(self):
        self.ds_tmp_dir.cleanup()
        gc.collect()

    def check_file_presence_and_content(self, file_path, file_binary):
        self.assertTrue(os.path.exists(file_path), 'file does not exist')
        self.assertEqual(get_binary_from_file(file_path), file_binary, 'file content not correct')

    def test_generate_path(self):
        test_binary = b'abcde'
        file_object = FileObject(test_binary)
        file_path = self.fs_organizer.generate_path(file_object)
        # file path should be 'DATA_DIR/UID_PREFIX/UID'
        self.assertEqual(
            file_path,
            '{}/36/36bbe50ed96841d10443bcb670d6554f0a34b761be67ec9c4a8ad2c0c44ca42c_5'
            .format(self.ds_tmp_dir.name), 'generate file path')

    def test_store_and_delete_file(self):
        test_binary = b'abcde'
        file_object = FileObject(test_binary)

        self.fs_organizer.store_file(file_object)
        self.check_file_presence_and_content(
            '{}/36/36bbe50ed96841d10443bcb670d6554f0a34b761be67ec9c4a8ad2c0c44ca42c_5'
            .format(self.ds_tmp_dir.name), b'abcde')
        self.assertEqual(
            file_object.file_path,
            '{}/36/36bbe50ed96841d10443bcb670d6554f0a34b761be67ec9c4a8ad2c0c44ca42c_5'
            .format(self.ds_tmp_dir.name),
            'wrong file path set in file object')

        self.fs_organizer.delete_file(file_object.get_uid())
        self.assertFalse(os.path.exists(file_object.file_path),
                         'file not deleted')
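The same store/lookup/delete round trip as in the test above, written as a hedged standalone sketch; the import paths for FS_Organizer and FileObject are assumed from the FACT project layout, and only API calls visible in the examples are used:

from configparser import ConfigParser
from pathlib import Path
from tempfile import TemporaryDirectory

from objects.file import FileObject             # assumed FACT import path
from storage.fs_organizer import FS_Organizer   # assumed FACT import path

with TemporaryDirectory(prefix='faf_tests_') as tmp_dir:
    config = ConfigParser()
    config.add_section('data_storage')
    config.set('data_storage', 'firmware_file_storage_directory', tmp_dir)

    fs_organizer = FS_Organizer(config)
    file_object = FileObject(b'abcde')

    fs_organizer.store_file(file_object)            # writes the binary and sets file_object.file_path
    stored_path = Path(fs_organizer.generate_path_from_uid(file_object.get_uid()))
    assert stored_path.exists()                     # stored under DATA_DIR/<uid prefix>/<uid>

    fs_organizer.delete_file(file_object.get_uid())
    assert not stored_path.exists()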
Example #8
class InterComBackEndReAnalyzeTask(InterComListener):

    CONNECTION_TYPE = 're_analyze_task'

    def additional_setup(self, config=None):
        self.fs_organizer = FS_Organizer(config=config)

    def post_processing(self, task, task_id):
        file_path = self.fs_organizer.generate_path(task)
        task.set_file_path(file_path)
        return task
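The listener above does not re-store the binary; it only resolves the storage path of an already stored file and attaches it to the task, as exercised by the test in Example #6. Based on Examples #5 and #11, generate_path_from_uid appears to yield the same path when only the UID is at hand; a hedged equivalent sketch (an assumption, not taken from the original code):

    # Sketch: path lookup by UID instead of by FileObject; assumes, based on the
    # other examples, that generate_path_from_uid produces the same storage path.
    def post_processing(self, task, task_id):
        task.set_file_path(self.fs_organizer.generate_path_from_uid(task.get_uid()))
        return task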
Example #9
 def __init__(self, config=None, worker_id=None, db_interface=None):
     super().__init__(config=config, worker_id=worker_id)
     self.file_storage_system = FS_Organizer(config=self.config)
     self.db_interface = db_interface
Example #10
class Unpacker(UnpackBase):
    def __init__(self, config=None, worker_id=None, db_interface=None):
        super().__init__(config=config, worker_id=worker_id)
        self.file_storage_system = FS_Organizer(config=self.config)
        self.db_interface = db_interface

    def unpack(self, current_fo: FileObject):
        '''
        Recursively extract all objects included in current_fo and add them to current_fo.files_included
        '''

        logging.debug('[worker {}] Extracting {}: Depth: {}'.format(
            self.worker_id, current_fo.get_uid(), current_fo.depth))

        if current_fo.depth >= self.config.getint('unpack', 'max_depth'):
            logging.warning(
                '{} is not extracted since depth limit ({}) is reached'.format(
                    current_fo.get_uid(),
                    self.config.get('unpack', 'max_depth')))
            return []

        tmp_dir = TemporaryDirectory(prefix='fact_unpack_')

        file_path = self._generate_local_file_path(current_fo)

        extracted_files = self.extract_files_from_file(file_path, tmp_dir.name)

        extracted_file_objects = self.generate_and_store_file_objects(
            extracted_files, tmp_dir.name, current_fo)
        extracted_file_objects = self.remove_duplicates(
            extracted_file_objects, current_fo)
        self.add_included_files_to_object(extracted_file_objects, current_fo)

        # set meta data
        current_fo.processed_analysis['unpacker'] = json.loads(
            Path(tmp_dir.name, 'reports', 'meta.json').read_text())

        self.cleanup(tmp_dir)
        return extracted_file_objects

    def cleanup(self, tmp_dir):
        try:
            tmp_dir.cleanup()
        except OSError as error:
            logging.error(
                '[worker {}] Could not CleanUp tmp_dir: {} - {}'.format(
                    self.worker_id, type(error), str(error)))

    @staticmethod
    def add_included_files_to_object(included_file_objects, root_file_object):
        for item in included_file_objects:
            root_file_object.add_included_file(item)

    def generate_and_store_file_objects(self, file_paths: List[Path],
                                        extractor_dir: str,
                                        parent: FileObject):
        extracted_files = {}
        for item in file_paths:
            if not file_is_empty(item):
                current_file = FileObject(file_path=str(item))
                current_virtual_path = '{}|{}|{}'.format(
                    parent.get_base_of_virtual_path(parent.get_virtual_file_paths()[parent.get_root_uid()][0]),
                    parent.get_uid(),
                    get_object_path_excluding_fact_dirs(make_unicode_string(str(item)), str(Path(extractor_dir, 'files')))
                )
                current_file.temporary_data['parent_fo_type'] = get_file_type_from_path(parent.file_path)['mime']
                if current_file.get_uid() in extracted_files:  # the same file is extracted multiple times from one archive
                    extracted_files[current_file.get_uid()].virtual_file_path[parent.get_root_uid()].append(current_virtual_path)
                else:
                    self.db_interface.set_unpacking_lock(current_file.uid)
                    self.file_storage_system.store_file(current_file)
                    current_file.virtual_file_path = {parent.get_root_uid(): [current_virtual_path]}
                    current_file.parent_firmware_uids.add(parent.get_root_uid())
                    extracted_files[current_file.get_uid()] = current_file
        return extracted_files

    @staticmethod
    def remove_duplicates(extracted_fo_dict, parent_fo):
        if parent_fo.get_uid() in extracted_fo_dict:
            del extracted_fo_dict[parent_fo.get_uid()]
        return make_list_from_dict(extracted_fo_dict)

    def _generate_local_file_path(self, file_object: FileObject):
        if not Path(file_object.file_path).exists():
            local_path = self.file_storage_system.generate_path(
                file_object.get_uid())
            return local_path
        return file_object.file_path
Example #11
 def get_file_paths_of_files_included_in_fo(self, fo_uid: str) -> List[str]:
     fs_organizer = FS_Organizer(self.config)
     return [
         fs_organizer.generate_path_from_uid(uid)
         for uid in self.get_uids_of_all_included_files(fo_uid)
     ]
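A hypothetical companion to the helper above: pairing each included file's UID with its storage path, using only the calls already shown (the method name is illustrative, not part of FACT):

 # Hypothetical companion helper; assumes the same self.config and
 # get_uids_of_all_included_files as the method above, plus List and Tuple
 # imported from typing.
 def get_uid_path_pairs_of_files_included_in_fo(self, fo_uid: str) -> List[Tuple[str, str]]:
     fs_organizer = FS_Organizer(self.config)
     return [
         (uid, fs_organizer.generate_path_from_uid(uid))
         for uid in self.get_uids_of_all_included_files(fo_uid)
     ]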
Example #12
 def additional_setup(self, config=None):
     self.fs_organizer = FS_Organizer(config=config)
Example #13
class Unpacker(UnpackBase):

    GENERIC_FS_FALLBACK_CANDIDATES = ['SquashFS']
    GENERIC_CARVER_FALLBACK_BLACKLIST = ['generic_carver', 'NOP', 'PaTool', 'SFX']
    VALID_COMPRESSED_FILE_TYPES = ['application/x-shockwave-flash', 'audio/mpeg', 'audio/ogg', 'image/png', 'image/jpeg', 'image/gif', 'video/mp4', 'video/ogg']
    HEADER_OVERHEAD = 256

    def __init__(self, config=None, worker_id=None):
        super().__init__(config=config, worker_id=worker_id)
        self.file_storage_system = FS_Organizer(config=self.config)

    def unpack(self, current_fo):
        '''
        Recursively extract all objects included in current_fo and add them to current_fo.files_included
        '''

        logging.debug('[worker {}] Extracting {}: Depth: {}'.format(self.worker_id, current_fo.get_uid(), current_fo.depth))
        tmp_dir = TemporaryDirectory(prefix='faf_unpack_')
        extracted_files, meta_data = self.extract_files_from_file(current_fo.file_path, tmp_dir.name, current_fo.depth)
        extracted_files, meta_data = self._do_fallback_if_necessary(extracted_files, meta_data, tmp_dir, current_fo)
        extracted_file_objects = self.generate_and_store_file_objects(extracted_files, tmp_dir.name, current_fo)
        extracted_file_objects = self.remove_duplicates(extracted_file_objects, current_fo)
        self.add_included_files_to_object(extracted_file_objects, current_fo)
        self.add_additional_unpacking_meta(current_fo, meta_data)
        self.get_unpack_status(current_fo, extracted_file_objects)
        self.cleanup(tmp_dir)
        return extracted_file_objects

    def _do_fallback_if_necessary(self, extracted_files, meta_data, tmp_dir, current_fo):
        if len(extracted_files) < 1 and meta_data['plugin_used'] in self.GENERIC_FS_FALLBACK_CANDIDATES:
            logging.warning('[worker {}] {} could not extract any files -> generic fs fallback'.format(self.worker_id, meta_data['plugin_used']))
            extracted_files, meta_data = self.unpacking_fallback(current_fo.file_path, tmp_dir.name, meta_data, 'generic/fs')
        if len(extracted_files) < 1 and meta_data['plugin_used'] not in self.GENERIC_CARVER_FALLBACK_BLACKLIST:
            logging.warning('[worker {}] {} could not extract any files -> generic carver fallback'.format(self.worker_id, meta_data['plugin_used']))
            extracted_files, meta_data = self.unpacking_fallback(current_fo.file_path, tmp_dir.name, meta_data, 'generic/carver')
        return extracted_files, meta_data

    def cleanup(self, tmp_dir):
        try:
            tmp_dir.cleanup()
        except Exception as e:
            logging.error('[worker {}] Could not CleanUp tmp_dir: {} - {}'.format(self.worker_id, sys.exc_info()[0].__name__, e))

    def get_unpack_status(self, fo, extracted_fos):
        fo.processed_analysis['unpacker']['summary'] = []
        fo_entropy = avg_entropy(fo.binary)
        fo.processed_analysis['unpacker']['entropy'] = fo_entropy

        if len(fo.files_included) < 1:
            if get_file_type_from_path(fo.file_path)['mime'] in self.VALID_COMPRESSED_FILE_TYPES:
                fo.processed_analysis['unpacker']['summary'] = ['unpacked']
            else:
                if is_compressed(fo.binary, compress_entropy_threshold=self.config['ExpertSettings'].getfloat('unpack_threshold', 0.7), classifier=avg_entropy):
                    fo.processed_analysis['unpacker']['summary'] = ['packed']
                else:
                    fo.processed_analysis['unpacker']['summary'] = ['unpacked']
        else:
            self._detect_unpack_loss(fo, extracted_fos)

    def _detect_unpack_loss(self, fo, extracted_fos):
        decoding_overhead = 1 - fo.processed_analysis['unpacker'].get('encoding_overhead', 0)
        cleaned_size = get_binary_size_without_padding(fo.binary) * decoding_overhead - self.HEADER_OVERHEAD
        extracted_fos_size_sum = self._get_extracted_fos_size_sum(extracted_fos)
        fo.processed_analysis['unpacker']['size packed -> unpacked'] = '{} -> {}'.format(human_readable_file_size(cleaned_size), human_readable_file_size(extracted_fos_size_sum))
        if cleaned_size > extracted_fos_size_sum:
            fo.processed_analysis['unpacker']['summary'] = ['data lost']
        else:
            fo.processed_analysis['unpacker']['summary'] = ['no data lost']

    @staticmethod
    def _get_extracted_fos_size_sum(extracted_fos):
        result = 0
        for item in extracted_fos:
            result += len(item.binary)
        return result

    @staticmethod
    def add_additional_unpacking_meta(current_file, meta_data):
        meta_data['number_of_unpacked_files'] = len(current_file.files_included)
        current_file.processed_analysis['unpacker'] = meta_data

    def generate_and_store_file_objects(self, file_paths, tmp_dir, parent):
        extracted_files = {}
        for item in file_paths:
            if not file_is_empty(item):
                current_file = FileObject(file_path=item)
                current_virtual_path = '{}|{}|{}'.format(
                    parent.get_base_of_virtual_path(parent.get_virtual_file_paths()[parent.get_root_uid()][0]),
                    parent.get_uid(), get_chroot_path_excluding_extracted_dir(make_unicode_string(item), tmp_dir)
                )
                current_file.temporary_data['parent_fo_type'] = get_file_type_from_path(parent.file_path)['mime']
                if current_file.get_uid() in extracted_files:  # the same file is extracted multiple times from one archive
                    extracted_files[current_file.get_uid()].virtual_file_path[parent.get_root_uid()].append(current_virtual_path)
                else:
                    self.file_storage_system.store_file(current_file)
                    current_file.virtual_file_path = {parent.get_root_uid(): [current_virtual_path]}
                    current_file.parent_firmware_uids.add(parent.get_root_uid())
                    extracted_files[current_file.get_uid()] = current_file
        return extracted_files

    @staticmethod
    def remove_duplicates(extracted_fo_dict, parent_fo):
        if parent_fo.get_uid() in extracted_fo_dict:
            del extracted_fo_dict[parent_fo.get_uid()]
        return make_list_from_dict(extracted_fo_dict)

    @staticmethod
    def add_included_files_to_object(included_file_objects, root_file_object):
        for item in included_file_objects:
            root_file_object.add_included_file(item)
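A small worked example of the unpack-loss heuristic in _detect_unpack_loss above; all numbers are illustrative, not taken from the original code or data:

# Illustrative arithmetic for _detect_unpack_loss: the parent's size (without
# padding) is scaled down by the reported encoding overhead and a fixed header
# allowance, then compared against the summed size of the extracted files.
HEADER_OVERHEAD = 256
encoding_overhead = 0.25                    # hypothetical value from earlier analysis
parent_size_without_padding = 1_000_000     # bytes, hypothetical

cleaned_size = parent_size_without_padding * (1 - encoding_overhead) - HEADER_OVERHEAD
extracted_size_sum = 760_000                # bytes, hypothetical

summary = ['data lost'] if cleaned_size > extracted_size_sum else ['no data lost']
# cleaned_size == 749_744.0 -> summary == ['no data lost']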