Example 1
# Standard-library imports needed by this class; project-local names
# (Unpacker, MongoInterfaceCommon, ExceptionSafeProcess,
# terminate_process_and_childs, bcolors) come from the surrounding
# project and are not reproduced here.
import logging
from contextlib import suppress
from multiprocessing import Queue, Value
from queue import Empty
from time import sleep


class UnpackingScheduler(object):
    '''
    This scheduler performs unpacking on firmware objects
    '''
    def __init__(self,
                 config=None,
                 post_unpack=None,
                 analysis_workload=None,
                 db_interface=None):
        self.config = config
        self.stop_condition = Value('i', 0)
        self.throttle_condition = Value('i', 0)
        self.get_analysis_workload = analysis_workload
        self.in_queue = Queue()
        self.work_load_counter = 25
        self.workers = []
        self.post_unpack = post_unpack
        self.db_interface = MongoInterfaceCommon(
            config) if not db_interface else db_interface
        self.drop_cached_locks()
        self.start_unpack_workers()
        self.start_work_load_monitor()
        logging.info('Unpacker Module online')

    def drop_cached_locks(self):
        self.db_interface.drop_unpacking_locks()

    def add_task(self, fo):
        '''
        schedule a firmware_object for unpacking
        '''
        self.in_queue.put(fo)

    def get_scheduled_workload(self):
        return {'unpacking_queue': self.in_queue.qsize()}

    def shutdown(self):
        '''
        shutdown the scheduler
        '''
        logging.debug('Shutting down...')
        self.stop_condition.value = 1
        for worker in self.workers:
            worker.join()
        self.in_queue.close()
        logging.info('Unpacker Module offline')

# ---- internal functions ----

    def start_unpack_workers(self):
        worker_count = int(self.config['unpack']['threads'])
        logging.debug('Starting {} unpacking workers'.format(worker_count))
        for process_index in range(worker_count):
            self._start_single_worker(process_index)

    def unpack_worker(self, worker_id):
        unpacker = Unpacker(self.config,
                            worker_id=worker_id,
                            db_interface=self.db_interface)
        while self.stop_condition.value == 0:
            with suppress(Empty):
                fo = self.in_queue.get(timeout=float(
                    self.config['ExpertSettings']['block_delay']))
                extracted_objects = unpacker.unpack(fo)
                logging.debug(
                    '[worker {}] unpacking of {} complete: {} files extracted'.
                    format(worker_id, fo.get_uid(), len(extracted_objects)))
                self.post_unpack(fo)
                self.schedule_extracted_files(extracted_objects)

    def schedule_extracted_files(self, object_list):
        for item in object_list:
            self._add_object_to_unpack_queue(item)

    def _add_object_to_unpack_queue(self, item):
        while self.stop_condition.value == 0:
            if self.throttle_condition.value == 0:
                self.in_queue.put(item)
                break
            else:
                logging.debug(
                    'throttle down unpacking to reduce memory consumption...')
                sleep(5)

    def start_work_load_monitor(self):
        logging.debug('Start work load monitor...')
        process = ExceptionSafeProcess(target=self._work_load_monitor)
        process.start()
        self.workers.append(process)

    def _work_load_monitor(self):
        while self.stop_condition.value == 0:
            workload = self._get_combined_analysis_workload()
            unpack_queue_size = self.in_queue.qsize()

            if self.work_load_counter >= 25:
                self.work_load_counter = 0
                log_function = logging.info
            else:
                self.work_load_counter += 1
                log_function = logging.debug
            log_function('{}Queue Length (Analysis/Unpack): {} / {}{}'.format(
                bcolors.WARNING, workload, unpack_queue_size, bcolors.ENDC))

            if workload < int(
                    self.config['ExpertSettings']['unpack_throttle_limit']):
                self.throttle_condition.value = 0
            else:
                self.throttle_condition.value = 1
            sleep(2)

    def _get_combined_analysis_workload(self):
        if self.get_analysis_workload is not None:
            current_analysis_workload = self.get_analysis_workload()
            return sum(current_analysis_workload.values())
        return 0

    def check_exceptions(self):
        return_value = False
        for worker in self.workers[:]:  # iterate over a copy: crashed workers are removed below
            if worker.exception:
                logging.error("{}Worker Exception Found!!{}".format(
                    bcolors.FAIL, bcolors.ENDC))
                logging.error(worker.exception[1])
                terminate_process_and_childs(worker)
                self.workers.remove(worker)

                if self.config.getboolean('ExpertSettings',
                                          'throw_exceptions'):
                    return_value = True
                else:
                    # restart a replacement worker under the same index
                    # (relies on the 'Unpacking-Worker-<n>' naming scheme)
                    process_index = worker.name.split('-')[2]
                    self._start_single_worker(process_index)
        return return_value

    def _start_single_worker(self, process_index):
        process = ExceptionSafeProcess(
            target=self.unpack_worker,
            name='Unpacking-Worker-{}'.format(process_index),
            args=(process_index, ))
        process.start()
        self.workers.append(process)
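
A minimal sketch of how a caller might wire up this scheduler. The configparser sections and keys mirror the ones the class reads; the callbacks, the db stand-in and the firmware_object placeholder are hypothetical, not part of the project:

from configparser import ConfigParser

config = ConfigParser()
config['unpack'] = {'threads': '4'}
config['ExpertSettings'] = {
    'block_delay': '2',             # in_queue poll timeout in seconds
    'unpack_throttle_limit': '50',  # throttle unpacking above this analysis workload
    'throw_exceptions': 'false'     # restart crashed workers instead of stopping
}

def handle_unpacked(fo):            # hypothetical post-unpack callback
    print('unpacked:', fo.get_uid())

def current_analysis_workload():    # hypothetical workload provider; the dict
    return {'analysis_plugin': 3}   # values are summed by the monitor

scheduler = UnpackingScheduler(config=config,
                               post_unpack=handle_unpacked,
                               analysis_workload=current_analysis_workload,
                               db_interface=db)    # db: a MongoInterfaceCommon-like stand-in
scheduler.add_task(firmware_object)                # any object the Unpacker accepts
print(scheduler.get_scheduled_workload())          # {'unpacking_queue': <queue size>}
scheduler.shutdown()
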
Example 2
# Standard-library imports needed by these tests; project-local names
# (get_config_for_testing, TMP_DIR, MongoMgr, MongoInterfaceCommon,
# BackEndDbInterface, Firmware, FileObject, create_test_firmware,
# create_test_file_object, get_test_data_dir) come from the surrounding
# project and are not reproduced here.
import gc
import json
import pickle
import unittest
from os import path


class TestMongoInterface(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        cls._config = get_config_for_testing(TMP_DIR)
        cls._config.set('data_storage', 'report_threshold', '32')
        cls._config.set('data_storage', 'sanitize_database', 'tmp_sanitize')
        cls.mongo_server = MongoMgr(config=cls._config)

    def setUp(self):
        self.db_interface = MongoInterfaceCommon(config=self._config)
        self.db_interface_backend = BackEndDbInterface(config=self._config)

        self.test_firmware = create_test_firmware()

        self.test_yara_match = {
            'rule': 'OpenSSH',
            'tags': [],
            'namespace': 'default',
            'strings': [(0, '$a', b'OpenSSH')],
            'meta': {
                'description': 'SSH library',
                'website': 'http://www.openssh.com',
                'open_source': True,
                'software_name': 'OpenSSH'
            },
            'matches': True
        }

        self.test_fo = create_test_file_object()

    def tearDown(self):
        self.db_interface_backend.client.drop_database(
            self._config.get('data_storage', 'main_database'))
        self.db_interface_backend.shutdown()
        self.db_interface.client.drop_database(
            self._config.get('data_storage', 'sanitize_database'))
        self.db_interface.shutdown()
        gc.collect()

    @classmethod
    def tearDownClass(cls):
        cls.mongo_server.shutdown()
        TMP_DIR.cleanup()

    def _get_all_firmware_uids(self):
        return [item['_id'] for item in self.db_interface.firmwares.find()]

    def test_existence_quick_check(self):
        self.assertFalse(
            self.db_interface.existence_quick_check('none_existing'),
            'non-existent firmware found')
        self.db_interface_backend.add_firmware(self.test_firmware)
        self.assertTrue(
            self.db_interface.existence_quick_check(self.test_firmware.uid),
            'existing firmware not found')
        self.db_interface_backend.add_file_object(self.test_fo)
        self.assertTrue(
            self.db_interface.existence_quick_check(self.test_fo.uid),
            'existing file not found')

    def test_get_firmware(self):
        self.db_interface_backend.add_firmware(self.test_firmware)
        fobject = self.db_interface.get_firmware(self.test_firmware.uid)
        self.assertEqual(fobject.vendor, 'test_vendor')
        self.assertEqual(fobject.device_name, 'test_router')
        self.assertEqual(fobject.part, '')

    def test_get_object(self):
        fo = self.db_interface.get_object(self.test_firmware.uid)
        self.assertIsNone(
            fo, 'found something but there is nothing in the database')
        self.db_interface_backend.add_firmware(self.test_firmware)
        fo = self.db_interface.get_object(self.test_firmware.uid)
        self.assertIsInstance(fo, Firmware, 'firmware has wrong type')
        self.assertEqual(fo.device_name, 'test_router',
                         'Device name in Firmware not correct')
        test_file = FileObject(file_path=path.join(get_test_data_dir(),
                                                   'get_files_test/testfile2'))
        self.db_interface_backend.add_file_object(test_file)
        fo = self.db_interface.get_object(test_file.uid)
        self.assertIsInstance(fo, FileObject, 'file object has wrong type')

    def test_get_complete_object_including_all_summaries(self):
        self.db_interface_backend.report_threshold = 1024
        test_file = create_test_file_object()
        self.test_firmware.add_included_file(test_file)
        self.db_interface_backend.add_firmware(self.test_firmware)
        self.db_interface_backend.add_file_object(test_file)
        tmp = self.db_interface.get_complete_object_including_all_summaries(
            self.test_firmware.uid)
        self.assertIsInstance(tmp, Firmware, 'wrong type')
        self.assertIn('summary', tmp.processed_analysis['dummy'].keys(),
                      'summary not found in processed analysis')
        self.assertIn('sum a', tmp.processed_analysis['dummy']['summary'],
                      'summary of original file not included')
        self.assertIn('file exclusive sum b',
                      tmp.processed_analysis['dummy']['summary'],
                      'summary of included file not found')

    def test_sanitize_analysis(self):
        short_dict = {'stub_plugin': {'result': 0}}
        long_dict = {
            'stub_plugin': {
                'result': 10000000000,
                'misc': 'Bananarama',
                'summary': []
            }
        }

        self.test_firmware.processed_analysis = short_dict
        sanitized_dict = self.db_interface.sanitize_analysis(
            self.test_firmware.processed_analysis, self.test_firmware.uid)
        self.assertIn('file_system_flag', sanitized_dict['stub_plugin'].keys())
        self.assertFalse(sanitized_dict['stub_plugin']['file_system_flag'])
        self.assertEqual(self.db_interface.sanitize_fs.list(), [],
                         'file stored in db but should not be')

        self.test_firmware.processed_analysis = long_dict
        sanitized_dict = self.db_interface.sanitize_analysis(
            self.test_firmware.processed_analysis, self.test_firmware.uid)
        self.assertIn('stub_plugin_result_{}'.format(self.test_firmware.uid),
                      self.db_interface.sanitize_fs.list(),
                      'sanitized file not stored')
        self.assertNotIn('summary_result_{}'.format(self.test_firmware.uid),
                         self.db_interface.sanitize_fs.list(),
                         'summary is erroneously stored')
        self.assertIn('file_system_flag', sanitized_dict['stub_plugin'].keys())
        self.assertTrue(sanitized_dict['stub_plugin']['file_system_flag'])
        self.assertEqual(type(sanitized_dict['stub_plugin']['summary']), list)

    def test_sanitize_db_duplicates(self):
        long_dict = {
            'stub_plugin': {
                'result': 10000000000,
                'misc': 'Bananarama',
                'summary': []
            }
        }
        gridfs_file_name = 'stub_plugin_result_{}'.format(
            self.test_firmware.uid)

        self.test_firmware.processed_analysis = long_dict
        assert self.db_interface.sanitize_fs.find({
            'filename': gridfs_file_name
        }).count() == 0
        self.db_interface.sanitize_analysis(
            self.test_firmware.processed_analysis, self.test_firmware.uid)
        assert self.db_interface.sanitize_fs.find({
            'filename': gridfs_file_name
        }).count() == 1
        self.db_interface.sanitize_analysis(
            self.test_firmware.processed_analysis, self.test_firmware.uid)
        assert self.db_interface.sanitize_fs.find({
            'filename': gridfs_file_name
        }).count() == 1, 'duplicate entry was created'
        md5 = self.db_interface.sanitize_fs.find_one({
            'filename':
            gridfs_file_name
        }).md5

        long_dict['stub_plugin']['result'] += 1  # new analysis result
        self.db_interface.sanitize_analysis(
            self.test_firmware.processed_analysis, self.test_firmware.uid)
        assert self.db_interface.sanitize_fs.find({
            'filename': gridfs_file_name
        }).count() == 1, 'duplicate entry was created'
        assert self.db_interface.sanitize_fs.find_one({
            'filename':
            gridfs_file_name
        }).md5 != md5, 'hash of new file did not change'

    def test_retrieve_analysis(self):
        self.db_interface.sanitize_fs.put(pickle.dumps('This is a test!'),
                                          filename='test_file_path')

        sanitized_dict = {
            'stub_plugin': {
                'result': 'test_file_path',
                'file_system_flag': True
            }
        }
        sanitized_dict['inbound_result'] = {
            'result': 'inbound result',
            'file_system_flag': False
        }
        retrieved_dict = self.db_interface.retrieve_analysis(sanitized_dict)

        self.assertNotIn('file_system_flag',
                         retrieved_dict['stub_plugin'].keys())
        self.assertIn('result', retrieved_dict['stub_plugin'].keys())
        self.assertEqual(retrieved_dict['stub_plugin']['result'],
                         'This is a test!')
        self.assertNotIn('file_system_flag',
                         retrieved_dict['inbound_result'].keys())
        self.assertEqual(retrieved_dict['inbound_result']['result'],
                         'inbound result')

    def test_retrieve_analysis_filter(self):
        self.db_interface.sanitize_fs.put(pickle.dumps('This is a test!'),
                                          filename='test_file_path')
        sanitized_dict = {
            'selected_plugin': {
                'result': 'test_file_path',
                'file_system_flag': True
            }
        }
        sanitized_dict['other_plugin'] = {
            'result': 'test_file_path',
            'file_system_flag': True
        }
        retrieved_dict = self.db_interface.retrieve_analysis(
            sanitized_dict, analysis_filter=['selected_plugin'])
        self.assertEqual(retrieved_dict['selected_plugin']['result'],
                         'This is a test!')
        self.assertIn('file_system_flag', retrieved_dict['other_plugin'])

    def test_get_objects_by_uid_list(self):
        self.db_interface_backend.add_firmware(self.test_firmware)
        fo_list = self.db_interface.get_objects_by_uid_list(
            [self.test_firmware.uid])
        self.assertIsInstance(fo_list[0], Firmware, 'firmware has wrong type')
        self.assertEqual(fo_list[0].device_name, 'test_router',
                         'Device name in Firmware not correct')
        test_file = FileObject(file_path=path.join(get_test_data_dir(),
                                                   'get_files_test/testfile2'))
        self.db_interface_backend.add_file_object(test_file)
        fo_list = self.db_interface.get_objects_by_uid_list([test_file.uid])
        self.assertIsInstance(fo_list[0], FileObject,
                              'file object has wrong type')

    def test_sanitize_extract_and_retrieve_binary(self):
        test_data = {'dummy': {'test_key': 'test_value'}}
        test_data['dummy'] = self.db_interface._extract_binaries(
            test_data, 'dummy', 'uid')
        self.assertEqual(self.db_interface.sanitize_fs.list(),
                         ['dummy_test_key_uid'], 'file not written')
        self.assertEqual(test_data['dummy']['test_key'], 'dummy_test_key_uid',
                         'new file path not set')
        test_data['dummy'] = self.db_interface._retrieve_binaries(
            test_data, 'dummy')
        self.assertEqual(test_data['dummy']['test_key'], 'test_value',
                         'value not recovered')

    def test_get_firmware_number(self):
        result = self.db_interface.get_firmware_number()
        self.assertEqual(result, 0)

        self.db_interface_backend.add_firmware(self.test_firmware)
        result = self.db_interface.get_firmware_number(query={})
        self.assertEqual(result, 1)
        result = self.db_interface.get_firmware_number(
            query={'_id': self.test_firmware.uid})
        self.assertEqual(result, 1)

        test_fw_2 = create_test_firmware(bin_path='container/test.7z')
        self.db_interface_backend.add_firmware(test_fw_2)
        result = self.db_interface.get_firmware_number(query='{}')
        self.assertEqual(result, 2)
        result = self.db_interface.get_firmware_number(
            query={'_id': self.test_firmware.uid})
        self.assertEqual(result, 1)

    def test_get_file_object_number(self):
        result = self.db_interface.get_file_object_number()
        self.assertEqual(result, 0)

        self.db_interface_backend.add_file_object(self.test_fo)
        result = self.db_interface.get_file_object_number(
            query={}, zero_on_empty_query=False)
        self.assertEqual(result, 1)
        result = self.db_interface.get_file_object_number(
            query={'_id': self.test_fo.uid})
        self.assertEqual(result, 1)
        result = self.db_interface.get_file_object_number(
            query=json.dumps({'_id': self.test_fo.uid}))
        self.assertEqual(result, 1)
        result = self.db_interface.get_file_object_number(
            query={}, zero_on_empty_query=True)
        self.assertEqual(result, 0)
        result = self.db_interface.get_file_object_number(
            query='{}', zero_on_empty_query=True)
        self.assertEqual(result, 0)

        test_fo_2 = create_test_file_object(
            bin_path='get_files_test/testfile2')
        self.db_interface_backend.add_file_object(test_fo_2)
        result = self.db_interface.get_file_object_number(
            query={}, zero_on_empty_query=False)
        self.assertEqual(result, 2)
        result = self.db_interface.get_file_object_number(
            query={'_id': self.test_fo.uid})
        self.assertEqual(result, 1)

    def test_unpacking_lock(self):
        first_uid, second_uid = 'id1', 'id2'
        assert not self.db_interface.check_unpacking_lock(
            first_uid) and not self.db_interface.check_unpacking_lock(
                second_uid), 'locks should not be set at start'

        self.db_interface.set_unpacking_lock(first_uid)
        assert self.db_interface.check_unpacking_lock(
            first_uid), 'locks should have been set'

        self.db_interface.set_unpacking_lock(second_uid)
        assert self.db_interface.check_unpacking_lock(
            first_uid) and self.db_interface.check_unpacking_lock(
                second_uid), 'both locks should be set'

        self.db_interface.release_unpacking_lock(first_uid)
        assert not self.db_interface.check_unpacking_lock(
            first_uid) and self.db_interface.check_unpacking_lock(
                second_uid), 'lock 1 should be released, lock 2 not'

        self.db_interface.drop_unpacking_locks()
        assert not self.db_interface.check_unpacking_lock(
            second_uid), 'all locks should be dropped'

    def test_lock_is_released(self):
        self.db_interface.set_unpacking_lock(self.test_fo.uid)
        assert self.db_interface.check_unpacking_lock(
            self.test_fo.uid), 'setting lock did not work'

        self.db_interface_backend.add_object(self.test_fo)
        assert not self.db_interface.check_unpacking_lock(
            self.test_fo.uid), 'add_object should release lock'

    def test_is_firmware(self):
        assert self.db_interface.is_firmware(self.test_firmware.uid) is False

        self.db_interface_backend.add_firmware(self.test_firmware)
        assert self.db_interface.is_firmware(self.test_firmware.uid) is True

    def test_is_file_object(self):
        assert self.db_interface.is_file_object(self.test_fo.uid) is False

        self.db_interface_backend.add_file_object(self.test_fo)
        assert self.db_interface.is_file_object(self.test_fo.uid) is True
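
Taken together, the sanitize and retrieve tests pin down the storage convention: an analysis entry that exceeds report_threshold is moved to GridFS under the name <plugin>_<key>_<uid>, the entry is marked with file_system_flag = True, and retrieve_analysis reverses the substitution and drops the flag again. A condensed sketch of that round trip, using only calls exercised by the tests above (the config and the literal values are illustrative):

db = MongoInterfaceCommon(config=config)   # config as prepared in setUpClass

analysis = {'stub_plugin': {'result': 'x' * 10000, 'summary': []}}
sanitized = db.sanitize_analysis(analysis, 'some_uid')
assert sanitized['stub_plugin']['file_system_flag']           # large result -> GridFS
assert 'stub_plugin_result_some_uid' in db.sanitize_fs.list()

restored = db.retrieve_analysis(sanitized)
assert restored['stub_plugin']['result'] == 'x' * 10000       # original value is back
assert 'file_system_flag' not in restored['stub_plugin']
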
Example 3
# Standard-library imports needed by this class; project-local names
# (Unpacker, MongoInterfaceCommon, TerminalColors, color_string,
# start_single_worker, check_worker_exceptions, new_worker_was_started)
# come from the surrounding project and are not reproduced here.
import logging
from contextlib import suppress
from multiprocessing import Queue, Value
from queue import Empty
from time import sleep


class UnpackingScheduler:
    '''
    This scheduler performs unpacking on firmware objects
    '''
    def __init__(self,
                 config=None,
                 post_unpack=None,
                 analysis_workload=None,
                 db_interface=None):
        self.config = config
        self.stop_condition = Value('i', 0)
        self.throttle_condition = Value('i', 0)
        self.get_analysis_workload = analysis_workload
        self.in_queue = Queue()
        self.work_load_counter = 25
        self.workers = []
        self.post_unpack = post_unpack
        self.db_interface = MongoInterfaceCommon(
            config) if not db_interface else db_interface
        self.drop_cached_locks()
        self.start_unpack_workers()
        self.work_load_process = self.start_work_load_monitor()
        logging.info('Unpacker Module online')

    def drop_cached_locks(self):
        self.db_interface.drop_unpacking_locks()

    def add_task(self, fo):
        '''
        schedule a firmware_object for unpacking
        '''
        self.in_queue.put(fo)

    def get_scheduled_workload(self):
        return {'unpacking_queue': self.in_queue.qsize()}

    def shutdown(self):
        '''
        shutdown the scheduler
        '''
        logging.debug('Shutting down...')
        self.stop_condition.value = 1
        for worker in self.workers:
            worker.join()
        self.work_load_process.join()
        self.in_queue.close()
        logging.info('Unpacker Module offline')

# ---- internal functions ----

    def start_unpack_workers(self):
        worker_count = int(self.config['unpack']['threads'])
        logging.debug(f'Starting {worker_count} unpacking workers')
        for process_index in range(worker_count):
            self.workers.append(
                start_single_worker(process_index, 'Unpacking',
                                    self.unpack_worker))

    def unpack_worker(self, worker_id):
        unpacker = Unpacker(self.config,
                            worker_id=worker_id,
                            db_interface=self.db_interface)
        while self.stop_condition.value == 0:
            with suppress(Empty):
                fo = self.in_queue.get(timeout=float(
                    self.config['ExpertSettings']['block_delay']))
                extracted_objects = unpacker.unpack(fo)
                logging.debug(
                    f'[worker {worker_id}] unpacking of {fo.uid} complete: {len(extracted_objects)} files extracted'
                )
                self.post_unpack(fo)
                self.schedule_extracted_files(extracted_objects)

    def schedule_extracted_files(self, object_list):
        for item in object_list:
            self._add_object_to_unpack_queue(item)

    def _add_object_to_unpack_queue(self, item):
        while self.stop_condition.value == 0:
            if self.throttle_condition.value == 0:
                self.in_queue.put(item)
                break
            logging.debug(
                'throttle down unpacking to reduce memory consumption...')
            sleep(5)

    def start_work_load_monitor(self):
        logging.debug('Start work load monitor...')
        return start_single_worker(None, 'unpack-load',
                                   self._work_load_monitor)

    def _work_load_monitor(self):
        while self.stop_condition.value == 0:
            workload = self._get_combined_analysis_workload()
            unpack_queue_size = self.in_queue.qsize()

            if self.work_load_counter >= 25:
                self.work_load_counter = 0
                log_function = logging.info
            else:
                self.work_load_counter += 1
                log_function = logging.debug
            log_function(
                color_string(
                    f'Queue Length (Analysis/Unpack): {workload} / {unpack_queue_size}',
                    TerminalColors.WARNING))

            if workload < int(
                    self.config['ExpertSettings']['unpack_throttle_limit']):
                self.throttle_condition.value = 0
            else:
                self.throttle_condition.value = 1
            sleep(2)

    def _get_combined_analysis_workload(self):
        if self.get_analysis_workload is not None:
            return self.get_analysis_workload()
        return 0

    def check_exceptions(self):
        shutdown = check_worker_exceptions(self.workers, 'Unpacking',
                                           self.config, self.unpack_worker)

        # check_worker_exceptions replaces a crashed process inside the list,
        # so the monitor process is wrapped in a one-element list
        list_with_load_process = [self.work_load_process]
        shutdown |= check_worker_exceptions(list_with_load_process,
                                            'unpack-load', self.config,
                                            self._work_load_monitor)
        if new_worker_was_started(new_process=list_with_load_process[0],
                                  old_process=self.work_load_process):
            self.work_load_process = list_with_load_process.pop()

        return shutdown
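
The refactored class leans on shared process helpers (start_single_worker, check_worker_exceptions, new_worker_was_started) that are not part of this listing. A rough sketch of the contract two of them would have to satisfy, inferred purely from the call sites above; the signatures and bodies are assumptions, not the project's actual helpers:

from multiprocessing import Process  # stand-in; the project wraps this in ExceptionSafeProcess

def start_single_worker(process_index, label, target):
    # inferred contract: create, start and return one worker process; a None
    # index (the load monitor) means the target takes no arguments
    process = Process(target=target,
                      name='{}-Worker-{}'.format(label, process_index),
                      args=(process_index,) if process_index is not None else ())
    process.start()
    return process

def new_worker_was_started(new_process, old_process):
    # inferred contract: True if check_worker_exceptions swapped in a fresh process
    return new_process is not old_process
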