class InterComBackEndDeleteFile(InterComListener):

    CONNECTION_TYPE = 'file_delete_task'

    def additional_setup(self, config=None):
        self.fs_organizer = FSOrganizer(config=config)

    def post_processing(self, task, task_id):
        if self._entry_was_removed_from_db(task['_id']):
            logging.info('remove file: {}'.format(task['_id']))
            self.fs_organizer.delete_file(task['_id'])

    def _entry_was_removed_from_db(self, uid):
        with ConnectTo(MongoInterfaceCommon, self.config) as db:
            if db.existence_quick_check(uid):
                logging.debug('file not removed, because database entry exists: {}'.format(uid))
                return False
            if db.check_unpacking_lock(uid):
                logging.debug('file not removed, because it is processed by unpacker: {}'.format(uid))
                return False
        return True
class TestFsOrganizer(unittest.TestCase):

    def setUp(self):
        self.ds_tmp_dir = TemporaryDirectory(prefix='fact_tests_')
        config = ConfigParser()
        config.add_section('data_storage')
        config.set('data_storage', 'firmware_file_storage_directory', self.ds_tmp_dir.name)
        self.fs_organizer = FSOrganizer(config)

    def tearDown(self):
        self.ds_tmp_dir.cleanup()
        gc.collect()

    def check_file_presence_and_content(self, file_path, file_binary):
        self.assertTrue(os.path.exists(file_path), 'file exists')
        self.assertEqual(get_binary_from_file(file_path), file_binary, 'correct content')

    def test_generate_path(self):
        test_binary = b'abcde'
        file_object = FileObject(test_binary)
        file_path = self.fs_organizer.generate_path(file_object)
        # file path should be 'DATA_DIR/UID_PREFIX/UID'
        self.assertEqual(
            file_path,
            '{}/36/36bbe50ed96841d10443bcb670d6554f0a34b761be67ec9c4a8ad2c0c44ca42c_5'.format(self.ds_tmp_dir.name),
            'generate file path'
        )

    def test_store_and_delete_file(self):
        test_binary = b'abcde'
        file_object = FileObject(test_binary)
        self.fs_organizer.store_file(file_object)
        self.check_file_presence_and_content(
            '{}/36/36bbe50ed96841d10443bcb670d6554f0a34b761be67ec9c4a8ad2c0c44ca42c_5'.format(self.ds_tmp_dir.name),
            b'abcde'
        )
        self.assertEqual(
            file_object.file_path,
            '{}/36/36bbe50ed96841d10443bcb670d6554f0a34b761be67ec9c4a8ad2c0c44ca42c_5'.format(self.ds_tmp_dir.name),
            'wrong file path set in file object'
        )
        self.fs_organizer.delete_file(file_object.uid)
        self.assertFalse(os.path.exists(file_object.file_path), 'file not deleted')
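The test above pins down FSOrganizer's storage layout ('DATA_DIR/UID_PREFIX/UID', with the first two characters of the uid as prefix) and the expected behaviour of store_file and delete_file. The class itself is not part of this listing, so the following is only a minimal sketch consistent with those tests; names such as data_storage_path and the directory handling are assumptions.

import os


class FSOrganizerSketch:
    '''Minimal sketch of an FSOrganizer that satisfies the tests above; not the real implementation.'''

    def __init__(self, config=None):
        self.config = config
        # assumed: the storage root comes from the same config option the test sets
        self.data_storage_path = config.get('data_storage', 'firmware_file_storage_directory')

    def generate_path(self, file_object):
        return self.generate_path_from_uid(file_object.uid)

    def generate_path_from_uid(self, uid):
        # 'DATA_DIR/UID_PREFIX/UID' with the first two characters of the uid as prefix
        return os.path.join(self.data_storage_path, uid[:2], uid)

    def store_file(self, file_object):
        destination = self.generate_path(file_object)
        os.makedirs(os.path.dirname(destination), exist_ok=True)
        with open(destination, 'wb') as fp:
            fp.write(file_object.binary)
        file_object.file_path = destination  # callers such as the unpacker rely on this side effect

    def delete_file(self, uid):
        path = self.generate_path_from_uid(uid)
        if os.path.isfile(path):
            os.remove(path)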
class BinaryService:
    '''
    This is a binary and database backend providing basic retrieval functions
    '''

    def __init__(self, config=None):
        self.config = config
        self.fs_organizer = FSOrganizer(config=config)
        logging.info('binary service online')

    def get_binary_and_file_name(self, uid: str) -> Tuple[Optional[bytes], Optional[str]]:
        file_name = self._get_file_name_from_db(uid)
        if file_name is None:
            return None, None
        binary = get_binary_from_file(self.fs_organizer.generate_path_from_uid(uid))
        return binary, file_name

    def get_repacked_binary_and_file_name(self, uid: str) -> Tuple[Optional[bytes], Optional[str]]:
        file_name = self._get_file_name_from_db(uid)
        if file_name is None:
            return None, None
        repack_service = TarRepack(config=self.config)
        tar = repack_service.tar_repack(self.fs_organizer.generate_path_from_uid(uid))
        name = '{}.tar.gz'.format(file_name)
        return tar, name

    def _get_file_name_from_db(self, uid: str) -> Optional[str]:
        with ConnectTo(BinaryServiceDbInterface, self.config) as db_service:
            return db_service.get_file_name(uid)
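For orientation, a hedged usage sketch of the two public methods above; 'config' is assumed to be a loaded FACT configuration and 'uid' is a placeholder for the uid of a stored file.

# usage sketch; 'config' and 'uid' are assumed placeholders, not values from this listing
binary_service = BinaryService(config=config)

binary, file_name = binary_service.get_binary_and_file_name(uid)
if binary is None:
    print('no database entry found for {}'.format(uid))

tar_gz, download_name = binary_service.get_repacked_binary_and_file_name(uid)
# download_name would be e.g. '<original file name>.tar.gz'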
class InterComBackEndAnalysisTask(InterComListener):

    CONNECTION_TYPE = 'analysis_task'

    def additional_setup(self, config=None):
        self.fs_organizer = FSOrganizer(config=config)

    def post_processing(self, task, task_id):
        self.fs_organizer.store_file(task)
        return task
def _delete_firmware(self):
    fs_backend = FSOrganizer(config=self.config)
    local_firmware_path = Path(fs_backend.generate_path_from_uid(self.test_fw_a.uid))
    self.assertTrue(local_firmware_path.exists(), 'file not found before delete')
    rv = self.test_client.get('/admin/delete/{}'.format(self.test_fw_a.uid))
    self.assertIn(b'Deleted 4 file(s) from database', rv.data, 'deletion success page not shown')
    rv = self.test_client.get('/analysis/{}'.format(self.test_fw_a.uid))
    self.assertIn(b'File not found in database', rv.data, 'file is still available after delete')
    time.sleep(5)
    self.assertFalse(local_firmware_path.exists(), 'file not deleted')
def test_re_analyze_task(self):
    self.backend = InterComBackEndReAnalyzeTask(config=self.config)
    fs_organizer = FSOrganizer(config=self.config)
    test_fw = create_test_firmware()
    fs_organizer.store_file(test_fw)
    original_file_path = test_fw.file_path
    original_binary = test_fw.binary
    test_fw.file_path = None
    test_fw.binary = None
    self.frontend.add_re_analyze_task(test_fw)
    task = self.backend.get_next_task()
    self.assertEqual(task.uid, test_fw.uid, 'uid not correct')
    self.assertIsNotNone(task.file_path, 'file path not set')
    self.assertEqual(task.file_path, original_file_path)
    self.assertIsNotNone(task.binary, 'binary not set')
    self.assertEqual(task.binary, original_binary, 'binary content not correct')
class InterComBackEndReAnalyzeTask(InterComListener):

    CONNECTION_TYPE = 're_analyze_task'

    def additional_setup(self, config=None):
        self.fs_organizer = FSOrganizer(config=config)

    def post_processing(self, task, task_id):
        task.file_path = self.fs_organizer.generate_path(task)
        task.create_binary_from_path()
        return task
def _start_backend(self, post_analysis=None, compare_callback=None):  # pylint: disable=attribute-defined-outside-init
    self.analysis_service = AnalysisScheduler(config=self.config, post_analysis=post_analysis)
    self.unpacking_service = UnpackingScheduler(config=self.config, post_unpack=self.analysis_service.start_analysis_of_object)
    self.compare_service = CompareScheduler(config=self.config, callback=compare_callback)
    self.intercom = InterComBackEndBinding(
        config=self.config,
        analysis_service=self.analysis_service,
        compare_service=self.compare_service,
        unpacking_service=self.unpacking_service
    )
    self.fs_organizer = FSOrganizer(config=self.config)
def get_file_paths_of_files_included_in_fo(self, fo_uid: str) -> List[str]:
    fs_organizer = FSOrganizer(self.config)
    return [
        fs_organizer.generate_path_from_uid(uid)
        for uid in self.get_uids_of_all_included_files(fo_uid)
    ]
class AnalysisPlugin(AnalysisBasePlugin):
    '''
    This class implements the FACT wrapper for multiple linters including
    - shellcheck (shell)
    - pylint (python)
    - jshint (javascript)
    - luacheck (lua)
    TODO Implement proper view
    TODO implement additional linters (ruby, perl, php)
    '''
    NAME = 'source_code_analysis'
    DESCRIPTION = 'This plugin implements static code analysis for multiple scripting languages'
    DEPENDENCIES = ['file_type']
    VERSION = '0.6'
    MIME_WHITELIST = ['text/']
    # All linters must provide a 'do_analysis' method that returns a list of dicts.
    # Each of these dicts must at least contain a value for the 'symbol' key.
    SCRIPT_TYPES = {
        'shell': {'mime': 'shell', 'shebang': 'sh', 'ending': '.sh', 'linter': shell_linter.ShellLinter},
        'lua': {'mime': 'luascript', 'shebang': 'lua', 'ending': '.lua', 'linter': lua_linter.LuaLinter},
        'javascript': {'mime': 'javascript', 'shebang': 'javascript', 'ending': '.js', 'linter': js_linter.JavaScriptLinter},
        'python': {'mime': 'python', 'shebang': 'python', 'ending': '.py', 'linter': python_linter.PythonLinter},
    }

    def __init__(self, plugin_administrator, config=None, recursive=True, offline_testing=False):
        self.config = config
        self._fs_organizer = FSOrganizer(config)
        super().__init__(plugin_administrator, config=config, plugin_path=__file__, recursive=recursive, offline_testing=offline_testing)

    def process_object(self, file_object):
        '''
        The MIME whitelist ensures that only text files reach this plugin. We try to detect the
        scripting language and, if it is a supported one, run the corresponding linter.
        '''
        script_type = self._get_script_type(file_object)
        if script_type is None:
            file_object.processed_analysis[self.NAME] = {
                'summary': [],
                'warning': 'Is not a script or language could not be detected'
            }
            return file_object

        script_type = script_type.lower()
        if script_type not in self.SCRIPT_TYPES:
            logging.debug(f'[{self.NAME}] {file_object.file_name} ({script_type}) is not a supported script.')
            file_object.processed_analysis[self.NAME] = {
                'summary': [],
                'warning': f'Unsupported script type: {script_type}'
            }
            return file_object

        issues = self.SCRIPT_TYPES[script_type]['linter']().do_analysis(file_object.file_path)
        if len(issues) == 0:
            file_object.processed_analysis[self.NAME] = {'summary': []}
        else:
            file_object.processed_analysis[self.NAME] = {
                'full': sorted(issues, key=lambda k: k['symbol']),
                'summary': [f'Warnings in {script_type} script']
            }
        return file_object

    def _get_script_type(self, file_object):
        host_path = self._fs_organizer.generate_path_from_uid(file_object.uid)
        container_path = f'/repo/{file_object.file_name}'
        linguist_output = run_docker_container(
            'crazymax/linguist', 60, f'--json {container_path}', reraise=True,
            mount=(container_path, host_path), label=self.NAME
        )
        output_json = json.loads(linguist_output)
        # FIXME plugins should not set the output for other plugins,
        # but for performance reasons we don't want the file_type plugin to run linguist
        file_object.processed_analysis['file_type']['linguist'] = ''.join([
            f'{k:<10} {str(v):<10}\n' for k, v in output_json[container_path].items()
        ])
        script_type = output_json[container_path].get('language')
        return script_type
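The SCRIPT_TYPES comment above defines the linter contract: a do_analysis method that takes a file path and returns a list of dicts, each containing at least a 'symbol' key. The toy linter below is a hypothetical illustration of that contract only; it is not one of the shipped linter modules.

# Hypothetical example of the linter contract described above; not a shipped FACT linter.
# do_analysis(file_path) must return a list of dicts that each contain at least a 'symbol' key.
class TodoCommentLinter:
    def do_analysis(self, file_path):
        issues = []
        with open(file_path, errors='replace') as fp:
            for line_number, line in enumerate(fp, start=1):
                if 'TODO' in line:
                    issues.append({
                        'symbol': 'todo-comment',  # required key, used for sorting and the summary
                        'line': line_number,
                        'message': line.strip(),
                    })
        return issues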
def __init__(self, config=None):
    super().__init__(config)
    self.fs_organizer = FSOrganizer(config=config)
class Unpacker(UnpackBase):
    def __init__(self, config=None, worker_id=None, db_interface=None):
        super().__init__(config=config, worker_id=worker_id)
        self.file_storage_system = FSOrganizer(config=self.config)
        self.db_interface = db_interface

    def unpack(self, current_fo: FileObject):
        '''
        Recursively extract all objects included in current_fo and add them to current_fo.files_included
        '''
        logging.debug('[worker {}] Extracting {}: Depth: {}'.format(self.worker_id, current_fo.uid, current_fo.depth))

        if current_fo.depth >= self.config.getint('unpack', 'max_depth'):
            logging.warning('{} is not extracted since depth limit ({}) is reached'.format(current_fo.uid, self.config.get('unpack', 'max_depth')))
            self._store_unpacking_depth_skip_info(current_fo)
            return []

        tmp_dir = TemporaryDirectory(prefix='fact_unpack_', dir=get_temp_dir_path(self.config))
        file_path = self._generate_local_file_path(current_fo)
        extracted_files = self.extract_files_from_file(file_path, tmp_dir.name)
        extracted_file_objects = self.generate_and_store_file_objects(extracted_files, tmp_dir.name, current_fo)
        extracted_file_objects = self.remove_duplicates(extracted_file_objects, current_fo)
        self.add_included_files_to_object(extracted_file_objects, current_fo)
        # set meta data
        current_fo.processed_analysis['unpacker'] = json.loads(Path(tmp_dir.name, 'reports', 'meta.json').read_text())

        self.cleanup(tmp_dir)
        return extracted_file_objects

    @staticmethod
    def _store_unpacking_depth_skip_info(file_object: FileObject):
        file_object.processed_analysis['unpacker'] = {
            'plugin_used': 'None',
            'number_of_unpacked_files': 0,
            'info': 'Unpacking stopped because maximum unpacking depth was reached',
        }
        tag_dict = {'unpacker': {'depth reached': {'value': 'unpacking depth reached', 'color': TagColor.ORANGE, 'propagate': False}}}
        file_object.analysis_tags.update(tag_dict)

    def cleanup(self, tmp_dir):
        try:
            tmp_dir.cleanup()
        except OSError as error:
            logging.error('[worker {}] Could not CleanUp tmp_dir: {} - {}'.format(self.worker_id, type(error), str(error)))

    @staticmethod
    def add_included_files_to_object(included_file_objects, root_file_object):
        for item in included_file_objects:
            root_file_object.add_included_file(item)

    def generate_and_store_file_objects(self, file_paths: List[Path], extractor_dir: str, parent: FileObject):
        extracted_files = {}
        for item in file_paths:
            if not file_is_empty(item):
                current_file = FileObject(file_path=str(item))
                current_virtual_path = join_virtual_path(
                    get_base_of_virtual_path(parent.get_virtual_file_paths()[parent.get_root_uid()][0]),
                    parent.uid,
                    get_object_path_excluding_fact_dirs(make_unicode_string(str(item)), str(Path(extractor_dir, 'files')))
                )
                current_file.temporary_data['parent_fo_type'] = get_file_type_from_path(parent.file_path)['mime']
                if current_file.uid in extracted_files:  # the same file is extracted multiple times from one archive
                    extracted_files[current_file.uid].virtual_file_path[parent.get_root_uid()].append(current_virtual_path)
                else:
                    self.db_interface.set_unpacking_lock(current_file.uid)
                    self.file_storage_system.store_file(current_file)
                    current_file.virtual_file_path = {parent.get_root_uid(): [current_virtual_path]}
                    current_file.parent_firmware_uids.add(parent.get_root_uid())
                    extracted_files[current_file.uid] = current_file
        return extracted_files

    @staticmethod
    def remove_duplicates(extracted_fo_dict, parent_fo):
        if parent_fo.uid in extracted_fo_dict:
            del extracted_fo_dict[parent_fo.uid]
        return make_list_from_dict(extracted_fo_dict)

    def _generate_local_file_path(self, file_object: FileObject):
        if not Path(file_object.file_path).exists():
            # fall back to the stored copy if the original path is no longer available
            local_path = self.file_storage_system.generate_path_from_uid(file_object.uid)
            return local_path
        return file_object.file_path
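Note how the unpacker calls db_interface.set_unpacking_lock right before store_file, while InterComBackEndDeleteFile at the top of this listing skips deletion as long as check_unpacking_lock still reports the uid. The real lock lives in the database layer and is not shown here; the snippet below is a hypothetical in-memory stand-in that only illustrates the assumed handshake.

# Hypothetical in-memory stand-in for the unpacking-lock interface used above
# (set_unpacking_lock / check_unpacking_lock); the real implementation is a database interface.
class InMemoryUnpackingLocks:
    def __init__(self):
        self._locked_uids = set()

    def set_unpacking_lock(self, uid):
        self._locked_uids.add(uid)  # called by the unpacker before store_file()

    def check_unpacking_lock(self, uid):
        return uid in self._locked_uids  # checked by the delete listener before delete_file()

    def release_unpacking_lock(self, uid):
        self._locked_uids.discard(uid)  # assumed to happen once unpacking of the file has finished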