def test_get_virtual_file_path(self):
    fo = FileObject(binary=b'file_object')
    self.assertIn(fo.get_uid(), fo.get_virtual_file_paths().keys(), 'not correct if path and name not set')
    fo.set_name('the_file_name.txt')
    self.assertEqual(fo.get_virtual_file_paths()[fo.get_uid()][0], fo.get_uid(), 'not correct if path not set')
    fo.virtual_file_path = {fo.get_uid(): '/foo/bar/the_file_name.txt'}
    self.assertEqual(fo.get_virtual_file_paths()[fo.get_uid()], '/foo/bar/the_file_name.txt', 'not correct if path set')

def unpack(self, current_fo: FileObject):
    '''
    Recursively extract all objects included in current_fo and add them to current_fo.files_included
    '''
    logging.debug('[worker {}] Extracting {}: Depth: {}'.format(self.worker_id, current_fo.get_uid(), current_fo.depth))

    if current_fo.depth >= self.config.getint('unpack', 'max_depth'):
        logging.warning('{} is not extracted since depth limit ({}) is reached'.format(
            current_fo.get_uid(), self.config.get('unpack', 'max_depth')))
        return []

    tmp_dir = TemporaryDirectory(prefix='fact_unpack_')
    file_path = self._generate_local_file_path(current_fo)

    extracted_files = self.extract_files_from_file(file_path, tmp_dir.name)
    extracted_file_objects = self.generate_and_store_file_objects(extracted_files, tmp_dir.name, current_fo)
    extracted_file_objects = self.remove_duplicates(extracted_file_objects, current_fo)
    self.add_included_files_to_object(extracted_file_objects, current_fo)

    # set meta data
    current_fo.processed_analysis['unpacker'] = json.loads(Path(tmp_dir.name, 'reports', 'meta.json').read_text())

    self.cleanup(tmp_dir)
    return extracted_file_objects

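# A minimal, self-contained sketch (not FACT code) of the depth-limit guard used in
# unpack() above. The 'unpack'/'max_depth' option names come from the method; the
# ConfigParser setup and helper below are assumptions for illustration only.
from configparser import ConfigParser

_config = ConfigParser()
_config.read_dict({'unpack': {'max_depth': '8'}})

def _depth_limit_reached(depth: int) -> bool:
    # getint() parses the stored string on each call, just like the check in unpack()
    return depth >= _config.getint('unpack', 'max_depth')

assert _depth_limit_reached(8) and not _depth_limit_reached(3)
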
def generate_and_store_file_objects(self, file_paths: List[Path], extractor_dir: str, parent: FileObject):
    extracted_files = {}
    for item in file_paths:
        if not file_is_empty(item):
            current_file = FileObject(file_path=str(item))
            current_virtual_path = '{}|{}|{}'.format(
                parent.get_base_of_virtual_path(parent.get_virtual_file_paths()[parent.get_root_uid()][0]),
                parent.get_uid(),
                get_object_path_excluding_fact_dirs(make_unicode_string(str(item)), str(Path(extractor_dir, 'files')))
            )
            current_file.temporary_data['parent_fo_type'] = get_file_type_from_path(parent.file_path)['mime']
            if current_file.get_uid() in extracted_files:  # the same file is extracted multiple times from one archive
                extracted_files[current_file.get_uid()].virtual_file_path[parent.get_root_uid()].append(current_virtual_path)
            else:
                self.db_interface.set_unpacking_lock(current_file.uid)
                self.file_storage_system.store_file(current_file)
                current_file.virtual_file_path = {parent.get_root_uid(): [current_virtual_path]}
                current_file.parent_firmware_uids.add(parent.get_root_uid())
                extracted_files[current_file.get_uid()] = current_file
    return extracted_files

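# Hypothetical illustration (made-up uids) of the 'root|parent|path' virtual file path
# scheme assembled by both generate_and_store_file_objects() variants in this section:
# the last '|'-separated element is the file's path inside its parent archive.
_virtual_path = '{}|{}|{}'.format('root_uid', 'parent_uid', '/etc/passwd')
assert _virtual_path.split('|')[-1] == '/etc/passwd'
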
def test_get_included_files_uids(self):
    test_parent = FileObject(binary=b'parent_file')
    test_child = FileObject(binary=b'1st child')
    test_child2 = FileObject(binary=b'2nd child')
    test_parent.add_included_file(test_child)
    test_parent.add_included_file(test_child2)
    self.assertEqual(len(test_parent.get_included_files_uids()), 2, 'number of uids not correct')
    self.assertIn(test_child.get_uid(), test_parent.get_included_files_uids(), 'uid of first file not found')
    self.assertIn(test_child2.get_uid(), test_parent.get_included_files_uids(), 'uid of second file not found')

def test_add_included_file(self):
    parent = FileObject(binary=b'parent_file')
    parent.scheduled_analysis = ['test']
    child = FileObject(binary=b'child')
    parent.add_included_file(child)
    self.assertEqual(len(parent.files_included), 1, 'number of included files not correct')
    self.assertIn(child.get_uid(), parent.files_included, 'child uid not stored correctly')
    self.assertIn(parent.get_uid(), child.parents, 'parent not added to child')
    self.assertEqual(child.depth, parent.depth + 1, 'child depth not updated')
    self.assertEqual(child.scheduled_analysis, ['test'], 'child did not get scheduled analysis list of parent')

def test_object_processing_one_child(self):
    root_object = FileObject(binary=b'root_file')
    child_object = FileObject(binary=b'first_child_object')
    root_object.add_included_file(child_object)
    self.base_plugin.in_queue.put(root_object)
    processed_object = self.base_plugin.out_queue.get()
    self.assertEqual(processed_object.get_uid(), root_object.get_uid(), 'uid changed')
    self.assertIn(child_object.get_uid(), root_object.get_included_files_uids(), 'child object not in processed file')

def unpack_fo(self, file_object: FileObject) -> Optional[TemporaryDirectory]:
    file_path = (
        file_object.file_path if file_object.file_path
        else self._get_file_path_from_db(file_object.get_uid())
    )
    if not file_path or not Path(file_path).is_file():
        logging.error('could not unpack {}: file path not found'.format(file_object.get_uid()))
        return None

    extraction_dir = TemporaryDirectory(prefix='FACT_plugin_qemu_exec')
    self.extract_files_from_file(file_path, extraction_dir.name)
    # the caller owns the returned TemporaryDirectory and must keep a reference
    # to it until cleanup, or the directory is removed when it is garbage collected
    return extraction_dir

def _update_analysis(self, file_object: FileObject, analysis_system: str, result: dict):
    try:
        if isinstance(file_object, Firmware):
            self.firmwares.update_one(
                {'_id': file_object.get_uid()},
                {'$set': {'processed_analysis.{}'.format(analysis_system): result}}
            )
        else:
            self.file_objects.update_one(
                {'_id': file_object.get_uid()},
                {'$set': {'processed_analysis.{}'.format(analysis_system): result}}
            )
    except Exception as exception:
        logging.error('Update of analysis failed badly ({})'.format(exception))
        raise exception

def add_job(self, fw_object: FileObject):
    if self._dependencies_are_unfulfilled(fw_object):
        logging.error('{}: dependencies of plugin {} not fulfilled'.format(fw_object.get_uid(), self.NAME))
    elif self._analysis_depth_not_reached_yet(fw_object):
        self.in_queue.put(fw_object)
        return
    # unfulfilled dependencies or depth limit reached: pass the object through unprocessed
    self.out_queue.put(fw_object)

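# Minimal sketch (not FACT code) of the in_queue/out_queue hand-off that add_job()
# feeds: a plain queue.Queue stands in for the plugin's queues and a dict stands in
# for a FileObject; a worker process would normally sit between the two queues.
from queue import Queue

_in_queue, _out_queue = Queue(), Queue()
_job = {'uid': 'some_uid'}
_in_queue.put(_job)                 # scheduled for processing
_out_queue.put(_in_queue.get())     # a worker would analyze, then forward the object
assert _out_queue.get() is _job
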
def _update_processed_analysis(self, new_object: FileObject, old_object: dict) -> dict:
    old_pa = self.retrieve_analysis(old_object['processed_analysis'])
    for key in new_object.processed_analysis:
        old_pa[key] = new_object.processed_analysis[key]
    return self.sanitize_analysis(analysis_dict=old_pa, uid=new_object.get_uid())

def generate_and_store_file_objects(self, file_paths, tmp_dir, parent):
    extracted_files = {}
    for item in file_paths:
        if not file_is_empty(item):
            current_file = FileObject(file_path=item)
            current_virtual_path = '{}|{}|{}'.format(
                parent.get_base_of_virtual_path(parent.get_virtual_file_paths()[parent.get_root_uid()][0]),
                parent.get_uid(),
                get_chroot_path_excluding_extracted_dir(make_unicode_string(item), tmp_dir)
            )
            if current_file.get_uid() in extracted_files:  # the same file is extracted multiple times from one archive
                extracted_files[current_file.get_uid()].virtual_file_path[parent.get_root_uid()].append(current_virtual_path)
            else:
                self.file_storage_system.store_file(current_file)
                current_file.virtual_file_path = {parent.get_root_uid(): [current_virtual_path]}
                current_file.parent_firmware_uids.add(parent.get_root_uid())
                extracted_files[current_file.get_uid()] = current_file
    return extracted_files

def test_get_one_virtual_path(self):
    fo = FileObject(binary=b'foo')
    self.assertEqual(fo.get_virtual_paths_for_one_uid(), [fo.get_uid()], 'no path set should fall back to the uid')
    fo.virtual_file_path = {
        'uid_a': ['test_file_path_a'],
        'uid_b': ['test_file_path_b'],
        'uid_c': ['test_file_path_c']
    }
    self.assertEqual(fo.get_virtual_paths_for_one_uid(), ['test_file_path_a'])
    self.assertEqual(fo.get_virtual_paths_for_one_uid(root_uid='uid_b'), ['test_file_path_b'])
    fo.root_uid = 'uid_c'
    self.assertEqual(fo.get_virtual_paths_for_one_uid(), ['test_file_path_c'])

def test_get_objects_by_uid_list(self):
    self.db_interface_backend.add_firmware(self.test_firmware)
    fo_list = self.db_interface.get_objects_by_uid_list([self.test_firmware.get_uid()])
    self.assertIsInstance(fo_list[0], Firmware, 'firmware has wrong type')
    self.assertEqual(fo_list[0].device_name, 'test_router', 'Device name in Firmware not correct')
    test_file = FileObject(file_path=path.join(get_test_data_dir(), 'get_files_test/testfile2'))
    self.db_interface_backend.add_file_object(test_file)
    fo_list = self.db_interface.get_objects_by_uid_list([test_file.get_uid()])
    self.assertIsInstance(fo_list[0], FileObject, 'file object has wrong type')

def _add_completed_analysis_results_to_file_object(self, analysis_to_do: str, fw_object: FileObject):
    db_entry = self.db_backend_service.get_specific_fields_of_db_entry(
        fw_object.get_uid(), {'processed_analysis.{}'.format(analysis_to_do): 1}
    )
    desanitized_analysis = self.db_backend_service.retrieve_analysis(db_entry['processed_analysis'])
    fw_object.processed_analysis[analysis_to_do] = desanitized_analysis[analysis_to_do]

def test_get_object(self):
    fo = self.db_interface.get_object(self.test_firmware.get_uid())
    self.assertIsNone(fo, 'found something but there is nothing in the database')
    self.db_interface_backend.add_firmware(self.test_firmware)
    fo = self.db_interface.get_object(self.test_firmware.get_uid())
    self.assertIsInstance(fo, Firmware, 'firmware has wrong type')
    self.assertEqual(fo.device_name, 'test_router', 'Device name in Firmware not correct')
    test_file = FileObject(file_path=path.join(get_test_data_dir(), 'get_files_test/testfile2'))
    self.db_interface_backend.add_file_object(test_file)
    fo = self.db_interface.get_object(test_file.get_uid())
    self.assertIsInstance(fo, FileObject, 'file object has wrong type')

def add_analysis(self, file_object: FileObject):
    if isinstance(file_object, (Firmware, FileObject)):
        processed_analysis = self.sanitize_analysis(file_object.processed_analysis, file_object.get_uid())
        for analysis_system in processed_analysis:
            self._update_analysis(file_object, analysis_system, processed_analysis[analysis_system])
    else:
        raise RuntimeError(
            'Trying to add object of type \'{}\' to database. Only allowed for \'Firmware\' and \'FileObject\''.format(type(file_object).__name__)
        )

def _start_or_skip_analysis(self, analysis_to_do: str, fw_object: FileObject):
    if self._analysis_is_already_in_db_and_up_to_date(analysis_to_do, fw_object.get_uid()):
        logging.debug('skipping analysis "{}" for {} (analysis already in DB)'.format(analysis_to_do, fw_object.get_uid()))
        if analysis_to_do in self._get_cumulative_remaining_dependencies(fw_object.scheduled_analysis):
            self._add_completed_analysis_results_to_file_object(analysis_to_do, fw_object)
        self.check_further_process_or_complete(fw_object)
    elif analysis_to_do not in MANDATORY_PLUGINS and self._next_analysis_is_blacklisted(analysis_to_do, fw_object):
        logging.debug('skipping analysis "{}" for {} (blacklisted file type)'.format(analysis_to_do, fw_object.get_uid()))
        fw_object.processed_analysis[analysis_to_do] = self._get_skipped_analysis_result(analysis_to_do)
        self.check_further_process_or_complete(fw_object)
    else:
        self.analysis_plugins[analysis_to_do].add_job(fw_object)

def get_results_from_parent_fos(parent_fo: FileObject, this_fo: FileObject, results: dict):
    if parent_fo is None:
        return None

    file_names = [
        virtual_file_path.split('|')[-1][1:]  # strip the leading '/' from the path inside the parent
        for virtual_path_list in this_fo.virtual_file_path.values()
        for virtual_file_path in virtual_path_list
        if parent_fo.get_uid() in virtual_file_path
    ]

    if AnalysisPlugin.NAME in parent_fo.processed_analysis and 'files' in parent_fo.processed_analysis[AnalysisPlugin.NAME]:
        parent_analysis = parent_fo.processed_analysis[AnalysisPlugin.NAME]['files']
        for file_name in file_names:
            encoded_name = b64encode(file_name.encode()).decode()
            if encoded_name in parent_analysis:
                results[file_name] = parent_analysis[encoded_name]
                results[file_name]['parent_uid'] = parent_fo.get_uid()

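# Sketch (made-up data, assumed result layout) of the filename-to-key mapping that
# get_results_from_parent_fos() relies on: the parent's per-file results are keyed by
# the base64-encoded file name, so lookups must encode the name before indexing.
from base64 import b64encode

_parent_analysis = {b64encode(b'config.bin').decode(): {'result': 'ok'}}
_encoded_name = b64encode('config.bin'.encode()).decode()
assert _parent_analysis[_encoded_name] == {'result': 'ok'}
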
def test_object_processing_no_childs(self):
    root_object = FileObject(binary=b'root_file')
    self.base_plugin.in_queue.put(root_object)
    processed_object = self.base_plugin.out_queue.get()
    self.assertEqual(processed_object.get_uid(), root_object.get_uid(), 'uid changed')
    self.assertIn('base', processed_object.processed_analysis, 'object not processed')
    self.assertEqual(processed_object.processed_analysis['base']['plugin_version'], 'not set', 'plugin version missing in results')
    self.assertGreater(processed_object.processed_analysis['base']['analysis_date'], 1, 'analysis date missing in results')

def test_store_and_delete_file(self):
    test_binary = b'abcde'
    file_object = FileObject(test_binary)
    self.fs_organzier.store_file(file_object)
    expected_path = '{}/36/36bbe50ed96841d10443bcb670d6554f0a34b761be67ec9c4a8ad2c0c44ca42c_5'.format(self.ds_tmp_dir.name)
    self.check_file_presence_and_content(expected_path, b'abcde')
    self.assertEqual(file_object.file_path, expected_path, 'wrong file path set in file object')
    self.fs_organzier.delete_file(file_object.get_uid())
    self.assertFalse(os.path.exists(file_object.file_path), 'file not deleted')

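# The uid format implied by test_store_and_delete_file() above: the sha256 hex digest
# of the binary, an underscore, and the length in bytes. This recomputes the value the
# test hard-codes for b'abcde' (a sketch for illustration, not the FACT helper itself).
from hashlib import sha256

_binary = b'abcde'
_uid = '{}_{}'.format(sha256(_binary).hexdigest(), len(_binary))
assert _uid == '36bbe50ed96841d10443bcb670d6554f0a34b761be67ec9c4a8ad2c0c44ca42c_5'
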
def _update_analysis(self, file_object: FileObject, analysis_system: str, result: dict):
    try:
        collection = self.firmwares if isinstance(file_object, Firmware) else self.file_objects
        entry_with_tags = collection.find_one({'_id': file_object.uid}, {'analysis_tags': 1})
        collection.update_one(
            {'_id': file_object.get_uid()},
            {'$set': {
                'processed_analysis.{}'.format(analysis_system): result,
                'analysis_tags': update_analysis_tags(file_object, entry_with_tags)
            }}
        )
    except Exception as exception:
        logging.error('Update of analysis failed badly ({})'.format(exception))
        raise exception

def test_get_uid_deprecation(self, caplog):
    fo = FileObject()
    with caplog.at_level(logging.INFO):
        fo.get_uid()
        assert 'Deprecation warning' in caplog.messages[0]

def _generate_local_file_path(self, file_object: FileObject):
    if not Path(file_object.file_path).exists():
        # file not present at its recorded path -> fall back to its location
        # in the file storage system, derived from the uid
        local_path = self.file_storage_system.generate_path(file_object.get_uid())
        return local_path
    return file_object.file_path

def test_remove_duplicates_child_equals_parent(self):
    parent = FileObject(binary=b'parent_content')
    result = self.unpacker.remove_duplicates({parent.get_uid(): parent}, parent)
    self.assertEqual(len(result), 0, 'parent not removed from list')