Example #1
0
 def test_get_virtual_file_path(self):
     """The UID serves as the virtual file path until an explicit path is assigned."""
     test_fo = FileObject(binary=b'file_object')
     # without name or path, the mapping must fall back to the uid
     self.assertIn(
         test_fo.get_uid(),
         test_fo.get_virtual_file_paths().keys(),
         'not correct if path _ name not set'
     )
     test_fo.set_name('the_file_name.txt')
     self.assertEqual(
         test_fo.get_virtual_file_paths()[test_fo.get_uid()][0],
         test_fo.get_uid(),
         'not correct if path not set'
     )
     test_fo.virtual_file_path = {test_fo.get_uid(): '/foo/bar/the_file_name.txt'}
     self.assertEqual(
         test_fo.get_virtual_file_paths()[test_fo.get_uid()],
         '/foo/bar/the_file_name.txt',
         'not correct if path set'
     )
Example #2
0
    def unpack(self, current_fo: FileObject):
        '''
        Recursively extract all objects included in current_fo and add them to current_fo.files_included

        :param current_fo: the file object to extract
        :return: the list of newly extracted file objects (empty if the depth limit is reached)
        '''
        logging.debug('[worker {}] Extracting {}: Depth: {}'.format(
            self.worker_id, current_fo.get_uid(), current_fo.depth))

        # read the limit once so the check and the log message agree
        # (the original read it twice, with getint() and then get())
        max_depth = self.config.getint('unpack', 'max_depth')
        if current_fo.depth >= max_depth:
            logging.warning(
                '{} is not extracted since depth limit ({}) is reached'.format(
                    current_fo.get_uid(), max_depth))
            return []

        tmp_dir = TemporaryDirectory(prefix='fact_unpack_')
        file_path = self._generate_local_file_path(current_fo)
        extracted_files = self.extract_files_from_file(file_path, tmp_dir.name)

        extracted_file_objects = self.generate_and_store_file_objects(
            extracted_files, tmp_dir.name, current_fo)
        extracted_file_objects = self.remove_duplicates(
            extracted_file_objects, current_fo)
        self.add_included_files_to_object(extracted_file_objects, current_fo)

        # attach the meta data report written by the extractor
        current_fo.processed_analysis['unpacker'] = json.loads(
            Path(tmp_dir.name, 'reports', 'meta.json').read_text())

        self.cleanup(tmp_dir)
        return extracted_file_objects
Example #3
0
 def generate_and_store_file_objects(self, file_paths: List[Path],
                                     extractor_dir: str,
                                     parent: FileObject):
     """Create, lock and store a FileObject for every non-empty extracted file.

     Returns a dict mapping uid -> FileObject; repeated extractions of the
     same file only gain an additional virtual path entry.
     """
     extracted_files = {}
     # loop invariants: extraction dir, root uid and virtual path base of the parent
     files_dir = str(Path(extractor_dir, 'files'))
     root_uid = parent.get_root_uid()
     base_path = parent.get_base_of_virtual_path(parent.get_virtual_file_paths()[root_uid][0])
     for file_ in file_paths:
         if file_is_empty(file_):
             continue
         new_fo = FileObject(file_path=str(file_))
         relative_path = get_object_path_excluding_fact_dirs(make_unicode_string(str(file_)), files_dir)
         virtual_path = '{}|{}|{}'.format(base_path, parent.get_uid(), relative_path)
         new_fo.temporary_data['parent_fo_type'] = get_file_type_from_path(parent.file_path)['mime']
         if new_fo.get_uid() in extracted_files:  # the same file is extracted multiple times from one archive
             extracted_files[new_fo.get_uid()].virtual_file_path[root_uid].append(virtual_path)
         else:
             self.db_interface.set_unpacking_lock(new_fo.uid)
             self.file_storage_system.store_file(new_fo)
             new_fo.virtual_file_path = {root_uid: [virtual_path]}
             new_fo.parent_firmware_uids.add(root_uid)
             extracted_files[new_fo.get_uid()] = new_fo
     return extracted_files
Example #4
0
 def test_get_included_files_uids(self):
     """Adding two children must surface exactly their two uids on the parent."""
     test_parent = FileObject(binary=b'parent_file')
     children = [FileObject(binary=b'1st child'), FileObject(binary=b'2nd child')]
     for child in children:
         test_parent.add_included_file(child)
     self.assertEqual(len(test_parent.get_included_files_uids()), 2, 'number of uids not correct')
     self.assertIn(children[0].get_uid(), test_parent.get_included_files_uids(), 'uid of first file not found')
     self.assertIn(children[1].get_uid(), test_parent.get_included_files_uids(), 'uid of second file not found')
Example #5
0
 def test_add_included_file(self):
     """add_included_file must link both objects and propagate depth and schedule."""
     parent_object = FileObject(binary=b'parent_file')
     parent_object.scheduled_analysis = ['test']
     child_object = FileObject(binary=b'child')

     parent_object.add_included_file(child_object)

     self.assertEqual(len(parent_object.files_included), 1, 'number of included files not correct')
     self.assertIn(child_object.get_uid(), parent_object.files_included, 'child uid not stored correctly')
     self.assertIn(parent_object.get_uid(), child_object.parents, 'parent not added to child')
     self.assertEqual(child_object.depth, parent_object.depth + 1, 'child depth not updated')
     self.assertEqual(child_object.scheduled_analysis, ['test'], 'child did not get scheduled analysis list of parent')
Example #6
0
 def test_object_processing_one_child(self):
     """A root object with one child passes through the plugin queues with uid and child intact."""
     root_object = FileObject(binary=b'root_file')
     child_object = FileObject(binary=b'first_child_object')
     root_object.add_included_file(child_object)
     self.base_plugin.in_queue.put(root_object)
     processed_object = self.base_plugin.out_queue.get()
     self.assertEqual(processed_object.get_uid(), root_object.get_uid(),
                      'uid changed')
     # assertIn gives a clearer failure message than assertTrue(... in ...)
     self.assertIn(child_object.get_uid(),
                   root_object.get_included_files_uids(),
                   'child object not in processed file')
Example #7
0
    def unpack_fo(self, file_object: FileObject) -> Optional[TemporaryDirectory]:
        """Extract the file behind file_object into a fresh temporary directory.

        Returns the TemporaryDirectory containing the extracted files, or None
        when the file cannot be located on disk.
        """
        file_path = file_object.file_path
        if not file_path:
            # fall back to the path recorded in the database
            file_path = self._get_file_path_from_db(file_object.get_uid())
        if not file_path or not Path(file_path).is_file():
            logging.error('could not unpack {}: file path not found'.format(file_object.get_uid()))
            return None

        extraction_dir = TemporaryDirectory(prefix='FACT_plugin_qemu_exec')
        self.extract_files_from_file(file_path, extraction_dir.name)
        return extraction_dir
Example #8
0
 def _update_analysis(self, file_object: FileObject, analysis_system: str, result: dict):
     """Write one analysis result into the collection matching file_object's type.

     :param file_object: object whose database entry is updated
     :param analysis_system: name of the analysis plugin that produced the result
     :param result: the result dict to store
     :raises Exception: re-raises any database error after logging it
     """
     try:
         # isinstance instead of `type(...) ==` so Firmware subclasses also land
         # in the firmware collection; this also removes the duplicated update call
         collection = self.firmwares if isinstance(file_object, Firmware) else self.file_objects
         collection.update_one(
             {'_id': file_object.get_uid()},
             {'$set': {'processed_analysis.{}'.format(analysis_system): result}}
         )
     except Exception as exception:
         logging.error('Update of analysis failed badly ({})'.format(exception))
         raise
Example #9
0
 def add_job(self, fw_object: FileObject):
     """Queue fw_object for analysis or pass it straight to the output queue.

     Objects with unfulfilled plugin dependencies or at the recursion depth
     limit skip analysis and are forwarded unchanged.
     """
     if self._dependencies_are_unfulfilled(fw_object):
         logging.error('{}: dependencies of plugin {} not fulfilled'.format(fw_object.get_uid(), self.NAME))
         self.out_queue.put(fw_object)
     elif self._analysis_depth_not_reached_yet(fw_object):
         self.in_queue.put(fw_object)
     else:
         self.out_queue.put(fw_object)
Example #10
0
 def _update_processed_analysis(self, new_object: FileObject,
                                old_object: dict) -> dict:
     """Merge new_object's analysis results over the stored ones and re-sanitize.

     :param new_object: object carrying fresh analysis results
     :param old_object: raw DB entry; its 'processed_analysis' is desanitized first
     :return: the merged and sanitized analysis dict
     """
     old_pa = self.retrieve_analysis(old_object['processed_analysis'])
     # dict.update replaces the manual key-by-key copy; new results win on conflicts
     old_pa.update(new_object.processed_analysis)
     return self.sanitize_analysis(analysis_dict=old_pa,
                                   uid=new_object.get_uid())
Example #11
0
 def generate_and_store_file_objects(self, file_paths, tmp_dir, parent):
     """Create and store a FileObject for every non-empty extracted file.

     Returns a dict mapping uid -> FileObject; a file extracted several times
     from one archive only gains an additional virtual path entry.
     """
     extracted_files = {}
     root_uid = parent.get_root_uid()
     for file_ in file_paths:
         if file_is_empty(file_):
             continue
         new_fo = FileObject(file_path=file_)
         virtual_path = '{}|{}|{}'.format(
             parent.get_base_of_virtual_path(parent.get_virtual_file_paths()[root_uid][0]),
             parent.get_uid(),
             get_chroot_path_excluding_extracted_dir(make_unicode_string(file_), tmp_dir)
         )
         if new_fo.get_uid() in extracted_files:  # the same file is extracted multiple times from one archive
             extracted_files[new_fo.get_uid()].virtual_file_path[root_uid].append(virtual_path)
         else:
             self.file_storage_system.store_file(new_fo)
             new_fo.virtual_file_path = {root_uid: [virtual_path]}
             new_fo.parent_firmware_uids.add(root_uid)
             extracted_files[new_fo.get_uid()] = new_fo
     return extracted_files
Example #12
0
 def test_get_one_virtual_path(self):
     """get_virtual_paths_for_one_uid falls back to the uid and honors root_uid."""
     fo = FileObject(binary=b'foo')
     self.assertEqual(fo.get_virtual_paths_for_one_uid(), [fo.get_uid()], 'No Path set should be uid')
     fo.virtual_file_path = {
         'uid_a': ['test_file_path_a'],
         'uid_b': ['test_file_path_b'],
         'uid_c': ['test_file_path_c'],
     }
     # without an explicit root uid, the first entry is returned
     self.assertEqual(fo.get_virtual_paths_for_one_uid(), ['test_file_path_a'])
     # an explicit argument selects the matching entry
     self.assertEqual(fo.get_virtual_paths_for_one_uid(root_uid='uid_b'), ['test_file_path_b'])
     # the root_uid attribute is used as default when set
     fo.root_uid = 'uid_c'
     self.assertEqual(fo.get_virtual_paths_for_one_uid(), ['test_file_path_c'])
Example #13
0
 def test_get_objects_by_uid_list(self):
     """Objects fetched via uid list keep their concrete type (Firmware vs FileObject)."""
     self.db_interface_backend.add_firmware(self.test_firmware)
     firmware_results = self.db_interface.get_objects_by_uid_list([self.test_firmware.get_uid()])
     self.assertIsInstance(firmware_results[0], Firmware, 'firmware has wrong type')
     self.assertEqual(firmware_results[0].device_name, 'test_router', 'Device name in Firmware not correct')

     test_file = FileObject(file_path=path.join(get_test_data_dir(), 'get_files_test/testfile2'))
     self.db_interface_backend.add_file_object(test_file)
     file_results = self.db_interface.get_objects_by_uid_list([test_file.get_uid()])
     self.assertIsInstance(file_results[0], FileObject, 'file object has wrong type')
Example #14
0
 def _add_completed_analysis_results_to_file_object(self,
                                                    analysis_to_do: str,
                                                    fw_object: FileObject):
     """Load the stored result of analysis_to_do from the DB and attach it to fw_object."""
     field_filter = {'processed_analysis.{}'.format(analysis_to_do): 1}
     db_entry = self.db_backend_service.get_specific_fields_of_db_entry(fw_object.get_uid(), field_filter)
     # stored results are sanitized; desanitize before attaching them to the object
     desanitized_analysis = self.db_backend_service.retrieve_analysis(db_entry['processed_analysis'])
     fw_object.processed_analysis[analysis_to_do] = desanitized_analysis[analysis_to_do]
Example #15
0
 def test_get_object(self):
     """get_object returns None for unknown uids and correctly typed objects otherwise."""
     fo = self.db_interface.get_object(self.test_firmware.get_uid())
     self.assertIsNone(fo, 'found something but there is nothing in the database')

     self.db_interface_backend.add_firmware(self.test_firmware)
     fo = self.db_interface.get_object(self.test_firmware.get_uid())
     self.assertIsInstance(fo, Firmware, 'firmware has wrong type')
     self.assertEqual(fo.device_name, 'test_router', 'Device name in Firmware not correct')

     test_file_path = path.join(get_test_data_dir(), 'get_files_test/testfile2')
     test_file = FileObject(file_path=test_file_path)
     self.db_interface_backend.add_file_object(test_file)
     fo = self.db_interface.get_object(test_file.get_uid())
     self.assertIsInstance(fo, FileObject, 'file object has wrong type')
Example #16
0
 def add_analysis(self, file_object: FileObject):
     """Sanitize and store all processed analysis results of file_object.

     :param file_object: the Firmware or FileObject whose results are stored
     :raises RuntimeError: if file_object is neither Firmware nor FileObject
     """
     if not isinstance(file_object, (Firmware, FileObject)):
         # bug fix: the message contained a '{}' placeholder but .format() was never called,
         # so the offending type was never reported
         raise RuntimeError(
             'Trying to add from type \'{}\' to database. Only allowed for \'Firmware\' and \'FileObject\''.format(type(file_object).__name__)
         )
     processed_analysis = self.sanitize_analysis(
         file_object.processed_analysis, file_object.get_uid())
     for analysis_system in processed_analysis:
         self._update_analysis(file_object, analysis_system,
                               processed_analysis[analysis_system])
Example #17
0
 def _start_or_skip_analysis(self, analysis_to_do: str, fw_object: FileObject):
     """Dispatch one scheduled analysis for fw_object or skip it.

     The analysis is skipped when an up-to-date result already exists in the DB
     or when the plugin blacklists the object's file type; otherwise the job is
     handed to the matching analysis plugin.
     """
     if self._analysis_is_already_in_db_and_up_to_date(analysis_to_do, fw_object.get_uid()):
         logging.debug('skipping analysis "{}" for {} (analysis already in DB)'.format(analysis_to_do, fw_object.get_uid()))
         # a later plugin may depend on this result, so load it from the DB into the object
         if analysis_to_do in self._get_cumulative_remaining_dependencies(fw_object.scheduled_analysis):
             self._add_completed_analysis_results_to_file_object(analysis_to_do, fw_object)
         self.check_further_process_or_complete(fw_object)
     elif analysis_to_do not in MANDATORY_PLUGINS and self._next_analysis_is_blacklisted(analysis_to_do, fw_object):
         logging.debug('skipping analysis "{}" for {} (blacklisted file type)'.format(analysis_to_do, fw_object.get_uid()))
         # record a skipped-analysis placeholder result on the object
         fw_object.processed_analysis[analysis_to_do] = self._get_skipped_analysis_result(analysis_to_do)
         self.check_further_process_or_complete(fw_object)
     else:
         self.analysis_plugins[analysis_to_do].add_job(fw_object)
Example #18
0
    def get_results_from_parent_fos(parent_fo: FileObject, this_fo: FileObject,
                                    results: dict):
        """Copy matching per-file results of parent_fo into results (mutated in place)."""
        if parent_fo is None:
            return None

        # collect the names under which this_fo appears inside parent_fo
        file_names = []
        for virtual_path_list in this_fo.virtual_file_path.values():
            for virtual_file_path in virtual_path_list:
                if parent_fo.get_uid() in virtual_file_path:
                    # last path segment without its leading separator character
                    file_names.append(virtual_file_path.split('|')[-1][1:])

        plugin_result = parent_fo.processed_analysis.get(AnalysisPlugin.NAME, {})
        if 'files' not in plugin_result:
            return None
        parent_analysis = plugin_result['files']
        for file_name in file_names:
            encoded_name = b64encode(file_name.encode()).decode()
            if encoded_name in parent_analysis:
                results[file_name] = parent_analysis[encoded_name]
                results[file_name]['parent_uid'] = parent_fo.get_uid()
Example #19
0
 def test_object_processing_no_childs(self):
     """An object without children passes through the plugin and gets 'base' results."""
     root_object = FileObject(binary=b'root_file')
     self.base_plugin.in_queue.put(root_object)
     processed_object = self.base_plugin.out_queue.get()
     self.assertEqual(processed_object.get_uid(), root_object.get_uid(),
                      'uid changed')
     # assertIn gives a clearer failure message than assertTrue('base' in ...)
     self.assertIn('base', processed_object.processed_analysis,
                   'object not processed')
     self.assertEqual(
         processed_object.processed_analysis['base']['plugin_version'],
         'not set', 'plugin version missing in results')
     self.assertGreater(
         processed_object.processed_analysis['base']['analysis_date'], 1,
         'analysis date missing in results')
Example #20
0
    def test_store_and_delete_file(self):
        """Storing places the file at its content-hash path; deleting removes it again."""
        test_binary = b'abcde'
        file_object = FileObject(test_binary)
        # hash-based storage path (sha256 of b'abcde' plus '_<size>'), used twice below
        expected_path = (
            '{}/36/36bbe50ed96841d10443bcb670d6554f0a34b761be67ec9c4a8ad2c0c44ca42c_5'
            .format(self.ds_tmp_dir.name)
        )

        self.fs_organzier.store_file(file_object)
        self.check_file_presence_and_content(expected_path, b'abcde')
        self.assertEqual(file_object.file_path, expected_path,
                         'wrong file path set in file object')

        self.fs_organzier.delete_file(file_object.get_uid())
        self.assertFalse(os.path.exists(file_object.file_path),
                         'file not deleted')
Example #21
0
    def _update_analysis(self, file_object: FileObject, analysis_system: str,
                         result: dict):
        """Store one analysis result plus refreshed analysis tags for file_object.

        :param file_object: object whose database entry is updated
        :param analysis_system: name of the analysis plugin that produced the result
        :param result: the result dict to store
        :raises Exception: re-raises any database error after logging it
        """
        try:
            collection = self.firmwares if isinstance(
                file_object, Firmware) else self.file_objects

            entry_with_tags = collection.find_one({'_id': file_object.uid},
                                                  {'analysis_tags': 1})

            # use .uid consistently: get_uid() emits a deprecation warning
            collection.update_one({'_id': file_object.uid}, {
                '$set': {
                    'processed_analysis.{}'.format(analysis_system):
                    result,
                    'analysis_tags':
                    update_analysis_tags(file_object, entry_with_tags)
                }
            })
        except Exception as exception:
            logging.error(
                'Update of analysis failed badly ({})'.format(exception))
            raise
Example #22
0
 def test_get_uid_deprecation(self, caplog):
     """Calling the deprecated get_uid() must emit a deprecation log message."""
     file_object = FileObject()
     with caplog.at_level(logging.INFO):
         file_object.get_uid()
     assert 'Deprecation warning' in caplog.messages[0]
Example #23
0
 def _generate_local_file_path(self, file_object: FileObject):
     """Return file_object's path on disk, falling back to the storage-derived path."""
     if Path(file_object.file_path).exists():
         return file_object.file_path
     # the recorded file is gone; derive the storage location from the uid
     return self.file_storage_system.generate_path(file_object.get_uid())
Example #24
0
 def test_remove_duplicates_child_equals_parent(self):
     """A child identical to its parent must be dropped by remove_duplicates."""
     parent = FileObject(binary=b'parent_content')
     deduplicated = self.unpacker.remove_duplicates({parent.get_uid(): parent}, parent)
     self.assertEqual(len(deduplicated), 0, 'parent not removed from list')