Example #1
class InterComBackEndDeleteFile(InterComListener):

    CONNECTION_TYPE = 'file_delete_task'

    def additional_setup(self, config=None):
        self.fs_organizer = FSOrganizer(config=config)

    def post_processing(self, task, task_id):
        if self._entry_was_removed_from_db(task['_id']):
            logging.info('remove file: {}'.format(task['_id']))
            self.fs_organizer.delete_file(task['_id'])

    def _entry_was_removed_from_db(self, uid):
        with ConnectTo(MongoInterfaceCommon, self.config) as db:
            if db.existence_quick_check(uid):
                logging.debug(
                    'file not removed, because database entry exists: {}'.
                    format(uid))
                return False
            if db.check_unpacking_lock(uid):
                logging.debug(
                    'file not removed, because it is processed by unpacker: {}'
                    .format(uid))
                return False
        return True
Example #2
class TestFsOrganizer(unittest.TestCase):

    def setUp(self):
        self.ds_tmp_dir = TemporaryDirectory(prefix='fact_tests_')
        config = ConfigParser()
        config.add_section('data_storage')
        config.set('data_storage', 'firmware_file_storage_directory', self.ds_tmp_dir.name)
        self.fs_organizer = FSOrganizer(config)

    def tearDown(self):
        self.ds_tmp_dir.cleanup()
        gc.collect()

    def check_file_presence_and_content(self, file_path, file_binary):
        self.assertTrue(os.path.exists(file_path), 'file exists')
        self.assertEqual(get_binary_from_file(file_path), file_binary, 'correct content')

    def test_generate_path(self):
        test_binary = b'abcde'
        file_object = FileObject(test_binary)
        file_path = self.fs_organizer.generate_path(file_object)
        # file path should be 'DATA_DIR/UID_PREFIX/UID'
        self.assertEqual(file_path, '{}/36/36bbe50ed96841d10443bcb670d6554f0a34b761be67ec9c4a8ad2c0c44ca42c_5'.format(self.ds_tmp_dir.name), 'generate file path')

    def test_store_and_delete_file(self):
        test_binary = b'abcde'
        file_object = FileObject(test_binary)

        self.fs_organizer.store_file(file_object)
        self.check_file_presence_and_content('{}/36/36bbe50ed96841d10443bcb670d6554f0a34b761be67ec9c4a8ad2c0c44ca42c_5'.format(self.ds_tmp_dir.name), b'abcde')
        self.assertEqual(file_object.file_path, '{}/36/36bbe50ed96841d10443bcb670d6554f0a34b761be67ec9c4a8ad2c0c44ca42c_5'.format(self.ds_tmp_dir.name), 'wrong file path set in file object')

        self.fs_organizer.delete_file(file_object.uid)
        self.assertFalse(os.path.exists(file_object.file_path), 'file not deleted')
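The assertions in Example #2 imply FSOrganizer's storage scheme: a file's UID is the SHA-256 hexdigest of its content followed by '_' and the size in bytes, and the file is stored under '<firmware_file_storage_directory>/<first two hex characters of the UID>/<UID>'. The following is a minimal sketch of that scheme under this assumption; create_uid and build_storage_path are illustrative helpers, not FACT's actual FSOrganizer API.

import hashlib
import os


def create_uid(binary: bytes) -> str:
    # UID format implied by the test above: '<sha256 hexdigest>_<size in bytes>'
    return '{}_{}'.format(hashlib.sha256(binary).hexdigest(), len(binary))


def build_storage_path(data_dir: str, uid: str) -> str:
    # files are binned by the first two hex characters of the UID
    return os.path.join(data_dir, uid[:2], uid)


# build_storage_path(ds_tmp_dir, create_uid(b'abcde')) yields
# '<ds_tmp_dir>/36/36bbe50ed96841d10443bcb670d6554f0a34b761be67ec9c4a8ad2c0c44ca42c_5'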
Example #3
class BinaryService:
    '''
    Backend that provides access to stored binaries and their file names
    via the file system and the database
    '''
    def __init__(self, config=None):
        self.config = config
        self.fs_organizer = FSOrganizer(config=config)
        logging.info("binary service online")

    def get_binary_and_file_name(
            self, uid: str) -> Tuple[Optional[bytes], Optional[str]]:
        file_name = self._get_file_name_from_db(uid)
        if file_name is None:
            return None, None
        binary = get_binary_from_file(
            self.fs_organizer.generate_path_from_uid(uid))
        return binary, file_name

    def get_repacked_binary_and_file_name(
            self, uid: str) -> Tuple[Optional[bytes], Optional[str]]:
        file_name = self._get_file_name_from_db(uid)
        if file_name is None:
            return None, None
        repack_service = TarRepack(config=self.config)
        tar = repack_service.tar_repack(
            self.fs_organizer.generate_path_from_uid(uid))
        name = "{}.tar.gz".format(file_name)
        return tar, name

    def _get_file_name_from_db(self, uid: str) -> Optional[str]:
        with ConnectTo(BinaryServiceDbInterface, self.config) as db_service:
            return db_service.get_file_name(uid)
Example #4
class InterComBackEndAnalysisTask(InterComListener):

    CONNECTION_TYPE = 'analysis_task'

    def additional_setup(self, config=None):
        self.fs_organizer = FSOrganizer(config=config)

    def post_processing(self, task, task_id):
        self.fs_organizer.store_file(task)
        return task
Example #5
 def __init__(self,
              plugin_administrator,
              config=None,
              recursive=True,
              offline_testing=False):
     self.config = config
     self._fs_organizer = FSOrganizer(config)
     super().__init__(plugin_administrator,
                      config=config,
                      plugin_path=__file__,
                      recursive=recursive,
                      offline_testing=offline_testing)
Example #6
 def _delete_firmware(self):
     fs_backend = FSOrganizer(config=self.config)
     local_firmware_path = Path(
         fs_backend.generate_path_from_uid(self.test_fw_a.uid))
     self.assertTrue(local_firmware_path.exists(),
                     'file not found before delete')
     rv = self.test_client.get('/admin/delete/{}'.format(
         self.test_fw_a.uid))
     self.assertIn(b'Deleted 4 file(s) from database', rv.data,
                   'deletion success page not shown')
     rv = self.test_client.get('/analysis/{}'.format(self.test_fw_a.uid))
     self.assertIn(b'File not found in database', rv.data,
                   'file is still available after delete')
     time.sleep(5)
     self.assertFalse(local_firmware_path.exists(), 'file not deleted')
Example #7
 def test_re_analyze_task(self):
     self.backend = InterComBackEndReAnalyzeTask(config=self.config)
     fs_organizer = FSOrganizer(config=self.config)
     test_fw = create_test_firmware()
     fs_organizer.store_file(test_fw)
     original_file_path = test_fw.file_path
     original_binary = test_fw.binary
     test_fw.file_path = None
     test_fw.binary = None
     self.frontend.add_re_analyze_task(test_fw)
     task = self.backend.get_next_task()
     self.assertEqual(task.uid, test_fw.uid, 'uid not correct')
     self.assertIsNotNone(task.file_path, 'file path not set')
     self.assertEqual(task.file_path, original_file_path)
     self.assertIsNotNone(task.binary, 'binary not set')
     self.assertEqual(task.binary, original_binary,
                      'binary content not correct')
Example #8
class InterComBackEndReAnalyzeTask(InterComListener):

    CONNECTION_TYPE = 're_analyze_task'

    def additional_setup(self, config=None):
        self.fs_organizer = FSOrganizer(config=config)

    def post_processing(self, task, task_id):
        task.file_path = self.fs_organizer.generate_path(task)
        task.create_binary_from_path()
        return task
Example #9
 def _start_backend(self, post_analysis=None, compare_callback=None):
     # pylint: disable=attribute-defined-outside-init
     self.analysis_service = AnalysisScheduler(config=self.config,
                                               post_analysis=post_analysis)
     self.unpacking_service = UnpackingScheduler(
         config=self.config,
         post_unpack=self.analysis_service.start_analysis_of_object)
     self.compare_service = CompareScheduler(config=self.config,
                                             callback=compare_callback)
     self.intercom = InterComBackEndBinding(
         config=self.config,
         analysis_service=self.analysis_service,
         compare_service=self.compare_service,
         unpacking_service=self.unpacking_service)
     self.fs_organizer = FSOrganizer(config=self.config)
Example #10
 def get_file_paths_of_files_included_in_fo(self, fo_uid: str) -> List[str]:
     fs_organizer = FSOrganizer(self.config)
     return [
         fs_organizer.generate_path_from_uid(uid)
         for uid in self.get_uids_of_all_included_files(fo_uid)
     ]
Example #11
 def additional_setup(self, config=None):
     self.fs_organizer = FSOrganizer(config=config)
Example #12
 def setUp(self):
     self.ds_tmp_dir = TemporaryDirectory(prefix='fact_tests_')
     config = ConfigParser()
     config.add_section('data_storage')
     config.set('data_storage', 'firmware_file_storage_directory', self.ds_tmp_dir.name)
     self.fs_organizer = FSOrganizer(config)
Example #13
class AnalysisPlugin(AnalysisBasePlugin):
    '''
    This class implements the FACT wrapper for multiple linters including
    - shellcheck (shell)
    - pylint (python)
    - jshint (javascript)
    - luacheck (lua)
    TODO Implement proper view
    TODO Implement additional linters (ruby, perl, php)
    '''
    NAME = 'source_code_analysis'
    DESCRIPTION = 'This plugin implements static code analysis for multiple scripting languages'
    DEPENDENCIES = ['file_type']
    VERSION = '0.6'
    MIME_WHITELIST = ['text/']
    # All linters must implement a 'do_analysis' method that returns a list of dicts.
    # Each of these dicts must at least contain a value for the 'symbol' key.
    SCRIPT_TYPES = {
        'shell': {
            'mime': 'shell',
            'shebang': 'sh',
            'ending': '.sh',
            'linter': shell_linter.ShellLinter
        },
        'lua': {
            'mime': 'luascript',
            'shebang': 'lua',
            'ending': '.lua',
            'linter': lua_linter.LuaLinter
        },
        'javascript': {
            'mime': 'javascript',
            'shebang': 'javascript',
            'ending': '.js',
            'linter': js_linter.JavaScriptLinter
        },
        'python': {
            'mime': 'python',
            'shebang': 'python',
            'ending': '.py',
            'linter': python_linter.PythonLinter
        }
    }

    def __init__(self,
                 plugin_administrator,
                 config=None,
                 recursive=True,
                 offline_testing=False):
        self.config = config
        self._fs_organizer = FSOrganizer(config)
        super().__init__(plugin_administrator,
                         config=config,
                         plugin_path=__file__,
                         recursive=recursive,
                         offline_testing=offline_testing)

    def process_object(self, file_object):
        '''
        Since the MIME whitelist ensures we only receive text files, we try to detect
        the scripting language and call the matching linter if the language is supported
        '''
        script_type = self._get_script_type(file_object)
        if script_type is None:
            file_object.processed_analysis[self.NAME] = {
                'summary': [],
                'warning': 'Is not a script or language could not be detected'
            }
            return file_object

        script_type = script_type.lower()

        if script_type not in self.SCRIPT_TYPES:
            logging.debug(
                f'[{self.NAME}] {file_object.file_name} ({script_type}) is not a supported script.'
            )
            file_object.processed_analysis[self.NAME] = {
                'summary': [],
                'warning': f'Unsupported script type: {script_type}'
            }
            return file_object

        issues = self.SCRIPT_TYPES[script_type]['linter']().do_analysis(
            file_object.file_path)

        if len(issues) == 0:
            file_object.processed_analysis[self.NAME] = {'summary': []}
        else:
            file_object.processed_analysis[self.NAME] = {
                'full': sorted(issues, key=lambda k: k['symbol']),
                'summary': [f'Warnings in {script_type} script']
            }
        return file_object

    def _get_script_type(self, file_object):
        host_path = self._fs_organizer.generate_path_from_uid(file_object.uid)
        container_path = f'/repo/{file_object.file_name}'
        linguist_output = run_docker_container('crazymax/linguist',
                                               60,
                                               f'--json {container_path}',
                                               reraise=True,
                                               mount=(container_path,
                                                      host_path),
                                               label=self.NAME)
        output_json = json.loads(linguist_output)

        # FIXME plugins should not set the output for other plugins
        # But due to performance reasons we don't want the filetype plugin to run linguist
        file_object.processed_analysis['file_type']['linguist'] = ''.join([
            f'{k:<10} {str(v):<10}\n'
            for k, v in output_json[container_path].items()
        ])

        script_type = output_json[container_path].get('language')

        return script_type
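The comment above SCRIPT_TYPES describes the linter contract: every linter exposes a do_analysis() method that returns a list of dicts, each of which carries at least a 'symbol' key. The class below is a hypothetical wrapper that satisfies this contract, sketched around shellcheck's JSON output; shell_linter.ShellLinter is FACT's actual implementation and may differ.

import json
import subprocess


class ShellLinterSketch:
    '''
    Illustrative linter wrapper: runs shellcheck with JSON output and maps each
    finding to a dict containing the 'symbol' key expected by
    AnalysisPlugin.process_object() above.
    '''

    def do_analysis(self, file_path):
        # shellcheck exits non-zero when it reports issues, so check=True is not used
        process = subprocess.run(
            ['shellcheck', '--format=json', file_path],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True
        )
        try:
            findings = json.loads(process.stdout)
        except json.JSONDecodeError:
            return []
        return [
            {'symbol': 'SC{}'.format(finding['code']), 'message': finding['message'], 'line': finding['line']}
            for finding in findings
        ]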
Example #14
 def __init__(self, config=None):
     super().__init__(config)
     self.fs_organizer = FSOrganizer(config=config)
Example #15
 def __init__(self, config=None):
     self.config = config
     self.fs_organizer = FSOrganizer(config=config)
     logging.info("binary service online")
Example #16
 def __init__(self, config=None, worker_id=None, db_interface=None):
     super().__init__(config=config, worker_id=worker_id)
     self.file_storage_system = FSOrganizer(config=self.config)
     self.db_interface = db_interface
Example #17
class Unpacker(UnpackBase):
    def __init__(self, config=None, worker_id=None, db_interface=None):
        super().__init__(config=config, worker_id=worker_id)
        self.file_storage_system = FSOrganizer(config=self.config)
        self.db_interface = db_interface

    def unpack(self, current_fo: FileObject):
        '''
        Recursively extract all objects included in current_fo and add them to current_fo.files_included
        '''

        logging.debug('[worker {}] Extracting {}: Depth: {}'.format(self.worker_id, current_fo.uid, current_fo.depth))

        if current_fo.depth >= self.config.getint('unpack', 'max_depth'):
            logging.warning('{} is not extracted since depth limit ({}) is reached'.format(current_fo.uid, self.config.get('unpack', 'max_depth')))
            self._store_unpacking_depth_skip_info(current_fo)
            return []

        tmp_dir = TemporaryDirectory(prefix='fact_unpack_', dir=get_temp_dir_path(self.config))

        file_path = self._generate_local_file_path(current_fo)

        extracted_files = self.extract_files_from_file(file_path, tmp_dir.name)

        extracted_file_objects = self.generate_and_store_file_objects(extracted_files, tmp_dir.name, current_fo)
        extracted_file_objects = self.remove_duplicates(extracted_file_objects, current_fo)
        self.add_included_files_to_object(extracted_file_objects, current_fo)

        # set meta data
        current_fo.processed_analysis['unpacker'] = json.loads(Path(tmp_dir.name, 'reports', 'meta.json').read_text())

        self.cleanup(tmp_dir)
        return extracted_file_objects

    @staticmethod
    def _store_unpacking_depth_skip_info(file_object: FileObject):
        file_object.processed_analysis['unpacker'] = {
            'plugin_used': 'None', 'number_of_unpacked_files': 0,
            'info': 'Unpacking stopped because maximum unpacking depth was reached',
        }
        tag_dict = {'unpacker': {'depth reached': {'value': 'unpacking depth reached', 'color': TagColor.ORANGE, 'propagate': False}}}
        file_object.analysis_tags.update(tag_dict)

    def cleanup(self, tmp_dir):
        try:
            tmp_dir.cleanup()
        except OSError as error:
            logging.error('[worker {}] Could not clean up tmp_dir: {} - {}'.format(self.worker_id, type(error), str(error)))

    @staticmethod
    def add_included_files_to_object(included_file_objects, root_file_object):
        for item in included_file_objects:
            root_file_object.add_included_file(item)

    def generate_and_store_file_objects(self, file_paths: List[Path], extractor_dir: str, parent: FileObject):
        extracted_files = {}
        for item in file_paths:
            if not file_is_empty(item):
                current_file = FileObject(file_path=str(item))
                current_virtual_path = join_virtual_path(
                    get_base_of_virtual_path(parent.get_virtual_file_paths()[parent.get_root_uid()][0]),
                    parent.uid, get_object_path_excluding_fact_dirs(make_unicode_string(str(item)), str(Path(extractor_dir, 'files')))
                )
                current_file.temporary_data['parent_fo_type'] = get_file_type_from_path(parent.file_path)['mime']
                if current_file.uid in extracted_files:  # the same file is extracted multiple times from one archive
                    extracted_files[current_file.uid].virtual_file_path[parent.get_root_uid()].append(current_virtual_path)
                else:
                    self.db_interface.set_unpacking_lock(current_file.uid)
                    self.file_storage_system.store_file(current_file)
                    current_file.virtual_file_path = {parent.get_root_uid(): [current_virtual_path]}
                    current_file.parent_firmware_uids.add(parent.get_root_uid())
                    extracted_files[current_file.uid] = current_file
        return extracted_files

    @staticmethod
    def remove_duplicates(extracted_fo_dict, parent_fo):
        if parent_fo.uid in extracted_fo_dict:
            del extracted_fo_dict[parent_fo.uid]
        return make_list_from_dict(extracted_fo_dict)

    def _generate_local_file_path(self, file_object: FileObject):
        if not Path(file_object.file_path).exists():
            local_path = self.file_storage_system.generate_path_from_uid(file_object.uid)
            return local_path
        return file_object.file_path