def process_object(self, file_object):
    '''
    After only receiving text files thanks to the whitelist, we try to detect
    the correct scripting language and then call a linter if a supported
    language is detected
    '''
    try:
        with NamedTemporaryFile() as tmp_file:
            # materialize the in-memory binary so docker can mount it
            tmp_file.write(file_object.binary)
            tmp_file.seek(0)
            container_path = '/repo/{}'.format(file_object.file_name)
            output = run_docker_container(
                'crazymax/linguist',
                60,
                container_path,
                reraise=True,
                mount=(container_path, tmp_file.name),
                label=self.NAME,
            )
            script_type = self._get_script_type(file_object, output)
            issues = self.SCRIPT_TYPES[script_type]['linter']().do_analysis(file_object.file_path)
            if issues:
                file_object.processed_analysis[self.NAME] = {
                    'full': sorted(issues, key=lambda issue: issue['symbol']),
                    'summary': ['Warnings in {} script'.format(script_type)],
                }
            else:
                file_object.processed_analysis[self.NAME] = {'summary': []}
    except (NotImplementedError, UnicodeDecodeError, KeyError):
        # script type is unknown or has no registered linter
        logging.debug('[{}] {} is not a supported script.'.format(self.NAME, file_object.file_name))
        file_object.processed_analysis[self.NAME] = {'summary': [], 'warning': 'Unsupported script type'}
    except ReadTimeout:
        file_object.processed_analysis[self.NAME] = {'summary': [], 'warning': 'Analysis timed out'}
    except (DockerException, IOError):
        file_object.processed_analysis[self.NAME] = {'summary': [], 'warning': 'Error during analysis'}
    return file_object
def _run_cwe_checker_in_docker(file_object):
    '''Run the cwe-checker BAP pass inside its docker container and return the raw output.'''
    bap_command = 'bap /tmp/input --pass=cwe-checker --cwe-checker-json --cwe-checker-no-logging'
    return run_docker_container(
        DOCKER_IMAGE,
        mount=('/tmp/input', file_object.file_path),
        command=bap_command,
        timeout=TIMEOUT_IN_SECONDS,
    )
def _run_cwe_checker_to_get_module_versions():
    '''Query the versions of the cwe-checker modules from the docker image.'''
    # unfortunately, there must be a dummy file passed to BAP, I chose
    # /bin/true because it is damn small
    return run_docker_container(
        DOCKER_IMAGE,
        command='bap /bin/true --pass=cwe-checker --cwe-checker-module-versions',
        timeout=60,
    )
def get_docker_output(arch_suffix: str, file_path: str, root_path: Path) -> dict:
    '''
    Run the qemu_exec docker container on the target file and decode its JSON output.

    :return: in the case of no error, the output will have the form
        {
            'parameter 1': {'stdout': <b64_str>, 'stderr': <b64_str>, 'return_code': <int>},
            'parameter 2': {...},
            '...',
            'strace': {'stdout': <b64_str>, 'stderr': <b64_str>, 'return_code': <int>},
        }
        in case of an error, there will be an entry 'error' instead of the
        entries stdout/stderr/return_code
    '''
    command = '{arch_suffix} {target}'.format(arch_suffix=arch_suffix, target=file_path)
    try:
        container_output = run_docker_container(
            DOCKER_IMAGE,
            TIMEOUT_IN_SECONDS,
            command,
            reraise=True,
            mount=(CONTAINER_TARGET_PATH, str(root_path)),
            label='qemu_exec',
        )
    except ReadTimeout:
        return {'error': 'timeout'}
    except (DockerException, IOError):
        return {'error': 'process error'}
    try:
        return loads(container_output)
    except JSONDecodeError:
        return {'error': 'could not decode result'}
def process_object(self, file_object: FileObject):
    '''
    Run the docker-based analysis on the file's binary and store the decoded
    JSON result in ``file_object.processed_analysis[self.NAME]``.

    The binary is written into a temporary directory that is mounted into the
    container. Failures never abort the run: a ``'warning'`` entry is added to
    the result dict (which is expected to be pre-initialized by the scheduler,
    as the existing ReadTimeout/DockerException handlers already rely on).

    :param file_object: the file to analyze
    :return: the same file_object with analysis results attached
    '''
    result = None
    with TemporaryDirectory(prefix=self.NAME, dir=get_temp_dir_path(self.config)) as tmp_dir:
        file_path = Path(tmp_dir) / file_object.file_name
        file_path.write_bytes(file_object.binary)
        try:
            result = run_docker_container(DOCKER_IMAGE, TIMEOUT_IN_SECONDS, CONTAINER_TARGET_PATH, reraise=True,
                                          mount=(CONTAINER_TARGET_PATH, str(file_path)), label=self.NAME,
                                          include_stderr=False)
            file_object.processed_analysis[self.NAME] = loads(result)
        except ReadTimeout:
            file_object.processed_analysis[self.NAME][
                'warning'] = 'Analysis timed out. It might not be complete.'
        except (DockerException, IOError):
            file_object.processed_analysis[self.NAME][
                'warning'] = 'Analysis issues. It might not be complete.'
        except JSONDecodeError:
            # FIX: previously this branch only logged the problem and left the
            # result dict untouched; record a warning like the other handlers
            logging.error('Could not decode JSON output: {}'.format(repr(result)))
            file_object.processed_analysis[self.NAME][
                'warning'] = 'Could not decode docker output. It might not be complete.'
    return file_object
def _mount_in_docker(self, input_dir: str) -> str:
    '''Run the extraction container with ``input_dir`` mounted at /work and return its output.'''
    # keep a 20% margin of the plugin timeout for work outside the container
    docker_timeout = int(self.timeout * .8)
    return run_docker_container(
        DOCKER_IMAGE,
        timeout=docker_timeout,
        label=self.NAME,
        mount=('/work', input_dir),
        privileged=True,
    )
def extract_data_from_ghidra(input_file_data: bytes, key_strings: List[str]) -> List[str]:
    '''Run the FSR Ghidra container on the given binary and return the filtered results.'''
    with TemporaryDirectory(prefix='FSR_') as tmp_dir:
        working_dir = Path(tmp_dir)
        # the container reads the key strings and the binary from the mounted dir
        (working_dir / KEY_FILE).write_text(json.dumps(key_strings))
        (working_dir / 'ghidra_input').write_bytes(input_file_data)
        docker_output = run_docker_container(DOCKER_IMAGE, TIMEOUT, mount=(CONTAINER_TARGET_PATH, tmp_dir), label='FSR')
        logging.debug(docker_output)
        try:
            result_text = (working_dir / DOCKER_OUTPUT_FILE).read_text()
            return filter_implausible_results(json.loads(result_text))
        except (json.JSONDecodeError, FileNotFoundError):
            logging.debug("[FSR]: output file could not be read")
            return []
def _get_script_type(self, file_object):
    '''Detect the scripting language of ``file_object`` by running linguist in docker.'''
    host_path = self._fs_organizer.generate_path_from_uid(file_object.uid)
    container_path = f'/repo/{file_object.file_name}'
    linguist_output = run_docker_container(
        'crazymax/linguist',
        60,
        f'--json {container_path}',
        reraise=True,
        mount=(container_path, host_path),
        label=self.NAME,
    )
    linguist_result = json.loads(linguist_output)[container_path]
    # FIXME plugins should not set the output for other plugins
    # But due to performance reasons we don't want the filetype plugin to run linguist
    file_object.processed_analysis['file_type']['linguist'] = ''.join(
        f'{key:<10} {str(value):<10}\n' for key, value in linguist_result.items()
    )
    return linguist_result.get('language')
def _run_cwe_checker_in_docker(file_object):
    '''Run the cwe_checker docker container on the target file and return its raw output.'''
    return run_docker_container(
        DOCKER_IMAGE,
        mount=('/input', file_object.file_path),
        command='/input --json --quiet',
        timeout=TIMEOUT_IN_SECONDS,
    )
def _run_cwe_checker_to_get_version_string():
    '''Ask the cwe_checker docker image for its version string.'''
    return run_docker_container(DOCKER_IMAGE, command='--version', timeout=60)