def identify_decoding_functions(vw):
    """Identify decoding function candidates in the workspace `vw`.

    Functions and plugin names are chosen through ``floss_main`` (both
    selectors are called with ``None``; presumably that means "no
    restriction" -- confirm against floss_main), and the matching plugin
    objects are handed to the identification manager.

    :param vw: workspace passed through to the FLOSS helpers.
    :return: whatever ``im.identify_decoding_functions`` returns.
    """
    selected_functions = floss_main.select_functions(vw, None)
    selected_plugin_names = floss_main.select_plugins(None)
    # Build a real list rather than using filter(): on Python 3 filter()
    # returns a one-shot iterator, which would silently appear empty to any
    # consumer that walks the plugins more than once.
    selected_plugins = [
        plugin
        for plugin in floss_main.get_all_plugins()
        if str(plugin) in selected_plugin_names
    ]
    return im.identify_decoding_functions(
        vw, selected_plugins, selected_functions)
def identify_decoding_functions(vw):
    """Run the FLOSS identification plugins over the workspace `vw`.

    :param vw: workspace passed through to the FLOSS helpers.
    :return: the candidates object produced by
        ``im.identify_decoding_functions``.
    """
    functions = floss_main.select_functions(vw, None)
    wanted_names = floss_main.select_plugins(None)

    # Keep only the plugins whose string form was selected by name.
    plugins = []
    for plugin in floss_main.get_all_plugins():
        if str(plugin) in wanted_names:
            plugins.append(plugin)

    candidates = im.identify_decoding_functions(vw, plugins, functions)
    return candidates
def floss_analyze(self, decoders=None, vw=None):
    """Decode strings with FLOSS, loading and identifying on demand.

    :param decoders: function addresses to use as decoders; when None or
        empty, the top 10 candidates reported by the identification
        plugins are used instead.
    :param vw: workspace to analyze; when None it is obtained from
        ``self.floss_load_workspace()``.
    :return: the result of ``self.floss_decode_strings(vw, decoders)``.
    """
    if vw is None:
        print('floss_analyze: loading workspace...')
        vw = self.floss_load_workspace()

    no_decoders_given = decoders is None or len(decoders) == 0
    if no_decoders_given:
        print('floss_analyze: identifying decoding functions...')
        all_functions = floss.main.select_functions(vw, '')
        all_plugins = floss.main.get_all_plugins()
        ranked = floss_im.identify_decoding_functions(
            vw, all_plugins, all_functions)
        # Each entry is a pair; only its first element (the function
        # address) is kept.
        decoders = [
            fva for fva, _ in ranked.get_top_candidate_functions(10)
        ]

    print('floss_analyze: decoding strings...')
    decoded = self.floss_decode_strings(vw, decoders)
    print('floss_analyze: done')
    return decoded
def identify_decoding_functions(vw):
    """Identify decoding function candidates in the workspace `vw`.

    Functions and plugin names are chosen through ``floss_main`` (both
    selectors are called with ``None``; presumably that means "no
    restriction" -- confirm against floss_main), and the matching plugin
    objects are handed to the identification manager.

    :param vw: workspace passed through to the FLOSS helpers.
    :return: whatever ``im.identify_decoding_functions`` returns.
    """
    selected_functions = floss_main.select_functions(vw, None)
    selected_plugin_names = floss_main.select_plugins(None)
    # A list instead of filter(): filter() is a single-use iterator on
    # Python 3, so a second pass over the plugins would see nothing.
    selected_plugins = [
        plugin
        for plugin in floss_main.get_all_plugins()
        if str(plugin) in selected_plugin_names
    ]
    return im.identify_decoding_functions(
        vw, selected_plugins, selected_functions)
def each(self, target):
    """Extract static, decoded and stack strings from `target`.

    Fills ``self.results`` with the keys ``warnings``, ``static_strings``,
    ``decoded_strings`` and ``stack_strings``. Strings containing any entry
    of the ignored-strings file are dropped, and every populated list is
    passed to ``self.search_ioc``.

    :param target: path of the file to analyze.
    :return: True on success. False, with ``self.results`` reset to None,
        when the file cannot be opened, analyzed or decoded.
    """
    self.results = {
        'warnings': [],
        'static_strings': [],
        'decoded_strings': [],
        'stack_strings': []
    }

    try:
        # Context manager so the handle is released even if read() fails.
        with open(target, "r") as sample:
            data = sample.read(MAX_FILESIZE)
    except (IOError, OSError):
        self.log('error', 'Cannot open file {}'.format(target))
        self.results = None
        return False

    # Load list of IOC's
    try:
        with open(self.interesting_strings_file) as f:
            self.interesting_strings = f.read().splitlines()
        self.log(
            'info', 'Loaded interesting strings from {}'.format(
                self.interesting_strings_file))
    except Exception:
        # Best effort: any failure (missing file, unset option, ...) means
        # "no IOC list". `except Exception` rather than a bare `except`
        # so KeyboardInterrupt / SystemExit still propagate.
        self.log('info', 'No file with interesting strings defined')
        self.interesting_strings = []

    # Load list of ignored strings
    try:
        with open(self.ignored_strings_file) as f:
            self.ignored_strings = f.read().splitlines()
        self.log(
            'info', 'Loaded ignored strings from {}'.format(
                self.ignored_strings_file))
    except Exception:
        # Same best-effort policy as for the IOC list above.
        self.log('info', 'No file with ignored strings defined')
        self.ignored_strings = []

    def is_ignored(value):
        # True when the string form of `value` contains any ignored entry.
        return any(
            ignore in str(value) for ignore in self.ignored_strings)

    # Extract static strings: single-byte runs first, then runs of
    # "char, NUL" pairs which are decoded as UTF-16LE.
    min_len = str(self.minimum_string_len)
    static_strings = re.findall("[\x1f-\x7e]{" + min_len + ",}", data)
    for s in re.findall("(?:[\x1f-\x7e][\x00]){" + min_len + ",}", data):
        static_strings.append(s.decode("utf-16le"))

    # A limit of 0 means "unlimited" for both the length and the count.
    if self.maximum_string_len != 0:
        static_strings = [
            s[:self.maximum_string_len] for s in static_strings
        ]

    if (self.maximum_strings != 0
            and len(static_strings) > self.maximum_strings):
        self.log(
            'warning', 'Maximum number of strings reached ({})'.format(
                str(self.maximum_strings)))
        static_strings = static_strings[:self.maximum_strings]
        static_strings.append("[snip]")

    try:
        # Prepare Floss for extracting hidden & encoded strings
        vw = vivisect.VivWorkspace()
        vw.loadFromFile(target)
        vw.analyze()

        selected_functions = main.select_functions(vw, None)
        decoding_functions_candidates = id_man.identify_decoding_functions(
            vw, main.get_all_plugins(), selected_functions)
    except Exception:
        self.log('error', 'Cannot analyze file {}'.format(target))
        self.results = None
        return False

    # Decode & extract hidden & encoded strings
    try:
        decoded_strings = main.decode_strings(
            vw, decoding_functions_candidates, self.minimum_string_len)
        decoded_strs = main.filter_unique_decoded(decoded_strings)

        stack_strings = stackstrings.extract_stackstrings(
            vw, selected_functions, self.minimum_string_len)
        stack_strings = list(stack_strings)

        # NOTE(review): this compares the decoded-string objects (whose
        # text lives in `.s`, see below) against plain static strings --
        # confirm that this filter removes what it is meant to remove.
        decoded_strings = [
            x for x in decoded_strs if x not in static_strings
        ]
    except Exception:
        self.log('error', 'Cannot extract strings from {}'.format(target))
        self.results = None
        return False

    # Populate results[] with found strings
    if decoded_strings or stack_strings:
        self.log('info', 'Found stack or decoded strings')

        for s in decoded_strings:
            printable = main.sanitize_string_for_printing(s.s)
            if not is_ignored(printable):
                self.results['decoded_strings'].append(printable)
        self.search_ioc(self.results['decoded_strings'])

        for s in stack_strings:
            if not is_ignored(s.s):
                self.results['stack_strings'].append(s.s)
        self.search_ioc(self.results['stack_strings'])

    # Populate results[] with static strings
    self.log('info', 'Found static strings')
    for s in static_strings:
        if not is_ignored(s):
            self.results['static_strings'].append(s)
    self.search_ioc(self.results['static_strings'])

    # Deduplicate warnings (dict.fromkeys keeps one entry per value)
    self.results['warnings'] = list(dict.fromkeys(
        self.results['warnings']))

    return True
def main(argv=None):
    """
    Command line entry point: print static, decoded and stack strings of a
    sample and optionally write annotation scripts / JSON output.

    :param argv: optional command line arguments *including* the program
        name as first element, like sys.argv (``argv[1:]`` is what gets
        parsed); when None the parser falls back to its default arguments
    :return: 0 on success, non-zero on failure
    """
    logging.basicConfig(level=logging.WARNING)

    parser = make_parser()
    if argv is not None:
        options, args = parser.parse_args(argv[1:])
    else:
        options, args = parser.parse_args()

    set_log_config(options.debug, options.verbose)

    if options.list_plugins:
        print_plugin_list()
        return 0

    sample_file_path = parse_sample_file_path(parser, args)
    min_length = parse_min_length_option(options.min_length)

    # expert profile settings
    if options.expert:
        options.save_workspace = True
        options.group_functions = True
        options.quiet = False

    # Bound up front so the name is defined on every path that later
    # reaches one of the JSON writers.
    static_strings = []

    if not is_workspace_file(sample_file_path):
        if not options.no_static_strings and not options.functions:
            floss_logger.info("Extracting static strings...")
            if os.path.getsize(sample_file_path) > sys.maxsize:
                floss_logger.warning("File too large, strings listings may be truncated.")
                floss_logger.warning("FLOSS cannot handle files larger than 4GB on 32bit systems.")

            file_buf = get_file_as_mmap(sample_file_path)
            print_static_strings(file_buf, min_length=min_length, quiet=options.quiet)
            static_ascii_strings = strings.extract_ascii_strings(file_buf, min_length)
            static_unicode_strings = strings.extract_unicode_strings(file_buf, min_length)
            static_strings = chain(static_ascii_strings, static_unicode_strings)
            del file_buf

        if options.no_decoded_strings and options.no_stack_strings and not options.should_show_metainfo:
            if options.json_output_file:
                create_json_output_static_only(options, sample_file_path, static_strings)
            # we are done
            return 0

    if os.path.getsize(sample_file_path) > MAX_FILE_SIZE:
        floss_logger.error("FLOSS cannot extract obfuscated strings or stackstrings from files larger than"
                           " %d bytes" % MAX_FILE_SIZE)
        if options.json_output_file:
            create_json_output_static_only(options, sample_file_path, static_strings)
        return 1

    try:
        vw = load_vw(sample_file_path, options.save_workspace, options.verbose, options.is_shellcode,
                     options.shellcode_entry_point, options.shellcode_base)
    except WorkspaceLoadError:
        if options.json_output_file:
            create_json_output_static_only(options, sample_file_path, static_strings)
        return 1

    try:
        selected_functions = select_functions(vw, options.functions)
    except Exception as e:
        floss_logger.error(str(e))
        return 1

    floss_logger.debug("Selected the following functions: %s", get_str_from_func_list(selected_functions))

    selected_plugin_names = select_plugins(options.plugins)
    floss_logger.debug("Selected the following plugins: %s", ", ".join(map(str, selected_plugin_names)))
    # A list, not filter(): filter() is a one-shot iterator on Python 3.
    selected_plugins = [p for p in get_all_plugins() if str(p) in selected_plugin_names]

    if options.should_show_metainfo:
        meta_functions = None
        if options.functions:
            meta_functions = selected_functions
        print_file_meta_info(vw, meta_functions)

    time0 = time()

    if not options.no_decoded_strings:
        floss_logger.info("Identifying decoding functions...")
        decoding_functions_candidates = im.identify_decoding_functions(vw, selected_plugins, selected_functions)
        if options.expert:
            print_identification_results(sample_file_path, decoding_functions_candidates)

        floss_logger.info("Decoding strings...")
        decoded_strings = decode_strings(vw, decoding_functions_candidates, min_length, options.no_filter,
                                         options.max_instruction_count, options.max_address_revisits + 1)
        # TODO: The de-duplication process isn't perfect as it is done here and in print_decoding_results and
        # TODO: all of them on non-sanitized strings.
        if not options.expert:
            decoded_strings = filter_unique_decoded(decoded_strings)
        print_decoding_results(decoded_strings, options.group_functions, quiet=options.quiet, expert=options.expert)
    else:
        decoded_strings = []

    if not options.no_stack_strings:
        floss_logger.info("Extracting stackstrings...")
        stack_strings = stackstrings.extract_stackstrings(vw, selected_functions, min_length, options.no_filter)
        stack_strings = list(stack_strings)
        if not options.expert:
            # remove duplicate entries
            stack_strings = set(stack_strings)
        print_stack_strings(stack_strings, quiet=options.quiet, expert=options.expert)
    else:
        stack_strings = []

    if options.x64dbg_database_file:
        # list(...) because dict views cannot be indexed on Python 3.
        imagebase = list(vw.filemeta.values())[0]['imagebase']
        floss_logger.info("Creating x64dbg database...")
        create_x64dbg_database(sample_file_path, options.x64dbg_database_file, imagebase, decoded_strings)

    if options.ida_python_file:
        floss_logger.info("Creating IDA script...")
        create_ida_script(sample_file_path, options.ida_python_file, decoded_strings, stack_strings)

    if options.radare2_script_file:
        floss_logger.info("Creating r2script...")
        create_r2_script(sample_file_path, options.radare2_script_file, decoded_strings, stack_strings)

    if options.binja_script_file:
        floss_logger.info("Creating Binary Ninja script...")
        create_binja_script(sample_file_path, options.binja_script_file, decoded_strings, stack_strings)

    time1 = time()
    if not options.quiet:
        print("\nFinished execution after %f seconds" % (time1 - time0))

    if options.json_output_file:
        create_json_output(options, sample_file_path,
                           decoded_strings=decoded_strings,
                           stack_strings=stack_strings,
                           static_strings=static_strings)
        floss_logger.info("Wrote JSON file to %s\n" % options.json_output_file)

    return 0
def identify_decoding_functions(vw):
    """Identify decoding function candidates in the workspace `vw`.

    :param vw: workspace passed through to the FLOSS helpers.
    :return: the value returned by ``im.identify_decoding_functions`` for
        the functions chosen by ``floss_main.select_functions(vw, None)``.
    """
    functions = floss_main.select_functions(vw, None)
    return im.identify_decoding_functions(vw, functions)
def run(self):
    """Run Floss on analyzed file.

    Static strings are always extracted for file tasks. When the ``floss``
    option is set and the sample looks like a PE executable, decoded and
    stack strings are extracted as well and the optional IDA / radare2 /
    x64dbg annotation files are written.

    @return: Floss results dict (empty for non-file tasks).
    @raise CuckooProcessingError: if the sample is missing or unreadable,
        or if vivisect / floss fail on it.
    """
    self.key = "strings"
    self.floss = self.options.get("floss")
    self.MIN_STRINGLEN = int(self.options.get("min_str_len"))
    self.MAX_STRINGLEN = self.options.get("max_str_len")
    self.MAX_STRINGCNT = self.options.get("max_str_cnt")
    self.MAX_FILESIZE = 16*1024*1024

    STRING_TYPES = ["decoded", "stack", "static"]
    strings = {}

    if self.task["category"] == "file":
        if not os.path.exists(self.file_path):
            raise CuckooProcessingError(
                "Sample file doesn't exist: \"%s\"" % self.file_path
            )

        try:
            f = File(self.file_path)
            filename = os.path.basename(self.task["target"])
            base_name = os.path.splitext(filename)[0]
            ext = filename.split(os.path.extsep)[-1].lower()
            # Context manager so the handle is closed even if read() fails.
            with open(self.file_path, "r") as sample:
                data = sample.read(self.MAX_FILESIZE)
        except (IOError, OSError) as e:
            raise CuckooProcessingError("Error opening file %s" % e)

        # Extract static strings: single-byte runs first, then runs of
        # "char, NUL" pairs which are decoded as UTF-16LE.
        min_len = str(self.MIN_STRINGLEN)
        static_strings = re.findall("[\x1f-\x7e]{" + min_len + ",}", data)
        for s in re.findall("(?:[\x1f-\x7e][\x00]){" + min_len + ",}", data):
            static_strings.append(s.decode("utf-16le"))

        # A limit of 0 means "unlimited" for both the length and the count.
        if self.MAX_STRINGLEN != 0:
            static_strings = [s[:self.MAX_STRINGLEN] for s in static_strings]

        if self.MAX_STRINGCNT != 0 and len(static_strings) > self.MAX_STRINGCNT:
            static_strings = static_strings[:self.MAX_STRINGCNT]
            static_strings.append("[snip]")

        package = self.task.get("package")

        if self.floss and (package == "exe" or ext == "exe" or "PE32" in f.get_type()):
            # Disable floss verbose logging
            main.set_logging_levels()

            try:
                # Prepare Floss for extracting hidden & encoded strings
                vw = vivisect.VivWorkspace()
                vw.loadFromFile(self.file_path)
                vw.analyze()

                selected_functions = main.select_functions(vw, None)
                decoding_functions_candidates = id_man.identify_decoding_functions(
                    vw, main.get_all_plugins(), selected_functions
                )
            except Exception as e:
                raise CuckooProcessingError("Error analyzing file with vivisect: %s" % e)

            try:
                # Decode & extract hidden & encoded strings
                decoded_strings = main.decode_strings(
                    vw, decoding_functions_candidates, self.MIN_STRINGLEN
                )
                decoded_strs = main.filter_unique_decoded(decoded_strings)

                stack_strings = stackstrings.extract_stackstrings(
                    vw, selected_functions, self.MIN_STRINGLEN
                )
                stack_strings = list(stack_strings)

                decoded_strings = [x for x in decoded_strs if x not in static_strings]
            except Exception as e:
                raise CuckooProcessingError("Error extracting strings with floss: %s" % e)

            if decoded_strings or stack_strings:
                # Create annotated scripts
                if self.options.get("idapro_str_sct"):
                    idapro_sct_name = base_name + ".idb"
                    strings["idapro_sct_name"] = idapro_sct_name

                    main.create_ida_script(
                        self.file_path, os.path.join(self.str_script_path, idapro_sct_name),
                        decoded_strings, stack_strings
                    )

                if self.options.get("radare_str_sct"):
                    radare_sct_name = base_name + ".r2"
                    strings["radare_sct_name"] = radare_sct_name

                    main.create_r2_script(
                        self.file_path, os.path.join(self.str_script_path, radare_sct_name),
                        decoded_strings, stack_strings
                    )

                if self.options.get("x64dbg_str_sct"):
                    x64dbg_sct_name = base_name + ".json"
                    strings["x64dbg_sct_name"] = x64dbg_sct_name

                    # list(...) because dict views cannot be indexed on
                    # Python 3.
                    imagebase = list(vw.filemeta.values())[0]['imagebase']
                    # Reuse the advertised name so the reported and the
                    # written file name cannot drift apart.
                    main.create_x64dbg_database(
                        self.file_path, os.path.join(self.str_script_path, x64dbg_sct_name),
                        imagebase, decoded_strings
                    )

            # convert Floss strings into regular, readable strings
            decoded_strings = [
                main.sanitize_string_for_printing(s.s) for s in decoded_strings
            ]
            stack_strings = [s.s for s in stack_strings]

            results = [decoded_strings, stack_strings, static_strings]
            for str_type, found in zip(STRING_TYPES, results):
                strings[str_type] = found
        else:
            strings["static"] = static_strings

    return strings