def extract_strings(vw):
    """
    Deobfuscate strings from vivisect workspace
    """
    decoding_functions_candidates = identify_decoding_functions(vw)
    decoded_strings = floss_main.decode_strings(vw, decoding_functions_candidates, 4)
    selected_functions = floss_main.select_functions(vw, None)
    decoded_stackstrings = stackstrings.extract_stackstrings(vw, selected_functions, 4)
    decoded_strings.extend(decoded_stackstrings)
    return [ds.s for ds in decoded_strings]
def extract_strings(sample_path):
    """
    Deobfuscate strings from sample_path
    """
    vw = viv_utils.getWorkspace(sample_path)
    function_index = viv_utils.InstructionFunctionIndex(vw)
    decoding_functions_candidates = identify_decoding_functions(vw)
    decoded_strings = floss_main.decode_strings(vw, function_index, decoding_functions_candidates)
    decoded_stackstrings = stackstrings.extract_stackstrings(vw)
    decoded_strings.extend(decoded_stackstrings)
    return [ds.s for ds in decoded_strings]
def extract_strings(sample_path):
    """
    Deobfuscate strings from sample_path
    """
    vw = viv_utils.getWorkspace(sample_path)
    function_index = viv_utils.InstructionFunctionIndex(vw)
    decoding_functions_candidates = identify_decoding_functions(vw)
    decoded_strings = floss_main.decode_strings(vw, function_index, decoding_functions_candidates)
    selected_functions = floss_main.select_functions(vw, None)
    decoded_stackstrings = stackstrings.extract_stackstrings(vw, selected_functions)
    decoded_strings.extend(decoded_stackstrings)
    return [ds.s for ds in decoded_strings]
def extract_strings(vw):
    """
    Deobfuscate strings from vivisect workspace
    """
    top_functions, decoding_function_features = identify_decoding_functions(vw)

    for s in floss.string_decoder.decode_strings(
        vw, get_function_fvas(top_functions), MIN_STRING_LENGTH, disable_progress=True
    ):
        yield s.string

    no_tightloop_functions = get_functions_without_tightloops(decoding_function_features)
    for s in stackstrings.extract_stackstrings(vw, no_tightloop_functions, MIN_STRING_LENGTH, disable_progress=True):
        yield s.string

    tightloop_functions = get_functions_with_tightloops(decoding_function_features)
    for s in tightstrings.extract_tightstrings(vw, tightloop_functions, MIN_STRING_LENGTH, disable_progress=True):
        yield s.string
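A minimal usage sketch for the workspace-based variants above; the sample path is hypothetical, and viv_utils builds the workspace as in the earlier snippets:

import viv_utils

vw = viv_utils.getWorkspace("/path/to/sample.exe")  # hypothetical sample path
for s in extract_strings(vw):
    print(s)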
def main(argv=None) -> int:
    """
    arguments:
      argv: the command line arguments
    """
    if argv is None:
        argv = sys.argv[1:]

    parser = make_parser(argv)
    try:
        args = parser.parse_args(args=argv)
        # manual check here, because add_mutually_exclusive_group() on argument_group("...") appears wrong
        if args.enabled_types and args.disabled_types:
            parser.error("--no and --only arguments are not allowed together")
    except ArgumentValueError as e:
        print(e)
        return -1

    set_log_config(args.debug, args.quiet)

    # Since Python 3.8 cp65001 is an alias to utf_8, but not for Python < 3.8
    # TODO: remove this code when only supporting Python 3.8+
    # https://stackoverflow.com/a/3259271/87207
    codecs.register(lambda name: codecs.lookup("utf-8") if name == "cp65001" else None)

    if hasattr(args, "signatures"):
        if args.signatures == SIGNATURES_PATH_DEFAULT_STRING:
            logger.debug("-" * 80)
            logger.debug(" Using default embedded signatures.")
            logger.debug(
                " To provide your own signatures, use the form `floss.exe --signature ./path/to/signatures/ /path/to/mal.exe`."
            )
            logger.debug("-" * 80)

            sigs_path = os.path.join(get_default_root(), "sigs")
        else:
            sigs_path = args.signatures
            logger.debug("using signatures path: %s", sigs_path)

        args.signatures = sigs_path

    # TODO pass buffer along instead of file path, also should work for stdin
    sample = args.sample.name
    args.sample.close()

    if args.functions:
        # when analyzing specified functions do not show static strings
        args.disabled_types.append(StringType.STATIC)

    analysis = Analysis(
        enable_static_strings=is_string_type_enabled(StringType.STATIC, args.disabled_types, args.enabled_types),
        enable_stack_strings=is_string_type_enabled(StringType.STACK, args.disabled_types, args.enabled_types),
        enable_decoded_strings=is_string_type_enabled(StringType.DECODED, args.disabled_types, args.enabled_types),
        enable_tight_strings=is_string_type_enabled(StringType.TIGHT, args.disabled_types, args.enabled_types),
    )

    results = ResultDocument(metadata=Metadata(file_path=sample), analysis=analysis)

    time0 = time()
    interim = time0

    # in order of expected run time, fast to slow
    # 1. static strings
    # 2. stack strings
    # 3. tight strings
    # 4. decoded strings

    if results.analysis.enable_static_strings:
        logger.info("extracting static strings...")
        if os.path.getsize(sample) > sys.maxsize:
            logger.warning("file is very large, strings listings may be truncated.")

        with open(sample, "rb") as f:
            with contextlib.closing(mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)) as buf:
                static_strings = list(extract_ascii_unicode_strings(buf, args.min_length))

        results.strings.static_strings = static_strings
        results.metadata.runtime.static_strings = get_runtime_diff(interim)
        interim = time()

    if (
        results.analysis.enable_decoded_strings
        or results.analysis.enable_stack_strings
        or results.analysis.enable_tight_strings
    ):
        if os.path.getsize(sample) > MAX_FILE_SIZE:
            logger.error("cannot deobfuscate strings from files larger than %d bytes", MAX_FILE_SIZE)
            return -1

        sigpaths = get_signatures(args.signatures)

        should_save_workspace = os.environ.get("FLOSS_SAVE_WORKSPACE") not in ("0", "no", "NO", "n", None)
        try:
            with halo.Halo(
                text="analyzing program",
                spinner="simpleDots",
                stream=sys.stderr,
                enabled=not (args.quiet or args.disable_progress),
            ):
                vw = load_vw(sample, args.format, sigpaths, should_save_workspace)
                results.metadata.runtime.vivisect = get_runtime_diff(interim)
                interim = time()
        except WorkspaceLoadError as e:
            logger.error("failed to analyze sample: %s", e)
            return -1

        results.metadata.imagebase = get_imagebase(vw)

        try:
            selected_functions = select_functions(vw, args.functions)
            results.analysis.functions.discovered = len(vw.getFunctions())
        except ValueError as e:
            # failed to find functions in workspace
            logger.error(e.args[0])
            return -1

        decoding_function_features, library_functions = find_decoding_function_features(
            vw, selected_functions, disable_progress=args.quiet or args.disable_progress
        )
        # TODO trim libfuncs from selected_funcs
        results.analysis.functions.library = len(library_functions)
        results.metadata.runtime.find_features = get_runtime_diff(interim)
        interim = time()

        logger.trace("analysis summary:")
        for k, v in get_vivisect_meta_info(vw, selected_functions, decoding_function_features).items():
            logger.trace(" %s: %s", k, v or "N/A")

        if results.analysis.enable_stack_strings:
            if results.analysis.enable_tight_strings:
                # don't run this on functions with tight loops as this will likely result in FPs
                # and should be caught by the tightstrings extraction below
                selected_functions = get_functions_without_tightloops(decoding_function_features)

            results.strings.stack_strings = extract_stackstrings(
                vw,
                selected_functions,
                args.min_length,
                verbosity=args.verbose,
                disable_progress=args.quiet or args.disable_progress,
            )
            results.analysis.functions.analyzed_stack_strings = len(selected_functions)
            results.metadata.runtime.stack_strings = get_runtime_diff(interim)
            interim = time()

        if results.analysis.enable_tight_strings:
            tightloop_functions = get_functions_with_tightloops(decoding_function_features)
            # TODO if there are many tight loop functions, emit that the program likely uses tightstrings? see #400
            results.strings.tight_strings = extract_tightstrings(
                vw,
                tightloop_functions,
                min_length=args.min_length,
                verbosity=args.verbose,
                disable_progress=args.quiet or args.disable_progress,
            )
            results.analysis.functions.analyzed_tight_strings = len(tightloop_functions)
            results.metadata.runtime.tight_strings = get_runtime_diff(interim)
            interim = time()

        if results.analysis.enable_decoded_strings:
            # TODO select more based on score rather than absolute count?!
            top_functions = get_top_functions(decoding_function_features, 20)
            fvas_to_emulate = get_function_fvas(top_functions)
            fvas_tight_functions = get_tight_function_fvas(
                decoding_function_features
            )  # TODO exclude tight functions from stackstrings analysis?!
            fvas_to_emulate = append_unique(fvas_to_emulate, fvas_tight_functions)

            if len(fvas_to_emulate) == 0:
                logger.info("no candidate decoding functions found.")
            else:
                logger.debug("identified %d candidate decoding functions", len(fvas_to_emulate))
                for fva in fvas_to_emulate:
                    results.analysis.functions.decoding_function_scores[fva] = decoding_function_features[fva]["score"]
                    logger.debug(" - 0x%x: %.3f", fva, decoding_function_features[fva]["score"])

            # TODO filter out strings decoded in library function or function only called by library function(s)
            results.strings.decoded_strings = decode_strings(
                vw,
                fvas_to_emulate,
                args.min_length,
                verbosity=args.verbose,
                disable_progress=args.quiet or args.disable_progress,
            )
            results.analysis.functions.analyzed_decoded_strings = len(fvas_to_emulate)
            results.metadata.runtime.decoded_strings = get_runtime_diff(interim)

    results.metadata.runtime.total = get_runtime_diff(time0)
    logger.info("finished execution after %.2f seconds", results.metadata.runtime.total)

    if args.json:
        r = floss.render.json.render(results)
    else:
        r = floss.render.default.render(results, args.verbose, args.quiet)

    if args.outfile:
        logger.info("writing results to %s", args.outfile)
        with open(args.outfile, "wb") as f:
            f.write(r.encode("utf-8"))
    else:
        print(r)

    return 0
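Because main() accepts an optional argv list and returns an exit code, the entry point above can also be driven programmatically; a minimal sketch, with a hypothetical sample path:

import sys

if __name__ == "__main__":
    # equivalent to running the CLI as: floss.exe /path/to/mal.exe
    sys.exit(main(["/path/to/mal.exe"]))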
def each(self, target):
    self.results = {
        'warnings': [],
        'static_strings': [],
        'decoded_strings': [],
        'stack_strings': []
    }

    try:
        # the sample is binary data, so open it in "rb" mode
        with open(target, "rb") as f:
            data = f.read(MAX_FILESIZE)
    except (IOError, OSError):
        self.log('error', 'Cannot open file {}'.format(target))
        self.results = None
        return False

    # Load list of IOCs
    try:
        with open(self.interesting_strings_file) as f:
            self.interesting_strings = f.read().splitlines()
        self.log('info', 'Loaded interesting strings from {}'.format(
            self.interesting_strings_file))
    except Exception:
        # No IOC file, create an empty list
        self.log('info', 'No file with interesting strings defined')
        self.interesting_strings = []

    # Load list of ignored strings
    try:
        with open(self.ignored_strings_file) as f:
            self.ignored_strings = f.read().splitlines()
        self.log('info', 'Loaded ignored strings from {}'.format(
            self.ignored_strings_file))
    except Exception:
        # No ignored-strings file, create an empty list
        self.log('info', 'No file with ignored strings defined')
        self.ignored_strings = []

    # Extract static strings
    static_strings = re.findall(
        "[\x1f-\x7e]{" + str(self.minimum_string_len) + ",}", data)
    for s in re.findall(
            "(?:[\x1f-\x7e][\x00]){" + str(self.minimum_string_len) + ",}", data):
        static_strings.append(s.decode("utf-16le"))

    if self.maximum_string_len != 0:
        for i, s in enumerate(static_strings):
            static_strings[i] = s[:self.maximum_string_len]

    if self.maximum_strings != 0 and len(static_strings) > self.maximum_strings:
        self.log('warning', 'Maximum number of strings reached ({})'.format(
            str(self.maximum_strings)))
        static_strings = static_strings[:self.maximum_strings]
        static_strings.append("[snip]")

    try:
        # Prepare FLOSS for extracting hidden & encoded strings
        vw = vivisect.VivWorkspace()
        vw.loadFromFile(target)
        vw.analyze()

        selected_functions = main.select_functions(vw, None)
        decoding_functions_candidates = id_man.identify_decoding_functions(
            vw, main.get_all_plugins(), selected_functions)
    except Exception:
        self.log('error', 'Cannot analyze file {}'.format(target))
        self.results = None
        return False

    # Decode & extract hidden & encoded strings
    try:
        decoded_strings = main.decode_strings(
            vw, decoding_functions_candidates, self.minimum_string_len)
        decoded_strs = main.filter_unique_decoded(decoded_strings)

        stack_strings = stackstrings.extract_stackstrings(
            vw, selected_functions, self.minimum_string_len)
        stack_strings = list(stack_strings)

        # drop decoded strings whose text already appears in the static listing
        # (compare the .s text, not the DecodedString object itself)
        decoded_strings = [x for x in decoded_strs if x.s not in static_strings]
    except Exception:
        self.log('error', 'Cannot extract strings from {}'.format(target))
        self.results = None
        return False

    # Populate results[] with found strings
    if len(decoded_strings) or len(stack_strings):
        self.log('info', 'Found stack or decoded strings')

        for s in decoded_strings:
            buffer = main.sanitize_string_for_printing(s.s)
            skip = False
            for ignore in self.ignored_strings:
                if str(buffer).find(ignore) >= 0:
                    skip = True
                    break
            if not skip:
                self.results['decoded_strings'].append(buffer)
        self.search_ioc(self.results['decoded_strings'])

        for s in stack_strings:
            skip = False
            for ignore in self.ignored_strings:
                if str(s.s).find(ignore) >= 0:
                    skip = True
                    break
            if not skip:
                self.results['stack_strings'].append(s.s)
        self.search_ioc(self.results['stack_strings'])

    # Populate results[] with static strings
    self.log('info', 'Found static strings')
    for s in static_strings:
        skip = False
        for ignore in self.ignored_strings:
            if str(s).find(ignore) >= 0:
                skip = True
                break
        if not skip:
            self.results['static_strings'].append(s)
    self.search_ioc(self.results['static_strings'])

    # Deduplicate warnings
    self.results['warnings'] = list(dict.fromkeys(self.results['warnings']))

    return True
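The skip/ignore loop above is repeated verbatim for decoded, stack, and static strings; a small helper with the same substring-match semantics could replace all three (a sketch only, is_ignored is a hypothetical name, not part of the module):

def is_ignored(s, ignored_strings):
    # True if any ignore-list entry occurs as a substring of s
    text = str(s)
    return any(ignore in text for ignore in ignored_strings)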
def main(argv=None):
    """
    :param argv: optional command line arguments, like sys.argv[1:]
    :return: 0 on success, non-zero on failure
    """
    logging.basicConfig(level=logging.WARNING)

    parser = make_parser()
    if argv is not None:
        options, args = parser.parse_args(argv[1:])
    else:
        options, args = parser.parse_args()

    set_log_config(options.debug, options.verbose)

    if options.list_plugins:
        print_plugin_list()
        return 0

    sample_file_path = parse_sample_file_path(parser, args)
    min_length = parse_min_length_option(options.min_length)

    # expert profile settings
    if options.expert:
        options.save_workspace = True
        options.group_functions = True
        options.quiet = False

    if not is_workspace_file(sample_file_path):
        if not options.no_static_strings and not options.functions:
            floss_logger.info("Extracting static strings...")
            if os.path.getsize(sample_file_path) > sys.maxsize:
                floss_logger.warning("File too large, strings listings may be truncated.")
                floss_logger.warning("FLOSS cannot handle files larger than 4GB on 32bit systems.")
            file_buf = get_file_as_mmap(sample_file_path)
            print_static_strings(file_buf, min_length=min_length, quiet=options.quiet)
            static_ascii_strings = strings.extract_ascii_strings(file_buf, min_length)
            static_unicode_strings = strings.extract_unicode_strings(file_buf, min_length)
            static_strings = chain(static_ascii_strings, static_unicode_strings)
            del file_buf
        else:
            static_strings = []

        if options.no_decoded_strings and options.no_stack_strings and not options.should_show_metainfo:
            if options.json_output_file:
                create_json_output_static_only(options, sample_file_path, static_strings)
            # we are done
            return 0

        if os.path.getsize(sample_file_path) > MAX_FILE_SIZE:
            floss_logger.error("FLOSS cannot extract obfuscated strings or stackstrings from files larger than"
                               " %d bytes" % MAX_FILE_SIZE)
            if options.json_output_file:
                create_json_output_static_only(options, sample_file_path, static_strings)
            return 1

    try:
        vw = load_vw(sample_file_path, options.save_workspace, options.verbose, options.is_shellcode,
                     options.shellcode_entry_point, options.shellcode_base)
    except WorkspaceLoadError:
        if options.json_output_file:
            create_json_output_static_only(options, sample_file_path, static_strings)
        return 1

    try:
        selected_functions = select_functions(vw, options.functions)
    except Exception as e:
        floss_logger.error(str(e))
        return 1
    floss_logger.debug("Selected the following functions: %s", get_str_from_func_list(selected_functions))

    selected_plugin_names = select_plugins(options.plugins)
    floss_logger.debug("Selected the following plugins: %s", ", ".join(map(str, selected_plugin_names)))
    selected_plugins = filter(lambda p: str(p) in selected_plugin_names, get_all_plugins())

    if options.should_show_metainfo:
        meta_functions = None
        if options.functions:
            meta_functions = selected_functions
        print_file_meta_info(vw, meta_functions)

    time0 = time()

    if not options.no_decoded_strings:
        floss_logger.info("Identifying decoding functions...")
        decoding_functions_candidates = im.identify_decoding_functions(vw, selected_plugins, selected_functions)
        if options.expert:
            print_identification_results(sample_file_path, decoding_functions_candidates)

        floss_logger.info("Decoding strings...")
        decoded_strings = decode_strings(vw, decoding_functions_candidates, min_length,
                                         options.no_filter, options.max_instruction_count,
                                         options.max_address_revisits + 1)
        # TODO: The de-duplication process isn't perfect as it is done here and in print_decoding_results and
        # TODO: all of them on non-sanitized strings.
        if not options.expert:
            decoded_strings = filter_unique_decoded(decoded_strings)
        print_decoding_results(decoded_strings, options.group_functions, quiet=options.quiet, expert=options.expert)
    else:
        decoded_strings = []

    if not options.no_stack_strings:
        floss_logger.info("Extracting stackstrings...")
        stack_strings = stackstrings.extract_stackstrings(vw, selected_functions, min_length, options.no_filter)
        stack_strings = list(stack_strings)
        if not options.expert:
            # remove duplicate entries
            stack_strings = set(stack_strings)
        print_stack_strings(stack_strings, quiet=options.quiet, expert=options.expert)
    else:
        stack_strings = []

    if options.x64dbg_database_file:
        imagebase = vw.filemeta.values()[0]['imagebase']
        floss_logger.info("Creating x64dbg database...")
        create_x64dbg_database(sample_file_path, options.x64dbg_database_file, imagebase, decoded_strings)

    if options.ida_python_file:
        floss_logger.info("Creating IDA script...")
        create_ida_script(sample_file_path, options.ida_python_file, decoded_strings, stack_strings)

    if options.radare2_script_file:
        floss_logger.info("Creating r2script...")
        create_r2_script(sample_file_path, options.radare2_script_file, decoded_strings, stack_strings)

    if options.binja_script_file:
        floss_logger.info("Creating Binary Ninja script...")
        create_binja_script(sample_file_path, options.binja_script_file, decoded_strings, stack_strings)

    time1 = time()
    if not options.quiet:
        print("\nFinished execution after %f seconds" % (time1 - time0))

    if options.json_output_file:
        create_json_output(options, sample_file_path, decoded_strings=decoded_strings,
                           stack_strings=stack_strings, static_strings=static_strings)
        floss_logger.info("Wrote JSON file to %s\n" % options.json_output_file)

    return 0
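The static-extraction path above can stand alone as a small helper; a sketch assuming the FLOSS v1 modules used in main() are importable (the import path floss.strings and the name get_static_strings are assumptions):

import mmap
from itertools import chain

import floss.strings as strings  # assumed module path for the v1 API used above

def get_static_strings(sample_path, min_length=4):
    # map the file read-only, as main() does via get_file_as_mmap()
    with open(sample_path, "rb") as f:
        file_buf = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
    return chain(
        strings.extract_ascii_strings(file_buf, min_length),
        strings.extract_unicode_strings(file_buf, min_length),
    )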
def run(self):
    """Run FLOSS on the analyzed file.
    @return: FLOSS results dict.
    """
    self.key = "strings"
    self.floss = self.options.get("floss")
    self.MIN_STRINGLEN = int(self.options.get("min_str_len"))
    self.MAX_STRINGLEN = self.options.get("max_str_len")
    self.MAX_STRINGCNT = self.options.get("max_str_cnt")
    self.MAX_FILESIZE = 16 * 1024 * 1024

    STRING_TYPES = ["decoded", "stack", "static"]

    strings = {}

    if self.task["category"] == "file":
        if not os.path.exists(self.file_path):
            raise CuckooProcessingError(
                "Sample file doesn't exist: \"%s\"" % self.file_path
            )

        try:
            f = File(self.file_path)
            filename = os.path.basename(self.task["target"])
            base_name = os.path.splitext(filename)[0]
            ext = filename.split(os.path.extsep)[-1].lower()
            # the sample is binary data, so open it in "rb" mode
            data = open(self.file_path, "rb").read(self.MAX_FILESIZE)
        except (IOError, OSError) as e:
            raise CuckooProcessingError("Error opening file %s" % e)

        # Extract static strings
        static_strings = re.findall(
            "[\x1f-\x7e]{" + str(self.MIN_STRINGLEN) + ",}", data)
        for s in re.findall(
                "(?:[\x1f-\x7e][\x00]){" + str(self.MIN_STRINGLEN) + ",}", data):
            static_strings.append(s.decode("utf-16le"))

        if self.MAX_STRINGLEN != 0:
            for i, s in enumerate(static_strings):
                static_strings[i] = s[:self.MAX_STRINGLEN]

        if self.MAX_STRINGCNT != 0 and len(static_strings) > self.MAX_STRINGCNT:
            static_strings = static_strings[:self.MAX_STRINGCNT]
            static_strings.append("[snip]")

        package = self.task.get("package")

        if self.floss and (package == "exe" or ext == "exe" or "PE32" in f.get_type()):
            # Disable floss verbose logging
            main.set_logging_levels()

            try:
                # Prepare FLOSS for extracting hidden & encoded strings
                vw = vivisect.VivWorkspace()
                vw.loadFromFile(self.file_path)
                vw.analyze()

                selected_functions = main.select_functions(vw, None)
                decoding_functions_candidates = id_man.identify_decoding_functions(
                    vw, main.get_all_plugins(), selected_functions
                )
            except Exception as e:
                raise CuckooProcessingError("Error analyzing file with vivisect: %s" % e)

            try:
                # Decode & extract hidden & encoded strings
                decoded_strings = main.decode_strings(
                    vw, decoding_functions_candidates, self.MIN_STRINGLEN
                )
                decoded_strs = main.filter_unique_decoded(decoded_strings)

                stack_strings = stackstrings.extract_stackstrings(
                    vw, selected_functions, self.MIN_STRINGLEN
                )
                stack_strings = list(stack_strings)

                # drop decoded strings whose text already appears in the static
                # listing (compare the .s text, not the DecodedString object)
                decoded_strings = [x for x in decoded_strs if x.s not in static_strings]
            except Exception as e:
                raise CuckooProcessingError("Error extracting strings with floss: %s" % e)

            if len(decoded_strings) or len(stack_strings):
                # Create annotated scripts
                if self.options.get("idapro_str_sct"):
                    idapro_sct_name = base_name + ".idb"
                    strings["idapro_sct_name"] = idapro_sct_name

                    main.create_ida_script(
                        self.file_path,
                        os.path.join(self.str_script_path, idapro_sct_name),
                        decoded_strings, stack_strings
                    )

                if self.options.get("radare_str_sct"):
                    radare_sct_name = base_name + ".r2"
                    strings["radare_sct_name"] = radare_sct_name

                    main.create_r2_script(
                        self.file_path,
                        os.path.join(self.str_script_path, radare_sct_name),
                        decoded_strings, stack_strings
                    )

                if self.options.get("x64dbg_str_sct"):
                    x64dbg_sct_name = base_name + ".json"
                    strings["x64dbg_sct_name"] = x64dbg_sct_name
                    imagebase = vw.filemeta.values()[0]['imagebase']

                    main.create_x64dbg_database(
                        self.file_path,
                        os.path.join(self.str_script_path, base_name + ".json"),
                        imagebase, decoded_strings
                    )

            # convert FLOSS strings into regular, readable strings
            for idx, s in enumerate(decoded_strings):
                decoded_strings[idx] = main.sanitize_string_for_printing(s.s)

            for idx, s in enumerate(stack_strings):
                stack_strings[idx] = s.s

            results = [decoded_strings, stack_strings, static_strings]

            for idx, str_type in enumerate(STRING_TYPES):
                strings[str_type] = results[idx]
        else:
            strings["static"] = static_strings

    return strings
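A self-contained sketch of the regex-based static-string pass used in run() above, adapted to Python 3 bytes input (extract_static_strings is a hypothetical name; the character ranges mirror the module's patterns):

import re

def extract_static_strings(data, min_len=4):
    # printable-ASCII runs, then UTF-16LE runs, decoded to text
    ascii_pat = b"[\x1f-\x7e]{%d,}" % min_len
    wide_pat = b"(?:[\x1f-\x7e]\x00){%d,}" % min_len
    found = [m.decode("ascii") for m in re.findall(ascii_pat, data)]
    found.extend(m.decode("utf-16le") for m in re.findall(wide_pat, data))
    return found

Called as extract_static_strings(open(path, "rb").read()), this yields the same ASCII and wide strings the two re.findall() passes above produce.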