def run(self):
    """Run analysis over the behavior log directory.

    @return: list of per-process information dicts, sorted chronologically
        by the time each process was first seen.
    """
    results = []

    if not os.path.exists(self._logs_path):
        log.warning('Analysis results folder does not exist at path "%s".', self._logs_path)
        return results

    # TODO: this should check the current analysis configuration and raise a warning
    # if injection is enabled and there is no logs folder.
    # List the directory once and reuse it: the original listed it twice,
    # which wasted a syscall and raced against concurrent file creation.
    log_files = os.listdir(self._logs_path)
    if not log_files:
        log.info(
            "Analysis results folder does not contain any file or injection was disabled."
        )
        return results

    # Check if Loop Detection is enabled globally or locally (as an option).
    # Hoisted out of the loop: the setting does not change per file.
    loop_detection = cfg_process.loop_detection.enabled or self.options.get("loop_detection")

    for file_name in log_files:
        file_path = os.path.join(self._logs_path, file_name)

        if loop_detection:
            self.compress_log_file(file_path)

        if os.path.isdir(file_path):
            continue

        # Skipping the current log file if it's too big.
        if os.stat(file_path).st_size > cfg.processing.analysis_size_limit:
            log.warning(
                "Behavioral log {0} too big to be processed, skipped.".format(file_name))
            continue

        # Invoke parsing of current log file (if ram_boost is enabled, otherwise parsing is done on-demand)
        current_log = ParseProcessLog(file_path)
        if current_log.process_id is None:
            continue

        # If the current log actually contains any data, add its data to
        # the results list.
        results.append({
            "process_id": current_log.process_id,
            "process_name": bytes2str(current_log.process_name),
            "parent_id": current_log.parent_id,
            "module_path": bytes2str(current_log.module_path),
            "first_seen": logtime(current_log.first_seen),
            "calls": current_log.calls,
            "threads": current_log.threads,
            "environ": current_log.environdict,
        })

    # Sort the items in the results list chronologically. In this way we
    # can have a sequential order of spawned processes.
    results.sort(key=lambda process: process["first_seen"])

    return results
def log_environ(self, context, environdict):
    """Record user/process environment details for behavioral signatures.

    @param context: ignored.
    @param environdict: dict of collected environment information (grows
        over time); merged into the running environment dict after
        bytes-to-str conversion.
    """
    converted = bytes2str(environdict)
    self.environdict.update(converted)
def run(self):
    """Run extract of printable strings.
    @return: list of printable strings.
    """
    self.key = "strings"
    strings = []

    if self.task["category"] in ("file", "static"):
        if not os.path.exists(self.file_path):
            raise CuckooProcessingError("Sample file doesn't exist: \"%s\"" % self.file_path)

        try:
            # Context manager guarantees the handle is closed; the original
            # open(...).read() leaked the file descriptor.
            with open(self.file_path, "rb") as f:
                data = f.read()
        except (IOError, OSError) as e:
            raise CuckooProcessingError("Error opening file %s" % e)

        nulltermonly = self.options.get("nullterminated_only", True)
        minchars = self.options.get("minchars", 5)

        # Without re2, cap the match length so the regex stays tractable.
        endlimit = b""
        if not HAVE_RE2:
            endlimit = b"8192"

        if nulltermonly:
            # ASCII and UTF-16LE runs that are explicitly NUL-terminated.
            apat = b"([\x20-\x7e]{" + str(minchars).encode("utf-8") + b"," + endlimit + b"})\x00"
            upat = b"((?:[\x20-\x7e][\x00]){" + str(minchars).encode("utf-8") + b"," + endlimit + b"})\x00\x00"
        else:
            apat = b"[\x20-\x7e]{" + str(minchars).encode("utf-8") + b"," + endlimit + b"}"
            upat = b"(?:[\x20-\x7e][\x00]){" + str(minchars).encode("utf-8") + b"," + endlimit + b"}"

        strings = [bytes2str(string) for string in re.findall(apat, data)]
        for ws in re.findall(upat, data):
            strings.append(str(ws.decode("utf-16le")))

    return strings
def extract_strings(path, nulltermonly, minchars):
    """Extract printable ASCII and UTF-16LE strings from a file.

    @param path: path of the file to scan.
    @param nulltermonly: only keep NUL-terminated strings when truthy.
    @param minchars: minimum number of characters per string.
    @return: list of extracted strings.
    @raise CuckooProcessingError: if the file cannot be read.
    """
    try:
        # Context manager guarantees the handle is closed; the original
        # open(...).read() leaked the file descriptor.
        with open(path, "rb") as f:
            data = f.read()
    except (IOError, OSError) as e:
        raise CuckooProcessingError(f"Error opening file {e}")

    # Without re2, cap the match length so the regex stays tractable.
    endlimit = b""
    if not HAVE_RE2:
        endlimit = b"8192"

    if nulltermonly:
        apat = b"([\x20-\x7e]{" + str(minchars).encode() + b"," + endlimit + b"})\x00"
        upat = b"((?:[\x20-\x7e][\x00]){" + str(minchars).encode() + b"," + endlimit + b"})\x00\x00"
    else:
        apat = b"[\x20-\x7e]{" + str(minchars).encode() + b"," + endlimit + b"}"
        upat = b"(?:[\x20-\x7e][\x00]){" + str(minchars).encode() + b"," + endlimit + b"}"

    strings = [bytes2str(string) for string in re.findall(apat, data)]
    for ws in re.findall(upat, data):
        strings.append(str(ws.decode("utf-16le")))

    return strings
def recon(filename, orig_options, timeout, enforce_timeout):
    """Tune analysis options based on the sample's filename.

    When the lowered filename contains "name", extend the options string
    with extra dump settings and force a 400-second enforced timeout.

    @param filename: sample filename (str or bytes).
    @param orig_options: current options string.
    @param timeout: current analysis timeout.
    @param enforce_timeout: current enforce_timeout flag.
    @return: (options, timeout, enforce_timeout) tuple, possibly adjusted.
    """
    lowered = filename.lower()
    if not isinstance(lowered, str):
        lowered = bytes2str(lowered)

    # Guard clause: filenames without the marker pass through untouched.
    if "name" not in lowered:
        return orig_options, timeout, enforce_timeout

    return (
        orig_options + ",timeout=400,enforce_timeout=1,procmemdump=1,procdump=1",
        400,
        True,
    )
def extract_strings(filepath: str, on_demand: bool = False):
    """Extract strings from analyzed file.
    @return: list of printable strings, or None when extraction is
        disabled, deferred, or the file cannot be read.
    """
    # Skip when disabled, or when extraction is configured as on-demand
    # and this is not an on-demand invocation.
    if not processing_cfg.strings.enabled or (processing_cfg.strings.on_demand and not on_demand):
        return

    nulltermonly = processing_cfg.strings.nullterminated_only
    minchars = str(processing_cfg.strings.minchars).encode()

    if not os.path.exists(filepath):
        log.error("Sample file doesn't exist: %s", filepath)
        return

    try:
        with open(filepath, "rb") as f:
            data = f.read()
    except (IOError, OSError) as e:
        log.error("Error reading file: %s", e)
        return

    # Without re2, cap the match length so the regex stays tractable.
    endlimit = b"8192" if not HAVE_RE2 else b""

    if nulltermonly:
        apat = b"([\x20-\x7e]{" + minchars + b"," + endlimit + b"})\x00"
        upat = b"((?:[\x20-\x7e][\x00]){" + minchars + b"," + endlimit + b"})\x00\x00"
    else:
        apat = b"[\x20-\x7e]{" + minchars + b"," + endlimit + b"}"
        upat = b"(?:[\x20-\x7e][\x00]){" + minchars + b"," + endlimit + b"}"

    found = [bytes2str(match) for match in re.findall(apat, data)]
    found.extend(str(ws.decode("utf-16le")) for ws in re.findall(upat, data))
    return found
def _parse(self, row):
    """Parse a single behavior-log row into an API call dict.

    @param row: raw log row; indexes 0-8 are fixed fields, the remaining
        entries are (name, value) API argument pairs.
    @return: parsed call dict, or None if the row is malformed.
    """
    arguments = []

    try:
        timestamp = row[0]  # Timestamp of current API call invocation.
        thread_id = row[1]  # Thread ID.
        caller = row[2]  # non-system DLL return address
        parentcaller = row[3]  # non-system DLL parent of non-system-DLL return address
        category = row[4]  # Win32 function category.
        api_name = row[5]  # Name of the Windows API.
        repeated = row[6]  # Times log repeated
        status_value = row[7]  # Success or Failure?
        return_value = row[8]  # Value returned by the function.
    except IndexError as e:
        log.debug("Unable to parse process log row: %s", e)
        return None

    # Now walk through the remaining columns, which will contain API
    # arguments.
    for api_arg in row[9:]:
        # Split the argument name with its value based on the separator.
        try:
            arg_name, arg_value = api_arg
        except ValueError as e:
            log.debug("Unable to parse analysis row argument (row=%s): %s", api_arg, e)
            continue

        argument = {"name": arg_name}

        if isinstance(arg_value, bytes):
            arg_value = bytes2str(arg_value)

        # Lists of bytes values are flattened into one space-joined string.
        # (The len(...) >= 1 check was redundant with the truthiness test.)
        if arg_value and isinstance(arg_value, list) and isinstance(arg_value[0], bytes):
            arg_value = " ".join(bytes2str(arg_value))

        try:
            argument["value"] = convert_to_printable(arg_value, self.conversion_cache)
        except Exception:
            # Bug fix: the original passed arg_value itself as the logging
            # format string, so any '%' in the (untrusted) value broke the
            # log call. Use a literal format string instead.
            log.error("Unable to convert argument value %r to printable", arg_value, exc_info=True)
            continue

        if not self.reporting_mode:
            argument["raw_value"] = arg_value

        pretty = pretty_print_arg(category, api_name, arg_name, argument["value"])
        if pretty:
            argument["pretty_value"] = pretty

        arguments.append(argument)

    call = {
        "timestamp": timestamp,
        "thread_id": str(thread_id),
        "caller": f"0x{default_converter(caller):08x}",
        "parentcaller": f"0x{default_converter(parentcaller):08x}",
        "category": category,
        "api": api_name,
        "status": bool(int(status_value)),
    }

    if isinstance(return_value, int):
        call["return"] = f"0x{default_converter(return_value):08x}"
    else:
        call["return"] = convert_to_printable(str(return_value), self.conversion_cache)

    prettyret = pretty_print_retval(call["status"], call["return"])
    if prettyret:
        call["pretty_return"] = prettyret

    call["arguments"] = arguments
    call["repeated"] = repeated

    # add the thread id to our thread set
    if call["thread_id"] not in self.threads:
        self.threads.append(call["thread_id"])

    return call
def _parse(self, row):
    """Parse log row.
    @param row: row data; indexes 0-8 are fixed fields, the remaining
        entries are (name, value) API argument pairs.
    @return: parsed information dict, or None if the row is malformed.
    """
    call = {}
    arguments = []

    try:
        timestamp = row[0]  # Timestamp of current API call invocation.
        thread_id = row[1]  # Thread ID.
        caller = row[2]  # non-system DLL return address
        parentcaller = row[3]  # non-system DLL parent of non-system-DLL return address
        category = row[4]  # Win32 function category.
        api_name = row[5]  # Name of the Windows API.
        repeated = row[6]  # Times log repeated
        status_value = row[7]  # Success or Failure?
        return_value = row[8]  # Value returned by the function.
    except IndexError as e:
        log.debug("Unable to parse process log row: %s", e)
        return None

    # Now walk through the remaining columns, which will contain API
    # arguments. Iterate the slice directly instead of range(9, len(row)).
    for api_arg in row[9:]:
        argument = {}
        # Split the argument name with its value based on the separator.
        try:
            arg_name, arg_value = api_arg
        except ValueError as e:
            log.debug("Unable to parse analysis row argument (row=%s): %s", api_arg, e)
            continue

        argument["name"] = arg_name

        if isinstance(arg_value, bytes):
            arg_value = bytes2str(arg_value)
        argument["value"] = convert_to_printable(str(arg_value), self.conversion_cache)
        if not self.reporting_mode:
            argument["raw_value"] = arg_value
        pretty = pretty_print_arg(category, api_name, arg_name, argument["value"])
        if pretty:
            argument["pretty_value"] = pretty
        arguments.append(argument)

    call["timestamp"] = timestamp
    call["thread_id"] = str(thread_id)
    call["caller"] = "0x%.08x" % default_converter(caller)
    call["parentcaller"] = "0x%.08x" % default_converter(parentcaller)
    call["category"] = category
    call["api"] = api_name
    call["status"] = bool(int(status_value))

    # Bug fix: the original tested isinstance(return_value, int) twice
    # (a Python 2 int/long leftover); one check suffices.
    if isinstance(return_value, int):
        call["return"] = "0x%.08x" % default_converter(return_value)
    else:
        call["return"] = convert_to_printable(str(return_value), self.conversion_cache)

    prettyret = pretty_print_retval(category, api_name, call["status"], call["return"])
    if prettyret:
        call["pretty_return"] = prettyret

    call["arguments"] = arguments
    call["repeated"] = repeated

    # add the thread id to our thread set
    if call["thread_id"] not in self.threads:
        self.threads.append(call["thread_id"])

    return call