def open_process_log(self, event):
    """Open the raw .bson log file for a newly announced process.

    `event` is the decoded process-notification message; a log file is
    only created when the process is flagged as tracked, in which case
    the buffered start of the stream is flushed into it.
    """
    process_id = event["pid"]
    parent_id = event["ppid"]
    process_name = event["process_name"]

    # An already-open raw log file means this connection has announced a
    # process before - that is a protocol violation.
    if self.rawlogfd:
        log.debug(
            "ResultServer got a new process message but already "
            "has pid %d ppid %s procname %s.",
            process_id, parent_id, process_name)
        raise CuckooResultError(
            "ResultServer connection state inconsistent.")

    # The pid ends up in a filename below, so reject anything that is
    # not a plain integer.
    if not isinstance(process_id, (int, long)):
        raise CuckooResultError(
            "An invalid process identifier has been provided, this "
            "could be a potential security hazard.")

    # Only report this process when we're tracking it.
    if not event["track"]:
        return

    log.debug("New process (pid=%s, ppid=%s, name=%s)",
              process_id, parent_id, process_name)

    log_path = os.path.join(
        self.storagepath, "logs", "%s.bson" % process_id)
    self.rawlogfd = open(log_path, "wb")
    self.rawlogfd.write(self.startbuf)
def open_process_log(self, event):
    """Record the process announced in `event` on this handler and, when
    it is tracked, open its per-pid .bson log file and flush the buffered
    start of the stream into it.
    """
    new_pid = event["pid"]
    new_ppid = event["ppid"]
    new_name = event["process_name"]

    # Each connection may announce exactly one process.
    if self.pid is not None:
        log.debug(
            "ResultServer got a new process message but already "
            "has pid %d ppid %s procname %s.",
            new_pid, new_ppid, new_name
        )
        raise CuckooResultError(
            "ResultServer connection state inconsistent."
        )

    # Only report this process when we're tracking it.
    if event["track"]:
        log.debug(
            "New process (pid=%s, ppid=%s, name=%s)",
            new_pid, new_ppid, new_name
        )
        self.rawlogfd = open(
            os.path.join(self.storagepath, "logs", "%s.bson" % new_pid),
            "wb"
        )
        self.rawlogfd.write(self.startbuf)

    # The identity is remembered even for untracked processes.
    self.pid = new_pid
    self.ppid = new_ppid
    self.procname = new_name
def log_process(self, ctx, timestring, pid, ppid, modulepath, procname):
    """Register a new process on this connection and open its log files.

    Opens an optional per-pid CSV log (when `store_csvs` is enabled in
    the resultserver config) plus the raw log file whose extension
    depends on the active protocol parser, then flushes the buffered
    start of the stream into the raw log.

    Raises:
        CuckooResultError: if this connection already announced a process.
    """
    # Fix: use the idiomatic `is not None` test and correct the typo
    # ("incosistent") in the raised error message.
    if self.pid is not None:
        log.debug(
            "Resultserver got a new process message but already "
            "has pid %d ppid %s procname %s", pid, str(ppid), procname)
        raise CuckooResultError("Resultserver connection state "
                                "inconsistent.")

    log.debug("New process (pid={0}, ppid={1}, name={2}, "
              "path={3})".format(pid, ppid, procname, modulepath))

    # CSV format files are optional.
    if self.server.cfg.resultserver.store_csvs:
        self.logfd = open(
            os.path.join(self.storagepath, "logs", str(pid) + ".csv"),
            "wb")

    # Raw Bson or Netlog extension, depending on the protocol parser.
    ext = EXTENSIONS.get(type(self.protocol), ".raw")
    self.rawlogfd = open(
        os.path.join(self.storagepath, "logs", str(pid) + ext), "wb")
    self.rawlogfd.write(self.startbuf)

    self.pid, self.ppid, self.procname = pid, ppid, procname
def read_string(self):
    """Reads an utf8 string from the socket.

    The wire format is two unsigned 32-bit integers - the number of
    bytes that follow, and the original (untruncated) length - followed
    by the string data itself. Lengths above 64KB are rejected as a
    protocol error; a truncated string is flagged with a visible marker.

    Raises:
        CuckooResultError: if the announced length is implausibly large.
    """
    length, maxlength = struct.unpack("II", self.handler.read(8))

    # `length` comes from the unsigned "I" format, so it can never be
    # negative - the previous `length < 0` test was dead code and only
    # the upper bound needs checking.
    if length > 0x10000:
        log.critical("read_string length weirdness "
                     "length: %d maxlength: %d", length, maxlength)
        raise CuckooResultError("read_string length failure, "
                                "protocol broken?")

    s = self.handler.read(length)

    # The sender shortened the string; make that visible to consumers.
    if maxlength > length:
        s += "... (truncated)"
    return s
def open_process_log(self, event):
    """Open the per-pid .bson log for the process announced in `event`
    and remember its identity (pid, ppid, name) on this handler.
    """
    proc_pid = event["pid"]
    proc_ppid = event["ppid"]
    proc_name = event["process_name"]

    # A second process message on the same connection is a protocol error.
    if self.pid is not None:
        log.debug(
            "ResultServer got a new process message but already "
            "has pid %d ppid %s procname %s.",
            proc_pid, str(proc_ppid), proc_name)
        raise CuckooResultError(
            "ResultServer connection state inconsistent.")

    log.debug("New process (pid=%s, ppid=%s, name=%s)",
              proc_pid, proc_ppid, proc_name)

    logs_dir = os.path.join(self.storagepath, "logs")
    self.rawlogfd = open(
        os.path.join(logs_dir, "%s.bson" % proc_pid), "wb")
    self.rawlogfd.write(self.startbuf)

    self.pid = proc_pid
    self.ppid = proc_ppid
    self.procname = proc_name
def read(self, length):
    """Read exactly `length` bytes from the connection.

    Fix: accumulate received chunks in a list and join once instead of
    repeated `buf += tmp`, which is quadratic in the worst case.

    For BSON connections the data is mirrored into the open raw log
    file, or buffered in `startbuf` until a process message opens one.

    Raises:
        Disconnect: when the peer goes away or the analysis ends.
        CuckooResultError: when the pre-process buffer grows beyond 64KB.
    """
    chunks = []
    remaining = length
    while remaining > 0:
        if not self.wait_sock_or_end():
            raise Disconnect()
        tmp = self.request.recv(remaining)
        if not tmp:
            raise Disconnect()
        chunks.append(tmp)
        remaining -= len(tmp)
    buf = "".join(chunks)

    if isinstance(self.protocol, BsonParser):
        if self.rawlogfd:
            self.rawlogfd.write(buf)
        else:
            self.startbuf += buf
            # Cap the amount of data buffered before a process message
            # arrives, so a misbehaving client cannot exhaust memory.
            if len(self.startbuf) > 0x10000:
                raise CuckooResultError(
                    "Somebody is knowingly overflowing the startbuf "
                    "buffer, possibly to use excessive amounts of memory.")
    return buf
def __iter__(self):
    """Decode the BSON log stream and yield one parsed event dict per
    message: "info" and "buffer" messages update parser state and are
    consumed internally; process/thread announcements, debug messages
    and regular api calls are yielded to the caller.
    """
    self.fd.seek(0)

    while True:
        # Each message is prefixed by its total length (including these
        # four bytes) as a native unsigned 32-bit integer.
        data = self.fd.read(4)
        if not data:
            return

        if len(data) != 4:
            log.critical("BsonParser lacking data.")
            return

        blen = struct.unpack("I", data)[0]
        if blen > MAX_MESSAGE_LENGTH:
            log.critical("BSON message larger than MAX_MESSAGE_LENGTH, "
                         "stopping handler.")
            return

        data += self.fd.read(blen - 4)
        if len(data) < blen:
            log.critical("BsonParser lacking data.")
            return

        try:
            dec = bson_decode(data)
        except Exception as e:
            log.warning("BsonParser decoding problem {0} on "
                        "data[:50] {1}".format(e, repr(data[:50])))
            return

        mtype = dec.get("type", "none")
        index = dec.get("I", -1)

        if mtype == "info":
            # API call index info message, explaining the argument names, etc.
            name = dec.get("name", "NONAME")
            arginfo = dec.get("args", [])
            category = dec.get("category")

            argnames, converters = self.determine_unserializers(arginfo)
            self.infomap[index] = name, arginfo, argnames, converters, category

            # Optional flag descriptions for this API, keyed by argument
            # name: plain value mappings and bitmask mappings.
            if dec.get("flags_value"):
                self.flags_value[name] = {}
                for arg, values in dec["flags_value"].items():
                    self.flags_value[name][arg] = dict(values)

            if dec.get("flags_bitmask"):
                self.flags_bitmask[name] = {}
                for arg, values in dec["flags_bitmask"].items():
                    self.flags_bitmask[name][arg] = values
            continue

        # Handle dumped buffers.
        if mtype == "buffer":
            buf = dec.get("buffer")
            sha1 = dec.get("checksum")
            self.buffer_sha1 = hashlib.sha1(buf).hexdigest()

            # Why do we pass along a sha1 checksum again?
            if sha1 != self.buffer_sha1:
                log.warning("Incorrect sha1 passed along for a buffer.")

            # If the parent is netlogs ResultHandler then we actually dump
            # it - this should only be the case during the analysis, any
            # after proposing will then be ignored.
            from lib.cuckoo.core.resultserver import ResultHandler

            if isinstance(self.fd, ResultHandler):
                filepath = os.path.join(self.fd.storagepath,
                                        "buffer", self.buffer_sha1)
                with open(filepath, "wb") as f:
                    f.write(buf)
            continue

        tid = dec.get("T", 0)
        time = dec.get("t", 0)

        parsed = {
            "type": mtype,
            "tid": tid,
            "time": time,
        }

        if mtype == "debug":
            log.info("Debug message from monitor: {0}".format(
                dec.get("msg", "")))
            parsed["message"] = dec.get("msg", "")
        else:
            # Regular api call from monitor
            if index not in self.infomap:
                log.warning("Got API with unknown index - monitor needs "
                            "to explain first: {0}".format(dec))
                continue

            apiname, arginfo, argnames, converters, category = \
                self.infomap[index]
            args = dec.get("args", [])

            if len(args) != len(argnames):
                log.warning(
                    "Inconsistent arg count (compared to arg names) "
                    "on {2}: {0} names {1}".format(dec, argnames, apiname))
                continue

            # Deserialize each raw argument value with its converter.
            argdict = {}
            for idx, value in enumerate(args):
                argdict[argnames[idx]] = converters[idx](value)

            # Special new process message from the monitor.
            if apiname == "__process__":
                parsed["type"] = "process"

                # Two wire formats exist: an older one with CamelCase
                # keys and a newer one with snake_case keys.
                if "TimeLow" in argdict:
                    timelow = argdict["TimeLow"]
                    timehigh = argdict["TimeHigh"]

                    parsed["pid"] = pid = argdict["ProcessIdentifier"]
                    parsed["ppid"] = argdict["ParentProcessIdentifier"]
                    modulepath = argdict["ModulePath"]
                elif "time_low" in argdict:
                    timelow = argdict["time_low"]
                    timehigh = argdict["time_high"]

                    if "pid" in argdict:
                        parsed["pid"] = pid = argdict["pid"]
                        parsed["ppid"] = argdict["ppid"]
                    else:
                        parsed["pid"] = pid = argdict["process_identifier"]
                        parsed["ppid"] = argdict["parent_process_identifier"]
                    modulepath = argdict["module_path"]
                else:
                    raise CuckooResultError(
                        "I don't recognise the bson log contents.")

                # FILETIME is 100-nanoseconds from 1601 :/
                vmtimeunix = (timelow + (timehigh << 32))
                vmtimeunix = vmtimeunix / 10000000.0 - 11644473600
                vmtime = datetime.datetime.fromtimestamp(vmtimeunix)
                parsed["first_seen"] = vmtime

                procname = get_filename_from_path(modulepath)
                parsed["process_path"] = modulepath
                parsed["process_name"] = procname
                parsed["command_line"] = argdict.get("command_line")

                # Is this a 64-bit process?
                if argdict.get("is_64bit"):
                    self.is_64bit = True

                # Is this process being "tracked"?
                parsed["track"] = bool(argdict.get("track", 1))

                # Remember the pid: subsequent api calls on this stream
                # are attributed to it.
                self.pid = pid
            elif apiname == "__thread__":
                parsed["pid"] = pid = argdict["ProcessIdentifier"]
            # elif apiname == "__anomaly__":
            #     tid = argdict["ThreadIdentifier"]
            #     subcategory = argdict["Subcategory"]
            #     msg = argdict["Message"]
            #     self.handler.log_anomaly(subcategory, tid, msg)
            #     return True
            else:
                parsed["type"] = "apicall"
                parsed["pid"] = self.pid
                parsed["api"] = apiname
                parsed["category"] = category
                parsed["status"] = argdict.pop("is_success", 1)
                parsed["return_value"] = argdict.pop("retval", 0)
                parsed["arguments"] = argdict
                parsed["flags"] = {}

                parsed["stacktrace"] = dec.get("s", [])
                parsed["uniqhash"] = dec.get("h", 0)

                if "e" in dec and "E" in dec:
                    parsed["last_error"] = dec["e"]
                    parsed["nt_status"] = dec["E"]

                if apiname in self.flags_value:
                    self.resolve_flags(apiname, argdict, parsed["flags"])

        # Attach the most recently seen buffer's sha1 to this event and
        # clear it; presumably a buffer message precedes the api call it
        # belongs to - TODO confirm against the monitor side.
        if self.buffer_sha1:
            parsed["buffer"] = self.buffer_sha1
            self.buffer_sha1 = None

        yield parsed
def __iter__(self):
    """Decode the BSON log stream and yield one parsed event dict per
    message (older parser variant: no buffer handling, flags resolved
    via `self.flags` / `self._flag_represent`).
    """
    self.fd.seek(0)

    while True:
        # Each message is prefixed by its total length (including these
        # four bytes) as a native unsigned 32-bit integer.
        data = self.fd.read(4)
        if not data:
            return

        if not len(data) == 4:
            log.critical("BsonParser lacking data.")
            return

        blen = struct.unpack("I", data)[0]
        if blen > MAX_MESSAGE_LENGTH:
            log.critical("BSON message larger than MAX_MESSAGE_LENGTH, "
                         "stopping handler.")
            return

        data += self.fd.read(blen - 4)
        if len(data) < blen:
            log.critical("BsonParser lacking data.")
            return

        try:
            dec = bson_decode(data)
        except Exception as e:
            log.warning("BsonParser decoding problem {0} on "
                        "data[:50] {1}".format(e, repr(data[:50])))
            return

        mtype = dec.get("type", "none")
        index = dec.get("I", -1)

        if mtype == "info":
            # API call index info message, explaining the argument names, etc.
            name = dec.get("name", "NONAME")
            arginfo = dec.get("args", [])
            category = dec.get("category")

            argnames, converters = check_names_for_typeinfo(arginfo)
            self.infomap[index] = name, arginfo, argnames, converters, category
            continue

        tid = dec.get("T", 0)
        time = dec.get("t", 0)

        parsed = {
            "type": mtype,
            "tid": tid,
            "time": time,
        }

        if mtype == "debug":
            log.info("Debug message from monitor: {0}".format(
                dec.get("msg", "")))
            parsed["message"] = dec.get("msg", "")
        else:
            # Regular api call from monitor
            if index not in self.infomap:
                log.warning("Got API with unknown index - monitor needs "
                            "to explain first: {0}".format(dec))
                continue

            apiname, arginfo, argnames, converters, category = \
                self.infomap[index]
            args = dec.get("args", [])

            if len(args) != len(argnames):
                log.warning(
                    "Inconsistent arg count (compared to arg names) "
                    "on {2}: {0} names {1}".format(dec, argnames, apiname))
                continue

            # Deserialize each raw argument value with its converter.
            argdict = dict((argnames[i], converters[i](args[i]))
                           for i in range(len(args)))

            # Special new process message from the monitor.
            if apiname == "__process__":
                parsed["type"] = "process"

                # Two wire formats exist: an older one with CamelCase
                # keys and a newer one with snake_case keys.
                if "TimeLow" in argdict:
                    timelow = argdict["TimeLow"]
                    timehigh = argdict["TimeHigh"]

                    parsed["pid"] = pid = argdict["ProcessIdentifier"]
                    parsed["ppid"] = argdict["ParentProcessIdentifier"]
                    modulepath = argdict["ModulePath"]
                elif "time_low" in argdict:
                    timelow = argdict["time_low"]
                    timehigh = argdict["time_high"]

                    if "pid" in argdict:
                        parsed["pid"] = pid = argdict["pid"]
                        parsed["ppid"] = argdict["ppid"]
                    else:
                        parsed["pid"] = pid = argdict["process_identifier"]
                        parsed["ppid"] = argdict["parent_process_identifier"]
                    modulepath = argdict["module_path"]
                else:
                    raise CuckooResultError(
                        "I don't recognise the bson log contents.")

                # FILETIME is 100-nanoseconds from 1601 :/
                vmtimeunix = (timelow + (timehigh << 32))
                vmtimeunix = vmtimeunix / 10000000.0 - 11644473600
                vmtime = datetime.datetime.fromtimestamp(vmtimeunix)
                parsed["first_seen"] = vmtime

                procname = get_filename_from_path(modulepath)
                parsed["process_name"] = procname

                # Remember the pid: subsequent api calls on this stream
                # are attributed to it.
                self.pid = pid
            elif apiname == "__thread__":
                parsed["pid"] = pid = argdict["ProcessIdentifier"]
            # elif apiname == "__anomaly__":
            #     tid = argdict["ThreadIdentifier"]
            #     subcategory = argdict["Subcategory"]
            #     msg = argdict["Message"]
            #     self.handler.log_anomaly(subcategory, tid, msg)
            #     return True
            else:
                parsed["type"] = "apicall"
                parsed["pid"] = self.pid
                parsed["api"] = apiname
                parsed["category"] = category
                parsed["status"] = argdict.pop("is_success", 1)
                parsed["return_value"] = argdict.pop("retval", 0)
                parsed["arguments"] = argdict

                parsed["stacktrace"] = dec.get("s", [])
                parsed["uniqhash"] = dec.get("h", 0)

                if "e" in dec and "E" in dec:
                    parsed["last_error"] = dec["e"]
                    parsed["nt_status"] = dec["E"]

                # Add human-readable "<flag>_s" companions for known
                # flag arguments of this API.
                if apiname in self.flags:
                    for flag in self.flags[apiname].keys():
                        argdict[flag + "_s"] = self._flag_represent(
                            apiname, flag, argdict[flag])

        yield parsed