def open_process_log(self, event):
    pid = event["pid"]
    ppid = event["ppid"]
    procname = event["process_name"]

    if self.rawlogfd:
        log.debug(
            "ResultServer got a new process message but already "
            "has pid %d ppid %s procname %s.", pid, ppid, procname
        )
        raise CuckooResultError(
            "ResultServer connection state inconsistent."
        )

    if not isinstance(pid, (int, long)):
        raise CuckooResultError(
            "An invalid process identifier has been provided, this "
            "could be a potential security hazard."
        )

    # Only report this process when we're tracking it.
    if event["track"]:
        log.debug(
            "New process (pid=%s, ppid=%s, name=%s)",
            pid, ppid, procname.encode("utf8")
        )

    filepath = os.path.join(self.storagepath, "logs", "%s.bson" % pid)
    self.rawlogfd = open(filepath, "wb")
    self.rawlogfd.write(self.startbuf)
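
# A minimal sketch (not part of the original module) of how a finished
# logs/<pid>.bson file can be read back. Each record written above is a
# regular BSON document whose first four bytes are its own little-endian
# total length; the replayed startbuf guarantees the "info" messages come
# first. Assumes the standalone "bson" package that ships with pymongo;
# iter_bson_records is a hypothetical helper name.
import struct

from bson import decode_all

def iter_bson_records(path):
    with open(path, "rb") as fd:
        while True:
            header = fd.read(4)
            if len(header) < 4:
                return
            blen = struct.unpack("<I", header)[0]
            body = fd.read(blen - 4)
            if len(body) < blen - 4:
                return
            # decode_all() returns a list of documents; here it is always one.
            yield decode_all(header + body)[0]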
def read(self, length):
    buf = ""
    while len(buf) < length:
        if not self.wait_sock_or_end():
            raise Disconnect()
        tmp = self.request.recv(length - len(buf))
        if not tmp:
            raise Disconnect()
        buf += tmp

    if isinstance(self.protocol, BsonParser):
        if self.rawlogfd:
            self.rawlogfd.write(buf)
        else:
            self.startbuf += buf
            if len(self.startbuf) > 0x10000:
                raise CuckooResultError(
                    "Somebody is knowingly overflowing the startbuf "
                    "buffer, possibly to use excessive amounts of memory."
                )
    return buf
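
# read() never returns short: it loops on recv() until exactly `length`
# bytes have arrived, and anything received before the process message is
# parked in startbuf (capped at 0x10000 bytes, i.e. 64 KiB) for replay by
# open_process_log(). A standalone sketch of that exact-read loop; the
# recv_exact name is illustrative, not Cuckoo's.
def recv_exact(sock, length):
    chunks = []
    remaining = length
    while remaining:
        chunk = sock.recv(remaining)
        if not chunk:
            # The peer closed the connection mid-message.
            raise EOFError("disconnected with %d bytes outstanding" % remaining)
        chunks.append(chunk)
        remaining -= len(chunk)
    return b"".join(chunks)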
def __iter__(self):
    self.fd.seek(0)

    while True:
        data = self.fd.read(4)
        if not data:
            return

        if len(data) != 4:
            log.critical("BsonParser lacking data.")
            return

        blen = struct.unpack("I", data)[0]
        if blen > MAX_MESSAGE_LENGTH:
            log.critical(
                "BSON message larger than MAX_MESSAGE_LENGTH, "
                "stopping handler."
            )
            return

        data += self.fd.read(blen - 4)
        if len(data) < blen:
            log.critical("BsonParser lacking data.")
            return

        try:
            dec = bson_decode(data)
        except Exception as e:
            log.warning(
                "BsonParser decoding problem %s on data[:50] %s",
                e, repr(data[:50])
            )
            return

        mtype = dec.get("type", "none")
        index = dec.get("I", -1)

        if mtype == "info":
            # API call index info message, explaining the argument names, etc.
            name = dec.get("name", "NONAME")
            arginfo = dec.get("args", [])
            category = dec.get("category")

            argnames, converters = self.determine_unserializers(arginfo)
            self.infomap[index] = name, arginfo, argnames, converters, category

            if dec.get("flags_value"):
                self.flags_value[name] = {}
                for arg, values in dec["flags_value"].items():
                    self.flags_value[name][arg] = dict(values)

            if dec.get("flags_bitmask"):
                self.flags_bitmask[name] = {}
                for arg, values in dec["flags_bitmask"].items():
                    self.flags_bitmask[name][arg] = values
            continue

        # Handle dumped buffers.
        if mtype == "buffer":
            buf = dec.get("buffer")
            sha1 = dec.get("checksum")
            self.buffer_sha1 = hashlib.sha1(buf).hexdigest()

            # TODO Reimplement storing of buffers. This has not been done
            # yet in the new resultserver.

            # Why do we pass along a sha1 checksum again?
            if sha1 != self.buffer_sha1:
                log.warning("Incorrect sha1 passed along for a buffer.")

            filepath = cwd("buffer", self.buffer_sha1, analysis=self.task_id)
            with open(filepath, "wb") as f:
                f.write(buf)
            continue

        tid = dec.get("T", 0)
        time = dec.get("t", 0)

        parsed = {
            "type": mtype,
            "tid": tid,
            "time": time,
        }

        if mtype == "debug":
            parsed["message"] = dec.get("msg", "")
            log.info("Debug message from monitor: %s", parsed["message"])
        else:
            # Regular api call from monitor.
            if index not in self.infomap:
                log.warning(
                    "Got API with unknown index - monitor needs "
                    "to explain first: %s", dec
                )
                continue

            apiname, arginfo, argnames, converters, category = self.infomap[index]
            args = dec.get("args", [])

            if len(args) != len(argnames):
                log.warning(
                    "Inconsistent arg count (compared to arg names) "
                    "on %s: %s names %s", dec, argnames, apiname
                )
                continue

            argdict = {}
            for idx, value in enumerate(args):
                argdict[argnames[idx]] = converters[idx](value)

            # Special new process message from the monitor.
            if apiname == "__process__":
                parsed["type"] = "process"

                if "TimeLow" in argdict:
                    timelow = argdict["TimeLow"]
                    timehigh = argdict["TimeHigh"]

                    parsed["pid"] = pid = argdict["ProcessIdentifier"]
                    parsed["ppid"] = argdict["ParentProcessIdentifier"]
                    modulepath = argdict["ModulePath"]
                elif "time_low" in argdict:
                    timelow = argdict["time_low"]
                    timehigh = argdict["time_high"]

                    if "pid" in argdict:
                        parsed["pid"] = pid = argdict["pid"]
                        parsed["ppid"] = argdict["ppid"]
                    else:
                        parsed["pid"] = pid = argdict["process_identifier"]
                        parsed["ppid"] = argdict["parent_process_identifier"]
                    modulepath = argdict["module_path"]
                else:
                    raise CuckooResultError(
                        "I don't recognize the bson log contents."
                    )

                # FILETIME is 100-nanoseconds from 1601 :/
                vmtimeunix = (timelow + (timehigh << 32))
                vmtimeunix = vmtimeunix / 10000000.0 - 11644473600
                vmtime = datetime.datetime.fromtimestamp(vmtimeunix)
                parsed["first_seen"] = vmtime

                procname = Storage.get_filename_from_path(modulepath)
                parsed["process_path"] = modulepath
                parsed["process_name"] = procname
                parsed["command_line"] = argdict.get("command_line")

                # Is this a 64-bit process?
                if argdict.get("is_64bit"):
                    self.is_64bit = True

                # Is this process being "tracked"?
                parsed["track"] = bool(argdict.get("track", 1))
                parsed["modules"] = argdict.get("modules", {})

                self.pid = pid
            elif apiname == "__thread__":
                parsed["pid"] = pid = argdict["ProcessIdentifier"]
            # elif apiname == "__anomaly__":
            #     tid = argdict["ThreadIdentifier"]
            #     subcategory = argdict["Subcategory"]
            #     msg = argdict["Message"]
            #     self.handler.log_anomaly(subcategory, tid, msg)
            #     return True
            elif apiname == "__action__":
                parsed["type"] = "action"
                parsed["action"] = argdict["action"]
            else:
                parsed["type"] = "apicall"
                parsed["pid"] = self.pid
                parsed["api"] = apiname
                parsed["category"] = category
                parsed["status"] = argdict.pop("is_success", 1)
                parsed["return_value"] = argdict.pop("retval", 0)
                parsed["arguments"] = argdict
                parsed["flags"] = {}

                parsed["stacktrace"] = dec.get("s", [])
                parsed["uniqhash"] = dec.get("h", 0)

                if "e" in dec and "E" in dec:
                    parsed["last_error"] = dec["e"]
                    parsed["nt_status"] = dec["E"]

                if apiname in self.flags_value:
                    self.resolve_flags(apiname, argdict, parsed["flags"])

                if self.buffer_sha1:
                    parsed["buffer"] = self.buffer_sha1
                    self.buffer_sha1 = None

        yield parsed
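
# The __process__ branch above converts a Windows FILETIME (100-nanosecond
# ticks since 1601-01-01 UTC) to Unix time by dividing by 10**7 and
# subtracting the 11644473600-second gap between the 1601 and 1970 epochs.
# A self-contained check of that arithmetic; note the parser itself uses
# datetime.datetime.fromtimestamp(), which applies the local timezone,
# whereas this sketch stays in UTC.
import datetime

def filetime_to_datetime(timelow, timehigh):
    ticks = timelow + (timehigh << 32)
    return datetime.datetime.utcfromtimestamp(ticks / 10000000.0 - 11644473600)

# 2010-01-01 00:00:00 UTC expressed as a FILETIME low/high pair.
ticks = (11644473600 + 1262304000) * 10000000
print(filetime_to_datetime(ticks & 0xFFFFFFFF, ticks >> 32))
# -> 2010-01-01 00:00:00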