def wait_for_dbcall(self, action):
    """Answer the previous DB call (if any) and wait for the next request.

    If ``action`` is a ``DbFnCall``, its ``retnode.data`` is first written
    to ``self.php_out``.  A ``PipeReader`` on ``self.php_in`` is then
    started and polled every 50 ms until either the process behind
    ``self.run`` exits or the reader has data.

    * ``self.run`` is ``None``: nothing happens.
    * Process exited: every action later than ``action`` that is not a
      ``PhpExit`` gets ``cancel = True`` and the reader is killed.
    * Request received (``fn:fid:args``): if the next recorded action is a
      ``DbFnCall`` for the same ``fn``, an ``UpdateBufAction`` carrying the
      new args is connected to its args node; otherwise a new ``DbFnCall``
      is created and connected between ``action`` and the next action.

    Raises:
        ValueError: if the request has fewer than three ':'-separated
            fields, or if there is no action later than ``action``.
    """
    if self.run is None:
        return

    ## return response to prev db func call
    if isinstance(action, DbFnCall):
        util.file_write(self.php_out, action.retnode.data)

    rd = PipeReader(self.php_in)
    rd.start()
    while self.run.poll() is None and rd.data is None:
        time.sleep(0.05)

    if self.run.poll() is not None:
        ## redo done for this php actor; cancel remaining actions
        for a in [x for x in self.actions if x > action]:
            if not isinstance(a, PhpExit):
                a.cancel = True
        rd.kill()
        return

    rd.join()
    ## only the first two ':' are field separators; args is the remainder
    ## of the message and may itself contain ':'
    fn, fid, args = rd.data.strip().split(':', 2)
    ## single-argument form prints the same text on Python 2 and 3
    print('%s %s %s %s' % ("PhpActor: wait_for_dbcall: got request:",
                           fn, fid, args))

    ## if next action is the same as the requested action, update
    ## its args. else, create a new action
    nextact = min([x for x in self.actions if x > action])
    if isinstance(nextact, DbFnCall) and nextact.fn == fn:
        ## place the update after the latest checkpoint that precedes nextact
        ts = max([x for x in nextact.argsnode.checkpts if x < nextact]).tac
        u = UpdateBufAction(util.between(ts, nextact.tic), args,
                            nextact.argsnode)
        u.connect()
    else:
        ts = util.between(action.tac, nextact.tic)
        name = (self.pid, fn, time.time())
        argsnode = mgrutil.BufferNode(name + ('fncall', 'args'), ts, args)
        retnode = mgrutil.BufferNode(name + ('fncall', 'ret'), ts, None)
        fncall = DbFnCall(fn, fid, self, argsnode, retnode)
        connect(fncall, ts + (1, ), ts + (2, ))
def run_poll(self, action):
    """Wait for the PHP process to exit or to log a complete message.

    Loops until one of two things happens:

    * ``self.run.poll()`` reports the process has exited: every action
      later than ``action`` that is not a ``PhpExit`` is passed to
      ``cancel_action`` and the method returns ``None``.
    * ``self.log_file`` contains at least one newline: its content is
      stored in ``self.runmsg``, the file is truncated, and the message
      is processed.

    A message is five space-separated fields; only the third (the message
    type) is used here.  For type ``'query'`` a new ``DbQueryAction`` with
    args/ret buffer nodes plus the matching ``PhpDbCall`` and ``PhpDbRet``
    are connected at timestamps between ``action`` and the next action.

    Raises:
        Exception: for any message type other than ``'query'``.
        ValueError: if the message does not have exactly five fields or
            there is no action later than ``action``.
    """
    while True:
        if self.run.poll() is not None:
            for a in [x for x in self.actions if x > action]:
                if not isinstance(a, PhpExit):
                    cancel_action(a)
            return
        with open(self.log_file, 'r') as f:
            m = f.read()
        if '\n' in m:
            self.runmsg = m
            ## truncate the log so the same message is not read again
            open(self.log_file, 'w').close()
            break
        ## nothing complete yet: back off instead of spinning on the file
        time.sleep(0.05)

    dbg.debug('php message', self.runmsg.strip())
    (_, _, msg_type, _, _) = self.runmsg.strip().split(' ')

    nextact = min([x for x in self.actions if x > action])
    ts = util.between(action.tac, nextact.tic)
    if msg_type == 'query':
        q = DbQueryAction(('nquery', self.pid, time.time()))
        q.tic = ts + (3, )
        q.tac = ts + (4, )
        q.argsnode = mgrutil.BufferNode(q.name + ('args', ), ts + (2, ), None)
        q.retnode = mgrutil.BufferNode(q.name + ('ret', ), ts + (5, ), None)
        q.connect()

        c = PhpDbCall(self, q.argsnode)
        c.tic = ts
        c.tac = ts + (1, )
        c.connect()

        r = PhpDbRet(self, q.retnode)
        r.tic = ts + (6, )
        r.tac = ts + (7, )
        r.connect()
    else:
        raise Exception('unknown PHP message type', msg_type)
def load(a, b):
    """Build actions from ``php.log`` and ``db.log`` found in ``logdir``.

    ``php.log`` lines are ``pid:ts:type:data`` where type is ``call`` or
    ``ret``; each one becomes a ``PhpStart`` / ``PhpExit`` connected at
    ``(ts, 1)``..``(ts, 2)``.

    ``db.log`` lines are ``pid:ts:fn:fid:type:data``.  A ``call`` record
    only remembers its args node and timestamp; the following ``ret``
    record creates the ``DbFnCall`` using the call's timestamp.
    ``serverpid`` records are ignored.

    Neither ``a`` nor ``b`` is used by this function.

    Raises:
        Exception: on an unknown record type, or on a db ``ret`` record
            that is not preceded by a ``call`` record.
        ValueError: on a line with too few fields or a non-integer
            pid/timestamp.
    """
    with open(logdir + "/php.log") as phpf:
        for line in phpf:
            ## data is the last field and may itself contain ':'
            (pids, tss, kind, data) = line.strip().split(':', 3)
            pid = int(pids)
            ts = int(tss)
            php = PhpActor.get(pid)
            if kind == "call":
                args = mgrutil.BufferNode((pid, 'pargs'), (ts, ), data)
                p_start = PhpStart(php, args)
                connect(p_start, (ts, 1), (ts, 2))
            elif kind == "ret":
                ret = mgrutil.BufferNode((pid, 'pret'), (ts, ), data)
                p_exit = PhpExit(php, ret)
                connect(p_exit, (ts, 1), (ts, 2))
            else:
                raise Exception("unknown type: " + kind)

    db = psycopg2.connect(db_connect_params())
    args = None
    args_ts = None
    with open(logdir + "/db.log") as dbf:
        for line in dbf:
            (pids, tss, fn, fid, kind, data) = line.strip().split(':', 5)
            pid = int(pids)
            ts = int(tss)
            php = PhpActor.get(pid)
            if kind == "call":
                args = mgrutil.BufferNode((pid, fn, fid, 'dargs'), (ts, ),
                                          data)
                args_ts = ts
            elif kind == "ret":
                if args_ts is None:
                    raise Exception("db ret record without a preceding "
                                    "call: " + line.strip())
                ## NOTE(review): the pending call is tracked globally, not
                ## per pid -- interleaved calls from different pids would
                ## be paired with the wrong args; confirm the log never
                ## interleaves.
                ## XXX: timestamp should be (ts,) but that fails because an
                ##      UPDATE fncall's time interval overlaps with
                ##      checkpoint created by the update. fix it by adding
                ##      an extra DbFnRet
                ret = mgrutil.BufferNode((pid, fn, fid, 'dret'), (args_ts, ),
                                         data)
                fncall = DbFnCall(db, fn, fid, php, args, ret)
                connect(fncall, (args_ts, 1), (args_ts, 1))
            elif kind != "serverpid":
                raise Exception("unknown type: " + kind)
def load(h, what):
    """Build actions from the browser session logs found in ``logdir``.

    ``logdir/sessionId`` holds the number of sessions.  For each session
    the files ``logfile-<session>-<idx>`` are read for idx = 0, 1, ...
    until one is missing; each file is a JSON list of event entries.  A
    single counter ``ts`` is incremented for every entry across all files
    and is used as the first timestamp component.

    Neither ``h`` nor ``what`` is used by this function.
    """
    ## Read the logs
    maxSessionId = int(file_read(logdir + "/sessionId"))
    debug("++ maxSessionId = %d" % maxSessionId)
    logFilePrefix = logdir + "/logfile-"

    ts = 0
    for sessionId in xrange(maxSessionId):
        debug("++ Processing session %d" % sessionId)
        logFileIdx = 0
        while True:
            logfile = "%s%d-%d" % (logFilePrefix, sessionId, logFileIdx)
            logFileIdx += 1
            if not os.path.exists(logfile):
                break

            debug("++++ Processing logfile " + logfile)
            ## 'with' closes the file even when the JSON is malformed
            with open(logfile) as logfp:
                log = json.load(logfp)

            ## on FrameId:
            ##   create a new FrameActor and FrameLoc
            ##
            ## on HTTPRequest:
            ##   if isMainDoc:
            ##     new PageData node corresponding to the new page
            ##       (with a ckpt)
            ##     ckpt FrameLoc, (XXX: ReferAction writes page URL to
            ##       FrameLoc)
            ##     PageLoad reads from FrameLoc and writes to RequestData
            ##   else:
            ##     HTTPReqAction reads from PageData and writes to
            ##       RequestData
            ##
            ## HTTPRespAction reads from ResponseData and writes to PageData
            ##
            ## on PageId: nothing
            ##
            ## on TabId: nothing
            ##
            ## on any user input event (keydown, mouseover, etc.,):
            ##   add a new UserInput to the UserActor
            ##   connect the event to PageData
            for entry in log:
                ts += 1
                debug("")
                debug(str(entry))
                evtype = entry['evType']

                if evtype == 'FrameId':
                    debug("++++ Processing frame " + str(entry['id']))
                    ## results unused; the lookups are kept as in the
                    ## original code
                    FrameActor.get(entry['id'])
                    FrameLocationNode.get(entry['id'])

                elif evtype == 'PageId':
                    debug("++++ Processing page" + str(entry['id']))
                    PageDataNode.get(entry['parentFrame'], entry['id'])

                elif evtype == 'HTTPRequest':
                    debug("++++ Processing HTTP request")
                    ## XXX: extend HTTPRequest and HTTPResponse to support
                    ##      non-pageload http requests
                    if entry['isMainDoc']:
                        f = FrameActor.get(entry['frameId'])
                        PageDataNode.get(entry['frameId'], entry['pageId'])
                        floc = FrameLocationNode.get(entry['frameId'])
                        floc.checkpoints.add(
                            LocationCkpt((ts, 0), entry['URI']['asciiSpec']))

                        req_data = mgrutil.BufferNode(
                            f.name + ('htreq', entry['pageId']), (ts, 3),
                            entry)
                        pl = FramePageLoad(f, req_data)
                        pl.tic = (ts, 1)
                        pl.tac = (ts, 2)
                        pl.connect()

                elif evtype == 'HTTPResponse':
                    debug("++++ Processing HTTP Response")
                    if entry['isMainDoc']:
                        f = FrameActor.get(entry['frameId'])
                        PageDataNode.get(entry['frameId'], entry['pageId'])

                        resp_data = mgrutil.BufferNode(
                            f.name + ('htresp', entry['pageId']), (ts, 3),
                            entry)
                        htresp = HTTPResponse(f, resp_data)
                        htresp.tic = (ts, 1)
                        htresp.tac = (ts, 2)
                        htresp.connect()

                elif evtype == 'event' and entry['type'] in events:
                    ## XXX: add info about event into event name (eg: key
                    ##      pressed mouse loc, DOM element)
                    debug("++++ Processing %s event" % entry['type'])
                    ua = UserActor.get()
                    useraction = mgrutil.BufferNode((entry['type'], ts),
                                                    (ts, 3), entry)
                    inp = UserInput(ua, useraction)
                    inp.tic = (ts, 1)
                    inp.tac = (ts, 2)
                    inp.connect()
def load(h, what):
    """Build actions from ``db.log`` and ``httpd.log`` found in ``logdir``.

    ``h`` is an optional filter: a false value loads everything,
    ``('php', pid)`` restricts both logs to that pid, and
    ``('db', path)`` restricts ``db.log`` to that database path (and
    skips every ``httpd.log`` record).  ``what`` is not used here.

    ``db.log`` lines are ``pid ts type db_path data`` with URL-quoted
    data; a ``query`` record creates a ``DbQueryAction`` plus
    ``PhpDbCall`` and a later ``query_result`` for the same pid completes
    it with a ``PhpDbRet``.

    ``httpd.log`` lines are ``pid ts type subtype [data]``; the
    ``httpreq_*`` records accumulate the request description for a pid
    and ``httpresp`` closes it.

    Objects whose name is already registered are not created again;
    the pid's pending slot is set to ``None`` so follow-up records for
    it are skipped.
    """
    dbg.load('loading name', h)

    ## both files are opened before any processing (as before), and both
    ## are closed when the block exits
    with open(os.path.join(logdir, "db.log")) as dbf, \
         open(os.path.join(logdir, "httpd.log")) as htf:

        ## pid -> pending DbQueryAction (db.log pass) or pending request
        ## dict (httpd.log pass); None marks "already loaded, skip"
        q = {}

        for line in dbf:
            (pids, tss, rectype, db_path, dataq) = line.strip().split(' ')
            pid = int(pids)
            ts = int(tss)
            php = PhpActor.get(pid)
            data = urllib.unquote(dataq)

            if h:
                if h[0] == 'php':
                    if h[1] != pid:
                        continue
                elif h[0] == 'db':
                    if h[1] != db_path:
                        continue
                else:
                    continue

            if rectype == 'query':
                qname = ('dbq', pid, ts)
                if mgrapi.RegisteredObject.by_name(qname):
                    q[pid] = None
                    continue
                q[pid] = DbQueryAction(qname)
                q[pid].tic = (ts, 2)
                q[pid].argsnode = mgrutil.BufferNode(
                    q[pid].name + ('args', ), (ts, 2),
                    {'dir': db_path, 'query': data})

                dbcall = PhpDbCall(php, q[pid].argsnode)
                dbcall.tic = (ts, 1)
                dbcall.tac = (ts, 2)
                dbcall.connect()

            if rectype == 'query_result' and q[pid]:
                q[pid].tac = (ts, 2)
                q[pid].retnode = mgrutil.BufferNode(
                    q[pid].name + ('ret', ), (ts, 2), data)
                q[pid].connect()

                dbret = PhpDbRet(php, q[pid].retnode)
                dbret.tic = (ts, 2)
                dbret.tac = (ts, 3)
                dbret.connect()

        for line in htf:
            fields = line.strip().split(' ')
            if len(fields) == 4:
                ## record without a data field
                (pids, tss, rectype, subtype) = fields
                dataq = ''
            else:
                (pids, tss, rectype, subtype, dataq) = fields
            pid = int(pids)
            ts = int(tss)
            data = urllib.unquote(dataq)

            if h:
                if h[0] != 'php' or h[1] != pid:
                    continue

            p_actor = PhpActor.get(pid)

            if rectype == 'httpreq_start':
                qname = p_actor.name + ('htargs', )
                if mgrapi.RegisteredObject.by_name(qname):
                    q[pid] = None
                    continue
                q[pid] = {'env': {}, 'post': ''}
                ## the node holds the same dict object that the following
                ## httpreq_* records fill in
                an = mgrutil.BufferNode(qname, (ts, 2), q[pid])
                ph_call = PhpStart(p_actor, an)
                ph_call.tic = (ts, 5)
                ph_call.tac = (ts, 6)
                ph_call.connect()

            if not q[pid]:
                continue

            if rectype == 'httpreq_env':
                q[pid]['env'][subtype] = data
                if subtype == 'HTTP_X_CLIENT_ID':
                    ## XXX: relies on CLIENT_ID coming before PAGE_ID
                    p_actor.clientid = data
                if subtype == 'HTTP_X_PAGE_ID':
                    p_actor.pageid = data

            if rectype == 'httpreq_end':
                qname = p_actor.name + ('htargs', )
                an = mgrapi.RegisteredObject.by_name(qname)
                p_actor_ts = min(p_actor.actions).tic[0]
                bp = BrowserPageActor.get(p_actor.clientid, p_actor.pageid,
                                          (p_actor_ts, 0))
                breq_start = BrowserReqStart(bp, an)
                breq_start.tic = (p_actor_ts, 3)
                breq_start.tac = (p_actor_ts, 4)
                breq_start.connect()
                breq_start.connect_script()

            if rectype == 'httpreq_post':
                q[pid]['post'] = data

            if rectype == 'httpreq_cwd':
                q[pid]['cwd'] = data

            if rectype == 'httpresp':
                an = mgrutil.BufferNode(p_actor.name + ('htret', ), (ts, 2),
                                        data)
                ht_call = HttpResponse(an)
                ht_call.tic = (ts, 7)
                ht_call.tac = (ts, 8)
                ht_call.connect()

                ph_call = PhpExit(p_actor, an)
                ph_call.tic = (ts, 3)
                ph_call.tac = (ts, 4)
                ph_call.connect()

                ## NOTE(review): a bare int ts is passed here while
                ## httpreq_end passes a (ts, 0) tuple -- confirm which
                ## form BrowserPageActor.get expects.
                bp = BrowserPageActor.get(p_actor.clientid, p_actor.pageid,
                                          ts)
                breq_end = BrowserReqExit(bp, an)
                breq_end.tic = (ts, 5)
                breq_end.tac = (ts, 6)
                breq_end.connect()
def waitreq(self, action):
    """Wait for the browser's next HTTP request or for the browser to exit.

    Polls every 50 ms.

    * Browser exited (``self.browser.poll()`` is not ``None``): every
      action later than ``action`` is passed to ``cancel_action``,
      ``self.httpd`` is shut down, and ``False`` is returned.
    * A request is available: it is parsed into a dict with ``env``,
      ``post`` and optionally ``cwd``.  If the next recorded action is a
      ``BrowserReqStart`` with the same ``HTTP_X_REQ_ID``, the dict is
      stored on it as ``httpreq`` and ``True`` is returned.
    * An unmatched request carrying this page's ``HTTP_X_PAGE_ID`` gets a
      fresh PHP actor with BrowserReqStart/PhpStart/PhpExit/BrowserReqExit
      actions inserted between ``action`` and the next action; ``True``
      is returned.
    * Any other unmatched request is printed and dropped, and polling
      continues.

    Raises:
        ValueError: if a request line does not have exactly five
            space-separated fields, or no action follows ``action``.
    """
    while True:
        ## check if browser exited -- replay is complete when browser exits
        if self.browser.poll() is not None:
            ## cancel all the remaining actions; iterate over a snapshot
            ## so the loop does not depend on cancel_action leaving
            ## self.actions untouched
            for a in [x for x in self.actions if x > action]:
                cancel_action(a)
            self.httpd.shutdown()
            return False

        if self.httpd.has_request():
            ## read the request
            req = self.httpd.get_request()
            d = {'env': {}, 'post': ''}
            for line in req:
                (_pids, _tss, rectype, subtype, dataq) = line.split(' ')
                data = urllib.unquote(dataq)
                if rectype == 'httpreq_env':
                    d['env'][subtype] = data
                if rectype == 'httpreq_post':
                    d['post'] = data
                if rectype == 'httpreq_cwd':
                    d['cwd'] = data

            ## check whether the req matches the next recorded req in line
            ## for this page. if so, set the http req data
            nextact = min([a for a in self.actions if a > action])
            if isinstance(nextact, BrowserReqStart) and \
               'HTTP_X_REQ_ID' in nextact.argsnode.origdata['env'] and \
               'HTTP_X_REQ_ID' in d['env']:
                r1 = nextact.argsnode.origdata['env']['HTTP_X_REQ_ID']
                r2 = d['env']['HTTP_X_REQ_ID']
                if r1 == r2:
                    nextact.httpreq = d
                    return True

            ## single-argument form prints the same text on Python 2 and 3
            print('%s %s' % ('got unmatched http request ', d))
            if 'HTTP_X_PAGE_ID' in d['env'] and \
               d['env']['HTTP_X_PAGE_ID'] == self.pageid:
                print('unmatched req has same page id. processing it...')
                ts = util.between(action.tac, nextact.tic)

                ## create a php actor and attach:
                ##   BrowserReqStart -> htargs -> PhpStart, and
                ##   PhpExit -> htret -> BrowserReqExit
                php_pid = hash(time.time())  ## unique dummy 32-bit pid
                p_actor = PhpActor.get(php_pid)

                ## htargs
                args = mgrutil.BufferNode(p_actor.name + ('htargs', ),
                                          ts + (1, ), d)

                ## BrowserReqStart
                brs = BrowserReqStart(self, args)
                brs.tic = ts + (2, )
                brs.tac = ts + (3, )
                brs.connect()
                brs.connect_script()
                brs.httpreq = d

                ## Phpstart
                ph_call = PhpStart(p_actor, args)
                ph_call.tic = ts + (4, )
                ph_call.tac = ts + (5, )
                ph_call.connect()

                ## htret
                ret = mgrutil.BufferNode(p_actor.name + ('htret', ),
                                         ts + (6, ), "")

                ## PhpExit
                ph_call = PhpExit(p_actor, ret)
                ph_call.tic = ts + (7, )
                ph_call.tac = ts + (8, )
                ph_call.connect()

                ## BrowserReqExit
                bre = BrowserReqExit(self, ret)
                bre.tic = ts + (9, )
                bre.tac = ts + (10, )
                bre.connect()

                return True

        time.sleep(0.05)
def parse_record(self, r):
    """Convert one syscall record into graph objects and return its action.

    ``r.usage`` is a bit mask tested against ``ENTER`` and ``EXIT``.

    * ENTER: unless a ``sysarg`` node of that name is already registered,
      an args buffer node and a ``ProcSysCall`` are created for the
      calling process.
    * EXIT of an ordinary syscall: unless already registered, a
      ``sysret`` buffer node and a ``ProcSysRet`` are created (for
      ``NR_execve`` the record's ``uids`` are copied onto it).
    * EXIT of clone/fork/vfork: the same is done separately for the
      parent (``r.pid``) and the child (``r.ret``).

    The syscall action itself comes from ``CloneSyscallAction.get`` or
    ``SyscallAction.get``; freshly created nodes are attached to it, its
    ``tic``/``tac`` are set from ``r.ts``, and it is connected and
    returned.
    """
    find = mgrapi.RegisteredObject.by_name
    spawns_process = r.nr in (NR_clone, NR_fork, NR_vfork)

    args_buf = None
    ret_buf = None
    child_ret_buf = None
    parent_ret_buf = None

    def stamp(proc_action, first_slot):
        ## give a process-side action two consecutive slots, shifted by
        ## the pid generation
        proc_action.tic = r.ts + (first_slot + r.pid.gen, )
        proc_action.tac = r.ts + (first_slot + 1 + r.pid.gen, )

    # enter
    if r.usage & ENTER:
        caller = procmgr.ProcessActor.get(r.pid, r)
        args_name = caller.name + ('sysarg', r.sid)
        if find(args_name) is None:
            args_buf = mgrutil.BufferNode(args_name, r.ts + (2, ), r)
            call_act = procmgr.ProcSysCall(caller, args_buf, self)
            stamp(call_act, 0)
            call_act.connect()

    # exit
    if r.usage & EXIT:
        if spawns_process:
            child = procmgr.ProcessActor.get(r.ret, r)
            parent = procmgr.ProcessActor.get(r.pid, r)
            parent_name = parent.name + ('sysret', r.sid)
            child_name = child.name + ('sysret', r.sid)

            dbg.syscall('parentnode %s' % find(parent_name))
            if find(parent_name) is None:
                parent_ret_buf = mgrutil.BufferNode(parent_name,
                                                    r.ts + (1, ), r)
                ret_act = procmgr.ProcSysRet(parent, parent_ret_buf, self)
                stamp(ret_act, 4)
                ret_act.connect()

            dbg.syscall('childnode %s' % find(child_name))
            if find(child_name) is None:
                child_ret_buf = mgrutil.BufferNode(child_name,
                                                   r.ts + (1, ), r)
                ret_act = procmgr.ProcSysRet(child, child_ret_buf, self)
                stamp(ret_act, 2)
                ret_act.connect()
        else:
            returner = procmgr.ProcessActor.get(r.pid, r)
            ret_name = returner.name + ('sysret', r.sid)
            if find(ret_name) is None:
                ret_buf = mgrutil.BufferNode(ret_name, r.ts + (1, ), r)
                ret_act = procmgr.ProcSysRet(returner, ret_buf, self)
                stamp(ret_act, 2)
                if r.nr == NR_execve:
                    assert hasattr(r, "uids")
                    ret_act.uids = r.uids
                ret_act.connect()

    if spawns_process:
        sc = procmgr.CloneSyscallAction.get(r, self)
        dbg.syscall(
            'populating sc %s with parent_retnode %s and child_retnode %s'
            % (sc, parent_ret_buf, child_ret_buf))
        if parent_ret_buf:
            sc.parent_retnode = parent_ret_buf
        if child_ret_buf:
            sc.child_retnode = child_ret_buf
    else:
        sc = procmgr.SyscallAction.get(r, self)
        if ret_buf:
            sc.retnode = ret_buf

    if args_buf:
        sc.argsnode = args_buf
    if r.usage & ENTER:
        sc.tic = r.ts + (3, )
    if r.usage & EXIT:
        sc.tac = r.ts + (0, )

    ## Some system calls do not have return records
    ## (or return value objects, but that's OK for now.)
    if r.nr in (NR_exit, NR_exit_group):
        sc.tac = r.ts + (4, )

    sc.connect()
    return sc