def modelStreamStructure(http_entries):
    """Model HTTP traffic with StreamStructure.

    The entries are sorted in place by request start time, grouped by
    user agent, and each group is handed to ``WebTree.plant``.  Every
    planted tree is then cut: each non-root node that ``is_pred_entity``
    accepts and for which ``is_branch`` reports more than three entries
    is detached with ``remove_subtree`` and appended to the same user
    agent's list as a tree of its own.  Finally every resulting tree is
    displayed with ``show()``.

    :param http_entries: list of ``HTTPLogEntry`` objects.  The list is
        re-ordered in place; only the first element's type is checked.
    :return: dict mapping each user agent to its list of ``WebTree``
        objects.  An empty (or missing) input yields an empty dict.
    """
    # Nothing to model.  This guard also keeps the sanity check below from
    # indexing into an empty list, which used to raise IndexError.
    if not http_entries:
        return {}

    # Debug-time sanity check only (asserts are stripped under ``-O``).
    assert isinstance(http_entries[0], HTTPLogEntry)
    http_entries.sort(key=lambda entry: entry.rqtstart())

    # Group the time-ordered entries by user agent.
    entities = {}
    for entry in http_entries:
        entities.setdefault(entry.ua(), []).append(entry)

    # Temporary result: the trees planted for every user agent, before cutting.
    forest = {}
    for ua, entries in entities.items():
        forest[ua] = list(WebTree.plant(entries))

    # Finalize the forest with the cutting suggested by StreamStructure.
    for trees in forest.values():
        # Iterate over a reversed snapshot: the cut subtrees are appended to
        # ``trees`` below and must not be visited again by this loop.
        for wt in trees[::-1]:
            width_first = list(wt.expand_tree(mode=WebTree.WIDTH))
            # Reversed width-first search.  All cut points are collected
            # before anything is removed, so the checks see the intact tree.
            cut_points = [
                node for node in reversed(width_first)
                if is_pred_entity(wt[node].pl)
                and len(wt.is_branch(node)) > 3
                and node != wt.root
            ]
            for node in cut_points:
                trees.append(WebTree(tree=wt.remove_subtree(node)))

    # Display every tree of the final forest.
    for trees in forest.values():
        for tree in trees:
            tree.show()
    return forest
def add_entry(self, http_log_entry):
    """Add one HTTP log entry to this user.

    The entry is skipped if its request start time is null.  Otherwise
    it is attached to one of this user's web trees, or a new tree is
    created for it:

    * The existing trees in ``self.webtrees`` are scanned from the last
      one backwards.  A tree is a candidate when the entry starts no
      more than 15 minutes after the request start time of the tree's
      last node.  The entry is linked under the node of the first
      candidate tree whose URL (protocol stripped) equals the entry's
      referer (protocol stripped).
    * If no such node is found, a new tree is appended to
      ``self.webtrees``.  When the entry has a referer, the new tree is
      rooted at a dummy node built from that referer and the entry
      becomes its child; otherwise the entry itself is the root.

    :param http_log_entry: the log entry to wrap in a ``WebNode``.
    :return: ``False`` if the entry has no request start time and was
        skipped, ``True`` once it has been placed in a tree.
    """
    # Create a new node.
    nn = WebNode(http_log_entry)
    nn_ref = nn.pl.referer()
    nn_st = nn.pl.rqtstart()
    # It's not a valid HTTP request, return directly.
    if nn_st is None:
        return False
    nn_et = nn.pl.rspend()
    nn_md = nn.pl.method()
    nn_ct = nn.pl.type()
    nn_url = nn.pl.url()
    nn_ua = nn.pl.ua()

    # Set a readable node tag (formatted tag shown by the tree).
    nn.tag = "%s | %s | %s | %s | %s | %s |%s " % (
        nn_st,
        nn_et,
        str(nn_md),
        str(nn_url)[:50],
        str(nn_ct).split(";")[0],
        "Yes" if nn_ref is not None else "No",
        nn_ua,
    )

    # Normalise the referer once; it is used both for the predecessor
    # lookup and for building the dummy root below.
    # NOTE(review): assumes URL.strip_proto is idempotent -- confirm.
    referer = None if nn_ref is None else URL.strip_proto(nn_ref)

    session_idle_limit = 60 * 15  # session idle time of 15 minutes, in seconds

    # Try to add this node to an existing web tree of this user.  Without a
    # referer there is nothing to match against, so the scan is skipped.
    linked = False
    if referer is not None:
        for webtree in reversed(self.webtrees):
            tree_nodes = webtree.get_nodes()
            last_action_time = tree_nodes[-1].pl.rqtstart()
            if float(nn_st) - float(last_action_time) <= session_idle_limit:
                # Map protocol-stripped URL -> node id; when several nodes
                # share a URL, the one listed last wins.
                url_nid_map = {}
                for node in tree_nodes:
                    node_url = node.pl.url()
                    if node_url is None:
                        continue
                    url_nid_map[URL.strip_proto(node_url)] = node.identifier
                pred_id = url_nid_map.get(referer)
                if pred_id is not None:
                    # Predecessor found: link the new node beneath it.
                    webtree.add_node(nn, pred_id)
                    linked = True
                    break

    # After all the trees are checked: start a new tree if nothing matched.
    if not linked:
        new_tree = WebTree()
        if referer is not None:
            # Root the tree at a dummy node that stands in for the referer.
            dn = WebNode(HTTPLogEntry())
            dn.type = 0  # dummy signature
            dn.pl['request_timestamp'] = nn.pl.rqtstart()
            # Split "host/path" at the first slash; no slash means no path.
            host, slash, path = referer.partition('/')
            dn.pl['request_host'] = host
            dn.pl['request_url'] = '/' + path if slash else ''
            dn.tag = "(dummy) %s | %s" % (dn.pl.rqtstart(), dn.pl.url()[:50])
            new_tree.add_node(dn, parent=None)
            new_tree.add_node(nn, dn.identifier)
        else:
            new_tree.add_node(nn, parent=None)
        self.webtrees.append(new_tree)
    return True