def modelStreamStructure(http_entries):
    """Model HTTP traffic as per-user-agent web trees (StreamStructure).

    The entries are sorted in place by request start time, grouped by user
    agent, and planted into trees with ``WebTree.plant``. Each planted tree
    is then cut: every non-root node whose payload satisfies
    ``is_pred_entity`` and for which ``wt.is_branch(node)`` returns more
    than three items is detached, together with its subtree, and wrapped
    in a ``WebTree`` of its own.

    Every resulting tree is printed with ``tree.show()`` before returning.

    Args:
        http_entries: list of ``HTTPLogEntry`` objects. The list is sorted
            in place.

    Returns:
        dict mapping each user agent to its list of trees: the planted
        trees followed by the subtrees cut from them. An empty input
        yields an empty dict.
    """
    if not http_entries:
        # Nothing to model; also avoids indexing an empty list below.
        return {}
    assert isinstance(http_entries[0], HTTPLogEntry)
    http_entries.sort(key=lambda x: x.rqtstart())

    # Group the time-ordered entries by user agent.
    entities = {}
    for e in http_entries:
        entities.setdefault(e.ua(), []).append(e)

    # Intermediate result: trees planted per user agent, before cutting.
    forest_tmp = {}
    for ua, entries in entities.items():
        forest_tmp[ua] = list(WebTree.plant(entries))

    # Finalize the forest with the cutting suggested by StreamStructure.
    forest = {}
    for ua, wts in forest_tmp.items():
        # Iterate over a reversed snapshot: the subtrees cut below are
        # appended to ``wts`` and are not visited by this loop.
        for wt in wts[::-1]:
            width_first = list(wt.expand_tree(mode=WebTree.WIDTH))
            # Reversed width-first order: the nodes visited last come first.
            # All candidates are selected before any subtree is removed.
            rn = [
                node for node in reversed(width_first)
                if is_pred_entity(wt[node].pl)
                and len(wt.is_branch(node)) > 3
                and node != wt.root
            ]
            for n in rn:
                wts.append(WebTree(tree=wt.remove_subtree(n)))
        forest[ua] = wts

    for ua in forest:
        for tree in forest[ua]:
            tree.show()

    # Returned at function level, after every user agent has been shown.
    return forest
Example #2
0
	def add_entry(self, http_log_entry):
		""" Add one HTTP log entry to this user.

		The entry is skipped if its request start time is None. Otherwise it
		is attached to an existing web tree, or a new tree is created for it.

		Existing trees are tried from the most recently appended one. A tree
		qualifies when its last node started at most 15 minutes before this
		entry and it contains a node whose URL (protocol stripped) equals the
		entry's referer (protocol stripped). When no tree qualifies, a new
		tree is appended to ``self.webtrees``; if the entry has a referer,
		the new tree is rooted at a dummy node built from that referer.

		Returns False if the entry was skipped, True if it was added (either
		to an existing tree or to a new one).
		"""
		# create a new node
		nn = WebNode(http_log_entry)
		nn_ref = nn.pl.referer()
		nn_st = nn.pl.rqtstart()
		# it's not a valid HTTP request, return directly
		if nn_st is None:
			return False
		nn_et = nn.pl.rspend()
		nn_md = nn.pl.method()
		nn_ct = nn.pl.type()
		nn_url = nn.pl.url()
		nn_ua = nn.pl.ua()
		# Set readable node tag
		nn.tag = "%s | %s | %s | %s | %s | %s |%s " % (
			nn_st, nn_et,
			str(nn_md), str(nn_url)[:50],
			str(nn_ct).split(";")[0],
			"Yes" if nn_ref is not None else "No", nn_ua)

		# Referer without its protocol, computed once and reused below.
		# NOTE(review): assumes URL.strip_proto is pure and idempotent -- confirm.
		ref_key = None
		if nn_ref is not None:
			ref_key = URL.strip_proto(nn_ref)

		# add this node to a web tree of this user
		linked_flag = False
		for webtree in self.webtrees[::-1]:
			nodes = webtree.get_nodes()
			last_action_time = nodes[-1].pl.rqtstart()
			# session idle time equals to 15 mins
			if float(nn_st) - float(last_action_time) > 60 * 15:  # seconds
				continue
			# Without a referer there is no predecessor to look for.
			if ref_key is None:
				break
			# Find its predecessor: scan from the end so that, when several
			# nodes share the referer URL, the last one is chosen.
			pred_id = None
			for node in reversed(nodes):
				node_url = node.pl.url()
				if node_url is None:
					continue
				if URL.strip_proto(node_url) == ref_key:
					pred_id = node.identifier
					break
			if pred_id is not None:
				# Predecessor found...
				webtree.add_node(nn, pred_id)
				linked_flag = True
				break

		# After all the trees are checked:
		if not linked_flag:
			new_tree = WebTree()
			if ref_key is not None:
				# Root the new tree at a dummy node standing for the referer.
				dn = WebNode(HTTPLogEntry())
				dn.type = 0	# dummy signature
				dn.pl['request_timestamp'] = nn_st
				# Split "host/path" into host and path.
				chops = ref_key.split('/', 1)
				dn.pl['request_host'] = chops[0]
				if len(chops) == 1:
					dn.pl['request_url'] = ''
				else:
					dn.pl['request_url'] = '/' + chops[1]
				dn.tag = "(dummy) %s | %s" % (dn.pl.rqtstart(), dn.pl.url()[:50])
				new_tree.add_node(dn, parent=None)
				new_tree.add_node(nn, dn.identifier)
			else:
				new_tree.add_node(nn, parent=None)
			self.webtrees.append(new_tree)
		# The entry was added in both cases, so report success for both.
		return True