def extractTCPFlows(entryList):
    """Group the TCP packets in ``entryList`` into flows and attach URLs.

    Entries are scanned in order. Only entries whose ``logID`` equals
    ``const.PROTOCOL_ID`` and whose ``ip["tlp_id"]`` equals ``const.TCP_ID``
    are considered, and only when ``Flow.extractFlowSignature`` returns a
    truthy signature for them.

    * A pure SYN (SYN set, ACK clear) opens a new flow if none is being
      tracked for that signature.
    * A FIN appends the packet to the tracked flow and closes that flow.
    * Any other packet is appended to the tracked flow, if there is one;
      packets with no tracked flow are dropped.

    Flows still open at the end of the trace are treated as finished.
    Flows with two packets or fewer are discarded from the result. Each
    remaining flow whose first packet has a source or destination IP
    present in the DNS-derived IP-to-URL map gets that URL via ``setURL``.

    When the module-level ``DNS_CHECK`` / ``FLOW_CHECK`` flags are truthy,
    debugging information is printed to stdout.

    Args:
        entryList: Indexable sequence of parsed log entries. Each entry is
            read through ``logID``, ``ip`` and ``tcp``; the entry's index in
            this list is passed to ``Flow.addPacket`` alongside the entry.

    Returns:
        List of ``Flow`` objects containing more than two packets.
    """
    finishedFlows = []
    # Maps a flow signature to the flow that is still being assembled.
    ongoingFlows = {}

    for index, entry in enumerate(entryList):
        # Skip anything that is not a TCP packet record.
        if not (entry.logID == const.PROTOCOL_ID and
                entry.ip["tlp_id"] == const.TCP_ID):
            continue

        signature = Flow.extractFlowSignature(entry)
        if not signature:
            continue

        if entry.tcp["SYN_FLAG"] and not entry.tcp["ACK_FLAG"]:
            # A pure SYN starts a new flow. A repeated SYN for a signature
            # that is already tracked is ignored.
            # NOTE(review): the original formatting of this branch was lost;
            # confirm a duplicate SYN should not be appended to the flow.
            if signature not in ongoingFlows:
                ongoingFlows[signature] = Flow(signature)
                ongoingFlows[signature].addPacket(entry, index)
        elif entry.tcp["FIN_FLAG"]:
            # The first FIN closes the flow; later packets carrying the same
            # signature no longer have a tracked flow and are dropped.
            if signature in ongoingFlows:
                ongoingFlows[signature].addPacket(entry, index)
                finishedFlows.append(ongoingFlows[signature])
                del ongoingFlows[signature]
        else:
            # Ordinary packet: only kept when its flow was opened by a SYN.
            if signature in ongoingFlows:
                ongoingFlows[signature].addPacket(entry, index)

    # Flows that never saw a FIN are still reported.
    finishedFlows.extend(ongoingFlows.values())

    # Drop very short flows (two packets or fewer).
    filteredFlows = [flow for flow in finishedFlows if len(flow.flow) > 2]

    # Use the DNS records of the same trace to label flows with a URL.
    dns = DNS(entryList)
    ipToURLMap = dns.getIpToURLMap()
    for flow in filteredFlows:
        firstPacket = flow.flow[0]
        inverseIp = None
        # Prefer the source address; fall back to the destination address.
        if firstPacket.ip["src_ip"] in ipToURLMap:
            inverseIp = firstPacket.ip["src_ip"]
        elif firstPacket.ip["dst_ip"] in ipToURLMap:
            inverseIp = firstPacket.ip["dst_ip"]

        if inverseIp is not None:
            flow.setURL(ipToURLMap[inverseIp])
            if DNS_CHECK:
                print(inverseIp + " -> " + str(ipToURLMap[inverseIp]))

    if FLOW_CHECK:
        # The debug report covers every finished flow, including the short
        # ones that were filtered out of the return value.
        for flow in finishedFlows:
            if flow.properties["http"] is not None:
                line = str(flow.properties["http"]) + "\t" + \
                       str(len(flow.flow)) + "\t" + \
                       util.convert_ts_in_human(flow.flow[0].timestamp)
                if flow.flow[0].rrcID is not None:
                    line += "\t" + const.RRC_MAP[flow.flow[0].rrcID]
                print(line)
        # NOTE(review): assumed to be printed once after the loop; the
        # original indentation of these two lines was lost.
        print("*" * 60)
        print("Total # of flows are " + str(len(finishedFlows)))

    return filteredFlows