def extractTCPFlows(entryList):
    """Group the TCP entries of a trace into flows and return the non-trivial ones.

    Entries are scanned in order.  An entry is considered only when its
    ``logID`` equals ``const.PROTOCOL_ID``, its ``ip["tlp_id"]`` equals
    ``const.TCP_ID`` and ``Flow.extractFlowSignature`` returns a truthy
    signature for it.  For such an entry:

    * SYN without ACK opens a new flow, unless a flow with the same
      signature is already open (the packet is then ignored);
    * FIN is appended to the open flow with that signature and closes it;
    * any other packet is appended to the open flow with that signature;
    * packets that match no open flow are dropped.

    Flows still open when the trace ends are kept as well.  Flows whose
    ``flow`` sequence holds two or fewer items are then filtered out, and
    each remaining flow is given a URL through ``setURL`` when the source
    or destination IP of its first packet appears in the map returned by
    ``DNS(entryList).getIpToURLMap()``.

    When the module-level ``DNS_CHECK`` / ``FLOW_CHECK`` switches are truthy,
    diagnostic lines are printed.  Note that the ``FLOW_CHECK`` report covers
    every collected flow, including the short ones that are not returned.

    Args:
        entryList: sequence of log entries.  TCP entries are expected to
            expose ``logID``, ``ip`` and ``tcp`` as used above.

    Returns:
        list of ``Flow`` objects whose ``flow`` sequence has more than two
        items: flows closed by a FIN first (in closing order), followed by
        the flows that were still open (in dictionary iteration order).
    """
    finishedFlows = []
    # Open flows, keyed by flow signature.
    ongoingFlows = {}

    for index, entry in enumerate(entryList):
        if not (entry.logID == const.PROTOCOL_ID and
                entry.ip["tlp_id"] == const.TCP_ID):
            continue

        signature = Flow.extractFlowSignature(entry)
        if not signature:
            continue

        isOpen = signature in ongoingFlows
        if entry.tcp["SYN_FLAG"] and not entry.tcp["ACK_FLAG"]:
            # A bare SYN starts a flow; a repeated SYN for a signature that
            # is already open is ignored.
            if not isOpen:
                flow = Flow(signature)
                ongoingFlows[signature] = flow
                flow.addPacket(entry, index)
        elif entry.tcp["FIN_FLAG"]:
            # The first FIN seen closes the flow; later packets carrying the
            # same signature no longer match an open flow.
            if isOpen:
                flow = ongoingFlows[signature]
                flow.addPacket(entry, index)
                finishedFlows.append(flow)
                del ongoingFlows[signature]
        elif isOpen:
            ongoingFlows[signature].addPacket(entry, index)

    # Flows that never saw a FIN are still reported.
    finishedFlows.extend(ongoingFlows.values())

    # Filter out super short flows.
    filteredFlows = [f for f in finishedFlows if len(f.flow) > 2]

    # Use the DNS trace to attach a URL to each remaining flow.
    ipToURLMap = DNS(entryList).getIpToURLMap()
    for f in filteredFlows:
        syn = f.flow[0]
        inverseIp = None
        if syn.ip["src_ip"] in ipToURLMap:
            inverseIp = syn.ip["src_ip"]
        elif syn.ip["dst_ip"] in ipToURLMap:
            inverseIp = syn.ip["dst_ip"]
        if inverseIp is not None:
            url = ipToURLMap[inverseIp]
            f.setURL(url)
            if DNS_CHECK:
                print(inverseIp + " -> " + str(url))

    if FLOW_CHECK:
        # Deliberately walks finishedFlows, so short flows are listed too.
        for f in finishedFlows:
            if f.properties["http"] is not None:
                first = f.flow[0]
                fields = [str(f.properties["http"]),
                          str(len(f.flow)),
                          util.convert_ts_in_human(first.timestamp)]
                if first.rrcID is not None:
                    fields.append(const.RRC_MAP[first.rrcID])
                print("\t".join(fields))
        print("*" * 60)
        print("Total # of flows are " + str(len(finishedFlows)))

    return filteredFlows
# Example #2
# 0
def extractTCPFlows(entryList):
    """Group the TCP entries of a trace into flows and return the non-trivial ones.

    Entries are scanned in order.  An entry is considered only when its
    ``logID`` equals ``const.PROTOCOL_ID``, its ``ip["tlp_id"]`` equals
    ``const.TCP_ID`` and ``Flow.extractFlowSignature`` returns a truthy
    signature for it.  For such an entry:

    * SYN without ACK opens a new flow, unless a flow with the same
      signature is already open (the packet is then ignored);
    * FIN is appended to the open flow with that signature and closes it;
    * any other packet is appended to the open flow with that signature;
    * packets that match no open flow are dropped.

    Flows still open when the trace ends are kept as well.  Flows whose
    ``flow`` sequence holds two or fewer items are then filtered out, and
    each remaining flow is given a URL through ``setURL`` when the source
    or destination IP of its first packet appears in the map returned by
    ``DNS(entryList).getIpToURLMap()``.

    When the module-level ``DNS_CHECK`` / ``FLOW_CHECK`` switches are truthy,
    diagnostic lines are printed.  Note that the ``FLOW_CHECK`` report covers
    every collected flow, including the short ones that are not returned.

    Args:
        entryList: sequence of log entries.  TCP entries are expected to
            expose ``logID``, ``ip`` and ``tcp`` as used above.

    Returns:
        list of ``Flow`` objects whose ``flow`` sequence has more than two
        items: flows closed by a FIN first (in closing order), followed by
        the flows that were still open (in dictionary iteration order).
    """
    finishedFlows = []
    # Open flows, keyed by flow signature.
    ongoingFlows = {}

    for index, entry in enumerate(entryList):
        if not (entry.logID == const.PROTOCOL_ID and
                entry.ip["tlp_id"] == const.TCP_ID):
            continue

        signature = Flow.extractFlowSignature(entry)
        if not signature:
            continue

        isOpen = signature in ongoingFlows
        if entry.tcp["SYN_FLAG"] and not entry.tcp["ACK_FLAG"]:
            # A bare SYN starts a flow; a repeated SYN for a signature that
            # is already open is ignored.
            if not isOpen:
                flow = Flow(signature)
                ongoingFlows[signature] = flow
                flow.addPacket(entry, index)
        elif entry.tcp["FIN_FLAG"]:
            # The first FIN seen closes the flow; later packets carrying the
            # same signature no longer match an open flow.
            if isOpen:
                flow = ongoingFlows[signature]
                flow.addPacket(entry, index)
                finishedFlows.append(flow)
                del ongoingFlows[signature]
        elif isOpen:
            ongoingFlows[signature].addPacket(entry, index)

    # Flows that never saw a FIN are still reported.
    finishedFlows.extend(ongoingFlows.values())

    # Filter out super short flows.
    filteredFlows = [f for f in finishedFlows if len(f.flow) > 2]

    # Use the DNS trace to attach a URL to each remaining flow.
    ipToURLMap = DNS(entryList).getIpToURLMap()
    for f in filteredFlows:
        syn = f.flow[0]
        inverseIp = None
        if syn.ip["src_ip"] in ipToURLMap:
            inverseIp = syn.ip["src_ip"]
        elif syn.ip["dst_ip"] in ipToURLMap:
            inverseIp = syn.ip["dst_ip"]
        if inverseIp is not None:
            url = ipToURLMap[inverseIp]
            f.setURL(url)
            if DNS_CHECK:
                print(inverseIp + " -> " + str(url))

    if FLOW_CHECK:
        # Deliberately walks finishedFlows, so short flows are listed too.
        for f in finishedFlows:
            if f.properties["http"] is not None:
                first = f.flow[0]
                fields = [str(f.properties["http"]),
                          str(len(f.flow)),
                          util.convert_ts_in_human(first.timestamp)]
                if first.rrcID is not None:
                    fields.append(const.RRC_MAP[first.rrcID])
                print("\t".join(fields))
        print("*" * 60)
        print("Total # of flows are " + str(len(finishedFlows)))

    return filteredFlows