def build_messages(filename):
    """Load a capture file and wrap each packet payload in a ``Message``.

    Every packet returned by ``load_pcap(filename, strict=True)`` is tagged
    with two integer ids, both handed out in order of first appearance
    starting at 0:

    * a connection id, shared by both directions of a socket pair, and
    * a stream id, which is distinct for each direction.

    Returns the list of ``Message(payload, conn_id, stream_id)`` objects in
    the same order as the packets.
    """
    conn_of = {}    # (src, dst) and (dst, src) -> shared connection id
    stream_of = {}  # (src, dst) -> per-direction stream id
    conn_count = 0
    result = []

    for pkt in load_pcap(filename, strict=True):
        source = (pkt.src_addr, pkt.src_port)
        target = (pkt.dst_addr, pkt.dst_port)
        forward = (source, target)

        try:
            conn = conn_of[forward]
        except KeyError:
            # First packet seen on this connection: register both
            # directions under the same id.
            conn = conn_count
            conn_count += 1
            conn_of[forward] = conn
            conn_of[(target, source)] = conn

        # Exactly one entry is added per new stream, so the current size of
        # the mapping is always the next unused stream id.
        stream = stream_of.setdefault(forward, len(stream_of))

        result.append(Message(pkt.payload, conn, stream))

    return result
#! /usr/bin/env python
# Driver script: loads a DNS capture, collects the per-packet labels from a
# CSV file, and runs the protocol analyser's clustering step on the result.
import pcap_glue
import protocol_analyser
import pcap_reassembler

packets = pcap_reassembler.load_pcap('../cap/dns-30628-packets.pcap', strict=True)

# A set gives O(1) membership tests; the CSV below is checked row by row
# against these numbers, which was quadratic with a list.
nums = {p.number for p in packets}

# Labels for the packets that were actually loaded, kept in CSV row order.
truth = []
with open('../cap/dns.csv.clean') as f:
    for line in f:
        # Each row is expected to be "<packet number>,<label>".
        (no, type_) = line.split(',')
        if int(no) in nums:
            # Strip only the line terminator, so a final row without a
            # trailing newline keeps its last character.
            truth.append(type_.rstrip('\n'))

msgs = pcap_glue.build_messages('../cap/dns-30628-packets.pcap')

# NOTE(review): the meaning of the positional arguments (100, 200) is defined
# by protocol_analyser and not visible here -- confirm before changing them.
an = protocol_analyser.ProtocolAnalyser(msgs[:20000], 100, truth)
an.cluster(200, max_num_types=10)

# Further analysis steps, currently disabled:
#an.state_inference('dns-state_diagram.png', 5)
#an.classify_fields()