def main(pcap_file):
    """Extract per-flow TCP statistics and payload dumps from one capture.

    Reads ``./raw_pcap/data2/<pcap_file>``, groups its TCP-over-IP packets
    into streams stored in the module-level ``flows`` mapping, and then:

    * prints the comma-joined ``attrs`` header to stdout;
    * writes one CSV row per flow with at least 20 packets to
      ``./output/<pcap_file[:-8]>.csv``;
    * writes a hexdump of each such flow's payload to
      ``./payload/<pcap_file>_<index>.txt``;
    * rewrites every file found in ``./payload`` into ``./payload2``,
      keeping only characters 6..37 of each line.

    ``flows`` and ``attrs`` are not created here; whatever ``flows`` already
    holds when this function is called is written out as well.

    Args:
        pcap_file: File name of the capture, relative to
            ``./raw_pcap/data2/``. Output names are derived by dropping its
            last 8 characters (CSV file name) or last 5 characters (row id).
    """
    packets = rdpcap("./raw_pcap/data2/" + pcap_file)
    # Keep only packets that carry both an IP and a TCP layer.
    # NOTE(review): the inner ``for p in pkt`` iterates the packet itself;
    # presumably this is equivalent to ``TCP in pkt`` for captured packets —
    # confirm before simplifying.
    packets = [pkt for pkt in packets if IP in pkt for p in pkt if TCP in p]

    # From here on every packet is TCP: attach each one to its stream.
    for pkt in packets:
        ip_layer = pkt[IP]
        flow_tuple, reverse_flow_tuple = create_flow_keys(ip_layer)
        flow_key, tcp_stream = lookup_stream(flow_tuple, reverse_flow_tuple)
        if tcp_stream is None:
            # First packet seen for this flow.
            tcp_stream = TCPStream(ip_layer)
        else:
            tcp_stream.add(ip_layer)
        flows[flow_key] = tcp_stream

    header = ','.join(attrs)
    print(header)

    with open('./output/' + pcap_file[:-8] + '.csv', 'w+') as f:
        f.write('id,' + header + '\n')
        # The index counts every flow, including the ones skipped below, so
        # ids and payload file names may have gaps.
        for i, flow in enumerate(flows.values()):
            if flow.pkt_count < 20:
                continue
            # NOTE(review): the row starts with the value of proto_name(...),
            # for which the 'id,' + attrs header may not have a column —
            # verify against the definition of ``attrs``.
            tmp = (
                "%s,%s,%s,%s,%s,%s,%.3f,%s,%s,%s,%s,%s,%s" %
                (proto_name(flow.sport, flow.dport), flow.src, flow.sport,
                 flow.dst, flow.dport, flow.proto, flow.push_flag_ratio(),
                 flow.avrg_len(), flow.avrg_payload_len(), flow.pkt_count,
                 flow.avrg_inter_arrival_time(), flow.kolmogorov(),
                 flow.shannon()))
            with open("./payload/" + pcap_file + "_" + str(i) + '.txt',
                      'w+') as ff:
                ff.write(hexdump(flow.payload, dump=True))
            f.write(pcap_file[:-5] + "_" + str(i) + "," + tmp + "\n")

    # Second pass: copy every dump in ./payload (not only the ones written
    # above) to ./payload2, keeping a fixed character window of each line.
    for name in os.listdir("./payload"):
        with open("./payload/" + name, 'r') as src:
            lines = src.readlines()
        with open("./payload2/" + name, 'w+') as dst:
            dst.writelines(line[6:38] + "\n" for line in lines)
def get_data(path):
    """Group the TCP-over-IP packets of a capture file into streams.

    Both directions of a connection end up in the same stream: a packet is
    added to an existing stream when either its flow key or its reversed
    flow key is already known, otherwise it starts a new stream under its
    forward key.

    Args:
        path: Path of the capture file, passed to ``rdpcap``.

    Returns:
        A dict mapping flow keys (as produced by ``create_flow_keys``) to
        ``TCPStream`` objects.
    """
    flows = {}
    # Keep only packets that carry both an IP and a TCP layer.
    # NOTE(review): the inner ``for p in pkt`` iterates the packet itself;
    # presumably this is equivalent to ``TCP in pkt`` for captured packets —
    # confirm before simplifying.
    packets = [pkt for pkt in rdpcap(path)
               if IP in pkt for p in pkt if TCP in p]

    for pkt in packets:
        ip_layer = pkt[IP]
        flow_tuple, reverse_flow_tuple = create_flow_keys(ip_layer)
        # Test membership on the dict itself: ``flows.keys()`` builds a list
        # on Python 2, which made every lookup a linear scan.
        if flow_tuple in flows:
            flows[flow_tuple].add(ip_layer)
        elif reverse_flow_tuple in flows:
            flows[reverse_flow_tuple].add(ip_layer)
        else:
            # First packet of a new flow.
            flows[flow_tuple] = TCPStream(ip_layer)
    return flows
def parse(pcap_file):
    """Group the TCP packets of one capture into unidirectional streams.

    Reads ``config.HTTPS_CONFIG["pcap_path"] + pcap_file``, keeps the packets
    that have both an IP and a TCP layer, and stores one ``TCPStream`` per
    flow key in the ``flows`` mapping (not created in this function).

    NOTE(review): the five output paths computed at the end are not used in
    the visible code; presumably the function continues beyond this point —
    confirm against the full file.
    """
    packets=rdpcap(config.HTTPS_CONFIG["pcap_path"]+pcap_file)
    packets = [ pkt for pkt in packets if IP in pkt for p in pkt if TCP in p ]
    #here we are sure ALL PACKETS ARE TCP
    for pkt in packets:
        flow_tuple = reverse_flow_tuple = key_to_search = None
        flow_tuple,reverse_flow_tuple = create_flow_keys(pkt[IP])
        # mixed flow (looks up both the forward and the reversed key)
        # flow_key,tcp_stream = lookup_stream(flow_tuple,reverse_flow_tuple)
        # unidirectional flow (looks up the forward key only)
        flow_key,tcp_stream=lookup_stream_single(flow_tuple)
        if tcp_stream is None:
            # first packet seen for this flow key
            tcp_stream = TCPStream(pkt[IP])
        else:
            tcp_stream.add(pkt[IP])
        flows[flow_key] = tcp_stream
    # Output file paths; each drops the last 8 characters of pcap_file and
    # appends a per-feature suffix.
    a=config.HTTPS_CONFIG["total_path"]+pcap_file[:-8]+'.csv'
    b=config.HTTPS_CONFIG["record_type_total"]+pcap_file[:-8]+'_record_type.csv'
    c=config.HTTPS_CONFIG["packet_length_total"]+pcap_file[:-8]+'_packet_length.csv'
    d=config.HTTPS_CONFIG["time_interval_total"]+pcap_file[:-8]+'_time_interval.csv'
    e=config.HTTPS_CONFIG["payload_total"]+pcap_file[:-8]+'_payload.csv'
def parse(pcap_file):
    """Append the inter-arrival times of each sizeable TCP flow to a CSV file.

    Reads ``config.HTTPS_CONFIG["pcap_path"] + pcap_file``, groups its
    TCP-over-IP packets into streams stored in the ``flows`` mapping (not
    created in this function), then appends one line per flow with at least
    20 packets to
    ``config.HTTPS_CONFIG["time_interval_total"] + pcap_file[:-8] +
    '_time_interval.csv'``.

    Each line is ``<pcap_file[:-5]>_<index>,<values>``, where ``<values>`` is
    the string form of ``padArray(flow.inter_arrival_times, 0)`` with the
    surrounding square brackets stripped.

    Args:
        pcap_file: File name of the capture, relative to the configured
            ``pcap_path``.
    """
    packets = rdpcap(config.HTTPS_CONFIG["pcap_path"] + pcap_file)
    # Keep only packets that carry both an IP and a TCP layer.
    # NOTE(review): the inner ``for p in pkt`` iterates the packet itself;
    # presumably this is equivalent to ``TCP in pkt`` for captured packets —
    # confirm before simplifying.
    packets = [pkt for pkt in packets if IP in pkt for p in pkt if TCP in p]

    # From here on every packet is TCP: attach each one to its stream.
    for pkt in packets:
        ip_layer = pkt[IP]
        flow_tuple, reverse_flow_tuple = create_flow_keys(ip_layer)
        flow_key, tcp_stream = lookup_stream(flow_tuple, reverse_flow_tuple)
        if tcp_stream is None:
            # First packet seen for this flow.
            tcp_stream = TCPStream(ip_layer)
        else:
            tcp_stream.add(ip_layer)
        flows[flow_key] = tcp_stream

    out_path = (config.HTTPS_CONFIG["time_interval_total"]
                + pcap_file[:-8] + '_time_interval.csv')
    # Append mode: results from several captures can share one file.
    with open(out_path, 'a') as f:
        # The index counts every flow, including the skipped ones, so the
        # ids written to the file may have gaps.
        for i, flow in enumerate(flows.values()):
            # Only flows with at least 20 packets are kept.
            if flow.pkt_count < 20:
                continue
            tmp = str(padArray(flow.inter_arrival_times, 0)).strip('[]')
            f.write(pcap_file[:-5] + "_" + str(i) + "," + tmp + "\n")
attrs = ['src','sport','dst','dport','proto','push_flag_ratio','average_len','average_payload_len','pkt_count','flow_average_inter_arrival_time','kolmogorov','shannon'] #TODO check if its possible to pack it again in the original class, that we are able to call .conversations() on this array myreader = PcapReader(pcap_file) #use iterator for pkt in myreader: if IP not in pkt: continue if pkt.proto not in (6,17): continue flow_tuple = reverse_flow_tuple = key_to_search = None flow_tuple,reverse_flow_tuple = create_flow_keys(pkt[IP]) flow_key,network_stream = lookup_stream(flow_tuple,reverse_flow_tuple) if network_stream is None: if pkt.proto == 6: network_stream = TCPStream(pkt[IP]) else: network_stream = UDPStream(pkt[IP]) else: network_stream.add(pkt[IP]) flows[flow_key] = network_stream if output_type == "arff": print "@relation protocol_detection" print "@attribute protocol-name string" for attr in attrs: if attr in ['proto','pkt_count','average_len','flow_average_inter_arrival_time','push_flag_ratio','average_payload_len','kolmogorov','shannon']: print "@attribute",attr,"numeric" else:
for pkt in packets: #filtering IP,TCP an UDP if not IP in pkt: continue if not pkt[IP].proto in [1,6,17]: continue #XXX HACK get payload layered length on top of IP. the if above filter out protocols other than ICMP, TCP and ICMP #packets without payload shouldnt be counted, otherwise it is a problem for entropy and chitest if not len(pkt[IP].payload.payload): continue flow_tuple = reverse_flow_tuple = key_to_search = None flow_tuple,reverse_flow_tuple = create_flow_keys(pkt[IP]) flow_key,stream = lookup_stream(flow_tuple,reverse_flow_tuple) if stream is None: #XXX: it doesnt belong to here if pkt[IP].proto == 6: stream = TCPStream(pkt[IP]) elif pkt[IP].proto == 17: stream = UDPStream(pkt[IP]) elif pkt[IP].proto == 1: stream = ICMPStream(pkt[IP]) else: pass else: stream.add(pkt[IP]) if stream: flows[flow_key] = stream print "flow,entropy_mean,entropy_sd,pvalue" for idx,flow in enumerate(flows.values()): #filter flows with less than 5 packets #it was just used in this python script. its not how we are doing
'src', 'sport', 'dst', 'dport', 'proto', 'push_flag_ratio', 'average_len', 'average_payload_len', 'pkt_count', 'flow_average_inter_arrival_time', 'kolmogorov', 'shannon' ] #reduce it to TCP #TODO check if its possible to pack it again in the original class, that we are able to call .conversations() on this array packets = [pkt for pkt in packets if IP in pkt for p in pkt if TCP in p] #here we are sure ALL PACKETS ARE TCP for pkt in packets: flow_tuple = reverse_flow_tuple = key_to_search = None flow_tuple, reverse_flow_tuple = create_flow_keys(pkt[IP]) flow_key, tcp_stream = lookup_stream(flow_tuple, reverse_flow_tuple) if tcp_stream is None: tcp_stream = TCPStream(pkt[IP]) else: tcp_stream.add(pkt[IP]) flows[flow_key] = tcp_stream if output_type == "arff": print("@relation protocol_detection") print("@attribute protocol-name", "{ssh,http,mysql,unknown}") for attr in attrs: if attr in [ 'pkt_count', 'average_len', 'flow_average_inter_arrival_time', 'push_flag_ratio', 'average_payload_len', 'kolmogorov', 'shannon' ]:
# Group the TCP packets of `packets` into streams keyed by flow, then, for
# ARFF output, print the ARFF header (relation, attributes, @data marker).
# `packets` and `output_type` are expected to be defined before this point.
flows = {}
# Attribute names emitted in the ARFF header, in order.
attrs = ['src','sport','dst','dport','proto','flags','average_len','pkt_count','flow_average_inter_arrival_time']
#reduce it to TCP
#TODO check if its possible to pack it again in the original class, that we are able to call .conversations() on this array
packets = [ pkt for pkt in packets if IP in pkt for p in pkt if TCP in p ]
#here we are sure ALL PACKETS ARE TCP
for pkt in packets:
    flow_tuple = reverse_flow_tuple = key_to_search = None
    flow_tuple,reverse_flow_tuple = create_flow_keys(pkt[IP])
    flow_key,tcp_stream = lookup_stream(flow_tuple,reverse_flow_tuple)
    if tcp_stream is None:
        # first packet seen for this flow: start a new stream
        tcp_stream = TCPStream(pkt[IP])
    else:
        tcp_stream.add(pkt[IP])
    flows[flow_key] = tcp_stream
if output_type == "arff":
    print "@relation protocol_detection"
    # the protocol name is a nominal attribute with a fixed set of values
    print "@attribute protocol-name","{ssh,http,mysql,unknown}"
    for attr in attrs:
        # these three are declared numeric; every other attribute is a string
        if attr in ['pkt_count','average_len','flow_average_inter_arrival_time']:
            print "@attribute",attr,"numeric"
        else:
            print "@attribute",attr,"string"
    print "@data"