def main(pcap_file):
    """Extract per-flow TCP statistics from one capture and write them to disk.

    The capture is read from ``./raw_pcap/data2/<pcap_file>``. Packets that
    contain both an IP and a TCP layer are grouped into the module-level
    ``flows`` mapping. For every flow with at least 20 packets, one CSV row is
    written to ``./output/`` and a hexdump of ``flow.payload`` is written to
    ``./payload/``. Finally, every file found in ``./payload`` is copied to
    ``./payload2`` keeping only characters 6..37 of each line.

    :param pcap_file: file name of the capture, relative to the input folder.
    """
    capture = rdpcap("./raw_pcap/data2/" + pcap_file)
    tcp_packets = [pkt for pkt in capture if IP in pkt for p in pkt if TCP in p]

    # Every remaining packet carries a TCP layer; attach each one to its flow.
    for packet in tcp_packets:
        ip_layer = packet[IP]
        forward_key, backward_key = create_flow_keys(ip_layer)
        flow_key, tcp_stream = lookup_stream(forward_key, backward_key)
        if tcp_stream is not None:
            tcp_stream.add(ip_layer)
        else:
            # First packet seen for this flow.
            tcp_stream = TCPStream(ip_layer)

        flows[flow_key] = tcp_stream

    header = ','.join(attrs)
    print(header)

    # NOTE(review): each row below holds 13 feature columns (protocol name
    # first) after the id -- confirm that ``attrs`` lists the same columns.
    with open('./output/' + pcap_file[:-8] + '.csv', 'w+') as csv_out:
        csv_out.write('id,' + header + '\n')
        for index, flow in enumerate(flows.values()):
            # Short flows are skipped, but they still consume an index.
            if flow.pkt_count < 20:
                continue

            row = (
                "%s,%s,%s,%s,%s,%s,%.3f,%s,%s,%s,%s,%s,%s" %
                (proto_name(flow.sport, flow.dport), flow.src, flow.sport,
                 flow.dst, flow.dport, flow.proto, flow.push_flag_ratio(),
                 flow.avrg_len(), flow.avrg_payload_len(), flow.pkt_count,
                 flow.avrg_inter_arrival_time(), flow.kolmogorov(),
                 flow.shannon()))
            dump_path = "./payload/" + pcap_file + "_" + str(index) + '.txt'
            with open(dump_path, 'w+') as dump_out:
                dump_out.write(hexdump(flow.payload, dump=True))
            csv_out.write(pcap_file[:-5] + "_" + str(index) + "," + row + "\n")

    # Re-process every dump in ./payload (not only the ones written above),
    # keeping a fixed character window of each hexdump line.
    for name in os.listdir("./payload"):
        with open("./payload/" + name, 'r') as dump_in:
            dump_lines = dump_in.readlines()
        with open("./payload2/" + name, 'w+') as trimmed_out:
            for dump_line in dump_lines:
                trimmed_out.write(dump_line[6:38] + "\n")
def get_data(path):
    """Group the TCP packets of a capture into flows.

    A packet belongs to an existing flow when either its flow key or the
    reversed key is already present in the result; otherwise it starts a new
    flow stored under its own (forward) key.

    :param path: path of the capture file handed to ``rdpcap``.
    :return: dict mapping flow keys to ``TCPStream`` objects.
    """
    flows = {}
    tcp_packets = [pkt for pkt in rdpcap(path)
                   if IP in pkt for p in pkt if TCP in p]

    for packet in tcp_packets:
        ip_layer = packet[IP]
        forward_key, backward_key = create_flow_keys(ip_layer)

        if forward_key in flows:
            known_key = forward_key
        elif backward_key in flows:
            known_key = backward_key
        else:
            # Neither direction seen yet: open a new stream.
            flows[forward_key] = TCPStream(ip_layer)
            continue

        flows[known_key].add(ip_layer)
    return flows
Exemple #3
0
def parse(pcap_file):
    """Group the TCP packets of a capture into one-directional flows.

    The capture is read from ``config.HTTPS_CONFIG["pcap_path"] + pcap_file``
    and every packet holding both an IP and a TCP layer is attached to a
    stream in the module-level ``flows`` mapping. The function then builds
    five output paths from ``config.HTTPS_CONFIG``.

    NOTE(review): this snippet ends right after the paths are built; the code
    that consumes ``a``..``e`` is not visible here.

    :param pcap_file: capture file name, relative to the configured folder.
    """
    packets=rdpcap(config.HTTPS_CONFIG["pcap_path"]+pcap_file)
    packets = [ pkt for pkt in packets if IP in pkt for p in pkt if TCP in p ]
    # From here on every packet is known to carry a TCP layer.
    for pkt in packets:
        flow_tuple = reverse_flow_tuple = key_to_search = None
        flow_tuple,reverse_flow_tuple = create_flow_keys(pkt[IP])
        # Mixed (bidirectional) flow lookup, currently disabled:
        # flow_key,tcp_stream = lookup_stream(flow_tuple,reverse_flow_tuple)
        # One-directional flow lookup: only the forward key is searched.
        flow_key,tcp_stream=lookup_stream_single(flow_tuple)
        if tcp_stream is None:
            # First packet of this flow: start a new stream.
            tcp_stream = TCPStream(pkt[IP])
        else:
            tcp_stream.add(pkt[IP])

        flows[flow_key] = tcp_stream
    # Output paths; pcap_file[:-8] drops the last 8 characters of the name.
    a=config.HTTPS_CONFIG["total_path"]+pcap_file[:-8]+'.csv'
    b=config.HTTPS_CONFIG["record_type_total"]+pcap_file[:-8]+'_record_type.csv'
    c=config.HTTPS_CONFIG["packet_length_total"]+pcap_file[:-8]+'_packet_length.csv'
    d=config.HTTPS_CONFIG["time_interval_total"]+pcap_file[:-8]+'_time_interval.csv'
    e=config.HTTPS_CONFIG["payload_total"]+pcap_file[:-8]+'_payload.csv'
Exemple #4
0
def parse(pcap_file):
    """Append the inter-arrival times of each sufficiently long TCP flow to a CSV.

    Packets with both an IP and a TCP layer are grouped into the module-level
    ``flows`` mapping (both directions share one flow). For every flow with at
    least 20 packets, one line ``<id>,<padded inter-arrival times>`` is
    appended to the configured ``time_interval_total`` file.

    :param pcap_file: capture file name, relative to the configured folder.
    """
    capture = rdpcap(config.HTTPS_CONFIG["pcap_path"] + pcap_file)
    tcp_packets = [pkt for pkt in capture if IP in pkt for p in pkt if TCP in p]

    # Every remaining packet carries a TCP layer; attach each one to its flow.
    for packet in tcp_packets:
        ip_layer = packet[IP]
        forward_key, backward_key = create_flow_keys(ip_layer)
        flow_key, tcp_stream = lookup_stream(forward_key, backward_key)
        if tcp_stream is not None:
            tcp_stream.add(ip_layer)
        else:
            tcp_stream = TCPStream(ip_layer)

        flows[flow_key] = tcp_stream

    out_path = (config.HTTPS_CONFIG["time_interval_total"]
                + pcap_file[:-8] + '_time_interval.csv')
    row_prefix = pcap_file[:-5] + "_"
    with open(out_path, 'a') as out:
        for index, flow in enumerate(flows.values()):
            # Only flows with at least 20 packets are kept.
            if flow.pkt_count < 20:
                continue
            padded = padArray(flow.inter_arrival_times, 0)
            # str(list) yields "[a, b, ...]"; the brackets are stripped.
            out.write(row_prefix + str(index) + "," + str(padded).strip('[]') + "\n")
Exemple #5
0
# Column names of the per-flow feature table.
attrs = [
    'src', 'sport', 'dst', 'dport', 'proto', 'push_flag_ratio',
    'average_len', 'average_payload_len', 'pkt_count',
    'flow_average_inter_arrival_time', 'kolmogorov', 'shannon',
]

#TODO check if its possible to pack it again in the original class, that we are able to call .conversations() on this array

myreader = PcapReader(pcap_file)
# Iterate the reader directly so packets are handled one at a time.
for pkt in myreader:
    if IP not in pkt:
        continue
    # Read the protocol number from the IP layer explicitly. A bare
    # ``pkt.proto`` is answered by the first layer that has a ``proto`` field,
    # which can be a link layer (e.g. Linux cooked capture) rather than IP.
    # Only TCP (6) and UDP (17) are kept.
    if pkt[IP].proto not in (6, 17):
        continue
    flow_tuple = reverse_flow_tuple = key_to_search = None
    flow_tuple, reverse_flow_tuple = create_flow_keys(pkt[IP])
    flow_key, network_stream = lookup_stream(flow_tuple, reverse_flow_tuple)

    if network_stream is None:
        # First packet of this flow: pick the stream type from the protocol.
        if pkt[IP].proto == 6:
            network_stream = TCPStream(pkt[IP])
        else:
            network_stream = UDPStream(pkt[IP])
    else:
        network_stream.add(pkt[IP])

    flows[flow_key] = network_stream

if output_type == "arff":
    print "@relation protocol_detection"
    print "@attribute protocol-name string"

    for attr in attrs:
        if attr in ['proto','pkt_count','average_len','flow_average_inter_arrival_time','push_flag_ratio','average_payload_len','kolmogorov','shannon']:
            print "@attribute",attr,"numeric"
        else:
Exemple #6
0
# Group packets into flows. Indentation uses spaces only: the previous mix of
# tabs and spaces is rejected by Python 3 (TabError).
for pkt in packets:
    # Keep only IP packets carrying ICMP (1), TCP (6) or UDP (17).
    if IP not in pkt:
        continue
    if pkt[IP].proto not in [1, 6, 17]:
        continue
    # XXX HACK: length of the payload layered on top of the transport layer.
    # The checks above already filtered out everything but ICMP, TCP and UDP.
    # Packets without payload must not be counted, otherwise they distort the
    # entropy and chi-square tests.
    if not len(pkt[IP].payload.payload):
        continue
    flow_tuple = reverse_flow_tuple = key_to_search = None
    flow_tuple, reverse_flow_tuple = create_flow_keys(pkt[IP])
    flow_key, stream = lookup_stream(flow_tuple, reverse_flow_tuple)

    if stream is None:
        # XXX: stream-type selection does not really belong here.
        # No fallback branch is needed: the protocol filter above guarantees
        # one of these three cases matches.
        if pkt[IP].proto == 6:
            stream = TCPStream(pkt[IP])
        elif pkt[IP].proto == 17:
            stream = UDPStream(pkt[IP])
        elif pkt[IP].proto == 1:
            stream = ICMPStream(pkt[IP])
    else:
        stream.add(pkt[IP])

    if stream:
        flows[flow_key] = stream

print "flow,entropy_mean,entropy_sd,pvalue"
for idx,flow in enumerate(flows.values()):
  #filter flows with less than 5 packets
  #it was just used in this python script. its not how we are doing 
Exemple #7
0
    'src', 'sport', 'dst', 'dport', 'proto', 'push_flag_ratio', 'average_len',
    'average_payload_len', 'pkt_count', 'flow_average_inter_arrival_time',
    'kolmogorov', 'shannon'
]

# Reduce the capture to packets that contain both an IP and a TCP layer.
#TODO check if its possible to pack it again in the original class, that we are able to call .conversations() on this array
packets = [pkt for pkt in packets if IP in pkt for p in pkt if TCP in p]
# From here on every packet is known to carry a TCP layer.
for pkt in packets:
    # Build the forward and reverse keys and look the flow up under both.
    flow_tuple = reverse_flow_tuple = key_to_search = None
    flow_tuple, reverse_flow_tuple = create_flow_keys(pkt[IP])
    flow_key, tcp_stream = lookup_stream(flow_tuple, reverse_flow_tuple)

    if tcp_stream is None:
        # First packet of this flow: start a new stream.
        tcp_stream = TCPStream(pkt[IP])
    else:
        tcp_stream.add(pkt[IP])

    # Store (or re-store) the stream under the key returned by the lookup.
    flows[flow_key] = tcp_stream

if output_type == "arff":
    print("@relation protocol_detection")
    print("@attribute protocol-name", "{ssh,http,mysql,unknown}")

    for attr in attrs:
        if attr in [
                'pkt_count', 'average_len', 'flow_average_inter_arrival_time',
                'push_flag_ratio', 'average_payload_len', 'kolmogorov',
                'shannon'
        ]:
Exemple #8
0
# Flow table filled by the loop below: flow key -> TCPStream.
flows = {}

# Column names emitted as ARFF attributes further down.
attrs = ['src','sport','dst','dport','proto','flags','average_len','pkt_count','flow_average_inter_arrival_time']
# Reduce the capture to packets that contain both an IP and a TCP layer.
#TODO check if its possible to pack it again in the original class, that we are able to call .conversations() on this array
packets = [ pkt for pkt in packets if IP in pkt for p in pkt if TCP in p ]

# From here on every packet is known to carry a TCP layer.
for pkt in packets:
	 # Build the forward and reverse keys and look the flow up under both.
	 flow_tuple = reverse_flow_tuple = key_to_search = None
	 flow_tuple,reverse_flow_tuple = create_flow_keys(pkt[IP])
	 flow_key,tcp_stream = lookup_stream(flow_tuple,reverse_flow_tuple)

	 if tcp_stream is None:
	   # First packet of this flow: start a new stream.
	   tcp_stream = TCPStream(pkt[IP])
	 else:
	   tcp_stream.add(pkt[IP])

	 flows[flow_key] = tcp_stream

# ARFF header: relation name, the class attribute, then one attribute per
# column. NOTE: the print statements below are Python 2 syntax.
if output_type == "arff":
	print "@relation protocol_detection"
	print "@attribute protocol-name","{ssh,http,mysql,unknown}"

	for attr in attrs:
		# These three columns are declared numeric; all others are strings.
		if attr in ['pkt_count','average_len','flow_average_inter_arrival_time']:
			print "@attribute",attr,"numeric"
		else:
			print "@attribute",attr,"string"
	print "@data"