def ReadPcap(file):
    """Print the IP endpoints, ports and length of every IP packet in a pcap.

    For each packet that carries an ``IP`` layer, one line of the form
    ``src dst sport dport len`` is printed, and a running count is printed
    after every 1000 such packets.  Ports and length are read from the
    layer at index 2 of the packet; a field that layer does not provide
    (for example on ICMP) is printed as ``None`` instead of aborting the
    whole run with an ``AttributeError``.

    file: path of the capture, handed to ``PcapReader``.

    Raises whatever ``PcapReader`` raises when the file cannot be opened
    (after printing a short message).
    """
    try:
        # scapy.utils.PcapReader
        print('Reading: %s' % file)
        pcap = PcapReader(file)
    except Exception:
        # yes, logging.exception should be used here, but it doesn't add any value
        print('Not a valid pcap file: %s' % file)
        raise

    num = 0
    try:
        while True:
            # End of capture: older scapy returns None, newer scapy raises
            # EOFError.  Handle both explicitly instead of relying on the
            # TypeError that "'IP' in None" used to trigger.
            try:
                packet = pcap.read_packet()
            except EOFError:
                break
            if packet is None:
                break

            if 'IP' not in packet:
                continue

            ip_layer = packet['IP']
            try:
                transport = packet[2]
            except IndexError:
                # Nothing stacked at index 2.
                transport = None

            sport = getattr(transport, 'sport', None)
            dport = getattr(transport, 'dport', None)
            length = getattr(transport, 'len', None)
            # Single-argument print() behaves the same on Python 2 and 3.
            print('%s %s %s %s %s' % (ip_layer.src, ip_layer.dst,
                                      sport, dport, length))

            num += 1
            if num % 1000 == 0:
                print(num)
    finally:
        # Release the file handle even if a packet fails to process.
        pcap.close()
def scapy_io(f_in, f_out):
    """Copy every packet of the pcap file ``f_in`` into the pcap file ``f_out``.

    f_in:  path handed to ``PcapReader``.
    f_out: path handed to ``PcapWriter``.

    Both handles are closed even when reading or writing fails, so a
    partially written output file is still flushed and released.
    """
    f = PcapReader(f_in)
    try:
        o = PcapWriter(f_out)
        try:
            while True:
                # Older scapy signals end of file with None, newer scapy
                # raises EOFError; accept both.
                try:
                    pkt = f.read_packet()
                except EOFError:
                    break
                if pkt is None:
                    break
                o.write(pkt)
        finally:
            o.close()
    finally:
        f.close()
def scapy_io(f_in, f_out):
    """Stream all packets from the capture ``f_in`` to a new capture ``f_out``.

    f_in:  path handed to ``PcapReader``.
    f_out: path handed to ``PcapWriter``.

    ``contextlib.closing`` guarantees that ``close()`` is called on the
    reader and on the writer, whether the copy finishes or raises.
    """
    from contextlib import closing

    with closing(PcapReader(f_in)) as reader:
        with closing(PcapWriter(f_out)) as writer:
            while True:
                try:
                    pkt = reader.read_packet()
                except EOFError:
                    # Newer scapy raises at end of file instead of
                    # returning None.
                    break
                if pkt is None:
                    break
                writer.write(pkt)
def extract_attributes(pcap_file, attr_list, filter_attributes=None,
                       filter_=None):
    """Yield one attribute dict per packet of ``pcap_file``.

    pcap_file:         path handed to ``PcapReader``.
    attr_list:         attributes to extract; each is passed to
                       ``extract_attr(packet, attr)`` and stored under
                       ``format_attr_name(attr)``.
    filter_attributes: optional iterable of attribute names; a packet is
                       dropped unless every one of them is present in the
                       extracted dict with a truthy value.  String names
                       are normalised with ``format_attr_name`` first.
    filter_:           optional callable taking the attribute dict; the
                       packet is dropped when it returns a falsy value.

    This is a generator; the reader is closed when iteration ends or the
    generator is closed.
    """
    # types.StringTypes exists only on Python 2; fall back to str elsewhere.
    string_types = getattr(types, 'StringTypes', (str,))

    packets_reader = PcapReader(pcap_file)
    try:
        # A real list: the names are paired with attr_list for every packet,
        # which a Python 3 map() iterator would not allow.
        attr_name_list = [format_attr_name(attr) for attr in attr_list]

        # Normalise the required names once instead of once per packet.
        required = []
        for filter_attr in filter_attributes or ():
            if isinstance(filter_attr, string_types):
                filter_attr = format_attr_name(filter_attr)
            required.append(filter_attr)

        while True:
            # Older scapy returns None at end of file, newer raises EOFError.
            try:
                packet = packets_reader.read_packet()
            except EOFError:
                break
            if not packet:
                break

            attributes = dict()
            for name, attr in zip(attr_name_list, attr_list):
                attributes[name] = extract_attr(packet, attr)

            ok = True
            for name in required:
                if name not in attributes or not attributes[name]:
                    ok = False
                    break
            if ok and filter_:
                ok = filter_(attributes)
            if ok:
                yield attributes
    finally:
        packets_reader.close()
def _ipv6_layer_to_ipv4(layer):
    """Build the IPv4 header that takes over the payload of IPv6 ``layer``."""
    new_layer = IP()
    del new_layer.ihl
    new_layer.ttl = layer.hlim
    new_layer.proto = layer.nh
    new_layer.src = ".".join(map(str, six2four(layer.src)))
    new_layer.dst = ".".join(map(str, six2four(layer.dst)))
    new_layer.add_payload(layer.payload)
    return new_layer


def foo(in_filename, out_filename):
    """Rewrite a capture, replacing every IPv6 header with an IPv4 header.

    Each packet of ``in_filename`` is walked layer by layer.  An ``IPv6``
    layer is replaced by an ``IP`` layer whose ttl/proto are taken from
    hlim/nh and whose addresses come from ``six2four``; ``chksum`` and
    ``len`` are deleted on every layer that declares them so scapy
    recomputes them when the packet is written to ``out_filename``.

    in_filename:  path handed to ``PcapReader``.
    out_filename: path handed to ``PcapWriter``.
    """
    # open the input file for reading
    f = PcapReader(in_filename)
    try:
        # open the output file for writing
        o = PcapWriter(out_filename)
        try:
            while True:
                # Older scapy returns None at end of file, newer raises.
                try:
                    p = f.read_packet()
                except EOFError:
                    break
                if not p:
                    break

                layer = p.firstlayer()
                while not isinstance(layer, NoPayload):
                    if type(layer) is IPv6:
                        new_layer = _ipv6_layer_to_ipv4(layer)
                        prev_layer = layer.underlayer
                        if prev_layer is None:
                            # The IPv6 header was the outermost layer, so
                            # there is nothing to re-attach to: the new
                            # IPv4 header becomes the packet itself.
                            p = new_layer
                        else:
                            prev_layer.remove_payload()
                            prev_layer.add_payload(new_layer)
                            if type(prev_layer) is Ether:
                                prev_layer.type = ETH_P_IP
                        layer = new_layer

                    # Drop stale checksum/length values so they are
                    # recomputed on write ("in" works on Python 2 and 3,
                    # has_key only on Python 2).
                    if 'chksum' in layer.default_fields:
                        del layer.chksum
                    if 'len' in layer.default_fields:
                        del layer.len

                    # advance to the next layer
                    layer = layer.payload

                # write the packet we just dissected into the output file
                o.write(p)
        finally:
            # close the output file
            o.close()
    finally:
        # close the input file
        f.close()
# Walk every capture in `folder` and inspect its IP/TCP packets.
# NOTE(review): this fragment is cut off inside the TCP branch; only the
# visible part is reproduced here.
if not os.path.isdir(folder):
    print("You must specify a valid folder")
    sys.exit(2)

# str.rstrip returns a new string, so the result has to be assigned.
# Keep "/" itself intact so the filesystem root stays a valid folder.
folder = folder.rstrip("/") or "/"

tcpState = {}
dnsMapping = {}
objectData = {}

for name in os.listdir(folder):
    # Test the file name only: testing the joined path would also match a
    # ".pcap" that merely appears in the directory name.
    if ".pcap" not in name:
        continue
    file = os.path.join(folder, name)
    myreader = PcapReader(file)
    while True:
        packet = myreader.read_packet()
        if packet is None:
            break
        #packet.show()
        if not packet.haslayer("IP"):
            continue
        if packet.haslayer("TCP"):
            #packet.show()
            proto = "TCP"
            tcpflags = list(packet.sprintf('%TCP.flags%'))
            if hashTCP(packet) in tcpState:
                print("Packet src to dst")
                src = packet[IP].src
def pcap2sessions_statistic_with_pcapreader_scapy(input_f):
    """Collect packet and session statistics for one pcap file.

    Packets are read one at a time with ``PcapReader`` (``rdpcap`` would
    load the whole capture into memory at once), grouped into sessions keyed
    by the string ``"srcIP:srcPort-dstIP:dstPort-PROTOCOL"`` through
    ``save_session_to_dict``, and then reduced to "full" sessions: UDP
    sessions are kept unchanged, TCP sessions are kept only when the scan in
    step 3 below marks them as full.

    Note:
        There are 2 reader classes: ``PcapReader`` decodes packets,
        ``RawPcapReader`` does not.  Both can be iterated::

            with PcapReader('file.pcap') as pr:
                for p in pr:
                    ...do something with a packet p...

        reference: https://github.com/phaethon/kamene/issues/7

        TCP flag letters in scapy::

            flags = {
                'F': 'FIN',
                'S': 'SYN',
                'R': 'RST',
                'P': 'PSH',
                'A': 'ACK',
                'U': 'URG',
                'E': 'ECE',
                'C': 'CWR',
            }

    NOTE(review): the nesting below was reconstructed from a copy of the
    source whose indentation was lost; confirm the marked spots against the
    original file.

    :param input_f: path of the pcap file, handed to ``PcapReader``.
    :return: ``-1``, ``-2`` or ``-10`` when creating the reader raises
        ``MemoryError``, ``FileNotFoundError`` or anything else; otherwise a
        dict with the keys ``'pkts_stats'``, ``'all_sess'``, ``'full_sess'``
        and ``'full_sess_size_distribution'``.
    """
    st = time.time()  # start timestamp; not read again in this function
    print('process ... \'%s\'' % input_f, flush=True)

    # Step 1. read from pcap and do not return a list of packets
    try:
        # pkts_lst = rdpcap(input_f)  # this will read all packets in memory at once, please don't use it directly.
        # input_f = '../pcaps_data/vpn_hangouts_audio2.pcap'  #
        # input_f = '/home/kun/PycharmProjects/pcap_process_scapy/pcaps_data/aim_chat_3a.pcap'  #
        myreader = PcapReader(input_f)  # iterator, please use it to process large file, such as more than 4 GB
    except MemoryError as me:
        print('memory error ', me)
        return -1
    except FileNotFoundError as fnfe:
        print('file not found ', fnfe)
        return -2
    except:
        print('other exceptions')
        return -10

    # Step 2. achieve all the session in pcap.
    # input_data.stats
    pkts_stats = {
        'non_Ether_IPv4_pkts': 0,
        'non_IPv4_pkts': 0,
        'non_TCP_UDP_pkts': 0,
        'TCP_pkts': 0,
        'UDP_pkts': 0
    }
    cnt = 0  # number of TCP/UDP packets stored in sess_dict so far
    sess_dict = {}
    first_print_flg = True  # the encapsulation message is printed once only
    max_pkts_cnt = 1
    while True:
        pkt = myreader.read_packet()
        if pkt is None:
            break
        # The counter starts at 1 and is checked before it is incremented,
        # so reading stops when the 1000th packet arrives.
        if max_pkts_cnt >= 1000:
            print(
                '\'%s\' includes more than %d packets and in this time just process the first %d packets. Please split it firstly and do again.'
                % (input_f, max_pkts_cnt, max_pkts_cnt))
            break
        max_pkts_cnt += 1

        # step 1. parse "Ethernet" firstly
        if pkt.name == "Ethernet":
            if first_print_flg:
                first_print_flg = False
                print('\'%s\' encapsulated by "Ethernet Header" directly' % input_f)
            if pkt.payload.name.upper() in ['IP', 'IPV4']:
                if pkt.payload.payload.name.upper() in ["TCP", "UDP"]:
                    if cnt == 0:
                        print('packet[0] info: "%s:%d-%s:%d-%s"+%s' %
                              (pkt.payload.src, pkt.payload.payload.sport, pkt.payload.dst,
                               pkt.payload.payload.dport, pkt.payload.payload.name,
                               pkt.payload.payload.payload))
                    five_tuple = pkt.payload.src + ':' + str(
                        pkt.payload.payload.sport
                    ) + '-' + pkt.payload.dst + ':' + str(
                        pkt.payload.payload.dport
                    ) + '-' + pkt.payload.payload.name.upper()
                    # save_session_to_dict(k=five_tuple, v=pkt,sess_dict=sess_dict)
                    save_session_to_dict(
                        k=five_tuple, v=pkt.payload, sess_dict=sess_dict
                    )  # only save Ethernet payload to sess_dict
                    cnt += 1
                    # pkts_lst.append(pkt.payload)  # only include "IPv4+IPv4_payload"
                    if pkt.payload.payload.name.upper() == "TCP":
                        pkts_stats['TCP_pkts'] += 1
                    else:
                        pkts_stats['UDP_pkts'] += 1
                else:
                    pkts_stats['non_TCP_UDP_pkts'] += 1
                    # pkts_stats['IPv4_pkts'] += 1
            else:
                pkts_stats['non_IPv4_pkts'] += 1
        else:  # step 2. if this pkt can not be recognized as "Ethernet", then try to parse it as (IP,IPv4)
            pkt = IP(
                pkt
            )  # without ethernet header, then try to parse it as (IP,IPv4)
            if first_print_flg:
                first_print_flg = False
                print(
                    '\'%s\' encapsulated by "IP Header" directly, without "Ethernet Header"'
                    % input_f)
            if pkt.name.upper() in ['IP', 'IPV4']:
                if pkt.payload.name.upper() in ["TCP", "UDP"]:
                    if cnt == 0:
                        print('packet[0] info: "%s:%d-%s:%d-%s"+%s' %
                              (pkt.src, pkt.payload.sport, pkt.dst,
                               pkt.payload.dport, pkt.payload.name,
                               pkt.payload.payload))
                    five_tuple = pkt.src + ':' + str(
                        pkt.payload.sport) + '-' + pkt.dst + ':' + str(
                            pkt.payload.dport) + '-' + pkt.payload.name.upper(
                            )
                    # Here the re-parsed IP packet itself is stored.
                    save_session_to_dict(k=five_tuple, v=pkt, sess_dict=sess_dict)
                    cnt += 1
                    # pkts_lst.append(pkt.payload)  # only include "IPv4+IPv4_payload"
                    if pkt.payload.name.upper() == "TCP":
                        pkts_stats['TCP_pkts'] += 1
                    else:
                        pkts_stats['UDP_pkts'] += 1
                else:
                    pkts_stats['non_TCP_UDP_pkts'] += 1
                    # pkts_stats['IPv4_pkts'] += 1
            else:
                pkts_stats['non_IPv4_pkts'] += 1
                # print('unknown packets type!',pkt.name)
                # NOTE(review): this counter may instead belong one level up
                # (counting every non-Ethernet packet) - confirm.
                pkts_stats['non_Ether_IPv4_pkts'] += 1  # input_data.stats

    # print('%s info is %s' % (input_f, pkts_lst))
    print('packet info:"srcIP:srcPort-dstIP:dstPort-prtcl" + IP_payload')

    # Step 3. achieve all full session in sess_dict.
    full_sess_dict = {}
    for k, v in sess_dict.items(
    ):  # all pkts in sess_dict without Ethernet headers and tails
        prtl = k.split('-')[-1]  # the key ends with the protocol name
        if prtl == "TCP":
            """ only save the first full session in v (maybe there are more than one full session in v) """
            tcp_sess_list = []
            full_session_flg = False  # set once a FIN flag has been seen
            i = -1  # index of pkt inside v
            TCP_start_flg = False  # set once a SYN without ACK has been seen
            for pkt in v:
                i += 1
                if len(
                        v
                ) < 5:  # tcp start (3 packets) + tcp finish (at least 2 packets)
                    print('%s not full session, it only has %d packets' %
                          (k, len(v)))
                    break
                S = str(pkt.payload.fields['flags'])
                # step 1. discern the begin of TCP session.
                if 'S' in S:
                    if 'A' not in S:  # the first SYN packet in TCP session.
                        # if flags[S] == "SYN":
                        TCP_start_flg = True
                        tcp_sess_list.append(pkt)
                        continue  # cannot ignore
                    else:  # the second SYN + ACK
                        tcp_sess_list.append(pkt)
                        continue
                # step 2. discern the transmitted input_data of TCP session
                if TCP_start_flg:  # TCP input_data transform.
                    # Append everything from the current packet onwards.
                    for pkt_t in v[i:]:
                        tcp_sess_list.append(pkt_t)
                        F = str(pkt_t.payload.fields['flags'])
                        if 'F' in F:  # if flags[F]== "FIN":
                            full_session_flg = True
                        # step 3. discern the finish of TCP session.
                        if 'S' in str(pkt_t.payload.fields['flags']) and len(
                                tcp_sess_list) >= 5:  # the second session
                            print('the second session begins.')
                            break
                else:  # TCP_start_flg = False
                    # print('TCP still does not begin...')
                    pass
                # NOTE(review): the nesting of this check and of its break
                # was reconstructed - confirm against the original file.
                if full_session_flg:
                    full_sess_dict[k] = tcp_sess_list
                    # print('tcp_sess_list:', k, len(tcp_sess_list))
                    break
        elif prtl == "UDP":
            # if len(v) < 2:
            #     print('%s is not a UDP session.'%k)
            # else:
            #     full_sess_dict[k] = v
            full_sess_dict[k] = v  # do not do any process for UDP session.
        else:
            pass

    print('pkts_stats is ', pkts_stats)
    print(
        'Number of sessions(TCP/UDP) in %s is %d, number of full session(TCP/UDP) is %d'
        % (input_f, len(sess_dict.keys()), len(full_sess_dict.keys())))
    print('all_sess_dict:', count_protocls(sess_dict), '\nfull_sess_dict:',
          count_protocls(full_sess_dict))

    # Bundle everything that was printed above into the return value.
    all_stats_dict = {}
    all_stats_dict['pkts_stats'] = pkts_stats
    all_stats_dict['all_sess'] = count_protocls(sess_dict)
    all_stats_dict['full_sess'] = count_protocls(full_sess_dict)
    all_stats_dict['full_sess_size_distribution'] = count_sess_size(
        full_sess_dict)
    print(all_stats_dict)
    return all_stats_dict
def _record_flow(ip_pkt, pkts_dict, pkts_list):
    """Store the TCP/UDP payload of ``ip_pkt`` under its direction-independent key.

    Returns True when the payload was stored and False when the packet was
    dropped because an endpoint is listed in ``filterIP`` / ``filterPort``.
    """
    transport = ip_pkt.payload
    src, sport = ip_pkt.src, transport.sport
    dst, dport = ip_pkt.dst, transport.dport
    # Order the endpoints so both directions of a flow share one key.
    if src > dst:
        src, dst = dst, src
        sport, dport = dport, sport
    if (src in filterIP or dst in filterIP or sport in filterPort
            or dport in filterPort):
        return False

    payload = transport.payload.original.hex()
    cur_key = (src, sport, dst, dport, transport.name)
    pkts_dict.setdefault(cur_key, []).append(payload)
    pkts_list.append([cur_key, payload])
    return True


def pcap2packets(input_file='.pcap or pcapng', retDict=True):
    """Extract the hex-encoded TCP/UDP payloads of a capture.

    Packets are accepted when they are Ethernet/IPv4 or bare IPv4 and
    carry TCP or UDP; everything else is only counted as abnormal.
    Packets whose endpoints appear in the module-level ``filterIP`` /
    ``filterPort`` collections are skipped without being counted.

    :param input_file: path of the pcap or pcapng file.
    :param retDict: when true (default) return the per-flow dict,
        otherwise the flat list.
    :return: ``-1``, ``-2`` or ``-10`` when opening the reader raises
        ``MemoryError``, ``FileNotFoundError`` or another exception.
        Otherwise either a dict mapping
        ``(src, sport, dst, dport, protocol_name)`` to the list of payload
        hex strings of that flow, or a list of ``[key, payload_hex]``
        pairs in capture order.
    """
    pkts_dict = {}
    pkts_list = []
    try:
        myreader = PcapReader(input_file)
    except MemoryError as me:
        print('memory error ', me)
        return -1
    except FileNotFoundError as fnfe:
        print('file not found ', fnfe)
        return -2
    except Exception:
        print('other exceptions')
        return -10

    ab_pkts = {'non_Ether_pkts': 0, 'non_IPv4_pkts': 0, 'non_TCP_UDP_pkts': 0}
    print('packet info:"srcIP:srcPort-dstIP:dstPort-prtcl" + IP_payload')
    cnt = 0
    try:
        while True:
            # Older scapy returns None at end of file, newer raises EOFError.
            try:
                pkt = myreader.read_packet()
            except EOFError:
                break
            if pkt is None:
                break

            # Locate the IPv4 layer, with or without an Ethernet header.
            if pkt.name == "Ethernet":
                ip_pkt = pkt.payload
                if ip_pkt.name.upper() not in ('IP', 'IPV4'):
                    ab_pkts['non_IPv4_pkts'] += 1
                    continue
            elif pkt.name.upper() in ('IP', 'IPV4'):
                ip_pkt = pkt
            else:
                ab_pkts['non_Ether_pkts'] += 1
                continue

            if ip_pkt.payload.name.upper() not in ("TCP", "UDP"):
                ab_pkts['non_TCP_UDP_pkts'] += 1
                continue

            if _record_flow(ip_pkt, pkts_dict, pkts_list):
                cnt += 1
    finally:
        # The original never released the capture file handle.
        myreader.close()

    print('Number of packets in %s is %d.' % (str(input_file), cnt))
    # Built-in sum() adds the counters; wrapping a Python 3 dict view in
    # np.array() does not produce a numeric array to sum.
    print('Abnormal packets in %s is %s' % (str(input_file),
                                            sum(ab_pkts.values())))
    if retDict:
        return pkts_dict
    print(len(pkts_list))
    return pkts_list
ts.get_stock_basics() #查看股票基本面 ts.get_sz50s() #获取一些分类信息,如上证50成份股 } 处理pcap{ from scapy.all import * p=rdpcap(path) #将整个pcap文件一次性读取到内存 from scapy.all import PcapReader 或 from scapy.utils import * from scapy.route import * from scapy.layers.all import * packets=PcapReader(path) p=packets.read_packet() #一个包一个包地读取,节省内存 } 随机数random{ random() 返回0<=n<1之间的随机实数n;会生成一个随机的浮点数,范围是[0.0, 1.0)。 uniform()正好弥补了上面函数的不足,它可以设定浮点数的范围,一个是上限,一个是下限。 randint()随机生成一个int类型的整数,可以指定这个整数的范围,同样有上限和下限 choice(seq) 从序列seq中返回随机的元素;可以从任何序列,比如list列表中,选取一个随机的元素返回,可以用于字符串、列表、元组等 getrandbits(n) 以长整型形式返回n个随机位; shuffle(seq[, random]) 原地打乱seq序列;将一个序列中的元素,随机打乱 sample(seq, n) 从序列seq中选择n个随机且独立的元素; } 计时{ import time
from scapy.all import PcapReader import numpy with open('input.csv', 'a') as f: f.write('时间戳,协议,源ip,源port,目的ip,目的port,真实标签,\n') s1 = PcapReader("ctu13_12-1.pcap") infected_ip = ['147.32.84.165', '147.32.84.191', '147.32.84.192'] num = 0 index = 0 try: while True: data = s1.read_packet() if data[1].name == 'ARP': src = data[1].psrc dst = data[1].pdst sport = None dport = None protocol = data[1].name time = data[0].time origin = data[0].original if time > 1313743991 and src in infected_ip: label = 1 else: label = 0 elif data[2].name == 'UDP': # 从IP层拿到ip地址 src = data[1].src dst = data[1].dst # 从UDP层拿到端口号
def MatchPcap(self, regex, file, negated=False, ports=[]):
    '''MatchPcap(regex, file, negated=False, ports=[])
    Matches the given regex with the given pcap file. Returns a matchMap
    dictionary of sessions that matched.

    regex:   Any ol' valid regex will do; it is compiled with (?s) prefixed.
    file:    A libpcap file, handed to PcapReader.
    negated: If true, finds sessions that do NOT match the regex.
    ports:   A list of ports to restrict the matching to. The list is only
             read here, never modified.

    Returns: {id: {'proto': ..., 'host1': ..., 'host2': ...}} where host1
             and host2 are (ip, port) tuples, or bare IP strings when the
             packet has no sport/dport.
    Raises:  Exception('Invalid regular expression.') if the regex does not
             compile; re-raises whatever PcapReader raised for a bad file.

    NOTE(review): the nesting below was reconstructed from a copy of the
    source whose indentation was lost; confirm the marked spots against the
    original file.
    '''
    try:
        regex = re.compile(r'(?s)%s' % regex)
    except:
        self.error('Invalid regular expression: %s' % regex)
        raise Exception('Invalid regular expression.')

    try:
        # scapy.utils.PcapReader
        self.debug('Reading: %s' % file)
        pcap = PcapReader(file)
    except:
        # yes, logging.exception should be used here, but it doesn't add any value
        self.error('Not a valid pcap file: %s' % file)
        raise

    # matchMap format:
    # {3: {'proto': 6, 'host1': ('1.2.3.4',1024), 'host2': ('9.8.7.6',80)}}
    matchMap = {}
    newid = 1  # id that the next newly recorded session will get

    # Build a list of streams that match the search regex
    while pcap:
        try:
            packet = pcap.read_packet()
            match = {}  # assigned but not read again in this method
            matchedStream = False

            # Skip if the session's ports aren't in the allowed port list (-p).
            try:
                if ports and not (packet[2].sport in ports or packet[2].dport in ports):
                    continue
            except AttributeError:
                # Continue; weren't any ports at all (ip.proto not in (6,17))
                pass
            except IndexError:
                # Wasn't even IP, skip it
                pass

            # Perform match
            try:
                # The bytes of the layer at index 3 are what gets searched.
                rawpacket = packet[3].build()
                # for some reason, re.match doesn't work, yet re.findall does.
                if regex.findall(rawpacket):
                    matchedStream = True
                    #most verbose: self.debug('matched\n%s' % str(rawpacket))

                if matchedStream or negated:
                    # Run the list backwards in hope of matching early rather than matching at the end of the entire list.
                    # (relies on keys() returning a list, i.e. Python 2)
                    ids = matchMap.keys()
                    ids.reverse()
                    unknownStream = True
                    for id in ids:
                        try:
                            # Assuming we'll never see a packet with same src and dst
                            # TCP,UDP layers referred to by index offset for code simplicity
                            # would do this as one if statement, but seperating it helps exit early and save cpu cycles
                            if (packet['IP'].src,packet[2].sport) in (matchMap[id]['host1'],matchMap[id]['host2']):
                                if (packet['IP'].dst,packet[2].dport) in (matchMap[id]['host1'],matchMap[id]['host2']):
                                    unknownStream = False
                                    # This avoids source port reuse problems, causing session collisions
                                    # unknownStream is True if its a known session yet tcp syn flag is set.
                                    if packet['IP'].proto == 6 and packet['TCP'].sprintf('%flags%') == 'S':
                                        unknownStream = True
                                    # NOTE(review): nesting of this break was reconstructed - confirm.
                                    break
                        except AttributeError:
                            # most likely the session isn't tcp/udp so scapy throws AttributeError if no sport/dport exists. Try without it instead.
                            if matchMap[id]['proto'] == packet['IP'].proto:
                                if packet['IP'].src in (matchMap[id]['host1'], matchMap[id]['host2']):
                                    if packet['IP'].dst in (matchMap[id]['host1'], matchMap[id]['host2']):
                                        unknownStream = False
                                        break

                    # if its not negated and its a newly matched stream, OR negated and an unknown, add it to matchMap. if its negated and matched later, it gets deleted before the end
                    if (matchedStream and unknownStream and not negated) or (negated and unknownStream and not matchedStream):
                        matchMap[newid] = {}
                        # Personal preference of mine: printing matches here rather than when the function finishes gives the user a feeling things are happening, rather than get the messages all at once at the end of the call.
                        # This is doubly so when dealing with massive 1g+ pcap files
                        try:
                            matchMap[newid] = {'proto': packet['IP'].proto, 'host1': (packet['IP'].src,packet[2].sport), 'host2': (packet['IP'].dst,packet[2].dport)}
                            self.info('Match #%d: Proto %d, IPs %s:%d, %s:%d' % (newid,matchMap[newid]['proto'],matchMap[newid]['host1'][0],matchMap[newid]['host1'][1],matchMap[newid]['host2'][0],matchMap[newid]['host2'][1]))
                        except AttributeError:
                            # No sport/dport on this packet: record bare IPs.
                            matchMap[newid] = {'proto': packet['IP'].proto, 'host1': packet['IP'].src, 'host2': packet['IP'].dst}
                            self.info('Match #%d: Proto %d, IPs %s, %s' % (newid,matchMap[newid]['proto'],matchMap[newid]['host1'],matchMap[newid]['host2']))
                        newid += 1
                    elif matchedStream and negated and not unknownStream:
                        # Flag the session as matching regex to NOT keep.
                        # If deleted now, it would just come back from the next related packet
                        # `id` still holds the value the search loop above stopped on.
                        matchMap[id]['delete'] = True
            except IndexError:
                pass # no raw layer, nothing to search
        except TypeError:
            # The only exit from the loop; presumably raised when
            # read_packet() returns None at end of file - TODO confirm.
            break

    if negated:
        # Deleting while iterating relies on keys() returning a list (Python 2).
        for id in matchMap.keys():
            try:
                if matchMap[id]['delete']:
                    del matchMap[id]
                    self.info('Match #%d matched, removed from result.' % id)
            except KeyError:
                pass

    # rebuilding the sequential id's here might get confusing with the prior-printed messages. probably best to avoid it.
    pcap.close()
    del pcap
    return matchMap
def WritePcap(self, matchMap, file, outputFilename=None):
    '''WritePcap(matchMap, file, outputFilename=None)
    Writes the matched pcap sessions in matchMap found in file to separate
    pcap files named '<prefix>_match<id>.pcap'.

    matchMap:       The output from MatchPcap.
    file:           The pcap file you matched on previously.
    outputFilename: Allows you to specify the prefix on the output pcaps.
                    When omitted, the prefix is derived from `file` by the
                    regex below, falling back to 'greppcap'.

    Returns: matchMap, with a 'pcap' entry (the output file name) added to
             every session.
    Raises:  re-raises the error hit while validating matchMap or while
             opening the pcap file.

    NOTE(review): the nesting below was reconstructed from a copy of the
    source whose indentation was lost; confirm the marked spots against the
    original file.
    '''
    try:
        if not matchMap.keys():
            self.debug('matchMap is empty! No matches from greppcap?')
            # A bare raise with no active exception fails by itself, which
            # lands in the handler below.
            raise
    except:
        self.debug('Not a valid matchMap.')
        raise

    try:
        pcap = PcapReader(file)
        if not outputFilename:
            # There's probably some python fu way to do this. I have the regex fu.
            try:
                filename = re.findall(r'^(?is)[./]?(?:[^/]+/)*([^/]+)(?:\.[^\./]+)$', file)[0]
            except:
                # base filename was too crazy to figure out, go with a default one
                filename = 'greppcap'
        else:
            filename = outputFilename
    except:
        self.error('Not a valid pcap file: %s' % file)
        raise

    self.debug('matchMap: %s' % matchMap)
    self.debug('Writing pcaps...')

    # Open file handle on a pcap and append the packet to the right pcap.
    while pcap:
        try:
            packet = pcap.read_packet()
            writePacket = False
            for id in matchMap.keys():
                try:
                    # Both endpoints have to belong to the recorded session.
                    if (packet['IP'].src,packet[2].sport) in (matchMap[id]['host1'],matchMap[id]['host2']):
                        if (packet['IP'].dst,packet[2].dport) in (matchMap[id]['host1'],matchMap[id]['host2']):
                            writePacket = True
                except AttributeError:
                    # No sport/dport on this packet: compare protocol and bare IPs.
                    if matchMap[id]['proto'] == packet['IP'].proto:
                        if packet['IP'].src in (matchMap[id]['host1'], matchMap[id]['host2']):
                            if packet['IP'].dst in (matchMap[id]['host1'], matchMap[id]['host2']):
                                writePacket = True
                except IndexError:
                    continue # not IP

                if writePacket:
                    # Create/append the packet to a pcap file and close the handler.
                    # Doing it this way avoids hitting any open file handler limit (resource.getrlimit(resource.RLIMIT_NOFILE))
                    try:
                        wrpcap('%s_match%d.pcap' % (filename,id),packet,append=True,sync=True)
                    except IOError as e:
                        self.error('OS limitation prevented completion of %s_match%d.pcap. Error: %s' % (filename,id,e))
                    # NOTE(review): nesting of this break was reconstructed - confirm.
                    break
        except TypeError:
            break # end of pcap

    # Now nicely announce the completion of pcaps.
    # NOTE(review): no pcap.close() call is visible in this method.
    for id in matchMap.keys():
        matchMap[id]['pcap'] = '%s_match%d.pcap' % (filename,id)
        try:
            self.info('Wrote IP proto %d %s:%d <> %s:%d into %s' % (matchMap[id]['proto'],matchMap[id]['host1'][0],matchMap[id]['host1'][1],matchMap[id]['host2'][0],matchMap[id]['host2'][1],matchMap[id]['pcap']))
        except:
            # host1/host2 are bare IP strings for sessions without ports.
            self.info('Wrote IP proto %d %s <> %s into %s' % (matchMap[id]['proto'],matchMap[id]['host1'],matchMap[id]['host2'],matchMap[id]['pcap']))
    return matchMap