def lookupIP(ip, dxlif): try: v = IP(ip).version() except: return if v == 4 or v == 6: try: print("Looking up: %s" % ip) d = pynfdump.Dumper("/data/nfsen/profiles-data", profile='live', sources=['local']) d.set_where(start=time.strftime("%Y-%m-%d"), end=time.strftime("%Y-%m-%d %H:%M")) records = d.search("src ip %s" % ip, aggregate=['dstip']) tgt = [] for r in records: if r['dstip'] not in tgt: tgt.append(r['dstip']) if len(tgt) > 0: for t in tgt: evtstr = '/feed/compromised/ipv' + str(IP(t).version()) evt = Event(evtstr) evt.payload = str(t).encode() dxlif.send_event(evt) print("Event emitted topic: %s content: %s" % (evtstr, str(t))) except Exception as e: print("Exception while processing %s: %s" % (ip, str(e))) return
def search(ip_list, datatype):
    """Query netflow captures between TimeBegin and TimeEnd in 30-minute
    slices, aggregating flows per slice and writing per-IP results to
    ../data/measurement_data/icmpresult/<ip>.csv.

    ip_list: iterable of IP address strings to match (``host`` filter terms).
    datatype: key into SearchTypeDict selecting the base nfdump filter; also
        forwarded to data_aggregation()/flow2str().
    """
    # Build "host A or host B ..." with join instead of appending " or"
    # and trimming it afterwards.
    hosts = " or".join(" host " + str(ip) for ip in ip_list)
    query = SearchTypeDict[datatype] + " and({0})".format(hosts)
    step = datetime.timedelta(minutes=30)
    timeflag = TimeBegin
    output_dict = {}
    try:
        for ip in ip_list:
            output_dict[ip] = open(
                '../data/measurement_data/icmpresult/{addr}.csv'.format(
                    addr=ip), 'w')
        # Walk the capture directories one 30-minute slice at a time;
        # aggregation is done once per slice.  A slice starting at t is only
        # processed when t + 30min does not pass TimeEnd (same stopping rule
        # as the original increment-then-break loop).
        while timeflag + step <= TimeEnd:
            dirname = timeflag.strftime('%Y%m%d%H%M')
            timeflag += step
            d = pynfdump.Dumper(DataPath, profile=Nfprofile,
                                sources=[Nfsources + dirname])
            print(dirname)
            for key in output_dict:
                output_dict[key].write("##{0}\n".format(dirname))
            d.set_where(start=None, end=None)
            records = d.search(query)
            ippair_dict = data_aggregation(datatype, records)
            fstr_all = flow2str(ippair_dict, datatype)
            for ftuple in fstr_all:
                output_dict[str(ftuple[0])].write(ftuple[1] + "\n")
    finally:
        # Always close the per-IP output files, even if a query fails
        # (the original leaked all file handles on any exception).
        for key in output_dict:
            output_dict[key].close()
def main():
    """Scan one 30-minute window (2017-11-16 10:00-10:30) of ICMP netflow
    records for host 166.111.8.241 and print a histogram of flow durations
    (whole seconds, tab-separated ``duration<TAB>count``).

    Key -1 counts flows whose computed duration is negative (bad timestamps).
    """
    d = pynfdump.Dumper()
    # Read six consecutive 5-minute capture files via the dirfiles range
    # syntax.  (The original also built an explicit colon-separated file
    # list into an unused local -- dead code, removed.)
    d.set_where(
        start=None,
        end=None,
        dirfiles='/data2/datasource/16/nfcapd.201711161000:nfcapd.201711161030'
    )
    records = d.search('proto icmp and host 166.111.8.241')
    timedict = {-1: 0}
    for r in records:
        # nfdump's msec_first/msec_last hold the millisecond part of the
        # flow start/end times.  The original passed them as microseconds,
        # under-weighting the sub-second part by a factor of 1000.
        first_time = r['first'] + datetime.timedelta(
            milliseconds=r['msec_first'])
        last_time = r['last'] + datetime.timedelta(
            milliseconds=r['msec_last'])
        duration = last_time - first_time
        if duration < datetime.timedelta(0):
            timedict[-1] += 1
            continue
        ts = int(duration.total_seconds())
        timedict[ts] = timedict.get(ts, 0) + 1
    for key in timedict:
        print(str(key) + '\t' + str(timedict[key]))
#!/bin/python import sys, os import logging import pynfdump import datetime InnerIP_list = [ '166.111', '59.66', '101.5', '101.6', '183.172', '183.173', '118.229', '202.112', '202.38', '106.120' ] datain = pynfdump.Dumper( '../../../', profile='datasource/16', sources=['nfcapd.201711161000', 'nfcapd.201711161005']) #datain.set_where(start=None,end=None,filename='../datasource/flowdata/nfcapd.201705171805') datain.set_where(start=None, end=None) query = "proto icmp and host 166.111.8.241" record = datain.search(query) for r in record: af = r['af'] first = r['first'] last = r['last'] msec_first = r['msec_first'] msec_last = r['msec_last'] proto = r['prot'] srcip = str(r['srcip']) dstip = str(r['dstip']) srcport = r['srcport']
def search(ip_list):
    """Search one netflow capture file for ICMP flows involving *ip_list*,
    group them per (campus IP, target IP) pair into IN/OUT flow lists,
    optionally dump several intermediate views to files, and hand the
    grouping to middle_process_f().

    ip_list: list of IP address strings to match against flow src/dst.
    """
    # Debug switches controlling which intermediate dump files are written:
    #   [0] every pair; [1] pairs with no IN flows (only outgoing REQUESTs);
    #   [2] pairs with unequal, nonzero IN/OUT counts; [3] pairs with equal
    #   IN/OUT counts.
    middle_print = [False, False, False, False]
    middle_process = True
    if middle_print[0]:
        fout0 = open('middlefile_total.txt', 'w')
    if middle_print[1]:
        fout1 = open('middlefile_only_O', 'w')
    if middle_print[2]:
        fout2 = open('middlefile_not_equal', 'w')
    if middle_print[3]:
        fout3 = open('middlefile_equal', 'w')
    logger = logging.getLogger()
    hdlr = logging.StreamHandler()
    logger.addHandler(hdlr)
    logger.setLevel(logging.WARNING)
    # /16 prefixes of the campus (inner) address space.
    InnerIP_list = [
        '166.111', '59.66', '101.5', '101.6', '183.172', '183.173',
        '118.229', '202.38', '202.112'
    ]
    ipdict_dict = {}
    datain = pynfdump.Dumper()
    datain.set_where(
        start=None,
        end=None,
        filename='/lrjapps/netflowdata/datasource/flowdata/nfcapd.201705171800'
    )
    # Build "proto icmp and (host A or host B or ...)".
    query = ""
    for ip in ip_list:
        query = query + ' or host ' + ip
    query = 'proto icmp and (' + query[4:] + ')'
    logger.debug(query)
    records = datain.search(query)
    for r in records:
        first = r['first']
        last = r['last']
        msec_first = r['msec_first']
        msec_last = r['msec_last']
        srcip = str(r['srcip'])
        srcport = r['srcport']
        dstip = str(r['dstip'])
        dstport = r['dstport']
        packets = r['packets']
        tbytes = r['bytes']
        srcip_prefix = srcip.split('.')[0] + '.' + srcip.split('.')[1]
        dstip_prefix = dstip.split('.')[0] + '.' + dstip.split('.')[1]
        if srcip in ip_list and dstip_prefix in InnerIP_list:
            flag = 'IN'
        elif dstip in ip_list and srcip_prefix in InnerIP_list:
            flag = 'OUT'
        elif srcip not in ip_list and dstip not in ip_list:
            # BUG FIX: the original referenced the undefined name ``dscip``
            # here, raising NameError whenever this branch was taken; the
            # message also misspelled "UNEXPECTED".
            logging.error(srcip + ' ' + dstip +
                          ' UNEXPECTED source/destination address')
            continue
        else:
            # srcip or dstip is in ip_list, but the peer is outside campus.
            continue
        if flag == 'IN':
            key = (dstip, srcip)  # campus IP is key[0], target IP is key[1]
        else:
            key = (srcip, dstip)
        if key not in ipdict_dict:
            ipdict_dict[key] = {'IN': [], 'OUT': []}
        # Both directions append the identical IcmpFlow; the original
        # duplicated this call in an if/else on flag.
        ipdict_dict[key][flag].append(
            Flow.IcmpFlow(srcip, srcport, dstip, dstport, first, msec_first,
                          last, msec_last, packets, tbytes, flag))
    if middle_print[0]:
        for key in ipdict_dict:
            prtln = key[0] + ' => ' + key[1] + '\n' + '\tIN:\n'
            fout0.write(prtln)
            for item in ipdict_dict[key]['IN']:
                fout0.write('\t\t' + item.display_string() + '\n')
            fout0.write('\tOUT:\n')
            for item in ipdict_dict[key]['OUT']:
                fout0.write('\t\t' + item.display_string() + '\n')
            fout0.write('\n')
        fout0.close()
    if middle_print[1]:
        for key in ipdict_dict:
            if len(ipdict_dict[key]['IN']) == 0:
                prtln = key[0] + ' => ' + key[1] + '\n' + '\tOUT:\n'
                fg = 0  # header written yet?
                for item in ipdict_dict[key]['OUT']:
                    if item.display_dict()['type'] == 'REQUEST_OUT':
                        if fg == 0:
                            fout1.write(prtln)
                            fg = 1
                        fout1.write('\t\t' + item.display_string() + '\n')
                fout1.write('\n')
        fout1.close()
    if middle_print[2]:
        for key in ipdict_dict:
            if len(ipdict_dict[key]['IN']) == len(
                    ipdict_dict[key]['OUT']) or len(
                        ipdict_dict[key]['IN']) == 0:
                continue
            prtln = key[0] + ' => ' + key[1] + '\n' + '\tIN:\n'
            fout2.write(prtln)
            for item in ipdict_dict[key]['IN']:
                fout2.write('\t\t' + item.display_string() + '\n')
            fout2.write('\tOUT:\n')
            for item in ipdict_dict[key]['OUT']:
                fout2.write('\t\t' + item.display_string() + '\n')
            fout2.write('\n')
        fout2.close()
    if middle_print[3]:
        for key in ipdict_dict:
            if len(ipdict_dict[key]['IN']) != len(ipdict_dict[key]['OUT']):
                continue
            prtln = key[0] + ' => ' + key[1] + '\n' + '\tIN:\n'
            fout3.write(prtln)
            for item in ipdict_dict[key]['IN']:
                fout3.write('\t\t' + item.display_string() + '\n')
            fout3.write('\tOUT:\n')
            for item in ipdict_dict[key]['OUT']:
                fout3.write('\t\t' + item.display_string() + '\n')
            fout3.write('\n')
        fout3.close()
    if middle_process:
        middle_process_f(ipdict_dict)
def main():
    """Pair outgoing and incoming ICMP flow groups for host 166.111.8.241,
    per 30-minute window of 2017-11-16 (hours 10..22), and compute per-target
    timing intervals into agg_dict, then dump via p_detail()/p_json()."""
    print_detail = False
    print_json = True
    # Outer loops enumerate half-hour windows: iti is the hour (only 10+
    # processed), jti selects the :00-:30 or :30-:00 half.
    for iti in range(23):
        if iti < 10:
            continue
        for jti in range(2):
            if jti == 0:
                hour = iti
                minute = 0
                shour = str(hour)
                ehour = str(hour)
                sminute = '00'
                eminute = '30'
                start = str(hour) + ':00:00'
                end = str(hour) + ':30:00'
                start_time = datetime.datetime(2017, 11, 16, hour, 0, 0)
                stop_time = datetime.datetime(2017, 11, 16, hour, 30, 0)
                # Server window shifted 2 minutes earlier than the flow window.
                start_time_server = datetime.datetime(2017, 11, 16, hour - 1,
                                                      58, 0)
                stop_time_server = datetime.datetime(2017, 11, 16, hour, 28, 0)
            else:
                hour = iti
                minute = 30
                shour = str(hour)
                ehour = str(hour + 1)
                sminute = '30'
                eminute = '00'
                start = str(hour) + ':30:00'
                end = str(hour + 1) + ':00:00'
                start_time = datetime.datetime(2017, 11, 16, hour, 30, 0)
                stop_time = datetime.datetime(2017, 11, 16, hour + 1, 0, 0)
                start_time_server = datetime.datetime(2017, 11, 16, hour, 28, 0)
                stop_time_server = datetime.datetime(2017, 11, 16, hour, 58, 0)
            fout_string = 'out.' + start
            #d = pynfdump.Dumper('/data2/datasource/',profile='16/',sources=['nfcapd.201711161000','nfcapd.201711161005'])
            d = pynfdump.Dumper()
            # ponce = 0
            # d.set_where(start=None,end=None,filename='/data2/datasource/16/nfcapd.201711161000')
            # dstring = '/data2/datasource/16/'
            # for i in range(6):
            #     nstr = str(i*5)
            #     if len(nstr) < 2:
            #         nstr = '0' + nstr
            #     dstring += 'nfcapd.2017111610' + nstr + ':'
            # dstring = dstring[:-1]
            # dirfiles range covers the six 5-minute captures of this window.
            dfiles = '/data2/datasource/16/nfcapd.20171116' + shour + sminute \
                + ':nfcapd.20171116' + ehour + eminute
            d.set_where(start=None, end=None, dirfiles=dfiles)
            # d.set_where(start=None,end=None,filename='/data2/datasource/16/nfcapd.201711161000')
            records = d.search('proto icmp and host 166.111.8.241')
            # Per-window target list: "ip#a,b,c,d" lines.
            fin = open('/data2/datasource/ICMP/time/' + start + '.txt', 'r')
            ip_dict = {}
            agg_dict = {}
            print dfiles, start
            for line in fin.readlines():
                items = line.split('#')
                ip = items[0].strip()
                string = items[1].split(',')
                if ip not in ip_dict:
                    ip_dict[ip] = {
                        'IN': [],
                        'OUT': [],
                        'ICMP': [string[1], string[2], string[3].strip()]
                    }
                    # time_itv states:
                    # -3: initialized but not assigned; -2: host unreachable;
                    # -1: first_time_error (first time out is later than in)
                    #     and last_time_error
                    agg_dict[ip] = {
                        'IN': [],
                        'OUT': [],
                        'ICMP': [string[1], string[2], string[3].strip()],
                        'time_itv': -3,
                        'flow_time_error_in': [0, 0],
                        'flow_time_error_out': [0, 0],
                        'first_time_error': False,
                        'last_time_error': False,
                        'itvf': -1,
                        'itvl': -1
                    }
            fin.close()
            # Bucket every matching flow record under its target IP, tagged
            # by direction relative to that target.
            for r in records:
                first = r['first']
                last = r['last']
                msec_first = r['msec_first']
                msec_last = r['msec_last']
                srcip = str(r['srcip'])
                dstip = str(r['dstip'])
                srcport = r['srcport']
                dstport = r['dstport']
                packets = r['packets']
                tbytes = r['bytes']
                srcip_prefix = srcip.split('.')[0] + '.' + srcip.split('.')[1]
                dstip_prefix = dstip.split('.')[0] + '.' + dstip.split('.')[1]
                # first_time = first + datetime.timedelta(microseconds = msec_first)
                # last_time = last + datetime.timedelta(microseconds = msec_last)
                if dstip in ip_dict:
                    key = dstip
                    flag = 'OUT'
                elif srcip in ip_dict:
                    key = srcip
                    flag = 'IN'
                else:
                    continue
                # prtstr = srcip + ' => ' + dstip + ':' + str(dstport) + ' '+ first_time.strftime("%Y-%m-%d %H:%M:%S.%f") + ' ' + last_time.strftime("%Y-%m-%d %H:%M:%S.%f")+ ' ' + str(packets) + ' ' + str(tbytes)
                ip_dict[key][flag].append(
                    Flow.IcmpFlow(srcip, srcport, dstip, dstport, first,
                                  msec_first, last, msec_last, packets,
                                  tbytes, flag))
            for key in ip_dict:
                # No traffic in one direction => target counted unreachable.
                if len(ip_dict[key]['IN']) == 0:
                    agg_dict[key]['time_itv'] = -2
                    continue
                if len(ip_dict[key]['OUT']) == 0:
                    agg_dict[key]['time_itv'] = -2
                    continue
                #Here to gathering the flows
                ip = key
                srcip, srcport = ip_dict[key]['OUT'][0].get_srcip()
                dstip, dstport = ip_dict[key]['OUT'][0].get_dstip()
                lgin = len(ip_dict[key]['IN'])
                lgout = len(ip_dict[key]['OUT'])
                real_flow_dict = {'IN': [], 'OUT': []}
                real_flow_list = []
                real_flow_dict['IN'] = [
                    Flow.FlowGroup(srcip, srcport, dstip, dstport)
                ]
                real_flow_dict['OUT'] = [
                    Flow.FlowGroup(srcip, srcport, dstip, dstport)
                ]
                # Merge raw OUT flows into FlowGroups; a new group is opened
                # whenever add_flow() rejects the flow.  Also count flows
                # whose first timestamp is after their last ([0] bad of [1]).
                i = 0
                for j in range(lgout):
                    agg_dict[key]['flow_time_error_out'][1] += 1
                    if ip_dict[key]['OUT'][j].get_first_time(
                    ) > ip_dict[key]['OUT'][j].get_last_time():
                        agg_dict[key]['flow_time_error_out'][0] += 1
                    if real_flow_dict['OUT'][i].add_flow(
                            ip_dict[key]['OUT'][j]):
                        pass
                    else:
                        i += 1
                        real_flow_dict['OUT'].append(
                            Flow.FlowGroup(srcip, srcport, dstip, dstport))
                        real_flow_dict['OUT'][i].add_flow(
                            ip_dict[key]['OUT'][j])
                # except TypeError:
                #     print key,ip_dict[key]
                #     print j,ip_dict[key]['OUT'][j]
                #     exit()
                # for item in ip_dict[key]:
                #     print item.print_string()
                # print
                # for item in real_flow_dict[key]:
                #     print item.display_string():
                # exit()
                # Same grouping for the IN direction.
                i = 0
                for j in range(lgin):
                    agg_dict[key]['flow_time_error_in'][1] += 1
                    if ip_dict[key]['IN'][j].get_first_time(
                    ) > ip_dict[key]['IN'][j].get_last_time():
                        agg_dict[key]['flow_time_error_in'][0] += 1
                    if real_flow_dict['IN'][i].add_flow(ip_dict[key]['IN'][j]):
                        pass
                    else:
                        i += 1
                        real_flow_dict['IN'].append(
                            Flow.FlowGroup(srcip, srcport, dstip, dstport))
                        real_flow_dict['IN'][i].add_flow(ip_dict[key]['IN'][j])
                for item in real_flow_dict['OUT']:
                    agg_dict[key]['OUT'].append(item)
                for item in real_flow_dict['IN']:
                    agg_dict[key]['IN'].append(item)
                # Pair each OUT group with the first IN group starting within
                # +/-10 seconds of it.
                lenin = len(real_flow_dict['IN'])
                for item in real_flow_dict['OUT']:
                    for i in range(lenin):
                        if datetime.timedelta(0, -10, 0) < item.get_first_time(
                        ) - real_flow_dict['IN'][i].get_first_time(
                        ) < datetime.timedelta(0, 10, 0):
                            real_flow_list.append({
                                'IN': real_flow_dict['IN'][i],
                                'OUT': item
                            })
                            break
                    # NOTE(review): comparing the int i to range(lenin) is
                    # always False, so this branch is dead code.
                    if i == range(lenin):
                        pass
                # Derive the timing interval from the paired groups; see the
                # time_itv state codes documented where agg_dict is built
                # (-4 appears to mark "multiple pairs seen" -- confirm).
                for item in real_flow_list:
                    if agg_dict[key]['time_itv'] in [-2, -4]:
                        break
                    elif agg_dict[key]['time_itv'] != -3:
                        if item['OUT'].get_first_time() > stop_time_server:
                            break
                        else:
                            agg_dict[key]['time_itv'] = -4
                    # itvf/itvl: first/last time deltas (IN minus OUT) in
                    # milliseconds, or -1 when non-positive.
                    time_itvf = item['IN'].get_first_time(
                    ) - item['OUT'].get_first_time()
                    time_itvl = item['IN'].get_last_time(
                    ) - item['OUT'].get_last_time()
                    dtzero = datetime.timedelta(0, 0, 0)
                    if time_itvf > dtzero:
                        agg_dict[key][
                            'itvf'] = time_itvf.seconds * 1000 + time_itvf.microseconds / 1000
                    else:
                        agg_dict[key]['itvf'] = -1
                    if time_itvl > dtzero:
                        agg_dict[key][
                            'itvl'] = time_itvl.seconds * 1000 + time_itvl.microseconds / 1000
                    else:
                        agg_dict[key]['itvl'] = -1
                    if time_itvf < dtzero:
                        agg_dict[key]['first_time_error'] = True
                        if time_itvl < dtzero:
                            agg_dict[key]['time_itv'] = -1
                            agg_dict[key]['last_time_error'] = True
                        else:
                            agg_dict[key]['time_itv'] = time_itvl
                    else:
                        agg_dict[key]['time_itv'] = time_itvf
                        if time_itvl < dtzero:
                            agg_dict[key]['last_time_error'] = True
                        elif time_itvf > datetime.timedelta(0, 0, 100000):
                            # First-delta over 100ms: prefer the last-delta,
                            # or average the two when they agree within 50ms.
                            if time_itvl < datetime.timedelta(0, 0, 100000):
                                agg_dict[key]['time_itv'] = time_itvl
                            elif datetime.timedelta(
                                    0, 0, -50000
                            ) < time_itvl - time_itvf < datetime.timedelta(
                                    0, 0, 50000):
                                agg_dict[key]['time_itv'] = (time_itvf +
                                                             time_itvl) / 2
            if print_detail:
                p_detail(ip_dict, agg_dict, fout_string)
            if print_json:
                p_json(ip_dict, agg_dict, fout_string)
# Script preamble: configure logging, open the netflow data source and load
# the target IP list from ip.txt.
import sys, os
import logging
import pynfdump
import datetime

logging.basicConfig(
    level=logging.DEBUG,
    format=
    '%(asctime)s %(filename)s [line:%(lineno)d] %(levelname)s %(message)s')

# /16 prefixes of the campus (inner) address space.
InnerIP_list = [
    '166.111', '59.66', '101.5', '101.6', '183.172', '183.173', '118.229',
    '202.112', '202.38', '106.120'
]

datain = pynfdump.Dumper('../', profile='datasource/flowdata',
                         sources='flowdata/')
#datain.set_where(start=None,end=None,filename='../datasource/flowdata/nfcapd.201705171805')
datain.set_where(start=None, end=None)
query = "proto icmp"
# Load the IPs of interest, one per line; values start as 0 counters.
fin = open("ip.txt", 'r')
line = fin.readline()
ip_dict = {}
while line:
    ip_dict[line.strip()] = 0
    line = fin.readline()
# for ip in ip_list:
#     query = query + ' host or ' + ip
# query = query[1:] + ' and proto icmp'
logging.debug(query)
def parse_stats(txt):
    """Feed the stripped lines of *txt* through pynfdump's stats parser and
    return the parsed records as a list."""
    stripped_lines = [line.strip() for line in txt.strip().splitlines()]
    dumper = pynfdump.Dumper()
    return list(dumper.parse_stats(stripped_lines))
def parse_search_helper(txt):
    """Feed the stripped lines of *txt* through pynfdump's search-output
    parser and return the parsed records as a list."""
    stripped_lines = [line.strip() for line in txt.strip().splitlines()]
    dumper = pynfdump.Dumper()
    return list(dumper.parse_search(stripped_lines))