Example #1
def lookupIP(ip, dxlif):
    # Validate the address first; IP() (from IPy) raises on malformed input.
    try:
        v = IP(ip).version()
    except Exception:
        return
    if v in (4, 6):
        try:
            print("Looking up: %s" % ip)
            d = pynfdump.Dumper("/data/nfsen/profiles-data",
                                profile='live',
                                sources=['local'])
            d.set_where(start=time.strftime("%Y-%m-%d"),
                        end=time.strftime("%Y-%m-%d %H:%M"))
            records = d.search("src ip %s" % ip, aggregate=['dstip'])
            tgt = []
            for r in records:
                if r['dstip'] not in tgt:
                    tgt.append(r['dstip'])
            if len(tgt) > 0:
                for t in tgt:
                    evtstr = '/feed/compromised/ipv' + str(IP(t).version())
                    evt = Event(evtstr)
                    evt.payload = str(t).encode()
                    dxlif.send_event(evt)
                    print("Event emitted topic: %s content: %s" %
                          (evtstr, str(t)))

        except Exception as e:
            print("Exception while processing %s: %s" % (ip, str(e)))
            return
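A minimal driver for lookupIP() is sketched below. It is assumption-laden: IP is taken to come from the IPy package, dxlif is taken to be an OpenDXL Python client (dxlclient) whose send_event() publishes the Event objects built above, and the configuration file path is a placeholder.

import time
import pynfdump
from IPy import IP
from dxlclient.client import DxlClient
from dxlclient.client_config import DxlClientConfig
from dxlclient.message import Event

# Hypothetical broker configuration file; adjust to the local environment.
config = DxlClientConfig.create_dxl_config_from_file("dxlclient.config")
with DxlClient(config) as dxlif:
    dxlif.connect()
    # Publishes one event per distinct destination contacted by this source.
    lookupIP("192.0.2.1", dxlif)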
Example #2
def search(ip_list, datatype):
    # Build an nfdump filter of the form "<base filter> and (host A or host B ...)"
    hosts = " or ".join("host " + str(ip) for ip in ip_list)
    query = SearchTypeDict[datatype] + " and (" + hosts + ")"
    timedelta = datetime.timedelta(minutes=30)
    timeflag = TimeBegin
    output_dict = {}
    for ip in ip_list:
        output_dict[ip] = open('../data/measurement_data/icmpresult/{addr}.csv'.format(addr=ip),'w')
    # Read the timestamped capture directories one after another; aggregation is done per read.
    while True:
        dirname = timeflag.strftime('%Y%m%d%H%M')
        timeflag += timedelta
        if timeflag > TimeEnd:
            break
        d = pynfdump.Dumper(DataPath, profile=Nfprofile, sources=[Nfsources + dirname])
        print(dirname)
        for key in output_dict:
            output_dict[key].write("##{0}\n".format(dirname))
        d.set_where(start=None, end=None)
        records = d.search(query)
        ippair_dict = data_aggregation(datatype,records)
        fstr_all = flow2str(ippair_dict,datatype)
        for ftuple in fstr_all:
            output_dict[str(ftuple[0])].write(ftuple[1]+"\n")
    for key in output_dict:
        output_dict[key].close()
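The function above depends on module-level names the snippet does not define (SearchTypeDict, TimeBegin, TimeEnd, DataPath, Nfprofile, Nfsources, data_aggregation, flow2str). A hypothetical sketch of the shapes they are used with, purely for orientation; the values and helper bodies are placeholders, not the original project's code:

import datetime
import pynfdump

DataPath = "/path/to/profiles-data"      # hypothetical nfdump/nfsen data root
Nfprofile = "live"                       # hypothetical profile name
Nfsources = "local/"                     # prefix concatenated with the timestamped directory name
TimeBegin = datetime.datetime(2017, 11, 16, 10, 0)
TimeEnd = datetime.datetime(2017, 11, 16, 12, 0)
SearchTypeDict = {"icmp": "proto icmp"}  # maps a datatype name to a base nfdump filter

def data_aggregation(datatype, records):
    # Placeholder: the real helper groups records per IP pair.
    return {}

def flow2str(ippair_dict, datatype):
    # Placeholder: the real helper yields (ip, line) tuples for the per-IP CSV files.
    return []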
Example #3
import datetime
import pynfdump


def main():
    #d = pynfdump.Dumper('/data2/datasource/',profile='16/',sources=['nfcapd.201711161000','nfcapd.201711161005'])
    d = pynfdump.Dumper()
    #    d.set_where(start=None,end=None,filename='/data2/datasource/16/nfcapd.201711161000')
    # Build a colon-separated list of capture files. Note it is never used:
    # set_where() below is given a hard-coded file range instead.
    dstring = '/data2/datasource/16/'
    for i in range(6):
        dstring += 'nfcapd.2017111610' + str(i * 5).zfill(2) + ':'
    dstring = dstring[:-1]

    d.set_where(
        start=None,
        end=None,
        dirfiles='/data2/datasource/16/nfcapd.201711161000:nfcapd.201711161030'
    )
    #    d.set_where(start=None,end=None,filename='/data2/datasource/16/nfcapd.201711161000')

    records = d.search('proto icmp and host 166.111.8.241')
    timedict = {-1: 0}  # the -1 bucket counts records whose end time precedes their start

    for r in records:
        first = r['first']
        last = r['last']
        msec_first = r['msec_first']
        msec_last = r['msec_last']
        srcip = str(r['srcip'])
        dstip = str(r['dstip'])
        srcport = r['srcport']
        dstport = r['dstport']
        packets = r['packets']
        tbytes = r['bytes']
        srcip_prefix = srcip.split('.')[0] + '.' + srcip.split('.')[1]
        # msec_first/msec_last are millisecond offsets, so add them as milliseconds.
        first_time = first + datetime.timedelta(milliseconds=msec_first)
        last_time = last + datetime.timedelta(milliseconds=msec_last)

        tt = last_time - first_time
        if tt < datetime.timedelta(0, 0, 0):
            timedict[-1] += 1
            continue

        ts = int(tt.total_seconds())
        if ts not in timedict:
            timedict[ts] = 1
        else:
            timedict[ts] += 1

    for key in timedict:
        print(str(key) + '\t' + str(timedict[key]))
Example #4
#!/usr/bin/env python
import sys, os
import logging
import pynfdump
import datetime

InnerIP_list = [
    '166.111', '59.66', '101.5', '101.6', '183.172', '183.173', '118.229',
    '202.112', '202.38', '106.120'
]

datain = pynfdump.Dumper(
    '../../../',
    profile='datasource/16',
    sources=['nfcapd.201711161000', 'nfcapd.201711161005'])
#datain.set_where(start=None,end=None,filename='../datasource/flowdata/nfcapd.201705171805')
datain.set_where(start=None, end=None)
query = "proto icmp and host 166.111.8.241"

record = datain.search(query)

for r in record:
    af = r['af']
    first = r['first']
    last = r['last']
    msec_first = r['msec_first']
    msec_last = r['msec_last']
    proto = r['prot']
    srcip = str(r['srcip'])
    dstip = str(r['dstip'])
    srcport = r['srcport']
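The loop above reads fields from each record but stops there. A small, hypothetical continuation that re-runs the same query and totals bytes per destination address, using only record keys the examples already rely on:

bytes_per_dst = {}
for r in datain.search(query):
    dst = str(r['dstip'])
    bytes_per_dst[dst] = bytes_per_dst.get(dst, 0) + r['bytes']

for dst in sorted(bytes_per_dst):
    print("%s\t%d" % (dst, bytes_per_dst[dst]))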
Example #5
def search(ip_list):

    # Four flags, one per intermediate dump file written below:
    # [0] every IP pair, [1] pairs with only outbound requests,
    # [2] pairs whose IN/OUT flow counts differ, [3] pairs whose counts match.
    middle_print = [False, False, False, False]
    middle_process = True

    if middle_print[0]:
        fout0 = open('middlefile_total.txt', 'w')
    if middle_print[1]:
        fout1 = open('middlefile_only_O', 'w')
    if middle_print[2]:
        fout2 = open('middlefile_not_equal', 'w')
    if middle_print[3]:
        fout3 = open('middlefile_equal', 'w')

    logger = logging.getLogger()
    hdlr = logging.StreamHandler()
    logger.addHandler(hdlr)
    logger.setLevel(logging.WARNING)

    InnerIP_list = [
        '166.111', '59.66', '101.5', '101.6', '183.172', '183.173', '118.229',
        '202.38', '202.112'
    ]
    ipdict_dict = {}
    datain = pynfdump.Dumper()
    datain.set_where(
        start=None,
        end=None,
        filename='/lrjapps/netflowdata/datasource/flowdata/nfcapd.201705171800'
    )
    query = ""
    for ip in ip_list:
        query = query + ' or host ' + ip
    query = 'proto icmp and (' + query[4:] + ')'
    logger.debug(query)
    records = datain.search(query)
    for r in records:
        af = r['af']
        first = r['first']
        last = r['last']
        msec_first = r['msec_first']
        msec_last = r['msec_last']
        proto = r['prot']
        srcip = str(r['srcip'])
        srcport = r['srcport']
        dstip = str(r['dstip'])
        dstport = r['dstport']
        srcas = r['srcas']
        dstas = r['dstas']
        packets = r['packets']
        tbytes = r['bytes']
        srcip_prefix = srcip.split('.')[0] + '.' + srcip.split('.')[1]
        dstip_prefix = dstip.split('.')[0] + '.' + dstip.split('.')[1]

        if srcip in ip_list and dstip_prefix in InnerIP_list:
            flag = 'IN'
        elif dstip in ip_list and srcip_prefix in InnerIP_list:
            flag = 'OUT'
        elif srcip not in ip_list and dstip not in ip_list:
            logging.error(srcip + ' ' + dstip +
                          ' UNEXPECTED source/destination address')
            continue
        else:  # srcip or dstip is in ip_list, but the peer address is outside the campus prefixes
            continue

        if flag == 'IN':
            key = (dstip, srcip)  #Tsinghua IP is [0] and target IP is [1]
        else:
            key = (srcip, dstip)

        if key not in ipdict_dict:
            ipdict_dict[key] = {'IN': [], 'OUT': []}

        if flag == 'IN':
            ipdict_dict[key]['IN'].append(
                Flow.IcmpFlow(srcip, srcport, dstip, dstport, first,
                              msec_first, last, msec_last, packets, tbytes,
                              flag))
        else:
            ipdict_dict[key]['OUT'].append(
                Flow.IcmpFlow(srcip, srcport, dstip, dstport, first,
                              msec_first, last, msec_last, packets, tbytes,
                              flag))

    if middle_print[0]:
        for key in ipdict_dict:
            prtln = key[0] + ' => ' + key[1] + '\n' + '\tIN:\n'
            fout0.write(prtln)
            for item in ipdict_dict[key]['IN']:
                fout0.write('\t\t' + item.display_string() + '\n')
            fout0.write('\tOUT:\n')
            for item in ipdict_dict[key]['OUT']:
                fout0.write('\t\t' + item.display_string() + '\n')
            fout0.write('\n')
        fout0.close()

    if middle_print[1]:
        for key in ipdict_dict:
            if len(ipdict_dict[key]['IN']) == 0:
                prtln = key[0] + ' => ' + key[1] + '\n' + '\tOUT:\n'
                fg = 0
                for item in ipdict_dict[key]['OUT']:
                    if item.display_dict()['type'] == 'REQUEST_OUT':
                        if fg == 0:
                            fout1.write(prtln)
                            fg = 1
                        fout1.write('\t\t' + item.display_string() + '\n')
                fout1.write('\n')
        fout1.close()

    if middle_print[2]:
        for key in ipdict_dict:
            if len(ipdict_dict[key]['IN']) == len(
                    ipdict_dict[key]['OUT']) or len(
                        ipdict_dict[key]['IN']) == 0:
                continue
            prtln = key[0] + ' => ' + key[1] + '\n' + '\tIN:\n'
            fout2.write(prtln)
            for item in ipdict_dict[key]['IN']:
                fout2.write('\t\t' + item.display_string() + '\n')
            fout2.write('\tOUT:\n')
            for item in ipdict_dict[key]['OUT']:
                fout2.write('\t\t' + item.display_string() + '\n')
            fout2.write('\n')
        fout2.close()

    if middle_print[3]:
        for key in ipdict_dict:
            if len(ipdict_dict[key]['IN']) != len(ipdict_dict[key]['OUT']):
                continue
            prtln = key[0] + ' => ' + key[1] + '\n' + '\tIN:\n'
            fout3.write(prtln)
            for item in ipdict_dict[key]['IN']:
                fout3.write('\t\t' + item.display_string() + '\n')
            fout3.write('\tOUT:\n')
            for item in ipdict_dict[key]['OUT']:
                fout3.write('\t\t' + item.display_string() + '\n')
            fout3.write('\n')
        fout3.close()

    if middle_process:
        middle_process_f(ipdict_dict)
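middle_process_f() is not shown in this example. A hypothetical stand-in that matches the structure built above and simply reports how many IN/OUT flows were collected per (campus IP, target IP) pair:

def middle_process_f(ipdict_dict):
    # Placeholder only; the real post-processing is not part of this example.
    for (campus_ip, target_ip), flows in ipdict_dict.items():
        print("%s => %s : %d IN / %d OUT" %
              (campus_ip, target_ip, len(flows['IN']), len(flows['OUT'])))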
Example #6
def main():

    print_detail = False
    print_json = True

    for iti in range(10, 23):  # hours 10 through 22
        for jti in range(2):
            if jti == 0:
                hour = iti
                minute = 0
                shour = str(hour)
                ehour = str(hour)
                sminute = '00'
                eminute = '30'
                start = str(hour) + ':00:00'
                end = str(hour) + ':30:00'
                start_time = datetime.datetime(2017, 11, 16, hour, 0, 0)
                stop_time = datetime.datetime(2017, 11, 16, hour, 30, 0)
                start_time_server = datetime.datetime(2017, 11, 16, hour - 1,
                                                      58, 0)
                stop_time_server = datetime.datetime(2017, 11, 16, hour, 28, 0)
            else:
                hour = iti
                minute = 30
                shour = str(hour)
                ehour = str(hour + 1)
                sminute = '30'
                eminute = '00'
                start = str(hour) + ':30:00'
                end = str(hour + 1) + ':00:00'
                start_time = datetime.datetime(2017, 11, 16, hour, 30, 0)
                stop_time = datetime.datetime(2017, 11, 16, hour + 1, 0, 0)
                start_time_server = datetime.datetime(2017, 11, 16, hour, 28,
                                                      0)
                stop_time_server = datetime.datetime(2017, 11, 16, hour, 58, 0)

            fout_string = 'out.' + start

            #d = pynfdump.Dumper('/data2/datasource/',profile='16/',sources=['nfcapd.201711161000','nfcapd.201711161005'])
            d = pynfdump.Dumper()
            #    ponce = 0
            #    d.set_where(start=None,end=None,filename='/data2/datasource/16/nfcapd.201711161000')
            #    dstring = '/data2/datasource/16/'
            #    for i in range(6):
            #        nstr = str(i*5)
            #        if len(nstr) < 2:
            #            nstr = '0' + nstr
            #        dstring += 'nfcapd.2017111610' + nstr + ':'
            #
            #    dstring = dstring[:-1]

            dfiles = '/data2/datasource/16/nfcapd.20171116' + shour + sminute + ':nfcapd.20171116' + ehour + eminute
            d.set_where(start=None, end=None, dirfiles=dfiles)
            #    d.set_where(start=None,end=None,filename='/data2/datasource/16/nfcapd.201711161000')

            records = d.search('proto icmp and host 166.111.8.241')

            fin = open('/data2/datasource/ICMP/time/' + start + '.txt', 'r')
            ip_dict = {}
            agg_dict = {}

            print(dfiles, start)

            for line in fin.readlines():
                items = line.split('#')
                ip = items[0].strip()
                string = items[1].split(',')
                if ip not in ip_dict:
                    ip_dict[ip] = {
                        'IN': [],
                        'OUT': [],
                        'ICMP': [string[1], string[2], string[3].strip()]
                    }
                    agg_dict[ip] = {
                        'IN': [],
                        'OUT': [],
                        'ICMP': [string[1], string[2], string[3].strip()],
                        'time_itv': -3,
                        'flow_time_error_in': [0, 0],
                        'flow_time_error_out': [0, 0],
                        'first_time_error': False,
                        'last_time_error': False,
                        'itvf': -1,
                        'itvl': -1}
            # time_itv codes: -3 initialized but not yet assigned; -2 host unreachable;
            # -1 both first_time_error (first OUT time later than first IN time) and last_time_error.

            fin.close()

            for r in records:
                first = r['first']
                last = r['last']
                msec_first = r['msec_first']
                msec_last = r['msec_last']
                srcip = str(r['srcip'])
                dstip = str(r['dstip'])
                srcport = r['srcport']
                dstport = r['dstport']
                packets = r['packets']
                tbytes = r['bytes']
                srcip_prefix = srcip.split('.')[0] + '.' + srcip.split('.')[1]
                dstip_prefix = dstip.split('.')[0] + '.' + dstip.split('.')[1]
                #    first_time = first + datetime.timedelta(microseconds = msec_first)
                #    last_time = last + datetime.timedelta(microseconds = msec_last)

                if dstip in ip_dict:
                    key = dstip
                    flag = 'OUT'
                elif srcip in ip_dict:
                    key = srcip
                    flag = 'IN'
                else:
                    continue

    #    prtstr = srcip + ' => ' + dstip + ':' + str(dstport) + ' '+ first_time.strftime("%Y-%m-%d %H:%M:%S.%f") + ' ' + last_time.strftime("%Y-%m-%d %H:%M:%S.%f")+ ' ' + str(packets) + ' ' + str(tbytes)
                ip_dict[key][flag].append(
                    Flow.IcmpFlow(srcip, srcport, dstip, dstport, first,
                                  msec_first, last, msec_last, packets, tbytes,
                                  flag))

            for key in ip_dict:
                if len(ip_dict[key]['IN']) == 0:
                    agg_dict[key]['time_itv'] = -2
                    continue
                if len(ip_dict[key]['OUT']) == 0:
                    agg_dict[key]['time_itv'] = -2
                    continue
                # Gather this key's raw flows into request/response FlowGroups
                ip = key
                srcip, srcport = ip_dict[key]['OUT'][0].get_srcip()
                dstip, dstport = ip_dict[key]['OUT'][0].get_dstip()
                lgin = len(ip_dict[key]['IN'])
                lgout = len(ip_dict[key]['OUT'])
                real_flow_dict = {'IN': [], 'OUT': []}
                real_flow_list = []
                real_flow_dict['IN'] = [
                    Flow.FlowGroup(srcip, srcport, dstip, dstport)
                ]
                real_flow_dict['OUT'] = [
                    Flow.FlowGroup(srcip, srcport, dstip, dstport)
                ]
                i = 0
                for j in range(lgout):
                    agg_dict[key]['flow_time_error_out'][1] += 1
                    if ip_dict[key]['OUT'][j].get_first_time(
                    ) > ip_dict[key]['OUT'][j].get_last_time():
                        agg_dict[key]['flow_time_error_out'][0] += 1

                    if real_flow_dict['OUT'][i].add_flow(
                            ip_dict[key]['OUT'][j]):
                        pass
                    else:
                        i += 1
                        real_flow_dict['OUT'].append(
                            Flow.FlowGroup(srcip, srcport, dstip, dstport))
                        real_flow_dict['OUT'][i].add_flow(
                            ip_dict[key]['OUT'][j])


#            except TypeError:
#                print key,ip_dict[key]
#                print j,ip_dict[key]['OUT'][j]
#                exit()

#        for item in ip_dict[key]:
#            print item.print_string()
#        print
#        for item in real_flow_dict[key]:
#            print item.display_string():
#        exit()

                i = 0
                for j in range(lgin):
                    agg_dict[key]['flow_time_error_in'][1] += 1
                    if ip_dict[key]['IN'][j].get_first_time(
                    ) > ip_dict[key]['IN'][j].get_last_time():
                        agg_dict[key]['flow_time_error_in'][0] += 1

                    if real_flow_dict['IN'][i].add_flow(ip_dict[key]['IN'][j]):
                        pass
                    else:
                        i += 1
                        real_flow_dict['IN'].append(
                            Flow.FlowGroup(srcip, srcport, dstip, dstport))
                        real_flow_dict['IN'][i].add_flow(ip_dict[key]['IN'][j])

                for item in real_flow_dict['OUT']:
                    agg_dict[key]['OUT'].append(item)

                for item in real_flow_dict['IN']:
                    agg_dict[key]['IN'].append(item)

                lenin = len(real_flow_dict['IN'])
                for item in real_flow_dict['OUT']:
                    for i in range(lenin):
                        if datetime.timedelta(0, -10, 0) < item.get_first_time(
                        ) - real_flow_dict['IN'][i].get_first_time(
                        ) < datetime.timedelta(0, 10, 0):
                            real_flow_list.append({
                                'IN': real_flow_dict['IN'][i],
                                'OUT': item
                            })
                            break
                    # If no IN group matched within the ±10 s window, this OUT group stays unpaired.

                for item in real_flow_list:
                    if agg_dict[key]['time_itv'] in [-2, -4]:
                        break
                    elif agg_dict[key]['time_itv'] != -3:
                        if item['OUT'].get_first_time() > stop_time_server:
                            break
                        else:
                            agg_dict[key]['time_itv'] = -4
                    time_itvf = item['IN'].get_first_time(
                    ) - item['OUT'].get_first_time()
                    time_itvl = item['IN'].get_last_time(
                    ) - item['OUT'].get_last_time()
                    dtzero = datetime.timedelta(0, 0, 0)

                    if time_itvf > dtzero:
                        # store the interval as integer milliseconds
                        agg_dict[key]['itvf'] = (time_itvf.seconds * 1000 +
                                                 time_itvf.microseconds // 1000)
                    else:
                        agg_dict[key]['itvf'] = -1

                    if time_itvl > dtzero:
                        agg_dict[key]['itvl'] = (time_itvl.seconds * 1000 +
                                                 time_itvl.microseconds // 1000)
                    else:
                        agg_dict[key]['itvl'] = -1

                    if time_itvf < dtzero:
                        agg_dict[key]['first_time_error'] = True
                        if time_itvl < dtzero:
                            agg_dict[key]['time_itv'] = -1
                            agg_dict[key]['last_time_error'] = True
                        else:
                            agg_dict[key]['time_itv'] = time_itvl
                    else:
                        agg_dict[key]['time_itv'] = time_itvf
                        if time_itvl < dtzero:
                            agg_dict[key]['last_time_error'] = True
                        elif time_itvf > datetime.timedelta(0, 0, 100000):
                            if time_itvl < datetime.timedelta(0, 0, 100000):
                                agg_dict[key]['time_itv'] = time_itvl
                            elif datetime.timedelta(
                                    0, 0, -50000
                            ) < time_itvl - time_itvf < datetime.timedelta(
                                    0, 0, 50000):
                                agg_dict[key]['time_itv'] = (time_itvf +
                                                             time_itvl) / 2

            if print_detail:
                p_detail(ip_dict, agg_dict, fout_string)

            if print_json:
                p_json(ip_dict, agg_dict, fout_string)
Example #7
import sys, os
import logging
import pynfdump
import datetime

logging.basicConfig(
    level=logging.DEBUG,
    format=
    '%(asctime)s %(filename)s [line:%(lineno)d] %(levelname)s %(message)s')
InnerIP_list = [
    '166.111', '59.66', '101.5', '101.6', '183.172', '183.173', '118.229',
    '202.112', '202.38', '106.120'
]

# sources is expected to be a list of source names, as in the other examples.
datain = pynfdump.Dumper('../',
                         profile='datasource/flowdata',
                         sources=['flowdata/'])
#datain.set_where(start=None,end=None,filename='../datasource/flowdata/nfcapd.201705171805')
datain.set_where(start=None, end=None)
query = "proto icmp"

fin = open("ip.txt", 'r')
line = fin.readline()
ip_dict = {}
while line:
    ip_dict[line.strip()] = 0
    line = fin.readline()
#    for ip in ip_list:
#        query = query + ' host or ' + ip
#    query = query[1:] + ' and proto icmp'
logging.debug(query)
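The commented-out loop above never finishes building the per-host filter (and would place 'or' on the wrong side of 'host'). A possible completion, hypothetical rather than the original author's code, restricting the ICMP query to the addresses read from ip.txt:

hosts = " or ".join("host " + ip for ip in ip_dict)
if hosts:
    query = "proto icmp and (" + hosts + ")"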
Example #8
def parse_stats(txt):
    lines = [l.strip() for l in txt.strip().splitlines()]
    return list(pynfdump.Dumper().parse_stats(lines))
Example #9
def parse_search_helper(txt):
    lines = [l.strip() for l in txt.strip().splitlines()]
    return list(pynfdump.Dumper().parse_search(lines))
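Both helpers simply feed captured nfdump output through pynfdump's parsers. A hypothetical usage, assuming a text file that holds output previously captured in nfdump's pipe format (the file name is a placeholder; no record lines are fabricated here):

with open("captured_nfdump_pipe_output.txt") as f:
    for rec in parse_search_helper(f.read()):
        print(rec['srcip'], rec['dstip'], rec['bytes'])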