Esempio n. 1
0
    def __init__(self, file_path, limit, skip):
        """Record the capture location and bounds, then build the feature extractor.

        Args:
            file_path: Location of the capture; stored as ``self.path``.
            limit: Stored unchanged as ``self.limit``.
            skip: Initial value of ``self.curPacketIndx``.
        """
        self.path, self.limit = file_path, limit
        self.curPacketIndx = skip

        # Feature extractor (AfterImage): the host cap and the session cap
        # passed to netStat share the same very large value.
        table_cap = 100000000000
        self.nstat = ns.netStat(np.nan, table_cap, table_cap)
Esempio n. 2
0
    def test_run(self):
        """Replay the SYN capture through netStat and print the mean update time.

        The first TSV row is skipped as a header.  Every following row is
        converted to ``updateGetStats`` arguments and the call is timed.  On
        row 10000 the most recently processed packet and the mean per-call
        time are printed and the replay stops.  Nothing is asserted; this is
        a timing/smoke run that needs the dataset on local disk.
        """
        maxHost = 50
        maxSess = 50
        nstat = ns.netStat(maxHost, maxSess)

        def parse_row(row):
            """Turn one TSV row into the positional arguments of updateGetStats."""
            timestamp = row[0]
            framelen = row[1]
            srcMAC = row[2]
            # Only one of the IPv4/IPv6 columns is filled, so concatenating
            # them yields whichever address is present.
            srcIP = row[5] + row[50]
            dstIP = row[6] + row[51]
            # Same trick for the TCP/UDP port columns.
            srcproto = row[14] + row[32]
            dstproto = row[15] + row[33]
            if srcproto == '':  # no port: it's a L2/L1 level protocol
                if row[37] != '':  # is ARP
                    srcproto = 'arp'
                    dstproto = 'arp'
                    srcIP = row[2]  # src MAC
                    dstIP = row[3]  # dst MAC
                elif row[36] != '':  # is IGMP
                    srcproto = 'igmp'
                    dstproto = 'igmp'
                elif row[34] != '':  # is ICMP
                    srcproto = 'icmp'
                    dstproto = 'icmp'
                elif srcIP + srcproto + dstIP + dstproto == '':  # some other protocol
                    srcIP = row[2]  # src MAC
                    dstIP = row[3]  # dst MAC
            return (srcMAC, srcIP, srcproto, dstIP, dstproto,
                    int(framelen), float(timestamp))

        # Raw string: the previous literal contained the invalid escape "\d",
        # which newer Python versions warn about at compile time.  The path
        # value itself is unchanged.
        with open(r'D:\datasets\SYN.tsv', 'rt', encoding="utf8") as tsvin:
            timestats = []
            packet = None
            rows = csv.reader(tsvin, delimiter='\t')
            for count, row in enumerate(rows, start=1):
                if count % 10000 == 0:
                    print(count)
                if count == 1:
                    continue  # header row
                if count == 10000:
                    print(packet)
                    print('Mean packet processing time: ' +
                          str(np.mean(timestats)))
                    break
                # The string-to-number conversions happen in parse_row, so
                # the timer below covers only the netStat update itself.
                packet = parse_row(row)
                tic = time.time()
                nstat.updateGetStats(*packet)
                timestats.append(time.time() - tic)
Esempio n. 3
0
    def __init__(self, file_path, limit=np.inf):
        """Record the capture location, prepare it, then build the feature extractor.

        Args:
            file_path: Location of the capture; stored as ``self.path``.
            limit: Stored unchanged as ``self.limit``; defaults to ``np.inf``.
        """
        self.path, self.limit = file_path, limit
        self.parse_type = None  # not known yet
        self.curPacketIndx = 0
        # Reader handles; both start unset.
        self.tsvin = None  # used when parsing a TSV file
        self.scapyin = None  # used when parsing a pcap with scapy

        # Prepare the capture; this runs after the attributes above are set
        # and before the feature extractor is created.
        self.__prep__()

        # Feature extractor (AfterImage): the host cap and the session cap
        # passed to netStat share the same very large value.
        table_cap = 100000000000
        self.nstat = ns.netStat(np.nan, table_cap, table_cap)
Esempio n. 4
0
 def test_HostLimit(self):
     """Check that netStat raises LookupError once the host limit is exceeded.

     Feeds ``maxSess`` packets for each of ``maxHost + 1`` distinct source
     addresses.  If LookupError is raised, exactly ``maxHost`` source hosts
     must have been fully processed before it.
     """
     maxHost = 10
     maxSess = 10
     nstat = ns.netStat(maxHost, maxSess)
     t = 0
     hostCount = 0
     try:
         for src_h in range(0, maxHost + 1):
             for sid in range(0, maxSess):
                 nstat.updateGetStats('MAC', '10.0.0.' + str(src_h),
                                      str(sid), '10.0.1.1', str(sid), 1, t)
                 t = t + 0.001
             hostCount = hostCount + 1
     except LookupError:
         # A mismatch means LookupError was not thrown at the right point
         # (more or fewer hosts were allowed).  assertEqual replaces the
         # assertEquals alias, which was removed in Python 3.12.
         self.assertEqual(hostCount, maxHost)
         return
     # Reached only when LookupError was never raised.
     # NOTE(review): at this point hostCount == maxHost + 1, so this
     # assertion always passes; confirm whether the test should fail here.
     self.assertLessEqual(maxHost, hostCount)
Esempio n. 5
0
 def test_MACIPLimit(self):
     """Check that netStat raises LookupError once the MAC-IP limit is exceeded.

     Feeds four distinct MAC strings for each of ``maxHost`` source
     addresses.  If LookupError is raised, exactly ``maxHost * 3 + 1``
     updates must have completed before it.
     """
     maxHost = 10
     maxSess = 10
     nstat = ns.netStat(maxHost, maxSess)
     t = 0
     MACIPCount = 0
     try:
         for src_h in range(0, maxHost):
             for MAC in range(0, 3 + 1):
                 nstat.updateGetStats(str(MAC), '10.0.0.' + str(src_h),
                                      str(1), '10.0.1.1', str(2), 1, t)
                 t = t + 0.001
                 MACIPCount = MACIPCount + 1
     except LookupError:
         # A mismatch means LookupError was not thrown at the right point
         # (more or fewer entries were allowed).  assertEqual replaces the
         # assertEquals alias, which was removed in Python 3.12.
         self.assertEqual(MACIPCount, maxHost * 3 + 1)
         return
     # Reached only when LookupError was never raised.
     # NOTE(review): at this point MACIPCount == maxHost * 4, so this
     # assertion always passes; confirm whether the test should fail here.
     self.assertLessEqual(maxHost * 3, MACIPCount)
Esempio n. 6
0
 def test_purgeOldData(self):
     """Insert an old and a recent batch of sessions, purge, and check the drop.

     The first batch is stamped with time 0 and the second with time
     1000000000.  ``purgeOldRecords`` is then called with the later time and
     the size of ``nstat.HT.HT`` must shrink by exactly 400 entries.  Timing
     and shallow memory figures are printed for information only.
     """
     maxHost = 255
     maxSess = 80000
     nstat = ns.netStat(maxHost, maxSess)

     def feed(n_src, n_dst, n_sid, stamp):
         # One update per (source host, destination host, session id); the
         # session id doubles as both source and destination port string.
         for src_h in range(0, n_src):
             for dst_h in range(0, n_dst):
                 for sid in range(0, n_sid):
                     nstat.updateGetStats('MAC', '10.0.0.' + str(src_h),
                                          str(sid), '10.0.0.' + str(dst_h),
                                          str(sid), 1, stamp)

     def shallow_mem():
         # sys.getsizeof is shallow: only the four containers themselves
         # are measured, not what they hold.
         tables = (nstat.HT.HT, nstat.Rec_MAC_Host, nstat.Rec_Hosts,
                   nstat.Rec_Sessions)
         return sum(sys.getsizeof(table) for table in tables)

     print("Adding Before Sessions")
     feed(5, 10, 5, 0)
     print("Adding After Sessions")
     t = 1000000000
     feed(5, 5, 2, t)

     print("Begin Purge")
     before = len(nstat.HT.HT)
     mem_before = shallow_mem()
     tic = time.time()
     nstat.purgeOldRecords(t)
     toc = time.time() - tic
     gc.collect()
     mem_after = shallow_mem()
     after = len(nstat.HT.HT)

     megabyte = 1024 * 1024
     report = ''.join([
         'Purge: Before ', str(before), ' After ', str(after),
         'Time: ', str(toc), ' seconds.\nMem Before: ',
         str(mem_before / megabyte), 'MB, Mem After: ',
         str(mem_after / megabyte), ' MB',
     ])
     print(report)
     # The purge is expected to remove exactly 400 table entries.
     self.assertEqual(before - after, 400)
Esempio n. 7
0
    def test_run_affectOfOneSided_winstats(self):
        """Replay the SYN capture without and with periodic purges and compare.

        Pass 1 feeds rows 2..99999 to a fresh netStat and keeps the last
        statistics vector (``stats1``).  Pass 2 does the same on another
        fresh netStat, but calls ``purgeOldRecords`` on every 10000th row
        before updating; its last vector is ``stats2``.  Summaries are
        printed when a pass reaches row 100000.  The original intent noted
        on this test was that purging "should have no affect on results".
        """
        maxHost = 50
        maxSess = 50
        # Raw string: the previous literal contained the invalid escape "\d",
        # which newer Python versions warn about at compile time.  The path
        # value itself is unchanged.
        dataset = r'D:\datasets\SYN.tsv'
        stop_row = 100000

        def parse_row(row):
            """Turn one TSV row into the positional arguments of updateGetStats."""
            timestamp = row[0]
            framelen = row[1]
            srcMAC = row[2]
            # Only one of the IPv4/IPv6 columns is filled, so concatenating
            # them yields whichever address is present.
            srcIP = row[5] + row[50]
            dstIP = row[6] + row[51]
            # Same trick for the TCP/UDP port columns.
            srcproto = row[14] + row[32]
            dstproto = row[15] + row[33]
            if srcproto == '':  # no port: it's a L2/L1 level protocol
                if row[37] != '':  # is ARP
                    srcproto = 'arp'
                    dstproto = 'arp'
                    srcIP = row[2]  # src MAC
                    dstIP = row[3]  # dst MAC
                elif row[36] != '':  # is IGMP
                    srcproto = 'igmp'
                    dstproto = 'igmp'
                elif row[34] != '':  # is ICMP
                    srcproto = 'icmp'
                    dstproto = 'icmp'
                elif srcIP + srcproto + dstIP + dstproto == '':  # some other protocol
                    srcIP = row[2]  # src MAC
                    dstIP = row[3]  # dst MAC
            return (srcMAC, srcIP, srcproto, dstIP, dstproto,
                    int(framelen), float(timestamp))

        def replay(purge):
            """Run one pass over the capture on a fresh netStat.

            Returns ``(last_stats, last_packet, timings, reached_stop)``;
            ``reached_stop`` is True when the pass ended on ``stop_row``
            rather than at end of file.
            """
            nstat = ns.netStat(maxHost, maxSess)
            stats = None
            packet = None
            timings = []
            with open(dataset, 'rt', encoding="utf8") as tsvin:
                rows = csv.reader(tsvin, delimiter='\t')
                for count, row in enumerate(rows, start=1):
                    if count % 10000 == 0:
                        print(count)
                    if count == 1:
                        continue  # header row
                    if count == stop_row:
                        return stats, packet, timings, True
                    packet = parse_row(row)
                    if purge and count % 10000 == 0:
                        # packet[-1] is the row's timestamp as a float.
                        nstat.purgeOldRecords(packet[-1])
                    tic = time.time()
                    stats = nstat.updateGetStats(*packet)
                    timings.append(time.time() - tic)
            return stats, packet, timings, False

        stats1, _, timestats, reached = replay(purge=False)
        if reached:
            print('Mean packet processing time: ' + str(np.mean(timestats)))

        # reset, with purges
        stats2, packet, timestats, reached = replay(purge=True)
        if reached:
            print(packet)
            print(stats1[0:10])
            print(stats2[0:10])
            print('Relative error')
            print(
                np.absolute(np.array(stats1) - np.array(stats2)) /
                np.array(stats1))
            print('percent error:')
            print((np.array(stats2) / np.array(stats1) - 1) * 100)
            print('Mean packet processing time: ' + str(np.mean(timestats)))

        # average percent error with purges
        # NOTE(review): this requires the mean percent error to be at least
        # 1e-6, which looks inverted relative to the "no effect" intent;
        # left unchanged pending confirmation.
        self.assertGreaterEqual(
            np.mean((np.array(stats2) / np.array(stats1) - 1) * 100),
            0.000001)
Esempio n. 8
0
def RTSP_videoJak_Dataset_Gen():
    """Build the labelled videoJak CSV from the parsed RTSP capture.

    Reads ``RTSP_record_parsed.tsv`` row by row.  The first row is copied
    out as the header, followed by the netStat feature names and a
    ``Class`` column.  Every other row is fed to a netStat instance and
    written out as: original columns (empty ones replaced by ``-1``), the
    returned statistics, and the label.  The label is ``"1"`` when the
    timestamp is at least 2874.460763 and the fourth dot-separated part of
    column 15 (the source address) is ``13``; otherwise ``"0"``.

    A row that raises while being parsed, processed or written is reported
    on stdout and skipped.
    """
    ht = ns.netStat()
    with io.open('/media/root/66fff5fd-de78-45b0-880a-d2e8104242b5/datasets/RTSP_record_parsed.tsv','rt',encoding="utf8") as tsvin, io.open('/media/root/66fff5fd-de78-45b0-880a-d2e8104242b5/datasets/videoJak_full.csv', 'wt', newline='') as csvout:
        reader = csv.reader(tsvin, delimiter='\t')
        count = 0

        for row in reader:
            count += 1
            if count % 10000 == 0:
                print(count)

            if count == 1:
                # Header: original column names, netStat feature names,
                # then the label column.
                for f in row:
                    csvout.write(unicode(str(f) + ",", "utf-8"))
                for f in ht.getNetStatHeaders():
                    csvout.write(unicode(str(f) + ",", "utf-8"))
                csvout.write(unicode("Class", "utf-8"))
                csvout.write(unicode("\n", "utf-8"))
                continue

            try:
                timestamp = row[53]
                framelen = row[54]
                srcIP = row[15]  # ipv4 or ipv6 address
                dstIP = row[16]  # ipv4 or ipv6 address
                # UDP or TCP port: only one of the two columns is filled, so
                # concatenating them yields whichever port is present.
                srcproto = row[17] + row[33]
                dstproto = row[18] + row[34]

                if srcproto == '':  # it's a L2/L1 level protocol
                    if row[48] != '':  # is ARP
                        srcproto = 'arp'
                        dstproto = 'arp'
                        srcIP = row[49]  # src MAC
                        dstIP = row[51]  # dst MAC
                    elif row[40] != '':  # is IGMP
                        srcproto = 'igmp'
                        dstproto = 'igmp'
                    elif row[37] != '':  # is ICMP
                        srcproto = 'icmp'
                        dstproto = 'icmp'
                    elif srcIP + srcproto + dstIP + dstproto == '':  # some other protocol
                        srcIP = row[1]  # src MAC
                        dstIP = row[0]  # dst MAC

                # The source address is passed in the first (MAC) position
                # as well, exactly as before.
                stats = ht.updateGetStats(srcIP, srcIP, srcproto, dstIP,
                                          dstproto, int(framelen),
                                          float(timestamp))

                Label = "0"
                if float(timestamp) >= 2874.460763:  # 1750648 frame.no
                    print("reached")
                    # row[15] is the source address column.
                    if row[15] != '' and row[15].split(".")[3] == "13":
                        Label = "1"

                # replace missing values with -1
                filled = ['-1' if item == '' else item for item in row]
                j = ', '.join(map(str, filled))
                j += "," + ', '.join(map(str, list(stats)))
                j += "," + Label
                csvout.write(unicode(str(j), "utf-8"))
                csvout.write(unicode("\n", "utf-8"))
            except Exception:
                # Narrowed from a bare ``except:`` so Ctrl-C and SystemExit
                # can still stop this long-running job.
                # NOTE(review): bumping count here makes the running row
                # number skip ahead by one per rejected row; kept unchanged.
                count += 1
                print("observation " + str(count) + " was rejected")
Esempio n. 9
0
def physicalMIM_Dataset_Gen():
    """Build the labelled physical man-in-the-middle CSV from the parsed capture.

    Reads ``piddle_record_parsed.tsv`` row by row.  The first row is copied
    out as the header, followed by the netStat feature names and a
    ``Class`` column.  Every other row is fed to a netStat instance keyed
    by traffic direction (``"in"`` when column 1 is the gateway MAC,
    ``"out"`` otherwise) and written out as: original columns (empty ones
    replaced by ``-1``), the returned statistics, and the label.  The label
    is ``"1"`` once the running row count reaches 5179941.

    A row for which the netStat update raises is reported on stdout and
    skipped.
    """
    ht = ns.netStat()
    with io.open('/media/root/66fff5fd-de78-45b0-880a-d2e8104242b5/datasets/piddle_record_parsed.tsv','rt',encoding="utf8") as tsvin, io.open('/media/root/66fff5fd-de78-45b0-880a-d2e8104242b5/datasets/piddle_FULL.csv', 'wt', newline='') as csvout:
        reader = csv.reader(tsvin, delimiter='\t')
        count = 0

        for row in reader:
            count += 1
            if count % 10000 == 0:
                print(count)

            if count == 1:
                # Header: original column names, netStat feature names,
                # then the label column.
                for f in row:
                    csvout.write(unicode(str(f) + ",", "utf-8"))
                for f in ht.getNetStatHeaders():
                    csvout.write(unicode(str(f) + ",", "utf-8"))
                csvout.write(unicode("Class", "utf-8"))
                csvout.write(unicode("\n", "utf-8"))
                continue

            timestamp = row[53]
            framelen = row[54]
            srcIP = row[15]  # ipv4 or ipv6 address
            dstIP = row[16]  # ipv4 or ipv6 address
            # UDP or TCP port: only one of the two columns is filled, so
            # concatenating them yields whichever port is present.
            srcproto = row[17] + row[33]
            dstproto = row[18] + row[34]
            if srcproto == '':  # it's a L2/L1 level protocol
                if row[48] != '':  # is ARP
                    srcproto = 'arp'
                    dstproto = 'arp'
                    srcIP = row[49]  # src MAC
                    dstIP = row[51]  # dst MAC
                elif row[40] != '':  # is IGMP
                    srcproto = 'igmp'
                    dstproto = 'igmp'
                elif row[37] != '':  # is ICMP
                    srcproto = 'icmp'
                    dstproto = 'icmp'
                elif srcIP + srcproto + dstIP + dstproto == '':  # some other protocol
                    # Prefer columns 1/0 for the MACs and fall back to
                    # columns 49/51 when those are empty.
                    srcIP = row[1] or row[49]
                    dstIP = row[0] or row[51]
                    srcproto = "other"
                    dstproto = "other"
            elif srcIP + dstIP == '':
                # Ports present but no IP addresses: key on columns 1/0.
                srcIP = row[1]
                dstIP = row[0]

            # the source is the yamaha gateway
            if row[1] == '00:a0:de:f1:88:6e':
                direction = "in"
            else:
                direction = "out"

            try:
                stats = ht.updateGetStats(direction, srcIP, srcproto, dstIP,
                                          dstproto, int(framelen),
                                          float(timestamp))
            except Exception:
                # Narrowed from a bare ``except:`` so Ctrl-C and SystemExit
                # can still stop this long-running job.
                # NOTE(review): bumping count here shifts the row number the
                # label threshold below is compared against; kept unchanged
                # so existing labels do not move.
                count += 1
                print("skipped netstat")
                continue

            Label = "1" if count >= 5179941 else "0"

            # replace missing values with -1
            filled = ['-1' if item == '' else item for item in row]
            j = ', '.join(map(str, filled))
            j += "," + ', '.join(map(str, list(stats)))
            j += "," + Label
            csvout.write(unicode(str(j), "utf-8"))
            csvout.write(unicode("\n", "utf-8"))
Esempio n. 10
0
def CTU52818_Desaset_Gen_V2_400():
    """Build the labelled CTU-52/818 CSV from the timestamp-sorted flow file.

    Reads ``ctu52818_400_sortedTS.txt`` (comma separated) row by row.  The
    first row is copied out as the header, followed by the netStat feature
    names and a ``Class`` column.  Every other row is fed to a netStat
    instance and written out as: original columns (empty ones replaced by
    ``-1``), the returned statistics, and the label.  The label is ``"1"``
    when column 11 contains ``"Botnet"``, otherwise ``"0"``.

    Timestamps are milliseconds relative to the first data row whose
    timestamp parses.  A row that raises is reported on stdout and skipped.
    """

    def to_millis(stamp):
        """Convert '<date> HH:MM:SS[.frac]' to milliseconds since midnight."""
        parts = [float(p) for p in stamp.split(' ')[1].split(':')]
        return parts[0] * 3600 * 1000 + parts[1] * 60 * 1000 + parts[2] * 1000

    ht = ns.netStat(50000, 50000)
    with io.open('E:/thesis_data/datasets/ctu52818_400_sortedTS.txt',
                 'rt',
                 encoding="utf8") as tsvin, io.open(
                     'E:/thesis_data/datasets/ctu52818_400_full.csv',
                     'wt',
                     newline='') as csvout:
        reader = csv.reader(tsvin, delimiter=',')
        count = 0
        # Set from the first successfully parsed data row.  Previously this
        # was tied to ``count == 2``, so a failure on that row left it
        # undefined and every later row was rejected with NameError.
        startTS = None

        for row in reader:
            count += 1
            if count % 10000 == 0:
                print(count)

            if count == 1:
                # Header: original column names, netStat feature names,
                # then the label column.
                for f in row:
                    csvout.write(unicode(str(f) + ",", "utf-8"))
                for f in ht.getNetStatHeaders():
                    csvout.write(unicode(str(f) + ",", "utf-8"))
                csvout.write(unicode("Class", "utf-8"))
                csvout.write(unicode("\n", "utf-8"))
                continue

            try:
                absolute = to_millis(row[0])
                if startTS is None:
                    startTS = absolute
                timestamp = absolute - startTS

                framelen = row[9]
                # Columns 3 and 5 hold "address" or "address:port".
                src_parts = row[3].split(':')
                dst_parts = row[5].split(':')
                srcIP = src_parts[0]
                dstIP = dst_parts[0]
                srcproto = src_parts[1] if len(src_parts) > 1 else 'NoProto'
                # Fixed: this guard used to test the *source* column, so a
                # destination without a port raised IndexError (row
                # rejected) and a destination port was dropped whenever the
                # source had none.
                dstproto = dst_parts[1] if len(dst_parts) > 1 else 'NoProto'

                # An empty source port means an L2/L1-level protocol; use
                # the protocol name from column 2 for the known ones.
                if srcproto == '' and row[2] in ('ARP', 'IGMP', 'ICMP'):
                    srcproto = row[2]
                    dstproto = row[2]

                # The source address is passed in the first (MAC) position
                # as well, exactly as before.
                stats = ht.updateGetStats(srcIP, srcIP, srcproto, dstIP,
                                          dstproto, int(framelen),
                                          float(timestamp))

                Label = "1" if row[11].find("Botnet") != -1 else "0"

                # replace missing values with -1
                filled = ['-1' if item == '' else item for item in row]
                j = ', '.join(map(str, filled))
                j += "," + ', '.join(map(str, list(stats)))
                j += "," + Label
                csvout.write(unicode(str(j), "utf-8"))
                csvout.write(unicode("\n", "utf-8"))

            except Exception as ex:
                # ``message`` only exists on Python 2 exceptions; fall back
                # to the exception itself elsewhere.
                print(getattr(ex, 'message', ex))
                # NOTE(review): bumping count here makes the running row
                # number skip ahead by one per rejected row; kept unchanged.
                count += 1
                print("observation " + str(count) + " was rejected")
Esempio n. 11
0
def sortCTU52818File():
    """Write the CTU-52/818 flow file re-ordered by time of day.

    The input is opened twice and read in lockstep: one handle goes through
    ``csv.reader`` (tab delimited) to extract the timestamp from column 0,
    the other yields the untouched raw line that is written back out.  The
    header row is written first with its fields comma-joined; the data
    lines follow in ascending order of milliseconds since midnight.  The
    sort is stable, so rows with equal timestamps keep their input order.
    """
    source = 'E:/thesis_data/datasets/ctu52818_400.txt'
    with io.open(source, 'rt', encoding="utf8") as tsvinFullLine, \
            io.open(source, 'rt', encoding="utf8") as tsvin, \
            io.open('E:/thesis_data/datasets/ctu52818_400_sortedTS.csv',
                    'wt',
                    encoding="utf8") as tswrite:
        reader = csv.reader(tsvin, delimiter='\t')
        # Consume the raw header line so raw lines stay aligned with the
        # parsed data rows.
        tsvinFullLine.readline()

        rowListByTS = []
        count = 0
        for row in reader:
            count += 1
            if count % 10000 == 0:
                print(count)

            if count == 1:
                editedLine = ''.join(str(f) + "," for f in row)
                tswrite.write(unicode(str(editedLine) + "\n", "utf-8"))
                continue

            # Sort key: milliseconds since midnight.  Fixed: the first data
            # row used to get key 0 while all others got absolute values,
            # which pinned it to the top regardless of its real time.
            parts = [float(p) for p in row[0].split(' ')[1].split(':')]
            timestamp = (parts[0] * 3600 * 1000 + parts[1] * 60 * 1000 +
                         parts[2] * 1000)

            fullLineOrig = tsvinFullLine.readline()
            rowListByTS.append((timestamp, fullLineOrig))

            # Diagnostic only: rebuild the row without its empty fields and
            # flag any remaining doubled comma.
            last = len(row) - 1
            fullLine = ''.join(
                str(field) + ("," if idx < last else "")
                for idx, field in enumerate(row) if field != '')
            if ',,' in fullLine:
                print("here")

        rowListByTS.sort(key=lambda tup: tup[0])

        for _, line in rowListByTS:
            # Fixed: readline() keeps the trailing newline, so appending
            # another one used to emit a blank line after every record.
            tswrite.write(unicode(str(line.rstrip('\n')) + "\n", "utf-8"))

        print(rowListByTS[:30])
        print("finished sort")