Example No. 1
    def __init__(self, month):
        """
        Initialize database and codec
        """
        if month == 10:
            self.filename = '/data/users/sarthak/filtered-20121001-20121101'
            self.folder = '/data/users/sarthak/node_data_OCT/'
            self.date_start = datetime(2012, 10, 1).date()
            self.date_end = datetime(2012, 11, 1).date()
        else:
            self.filename = '/data/users/sarthak/filtered-20121101-20121201'
            self.folder = '/data/users/sarthak/node_data_NOV/'
            self.date_start = datetime(2012, 11, 1).date()
            self.date_end = datetime(2012, 12, 1).date()

        # mapped key format: device_id, direction, port,
        # trans_proto, domain, timestamp
        # mapped value : packetsize
        self.mapped_format = '%s,%s,%s,%s,%s,%s'
        self.currentNode = ''
        self.Date = ''
        self.nodes = []
#        self.NODE_LIST = pkl.load(open('NODE_LIST.dat', 'rb'))
        try:
            self.db = leveldb.LevelDB(self.filename)     # read leveldb
            self.codec = MessageCodec(pb2file='trace.pb2', typename='passive.Trace')    # mapped to only useful metrics
        except Exception as exc:
            print "Initialization Unsuccessful:", exc
        return
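
A minimal sketch of how the six-field mapped key is laid out, following the field order that packetSeriesReader in Example No. 7 actually formats (device_id, direction, port, trans_proto, domain, timestamp); the concrete values below are hypothetical:

mapped_format = '%s,%s,%s,%s,%s,%s'
# hypothetical values, in key order: device_id, direction, port, trans_proto, domain, timestamp
key = mapped_format % ('aa:bb:cc:dd:ee:ff', 'dw', 443, 6, 'example.com', 1349100000123456)
print key    # aa:bb:cc:dd:ee:ff,dw,443,6,example.com,1349100000123456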
Example No. 2
from collections import defaultdict

import leveldb
import matplotlib.pyplot as plt
import numpy
from lwpb.codec import MessageCodec


def size_dist(filename):
    size_distribution = defaultdict(int)
    db = leveldb.LevelDB(filename)
    codec = MessageCodec(pb2file='trace.pb2', typename='passive.Trace')
    for key, value in db.RangeIter():
        trace = codec.decode(value)
        for packet_series_entry in trace['packet_series']:
            size_distribution[packet_series_entry['size']] += 1
    points = sorted(size_distribution.items())
    xs, ys = zip(*points)
    plt.plot(xs, numpy.cumsum(ys) / float(sum(ys)))    # float() avoids Python 2 integer division
    plt.xlabel('Packet size')
    plt.ylabel('CDF')
    plt.title('Cumulative distribution of packet sizes')
    plt.savefig('packet_size_distribution.pdf')
    print 'Wrote plot to packet_size_distribution.pdf'
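
A sketch of driving size_dist against one of the filtered snapshots named in Example No. 1, assuming trace.pb2 and the leveldb directory are readable from the working directory:

# hypothetical call; any filtered leveldb snapshot path works here
size_dist('/data/users/sarthak/filtered-20121001-20121101')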
Example No. 3
    def __init__(self, filename='/data/users/sarthak/filtered-20130401-20130414', folder='/data/users/sarthak/node_data201304/'):
        """
        Initialize database and codec
        """
        self.filename = filename
        self.folder = folder
        self.date_start = datetime(2013, 4, 1).date()
        self.date_end = datetime(2013, 4, 15).date()
        self.currentNode = ''
        self.Date = ''
        self.NODE_LIST = []

        try:
            self.db = leveldb.LevelDB(filename)     # read leveldb
            self.codec = MessageCodec(pb2file='trace.pb2', typename='passive.Trace')    # mapped to only useful metrics
        except Exception as exc:
            print "Initialization Unsuccessful:", exc
        return
Example No. 4
    def __init__(self, filename='/data/users/sarthak/filtered-20130401-20130414', folder='/data/users/sarthak/node_data201304/'):
        """
        Initialize database and codec
        """
        self.filename = filename
        self.folder = folder
        self.date_start = datetime(2013, 4, 1).date()
        self.date_end = datetime(2013, 4, 15).date()
        # mapped key fields (six-field variant kept commented out below):
        # device_id, direction, port, trans_proto, domain, timestamp
        # mapped value : packetsize
        # self.mapped_format = '%s,%s,%s,%s,%s,%s'
        self.mapped_format = '%s,%s,%s,%s,%s'       # five-field variant in use
        self.currentNode = ''
        self.Date = ''
        self.nodes = []
        self.NODE_LIST = []

        try:
            self.db = leveldb.LevelDB(filename)     # read leveldb
            self.codec = MessageCodec(pb2file='trace.pb2', typename='passive.Trace')    # mapped to only useful metrics
        except Exception as exc:
            print "Initialization Unsuccessful:", exc
        return
Example No. 5
import pickle as pkl
import struct
from collections import defaultdict
from datetime import datetime

import leveldb
from lwpb.codec import MessageCodec


def parse_key(key):
    node_id, anonymization_context, remainder = key.split('\x00', 2)
    session_id, sequence_number = struct.unpack('>QI', remainder)
    session_id = session_id - 2 ** 63
    sequence_number = sequence_number - 2 ** 31
    return node_id, anonymization_context, session_id, sequence_number

filename = '/data/users/sarthak/filtered-20130401-20130414/'
db = leveldb.LevelDB(filename)
codec = MessageCodec(pb2file='trace.pb2', typename='passive.Trace')
num_dropped_packets = defaultdict(int)
NODE_LIST = []
date_start = datetime(2013, 4, 1)
date_end = datetime(2013, 4, 15)

for key, value in db.RangeIter():
    node_id, anon_context, session_id, sequence_number = parse_key(key)
    trace = codec.decode(value)
    currentTime = datetime.fromtimestamp(trace['trace_creation_timestamp'])
    if date_start <= currentTime < date_end:
        if node_id not in NODE_LIST:
            print node_id
            NODE_LIST.append(node_id)

    #if 'packet_series_dropped' in trace:
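
A minimal round-trip check for parse_key under the key layout it implies (node_id and anonymization_context separated by NUL bytes, then struct.pack('>QI', ...) with the signed values shifted into unsigned range); make_key is a hypothetical helper, not part of the original code:

def make_key(node_id, anonymization_context, session_id, sequence_number):
    # inverse of parse_key: shift signed ints into unsigned range, pack big-endian
    remainder = struct.pack('>QI', session_id + 2 ** 63, sequence_number + 2 ** 31)
    return node_id + '\x00' + anonymization_context + '\x00' + remainder

print parse_key(make_key('OW0123', 'context0', -5, 7))
# ('OW0123', 'context0', -5, 7)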
Example No. 6
import pickle as pkl
import struct
from collections import defaultdict
from datetime import datetime

import leveldb
from lwpb.codec import MessageCodec


class passiveHandler(object):
    """
    Attributes:
    db = leveldb database handler

    """
    def __init__(self, filename='/data/users/sarthak/filtered-20130401-20130414', folder='/data/users/sarthak/node_data201304/'):
        """
        Initialize database and codec
        """
        self.filename = filename
        self.folder = folder
        self.date_start = datetime(2013, 4, 1).date()
        self.date_end = datetime(2013, 4, 15).date()
        self.currentNode = ''
        self.Date = ''
        self.NODE_LIST = []

        try:
            self.db = leveldb.LevelDB(filename)     # read leveldb
            self.codec = MessageCodec(pb2file='trace.pb2', typename='passive.Trace')    # mapped to only useful metrics
        except Exception as exc:
            print "Initialization Unsuccessful:", exc
        return

    def initialization(self, node_id):
        self.currentNode = node_id
        self.flowTable = defaultdict(int)           # from trace['flow_table_entry']
        # [flow_id] : (dstip, dstport, srcip, srcport, transport_protocol)
        self.addressTable = defaultdict(int)        # from trace['address_table_entry']
        # [ip_address] : mac_address
        self.NODE_LIST.append(node_id)

        # DATA
        self.bytespersecond = defaultdict(int)      # bytes transferred per second per node per direction
        self.bytesperportperminute = defaultdict(int)
        self.requestsperportperminute = defaultdict(int)

        # DEVICE v/s connected or not
        self.devices = defaultdict(list)

        self.routerActive = []

        return

    def iterTrace(self):

        decode_it = True
        for key, value in self.db.RangeIter():
            #get key
            node_id, anon_context, session_id, sequence_number = self.parse_key(key)
            if node_id != self.currentNode:
                if self.currentNode != '':
                    # decode trace only if date in range, else wait till next node_id
                    decode_it = True
                    # dump old data
                    self.dumpData()
                # Reinitialize
                self.initialization(node_id)
                print "START", node_id
                self.NODE_LIST.append(node_id)

            if decode_it:
                trace = self.codec.decode(value)
                # table timestamp 30 sec granularity - after write
                currentTime = datetime.fromtimestamp(trace['trace_creation_timestamp'])
                currentDate = currentTime.date()
                if currentDate >= self.date_end:
                    decode_it = False
                    continue

                if self.Date != currentDate:
                    print currentDate
                    self.Date = currentDate

                # router activity periods
                self.routerActive.append(currentTime)

                # maintain current address table IP:MAC
                self.addressTableMaker(trace)

                # maintain current flowtable FLOWID: SRCIP, SRCPORT, DSTIP, DSTPORT, TRANS_PROTO
                self.flowTableMaker(trace)

                # get device_id : ip -> srcip : flowid, direction / dstip : flowid,
                # direction -> flowid: timestamp, size
                self.packetSeriesReader(trace)
            else:
                continue

        # dump the final node's data as well
        if self.currentNode != '':
            self.dumpData()
        return

    #trace level
    def packetSeriesReader(self, trace):
        """
        trace['packet_series'] = [
            {'flow_id: (int), 'size': (int),
                'time_microseconds': (long int)},
            ... ]
        """

        if 'packet_series' not in trace:
            return

        for pse in trace['packet_series']:
            flow_id = pse['flow_id']

            timestamp = pse['timestamp_microseconds']       # unix timestamp in microseconds

            packetSize = pse['size']

            # get corresponding IPs/ports from self.flowTable OR trace['flow_table_entry']
            if flow_id not in (2, 4) and flow_id in self.flowTable:
                dstip, dstport, srcip, srcport, trans_proto = self.flowTable[flow_id]
            else:
                continue
            # get corresponding devices/directions/ports
            if dstip in self.addressTable:
                deviceid = self.addressTable[dstip]
                direction = 'dw'
                port = srcport      # monitor port number on the server side
            elif srcip in self.addressTable:
                deviceid = self.addressTable[srcip]
                direction = 'up'
                port = dstport      # monitor port number on the server side
            else:
                continue
            if self.Date >= self.date_start:

                # size distributions
                ts = datetime.fromtimestamp(timestamp * 0.000001)
                timesec = ts.replace(microsecond=0)
                timehash = timesec.replace(second=0)
                self.size_stats(packetSize, timesec.strftime("%s"), direction)
                self.port_stats(packetSize, timehash.strftime("%s"), direction, port, deviceid)
                self.device_stats(timehash.strftime("%s"), deviceid)

        return

    def parse_key(self, key):
        node_id, anonymization_context, remainder = key.split('\x00', 2)
        session_id, sequence_number = struct.unpack('>QI', remainder)
        session_id = session_id - 2 ** 63
        sequence_number = sequence_number - 2 ** 31
        return node_id, anonymization_context, session_id, sequence_number

    def flowTableMaker(self, trace):
        if 'flow_table_entry' in trace:
            for fte in trace['flow_table_entry']:
                self.flowTable[fte['flow_id']] = (fte['destination_ip'],
                                                  fte['destination_port'],
                                                  fte['source_ip'],
                                                  fte['source_port'],
                                                  fte['transport_protocol'])
        return

    def addressTableMaker(self, trace):
        if 'address_table_entry' in trace:
            for ate in trace['address_table_entry']:
                self.addressTable[ate['ip_address']] = ate['mac_address']
        return

    def testIterTrace(self, totcount):

        cnt = 0
        decode_it = False
        for key, value in self.db.RangeIter():
            node_id, anon_context, session_id, sequence_number = self.parse_key(key)
            if node_id != self.currentNode:
                decode_it = True
                self.currentNode = node_id
                print(node_id)
                cnt += 1

            if decode_it:
                trace = self.codec.decode(value)

                # table timestamp 30 sec granularity - after write
                currentTime = datetime.fromtimestamp(trace['trace_creation_timestamp'])
                currentDate = currentTime.date()
                print(currentDate)
                if currentDate >= self.date_end:
                    decode_it = False
                    continue
                if self.Date != currentDate:
                    self.Date = currentDate

                self.addressTableMaker(trace)
                self.flowTableMaker(trace)
                self.packetSeriesReader(trace)
                if cnt == totcount:
                    break
        return

    def size_stats(self, psize, timesec, direction):

        self.bytespersecond[timesec, direction] += psize
        return

    def port_stats(self, size, timehash, direction, port, deviceid):
        self.bytesperportperminute[deviceid, port, direction, timehash] += size
        self.requestsperportperminute[deviceid, port, direction, timehash] += 1
        return

    def device_stats(self, timehash, deviceid):
        if deviceid not in self.devices[timehash]:
            self.devices[timehash].append(deviceid)


    def dumpData(self):
        # DATA
        pkl.dump(self.bytespersecond, open(self.folder + self.currentNode + 'bytespersecond.out', 'wb'))

        pkl.dump(self.bytesperportperminute, open(self.folder + self.currentNode + 'bytesperportperminute.out', 'wb'))
        pkl.dump(self.requestsperportperminute, open(self.folder + self.currentNode + 'requestsperportperminute.out', 'wb'))

        pkl.dump(self.devices, open(self.folder + self.currentNode + 'device_state.out', 'wb'))

        pkl.dump(self.routerActive, open(self.folder + self.currentNode+'active.out', 'wb'))
        return
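
A hypothetical driver for this handler, assuming trace.pb2 and the default leveldb snapshot exist at the paths above; testIterTrace bounds the pass to a fixed number of nodes:

handler = passiveHandler()
handler.testIterTrace(2)    # decode and aggregate the first two nodes only
# handler.iterTrace()       # full pass: pickles per-node stats into handler.folder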
Example No. 7
import pickle as pkl
import struct
from collections import defaultdict
from datetime import datetime

import leveldb
from lwpb.codec import MessageCodec


class passiveHandler(object):
    """
    Attributes:
    db = leveldb database handler

    """
    def __init__(self, filename='/data/users/sarthak/filtered-20130401-20130414', folder='/data/users/sarthak/node_data/'):
        """
        Initialize database and codec
        """
        self.filename = filename
        self.folder = folder
        self.date_start = datetime(2012, 10, 1).date()
        self.date_end = datetime(2012, 10, 15).date()
        # mapped key format: device_id, direction, port,
        # trans_proto, domain, timestamp
        # mapped value : packetsize
        self.mapped_format = '%s,%s,%s,%s,%s,%s'
        self.currentNode = ''
        self.Date = ''
        self.nodes = []
        self.NODE_LIST = pkl.load(open('NODE_LIST.dat', 'rb'))

        try:
            self.db = leveldb.LevelDB(filename)     # read leveldb
            self.codec = MessageCodec(pb2file='trace.pb2', typename='passive.Trace')    # mapped to only useful metrics
        except Exception as exc:
            print "Initialization Unsuccessful:", exc
        return

    def initialization(self, node_id):
        self.currentNode = node_id
        self.flowTable = defaultdict(int)           # from trace['flow_table_entry']
        self.DNStable = defaultdict(int)            # [ip_address] : (domain, anonymized)
        # [flow_id] : (dstip, dstport, srcip, srcport, transport_protocol)
        self.addressTable = defaultdict(int)        # from trace['address_table_entry']
        # [ip_address] : mac_address
        dbmapped = leveldb.LevelDB(self.filename + '_' + node_id)
        self.nodes.append(node_id)

        # DATA (required by packetSeriesReader and dumpData)
        self.size_dist = defaultdict(int)           # packet size distribution from trace['packet_series']
        self.bytesperminute = defaultdict(int)      # size v/s timestamp from trace['packet_series']
        self.bytesperday = defaultdict(int)

        self.port_dist_size = defaultdict(int)
        self.port_dist_count = defaultdict(int)
        self.bytesperportperminute = defaultdict(int)
        self.requestsperportpersecond = defaultdict(int)   # updated in packetSeriesReader

        # DEVICE v/s connected or not
        self.devices = defaultdict(list)

        self.routerActive = []

        # ALL INFO
        self.allInfo = defaultdict(dict)

        return dbmapped

    def iterTrace(self):

        decode_it = True
        for key, value in self.db.RangeIter():
            #get key
            node_id, anon_context, session_id, sequence_number = self.parse_key(key)
            if node_id not in self.NODE_LIST:
                continue
            if node_id != self.currentNode:
                if self.currentNode != '':
                    # decode trace only if date in range, else wait till next node_id
                    decode_it = True
                    # dump old data
                    pkl.dump(self.routerActive, open(self.currentNode+'active.out', 'wb'))
#                    self.dumpData()
                # Reinitialize
#                dbmapped = self.initialization(node_id)
                self.initialization(node_id)
                print "START", node_id

            if decode_it:
                trace = self.codec.decode(value)
                # table timestamp 30 sec granularity - after write
                currentTime = datetime.fromtimestamp(trace['trace_creation_timestamp'])
                currentDate = currentTime.date()
                if currentDate >= self.date_end:
                    decode_it = False
                    continue
                if currentDate < self.date_start:
                    continue

                if self.Date != currentDate:
                    print currentDate
                    self.Date = currentDate

                # router activity periods
                self.routerActive.append(currentTime)
                # The mapping stage below is disabled for this pass; only
                # router activity periods are collected.

                # maintain current address table IP:MAC
                # self.addressTableMaker(trace)

                # maintain current flowtable FLOWID: SRCIP, SRCPORT, DSTIP, DSTPORT, TRANS_PROTO
                # self.flowTableMaker(trace)

                # make DNS lookup table IP : (domain, anonymized)
                # self.DNStableMaker(trace)

                # get device_id : ip -> srcip : flowid, direction / dstip : flowid,
                # direction -> flowid: timestamp, size; save mapped data
                # dbmapped = self.packetSeriesReader(trace, dbmapped)
            # else:
            #     continue

        # dump the final node's activity as well
        if self.currentNode != '':
            pkl.dump(self.routerActive, open(self.currentNode + 'active.out', 'wb'))
        return 0

    #trace level
    def packetSeriesReader(self, trace, dbmapped):
        """
        trace['packet_series'] = [
            {'flow_id: (int), 'size': (int),
                'time_microseconds': (long int)},
            ... ]
        """
        dbbatch = leveldb.WriteBatch()

        if 'packet_series' not in trace:
            return dbmapped

        for pse in trace['packet_series']:
            flow_id = pse['flow_id']
            timestamp = pse['timestamp_microseconds']       # unix timestamp in microseconds
            # timestamp = datetime.fromtimestamp(pse['timestamp_microseconds'] * 0.000001)
            # timestamp2 = timestamp.replace(microsecond=0)
            packetSize = pse['size']
            # get corresponding IPs/ports from self.flowTable OR trace['flow_table_entry']
            if flow_id not in (2, 4) and flow_id in self.flowTable:
                dstip, dstport, srcip, srcport, trans_proto = self.flowTable[flow_id]
            else:
                continue
            # get corresponding devices/directions/ports
            if dstip in self.addressTable:
                deviceid = self.addressTable[dstip]
                direction = 'dw'
                port = srcport      # monitor port number on the server side
                if srcip in self.DNStable:
                    domain, anonymized = self.DNStable[srcip]
                    if anonymized is True:
                        domain = 'REST'
                #    print domain, anonymized
                else:
                    domain = 'UNKNOWN'
                #    # print "NO DNS ENTRY for", srcip
                #    continue
            elif srcip in self.addressTable:
                deviceid = self.addressTable[srcip]
                direction = 'up'
                port = dstport      # monitor port number on the server side
                if dstip in self.DNStable:
                    domain, anonymized = self.DNStable[dstip]
                    if anonymized is True:
                        domain = 'REST'
                #    print domain, anonymized
                else:
                    domain = 'UNKNOWN'
                #    # print "NO DNS ENTRY for", dstip
                #    continue
            else:
                # print "Not in addressTable", dstip, srcip
                continue
            mappedKey = self.mapped_format % (deviceid, direction,
                                              port, trans_proto, domain, timestamp)
            mappedValue = str(packetSize)

            # Write batch
            dbbatch.Put(mappedKey, mappedValue)

            # size distributions
            ts = datetime.fromtimestamp(timestamp * 0.000001)
            timehash = ts.replace(microsecond=0, second=0)
            self.size_stats(packetSize, timehash, direction)
            self.port_stats(packetSize, timehash, direction, port)
            self.device_stats(timehash, deviceid)
            self.requestsperportpersecond[port, direction, ts.replace(microsecond=0)] += 1

        dbmapped.Write(dbbatch, sync=True)

        return dbmapped

    def parse_key(self, key):
        node_id, anonymization_context, remainder = key.split('\x00', 2)
        session_id, sequence_number = struct.unpack('>QI', remainder)
        session_id = session_id - 2 ** 63
        sequence_number = sequence_number - 2 ** 31
        return node_id, anonymization_context, session_id, sequence_number

    def flowTableMaker(self, trace):
        if 'flow_table_entry' in trace:
            for fte in trace['flow_table_entry']:
                self.flowTable[fte['flow_id']] = (fte['destination_ip'],
                                                  fte['destination_port'],
                                                  fte['source_ip'],
                                                  fte['source_port'],
                                                  fte['transport_protocol'])
        return

    def addressTableMaker(self, trace):
        if 'address_table_entry' in trace:
            for ate in trace['address_table_entry']:
                self.addressTable[ate['ip_address']] = ate['mac_address']
        return

    def DNStableMaker(self, trace):
        if 'a_record' in trace:
            for arecord in trace['a_record']:
                self.DNStable[arecord['ip_address']] = (arecord['domain'], arecord['anonymized'])
        return

    def testIterTrace(self, totcount):

        cnt = 0
        for key, value in self.db.RangeIter():
            node_id, anon_context, session_id, sequence_number = self.parse_key(key)
            if node_id != self.currentNode:
                dbmapped = self.initialization(node_id)
                print "DONE", node_id
                cnt += 1
            trace = self.codec.decode(value)

            # table timestamp 30 sec granularity - after write
            currentTime = datetime.fromtimestamp(trace['trace_creation_timestamp'])
            currentDate = currentTime.date()
            if currentDate >= self.date_end:
                break
            if currentDate < self.date_start:
                continue
            if self.Date != currentDate:
                print currentDate
                self.Date = currentDate

            self.addressTableMaker(trace)
            self.flowTableMaker(trace)
            self.DNStableMaker(trace)
            dbmapped = self.packetSeriesReader(trace, dbmapped)
            if cnt == totcount:
                break
        return dbmapped

    def size_stats(self, size, timehash, direction):
        # Size distribution overall
        self.size_dist[size] += 1

        # Bytes per minute and per day
        datestamp = timehash.date()
        self.bytesperminute[timehash, direction] += size
        self.bytesperday[datestamp, direction] += size
        return

    def port_stats(self, size, timehash, direction, port):
        self.port_dist_size[port, direction] += size
        self.port_dist_count[port, direction] += 1

        self.bytesperportperminute[port, direction, timehash] += size
        return

    def device_stats(self, timehash, deviceid):
        if deviceid not in self.devices[timehash]:
            self.devices[timehash].append(deviceid)

    def dumpData(self):
        # DATA
        pkl.dump(self.size_dist, open(self.folder + self.currentNode + 'size_dist.out', 'wb'))
        pkl.dump(self.bytesperminute, open(self.folder + self.currentNode + 'bytesperminute.out', 'wb'))
        pkl.dump(self.bytesperday, open(self.folder + self.currentNode + 'bytesperday.out', 'wb'))

        pkl.dump(self.port_dist_size, open(self.folder + self.currentNode + 'port_dist_size.out', 'wb'))
        pkl.dump(self.port_dist_count, open(self.folder + self.currentNode + 'port_dist_count.out', 'wb'))
        pkl.dump(self.bytesperportperminute, open(self.folder + self.currentNode + 'bytesperportperminute.out', 'wb'))
        pkl.dump(self.requestsperportpersecond, open(self.folder + self.currentNode + 'requestsperportpersecond.out', 'wb'))

        pkl.dump(self.devices, open(self.folder + self.currentNode + 'device_state.out', 'wb'))

        return
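
A sketch of reading back one of the per-node dumps written by dumpData; the node id below is a hypothetical placeholder:

import pickle as pkl

node_id = 'OW0123'    # hypothetical node id
folder = '/data/users/sarthak/node_data/'
bytesperday = pkl.load(open(folder + node_id + 'bytesperday.out', 'rb'))
for (day, direction), nbytes in sorted(bytesperday.items()):
    print day, direction, nbytes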