Exemple #1
0
 def __init__(self, watch_dir=None) :
     self.settings = {'sort-key':PNADefaults.sort_key,
                      'threshold':PNADefaults.threshold,
                      'filters':PNADefaults.filters,}
     self.parser = PNALogParser()
     self.all_data = [ ]
     self.cache = {'key':None,'threshold':None,'filters':None,'valid':False}
Exemple #2
0
def main(args):
    # convert each command line file individually
    for arg in args:
        print 'identity of "' + arg + '" is "' + arg + '.id"'
        if arg[-5:] == ".dict":
            # just need to unparse
            f = open(arg, "r")
            data = f.read()
            f.close()
            raw_data = eval(data)
            file_data = PNAParseLogger.unparse(raw_data)
        else:
            parser = PNALogParser()
            parser.parse(arg)
            raw_data = parser.get_flows()
            file_data = PNAParseLogger.unparse(raw_data)
        file = open(arg + ".id", "w")
        file.write(file_data)
        file.close()
Exemple #3
0
def main(args) :
    # convert each command line file individually
    for arg in args :
        print 'identity of "'+arg+'" is "'+arg+'.id"'
        if arg[-5:] == '.dict' :
            # just need to unparse
            f = open(arg, 'r')
            data = f.read()
            f.close()
            raw_data = eval(data)
            file_data = PNAParseLogger.unparse(raw_data)
        else :
            raw_data = PNALogParser.parse(arg)
            file_data = PNAParseLogger.unparse(raw_data)
        file = open(arg+'.id', 'w')
        file.write(file_data)
        file.close()
Exemple #4
0
def main(args):
    # convert each command line file individually
    for arg in args:
        print 'identity of "' + arg + '" is "' + arg + '.id"'
        if arg[-5:] == '.dict':
            # just need to unparse
            f = open(arg, 'r')
            data = f.read()
            f.close()
            raw_data = eval(data)
            file_data = PNAParseLogger.unparse(raw_data)
        else:
            raw_data = PNALogParser.parse(arg)
            file_data = PNAParseLogger.unparse(raw_data)
        file = open(arg + '.id', 'w')
        file.write(file_data)
        file.close()
Exemple #5
0
class PNAModel :
    ip_fmt = '123.45.67.89/32'
    ip_re = re.compile('^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})(/\d{1,2})?$')

    port_fmt = '65535'
    # three formats: 65535; 65535-65535 inclusive; 65535,80,22
    port_re = re.compile('^(\d{1,5})\-?(\d{1,5})?(\,\d{1,5})*$')

    time_fmt = 'YYYYMMDDHHMMSS'
    time_re = re.compile('^(\d{4})(\d{1,2})?(\d{1,2})?(\d{1,2})?(\d{1,2})?(\d{1,2})?$')

    latest_fmt = 'nnnn'
    latest_re = re.compile('^(\d+)$')

    sort_keys = ('raw', 'connections', 'sessions',
                 'tcp-ports', 'tcp-packets', 'tcp-octets',
                 'udp-ports', 'udp-packets', 'udp-octets',
                 'all-ports', 'all-packets', 'all-octets',)
    filters    = ('local-ip','remote-ip',
                  'source-port','destination-port',
                  'begin-time','end-time','latest',)
    filter_res = (ip_re, ip_re, port_re, port_re, time_re, time_re, latest_re,)

    # set up initial structures
    def __init__(self, watch_dir=None) :
        self.settings = {'sort-key':PNADefaults.sort_key,
                         'threshold':PNADefaults.threshold,
                         'filters':PNADefaults.filters,}
        self.parser = PNALogParser()
        self.all_data = [ ]
        self.cache = {'key':None,'threshold':None,'filters':None,'valid':False}

    def filter_reject(self, f_name, value) :
        # make sure this filter exists
        if f_name not in self.settings['filters'] :
            return False
        f_value = self.settings['filters'][f_name]

        if f_name == 'local-ip' or f_name == 'remote-ip' :
            # see if we should only match a netmask
            f_value = f_value.split('/')
            if len(f_value) == 1 :
                # no netmask, match full IP
                if self.int2ip(value) == f_value[0] :
                    return False
                else :
                    return True
            else :
                # netmask, check IP against subnet IP
                filter_ip = self.ip2int(f_value[0])
                filter_bits = int(f_value[1])
                mask = 0
                for i in range(32) :
                    mask = (mask << 1)
                    if filter_bits > i :
                        mask += 1
                    else:
                        mask += 0
                if (value & mask) == (filter_ip & mask) :
                    return False
                else :
                    return True
        elif f_name == 'source-port' or f_name == 'destination-port' :
            # determine which kind of format the port is
                
                return True
        elif f_name == 'begin-time' or f_name == 'end-time' :
            try :
                index = self.filters.index(f_name)
                regex = self.filter_res[index]
            except ValueError :
                # no regex matching that, so accept
                return False
            # match f_value against time_re
            match = regex.match(f_value)
            if match == None :
                # no match, shouldn't happen, but just silently accept
                return False
            # get the values
            (year,month,day,hour,min,sec) = match.groups()

            if year: year = int(year)
            if month: month = int(month)
            if day: day = int(day)
            if hour: hour = int(hour)
            if min: min = int(min)
            if sec: sec = int(sec)

            # datetime-ify the input value
            v_dt = datetime.utcfromtimestamp(value)

            if f_name == 'begin-time' :
                # fill any unset parameters ("rounding down")
                if not year: year = dt_date.min.year
                if not month: month = dt_date.min.month
                if not day: day = dt_date.min.day
                if not hour: hour = dt_date.min.hour
                if not min: min = dt_date.min.minute
                if not sec: sec = dt_date.min.second
                # datetime-ify the filter value
                f_dt = datetime(year,month,day,hour,min,sec)
                # check the values
                if f_dt > v_dt :
                    # 'begin-time' is bigger than input value, so drop
                    return True
            elif f_name == 'end-time' :
                # fill any unset parameters ("rounding up")
                if not year: year = dt_date.max.year
                if not month: month = dt_date.max.month
                if not day: day = dt_date.max.day
                if not hour: hour = dt_date.max.hour
                if not min: min = dt_date.max.minute
                if not sec: sec = dt_date.max.second
                # datetime-ify the filter value
                f_dt = datetime(year,month,day,hour,min,sec)
                # check the values
                if f_dt < v_dt :
                    # 'end-time' is smaller than input value, so drop
                    return True
            return False
        elif f_name == 'latest' :
            now = datetime.utcnow()
            try :
                index = self.filters.index(f_name)
                regex = self.filter_res[index]
            except ValueError :
                # no regex matching that, so accept
                return False
            # match f_value against regex
            match = regex.match(f_value)
            if match == None :
                # no match, shouldn't happen, but just silently accept
                return False
            # get the value
            (nsecs,) = match.groups()
            latest_secs = timedelta(0, int(nsecs))
            then = datetime.utcfromtimestamp(value)
            if (now - latest_secs) >= then :
                return True
        return False

    # see if the item matches against a filter
    def do_filter(self, all_data) :
        new_data = [ ]

        # we'll go through all the all_data and filter out items we don't want
        for item in all_data :
            start_time = item['start-time']
            end_time = item['end-time']
            if self.filter_reject('end-time', end_time) :
                # skip this entire item (don't add to new_data)
                continue
            if self.filter_reject('latest', end_time) :
                # make sure this record ends within the past 'latest' seconds
                continue
            sessions = item['sessions']
            new_sessions = [ ]
            # each item has multiple data entries for a dump-/end-time
            for session in sessions :
                local_ip = session['local-ip']
                remote_ip = session['remote-ip']
                begin_time = session['begin-time']
                if self.filter_reject('local-ip', local_ip) :
                    continue
                if self.filter_reject('remote-ip', remote_ip) :
                    continue
                if self.filter_reject('begin-time', begin_time) :
                    continue
                # if it wasn't filtered out, then it is a valid session
                new_sessions.append(session)

            # now combine all the filtered data for a item
            new_item = { }
            new_item['start-time'] = start_time
            new_item['end-time'] = end_time
            new_item['size'] = item['size']
            new_item['sessions'] = new_sessions
            new_data.append(new_item)
        return new_data

    # should parse a file and add it to the all_data structure
    def add_file(self, file_name) :
        self.parser.clear_log()
        self.parser.parse(file_name)
        self.all_data.append(self.parser.get_log())
        self.cache['valid'] = False

    # derives a all_data list to a local->remote list
    # returns
    # { 'local-ip', 'remote-ip', 'all-ports', 'all-packets', 'all-octets',
    #   'tcp-ports', 'tcp-packets','tcp-octets', 'udp-ports', 'udp-packets',
    #   'udp-octets' }
    def derive_point_to_points(self, all_data) :
        # this is gonna be intense
        data = { } # we want to combine like point-to-point connections

        # we need to derive from every item in the all_data list
        for item in all_data :
            end_time = item['end-time']
            watch_data = item['watch-data']
            # each item has multiple data entries for a dump-/end-time
            for entry in watch_data :
                local_ip = self.int2ip(entry['local-ip'])
                remote_ip = self.int2ip(entry['remote-ip'])
                ntcp = entry['ntcp']
                nudp = entry['nudp']
                tcp_tuples = entry['tcp-tuples']
                udp_tuples = entry['udp-tuples']
                # each data entry has some number of tcp-/udp-tuples
                pkts = {'all':0,'tcp':0,'udp':0,}
                bytes = {'all':0,'tcp':0,'udp':0,}
                for tuple in tcp_tuples :
                    pkts['tcp'] += tuple['npkts-in'] + tuple['npkts-out']
                    bytes['tcp'] += tuple['nbytes-in'] + tuple['nbytes-out']
                    pkts['all'] += pkts['tcp']
                    bytes['all'] += bytes['tcp']
                for tuple in udp_tuples :
                    pkts['udp'] += tuple['npkts-in'] + tuple['npkts-out']
                    bytes['udp'] += tuple['nbytes-in'] + tuple['nbytes-out']
                    pkts['all'] += pkts['udp']
                    bytes['all'] += bytes['udp']
                # now combine all the data into the structure
                key = local_ip+'>'+remote_ip
                p2p = data.get(key, { })
                p2p['local-ip'] = local_ip
                p2p['remote-ip'] = remote_ip
                p2p['tcp-ports'] = p2p.get('tcp-ports', 0) + ntcp
                p2p['tcp-packets'] = p2p.get('tcp-packets', 0) + pkts['tcp']
                p2p['tcp-octets'] = p2p.get('tcp-octets', 0) + bytes['tcp']
                p2p['udp-ports'] = p2p.get('udp-ports', 0) + nudp
                p2p['udp-packets'] = p2p.get('udp-packets', 0) + pkts['udp']
                p2p['udp-octets'] = p2p.get('udp-octets', 0) + bytes['udp']
                p2p['all-ports'] = p2p.get('all-ports', 0) + ntcp + nudp
                p2p['all-packets'] = p2p.get('all-packets', 0) + pkts['all']
                p2p['all-octets'] = p2p.get('all-octets', 0) + bytes['all']
                data[key] = p2p
        return data.values()

    # derives a all_data list to a local-ip only list
    # returns
    # { 'local-ip', 'connections', 'sessions', 'all-packets', 'all-octets',
    #   'tcp-ports', 'tcp-packets','tcp-octets', 'udp-ports', 'udp-packets',
    #   'udp-octets' }
    def derive_local_ips(self, all_data) :
        #all_data = self.derive_point_to_points(all_data)
        data_names = ('tcp-ports','tcp-packets','tcp-octets',
                      'udp-ports','udp-packets','udp-octets',
                      'all-ports','all-packets','all-octets',)
        data = { }
        # go through all the raw data and combine local ips
        for item in all_data :
            local_ip = item['local-ip']
            entry = data.get(local_ip, {})
            entry['local-ip'] = local_ip
            entry['connections'] = entry.get('connections', 0) + 1
            # aggregate all information with this IP
            for name in data_names :
                # (tcp,udp,all)-(ports,packets,octets) incr by matching item
                entry[name] = entry.get(name, 0) + item[name]
            # connections increment every new item (remote-ip(?))
            data[local_ip] = entry
        # now rename all-ports -> sessions
        for local_ip in data :
            entry = data[local_ip]
            entry['sessions'] = entry['all-ports']
            del entry['all-ports']
        return data.values()

    def get_data(self, raw=False) :
        # based on the settings, return a tuple of data tuples
        key = self.settings['sort-key']
        threshold = self.settings['threshold']

        # see if any thing has changed
        if self.cache['key'] == key and self.cache['threshold'] == threshold :
            if self.cache['valid'] :
                c_filters = self.cache['filters']
                s_filters = self.settings['filters']
                valid = True
                for f in self.filters :
                    if f in c_filters and f in s_filters :
                        if c_filters[f] != s_filters[f] :
                            valid = False
                            break
                if valid :
                    return self.cache['data']

        # filter the all_data through any filters
        data = self.all_data
        data = self.do_filter(self.all_data)

        point_to_points = ('tcp-ports','tcp-packets','tcp-octets',
                           'udp-ports','udp-packets','udp-octets',
                           'all-ports','all-packets','all-octets',)
        local_ips = ('connections','sessions',)

        if raw == True :
            return data
        elif key == 'raw' :
            data = sorted(data, lambda l,r : cmp(r['end-time'], l['end-time']))
            flat_data = [ ]
            # bump every ip entry under an end-time to a top-level entry
            for d in data :
                end_time = d['end-time']
                end_time = time.strftime('%Y-%m-%d.%H:%M:%S', time.gmtime(end_time))
                watch_data = d['watch-data']
                for entry in watch_data :
                    flat_entry = {'end-time':end_time,}
                    for item in entry :
                        if item == 'local-ip' or item == 'remote-ip' :
                            flat_entry[item] = self.int2ip(entry[item])
                        else :
                            flat_entry[item] = entry[item]
                    flat_data.append(flat_entry)
            return flat_data
        elif key in local_ips :
            data = self.derive_point_to_points(data)
            data = self.derive_local_ips(data)
        elif key in point_to_points :
            data = self.derive_point_to_points(data)

        # make sure we exceed the threshold
        data = [ i for i in data if i[key] >= threshold ]
        # sort the data by a key
        data = sorted(data, lambda l,r : cmp(r[key], l[key]))

        self.cache['key'] = key
        self.cache['threshold'] = threshold
        for f in self.filters :
            self.cache['filters'] = self.settings['filters']
        self.cache['data'] = tuple(data)
        self.cache['valid'] = True

        return tuple(data)

    def get_fields(self, headers=True) :
        # based on the settings, return a tuple of display fields
        # the first five are displayed by default, the rest show on detail view
        key = self.settings['sort-key']
        tcp_fields = ('tcp-ports','tcp-packets','tcp-octets',)
        udp_fields = ('udp-ports','udp-packets','udp-octets',)
        all_fields = ('all-ports','all-packets','all-octets',)

        if key == 'raw' :
            fields = ['local-ip','remote-ip', 'end-time', 'ntcp', 'nudp', 'tcp-tuples','udp-tuples',]
        elif key == 'connections' or key == 'sessions' :
            fields = ['local-ip',]
            fields.extend(['connections','sessions',])
            fields.extend(list(all_fields[1:]))
            fields.extend(list(tcp_fields))
            fields.extend(list(udp_fields))
        else :
            fields = ['local-ip','remote-ip',]
            if headers == True :
                fields.extend(list(eval(key[0:3]+'_fields')))
            else :
                fields.extend(list(tcp_fields))
                fields.extend(list(udp_fields))
                fields.extend(list(all_fields))
        return tuple(fields)

    # convert an IP-as-integer to an IP-as-string
    def int2ip(self, addr) :
        octet = (addr>>24&0xff, addr>>16&0xff, addr>>8&0xff, addr&0xff)
        return '.'.join([ str(o) for o in octet ])

    # convert an IP-as-string to an IP-as-integer
    def ip2int(self, ip) :
        octets = ip.split('.')
        ip_int = int(octets[0])
        ip_int = (ip_int << 8) + int(octets[1])
        ip_int = (ip_int << 8) + int(octets[2])
        return (ip_int << 8) + int(octets[3])

    @classmethod
    def stringify(cls, str) :
        known_strings = {'sort-key':'Sort Key',
                         'local-ip':'Local IP',
                         'remote-ip':'Remote IP',
                         'source-port':'Source Port',
                         'destination-port':'Destination Port',
                         'begin-time':'Beginning Time',
                         'end-time':'Ending Time',
                         'tcp-tuples':'TCP Ports',
                         'udp-tuples':'UDP Ports',
                         'latest':'Latest Time',
                         'ntcp':'# TCP Ports',
                         'nudp':'# UDP Ports',
                         'tcp-ports':'# TCP Ports',
                         'tcp-packets':'# TCP Packets',
                         'tcp-octets':'# TCP Octets',
                         'udp-ports':'# UDP Ports',
                         'udp-packets':'# UDP Packets',
                         'udp-octets':'# UDP Octets',
                         'all-ports':'# All Ports',
                         'all-packets':'# All Packets',
                         'all-octets':'# All Octets',}
        if str in known_strings :
            return known_strings[str]
        else :
            return str.title()
Exemple #6
0
 def add_file(self, file_name):
     file_data = PNALogParser(file_name)
     data = file_data.header.copy()
     data['flows'] = file_data.parse()
     self.all_data.append(data)
     self.cache['valid'] = False
Exemple #7
0
 def stream_parser(self, files) :
     parser = PNALogParser()
     for file in files :
         parser.parse(file, self.stream_printer)