Example #1
0
class SoSHostnameParser(SoSCleanerParser):
    """Parser that obfuscates hostnames and domain names in a line of text.

    On top of whatever matching the parent ``parse_line()`` performs, this
    parser keeps a list of "short" names (hostnames without a domain part)
    and replaces them via plain substring matching.
    """

    name = 'Hostname Parser'
    map_file_key = 'hostname_map'
    prep_map_file = 'sos_commands/host/hostname'
    regex_patterns = [r'(((\b|_)[a-zA-Z0-9-\.]{1,200}\.[a-zA-Z]{1,63}\b))']

    def __init__(self, conf_file=None, opt_domains=None):
        """Set up the hostname map and seed it with known domains.

        :param conf_file:   Passed through unchanged to the parent initializer
        :param opt_domains: Domains handed to the map's
                            ``load_domains_from_options()``
        """
        # NOTE(review): the map is created before the parent initializer runs,
        # presumably because the parent populates it - confirm before
        # reordering these statements.
        self.mapping = SoSHostnameMap()
        super(SoSHostnameParser, self).__init__(conf_file)
        self.mapping.load_domains_from_map()
        self.mapping.load_domains_from_options(opt_domains)
        self.short_names = []
        self.load_short_names_from_mapping()
        self.mapping.set_initial_counts()

    def load_short_names_from_mapping(self):
        """When we load the mapping file into the hostname map, we have to do
        some dancing to get those loaded properly into the "intermediate" dicts
        that the map uses to hold hosts and domains. Similarly, we need to also
        extract shortnames known to the map here.
        """
        for hname in self.mapping.dataset.keys():
            # a key without any '.' is a short name with no domain; an empty
            # key is never recorded since it would match every line
            if hname and '.' not in hname and hname not in self.short_names:
                self.short_names.append(hname)

    def load_hostname_into_map(self, hostname_string):
        """Force add the hostname found in /sos_commands/host/hostname into
        the map, and record its short name for later substring matching.

        Empty or whitespace-only input, and anything containing 'localhost',
        is ignored. Surrounding whitespace is stripped before use.

        :param hostname_string: The hostname as reported by the system
        """
        if not hostname_string:
            return
        hostname_string = hostname_string.strip()
        if not hostname_string or 'localhost' in hostname_string:
            return
        domains = hostname_string.split('.')
        # domains[0] is the whole string when there is no domain part
        short_name = domains[0]
        if short_name and short_name not in self.short_names:
            self.short_names.append(short_name)
        if len(domains) > 3:
            # make sure we implicitly get example.com if the system's hostname
            # is something like foo.bar.example.com
            high_domain = '.'.join(domains[-2:])
            self.mapping.add(high_domain)
        self.mapping.add(hostname_string)

    def parse_line(self, line):
        """Override the default parse_line() method to also check for the
        shortname of the host derived from the hostname.

        :param line: The line of text to obfuscate
        :returns:    Tuple of (obfuscated line, number of replacements made)
        """
        line, count = super(SoSHostnameParser, self).parse_line(line)
        # Longest names first, so that a short name which is a substring of
        # another (e.g. 'host' and 'host1') does not clobber the longer one.
        for short_name in sorted(self.short_names, key=len, reverse=True):
            if not short_name:
                # an empty string "matches" everywhere; never substitute it
                continue
            hits = line.count(short_name)
            if hits:
                # str.replace() substitutes every occurrence, so count them all
                count += hits
                line = line.replace(short_name, self.mapping.get(short_name))
        return line, count
Example #2
0
class CleanerMapTests(unittest.TestCase):
    """Unit tests for the MAC, IP, hostname and keyword obfuscation maps.

    Each test calls a map's ``get()`` with a raw value and checks whether the
    returned value was obfuscated or deliberately left untouched.
    """

    def setUp(self):
        # fresh maps for every test so counters/state do not leak between them
        self.mac_map = SoSMacMap()
        self.ip_map = SoSIPMap()
        self.host_map = SoSHostnameMap()
        # only 'redhat.com' is registered as a domain to obfuscate
        self.host_map.load_domains_from_options(['redhat.com'])
        self.kw_map = SoSKeywordMap()

    def test_mac_map_obfuscate_valid_v4(self):
        # six colon-separated octets must come back changed
        _test = self.mac_map.get('12:34:56:78:90:ab')
        self.assertNotEqual(_test, '12:34:56:78:90:ab')

    def test_mac_map_obfuscate_valid_v6(self):
        # eight colon-separated octets must come back changed
        _test = self.mac_map.get('12:34:56:ff:fe:78:90:ab')
        self.assertNotEqual(_test, '12:34:56:ff:fe:78:90:ab')

    def test_mac_map_obfuscate_valid_v6_quad(self):
        # four colon-separated 16-bit groups must come back changed
        _test = self.mac_map.get('1234:56ff:fe78:90ab')
        self.assertNotEqual(_test, '1234:56ff:fe78:90ab')

    def test_mac_map_skip_ignores(self):
        # the all-ff address is expected to be returned as-is
        _test = self.mac_map.get('ff:ff:ff:ff:ff:ff')
        self.assertEqual(_test, 'ff:ff:ff:ff:ff:ff')

    def test_mac_map_avoid_duplicate_obfuscation(self):
        # feeding an already obfuscated value back in must not change it again
        _test = self.mac_map.get('ab:cd:ef:fe:dc:ba')
        _dup = self.mac_map.get(_test)
        self.assertEqual(_test, _dup)

    def test_ip_map_obfuscate_v4_with_cidr(self):
        _test = self.ip_map.get('192.168.1.0/24')
        self.assertNotEqual(_test, '192.168.1.0/24')

    def test_ip_map_obfuscate_no_cidr(self):
        _test = self.ip_map.get('192.168.2.2')
        self.assertNotEqual(_test, '192.168.2.2')

    def test_ip_map_obfuscate_same_subnet(self):
        # an address from an already seen network must land in the
        # obfuscated counterpart of that network
        _net = ip_interface(self.ip_map.get('192.168.3.0/24'))
        _test = ip_interface(self.ip_map.get('192.168.3.1'))
        self.assertIn(_test.ip, _net.network)

    def test_ip_map_get_same_with_or_without_cidr(self):
        # the host part must be identical whether or not a prefix was given
        _hostwsub = self.ip_map.get('192.168.4.1/24')
        _hostnosub = self.ip_map.get('192.168.4.1')
        self.assertEqual(_hostwsub.split('/')[0], _hostnosub)

    def test_ip_skip_ignores(self):
        # 127.0.0.1 is expected to be returned as-is
        _test = self.ip_map.get('127.0.0.1')
        self.assertEqual(_test, '127.0.0.1')

    def test_hostname_obfuscate_domain_options(self):
        # hosts under the domain registered in setUp() must be obfuscated
        _test = self.host_map.get('www.redhat.com')
        self.assertNotEqual(_test, 'www.redhat.com')

    def test_hostname_obfuscate_same_item(self):
        # repeated lookups of the same name must be stable
        _test1 = self.host_map.get('example.redhat.com')
        _test2 = self.host_map.get('example.redhat.com')
        self.assertEqual(_test1, _test2)

    def test_hostname_obfuscate_just_domain(self):
        _test = self.host_map.get('redhat.com')
        self.assertEqual(_test, 'obfuscateddomain0.com')

    def test_hostname_no_obfuscate_non_loaded_domain(self):
        # domains that were never registered are left alone
        _test = self.host_map.get('foobar.com')
        self.assertEqual(_test, 'foobar.com')

    def test_hostname_no_obfuscate_non_loaded_fqdn(self):
        # ...and so are hosts underneath such domains
        _test = self.host_map.get('example.foobar.com')
        self.assertEqual(_test, 'example.foobar.com')

    def test_keyword_single(self):
        _test = self.kw_map.get('foobar')
        self.assertEqual(_test, 'obfuscatedword0')
Example #3
0
class SoSHostnameParser(SoSCleanerParser):
    """Parser that obfuscates hostnames and domain names in a line of text.

    After the parent ``parse_line()`` has run, two extra substring passes are
    made: one for known FQDNs written with '_' in place of '.' and '-', and
    one for known "short" names (hostnames without a domain part).
    """

    name = 'Hostname Parser'
    map_file_key = 'hostname_map'
    regex_patterns = [
        r'(((\b|_)[a-zA-Z0-9-\.]{1,200}\.[a-zA-Z]{1,63}(\b|_)))'
    ]

    def __init__(self, config, opt_domains=None):
        """Set up the hostname map and seed it with known domains.

        :param config:      Passed through unchanged to the parent initializer
        :param opt_domains: Domains handed to the map's
                            ``load_domains_from_options()``
        """
        # NOTE(review): the map is created before the parent initializer runs,
        # presumably because the parent populates it - confirm before
        # reordering these statements.
        self.mapping = SoSHostnameMap()
        super(SoSHostnameParser, self).__init__(config)
        self.mapping.load_domains_from_map()
        self.mapping.load_domains_from_options(opt_domains)
        self.short_names = []
        self.load_short_names_from_mapping()
        self.mapping.set_initial_counts()

    def load_short_names_from_mapping(self):
        """When we load the mapping file into the hostname map, we have to do
        some dancing to get those loaded properly into the "intermediate" dicts
        that the map uses to hold hosts and domains. Similarly, we need to also
        extract shortnames known to the map here.
        """
        for hname in self.mapping.dataset.keys():
            # a key without any '.' is a short name with no domain; an empty
            # key is never recorded since it would match every line
            if hname and '.' not in hname and hname not in self.short_names:
                self.short_names.append(hname)

    def load_hostname_into_map(self, hostname_string):
        """Force add the hostname found in /sos_commands/host/hostname into
        the map, and record its short name for later substring matching.

        Empty or whitespace-only input, and anything containing 'localhost',
        is ignored. Surrounding whitespace is stripped before use.

        :param hostname_string: The hostname as reported by the system
        """
        if not hostname_string:
            return
        hostname_string = hostname_string.strip()
        if not hostname_string or 'localhost' in hostname_string:
            return
        domains = hostname_string.split('.')
        # domains[0] is the whole string when there is no domain part
        short_name = domains[0]
        if short_name and short_name not in self.short_names:
            self.short_names.append(short_name)
        if len(domains) > 3:
            # make sure we implicitly get example.com if the system's hostname
            # is something like foo.bar.example.com
            high_domain = '.'.join(domains[-2:])
            self.mapping.add(high_domain)
        self.mapping.add(hostname_string)

    def parse_line(self, line):
        """Override the default parse_line() method to also check for the
        shortname of the host derived from the hostname.

        :param line: The line of text to obfuscate
        :returns:    Tuple of (obfuscated line, number of replacements made)
        """

        def _check_line(ln, count, search, repl=None):
            """Perform a second manual check for substrings that may have been
            missed by regex matching.

            Every occurrence of ``search`` in ``ln`` is swapped for the mapped
            value of ``repl`` (or of ``search`` when ``repl`` is not given).
            """
            # an empty search string "matches" everywhere; never substitute it
            if not search or search in self.mapping.skip_keys:
                return ln, count
            hits = ln.count(search)
            if hits:
                count += hits
                ln = ln.replace(search, self.mapping.get(repl or search))
            return ln, count

        line, count = super(SoSHostnameParser, self).parse_line(line)
        # make an additional pass checking for '_' formatted substrings that
        # the regex patterns won't catch
        hosts = [h for h in self.mapping.dataset.keys() if '.' in h]
        for host in sorted(hosts, key=len, reverse=True):
            fqdn = host
            for c in '.-':
                fqdn = fqdn.replace(c, '_')
            line, count = _check_line(line, count, fqdn, host)

        # Longest names first, matching the FQDN pass above, so that a short
        # name which is a substring of another one does not clobber it.
        for short_name in sorted(self.short_names, key=len, reverse=True):
            line, count = _check_line(line, count, short_name)

        return line, count
Example #4
0
class SoSHostnameParser(SoSCleanerParser):
    """Parser that obfuscates hostnames and domain names.

    Besides the regex patterns declared below, the parser records "short"
    names (hostnames without a domain part) learned from the system's
    hostname and from /etc/hosts.
    """

    name = 'Hostname Parser'
    map_file_key = 'hostname_map'
    regex_patterns = [
        r'(((\b|_)[a-zA-Z0-9-\.]{1,200}\.[a-zA-Z]{1,63}(\b|_)))'
    ]

    def __init__(self, config, opt_domains=None):
        """Set up the hostname map and seed it with known domains.

        :param config:      Passed through unchanged to the parent initializer
        :param opt_domains: Domains handed to the map's
                            ``load_domains_from_options()``
        """
        # NOTE(review): the map is created before the parent initializer runs,
        # presumably because the parent populates it - confirm before
        # reordering these statements.
        self.mapping = SoSHostnameMap()
        super(SoSHostnameParser, self).__init__(config)
        self.mapping.load_domains_from_map()
        self.mapping.load_domains_from_options(opt_domains)
        self.short_names = []
        self.load_short_names_from_mapping()
        self.mapping.set_initial_counts()

    def load_short_names_from_mapping(self):
        """When we load the mapping file into the hostname map, we have to do
        some dancing to get those loaded properly into the "intermediate" dicts
        that the map uses to hold hosts and domains. Similarly, we need to also
        extract shortnames known to the map here.
        """
        for hname in self.mapping.dataset.keys():
            # a key without any '.' is a short name with no domain; an empty
            # key is never recorded since it would match every line
            if hname and '.' not in hname and hname not in self.short_names:
                self.short_names.append(hname)

    def load_hostname_into_map(self, hostname_string):
        """Force add the hostname found in /sos_commands/host/hostname into
        the map, and record its short name for later substring matching.

        Empty or whitespace-only input, and anything containing 'localhost',
        is ignored. Surrounding whitespace is stripped before use.

        :param hostname_string: The hostname as reported by the system
        """
        if not hostname_string:
            return
        hostname_string = hostname_string.strip()
        if not hostname_string or 'localhost' in hostname_string:
            return
        domains = hostname_string.split('.')
        # domains[0] is the whole string when there is no domain part
        short_name = domains[0]
        if short_name and short_name not in self.short_names:
            self.short_names.append(short_name)
        if len(domains) > 3:
            # make sure we implicitly get example.com if the system's hostname
            # is something like foo.bar.example.com
            high_domain = '.'.join(domains[-2:])
            self.mapping.add(high_domain)
        self.mapping.add(hostname_string)

    def load_hostname_from_etc_hosts(self, content):
        """Parse an archive's copy of /etc/hosts, which requires handling that
        is separate from the output of the `hostname` command. Just like
        load_hostname_into_map(), this has to be done explicitly and we
        cannot rely upon the more generic methods to do this reliably.

        Comments (everything from a '#' to the end of the line) are discarded
        before the entry is inspected, and entries mentioning 'localhost' are
        skipped entirely.

        :param content: The full text of the hosts file
        """
        for line in content.splitlines():
            # strip full-line and trailing comments so that their words are
            # never mistaken for hostnames
            entry = line.split('#', 1)[0]
            if 'localhost' in entry:
                continue
            # the first field is the address; the rest are names and aliases
            # (blank entries simply yield no names)
            for host in entry.split()[1:]:
                if '.' not in host:
                    # only generate a mapping for fqdns but still record the
                    # short name here for later obfuscation with parse_line()
                    if host not in self.short_names:
                        self.short_names.append(host)
                    self.mapping.add_regex_item(host)
                else:
                    self.mapping.add(host)