class SoSHostnameParser(SoSCleanerParser):
    """Parser responsible for obfuscating hostnames and domain names.

    On top of whatever the parent parser's parse_line() does, this class
    keeps a list of short (domain-less) host names and replaces those in
    each line with a plain substring pass.
    """

    name = 'Hostname Parser'
    map_file_key = 'hostname_map'
    prep_map_file = 'sos_commands/host/hostname'
    regex_patterns = [
        r'(((\b|_)[a-zA-Z0-9-\.]{1,200}\.[a-zA-Z]{1,63}\b))'
    ]

    def __init__(self, conf_file=None, opt_domains=None):
        """Create the hostname map and seed it and the short-name list.

        :param conf_file: passed through unchanged to the parent parser
        :param opt_domains: passed to the map's load_domains_from_options()
        """
        # The map must exist before the parent initializer runs, and the
        # domain/short-name loading must happen after it; keep this order.
        self.mapping = SoSHostnameMap()
        super(SoSHostnameParser, self).__init__(conf_file)
        self.mapping.load_domains_from_map()
        self.mapping.load_domains_from_options(opt_domains)
        self.short_names = []
        self.load_short_names_from_mapping()
        self.mapping.set_initial_counts()

    def load_short_names_from_mapping(self):
        """When we load the mapping file into the hostname map, we have to
        do some dancing to get those loaded properly into the "intermediate"
        dicts that the map uses to hold hosts and domains. Similarly, we
        need to also extract shortnames known to the map here.
        """
        for hname in self.mapping.dataset.keys():
            # a key without any '.' is a short name with no domain; an
            # empty key is skipped because replacing '' would corrupt
            # every line handled by parse_line()
            if hname and '.' not in hname \
                    and hname not in self.short_names:
                self.short_names.append(hname)

    def load_hostname_into_map(self, hostname_string):
        """Force add the domainname found in /sos_commands/host/hostname
        into the map. We have to do this here since the normal map prep
        approach from the parser would be ignored since the system's
        hostname is not guaranteed

        Empty/blank values and anything containing 'localhost' are ignored.

        :param hostname_string: the hostname (short or fully qualified)
        """
        hostname_string = (hostname_string or '').strip()
        if not hostname_string or 'localhost' in hostname_string:
            return
        domains = hostname_string.split('.')
        # domains[0] is the short name for an FQDN and the whole string
        # when there is no domain part at all
        short_name = domains[0]
        if short_name and short_name not in self.short_names:
            self.short_names.append(short_name)
        if len(domains) > 3:
            # make sure we implicitly get example.com if the system's
            # hostname is something like foo.bar.example.com
            high_domain = '.'.join(domains[-2:])
            self.mapping.add(high_domain)
        self.mapping.add(hostname_string)

    def parse_line(self, line):
        """Override the default parse_line() method to also check for the
        shortname of the host derived from the hostname.

        :param line: the line of text to obfuscate
        :returns: tuple of (obfuscated line, number of replacements made)
        """
        line, count = super(SoSHostnameParser, self).parse_line(line)
        # Longest names first, so a short name that is a substring of a
        # longer one cannot clobber it before the longer one is replaced.
        for short_name in sorted(self.short_names, key=len, reverse=True):
            if not short_name:
                # str.replace('', x) would insert x between every char
                continue
            hits = line.count(short_name)
            if hits:
                count += hits
                line = line.replace(short_name,
                                    self.mapping.get(short_name))
        return line, count
class SoSHostnameParser(SoSCleanerParser):
    """Parser responsible for obfuscating hostnames and domain names.

    On top of whatever the parent parser's parse_line() does, this class
    keeps a list of short (domain-less) host names and replaces those in
    each line with a plain substring pass.
    """

    name = 'Hostname Parser'
    map_file_key = 'hostname_map'
    prep_map_file = 'sos_commands/host/hostname'
    regex_patterns = [
        r'(((\b|_)[a-zA-Z0-9-\.]{1,200}\.[a-zA-Z]{1,63}\b))'
    ]

    def __init__(self, conf_file=None, opt_domains=None):
        """Create the hostname map and an empty short-name list.

        :param conf_file: passed through unchanged to the parent parser
        :param opt_domains: handed to the SoSHostnameMap constructor
        """
        # Both attributes are set before the parent initializer runs;
        # keep this order.
        self.mapping = SoSHostnameMap(opt_domains)
        self.short_names = []
        super(SoSHostnameParser, self).__init__(conf_file)

    def load_hostname_into_map(self, hostname_string):
        """Force add the domainname found in /sos_commands/host/hostname
        into the map. We have to do this here since the normal map prep
        approach from the parser would be ignored since the system's
        hostname is not guaranteed

        Empty/blank values and anything containing 'localhost' are ignored.

        :param hostname_string: the hostname (short or fully qualified)
        """
        hostname_string = (hostname_string or '').strip()
        if not hostname_string or 'localhost' in hostname_string:
            return
        domains = hostname_string.split('.')
        # domains[0] is the short name for an FQDN and the whole string
        # when there is no domain part at all
        short_name = domains[0]
        if short_name and short_name not in self.short_names:
            self.short_names.append(short_name)
        if len(domains) > 3:
            # make sure we implicitly get example.com if the system's
            # hostname is something like foo.bar.example.com
            high_domain = '.'.join(domains[-2:])
            self.mapping.add(high_domain)
        self.mapping.add(hostname_string)

    def parse_line(self, line):
        """Override the default parse_line() method to also check for the
        shortname of the host derived from the hostname.

        :param line: the line of text to obfuscate
        :returns: tuple of (obfuscated line, number of replacements made)
        """
        line, count = super(SoSHostnameParser, self).parse_line(line)
        # Longest names first, so a short name that is a substring of a
        # longer one cannot clobber it before the longer one is replaced.
        for short_name in sorted(self.short_names, key=len, reverse=True):
            if not short_name:
                # str.replace('', x) would insert x between every char
                continue
            hits = line.count(short_name)
            if hits:
                count += hits
                line = line.replace(short_name,
                                    self.mapping.get(short_name))
        return line, count
class SoSHostnameParser(SoSCleanerParser):
    """Parser responsible for obfuscating hostnames and domain names.

    On top of whatever the parent parser's parse_line() does, this class
    makes two extra substring passes over each line: one for known FQDNs
    written with '_' in place of '.' and '-', and one for the short
    (domain-less) host names it has recorded.
    """

    name = 'Hostname Parser'
    map_file_key = 'hostname_map'
    regex_patterns = [
        r'(((\b|_)[a-zA-Z0-9-\.]{1,200}\.[a-zA-Z]{1,63}(\b|_)))'
    ]

    def __init__(self, config, opt_domains=None):
        """Create the hostname map and seed it and the short-name list.

        :param config: passed through unchanged to the parent parser
        :param opt_domains: passed to the map's load_domains_from_options()
        """
        # The map must exist before the parent initializer runs, and the
        # domain/short-name loading must happen after it; keep this order.
        self.mapping = SoSHostnameMap()
        super(SoSHostnameParser, self).__init__(config)
        self.mapping.load_domains_from_map()
        self.mapping.load_domains_from_options(opt_domains)
        self.short_names = []
        self.load_short_names_from_mapping()
        self.mapping.set_initial_counts()

    def load_short_names_from_mapping(self):
        """When we load the mapping file into the hostname map, we have to
        do some dancing to get those loaded properly into the "intermediate"
        dicts that the map uses to hold hosts and domains. Similarly, we
        need to also extract shortnames known to the map here.
        """
        for hname in self.mapping.dataset.keys():
            # a key without any '.' is a short name with no domain; an
            # empty key is skipped because replacing '' would corrupt
            # every line handled by parse_line()
            if hname and '.' not in hname \
                    and hname not in self.short_names:
                self.short_names.append(hname)

    def load_hostname_into_map(self, hostname_string):
        """Force add the domainname found in /sos_commands/host/hostname
        into the map. We have to do this here since the normal map prep
        approach from the parser would be ignored since the system's
        hostname is not guaranteed

        Empty/blank values and anything containing 'localhost' are ignored.

        :param hostname_string: the hostname (short or fully qualified)
        """
        hostname_string = (hostname_string or '').strip()
        if not hostname_string or 'localhost' in hostname_string:
            return
        domains = hostname_string.split('.')
        # domains[0] is the short name for an FQDN and the whole string
        # when there is no domain part at all
        short_name = domains[0]
        if short_name and short_name not in self.short_names:
            self.short_names.append(short_name)
        if len(domains) > 3:
            # make sure we implicitly get example.com if the system's
            # hostname is something like foo.bar.example.com
            high_domain = '.'.join(domains[-2:])
            self.mapping.add(high_domain)
        self.mapping.add(hostname_string)

    def parse_line(self, line):
        """Override the default parse_line() method to also check for the
        shortname of the host derived from the hostname.

        :param line: the line of text to obfuscate
        :returns: tuple of (obfuscated line, number of replacements made)
        """

        def _check_line(ln, count, search, repl=None):
            """Perform a second manual check for substrings that may have
            been missed by regex matching

            `search` is the literal text to look for; the replacement is
            the map's value for `repl` when given, else for `search`.
            """
            # an empty search string must never reach str.replace(), which
            # would insert the replacement between every character
            if not search or search in self.mapping.skip_keys:
                return ln, count
            hits = ln.count(search)
            if hits:
                count += hits
                ln = ln.replace(search, self.mapping.get(repl or search))
            return ln, count

        line, count = super(SoSHostnameParser, self).parse_line(line)
        # make an additional pass checking for '_' formatted substrings
        # that the regex patterns won't catch
        hosts = [h for h in self.mapping.dataset.keys() if '.' in h]
        for host in sorted(hosts, key=len, reverse=True):
            fqdn = host.replace('.', '_').replace('-', '_')
            line, count = _check_line(line, count, fqdn, host)

        # longest first so a name contained in a longer name cannot
        # clobber it before the longer one has been replaced
        for short_name in sorted(self.short_names, key=len, reverse=True):
            line, count = _check_line(line, count, short_name)

        return line, count
class SoSHostnameParser(SoSCleanerParser):
    """Parser responsible for obfuscating hostnames and domain names.

    Besides the hostname map, this class keeps a list of short
    (domain-less) host names gathered from the map, from the system
    hostname and from /etc/hosts content.
    """

    name = 'Hostname Parser'
    map_file_key = 'hostname_map'
    regex_patterns = [
        r'(((\b|_)[a-zA-Z0-9-\.]{1,200}\.[a-zA-Z]{1,63}(\b|_)))'
    ]

    def __init__(self, config, opt_domains=None):
        """Create the hostname map and seed it and the short-name list.

        :param config: passed through unchanged to the parent parser
        :param opt_domains: passed to the map's load_domains_from_options()
        """
        # The map must exist before the parent initializer runs, and the
        # domain/short-name loading must happen after it; keep this order.
        self.mapping = SoSHostnameMap()
        super(SoSHostnameParser, self).__init__(config)
        self.mapping.load_domains_from_map()
        self.mapping.load_domains_from_options(opt_domains)
        self.short_names = []
        self.load_short_names_from_mapping()
        self.mapping.set_initial_counts()

    def load_short_names_from_mapping(self):
        """When we load the mapping file into the hostname map, we have to
        do some dancing to get those loaded properly into the "intermediate"
        dicts that the map uses to hold hosts and domains. Similarly, we
        need to also extract shortnames known to the map here.
        """
        for hname in self.mapping.dataset.keys():
            # a key without any '.' is a short name with no domain; an
            # empty key is skipped because an empty short name can never
            # be a meaningful search string
            if hname and '.' not in hname \
                    and hname not in self.short_names:
                self.short_names.append(hname)

    def load_hostname_into_map(self, hostname_string):
        """Force add the domainname found in /sos_commands/host/hostname
        into the map. We have to do this here since the normal map prep
        approach from the parser would be ignored since the system's
        hostname is not guaranteed

        Empty/blank values and anything containing 'localhost' are ignored.

        :param hostname_string: the hostname (short or fully qualified)
        """
        hostname_string = (hostname_string or '').strip()
        if not hostname_string or 'localhost' in hostname_string:
            return
        domains = hostname_string.split('.')
        # domains[0] is the short name for an FQDN and the whole string
        # when there is no domain part at all
        short_name = domains[0]
        if short_name and short_name not in self.short_names:
            self.short_names.append(short_name)
        if len(domains) > 3:
            # make sure we implicitly get example.com if the system's
            # hostname is something like foo.bar.example.com
            high_domain = '.'.join(domains[-2:])
            self.mapping.add(high_domain)
        self.mapping.add(hostname_string)

    def load_hostname_from_etc_hosts(self, content):
        """Parse an archive's copy of /etc/hosts, which requires handling
        that is separate from the output of the `hostname` command. Just
        like load_hostname_into_map(), this has to be done explicitly and
        we cannot rely upon the more generic methods to do this reliably.

        :param content: the full text of the hosts file
        """
        for line in content.splitlines():
            # Everything from '#' to the end of the line is a comment in
            # the hosts file format; drop it so words from a trailing
            # comment are never recorded as host names.
            entry = line.split('#', 1)[0]
            if 'localhost' in entry:
                continue
            # the first field is the address, the rest are host names
            for host in entry.split()[1:]:
                if '.' not in host:
                    # only generate a mapping for fqdns but still record
                    # the short name here for later obfuscation with
                    # parse_line()
                    if host not in self.short_names:
                        self.short_names.append(host)
                    self.mapping.add_regex_item(host)
                else:
                    self.mapping.add(host)