def load(cls, filename):
     '''
     Load Public Suffix List from the given filename.
     '''
     rules = []
     part = None
     with open(filename, 'rb') as content_file:
         content = content_file.read().decode('utf-8')
     for line in content.splitlines():
         line = line.strip()
         if line.startswith('//') or not line:
             m = _BEGIN_SUBSET_MATCHER.search(line)
             if m:
                 part = m.group(1).lower()
             m = _END_SUBSET_MATCHER.search(line)
             if m:
                 part = None
             continue
         if part is None:
             raise Exception(
                 'Internal error: found PSL entry with no part!')
         exception_rule = False
         if line.startswith('!'):
             exception_rule = True
             line = line[1:]
         if line.startswith('.'):
             line = line[1:]
         labels = tuple(
             normalize_label(label) for label in split_into_labels(line)[0])
         rules.append(
             PublicSuffixEntry(labels,
                               exception_rule=exception_rule,
                               part=part))
     return cls(rules)
    def get_registrable_domain(self,
                               domain,
                               keep_unknown_suffix=True,
                               only_if_registerable=True,
                               normalize_result=False,
                               icann_only=False):
        '''
        Given a domain name, extracts the registrable domain. This is the public suffix
        including the last label before the suffix.

        If ``keep_unknown_suffix`` is set to ``False``, only suffixes matching explicit
        entries from the PSL are returned. If no suffix can be found, ``''`` is returned.
        If ``keep_unknown_suffix`` is ``True`` (default), the implicit ``*`` rule is used
        if no other rule matches.

        If ``only_if_registerable`` is set to ``False``, the public suffix is returned
        if there is no label before the suffix. If ``only_if_registerable`` is ``True``
        (default), ``''`` is returned in that case.

        If ``normalize_result`` is set to ``True``, the result is re-combined form the
        normalized labels. In that case, the result is lower-case ASCII. If
        ``normalize_result`` is ``False`` (default), the result ``result`` always satisfies
        ``domain.endswith(result)``.

        If ``icann_only`` is set to ``True``, only official ICANN rules are used. If
        ``icann_only`` is ``False`` (default), also private rules are used.
        '''
        # Split into labels and normalize
        try:
            labels, tail = split_into_labels(domain)
            normalized_labels = [normalize_label(label) for label in labels]
        except InvalidDomainName:
            return ''
        if normalize_result:
            labels = normalized_labels

        # Get suffix length
        suffix_length, rule = self.get_suffix_length_and_rule(
            normalized_labels, icann_only=icann_only)
        if rule is None:
            return ''
        if not keep_unknown_suffix and rule is self._generic_rule:
            return ''
        if suffix_length < len(labels):
            suffix_length += 1
        elif only_if_registerable:
            return ''
        return '.'.join(reversed(labels[:suffix_length])) + tail
Beispiel #3
0
def test_join_labels(labels, tail, result):
    domain = join_labels(labels, tail)
    assert domain == result
    _labels, _tail = split_into_labels(domain)
    assert _labels == labels
    assert _tail == tail
Beispiel #4
0
def test_split_into_labels_errors(domain):
    with pytest.raises(InvalidDomainName):
        split_into_labels(domain)
Beispiel #5
0
def test_split_into_labels(domain, labels, tail):
    _labels, _tail = split_into_labels(domain)
    assert _labels == labels
    assert _tail == tail
    assert join_labels(_labels, _tail) == domain
Beispiel #6
0
def normalize_dns_name(name):
    if name is None:
        return name
    labels, dummy = split_into_labels(name)
    return join_labels([normalize_label(label) for label in labels])