def is_email_blacklisted(address):
    """
    Determines whether the domain of a supplied email address is present in
    the 'emailblacklist' table.

    Parameters:
        address: The email address from which to extract the domain.

    Returns:
        Boolean True if the domain is blacklisted, or False otherwise.
    """
    _, domain = address.rsplit("@", 1)
    psl = PublicSuffixList()
    private_suffix = psl.privatesuffix(domain=domain)

    # Check the disposable email address list
    disposable_domains = _retrieve_disposable_email_domains()
    if private_suffix in disposable_domains:
        return True

    # Check the explicitly defined/blacklisted domains.
    blacklisted_domains = d.engine.execute("""
        SELECT domain_name
        FROM emailblacklist
    """).fetchall()
    for site in blacklisted_domains:
        if private_suffix == site['domain_name']:
            return True

    # If we get here, the domain (and any subdomain of it) is not blacklisted
    return False

def _check_same_origin(self, current_url):
    """Check whether a URL shares the same origin as the crawl root."""
    current_url = to_unicode(current_url)
    url_part = urlparse.urlparse(current_url)
    psl = PublicSuffixList()
    url_origin = psl.privatesuffix(url_part.netloc)
    return url_origin == self.origin

def feed_url(self, url):
    """Set the initial URL to crawl."""
    if isinstance(url, basestring):
        url = to_unicode(url)
        url = UrlData(url)

    if self.same_origin:
        url_part = urlparse.urlparse(unicode(url))
        psl = PublicSuffixList()
        self.origin = psl.privatesuffix(url_part.netloc)

    self.fetcher_queue.put(url, block=True)

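The two crawler methods above define "same origin" as "same registrable domain." A minimal standalone sketch of that comparison (Python 3 here, with hypothetical URLs; only the publicsuffixlist package is assumed):

from urllib.parse import urlparse
from publicsuffixlist import PublicSuffixList

psl = PublicSuffixList()

def same_registrable_domain(url_a, url_b):
    # Two URLs count as "same origin" for the crawler if their
    # registrable (private) suffixes match, so blog.example.com and
    # shop.example.com are crawled together.
    a = psl.privatesuffix(urlparse(url_a).netloc)
    b = psl.privatesuffix(urlparse(url_b).netloc)
    return a is not None and a == b

assert same_registrable_domain("https://blog.example.com/x",
                               "https://shop.example.com/y")
assert not same_registrable_domain("https://example.com", "https://example.org")
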
def is_email_blacklisted(address):
    """
    Determines whether the domain of a supplied email address is present in
    the 'emailblacklist' table.

    Parameters:
        address: The email address from which to extract the domain.

    Returns:
        Boolean True if the domain is blacklisted, or False otherwise.
    """
    _, domain = address.rsplit("@", 1)
    psl = PublicSuffixList()
    private_suffix = psl.privatesuffix(domain=domain)

    # Check the disposable email address list
    if private_suffix in DISPOSABLE_DOMAINS:
        return True

    # Check the explicitly defined/blacklisted domains.
    return d.engine.scalar(
        "SELECT EXISTS (SELECT FROM emailblacklist WHERE domain_name = %(domain)s)",
        domain=private_suffix,
    )

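Both versions above compare the private (registrable) suffix of the address's domain rather than the raw domain string, so one blacklist entry covers every subdomain of that registrable domain. A minimal standalone sketch of the normalization, assuming only the publicsuffixlist package (the sample domains are hypothetical):

from publicsuffixlist import PublicSuffixList

psl = PublicSuffixList()

# Subdomain tricks collapse to the same registrable domain...
assert psl.privatesuffix("mailinator.com") == "mailinator.com"
assert psl.privatesuffix("mail.extra.mailinator.com") == "mailinator.com"

# ...while a bare public suffix has no private part, so callers should
# expect None for inputs such as "com".
assert psl.privatesuffix("com") is None
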
def MY_expirement_process(root_dir="/home/yandingkui/dga_detection/result_data/",
                          m_file="split_AGDs",
                          benign_file="split_benign_ac.json",
                          n=815, m=10, c='entropy'):
    psl = PublicSuffixList()
    with open(root_dir + m_file, "r") as f:
        malicious_data = json.loads(f.read())
    with open(root_dir + benign_file, "r") as f:
        benign_data = json.loads(f.read())

    train_domains = []
    train_labels = []
    pred_domains = []
    pred_labels = []

    for k, v in malicious_data.items():
        for d in v[0]:
            # Strip the public suffix, then keep the longest remaining label.
            d_split = d[:d.index(psl.publicsuffix(d)) - 1].split(".")
            if len(d_split) == 1:
                train_domains.append(d_split[0])
            else:
                # Track the longest label without clobbering the `m` parameter
                # (the original reused `m` here and never updated the running
                # maximum, which also broke max_features below).
                longest_len = 0
                longest = None
                for label in d_split:
                    if len(label) > longest_len:
                        longest_len = len(label)
                        longest = label
                train_domains.append(longest)
            train_labels.append(1)
        for d in v[1]:
            pred_domains.append(d)
            pred_labels.append(1)

    for d in benign_data.get("train"):
        pri_d = psl.privatesuffix(d)
        lm = pri_d[:pri_d.index(psl.publicsuffix(pri_d)) - 1]
        train_domains.append(lm)
        train_labels.append(0)
    for d in benign_data.get("pred"):
        pred_domains.append(d)
        pred_labels.append(0)

    train_features = char_feature.extract_all_features(train_domains)
    index = list(range(len(train_domains)))
    random.shuffle(index)
    real_train_features = []
    real_train_labels = []
    for i in index:
        real_train_features.append(train_features[i])
        real_train_labels.append(train_labels[i])

    # Best parameters found by an earlier grid search:
    # {'criterion': 'entropy', 'max_features': 14, 'n_estimators': 820, 'random_state': 0}
    clf = RandomForestClassifier(n_estimators=n, max_features=m, criterion=c,
                                 random_state=0)
    clf.fit(real_train_features, real_train_labels)

    print("Pontus:feature_importance_")
    im = clf.feature_importances_
    feature_items = []
    for i in range(len(im)):
        feature_items.append((i + 1, im[i]))
    feature_items.sort(key=takeSecond, reverse=True)
    print(feature_items)

# -*- coding:utf-8 -*-
__author__ = '*****@*****.**'

from publicsuffixlist import PublicSuffixList

cn = set()
alexa = set()
psl = PublicSuffixList()

with open("alexa-top-1m.csv") as f:
    for i in xrange(0, 1000000):  # Controls how many top Alexa domains are used
        domain = f.readline().strip().split(',')[1]
        domain_2ld = psl.privatesuffix(domain)
        if domain_2ld is None:
            alexa.add(domain)
        else:
            alexa.add(domain_2ld)

with open("result/chinaz_top_domains.txt") as f:
    for line in f:
        domain = line.strip()
        domain_2ld = psl.privatesuffix(domain)
        if domain_2ld is None:
            cn.add(domain)
        else:
            cn.add(domain_2ld)

with open("whitedomains.txt", 'w') as f:
    unionset = cn | alexa
    for domain in unionset:
        try:
            f.write(domain.strip() + '\n')
        except:
            print "An error occurred, continuing......"
            continue

def subresource_integrity(reqs: dict, expectation='sri-implemented-and-external-scripts-loaded-securely') -> dict:
    """
    :param reqs: dictionary containing all the request and response objects
    :param expectation: test expectation
        sri-implemented-and-all-scripts-loaded-securely: all same origin, and uses SRI
        sri-implemented-and-external-scripts-loaded-securely: integrity attribute exists on all external scripts,
          and scripts loaded [default for HTML]
        sri-implemented-but-external-scripts-not-loaded-securely: SRI implemented, but with scripts loaded over HTTP
        sri-not-implemented-but-external-scripts-loaded-securely: SRI isn't implemented,
          but all scripts are loaded over HTTPS
        sri-not-implemented-and-external-scripts-not-loaded-securely: SRI isn't implemented,
          and scripts are downloaded over HTTP
        sri-not-implemented-but-all-scripts-loaded-from-secure-origin: SRI isn't implemented,
          but all scripts come from secure origins (self)
        sri-not-implemented-but-no-scripts-loaded: SRI isn't implemented, because the page doesn't load any scripts
        sri-not-implemented-response-not-html: SRI isn't needed, because the page isn't HTML [default for non-HTML]
        request-did-not-return-status-code-200: Only look for SRI on pages that returned 200, not things like 404s
        html-not-parsable: Can't parse the page's content
    :return: dictionary with:
        data: all external scripts and their integrity / crossorigin attributes
        expectation: test expectation
        pass: whether the site's external scripts met expectations
        result: short string describing the result of the test
    """
    output = {
        'data': {},
        'expectation': expectation,
        'pass': False,
        'result': None,
    }
    response = reqs['responses']['auto']

    # The order of how "good" the results are
    goodness = ['sri-implemented-and-all-scripts-loaded-securely',
                'sri-implemented-and-external-scripts-loaded-securely',
                'sri-implemented-but-external-scripts-not-loaded-securely',
                'sri-not-implemented-but-external-scripts-loaded-securely',
                'sri-not-implemented-and-external-scripts-not-loaded-securely',
                'sri-not-implemented-response-not-html']

    # If the request to get / fails
    if response.status_code != 200:
        output['result'] = 'request-did-not-return-status-code-200'

    # If the content isn't HTML, there are no scripts to load; this is okay
    elif response.headers.get('Content-Type', '').split(';')[0] not in ('text/html', 'application/xhtml+xml'):
        output['result'] = 'sri-not-implemented-response-not-html'

    else:
        # Try to parse the HTML
        try:
            soup = bs(reqs['resources']['/'], 'html.parser')
        except:
            output['result'] = 'html-not-parsable'
            return output

        # Track whether any scripts were on foreign origins
        scripts_on_foreign_origin = False

        # Get all the scripts
        scripts = soup.find_all('script')
        for script in scripts:
            if script.has_attr('src'):
                # Script tag parameters
                src = urlparse(script['src'])
                integrity = script.get('integrity')
                crossorigin = script.get('crossorigin')

                # Check to see if they're on the same second-level domain
                # TODO: update the PSL list on startup
                psl = PublicSuffixList()
                samesld = psl.privatesuffix(urlparse(response.url).netloc) == psl.privatesuffix(src.netloc)

                # Check to see if it's the same origin or second-level domain
                if src.netloc == '' or samesld:
                    secureorigin = True
                elif src.netloc != '' and '.' not in src.netloc:  # like localhost
                    secureorigin = False
                    scripts_on_foreign_origin = True
                else:
                    secureorigin = False
                    scripts_on_foreign_origin = True

                # See if it's a secure scheme
                if src.scheme == 'https' or (src.scheme == '' and urlparse(response.url).scheme == 'https'):
                    securescheme = True
                else:
                    securescheme = False

                # Add it to the scripts data result, if it's not a relative URI
                if not secureorigin:
                    output['data'][script['src']] = {
                        'crossorigin': crossorigin,
                        'integrity': integrity
                    }

                    if integrity and not securescheme:
                        output['result'] = only_if_worse('sri-implemented-but-external-scripts-not-loaded-securely',
                                                         output['result'],
                                                         goodness)
                    elif not integrity and securescheme:
                        output['result'] = only_if_worse('sri-not-implemented-but-external-scripts-loaded-securely',
                                                         output['result'],
                                                         goodness)
                    elif not integrity and not securescheme:
                        output['result'] = only_if_worse('sri-not-implemented-and-external-scripts'
                                                         '-not-loaded-securely',
                                                         output['result'],
                                                         goodness)

                # Grant bonus even if they use SRI on the same origin
                else:
                    if integrity and securescheme and not output['result']:
                        output['result'] = 'sri-implemented-and-all-scripts-loaded-securely'

        # If the page doesn't load any scripts
        if not scripts:
            output['result'] = 'sri-not-implemented-but-no-scripts-loaded'

        # If all the scripts are loaded from a secure origin, not triggering a need for SRI
        elif scripts and not scripts_on_foreign_origin and not output['result']:
            output['result'] = 'sri-not-implemented-but-all-scripts-loaded-from-secure-origin'

        # If the page loaded from a foreign origin, but everything included SRI
        elif scripts and scripts_on_foreign_origin and not output['result']:
            output['result'] = only_if_worse('sri-implemented-and-external-scripts-loaded-securely',
                                             output['result'],
                                             goodness)

    # Code defensively on the size of the data
    output['data'] = output['data'] if len(str(output['data'])) < 32768 else {}

    # Check to see if the test passed or failed
    if output['result'] in ('sri-implemented-and-all-scripts-loaded-securely',
                            'sri-implemented-and-external-scripts-loaded-securely',
                            'sri-not-implemented-response-not-html',
                            'sri-not-implemented-but-all-scripts-loaded-from-secure-origin',
                            'sri-not-implemented-but-no-scripts-loaded',
                            expectation):
        output['pass'] = True

    return output

def subresource_integrity(reqs: dict, expectation='sri-implemented-and-external-scripts-loaded-securely') -> dict:
    """
    :param reqs: dictionary containing all the request and response objects
    :param expectation: test expectation
        sri-implemented-and-all-scripts-loaded-securely: all same origin, and uses SRI
        sri-implemented-and-external-scripts-loaded-securely: integrity attribute exists on all external scripts,
          and scripts loaded [default for HTML]
        sri-implemented-but-external-scripts-not-loaded-securely: SRI implemented, but with scripts loaded over HTTP
        sri-not-implemented-but-external-scripts-loaded-securely: SRI isn't implemented,
          but all scripts are loaded over HTTPS
        sri-not-implemented-and-external-scripts-not-loaded-securely: SRI isn't implemented,
          and scripts are downloaded over HTTP
        sri-not-implemented-but-all-scripts-loaded-from-secure-origin: SRI isn't implemented,
          but all scripts come from secure origins (self)
        sri-not-implemented-but-no-scripts-loaded: SRI isn't implemented, because the page doesn't load any scripts
        sri-not-implemented-response-not-html: SRI isn't needed, because the page isn't HTML [default for non-HTML]
        request-did-not-return-status-code-200: Only look for SRI on pages that returned 200, not things like 404s
        html-not-parsable: Can't parse the page's content
    :return: dictionary with:
        data: all external scripts and their integrity / crossorigin attributes
        expectation: test expectation
        pass: whether the site's external scripts met expectations
        result: short string describing the result of the test
    """
    output = {
        'data': {},
        'expectation': expectation,
        'pass': False,
        'result': None,
    }
    response = reqs['responses']['auto']

    # The order of how "good" the results are
    goodness = ['sri-implemented-and-all-scripts-loaded-securely',
                'sri-implemented-and-external-scripts-loaded-securely',
                'sri-implemented-but-external-scripts-not-loaded-securely',
                'sri-not-implemented-but-external-scripts-loaded-securely',
                'sri-not-implemented-and-external-scripts-not-loaded-securely',
                'sri-not-implemented-response-not-html']

    # If the content isn't HTML, there are no scripts to load; this is okay
    if response.headers.get('Content-Type', '').split(';')[0] not in HTML_TYPES:
        output['result'] = 'sri-not-implemented-response-not-html'

    else:
        # Try to parse the HTML
        try:
            soup = bs(reqs['resources']['__path__'], 'html.parser')
        except:
            output['result'] = 'html-not-parsable'
            return output

        # Track whether any scripts were on foreign origins
        scripts_on_foreign_origin = False

        # Get all the scripts
        scripts = soup.find_all('script')
        for script in scripts:
            if script.has_attr('src'):
                # Script tag parameters
                src = urlparse(script['src'])
                integrity = script.get('integrity')
                crossorigin = script.get('crossorigin')

                # Check to see if they're on the same second-level domain
                # TODO: update the PSL list on startup
                psl = PublicSuffixList()
                samesld = psl.privatesuffix(urlparse(response.url).netloc) == psl.privatesuffix(src.netloc)

                if src.scheme == '':
                    if src.netloc == '':
                        # Relative URL (src="/path")
                        relativeorigin = True
                        relativeprotocol = False
                    else:
                        # Protocol-relative URL (src="//host/path")
                        relativeorigin = False
                        relativeprotocol = True
                else:
                    relativeorigin = False
                    relativeprotocol = False

                # Check to see if it's the same origin or second-level domain
                if relativeorigin or (samesld and not relativeprotocol):
                    secureorigin = True
                else:
                    secureorigin = False
                    scripts_on_foreign_origin = True

                # See if it's a secure scheme
                if src.scheme == 'https' or (relativeorigin and urlparse(response.url).scheme == 'https'):
                    securescheme = True
                else:
                    securescheme = False

                # Add it to the scripts data result, if it's not a relative URI
                if not secureorigin:
                    output['data'][script['src']] = {
                        'crossorigin': crossorigin,
                        'integrity': integrity
                    }

                    if integrity and not securescheme:
                        output['result'] = only_if_worse('sri-implemented-but-external-scripts-not-loaded-securely',
                                                         output['result'],
                                                         goodness)
                    elif not integrity and securescheme:
                        output['result'] = only_if_worse('sri-not-implemented-but-external-scripts-loaded-securely',
                                                         output['result'],
                                                         goodness)
                    elif not integrity and not securescheme:
                        # (The original repeated this branch with an extra
                        # `and samesld` guard; both assigned the same result.)
                        output['result'] = only_if_worse('sri-not-implemented-and-external-scripts'
                                                         '-not-loaded-securely',
                                                         output['result'],
                                                         goodness)

                # Grant bonus even if they use SRI on the same origin
                else:
                    if integrity and securescheme and not output['result']:
                        output['result'] = 'sri-implemented-and-all-scripts-loaded-securely'

        # If the page doesn't load any scripts
        if not scripts:
            output['result'] = 'sri-not-implemented-but-no-scripts-loaded'

        # If all the scripts are loaded from a secure origin, not triggering a need for SRI
        elif scripts and not scripts_on_foreign_origin and not output['result']:
            output['result'] = 'sri-not-implemented-but-all-scripts-loaded-from-secure-origin'

        # If the page loaded from a foreign origin, but everything included SRI
        elif scripts and scripts_on_foreign_origin and not output['result']:
            output['result'] = only_if_worse('sri-implemented-and-external-scripts-loaded-securely',
                                             output['result'],
                                             goodness)

    # Code defensively on the size of the data
    output['data'] = output['data'] if len(str(output['data'])) < 32768 else {}

    # Check to see if the test passed or failed
    if output['result'] in ('sri-implemented-and-all-scripts-loaded-securely',
                            'sri-implemented-and-external-scripts-loaded-securely',
                            'sri-not-implemented-response-not-html',
                            'sri-not-implemented-but-all-scripts-loaded-from-secure-origin',
                            'sri-not-implemented-but-no-scripts-loaded',
                            expectation):
        output['pass'] = True

    return output

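Both versions lean on the only_if_worse() helper to keep the worst result seen so far, according to the goodness ordering. A plausible standalone sketch of that contract (an assumption for illustration, not the Observatory's actual implementation):

def only_if_worse(new_result: str, old_result: str, order: list) -> str:
    # Keep whichever result ranks later (worse) in `order`;
    # an unset old result is always replaced.
    if old_result is None:
        return new_result
    if order.index(new_result) > order.index(old_result):
        return new_result
    return old_result
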
def scan(session: Session):
    reporter.register_data("url", session.url)
    reporter.register_data("domain", session.domain)

    # Check to see if this is an IP; if so, bail out
    if utils.is_ip(session.domain):
        return

    output.empty()
    output.norm("DNS Information:")

    # Get the root domain by looking it up via the PSL
    psl = PublicSuffixList()
    root_domain = psl.privatesuffix(session.domain)
    reporter.register_data("root_domain", root_domain)

    # IP addresses for the domain we are scanning
    ips = basic.get_ips(session.domain)
    reporter.register_data("ip", ips)
    for ip in ips:
        output.norm("\t%s (%s)" % (ip, basic.get_host(str(ip))))

        addr = ipaddress.ip_address(str(ip))
        if not addr.is_private:
            ni = network_info.network_info(str(ip))
            output.norm("\t\t%s" % ni)

            if addr.version == 4:
                output.norm("\t\thttps://www.shodan.io/host/%s" % ip)
                output.norm("\t\thttps://censys.io/ipv4/%s" % ip)
            else:
                output.norm("\t\thttps://www.shodan.io/host/%s" % str(ip).lower())

    output.empty()

    # TXT records for the domain we are scanning
    try:
        txt = basic.get_text(session.domain)
        reporter.register_data("dns_txt", {session.domain: txt})
        for rec in txt:
            output.norm("\tTXT: %s" % rec)
    except Exception as err:
        output.error(f"Error getting TXT records: {str(err)}")

    # TXT records for the root domain
    try:
        if root_domain != session.domain:
            txt = basic.get_text(root_domain)
            reporter.register_data("dns_txt", {root_domain: txt})
            for rec in txt:
                output.norm("\tTXT (%s): %s" % (root_domain, rec))
    except Exception as err:
        output.error(f"Error getting TXT (root) records: {str(err)}")

    output.empty()

    # MX records for the domain we are scanning
    try:
        mx = basic.get_mx(session.domain)
        reporter.register_data("dns_mx", {session.domain: mx})
        for rec in mx:
            server_ip, ni = _get_ip_info(rec[0])
            info = "%s (%s) - %s (%s)" % (rec[0], rec[1], server_ip, ni)
            output.norm("\tMX: %s" % info)
    except Exception as err:
        output.error(f"Error getting MX records: {str(err)}")

    # MX records for the root domain
    try:
        if root_domain != session.domain:
            mx = basic.get_mx(root_domain)
            reporter.register_data("dns_mx", {root_domain: mx})
            for rec in mx:
                server_ip, ni = _get_ip_info(rec[0])
                info = "%s (%s) - %s (%s)" % (rec[0], rec[1], server_ip, ni)
                output.norm("\tMX (%s): %s" % (root_domain, info))
    except Exception as err:
        output.error(f"Error getting MX (root) records: {str(err)}")

    output.empty()

    # NS records for the root domain
    try:
        ns = basic.get_ns(root_domain)
        reporter.register_data("dns_ns", {root_domain: ns})
        for rec in ns:
            server_ip, ni = _get_ip_info(rec)
            info = "%s - %s (%s)" % (rec, server_ip, ni)
            output.norm("\tNS: %s" % info)
    except Exception as err:
        output.error(f"Error getting NS records: {str(err)}")

    output.empty()

    if session.args.srv:
        try:
            output.norm("Searching for SRV records, this will take a minute...")
            output.empty()

            with Spinner():
                srv_records = srv.find_srv_records(root_domain)
            reporter.register_data("dns_srv", srv_records)

            for rec in srv_records:
                server_ip, ni = _get_ip_info(rec[1])
                info = "%s: %s:%s - %s (%s)" % (rec[0], rec[1], rec[2], server_ip, ni)
                output.norm("\tSRV: %s" % info)

            output.empty()
        except Exception as err:
            output.error(f"Error getting SRV records: {str(err)}")

    if session.args.subdomains:
        try:
            output.norm("Searching for sub-domains, this will take a few minutes...")
            output.empty()

            with Spinner():
                sds = subdomains.find_subdomains(root_domain)
            reporter.register_data("dns_subdomains", sds)

            for rec in sds:
                info = ""

                if rec[0] == "CNAME":
                    server_ip, ni = _get_ip_info(rec[2])
                    info = "(CNAME) %s -> %s - %s (%s)" % (rec[1], rec[2], server_ip, ni)
                elif rec[0] == "A":
                    ni = network_info.network_info(rec[2])
                    info = "(A) %s: %s (%s)" % (rec[1], rec[2], ni)
                elif rec[0] == "AAAA":
                    ni = network_info.network_info(rec[2])
                    info = "(AAAA) %s: %s (%s)" % (rec[1], rec[2], ni)

                output.norm("\tSubdomain: %s" % info)
        except Exception as err:
            output.error(f"Error getting subdomain records: {str(err)}")

        output.empty()

    # CAA records
    try:
        caa_count = 0
        carec = caa.get_caa(session.domain)
        reporter.register_data("dns_caa", carec)
        for rec in carec:
            curr = rec[0]

            if rec[1] == "CNAME":
                output.norm("\tCAA (%s): CNAME Found: -> %s" % (curr, rec[2]))
            elif rec[1] == "CAA":
                if len(rec[2]) > 0:
                    for line in rec[2]:
                        output.norm('\tCAA (%s): "%s"' % (curr, line))
                        caa_count += 1
                else:
                    output.norm("\tCAA (%s): No Records Found" % curr)

        # Notify the user if there's an issue
        if caa_count == 0:
            reporter.display(
                "\tCAA: Domain does not have protection from CAA",
                issue.Issue(Vulnerabilities.DNS_CAA_MISSING, session.url, {"caa_records": carec}),
            )
    except Exception as err:
        output.error(f"Error getting CAA records: {str(err)}")

    output.empty()

    # DNSKEY records
    try:
        dk = dnssec.get_dnskey(session.domain)
        reporter.register_data("dns_dnskey", dk)
        if len(dk) > 0:
            for rec in dk:
                output.norm(
                    "\tDNSKEY: Algorithm: '%s' - Flags: '%s' - Key Length: %s"
                    % (rec[2], rec[0], len(rec[3]) * 8)
                )
        else:
            reporter.display(
                "\tDNSKEY: Domain does not use DNSSEC",
                issue.Issue(Vulnerabilities.DNS_DNSSEC_NOT_ENABLED, session.url, {}),
            )
    except Exception as err:
        output.error(f"Error getting DNSKEY records: {str(err)}")

    output.empty()

class PSLFaup(object):
    """
    Fake Faup Python Library using PSL for Windows support
    """

    def __init__(self):
        self.decoded = False
        self.psl = PublicSuffixList()
        self._url = None
        self._retval = {}
        self.ip_as_host = False

    def _clear(self):
        self.decoded = False
        self._url = None
        self._retval = {}
        self.ip_as_host = False

    def decode(self, url) -> None:
        """
        This function creates a dict of all the url fields.
        :param url: The URL to normalize
        """
        self._clear()
        if isinstance(url, bytes) and b'//' not in url[:10]:
            url = b'//' + url
        elif '//' not in url[:10]:
            url = '//' + url
        self._url = urlparse(url)

        self.ip_as_host = False
        hostname = _ensure_str(self._url.hostname)
        try:
            ipv4_bytes = socket.inet_aton(_ensure_str(hostname))
            ipv4 = ipaddress.IPv4Address(ipv4_bytes)
            self.ip_as_host = ipv4.compressed
        except (OSError, ValueError):
            try:
                addr, _, _ = hostname.partition('%')
                ipv6 = ipaddress.IPv6Address(addr)
                self.ip_as_host = ipv6.compressed
            except ValueError:
                pass

        self.decoded = True
        self._retval = {}

    @property
    def url(self):
        if not self.decoded:
            raise UrlNotDecoded("You must call faup.decode() first")

        netloc = self.get_host() + ('' if self.get_port() is None else ':{}'.format(self.get_port()))
        return _ensure_bytes(
            urlunparse((
                self.get_scheme(),
                netloc,
                self.get_resource_path(),
                '',
                self.get_query_string(),
                self.get_fragment(),
            ))
        )

    def get_scheme(self):
        """
        Get the scheme of the url given in the decode function
        :returns: The URL scheme
        """
        if not self.decoded:
            raise UrlNotDecoded("You must call faup.decode() first")
        return _ensure_str(self._url.scheme)

    def get_credential(self):
        if not self.decoded:
            raise UrlNotDecoded("You must call faup.decode() first")
        if self._url.password:
            return _ensure_str(self._url.username) + ':' + _ensure_str(self._url.password)
        if self._url.username:
            return _ensure_str(self._url.username)

    def get_subdomain(self):
        if not self.decoded:
            raise UrlNotDecoded("You must call faup.decode() first")
        if self.get_host() is not None and not self.ip_as_host:
            if self.get_domain() in self.get_host():
                return self.get_host().rsplit(self.get_domain(), 1)[0].rstrip('.') or None

    def get_domain(self):
        if not self.decoded:
            raise UrlNotDecoded("You must call faup.decode() first")
        if self.get_host() is not None and not self.ip_as_host:
            return self.psl.privatesuffix(self.get_host())

    def get_domain_without_tld(self):
        if not self.decoded:
            raise UrlNotDecoded("You must call faup.decode() first")
        if self.get_tld() is not None and not self.ip_as_host:
            return self.get_domain().rsplit(self.get_tld(), 1)[0].rstrip('.')

    def get_host(self):
        if not self.decoded:
            raise UrlNotDecoded("You must call faup.decode() first")
        if self._url.hostname is None:
            return None
        elif self._url.hostname.isascii():
            return _ensure_str(self._url.hostname)
        else:
            return _ensure_str(idna.encode(self._url.hostname, uts46=True))

    def get_unicode_host(self):
        if not self.decoded:
            raise UrlNotDecoded("You must call faup.decode() first")
        if not self.ip_as_host:
            return idna.decode(self.get_host(), uts46=True)

    def get_tld(self):
        if not self.decoded:
            raise UrlNotDecoded("You must call faup.decode() first")
        if self.get_host() is not None and not self.ip_as_host:
            return self.psl.publicsuffix(self.get_host())

    def get_port(self):
        if not self.decoded:
            raise UrlNotDecoded("You must call faup.decode() first")
        return self._url.port

    def get_resource_path(self):
        if not self.decoded:
            raise UrlNotDecoded("You must call faup.decode() first")
        return _ensure_str(self._url.path)

    def get_query_string(self):
        if not self.decoded:
            raise UrlNotDecoded("You must call faup.decode() first")
        return _ensure_str(self._url.query)

    def get_fragment(self):
        if not self.decoded:
            raise UrlNotDecoded("You must call faup.decode() first")
        return _ensure_str(self._url.fragment)

    def get(self):
        self._retval["scheme"] = self.get_scheme()
        self._retval["tld"] = self.get_tld()
        self._retval["domain"] = self.get_domain()
        self._retval["domain_without_tld"] = self.get_domain_without_tld()
        self._retval["subdomain"] = self.get_subdomain()
        self._retval["host"] = self.get_host()
        self._retval["port"] = self.get_port()
        self._retval["resource_path"] = self.get_resource_path()
        self._retval["query_string"] = self.get_query_string()
        self._retval["fragment"] = self.get_fragment()
        self._retval["url"] = self.url
        return self._retval

import os

import requests
import yaml
from publicsuffixlist import PublicSuffixList

import adutil

psl = PublicSuffixList()
filter_file = os.path.join(adutil.project_root, 'filters', 'mono.yml')

with open(filter_file) as f:
    saved_data = yaml.safe_load(f)
known_domains = set(saved_data['domains'])

vault_reply = requests.get(
    'https://otx.alienvault.com/otxapi/indicator/IPv4/passive_dns/134.209.136.68'
).json()
for entry in vault_reply['passive_dns']:
    hostname = psl.privatesuffix(entry['hostname'])
    known_domains.add(hostname)

known_domains = sorted(known_domains)
extra_hosts = []
for domain in known_domains:
    for i in range(10):
        extra_hosts.append('%d.%s' % (i, domain))

with open(filter_file, 'w') as f:
    f.write("# Don't bother manually updating this file.\n")
    f.write("# It is automatically updated with the tools/update-mono-list.py script.\n")
    yaml.dump({'domains': known_domains, 'extra_hosts': extra_hosts}, f)

class gdyd():
    def __init__(self):
        self.psl = PublicSuffixList(accept_unknown=False)
        self.filter = Filter()

    def statistic_single_hour(self, hour_dir, day, hour: int):
        counter = Counter()
        for minute_file in os.listdir(hour_dir):
            bzfile = os.path.join(hour_dir, minute_file)
            try:
                file_point = bz2.open(bzfile, 'r')
                for line in file_point:
                    try:
                        line = line.decode().strip()
                        linesplit = line.split(',')
                        querydomain = linesplit[3].strip().lower()
                        if self.filter.isValidDomain(querydomain):
                            prisuf = self.psl.privatesuffix(querydomain)
                            if prisuf is not None and prisuf not in self.filter.sf.AleaxTop and \
                                    prisuf not in self.filter.sf.CDNSet and \
                                    prisuf not in self.filter.sf.commonset:
                                counter[prisuf] += 1
                                if prisuf != querydomain:
                                    front = querydomain[:querydomain.rindex(prisuf) - 1]
                                    front_s = front.rsplit(".", 1)
                                    if len(front_s) != 0:
                                        ThreeLD = "{}.{}".format(front_s[len(front_s) - 1], prisuf)
                                        counter[ThreeLD] += 1
                    except:
                        pass
                file_point.close()
            except:
                print("error : {}".format(bzfile))
            print("{} finish".format(bzfile))
        print("{}{} write".format(day, hour))
        with open("../result_data/temp/{}{}.json".format(day, hour), "w") as f:
            f.write(json.dumps(counter))

    def all_day_counter(self,
                        rootpath="/home/public/DNS_Project/pdns_gddx_compressed/gdyd",
                        days=["20180502", "20180503", "20180504"]):
        s = time.time()
        number = 24
        pool = Pool(number)
        for day in days:
            daydir = os.path.join(rootpath, "dt={}".format(day))
            for h in range(24):
                hourdir = os.path.join(daydir, "hour={0:02d}".format(h))
                if os.path.exists(hourdir):
                    pool.apply_async(func=self.statistic_single_hour,
                                     args=(hourdir, day, h))
                else:
                    print("path error")
        pool.close()
        pool.join()
        e = time.time()
        print("spend time : {} minutes".format((e - s) / 60))

    def get_counter(self, days=["20180502", "20180503", "20180504"]):
        root_dir = "/home/yandingkui/Pontus/result_data/temp/"
        for day in days:
            counter = Counter()
            for i in range(24):
                path = os.path.join(root_dir, "{}{}.json".format(day, i))
                if os.path.exists(path):
                    with open(path, "r") as f:
                        counter1 = Counter(json.loads(f.read()))
                    counter.update(counter1)
            with open("{}{}.json".format(root_dir, day), "w") as f:
                f.write(json.dumps(counter))

    def remove_file(self,
                    days=["20180427", "20180428", "20180429", "20180430", "20180501"]):
        root_dir = "/home/yandingkui/Pontus/result_data/temp/"
        for day in days:
            for i in range(24):
                path = os.path.join(root_dir, "{}{}.json".format(day, i))
                if os.path.exists(path):
                    os.remove(path)

    def getBenignDomains(self, days=["20180502", "20180503"]):
        root_dir = "/home/yandingkui/Pontus/result_data/temp/"
        for day in days:
            with open(os.path.join(root_dir, "{}.json".format(day)), "r") as f:
                counter = Counter(json.loads(f.read()))
            data = []
            for item in counter.most_common(30000):
                data.append(item[0])
            with open("../data_sets/yd_{}".format(day), "w") as F:
                F.write("\n".join(data))

    def dxvsyd(self, days=["20180427", "20171031"]):
        yd = "/home/yandingkui/Pontus/result_data/temp/20180427.json"
        dx = "/home/yandingkui/Pontus/result_data/gddx/20171031.json"
        with open(yd, "r") as f:
            counter1 = Counter(json.loads(f.read()))
        with open(dx, "r") as f:
            counter2 = Counter(json.loads(f.read()))
        s1 = []
        s2 = []
        for item in counter1.most_common(30000):
            s1.append(item[0])
        for item in counter2.most_common(30000):
            s2.append(item[0])
        with open("../result_data/yd_20180427", "w") as f:
            f.write("\n".join(s1))
        with open("../result_data/dx_20171031", "w") as f:
            f.write("\n".join(s2))

def get_suspicious(year, month, day):
    timestring = "{}{:0>2d}{:0>2d}".format(year, month, day)
    suspicious_domains_set = set()

    if os.path.exists("../result_data/{}domains.txt".format(timestring)):
        with open("../result_data/{}domains.txt".format(timestring), "r") as f:
            for r in f:
                suspicious_domains_set.add(r.strip())
        check_active_domains(suspicious_domains_set, timestring)
    else:
        init_domain_set = set()
        # Get all domains
        for hour in range(24):
            file_path = "{}{:0>2d}{:0>2d}{:0>2d}".format(year, month, day, hour)
            if not os.path.exists("../result_data/{}".format(file_path)):
                continue
            with open("../result_data/{}".format(file_path), "r") as f:
                for r in f:
                    domain = r.strip().split(",")[1]
                    init_domain_set.add(domain)

        psl = PublicSuffixList()
        domain_labels = []
        labels_labels = []
        i = 0
        # Get labels
        domains_list = list(init_domain_set)
        for d in domains_list:
            s = d[:d.index(psl.publicsuffix(d)) - 1]
            for l in s.split("."):
                if len(l) > 0:
                    domain_labels.append(l)
                    labels_labels.append(i)
            i = i + 1

        features_path = "../result_data/{}_features.npy".format(timestring)
        if os.path.exists(features_path):
            features = np.load(features_path)
        else:
            features = extract_all_features(domain_labels)
            np.save(features_path, features)

        # The classifier identifies labels
        clf = joblib.load("../result_data/ac_model.m")
        pred_labels = clf.predict(features)
        domain_index = set()
        for i in range(len(labels_labels)):
            if pred_labels[i] == 1:
                domain_index.add(labels_labels[i])

        # Get suspicious domains
        for index in domain_index:
            ps = psl.privatesuffix(domains_list[index])
            if ps is None:
                continue
            suspicious_domains_set.add(ps)

        print("{} domains".format(len(suspicious_domains_set)))
        with open("../result_data/{}domains.txt".format(timestring), "w") as f:
            f.write("\n".join(suspicious_domains_set))
        print("save finish")

        # dgarchive check
        check_active_domains(suspicious_domains_set, timestring)

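One caveat on the `d[:d.index(psl.publicsuffix(d)) - 1]` slice used above (and in the training script earlier): str.index matches the first occurrence of the suffix text, which mis-slices names whose left-hand labels repeat the suffix (the gdyd class sidesteps this with rindex). A sketch of a length-based alternative; the helper name is hypothetical:

from publicsuffixlist import PublicSuffixList

psl = PublicSuffixList()

def labels_without_public_suffix(domain):
    # Slice from the end by suffix length rather than str.index(), which
    # would match the first occurrence and mis-slice names such as
    # "com.example.com".
    suffix = psl.publicsuffix(domain)
    if suffix is None or suffix == domain:
        return []
    return domain[:-(len(suffix) + 1)].split(".")

assert labels_without_public_suffix("com.example.com") == ["com", "example"]
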
class TestPSL(unittest.TestCase):

    def setUp(self):
        self.psl = PublicSuffixList()

    def test_typesafe(self):
        self.assertEqual(self.psl.suffix("www.example.co.jp").__class__, "example.co.jp".__class__)
        self.assertEqual(self.psl.suffix(u("www.example.co.jp")).__class__, u("example.co.jp").__class__)
        self.assertEqual(self.psl.publicsuffix("www.example.co.jp").__class__, "co.jp".__class__)
        self.assertEqual(self.psl.publicsuffix(u("www.example.co.jp")).__class__, u("co.jp").__class__)

    def test_uppercase(self):
        self.assertEqual(self.psl.suffix("wWw.eXaMpLe.cO.Jp"), "example.co.jp")
        self.assertEqual(self.psl.publicsuffix("wWw.eXaMpLe.cO.Jp"), "co.jp")

    def test_invaliddomain(self):
        self.assertEqual(self.psl.suffix("www..invalid"), None)
        self.assertEqual(self.psl.suffix(".example.com"), None)
        self.assertEqual(self.psl.suffix("example.com."), None)
        self.assertEqual(self.psl.suffix(""), None)
        self.assertEqual(self.psl.publicsuffix("www..invalid"), None)
        self.assertEqual(self.psl.publicsuffix(".example.com"), None)
        self.assertEqual(self.psl.publicsuffix("example.com."), None)
        self.assertEqual(self.psl.publicsuffix(""), None)

    def test_idn(self):
        tld = u("香港")
        self.assertEqual(self.psl.suffix(u("www.example.") + tld), u("example.") + tld)
        self.assertEqual(self.psl.publicsuffix(u("www.example.") + tld), tld)

    def test_punycoded(self):
        tld = encode_idn(u("香港"))
        self.assertEqual(self.psl.suffix(u("www.example.") + tld), u("example.") + tld)
        self.assertEqual(self.psl.publicsuffix(u("www.example.") + tld), tld)

    def test_suffix_deny_public(self):
        self.assertEqual(self.psl.suffix("com"), None)
        self.assertEqual(self.psl.suffix("co.jp"), None)
        self.assertEqual(self.psl.suffix("example.nagoya.jp"), None)

    def test_unknown(self):
        self.assertEqual(self.psl.suffix("www.example.unknowntld"), "example.unknowntld")
        self.assertEqual(self.psl.suffix("unknowntld"), None)
        self.assertEqual(self.psl.publicsuffix("www.example.unknowntld"), "unknowntld")
        self.assertEqual(self.psl.publicsuffix("unknowntld"), "unknowntld")

    def test_deny_unknown(self):
        source = """
known
"""
        psl = PublicSuffixList(source.splitlines(), accept_unknown=False)
        self.assertEqual(psl.suffix("www.example.unknowntld"), None)

    def test_custom_psl(self):
        source = """
invalid
*.invalid
!test.invalid
"""
        psl = PublicSuffixList(source.splitlines())
        self.assertEqual(psl.suffix("example.invalid"), None)
        self.assertEqual(psl.suffix("test.invalid"), "test.invalid")
        self.assertEqual(psl.suffix("some.test.invalid"), "test.invalid")
        self.assertEqual(psl.suffix("aaa.bbb.ccc.invalid"), "bbb.ccc.invalid")
        self.assertEqual(psl.publicsuffix("example.invalid"), "example.invalid")
        self.assertEqual(psl.publicsuffix("test.invalid"), "invalid")

    def test_publicsuffix(self):
        self.assertEqual(self.psl.publicsuffix("www.example.com"), "com")
        self.assertEqual(self.psl.publicsuffix("unknowntld"), "unknowntld")

    def test_wildcard(self):
        self.assertEqual(self.psl.suffix("test.example.nagoya.jp"), "test.example.nagoya.jp")
        self.assertEqual(self.psl.suffix("example.nagoya.jp"), None)
        self.assertEqual(self.psl.publicsuffix("example.nagoya.jp"), "example.nagoya.jp")
        self.assertEqual(self.psl.publicsuffix("test.example.nagoya.jp"), "example.nagoya.jp")

    def test_checkpublicsuffix_script(self):
        regex = re.compile(r"^checkPublicSuffix\(('[^']+'), (null|'[^']+')\);")
        with open(os.path.join(os.path.dirname(__file__), "test_psl.txt"), "rb") as f:
            ln = 0
            for line in f:
                ln += 1
                l = line.decode("utf-8")
                m = regex.match(l)
                if not m:
                    continue
                arg = m.group(1).strip("'")
                res = None if m.group(2) == "null" else m.group(2).strip("'")
                self.assertEqual(self.psl.suffix(arg), res, "in line {0}: {1}".format(ln, line.strip()))

    def test_typeerror(self):
        self.assertRaises(TypeError, lambda: self.psl.suffix(None))
        self.assertRaises(TypeError, lambda: self.psl.suffix(1))
        if b("") != "":
            # python3
            self.assertRaises(TypeError, lambda: self.psl.suffix(b("www.example.com")))

    def test_compatclass(self):
        from publicsuffixlist.compat import PublicSuffixList
        psl = PublicSuffixList()
        self.assertEqual(psl.get_public_suffix("test.example.com"), "example.com")
        self.assertEqual(psl.get_public_suffix("com"), "")
        self.assertEqual(psl.get_public_suffix(""), "")

    def test_unsafecompatclass(self):
        from publicsuffixlist.compat import UnsafePublicSuffixList
        psl = UnsafePublicSuffixList()
        self.assertEqual(psl.get_public_suffix("test.example.com"), "example.com")
        self.assertEqual(psl.get_public_suffix("com"), "com")
        self.assertEqual(psl.get_public_suffix(""), "")

    def test_toomanylabels(self):
        d = "a." * 1000000 + "example.com"
        self.assertEqual(self.psl.publicsuffix(d), "com")
        self.assertEqual(self.psl.privatesuffix(d), "example.com")

    def test_flatstring(self):
        psl = PublicSuffixList(u("com\nnet\n"))
        self.assertEqual(psl.publicsuffix("example.com"), "com")

    def test_flatbytestring(self):
        psl = PublicSuffixList(b("com\nnet\n"))
        self.assertEqual(psl.publicsuffix("example.com"), "com")

    def test_privateparts(self):
        psl = self.psl
        self.assertEqual(psl.privateparts("aaa.www.example.com"), ("aaa", "www", "example.com"))

    def test_noprivateparts(self):
        psl = self.psl
        self.assertEqual(psl.privateparts("com"), None)  # no private part

    def test_reconstructparts(self):
        psl = self.psl
        self.assertEqual(".".join(psl.privateparts("aaa.www.example.com")), "aaa.www.example.com")

    def test_subdomain(self):
        psl = self.psl
        self.assertEqual(psl.subdomain("aaa.www.example.com", depth=0), "example.com")
        self.assertEqual(psl.subdomain("aaa.www.example.com", depth=1), "www.example.com")
        self.assertEqual(psl.subdomain("aaa.www.example.com", depth=2), "aaa.www.example.com")
        self.assertEqual(psl.subdomain("aaa.www.example.com", depth=3), None)  # no sufficient depth

class TestPSL(unittest.TestCase):

    def setUp(self):
        self.psl = PublicSuffixList()

    def test_typesafe(self):
        self.assertEqual(self.psl.suffix("www.example.co.jp").__class__, "example.co.jp".__class__)
        self.assertEqual(self.psl.suffix(u("www.example.co.jp")).__class__, u("example.co.jp").__class__)
        self.assertEqual(self.psl.publicsuffix("www.example.co.jp").__class__, "co.jp".__class__)
        self.assertEqual(self.psl.publicsuffix(u("www.example.co.jp")).__class__, u("co.jp").__class__)

    def test_uppercase(self):
        self.assertEqual(self.psl.suffix("wWw.eXaMpLe.cO.Jp"), "example.co.jp")
        self.assertEqual(self.psl.publicsuffix("wWw.eXaMpLe.cO.Jp"), "co.jp")

    def test_invaliddomain(self):
        self.assertEqual(self.psl.suffix("www..invalid"), None)
        self.assertEqual(self.psl.suffix(".example.com"), None)
        self.assertEqual(self.psl.suffix("example.com."), None)
        self.assertEqual(self.psl.suffix(""), None)
        self.assertEqual(self.psl.publicsuffix("www..invalid"), None)
        self.assertEqual(self.psl.publicsuffix(".example.com"), None)
        self.assertEqual(self.psl.publicsuffix("example.com."), None)
        self.assertEqual(self.psl.publicsuffix(""), None)

    def test_idn(self):
        tld = u("香港")
        self.assertEqual(self.psl.suffix(u("www.example.") + tld), u("example.") + tld)
        self.assertEqual(self.psl.publicsuffix(u("www.example.") + tld), tld)

    def test_punycoded(self):
        tld = encode_idn(u("香港"))
        self.assertEqual(self.psl.suffix(u("www.example.") + tld), u("example.") + tld)
        self.assertEqual(self.psl.publicsuffix(u("www.example.") + tld), tld)

    def test_suffix_deny_public(self):
        self.assertEqual(self.psl.suffix("com"), None)
        self.assertEqual(self.psl.suffix("co.jp"), None)
        self.assertEqual(self.psl.suffix("example.nagoya.jp"), None)

    def test_unknown(self):
        self.assertEqual(self.psl.suffix("www.example.unknowntld"), "example.unknowntld")
        self.assertEqual(self.psl.suffix("unknowntld"), None)
        self.assertEqual(self.psl.publicsuffix("www.example.unknowntld"), "unknowntld")
        self.assertEqual(self.psl.publicsuffix("unknowntld"), "unknowntld")

    def test_deny_unknown(self):
        source = """
known
"""
        psl = PublicSuffixList(source.splitlines(), accept_unknown=False)
        self.assertEqual(psl.suffix("www.example.unknowntld"), None)

    def test_custom_psl(self):
        source = """
invalid
*.invalid
!test.invalid
"""
        psl = PublicSuffixList(source.splitlines())
        self.assertEqual(psl.suffix("example.invalid"), None)
        self.assertEqual(psl.suffix("test.invalid"), "test.invalid")
        self.assertEqual(psl.suffix("some.test.invalid"), "test.invalid")
        self.assertEqual(psl.suffix("aaa.bbb.ccc.invalid"), "bbb.ccc.invalid")
        self.assertEqual(psl.publicsuffix("example.invalid"), "example.invalid")
        self.assertEqual(psl.publicsuffix("test.invalid"), "invalid")

    def test_publicsuffix(self):
        self.assertEqual(self.psl.publicsuffix("www.example.com"), "com")
        self.assertEqual(self.psl.publicsuffix("unknowntld"), "unknowntld")

    def test_wildcard(self):
        self.assertEqual(self.psl.suffix("test.example.nagoya.jp"), "test.example.nagoya.jp")
        self.assertEqual(self.psl.suffix("example.nagoya.jp"), None)
        self.assertEqual(self.psl.publicsuffix("example.nagoya.jp"), "example.nagoya.jp")
        self.assertEqual(self.psl.publicsuffix("test.example.nagoya.jp"), "example.nagoya.jp")

    def test_checkpublicsuffix_script(self):
        regex = re.compile(r"^checkPublicSuffix\(('[^']+'), (null|'[^']+')\);")
        with open(os.path.join(os.path.dirname(__file__), "test_psl.txt"), "rb") as f:
            ln = 0
            for line in f:
                ln += 1
                l = line.decode("utf-8")
                m = regex.match(l)
                if not m:
                    continue
                arg = m.group(1).strip("'")
                res = None if m.group(2) == "null" else m.group(2).strip("'")
                self.assertEqual(self.psl.suffix(arg), res, "in line {0}: {1}".format(ln, line.strip()))

    def test_typeerror(self):
        self.assertRaises(TypeError, lambda: self.psl.suffix(None))
        self.assertRaises(TypeError, lambda: self.psl.suffix(1))
        if b("") != "":
            # python3
            self.assertRaises(TypeError, lambda: self.psl.suffix(b("www.example.com")))

    def test_compatclass(self):
        from publicsuffixlist.compat import PublicSuffixList
        psl = PublicSuffixList()
        self.assertEqual(psl.get_public_suffix("test.example.com"), "example.com")
        self.assertEqual(psl.get_public_suffix("com"), "")
        self.assertEqual(psl.get_public_suffix(""), "")

    def test_unsafecompatclass(self):
        from publicsuffixlist.compat import UnsafePublicSuffixList
        psl = UnsafePublicSuffixList()
        self.assertEqual(psl.get_public_suffix("test.example.com"), "example.com")
        self.assertEqual(psl.get_public_suffix("com"), "com")
        self.assertEqual(psl.get_public_suffix(""), "")

    def test_toomanylabels(self):
        d = "a." * 1000000 + "example.com"
        self.assertEqual(self.psl.publicsuffix(d), "com")
        self.assertEqual(self.psl.privatesuffix(d), "example.com")

    def test_flatstring(self):
        psl = PublicSuffixList(u("com\nnet\n"))
        self.assertEqual(psl.publicsuffix("example.com"), "com")

    def test_flatbytestring(self):
        psl = PublicSuffixList(b("com\nnet\n"))
        self.assertEqual(psl.publicsuffix("example.com"), "com")