Ejemplo n.º 1
0
def categorized(domain):
    """Return the per-domain category of `domain` as a DCat object.

    Results are memoized in the module-level ``Cached`` mapping, keyed by
    `domain` exactly as passed in.  On a cache miss the domain is parsed with
    ``publicsuffix.DomainStruct`` and the first matching rule below decides
    the category string:

    * 'benign'     -- ``ds.isFQDN`` is false, or
                      ``whitelist.is_whitelisted_t2ld(ds.eTkLD[1])``, or
                      ``dnsblav.is_dnsblav_service(ds)``, or
                      ``alexa.in_top(ds.eTkLD[1], 50000)`` holds
    * 'suspicious' -- ``ds`` is found in ``blacklisted_domains``
    * 'gray'       -- ``p2p.is_p2p_domain(domain)`` or
                      ``porn.is_porn_domain(domain)`` holds (and the domain
                      did not already qualify as benign or suspicious)
    * 'notsure'    -- none of the above

    Note the suspicion here is determined on a per-domain basis, without
    considering the clustering effects among a bunch of names.

    Args:
        domain: the domain name to categorize; also used as the cache key.

    Returns:
        The cached DCat for `domain`, or a new ``DCat(cat, None)`` which is
        stored in ``Cached`` before being returned.
    """
    try:
        return Cached[domain]
    except KeyError:
        # Cache miss.  The classification is done after the handler so that
        # an error raised while classifying is not reported as occurring
        # "during handling" of this KeyError.
        pass

    ds = publicsuffix.DomainStruct(domain)

    # 'benign' determination.  `ds.eTkLD[1]` is deliberately evaluated only
    # after the `ds.isFQDN` test, relying on short-circuiting.
    if (
        not ds.isFQDN
        or whitelist.is_whitelisted_t2ld(ds.eTkLD[1])
        or dnsblav.is_dnsblav_service(ds)
        or alexa.in_top(ds.eTkLD[1], 50000)
        # TODO: or good WOT
    ):
        cat = 'benign'

    # 'suspicious', to be done
    # NOTE(review): membership is tested with the DomainStruct itself, not
    # the raw domain string -- confirm blacklisted_domains holds that type.
    elif ds in blacklisted_domains:
        cat = 'suspicious'

    # 'gray', e.g., p2p/porn sites that did not pass the benign checks above
    elif p2p.is_p2p_domain(domain) or porn.is_porn_domain(domain):
        cat = 'gray'

    # 'notsure'
    else:
        cat = 'notsure'

    dcat = DCat(cat, None)  # None: to be implemented
    Cached[domain] = dcat
    return dcat
Ejemplo n.º 2
0
 def test_is_porn_domain(self):
     """porn.is_porn_domain rejects one ordinary host and accepts two others."""
     # Negative case: an ordinary mail host must not be flagged.
     self.assertFalse(porn.is_porn_domain("mail.google.com"))
     # Positive cases: both names are expected to be flagged.
     self.assertTrue(porn.is_porn_domain("boobs.com"))
     self.assertTrue(porn.is_porn_domain("adultvideo.com"))