def test_custom_psl(self):
    """A user-supplied rule set (plain, wildcard and exception rules) is honored."""
    rules = """
invalid
*.invalid
!test.invalid
"""
    psl = PublicSuffixList(rules.splitlines())
    # "invalid" + "*.invalid" make two public levels; "!test.invalid" carves out an exception.
    self.assertEqual(psl.suffix("example.invalid"), None)
    self.assertEqual(psl.suffix("test.invalid"), "test.invalid")
    self.assertEqual(psl.suffix("some.test.invalid"), "test.invalid")
    self.assertEqual(psl.suffix("aaa.bbb.ccc.invalid"), "bbb.ccc.invalid")
    self.assertEqual(psl.publicsuffix("example.invalid"), "example.invalid")
    self.assertEqual(psl.publicsuffix("test.invalid"), "invalid")
class DomainSuffixExpertBot(Bot):
    """Annotates events with the public suffix of the configured FQDN/domain field.

    Reads ``parameters.field`` (validated against ALLOWED_FIELDS) and a local
    public-suffix file (``parameters.suffix_file``, ICANN section only).
    """

    suffixes = {}

    def init(self):
        self.field = self.parameters.field
        if self.field not in ALLOWED_FIELDS:
            raise InvalidArgument('key', got=self.field, expected=ALLOWED_FIELDS)
        with codecs.open(self.parameters.suffix_file, encoding='UTF-8') as file_handle:
            self.psl = PublicSuffixList(source=file_handle, only_icann=True)

    def process(self):
        event = self.receive_message()
        for space in ('source', 'destination'):
            lookup_key = '.'.join((space, self.field))
            if lookup_key in event:
                target_key = '.'.join((space, 'domain_suffix'))
                event[target_key] = self.psl.publicsuffix(domain=event[lookup_key])
        self.send_message(event)
        self.acknowledge_message()
class Filter():
    """Domain filter combining SingleFilter validity/whitelist checks with a
    2LD/3LD structural test backed by the public suffix list."""

    def __init__(self):
        # accept_unknown=False: names under unlisted TLDs get no suffix (None).
        self.psl = PublicSuffixList(accept_unknown=False)
        self.sf = SingleFilter(100000, self.psl)

    def isValidDomain(self, domain: str):
        """Return True when *domain* passes SingleFilter and is not whitelisted."""
        # Idiom fix: return the boolean expression directly instead of
        # if/else returning True/False.
        return self.sf.isValidDomain(domain) and not self.sf.inWhiteList(domain)

    def Two_Three_level_domain(self, domain: str):
        """
        identify a domain

        Return True when *domain* has exactly one or two labels in front of
        its public suffix (i.e. it is a 2nd- or 3rd-level domain).

        :param domain: domain:str
        :return: bool
        """
        publicsuffix = self.psl.publicsuffix(domain)
        if publicsuffix is None:  # idiom fix: "is None" instead of "== None"
            return False
        # Strip the suffix and its separating dot.
        pre_domain = domain[:domain.rindex(publicsuffix) - 1]
        if len(pre_domain) == 0:  # domain is exactly the public suffix
            return False
        return len(pre_domain.split(".")) in (1, 2)
def eventdb_apply(host, port, database, username, password, table, dry_run,
                  where, filename):
    """Backfill ``"source.domain_suffix"``/``"destination.domain_suffix"`` for
    every row of *table* that has a source or destination FQDN.

    NOTE(review): *dry_run* and *filename* are accepted but unused here; they
    are kept for interface compatibility with the caller.
    """
    if password:
        # BUG FIX: use getpass so the password is not echoed to the terminal
        # (input() displays what the user types).
        import getpass
        password = getpass.getpass('Password for user %r on %r: ' % (username, host))
    where = 'AND ' + where if where else ''
    # Two connections: cur1 streams the SELECT while cur2 issues UPDATEs.
    con1 = psycopg2.connect(user=username, password=password, database=database,
                            host=host, port=port)
    cur1 = con1.cursor(cursor_factory=DictCursor)
    con2 = psycopg2.connect(user=username, password=password, database=database,
                            host=host, port=port)
    con2.autocommit = True
    cur2 = con2.cursor(cursor_factory=DictCursor)
    cur1.execute('''
        SELECT id, "source.fqdn", "destination.fqdn"
        FROM {table}
        WHERE ("source.fqdn" IS NOT NULL OR "destination.fqdn" IS NOT NULL)
        {where}
    '''.format(table=table, where=where))
    psl = PublicSuffixList(only_icann=True)
    counter = 0
    for row in cur1:
        counter += 1
        if row['source.fqdn']:
            # BUG FIX: the UPDATE previously hardcoded the "events" table while
            # the SELECT used the *table* parameter; keep them consistent.
            cur2.execute(
                'UPDATE {table} SET "source.domain_suffix" = %s WHERE id = %s'.format(table=table),
                (psl.publicsuffix(row['source.fqdn'].encode('idna').decode()),
                 row['id']))
        if row['destination.fqdn']:
            cur2.execute(
                'UPDATE {table} SET "destination.domain_suffix" = %s WHERE id = %s'.format(table=table),
                (psl.publicsuffix(row['destination.fqdn'].encode('idna').decode()),
                 row['id']))
    con2.commit()
    print("Changed %d rows" % counter)
def static_num(file_path):
    """Count the domains in *file_path* (first CSV column per line) by how many
    labels sit left of the public suffix, printing [one, two, three-or-more]."""
    psl = PublicSuffixList()
    counts = [0, 0, 0]
    with open(file_path, "r") as fh:
        for line in fh:
            domain = line.strip().split(",")[0]
            prefix = domain[:domain.rindex(psl.publicsuffix(domain)) - 1]
            depth = len(prefix.split("."))
            # Bucket: depth 1 -> 0, depth 2 -> 1, deeper -> 2.
            counts[min(depth, 3) - 1] += 1
    print(counts)
def get_domain_name(url: str) -> str:
    """
    Gets the domain name of a URL, removing the TLD

    :param url: URL to find domain of
    """
    # Sanitise the URL: drop protocol, path and port to isolate the host.
    host = url.split("://")[-1].split("/")[0].split(":")[0]
    # Determine the public suffix of the host.
    url_tld = PublicSuffixList().publicsuffix(host)
    # Drop the suffix plus its dot, keep the right-most remaining label.
    # NOTE(review): assumes a suffix was found; a None suffix would raise here.
    return host[:-len(url_tld) - 1].split(".")[-1]
def createdataset(type="train", AGD_file="../data_sets/split_AGDs",
                  BD_file="../data_sets/split_benign_nx.json",
                  datasetname="nx_train_data"):
    """Build a labelled per-label dataset from AGD and benign-domain files and
    write features (.npy) plus a domains/labels CSV under ../data_sets/.

    :param type: "train" selects index 0 of each AGD entry, anything else index 1
    """
    v_index = 0 if type == "train" else 1
    psl = PublicSuffixList()
    with open(AGD_file, "r") as f:
        AGD_dict = json.loads(f.read())
    with open(BD_file, "r") as f:
        bd_dict = json.loads(f.read())
    allAGDs = set()
    allBDs = set()
    # Collect every label left of the public suffix, per class.
    for k, v in AGD_dict.items():
        for d in v[v_index]:
            pre_d = d[:d.rindex(psl.publicsuffix(d)) - 1]
            for l in pre_d.split("."):
                allAGDs.add(l)
    for d in bd_dict[type]:
        pre_d = d[:d.rindex(psl.publicsuffix(d)) - 1]
        for l in pre_d.split("."):
            allBDs.add(l)
    length = len(allAGDs)
    print(length)
    # Balance the benign side against the AGD side.
    allBDs = list(allBDs)[:length]
    allAGDs = list(allAGDs)
    alldomains = allAGDs + allBDs
    # BUG FIX: label counts must track the actual list sizes; the original
    # used `length` for both, producing a label vector longer than the data
    # whenever fewer than `length` benign labels exist.
    alllabels = list(np.ones(len(allAGDs))) + list(np.zeros(len(allBDs)))
    allfeatures = extract_all_features(alldomains)
    np.save("../data_sets/{}_features.npy".format(datasetname), allfeatures)
    data = dict()
    data["domains"] = pd.Series(alldomains, dtype='str')
    data["labels"] = pd.Series(alllabels, dtype='int32')
    df = pd.DataFrame(data=data)
    df.to_csv("../data_sets/{}.csv".format(datasetname), index=False)
def dga_static_num(file_path):
    """Count domains from a JSON map {family: [list0, list1]} by how many
    labels precede the public suffix, printing [one, two, three-or-more]."""
    psl = PublicSuffixList()
    counts = [0, 0, 0]

    def _tally(domain):
        # Labels left of the public suffix decide the bucket.
        labels = domain[:domain.rindex(psl.publicsuffix(domain)) - 1].split(".")
        counts[min(len(labels), 3) - 1] += 1

    with open(file_path, "r") as fh:
        mapping = json.loads(fh.read())
    for family, pair in mapping.items():
        for d in pair[0]:
            _tally(d)
        for d in pair[1]:
            _tally(d)
    print(counts)
def getAllDomainLabels(self, domains):
    """Split each domain into the labels left of its public suffix.

    Returns (labels, index): index[j] is the position in *domains* that
    labels[j] was taken from.
    """
    labels = []
    index = []
    psl = PublicSuffixList()
    for i, raw in enumerate(domains):
        d = raw.strip()
        pub = psl.publicsuffix(d)
        parts = d[:d.rindex(pub) - 1].split(".")
        if len(parts) > 2:
            # Diagnostic: unusually deep prefix.
            print("d:{} pub:{}".format(d, pub))
        for part in parts:
            if len(part) == 0:
                # Diagnostic: empty label encountered.
                print("kong kong")
            labels.append(part)
            index.append(i)
    return labels, index
def filter2LDAleax():
    """Keep the Alexa domains whose prefix (left of the public suffix) is a
    single non-empty label and write them to ../data_sets/Aleax2LD."""
    psl = PublicSuffixList()
    kept = []
    with open("../data_sets/Aleax", "r") as fh:
        for line in fh:
            domain = line.strip()
            prefix = domain[:domain.rindex(psl.publicsuffix(domain)) - 1]
            if not prefix:
                continue  # domain is exactly its public suffix
            parts = prefix.split(".")
            if len(parts) == 1 and len(parts[0]) != 0:
                kept.append(domain)
    print(len(kept))
    with open("../data_sets/Aleax2LD", "w") as fh:
        fh.write("\n".join(kept))
def main(arguments):
    """Exit with status 1 when any line of disposable_email_blacklist.conf is a
    bare public suffix. *arguments* is accepted but unused."""
    suffix_detected = False
    download_suffixes()
    with open("public_suffix_list.dat", "r") as latest:
        psl = PublicSuffixList(latest)
    with io.open('disposable_email_blacklist.conf', 'r') as deb:
        for i, line in enumerate(deb):
            current_line = line.strip()
            # A line that equals its own public suffix is a whole TLD/suffix.
            if psl.publicsuffix(current_line) == current_line:
                print(f'The line number {i+1} contains just a public suffix: {current_line}')
                suffix_detected = True
    if suffix_detected:
        print('At least one valid public suffix found in the blacklist, please remove it. See https://publicsuffix.org for details on why this shouldn\'t be blacklisted.')
        sys.exit(1)
def get2subdomain(root_dir="/home/public/2019-01-07-dgarchive_full"):
    """Collect, per dgarchive file, the domains whose prefix left of the public
    suffix has exactly two labels; print the count and a 10-item sample."""
    result = dict()
    psl = PublicSuffixList()
    for filename in os.listdir(root_dir):
        with open("{}/{}".format(root_dir, filename), "r") as fh:
            for line in fh:
                d = line.strip().split(",")[0]
                prefix_labels = d[:d.rindex(psl.publicsuffix(d)) - 1].split(".")
                if len(prefix_labels) == 2:
                    # setdefault replaces the original get/None/insert dance.
                    result.setdefault(filename, set()).add(d)
    for k, v in result.items():
        print("{} : {}".format(k, len(v)))
        print(list(v)[:10])
def lstm_getSingleFea(d: str):
    """Encode the dot-stripped prefix of *d* (left of its public suffix) into a
    64-slot vector of word codes, filled right-to-left; all zeros when empty."""
    psl = PublicSuffixList()
    prefix = d[:d.rindex(psl.publicsuffix(d)) - 1].replace(".", "")
    vector = np.zeros(64)
    if not prefix:
        return vector
    cuter = CutWords()
    # Bidirectional maximum-matching segmentation of the prefix.
    wordlist = cuter.max_biward_cut(prefix)
    slot = 63
    # Right-align the word codes; stop once all 64 slots are used.
    for word in reversed(wordlist):
        vector[slot] = CutWords.order[word]
        slot -= 1
        if slot < 0:
            break
    return vector
def check_for_public_suffixes(filename):
    """Exit with status 1 when any line of files[filename] is a bare public suffix."""
    lines = files[filename]
    suffix_detected = False
    with open("public_suffix_list.dat", "r") as latest:
        psl = PublicSuffixList(latest)
    for i, line in enumerate(lines):
        current_line = line.strip()
        # An entry equal to its own public suffix would block a whole TLD.
        if psl.publicsuffix(current_line) == current_line:
            print(
                f"The line number {i+1} contains just a public suffix: {current_line}"
            )
            suffix_detected = True
    if suffix_detected:
        print(
            "At least one valid public suffix found in {!r}, please "
            "remove it. See https://publicsuffix.org for details on why this "
            "shouldn't be blocklisted.".format(filename))
        sys.exit(1)
class DomainSuffixExpertBot(Bot):
    """Adds ``source.domain_suffix``/``destination.domain_suffix`` computed from
    the configured field using a local public-suffix file (ICANN section)."""

    suffixes = {}

    def init(self):
        self.field = self.parameters.field
        if self.field not in ALLOWED_FIELDS:
            raise InvalidArgument('key', got=self.field, expected=ALLOWED_FIELDS)
        with codecs.open(self.parameters.suffix_file, encoding='UTF-8') as file_handle:
            self.psl = PublicSuffixList(source=file_handle, only_icann=True)

    def process(self):
        event = self.receive_message()
        for space in ('source', 'destination'):
            source_key = '.'.join((space, self.field))
            if source_key not in event:
                continue
            suffix_key = '.'.join((space, 'domain_suffix'))
            event[suffix_key] = self.psl.publicsuffix(domain=event[source_key])
        self.send_message(event)
        self.acknowledge_message()
def static_1_2(root_dir="/home/public/2019-01-07-dgarchive_full"):
    """Partition dgarchive domains per file into 1-label and 2-label prefixes
    and dump the result to ../result_data/dga_data.json."""
    psl = PublicSuffixList()
    result = dict()
    for filename in os.listdir(root_dir):
        df = pd.read_csv(os.path.join(root_dir, filename), header=None,
                         error_bad_lines=False)
        if filename not in result:
            result[filename] = [set(), set()]
        one_label, two_label = result[filename]
        for d in df.iloc[:, 0]:
            pub_d = psl.publicsuffix(d)
            if d == pub_d:
                continue  # domain is exactly its public suffix
            d_split = d[:d.rindex(pub_d) - 1].split(".")
            if len(d_split) == 1:
                one_label.add(d)
            elif len(d_split) == 2:
                two_label.add(d)
            else:
                print("Wow : {}".format(d))
        print("{} finish".format(filename))
    print("write")
    with open("../result_data/dga_data.json", "w") as f:
        f.write(json.dumps(result, cls=MyJsonEncoder))
import subprocess
import requests
from publicsuffixlist import PublicSuffixList

# Tally how many entries of the antimalware filter list fall under each
# public suffix and print the per-suffix counts.
psl = PublicSuffixList()
abusetld = {}
req = requests.get("https://raw.githubusercontent.com/iam-py-test/my_filters_001/main/antimalware.txt")
lines = req.text.split("\n")
for line in lines:
    # Skip "||"-anchored entries, comments ("!") and blank lines.
    if line.startswith("||") or line.startswith("!") or line == "":
        continue
    # Drop any "$option" part of the filter rule.
    domain = line.split("$")[0]
    suffix = psl.publicsuffix(domain)
    # BUG FIX: dict.get replaces the bare try/except, which silently
    # swallowed *every* exception, not just the missing-key case.
    abusetld[suffix] = abusetld.get(suffix, 0) + 1
for tld in abusetld:
    print("Domains for {}: {}".format(tld, abusetld[tld]))
class DomainSuffixExpertBot(ExpertBot):
    """Extract the domain suffix from a domain and save it in the the domain_suffix field. Requires a local file with valid domain suffixes"""
    field: str = None
    suffix_file: str = None  # TODO: should be pathlib.Path

    def init(self):
        # Validate the configured field and load the suffix database once.
        if self.field not in ALLOWED_FIELDS:
            raise InvalidArgument('key', got=self.field, expected=ALLOWED_FIELDS)
        with codecs.open(self.suffix_file, encoding='UTF-8') as file_handle:
            self.psl = PublicSuffixList(source=file_handle, only_icann=True)

    def process(self):
        # Annotate the event with the public suffix of the source/destination field.
        event = self.receive_message()
        for space in ('source', 'destination'):
            key = '.'.join((space, self.field))
            if key not in event:
                continue
            event['.'.join(
                (space, 'domain_suffix'))] = self.psl.publicsuffix(domain=event[key])
        self.send_message(event)
        self.acknowledge_message()

    @staticmethod
    def check(parameters):
        # Static configuration check: the suffix_file must exist and parse as
        # a public suffix list. Returns [[level, message]] on failure; falls
        # through (None) on success.
        if not os.path.exists(parameters.get('suffix_file', '')):
            return [[
                "error",
                "File given as parameter 'suffix_file' does not exist."
            ]]
        try:
            with codecs.open(parameters['suffix_file'], encoding='UTF-8') as database:
                PublicSuffixList(source=database, only_icann=True)
        except Exception as exc:
            return [["error", "Error reading database: %r." % exc]]

    @classmethod
    def run(cls, parsed_args=None):
        # Intercept --update-database before normal bot startup.
        if not parsed_args:
            parsed_args = cls._create_argparser().parse_args()
        if parsed_args.update_database:
            cls.update_database(verbose=parsed_args.verbose)
        else:
            super().run(parsed_args=parsed_args)

    @classmethod
    def _create_argparser(cls):
        # Extend the base parser with the database-maintenance flags.
        argparser = super()._create_argparser()
        argparser.add_argument("--update-database", action='store_true', help='downloads latest database data')
        argparser.add_argument("--verbose", action='store_true', help='be verbose')
        return argparser

    @classmethod
    def update_database(cls, verbose=False):
        # Download the latest public suffix list once and install it for every
        # configured bot of this module, then reload those bots.
        bots = {}
        runtime_conf = get_bots_settings()
        try:
            for bot in runtime_conf:
                if runtime_conf[bot]["module"] == __name__:
                    bots[bot] = runtime_conf[bot]["parameters"]["suffix_file"]
        except KeyError as e:
            # NOTE(review): message reconstructed from a garbled source line —
            # confirm exact wording against upstream.
            sys.exit("Database update failed. Your configuration of {0} is missing key {1}."
                     .format(bot, e))
        if not bots:
            if verbose:
                print(
                    "Database update skipped. No bots of type {0} present in runtime.conf."
                    .format(__name__))
            sys.exit(0)
        # we only need to import now. If there are no asn_lookup bots, this dependency does not need to be installed
        try:
            session = create_request_session()
            url = "https://publicsuffix.org/list/public_suffix_list.dat"
            if verbose:
                print("Downloading the latest database update...")
            response = session.get(url)
            if not response.ok:
                sys.exit("Database update failed. Server responded: {0}.\n"
                         "URL: {1}".format(response.status_code, response.url))
        except requests.exceptions.RequestException as e:
            sys.exit("Database update failed. Connection Error: {0}".format(e))
        # One download, possibly many destination paths (deduplicated).
        for database_path in set(bots.values()):
            database_dir = pathlib.Path(database_path).parent
            database_dir.mkdir(parents=True, exist_ok=True)
            with open(database_path, "wb") as database:
                database.write(response.content)
        if verbose:
            print("Database updated. Reloading affected bots.")
        ctl = IntelMQController()
        for bot in bots.keys():
            ctl.bot_reload(bot)
class TestPSL(unittest.TestCase):
    """Behavioral tests for publicsuffixlist.PublicSuffixList."""

    def setUp(self):
        # Default instance backed by the bundled public suffix list.
        self.psl = PublicSuffixList()

    def test_typesafe(self):
        # Return type mirrors the argument type (str in -> str out).
        self.assertEqual(
            self.psl.suffix("www.example.co.jp").__class__,
            "example.co.jp".__class__)
        self.assertEqual(
            self.psl.suffix(u("www.example.co.jp")).__class__,
            u("example.co.jp").__class__)
        self.assertEqual(
            self.psl.publicsuffix("www.example.co.jp").__class__,
            "co.jp".__class__)
        self.assertEqual(
            self.psl.publicsuffix(u("www.example.co.jp")).__class__,
            u("co.jp").__class__)

    def test_uppercase(self):
        # Lookup is case-insensitive; results come back lowercased.
        self.assertEqual(self.psl.suffix("wWw.eXaMpLe.cO.Jp"), "example.co.jp")
        self.assertEqual(self.psl.publicsuffix("wWw.eXaMpLe.cO.Jp"), "co.jp")

    def test_invaliddomain(self):
        # Empty labels (leading/trailing/double dots) and "" are rejected.
        self.assertEqual(self.psl.suffix("www..invalid"), None)
        self.assertEqual(self.psl.suffix(".example.com"), None)
        self.assertEqual(self.psl.suffix("example.com."), None)
        self.assertEqual(self.psl.suffix(""), None)
        self.assertEqual(self.psl.publicsuffix("www..invalid"), None)
        self.assertEqual(self.psl.publicsuffix(".example.com"), None)
        self.assertEqual(self.psl.publicsuffix("example.com."), None)
        self.assertEqual(self.psl.publicsuffix(""), None)

    def test_idn(self):
        # Unicode IDN TLDs match directly.
        tld = u("香港")
        self.assertEqual(self.psl.suffix(u("www.example.") + tld), u("example.") + tld)
        self.assertEqual(self.psl.publicsuffix(u("www.example.") + tld), tld)

    def test_punycoded(self):
        # Punycode-encoded IDN TLDs match as well.
        tld = encode_idn(u("香港"))
        self.assertEqual(self.psl.suffix(u("www.example.") + tld), u("example.") + tld)
        self.assertEqual(self.psl.publicsuffix(u("www.example.") + tld), tld)

    def test_suffix_deny_public(self):
        # suffix() returns None for names that are entirely a public suffix.
        self.assertEqual(self.psl.suffix("com"), None)
        self.assertEqual(self.psl.suffix("co.jp"), None)
        self.assertEqual(self.psl.suffix("example.nagoya.jp"), None)

    def test_unknown(self):
        # Unknown TLDs are treated as public suffixes by default.
        self.assertEqual(self.psl.suffix("www.example.unknowntld"), "example.unknowntld")
        self.assertEqual(self.psl.suffix("unknowntld"), None)
        self.assertEqual(self.psl.publicsuffix("www.example.unknowntld"), "unknowntld")
        self.assertEqual(self.psl.publicsuffix("unknowntld"), "unknowntld")

    def test_deny_unknown(self):
        # accept_unknown=False disables the unknown-TLD fallback.
        source = """
known
"""
        psl = PublicSuffixList(source.splitlines(), accept_unknown=False)
        self.assertEqual(psl.suffix("www.example.unknowntld"), None)

    def test_custom_psl(self):
        # Plain, wildcard and exception rules from a custom source.
        source = """
invalid
*.invalid
!test.invalid
"""
        psl = PublicSuffixList(source.splitlines())
        self.assertEqual(psl.suffix("example.invalid"), None)
        self.assertEqual(psl.suffix("test.invalid"), "test.invalid")
        self.assertEqual(psl.suffix("some.test.invalid"), "test.invalid")
        self.assertEqual(psl.suffix("aaa.bbb.ccc.invalid"), "bbb.ccc.invalid")
        self.assertEqual(psl.publicsuffix("example.invalid"), "example.invalid")
        self.assertEqual(psl.publicsuffix("test.invalid"), "invalid")

    def test_publicsuffix(self):
        self.assertEqual(self.psl.publicsuffix("www.example.com"), "com")
        self.assertEqual(self.psl.publicsuffix("unknowntld"), "unknowntld")

    def test_wildcard(self):
        # "*.nagoya.jp"-style wildcard rules.
        self.assertEqual(self.psl.suffix("test.example.nagoya.jp"), "test.example.nagoya.jp")
        self.assertEqual(self.psl.suffix("example.nagoya.jp"), None)
        self.assertEqual(self.psl.publicsuffix("example.nagoya.jp"), "example.nagoya.jp")
        self.assertEqual(self.psl.publicsuffix("test.example.nagoya.jp"), "example.nagoya.jp")

    def test_checkpublicsuffix_script(self):
        # Run the official checkPublicSuffix test vectors from test_psl.txt.
        regex = re.compile(r"^checkPublicSuffix\(('[^']+'), (null|'[^']+')\);")
        with open(os.path.join(os.path.dirname(__file__), "test_psl.txt"), "rb") as f:
            ln = 0
            for line in f:
                ln += 1
                l = line.decode("utf-8")
                m = regex.match(l)
                if not m:
                    continue
                arg = m.group(1).strip("'")
                res = None if m.group(2) == "null" else m.group(2).strip("'")
                self.assertEqual(self.psl.suffix(arg), res, "in line {0}: {1}".format(ln, line.strip()))

    def test_typeerror(self):
        # Non-string inputs raise TypeError (bytes only on Python 3).
        self.assertRaises(TypeError, lambda: self.psl.suffix(None))
        self.assertRaises(TypeError, lambda: self.psl.suffix(1))
        if b("") != "":
            # python3
            self.assertRaises(TypeError, lambda: self.psl.suffix(b("www.example.com")))

    def test_compatclass(self):
        # publicsuffix-compatible wrapper returns "" instead of None.
        from publicsuffixlist.compat import PublicSuffixList
        psl = PublicSuffixList()
        self.assertEqual(psl.get_public_suffix("test.example.com"), "example.com")
        self.assertEqual(psl.get_public_suffix("com"), "")
        self.assertEqual(psl.get_public_suffix(""), "")

    def test_unsafecompatclass(self):
        # Unsafe variant echoes the input for bare suffixes.
        from publicsuffixlist.compat import UnsafePublicSuffixList
        psl = UnsafePublicSuffixList()
        self.assertEqual(psl.get_public_suffix("test.example.com"), "example.com")
        self.assertEqual(psl.get_public_suffix("com"), "com")
        self.assertEqual(psl.get_public_suffix(""), "")

    def test_toomanylabels(self):
        # A million labels must still resolve (no recursion blow-up).
        d = "a." * 1000000 + "example.com"
        self.assertEqual(self.psl.publicsuffix(d), "com")
        self.assertEqual(self.psl.privatesuffix(d), "example.com")

    def test_flatstring(self):
        # A newline-separated string is accepted as a rule source.
        psl = PublicSuffixList(u("com\nnet\n"))
        self.assertEqual(psl.publicsuffix("example.com"), "com")

    def test_flatbytestring(self):
        # A newline-separated bytes blob is accepted as a rule source.
        psl = PublicSuffixList(b("com\nnet\n"))
        self.assertEqual(psl.publicsuffix("example.com"), "com")

    def test_privateparts(self):
        psl = self.psl
        self.assertEqual(psl.privateparts("aaa.www.example.com"), ("aaa", "www", "example.com"))

    def test_noprivateparts(self):
        psl = self.psl
        self.assertEqual(psl.privateparts("com"), None)  # no private part

    def test_reconstructparts(self):
        # privateparts() round-trips back to the original domain.
        psl = self.psl
        self.assertEqual(".".join(psl.privateparts("aaa.www.example.com")), "aaa.www.example.com")

    def test_subdomain(self):
        # depth selects how many labels beyond the private suffix to keep.
        psl = self.psl
        self.assertEqual(psl.subdomain("aaa.www.example.com", depth=0), "example.com")
        self.assertEqual(psl.subdomain("aaa.www.example.com", depth=1), "www.example.com")
        self.assertEqual(psl.subdomain("aaa.www.example.com", depth=2), "aaa.www.example.com")
        self.assertEqual(psl.subdomain("aaa.www.example.com", depth=3), None)  # no sufficient depth
def test_flatbytestring(self):
    """A flat bytes blob of newline-separated rules works as a source."""
    rules = b("com\nnet\n")
    instance = PublicSuffixList(rules)
    self.assertEqual(instance.publicsuffix("example.com"), "com")
def test_icann(self):
    """only_icann=True ignores the private section of the list."""
    icann_only = PublicSuffixList(only_icann=True)
    # "priv.at" is a private-section rule, so only "at" stays public here.
    self.assertEqual(icann_only.publicsuffix("www.example.com"), 'com')
    self.assertEqual(icann_only.publicsuffix("example.priv.at"), 'at')
def MY_expirement_process(root_dir="/home/yandingkui/dga_detection/result_data/",
                          m_file="split_AGDs", benign_file="split_benign_ac.json",
                          n=815, m=10, c='entropy'):
    """Train a RandomForest on per-label character features of AGD vs benign
    domains and print the sorted feature importances.

    :param n: n_estimators for the forest
    :param m: max_features for the forest
    :param c: split criterion ('gini' or 'entropy')
    """
    psl = PublicSuffixList()
    with open(root_dir + m_file, "r") as f:
        malicious_data = json.loads(f.read())
    with open(root_dir + benign_file, "r") as f:
        benign_data = json.loads(f.read())
    train_domains = []
    train_labels = []
    pred_domains = []
    pred_labels = []
    for k, v in malicious_data.items():
        for d in v[0]:
            # BUG FIX: use rindex like the sibling helpers, so an earlier
            # occurrence of the suffix text cannot truncate the prefix wrongly.
            d_split = d[:d.rindex(psl.publicsuffix(d)) - 1].split(".")
            if len(d_split) == 1:
                train_domains.append(d_split[0])
            else:
                # BUG FIX: the original reused the parameter name `m` here,
                # clobbering max_features, and never advanced the running
                # maximum — so it picked the *last* label, not the longest.
                longest_len = 0
                longest = None
                for l in d_split:
                    if len(l) > longest_len:
                        longest_len = len(l)
                        longest = l
                train_domains.append(longest)
            train_labels.append(1)
        for d in v[1]:
            pred_domains.append(d)
            pred_labels.append(1)
    for d in benign_data.get("train"):
        pri_d = psl.privatesuffix(d)
        # Label left of the public suffix within the private suffix.
        lm = pri_d[:pri_d.rindex(psl.publicsuffix(pri_d)) - 1]
        train_domains.append(lm)
        train_labels.append(0)
    for d in benign_data.get("pred"):
        pred_domains.append(d)
        pred_labels.append(0)
    train_features = char_feature.extract_all_features(train_domains)
    # Shuffle features and labels with the same permutation.
    index = list(range(len(train_domains)))
    random.shuffle(index)
    real_train_features = []
    real_train_labels = []
    for i in index:
        real_train_features.append(train_features[i])
        real_train_labels.append(train_labels[i])
    clf = RandomForestClassifier(n_estimators=n, max_features=m, criterion=c,
                                 random_state=0)
    clf.fit(real_train_features, real_train_labels)
    print("Pontus:feature_importance_")
    im = clf.feature_importances_
    feature_items = []
    for i in range(len(im)):
        feature_items.append((i + 1, im[i]))
    feature_items.sort(key=takeSecond, reverse=True)
    print(feature_items)
class PSLFaup(object):
    """
    Fake Faup Python Library using PSL for Windows support
    """

    def __init__(self):
        # decoded: whether decode() has been called for the current URL.
        self.decoded = False
        self.psl = PublicSuffixList()
        self._url = None
        self._retval = {}
        # ip_as_host: False, or the compressed string form of an IP host.
        self.ip_as_host = False

    def _clear(self):
        # Reset all per-URL state before decoding a new URL.
        self.decoded = False
        self._url = None
        self._retval = {}
        self.ip_as_host = False

    def decode(self, url) -> None:
        """
        This function creates a dict of all the url fields.

        :param url: The URL to normalize
        """
        self._clear()
        # urlparse needs a "//" netloc marker to treat the input as a host.
        if isinstance(url, bytes) and b'//' not in url[:10]:
            url = b'//' + url
        elif '//' not in url[:10]:
            url = '//' + url
        self._url = urlparse(url)
        self.ip_as_host = False
        hostname = _ensure_str(self._url.hostname)
        # Detect an IPv4 host first, then IPv6 (allowing a "%zone" suffix).
        try:
            ipv4_bytes = socket.inet_aton(_ensure_str(hostname))
            ipv4 = ipaddress.IPv4Address(ipv4_bytes)
            self.ip_as_host = ipv4.compressed
        except (OSError, ValueError):
            try:
                addr, _, _ = hostname.partition('%')
                ipv6 = ipaddress.IPv6Address(addr)
                self.ip_as_host = ipv6.compressed
            except ValueError:
                pass
        self.decoded = True
        self._retval = {}

    @property
    def url(self):
        # Reassemble the full URL (as bytes) from the decoded components.
        if not self.decoded:
            raise UrlNotDecoded("You must call faup.decode() first")
        netloc = self.get_host() + ('' if self.get_port() is None else ':{}'.format(self.get_port()))
        return _ensure_bytes(
            urlunparse((
                self.get_scheme(),
                netloc,
                self.get_resource_path(),
                '',
                self.get_query_string(),
                self.get_fragment(),
            )))

    def get_scheme(self):
        """
        Get the scheme of the url given in the decode function

        :returns: The URL scheme
        """
        if not self.decoded:
            raise UrlNotDecoded("You must call faup.decode() first")
        return _ensure_str(self._url.scheme)

    def get_credential(self):
        # "user:password" when both are present, else just the username.
        if not self.decoded:
            raise UrlNotDecoded("You must call faup.decode() first")
        if self._url.password:
            return _ensure_str(self._url.username) + ':' + _ensure_str(
                self._url.password)
        if self._url.username:
            return _ensure_str(self._url.username)

    def get_subdomain(self):
        # Labels left of the registrable domain; None for IP hosts.
        if not self.decoded:
            raise UrlNotDecoded("You must call faup.decode() first")
        if self.get_host() is not None and not self.ip_as_host:
            if self.get_domain() in self.get_host():
                return self.get_host().rsplit(self.get_domain(), 1)[0].rstrip('.') or None

    def get_domain(self):
        # Registrable (private-suffix) domain; None for IP hosts.
        if not self.decoded:
            raise UrlNotDecoded("You must call faup.decode() first")
        if self.get_host() is not None and not self.ip_as_host:
            return self.psl.privatesuffix(self.get_host())

    def get_domain_without_tld(self):
        # Registrable domain with its public suffix stripped.
        if not self.decoded:
            raise UrlNotDecoded("You must call faup.decode() first")
        if self.get_tld() is not None and not self.ip_as_host:
            return self.get_domain().rsplit(self.get_tld(), 1)[0].rstrip('.')

    def get_host(self):
        # ASCII hostname; non-ASCII names are IDNA-encoded.
        if not self.decoded:
            raise UrlNotDecoded("You must call faup.decode() first")
        if self._url.hostname is None:
            return None
        elif self._url.hostname.isascii():
            return _ensure_str(self._url.hostname)
        else:
            return _ensure_str(idna.encode(self._url.hostname, uts46=True))

    def get_unicode_host(self):
        # IDNA-decoded (Unicode) form of the host; None for IP hosts.
        if not self.decoded:
            raise UrlNotDecoded("You must call faup.decode() first")
        if not self.ip_as_host:
            return idna.decode(self.get_host(), uts46=True)

    def get_tld(self):
        # Public suffix of the host; None for IP hosts.
        if not self.decoded:
            raise UrlNotDecoded("You must call faup.decode() first")
        if self.get_host() is not None and not self.ip_as_host:
            return self.psl.publicsuffix(self.get_host())

    def get_port(self):
        if not self.decoded:
            raise UrlNotDecoded("You must call faup.decode() first")
        return self._url.port

    def get_resource_path(self):
        if not self.decoded:
            raise UrlNotDecoded("You must call faup.decode() first")
        return _ensure_str(self._url.path)

    def get_query_string(self):
        if not self.decoded:
            raise UrlNotDecoded("You must call faup.decode() first")
        return _ensure_str(self._url.query)

    def get_fragment(self):
        if not self.decoded:
            raise UrlNotDecoded("You must call faup.decode() first")
        return _ensure_str(self._url.fragment)

    def get(self):
        # Collect every decoded field into a single dict.
        self._retval["scheme"] = self.get_scheme()
        self._retval["tld"] = self.get_tld()
        self._retval["domain"] = self.get_domain()
        self._retval["domain_without_tld"] = self.get_domain_without_tld()
        self._retval["subdomain"] = self.get_subdomain()
        self._retval["host"] = self.get_host()
        self._retval["port"] = self.get_port()
        self._retval["resource_path"] = self.get_resource_path()
        self._retval["query_string"] = self.get_query_string()
        self._retval["fragment"] = self.get_fragment()
        self._retval["url"] = self.url
        return self._retval
def get_suspicious(year, month, day):
    """Collect the day's observed domains, classify their labels with the
    pre-trained ac_model, persist the suspicious private suffixes and pass
    them to check_active_domains().

    :param year/month/day: date of the hourly capture files under ../result_data/
    """
    timestring = "{}{:0>2d}{:0>2d}".format(year, month, day)
    suspicious_domains_set = set()
    if os.path.exists("../result_data/{}domains.txt".format(timestring)):
        # Cached result from an earlier run: go straight to the activity check.
        with open("../result_data/{}domains.txt".format(timestring), "r") as f:
            for r in f:
                suspicious_domains_set.add(r.strip())
        check_active_domains(suspicious_domains_set, timestring)
    else:
        init_domain_set = set()
        # get all domains seen in any hourly file of the day
        for hour in range(24):
            file_path = "{}{:0>2d}{:0>2d}{:0>2d}".format(year, month, day, hour)
            if not os.path.exists("../result_data/{}".format(file_path)):
                continue
            with open("../result_data/{}".format(file_path), "r") as f:
                for r in f:
                    domain = r.strip().split(",")[1]
                    init_domain_set.add(domain)
        psl = PublicSuffixList()
        domain_labels = []
        labels_labels = []
        i = 0
        # split each domain's prefix into labels, remembering per label the
        # index of the domain it came from
        domains_list = list(init_domain_set)
        for d in domains_list:
            # BUG FIX: rindex (as the sibling helpers use), so an early
            # occurrence of the suffix text cannot truncate the prefix wrongly.
            s = d[:d.rindex(psl.publicsuffix(d)) - 1]
            for l in s.split("."):
                if len(l) > 0:
                    domain_labels.append(l)
                    labels_labels.append(i)
            i = i + 1
        features_path = "../result_data/{}_features.npy".format(timestring)
        if os.path.exists(features_path):
            features = np.load(features_path)
        else:
            features = extract_all_features(domain_labels)
            np.save(features_path, features)
        # classifier flags AGD-looking labels (prediction == 1)
        clf = joblib.load("../result_data/ac_model.m")
        pred_labels = clf.predict(features)
        domain_index = set()
        for i in range(len(labels_labels)):
            if pred_labels[i] == 1:
                domain_index.add(labels_labels[i])
        # map flagged labels back to their domains' private suffixes
        for index in domain_index:
            ps = psl.privatesuffix(domains_list[index])
            if ps is None:
                continue
            suspicious_domains_set.add(ps)
        print("{} domains".format(len(suspicious_domains_set)))
        with open("../result_data/{}domains.txt".format(timestring), "w") as f:
            f.write("\n".join(suspicious_domains_set))
        print("save finish")
        # dgarchive check
        check_active_domains(suspicious_domains_set, timestring)
def get_domain_private_suffix(domain):
    """
    returns 'www.google' for 'www.google.com'

    Strips the public suffix (and its separating dot) from *domain*.
    """
    psl = PublicSuffixList()
    tld = "." + psl.publicsuffix(domain)
    # BUG FIX: str.replace removed *every* occurrence of the suffix text
    # (e.g. 'com.google.com' -> 'google'); strip only the trailing suffix.
    if domain.endswith(tld):
        return domain[:-len(tld)]
    # Fallback keeps the previous behavior for inputs without a trailing
    # ".suffix" (e.g. the domain equal to its bare public suffix).
    return domain.replace(tld, '')
class TestPSL(unittest.TestCase):
    """Behavioral tests for publicsuffixlist.PublicSuffixList."""

    def setUp(self):
        # Default instance backed by the bundled public suffix list.
        self.psl = PublicSuffixList()

    def test_typesafe(self):
        # Return type mirrors the argument type (str in -> str out).
        self.assertEqual(self.psl.suffix("www.example.co.jp").__class__, "example.co.jp".__class__)
        self.assertEqual(self.psl.suffix(u("www.example.co.jp")).__class__, u("example.co.jp").__class__)
        self.assertEqual(self.psl.publicsuffix("www.example.co.jp").__class__, "co.jp".__class__)
        self.assertEqual(self.psl.publicsuffix(u("www.example.co.jp")).__class__, u("co.jp").__class__)

    def test_uppercase(self):
        # Lookup is case-insensitive; results come back lowercased.
        self.assertEqual(self.psl.suffix("wWw.eXaMpLe.cO.Jp"), "example.co.jp")
        self.assertEqual(self.psl.publicsuffix("wWw.eXaMpLe.cO.Jp"), "co.jp")

    def test_invaliddomain(self):
        # Empty labels (leading/trailing/double dots) and "" are rejected.
        self.assertEqual(self.psl.suffix("www..invalid"), None)
        self.assertEqual(self.psl.suffix(".example.com"), None)
        self.assertEqual(self.psl.suffix("example.com."), None)
        self.assertEqual(self.psl.suffix(""), None)
        self.assertEqual(self.psl.publicsuffix("www..invalid"), None)
        self.assertEqual(self.psl.publicsuffix(".example.com"), None)
        self.assertEqual(self.psl.publicsuffix("example.com."), None)
        self.assertEqual(self.psl.publicsuffix(""), None)

    def test_idn(self):
        # Unicode IDN TLDs match directly.
        tld = u("香港")
        self.assertEqual(self.psl.suffix(u("www.example.") + tld), u("example.") + tld)
        self.assertEqual(self.psl.publicsuffix(u("www.example.") + tld), tld)

    def test_punycoded(self):
        # Punycode-encoded IDN TLDs match as well.
        tld = encode_idn(u("香港"))
        self.assertEqual(self.psl.suffix(u("www.example.") + tld), u("example.") + tld)
        self.assertEqual(self.psl.publicsuffix(u("www.example.") + tld), tld)

    def test_suffix_deny_public(self):
        # suffix() returns None for names that are entirely a public suffix.
        self.assertEqual(self.psl.suffix("com"), None)
        self.assertEqual(self.psl.suffix("co.jp"), None)
        self.assertEqual(self.psl.suffix("example.nagoya.jp"), None)

    def test_unknown(self):
        # Unknown TLDs are treated as public suffixes by default.
        self.assertEqual(self.psl.suffix("www.example.unknowntld"), "example.unknowntld")
        self.assertEqual(self.psl.suffix("unknowntld"), None)
        self.assertEqual(self.psl.publicsuffix("www.example.unknowntld"), "unknowntld")
        self.assertEqual(self.psl.publicsuffix("unknowntld"), "unknowntld")

    def test_deny_unknown(self):
        # accept_unknown=False disables the unknown-TLD fallback.
        source = """
known
"""
        psl = PublicSuffixList(source.splitlines(), accept_unknown=False)
        self.assertEqual(psl.suffix("www.example.unknowntld"), None)

    def test_custom_psl(self):
        # Plain, wildcard and exception rules from a custom source.
        source = """
invalid
*.invalid
!test.invalid
"""
        psl = PublicSuffixList(source.splitlines())
        self.assertEqual(psl.suffix("example.invalid"), None)
        self.assertEqual(psl.suffix("test.invalid"), "test.invalid")
        self.assertEqual(psl.suffix("some.test.invalid"), "test.invalid")
        self.assertEqual(psl.suffix("aaa.bbb.ccc.invalid"), "bbb.ccc.invalid")
        self.assertEqual(psl.publicsuffix("example.invalid"), "example.invalid")
        self.assertEqual(psl.publicsuffix("test.invalid"), "invalid")

    def test_publicsuffix(self):
        self.assertEqual(self.psl.publicsuffix("www.example.com"), "com")
        self.assertEqual(self.psl.publicsuffix("unknowntld"), "unknowntld")

    def test_wildcard(self):
        # "*.nagoya.jp"-style wildcard rules.
        self.assertEqual(self.psl.suffix("test.example.nagoya.jp"), "test.example.nagoya.jp")
        self.assertEqual(self.psl.suffix("example.nagoya.jp"), None)
        self.assertEqual(self.psl.publicsuffix("example.nagoya.jp"), "example.nagoya.jp")
        self.assertEqual(self.psl.publicsuffix("test.example.nagoya.jp"), "example.nagoya.jp")

    def test_checkpublicsuffix_script(self):
        # Run the official checkPublicSuffix test vectors from test_psl.txt.
        regex = re.compile(r"^checkPublicSuffix\(('[^']+'), (null|'[^']+')\);")
        with open(os.path.join(os.path.dirname(__file__), "test_psl.txt"), "rb") as f:
            ln = 0
            for line in f:
                ln += 1
                l = line.decode("utf-8")
                m = regex.match(l)
                if not m:
                    continue
                arg = m.group(1).strip("'")
                res = None if m.group(2) == "null" else m.group(2).strip("'")
                self.assertEqual(self.psl.suffix(arg), res, "in line {0}: {1}".format(ln, line.strip()))

    def test_typeerror(self):
        # Non-string inputs raise TypeError (bytes only on Python 3).
        self.assertRaises(TypeError, lambda: self.psl.suffix(None))
        self.assertRaises(TypeError, lambda: self.psl.suffix(1))
        if b("") != "":
            # python3
            self.assertRaises(TypeError, lambda: self.psl.suffix(b("www.example.com")))

    def test_compatclass(self):
        # publicsuffix-compatible wrapper returns "" instead of None.
        from publicsuffixlist.compat import PublicSuffixList
        psl = PublicSuffixList()
        self.assertEqual(psl.get_public_suffix("test.example.com"), "example.com")
        self.assertEqual(psl.get_public_suffix("com"), "")
        self.assertEqual(psl.get_public_suffix(""), "")

    def test_unsafecompatclass(self):
        # Unsafe variant echoes the input for bare suffixes.
        from publicsuffixlist.compat import UnsafePublicSuffixList
        psl = UnsafePublicSuffixList()
        self.assertEqual(psl.get_public_suffix("test.example.com"), "example.com")
        self.assertEqual(psl.get_public_suffix("com"), "com")
        self.assertEqual(psl.get_public_suffix(""), "")

    def test_toomanylabels(self):
        # A million labels must still resolve (no recursion blow-up).
        d = "a." * 1000000 + "example.com"
        self.assertEqual(self.psl.publicsuffix(d), "com")
        self.assertEqual(self.psl.privatesuffix(d), "example.com")

    def test_flatstring(self):
        # A newline-separated string is accepted as a rule source.
        psl = PublicSuffixList(u("com\nnet\n"))
        self.assertEqual(psl.publicsuffix("example.com"), "com")

    def test_flatbytestring(self):
        # A newline-separated bytes blob is accepted as a rule source.
        psl = PublicSuffixList(b("com\nnet\n"))
        self.assertEqual(psl.publicsuffix("example.com"), "com")