def reduce_domain(domain_in):
    """Reduce a domain name to its registrable (private-suffix) form.

    :param domain_in: the fully-qualified domain name to trim.
    :return: the lower-cased registrable domain, or None when the input
        has no known public suffix.
    """
    # Reuse a single PublicSuffixList instance; the original built the
    # (expensive) list twice per call.
    psl = PublicSuffixList()
    if not psl.publicsuffix(domain_in, accept_unknown=False):
        return None
    domain = psl.privatesuffix(domain_in)
    if domain:
        domain = domain.lower()
    else:
        # BUG FIX: the original logged `domain`, which is None/empty on this
        # branch; log the input that actually lacked an eTLD.
        log.debug("No eTLD for {}".format(domain_in))
    log.debug("Trimmed domain from {0} to {1}".format(domain_in, domain))
    return domain
def test_deny_unknown(self):
    """A TLD absent from the source must yield no suffix when accept_unknown is off."""
    rules = """
known
"""
    psl = PublicSuffixList(rules.splitlines(), accept_unknown=False)
    self.assertEqual(psl.suffix("www.example.unknowntld"), None)
def is_email_blacklisted(address):
    """
    Determines if a supplied email address is present in the 'emailblacklist' table.

    Parameters:
        address: The email address to split out the domain from.

    Returns:
        Boolean True if present on the blacklist, or False otherwise.
    """
    _, domain = address.rsplit("@", 1)
    private_suffix = PublicSuffixList().privatesuffix(domain=domain)

    # Check the disposable email address list.
    if private_suffix in _retrieve_disposable_email_domains():
        return True

    # Check the explicitly defined/blacklisted domains.
    # IMPROVEMENT: filter in SQL with a parameterized EXISTS query instead of
    # fetching the whole table and scanning it in Python (consistent with the
    # other variant of this function in this codebase).
    return bool(d.engine.scalar(
        "SELECT EXISTS (SELECT FROM emailblacklist WHERE domain_name = %(domain)s)",
        domain=private_suffix,
    ))
def init(self):
    """Validate the configured field and load the ICANN-only suffix list."""
    if self.field not in ALLOWED_FIELDS:
        raise InvalidArgument('key', got=self.field, expected=ALLOWED_FIELDS)
    with codecs.open(self.suffix_file, encoding='UTF-8') as source:
        self.psl = PublicSuffixList(source=source, only_icann=True)
def load_psl():
    """Download the Public Suffix List and store it in the module-global `psl`."""
    global psl
    print('Loading Public Suffix List')
    downloaded = pfetch()
    psl = PublicSuffixList(downloaded)
def updatePSL(psl_file=PSLFILE):
    """ Updates a local copy of PSL file

    Downloads the current list, sanity-checks that it parses, then
    atomically replaces the local file and preserves the upstream
    last-modified timestamp.

    :param psl_file: path for the file to store the list. Default: PSLFILE
    """
    if requests is None:
        raise Exception("Please install python-requests http(s) library. $ sudo pip install requests")
    r = requests.get(PSLURL)
    if r.status_code != requests.codes.ok or len(r.content) == 0:
        raise Exception("Could not download PSL from " + PSLURL)
    lastmod = r.headers.get("last-modified", None)

    # FIX: write via `with` so the handle is closed even on error; the
    # original used bare open()/close() with no exception safety.
    swp = psl_file + ".swp"
    with open(swp, "wb") as f:
        f.write(r.content)
    # Parse the freshly written file as a sanity check before installing it
    # (the original bound the result to an unused local).
    with open(swp, "rb") as f:
        PublicSuffixList(f)
    os.rename(swp, psl_file)

    if lastmod:
        t = time.mktime(parsedate(lastmod))
        os.utime(psl_file, (t, t))
    print("PSL updated")
    if lastmod:
        print("last-modified: " + lastmod)
def get_tld_esld(PSL, DOMAIN):
    """Return the (TLD, effective-second-level-domain) pair for DOMAIN.

    Both values are None when DOMAIN is not a string or is empty after
    stripping leading/trailing dots.
    """
    if not isinstance(DOMAIN, string_types):
        return None, None

    # Drop any leading/trailing '.' characters in one pass.
    DOMAIN = DOMAIN.strip(".")
    if not DOMAIN:
        return None, None

    try:
        # information about TLDs
        tld = PSL.publicsuffix(DOMAIN)
    except Exception:
        # Fall back to a freshly built default list if the supplied object fails.
        tld = PublicSuffixList().publicsuffix(DOMAIN)

    if tld is None:
        return tld, None
    if tld == DOMAIN:
        return tld, tld

    # ESLD = rightmost label in front of the TLD, rejoined with the TLD.
    remainder = DOMAIN[:-len(tld) - 1]
    esld = remainder[remainder.rfind(".") + 1:] + '.' + tld
    return tld, esld
def test_compatclass(self):
    """The compat wrapper mimics the legacy publicsuffix get_public_suffix API."""
    from publicsuffixlist.compat import PublicSuffixList
    psl = PublicSuffixList()
    for query, expected in (
        ("test.example.com", "example.com"),
        ("com", ""),
        ("", ""),
    ):
        self.assertEqual(psl.get_public_suffix(query), expected)
def decompose_filter(inputstring, psl=PublicSuffixList()):
    # NOTE(review): the PublicSuffixList() default is evaluated once at
    # definition time and shared across calls — presumably intentional
    # caching, but confirm; it also runs list parsing at import time.
    """Decompose a SQL-LIKE-style filter string into trigram-searchable parts.

    Returns a dict with the cleaned 'querystring' and two candidate
    substrings 'ts_q1'/'ts_q2' (longest usable literal runs), or None when
    nothing usable can be extracted or an error occurs.
    """
    logging.debug(f'Parsing "{inputstring}"')
    try:
        match_list = []
        querystring = inputstring
        # Clean input: keep only [-a-z0-9.%_], trim dots/spaces, lower-case.
        querystring = re.sub(r'(?i)[^-a-z0-9.%_]', '', querystring).strip('. ').lower()
        logging.debug(f'Cleaned input to "{querystring}"')
        if '_' in querystring:
            # '_' (single-char wildcard) is logged but otherwise passed through.
            logging.error(
                f'Single character wildcards are not handled yet. "{querystring}"'
            )
        if querystring.count('%') == 0:
            # No wildcard at all: the whole cleaned string is the query.
            ts_q1 = querystring
            ts_q2 = querystring
        else:
            # Check for usable strings at the start of the string
            leading_match = re.search(
                r'^(?P<q_lead>[-a-z0-9.]+)(?:[%_.]*[%_])', querystring)
            if leading_match:
                match_list.append(leading_match.group('q_lead') + ':*')
            # Check for usable strings in the middle of the string
            mid_match_list = re.findall(
                r'(?<=[%_]\.)(?P<q_mid>[-a-z0-9.]+)(?:[%_.]*[%_])', querystring)
            if mid_match_list:
                mid_match_list = [m + ':*' for m in mid_match_list]
                match_list.extend(mid_match_list)
            # Check for usable strings at the end of the string
            trailing_match = re.search(
                r'(?<=[%_]\.)(?P<q_trail>[-a-z0-9.]+[-a-z0-9])$', querystring)
            if trailing_match:
                # Only keep a trailing fragment that is a real private domain.
                if psl.is_private(trailing_match.group('q_trail')):
                    match_list.append(trailing_match.group('q_trail'))
            if match_list:
                # Deduplicate, then prefer the longest fragments (ignoring a
                # leading run of 'w' — presumably to de-weight 'www'; confirm)
                # and the ':*' prefix-search suffix when ranking.
                match_list = list(set(match_list))
                match_list.sort(key=lambda x: len(x.lstrip('w').rstrip(':*')),
                                reverse=True)
                ts_long_list = match_list[:2]
                ts_q1 = ts_long_list[0]
                ts_q2 = ts_long_list[-1]
            else:
                logging.error(
                    f'Could not extract usable querystring on "{inputstring}"')
                return
    except Exception as e:
        logging.error(f'Error on "{inputstring}", "{e}"')
        return
    return_dict = {
        'querystring': querystring,
        'ts_q1': ts_q1,
        'ts_q2': ts_q2,
    }
    return return_dict
def check(parameters):
    """Verify that parameters['suffix_file'] names a readable, parseable PSL file.

    Returns a list of [level, message] pairs describing the failure, or
    None (implicitly) when the file loads cleanly.
    """
    path = parameters.get('suffix_file', '')
    if not os.path.exists(path):
        return [[
            "error",
            "File given as parameter 'suffix_file' does not exist."
        ]]
    try:
        with codecs.open(path, encoding='UTF-8') as database:
            PublicSuffixList(source=database, only_icann=True)
    except Exception as exc:
        return [["error", "Error reading database: %r." % exc]]
def get_domain_name(url: str) -> str:
    """
    Gets the domain name of a URL, removing the TLD

    :param url: URL to find domain of
    :return: the label immediately before the public suffix, or "" when the
        host has no recognisable suffix or is itself a bare suffix.
    """
    # Sanitise the URL, removing protocol, path and port
    url = url.split("://")[-1]
    url = url.split("/")[0]
    url = url.split(":")[0]

    # Get the public suffix
    url_tld = PublicSuffixList().publicsuffix(url)

    # ROBUSTNESS FIX: the original raised TypeError when publicsuffix()
    # returned None, and sliced off a trailing character when the host WAS
    # the suffix; in both cases there is no registrable label to return.
    if not url_tld or url == url_tld:
        return ""

    # Return the last part before the TLD
    return url[:-len(url_tld) - 1].split(".")[-1]
def static_num(file_path):
    """Count domains in `file_path` by how many labels precede the public suffix.

    Prints [one-label count, two-label count, three-or-more count].
    """
    psl = PublicSuffixList()
    counts = [0, 0, 0]
    with open(file_path, "r") as fh:
        for line in fh:
            domain = line.strip().split(",")[0]
            labels = domain[:domain.rindex(psl.publicsuffix(domain)) - 1].split(".")
            # Bucket 0/1/2 for 1, 2, and >=3 labels respectively.
            counts[min(len(labels), 3) - 1] += 1
    print(counts)
def get_whois(self, name):
    # Collect WHOIS-derived facts (IP, registrant, registrar, nameservers)
    # for self.target's domain into self.cms_list.
    # NOTE(review): this is Python 2 code (`except Exception, e`), and the
    # "Domain_User:" line below was redacted/corrupted at the source
    # (`"******"`); the enclosing try also appears to be missing its except
    # clause — this block does not parse as-is and needs reconstruction.
    try:
        domain = urlparse.urlparse(self.target).netloc
        # if domain is ip,stop querying domain.
        result1 = re.search("\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}", domain)
        if result1:
            return
        # remove port
        result2 = re.search("\:\d{1,5}$", domain)
        if result2:
            domain = domain.split(":")[0]
        # get domain's ip
        try:
            ip = socket.gethostbyname(domain)
            self.cms_list.add("IP:" + ip)
        except Exception, e:
            # print e
            pass
        # if re.match("^www\.",domain):
        # domain = domain.strip("www.")
        psl = PublicSuffixList()
        # Reduce to the registrable suffix before querying WHOIS.
        domain = psl.suffix(domain)
        who = pythonwhois.get_whois(domain)
        # get whois
        if who["contacts"]["registrant"]["name"] is not None:
            self.cms_list.add(
                "Domain_User:"******"contacts"]["registrant"]["name"].encode("utf8"))
        if who["contacts"]["registrant"]["email"] is not None:
            self.cms_list.add(
                "Domain_Email:" +
                who["contacts"]["registrant"]["email"].encode("utf8"))
        if who["contacts"]["registrant"]["phone"] is not None:
            self.cms_list.add(
                "Domain_Phone:" +
                who["contacts"]["registrant"]["phone"].encode("utf8"))
        if who["registrar"] is not None:
            self.cms_list.add("Domain_Registrar:" +
                              who["registrar"][0].encode("utf8"))
        if who["nameservers"] is not None:
            name_servers = []
            for i in who["nameservers"]:
                name_servers.append(i.encode('UTF8'))
            # NOTE: "Domai_name_servers" typo is preserved from the original
            # (it is a runtime string other code may match on).
            self.cms_list.add("Domai_name_servers:" +
                              str(name_servers).encode("utf8"))
def check_for_third_level_domains(filename):
    """Exit(1) if any entry in files[filename] has more than one private label."""
    with open("public_suffix_list.dat", "r") as latest:
        psl = PublicSuffixList(latest)
    offending = {
        entry
        for entry in files[filename]
        if len(psl.privateparts(entry.strip())) > 1
    }
    if not offending:
        return
    print(
        "The following domains contain a third or lower level domain in {!r}:"
        .format(filename))
    for entry in sorted(offending):
        print("* {}".format(entry))
    sys.exit(1)
def getBenign(filepath):
    """Build benign train/pred domain splits from `filepath`.

    Reads one 'domain:<rest>' record per line, shuffles the domains, and
    writes a {"train": ..., "pred": ...} JSON split to
    ../result_data/yd_nf_data.json.
    """
    # CLEANUP: removed unused PublicSuffixList()/Filter() locals (the latter
    # shadowed the builtin `filter`) and a large block of commented-out
    # whitelist-sampling code from the original.
    domains = []
    with open(filepath, "r") as f:
        for line in f:
            # Keep only the domain part of each record.
            domains.append(line.strip().split(":")[0])

    random.shuffle(domains)

    # Fixed-size split: first 23600 for training, next 5900 for prediction.
    result = {
        "train": domains[:23600],
        "pred": domains[23600:29500],
    }
    with open("../result_data/yd_nf_data.json", "w") as f:
        f.write(json.dumps(result))
    print(len(domains))
def test_custom_psl(self):
    """Wildcard (*) and exception (!) rules in a custom source are honoured."""
    rules = """
invalid
*.invalid
!test.invalid
"""
    psl = PublicSuffixList(rules.splitlines())
    for domain, expected in (
        ("example.invalid", None),
        ("test.invalid", "test.invalid"),
        ("some.test.invalid", "test.invalid"),
        ("aaa.bbb.ccc.invalid", "bbb.ccc.invalid"),
    ):
        self.assertEqual(psl.suffix(domain), expected)
    self.assertEqual(psl.publicsuffix("example.invalid"), "example.invalid")
    self.assertEqual(psl.publicsuffix("test.invalid"), "invalid")
def filter2LDAleax():
    """Keep only Alexa domains with exactly one label before the public suffix."""
    psl = PublicSuffixList()
    kept = []
    with open("../data_sets/Aleax", "r") as f:
        for line in f:
            domain = line.strip()
            prefix = domain[:domain.rindex(psl.publicsuffix(domain)) - 1]
            if len(prefix) == 0:
                continue
            parts = prefix.split(".")
            if len(parts) == 1 and len(parts[0]) != 0:
                kept.append(domain)
    print(len(kept))
    with open("../data_sets/Aleax2LD", "w") as f:
        f.write("\n".join(kept))
def getAllDomainLabels(self, domains):
    """Split each domain's pre-suffix portion into individual labels.

    Returns (labels, index): every label found, plus the position in
    `domains` that each label came from.
    """
    psl = PublicSuffixList()
    labels, index = [], []
    for pos, raw in enumerate(domains):
        domain = raw.strip()
        pub = psl.publicsuffix(domain)
        parts = domain[:domain.rindex(pub) - 1].split(".")
        if len(parts) > 2:
            print("d:{} pub:{}".format(domain, pub))
        for label in parts:
            if len(label) == 0:
                print("kong kong")
            labels.append(label)
            index.append(pos)
    return labels, index
def check_hsts_preload(url: str) -> List[dict]:
    """Query hstspreload.com for the URL's domain and each parent domain.

    Walks up the label chain one label at a time, stopping once only the
    public suffix remains. IP-address hosts are skipped entirely.
    """
    hsts_service = "https://hstspreload.com/api/v1/status/"
    results: List[dict] = []

    domain = utils.get_domain(url)
    if checkers.is_ip_address(domain):
        return results

    while domain.count(".") > 0:
        # get the HSTS preload status for the domain
        res, _ = network.http_json(f"{hsts_service}{domain}")
        results.append(res)
        domain = domain.split(".", 1)[-1]
        if PublicSuffixList().is_public(domain):
            break
    return results
def eventdb_apply(host, port, database, username, password, table, dry_run,
                  where, filename):
    """Fill source/destination domain_suffix columns for existing events.

    Selects rows with a source.fqdn or destination.fqdn (optionally
    restricted by `where`), computes the ICANN public suffix of each
    IDNA-encoded FQDN, and writes it back through a second,
    autocommitting connection.
    """
    if password:
        # SECURITY FIX: read the password without echoing it to the terminal;
        # the original used input(), which displays what is typed.
        from getpass import getpass
        password = getpass('Password for user %r on %r: ' % (username, host))
    where = 'AND ' + where if where else ''
    con1 = psycopg2.connect(user=username, password=password,
                            database=database, host=host, port=port)
    cur1 = con1.cursor(cursor_factory=DictCursor)
    con2 = psycopg2.connect(user=username, password=password,
                            database=database, host=host, port=port)
    con2.autocommit = True
    cur2 = con2.cursor(cursor_factory=DictCursor)
    # NOTE(review): `table`/`where` are interpolated into the SQL text — only
    # safe because they come from trusted operator CLI arguments.
    cur1.execute('''
        SELECT id, "source.fqdn", "destination.fqdn"
        FROM {table}
        WHERE ("source.fqdn" IS NOT NULL OR "destination.fqdn" IS NOT NULL)
        {where}
    '''.format(table=table, where=where))
    psl = PublicSuffixList(only_icann=True)
    counter = 0
    for row in cur1:
        counter += 1
        if row['source.fqdn']:
            cur2.execute(
                'update events set "source.domain_suffix" = %s where id = %s',
                (psl.publicsuffix(row['source.fqdn'].encode('idna').decode()),
                 row['id']))
        if row['destination.fqdn']:
            cur2.execute(
                'update events set "destination.domain_suffix" = %s where id = %s',
                (psl.publicsuffix(
                    row['destination.fqdn'].encode('idna').decode()),
                 row['id']))
    con2.commit()
    print("Changed %d rows" % counter)
def local_malicious_check(AGD_root="/home/public/2019-01-07-dgarchive_full"):
    """Write every domain from all_domain_list.txt that appears in the DGA archive.

    Loads the first column of every CSV under `AGD_root` into a set, then
    filters ../result_data/all_domain_list.txt against it and writes the
    matches to ../result_data/all_FQDN_AGD_in_traffic.
    """
    # CLEANUP: removed an unused PublicSuffixList instance from the original.
    AGD_set = set()
    for filename in os.listdir(AGD_root):
        filepath = "{}/{}".format(AGD_root, filename)
        df = pd.read_csv(filepath, header=None)
        print(filepath)
        AGD_set.update(df.iloc[:, 0])
        print(df.iloc[:2, 0])

    domains = []
    with open("../result_data/all_domain_list.txt", "r") as f:
        for line in f:
            d = line.strip()
            if d in AGD_set:
                domains.append(d)

    with open("../result_data/all_FQDN_AGD_in_traffic", "w") as f:
        f.write("\n".join(domains))
def get2subdomain(root_dir="/home/public/2019-01-07-dgarchive_full"):
    """Collect, per archive file, domains with exactly two labels before the suffix."""
    psl = PublicSuffixList()
    result = dict()
    for filename in os.listdir(root_dir):
        with open("{}/{}".format(root_dir, filename), "r") as f:
            for line in f:
                d = line.strip().split(",")[0]
                prefix_labels = d[:d.rindex(psl.publicsuffix(d)) - 1].split(".")
                if len(prefix_labels) == 2:
                    result.setdefault(filename, set()).add(d)
    for name, domains in result.items():
        print("{} : {}".format(name, len(domains)))
        print(list(domains)[:10])
def check_for_public_suffixes(filename):
    """Exit(1) if any line in files[filename] is nothing but a public suffix."""
    with open("public_suffix_list.dat", "r") as latest:
        psl = PublicSuffixList(latest)
    found_any = False
    for lineno, raw in enumerate(files[filename], start=1):
        candidate = raw.strip()
        if psl.publicsuffix(candidate) == candidate:
            print(
                f"The line number {lineno} contains just a public suffix: {candidate}"
            )
            found_any = True
    if found_any:
        print(
            "At least one valid public suffix found in {!r}, please "
            "remove it. See https://publicsuffix.org for details on why this "
            "shouldn't be blocklisted.".format(filename))
        sys.exit(1)
def domains_map_features(self, day):
    """Compute and persist features for every domain in `day`'s ip_dict."""
    with open("../result_data/{}/{}_ip_dict.json".format(day, day), 'r') as f:
        ip_dict = json.loads(f.read())

    # Union both domain lists of every IP entry.
    unique_domains = set()
    for pair in ip_dict.values():
        unique_domains.update(pair[0])
        unique_domains.update(pair[1])
    domain_list = list(unique_domains)
    print('domains number:{}'.format(len(domain_list)))

    features = self.get_features(domain_list, PublicSuffixList())
    np.save("../result_data/{}/{}_all_domain_features.npy".format(day, day),
            features)
    with open("../result_data/{}/{}_all_domain_list.txt".format(day, day),
              "w") as f:
        f.write('\n'.join(domain_list))
def lstm_getSingleFea(d: str):
    """Encode the pre-suffix part of domain `d` as a fixed 64-slot word vector."""
    psl = PublicSuffixList()
    prefix = d[:d.rindex(psl.publicsuffix(d)) - 1].replace(".", "")
    vector = np.zeros(64)
    if not prefix:
        return vector

    # Segment with bidirectional maximum matching.
    words = CutWords().max_biward_cut(prefix)

    # Fill the vector right-to-left with word codes, truncating on the left
    # when there are more than 64 words.
    slot = 63
    for word in reversed(words):
        vector[slot] = CutWords.order[word]
        slot -= 1
        if slot < 0:
            break
    return vector
def is_email_blacklisted(address):
    """
    Determines if a supplied email address is present in the 'emailblacklist' table.

    Parameters:
        address: The email address to split out the domain from.

    Returns:
        Boolean True if present on the blacklist, or False otherwise.
    """
    domain = address.rsplit("@", 1)[1]
    private_suffix = PublicSuffixList().privatesuffix(domain=domain)

    # Check the disposable email address list first.
    if private_suffix in DISPOSABLE_DOMAINS:
        return True

    # Fall back to the explicitly blacklisted domains table.
    return d.engine.scalar(
        "SELECT EXISTS (SELECT FROM emailblacklist WHERE domain_name = %(domain)s)",
        domain=private_suffix,
    )
def init(self, config=None):
    # Initialise the Flask app, database bindings, migration hook, worker
    # pool, product-URL regex rules, and the shared PublicSuffixList.
    # NOTE(review): `iteritems()` means this module is Python 2 code.
    if config is not None:
        self.config = load_config_obj(config)
    app = Flask('product_identifier')
    app.config.from_object(self.config)
    self.__flask = app
    self.__db = SQLAlchemy(self.__flask)
    Migrate(self.__flask, self.db)
    # Bounded gevent pool sized from configuration.
    self.handler_pool = gevent.pool.Pool(
        self.config.MASTER_HANDLER_POOL_SIZE)
    # Compile the product-matching regexes declared in ruleset.json.
    with open(os.path.join(self.config.DATA_DIR, "ruleset.json"), "r") as f:
        rule_set = json.load(f)
    self.product_patterns = []
    for name, pattern in rule_set["rules"].iteritems():
        self.product_patterns.append(re.compile(pattern))
    self.__psl = PublicSuffixList()
def createdataset(type="train",
                  AGD_file="../data_sets/split_AGDs",
                  BD_file="../data_sets/split_benign_nx.json",
                  datasetname="nx_train_data"):
    """Build a labelled AGD/benign label-level dataset and save features + CSV.

    :param type: "train" selects index 0 of each AGD entry, anything else index 1.
    :param AGD_file: JSON of {family: [train_list, test_list]} AGD domains.
    :param BD_file: JSON with "train"/"pred" benign domain lists.
    :param datasetname: basename for the .npy features and .csv outputs.
    """
    v_index = 0 if type == "train" else 1
    psl = PublicSuffixList()
    with open(AGD_file, "r") as f:
        AGD_dict = json.loads(f.read())
    with open(BD_file, "r") as f:
        bd_dict = json.loads(f.read())

    # Collect the individual pre-suffix labels of each domain class.
    allAGDs = set()
    allBDs = set()
    for k, v in AGD_dict.items():
        for d in v[v_index]:
            pre_d = d[:d.rindex(psl.publicsuffix(d)) - 1]
            for l in pre_d.split("."):
                allAGDs.add(l)
    for d in bd_dict[type]:
        pre_d = d[:d.rindex(psl.publicsuffix(d)) - 1]
        for l in pre_d.split("."):
            allBDs.add(l)

    length = len(allAGDs)
    print(length)
    # Balance classes: cap benign labels at the AGD count.
    allBDs = list(allBDs)[:length]
    allAGDs = list(allAGDs)
    alldomains = allAGDs + allBDs
    # BUG FIX: the original emitted `length` zeros even when fewer benign
    # labels exist, misaligning labels with domains/features; size each label
    # run by its actual class size.
    alllabels = list(np.ones(len(allAGDs))) + list(np.zeros(len(allBDs)))

    allfeatures = extract_all_features(alldomains)
    np.save("../data_sets/{}_features.npy".format(datasetname), allfeatures)

    df = pd.DataFrame(data={
        "domains": pd.Series(alldomains, dtype='str'),
        "labels": pd.Series(alllabels, dtype='int32'),
    })
    df.to_csv("../data_sets/{}.csv".format(datasetname), index=False)
def dga_static_num(file_path):
    """Count DGA domains by the number of labels preceding the public suffix.

    `file_path` holds JSON of {family: [list0, list1]}; both lists of every
    family are tallied into [1 label, 2 labels, 3+ labels] and printed.
    """
    psl = PublicSuffixList()
    result = [0, 0, 0]
    with open(file_path, "r") as f:
        # CLEANUP: the original bound this to `map`, shadowing the builtin.
        family_map = json.loads(f.read())
    for v in family_map.values():
        # DEDUPLICATION: the original repeated this whole loop body for v[0]
        # and v[1]; count both lists in one pass.
        for d in v[0] + v[1]:
            d_strip = d[:d.rindex(psl.publicsuffix(d)) - 1].split(".")
            if len(d_strip) == 1:
                result[0] += 1
            elif len(d_strip) == 2:
                result[1] += 1
            else:
                result[2] += 1
    print(result)
def static_1_2(root_dir="/home/public/2019-01-07-dgarchive_full"):
    """Bucket archive domains per file into 1-label and 2-label subdomain sets.

    Writes the result as JSON to ../result_data/dga_data.json.
    """
    psl = PublicSuffixList()
    result = dict()
    for filename in os.listdir(root_dir):
        df = pd.read_csv(os.path.join(root_dir, filename), header=None,
                         error_bad_lines=False)
        buckets = result.setdefault(filename, [set(), set()])
        for d in df.iloc[:, 0]:
            pub_d = psl.publicsuffix(d)
            if d == pub_d:
                # Bare public suffix: nothing in front of it to classify.
                continue
            labels = d[:d.rindex(pub_d) - 1].split(".")
            if len(labels) == 1:
                buckets[0].add(d)
            elif len(labels) == 2:
                buckets[1].add(d)
            else:
                print("Wow : {}".format(d))
        print("{} finish".format(filename))
    print("write")
    with open("../result_data/dga_data.json", "w") as f:
        f.write(json.dumps(result, cls=MyJsonEncoder))