Python extract_domainの例

プログラミング言語: Python

名前空間/パッケージ名: common

メソッド/関数: extract_domain

hotexamples.comのコード掲載数: 5

Python extract_domain - 5件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのcommon.extract_domainの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

コード例 #1

ファイルを表示

ファイル: grp.py プロジェクト: BackupTheBerlios/ser

def convert_grp(dst, src):
    """Copy rows of the source ``grp`` table into ``user_attrs`` as "acl" attributes.

    dst: destination database connection (used for UID lookup and inserts)
    src: source database connection holding the ``grp`` table

    Returns None. If the ``grp`` table cannot be queried the conversion is
    skipped. Rows whose UID cannot be resolved, or whose insert conflicts
    with an existing row, are reported and the loop continues.
    """
    if common.verbose:
        print("Converting grp table into user attributes")

    reader = src.cursor()

    try:
        reader.execute("select username, domain, grp from grp")
        rows = reader.fetchall()
    except MySQLdb.ProgrammingError:
        print("Error while querying grp table, skipping")
        return

    insert_sql = "insert into user_attrs (uid, name, type, value, flags) values(%s, %s, %s, %s, %s)"

    for username, raw_domain, grp in rows:
        domain = common.extract_domain(username, raw_domain)
        try:
            uid = get_uid_by_uri(dst, username, domain)
            params = (uid, "acl", 2, grp, common.DB_LOAD_SER | common.DB_FOR_SERWEB)
            dst.cursor().execute(insert_sql, params)
        except UidNotFoundException:
            print("ERROR: Cannot find UID for group entry for '%s@%s'" % (username, domain))
        except MySQLdb.IntegrityError:
            # Report the values of the row that could not be inserted.
            print(
                "Conflicting row found in user_attrs table (%s, %s, %s, %s, %s)"
                % (uid, "acl", 2, grp, common.DB_LOAD_SER | common.DB_FOR_SERWEB)
            )

コード例 #2

ファイルを表示

ファイル: download.py プロジェクト: staticerror/SeKing

    def crawl(self, seed_url, max_urls=30, max_depth=1, obey_robots=False, max_size=1000000, force_html=True, **kwargs):
        """Crawl website html and return list of URLs crawled

        seed_url: url to start crawling from
        max_urls: maximum number of URLs to crawl (use None for no limit)
        max_depth: maximum depth to follow links into website (use None for no limit)
        obey_robots: whether to obey robots.txt; when False robots.txt is neither
            downloaded nor consulted
        max_size is passed to get() and is limited to 1MB by default
        force_html is passed to get() and is set to True by default so only crawl HTML content
        **kwargs is passed to get()

        A scraped link is queued only when its extracted domain is a substring of
        the seed server string, or the seed server string is a substring of it.
        """
        user_agent = kwargs.get("user_agent", self.user_agent)
        server = "http://" + extract_domain(seed_url)
        robots = RobotFileParser()
        if obey_robots:
            # NOTE(review): get() presumably may return an empty/None result on
            # failure - confirm; an empty robots.txt is parsed in that case.
            robots_txt = self.get(server + "/robots.txt")
            robots.parse((robots_txt or "").splitlines())  # load robots.txt
        # compile once rather than for every downloaded page
        link_pattern = re.compile("<a[^>]+href=[\"'](.*?)[\"']", re.IGNORECASE)
        outstanding = [(seed_url, 0), (server, 0)]  # which URLs need to crawl
        crawled = []  # urls that have crawled

        while outstanding:
            # more URLs to crawl
            if len(crawled) == max_urls:
                break
            url, cur_depth = outstanding.pop(0)
            if url in crawled:
                continue
            html = self.get(url, max_size=max_size, force_html=force_html, **kwargs)
            crawled.append(url)
            if max_depth is not None and cur_depth >= max_depth:
                # depth limit reached: record the page but do not follow its links
                continue
            # a falsy download result is treated as a page without links
            for scraped_url in link_pattern.findall(html or ""):
                if "#" in scraped_url:
                    # remove internal links to prevent duplicates
                    scraped_url = scraped_url[: scraped_url.index("#")]
                extension = os.path.splitext(scraped_url)[-1].lower()
                if extension in Download.IGNORED_EXTENSIONS:
                    continue
                # Only ask the parser when robots.txt was actually loaded: a
                # RobotFileParser that never read a robots.txt answers False
                # for every URL, which would stop the crawl at the seed pages.
                if obey_robots and not robots.can_fetch(user_agent, scraped_url):
                    continue
                scraped_url = urljoin(server, scraped_url)  # support relative links
                # check if same domain or sub-domain
                this_server = extract_domain(scraped_url)
                if this_server and (this_server in server or server in this_server):
                    outstanding.append((scraped_url, cur_depth + 1))
        return crawled

コード例 #3

ファイルを表示

ファイル: download.py プロジェクト: staticerror/SeKing

    def domain_delay(self, url, delay, proxy=None, variance=0.5):
        """Sleep as needed so each (proxy, domain) pair is accessed at most once per delay window.

        url: the URL about to be downloaded; its domain forms part of the cache key
        delay: minimum number of seconds to wait after the previous access
        proxy: proxy in use; its string form is the other part of the cache key
        variance: amount of randomness applied to the wait, 0-1

        The last access time for the key is stored in self.cache and is reset
        to the current time on every call.
        """
        proxy_label = str(proxy)
        cache_key = proxy_label + ":" + extract_domain(url)

        if cache_key in self.cache:
            elapsed = datetime.now() - self.cache[cache_key]
            # whole seconds only; the microseconds part of the delta is ignored
            remaining = delay - elapsed.days * 24 * 60 * 60 - elapsed.seconds
            if remaining > 0:
                # jitter the wait so the access pattern looks less regular
                jitter = 2 * variance * delay * random.random()
                remaining = remaining - variance * delay + jitter
                # the jitter can push the wait below zero, so clamp it
                time.sleep(remaining if remaining > 0 else 0)

        # record this access as the most recent one for the key
        self.cache[cache_key] = datetime.now()

コード例 #4

ファイルを表示

ファイル: admin_privileges.py プロジェクト: BackupTheBerlios/ser

def convert_admin_privileges(dst, src):
    """Copy rows of the source ``admin_privileges`` table into ``user_attrs``.

    dst: destination database connection (used for UID lookup and inserts)
    src: source database connection holding the ``admin_privileges`` table

    Privilege names are mapped as follows:
      is_admin          -> sw_is_admin      (type 0, value 1)
      acl_control       -> sw_acl_control   (type 2, value taken from the row)
      change_privileges -> sw_is_hostmaster (type 0, value 1)
    Any other privilege name is reported and skipped. Rows whose UID cannot
    be resolved are reported and skipped. A conflicting insert terminates
    the process with exit status 1.
    """
    reader = src.cursor()

    if common.verbose:
        print("Converting admin_privileges table")

    try:
        reader.execute("select username, domain, priv_name, priv_value from admin_privileges")
    except MySQLdb.ProgrammingError:
        print("Error while querying admin_privileges table, skipping")
        return

    rows = reader.fetchall()

    if common.verbose and reader.rowcount == 0:
        print("Source admin_privileges table is empty, skipping")
        return

    # The two statements differ only in the order of the type/value columns.
    type_value_insert = "insert into user_attrs (uid, name, type, value, flags) values (%s, %s, %s, %s, %s)"
    value_type_insert = "insert into user_attrs (uid, name, value, type, flags) values (%s, %s, %s, %s, %s)"

    for username, raw_domain, priv_name, priv_value in rows:
        domain = common.extract_domain(username, raw_domain)

        try:
            uid = get_uid_by_uri(dst, username, domain)

            if priv_name == "is_admin":
                statement = (type_value_insert, (uid, "sw_is_admin", 0, 1, common.DB_FOR_SERWEB))
            elif priv_name == "acl_control":
                statement = (value_type_insert, (uid, "sw_acl_control", priv_value, 2, common.DB_FOR_SERWEB))
            elif priv_name == "change_privileges":
                statement = (type_value_insert, (uid, "sw_is_hostmaster", 0, 1, common.DB_FOR_SERWEB))
            else:
                statement = None
                print("ERROR: Unsupported privilege '%s' in admin_privileges table, skipping" % priv_name)

            if statement is not None:
                dst.cursor().execute(*statement)
        except UidNotFoundException:
            print("ERROR: Cannot find UID for admin_privilege entry for '%s@%s'" % (username, domain))
        except MySQLdb.IntegrityError:
            print("Conflicting row found user_attrs table")
            sys.exit(1)

コード例 #5

ファイルを表示

ファイル: crtsh.py プロジェクト: becksteadn/insight

def lookup(target, color=common.bcolors.OKGREEN):
    """Query crt.sh for the domain of *target* and print a table of the names found.

    target: value passed to common.extract_domain() to obtain the queried domain
    color: prefix string printed before the table (common.bcolors.ENDC follows it)

    At most 20 distinct ``name_value`` entries are listed in sorted order; if
    there are more, a final "+N MORE" row is added. Returns None in all cases;
    on a non-200 response a message is printed and nothing else happens.
    """
    row_limit = 20
    query_url = 'https://crt.sh/?q=%.{}&output=json'.format(common.extract_domain(target))

    response = requests.get(query_url)
    status = response.status_code
    if status != 200:
        print('Got status {} from crt.sh. Skipping...'.format(status))
        return None

    # A set drops the duplicate names returned by the service.
    subdomains = {entry['name_value'] for entry in response.json()}

    table = PrettyTable()
    table.field_names = ['Enumerated Subdomains']

    for name in sorted(subdomains)[:row_limit]:
        table.add_row([name])

    hidden = len(subdomains) - row_limit
    if hidden > 0:
        table.add_row(['+{} MORE'.format(hidden)])

    print(color + str(table) + common.bcolors.ENDC)