Example #1
File: url.py Project: buzzworkers/tl
def geturls(self):
    """Collect the unique .html links found on the fetched page."""
    if not self.html: self.fetch()
    urls = []
    from tl.imports import getBeautifulSoup
    soup = getBeautifulSoup()
    s = soup.BeautifulSoup(self.html)
    tags = s('a')
    for tag in tags:
        href = tag.get("href")
        if not href: continue
        # drop any fragment and skip links we do not want to crawl
        href = href.split("#")[0]
        if not href: continue
        if not href.endswith(".html"): continue
        if ".." in href: continue
        if href.startswith("mailto"): continue
        # resolve relative links against the site root or the base url
        if "http" not in href:
            if href.startswith("/"): href = self.root + href
            else: href = self.base + "/" + href
        # only keep links that stay under the configured root
        if self.root not in href:
            logging.warning("%s not in %s" % (self.root, href))
            continue
        if href not in urls: urls.append(href)
    logging.warning("found %s urls" % len(urls))
    return urls
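
Both examples obtain BeautifulSoup through getBeautifulSoup() from tl.imports instead of importing the parser directly, which keeps it an optional dependency that is only loaded when a page actually has to be parsed. The helper below is a minimal sketch of that lazy-import pattern, assuming the bs4 package; it is not the tl project's actual implementation, which may resolve a different BeautifulSoup version.

def getBeautifulSoup():
    # Import the parser module lazily so it is only required when used.
    # Sketch only: assumes bs4 is installed; the real tl.imports helper
    # may fall back to another BeautifulSoup release.
    import bs4
    return bs4

# Usage mirrors the snippets above:
# soup = getBeautifulSoup()
# s = soup.BeautifulSoup("<a href='index.html'>home</a>", "html.parser")
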
Example #2
File: spider.py Project: buzzworkers/tl
## tl imports

from tl.utils.name import stripname
from tl.utils.exception import handle_exception
from tl.utils.urldata import UrlData
from tl.utils.generic import waitforqueue
from tl.utils.url import geturl2, striphtml, Url
from tl.lib.datadir import getdatadir
from tl.lib.persist import PersistCollection
from tl.lib.commands import cmnds
from tl.lib.examples import examples
from tl.lib.threadloop import ThreadLoop
from tl.lib.callbacks import callbacks
from tl.imports import getBeautifulSoup
soup = getBeautifulSoup()

## basic imports

from collections import deque 
import os
import logging
import re
import sys
import time
import math
import urllib.request, urllib.error, urllib.parse
import optparse
from html import escape  # cgi.escape was removed in Python 3.8; html.escape replaces it
from traceback import format_exc