Python BeautifulSoup.BeautifulSoupの例

プログラミング言語: Python

名前空間/パッケージ名: utils.BeautifulSoup

クラス/型: BeautifulSoup

メソッド/関数: BeautifulSoup

hotexamples.comのコード掲載数: 7

Python BeautifulSoup.BeautifulSoup - 7件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのutils.BeautifulSoup.BeautifulSoup.BeautifulSoupの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

よく使われるメソッド

表示 / 非表示

BeautifulSoup(7)

findAll(7)

find(6)

getText(1)

renderContents(1)

コード例 #1

ファイルを表示

def get_onepage_poclist(page):
    """Collect the PoC links listed on one page of the beebeeto database.

    Args:
        page: Page number to request; it is converted with ``str`` and
            appended to the listing URL as the ``page`` query value.

    Returns:
        A list of href strings, one for every anchor in the ``mainlist``
        container whose opening tag contains ``"poc-"``. An empty string
        is returned instead when ``getHtml`` yields ``''``, when the
        styled pager link reads ``'1'`` although a different page was
        requested, or when the ``mainlist`` container is absent or empty.
    """
    info = getHtml("http://beebeeto.com/pdb" + '/?page=' + str(page))
    if '' == info:
        return ''

    bt = BeautifulSoup(info)
    # Look up the anchor carrying this exact inline style.
    # NOTE(review): presumably this is the pager entry of the current
    # page -- confirm against the site markup.
    end = bt.find(
        'a', {
            'style':
            "font-size: 20px;font-weight: bold; border-bottom: 3px solid #777777;"
        })
    # find() returns None when nothing matches, so the pager is consulted
    # only when it exists instead of failing with an AttributeError.
    if end is not None and '1' == end.renderContents() and page != 1:
        return ''

    mainlist = bt.find('div', {'class': 'mainlist'})
    if mainlist is None:
        # No listing container on the page: report "nothing" the same way
        # an empty container is reported.
        return ''
    ret = mainlist.renderContents()
    if ret == "":
        return ""

    retlist = []
    for one in re.findall(r'<a href=.*?>', ret):
        if "poc-" in one:
            # Strip the surrounding tag syntax so only the href remains.
            one = one.replace('<a href="', "").replace('">', "")
            retlist.append(one.strip())

    return retlist

コード例 #2

ファイルを表示

ファイル: IcecastDirectory.py プロジェクト: buzztiaan/mediabox

    def __parse_genres(self, data):
        """
        Parses the list of genres.

        Reads every "a" tag of class "tag" inside the "ul" of class
        "tag-cloud" found in `data` and returns a list of (name, href)
        tuples taken from the tags' "title" and "href" attributes, sorted
        by name. When no genre is found, a message is stored on the current
        folder, an error is logged and the empty list is returned.
        """

        genres = []
        soup = BeautifulSoup(data)
        tagcloud = soup.find("ul", {"class": "tag-cloud"})

        if (tagcloud):
            for genre_tag in tagcloud.findAll("a", {"class": "tag"}):
                name = genre_tag["title"]
                href = genre_tag["href"]
                genres.append((name, href))
            #end for
        #end if

        if (not genres):
            self.__current_folder.message = "genre list not available"
            # NOTE(review): stacktrace() is not part of the standard
            # logging module, so `logging` is presumably a project module.
            logging.error("icecast genre listing download failed:\n%s",
                          logging.stacktrace())

        # Order by genre name only. A key function yields the same stable
        # ordering as the former cmp-based comparison and does not depend
        # on the cmp builtin or on comparison-function sorting.
        genres.sort(key=lambda genre: genre[0])
        return genres

コード例 #3

ファイルを表示

    def __parse_genres(self, data):
        """
        Extracts the genre names from the given page markup.

        A notice about the restricted directory is sent through
        call_service first. The names are then read from the first child of
        the "a" tag of every "li" of class "prigen" inside the div with id
        "radiopicker", with "&amp;" turned back into "&". When nothing is
        found, a message is stored on the current folder and an error is
        logged. The names are returned as a sorted list.
        """

        notice = ("SHOUTcast made it illegal for free software to access\n"
                  "their full directory.\n"
                  "You will only get the Top 10 stations listed per genre.")
        self.call_service(msgs.UI_ACT_SHOW_INFO, notice)

        picker = BeautifulSoup(data).find("div", {"id": "radiopicker"})

        genres = []
        if picker:
            genres = [item.a.contents[0].replace("&amp;", "&")
                      for item in picker.findAll("li", {"class": "prigen"})]

        if not genres:
            self.__current_folder.message = "genre list not available"
            logging.error("SHOUTcast genre listing download failed:\n%s",
                          logging.stacktrace())

        genres.sort()
        return genres

コード例 #4

ファイルを表示

    def __parse_stations(self, data, genre):
        """
        Builds the station entries found in a result page.

        Every div of class "dirlist" inside the div with id "resulttable"
        becomes one File whose resource, name, mimetype, path, info and
        icon are filled in. Entries lacking the play link, the now-playing
        element, the bitrate element or the type element are skipped. When
        no station is found, a message is stored on the current folder and
        an error is logged. The list is sorted before it is returned.
        """

        # Stream type label -> mimetype; anything else is "audio/x-unknown".
        mimetypes = {"MP3": "audio/mpeg", "AAC+": "audio/mp4"}
        info_template = "Bitrate: %s kb\nNow playing: %s"

        stations = []
        table = BeautifulSoup(data).find("div", {"id": "resulttable"})
        rows = table.findAll("div", {"class": "dirlist"}) if table else []

        for row in rows:
            station = File(self)
            play_link = row.find("a", {"class": "playbutton playimage"})
            playing = row.find("div", {"class": "playingtext"})
            bitrate_div = row.find("div", {"class": "dirbitrate"})
            type_div = row.find("div", {"class": "dirtype"})

            # All four elements are required to describe a station.
            if not (play_link and playing and bitrate_div and type_div):
                continue

            station.resource = play_link["href"]
            station.name = play_link["title"]
            now_playing = playing["title"]
            bitrate = bitrate_div.contents[0].strip()

            typename = type_div.contents[0].strip()
            station.mimetype = mimetypes.get(typename, "audio/x-unknown")

            genre_path = "/" + urlquote.quote(genre, "")
            station.path = File.pack_path(genre_path, station.name, bitrate,
                                          station.mimetype, station.resource,
                                          genre)
            station.info = info_template % (bitrate, now_playing)
            station.icon = theme.shoutcast_station.get_path()
            stations.append(station)

        if not stations:
            self.__current_folder.message = "station list not available"
            logging.error("SHOUTcast station listing download failed\n%s",
                          logging.stacktrace())

        stations.sort()
        return stations

コード例 #5

ファイルを表示

def getPoc(poc):
    """Download one PoC page and return the source code it displays.

    Args:
        poc: Identifier of the PoC; it is inserted into the page URL.

    Returns:
        The contents of the ``pre`` element of class ``"brush: python;"``
        passed through ``strip_tags``. An empty string is returned when
        ``getHtml`` yields ``''``, when the page contains the ``test.jpg``
        image tag, or when the ``pre`` element is missing or empty.
    """
    info = getHtml("http://beebeeto.com/pdb/" + poc + "/")
    if '' == info:
        return ''
    # NOTE(review): pages showing this image are treated as having no
    # code; presumably it is a placeholder -- confirm against the site.
    if '<img src="/static/img/test.jpg"' in info:
        return ''

    bt = BeautifulSoup(info)
    code_tag = bt.find('pre', {'class': "brush: python;"})
    if code_tag is None:
        # find() returns None when the page has no code block; report
        # "nothing" instead of failing with an AttributeError.
        return ''

    ret = code_tag.renderContents()
    if ret:
        return strip_tags(ret)
    return ''

コード例 #6

ファイルを表示

ファイル: IcecastDirectory.py プロジェクト: buzztiaan/mediabox

    def __parse_stations(self, data, genre):
        """
        Parses the list of stations.

        Every "tr" inside the div with id "content" that carries a station
        name and a tune-in link becomes one File whose name, resource,
        mimetype, path, info and icon are filled in. Rows without a name or
        without a tune-in link are skipped; a missing now-playing text,
        bitrate or type falls back to "", "-" and "" respectively.

        Returns a tuple (stations, next_page_url). next_page_url is the
        href of the last link of the "pager" list when that link's text is
        not a number, and "" otherwise. When no station is found, a message
        is stored on the current folder and an error is logged.
        """

        # Failures expected while digging through an incomplete row: a
        # lookup that returned None, a missing child or a missing
        # attribute. Anything else (e.g. KeyboardInterrupt) propagates.
        lookup_errors = (AttributeError, IndexError, KeyError, TypeError)

        stations = []
        next_page_url = ""

        soup = BeautifulSoup(data)
        resulttable = soup.find("div", {"id": "content"})

        if (resulttable):
            for entry in resulttable.findAll("tr"):
                station = File(self)
                try:
                    station.name = entry.find("span", {
                        "class": "name"
                    }).a.contents[0]
                except lookup_errors:
                    # rows without a station name are not station rows
                    continue
                try:
                    now_playing = entry.find("p", {
                        "class": "stream-onair"
                    }).contents[1]
                except lookup_errors:
                    now_playing = ""
                try:
                    station.resource = _ICECAST_BASE + entry.find(
                        "td", {
                            "class": "tune-in"
                        }).find("a")["href"]
                except lookup_errors:
                    # a row without a tune-in link has no resource to
                    # point at; skip it rather than abort the whole page
                    continue
                try:
                    bitrate = entry.find("td", {
                        "class": "tune-in"
                    }).findAll("p", {"class": "format"})[1]["title"]
                except lookup_errors:
                    bitrate = "-"

                try:
                    typename = entry.find("a", {
                        "class": "no-link"
                    }).contents[0].strip()
                except lookup_errors:
                    typename = ""

                if (typename == "MP3"):
                    station.mimetype = "audio/mpeg"
                elif (typename == "AAC+"):
                    station.mimetype = "audio/mp4"
                else:
                    station.mimetype = "audio/x-unknown"

                station.path = "/" + genre + "/" + \
                    self.__encode_station(station.name,
                                          bitrate,
                                          station.mimetype,
                                          station.resource,
                                          genre)
                station.info = "Bitrate: %s\n" \
                               "Now playing: %s" % (bitrate, now_playing)
                station.icon = theme.icecast_station.get_path()
                stations.append(station)
            #end for

            pager_tag = resulttable.find("ul", {"class": "pager"})
            if (pager_tag):
                links = pager_tag.findAll("a")
                # an empty pager or an empty last link means there is no
                # next page to report
                if (links and links[-1].contents):
                    link = links[-1]
                    if (not link.contents[0].isdigit()):
                        # must be an arrow
                        next_page_url = link["href"]
                    #end if
                #end if
            #end if

        #end if

        if (not stations):
            self.__current_folder.message = "station list not available"
            logging.error("icecast station listing download failed\n%s",
                          logging.stacktrace())

        return (stations, next_page_url)

コード例 #7

ファイルを表示

ファイル: base.py プロジェクト: jpedraza/wordprocessor

def get_excerpt(content):
    """Return the first 100 items of the text BeautifulSoup extracts from *content*."""
    text = BeautifulSoup(content).getText()
    return text[:100]