Python BeautifulSoup.decode Examples

Programming Language: Python

Namespace/Package Name: BeautifulSoup

Class/Type: BeautifulSoup

Method/Function: decode

Examples at hotexamples.com: 4

Python BeautifulSoup.decode - 4 examples found. These are the top-rated real-world Python examples of BeautifulSoup.BeautifulSoup.decode, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

BeautifulSoup(30)

decompose(30)

first(30)

find_all(30)

findAll(30)

find(30)

fetch(30)

feed(30)

getText(29)

insert(20)

findChildren(19)

body(12)

close(11)

__str__(11)

encode(8)

new_tag(6)

findChild(5)

append(4)

prettify(4)

findSelect(4)

decode(4)

get(4)

__unicode__(3)

goahead(3)

lower(3)

div(3)

findall(3)

pretify(3)

__init__(3)

firstText(2)

pop(2)

data(2)

findNext(2)

read(2)

index(1)

html(1)

query(1)

json(1)

load(1)

re_left(1)

noscript(1)

orig_url(1)

partition(1)

popTag(1)

pretiffy(1)

head(1)

findNextSiblings(1)

group(1)

encodeContents(1)

attrs(1)

Example #1

Show file

    def parse_page(self, title, bookid, path, response):
        """Save a prettified copy of the page, then request every image on it.

        The response body is parsed with BeautifulSoup, prettified, rendered
        through ``PAGE_TEMPLATE`` and written to ``./output/OEBPS/<path>``.
        Afterwards one ``scrapy.Request`` is yielded for each non-empty
        ``<img src>`` value, with ``self.parse_content_img`` (pre-bound to
        that src value) as its callback.

        This is a generator: nothing, including the file write, runs until
        the caller starts iterating over the result.
        """
        page_template = Template(PAGE_TEMPLATE)
        with codecs.open("./output/OEBPS/" + path, "wb", "utf-8") as out:
            markup = BeautifulSoup(response.body).prettify()
            # NOTE(review): .decode() assumes prettify() returned a byte
            # string -- confirm against the BeautifulSoup version in use.
            rendered = page_template.render(body=markup.decode('utf8'))
            out.write(rendered)

        for img in response.xpath("//img/@src").extract():
            if not img:
                # Skip empty src attributes.
                continue
            image_url = (self.host + '/library/view/' + title + '/' +
                         bookid + '/' + img)
            on_image = partial(self.parse_content_img, img)
            yield scrapy.Request(image_url, callback=on_image)

Example #2

Show file

def get_links_list(url):
    """Fetch an event page and queue one Kodi list item per result/stream.

    The page at ``url`` is downloaded, parsed with BeautifulSoup and then
    scraped with regular expressions.  Items are appended to ``listing``,
    which is not defined in this function (presumably a module-level list
    -- confirm).  Exactly one of three cases applies:

    * the page matches the localized ``T_ED_RESUL`` string: one
      non-playable item per ``<b>...</b>`` fragment found after it, the
      first fragment being decorated as a coloured header;
    * the page contains ``AceStream Links``: one playable item per
      AceStream link, whose URL targets the ``program.plexus`` plugin;
    * otherwise: one non-playable item labelled with the localized
      ``T_NO_LSTRM`` string.

    If ``listing`` is still empty afterwards, a placeholder item is added.

    Args:
        url: Address of the event page to scrape.

    Returns:
        ``listing``; every entry added here is a
        ``(item_url, ListItem, False)`` tuple.
    """
    response = requests.get(url, headers=headers)
    # Re-encode the decoded body as ISO-8859-1 bytes, silently dropping any
    # character that does not fit, before handing it to BeautifulSoup.
    soup = BeautifulSoup(response.text.encode('iso-8859-1', 'ignore'))

    # The plot text is the <h1 class~="sporttitle"> content: stringify the
    # result list, drop tabs/newlines/list brackets, unescape HTML entities
    # and finally strip the remaining tags.
    titles = soup.findAll('h1', attrs={'class': re.compile('sporttitle')})
    plot = parser.unescape(
        str(titles).replace('\t', '').replace('\n', '').replace('[', '')
        .replace(']', '').decode('utf-8').strip())
    plot = re.sub(r'<[^>]*>', r'', plot)
    htm = str(soup)

    def _add_item(label, image, playable, k_url=''):
        # Build one ListItem with the shared artwork/plot and queue it.
        list_item = xbmcgui.ListItem(label=label)
        list_item.setArt({
            'fanart': addonID.getAddonInfo('fanart'),
            'icon': image,
            'thumb': image,
            'poster': image
        })
        list_item.setInfo('video', {'plot': plot})
        list_item.setProperty('IsPlayable', playable)
        listing.append((k_url, list_item, False))

    results_text = get_localized_string(T_ED_RESUL)

    if re.search(results_text, htm):
        # Keep only the markup between the results heading and the
        # comments block, flattened onto a single line.
        htm = re.split(results_text, htm, 1)[1]
        htm = htm.split('<div id="comblockabs">', 1)[0]
        htm = htm.replace('\t', '').replace('\n', '')
        htm = parser.unescape(htm.decode('utf-8').strip())

        query = """.+?<b>(.+?)</b>.+?"""
        elinks = re.compile(query, re.DOTALL).findall(htm)
        # findall() may return nothing; only decorate a header that exists
        # so that an empty match reaches the placeholder fallback below
        # instead of raising IndexError.
        if elinks:
            elinks[0] = ('[COLOR lightskyblue]' + results_text + elinks[0] +
                         '[/COLOR]')
        for el in elinks:
            _add_item(el, media + '/33054.png', 'false')

    elif re.search('AceStream Links', htm):
        htm = htm.split('<span class="lnkt">AceStream Links</span>', 1)[-1]
        htm = htm.split('<div id="comblockabs">', 1)[0]
        htm = htm.replace('\t', '').replace('\n', '')
        # Put each link row on its own line.
        htm = htm.replace('<td width="16">', '\n')
        htm = parser.unescape(htm.decode('utf-8').strip())

        query = """<img title=".+?/linkflag/(.+?).png" />.+?class="bitrate".+?">(.+?)/td>.+?<a href="acestream:(.+?)">.+?"""
        elinks = re.compile(query, re.DOTALL).findall(htm)

        for el in elinks:
            # el = (flag code, bitrate text, acestream id).  The same label
            # is shown in the list and forwarded as the plugin's "name".
            # NOTE(review): assumes check_audio_lang() is deterministic, so
            # calling it once per link is equivalent -- confirm.
            label = ('[B]Audio: ' + check_audio_lang(el[0]) +
                     ', Bitrate: AceStream ' + el[1].replace('<', '') +
                     '[/B]')
            k_url = ('plugin://program.plexus/?mode=1&url=acestream:' +
                     el[2] + '&name=' + label)
            _add_item(label, media + "/flags/" + el[0] + ".gif", 'true',
                      k_url)

    else:
        _add_item('[I][B]' + get_localized_string(T_NO_LSTRM) + '[/B][/I]',
                  media + '/33056.png', 'false')

    if not listing:
        # Nothing was queued at all: show a placeholder entry.
        _add_item('¡¡¡GRFTJX!!! ¡¡¡GRMBLFJ!!!', media + '/33057.png', 'false')

    return listing

Example #3

Show file

def get_event_list(url):
    """Fetch a sport's event page and add one Kodi directory entry per event.

    The page at ``url`` is downloaded, parsed with BeautifulSoup, reduced to
    the lines that contain ``/icons/`` and scraped with a regular expression
    yielding ``(icon, href, event name, date text, description)`` tuples.
    Duplicate tuples are dropped.  For each event, ``change_date_format``
    turns the date text into a value that is compared against two
    thresholds:

    * earlier than 30 minutes from now: added as a folder whose URL is
      built from the event link;
    * otherwise earlier than ``tday``: added in italics as a non-folder
      entry with an empty URL;
    * anything later is skipped.

    If no entry was added, a single entry labelled with the localized
    ``T_NO_LSTRM`` string is added instead.  The directory is always closed
    with ``xbmcplugin.endOfDirectory``.

    Args:
        url: Address of the page to scrape.  Its sixth ``/``-separated
            piece selects the artwork file under ``media + "/sports/"``.

    Returns:
        None; the entries are registered through ``xbmcplugin``.
    """
    response = requests.get(url, headers=headers)
    # Re-encode the decoded body as ISO-8859-1 bytes, silently dropping any
    # character that does not fit, before handing it to BeautifulSoup.
    soup = BeautifulSoup(response.text.encode('iso-8859-1', 'ignore'))
    image = media + "/sports/" + url.split('/')[5] + ".sport.png"

    # The plot text is the <span class~="sltitle"> content: stringify the
    # result list, drop tabs/newlines/list brackets, unescape HTML entities
    # and finally strip the remaining tags.
    titles = soup.findAll('span', attrs={'class': re.compile('sltitle')})
    plot = parser.unescape(
        str(titles).replace('\t', '').replace('\n', '').replace('[', '')
        .replace(']', '').decode('utf-8').strip())
    plot = re.sub(r'<[^>]*>', r'', plot)

    # Cut the markup down to the event table, flatten it, then re-split it
    # so every "/icons/" occurrence starts a line and every </span> ends one.
    htm = str(soup)
    htm = htm.split('<span class="sltitle">', 1)[-1]
    htm = htm.split('<a href="/es/archive/">', 1)[0]
    htm = htm.replace('\t', '').replace('\n', '')
    htm = htm.replace('/icons/', '\n/icons/')
    htm = htm.replace('</span>', '</span>\n')
    htm = '\n'.join(line for line in htm.split('\n') if '/icons/' in line)
    htm = parser.unescape(htm.decode('utf-8').strip())

    query = """/icons/(.+?)".+?<a class=.+?href="(.+?)">(.+?)</a>.+?"evdesc">(.+?)<.+?>(.+?)</span>"""
    events = re.compile(query, re.DOTALL).findall(htm)
    # Drop duplicate matches.
    events = list(dict.fromkeys(events))

    # Thresholds are taken once so every event is judged against the same
    # instant.  strftime() is used rather than slicing str(time): the latter
    # has no fractional part to cut off when microseconds happen to be 0.
    now = datetime.datetime.now()
    # NOTE(review): presumably hms_to_m() returns the minutes elapsed since
    # midnight, which would make ``tday`` the upcoming midnight -- confirm.
    mins = 1440 - hms_to_m(now.strftime("%H:%M:%S"))
    tnow = now + datetime.timedelta(minutes=30)
    tday = now + datetime.timedelta(minutes=mins)

    def _make_item(label, art):
        # Build a ListItem carrying the shared fanart/plot and given artwork.
        item = xbmcgui.ListItem(label)
        item.setArt({
            'fanart': addonID.getAddonInfo('fanart'),
            'icon': art,
            'thumb': art,
            'poster': art
        })
        item.setInfo('video', {'plot': plot})
        return item

    added = False
    for e in events:
        hrefs = urlbase + e[1]
        start = change_date_format(e[3])

        if start < tnow:
            prefix, suffix = '', ''
            item_url = build_url({'mode': 'folder', 'foldername': hrefs})
            is_folder = True
        elif start < tday:
            # Shown in italics and without a target URL.
            prefix, suffix = '[I]', '[/I]'
            item_url = ''
            is_folder = False
        else:
            continue

        label = (prefix + '[COLOR lightskyblue](' +
                 start.strftime("%d/%m/%y %H:%M") + ')[/COLOR] [B]' + e[2] +
                 '[/B] [COLOR lightseagreen]' + e[4] + '[/COLOR]' + suffix)
        li = _make_item(label, image)
        xbmcplugin.addSortMethod(handle=addon_handle,
                                 sortMethod=xbmcplugin.SORT_METHOD_LABEL)
        xbmcplugin.addDirectoryItem(handle=addon_handle,
                                    url=item_url,
                                    listitem=li,
                                    isFolder=is_folder)
        added = True

    if not added:
        li = _make_item(
            '[I][B]' + get_localized_string(T_NO_LSTRM) + '[/B][/I]',
            media + '/33056.png')
        xbmcplugin.addDirectoryItem(handle=addon_handle,
                                    url='',
                                    listitem=li,
                                    isFolder=False)

    xbmcplugin.endOfDirectory(handle=addon_handle, succeeded=True)

Example #4

Show file

    li = ''
    response = requests.get(url, headers=headers)
    # response.status
    htm = response.text
    htm = htm.encode('iso-8859-1', 'ignore')
    htm = BeautifulSoup(htm)
    htm = str(htm)

    htm = htm.split('<div id="aul">', 1)[-1]
    htm = htm.split('<a href="/es/majorcompetitions/">', 1)[0]
    htm = [line for line in htm.split('\n') if '<a class="main" ' in line]
    htm = re.sub(r'\t', r'', ''.join(htm))
    htm = re.sub(r'</td>', r'</td>\n', htm)
    htm = re.sub(r'</a><td background=', r'</a>\n<td background=', htm)
    htm = re.sub(r'(?m)^<td background=.*\n?', r'', htm)
    htm = parser.unescape(htm.decode('utf-8').strip())

    query = """<a class=.+?href="(.+?)".+?<b>(.+?)</b>.+?"""
    sports = re.compile(query, re.DOTALL).findall(htm)

    #print (htm)
    #print (sports)

    for s in sports:
        hrefs = urlbase + s[0]
        image = s[0].split('/')
        image = media + "/sports/" + image[3] + ".cubic.gif"
        sport = s[1].encode('utf-8')

        url = build_url({'mode': 'folder', 'foldername': hrefs})
        li = xbmcgui.ListItem(sport)