Python BeautifulSoup.startswithの例

プログラミング言語: Python

名前空間/パッケージ名: sigil_bs4

クラス/型: BeautifulSoup

メソッド/関数: startswith

hotexamples.comのコード掲載数: 1

Python BeautifulSoup.startswith - 1件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのsigil_bs4.BeautifulSoup.startswithの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

よく使われるメソッド

表示 / 非表示

BeautifulSoup(14)

decodexml(9)

find_all(8)

find(1)

prettify(1)

prettyprint_xhtml(1)

startswith(1)

コード例 #1

ファイルを表示

ファイル: plugin.py プロジェクト: ovictorsoares/BooknandoPageList

def run(bk):
    """Rebuild the NCX and nav page lists from the book's page-break markers.

    Every text document yielded by ``bk.text_iter()`` is searched for
    elements matching the configured tag/attribute/value triple (stored in
    the plugin preferences; defaults to ``<span epub:type="pagebreak">``).
    One page-list entry is generated per match.  The ``<pageList>`` section
    of the NCX is then replaced, and, when the book reports an EPUB 3
    version and a nav item is declared in the OPF, the
    ``<nav epub:type="page-list">`` section of the nav document as well.

    The entry label is the element's ``title`` attribute if present,
    otherwise its first child node.  The link target is the element's
    ``id`` attribute; if it has none, ``'page' + label`` is used.

    Args:
        bk: Book container object passed in by the plugin host.  Only the
            methods called in this function are relied upon.

    Returns:
        0 on success, or -1 if the same page label was found more than once
        (the NCX and nav documents are not modified by this function in
        that case).
    """
    # get epub version number
    if bk.launcher_version() >= 20160102:
        epubversion = bk.epub_version()
    else:
        # launchers older than 20160102: read the version straight from the
        # OPF <package> element (presumably epub_version() is unavailable
        # there -- TODO confirm)
        epubversion = BeautifulSoup(bk.get_opf(), 'lxml').find('package')['version']

    # get preferences, seeding the defaults on first run
    prefs = bk.getPrefs()
    if prefs == {}:
        prefs['tag'] = 'span'
        prefs['attribute'] = 'epub:type'
        prefs['value'] = 'pagebreak'
        bk.savePrefs(prefs)
        prefs = bk.getPrefs()
    tag = prefs['tag']
    attribute = prefs['attribute']
    value = prefs['value']

    # get nav doc and toc.ncx ids
    ncx_id = bk.gettocid()
    nav_id = None

    if epubversion.startswith('3'):
        opf_soup = BeautifulSoup(bk.get_opf(), 'lxml')
        # look the nav item up once and reuse the result
        nav_item = opf_soup.find('item', {'properties': 'nav'})
        if nav_item is not None:
            nav_id = nav_item['id']
        else:
            print('Nav document ID not found!')

    ncx_pagelist = '\n  <pageList>\n    <navLabel>\n      <text>Pages</text>\n    </navLabel>'
    nav_pagelist = '    <nav epub:type="page-list" id="page-list">\n      <ol>\n'
    page_targets = 0

    substitutePageNum(bk)

    # get all html files
    page_dic = {}  # page label -> 'file#id' location(s), used to detect duplicates
    errors = 0
    for (html_id, href) in bk.text_iter():
        html = bk.readfile(html_id)

        # load html code into BeautifulSoup
        soup = BeautifulSoup(html, 'html.parser')

        # find pagebreaks
        page_numbers = soup.find_all(tag, {attribute: value})
        if not page_numbers:
            print('\nNo page number targets found in ' + os.path.basename(href))
        else:
            page_targets += len(page_numbers)
            print('\n' + str(len(page_numbers)) + ' page number targets found in ' + os.path.basename(href))

        # add pagelist entries to pagelist
        for page_number in page_numbers:

            # title has priority over string
            if page_number.has_attr('title'):
                title = page_number['title']
            else:
                title = page_number.contents[0]

            # Use the element's own id; generate one only when it is missing.
            # (Previously the generated id was immediately overwritten by an
            # unconditional page_number['id'] lookup, which raised KeyError
            # for elements without an id.)
            if page_number.has_attr('id'):
                target_id = page_number['id']
            else:
                target_id = 'page' + title
                # NOTE(review): the generated id is not written back to the
                # document here, so the link only resolves if the id is
                # added to the element elsewhere -- hence the warning.
                print('WARNING: page number target "{}" in {} has no id attribute; linking to generated id "{}".'.format(
                    title, os.path.basename(href), target_id))

            # check for duplicate titles/ids
            location = os.path.basename(href + '#' + target_id)
            if title not in page_dic:
                page_dic[title] = location
            else:
                errors += 1
                page_dic[title] += ' / ' + location
                print('ERROR: duplicate page number found:', title, page_dic[title])

            # epub2
            ncx_pagelist += '''\n    <pageTarget id="{}" type="normal" value="{}">
      <navLabel>
        <text>{}</text>
      </navLabel>
      <content src="{}"/>
    </pageTarget>'''.format(target_id, title, title, href + '#' + target_id)

            # epub3
            if nav_id:
                nav_pagelist += '        <li>\n          <a href="{}">{}</a>\n        </li>\n'.format('../' + href + '#' + target_id, title)

    if errors != 0:
        print('Plugin aborted because of {} duplicate page number(s).'.format(str(errors)))
        return -1

    # add/replace NCX pagelist section
    if page_targets:
        ncx_pagelist += '\n  </pageList>'
        if ncx_id:
            # get ncx contents
            ncx = bk.readfile(ncx_id)
            # delete existing pagelist (raw string: same pattern as before,
            # without the invalid string escapes)
            ncx = re.sub(r'\s*<pageList[^>]*>.+?</pageList>\s*', '', ncx, flags=re.DOTALL)
            # add new pagelist
            ncx = ncx.replace('</ncx>', ncx_pagelist + '\n</ncx>')
            # update ncx file
            bk.writefile(ncx_id, ncx)
            print('\n' + str(page_targets) + ' page number targets found.\nNCX file updated. ')
        else:
            print('\nNCX file couldn\'t be found and updated.')
    else:
        print('\nNo page number targets found.\nNCX file not updated')

    # add/replace NAV pagelist section
    if nav_id:
        nav_pagelist += '      </ol>\n    </nav>'
        new_pagelist = BeautifulSoup(nav_pagelist, 'html.parser')
        # get nav contents
        nav = bk.readfile(nav_id)
        nav_soup = BeautifulSoup(nav, 'html.parser')
        # snapshot taken before editing so an unchanged nav is not rewritten
        orig_nav_soup = str(nav_soup)
        old_page_list = nav_soup.find('nav', {'epub:type': 'page-list'})
        if old_page_list is not None:
            old_page_list.replace_with(new_pagelist)
        else:
            nav_soup.body.insert(2, new_pagelist)
        # update nav
        if str(nav_soup) != orig_nav_soup:
            try:
                bk.writefile(nav_id, str(nav_soup.prettyprint_xhtml(indent_level=0, eventual_encoding="utf-8", formatter="minimal", indent_chars="  ")))
            except Exception:
                # best-effort fallback: write the plain serialisation when
                # pretty-printing fails or is unavailable
                bk.writefile(nav_id, str(nav_soup))
            print('NAV file updated.')
        else:
            print('NAV file NOT updated.')

    print('\nPlease click OK to close the Plugin Runner window.')

    return 0