コード例 #1
0
def make_links_readable(html):
    """
    Make anchor tags in *html* readable for text-oriented display.

    Links whose href exceeds 90 characters are (optionally) shortened via
    goo.gl.  Anchors with no text are replaced by their bare href; anchors
    with text are replaced by a <div> holding the text followed by the href
    in parentheses.

    timing stats:
    before multiprocess = 0m18.063s
    """
    soup = BeautifulSoup(html)
    for link in soup.findAll('a'):
        # Keep a handle on the original node: `link` may be rebound to a
        # shortened replacement below, but the node we replace in the tree
        # must be the one actually attached to `soup`.
        oldlink = link
        if link and len(link.get('href', '')) > 90 and options.use_short_links:
            # Turn the long URL into a goo.gl link.
            short_link = shorten_link(soup, link)
            if short_link is not None:
                link = short_link

        if validate_link(link) and link.get('href', None):
            if not link.text:
                # No anchor text: show the raw URL instead.
                oldlink.replaceWith(
                    link.get('href', "No href link to replace with"))
            else:
                # Anchor text present: render as "text<br>(url)" in a div.
                div = Tag(soup, 'div')
                div.setString(link.text)
                br = Tag(soup, 'br')
                new_link = Tag(soup, 'a')
                new_link.setString("(%s)" % (link.get('href')))
                div.append(br)
                div.append(new_link)
                oldlink.replaceWith(div)
            print

    return soup
def addEpisode(xbmcid, scraperid, snr, enr, title, airdate):
	"""
	Add an episode entry to the XML episode database.

	Looks up the series by scraperid, creates the season node if it is
	missing, then appends an <episode> node (with <title> and <airdate>
	children) and rewrites the database file.

	Returns True on success (or when the episode already exists),
	False when the series cannot be found or the season cannot be created.
	"""
	f = getDatabase("r")
	soup = BeautifulSoup(f.read())
	f.close()
	serie = soup.find(scraperid=scraperid)
	# TODO check inconsistency
	if serie is None:
		return False
	season = serie.find(seasonnr=snr)
	if season is None:
		# Season missing: create it, then re-find to get the attached node.
		tag = Tag(soup, "season")
		tag.attrs.append(('seasonnr', snr))
		serie.append(tag)
		season = serie.find(seasonnr=snr)
	if season is None:
		# Creation failed -- report via the UI and bail out.
		util.msg(localize(50000), localize(50004))
		return False
	episode = season.find(episodenr=enr)
	if episode is None:
		episodetag = Tag(soup, "episode")
		episodetag.attrs.append(('episodenr', enr))
		titletag = Tag(soup, "title")
		titletag.insert(0, title)
		episodetag.append(titletag)
		airdatetag = Tag(soup, "airdate")
		airdatetag.insert(0, airdate)
		episodetag.append(airdatetag)
		season.append(episodetag)

		# Persist only when something actually changed.
		f = getDatabase("w")
		f.write(soup.prettify())
		f.close()
	# else:
		# check consistency
	return True
コード例 #3
0
    def unTag(self, tag):
        """
        Recursively remove unwanted tags according to defined lists.

        Children are processed first (depth-first) so decisions about this
        tag see the already-cleaned subtree.

        @param tag: tag hierarchy to work on
        """
        for child in tag.findChildren(True, recursive=False):
            self.unTag(child)
        if (self.remove_classes_regexp != "") and (tag.has_key("class") and (re.match(self.remove_classes_regexp, tag["class"]) is not None)):
            # Class matches the removal regexp: drop the whole subtree.
            tag.extract()
        elif tag.name in self.keep_tags:
            # Keep the tag but rebuild it so unwanted attributes are dropped.
            new_tag = Tag(self.input, tag.name)
            new_tag.contents = tag.contents
            tag.replaceWith(new_tag)
        elif tag.name in self.remove_tags_keep_content:
            # Unwrap: keep the (already cleaned) children, drop the tag.
            children = tag.findChildren(True, recursive=False)
            if len(children) == 1:
                tag.replaceWith(children[0])
            elif len(children) > 1:
                new_tag = Tag(self.input, "p")
                for child in children:  # reuse the list computed above
                    new_tag.append(child)
                tag.replaceWith(new_tag)
            else:
                tag.replaceWith(tag.renderContents())
        else:
            tag.extract()
コード例 #4
0
def generate_table_of_contents(soup, prefix):
    """
    Build a nested <ul> table of contents from the headers in *soup*.

    Each header is given a unique ``id`` (prefixed with *prefix*) and the
    returned <div class="toc"> contains anchor links to those ids, nested
    to mirror the header levels.  Returns None when there are no headers.
    """
    # Tracks how many times each id has been used, to de-duplicate.
    header_ids = Counter()
    headers = soup.findAll(header_re)
    if not headers:
        return
    tocdiv = Tag(soup, "div", [("class", "toc")])
    parent = Tag(soup, "ul")
    # `level` is our own bookkeeping on the Tag object (not an HTML
    # attribute): the header depth this <ul> corresponds to.
    parent.level = 0
    tocdiv.append(parent)
    level = 0
    previous = 0
    for header in headers:
        contents = u''.join(header.findAll(text=True))

        # In the event of an empty header, skip
        if not contents:
            continue

        # Convert html entities to avoid ugly header ids
        aid = unicode(
            BeautifulSoup(contents,
                          convertEntities=BeautifulSoup.XML_ENTITIES))
        # Prefix with PREFIX_ to avoid ID conflict with the rest of the page
        aid = u'%s_%s' % (prefix, aid.replace(" ", "_").lower())
        # Convert down to ascii replacing special characters with hex
        aid = str(title_re.sub(lambda c: '.%X' % ord(c.group()), aid))

        # Check to see if a tag with the same ID exists
        id_num = header_ids[aid] + 1
        header_ids[aid] += 1
        # Only start numbering ids with the second instance of an id
        if id_num > 1:
            aid = '%s%d' % (aid, id_num)

        header['id'] = aid

        li = Tag(soup, "li", [("class", aid)])
        a = Tag(soup, "a", [("href", "#%s" % aid)])
        a.string = contents
        li.append(a)

        # Header depth comes from the tag name ("h3" -> 3).
        thislevel = int(header.name[-1])

        if previous and thislevel > previous:
            # Deeper header: open a nested <ul> inside a new <li>.
            newul = Tag(soup, "ul")
            newul.level = thislevel
            newli = Tag(soup, "li", [("class", "toc_child")])
            newli.append(newul)
            parent.append(newli)
            parent = newul
            level += 1
        elif level and thislevel < previous:
            # Shallower header: climb back up to a <ul> at or above it.
            while level and parent.level > thislevel:
                parent = parent.findParent("ul")
                level -= 1

        previous = thislevel
        parent.append(li)

    return tocdiv
コード例 #5
0
ファイル: muttify.py プロジェクト: codygman/muttify
def make_links_readable(html):
    """
    Make anchor tags in *html* readable for text-oriented display.

    Links whose href exceeds 90 characters are (optionally) shortened via
    goo.gl.  Anchors with no text are replaced by their bare href; anchors
    with text become a <div> holding the text followed by the href in
    parentheses.

    timing stats:
    before multiprocess = 0m18.063s
    """
    soup = BeautifulSoup(html)
    for link in soup.findAll('a'):
        # `link` may be rebound to a shortened replacement below; the node
        # we swap out of the tree must be the one attached to `soup`.
        oldlink = link
        if link and len(link.get('href', '')) > 90 and options.use_short_links:
            # Turn the long URL into a goo.gl link.
            short_link = shorten_link(soup, link)
            if short_link is not None:
                link = short_link

        if validate_link(link) and link.get('href', None):
            if not link.text:
                # No anchor text: show the raw URL instead.
                oldlink.replaceWith(link.get('href', "No href link to replace with"))
            else:
                # Anchor text present: render as "text<br>(url)" in a div.
                div = Tag(soup, 'div')
                div.setString(link.text)
                br = Tag(soup, 'br')
                new_link = Tag(soup, 'a')
                new_link.setString("(%s)" % (link.get('href')) )
                div.append(br)
                div.append(new_link)
                oldlink.replaceWith(div)
            print

    return soup
コード例 #6
0
    def unTag(self, tag):
        """
        Recursively remove unwanted tags according to defined lists.

        Processes children first so decisions about this tag see the
        already-cleaned subtree.

        @param tag: tag hierarchy to work on
        """
        for child in tag.findChildren(True, recursive=False):
            self.unTag(child)
        if (self.remove_classes_regexp != "") and (
                tag.has_key("class") and
            (re.match(self.remove_classes_regexp, tag["class"]) is not None)):
            # Class matches the removal regexp: drop the whole subtree.
            tag.extract()
        elif tag.name in self.keep_tags:
            # Rebuild to strip attributes while keeping content.
            new_tag = Tag(self.input, tag.name)
            new_tag.contents = tag.contents
            tag.replaceWith(new_tag)
        elif tag.name in self.remove_tags_keep_content:
            # Unwrap: keep the cleaned children, drop the tag itself.
            children = tag.findChildren(True, recursive=False)
            if len(children) == 1:
                tag.replaceWith(children[0])
            elif len(children) > 1:
                new_tag = Tag(self.input, "p")
                for child in children:  # reuse the list computed above
                    new_tag.append(child)
                tag.replaceWith(new_tag)
            else:
                tag.replaceWith(tag.renderContents())
        else:
            tag.extract()
コード例 #7
0
   def savePDF(self, pdf_filename, parent_soup, target_node, yes_phrase, url, key, school_name):
       """
       Render the page (or a highlighted fragment) to PDF and append it,
       preceded by a separator page, onto *pdf_filename*.

       When *target_node* is given, the grandparent of that node is rendered
       with the matching phrase highlighted; the highlight is applied
       in-place and reverted afterwards so the soup is left unchanged.
       """
       if target_node:
          grandparent_node = target_node.parent.parent
          # Swap the node for a highlighted copy while rendering.
          tag = self.highlightedNode(target_node, yes_phrase, parent_soup)
          self.replaceNode(target_node, tag)
          body = Tag(parent_soup,"body")
          body.append(grandparent_node)
       else:
          body = parent_soup
       try:
          weasyprint = HTML(string=body.prettify())
          tmp_filename = 'pdfs/tmp.pdf'
          weasyprint.write_pdf(tmp_filename,stylesheets=[CSS(string='body { font-size: 10px; font-family: serif !important }')])
       except:
          # NOTE(review): bare except hides the real error type; consider
          # catching Exception and logging the traceback instead.
          print "weasyprint failed on url: "+url
          if target_node:
             self.replaceNode(tag, target_node) #return to old state
          return

       if target_node:
          self.replaceNode(tag, target_node) #return to old state

       sep_filename = "pdfs/sep.pdf"
       self.makeSepPage(sep_filename, url, key, school_name)

       # Merge: existing PDF (if any) + separator page + fresh render.
       merger = PdfFileMerger()
       if (os.path.exists(pdf_filename)):
           merger.append(PdfFileReader(file(pdf_filename, 'rb')))
       merger.append(PdfFileReader(file(sep_filename, 'rb')))
       merger.append(PdfFileReader(file(tmp_filename, 'rb')))
       merger.write(pdf_filename)
コード例 #8
0
def get_last_3(soup, table):
    """
    Collect columns 4+ of every row of *table* into a <div> of <li> items.

    Kept cells are renamed to <span>; all but the last kept cell of a row
    get a ' - ' separator appended.  Rows that render to empty text are
    dropped.  Returns the <div> (not yet attached to the tree).
    """
    loop = 0
    enclose = Tag(soup, "div")
    for tr in table.findAll("tr"):
        td = tr.findAll("td")
        li = Tag(soup, "li")
        for el in td[3:]:
            if loop != 3:
                try:
                    text = ''.join(el.findAll(text=True))
                    text = text.strip()
                    if text != '' and text != '&nbsp;':
                        el.name = "span"
                        if loop != 2:
                            el.append(' - ')
                        li.append(el)
                except Exception:
                    # Malformed cell: skip it rather than abort the table.
                    pass
            else:
                break
            loop += 1
        loop = 0
        if ''.join(li.findAll(text=True)) != '':
            enclose.append(li)
    return enclose
コード例 #9
0
def get_first_three(soup, table):
    """
    Extract the first three columns of *table* into a <div> holding a
    title followed by a <ul> of rows.

    The first non-empty cell overall becomes the title; later cells are
    renamed to <span> and collected into per-row <li> items with ' - '
    separators.  Empty rows are dropped.
    """
    loop = 0
    first = 1
    enclose = Tag(soup, "div")
    for tr in table.findAll("tr"):
        li = Tag(soup, "li")
        for td in tr.findAll("td"):
            if loop != 3:
                try:
                    text = ''.join(td.findAll(text=True))
                    text = text.strip()
                    if text != '' and text != '&nbsp;':
                        td.name = "span"
                        if first == 1:
                            # First non-empty cell overall is the title.
                            first = 0
                            enclose.append(td)
                        else:
                            if loop != 2:
                                td.append(' - ')
                            li.append(td)
                except Exception:
                    # Malformed cell: skip it rather than abort the table.
                    pass
            else:
                break
            loop += 1
        loop = 0
        if ''.join(li.findAll(text=True)) != '':
            enclose.append(li)
    # NOTE(review): if the table had no non-empty cells, find("span")
    # returns None and replaceWith below raises -- confirm callers only
    # pass populated tables.
    title = enclose.find("span")
    enclose.find("span").replaceWith("")
    enclose.name = "ul"
    div = Tag(soup, "div")
    div.append(title)
    div.append(enclose)
    return div
コード例 #10
0
def get_last_3(soup, table):
    """
    Collect columns 4+ of every row of *table* into a <div> of <li> items.

    Kept cells are renamed to <span>; all but the last kept cell of a row
    get a ' - ' separator appended.  Rows that render to empty text are
    dropped.  Returns the <div> (not yet attached to the tree).
    """
    loop = 0
    enclose = Tag(soup, "div")
    for tr in table.findAll("tr"):
        td = tr.findAll("td")
        li = Tag(soup, "li")
        for el in td[3:]:
            if loop != 3:
                try:
                    text = ''.join(el.findAll(text=True))
                    text = text.strip()
                    if text != '' and text != '&nbsp;':
                        el.name = "span"
                        if loop != 2:
                            el.append(' - ')
                        li.append(el)
                except Exception:
                    # Malformed cell: skip it rather than abort the table.
                    pass
            else:
                break
            loop += 1
        loop = 0
        if ''.join(li.findAll(text=True)) != '':
            enclose.append(li)
    return enclose
コード例 #11
0
ファイル: pgessay.py プロジェクト: ChrisCinelli/pgessays
def rewriteLinksSection(dom, soup, links_table):
    """
    Replace the essay's links <table> with a plain "Links" list.

    Harvests (href, caption) pairs from the table's verdana font cells,
    removes the table, and -- unless INCLUDE_LINKS is off or nothing was
    found -- appends a bold "Links" heading plus a <ul> of anchors to *dom*.
    Captions ending in " Translation" are skipped when OMIT_TRANSLATIONS.
    """
    links = []
    for fnt in links_table.findAll('font', {'size': '2', 'face':'verdana'}):
        # Only cells whose serialized form starts with the expected anchor
        # markup are treated as link entries.
        if str(fnt).startswith('<font size="2" face="verdana"><a href="'):
            link = fnt.find('a')

            caption = link.getText('').strip()
            if caption.endswith(' Translation') and OMIT_TRANSLATIONS:
                continue

            links.append((link['href'], caption))

    links_table.decompose()

    if not INCLUDE_LINKS or len(links) == 0:
        return

    b = Tag(soup, 'b')
    b.string = 'Links'
    dom.append(b)

    ul = Tag(soup, 'ul')
    for url, caption in links:
        li = Tag(soup, 'li')
        # NOTE(review): BeautifulSoup 3's Tag() expects attrs as a list of
        # (name, value) tuples; a dict here works only on some versions --
        # confirm against the BS version in use.
        a = Tag(soup, 'a', {'href': url})
        a.string = caption
        li.append(a)
        ul.append(li)

    dom.append(ul)
コード例 #12
0
def get_first_three(soup, table):
    """
    Extract the first three columns of *table* into a <div> holding a
    title followed by a <ul> of rows.

    The first non-empty cell overall becomes the title; later cells are
    renamed to <span> and collected into per-row <li> items with ' - '
    separators.  Empty rows are dropped.
    """
    loop = 0
    first = 1
    enclose = Tag(soup, "div")
    for tr in table.findAll("tr"):
        li = Tag(soup, "li")
        for td in tr.findAll("td"):
            if loop != 3:
                try:
                    text = ''.join(td.findAll(text=True))
                    text = text.strip()
                    if text != '' and text != '&nbsp;':
                        td.name = "span"
                        if first == 1:
                            # First non-empty cell overall is the title.
                            first = 0
                            enclose.append(td)
                        else:
                            if loop != 2:
                                td.append(' - ')
                            li.append(td)
                except Exception:
                    # Malformed cell: skip it rather than abort the table.
                    pass
            else:
                break
            loop += 1
        loop = 0
        if ''.join(li.findAll(text=True)) != '':
            enclose.append(li)
    # NOTE(review): if the table had no non-empty cells, find("span")
    # returns None and replaceWith below raises -- confirm callers only
    # pass populated tables.
    title = enclose.find("span")
    enclose.find("span").replaceWith("")
    enclose.name = "ul"
    div = Tag(soup, "div")
    div.append(title)
    div.append(enclose)
    return div
コード例 #13
0
ファイル: wikipedia.py プロジェクト: kascote/journalisted
 def fix_heading(heading, tags):
     '''
     Remove paragraphs with no strings.
     Remove non-special headings that don't start with a paragraph.
     Remove lists from non-special headings.
     '''
     SPECIAL = ['Books', 'Works', 'Bibliography', 'External links',
                'Further reading']
     # Keep real tags that are either not paragraphs or are non-empty
     # paragraphs.  Parenthesised explicitly: in the original, `and`
     # bound tighter than `or`, so a None entry fell through to
     # tag.renderContents() and crashed instead of being filtered out.
     tags = [tag for tag in tags if tag is not None and
                 (tag.name != 'p' or tag.renderContents(None).strip())]
     special = False
     heading_text = tagtext(heading)
     for word in SPECIAL:
         if word.lower() in heading_text.lower():
             special = True
     if heading_text == 'External links and references':
         set_heading_text(heading, 'External links')
     # Shorten lists (even special ones).
     # The motivation is that some pages like to list reams of crap,
     # usually in bibliographies, but in other things too.
     found_lis = 0
     MAX_ITEMS = 10  # per headed section
     for tag in list(tags):
         if tag.name in ('ul', 'ol'):
             for li in tag.findAll('li', recursive=False):
                 found_lis += 1
                 if found_lis > MAX_ITEMS:
                     li.extract()
     # Remove any now-empty uls and ols.
     # Harder than it sounds, due to nested lists.
     # NOTE(review): `soup` is not a parameter here -- it must resolve to
     # a module-level name at runtime; confirm.
     temp = Tag(soup, 'p')
     for tag in tags:
         temp.append(tag)
     for tag in temp.findAll(('ul', 'ol')):
         if not tag.findAll(('ul', 'ol', 'li')):
             tag.extract()
     tags = temp.contents
     if found_lis > MAX_ITEMS:
         # Add " (some omitted)" to heading
         if heading_text:
             heading_text = heading_text.replace(' (incomplete)', '')
             if context['srcurl'].startswith('http:'):
                 heading_text += ' (some <a href="%s">omitted</a>)' % context['srcurl']
             else:
                 heading_text += ' (some omitted)'  # no "relative" links
             set_heading_text(heading, heading_text)
     if not special:
         if heading is not None:
             # Remove non-special headings which don't start with a paragraph.
             if not tags or tags[0].name != 'p':
                 return drop_heading(heading)
             # Remove non-special headings containing lists.
             for tag in tags:
                 if tag.name in ('ul', 'ol'):
                     return drop_heading(heading)
         else:
             # Remove lists from None (before first heading, if any).
             tags = [tag for tag in tags if tag.name not in ('ul', 'ol')]
     return (heading, tags)
コード例 #14
0
ファイル: policies.py プロジェクト: rprz/reddit
 def _number_sections(self, soup):
     """Insert a numbered self-link anchor at the start of each top-level
     paragraph of the markdown body div."""
     paragraphs = soup.find("div", "md").findAll(["p"], recursive=False)
     for count, para in enumerate(paragraphs, 1):
         anchor = Tag(soup, "a", [("class", "p-anchor"), ("id", "p_%d" % count), ("href", "#p_%d" % count)])
         anchor.append(str(count))
         para.insert(0, anchor)
         para.insert(1, " ")
コード例 #15
0
ファイル: filters.py プロジェクト: pra85/reddit
def generate_table_of_contents(soup, prefix):
    """
    Build a nested <ul> table of contents from the headers in *soup*.

    Each header receives a unique ``id`` (prefixed with *prefix*) and the
    returned <div class="toc"> links to those ids, nested to mirror the
    header levels.  Returns None when there are no headers.
    """
    # Tracks how many times each id has been used, to de-duplicate.
    header_ids = Counter()
    headers = soup.findAll(header_re)
    if not headers:
        return
    tocdiv = Tag(soup, "div", [("class", "toc")])
    parent = Tag(soup, "ul")
    # `level` is our own bookkeeping on the Tag object (not an HTML
    # attribute): the header depth this <ul> corresponds to.
    parent.level = 0
    tocdiv.append(parent)
    level = 0
    previous = 0
    for header in headers:
        contents = u''.join(header.findAll(text=True))

        # In the event of an empty header, skip
        if not contents:
            continue

        # Convert html entities to avoid ugly header ids
        aid = unicode(BeautifulSoup(contents, convertEntities=BeautifulSoup.XML_ENTITIES))
        # Prefix with PREFIX_ to avoid ID conflict with the rest of the page
        aid = u'%s_%s' % (prefix, aid.replace(" ", "_").lower())
        # Convert down to ascii replacing special characters with hex
        aid = str(title_re.sub(lambda c: '.%X' % ord(c.group()), aid))

        # Check to see if a tag with the same ID exists
        id_num = header_ids[aid] + 1
        header_ids[aid] += 1
        # Only start numbering ids with the second instance of an id
        if id_num > 1:
            aid = '%s%d' % (aid, id_num)

        header['id'] = aid

        li = Tag(soup, "li", [("class", aid)])
        a = Tag(soup, "a", [("href", "#%s" % aid)])
        a.string = contents
        li.append(a)

        # Header depth comes from the tag name ("h3" -> 3).
        thislevel = int(header.name[-1])

        if previous and thislevel > previous:
            # Deeper header: open a nested <ul> inside a new <li>.
            newul = Tag(soup, "ul")
            newul.level = thislevel
            newli = Tag(soup, "li", [("class", "toc_child")])
            newli.append(newul)
            parent.append(newli)
            parent = newul
            level += 1
        elif level and thislevel < previous:
            # Shallower header: climb back up to a <ul> at or above it.
            while level and parent.level > thislevel:
                parent = parent.findParent("ul")
                level -= 1

        previous = thislevel
        parent.append(li)

    return tocdiv
コード例 #16
0
def linearize_cols_3(soup, table):
    """Linearize a table marked id="linearize-cols-3" into nested lists.

    Extracts the trailing columns first, then the leading ones (the
    helpers mutate the table in place, so this order matters), wraps both
    in a <ul class="div-container"> and swaps it in for the table.
    """
    if table.get('id') != "linearize-cols-3":
        return
    container = Tag(soup, "ul")
    container["class"] = "div-container"
    tail = get_last_3(soup, table)
    head = get_first_3(soup, table)
    container.append(head)
    container.append(tail)
    table.replaceWith(container)
コード例 #17
0
ファイル: templater.py プロジェクト: altstone/doorscenter
 def CreateBody(self):
     '''Create the <body>: a random div skeleton filled with content blocks.'''
     body = Tag(self.soup, 'body')
     # Total number of tags the generated template should roughly contain.
     totalTagsCount = random.randint(150, 400)

     '''Создаем структуру шаблона из тегов div'''
     # Build the template skeleton out of nested <div> tags.
     for _ in range(random.randint(1, 3)):
         body.append(self.CreateDiv())
     # 15-25% of the tag budget goes to divs (Python 2 integer division).
     divsTotalCount = totalTagsCount * random.randint(15, 25) / 100
     while divsTotalCount > 0:
         # Pick a childless div and grow 2-4 children under it.
         divsLowLevelList = [item for item in body.findAll('div') if len(item.findAll(True)) == 0]
         divToExtend = random.choice(divsLowLevelList)
         for _ in range(random.randint(2, 4)):
             divToExtend.append(self.CreateDiv())
             divsTotalCount -= 1

     '''Получаем список тегов div разных уровней'''
     # Classify the divs into top-, mid- and leaf-level groups.
     divsList = body.findAll('div')
     divsTopLevelList = [item for item in body.findAll('div', recursive=False)]
     divsLowLevelList = [item for item in divsList if len(item.findAll(True)) == 0]
     divsMidLevelList = [item for item in divsList if item not in divsTopLevelList and item not in divsLowLevelList]

     '''Проставляем им атрибуты'''
     # Assign id/class attributes with level-specific probabilities.
     for item in divsTopLevelList:
         self.AppendIds(item, 95, 1)
     for item in divsMidLevelList:
         self.AppendIds(item, 20, 75)
     for item in divsLowLevelList:
         self.AppendIds(item, 30, 65)

     '''Создаем наполнение главных блоков'''
     # Reserve leaf divs for the main placeholder blocks.
     # NOTE(review): assumes enough leaf divs remain for every pop() below;
     # confirm the growth loop above guarantees that.
     divHeader = divsLowLevelList.pop(random.randint(0, 2))
     divHeader.string = '[header]'
     divMain = divsLowLevelList.pop(random.randint(1, 3))
     divMain.string = '[main]'
     divLinks = divsLowLevelList.pop(random.randint(-3, -1))
     divLinks.string = '[links]'
     divFooter = divsLowLevelList.pop(random.randint(-3, -1))
     divFooter.string = '[footer]'

     '''Создаем меню, сайдбары и формы'''
     # Menus, sidebars and forms in some of the remaining leaf divs.
     for _ in range(random.randint(1, 2)):
         menu = divsLowLevelList.pop()
         menu.append(self.CreateList(0))
     for _ in range(random.randint(1, 2)):
         sidebar = divsLowLevelList.pop()
         self.CreateSidebar(sidebar)
     for _ in range(random.randint(0, 2)):
         form = divsLowLevelList.pop()
         form.append(self.CreateForm())

     '''Создаем прочее наполнение'''
     # Fill a few more random leaf divs with miscellaneous content.
     random.shuffle(divsLowLevelList)
     for _ in range(random.randint(2, 5)):
         div = divsLowLevelList.pop()
         self.CreateOthers(div)
     self.soup.html.append(body)
コード例 #18
0
def linearize_cols_2(soup, table):
    """Swap a two-column table (id="linearize-cols-2") for nested <ul>s.

    The trailing column is extracted before the leading one because the
    helpers mutate the table in place.
    """
    if table.get('id') != "linearize-cols-2":
        return
    container = Tag(soup, "ul")
    container["class"] = "ul-container"
    tail = get_last_two(soup, table)
    head = get_first_two(soup, table)
    container.append(head)
    container.append(tail)
    table.replaceWith(container)
コード例 #19
0
def linearize_cols_3(soup, table):
    """Replace a table marked id="linearize-cols-3" with nested lists.

    Builds the trailing-column list, then the leading-column block (order
    matters: the helpers mutate the table), and swaps the combined
    <ul class="div-container"> in for the table.
    """
    if table.get('id') != "linearize-cols-3":
        return
    wrapper = Tag(soup, "ul")
    wrapper["class"] = "div-container"
    last_part = get_last_3(soup, table)
    first_part = get_first_3(soup, table)
    wrapper.append(first_part)
    wrapper.append(last_part)
    table.replaceWith(wrapper)
コード例 #20
0
def linearize_cols_2(soup, table):
    """Replace a table marked id="linearize-cols-2" with nested lists.

    Extracts the trailing column, then the leading one (the helpers
    mutate the table in place), and substitutes the combined
    <ul class="ul-container"> for the table.
    """
    if table.get('id') != "linearize-cols-2":
        return
    wrapper = Tag(soup, "ul")
    wrapper["class"] = "ul-container"
    last_part = get_last_two(soup, table)
    first_part = get_first_two(soup, table)
    wrapper.append(first_part)
    wrapper.append(last_part)
    table.replaceWith(wrapper)
コード例 #21
0
 def _number_sections(self, soup):
     """Prefix each top-level paragraph of the md div with a numbered
     anchor that links to itself."""
     md_paragraphs = soup.find('div', 'md').findAll(['p'], recursive=False)
     for count, para in enumerate(md_paragraphs, 1):
         anchor = Tag(soup, 'a', [
             ('class', 'p-anchor'),
             ('id', 'p_%d' % count),
             ('href', '#p_%d' % count),
         ])
         anchor.append(str(count))
         para.insert(0, anchor)
         para.insert(1, ' ')
コード例 #22
0
def replace_courier(soup):
    """Convert elements whose inline style mentions courier into real
    <code> tags.

    Lacking a better option, courier font marks <code> within tinyMCE.
    Most users won't be needing this(?), so this code is not called
    anywhere but kept for reference.
    """
    def _is_courier(node):
        # Attribute check via has_key (BeautifulSoup 3 Tag API).
        return node.has_key('style') and 'courier' in node['style']

    for styled in soup.findAll(_is_courier):
        code_tag = Tag(soup, 'code')
        # Move every child across, one at a time, then swap the nodes.
        while styled.contents:
            code_tag.append(styled.contents[0])
        styled.replaceWith(code_tag)
コード例 #23
0
ファイル: xbmclibrary.py プロジェクト: sbeatz/xbmc
def SetupAmazonLibrary():
    """
    Ensure XBMC's sources.xml contains the Amazon Movies/TV video sources.

    Reads the profile's sources.xml (building a skeleton <sources> tree
    when it cannot be read or parsed), inserts any missing Amazon source
    entries into the <video> section, saves the file and offers to
    restart XBMC.
    """
    common.Log('Trying to add Amazon source paths...')
    source_path = os.path.join(common.profilpath, 'sources.xml')
    source_added = False

    try:
        # Renamed from `file`: don't shadow the Python 2 builtin.
        src = open(source_path)
        soup = BeautifulSoup(src)
        src.close()
    except Exception:
        # Missing or unreadable sources.xml: build an empty skeleton with
        # the standard category sections.
        subtags = ['programs', 'video', 'music', 'pictures', 'files']
        soup = BeautifulSoup('<sources></sources>')
        root = soup.sources
        for cat in subtags:
            cat_tag = Tag(soup, cat)
            def_tag = Tag(soup, 'default')
            def_tag['pathversion'] = 1
            cat_tag.append(def_tag)
            root.append(cat_tag)

    video = soup.find("video")

    if len(soup.findAll(text="Amazon Movies")) < 1:
        movie_source_tag = Tag(soup, "source")
        movie_name_tag = Tag(soup, "name")
        movie_name_tag.insert(0, "Amazon Movies")
        movie_path_tag = Tag(soup, "path")
        movie_path_tag['pathversion'] = 1
        movie_path_tag.insert(0, MOVIE_PATH)
        movie_source_tag.insert(0, movie_name_tag)
        movie_source_tag.insert(1, movie_path_tag)
        video.insert(2, movie_source_tag)
        source_added = True

    if len(soup.findAll(text="Amazon TV")) < 1:
        tvshow_source_tag = Tag(soup, "source")
        tvshow_name_tag = Tag(soup, "name")
        tvshow_name_tag.insert(0, "Amazon TV")
        tvshow_path_tag = Tag(soup, "path")
        tvshow_path_tag['pathversion'] = 1
        tvshow_path_tag.insert(0, TV_SHOWS_PATH)
        tvshow_source_tag.insert(0, tvshow_name_tag)
        tvshow_source_tag.insert(1, tvshow_path_tag)
        video.insert(2, tvshow_source_tag)
        source_added = True

    if source_added:
        common.Log('Source paths added!')
        SaveFile(source_path, str(soup))
        dialog.ok(common.getString(30187), common.getString(30188), common.getString(30189), common.getString(30190))
        if dialog.yesno(common.getString(30191), common.getString(30192)):
            xbmc.executebuiltin('RestartApp')
コード例 #24
0
def replace_courier(soup):
    """Lacking a better option, I use courier font to mark <code>
    within tinyMCE. And I want to turn that into real code tags.

    Most users won't be needing this(?), so this code is not called anywhere
    but kept for reference
    """
    # BeautifulSoup 3's Tag.__contains__ tests tag *contents*, not
    # attributes, so the original `'style' in s` never matched the style
    # attribute.  has_key() queries attributes, which is what is intended
    # here (and what the other copy of this function in this file uses).
    for t in soup.findAll(lambda s:
                          s.has_key('style') and 'courier' in s['style']):
        tag = Tag(soup, 'code')
        while t.contents:
            tag.append(t.contents[0])
        t.replaceWith(tag)
コード例 #25
0
def generate_table(summary):
    """
    Render *summary* (an iterable of rows) as an HTML results table.

    Emits a header row (TestSuite / Passed / Failed / Total) on a blue
    background followed by one row per entry, and returns the prettified
    HTML string (also printed for logging).
    """
    soup = BeautifulSoup()
    new_tag_table = Tag(soup, "table")
    new_tag_table["border"] = 1
    new_tag_table["cellspacing"] = 0
    new_tag_table["cellpadding"] = 0
    new_tag_table["bordercolordark"] = "#000000"
    # NOTE(review): this overwrites the cellspacing=0 set above with a
    # color value -- "bordercolorlight" was probably intended; confirm.
    new_tag_table["cellspacing"] = "#ffffff"
    soup.append(new_tag_table)
    # Header row.
    new_Tag_tr = Tag(soup, "tr")
    new_Tag_tr["bgcolor"] = "#0072E3"
    new_tag_table.append(new_Tag_tr)
    for i in ["TestSuite", "Passed", "Failed", "Total"]:
        new_Tag_td = Tag(soup, "td")
        new_Tag_td.string = str(i)
        new_Tag_tr.append(new_Tag_td)
    # One row per summary entry.
    for i in summary:
        new_Tag_tr = Tag(soup, "tr")
        new_tag_table.append(new_Tag_tr)
        for j in i:
            new_Tag_td = Tag(soup, "td")
            new_Tag_td.string = str(j)
            new_Tag_tr.append(new_Tag_td)
    print str(soup.prettify())
    return str(soup.prettify())
コード例 #26
0
ファイル: requestreplacer.py プロジェクト: mwicat/pswproxy
        def footer_op(soup):
            """Append a fixed-position footer (read from footer.html) to
            the page body; no-op when the document lacks head or body."""
            head = soup.find("head")
            body = soup.find("body")
            if head is None or body is None:
                # Not a full HTML document -- nothing to attach to.
                return

            wrapper = Tag(soup, "div")
            wrapper['style'] = "position: fixed; width: 100%; height: auto; z-index: 10000; bottom: 0pt; display: block;"
            inner = Tag(soup, "div")
            inner['style'] = "background-color: rgb(15, 25, 35); color: white; height: auto"
            inner.append(NavigableString(open("footer.html").read()))
            wrapper.append(inner)
            body.append(wrapper)
コード例 #27
0
ファイル: templater.py プロジェクト: altstone/doorscenter
 def CreateSelect(self):
     """Build a <select> with 3-12 <option> children, randomized ids and
     shuffled attributes; the first option is usually preselected."""
     select = Tag(self.soup, 'select')
     select['name'] = self.GenerateName()
     option_count = random.randint(3, 12)
     for _ in range(option_count):
         opt = Tag(self.soup, 'option')
         opt['value'] = self.textShort
         opt.string = self.textShortCap
         select.append(opt)
     # ~80% of the time, mark the first option selected.
     if self._Probability(80):
         select.option['selected'] = 'selected'
     self.AppendIds(select, 10, 30)
     self.ShuffleAttributes(select)
     return select
コード例 #28
0
def generateContentDivTag(baseDir, h3text):
    """
    Build a <div class="content band-content"> section for *baseDir*.

    Adds an <h3> heading with *h3text*, then for every .shtml file under
    PARENT_DIR/baseDir creates a band <ul> with a nested album list read
    from each file's album-name anchors.  Also increments the global
    __main__.contentCount once per album found.
    """
    import __main__

    contentDivTag = Tag(formatSoup, 'div', attrs={'class' : 'content band-content'})
    # Embed the section heading.
    h3tag = Tag(formatSoup, 'h3')
    h3tag.append(NavigableString(h3text))
    contentDivTag.append(h3tag)


    # Generate the HTML.
    for file in os.listdir(PARENT_DIR + baseDir):
        if file.endswith(SHTML_EXT):
            # Build the band-name <ul>.
            progreUlTag = generateUlTag('/' + baseDir, file, 'column')
            albumLiTag = Tag(formatSoup, 'li')
            progreUlTag.append(albumLiTag)

            # Build the album-name <ul> entries from the file's anchors.
            fileSoup = BeautifulSoup(open('/'.join([PARENT_DIR, '/' + baseDir, file])))
            albumList = []
            for albumClassTag in fileSoup.findAll('a', {'class' : 'album-name'}):
                albumList.append(albumClassTag['href'].split('/')[-1])
                __main__.contentCount += 1

            albumDir = '/'.join([baseDir, file.split('.')[0]])
            for album in albumList:
                albumUlTag = generateUlTag('/' + albumDir, album, 'child-column')
                albumLiTag.append(albumUlTag)
            contentDivTag.append(progreUlTag)

    return contentDivTag
コード例 #29
0
 def CreateSelect(self):
     """Create a <select> element populated with 3-12 random <option>
     entries; attributes are randomized and shuffled afterwards."""
     sel = Tag(self.soup, 'select')
     sel['name'] = self.GenerateName()
     for _unused in range(random.randint(3, 12)):
         item = Tag(self.soup, 'option')
         item['value'] = self.textShort
         item.string = self.textShortCap
         sel.append(item)
     if self._Probability(80):
         # Usually preselect the first option.
         sel.option['selected'] = 'selected'
     self.AppendIds(sel, 10, 30)
     self.ShuffleAttributes(sel)
     return sel
コード例 #30
0
def linearize_cols_1(soup, table):
    """
    Flatten a table marked id="linearize-cols-1" into a
    <ul class="linearized">.

    Each non-empty <td> becomes an <li> (its <p> children demoted to
    <span>); the finished list replaces the table in the soup.
    """
    if table.get('id') == "linearize-cols-1":
        ul = Tag(soup, "ul")
        ul["class"] = "linearized"
        for td in table.findAll("td"):
            for p in td.findAll("p"):
                p.name = "span"
            try:
                text = ''.join(td.findAll(text=True))
                text = text.strip()
                if text != '' and text != '&nbsp;':
                    td.name = "li"
                    ul.append(td)
            except Exception:
                # Malformed cell: skip it rather than abort the table.
                pass
        table.replaceWith(ul)
コード例 #31
0
def linearize_rows_1(soup, table):
    """
    Flatten a table marked id="linearize-rows-1" into a centered <div>.

    Every cell is demoted to <span> (including its <p> children) and the
    cells are emitted in order, separated by "<span> | </span>".
    """
    if table.get('id') == "linearize-rows-1":
        div = Tag(soup, "div")
        div["class"] = "center"
        for tr in table.findAll("tr"):
            lista = tr.findAll("td")
            for td in lista:
                for p in td.findAll("p"):
                    p.name = "span"
                td.name = "span"
                # NOTE(review): BeautifulSoup 3 compares Tags by rendered
                # content, so `td == lista[-1]` also matches any earlier
                # cell identical to the last one -- confirm that is OK.
                if td == lista[-1]:
                    td = BeautifulSoup(td.prettify())
                else:
                    td = BeautifulSoup(td.prettify() + '<span> | </span>')
                div.append(td)
        table.replaceWith(div)
コード例 #32
0
ファイル: wiktionary_filter.py プロジェクト: gerald-kim/rdict
    def soup_filter_zz_fold_etymology( self, content ):
        """
        Collapse Wiktionary "Etymology" sections behind show/hide links.

        For each h2/h3/h4 heading (class "head") whose first text starts
        with 'etymology', a [show] javascript link is appended to the
        heading and the following paragraphs / text nodes are moved into
        a hidden <div id="etymology_N">.
        """
        heads = content.findAll( 'h2', {'class':'head'} ) + content.findAll( 'h3', {'class':'head'} ) + content.findAll( 'h4', {'class':'head'} )
        etymologys = []
        for h in heads:
            # h.next is the first node after the heading tag; an etymology
            # section is recognised by its leading text.
            if h.next and h.next.lower().startswith('etymology'):
                etymologys.append( h )

        etymology_index = 1
        for e in etymologys:
            # Hidden container that the [show] link toggles.
            div = Tag( content, 'div' )
            div['id'] = u'etymology_'+str(etymology_index)
            div['style'] = u'display:none'
            linkSoup = BeautifulSoup( u''' <a href="javascript:f('%s',this)">[show]</a>''' % (div['id']) )
            e.append( linkSoup )

            paragraphs = []

            # Collect the heading's trailing text nodes and <p> siblings.
            n = e.nextSibling
            first = 1
            while n and (n.__class__.__name__ == 'NavigableString' or  (n.__dict__.has_key('name') and n.name == 'p') ):
                paragraphs.append( n )
                n = n.nextSibling

            # Appending to div detaches each node from its old parent.
            [div.append(p) for p in paragraphs]

            # Insert the hidden div right after the heading.
            eIndex = e.parent.contents.index( e )
            e.parent.insert( eIndex + 1, div )

            etymology_index = etymology_index + 1
コード例 #33
0
def linearize_cols_1(soup, table):
    """Turn a table marked "linearize-cols-1" into a flat <ul> of its cells.

    Each <td> with visible text becomes an <li>; inner <p> tags are demoted
    to <span>. Empty and &nbsp;-only cells are dropped.
    """
    if table.get('id') == "linearize-cols-1":
            ul = Tag(soup, "ul")
            ul["class"] = "linearized"
            for td in table.findAll("td"):
                for p in td.findAll("p"):
                    p.name = "span"
                try:
                    text = ''.join(td.findAll(text=True))
                    text = text.strip()
                    if text != '' and text != '&nbsp;':
                        td.name = "li"
                        ul.append(td)
                except Exception:
                    # Best effort: skip cells whose text cannot be joined.
                    # (Was a bare except, which also swallowed SystemExit
                    # and KeyboardInterrupt.)
                    pass
            table.replaceWith(ul)
コード例 #34
0
def linearize_rows_1(soup, table):
    """Flatten a "linearize-rows-1" table into a centered <div> of spans,
    inserting " | " between cells but not after the last cell of a row."""
    if table.get('id') == "linearize-rows-1":
        wrapper = Tag(soup, "div")
        wrapper["class"] = "center"
        separator = '<span> | </span>'
        for row in table.findAll("tr"):
            cells = row.findAll("td")
            for cell in cells:
                for paragraph in cell.findAll("p"):
                    paragraph.name = "span"
                cell.name = "span"
                markup = cell.prettify()
                if cell != cells[-1]:
                    markup += separator
                wrapper.append(BeautifulSoup(markup))
        table.replaceWith(wrapper)
コード例 #35
0
def linearize_states(soup, table):
    """Convert a "linearize-states" table into a nested list: each 40%-wide
    cell opens a new <li>, and the remaining cells of the row are appended
    to it as <ul> elements."""
    if table.get('id') == "linearize-states":
        listing = Tag(soup, "ul")
        listing["class"] = "text-level3"
        current = None
        for row in table.findAll("tr"):
            row.name = "span"
            row["class"] = "spaced"
            for cell in row.findAll("td"):
                # NOTE(review): cell["width"] raises KeyError when a td has
                # no width attribute, and `current` stays None until the
                # first 40% cell — assumed valid for the input tables.
                if cell["width"] == "40%":
                    cell.name = "li"
                    current = cell
                else:
                    current.append(cell)
                    cell.name = "ul"
            listing.append(row)
        table.replaceWith(listing)
コード例 #36
0
ファイル: templater.py プロジェクト: altstone/doorscenter
 def CreateList(self, probNested):
     '''Create a <ul> element, nesting a sub-list with the given probability.'''
     ul = Tag(self.soup, 'ul')
     # Randomly attach id/class attributes (args 50, 30 look like
     # percentage chances — confirm against AppendIds).
     self.AppendIds(ul, 50, 30)
     liClass = self.GenerateClass(0)
     # 3-7 list items sharing one generated class.
     for _ in range(random.randint(3, 7)):
         ul.append(self.CreateListItem(liClass))
     if self._Probability(probNested):
         # Nest a (non-nested) sub-list inside 1-4 randomly chosen items.
         liNestedList = ul.findAll('li')
         random.shuffle(liNestedList)
         liNestedList = liNestedList[:random.randint(1, 4)]
         for liNested in liNestedList:
             liNested.append(self.CreateList(0))
     # Fill any <li> that ended up with no child tags with link text.
     for li in ul.findAll('li'):
         if len(li.findAll(True)) == 0:
             li.append(self.CreateLinkText())
     return ul
コード例 #37
0
def linearize_states(soup, table):
    """Linearize a "linearize-states" table: 40%-wide cells become <li>
    anchors; the other cells of the same row are folded under them."""
    if table.get('id') != "linearize-states":
        return
    ul = Tag(soup, "ul")
    ul["class"] = "text-level3"
    anchor = None
    for tr in table.findAll("tr"):
        tr.name = "span"
        tr["class"] = "spaced"
        for td in tr.findAll("td"):
            # NOTE(review): assumes every td carries a width attribute and
            # the first one in the table is 40% wide; otherwise this raises.
            wide = td["width"] == "40%"
            if wide:
                td.name = "li"
                anchor = td
            else:
                anchor.append(td)
                td.name = "ul"
        ul.append(tr)
    table.replaceWith(ul)
コード例 #38
0
 def CreateList(self, probNested):
     '''Build an unordered list, nested with probability probNested.'''
     ul = Tag(self.soup, 'ul')
     # Attach random id/class attributes (50 / 30 appear to be percent
     # chances — verify in AppendIds).
     self.AppendIds(ul, 50, 30)
     liClass = self.GenerateClass(0)
     # Between 3 and 7 items, all using the same generated class.
     for _ in range(random.randint(3, 7)):
         ul.append(self.CreateListItem(liClass))
     if self._Probability(probNested):
         # Pick 1-4 random items and give each a non-nested sub-list.
         liNestedList = ul.findAll('li')
         random.shuffle(liNestedList)
         liNestedList = liNestedList[:random.randint(1, 4)]
         for liNested in liNestedList:
             liNested.append(self.CreateList(0))
     # Ensure no item is left empty: leaves get generated link text.
     for li in ul.findAll('li'):
         if len(li.findAll(True)) == 0:
             li.append(self.CreateLinkText())
     return ul
コード例 #39
0
def linearize_cols_2_bold(soup, table):
    """Linearize a "linearize-cols-2-bold" table: 22%-wide cells open an
    <li>, sibling cells are nested under the most recent one as <ul>, and
    "Example" paragraphs become list items of their own."""
    if table.get('id') == "linearize-cols-2-bold":
        listing = Tag(soup, "ul")
        listing["class"] = "linearized"
        for row in table.findAll("tr"):
            row.name = "span"
            row["class"] = "spaced"
            for cell in row.findAll("td"):
                # NOTE(review): assumes the first cell encountered is 22%
                # wide; otherwise `head` is unbound here.
                if cell["width"] == "22%":
                    cell.name = "li"
                    head = cell
                else:
                    head.append(cell)
                    cell.name = "ul"
                for example in cell.findAll("p", {"class": "Example"}):
                    example.name = "li"
            listing.append(row)
        table.replaceWith(listing)
コード例 #40
0
def sub_table2(soup, subtable):
    """Collapse a nested citation table into a single-item <ul>.

    Joins the text of every non-empty cell, comma-separated, prefixed with
    an italic "ALWD Guide to Legal Citation:" label, and returns the <ul>.
    """
    ul = Tag(soup, "ul")
    li = Tag(soup, "li")
    string = '<i>ALWD Guide to Legal Citation: </i>'
    for tr in subtable.findAll("tr"):
        for td in tr.findAll("td"):
            try:
                # Join the cell text once (the original traversed the cell
                # twice); the emptiness test uses the stripped form, but the
                # raw text is what gets appended, as rendered.
                raw = ''.join(td.findAll(text=True))
                text = raw.strip()
                if text != '' and text != '&nbsp;':
                    string += raw
                    string += ', '
            except Exception:
                # Best effort: skip cells whose text cannot be joined.
                pass
        # Drop the trailing ", " left by the last cell of the row.
        string = string.strip().rstrip(',')
    li.append(BeautifulSoup(string))
    ul.append(li)
    return ul
コード例 #41
0
def linearize_cols_2_bold(soup, table):
    """Rewrite a "linearize-cols-2-bold" table as a flat list: each 22%-wide
    cell becomes an <li>, other cells are tucked under it as <ul>, and
    "Example" paragraphs are promoted to list items."""
    if table.get('id') != "linearize-cols-2-bold":
        return
    ul = Tag(soup, "ul")
    ul["class"] = "linearized"
    for tr in table.findAll("tr"):
        tr.name = "span"
        tr["class"] = "spaced"
        for td in tr.findAll("td"):
            # NOTE(review): `tag` is unbound until the first 22% cell —
            # assumed valid for the input tables.
            is_label = td["width"] == "22%"
            if is_label:
                td.name = "li"
                tag = td
            else:
                tag.append(td)
                td.name = "ul"
            for p in td.findAll("p", {"class": "Example"}):
                p.name = "li"
        ul.append(tr)
    table.replaceWith(ul)
コード例 #42
0
def sub_table2(soup, subtable):
    """Render a nested citation table as a one-item list prefixed with an
    italic "ALWD Guide to Legal Citation:" label, cells comma-separated."""
    wrapper = Tag(soup, "ul")
    item = Tag(soup, "li")
    buf = '<i>ALWD Guide to Legal Citation: </i>'
    for row in subtable.findAll("tr"):
        for cell in row.findAll("td"):
            try:
                cell_text = ''.join(cell.findAll(text=True)).strip()
                if cell_text != '' and cell_text != '&nbsp;':
                    # Append the raw (unstripped) cell text plus a separator.
                    buf += ''.join(cell.findAll(text=True))
                    buf += ', '
            except:  # NOTE(review): deliberately broad best-effort read
                pass
        # Trim the separator left after the row's last cell.
        buf = buf.strip().rstrip(',')
    item.append(BeautifulSoup(buf))
    wrapper.append(item)
    return wrapper
コード例 #43
0
    def SetupAmazonLibrary(self):
        """Register the Amazon Movies/TV source paths in Kodi's sources.xml.

        Loads the existing sources file (or builds a fresh skeleton when it
        is missing), adds or updates a <source> under <video> for each
        Amazon path, then saves and offers an application restart if
        anything changed.
        """
        source_path = xbmc.translatePath(
            'special://profile/sources.xml').decode('utf-8')
        source_added = False
        # Source display name -> plugin path to register under <video>.
        source = {
            self._s.ms_mov: self._s.MOVIE_PATH,
            self._s.ms_tv: self._s.TV_SHOWS_PATH
        }

        if xbmcvfs.exists(source_path):
            srcfile = xbmcvfs.File(source_path)
            soup = BeautifulSoup(srcfile)
            srcfile.close()
        else:
            # No sources.xml yet: build the standard Kodi category skeleton.
            subtags = ['programs', 'video', 'music', 'pictures', 'files']
            soup = BeautifulSoup('<sources></sources>')
            root = soup.sources
            for cat in subtags:
                cat_tag = Tag(soup, cat)
                def_tag = Tag(soup, 'default')
                def_tag['pathversion'] = 1
                cat_tag.append(def_tag)
                root.append(cat_tag)

        video = soup.find("video")

        for name, path in source.items():
            path_tag = Tag(soup, "path")
            path_tag['pathversion'] = 1
            path_tag.append(path)
            source_text = soup.find(text=name)
            if not source_text:
                # Source missing: append <source><name/><path/></source>.
                source_tag = Tag(soup, "source")
                name_tag = Tag(soup, "name")
                name_tag.append(name)
                source_tag.append(name_tag)
                source_tag.append(path_tag)
                video.append(source_tag)
                Log(name + ' source path added!')
                source_added = True
            else:
                # Source exists: replace its <path> when it no longer matches.
                source_tag = source_text.findParent('source')
                old_path = source_tag.find('path').contents[0]
                if path not in old_path:
                    source_tag.find('path').replaceWith(path_tag)
                    Log(name + ' source path changed!')
                    source_added = True

        if source_added:
            self.SaveFile(source_path, str(soup))
            self._g.dialog.ok(getString(30187), getString(30188),
                              getString(30189), getString(30190))
            if self._g.dialog.yesno(getString(30191), getString(30192)):
                xbmc.executebuiltin('RestartApp')
コード例 #44
0
def linearize_rows_1_cols(soup, table):
    """Turn a "linearize-rows-1-cols" table into a <ul>: each row becomes an
    <li> whose first cell is bolded and whose other cells are bracketed."""
    if table.get('id') == "linearize-rows-1-cols":
        holder = Tag(soup, "div")
        holder["class"] = "center"
        for row in table.findAll("tr"):
            cells = row.findAll("td")
            item = Tag(soup, "li")
            for cell in cells:
                for paragraph in cell.findAll("p"):
                    paragraph.name = "span"
                cell.name = "span"
                markup = cell.prettify()
                # Tag equality picks out the first cell, matching the
                # original comparison semantics.
                if cell == cells[0]:
                    rendered = BeautifulSoup('<b>' + markup + '</b>')
                else:
                    rendered = BeautifulSoup('<span>[</span>' + markup + '<span>]</span>')
                item.append(rendered)
            holder.append(item)
        # The container was built as a div; rename it to <ul> in place.
        holder.name = "ul"
        table.replaceWith(holder)
コード例 #45
0
def linearize_rows_1_cols(soup, table):
    """Linearize a "linearize-rows-1-cols" table: per row, bold the first
    cell and wrap every other cell in square brackets, one <li> per row."""
    if table.get('id') != "linearize-rows-1-cols":
        return
    out = Tag(soup, "div")
    out["class"] = "center"
    for tr in table.findAll("tr"):
        cells = tr.findAll("td")
        li = Tag(soup, "li")
        for td in cells:
            for p in td.findAll("p"):
                p.name = "span"
            td.name = "span"
            if td == cells[0]:
                fragment = BeautifulSoup('<b>' + td.prettify() + '</b>')
            else:
                fragment = BeautifulSoup('<span>[</span>' + td.prettify() + '<span>]</span>')
            li.append(fragment)
        out.append(li)
    # Rename the finished container from div to ul.
    out.name = "ul"
    table.replaceWith(out)
コード例 #46
0
ファイル: xbmclibrary.py プロジェクト: Versatilus/xbmc
def SetupAmazonLibrary():
    """Register the Amazon Movies/TV source paths in XBMC's sources.xml.

    Loads (or creates) the sources file, adds or updates a <source> entry
    under <video> for each Amazon path, and offers an application restart
    when anything was changed.
    """
    common.Log('Trying to add Amazon source paths...')
    source_path = os.path.join(common.profilpath, 'sources.xml')
    source_added = False
    source = {'Amazon Movies': MOVIE_PATH, 'Amazon TV': TV_SHOWS_PATH}

    try:
        # `with` guarantees the handle is closed even if parsing raises
        # (the original leaked the handle in that case, and shadowed the
        # `file` builtin).
        with open(source_path) as src_file:
            soup = BeautifulSoup(src_file)
    except Exception:
        # Missing or unparsable file: start from a fresh skeleton with the
        # standard XBMC source categories.
        subtags = ['programs', 'video', 'music', 'pictures', 'files']
        soup = BeautifulSoup('<sources></sources>')
        root = soup.sources
        for cat in subtags:
            cat_tag = Tag(soup, cat)
            def_tag = Tag(soup, 'default')
            def_tag['pathversion'] = 1
            cat_tag.append(def_tag)
            root.append(cat_tag)

    video = soup.find("video")

    for name, path in source.items():
        path_tag = Tag(soup, "path")
        path_tag['pathversion'] = 1
        path_tag.append(path)
        source_text = soup.find(text=name)
        if not source_text:
            # Source missing: append <source><name/><path/></source>.
            source_tag = Tag(soup, "source")
            name_tag = Tag(soup, "name")
            name_tag.append(name)
            source_tag.append(name_tag)
            source_tag.append(path_tag)
            video.append(source_tag)
            common.Log(name + ' source path added')
            source_added = True
        else:
            # Source exists: replace its <path> when it no longer matches.
            source_tag = source_text.findParent('source')
            old_path = source_tag.find('path').contents[0]
            if path not in old_path:
                source_tag.find('path').replaceWith(path_tag)
                common.Log(name + ' source path changed')
                source_added = True

    if source_added:
        SaveFile(source_path, str(soup))
        Dialog.ok(common.getString(30187), common.getString(30188),
                  common.getString(30189), common.getString(30190))
        if Dialog.yesno(common.getString(30191), common.getString(30192)):
            xbmc.executebuiltin('RestartApp')
コード例 #47
0
ファイル: modules.py プロジェクト: viniciuscainelli/pycielo
    def apply(self, xml):
        """Append this module's XML fragment to the document's first root element.

        Builds <self.root_tag> from self.childs: each (child, opts) pair
        reads opts["param"] from self.data (falling back to opts["default"]
        when present, otherwise skipping the child) and may be post-processed
        by a clean_<param>() hook.
        """
        if isinstance(self.root_tag, tuple):
            # A (tag_name, opts) tuple means the root carries one anonymous
            # value instead of named children.
            self.childs = [(None, self.root_tag[1])]
            self.root_tag = self.root_tag[0]

        root_tag = Tag(xml, self.root_tag)

        if self.childs:
            for child, opts in self.childs:
                if not opts["param"] in self.data:
                    if "default" in opts:
                        v = opts.get("default")
                    else:
                        # No value and no default: omit this child.
                        continue
                else:
                    v = self.data.get(opts["param"], "")

                # Optional per-field cleaner; NOTE(review): it is invoked
                # with no arguments, so it must obtain the value via self.
                if hasattr(self, "clean_%s" % opts["param"]):
                    v = getattr(self, "clean_%s" % opts["param"])()

                if not child:
                    # Anonymous child: the value becomes the root's text and
                    # processing stops after the first one.
                    root_tag.append(NavigableString(str(v)))
                    break

                tag = Tag(xml, child)
                tag.append(NavigableString(str(v)))
                root_tag.append(tag)

        xml.contents[0].append(root_tag)
コード例 #48
0
ファイル: xbmclibrary.py プロジェクト: Versatilus/xbmc
def SetupAmazonLibrary():
    """Add or update the Amazon Movies/TV source paths in XBMC's sources.xml.

    Reads the current sources file or creates a fresh category skeleton,
    inserts/updates a <source> under <video> per Amazon path, then saves
    and offers a restart if anything changed.
    """
    common.Log('Trying to add Amazon source paths...')
    source_path = os.path.join(common.profilpath, 'sources.xml')
    source_added = False
    source = {'Amazon Movies': MOVIE_PATH, 'Amazon TV': TV_SHOWS_PATH}

    try:
        # Context manager closes the handle even when parsing fails; the
        # original leaked it in that case and shadowed the `file` builtin.
        with open(source_path) as src_file:
            soup = BeautifulSoup(src_file)
    except Exception:
        # No readable/parsable sources.xml: build the default skeleton.
        subtags = ['programs', 'video', 'music', 'pictures', 'files']
        soup = BeautifulSoup('<sources></sources>')
        root = soup.sources
        for cat in subtags:
            cat_tag = Tag(soup, cat)
            def_tag = Tag(soup, 'default')
            def_tag['pathversion'] = 1
            cat_tag.append(def_tag)
            root.append(cat_tag)

    video = soup.find("video")

    for name, path in source.items():
        path_tag = Tag(soup, "path")
        path_tag['pathversion'] = 1
        path_tag.append(path)
        source_text = soup.find(text=name)
        if not source_text:
            # Not present yet: create <source><name>..</name><path>..</path>.
            source_tag = Tag(soup, "source")
            name_tag = Tag(soup, "name")
            name_tag.append(name)
            source_tag.append(name_tag)
            source_tag.append(path_tag)
            video.append(source_tag)
            common.Log(name + ' source path added')
            source_added = True
        else:
            # Present: swap in the new <path> if the old one differs.
            source_tag = source_text.findParent('source')
            old_path = source_tag.find('path').contents[0]
            if path not in old_path:
                source_tag.find('path').replaceWith(path_tag)
                common.Log(name + ' source path changed')
                source_added = True

    if source_added:
        SaveFile(source_path, str(soup))
        Dialog.ok(common.getString(30187), common.getString(30188), common.getString(30189), common.getString(30190))
        if Dialog.yesno(common.getString(30191), common.getString(30192)):
            xbmc.executebuiltin('RestartApp')
コード例 #49
0
ファイル: pavement.py プロジェクト: xifeiwu/workcode
def gen_blog_post(outdir, input_base, blog_base, url_base):
    """Generate the blog post body.

    Extracts the first paragraph of the generated HTML, strips hyperlinks
    from it, and writes an intro plus a "Read more" link pointing at the
    canonical article URL to the blog file.
    """
    outdir = path(outdir)
    input_file = outdir / input_base
    blog_file = outdir / blog_base

    # Build the canonical URL for the full article.
    canonical_url = "http://www.doughellmann.com/" + url_base
    if not canonical_url.endswith('/'):
        canonical_url += '/'
    if input_base != "index.html":
        canonical_url += input_base

    # Get the intro paragraph
    from BeautifulSoup import BeautifulSoup, Tag
    raw_body = input_file.text().strip()
    soup = BeautifulSoup(raw_body)
    intro = soup.find('p')

    # Strip hyperlinks by replacing those nodes with their contents.
    for link in intro.findAll('a'):
        new_span = Tag(soup, 'span')
        for c in link.contents:
            new_span.append(c)
        link.replaceWith(new_span)

    output_body = '''%(intro)s
<p><a href="%(canonical_url)s">Read more...</a></p>
''' % locals()
    # Write exactly once (the original wrote the identical body twice and
    # built two reference strings it never used).
    blog_file.write_text(output_body)
    return
コード例 #50
0
 def highlightedNode(self, target_node, yes_phrase, parent_soup):
     """Wrap target_node's markup in an orange-highlighted div, bolding the
     first case-insensitive occurrence of yes_phrase.

     NOTE(review): if yes_phrase is non-empty but absent from the content,
     find() returns -1 and the slices below split the markup incorrectly —
     presumably callers only pass phrases known to occur; confirm.
     """
     content = str(target_node)
     # Search the lowercased markup, but slice the original to keep case.
     text = content.lower()
     j = text.find(yes_phrase)
     tag = Tag(parent_soup, "div", [("style", "background-color:#FF8A0D")])
     if yes_phrase:
        tag.append(content[:j])
        bold = Tag(parent_soup, "b")
        bold.insert(0,content[j:(j + len(yes_phrase))])
        tag.append(bold)
        tag.append(content[(j + len(yes_phrase)):])
     else:
        # No phrase requested: pass the content through unhighlighted.
        tag.append(content)
     return tag
コード例 #51
0
def get_first_two(soup, table):
    """Collect the first two cells of every table row into <li> elements
    inside a wrapping <div>, joining each pair with " - "."""
    enclose = Tag(soup, "div")
    for tr in table.findAll("tr"):
        li = Tag(soup, "li")
        for position, td in enumerate(tr.findAll("td")):
            if position == 2:
                # Only the first two columns are wanted.
                break
            try:
                text = ''.join(td.findAll(text=True)).strip()
                if text != '' and text != '&nbsp;':
                    td.name = "span"
                    # Separator goes after the first cell only.
                    if position != 1:
                        td.append(' - ')
                    li.append(td)
            except:  # NOTE(review): deliberately broad best-effort read
                pass
        # Keep the row only if it produced some text.
        if ''.join(li.findAll(text=True)) != '':
            enclose.append(li)
    return enclose
コード例 #52
0
 def CreateForm(self):
     '''Create a <form> with randomized attributes and a randomized field mix.'''
     form = Tag(self.soup, 'form')
     form['action'] = random.choice(['', '.'])
     # Method and name attributes are each present with 70% probability.
     if self._Probability(70):
         form['method'] = random.choice(['post', 'get'])
     if self._Probability(70):
         form['name'] = self.GenerateName()
     self.AppendIds(form, 50, 30)
     div = self.CreateDiv()
     form.append(div)
     # Two text inputs, then a textarea and a select each with 10% chance.
     for _ in range(1, 3):
         div.append(self.CreateInput('text'))
     if self._Probability(10):
         div.append(self.CreateTextarea())
     if self._Probability(10):
         div.append(self.CreateSelect())
     # Two hidden inputs.
     for _ in range(0, 2):
         div.append(self.CreateInput('hidden'))
     # Shuffle field order, then put the submit button last.
     self.ShuffleTags(div)
     div.append(self.CreateInput('submit'))
     self.ShuffleAttributes(form)
     return form
コード例 #53
0
def linearize_alwd(soup, table):
    """Linearize the "linearize-alwd" citation table.

    Keeps the header row unchanged, then turns every following row into a
    single-item <ul>: only the first non-empty cell of each row is kept
    (later non-empty cells are blanked), and nested tables are collapsed
    via sub_table2() and appended to the same item.
    """
    if table.get('id') == "linearize-alwd":
        div = Tag(soup, "div")
        first = 1
        lista = table.findAll("tr")
        tr1 = lista[0]
        # Header row is carried over as-is.
        div.append(tr1)
        for tr in lista[1:]:
            ul = Tag(soup, "ul")
            li = Tag(soup, "li")
            for td in tr.findAll("td"):
                for p in td.findAll("p"):
                    p.name = "span"
                try:
                    text = ''.join(td.findAll(text=True))
                    text = text.strip()
                    if text != '' and text != '&nbsp;':
                        if first == 1:
                            # First non-empty cell of the row becomes the item.
                            li.append(td)
                            first = 0
                        else:
                            # Later non-empty cells are dropped from the output.
                            td.replaceWith("")
                except:
                    pass
                # Collapse any nested tables into the same list item.
                for subtable in td.findAll("table"):
                    sub = sub_table2(soup, subtable)
                    try:
                        text = ''.join(td.findAll(text=True))
                        text = text.strip()
                        if text != '' and text != '&nbsp;':
                            li.append(sub)
                    except:
                        pass
            # Reset for the next row.
            first = 1
            if li.contents: ul.append(li)
            div.append(ul)
        table.replaceWith(div)
コード例 #54
0
def createParentUlTag(targetSoup):
    """Build the breadcrumb <ul> seeded with a single "TOP" link pointing
    at the site root, ready for further <li> entries to be appended."""
    parentUlTag = Tag(targetSoup, 'ul',
                      attrs={'class': 'xbreadcrumbs', 'id': 'breadcrumbs'})
    topAnchorTag = Tag(targetSoup, 'a', attrs={'href': SITE_DOMAIN})
    topAnchorTag.append(NavigableString('TOP'))
    topListTag = Tag(targetSoup, 'li')
    topListTag.append(topAnchorTag)
    parentUlTag.append(topListTag)
    return parentUlTag
コード例 #55
0
def generateUlTag(path, file, ulClass):
    """Build a one-item <ul> linking to *file* under *path*.

    Reads the page's <h1> contents for the link label (the band name) and
    wraps it as <ul class=ulClass><li><a href=path/file>label</a></li></ul>.
    """
    # Read the page and pull its <h1>; `with` closes the handle promptly
    # (the original opened the file and never closed it).
    with open('/'.join([PARENT_DIR, path, file])) as page:
        fileSoup = BeautifulSoup(page)
    text = fileSoup.find('h1').renderContents()
    ulTag = Tag(formatSoup, 'ul', attrs={'class' : ulClass})
    liTag = Tag(formatSoup, 'li')
    link = '/'.join([path, file])
    aTag = Tag(formatSoup, 'a', attrs={'href' : link})
    aTag.append(NavigableString(text))
    liTag.append(aTag)
    ulTag.append(liTag)

    return ulTag
コード例 #56
0
def get_list_for_key(name, children):
    """
    Recursively render a key and its child dictionary as nested HTML list
    items: the returned <li> holds the key name plus, when children exist,
    a <ul> of the recursively rendered child entries.
    """
    item = Tag(SOUP, "li")
    item.append(NavigableString(name))

    if children:
        sub_list = Tag(SOUP, "ul")
        for child_name, grandchildren in children.items():
            sub_list.append(get_list_for_key(child_name, grandchildren))
        item.append(sub_list)

    return item
コード例 #57
0
def linearize_cols_1_4(soup, table):
    """Flatten a 4-column "linearize-cols-1-4" table column-by-column into a
    <ul>, grouping the anchors four per <li> separated by " | "."""
    if table.get('id') == "linearize-cols-1-4":
        pool = Tag(soup, "ul")
        # Four passes, each peeling the leading cell off every row, so the
        # cells come out in column order.
        for _ in range(4):
            for tr in table.findAll("tr"):
                cell = tr.find("td")
                cell.replaceWith("")
                pool.append(cell)
        anchors = pool.findAll("a")
        grouped = [anchors[i:i + 4] for i in range(0, len(anchors), 4)]
        ul = Tag(soup, "ul")
        for group in grouped:
            li = Tag(soup, "li")
            for a in group:
                if a == group[-1]:
                    li.append(BeautifulSoup(a.prettify()))
                else:
                    li.append(BeautifulSoup(a.prettify() + '<span> | </span>'))
            ul.append(li)
        table.replaceWith(ul)
コード例 #58
0
def get_slides(args):
    """Convert a markdown file into an html5slides deck.

    Each <hr> in the rendered markdown starts a new <article> slide; the
    stylesheet and driver script are local files with --offline, otherwise
    the hosted html5slides-markdown assets.
    """
    contents = get_file_contents(args.file)
    rendered = BeautifulSoup(markdown(contents))

    doc = BeautifulSoup()
    html = Tag(doc, 'html')
    doc.append(html)

    head = Tag(doc, 'head')
    title = Tag(doc, 'title')
    title.setString(args.file)
    head.append(title)

    # Stylesheet: local when offline, hosted otherwise.
    link = Tag(doc, 'link')
    link['rel'] = 'stylesheet'
    link['type'] = 'text/css'
    if args.offline:
        link['href'] = 'default.css'
    else:
        link['href'] = 'http://gdg-xian.github.io/html5slides-markdown/themes/default.css'
    head.append(link)

    # Slide-deck driver script.
    script = Tag(doc, 'script')
    if args.offline:
        script['src'] = 'html5slides.js'
    else:
        script['src'] = 'http://gdg-xian.github.io/html5slides-markdown/javascripts/html5slides.js'
    head.append(script)
    html.append(head)

    body = Tag(doc, 'body')
    body['style'] = 'display:none'
    section = Tag(doc, 'section')
    section['class'] = 'slides layout-regular template-default'
    body.append(section)

    # Walk the rendered markdown's top-level nodes; <hr> opens a new slide.
    lead = rendered.first()
    nodes = [lead]
    nodes.extend(lead.findNextSiblings())
    article = Tag(doc, 'article')
    section.append(article)
    for node in nodes:
        if node.name == 'hr':
            article = Tag(doc, 'article')
            section.append(article)
        else:
            article.append(node)

    html.append(body)

    return prettify(html)