Python Tag.contents Examples, BeautifulSoup.Tag.contents Python Examples

Example #1

0

Show file

File: HTML_Parser.py Project: iamutkarshtiwari/infoslicer

    def unTag(self, tag):
        """
            Cleans a tag hierarchy in place, processing children before
            their parent. After its direct child tags have been handled,
            the tag itself is treated according to the first rule that
            matches:
              1. its "class" attribute matches remove_classes_regexp
                 (when that regexp is not the empty string): extracted
              2. its name is in keep_tags: replaced by a new tag of the
                 same name that takes over the original contents list
              3. its name is in remove_tags_keep_content: replaced by its
                 only child tag, by a <p> holding its child tags, or by
                 its rendered contents when it has no child tags
              4. anything else: extracted
            @param tag: tag hierarchy to work on
        """
        # Depth first: clean the direct child tags before judging this tag.
        for child in tag.findChildren(True, recursive=False):
            self.unTag(child)

        class_pattern = self.remove_classes_regexp
        if class_pattern != "" and tag.has_key("class") \
                and re.match(class_pattern, tag["class"]) is not None:
            tag.extract()
            return

        if tag.name in self.keep_tags:
            # The replacement is built from the name only and is handed
            # the original contents list.
            bare_tag = Tag(self.input, tag.name)
            bare_tag.contents = tag.contents
            tag.replaceWith(bare_tag)
            return

        if tag.name not in self.remove_tags_keep_content:
            tag.extract()
            return

        # The tag itself goes away but (some of) its content stays.
        direct_children = tag.findChildren(True, recursive=False)
        child_count = len(direct_children)
        if child_count == 0:
            tag.replaceWith(tag.renderContents())
        elif child_count == 1:
            tag.replaceWith(direct_children[0])
        else:
            wrapper = Tag(self.input, "p")
            for child in direct_children:
                wrapper.append(child)
            tag.replaceWith(wrapper)

Example #2

0

Show file

 def _linkify_headings(self, soup):
     """Wrap the content of each top-level heading in a self-referencing link.

     Looks up the first <div> matched by soup.find('div', 'md') and, for
     each h1/h2/h3 found directly inside it (non-recursive search), moves
     the heading's contents into a new <a href="#<heading id>"> which then
     becomes the heading's only child.
     """
     container = soup.find('div', 'md')
     top_level_headings = container.findAll(['h1', 'h2', 'h3'], recursive=False)
     for heading in top_level_headings:
         anchor = Tag(soup, "a", [('href', '#%s' % heading['id'])])
         # Hand the existing contents list to the anchor and leave the
         # heading with a fresh empty list before re-attaching the anchor.
         anchor.contents, heading.contents = heading.contents, []
         heading.append(anchor)

Example #3

0

Show file

File: HTML_Parser.py Project: sugar-activities/4042-activity

    def unTag(self, tag):
        """
            Cleans a tag hierarchy in place, processing children before
            their parent. After its direct child tags have been handled,
            the tag itself is treated according to the first rule that
            matches:
              1. its "class" attribute matches remove_classes_regexp
                 (when that regexp is not the empty string): extracted
              2. its name is in keep_tags: replaced by a new tag of the
                 same name that takes over the original contents list
              3. its name is in remove_tags_keep_content: replaced by its
                 only child tag, by a <p> holding its child tags, or by
                 its rendered contents when it has no child tags
              4. anything else: extracted
            @param tag: tag hierarchy to work on
        """
        # Depth first: clean the direct child tags before judging this tag.
        for child in tag.findChildren(True, recursive=False):
            self.unTag(child)

        class_pattern = self.remove_classes_regexp
        if class_pattern != "" and tag.has_key("class") \
                and re.match(class_pattern, tag["class"]) is not None:
            tag.extract()
            return

        if tag.name in self.keep_tags:
            # The replacement is built from the name only and is handed
            # the original contents list.
            bare_tag = Tag(self.input, tag.name)
            bare_tag.contents = tag.contents
            tag.replaceWith(bare_tag)
            return

        if tag.name not in self.remove_tags_keep_content:
            tag.extract()
            return

        # The tag itself goes away but (some of) its content stays.
        direct_children = tag.findChildren(True, recursive=False)
        child_count = len(direct_children)
        if child_count == 0:
            tag.replaceWith(tag.renderContents())
        elif child_count == 1:
            tag.replaceWith(direct_children[0])
        else:
            wrapper = Tag(self.input, "p")
            for child in direct_children:
                wrapper.append(child)
            tag.replaceWith(wrapper)

Example #4

0

Show file

File: policies.py Project: rprz/reddit

 def _linkify_headings(self, soup):
     """Wrap the content of each top-level heading in a self-referencing link.

     Looks up the first <div> matched by soup.find("div", "md") and, for
     each h1/h2/h3 found directly inside it (non-recursive search), moves
     the heading's contents into a new <a href="#<heading id>"> which then
     becomes the heading's only child.
     """
     container = soup.find("div", "md")
     top_level_headings = container.findAll(["h1", "h2", "h3"], recursive=False)
     for heading in top_level_headings:
         anchor = Tag(soup, "a", [("href", "#%s" % heading["id"])])
         # Hand the existing contents list to the anchor and leave the
         # heading with a fresh empty list before re-attaching the anchor.
         anchor.contents, heading.contents = heading.contents, []
         heading.append(anchor)

Example #5

0

Show file

File: tiddlywiki.py Project: FND/tiddlywiki-svn-mirror

	def convertStoreFormat(self):
		"""
		upgrade a legacy store to the canonical store format

		For every DIV in the store that carries a "tiddler" attribute,
		that attribute is renamed to "title", the first content node is
		replaced by its decoded text and the contents are wrapped in a
		PRE element. Nothing is changed unless the detected version is
		below the threshold.

		N.B.:
		The canonical store format arrived with TiddlyWiki v2.2 final,
		but several v2.2 beta releases still use the legacy format.

		@return: None
		"""
		try:
			version = self.getVersion()
		except (ValueError, AttributeError):
			version = (0, 0, 0) # assume pre-v2.2 format
		# N.B.: addition works because all pre-v2.3 releases are known -- XXX: actual threshold is v2.2 final
		is_legacy = version and (version[0] + (version[1] / 10.0) < 2.3)
		if not is_legacy:
			return
		for tiddler in self.store.findChildren("div", tiddler = True):
			# the tiddler attribute becomes the title attribute
			tiddler["title"] = tiddler["tiddler"]
			del tiddler["tiddler"]
			# decode the tiddler text -- XXX: use of contents[0] hacky?
			first_node = tiddler.contents[0]
			first_node.replaceWith(decodeTiddlerText(first_node))
			# wrap everything in a PRE element
			wrapper = Tag(self.dom, "pre")
			wrapper.contents = tiddler.contents
			tiddler.contents = [wrapper]

Example #6

0

Show file

File: views.py Project: HM2MC/Webfront

def get_malott_menu(today):
    """Scrape the Malott Commons dining page and return it as an HTML table.

    Downloads the Scripps dining-services page, pulls every <ul> and then
    every remaining <p> out of the "right_column_content" div, and builds a
    new <table class="mealtable">: for each <ul> one "mealtime" row labelled
    from the <p> list (the first <p> is skipped), followed by one row per
    <li> of that <ul>.

    The ``today`` argument is accepted but not used by this function.

    Returns the prettified markup of the generated document.
    """
    page = requests.get("http://www.scrippscollege.edu/students/dining-services/index.php")
    soup = BeautifulSoup(page.content)
    head = BeautifulSoup("<thead><tr><td colspan=3>Malott Commons</td></tr></thead>")

    target = soup.find("div", {"id": "right_column_content"})
    target.extract()

    # Detach the meal lists first; the paragraph search below therefore
    # only sees what is left in the container afterwards.
    meals = list(target.findAll("ul"))
    for meal_list in meals:
        meal_list.extract()

    labels = list(target.findAll("p"))
    for label in labels:
        label.extract()

    final_table = BeautifulSoup()
    table = Tag(final_table, "table")
    final_table.insert(0, table)
    table.insert(0, head)
    table["class"] = "mealtable"

    def new_row():
        # A fresh <tr> holding a single empty <td>.
        row = Tag(final_table, "tr")
        cell = Tag(final_table, "td")
        row.insert(0, cell)
        return row, cell

    # Meal number k (counting from 1) is labelled with labels[k];
    # labels[0] is never used.
    for label_index, meal in enumerate(meals, 1):
        heading_row, heading_cell = new_row()
        heading_cell["class"] = "mealtime"
        heading_cell.contents = labels[label_index].contents
        # Rows are inserted just before the table's current last child.
        table.insert(len(table.contents) - 1, heading_row)
        for food in meal.findAll("li"):
            food_row, food_cell = new_row()
            food_cell.contents = food.contents
            table.insert(len(table.contents) - 1, food_row)

    return final_table.prettify()

Example #7

0

Show file

File: sanitizer.py Project: braveulysses/backwater

def heading_to_bold(txt, add_break=False):
    """Replaces heading tags (<h1>, <h2>, etc.) with <b> tags.

    Each <h1>..<h6> element is swapped for a <b> element that takes over
    the heading's contents.

    Optionally appends two <br>s at the end of each new <b> tag.

    txt -- markup to parse
    add_break -- when true, add the two <br> tags described above

    Returns the modified document as a unicode string."""
    soup = BeautifulSoup(txt)
    for header in soup.findAll(['h1', 'h2', 'h3', 'h4', 'h5', 'h6']):
        bold_tag = Tag(soup, "b", [])
        bold_tag.contents = header.contents
        header.replaceWith(bold_tag)
        if add_break:
            # Append to bold_tag, the node that now lives in the tree.
            # `header` has just been replaced; appending to it would only
            # reach the output through the contents list it shares with
            # bold_tag, and would leave the <br> tags parented to a
            # detached node.
            bold_tag.append(Tag(soup, "br"))
            bold_tag.append(Tag(soup, "br"))
    return unicode(soup)

Example #8

0

Show file

    def FixTableHeadings(self):
        '''Rewrites doxygen table heading rows into the form EZT expects.

        A heading row is a <tr> whose <td> holds an <h2> that contains an
        <a> with a non-empty "name". For each such row the anchor's name
        is copied to the row's "id", the cell's string is set to the node
        that follows the anchor, and the cell is renamed to <th>.

        For example, this html:
          <tr><td colspan="2"><h2><a name="pub-attribs"></a>
          Data Fields List</h2></td></tr>

        would be converted to this:
          <tr id="pub-attribs"><th colspan="2">Data Fields List</th></tr>

        In addition, a heading row that has an earlier <tr> sibling inside
        a <table> starts a new table: the row and everything after it are
        moved into a new <table> placed right after the original one.
        '''

        heading_rows = []
        for row in self.soup.findAll('tr'):
            cell = row.td
            if not cell:
                continue
            heading = cell.h2
            if not heading:
                continue
            anchor = heading.a
            if not (anchor and anchor['name']):
                continue
            row['id'] = anchor['name']
            cell.string = anchor.next
            cell.name = 'th'
            heading_rows.append(row)

        # Walk the heading rows last-to-first so that splitting at an
        # earlier row cannot disturb a later one.
        for row in reversed(heading_rows):
            # Only headings in the middle of a table trigger a split.
            if not (row.findPreviousSibling('tr') and row.parent.name == 'table'):
                continue
            table = row.parent
            container = table.parent
            position = container.contents.index(table)
            split_off = Tag(self.soup, name='table', attrs=table.attrs)
            container.insert(position + 1, split_off)
            # Move the heading row and all following nodes to the new table.
            cut = table.contents.index(row)
            split_off.contents = table.contents[cut:]
            del table.contents[cut:]

Example #9

0

Show file

  def FixTableHeadings(self):
    '''Rewrites doxygen table heading rows into the form EZT expects.

    A heading row is a <tr> whose <td> holds an <h2> that contains an
    <a> with a non-empty "name". For each such row the anchor's name
    is copied to the row's "id", the cell's string is set to the node
    that follows the anchor, and the cell is renamed to <th>.

    For example, this html:
      <tr><td colspan="2"><h2><a name="pub-attribs"></a>
      Data Fields List</h2></td></tr>

    would be converted to this:
      <tr id="pub-attribs"><th colspan="2">Data Fields List</th></tr>

    In addition, a heading row that has an earlier <tr> sibling inside
    a <table> starts a new table: the row and everything after it are
    moved into a new <table> placed right after the original one.
    '''

    heading_rows = []
    for row in self.soup.findAll('tr'):
      cell = row.td
      if not cell:
        continue
      heading = cell.h2
      if not heading:
        continue
      anchor = heading.a
      if not (anchor and anchor['name']):
        continue
      row['id'] = anchor['name']
      cell.string = anchor.next
      cell.name = 'th'
      heading_rows.append(row)

    # Walk the heading rows last-to-first so that splitting at an
    # earlier row cannot disturb a later one.
    for row in reversed(heading_rows):
      # Only headings in the middle of a table trigger a split.
      if not (row.findPreviousSibling('tr') and row.parent.name == 'table'):
        continue
      table = row.parent
      container = table.parent
      position = container.contents.index(table)
      split_off = Tag(self.soup, name='table', attrs=table.attrs)
      container.insert(position + 1, split_off)
      # Move the heading row and all following nodes to the new table.
      cut = table.contents.index(row)
      split_off.contents = table.contents[cut:]
      del table.contents[cut:]

Example #10

0

Show file

File: views.py Project: HM2MC/Webfront

def _append_meal_rows(final_table, tabler, foods, stations, copy_station=True):
    """Add one <tr> per cell of *foods* to *tabler*.

    A cell whose class is "mealtime" gets a row of its own. Every other
    cell is preceded in its row by a station cell; stations are consumed
    in order starting at index 1 (stations[0] is never used here).

    copy_station -- when true, a new <td class="mealstation"> that takes
    over the station's contents list is inserted; when false, the station
    element itself is moved into the row.
    """
    count = 1
    for food in foods:
        tr = Tag(final_table, "tr")
        is_heading = food["class"] == "mealtime"
        tr.insert(0, food)
        if not is_heading:
            if copy_station:
                station_cell = Tag(final_table, "td")
                station_cell.contents = stations[count].contents
                station_cell["class"] = "mealstation"
            else:
                station_cell = stations[count]
            tr.insert(0, station_cell)
            count += 1
        # Rows go in just before the table's current last child.
        tabler.insert(len(tabler.contents) - 1, tr)


def get_frary_menu(today):
    """Scrape the Frary dining hall menu and rebuild it as an HTML table.

    Downloads the Pomona Frary menu page, selects the "menu" table at index
    ``today.isoweekday() - 1`` and re-assembles its breakfast, lunch and
    dinner cells, paired with the station cells, into a new
    <table class="mealtable">.

    today -- object providing strftime() and isoweekday()
             (presumably a datetime.date or datetime.datetime)

    Returns the prettified markup of the generated document.

    Raises Exception("No menu available for today") when no <div> text on
    the page equals the weekday name of ``today``. Errors while building
    the dinner section are not caught here (for instance an IndexError
    when no dinner cell is found).
    """
    fr_url = "http://www.pomona.edu/administration/dining/menus/frary.aspx"
    fr_resp = requests.get(fr_url)
    fr_soup = BeautifulSoup(fr_resp.content)
    fr_head = BeautifulSoup("<thead><tr><td colspan=3>Frary</td></tr></thead>")

    target_day = today.strftime("%A")

    day_div = fr_soup.find("div", text=target_day)
    if day_div is None:
        raise Exception("No menu available for today")

    table = fr_soup.findAll("table", {"class": re.compile("menu")})[today.isoweekday() - 1]  # find the right menu
    table.extract()

    table["class"] = "mealtable"

    stations = []
    for td in table.findAll(True, {"class": re.compile("station")}):
        td["class"] = "mealstation"
        stations.append(td)

    final_table = BeautifulSoup()
    tabler = Tag(final_table, "table")
    tabler.insert(0, fr_head)
    final_table.insert(0, tabler)

    # if we get an error here, it's probably because
    # it's the weekend - no breakfast or lunch, just bruuunch
    try:
        # build breakfast
        breakfast = list(table.findAll(True, {"class": re.compile("breakfast")}))
        if breakfast:
            # An empty breakfast list is tolerated (no IndexError), unlike
            # lunch below.
            breakfast[0]["class"] = "mealtime"
            breakfast[0]["colspan"] = 3

        # The lunch cells are looked up before any row is added, so a
        # missing lunch aborts this section with the table untouched.
        lunch = list(table.findAll(True, {"class": re.compile("lunch")}))
        lunch[0]["class"] = "mealtime"
        lunch[0]["colspan"] = 3

        # Breakfast rows reuse the station elements themselves; lunch and
        # dinner rows get copies.
        _append_meal_rows(final_table, tabler, breakfast, stations,
                          copy_station=False)
        _append_meal_rows(final_table, tabler, lunch, stations)
    except Exception:
        # Deliberate best effort: skip breakfast/lunch and still return
        # dinner. Narrowed from a bare ``except`` so that KeyboardInterrupt
        # and SystemExit are no longer swallowed.
        pass

    dinner = list(table.findAll(True, {"class": re.compile("dinner")}))
    dinner[0]["class"] = "mealtime"
    dinner[0]["colspan"] = 3

    _append_meal_rows(final_table, tabler, dinner, stations)

    tabler["class"] = "mealtable"

    return final_table.prettify()