Beispiel #1
0

def q(e):
    """Return ``e.text`` when ``e`` is truthy; otherwise return ``e`` itself.

    The script applies this to the result of ``p.find('font')`` so that a
    falsy lookup result (presumably ``None`` when no matching tag exists)
    is passed through unchanged instead of being dereferenced.
    """
    return e.text if e else e


# Scratch script: pull link summaries out of a locally saved HTML page.
# NOTE(review): BeautifulSoup is not imported anywhere in this snippet; the
# code presumably relies on `from BeautifulSoup import BeautifulSoup` -- confirm.
file = '/Users/ilya/Desktop/cl_summary.txt'

# Parse inside a `with` block so the file handle is always closed; the
# previous version opened the file and never closed it.
with open(file, 'r') as page:
    soup = BeautifulSoup(page, convertEntities=BeautifulSoup.HTML_ENTITIES)
# print soup

# Calling a tag is BeautifulSoup shorthand for findAll(): select the direct
# <p> children of the first <blockquote> that carry no attributes, and build
# one tuple per paragraph: (link text, link href, <font> text or the falsy
# lookup result, <small> link text, <small> link href).
lst = [(p.a.text, p.a['href'], q(p.find('font')), p.small.a.text,
        p.small.a['href'])
       for p in soup.blockquote(lambda tag: tag.name == 'p' and not tag.attrs,
                                recursive=False)]
# A parenthesised single argument prints identically under the Python 2
# print statement and the Python 3 print function.
print(lst)

# NOTE(review): the script stops here with exit status 1, so everything
# below is unreachable scratch code. `pdtl` and `siteMap` are not defined
# anywhere in this snippet.
exit(1)

span = pdtl[0].span.text
a = pdtl[0].find('a')

# print span
print(a.contents[0].strip())
print(a.contents[1].text)
# print dir(a)

exit(1)
print([e.text for e in siteMap.findAll('h3') if e.findNext().name == 'a'])
print[(e.name, e['class'])
Beispiel #2
0
from BeautifulSoup import BeautifulSoup

def q(e):
  """Give back ``e.text`` for a truthy ``e``; a falsy ``e`` is returned as-is.

  Called on ``p.find('font')`` below, so a falsy lookup result is passed
  through untouched rather than having ``.text`` read from it.
  """
  if not e:
    return e
  return e.text

# Scratch script: pull link summaries out of a locally saved HTML page.
file = '/Users/ilya/Desktop/cl_summary.txt'

# Parse inside a `with` block so the file handle is always closed; the
# previous version opened the file and never closed it.
with open(file, 'r') as page:
  soup = BeautifulSoup(page, convertEntities=BeautifulSoup.HTML_ENTITIES)
# print soup

# Calling a tag is BeautifulSoup shorthand for findAll(): select the direct
# <p> children of the first <blockquote> that carry no attributes, and build
# one tuple per paragraph: (link text, link href, <font> text or the falsy
# lookup result, <small> link text, <small> link href).
lst = [(p.a.text, p.a['href'], q(p.find('font')), p.small.a.text, p.small.a['href'])
       for p in soup.blockquote(lambda tag: tag.name == 'p' and not tag.attrs,
                                recursive=False)]
# A parenthesised single argument prints identically under the Python 2
# print statement and the Python 3 print function.
print(lst)

# NOTE(review): the script stops here with exit status 1, so everything
# below is unreachable scratch code. `pdtl` and `siteMap` are not defined
# anywhere in this snippet.
exit(1)

span = pdtl[0].span.text
a = pdtl[0].find('a')

# print span
print(a.contents[0].strip())
print(a.contents[1].text)
# print dir(a)

exit(1)
print([e.text for e in siteMap.findAll('h3') if e.findNext().name == 'a'])
print([(e.name, e['class'])
       for e in siteMap.findAll('div', 'content-group')[0].findAll(recursive=False)])  # , attrs={'class':'sublist'})
print([e.text
       for e in siteMap.findAll('div', 'content-group')[0].findAll(recursive=False)[0]
       if e.findNext().name == 'a'])