def fetch_top10(self):
    """Fetch the site-wide top-10 list ('bbstop10') as a Page.

    Every table row after the header row becomes one Header with
    board, title, pid, author and reply count filled in.
    """
    soup = BeautifulSoup(self._do_action('bbstop10'))
    page = Page(u'全站十大')
    for row in soup.findAll('tr')[1:]:
        cols = row.findAll('td')
        header = Header()
        header.board = cols[1].text.strip()
        header.title = cols[2].text.strip()
        header.pid = parse_pid(cols[2].a['href'])
        header.author = cols[3].text.strip()
        header.reply_count = int(cols[4].text.strip())
        page.header_list.append(header)
    return page
def fetch_topic(self, board, pid, start=None):
    """Fetch the posts of one topic via the 'bbstcon' action.

    :param board: board name
    :param pid: topic post id (converted with pid2str for the request)
    :param start: optional paging offset, forwarded as 'start'
    :raises ContentError: if the page contains no post tables
    :returns: a Topic with post_list filled; next_start is set when a
        '本主题下30篇' (next 30 posts) link is present.
    """
    params = {'board': board, 'file': pid2str(pid)}
    if start:
        params['start'] = start
    soup = BeautifulSoup(self._do_action('bbstcon', params))
    topic = Topic(board, pid)
    tables = soup.findAll('table', {'class': 'main'})
    if not tables:
        raise ContentError()
    for table in tables:
        href = table.tr.td.a['href']
        post = Post(board, parse_pid(href), parse_num(href))
        # Post body lives in the textarea of the table's second row.
        post.parse_post(table.findAll('tr')[1].td.textarea.text)
        topic.post_list.append(post)
    for link in soup.body.center.findAll('a', recursive=False, limit=3):
        if link.text == u'本主题下30篇':
            topic.next_start = int(parse_href(link['href'], 'start'))
    return topic
def fetch_hot(self):
    """Fetch the hot-topics overview ('bbstopall') grouped by section.

    Rows containing an <img> open a new section; a row whose first
    cell is empty closes the current section (its accumulated Header
    list is appended to the result).  Every other row carries one hot
    topic per <td>.

    NOTE(review): a section only reaches the result when a closing
    empty row follows it — presumably the page always ends each
    section that way; confirm against the live markup.
    """
    soup = BeautifulSoup(self._do_action('bbstopall'))
    sections = []
    current = None
    for row in soup.findAll('tr'):
        if row.img:
            # Section banner row: start a fresh group.
            current = []
            continue
        cells = row.findAll('td')
        if not cells[0].text:
            # Separator row: flush the group that just ended.
            sections.append(current)
            continue
        for cell in cells:
            anchors = cell.findAll('a')
            header = Header()
            header.title = anchors[0].text.strip()
            header.board = anchors[1].text.strip()
            header.pid = parse_pid(anchors[0]['href'])
            current.append(header)
    return sections
def fetch_page(self, board, start=None):
    """Fetch one listing page of a board via the 'bbstdoc' action.

    :param board: board name
    :param start: optional paging offset, forwarded as 'start'
    :returns: a Page whose header_list holds one Header per topic row;
        prev_start is set when an '上一页' (previous page) link exists.
    """
    params = {'board': board}
    if start:
        params['start'] = start
    soup = BeautifulSoup(self._do_action('bbstdoc', params))
    this_year = datetime.now().year
    page = Page(board)
    for row in soup.findAll('tr')[1:]:
        cols = row.findAll('td')
        header = Header()
        header.board = board
        try:
            # Listing numbers are 1-based on the page; store them 0-based.
            header.num = int(cols[0].text) - 1
        except ValueError:
            # Rows without a numeric first cell are not topic rows.
            continue
        header.author = cols[2].text.strip()
        # The listing shows no year; assume the current one.
        parsed = datetime.strptime(cols[3].text.strip(), self.DATE_FORMAT)
        header.date = parsed.replace(year=this_year)
        # First two characters of the title cell are decoration.
        header.title = cols[4].text.strip()[2:]
        header.pid = parse_pid(cols[4].a['href'])
        counts = cols[5].text.strip()
        if counts.find('/') != -1:
            # 'replies/views' pair.
            parts = counts.split('/')
            header.reply_count = int(parts[0])
            header.view_count = int(parts[1])
        else:
            header.view_count = int(counts)
        page.header_list.append(header)
    # TODO
    for link in soup.body.center.findAll('a', recursive=False):
        if link.text == u'上一页':
            page.prev_start = int(parse_href(link['href'], 'start')) - 1
    return page