Esempio n. 1
0
 def fetch_top10(self):
     """Fetch the site-wide top-10 hot topics and return them as a Page.

     Issues the 'bbstop10' action, parses the resulting HTML table and
     builds one Header per ranked topic.
     """
     markup = self._do_action('bbstop10')
     document = BeautifulSoup(markup)
     page = Page(u'全站十大')
     # First <tr> is the table header; each remaining row is one topic.
     for row in document.findAll('tr')[1:]:
         columns = row.findAll('td')
         header = Header()
         header.board = columns[1].text.strip()
         header.title = columns[2].text.strip()
         header.pid = parse_pid(columns[2].a['href'])
         header.author = columns[3].text.strip()
         header.reply_count = int(columns[4].text.strip())
         page.header_list.append(header)
     return page
Esempio n. 2
0
 def fetch_top10(self):
     """Fetch the site-wide top-10 hot topics and return them as a Page.

     Performs the 'bbstop10' action, then scrapes the HTML table:
     one Header (board, title, pid, author, reply_count) per row.
     """
     html = self._do_action('bbstop10')
     soup = BeautifulSoup(html)
     # Skip the first <tr>, which is the table header row.
     items = soup.findAll('tr')[1:]
     ret = Page(u'全站十大')
     for i in items:
         cells = i.findAll('td')
         h = Header()
         # Column layout (observed): 1=board, 2=title link, 3=author, 4=reply count.
         h.board = cells[1].text.strip()
         h.title = cells[2].text.strip()
         # The topic id is encoded in the title link's href.
         h.pid = parse_pid(cells[2].a['href'])
         h.author = cells[3].text.strip()
         h.reply_count = int(cells[4].text.strip())
         ret.header_list.append(h)
     return ret
Esempio n. 3
0
 def fetch_topic(self, board, pid, start=None):
     """Fetch the posts of one topic and return them as a Topic.

     Performs the 'bbstcon' action for (board, pid); `start`, when given,
     is passed through as the paging offset. Raises ContentError when no
     post tables are found in the response.
     """
     params = {'board': board, 'file': pid2str(pid)}
     if start:
         params['start'] = start
     html = self._do_action('bbstcon', params)
     soup = BeautifulSoup(html)
     ret = Topic(board, pid)
     # Each <table class="main"> holds exactly one post.
     items = soup.findAll('table', {'class': 'main'})
     if not items:
         # No post tables at all: treat the page as malformed/unexpected.
         raise ContentError()
     for i in items:
         # First row's link carries the post's id and number.
         c = i.tr.td.a['href']
         p = Post(board, parse_pid(c), parse_num(c))
         # Second row's <textarea> carries the raw post body.
         c = i.findAll('tr')[1].td.textarea.text
         p.parse_post(c)
         ret.post_list.append(p)
     # Look for the "next 30 posts in this topic" paging link.
     for i in soup.body.center.findAll('a', recursive=False, limit=3):
         if i.text == u'本主题下30篇':
             ret.next_start = int(parse_href(i['href'], 'start'))
     return ret
Esempio n. 4
0
 def fetch_topic(self, board, pid, start=None):
     """Fetch the posts of one topic and return them as a Topic.

     Runs the 'bbstcon' action for (board, pid), optionally offset by
     `start`. Raises ContentError when the page contains no post tables.
     """
     query = {'board': board, 'file': pid2str(pid)}
     if start:
         query['start'] = start
     markup = self._do_action('bbstcon', query)
     document = BeautifulSoup(markup)
     topic = Topic(board, pid)
     tables = document.findAll('table', {'class': 'main'})
     if not tables:
         # Nothing to parse: the response is not a topic page.
         raise ContentError()
     for table in tables:
         # The first row's link encodes the post's id and number.
         href = table.tr.td.a['href']
         post = Post(board, parse_pid(href), parse_num(href))
         # The second row's <textarea> holds the raw post body.
         body = table.findAll('tr')[1].td.textarea.text
         post.parse_post(body)
         topic.post_list.append(post)
     # Scan the footer links for the "next 30 posts" paging anchor.
     for anchor in document.body.center.findAll('a', recursive=False, limit=3):
         if anchor.text == u'本主题下30篇':
             topic.next_start = int(parse_href(anchor['href'], 'start'))
     return topic
Esempio n. 5
0
 def fetch_hot(self):
     """Fetch the grouped hot-topic listing ('bbstopall').

     Returns a list of sections, each section being a list of Header
     objects (title, board, pid).
     """
     html = self._do_action('bbstopall')
     soup = BeautifulSoup(html)
     items = soup.findAll('tr')
     ret = []
     tmp = None
     for i in items:
         if i.img:
             # A row containing an image starts a new section.
             tmp = []
             continue
         cells = i.findAll('td')
         if not cells[0].text:
             # An empty first cell closes the current section.
             ret.append(tmp)
             continue
         # NOTE(review): assumes a section-start (img) row always precedes
         # header rows — otherwise tmp is still None here; confirm against
         # the actual page layout.
         for j in cells:
             h = Header()
             links = j.findAll('a')
             # Per cell: first link is the topic, second link is its board.
             h.title = links[0].text.strip()
             h.board = links[1].text.strip()
             h.pid = parse_pid(links[0]['href'])
             tmp.append(h)
     return ret
Esempio n. 6
0
 def fetch_hot(self):
     """Fetch the grouped hot-topic listing ('bbstopall').

     Returns a list of sections; each section is a list of Header
     objects (title, board, pid).
     """
     document = BeautifulSoup(self._do_action('bbstopall'))
     sections = []
     current = None
     for row in document.findAll('tr'):
         if row.img:
             # An image row marks the start of a new section.
             current = []
             continue
         cells = row.findAll('td')
         if not cells[0].text:
             # An empty leading cell terminates the current section.
             sections.append(current)
             continue
         for cell in cells:
             links = cell.findAll('a')
             header = Header()
             # First link in the cell is the topic, second is its board.
             header.title = links[0].text.strip()
             header.board = links[1].text.strip()
             header.pid = parse_pid(links[0]['href'])
             current.append(header)
     return sections
Esempio n. 7
0
    def fetch_page(self, board, start=None):
        """Fetch one listing page of a board and return it as a Page.

        Runs the 'bbstdoc' action, optionally offset by `start`; each
        table row becomes a Header with number, author, date, title, pid
        and reply/view counts.
        """
        query = {'board': board}
        if start:
            query['start'] = start
        document = BeautifulSoup(self._do_action('bbstdoc', query))

        # The listing shows no year, so stamp entries with the current one.
        current_year = datetime.now().year
        page = Page(board)
        # First <tr> is the table header; later rows are post entries.
        for row in document.findAll('tr')[1:]:
            cells = row.findAll('td')
            header = Header()
            header.board = board
            try:
                # Listing numbers are 1-based on the page; store 0-based.
                header.num = int(cells[0].text) - 1
            except ValueError:
                # Non-numeric first cell: not a post row, skip it.
                continue
            header.author = cells[2].text.strip()
            raw_date = cells[3].text.strip()
            header.date = datetime.strptime(raw_date, self.DATE_FORMAT).replace(year=current_year)
            # Drop the two leading marker characters from the title cell.
            header.title = cells[4].text.strip()[2:]
            header.pid = parse_pid(cells[4].a['href'])
            counts = cells[5].text.strip()
            if counts.find('/') != -1:
                # "replies/views" combined in one cell.
                parts = counts.split('/')
                header.reply_count = int(parts[0])
                header.view_count = int(parts[1])
            else:
                header.view_count = int(counts)
            page.header_list.append(header)
        # TODO
        # Pick up the "previous page" paging link, if present.
        for anchor in document.body.center.findAll('a', recursive=False):
            if anchor.text == u'上一页':
                page.prev_start = int(parse_href(anchor['href'], 'start')) - 1
        return page
Esempio n. 8
0
    def fetch_page(self, board, start=None):
        """Fetch one listing page of a board and return it as a Page.

        Performs the 'bbstdoc' action (optionally offset by `start`) and
        scrapes each table row into a Header: number, author, date,
        title, pid and reply/view counts.
        """
        params = {'board': board}
        if start:
            params['start'] = start
        html = self._do_action('bbstdoc', params)
        soup = BeautifulSoup(html)

        # Skip the first <tr>, which is the table header row.
        items = soup.findAll('tr')[1:]
        # The listing omits the year, so stamp entries with the current one.
        year = datetime.now().year
        ret = Page(board)
        for i in items:
            cells = i.findAll('td')
            h = Header()
            h.board = board
            try:
                # Listing numbers are 1-based on the page; store 0-based.
                h.num = int(cells[0].text) - 1
            except ValueError:
                # Non-numeric first cell: not a post row, skip it.
                continue
            h.author = cells[2].text.strip()
            h.date = cells[3].text.strip()
            h.date = datetime.strptime(h.date, self.DATE_FORMAT)
            h.date = h.date.replace(year=year)
            # Drop the two leading marker characters from the title cell.
            h.title = cells[4].text.strip()[2:]
            h.pid = parse_pid(cells[4].a['href'])
            tmp = cells[5].text.strip()
            if tmp.find('/') != -1:
                # "replies/views" combined in one cell.
                tmp = tmp.split('/')
                h.reply_count = int(tmp[0])
                h.view_count = int(tmp[1])
            else:
                h.view_count = int(tmp)
            ret.header_list.append(h)
        # TODO
        # Pick up the "previous page" paging link, if present.
        for i in soup.body.center.findAll('a', recursive=False):
            if i.text == u'上一页':
                ret.prev_start = int(parse_href(i['href'], 'start')) - 1
        return ret