コード例 #1
0
ファイル: pull.py プロジェクト: yonghongxy/tieba_pachong
def _read_forum_header(tieba, soup):
    """Copy forum-level metadata from a parsed listing page onto *tieba*.

    Sets ``tieba.pageNum`` (only when a "last page" pagination link is
    present) and always sets ``tieba.menNum``, ``tieba.infoNum`` and
    ``tieba.slogan`` from the forum's header card.

    Raises:
        IndexError: if the header card, one of its counters, or the
            ``pn=`` marker in the pagination link is missing.
    """
    last_page_links = soup.select('.last.pagination-item')
    if last_page_links:
        # The value after "pn=" is divided by 50 -- presumably the number of
        # threads per listing page (TODO confirm against get_url()).
        last_offset = int(str(last_page_links[0].attrs['href']).split("pn=")[1])
        tieba.pageNum = int(last_offset / 50) - 1
    header = soup.select(".card_top_wrap.clearfix.card_top_theme")[0]
    counters = header.select(".card_num")[0]
    tieba.menNum = counters.select('.card_menNum')[0].text
    tieba.infoNum = counters.select('.card_infoNum')[0].text
    tieba.slogan = header.select("p.card_slogan")[0].text


def _parse_post(item, tieba):
    """Build a ``Post`` from one thread list element.

    Returns ``None`` when *item* carries no ``data-field`` attribute;
    otherwise a ``Post`` populated from that attribute's JSON payload plus
    the optional title and author-id markup inside the element.
    """
    if 'data-field' not in item.attrs:
        return None
    fields = json.loads(item.attrs['data-field'])

    post = Post()
    post.id = fields['id']
    post.author_name = fields['author_name']
    post.author_nickname = fields['author_nickname']
    post.author_portrait = fields['author_portrait']
    post.reply_num = fields['reply_num']
    post.is_top = fields['is_top']
    post.tieba_name = tieba.name
    post.source_page_num = tieba.currNum

    title_nodes = item.select('div.j_th_tit ')
    if title_nodes:
        post.title = title_nodes[0].a.text
    author_nodes = item.select('.tb_icon_author')
    if author_nodes:
        post.author_id = json.loads(author_nodes[0]['data-field'])['user_id']
    return post


def set_post_list(tieba):
    """Fetch listing pages for *tieba* and append their threads to ``tieba.posts``.

    Starting at ``tieba.currNum``, each page at ``tieba.get_url()`` is
    downloaded and parsed. When ``tieba.currNum == 1`` the forum header
    (page count, member/post counters, slogan) is read first. Every thread
    element with a ``data-field`` attribute becomes a ``Post``; those for
    which ``post.is_use()`` is true are appended to ``tieba.posts``.
    ``tieba.currNum`` is advanced in place as pages are consumed.

    Paging stops when ``tieba.currNum`` reaches ``tieba.pageNum``, when the
    ``tieba.maxNum`` cap is exceeded (a cap <= 0 disables it), or when the
    page shows the "forum not open" notice, in which case ``tieba.isMiss``
    is set to 0.

    Pages are walked with a loop rather than by self-recursion, so the
    number of pages is not limited by the interpreter's recursion depth.

    Args:
        tieba: forum state object; this function reads ``get_url()``,
            ``name``, ``currNum``, ``pageNum``, ``maxNum`` and ``posts``,
            and writes the attributes described above.

    Returns:
        None. Results are stored on *tieba*.
    """
    if tieba.posts is None:
        tieba.posts = []

    while True:
        url = tieba.get_url()
        print(url)
        # NOTE(review): no timeout is passed, so a stalled connection blocks
        # here indefinitely -- consider adding one once callers handle it.
        response = requests.get(url)
        soup = BeautifulSoup(response.text, "lxml")

        notices = soup.select('.icon-attention')
        if notices and notices[0].text == '抱歉,根据相关法律法规和政策,本吧暂不开放。':
            tieba.isMiss = 0
            return

        if tieba.currNum == 1:
            _read_forum_header(tieba, soup)
            pprint(vars(tieba))

        for item in soup.select('li.j_thread_list.clearfix'):
            post = _parse_post(item, tieba)
            if post is not None and post.is_use():
                tieba.posts.append(post)

        if tieba.currNum >= tieba.pageNum:
            return
        # NOTE(review): the comparison is strict, so page maxNum + 1 is still
        # fetched before stopping -- confirm whether that is intended.
        if tieba.maxNum > 0 and tieba.currNum > tieba.maxNum:
            return
        tieba.currNum = tieba.currNum + 1