def _is_forum_closed(soup):
    """Return True when the page shows the "forum is not open" notice."""
    notices = soup.select('.icon-attention')
    return bool(notices) and notices[0].text == '抱歉,根据相关法律法规和政策,本吧暂不开放。'


def _read_forum_header(tieba, soup):
    """Copy the page count and header-card fields of the first page onto *tieba*."""
    last_links = soup.select('.last.pagination-item')
    if last_links:
        # The "last page" link carries a numeric offset after "pn=".
        # Presumably 50 is the number of threads per listing page -- confirm.
        last_offset = int(str(last_links[0].attrs['href']).split("pn=")[1])
        tieba.pageNum = int(last_offset / 50) - 1

    # Indexing with [0] is deliberate: a missing header card raises IndexError
    # instead of silently leaving the fields unset.
    card = soup.select(".card_top_wrap.clearfix.card_top_theme")[0]
    counters = card.select(".card_num")[0]
    tieba.menNum = counters.select('.card_menNum')[0].text
    tieba.infoNum = counters.select('.card_infoNum')[0].text
    tieba.slogan = card.select("p.card_slogan")[0].text
    pprint(vars(tieba))


def _build_post(tieba, item):
    """Build a Post from one thread ``<li>``; return None if it has no data-field."""
    if 'data-field' not in item.attrs:
        return None
    fields = json.loads(item.attrs['data-field'])

    post = Post()
    post.id = fields['id']
    post.author_name = fields['author_name']
    post.author_nickname = fields['author_nickname']
    post.author_portrait = fields['author_portrait']
    post.reply_num = fields['reply_num']
    post.is_top = fields['is_top']
    post.tieba_name = tieba.name
    post.source_page_num = tieba.currNum

    titles = item.select('div.j_th_tit ')
    if titles:
        post.title = titles[0].a.text

    author_icons = item.select('.tb_icon_author')
    if author_icons:
        # The author icon carries its own JSON data-field holding the user id.
        post.author_id = json.loads(author_icons[0]['data-field'])['user_id']
    return post


def set_post_list(tieba):
    """Fetch the forum's listing pages and append the usable posts to ``tieba.posts``.

    Starting at ``tieba.currNum``, each page at ``tieba.get_url()`` is
    downloaded and parsed. On page 1 the page count (``pageNum``) and the
    header-card fields (``menNum``, ``infoNum``, ``slogan``) are also read.
    ``tieba.currNum`` is advanced as pages are consumed. Paging is done with a
    loop rather than recursion so that forums with many pages cannot exhaust
    the interpreter's recursion limit.

    Args:
        tieba: Forum object providing ``get_url()``, ``name``, ``posts``,
            ``currNum``, ``pageNum`` and ``maxNum``. It is mutated in place.

    Returns:
        None. If the forum shows the "not open" notice, ``tieba.isMiss`` is
        set to 0 and crawling stops immediately.

    Raises:
        IndexError, KeyError, ValueError: If the page markup or embedded JSON
            does not have the expected shape.
    """
    if tieba.posts is None:
        tieba.posts = []

    while True:
        url = tieba.get_url()
        print(url)
        # NOTE(review): no timeout is passed, so a stalled connection blocks
        # indefinitely -- consider adding one once callers can handle it.
        response = requests.get(url)
        soup = BeautifulSoup(response.text, "lxml")

        if _is_forum_closed(soup):
            tieba.isMiss = 0
            return

        if tieba.currNum == 1:
            _read_forum_header(tieba, soup)

        for item in soup.select('li.j_thread_list.clearfix'):
            post = _build_post(tieba, item)
            if post is not None and post.is_use():
                tieba.posts.append(post)

        if tieba.currNum >= tieba.pageNum:
            return
        # NOTE(review): with ">" one page beyond maxNum is still fetched when
        # currNum starts at 1 -- confirm whether ">=" was intended.
        if tieba.maxNum > 0 and tieba.currNum > tieba.maxNum:
            return
        tieba.currNum = tieba.currNum + 1