def worker():
    """Keep fetching further pages of the home feed while ``flag`` is truthy.

    Each iteration POSTs to the ``HomeFeedListV2`` endpoint through the shared
    module-level ``session``. Every HTML fragment in the response's ``"msg"``
    list is parsed, its first ``.feed-item.folding.feed-item-hook`` element is
    wrapped in a ``TLItem``, and the result is recorded in the module-level
    ``datas`` (as a three-element list ending in the item id) and ``tlitems``.

    After a page has been processed the request cursor advances: ``start``
    becomes the id of the last item seen and ``offset`` grows by 21.

    Fetching is paused while ``len(datas) - offset`` exceeds ``10 * limit``.

    Rebinds the module-level ``tid``; the other module-level names
    (``datas``, ``tlitems``, ``offset``, ``limit``, ``session``, ``headers``,
    ``_xsrf``, ``flag``) are only read or mutated in place.
    """
    # NOTE(review): `worker` is defined a second time later in this file;
    # that later definition is the one bound at module level.
    global tid

    # Imported at call time rather than at module top, as in the original
    # (presumably to avoid an import cycle with TimeLine -- TODO confirm).
    from TimeLine import TLItem

    pause = 6          # seconds to wait between polls and before a retry
    offset_step = 21   # amount the request offset grows per fetched page

    url = "https://www.zhihu.com/node/HomeFeedListV2"
    params = {"start": tid, "offset": offset_step}
    data = {"method": "next", "_xsrf": _xsrf, "params": json.dumps(params)}

    while flag:
        # Enough unread entries are buffered already; wait before checking again.
        if len(datas) - offset > 10 * limit:
            time.sleep(pause)
            continue

        try:
            res = session.post(url, data, headers=headers)
            msgs = res.json()["msg"]
        except Exception:
            # Request failed or the payload was not the expected JSON.
            # Back off before retrying instead of spinning in a tight loop,
            # and let KeyboardInterrupt/SystemExit propagate.
            time.sleep(pause)
            continue

        for msg in msgs:
            soup = BeautifulSoup(msg, "html.parser")
            matches = soup.select(".feed-item.folding.feed-item-hook")
            if not matches:
                # Fragment holds no feed item; skip it rather than let an
                # IndexError terminate the whole worker.
                continue
            iitem = TLItem(matches[0], _xsrf)
            # get_item_info() yields three values: the first becomes the new
            # `tid`, the other two are stored ahead of it in the `datas` row.
            tid, first, second = iitem.get_item_info()
            datas.append([first, second, tid])
            tlitems.append(iitem)

        # Advance the cursor for the next page.
        params["start"] = tid
        params["offset"] += offset_step
        data["params"] = json.dumps(params)
        time.sleep(pause)
def index():
    """Load the home page and seed the module-level feed state.

    Requests ``zhihu`` via ``mul_get_request`` with the shared ``session``
    and calls ``sys.exit()`` when that helper returns a falsy value. The
    ``_xsrf`` form token is then extracted from the response body and stored
    in the module-level ``_xsrf``.

    Every ``.feed-item.folding.feed-item-hook`` element on the page is wrapped
    in a ``TLItem``; for each one a three-element row ending in the item id is
    appended to ``datas`` and the ``TLItem`` itself to ``tlitems``. The
    module-level ``tid`` ends up holding the id of the last item processed.
    """
    global tid, _xsrf, session

    response = mul_get_request(session=session, url=zhihu, headers=headers)
    if not response:
        sys.exit()

    page = response.content
    # First token match wins; an IndexError here means the page carried none.
    _xsrf = re.findall(r'name="_xsrf" value="(\S+)"', page)[0]

    feed_nodes = BeautifulSoup(page, "html.parser").select(
        ".feed-item.folding.feed-item-hook"
    )
    for node in feed_nodes:
        # Imported at call time, only once an item actually needs wrapping.
        from TimeLine import TLItem

        entry = TLItem(node, _xsrf)
        # get_item_info() yields three values: the first becomes `tid`, the
        # remaining two are stored ahead of it in the `datas` row.
        tid, first, second = entry.get_item_info()
        datas.append([first, second, tid])
        tlitems.append(entry)
def worker():
    """Keep fetching further pages of the home feed while ``flag`` is truthy.

    Each iteration POSTs to the ``HomeFeedListV2`` endpoint through the shared
    module-level ``session``. Every HTML fragment in the response's ``"msg"``
    list is parsed, its first ``.feed-item.folding.feed-item-hook`` element is
    wrapped in a ``TLItem``, and the result is recorded in the module-level
    ``datas`` (as a three-element list ending in the item id) and ``tlitems``.

    After a page has been processed the request cursor advances: ``start``
    becomes the id of the last item seen and ``offset`` grows by 21.

    Fetching is paused while ``len(datas) - offset`` exceeds ``10 * limit``.

    Rebinds the module-level ``tid``; the other module-level names
    (``datas``, ``tlitems``, ``offset``, ``limit``, ``session``, ``headers``,
    ``_xsrf``, ``flag``) are only read or mutated in place.
    """
    # NOTE(review): an earlier `def worker` exists in this file; this later
    # definition replaces it at module level.
    global tid

    # Imported at call time rather than at module top, as in the original
    # (presumably to avoid an import cycle with TimeLine -- TODO confirm).
    from TimeLine import TLItem

    pause = 6          # seconds to wait between polls and before a retry
    offset_step = 21   # amount the request offset grows per fetched page

    url = "https://www.zhihu.com/node/HomeFeedListV2"
    params = {"start": tid, "offset": offset_step}
    data = {"method": "next", "_xsrf": _xsrf, "params": json.dumps(params)}

    while flag:
        # Enough unread entries are buffered already; wait before checking again.
        if len(datas) - offset > 10 * limit:
            time.sleep(pause)
            continue

        try:
            res = session.post(url, data, headers=headers)
            msgs = res.json()["msg"]
        except Exception:
            # Request failed or the payload was not the expected JSON.
            # Back off before retrying instead of spinning in a tight loop,
            # and let KeyboardInterrupt/SystemExit propagate.
            time.sleep(pause)
            continue

        for msg in msgs:
            soup = BeautifulSoup(msg, "html.parser")
            matches = soup.select(".feed-item.folding.feed-item-hook")
            if not matches:
                # Fragment holds no feed item; skip it rather than let an
                # IndexError terminate the whole worker.
                continue
            iitem = TLItem(matches[0], _xsrf)
            # get_item_info() yields three values: the first becomes the new
            # `tid`, the other two are stored ahead of it in the `datas` row.
            tid, first, second = iitem.get_item_info()
            datas.append([first, second, tid])
            tlitems.append(iitem)

        # Advance the cursor for the next page.
        params["start"] = tid
        params["offset"] += offset_step
        data["params"] = json.dumps(params)
        time.sleep(pause)