Esempio n. 1
0
def worker():
    """Page through the Zhihu home feed and append parsed items to the shared lists.

    Loops for as long as the module-level ``flag`` is truthy.  Each pass posts
    the current paging parameters to the ``HomeFeedListV2`` endpoint, wraps the
    first feed-item element of every HTML fragment in the JSON ``msg`` list in
    a ``TLItem``, and appends ``[t, l, tid]`` to ``datas`` and the ``TLItem``
    to ``tlitems``.

    Reads the module-level names ``flag``, ``limit``, ``offset``, ``headers``,
    ``_xsrf``, ``session``, ``datas`` and ``tlitems``.  Rebinds the global
    ``tid`` to the first value returned by ``get_item_info()`` for the most
    recently parsed item; that value is sent as ``start`` on the next request.

    Request and decoding failures are treated as transient: the loop waits and
    retries with the same parameters instead of raising.
    """
    global tid
    global datas
    global offset
    global session

    # Kept as a function-scope import like the original, but executed once
    # instead of once per message.
    # NOTE(review): presumably local to avoid a circular import -- confirm.
    from TimeLine import TLItem

    url = "https://www.zhihu.com/node/HomeFeedListV2"
    page_step = 21
    pause = 6  # seconds between requests and before a retry
    params = {
        "start": tid,
        "offset": page_step,
    }
    data = {
        "method": "next",
        "_xsrf": _xsrf,
        "params": json.dumps(params),
    }
    while flag:
        # Stop fetching while more than 10 * limit entries sit beyond offset.
        if len(datas) - offset > 10 * limit:
            time.sleep(pause)
            continue
        try:
            res = session.post(url, data, headers=headers)
            msgs = res.json()["msg"]
        except Exception:
            # Best-effort retry.  Sleep first so a persistent failure does not
            # turn into a tight loop hammering the server; catching Exception
            # (not a bare except) lets KeyboardInterrupt/SystemExit through.
            time.sleep(pause)
            continue
        for msg in msgs:
            soup = BeautifulSoup(msg, "html.parser")
            matches = soup.select(".feed-item.folding.feed-item-hook")
            if not matches:
                # A fragment without a feed item used to raise IndexError and
                # end the worker; skip it instead.
                continue
            iitem = TLItem(matches[0], _xsrf)
            tid, t, l = iitem.get_item_info()
            datas.append([t, l, tid])
            tlitems.append(iitem)
        # Advance the cursor for the next page.
        params["start"] = tid
        params["offset"] += page_step
        data["params"] = json.dumps(params)
        time.sleep(pause)
Esempio n. 2
0
def index():
    """Fetch the Zhihu front page, store its ``_xsrf`` token and seed the feed lists.

    Requests the page at the module-level ``zhihu`` URL through
    ``mul_get_request``, extracts the ``_xsrf`` form token into the global
    ``_xsrf``, then wraps every feed-item element found in the page in a
    ``TLItem`` and appends ``[t, l, tid]`` to ``datas`` and the ``TLItem`` to
    ``tlitems``.  The global ``tid`` ends up holding the first value returned
    by ``get_item_info()`` for the last item on the page.

    Raises:
        SystemExit: if ``mul_get_request`` returns a falsy result, or if the
            page contains no ``_xsrf`` token.
    """
    global tid
    global _xsrf
    global session

    res = mul_get_request(session=session, url=zhihu, headers=headers)
    if not res:
        # Exit with a message (non-zero status) rather than a silent exit(0).
        sys.exit("index: request for the front page failed")

    # NOTE(review): a str pattern is matched against res.content; if this runs
    # on Python 3 and content is bytes, this needs a bytes pattern or decoded
    # text -- confirm the target interpreter.
    tokens = re.findall(r'name="_xsrf" value="(\S+)"', res.content)
    if not tokens:
        # Previously an unexplained IndexError; report the real problem.
        sys.exit("index: no _xsrf token found in the front page")
    _xsrf = tokens[0]

    soup = BeautifulSoup(res.content, "html.parser")
    items = soup.select(".feed-item.folding.feed-item-hook")

    # Function-scope import as in the original, run once instead of per item.
    from TimeLine import TLItem

    for item in items:
        iitem = TLItem(item, _xsrf)
        tid, t, l = iitem.get_item_info()
        datas.append([t, l, tid])
        tlitems.append(iitem)
Esempio n. 3
0
def index():
    """Load the Zhihu front page, capture the ``_xsrf`` token and collect its feed items.

    Steps:
      1. Request the module-level ``zhihu`` URL via ``mul_get_request``.
      2. Pull the ``_xsrf`` form token out of the response body and store it
         in the global ``_xsrf``.
      3. For each feed-item element in the page, build a ``TLItem`` and append
         ``[t, l, tid]`` to ``datas`` and the ``TLItem`` to ``tlitems``.

    The global ``tid`` is rebound on every item, so after the call it holds
    the first value returned by ``get_item_info()`` for the last item.

    Raises:
        SystemExit: when ``mul_get_request`` returns a falsy result, or when
            no ``_xsrf`` token can be found in the page.
    """
    global tid
    global _xsrf
    global session

    res = mul_get_request(session=session, url=zhihu, headers=headers)
    if not res:
        # A bare sys.exit() would terminate silently with status 0.
        sys.exit("index: request for the front page failed")

    # NOTE(review): res.content is searched with a str pattern; on Python 3
    # with bytes content this would raise TypeError -- confirm the runtime.
    found = re.findall(r'name="_xsrf" value="(\S+)"', res.content)
    if not found:
        # Indexing an empty result used to surface as a bare IndexError.
        sys.exit("index: no _xsrf token found in the front page")
    _xsrf = found[0]

    soup = BeautifulSoup(res.content, "html.parser")

    # Still a function-scope import, but no longer repeated for every item.
    from TimeLine import TLItem

    for item in soup.select(".feed-item.folding.feed-item-hook"):
        iitem = TLItem(item, _xsrf)
        tid, t, l = iitem.get_item_info()
        datas.append([t, l, tid])
        tlitems.append(iitem)
Esempio n. 4
0
def worker():
    """Keep fetching further pages of the Zhihu home feed into the shared lists.

    Runs while the module-level ``flag`` is truthy.  On each pass it posts the
    paging parameters to ``HomeFeedListV2``, takes the ``msg`` list from the
    JSON reply, and for every HTML fragment in it builds a ``TLItem`` from the
    first feed-item element, appending ``[t, l, tid]`` to ``datas`` and the
    ``TLItem`` to ``tlitems``.

    Uses the module-level names ``flag``, ``limit``, ``offset``, ``headers``,
    ``_xsrf``, ``session``, ``datas`` and ``tlitems``.  The global ``tid`` is
    rebound to the first value returned by ``get_item_info()`` for each parsed
    item and is sent as ``start`` on the following request.

    Failed requests and undecodable replies do not raise; the loop pauses and
    retries with unchanged parameters.
    """
    global tid
    global datas
    global offset
    global session

    # Function-scope import preserved from the original, hoisted out of the
    # per-message loop.
    # NOTE(review): presumably local to dodge a circular import -- confirm.
    from TimeLine import TLItem

    url = "https://www.zhihu.com/node/HomeFeedListV2"
    step = 21
    delay = 6  # seconds to wait between requests and before retrying
    params = {"start": tid, "offset": step}
    data = {"method": "next", "_xsrf": _xsrf, "params": json.dumps(params)}
    while flag:
        # Hold off while more than 10 * limit entries lie beyond offset.
        if len(datas) - offset > 10 * limit:
            time.sleep(delay)
            continue
        try:
            res = session.post(url, data, headers=headers)
            msgs = res.json()["msg"]
        except Exception:
            # Retry later rather than immediately: the original `continue`
            # with no delay spun in a tight loop on persistent errors.
            # Exception (not bare except) keeps Ctrl-C and sys.exit working.
            time.sleep(delay)
            continue
        for msg in msgs:
            soup = BeautifulSoup(msg, "html.parser")
            hits = soup.select(".feed-item.folding.feed-item-hook")
            if not hits:
                # No feed item in this fragment; indexing [0] would raise
                # IndexError and terminate the worker.
                continue
            iitem = TLItem(hits[0], _xsrf)
            tid, t, l = iitem.get_item_info()
            datas.append([t, l, tid])
            tlitems.append(iitem)
        # Move the paging cursor forward for the next request.
        params["start"] = tid
        params["offset"] += step
        data["params"] = json.dumps(params)
        time.sleep(delay)