Esempio n. 1
0
    def catch_content(self):
        """Fetch the column's metadata and dispatch its posts page by page.

        The column id is extracted from ``self.url`` via ``Match.column``, the
        column info is requested from the zhuanlan columns API, and
        ``self.send_bulk`` is then called once per page of at most 50 posts.
        Every offset is dispatched exactly once; the limit of the final call
        is clamped to the number of posts remaining. When the column reports
        zero posts, ``send_bulk`` is not called at all.

        Raises:
            ValueError: if the response body is not valid JSON, or if
                ``postsCount`` is a non-numeric string.
            KeyError: if the decoded response lacks ``intro`` or
                ``postsCount``.
        """
        # NOTE(review): assumes Match.column returns a match object; a URL
        # that does not match would presumably fail on .group() - confirm.
        column_id = Match.column(self.url).group("column_id")
        print("column_id: {}".format(column_id))
        headers = {
            'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Mobile Safari/537.36'
        }

        r = requests.get(
            url="https://zhuanlan.zhihu.com/api/columns/{}".format(column_id),
            headers=headers
        )
        columns_info = json.loads(r.text)
        intro = columns_info["intro"]
        print("Got column info: {}".format(intro if intro != '' else 'None'))

        # Convert once so the range bound and the remainder arithmetic agree
        # even when the API delivers the count as a string.
        posts_count = int(columns_info["postsCount"])
        page_size = 50
        for offset in range(0, posts_count, page_size):
            # Clamp the last page to the posts that actually remain, so no
            # offset needs a second, overlapping call.
            limit = min(page_size, posts_count - offset)
            self.send_bulk(headers, column_id, offset, limit, columns_info)
Esempio n. 2
0
    def catch_content(self):
        """Download column info for ``self.url`` and send its posts in pages.

        Steps performed:

        1. Extract ``column_id`` from ``self.url`` using ``Match.column``.
        2. GET the column description from the zhuanlan columns API and
           decode it as JSON.
        3. Call ``self.send_bulk`` once for each 50-post window, passing the
           window's offset and a limit that never exceeds the posts left.

        No window is sent more than once, and nothing is sent when the
        column reports zero posts.

        Raises:
            ValueError: if the response body is not valid JSON, or if
                ``postsCount`` is a non-numeric string.
            KeyError: if ``intro`` or ``postsCount`` is missing from the
                decoded response.
        """
        # NOTE(review): presumably Match.column yields a regex match object;
        # behaviour for a non-matching URL is not visible here - confirm.
        column_id = Match.column(self.url).group("column_id")
        print("column_id: {}".format(column_id))
        headers = {
            'User-Agent':
            'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Mobile Safari/537.36'
        }

        r = requests.get(
            url="https://zhuanlan.zhihu.com/api/columns/{}".format(column_id),
            headers=headers)
        columns_info = json.loads(r.text)
        print("Got column info: {}".format(
            columns_info["intro"] if columns_info["intro"] != '' else 'None'))

        # Normalise the count to int a single time; it is used both as the
        # range bound and in the remainder subtraction below.
        total_posts = int(columns_info["postsCount"])
        batch_size = 50
        for offset in range(0, total_posts, batch_size):
            # The final window may be short: request only what is left
            # instead of issuing a separate trailing call for the same offset.
            remaining = total_posts - offset
            self.send_bulk(headers, column_id, offset,
                           min(batch_size, remaining), columns_info)