def run(self):
    """Yield successive pages of results from a paginated endpoint.

    This is a generator. Every pass advances ``self.offset`` by
    ``self.add`` and then issues one request, selected by
    ``self.http_method`` and ``self.start``:

    * ``'get'``: GET ``self.url`` with ``&offset=<offset>`` appended. The
      body is parsed as JSON and the ``'url'`` entry of each item is
      yielded one at a time. A falsy payload ends the iteration.
    * ``'post'`` with ``self.start`` being ``None``: POST a form holding
      ``method``, ``params`` and ``_xsrf``. The ``'msg'`` field of the JSON
      reply is stored in ``self.result`` and yielded whole. A falsy ``msg``
      ends the iteration.
    * ``'post'`` with ``self.start`` set: POST ``start``, ``offset`` and
      ``_xsrf``. The ``'msg'`` field is stored in ``self.result``; a first
      element equal to ``0`` ends the iteration, otherwise everything
      after the first element is yielded.

    Raises:
        ValueError: if ``self.http_method`` is neither ``'get'`` nor
            ``'post'``. Because this is a generator, the error surfaces on
            the first ``next()`` rather than at call time.
    """
    while True:
        # An unrecognised method matches none of the branches below, so
        # without this guard the loop would spin forever, never yielding
        # and never reaching a ``break``.
        if self.http_method not in ('get', 'post'):
            raise ValueError(
                "unsupported http_method: {0!r}".format(self.http_method))

        self.offset += self.add

        # e.g. http://zhuanlan.zhihu.com/api/columns/
        if self.http_method == 'get':
            r = requests.get(self.url + "&offset={0}".format(self.offset))
            # Parse the body once and reuse it for both the emptiness
            # check and the iteration.
            articles = json.loads(r.text)
            if not articles:
                break
            for each_article in articles:
                yield each_article['url']

        # e.g. http://www.zhihu.com/node/ProfileFollowersListV2
        if self.http_method == 'post' and self.start is None:
            # The endpoint takes its paging arguments as a JSON-looking
            # string; it is assembled by hand so the exact bytes sent stay
            # the same as before.
            params = '"offset":{0},"order_by":"created","hash_id":"{1}"'.format(
                self.offset, self.hash_id)
            payload = {
                'method': 'next',
                'params': "{" + params + "}",
                '_xsrf': self._xsrf,
            }
            r = requests.post(self.url, data=payload)
            self.result = json.loads(r.text)['msg']
            if not self.result:
                break
            yield self.result

        # e.g. user_url/topics
        if self.http_method == 'post' and self.start is not None:
            payload = {
                'start': self.start,
                'offset': self.offset,
                '_xsrf': self._xsrf,
            }
            r = requests.post(self.url, data=payload)
            self.result = json.loads(r.text)['msg']
            # A leading 0 marks the end of the data (presumably an item
            # count -- confirm against the endpoint).
            if self.result[0] == 0:
                break
            yield self.result[1:]
def upload_form(cls, form):
    """Post a login form and report the outcome as a dict.

    Args:
        cls: not used by this function's body.
        form: mapping of login fields. It must contain an ``"email"`` or a
            ``"phone_num"`` key; that key selects the login URL.

    Returns:
        * ``{"result": True}`` when the JSON reply has ``r == 0``.
        * ``{"error": {"code": ..., "message": ..., "data": ...}}`` built
          from the reply's ``errcode``, ``msg`` and ``data`` when
          ``r == 1``.
        * ``{"error": {"code": -1, "message": u"unknow error"}}`` for any
          other JSON reply, including a body that could not be parsed.
        * ``{"error": {"code": -2, "message": u"parse error"}}`` when the
          response is not declared as ``application/json``.

    Raises:
        ValueError: if ``form`` has neither an ``"email"`` nor a
            ``"phone_num"`` key.
        NetworkError: if the HTTP status code is not 200.
    """
    if "email" in form:
        url = "http://www.zhihu.com/login/email"
    elif "phone_num" in form:
        url = "http://www.zhihu.com/login/phone_num"
    else:
        raise ValueError(u"账号类型错误")

    headers = {
        'User-Agent': "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
                      "(KHTML, like Gecko) Chrome/42.0.2311.135 "
                      "Safari/537.36",
        'Host': "www.zhihu.com",
        'Origin': "http://www.zhihu.com",
        'Pragma': "no-cache",
        'Referer': "http://www.zhihu.com/",
        'X-Requested-With': "XMLHttpRequest",
    }

    from Requests import requests
    r = requests.post(url, data=form, headers=headers)
    if int(r.status_code) != 200:
        raise NetworkError(u"表单上传失败!")

    # Compare only the media type: servers commonly append parameters such
    # as "; charset=utf-8", and a missing header must not raise KeyError.
    try:
        content_type = r.headers['content-type']
    except KeyError:
        content_type = ""
    media_type = content_type.split(";", 1)[0].strip().lower()

    if media_type != "application/json":
        Logging.warn(u"无法解析服务器的响应内容: \n \t %s " % r.text)
        return {"error": {"code": -2, "message": u"parse error"}}

    try:
        result = json.loads(r.content)
    except (ValueError, TypeError) as e:
        Logging.error(u"JSON解析失败!")
        Logging.debug(e)
        Logging.debug(r.content)
        result = {}

    # Look the status up defensively: after a parse failure ``result`` is
    # empty, and indexing it directly would raise KeyError instead of
    # reaching the "unknown error" return below.
    status = result.get("r") if isinstance(result, dict) else None

    if status == 0:
        Logging.success(u"登录成功!")
        return {"result": True}

    if status == 1:
        # A rejected login is a failure, so it is logged as an error
        # rather than through the success channel.
        Logging.error(u"登录失败!")
        return {
            "error": {
                "code": int(result['errcode']),
                "message": result['msg'],
                "data": result['data'],
            }
        }

    Logging.warn(u"表单上传出现未知错误: \n \t %s )" % (str(result)))
    return {"error": {"code": -1, "message": u"unknow error"}}
def run(self):
    """Yield successive pages of results from a paginated endpoint.

    This is a generator. Every pass advances ``self.offset`` by
    ``self.add`` and then issues one request, selected by
    ``self.http_method`` and ``self.start``:

    * ``'get'``: GET ``self.url`` with ``&offset=<offset>`` appended. The
      body is parsed as JSON and the ``'url'`` entry of each item is
      yielded one at a time. A falsy payload ends the iteration.
    * ``'post'`` with ``self.start`` being ``None``: POST a form holding
      ``method``, ``params`` and ``_xsrf``. The ``'msg'`` field of the JSON
      reply is stored in ``self.result`` and yielded whole. A falsy ``msg``
      ends the iteration.
    * ``'post'`` with ``self.start`` set: POST ``start``, ``offset`` and
      ``_xsrf``. The ``'msg'`` field is stored in ``self.result``; a first
      element equal to ``0`` ends the iteration, otherwise everything
      after the first element is yielded.

    Raises:
        ValueError: if ``self.http_method`` is neither ``'get'`` nor
            ``'post'``. Because this is a generator, the error surfaces on
            the first ``next()`` rather than at call time.
    """
    while True:
        # An unrecognised method matches none of the branches below, so
        # without this guard the loop would spin forever, never yielding
        # and never reaching a ``break``.
        if self.http_method not in ('get', 'post'):
            raise ValueError(
                "unsupported http_method: {0!r}".format(self.http_method))

        self.offset += self.add

        # e.g. http://zhuanlan.zhihu.com/api/columns/
        if self.http_method == 'get':
            r = requests.get(self.url + "&offset={0}".format(self.offset))
            # Parse the body once and reuse it for both the emptiness
            # check and the iteration.
            articles = json.loads(r.text)
            if not articles:
                break
            for each_article in articles:
                yield each_article['url']

        # e.g. http://www.zhihu.com/node/ProfileFollowersListV2
        if self.http_method == 'post' and self.start is None:
            # The endpoint takes its paging arguments as a JSON-looking
            # string; it is assembled by hand so the exact bytes sent stay
            # the same as before.
            params = '"offset":{0},"order_by":"created","hash_id":"{1}"'.format(
                self.offset, self.hash_id)
            payload = {
                'method': 'next',
                'params': "{" + params + "}",
                '_xsrf': self._xsrf,
            }
            r = requests.post(self.url, data=payload)
            self.result = json.loads(r.text)['msg']
            if not self.result:
                break
            yield self.result

        # e.g. user_url/topics
        if self.http_method == 'post' and self.start is not None:
            payload = {
                'start': self.start,
                'offset': self.offset,
                '_xsrf': self._xsrf,
            }
            r = requests.post(self.url, data=payload)
            self.result = json.loads(r.text)['msg']
            # A leading 0 marks the end of the data (presumably an item
            # count -- confirm against the endpoint).
            if self.result[0] == 0:
                break
            yield self.result[1:]
def upload_form(cls, form):
    """Post a login form and report the outcome as a dict.

    Args:
        cls: not used by this function's body.
        form: mapping of login fields. It must contain an ``"email"`` or a
            ``"phone_num"`` key; that key selects the login URL.

    Returns:
        * ``{"result": True}`` when the JSON reply has ``r == 0``.
        * ``{"error": {"code": ..., "message": ..., "data": ...}}`` built
          from the reply's ``errcode``, ``msg`` and ``data`` when
          ``r == 1``.
        * ``{"error": {"code": -1, "message": u"unknow error"}}`` for any
          other JSON reply, including a body that could not be parsed.
        * ``{"error": {"code": -2, "message": u"parse error"}}`` when the
          response is not declared as ``application/json``.

    Raises:
        ValueError: if ``form`` has neither an ``"email"`` nor a
            ``"phone_num"`` key.
        NetworkError: if the HTTP status code is not 200.
    """
    if "email" in form:
        url = "http://www.zhihu.com/login/email"
    elif "phone_num" in form:
        url = "http://www.zhihu.com/login/phone_num"
    else:
        raise ValueError(u"账号类型错误")

    headers = {
        'User-Agent': "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
                      "(KHTML, like Gecko) Chrome/42.0.2311.135 "
                      "Safari/537.36",
        'Host': "www.zhihu.com",
        'Origin': "http://www.zhihu.com",
        'Pragma': "no-cache",
        'Referer': "http://www.zhihu.com/",
        'X-Requested-With': "XMLHttpRequest",
    }

    from Requests import requests
    r = requests.post(url, data=form, headers=headers)
    if int(r.status_code) != 200:
        raise NetworkError(u"表单上传失败!")

    # Compare only the media type: servers commonly append parameters such
    # as "; charset=utf-8", and a missing header must not raise KeyError.
    try:
        content_type = r.headers['content-type']
    except KeyError:
        content_type = ""
    media_type = content_type.split(";", 1)[0].strip().lower()

    if media_type != "application/json":
        Logging.warn(u"无法解析服务器的响应内容: \n \t %s " % r.text)
        return {"error": {"code": -2, "message": u"parse error"}}

    try:
        result = json.loads(r.content)
    except (ValueError, TypeError) as e:
        Logging.error(u"JSON解析失败!")
        Logging.debug(e)
        Logging.debug(r.content)
        result = {}

    # Look the status up defensively: after a parse failure ``result`` is
    # empty, and indexing it directly would raise KeyError instead of
    # reaching the "unknown error" return below.
    status = result.get("r") if isinstance(result, dict) else None

    if status == 0:
        Logging.success(u"登录成功!")
        return {"result": True}

    if status == 1:
        # A rejected login is a failure, so it is logged as an error
        # rather than through the success channel.
        Logging.error(u"登录失败!")
        return {
            "error": {
                "code": int(result['errcode']),
                "message": result['msg'],
                "data": result['data'],
            }
        }

    Logging.warn(u"表单上传出现未知错误: \n \t %s )" % (str(result)))
    return {"error": {"code": -1, "message": u"unknow error"}}