def getPost(self, pageNumber: int, ajax: bool = True, useTemp: bool = True):
    """Fetch the HTML source of one page of the post and return it as text.

    The page is first looked up in the temp store (``self.__tempSave``);
    on a hit the stored copy is returned without touching the network.
    Otherwise the page is requested up to 10 times, and the process exits
    with status 1 if every attempt fails.

    Args:
        pageNumber: Page to fetch; appended to ``self.__postLink``.
        ajax: When False, the ``ajax=1&`` query fragment is stripped from
            the link before requesting.
        useTemp: When True, read from / write to the temp store.

    Returns:
        The page body decoded with ``errors='ignore'`` (or whatever
        ``readFileByID`` returns on a temp-store hit).

    Raises:
        SystemExit: With code 1 after 10 failed attempts, or when the user
            interrupts the download with Ctrl-C.
    """
    self.__workPageNumber = pageNumber
    link = self.__postLink + str(pageNumber)

    # Reuse a previously saved copy of this page when one exists.
    # NOTE(review): entries of existTemp['html'] appear to carry the page
    # number at index 1 -- confirm against the temp-store implementation.
    existTemp = self.__tempSave.getSameTemp()
    if existTemp.get('html') and useTemp:
        for i in existTemp['html']:
            if int(i[1]) == int(pageNumber):
                Avalon.debug_info('第%d页已经在临时文件中存在,跳过' % pageNumber)
                return self.__tempSave.readFileByID(i)

    if ajax is False:
        link = link.replace('ajax=1&', '')

    for tryTimes in range(1, 11):
        try:
            postRequest = request.Request(link)
            # Set request headers to disguise the crawler (necessity is
            # questionable). A failure here is reported by the generic
            # handler below and retried, instead of being skipped silently.
            postRequest.add_header(
                'User-Agent',
                random.choice(self.__userAgent).replace('\n', ''))
            postRequest.add_header('Referer', 'https://tieba.baidu.com')
            # The context manager closes the HTTP response even if read()
            # raises, so retries do not leak connections.
            with request.urlopen(postRequest, timeout=5) as response:
                postRead: bytes = response.read()
            if self.debug:
                Avalon.debug_info('链接:"%s"请求头:%s.' % (link, postRequest.headers))
        # Error handling
        except error.URLError as e:
            Avalon.warning("获取帖子正文失败!原因:%s(%s/10)" % (str(e.reason), str(tryTimes)))
        except timeout as e:
            Avalon.warning("获取帖子正文失败!原因:%s(%s/10)" % (str(e), str(tryTimes)))
        except KeyboardInterrupt:
            Avalon.critical("用户强制退出")
            # Same effect as quit(1), without depending on the `site`
            # module's interactive helpers being installed.
            raise SystemExit(1) from None
        except Exception:
            # `Exception` rather than a bare except, so SystemExit and
            # friends are never mistaken for a failed download.
            Avalon.warning("获取帖子正文失败!原因:未知错误(%s/10)" % tryTimes)
        # No error: stop retrying
        else:
            if self.debug:
                Avalon.debug_info('Link %s Get Successed.' % link)
            break
    else:
        # Every attempt failed.
        Avalon.error('获取失败!')
        if self.debug:
            Avalon.debug('Link:%s' % link)
        raise SystemExit(1)

    # Decode once and reuse the text for both the temp store and the caller.
    postText = postRead.decode(errors='ignore')
    if useTemp is True:
        self.__tempSave.savePostRaw(postText, pageNumber=pageNumber)
    return postText
def __getUserName(self, userID: str):
    """Return the user name recorded for ``userID``.

    Looks the ID up through ``self.__db.checkExistUsers``. When the lookup
    yields a truthy record, element 1 of that record is returned as a
    string; otherwise a debug message is logged and the ID itself is
    returned as a string.
    """
    record = self.__db.checkExistUsers(userID)
    if record:
        return str(record[1])

    # No record for this ID: fall back to the ID itself.
    Avalon.debug(
        'User ID: %s Can\'t Get Username,Will Use ID Instead.' % userID)
    return str(userID)
def methodPost(url: str, datasEncoded: bytes, headers: dict = None, maxTryTimes: int = 10):
    """Send an HTTP POST and return the response body, retrying on failure.

    Args:
        url: Target URL; normalised through ``urlparse(...).geturl()``.
        datasEncoded: Already-encoded request body.
        headers: Optional request headers. ``None`` (the default) means no
            extra headers.
        maxTryTimes: Maximum number of attempts.

    Returns:
        The response body as ``bytes``.

    Notes:
        When every attempt fails an error is logged and ``__quitCheck(1)``
        is called. NOTE(review): this presumably terminates the program;
        if it ever returns, ``dataPost`` is unbound and the final
        ``return`` raises ``UnboundLocalError`` -- confirm.
    """
    # A fresh dict per call avoids sharing one mutable default between calls.
    if headers is None:
        headers = {}

    encodedUrl = parse.urlparse(url=url).geturl()
    requsetMaker = request.Request(url=encodedUrl, headers=headers, data=datasEncoded)

    for tryTimes in range(maxTryTimes):
        # Attempts still available after this one; the previous expression
        # (maxTryTimes - tryTimes) over-reported the count by one.
        retriesLeft = maxTryTimes - tryTimes - 1
        try:
            # The context manager closes the response even if read() fails.
            with request.urlopen(url=requsetMaker, timeout=10) as response:
                dataPost = response.read()
        except error.URLError as e:
            Avalon.debug(
                'POST "%s" Failed,reason:%s,Program will Try %d Times Later.' % (url, e.reason, retriesLeft))
        except timeout as e:
            Avalon.debug(
                'POST "%s" Timeout,reason:%s,Program will Try %d Times Later.' % (url, e, retriesLeft))
        except Exception:
            # `Exception` rather than a bare except, so Ctrl-C and
            # SystemExit propagate instead of triggering another retry.
            Avalon.debug(
                'POST "%s" Timeout with unknown reason,Program will Try %d Times Later.' % (url, retriesLeft))
        else:
            break
    else:
        Avalon.error(
            'POST "%s" Failed during all request,please check your network status.' % url)
        __quitCheck(1)
    return bytes(dataPost)