def getComment(self, articleId, maxCommentCount):
    """Fetch comments for an article and return them as formatted strings.

    Requests ``self.commentUrl + articleId + '/app/comments/newList'``
    repeatedly, advancing an ``offset`` query parameter, until the offset
    reaches ``maxCommentCount``, a non-timeout error occurs, or the endpoint
    has answered with an empty or unusable page ``maxRetryTime`` times.

    Args:
        articleId: Article identifier; concatenated into the request URL.
        maxCommentCount: Paging stops once the offset reaches this value.
            Pages are appended whole, so the result can hold more entries
            than this number.

    Returns:
        A list of strings, each ``"<createTime> <nickname> <content>"``.
        The list is empty when nothing could be fetched.
    """
    url = self.commentUrl + articleId + '/app/comments/newList'
    headers = {'User-Agent': 'NewsApp/29.1 iOS/11.0.3 (iPhone8,1)'}
    pageSize = 10
    payload = {
        'format': 'building',
        'ibc': 'newsappios',
        'headLimit': 2,
        'tailLimit': 3,
        'showLevelThreshold': 5,
        'limit': pageSize,
    }
    commentList = []
    offset = 0
    retryTime = 0
    maxRetryTime = 5

    while offset < maxCommentCount:
        payload['offset'] = offset
        try:
            r = requests.get(url, params=payload, headers=headers, timeout=3)
            r = r.json()
        except requests.exceptions.Timeout:
            # Skip the window that timed out; the growing offset keeps the
            # loop bounded by maxCommentCount.
            offset += pageSize + payload['headLimit'] + payload['tailLimit']
            traceback.print_exc()
            continue
        except Exception:
            traceback.print_exc()
            break

        commentDict = r.get('comments') if isinstance(r, dict) else None
        if not isinstance(commentDict, dict) or not commentDict:
            # A missing, malformed or empty 'comments' entry does not advance
            # the offset, so it must count against the retry budget;
            # otherwise the same request would be repeated forever.
            retryTime += 1
            if retryTime >= maxRetryTime:
                break
            continue

        for comment in commentDict.values():
            publishTime = comment.get('createTime', '0000-00-00 00:00:00')

            # Look up the same key that is read ('nickname'); fall back to a
            # masked name when the user or the nickname is absent.
            user = comment.get('user') or {}
            nickname = user.get('nickname')
            if nickname is not None:
                userName = utils.formatComment(utils.formatContent(nickname))
            else:
                userName = '******'

            content = utils.formatComment(
                utils.formatContent(comment['content']))
            commentList.append(publishTime + ' ' + userName + ' ' + content)

        offset += len(commentDict)

    return commentList
def getComment(self, articleId, maxCommentCount):
    """Fetch comments for an article and return them as formatted strings.

    Requests ``self.commentUrl`` page by page (``page`` query parameter,
    20 items per request) until the number of collected comments reaches
    ``maxCommentCount``, a connect timeout occurs, or ``maxRetryTime``
    requests have failed or come back empty.

    Args:
        articleId: Sent to the endpoint as the ``comments_url`` parameter.
        maxCommentCount: Paging stops once this many comments were seen.
            Pages are appended whole, so the result can hold more entries
            than this number.

    Returns:
        A list of strings, each ``"<time> <nickname> <content>"`` where the
        time is ``add_time`` rendered as local ``YYYY-MM-DD HH:MM:SS``.
    """
    url = self.commentUrl
    headers = {'User-Agent': 'NewsApp/29.1 iOS/11.0.3 (iPhone8,1)'}
    payload = {'limit': 20, 'page': 1, 'comments_url': articleId}
    commentList1 = []
    offset = 0
    retryTime = 0
    maxRetryTime = 5

    while offset < maxCommentCount:
        try:
            r = requests.get(url, params=payload, headers=headers, timeout=3)
            commentList = r.json()['data']
        except requests.exceptions.ConnectTimeout:
            traceback.print_exc()
            break
        except Exception:
            traceback.print_exc()
            commentList = None

        # Always move on to the next page, whether or not this one worked.
        payload['page'] += 1

        if not commentList:
            # Failed and empty pages share one retry budget. Without it a
            # persistently failing endpoint would be polled forever, because
            # offset never grows on such pages.
            retryTime += 1
            if retryTime >= maxRetryTime:
                break
            continue

        for comment in commentList:
            data = comment['data']
            if 'add_time' in data:
                publishTime = time.strftime(
                    "%Y-%m-%d %H:%M:%S",
                    time.localtime(int(data['add_time'])))
            else:
                publishTime = '0000-00-00 00:00:00'

            nickname = comment.get('nickname')
            if nickname is not None:
                userName = utils.formatComment(utils.formatContent(nickname))
            else:
                userName = '******'

            content = utils.formatComment(
                utils.formatContent(data['comment_contents']))
            commentList1.append(publishTime + ' ' + userName + ' ' + content)

        offset += len(commentList)

    return commentList1
def getComment(self, articleId, maxCommentCount):
    """Fetch comments for an article and return them as formatted strings.

    Requests ``self.commentUrl`` page by page (``page`` query parameter,
    ``size`` 10) until the number of collected comments reaches
    ``maxCommentCount``, an empty page is returned, a connect timeout
    occurs, or ``maxRetryTime`` requests have failed.

    Args:
        articleId: Sent to the endpoint as the ``id`` parameter.
        maxCommentCount: Paging stops once this many comments were seen.
            Pages are appended whole, so the result can hold more entries
            than this number.

    Returns:
        A list of strings, each ``"<time> <author> <content>"`` where the
        time is ``ctime`` divided by 1000 and rendered as local
        ``YYYY-MM-DD HH:MM:SS``.
    """
    url = self.commentUrl
    headers = {'User-Agent': 'NewsApp/29.1 iOS/11.0.3 (iPhone8,1)'}
    payload = {
        'busiCode': 2,
        'id': articleId,
        'page': 1,
        'rollType': 2,
        'size': 10,
        'type': 3,
    }
    commentList1 = []
    offset = 0
    retryTime = 0
    maxRetryTime = 5

    while offset < maxCommentCount:
        try:
            r = requests.get(url, params=payload, headers=headers, timeout=3)
            # Extract inside the try so a malformed response is handled like
            # any other failed request instead of raising to the caller.
            commentList = r.json()['response']['commentList']
        except requests.exceptions.ConnectTimeout:
            traceback.print_exc()
            break
        except Exception:
            traceback.print_exc()
            payload['page'] += 1
            # Failed pages do not advance offset; cap them so a persistently
            # failing endpoint cannot keep the loop alive forever.
            retryTime += 1
            if retryTime >= maxRetryTime:
                break
            continue

        payload['page'] += 1
        if not commentList:
            break

        for comment in commentList:
            if 'ctime' in comment:
                # ctime is divided by 1000 before conversion
                # (presumably milliseconds -> seconds; confirm with the API).
                publishTime = time.strftime(
                    "%Y-%m-%d %H:%M:%S",
                    time.localtime(int(comment['ctime']) / 1000))
            else:
                publishTime = '0000-00-00 00:00:00'

            author = comment.get('author')
            if author is not None:
                userName = utils.formatComment(utils.formatContent(author))
            else:
                userName = '******'

            content = utils.formatComment(
                utils.formatContent(comment['content']))
            commentList1.append(publishTime + ' ' + userName + ' ' + content)

        offset += len(commentList)

    return commentList1
def getArticle(self, db, articleId, source):
    """Fetch one article and wrap it in a ``utils.News`` object.

    Sends a GET request to ``self.articleUrl`` with ``id=articleId`` and
    reads ``title``, ``url``, ``content`` and ``pubtime`` from the JSON
    response.

    Args:
        db: Passed through to ``utils.checkVisited`` together with the
            article's formatted web URL.
        articleId: Sent to the endpoint as the ``id`` parameter.
        source: Passed through to the ``utils.News`` constructor.

    Returns:
        ``None`` when the request or JSON decoding fails, or when the
        response has no usable ``title``; ``-1`` when
        ``utils.checkVisited`` reports the URL as already visited;
        otherwise the new ``utils.News`` instance.
    """
    url = self.articleUrl
    headers = {'User-Agent': 'NewsApp/29.1 iOS/11.0.3 (iPhone8,1)'}
    payload = {'id': articleId}

    try:
        r = requests.get(url, headers=headers, params=payload, timeout=3)
        r = r.json()
    except Exception:
        # Exception rather than a bare except, so KeyboardInterrupt and
        # SystemExit still propagate.
        traceback.print_exc()
        return None

    # A JSON payload that is not an object cannot carry an article.
    if not isinstance(r, dict):
        return None
    title = r.get('title')
    if title is None:
        return None

    webUrl = utils.formatUrl(r['url'])
    if utils.checkVisited(webUrl, db):
        return -1

    content = utils.formatContent(r['content'])
    appUrl = url
    publishTime = r['pubtime']
    return utils.News(title, appUrl, webUrl, content, publishTime, source)
def getArticle(self, db, articleId, source):
    """Fetch one article and wrap it in a ``utils.News`` object.

    Sends a GET request to ``self.articleUrl + articleId + '/full.html'``.
    The article is expected under the key ``articleId`` in the JSON
    response, from which ``title``, ``shareLink``, ``body``, ``ptime`` and
    the reply/vote counters are read.

    Args:
        db: Passed through to ``utils.checkVisited`` together with the
            article's formatted web URL.
        articleId: Article identifier; concatenated into the request URL
            and used as the key into the response.
        source: Passed through to the ``utils.News`` constructor.

    Returns:
        ``None`` when the request or JSON decoding fails, or when the
        response holds no entry for ``articleId`` or no usable ``title``;
        ``-1`` when ``utils.checkVisited`` reports the URL as already
        visited; otherwise the new ``utils.News`` instance with
        ``commentCount``, ``upCount`` and ``downCount`` set.
    """
    url = self.articleUrl + articleId + '/full.html'
    headers = {'User-Agent': 'NewsApp/29.1 iOS/11.0.3 (iPhone8,1)'}

    try:
        r = requests.get(url, headers=headers, timeout=3)
        r = r.json()
    except Exception:
        # Exception rather than a bare except, so KeyboardInterrupt and
        # SystemExit still propagate.
        traceback.print_exc()
        return None

    # A response without an entry for this article is "no article", not a
    # crash: return None just like the missing-title case.
    article = r.get(articleId) if isinstance(r, dict) else None
    if not isinstance(article, dict):
        return None
    title = article.get('title')
    if title is None:
        return None

    webUrl = utils.formatUrl(article['shareLink'])
    if utils.checkVisited(webUrl, db):
        return -1

    content = utils.formatContent(article['body'])
    appUrl = url
    publishTime = article['ptime']

    news = utils.News(title, appUrl, webUrl, content, publishTime, source)
    news.commentCount = article['replyCount']
    news.upCount = article['threadVote']
    news.downCount = article['threadAgainst']
    return news
def getComment(self, articleId, maxCommentCount):
    """Fetch comments for an article and return them as formatted strings.

    Requests ``self.commentUrl`` page by page (``page`` query parameter)
    until the number of collected comments reaches ``maxCommentCount``, an
    empty page is returned, a connect timeout occurs, or ``maxRetryTime``
    requests have failed.

    Args:
        articleId: Sent to the endpoint as the ``itemid`` parameter.
        maxCommentCount: Paging stops once this many comments were seen.
            Pages are appended whole, so the result can hold more entries
            than this number.

    Returns:
        A list of strings, each ``"<time> <author> <message>"`` where the
        time is ``dateline`` rendered as local ``YYYY-MM-DD HH:MM:SS``.
    """
    url = self.commentUrl
    headers = {'User-Agent': 'NewsApp/29.1 iOS/11.0.3 (iPhone8,1)'}
    payload = {
        'itemid': articleId,
        'page': 1,
        'prepare': 20,
        'app': 'news',
    }
    commentList1 = []
    offset = 0
    retryTime = 0
    maxRetryTime = 5

    while offset < maxCommentCount:
        try:
            r = requests.get(url, params=payload, headers=headers, timeout=3)
            commentList = r.json()['data']['content']
        except requests.exceptions.ConnectTimeout:
            traceback.print_exc()
            break
        except Exception:
            traceback.print_exc()
            payload['page'] += 1
            # Failed pages do not advance offset; cap them so a persistently
            # failing endpoint cannot keep the loop alive forever.
            retryTime += 1
            if retryTime >= maxRetryTime:
                break
            continue

        payload['page'] += 1
        if not commentList:
            break

        # The page is a mapping; only its values (the comments) are used.
        for comment in commentList.values():
            if 'dateline' in comment:
                publishTime = time.strftime(
                    "%Y-%m-%d %H:%M:%S",
                    time.localtime(int(comment['dateline'])))
            else:
                publishTime = '0000-00-00 00:00:00'

            author = comment.get('author')
            if author is not None:
                # Clean the author with formatContent then formatComment,
                # the same pipeline that is applied to the message body.
                userName = utils.formatComment(utils.formatContent(author))
            else:
                userName = '******'

            content = utils.formatComment(
                utils.formatContent(comment['message']))
            commentList1.append(publishTime + ' ' + userName + ' ' + content)

        offset += len(commentList)

    return commentList1