def load_data(self, data):
    """Build an ``ACcommentsPO`` from a positional record.

    ``data`` is read by index, positions 0 through 7, in that order:
    cid, content, user name, layer, acid, delete flag, siji flag and
    check time. The cid, layer, acid, delete and siji values are passed
    through ``int()``; the remaining values are stored exactly as given.

    Returns:
        The populated ``ACcommentsPO`` instance.
    """
    # (position in ``data``, setter to call, coerce with int()?)
    layout = (
        (0, "set_cid", True),
        (1, "set_content", False),
        (2, "set_user_name", False),
        (3, "set_layer", True),
        (4, "set_acid", True),
        (5, "set_delete", True),
        (6, "set_siji", True),
        (7, "set_check_time", False),
    )

    record = ACcommentsPO()
    for position, setter_name, as_int in layout:
        setter = getattr(record, setter_name)
        value = data[position]
        setter(int(value) if as_int else value)
    return record
def analyse(self, src):
    """Fetch and parse the first page of comments for one AcFun submission.

    ``src.get_id()`` supplies the submission id, which is converted with
    ``int()``. A positive id is queried through the article comment
    endpoint; any other id is treated as a bangumi (series) and queried
    with the negated value.

    Each entry of ``commentContentArr`` becomes one ``ACcommentsPO`` that
    is handed to ``self.checkSIJI`` and ``self.checkDelete`` (helpers
    defined elsewhere) and stamped with the current local time.

    Returns:
        A list of ``ACcommentsPO`` objects (possibly empty), or ``None``
        when the request check, the JSON decoding, or the comment parsing
        fails. Those failures are logged rather than raised.
    """
    acid = int(src.get_id())
    # Bangumi (series) carry a non-positive id; everything else is an article.
    is_article = acid > 0

    if is_article:
        # The separator previously read as a currency sign followed by
        # "tPage": "&curren" had been decoded as an HTML entity, which
        # glued the page parameter onto contentId.
        url = ("http://www.acfun.tv/comment_list_json.aspx?contentId="
               + str(acid) + "&currentPage=1")
    else:
        url = ('http://www.acfun.tv/comment/bangumi/web/list?bangumiId='
               + str(-acid) + '&pageNo=1')

    jsonContent = self.sendGet(url)
    if not self.checkURL(jsonContent):
        logging.warning("connect acfun comments fail")
        return None

    try:
        j_obj = json.loads(jsonContent)
    except Exception:
        # Deliberately broad: the payload comes from a remote server and a
        # malformed response must never take the crawler down.
        logging.warning("get acfun comments fail")
        return None

    # The bangumi endpoint nests the comments one level deeper, under "data".
    # Subscripting decoded JSON can only fail with KeyError (missing key) or
    # TypeError (value is not a mapping), so only those are caught here.
    try:
        container = j_obj if is_article else j_obj['data']
        comments = container["commentContentArr"]
    except (KeyError, TypeError):
        logging.error("commentContentArr is not exist")
        return None

    # Comments collected for this submission.
    row = []
    try:
        # Iterating yields keys, so ``comments`` is expected to be a
        # mapping; any other shape fails the lookup and is handled below.
        for index, key in enumerate(comments):
            entry = comments[key]

            comment = ACcommentsPO()  # one comment
            comment.set_acid(acid)  # submission id
            comment.set_cid(int(entry["cid"]))  # comment id
            comment.set_content(entry["content"])  # comment text
            comment.set_user_name(entry["userName"])  # author name
            comment.set_layer(int(entry["count"]))  # floor number
            userID = int(entry["userID"])  # author id

            # "siji" check
            self.checkSIJI(comment)
            # Deletion check
            self.checkDelete(comment, userID)

            # Timestamp. NOTE: when the data should be stamped relative to
            # persisting it is still an open question.
            comment.set_check_time(str(datetime.datetime.now()))
            row.append(comment)

            # Do not spend too long on submissions with a huge number of
            # comments: stop once the zero-based index exceeds 3000.
            if index > 3000:
                logging.error("over 3000, drop it.")
                break
    except Exception:
        # The server occasionally returns entries without the expected
        # fields; the whole submission is given up in that case.
        logging.error("commentContentArr is not exist")
        return None

    return row