def run(self):
    """Check every followed user and queue their works for download.

    Workflow:
      1. Fetch the followed-user list with ``self.get_users()``.
      2. For each user, fetch the work ids with ``self.get_user_illust``.
         When ``self.db`` exposes a ``pool`` attribute (presumably meaning
         the database is enabled -- confirm against the db class), read the
         stored latest id and stored total, then persist the new latest id.
      3. If the user's ``latest_id`` is not older than the stored one and
         fewer works are stored than the user currently has, submit every
         work id to a thread pool (``self.thread_by_illust``) and pause.
      4. Otherwise log the current state; when ``latest_id`` is ``-1`` the
         user's records are removed from the database.

    Returns:
        None. Returns early when fetching the follow list fails or the
        list is empty.

    Raises:
        SystemExit: when ``self.get_users()`` returns the
            ``TEMP_MSG["UL_TEXT"]`` "not logged in" marker.
    """
    log_str(TEMP_MSG["BEGIN_INFO"].format(self.class_name))
    try:
        u_list = self.get_users()
    except Exception as e:
        log_str(e)
        log_str(TEMP_MSG["FOLLOW_ERROR_INFO"].format(self.class_name))
        log_str(TEMP_MSG["SLEEP_INFO"].format(self.class_name))
        return

    # Not logged in. This must be tested BEFORE the emptiness checks: the
    # marker is a non-empty value, so testing "!= []" first (as the code
    # previously did) made this branch unreachable and the marker was then
    # iterated as if it were a user list.
    if u_list == TEMP_MSG["UL_TEXT"]:
        log_str(TEMP_MSG["UNLOGIN_INFO"].format(self.class_name))
        # Same effect as exit(), without relying on the `site` helper.
        raise SystemExit

    # The follow list is empty: nothing to do.
    if not u_list:
        log_str(TEMP_MSG["NO_FOLLOW_USERS"].format(self.class_name))
        return

    total_users = len(u_list)
    log_str(TEMP_MSG["FOLLOW_SUCCESS_INFO"].format(
        self.class_name, total_users))

    # Bound before the try block so the cleanup in `finally` never hits an
    # unbound name if ThreadPool() itself fails.
    pool = None
    try:
        pool = ThreadPool(8)
        for index, u in enumerate(u_list, start=1):
            all_illust = self.get_user_illust(u)
            illust_count = len(all_illust)

            if hasattr(self.db, "pool"):
                latest_id = self.db.check_user(u)
                d_total = self.db.get_total(u)
                # The stored latest id is refreshed here, before the
                # comparison below, for every user.
                self.db.update_latest_id(u)
            else:
                latest_id, d_total = 0, 0

            position = "({}/{})".format(index, total_users)
            if u["latest_id"] >= latest_id and d_total < illust_count:
                # Update condition met: queue every work of this user.
                log_str(TEMP_MSG["UPDATE_USER_INFO"].format(
                    position, self.class_name, u["userName"], u["uid"],
                    illust_count, u["latest_id"]))
                for pid in all_illust:
                    pool.put(self.thread_by_illust, (pid, ), callback)
                time.sleep(5)
            else:
                log_str(TEMP_MSG["NOW_USER_INFO"].format(
                    position, self.class_name, u["userName"], u["uid"],
                    illust_count))
                # The user has no works in this update.
                if u["latest_id"] == -1:
                    # Delete every database record matching u["uid"].
                    result = self.db.delete_user_illust(
                        key="uid", value=u["uid"])
                    if result:
                        log_str(
                            TEMP_MSG["DELELE_USER_ILLUST_SUCCESS_INFO"].
                            format(self.class_name, u["userName"], u["uid"]))
                    else:
                        log_str(
                            TEMP_MSG["DELELE_USER_ILLUST_FAIL_INFO"].
                            format(self.class_name, u["userName"], u["uid"]))
    except Exception as e:
        log_str("Exception:{}".format(e))
    finally:
        # Close the pool exactly once (it used to be closed in both the
        # except and the finally clause).
        if pool is not None:
            pool.close()

    log_str(TEMP_MSG["SLEEP_INFO"].format(self.class_name))

# Example driver loop (kept commented out):
# if __name__ == '__main__':
#     from config import USERS_CYCLE
#     c = Crawler()
#     while True:
#         c.run()
#         time.sleep(USERS_CYCLE)
def run(self):
    """Walk the bookmark pages and queue every bookmarked work for download.

    Pages are requested with ``self.get_page_bookmark(offset)``, advancing
    ``offset`` by ``self.bookmark_page_offset`` after each page. Paging
    stops when:
      * ``self.day_count`` is below ``self.day_all_update_num`` and
        ``offset`` exceeds ``self.day_limit`` (limited update);
      * the "not logged in" marker ``TEMP_MSG["UL_TEXT"]`` is returned;
      * an empty page is returned (no more bookmarks);
      * a page fails (returns ``None``) several times in a row.

    After the loop, ``self.day_count`` is reset to 0 when it had reached
    ``self.day_all_update_num`` (a full update just ran), otherwise it is
    incremented. This happens even when the loop ended with an exception.

    Returns:
        None. Returns immediately when ``self.check_update()`` compares
        equal to ``False``.
    """
    log_str(TEMP_MSG["BEGIN_INFO"].format(self.class_name))

    # Update-mechanism check. The explicit comparison is kept on purpose:
    # the return contract of check_update() is not visible here, and
    # `not value` would also skip the run for None.
    if self.check_update() == False:
        log_str(TEMP_MSG["SLEEP_INFO"].format(self.class_name))
        return

    # Give up on the run after this many consecutive failures of one page
    # instead of re-requesting it forever in a tight loop.
    max_page_retries = 3
    failed_attempts = 0

    # Bound before the try block so `finally` never sees an unbound name
    # if ThreadPool() itself fails.
    pool = None
    try:
        offset = 0
        pool = ThreadPool(8)
        while True:
            # While the accumulated run count is below the threshold, only
            # the first `day_limit` bookmarks (plus the page in flight) are
            # refreshed.
            if (self.day_count < self.day_all_update_num
                    and offset > self.day_limit):
                log_str(TEMP_MSG["UPDATE_DAY_LIMIT_INFO"].format(
                    self.class_name, self.day_limit, self.day_count))
                break

            pid_list = self.get_page_bookmark(offset)

            # A fetch error is reported as None: retry the same page a
            # bounded number of times, pausing between attempts.
            if pid_list is None:
                log_str(TEMP_MSG["BOOKMARK_PAGE_ERROR_INFO"].format(
                    self.class_name, offset,
                    offset + self.bookmark_page_offset))
                failed_attempts += 1
                if failed_attempts >= max_page_retries:
                    break
                time.sleep(1)
                continue
            failed_attempts = 0

            # Not logged in.
            if pid_list == TEMP_MSG["UL_TEXT"]:
                log_str(TEMP_MSG["UNLOGIN_INFO"].format(self.class_name))
                break

            # No bookmarks on this page: the end has been reached.
            if not pid_list:
                break

            log_str(TEMP_MSG["BOOKMARK_NOW_INFO"].format(
                self.class_name, offset,
                offset + self.bookmark_page_offset, len(pid_list)))
            for pid in pid_list:
                pool.put(self.thread_by_illust, (pid, ), callback)

            offset += self.bookmark_page_offset
            time.sleep(1)
    except Exception as e:
        log_str("Exception {}".format(e))
    finally:
        # When the counter has reached the threshold this run was not
        # subject to the limit (full update); reset the counter afterwards.
        if self.day_count == self.day_all_update_num:
            log_str(TEMP_MSG["UPDATE_DAY_ALL_INFO"].format(
                self.class_name))
            self.day_count = 0
        else:
            self.day_count += 1
        if pool is not None:
            pool.close()

    log_str(TEMP_MSG["SLEEP_INFO"].format(self.class_name))
    log_str("=" * 48)

# Example driver loop (kept commented out):
# if __name__ == '__main__':
#     from config import BOOKMARK_CYCLE
#     b = Bookmark()
#     while True:
#         b.run()
#         time.sleep(BOOKMARK_CYCLE)