def illustMulti(self, data):
    """
    Download a multi-page work whose id passed the filter
    :param data: work data
    :return:
    """
    pageCount = data["pageCount"]
    original = data["original"]
    path_ = data["path"]
    # original = "https://i.pixiv.cat/img-original/img/2020/01/20/04/13/16/78997178_p0.png"
    # scan forward for "." and take the index of the digit right before the
    # last one; n marks the position of the page digit (the "0" in "_p0.png")
    n = [i - 1 for i in range(len(original) - 1) if original[i] == "."][-1]
    for i in range(0, int(pageCount)):
        # splice the page number in at index n; slicing (instead of the old
        # reversed str.replace) keeps multi-digit pages such as p10 intact
        new_original = original[:n] + str(i) + original[n + 1:]
        name = "{}-{}.{}".format(data["pid"], i, new_original.split(".")[-1])
        illustPath = os.path.join(path_, name)
        if os.path.exists(illustPath) and os.path.getsize(illustPath) > 1000:
            # log_str("{} already exists".format(name))
            pass
        else:
            c = self.baseRequest(options={"url": new_original}).content
            size = self.downSomething(illustPath, c)
            log_str("{} downloaded! size: {}".format(name, self.size2Mb(size)))
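# Standalone sketch of the page-number splice used above, runnable on its
# own; the sample URL is the one quoted in the comment inside illustMulti.
original = "https://i.pixiv.cat/img-original/img/2020/01/20/04/13/16/78997178_p0.png"
n = [i - 1 for i in range(len(original) - 1) if original[i] == "."][-1]
for page in (0, 1, 12):
    print(original[:n] + str(page) + original[n + 1:])
# ..._p0.png / ..._p1.png / ..._p12.png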
def update_illust(self, u, table="pixiv"):
    """
    Update a work record: mainly view, bookmark, comment and like counts plus path
    :params u: work data
    :params table: target table
    :return: True/False
    Updates 11 fields: tag, pageCount, illustType, is_r18, score, illust_level,
    viewCount, bookmarkCount, likeCount, commentCount, path
    """
    conn, cur = self.get_conn()
    # update sql
    sql = """UPDATE {} """.format(table) + """SET tag=%s,pageCount=%s,\
illustType=%s,is_r18=%s,score=%s,illust_level=%s,viewCount=%s,\
bookmarkCount=%s,likeCount=%s,commentCount=%s,path=%s WHERE pid=%s"""
    # update data
    data = (
        u["tag"], u["pageCount"], u["illustType"], u["is_r18"], u["score"], u["illust_level"],
        u["viewCount"], u["bookmarkCount"], u["likeCount"], u["commentCount"], u["path"], u["pid"]
    )
    try:
        cur.execute(sql, data)
        conn.commit()
    except Exception as e:
        log_str(TEMP_MSG["DB_UPDATE_ILLUST_ERROR_INFO"].format(self.class_name, u["pid"], e))
        log_str(u)
        conn.rollback()
        return False
    else:
        return True
    finally:
        cur.close()
        conn.close()
def insert_illust(self, u, table="pixiv"):
    """
    :params u: work data
    :params table: target table
    :return: True/False
    """
    conn, cur = self.get_conn()
    sql = '''INSERT INTO {} '''.format(table) + '''(uid,userName,pid,purl,title,tag,pageCount,\
illustType,is_r18,score,illust_level,viewCount,bookmarkCount,likeCount,\
commentCount,urls,original,path) VALUES(%s,%s,%s,%s,\
%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'''
    data = (
        u["uid"], u["userName"], u["pid"], u["purl"], u["title"], u["tag"],
        u["pageCount"], u["illustType"], u["is_r18"], u["score"], u["illust_level"],
        u["viewCount"], u["bookmarkCount"], u["likeCount"], u["commentCount"],
        u["urls"], u["original"], u["path"]
    )
    try:
        cur.execute(sql, data)
        conn.commit()
    except Exception as e:
        log_str("{} | {}".format(e, u))
        conn.rollback()
        return False
    else:
        return True
    finally:
        cur.close()
        conn.close()
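# Shape sketch (assumption): the record dict insert_illust expects, its 18
# keys mirroring the column list above; all values here are illustrative.
u = {
    "uid": 123, "userName": "painter", "pid": 78997178,
    "purl": "https://www.pixiv.net/artworks/78997178", "title": "demo",
    "tag": "tag1,tag2", "pageCount": 1, "illustType": 0, "is_r18": 0,
    "score": 0.3, "illust_level": "SSR", "viewCount": 30000,
    "bookmarkCount": 9000, "likeCount": 8000, "commentCount": 50,
    "urls": "https://example.com/thumb.jpg",
    "original": "https://example.com/original.png", "path": "./demo",
}
# db.insert_illust(u)    # first sighting: insert the full record
# db.update_illust(u)    # later runs: refresh the 11 mutable fields by pid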
def select_illust(self, pid, table="pixiv"):
    """
    Query a work record; backs the API endpoint
    :params pid: work pid
    :params table: target table
    :return:
    """
    conn, cur = self.get_conn()
    sql = """SELECT * FROM {} """.format(table) + """WHERE pid=%s"""
    data = (pid,)
    try:
        cur.execute(sql, data)
    except Exception as e:
        log_str(e)
        return
    else:
        r = cur.fetchall()
        if len(r) != 0:
            # the API layer adds the [0] index
            # res = r[0]
            return r
        else:
            return
    finally:
        cur.close()
        conn.close()
def illustGif(self, data):
    """
    Download an ugoira (animated work) whose id passed the filter
    :param data: work data
    :return:
    """
    path_ = data["path"]
    # ugoira metadata url
    zipInfoUrl = self.zip_url.format(data["pid"])
    zip_name = "{}.zip".format(data["pid"])
    zip_path = os.path.join(path_, zip_name)
    # frames to be composed into the gif
    frames = []
    name = "{}.gif".format(data["pid"])
    illustPath = os.path.join(path_, name)
    if os.path.exists(illustPath) and os.path.getsize(illustPath) > 1000:
        # log_str("{} already exists".format(name))
        pass
    else:
        z_info = self.baseRequest(options={"url": zipInfoUrl})
        if z_info is None:
            return None
        z = json.loads(z_info.text)
        zip_url = z["body"]["originalSrc"]
        # item["delay"] is the per-frame display time in milliseconds
        delay = [item["delay"] / 1000 for item in z["body"]["frames"]]
        # download the zip; "wb" overwrites any partial zip left by an earlier run
        zip_resp = self.baseRequest(options={"url": zip_url})
        if zip_resp is None:
            return None
        with open(zip_path, "wb") as f1:
            f1.write(zip_resp.content)
        # extract the zip
        with zipfile.ZipFile(zip_path, "r") as f2:
            for file in f2.namelist():
                f2.extract(file, path_)
        # remove the zip
        os.remove(zip_path)
        # scan the extracted frames (files already hold full paths)
        files = [os.path.join(path_, i) for i in os.listdir(path_)]
        # queue frames for composition
        for i in range(1, len(files)):
            frames.append(imageio.imread(files[i]))
        # compose the gif
        imageio.mimsave(illustPath, frames, duration=delay)
        # downloaded
        size = os.path.getsize(illustPath)
        log_str(TEMP_MSG["DM_DOWNLOAD_SUCCESS_INFO"].format(
            self.class_name, name, self.size2Mb(size)))
        # remove the extracted frames
        for j in files:
            os.remove(j)
        time.sleep(1)
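# Standalone sketch of the frames -> GIF step, assuming imageio v2 semantics
# where `duration` accepts a per-frame list of seconds; the frames here are
# synthetic solid-color images so the snippet runs without touching pixiv.
import imageio
import numpy as np

demo_frames = [np.full((64, 64, 3), c, dtype=np.uint8) for c in (0, 128, 255)]
demo_delay = [0.1, 0.1, 0.5]  # mirrors item["delay"] / 1000 from the ugoira metadata
imageio.mimsave("demo.gif", demo_frames, duration=demo_delay)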
def get_page_bookmark(self, offset):
    """
    Fetch pids of bookmarked works for the given offset and limit
    :params offset: offset
    :return: list of pids (int) for this offset/limit window
    """
    params = {
        "tag": "",
        "offset": offset,
        "limit": 100,
        "rest": "show",
    }
    try:
        r = json.loads(self.base_request({"url": self.bookmark_url}, params=params).text)
    except Exception as e:
        # network error
        log_str(BOOKMARK_PAGE_ERROR_INFO.format(self.class_name))
        return None
    else:
        # not logged in
        if r["message"] == UNLOGIN_TEXT:
            log_str(UNLOGIN_INFO.format(self.class_name))
            return UL_TEXT
        res = r["body"]["works"]
        illusts_pid = [int(i["illustId"]) for i in res]
        return illusts_pid
def check_update(self):
    """
    Check whether an update is needed by fetching the first ten work ids
    on page one of the bookmarks.
    Update rule: compare the ten newest bookmarked ids against the database;
    if any of them is missing there, update, otherwise skip.
    In effect the cut-off assumes at most ten new bookmarks within the
    fastest ten-minute window.
    """
    # if the database switch is off, always update
    if not hasattr(self.db, "pool"):
        log_str(UPDATE_INFO.format(self.class_name))
        return True
    res = self.get_page_bookmark(0)
    if res == UL_TEXT:
        log_str(UPDATE_CHECK_ERROR_INFO.format(self.class_name))
        return False
    if res is None:
        log_str(UPDATE_CHECK_ERROR_INFO.format(self.class_name))
        return False
    # verify the first ten
    for pid in res[:10]:
        if self.db.check_illust(pid, table="bookmark")[0] == False:
            log_str(UPDATE_INFO.format(self.class_name))
            return True
    else:
        log_str(UPDATE_CANLE_INFO.format(self.class_name))
        return False
def get_cookie(self):
    '''
    Drive selenium to visit the site and persist the cookie
    '''
    log_str(GET_COOKIE_NOW_INFO.format(self.class_name))
    chrome_options = webdriver.ChromeOptions()
    # headless mode may prevent the cookie from being captured
    # chrome_options.add_argument('--headless')
    chrome_options.add_argument('--no-sandbox')
    chrome_options.add_argument('--start-maximized')
    # suppress the automation warning banner
    chrome_options.add_experimental_option('useAutomationExtension', False)
    chrome_options.add_experimental_option('excludeSwitches', ['enable-automation'])
    # user profile directory
    chrome_options.add_argument('user-data-dir=' + PRO_DIR)
    try:
        driver = webdriver.Chrome(chrome_options=chrome_options)
    except InvalidArgumentException as e:
        log_str(GET_COOKIE_NOW_INFO.format(self.class_name))
        exit()
    else:
        driver.get(self.host_url)
        cookies = driver.get_cookies()
        driver.quit()
        with open(COOKIE_NAME, "w") as fp:
            json.dump(cookies, fp)
        for _ in cookies:
            self.cookie.set(_['name'], _['value'])
def delete_user_illust(self, key="uid", value=None, table="pixiv"):
    """
    Delete all/single work records of a given user
    :params key: column used for matching, defaults to uid
    :params value: value to match
    :params table: target table, defaults to pixiv
    :return: True on success, False on bad input or error
    """
    if value is None:
        return False
    conn, cur = self.get_conn()
    sql = """DELETE FROM {} WHERE {} = %s""".format(table, str(key))
    data = (value,)
    try:
        cur.execute(sql, data)
        conn.commit()
    except Exception as e:
        log_str("{} | {}".format(e, (key, value)))
        conn.rollback()
        return False
    else:
        return True
    finally:
        cur.close()
        conn.close()
def get_cookie(self):
    '''
    Drive selenium to visit the site and persist the cookie
    '''
    log_str(TEMP_MSG["GET_COOKIE_NOW_INFO"].format(self.class_name))
    chrome_options = webdriver.ChromeOptions()
    # headless mode may prevent the cookie from being captured
    # chrome_options.add_argument('--headless')
    chrome_options.add_argument('--no-sandbox')
    chrome_options.add_argument('--start-maximized')
    # suppress the automation warning banner
    chrome_options.add_experimental_option('useAutomationExtension', False)
    chrome_options.add_experimental_option('excludeSwitches', ['enable-automation'])
    # user profile directory
    chrome_options.add_argument('user-data-dir=' + PRO_DIR)
    try:
        driver = webdriver.Chrome(chrome_options=chrome_options)
    # selenium.common.exceptions.WebDriverException:
    # Message: unknown error: cannot create default profile directory
    # -> PRO_DIR is misconfigured
    except InvalidArgumentException as e:
        log_str(TEMP_MSG["GET_COOKIE_NOW_INFO"].format(self.class_name))
        exit()
    else:
        driver.get(self.host_url)
        cookies = driver.get_cookies()
        driver.quit()
        with open(COOKIE_NAME, "w") as fp:
            json.dump(cookies, fp)
        # for _ in cookies:
        #     self.cookie.set(_['name'], _['value'])
        self.set_cookie()
def check(self):
    """
    Run before spawning worker processes: obtain and validate the cookie and uid
    """
    log_str(TEMP_MSG["GET_COOKIE_INFO"].format(self.class_name))
    # can a user-supplied cookie be used?
    if self.isExists_UserCookie:
        try:
            self.str2CookieJar()
        except Exception as e:
            log_str(e)
            log_str(TEMP_MSG["CONVERT_COOKIEJAR_ERROR_INFO"].format(
                self.class_name))
            exit()
    # otherwise obtain it via selenium / the local cookie file
    else:
        self.get_cookie() if COOKIE_UPDATE_ENABLED else self.set_cookie()
    if self.cookie_list == []:
        log_str(TEMP_MSG["LOGIN_ERROR_INFO"].format(self.class_name))
        exit()
    # can the user_id be obtained?
    if self.flag:
        self.user_id = self.get_user_id()
    log_str(TEMP_MSG["INIT_INFO"].format(self.class_name))
def insert_illust(self, u, table="pixiv"):
    """
    data format: {key: value, ...}
    :params u: work data
    On mysql error 1366, fix per
    https://blog.csdn.net/qq_31122833/article/details/83992085
    """
    conn, cur = self.get_conn()
    sql = '''INSERT INTO {} '''.format(table) + '''(uid,userName,pid,purl,title,tag,pageCount,\
illustType,is_r18,viewCount,bookmarkCount,likeCount,\
commentCount,urls,original,path) VALUES(%s,%s,%s,%s,\
%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'''
    data = (
        u["uid"], u["userName"], u["pid"], u["purl"], u["title"], u["tag"],
        u["pageCount"], u["illustType"], u["is_r18"], u["viewCount"],
        u["bookmarkCount"], u["likeCount"], u["commentCount"], u["urls"],
        u["original"], u["path"]
    )
    try:
        cur.execute(sql, data)
        # if len(data) == 1:
        #     cur.execute(sql, data)
        # else:
        #     cur.executemany(sql, data)
        conn.commit()
    except Exception as e:
        print(e)
        log_str(u)
        conn.rollback()
        return False
    else:
        return True
    finally:
        cur.close()
        conn.close()
def get_page_bookmark(self, offset):
    """
    Fetch pids of bookmarked works for the given offset and limit
    :params offset: offset
    :return: (list of pids as int, total bookmark count), or (None, None) on error
    """
    params = {
        "tag": "",
        "offset": offset,
        "limit": 100,
        "rest": "show",
    }
    try:
        r = json.loads(
            self.base_request({
                "url": self.bookmark_url
            }, params=params).text)
        res = r["body"]["works"]
        total = r["body"]["total"]
        illusts_pid = [int(i["illustId"]) for i in res]
    except Exception as e:
        log_str("bookmark fetch failed for items {}-{}".format(offset, offset + 100))
        return None, None
    else:
        return illusts_pid, total
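# Hedged sketch of consuming get_page_bookmark: walk the bookmark list 100
# ids at a time until `total` is reached; `client` is a placeholder for the
# instance that owns the method, not a name from this project.
def all_bookmark_pids(client):
    pids, offset, total = [], 0, None
    while total is None or offset < total:
        page, total = client.get_page_bookmark(offset)
        if page is None:  # network error: give up on this run
            break
        pids.extend(page)
        offset += 100  # matches the fixed limit of 100
    return pids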
def updata_illust(self, u, table="pixiv"):
    """
    Update a work record: mainly view, bookmark, comment and like counts
    :params u: work data
    :return:
    Mainly updates viewCount, bookmarkCount, commentCount, likeCount
    """
    conn, cur = self.get_conn()
    # update sql
    # before: 72 32 23 0 81265370
    # after:   1  1  1 1 81265370
    # quick check: SELECT viewCount,bookmarkCount,likeCount,commentCount,pid FROM pixiv WHERE id=53312;
    sql = """UPDATE {} """.format(table) + """SET viewCount=%s,\
bookmarkCount=%s,likeCount=%s,commentCount=%s WHERE pid=%s"""
    # update data
    data = (
        u["viewCount"], u["bookmarkCount"], u["likeCount"], u["commentCount"], u["pid"]
    )
    try:
        cur.execute(sql, data)
        conn.commit()
    except Exception as e:
        log_str("update of work {} failed: {}".format(u["pid"], e))
        conn.rollback()
        return False
    else:
        return True
    finally:
        cur.close()
        conn.close()
def get_users(self):
    """
    :return: uid, userName and latest_id (newest pid) of every followed painter
    :[{"uid":uid,"userName":userName,"latest_id":latest_id},...]
    """
    offset = 0
    users_info_list = []
    while True:
        u_list = self.get_page_users(offset)
        # an empty page means the follow list is exhausted; break rather
        # than continue, which would loop forever at the same offset
        if u_list == []:
            break
        for u in u_list:
            user_info = {}
            user_info["uid"] = int(u["userId"])
            user_info["userName"] = u["userName"]
            if u["illusts"] == []:
                user_info["latest_id"] = -1
                log_str("{} has no works...".format(u["userId"]))
                # skip painters without works
                continue
            else:
                user_info["latest_id"] = int(u["illusts"][0]["illustId"])
            users_info_list.append(user_info)
        if 0 < len(u_list) < 100:
            break
        offset += 100
    return users_info_list
def updata_illust(self, u, table="pixiv"):
    """
    Update a work record: mainly view, bookmark, comment and like counts plus path
    :params u: work data
    :params table: target table
    :return:
    Mainly updates viewCount, bookmarkCount, commentCount, likeCount
    """
    conn, cur = self.get_conn()
    # update sql
    sql = """UPDATE {} """.format(table) + """SET viewCount=%s,\
bookmarkCount=%s,likeCount=%s,commentCount=%s,path=%s WHERE pid=%s"""
    # update data
    data = (u["viewCount"], u["bookmarkCount"], u["likeCount"],
            u["commentCount"], u["path"], u["pid"])
    try:
        cur.execute(sql, data)
        conn.commit()
    except Exception as e:
        log_str(
            DB_UPDATE_ILLUST_ERROR_INFO.format(self.__class__.__name__,
                                               u["pid"], e))
        conn.rollback()
        return False
    else:
        return True
    finally:
        cur.close()
        conn.close()
def __init__(self, thread_num=8):
    log_str("Instantiating the database connection pool...")
    try:
        self.pool = PooledDB(
            pymysql, thread_num, host=DB_HOST, user=DB_USER,
            passwd=DB_PASSWD, db=DB_DATABASE, port=DB_PORT, charset=DB_CHARSET)
        # the second positional argument is mincached: the minimum number
        # of idle connections kept in the pool
    except pymysql.err.OperationalError as e:
        log_str("Make sure MySQL is running and properly configured\n{}".format(e))
        exit()
def get_user_id(self):
    resp = requests.get(self.host_url,
                        headers=headers,
                        cookies=random.choice(self.cookie_list)).text
    if "Please turn JavaScript on and reload the page." in resp:
        log_str(TEMP_MSG["GOOGLE_CAPTCHA_ERROR_INFO"].format(
            self.class_name))
        exit()
    user_id = re.findall(r'''.*?,user_id:"(.*?)",.*?''', resp.replace(" ", ""))[0]
    return user_id
def get_users(self):
    """
    :return: uid, userName and latest_id (newest pid) of every followed painter
    :[{"uid":uid,"userName":userName,"latest_id":latest_id},...]
    """
    users_info_list = []
    for rest in self.rest_list:
        # paging and the error budget restart for each rest category
        offset = 0
        err_count = 0
        err_limit = 10
        while True:
            u_list = self.get_page_users(offset, rest=rest)
            # network error
            if u_list is None:
                # tolerate up to 10 network errors
                if err_count < err_limit:
                    offset += 100
                    err_count += 1
                    continue
                else:
                    break
            # not logged in
            if u_list == TEMP_MSG["UL_TEXT"]:
                users_info_list = TEMP_MSG["UL_TEXT"]
                break
            # all follows fetched
            if u_list == []:
                break
            for u in u_list:
                user_info = {}
                user_info["uid"] = int(u["userId"])
                # userName = re.sub('[\\\/:*?"<>|]','_',u["userName"])
                userName = re.sub(r'[\s\/:*?"<>|\\]', '_', u["userName"])
                user_info["userName"] = userName
                if u["illusts"] == []:
                    user_info["latest_id"] = -1
                    log_str(TEMP_MSG["FOLLOW_NO_ILLUSTS_INFO"].format(
                        self.class_name, u["userName"], u["userId"]))
                    # painters without works are still recorded
                    # continue
                else:
                    user_info["latest_id"] = int(u["illusts"][0]["id"])
                users_info_list.append(user_info)
            offset += 100
    return users_info_list
def get_illust_level(self, score, bookmarkCount):
    """
    Grade a work from its score and bookmarkCount; defaults to R
    :params score: bookmarkCount/viewCount -> float
    :params bookmarkCount: bookmark count -> int
    :return: one of 'R','SR','SSR','UR'; falls back to R -> str
    """
    # grading bands
    illust_level_list = ['R', 'SR', 'SSR', 'UR']
    # right-hand boundary of each score band
    illust_interval = {'R': 0.140, 'SR': 0.260, 'SSR': 0.325, 'UR': 1.000}
    illust_default_level = "R"
    illust_level = ""
    # === popular works with huge view counts ===
    # promote an SSR out of the R band, boundaries excluded
    if 20000 < bookmarkCount and score < illust_interval['R']:
        illust_level = illust_level_list[2]
    # promote a UR out of the SR band, boundaries excluded
    if 25000 < bookmarkCount and illust_interval[
            'R'] < score < illust_interval['SR']:
        illust_level = illust_level_list[3]
    # return early when a promotion rule matched
    if illust_level:
        return illust_level
    # === basic grading ===
    # R
    if 0 <= score < illust_interval['R']:
        illust_level = illust_level_list[0]
    # SR
    elif illust_interval['R'] <= score < illust_interval['SR']:
        illust_level = illust_level_list[1]
    # SSR
    elif illust_interval['SR'] <= score < illust_interval['SSR']:
        illust_level = illust_level_list[2]
    # UR
    elif illust_interval['SSR'] <= score <= illust_interval['UR']:
        illust_level = illust_level_list[3]
    # none of the six rules matched: fall back to R
    if not illust_level:
        log_str("falling back to default level, score:{} bookmarkCount:{}".format(
            score, bookmarkCount))
        return illust_default_level
    return illust_level

# Downloader = Down()
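# Worked example of the grading rule: the docstring defines
# score = bookmarkCount / viewCount, so 9,000 bookmarks over 30,000 views
# scores 0.30, which lands in the SSR band (0.260 <= score < 0.325).
viewCount, bookmarkCount = 30000, 9000
score = bookmarkCount / viewCount  # 0.30
# downloader.get_illust_level(score, bookmarkCount) -> "SSR"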
def check_user(self, u):
    """
    Compare the painter's latest_id stored in the database with the one
    returned by the API; equal --> False, no update/download needed.
    Then check whether pxusers already holds a record for this uid:
    missing --> sql_2 (insert), present --> sql_3 (read latest_id)
    :params u: user data
    :return: latest_id
    """
    conn, cur = self.get_conn()
    # query for the painter record
    sql_1 = "SELECT COUNT(uid) FROM pxusers WHERE uid=%s"
    # insert a painter record
    sql_2 = '''INSERT INTO pxusers(uid,userName,latest_id,path) VALUES(%s,%s,%s,%s)'''
    # read latest_id
    sql_3 = "SELECT latest_id FROM pxusers WHERE uid=%s"
    uid = (u["uid"],)
    data = (
        u["uid"], u["userName"], u["latest_id"], u["path"]
    )
    # does the database already know this painter?
    cur.execute(sql_1, uid)
    res = cur.fetchall()
    e = res[0]["COUNT(uid)"]
    # log_str("query result: {}".format(e))
    if e >= 1:
        # return the latest_id stored in the database
        cur.execute(sql_3, uid)
        d = cur.fetchall()[0]
        latest_id = d["latest_id"]
        cur.close()
        conn.close()
        return latest_id
    else:
        try:
            cur.execute(sql_2, data)
            conn.commit()
        except Exception as e:
            log_str(e)
            conn.rollback()
            # default to a full update
            return u["latest_id"]
        else:
            return u["latest_id"]
        finally:
            cur.close()
            conn.close()
def thread_by_illust(self, *args):
    pid = args[0]
    isExists, path = self.db.check_illust(pid, table="bookmark")
    if path is None:
        # the bookmark count from each request decides whether to download
        try:
            info = Downloader.get_illust_info(pid, extra="bookmark")
        except Exception as e:
            log_str("request for {} failed: {}".format(pid, e))
            return
        if info is None:
            log_str("work {} has been deleted, or the work id does not exist.".format(pid))
            return
        if not isExists:
            # no record in the database yet
            res = self.db.insert_illust(info, table="bookmark")
            if not res:
                log_str("failed to insert {}".format(pid))
            else:
                log_str("inserted {} successfully".format(pid))
        else:
            # refresh the record
            self.db.updata_illust(info)
def set_cookie(self):
    '''
    Read the persisted cookie and load it into the jar
    '''
    try:
        with open(COOKIE_NAME, "r", encoding="utf8") as fp:
            # readlines() leaves the file pointer at EOF; reading again
            # without seek(0) would only return []
            if fp.readlines() == []:
                log_str(COOKIE_EMPTY_INFO.format(self.class_name))
                exit()
            fp.seek(0)
            cookies = json.load(fp)
            for cookie in cookies:
                self.cookie.set(cookie['name'], cookie['value'])
    except FileNotFoundError as e:
        log_str(FILE_NOT_FOUND_INFO_1.format(self.class_name))
        log_str(FILE_NOT_FOUND_INFO_2.format(self.class_name))
        log_str(e)
        exit()
    # obtain the user_id
    if self.flag:
        self.user_id = self.get_user_id()
    else:
        self.user_id = USER_ID
    return self.cookie
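# Standalone sketch of replaying persisted cookies with the same name/value
# logic as set_cookie; the JSON mirrors what get_cookie() dumps via
# json.dump(driver.get_cookies(), fp), and the values are made up.
import json
import requests

raw = '[{"name": "PHPSESSID", "value": "xyz", "domain": ".pixiv.net"}]'
jar = requests.cookies.RequestsCookieJar()
for c in json.loads(raw):
    jar.set(c["name"], c["value"])
print(jar.get("PHPSESSID"))  # xyz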
def thread_by_illust(self, *args):
    pid = args[0]
    try:
        info = self.Downloader.get_illust_info(pid, extra="bookmark")
    except Exception as e:
        log_str(ILLUST_NETWORK_ERROR_INFO.format(self.class_name, pid, e))
        return
    if info is None:
        log_str(ILLUST_EMPTY_INFO.format(self.class_name, pid))
        return
    # the database switch is off
    if not hasattr(self.db, "pool"):
        return
    isExists, path = self.db.check_illust(pid, table="bookmark")
    # no record in the database yet
    if not isExists:
        res = self.db.insert_illust(info, table="bookmark")
        if not res:
            log_str(INSERT_FAIL_INFO.format(self.class_name, pid))
        else:
            log_str(INSERT_SUCCESS_INFO.format(self.class_name, pid))
    else:
        self.db.updata_illust(info, table="bookmark")
def check_illust(self, value, key="pid", table="pixiv", database=None):
    """
    Check whether the database holds a work with this id; tables other than
    pixiv/bookmark fall back to a generic sql
    :params key: column name
    :params value: value to match
    :params table: data table
    :return: (True, path) / (False, "")
    Raw fetchall() results:
    data in db: [{'COUNT(1)': 1, 'path': 'None'}]
    data not in db: ()
    """
    # guard before borrowing a connection, so nothing leaks on bad input
    if key == "" or value == "":
        return False, ""
    conn, cur = self.get_conn()
    # switch database
    if database is not None:
        conn.select_db(database)
    # id-lookup sql
    if table in ["pixiv", "bookmark"]:
        # path is the download location; 'None' when never downloaded
        sql = """SELECT COUNT(1),path FROM {} """.format(
            table) + """WHERE {}=%s GROUP BY path""".format(key)
    else:
        sql = """SELECT COUNT(1) FROM {} """.format(
            table) + """WHERE {}=%s""".format(key)
    # log_str(sql)
    data = (value,)
    try:
        cur.execute(sql, data)
    except Exception as e:
        log_str("{}:check_illust | {}".format(self.class_name, e))
        return False, ""
    else:
        # without GROUP BY path, non-strict mode raises error 1140;
        # with it, fetchall() returns () when the pid is absent
        d = cur.fetchall()
        if d != () and d[0]["COUNT(1)"] >= 1:
            return True, d[0].get("path", "")
        else:
            return False, ""
    finally:
        cur.close()
        conn.close()
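# Contract sketch for check_illust, replaying the two raw fetchall() shapes
# quoted in the docstring; `rows` stands in for cur.fetchall().
for rows in ([{"COUNT(1)": 1, "path": "./demo/78997178-0.png"}], ()):
    if rows != () and rows[0]["COUNT(1)"] >= 1:
        print(True, rows[0].get("path", ""))   # hit: (True, stored path)
    else:
        print(False, "")                       # miss: (False, "")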
def baseRequest(self, options, data=None, params=None, retry_num=5):
    '''
    :params options: request options {"method":"get/post","url":"example.com"}
    :params data
    :params params
    :params retry_num: retry count
    :return: response object, or None once all retries fail

    If options defines a headers key it is used; otherwise the headers
    initialised in __init__ are used. This makes it possible to inject a
    referer, which must be the url of the previous page (e.g. the painter
    or work page). Demo:

    demo_headers = headers.copy()
    demo_headers['referer'] = 'www.example.com'
    options = {
        "method": "get",
        "url": "origin_url",
        "headers": demo_headers
    }
    baseRequest(options=options)

    baseRequest then uses the customised headers instead of the defaults
    from __init__.
    '''
    # log_str(options["url"])
    base_headers = options.get("headers", self.headers)
    try:
        # get/post dispatch on options["method"] is not implemented yet;
        # every request is currently a GET
        response = self.se.get(
            options["url"],
            data=data,
            params=params,
            cookies=self.jar,
            headers=base_headers,
            verify=False,
            timeout=10,
        )
        return response
    except Exception as e:
        if retry_num > 0:
            return self.baseRequest(options, data, params, retry_num - 1)
        else:
            log_str(
                DM_NETWORK_ERROR_INFO.format(self.class_name, options["url"], e))
def run(self):
    log_str(VERSION_INFO)
    # refresh the client cookie
    client.check()
    if PIXIV_CRAWLER_ENABLED:
        pixiv_crawler = Process(target=self.scheduler_crawler)
        pixiv_crawler.start()
    if PIXIV_BOOKMARK_ENABLED:
        pixiv_bookmark = Process(target=self.scheduler_bookmark)
        pixiv_bookmark.start()
    if PIXIV_API_ENABLED and DB_ENABLE:
        pixiv_api = Process(target=self.scheduler_api)
        pixiv_api.start()
def __init__(self, thread_num=8):
    if not DB_ENABLE:
        return
    log_str(DB_INST)
    try:
        self.pool = PooledDB(pymysql,
                             thread_num,
                             host=DB_HOST,
                             user=DB_USER,
                             passwd=DB_PASSWD,
                             db=DB_DATABASE,
                             port=DB_PORT,
                             charset=DB_CHARSET)
        # the second positional argument is mincached: the minimum number
        # of idle connections kept in the pool
    except pymysql.err.OperationalError as e:
        log_str(DB_CONNECT_ERROR_INFO.format(e))
        exit()
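# Hedged sketch of the get_conn() helper the DB methods above rely on but
# which is not shown here: borrow a pooled connection and open a DictCursor,
# assumed because callers index rows by column name (d[0]["COUNT(1)"]).
def get_conn(self):
    conn = self.pool.connection()  # borrow a connection from the pool
    cur = conn.cursor(pymysql.cursors.DictCursor)  # rows come back as dicts
    return conn, cur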
def get_user_illust(self, u):
    """
    :params u: painter info dict, holding uid, userName, latest_id, path
    :return user_illust_list: list of all work ids for the painter
    """
    u["path"] = self.file_manager.mkdir_painter(u)
    illust_url = self.all_illust_url.format(u["uid"])
    try:
        u_json = json.loads(self.base_request({"url": illust_url}).text)["body"]
        i = u_json["illusts"]
        m = u_json["manga"]
        # merge the illusts/manga mappings and keep their keys (the pids);
        # manga is [] rather than {} when the painter has none
        user_illust_list = list([dict(i) if len(m) == 0 else dict(i, **m)][0].keys())
    except Exception as e:
        log_str(FOLLOW_DATA_ERROR_INFO.format(self.class_name, e))
        return []
    else:
        return user_illust_list
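# Shape sketch (assumption inferred from the parsing above): body.illusts
# and body.manga map pid -> metadata, or are [] when empty, so merging
# their keys yields every work id for the painter.
u_json = {"illusts": {"101": None, "102": None}, "manga": []}
i, m = u_json["illusts"], u_json["manga"]
print(list([dict(i) if len(m) == 0 else dict(i, **m)][0].keys()))  # ['101', '102']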
def get_users(self):
    """
    :return: uid, userName and latest_id (newest pid) of every followed painter
    :[{"uid":uid,"userName":userName,"latest_id":latest_id},...]
    """
    offset = 0
    users_info_list = []
    while True:
        u_list = self.get_page_users(offset)
        # network error: retry the same offset
        if u_list is None:
            continue
        # not logged in
        if u_list == UL_TEXT:
            break
        # all follows fetched
        if u_list == []:
            break
        for u in u_list:
            user_info = {}
            user_info["uid"] = int(u["userId"])
            # userName = re.sub('[\\\/:*?"<>|]','_',u["userName"])
            userName = re.sub(r'[\s\/:*?"<>|\\]', '_', u["userName"])
            user_info["userName"] = userName
            if u["illusts"] == []:
                user_info["latest_id"] = -1
                log_str(FOLLOW_NO_ILLUSTS_INFO.format(self.class_name, u["userName"], u["userId"]))
                # skip painters without works
                continue
            else:
                user_info["latest_id"] = int(u["illusts"][0]["illustId"])
            users_info_list.append(user_info)
        offset += 100
    return users_info_list