Ejemplo n.º 1
0
    def illustMulti(self, data):
        """
        Download every page of a multi-page work.

        The URL of each page is derived from ``data["original"]`` (the URL of
        page 0) by replacing the single character in front of the last "."
        with the page index, e.g. ``..._p0.png`` -> ``..._p12.png``.  A page
        whose target file already exists and is larger than 1000 bytes is
        skipped.

        :param data: work data; the keys read here are "pageCount",
            "original", "path" and "pid"
        :return: None
        """
        page_count = int(data["pageCount"])
        original = data["original"]
        path_ = data["path"]

        # e.g. "https://i.pixiv.cat/img-original/img/2020/01/20/04/13/16/78997178_p0.png"
        # Split on the last "." so that `stem` ends with the page digit of
        # page 0 and `ext` is the file extension.
        stem, _, ext = original.rpartition(".")
        # Everything in front of the page digit; the index is appended to it.
        # Building the URL by concatenation keeps multi-digit indexes intact
        # (a reversed-string replace would turn page 10 into "01").
        prefix = stem[:-1]

        for page in range(page_count):
            new_original = "{}{}.{}".format(prefix, page, ext)
            name = "{}-{}.{}".format(data["pid"], page, ext)
            illustPath = os.path.join(path_, name)

            # already downloaded
            if os.path.exists(illustPath) and os.path.getsize(illustPath) > 1000:
                continue

            resp = self.baseRequest(options={"url": new_original})
            if resp is None:
                # no response object to read from; skip this page
                continue

            size = self.downSomething(illustPath, resp.content)
            log_str("{}下载成功! 大小:{}".format(name, self.size2Mb(size)))
Ejemplo n.º 2
0
	def update_illust(self, u, table="pixiv"):
		"""
		Update the stored record of one work, matched by pid.

		Eleven columns are rewritten: tag, pageCount, illustType, is_r18,
		score, illust_level, viewCount, bookmarkCount, likeCount,
		commentCount and path.

		:params u: work data; must hold the eleven columns above plus "pid"
		:params table: table to operate on
		:return: True when the update was committed, False when it failed
			and was rolled back
		"""
		columns = (
			"tag", "pageCount", "illustType", "is_r18", "score", "illust_level",
			"viewCount", "bookmarkCount", "likeCount", "commentCount", "path",
		)
		conn, cur = self.get_conn()

		# The table name is interpolated; every value is bound as a parameter.
		statement = """UPDATE {} """.format(table) + """SET tag=%s,pageCount=%s,\
				illustType=%s,is_r18=%s,score=%s,illust_level=%s,viewCount=%s,\
				bookmarkCount=%s,likeCount=%s,commentCount=%s,path=%s WHERE pid=%s"""
		# Values in column order, followed by the pid for the WHERE clause.
		values = tuple(u[column] for column in columns) + (u["pid"],)

		succeeded = True
		try:
			cur.execute(statement, values)
			conn.commit()
		except Exception as e:
			log_str(TEMP_MSG["DB_UPDATE_ILLUST_ERROR_INFO"].format(self.class_name,u["pid"],e))
			log_str(u)
			conn.rollback()
			succeeded = False
		finally:
			# always hand the cursor and connection back
			cur.close()
			conn.close()
		return succeeded
Ejemplo n.º 3
0
	def insert_illust(self, u, table="pixiv"):
		"""
		Insert one work record.

		:params u: work data; must hold every column named in the statement
		:params table: table to operate on
		:return: True when the insert was committed, False when it failed
			and was rolled back
		"""
		columns = (
			"uid", "userName", "pid", "purl", "title", "tag",
			"pageCount", "illustType", "is_r18", "score", "illust_level",
			"viewCount", "bookmarkCount", "likeCount", "commentCount",
			"urls", "original", "path",
		)
		conn, cur = self.get_conn()

		# The table name is interpolated; every value is bound as a parameter.
		statement = '''INSERT INTO {} '''.format(table) + '''(uid,userName,pid,purl,title,tag,pageCount,\
						illustType,is_r18,score,illust_level,viewCount,bookmarkCount,likeCount,\
						commentCount,urls,original,path) VALUES(%s,%s,%s,%s,\
						%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'''
		# Values in the same order as the column list of the statement.
		values = tuple(u[column] for column in columns)

		succeeded = True
		try:
			cur.execute(statement, values)
			conn.commit()
		except Exception as e:
			log_str("{} | {}".format(e,u))
			conn.rollback()
			succeeded = False
		finally:
			# always hand the cursor and connection back
			cur.close()
			conn.close()
		return succeeded
Ejemplo n.º 4
0
	def select_illust(self, pid, table="pixiv"):
		"""
		Fetch the stored rows of one work; used by the API layer.

		:params pid: pid of the work
		:params table: table to operate on
		:return: the non-empty result of fetchall(), or None when nothing
			matched or the query failed (the caller indexes [0] itself)
		"""
		conn, cur = self.get_conn()
		query = """SELECT * FROM {} """.format(table) + """WHERE pid=%s"""
		try:
			try:
				cur.execute(query, (pid,))
			except Exception as e:
				log_str(e)
				return None
			rows = cur.fetchall()
			return rows if len(rows) != 0 else None
		finally:
			# runs on every path above
			cur.close()
			conn.close()
Ejemplo n.º 5
0
    def illustGif(self, data):
        """
        Download an animated work and assemble it into a GIF.

        Steps: fetch the metadata JSON from ``self.zip_url``, download the
        frame archive it points to, extract the frames into ``data["path"]``,
        combine them into ``<pid>.gif`` with imageio, then delete the archive
        and the extracted frames.  Nothing is done when the GIF already
        exists and is larger than 1000 bytes.

        :param data: work data; the keys read here are "path" and "pid"
        :return: None
        """
        path_ = data["path"]
        pid = data["pid"]
        name = "{}.gif".format(pid)
        illustPath = os.path.join(path_, name)

        # already downloaded
        if os.path.exists(illustPath) and os.path.getsize(illustPath) > 1000:
            return None

        # metadata of the animation (archive url + per-frame delays)
        z_info = self.baseRequest(options={"url": self.zip_url.format(pid)})
        if z_info is None:
            return None

        z = json.loads(z_info.text)
        zip_url = z["body"]["originalSrc"]
        # item["delay"] is given in milliseconds; convert to seconds
        delay = [item["delay"] / 1000 for item in z["body"]["frames"]]

        # download the archive
        zip_resp = self.baseRequest(options={"url": zip_url})
        if zip_resp is None:
            return None

        zip_path = os.path.join(path_, "{}.zip".format(pid))
        # "wb" rather than "ab": appending to a leftover file from an earlier
        # aborted run would produce a corrupt archive.
        with open(zip_path, "wb") as f1:
            f1.write(zip_resp.content)

        # Only the files extracted from this archive are used as frames and
        # removed afterwards; the target directory may hold other files that
        # must be neither merged into the GIF nor deleted.
        frame_paths = []
        try:
            with zipfile.ZipFile(zip_path, "r") as f2:
                for member in f2.namelist():
                    # extract() returns the path of the file it created
                    frame_paths.append(f2.extract(member, path_))

            frames = [imageio.imread(p) for p in frame_paths
                      if os.path.isfile(p)]
            imageio.mimsave(illustPath, frames, duration=delay)
        finally:
            # remove the archive and the extracted frames even on failure
            if os.path.exists(zip_path):
                os.remove(zip_path)
            for p in frame_paths:
                if os.path.isfile(p):
                    os.remove(p)

        size = os.path.getsize(illustPath)
        log_str(TEMP_MSG["DM_DOWNLOAD_SUCCESS_INFO"].format(
            self.class_name, name, self.size2Mb(size)))
        time.sleep(1)
Ejemplo n.º 6
0
	def get_page_bookmark(self, offset):
		"""
		Fetch the pids of one page (up to 100 entries) of bookmarked works.

		:params offset: offset of the first entry of the page
		:return: list of int pids; None when the request or JSON decoding
			failed; UL_TEXT when the response carries the "not logged in"
			message
		"""
		query = {"tag": "", "offset": offset, "limit": 100, "rest": "show"}
		try:
			resp = self.base_request({"url":self.bookmark_url},params=query)
			payload = json.loads(resp.text)
		except Exception as e:
			# request / decoding error
			log_str(BOOKMARK_PAGE_ERROR_INFO.format(self.class_name))
			return None

		# not logged in
		if payload["message"] == UNLOGIN_TEXT:
			log_str(UNLOGIN_INFO.format(self.class_name))
			return UL_TEXT

		return [int(work["illustId"]) for work in payload["body"]["works"]]
Ejemplo n.º 7
0
	def check_update(self):
		"""
		Decide whether the bookmark data needs to be refreshed.

		The ten most recently bookmarked pids are compared with the
		"bookmark" table: as soon as one of them is not stored yet an update
		is required; if all of them are stored the update is skipped.

		:return: True when an update should run, False when it is not needed
			or the check itself failed (request error / not logged in)
		"""
		# Without a connection pool (database switched off) always update.
		if not hasattr(self.db,"pool"):
			log_str(UPDATE_INFO.format(self.class_name))
			return True

		res = self.get_page_bookmark(0)

		# request error (None) or not logged in (UL_TEXT)
		if res is None or res == UL_TEXT:
			log_str(UPDATE_CHECK_ERROR_INFO.format(self.class_name))
			return False

		# verify the first ten entries
		for pid in res[:10]:
			if not self.db.check_illust(pid,table="bookmark")[0]:
				log_str(UPDATE_INFO.format(self.class_name))
				return True

		# every checked pid is already stored
		log_str(UPDATE_CANLE_INFO.format(self.class_name))
		return False
Ejemplo n.º 8
0
    def get_cookie(self):
        '''
        Open the site in Chrome through selenium and persist its cookies.

        The cookies are written to COOKIE_NAME as JSON and copied into
        ``self.cookie``.  The process exits when Chrome rejects the
        configured arguments.
        '''
        log_str(GET_COOKIE_NOW_INFO.format(self.class_name))
        chrome_options = webdriver.ChromeOptions()
        # headless mode may prevent the cookies from being obtained
        # chrome_options.add_argument('--headless')
        chrome_options.add_argument('--no-sandbox')
        chrome_options.add_argument('--start-maximized')
        # suppress the automation warning bar
        chrome_options.add_experimental_option('useAutomationExtension', False)
        chrome_options.add_experimental_option('excludeSwitches',
                                               ['enable-automation'])
        # user profile directory
        chrome_options.add_argument('user-data-dir=' + PRO_DIR)

        try:
            # NOTE(review): newer selenium releases expect `options=` instead
            # of `chrome_options=` - confirm against the pinned version.
            driver = webdriver.Chrome(chrome_options=chrome_options)
        except InvalidArgumentException as e:
            log_str(GET_COOKIE_NOW_INFO.format(self.class_name))
            exit()

        # Always shut the browser down, even when loading the page fails,
        # so no Chrome process is left behind.
        try:
            driver.get(self.host_url)
            cookies = driver.get_cookies()
        finally:
            driver.quit()

        with open(COOKIE_NAME, "w") as fp:
            json.dump(cookies, fp)

        for item in cookies:
            self.cookie.set(item['name'], item['value'])
Ejemplo n.º 9
0
	def delete_user_illust(self, key="uid", value=None, table="pixiv"):
		"""
		Delete every record whose column `key` equals `value`.

		:params key: column used in the WHERE clause, "uid" by default
		:params value: value to match; nothing is done when it is None
		:params table: table to operate on, "pixiv" by default
		:return: True when the delete was committed; False when `value` is
			None or the statement failed and was rolled back
		"""
		if value is None:
			return False

		conn, cur = self.get_conn()
		# table and column names are interpolated; the value is bound
		statement = """DELETE FROM {} WHERE {} = %s""".format(table,str(key))

		succeeded = True
		try:
			cur.execute(statement, (value,))
			conn.commit()
		except Exception as e:
			log_str("{} | {}".format(e,(key,value)))
			conn.rollback()
			succeeded = False
		finally:
			# always hand the cursor and connection back
			cur.close()
			conn.close()
		return succeeded
Ejemplo n.º 10
0
    def get_cookie(self):
        '''
        Open the site in Chrome through selenium and persist its cookies.

        The cookies are written to COOKIE_NAME as JSON, after which
        ``self.set_cookie()`` is called.  The process exits when Chrome
        rejects the configured arguments.
        '''
        log_str(TEMP_MSG["GET_COOKIE_NOW_INFO"].format(self.class_name))
        chrome_options = webdriver.ChromeOptions()
        # headless mode may prevent the cookies from being obtained
        # chrome_options.add_argument('--headless')
        chrome_options.add_argument('--no-sandbox')
        chrome_options.add_argument('--start-maximized')
        # suppress the automation warning bar
        chrome_options.add_experimental_option('useAutomationExtension', False)
        chrome_options.add_experimental_option('excludeSwitches',
                                               ['enable-automation'])
        # user profile directory
        chrome_options.add_argument('user-data-dir=' + PRO_DIR)

        try:
            # NOTE(review): newer selenium releases expect `options=` instead
            # of `chrome_options=` - confirm against the pinned version.
            driver = webdriver.Chrome(chrome_options=chrome_options)
            # A wrong PRO_DIR shows up as
            # selenium.common.exceptions.WebDriverException:
            # "unknown error: cannot create default profile directory",
            # which is not caught here.
        except InvalidArgumentException as e:
            log_str(TEMP_MSG["GET_COOKIE_NOW_INFO"].format(self.class_name))
            exit()

        # Always shut the browser down, even when loading the page fails,
        # so no Chrome process is left behind.
        try:
            driver.get(self.host_url)
            cookies = driver.get_cookies()
        finally:
            driver.quit()

        with open(COOKIE_NAME, "w") as fp:
            json.dump(cookies, fp)

        self.set_cookie()
Ejemplo n.º 11
0
    def check(self):
        """
        Obtain and validate the cookie (and user id) before the worker
        processes are started.  The process exits when no usable cookie
        can be obtained.
        """
        log_str(TEMP_MSG["GET_COOKIE_INFO"].format(self.class_name))

        if not self.isExists_UserCookie:
            # cookie comes from selenium or from the local cookie file
            if COOKIE_UPDATE_ENABLED == True:
                self.get_cookie()
            else:
                self.set_cookie()

            if self.cookie_list == []:
                log_str(TEMP_MSG["LOGIN_ERROR_INFO"].format(self.class_name))
                exit()
        else:
            # cookie supplied by the user: convert it to a cookie jar
            try:
                self.str2CookieJar()
            except Exception as e:
                log_str(e)
                log_str(TEMP_MSG["CONVERT_COOKIEJAR_ERROR_INFO"].format(
                    self.class_name))
                exit()

        # look the user id up only when the flag asks for it
        if self.flag:
            self.user_id = self.get_user_id()

        log_str(TEMP_MSG["INIT_INFO"].format(self.class_name))
Ejemplo n.º 12
0
	def insert_illust(self,u,table="pixiv"):
		"""
		Insert one work record.

		If MySQL reports error 1366, see
		https://blog.csdn.net/qq_31122833/article/details/83992085

		:params u: work data as {key: value, ...}; must hold every column
			named in the statement
		:params table: table to operate on
		:return: True when the insert was committed, False when it failed
			and was rolled back
		"""
		columns = (
			"uid", "userName", "pid", "purl", "title", "tag",
			"pageCount", "illustType", "is_r18", "viewCount",
			"bookmarkCount", "likeCount", "commentCount", "urls",
			"original", "path",
		)
		conn, cur = self.get_conn()

		# The table name is interpolated; every value is bound as a parameter.
		statement = '''INSERT INTO {} '''.format(table) + '''(uid,userName,pid,purl,title,tag,pageCount,\
						illustType,is_r18,viewCount,bookmarkCount,likeCount,\
						commentCount,urls,original,path) VALUES(%s,%s,%s,%s,\
						%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'''
		# Values in the same order as the column list of the statement.
		values = tuple(u[column] for column in columns)

		succeeded = True
		try:
			cur.execute(statement, values)
			conn.commit()
		except Exception as e:
			print(e)
			log_str(u)
			conn.rollback()
			succeeded = False
		finally:
			# always hand the cursor and connection back
			cur.close()
			conn.close()
		return succeeded
Ejemplo n.º 13
0
    def get_page_bookmark(self, offset):
        """
        Fetch the pids of one page (up to 100 entries) of bookmarked works.

        :params offset: offset of the first entry of the page
        :return: (list of int pids, value of body["total"]); (None, None)
            when the request fails or the response cannot be read
        """
        query = {"tag": "", "offset": offset, "limit": 100, "rest": "show"}
        try:
            resp = self.base_request({"url": self.bookmark_url}, params=query)
            body = json.loads(resp.text)["body"]
            works = body["works"]
            total = body["total"]
            pids = [int(work["illustId"]) for work in works]
        except Exception as e:
            log_str("bookmark获取收藏出错: 第{}-{}张失败".format(offset, offset + 100))
            return None, None
        return pids, total
Ejemplo n.º 14
0
	def updata_illust(self,u,table="pixiv"):
		"""
		Refresh the counters of one stored work, matched by pid.

		Columns written: viewCount, bookmarkCount, likeCount, commentCount.

		:params u: work data; must hold the four counters plus "pid"
		:params table: table to operate on
		:return: True when the update was committed, False when it failed
			and was rolled back
		"""
		conn, cur = self.get_conn()
		# Quick manual check of a row:
		# SELECT viewCount,bookmarkCount,likeCount,commentCount,pid FROM pixiv WHERE id=53312;
		statement = """UPDATE {} """.format(table) + """SET viewCount=%s,\
				bookmarkCount=%s,likeCount=%s,commentCount=%s WHERE pid=%s"""
		# Counters in statement order, followed by the pid for the WHERE clause.
		values = tuple(
			u[column]
			for column in ("viewCount", "bookmarkCount", "likeCount", "commentCount", "pid")
		)

		succeeded = True
		try:
			cur.execute(statement, values)
			conn.commit()
		except Exception as e:
			log_str("更新作品:{} 出错,{}".format(u["pid"],e))
			conn.rollback()
			succeeded = False
		finally:
			# always hand the cursor and connection back
			cur.close()
			conn.close()
		return succeeded
Ejemplo n.º 15
0
    def get_users(self):
        """
        Collect every followed artist that has at least one work.

        Pages of up to 100 users are requested through
        ``self.get_page_users(offset)`` until a page is empty (or missing)
        or holds fewer than 100 users.

        :return: [{"uid": uid, "userName": userName,
            "latest_id": latest_id}, ...] where latest_id is the pid of the
            first entry of the artist's "illusts" list
        """
        offset = 0
        users_info_list = []

        while True:
            u_list = self.get_page_users(offset)

            # An empty or missing page means there is nothing further to
            # read; requesting the same offset again would never terminate.
            if not u_list:
                break

            for u in u_list:
                if u["illusts"] == []:
                    # artists without works are skipped
                    log_str("{}无作品...".format(u["userId"]))
                    continue

                users_info_list.append({
                    "uid": int(u["userId"]),
                    "userName": u["userName"],
                    "latest_id": int(u["illusts"][0]["illustId"]),
                })

            # a short page is the last one
            if len(u_list) < 100:
                break

            offset += 100

        return users_info_list
Ejemplo n.º 16
0
Archivo: db.py Proyecto: tylrr123/PixiC
    def updata_illust(self, u, table="pixiv"):
        """
		更新作品数据,主要是浏览数,收藏数,评论数,喜欢数,path
		:params u:作品数据
		:parmas table: 操作数据表
		:return :
		主要更新 viewCount bookmarkCount commentCount likeCount
		"""
        conn, cur = self.get_conn()

        # 更新sql
        sql = """UPDATE {} """.format(table) + """SET viewCount=%s,\
				bookmarkCount=%s,likeCount=%s,commentCount=%s,path=%s WHERE pid=%s"""
        # 更新数据
        data = (u["viewCount"], u["bookmarkCount"], u["likeCount"],
                u["commentCount"], u["path"], u["pid"])
        try:
            cur.execute(sql, data)
            conn.commit()
        except Exception as e:
            log_str(
                DB_UPDATE_ILLUST_ERROR_INF.format(self.__class__.__name__,
                                                  u["pid"], e))
            conn.rollback()
            return False
        else:
            return True
        finally:
            cur.close()
            conn.close()
Ejemplo n.º 17
0
	def __init__(self,thread_num=8):
		"""
		Create the database connection pool and store it in ``self.pool``.

		The process exits when the pool cannot be created because of a
		pymysql OperationalError.

		:params thread_num: passed to PooledDB as its second positional
			argument (presumably the minimum number of pooled connections -
			confirm against the DBUtils version in use)
		"""
		log_str("数据库连接池实例化...")
		# connection settings forwarded to pymysql
		settings = dict(
			host=DB_HOST,
			user=DB_USER,
			passwd=DB_PASSWD,
			db=DB_DATABASE,
			port=DB_PORT,
			charset=DB_CHARSET,
		)
		try:
			self.pool = PooledDB(pymysql, thread_num, **settings)
		except pymysql.err.OperationalError as e:
			log_str("请确保Mysql在运行/配置好\n{}".format(e))
			exit()
Ejemplo n.º 18
0
 def get_user_id(self):
     """
     Read the id of the logged-in user from the site's start page.

     The page is requested with a cookie set picked at random from
     ``self.cookie_list``.  The process exits when the page asks for
     JavaScript to be enabled.

     :return: the first ``user_id:"..."`` value found in the page, as str
     """
     jar = random.choice(self.cookie_list)
     page = requests.get(self.host_url, headers=headers, cookies=jar).text

     if "Please turn JavaScript on and reload the page." in page:
         log_str(TEMP_MSG["GOOGLE_CAPTCHA_ERROR_INFO"].format(
             self.class_name))
         exit()

     # blanks are removed first so the pattern matches regardless of spacing
     compact = page.replace(" ", "")
     matches = re.findall(r'''.*?,user_id:"(.*?)",.*?''', compact)
     return matches[0]
Ejemplo n.º 19
0
    def get_users(self):
        """
        Collect every followed artist for each entry of ``self.rest_list``.

        Pages of 100 users are requested through
        ``self.get_page_users(offset, rest=rest)`` until an empty page is
        returned.  A page that fails (None) is skipped; once 10 failures have
        accumulated over the whole run, the next failure ends the current
        rest type.

        :return: [{"uid": uid, "userName": userName,
            "latest_id": latest_id}, ...] where latest_id is the id of the
            artist's first listed work, or -1 when the artist has none;
            TEMP_MSG["UL_TEXT"] when a page reports "not logged in"
        """
        users_info_list = []
        err_count = 0
        err_limit = 10

        for rest in self.rest_list:
            # Every rest type is paginated on its own, so start from the
            # first page again instead of continuing at the previous offset.
            offset = 0

            while True:
                u_list = self.get_page_users(offset, rest=rest)

                # request error
                if u_list is None:
                    if err_count < err_limit:
                        # skip the failed page
                        offset += 100
                        err_count += 1
                        continue
                    break

                # Not logged in: report it right away. Carrying on would
                # try to append users to the marker value.
                if u_list == TEMP_MSG["UL_TEXT"]:
                    return TEMP_MSG["UL_TEXT"]

                # all follows of this rest type have been read
                if u_list == []:
                    break

                for u in u_list:
                    user_info = {}
                    user_info["uid"] = int(u["userId"])
                    # replace whitespace and characters that are not allowed
                    # in file names
                    user_info["userName"] = re.sub(
                        r'[\s\/:*?"<>|\\]', '_', u["userName"])

                    if u["illusts"] == []:
                        # artists without works are kept, marked with -1
                        user_info["latest_id"] = -1
                        log_str(TEMP_MSG["FOLLOW_NO_ILLUSTS_INFO"].format(
                            self.class_name, u["userName"], u["userId"]))
                    else:
                        user_info["latest_id"] = int(u["illusts"][0]["id"])

                    users_info_list.append(user_info)

                offset += 100

        return users_info_list
Ejemplo n.º 20
0
    def get_illust_level(self, score, bookmarkCount):
        """
        Map a work's score and bookmark count to a rating level.

        :params score: bookmarkCount/viewCount -> float
        :params bookmarkCount: number of bookmarks -> int
        :return: one of 'R', 'SR', 'SSR', 'UR'; 'R' when no rule matches
            -> str
        """
        # upper bounds of the score intervals
        r_max, sr_max, ssr_max, ur_max = 0.140, 0.260, 0.325, 1.000

        # === promotions for heavily bookmarked works (bounds excluded) ===
        # SR-range score -> UR
        if 25000 < bookmarkCount and r_max < score < sr_max:
            return 'UR'
        # R-range score -> SSR
        if 20000 < bookmarkCount and score < r_max:
            return 'SSR'

        # === regular rating: lower bound included, upper bound excluded ===
        ladder = (
            (0, r_max, 'R'),
            (r_max, sr_max, 'SR'),
            (sr_max, ssr_max, 'SSR'),
        )
        for low, high, level in ladder:
            if low <= score < high:
                return level

        # the top interval also includes its upper bound
        if ssr_max <= score <= ur_max:
            return 'UR'

        # none of the six rules matched: fall back to R
        log_str("采取默认规则 score:{} bookmarkCount:{}".format(
            score, bookmarkCount))
        return "R"


# Downloader = Down()
Ejemplo n.º 21
0
	def check_user(self, u):
		"""
		Return the latest_id on record for an artist, creating the record
		when the pxusers table has none for this uid yet.

		The caller compares the returned value with the latest_id reported
		by the site to decide whether the artist needs updating.

		:params u: artist data; the keys read here are "uid", "userName",
			"latest_id" and "path"
		:return: the stored latest_id when a record exists; otherwise
			u["latest_id"] (also when inserting the new record failed)
		"""
		# count the records of this artist
		sql_count = "SELECT COUNT(uid) FROM pxusers WHERE uid=%s"
		# insert a new artist record
		sql_insert = '''INSERT INTO pxusers(uid,userName,latest_id,path) VALUES(%s,%s,%s,%s)'''
		# read the stored latest_id
		sql_latest = "SELECT latest_id FROM pxusers WHERE uid=%s"

		uid = u["uid"]
		data = (u["uid"], u["userName"], u["latest_id"], u["path"])

		conn, cur = self.get_conn()
		# One finally for every path, so the cursor and connection are also
		# released when the record already exists or a query raises.
		try:
			cur.execute(sql_count, (uid,))
			count = cur.fetchall()[0]["COUNT(uid)"]

			if count >= 1:
				cur.execute(sql_latest, (uid,))
				return cur.fetchall()[0]["latest_id"]

			try:
				cur.execute(sql_insert, data)
				conn.commit()
			except Exception as e:
				log_str(e)
				conn.rollback()
			# new artist (or failed insert): treat everything as new
			return u["latest_id"]
		finally:
			cur.close()
			conn.close()
Ejemplo n.º 22
0
    def thread_by_illust(self, *args):
        """
        Worker for one bookmarked work: fetch its info and store it in the
        "bookmark" table.

        Nothing is done unless ``check_illust`` reports a path of None for
        the work.  A work without a record is inserted, an existing record
        is updated.

        :param args: args[0] is the pid of the work
        :return: None
        """
        pid = args[0]
        isExists, path = self.db.check_illust(pid, table="bookmark")

        if path is not None:
            return

        # the download decision depends on the counts of each request
        try:
            info = Downloader.get_illust_info(pid, extra="bookmark")
        except Exception as e:
            log_str("{}请求错误:{}".format(pid, e))
            return

        if info is None:
            log_str("该作品{}已被删除,或作品ID不存在.".format(pid))
            return

        if not isExists:
            # no record yet
            if self.db.insert_illust(info, table="bookmark"):
                log_str("插入{}成功".format(pid))
            else:
                log_str("插入{}失败".format(pid))
        else:
            # Update the record in the same table that was checked above;
            # without `table` the default table would be written instead.
            self.db.updata_illust(info, table="bookmark")
Ejemplo n.º 23
0
    def set_cookie(self):
        '''
        Load the cookies stored in COOKIE_NAME into ``self.cookie``, then
        set ``self.user_id``.

        The process exits when the cookie file is missing or empty.

        :return: ``self.cookie``
        '''
        try:
            with open(COOKIE_NAME, "r", encoding="utf8") as fp:
                raw = fp.read()
                if raw == "":
                    log_str(COOKIE_EMPTY_INFO.format(self.class_name))
                    exit()
                for entry in json.loads(raw):
                    self.cookie.set(entry['name'], entry['value'])
        except FileNotFoundError as e:
            log_str(FILE_NOT_FOUND_INFO_1.format(self.class_name))
            log_str(FILE_NOT_FOUND_INFO_2.format(self.class_name))
            log_str(e)
            exit()

        # user id: looked up from the site when the flag is set, otherwise
        # taken from the USER_ID constant
        if self.flag == True:
            self.user_id = self.get_user_id()
        else:
            self.user_id = USER_ID
        return self.cookie
Ejemplo n.º 24
0
	def thread_by_illust(self, *args):
		"""
		Worker for one bookmarked work: fetch its info and, when a
		connection pool is available, insert or update its record in the
		"bookmark" table.

		:param args: args[0] is the pid of the work
		:return: None
		"""
		pid = args[0]
		try:
			info = self.Downloader.get_illust_info(pid,extra="bookmark")
		except Exception as e:
			log_str(ILLUST_NETWORK_ERROR_INFO.format(self.class_name,pid,e))
			return

		if info is None:
			log_str(ILLUST_EMPTY_INFO.format(self.class_name,pid))
			return

		# database switched off: nothing to store
		if not hasattr(self.db,"pool"):
			return

		isExists,path = self.db.check_illust(pid,table="bookmark")
		if isExists:
			# record present: refresh it
			self.db.updata_illust(info,table="bookmark")
			return

		# no record yet: insert one and report the outcome
		if self.db.insert_illust(info,table="bookmark") == False:
			log_str(INSERT_FAIL_INFO.format(self.class_name,pid))
		else:
			log_str(INSERT_SUCCESS_INFO.format(self.class_name,pid))
Ejemplo n.º 25
0
    def check_illust(self, value, key="pid", table="pixiv", database=None):
        """
        Check whether a record with ``key == value`` exists.

        For the "pixiv" and "bookmark" tables the stored path is returned as
        well; any other table uses a generic COUNT query.

        Raw fetchall() results for pixiv/bookmark:
            record present: [{'COUNT(1)': 1, 'path': 'None'}]
            record absent:  ()

        :parmas value: value to look for
        :parmas key: column name
        :parmas table: table to query
        :parmas database: when given, switch the connection to this database
        :return: (True, path) when a record exists (path is "" for tables
            without a path column), otherwise (False, "")
        """
        # Validate before taking a connection, so the early returns do not
        # leave one checked out.
        if key == "" or value == "":
            return False, ""

        conn, cur = self.get_conn()
        try:
            # switch database
            if database is not None:
                conn.select_db(database)

            if table in ["pixiv", "bookmark"]:
                # Without GROUP BY path, strict mode reports error 1140;
                # with it, a missing record yields an empty result.
                sql = """SELECT COUNT(1),path FROM {} """.format(
                    table) + """WHERE {}=%s GROUP BY path""".format(key)
            else:
                sql = """SELECT COUNT(1) FROM {} """.format(
                    table) + """WHERE {}=%s""".format(key)

            try:
                cur.execute(sql, (value,))
            except Exception as e:
                log_str("{}:check_illust | {}".format(self.class_name, e))
                return False, ""

            rows = cur.fetchall()
            # truthiness covers both () and [] for "no record"
            if rows and rows[0]["COUNT(1)"] >= 1:
                return True, rows[0].get("path", "")
            return False, ""
        finally:
            cur.close()
            conn.close()
Ejemplo n.º 26
0
    def baseRequest(self, options, data=None, params=None, retry_num=5):
        '''
        Send a GET request through the shared session, retrying on failure.

        :params options: request description, e.g.
            {"method": "get/post", "url": "example.com"}; only "url" and the
            optional "headers" are read, the method is currently ignored
        :params data: forwarded as ``data``
        :params params: forwarded as ``params``
        :params retry_num: number of retries left after a failed attempt
        :return: the response object, or None once every attempt has failed

        Headers: when ``options`` holds a "headers" entry it is used,
        otherwise ``self.headers``.  This allows a per-request referer
        (which has to be the url of the previous page, e.g. the artist or
        work page):

            demo_headers = headers.copy()
            demo_headers['referer'] = 'www.example.com'
            options = {
                "method": "get",
                "url": "origin_url",
                "headers": demo_headers
            }
            baseRequest(options=options)
        '''
        if "headers" in options:
            request_headers = options["headers"]
        else:
            request_headers = self.headers

        try:
            # GET only for now; options["method"] is not evaluated yet
            return self.se.get(
                options["url"],
                data=data,
                params=params,
                cookies=self.jar,
                headers=request_headers,
                verify=False,
                timeout=10,
            )
        except Exception as e:
            if retry_num <= 0:
                # out of retries: report the last error
                log_str(
                    DM_NETWORK_ERROR_INFO.format(self.class_name,
                                                 options["url"], e))
                return None
            return self.baseRequest(options, data, params, retry_num - 1)
Ejemplo n.º 27
0
    def run(self):
        """
        Log the version, refresh the client cookie and start one process
        per enabled component (crawler, bookmark, API).

        The API process is started only when the database is enabled too.
        The started processes are not joined here.
        """
        log_str(VERSION_INFO)
        # let the client refresh its cookie first
        client.check()

        if PIXIV_CRAWLER_ENABLED:
            Process(target=self.scheduler_crawler).start()

        if PIXIV_BOOKMARK_ENABLED:
            Process(target=self.scheduler_bookmark).start()

        if PIXIV_API_ENABLED and DB_ENABLE:
            Process(target=self.scheduler_api).start()
Ejemplo n.º 28
0
Archivo: db.py Proyecto: tylrr123/PixiC
    def __init__(self, thread_num=8):
        """
        Create the database connection pool and store it in ``self.pool``.

        No pool is created when DB_ENABLE is off, so the instance has no
        ``pool`` attribute in that case.  The process exits when the pool
        cannot be created because of a pymysql OperationalError.

        :params thread_num: passed to PooledDB as its second positional
            argument (presumably the minimum number of pooled connections -
            confirm against the DBUtils version in use)
        """
        if DB_ENABLE == False:
            return

        log_str(DB_INST)
        # connection settings forwarded to pymysql
        settings = dict(
            host=DB_HOST,
            user=DB_USER,
            passwd=DB_PASSWD,
            db=DB_DATABASE,
            port=DB_PORT,
            charset=DB_CHARSET,
        )
        try:
            self.pool = PooledDB(pymysql, thread_num, **settings)
        except pymysql.err.OperationalError as e:
            log_str(DB_CONNECT_ERROR_INFO.format(e))
            exit()
Ejemplo n.º 29
0
	def get_user_illust(self, u):
		"""
		List the ids of all works (illusts and manga) of one artist.

		Also stores the artist's directory, as returned by
		``self.file_manager.mkdir_painter``, in ``u["path"]``.

		:params u: artist data (dict); "uid" is read, "path" is written
		:return: list of the keys of the "illusts" and "manga" entries of
			the response body; [] when the request or parsing fails
		"""
		u["path"] = self.file_manager.mkdir_painter(u)
		illust_url = self.all_illust_url.format(u["uid"])
		try:
			body = json.loads(self.base_request({"url":illust_url}).text)["body"]
			illusts = body["illusts"]
			manga = body["manga"]
			# merge both mappings; only the keys are of interest
			if len(manga) == 0:
				merged = dict(illusts)
			else:
				merged = dict(illusts, **manga)
			work_ids = list(merged.keys())
		except Exception as e:
			log_str(FOLLOW_DATA_ERROR_INFO.format(self.class_name,e))
			return []
		return work_ids
Ejemplo n.º 30
0
	def get_users(self):
		"""
		Collect every followed artist that has at least one work.

		Pages of 100 users are requested through
		``self.get_page_users(offset)`` until an empty page or the "not
		logged in" marker is returned.  A failed page (None) is requested
		again; after 10 failed requests in total the next failure ends the
		collection with whatever has been gathered so far.

		:return: [{"uid": uid, "userName": userName,
			"latest_id": latest_id}, ...] where latest_id is the pid of the
			first entry of the artist's "illusts" list
		"""
		offset = 0
		users_info_list = []
		# bound the retries so a persistent failure cannot loop forever
		err_count = 0
		err_limit = 10

		while True:
			u_list = self.get_page_users(offset)

			# request error: retry the same page, up to the limit
			if u_list is None:
				if err_count < err_limit:
					err_count += 1
					continue
				break

			# not logged in
			if u_list == UL_TEXT:
				break

			# all follows have been read
			if u_list == []:
				break

			for u in u_list:
				if u["illusts"] == []:
					# artists without works are skipped
					log_str(FOLLOW_NO_ILLUSTS_INFO.format(self.class_name,u["userName"],u["userId"]))
					continue

				users_info_list.append({
					"uid": int(u["userId"]),
					# replace whitespace and characters that are not allowed
					# in file names
					"userName": re.sub(r'[\s\/:*?"<>|\\]','_',u["userName"]),
					"latest_id": int(u["illusts"][0]["illustId"]),
				})

			offset += 100

		return users_info_list