async def getPost(self, post_id: int):
    """Fetch a post and its tags by scraping the post page.

    The page at ``https://<self.url>/post/show/<post_id>`` is downloaded and
    the first direct child of the ``#post-view`` element is inspected:
    a ``<script>`` child carries the post data, while a ``<div>`` with the
    ``status-notice`` class carries an error notice from the site.

    :param post_id: ID of the post to fetch.
    :return: Tuple ``(post, tags)`` taken from the embedded JSON
        (``info['posts'][0]`` and ``info['tags']``).
    :raises NazurinError: On HTTP errors, when the site shows a status
        notice, when the page layout is not recognised, or when the
        embedded data cannot be parsed.
    """
    url = 'https://' + self.url + '/post/show/' + str(post_id)
    async with Request() as request:
        async with request.get(url) as response:
            try:
                response.raise_for_status()
            except ClientResponseError as err:
                raise NazurinError(err) from None
            html = await response.text()

    soup = BeautifulSoup(html, 'html.parser')
    container = soup.find(id="post-view")
    tag = container.find(recursive=False) if container is not None else None
    if tag is None:
        # Page layout is not what we expect; fail with a domain error
        # instead of an AttributeError on None.
        logger.error('Element #post-view is missing or empty: %s', url)
        raise NazurinError('Unknown error')

    if tag.name == 'script':
        content = str.strip(tag.string)
    elif tag.name == 'div' and 'status-notice' in tag.get('class', []):
        raise NazurinError(tag.get_text(strip=True))
    else:
        logger.error(tag)
        raise NazurinError('Unknown error')

    # Drop the 19 leading and 2 trailing characters around the JSON payload.
    # NOTE(review): presumably a JS call wrapper such as
    # `Post.register_resp(` ... `);` -- confirm against a live page.
    info = content[19:-2]
    try:
        info = json.loads(info)
        post = info['posts'][0]
        tags = info['tags']
    except (json.decoder.JSONDecodeError, KeyError, IndexError) as err:
        # Previously the error was only logged and the function then failed
        # on the unbound `post`/`tags` names.
        logger.error(err)
        raise NazurinError('Failed to parse post data') from None
    return post, tags
def parseHtml(self, html) -> dict:
    """
    Extract post data from html <script> block as example below.
    We're lucky the JS objects are written in JSON syntax
    with quotes wrapped property names.

    <script>
    ...
    var $render_data = [{
        "status": {
            ...
        },
        ...
    }][0] || {};
    ...
    </script>

    :param html: Page source to search for the ``$render_data`` assignment.
    :return: The ``status`` object of the first render-data entry.
    :raises NazurinError: If the assignment is not found, the captured text
        is not valid JSON, or it has no ``status`` entry.
    """
    regex = r"\$render_data = \[\{\n([\s\S]+)\}\]\[0\] \|\| \{\};"
    matches = re.search(regex, html)
    if not matches:
        raise NazurinError('Post not found')
    # Re-wrap the captured object body so it parses as a JSON array.
    json_str = '[{' + matches.group(1) + '}]'
    try:
        render_data = json.loads(json_str)
        post = render_data[0]['status']
    except (json.JSONDecodeError, KeyError, IndexError):
        # A payload without "status" is as unusable as malformed JSON.
        raise NazurinError('Failed to parse post data') from None
    return post
async def getArtwork(self, artwork_id: int):
    """Fetch an artwork.

    :param artwork_id: ID passed to ``Pixiv.illust_detail``.
    :return: The ``illust`` object of the API response.
    :raises NazurinError: With the API's error message when the response
        has no ``illust``, or when the artwork's ``restrict`` is non-zero.
    """
    response = await self.call(Pixiv.illust_detail, artwork_id)
    if 'illust' not in response.keys():
        # Prefer the user-facing message, fall back to the raw one.
        error = response.error
        raise NazurinError(error.user_message or error.message)

    illust = response.illust
    if illust.restrict != 0:
        raise NazurinError("Artwork is private")
    return illust
async def requireAuth(self):
    """Make sure ``Pixiv.api`` carries tokens before an API call.

    Tokens are treated as expired once 3600 seconds have passed since
    ``Pixiv.updated_time``. Depending on the current state this either
    returns immediately, refreshes the token, restores tokens from the
    stored document, or authenticates with ``REFRESH_TOKEN`` and stores
    the resulting tokens.

    :raises NazurinError: If nothing is stored and ``REFRESH_TOKEN`` is
        not set.
    """
    api = Pixiv.api
    if api.access_token and time.time() - Pixiv.updated_time < 3600:
        # Logged in, access_token not expired
        return
    if api.refresh_token:
        # Logged in, access_token expired
        await self.refreshToken()
        return

    # Haven't logged in
    tokens = await Pixiv.document.get()
    if not tokens:
        # Initialize database
        if not REFRESH_TOKEN:
            raise NazurinError('Pixiv refresh token is required')
        Pixiv.api.refresh_token = REFRESH_TOKEN
        await Pixiv.api_auth()
        Pixiv.updated_time = time.time()
        cached = {
            'access_token': Pixiv.api.access_token,
            'refresh_token': Pixiv.api.refresh_token,
            'updated_time': Pixiv.updated_time
        }
        await Pixiv.collection.insert(DOCUMENT, cached)
        logger.info('Pixiv tokens cached')
        return

    Pixiv.api.access_token = tokens['access_token']
    Pixiv.api.refresh_token = tokens['refresh_token']
    Pixiv.updated_time = tokens['updated_time']
    if time.time() - Pixiv.updated_time < 3600:
        logger.info('Pixiv logged in through cached tokens')
    else:
        # Token expired
        await self.refreshToken()
async def updateCollection(self,
                           urls: List[str],
                           message: Optional[Message] = None):
    """Match the URLs to a site, post the result to the gallery and store it.

    Posting to the gallery and downloading the illust run concurrently;
    the illust is handed to the storage once both have finished.

    :param urls: Candidate URLs to match against the known sites.
    :param message: Optional originating message.
    :return: ``True`` on completion.
    :raises NazurinError: If no site matches the URLs.
    """
    matched = self.sites.match(urls)
    if not matched:
        raise NazurinError('No source matched')
    logger.info('Collection update: site=%s, match=%s', matched['site'],
                matched['match'].groups())

    illust = await self.sites.handle_update(matched)

    # Send / Forward to gallery & Save to album
    # If there're multiple images, then send a new message instead of
    # forwarding an existing one, since we currently can't forward albums correctly.
    if message and message.is_forward() and not illust.has_multiple_images():
        gallery_action = message.forward(config.GALLERY_ID)
    elif not illust.has_image():
        gallery_action = self.send_message(config.GALLERY_ID, '\n'.join(urls))
    else:
        gallery_action = self.sendIllust(illust, message, config.GALLERY_ID)

    save = asyncio.create_task(gallery_action)
    download = asyncio.create_task(illust.download())
    await asyncio.gather(save, download)
    await self.storage.store(illust)
    return True
async def chosen_url(self) -> str:
    """Pick the URL to send, caching the choice in ``self._chosen_url``.

    Conform with limitations of sending photos:
    https://core.telegram.org/bots/api#sendphoto

    The original URL is used unless a thumbnail exists and either the
    dimensions are unknown, width + height exceeds 10000, or the file size
    is unknown or larger than 5 MiB.

    :return: The chosen URL.
    :raises NazurinError: If width / height is greater than 20.
    """
    if self._chosen_url:
        return self._chosen_url

    if self.height != 0 and self.width / self.height > 20:
        raise NazurinError(
            'Width and height ratio of image exceeds 20, try download option.'
        )

    self._chosen_url = self.url
    if not self.thumbnail:
        # Nothing to fall back to.
        return self._chosen_url

    # For safety reasons, use thumbnail when image size is unknown
    dimensions_unknown = (not self.width) or (not self.height)
    if dimensions_unknown or self.width + self.height > 10000:
        self._chosen_url = self.thumbnail
        logger.info(
            'Use thumbnail (Unkown image size or width + height > 10000 [%s, %s]): %s',
            self.width, self.height, self._chosen_url)
        return self._chosen_url

    size = await self.size()
    if (not size) or size > 5 * 1024 * 1024:
        self._chosen_url = self.thumbnail
        logger.info('Use thumbnail (Unknown size or size > 5MB [%s]): %s',
                    size, self._chosen_url)
    return self._chosen_url
async def bookmark(self, artwork_id: int):
    """Bookmark an artwork.

    :param artwork_id: ID passed to ``Pixiv.illust_bookmark_add``.
    :return: ``True`` when the response carries no error.
    :raises NazurinError: With the API's error message when the response
        contains an ``error`` entry.
    """
    response = await self.call(Pixiv.illust_bookmark_add, artwork_id)
    if 'error' in response.keys():
        logger.error(response)
        error = response['error']
        # `user_message` may be empty; fall back to `message` so the raised
        # error is never blank.
        raise NazurinError(
            error.get('user_message') or error.get('message')
            or 'Unknown error')
    logger.info('Bookmarked artwork %s', artwork_id)
    return True
async def getPost(self,
                  post_id: Optional[int] = None,
                  md5: Optional[str] = None):
    """Fetch a post, by ID when given, otherwise by MD5.

    :param post_id: ID passed to ``self.post_show``.
    :param md5: MD5 passed to ``self.post_list`` when no ``post_id`` is given.
    :return: The post returned by the API.
    :raises NazurinError: If the post is not found, or it has no
        ``file_url`` entry.
    :raises PybooruHTTPError: For any other HTTP error from the API.
    """
    try:
        if post_id:
            post = await self.post_show(post_id)
        else:
            post = await self.post_list(md5=md5)
    except PybooruHTTPError as err:
        if 'Not Found' in err._msg:
            raise NazurinError('Post not found') from None
        # Don't swallow other HTTP errors: falling through would fail
        # below on the unbound `post` name and hide the real cause.
        raise

    if 'file_url' not in post.keys():
        raise NazurinError(
            'You may need a gold account to view this post\nSource: ' +
            post['source'])
    return post
async def insert(self, key: Optional[Union[str, int]], data: dict) -> bool:
    """Insert a document into the collection.

    :param key: Value to store as the document's ``_id``; ``None`` leaves
        ``data`` without an explicit ``_id``.
    :param data: Document to insert. It is modified in place when ``key``
        is given.
    :return: The ``acknowledged`` flag of the insert result.
    :raises NazurinError: If the insert fails with a duplicate key.
    """
    # Compare against None so falsy keys such as 0 are still used.
    if key is not None:
        data['_id'] = key
    try:
        result = await self._collection.insert_one(data)
        return result.acknowledged
    except DuplicateKeyError as error:
        raise NazurinError('Already exists in database.') from error
async def getPost(self, post_id: str):
    """Fetch a post.

    :param post_id: Project identifier used in the ArtStation JSON URL.
    :return: The decoded JSON body of the response.
    :raises NazurinError: If the response status is not 200.
    """
    endpoint = f"https://www.artstation.com/projects/{post_id}.json"
    async with Request() as request, request.get(endpoint) as response:
        if response.status != 200:
            raise NazurinError('Post not found')
        data = await response.json()
    return data
async def getPost(self, post_id: str):
    """Fetch a post.

    :param post_id: Identifier used in the mobile Weibo detail URL.
    :return: Post data extracted from the page by ``self.parseHtml``.
    :raises NazurinError: If the response status is not 200.
    """
    page_url = f"https://m.weibo.cn/detail/{post_id}"
    async with Request() as request, request.get(page_url) as response:
        if response.status != 200:
            raise NazurinError('Post not found')
        html = await response.text()
    return self.parseHtml(html)
async def getPost(self, post_id: int):
    """Fetch a post.

    :param post_id: ID appended to the Gelbooru API query.
    :return: First entry of the ``post`` list in the JSON response.
    :raises NazurinError: If the response has no ``post`` entry.
    """
    endpoint = ('https://gelbooru.com/index.php'
                '?page=dapi&s=post&q=index&json=1&id=' + str(post_id))
    async with Request() as request, request.get(endpoint) as response:
        payload = await response.json()
        if 'post' not in payload.keys():
            raise NazurinError('Post not found')
        first = payload['post'][0]
    return first
async def getTweet(self, status_id: int):
    """Get a tweet from API.

    :param status_id: ID of the tweet to request.
    :return: The decoded JSON body of the response.
    :raises NazurinError: If the response status is 404.
    """
    # Old: 'https://syndication.twitter.com/tweets.json?ids='+ status_id +'&lang=en'
    endpoint = ''.join([
        'https://cdn.syndication.twimg.com/tweet?id=',
        str(status_id),
        '&lang=en',
    ])
    async with Request() as request, request.get(endpoint) as response:
        if response.status == 404:
            raise NazurinError('Tweet not found or unavailable.')
        payload = await response.json()
    return payload
async def bookmark(self,
                   artwork_id: int,
                   privacy: PixivPrivacy = PixivPrivacy.PUBLIC):
    """Bookmark an artwork with the given privacy setting.

    :param artwork_id: ID passed to ``Pixiv.illust_bookmark_add``.
    :param privacy: Privacy setting; its ``value`` is passed to the API call.
    :return: ``True`` when the response carries no error.
    :raises NazurinError: With the API's user message (or raw message)
        when the response contains an ``error`` entry.
    """
    response = await self.call(Pixiv.illust_bookmark_add, artwork_id,
                               privacy.value)
    if 'error' not in response.keys():
        logger.info('Bookmarked artwork %s, privacy = %s', artwork_id,
                    privacy.value)
        return True

    logger.error(response)
    raise NazurinError(response.error.user_message
                       or response.error.message)
def convert(config: File, output: File):
    """Run FFmpeg to encode ``config`` into an H.264 video at ``output``.

    The video is padded so that width and height are even.

    :param config: File whose ``path`` is given to FFmpeg as input (``-i``).
    :param output: File whose ``path`` receives the result; an existing
        file is overwritten (``-y``).
    :raises NazurinError: If FFmpeg exits with a non-zero status.
    """
    # Build the argument list directly rather than formatting a command
    # string and re-splitting it, so paths containing spaces or quotes
    # reach FFmpeg intact.
    args = [
        'ffmpeg',
        '-i', str(config.path),
        '-vcodec', 'libx264',
        '-vf', 'pad=ceil(iw/2)*2:ceil(ih/2)*2',
        '-y', str(output.path),
    ]
    logger.info('Calling FFmpeg with command: %s',
                ' '.join(shlex.quote(arg) for arg in args))
    try:
        subprocess.check_output(args, stderr=subprocess.STDOUT, shell=False)
    except subprocess.CalledProcessError as error:
        # Decode leniently: FFmpeg output is not guaranteed to be valid
        # UTF-8 and logging must not fail here.
        logger.error('FFmpeg failed with code %s, output:\n %s',
                     error.returncode,
                     error.output.decode(errors='replace'))
        raise NazurinError('Failed to convert ugoira to mp4.') from None
def getImages(self, tweet) -> List[Image]:
    """Get all images in a tweet.

    :param tweet: Tweet mapping; reads ``photos`` and ``id_str``.
    :return: One ``Image`` per photo, in the order given by the tweet.
    :raises NazurinError: If the tweet has no ``photos`` entry.
    """
    if 'photos' not in tweet.keys():
        raise NazurinError('No photo found.')

    def to_image(photo):
        # Build the Image for a single photo entry.
        filename, url, thumbnail = self.parseUrl(photo['url'])
        destination = 'twitter - ' + tweet['id_str'] + ' - ' + filename
        return Image(destination,
                     url,
                     thumbnail,
                     width=photo['width'],
                     height=photo['height'])

    return [to_image(photo) for photo in tweet['photos']]
def getImages(self, post) -> List[Image]:
    """Get images from post.

    :param post: Post mapping; reads ``pics`` and ``mid``.
    :return: One ``Image`` per entry of ``pics``, in order.
    :raises NazurinError: If the post has no ``pics`` entry.
    """
    if 'pics' not in post.keys():
        raise NazurinError('No image found')
    pics = post['pics']
    mid = post['mid']

    def to_image(pic):
        # Build the Image for a single picture entry.
        filename, url, thumbnail, width, height = self.parsePic(pic)
        return Image(f"Weibo - {mid} - {filename}",
                     url,
                     thumbnail,
                     width=width,
                     height=height)

    return [to_image(pic) for pic in pics]
async def sendPhotos(self,
                     illust: Illust,
                     chat_id: int,
                     reply_to: Optional[int] = None):
    """Send an illust's images in groups of at most ten.

    Every group is sent through ``self.sendSingleGroup`` with the same
    sanitized caption.

    :param illust: Source of the caption and the images.
    :param chat_id: Target chat, passed through to ``sendSingleGroup``.
    :param reply_to: Optional value passed through to ``sendSingleGroup``.
    :raises NazurinError: If the illust has no images.
    """
    caption = sanitizeCaption(illust.caption)
    imgs = illust.images
    if len(imgs) == 0:
        raise NazurinError('No image to send, try download option.')

    # Split into consecutive chunks of up to 10 images.
    groups = [imgs[start:start + 10] for start in range(0, len(imgs), 10)]
    for group in groups:
        await self.sendSingleGroup(group, caption, chat_id, reply_to)
def convert(config: File, output: File):
    """Run FFmpeg to encode ``config`` into an H.264 video at ``output``.

    The video is padded so that width and height are even.

    :param config: File whose ``path`` is given to FFmpeg as input (``-i``).
    :param output: File whose ``path`` receives the result; an existing
        file is overwritten (``-y``).
    :raises NazurinError: If FFmpeg exits with a non-zero status.
    """
    # For some illustrations like https://www.pixiv.net/artworks/44298467,
    # the output video is in YUV444P colorspace, which can't be played on some devices,
    # thus we convert to YUV420P colorspace for better compatibility.
    #
    # The argument list is built directly rather than formatting a command
    # string and re-splitting it, so paths containing quotes or
    # backslashes reach FFmpeg intact.
    args = [
        'ffmpeg',
        '-i', str(config.path),
        '-vcodec', 'libx264',
        '-pix_fmt', 'yuv420p',
        '-vf', 'pad=ceil(iw/2)*2:ceil(ih/2)*2',
        '-y', str(output.path),
    ]
    logger.info('Calling FFmpeg with command: %s',
                ' '.join(shlex.quote(arg) for arg in args))
    try:
        subprocess.check_output(args, stderr=subprocess.STDOUT, shell=False)
    except subprocess.CalledProcessError as error:
        # Decode leniently: a UnicodeDecodeError here would mask the
        # NazurinError raised below.
        logger.error('FFmpeg failed with code %s, output:\n %s',
                     error.returncode,
                     error.output.decode(errors='replace'))
        raise NazurinError('Failed to convert ugoira to mp4.') from None
def auth(self):
    """Create the shared ``GoogleDrive.drive`` client if it does not exist.

    ``GD_CREDENTIALS`` is read as inline JSON when it starts with ``{``,
    otherwise as the name of a key file.

    :raises NazurinError: If ``GD_CREDENTIALS`` is empty.
    """
    if GoogleDrive.drive:
        return

    gauth = GoogleAuth()
    scope = ['https://www.googleapis.com/auth/drive']
    gauth.auth_method = 'service'

    if not GD_CREDENTIALS:
        raise NazurinError(
            'Credentials not found for Google Drive storage.')

    if GD_CREDENTIALS.startswith('{'):
        # Inline JSON key.
        key_dict = json.loads(GD_CREDENTIALS)
        gauth.credentials = ServiceAccountCredentials.from_json_keyfile_dict(
            key_dict, scope)
    else:
        # Key file name.
        gauth.credentials = ServiceAccountCredentials.from_json_keyfile_name(
            GD_CREDENTIALS, scope)

    GoogleDrive.drive = GDrive(gauth)
async def getPost(self, post_id: int):
    """Fetch a post by scraping its Zerochan page.

    Metadata comes from the page's ``application/ld+json`` script block and
    tags from the ``<ul id="tags">`` list.

    :param post_id: ID of the post; replaced by the final URL path when
        the request was redirected.
    :return: Dict describing the post (id, name, timestamps, dimensions,
        tags, file info and uploader).
    :raises NazurinError: On an HTTP error status.
    """
    page_url = 'https://www.zerochan.net/' + str(post_id)
    async with Request() as request, request.get(page_url) as response:
        try:
            response.raise_for_status()
        except ClientResponseError as err:
            raise NazurinError(err) from None

        # Override post_id if there's a redirection TODO: Check
        if response.history:
            post_id = response.url.path[1:]
        html = await response.text()

    soup = BeautifulSoup(html, 'html.parser')
    ld_json = soup.find("script", {"type": "application/ld+json"}).contents
    # The embedded JSON contains \' sequences; turn them into plain quotes
    # before parsing.
    info = json.loads(''.join(ld_json).replace('\\\'', '\''))

    name = info['name'].split(' #')[0]
    published = datetime.strptime(info['datePublished'], '%c')
    created_at = int(published.replace(tzinfo=timezone.utc).timestamp())
    # Drop the last two characters and scale by 1024.
    # NOTE(review): presumably a "<n>kB" style value -- confirm.
    size = int(info['contentSize'][:-2]) * 1024

    tags = {}
    for item in soup.find('ul', id='tags').find_all('li'):
        link, kind = item.contents
        tag_name = unquote(link['href'][1:]).replace('+', '_')
        tag_type = kind[1:]
        tags[tag_name] = tag_type

    return {
        'id': int(post_id),
        'name': name,
        'created_at': created_at,
        # Width/height stay strings with their last three characters removed.
        'image_width': info['width'][:-3],
        'image_height': info['height'][:-3],
        'tags': tags,
        'file_ext': info['encodingFormat'],
        'file_size': size,
        'file_url': info['contentUrl'],
        'preview_file_url': info['thumbnail'],
        'uploader': info['author']
    }
def getImages(self, post) -> List[Image]:
    """Get images from post.

    :param post: Post mapping; reads ``assets`` and ``hash_id``.
    :return: One ``Image`` per asset of type ``image``, ordered by the
        assets' ``position``.
    :raises NazurinError: If the post has no ``assets`` entry.
    """
    if 'assets' not in post.keys():
        raise NazurinError('No asset found.')
    ordered = sorted(post['assets'], key=lambda x: x['position'])
    hash_id = post['hash_id']

    imgs = []
    for asset in ordered:
        if asset['asset_type'] == 'image':
            # https://cdnb.artstation.com/p/assets/images/images/042/908/363/large/_z-ed_-da.jpg?1635784439
            filename, url, thumbnail = self.parseUrl(asset['image_url'])
            image = Image(f"ArtStation - {hash_id} - {filename}",
                          url,
                          thumbnail,
                          width=asset['width'],
                          height=asset['height'])
            imgs.append(image)
    return imgs
async def updateCollection(self,
                           urls: List[str],
                           message: Optional[Message] = None):
    """Match the URLs to a site, post the result to the gallery and store it.

    Posting to the gallery and downloading the illust run concurrently;
    the illust is handed to the storage once both have finished.

    :param urls: Candidate URLs to match against the known sites.
    :param message: Optional originating message; forwarded to the gallery
        when it is itself a forward.
    :return: ``True`` on completion.
    :raises NazurinError: If no site matches the URLs.
    """
    matched = self.sites.match(urls)
    if not matched:
        raise NazurinError('No source matched')
    logger.info('Collection update: site=%s, match=%s', matched['site'],
                matched['match'].groups())

    illust = await self.sites.handle_update(matched)

    # Forward to gallery & Save to album
    if message and message.is_forward():
        gallery_action = message.forward(config.GALLERY_ID)
    elif not illust.has_image():
        gallery_action = self.send_message(config.GALLERY_ID, '\n'.join(urls))
    else:
        gallery_action = self.sendIllust(illust, chat_id=config.GALLERY_ID)

    save = asyncio.create_task(gallery_action)
    download = asyncio.create_task(illust.download())
    await asyncio.gather(save, download)
    await self.storage.store(illust)
    return True