def getAlbum(url, force_cache=True, word_limit=200, paragraph_limit=3, append_source=False, append_url=True):
    """Fetch an article and build an AlbumResult with at most one
    representative image and a trimmed text caption.

    Args:
        url: article URL, fetched via _getArticle.
        force_cache: passed through to _getArticle.
        word_limit: max caption length handed to cutCaptionHtml.
        paragraph_limit: max paragraphs kept; each paragraph occupies two
            lines after the '\\n\\n' normalisation below, hence the `* 2`.
        append_source: if True, record the url on album.url.
        append_url: if True, append the url to the caption text.

    Returns:
        AlbumResult with the chosen image in .imgs and caption in .cap_html_v2.
    """
    content = _getArticle(url, force_cache=force_cache).text
    album = AlbumResult()
    for item in content.findAll('img'):
        path = item.get('src')
        if not path:
            continue
        try:
            cached_url.get(path, mode='b', force_cache=True)
            img = Image.open(cached_url.getFilePath(path))
        except Exception:
            # Was a bare `except:`; narrowed so interrupts propagate.
            # Best-effort: skip images that fail to download or parse.
            continue
        w, h = img.size
        file_size = os.stat(cached_url.getFilePath(path)).st_size
        # Skip known boilerplate images, fingerprinted by byte size and
        # dimensions (site names per the original comments).
        if 36000 < file_size < 36200 and w == 1080 and h == 1080:  # Jiemian Culture header
            continue
        if 27000 < file_size < 27300 and w == 640 and h == 640:  # "Thought Market" header
            continue
        if w == 750 and h == 234:  # Jiemian Culture header
            continue
        if 6000 < file_size < 9000 and w == 347 and h == 347:  # Jiemian Culture header
            continue
        if 87000 < file_size < 91000 and w == 900 and h == 500:  # "US Chinese Talk" header
            continue
        if 53000 < file_size < 56000 and w == 795 and h == 504:  # WeChat footer
            continue
        if 57000 < file_size < 61000 and w == 1011 and h == 282:  # "Short History" header
            continue
        # Accept the first image with a sane aspect ratio and enough pixels.
        if w * 0.25 < h < w * 4 and min(w, h) > 100 and max(w, h) > 300:
            album.imgs.append(path)
            break
    # Replace imgs / line breaks with blank lines and force paragraph gaps,
    # so plain-text splitting below sees one paragraph per chunk.
    for tag in ['img', 'br']:
        for item in content.findAll(tag):
            item.replace_with('\n\n')
    for item in content.findAll('p'):
        item.append('\n\n')
    title = '【%s】\n\n' % getTitle(url)
    lines = content.text.split('\n')
    lines = [line.strip() for line in lines]
    lines = [line for line in lines if isGoodLine(line)]
    if paragraph_limit < 5:
        # For short captions, drop short non-empty lines (likely noise).
        lines = [line for line in lines if not line or len(line) > 20]
    lines = cutCaptionHtml('\n'.join(lines), word_limit).strip().strip('\ufeff').strip()
    lines = lines.split('\n')
    lines = lines[:paragraph_limit * 2]
    album.cap_html_v2 = title + '\n'.join(lines).strip()
    if append_url:
        album.cap_html_v2 += '\n\n' + url
    if append_source:
        album.url = url
    return album
def cutSafe(image, size_factor=2):
    """Download *image* and return a list of local file paths for posting.

    Animated images are returned whole; non-images yield an empty list;
    otherwise the image is cut into pieces (falling back to the original
    file when cutting produces nothing).
    """
    cached_url.get(image, force_cache=True, mode='b')
    local_path = cached_url.getFilePath(image)
    if isAnimated(local_path):
        return [local_path]
    if not getImg(local_path):
        return []
    pieces = list(cut(local_path, size_factor=size_factor))
    return pieces if pieces else [local_path]
def isAnimated(path):
    """Return True when the image at *path* has more than one frame."""
    cached_url.get(path, force_cache=True, mode='b')
    image = Image.open(cached_url.getFilePath(path))
    try:
        # Seeking past frame 0 raises EOFError on single-frame images.
        image.seek(1)
    except EOFError:
        return False
    return True
def postVideo(subreddit, post_text, video):
    """Submit a self post to *subreddit* with *video* embedded inline.

    The '{video}' placeholder in the body is substituted by the Reddit
    client with the inline media entry of the same key.
    """
    cached_url.get(video, mode='b', force_cache=True)
    title, body = splitText(post_text)
    media = {"video": InlineVideo(path=cached_url.getFilePath(video))}
    return subreddit.submit(title, selftext=body + '{video}', inline_media=media)
async def sendSingle(client, source_channel, target, post, img_number, new_text):
    """Send one post to *target*: with its video, its images, or text only."""
    video = post.getVideo()
    if video:
        cached_url.get(video, mode='b', force_cache=True)
        await client.send_message(target, new_text, file=cached_url.getFilePath(video))
    elif not img_number:
        await client.send_message(target, new_text)
    else:
        fns = await telepost.getImages(source_channel, post.post_id, img_number)
        await client.send_message(target, new_text, file=fns)
def getJson(link):
    """Return the cached Weibo status payload for *link*, or {} on failure.

    Only reads from the local cache directory; returns {} immediately when
    the page has never been fetched, or when the payload is missing the
    expected data/user structure.
    """
    wid = getWid(link)
    url = wb_prefix + wid
    if not os.path.exists(cached_url.getFilePath(url)):
        return {}
    try:
        # Renamed from `json` to avoid shadowing the stdlib module name;
        # reuse `url` instead of recomputing wb_prefix + wid.
        payload = yaml.load(
            cached_url.get(url, force_cache=True, sleep=5),
            Loader=yaml.FullLoader)
        payload['data']['user']  # probe: raises when the payload is malformed
        return payload['data']
    except Exception:
        # Was a bare `except:`; narrowed so interrupts propagate.
        print('fetch failed: ' + link)
        return {}
def getSoup(link):
    """Parse the cached page for *link*; return None when not cached yet."""
    if os.path.exists(cached_url.getFilePath(link)):
        return BeautifulSoup(cached_url.get(link, sleep=1), 'html.parser')
def getImage(img):
    """Fetch *img* into the local cache (binary mode) and return its file path."""
    cached_url.get(img, mode='b', force_cache=True)
    return cached_url.getFilePath(img)