Esempio n. 1
0
def _isBoilerplateImage(file_size, w, h):
    """Return True if (file_size, w, h) matches a known site banner/footer.

    Each fingerprint is (min_size, max_size, width, height); size bounds
    are exclusive, dimensions are exact. A max_size of inf means the match
    is by dimensions only.
    """
    fingerprints = (
        (36000, 36200, 1080, 1080),       # Jiemian Culture header
        (27000, 27300, 640, 640),         # "Thought Market" column
        (0, float('inf'), 750, 234),      # Jiemian Culture header (size-independent)
        (6000, 9000, 347, 347),           # Jiemian Culture header
        (87000, 91000, 900, 500),         # "Chinese American Talk" header
        (53000, 56000, 795, 504),         # WeChat footer
        (57000, 61000, 1011, 282),        # "Brief History" header
    )
    return any(lo < file_size < hi and w == fw and h == fh
               for lo, hi, fw, fh in fingerprints)


def getAlbum(url,
             force_cache=True,
             word_limit=200,
             paragraph_limit=3,
             append_source=False,
             append_url=True):
    """Build an AlbumResult from the article at `url`.

    Picks at most one usable illustration (skipping known boilerplate
    banners and badly proportioned images) and produces a trimmed text
    caption in `cap_html_v2`.

    Args:
        url: article page URL, fetched through the project's cache layer.
        force_cache: forwarded to _getArticle.
        word_limit: max caption length passed to cutCaptionHtml.
        paragraph_limit: max paragraphs kept; below 5 also drops short lines.
        append_source: if True, store `url` on the result's `url` field.
        append_url: if True, append `url` to the caption text.

    Returns:
        AlbumResult with `imgs` (zero or one entry) and `cap_html_v2` set.
    """
    content = _getArticle(url, force_cache=force_cache).text
    album = AlbumResult()
    for item in content.findAll('img'):
        path = item.get('src')
        if not path:
            continue
        try:
            cached_url.get(path, mode='b', force_cache=True)
            img = Image.open(cached_url.getFilePath(path))
        except Exception:
            # best-effort: skip images that fail to download or decode
            # (was a bare except; narrowed so SystemExit/KeyboardInterrupt
            # are not swallowed)
            continue
        w, h = img.size
        file_size = os.stat(cached_url.getFilePath(path)).st_size
        if _isBoilerplateImage(file_size, w, h):
            continue
        # accept the first reasonably sized, not-too-elongated image
        if w * 0.25 < h < w * 4 and min(w, h) > 100 and max(w, h) > 300:
            album.imgs.append(path)
            break
    # turn <img>/<br> into paragraph breaks, and end each <p> with one
    for tag in ['img', 'br']:
        for item in content.findAll(tag):
            item.replace_with('\n\n')
    for item in content.findAll('p'):
        item.append('\n\n')
    title = '【%s】\n\n' % getTitle(url)
    lines = content.text.split('\n')
    lines = [line.strip() for line in lines]
    lines = [line for line in lines if isGoodLine(line)]
    if paragraph_limit < 5:
        # for short captions, drop stubby lines that are likely noise
        lines = [line for line in lines if not line or len(line) > 20]
    lines = cutCaptionHtml('\n'.join(lines),
                           word_limit).strip().strip('\ufeff').strip()
    lines = lines.split('\n')
    lines = lines[:paragraph_limit * 2]
    album.cap_html_v2 = title + '\n'.join(lines).strip()
    if append_url:
        album.cap_html_v2 += '\n\n' + url
    if append_source:
        album.url = url
    return album
Esempio n. 2
0
def cutSafe(image, size_factor=2):
    """Download `image` via the cache and return its cut pieces.

    Animated images are never cut (returned whole); unreadable images
    yield an empty list; if cutting produces nothing, the original file
    path is returned as the single element.
    """
    cached_url.get(image, force_cache=True, mode='b')
    local_path = cached_url.getFilePath(image)
    if isAnimated(local_path):
        return [local_path]
    if not getImg(local_path):
        return []
    pieces = list(cut(local_path, size_factor=size_factor))
    return pieces if pieces else [local_path]
Esempio n. 3
0
def isAnimated(path):
    """Return True if the image at `path` has more than one frame."""
    cached_url.get(path, force_cache=True, mode='b')
    frames = Image.open(cached_url.getFilePath(path))
    try:
        # seeking to frame 1 raises EOFError for single-frame images
        frames.seek(1)
    except EOFError:
        return False
    return True
Esempio n. 4
0
def postVideo(subreddit, post_text, video):
    """Submit `post_text` to `subreddit` as a selftext post with `video` inlined."""
    cached_url.get(video, mode='b', force_cache=True)  # ensure local copy exists
    title, body = splitText(post_text)
    # '{video}' is the inline-media placeholder expanded by the submit API
    media = {"video": InlineVideo(path=cached_url.getFilePath(video))}
    return subreddit.submit(title,
                            selftext=body + '{video}',
                            inline_media=media)
async def sendSingle(client, source_channel, target, post, img_number,
                     new_text):
    """Relay one post to `target`: prefer its video, then its images,
    falling back to plain text."""
    video = post.getVideo()
    if video:
        cached_url.get(video, mode='b', force_cache=True)
        await client.send_message(target,
                                  new_text,
                                  file=cached_url.getFilePath(video))
    elif not img_number:
        await client.send_message(target, new_text)
    else:
        images = await telepost.getImages(source_channel, post.post_id,
                                          img_number)
        await client.send_message(target, new_text, file=images)
def getJson(link):
    """Return the cached weibo 'data' payload for `link`, or {} best-effort.

    Only reads entries already present in the cache; a missing cache file,
    a parse failure, or an unexpected payload shape all yield {}.
    """
    wid = getWid(link)
    url = wb_prefix + wid
    if not os.path.exists(cached_url.getFilePath(url)):
        return {}
    try:
        raw = cached_url.get(url, force_cache=True, sleep=5)
        # avoid shadowing the stdlib `json` module name (was `json = ...`)
        payload = yaml.load(raw, Loader=yaml.FullLoader)
        payload['data']['user']  # sanity-check the expected structure
        return payload['data']
    except Exception:
        # was a bare except; narrowed so SystemExit/KeyboardInterrupt escape
        print('fetch failed: ' + link)
        return {}
def getSoup(link):
    """Parse the cached HTML for `link`; None when it is not cached yet."""
    if os.path.exists(cached_url.getFilePath(link)):
        html = cached_url.get(link, sleep=1)
        return BeautifulSoup(html, 'html.parser')
def getImage(img):
    """Ensure `img` is downloaded into the cache and return its local path."""
    cached_url.get(img, force_cache=True, mode='b')
    local_path = cached_url.getFilePath(img)
    return local_path