Ejemplo n.º 1
0
def handleUrl(update, context):
    """Reply to a Telegram message with a numbered digest of the links it contains.

    Extracts http(s) links from the message text, resolves each through
    getCnLink, fetches its title via export_to_telegraph, de-duplicates by
    both link and title, and replies with a formatted daily-read list.

    Args:
        update: telegram Update carrying the incoming message.
        context: telegram CallbackContext (unused).
    """
    msg = update.effective_message
    # Messages without text (photos, stickers, ...) have msg.text == None;
    # the original crashed on msg.text.split() in that case.
    if not msg or not msg.text:
        return
    raw_links = [x for x in msg.text.split() if 'http' in x]
    if not raw_links:
        return
    seen = set()
    items = []
    for raw_link in raw_links:
        link = getCnLink(raw_link)
        if not link:
            continue
        title = compactText(export_to_telegraph.getTitle(link))
        # De-duplicate on both the canonical link and the resolved title.
        if link in seen or title in seen:
            continue
        seen.add(link)
        seen.add(title)
        items.append((title, link))
    lines = ['%d. 【%s】%s' % (index + 1, title, link)
             for index, (title, link) in enumerate(items)]
    reply = ('《每日文章精选 %s》 https://t.me/daily_read \n\n' %
             date.today().strftime("%Y-%m-%d") + '\n\n'.join(lines))
    msg.reply_text(reply, disable_web_page_preview=True)
Ejemplo n.º 2
0
def yieldDailyRead():
    """Yield unique (title, link) pairs for the daily read, newest first.

    Posts are ordered by post.time plus a small random jitter so that
    posts sharing the same timestamp come out in a random relative order.
    Posts without a detectable link, with an empty title, or duplicating
    an already-yielded link/title are skipped.
    """
    existing = set()
    posts = getPosts()
    # Sort with a key function: sorting (float, post) tuples (as the
    # original did) would fall back to comparing post objects on a
    # jitter tie and raise TypeError; a key= sort never compares posts.
    # The unused `start = time.time()` local was dropped.
    posts = sorted(posts,
                   key=lambda post: post.time + random.random(),
                   reverse=True)
    for post in posts:
        link = getLink(post.text)
        if not link:
            continue
        source = post.link.text if post.link else post.text.text
        text = compactText(getTitle(source))
        if not text or link in existing or text in existing:
            continue
        existing.add(link)
        existing.add(text)
        yield text, link
Ejemplo n.º 3
0
 def __init__(self, url):
     """Fetch a page's JSON export and precompute its title/text statistics.

     Args:
         url: page URL; '?json=1' is appended to request the JSON form.
     """
     content = cached_url.get(url + '?json=1')
     # NOTE(review): yaml.load with FullLoader on remotely fetched content
     # can construct arbitrary Python objects — consider yaml.safe_load
     # if the payload is plain data.
     content = yaml.load(content, Loader=yaml.FullLoader)
     self.title = content['title']
     self.soup = BeautifulSoup(content['content'], 'html.parser')
     self.evernote_urls = list(getEvernoteUrls(self.soup))
     # First Evernote URL found, or the empty list (falsy) when none exist.
     self.next_url = self.evernote_urls and self.evernote_urls[0]
     self.text_soup = getTextSoup(content['content'])
     # '~' is mapped to '.' before compacting — presumably an artifact of
     # the upstream export; TODO confirm why '~' needs replacing.
     self.raw_text = compactText(self.text_soup.text.replace('~', '.'))
     self.text = clearText(self.raw_text)
     # Counts alphabetic characters with code points above Latin-1,
     # i.e. CJK and similar — an approximate CJK word count.
     self.word_count = len(
         [c for c in self.text if c.isalpha() and ord(c) > 255])
Ejemplo n.º 4
0
def getHotText(soup):
    """Flatten a post's HTML soup into plain text with links expanded.

    <br> tags become newlines; the last anchor and any telegra.ph anchors
    are replaced by their decorated hrefs before the text is compacted.

    Args:
        soup: a BeautifulSoup fragment; it is mutated in place.

    Returns:
        The compacted plain text of the fragment.
    """
    for br in soup.find_all("br"):
        br.replace_with('\n')
    anchors = soup.find_all("a")
    # Explicit emptiness check replaces the original bare `except:` around
    # the [-1] index (only IndexError was possible there).
    if not anchors:
        return compactText(soup.text)
    last = anchors[-1]
    if last.get('href'):
        last.replace_with(decorate(last['href']))
    for item in soup.find_all('a'):
        href = item.get('href')
        # Guard against anchors without an href: the original raised
        # TypeError on `'telegra.ph' in None`.
        if href and 'telegra.ph' in href:
            item.replace_with(decorate(href))
    return compactTextV2(soup.text)
Ejemplo n.º 5
0
def clearText(content):
    """Strip navigation markers and 【...】-bracketed spans from content.

    The text is truncated at the first next/previous marker, then any
    characters inside 【】 brackets (nesting supported) are removed,
    and the result is passed through compactText.

    Args:
        content: the raw text to clean.

    Returns:
        The compacted, cleaned text.
    """
    for key in ['next', 'Next', 'previous', 'Previous']:
        content = content.split(key)[0]
    result = []
    depth = 0
    for ch in content:
        if ch == '【':
            depth += 1
        if ch == '】':
            # Clamp at zero: in the original a stray closing bracket drove
            # the counter negative and silently dropped all following text.
            depth = max(depth - 1, 0)
            continue
        if not depth:
            result.append(ch)
    return compactText(''.join(result))
def download(url, force_cache=False):
    """Download every chapter of a novel and save it as download/<name>.txt.

    Args:
        url: novel URL; its numeric id is extracted via getNid.
        force_cache: forwarded to cached_url for the chapter-list fetch
            (chapter details are always fetched with force_cache=True).
    """
    nid = getNid(url)
    content = cached_url.get(chapter_prefix + nid, force_cache=force_cache)
    content = yaml.load(content, Loader=yaml.FullLoader)
    novel_name = None
    result = []
    for cid in getIds(content):
        raw_content = cached_url.get(detail_prefix % cid,
                                     force_cache=True,
                                     sleep=1)
        if not novel_name:
            novel_name = getNovelName(raw_content)
            # os.makedirs replaces the original `os.system('mkdir ...')`
            # shell-out; exist_ok keeps the ignore-if-present behavior.
            os.makedirs('download', exist_ok=True)
        result.append(getContent(raw_content, debug_info=detail_prefix % cid))
    with open('download/%s.txt' % novel_name, 'w') as f:
        f.write(compactText(''.join(result)))