def quickBackfill(channel):
    """Backfill a channel's history by paging backwards from the newest posts.

    Fetches at most getMaxIteration(channel) backward pages, persisting each
    batch via dbase.updateAll, and stops early when the batch is empty or its
    oldest post is too old.
    """
    batch = webgram.getPosts(channel)
    dbase.updateAll(batch)
    batch = batch[1:]  # skip the first element, matching the [1:] convention used elsewhere
    remaining = getMaxIteration(channel)
    while remaining > 0:
        remaining -= 1
        if not batch or postTooOld(batch[0]):
            return
        oldest_id = batch[0].post_id
        batch = webgram.getPosts(channel, oldest_id, direction='before')[1:]
        dbase.updateAll(batch)
def getPosts():
    """Collect recent posts from every channel in the pool.

    For each channel, fetches the latest page and keeps paging backwards
    while the oldest fetched post is newer than 3 days. The first element of
    each webgram response is skipped (presumably channel metadata — the same
    [1:] convention used throughout this file).

    Returns:
        A flat list of post objects across all channels.
    """
    # Fix: removed unused local `start = time.time()`.
    result = []
    for channel in pool:
        posts = webgram.getPosts(channel, force_cache=True)[1:]
        result += posts
        # Page backwards while the oldest fetched post is under 3 days old.
        while posts and posts[0].time > time.time() - 3 * Day:
            posts = webgram.getPosts(
                channel, posts[0].post_id,
                direction='before', force_cache=True)[1:]
            result += posts
    return result
def indexingImp():
    """Probabilistically re-index channels, sampling low-score channels more often.

    A channel with score s is processed with probability 1 / (s^2 + 1);
    negative-score channels are skipped outright. Emits a debug status message
    when the pass completes.
    """
    for channel, score in channels.items():
        if score < 0:
            continue
        # Sample with probability 1 / (score^2 + 1).
        if random.random() > 1.0 / (score * score + 1):
            continue
        # Under test, only ~10% of sampled channels are processed,
        # keeping test runs short.
        if 'test' in sys.argv and random.random() > 0.1:
            continue
        head = webgram.getPosts(channel, 1)  # also warms the cache
        for post in head:
            dbase.update(post)
        # Only fetch the full latest page when the probe returned real
        # posts — saves an HTTP call otherwise.
        if len(head) > 1:
            for post in webgram.getPosts(channel):
                dbase.update(post)
    sendDebugMessage(
        *(['indexingImpDuration'] + dbase.resetStatus()), persistent=True)
def backfill(channel):
    """Backfill a channel if needed, choosing the fast or slow strategy.

    Probes the channel with a single-post fetch: a response with more than
    one element takes the quick path, otherwise the slow path.
    """
    if not shouldBackfill(channel):
        return
    probe = webgram.getPosts(channel, 1)
    if len(probe) > 1:
        quickBackfill(channel)
    else:
        slowBackfill(channel)
def yieldPoliticsRead():
    """Yield (title, link) pairs extracted from freedom_watch posts.

    Iterates the cached posts in reverse order, skipping posts from which no
    link can be extracted.
    """
    posts = webgram.getPosts('freedom_watch', force_cache=True)[1:]
    for post in reversed(posts):
        url = getLink(post.text, getShortLink)
        if url:
            yield export_to_telegraph.getTitle(url), url
def getPosts(channel):
    """Yield (album, post) pairs for a channel's recent history.

    Fetches the channel's latest posts and pages backwards until the oldest
    fetched post falls outside the channel's configured `back_days` window.
    Posts less than one day old are skipped. Rendering failures are logged
    and skipped so one bad post does not stop the stream.
    """
    # Fixes: removed unused locals `start` and `pivot`; hoisted the
    # loop-invariant credential lookup out of the while condition.
    result = []
    posts = webgram.getPosts(channel)[1:]
    result += posts
    back_seconds = credential['channels'][channel]['back_days'] * Day
    while posts and posts[0].time > time.time() - back_seconds:
        posts = webgram.getPosts(
            channel, posts[0].post_id, direction='before')[1:]
        result += posts
    for post in result:
        if post.time > time.time() - Day:
            continue  # too fresh — presumably skipped deliberately; confirm intent
        try:
            yield post_2_album.get('https://t.me/' + post.getKey()), post
        except Exception as e:
            # Best-effort: report and move on to the next post.
            print('post_2_album failed', post.getKey(), str(e))
def indexingImp():
    """Probabilistically re-index channels, reporting status before and after.

    A channel with score s is processed with probability
    1 / min(s^3 + 1, 2*s^2.5 + 1); negative-score and delayed channels are
    (almost) always skipped.
    """
    sendDebugMessage(*(['indexingImpStart'] + dbase.resetStatus()))
    for channel, score in channels.items():
        if score < 0:
            continue
        # Sampling probability shrinks rapidly as the score grows.
        keep_chance = 1.0 / min(score ** 3 + 1, score ** 2.5 * 2 + 1)
        if random.random() > keep_chance:
            continue
        # Under test, process only ~10% of sampled channels.
        if 'test' in sys.argv and random.random() > 0.1:
            continue
        # Channels present in the delay db are skipped ~99% of the time.
        if channel in dbase.delay._db.items and random.random() > 0.01:
            continue
        probe = webgram.getPosts(channel, 1)  # also warms the cache
        for post in probe:
            dbase.update(post)
        if len(probe) <= 1:
            continue  # nothing beyond the probe — skip the extra HTTP call
        dbase.updateAll(webgram.getPosts(channel))
        dbase.updateDelayStatus(channel)
    sendDebugMessage(
        *(['indexingImpDuration'] + dbase.resetStatus()), persistent=True)
def quickBackfill(channel):
    """Walk a channel's history forward from post 1, persisting every post.

    Repeatedly fetches pages starting at `post_id`, updating the database
    with each post past the first element. Stops when no forward progress is
    made (channel exhausted) or when `time_limit` seconds have elapsed.
    Emits debug messages at start and end.
    """
    sendDebugMessage('quickBackfill start', '@' + channel)
    start_time = time.time()
    post_id = 1
    while True:
        posts = webgram.getPosts(channel, post_id)
        for post in posts[1:]:
            dbase.update(post)
        # Fix: guard against an empty response, which would otherwise raise
        # IndexError on posts[-1]. Empty also means no progress, so stop.
        if not posts or post_id == posts[-1].post_id + 1:
            break
        post_id = posts[-1].post_id + 1
        if time.time() - start_time > time_limit:
            break
    sendDebugMessage('quickBackfill end', '@' + channel, post_id)
def getVPNs():
    """Build a numbered, newline-separated list of proxy links from two sources."""
    vpns = set()
    # Take the 10 most recent MTP_roto posts (list is reversed before slicing).
    # NOTE(review): `.find('a')` reads like a BeautifulSoup Tag lookup, so
    # post.text is presumably parsed HTML, not a plain str — if it were a
    # str, str.find would return an int (and -1 is truthy), making
    # item.get('href') raise AttributeError. Confirm post.text's type.
    for post in webgram.getPosts('MTP_roto', force_cache=True)[1:][::-1][:10]:
        item = post.text.find('a')
        if item and item.get('href'):
            vpns.add(item.get('href'))
    # Second source: scrape the public web preview of ProxyMTProto.
    soup = BeautifulSoup(
        cached_url.get('https://t.me/s/ProxyMTProto'), 'html.parser')
    # Keep only the 10 most recent inline-button links that are proxy URLs.
    for item in list(
            soup.find_all(
                'a',
                class_='tgme_widget_message_inline_button'))[::-1][:10]:
        if item.get('href', '').startswith('https://t.me/proxy?'):
            vpns.add(item.get('href'))
    # NOTE(review): iterating a set gives arbitrary order, so the numbering
    # below is nondeterministic across runs — confirm that is acceptable.
    lines = [
        '%d. %s' % (index + 1, item)
        for index, item in enumerate(list(vpns))]
    return '\n\n'.join(lines)