Exemple #1
0
def export(update, context):
    """Export the content of an incoming message and report the outcome.

    Edited messages and edited channel posts are ignored.  Messages in chats
    with a negative ``chat_id`` that already contain a ``[source]`` link are
    skipped.  In the ``web_record`` chat, messages matching the social-media
    keywords but neither ``article`` nor ``note`` are passed to ``tryDelete``
    and skipped.

    A temporary ``received`` notice is sent before ``exportImp`` runs and is
    always deleted afterwards.  Failures are reported to the chat; every
    error whose text does not match ``Content is too big.`` is re-raised.
    """
    if update.edited_message or update.edited_channel_post:
        return
    msg = update.effective_message
    # msg.text is None for non-text messages; guard before substring tests so
    # such messages do not crash with a TypeError.
    text = msg.text or ''
    if (msg.chat_id < 0 and 'source' in text
            and '[source]' in (msg.text_markdown or '')):
        return
    in_web_record = msg.chat.username == 'web_record'
    if in_web_record:
        if (matchKey(msg.text_markdown,
                     ['twitter', 'weibo', 'douban', 't.me/'])
                and not matchKey(msg.text_markdown, ['article', 'note'])):
            tryDelete(msg)
            return
    try:
        notice = msg.chat.send_message('received')
    except Exception:
        # Best effort: if the acknowledgement cannot be sent, give up quietly.
        return
    try:
        exportImp(msg)
        if in_web_record:
            tryDelete(msg)
    except Exception as e:
        msg.chat.send_message(str(e))
        if not matchKey(str(e), ['Content is too big.']):
            # Bare raise keeps the original traceback intact.
            raise
    finally:
        notice.delete()
Exemple #2
0
def veryBadMsg(msg, has_similar_log):
    """Return True when ``msg`` trips any of the spam heuristics below.

    Checks, in order: the forwarding chat's title and the message text (only
    for forwarded messages), a sender with no last name, no username and a
    first name of at most three characters, the caption, known spam phrases,
    an attached contact, and finally the ``has_similar_log`` flag.
    """
    source_chat = msg.forward_from_chat
    if source_chat:
        banned_titles = [
            '新闻频道', '新闻网', '我的频道', '点我有惊喜', '引流推广', '自由之声🌈', '业务咨询', '大家好',
            '信息'
        ]
        if (matchKey(source_chat.title, banned_titles)
                or badTextScore(source_chat.title)[0]
                or badText(msg.text)):
            return True

    sender = msg.from_user
    nameless_sender = (not sender.last_name and not sender.username
                       and len(sender.first_name) <= 3)
    if nameless_sender:
        return True

    if badText(msg.caption):
        return True

    banned_phrases = [
        '群发需要滴滴', 'joinchat', 'Pua把妹教程五百多套', '前一百名加我的兄弟', '联系我', '+v',
        'Louisville_fx', 'LY11618', '滴我', 'Binance'
    ]
    if matchKey(msg.text, banned_phrases):
        return True
    if hasSeq(msg.text, ['原生话费收粮', '啦裙發']):
        return True

    return bool(msg.contact or has_similar_log)
Exemple #3
0
def _formatImgUrl(raw, domain):
    """Rewrite an image URL to ask for a 1300-wide rendition and make it absolute.

    Args:
        raw: The image URL as found in the page; may be protocol-relative
            (``//host/...``) or site-relative (``/path``).
        domain: Prefix prepended to site-relative URLs.

    Returns:
        The rewritten URL.  URLs containing ``guim`` keep their ``max/<n>``
        segment and ``width=`` parameter; ``condecdn`` URLs get numeric path
        segments replaced by ``1300``; ``twreport`` ``-tiny`` images are
        switched to ``-desktop``.
    """
    parts = raw.split('/')
    # Find the segment following 'max' when it is a positive integer.
    size_index = None
    for index, part in enumerate(parts):
        if part != 'max':
            continue
        try:
            if int(parts[index + 1]) > 0:
                size_index = index + 1
                break
        except (IndexError, ValueError):
            # 'max' is the last segment or is not followed by a number.
            continue
    if size_index is not None and 'guim' not in raw:
        parts[size_index] = '1300'
    raw = '/'.join(parts)
    # Raw strings: '\d' in a normal literal is an invalid escape sequence.
    if not matchKey(raw, ['guim']):
        raw = re.sub(r'width=\d+', 'width=1300', raw)
    if matchKey(raw, ['condecdn']):
        raw = re.sub(r'/\d+/', '/1300/', raw)
    if matchKey(raw, ['twreport']) and matchKey(raw, ['-tiny']):
        raw = raw.replace('-tiny', '-desktop')
    if raw.startswith('//'):
        return 'https:' + raw
    if raw.startswith('/'):
        return domain + raw
    return raw
Exemple #4
0
def _decompseAds(soup):
    """Strip advertisement nodes out of ``soup`` using ``_decompose``.

    ``article-paragraph`` divs are removed when their text matches
    ``DIV_AD_WORDS``; paragraphs are removed when their text matches
    ``P_AD_WORDS`` or is exactly the ad label.
    """
    # Removal stays interleaved with the scan, exactly as before, so nodes
    # nested in an already-removed ancestor are inspected after the removal.
    for div in soup.find_all("div", class_="article-paragraph"):
        if not matchKey(div.text, DIV_AD_WORDS):
            continue
        _decompose(div)
    for paragraph in soup.find_all("p"):
        content = paragraph.text
        if matchKey(content, P_AD_WORDS) or content == '广告':
            _decompose(paragraph)
Exemple #5
0
def export(update, context):
    """Export the content of an incoming message and log the outcome.

    Edited messages and edited channel posts are ignored.  Messages in chats
    with a negative ``chat_id`` that already carry a ``source`` link are
    skipped, as are ``web_record`` messages matching the social-media
    keywords but neither ``article`` nor ``note``.

    A temporary ``received`` notice is sent first and always deleted at the
    end.  Errors from the export are reported to the chat through a
    short-lived message and appended to the line sent to ``info_log``.
    """
    if update.edited_message or update.edited_channel_post:
        return
    msg = update.effective_message
    # text_html_urled is None for messages without text; guard the substring
    # test so such messages do not raise TypeError.
    if msg.chat_id < 0 and 'source</a>' in (msg.text_html_urled or ''):
        return
    if msg.chat.username == 'web_record':
        if (matchKey(msg.text_markdown,
                     ['twitter', 'weibo', 'douban', 't.me/'])
                and not matchKey(msg.text_markdown, ['article', 'note'])):
            return
    try:
        tmp_msg_1 = msg.chat.send_message('received')
    except Exception:
        # Best effort: if the acknowledgement cannot be sent, give up quietly.
        return
    error = ''
    result = []
    try:
        result = list(exportImp(msg))
        if str(msg.chat.id) in remove_origin._db.items:
            tryDelete(msg)
    except Exception as e:
        tmp_msg_2 = msg.chat.send_message(str(e))
        autoDestroy(tmp_msg_2, 0.05)
        error = ' error: ' + str(e)
    finally:
        try:
            info_log.send_message(getBasicLog(msg) + error + ' result: ' +
                                  ' '.join(result),
                                  parse_mode='html',
                                  disable_web_page_preview=True)
        finally:
            # Remove the placeholder even when logging itself fails.
            tmp_msg_1.delete()
Exemple #6
0
def command(update, context):
    """Route a command message to the first handler whose keywords match.

    When nothing matches and the chat has a positive ``chat_id``, the help
    message is sent as a reply.
    """
    msg = update.message
    routes = (
        (['auth', 'token'], msgTelegraphToken),
        (['toggle', 'source'], toggleSourceLink),
    )
    for keywords, handler in routes:
        if matchKey(msg.text, keywords):
            return handler(msg)
    if msg.chat_id > 0:
        msg.reply_text(help_message)
def commandInternal(msg):
    """Run the blocklist action named by the command in ``msg.text``.

    Returns whatever the matched action returns (add, remove, or a listing
    string), or ``None`` when no alias matches.
    """
    name, argument = splitCommand(msg.text)
    actions = (
        (['/abl', '/d_ba', 'blocklist_ba'],
         lambda: blocklist.add(argument)),
        (['/d_br', 'blocklist_br'],
         lambda: blocklist.remove(argument)),
        (['/d_bl', 'blocklist_list'],
         lambda: 'blocklist:\n' + '\n'.join(blocklist.items())),
    )
    for aliases, action in actions:
        if matchKey(name, aliases):
            return action()
    return None
def shouldSend(card):
    """Decide whether ``card`` should be sent.

    Whitelist matches are always sent and blacklist matches never are.
    Otherwise the card's count must exceed a threshold: 300 for preferlist
    matches, 10000 for popularlist matches, 1000 for everything else.
    """
    card_text = str(card)
    if matchKey(card_text, db.whitelist.items):
        return True
    if matchKey(card_text, db.blacklist.items):
        return False
    if matchKey(card_text, db.preferlist.items):
        threshold = 300
    elif matchKey(card_text, db.popularlist.items):
        threshold = 10000
    else:
        threshold = 1000
    return getCount(card.get('mblog')) > threshold
Exemple #9
0
def shouldFlipFirst(key):
    """Return whether the entry stored under ``key`` should be flipped first.

    The channel is the part of ``key`` before the first ``/``.  A channel
    setting of -1 always answers False and 0..2 always answers True.
    Otherwise short entries (under 20 characters, without ``hasFile`` or
    ``hasLink``) and entries matching the blocklist answer False.
    """
    channel = key.partition('/')[0]
    setting = channels.get(channel)
    if setting == -1:
        return False
    if 0 <= setting <= 2:
        return True
    entry = index.get(key)
    too_short = (len(entry) < 20
                 and not matchKey(entry, ['hasFile', 'hasLink']))
    if too_short:
        return False
    return not matchKey(entry, blocklist.items())
Exemple #10
0
def command(update, context):
    """Route a command from a message or channel post to its handler.

    The first keyword group found in the text wins; when none matches and
    the chat has a positive ``chat_id``, the help message is sent as a reply.
    """
    msg = update.message or update.channel_post
    routes = (
        (['auth', 'token'], msgTelegraphToken),
        (['source', 'tnsl', 'toggle_no_source_link'], toggleSourceLink),
        (['origin', 'trmo', 'toggle_remove_origin'], toggleRemoveOrigin),
    )
    for keywords, handler in routes:
        if matchKey(msg.text, keywords):
            return handler(msg)
    if msg.chat_id > 0:
        msg.reply_text(help_message)
def command(update, context):
    """Pick the handler for a command message, or show help in private chats."""
    msg = update.message
    if matchKey(msg.text, ['auth', 'token']):
        handler = get_telegraph_token
    elif matchKey(msg.text, ['source']):
        handler = switch_source_flag
    elif matchKey(msg.text, ['simplify']):
        handler = switch_simplify_flag
    else:
        handler = None

    if handler is not None:
        return handler(msg)
    if msg.chat_id > 0:  # from private
        msg.reply_text(help_message)
Exemple #12
0
def getUrl(msg):
    """Return the first ``http`` link found in the message, or ``None``.

    Nothing is returned for messages that already contain a ``source`` link,
    nor for ``web_record`` messages pointing at ``mp.weixin.qq.com`` or
    ``telegra.ph``.
    """
    html = msg.text_html_urled
    if matchKey(html, ['source</a>']):
        return None
    already_archived = matchKey(html, ['mp.weixin.qq.com', 'telegra.ph'])
    if already_archived and msg.chat.username == 'web_record':
        return None
    for anchor in BeautifulSoup(html, 'html.parser').find_all('a'):
        href = anchor.get('href')
        if 'http' in href:
            return href
    return None
Exemple #13
0
def wantSee(item, page):
    """Return True when ``item`` is worth showing.

    Two hard-coded keys are always accepted and blocklisted items are always
    rejected.  Otherwise the sum of the first three values from
    ``dataCount(item)`` must exceed ``120 + page`` (four times that for one
    specific author).
    """
    item_text = str(item)
    if matchKey(item_text, ['people/gyz', '4898454']):
        return True
    if matchKey(item_text, blocklist.items()):
        return False
    # This author is so popular that anything they post gets likes, so the
    # bar is raised for them.
    factor = 4 if 'people/renjiananhuo' in str(item.parent) else 1
    require = (120 + page) * factor
    return sum(list(dataCount(item))[:3]) > require
Exemple #14
0
def getDoubanId(link):
    """Return the first integer-like path segment of a supported link.

    Args:
        link: URL to inspect.

    Returns:
        The first ``/``-separated segment (excluding the last one) that
        ``int()`` accepts, returned as the original string; ``None`` when the
        link lacks the required keywords, matches an excluded keyword, does
        not contain ``http``, or has no numeric segment.
    """
    if not matchKey(link, ['note', 'group/topic', 'status', 'album']):
        return None
    if matchKey(link, ['notes', 'statuses']):
        return None
    if 'http' not in link:
        return None
    for part in link.split('/')[:-1]:
        try:
            int(part)
        except ValueError:
            # Not a number; keep scanning the remaining segments.
            continue
        return part
    return None
Exemple #15
0
def getCnLink(link):
    """Return the cleaned link when it points at a supported site.

    Returns ``None`` when the raw link is empty or is not on one of the
    supported sites, ``False`` when the page title matches one of the
    rejected phrases, and the cleaned URL otherwise.
    """
    link = getRawLink(link)
    if not link:
        return
    supported_sites = [
        'douban.', 'thepaper', 'weixin', 'zhihu.', 'cnpoliti', 'weibo'
    ]
    if not matchKey(link, supported_sites):
        return
    link = clearUrl(link)
    # getTitle by default will cache
    title = export_to_telegraph.getTitle(link)
    rejected_titles = [
        '链接已过期', '仅限男生', '男生350分', '常玮平', '【来自投稿】20世纪初', '做了套打拳入门'
    ]
    if matchKey(title, rejected_titles):
        return False
    return link
Exemple #16
0
def _findOrgName(soup):
    """Guess the publishing organisation of a page.

    Args:
        soup: Parsed document; only its ``<head>`` is inspected for the
            well-known publishers.

    Returns:
        A ``(name, flag)`` tuple.  Well-known publishers are recognised from
        the head markup; otherwise the name comes from
        ``_findPossibleRawContent`` and falls back to ``'Source'``.  The
        meaning of the boolean flag is defined by the callers.
    """
    head = str(soup.find('head'))
    known_publishers = (
        (['bbc.com'], 'BBC', True),
        (['nyt.com', 'new york times'], 'NYT', True),
        (['stackoverflow'], 'StackOverflow', False),
        (['medium.com'], 'Medium', False),
        # Was 'dw.come', a typo that could never match dw.com pages.
        (['dw.com'], 'DW', True),
    )
    for keys, name, flag in known_publishers:
        if matchKey(head, keys):
            return name, flag
    r = _findPossibleRawContent(_yieldPossibleOrgItem(soup))
    if r:
        return r, False
    return 'Source', False
Exemple #17
0
def log(url, card, key, channels, sent):
    """Send a one-off log line about ``card`` to the logger chat.

    Cards with a count below 20, or whose hash was already logged, are
    skipped.  The line contains the caption, additional info, the matching
    key, the channel ids and markers for whether the bot sent the card and
    whether any core channel is involved.  Sleeps five seconds afterwards.
    """
    if weiboo.getCount(card) < 20:
        return
    if not log_existing.add(weiboo.getHash(card)):
        return

    additional_info = weibo_2_album.getAdditionalInfo(card['mblog'])
    if additional_info:
        additional_info += ' '
    has_media = matchKey(additional_info, ['imgs:', 'video:'])
    sent = ' weibo_bot_sent' if sent else ''
    if {channel.id for channel in channels} & core_channels_ids:
        mark = ''
    else:
        mark = ' weibo_channel_ignore'

    caption = weibo_2_album.getCap(card['mblog'])
    channel_ids = ' '.join(str(channel.id) for channel in channels)
    channels_log = getChannelsLog(channels)
    template = '%s\n\n%skey: %s channel_id: %s %s%s%s %s <a href="%s">source</a>'
    message = template % (caption, additional_info, key, channel_ids,
                          channels_log, sent, mark, url, url)
    try:
        logger.send_message(message,
                            parse_mode='html',
                            disable_web_page_preview=not has_media)
    except Exception as e:
        print('log failed', str(e), message)
    time.sleep(5)
Exemple #18
0
def cleanupCap(text):
    """Reduce caption HTML to plain text with a small set of links kept.

    Top-level text nodes are kept verbatim, ``<br>`` becomes a newline and
    ``<a>`` elements are either dropped (``urlShouldRemove``), flattened to
    their text (no href, internal weibo links, ``@`` mentions) or re-emitted
    as a plain anchor.  Any other top-level element is discarded.  Line
    breaks are then normalised to exactly one blank line.

    Args:
        text: Caption markup.

    Returns:
        The cleaned caption, stripped of surrounding whitespace.
    """
    text = getPrintableForProd(text).strip()
    soup = BeautifulSoup(text, features="html.parser")
    result = []
    for item in soup:
        if item.name is None:
            result.append(str(item))
        elif item.name == 'br':
            result.append('\n')
        elif item.name == 'a':
            link = item.get('href')
            if urlShouldRemove(link):
                continue
            if (not link or matchKey(
                    link, ['weibo.cn/p', 'weibo.cn/search', 'weibo.com/show'])
                    or item.text[:1] == '@'):
                result.append(item.text)
            else:
                result.append('<a href="%s">%s</a>' % (link, item.text))
    text = ''.join(result).strip()
    text = text.replace('\n', '\n\n')
    # Collapse until stable: a fixed number of passes leaves triple newlines
    # behind when the input has a long run of line breaks.
    while '\n\n\n' in text:
        text = text.replace('\n\n\n', '\n\n')
    return text.strip()
Exemple #19
0
def handleCommand(update, context):
    """Handle a user command: search profiles or edit the sender's own.

    Every command is first forwarded to the debug group.  ``get``/``search``
    reply with matching users other than the sender and banned ones,
    shuffled and capped at ``LIMIT`` except for the test user.  The remaining
    commands require the sender to have a username.
    """
    sender = update.effective_user
    msg = update.effective_message
    msg.forward(debug_group.id)
    usr = sender.username
    cmd, text = splitCommand(msg.text)

    if matchKey(cmd, ['get', 'search']):
        keys = text.split()
        found = [
            x for x in db.usrs()
            if matchAll(db.getRaw(x), keys) and x != usr and x not in ban
        ]
        random.shuffle(found)
        if usr != test_usr:
            found = found[:LIMIT]
        if not found:
            return msg.reply_text(strings['e4'])
        for x in found:
            sendUsr(x, msg)
        return

    if not usr:
        return msg.reply_text(strings['e0'])
    if 'start' in cmd:
        msg.reply_text(strings['h1'])
        return askNext(usr, msg)
    if 'question' in cmd:
        for q in db.questions:
            msg.reply_text(strings['q' + q])
        return
    if 'update' in cmd:
        db.save(usr, 'key', text)
    if not checkProfileFinish(usr, msg):
        return
    if 'preview' in cmd:
        sendUsr(usr, msg)
    return msg.reply_text(strings['h2'])
Exemple #20
0
def processSite(site):
    """Send the not-yet-seen links of ``site``, up to a per-site limit.

    A link is skipped when either the link or its whitespace-stripped title
    is already recorded.  Processing stops after the first failed send or
    once the limit is reached.
    """
    try:
        links = link_extractor.getLinks(site)
    except Exception as e:
        print('web_bot, getLinks fail', str(e), site)
        return

    # The douban (non-people) limit may change later.
    single_item_site = (('douban' in site and 'people' not in site)
                        or matchKey(site, ['tempo', 'kompas', 'nature.com']))
    limit = 1 if single_item_site else 20

    handled = 0
    for link in links:
        if db.existing.contain(link):
            continue
        title = ''.join(export_to_telegraph.getTitle(link).split())
        if db.existing.contain(title):
            continue
        delivered = sendLink(site, link)
        db.existing.add(link)
        db.existing.add(title)
        handled += 1
        if not delivered or handled >= limit:
            return
Exemple #21
0
def _isOffTopic(attrs):
    if not attrs:
        return
    r = []
    for k, v in attrs.items():
        if k != 'data-component' and matchKey(
                k, ['href', 'src', 'url', 'alt', 'data', 'xmlns:fb']):
            continue
        r.append(str(k) + ' : ' + str(v))
    r = '\n'.join(r)
    for att in OFFTOPIC_ATT:
        if att in r:
            return att
    for att in OFFTOPIC_ATT_WITH_EXCEPTION:
        if att in r and not matchKey(r, OFFTOPIC_ATT_WITH_EXCEPTION[att]):
            return att
    return
Exemple #22
0
def getShortLink(link):
    """Return a link variant whose page title passes ``isCN``, if any.

    Links to weibo, twitter and t.me are never returned.  The raw link is
    tried first (the result then comes from ``shorter``), then the link
    itself; ``None`` is returned when neither title passes.
    """
    if matchKey(link, ['weibo.', 'twitter.', 't.me/']):
        return None
    raw_link = getRawLink(link)
    get_title = export_to_telegraph.getTitle
    if isCN(get_title(raw_link)):
        return shorter(raw_link, link)
    return link if isCN(get_title(link)) else None
Exemple #23
0
def getVideo(b):
    """Return the ``src`` of the first video whose grandparent id matches.

    Only ``<video>`` elements whose grandparent has an ``id`` matching
    ``video_info`` qualify; ``None`` is returned when there is none.
    """
    for video in b.find_all('video'):
        parent = video.parent
        if not parent or not parent.parent:
            continue
        wrapper_id = str(parent.parent.get('id'))
        if matchKey(wrapper_id, ['video_info']):
            return video['src']
    return None
def search(keywords):
    """Walk every source in the pool and print the URLs of matching pages.

    For each ``(name, pos)`` in the pool, the page text is checked against
    ``keywords`` and its root URL printed on a match; the pool is then
    advanced to the next position.  Runs for as long as ``pool.pool`` is
    truthy.

    Args:
        keywords: Keys passed to ``matchKey`` for each page's text.
    """
    pool = Pool()
    while pool.pool:
        for name, pos in pool.items():
            if matchKey(getSoup(name, pos).text, keywords):
                # Was getRootUrl(name, post): 'post' is undefined here.
                print(getRootUrl(name, pos))
            next_pos = getNextPos(name, pos)
            pool.update(name, next_pos)
 def shouldKick(self, user):
     """Return True when the user's name is too long or contains a banned key.

     The length check only applies when ``kick_if_name_longer_than`` is set;
     it compares the combined length of first and last name against it.
     """
     max_length = self.kick_if_name_longer_than
     if max_length:
         name_length = len(user.first_name or '') + len(user.last_name or '')
         if name_length > max_length:
             return True
     return bool(matchKey(getDisplayUser(user), self.kick_if_name_contains))
def remindIfNecessary(msg):
    """Reply with short-lived etiquette reminders when the text warrants them.

    Three independent checks run on ``msg.text``: words from
    ``better_avoid_words`` (unless a quote marker is present), rhetorical or
    emotional phrasing (listed words, or at least three question marks), and
    attacking words.  Each hit sends its own reminder via ``autoDestroy``.

    Args:
        msg: Incoming message; messages without text are ignored.
    """
    if not msg.text:
        return
    if matchKey(msg.text,
                better_avoid_words) and not matchKey(msg.text, quotes):
        reminder = '建议避免使用带有强烈主观判断的词哦,比如:' + ', '.join(better_avoid_words) + \
         '。 谢谢啦!'
        autoDestroy(msg.reply_text(reminder), 10)
    emotional_words = ['意淫', '凭什么']
    # Count ASCII and full-width (U+FF1F) question marks; the previous code
    # counted the ASCII one twice, so two question marks already triggered.
    question_marks = msg.text.count('?') + msg.text.count('\uff1f')
    if matchKey(msg.text, emotional_words) or question_marks >= 3:
        reminder = '反问,反讽不利于友好交流哦,建议您换成大家更容易理解的表达哦。谢谢啦!'
        autoDestroy(msg.reply_text(reminder), 10)
    attacking_words = ['太low']
    if matchKey(msg.text, attacking_words):
        reminder = '请友好交流,争取互相理解。谢谢啦!'
        autoDestroy(msg.reply_text(reminder), 10)
def decorate(text):
    """Format a link as a paragraph, prefixed by its title where useful.

    ``https://`` is prepended when the text contains no ``http``.  Links to
    the listed social sites are returned bare; all others are preceded by
    the page title in brackets.
    """
    if 'http' not in text:
        text = 'https://' + text
    text = getRawLink(text)
    untitled_sites = [
        'www.douban.com/people/', 'twitter.com', 'facebook.com',
        'm.weibo.cn'
    ]
    if matchKey(text, untitled_sites):
        return '\n\n' + text
    title = export_to_telegraph.getTitle(text)
    return '\n\n【%s】 %s' % (title, text)
Exemple #28
0
def getSrc(img):
    """Return the stripped ``src`` of ``img`` when its wrapper is a picture box.

    The wrapper is the image's grandparent; its ``class`` must match
    ``f-m-img`` or ``group-pic``.  Returns ``None`` for images without a
    usable ``src``, without a grandparent, or in any other wrapper.
    """
    raw_src = img.get('src')
    src = raw_src and raw_src.strip()
    if not src:
        return None
    parent = img.parent
    if not parent or not parent.parent:
        return None
    # str() always yields a string, so no empty-string fallback is needed.
    wrapper_class = str(parent.parent.get('class'))
    if matchKey(wrapper_class, ['f-m-img', 'group-pic']):
        return src
    return None
Exemple #29
0
def getMatches(text):
    """Return the list of keys to match for ``text``.

    Empty input gives an empty list.  When the text contains ``/`` or ``@``,
    or is an integer, and a user id can be resolved, the result is the user
    id alone (integer input) or the user id followed by the text.  Otherwise
    the text itself is the only key.
    """
    if not text:
        return []
    looks_like_user = matchKey(text, ['/', '@']) or isInt(text)
    if looks_like_user:
        user_id = getUserId(text)
        if user_id:
            return [user_id] if isInt(text) else [user_id, text]
    return [text]
def handleCommand(update, context, dbs):
    """Handle subscription and maintenance commands carried by a message.

    Commands are recognised by substring on the parsed command:
    ``s3_l`` replies with the chat's subscription list, ``s3_un`` deletes a
    subscription by integer index, ``s3_s`` adds a subscription to the chat
    named in the command text.  ``repeat`` and ``delete`` are only reached
    when the sender's id equals ``debug_group.id``.

    Args:
        update: Incoming update; its ``effective_message`` is processed.
        context: Handler context; ``context.bot`` is used to resolve chats.
        dbs: Subscription store providing ``getList``, ``deleteIndex`` and
            ``add``.
    """
    msg = update.effective_message
    # presumably schedules the triggering message for removal — confirm
    # against autoDestroy
    autoDestroy(msg, 0.1)
    if msg.from_user and matchKey(msg.from_user.first_name, ['telegram']):
        # don't deal with group message auto forwarded linked channel
        return
    command, text = splitCommand(msg.text)
    if 's3_l' in command:
        subscriptions = dbs.getList(msg.chat_id)
        # One "index: chat" line per subscription; the index is what s3_un
        # expects as its argument.
        subscriptions = [str(index) + ': ' + \
            formatChat(context.bot, x['id']) for \
            index, x in enumerate(subscriptions)]
        r = msg.reply_text('subscription list: \n\n' +
                           '\n'.join(subscriptions),
                           quote=False,
                           parse_mode='Markdown',
                           disable_web_page_preview=True)
        autoDestroy(r)
        return
    if 's3_un' in command:
        try:
            index = int(text)
        except:
            # The argument is not an integer: ask for one and stop.
            r = msg.reply_text('please give index')
            autoDestroy(r)
            return
        r = dbs.deleteIndex(msg.chat_id, index)
        autoDestroy(msg.reply_text(r, quote=False))
        return
    if 's3_s' in command:
        chat = getChat(context.bot, text)
        if not chat:
            return
        r = dbs.add(msg.chat, chat)
        autoDestroy(msg.reply_text(r, quote=False))
        return
    # Everything below is restricted to the sender whose id equals
    # debug_group.id.
    if not msg.from_user or msg.from_user.id != debug_group.id:
        return
    if 'repeat' in command:
        # Re-send the replied-to message's markdown text into this chat.
        msg.bot.send_message(msg.chat.id,
                             msg.reply_to_message.text_markdown,
                             parse_mode='Markdown',
                             disable_web_page_preview=True)
    # guard this feature
    # if 'all' in command:
    #     sendAll(msg, dbs)
    #     return
    if 'delete' in command:
        to_delete = msg.reply_to_message
        key = (to_delete.chat_id, to_delete.message_id)
        # Delete every message recorded under the replied-to message's key.
        for r in forward_all_record[key]:
            try:
                r.delete()
            except:
                # Best effort: skip entries whose deletion fails.
                pass