def test_search_urls_exclusion_char():
    """A URL prefixed with the exclusion character must not be returned."""
    # a lone excluded URL yields nothing at all
    found = list(search_urls('!http://example.com', exclusion_char='!'))
    assert not found, 'Must not find URL, found %d' % len(found)

    # the non-excluded URL is kept, whether the excluded one comes
    # after it or before it
    samples = (
        'http://b.com !http://a.com',
        '!http://a.com http://b.com',
    )
    for text in samples:
        found = list(search_urls(text, exclusion_char='!'))
        count = len(found)
        assert count == 1, 'Must find 1 URL, found %d' % count
        assert 'http://b.com' in found
def title_command(bot, trigger):
    """
    Show the title or URL information for the given URL, or the last URL seen
    in this channel.

    With no argument (``trigger.group(2)`` is empty), the URL remembered for
    ``trigger.sender`` is used; nothing happens if none is remembered or if
    ``check_callbacks`` returns a truthy value for it.
    """
    if trigger.group(2):
        # URLs were given on the command line: extract them from the trigger
        urls = web.search_urls(
            trigger,
            exclusion_char=bot.config.url.exclusion_char,
        )
    else:
        if trigger.sender not in bot.memory['last_seen_url']:
            return
        if check_callbacks(bot, bot.memory['last_seen_url'][trigger.sender]):
            return
        urls = [bot.memory['last_seen_url'][trigger.sender]]

    for url, title, domain, tinyurl in process_urls(bot, trigger, urls):
        reply = '[ %s ] - %s' % (title, domain)
        if tinyurl:
            reply = reply + ' ( %s )' % tinyurl
        bot.reply(reply)
        # remember the URL that was just reported for this channel
        bot.memory['last_seen_url'][trigger.sender] = url
def title_auto(bot, trigger):
    """
    Automatically show titles for URLs. For shortened URLs/redirects, find
    where the URL redirects to and show the title for that (or call a function
    from another module to give more information).
    """
    # The explicit title command handles its own lines
    if re.match(bot.config.core.prefix + 'title', trigger):
        return

    # Avoid fetching known malicious links
    flagged = (
        'safety_cache' in bot.memory
        and trigger in bot.memory['safety_cache']
        and bot.memory['safety_cache'][trigger]['positives'] > 1
    )
    if flagged:
        return

    found_urls = web.search_urls(
        trigger,
        exclusion_char=bot.config.url.exclusion_char,
        clean=True,
    )

    for url, title, domain, tinyurl in process_urls(bot, trigger, found_urls):
        announcement = '[ %s ] - %s' % (title, domain)
        if tinyurl:
            announcement = announcement + ' ( %s )' % tinyurl
        # Guard against responding to other instances of this bot.
        if announcement != trigger:
            bot.say(announcement)
            bot.memory['last_seen_url'][trigger.sender] = url
def test_search_urls_exclusion_char_only_once():
    """Excluding one occurrence of a URL must not exclude its other occurrences."""
    # assert only the instance excluded is excluded
    # ie. that it is not a global exclude, otherwise that would return 1 url
    urls = list(
        search_urls(
            '!http://a.com http://a.com http://b.com',
            exclusion_char='!',
        )
    )
    # the failure message now matches the asserted count (2, not 1)
    assert len(urls) == 2, 'Must find 2 URLs, found %d' % len(urls)
    assert 'http://a.com' in urls
    assert 'http://b.com' in urls
def test_search_urls_multiple_urls_unique_keep_ordering():
    """Duplicates are dropped and the order of first appearance is kept."""
    text = 'http://a.com/ http://c.com/ http://b.com/ http://a.com/'
    found = list(search_urls(text))
    count = len(found)

    assert count == 3, 'Must find 3 URLs, found %d' % count
    for expected in ('http://a.com/', 'http://b.com/', 'http://c.com/'):
        assert expected in found

    # same URLs, in the order they first appear in the text
    expected_order = ['http://a.com/', 'http://c.com/', 'http://b.com/']
    assert found == expected_order
def test_search_urls_defined_schemes(scheme):
    """Restricting ``schemes`` to one scheme returns only that scheme's URL.

    ``scheme`` is supplied by the caller (presumably a pytest parametrization
    that is not visible here -- TODO confirm).
    """
    url_by_scheme = {
        'http': 'http://a.com',
        'https': 'https://c.com',
        'ftp': 'ftp://b.com',
        'steam': 'steam://portal2',
    }
    expected = url_by_scheme.get(scheme)

    text = 'http://a.com ftp://b.com https://c.com steam://portal2'
    found = list(search_urls(text, schemes=[scheme]))

    assert len(found) == 1, 'Only %s URLs must be found' % scheme
    assert expected in found
def handle_url_callbacks(bot, trigger):
    """Dispatch callbacks on URLs

    For each URL found in the trigger, trigger the URL callback registered by
    the ``@url`` decorator.
    """
    allowed_schemes = bot.config.core.auto_url_schemes

    # walk every URL found in the trigger
    for found_url in web.search_urls(trigger, schemes=allowed_schemes):
        # and every callback registered for that URL
        for callback, url_match in bot.search_url_callbacks(found_url):
            # only callbacks carrying a `url_regex` attribute are invoked
            if not hasattr(callback, 'url_regex'):
                continue
            callback(bot, trigger, match=url_match)
def handle_url_callbacks(bot, trigger):
    """Dispatch callbacks on URLs

    For each URL found in the trigger, trigger the URL callback registered by
    the ``@url`` decorator. Each callback is handed to ``bot.call`` wrapped in
    a two-argument function that already carries its ``match``.
    """
    schemes = bot.config.core.auto_url_schemes

    # find URLs in the trigger
    for url in web.search_urls(trigger, schemes=schemes):
        # find callbacks for said URL
        for function, match in bot.search_url_callbacks(url):
            # only callbacks carrying a `url_regex` attribute are dispatched
            if not hasattr(function, 'url_regex'):
                continue

            # Bake `function` and `match` in as default arguments: a plain
            # closure would look the loop variables up at call time, so a
            # wrapper invoked after the loop advanced would see the values
            # of a later iteration.
            @functools.wraps(function)
            def decorated(bot, trigger, function=function, match=match):
                return function(bot, trigger, match=match)

            bot.call(decorated, bot, trigger)
def title_command(bot, trigger):
    """
    Show the title or URL information for the given URL, or the last URL seen
    in this channel.

    After replying with every title that could be produced, a short apology
    is sent when fewer results than requested URLs came back.
    """
    if trigger.group(2):
        # materialized as a list so the number of URLs can be counted below
        urls = list(
            web.search_urls(
                trigger,
                exclusion_char=bot.config.url.exclusion_char,
            )
        )
    else:
        if trigger.sender not in bot.memory['last_seen_url']:
            return
        if check_callbacks(bot, bot.memory['last_seen_url'][trigger.sender]):
            return
        urls = [bot.memory['last_seen_url'][trigger.sender]]

    fetched = 0
    for url, title, domain, tinyurl in process_urls(bot, trigger, urls):
        reply = '%s | %s' % (title, domain)
        if tinyurl:
            reply = reply + ' ( %s )' % tinyurl
        bot.reply(reply)
        bot.memory['last_seen_url'][trigger.sender] = url
        fetched += 1

    requested = len(urls)
    if fetched >= requested:
        # every URL produced a result: nothing to apologize for
        return

    if requested == 1:
        bot.reply(
            "Sorry, fetching that title failed. Make sure the site is working."
        )
    elif fetched == 0:
        bot.reply("Sorry, I couldn't fetch titles for any of those.")
    else:
        bot.reply(
            "I couldn't get all of the titles, but I fetched what I could!"
        )
def title_command(bot: SopelWrapper, trigger: Trigger):
    """
    Show the title or URL information for the given URL, or the last URL seen
    in this channel.

    Results flagged as ``dispatched`` by ``process_urls`` count as handled
    but produce no reply here. A short apology is sent when fewer results
    than requested URLs came back.
    """
    handled = 0

    if trigger.group(2):
        # materialized as a list so the number of URLs can be counted below
        urls = list(web.search_urls(trigger))
    elif trigger.sender in bot.memory['last_seen_url']:
        urls = [bot.memory["last_seen_url"][trigger.sender]]
    else:
        return

    results = process_urls(bot, trigger, urls, requested=True)
    for url, title, domain, tinyurl, dispatched in results:
        if not dispatched:
            reply = "%s | %s" % (title, domain)
            if tinyurl:
                reply = reply + ' ( %s )' % tinyurl
            bot.reply(reply)
            bot.memory['last_seen_url'][trigger.sender] = url
        handled += 1

    requested_count = len(urls)
    if handled >= requested_count:
        # every URL produced a result: nothing to apologize for
        return

    if requested_count == 1:
        bot.reply(
            "Sorry, fetching that title failed. Make sure the site is working."
        )
    elif handled == 0:
        bot.reply("Sorry, I couldn't fetch titles for any of those.")
    else:
        bot.reply(
            "I couldn't get all of the titles, but I fetched what I could!"
        )
def title_auto(bot: SopelWrapper, trigger: Trigger):
    """
    Automatically show titles for URLs. For shortened URLs/redirects, find
    where the URL redirects to and show the title for that (or call a function
    from another plugin to give more information).

    URLs present in ``bot.memory["safety_cache"]`` with a positive count, or
    whose hostname is in ``bot.memory["safety_cache_local"]``, are skipped.
    """
    # Enabled or disabled by feature flag
    if not bot.settings.url.enable_auto_title:
        return

    # Avoid fetching links from another command
    if re.match(bot.config.core.prefix + r'\S+', trigger):
        return

    unchecked_urls = web.search_urls(
        trigger, exclusion_char=bot.config.url.exclusion_char, clean=True)

    urls = []
    safety_cache = bot.memory.get("safety_cache", {})
    safety_cache_local = bot.memory.get("safety_cache_local", {})
    for url in unchecked_urls:
        # Avoid fetching known malicious links
        if url in safety_cache and safety_cache[url]["positives"] > 0:
            continue
        # `hostname` is None when the URL has no host part; calling
        # `.lower()` on it unguarded would raise AttributeError.
        hostname = urlparse(url).hostname
        if hostname is not None and hostname.lower() in safety_cache_local:
            continue
        urls.append(url)

    for url, title, domain, tinyurl, dispatched in process_urls(
        bot, trigger, urls
    ):
        if not dispatched:
            message = '%s | %s' % (title, domain)
            if tinyurl:
                message += ' ( %s )' % tinyurl
            # Guard against responding to other instances of this bot.
            if message != trigger:
                bot.say(message)
        bot.memory["last_seen_url"][trigger.sender] = url
def __init__(self, own_nick, line, url_schemes=None):
    """Parse a raw IRC line into tags, hostmask, event, args and sender.

    :param own_nick: the bot's nick; compared (lower-cased) with the first
                     argument to detect messages sent directly to the bot
    :param line: the raw line; trailing ``\\r``/``\\n`` are stripped
    :param url_schemes: passed as ``schemes`` to ``web.search_urls`` when
                        looking for URLs in PRIVMSG/NOTICE text
    """
    line = line.strip('\r\n')
    self.line = line
    self.urls = tuple()
    self.plain = ''

    # Break off IRCv3 message tags, if present
    self.tags = {}
    if line.startswith('@'):
        tagstring, line = line.split(' ', 1)
        for tag in tagstring[1:].split(';'):
            tag = tag.split('=', 1)
            if len(tag) > 1:
                self.tags[tag[0]] = tag[1]
            else:
                # tag present without a value
                self.tags[tag[0]] = None

    # Default to the current (timezone-aware) UTC time; a valid `time`
    # tag overrides it below.
    self.time = datetime.datetime.now(datetime.timezone.utc)
    if 'time' in self.tags:
        # A valueless `time` tag is stored as None; fall back to '' so
        # strptime raises the ValueError handled below, not a TypeError.
        tag_time = self.tags['time'] or ''
        try:
            self.time = datetime.datetime.strptime(
                tag_time,
                "%Y-%m-%dT%H:%M:%S.%fZ",
            ).replace(tzinfo=datetime.timezone.utc)
        except ValueError:
            pass  # Server isn't conforming to spec, ignore the server-time

    # Grabs hostmask from line.
    # Example: line = ':Sopel!foo@bar PRIVMSG #sopel :foobar!'
    #          hostmask -> 'Sopel!foo@bar'
    #          line     -> 'PRIVMSG #sopel :foobar!'
    # Lines without a leading ":" get no hostmask.
    if line.startswith(':'):
        self.hostmask, line = line[1:].split(' ', 1)
    else:
        self.hostmask = None

    # Parses the (hostmask-less) line into a list of arguments.
    # Some events like MODE don't have a secondary string argument, i.e.
    # no ' :' inside the line.
    # Example 1: line = 'PRIVMSG #sopel :foo bar!'
    #            text   -> 'foo bar!'
    #            argstr -> 'PRIVMSG #sopel'
    #            args   -> ['PRIVMSG', '#sopel', 'foo bar!']
    # Example 2: line = 'MODE Sopel +i'
    #            text   -> '+i'
    #            args   -> ['MODE', 'Sopel', '+i']
    if ' :' in line:
        argstr, self.text = line.split(' :', 1)
        self.args = argstr.split(' ')
        self.args.append(self.text)
    else:
        self.args = line.split(' ')
        self.text = self.args[-1]

    # The first element is the event name; the rest are its arguments
    self.event = self.args[0]
    self.args = self.args[1:]
    components = PreTrigger.component_regex.match(self.hostmask or '').groups()
    self.nick, self.user, self.host = components
    self.nick = tools.Identifier(self.nick)

    # If we have arguments, the first one is the sender
    # Unless it's a QUIT event
    if self.args and self.event != 'QUIT':
        target = tools.Identifier(self.args[0])
    else:
        target = None

    # Unless we're messaging the bot directly, in which case that second
    # arg will be our bot's name.
    if target and target.lower() == own_nick.lower():
        target = self.nick
    self.sender = target

    # Parse CTCP into a form consistent with IRCv3 intents
    if self.event in ('PRIVMSG', 'NOTICE'):
        intent_match = PreTrigger.intent_regex.match(self.args[-1])
        if intent_match:
            intent, message = intent_match.groups()
            self.tags['intent'] = intent
            self.args[-1] = message or ''

        # Search URLs after CTCP parsing
        self.urls = tuple(
            web.search_urls(self.args[-1], schemes=url_schemes))

    # Populate account from extended-join messages
    if self.event == 'JOIN' and len(self.args) == 3:
        # Account is the second arg `...JOIN #Sopel account :realname`
        self.tags['account'] = self.args[1]

    # get plain text message
    if self.args:
        self.plain = formatting.plain(self.args[-1])
def test_search_urls():
    """A text made of a single URL yields exactly that URL."""
    found = list(search_urls('http://example.com'))
    count = len(found)

    assert count == 1, 'Must find 1 URL, found %d' % count
    assert 'http://example.com' in found
def test_search_urls_default_schemes():
    """With no ``schemes`` argument, http, ftp and https URLs are all found."""
    found = list(search_urls('http://a.com ftp://b.com https://c.com'))

    assert len(found) == 3, 'Must find all three URLs'
    for expected in ('http://a.com', 'ftp://b.com', 'https://c.com'):
        assert expected in found
def test_search_urls_with_text():
    """A URL surrounded by plain words is still found."""
    text = 'before http://example.com after'
    found = list(search_urls(text))
    count = len(found)

    assert count == 1, 'Must find 1 URL, found %d' % count
    assert 'http://example.com' in found
def test_search_urls_exclusion_char_with_text():
    """Exclusion works when the URLs are mixed with plain words."""
    text = 'before !http://a.com between http://b.com after'
    found = list(search_urls(text, exclusion_char='!'))
    count = len(found)

    assert count == 1, 'Must find 1 URL, found %d' % count
    assert 'http://b.com' in found
def __init__(
    self,
    own_nick: identifiers.Identifier,
    line: str,
    url_schemes: Optional[Sequence] = None,
    identifier_factory: IdentifierFactory = identifiers.Identifier,
):
    """Parse a raw IRC line into tags, hostmask, event, args and sender.

    :param own_nick: the bot's nick; compared (lower-cased) with the first
                     argument to detect messages sent directly to the bot
    :param line: the raw line; trailing ``\\r``/``\\n`` are stripped
    :param url_schemes: passed as ``schemes`` to ``web.search_urls`` when
                        looking for URLs in PRIVMSG/NOTICE text
    :param identifier_factory: callable used to build the identifiers for
                               the nick and the message target
    """
    self.make_identifier = identifier_factory
    line = line.strip('\r\n')
    self.line: str = line
    self.urls: Tuple[str, ...] = tuple()
    self.plain: str = ''
    self.ctcp: Optional[str] = None

    # Break off IRCv3 message tags, if present
    self.tags: Dict[str, Optional[str]] = {}
    if line.startswith('@'):
        tagstring, line = line.split(' ', 1)
        for raw_tag in tagstring[1:].split(';'):
            tag = raw_tag.split('=', 1)
            if len(tag) > 1:
                self.tags[tag[0]] = tag[1]
            else:
                # tag present without a value
                self.tags[tag[0]] = None

    # Client time or server time: default to the current (timezone-aware)
    # UTC time; a valid `time` tag overrides it below.
    self.time = datetime.datetime.now(datetime.timezone.utc)
    if 'time' in self.tags:
        # ensure "time" is a string (typecheck); an empty string makes
        # strptime raise the ValueError handled below
        tag_time = self.tags['time'] or ''
        try:
            self.time = datetime.datetime.strptime(
                tag_time,
                "%Y-%m-%dT%H:%M:%S.%fZ",
            ).replace(tzinfo=datetime.timezone.utc)
        except ValueError:
            pass  # Server isn't conforming to spec, ignore the server-time

    # Grabs hostmask from line.
    # Example: line = ':Sopel!foo@bar PRIVMSG #sopel :foobar!'
    #          hostmask -> 'Sopel!foo@bar'
    #          line     -> 'PRIVMSG #sopel :foobar!'
    # Lines without a leading ":" get no hostmask.
    self.hostmask: Optional[str]
    if line.startswith(':'):
        self.hostmask, line = line[1:].split(' ', 1)
    else:
        self.hostmask = None

    # Parses the (hostmask-less) line into a list of arguments.
    # Some events like MODE don't have a secondary string argument, i.e.
    # no ' :' inside the line.
    # Example 1: line = 'PRIVMSG #sopel :foo bar!'
    #            text   -> 'foo bar!'
    #            argstr -> 'PRIVMSG #sopel'
    #            args   -> ['PRIVMSG', '#sopel', 'foo bar!']
    # Example 2: line = 'MODE Sopel +i'
    #            text   -> '+i'
    #            args   -> ['MODE', 'Sopel', '+i']
    if ' :' in line:
        argstr, self.text = line.split(' :', 1)
        self.args = argstr.split(' ')
        self.args.append(self.text)
    else:
        self.args = line.split(' ')
        self.text = self.args[-1]

    # The first element is the event name; the rest are its arguments
    self.event = self.args[0]
    self.args = self.args[1:]

    # The regex will always match any string, even an empty one
    components_match = cast(
        Match, PreTrigger.component_regex.match(self.hostmask or ''))
    nick, self.user, self.host = components_match.groups()
    self.nick: identifiers.Identifier = self.make_identifier(nick)

    # If we have arguments, the first one is the sender
    # Unless it's a QUIT event
    target: Optional[identifiers.Identifier] = None

    if self.args and self.event != 'QUIT':
        target = self.make_identifier(self.args[0])

        # Unless we're messaging the bot directly, in which case that
        # second arg will be our bot's name.
        if target.lower() == own_nick.lower():
            target = self.nick

    self.sender = target

    # Parse CTCP
    if self.event == 'PRIVMSG' or self.event == 'NOTICE':
        ctcp_match = PreTrigger.ctcp_regex.match(self.args[-1])
        if ctcp_match is not None:
            ctcp, message = ctcp_match.groups()
            self.ctcp = ctcp
            self.args[-1] = message or ''

        # Search URLs after CTCP parsing
        self.urls = tuple(
            web.search_urls(self.args[-1], schemes=url_schemes))

    # Populate account from extended-join messages
    if self.event == 'JOIN' and len(self.args) == 3:
        # Account is the second arg `...JOIN #Sopel account :realname`
        self.tags['account'] = self.args[1]

    # get plain text message
    if self.args:
        self.plain = formatting.plain(self.args[-1])
def test_search_urls_multiple_urls_with_text():
    """Two distinct URLs separated by plain words are both found."""
    text = 'before http://a.com/ between http://b.com/ after'
    found = list(search_urls(text))
    count = len(found)

    assert count == 2, 'Must find 2 URLs, found %d' % count
    for expected in ('http://a.com/', 'http://b.com/'):
        assert expected in found
def test_search_urls_multiple_urls_unique():
    """A URL appearing twice in the text is only returned once."""
    text = 'http://a.com/ http://b.com/ http://a.com/'
    found = list(search_urls(text))
    count = len(found)

    assert count == 2, 'Must find 2 URLs, found %d' % count
    for expected in ('http://a.com/', 'http://b.com/'):
        assert expected in found