def __init__(self, media_tweet: TweetCache, trigger: str = TRIGGER_SELF):
    self._log = logging.getLogger(__name__)
    self._trigger = trigger
    self.tweet_cache = media_tweet
    self.media = TweetManager.extract_media(media_tweet.tweet) or []

    self._downloads_enabled = config.getboolean('SauceNao', 'download_files', fallback=False)
    self._previews_enabled = config.getboolean('TraceMoe', 'enabled', fallback=False)

    # SauceNao
    self.minsim_mentioned = float(config.get('SauceNao', 'min_similarity_mentioned', fallback=50.0))
    self.minsim_monitored = float(config.get('SauceNao', 'min_similarity_monitored', fallback=65.0))
    self.minsim_searching = float(config.get('SauceNao', 'min_similarity_searching', fallback=70.0))
    self.persistent = config.getboolean('Twitter', 'enable_persistence', fallback=False)
    self.anime_link = config.get('SauceNao', 'source_link', fallback='anidb').lower()
    self.sauce = SauceNao(
        api_key=config.get('SauceNao', 'api_key', fallback=None),
        min_similarity=min(self.minsim_mentioned, self.minsim_monitored, self.minsim_searching),
        priority=[21, 22, 5, 37, 25]  # SauceNao index IDs to prioritize (21/22 are the anime indexes)
    )

    # Twython
    self.twython = Twython(config.get('Twitter', 'consumer_key'), config.get('Twitter', 'consumer_secret'),
                           config.get('Twitter', 'access_token'), config.get('Twitter', 'access_secret'))

    self._sauce_cache = {}
def __init__(self):
    self.log = logging.getLogger(__name__)

    # Tweet Cache Manager
    self.twitter = TweetManager()

    self.twython = Twython(config.get('Twitter', 'consumer_key'), config.get('Twitter', 'consumer_secret'),
                           config.get('Twitter', 'access_token'), config.get('Twitter', 'access_secret'))

    self.anime_link = config.get('SauceNao', 'source_link', fallback='anidb').lower()
    self.nsfw_previews = config.getboolean('TraceMoe', 'nsfw_previews', fallback=False)
    self.failed_responses = config.getboolean('SauceNao', 'respond_to_failed', fallback=True)
    # Skip empty entries so an unset option doesn't raise a ValueError on int('')
    self.ignored_indexes = [int(i) for i in config.get('SauceNao', 'ignored_indexes', fallback='').split(',')
                            if i.strip()]

    # Pixiv
    self.pixiv = Pixiv()

    # Cache some information about ourselves
    self.my = api.me()
    self.log.info(f"Connected as: {self.my.screen_name}")

    # Image URLs are md5 hashed and cached here to prevent duplicate API queries. This is cleared every 24 hours.
    # I'll update this in the future to use a real caching mechanism (database or redis)
    self._cached_results = {}

    # A cached list of IDs for parent posts we've already processed
    # Used in the check_monitored() method to prevent re-posting sauces when posts are re-tweeted
    self._posts_processed = []

    # The ID cutoff, we populate this once via an initial query at startup
    try:
        self.mention_id = tweepy.Cursor(api.mentions_timeline, tweet_mode='extended', count=1).items(1).next().id
    except StopIteration:
        self.mention_id = 0

    try:
        self.self_id = tweepy.Cursor(api.user_timeline, tweet_mode='extended', count=1).items(1).next().id
    except StopIteration:
        self.self_id = 0

    self.monitored_since = {}
async def main() -> None:
    """
    Initialize / gather the methods to run in concurrent loops
    Returns:
        None
    """
    tasks = []

    if config.getboolean('Twitter', 'monitor_self', fallback=False):
        tasks.append(self())

    if not config.getboolean('Twitter', 'disable_mentions', fallback=False):
        tasks.append(mentions())

    tasks.append(monitored())
    tasks.append(cleanup())

    await asyncio.gather(*tasks)
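# A minimal sketch of how main() might be launched, assuming the surrounding module
# simply hands the coroutine to asyncio; the guard below is illustrative and not
# taken from the original code.
if __name__ == '__main__':
    asyncio.run(main())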
def __init__(self):
    self.enabled = config.getboolean('Pixiv', 'enabled', fallback=False)
    self._refresh_token = config.get('Pixiv', 'refresh_token', fallback=None)

    self._log = logging.getLogger(__name__)
    self._pixiv = AppPixivAPI()
    self._pixiv.set_accept_language(config.get('Pixiv', 'language', fallback='en-US'))

    # Dots are escaped so they match literal periods rather than any character
    self._re_twitter = re.compile(r'^https?://(www\.)?twitter\.com/(?P<username>.+)$')

    if self.enabled:
        self._login()
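# A standalone illustration of the _re_twitter pattern above: it captures everything
# after the twitter.com/ host as the artist's handle. The sample URL is hypothetical.
import re

_re_twitter = re.compile(r'^https?://(www\.)?twitter\.com/(?P<username>.+)$')
match = _re_twitter.match('https://twitter.com/some_artist')
if match:
    print(match.group('username'))  # -> some_artist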
async def send_reply(self, tweet_cache: TweetCache, media_cache: TweetCache, sauce_cache: TweetSauceCache,
                     requested: bool = True, blocked: bool = False) -> None:
    """
    Return the source of the image
    Args:
        tweet_cache (TweetCache): The tweet to reply to
        media_cache (TweetCache): The tweet containing media elements
        sauce_cache (TweetSauceCache): The cached sauce result (its sauce attribute is None if nothing was found)
        requested (bool): True if the lookup was requested, or False if this is a monitored user account
        blocked (bool): If True, the account posting this has blocked the SauceBot
    Returns:
        None
    """
    tweet = tweet_cache.tweet
    sauce = sauce_cache.sauce

    if sauce and self.ignored_indexes and (int(sauce.index_id) in self.ignored_indexes):
        self.log.info(f"Ignoring result from ignored index ID {sauce.index_id}")
        sauce = None

    if sauce is None:
        if self.failed_responses and requested:
            media = TweetManager.extract_media(media_cache.tweet)
            if not media:
                return

            yandex_url = f"https://yandex.com/images/search?url={media[sauce_cache.index_no]}&rpt=imageview"
            ascii_url = f"https://ascii2d.net/search/url/{media[sauce_cache.index_no]}"
            google_url = f"https://www.google.com/searchbyimage?image_url={media[sauce_cache.index_no]}&safe=off"
            message = lang('Errors', 'no_results', {
                'yandex_url': yandex_url,
                'ascii_url': ascii_url,
                'google_url': google_url
            }, user=tweet.author)
            self._post(msg=message, to=tweet.id)

        return

    # Get the artist's Twitter handle if possible
    twitter_sauce = None
    if isinstance(sauce, PixivSource):
        twitter_sauce = self.pixiv.get_author_twitter(sauce.data['member_id'])

    # If we're requesting sauce from the original artist, just say so
    if twitter_sauce and twitter_sauce.lstrip('@').lower() == media_cache.tweet.author.screen_name.lower():
        self.log.info("User requested sauce from a post by the original artist")
        message = lang('Errors', 'sauced_the_artist')
        self._post(message, to=tweet.id)
        return

    # Lines with priority attributes in case we need to shorten them
    lines = []

    # Add additional sauce URLs if available
    sauce_urls = []
    if isinstance(sauce, AnimeSource):
        await sauce.load_ids()

        if self.anime_link in ['myanimelist', 'animal', 'all'] and sauce.mal_url:
            sauce_urls.append(sauce.mal_url)

        if self.anime_link in ['anilist', 'animal', 'all'] and sauce.anilist_url:
            sauce_urls.append(sauce.anilist_url)

        if self.anime_link in ['anidb', 'all']:
            sauce_urls.append(sauce.url)

    # Only add Twitter source URLs for boorus, otherwise we may link to something that angers the Twitter gods
    if isinstance(sauce, BooruSource):
        for url in sauce.urls:
            if 'twitter.com' in url:
                sauce_urls.append(url)

        if 'twitter.com' in sauce.source_url:
            sauce_urls.append(sauce.source_url)

    # For limiting the length of the title/author
    _repr = reprlib.Repr()
    _repr.maxstring = 32

    # H-Misc doesn't have a source to link to, so we need to try and provide the full title
    if sauce.index not in ['H-Misc', 'E-Hentai', 'H-Anime']:
        title = _repr.repr(sauce.title).strip("'")
    else:
        _repr.maxstring = 128
        title = _repr.repr(sauce.title).strip("'")

    # Format the similarity string
    similarity = lang('Accuracy', 'prefix', {'similarity': sauce.similarity})
    if sauce.similarity >= 95.0:
        similarity = similarity + " " + lang('Accuracy', 'exact')
    elif sauce.similarity >= 85.0:
        similarity = similarity + " " + lang('Accuracy', 'high')
    elif sauce.similarity >= 70.0:
        similarity = similarity + " " + lang('Accuracy', 'medium')
    elif sauce.similarity >= 60.0:
        similarity = similarity + " " + lang('Accuracy', 'low')
    else:
        similarity = similarity + " " + lang('Accuracy', 'very_low')

    if requested:
        if sauce.similarity >= 60.0:
            reply = lang('Results', 'requested_found', {'index': sauce.index}, user=tweet.author) + "\n"
            lines.append(ReplyLine(reply, 1))
        else:
            reply = lang('Results', 'requested_found_low_accuracy', {'index': sauce.index}, user=tweet.author) + "\n"
            lines.append(ReplyLine(reply, 1))
    else:
        if sauce.similarity >= 60.0:
            reply = lang('Results', 'other_found', {'index': sauce.index}, user=tweet.author) + "\n"
            lines.append(ReplyLine(reply, 1))
        else:
            reply = lang('Results', 'other_found_low_accuracy', {'index': sauce.index}, user=tweet.author)
            lines.append(ReplyLine(reply, 1))

    # If it's a Pixiv source, try and get their Twitter handle (this is considered most important and displayed first)
    if twitter_sauce:
        reply = lang('Results', 'twitter', {'twitter': twitter_sauce})
        lines.append(ReplyLine(reply, newlines=1))

    # Print the author name if available
    if sauce.author_name:
        author = _repr.repr(sauce.author_name).strip("'")
        reply = lang('Results', 'author', {'author': author})
        lines.append(ReplyLine(reply, newlines=1))

    # Omit the title for Pixiv results since it's usually non-romanized Japanese and not very helpful
    if not isinstance(sauce, PixivSource):
        reply = lang('Results', 'title', {'title': title})
        lines.append(ReplyLine(reply, 10, newlines=1))

    # Add the episode number and timestamp for video sources
    if isinstance(sauce, VideoSource) and sauce.episode:
        reply = lang('Results', 'episode', {'episode': sauce.episode})
        if sauce.timestamp:
            reply += " " + lang('Results', 'timestamp', {'timestamp': sauce.timestamp})
        lines.append(ReplyLine(reply, 5, newlines=1))

    # Add character and material info for booru results
    if isinstance(sauce, BooruSource):
        if sauce.material:
            reply = lang('Results', 'material', {'material': sauce.material[0].title()})
            lines.append(ReplyLine(reply, 5, newlines=1))
        if sauce.characters:
            reply = lang('Results', 'character', {'character': sauce.characters[0].title()})
            lines.append(ReplyLine(reply, 4, newlines=1))

    # Add the chapter for manga sources
    if isinstance(sauce, MangaSource) and sauce.chapter:
        reply = lang('Results', 'chapter', {'chapter': sauce.chapter})
        lines.append(ReplyLine(reply, 5, newlines=1))

    # Display our confidence rating
    lines.append(ReplyLine(similarity, 2, newlines=1))

    # Source URLs are not available in some indexes
    if sauce.index not in ['H-Misc', 'H-Anime', 'H-Magazines', 'H-Game CG', 'Mangadex']:
        if sauce_urls:
            reply = "\n".join(sauce_urls)
            lines.append(ReplyLine(reply, newlines=2))
        elif sauce.source_url and not isinstance(sauce, BooruSource):
            lines.append(ReplyLine(sauce.source_url, newlines=2))

    # Try and append bot instructions with monitored posts. This might make our post too long, though.
    if not requested:
        promo_footer = lang('Results', 'other_footer')
        if promo_footer:
            lines.append(ReplyLine(promo_footer, 0, newlines=2))
        elif config.getboolean('System', 'display_patreon'):
            lines.append(ReplyLine("Support SauceBot!\nhttps://www.patreon.com/saucebot", 3, newlines=2))

    # trace.moe time! Let's get a video preview
    if sauce_cache.media_id:
        comment = self._post(msg=lines, to=tweet.id, media_ids=[sauce_cache.media_id])
    # This was hentai and we want to avoid uploading hentai clips to this account
    else:
        comment = self._post(msg=lines, to=tweet.id)

    # If we've been blocked by this user and have the artist's Twitter handle, send the artist a DMCA guide
    if blocked and twitter_sauce:
        self.log.info(f"Sending {twitter_sauce} DMCA takedown advice")
        message = lang('Errors', 'blocked_dmca', {'twitter_artist': twitter_sauce})
        # noinspection PyUnboundLocalVariable
        self._post(msg=message, to=comment.id)
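# A standalone illustration of the reprlib truncation used in send_reply(): maxstring
# caps the repr length (the middle of the string is replaced with '...'), and
# strip("'") removes the quotes that repr() adds. The sample title is made up.
import reprlib

_repr = reprlib.Repr()
_repr.maxstring = 32
title = _repr.repr("A very long anime title that would never fit in a tweet")
print(title.strip("'"))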
def __init__(self):
    self.log = logging.getLogger(__name__)

    # Tweet Cache Manager
    self.twitter = TweetManager()

    self.twython = Twython(config.get('Twitter', 'consumer_key'), config.get('Twitter', 'consumer_secret'),
                           config.get('Twitter', 'access_token'), config.get('Twitter', 'access_secret'))

    # SauceNao
    self.minsim_mentioned = float(config.get('SauceNao', 'min_similarity_mentioned', fallback=50.0))
    self.minsim_monitored = float(config.get('SauceNao', 'min_similarity_monitored', fallback=65.0))
    self.minsim_searching = float(config.get('SauceNao', 'min_similarity_searching', fallback=70.0))
    self.persistent = config.getboolean('Twitter', 'enable_persistence', fallback=False)
    self.anime_link = config.get('SauceNao', 'source_link', fallback='anidb').lower()
    self.sauce = SauceNao(api_key=config.get('SauceNao', 'api_key', fallback=None),
                          min_similarity=min(self.minsim_mentioned, self.minsim_monitored, self.minsim_searching),
                          priority=[21, 22, 5])

    # Trace.moe
    self.tracemoe = None  # type: Optional[ATraceMoe]
    if config.getboolean('TraceMoe', 'enabled', fallback=False):
        self.tracemoe = ATraceMoe(config.get('TraceMoe', 'token', fallback=None))

    self.nsfw_previews = config.getboolean('TraceMoe', 'nsfw_previews', fallback=False)

    # Pixiv
    self.pixiv = Pixiv()

    # Cache some information about ourselves
    self.my = api.me()
    self.log.info(f"Connected as: {self.my.screen_name}")

    # Image URLs are md5 hashed and cached here to prevent duplicate API queries. This is cleared every 24 hours.
    # I'll update this in the future to use a real caching mechanism (database or redis)
    self._cached_results = {}

    # A cached list of IDs for parent posts we've already processed
    # Used in the check_monitored() method to prevent re-posting sauces when posts are re-tweeted
    self._posts_processed = []

    # The ID cutoff, we populate this once via an initial query at startup
    try:
        self.since_id = tweepy.Cursor(api.mentions_timeline, tweet_mode='extended', count=1).items(1).next().id
    except StopIteration:
        self.since_id = 0

    self.monitored_since = {}
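# A minimal sketch (not part of the bot) of how the configparser fallbacks above
# behave when options are missing: an effectively empty config still yields usable
# defaults. The section and option names match the calls above; the empty sections
# are the assumption here.
from configparser import ConfigParser

demo = ConfigParser()
demo.read_string("[SauceNao]\n[TraceMoe]\n[Twitter]\n")
print(demo.get('SauceNao', 'source_link', fallback='anidb').lower())           # -> anidb
print(demo.getboolean('TraceMoe', 'enabled', fallback=False))                  # -> False
print(float(demo.get('SauceNao', 'min_similarity_mentioned', fallback=50.0)))  # -> 50.0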
async def get_sauce(self, tweet_cache: TweetCache, index_no: int = 0, log_index: Optional[str] = None,
                    trigger: str = TRIGGER_MENTION) -> Tuple[TweetSauceCache, Optional[dict]]:
    """
    Get the sauce of a media tweet
    """
    log_index = log_index or 'SYSTEM'

    # Have we cached the sauce already?
    cache = TweetSauceCache.fetch(tweet_cache.tweet_id, index_no)
    if cache:
        return cache, None

    media = TweetManager.extract_media(tweet_cache.tweet)[index_no]

    # Execute a Tracemoe search query for anime results
    async def tracemoe_search(_sauce_results, _path: str, is_url: bool) -> Optional[dict]:
        if not self.tracemoe:
            return None

        if _sauce_results and isinstance(_sauce_results[0], AnimeSource):
            # noinspection PyBroadException
            try:
                _tracemoe_sauce = await self.tracemoe.search(_path, is_url=is_url)
            except Exception:
                self.log.warning(f"[{log_index}] Tracemoe returned an exception, aborting search query")
                return None

            if not _tracemoe_sauce.get('docs'):
                return None

            # Make sure our search results match
            if await _sauce_results[0].load_ids():
                if _sauce_results[0].anilist_id != _tracemoe_sauce['docs'][0]['anilist_id']:
                    self.log.info(f"[{log_index}] saucenao and trace.moe provided mismatched anilist entries: "
                                  f"`{_sauce_results[0].anilist_id}` vs. `{_tracemoe_sauce['docs'][0]['anilist_id']}`")
                    return None

            self.log.info(f"[{log_index}] Downloading video preview for AniList entry "
                          f"{_sauce_results[0].anilist_id} from trace.moe")
            _tracemoe_preview = await self.tracemoe.video_preview_natural(_tracemoe_sauce)
            _tracemoe_sauce['docs'][0]['preview'] = _tracemoe_preview
            return _tracemoe_sauce['docs'][0]

        return None

    # Look up the sauce
    try:
        if config.getboolean('SauceNao', 'download_files', fallback=False):
            self.log.debug(f"[{log_index}] Downloading image from Twitter")
            fd, path = tempfile.mkstemp()
            try:
                with os.fdopen(fd, 'wb') as tmp:
                    async with aiohttp.ClientSession(raise_for_status=True) as session:
                        try:
                            async with session.get(media) as response:
                                image = await response.read()
                                tmp.write(image)
                                if not image:
                                    self.log.error(f"[{log_index}] Empty file received from Twitter")
                                    sauce_cache = TweetSauceCache.set(tweet_cache, index_no=index_no, trigger=trigger)
                                    # Keep the declared (cache, preview) return shape
                                    return sauce_cache, None
                        except aiohttp.ClientResponseError as error:
                            self.log.warning(f"[{log_index}] Twitter returned a {error.status} error when "
                                             f"downloading from tweet {tweet_cache.tweet_id}")
                            sauce_cache = TweetSauceCache.set(tweet_cache, index_no=index_no, trigger=trigger)
                            return sauce_cache, None

                sauce_results = await self.sauce.from_file(path)
                tracemoe_sauce = await tracemoe_search(sauce_results, _path=path, is_url=False)
            finally:
                os.remove(path)
        else:
            self.log.debug(f"[{log_index}] Performing remote URL lookup")
            sauce_results = await self.sauce.from_url(media)
            tracemoe_sauce = await tracemoe_search(sauce_results, _path=media, is_url=True)

        if not sauce_results:
            sauce_cache = TweetSauceCache.set(tweet_cache, sauce_results, index_no, trigger=trigger)
            return sauce_cache, None
    except ShortLimitReachedException:
        self.log.warning(f"[{log_index}] Short API limit reached, throttling for 30 seconds")
        await asyncio.sleep(30.0)
        # Retry with the original trigger so the rate-limited lookup isn't reclassified
        return await self.get_sauce(tweet_cache, index_no, log_index, trigger=trigger)
    except DailyLimitReachedException:
        self.log.error(f"[{log_index}] Daily API limit reached, throttling for 15 minutes. "
                       f"Please consider upgrading your API key.")
        await asyncio.sleep(900.0)
        return await self.get_sauce(tweet_cache, index_no, log_index, trigger=trigger)
    except SauceNaoException as e:
        self.log.error(f"[{log_index}] SauceNao exception raised: {e}")
        sauce_cache = TweetSauceCache.set(tweet_cache, index_no=index_no, trigger=trigger)
        return sauce_cache, None

    sauce_cache = TweetSauceCache.set(tweet_cache, sauce_results, index_no, trigger=trigger)
    return sauce_cache, tracemoe_sauce
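# A self-contained sketch of the download pattern used in get_sauce(): write the HTTP
# response to a secure temp file, use it, and always clean up in a finally block.
# The URL and function name are placeholders, not bot code.
import asyncio
import os
import tempfile

import aiohttp


async def fetch_to_tempfile(url: str) -> None:
    fd, path = tempfile.mkstemp()
    try:
        with os.fdopen(fd, 'wb') as tmp:
            async with aiohttp.ClientSession(raise_for_status=True) as session:
                async with session.get(url) as response:
                    tmp.write(await response.read())
        print(f"saved {os.path.getsize(path)} bytes to {path}")  # stand-in for the lookup step
    finally:
        os.remove(path)


# asyncio.run(fetch_to_tempfile('https://example.com/image.jpg'))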
# Set up logging
import logging

import sentry_sdk

from twsaucenao.config import config

logLevel = getattr(logging, str(config.get('System', 'log_level', fallback='ERROR')).upper())
logFormat = logging.Formatter("[%(asctime)s] %(levelname)s: %(message)s", "%Y-%m-%d %H:%M:%S")

log = logging.getLogger('twsaucenao')
log.setLevel(logLevel)

ch = logging.StreamHandler()
ch.setLevel(logLevel)
ch.setFormatter(logFormat)
log.addHandler(ch)

# Unless you're running your own custom fork of saucebot, you probably don't need this.
if config.has_option('System', 'sentry_logging') and config.getboolean('System', 'sentry_logging'):
    sentry_sdk.init(config.get('System', 'sentry_dsn'), traces_sample_rate=0.25)
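# An illustrative config.ini fragment for the [System] options read above; the values
# shown are placeholders, not shipped defaults (log_level genuinely defaults to ERROR).
#
#   [System]
#   log_level = INFO
#   sentry_logging = false
#   sentry_dsn = https://<key>@sentry.example.com/<project>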
async def get_sauce(self, tweet_cache: TweetCache, index_no: int = 0, log_index: Optional[str] = None,
                    trigger: str = TRIGGER_MENTION) -> Tuple[TweetSauceCache, Optional[dict]]:
    """
    Get the sauce of a media tweet
    """
    log_index = log_index or 'SYSTEM'

    # Have we cached the sauce already?
    cache = TweetSauceCache.fetch(tweet_cache.tweet_id, index_no)
    if cache:
        return cache, None

    media = TweetManager.extract_media(tweet_cache.tweet)[index_no]

    # Execute a Tracemoe search query for anime results. The path is passed in explicitly
    # so the remote URL branch doesn't reference an undefined variable.
    async def tracemoe_search(_sauce, _path: str, is_url: bool) -> Optional[dict]:
        if not self.tracemoe:
            return None

        # 21 and 22 are SauceNao's anime / h-anime indexes
        if _sauce.results and _sauce.results[0].index_id in [21, 22]:
            # noinspection PyBroadException
            try:
                _tracemoe_sauce = await self.tracemoe.search(_path, is_url=is_url)
            except Exception:
                self.log.warning(f"[{log_index}] Tracemoe returned an exception, aborting search query")
                return None

            if not _tracemoe_sauce.get('docs'):
                return None

            # Check for an exact title match first, then fall back to a similarity check.
            # Obviously, this is not perfect. Titles don't always match, but sometimes tracemoe returns an accurate
            # result with a lower similarity, so we just.. try and guess the best we can for now.
            if _tracemoe_sauce['docs'][0]['similarity'] < 0.85:
                if _tracemoe_sauce['docs'][0]['title_english'].lower() != _sauce.results[0].title.lower():
                    if _tracemoe_sauce['docs'][0]['title_romaji'].lower() != _sauce.results[0].title.lower():
                        return None

            _tracemoe_preview = await self.tracemoe.video_preview_natural(_tracemoe_sauce)
            _tracemoe_sauce['docs'][0]['preview'] = _tracemoe_preview
            return _tracemoe_sauce['docs'][0]

        return None

    # Look up the sauce
    try:
        if config.getboolean('SauceNao', 'download_files', fallback=False):
            self.log.debug(f"[{log_index}] Downloading image from Twitter")
            fd, path = tempfile.mkstemp()
            try:
                with os.fdopen(fd, 'wb') as tmp:
                    async with aiohttp.ClientSession(raise_for_status=True) as session:
                        try:
                            async with session.get(media) as response:
                                image = await response.read()
                                tmp.write(image)
                                if not image:
                                    self.log.error(f"[{log_index}] Empty file received from Twitter")
                                    sauce_cache = TweetSauceCache.set(tweet_cache, index_no=index_no, trigger=trigger)
                                    # Keep the declared (cache, preview) return shape
                                    return sauce_cache, None
                        except aiohttp.ClientResponseError as error:
                            self.log.warning(f"[{log_index}] Twitter returned a {error.status} error when "
                                             f"downloading from tweet {tweet_cache.tweet_id}")
                            sauce_cache = TweetSauceCache.set(tweet_cache, index_no=index_no, trigger=trigger)
                            return sauce_cache, None

                sauce = await self.sauce.from_file(path)
                tracemoe_sauce = await tracemoe_search(sauce, _path=path, is_url=False)
            finally:
                os.remove(path)
        else:
            self.log.debug(f"[{log_index}] Performing remote URL lookup")
            sauce = await self.sauce.from_url(media)
            tracemoe_sauce = await tracemoe_search(sauce, _path=media, is_url=True)

        if not sauce.results:
            sauce_cache = TweetSauceCache.set(tweet_cache, sauce, index_no, trigger=trigger)
            return sauce_cache, None
    except ShortLimitReachedException:
        self.log.warning(f"[{log_index}] Short API limit reached, throttling for 30 seconds")
        await asyncio.sleep(30.0)
        # Retry with the original trigger so the rate-limited lookup isn't reclassified
        return await self.get_sauce(tweet_cache, index_no, log_index, trigger=trigger)
    except DailyLimitReachedException:
        self.log.error(f"[{log_index}] Daily API limit reached, throttling for 15 minutes. "
                       f"Please consider upgrading your API key.")
        await asyncio.sleep(900.0)
        return await self.get_sauce(tweet_cache, index_no, log_index, trigger=trigger)
    except SauceNaoException as e:
        self.log.error(f"[{log_index}] SauceNao exception raised: {e}")
        sauce_cache = TweetSauceCache.set(tweet_cache, index_no=index_no, trigger=trigger)
        return sauce_cache, None

    sauce_cache = TweetSauceCache.set(tweet_cache, sauce, index_no, trigger=trigger)
    return sauce_cache, tracemoe_sauce