def __init__(self, screen_name, corpus=None, **kwargs):
    '''
    :screen_name User name to post as
    :corpus Text file to read to generate text.
    :api tweepy.API object
    :dry_run boolean If set, TwitterMarkov won't actually post tweets.
    '''
    # Use a caller-supplied API client when given; otherwise build one,
    # forwarding the remaining keyword arguments to the constructor.
    if 'api' in kwargs:
        self.api = kwargs.pop('api')
    else:
        self.api = tbu.API(screen_name=screen_name, **kwargs)

    # Prefer the API object's logger when it provides one.
    try:
        self.log = self.api.logger
    except AttributeError:
        self.log = logging.getLogger(screen_name)

    self.screen_name = screen_name
    self.config = self.api.config
    self.dry_run = kwargs.pop('dry_run', False)

    try:
        # Fall back to the config file when no corpus argument was given.
        corpus = corpus or self.config.get('corpus')
        # BUG FIX: `basestring` exists only on Python 2 and raised NameError
        # on Python 3; use six.string_types, consistent with the rest of
        # this module. The string check must come first because strings are
        # themselves Iterable.
        if isinstance(corpus, six.string_types):
            corpora = [corpus]
        elif isinstance(corpus, Iterable):
            corpora = corpus
        else:
            raise RuntimeError('Unable to find any corpora!')

        self.corpora = [b for b in corpora if b is not None]
        self.log.debug('%s, %s', screen_name, self.corpora)

        state_size = kwargs.get('state_size', self.config.get('state_size'))
        self.models = self._setup_models(self.corpora, state_size)

    except RuntimeError as e:
        self.log.error(e)
        # Re-raise the active exception with its original traceback
        # (`raise e` is redundant inside the handler).
        raise

    self.log.debug('models: %s', list(self.models.keys()))

    blacklist = kwargs.get('blacklist') or self.config.get('blacklist', [])
    self.wordfilter = Wordfilter()
    self.wordfilter.add_words(blacklist)

    # NOTE(review): this raises IndexError for an account that has never
    # tweeted -- confirm whether first-run bots need a guard here.
    self.last_tweet = (self.api.user_timeline(count=1))[0]
    self.last_tweet = self.last_tweet.id

    if kwargs.get('learn', True):
        self.learn_parent()
def __init__(self, screen_name, corpus=None, **kwargs):
    """Set up the bot: API client, logging, corpora/models, and word filter."""
    # Take a caller-supplied API client when present; otherwise build one
    # from the remaining keyword arguments.
    try:
        self.api = kwargs.pop('api')
    except KeyError:
        self.api = tbu.API(screen_name=screen_name, **kwargs)

    # Borrow the API object's logger when it has one.
    try:
        self.log = self.api.logger
    except AttributeError:
        self.log = logging.getLogger(screen_name)

    self.screen_name = screen_name
    self.config = self.api.config
    self.dry_run = kwargs.pop('dry_run', False)
    self.log.debug('screen name: %s', screen_name)
    self.log.debug("dry run: %s", self.dry_run)

    try:
        # Argument wins over the config file's 'corpus' entry.
        source = corpus or self.config.get('corpus')
        # A single path becomes a one-element list; the string test must
        # precede the Iterable test since strings are iterable.
        if isinstance(source, six.string_types):
            candidates = [source]
        elif isinstance(source, Iterable):
            candidates = source
        else:
            raise RuntimeError('Unable to find any corpora!')

        self.corpora = [entry for entry in candidates if entry is not None]
        state_size = kwargs.get('state_size', self.config.get('state_size'))
        self.models = self._setup_models(self.corpora, state_size)
    except RuntimeError as err:
        self.log.error(err)
        raise err

    self.log.debug('models: %s', list(self.models.keys()))

    # Blacklist from the keyword argument, falling back to the config file.
    blacklist = kwargs.get('blacklist') or self.config.get('blacklist', [])
    self.wordfilter = Wordfilter()
    self.wordfilter.add_words(blacklist)
    self.log.debug('blacklist: %s terms', len(self.wordfilter.blacklist))

    if kwargs.get('learn', True):
        self.log.debug('learning...')
        self.learn_parent()
def __init__(self, channels, nickname, server, port, owner, usessl,
             password, engine_host, engine_port, api_key, respond):
    """Configure the bot's state and connect to the IRC server.

    :param channels: iterable of channel names to join
    :param nickname: IRC nick (and canonical name) for the bot
    :param server: IRC server hostname
    :param port: IRC server port
    :param owner: nick of the bot's owner
    :param usessl: whether to wrap the connection in SSL/TLS
    :param password: owner-authentication password
    :param engine_host: hostname of the conversation engine
    :param engine_port: port of the conversation engine (string; it is
        concatenated into a URL)
    :param api_key: API key for the conversation engine
    :param respond: whether to respond to users in-channel
    """
    # Initialize the class' attributes.
    for i in channels:
        self.joined_channels[i] = 1
    # BUG FIX: the original body read an undefined name `nick` (the
    # parameter is `nickname`), raising NameError on instantiation.
    self.canonical_name = nickname
    self.nick = nickname
    self.owner = owner
    self.server = server
    self.port = port
    self.password = password
    self.authenticated = False
    self.usessl = usessl
    self.engine = 'http://' + engine_host + ':' + engine_port
    self.api_key = api_key
    self.wordfilter = Wordfilter()
    self.respond = respond
    self.ghost = False

    # Connection factory object handle.
    factory = ""

    # If SSL/TLS support is requested, pass the ssl.wrap_socket() method
    # as a keyword argument.
    if self.usessl:
        logger.debug("Constructing SSL/TLS server connector.")
        factory = irc.connection.Factory(wrapper=ssl.wrap_socket)
    else:
        logger.debug("Constructing plaintext server connector.")
        factory = irc.connection.Factory()

    # Initialize an instance of this class by running the parent class'
    # default initializer method. [(server, port)] can be a list of one or
    # more (server, port) tuples; the other two arguments are the bot's
    # nickname and realname.
    logger.debug("Instantiating SingleServerIRCBot superclass.")
    irc.bot.SingleServerIRCBot.__init__(self, [(self.server, self.port)],
                                        self.nick, self.nick,
                                        connect_factory=factory)
    logger.debug("Channels configured for this bot:")
    logger.debug(" " + str(self.joined_channels))
def __init__(
        self,
        bible: biblemunger.Bible,
        favdict,  #: list[dict],
        apptitle: str,
        appsubtitle: str,
        dbpath: str,
        wordfilter: bool):
    """Configure the web application.

    :param bible: parsed Bible to munge
    :param favdict: mapping of search term -> replacement for the
        favorite-searches list
    :param apptitle: application title shown in the UI
    :param appsubtitle: application subtitle shown in the UI
    :param dbpath: path to the sqlite database file
    :param wordfilter: when True, filter results through wordfilter
    """
    self.bible = bible
    self.apptitle = apptitle
    self.appsubtitle = appsubtitle
    self.dbpath = dbpath
    if wordfilter:
        from wordfilter import Wordfilter
        self.wordfilter = Wordfilter()
        # Sentinel term used by the test suite to verify filtering works.
        self.wordfilter.add_words(['QwertyStringUsedForTestingZxcvb'])
    else:
        self.wordfilter = False

    # Show deployment info when a marker file exists, otherwise a default.
    deploymentinfofile = os.path.join(scriptdir, 'deploymentinfo.txt')
    if os.path.exists(deploymentinfofile):
        with open(deploymentinfofile) as df:
            self.deploymentinfo = df.read()
    else:
        self.deploymentinfo = "development version"

    # TODO: refactor this, just use a dictionary directly elsewhere
    self.favorite_searches = [
        {'search': key, 'replace': value}
        for key, value in favdict.items()
    ]

    # BUG FIX: the sqlite connection was never closed, leaking the handle
    # for the life of the process. Close it deterministically.
    conn = sqlite3.connect(self.dbpath)
    try:
        c = conn.cursor()
        c.execute(
            "select name from sqlite_master where type='table' and name='recent_searches'"
        )
        # Create the schema on first run.
        if not c.fetchone():
            self.initialize_database()
    finally:
        conn.close()
def clean_description(self):
    """Validate the submitted description against Akismet and the word filter.

    Raises forms.ValidationError when the text looks like spam or contains
    blacklisted words; otherwise returns the cleaned description.
    """
    description = self.cleaned_data['description']

    # Skip the Akismet round-trip entirely while the test suite runs.
    is_spam = False
    if not settings.TESTING:
        is_spam = Akismet(settings.AKISMET_KEY, blog="CC Search").check(
            self.request.get_host(),
            user_agent=self.request.META.get('user-agent'),
            comment_author=self.request.user.username,
            comment_content=description)

    has_bad_words = Wordfilter().blacklisted(description)

    if is_spam or has_bad_words:
        raise forms.ValidationError(
            "This description failed our spam or profanity check; the description has not been updated."
        )
    return description
def botechre(times=1):
    """Yield up to *times* generated titles that pass the word filter.

    Loads the JSON corpus from CORPUS_FILENAME; if the file is missing,
    prints a hint to stderr and yields nothing.

    :param times: number of titles to yield
    """
    corpus = {}
    try:
        with codecs.open(CORPUS_FILENAME, encoding='utf-8') as fp:
            corpus = json.load(fp)
    except IOError:
        sys.stderr.write('File not found: %s\n' % CORPUS_FILENAME)
        sys.stderr.write('Run %s first.\n' % 'build.py')
        return

    assembler = BotechreAssembler(corpus)
    wordfilter = Wordfilter()

    # Allow up to 10 generation attempts per requested title, since
    # blacklisted candidates are discarded.
    max_times = times * 10
    yielded = 0
    for i in range(max_times):
        title = random_title(assembler)
        if not wordfilter.blacklisted(title):
            yield title
            yielded += 1
        # BUG FIX: stop once *times* titles have actually been yielded.
        # The original compared `times` against the loop index, which
        # yielded times+1 titles and miscounted whenever a candidate was
        # filtered out.
        if yielded >= times:
            break
def __init__(self, screen_name, brains=None, **kwargs):
    """Set up the bot: API client, brains, word filter and tweet checker.

    :param screen_name: Twitter screen name to post as
    :param brains: brain file path, or list of paths; falls back to the
        config's 'brain'/'brains' entries
    :raises RuntimeError, IOError, IndexError: when no brains can be loaded
    """
    self.screen_name = screen_name
    # BUG FIX: kwargs.get() evaluates its default eagerly, so the original
    # always constructed a tbu.api.API (with its config/setup side effects)
    # even when a ready-made `api` was passed in. Build one only when needed.
    if 'api' in kwargs:
        self.api = kwargs['api']
    else:
        self.api = tbu.api.API(screen_name, **kwargs)
    self.config = kwargs.get('config', self.api.config)
    self.logger = logging.getLogger(screen_name)

    try:
        # Normalize to a list of brain files, falling back to the config.
        if isinstance(brains, str):
            brains = [brains]
        if not isinstance(brains, list):
            brain = self.config.get('brain', [])
            brains = brain + self.config.get('brains', [])
        if not brains:
            raise RuntimeError
        self.brains = self._setup_brains(brains)
    except (IOError, IndexError, RuntimeError) as e:
        self.logger.error('Feed me brains: unable to find any brains!')
        # Re-raise with the original traceback intact.
        raise

    self.logger.debug('Brains: {0}'.format(list(self.brains.keys())))

    self.dry_run = kwargs.get('dry_run', False)

    self.wordfilter = Wordfilter()
    self.wordfilter.add_words(self.config.get('blacklist', []))

    self.checker = checking.construct_tweet_checker(
        no_retweets=self.config.get('no_retweets'),
        no_replies=self.config.get('no_replies')
    )

    if kwargs.get('learn', True):
        self.learn_parent()
# Framework and project imports for the character-level VAE sampling script.
import theano
import theano.tensor as T
from databases.textproject_reconstruction_database import TextProjectReconstructionDatabase
from nn.containers import Sequential
from nn.rnns import LNLSTM
from nn.layers import OneHot
from nn.utils import Vocabulary
import nn.utils
from lm_vae import Sampler
from lm_vae_sample import LNLSTMStep
from textproject_vae_charlevel import make_model
from wordfilter import Wordfilter

# Shared word filter used to screen generated text before it is used.
wordfilter = Wordfilter()

# NOTE(review): `time`, `os`, and `pickle` are used below but not imported in
# this chunk -- presumably imported elsewhere in the file; confirm.
t1 = time.time()

# Training-session identifier; the vocabulary pickle lives under
# session/<session>/vocab.pkl.
session = "sp15_trial"

vocab = Vocabulary()
if os.path.exists("session/%s/vocab.pkl" % session):
    # Restore the vocabulary written by the training run.
    # NOTE(review): the pickle is opened in text mode; Python 3 requires
    # 'rb' here -- confirm this script targets Python 2.
    with open("session/%s/vocab.pkl" % session) as vocab_file:
        vocab = pickle.load(vocab_file)
    print("Loaded vocab with %i chars:" % len(vocab))
    #print(vocab.index_to_word)
else:
    # No saved vocabulary: fall back to the default character set.
    print("Using default 256-char vocab")
    # old-school
import json import re import requests import time import lxml.html from . import logger from io import BytesIO log = logger.get("common") MEDIAWIKI_API = "https://commons.wikimedia.org/w/api.php" HEADERS = {"User-Agent": "picdescbot, http://github.com/elad661/picdescbot"} supported_formats = re.compile('\.(png|jpe?g|gif)$', re.I) word_filter = Wordfilter() # I really don't want the bot to show this kind of imagery! word_filter.add_words(['nazi', 'hitler', 'reich']) # I can't trust Microsoft's algorithm to not be racist, so I should probably # make the bot avoid posting images with the following words in them. # I'm not using wordfilter here because it would over-filter in some cases. # also filter "gun" because this is not the kind of content I want the bot to post # This is matched only against the caption generated by CVAPI. extra_filter = {'ape', 'apes', 'monkey', 'monkeys', 'gun'} # Blacklisted phrases (instead of words) to blacklist certain phrases # in the wikimedia description blacklisted_phrases = { 'comic strip', 'logo', 'biblical illustration', 'church',
class DixieBot(irc.bot.SingleServerIRCBot):
    """IRC bot that relays channel text to an external conversation engine.

    The bot learns from (and optionally responds to) channel traffic via an
    HTTP "conversation engine", supports owner commands over private message,
    and a "ghost mode" that lets the authenticated owner speak through the
    bot into channels.
    """

    # Class-level variables which form attributes. These all refer to aspects
    # of the bot.
    joined_channels = IRCDict()
    canonical_name = ""
    nick = ""
    owner = ""

    # Connection information.
    server = ""
    port = 0

    # The bot's owner's authentication password.
    password = ""

    # Is the bot's owner authenticated or not?
    authenticated = ""

    # Whether or not the connection is SSL/TLS encrypted or not.
    usessl = ""

    # Response engine's hostname and port.
    engine = ""

    # Bot's API key to interface with the response engine.
    api_key = ""

    # One instance of wordfilter.Wordfilter() to rule them all...
    wordfilter = None

    # Whether or not to use the conversation engine to respond?
    respond = None

    # Whether or not the bot's owner can speak through the bot by using
    # private messages. By default, the bot doesn't let you do that.
    ghost = None

    # Methods on the connection object to investigate:
    # connect() - Connect to a server?
    # connected() -
    # disconnect() -
    # get_nickname() -
    # get_server_name() -
    # info() -
    # ircname() -
    # is_connected() - See if the connection is still up?
    # part() - Leave channel?
    # privmsg() - Send privmsg?
    # quit() - Terminate IRC connection?
    # reconnect() - Reconnect to server?
    # send_raw() -
    # stats() -
    # time() -

    def __init__(self, channels, nickname, server, port, owner, usessl,
                 password, engine_host, engine_port, api_key, respond):
        # Initialize the class' attributes.
        for i in channels:
            self.joined_channels[i] = 1
        # NOTE(review): `nick` is undefined here -- the parameter is
        # `nickname` -- so instantiation raises NameError. Should read
        # `nickname` on both of the next two lines; confirm and fix.
        self.canonical_name = nick
        self.nick = nick
        self.owner = owner
        self.server = server
        self.port = port
        self.password = password
        self.authenticated = False
        self.usessl = usessl
        # Base URL of the conversation engine (engine_port is a string).
        self.engine = 'http://' + engine_host + ':' + engine_port
        self.api_key = api_key
        self.wordfilter = Wordfilter()
        self.respond = respond
        self.ghost = False

        # Connection factory object handle.
        factory = ""

        # If SSL/TLS support is requested, pass the ssl.wrap_socket() method
        # as a keyword argument.
        if self.usessl:
            logger.debug("Constructing SSL/TLS server connector.")
            factory = irc.connection.Factory(wrapper=ssl.wrap_socket)
        else:
            logger.debug("Constructing plaintext server connector.")
            factory = irc.connection.Factory()

        # Initialize an instance of this class by running the parent class'
        # default initializer method.
        #
        # [(server, port)] can be a list of one or more (server, port) tuples
        # because it can connect to more than one at once.
        # The other two arguments are the bot's nickname and realname.
        logger.debug("Instantiating SingleServerIRCBot superclass.")
        irc.bot.SingleServerIRCBot.__init__(self, [(self.server, self.port)],
                                            self.nick, self.nick,
                                            connect_factory=factory)
        logger.debug("Channels configured for this bot:")
        logger.debug(" " + str(self.joined_channels))

    # This method fires if the configured nickname is already in use. If that
    # happens, change the bot's nick slightly.
    # Note that the name of this method is specifically what the irc module
    # looks for.
    def on_nicknameinuse(self, connection, event):
        logger.info("Bot nickname " + self.nick +
                    " is already taken. Falling back to bot nickname " +
                    self.nick + "_.")
        connection.privmsg(
            self.owner, self.nick +
            " seems to be taken already. Falling back to nickname " +
            self.nick + "_.")
        connection.nick(connection.get_nickname() + "_")

    # This method fires when the server accepts the bot's connection. It walks
    # through the IRCDict of channels and tries to join each one.
    def on_welcome(self, connection, event):
        logger.debug("Entered DixieBot.on_welcome().")
        for channel in self.joined_channels:
            logger.debug("Trying to join channel " + channel + ".")
            connection.join(channel)
            logger.info("Joined channel " + channel + ".")
            connection.privmsg(self.owner, "Joined " + channel + ".")

            # Just to be silly, roll 1d10. On a 1, say hello to the channel.
            roll = random.randint(1, 10)
            if roll == 1:
                # Wait a moment so the greeting doesn't land instantly.
                pause = random.randint(1, 10)
                time.sleep(pause)
                logger.debug("Bot has randomly decided to announce itself.")
                connection.privmsg(
                    channel, "Hey, bro! I'm " + self.nick +
                    ", the best cowboy who ever punched deck!")
        logger.debug("Exiting DixieBot.on_welcome().")

    # This method fires if the bot gets kicked from a channel. The smart
    # thing to do is sleep for a random period of time (between one and three
    # minutes) before trying to join again.
    def on_kick(self, connection, event):
        delay = random.randint(60, 180)
        logger.debug("Got kicked from " + event.target + ". Sleeping for " +
                     str(delay) + " seconds.")
        connection.privmsg(
            self.owner, "Got kicked from " + event.target +
            ". Sleeping for " + str(delay) + " seconds.")
        time.sleep(delay)
        logger.debug("Rejoining channel " + event.target + ".")
        connection.privmsg(self.owner,
                           "Rejoining channel " + event.target + ".")
        connection.join(event.target)
        logger.info("Successfully re-joined channel " + event.target + ".")
        connection.privmsg(
            self.owner,
            "Successfully re-joined channel " + event.target + ".")
        return

    # This method fires if the bot gets kickbanned.
    def on_bannedfromchan(self, connection, event):
        logger.warn("Uh-oh - I got kickbanned from " + event.target +
                    ". I know when I'm not wanted.")
        # NOTE(review): this should presumably be connection.privmsg();
        # irc.bot.SingleServerIRCBot has no privmsg() method -- confirm.
        self.privmsg(
            self.owner, "Uh-oh - I got kickbanned from " + event.target +
            ". I know when I'm not wanted.")
        # NOTE(review): joined_channels is an IRCDict; dicts have no
        # .remove() -- this likely needs `del self.joined_channels[...]`.
        self.joined_channels.remove(event.target)
        return

    # This method fires when the server disconnects the bot for some reason.
    # Ideally, the bot should try to connect again after a random number of
    # seconds.
    def on_disconnect(self, connection, event):
        delay = random.randint(60, 180)
        logger.warn("Connection dropped from server " + self.server +
                    ". Sleeping for " + str(delay) + " seconds.")
        time.sleep(delay)
        logger.warn("Reconnecting to server " + self.server + " on port " +
                    str(self.port) + ".")
        try:
            irc.bot.SingleServerIRCBot.connect(self,
                                               [(self.server, self.port)],
                                               self.nick, self.nick)
            logger.info("Successfully reconnected to server " +
                        self.server + ".")
        # NOTE(review): bare except swallows everything, including
        # KeyboardInterrupt/SystemExit -- consider `except Exception`.
        except:
            logger.warn("Unable to reconnect to " + self.server +
                        ". Something's really wrong.")

    # This method fires when the bot receives a private message. For the
    # moment, if it's the bot's owner always learn from the text because this
    # is an ideal way to get more interesting stuff into the bot's brain.
    # It'll make a good place to look for and respond to specific commands,
    # too.
    def on_privmsg(self, connection, line):
        # IRC nick that sent a line to the bot in private chat.
        sending_nick = line.source.split("!~")[0]

        # Line of text sent from the channel or private message.
        irc_text = line.arguments[0]

        # String that holds what may or may not be a channel name.
        possible_channel_name = None

        # String that may or may not hold a respond to a channel in ghost
        # mode.
        irc_response = None

        # Handle to an HTTP request object.
        http_connection = ""

        # JSON document containing responses from the conversation engine.
        json_response = {}

        # See if the owner is authenticating to the bot.
        if "!auth " in irc_text:
            self._authenticate(connection, sending_nick, irc_text)
            return

        # Handle messages from the bot's owner (if authenticated).
        if sending_nick == self.owner:
            if not self.authenticated:
                connection.privmsg(sending_nick, "You're not authenticated.")
                return

            # If the owner asks for online help, provide it.
            if irc_text == "!help" or irc_text == "!commands":
                self._help(connection, sending_nick)
                return

            # See if the owner is asking the bot to self-terminate.
            if irc_text == "!quit":
                logger.info("The bot's owner has told it to shut down.")
                connection.privmsg(sending_nick,
                                   "I get the hint. Shuttin' down.")
                sys.exit(0)

            # See if the owner is asking for the bot's current configuration.
            if irc_text == "!config":
                self._current_config(connection, sending_nick)
                return

            # See if the owner is asking the bot to ping the conversation
            # engine's server.
            if irc_text == "!ping":
                self._ping(connection, sending_nick)
                return

            # See if the owner is asking the bot to change its nick.
            if "!nick" in irc_text:
                self._nick(connection, irc_text, sending_nick)
                return

            # See if the owner is asking the bot to join a channel.
            if "!join " in irc_text:
                self._join(connection, irc_text, sending_nick)
                return

            # See if the owner is flipping the self.respond flag.
            if "!respond" in irc_text:
                self._respond(connection, irc_text, sending_nick)
                return

            # See if the owner is asking for help on ghost mode.
            if "!ghosthelp" in irc_text:
                self._ghost_help(connection, sending_nick)
                return

            # See if the owner is flipping the self.ghost flag.
            if "!ghost" in irc_text:
                self._ghost_mode(connection, sending_nick)
                return

            # If the bot's in ghost mode, determine whether or not the bot's
            # owner has sent text destined for a channel the bot's sitting
            # in. If this is the case, send the channel the text sent by the
            # bot's owner.
            possible_channel_name = irc_text.split()[0]
            logger.debug("Value of possible_channel_name: " +
                         possible_channel_name)
            if self.ghost:
                if "#" in possible_channel_name:
                    # Test to see if the bot is in the channel in question.
                    in_channel = False
                    for channel in self.joined_channels:
                        if channel == possible_channel_name:
                            in_channel = True
                            break
                    if not in_channel:
                        logger.debug("Not in channel " +
                                     possible_channel_name + ".")
                        connection.privmsg(
                            sending_nick, "I'm not in channel " +
                            possible_channel_name + ".")
                        return
                    logger.debug("In channel " + possible_channel_name + ".")

                    # Send the text to the channel.
                    irc_response = " ".join(irc_text.split()[1:])
                    logger.debug("Value of irc_response: " + irc_response)
                    connection.privmsg(possible_channel_name, irc_response)

            # Always learn from private messages from the bot's owner. Do
            # not respond to them if the bot's in ghost mode. Determine
            # whether or not a #channelname is at the head of the text and if
            # so elide it by setting the line of text from the IRC channel to
            # the IRC response which already has the #channelname removed.
            if "#" in possible_channel_name:
                irc_text = " ".join(irc_text.split()[1:])
                logger.debug(
                    "Got a possible channel name. Set value of irc_text to: "
                    + str(irc_text))

            # Train the bot on text sent by the bot's owner.
            # NOTE(review): _teach_brain() already returns a parsed dict, so
            # json.loads() on its result raises TypeError -- confirm; either
            # the helper should return text or this should skip json.loads.
            json_response = json.loads(self._teach_brain(irc_text))
            if json_response['id'] != 200:
                logger.warn(
                    "DixieBot.on_privmsg(): Conversation engine returned error code "
                    + str(json_response['id']) + ".")

            # Don't get responses when in ghost mode.
            if self.ghost:
                return

            # Get a response for text sent by the bot's owner.
            json_response = json.loads(self._get_response(irc_text))
            if json_response['id'] != 200:
                logger.warn(
                    "DixieBot.on_privmsg(): Conversation engine returned error code "
                    + str(json_response['id']) + ".")
                return

            # Send the response text back to the bot's owner.
            connection.privmsg(sending_nick, json_response['response'])
            return
        else:
            logger.debug(
                "Somebody messaged me. The content of the message was: " +
                irc_text)

    # Helper method for authenticating the bot's owner.
    def _authenticate(self, connection, nick, text):
        logger.warn("IRC user " + nick +
                    " is attempting to authenticate to the bot.")
        # Substring match of the shared password anywhere in the message.
        if self.password in text:
            connection.privmsg(nick,
                               "Authentication confirmed. Welcome back.")
            # The authenticating nick becomes the owner of record.
            self.owner = nick
            self.authenticated = True
            return
        else:
            connection.privmsg(nick, "Incorrect.")
            return

    # Helper method that implements online help.
    def _help(self, connection, nick):
        connection.privmsg(nick, "Here are the commands I support:")
        connection.privmsg(
            nick, "!help and !commands - You're reading them right now.")
        connection.privmsg(nick, "!quit - Shut me down.")
        connection.privmsg(
            nick, "!auth - Authenticate your current IRC nick as my admin.")
        connection.privmsg(nick, "!config - Send my current configuration.")
        connection.privmsg(
            nick,
            "!ping - Ping the conversation engine to make sure I can contact it."
        )
        connection.privmsg(nick,
                           "!nick <new nick> - Try to change my IRC nick.")
        connection.privmsg(nick, "!join <channel> - Join a channel.")
        connection.privmsg(
            nick, "!respond - Toggle respond/don't respond to users flag.")
        connection.privmsg(nick,
                           "!ghosthelp - Get online help for ghost mode.")
        connection.privmsg(
            nick,
            "!ghost - Whether or not the bot's registered owner can remotely interact with a channel the bot's a member of using the bot as a client."
        )
        return

    # Helper method that tells the bot's owner what the bot's current runtime
    # configuration is.
    def _current_config(self, connection, nick):
        connection.privmsg(nick, "Here's my current runtime configuration.")
        connection.privmsg(nick, "Channels I'm connected to: ")
        for channel in self.joined_channels:
            connection.privmsg(nick, " " + channel)
        connection.privmsg(nick, "Current nick: " + self.nick)
        connection.privmsg(
            nick,
            "Canonical name (for interacting with the conversation engine): "
            + self.canonical_name)
        connection.privmsg(
            nick, "Server and port: " + self.server + " " + str(self.port) +
            "/tcp")
        if self.usessl:
            connection.privmsg(nick,
                               "My connection to the server is encrypted.")
        else:
            connection.privmsg(
                nick, "My connection to the server isn't encrypted.")
        if self.respond:
            connection.privmsg(nick, "I respond to people talking to me.")
        else:
            connection.privmsg(nick,
                               "I don't respond to people talking to me.")
        if self.ghost:
            connection.privmsg(nick,
                               "I am monitoring IRC channels in ghost mode.")
        else:
            connection.privmsg(nick, "I am not in ghost mode.")
        return

    # Helper method that pings the bot's conversation engine. I realize that
    # doing this is probably a little weird, but seeing as how I'm splitting
    # everything else out into helper methods to make adding functionality
    # later on easier I may as well.
    def _ping(self, connection, nick):
        connection.privmsg(nick, "Pinging the conversation engine...")
        http_connection = requests.get(self.engine + "/ping")
        if http_connection.text == "pong":
            connection.privmsg(nick, "I can hit the conversation engine.")
        else:
            connection.privmsg(
                nick,
                "I don't seem to be able to reach the conversation engine.")
        return

    # Helper method that will allow the bot to change its nick.
    def _nick(self, connection, text, nick):
        connection.privmsg(nick, "Trying to change my IRC nick...")
        # Second whitespace-delimited token is the new nick.
        self.nick = text.split()[1].strip()
        connection.nick(self.nick)
        logger.debug("New IRC nick: " + self.nick)
        connection.privmsg(nick, "Done.")
        return

    # Helper method that will allow the bot to join a channel.
    def _join(self, connection, text, nick):
        # Second whitespace-delimited token is the channel name.
        new_channel = text.split()[1].strip()
        connection.privmsg(nick,
                           "Trying to join channel " + new_channel + ".")
        logger.debug("Trying to join channel " + new_channel + ".")
        connection.join(new_channel)
        self.joined_channels[new_channel] = 1
        connection.privmsg(nick, "Joined " + new_channel + ".")
        return

    # Helper method that flips the bot's mode from "respond when spoken to"
    # to don't respond when spoken to.
    def _respond(self, connection, text, nick):
        if self.respond == True:
            self.respond = False
            logger.info("Turn off the bot's auto-response mode.")
            connection.privmsg(nick,
                               "I won't respond to people talking to me.")
            return
        if self.respond == False:
            self.respond = True
            logger.info("Turn on the bot's auto-response mode.")
            connection.privmsg(nick,
                               "Now responding to people talking to me.")
            return

    # Send the user online help for ghost mode.
    def _ghost_help(self, connection, nick):
        connection.privmsg(
            nick,
            "Ghost mode lets you interact with any channel I'm sitting in remotely so you don't have to join it."
        )
        connection.privmsg(
            nick,
            "This is ideal if you want to maintain a certain degree of stealth."
        )
        connection.privmsg(
            nick,
            "I can join the channel from one server and interact with everyone like a bot, and you can connect from another server without joining any channels, !auth to me, and communicate through me."
        )
        connection.privmsg(
            nick,
            "If I get rumbled, I get bounced and your disposable server can be banned, and all you have to do is get a copy of my conversation engine to preserve me. You should be okay."
        )
        connection.privmsg(
            nick,
            "Please note that if you have me join a number of busy channels you may not be able to keep up with all the traffic, so choose the channels I join wisely. Keep the number small for best results."
        )
        connection.privmsg(
            nick,
            "Put the name of the channel you want me to send text to at the front of a private message, like this:"
        )
        connection.privmsg(nick, "/msg botname")
        connection.privmsg(nick, "#somechannel Hello, world.")
        connection.privmsg(
            nick,
            "I will send activity in the channel back to you via the same privmsg as long as you're authenticated."
        )
        return

    # Flips the ghost mode flag.
    def _ghost_mode(self, connection, nick):
        if self.ghost == False:
            self.ghost = True
            logger.info("Ghost mode now activated.")
            connection.privmsg(nick, "Ghost mode activated.")
            connection.privmsg(
                nick,
                "You can now interact with the following channels through me: "
            )
            for channel in self.joined_channels:
                connection.privmsg(nick, " " + channel)
            return
        if self.ghost == True:
            self.ghost = False
            logger.info("Ghost mode now deactivated.")
            connection.privmsg(nick, "Ghost mode deactivated.")
            return

    # This method fires every time a public message is posted to an IRC
    # channel. Technically, 'line' should be 'event' but I'm just now getting
    # this module figured out...
    def on_pubmsg(self, connection, line):
        # JSON document from the conversation engine.
        json_response = {}

        # IRC nick that sent a line to the channel.
        sending_nick = line.source.split("!~")[0]
        logger.debug("Sending nick: " + sending_nick)

        # Line of text sent from the channel.
        irc_text = line.arguments[0]

        # If the line is from the bot's owner, learn from it and then decide
        # whether to respond or not. Just in case somebody grabs the nick of
        # the bot's owner, don't respond if they're not authenticated
        # (because that could go real bad, real fast...)
        if sending_nick == self.owner and self.authenticated:
            # If the bot's owner addressed it directly, always respond. Just
            # make sure to remove the bot's nick from the text to minimize
            # spurious entries in the bot's brain.
            asked_directly = irc_text.split(':')[0].strip()
            if asked_directly == self.nick:
                logger.debug(
                    "The bot's owner addressed the construct directly. This is a special case."
                )
                # Extract the dialogue from the text in the IRC channel.
                dialogue_text = irc_text.split(':')[1].strip()

                # Send a request to train the conversation engine on the
                # text.
                # NOTE(review): as in on_privmsg(), _teach_brain() and
                # _get_response() already return parsed dicts, so
                # json.loads() on their results raises TypeError -- confirm.
                logger.debug("Training engine on text: " + dialogue_text)
                json_response = json.loads(self._teach_brain(dialogue_text))
                if json_response['id'] != int(200):
                    logger.warn(
                        "DixieBot.on_pubmsg(): Conversation engine returned error code "
                        + str(json_response['id']) + ".")
                    return

                # If the bot is in ghost mode, do not respond.
                if self.ghost:
                    return

                # Get a response to the text from the channel.
                json_response = json.loads(self._get_response(irc_text))
                if json_response['id'] != int(200):
                    logger.warn(
                        "DixieBot.on_pubmsg(): Conversation engine returned error code "
                        + str(json_response['id']) + ".")
                    return

                # Send the reply to the channel.
                connection.privmsg(line.target, json_response['response'])
                return

            # Otherwise, just learn from the bot's owner.
            json_response = json.loads(self._teach_brain(irc_text))
            if json_response['id'] != int(200):
                logger.warn(
                    "DixieBot.on_pubmsg(): Conversation engine returned error code "
                    + str(json_response['id']) + ".")
                return

            # Check the respond/don't respond flag. If it's set to False,
            # don't say anything.
            if not self.respond:
                return

            # If the respond/don't respond flag it set to True, decide if the
            # bot is going to respond or not. To be polite to people, only
            # respond 5% of the time. 10% was too much.
            roll = random.randint(1, 100)
            if roll <= 5:
                json_response = json.loads(self._get_response(irc_text))
                if json_response['id'] != int(200):
                    logger.warn(
                        "DixieBot.on_pubmsg(): Conversation engine returned error code "
                        + str(json_response['id']) + ".")
                    return

                # connection.privmsg() can be used to send text to either a
                # channel or a user.
                # Send the response.
                connection.privmsg(line.target, json_response['response'])
            return

        # If the line is not from the bot's owner, and the bot is in ghost
        # mode, relay the line to the bot's owner via privmsg.
        if self.ghost and self.authenticated:
            logger.debug("Relaying a line of text from " + line.target +
                         " to the bot's owner.")
            connection.privmsg(self.owner, line.target + ":: " + irc_text)

        # If the line is not from the bot's owner, decide randomly if the bot
        # should learn from it, or learn from and respond to it. Respect the
        # respond/don't respond flag.
        roll = random.randint(1, 10)
        if roll == 1:
            logger.debug("Learning from the last line seen in the channel.")
            # Never learn from blacklisted text.
            if self.wordfilter.blacklisted(irc_text):
                logger.warn("Wordfilter: Nope nope nope...")
                return
            json_response = json.loads(self._teach_brain(irc_text))
            if json_response['id'] != int(200):
                logger.warn(
                    "DixieBot.on_pubmsg(): Conversation engine returned error code "
                    + str(json_response['id']) + ".")
            return
        if roll == 2:
            logger.debug(
                "Learning from the last line seen in the channel. I might respond to it."
            )
            # Never learn from blacklisted text.
            if self.wordfilter.blacklisted(irc_text):
                logger.warn("Wordfilter: Nope nope nope...")
                return
            json_response = json.loads(self._teach_brain(irc_text))
            if json_response['id'] != int(200):
                logger.warn(
                    "DixieBot.on_pubmsg(): Conversation engine returned error code "
                    + str(json_response['id']) + ".")
                return

            # Check the respond/don't respond flag. If it's set to False,
            # don't say anything.
            if not self.respond:
                return

            # Get and send a response.
            json_response = json.loads(self._get_response(irc_text))
            if json_response['id'] != int(200):
                logger.warn(
                    "DixieBot.on_pubmsg(): Conversation engine returned error code "
                    + str(json_response['id']) + ".")
                return
            connection.privmsg(line.target, json_response['response'])
            return

    # This method should fire when a client in the current channel emits a
    # QUIT event relayed by the server. It detects the bot's owner
    # disconnecting and deauthenticates them.
    def on_quit(self, connection, event):
        sending_nick = event.source.split("!~")[0]
        if event.type == "quit" and sending_nick == self.owner and self.authenticated:
            logger.info("The bot's owner has disconnected. Deauthenticating.")
            self.authenticated = False
            # NOTE(review): `line` is undefined in this method (the handler
            # receives `event`) -- this raises NameError; presumably meant
            # connection.privmsg(self.owner, ...). Confirm and fix.
            connection.privmsg(line.target, "Seeya, boss.")
            return

    # Sends text to train the conversation engine on.
    def _teach_brain(self, text):
        # Custom headers required by the conversation engine.
        headers = {"Content-Type": "application/json"}

        # HTTP request object handle.
        http_request = ""

        # JSON documents sent to and received from the conversation engine.
        json_request = {}
        json_request['botname'] = self.canonical_name
        json_request['apikey'] = self.api_key
        json_request['stimulus'] = text
        json_response = {}

        # Make an HTTP request to the conversation engine.
        # NOTE(review): returns a parsed dict, while callers run the result
        # through json.loads() again -- one of the two layers must change.
        http_request = requests.put(self.engine + "/learn",
                                    headers=headers,
                                    data=json.dumps(json_request))
        json_response = json.loads(http_request.content)
        return json_response

    # Gets a response from the conversation engine. Return a response.
    def _get_response(self, text):
        # Custom headers required by the conversation engine.
        headers = {"Content-Type": "application/json"}

        # HTTP request object handle.
        http_request = ""

        # Response to send to the channel or user.
        response = ""

        # JSON documents sent to and received from the conversation engine.
        json_request = {}
        json_request['botname'] = self.canonical_name
        json_request['apikey'] = self.api_key
        json_request['stimulus'] = text
        json_response = {}

        # Contact the conversation engine to get a response.
        # NOTE(review): returns a parsed dict, while callers run the result
        # through json.loads() again -- one of the two layers must change.
        http_request = requests.get(self.engine + "/response",
                                    headers=headers,
                                    data=json.dumps(json_request))
        json_response = json.loads(http_request.content)
        return json_response
class TwitterMarkov(object):

    """
    Posts markov-generated text to twitter.

    Args:
        screen_name (str): Twitter user account
        corpus (str): Text file to read to generate text.
        api (:ref:`tweepy.API <tweepy:tweepy.api>`): API to use to post tweets.
        dry_run (boolean): If set, TwitterMarkov won't actually post tweets.
        blacklist (Sequence): A list of words to avoid generating.
    """

    # Name (dict key) of the model used when none is given explicitly.
    default_model = None
    # Lazily-populated cache of recent tweet texts (see ``recently_tweeted``).
    _recently_tweeted = []

    def __init__(self, screen_name, corpus=None, **kwargs):
        if 'api' in kwargs:
            self.api = kwargs.pop('api')
        else:
            self.api = tbu.API(screen_name=screen_name, **kwargs)

        # Prefer the API's own logger when it provides one.
        try:
            self.log = self.api.logger
        except AttributeError:
            self.log = logging.getLogger(screen_name)

        self.screen_name = screen_name
        self.config = self.api.config
        self.dry_run = kwargs.pop('dry_run', False)

        self.log.debug('screen name: %s', screen_name)
        self.log.debug("dry run: %s", self.dry_run)

        try:
            corpus = corpus or self.config.get('corpus')

            if isinstance(corpus, six.string_types):
                corpora = [corpus]
            elif isinstance(corpus, Iterable):
                corpora = corpus
            else:
                raise RuntimeError('Unable to find any corpora!')

            self.corpora = [b for b in corpora if b is not None]

            state_size = kwargs.get('state_size', self.config.get('state_size'))
            self.models = self._setup_models(self.corpora, state_size)

        except RuntimeError as e:
            self.log.error(e)
            # bare ``raise`` keeps the original traceback (``raise e`` resets it on Py2)
            raise

        self.log.debug('models: %s', list(self.models.keys()))

        blacklist = kwargs.get('blacklist') or self.config.get('blacklist', [])
        self.wordfilter = Wordfilter()
        self.wordfilter.add_words(blacklist)
        self.log.debug('blacklist: %s terms', len(self.wordfilter.blacklist))

        if kwargs.get('learn', True):
            self.log.debug('learning...')
            self.learn_parent()

    def _setup_models(self, corpora, state_size):
        """
        Given a list of paths to corpus text files or file-like objects,
        set up markovify models for each. These models are returned
        in a dict, (with the basename as key).

        Raises:
            AttributeError, IOError: re-raised after logging when a corpus
                cannot be opened or read.
        """
        out = dict()
        state_size = state_size or 2
        self.log.debug('setting up models (state_size=%s)', state_size)

        # Initialized up front so the IOError handler below can't hit a
        # NameError when a file-like corpus fails before any path was seen.
        corpus_path = None
        try:
            for pth in corpora:
                if isinstance(pth, six.string_types):
                    corpus_path = os.path.expanduser(pth)
                    name = os.path.basename(corpus_path)
                    m = open(corpus_path)
                else:
                    # file-like object: use its name, falling back to repr()
                    m = pth
                    try:
                        name = m.name
                    except AttributeError:
                        name = repr(m)
                try:
                    out[name] = markovify.text.NewlineText(m.read(), state_size=state_size)
                finally:
                    m.close()

        except AttributeError as e:
            self.log.error(e)
            self.log.error("Probably couldn't find the model file.")
            raise

        except IOError as e:
            self.log.error(e)
            self.log.error('Error reading %s', corpus_path)
            raise

        # NOTE(review): assumes corpora[0] is a path; a file-like first
        # corpus would make basename() fail here — TODO confirm callers.
        self.default_model = os.path.basename(corpora[0])
        return out

    @property
    def recently_tweeted(self):
        '''Returns recent tweets from ``self.screen_name``.'''
        if not self._recently_tweeted:
            recent_tweets = self.api.user_timeline(self.screen_name,
                                                   count=self.config.get('checkback', 20))
            self._recently_tweeted = [x.text for x in recent_tweets]
        return self._recently_tweeted

    def check_tweet(self, text):
        '''Check if a string contains blacklisted words or is similar to a recent tweet.'''
        text = text.strip().lower()

        if not text:
            self.log.info("Rejected (empty)")
            return False

        if self.wordfilter.blacklisted(text):
            self.log.info("Rejected (blacklisted)")
            return False

        if tbu.helpers.length(text) > 280:
            self.log.info("Rejected (too long)")
            return False

        for line in self.recently_tweeted:
            if text in line.strip().lower():
                self.log.info("Rejected (Identical)")
                return False

            # Fuzzy comparison on letters-only text to catch near-duplicates.
            if Levenshtein.ratio(re.sub(r'\W+', '', text),
                                 re.sub(r'\W+', '', line.lower())) >= LEVENSHTEIN_LIMIT:
                self.log.info("Rejected (Levenshtein.ratio)")
                return False

        return True

    def reply_all(self, model=None, **kwargs):
        '''Reply to all mentions since the last time ``self.screen_name`` sent a reply tweet.'''
        mentions = self.api.mentions_timeline(since_id=self.api.last_reply)
        self.log.info('replying to all...')
        self.log.debug('mentions found: %d', len(mentions))

        if not self.dry_run:
            for status in mentions:
                self.reply(status, model, **kwargs)

    def reply(self, status, model=None, max_len=140, **kwargs):
        '''
        Compose a reply to the given ``tweepy.Status``.

        Args:
            status (tweepy.Status): status to reply to.
            model (str): name of model.
            max_len (int): maximum length of tweet (default: 140)
        '''
        self.log.debug('Replying to a mention')

        if status.user.screen_name == self.screen_name:
            self.log.debug('Not replying to self')
            return

        if self.wordfilter.blacklisted(status.text):
            self.log.debug('Not replying to tweet with a blacklisted word (%d)', status.id)
            return

        # reserve room for "@screen_name " in front of the generated text
        text = self.compose(model, max_len=max_len - 2 - len(status.user.screen_name), **kwargs)
        reply = '@{} {}'.format(status.user.screen_name, text)
        self.log.info(reply)
        self._update(reply, in_reply=status.id_str)

    def tweet(self, model=None, **kwargs):
        '''
        Post a tweet composed by "model" (or the default model).
        Most of these arguments are passed on to Markovify.

        Args:
            model (str): one of self.models
            max_len (int): maximum length of the output (default: 140).
            init_state (tuple): tuple of words to seed the model
            tries (int): (default: 10)
            max_overlap_ratio (float): Used for testing output (default: 0.7).
            max_overlap_total (int): Used for testing output (default: 15)
        '''
        # FIX: pass the model *name* through; compose() performs the dict
        # lookup itself. The previous code looked the model object up here
        # and compose()'s second lookup then returned None and crashed.
        text = self.compose(model, **kwargs)
        if text:
            self._update(text)

    def _update(self, tweet, in_reply=None):
        # Honor dry_run: never hit the API when set.
        if not self.dry_run:
            self.api.update_status(status=tweet, in_reply_to_status_id=in_reply)

    def compose(self, model=None, max_len=140, **kwargs):
        '''
        Returns a string generated from "model" (or the default model).
        Most of these arguments are passed on to Markovify.

        Args:
            model (str): one of self.models
            max_len (int): maximum length of the output (max: 280, default: 140).
            init_state (tuple): tuple of words to seed the model
            tries (int): (default: 10)
            max_overlap_ratio (float): Used for testing output (default: 0.7).
            max_overlap_total (int): Used for testing output (default: 15)

        Returns:
            str

        Raises:
            RuntimeError: when the model cannot generate a sentence.
        '''
        # Keep the *name* around: the retry below must pass the name, not
        # the model object (passing the object made the second lookup fail).
        model_name = model or self.default_model
        model = self.models.get(model_name)
        max_len = min(280, max_len)

        self.log.debug('making sentence, max_len=%s, %s', max_len, kwargs)
        text = model.make_short_sentence(max_len, **kwargs)

        if text is None:
            self.log.error('model failed to generate a sentence')
            raise RuntimeError('model failed to generate a sentence')

        # convert to unicode in Python 2
        if hasattr(text, 'decode'):
            text = text.decode('utf8')

        # Check tweet against blacklist and recent tweets; regenerate on
        # failure. (Previously the check was skipped on the Py2 bytes path.)
        if not self.check_tweet(text):
            text = self.compose(model=model_name, max_len=max_len, **kwargs)

        self.log.debug('TwitterMarkov: %s', text)
        return text

    def learn_parent(self, corpus=None, parent=None):
        '''
        Add recent tweets from the parent account (since the last time
        ``self.screen_name`` tweeted) to the corpus.
        This is subject to the filters described in ``bots.yaml``.
        '''
        parent = parent or self.config.get('parent')
        corpus = corpus or self.corpora[0]

        if not parent or not self.api.last_tweet:
            self.log.debug('Cannot teach: missing parent or tweets')
            return

        tweets = self.api.user_timeline(parent, since_id=self.api.last_tweet)

        try:
            gen = checking.generator(tweets,
                                     no_mentions=self.config.get('filter_mentions'),
                                     no_hashtags=self.config.get('filter_hashtags'),
                                     no_urls=self.config.get('filter_urls'),
                                     no_media=self.config.get('filter_media'),
                                     no_symbols=self.config.get('filter_symbols'),
                                     no_badwords=self.config.get('filter_parent_badwords', True),
                                     no_retweets=self.config.get('no_retweets'),
                                     no_replies=self.config.get('no_replies')
                                     )
            self.log.debug('%s is learning', corpus)
            with open(corpus, 'a') as f:
                f.writelines(tweet + '\n' for tweet in gen)

        except IOError as e:
            # best-effort: log and continue rather than crash the bot
            self.log.error('Learning failed for %s', corpus)
            self.log.error(e)
indexFile.write("0") indexFile.close() indexFile = open(indexDOTtxt, 'r') index = int(indexFile.read()) indexFile.close() ## Open the text file containing our dictionary. ## We are taking the argument from the command line, ## but you can also hardcode your file here by following ## the above procedure for index.txt wordFile = open(argFile, 'r') words = wordFile.readlines() wordFile.close() ## Set up Wordfilter. This is used to help us avoid auto-tweeting words that are not nice. wordfilter = Wordfilter() ## I have a few extras that I specifically do not want my bot tweeting, so I add them to the filter here. wordfilter.add_words(["rape", "rapist", "sex", "molest", "drug"]) ############################################################################## ## Here we go. ## For each line in the words file, until we run out of lines, do some things: for line in words: ## We check to see if the next word in queue is caught by Wordfilter... if wordfilter.blacklisted(str.upper(words[index].rstrip("\r\n"))): print("Yikes, " + str.upper(words[index].rstrip("\r\n")) + " might be problematic.\n We'll skip that one.") index = index + 1 else: ## ...and if not, we continue on to tweet it. ## Print the word at the current index, make it UPPER CASE, and chomp() the trailing newline off of it. ## Remove the .upper method if that's not what you want
if os.path.exists(historyFilename): history = set( [s.strip() for s in codecs.open(historyFilename, 'r', 'utf-8')]) else: history = set() if not testing: mastodon = Mastodon(client_id='clientcred.txt', api_base_url=mastodonUrl) mastodon.log_in(open('email.txt').read().strip(), open('password.txt').read().strip(), scopes=['write']) publicStatusCycle = 0 wordfilter = Wordfilter() #mashapeKey = open('mashapekey.txt').read().strip() def domainrStatus(domain): params = {'mashape-key': mashapeKey, 'domain': domain} url = domainrEndpoint + 'status?' + urllib.urlencode(params) urldoc = urllib.urlopen(url) result = json.load(urldoc) urldoc.close() return result['status'][0]['status'].split() while True: random.shuffle(words)
# Outputs two texts: # changewords(text)[0] has suggestions for replacing words (human-directed). # changewords(text)[1] has randomly replaced words (automatic). # New addition: sentences are split with __ as delimiter for textprint (editable area) from random import randint from nltk.corpus import wordnet as wn from nltk import word_tokenize as tok from wordfilter import Wordfilter wf = Wordfilter() #https://github.com/dariusk/wordfilter #Words that may be offensive or that have undesirable results in WordNet: ignore = [ 'will', 'more', 'must', 'there', 'john', 'screw', 'queer', 'crap', 'shit', 'ass', 'sex', 'f**k', 'f****r', 'm**********r', 'f***s', 'f****d', 'f*****g' ] with open('data/top1000.txt') as vocdoc: topwords = [w[:-1] for w in vocdoc.readlines()] def changewords(text): """Returns two texts [T1, T2]: T1 text with certain words (in all caps) followed by potential synonyms in parentheses, T2 text with randomly-chosen synonyms in all caps that replace certain words.""" i = 0 text = text.split() #text = tok(text) - more accurate, but difficult to join below textprint = [] #Text will appear as so: she SHOUTED (shout out, call...
async def ytplay(requested_by, query, message):
    """Search YouTube for *query*, filter it, then download, transcode and
    post the track, updating the bot account's profile while it plays.

    Args:
        requested_by: display name of the requesting user (interpolated
            into captions and the generated cover).
        query: free-text YouTube search string.
        message: incoming chat message object (pyrogram-style; must
            support ``reply_text``/``reply_photo``) — TODO confirm type.

    Side effects: mutates the module-level ``playing`` flag, writes and
    removes ``audio.webm`` / ``final.png`` in the working directory, and
    changes the bot profile name/bio/photo.
    """
    global playing
    ydl_opts = {"format": "bestaudio"}
    #n = await send(f"__**Searching for {query} on YouTube.**__")
    # Progress message; edited in place at each stage below.
    m = await message.reply_text(
        f"__**Searching for {query} on YouTube.**__", quote=False
    )
    try:
        results = await arq.youtube(query)
        if not results.ok:
            await message.reply_text(results.result)
            return
        results = results.result
        # First search hit is used unconditionally.
        link = f"https://youtube.com{results[0].url_suffix}"
        title = results[0].title
        thumbnail = results[0].thumbnails[0]
        duration = results[0].duration
        views = results[0].views
        songname = title.lower()
        # Language detection on the lowercased title (used to block Korean below).
        detecting = detect(songname)
        # Per-request blacklist of disallowed title words.
        wordfilter = Wordfilter()
        wordfilter.addWords(['yamete', 'kudasai', 'arigato', 'hentai'])
        if wordfilter.blacklisted(songname):
            await m.edit(f"__**Shame on you ! {requested_by}\nNot allowed song !!!**__\n@wuminjun block him!\n{songname}")
            playing = False
            return
        if detecting == "ko":
            await m.edit(f"__**Not allowed Language !!!**__ {songname}")
            playing = False
            return
        # Reject tracks of an hour or more.
        if time_to_seconds(duration) >= 3600:
            await m.edit("__**Bruh! Only songs within 60 Mins.**__")
            playing = False
            return
    except Exception as e:
        # NOTE(review): broad catch — any failure above is reported as
        # "no match" and logged to stdout only.
        await m.edit("__**Found No Song Matching Your Query.**__")
        playing = False
        print(str(e))
        return
    await m.edit("__**Processing Thumbnail.**__")
    # Reflect the now-playing track in the bot's profile name and bio.
    await app.update_profile(first_name=f"🔉{title[:35]} ",bio = f"__{title[:35]}__ ijro etilmoqda")
    await generate_cover(requested_by, title, views, duration, thumbnail)
    await m.edit("__**Downloading Music.**__")
    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        info_dict = ydl.extract_info(link, download=False)
        audio_file = ydl.prepare_filename(info_dict)
        ydl.process_info(info_dict)
    await m.edit("__**Transcoding.**__")
    # Normalize the download's filename before handing it to transcode().
    os.rename(audio_file, "audio.webm")
    transcode("audio.webm")
    await m.delete()
    caption = f"🏷 **Name:** [{title}]({link})\n⏳ **Duration:** {duration}\n" \
        + f"🎧 **Requested By:** {requested_by}\n📡 **Platform:** YouTube"
    m = await message.reply_photo(
        photo="final.png",
        caption=caption,
    )
    msg_id = m.message_id
    if message.chat.username != "music_streaming_channel":
        # NOTE(review): ``copy`` is only bound on this branch but deleted
        # unconditionally after the sleep below — confirm intended.
        copy = await app.copy_message(SUDO_CHANNEL, message.chat.username, msg_id)
    await app.set_profile_photo(photo="final.png")
    #await app.pin_chat_message(SUDO_CHAT_ID, msg_id, disable_notification=True)
    os.remove("final.png")
    # Block for the track's full duration, then clean up the messages.
    await asyncio.sleep(int(time_to_seconds(duration)))
    await m.delete()
    await copy.delete()
    # Drop all profile photos except the most recent one.
    photos = await app.get_profile_photos("me")
    await app.delete_profile_photos([p.file_id for p in photos[1:]])
    playing = False
class BibleMungingServer(object):
    """CherryPy web application that serves search-and-replace "munged"
    Bible text, tracking recent searches in a sqlite database.

    Attributes set in ``__init__``: ``bible``, ``apptitle``,
    ``appsubtitle``, ``dbpath``, ``wordfilter`` (a ``Wordfilter`` or
    ``False``), ``deploymentinfo``, ``favorite_searches``.
    """

    def __init__(
            self,
            bible: biblemunger.Bible,
            favdict,  #: list[dict],
            apptitle: str,
            appsubtitle: str,
            dbpath: str,
            wordfilter: bool):
        """Build the server.

        Args:
            bible: the ``biblemunger.Bible`` to run replacements against.
            favdict: mapping of favorite search -> replacement.
            apptitle: page title base.
            appsubtitle: page subtitle.
            dbpath: path to the sqlite database file.
            wordfilter: when truthy, filter offensive replacement terms.
        """
        self.bible = bible
        self.apptitle = apptitle
        self.appsubtitle = appsubtitle
        self.dbpath = dbpath
        if wordfilter:
            from wordfilter import Wordfilter
            self.wordfilter = Wordfilter()
            # Sentinel term so tests can verify the filter is active.
            self.wordfilter.add_words(['QwertyStringUsedForTestingZxcvb'])
        else:
            self.wordfilter = False
        deploymentinfofile = os.path.join(scriptdir, 'deploymentinfo.txt')
        if os.path.exists(deploymentinfofile):
            with open(deploymentinfofile) as df:
                self.deploymentinfo = df.read()
        else:
            self.deploymentinfo = "development version"
        # TODO: refactor this, just use a dictionary directly elsewhere
        self.favorite_searches = []
        for key in favdict.keys():
            self.favorite_searches += [{
                'search': key,
                'replace': favdict[key]
            }]
        # Create the recent_searches table on first run.
        conn = sqlite3.connect(self.dbpath)
        c = conn.cursor()
        c.execute(
            "select name from sqlite_master where type='table' and name='recent_searches'"
        )
        if not c.fetchone():
            self.initialize_database()

    @classmethod
    def fromconfig(cls, configuration: configparser.ConfigParser):
        """Alternate constructor: build the server from a parsed config file."""
        return BibleMungingServer(
            biblemunger.Bible(configuration.get('biblemunger', 'bible')),
            configuration['favorites'],
            configuration.get('biblemunger', 'apptitle'),
            configuration.get('biblemunger', 'appsubtitle'),
            configuration.get('bmweb', 'dbpath'),
            configuration.getboolean('bmweb', 'wordfilter'))

    def search_in_list(self, searchlist, search, replace):
        """Return True if (search, replace) is already in *searchlist*
        (a list of ``{'search': ..., 'replace': ...}`` dicts)."""
        for s in searchlist:
            if s['search'] == search and s['replace'] == replace:
                return True
        else:
            # for/else: runs when the loop finds no match
            return False

    @property
    def recent_searches(self):
        """Recent searches from the database as a list of
        ``{'search': ..., 'replace': ...}`` dicts (fresh query per access)."""
        conn = sqlite3.connect(self.dbpath)
        c = conn.cursor()
        c.execute("select search, replace from recent_searches")
        results = c.fetchall()
        conn.close()
        searches = []
        for r in results:
            searches += [{'search': r[0], 'replace': r[1]}]
        return searches

    def initialize_database(self):
        """Create the ``recent_searches`` table (typeless columns)."""
        conn = sqlite3.connect(self.dbpath)
        c = conn.cursor()
        c.execute('''create table recent_searches (search, replace)''')
        conn.commit()
        conn.close()

    def add_recent_search(self, search, replace):
        """Record a search/replace pair unless it is a favorite, already
        recorded, or its replacement is caught by the word filter."""
        in_faves = self.search_in_list(self.favorite_searches, search, replace)
        in_recent = self.search_in_list(self.recent_searches, search, replace)
        if self.wordfilter:
            filtered = self.wordfilter.blacklisted(replace)
        else:
            filtered = False
        if (in_faves or in_recent or filtered):
            return
        conn = sqlite3.connect(self.dbpath)
        c = conn.cursor()
        # Parameterized insert — safe against SQL injection.
        c.execute("insert into recent_searches values (?, ?)", (search,
                                                                replace))
        conn.commit()
        conn.close()

    @cherrypy.expose
    @cherrypy.tools.mako(filename='index.mako')
    def index(self, search=None, replace=None):
        """Main page handler; renders index.mako from the returned dict.

        Args:
            search: term to find in the bible text (query parameter).
            replace: replacement term (query parameter).
        """
        pagetitle = self.apptitle
        queried = False
        resultstitle = None
        results = None
        sampleresult = None  # NOTE(review): unused — confirm before removing
        if search and replace:
            #resultstitle = "{} ⇒ {}".format(search, replace)
            resultstitle = "{} ⇒ {}".format(search, replace)
            pagetitle = "{}: {}".format(self.apptitle, resultstitle)
            queried = True
            results = self.bible.replace(search, replace)
            if results:
                # Only remember searches that actually matched something.
                self.add_recent_search(search, replace)
        return {
            'pagetitle': pagetitle,
            'apptitle': self.apptitle,
            'appsubtitle': self.appsubtitle,
            'queried': queried,
            'resultstitle': resultstitle,
            'results': results,
            'favorites': self.favorite_searches,
            'recents': self.recent_searches,
            'search': search,
            'replace': replace,
            'deploymentinfo': self.deploymentinfo,
            'filterinuse': bool(self.wordfilter)
        }
def loadlines(filename='poetry.json-stream.gz', startidx=0, count=None, modulo=1): """Yields successive dictionaries from my Gutenberg Poetry corpus gzip. Lines are returned as dictionaries with keys for the Gutenberg ID of the text containing the line of poetry and the line itself. Optional startidx and count parameters allow you to load only a subset of lines (starting at one index and collecting until the count is reached); a modulo parameter, if specified, will only yield the line if its index is divisible by the modulo. (This is a simple proxy for getting a "sampling" of lines.) >>> for line in loadlines(startidx=100, count=5): ... print(line['line']) By the alders in the Summer, By the white fog in the Autumn, By the black line in the Winter; And beside them dwelt the singer, In the green and silent valley. >>> for line in loadlines(modulo=250000): ... print(line['gutenberg_id']) 617 6130 9567 10161 12137 13561 16209 18466 20174 22692 25599 28621 30720 36508 """ wordfilter = Wordfilter() already_seen = set() for i, line in enumerate(gzip.open(filename, mode='rt')): if i < startidx: continue if count is not None and i > startidx + count: break if i % modulo != 0: continue # load the data and decode line = json.loads(line) if wordfilter.blacklisted(line['line']): continue # disqualifying characteristics (looks like a title, has brackets) if isprobablytitle(line['line']): continue if '[' in line['line'] or ']' in line['line']: continue if re.search(r"^\d", line['line']): continue # parse into words words = tuple([x.lower() for x in tokens(line['line'])]) # no short lines, as they're not very interesting if len(words) <= 2: continue # skip if we've already seen something like this if words in already_seen: continue already_seen.add(words) yield line
class TwitterMarkov(object):

    """
    Posts markov-generated text to twitter.

    Args:
        screen_name (str): Twitter user account
        corpus (str): Text file to read to generate text.
        api (:ref:`tweepy.API <tweepy:tweepy.api>`): API to use to post tweets.
        dry_run (boolean): If set, TwitterMarkov won't actually post tweets.
        blacklist (Sequence): A list of words to avoid generating.
    """

    # Name (dict key) of the model used when none is given explicitly.
    default_model = None
    # Lazily-populated cache of recent tweet texts (see ``recently_tweeted``).
    _recently_tweeted = []

    def __init__(self, screen_name, corpus=None, **kwargs):
        if 'api' in kwargs:
            self.api = kwargs.pop('api')
        else:
            self.api = tbu.API(screen_name=screen_name, **kwargs)

        # Prefer the API's own logger when it provides one.
        try:
            self.log = self.api.logger
        except AttributeError:
            self.log = logging.getLogger(screen_name)

        self.screen_name = screen_name
        self.config = self.api.config
        self.dry_run = kwargs.pop('dry_run', False)

        self.log.debug('screen name: %s', screen_name)
        self.log.debug("dry run: %s", self.dry_run)

        try:
            corpus = corpus or self.config.get('corpus')

            if isinstance(corpus, six.string_types):
                corpora = [corpus]
            elif isinstance(corpus, Iterable):
                corpora = corpus
            else:
                raise RuntimeError('Unable to find any corpora!')

            self.corpora = [b for b in corpora if b is not None]

            state_size = kwargs.get('state_size', self.config.get('state_size'))
            self.models = self._setup_models(self.corpora, state_size)

        except RuntimeError as e:
            self.log.error(e)
            # bare ``raise`` keeps the original traceback (``raise e`` resets it on Py2)
            raise

        self.log.debug('models: %s', list(self.models.keys()))

        blacklist = kwargs.get('blacklist') or self.config.get('blacklist', [])
        self.wordfilter = Wordfilter()
        self.wordfilter.add_words(blacklist)
        self.log.debug('blacklist: %s terms', len(self.wordfilter.blacklist))

        if kwargs.get('learn', True):
            self.log.debug('learning...')
            self.learn_parent()

    def _setup_models(self, corpora, state_size):
        """
        Given a list of paths to corpus text files or file-like objects,
        set up markovify models for each. These models are returned
        in a dict, (with the basename as key).

        Raises:
            AttributeError, IOError: re-raised after logging when a corpus
                cannot be opened or read.
        """
        out = dict()
        state_size = state_size or 2
        self.log.debug('setting up models (state_size=%s)', state_size)

        # Initialized up front so the IOError handler below can't hit a
        # NameError when a file-like corpus fails before any path was seen.
        corpus_path = None
        try:
            for pth in corpora:
                if isinstance(pth, six.string_types):
                    corpus_path = os.path.expanduser(pth)
                    name = os.path.basename(corpus_path)
                    m = open(corpus_path)
                else:
                    # file-like object: use its name, falling back to repr()
                    m = pth
                    try:
                        name = m.name
                    except AttributeError:
                        name = repr(m)
                try:
                    out[name] = markovify.text.NewlineText(m.read(), state_size=state_size)
                finally:
                    m.close()

        except AttributeError as e:
            self.log.error(e)
            self.log.error("Probably couldn't find the model file.")
            raise

        except IOError as e:
            self.log.error(e)
            self.log.error('Error reading %s', corpus_path)
            raise

        # NOTE(review): assumes corpora[0] is a path; a file-like first
        # corpus would make basename() fail here — TODO confirm callers.
        self.default_model = os.path.basename(corpora[0])
        return out

    @property
    def recently_tweeted(self):
        '''Returns recent tweets from ``self.screen_name``.'''
        if not self._recently_tweeted:
            recent_tweets = self.api.user_timeline(self.screen_name,
                                                   count=self.config.get('checkback', 20))
            self._recently_tweeted = [x.text for x in recent_tweets]
        return self._recently_tweeted

    def check_tweet(self, text):
        '''Check if a string contains blacklisted words or is similar to a recent tweet.'''
        text = text.strip().lower()

        if not text:
            self.log.info("Rejected (empty)")
            return False

        if self.wordfilter.blacklisted(text):
            self.log.info("Rejected (blacklisted)")
            return False

        if tbu.helpers.length(text) > 280:
            self.log.info("Rejected (too long)")
            return False

        for line in self.recently_tweeted:
            if text in line.strip().lower():
                self.log.info("Rejected (Identical)")
                return False

            # Fuzzy comparison on letters-only text to catch near-duplicates.
            if Levenshtein.ratio(re.sub(r'\W+', '', text),
                                 re.sub(r'\W+', '', line.lower())) >= LEVENSHTEIN_LIMIT:
                self.log.info("Rejected (Levenshtein.ratio)")
                return False

        return True

    def reply_all(self, model=None, **kwargs):
        '''Reply to all mentions since the last time ``self.screen_name`` sent a reply tweet.'''
        mentions = self.api.mentions_timeline(since_id=self.api.last_reply)
        self.log.info('replying to all...')
        self.log.debug('mentions found: %d', len(mentions))

        if not self.dry_run:
            for status in mentions:
                self.reply(status, model, **kwargs)

    def reply(self, status, model=None, max_len=140, **kwargs):
        '''
        Compose a reply to the given ``tweepy.Status``.

        Args:
            status (tweepy.Status): status to reply to.
            model (str): name of model.
            max_len (int): maximum length of tweet (default: 140)
        '''
        self.log.debug('Replying to a mention')

        if status.user.screen_name == self.screen_name:
            self.log.debug('Not replying to self')
            return

        if self.wordfilter.blacklisted(status.text):
            self.log.debug('Not replying to tweet with a blacklisted word (%d)', status.id)
            return

        # reserve room for "@screen_name " in front of the generated text
        text = self.compose(model, max_len=max_len - 2 - len(status.user.screen_name), **kwargs)
        reply = '@{} {}'.format(status.user.screen_name, text)
        self.log.info(reply)
        self._update(reply, in_reply=status.id_str)

    def tweet(self, model=None, **kwargs):
        '''
        Post a tweet composed by "model" (or the default model).
        Most of these arguments are passed on to Markovify.

        Args:
            model (str): one of self.models
            max_len (int): maximum length of the output (default: 140).
            init_state (tuple): tuple of words to seed the model
            tries (int): (default: 10)
            max_overlap_ratio (float): Used for testing output (default: 0.7).
            max_overlap_total (int): Used for testing output (default: 15)
        '''
        # FIX: pass the model *name* through; compose() performs the dict
        # lookup itself. The previous code looked the model object up here
        # and compose()'s second lookup then returned None and crashed.
        text = self.compose(model, **kwargs)
        if text:
            self._update(text)

    def _update(self, tweet, in_reply=None):
        # Honor dry_run: never hit the API when set.
        if not self.dry_run:
            self.api.update_status(status=tweet, in_reply_to_status_id=in_reply)

    def compose(self, model=None, max_len=140, **kwargs):
        '''
        Returns a string generated from "model" (or the default model).
        Most of these arguments are passed on to Markovify.

        Args:
            model (str): one of self.models
            max_len (int): maximum length of the output (max: 280, default: 140).
            init_state (tuple): tuple of words to seed the model
            tries (int): (default: 10)
            max_overlap_ratio (float): Used for testing output (default: 0.7).
            max_overlap_total (int): Used for testing output (default: 15)

        Returns:
            str

        Raises:
            RuntimeError: when the model cannot generate a sentence.
        '''
        # Keep the *name* around: the retry below must pass the name, not
        # the model object (passing the object made the second lookup fail).
        model_name = model or self.default_model
        model = self.models.get(model_name)
        max_len = min(280, max_len)

        self.log.debug('making sentence, max_len=%s, %s', max_len, kwargs)
        text = model.make_short_sentence(max_len, **kwargs)

        if text is None:
            self.log.error('model failed to generate a sentence')
            raise RuntimeError('model failed to generate a sentence')

        # convert to unicode in Python 2
        if hasattr(text, 'decode'):
            text = text.decode('utf8')

        # Check tweet against blacklist and recent tweets; regenerate on
        # failure. (Previously the check was skipped on the Py2 bytes path.)
        if not self.check_tweet(text):
            text = self.compose(model=model_name, max_len=max_len, **kwargs)

        self.log.debug('TwitterMarkov: %s', text)
        return text

    def learn_parent(self, corpus=None, parent=None):
        '''
        Add recent tweets from the parent account (since the last time
        ``self.screen_name`` tweeted) to the corpus.
        This is subject to the filters described in ``bots.yaml``.
        '''
        parent = parent or self.config.get('parent')
        corpus = corpus or self.corpora[0]

        if not parent or not self.api.last_tweet:
            self.log.debug('Cannot teach: missing parent or tweets')
            return

        tweets = self.api.user_timeline(parent, since_id=self.api.last_tweet)

        try:
            gen = checking.generator(tweets,
                                     no_mentions=self.config.get('filter_mentions'),
                                     no_hashtags=self.config.get('filter_hashtags'),
                                     no_urls=self.config.get('filter_urls'),
                                     no_media=self.config.get('filter_media'),
                                     no_symbols=self.config.get('filter_symbols'),
                                     no_badwords=self.config.get('filter_parent_badwords', True),
                                     no_retweets=self.config.get('no_retweets'),
                                     no_replies=self.config.get('no_replies')
                                     )
            self.log.debug('%s is learning', corpus)
            with open(corpus, 'a') as f:
                f.writelines(tweet + '\n' for tweet in gen)

        except IOError as e:
            # best-effort: log and continue rather than crash the bot
            self.log.error('Learning failed for %s', corpus)
            self.log.error(e)
from __future__ import unicode_literals, absolute_import, print_function from wordfilter import Wordfilter import json import re import requests import time from io import BytesIO MEDIAWIKI_API = "https://commons.wikimedia.org/w/api.php" CVAPI = "https://api.projectoxford.ai/vision/v1.0/analyze" HEADERS = {"User-Agent": "picdescbot, http://github.com/elad661/picdescbot"} supported_formats = re.compile('\.(png|jpe?g|gif)$', re.I) word_filter = Wordfilter() # I really don't want the bot to show this kind of imagery! word_filter.add_words(['nazi', 'hitler']) # Blacklist some categories, just in case. These are matched on a substring # basis, against the page's categories and the titles of the wikipages using # the picture. category_blacklist = ['september 11', 'hitler', 'nazi', 'antisemit', 'libel', 'apartheid', 'racism', 'lynching', 'cartoons', 'holocaust', 'stereotypes', 'flags', 'p**n', 'homophobia', 'transpobia', 'logos'] # Gender neutralization helps prevent accidental transphobic juxtapositions # which can occur when CVAPI uses gendered words in the description, but their # gender detection is wrong. Computers shouldn't try to detect gender, and
MUTE_TIME = 14 COOLDOWN = 2 BAND_SERVER = 743519350501277716 TEST_SERVER = 746851271901708428 MESSAGES_CHANNEL = 784197374959943731 weatherUrl = config.weatherUrl forecastUrl = config.forecastUrl mtUrl = config.mtUrl timeFormat = "%A %I:%M%p" intents = discord.Intents.default() intents.members = True intents.reactions = True client = commands.Bot(command_prefix='!', intents=intents, help_command=None) client.agreeCounter = 0 wordfilter = Wordfilter() wordfilter.clearList() wordfilter.addWords(config.banned_words) client.last_response_time = datetime.now() - timedelta(minutes=COOLDOWN + 1) client.mutedTime = datetime.now() - timedelta(minutes=MUTE_TIME + 1) client.prev_dm_user = None class GameDay: def __init__(self, opponent, date): self.opponent = opponent self.date = date gamedays = { 1: GameDay('University of Northern Iowa', datetime(2021, 9, 4)),
class TwitterMarkov(object): """Posts markov-generated text to twitter""" default_model = None _recently_tweeted = [] last_tweet = None def __init__(self, screen_name, corpus=None, **kwargs): ''' :screen_name User name to post as :corpus Text file to read to generate text. :api tweepy.API object :dry_run boolean If set, TwitterMarkov won't actually post tweets. ''' if 'api' in kwargs: self.api = kwargs.pop('api') else: self.api = tbu.API(screen_name=screen_name, **kwargs) try: self.log = self.api.logger except AttributeError: self.log = logging.getLogger(screen_name) self.screen_name = screen_name self.config = self.api.config self.dry_run = kwargs.pop('dry_run', False) try: corpus = corpus or self.config.get('corpus') if isinstance(corpus, basestring): corpora = [corpus] elif isinstance(corpus, Iterable): corpora = corpus else: raise RuntimeError('Unable to find any corpora!') self.corpora = [b for b in corpora if b is not None] self.log.debug('%s, %s', screen_name, self.corpora) state_size = kwargs.get('state_size', self.config.get('state_size')) self.models = self._setup_models(self.corpora, state_size) except RuntimeError as e: self.log.error(e) raise e self.log.debug('models: %s', list(self.models.keys())) blacklist = kwargs.get('blacklist') or self.config.get('blacklist', []) self.wordfilter = Wordfilter() self.wordfilter.add_words(blacklist) self.last_tweet = (self.api.user_timeline(count=1))[0] self.last_tweet = self.last_tweet.id if kwargs.get('learn', True): self.learn_parent() def _setup_models(self, corpora, state_size): """ Given a list of paths to corpus text files, set up markovify models for each. These models are returned in a dict, (with the basename as key). 
""" self.log.debug('setting up models') out = dict() state_size = state_size or 3 try: for pth in corpora: corpus_path = os.path.expanduser(pth) name = os.path.basename(corpus_path) with open(corpus_path) as m: out[name] = markovify.text.NewlineText(m.read(), state_size=state_size) except AttributeError as e: self.log.error(e) self.log.error("Probably couldn't find the model file.") raise e except IOError as e: self.log.error(e) self.log.error('Error reading %s', corpus_path) raise e self.default_model = os.path.basename(corpora[0]) return out @property def recently_tweeted(self): if len(self._recently_tweeted) == 0: #recent_tweets = self.api.user_timeline(self.screen_name, count=self.config.get('checkback', 20)) recent_tweets = self.api.user_timeline() self._recently_tweeted = [x.text for x in recent_tweets] return self._recently_tweeted def check_tweet(self, text): text = text.strip().lower() if len(text) == 0: self.log.info("Rejected (empty)") return False if self.wordfilter.blacklisted(text): self.log.info("Rejected (blacklisted)") self.log.info(text) return False for line in self.recently_tweeted: if text in line.strip().lower(): self.log.info("Rejected (Identical)") return False if Levenshtein.ratio(re.sub(r'\W+', '', text), re.sub(r'\W+', '', line.lower())) >= LEVENSHTEIN_LIMIT: self.log.info("Rejected (Levenshtein.ratio)") return False return True def reply_all(self, model=None, **kwargs): mentions = self.api.mentions_timeline(since_id=self.api.last_reply) self.log.info('%replying to all...') self.log.debug('%s mentions found', len(mentions)) for status in mentions: self.reply(status, model, **kwargs) def reply(self, status, model=None, **kwargs): self.log.debug('Replying to a mention') if status.user.screen_name == self.screen_name: self.log.debug('Not replying to self') return text = self.compose(model, max_len=138 - len(status.user.screen_name), **kwargs) reply = '@' + status.user.screen_name + ' ' + text self.log.info(reply) self._update(reply, 
in_reply=status.id_str) def tweet(self, model=None, **kwargs): text = self.compose(model, **kwargs) self.log.info(text) self._update(text) def _update(self, tweet, in_reply=None): if not self.dry_run: self.api.update_status(status=tweet, in_reply_to_status_id=in_reply) def compose(self, model=None, max_len=None, **kwargs): '''Format a tweet with a reply.''' max_len = min(140, (max_len or self.config.get('tweet_size'))) model = self.models[model or self.default_model] eols = '.!?'#'.?!/:;,' text = '' while True: sent = model.make_sentence(**kwargs) if not sent: continue # convert to unicode in Python 2 if hasattr(sent, 'decode'): sent = sent.decode('utf8') # Add eol delimiter if one is missing if sent[-1] not in eols and (sent[-2] not in eols and sent[-1] not in u'"\'’”〞❞'): sent = sent + choice('?..!!!') #'.!?' if len(text) + len(sent) < max_len - 1: text = (text + ' ' + sent).strip() else: # Check tweet against blacklist and recent tweets if self.check_tweet(text): # checked out: break and return break else: # didn't check out, start over text = '' self.log.debug('TwitterMarkov: %s', text) return text def learn_peer(self, corpus=None, peer=None): '''Add recent tweets from peers to corpus''' peer = peer or self.config.get('peer') corpus = corpus or self.corpora[0] if not peer: self.log.debug('Cannot teach: missing parent or tweets') return tweets = self.api.home_timeline(count=150, since_id=self.last_tweet) #self.api.user_timeline(parent, since_id=self.api.last_tweet) #print(tweets[0]) try: gen = checking.generator(tweets, no_mentions=self.config.get('filter_mentions'), no_hashtags=self.config.get('filter_hashtags'), no_urls=self.config.get('filter_urls'), no_media=self.config.get('filter_media'), no_symbols=self.config.get('filter_symbols'), no_badwords=self.config.get('filter_parent_badwords', True), no_retweets=self.config.get('no_retweets'), no_replies=self.config.get('no_replies') ) #print str(gen) #print 'foo' #self.log.error(gen.next()) self.log.debug('%s is 
learning', corpus) with open(corpus, 'a') as f: for tweet in gen: try: #utweet = unicode(tweet, "utf-8") #f.write(str(tweet)+'\n') utweet = unicodedata.normalize('NFKD', tweet).encode('ascii','ignore') utweet = re.sub('\s+', ' ', utweet) f.write(utweet+'\n') except UnicodeEncodeError as e: self.log.error(tweet) #f.writelines(tweet+ '\n' for tweet in gen) except IOError as e: self.log.error('Learning failed for %s', corpus) self.log.error(e) def learn_search(self, search=None, corpus=None): '''Add recent tweets from search to corpus''' #search = 'nuclear war' search = search or self.config.get('search') corpus = corpus or self.corpora[0] if not search: self.log.debug('Cannot teach: missing search or tweets') return #tweets = self.api.home_timeline(count=150) #self.api.user_timeline(parent, since_id=self.api.last_tweet) #print(tweets[0]) tweets = self.api.search(search, since_id=self.last_tweet) try: gen = checking.generator(tweets, no_mentions=self.config.get('filter_mentions'), no_hashtags=self.config.get('filter_hashtags'), no_urls=self.config.get('filter_urls'), no_media=self.config.get('filter_media'), no_symbols=self.config.get('filter_symbols'), no_badwords=self.config.get('filter_parent_badwords', True), no_retweets=self.config.get('no_retweets'), no_replies=self.config.get('no_replies') ) #print str(gen) #print 'foo' #self.log.error(gen.next()) self.log.debug('%s is learning', corpus) with open(corpus, 'a') as f: for tweet in gen: try: #utweet = unicode(tweet, "utf-8") #f.write(str(tweet)+'\n') utweet = unicodedata.normalize('NFKD', tweet).encode('ascii','ignore') utweet = re.sub('\s+', ' ', utweet) f.write(utweet+'\n') except UnicodeEncodeError as e: self.log.error(tweet) #f.writelines(tweet+ '\n' for tweet in gen) except IOError as e: self.log.error('Learning failed for %s', corpus) self.log.error(e) def learn_parent(self, corpus=None, parent=None): '''Add recent tweets from @parent to corpus''' parent = parent or self.config.get('parent') corpus = corpus 
or self.corpora[0] if not parent or not last_tweet: self.log.debug('Cannot teach: missing parent or tweets') return tweets = self.api.user_timeline(parent, since_id=self.last_tweet) try: gen = checking.generator(tweets, no_mentions=self.config.get('filter_mentions'), no_hashtags=self.config.get('filter_hashtags'), no_urls=self.config.get('filter_urls'), no_media=self.config.get('filter_media'), no_symbols=self.config.get('filter_symbols'), no_badwords=self.config.get('filter_parent_badwords', True), no_retweets=self.config.get('no_retweets'), no_replies=self.config.get('no_replies') ) self.log.debug('%s is learning', corpus) with open(corpus, 'a') as f: f.writelines(tweet + '\n' for tweet in gen) except IOError as e: self.log.error('Learning failed for %s', corpus) self.log.error(e)
import spacy import random import annoy import string from wordfilter import Wordfilter from itertools import islice from spacy.lang.en.stop_words import STOP_WORDS as stop_words wf = Wordfilter() def prepare_nlp(): nlp = spacy.load('en_core_web_md') # or en_core_web_md qualified = [item for item in nlp.vocab if item.has_vector and item.is_alpha] lexmap = [] t = annoy.AnnoyIndex(300) for i, item in enumerate(islice(sorted(qualified, key=lambda x: x.prob, reverse=True), 100000)): t.add_item(i, item.vector) lexmap.append(item) t.build(25) p = annoy.AnnoyIndex(50) phonmap = [] phonlookup = {} for i, line in enumerate(open("./cmudict-0.7b-simvecs")): word, vec_raw = line.split(" ") word = word.lower().rstrip("(0123)") vec = [float(v) for v in vec_raw.split()] p.add_item(i, vec)
class Twitter_markov(object):
    """Posts markov-generated text to twitter"""

    default_brain = None
    # Class-level fallback only; every instance gets its own list in __init__.
    _recently_tweeted = []

    def __init__(self, screen_name, brains=None, **kwargs):
        '''
        :screen_name User name to post as
        :brains Brain file name(s); falls back to config 'brain'/'brains'
        :api tweepy/tbu API object (optional)
        :dry_run boolean If set, tweets are composed but not posted
        :learn boolean If True (default), learn from the parent account
        '''
        self.screen_name = screen_name
        # BUG FIX: dict.get evaluates its default eagerly, so the original
        # built a second authenticated API object even when one was passed in.
        if 'api' in kwargs:
            self.api = kwargs['api']
        else:
            self.api = tbu.api.API(screen_name, **kwargs)
        self.config = kwargs.get('config', self.api.config)
        self.logger = logging.getLogger(screen_name)
        # BUG FIX: the original relied on the mutable class attribute, so the
        # recent-tweet cache was shared across all instances.
        self._recently_tweeted = []
        try:
            if isinstance(brains, str):
                brains = [brains]
            if not isinstance(brains, list):
                # Accept either a single 'brain' entry or a 'brains' list.
                brain = self.config.get('brain', [])
                brains = brain + self.config.get('brains', [])
            if not brains:
                raise RuntimeError
            self.brains = self._setup_brains(brains)
        except (IOError, IndexError, RuntimeError) as e:
            self.logger.error('Feed me brains: unable to find any brains!')
            raise e
        self.logger.debug('Brains: {0}'.format(list(self.brains.keys())))
        self.dry_run = kwargs.get('dry_run', False)
        self.wordfilter = Wordfilter()
        self.wordfilter.add_words(self.config.get('blacklist', []))
        self.checker = checking.construct_tweet_checker(
            no_retweets=self.config.get('no_retweets'),
            no_replies=self.config.get('no_replies')
        )
        if kwargs.get('learn', True):
            self.learn_parent()

    def _setup_brains(self, brains):
        '''Load each *.brain file into a Brain, keyed by basename.

        :brains list of brain file paths (~ is expanded)
        :returns dict name -> Brain; also sets self.default_brain to the first
        :raises IOError when a brain file is missing
        '''
        self.logger.debug('setting up brains')
        out = dict()
        try:
            for pth in brains:
                brainpath = os.path.expanduser(pth)
                name = os.path.basename(brainpath).replace('.brain', '')
                if not os.path.exists(brainpath):
                    raise IOError("Brain file '{0}' missing".format(brainpath))
                out[name] = Brain(brainpath)
                # Bias replies toward longer output.
                out[name].scorer.add_scorer(2.0, scoring.LengthScorer())
        except AttributeError as e:
            self.logger.error(e)
            self.logger.error("Probably couldn't find the brain file.")
            raise e
        except IOError as e:
            self.logger.error(e)
            self.logger.error(brains)
            raise e
        self.default_brain = os.path.basename(brains[0]).replace('.brain', '')
        return out

    @property
    def recently_tweeted(self):
        '''Texts of this account's recent tweets (lazily fetched, then cached).'''
        if len(self._recently_tweeted) == 0:
            recent_tweets = self.api.user_timeline(self.screen_name, count=self.config.get('checkback', 20))
            self._recently_tweeted = [x.text for x in recent_tweets]
        return self._recently_tweeted

    def check_tweet(self, text):
        '''Return True when *text* is postable: non-empty, passes the
        retweet/reply checker and the blacklist, and is neither contained in
        nor near-identical (Levenshtein ratio >= 0.70) to a recent tweet.'''
        text = text.strip().lower()
        if len(text) == 0:
            self.logger.info("Rejected (empty)")
            return False
        if not self.checker(text):
            self.logger.info("Rejected (retweet or reply)")
            return False
        if self.wordfilter.blacklisted(text):
            self.logger.info("Rejected (blacklisted)")
            return False
        for line in self.recently_tweeted:
            if text in line.strip().lower():
                self.logger.info("Rejected (Identical)")
                return False
            # Compare alphanumeric skeletons to catch near-duplicates.
            if Levenshtein.ratio(re.sub(r'\W+', '', text), re.sub(r'\W+', '', line.lower())) >= 0.70:
                self.logger.info("Rejected (Levenshtein.ratio)")
                return False
        return True

    def reply_all(self, brainname=None):
        '''Reply to every mention received since the last reply.'''
        mentions = self.api.mentions_timeline(since_id=self.api.last_reply)
        self.logger.debug('{0} mentions found'.format(len(mentions)))
        for status in mentions:
            self.reply(status, brainname)

    def reply(self, status, brainname=None):
        '''Compose and post a reply to *status* (skips self-mentions).'''
        self.logger.debug('Replying to a mention')
        if status.user.screen_name == self.screen_name:
            self.logger.debug('Not replying to self')
            return
        catalyst = tbu.helpers.format_status(status)
        # 138 leaves room for '@', the screen name, and the joining space.
        text = self.compose(catalyst, brainname, max_len=138 - len(status.user.screen_name))
        reply = u'@' + status.user.screen_name + ' ' + text
        self.logger.info(reply)
        self._update(reply, in_reply=status.id_str)

    def tweet(self, catalyst='', brainname=None):
        '''Compose a tweet seeded by *catalyst* and post it.'''
        self.logger.debug('tweeting')
        text = self.compose(catalyst, brainname)
        self.logger.info(text)
        self._update(text)

    def _update(self, tweet, in_reply=None):
        '''Post *tweet* via the API unless this instance is a dry run.'''
        if not self.dry_run:
            self.api.update_status(status=tweet, in_reply_to_status_id=in_reply)

    def compose(self, catalyst='', brainname=None, max_len=140):
        '''Format a tweet with a reply from brainname'''
        max_len = min(140, max_len)
        brainname = brainname or self.default_brain
        brain = self.brains[brainname]
        reply = brain.reply(catalyst, max_len=max_len)
        self.logger.debug(u'input> ' + catalyst)
        self.logger.debug(u'reply> ' + reply)
        if len(reply) <= 140:
            return reply
        else:
            # NOTE(review): retries with identical arguments; if brain.reply
            # is deterministic this recurses forever — consider a retry cap.
            self.logger.debug('Tweet was too long, trying again')
            return self.compose(catalyst, brainname, max_len)

    def learn_parent(self, brainname=None):
        '''Feed recent tweets from the configured parent account to a brain.'''
        parent = self.config.get('parent')
        last_tweet = self.api.last_tweet
        if not parent or not last_tweet:
            return
        tweet_filter = checking.construct_tweet_filter(
            no_mentions=self.config.get('filter_mentions'),
            no_hashtags=self.config.get('filter_hashtags'),
            no_urls=self.config.get('filter_urls'),
            no_media=self.config.get('filter_media'),
            no_symbols=self.config.get('filter_symbols')
        )
        tweet_checker = checking.construct_tweet_checker(
            no_badwords=self.config.get('filter_parent_badwords', True),
            no_retweets=self.config.get('no_retweets'),
            no_replies=self.config.get('no_replies')
        )
        tweets = self.api.user_timeline(parent, since_id=last_tweet)
        brain = brainname or self.default_brain
        for status in tweets:
            if not tweet_checker(status):
                continue
            text = tweet_filter(status)
            text = tbu.helpers.format_text(text)
            self.brains[brain].learn(text)