Ejemplo n.º 1
0
class DataCollector():
    """Extract URLs from chat messages and append them to a links log.

    Attributes:
        config: the BotConfig instance the log path is read from.
        link_log: the open utf-8 log file, or None when it could not be
            opened (logging is then skipped instead of raising).
        pattern: verbose regex source used to find URLs in a message.
        filter_pat: regex source; links matching it are not logged.
    """

    # Matches URLs that start with a scheme or "www." (so a bare "google.com"
    # is not matched); taken from regexlib.com.
    # The pattern is used with re.VERBOSE, which only ignores whitespace
    # OUTSIDE character classes -- a class must therefore never be wrapped
    # across lines, or the newline and indentation become matchable
    # characters and a "URL" can swallow the words that follow it.
    pattern = r"""(((http|ftp|https|ftps|sftp)://)|(www\.))+
                  (([a-zA-Z0-9\._-]+\.[a-zA-Z]{2,6})|
                   ([0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}))
                  (/[a-zA-Z0-9\&%_\./-~-]*)?"""

    # pattern used to filter links
    # keywords included in this pattern will NOT be included in the links log
    filter_pat = "(codepad|pastebin|pocoo)"

    def __init__(self):
        """Load the bot configuration and open the links log for appending."""
        self.config = BotConfig()
        # Stays None if the log cannot be opened, so the other methods can
        # detect the failure instead of hitting a missing attribute.
        self.link_log = None
        try:
            # Opening the links file with utf encoding is required because unicode data
            # has to be written to the file at times -- specifically the em character (\u2014)
            # which represents an underscore in the username
            self.link_log = codecs.open(self.config.get_link_path(), "a", encoding="utf-8")
            self.link_log.write(self.get_date() + "\n")
        except IOError:
            print("links log can't be opened.")

    def extract_links(self, user, chan, msg):
        """Log every URL found in msg, together with its page title.

        Nothing is logged for messages from "ChanServ", for links matching
        filter_pat, or when the links log is unavailable.
        """
        if self.link_log is None or user == "ChanServ":
            return

        # extracts all urls found in message and store in an iterator
        matches = re.finditer(self.pattern, msg.strip(), re.IGNORECASE | re.VERBOSE)
        for match in matches:
            # match.group() contains the full valid url extracted by the regex
            link = match.group()
            if re.search(self.filter_pat, link, re.IGNORECASE):
                continue
            title = self.get_title(link)
            self.link_log.write("%s - %s: %s > %s\n" % (chan, user, link, title))
            # flush per link so the log is current even if the bot dies
            self.link_log.flush()

    def close(self):
        """Close the links log if it was opened."""
        if self.link_log is not None:
            self.link_log.close()

    def get_date(self):
        """Return the current date formatted like 'Sunday, August 01, 2010'."""
        return datetime.now().strftime("%A, %B %d, %Y")

    def get_title(self, url):
        """Return the text of the <title> tag at url, or "No Title" on any failure."""
        # The pattern accepts scheme-less "www." links; urlopen needs a scheme
        # to treat them as web addresses rather than local paths.
        if url.lower().startswith("www."):
            url = "http://" + url
        try:
            response = urllib.urlopen(url)
            try:
                source = response.read()
            finally:
                response.close()
            return BeautifulSoup(source).title.text
        except Exception:
            # Best effort: network errors, unparsable pages and pages without
            # a <title> all fall back to the placeholder.
            return "No Title"
Ejemplo n.º 2
0
    def __init__(self):
        """Load the bot configuration, open the links log and set the patterns.

        self.link_log is left as None when the log file cannot be opened.
        """
        self.config = BotConfig()
        # Defined up front so a failed open leaves a detectable None rather
        # than a missing attribute.
        self.link_log = None
        try:
            # Opening the links file with utf encoding is required because unicode data
            # has to be written to the file at times -- specifically the em character (\u2014)
            # which represents an underscore in the username
            self.link_log = codecs.open(self.config.get_link_path(), "a", encoding="utf-8")
            self.link_log.write(self.get_date() + "\n")
        except IOError:
            print("links log can't be opened.")

        # matches all valid urls but not ie. google.com
        # taken from regexlib.com
        # Written for re.VERBOSE: whitespace is only ignored OUTSIDE character
        # classes, so no class is wrapped across lines.  The HTML-escaped
        # "&amp;" is restored to "&"; the characters it added were already
        # covered by the other ranges in that class.
        self.pattern = r"""(((http|ftp|https|ftps|sftp)://)|(www\.))+
                           (([a-zA-Z0-9\._-]+\.[a-zA-Z]{2,6})|
                            ([0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}))
                           (/[a-zA-Z0-9\&%_\./-~-]*)?"""

        # pattern used to filter links
        # keywords included in this pattern will NOT be included in the links log
        self.filter_pat = "(codepad|pastebin|pocoo)"
Ejemplo n.º 3
0
"""
   ircbot.py

   Handles all the protocols and factories. This is where the behavior of the bot is 
   defined and it is where the bot is connected.

"""

from sys import stdout
from twisted.python.log import startLogging
from twisted.internet import reactor, protocol
from twisted.words.protocols import irc
import datacollect
from botconfig import BotConfig

# Shared bot configuration, created once at import time; the protocol class
# below reads the nickname and the NickServ password from it.
config = BotConfig()

class IRCProtocol(irc.IRCClient):
    nickname = config.get_nick().encode("ascii")

    def signedOn(self):
        """Identify to NickServ once signed on.

        Identification is needed before the bot can join +r
        (must be registered) channels.
        """
        password = config.get_pass().encode("ascii")
        self.msg("NickServ", "identify " + password)

    def privmsg(self, user, channel, message):
        # This method logs ALL messages by users in channel

        username = self.extract_nick(user)

        # have the data collector instance parse the message
Ejemplo n.º 4
0
# Directory this script lives in, as a canonical path.
# os.path.join throws away the earlier components once it meets an absolute
# one (see https://docs.python.org/3/library/os.path.html), so the working
# directory only contributes when dirname(__file__) is relative.
# NOTE(review): the original comment said this is "only set if called via
# python.exe" -- confirm what launch mode that referred to.
__location__ = os.path.realpath(
    os.path.join(os.getcwd(), os.path.dirname(__file__))
)

# Load Configuration File
config_file_path = Path(os.path.join(__location__, config_file_name))

# Read in configuration file.
if config_file_path.is_file():
    print("Configuration found in: {}".format(config_file_path))

    # Initiate the bot config object from file
    bot_config = BotConfig.from_json_config(config_file_path)
    print(str(bot_config))
else:
    # Everything below needs bot_config, so stop here with a clear message
    # (written to stderr, exit status 1) instead of failing later on an
    # undefined name.  The path is passed positionally to match the "{}" field.
    raise SystemExit(
        "The configuration file {} does not exist".format(config_file_path))

# Initialize the bot
# The command prefix comes from the configuration loaded above.
bot = Bot(command_prefix=bot_config.command_prefix)

# Prep SQLAlchemy
# One engine for the configured database URL, one module-level session bound
# to it, then create_all() on the declarative Base's metadata.
# NOTE(review): pool_recycle=3600 presumably recycles pooled connections after
# an hour to avoid server-side idle timeouts -- confirm against the DB in use.
engine = create_engine(bot_config.db_url, pool_recycle=3600)
session = Session(bind=engine)
Base.metadata.create_all(engine)

@bot.event
async def on_ready():
    '''Event for when the bot is ready to start working'''