Python clean_urlの例、twitbot.clean_url Pythonの例

コード例 #1

0

ファイルを表示

ファイル: MarkovBot.py プロジェクト: J-Spade/slackov

    def clean_urls_in_dictionary(self):
        """Rebuild ``self.dictionary`` with every stored word run through ``clean_url``.

        A fresh dictionary is started from a deep copy of
        ``self.DEFAULT_DICTIONARY``.  For each existing key, the first item of
        every entry in its two lists is cleaned (the second item is kept
        as-is), the first two whitespace-separated words of the key are
        cleaned, and the lists are stored under the cleaned key.

        The two lists are updated in place, so the list objects of the old
        dictionary are reused by the new one.  If two old keys clean to the
        same new key, the one visited later replaces the earlier one.

        The whole operation runs while holding ``self.dictLock``; the lock is
        released even if an exception is raised part-way through.
        """
        self.dictLock.acquire()
        try:
            # Deep copy so the defaults themselves are never mutated.
            newdict = copy.deepcopy(self.DEFAULT_DICTIONARY)
            for key in self.dictionary:
                entry = self.dictionary[key]
                firsts = entry[0]
                seconds = entry[1]
                for neighbours in (firsts, seconds):
                    for i, item in enumerate(neighbours):
                        neighbours[i] = (clean_url(item[0]), item[1])

                # Keys hold one or two words; anything past the second word
                # is dropped, exactly as before.
                parts = key.split()
                newkey = clean_url(parts[0])
                if len(parts) > 1:
                    newkey = newkey + ' ' + clean_url(parts[1])
                newdict[newkey] = (firsts, seconds)
            self.dictionary = newdict
        finally:
            self.dictLock.release()

コード例 #2

0

ファイルを表示

ファイル: MarkovBot.py プロジェクト: J-Spade/slackov

 def clean_urls_in_dictionary(self):
     """Rebuild ``self.dictionary`` with every stored word run through ``clean_url``.

     Starts from a deep copy of ``self.DEFAULT_DICTIONARY``, then for each
     existing key cleans the first item of every entry in both of its lists
     (keeping the second item), cleans the first two words of the key, and
     stores the lists under the cleaned key.

     The lists are modified in place and reused by the new dictionary.  When
     two old keys clean to the same new key, the later one wins.

     ``self.dictLock`` is held for the duration and is always released, even
     when an exception escapes.
     """
     self.dictLock.acquire()
     try:
         # Deep copy so the defaults themselves are never mutated.
         newdict = copy.deepcopy(self.DEFAULT_DICTIONARY)
         for key in self.dictionary:
             entry = self.dictionary[key]
             firsts = entry[0]
             seconds = entry[1]
             for neighbours in (firsts, seconds):
                 for i, item in enumerate(neighbours):
                     neighbours[i] = (clean_url(item[0]), item[1])

             # Only the first two words of a key are carried over.
             parts = key.split()
             newkey = clean_url(parts[0])
             if len(parts) > 1:
                 newkey = newkey + ' ' + clean_url(parts[1])
             newdict[newkey] = (firsts, seconds)
         self.dictionary = newdict
     finally:
         self.dictLock.release()

コード例 #3

0

ファイルを表示

ファイル: MarkovBot.py プロジェクト: J-Spade/slackov

    def generate_chain(self, message):
        """Generate a Markov chain in response to a message.

        The message is split on whitespace, wrapped in ``self.STOPWORD`` at
        both ends, and each word is passed through ``clean_url``.  The word
        with the highest ``tf_idf`` score (among words present in
        ``self.wordcounts``) becomes the subject; a seed word pair containing
        the subject is picked from ``self.paircounts`` and the chain is grown
        forwards and then backwards through ``self.dictionary`` until a
        STOPWORD or an unknown pair is reached.

        Args:
            message: the incoming message text.

        Returns:
            The generated text with every occurrence of ``self.STOPWORD``
            removed, or ``''`` if fewer than two words remain.

        ``self.dictLock`` is held for the whole computation and is released on
        every exit path, including the early return and any exception.
        """
        self.dictLock.acquire()
        try:
            words = message.split()
            words.append(self.STOPWORD)
            words.insert(0, self.STOPWORD)

            # find URLs, neaten them up
            words = [clean_url(word) for word in words]
            # drop the first real word when it contains the bot's own <name>
            if '<{}>'.format(self.users[self.BOT_ID]) in words[1]:
                del words[1]

            if len(words) < 2:
                return ''

            # try to guess which word is the most important
            subject = self.STOPWORD
            confidence = 0
            for word in words:
                if word in self.wordcounts:
                    tfidf = tf_idf(word, words, self.wordcounts,
                                   self.sentences_ever)
                    if tfidf > confidence:
                        confidence = tfidf
                        subject = word

            # pick a word pair we've seen used with that word before as a seed
            pairs = []
            for pair in self.paircounts:
                parts = pair.split()
                if (parts[0] == subject) or \
                        ((len(parts) > 1) and (parts[1] == subject)):
                    pairs.append((pair, self.paircounts.get(pair)))

            # NOTE(review): choose_word_from_list is defined elsewhere; its
            # behaviour for an empty list is not visible here - confirm.
            seed = choose_word_from_list(pairs)

            chain = ''

            # forwards: keep appending a chosen successor of the current pair
            wordpair = seed
            if wordpair in self.dictionary:
                chain = wordpair
            while (wordpair.split()[1] != self.STOPWORD) and \
                    (wordpair in self.dictionary):
                wordpair = wordpair.split()[1] + u' ' + \
                    choose_word_from_list(self.dictionary.get(wordpair)[1])
                chain = chain + u' ' + wordpair.split()[1]

            # backwards: step once before the loop so the seed itself is not
            # prepended a second time
            wordpair = seed
            if wordpair in self.dictionary and \
                    wordpair.split()[0] != self.STOPWORD:
                wordpair = choose_word_from_list(
                    self.dictionary.get(wordpair)[0]) + \
                    u' ' + wordpair.split()[0]

            while (wordpair.split()[0] != self.STOPWORD) and \
                    (wordpair in self.dictionary):
                chain = wordpair.split()[0] + u' ' + chain
                wordpair = choose_word_from_list(
                    self.dictionary.get(wordpair)[0]) + \
                    u' ' + wordpair.split()[0]

            return chain.replace(self.STOPWORD, u'')
        finally:
            self.dictLock.release()

コード例 #4

0

ファイルを表示

ファイル: MarkovBot.py プロジェクト: J-Spade/slackov

    def interpret_message(self, message):
        """Learn from a message by updating the word, pair and chain tables.

        The message is split on whitespace, wrapped in ``self.STOPWORD`` at
        both ends, and each word is passed through ``clean_url``.  Then:

        * ``self.sentences_ever`` is incremented by one;
        * ``self.wordcounts[word]`` is incremented for every word;
        * for every three consecutive words ``(a, b, c)``:
          ``self.paircounts["a b"]`` is incremented, ``c`` is counted as a
          successor of ``"a b"`` and ``a`` as a predecessor of ``"b c"`` in
          ``self.dictionary``, whose values are
          ``(predecessors, successors)`` lists of ``(word, count)`` tuples.

        Args:
            message: the message text to learn from.

        ``self.dictLock`` is held throughout and is released even if an
        exception is raised.
        """
        self.dictLock.acquire()
        try:
            words = message.split()
            words.append(self.STOPWORD)
            words.insert(0, self.STOPWORD)

            self.sentences_ever = self.sentences_ever + 1

            # find URLs, neaten them up
            words = [clean_url(word) for word in words]

            for word in words:
                self.wordcounts[word] = self.wordcounts.get(word, 0) + 1

            # Slide a three-word window over the sentence; a sentence with
            # only the two STOPWORDs yields no window at all.
            for index in range(len(words) - 2):
                word = words[index]
                following = words[index + 2]
                wordpair = word + u' ' + words[index + 1]
                nextpair = words[index + 1] + u' ' + following

                self.paircounts[wordpair] = \
                    self.paircounts.get(wordpair, 0) + 1

                # add 'following' as a word that comes after 'wordpair'
                if wordpair in self.dictionary:
                    successors = self.dictionary[wordpair][1]
                    position = word_index_in_list(following, successors)
                    if position == -1:
                        successors.append((following, 1))
                    else:
                        successors[position] = (
                            following, successors[position][1] + 1)
                else:
                    self.dictionary[wordpair] = ([], [(following, 1)])

                # add 'word' as a word that comes before 'nextpair'
                if nextpair in self.dictionary:
                    predecessors = self.dictionary[nextpair][0]
                    position = word_index_in_list(word, predecessors)
                    if position == -1:
                        predecessors.append((word, 1))
                    else:
                        predecessors[position] = (
                            word, predecessors[position][1] + 1)
                else:
                    self.dictionary[nextpair] = ([(word, 1)], [])
        finally:
            self.dictLock.release()

コード例 #5

0

ファイルを表示

ファイル: MarkovBot.py プロジェクト: J-Spade/slackov

    def do_commands(self, target, sender, message, sentByAdmin):
        """Run the first bot command found in a message, if any.

        Commands are detected by substring match, tested in the order below;
        only the first match is executed.

        Admin only (``sentByAdmin`` must be truthy):
        ``!saveDict``, ``!loadDict``, ``!eraseDict``, ``!learn``,
        ``!cleanURL``, ``!quit``.

        Available to everyone:
        ``!search <word> [<word>]``, ``!talkback <float>``, ``!avatar``,
        ``!nowplaying``.

        Args:
            target: destination passed to ``self.send_message`` for replies.
            sender: not used by this method.
            message: the raw message text.
            sentByAdmin: whether the sender may use admin-only commands.

        Returns:
            True if a command was recognised (even if it then reported a
            failure or a malformed command), False otherwise.
        """
        if sentByAdmin and ('!saveDict' in message):
            try:
                self.save_dictionary()
                self.send_message(
                    target, 'DICTIONARY SAVED SUCCESSFULLY (%s bytes)' %
                    str(os.path.getsize('Markov_Dict.pkl')))
            except IOError:
                self.send_message(target, 'DICTIONARY COULD NOT BE SAVED')
            return True
        elif sentByAdmin and ('!loadDict' in message):
            try:
                self.load_dictionary()
                self.send_message(
                    target, 'DICTIONARY LOADED SUCCESSFULLY (%s bytes)' %
                    str(os.path.getsize('Markov_Dict.pkl')))
            except IOError:
                self.send_message(target, 'DICTIONARY COULD NOT BE LOADED')
            return True
        elif sentByAdmin and ('!eraseDict' in message):
            # Only the in-memory dictionary is reset; nothing is written out.
            self.dictionary = {
                self.STOPWORD: ([self.STOPWORD], [self.STOPWORD])
            }
            self.send_message(target, 'DICTIONARY ERASED (NOT SAVED YET)')
            return True
        elif sentByAdmin and ('!learn' in message):
            self.toggle_learn()
            print_message = 'I AM {} LEARNING'
            self.send_message(
                target,
                print_message.format(
                    'NOW' if self.isLearning else 'NO LONGER'))
            return True
        elif sentByAdmin and ('!cleanURL' in message):
            self.clean_urls_in_dictionary()
            self.send_message(target, 'LINKS IN DICTIONARY HAVE BEEN CLEANED')
            return True
        elif '!search' in message:
            try:
                message = message.lower()
                # Everything after the first token is a search term.
                searchterms = [clean_url(term)
                               for term in message.split()[1:]]
                if len(searchterms) == 1:
                    # One term: list every key having it as first or second
                    # word.
                    phrases = []
                    for key in self.dictionary:
                        parts = key.split()
                        if searchterms[0] == parts[0] or \
                                (len(parts) > 1 and
                                 searchterms[0] == parts[1]):
                            phrases.append(key)
                    self.send_message(
                        target,
                        u'"%s" in pairs: %s' % (searchterms[0], str(phrases)))
                else:
                    # Two or more terms: look up the pair made of the first
                    # two.  With no terms the index below raises IndexError.
                    key = searchterms[0] + u' ' + searchterms[1]
                    if key in self.dictionary:
                        self.send_message(
                            target,
                            u'"%s": %s' % (key, str(self.dictionary.get(key))))
                    else:
                        self.send_message(
                            target, u'"%s" not found in dictionary' % key)
            except IndexError:
                self.send_message(target, u'MALFORMED COMMAND')
            return True
        elif '!talkback' in message:
            try:
                self.talkBackFreq = float(message.split()[1])
                # NOTE(review): '%3f' sets a minimum width of 3, not three
                # decimal places - confirm whether '%.3f' was intended.
                self.send_message(
                    target,
                    ('RESPONDING PROBABILITY SET TO %3f' % self.talkBackFreq))
            except (IndexError, TypeError, ValueError):
                self.send_message(target, 'MALFORMED COMMAND')
            return True
        elif sentByAdmin and ('!quit' in message):
            self.quit()
            return True
        elif '!avatar' in message:
            self.send_message(
                target, 'SOURCE OF MY CURRENT AVATAR: %s' % self.AVATARSOURCE)
            return True
        elif '!nowplaying' in message:
            songname, songartist = self.generate_song()
            self.send_message(
                target, u'Now Playing: "%s", by %s' %
                (string.capwords(songname), string.capwords(songartist)))
            return True

        return False  # did not find a command

コード例 #6

0

ファイルを表示

ファイル: MarkovBot.py プロジェクト: J-Spade/slackov

    def generate_chain(self, message):
        """Generate a Markov chain in response to a message.

        The message is split on whitespace, wrapped in ``self.STOPWORD`` at
        both ends, and each word is passed through ``clean_url``.  The word
        with the highest ``tf_idf`` score (among words present in
        ``self.wordcounts``) is taken as the subject; a seed pair containing
        it is chosen from ``self.paircounts``, and the chain is extended
        forwards and then backwards via ``self.dictionary`` until a STOPWORD
        or an unknown pair is hit.

        Args:
            message: the incoming message text.

        Returns:
            The generated text with every occurrence of ``self.STOPWORD``
            removed, or ``''`` if fewer than two words remain.

        ``self.dictLock`` is held during the computation and released on all
        exit paths (normal return, early return, exception).
        """
        self.dictLock.acquire()
        try:
            words = message.split()
            words.append(self.STOPWORD)
            words.insert(0, self.STOPWORD)

            # find URLs, neaten them up
            words = [clean_url(word) for word in words]
            # drop the first real word when it contains the bot's own <name>
            if '<{}>'.format(self.users[self.BOT_ID]) in words[1]:
                del words[1]

            if len(words) < 2:
                return ''

            # try to guess which word is the most important
            subject = self.STOPWORD
            confidence = 0
            for word in words:
                if word in self.wordcounts:
                    tfidf = tf_idf(word, words, self.wordcounts,
                                   self.sentences_ever)
                    if tfidf > confidence:
                        confidence = tfidf
                        subject = word

            # pick a word pair we've seen used with that word before as a seed
            pairs = []
            for pair in self.paircounts:
                parts = pair.split()
                if (parts[0] == subject) or \
                        ((len(parts) > 1) and (parts[1] == subject)):
                    pairs.append((pair, self.paircounts.get(pair)))

            # NOTE(review): choose_word_from_list is defined elsewhere; its
            # behaviour for an empty list is not visible here - confirm.
            seed = choose_word_from_list(pairs)

            chain = ''

            # forwards: keep appending a chosen successor of the current pair
            wordpair = seed
            if wordpair in self.dictionary:
                chain = wordpair
            while (wordpair.split()[1] != self.STOPWORD) and \
                    (wordpair in self.dictionary):
                wordpair = wordpair.split()[1] + u' ' + \
                    choose_word_from_list(self.dictionary.get(wordpair)[1])
                chain = chain + u' ' + wordpair.split()[1]

            # backwards: step once before the loop so the seed itself is not
            # prepended a second time
            wordpair = seed
            if wordpair in self.dictionary and \
                    wordpair.split()[0] != self.STOPWORD:
                wordpair = choose_word_from_list(
                    self.dictionary.get(wordpair)[0]) + \
                    u' ' + wordpair.split()[0]

            while (wordpair.split()[0] != self.STOPWORD) and \
                    (wordpair in self.dictionary):
                chain = wordpair.split()[0] + u' ' + chain
                wordpair = choose_word_from_list(
                    self.dictionary.get(wordpair)[0]) + \
                    u' ' + wordpair.split()[0]

            return chain.replace(self.STOPWORD, u'')
        finally:
            self.dictLock.release()

コード例 #7

0

ファイルを表示

ファイル: MarkovBot.py プロジェクト: J-Spade/slackov

    def interpret_message(self, message):
        """Learn from a message by updating the word, pair and chain tables.

        The message is split on whitespace, wrapped in ``self.STOPWORD`` at
        both ends, and each word is passed through ``clean_url``.  Then:

        * ``self.sentences_ever`` is incremented by one;
        * ``self.wordcounts[word]`` is incremented for every word;
        * for every three consecutive words ``(a, b, c)``:
          ``self.paircounts["a b"]`` is incremented, ``c`` is counted as a
          successor of ``"a b"`` and ``a`` as a predecessor of ``"b c"`` in
          ``self.dictionary``, whose values are
          ``(predecessors, successors)`` lists of ``(word, count)`` tuples.

        Args:
            message: the message text to learn from.

        ``self.dictLock`` is held throughout and is released even if an
        exception is raised.
        """
        self.dictLock.acquire()
        try:
            words = message.split()
            words.append(self.STOPWORD)
            words.insert(0, self.STOPWORD)

            self.sentences_ever = self.sentences_ever + 1

            # find URLs, neaten them up
            words = [clean_url(word) for word in words]

            for word in words:
                self.wordcounts[word] = self.wordcounts.get(word, 0) + 1

            # Slide a three-word window over the sentence; a sentence with
            # only the two STOPWORDs yields no window at all.
            for index in range(len(words) - 2):
                word = words[index]
                following = words[index + 2]
                wordpair = word + u' ' + words[index + 1]
                nextpair = words[index + 1] + u' ' + following

                self.paircounts[wordpair] = \
                    self.paircounts.get(wordpair, 0) + 1

                # add 'following' as a word that comes after 'wordpair'
                if wordpair in self.dictionary:
                    successors = self.dictionary[wordpair][1]
                    position = word_index_in_list(following, successors)
                    if position == -1:
                        successors.append((following, 1))
                    else:
                        successors[position] = (
                            following, successors[position][1] + 1)
                else:
                    self.dictionary[wordpair] = ([], [(following, 1)])

                # add 'word' as a word that comes before 'nextpair'
                if nextpair in self.dictionary:
                    predecessors = self.dictionary[nextpair][0]
                    position = word_index_in_list(word, predecessors)
                    if position == -1:
                        predecessors.append((word, 1))
                    else:
                        predecessors[position] = (
                            word, predecessors[position][1] + 1)
                else:
                    self.dictionary[nextpair] = ([(word, 1)], [])
        finally:
            self.dictLock.release()

コード例 #8

0

ファイルを表示

ファイル: MarkovBot.py プロジェクト: J-Spade/slackov

    def do_commands(self, target, sender, message, sentByAdmin):
        """Run the first bot command found in a message, if any.

        Commands are detected by substring match, tested in the order below;
        only the first match is executed.

        Admin only (``sentByAdmin`` must be truthy):
        ``!saveDict``, ``!loadDict``, ``!eraseDict``, ``!learn``,
        ``!cleanURL``, ``!quit``.

        Available to everyone:
        ``!search <word> [<word>]``, ``!talkback <float>``, ``!avatar``,
        ``!nowplaying``.

        Args:
            target: destination passed to ``self.send_message`` for replies.
            sender: not used by this method.
            message: the raw message text.
            sentByAdmin: whether the sender may use admin-only commands.

        Returns:
            True if a command was recognised (even if it then reported a
            failure or a malformed command), False otherwise.
        """
        if sentByAdmin and ('!saveDict' in message):
            try:
                self.save_dictionary()
                self.send_message(
                    target, 'DICTIONARY SAVED SUCCESSFULLY (%s bytes)' %
                    str(os.path.getsize('Markov_Dict.pkl')))
            except IOError:
                self.send_message(target, 'DICTIONARY COULD NOT BE SAVED')
            return True
        elif sentByAdmin and ('!loadDict' in message):
            try:
                self.load_dictionary()
                self.send_message(
                    target, 'DICTIONARY LOADED SUCCESSFULLY (%s bytes)' %
                    str(os.path.getsize('Markov_Dict.pkl')))
            except IOError:
                self.send_message(target, 'DICTIONARY COULD NOT BE LOADED')
            return True
        elif sentByAdmin and ('!eraseDict' in message):
            # Only the in-memory dictionary is reset; nothing is written out.
            self.dictionary = {
                self.STOPWORD: ([self.STOPWORD], [self.STOPWORD])
            }
            self.send_message(target, 'DICTIONARY ERASED (NOT SAVED YET)')
            return True
        elif sentByAdmin and ('!learn' in message):
            self.toggle_learn()
            print_message = 'I AM {} LEARNING'
            self.send_message(
                target,
                print_message.format(
                    'NOW' if self.isLearning else 'NO LONGER'))
            return True
        elif sentByAdmin and ('!cleanURL' in message):
            self.clean_urls_in_dictionary()
            self.send_message(target, 'LINKS IN DICTIONARY HAVE BEEN CLEANED')
            return True
        elif '!search' in message:
            try:
                message = message.lower()
                # Everything after the first token is a search term.
                searchterms = [clean_url(term)
                               for term in message.split()[1:]]
                if len(searchterms) == 1:
                    # One term: list every key having it as first or second
                    # word.
                    phrases = []
                    for key in self.dictionary:
                        parts = key.split()
                        if searchterms[0] == parts[0] or \
                                (len(parts) > 1 and
                                 searchterms[0] == parts[1]):
                            phrases.append(key)
                    self.send_message(
                        target,
                        u'"%s" in pairs: %s' % (searchterms[0], str(phrases)))
                else:
                    # Two or more terms: look up the pair made of the first
                    # two.  With no terms the index below raises IndexError.
                    key = searchterms[0] + u' ' + searchterms[1]
                    if key in self.dictionary:
                        self.send_message(
                            target,
                            u'"%s": %s' % (key, str(self.dictionary.get(key))))
                    else:
                        self.send_message(
                            target, u'"%s" not found in dictionary' % key)
            except IndexError:
                self.send_message(target, u'MALFORMED COMMAND')
            return True
        elif '!talkback' in message:
            try:
                self.talkBackFreq = float(message.split()[1])
                # NOTE(review): '%3f' sets a minimum width of 3, not three
                # decimal places - confirm whether '%.3f' was intended.
                self.send_message(
                    target,
                    ('RESPONDING PROBABILITY SET TO %3f' % self.talkBackFreq))
            except (IndexError, TypeError, ValueError):
                self.send_message(target, 'MALFORMED COMMAND')
            return True
        elif sentByAdmin and ('!quit' in message):
            self.quit()
            return True
        elif '!avatar' in message:
            self.send_message(
                target, 'SOURCE OF MY CURRENT AVATAR: %s' % self.AVATARSOURCE)
            return True
        elif '!nowplaying' in message:
            songname, songartist = self.generate_song()
            self.send_message(
                target, u'Now Playing: "%s", by %s' %
                (string.capwords(songname), string.capwords(songartist)))
            return True

        return False  # did not find a command