Python cleanText Examples, purify.cleanText Python Examples

Example #1

0

Show file

File: classifier.py Project: Lanka/feedIO

def voteArticle(upOrDown, item, topic="General"):
    """
    Train the classifiers with an up or down vote for an article.

    upOrDown -- "up" or "down"; any other value trains nothing.
    item     -- the article voted on; its description and title are cleaned
                with purify.cleanText and used as the training text.
    topic    -- name of the topic to vote under (defaults to "General").

    A vote on any topic other than "General" is also recorded against the
    General classifiers. A UnicodeEncodeError raised while training is
    reported on stdout and otherwise ignored.
    """
    text = purify.cleanText(item.description)
    title = purify.cleanText(item.title)

    c = Classifier(classifierDir, [topic, "not" + topic])
    c2 = Classifier(classifierDir, [topic + "Title", "not" + topic + "Title"])
    try:
        # Strings are compared by value: "is" tests object identity and only
        # matches when the interpreter happens to share the string object.
        if upOrDown == "up":
            c.learn(topic, text)
            c2.learn(topic + "Title", title)

        elif upOrDown == "down":
            c.learn("not" + topic, text)
            c2.learn("not" + topic + "Title", title)

        if topic != "General":
            # Always mirror the vote into the General interest category too.
            d = Classifier(classifierDir, ["General", "notGeneral"])
            d2 = Classifier(classifierDir, ["GeneralTitle", "notGeneralTitle"])
            if upOrDown == "up":
                d.learn("General", text)
                d2.learn("GeneralTitle", title)

            elif upOrDown == "down":
                d.learn("notGeneral", text)
                d2.learn("notGeneralTitle", title)
    except UnicodeEncodeError:
        print("Article content contains invalid characters!")

Example #2

0

Show file

    def calcScore(self, article):
        """
        Return the overall score of an article for self.topic.

        The score is the weighted sum of three parts: the classifier score of
        the article body, the classifier score of the article title, and the
        stored ScoreFeed score of the article's feed for this topic.  A term
        for the feed update frequency is planned but is not part of the sum.
        """
        # Plain-text versions of the article body and title.
        bodyText = purify.cleanText(article.description)
        headline = purify.cleanText(article.title)

        # Classify the body; underscores in the returned label are turned
        # back into spaces before it is compared with the topic title.
        (bodyLabel, bodyScore) = classifier.classifyArticleText(self.topic.title, bodyText)
        bodyLabel = bodyLabel.replace("_", " ")

        # Classify the title in the same way.
        (headlineLabel, headlineScore) = classifier.classifyArticleTitle(self.topic.title, headline)
        headlineLabel = headlineLabel.replace("_", " ")

        # A match with the topic is boosted; anything else ("notTopic")
        # is turned into a negative contribution.
        bodyScore = bodyScore * (10000 if bodyLabel == self.topic.title else -100)
        headlineScore = headlineScore * (
            10000 if headlineLabel == self.topic.title + "Title" else -100)

        # Score stored in the db for this feed/topic pair.
        # NOTE(review): .first() presumably returns None when no row matches,
        # which would make the next line raise -- confirm a row always exists.
        feedRow = ScoreFeed.query.filter_by(feed = article.feed, topic = self.topic).first()
        feedPart = feedRow.score * 100

        #TODO: Give an option for the user to set the weights of these scores from GUI.
        weightedBody = TEXT_SCORE_WEIGHT * bodyScore
        weightedHeadline = TITLE_SCORE_WEIGHT * headlineScore
        weightedFeed = FEED_SCORE_WEIGHT * feedPart

        return weightedBody + weightedHeadline + weightedFeed

Example #3

0

Show file

File: prioritizer.py Project: Lanka/feedIO

    def calcScore(self, article):
        """
        Return the overall score of an article for self.topic.

        The score is the weighted sum of three parts: the classifier score of
        the article body (weight 0.55), the classifier score of the article
        title (weight 0.35) and a feed score derived from the number of votes
        of the article's feed (weight 0.1).  A term for the feed update
        frequency is planned but is not part of the sum.
        """
        # Plain-text versions of the article body and title.
        bodyText = purify.cleanText(article.description)
        headline = purify.cleanText(article.title)

        # Classify the body, then the title, against the topic.
        (bodyLabel, bodyScore) = classifier.classifyArticleText(self.topic.title, bodyText)
        (headlineLabel, headlineScore) = classifier.classifyArticleTitle(self.topic.title, headline)

        # A match with the topic is boosted; anything else ("notTopic")
        # is turned into a negative contribution.
        bodyScore = bodyScore * (10000 if bodyLabel == self.topic.title else -100)
        headlineScore = headlineScore * (
            10000 if headlineLabel == self.topic.title + "Title" else -100)

        # Feed contribution, taken from the feed's vote count.
        feedPart = article.feed.numVotes * 200

        #TODO: Give an option for the user to set the weights of these scores from GUI.
        weightedBody = 0.55 * bodyScore
        weightedHeadline = 0.35 * headlineScore
        weightedFeed = 0.1 * feedPart

        return weightedBody + weightedHeadline + weightedFeed

Example #4

0

Show file

def voteArticle(upOrDown, item, topic):
    """
    Record an up or down vote for an article and train the classifiers.

    upOrDown -- "up" or "down"; any other value trains no classifier.
    item     -- the article voted on; its feed, description and title are used.
    topic    -- topic object; its title attribute names the classifier
                categories.

    The vote is first passed to _voteFeed for the article's feed.  An up vote
    on any topic other than "General" is also learned by the General
    classifiers; down votes are deliberately not propagated to General.
    A UnicodeEncodeError raised while training is reported on stdout and
    otherwise ignored.
    """
    # vote for the feed that the article belongs.
    _voteFeed(upOrDown, item.feed, topic)

    # we'll be working with strings from now on, so get the topic title.
    topic = topic.title
    # calls to CRM breaks when a topic title contains spaces, replace them with underscores.
    topic = topic.replace(" ", "_")

    text = purify.cleanText(item.description)
    title = purify.cleanText(item.title)

    c = Classifier(classifierDir, [topic, "not" + topic])
    c2 = Classifier(classifierDir, [topic + "Title", "not" + topic + "Title"])
    try:
        # Strings are compared by value: "is" tests object identity and only
        # matches when the interpreter happens to share the string object.
        if upOrDown == "up":
            c.learn(topic, text)
            c2.learn(topic + "Title", title)

        elif upOrDown == "down":
            c.learn("not" + topic, text)
            c2.learn("not" + topic + "Title", title)

        if topic != "General":
            # Always add the upvote to the General interest category as well.
            d = Classifier(classifierDir, ["General", "notGeneral"])
            d2 = Classifier(classifierDir, ["GeneralTitle", "notGeneralTitle"])
            if upOrDown == "up":
                d.learn("General", text)
                d2.learn("GeneralTitle", title)

            # General should only ever receive up votes, so a down vote is
            # intentionally not learned as "notGeneral" here.
    except UnicodeEncodeError:
        print("Article content contains invalid characters!")

Example #5

0

Show file

File: classifier.py Project: onenonlycasper/feedIO

def voteArticle(upOrDown, item, topic):
    """
    Record an up or down vote for an article and train the classifiers.

    upOrDown -- "up" or "down"; any other value trains no classifier.
    item     -- the article voted on; its feed, description and title are used.
    topic    -- topic object; its title attribute names the classifier
                categories.

    The vote is first passed to _voteFeed for the article's feed.  An up vote
    on any topic other than "General" is also learned by the General
    classifiers; down votes are deliberately not propagated to General.
    A UnicodeEncodeError raised while training is reported on stdout and
    otherwise ignored.
    """
    # vote for the feed that the article belongs.
    _voteFeed(upOrDown, item.feed, topic)

    # we'll be working with strings from now on, so get the topic title.
    topic = topic.title
    # calls to CRM breaks when a topic title contains spaces, replace them with underscores.
    topic = topic.replace(" ", "_")

    text = purify.cleanText(item.description)
    title = purify.cleanText(item.title)

    c = Classifier(classifierDir, [topic, "not" + topic])
    c2 = Classifier(classifierDir, [topic + "Title", "not" + topic + "Title"])
    try:
        # Strings are compared by value: "is" tests object identity and only
        # matches when the interpreter happens to share the string object.
        if upOrDown == "up":
            c.learn(topic, text)
            c2.learn(topic + "Title", title)

        elif upOrDown == "down":
            c.learn("not" + topic, text)
            c2.learn("not" + topic + "Title", title)

        if topic != "General":
            # Always add the upvote to the General interest category as well.
            d = Classifier(classifierDir, ["General", "notGeneral"])
            d2 = Classifier(classifierDir, ["GeneralTitle", "notGeneralTitle"])
            if upOrDown == "up":
                d.learn("General", text)
                d2.learn("GeneralTitle", title)

            # General should only ever receive up votes, so a down vote is
            # intentionally not learned as "notGeneral" here.
    except UnicodeEncodeError:
        print("Article content contains invalid characters!")

Example #6

0

Show file

File: ui.py Project: onenonlycasper/feedIO

 def on_actionRead_activated(self, i=None):
     """
     Toggle reading of the current article through self.parent.sp.

     Does nothing when i is None.  When the parent's playerState is
     'standby' the state becomes 'playing' and the cleaned title and
     description are handed to sp.say; in any other state sp.stop is
     called and the state goes back to 'standby'.
     """
     if i is None:
         return
     selected = self.currentItem

     # Anything other than standby is treated as "already playing": stop.
     if self.parent.playerState != 'standby':
         self.parent.sp.stop()
         self.parent.playerState = 'standby'
         return

     self.parent.playerState = 'playing'
     article = selected.article
     # NOTE(review): str() presumably raises on non-ASCII unicode text under
     # Python 2 -- confirm how such articles should be handled.
     spokenText = str(article.title + "....." + article.description)
     self.parent.sp.say(purify.cleanText(spokenText))

Example #7

0

Show file

File: ui.py Project: seejay/feedIO

 def on_actionRead_activated(self, i=None):
     """
     Start or stop reading the current article through self.parent.sp.

     Returns immediately when i is None.  From the 'standby' state the
     parent's playerState is set to 'playing' and the cleaned
     "title.....description" text is passed to sp.say; from any other
     state sp.stop is called and the state is reset to 'standby'.
     """
     if i is None:
         return
     selected = self.currentItem
     player = self.parent

     if player.playerState != 'standby':
         # Already reading something: stop and fall back to standby.
         player.sp.stop()
         player.playerState = 'standby'
         return

     player.playerState = 'playing'
     # NOTE(review): str() presumably raises on non-ASCII unicode text under
     # Python 2 -- confirm how such articles should be handled.
     rawText = str(selected.article.title + "....." + selected.article.description)
     player.sp.say(purify.cleanText(rawText))

Example #8

0

Show file

File: ui.py Project: seejay/feedIO

    def on_actionTranslate_the_article_activated(self, i = None):
        """
        Translate the current article into English and redisplay it.

        Does nothing when i is None.  The title and description of
        self.currentItem.article are cleaned with purify.cleanText,
        translated with Translator().translate(..., lang_to="en"), written
        back to the article and shown through self.displayArticle().

        Any ordinary exception raised along the way is reported on
        stdout/stderr and the method returns normally; KeyboardInterrupt and
        SystemExit are no longer swallowed.
        """
        if i is None:
            return
        try:
            selectedItem = self.currentItem.article
            titleText = purify.cleanText(str(selectedItem.title))
            descriptionText = purify.cleanText(str(selectedItem.description))
            # Collapse every run of whitespace into a single space.
            descriptionText = ' '.join(descriptionText.split())

            translate = Translator().translate
            translatedTitle = translate(titleText, lang_to="en")
            translatedDescription = translate(descriptionText, lang_to="en")

            # Only touch the article once both translations succeeded.
            selectedItem.title = translatedTitle
            selectedItem.description = translatedDescription
            self.displayArticle()
        except Exception:
            # Best effort: keep the UI alive, but do not hide the failure.
            import traceback
            print("Error Translating the article")
            traceback.print_exc()

Example #9

0

Show file

File: ui.py Project: Lanka/feedIO

 def addTopic(self):
     """
     Add the topic typed into the addTopicLinedit widget.

     The widget text is converted to unicode, cleaned with
     purify.cleanText and passed to classifier.addTopic; afterwards
     self.close() is called.
     """
     enteredText = self.ui.addTopicLinedit.text()
     cleanedTopic = purify.cleanText(unicode(enteredText))
     classifier.addTopic(cleanedTopic)
     self.close()