Python Naive Examples

Programming Language: Python

Namespace/Package Name: NaiveClassifier

Class/Type: Naive

Examples at hotexamples.com: 2

Python Naive - 2 examples found. These are the top-rated real-world Python examples of NaiveClassifier.Naive, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

Naive(2)

inferPolarity(1)

tokenizeSentiTokens(1)

Example #1

Show file

def processTweets(targetsFile,sentiTokensFile,exceptSentiTokens,multiWordsFile,tweets):
    
    """ 
        Processes a list of tweets, for each:
        1. Identifies the target
        2. If the message contains a target of interest infer the polarity
        
        targetsFile -> path to the politicians list file
        sentiTokensFile -> path to the sentiTokens list file
        exceptSentiTokens -> path to the list of sentiTokens that cannot lose their accents without
                             causing ambiguity for ex: más -> mas
        multiWordsFile -> path to a file that contains the words that should be considered as a unit, e.g. "primeiro ministro"
         tweets -> list of tweets
    """
    
    print "hell yeah!"
    print "Loading resources...\nTargets: " + targetsFile
        
    targets = Utils.getFromCache(PERSONS_CACHE)
    
    if targets != None:
        print "Target list found on cache!"
    else:
        targets = Persons.loadPoliticians(targetsFile)
        Utils.putInCache(targets, PERSONS_CACHE) 
    
    print "SentiTokens: " + sentiTokensFile + "\nExceptTokens: " +  exceptSentiTokens
    
    sentiTokens = Utils.getFromCache(SENTI_CACHE)  
    
    if sentiTokens != None:
        print "SentiTokens found on cache!"
    else:
        sentiTokens = SentiTokens.loadSentiTokens(sentiTokensFile,exceptSentiTokens)
        Utils.putInCache(sentiTokens, SENTI_CACHE)
    
    print "Multiword Tokenizer: " + multiWordsFile
    
    multiWordTokenizer = Utils.getFromCache(MULTIWORD_CACHE)
    
    if multiWordTokenizer != None:
        print "Multiword Tokenizer found on cache"
    else:
        multiWordTokenizer = MultiWordHelper(multiWordsFile)
        multiWordTokenizer.addMultiWords(Persons.getMultiWords(targets))
        multiWordTokenizer.addMultiWords(SentiTokens.getMultiWords(sentiTokens))
        Utils.putInCache(multiWordTokenizer, MULTIWORD_CACHE)
    
    print "Resources loaded! Starting analysis..."
    
    
    targetDetector = TargetDetector(targets)
    #TODO:Estes senhores já não precisam de receber os targets
    naive = Naive(sentiTokens)
    rules = Rules(None,sentiTokens)   
    
    analyzedTweets = []
    rejectedTweets = []
    
    for tweet in tweets:
        
        t0 = datetime.now()
        
        tweetsWithTarget = targetDetector.inferTarget(tweet)
        
        if tweetsWithTarget != None :
            
            #a tweet can have multiple targets (in that case the message is replicated)
            for tweet in tweetsWithTarget:       
        
                #try to classify with rules...
                analyzedTweet = rules.inferPolarity(tweet,False)
                
                #if not possible use the naive classifier
                if analyzedTweet.polarity == 0:
                    analyzedTweet = naive.inferPolarity(analyzedTweet,False)
                
                #If the polarity is still 0 it can mean:
                #1) The sum of the polarities of the sentiTokens is 0,
                #2) There was no evidence usable to assess the sentiment                
                if analyzedTweet.polarity == 0:
                    
                    regex = ur'(\W|^)sentiTokens:(.*?);(\W|$)'            
                    
                    #Try to find if there are any evidence of matched sentiTokens
                    match = re.search(regex,analyzedTweet.metadata).group(2)
                    
                    if debug:
                        print "match: ", match
                    
                    if len(match.strip(' ')) == 0:
        
                        rejectedTweets.append(analyzedTweet)
                    else:
                        analyzedTweets.append(analyzedTweet)
                else:
                    analyzedTweets.append(analyzedTweet)
                
                t1 = datetime.now()
            
                print tweet.id + " ("+ str(t1-t0) + ")"
            
    logClassifiedTweets(rejectedTweets, "./rejectedTweets.csv")    
    
    return analyzedTweets

Example #2

Show file

File: PPCTopSentiTokens.py Project: craighagerman/reaction-tweetonizer

def processComments(sentiTokensFile, exceptSentiTokens, multiWordsFile,
                    messages):
    """ 
        Processes a list of tweets, for each:
        1. Identifies the target
        2. If the message contains a target of interest infer the polarity
        
        targetsFile -> path to the politicians list file
        sentiTokensFile -> path to the sentiTokens list file
        exceptSentiTokens -> path to the list of sentiTokens that cannot lose their accents without
                             causing ambiguity for ex: más -> mas
        multiWordsFile -> path to a file that contains the words that should be considered as a unit, e.g. "primeiro ministro"
         tweets -> list of tweets
    """

    if debug:
        print "DEBUG DEBUG DEBUG DEBUG DEBUG DEBUG DEBUG \n\n"

    print "Loading resources..."

    print "SentiTokens: " + sentiTokensFile + "\nExceptTokens: " + exceptSentiTokens

    sentiTokens = Utils.getFromCache(SENTI_CACHE)

    if sentiTokens != None:
        print "SentiTokens found on cache!"
    else:
        sentiTokens = SentiTokens.loadSentiTokens(sentiTokensFile,
                                                  exceptSentiTokens)
        Utils.putInCache(sentiTokens, SENTI_CACHE)

    print "Multiword Tokenizer: " + multiWordsFile

    multiWordTokenizer = Utils.getFromCache(MULTIWORD_CACHE)

    if multiWordTokenizer != None:
        print "Multiword Tokenizer found on cache"
    else:
        multiWordTokenizer = MultiWordHelper(multiWordsFile)
        multiWordTokenizer.addMultiWords(
            SentiTokens.getMultiWords(sentiTokens))
        Utils.putInCache(multiWordTokenizer, MULTIWORD_CACHE)

    print "Resources loaded! Starting analysis..."

    naive = Naive(sentiTokens)
    #rules = Rules(None,sentiTokens)
    rows = 0

    positiveTokens = {}
    negativeTokens = {}

    for message in messages:

        rows += 1

        t0 = datetime.now()

        tokens = naive.tokenizeSentiTokens(message, True)

        for token in tokens[0]:

            if token not in positiveTokens:
                positiveTokens[token] = 1
            else:
                positiveTokens[token] += 1

        for token in tokens[1]:

            if token not in negativeTokens:
                negativeTokens[token] = 1
            else:
                negativeTokens[token] += 1

        if rows % 1000 == 0 and rows != 0:

            writeResults(positiveTokens, "./positive" + str(rows) + ".csv")
            writeResults(negativeTokens, "./negative" + str(rows) + ".csv")

        if debug:
            t1 = datetime.now()
            print "Time: " + str(t1 - t0)
            print message.sentence
            print "positive: ", tokens[0]
            print "negative: ", tokens[1]
            print "\n------------------\n"

    writeResults(positiveTokens, "./positive.csv")
    writeResults(negativeTokens, "./negative.csv")
    print "done!"