# Example #1
# 0
def processTweets(targetsFile,sentiTokensFile,exceptSentiTokens,multiWordsFile,tweets):
    
    """ 
        Processes a list of tweets:
        1. Identify target
        2. If target is one of the politicians infer the comment's polarity
        
        politiciansFile -> path to the politicians list file
        sentiTokensFile -> path to the sentiTokens list file
        exceptSentiTokens -> path to the list of sentiTokens that cannot lose their accents without
                             causing ambiguity for ex: más -> mas
         tweets -> list of tweets
    """
    
    print "Loading resources...\nTargets: " + targetsFile
        
    targets = None#getFromCache(WIN_PERSONS_CACHE)
    
    if targets != None:
        print "Target list found on cache!"
    else:
        targets = Persons.loadPoliticians(targetsFile)
        putInCache(targets, WIN_PERSONS_CACHE) 
    
    print "SentiTokens: " + sentiTokensFile + "\nExceptTokens: " +  exceptSentiTokens
    
    sentiTokens = None#getFromCache(WIN_SENTI_CACHE)  
    
    if sentiTokens != None:
        print "SentiTokens found on cache!"
    else:
        sentiTokens = SentiTokens.loadSentiTokens(sentiTokensFile,exceptSentiTokens)
       
        putInCache(sentiTokens, WIN_SENTI_CACHE)
    
    print "Multiword Tokenizer: " + multiWordsFile
    
    multiWordTokenizer = None#getFromCache(WIN_MULTIWORD_CACHE)
    
    if multiWordTokenizer != None:
        print "Multiword Tokenizer found on cache"
    else:
        multiWordTokenizer = MultiWordHandler(multiWordsFile)
        multiWordTokenizer.addMultiWords(Persons.getMultiWords(targets))
        multiWordTokenizer.addMultiWords(SentiTokens.getMultiWords(sentiTokens))
        putInCache(multiWordTokenizer, WIN_MULTIWORD_CACHE)
    
    print  "Inferring polarity..."
    
    naive = Naive(targets,sentiTokens)
    rules = Rules(targets,sentiTokens)   
    
    analyzedTweets = []
    rejectedTweets = []
    
    for tweet in tweets:
        
        t0 = datetime.now()
        
        rulesScore,rulesInfo = rules.getRulesScore(tweet,True)
        cluesScore,clueInfo = rules.getCluesScore(tweet,True)        
        sentiScore,sentiInfo = naive.getSentiScore(tweet,True)
        
        tweetScore = int(sentiScore) + int(rulesScore) + int(cluesScore)
        
        if tweetScore > 0:
            tweet.polarity = 1
        elif tweetScore < 0:
            tweet.polarity = -1
        else:
            tweet.polarity = 0
        
        tweet.metadata = sentiInfo+";"+clueInfo+";"+rulesInfo 
        
        if tweet.polarity == 0:
            
            regex = ur'(\W|^)sentiTokens:(.*?);(\W|$)'            
            
            match = re.search(regex,tweet.metadata).group(2)
            
            if len(match.strip(' ')) == 0:

                rejectedTweets.append(tweet)
            else:
                analyzedTweets.append(tweet)
        else:
            analyzedTweets.append(tweet)
        
        t1 = datetime.now()
        
        print tweet.id + " ("+ str(t1-t0) + ")"
        
    logClassifiedTweets(rejectedTweets, "./rejectedTweets.csv")    
    
    return analyzedTweets