Code Example #1
def processSingleSentence(politiciansFile, sentiTokensFile, exceptSentiTokens,
                          sentence, webOutput):

    print "<br>Loading resources...<br>Politicians: " + politiciansFile
    politicians = Persons.loadPoliticians(politiciansFile)

    print "<br>SentiTokens: " + sentiTokensFile + "<br>ExceptTokens: " + exceptSentiTokens
    sentiTokens = SentiTokens.loadSentiTokens(sentiTokensFile,
                                              exceptSentiTokens)

    naive = Naive(politicians, sentiTokens)

    singleSentence = Opinion(1, sentence=sentence)
    targets = naive.inferTarget(singleSentence)
    results = []

    print "<br>Inferring targets...<br>"

    if targets is not None:

        print "<br>Inferring polarity...<br>"

        for target in targets:

            rules = Rules(politicians, sentiTokens)
            results.append(rules.inferPolarity(target, False))
    else:
        print "<br>No targets were identified...<br>"
    if webOutput:
        return printResultsWeb(results, sentence)
    else:
        return printResultsConsole(results)
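
A minimal usage sketch for the function above, assuming it and its dependencies are already in scope; the resource paths are the ones used elsewhere in these examples, and the test sentence is illustrative:

# Hedged sketch: processSingleSentence is assumed importable from its module
results = processSingleSentence("../Resources/politicians.txt",
                                "../Resources/sentiTokens-2011-05-30.txt",
                                "../Resources/SentiLexAccentExcpt.txt",
                                u"uma frase de teste sobre um politico",
                                False)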
Code Example #2
def getMultiWordsTokenizer(politicians, sentiTokens):

    multiWordsFile = "../Resources/multiwords.txt"

    multiWordTokenizer = EuroOpinionizers.MultiWordHandler(multiWordsFile)
    multiWordTokenizer.addMultiWords(Persons.getMultiWords(politicians))
    multiWordTokenizer.addMultiWords(SentiTokens.getMultiWords(sentiTokens))

    return multiWordTokenizer
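
A sketch of how the returned tokenizer might be used, based on the tokenizeMultiWords call in the processTweets examples below; politicians and sentiTokens are assumed to be already loaded:

tokenizer = getMultiWordsTokenizer(politicians, sentiTokens)
# Multiword expressions such as "primeiro ministro" become single tokens
tagged = tokenizer.tokenizeMultiWords(u"o primeiro ministro falou hoje")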
Code Example #3
def getNaiveClassifier(politicians, sentiTokens):
    # Note: both arguments are ignored; they are shadowed by the values loaded
    # from the resource files below (see the sketch after this example)

    politiciansFile = "../Resources/politicians.txt"
    sentiTokensFile = "../Resources/sentiTokens-2011-05-30.txt"
    exceptTokensFile = "../Resources/SentiLexAccentExcpt.txt"

    politicians = Persons.loadPoliticians(politiciansFile)
    sentiTokens = SentiTokens.loadSentiTokens(sentiTokensFile,
                                              exceptTokensFile)

    return Opinionizers.Naive(politicians, sentiTokens)
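
Since getNaiveClassifier shadows its arguments, a hedged sketch of the likely intent, loading from the resource files only when the caller passes nothing:

def getNaiveClassifier(politicians=None, sentiTokens=None):

    # Fall back to the resource files only when no values were supplied
    if politicians is None:
        politicians = Persons.loadPoliticians("../Resources/politicians.txt")
    if sentiTokens is None:
        sentiTokens = SentiTokens.loadSentiTokens(
            "../Resources/sentiTokens-2011-05-30.txt",
            "../Resources/SentiLexAccentExcpt.txt")

    return Opinionizers.Naive(politicians, sentiTokens)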
Code Example #4
def processSingleSentence(politiciansFile, sentiTokensFile, exceptSentiTokens,
                          sentence, webOutput):

    print "Loading resources..."
    print "Politicians: " + politiciansFile
    politicians = Persons.loadPoliticians(politiciansFile)

    print "SentiTokens: " + sentiTokensFile
    print "ExceptTokens: " + exceptSentiTokens
    sentiTokens = SentiTokens.loadSentiTokens(sentiTokensFile,
                                              exceptSentiTokens)

    naive = Naive(politicians, sentiTokens)

    singleSentence = Opinion(1, sentence=sentence)

    print "Inferring targets..."
    targets = naive.inferTarget(singleSentence)

    results = []

    if targets is not None:

        print "Inferring polarity..."

        for target in targets:

            rules = Rules(politicians, sentiTokens)

            #if not possible to classify with rules use the naive classifier
            classifiedTweet = rules.inferPolarity(target, False)

            if classifiedTweet.polarity == 0:
                classifiedTweet = naive.inferPolarity(classifiedTweet, True)

            results.append(classifiedTweet)
    else:
        print "No targets were identified..."
    if webOutput:
        return printResultsWeb(results, sentence)
    else:
        return printResultsConsole(results)
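
The rules-first, naive-fallback step above recurs throughout these examples. A minimal standalone version of the pattern, assuming two classifiers that expose inferPolarity and return objects carrying a polarity attribute:

def classifyWithFallback(rules, naive, opinion):
    # Try the rule-based classifier first...
    classified = rules.inferPolarity(opinion, False)
    # ...and fall back to the naive classifier when no rule decided
    if classified.polarity == 0:
        classified = naive.inferPolarity(classified, True)
    return classified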
Code Example #5
#!/usr/local/bin/python3
import Persons

person = Persons.Person()
person.FirstName = "James"
person.LastName = "Cameron"
print(person)
print(person.GetObjectCount())

import Customers

customer = Customers.Customer()
customer.Id = 1
customer.FirstName = "Alisha"
customer.LastName = "Banner"
print("This is the Customer Object: ",  customer)

import DataProcessing

file = DataProcessing.DataProcessing()
file.FileName = "Testfile.txt"
file.TextData = "Blah"
file.SaveData()
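
The Persons, Customers and DataProcessing classes themselves are not shown in the source. A hypothetical Person sketch that would satisfy the calls above; the attribute names and the counter behind GetObjectCount are inferred from usage:

class Person(object):
    _instances = 0  # shared instance counter behind GetObjectCount()

    def __init__(self):
        Person._instances += 1
        self.FirstName = ""
        self.LastName = ""

    def GetObjectCount(self):
        return Person._instances

    def __str__(self):
        return self.FirstName + " " + self.LastName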
Code Example #6
File: TestHarness2.py  Project: gjkim44/Test
print("Test the DataProcessor.File class")
objDP = DataProcessor.File()
objDP.FileName = "Test.txt"
objDP.TextData = "This is a test"
strMessage = objDP.SaveData()
print(strMessage)

print("\n Test the DataProcessor.Database class")
try:
    print("Trying to create an object, but the class is not ready")
    objDP = DataProcessor.Database()
except:
    print("This should fail")

print("\n Test the Persons.Person class")
objP = Persons.Person()
objP.FirstName = "Bob"
objP.LastName = "Smith"
print(objP.ToString())

print("\n Test the Employees.Employee class")
objE = Employees.Employee(1, "Bob")
# objE.Id = 1
# objE.FirstName = "Bob"
# objE.LastName = "Smith"
print(objE.ToString())

print("\n Test the Employee.EmployeeList class")
objEL = Employees.EmployeeList()
try:
    print("Trying the wrong object type")
    # The source example is truncated at this point; an except clause is
    # supplied so the snippet stays syntactically valid
except Exception as error:
    print(error)
Code Example #7
def processTweets_old(targetsFile,sentiTokensFile,exceptSentiTokens,multiWordsFile,tweets):
    
    """ 
        Processes a list of tweets:
        1. Identify target
        2. If target is one of the politicians infer the comment's polarity
        
        politiciansFile -> path to the politicians list file
        sentiTokensFile -> path to the sentiTokens list file
        exceptSentiTokens -> path to the list of sentiTokens that cannot lose their accents without
                             causing ambiguity for ex: más -> mas
         tweets -> list of tweets
    """
    
    print "hell yeah!"
    print "Loading resources...\nTargets: " + targetsFile
        
    targets = getFromCache(PERSONS_CACHE)
    
    if targets is not None:
        print "Target list found on cache!"
    else:
        targets = Persons.loadPoliticians(targetsFile)
        putInCache(targets, PERSONS_CACHE) 
    
    print "SentiTokens: " + sentiTokensFile + "\nExceptTokens: " +  exceptSentiTokens
    
    sentiTokens = getFromCache(SENTI_CACHE)  
    
    if sentiTokens is not None:
        print "SentiTokens found on cache!"
    else:
        sentiTokens = SentiTokens.loadSentiTokens(sentiTokensFile,exceptSentiTokens)
        putInCache(sentiTokens, SENTI_CACHE)
    
    print "Multiword Tokenizer: " + multiWordsFile
    
    multiWordTokenizer = getFromCache(MULTIWORD_CACHE)
    
    if multiWordTokenizer is not None:
        print "Multiword Tokenizer found on cache"
    else:
        multiWordTokenizer = MultiWordHandler(multiWordsFile)
        multiWordTokenizer.addMultiWords(Persons.getMultiWords(targets))
        multiWordTokenizer.addMultiWords(SentiTokens.getMultiWords(sentiTokens))
        putInCache(multiWordTokenizer, MULTIWORD_CACHE)
    
    print  "Inferring polarity..."
    
    naive = Naive(targets,sentiTokens)
    rules = Rules(targets,sentiTokens)   
    
    analyzedTweets = []
    rejectedTweets = []
    
    for tweet in tweets:
        
        t0 = datetime.now()
        
        #print "antes"
        
        tweetsWithTarget = naive.inferTarget(tweet)
        
        if tweetsWithTarget is not None:
            
            #a tweet can have multiple targets (in that case the message is replicated)
            for tweet in tweetsWithTarget:       
        
                #try to classify with rules...
                analyzedTweet = rules.inferPolarity(tweet,False)
                print "depois"
                #if not possible use the naive classifier
                if analyzedTweet.polarity == 0:
                    analyzedTweet = naive.inferPolarity(analyzedTweet,False)
                
                if analyzedTweet.polarity == 0:
                    
                    regex = ur'(\W|^)sentiTokens:(.*?);(\W|$)'            
                    
                    match = re.search(regex,analyzedTweet.metadata).group(2)
                    print "match: ", match
                    if len(match.strip(' ')) == 0:
        
                        rejectedTweets.append(analyzedTweet)
                    else:
                        analyzedTweets.append(analyzedTweet)
                else:
                    analyzedTweets.append(analyzedTweet)
                
                t1 = datetime.now()
            
                print tweet.id + " ("+ str(t1-t0) + ")"
            
    logClassifiedTweets(rejectedTweets, "./rejectedTweets.csv")    
    
    return analyzedTweets    
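
getFromCache and putInCache are not shown in the source. A minimal sketch of what they could be, assuming the caches are plain pickle files on disk; the real helpers may differ:

import os
import pickle

def getFromCache(cachePath):
    # Return the unpickled object, or None on a cache miss
    if not os.path.exists(cachePath):
        return None
    with open(cachePath, "rb") as cacheFile:
        return pickle.load(cacheFile)

def putInCache(obj, cachePath):
    with open(cachePath, "wb") as cacheFile:
        pickle.dump(obj, cacheFile)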
Code Example #8
def processTweets(targetsFile,sentiTokensFile,exceptSentiTokens,multiWordsFile,tweets):
    
    """ 
        Processes a list of tweets:
        1. Identify target
        2. If target is one of the politicians infer the comment's polarity
        
        politiciansFile -> path to the politicians list file
        sentiTokensFile -> path to the sentiTokens list file
        exceptSentiTokens -> path to the list of sentiTokens that cannot lose their accents without
                             causing ambiguity for ex: más -> mas
         tweets -> list of tweets
    """
    
    print "Loading resources...\nTargets: " + targetsFile
        
    targets = None  # getFromCache(WIN_PERSONS_CACHE) is disabled here; always reload

    if targets is not None:
        print "Target list found on cache!"
    else:
        targets = Persons.loadPoliticians(targetsFile)
        putInCache(targets, WIN_PERSONS_CACHE) 
    
    print "SentiTokens: " + sentiTokensFile + "\nExceptTokens: " +  exceptSentiTokens
    
    sentiTokens = None  # getFromCache(WIN_SENTI_CACHE) is disabled here; always reload

    if sentiTokens is not None:
        print "SentiTokens found on cache!"
    else:
        sentiTokens = SentiTokens.loadSentiTokens(sentiTokensFile,exceptSentiTokens)
       
        putInCache(sentiTokens, WIN_SENTI_CACHE)
    
    print "Multiword Tokenizer: " + multiWordsFile
    
    multiWordTokenizer = None  # getFromCache(WIN_MULTIWORD_CACHE) is disabled here; always rebuild

    if multiWordTokenizer is not None:
        print "Multiword Tokenizer found on cache"
    else:
        multiWordTokenizer = MultiWordHandler(multiWordsFile)
        multiWordTokenizer.addMultiWords(Persons.getMultiWords(targets))
        multiWordTokenizer.addMultiWords(SentiTokens.getMultiWords(sentiTokens))
        putInCache(multiWordTokenizer, WIN_MULTIWORD_CACHE)
    
    print  "Inferring polarity..."
    
    naive = Naive(targets,sentiTokens)
    rules = Rules(targets,sentiTokens)   
    
    analyzedTweets = []
    rejectedTweets = []
    
    for tweet in tweets:
        
        t0 = datetime.now()
        
        rulesScore,rulesInfo = rules.getRulesScore(tweet,True)
        cluesScore,clueInfo = rules.getCluesScore(tweet,True)        
        sentiScore,sentiInfo = naive.getSentiScore(tweet,True)
        
        tweetScore = int(sentiScore) + int(rulesScore) + int(cluesScore)
        
        if tweetScore > 0:
            tweet.polarity = 1
        elif tweetScore < 0:
            tweet.polarity = -1
        else:
            tweet.polarity = 0
        
        tweet.metadata = sentiInfo+";"+clueInfo+";"+rulesInfo 
        
        if tweet.polarity == 0:
            
            regex = ur'(\W|^)sentiTokens:(.*?);(\W|$)'            
            
            match = re.search(regex,tweet.metadata).group(2)
            
            if len(match.strip(' ')) == 0:

                rejectedTweets.append(tweet)
            else:
                analyzedTweets.append(tweet)
        else:
            analyzedTweets.append(tweet)
        
        t1 = datetime.now()
        
        print tweet.id + " ("+ str(t1-t0) + ")"
        
    logClassifiedTweets(rejectedTweets, "./rejectedTweets.csv")    
    
    return analyzedTweets            
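
The polarity decision above reduces to the sign of the combined score. The same step as a standalone function, with a worked example:

def combinedPolarity(sentiScore, rulesScore, cluesScore):
    total = int(sentiScore) + int(rulesScore) + int(cluesScore)
    if total > 0:
        return 1
    elif total < 0:
        return -1
    return 0

# e.g. combinedPolarity(2, -1, 0) == 1 and combinedPolarity(1, -1, 0) == 0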
Code Example #9
def getFeatures(targetsFile,sentiTokensFile,exceptSentiTokens,multiWordsFile,listOfTweets):
    
    print "Loading resources...\nTargets: " + targetsFile
        
    targets = None  # getFromCache(PERSONS_CACHE) is disabled here; always reload

    if targets is not None:
        print "Target list found on cache!"
    else:
        targets = Persons.loadPoliticians(targetsFile)
        putInCache(targets, PERSONS_CACHE) 
    
    print "SentiTokens: " + sentiTokensFile + "\nExceptTokens: " +  exceptSentiTokens
    
    sentiTokens = None  # getFromCache(SENTI_CACHE) is disabled here; always reload

    if sentiTokens is not None:
        print "SentiTokens found on cache!"
    else:
        sentiTokens = SentiTokens.loadSentiTokens(sentiTokensFile,exceptSentiTokens)
        putInCache(sentiTokens, SENTI_CACHE)
    
    print "Multiword Tokenizer: " + multiWordsFile
    
    multiWordTokenizer = None  # getFromCache(MULTIWORD_CACHE) is disabled here; always rebuild

    if multiWordTokenizer is not None:
        print "Multiword Tokenizer found on cache"
    else:
        multiWordTokenizer = MultiWordHandler(multiWordsFile)
        multiWordTokenizer.addMultiWords(Persons.getMultiWords(targets))
        multiWordTokenizer.addMultiWords(SentiTokens.getMultiWords(sentiTokens))
        putInCache(multiWordTokenizer, MULTIWORD_CACHE)
    
    print  "Calculating features..."
    
    naive = Naive(targets,sentiTokens)
    rules = Rules(targets,sentiTokens)   
    
    analyzedTweets = []
    rejectedTweets = []
    
    for tweet in listOfTweets:
        
        feats = []
        
        t0 = datetime.now()
        
        rulesFeats = rules.getRulesFeats(tweet,True)
        cluesFeats = rules.getCluesFeats(tweet,True)        
        sentiFeats = naive.getSentiFeats(tweet,True)
        
        x = (int(rulesFeats[0]) + int(rulesFeats[1]) +
             int(cluesFeats[0]) + int(cluesFeats[1]) +
             int(sentiFeats[0]) + int(sentiFeats[1]) + int(sentiFeats[2]))
        
        if x == 0:
            tweetTest = naive.inferPolarity(tweet, True)
            regex = ur'(\W|^)sentiTokens:(.*?);(\W|$)'            
            
            match = re.search(regex,tweetTest.metadata).group(2)
            
            if len(match.strip(' ')) == 0:
                
                rejectedTweets.append(tweet)
                print "rejected: "
                print tweet.tostring()
            else:
                feats.append(tweet.id)
                feats.append(rulesFeats[0])
                feats.append(rulesFeats[1])
                feats.append(cluesFeats[0])
                feats.append(cluesFeats[1])
                feats.append(sentiFeats[0])
                feats.append(sentiFeats[1])
                feats.append(sentiFeats[2])
                feats.append(int(tweet.irony))
                
                analyzedTweets.append(feats)
        else:
            feats.append(tweet.id)
            feats.append(rulesFeats[0])
            feats.append(rulesFeats[1])
            feats.append(cluesFeats[0])
            feats.append(cluesFeats[1])
            feats.append(sentiFeats[0])
            feats.append(sentiFeats[1])
            feats.append(sentiFeats[2])
            feats.append(int(tweet.irony))
            
            analyzedTweets.append(feats)
        
        t1 = datetime.now()
        
        print tweet.id + " ("+ str(t1-t0) + ")"
        
    #logClassifiedTweets(rejectedTweets, "./rejectedTweets.csv")    
    
    return analyzedTweets            
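
The two feats-building branches above append the same nine values. A sketch of the loop body with that duplication factored into a helper:

def buildFeats(tweet, rulesFeats, cluesFeats, sentiFeats):
    # One row per tweet: id, rule feats, clue feats, senti feats, irony flag
    return [tweet.id,
            rulesFeats[0], rulesFeats[1],
            cluesFeats[0], cluesFeats[1],
            sentiFeats[0], sentiFeats[1], sentiFeats[2],
            int(tweet.irony)]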
Code Example #10
def processTweets(politiciansFile, sentiTokensFile, exceptSentiTokens,
                  multiWordsFile, tweets):
    """ 
        Processes a list of tweets:
        1. Identify target
        2. If target is one of the politicians infer the comment's polarity
        
        politiciansFile -> path to the politicians list file
        sentiTokensFile -> path to the sentiTokens list file
        exceptSentiTokens -> path to the list of sentiTokens that cannot lose their accents without
                             causing ambiguity for ex: más -> mas
         tweets -> list of tweets
    """

    print "Loading resources...\nPoliticians: " + politiciansFile
    politicians = Persons.loadPoliticians(politiciansFile)

    print "SentiTokens: " + sentiTokensFile + "\nExceptTokens: " + exceptSentiTokens
    sentiTokens = SentiTokens.loadSentiTokens(sentiTokensFile,
                                              exceptSentiTokens)

    print "Multiword Tokenizer " + multiWordsFile
    multiWordTokenizer = MultiWordHandler(multiWordsFile)
    multiWordTokenizer.addMultiWords(Persons.getMultiWords(politicians))
    multiWordTokenizer.addMultiWords(SentiTokens.getMultiWords(sentiTokens))

    naive = Naive(politicians, sentiTokens)

    targetedTweets = {}
    classifiedTweets = {}

    #Process tweets...
    #First step: infer targets and create a dictionary {target,listOfTweets}
    print "Identifying targets..."

    for tweet in tweets:

        tweetsWithTarget = naive.inferTarget(tweet)

        if tweetsWithTarget is not None:

            #a tweet can have multiple targets (in that case the message is replicated)
            for tweet in tweetsWithTarget:

                if tweet.target not in targetedTweets:

                    targetedTweets[tweet.target] = []

                tweet.taggedSentence = multiWordTokenizer.tokenizeMultiWords(
                    tweet.sentence)
                targetedTweets[tweet.target].append(tweet)

    print len(targetedTweets), " targets identified! Inferring polarity..."

    rules = Rules(politicians, sentiTokens)

    #Second step infer polarity
    for target, tweets in targetedTweets.items():

        for tweet in tweets:

            if target not in classifiedTweets:
                classifiedTweets[target] = []

            #try to classify with rules...
            classifiedTweet = rules.inferPolarity(tweet, True)

            #if not possible use the naive classifier
            if classifiedTweet.polarity == 0:
                classifiedTweet = naive.inferPolarity(classifiedTweet, True)

            classifiedTweets[target].append(classifiedTweet)

    return classifiedTweets
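
A sketch of consuming the returned {target: tweets} dictionary, assuming the resource path variables are already defined as in the examples above:

classifiedTweets = processTweets(politiciansFile, sentiTokensFile,
                                 exceptSentiTokens, multiWordsFile, tweets)

for target, targetTweets in classifiedTweets.items():
    positive = sum(1 for t in targetTweets if t.polarity > 0)
    print target, "->", positive, "positive of", len(targetTweets)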
Code Example #11
def processTweets(targetsFile,sentiTokensFile,exceptSentiTokens,multiWordsFile,tweets):
    
    """ 
        Processes a list of tweets, for each:
        1. Identifies the target
        2. If the message contains a target of interest infer the polarity
        
        targetsFile -> path to the politicians list file
        sentiTokensFile -> path to the sentiTokens list file
        exceptSentiTokens -> path to the list of sentiTokens that cannot lose their accents without
                             causing ambiguity for ex: más -> mas
        multiWordsFile -> path to a file that contains the words that should be considered as a unit, e.g. "primeiro ministro"
         tweets -> list of tweets
    """
    
    print "hell yeah!"
    print "Loading resources...\nTargets: " + targetsFile
        
    targets = Utils.getFromCache(PERSONS_CACHE)
    
    if targets is not None:
        print "Target list found on cache!"
    else:
        targets = Persons.loadPoliticians(targetsFile)
        Utils.putInCache(targets, PERSONS_CACHE) 
    
    print "SentiTokens: " + sentiTokensFile + "\nExceptTokens: " +  exceptSentiTokens
    
    sentiTokens = Utils.getFromCache(SENTI_CACHE)  
    
    if sentiTokens is not None:
        print "SentiTokens found on cache!"
    else:
        sentiTokens = SentiTokens.loadSentiTokens(sentiTokensFile,exceptSentiTokens)
        Utils.putInCache(sentiTokens, SENTI_CACHE)
    
    print "Multiword Tokenizer: " + multiWordsFile
    
    multiWordTokenizer = Utils.getFromCache(MULTIWORD_CACHE)
    
    if multiWordTokenizer is not None:
        print "Multiword Tokenizer found on cache"
    else:
        multiWordTokenizer = MultiWordHelper(multiWordsFile)
        multiWordTokenizer.addMultiWords(Persons.getMultiWords(targets))
        multiWordTokenizer.addMultiWords(SentiTokens.getMultiWords(sentiTokens))
        Utils.putInCache(multiWordTokenizer, MULTIWORD_CACHE)
    
    print "Resources loaded! Starting analysis..."
    
    
    targetDetector = TargetDetector(targets)
    #TODO: these no longer need to receive the targets
    naive = Naive(sentiTokens)
    rules = Rules(None,sentiTokens)   
    
    analyzedTweets = []
    rejectedTweets = []
    
    for tweet in tweets:
        
        t0 = datetime.now()
        
        tweetsWithTarget = targetDetector.inferTarget(tweet)
        
        if tweetsWithTarget is not None:
            
            #a tweet can have multiple targets (in that case the message is replicated)
            for tweet in tweetsWithTarget:       
        
                #try to classify with rules...
                analyzedTweet = rules.inferPolarity(tweet,False)
                
                #if not possible use the naive classifier
                if analyzedTweet.polarity == 0:
                    analyzedTweet = naive.inferPolarity(analyzedTweet,False)
                
                #If the polarity is still 0 it can mean:
                #1) The sum of the polarities of the sentiTokens is 0,
                #2) There was no evidence usable to assess the sentiment                
                if analyzedTweet.polarity == 0:
                    
                    regex = ur'(\W|^)sentiTokens:(.*?);(\W|$)'            
                    
                    #Try to find if there are any evidence of matched sentiTokens
                    match = re.search(regex,analyzedTweet.metadata).group(2)
                    
                    if debug:
                        print "match: ", match
                    
                    if len(match.strip(' ')) == 0:
        
                        rejectedTweets.append(analyzedTweet)
                    else:
                        analyzedTweets.append(analyzedTweet)
                else:
                    analyzedTweets.append(analyzedTweet)
                
                t1 = datetime.now()
            
                print tweet.id + " ("+ str(t1-t0) + ")"
            
    logClassifiedTweets(rejectedTweets, "./rejectedTweets.csv")    
    
    return analyzedTweets    
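
The rejection test above pulls the sentiTokens segment out of the metadata field. The same check as a predicate; note that re.search returns None when the segment is missing, so the original .group(2) call silently assumes the classifiers always write it:

import re

def hasSentiTokenEvidence(metadata):
    match = re.search(ur'(\W|^)sentiTokens:(.*?);(\W|$)', metadata)
    # Guard against metadata without a sentiTokens segment at all
    if match is None:
        return False
    return len(match.group(2).strip(' ')) != 0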
Code Example #12
# This harness is meant to run as a script; importing it raises immediately
if __name__ == "__main__":
    import Customers, DataProcessor, Persons
else:
    raise Exception("This file was not created to be imported")

p1 = Persons.Person("Bob", "Smith", "50")
print(p1)

e1 = Customers.Customer(1, "Sue", "Jones", "49",
                        "1111 Main St  Seattle, Wa 98103", "206 931 6127",
                        "*****@*****.**")
# print(e1)

Customers.CustomerList.AddCustomer(e1)
print(Customers.CustomerList.ToString())
Code Example #13
def getPoliticians():

    politiciansFile = "../Resources/politicians.txt"
    politicians = Persons.loadPoliticians(politiciansFile)

    return politicians