def processSingleSentence(politiciansFile, sentiTokensFile, exceptSentiTokens, sentence, webOutput): print "<br>Loading resources...<br>Politicians: " + politiciansFile politicians = Persons.loadPoliticians(politiciansFile) print "<br>SentiTokens: " + sentiTokensFile + "<br>ExceptTokens: " + exceptSentiTokens sentiTokens = SentiTokens.loadSentiTokens(sentiTokensFile, exceptSentiTokens) naive = Naive(politicians, sentiTokens) singleSentence = Opinion(1, sentence=sentence) targets = naive.inferTarget(singleSentence) results = [] print "<br>Inferring targets...<br>" if targets != None: print "<br>Inferring polarity...<br>" for target in targets: rules = Rules(politicians, sentiTokens) results.append(rules.inferPolarity(target, False)) else: print "<br>No targets were identified...<br>" if webOutput: return printResultsWeb(results, sentence) else: return printResultsConsole(results)
def processSingleSentence(politiciansFile,sentiTokensFile,exceptSentiTokens,sentence,webOutput): print "<br>Loading resources...<br>Politicians: " + politiciansFile politicians = Persons.loadPoliticians(politiciansFile) print "<br>SentiTokens: " + sentiTokensFile + "<br>ExceptTokens: " + exceptSentiTokens sentiTokens = SentiTokens.loadSentiTokens(sentiTokensFile,exceptSentiTokens) naive = Naive(politicians,sentiTokens) singleSentence = Opinion(1, sentence=sentence) targets = naive.inferTarget(singleSentence) results = [] print "<br>Inferring targets...<br>" if targets != None: print "<br>Inferring polarity...<br>" for target in targets: rules = Rules(politicians,sentiTokens) results.append(rules.inferPolarity(target, False)) else: print "<br>No targets were identified...<br>" if webOutput: return printResultsWeb(results,sentence) else: return printResultsConsole(results)
def getMultiWordsTokenizer(politicians,sentiTokens):
    """
    Creates a EuroOpinionizers.MultiWordHandler from the default multiwords
    resource file and registers the multiwords obtained from the given
    politicians and sentiTokens.

    politicians -> collection handed to Persons.getMultiWords
    sentiTokens -> collection handed to SentiTokens.getMultiWords

    Returns the configured handler.
    """

    handler = EuroOpinionizers.MultiWordHandler("../Resources/multiwords.txt")

    # Names first, sentiment vocabulary second (same order as before)
    personMultiWords = Persons.getMultiWords(politicians)
    handler.addMultiWords(personMultiWords)

    sentiMultiWords = SentiTokens.getMultiWords(sentiTokens)
    handler.addMultiWords(sentiMultiWords)

    return handler
def getMultiWordsTokenizer(politicians, sentiTokens):
    """
    Creates a EuroOpinionizers.MultiWordHandler from the default multiwords
    resource file and registers the multiwords obtained from the given
    politicians and sentiTokens.

    politicians -> collection handed to Persons.getMultiWords
    sentiTokens -> collection handed to SentiTokens.getMultiWords

    Returns the configured handler.
    """

    handler = EuroOpinionizers.MultiWordHandler("../Resources/multiwords.txt")

    # Names first, sentiment vocabulary second (same order as before)
    personMultiWords = Persons.getMultiWords(politicians)
    handler.addMultiWords(personMultiWords)

    sentiMultiWords = SentiTokens.getMultiWords(sentiTokens)
    handler.addMultiWords(sentiMultiWords)

    return handler
def getNaiveClassifier(politicians=None, sentiTokens=None):
    """
    Builds an Opinionizers.Naive classifier.

    Previously both arguments were overwritten unconditionally, so whatever
    the caller passed was ignored and the resources were always re-read from
    disk. Now an argument is only replaced by the default resource when it
    is None.

    politicians -> already loaded politicians, or None to load
                   ../Resources/politicians.txt
    sentiTokens -> already loaded sentiTokens, or None to load
                   ../Resources/sentiTokens-2011-05-30.txt together with
                   ../Resources/SentiLexAccentExcpt.txt

    Returns the Opinionizers.Naive instance.
    """

    if politicians is None:
        politicians = Persons.loadPoliticians("../Resources/politicians.txt")

    if sentiTokens is None:
        sentiTokensFile = "../Resources/sentiTokens-2011-05-30.txt"
        exceptTokensFile = "../Resources/SentiLexAccentExcpt.txt"
        sentiTokens = SentiTokens.loadSentiTokens(sentiTokensFile, exceptTokensFile)

    return Opinionizers.Naive(politicians, sentiTokens)
def getNaiveClassifier(politicians=None, sentiTokens=None):
    """
    Builds an Opinionizers.Naive classifier.

    Previously both arguments were overwritten unconditionally, so whatever
    the caller passed was ignored and the resources were always re-read from
    disk. Now an argument is only replaced by the default resource when it
    is None.

    politicians -> already loaded politicians, or None to load
                   ../Resources/politicians.txt
    sentiTokens -> already loaded sentiTokens, or None to load
                   ../Resources/sentiTokens-2011-05-30.txt together with
                   ../Resources/SentiLexAccentExcpt.txt

    Returns the Opinionizers.Naive instance.
    """

    if politicians is None:
        politicians = Persons.loadPoliticians("../Resources/politicians.txt")

    if sentiTokens is None:
        sentiTokensFile = "../Resources/sentiTokens-2011-05-30.txt"
        exceptTokensFile = "../Resources/SentiLexAccentExcpt.txt"
        sentiTokens = SentiTokens.loadSentiTokens(sentiTokensFile, exceptTokensFile)

    return Opinionizers.Naive(politicians, sentiTokens)
def processSingleSentence(politiciansFile, sentiTokensFile, exceptSentiTokens, sentence, webOutput): print "Loading resources..." print "Politicians: " + politiciansFile politicians = Persons.loadPoliticians(politiciansFile) print "SentiTokens: " + sentiTokensFile print "ExceptTokens: " + exceptSentiTokens sentiTokens = SentiTokens.loadSentiTokens(sentiTokensFile, exceptSentiTokens) naive = Naive(politicians, sentiTokens) singleSentence = Opinion(1, sentence=sentence) print "Inferring targets..." targets = naive.inferTarget(singleSentence) results = [] if targets != None: print "Inferring polarity..." for target in targets: rules = Rules(politicians, sentiTokens) #if not possible to classify with rules use the naive classifier classifiedTweet = rules.inferPolarity(target, False) if classifiedTweet.polarity == 0: classifiedTweet = naive.inferPolarity(classifiedTweet, True) results.append(classifiedTweet) else: print "No targets were identified..." if webOutput: return printResultsWeb(results, sentence) else: return printResultsConsole(results)
def processSingleSentence(politiciansFile,sentiTokensFile,exceptSentiTokens,sentence,webOutput): print "Loading resources..." print "Politicians: " + politiciansFile politicians = Persons.loadPoliticians(politiciansFile) print "SentiTokens: " + sentiTokensFile print "ExceptTokens: " + exceptSentiTokens sentiTokens = SentiTokens.loadSentiTokens(sentiTokensFile,exceptSentiTokens) naive = Naive(politicians,sentiTokens) singleSentence = Opinion(1, sentence=sentence) print "Inferring targets..." targets = naive.inferTarget(singleSentence) results = [] if targets != None: print "Inferring polarity..." for target in targets: rules = Rules(politicians,sentiTokens) #if not possible to classify with rules use the naive classifier classifiedTweet = rules.inferPolarity(target, False) if classifiedTweet.polarity == 0: classifiedTweet = naive.inferPolarity(classifiedTweet,True) results.append(classifiedTweet) else: print "No targets were identified..." if webOutput: return printResultsWeb(results,sentence) else: return printResultsConsole(results)
# !/usr/local/bin/python3 import Persons person = Persons.Person() person.FirstName = "James" person.LastName = "Cameron" print(person) print(person.GetObjectCount()) import Customers customer = Customers.Customer() customer.Id = 1 customer.FirstName = "Alisha" customer.LastName = "Banner" print("This is the Customer Object: ", customer) import DataProcessing file = DataProcessing.DataProcessing() file.FileName = "Testfile.txt" file.TextData = "Blah" file.SaveData()
def processTweets(politiciansFile,sentiTokensFile,exceptSentiTokens,multiWordsFile,tweets): """ Processes a list of tweets: 1. Identify target 2. If target is one of the politicians infer the comment's polarity politiciansFile -> path to the politicians list file sentiTokensFile -> path to the sentiTokens list file exceptSentiTokens -> path to the list of sentiTokens that cannot lose their accents without causing ambiguity for ex: más -> mas tweets -> list of tweets """ print "Loading resources...\nPoliticians: " + politiciansFile politicians = Persons.loadPoliticians(politiciansFile) print "SentiTokens: " + sentiTokensFile + "\nExceptTokens: " + exceptSentiTokens sentiTokens = SentiTokens.loadSentiTokens(sentiTokensFile,exceptSentiTokens) print "Multiword Tokenizer " + multiWordsFile multiWordTokenizer = MultiWordHandler(multiWordsFile) multiWordTokenizer.addMultiWords(Persons.getMultiWords(politicians)) multiWordTokenizer.addMultiWords(SentiTokens.getMultiWords(sentiTokens)) naive = Naive(politicians,sentiTokens) targetedTweets = {} classifiedTweets = {} #Process tweets... #First step: infer targets and create a dictionary {target,listOfTweets} print "Identifying targets..." for tweet in tweets: tweetsWithTarget = naive.inferTarget(tweet) if tweetsWithTarget != None : #a tweet can have multiple targets (in that case the message is replicated) for tweet in tweetsWithTarget: if tweet.target not in targetedTweets: targetedTweets[tweet.target] = [] tweet.taggedSentence = multiWordTokenizer.tokenizeMultiWords(tweet.sentence) targetedTweets[tweet.target].append(tweet) print len(targetedTweets), " targets Identified! Inferring polarity..." rules = Rules(politicians,sentiTokens) #Second step infer polarity for target,tweets in targetedTweets.items(): for tweet in tweets: if target not in classifiedTweets: classifiedTweets[target] = [] #try to classify with rules... 
classifiedTweet = rules.inferPolarity(tweet,True) #if not possible use the naive classifier if classifiedTweet.polarity == 0: classifiedTweet = naive.inferPolarity(classifiedTweet,True) classifiedTweets[target].append(classifiedTweet) return classifiedTweets
# Manual test script for the DataProcessor, Persons and Employees modules.
# NOTE(review): this snippet ends inside an open 'try:' block; the matching
# 'except' clause and the rest of the EmployeeList test are not visible here.

# DataProcessor.File: set name and text, save, print the message SaveData returns
print("Test the DataProcessor.File class")
objDP = DataProcessor.File()
objDP.FileName = "Test.txt"
objDP.TextData = "This is a test"
strMessage = objDP.SaveData()
print(strMessage)

# DataProcessor.Database: instantiation is expected to raise
print("\n Test the DataProcessor.Database class")
try:
    print("Trying to create an object, but the class is not ready")
    objDP = DataProcessor.Database()
except:
    # NOTE(review): bare except also hides unrelated errors (e.g. a NameError)
    print("This should fail")

# Persons.Person: fill in the name and print ToString()
print("\n Test the Persons.Person class")
objP = Persons.Person()
objP.FirstName = "Bob"
objP.LastName = "Smith"
print(objP.ToString())

# Employees.Employee: built through constructor arguments instead of attributes
print("\n Test the Employees.Employee class")
objE = Employees.Employee(1, "Bob")
# objE.Id = 1
# objE.FirstName = "Bob"
# objE.LastName = "Smith"
print(objE.ToString())

# Employees.EmployeeList
print("\n Test the Employee.EmployeeList class")
objEL = Employees.EmployeeList()
try:
    print("Trying the wrong object type")
def processTweets_old(targetsFile,sentiTokensFile,exceptSentiTokens,multiWordsFile,tweets): """ Processes a list of tweets: 1. Identify target 2. If target is one of the politicians infer the comment's polarity politiciansFile -> path to the politicians list file sentiTokensFile -> path to the sentiTokens list file exceptSentiTokens -> path to the list of sentiTokens that cannot lose their accents without causing ambiguity for ex: más -> mas tweets -> list of tweets """ print "hell yeah!" print "Loading resources...\nTargets: " + targetsFile targets = getFromCache(PERSONS_CACHE) if targets != None: print "Target list found on cache!" else: targets = Persons.loadPoliticians(targetsFile) putInCache(targets, PERSONS_CACHE) print "SentiTokens: " + sentiTokensFile + "\nExceptTokens: " + exceptSentiTokens sentiTokens = getFromCache(SENTI_CACHE) if sentiTokens != None: print "SentiTokens found on cache!" else: sentiTokens = SentiTokens.loadSentiTokens(sentiTokensFile,exceptSentiTokens) putInCache(sentiTokens, SENTI_CACHE) print "Multiword Tokenizer: " + multiWordsFile multiWordTokenizer = getFromCache(MULTIWORD_CACHE) if multiWordTokenizer != None: print "Multiword Tokenizer found on cache" else: multiWordTokenizer = MultiWordHandler(multiWordsFile) multiWordTokenizer.addMultiWords(Persons.getMultiWords(targets)) multiWordTokenizer.addMultiWords(SentiTokens.getMultiWords(sentiTokens)) putInCache(multiWordTokenizer, MULTIWORD_CACHE) print "Inferring polarity..." naive = Naive(targets,sentiTokens) rules = Rules(targets,sentiTokens) analyzedTweets = [] rejectedTweets = [] for tweet in tweets: t0 = datetime.now() #print "antes" tweetsWithTarget = naive.inferTarget(tweet) if tweetsWithTarget != None : #a tweet can have multiple targets (in that case the message is replicated) for tweet in tweetsWithTarget: #try to classify with rules... 
analyzedTweet = rules.inferPolarity(tweet,False) print "depois" #if not possible use the naive classifier if analyzedTweet.polarity == 0: analyzedTweet = naive.inferPolarity(analyzedTweet,False) if analyzedTweet.polarity == 0: regex = ur'(\W|^)sentiTokens:(.*?);(\W|$)' match = re.search(regex,analyzedTweet.metadata).group(2) print "match: ", match if len(match.strip(' ')) == 0: rejectedTweets.append(analyzedTweet) else: analyzedTweets.append(analyzedTweet) else: analyzedTweets.append(analyzedTweet) t1 = datetime.now() print tweet.id + " ("+ str(t1-t0) + ")" logClassifiedTweets(rejectedTweets, "./rejectedTweets.csv") return analyzedTweets
def processTweets(targetsFile,sentiTokensFile,exceptSentiTokens,multiWordsFile,tweets): """ Processes a list of tweets: 1. Identify target 2. If target is one of the politicians infer the comment's polarity politiciansFile -> path to the politicians list file sentiTokensFile -> path to the sentiTokens list file exceptSentiTokens -> path to the list of sentiTokens that cannot lose their accents without causing ambiguity for ex: más -> mas tweets -> list of tweets """ print "Loading resources...\nTargets: " + targetsFile targets = None#getFromCache(WIN_PERSONS_CACHE) if targets != None: print "Target list found on cache!" else: targets = Persons.loadPoliticians(targetsFile) putInCache(targets, WIN_PERSONS_CACHE) print "SentiTokens: " + sentiTokensFile + "\nExceptTokens: " + exceptSentiTokens sentiTokens = None#getFromCache(WIN_SENTI_CACHE) if sentiTokens != None: print "SentiTokens found on cache!" else: sentiTokens = SentiTokens.loadSentiTokens(sentiTokensFile,exceptSentiTokens) putInCache(sentiTokens, WIN_SENTI_CACHE) print "Multiword Tokenizer: " + multiWordsFile multiWordTokenizer = None#getFromCache(WIN_MULTIWORD_CACHE) if multiWordTokenizer != None: print "Multiword Tokenizer found on cache" else: multiWordTokenizer = MultiWordHandler(multiWordsFile) multiWordTokenizer.addMultiWords(Persons.getMultiWords(targets)) multiWordTokenizer.addMultiWords(SentiTokens.getMultiWords(sentiTokens)) putInCache(multiWordTokenizer, WIN_MULTIWORD_CACHE) print "Inferring polarity..." 
naive = Naive(targets,sentiTokens) rules = Rules(targets,sentiTokens) analyzedTweets = [] rejectedTweets = [] for tweet in tweets: t0 = datetime.now() rulesScore,rulesInfo = rules.getRulesScore(tweet,True) cluesScore,clueInfo = rules.getCluesScore(tweet,True) sentiScore,sentiInfo = naive.getSentiScore(tweet,True) tweetScore = int(sentiScore) + int(rulesScore) + int(cluesScore) if tweetScore > 0: tweet.polarity = 1 elif tweetScore < 0: tweet.polarity = -1 else: tweet.polarity = 0 tweet.metadata = sentiInfo+";"+clueInfo+";"+rulesInfo if tweet.polarity == 0: regex = ur'(\W|^)sentiTokens:(.*?);(\W|$)' match = re.search(regex,tweet.metadata).group(2) if len(match.strip(' ')) == 0: rejectedTweets.append(tweet) else: analyzedTweets.append(tweet) else: analyzedTweets.append(tweet) t1 = datetime.now() print tweet.id + " ("+ str(t1-t0) + ")" logClassifiedTweets(rejectedTweets, "./rejectedTweets.csv") return analyzedTweets
def getFeatures(targetsFile,sentiTokensFile,exceptSentiTokens,multiWordsFile,listOfTweets): print "Loading resources...\nTargets: " + targetsFile targets = None#getFromCache(PERSONS_CACHE) if targets != None: print "Target list found on cache!" else: targets = Persons.loadPoliticians(targetsFile) putInCache(targets, PERSONS_CACHE) print "SentiTokens: " + sentiTokensFile + "\nExceptTokens: " + exceptSentiTokens sentiTokens = None#getFromCache(SENTI_CACHE) if sentiTokens != None: print "SentiTokens found on cache!" else: sentiTokens = SentiTokens.loadSentiTokens(sentiTokensFile,exceptSentiTokens) putInCache(sentiTokens, SENTI_CACHE) print "Multiword Tokenizer: " + multiWordsFile multiWordTokenizer = None#getFromCache(MULTIWORD_CACHE) if multiWordTokenizer != None: print "Multiword Tokenizer found on cache" else: multiWordTokenizer = MultiWordHandler(multiWordsFile) multiWordTokenizer.addMultiWords(Persons.getMultiWords(targets)) multiWordTokenizer.addMultiWords(SentiTokens.getMultiWords(sentiTokens)) putInCache(multiWordTokenizer, MULTIWORD_CACHE) print "Calculating features..." 
naive = Naive(targets,sentiTokens) rules = Rules(targets,sentiTokens) analyzedTweets = [] rejectedTweets = [] for tweet in listOfTweets: feats = [] t0 = datetime.now() rulesFeats = rules.getRulesFeats(tweet,True) cluesFeats = rules.getCluesFeats(tweet,True) sentiFeats = naive.getSentiFeats(tweet,True) x = int(rulesFeats[0])+int(rulesFeats[1])+int(cluesFeats[0])+int(cluesFeats[1])+int(sentiFeats[0])+int(sentiFeats[1])+int(sentiFeats[2]) if x == 0: tweetTest = naive.inferPolarity(tweet, True) regex = ur'(\W|^)sentiTokens:(.*?);(\W|$)' match = re.search(regex,tweetTest.metadata).group(2) if len(match.strip(' ')) == 0: rejectedTweets.append(tweet) print "rejected: " print tweet.tostring() else: feats.append(tweet.id) feats.append(rulesFeats[0]) feats.append(rulesFeats[1]) feats.append(cluesFeats[0]) feats.append(cluesFeats[1]) feats.append(sentiFeats[0]) feats.append(sentiFeats[1]) feats.append(sentiFeats[2]) feats.append(int(tweet.irony)) analyzedTweets.append(feats) else: feats.append(tweet.id) feats.append(rulesFeats[0]) feats.append(rulesFeats[1]) feats.append(cluesFeats[0]) feats.append(cluesFeats[1]) feats.append(sentiFeats[0]) feats.append(sentiFeats[1]) feats.append(sentiFeats[2]) feats.append(int(tweet.irony)) analyzedTweets.append(feats) t1 = datetime.now() print tweet.id + " ("+ str(t1-t0) + ")" #logClassifiedTweets(rejectedTweets, "./rejectedTweets.csv") return analyzedTweets
def processTweets(politiciansFile, sentiTokensFile, exceptSentiTokens, multiWordsFile, tweets): """ Processes a list of tweets: 1. Identify target 2. If target is one of the politicians infer the comment's polarity politiciansFile -> path to the politicians list file sentiTokensFile -> path to the sentiTokens list file exceptSentiTokens -> path to the list of sentiTokens that cannot lose their accents without causing ambiguity for ex: más -> mas tweets -> list of tweets """ print "Loading resources...\nPoliticians: " + politiciansFile politicians = Persons.loadPoliticians(politiciansFile) print "SentiTokens: " + sentiTokensFile + "\nExceptTokens: " + exceptSentiTokens sentiTokens = SentiTokens.loadSentiTokens(sentiTokensFile, exceptSentiTokens) print "Multiword Tokenizer " + multiWordsFile multiWordTokenizer = MultiWordHandler(multiWordsFile) multiWordTokenizer.addMultiWords(Persons.getMultiWords(politicians)) multiWordTokenizer.addMultiWords(SentiTokens.getMultiWords(sentiTokens)) naive = Naive(politicians, sentiTokens) targetedTweets = {} classifiedTweets = {} #Process tweets... #First step: infer targets and create a dictionary {target,listOfTweets} print "Identifying targets..." for tweet in tweets: tweetsWithTarget = naive.inferTarget(tweet) if tweetsWithTarget != None: #a tweet can have multiple targets (in that case the message is replicated) for tweet in tweetsWithTarget: if tweet.target not in targetedTweets: targetedTweets[tweet.target] = [] tweet.taggedSentence = multiWordTokenizer.tokenizeMultiWords( tweet.sentence) targetedTweets[tweet.target].append(tweet) print len(targetedTweets), " targets Identified! Inferring polarity..." rules = Rules(politicians, sentiTokens) #Second step infer polarity for target, tweets in targetedTweets.items(): for tweet in tweets: if target not in classifiedTweets: classifiedTweets[target] = [] #try to classify with rules... 
classifiedTweet = rules.inferPolarity(tweet, True) #if not possible use the naive classifier if classifiedTweet.polarity == 0: classifiedTweet = naive.inferPolarity(classifiedTweet, True) classifiedTweets[target].append(classifiedTweet) return classifiedTweets
def processTweets(targetsFile,sentiTokensFile,exceptSentiTokens,multiWordsFile,tweets): """ Processes a list of tweets, for each: 1. Identifies the target 2. If the message contains a target of interest infer the polarity targetsFile -> path to the politicians list file sentiTokensFile -> path to the sentiTokens list file exceptSentiTokens -> path to the list of sentiTokens that cannot lose their accents without causing ambiguity for ex: más -> mas multiWordsFile -> path to a file that contains the words that should be considered as a unit, e.g. "primeiro ministro" tweets -> list of tweets """ print "hell yeah!" print "Loading resources...\nTargets: " + targetsFile targets = Utils.getFromCache(PERSONS_CACHE) if targets != None: print "Target list found on cache!" else: targets = Persons.loadPoliticians(targetsFile) Utils.putInCache(targets, PERSONS_CACHE) print "SentiTokens: " + sentiTokensFile + "\nExceptTokens: " + exceptSentiTokens sentiTokens = Utils.getFromCache(SENTI_CACHE) if sentiTokens != None: print "SentiTokens found on cache!" else: sentiTokens = SentiTokens.loadSentiTokens(sentiTokensFile,exceptSentiTokens) Utils.putInCache(sentiTokens, SENTI_CACHE) print "Multiword Tokenizer: " + multiWordsFile multiWordTokenizer = Utils.getFromCache(MULTIWORD_CACHE) if multiWordTokenizer != None: print "Multiword Tokenizer found on cache" else: multiWordTokenizer = MultiWordHelper(multiWordsFile) multiWordTokenizer.addMultiWords(Persons.getMultiWords(targets)) multiWordTokenizer.addMultiWords(SentiTokens.getMultiWords(sentiTokens)) Utils.putInCache(multiWordTokenizer, MULTIWORD_CACHE) print "Resources loaded! Starting analysis..." 
targetDetector = TargetDetector(targets) #TODO:Estes senhores já não precisam de receber os targets naive = Naive(sentiTokens) rules = Rules(None,sentiTokens) analyzedTweets = [] rejectedTweets = [] for tweet in tweets: t0 = datetime.now() tweetsWithTarget = targetDetector.inferTarget(tweet) if tweetsWithTarget != None : #a tweet can have multiple targets (in that case the message is replicated) for tweet in tweetsWithTarget: #try to classify with rules... analyzedTweet = rules.inferPolarity(tweet,False) #if not possible use the naive classifier if analyzedTweet.polarity == 0: analyzedTweet = naive.inferPolarity(analyzedTweet,False) #If the polarity is still 0 it can mean: #1) The sum of the polarities of the sentiTokens is 0, #2) There was no evidence usable to assess the sentiment if analyzedTweet.polarity == 0: regex = ur'(\W|^)sentiTokens:(.*?);(\W|$)' #Try to find if there are any evidence of matched sentiTokens match = re.search(regex,analyzedTweet.metadata).group(2) if debug: print "match: ", match if len(match.strip(' ')) == 0: rejectedTweets.append(analyzedTweet) else: analyzedTweets.append(analyzedTweet) else: analyzedTweets.append(analyzedTweet) t1 = datetime.now() print tweet.id + " ("+ str(t1-t0) + ")" logClassifiedTweets(rejectedTweets, "./rejectedTweets.csv") return analyzedTweets
# This file is a script entry point: refuse to continue when it is imported.
if __name__ != "__main__":
    raise Exception("This file was not created to be imported ")

import Customers
import DataProcessor
import Persons

# Build and print a plain person
p1 = Persons.Person("Bob", "Smith", "50")
print(p1)

# Build a customer, register it in the customer list and print the list
e1 = Customers.Customer(
    1,
    "Sue",
    "Jones",
    "49",
    "1111 Main St Seattle, Wa 98103",
    "206 931 6127",
    "*****@*****.**",
)
# print(e1)
Customers.CustomerList.AddCustomer(e1)
print(Customers.CustomerList.ToString())
def getPoliticians():
    """
    Loads the politicians from the default resource file
    (../Resources/politicians.txt) through Persons.loadPoliticians
    and returns the result.
    """

    return Persons.loadPoliticians("../Resources/politicians.txt")