def processSingleSentence(politiciansFile, sentiTokensFile, exceptSentiTokens,
                          sentence, webOutput):

    print "<br>Loading resources...<br>Politicians: " + politiciansFile
    politicians = Persons.loadPoliticians(politiciansFile)

    print "<br>SentiTokens: " + sentiTokensFile + "<br>ExceptTokens: " + exceptSentiTokens
    sentiTokens = SentiTokens.loadSentiTokens(sentiTokensFile,
                                              exceptSentiTokens)

    naive = Naive(politicians, sentiTokens)

    singleSentence = Opinion(1, sentence=sentence)

    print "<br>Inferring targets...<br>"
    targets = naive.inferTarget(singleSentence)

    results = []

    if targets is not None:

        print "<br>Inferring polarity...<br>"

        for target in targets:

            rules = Rules(politicians, sentiTokens)
            results.append(rules.inferPolarity(target, False))
    else:
        print "<br>No targets were identified...<br>"
    if webOutput:
        return printResultsWeb(results, sentence)
    else:
        return printResultsConsole(results)
def processSingleSentence(politiciansFile, sentiTokensFile, exceptSentiTokens,
                          sentence, webOutput):

    print "Loading resources..."
    print "Politicians: " + politiciansFile
    politicians = Persons.loadPoliticians(politiciansFile)

    print "SentiTokens: " + sentiTokensFile
    print "ExceptTokens: " + exceptSentiTokens
    sentiTokens = SentiTokens.loadSentiTokens(sentiTokensFile,
                                              exceptSentiTokens)

    naive = Naive(politicians, sentiTokens)

    singleSentence = Opinion(1, sentence=sentence)

    print "Inferring targets..."
    targets = naive.inferTarget(singleSentence)

    results = []

    if targets is not None:

        print "Inferring polarity..."

        for target in targets:

            rules = Rules(politicians, sentiTokens)

            #if not possible to classify with rules, fall back to the naive classifier
            classifiedTweet = rules.inferPolarity(target, False)

            if classifiedTweet.polarity == 0:
                classifiedTweet = naive.inferPolarity(classifiedTweet, True)

            results.append(classifiedTweet)
    else:
        print "No targets were identified..."
    if webOutput:
        return printResultsWeb(results, sentence)
    else:
        return printResultsConsole(results)
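
# A minimal usage sketch for processSingleSentence. The resource paths and the
# example sentence below are placeholders invented for illustration; only the
# function signature comes from the code above.
def runSingleSentenceExample():

    politiciansFile = "resources/politicians.txt"          # hypothetical path
    sentiTokensFile = "resources/sentiTokens.txt"          # hypothetical path
    exceptSentiTokens = "resources/exceptSentiTokens.txt"  # hypothetical path

    sentence = "A sample sentence mentioning one of the listed politicians"

    # webOutput=False selects the console formatter (printResultsConsole);
    # passing True returns the web-formatted output (printResultsWeb) instead.
    return processSingleSentence(politiciansFile, sentiTokensFile,
                                 exceptSentiTokens, sentence, False)
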
def processTweets(politiciansFile, sentiTokensFile, exceptSentiTokens,
                  multiWordsFile, tweets):
    """ 
        Processes a list of tweets:
        1. Identify target
        2. If target is one of the politicians infer the comment's polarity
        
        politiciansFile -> path to the politicians list file
        sentiTokensFile -> path to the sentiTokens list file
        exceptSentiTokens -> path to the list of sentiTokens that cannot lose their accents without
                             causing ambiguity for ex: más -> mas
         tweets -> list of tweets
    """

    print "Loading resources...\nPoliticians: " + politiciansFile
    politicians = Persons.loadPoliticians(politiciansFile)

    print "SentiTokens: " + sentiTokensFile + "\nExceptTokens: " + exceptSentiTokens
    sentiTokens = SentiTokens.loadSentiTokens(sentiTokensFile,
                                              exceptSentiTokens)

    print "Multiword Tokenizer " + multiWordsFile
    multiWordTokenizer = MultiWordHandler(multiWordsFile)
    multiWordTokenizer.addMultiWords(Persons.getMultiWords(politicians))
    multiWordTokenizer.addMultiWords(SentiTokens.getMultiWords(sentiTokens))

    naive = Naive(politicians, sentiTokens)

    targetedTweets = {}
    classifiedTweets = {}

    #Process tweets...
    #First step: infer targets and create a dictionary {target: listOfTweets}
    print "Identifying targets..."

    for tweet in tweets:

        tweetsWithTarget = naive.inferTarget(tweet)

        if tweetsWithTarget is not None:

            #a tweet can have multiple targets (in that case the message is replicated)
            for targetedTweet in tweetsWithTarget:

                if targetedTweet.target not in targetedTweets:

                    targetedTweets[targetedTweet.target] = []

                targetedTweet.taggedSentence = multiWordTokenizer.tokenizeMultiWords(
                    targetedTweet.sentence)
                targetedTweets[targetedTweet.target].append(targetedTweet)

    print len(targetedTweets), "targets identified! Inferring polarity..."

    rules = Rules(politicians, sentiTokens)

    #Second step: infer polarity
    for target, targetTweets in targetedTweets.items():

        for tweet in targetTweets:

            if target not in classifiedTweets:
                classifiedTweets[target] = []

            #try to classify with rules...
            classifiedTweet = rules.inferPolarity(tweet, True)

            #if not possible use the naive classifier
            if classifiedTweet.polarity == 0:
                classifiedTweet = naive.inferPolarity(classifiedTweet, True)

            classifiedTweets[target].append(classifiedTweet)

    return classifiedTweets
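
# A small sketch of how the dictionary returned by processTweets could be
# aggregated into per-target counts. It assumes tweet.polarity is a signed
# number (positive, negative, or 0 for neutral/unclassified), which is how the
# classifiers above appear to use it; that convention is an assumption, not
# something stated in this file.
def summarizePolarity(classifiedTweets):

    summary = {}

    for target, tweetList in classifiedTweets.items():

        positives = sum(1 for t in tweetList if t.polarity > 0)
        negatives = sum(1 for t in tweetList if t.polarity < 0)
        neutrals = len(tweetList) - positives - negatives

        summary[target] = {"positive": positives,
                           "negative": negatives,
                           "neutral": neutrals}

    return summary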