Exemple #1
0
 def __init__(self, dictionary_paths=None):
     """Build the splitter, POS tagger and dictionary tagger for this object.

     Args:
         dictionary_paths: Optional list of dictionary file paths handed
             unchanged to ``DictionaryTagger``. When omitted (``None``), the
             original hard-coded ``.yml`` paths are used, so existing
             zero-argument callers behave exactly as before.
     """
     if dictionary_paths is None:
         # NOTE(review): these defaults are absolute paths inside one
         # developer's home directory; pass ``dictionary_paths`` explicitly
         # when running anywhere else.
         dictionary_paths = [
             '/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/positive.yml',
             '/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/negative.yml',
             '/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/morePositive.yml',
             '/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/moreNegative.yml',
             '/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/invert.yml',
         ]
     self.splitter = Splitter()
     self.postagger = POSTagger()
     self.dicttagger = DictionaryTagger(dictionary_paths)
Exemple #2
0
def main(keys='keys.ini',
         raw_tweets_file='twitter_data.txt',
         no_tweets=1000,
         tracked_words_file='tracks.csv',
         formatted_tweets_file='formatted_tweets.txt',
         dictionaries=None):
    """Perform an analysis to find sexist and rude words in tweets.

    This module employs every other module to perform a full analysis on data
    retrieved from the Twitter stream. First a TwitterMiner retrieves data and
    dumps it, then a TweetFormatter parses the data into a list of tweets that
    are lists of words. Then it uses the spaghetti tagger to POS-tag every
    word, yielding a list of tweets that are lists with elements of the form
    (word, [tags]). A DictionaryTagger adds our custom tags to the [tags]
    list. Finally a TagCounter performs a count of every tag found in the
    tweets. This program prints the number of occurrences of our custom tags.

    Args:
        keys: First argument given to ``TwitterMiner`` (presumably a
            credentials file -- confirm against ``TwitterMiner``).
        raw_tweets_file: Given to both ``TwitterMiner`` and
            ``TweetFormatter``.
        no_tweets: Third argument given to ``TwitterMiner`` (presumably the
            number of tweets to retrieve -- confirm).
        tracked_words_file: Given to ``TwitterMiner.mine``.
        formatted_tweets_file: Given to ``TweetFormatter.convert2text``.
        dictionaries: Dictionary files given to ``DictionaryTagger``. When
            omitted (``None``), defaults to ``['misoginy_dictionary.yml',
            'curses_dictionary.yml']``.
    """
    # Build the default inside the call: a list literal in the signature is
    # created once and shared (and possibly mutated) across every call.
    if dictionaries is None:
        dictionaries = ['misoginy_dictionary.yml', 'curses_dictionary.yml']

    miner = TwitterMiner(keys, raw_tweets_file, no_tweets)
    miner.mine(tracked_words_file)

    formatter = TweetFormatter(raw_tweets_file)
    tweets = formatter.convert2json()
    tweets = formatter.convert2text(tweets, formatted_tweets_file)
    tweets = formatter.clean_tweets(tweets)
    tweets = [tweet.split() for tweet in tweets]

    tagger = DictionaryTagger(dictionaries)
    postagged_sents = spgt.pos_tag_sents(tweets)
    tagged_sents = tagger.tag(postagged_sents)

    counter = TagCounter(tagged_sents)
    res = counter.count()

    # A tag that never appeared is simply not reported (best effort), so a
    # missing key is skipped. Only the lookup sits inside the ``try`` so that
    # unrelated errors are not swallowed.
    reports = (
        ("Palabras misóginas: {}", 'misóginia'),
        ("Palabras groseras: {}", 'grosería'),
    )
    for template, tag in reports:
        try:
            count = res[tag]
        except KeyError:
            continue
        print(template.format(count))
 def __init__(self, dictionary_paths=None):
     """Create the collaborators this object uses for tagging text.

     Args:
         dictionary_paths: Optional list of dictionary file paths passed
             as-is to ``DictionaryTagger``. ``None`` (the default) selects
             the original hard-coded ``.yml`` paths, keeping zero-argument
             construction backward-compatible.
     """
     self.splitter = Splitter()
     self.postagger = POSTagger()
     if dictionary_paths is None:
         # NOTE(review): the fallback paths point into a specific user's
         # home directory; supply ``dictionary_paths`` on other machines.
         dictionary_paths = [
             "/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/positive.yml",
             "/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/negative.yml",
             "/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/morePositive.yml",
             "/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/moreNegative.yml",
             "/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/invert.yml",
         ]
     self.dicttagger = DictionaryTagger(dictionary_paths)
Exemple #4
0
class SentimentAnalyzingService(object):
    """Score text sentiment from dictionary tags attached to each token.

    Text is split into sentences, POS-tagged, dictionary-tagged and then
    scored: 'positive' tags count +1, 'negative' tags count -1, and the tags
    of the preceding token ('inc', 'dec', 'inv') modify the current token's
    score.
    """

    def __init__(self, dictionary_paths=None):
        """Create the splitter, POS tagger and dictionary tagger.

        Args:
            dictionary_paths: Optional list of dictionary file paths passed
                unchanged to ``DictionaryTagger``. ``None`` (the default)
                selects the original hard-coded ``.yml`` paths.
        """
        if dictionary_paths is None:
            # NOTE(review): machine-specific absolute paths; pass
            # ``dictionary_paths`` explicitly on any other machine.
            dictionary_paths = [
                '/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/positive.yml',
                '/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/negative.yml',
                '/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/morePositive.yml',
                '/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/moreNegative.yml',
                '/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/invert.yml',
            ]
        self.splitter = Splitter()
        self.postagger = POSTagger()
        self.dicttagger = DictionaryTagger(dictionary_paths)

    def valueOf(self, sentiment):
        """Return 1 for 'positive', -1 for 'negative' and 0 for any other tag."""
        if sentiment == 'positive':
            return 1
        if sentiment == 'negative':
            return -1
        return 0

    def sentence_score(self, sentence_tokens, previous_token, acum_score):
        """Return ``acum_score`` plus the score of every token in the sentence.

        Each token's tags are read from index 2. A token's base score is the
        sum of ``valueOf`` over its tags; if the preceding token carries
        'inc' the score is doubled, else if 'dec' it is halved, else if 'inv'
        it is negated (first match wins).

        Implemented as a loop rather than one recursive call per token, so
        long sentences cannot exceed the interpreter's recursion limit and
        the token list is not re-sliced on every step.

        Args:
            sentence_tokens: Sequence of tokens whose element 2 holds tags.
            previous_token: Token preceding the first one, or ``None``.
            acum_score: Score accumulated so far.

        Returns:
            The accumulated score after processing all tokens.
        """
        if not sentence_tokens:
            return acum_score

        total = acum_score
        previous = previous_token
        for current in sentence_tokens:
            token_score = sum(self.valueOf(tag) for tag in current[2])
            if previous is not None:
                previous_tags = previous[2]
                if 'inc' in previous_tags:
                    token_score *= 2.0
                elif 'dec' in previous_tags:
                    token_score /= 2.0
                elif 'inv' in previous_tags:
                    token_score *= -1.0
            total = total + token_score
            previous = current
        return total

    def sentiment_score(self, dictTaggedSentences):
        """Return the sum of ``sentence_score`` over all tagged sentences."""
        return sum(
            self.sentence_score(sentence, None, 0.0)
            for sentence in dictTaggedSentences
        )

    def performBasicSentimentAnalysis(self, textToBeAnalysed):
        """Split, POS-tag and dictionary-tag the text, then return its score."""
        sentences = self.splitter.splitParagraphToListOfSentences(
            textToBeAnalysed)
        pos_tagged_sentences = self.postagger.pos_tag(sentences)
        dict_tagged_sentences = self.dicttagger.tag(pos_tagged_sentences)

        return self.sentiment_score(dict_tagged_sentences)
class SentimentAnalyzingService(object):
    """Compute a sentiment score for text using dictionary-derived tags.

    The pipeline is: split text into sentences, POS-tag them, add dictionary
    tags, then sum per-token scores where "positive" is +1, "negative" is -1
    and the previous token's "inc"/"dec"/"inv" tag modifies the current
    token's score.
    """

    def __init__(self, dictionary_paths=None):
        """Create the splitter, POS tagger and dictionary tagger.

        Args:
            dictionary_paths: Optional list of dictionary file paths handed
                as-is to ``DictionaryTagger``. When ``None`` (the default),
                the original hard-coded ``.yml`` paths are used.
        """
        self.splitter = Splitter()
        self.postagger = POSTagger()
        if dictionary_paths is None:
            # NOTE(review): fallback paths live in one user's home directory;
            # pass ``dictionary_paths`` when deploying elsewhere.
            dictionary_paths = [
                "/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/positive.yml",
                "/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/negative.yml",
                "/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/morePositive.yml",
                "/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/moreNegative.yml",
                "/home/msinghal/PycharmProjects/basic_sentiment_analysis/dicts/invert.yml",
            ]
        self.dicttagger = DictionaryTagger(dictionary_paths)

    def valueOf(self, sentiment):
        """Map a tag to its numeric value: "positive" 1, "negative" -1, else 0."""
        if sentiment == "positive":
            return 1
        if sentiment == "negative":
            return -1
        return 0

    def _token_score(self, token, previous_token):
        """Return one token's score, adjusted by the previous token's tags."""
        score = sum(self.valueOf(tag) for tag in token[2])
        if previous_token is None:
            return score
        previous_tags = previous_token[2]
        # Only the first matching modifier applies, in this priority order.
        if "inc" in previous_tags:
            score *= 2.0
        elif "dec" in previous_tags:
            score /= 2.0
        elif "inv" in previous_tags:
            score *= -1.0
        return score

    def sentence_score(self, sentence_tokens, previous_token, acum_score):
        """Add the score of each token in the sentence to ``acum_score``.

        Tokens are processed left to right; each token's tags are read from
        index 2 and each token becomes the "previous token" for the next one.
        This is done with a loop instead of per-token recursion, so sentence
        length is not bounded by the interpreter's recursion limit and no
        list slices are copied.

        Args:
            sentence_tokens: Sequence of tokens whose element 2 holds tags.
            previous_token: Token preceding the first one, or ``None``.
            acum_score: Score accumulated before this sentence.

        Returns:
            ``acum_score`` plus the scores of all tokens.
        """
        if not sentence_tokens:
            return acum_score

        running_total = acum_score
        for current_token in sentence_tokens:
            running_total = running_total + self._token_score(current_token, previous_token)
            previous_token = current_token
        return running_total

    def sentiment_score(self, dictTaggedSentences):
        """Return the total of ``sentence_score`` across all sentences."""
        return sum(self.sentence_score(sentence, None, 0.0) for sentence in dictTaggedSentences)

    def performBasicSentimentAnalysis(self, textToBeAnalysed):
        """Run the split / POS-tag / dictionary-tag pipeline and score the text."""
        sentences = self.splitter.splitParagraphToListOfSentences(textToBeAnalysed)
        pos_tagged_sentences = self.postagger.pos_tag(sentences)
        dict_tagged_sentences = self.dicttagger.tag(pos_tagged_sentences)

        return self.sentiment_score(dict_tagged_sentences)