# Example #1
class Sentiment(object):
    """Makes a sentiment analysis of the posts.

    Every ``*_sentiment`` method reads rows through ``self.dbc``, scores the
    text with :meth:`calc_sentiment` (or, for ``posts_sentiment``, re-uses
    scores fetched from the database) and writes the result back with
    ``self.dbc.store_sentiment(id, table, metrics)``.

    The available passes are:

    * ``postblockversion_sentiment`` - all text blocks
    * ``posthistory_sentiment`` - text blocks of each history entry
    * ``posts_sentiment`` - most recent post versions
    * ``comment_sentiment`` - post comments

    None of them is started by the constructor; call them explicitly.
    """

    # Number of processed rows between two progress messages.
    PROGRESS_INTERVAL = 10000

    def __init__(self):
        """Open the database connection and initialise the table flags."""
        self.dbc = DBConnection()

        # One flag per target table; each pass checks its flag before it
        # touches the table.  Judging by the flag names, True means the
        # sentiment columns already exist -- TODO confirm.
        self.postBlockVersionAltered = True
        self.postHistoryAltered = True
        self.postsAltered = True
        self.commentsAltered = True

        # Analyzer instance, created on first use by calc_sentiment().
        self._analyzer = None

    def _require_columns(self, altered, table):
        """Refuse to run a pass whose table is flagged as not yet altered.

        Args:
            altered: the ``*Altered`` flag belonging to ``table``.
            table: table name, used only for the error message.

        Raises:
            NotImplementedError: if ``altered`` is false.
        """
        if not altered:
            # TODO(review): the database call that adds the sentiment
            # columns is not available in this class.  Fail loudly instead
            # of running the pass against a table that may lack them.
            raise NotImplementedError(
                "sentiment columns for table '%s' have not been added"
                % table)

    def postblockversion_sentiment(self):
        """Score every post block version and store the result."""
        self._require_columns(self.postBlockVersionAltered,
                              "postblockversion")

        entries = self.dbc.get_id_content_from_postblockversion()

        for counter, entry in enumerate(entries, 1):
            # entry is indexed as (id, content)
            id_ = entry[0]

            # Calculates the metrics of the text
            results = self.calc_sentiment(entry[1])

            # Defensive check kept from the original implementation: a
            # result equal to 0 is never stored.
            if results != 0:
                # Stores the metrics in the database
                self.dbc.store_sentiment(id_, "postblockversion", results)

            if counter % self.PROGRESS_INTERVAL == 0:
                print(str(counter) + ' statements executed!')

    def posthistory_sentiment(self):
        """Score the concatenated text blocks of every post history entry."""
        self._require_columns(self.postHistoryAltered, "posthistory")

        ph_ids = self.dbc.get_ids_from_posthistory()

        for count, row in enumerate(ph_ids, 1):
            history_id = row[0]
            try:
                # get all the text blocks from the history version of the
                # post
                textblocks = self.dbc.get_content_from_posthistory(
                    history_id)
                # Each block is prefixed with a newline, so the combined
                # text starts with "\n" (same as the original "+=" loop).
                text = "".join("\n" + block[0] for block in textblocks)

                # Calculates the metrics of the text
                results = self.calc_sentiment(text)
                if results != 0:
                    # Stores the metrics in the database
                    self.dbc.store_sentiment(history_id, "posthistory",
                                             results)
            except UnboundLocalError:
                # NOTE(review): presumably raised by the database layer
                # when an entry has no post block version -- confirm.
                print("No post block verion")

            if count % self.PROGRESS_INTERVAL == 0:
                print(str(count) + " statements executed!")

    def posts_sentiment(self):
        """Copy the most recent sentiment scores into the posts table.

        No text is analysed here; each row returned by
        ``self.dbc.get_most_recent_sentiment()`` is indexed as
        ``(id, neg, neu, pos, compound)``.
        """
        self._require_columns(self.postsAltered, "posts")

        results = self.dbc.get_most_recent_sentiment()
        for count, result in enumerate(results, 1):
            if count % self.PROGRESS_INTERVAL == 0:
                print("10.000 querys executed!")

            # post id
            id_ = result[0]
            # sentiment metrics
            metrics = {
                'neg': result[1],
                'neu': result[2],
                'pos': result[3],
                'compound': result[4],
            }

            # store in posts table
            self.dbc.store_sentiment(id_, "posts", metrics)

    def comment_sentiment(self):
        """Score every comment and store the result."""
        self._require_columns(self.commentsAltered, "comments")

        comments = self.dbc.get_comment_text()
        for count, comment in enumerate(comments, 1):
            # comment is indexed as (id, text)
            id_ = comment[0]
            result = self.calc_sentiment(comment[1])

            self.dbc.store_sentiment(id_, 'comments', result)

            if count % self.PROGRESS_INTERVAL == 0:
                print(str(count) + ' Comments done!')

    def calc_sentiment(self, text):
        """Return the polarity scores of ``text``.

        Args:
            text: the string to analyse.

        Returns:
            Whatever ``SentimentIntensityAnalyzer.polarity_scores`` returns;
            ``posts_sentiment`` works with the keys ``neg``, ``neu``,
            ``pos`` and ``compound``.
        """
        # in the csv files are these unconverted special chars
        # NOTE(review): the search string is reconstructed as the XML
        # line-feed entity; confirm against the actual CSV export.
        text = text.replace('&#xA;', '\n')

        # Build the analyzer once and re-use it for every call.
        # Assumes the analyzer can be shared between calls -- TODO confirm.
        analyzer = getattr(self, '_analyzer', None)
        if analyzer is None:
            analyzer = self._analyzer = SentimentIntensityAnalyzer()
        return analyzer.polarity_scores(text)

    def print_sentiment(self, result):
        """Print every metric of ``result`` as ``name: value, ``."""
        for name, value in result.items():
            print('{0}: {1}, '.format(name, value))