Beispiel #1
0
    def graph(self, data, output_folder, parent_folder):
        """Draw a horizontal bar chart of the ten most used hashtags.

        The input is passed through ``util.group_words_by_sender`` and only
        rows whose ``type`` column equals ``'hashtag'`` are plotted.  Bars show
        ``n_w`` per hashtag, split by sender, and hashtags are ordered by their
        summed ``n_w`` (largest first, top ten only).

        Args:
            data: Input handed to ``util.group_words_by_sender``.
            output_folder: Directory the PNG is written into.
            parent_folder: Not used by this method.
        """
        grouped = util.group_words_by_sender(data)
        hashtags = grouped[grouped['type'] == 'hashtag']

        # Rank hashtags by their total count across all senders.
        totals = hashtags.groupby('word').n_w.sum()
        top_ten = totals.sort_values(ascending=False).head(10).index

        sns.set(style="darkgrid")
        axes = sns.barplot(
            y=hashtags['word'],
            x=hashtags['n_w'],
            hue=hashtags[config.SENDER_COLUMN_NAME],
            data=hashtags,
            palette=config.PALETTE,
            orient="h",
            order=top_ten,
        )

        title = "Most frequent hashtags"
        plt.suptitle(title, y=1)
        axes.set(xlabel='', ylabel='')
        # Anchor the legend outside the plotting area, to the right.
        axes.legend(bbox_to_anchor=(1.04, 1), loc="upper left")

        figure = axes.get_figure()
        target = "{}/{}.png".format(output_folder, slugify(title))
        figure.savefig(target, bbox_inches='tight', pad_inches=config.PAD_INCHES)
        # Clear the figure so later plots start from a blank canvas.
        figure.clf()
Beispiel #2
0
    def graph(self, data, output_folder, parent_folder):
        """Draw a bar chart of how often each sender's first name is said.

        The input is passed through ``util.group_words_by_sender``.  The first
        whitespace-separated token of every sender name (lower-cased) is taken
        as that sender's first name; rows whose ``word`` is one of these names
        are summed per (word, sender) and plotted.

        Args:
            data: Input handed to ``util.group_words_by_sender``.
            output_folder: Directory the PNG is written into.
            parent_folder: Not used by this method.
        """
        data = util.group_words_by_sender(data)
        names = data[config.SENDER_COLUMN_NAME].unique().tolist()

        # Deduplicate (two senders may share a first name) and skip blank
        # sender names, which have no first token to extract.
        name_tokens = (x.split() for x in names)
        first_names = sorted({tokens[0].lower() for tokens in name_tokens if tokens})

        mentions = data[data['word'].isin(first_names)]
        to_plot = mentions.groupby(
            ['word', config.SENDER_COLUMN_NAME], as_index=False
        )[['n_w']].sum()

        sns.set(style="darkgrid")
        plot = sns.barplot(
            x=to_plot['word'],
            y=to_plot['n_w'],
            hue=to_plot[config.SENDER_COLUMN_NAME],
            data=to_plot,
            palette=config.PALETTE,
            # Pin the category order to first_names so that the tick labels set
            # below always line up with the bars, even when a name was never
            # said in chat and is therefore missing from to_plot.
            order=first_names,
        )

        TITLE = "Names said in chat"
        plt.suptitle(TITLE, y=1)
        plot.set(
            xlabel='',
            ylabel='',
            xticklabels=["\"{}\"".format(x) for x in first_names],
        )
        # Anchor the legend outside the plotting area, to the right.
        plot.legend(bbox_to_anchor=(1.04, 1), loc="upper left")

        fig = plot.get_figure()
        fig.savefig(
            "{}/{}.png".format(output_folder, slugify(TITLE)),
            bbox_inches='tight',
            pad_inches=config.PAD_INCHES
        )
        # Clear the figure so later plots start from a blank canvas.
        fig.clf()
Beispiel #3
0
    def graph(self, data, output_folder, parent_folder):
        """Draw a bar chart of the ten most used emoji and print them.

        The input is passed through ``util.group_words_by_sender`` and only
        rows whose ``type`` column equals ``'emoji'`` are plotted.  Bars show
        ``n_w`` per emoji, split by sender, ordered by summed ``n_w`` (largest
        first, top ten only).  The x tick labels are switched to the
        ``'EmojiOne'`` font family, and the tick label texts are printed to
        stdout as a numbered list.

        Args:
            data: Input handed to ``util.group_words_by_sender``.
            output_folder: Directory the PNG is written into.
            parent_folder: Not used by this method.
        """
        grouped = util.group_words_by_sender(data)
        emoji_rows = grouped[grouped['type'] == 'emoji']

        # Rank emoji by their total count across all senders.
        totals = emoji_rows.groupby('word')[['n_w']].sum()
        top_ten = totals.sort_values('n_w', ascending=False).head(10).index

        sns.set(style="darkgrid")
        axes = sns.barplot(
            x=emoji_rows['word'],
            y=emoji_rows['n_w'],
            hue=emoji_rows[config.SENDER_COLUMN_NAME],
            data=emoji_rows,
            palette=config.PALETTE,
            order=top_ten,
        )

        util.add_custom_fonts()

        # Restyle each tick label and collect its text in a single pass.
        emojis = []
        for label in axes.get_xticklabels():
            label.set_family('EmojiOne')
            label.set_fontsize(20)
            emojis.append(label.get_text())

        print("Your top emojis:")
        ranked = ["{}. {}".format(rank, e) for rank, e in enumerate(emojis, start=1)]
        print("   ".join(ranked))

        title = "Most frequent emoji"
        plt.suptitle(title, y=1)
        axes.set(xlabel='', ylabel='')
        # Anchor the legend outside the plotting area, to the right.
        axes.legend(bbox_to_anchor=(1.04, 1), loc="upper left")

        figure = axes.get_figure()
        target = "{}/{}.png".format(output_folder, slugify(title))
        figure.savefig(target, bbox_inches='tight', pad_inches=config.PAD_INCHES)
        # Clear the figure so later plots start from a blank canvas.
        figure.clf()
Beispiel #4
0
    def graph(self, data, output_folder, parent_folder):
        """Draw one horizontal bar chart per sender of their top tf-idf words.

        The input is passed through ``util.group_words_by_sender`` with
        ``get_tfidf=True``; rows are then limited to ``type == 'word'`` with
        words longer than one character.  Each sender gets a subplot showing
        the ``tf_idf`` value of that sender's first ten rows.

        Args:
            data: Input handed to ``util.group_words_by_sender``.
            output_folder: Directory the PNG is written into.
            parent_folder: Not used by this method.

        Raises:
            ValueError: If ``self.type`` is ``None``.
        """
        if self.type is None:
            raise ValueError("Grapher type must be set to a string")

        data = util.group_words_by_sender(data, get_tfidf=True)
        data = data[data['word'].str.len() > 1]
        data = data[data['type'] == 'word']
        senders = data[config.SENDER_COLUMN_NAME].unique().tolist()
        rows, cols = util.get_rows_cols(len(senders))

        # squeeze=False keeps ``ax`` two-dimensional even for a 1x1 grid.
        fig, ax = plt.subplots(
            figsize=(cols * 2, rows * 3), ncols=cols, nrows=rows, squeeze=False
        )
        plt.subplots_adjust(
            left=0.2,
            bottom=0.1,
            right=2,
            top=0.9,
            wspace=0.5,
            hspace=1.1
        )

        for i, sender in enumerate(senders):
            # Fill the grid row by row.
            row, col = divmod(i, cols)
            panel = ax[row][col]
            panel.set_title(sender, y=1)

            to_plot = data[data[config.SENDER_COLUMN_NAME] == sender]
            # NOTE(review): head(10) takes the first ten rows as returned by
            # util.group_words_by_sender - presumably already sorted by
            # tf_idf; confirm against the helper.
            to_plot = to_plot.head(10)[[config.SENDER_COLUMN_NAME, 'word', 'tf_idf']]
            plot = sns.barplot(
                y=to_plot['word'],
                x=to_plot['tf_idf'],
                data=to_plot,
                palette=config.PALETTE,
                orient="h",
                ax=panel
            )
            plot.set(
                ylabel="",
                xlabel="Distinctiveness Score"
            )

        TITLE = "Our Most Distinguishing {}".format(self.type)
        plt.suptitle(TITLE, y=1.09, fontsize=20)
        fig.savefig(
            "{}/{}.png".format(output_folder, slugify(TITLE)),
            bbox_inches='tight',
            pad_inches=config.PAD_INCHES
        )
        # Clear the figure so later plots start from a blank canvas.
        fig.clf()
Beispiel #5
0
    def graph(self, data, output_folder, parent_folder):
        """Draw a horizontal bar chart of the ten most used uncommon words.

        The input is passed through ``util.group_words_by_sender`` and then
        narrowed to rows of ``type == 'word'`` that contain no apostrophe and
        are longer than four characters.  Words listed in
        ``word_lists/common.txt`` (one per line, compared lower-cased and
        stripped) are removed before plotting.

        Args:
            data: Input handed to ``util.group_words_by_sender``.
            output_folder: Directory the PNG is written into.
            parent_folder: Not used by this method.
        """
        data = util.group_words_by_sender(data)
        # words only
        data = data[data['type'] == 'word']
        # ignore contractions
        data = data[~data.word.str.contains("\'", na=False)]
        # keep only words longer than 4 characters
        data = data[data['word'].str.len() > 4]

        # Filter out the most common words.  The encoding is given explicitly
        # so the word list is read the same way on every platform; the path is
        # relative to the current working directory.
        with open("word_lists/common.txt", encoding="utf-8") as f:
            common = {line.lower().strip() for line in f}

        to_plot = data.groupby([config.SENDER_COLUMN_NAME, 'word'],
                               as_index=False)[['n_w']].sum()
        to_plot = to_plot[~to_plot.word.isin(common)]

        # ignore numbers
        # NOTE(review): this only matches the single-character strings
        # "0".."9", which the length filter above has already removed -
        # confirm whether longer numeric tokens should be dropped as well.
        to_plot = to_plot[~to_plot.word.isin([str(d) for d in range(10)])]

        # Rank words by their total count across all senders.
        totals = to_plot.groupby('word').n_w.sum()
        top_ten = totals.sort_values(ascending=False).head(10).index

        sns.set(style="darkgrid")
        plot = sns.barplot(
            y=to_plot['word'],
            x=to_plot['n_w'],
            hue=to_plot[config.SENDER_COLUMN_NAME],
            data=to_plot,
            palette=config.PALETTE,
            orient="h",
            order=top_ten,
        )

        TITLE = "Most common words"
        plt.suptitle(TITLE, y=1)
        plot.set(xlabel='', ylabel='')
        # Anchor the legend outside the plotting area, to the right.
        plot.legend(bbox_to_anchor=(1.04, 1), loc="upper left")

        fig = plot.get_figure()
        fig.savefig("{}/{}.png".format(output_folder, slugify(TITLE)),
                    bbox_inches='tight',
                    pad_inches=config.PAD_INCHES)
        # Clear the figure so later plots start from a blank canvas.
        fig.clf()