def plot_totals(ts_dict):
    """
    Plot the total statistics (number of words, number of texts) per person.

    Draws one grouped-bar subplot per statistic, with one bar per sender
    found in the module-level ``COLORS`` mapping, and writes the figure to
    "Total Stats.html".

    Args:
        ts_dict (dict): {"PersonName": TextSummary for that person}. An
            entry is looked up for every key of ``COLORS``; each entry's
            ``count`` is indexed with 'texts' and 'words'.

    Returns:
        None. Opens plot.

    """
    total_attrs = ['texts', 'words']
    titles = ['Texts', 'Words']
    number_of_plots = len(total_attrs)
    fig = make_subplots(rows=number_of_plots,
                        cols=1,
                        shared_xaxes=False,
                        subplot_titles=titles)

    for row, (attr, title) in enumerate(zip(total_attrs, titles), start=1):
        for sender, color in COLORS.items():
            # add_trace(row=, col=) replaces the deprecated append_trace.
            fig.add_trace(
                go.Bar(x=[title],
                       y=[ts_dict[sender].count[attr]],
                       name=sender,
                       # Each sender has one bar per subplot; group them and
                       # show the legend entry only once so the legend does
                       # not list every sender once per subplot.
                       legendgroup=sender,
                       showlegend=(row == 1),
                       marker_color=color),
                row=row,
                col=1)

    fig.update_layout(width=600, height=400 * number_of_plots, barmode='group')
    fig.update_yaxes(title_text='Total')

    plot(fig, auto_open=True, filename="Total Stats.html")
예제 #2
0
def sub_parser_add(sub_parser):
    """
    Register the ``add`` sub-command and its arguments.

    Args:
        sub_parser: Object exposing ``add_parser`` (e.g. the result of
            ``ArgumentParser.add_subparsers()``) on which the ``add``
            command is created.

    Returns:
        None. The new parser is attached to ``sub_parser``.
    """
    add_parser = sub_parser.add_parser('add', help="Add event to calendar")
    add_parser.add_argument('-c', '--calendar', type=str, help="Calendar name")
    add_parser.add_argument('title', type=str, help="Event title")
    add_parser.add_argument('start_date',
                            type=valid_date,
                            # Was "Event start time": this argument is the date.
                            help="Event start date, format: YYYY-MM-DD")
    add_parser.add_argument('start_time',
                            type=valid_time,
                            help="Event start time, format: HH:MM")
    add_parser.add_argument('-d',
                            '--duration',
                            type=int,
                            help="Event duration (minutes)")
    add_parser.add_argument('-a',
                            '--attendees',
                            type=valid_attendees,
                            help="List of emails of attendees",
                            required=False)
    add_parser.add_argument('-o',
                            '--override-color',
                            type=str,
                            choices=sorted(COLORS.keys()),
                            # Was a copy of the --attendees help text.
                            help="Override the event color",
                            default="")
예제 #3
0
    def prepare_summary(self, n_keywords: int = 4):
        """
        Prepare and persist the per-topic data for the summary page.

        Every topic word that has an entry in ``self.sent`` is ranked by
        sentiment (highest first) across all topics, and its rank is mapped
        onto one of the first eight keys of ``COLORS``. The result is dumped
        via ``self._dump_intermediate`` as a list with one entry per topic:

            [[topic_idx, topic_color, topic_keywords,
              [(word, weight, color), ...]], ...]

        where ``topic_color`` is the color carrying the largest total weight
        in that topic (``None`` when the topic has no sentiment-scored words)
        and ``topic_keywords`` are the ``n_keywords`` heaviest words.

        Args:
            n_keywords: Maximum number of keywords kept per topic.

        Returns:
            None. The summary is written to
            ``f"{self.topic_summary_fname}.pkl"``.
        """
        # Flatten to (topic_idx, word, weight, sentiment), keeping only the
        # words for which a sentiment value is known.
        scored_words = [
            (topic[0], ww[0], ww[1], self.sent[ww[0]])
            for topic in self.topic_words
            for ww in topic[1]
            if ww[0] in self.sent.keys()
        ]
        # Highest sentiment first; the position in this list drives the color.
        scored_words.sort(key=lambda item: item[3], reverse=True)

        # One mutable record per topic: [idx, color, keywords, word entries].
        topic_summary = [[idx, None, None, []]
                         for idx in range(self.n_topics)]

        # Hoisted: the key list does not change while iterating.
        color_names = list(COLORS.keys())
        n_scored = len(scored_words)
        for rank, (topic_idx, word, weight, _sentiment) in enumerate(
                scored_words):
            # Scale the rank into buckets 0..7.
            # NOTE(review): assumes COLORS has at least 8 entries - confirm.
            color = color_names[int(float(rank) / n_scored * 7.999)]
            topic_summary[topic_idx][3].append((
                word,
                np.float64(weight),
                color,  # sentiment color
            ))

        def get_topic_sent(entries):
            """Return the color with the largest summed weight, or None."""
            if not entries:
                # A topic without sentiment-scored words has no dominant
                # color; previously this raised IndexError.
                return None
            weight_by_color = {}
            for _word, weight, color in entries:
                weight_by_color[color] = weight_by_color.get(color, 0) + weight
            return max(weight_by_color.items(), key=lambda kv: kv[1])[0]

        for topic in topic_summary:
            # topic_sent_value color string
            topic[1] = get_topic_sent(topic[3])
            # topic_keywords: heaviest words first
            heaviest = sorted(topic[3], key=lambda entry: entry[1],
                              reverse=True)
            topic[2] = [entry[0] for entry in heaviest[:n_keywords]]

        # Save the data
        self._dump_intermediate(f"{self.topic_summary_fname}.pkl",
                                topic_summary)
    x = [c['duration'].delta / 60e9 for c in convos]
    y = [c['total_words'] for c in convos]
    fig = go.Figure(data=go.Scatter(x=x, y=y, mode='markers'))
    fig.update_layout(title='Convo duration vs words',
                      xaxis_title="Duration (minutes)",
                      yaxis_title="Words",
                      yaxis_scaleanchor="x",
                      yaxis_scaleratio=1)
    fig.update_xaxes(type='log')
    fig.update_yaxes(type='log')
    plot(fig, auto_open=True, filename="Convo Duration vs Words.html")


if __name__ == "__main__":
    # Script entry point: load the CSV, build the summaries, open the plots,
    # then report the wall-clock time taken.
    START_TIME = time.time()

    # Read the CSV at LOADPATH and run it through the type conversion helper.
    df = convert_raw_csv_types(pd.read_csv(LOADPATH, index_col=False))

    # One sub-frame per sender, in the key order of COLORS.
    names = list(COLORS.keys())
    df_person_list = [df[df['sender'] == person] for person in names]

    # A summary over everything, plus one summary per sender.
    summary_all = TextSummary(df)
    ts_dict = {
        person: TextSummary(person_frame)
        for person, person_frame in zip(names, df_person_list)
    }

    plot_totals(ts_dict)

    # Conversation-level plots are derived from the combined summary.
    conversations = summary_all.get_conversations(names)
    plot_convo_words(conversations, names)
    plot_convo_length(conversations)

    print("--- %s sec execution time ---" % (time.time() - START_TIME))
def make_scatter_plots(text_summary_by_person):
    """
    Create scatter plot of ratio of each person's usage of words

    The first two keys of the module-level ``COLORS`` mapping name the two
    people being compared. One subplot is drawn for words and one for
    emotes, and the figure is written to "Word Ratio Scatterplot.html".

    Args:
        text_summary_by_person (dict): {"Person1": TextSummary for that person, etc.}.
            A positional container (index 0 and 1) is also accepted.

    Returns:
        None. Opens plot.

    """

    MIN_WORD_OCCUR = 10  # minimum number of occurrences to be plotted
    MIN_EMOTE_OCCUR = 5

    names = list(COLORS.keys())

    def _summary_at(position):
        # The documented input is a dict keyed by person name, which the
        # previous hard-coded [0] / [1] lookups could not resolve. Positional
        # containers keep working for existing callers.
        if isinstance(text_summary_by_person, dict):
            person = names[position]
            if person in text_summary_by_person:
                return text_summary_by_person[person]
        return text_summary_by_person[position]

    first, second = _summary_at(0), _summary_at(1)

    # Per token type: {token: stats}. As used below, stats[0] is the total,
    # stats[1] the ratio, and stats[2] / stats[3] the values shown next to
    # each person's name in the hover. Less frequent tokens are stripped.
    min_occurrences = {'words': MIN_WORD_OCCUR, 'emotes': MIN_EMOTE_OCCUR}
    diffs = {}
    for token_type, threshold in min_occurrences.items():
        freq = first.compare_freq(second, token_type)
        diffs[token_type] = {token: stats for token, stats in freq.items()
                             if stats[0] >= threshold}

    titles = ['Words', 'Emotes']
    number_of_plots = len(titles)

    # initialize subplot figure
    fig = make_subplots(rows=number_of_plots, cols=1, shared_xaxes=True,
                        subplot_titles=titles)

    hovertemplate = ("<b>%{customdata[0]}</b><br>"
                     "Total: %{y} <br>Ratio: %{x:.2f}<br>"
                     "%{customdata[1]}: %{customdata[2]}<br>"
                     "%{customdata[3]}: %{customdata[4]}<extra></extra>")

    for row, diff_dict in enumerate(diffs.values(), start=1):
        x, y, customdata = [], [], []
        for word, v in diff_dict.items():
            y.append(v[0])
            x.append(v[1])
            customdata.append([word, names[0], v[2], names[1], v[3]])

        # add_trace(row=, col=) replaces the deprecated append_trace.
        fig.add_trace(go.Scatter(x=x,
                                 y=y,
                                 customdata=customdata,
                                 hovertemplate=hovertemplate,
                                 showlegend=False,
                                 mode='markers'),
                      row=row, col=1)

    fig.update_layout(width=1200, height=600 * number_of_plots)
    fig.update_xaxes(type='log', title_text=f'{names[0]}: {names[1]} ratio')
    fig.update_yaxes(type='log', title_text='Total')

    plot(fig, auto_open=True, filename="Word Ratio Scatterplot.html")