def plot_totals(ts_dict):
    """
    Draw one grouped bar chart per total statistic, comparing every person.

    A subplot row is created for each statistic (texts, words); within a row
    every key of COLORS contributes one bar, coloured with its COLORS value.

    Args:
        ts_dict (dict): {"PersonName": TextSummary for that person}. Must
            contain an entry for every key of COLORS.

    Returns:
        None. The figure is handed to `plot` with auto_open=True and the
        filename "Total Stats.html".
    """
    stat_keys = ['texts', 'words']
    stat_labels = ['Texts', 'Words']
    row_count = len(stat_keys)

    fig = make_subplots(
        rows=row_count,
        cols=1,
        shared_xaxes=False,
        subplot_titles=stat_labels,
    )

    # Rows are 1-based in the subplot grid, hence start=1.
    for row, (stat_key, stat_label) in enumerate(
            zip(stat_keys, stat_labels), start=1):
        for person, bar_color in COLORS.items():
            bar = go.Bar(
                x=[stat_label],
                y=[ts_dict[person].count[stat_key]],
                name=person,
                showlegend=True,
                marker_color=bar_color,
            )
            fig.append_trace(bar, row, 1)

    fig.update_layout(width=600, height=400 * row_count, barmode='group')
    fig.update_yaxes(title_text='Total')
    plot(fig, auto_open=True, filename="Total Stats.html")
def sub_parser_add(sub_parser):
    """Register the ``add`` sub-command used to add an event to a calendar.

    Args:
        sub_parser: The sub-parsers action (the object returned by
            ``ArgumentParser.add_subparsers()``) to attach the command to.

    Returns:
        None. The new parser is attached to ``sub_parser`` as a side effect.

    Positional arguments of the command: ``title``, ``start_date``
    (parsed by ``valid_date``) and ``start_time`` (parsed by ``valid_time``).
    Options: ``--calendar``, ``--duration`` (minutes), ``--attendees``
    (parsed by ``valid_attendees``) and ``--override-color`` (one of the
    keys of ``COLORS``; defaults to the empty string when not given).
    """
    add_parser = sub_parser.add_parser('add', help="Add event to calendar")

    add_parser.add_argument('-c', '--calendar', type=str,
                            help="Calendar name")
    add_parser.add_argument('title', type=str, help="Event title")
    # Help text used to say "start time" although the value is a date.
    add_parser.add_argument('start_date', type=valid_date,
                            help="Event start date, format: YYYY-MM-DD")
    add_parser.add_argument('start_time', type=valid_time,
                            help="Event start time, format: HH:MM")
    add_parser.add_argument('-d', '--duration', type=int,
                            help="Event duration (minutes)")
    add_parser.add_argument('-a', '--attendees', type=valid_attendees,
                            help="List of emails of attendees",
                            required=False)
    # Help text was a copy-paste of the attendees help; describe the option
    # that is actually being configured.
    add_parser.add_argument('-o', '--override-color', type=str,
                            choices=sorted(COLORS),
                            help="Override the event color",
                            default="")
def prepare_summary(self, n_keywords: int = 4):
    """
    Prepare and save the data for the summary page.

    Every topic word that has an entry in ``self.sent`` is ranked by its
    sentiment (highest first) across all topics. The rank is mapped onto one
    of the first eight keys of ``COLORS``, and the word is attached to its
    topic together with its weight and that colour key.

    The saved structure is a list with one entry per topic index in
    ``range(self.n_topics)``::

        [[topic_idx, topic_color, topic_keywords,
          [(word, weight, color), ...]], ...]

    * ``topic_color`` is the colour key with the largest summed weight in
      the topic, or ``None`` when the topic has no sentiment-scored words
      (previously this case raised ``IndexError``).
    * ``topic_keywords`` holds the ``n_keywords`` highest-weight words.

    Args:
        n_keywords: Maximum number of keywords kept per topic.

    Returns:
        None. The result is written through ``self._dump_intermediate`` to
        ``f"{self.topic_summary_fname}.pkl"``.
    """
    # Flatten to (topic_idx, word, weight, sentiment) rows, keeping only the
    # words for which a sentiment value is known.
    scored_rows = []
    for topic in self.topic_words:
        topic_idx = topic[0]
        for word_weight in topic[1]:
            word = word_weight[0]
            if word in self.sent:
                scored_rows.append(
                    (topic_idx, word, word_weight[1], self.sent[word]))

    # Rank by sentiment, highest first (stable for equal sentiments).
    scored_rows.sort(key=lambda row: row[3], reverse=True)

    topic_summary = [[idx, None, None, []] for idx in range(self.n_topics)]

    # Build the palette once instead of on every iteration.
    palette = list(COLORS.keys())
    row_count = len(scored_rows)
    for rank, (topic_idx, word, weight, _sentiment) in enumerate(scored_rows):
        # rank / row_count is in [0, 1), scaled onto palette slots 0..7.
        # NOTE(review): the 7.999 factor hard-codes an 8-colour palette and
        # pushes exact bin boundaries down one slot; kept as-is so existing
        # colour assignments do not change.
        color = palette[int(float(rank) / row_count * 7.999)]
        topic_summary[topic_idx][3].append((
            word,
            np.float64(weight),
            color,  # sentiment colour
        ))

    def get_topic_sent(entries):
        """Return the colour with the largest total weight, or None."""
        if not entries:
            return None
        weight_by_color = {}
        for _word, weight, color in entries:
            weight_by_color[color] = weight_by_color.get(color, 0) + weight
        # max() keeps the first colour seen on ties, like the stable
        # descending sort it replaces.
        return max(weight_by_color, key=weight_by_color.get)

    for topic in topic_summary:
        entries = topic[3]
        topic[1] = get_topic_sent(entries)
        by_weight = sorted(entries, key=lambda entry: entry[1], reverse=True)
        topic[2] = [entry[0] for entry in by_weight[:n_keywords]]

    # Save the data
    self._dump_intermediate(f"{self.topic_summary_fname}.pkl", topic_summary)
# NOTE(review): the statements up to the first `plot(...)` call read like the
# tail of a plotting routine whose `def` line is not visible in this excerpt
# (`convos` is not defined here) -- confirm the enclosing scope before moving
# or re-indenting them.

# One point per conversation: x from its duration, y from its word count.
# Presumably `duration` is a pandas Timedelta whose `.delta` is in
# nanoseconds, so dividing by 60e9 yields minutes (matching the axis title
# below) -- TODO confirm.
x = [c['duration'].delta / 60e9 for c in convos]
y = [c['total_words'] for c in convos]
fig = go.Figure(data=go.Scatter(x=x, y=y, mode='markers'))
# Anchor the y scale to x with ratio 1.
fig.update_layout(title='Convo duration vs words',
                  xaxis_title="Duration (minutes)",
                  yaxis_title="Words",
                  yaxis_scaleanchor="x",
                  yaxis_scaleratio=1)
# Both axes are logarithmic.
fig.update_xaxes(type='log')
fig.update_yaxes(type='log')
plot(fig, auto_open=True, filename="Convo Duration vs Words.html")

# Script entry point: load the CSV, build summaries, draw the plots, and
# report the elapsed wall-clock time.
if __name__ == "__main__":
    START_TIME = time.time()
    df = pd.read_csv(LOADPATH, index_col=False)
    df = convert_raw_csv_types(df)
    # The people to analyse are the keys of COLORS.
    names = list(COLORS.keys())
    # One sub-frame per person, selected on the 'sender' column.
    df_person_list = [df[df['sender'] == name] for name in names]
    # Summary over all rows, plus one summary per person keyed by name.
    summary_all = TextSummary(df)
    ts_dict = dict(
        zip(names, [TextSummary(df_person) for df_person in df_person_list]))
    plot_totals(ts_dict)
    conversations = summary_all.get_conversations(names)
    plot_convo_words(conversations, names)
    plot_convo_length(conversations)
    print("--- %s sec execution time ---" % (time.time() - START_TIME))
def make_scatter_plots(text_summary_by_person):
    """
    Create scatter plots of the ratio of two people's usage of each token.

    The two people compared are the first two keys of COLORS. One subplot is
    drawn for words and one for emotes; each point is a token, placed at
    (ratio, total) on log-log axes, with the per-person values shown on
    hover.

    Args:
        text_summary_by_person (dict): {"Person1": TextSummary for that
            person, etc.}. A sequence indexed by position (first person at
            0, second at 1) is still accepted for backward compatibility.

    Returns:
        None. The figure is handed to `plot` with auto_open=True and the
        filename "Word Ratio Scatterplot.html".
    """
    MIN_WORD_OCCUR = 10  # minimum number of occurrences to be plotted
    MIN_EMOTE_OCCUR = 5
    names = list(COLORS.keys())

    def summary_at(position):
        # The documented input is a dict keyed by person name, which the old
        # positional lookup ([0] / [1]) could not index. Try the name first
        # and fall back to the position for list-like callers.
        try:
            return text_summary_by_person[names[position]]
        except (KeyError, IndexError, TypeError):
            return text_summary_by_person[position]

    first_summary = summary_at(0)
    second_summary = summary_at(1)

    # Per token type, the minimum total a token needs to be plotted.
    thresholds = {'words': MIN_WORD_OCCUR, 'emotes': MIN_EMOTE_OCCUR}

    # diffs[token_type] maps token -> value tuple; index 0 is the total,
    # index 1 the ratio, indices 2 and 3 the values shown for each person.
    diffs = {}
    for token_type, min_total in thresholds.items():
        compared = first_summary.compare_freq(second_summary, token_type)
        # strip less frequent tokens
        diffs[token_type] = {
            token: value
            for token, value in compared.items()
            if value[0] >= min_total
        }

    number_of_plots = 2
    titles = ['Words', 'Emotes']

    # initialize subplot figure
    fig = make_subplots(rows=number_of_plots, cols=1, shared_xaxes=True,
                        subplot_titles=titles)

    hover = ("<b>%{customdata[0]}</b><br>"
             "Total: %{y} <br>Ratio: %{x:.2f}<br>"
             "%{customdata[1]}: %{customdata[2]}<br>"
             "%{customdata[3]}: %{customdata[4]}<extra></extra>")

    # Subplot rows are 1-based.
    for row, diff_dict in enumerate(diffs.values(), start=1):
        x, y, customdata = [], [], []
        for token, value in diff_dict.items():
            y.append(value[0])
            x.append(value[1])
            customdata.append(
                [token, names[0], value[2], names[1], value[3]])
        fig.append_trace(
            go.Scatter(x=x, y=y, customdata=customdata,
                       hovertemplate=hover,
                       showlegend=False, mode='markers'),
            row, 1)

    fig.update_layout(width=1200, height=600 * number_of_plots)
    fig.update_xaxes(type='log', title_text=f'{names[0]}: {names[1]} ratio')
    fig.update_yaxes(type='log', title_text='Total')
    plot(fig, auto_open=True, filename="Word Ratio Scatterplot.html")