import matplotlib.pyplot as plt
import mpld3
from mpld3._server import serve

# First graph
x = [1, 2, 3]
y = [2, 3, 4]
fig1 = plt.figure()
plt.xlabel("xlabel 1")
plt.ylabel("ylabel 1")
plt.title("Plot 1")
plt.bar(x, y, label='label for bar', color='b')
plt.legend()  # call after plt.bar so the labelled bars show up in the legend

# Second graph
x = [1, 2, 3]
y = [5, 3, 1]
fig2 = plt.figure()
plt.xlabel("xlabel 2")
plt.ylabel("ylabel 2")
plt.title("Plot 2")
plt.bar(x, y, color='r')

# Create HTML for both graphs and serve the joined page to the browser
html1 = mpld3.fig_to_html(fig1)
html2 = mpld3.fig_to_html(fig2)
serve(html1 + html2)
import logging
from collections import Counter

import gensim
import matplotlib.colors as mcolors
import matplotlib.pyplot as plt
import mpld3
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib.patches import Rectangle
from mpld3._server import serve
from wordcloud import WordCloud

logger = logging.getLogger(__name__)


def apply_lda(sentences):
    def format_topics_sentences(ldamodel, local_corpus, texts):
        # Init output
        sent_topics_df = pd.DataFrame()
        # Get main topic in each document
        for _i, row_list in enumerate(ldamodel[local_corpus]):
            row = row_list[0] if ldamodel.per_word_topics else row_list
            row = sorted(row, key=lambda x: (x[1]), reverse=True)
            # Get the Dominant topic, Perc Contribution and Keywords for each document
            for j, (topic_num, prop_topic) in enumerate(row):
                if j == 0:  # => dominant topic
                    wp = ldamodel.show_topic(topic_num)
                    topic_keywords = ", ".join([_word for _word, prop in wp])
                    sent_topics_df = sent_topics_df.append(
                        pd.Series([int(topic_num), round(prop_topic, 4), topic_keywords]),
                        ignore_index=True)
                else:
                    break
        sent_topics_df.columns = ['Dominant_Topic', 'Perc_Contribution', 'Topic_Keywords']
        # Add original text to the end of the output
        contents = pd.Series(texts)
        sent_topics_df = pd.concat([sent_topics_df, contents], axis=1)
        return sent_topics_df

    # Build the dictionary/corpus and fit the LDA model
    id2word = gensim.corpora.Dictionary(sentences)
    corpus = [id2word.doc2bow(text) for text in sentences]
    lda_model = gensim.models.ldamodel.LdaModel(corpus=corpus,
                                                id2word=id2word,
                                                num_topics=4,
                                                update_every=1,
                                                chunksize=10,
                                                passes=10,
                                                alpha='symmetric',
                                                per_word_topics=True)
    topics = lda_model.print_topics()
    for topic in topics:
        logger.info(topic)

    df_topic_sents_keywords = format_topics_sentences(ldamodel=lda_model,
                                                      local_corpus=corpus,
                                                      texts=sentences)
    df_dominant_topic = df_topic_sents_keywords.reset_index()
    df_dominant_topic.columns = ['Document_No', 'Dominant_Topic', 'Topic_Perc_Contrib', 'Keywords', 'Text']

    # Most representative document for each topic
    sent_topics_sorteddf_mallet = pd.DataFrame()
    sent_topics_outdf_grpd = df_topic_sents_keywords.groupby('Dominant_Topic')
    for i, grp in sent_topics_outdf_grpd:
        sent_topics_sorteddf_mallet = pd.concat([
            sent_topics_sorteddf_mallet,
            grp.sort_values(['Perc_Contribution'], ascending=False).head(1)
        ], axis=0)
    sent_topics_sorteddf_mallet.reset_index(drop=True, inplace=True)
    sent_topics_sorteddf_mallet.columns = ['Topic_Num', 'Topic_Perc_Contrib', 'Keywords', 'Representative Text']

    doc_lens = [len(d) for d in df_dominant_topic.Text]

    # Plot: distribution of document word counts
    fig1 = plt.figure(figsize=(16, 7), dpi=100)
    plt.hist(doc_lens, bins=1000, color='navy')
    plt.text(750, 100, "Mean : " + str(np.round(np.mean(doc_lens))))
    plt.text(750, 90, "Median : " + str(np.round(np.median(doc_lens))))
    plt.text(750, 80, "Stdev : " + str(np.round(np.std(doc_lens))))
    plt.text(750, 70, "1%ile : " + str(np.round(np.quantile(doc_lens, q=0.01))))
    plt.text(750, 60, "99%ile : " + str(np.round(np.quantile(doc_lens, q=0.99))))
    plt.gca().set(xlim=(0, 1000), ylabel='Number of Documents', xlabel='Document Word Count')
    plt.tick_params(size=16)
    plt.xticks(np.linspace(0, 1000, 9))
    plt.title('Distribution of Document Word Counts', fontdict=dict(size=22))

    cols = [color for name, color in mcolors.TABLEAU_COLORS.items()]  # more colors: 'mcolors.XKCD_COLORS'

    # Plot: word count distribution per dominant topic
    fig2, axes = plt.subplots(2, 2, figsize=(16, 14), dpi=100, sharex=True, sharey=True)
    for i, ax in enumerate(axes.flatten()):
        df_dominant_topic_sub = df_dominant_topic.loc[df_dominant_topic.Dominant_Topic == i, :]
        doc_lens = [len(d) for d in df_dominant_topic_sub.Text]
        ax.hist(doc_lens, bins=1000, color=cols[i])
        ax.tick_params(axis='y', labelcolor=cols[i], color=cols[i])
        sns.kdeplot(doc_lens, color="black", shade=False, ax=ax.twinx())
        ax.set(xlim=(0, 1000), xlabel='Document Word Count')
        ax.set_ylabel('Number of Documents', color=cols[i])
        ax.set_title('Topic: ' + str(i), fontdict=dict(size=16, color=cols[i]))
    fig2.tight_layout()
    fig2.subplots_adjust(top=0.90)
    plt.xticks(np.linspace(0, 1000, 9))
    fig2.suptitle('Distribution of Document Word Counts by Dominant Topic', fontsize=22)

    # Plot: word clouds of the top keywords in each topic
    cols = [color for name, color in mcolors.TABLEAU_COLORS.items()]  # more colors: 'mcolors.XKCD_COLORS'
    cloud = WordCloud(background_color='white',
                      width=2500,
                      height=1800,
                      max_words=10,
                      colormap='tab10',
                      color_func=lambda *args, **kwargs: cols[i],
                      prefer_horizontal=1.0)
    topics = lda_model.show_topics(formatted=False)
    fig3, axes = plt.subplots(2, 2, figsize=(10, 10), sharex=True, sharey=True)
    for i, ax in enumerate(axes.flatten()):
        fig3.add_subplot(ax)
        topic_words = dict(topics[i][1])
        cloud.generate_from_frequencies(topic_words, max_font_size=300)
        plt.gca().imshow(cloud)
        plt.gca().set_title('Topic ' + str(i), fontdict=dict(size=16))
        plt.gca().axis('off')
    plt.subplots_adjust(wspace=0, hspace=0)
    plt.axis('off')
    plt.margins(x=0, y=0)
    plt.tight_layout()

    # Word counts and weights of the topic keywords
    topics = lda_model.show_topics(formatted=False)
    data_flat = [w for w_list in sentences for w in w_list]
    counter = Counter(data_flat)
    out = []
    for i, topic in topics:
        for word, weight in topic:
            out.append([word, i, weight, counter[word]])
    df = pd.DataFrame(out, columns=['word', 'topic_id', 'importance', 'word_count'])

    # Plot Word Count and Weights of Topic Keywords
    fig4, axes = plt.subplots(2, 2, figsize=(16, 10), sharey=True, dpi=100)
    cols = [color for name, color in mcolors.TABLEAU_COLORS.items()]
    for i, ax in enumerate(axes.flatten()):
        ax.bar(x='word', height="word_count", data=df.loc[df.topic_id == i, :],
               color=cols[i], width=0.5, alpha=0.3, label='Word Count')
        ax_twin = ax.twinx()
        ax_twin.bar(x='word', height="importance", data=df.loc[df.topic_id == i, :],
                    color=cols[i], width=0.2, label='Weights')
        ax.set_ylabel('Word Count', color=cols[i])
        ax_twin.set_ylim(0, 0.030)
        ax.set_ylim(0, 3500)
        ax.set_title('Topic: ' + str(i), color=cols[i], fontsize=16)
        ax.tick_params(axis='y', left=False)
        ax.set_xticklabels(df.loc[df.topic_id == i, 'word'], rotation=30, horizontalalignment='right')
        ax.legend(loc='upper left')
        ax_twin.legend(loc='upper right')
    fig4.tight_layout(w_pad=2)
    fig4.suptitle('Word Count and Importance of Topic Keywords', fontsize=22, y=1.05)

    # Plot: sentence topic coloring for the first few documents
    start = 0
    end = 13
    corp = corpus[start:end]
    mycolors = [color for name, color in mcolors.TABLEAU_COLORS.items()]
    fig5, axes = plt.subplots(end - start, 1, figsize=(20, (end - start) * 0.95), dpi=100)
    axes[0].axis('off')
    for i, ax in enumerate(axes):
        if i > 0:
            corp_cur = corp[i - 1]
            topic_percs, wordid_topics, wordid_phivalues = lda_model[corp_cur]
            word_dominanttopic = [(lda_model.id2word[wd], topic[0]) for wd, topic in wordid_topics]
            ax.text(0.01, 0.5, "Doc " + str(i - 1) + ": ",
                    verticalalignment='center', fontsize=16, color='black',
                    transform=ax.transAxes, fontweight=700)
            # Draw Rectangle
            topic_percs_sorted = sorted(topic_percs, key=lambda x: (x[1]), reverse=True)
            ax.add_patch(Rectangle((0.0, 0.05), 0.99, 0.90, fill=None, alpha=1,
                                   color=mycolors[topic_percs_sorted[0][0]], linewidth=2))
            word_pos = 0.06
            for j, (word, topics) in enumerate(word_dominanttopic):
                if j < 14:
                    ax.text(word_pos, 0.5, word,
                            horizontalalignment='left', verticalalignment='center',
                            fontsize=16, color=mycolors[topics],
                            transform=ax.transAxes, fontweight=700)
                    word_pos += .009 * len(word)  # to move the word for the next iter
                    ax.axis('off')
            ax.text(word_pos, 0.5, '. . .',
                    horizontalalignment='left', verticalalignment='center',
                    fontsize=16, color='black', transform=ax.transAxes)
    plt.subplots_adjust(wspace=0, hspace=0)
    plt.suptitle('Sentence Topic Coloring for Documents: ' + str(start) + ' to ' + str(end - 2),
                 fontsize=22, y=0.95, fontweight=700)
    plt.tight_layout()

    # Convert each figure to HTML and serve them all on one page
    html1 = mpld3.fig_to_html(fig1)
    html2 = mpld3.fig_to_html(fig2)
    html3 = mpld3.fig_to_html(fig3)
    html4 = mpld3.fig_to_html(fig4)
    html5 = mpld3.fig_to_html(fig5)
    serve(html1 + html2 + html3 + html4 + html5)
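# Usage sketch (illustrative only, not part of the original code): apply_lda
# expects `sentences` to be a list of already-tokenized documents (a list of
# token lists), since each entry is fed to id2word.doc2bow(). The document
# slice plotted above (start=0, end=13) assumes the corpus has at least a
# dozen documents.
#
#     tokenized_docs = [
#         ['topic', 'model', 'keyword', 'weight'],
#         ['document', 'word', 'count', 'distribution'],
#         # ... more tokenized documents ...
#     ]
#     apply_lda(tokenized_docs)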
import datetime
import os
import time

import matplotlib.pyplot as plt
import numpy as np
from mpld3 import fig_to_html
from mpld3._server import serve


def plot(channels=[0], path='./data', datetime_start=None, datetime_end=None,
         ip='0.0.0.0', open_browser=True):
    '''Plot Power/SNR vs time for all the channels specified within channels list'''
    FIG_SIZE_X = int(os.getenv('FIG_SIZE_X', 12))
    FIG_SIZE_Y = int(os.getenv('FIG_SIZE_Y', 7))
    PLOT_TITLE_FONT_SIZE = int(os.getenv('PLOT_TITLE_FONT_SIZE', 12))
    PLOT_POWER_LINE_WIDTH = float(os.getenv('PLOT_POWER_LINE_WIDTH', 1.2))
    PLOT_POWER_COLOR = os.getenv('PLOT_POWER_COLOR', 'blue')
    PLOT_SNR_LINE_WIDTH = float(os.getenv('PLOT_SNR_LINE_WIDTH', 1.2))
    PLOT_SNR_COLOR = os.getenv('PLOT_SNR_COLOR', 'red')
    SNR_MIN_THRESHOLD = int(os.getenv('SNR_MIN_THRESHOLD', 30))
    # For limiting SNR y axis plot
    SNR_MIN_Y_VALUE_1 = int(os.getenv('SNR_MIN_Y_VALUE_1', 32))
    SNR_MAX_Y_VALUE_1 = int(os.getenv('SNR_MAX_Y_VALUE_1', 40))
    SNR_MIN_Y_VALUE_2 = int(os.getenv('SNR_MIN_Y_VALUE_2', 5))
    SNR_MAX_Y_VALUE_2 = int(os.getenv('SNR_MAX_Y_VALUE_2', 40))
    datetime_format = '%Y-%m-%d %H:%M:%S'
    figures = dict()

    # Do not print warning if more than 20 plots are opened
    plt.rcParams.update({'figure.max_open_warning': 0})
    print('')
    for c in channels:
        f = path + '/' + str(c) + '.csv'
        exists = os.path.isfile(f)
        if not exists:
            continue
        data = np.genfromtxt(f, delimiter=',')

        # Crop input based on start/end datetime
        start = None
        end = None
        if datetime_start is not None or datetime_end is not None:
            if datetime_start is not None:
                timestamp_from = int(time.mktime(datetime.datetime.strptime(
                    datetime_start, datetime_format).timetuple()))
            if datetime_end is not None:
                timestamp_to = int(time.mktime(datetime.datetime.strptime(
                    datetime_end, datetime_format).timetuple()))
            for i, line in enumerate(data):
                if datetime_start is not None and line[0] >= timestamp_from:
                    if start is None:
                        start = i
                if datetime_end is not None and line[0] >= timestamp_to:
                    if end is None:
                        end = i

        # `convertfunc` (converter for the timestamp column) is assumed to be
        # defined elsewhere in the original module.
        timestamp = np.genfromtxt(f, delimiter=',', unpack=True,
                                  converters={0: convertfunc},
                                  skip_header=start if start is not None else 1,
                                  skip_footer=(len(data) - end) if end is not None else 0,
                                  usecols=0)
        power = np.genfromtxt(f, delimiter=',', unpack=True,
                              skip_header=start if start is not None else 1,
                              skip_footer=(len(data) - end) if end is not None else 0,
                              usecols=1)
        snr = np.genfromtxt(f, delimiter=',', unpack=True,
                            skip_header=start if start is not None else 1,
                            skip_footer=(len(data) - end) if end is not None else 0,
                            usecols=2)
        avg_snr = sum(snr) / float(len(snr))
        avg_pwr = sum(power) / float(len(power))
        sd_snr = np.std(np.array(snr))
        sd_pwr = np.std(np.array(power))

        # Print statistics to STDOUT
        print('Ch ' + str("{:02d}".format(c)) +
              ': PWR avg: ' + str("{:05.2f}".format(avg_pwr)) +
              ' dBmV / PWR std: ' + str("{:05.2f}".format(sd_pwr)) +
              ' - SNR avg: ' + str("{:05.2f}".format(avg_snr)) +
              ' dB / SNR std: ' + str("{:05.2f}".format(sd_snr)))

        figures[c] = plt.figure(c, figsize=(FIG_SIZE_X, FIG_SIZE_Y))

        # Power Plot
        plt.subplot(211)  # 2 rows, 1 column, subplot #1
        plt.plot(timestamp, power, linewidth=PLOT_POWER_LINE_WIDTH, color=PLOT_POWER_COLOR)
        plt.title('Channel ' + str(c), fontsize=PLOT_TITLE_FONT_SIZE)
        plt.ylabel('Power (dBmV)')
        plt.xlabel('datetime')
        plt.grid(True)

        # SNR Plot
        plt.subplot(212)  # 2 rows, 1 column, subplot #2
        plt.plot(timestamp, snr, linewidth=PLOT_SNR_LINE_WIDTH, color=PLOT_SNR_COLOR)
        plt.ylabel('SNR (dB)')
        plt.xlabel('datetime')
        plt.grid(True)

        # Limit SNR Y axis based on minimum value for better visualization
        axes = plt.gca()
        axes.set_ylim([SNR_MIN_Y_VALUE_1, SNR_MAX_Y_VALUE_1])
        if min(snr) < SNR_MIN_THRESHOLD:
            axes.set_ylim([SNR_MIN_Y_VALUE_2, SNR_MAX_Y_VALUE_2])

    # Serve all plots in the same html page
    html_sum = ''
    for f in figures:
        html = fig_to_html(figures[f])
        html_sum = html_sum + html
    print('')
    serve(html_sum, open_browser=open_browser, ip=ip)
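# Usage sketch (hypothetical call, not in the original source): assuming
# per-channel CSV files named <path>/<channel>.csv with timestamp,power,snr
# columns (matching usecols=0/1/2 above), the plots could be generated and
# served with something like:
#
#     plot(channels=[0, 1, 2],
#          path='./data',
#          datetime_start='2021-01-01 00:00:00',
#          datetime_end='2021-01-02 00:00:00',
#          ip='127.0.0.1',
#          open_browser=True)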
import numpy as np
import matplotlib.pyplot as plt
import mpld3
from mpld3._server import serve

# Fragment: `imgs` (list of figures), `mean` (per-neighbourhood satisfaction
# scores) and `smallest` (presumably mean.nsmallest(5), mirroring `biggest`
# below) are built earlier in the original script.
names = []
for i in smallest.index:
    names.append(str(i))
ind = np.arange(len(names))
plt.xticks(ind, names)
plt.title("Bairros com maior insatisfacao")  # "Neighbourhoods with the most dissatisfaction"
plt.bar(ind, smallest, 1, color=['r', 'g', 'b'])

##############################################################

imgs.append(plt.figure())
biggest = mean.nlargest(5)
names = []
for i in biggest.index:
    names.append(str(i))
ind = np.arange(len(names))
plt.xticks(ind, names)
plt.title("Bairros com mais satisfacao")  # "Neighbourhoods with the most satisfaction"
plt.bar(ind, biggest, 1, color=['r', 'g', 'b'])
# plt.show()

# Concatenate the HTML for every figure and serve them on one page
html = ''
for img in imgs:
    html += mpld3.fig_to_html(img)

serve(html, port=9000, ip='0.0.0.0')
# Method fragment: part of a class that accumulates figure HTML in self.html.
def show(self):
    self.__end_html__()
    serve(self.html)
# Fragment: the _gen_*_plot helpers, fig_hist, OUT_TEMPLATE, metrics_all and
# the amplitude/position arrays are defined earlier in the original module;
# markdown() comes from the `markdown` package and Environment/BaseLoader
# from jinja2.
fig_amps_errs = _gen_amps_errs_plot(amps_orig, singles_or, amps_extr,
                                    singles_det, amps_errs_bins, amps_errs_range)
fig_times_errs = _gen_times_errs_plot(pos_orig, singles_or, pos_extr,
                                      singles_det, times_errs_bins, times_errs_range)

# Render the markdown report template and fill it with the metrics and the
# mpld3 figure embeds
md = markdown(OUT_TEMPLATE)
html = Environment(loader=BaseLoader()).from_string(md).render(
    method=metrics_all['method'],
    total=len(amps_orig),
    extr_all=len(amps_extr),
    extr_all_perc=len(amps_extr) / len(amps_orig) * 100,
    extr_fpos=len(false_pos),
    extr_fpos_perc=len(false_pos) / len(amps_extr) * 100,
    extr_ok=len(singles_or),
    extr_ok_perc=len(singles_or) / len(amps_extr) * 100,
    extr_ovlpd=len(mult_or),
    extr_ovlpd_perc=len(mult_or) / len(amps_extr) * 100,
    hist_plot=mpld3.fig_to_html(fig_hist),
    amp_errs_plot=mpld3.fig_to_html(fig_amps_errs),
    times_errs_plot=mpld3.fig_to_html(fig_times_errs))

fig_hist.show()
fig_amps_errs.show()
fig_times_errs.show()
plt.show()
serve(html)