Ejemplo n.º 1
0
import matplotlib.pyplot as plt
import mpld3
from mpld3._server import serve

# First graph: a blue bar chart with one labelled series.
x = [1, 2, 3]
y = [2, 3, 4]
fig1 = plt.figure()
plt.bar(x, y, label='label for bar', color='b')
plt.xlabel("xlabel 1")
plt.ylabel("ylabel 1")
plt.title("Plot 1")
# The legend is built from the labelled artists that already exist on the
# axes, so it has to be requested after the bars have been drawn.
plt.legend()

# Second graph: a red bar chart without a legend.
x = [1, 2, 3]
y = [5, 3, 1]
fig2 = plt.figure()
plt.bar(x, y, color='r')
plt.xlabel("xlabel 2")
plt.ylabel("ylabel 2")
plt.title("Plot 2")

# Create the HTML for both graphs.
html1 = mpld3.fig_to_html(fig1)
html2 = mpld3.fig_to_html(fig2)

# Serve the joined HTML to the browser.
serve(html1 + html2)
Ejemplo n.º 2
0
    def apply_lda(sentences):
        def format_topics_sentences(ldamodel, local_corpus, texts):
            """Build a table holding the dominant topic of every document.

            Args:
                ldamodel: Topic model. ``ldamodel[local_corpus]`` must yield
                    one entry per document; when ``ldamodel.per_word_topics``
                    is true the topic distribution is the first item of that
                    entry, otherwise it is the entry itself. The distribution
                    is a sequence of ``(topic_num, proportion)`` pairs and
                    ``ldamodel.show_topic(topic_num)`` returns
                    ``(word, weight)`` pairs.
                local_corpus: Corpus handed to ``ldamodel[...]``.
                texts: Original documents, appended as the last column.

            Returns:
                A DataFrame with the columns ``Dominant_Topic``,
                ``Perc_Contribution`` and ``Topic_Keywords`` followed by one
                unnamed column containing ``texts``. Rows are indexed by
                document position, so a document whose topic distribution is
                empty shows up with missing topic values instead of shifting
                the following rows.
            """
            columns = ['Dominant_Topic', 'Perc_Contribution', 'Topic_Keywords']
            rows = []
            doc_ids = []

            for doc_id, row_list in enumerate(ldamodel[local_corpus]):
                row = row_list[0] if ldamodel.per_word_topics else row_list
                if not row:
                    # No topic assigned to this document: nothing to report.
                    continue
                # The dominant topic is the one with the largest proportion;
                # ties resolve to the first such pair.
                topic_num, prop_topic = max(row, key=lambda pair: pair[1])
                topic_keywords = ", ".join(
                    word for word, _weight in ldamodel.show_topic(topic_num))
                rows.append(
                    [int(topic_num),
                     round(prop_topic, 4), topic_keywords])
                doc_ids.append(doc_id)

            # Build the frame in one go: appending row by row is quadratic and
            # ``DataFrame.append`` no longer exists in pandas 2.x.
            sent_topics_df = pd.DataFrame(rows, columns=columns, index=doc_ids)

            # Add original text to the end of the output
            contents = pd.Series(texts)
            return pd.concat([sent_topics_df, contents], axis=1)

        # ------------------------------------------------------------------
        # Train the model. The 2x2 subplot grids further down are sized for
        # exactly the four topics requested here.
        # ------------------------------------------------------------------
        id2word = gensim.corpora.Dictionary(sentences)
        corpus = [id2word.doc2bow(text) for text in sentences]
        lda_model = gensim.models.ldamodel.LdaModel(corpus=corpus,
                                                    id2word=id2word,
                                                    num_topics=4,
                                                    update_every=1,
                                                    chunksize=10,
                                                    passes=10,
                                                    alpha='symmetric',
                                                    per_word_topics=True)
        for topic in lda_model.print_topics():
            logger.info(topic)

        # Dominant topic per document, with the document position as a column.
        df_topic_sents_keywords = format_topics_sentences(ldamodel=lda_model,
                                                          local_corpus=corpus,
                                                          texts=sentences)
        df_dominant_topic = df_topic_sents_keywords.reset_index()
        df_dominant_topic.columns = [
            'Document_No', 'Dominant_Topic', 'Topic_Perc_Contrib', 'Keywords',
            'Text'
        ]

        # Highest-contribution document of each topic. The table is not
        # consumed by any of the figures below.
        sent_topics_sorteddf_mallet = pd.DataFrame()
        sent_topics_outdf_grpd = df_topic_sents_keywords.groupby(
            'Dominant_Topic')
        for _topic_no, grp in sent_topics_outdf_grpd:
            sent_topics_sorteddf_mallet = pd.concat([
                sent_topics_sorteddf_mallet,
                grp.sort_values(['Perc_Contribution'], ascending=False).head(1)
            ],
                                                    axis=0)
        sent_topics_sorteddf_mallet.reset_index(drop=True, inplace=True)
        sent_topics_sorteddf_mallet.columns = [
            'Topic_Num', "Topic_Perc_Contrib", "Keywords",
            "Representative Text"
        ]

        # One palette shared by every figure (more colors:
        # 'mcolors.XKCD_COLORS').
        cols = list(mcolors.TABLEAU_COLORS.values())

        # ------------------------------------------------------------------
        # Figure 1: distribution of document lengths over the whole corpus.
        # ------------------------------------------------------------------
        doc_lens = [len(d) for d in df_dominant_topic.Text]
        fig1 = plt.figure(figsize=(16, 7), dpi=100)
        plt.hist(doc_lens, bins=1000, color='navy')
        plt.text(750, 100, "Mean   : " + str(np.round(np.mean(doc_lens))))
        plt.text(750, 90, "Median : " + str(np.round(np.median(doc_lens))))
        plt.text(750, 80, "Stdev   : " + str(np.round(np.std(doc_lens))))
        plt.text(750, 70,
                 "1%ile    : " + str(np.round(np.quantile(doc_lens, q=0.01))))
        plt.text(750, 60,
                 "99%ile  : " + str(np.round(np.quantile(doc_lens, q=0.99))))

        plt.gca().set(xlim=(0, 1000),
                      ylabel='Number of Documents',
                      xlabel='Document Word Count')
        plt.tick_params(size=16)
        plt.xticks(np.linspace(0, 1000, 9))
        plt.title('Distribution of Document Word Counts',
                  fontdict=dict(size=22))

        # ------------------------------------------------------------------
        # Figure 2: the same distribution, split by dominant topic.
        # ------------------------------------------------------------------
        fig2, axes = plt.subplots(2,
                                  2,
                                  figsize=(16, 14),
                                  dpi=100,
                                  sharex=True,
                                  sharey=True)

        for i, ax in enumerate(axes.flatten()):
            df_dominant_topic_sub = df_dominant_topic.loc[
                df_dominant_topic.Dominant_Topic == i, :]
            doc_lens = [len(d) for d in df_dominant_topic_sub.Text]
            ax.hist(doc_lens, bins=1000, color=cols[i])
            ax.tick_params(axis='y', labelcolor=cols[i], color=cols[i])
            # The deprecated ``shade=False`` argument is omitted: an unfilled
            # curve is what kdeplot draws by default.
            sns.kdeplot(doc_lens, color="black", ax=ax.twinx())
            ax.set(xlim=(0, 1000), xlabel='Document Word Count')
            ax.set_ylabel('Number of Documents', color=cols[i])
            ax.set_title('Topic: ' + str(i),
                         fontdict=dict(size=16, color=cols[i]))

        fig2.tight_layout()
        fig2.subplots_adjust(top=0.90)
        plt.xticks(np.linspace(0, 1000, 9))
        fig2.suptitle('Distribution of Document Word Counts by Dominant Topic',
                      fontsize=22)

        # ------------------------------------------------------------------
        # Figure 3: one word cloud per topic.
        # ------------------------------------------------------------------
        # ``color_func`` deliberately reads ``i`` when it is called, not when
        # it is defined: each cloud is generated inside the loop below and so
        # picks up the colour of the topic currently being drawn.
        cloud = WordCloud(background_color='white',
                          width=2500,
                          height=1800,
                          max_words=10,
                          colormap='tab10',
                          color_func=lambda *args, **kwargs: cols[i],
                          prefer_horizontal=1.0)

        # (topic_id, [(word, weight), ...]) pairs; reused for figure 4.
        topics = lda_model.show_topics(formatted=False)

        fig3, axes = plt.subplots(2,
                                  2,
                                  figsize=(10, 10),
                                  sharex=True,
                                  sharey=True)

        for i, ax in enumerate(axes.flatten()):
            fig3.add_subplot(ax)
            topic_words = dict(topics[i][1])
            cloud.generate_from_frequencies(topic_words, max_font_size=300)
            plt.gca().imshow(cloud)
            plt.gca().set_title('Topic ' + str(i), fontdict=dict(size=16))
            plt.gca().axis('off')

        plt.subplots_adjust(wspace=0, hspace=0)
        plt.axis('off')
        plt.margins(x=0, y=0)
        plt.tight_layout()

        # ------------------------------------------------------------------
        # Figure 4: corpus word count vs. topic weight of each topic keyword.
        # ------------------------------------------------------------------
        counter = Counter(w for w_list in sentences for w in w_list)
        df = pd.DataFrame(
            [[word, topic_id, weight, counter[word]]
             for topic_id, topic in topics for word, weight in topic],
            columns=['word', 'topic_id', 'importance', 'word_count'])

        # Plot Word Count and Weights of Topic Keywords
        fig4, axes = plt.subplots(2, 2, figsize=(16, 10), sharey=True, dpi=100)
        for i, ax in enumerate(axes.flatten()):
            ax.bar(x='word',
                   height="word_count",
                   data=df.loc[df.topic_id == i, :],
                   color=cols[i],
                   width=0.5,
                   alpha=0.3,
                   label='Word Count')
            ax_twin = ax.twinx()
            ax_twin.bar(x='word',
                        height="importance",
                        data=df.loc[df.topic_id == i, :],
                        color=cols[i],
                        width=0.2,
                        label='Weights')
            ax.set_ylabel('Word Count', color=cols[i])
            ax_twin.set_ylim(0, 0.030)
            ax.set_ylim(0, 3500)
            ax.set_title('Topic: ' + str(i), color=cols[i], fontsize=16)
            ax.tick_params(axis='y', left=False)
            ax.set_xticklabels(df.loc[df.topic_id == i, 'word'],
                               rotation=30,
                               horizontalalignment='right')
            ax.legend(loc='upper left')
            ax_twin.legend(loc='upper right')

        fig4.tight_layout(w_pad=2)
        fig4.suptitle('Word Count and Importance of Topic Keywords',
                      fontsize=22,
                      y=1.05)

        # ------------------------------------------------------------------
        # Figure 5: the first documents with each word coloured by its topic.
        # The first row is left blank, the remaining rows hold one document
        # each.
        # ------------------------------------------------------------------
        start = 0
        # At most 12 documents are shown; never ask for more rows than the
        # corpus can fill, otherwise indexing the corpus slice would fail.
        end = min(13, len(corpus) + 1)
        corp = corpus[start:end]
        n_rows = end - start

        # squeeze=False keeps ``axes`` an array even when there is one row.
        fig5, axes = plt.subplots(n_rows,
                                  1,
                                  figsize=(20, n_rows * 0.95),
                                  dpi=100,
                                  squeeze=False)
        axes = axes[:, 0]
        axes[0].axis('off')
        for doc_no, (ax, corp_cur) in enumerate(zip(axes[1:], corp)):
            # Document rows never show axis decorations, even without words.
            ax.axis('off')
            topic_percs, wordid_topics, _wordid_phivalues = lda_model[corp_cur]
            word_dominanttopic = [(lda_model.id2word[wd], topic[0])
                                  for wd, topic in wordid_topics]
            ax.text(0.01,
                    0.5,
                    "Doc " + str(doc_no) + ": ",
                    verticalalignment='center',
                    fontsize=16,
                    color='black',
                    transform=ax.transAxes,
                    fontweight=700)

            # Draw a rectangle in the colour of the document's dominant topic.
            dominant_topic = max(topic_percs, key=lambda pair: pair[1])[0]
            ax.add_patch(
                Rectangle((0.0, 0.05),
                          0.99,
                          0.90,
                          fill=None,
                          alpha=1,
                          color=cols[dominant_topic],
                          linewidth=2))

            # Only the first 14 words of a document are written out.
            word_pos = 0.06
            for word, word_topic in word_dominanttopic[:14]:
                ax.text(word_pos,
                        0.5,
                        word,
                        horizontalalignment='left',
                        verticalalignment='center',
                        fontsize=16,
                        color=cols[word_topic],
                        transform=ax.transAxes,
                        fontweight=700)
                word_pos += .009 * len(word)  # advance to the next word slot
            ax.text(word_pos,
                    0.5,
                    '. . .',
                    horizontalalignment='left',
                    verticalalignment='center',
                    fontsize=16,
                    color='black',
                    transform=ax.transAxes)

        plt.subplots_adjust(wspace=0, hspace=0)
        plt.suptitle('Sentence Topic Coloring for Documents: ' + str(start) +
                     ' to ' + str(end - 2),
                     fontsize=22,
                     y=0.95,
                     fontweight=700)
        plt.tight_layout()

        # Convert every figure to HTML and serve them as one page.
        serve("".join(
            mpld3.fig_to_html(fig) for fig in (fig1, fig2, fig3, fig4, fig5)))
Ejemplo n.º 3
0
def plot(channels=None,
         path='./data',
         datetime_start=None,
         datetime_end=None,
         ip='0.0.0.0',
         open_browser=True):
    '''Plot Power/SNR vs time for all the channels specified within channels list.

    For every channel ``c`` the file ``<path>/<c>.csv`` is read (channels
    without a file are skipped), average and standard deviation of power and
    SNR are printed to STDOUT, and a two-panel figure is created. All figures
    are finally converted to HTML and served as a single page.

    Args:
        channels: Iterable of integer channel numbers; defaults to ``[0]``.
        path: Directory containing the per-channel CSV files.
        datetime_start: Optional lower bound, formatted
            ``'%Y-%m-%d %H:%M:%S'``; rows with a first-column timestamp below
            it are dropped.
        datetime_end: Optional upper bound in the same format; the first row
            at or after it, and everything following, is dropped.
        ip: Address handed to ``serve``.
        open_browser: Forwarded to ``serve``.

    Raises:
        ValueError: If a datetime bound does not match the expected format or
            a numeric environment override cannot be parsed.

    Appearance can be tuned with the ``FIG_SIZE_*``, ``PLOT_*`` and ``SNR_*``
    environment variables read below.
    '''
    if channels is None:
        channels = [0]

    FIG_SIZE_X = int(os.getenv('FIG_SIZE_X', 12))
    FIG_SIZE_Y = int(os.getenv('FIG_SIZE_Y', 7))
    PLOT_TITLE_FONT_SIZE = int(os.getenv('PLOT_TITLE_FONT_SIZE', 12))
    # Line widths are fractional: parsing them with int() would truncate the
    # 1.2 default to 1 and reject an override such as "1.5".
    PLOT_POWER_LINE_WIDTH = float(os.getenv('PLOT_POWER_LINE_WIDTH', 1.2))
    PLOT_POWER_COLOR = os.getenv('PLOT_POWER_COLOR', 'blue')
    PLOT_SNR_LINE_WIDTH = float(os.getenv('PLOT_SNR_LINE_WIDTH', 1.2))
    PLOT_SNR_COLOR = os.getenv('PLOT_SNR_COLOR', 'red')

    SNR_MIN_THRESHOLD = int(os.getenv('SNR_MIN_THRESHOLD',
                                      30))  # For limiting SNR y axis plot
    SNR_MIN_Y_VALUE_1 = int(os.getenv('SNR_MIN_Y_VALUE_1', 32))
    SNR_MAX_Y_VALUE_1 = int(os.getenv('SNR_MAX_Y_VALUE_1', 40))
    SNR_MIN_Y_VALUE_2 = int(os.getenv('SNR_MIN_Y_VALUE_2', 5))
    SNR_MAX_Y_VALUE_2 = int(os.getenv('SNR_MAX_Y_VALUE_2', 40))

    datetime_format = '%Y-%m-%d %H:%M:%S'

    def to_timestamp(text):
        '''Convert a formatted local datetime string to integer epoch seconds.'''
        parsed = datetime.datetime.strptime(text, datetime_format)
        return int(time.mktime(parsed.timetuple()))

    # The bounds do not depend on the channel, so parse them only once.
    timestamp_from = (None if datetime_start is None else
                      to_timestamp(datetime_start))
    timestamp_to = None if datetime_end is None else to_timestamp(datetime_end)

    no_samples_msg = 'Ch {:02d}: no samples in the selected time range'

    figures = dict()

    # Do not print warning if more than 20 plots are opened
    plt.rcParams.update({'figure.max_open_warning': 0})

    print('')

    for c in channels:
        csv_path = path + '/' + str(c) + '.csv'
        if not os.path.isfile(csv_path):
            continue

        # Numeric view of the whole file, used only to locate the crop rows.
        # atleast_2d keeps a one-line file indexable by row.
        data = np.atleast_2d(np.genfromtxt(csv_path, delimiter=','))

        # Crop input based on start/end datetime
        start = None
        end = None
        if timestamp_from is not None:
            hits = np.flatnonzero(data[:, 0] >= timestamp_from)
            if hits.size == 0:
                # Every sample is older than the requested start; plotting
                # the whole file instead would be misleading.
                print(no_samples_msg.format(c))
                continue
            start = int(hits[0])
        if timestamp_to is not None:
            hits = np.flatnonzero(data[:, 0] >= timestamp_to)
            if hits.size:
                end = int(hits[0])

        # Without a start bound only the first line is skipped.
        first_row = start if start is not None else 1
        last_row = end if end is not None else len(data)
        if last_row <= first_row:
            print(no_samples_msg.format(c))
            continue

        crop = dict(delimiter=',',
                    unpack=True,
                    skip_header=first_row,
                    skip_footer=len(data) - last_row)
        # atleast_1d: a single remaining row would otherwise come back as a
        # 0-d array that can be neither averaged nor plotted as a series.
        timestamp = np.atleast_1d(
            np.genfromtxt(csv_path,
                          converters={0: convertfunc},
                          usecols=0,
                          **crop))
        power = np.atleast_1d(np.genfromtxt(csv_path, usecols=1, **crop))
        snr = np.atleast_1d(np.genfromtxt(csv_path, usecols=2, **crop))

        avg_snr = np.mean(snr)
        avg_pwr = np.mean(power)
        sd_snr = np.std(snr)
        sd_pwr = np.std(power)

        # Print statistics to STDOUT
        print('Ch {:02d}: PWR avg: {:05.2f} dBmV / PWR std: {:05.2f}'
              ' - SNR avg: {:05.2f} dB  / SNR std: {:05.2f}'.format(
                  c, avg_pwr, sd_pwr, avg_snr, sd_snr))

        figures[c] = plt.figure(c, figsize=(FIG_SIZE_X, FIG_SIZE_Y))

        # Power Plot
        plt.subplot(211)  # 2 rows, 1 column, subplot #1
        plt.plot(timestamp,
                 power,
                 linewidth=PLOT_POWER_LINE_WIDTH,
                 color=PLOT_POWER_COLOR)
        plt.title('Channel ' + str(c), fontsize=PLOT_TITLE_FONT_SIZE)
        plt.ylabel('Power (dBmV)')
        plt.xlabel('datetime')
        plt.grid(True)

        # SNR Plot
        plt.subplot(212)  # 2 rows, 1 column, subplot #2
        plt.plot(timestamp,
                 snr,
                 linewidth=PLOT_SNR_LINE_WIDTH,
                 color=PLOT_SNR_COLOR)
        plt.ylabel('SNR (dB)')
        plt.xlabel('datetime')
        plt.grid(True)

        # Limit SNR Y axis based on minimum value for better visualization:
        # the narrow range by default, the wide one as soon as a sample drops
        # below the threshold.
        axes = plt.gca()
        if min(snr) < SNR_MIN_THRESHOLD:
            axes.set_ylim([SNR_MIN_Y_VALUE_2, SNR_MAX_Y_VALUE_2])
        else:
            axes.set_ylim([SNR_MIN_Y_VALUE_1, SNR_MAX_Y_VALUE_1])

    # Serve all plots in the same html page
    html_sum = ''.join(fig_to_html(fig) for fig in figures.values())

    print('')
    serve(html_sum, open_browser=open_browser, ip=ip)
Ejemplo n.º 4
0
# Index labels may be UTF-8 byte strings (the case the original
# ``unicode(i, 'utf-8')`` call handled on Python 2); decode those and keep
# labels that are already text, so the script also runs on Python 3 where
# the ``unicode`` builtin does not exist.
names = [
    i.decode('utf-8') if isinstance(i, bytes) else i for i in smallest.index
]
ind = np.arange(len(names))

plt.xticks(ind, names)
plt.title("Bairros com maior insatisfacao")
# An explicit list of colours is cycled over the bars; the shorthand string
# 'rgb' is no longer accepted as a colour sequence by current Matplotlib.
plt.bar(ind, smallest, 1, color=['r', 'g', 'b'])

##############################################################
# Start a new figure for the second chart and remember it for export.
imgs.append(plt.figure())

biggest = mean.nlargest(5)
names = [
    i.decode('utf-8') if isinstance(i, bytes) else i for i in biggest.index
]
ind = np.arange(len(names))
plt.xticks(ind, names)
plt.title("Bairros com mais satisfacao")
plt.bar(ind, biggest, 1, color=['r', 'g', 'b'])

# Convert every collected figure to HTML and serve them as a single page.
html = ''.join(mpld3.fig_to_html(img) for img in imgs)

from mpld3._server import serve
serve(html, port=9000, ip='0.0.0.0')
Ejemplo n.º 5
0
    def show(self):
        """Call ``__end_html__`` and then hand ``self.html`` to ``serve``."""
        self.__end_html__()
        # Read the attribute only after __end_html__ has run, exactly as the
        # direct ``serve(self.html)`` call would.
        page = self.html
        serve(page)
Ejemplo n.º 6
0
        # Build the two error figures with helpers defined elsewhere
        # (judging by their names: amplitude errors and time errors).
        fig_amps_errs = _gen_amps_errs_plot(amps_orig, singles_or, amps_extr,
                                            singles_det, amps_errs_bins,
                                            amps_errs_range)

        fig_times_errs = _gen_times_errs_plot(pos_orig, singles_or, pos_extr,
                                              singles_det, times_errs_bins,
                                              times_errs_range)

        # Run OUT_TEMPLATE through markdown() first, then render the result
        # as a template (Environment/BaseLoader are presumably jinja2's --
        # confirm against the file's imports) with the counts, percentages
        # and the mpld3 HTML of the three figures.
        # NOTE(review): the percentages divide by len(amps_orig) and
        # len(amps_extr); unless an earlier guard outside this excerpt rules
        # out empty inputs, this raises ZeroDivisionError -- confirm.
        md = markdown(OUT_TEMPLATE)
        html = Environment(loader=BaseLoader()).from_string(md).render(
            method=metrics_all['method'],
            total=len(amps_orig),
            extr_all=len(amps_extr),
            extr_all_perc=len(amps_extr) / len(amps_orig) * 100,
            extr_fpos=len(false_pos),
            extr_fpos_perc=len(false_pos) / len(amps_extr) * 100,
            extr_ok=len(singles_or),
            extr_ok_perc=len(singles_or) / len(amps_extr) * 100,
            extr_ovlpd=len(mult_or),
            extr_ovlpd_perc=len(mult_or) / len(amps_extr) * 100,
            hist_plot=mpld3.fig_to_html(fig_hist),
            amp_errs_plot=mpld3.fig_to_html(fig_amps_errs),
            times_errs_plot=mpld3.fig_to_html(fig_times_errs))

        # Show the figures through matplotlib as well.
        # NOTE(review): with an interactive backend plt.show() usually blocks
        # until the windows are closed, so the page would only be served
        # afterwards -- confirm this ordering is intended.
        fig_hist.show()
        fig_amps_errs.show()
        fig_times_errs.show()
        plt.show()

        # Serve the rendered report.
        serve(html)