Exemple #1
0
    def generate_bokeh_umap(self, media_type):
        """Build, show and return an interactive Bokeh page of a UMAP embedding.

        The method embeds ``self.vectorized_out`` in 2-D with UMAP (Hellinger
        metric), colours every point by the column of ``self.doc_topic_dists``
        holding its largest value, and wires up a topic slider, a search box,
        a keyword banner and a "current selection" panel.

        Reads from ``self``: ``top_words_map``, ``vectorized_out``,
        ``doc_topic_dists``, ``original_df`` and ``nr_of_topics``.

        Side effects: calls ``output_notebook()``, then
        ``output_file('t-sne_interactive_streamlit.html')`` and ``show()``.

        Args:
            media_type: ``'videos'`` or ``'articles'``. Selects which columns
                of ``self.original_df`` feed the data source, which hover
                tooltips are shown and which tap-callback code is used.

        Returns:
            The Bokeh layout object that was passed to ``show()``.

        Raises:
            ValueError: if ``media_type`` is neither ``'videos'`` nor
                ``'articles'``.
        """
        # Fail fast: without this check an unknown media type only surfaced
        # as an UnboundLocalError after the (expensive) UMAP fit had run.
        if media_type not in ('videos', 'articles'):
            raise ValueError(
                "media_type must be 'videos' or 'articles', got {!r}".format(
                    media_type))

        output_notebook()

        # Keys become the legend labels, values are handed to the JS callback.
        labels = list(self.top_words_map.keys())
        topics = list(self.top_words_map.values())

        # Pick a larger neighbourhood for larger inputs. This must be a single
        # if/elif/else chain: with two independent `if`s the >= 20000 choice
        # was always overwritten by the >= 5000 branch.
        # NOTE(review): the thresholds test len(labels), i.e. the number of
        # entries in top_words_map, not the number of rows being embedded --
        # confirm this is the intended size measure.
        n_labels = len(labels)
        if n_labels >= 20000:
            reducer = umap.UMAP(n_neighbors=100, metric='hellinger')
        elif n_labels >= 5000:
            reducer = umap.UMAP(n_neighbors=50, metric='hellinger')
        else:
            reducer = umap.UMAP(metric='hellinger')

        # Fit on a copy so the stored matrix is never handed to UMAP directly.
        X_embedded = reducer.fit_transform(self.vectorized_out.copy())

        # Dominant topic per row: label of the column with the largest value.
        y_labels = pd.DataFrame(self.doc_topic_dists).idxmax(axis=1).values
        y_labels_new = [labels[i] for i in y_labels]

        df = self.original_df.copy()

        # Columns and tooltips shared by both media types.
        data = dict(x=X_embedded[:, 0],
                    y=X_embedded[:, 1],
                    x_backup=X_embedded[:, 0],
                    y_backup=X_embedded[:, 1],
                    desc=y_labels,
                    titles=df['title'],
                    text=df['text'],
                    labels=["Topic " + str(x) for x in y_labels_new],
                    links=df['link'])
        tooltips = [
            ("Id", "@ids{safe}"),
            ("Title", "@titles{safe}"),
            ("Published", "@published_times{safe}"),
        ]

        # Media-specific columns and tooltips.
        if media_type == 'videos':
            data.update(
                ids=df['id'],
                published_times=df['first_airing'],
                publication_end_times=df['publication_end_time'],
                media_availables=df['media_available'],
                duration_minutes=df['duration_minutes'],
                finnpanel_genres=df['finnpanel_genre'])
            tooltips.extend([
                ("Publication ends", "@publication_end_times{safe}"),
                ("Currently available", "@media_availables{safe}"),
                ("Duration (minutes)", "@duration_minutes{safe}"),
                ("Finnpanel genres", "@finnpanel_genres{safe}"),
            ])
        else:  # 'articles'
            data.update(
                ids=df.index,
                published_times=df['published_time'].dt.strftime(
                    '%Y-%m-%d %H:%M'))
        tooltips.append(("Link", "@links"))

        source = ColumnDataSource(data=data)

        # hover over information
        hover = HoverTool(tooltips=tooltips, point_policy="follow_mouse")

        # map colors: spread the topic ids over the 20-colour palette
        mapper = linear_cmap(field_name='desc',
                             palette=Category20[20],
                             low=min(y_labels),
                             high=max(y_labels))

        # prepare the figure
        plot = figure(plot_width=1200,
                      plot_height=850,
                      tools=[
                          hover, 'pan', 'wheel_zoom', 'box_zoom', 'reset',
                          'save', 'tap'
                      ],
                      title="Clustering of the content with UMAP and NMF",
                      toolbar_location="above")

        # plot settings
        plot.scatter('x',
                     'y',
                     size=5,
                     source=source,
                     fill_color=mapper,
                     line_alpha=0.3,
                     line_color="black",
                     legend='labels')
        plot.legend.background_fill_alpha = 0.6

        # Keywords
        text_banner = Paragraph(
            text='Keywords: Slide to specific cluster to see the keywords.',
            height=45)
        input_callback_1 = input_callback(plot, source, text_banner, topics,
                                          self.nr_of_topics)

        # currently selected article
        div_curr = Div(
            text="""Click on a plot to see the link to the article.""",
            height=150)
        if media_type == 'videos':
            selected_code = selected_code_videos()
        else:
            selected_code = selected_code_articles()
        callback_selected = CustomJS(args=dict(source=source,
                                               current_selection=div_curr),
                                     code=selected_code)
        taptool = plot.select(type=TapTool)
        taptool.callback = callback_selected

        # WIDGETS
        # The slider starts at its maximum (nr_of_topics); presumably that
        # position means "all topics" -- confirm against input_callback.
        slider = Slider(
            start=0,
            end=self.nr_of_topics,
            value=self.nr_of_topics,
            step=1,
            title="Topic #")
        slider.js_on_change("value", input_callback_1)
        keyword = TextInput(title="Search:")
        keyword.js_on_change("value", input_callback_1)

        # pass call back arguments (the widgets only exist from here on)
        input_callback_1.args["text"] = keyword
        input_callback_1.args["slider"] = slider

        # STYLE
        slider.sizing_mode = "stretch_width"
        slider.margin = 15

        keyword.sizing_mode = "scale_both"
        keyword.margin = 15

        div_curr.style = {
            'color': '#BF0A30',
            'font-family': 'Helvetica Neue, Helvetica, Arial, sans-serif;',
            'font-size': '1.1em'
        }
        div_curr.sizing_mode = "scale_both"
        div_curr.margin = 20

        text_banner.style = {
            'color': '#0269A4',
            'font-family': 'Helvetica Neue, Helvetica, Arial, sans-serif;',
            'font-size': '1.1em'
        }
        text_banner.sizing_mode = "scale_both"
        text_banner.margin = 20

        plot.sizing_mode = "scale_both"
        plot.margin = 5

        # LAYOUT OF THE PAGE
        page_layout = layout([
            [slider, keyword],
            [text_banner],
            [div_curr],
            [plot],
        ])
        page_layout.sizing_mode = "scale_both"

        # show
        output_file('t-sne_interactive_streamlit.html')
        show(page_layout)

        return page_layout
Exemple #2
0
# Run the selection callback whenever the tap tool fires.
taptool.callback = callback_selected

# WIDGETS
# Attach the JS callback with js_on_change instead of the `callback=`
# constructor keyword: that keyword was removed from Bokeh widgets in 2.0,
# while js_on_change works on both older and newer releases.
# The slider starts at its maximum (20); presumably that position means
# "all clusters" -- confirm against input_callback_1.
slider = Slider(start=0, end=20, value=20, step=1, title="Cluster #")
slider.js_on_change("value", input_callback_1)
keyword = TextInput(title="Search:")
keyword.js_on_change("value", input_callback_1)

# pass call back arguments (the widgets only exist from here on)
input_callback_1.args["text"] = keyword
input_callback_1.args["slider"] = slider


# STYLE
slider.sizing_mode = "stretch_width"
slider.margin=15

keyword.sizing_mode = "scale_both"
keyword.margin=15

div_curr.style={'color': '#BF0A30', 'font-family': 'Helvetica Neue, Helvetica, Arial, sans-serif;', 'font-size': '1.1em'}
div_curr.sizing_mode = "scale_both"
div_curr.margin = 20

text_banner.style={'color': '#0269A4', 'font-family': 'Helvetica Neue, Helvetica, Arial, sans-serif;', 'font-size': '1.1em'}
text_banner.sizing_mode = "scale_both"
text_banner.margin = 20

plot.sizing_mode = "scale_both"
plot.margin = 5

# Side-by-side row of the selection panel and the keyword banner.
r = row(div_curr,text_banner)
r.sizing_mode = "stretch_width"