Exemple #1
0
    def generate_bokeh_umap(self, media_type):
        """Build, show and return an interactive Bokeh page of a UMAP embedding.

        The method embeds ``self.vectorized_out`` in 2D with UMAP (hellinger
        metric), colours every point by the column with the highest value in
        its row of ``self.doc_topic_dists``, and wires up a topic slider, a
        search box and a tap tool. The page is written to
        ``t-sne_interactive_streamlit.html`` and displayed with ``show``.

        Instance attributes read: ``top_words_map`` (its keys are used as
        topic labels, its values are passed to ``input_callback``),
        ``vectorized_out``, ``doc_topic_dists``, ``original_df`` and
        ``nr_of_topics``.

        Parameters
        ----------
        media_type : str
            ``'videos'`` or ``'articles'``. Selects which columns of
            ``self.original_df`` are put into the data source and shown in
            the hover tooltips, and which tap-callback JS code is used.

        Returns
        -------
        The Bokeh layout object that was shown.

        Raises
        ------
        ValueError
            If ``media_type`` is neither ``'videos'`` nor ``'articles'``.
        """
        # Fail early with a clear message instead of hitting an unbound
        # ``source``/``hover`` after the (expensive) UMAP fit.
        if media_type not in ('videos', 'articles'):
            raise ValueError(
                "media_type must be 'videos' or 'articles', got {!r}".format(
                    media_type))

        output_notebook()

        topic_items = list(self.top_words_map.items())
        labels = [key for key, _ in topic_items]
        topics = [value for _, value in topic_items]

        # Larger inputs get a larger neighbourhood. The branches must be
        # mutually exclusive (if/elif/else), otherwise the >= 20000 setting
        # is immediately overwritten by the >= 5000 one.
        # NOTE(review): the threshold is the number of topic labels, not the
        # number of documents -- confirm this is the intended quantity.
        n_labels = len(labels)
        if n_labels >= 20000:
            reducer = umap.UMAP(n_neighbors=100, metric='hellinger')
        elif n_labels >= 5000:
            reducer = umap.UMAP(n_neighbors=50, metric='hellinger')
        else:
            reducer = umap.UMAP(metric='hellinger')

        X = self.vectorized_out.copy()
        X_embedded = reducer.fit_transform(X)

        # Dominant topic per row: column label of the row-wise maximum.
        df_tmp = pd.DataFrame(self.doc_topic_dists)
        df_tmp['topic'] = df_tmp.idxmax(axis=1)
        y_labels = df_tmp['topic'].values
        y_labels_new = [labels[i] for i in y_labels]

        df = self.original_df.copy()

        # data sources
        if media_type == 'videos':
            source = ColumnDataSource(
                data=dict(x=X_embedded[:, 0],
                          y=X_embedded[:, 1],
                          x_backup=X_embedded[:, 0],
                          y_backup=X_embedded[:, 1],
                          desc=y_labels,
                          ids=df['id'],
                          titles=df['title'],
                          published_times=df['first_airing'],
                          text=df['text'],
                          publication_end_times=df['publication_end_time'],
                          media_availables=df['media_available'],
                          duration_minutes=df['duration_minutes'],
                          finnpanel_genres=df['finnpanel_genre'],
                          labels=["Topic " + str(x) for x in y_labels_new],
                          links=df['link']))

            # hover over information
            hover = HoverTool(
                tooltips=[
                    ("Id", "@ids{safe}"),
                    ("Title", "@titles{safe}"),
                    ("Published", "@published_times{safe}"),
                    ("Publication ends", "@publication_end_times{safe}"),
                    ("Currently available", "@media_availables{safe}"),
                    ("Duration (minutes)", "@duration_minutes{safe}"),
                    ("Finnpanel genres", "@finnpanel_genres{safe}"),
                    ("Link", "@links")
                ],
                point_policy="follow_mouse")
            selected_code = selected_code_videos()
        else:  # media_type == 'articles' (validated above)
            source = ColumnDataSource(
                data=dict(x=X_embedded[:, 0],
                          y=X_embedded[:, 1],
                          x_backup=X_embedded[:, 0],
                          y_backup=X_embedded[:, 1],
                          desc=y_labels,
                          ids=df.index,
                          titles=df['title'],
                          published_times=df['published_time'].dt.strftime(
                              '%Y-%m-%d %H:%M'),
                          text=df['text'],
                          labels=["Topic " + str(x) for x in y_labels_new],
                          links=df['link']))

            # hover over information
            hover = HoverTool(
                tooltips=[
                    ("Id", "@ids{safe}"),
                    ("Title", "@titles{safe}"),
                    ("Published", "@published_times{safe}"),
                    ("Link", "@links")
                ],
                point_policy="follow_mouse")
            selected_code = selected_code_articles()

        # map colors
        mapper = linear_cmap(field_name='desc',
                             palette=Category20[20],
                             low=min(y_labels),
                             high=max(y_labels))

        # prepare the figure
        plot = figure(plot_width=1200,
                      plot_height=850,
                      tools=[
                          hover, 'pan', 'wheel_zoom', 'box_zoom', 'reset',
                          'save', 'tap'
                      ],
                      title="Clustering of the content with UMAP and NMF",
                      toolbar_location="above")

        # plot settings
        plot.scatter('x',
                     'y',
                     size=5,
                     source=source,
                     fill_color=mapper,
                     line_alpha=0.3,
                     line_color="black",
                     legend='labels')
        plot.legend.background_fill_alpha = 0.6

        # Keywords
        text_banner = Paragraph(
            text='Keywords: Slide to specific cluster to see the keywords.',
            height=45)
        input_callback_1 = input_callback(plot, source, text_banner, topics,
                                          self.nr_of_topics)

        # currently selected article
        div_curr = Div(
            text="""Click on a plot to see the link to the article.""",
            height=150)
        callback_selected = CustomJS(args=dict(source=source,
                                               current_selection=div_curr),
                                     code=selected_code)
        taptool = plot.select(type=TapTool)
        taptool.callback = callback_selected

        # WIDGETS
        slider = Slider(start=0,
                        end=self.nr_of_topics,
                        value=self.nr_of_topics,
                        step=1,
                        title="Topic #")
        slider.js_on_change("value", input_callback_1)
        keyword = TextInput(title="Search:")
        keyword.js_on_change("value", input_callback_1)

        # pass call back arguments
        input_callback_1.args["text"] = keyword
        input_callback_1.args["slider"] = slider

        # STYLE
        slider.sizing_mode = "stretch_width"
        slider.margin = 15

        keyword.sizing_mode = "scale_both"
        keyword.margin = 15

        div_curr.style = {
            'color': '#BF0A30',
            'font-family': 'Helvetica Neue, Helvetica, Arial, sans-serif;',
            'font-size': '1.1em'
        }
        div_curr.sizing_mode = "scale_both"
        div_curr.margin = 20

        text_banner.style = {
            'color': '#0269A4',
            'font-family': 'Helvetica Neue, Helvetica, Arial, sans-serif;',
            'font-size': '1.1em'
        }
        text_banner.sizing_mode = "scale_both"
        text_banner.margin = 20

        plot.sizing_mode = "scale_both"
        plot.margin = 5

        # LAYOUT OF THE PAGE
        page = layout([
            [slider, keyword],
            [text_banner],
            [div_curr],
            [plot],
        ])
        page.sizing_mode = "scale_both"

        # show
        output_file('t-sne_interactive_streamlit.html')
        show(page)

        return page
Exemple #2
0
def _log10_or_nan(image):
    """Return log10 of ``image`` as floats, NaN where a value is not > 0."""
    values = np.asarray(image, dtype=float)
    logged = np.full(values.shape, np.nan)
    positive = values > 0
    logged[positive] = np.log10(values[positive])
    return logged


def _limits_per_set(limit, shape):
    """Expand a None / scalar / per-pixel limit to a float array of ``shape``."""
    if limit is None:
        return np.full(shape, np.nan)
    limit = np.asarray(limit, dtype=float)
    if limit.ndim < 2:
        # copy() gives a writable, contiguous array instead of a strided view
        limit = np.broadcast_to(limit, shape).copy()
    return limit


def show_camera(content,
                geom,
                pad_width,
                pad_height,
                label,
                titles=None,
                showlog=True,
                display_range=None,
                content_lowlim=None,
                content_upplim=None):
    """
    Build the bokeh objects for an interactive camera display.

    Parameters
    ----------
    content: pixel-wise quantity to be plotted, ndarray with shape (N,
    number_of_pixels) where N is the number of different sets of pixel
    values, for example N different data runs or whatever. The shape can also
    be just (number_of_pixels), in case a single camera display is to be shown

    geom: camera geometry
    pad_width: width in pixels of each of the 3 pads in the plot
    pad_height: height in pixels of each of the 3 pads in the plot
    label: string to label the quantity which is displayed, the same for the N
    sets of pixels inside "content"
    titles: list of N strings, with the title specific to each of the sets
    of pixel values to be displayed: for example, indicating run numbers.
    Defaults to empty titles
    showlog: if True, a second tab with log10(content) is added to the camera
    display (non-positive values are shown as NaN)
    display_range: range of "content" to be displayed, as (min, max). By
    default, the 0.001 and 0.999 quantiles of "content" are used

    content_lowlim: scalar, ndarray of shape (number_of_pixels) or ndarray of
    shape (N, number_of_pixels): lowest value of "content" which is
    considered healthy; it is drawn as a dashed orange line in p2
    content_upplim: highest value considered healthy, same as above (drawn
    as a dashed red line)


    Returns
    -------
    [slider, p1, range_slider, p2, p3]: three bokeh figures, intended for
    showing them on the same row, and two sliders, one for the run numbers (
    or whatever "sets" of data we are displaying) and the other for the
    z-range of the plots. slider is None when there is a single set.
    p1 is the camera display (with "content" in linear & logarithmic scale)
    p2: content vs. pixel
    p3: histogram of content (with one entry per pixel)

    """

    # Work on a private copy of the geometry object
    camgeom = copy.deepcopy(geom)

    # numsets is the number of different sets of pixel data to be displayed
    if np.ndim(content) > 1:
        numsets = content.shape[0]
        allimages = [content[i] for i in range(numsets)]
    else:
        numsets = 1
        allimages = [content]

    if titles is None:
        titles = [''] * numsets

    if display_range is None:
        # By default we plot the range which contains 99.8 of all events, so
        # that outliers do not prevent us from seeing the bulk of the data:
        display_min = np.nanquantile(allimages, 0.001)
        display_max = np.nanquantile(allimages, 0.999)
    else:
        display_min = display_range[0]
        display_max = display_range[1]

    cam = CameraDisplay(camgeom,
                        display_min,
                        display_max,
                        label,
                        titles[0],
                        use_notebook=False,
                        autoshow=False)
    cam.image = allimages[0]
    cam.figure.title.text = titles[0]

    allimageslog = []
    camlog = None
    source1log = None
    color_mapper_log = None
    titlelog = None

    if showlog:
        # workaround as long as log z-scale is not implemented in bokeh
        # camera: display log10 of the content on a linear scale. The
        # conversion is done in float so integer content works as well.
        allimageslog = [_log10_or_nan(image) for image in allimages]

        camlog = CameraDisplay(camgeom,
                               np.nanquantile(allimageslog, 0.001),
                               np.nanquantile(allimageslog, 0.999),
                               label,
                               titles[0],
                               use_notebook=False,
                               autoshow=False)
        camlog.image = allimageslog[0]
        camlog.figure.title.text = titles[0]
        source1log = camlog.datasource
        color_mapper_log = camlog._color_mapper
        titlelog = camlog.figure.title

    # Per-pixel values returned by get_pixel_location, shown in the hover tool
    cluster_i = []
    cluster_j = []
    pix_id_in_cluster = []
    for pix_id in camgeom.pix_id:
        location = get_pixel_location(pix_id)
        cluster_i.append(location[0])
        cluster_j.append(location[1])
        pix_id_in_cluster.append(location[2])

    for c in (cam, camlog):
        if c is None:
            continue
        c.datasource.add(list(c.geom.pix_id), 'pix_id')
        c.datasource.add(cluster_i, 'cluster_i')
        c.datasource.add(cluster_j, 'cluster_j')
        c.datasource.add(pix_id_in_cluster, 'pix_id_in_cluster')

        c.figure.plot_width = pad_width
        c.figure.plot_height = int(pad_height * 0.85)
        c.figure.grid.visible = False
        c.figure.axis.visible = True
        c.figure.xaxis.axis_label = 'X position (m)'
        c.figure.yaxis.axis_label = 'Y position (m)'
        c.figure.add_tools(
            HoverTool(tooltips=[('pix_id', '@pix_id'), ('value', '@image'),
                                ('cluster (i,j)', '(@cluster_i, @cluster_j)'),
                                ('pix # in cluster', '@pix_id_in_cluster')],
                      mode='mouse',
                      point_policy='snap_to_data'))

    tabs = [Panel(child=cam.figure, title='linear')]
    if showlog:
        tabs.append(Panel(child=camlog.figure, title='logarithmic'))
    p1 = Tabs(tabs=tabs)
    p1.margin = (0, 0, 0, 25)

    p2 = figure(background_fill_color='#ffffff',
                y_range=(display_min, display_max),
                x_axis_label='Pixel id',
                y_axis_label=label)
    p2.min_border_top = 60
    p2.min_border_bottom = 70

    source2 = ColumnDataSource(
        data=dict(pix_id=cam.geom.pix_id, value=cam.image))
    pixel_data = p2.circle(x='pix_id', y='value', size=2, source=source2)

    # Always work with one row of limits per set of pixel values, so that
    # row [0] is a per-pixel array even when "content" is one-dimensional.
    limits_shape = (numsets, ) + np.shape(allimages[0])
    content_lowlim = _limits_per_set(content_lowlim, limits_shape)
    content_upplim = _limits_per_set(content_upplim, limits_shape)

    source2_lowlim = ColumnDataSource(
        data=dict(pix_id=cam.geom.pix_id, value=content_lowlim[0]))
    p2.line(x='pix_id',
            y='value',
            source=source2_lowlim,
            line_dash='dashed',
            color='orange',
            line_width=2)

    source2_upplim = ColumnDataSource(
        data=dict(pix_id=cam.geom.pix_id, value=content_upplim[0]))
    p2.line(x='pix_id',
            y='value',
            source=source2_upplim,
            line_dash='dashed',
            color='red')

    p2.add_tools(
        HoverTool(tooltips=[('(pix_id, value)', '(@pix_id, @value)')],
                  mode='mouse',
                  point_policy='snap_to_data',
                  renderers=[pixel_data]))

    p2.y_range = Range1d(display_min, display_max)

    allhists = []
    alledges = []

    # We define 100 bins between display_min and display_max
    # Note that values beyond that range won't be histogrammed and hence will
    # not appear on the "p3" figure below.
    nbins = 100
    for image in allimages:
        hist, edges = np.histogram(image[~np.isnan(image)],
                                   bins=nbins,
                                   range=(display_min, display_max))
        allhists.append(hist)
        alledges.append(edges)

    # The bars start at 0.7 (not 0) because the y axis is logarithmic
    source3 = ColumnDataSource(data=dict(top=allhists[0],
                                         bottom=0.7 *
                                         np.ones_like(allhists[0]),
                                         left=alledges[0][:-1],
                                         right=alledges[0][1:]))

    p3 = figure(background_fill_color='#ffffff',
                y_range=(0.7, np.max(allhists) * 1.1),
                x_range=(display_min, display_max),
                x_axis_label=label,
                y_axis_label='Number of pixels',
                y_axis_type='log')
    p3.quad(top='top',
            bottom='bottom',
            left='left',
            right='right',
            source=source3)

    cdsdata = dict(z=allimages, hist=allhists, edges=alledges, titles=titles)
    # BEWARE!! these have to be lists of arrays. Not 2D numpy arrays!!
    cdsdata['lowlim'] = list(content_lowlim)
    cdsdata['upplim'] = list(content_upplim)

    if showlog:
        cdsdata['zlog'] = allimageslog

    cds_allimages = ColumnDataSource(data=cdsdata)
    # One has to add here everything that must change when moving the slider:
    callback = CustomJS(args=dict(source1=cam.datasource,
                                  source1log=source1log,
                                  source2=source2,
                                  source2_lowlim=source2_lowlim,
                                  source2_upplim=source2_upplim,
                                  source3=source3,
                                  zz=cds_allimages,
                                  title=cam.figure.title,
                                  titlelog=titlelog,
                                  showlog=showlog),
                        code="""
        var slider_value = cb_obj.value
        var z = zz.data['z']
        var zlow = zz.data['lowlim']
        var zupp = zz.data['upplim']
        var edges = zz.data['edges']
        var hist = zz.data['hist']
        for (var i = 0; i < source1.data['image'].length; i++) {
             source1.data['image'][i] = z[slider_value-1][i]
             if (showlog) {
                 var zlog = zz.data['zlog']
                 source1log.data['image'][i] = zlog[slider_value-1][i]
             }
             source2.data['value'][i] = source1.data['image'][i]
             source2_lowlim.data['value'][i] = zlow[slider_value-1][i]
             source2_upplim.data['value'][i] = zupp[slider_value-1][i]
        }
        for (var j = 0; j < source3.data['top'].length; j++) {
            source3.data['top'][j] = hist[slider_value-1][j]
            source3.data['left'][j] = edges[slider_value-1][j]
            source3.data['right'][j] = edges[slider_value-1][j+1]
        }

        title.text = zz.data['titles'][slider_value-1]
        source1.change.emit()
        if (showlog) {
            titlelog.text = title.text
            source1log.change.emit()
        }
        source2.change.emit()
        source2_lowlim.change.emit()
        source2_upplim.change.emit()
        source3.change.emit()
    """)

    slider = None
    if numsets > 1:
        slider_height = 300
        # WARNING: the html won't look nice for number of sets much larger
        # than 300! But in this way we avoid that the slider skips elements:
        if numsets > 299:
            slider_height = numsets + 1
        slider = Slider(start=1,
                        end=numsets,
                        value=1,
                        step=1,
                        title="run",
                        orientation='vertical',
                        show_value=False,
                        height=slider_height)

        slider.margin = (0, 0, 0, 35)
        slider.js_on_change('value', callback)

    # The z-range slider drives the colour mappers of both camera displays;
    # the logarithmic one gets log10 of the selected limits.
    callback2 = CustomJS(args=dict(color_mapper=cam._color_mapper,
                                   color_mapper_log=color_mapper_log,
                                   showlog=showlog),
                         code="""
        var range = cb_obj.value
        color_mapper.low = range[0]
        color_mapper.high = range[1]
        color_mapper.change.emit()
        if (showlog) {
            if (range[0] > 0.)
                color_mapper_log.low = Math.log(range[0])/Math.LN10    
            color_mapper_log.high = Math.log(range[1])/Math.LN10
            color_mapper_log.change.emit()
        }
    """)
    step = (display_max - display_min) / 100.
    range_slider = RangeSlider(start=display_min,
                               end=display_max,
                               value=(display_min, display_max),
                               step=step,
                               title="z_range",
                               orientation='vertical',
                               direction='rtl',
                               height=300,
                               show_value=False)
    range_slider.js_on_change('value', callback2)

    return [slider, p1, range_slider, p2, p3]
Exemple #3
0
# JS callback for the tap tool; it is given the data source and the
# ``div_curr`` widget as arguments.
callback_selected = CustomJS(args=dict(source=source, current_selection=div_curr), code=selected_code())
taptool = plot.select(type=TapTool)
taptool.callback = callback_selected

# WIDGETS
# The callback is attached with js_on_change: the ``callback=`` keyword of
# the widgets is not available in Bokeh >= 2.0.
slider = Slider(start=0, end=20, value=20, step=1, title="Cluster #")
slider.js_on_change("value", input_callback_1)
keyword = TextInput(title="Search:")
keyword.js_on_change("value", input_callback_1)

# pass call back arguments
input_callback_1.args["text"] = keyword
input_callback_1.args["slider"] = slider


# STYLE
slider.sizing_mode = "stretch_width"
slider.margin = 15

keyword.sizing_mode = "scale_both"
keyword.margin = 15

div_curr.style = {'color': '#BF0A30', 'font-family': 'Helvetica Neue, Helvetica, Arial, sans-serif;', 'font-size': '1.1em'}
div_curr.sizing_mode = "scale_both"
div_curr.margin = 20

text_banner.style = {'color': '#0269A4', 'font-family': 'Helvetica Neue, Helvetica, Arial, sans-serif;', 'font-size': '1.1em'}
text_banner.sizing_mode = "scale_both"
text_banner.margin = 20

plot.sizing_mode = "scale_both"
plot.margin = 5