Beispiel #1
0
class jitter:
    """Scatter ``mpg`` against ``yr`` twice: plain and with x-jitter.

    Everything runs in the class body, so executing the class statement
    builds both figures, selects ``jitter.html`` as output and shows the two
    plots in a column.
    """

    # Adapted from https://bokeh.pydata.org/en/latest/docs/gallery/jitter.html
    colors = ["red", "olive", "darkred", "goldenrod",
              "skyblue", "orange", "salmon"]

    p1 = figure(title="Years vs mpg without jittering",
                plot_width=600, plot_height=300)
    p2 = figure(title="Years vs mpg with jittering",
                plot_width=600, plot_height=300)

    for i, year in enumerate(df.yr.unique()):
        y = df[df['yr'] == year]['mpg']
        # The palette is reused cyclically when there are more years than
        # colours.
        color = colors[i % len(colors)]

        p1.circle(x=year, y=y, color=color)
        # Same data, but the constant x value goes through a Jitter transform.
        p2.circle(x=dict(value=year, transform=Jitter(width=1)),
                  y=y, color=color)

    output_file("jitter.html")
    show(column(p1, p2))
Beispiel #2
0
import numpy as np

from bokeh.models import Jitter
from bokeh.plotting import figure, show, output_file

# Fixed axis ranges keep both point clouds (placed at x=1 and x=2) in view.
p = figure(title="Demonstration of Jitter transform",
           plot_width=500, plot_height=400,
           x_range=(0, 3), y_range=(0, 10))

# Uniform samples scaled by 10, then normal samples scaled by 2 and shifted
# by 5. The uniform draw must stay first so the random stream is consumed in
# the same order.
y1 = 10 * np.random.random(2500)
y2 = 5 + 2 * np.random.normal(size=2500)

# One jittered circle glyph per sample set.
for x_position, samples, shade in ((1, y1, "navy"), (2, y2, "firebrick")):
    p.circle(x={'value': x_position, 'transform': Jitter(width=0.4)},
             y=samples, color=shade, alpha=0.3)

output_file("jitter.html")
show(p)
Beispiel #3
0
def plot_clusters(vectors,
                  clusters,
                  texts,
                  labels,
                  algorithm_name='Clusterisation',
                  plot_path='',
                  plot_name='clustering.html',
                  plot_size=1000,
                  plot_title="Embeddings clusters"):
    """
    Plot embeddings as a jittered, hoverable scatter coloured by label.

    Parameters
    ----------
    vectors : array supporting ``vectors[:, 0]`` and ``vectors[:, 1]``
              Embeddings; columns 0 and 1 are used as x and y
    clusters : list
               Per-point cluster values shown in the hover tooltip
    texts : list
            Per-point texts shown in the hover tooltip
    labels : list
             Per-point labels that select the colour of each point;
             values must be hashable and mutually orderable
    algorithm_name : string, optional
                     Used as the figure title
    plot_path : string, optional
                Prefix prepended verbatim to ``plot_name``
    plot_name : string, optional
                A name of output plot
    plot_size : int, optional
                Width and height of the plot
    plot_title : string, optional
                 Title passed to ``output_file``
    """

    unique_labels = sorted(set(labels))
    pal = Turbo256 if len(unique_labels) > 100 else Category20[20]
    if len(unique_labels) <= len(pal):
        palette = pal
    else:
        # More labels than colours: sample positions across the *whole*
        # palette (colours repeat). The builtin int replaces np.int, which
        # NumPy 1.24 removed.
        palette = [
            pal[each] for each in np.linspace(
                0, len(pal) - 1, num=len(unique_labels), dtype=int)
        ]

    # Colour by the rank of each label among the distinct labels rather than
    # by its raw value, so negative or sparse label values cannot wrap around
    # or run past the end of the palette. Contiguous labels 0..n-1 keep the
    # colours they had when indexed directly.
    rank = {label: position for position, label in enumerate(unique_labels)}
    colors = [palette[rank[label]] for label in labels]

    source = ColumnDataSource(data=dict(x=vectors[:, 0],
                                        y=vectors[:, 1],
                                        colors=colors,
                                        texts=texts,
                                        clusters=clusters))

    TOOLTIPS = [
        ("index", "$index"),
        ("(x,y)", "($x, $y)"),
        ("cluster", "@clusters"),
        ("text", "@texts"),
    ]

    p = figure(output_backend="webgl",
               title=algorithm_name,
               plot_width=plot_size,
               plot_height=plot_size,
               tooltips=TOOLTIPS)

    # Both coordinates are jittered.
    p.scatter(x={'field': 'x', 'transform': Jitter(width=0.4)},
              y={'field': 'y', 'transform': Jitter(width=0.4)},
              color='colors',
              alpha=0.5,
              source=source)

    output_file(plot_path + plot_name, title=plot_title)

    show(p)
Beispiel #4
0
import numpy as np

from bokeh.models import Button, Column, ColumnDataSource, CustomJS, Jitter, LabelSet
from bokeh.plotting import figure, output_file, show

N = 1000

# Three x columns holding the constants 1, 2 and 3, sharing one random y
# column.
source = ColumnDataSource(data={
    'x': np.ones(N),
    'xn': 2 * np.ones(N),
    'xu': 3 * np.ones(N),
    'y': np.random.random(N) * 10,
})

# Jitter transforms with the two distributions compared in this example.
normal = Jitter(width=0.2, distribution="normal")
uniform = Jitter(width=0.2, distribution="uniform")

p = figure(x_range=(0, 4), y_range=(0, 10),
           toolbar_location=None, x_axis_location="above")
# One glyph per x column, all sharing the same y values.
for x_field, shade in (('x', 'firebrick'), ('xn', 'olive'), ('xu', 'navy')):
    p.circle(x=x_field, y='y', color=shade, source=source, size=5, alpha=0.5)

# Captions placed at x = 1, 2, 3 on the y = 0 line, shifted by y_offset=-4.
label_data = ColumnDataSource(data={
    'x': [1, 2, 3],
    'y': [0, 0, 0],
    't': ['Original', 'Normal', 'Uniform'],
})
label_set = LabelSet(source=label_data, x='x', y='y', text='t',
                     y_offset=-4, render_mode='css',
                     text_align='center', text_baseline="top")
p.add_layout(label_set)

callback = CustomJS(args=dict(source=source, normal=normal, uniform=uniform), code="""
    const data = source.data;
    for (let i = 0; i < data.y.length; i++) {
        data.xn[i] = normal.compute(data.x[i] + 1);
    }
Beispiel #5
0
from bokeh.models import Jitter
from bokeh.layouts import column
from bokeh.plotting import figure, show, output_file
from bokeh.sampledata.autompg import autompg as df


# Colours are reused cyclically when there are more model years than entries.
colors = [
    "red", "olive", "darkred", "goldenrod", "skyblue", "orange", "salmon",
]

# Both figures share the same dimensions and differ only in their title.
figure_size = dict(plot_width=600, plot_height=300)
p1 = figure(title="Years vs mpg without jittering", **figure_size)
p2 = figure(title="Years vs mpg with jittering", **figure_size)

for position, year in enumerate(df.yr.unique()):
    mpg_values = df[df['yr'] == year]['mpg']
    shade = colors[position % len(colors)]

    # Plain plot: every point of the year sits exactly on x = year.
    p1.circle(x=year, y=mpg_values, color=shade)
    # Jittered plot: the constant x value goes through a Jitter transform.
    jittered_x = {'value': year, 'transform': Jitter(width=1)}
    p2.circle(x=jittered_x, y=mpg_values, color=shade)

output_file("jitter.html")
show(column(p1, p2))
Beispiel #6
0
                (df_6['Dept'] == 'Engineering') |
                (df_6['Dept'] == 'Animal Control')]

# Renumber the rows 0..n-1 in place, discarding the previous index.
df_6.reset_index(drop=True, inplace=True)

# 7-colour 'Set1' palette; reused cyclically below via the modulo.
colors = brewer['Set1'][7]
# Hover tooltips read the Topic and Count columns of each glyph's data source.
p6 = figure(y_range=(0, 6200),
            tools=["hover", 'box_zoom', 'reset', 'save'],
            tooltips="@Topic; @Count calls")

# One jittered column of circles per department: the x position is the
# department's position in the list of unique 'Dept' values, y is its 'Count'.
for i, d in enumerate(list(df_6['Dept'].unique())):
    # Only the columns needed for plotting and for the tooltip.
    y = df_6[df_6['Dept'] == d][['Count', 'Topic']]
    color = colors[i % len(colors)]
    p6.circle(x={
        'value': i,
        'transform': Jitter(width=0.4)
    },
              y='Count',
              source=y,
              color=color,
              size=10,
              alpha=0.75)

# Rows for the single topic that is drawn separately with a diamond glyph.
sw_trash = df_6[df_6['Topic'] == "Miscellaneous Trash Information"]
p6.diamond(x={
    'value': 1,
    'transform': Jitter(width=0.4)
},
           y=5999,
           source=sw_trash,
           size=24,
def test_Jitter() -> None:
    """Check the property values of a Jitter constructed without arguments."""
    default_transform = Jitter()
    assert default_transform.mean == 0
    assert default_transform.width == 1
    assert default_transform.distribution == "uniform"
    assert default_transform.range is None
Beispiel #8
0
def modify_doc(doc):
    # marker sizes available for the "size" encoding
    SIZES = [6, 9, 12, 15, 18, 21]

    # define available palettes: every cc palette whose name has no
    # underscore and is not explicitly excluded
    excluded_palettes = ("bkr", "coolwarm", "bjy", "bky", "gwv")
    palettes = {
        name: swatch
        for name, swatch in cc.palette.items()
        if not ("_" in name or name in excluded_palettes)
    }

    #################
    # data handling #
    #################

    def get_data(path, force_discrete_colorable):
        """Read data from csv and add web-mercator map coordinates.

        Columns named in ``force_discrete_colorable`` are converted to
        strings, every remaining missing value is replaced by the string
        "NaN", and two float columns are added: "easting" and "northing",
        the projection of "lon"/"lat" from EPSG:4326 to EPSG:3857. Rows that
        lack either coordinate are placed at ``config['default_coords']``
        (lon 0, lat -80 when that key is absent or empty).

        Returns the resulting DataFrame. ``config`` is read from the
        enclosing scope.
        """
        data = pd.read_csv(path)

        # Decide which rows have usable coordinates *before* missing values
        # are overwritten with the "NaN" placeholder string below; after that
        # replacement pd.notnull() is true for every cell.
        lon = data["lon"]
        lat = data["lat"]
        has_coords = pd.notnull(lon) & pd.notnull(lat)

        # data from columns in force_discrete_colorable will be treated as
        # discrete even if numeric
        for col in data.columns:
            if col in force_discrete_colorable:
                data[col] = data[col].apply(str)

        data = data.applymap(lambda x: "NaN" if pd.isnull(x) else x)

        # transform coords to map projection
        wgs84 = pyproj.Proj(init="epsg:4326")
        web_mer = pyproj.Proj(init="epsg:3857")

        # show unknown locations on map in antarctic
        default_wgs84 = config.get('default_coords') or {'lon': 0, 'lat': -80}
        default_easting, default_northing = pyproj.transform(
            wgs84, web_mer, default_wgs84["lon"], default_wgs84["lat"])

        data["easting"] = float("nan")
        data["northing"] = float("nan")
        # One mask selects both the rows that are projected and the rows that
        # receive the results, so the lengths always agree; the guard covers
        # files in which no row has coordinates.
        if has_coords.any():
            eastings, northings = pyproj.transform(
                wgs84, web_mer,
                lon[has_coords].astype(float).values,
                lat[has_coords].astype(float).values)
            data.loc[has_coords, "easting"] = eastings
            data.loc[has_coords, "northing"] = northings

        # anything still missing falls back to the default location
        data["easting"] = data["easting"].fillna(default_easting)
        data["northing"] = data["northing"].fillna(default_northing)

        return data

    def update_df(_df, _size, _color, _palette, _continuous,
                  _discrete_sizeable, _discrete_colorable):
        """Recompute the "size" and "color" columns of ``_df`` in place.

        Both columns start from a default (9 and "#31AADE"). When ``_size``
        or ``_color`` names a column listed as discrete or continuous, the
        default is replaced by a per-row value: discrete columns map each
        distinct level to one entry of SIZES or of a palette resampled to
        the number of levels, continuous columns are binned first.
        Nothing is returned.
        """

        def _level_codes(column, levels):
            """Return, for every value of ``column``, the position of its level."""
            # levels made up only of numeric strings are ordered by value
            if all([level.isnumeric() for level in levels]):
                levels = sorted(levels, key=float)
            position_of = {
                level: position for position, level in enumerate(levels)
            }
            return [position_of[value] for value in column.values]

        # ---- marker size ----
        _df["size"] = 9
        if _size != 'None':
            if _size in _discrete_sizeable:
                size_column = _df[_size]
                levels = size_column[pd.notnull(size_column)].unique()
                codes = _level_codes(size_column, levels)
                _df["size"] = [SIZES[code] for code in codes]
            elif _size in _continuous:
                # prefer quantile bins; fall back to equal-width bins when
                # qcut raises ValueError
                try:
                    bins = pd.qcut(_df[_size].values, len(SIZES))
                except ValueError:
                    bins = pd.cut(_df[_size].values, len(SIZES))
                _df["size"] = [SIZES[code] for code in bins.codes]

        # ---- marker colour ----
        _df["color"] = "#31AADE"
        if _color != 'None':
            if _color in _discrete_colorable:
                color_column = _df[_color]
                levels = color_column[pd.notnull(color_column)].unique()
                shades = linear_palette(palettes[_palette], len(levels))
                codes = _level_codes(color_column, levels)
                _df["color"] = [shades[code] for code in codes]
            elif _color in _continuous:
                # one equal-width bin per palette entry
                shades = palettes[_palette]
                bins = pd.cut(_df[_color].values, len(shades))
                _df["color"] = [shades[code] for code in bins.codes]

    def create_source(_df, _size, _color, _palette, _continuous,
                      _discrete_sizeable, _discrete_colorable):
        """Refresh size/color on ``_df`` and wrap it in a new ColumnDataSource."""
        update_df(_df, _size, _color, _palette, _continuous,
                  _discrete_sizeable, _discrete_colorable)

        # "ns"/"es" start out as copies of the projected coordinates
        for plotted, projected in (("ns", "northing"), ("es", "easting")):
            _df[plotted] = _df[projected]

        return ColumnDataSource(_df)

    def update_source(_source, _df, _size, _color, _palette, _continuous,
                      _discrete_sizeable, _discrete_colorable):
        """Recompute size/color on ``_df`` and push both columns to ``_source``."""
        update_df(_df, _size, _color, _palette, _continuous,
                  _discrete_sizeable, _discrete_colorable)

        # only the two derived columns are sent to the existing source
        refreshed = {name: _df[name] for name in ("size", "color")}
        _source.data.update(refreshed)

    #######################
    # Data Visualizations #
    #######################

    def create_crossfilter(_df, _source, _discrete, _x, _y):
        """Return a crossfilter plot linked to ColumnDataSource '_source'.

        ``_x`` and ``_y`` are the column names plotted on each axis. An axis
        whose column is listed in ``_discrete`` gets a categorical range made
        of the column's distinct non-null values, ordered numerically when
        all of them are numeric strings and lexically otherwise.
        """
        # pd.np was removed in pandas 2.0; math.pi is the same constant
        from math import pi

        def _categorical_range(column):
            """Return the ordered distinct non-null values of ``column``."""
            levels = _df[column][pd.notnull(_df[column])].unique()
            if all(level.isnumeric() for level in levels):
                return sorted(levels, key=float)
            return sorted(levels)

        kw = {}
        if _x in _discrete:
            kw["x_range"] = _categorical_range(_x)
        if _y in _discrete:
            kw["y_range"] = _categorical_range(_y)

        p = figure(
            plot_height=700,
            plot_width=700,  # responsive=True,
            tools="wheel_zoom, pan, save, reset, box_select, tap",
            active_drag="box_select",
            active_scroll="wheel_zoom",
            title="%s vs %s" % (_y.title(), _x.title()),
            **kw
        )

        if _x in _discrete:
            # slant the category labels by 45 degrees
            p.xaxis.major_label_orientation = pi / 4

        # plot data on crossfilter
        p.circle(
            x=_x,
            y=_y,
            color="color",
            size="size",
            source=_source,
            line_color="white",
            alpha=0.6,
            # set visual properties for selected glyphs
            selection_fill_color="color",
            selection_fill_alpha=0.6,
            selection_line_color="white",
            selection_line_alpha=0.6,

            # set visual properties for non-selected glyphs
            nonselection_fill_color="white",
            nonselection_fill_alpha=0.1,
            nonselection_line_color="color",
            nonselection_line_alpha=0.6,
        )

        return p

    def create_map(_source):
        """Return a tile-backed map whose markers are driven by '_source'."""
        half_extent = 20000000  # meters
        m = figure(
            x_range=(-half_extent, half_extent),
            y_range=(-half_extent, half_extent),
            plot_width=700,  # responsive=True,
            plot_height=700,
            tools="wheel_zoom, pan, reset, box_select, tap",
            active_scroll="wheel_zoom",
            active_drag="box_select",
        )
        m.axis.visible = False
        # add a shallow copy of STAMEN_TERRAIN rather than the object itself
        m.add_tile(copy.copy(STAMEN_TERRAIN))

        # visual properties for selected glyphs
        selected_look = dict(
            selection_fill_color="color",
            selection_fill_alpha=0.6,
            selection_line_color="white",
            selection_line_alpha=0.6,
        )
        # visual properties for non-selected glyphs
        unselected_look = dict(
            nonselection_fill_color="black",
            nonselection_fill_alpha=0.01,
            nonselection_line_color="color",
            nonselection_line_alpha=0.6,
        )

        # markers are drawn at the "es"/"ns" columns of the source
        m.circle(x="es", y="ns",
                 color="color", size="size",
                 source=_source,
                 line_color="white", alpha=0.6,
                 **selected_look, **unselected_look)

        return m

    def create_table(_columns, _source):
        """Return a DataTable over '_source' with one column per name in '_columns'."""
        table_cols = []
        for name in _columns:
            # the column name doubles as its header title
            table_cols.append(TableColumn(field=name, title=name))

        return DataTable(source=_source, columns=table_cols,
                         width=1600, height=250, fit_columns=False)

    #############
    # callbacks #
    #############

    # noinspection PyUnusedLocal
    def x_change(attr, old, new):
        """Rebuild the crossfilter plot from the current axis selections."""
        rebuilt = create_crossfilter(df, source, discrete, x.value, y.value)
        # the crossfilter is the second child of the first layout row
        l.children[0].children[1] = rebuilt

    # noinspection PyUnusedLocal
    def y_change(attr, old, new):
        """Rebuild the crossfilter plot from the current axis selections."""
        rebuilt = create_crossfilter(df, source, discrete, x.value, y.value)
        # the crossfilter is the second child of the first layout row
        l.children[0].children[1] = rebuilt

    # noinspection PyUnusedLocal
    def size_change(attr, old, new):
        """Re-derive size/color from the current widget values and update 'source'."""
        selections = (size.value, color.value, palette.value)
        update_source(source, df, *selections,
                      continuous, discrete_sizeable, discrete_colorable)

    # noinspection PyUnusedLocal
    def color_change(attr, old, new):
        """Re-derive size/color from the current widget values and update 'source'."""
        selections = (size.value, color.value, palette.value)
        update_source(source, df, *selections,
                      continuous, discrete_sizeable, discrete_colorable)

    # noinspection PyUnusedLocal
    def selection_change(attr, old, new):
        """Show in 'table_source' only the rows currently selected in 'source'."""
        picked = source.selected['1d']['indices']
        picked_rows = df.iloc[picked, :]
        table_source.data = table_source.from_df(picked_rows)

    # noinspection PyUnusedLocal
    def palette_change(attr, old, new):
        """Re-derive size/color from the current widget values and update 'source'."""
        selections = (size.value, color.value, palette.value)
        update_source(source, df, *selections,
                      continuous, discrete_sizeable, discrete_colorable)

    ########
    # Main #
    ########

    # get user config and data paths from session arguments
    args = doc.session_context.request.arguments

    def _resolve_request_file(param, kind):
        """Validate URL parameter ``param`` and return the path "<kind>/<name>".

        ``kind`` ("config" or "data") is both the folder that is searched and
        the word used in the messages. Raises ValueError when the name holds
        anything other than alphanumerics, '.', '-' or '_', and
        FileNotFoundError when the file does not exist; both messages are
        logged first.
        """
        name = tornado.escape.url_unescape(args.get(param)[0])
        # the allowed character set contains no path separators
        safe_name = "".join(
            ch for ch in name if ch.isalnum() or (ch in ".-_"))
        if safe_name != name:
            message = "Invalid character(s) in {} parameter: {}".format(
                kind, name)
            log.info(message)
            raise ValueError(message)
        candidate = kind + "/" + name
        if not os.path.isfile(candidate):
            message = "No such {} file found: {}".format(kind, name)
            log.info(message)
            raise FileNotFoundError(message)
        return candidate

    # validate config parameter
    if 'c' in args:
        configPath = _resolve_request_file('c', "config")
    else:
        configPath = "defaultConfig.toml"

    # load config file
    with open(configPath) as toml_data:
        config = pytoml.load(toml_data)

    # validate data parameter
    if 'd' in args:
        dataPath = _resolve_request_file('d', "data")
    else:
        dataPath = config.get("defaultDataPath")
        # A config without "defaultDataPath" yields None, which
        # os.path.isfile() rejects with TypeError; report it like any other
        # unusable path instead.
        if not dataPath or not os.path.isfile(dataPath):
            message = 'defaultDataPath "{}" from config file "{}" does not point to a file'.format(
                dataPath, configPath)
            raise FileNotFoundError(message)

    df = get_data(dataPath, config.get("force_discrete_colorable", []))

    # categorize columns: object-dtype columns are discrete, the rest are
    # continuous; the projected coordinates are not offered to the user
    columns = [
        name for name in df.columns if name not in ("easting", "northing")
    ]
    discrete = [name for name in columns if df[name].dtype == object]
    continuous = [name for name in columns if name not in discrete]
    # a discrete column can drive marker size only if SIZES has an entry
    # for each of its levels
    discrete_sizeable = [
        name for name in discrete if len(df[name].unique()) <= len(SIZES)
    ]
    max_discrete_colors = config.get("max_discrete_colors", 256)
    forced_colorable = config.get("force_discrete_colorable", [])
    discrete_colorable = []
    for name in discrete:
        level_count = len(df[name].unique())
        if level_count <= max_discrete_colors or (
                name in forced_colorable and level_count < 256):
            discrete_colorable.append(name)

    # create widgets; the axis selectors fall back to the second and third
    # column when the configured default is not a known column
    default_x = config.get("default_xAxis")
    x = Select(title='X-Axis',
               value=default_x if default_x in columns else columns[1],
               options=columns)
    x.on_change('value', x_change)

    default_y = config.get("default_yAxis")
    y = Select(title='Y-Axis',
               value=default_y if default_y in columns else columns[2],
               options=columns)
    y.on_change('value', y_change)

    size = Select(title='Size',
                  value=config.get("default_sizeBy", "None"),
                  options=['None'] + discrete_sizeable + continuous)
    size.on_change('value', size_change)

    color = Select(title='Color',
                   value=config.get("default_colorBy", "None"),
                   options=['None'] + discrete_colorable + continuous)
    color.on_change('value', color_change)

    palette = Select(title='Palette',
                     value=config.get("default_palette", "inferno"),
                     options=list(palettes))
    palette.on_change('value', palette_change)

    ######################
    # initialize sources #
    ######################

    # 'source' feeds both plots; 'table_source' feeds the table
    source = create_source(df, size.value, color.value, palette.value,
                           continuous, discrete_sizeable, discrete_colorable)
    source.on_change('selected', selection_change)
    table_source = ColumnDataSource(df)

    ########################
    # javascript callbacks #
    ########################

    # Client-side callback that serialises 'table_source' to CSV and offers it
    # as the download 'data_result.csv'.
    # - '%s' in the JS source is filled with the JSON-encoded list of column
    #   names, which fixes both the header row and the column order.
    # - String cells are wrapped in double quotes; embedded quotes are not
    #   escaped.
    # - navigator.msSaveBlob is used when present, otherwise a hidden <a>
    #   element is clicked programmatically.
    download_callback = CustomJS(args=dict(table_source=table_source),
                                 code=r"""
            var data = table_source.data;
            var columns = %s;
            var n = columns.length;
            var m = data[columns[0]].length;

            var csvLines = [];

            var currRow = [];
            for (j=0; j<n; j++) {
                currRow.push("\"" + columns[j].toString() + "\"");
            }

            csvLines.push(currRow.join(","));

            for (i=0; i < m; i++) {
                var currRow = [];
                for (j=0; j<n; j++) {
                    if (typeof(data[columns[j]][i]) == 'string') {
                        currRow.push("\"" + data[columns[j]][i].toString() + "\"");
                    } else {
                        currRow.push(data[columns[j]][i].toString());
                    }
                }
                csvLines.push(currRow.join(","));
            }

            var filetext = csvLines.join("\n");

            var filename = 'data_result.csv';
            var blob = new Blob([filetext], { type: 'text/csv;charset=utf-8;' });

            //addresses IE
            if (navigator.msSaveBlob) {
                navigator.msSaveBlob(blob, filename);
            }

            else {
                var link = document.createElement("a");
                link = document.createElement('a');
                link.href = URL.createObjectURL(blob);
                link.download = filename;
                link.target = "_blank";
                link.style.visibility = 'hidden';
                link.dispatchEvent(new MouseEvent('click'));
            }
        """ % json.dumps(columns))

    # Client-side callback that recomputes the plotted map coordinates
    # ("es"/"ns") from the projected ones ("easting"/"northing").
    # - The JS source also uses 'slider' and 'dist'; they are not passed here
    #   but added to jitter_callback.args once the widgets exist.
    # - A slider value of 0 copies the coordinates through unchanged.
    # - Otherwise 'map_jitter' takes its distribution from 'dist' and a width
    #   of slider.value * 1000 (presumably km -> m, as the slider title is in
    #   Km; confirm) and is applied to every coordinate.
    jitter_callback = CustomJS(args=dict(source=source, map_jitter=Jitter()),
                               code=r"""
            var data = source.data;
            if (slider.value == 0) {
                for (var i = 0; i < data['easting'].length; i++) {
                    data['es'][i] = data['easting'][i];
                }
                for (var i = 0; i < data['northing'].length; i++) {
                    data['ns'][i] = data['northing'][i];
                }
            }

            else {
                map_jitter.distribution = dist.value
                map_jitter.width = slider.value * 1000
                for (var i = 0; i < data['easting'].length; i++) {
                    data['es'][i] = map_jitter.compute(data['easting'][i]);
                }
                for (var i = 0; i < data['northing'].length; i++) {
                    data['ns'][i] = map_jitter.compute(data['northing'][i]);
                }
            }
            source.trigger('change');
        """)

    download_button = Button(callback=download_callback,
                             label="Download Selected",
                             button_type="success")

    # both jitter widgets share one callback
    jitter_selector = Select(callback=jitter_callback,
                             title="Map Jitter Distribution:",
                             options=["uniform", "normal"],
                             value="uniform")

    jitter_slider = Slider(callback=jitter_callback,
                           title="Map Jitter Width (Km):",
                           start=0, end=1000, step=10,
                           value=0)

    # the callback reads the widgets under these names; they can only be
    # registered now that the widgets exist
    jitter_callback.args["dist"] = jitter_selector
    jitter_callback.args["slider"] = jitter_slider

    # initialize plots
    crossfilter = create_crossfilter(df, source, discrete, x.value, y.value)
    mapPlot = create_map(source)

    # create layout: controls and both plots on top, the table underneath
    control_widgets = [
        x, y, color, palette, size,
        jitter_selector, jitter_slider, download_button,
    ]
    controls = widgetbox(control_widgets, width=200)
    table = widgetbox(create_table(columns, table_source))
    top_row = [controls, crossfilter, mapPlot]
    bottom_row = [row(table)]
    l = layout([top_row, bottom_row])

    # add layout to document
    doc.add_root(l)
    doc.title = "Crossfilter"

    # Dark theme for the document. yaml.safe_load replaces yaml.load, which
    # requires an explicit Loader argument in PyYAML >= 6; for this literal
    # mapping the parsed result is the same. A duplicated
    # minor_tick_line_color key was dropped from the YAML.
    doc.theme = Theme(json=yaml.safe_load("""
        attrs:
            Figure:
                background_fill_color: '#2F2F2F'
                border_fill_color: '#2F2F2F'
                outline_line_color: '#444444'
            Axis:
                axis_line_color: "white"
                axis_label_text_color: "white"
                major_label_text_color: "white"
                major_tick_line_color: "white"
                minor_tick_line_color: "white"
            Grid:
                grid_line_dash: [6, 4]
                grid_line_alpha: .3
            Title:
                text_color: "white"
    """))