Example #1
0
 def test_basic(self):
     """factor_cmap returns a field/transform dict whose mapper carries every explicit argument."""
     t = bt.factor_cmap("foo", ["red", "green"], ["foo", "bar"], start=1, end=2, nan_color="pink")
     assert isinstance(t, dict)
     assert set(t) == {"field", "transform"}
     assert t['field'] == "foo"
     assert isinstance(t['transform'], CategoricalColorMapper)
     assert t['transform'].palette == ["red", "green"]
     assert t['transform'].factors == ["foo", "bar"]
     assert t['transform'].start == 1
     # Compare by value: `is 2` checks object identity, which only passes by
     # accident of CPython's small-int cache and triggers a SyntaxWarning.
     assert t['transform'].end == 2
     assert t['transform'].nan_color == "pink"
Example #2
0
 def test_defaults(self):
     """With only field, palette and factors given, the mapper keeps its default start/end/nan_color."""
     result = bt.factor_cmap("foo", ["red", "green"], ["foo", "bar"])

     # Shape of the returned spec.
     assert isinstance(result, dict)
     assert set(result) == {"field", "transform"}
     assert result['field'] == "foo"

     # Explicitly supplied mapper settings.
     mapper = result['transform']
     assert isinstance(mapper, CategoricalColorMapper)
     assert mapper.palette == ["red", "green"]
     assert mapper.factors == ["foo", "bar"]

     # Defaults for everything that was not passed.
     assert mapper.start == 0
     assert mapper.end is None
     assert mapper.nan_color == "gray"
from bokeh.palettes import Spectral6
from bokeh.plotting import figure
from bokeh.transform import factor_cmap

# Send the rendered plot to this HTML file.
output_file("colormapped_bars.html")

fruits = ['Apples', 'Pears', 'Nectarines', 'Plums', 'Grapes', 'Strawberries']
counts = [5, 3, 4, 2, 4, 6]

# Column data for the bars; the keys are the column names the glyph refers to.
source = ColumnDataSource(data={'fruits': fruits, 'counts': counts})

# The fruit names double as the x-range of the figure.
p = figure(x_range=fruits, plot_height=250, toolbar_location=None,
           title="Fruit Counts")

# One bar per fruit; the fill colour is looked up from Spectral6 by fruit name.
p.vbar(x='fruits', top='counts', width=0.9, source=source, legend="fruits",
       line_color='white',
       fill_color=factor_cmap('fruits', palette=Spectral6, factors=fruits))

# Cosmetics: no vertical grid lines, fixed y-range of 0..9 (largest count is 6),
# and a horizontal legend at the top centre.
p.xgrid.grid_line_color = None
p.y_range.start = 0
p.y_range.end = 9
p.legend.orientation = "horizontal"
p.legend.location = "top_center"

show(p)
from bokeh.io import show, output_file
from bokeh.models import ColumnDataSource
from bokeh.palettes import Spectral6
from bokeh.plotting import figure
from bokeh.transform import factor_cmap

# Send the rendered plot to this HTML file.
output_file("bar_colormapped.html")

fruits = ['Apples', 'Pears', 'Nectarines', 'Plums', 'Grapes', 'Strawberries']
counts = [5, 3, 4, 2, 4, 6]

# Column data for the bars; the keys are the column names the glyph refers to.
source = ColumnDataSource(data={'fruits': fruits, 'counts': counts})

p = figure(
    x_range=fruits,
    plot_height=350,
    toolbar_location=None,
    title="Fruit Counts",
)
p.vbar(
    x='fruits',
    top='counts',
    width=0.9,
    source=source,
    legend="fruits",
    line_color='white',
    # Colour each bar from Spectral6 according to its fruit name.
    fill_color=factor_cmap('fruits', palette=Spectral6, factors=fruits),
)

# Cosmetics: no vertical grid lines, fixed y-range of 0..9, horizontal legend
# at the top centre.
p.xgrid.grid_line_color = None
p.y_range.start = 0
p.y_range.end = 9
p.legend.orientation = "horizontal"
p.legend.location = "top_center"

show(p)
# The column names 'fruits' and 'counts' hold the Ciudad and Poblacion columns
# of mi_df respectively.
source = ColumnDataSource(
    data={'fruits': mi_df.Ciudad, 'counts': mi_df.Poblacion})

plot = figure(x_range=mi_df.Ciudad, plot_height=350, toolbar_location=None,
              tools="crosshair,pan,reset,save,wheel_zoom", title="Ciuades")

# One bar per city, coloured from Spectral6 by city name.
plot.vbar(x='fruits', top='counts', width=0.9, source=source,
          legend="fruits", line_color='white',
          fill_color=factor_cmap('fruits', palette=Spectral6,
                                 factors=mi_df.Ciudad))

# Cosmetics: no vertical grid lines, fixed y-range of 0..450000, horizontal
# legend at the top centre.
plot.xgrid.grid_line_color = None
plot.y_range.start = 0
plot.y_range.end = 450000
plot.legend.orientation = "horizontal"
plot.legend.location = "top_center"

# Set up widgets: a text box for the plot title and one slider per city.
text = TextInput(title="title", value=u'mi gráfica')
# NOTE(review): every slider is created with value=0.0, which lies below its
# own `start` bound — confirm whether `start` should be 0 or the initial value
# should be moved inside [start, end].
v1 = Slider(title="Loja", value=0.0, start=1000.0, end=200000, step=5000.0)
v2 = Slider(title="Quito", value=0.0, start=10000.0, end=300000, step=5000.0)
v3 = Slider(title="Guayaquil", value=0.0, start=1000.0, end=30000, step=5000.0)
v4 = Slider(title="Cuenca", value=0.0, start=1000.0, end=20000, step=5000.0)
Example #6
0
def _draw_pareto_band(fig, df, x, y, negate=None):
    """Draw the points returned by ``pareto_points`` for columns *x*, *y* as a wide grey band.

    ``negate`` optionally names one of the two columns; its sign is flipped
    before ``pareto_points`` is called and flipped back for plotting.
    """
    if negate is None:
        front = pareto_points(df[[x, y]])
        xs, ys = front[x], front[y]
    else:
        signed = [-df[[col]] if col == negate else df[[col]] for col in (x, y)]
        front = pareto_points(pd.concat(signed, axis=1))
        xs = -front[x] if negate == x else front[x]
        ys = -front[y] if negate == y else front[y]
    fig.line(xs, ys, line_width=20, color='gray', line_alpha=0.25)


def _add_measure_scatter(fig, df, x, y, fill_alpha, line_width):
    """Scatter columns *x* vs *y* of *df* on *fig* and attach a label hover tool."""
    labels = df['labels']
    renderer = fig.scatter(x,
                           y,
                           source=df,
                           size='marker_size',
                           marker=factor_mark('labels', df['marker'], labels),
                           color=factor_cmap('labels', df['colour'], labels),
                           fill_alpha=fill_alpha,
                           line_width=line_width)
    # Restrict the hover tool to the scatter so the grey band does not react.
    fig.add_tools(HoverTool(tooltips=[('', '@labels')], renderers=[renderer]))
    return renderer


def plot_scatter(label=None,
                 water=None,
                 stakeholders=None,
                 costs=None,
                 potAll=None):
    """Show a lower-triangular scatter matrix of the measure statistics.

    Reads ``stats_measures.csv`` from ``../input_files/input/measures``
    (relative to the current working directory), optionally appends one
    user-defined measure, and displays six linked scatter panels with
    ``show``.

    Args:
        label: Value for the ``labels`` column of the user-defined measure.
        water: Value for its ``dwl_Qref`` column.
        stakeholders: Value for its ``nr_stakeholders`` column.
        costs: Value for its ``cost_sum`` column.
        potAll: Value for its ``FI`` column.

    The user-defined measure is added (as a gold hexagon of size 20) as soon
    as at least one argument is not ``None``. Returns ``None``.
    """
    data_path = os.path.join(os.getcwd(), '..', 'input_files', 'input',
                             'measures')
    df = pd.read_csv(os.path.join(data_path, 'stats_measures.csv'))

    fill_alpha = 0.7
    line_width = 1

    # Every measure read from the file gets the same default marker size.
    df['marker_size'] = len(df) * [10]

    # Add the user-defined measure, if any of its values was supplied.
    user_values = (label, water, stakeholders, costs, potAll)
    if any(value is not None for value in user_values):
        row = pd.DataFrame(
            [[label, water, 'gold', costs, potAll, stakeholders, 'hex', 20]],
            columns=[
                'labels', 'dwl_Qref', 'colour', 'cost_sum', 'FI',
                'nr_stakeholders', 'marker', 'marker_size'
            ])
        # DataFrame.append was removed in pandas 2.0; concat is the
        # equivalent (the original index labels are kept, as before).
        df = pd.concat([df, row])

    subplot_width = 275
    subplot_height = subplot_width
    min_border = 0
    delta_offset_left = 50
    # Panels in the first column are wider by delta_offset_left.
    wide = subplot_width + delta_offset_left

    pot_ymin = 60
    pot_ymax = 180

    toolset = ['pan', 'box_zoom', 'wheel_zoom', 'zoom_in', 'zoom_out', 'reset']
    shared = dict(plot_height=subplot_height,
                  min_border_left=min_border,
                  min_border_bottom=min_border,
                  tools=toolset)

    # Row 1: water level lowering vs. number of stakeholders.
    subfig11 = figure(plot_width=wide, toolbar_location='above', **shared)
    _draw_pareto_band(subfig11, df, 'dwl_Qref', 'nr_stakeholders')
    _add_measure_scatter(subfig11, df, 'dwl_Qref', 'nr_stakeholders',
                         fill_alpha, line_width)
    subfig11.yaxis.axis_label = 'No. of stakeholders (-)'

    # Row 2, column 1: water level lowering vs. FI (shares x with row 1).
    subfig21 = figure(plot_width=wide,
                      toolbar_location=None,
                      x_range=subfig11.x_range,
                      **shared)
    _draw_pareto_band(subfig21, df, 'dwl_Qref', 'FI', negate='FI')
    _add_measure_scatter(subfig21, df, 'dwl_Qref', 'FI', fill_alpha,
                         line_width)
    subfig21.yaxis.axis_label = 'PotAll (-)'
    # Range1d(max, min) gives a reversed axis.
    subfig21.y_range = Range1d(pot_ymax, pot_ymin)

    # Row 2, column 2: stakeholders vs. FI (shares y with the panel to its left).
    subfig22 = figure(plot_width=subplot_width,
                      toolbar_location=None,
                      y_range=subfig21.y_range,
                      **shared)
    _draw_pareto_band(subfig22, df, 'nr_stakeholders', 'FI', negate='FI')
    _add_measure_scatter(subfig22, df, 'nr_stakeholders', 'FI', fill_alpha,
                         line_width)
    # '0pt' tick labels: the y-axis is shared with the panel to the left.
    subfig22.yaxis.major_label_text_font_size = '0pt'

    # Row 3, column 1: water level lowering vs. implementation costs.
    subfig31 = figure(plot_width=wide,
                      toolbar_location=None,
                      x_range=subfig11.x_range,
                      **shared)
    _draw_pareto_band(subfig31, df, 'dwl_Qref', 'cost_sum')
    _add_measure_scatter(subfig31, df, 'dwl_Qref', 'cost_sum', fill_alpha,
                         line_width)
    subfig31.yaxis.axis_label = 'Implementation costs (\u20AC)'
    subfig31.xaxis.axis_label = 'Water level lowering (m)'

    # Row 3, column 2: stakeholders vs. implementation costs. This panel
    # draws a single grey circle at the origin instead of a band.
    subfig32 = figure(plot_width=subplot_width,
                      toolbar_location=None,
                      x_range=subfig22.x_range,
                      y_range=subfig31.y_range,
                      **shared)
    subfig32.circle(0,
                    0,
                    line_width=20,
                    fill_color='gray',
                    color='gray',
                    line_alpha=0.25)
    _add_measure_scatter(subfig32, df, 'nr_stakeholders', 'cost_sum',
                         fill_alpha, line_width)
    subfig32.yaxis.major_label_text_font_size = '0pt'
    subfig32.xaxis.axis_label = 'No. of stakeholders (-)'

    # Row 3, column 3: FI vs. implementation costs.
    subfig33 = figure(plot_width=subplot_width,
                      toolbar_location=None,
                      y_range=subfig31.y_range,
                      **shared)
    _draw_pareto_band(subfig33, df, 'FI', 'cost_sum', negate='FI')
    _add_measure_scatter(subfig33, df, 'FI', 'cost_sum', fill_alpha,
                         line_width)
    subfig33.yaxis.major_label_text_font_size = '0pt'
    subfig33.x_range = Range1d(pot_ymax, pot_ymin)
    subfig33.xaxis.axis_label = 'PotAll (-)'

    matrix = gridplot([[subfig11, None, None], [subfig21, subfig22, None],
                       [subfig31, subfig32, subfig33]],
                      toolbar_location='above')

    show(matrix)
Example #7
0
                                end=proteins.iloc[proteins.shape[0] - 1, 2]))

        # Plots by nucleotide letter change.
        if (args.nuc):
            genome_plot.circle(
                x='Position',
                y=jitter('AF', width=2, range=genome_plot.y_range),
                size=15,
                alpha=0.6,
                hover_alpha=1,
                legend='LetterChange',
                line_color='white',
                line_width=2,
                line_alpha=1,
                fill_color=factor_cmap('LetterChange',
                                       palette=color_palette,
                                       factors=merged.LetterChange.unique()),
                hover_color=factor_cmap('LetterChange',
                                        palette=color_palette,
                                        factors=merged.LetterChange.unique()),
                source=depth_sample,
                hover_line_color='white')
        # Plots by amino acid change type.
        else:
            genome_plot.circle(x='Position',
                               y=jitter('AF',
                                        width=2,
                                        range=genome_plot.y_range),
                               size=15,
                               alpha=0.6,
                               hover_alpha=1,
Example #8
0
# Species names and the marker used for each, matched by position.
SPECIES = ['setosa', 'versicolor', 'virginica']
MARKERS = ['hex', 'circle_x', 'triangle']

p = figure(title="Iris Morphology")
p.xaxis.axis_label = 'Petal Length'
p.yaxis.axis_label = 'Sepal Width'

# Marker shape and colour are both derived from the 'species' column.
p.scatter(
    "petal_length",
    "sepal_width",
    source=flowers,
    legend_group="species",
    fill_alpha=0.4,
    size=12,
    marker=factor_mark('species', MARKERS, SPECIES),
    color=factor_cmap('species', 'Category10_3', SPECIES),
)

p.legend.background_fill_color = "#3f3f3f"

theme = Theme(
    json={
        'attrs': {
            'Figure': {
                'background_fill_color': '#3f3f3f',
                'border_fill_color': '#3f3f3f',
                'outline_line_color': '#444444'
            },
            'Axis': {
                'axis_line_color': "white",
                'axis_label_text_color': "white",
                'major_label_text_color': "white",
Example #9
0
# Bar chart: one bar per location key, coloured from Spectral6 by key.
locationHistogramSource = ColumnDataSource(
    data={'data1': locationKeys, 'data2': locationValues})

locationHistogram = figure(x_range=locationKeys, plot_height=350,
                           toolbar_location=None, title="Location")
locationHistogram.vbar(
    x='data1',
    top='data2',
    width=0.9,
    source=locationHistogramSource,
    legend="data1",
    line_color='white',
    fill_color=factor_cmap('data1', palette=Spectral6, factors=locationKeys),
)

# Each location's share of the total.
ratio = [
    locationValue / locationTotal
    for locationValue in locationRawData.values()
]
# Running cumulative share, starting at 0.
percents = [0]
for r in ratio:
    percents.append(percents[-1] + r)
# Consecutive cumulative shares, scaled by 2*pi, give each wedge's start and
# end angle.
starts = [share * 2 * pi for share in percents[:-1]]
ends = [share * 2 * pi for share in percents[1:]]
locationPieChartSource = ColumnDataSource(data=dict(data1=locationKeys,
                                                    data2=[0] * len(starts),
                                                    data3=[0] * len(starts),
                                                    starts=starts,
    "alkaline earth metal" : "#1f78b4",
    "metal"                : "#d93b43",
    "halogen"              : "#999d9a",
    "metalloid"            : "#e08d49",
    "noble gas"            : "#eaeaea",
    "nonmetal"             : "#f1d4Af",
    "transition metal"     : "#599d7A",
}

source = ColumnDataSource(df)

p = figure(
    plot_width=900,
    plot_height=500,
    title="Periodic Table (omitting LA and AC Series)",
    x_range=groups,
    # Periods are reversed for the y-range.
    y_range=list(reversed(periods)),
    toolbar_location=None,
    tools="hover",
)

# One rectangle per element, coloured by its 'metal' category via cmap.
p.rect("group", "period", 0.95, 0.95,
       source=source,
       fill_alpha=0.6,
       legend="metal",
       color=factor_cmap('metal',
                         palette=list(cmap.values()),
                         factors=list(cmap.keys())))

# Keyword arguments shared by every text glyph below.
text_props = dict(source=source, text_align="left", text_baseline="middle")

# All text is dodged by -0.4 along the group axis.
x = dodge("group", -0.4, range=p.x_range)

r = p.text(x=x, y="period", text="symbol", **text_props)
r.glyph.text_font_style = "bold"

r = p.text(x=x,
           y=dodge("period", 0.3, range=p.y_range),
           text="atomic number",
           **text_props)
r.glyph.text_font_size = "8pt"

r = p.text(x=x,
           y=dodge("period", -0.35, range=p.y_range),
           text="name",
           **text_props)
r.glyph.text_font_size = "5pt"

r = p.text(x=x,
           y=dodge("period", -0.2, range=p.y_range),
           text="atomic mass",
           **text_props)
plt.ylabel('Author', fontsize=10)
plt.title('Best Author', fontsize=15)

# Horizontal bars: rating count on x, author on y.
ax = sns.barplot(
    x=bestauthor['Customers_Rated'],
    y=bestauthor['Author'],
    palette='vlag',
)

# Write each bar's value (thousands-separated, no decimals) at the bar's end.
rated_and_author = zip(bestauthor['Customers_Rated'], bestauthor['Author'])
for i, (value, name) in enumerate(rated_and_author):
    ax.text(value, i - .05, f'{value:,.0f}', size=8, ha='left', va='center')

ax.set(xlabel='Customers_Rated', ylabel='Author')
plt.show()

# Rating vs. number of customer ratings, coloured by author (Bokeh plot).
palette = d3['Category20'][20]
index_cmap = factor_cmap(
    'Author',
    palette=palette,
    factors=bestbookauthor["Author"],
)

p = figure(plot_width=700, plot_height=700,
           title="Top Authors: Rating vs. Customers Rated")
p.scatter(
    'Rating',
    'Customers_Rated',
    source=bestbookauthor,
    fill_alpha=0.6,
    fill_color=index_cmap,
    size=20,
    legend='Author',
)

p.xaxis.axis_label = 'RATING'
p.yaxis.axis_label = 'CUSTOMERS RATED'
p.legend.location = 'top_left'
show(p)
                                      z_data_1=z_data_1,
                                      members_1=members_1))

# Bar chart over the categories in x_data_1, with drag and scroll tools
# left inactive.
p = figure(x_range=x_data_1, plot_height=350, toolbar_location=None,
           title="", active_drag=None, active_scroll=None)

# Bars are coloured from palette_1 by category; values that are not among
# the factors get the nan_color.
bar_fill = factor_cmap('x_data_1', palette=palette_1, factors=x_data_1,
                       nan_color="#9E2963")
p.vbar(x='x_data_1', top='y_data_1', width=0.5, source=source_1,
       line_color='white', fill_color=bar_fill)
del bar_fill  # keep the module namespace as it was

p.xgrid.grid_line_color = None
p.tools.append(hover_1)


def update():
    party_1_name = party_1.value
    party_2_name = party_2.value
    parameter_name = parameter_1.value
    average_score = average_scores_1[parameter_1.value]
    max_score = max_scores_1[parameter_1.value]
    min_score = min_scores_1[parameter_1.value]
    if party_1_name == "None" and party_2_name == "None":
        p.yaxis.axis_label = parameter_name
Example #13
0
source = ColumnDataSource(data={'ticket': ticket, 'counts': counts})

p = figure(
    x_range=ticket,
    plot_height=600,
    plot_width=971,
    toolbar_location=None,
    title="Average Titanic Fare, by Class",
)

# The first palette entry differs from the other two, so the first factor in
# `ticket` is highlighted and the rest share one grey.
p.vbar(
    x='ticket',
    top='counts',
    width=0.7,
    source=source,
    legend="ticket",
    line_color='white',
    fill_color=factor_cmap('ticket',
                           palette=['#3a6587', '#aeb3b7', '#aeb3b7'],
                           factors=ticket),
)

# Remove the chart grid lines (i.e. remove the chart clutter).
p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None

# Adjust the x-axis: label, line width and grey label/line colours.
p.xaxis.axis_label = "Class Type"
p.xaxis.axis_line_width = 2
p.xaxis.major_label_text_color = "#aeb3b7"
p.xaxis.axis_line_color = "#aeb3b7"

# Adjust the y-axis: label and line width.
p.yaxis.axis_label = "Average Fare Price (in Pounds)"
p.yaxis.axis_line_width = 2
Example #14
0
def _stats_message(all_data, total_size, success_times):
    """Return the HTML summary (rows, partitions, size) shown beside the plots."""
    rows = all_data['rows_per_partition']
    msg = f"Rows: {sum(rows):,}<br/>Partitions: {len(rows):,}<br/>Size: {total_size}<br/>"
    # success_times is empty when the listing contains no SUCCESS marker;
    # indexing it unconditionally would raise IndexError.
    if success_times and success_times[0]:
        msg += success_times[0]
    return msg


def _partition_stats_grid(all_data, title, msg, size_column, size_scale,
                          panel_size, subpanel_size):
    """Build one 2x2 grid: rows-per-partition vs. file-size scatter, its two
    marginal histograms, and a Div holding *msg*.

    ``size_column`` is the key in *all_data* plotted on the y-axis; the hover
    tooltip reads the matching ``<size_column>_human`` column.
    """
    source = ColumnDataSource(pd.DataFrame(all_data))
    p = figure(tools=TOOLS, plot_width=panel_size, plot_height=panel_size)
    p.title.text = title
    p.xaxis.axis_label = 'Number of rows'
    p.yaxis.axis_label = f'File size ({size_scale}B)'
    color_map = factor_cmap('spans_chromosome',
                            palette=Spectral8,
                            factors=list(set(all_data['spans_chromosome'])))
    p.scatter('rows_per_partition',
              size_column,
              color=color_map,
              legend='spans_chromosome',
              source=source)
    p.legend.location = 'bottom_right'
    p.select_one(HoverTool).tooltips = [
        (x, f'@{x}') for x in ('rows_per_partition', f'{size_column}_human',
                               'partition_bounds', 'index')
    ]

    p_stats = Div(text=msg)
    # The marginal histograms share the scatter's x and y ranges.
    p_rows_per_partition = figure(x_range=p.x_range,
                                  plot_width=panel_size,
                                  plot_height=subpanel_size)
    p_file_size = figure(y_range=p.y_range,
                         plot_width=subpanel_size,
                         plot_height=panel_size)

    rows_hist, rows_edges = np.histogram(all_data['rows_per_partition'],
                                         bins=50)
    p_rows_per_partition.quad(top=rows_hist,
                              bottom=0,
                              left=rows_edges[:-1],
                              right=rows_edges[1:],
                              fill_color="#036564",
                              line_color="#033649")
    size_hist, size_edges = np.histogram(all_data[size_column], bins=50)
    p_file_size.quad(right=size_hist,
                     left=0,
                     bottom=size_edges[:-1],
                     top=size_edges[1:],
                     fill_color="#036564",
                     line_color="#033649")

    return gridplot([[p_rows_per_partition, p_stats], [p, p_file_size]])


def plot_hail_file_metadata(
        t_path: str) -> Optional[Union[Grid, Tabs, bokeh.plotting.Figure]]:
    """
    Takes path to hail Table or MatrixTable (gs://bucket/path/hail.mt), outputs Grid or Tabs, respectively
    Or if an unordered Table is provided, a Figure with file sizes is output
    If metadata file or rows directory is missing, returns None

    A warning is issued (via ``warnings.warn``) in each of the two ``None``
    cases and when the table turns out not to be partitioned.
    """
    panel_size = 600
    subpanel_size = 150

    files = hl.hadoop_ls(t_path)
    rows_file = [x['path'] for x in files if x['path'].endswith('rows')]
    entries_file = [x['path'] for x in files if x['path'].endswith('entries')]
    # Modification time of the SUCCESS marker, if the listing has one.
    success_file = [
        x['modification_time'] for x in files if x['path'].endswith('SUCCESS')
    ]

    data_type = 'Table'

    metadata_file = [
        x['path'] for x in files if x['path'].endswith('metadata.json.gz')
    ]
    if not metadata_file:
        warnings.warn('No metadata file found. Exiting...')
        return None

    with hl.hadoop_open(metadata_file[0], 'rb') as f:
        overall_meta = json.loads(f.read())
        rows_per_partition = overall_meta['components']['partition_counts'][
            'counts']

    if not rows_file:
        warnings.warn('No rows directory found. Exiting...')
        return None
    rows_files = hl.hadoop_ls(rows_file[0])

    if entries_file:
        # An 'entries' directory marks a MatrixTable; its row files live one
        # 'rows' directory deeper.
        data_type = 'MatrixTable'
        rows_file = [
            x['path'] for x in rows_files if x['path'].endswith('rows')
        ]
        rows_files = hl.hadoop_ls(rows_file[0])
    row_partition_bounds, row_file_sizes = get_rows_data(rows_files)

    total_file_size, row_file_sizes, row_scale = scale_file_sizes(
        row_file_sizes)

    if not row_partition_bounds:
        warnings.warn('Table is not partitioned. Only plotting file sizes')
        row_file_sizes_hist, row_file_sizes_edges = np.histogram(
            row_file_sizes, bins=50)
        p_file_size = figure(plot_width=panel_size, plot_height=panel_size)
        p_file_size.quad(right=row_file_sizes_hist,
                         left=0,
                         bottom=row_file_sizes_edges[:-1],
                         top=row_file_sizes_edges[1:],
                         fill_color="#036564",
                         line_color="#033649")
        p_file_size.yaxis.axis_label = f'File size ({row_scale}B)'
        return p_file_size

    # Each bound is indexed as x[0]..x[3]; a partition whose x[0] differs from
    # x[2] is labelled as spanning chromosomes and gets width -1.
    all_data = {
        'partition_widths':
        [-1 if x[0] != x[2] else x[3] - x[1] for x in row_partition_bounds],
        'partition_bounds':
        [f'{x[0]}:{x[1]}-{x[2]}:{x[3]}' for x in row_partition_bounds],
        'spans_chromosome': [
            'Spans chromosomes' if x[0] != x[2] else 'Within chromosome'
            for x in row_partition_bounds
        ],
        'row_file_sizes':
        row_file_sizes,
        'row_file_sizes_human':
        [f'{x:.1f} {row_scale}B' for x in row_file_sizes],
        'rows_per_partition':
        rows_per_partition,
        'index':
        list(range(len(rows_per_partition)))
    }

    if entries_file:
        entries_rows_files = hl.hadoop_ls(entries_file[0])
        entries_rows_file = [
            x['path'] for x in entries_rows_files if x['path'].endswith('rows')
        ]
        if entries_rows_file:
            entries_files = hl.hadoop_ls(entries_rows_file[0])
            entry_partition_bounds, entry_file_sizes = get_rows_data(
                entries_files)
            total_entry_file_size, entry_file_sizes, entry_scale = scale_file_sizes(
                entry_file_sizes)
            all_data['entry_file_sizes'] = entry_file_sizes
            # Format the entry sizes themselves (the row sizes were being
            # formatted here, giving wrong hover text).
            all_data['entry_file_sizes_human'] = [
                f'{x:.1f} {entry_scale}B' for x in entry_file_sizes
            ]

    rows_grid = _partition_stats_grid(
        all_data,
        title=f'{data_type}: {t_path}',
        msg=_stats_message(all_data, total_file_size, success_file),
        size_column='row_file_sizes',
        size_scale=row_scale,
        panel_size=panel_size,
        subpanel_size=subpanel_size)

    if 'entry_file_sizes' not in all_data:
        return rows_grid

    entries_grid = _partition_stats_grid(
        all_data,
        title=f'Statistics for {data_type}: {t_path}',
        msg=_stats_message(all_data, total_entry_file_size, success_file),
        size_column='entry_file_sizes',
        size_scale=entry_scale,
        panel_size=panel_size,
        subpanel_size=subpanel_size)

    return Tabs(tabs=[
        Panel(child=entries_grid, title='Entries'),
        Panel(child=rows_grid, title='Rows')
    ])
Example #15
0
def query():
    """Query script entry point.

    Reads the PCA scores table and the TOB-WGS matrix table, labels every
    SNP-chip sample by whether its name also appears among the WGS samples,
    and writes one scatter plot per consecutive pair of principal components
    as both PNG and HTML.
    """

    hl.init(default_reference='GRCh38')

    scores = hl.read_table(SCORES)
    tob_wgs = hl.read_matrix_table(TOB_WGS)
    snp_chip_names = scores.s.collect()
    # A set makes the per-sample membership test O(1) instead of a list scan.
    wgs_names = set(tob_wgs.s.collect())

    labels = [
        'dual_sample' if sample_name in wgs_names else 'snp_chip_only'
        for sample_name in snp_chip_names
    ]

    # get percent variance explained
    eigenvalues = hl.import_table(EIGENVALUES)
    eigenvalues = eigenvalues.to_pandas()
    eigenvalues.columns = ['eigenvalue']
    eigenvalues = pd.to_numeric(eigenvalues.eigenvalue)
    variance = eigenvalues.divide(float(eigenvalues.sum())) * 100
    variance = variance.round(2)

    # Get number of PCs
    number_of_pcs = len(eigenvalues)

    # plot
    cohort_sample_codes = list(set(labels))
    tooltips = [('labels', '@label'), ('samples', '@samples')]
    for pc1 in range(number_of_pcs - 1):
        pc2 = pc1 + 1
        plot = figure(
            title='SNP Chip Samples',
            # Same "(value%)" format on both axes; the x label previously
            # carried a stray ")" before the percent sign.
            x_axis_label=f'PC{pc1 + 1} ({variance[pc1]}%)',
            y_axis_label=f'PC{pc2 + 1} ({variance[pc2]}%)',
            tooltips=tooltips,
        )
        source = ColumnDataSource(
            dict(
                x=scores.scores[pc1].collect(),
                y=scores.scores[pc2].collect(),
                label=labels,
                samples=snp_chip_names,
            ))
        plot.circle(
            'x',
            'y',
            alpha=0.5,
            source=source,
            size=8,
            color=factor_cmap('label', ['#1b9e77', '#d95f02'],
                              cohort_sample_codes),
            legend_group='label',
        )
        plot.add_layout(plot.legend[0], 'left')
        # pc2 equals the 1-based number of the x-axis component, so the
        # PC1-vs-PC2 plot is written as pc1.png / pc1.html.
        plot_filename = output_path(f'pc{pc2}.png', 'web')
        with hl.hadoop_open(plot_filename, 'wb') as f:
            get_screenshot_as_png(plot).save(f, format='PNG')
        html = file_html(plot, CDN, 'my plot')
        plot_filename_html = output_path(f'pc{pc2}.html', 'web')
        with hl.hadoop_open(plot_filename_html, 'w') as f:
            f.write(html)
Example #16
0
# Hover tooltip showing the $x / $y values under the cursor.
TOOLTIPS_SCATTER = [("(Fare,AGE)", "$x, $y")]


# Set the title.
# NOTE(review): "Survial" in the title looks like a typo for "Survival".
p = figure(
    title="Titanic Passenger Age & Fare by Survial Type",
    tooltips=TOOLTIPS_SCATTER,
)


# Construct the markers and colours from the 'Survived' column.
p.scatter(
    "Fare",
    "Age",
    source=titanic_df,
    legend="Survived",
    fill_alpha=0.3,
    size=12,
    marker=factor_mark('Survived', MARKERS, FATE),
    color=factor_cmap('Survived',
                      palette=['#3a6587', '#aeb3b7'],
                      factors=FATE),
)

# Set the axis labels.
p.xaxis.axis_label = 'Fare (In Pounds)'
p.yaxis.axis_label = 'Age (In Years)'

# Remove the grid lines.
p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None

# Adjust the x-axis: line width, label colour and line colour.
p.xaxis.axis_line_width = 2
p.xaxis.major_label_text_color = "black"
p.xaxis.axis_line_color = "#aeb3b7"

# change just some things about the y-axis
output_file("bar_nested_colormapped.html")

fruits = ['Apples', 'Pears', 'Nectarines', 'Plums', 'Grapes', 'Strawberries']
years = ['2015', '2016', '2017']

# Per-year counts, one entry per fruit (same order as `fruits`).
data = {
    'fruits': fruits,
    '2015': [2, 1, 4, 3, 2, 4],
    '2016': [5, 3, 3, 2, 4, 6],
    '2017': [3, 2, 4, 4, 5, 3],
}

palette = ["#c9d9d3", "#718dbf", "#e84d60"]

# Nested factors: [("Apples", "2015"), ("Apples", "2016"), ("Apples", "2017"),
# ("Pears", "2015"), ...]
x = [(fruit_name, year_name) for fruit_name in fruits for year_name in years]

# Interleave the yearly columns fruit by fruit so the flat tuple lines up
# with `x` (equivalent to a horizontal stack followed by a flatten).
counts = tuple(
    value
    for per_fruit in zip(data['2015'], data['2016'], data['2017'])
    for value in per_fruit
)

source = ColumnDataSource(data=dict(x=x, counts=counts))

p = figure(
    x_range=FactorRange(*x),
    plot_height=350,
    title="Fruit Counts by Year",
    toolbar_location=None,
    tools="",
)

# Colour each bar by the second level of its factor (the year): start=1, end=2.
p.vbar(
    x='x',
    top='counts',
    width=0.9,
    source=source,
    line_color="white",
    fill_color=factor_cmap(
        'x',
        palette=palette,
        factors=years,
        start=1,
        end=2,
    ),
)

p.y_range.start = 0
p.x_range.range_padding = 0.1
p.xaxis.major_label_orientation = 1
p.xgrid.grid_line_color = None

show(p)
Example #18
0
    var val = new Float64Array(xs.length)
    for (var i = 0; i < xs.length; i++) {
        if (source.data['Sex'][i] == 'Male')
            val[i] = -xs[i]
        else
            val[i] = xs[i]
    }
    return val
""")

# Horizontal bar "pyramid": one bar per age group, coloured by sex.
pyramid = figure(
    plot_width=600,
    plot_height=500,
    toolbar_location=None,
    y_range=groups,
    title="Population Breakdown by Age Group and Gender",
    x_axis_label="Population (Millions)",
    y_axis_label="Age Group",
)
pyramid.hbar(
    y="AgeGrp",
    height=1,
    # Bar length comes from 'Value' after it passes through gender_transform.
    right=transform('Value', gender_transform),
    source=ages,
    legend="Sex",
    line_color="white",
    fill_color=factor_cmap(
        'Sex',
        palette=["#3B8686", "#CFF09E"],
        factors=["Male", "Female"],
    ),
)

pyramid.ygrid.grid_line_color = None
# Tick labels show the absolute value scaled by 1e6 with an " M" suffix.
pyramid.xaxis[0].formatter = FuncTickFormatter(code="""
    return (Math.abs(tick) / 1e6) + " M"
""")

# Line plot of known and predicted population; both sources start empty.
known = ColumnDataSource(data={"x": [], "y": []})
predicted = ColumnDataSource(data={"x": [], "y": []})

population = figure(
    plot_width=600,
    plot_height=180,
    toolbar_location=None,
    title="Total Population by Year",
    x_axis_label="Year",
    y_axis_label="Population",
)
population.line(
    "x",
    "y",
    color="violet",
    line_width=2,
    source=known,
    legend="known",
)
Example #19
0
# Alternative tile providers that were tried:
#   tile_provider = get_provider(OSM)
#   tile_provider = get_provider(STAMEN_TERRAIN)

# Create the map figure with mercator axes and a fixed initial extent.
p = figure(
    x_range=(-220000, -60000),
    y_range=(6675593, 6835593),
    x_axis_type="mercator",
    y_axis_type="mercator",
    plot_width=1400,
    plot_height=800,
)
# Alternative kept for reference (auto-ranging variant):
#   p = figure(x_range=DataRange1d(default_span=20000, min_interval=40000),
#              y_range=DataRange1d(default_span=20000, min_interval=40000),
#              x_axis_type="mercator", y_axis_type="mercator",
#              plot_width=1400, plot_height=800, aspect_ratio=1.75)
p.add_tile(tile_provider)

# Positions, coloured by verification status.
p.circle(
    x="x",
    y="y",
    source=latlonsrc,
    color=factor_cmap(
        "verif_status",
        ["blue", "green", "red", "black"],
        ["NEW", "True", "False", "None"],
    ),
)
# Receivers drawn as purple diamonds.
p.diamond(x="x", y="y", source=receiversrc, color="purple", size=10)
# Make the wheel-zoom tool the active scroll tool.
p.toolbar.active_scroll = p.select_one(WheelZoomTool)

# Table view over the same source as the circles.
t = DataTable(
    source=latlonsrc,
    columns=[
        TableColumn(field="rcvtime", title="Time", default_sort="descending"),
        TableColumn(field="icao", title="ICAO24"),
        TableColumn(field="lat", title="Lat"),
        TableColumn(field="lon", title="Lon"),
        TableColumn(field="verif_status", title="Verified?"),
    ],
    width=350,
    height=800,
)
Example #20
0
from bokeh.io import show
from bokeh.palettes import Spectral11
from bokeh.plotting import figure
from bokeh.transform import factor_cmap

import pandas as pd
# Load the user/year/country counts. The context manager closes the file
# handle, which the previous bare open() call left dangling.
with open("outputs/user-year-country-count.json") as json_file:
    odf = pd.read_json(json_file)

# Keep only the 2014 rows.
odf2 = odf.loc[odf.year == 2014]

# sort_values() returns a new frame; the result has to be re-bound,
# otherwise the sort is silently discarded.
# NOTE(review): the sort is ascending, so head(10) below picks the ten
# smallest counts -- pass ascending=False if the ten largest were intended.
odf2 = odf2.sort_values("count")

odf3 = odf2.head(10)

# Compute the categorical factors once and share them between the x-range
# and the colour mapper so the two can never disagree.
country_codes = list(odf3.country_code.unique())

p = figure(x_range=country_codes, plot_height=500)

p.vbar(source=odf3,
       x='country_code',
       top='count',
       width=1,
       line_color='white',
       fill_color=factor_cmap('country_code',
                              palette=Spectral11,
                              factors=country_codes))

show(p)
Example #21
0
from bokeh.plotting import figure, show, output_file
from bokeh.sampledata.iris import flowers
from bokeh.transform import factor_cmap, factor_mark

# Species names and the marker assigned to each (matched by position).
SPECIES = ['setosa', 'versicolor', 'virginica']
MARKERS = ['hex', 'circle_x', 'triangle']

p = figure(
    title="Iris Morphology",
    background_fill_color="#fafafa",
)
p.xaxis.axis_label = 'Petal Length'
p.yaxis.axis_label = 'Sepal Width'

# Marker shape and colour are both driven by the 'species' column.
p.scatter(
    "petal_length",
    "sepal_width",
    source=flowers,
    legend="species",
    fill_alpha=0.4,
    size=12,
    marker=factor_mark('species', MARKERS, SPECIES),
    color=factor_cmap('species', 'Category10_3', SPECIES),
)

output_file("marker_map.html")

show(p)
Example #22
0
# Recode the survival flag: 0 becomes 'Died', 1 becomes 'Lived'.
titanic_df[['Survived']] = (
    titanic_df[['Survived']].replace(0, 'Died').replace(1, 'Lived')
)

# Legend categories and the marker used for each (matched by position).
FATE = ['Died', 'Lived']
MARKERS = ['cross', 'circle']

# Create the figure with an explicit size and title.
p = figure(
    plot_height=600,
    plot_width=971,
    title="Titanic Passenger Age & Fare by Survial Type",
)

# Draw the passengers; marker shape and colour are both keyed on 'Survived'.
p.scatter(
    "Fare",
    "Age",
    source=titanic_df,
    legend="Survived",
    fill_alpha=0.3,
    size=12,
    marker=factor_mark('Survived', MARKERS, FATE),
    color=factor_cmap('Survived', ['#3a6587', '#aeb3b7'], FATE),
)

# Axis labels.
p.xaxis.axis_label = 'Fare (In Pounds)'
p.yaxis.axis_label = 'Age (In Years)'

# Hide the grid lines in both directions.
p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None

# X-axis styling: thicker grey axis line, black tick labels.
p.xaxis.axis_line_width = 2
p.xaxis.major_label_text_color = "black"
p.xaxis.axis_line_color = "#aeb3b7"

# (No y-axis styling follows in this excerpt.)
Example #23
0
def select_points_scatter(data,
                          X='X',
                          Y='Y',
                          hue='hue',
                          factor_type='categorical',
                          group='group',
                          alpha=.6,
                          plot_width=400,
                          plot_height=400,
                          palette=Spectral6,
                          vmin=0,
                          vmax=3):
    '''Show two linked scatter plots with a lasso selection, a table and a CSV save button.

    Points lasso-selected in the left plot ("Select Here") are copied into a
    second data source that feeds the right plot ("Watch Here") and a table.
    The "Save" button downloads the second source as ``data_result.csv`` in
    the browser. The layout is displayed with ``show``; nothing is returned.

    Parameters
    ----------
    data :
        Table-like object handed to ``ColumnDataSource(data=...)``. In
        categorical mode ``data[hue].unique()`` is also called, so a pandas
        DataFrame is presumably expected.
    X, Y : str
        Names of the columns holding the x and y positions.
    hue : str
        Name of the column that drives the point colour.
    factor_type : {'categorical', 'continuous'}
        'categorical' maps the unique values of ``hue`` onto ``palette``;
        'continuous' maps ``hue`` linearly between ``vmin`` and ``vmax``.
    group : str
        Name of the column shown in the table next to X and Y.
    alpha : float
        Glyph alpha used in both plots.
    plot_width, plot_height : int
        Size of each of the two figures. The table height is
        ``plot_height - 20``.
    palette :
        Palette given to the colour mapper.
    vmin, vmax : number
        Low and high bounds of the linear colour mapper (continuous only).

    Raises
    ------
    ValueError
        If ``factor_type`` is neither 'categorical' nor 'continuous'.
    '''
    # Build the colour specification shared by both scatter plots.
    if factor_type == 'categorical':
        color = factor_cmap(hue,
                            palette=palette,
                            factors=list(data[hue].unique()))
    elif factor_type == 'continuous':
        color_mapper = LinearColorMapper(palette=palette, low=vmin, high=vmax)
        color = {'field': hue, 'transform': color_mapper}
    else:
        raise ValueError(
            'factor_type must be \'continuous\' or \'categorical\'')

    # Main plot: the one the user selects points in.
    s1 = ColumnDataSource(data=data)

    # The requested plot size is honoured here; it used to be hard-coded to
    # 400x400, which silently ignored the plot_width/plot_height arguments.
    p1 = figure(plot_width=plot_width,
                plot_height=plot_height,
                tools="pan,wheel_zoom,lasso_select,reset",
                title="Select Here")
    p1.circle(X, Y, source=s1, alpha=alpha, color=color)

    # Mirror plot: starts empty, shares ranges with the main plot, and is
    # filled by the selection callback below.
    s2 = ColumnDataSource(data={X: [], Y: [], group: [], hue: []})
    p2 = figure(plot_width=plot_width,
                plot_height=plot_height,
                tools="",
                title="Watch Here",
                x_range=p1.x_range,
                y_range=p1.y_range)
    p2.circle(X, Y, source=s2, alpha=alpha, color=color)

    # Table listing the currently selected points.
    columns = [
        TableColumn(field=X, title="X axis"),
        TableColumn(field=Y, title="Y axis"),
        TableColumn(field=group, title=group)
    ]

    table = DataTable(source=s2,
                      columns=columns,
                      width=155,
                      height=plot_height - 20)

    # Browser-side callback: rebuild s2 from the selected indices of s1.
    selection_js = """
        var inds = cb_obj.indices;
        var d1 = s1.data;
        var d2 = s2.data;
        d2[X] = []
        d2[Y] = []
        d2[hue] = []
        d2[group] = []
        
        for (var i = 0; i < inds.length; i++) {
            d2[X].push(d1[X][inds[i]])
            d2[Y].push(d1[Y][inds[i]])
            d2[hue].push(d1[hue][inds[i]])
            d2[group].push(d1[group][inds[i]])
        }
        s2.change.emit();
        table.change.emit();
    """
    s1.selected.js_on_change(
        'indices',
        CustomJS(args=dict(
            s1=s1,
            s2=s2,
            table=table,
            X=X,
            Y=Y,
            hue=hue,
            group=group,
        ),
                 code=selection_js))

    savebutton = Button(label="Save", button_type="success", width=155)

    # Browser-side callback: serialise the selection source to CSV and
    # trigger a download. `filetext` is declared with `const`; assigning to
    # an undeclared name throws a ReferenceError under JavaScript strict mode.
    save_js = """
    function table_to_csv(source) {
        const columns = Object.keys(source.data)
        const nrows = source.get_length()
        const lines = [columns.join(',')]

        for (let i = 0; i < nrows; i++) {
            let row = [];
            for (let j = 0; j < columns.length; j++) {
                const column = columns[j]
                row.push(source.data[column][i].toString())
            }
            lines.push(row.join(','))
        }
        return lines.join('\\n').concat('\\n')
    }


    const filename = 'data_result.csv'
    const filetext = table_to_csv(source)
    const blob = new Blob([filetext], { type: 'text/csv;charset=utf-8;' })

    //addresses IE
    if (navigator.msSaveBlob) {
        navigator.msSaveBlob(blob, filename)
    } else {
        const link = document.createElement('a')
        link.href = URL.createObjectURL(blob)
        link.download = filename
        link.target = '_blank'
        link.style.visibility = 'hidden'
        link.dispatchEvent(new MouseEvent('click'))
    }
    """

    savebutton.callback = CustomJS(args=dict(source=s2, index_col=group),
                                   code=save_js)

    layout = row(p1, p2, column(table, savebutton))

    show(layout)
Example #24
0
def bar_chart(dataframe, groupcol, datacols=None, **kwargs):
    """Create a grouped bar chart from a Pandas DataFrame

    Each group gets one bar per data column; bars are coloured by data
    column.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        A dataframe of values
    groupcol : str
        The name of the column with the group labels
    datacols : str, sequence (optional)
        The name or list of names of the column(s) containing the data.
        If None, uses all columns except **groupcol**
    **kwargs
        Extra keyword arguments forwarded to ``figure``. ``plot_height``
        defaults to 250 but may be overridden here.

    Returns
    -------
    plt : obj
        The generated bokeh.figure object
    """

    # Get the groups
    groups = list(dataframe[groupcol])

    # Normalise datacols to a list of column names. A bare string is a
    # single column name; iterating it would yield its characters.
    if datacols is None:
        datacols = [col for col in dataframe.columns if col != groupcol]
    elif isinstance(datacols, str):
        datacols = [datacols]
    else:
        datacols = list(datacols)

    # One list of values per data column, in datacols order
    columns = [list(dataframe[col]) for col in datacols]

    # Nested factors (group, datacol) and the matching flat tuple of values:
    # zip(*columns) walks the rows, so values come out group by group with
    # the data columns in order, exactly like x.
    x = [(group, datacol) for group in groups for datacol in datacols]
    counts = tuple(value for row_values in zip(*columns)
                   for value in row_values)

    # Palette size: at least 3, the smallest size used for Category20c here.
    # NOTE(review): Category20c presumably has no entry above 20 -- confirm
    # the behaviour wanted for very wide frames.
    colors = max(3, len(datacols))
    source = ColumnDataSource(data=dict(x=x, counts=counts))

    # Let callers override the default height instead of hitting a
    # "multiple values for keyword argument" TypeError.
    kwargs.setdefault('plot_height', 250)

    # Make the figure
    hover = HoverTool(tooltips=[('count', '@counts')])
    plt = figure(x_range=FactorRange(*x),
                 tools=[hover],
                 **kwargs)
    # Colour by the second level of the nested factor (the data column)
    plt.vbar(x='x',
             top='counts',
             width=0.9,
             source=source,
             line_color="white",
             fill_color=factor_cmap('x',
                                    palette=Category20c[colors],
                                    factors=datacols,
                                    start=1,
                                    end=2))

    # Formatting
    plt.y_range.start = 0
    plt.x_range.range_padding = 0.1
    plt.xaxis.major_label_orientation = 1
    plt.xgrid.grid_line_color = None

    return plt
Example #25
0
def custom_reports(report_id):
    """Build one of three Bokeh reports and return it as a JSON string.

    The report is selected by ``report_id``:

    * ``'A'`` -- line chart of page views and total sales per date, read
      from ``ga_sink`` joined with ``demo_sales``.
    * ``'B'`` -- bar chart of summed page views per product, read from
      ``ga_sink``.
    * ``'C'`` -- wedge chart of each product's share of total sales,
      read from ``demo_sales``.

    Each branch returns ``json.dumps(json_item(p))`` for its figure. Any other
    ``report_id`` falls through every branch and returns ``None`` implicitly.
    """

    if report_id == 'A':

        # result = db_session.execute('''select ga_date,sum(page_views),floor(dbms_random.value(2000, 6000)) as sales
        #                                from ga_sink
        #                                group by ga_date''' ).fetchall()

        # Daily page views joined with daily sales totals on the date.
        result = db_session.execute(
            '''select T1.ga_date,T1.page_views, T2.total_sale
                                       from (select ga_date,sum(page_views) as page_views from ga_sink group by ga_date) T1
                                       join (select sale_date,sum(amount) as total_sale from demo_sales group by sale_date) T2
                                       on T1.ga_date=T2.sale_date''').fetchall(
            )

        # result = db_session.execute('''select T1."date",T1.page_views, T2.total_sale
        #                                from (select "date",sum(page_views) as page_views from test group by "date") T1
        #                                join (select sale_date,sum(amount) as total_sale from demo_sales group by sale_date) T2
        #                                on T1."date"=T2.sale_date''' ).fetchall()
        # NOTE(review): debug output of the raw rows; looks like leftover
        # diagnostics -- confirm before removing.
        print(result)

        # Index the rows by parsed date and sort chronologically.
        test = pd.DataFrame(result,
                            columns=['date', 'page_views', 'total_sale'])
        test['date'] = pd.to_datetime(test['date'])
        test.set_index(keys=['date'], inplace=True)
        test.sort_index(inplace=True)

        cds = ColumnDataSource(test)

        p = Figure(plot_width=1000,
                   plot_height=500,
                   title="Sales Vs Views",
                   y_range=Range1d(start=2500, end=33000),
                   x_axis_type='datetime',
                   x_axis_label='Date',
                   y_axis_label='Revenue($)')
        l1 = p.line('date',
                    'page_views',
                    source=cds,
                    line_color=d3['Category10'][10][0],
                    line_width=5,
                    legend="Page Views")
        l2 = p.line('date',
                    'total_sale',
                    source=cds,
                    line_color=d3['Category10'][10][1],
                    line_width=5,
                    legend="Revenue")
        # Secondary range "foo" backs the right-hand "Number of Views" axis.
        # NOTE(review): neither line above passes y_range_name, so both
        # appear to be drawn against the default y_range -- confirm whether
        # the page_views line was meant to use "foo".
        p.extra_y_ranges = {"foo": Range1d(start=0, end=6000)}
        p.add_layout(
            LinearAxis(y_range_name='foo', axis_label="Number of Views"),
            'right')
        p.legend.location = "bottom_right"
        p.background_fill_color = "beige"
        p.background_fill_alpha = 0.5
        p.border_fill_color = "#F8F8FF"

        # Hover tool bound to the page-views line only.
        p.add_tools(
            HoverTool(
                renderers=[l1],
                tooltips=[
                    ('date',
                     '@date{%F}'),  # use @{ } for field names with spaces
                    ('views', '@page_views'),
                ],
                formatters={
                    'date':
                    'datetime',  # use 'datetime' formatter for 'date' field
                    # use default 'numeral' formatter for other fields
                },

                # display a tooltip whenever the cursor is vertically in line with a glyph
                mode='vline'))

        # Hover tool bound to the revenue line only.
        p.add_tools(
            HoverTool(
                renderers=[l2],
                tooltips=[
                    # ( 'date',   '@date{%F}'            ),
                    ('revenue', '$@{total_sale}'
                     ),  # use @{ } for field names with spaces
                ],
                formatters={
                    # 'date'      : 'datetime', # use 'datetime' formatter for 'date' field
                    # NOTE(review): 'revenue' is the tooltip label, not a
                    # column of the data source -- confirm this formatter
                    # entry has any effect.
                    'revenue':
                    'printf',  # use 'printf' formatter for 'adj close' field
                    # use default 'numeral' formatter for other fields
                },

                # display a tooltip whenever the cursor is vertically in line with a glyph
                mode='vline'))

        return json.dumps(json_item(p))

    if report_id == "B":
        # Summed page views per product, highest first.
        result = db_session.execute(
            '''select product_id,sum(page_views) as views
                                       from ga_sink
                                       group by product_id
                                       order by views desc ''').fetchall()

        # result = db_session.execute('''select product_id,sum(page_views) as views
        #                                from test
        #                                group by product_id
        #                                order by views desc ''' ).fetchall()

        test = pd.DataFrame(result, columns=['product_id', 'page_views'])
        test.set_index(keys=['product_id'], inplace=True)

        cds = ColumnDataSource(test)

        # The product ids serve both as the categorical x-range and as the
        # factors of the colour mapper.
        p = Figure(x_range=cds.data['product_id'],
                   plot_height=350,
                   title="Top Products by Views",
                   tools="")

        p.vbar(x='product_id',
               top='page_views',
               source=cds,
               width=0.9,
               fill_color=factor_cmap(field_name='product_id',
                                      palette=d3['Category10'][10],
                                      factors=cds.data['product_id']))
        p.xgrid.grid_line_color = None
        p.y_range.start = 0
        p.background_fill_color = "beige"
        p.background_fill_alpha = 0.5
        p.border_fill_color = "#F8F8FF"

        return json.dumps(json_item(p))
    if report_id == "C":
        # cdata= [{'product_id':'BGB-US-001','total_sale': random.randint(1000,8000)},
        #             {'product_id':'BGB-US-002','total_sale': random.randint(1000,8000)},
        #             {'product_id':'BGB-US-003','total_sale': random.randint(1000,8000)},
        #             {'product_id':'BGB-US-004','total_sale': random.randint(1000,8000)},
        #             {'product_id':'BGB-US-005','total_sale': random.randint(1000,8000)},
        #             {'product_id':'BGB-US-006','total_sale': random.randint(1000,8000)},
        #             {'product_id':'BGB-US-007','total_sale': random.randint(1000,8000)}]

        # Summed sales amount per product.
        cdata = db_session.execute('''select product_id,sum(amount)
                                     from demo_sales
                                     group by product_id''').fetchall()
        c = pd.DataFrame(cdata, columns=['product_id', 'amount'])
        c.rename(columns={"amount": "total_sale"}, inplace=True)
        # NOTE(review): debug output of the frame; looks like leftover
        # diagnostics -- confirm before removing.
        print(c)
        c.set_index(keys=['product_id'], inplace=True)
        # Wedge angle: each product's fraction of the total, scaled to 2*pi.
        c['angle'] = c['total_sale'] / c['total_sale'].sum() * 2 * pi
        # First len(c) palette colours, in reverse order.
        # NOTE(review): the slice yields at most as many colours as the
        # palette holds (presumably 10), so a longer frame would not match
        # in length -- confirm the expected number of products.
        c['color'] = d3['Category10'][10][len(c) - 1::-1]
        c['percent'] = round(c['total_sale'] / c['total_sale'].sum() * 100, 0)

        cds = ColumnDataSource(c)

        p = Figure(plot_height=350,
                   title="Revenue Breakdown by Product",
                   tools="hover",
                   tooltips="@product_id: @percent %",
                   x_range=(-0.5, 1.0))

        # Each wedge starts where the cumulative angle of the previous
        # rows ends.
        p.wedge(x=0,
                y=1,
                radius=0.4,
                start_angle=cumsum('angle', include_zero=True),
                end_angle=cumsum('angle'),
                line_color="white",
                fill_color='color',
                legend='product_id',
                source=cds)

        p.axis.axis_label = None
        p.axis.visible = False
        p.grid.grid_line_color = None
        p.background_fill_color = "beige"
        p.background_fill_alpha = 0.5
        p.border_fill_color = "#F8F8FF"

        return json.dumps(json_item(p))
Example #26
0
def query(output, pop):  # pylint: disable=too-many-locals
    """Query script entry point.

    Projects the reprocessed samples onto the stored PCA loadings, unions
    the result with the stored scores, labels every sample as 'original',
    'reprocessed' or 'unedited', and writes one scatter plot per pair of
    consecutive principal components (PNG and HTML).

    Parameters
    ----------
    output : str
        Destination prefix. PNGs are written to
        ``{output}/reprocessed_sample_projection_pc<N>.png`` and the HTML
        files are copied to ``output`` with ``gsutil cp``.
    pop : str
        If truthy, columns whose inferred population equals ``pop.lower()``
        are kept alongside the samples whose id contains 'TOB'; otherwise
        only the 'TOB' samples are kept.

    A plot whose PNG already exists is skipped, so a rerun only produces
    the missing plots.
    """

    hl.init(default_reference='GRCh38')

    mt = hl.read_matrix_table(HGDP1KG_TOBWGS)
    if pop:
        # Get samples from the specified population only
        mt = mt.filter_cols((
            mt.hgdp_1kg_metadata.population_inference.pop == pop.lower())
                            | (mt.s.contains('TOB')))
    else:
        mt = mt.filter_cols(mt.s.contains('TOB'))

    # Get allele-frequency and loadings for pc_project function
    mt = mt.annotate_rows(af=hl.agg.mean(mt.GT.n_alt_alleles()) / 2)
    loadings = hl.read_table(LOADINGS)
    loadings = loadings.annotate(af=mt.rows()[loadings.key].af)
    reprocessed_samples = hl.read_matrix_table(REPROCESSED_1KG)
    reprocessed_samples = hl.experimental.densify(reprocessed_samples)
    reprocessed_samples = reprocessed_samples.annotate_entries(
        GT=lgt_to_gt(reprocessed_samples.LGT, reprocessed_samples.LA))
    # Project new genotypes onto loadings
    ht = pc_project(reprocessed_samples.GT, loadings.loadings, loadings.af)
    # Suffix the projected sample ids so they stay distinct from the
    # stored scores they are unioned with.
    ht = ht.key_by(s=ht.s + '_reprocessed')
    pcs = hl.read_table(SCORES)
    union_scores = ht.union(pcs)
    union_scores = union_scores.annotate(
        original=(union_scores.s == 'HG01513')
        | (union_scores.s == 'HG02238')
        | (union_scores.s == 'NA12248')
        | (union_scores.s == 'NA20502')
        | (union_scores.s == 'NA20826'),
        reprocessed=union_scores.s.contains('reprocessed'),
    )
    # Three-way label: listed sample ids that are not reprocessed are
    # 'original', reprocessed ids not in the list are 'reprocessed', and
    # everything else is 'unedited'.
    expr = (
        hl.case().when(
            (union_scores.original)
            & (
                union_scores.reprocessed  # pylint: disable=singleton-comparison
                == False  # noqa: E712
            ),
            'original',
        ).when(
            (union_scores.original == False)  # pylint: disable=singleton-comparison
            & (union_scores.reprocessed),
            'reprocessed',
        ).default('unedited'))
    union_scores = union_scores.annotate(cohort_sample_codes=expr)
    # get percentage of variance explained
    eigenvalues = hl.import_table(EIGENVALUES)
    eigenvalues = eigenvalues.to_pandas()
    eigenvalues.columns = ['eigenvalue']
    eigenvalues = pd.to_numeric(eigenvalues.eigenvalue)
    variance = eigenvalues.divide(float(eigenvalues.sum())) * 100
    variance = variance.round(2)

    # plot
    labels = union_scores.cohort_sample_codes
    sample_names = union_scores.s
    # Collect the labels once; they do not change between plots.
    label_values = labels.collect()
    # Sorted so the factor-to-colour assignment is the same on every run.
    # This matters because plots that already exist are skipped and may
    # therefore come from an earlier run.
    cohort_sample_codes = sorted(set(label_values))
    # The palette is looked up by size; the lookup is clamped to 3 so fewer
    # than three labels do not raise a KeyError.
    palette = Dark2[max(3, len(cohort_sample_codes))]
    tooltips = [('labels', '@label'), ('samples', '@samples')]
    for i in range(0, 10):
        pc1 = i
        pc2 = i + 1
        plot_filename = f'{output}/reprocessed_sample_projection_pc{i + 1}.png'
        if hl.hadoop_exists(plot_filename):
            continue

        plot = figure(
            title='Reprocessed Sample Projection',
            x_axis_label=f'PC{pc1 + 1} ({variance[pc1]}%)',
            # The y axis shows pc2, so it must report pc2's share of the
            # variance (it previously repeated pc1's).
            y_axis_label=f'PC{pc2 + 1} ({variance[pc2]}%)',
            tooltips=tooltips,
        )
        source = ColumnDataSource(
            dict(
                x=union_scores.scores[pc1].collect(),
                y=union_scores.scores[pc2].collect(),
                label=label_values,
                samples=sample_names.collect(),
            ))
        plot.circle(
            'x',
            'y',
            alpha=0.5,
            source=source,
            size=8,
            color=factor_cmap('label', palette, cohort_sample_codes),
            legend_group='label',
        )
        plot.add_layout(plot.legend[0], 'left')
        with hl.hadoop_open(plot_filename, 'wb') as f:
            get_screenshot_as_png(plot).save(f, format='PNG')
        # The HTML is saved locally first and then copied to the output
        # location; a failed copy does not abort the run (check=False).
        plot_filename_html = f'reprocessed_sample_projection_pc{i + 1}.html'
        output_file(plot_filename_html)
        save(plot)
        subprocess.run(['gsutil', 'cp', plot_filename_html, output],
                       check=False)
Example #27
0
def Electron_Energy_Graph(conn):
    """Build the 'Electron Energy' tab: a scatter plot plus control widgets.

    The data is read through ``conn`` with ``pd.read_sql`` (tables
    ``eEnergyICP`` and ``ElectronFWHMLimits``), plotted with tolerance lines
    and connected to widgets that let the user change the plotted fields, the
    legend position, the machines shown and the hover-tool fields, re-query
    the database ('Update') and zoom to a preset range ('Range').

    Args:
        conn: Database connection object handed to ``pd.read_sql``.

    Returns:
        A ``Panel`` titled 'Electron Energy' whose child is the full layout.
    """

    # NOTE(review): the original author flagged this call with a row of
    # question marks; confirm whether a standalone html output is wanted.
    output_file("Electron_Energy_Graph2.html")

    ############################################################################
    ############################# USER INPUTS ##################################

    # Decide what the default viewing option is going to be. (i.e. the fields to
    # be plotted on the x and y axis when the graph is opened).
    # NB: Have it set that if axis is 'adate' then will automatically update
    # to plot datetime.
    x_data1 = 'adate'
    y_data1 = '6fwhm'
    plot_title1 = 'Electron Energy'
    x_axis_title1 = x_data1
    y_axis_title1 = y_data1
    plot_size_height1 = 450
    plot_size_width1 = 800
    legend_location = 'bottom_left'
    hover_tool_fields = ['comments']
    # Create a list of the plot parameters that will be used as input to a
    # function later.
    list_plot_parameters = [
        x_data1, y_data1, plot_title1, x_axis_title1, y_axis_title1,
        plot_size_height1, plot_size_width1, legend_location
    ]
    # Define the fields that the legend will be based off. If there is only
    # one field then put it in both columns.
    color_column = 'machinename'
    custom_color_boolean = False
    custom_color_palette = []
    marker_column = 'machinename'
    custom_marker_boolean = False
    custom_marker_palette = []
    # From the legend defined above give the values that will be pre-ticked when
    # the plot is opened. NB: Bokeh will throw an error if one of these lists is
    # empty (i.e. =[]). If markers need their own selection, replace the second
    # assignment with a list of marker values (e.g. ['option1', 'option2']).
    color_to_plot = ['TrueBeam B', 'TrueBeam C']
    marker_to_plot = color_to_plot

    ############################################################################
    #################### CREATE THE DATA FOR THE GRAPH #########################

    # Do this in a function so it can be used in an update callback later

    def Create_df():
        """Query the database and return the cleaned TrueBeam dataframe."""
        # Use the connection passed to the function to read the data into a
        # dataframe via an SQL query.
        df = pd.read_sql('SELECT * FROM [eEnergyICP]', conn)

        # Delete empty rows where the data is very important to have
        df = df.dropna(subset=['protocol id'], how='any')

        # The format is complicated for this field but seems to be that the
        # date is always the first element and the machine is always the last
        # regardless of how many elements there are, so split on the first and
        # on the last '_' respectively.
        protocol_id = df['protocol id']
        date_text = protocol_id.str.partition(sep='_')[0]
        machine_text = protocol_id.str.rpartition(sep='_')[2]
        # A few database entries use a different datetime format; together
        # with dayfirst=True (overriding the American default) to_datetime
        # copes with this. Whole-column assignment via assign() is used rather
        # than ``df.loc[:, col] = ...`` because the latter writes in place on
        # pandas >= 2.0 and would leave 'adate' as an object column.
        df = df.assign(
            adate=pd.to_datetime(date_text, dayfirst=True),
            machinename=machine_text,
        )

        # Drop any rows that aren't related to the Truebeams (ditches the old
        # unneeded data). Might be possible to put this in the SQL query but
        # difficult as machinename is embedded in the protocol ID.
        df = df[df['machinename'].isin(
            ['TrueBeam B', 'TrueBeam C', 'TrueBeam D', 'TrueBeam F'])]

        # Drop any columns where there is no data (likely fields that were
        # only collected for the old linacs).
        df = df.dropna(axis='columns', how='all')

        return df

    df = Create_df()

    # Create a list of the fields using the dataframe. By doing it now before
    # the extra legend fields are added it's easy to limit what is displayed in
    # the select widgets.
    TableFields = list(df.columns)

    ############################################################################
    ################ CREATE THE DATAFRAME FOR THE TOLERANCES ###################

    # If you want to add tolerances change the boolean to True and construct the
    # dataframe in the correct format: the first column is the x-axis (with
    # values taken from the main dataframe to get the right formatting) and
    # each subsequent column holds the tolerances [low, high].
    # NB: column names should match those from the main dataframe.
    tolerance_boolean = True
    if tolerance_boolean:
        limits = pd.read_sql('SELECT * FROM [ElectronFWHMLimits]', conn)
        limits = limits.set_index('class')
        latest_date = df['adate'].max()
        df_tol1 = pd.DataFrame({
            'adate': [latest_date, latest_date],
            '6fwhm':
            [limits.loc['TBUCLH', 'lower6'], limits.loc['TBUCLH', 'upper6']],
            '9fwhm':
            [limits.loc['TBUCLH', 'lower9'], limits.loc['TBUCLH', 'upper9']],
            '12fwhm':
            [limits.loc['TBUCLH', 'lower12'], limits.loc['TBUCLH', 'upper12']],
            '15fwhm':
            [limits.loc['TBUCLH', 'lower15'], limits.loc['TBUCLH', 'upper15']],
        })

    ############################################################################
    # This is the end of the user input section. If you don't need to make any
    # other changes you can end here.
    ############################################################################

    ############################################################################
    ################### CREATE THE COLUMNS FOR THE LEGEND ######################

    (color_list, color_palette, marker_list, marker_palette, df,
     add_legend_to_df) = Create_Legend(df, color_column, custom_color_boolean,
                                       custom_color_palette, marker_column,
                                       custom_marker_boolean,
                                       custom_marker_palette)

    ############################################################################
    ################## FORMATTING AND CREATING A BASIC PLOT ####################

    ######### Make Dataset:
    # Run the Make_Dataset function to create a sub dataframe that the plot will
    # be made from.
    Sub_df1 = Make_Dataset(df, color_column, color_to_plot, marker_column,
                           marker_to_plot, x_data1, y_data1)

    # Make the ColumnDataSource (when making convert dataframe to a dictionary,
    # which is helpful for the callback).
    src1 = ColumnDataSource(Sub_df1.to_dict(orient='list'))

    ######### Make Plot:
    # Create an empty plot (plot parameters will be applied later in a way that
    # can be manipulated in the callbacks)
    p1 = figure()
    p1.scatter(
        source=src1,
        x='x',
        y='y',
        fill_alpha=0.4,
        size=12,
        # NB: Always use legend_field for this not legend_group as the
        # former acts on the javascript side but the latter the Python
        # side. Therefore the former will update automatically when the
        # plot is changed with no need for a callback.
        legend_field='legend',
        marker=factor_mark('marker1', marker_palette, marker_list),
        color=factor_cmap('color1', color_palette, color_list))

    ######### Add plot parameters:
    Define_Plot_Parameters(p1, list_plot_parameters)

    ############################################################################
    ############################ ADD TOLERANCES ################################

    # We defined the tolerances further up and now want to add the correct ones
    # to the plot. Only do this if the boolean is set to True as otherwise the
    # user doesn't want tolerances.
    if tolerance_boolean:
        Sub_df1_tol1 = Make_Dataset_Tolerance(x_data1, y_data1, Sub_df1,
                                              df_tol1)

        src1_tol = ColumnDataSource(Sub_df1_tol1.to_dict(orient='list'))

        p1.line(source=src1_tol, x='x', y='y_low', color='firebrick')
        p1.line(source=src1_tol, x='x', y='y_high', color='firebrick')

    ############################################################################
    ################## ADD MORE COMPLEX TOOLS TO THE PLOT ######################

    def hover_field_kwargs(fields):
        """Map up to ten field names to Field1..FieldN keyword arguments.

        With more than ten fields a message box is shown and an empty dict
        is returned, so the hover tool only shows the axis values.
        """
        if len(fields) > 10:
            msgbox('Too many fields selected to display on HoverTool '
                   '(Max = 10). Please reduce number of fields selected')
            return {}
        return {
            'Field' + str(position): name
            for position, name in enumerate(fields, start=1)
        }

    ######## 1)
    # Create a hover tool and add it to the plot
    hover1 = HoverTool()
    Update_HoverTool(hover1, x_data1, y_data1,
                     **hover_field_kwargs(hover_tool_fields))
    p1.add_tools(hover1)

    ############################################################################
    ################# CREATE WIDGETS TO BE ADDED TO THE PLOT ###################

    ######## 1)
    # This select function will be used to create dropdown lists to change the
    # data plotted on the x/y-axis.
    select_xaxis, select_yaxis = Create_Select_Axis(TableFields, x_axis_title1,
                                                    y_axis_title1)

    ######## 2)
    # This select widget will be used to create dropdown lists to change the
    # legend position.
    select_legend = Create_Select_Legend(legend_location)

    ######## 3)
    # These checkbox widgets will be used to create a tool to select the
    # legend values (colour / marker columns) that are being plotted.
    checkbox_color, checkbox_marker = Create_Checkbox_Legend(
        df, color_column, color_to_plot, marker_column, marker_to_plot)

    ######## 4)
    # This checkbox widget selects which fields the hover tool displays.
    checkbox_hovertool = Create_Checkbox_HoverTool(TableFields,
                                                   hover_tool_fields)

    ######## 5)
    # Make an 'Update Button' to requery the database and get up to date data.
    update_button = Button(label='Update', button_type='success')

    ######## 6)
    # Make a Range Button
    range_button = Button(label='Range', button_type='primary')

    ######## 7)
    # Make some titles for the checkboxes
    color_title = Div(text='<b>Machine Choice</b>')
    marker_title = Div(text='<b>Marker</b>')
    hover_title = Div(text='<b>Hovertool Fields</b>')

    ############################################################################
    ########################### CREATE A LAYOUT ################################

    # Create a layout to add widgets and arrange the display. The marker
    # checkboxes are only shown when they differ from the colour checkboxes.
    if color_column == marker_column:
        layout_checkbox = column(
            [color_title, checkbox_color, hover_title, checkbox_hovertool])
    else:
        layout_checkbox = column([
            color_title, checkbox_color, marker_title, checkbox_marker,
            hover_title, checkbox_hovertool
        ])

    button_row = row([update_button, range_button])

    layout_plots = column(
        [button_row, select_xaxis, select_yaxis, select_legend, p1])

    tab_layout = row([layout_plots, layout_checkbox])

    ############################################################################
    ####################### CREATE CALLBACK FUNCTIONS ##########################

    def selected_legend_values():
        """Return the ticked (colour, marker) legend values."""
        colors = [checkbox_color.labels[i] for i in checkbox_color.active]
        if color_column != marker_column:
            markers = [
                checkbox_marker.labels[i] for i in checkbox_marker.active
            ]
        else:
            markers = colors
        return colors, markers

    def refresh_plot():
        """Re-draw the plot from the current widget state and dataframe."""
        colors, markers = selected_legend_values()
        hover_fields = [
            checkbox_hovertool.labels[i] for i in checkbox_hovertool.active
        ]
        x_field = select_xaxis.value
        y_field = select_yaxis.value

        # Build the new sub dataframe from the current selections.
        new_sub_df = Make_Dataset(df, color_column, colors, marker_column,
                                  markers, x_field, y_field)

        # The axis titles simply follow the selected fields.
        Define_Plot_Parameters(p1, [
            x_field, y_field, plot_title1, x_field, y_field,
            plot_size_height1, plot_size_width1, select_legend.value
        ])

        Update_HoverTool(hover1, x_field, y_field,
                         **hover_field_kwargs(hover_fields))

        # Build the tolerance data before touching any source so that a
        # failure here leaves the displayed data untouched.
        if tolerance_boolean:
            new_tol_df = Make_Dataset_Tolerance(x_field, y_field, new_sub_df,
                                                df_tol1)

        # Update the ColumnDataSources.
        src1.data = new_sub_df.to_dict(orient='list')
        if tolerance_boolean:
            src1_tol.data = new_tol_df.to_dict(orient='list')

    # Create a big callback that does most stuff
    def callback(attr, old, new):
        refresh_plot()

    select_xaxis.on_change('value', callback)
    select_yaxis.on_change('value', callback)
    select_legend.on_change('value', callback)
    checkbox_color.on_change('active', callback)
    checkbox_marker.on_change('active', callback)
    checkbox_hovertool.on_change('active', callback)

    # Callback for the Update Button
    def callback_update():
        # Rebind the enclosing dataframe so that every later callback also
        # works on the freshly queried data (a plain assignment would only
        # create a local and the other callbacks would keep the stale data).
        nonlocal df
        df = add_legend_to_df(Create_df())
        refresh_plot()

    update_button.on_click(callback_update)

    # Callback for the Range Button
    def callback_range():
        colors, markers = selected_legend_values()
        x_field = select_xaxis.value
        y_field = select_yaxis.value

        # Use the pre-defined Make_Dataset function with these inputs to
        # create a new version of the sub dataframe.
        current_sub_df = Make_Dataset(df, color_column, colors, marker_column,
                                      markers, x_field, y_field)

        # Preset y-axis windows (start, end) for each FWHM field.
        y_windows = {
            '6fwhm': (9.6, 10.3),
            '9fwhm': (12.6, 13.32),
            '12fwhm': (16.25, 17.01),
            '15fwhm': (19.4, 20.16),
            '16fwhm': (19.5, 19.9),
        }

        if x_field == 'adate' and y_field in y_windows:
            # Show roughly the last year of data plus a small margin.
            latest = current_sub_df['x'].max()
            p1.x_range.start = latest - timedelta(weeks=53)
            p1.x_range.end = latest + timedelta(weeks=2)

            p1.y_range.start, p1.y_range.end = y_windows[y_field]

    range_button.on_click(callback_range)

    ############################################################################
    ####################### RETURN TO THE MAIN SCRIPT ##########################

    return Panel(child=tab_layout, title='Electron Energy')
Example #28
0
# Grouped bar chart: one group per fruit, one bar per year, coloured by year.
output_file("bars.html")

fruits = ['Apples', 'Pears', 'Nectarines', 'Plums', 'Grapes', 'Strawberries']
years = ['2015', '2016', '2017']

data = {
    'fruits': fruits,
    '2015': [2, 1, 4, 3, 2, 4],
    '2016': [5, 3, 3, 2, 4, 6],
    '2017': [3, 2, 4, 4, 5, 3],
}

palette = ["#c9d9d3", "#718dbf", "#e84d60"]

# Nested categorical coordinates, fruit-major:
# [ ("Apples", "2015"), ("Apples", "2016"), ("Apples", "2017"), ("Pears", "2015"), ... ]
x = [(fruit, year) for fruit in fruits for year in years]

# Interleave the per-year lists into one flat tuple in the same fruit-major
# order as ``x`` (like an hstack).
counts = tuple(
    value
    for per_fruit in zip(*(data[year] for year in years))
    for value in per_fruit
)

source = ColumnDataSource(data=dict(x=x, counts=counts))

p = figure(
    x_range=FactorRange(*x),
    plot_height=250,
    title="Fruit Counts by Year",
    toolbar_location=None,
    tools="",
)

# start=1, end=2 makes the colour mapper look only at the second level of each
# (fruit, year) factor, i.e. the year.
p.vbar(
    x='x',
    top='counts',
    width=0.9,
    source=source,
    line_color="white",
    fill_color=factor_cmap('x',
                           palette=palette,
                           factors=years,
                           start=1,
                           end=2),
)

p.y_range.start = 0
p.x_range.range_padding = 0.1
p.xaxis.major_label_orientation = 1
p.xgrid.grid_line_color = None

show(p)
                output_backend="webgl")

# Axis and grid tweaks for the ``Jitter`` figure, which is created by a
# statement that begins above this fragment.
# NOTE(review): presumably '%Hh' labels day-scale ticks with the hour — confirm.
Jitter.xaxis[0].formatter.days = ['%Hh']
Jitter.x_range.range_padding = 0
Jitter.ygrid.grid_line_color = None
# Wrap the figure in a tab (the title is the user-facing label).
tab_points = Panel(child=Jitter, title="Données")

# Scatter plot data: every event, spread out by hour and day of the week.
# The y position is jittered within each 'WeekDay' category and the colour is
# picked per 'KiKo' value.
CircleChart = Jitter.circle(x='H_VA',
                            y=jitter('WeekDay',
                                     width=0.8,
                                     range=Jitter.y_range),
                            size=3,
                            legend="KiKo",
                            color=factor_cmap('KiKo',
                                              palette=Category10[10],
                                              factors=ListKiKo),
                            source=events_sources,
                            alpha=0.8,
                            hover_color='gold')

# AVERAGE SPEED CHART

# Initialise the data with empty columns.
VitessesStartDict = dict(heure=[], Jour=[], rate=[])
VitesseSource = ColumnDataSource(data=VitessesStartDict)


# function to extract the speeds from the DataFrame
def vitesses_mediannes(df):
    DAYS = ['Lun', 'Mar', 'Mer', 'Jeu', 'Ven', 'Sam', 'Dim']
Example #30
0
def _save_pca_plots(pca_scores, variance, number_of_pcs, labels, samples,
                    factors, palette, title, file_prefix):
    """Write a PNG and an HTML scatter plot for each pair of consecutive PCs.

    Args:
        pca_scores: Hail array expression of per-sample PC scores.
        variance: Percent variance explained, indexable by 0-based PC number.
        number_of_pcs: Total number of principal components.
        labels: Collected per-sample labels used for colour and legend.
        samples: Collected sample names (shown in the tooltip).
        factors: Distinct label values, in the order colours are assigned.
        palette: Colours handed to ``factor_cmap`` together with ``factors``.
        title: Plot title.
        file_prefix: Output file name prefix (``<prefix>_pc<N>.png/.html``).
    """
    tooltips = [('labels', '@label'), ('samples', '@samples')]
    for pc1 in range(number_of_pcs - 1):
        pc2 = pc1 + 1
        print(f'PC{pc1 + 1} vs PC{pc2 + 1}')
        plot = figure(
            title=title,
            x_axis_label=f'PC{pc1 + 1} ({variance[pc1]}%)',
            y_axis_label=f'PC{pc2 + 1} ({variance[pc2]}%)',
            tooltips=tooltips,
        )
        source = ColumnDataSource(
            dict(
                x=pca_scores[pc1].collect(),
                y=pca_scores[pc2].collect(),
                label=labels,
                samples=samples,
            ))
        plot.circle(
            'x',
            'y',
            alpha=0.5,
            source=source,
            size=4,
            color=factor_cmap('label', palette, factors),
            legend_group='label',
        )
        plot.add_layout(plot.legend[0], 'left')

        plot_filename = output_path(f'{file_prefix}_pc{pc2}.png', 'web')
        with hl.hadoop_open(plot_filename, 'wb') as f:
            get_screenshot_as_png(plot).save(f, format='PNG')

        html = file_html(plot, CDN, 'my plot')
        plot_filename_html = output_path(f'{file_prefix}_pc{pc2}.html', 'web')
        with hl.hadoop_open(plot_filename_html, 'w') as f:
            f.write(html)


def query():
    """Query script entry point.

    Plots consecutive principal components of the combined dataset twice:
    once coloured by study (TOB-WGS vs HGDP-1kG) and once coloured by the
    labelled subpopulation, writing PNG and HTML files via ``output_path``.
    """

    hl.init(default_reference='GRCh38')

    mt = hl.read_matrix_table(HGDP1KG_TOBWGS)
    scores = hl.read_table(SCORES)

    # Filter outliers and related samples
    mt = mt.semi_join_cols(scores)
    mt = mt.annotate_cols(scores=scores[mt.s].scores)
    mt = mt.annotate_cols(
        study=hl.if_else(mt.s.contains('TOB'), 'TOB-WGS', 'HGDP-1kG'))

    # PCA plot must all come from the same object
    columns = mt.cols()
    pca_scores = columns.scores
    # Collect the per-sample columns once; they do not change between plots,
    # and every collect() triggers a Hail query.
    sample_names = columns.s.collect()
    study_labels = columns.study.collect()
    # Sorted so that the label -> colour assignment is the same on every run
    # (plain set iteration order varies between interpreter sessions).
    cohort_sample_codes = sorted(set(study_labels))

    # get percent variance explained
    eigenvalues = hl.import_table(EIGENVALUES)
    eigenvalues = eigenvalues.to_pandas()
    eigenvalues.columns = ['eigenvalue']
    eigenvalues = pd.to_numeric(eigenvalues.eigenvalue)
    variance = eigenvalues.divide(float(eigenvalues.sum())) * 100
    variance = variance.round(2)

    # Get number of PCs
    number_of_pcs = len(eigenvalues)

    print('Making PCA plots labelled by study')
    _save_pca_plots(
        pca_scores,
        variance,
        number_of_pcs,
        labels=study_labels,
        samples=sample_names,
        factors=cohort_sample_codes,
        palette=['#1b9e77', '#d95f02'],
        title='TOB-WGS + HGDP/1kG Dataset',
        file_prefix='study',
    )

    print('Making PCA plots labelled by the subpopulation')
    subpop_labels = columns.hgdp_1kg_metadata.labeled_subpop.collect()
    # Samples without a labelled subpopulation are treated as TOB-WGS.
    subpop_labels = ['TOB-WGS' if x is None else x for x in subpop_labels]
    subpopulation = sorted(set(subpop_labels))
    # change ordering of subpopulations
    # so TOB-WGS is at the end and glyphs appear on top
    subpopulation.append(subpopulation.pop(subpopulation.index('TOB-WGS')))

    _save_pca_plots(
        pca_scores,
        variance,
        number_of_pcs,
        labels=subpop_labels,
        samples=sample_names,
        factors=subpopulation,
        palette=turbo(len(subpopulation)),
        title='Subpopulation',
        file_prefix='subpopulation',
    )
Example #31
0
def returnBarGraph(df,
                   title=' ',
                   y_label='score/percentage',
                   label_suffix='',
                   palette=paletteR):
    """Return a grouped vertical bar chart built from ``df``.

    The first column of ``df`` holds the subcategory names; every other
    column is one category whose values (cast to int) become the bar heights.
    Each bar is annotated with its value followed by ``label_suffix``.

    Args:
        df: DataFrame laid out as described above.
        title: Plot title.
        y_label: Label of the y axis.
        label_suffix: Text appended to every bar's value label (e.g. '%').
        palette: Colours mapped onto the subcategories.

    Returns:
        The configured Bokeh figure.
    """
    # All but first column are the categories
    categories = list(df.columns)[1:]
    # Convert every category column to int in a single pass
    df = df.astype({cat: int for cat in categories})

    # Content of the first column are the subcategories per category
    subcats = list(df.iloc[:, 0])
    x = [(category, subcat) for category in categories for subcat in subcats]

    # Flatten column by column so the values line up with ``x``
    values = [value for cat in categories for value in df[cat].tolist()]
    value_labels = [str(value) + label_suffix for value in values]

    source = ColumnDataSource(data=dict(x=x, y=values, labels=value_labels))

    p = figure(x_range=FactorRange(*x),
               y_range=ranges.Range1d(start=0, end=105),
               y_minor_ticks=10,
               y_axis_label=y_label,
               plot_height=800,
               plot_width=1280,
               title=title,
               title_location='above',
               toolbar_location=None,
               tools="")

    # Value labels, rotated 90 degrees and offset from each bar's top
    labels = LabelSet(x='x',
                      y='y',
                      text='labels',
                      level='glyph',
                      x_offset=7,
                      y_offset=5,
                      angle=90,
                      angle_units='deg',
                      source=source,
                      render_mode='canvas',
                      text_font_size="9pt")

    # start=1 makes the colour mapper use the second level of each
    # (category, subcat) factor, i.e. the subcategory.
    p.vbar(x='x',
           top='y',
           width=0.9,
           source=source,
           line_color="white",
           fill_color=factor_cmap('x',
                                  palette=palette,
                                  factors=subcats,
                                  start=1))

    p.add_layout(labels)

    p.x_range.range_padding = 0.05
    p.title.align = 'center'
    p.title.text_font_size = "12pt"
    p.title.text_font_style = "bold"
    p.xaxis.major_label_orientation = math.pi / 2
    p.xgrid.grid_line_color = None

    return p
from bokeh.io import output_file, show
from bokeh.palettes import Spectral5
from bokeh.plotting import figure
from bokeh.sampledata.autompg import autompg as df
from bokeh.transform import factor_cmap


# Flat script: bar chart of mean MPG per cylinder count, one colour per group.

# Cast the cylinder count to strings so the values can be used as categorical
# factors (x-range categories and colour-mapper factors).
df.cyl = df.cyl.astype(str)
group = df.groupby('cyl')

# Map each distinct cylinder value (in sorted order) to a Spectral5 colour.
cyl_cmap = factor_cmap('cyl', palette=Spectral5, factors=sorted(df.cyl.unique()))

# The GroupBy object itself is handed to figure() as the x-range.
p = figure(plot_height=350, x_range=group, title="MPG by # Cylinders",
           toolbar_location=None, tools="")

# NOTE(review): 'mpg_mean' is not a column of df; presumably Bokeh derives it
# from the GroupBy passed as `source` -- confirm against the Bokeh
# ColumnDataSource documentation.
p.vbar(x='cyl', top='mpg_mean', width=1, source=group,
       line_color=cyl_cmap, fill_color=cyl_cmap)

# Cosmetics: bars start at zero, no vertical grid lines, tilted tick labels,
# no plot outline.
p.y_range.start = 0
p.xgrid.grid_line_color = None
p.xaxis.axis_label = "some stuff"
p.xaxis.major_label_orientation = 1.2
p.outline_line_color = None

# The output path is relative to the current working directory.
output_file("../template/bar_pandas_group_by_color_mapped.html")

show(p)
    def bokehB_mort(num=100):
        """Save a bar chart of the 20 highest country mortality rates.

        The mortality rate is ``deaths / confirmed * 100`` rounded to two
        decimals, taken from the ``yesterday`` column of the frames in
        ``sets_grouped`` (index 0 is used as the confirmed set, index 1 as
        the deaths set). ``sets_grouped``, ``yesterday``, ``today_date`` and
        ``pd`` come from the enclosing scope.

        Args:
            num: Death-count threshold. A country is kept only when its death
                count is strictly greater than this value.

        Returns:
            The value returned by ``bokeh.io.save`` for 'top_mortality.html'.
        """
        from bokeh.io import output_file, save
        from bokeh.plotting import figure
        from bokeh.models import ColumnDataSource, HoverTool
        from bokeh.palettes import Viridis as palette
        from bokeh.transform import factor_cmap

        # Countries ranked by deaths, keeping those above the threshold.
        top_death = sets_grouped[1][yesterday].sort_values(ascending=False)
        top_death = top_death[top_death > num]

        # Inner join to the confirmed set, compute mortality rate and take top 20.
        # Column 0 is the confirmed count, column 1 the death count.
        df_mort = pd.concat([sets_grouped[0][yesterday], top_death],
                            axis=1,
                            join='inner')
        mort_rate = round(df_mort.iloc[:, 1] / df_mort.iloc[:, 0] * 100, 2)
        mort_rate = mort_rate.sort_values(ascending=False).to_frame().head(20)

        # Reduce the frame to a single column named 'totals', which is the
        # column name the glyph and the tooltip refer to.
        df = mort_rate.iloc[:, -1].sort_values(
            ascending=False).head(20).to_frame()
        df['totals'] = df.iloc[:, -1]
        df.drop(df.columns[0], axis=1, inplace=True)

        # Tag every country with its continent so bars can be coloured by it.
        import country_converter as coco
        continent = coco.convert(names=df.index.to_list(), to='Continent')
        df['Continent'] = continent
        cont_cat = len(df['Continent'].unique())

        # The Viridis palette dict has no entries below size 3, so indexing it
        # with 1 or 2 continents would raise KeyError. Request at least 3
        # colours; surplus palette entries are simply not used by the mapper.
        palette_size = max(cont_cat, 3)

        # NOTE(review): the glyph and tooltip use a 'Country' column, so the
        # frame's index is presumably named 'Country' -- confirm upstream.
        source = ColumnDataSource(df)

        select_tools = ['save']
        tooltips = [('Country', '@Country'), ('Rate', '@totals{0.00}%')]

        p = figure(x_range=df.index.tolist(), plot_width=840, plot_height=600,
                   x_axis_label='Country',
                   y_axis_label='Rate (%)',
                   title="Mortality rate of countries with at least {} deaths " \
                         "as of ".format(num) + today_date,
                   tools=select_tools)

        p.vbar(x='Country',
               top='totals',
               width=0.9,
               alpha=0.7,
               source=source,
               legend_field="Continent",
               fill_color=factor_cmap('Continent',
                                      palette=palette[palette_size],
                                      factors=df.Continent.unique()))

        p.xgrid.grid_line_color = None
        p.y_range.start = 0
        p.xaxis.major_label_orientation = 1
        p.left[0].formatter.use_scientific = False

        p.add_tools(HoverTool(tooltips=tooltips))

        output_file('top_mortality.html')

        return save(p, 'top_mortality.html')
Example #34
0
# Flat script fragment: skills-by-level map with one toggleable layer per
# location. `locations`, `cds_data`, `levels` and `skills` are defined
# outside this fragment.
location_views = {}

# Build one CDSView per location, each filtering the shared data source down
# to the rows whose 'location' column equals that location.
for loc in locations:
    location_views[loc] = CDSView(source=cds_data, filters=[GroupFilter(column_name='location', group=loc)])

# Hover tooltip rows, read from the 'name' and 'location' columns.
TOOLTIPS = [
    ("Name", "@name"),
    ("Location", "@location"),
]
# Base plot: categorical axes taken from `levels` (x) and `skills` (y).
p = figure(title="brain-tec: Skills Map by location", plot_width=1400, plot_height=1600,
           x_range=levels, y_range=skills,
           tools="hover", toolbar_location=None, tooltips=TOOLTIPS)

# Draw one rect renderer per location, each restricted by its view and given
# its own legend entry. The colour is mapped from the 'location' column.
for name, l_view in location_views.items():
    p.rect(x="level", y="skill", width=1, height=1, source=cds_data, view=l_view, fill_alpha=0.6,
           legend=name, color=factor_cmap("location", palette=Spectral6, factors=locations))

# Strip the outline, grid, axis lines and major ticks.
p.outline_line_color = None
p.grid.grid_line_color = None
p.axis.axis_line_color = None
p.axis.major_tick_line_color = None
p.axis.major_label_standoff = 0
# Vertical legend in the top-right corner; clicking an entry hides its glyphs.
p.legend.orientation = "vertical"
p.legend.location ="top_right"
p.legend.click_policy="hide"

show(p)
Example #35
0
]

# Create the figure with its title and hover tooltips.
# NOTE(review): the title string misspells "Survival"; it is left untouched
# here because it is user-facing runtime text.
p = figure(title="Titanic Passenger Age & Fare by Survial Type",
           tooltips=TOOLTIPS_SCATTER)

# Draw the scatter glyphs: both the marker shape and the colour are mapped
# from the 'Survived' column using the categories in FATE.
p.scatter("Fare",
          "Age",
          source=titanic_df,
          legend="Survived",
          fill_alpha=0.3,
          size=12,
          marker=factor_mark('Survived', MARKERS, FATE),
          color=factor_cmap('Survived',
                            palette=['#3a6587', '#aeb3b7'],
                            factors=FATE))

# Set the axis labels
p.xaxis.axis_label = 'Fare (In Pounds)'
p.yaxis.axis_label = 'Age (In Years)'

# Remove the grid lines
p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None

# Restyle the x-axis only: thicker grey axis line, black tick labels.
p.xaxis.axis_line_width = 2
p.xaxis.major_label_text_color = "black"
p.xaxis.axis_line_color = "#aeb3b7"
        def groupedBar(df, xlabel, vFields, color=None, clustered=False, title=None):
            """Build a grouped vertical bar chart with nested categorical x factors.

            Args:
                df: Source frame. When ``clustered`` is true it is read cell by
                    cell with ``df.at[row_label, column_label]``; otherwise it
                    is looked up by the ``xlabel`` column and the ``vFields``
                    columns.
                xlabel: Column holding the outer categories (non-clustered
                    mode); also used as the x-axis label and tooltip name.
                vFields: Value field names; joined with ',' for the y label.
                color: Palette to use. When ``None`` the palette comes from
                    ``self.colorPalette`` sized to the number of factors.
                clustered: Selects the cell-by-cell layout described above.
                title: Figure title.

            Returns:
                The configured Bokeh figure.
            """
            ylabel = ','.join(vFields)

            if clustered:
                # Outer factor: column label; inner factor: index label.
                outer = list(df.columns.values)
                inner = list(df.index)
                factors = inner
                x = [(b, a) for b in outer for a in inner]
                legend_labels = [a for b in outer for a in inner]
                # Plain list build; the previous sum(zip(...), ()) produced the
                # same values through repeated tuple concatenation.
                counts = [df.at[a, b] for b in outer for a in inner]
            else:
                # Outer factor: value of the xlabel column; inner factor: field.
                outer = list(df[xlabel].values)
                factors = vFields
                x = [(b, a) for b in outer for a in vFields]
                legend_labels = [a for b in outer for a in vFields]
                # First matching row wins when an xlabel value is repeated.
                counts = [df[df[xlabel] == b][a].values[0] for b in outer for a in vFields]

            src = ColumnDataSource(data=dict(x=x, counts=counts, l=legend_labels))
            colors = self.colorPalette(len(factors)) if color is None else color

            p = figure(x_range=FactorRange(*x), y_axis_label=ylabel, x_axis_label=xlabel, title=title)
            # start=1, end=2 makes the colour mapper look only at the inner
            # (second) level of each (outer, inner) factor tuple.
            p.vbar(x='x', top='counts', width=0.925, source=src, legend='l' if self.showLegend() else None, color=factor_cmap('x', palette=colors, factors=factors, start=1, end=2))

            # Extend the y-range below zero only when there are negative counts.
            p.y_range.start = 0 if not counts else min(0, min(counts))
            p.axis.minor_tick_line_color = None
            p.outline_line_color = None
            p.x_range.range_padding = 0.1
            # Hide the inner-level tick labels and ticks on the x-axis.
            p.xaxis.major_label_orientation = 1
            p.xaxis.major_label_text_font_size = "0px"
            p.xaxis.major_label_text_color = None
            p.xaxis.major_tick_line_color = None
            p.xgrid.grid_line_color = None
            p.legend.location = "top_left"

            hover = HoverTool()
            hover.tooltips = [(xlabel, '@x'), (ylabel, '@counts{0.00}')]
            p.add_tools(hover)

            return p
Example #37
0
        row(dropdown, dropdown1, bt, bt1, bt2, bt3, dropdown2, dropdown3,
            dropdown4, dropdown5, bt8)))
#show(layout)

# Flat script fragment: bar chart of total sales quantity per month name.

# Sum 'Sales Qty' per month name (formatted from 'Inv Date') and sort the
# totals in ascending order.
sales_data = df.groupby(
    df['Inv Date'].dt.strftime('%B'))['Sales Qty'].sum().sort_values()
#sales_data1=df.sort_values(by='Inv Date', ascending= False)
# Scale the totals down by 1000.
grouped = sales_data / 1000
print(grouped)

# The month names end up in the 'Inv Date' column of the source and are used
# as the categorical x-range.
source = ColumnDataSource(pd.DataFrame(grouped))
states = source.data['Inv Date'].tolist()
p = figure(x_range=states, plot_width=250, plot_height=200)

# NOTE(review): color_map is built but never passed to a glyph below; the
# bars use a fixed colour instead -- confirm whether that is intended.
color_map = factor_cmap(field_name='Inv Date',
                        palette=Spectral5,
                        factors=states)
p.vbar(x='Inv Date',
       top='Sales Qty',
       source=source,
       width=0.70,
       color="#FFFF99")

# Centred title, no grid lines, y-axis hidden, thick translucent outline.
p.title.text = 'Total Sales'
p.title.align = 'center'
p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None
p.yaxis.visible = False
p.xaxis.visible = True
p.outline_line_width = 7
p.outline_line_alpha = 0.3
from bokeh.io import show, output_file
from bokeh.palettes import Spectral5
from bokeh.plotting import figure
from bokeh.sampledata.autompg import autompg as df
from bokeh.transform import factor_cmap

# Flat script: bar chart of mean MPG per cylinder count, one colour per group.
output_file("bar_pandas_groupby_colormapped.html")

# Cast the cylinder count to strings so the values can be used as categorical
# factors (x-range categories and colour-mapper factors).
df.cyl = df.cyl.astype(str)
group = df.groupby('cyl')

# Map each distinct cylinder value (in sorted order) to a Spectral5 colour.
cyl_cmap = factor_cmap('cyl', palette=Spectral5, factors=sorted(df.cyl.unique()))

# The GroupBy object itself is handed to figure() as the x-range.
p = figure(plot_height=350, x_range=group, title="MPG by # Cylinders",
           toolbar_location=None, tools="")

# NOTE(review): 'mpg_mean' is not a column of df; presumably Bokeh derives it
# from the GroupBy passed as `source` -- confirm against the Bokeh
# ColumnDataSource documentation.
p.vbar(x='cyl', top='mpg_mean', width=1, source=group,
       line_color=cyl_cmap, fill_color=cyl_cmap)

# Cosmetics: bars start at zero, no vertical grid lines, tilted tick labels,
# no plot outline.
p.y_range.start = 0
p.xgrid.grid_line_color = None
p.xaxis.axis_label = "some stuff"
p.xaxis.major_label_orientation = 1.2
p.outline_line_color = None

show(p)
Example #39
0
    def _plot_superimposed_ohlc():
        """Draw lighter, downsampled OHLC candles onto ``fig_ohlc``.

        Reads ``superimpose``, ``time_resolution``, ``orig_df``, ``df``,
        ``omit_missing`` and ``fig_ohlc`` from the enclosing scope and adds a
        ``_width`` column to ``orig_df`` as a side effect.

        Returns early (after emitting a warning) when no resample rule can be
        determined or when the resampled period equals the original one.

        Raises:
            ValueError: If the resampled period is shorter than the original
                period (i.e. the rule would upsample).
        """
        # An explicit string rule wins; otherwise pick the next coarser period
        # for the index resolution (None when the resolution is not listed).
        resample_rule = (superimpose if isinstance(superimpose, str) else dict(
            day='W', hour='D', minute='H', second='T',
            millisecond='S').get(time_resolution))
        if not resample_rule:
            warnings.warn(
                "'Can't superimpose OHLC data with rule '{}' (index datetime resolution: '{}'). "
                "Skipping.".format(resample_rule, time_resolution),
                stacklevel=4)
            return

        # A constant 1 per row, aggregated with 'count' below, so that
        # `_width` in df2 holds the number of original rows per resampled bin.
        orig_df['_width'] = 1
        from .lib import OHLCV_AGG
        df2 = orig_df.resample(resample_rule, label='left').agg(
            dict(OHLCV_AGG, _width='count'))

        # Check if resampling was downsampling; error on upsampling
        orig_freq = _data_period(orig_df)
        resample_freq = _data_period(df2)
        if resample_freq < orig_freq:
            raise ValueError(
                'Invalid value for `superimpose`: Upsampling not supported.')
        if resample_freq == orig_freq:
            warnings.warn(
                'Superimposed OHLC plot matches the original plot. Skipping.',
                stacklevel=4)
            return

        if omit_missing:
            # Bar width comes from the data source column '_width'. Each bar is
            # placed at the cumulative row count before it plus half of its own
            # row count (minus .5), i.e. centred over the rows it covers.
            width2 = '_width'
            df2.index = df2['_width'].cumsum().shift(1).fillna(0)
            df2.index += df2['_width'] / 2 - .5
            df2['_width'] -= .1  # Candles don't touch
        else:
            # NOTE(review): `_width` was added to `orig_df` above but is removed
            # from `df` here; presumably both names refer to the same frame on
            # this branch -- confirm in the enclosing function.
            del df['_width']
            # Fixed bar width in milliseconds, looked up by index resolution.
            # A resolution missing from this dict raises KeyError.
            width2 = dict(day=86400 * 5, hour=86400, minute=3600,
                          second=60)[time_resolution] * 1000
            # Shift bars right by half a width so they are centred on their bin.
            df2.index += pd.Timedelta(
                width2 / 2 +
                (width2 /
                 5 if resample_rule == 'W' else 0),  # Sunday week start
                unit='ms')
        # '1' where the bin closed at or above its open, else '0'; stored as
        # strings so the values can serve as factors for factor_cmap.
        df2['inc'] = (df2.Close >= df2.Open).astype(np.uint8).astype(str)
        df2.index.name = None
        source2 = ColumnDataSource(df2)
        # High-low wicks in light grey.
        fig_ohlc.segment('index',
                         'High',
                         'index',
                         'Low',
                         source=source2,
                         color='#bbbbbb')
        # Palette order matches the factors ['0', '1'] below: bear first, then
        # bull, both passed through lightness(..., .92).
        colors_lighter = [
            lightness(BEAR_COLOR, .92),
            lightness(BULL_COLOR, .92)
        ]
        # Candle bodies spanning Open to Close, without an outline.
        fig_ohlc.vbar('index',
                      width2,
                      'Open',
                      'Close',
                      source=source2,
                      line_color=None,
                      fill_color=factor_cmap('inc', colors_lighter,
                                             ['0', '1']))
Example #40
0
# Flat script fragment: bar chart of the average Titanic fare per ticket class.
output_file("bar_v_titanic.html")

# Ticket classes and their average fares, in matching order.
ticket = ['First', 'Second', 'Third']
counts = [84.15, 20.66, 13.68]


# The vbar `width` parameter controls the width of the columns.
# The bar colours are supplied as a palette list via factor_cmap.
# The figure size below (971 x 600) gives a width-to-height ratio of roughly
# 1.618:1.

source = ColumnDataSource(data=dict(ticket=ticket, counts=counts))

p = figure(x_range=ticket, plot_height=600, plot_width= 971, toolbar_location=None,
    title="Average Titanic Fare, by Class")
# The palette gives the first class its own colour; the second and third
# classes share the same grey.
p.vbar(x='ticket', top='counts', width=0.7, source=source, legend="ticket",
       line_color='white', fill_color=factor_cmap('ticket',
        palette=['#3a6587', '#aeb3b7', '#aeb3b7'], factors=ticket))


# Remove the chart grid lines (i.e. remove the chart clutter).
p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None

# Restyle the x-axis: label, thicker axis line, grey tick labels and line.
p.xaxis.axis_label = "Class Type"
p.xaxis.axis_line_width = 2
p.xaxis.major_label_text_color = "#aeb3b7"
p.xaxis.axis_line_color = "#aeb3b7"


# Restyle the y-axis: label only.
p.yaxis.axis_label = "Average Fare Price (in Pounds)"
Example #41
0
def plot(*,
         results,
         df,
         indicators,
         filename='',
         plot_width=None,
         plot_equity=True,
         plot_pl=True,
         plot_volume=True,
         plot_drawdown=False,
         smooth_equity=False,
         relative_equity=True,
         omit_missing=True,
         superimpose=True,
         show_legend=True,
         open_browser=True):
    """
    Like much of GUI code everywhere, this is a mess.
    """

    # We need to reset global Bokeh state, otherwise subsequent runs of
    # plot() contain some previous run's cruft data (was noticed when
    # TestPlot.test_file_size() test was failing).
    _bokeh_reset(filename)

    COLORS = [BEAR_COLOR, BULL_COLOR]

    orig_trade_data = trade_data = results._trade_data.copy(False)

    orig_df = df = df.copy(False)
    df.index.name = None  # Provides source name @index
    index = df.index
    time_resolution = getattr(index, 'resolution', None)
    is_datetime_index = index.is_all_dates

    # If all Volume is NaN, don't plot volume
    plot_volume = plot_volume and not df.Volume.isnull().all()

    # OHLC vbar width in msec.
    # +1 will work in case of non-datetime index where vbar width should just be =1
    bar_width = 1 + dict(day=86400, hour=3600, minute=60, second=1).get(
        time_resolution, 0) * 1000 * .85

    if is_datetime_index:
        # Add index as a separate data source column because true .index is offset to align vbars
        df['datetime'] = index
        df.index = df.index + pd.Timedelta(bar_width / 2, unit='ms')

    if omit_missing:
        bar_width = .8
        df = df.reset_index(drop=True)
        trade_data = trade_data.reset_index(drop=True)
        index = df.index

    new_bokeh_figure = partial(
        _figure,
        x_axis_type='datetime'
        if is_datetime_index and not omit_missing else 'linear',
        plot_width=plot_width,
        plot_height=400,
        tools="xpan,xwheel_zoom,box_zoom,undo,redo,reset,crosshair,save",
        active_drag='xpan',
        active_scroll='xwheel_zoom')

    pad = (index[-1] - index[0]) / 20

    fig_ohlc = new_bokeh_figure(x_range=Range1d(
        index[0], index[-1], bounds=(index[0] - pad, index[-1] +
                                     pad)) if index.size > 1 else None)
    figs_above_ohlc, figs_below_ohlc = [], []

    source = ColumnDataSource(df)
    source.add((df.Close >= df.Open).values.astype(np.uint8).astype(str),
               'inc')
    returns = trade_data['Returns'].dropna()
    trade_source = ColumnDataSource(
        dict(
            index=returns.index,
            datetime=orig_trade_data['Returns'].dropna().index,
            exit_price=trade_data['Exit Price'].dropna(),
            returns_pos=(returns > 0).astype(np.int8).astype(str),
        ))

    inc_cmap = factor_cmap('inc', COLORS, ['0', '1'])
    cmap = factor_cmap('returns_pos', COLORS, ['0', '1'])
    colors_darker = [lightness(BEAR_COLOR, .35), lightness(BULL_COLOR, .35)]
    trades_cmap = factor_cmap('returns_pos', colors_darker, ['0', '1'])

    if is_datetime_index and omit_missing:
        fig_ohlc.xaxis.formatter = FuncTickFormatter(args=dict(
            axis=fig_ohlc.xaxis[0],
            formatter=DatetimeTickFormatter(days=['%d %b', '%a %d'],
                                            months=['%m/%Y', "%b'%y"]),
            source=source),
                                                     code='''
this.labels = this.labels || formatter.doFormat(ticks
                                                .map(i => source.data.datetime[i])
                                                .filter(t => t !== undefined));
return this.labels[index] || "";
        ''')

    NBSP = '&nbsp;' * 4
    ohlc_extreme_values = df[['High', 'Low']].copy(False)
    ohlc_tooltips = [('x, y', NBSP.join(('$index', '$y{0,0.0[0000]}'))),
                     ('OHLC',
                      NBSP.join(('@Open{0,0.0[0000]}', '@High{0,0.0[0000]}',
                                 '@Low{0,0.0[0000]}', '@Close{0,0.0[0000]}'))),
                     ('Volume', '@Volume{0,0}')]

    def new_indicator_figure(**kwargs):
        kwargs.setdefault('plot_height', 90)
        fig = new_bokeh_figure(x_range=fig_ohlc.x_range,
                               active_scroll='xwheel_zoom',
                               active_drag='xpan',
                               **kwargs)
        fig.xaxis.visible = False
        fig.yaxis.minor_tick_line_color = None
        return fig

    def set_tooltips(fig,
                     tooltips=(),
                     vline=True,
                     renderers=(),
                     show_arrow=True):
        tooltips = list(tooltips)
        renderers = list(renderers)

        if is_datetime_index:
            formatters = dict(datetime='datetime')
            tooltips = [("Date", "@datetime{%c}")] + tooltips
        else:
            formatters = {}
            tooltips = [("#", "@index")] + tooltips
        fig.add_tools(
            HoverTool(point_policy='follow_mouse',
                      renderers=renderers,
                      formatters=formatters,
                      show_arrow=show_arrow,
                      tooltips=tooltips,
                      mode='vline' if vline else 'mouse'))

    def _plot_equity_section():
        """Equity section"""
        # Max DD Dur. line
        equity = trade_data['Equity']
        argmax = trade_data['Drawdown Duration'].idxmax()
        try:
            dd_start = equity[:argmax].idxmax()
        except Exception:  # ValueError: attempt to get argmax of an empty sequence
            dd_start = dd_end = equity.index[0]
            timedelta = 0
        else:
            dd_end = (equity[argmax:] > equity[dd_start]).idxmax()
            if dd_end == argmax:
                dd_end = index[-1]
            if is_datetime_index and omit_missing:
                # "Calendar" duration
                timedelta = df.datetime.iloc[dd_end] - df.datetime.iloc[
                    dd_start]
            else:
                timedelta = dd_end - dd_start
            # Get point intersection
            if dd_end != index[-1]:
                x1, x2 = index.get_loc(dd_end) - 1, index.get_loc(dd_end)
                y, y1, y2 = equity[dd_start], equity[x1], equity[x2]
                dd_end -= (1 - (y - y1) /
                           (y2 - y1)) * (dd_end - index[x1])  # y = a x + b

        if smooth_equity:
            select = (
                trade_data[['Entry Price', 'Exit Price'
                            ]].dropna(how='all').index |
                # Include beginning
                equity.index[:1] |
                # Include max dd end points. Otherwise, the MaxDD line looks amiss.
                pd.Index([dd_start, dd_end]))
            equity = equity[select].reindex(equity.index)
            equity.interpolate(inplace=True)

        if relative_equity:
            equity /= equity.iloc[0]

        source.add(equity, 'equity')
        fig = new_indicator_figure(
            y_axis_label="Equity",
            **({} if plot_drawdown else dict(plot_height=110)))

        # High-watermark drawdown dents
        fig.patch('index',
                  'equity_dd',
                  source=ColumnDataSource(
                      dict(index=np.r_[index, index[::-1]],
                           equity_dd=np.r_[equity,
                                           equity.cummax()[::-1]])),
                  fill_color='#ffffea',
                  line_color='#ffcb66')

        # Equity line
        r = fig.line('index',
                     'equity',
                     source=source,
                     line_width=1.5,
                     line_alpha=1)
        if relative_equity:
            tooltip_format = '@equity{+0,0.[000]%}'
            tick_format = '0,0.[00]%'
            legend_format = '{:,.0f}%'
        else:
            tooltip_format = '@equity{$ 0,0}'
            tick_format = '$ 0.0 a'
            legend_format = '${:,.0f}'
        set_tooltips(fig, [('Equity', tooltip_format)], renderers=[r])
        fig.yaxis.formatter = NumeralTickFormatter(format=tick_format)

        # Peaks
        argmax = equity.idxmax()
        fig.scatter(argmax,
                    equity[argmax],
                    legend='Peak ({})'.format(
                        legend_format.format(equity[argmax] *
                                             (100 if relative_equity else 1))),
                    color='cyan',
                    size=8)
        fig.scatter(index[-1],
                    equity.values[-1],
                    legend='Final ({})'.format(
                        legend_format.format(equity.iloc[-1] *
                                             (100 if relative_equity else 1))),
                    color='blue',
                    size=8)

        if not plot_drawdown:
            drawdown = trade_data['Drawdown']
            argmax = drawdown.idxmax()
            fig.scatter(argmax,
                        equity[argmax],
                        legend='Max Drawdown (-{:.1f}%)'.format(
                            100 * drawdown[argmax]),
                        color='red',
                        size=8)
        fig.line([dd_start, dd_end],
                 equity[dd_start],
                 line_color='red',
                 line_width=2,
                 legend='Max Dd Dur. ({})'.format(timedelta).replace(
                     ' 00:00:00', '').replace('(0 days ', '('))

        figs_above_ohlc.append(fig)

    def _plot_drawdown_section():
        """Drawdown section"""
        fig = new_indicator_figure(y_axis_label="Drawdown")
        drawdown = trade_data['Drawdown']
        argmax = drawdown.idxmax()
        source.add(drawdown, 'drawdown')
        r = fig.line('index', 'drawdown', source=source, line_width=1.3)
        fig.scatter(argmax,
                    drawdown[argmax],
                    legend='Peak (-{:.1f}%)'.format(100 * drawdown[argmax]),
                    color='red',
                    size=8)
        set_tooltips(fig, [('Drawdown', '@drawdown{-0.[0]%}')], renderers=[r])
        fig.yaxis.formatter = NumeralTickFormatter(format="-0.[0]%")
        return fig

    def _plot_pl_section():
        """Profit/Loss markers section"""
        fig = new_indicator_figure(y_axis_label="Profit / Loss")
        fig.add_layout(
            Span(location=0,
                 dimension='width',
                 line_color='#666666',
                 line_dash='dashed',
                 line_width=1))
        position = trade_data['Exit Position'].dropna()
        returns_long = returns.copy()
        returns_short = returns.copy()
        returns_long[position < 0] = np.nan
        returns_short[position > 0] = np.nan
        trade_source.add(returns_long, 'returns_long')
        trade_source.add(returns_short, 'returns_short')
        MARKER_SIZE = 13
        r1 = fig.scatter('index',
                         'returns_long',
                         source=trade_source,
                         fill_color=cmap,
                         marker='triangle',
                         line_color='black',
                         size=MARKER_SIZE)
        r2 = fig.scatter('index',
                         'returns_short',
                         source=trade_source,
                         fill_color=cmap,
                         marker='inverted_triangle',
                         line_color='black',
                         size=MARKER_SIZE)
        set_tooltips(fig, [("P/L", "@returns_long{+0.[000]%}")],
                     vline=False,
                     renderers=[r1])
        set_tooltips(fig, [("P/L", "@returns_short{+0.[000]%}")],
                     vline=False,
                     renderers=[r2])
        fig.yaxis.formatter = NumeralTickFormatter(format="0.[00]%")
        return fig

    def _plot_volume_section():
        """Volume section"""
        fig = new_indicator_figure(y_axis_label="Volume")
        fig.xaxis.formatter = fig_ohlc.xaxis[0].formatter
        fig.xaxis.visible = True
        fig_ohlc.xaxis.visible = False  # Show only Volume's xaxis
        r = fig.vbar('index',
                     bar_width,
                     'Volume',
                     source=source,
                     color=inc_cmap)
        set_tooltips(fig, [('Volume', '@Volume{0.00 a}')], renderers=[r])
        fig.yaxis.formatter = NumeralTickFormatter(format="0 a")
        return fig

    def _plot_superimposed_ohlc():
        """Superimposed, downsampled vbars"""
        resample_rule = (superimpose if isinstance(superimpose, str) else dict(
            day='W', hour='D', minute='H', second='T',
            millisecond='S').get(time_resolution))
        if not resample_rule:
            warnings.warn(
                "'Can't superimpose OHLC data with rule '{}' (index datetime resolution: '{}'). "
                "Skipping.".format(resample_rule, time_resolution),
                stacklevel=4)
            return

        orig_df['_width'] = 1
        from .lib import OHLCV_AGG
        df2 = orig_df.resample(resample_rule, label='left').agg(
            dict(OHLCV_AGG, _width='count'))

        # Check if resampling was downsampling; error on upsampling
        orig_freq = _data_period(orig_df)
        resample_freq = _data_period(df2)
        if resample_freq < orig_freq:
            raise ValueError(
                'Invalid value for `superimpose`: Upsampling not supported.')
        if resample_freq == orig_freq:
            warnings.warn(
                'Superimposed OHLC plot matches the original plot. Skipping.',
                stacklevel=4)
            return

        if omit_missing:
            width2 = '_width'
            df2.index = df2['_width'].cumsum().shift(1).fillna(0)
            df2.index += df2['_width'] / 2 - .5
            df2['_width'] -= .1  # Candles don't touch
        else:
            del df['_width']
            width2 = dict(day=86400 * 5, hour=86400, minute=3600,
                          second=60)[time_resolution] * 1000
            df2.index += pd.Timedelta(
                width2 / 2 +
                (width2 /
                 5 if resample_rule == 'W' else 0),  # Sunday week start
                unit='ms')
        df2['inc'] = (df2.Close >= df2.Open).astype(np.uint8).astype(str)
        df2.index.name = None
        source2 = ColumnDataSource(df2)
        fig_ohlc.segment('index',
                         'High',
                         'index',
                         'Low',
                         source=source2,
                         color='#bbbbbb')
        colors_lighter = [
            lightness(BEAR_COLOR, .92),
            lightness(BULL_COLOR, .92)
        ]
        fig_ohlc.vbar('index',
                      width2,
                      'Open',
                      'Close',
                      source=source2,
                      line_color=None,
                      fill_color=factor_cmap('inc', colors_lighter,
                                             ['0', '1']))

    def _plot_ohlc():
        """Main OHLC bars"""
        fig_ohlc.segment('index',
                         'High',
                         'index',
                         'Low',
                         source=source,
                         color="black")
        r = fig_ohlc.vbar('index',
                          bar_width,
                          'Open',
                          'Close',
                          source=source,
                          line_color="black",
                          fill_color=inc_cmap)
        return r

    def _plot_ohlc_trades():
        """Trade entry / exit markers on OHLC plot"""
        exit_price = trade_data['Exit Price'].dropna()
        entry_price = trade_data['Entry Price'].dropna(
        ).iloc[:exit_price.
               size]  # entry can be one more at the end  # noqa: E501
        trade_source.add(
            np.column_stack((entry_price.index, exit_price.index)).tolist(),
            'position_lines_xs')
        trade_source.add(
            np.column_stack((entry_price, exit_price)).tolist(),
            'position_lines_ys')
        fig_ohlc.multi_line(xs='position_lines_xs',
                            ys='position_lines_ys',
                            source=trade_source,
                            line_color=trades_cmap,
                            legend='Trades',
                            line_width=8,
                            line_alpha=1,
                            line_dash='dotted')

    def _plot_indicators():
        """Strategy indicators"""
        def _too_many_dims(value):
            assert value.ndim >= 2
            if value.ndim > 2:
                warnings.warn("Can't plot indicators with >2D ('{}')".format(
                    value.name),
                              stacklevel=5)
                return True
            return False

        class LegendStr(str):
            # The legend string is such a string that only matches
            # itself if it's the exact same object. This ensures
            # legend items are listed separately even when they have the
            # same string contents. Otherwise, Bokeh would always consider
            # equal strings as one and the same legend item.
            # This also prevents legend items named the same as some
            # ColumnDataSource's column to be replaced with that column's
            # values.
            def __eq__(self, other):
                return self is other

        ohlc_colors = colorgen()

        for value in indicators:
            value = np.atleast_2d(value)

            # Use .get()! A user might have assigned a Strategy.data-evolved
            # _Array without Strategy.I()
            if not value._opts.get('plot') or _too_many_dims(value):
                continue

            tooltips = []

            # Overlay indicators on the OHLC figure
            if value._opts['overlay']:
                color = value._opts['color']
                color = color and _as_list(color)[0] or next(ohlc_colors)
                legend = LegendStr(value.name)
                for i, arr in enumerate(value):
                    source_name = '{}_{}'.format(value.name, i)
                    source.add(arr, source_name)
                    if value._opts.get('scatter'):
                        fig_ohlc.scatter('index',
                                         source_name,
                                         source=source,
                                         color=color,
                                         line_color='black',
                                         fill_alpha=.8,
                                         marker='circle',
                                         radius=bar_width / 2 * 1.5,
                                         legend=legend)
                    else:
                        fig_ohlc.line('index',
                                      source_name,
                                      source=source,
                                      line_width=1.3,
                                      line_color=color,
                                      legend=legend)
                    ohlc_extreme_values[source_name] = arr
                    tooltips.append(
                        '@{{{}}}{{0,0.0[0000]}}'.format(source_name))
                ohlc_tooltips.append((value.name, NBSP.join(tooltips)))
            else:
                # Standalone indicator sections at the bottom
                color = value._opts['color']
                color = color and cycle(_as_list(color)) or colorgen()
                fig = new_indicator_figure()
                for i, arr in enumerate(value, 1):
                    legend = '{}-{}'.format(
                        value.name, i) if len(value) > 1 else value.name
                    name = legend + '_'  # Otherwise fig.line(legend=) is interpreted as col of source  # noqa: E501
                    tooltips.append('@{{{}}}'.format(name))
                    source.add(arr.astype(int if arr.dtype == bool else float),
                               name)
                    if value._opts.get('scatter'):
                        r = fig.scatter('index',
                                        name,
                                        source=source,
                                        color=next(color),
                                        marker='circle',
                                        radius=bar_width / 2 * .9,
                                        legend=LegendStr(legend))
                    else:
                        r = fig.line('index',
                                     name,
                                     source=source,
                                     line_color=next(color),
                                     line_width=1.3,
                                     legend=LegendStr(legend))

                    # Add dashed centerline just because
                    mean = float(pd.Series(arr).mean())
                    if not np.isnan(mean) and (abs(mean) < .1 or round(
                            abs(mean), -1) in (50, 100, 200)):
                        fig.add_layout(
                            Span(location=float(mean),
                                 dimension='width',
                                 line_color='#666666',
                                 line_dash='dashed',
                                 line_width=.5))

                set_tooltips(fig, [(value.name, NBSP.join(tooltips))],
                             vline=True,
                             renderers=[r])

                # If the sole indicator line on this figure,
                # have the legend only contain text without the glyph
                if len(value) == 1:
                    fig.legend.glyph_width = 0

                figs_below_ohlc.append(fig)

    # Construct figure ...

    if plot_equity:
        _plot_equity_section()

    if plot_drawdown:
        figs_above_ohlc.append(_plot_drawdown_section())

    if plot_pl:
        figs_above_ohlc.append(_plot_pl_section())

    if plot_volume:
        fig_volume = _plot_volume_section()
        figs_below_ohlc.append(fig_volume)

    if superimpose and is_datetime_index:
        _plot_superimposed_ohlc()

    ohlc_bars = _plot_ohlc()
    _plot_ohlc_trades()
    _plot_indicators()

    set_tooltips(fig_ohlc, ohlc_tooltips, vline=True, renderers=[ohlc_bars])

    source.add(ohlc_extreme_values.min(1), 'ohlc_low')
    source.add(ohlc_extreme_values.max(1), 'ohlc_high')

    custom_js_args = dict(ohlc_range=fig_ohlc.y_range, source=source)
    if plot_volume:
        custom_js_args.update(volume_range=fig_volume.y_range)

    fig_ohlc.x_range.callback = CustomJS(args=custom_js_args,
                                         code=_AUTOSCALE_JS_CALLBACK)

    plots = figs_above_ohlc + [fig_ohlc] + figs_below_ohlc
    for f in plots:
        if f.legend:
            f.legend.location = 'top_left' if show_legend else None
            f.legend.border_line_width = 1
            f.legend.border_line_color = '#333333'
            f.legend.padding = 5
            f.legend.spacing = 0
            f.legend.margin = 0
            f.legend.label_text_font_size = '8pt'
        f.min_border_left = 0
        f.min_border_top = 3
        f.min_border_bottom = 6
        f.min_border_right = 10
        f.outline_line_color = '#666666'

        wheelzoom_tool = next(wz for wz in f.tools
                              if isinstance(wz, WheelZoomTool))
        wheelzoom_tool.maintain_focus = False

    kwargs = {}
    if plot_width is None:
        kwargs['sizing_mode'] = 'stretch_width'

    fig = gridplot(plots,
                   ncols=1,
                   toolbar_location='right',
                   toolbar_options=dict(logo=None),
                   merge_tools=True,
                   **kwargs)
    show(fig, browser=None if open_browser else 'none')
    return fig
Example #42
0
def plot(*,
         results: pd.Series,
         df: pd.DataFrame,
         indicators: List[_Indicator],
         filename='',
         plot_width=None,
         plot_equity=True,
         plot_return=False,
         plot_pl=True,
         plot_volume=True,
         plot_drawdown=False,
         smooth_equity=False,
         relative_equity=True,
         superimpose=True,
         resample=True,
         reverse_indicators=True,
         show_legend=True,
         open_browser=True):
    """
    Build and show the stacked Bokeh figure for a backtest run.

    The layout is a single-column grid: optional equity / return /
    drawdown / profit-loss sections above the main OHLC candlestick chart,
    and optional volume and standalone indicator sections below it.
    All sections share the OHLC figure's x-range.

    Parameters
    ----------
    results : pd.Series
        Must provide ``'_equity_curve'`` and ``'_trades'`` entries. Its
        ``_strategy`` attribute is used to derive a default file name.
    df : pd.DataFrame
        OHLCV data. Its index must equal the equity curve's index. Only the
        columns named by ``OHLCV_AGG`` are kept.
    indicators : list
        Indicator arrays. Each is expected to carry ``name`` and an ``_opts``
        mapping (``plot``, ``overlay``, ``scatter``, ``color``).
    filename : str
        Passed to ``_bokeh_reset``. When empty and not running in a Jupyter
        notebook, a name is derived from ``str(results._strategy)``.
    plot_width : int or None
        Figure width. When ``None`` the grid uses ``sizing_mode='stretch_width'``.
    plot_equity, plot_return, plot_pl : bool
        Toggle the respective sections. Each is forced off when there are
        no trades.
    plot_volume : bool
        Toggle the volume section. Forced off when ``df.Volume`` is all-null.
    plot_drawdown : bool
        Toggle the separate drawdown section. When off, the max-drawdown
        marker is drawn on the equity section instead.
    smooth_equity : bool
        If true, the equity line is interpolated between trade exit bars and
        a handful of interest points instead of being drawn bar by bar.
    relative_equity : bool
        If true, equity is divided by its first value and shown as percent.
    superimpose : bool or str
        If truthy and the index is a ``DatetimeIndex``, draw larger-timeframe
        candles behind the main ones. A string is used as the pandas
        resample rule; otherwise the rule is chosen from the index resolution.
    resample
        Forwarded to ``_maybe_resample_data`` (datetime index only).
    reverse_indicators : bool
        If true, standalone indicator figures are stacked in reverse order.
    show_legend : bool
        Legend visibility for every figure.
    open_browser : bool
        If false, ``show`` is called with ``browser='none'``.

    Returns
    -------
    The grid plot object returned by ``gridplot``.
    """
    if not filename and not IS_JUPYTER_NOTEBOOK:
        filename = _windos_safe_filename(str(results._strategy))
    # We need to reset global Bokeh state, otherwise subsequent runs of
    # plot() contain some previous run's cruft data (was noticed when
    # TestPlot.test_file_size() test was failing).
    _bokeh_reset(filename)

    # Index 0 is the "bearish" colour, index 1 the "bullish" one; the order
    # must match the ['0', '1'] factors passed to factor_cmap below.
    COLORS = [BEAR_COLOR, BULL_COLOR]
    BAR_WIDTH = .8

    assert df.index.equals(results['_equity_curve'].index)
    equity_data = results['_equity_curve'].copy(deep=False)
    trades = results['_trades']

    # Silently drop sections that have nothing to show.
    plot_volume = plot_volume and not df.Volume.isnull().all()
    plot_equity = plot_equity and not trades.empty
    plot_return = plot_return and not trades.empty
    plot_pl = plot_pl and not trades.empty
    is_datetime_index = isinstance(df.index, pd.DatetimeIndex)

    from .lib import OHLCV_AGG
    # ohlc df may contain many columns. We're only interested in, and pass on to Bokeh, these
    df = df[list(OHLCV_AGG.keys())].copy(deep=False)

    # Limit data to max_candles
    if is_datetime_index:
        df, indicators, equity_data, trades = _maybe_resample_data(
            resample, df, indicators, equity_data, trades)

    # From here on everything is plotted against a plain 0..n-1 integer index;
    # the original index is kept in the 'datetime' column for labels/tooltips.
    df.index.name = None  # Provides source name @index
    df['datetime'] = df.index  # Save original, maybe datetime index
    df = df.reset_index(drop=True)
    equity_data = equity_data.reset_index(drop=True)
    index = df.index

    new_bokeh_figure = partial(
        _figure,
        x_axis_type='linear',
        plot_width=plot_width,
        plot_height=400,
        tools="xpan,xwheel_zoom,box_zoom,undo,redo,reset,save",
        active_drag='xpan',
        active_scroll='xwheel_zoom')

    # Allow panning 5 % of the data span beyond either end.
    pad = (index[-1] - index[0]) / 20

    fig_ohlc = new_bokeh_figure(
        x_range=Range1d(index[0],
                        index[-1],
                        min_interval=10,
                        bounds=(index[0] - pad,
                                index[-1] + pad)) if index.size > 1 else None)
    figs_above_ohlc, figs_below_ohlc = [], []

    source = ColumnDataSource(df)
    # 'inc' is '1' for bars that closed at or above their open, else '0'.
    source.add((df.Close >= df.Open).values.astype(np.uint8).astype(str),
               'inc')

    trade_source = ColumnDataSource(
        dict(
            index=trades['ExitBar'],
            datetime=trades['ExitTime'],
            exit_price=trades['ExitPrice'],
            size=trades['Size'],
            returns_positive=(trades['ReturnPct'] > 0).astype(int).astype(str),
        ))

    inc_cmap = factor_cmap('inc', COLORS, ['0', '1'])
    cmap = factor_cmap('returns_positive', COLORS, ['0', '1'])
    colors_darker = [lightness(BEAR_COLOR, .35), lightness(BULL_COLOR, .35)]
    trades_cmap = factor_cmap('returns_positive', colors_darker, ['0', '1'])

    if is_datetime_index:
        # The x axis is the integer bar number; this JS formatter maps each
        # tick back to the stored 'datetime' value and formats that instead.
        fig_ohlc.xaxis.formatter = FuncTickFormatter(args=dict(
            axis=fig_ohlc.xaxis[0],
            formatter=DatetimeTickFormatter(days=['%d %b', '%a %d'],
                                            months=['%m/%Y', "%b'%y"]),
            source=source),
                                                     code='''
this.labels = this.labels || formatter.doFormat(ticks
                                                .map(i => source.data.datetime[i])
                                                .filter(t => t !== undefined));
return this.labels[index] || "";
        ''')

    NBSP = '\N{NBSP}' * 4
    # Columns whose per-bar min/max later become 'ohlc_low' / 'ohlc_high';
    # overlay indicators add their own columns to this frame.
    ohlc_extreme_values = df[['High', 'Low']].copy(deep=False)
    ohlc_tooltips = [('x, y', NBSP.join(('$index', '$y{0,0.0[0000]}'))),
                     ('OHLC',
                      NBSP.join(('@Open{0,0.0[0000]}', '@High{0,0.0[0000]}',
                                 '@Low{0,0.0[0000]}', '@Close{0,0.0[0000]}'))),
                     ('Volume', '@Volume{0,0}')]

    def new_indicator_figure(**kwargs):
        """Create a short figure (default height 90) sharing the OHLC x-range."""
        kwargs.setdefault('plot_height', 90)
        fig = new_bokeh_figure(x_range=fig_ohlc.x_range,
                               active_scroll='xwheel_zoom',
                               active_drag='xpan',
                               **kwargs)
        fig.xaxis.visible = False
        fig.yaxis.minor_tick_line_color = None
        return fig

    def set_tooltips(fig, tooltips=(), vline=True, renderers=()):
        """Attach a HoverTool to `fig`, prefixed with a date or bar-number row.

        `tooltips` are (label, format) pairs; `renderers` limits which glyphs
        trigger the hover; `vline` selects 'vline' versus 'mouse' hover mode.
        """
        tooltips = list(tooltips)
        renderers = list(renderers)

        if is_datetime_index:
            formatters = {'@datetime': 'datetime'}
            tooltips = [("Date", "@datetime{%c}")] + tooltips
        else:
            formatters = {}
            tooltips = [("#", "@index")] + tooltips
        fig.add_tools(
            HoverTool(point_policy='follow_mouse',
                      renderers=renderers,
                      formatters=formatters,
                      tooltips=tooltips,
                      mode='vline' if vline else 'mouse'))

    def _plot_equity_section(is_return=False):
        """Equity section (or, with `is_return`, the return section).

        Appends the finished figure to `figs_above_ohlc`.
        """
        # Max DD Dur. line
        equity = equity_data['Equity'].copy()
        dd_end = equity_data['DrawdownDuration'].idxmax()
        if np.isnan(dd_end):
            # No drawdown duration recorded: collapse the line to a point.
            dd_start = dd_end = equity.index[0]
        else:
            dd_start = equity[:dd_end].idxmax()
            # If DD not extending into the future, get exact point of intersection with equity
            if dd_end != equity.index[-1]:
                dd_end = np.interp(equity[dd_start],
                                   (equity[dd_end - 1], equity[dd_end]),
                                   (dd_end - 1, dd_end))

        if smooth_equity:
            interest_points = pd.Index([
                # Beginning and end
                equity.index[0],
                equity.index[-1],
                # Peak equity and peak DD
                equity.idxmax(),
                equity_data['DrawdownPct'].idxmax(),
                # Include max dd end points. Otherwise the MaxDD line looks amiss.
                dd_start,
                int(dd_end),
                min(int(dd_end + 1), equity.size - 1),
            ])
            # Keep only trade exits and the interest points, then linearly
            # interpolate everything in between.
            select = pd.Index(trades['ExitBar']).union(interest_points)
            select = select.unique().dropna()
            equity = equity.iloc[select].reindex(equity.index)
            equity.interpolate(inplace=True)

        assert equity.index.equals(equity_data.index)

        if relative_equity:
            equity /= equity.iloc[0]
        if is_return:
            equity -= equity.iloc[0]

        yaxis_label = 'Return' if is_return else 'Equity'
        source_key = 'eq_return' if is_return else 'equity'
        source.add(equity, source_key)
        fig = new_indicator_figure(
            y_axis_label=yaxis_label,
            **({} if plot_drawdown else dict(plot_height=110)))

        # High-watermark drawdown dents
        fig.patch('index',
                  'equity_dd',
                  source=ColumnDataSource(
                      dict(index=np.r_[index, index[::-1]],
                           equity_dd=np.r_[equity,
                                           equity.cummax()[::-1]])),
                  fill_color='#ffffea',
                  line_color='#ffcb66')

        # Equity line
        r = fig.line('index',
                     source_key,
                     source=source,
                     line_width=1.5,
                     line_alpha=1)
        if relative_equity:
            tooltip_format = f'@{source_key}{{+0,0.[000]%}}'
            tick_format = '0,0.[00]%'
            legend_format = '{:,.0f}%'
        else:
            tooltip_format = f'@{source_key}{{$ 0,0}}'
            tick_format = '$ 0.0 a'
            legend_format = '${:,.0f}'
        set_tooltips(fig, [(yaxis_label, tooltip_format)], renderers=[r])
        fig.yaxis.formatter = NumeralTickFormatter(format=tick_format)

        # Peaks
        argmax = equity.idxmax()
        fig.scatter(argmax,
                    equity[argmax],
                    legend_label='Peak ({})'.format(
                        legend_format.format(equity[argmax] *
                                             (100 if relative_equity else 1))),
                    color='cyan',
                    size=8)
        fig.scatter(index[-1],
                    equity.values[-1],
                    legend_label='Final ({})'.format(
                        legend_format.format(equity.iloc[-1] *
                                             (100 if relative_equity else 1))),
                    color='blue',
                    size=8)

        if not plot_drawdown:
            # No dedicated drawdown section, so mark max drawdown here.
            drawdown = equity_data['DrawdownPct']
            argmax = drawdown.idxmax()
            fig.scatter(argmax,
                        equity[argmax],
                        legend_label='Max Drawdown (-{:.1f}%)'.format(
                            100 * drawdown[argmax]),
                        color='red',
                        size=8)
        dd_timedelta_label = df['datetime'].iloc[int(
            round(dd_end))] - df['datetime'].iloc[dd_start]
        fig.line([dd_start, dd_end],
                 equity.iloc[dd_start],
                 line_color='red',
                 line_width=2,
                 legend_label=f'Max Dd Dur. ({dd_timedelta_label})'.replace(
                     ' 00:00:00', '').replace('(0 days ', '('))

        figs_above_ohlc.append(fig)

    def _plot_drawdown_section():
        """Drawdown section; returns the figure (caller appends it)."""
        fig = new_indicator_figure(y_axis_label="Drawdown")
        drawdown = equity_data['DrawdownPct']
        argmax = drawdown.idxmax()
        source.add(drawdown, 'drawdown')
        r = fig.line('index', 'drawdown', source=source, line_width=1.3)
        fig.scatter(argmax,
                    drawdown[argmax],
                    legend_label='Peak (-{:.1f}%)'.format(100 *
                                                          drawdown[argmax]),
                    color='red',
                    size=8)
        set_tooltips(fig, [('Drawdown', '@drawdown{-0.[0]%}')], renderers=[r])
        fig.yaxis.formatter = NumeralTickFormatter(format="-0.[0]%")
        return fig

    def _plot_pl_section():
        """Profit/Loss markers section; returns the figure."""
        fig = new_indicator_figure(y_axis_label="Profit / Loss")
        fig.add_layout(
            Span(location=0,
                 dimension='width',
                 line_color='#666666',
                 line_dash='dashed',
                 line_width=1))
        # Split returns by trade direction so long and short trades can use
        # different markers; the other direction's rows become NaN.
        returns_long = np.where(trades['Size'] > 0, trades['ReturnPct'],
                                np.nan)
        returns_short = np.where(trades['Size'] < 0, trades['ReturnPct'],
                                 np.nan)
        # Marker size scales linearly with absolute trade size into [8, 20].
        size = trades['Size'].abs()
        size = np.interp(size, (size.min(), size.max()), (8, 20))
        trade_source.add(returns_long, 'returns_long')
        trade_source.add(returns_short, 'returns_short')
        trade_source.add(size, 'marker_size')
        if 'count' in trades:
            trade_source.add(trades['count'], 'count')
        r1 = fig.scatter('index',
                         'returns_long',
                         source=trade_source,
                         fill_color=cmap,
                         marker='triangle',
                         line_color='black',
                         size='marker_size')
        r2 = fig.scatter('index',
                         'returns_short',
                         source=trade_source,
                         fill_color=cmap,
                         marker='inverted_triangle',
                         line_color='black',
                         size='marker_size')
        tooltips = [("Size", "@size{0,0}")]
        if 'count' in trades:
            tooltips.append(("Count", "@count{0,0}"))
        set_tooltips(fig,
                     tooltips + [("P/L", "@returns_long{+0.[000]%}")],
                     vline=False,
                     renderers=[r1])
        set_tooltips(fig,
                     tooltips + [("P/L", "@returns_short{+0.[000]%}")],
                     vline=False,
                     renderers=[r2])
        fig.yaxis.formatter = NumeralTickFormatter(format="0.[00]%")
        return fig

    def _plot_volume_section():
        """Volume section; returns the figure and takes over the x axis."""
        fig = new_indicator_figure(y_axis_label="Volume")
        fig.xaxis.formatter = fig_ohlc.xaxis[0].formatter
        fig.xaxis.visible = True
        fig_ohlc.xaxis.visible = False  # Show only Volume's xaxis
        r = fig.vbar('index',
                     BAR_WIDTH,
                     'Volume',
                     source=source,
                     color=inc_cmap)
        set_tooltips(fig, [('Volume', '@Volume{0.00 a}')], renderers=[r])
        fig.yaxis.formatter = NumeralTickFormatter(format="0 a")
        return fig

    def _plot_superimposed_ohlc():
        """Superimposed, downsampled vbars drawn behind the main candles.

        Warns and returns without drawing when no resample rule can be
        determined or when resampling would not change the period.
        Raises ValueError when the requested rule would upsample.
        """
        time_resolution = pd.DatetimeIndex(df['datetime']).resolution
        # An explicit string is used as the rule; otherwise step up one
        # timeframe from the index resolution.
        resample_rule = (superimpose if isinstance(superimpose, str) else dict(
            day='M', hour='D', minute='H', second='T',
            millisecond='S').get(time_resolution))
        if not resample_rule:
            warnings.warn(
                f"Can't superimpose OHLC data with rule '{resample_rule}' "
                f"(index datetime resolution: '{time_resolution}'). Skipping.",
                stacklevel=4)
            return

        # '_width' counts how many original bars fall into each new candle.
        df2 = (df.assign(_width=1).set_index('datetime').resample(
            resample_rule, label='left').agg(dict(OHLCV_AGG, _width='count')))

        # Check if resampling was downsampling; error on upsampling
        orig_freq = _data_period(df['datetime'])
        resample_freq = _data_period(df2.index)
        if resample_freq < orig_freq:
            raise ValueError(
                'Invalid value for `superimpose`: Upsampling not supported.')
        if resample_freq == orig_freq:
            warnings.warn(
                'Superimposed OHLC plot matches the original plot. Skipping.',
                stacklevel=4)
            return

        # Position each wide candle on the integer bar axis: start at the
        # number of bars preceding it, then shift to the centre of its span.
        df2.index = df2['_width'].cumsum().shift(1).fillna(0)
        df2.index += df2['_width'] / 2 - .5
        df2['_width'] -= .1  # Candles don't touch

        df2['inc'] = (df2.Close >= df2.Open).astype(int).astype(str)
        df2.index.name = None
        source2 = ColumnDataSource(df2)
        fig_ohlc.segment('index',
                         'High',
                         'index',
                         'Low',
                         source=source2,
                         color='#bbbbbb')
        colors_lighter = [
            lightness(BEAR_COLOR, .92),
            lightness(BULL_COLOR, .92)
        ]
        fig_ohlc.vbar('index',
                      '_width',
                      'Open',
                      'Close',
                      source=source2,
                      line_color=None,
                      fill_color=factor_cmap('inc', colors_lighter,
                                             ['0', '1']))

    def _plot_ohlc():
        """Main OHLC bars; returns the candle-body renderer for hover use."""
        fig_ohlc.segment('index',
                         'High',
                         'index',
                         'Low',
                         source=source,
                         color="black")
        r = fig_ohlc.vbar('index',
                          BAR_WIDTH,
                          'Open',
                          'Close',
                          source=source,
                          line_color="black",
                          fill_color=inc_cmap)
        return r

    def _plot_ohlc_trades():
        """Trade entry / exit markers on OHLC plot"""
        trade_source.add(trades[['EntryBar', 'ExitBar']].values.tolist(),
                         'position_lines_xs')
        trade_source.add(trades[['EntryPrice', 'ExitPrice']].values.tolist(),
                         'position_lines_ys')
        fig_ohlc.multi_line(xs='position_lines_xs',
                            ys='position_lines_ys',
                            source=trade_source,
                            line_color=trades_cmap,
                            legend_label=f'Trades ({len(trades)})',
                            line_width=8,
                            line_alpha=1,
                            line_dash='dotted')

    def _plot_indicators():
        """Strategy indicators.

        Overlay indicators are drawn on the OHLC figure; the rest get their
        own figure each. Returns the list of those standalone figures.
        """
        def _too_many_dims(value):
            assert value.ndim >= 2
            if value.ndim > 2:
                warnings.warn(
                    f"Can't plot indicators with >2D ('{value.name}')",
                    stacklevel=5)
                return True
            return False

        class LegendStr(str):
            # The legend string is such a string that only matches
            # itself if it's the exact same object. This ensures
            # legend items are listed separately even when they have the
            # same string contents. Otherwise, Bokeh would always consider
            # equal strings as one and the same legend item.
            def __eq__(self, other):
                return self is other

        ohlc_colors = colorgen()
        indicator_figs = []

        for i, value in enumerate(indicators):
            value = np.atleast_2d(value)

            # Use .get()! A user might have assigned a Strategy.data-evolved
            # _Array without Strategy.I()
            if not value._opts.get('plot') or _too_many_dims(value):
                continue

            is_overlay = value._opts['overlay']
            is_scatter = value._opts['scatter']
            if is_overlay:
                fig = fig_ohlc
            else:
                fig = new_indicator_figure()
                indicator_figs.append(fig)
            tooltips = []
            # Colour source, in order of precedence: the user-supplied
            # colour(s), cycled; for overlays, one colour from the shared
            # OHLC generator reused for every row; otherwise a fresh
            # generator per standalone figure.
            colors = value._opts['color']
            if colors:
                colors = cycle(_as_list(colors))
            elif is_overlay:
                colors = cycle([next(ohlc_colors)])
            else:
                colors = colorgen()
            legend_label = LegendStr(value.name)
            for j, arr in enumerate(value, 1):
                color = next(colors)
                # i and j keep source column names unique even when several
                # indicators share a name.
                source_name = f'{legend_label}_{i}_{j}'
                if arr.dtype == bool:
                    arr = arr.astype(int)
                source.add(arr, source_name)
                tooltips.append(f'@{{{source_name}}}{{0,0.0[0000]}}')
                if is_overlay:
                    # Overlay values take part in the OHLC y-range autoscale.
                    ohlc_extreme_values[source_name] = arr
                    if is_scatter:
                        fig.scatter('index',
                                    source_name,
                                    source=source,
                                    legend_label=legend_label,
                                    color=color,
                                    line_color='black',
                                    fill_alpha=.8,
                                    marker='circle',
                                    radius=BAR_WIDTH / 2 * 1.5)
                    else:
                        fig.line('index',
                                 source_name,
                                 source=source,
                                 legend_label=legend_label,
                                 line_color=color,
                                 line_width=1.3)
                else:
                    if is_scatter:
                        r = fig.scatter('index',
                                        source_name,
                                        source=source,
                                        legend_label=LegendStr(legend_label),
                                        color=color,
                                        marker='circle',
                                        radius=BAR_WIDTH / 2 * .9)
                    else:
                        r = fig.line('index',
                                     source_name,
                                     source=source,
                                     legend_label=LegendStr(legend_label),
                                     line_color=color,
                                     line_width=1.3)
                    # Add dashed centerline just because
                    mean = float(pd.Series(arr).mean())
                    if not np.isnan(mean) and (
                            abs(mean) < .1 or round(abs(mean), 1) == .5
                            or round(abs(mean), -1) in (50, 100, 200)):
                        fig.add_layout(
                            Span(location=float(mean),
                                 dimension='width',
                                 line_color='#666666',
                                 line_dash='dashed',
                                 line_width=.5))
            if is_overlay:
                ohlc_tooltips.append((legend_label, NBSP.join(tooltips)))
            else:
                # Hover is bound to the last renderer drawn on this figure.
                set_tooltips(fig, [(legend_label, NBSP.join(tooltips))],
                             vline=True,
                             renderers=[r])
                # If the sole indicator line on this figure,
                # have the legend only contain text without the glyph
                if len(value) == 1:
                    fig.legend.glyph_width = 0
        return indicator_figs

    # Construct figure ...

    if plot_equity:
        _plot_equity_section()

    if plot_return:
        _plot_equity_section(is_return=True)

    if plot_drawdown:
        figs_above_ohlc.append(_plot_drawdown_section())

    if plot_pl:
        figs_above_ohlc.append(_plot_pl_section())

    if plot_volume:
        fig_volume = _plot_volume_section()
        figs_below_ohlc.append(fig_volume)

    # Drawn before the main candles so the latter render on top.
    if superimpose and is_datetime_index:
        _plot_superimposed_ohlc()

    ohlc_bars = _plot_ohlc()
    _plot_ohlc_trades()
    indicator_figs = _plot_indicators()
    if reverse_indicators:
        indicator_figs = indicator_figs[::-1]
    figs_below_ohlc.extend(indicator_figs)

    set_tooltips(fig_ohlc, ohlc_tooltips, vline=True, renderers=[ohlc_bars])

    # Per-bar extremes over High/Low and all overlay indicators; passed to
    # the autoscale JS callback through `source`.
    source.add(ohlc_extreme_values.min(1), 'ohlc_low')
    source.add(ohlc_extreme_values.max(1), 'ohlc_high')

    custom_js_args = dict(ohlc_range=fig_ohlc.y_range, source=source)
    if plot_volume:
        custom_js_args.update(volume_range=fig_volume.y_range)

    # Run the autoscale callback whenever the shared x-range end changes.
    fig_ohlc.x_range.js_on_change(
        'end', CustomJS(args=custom_js_args, code=_AUTOSCALE_JS_CALLBACK))

    plots = figs_above_ohlc + [fig_ohlc] + figs_below_ohlc
    # One tool instance added to every figure.
    linked_crosshair = CrosshairTool(dimensions='both')

    for f in plots:
        if f.legend:
            f.legend.visible = show_legend
            f.legend.location = 'top_left'
            f.legend.border_line_width = 1
            f.legend.border_line_color = '#333333'
            f.legend.padding = 5
            f.legend.spacing = 0
            f.legend.margin = 0
            f.legend.label_text_font_size = '8pt'
            f.legend.click_policy = "hide"
        f.min_border_left = 0
        f.min_border_top = 3
        f.min_border_bottom = 6
        f.min_border_right = 10
        f.outline_line_color = '#666666'

        f.add_tools(linked_crosshair)
        wheelzoom_tool = next(wz for wz in f.tools
                              if isinstance(wz, WheelZoomTool))
        wheelzoom_tool.maintain_focus = False

    kwargs = {}
    if plot_width is None:
        kwargs['sizing_mode'] = 'stretch_width'

    fig = gridplot(plots,
                   ncols=1,
                   toolbar_location='right',
                   toolbar_options=dict(logo=None),
                   merge_tools=True,
                   **kwargs)
    show(fig, browser=None if open_browser else 'none')
    return fig
    def make_plot(src, src2):
        """Build both bar charts and return them as ``(plot, plot2)``.

        The first chart draws one vertical bar per ``factor`` entry of
        ``src`` with height ``change``. The second draws one horizontal bar
        per ``deciles`` entry of ``src2`` with length
        ``aggr_delta_after_eti``, coloured through a linear colour mapper.
        Each figure is passed through ``plotstyle`` before being returned.
        """
        # ---- Chart 1: vertical bars per factor ----
        factor_fig = figure(
            plot_width=800,
            plot_height=400,
            x_range=FactorRange(*src.data["factor"]),
            tooltips="@factor: @change{0€} €",
        )

        factor_fill = factor_cmap(
            "factor",
            palette=Category10[4][1:],
            factors=["delta_tax_base", "externalities", "total"],
            start=1,
            end=2,
        )
        factor_fig.vbar(
            x="factor",
            top="change",
            width=0.8,
            source=src,
            fill_color=factor_fill,
            line_color=None,
        )

        # Text labels read from the "label" column, nudged 7 px downwards.
        factor_labels = LabelSet(
            x="factor",
            y="change",
            text="label",
            source=src,
            render_mode="canvas",
            y_offset=-7,
        )
        factor_fig.add_layout(factor_labels)

        factor_fig.xgrid.grid_line_color = None

        # Fixed look of the categorical axis.
        factor_fig.x_range.range_padding = 0.1
        factor_fig.xaxis.major_label_orientation = 1.4

        styled_factor_fig = plotstyle(factor_fig, plot_dict1)

        # ---- Chart 2: horizontal bars per decile ----
        decile_fig = figure(
            plot_width=800,
            plot_height=400,
            y_range=src2.data["deciles"],
            x_range=[-180, 187],
            tooltips="@deciles: @aggr_delta_after_eti{0€} Mio.€",
        )

        decile_labels = LabelSet(
            x="aggr_delta_after_eti",
            y="deciles",
            text="label",
            source=src2,
            x_offset="offset",
            y_offset=-10,
            render_mode="canvas",
        )

        # `low` receives the maximum and `high` the minimum of the column.
        deltas_for_low = src2.data["aggr_delta_after_eti"]
        mapper_low = max(deltas_for_low)
        deltas_for_high = src2.data["aggr_delta_after_eti"]
        mapper_high = min(deltas_for_high)
        delta_mapper = LinearColorMapper(
            palette=RdYlGn[10],
            low=mapper_low,
            high=mapper_high,
        )

        bar_color = dict(field="aggr_delta_after_eti", transform=delta_mapper)
        decile_fig.hbar(
            y="deciles",
            right="aggr_delta_after_eti",
            source=src2,
            height=0.8,
            color=bar_color,
            line_color=None,
        )

        decile_fig.add_layout(decile_labels)

        decile_fig.xaxis.tags = ["numeric"]
        decile_fig.yaxis.tags = ["categorical"]

        styled_decile_fig = plotstyle(decile_fig, plot_dict2)

        return styled_factor_fig, styled_decile_fig
from bokeh.io import show, output_file
from bokeh.models import ColumnDataSource, HoverTool
from bokeh.plotting import figure
from bokeh.palettes import Spectral5
from bokeh.sampledata.autompg import autompg_clean as df
from bokeh.transform import factor_cmap

# Bar chart of mean MPG for every (cylinders, manufacturer) pair in the
# autompg sample data, coloured by the number of cylinders.
output_file("bars.html")

# Cast to strings so the values are used as category labels.
df.cyl = df.cyl.astype(str)
df.yr = df.yr.astype(str)

# Group keys are given as a list: pandas deprecated treating a tuple `by`
# as several keys and now reads a tuple as one single key.
group = df.groupby(['cyl', 'mfr'])

# The 'cyl_mfr' and 'mpg_mean' columns referenced below are expected to be
# provided by the ColumnDataSource built from the GroupBy object.
source = ColumnDataSource(group)
# end=1 restricts the colour lookup to the first level ('cyl') of each
# ('cyl', 'mfr') factor.
index_cmap = factor_cmap('cyl_mfr', palette=Spectral5, factors=sorted(df.cyl.unique()), end=1)

p = figure(plot_width=800, plot_height=300, title="Mean MPG by # Cylinders and Manufacturer",
           x_range=group, toolbar_location=None, tools="")

p.vbar(x='cyl_mfr', top='mpg_mean', width=1, source=source,
       line_color="white", fill_color=index_cmap, )

p.y_range.start = 0
p.x_range.range_padding = 0.05
p.xgrid.grid_line_color = None
p.xaxis.axis_label = "Manufacturer grouped by # Cylinders"
p.xaxis.major_label_orientation = 1.2
p.outline_line_color = None

p.add_tools(HoverTool(tooltips=[("MPG", "@mpg_mean"), ("Cyl, Mfr", "@cyl_mfr")]))
# NOTE(review): `show` is imported but never called in this snippet, so no
# HTML is written or displayed -- confirm whether a trailing `show(p)` was lost.
Example #45
0
from bokeh.io import show, output_file
from bokeh.models import ColumnDataSource
from bokeh.palettes import Spectral6
from bokeh.plotting import figure, output_file, show
from bokeh.transform import factor_cmap

# Bar chart of the average Titanic fare per ticket class, one colour per class.
output_file("bar_v_initial.html")

ticket = ['First', 'Second', 'Third']
counts = [84.15, 20.66, 13.68]

# The column must be named 'ticket' because the glyph, the legend and the
# colour mapper below all look it up under that name.
source = ColumnDataSource(data=dict(ticket=ticket, counts=counts))

p = figure(x_range=ticket, plot_height=250,
    title="Average Titanic Fare, by Class")

p.vbar(x='ticket', top='counts', width=0.9,
    source=source, legend="ticket",
    line_color='white',
    fill_color=factor_cmap('ticket', palette=Spectral6, factors=ticket))

p.xgrid.grid_line_color = None
p.y_range.start = 0
p.y_range.end = 90
p.legend.orientation = "horizontal"
p.legend.location = "top_center"

show(p)