Esempio n. 1
0
def plot_boxes(df: pd.DataFrame, cols: list = None, out_path: str = None, show_p: bool = True, return_p: bool = False,
               h: int = None, w: int = None, spacing: float = 0.05, theme: str = 'simple_white',
               renderer: str = 'browser', n_cols: int = 3, shared_yaxes: bool = True, cols_like: list = None):
    """Draw one box plot per column on a grid of plotly subplots.

    The columns plotted are `cols` when given, else the result of
    `get_cols_like(df, cols_like)` when `cols_like` is given, else the numeric
    columns of `df`. The grid is `n_cols` wide with as many rows as needed, and
    `spacing` is used for both the vertical and the horizontal gap.

    `h`, `w` and `theme` set the figure height, width and template. The figure
    is written to `out_path` when that is set, displayed with `renderer` when
    `show_p` is true, and returned only when `return_p` is true (otherwise the
    function returns None).
    """
    # resolve which columns get a box
    if not cols:
        cols = get_cols_like(df, cols_like) if cols_like else df._get_numeric_data().columns
    n_rows = math.ceil(len(cols) / n_cols)
    fig = make_subplots(rows=n_rows, cols=n_cols, shared_yaxes=shared_yaxes,
                        vertical_spacing=spacing, horizontal_spacing=spacing)
    # the k-th column goes to 0-based grid column k // n_rows and grid row
    # k % n_rows, i.e. each grid column is filled top to bottom before the next
    for k, name in enumerate(cols):
        grid_col, grid_row = divmod(k, n_rows)
        fig.add_trace(go.Box(name=name, y=df[name]), row=grid_row + 1, col=grid_col + 1)
    # optional sizing first, then the template
    for layout_key, layout_value in (('height', h), ('width', w)):
        if layout_value:
            fig.update_layout(**{layout_key: layout_value})
    fig.update_layout(template=theme)
    if out_path:
        plotly.offline.plot(fig, filename=out_path, auto_open=False)
    if show_p:
        fig.show(renderer=renderer)
    return fig if return_p else None
Esempio n. 2
0
def plot_hists(df: pd.DataFrame, cols: list = None, out_path: str = None, show_p: bool = True, return_p: bool = False,
               h: int = None, w: int = None, spacing: float = 0.05, theme: str = 'simple_white',
               renderer: str = 'browser', n_cols: int = 3, shared_yaxes: bool = True, cols_like: list = None,
               cumulative: bool = False, dim: str = None):
    """Plot one histogram subplot per column, optionally split by the values of `dim`.

    The columns plotted are `cols` when given, else the result of
    `get_cols_like(df, cols_like)` when `cols_like` is given, else the numeric
    columns of `df`. Subplots sit on a grid `n_cols` wide; each grid column is
    filled top to bottom before moving to the next one.

    When `dim` is set, every subplot gets one overlaid, half-transparent
    histogram per distinct non-null value of `df[dim]`, normalised as a
    probability density and sharing bin group 1. Without `dim` each subplot
    holds a single plain histogram. `cumulative` switches every histogram to
    its cumulative form.

    `h`, `w` and `theme` set the figure height, width and template. The figure
    is written to `out_path` when that is set, displayed with `renderer` when
    `show_p` is true, and returned only when `return_p` is true (otherwise the
    function returns None).
    """
    # get cols to plot
    if not cols:
        if cols_like:
            cols = get_cols_like(df, cols_like)
        else:
            cols = df._get_numeric_data().columns
    n_rows = math.ceil(len(cols) / n_cols)
    p = make_subplots(rows=n_rows, cols=n_cols, shared_yaxes=shared_yaxes,
                      vertical_spacing=spacing, horizontal_spacing=spacing)

    # Split the frame once per dim value instead of re-filtering it for every
    # column. NaN is dropped: `df[dim] == NaN` never matches, so it could only
    # ever contribute an empty trace.
    dim_frames = []
    if dim:
        dim_frames = [(dim_value, df[df[dim] == dim_value])
                      for dim_value in df[dim].dropna().unique()]

    # make each plot
    for i, col in enumerate(cols):
        # 0-based grid position: columns are filled top to bottom
        grid_col, grid_row = divmod(i, n_rows)
        if dim:
            for dim_value, dim_df in dim_frames:
                p.add_trace(
                    go.Histogram(
                        name=f'{col} - {dim_value}',
                        x=dim_df[col],
                        cumulative_enabled=cumulative,
                        bingroup=1,
                        histnorm='probability density'
                    ),
                    row=grid_row + 1,
                    col=grid_col + 1
                )
        else:
            p.add_trace(
                go.Histogram(
                    name=col, x=df[col], cumulative_enabled=cumulative
                ),
                row=grid_row + 1,
                col=grid_col + 1
            )

    # overlay styling applies to the whole figure, so set it once after all
    # traces exist rather than once per column
    if dim:
        p.update_layout(barmode='overlay')
        p.update_traces(opacity=0.5)

    if h:
        p.update_layout(height=h)
    if w:
        p.update_layout(width=w)
    p.update_layout(template=theme)
    if out_path:
        plotly.offline.plot(p, filename=out_path, auto_open=False)
    if show_p:
        p.show(renderer=renderer)
    if return_p:
        return p
Esempio n. 3
0
def plot_lines(df: pd.DataFrame,
               cols: list = None,
               cols_like: list = None,
               x: str = None,
               h: int = 300,
               w: int = 1200,
               t_str: str = 'box_zoom,pan,hover,reset,save',
               x_type: str = 'datetime',
               show_p: bool = True,
               t_loc: str = 'right',
               out_path: str = None,
               return_p: bool = False,
               palette: str = 'Category20',
               p_theme: str = 'light_minimal',
               notebook: bool = False):
    """Plot each column of `df` as a line on a single bokeh figure.

    The columns plotted are `cols` when given, else the result of
    `get_cols_like(df, cols_like)` when `cols_like` is given, else the numeric
    columns of `df`. `x` names the data-source column used for the x axis and
    defaults to the column created from the DataFrame index.

    The figure itself comes from `make_figure(h=h, w=w, x_type=x_type,
    t_loc=t_loc, t_str=t_str)`. Line colours are taken from the palette and
    reused cyclically when there are more columns than colours.

    `out_path` is passed to `output_file`, `p_theme` is set as the current
    document theme and `notebook` enables notebook output. The figure is shown
    when `show_p` is true and returned only when `return_p` is true (otherwise
    the function returns None).

    Raises:
        NotImplementedError: if `palette` is anything other than 'Category20'.
    """
    # get cols to plot
    if not cols:
        if cols_like:
            cols = get_cols_like(df, cols_like)
        else:
            cols = df._get_numeric_data().columns
    # define x axis if needed
    if not x:
        # An unnamed index has name None; ColumnDataSource then stores it as
        # 'index', or 'level_0' when 'index' is already a column.
        # NOTE(review): a MultiIndex is not handled here - confirm whether needed.
        x = df.index.name or ('level_0' if 'index' in df.columns else 'index')
    # define source
    source = ColumnDataSource(df)
    # define palette
    if palette == 'Category20':
        p_palette = Category20[20]
    else:
        raise NotImplementedError(f'... palette {palette} not implemented ...')
    n_colors = len(p_palette)
    p = make_figure(h=h, w=w, x_type=x_type, t_loc=t_loc, t_str=t_str)
    for i, col in enumerate(cols):
        # wrap around the palette so more columns than colours does not raise
        p.line(x, col, source=source, name=col, color=p_palette[i % n_colors])
        # NOTE(review): add_hover is called once per column with the full column
        # list; it may only need to run once after the loop - confirm against
        # the helper before moving it.
        add_hover(p, cols)
    if out_path:
        output_file(out_path)
    curdoc().theme = p_theme
    if notebook:
        output_notebook()
    if show_p:
        show(p)
    if return_p:
        return p
Esempio n. 4
0
def plot_heatmap(df: pd.DataFrame,
                 cols: list = None,
                 cols_like: list = None,
                 id_vars: list = None,
                 out_path: str = None,
                 show_p: bool = True,
                 return_p: bool = False,
                 h: int = None,
                 w: int = None,
                 theme: str = 'plotly_white',
                 renderer: str = 'browser',
                 colorscale: str = 'RdBu',
                 showscale: bool = False):
    """Plot a plotly heatmap with one row per column of `df`.

    The columns plotted are `cols` when given, else the result of
    `get_cols_like(df, cols_like)` when `cols_like` is given, else the numeric
    columns of `df`. The frame is melted to long form; the x axis is built from
    `id_vars`, the y axis is the column name and the colour is the value.

    `id_vars` defaults to the column(s) that `reset_index()` creates from the
    index, so an unnamed index works too. A single string is accepted as a
    one-element list. With several id columns their values are joined with ','
    into one x label per row.

    `h`, `w` and `theme` set the figure height, width and template. The figure
    is written to `out_path` when that is set, displayed with `renderer` when
    `show_p` is true, and returned only when `return_p` is true (otherwise the
    function returns None).
    """
    # get cols to plot
    if not cols:
        if cols_like:
            cols = get_cols_like(df, cols_like)
        else:
            cols = df._get_numeric_data().columns
    cols = list(cols)

    flat = df.reset_index()
    if not id_vars:
        # whatever reset_index added is the former index, whether it was named
        # or not (unnamed levels become 'index' / 'level_N')
        id_vars = [c for c in flat.columns if c not in df.columns]
    elif isinstance(id_vars, str):
        id_vars = [id_vars]
    else:
        id_vars = list(id_vars)

    # name the melted columns explicitly: melt would otherwise reuse
    # `columns.name` for the variable column when the frame has one
    long_df = pd.melt(flat, id_vars=id_vars, value_vars=cols,
                      var_name='variable', value_name='value')

    if len(id_vars) == 1:
        x_values = long_df[id_vars[0]]
    else:
        # several id columns: combine them into a single label per row
        x_values = long_df[id_vars].astype(str).agg(','.join, axis=1)

    p = go.Figure(data=go.Heatmap(z=long_df['value'],
                                  x=x_values,
                                  y=long_df['variable'],
                                  colorscale=colorscale,
                                  showscale=showscale))
    if h:
        p.update_layout(height=h)
    if w:
        p.update_layout(width=w)
    p.update_layout(template=theme)
    if out_path:
        plotly.offline.plot(p, filename=out_path, auto_open=False)
    if show_p:
        p.show(renderer=renderer)
    if return_p:
        return p
Esempio n. 5
0
def plot_scatters(df: pd.DataFrame,
                  cols: list = None,
                  cols_like: list = None,
                  x: str = None,
                  title: str = None,
                  out_path: str = None,
                  show_p: bool = True,
                  return_p: bool = False,
                  h: int = None,
                  w: int = None,
                  marker_size: int = 4,
                  vertical_spacing: float = 0.1,
                  horizontal_spacing: float = 0.1,
                  theme: str = 'simple_white',
                  n_cols: int = 3,
                  renderer: str = 'browser',
                  show_axis: bool = False,
                  show_titles: bool = False,
                  normalize_method: str = None,
                  colors: list = None):
    """Plot a grid of pairwise scatter plots with plotly.

    One subplot is drawn for every 2-combination of the selected columns. The
    columns are `cols` when given, else the result of
    `get_cols_like(df, cols_like)` when `cols_like` is given, else the numeric
    columns of `df`. The grid is `n_cols` wide; each grid column is filled top
    to bottom before moving to the next one.

    `normalize_method='minmax'` rescales the plotted columns to [0, 1]; other
    columns of `df` are not touched. `colors` supplies one marker colour value
    per row and defaults to the constant 1 for every row. With `show_titles`
    each subplot is titled '<x> vs <y>' for the pair it shows. `show_axis`
    toggles axis lines and tick labels. The `x` parameter is accepted but not
    used.

    `title`, `h`, `w` and `theme` set the figure title, height, width and
    template. When `out_path` is set its parent directory is created if needed
    and the figure is written there. The figure is displayed with `renderer`
    when `show_p` is true and returned only when `return_p` is true (otherwise
    the function returns None).

    Raises:
        ValueError: if fewer than two columns are selected.
    """
    # get cols to plot
    if not cols:
        if cols_like:
            cols = get_cols_like(df, cols_like)
        else:
            cols = df._get_numeric_data().columns
    cols = list(cols)

    # build the pairs once; they drive the grid size, the titles and the traces
    pairs = list(itertools.combinations(cols, 2))
    if not pairs:
        raise ValueError('plot_scatters needs at least two columns to plot')

    # normalize if specified - only the plotted columns, so unrelated
    # non-numeric columns in df cannot break the arithmetic
    if normalize_method == 'minmax':
        plot_data = df[cols]
        df = (plot_data - plot_data.min()) / (plot_data.max() - plot_data.min())

    if colors is None or len(colors) == 0:
        colors = [1] * len(df)

    n_rows = math.ceil(len(pairs) / n_cols)

    def grid_position(i):
        """Return the 1-based (row, col) of the i-th pair; columns fill top to bottom."""
        grid_col, grid_row = divmod(i, n_rows)
        return grid_row + 1, grid_col + 1

    if show_titles:
        # make_subplots reads titles in row-major order, while the traces are
        # placed column by column, so put each title in the slot of the cell
        # its pair is drawn in; unused cells keep an empty title
        titles = [''] * (n_rows * n_cols)
        for i, (x_name, y_name) in enumerate(pairs):
            i_row, i_col = grid_position(i)
            titles[(i_row - 1) * n_cols + (i_col - 1)] = f'{x_name} vs {y_name}'
        subplot_titles = tuple(titles)
    else:
        subplot_titles = None

    p = make_subplots(rows=n_rows,
                      cols=n_cols,
                      vertical_spacing=vertical_spacing,
                      horizontal_spacing=horizontal_spacing,
                      subplot_titles=subplot_titles)

    # make each plot
    axis_style = dict(title_standoff=0,
                      showline=show_axis,
                      linewidth=1,
                      linecolor='grey',
                      showticklabels=show_axis,
                      ticks='')
    for i, (x_name, y_name) in enumerate(pairs):
        i_row, i_col = grid_position(i)
        p.add_trace(go.Scatter(x=df[x_name],
                               y=df[y_name],
                               name=f'{x_name} vs {y_name}',
                               mode='markers',
                               marker=dict(size=marker_size, color=colors)),
                    row=i_row,
                    col=i_col)
        p.update_xaxes(title_text=x_name, row=i_row, col=i_col, **axis_style)
        p.update_yaxes(title_text=y_name, row=i_row, col=i_col, **axis_style)

    p.update_layout(showlegend=False)
    if title:
        p.update_layout(title_text=title)
    if h:
        p.update_layout(height=h)
    if w:
        p.update_layout(width=w)
    p.update_layout(template=theme)
    if out_path:
        # a bare file name has no directory part; makedirs('') would raise
        out_dir = os.path.dirname(out_path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        plotly.offline.plot(p, filename=out_path, auto_open=False)
    if show_p:
        p.show(renderer=renderer)
    if return_p:
        return p
Esempio n. 6
0
def plot_lines(df: pd.DataFrame,
               cols: list = None,
               cols_like: list = None,
               x: str = None,
               title: str = None,
               slider: bool = True,
               out_path: str = None,
               show_p: bool = True,
               return_p: bool = False,
               h: int = None,
               w: int = None,
               theme: str = 'simple_white',
               lw: int = 1,
               renderer: str = 'browser',
               stacked: bool = False,
               filltozero: bool = False,
               shade_regions: list = None,
               shade_color: str = 'Yellow',
               shade_opacity: float = 0.2,
               shade_line_width: int = 0,
               marker_list: list = None,
               marker_mode: str = "markers",
               marker_position: str = "bottom center",
               marker_color: str = 'Red',
               marker_size: int = 5,
               marker_symbol: str = 'circle-open',
               normalize_method: str = None):
    """Plot each column of `df` as a line on one plotly figure.

    The columns plotted are `cols` when given, else the result of
    `get_cols_like(df, cols_like)` when `cols_like` is given, else the numeric
    columns of `df`. The x axis is the column named by `x`, or the index when
    `x` is not given; an int64 index is converted with
    `pd.to_datetime(..., unit='s')`.

    `normalize_method='minmax'` rescales the plotted columns to [0, 1]; the x
    values are taken before normalisation and are left as they are. `stacked`
    puts all lines in stack group 'one' and `filltozero` fills each line down
    to zero.

    `shade_regions` is a list of `(x_from, x_to, color)` items drawn as
    full-height rectangles; a 2-item `(x_from, x_to)` entry uses `shade_color`.
    `marker_list` is a list of `(x_at, label)` items drawn as markers at y=0.
    Regions and markers that fall outside the range of the x values are
    skipped.

    `title`, `slider`, `h`, `w` and `theme` set the figure title, range slider,
    height, width and template. When `out_path` is set its parent directory is
    created if needed and the figure is written there. The figure is displayed
    with `renderer` when `show_p` is true and returned only when `return_p` is
    true (otherwise the function returns None).
    """
    # set stackgroup / fill from the flags
    stackgroup = 'one' if stacked else None
    fill = 'tozeroy' if filltozero else None

    # create figure object
    p = go.Figure()

    # get cols to plot
    if not cols:
        if cols_like:
            cols = get_cols_like(df, cols_like)
        else:
            cols = df._get_numeric_data().columns
    cols = list(cols)

    # define x axis before normalising, so the x values keep their own units
    if not x:
        # if the index looks like int64 then treat it as epoch seconds
        if str(df.index.dtype) == 'int64':
            x = pd.to_datetime(df.index, unit='s')
        else:
            x = df.index
    else:
        x = df[x]

    # normalize if specified - only the plotted columns, so unrelated
    # non-numeric columns in df cannot break the arithmetic
    if normalize_method == 'minmax':
        plot_data = df[cols]
        df = (plot_data - plot_data.min()) / (plot_data.max() - plot_data.min())

    for col in cols:
        p.add_trace(
            go.Scatter(x=x,
                       y=df[col],
                       name=col,
                       line=dict(width=lw),
                       fill=fill,
                       stackgroup=stackgroup,
                       hoverlabel=dict(namelength=-1)))
    if title:
        p.update_layout(title_text=title)
    if slider:
        p.update_layout(xaxis_rangeslider_visible=slider)
    if h:
        p.update_layout(height=h)
    if w:
        p.update_layout(width=w)

    # the x range is only needed to filter regions / markers; compute it once
    if shade_regions or marker_list:
        x_min, x_max = x.min(), x.max()

    # add any shaded regions
    if shade_regions:
        shapes_to_add = []
        for region in shade_regions:
            x_from, x_to = region[0], region[1]
            # per-region colour wins; fall back to the shade_color argument
            region_color = region[2] if len(region) > 2 else shade_color
            # only draw regions that lie inside the plotted data
            if x_from >= x_min and x_to <= x_max:
                shapes_to_add.append(
                    dict(type="rect",
                         xref="x",
                         yref="paper",
                         x0=x_from,
                         y0=0,
                         x1=x_to,
                         y1=1,
                         fillcolor=region_color,
                         opacity=shade_opacity,
                         layer="below",
                         line_width=shade_line_width))
        # now add relevant shapes
        p.update_layout(shapes=shapes_to_add)

    # add any markers
    if marker_list:
        for x_at, marker_label in marker_list:
            # only draw markers that lie inside the plotted data
            if x_min <= x_at <= x_max:
                p.add_trace(
                    go.Scatter(x=[x_at],
                               y=[0],
                               mode=marker_mode,
                               text=[str(marker_label)],
                               textposition=marker_position,
                               marker=dict(symbol=marker_symbol,
                                           color=marker_color,
                                           size=marker_size),
                               showlegend=False))

    p.update_layout(template=theme)

    if out_path:
        # a bare file name has no directory part; makedirs('') would raise
        out_dir = os.path.dirname(out_path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        plotly.offline.plot(p, filename=out_path, auto_open=False)
    if show_p:
        p.show(renderer=renderer)
    if return_p:
        return p
Esempio n. 7
0
def plot_lines_grid(df: pd.DataFrame,
                    cols: list = None,
                    cols_like: list = None,
                    x: str = None,
                    title: str = None,
                    slider: bool = False,
                    out_path: str = None,
                    show_p: bool = True,
                    return_p: bool = False,
                    h: int = None,
                    w: int = None,
                    vertical_spacing: float = 0.002,
                    theme: str = 'simple_white',
                    lw: int = 1,
                    renderer: str = 'browser',
                    shade_regions: list = None,
                    shade_opacity: float = 0.5,
                    shade_line_width: int = 0,
                    marker_list: list = None,
                    marker_mode: str = "markers",
                    marker_position: str = "bottom center",
                    marker_color: str = 'Red',
                    marker_size: int = 5,
                    marker_symbol: str = 'circle-open',
                    h_each: int = None,
                    legend: bool = True,
                    yaxes_visible: bool = True,
                    xaxes_visible: bool = True,
                    subplot_titles: list = None,
                    subplot_titles_size: int = 12,
                    subplot_titles_x: float = 0.2,
                    subplot_titles_color: str = 'grey'):
    """Plot lines with plotly as a single-column stack of subplots sharing the x axis.

    Each entry of `cols` gets its own row. An entry may itself be a list of
    column names, in which case all of them are drawn on that row. `cols`
    defaults to the result of `get_cols_like(df, cols_like)` when `cols_like`
    is given, else to the numeric columns of `df`. The x axis is the column
    named by `x`, or the index when `x` is not given.

    `subplot_titles` defaults to the column names, with grouped entries joined
    by ', '. Every title is placed at `subplot_titles_x` with the given font
    size and colour.

    `shade_regions` is a list of `(x_from, x_to, color)` items drawn as
    full-height rectangles, and `marker_list` is a list of `(x_at, label)`
    items drawn as markers at y=0. Regions and markers that fall outside the
    range of the x values are skipped.

    `h_each`, when set, overrides `h` with `len(cols) * h_each`. `title`,
    `slider`, `w`, `theme`, `legend`, `yaxes_visible` and `xaxes_visible` set
    the corresponding figure options. When `out_path` is set its parent
    directory is created if needed and the figure is written there. The figure
    is displayed with `renderer` when `show_p` is true and returned only when
    `return_p` is true (otherwise the function returns None).
    """

    # get cols to plot
    if not cols:
        if cols_like:
            cols = get_cols_like(df, cols_like)
        else:
            cols = df._get_numeric_data().columns
    # a plain list, so it can be reused below as ordinary subplot titles
    cols = list(cols)

    # define x axis if needed
    if not x:
        x = df.index
    else:
        x = df[x]

    # define subplot titles if needed; a grouped row gets one combined title
    if not subplot_titles:
        subplot_titles = [
            ', '.join(str(c) for c in col) if isinstance(col, list) else col
            for col in cols
        ]

    # make subplots
    p = make_subplots(rows=len(cols),
                      cols=1,
                      shared_xaxes=True,
                      vertical_spacing=vertical_spacing,
                      subplot_titles=subplot_titles)

    # update subplot titles
    for annotation in p['layout']['annotations']:
        annotation['x'] = subplot_titles_x
        annotation['font'] = {
            'size': subplot_titles_size,
            'color': subplot_titles_color
        }

    # add lines - treat a single column as a group of one so both cases share code
    for i, col in enumerate(cols):
        group = col if isinstance(col, list) else [col]
        for c in group:
            p.add_trace(go.Scatter(x=x,
                                   y=df[c],
                                   name=c,
                                   line=dict(width=lw),
                                   hoverlabel=dict(namelength=-1)),
                        row=(1 + i),
                        col=1)

    if title:
        p.update_layout(title_text=title)
    if slider:
        p.update_layout(xaxis_rangeslider_visible=slider)
    if h_each:
        h = len(cols) * h_each
    if h:
        p.update_layout(height=h)
    if w:
        p.update_layout(width=w)
    p.update_layout(template=theme)

    # the x range is only needed to filter regions / markers; compute it once
    if shade_regions or marker_list:
        x_min, x_max = x.min(), x.max()

    # add any shaded regions
    if shade_regions:
        shapes_to_add = []
        for x_from, x_to, shade_color in shade_regions:
            # only draw regions that lie inside the plotted data
            if x_from >= x_min and x_to <= x_max:
                shapes_to_add.append(
                    dict(type="rect",
                         xref="x",
                         x0=x_from,
                         y0=0,
                         x1=x_to,
                         y1=1,
                         fillcolor=shade_color,
                         opacity=shade_opacity,
                         layer="below",
                         line_width=shade_line_width,
                         yref='paper'))
        # now add relevant shapes
        p.update_layout(shapes=shapes_to_add)

    # add any markers
    if marker_list:
        for x_at, marker_label in marker_list:
            # only draw markers that lie inside the plotted data
            if x_min <= x_at <= x_max:
                p.add_trace(
                    go.Scatter(x=[x_at],
                               y=[0],
                               mode=marker_mode,
                               text=[str(marker_label)],
                               textposition=marker_position,
                               marker=dict(symbol=marker_symbol,
                                           color=marker_color,
                                           size=marker_size),
                               showlegend=False))

    # some other options
    p.update_layout(showlegend=legend)
    p.update_yaxes(visible=yaxes_visible)
    p.update_xaxes(visible=xaxes_visible)

    # save file
    if out_path:
        # a bare file name has no directory part; makedirs('') would raise
        out_dir = os.path.dirname(out_path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        plotly.offline.plot(p, filename=out_path, auto_open=False)

    if show_p:
        p.show(renderer=renderer)

    if return_p:
        return p