Ejemplo n.º 1
0
def create_facet_grid(df,
                      x=None,
                      y=None,
                      facet_row=None,
                      facet_col=None,
                      color_name=None,
                      colormap=None,
                      color_is_cat=False,
                      facet_row_labels=None,
                      facet_col_labels=None,
                      height=None,
                      width=None,
                      trace_type='scatter',
                      scales='fixed',
                      dtick_x=None,
                      dtick_y=None,
                      show_boxes=True,
                      ggplot2=False,
                      binsize=1,
                      **kwargs):
    r"""
    Returns figure for facet grid.

    :param (pd.DataFrame) df: the dataframe of columns for the facet grid.
    :param (str) x: the name of the dataframe column for the x axis data.
    :param (str) y: the name of the dataframe column for the y axis data.
    :param (str) facet_row: the name of the dataframe column that is used to
        facet the grid into row panels.
    :param (str) facet_col: the name of the dataframe column that is used to
        facet the grid into column panels.
    :param (str) color_name: the name of your dataframe column that will
        function as the colormap variable.
    :param (str|list|dict) colormap: the param that determines how the
        color_name column colors the data. If the dataframe contains numeric
        data, then a dictionary of colors will group the data categorically
        while a Plotly Colorscale name or a custom colorscale will treat it
        numerically. To learn more about colors and types of colormap, run
        `help(plotly.colors)`.
    :param (bool) color_is_cat: determines whether a numerical column for the
        colormap will be treated as categorical (True) or sequential (False).
            Default = False.
    :param (str|dict) facet_row_labels: set to either 'name' or a dictionary
        of all the unique values in the faceting row mapped to some text to
        show up in the label annotations. If None, labeling works like usual.
    :param (str|dict) facet_col_labels: set to either 'name' or a dictionary
        of all the unique values in the faceting column mapped to some text
        to show up in the label annotations. If None, labeling works like
        usual.
    :param (int) height: the height of the facet grid figure. When falsy it
        defaults to max(600, 100 * number of facet rows).
    :param (int) width: the width of the facet grid figure. When falsy it
        defaults to max(600, 100 * number of facet columns).
    :param (str) trace_type: decides the type of plot to appear in the
        facet grid. The options are 'scatter', 'scattergl', 'histogram',
        'bar', and 'box'.
        Default = 'scatter'.
    :param (str) scales: determines if axes have fixed ranges or not. Valid
        settings are 'fixed' (all axes fixed), 'free_x' (x axis free only),
        'free_y' (y axis free only) or 'free' (both axes free). For any
        trace_type other than 'scatter' and 'scattergl' this is forced to
        'free'.
    :param (float) dtick_x: determines the distance between each tick on the
        x-axis. Default is None which means dtick_x is set automatically.
    :param (float) dtick_y: determines the distance between each tick on the
        y-axis. Default is None which means dtick_y is set automatically.
    :param (bool) show_boxes: draws grey boxes behind the facet titles.
    :param (bool) ggplot2: draws the facet grid in the style of `ggplot2`. See
        http://ggplot2.tidyverse.org/reference/facet_grid.html for reference.
        In this mode markers are always drawn in black, regardless of any
        marker color passed through kwargs.
        Default = False
    :param (int) binsize: groups all data into bins of a given length.
        NOTE: this argument is not referenced anywhere in the function body.
    :param (dict) kwargs: a dictionary of scatterplot arguments. A 'marker'
        entry is split off and handled separately from the other trace
        arguments; the dictionary passed as 'marker' is copied, never
        modified.

    :raises (PlotlyError): if df is not a pandas DataFrame, if x/y are
        missing for scatter traces, if a referenced column is not in df, if
        'scales' or 'trace_type' is invalid, if a label or colormap
        dictionary does not cover every unique value, or if the color_name
        column holds values that are neither strings nor numbers while
        color_is_cat is False.

    Example 1: One Way Faceting
    ```
    import plotly.plotly as py
    import plotly.figure_factory as ff

    import pandas as pd

    mpg = pd.read_table('https://raw.githubusercontent.com/plotly/datasets/master/mpg_2017.txt')

    fig = ff.create_facet_grid(
        mpg,
        x='displ',
        y='cty',
        facet_col='cyl',
    )
    py.iplot(fig, filename='facet_grid_mpg_one_way_facet')
    ```

    Example 2: Two Way Faceting
    ```
    import plotly.plotly as py
    import plotly.figure_factory as ff

    import pandas as pd

    mpg = pd.read_table('https://raw.githubusercontent.com/plotly/datasets/master/mpg_2017.txt')

    fig = ff.create_facet_grid(
        mpg,
        x='displ',
        y='cty',
        facet_row='drv',
        facet_col='cyl',
    )
    py.iplot(fig, filename='facet_grid_mpg_two_way_facet')
    ```

    Example 3: Categorical Coloring
    ```
    import plotly.plotly as py
    import plotly.figure_factory as ff

    import pandas as pd

    mtcars = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/mtcars.csv')

    fig = ff.create_facet_grid(
        mtcars,
        x='mpg',
        y='wt',
        facet_col='cyl',
        color_name='cyl',
        color_is_cat=True,
    )
    py.iplot(fig, filename='facet_grid_mpg_default_colors')
    ```

    Example 4: Sequential Coloring
    ```
    import plotly.plotly as py
    import plotly.figure_factory as ff

    import pandas as pd

    tips = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/tips.csv')

    fig = ff.create_facet_grid(
        tips,
        x='total_bill',
        y='tip',
        facet_row='sex',
        facet_col='smoker',
        color_name='size',
        colormap='Viridis',
    )
    py.iplot(fig, filename='facet_grid_tips_sequential_colors')
    ```

    Example 5: Custom labels
    ```
    import plotly.plotly as py
    import plotly.figure_factory as ff

    import pandas as pd

    mtcars = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/mtcars.csv')

    fig = ff.create_facet_grid(
        mtcars,
        x='wt',
        y='mpg',
        facet_col='cyl',
        facet_col_labels={4: "$\\alpha$", 6: '$\\beta$', 8: '$\\sqrt[y]{x}$'},
    )

    py.iplot(fig, filename='facet_grid_mtcars_custom_labels')
    ```

    Example 6: Other Trace Type
    ```
    import plotly.plotly as py
    import plotly.figure_factory as ff

    import pandas as pd

    mtcars = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/mtcars.csv')

    fig = ff.create_facet_grid(
        mtcars,
        x='wt',
        facet_col='cyl',
        trace_type='histogram',
    )

    py.iplot(fig, filename='facet_grid_mtcars_other_trace_type')
    ```
    """
    if not pd:
        raise exceptions.ImportError(
            "'pandas' must be installed for this figure_factory.")

    if not isinstance(df, pd.DataFrame):
        raise exceptions.PlotlyError("You must input a pandas DataFrame.")

    # make sure all columns are of homogeneous datatype
    utils.validate_dataframe(df)

    if trace_type in ['scatter', 'scattergl']:
        if not x or not y:
            raise exceptions.PlotlyError(
                "You need to input 'x' and 'y' if you are you are using a "
                "trace_type of 'scatter' or 'scattergl'.")

    for key in [x, y, facet_row, facet_col, color_name]:
        if key is not None:
            try:
                df[key]
            except KeyError:
                raise exceptions.PlotlyError(
                    "x, y, facet_row, facet_col and color_name must be keys "
                    "in your dataframe.")

    # autoscale histogram bars (and every other non-scatter trace type)
    if trace_type not in ['scatter', 'scattergl']:
        scales = 'free'

    # validate scales
    if scales not in ['fixed', 'free_x', 'free_y', 'free']:
        raise exceptions.PlotlyError(
            "'scales' must be set to 'fixed', 'free_x', 'free_y' and 'free'.")

    if trace_type not in VALID_TRACE_TYPES:
        raise exceptions.PlotlyError(
            "'trace_type' must be in {}".format(VALID_TRACE_TYPES))

    subplot_spacing = 0.06 if trace_type == 'histogram' else 0.015

    # Separate the marker settings from the remaining trace kwargs. The
    # marker dict is copied so that removing 'color' and filling in the
    # defaults below never alters the dictionary the caller passed in.
    kwargs_marker = dict(kwargs.pop('marker', None) or {})
    marker_color = kwargs_marker.pop('color', None)
    kwargs_trace = kwargs

    if 'size' not in kwargs_marker:
        kwargs_marker['size'] = 5 if ggplot2 else 8

    # Default trace opacity for the non-ggplot2 look; an opacity given by the
    # caller (on the trace or on the marker) is left untouched.
    if 'opacity' not in kwargs_marker and not ggplot2:
        kwargs_trace.setdefault('opacity', 0.6)

    if 'line' not in kwargs_marker:
        if ggplot2:
            kwargs_marker['line'] = {}
        else:
            kwargs_marker['line'] = {'color': 'darkgrey', 'width': 1}

    # default marker color: ggplot2 style always uses black, otherwise the
    # caller's color wins and plotly blue is the fallback
    if ggplot2:
        marker_color = 'rgb(0, 0, 0)'
    elif not marker_color:
        marker_color = 'rgb(31, 119, 180)'

    num_of_rows = 1
    num_of_cols = 1
    flipped_rows = False
    flipped_cols = False
    if facet_row:
        row_values = df[facet_row].unique()
        num_of_rows = len(row_values)
        flipped_rows = _is_flipped(num_of_rows)
        if isinstance(facet_row_labels, dict):
            # every unique facet value needs a custom label
            for key in row_values:
                if key not in facet_row_labels:
                    raise exceptions.PlotlyError(
                        CUSTOM_LABEL_ERROR.format(row_values.tolist()))
    if facet_col:
        col_values = df[facet_col].unique()
        num_of_cols = len(col_values)
        flipped_cols = _is_flipped(num_of_cols)
        if isinstance(facet_col_labels, dict):
            for key in col_values:
                if key not in facet_col_labels:
                    raise exceptions.PlotlyError(
                        CUSTOM_LABEL_ERROR.format(col_values.tolist()))

    show_legend = False
    if color_name:
        first_color_value = df[color_name].iloc[0]
        treat_as_text = isinstance(first_color_value, str) or color_is_cat
        if not (treat_as_text or isinstance(first_color_value, Number)):
            # Without this guard no figure would be built and the function
            # would fail later with an unrelated UnboundLocalError.
            raise exceptions.PlotlyError(
                "The values of the 'color_name' column must be strings or "
                "numbers unless 'color_is_cat' is set to True.")

        if treat_as_text or isinstance(colormap, dict):
            # categorical coloring: one color (and legend entry) per value
            show_legend = True
            color_values = df[color_name].unique()
            if isinstance(colormap, dict):
                clrs.validate_colors_dict(colormap, 'rgb')

                for val in color_values:
                    if val not in colormap:
                        raise exceptions.PlotlyError(
                            "If using 'colormap' as a dictionary, make sure "
                            "all the values of the colormap column are in "
                            "the keys of your dictionary.")
            else:
                # cycle through the default plotly colors
                default_colors = clrs.DEFAULT_PLOTLY_COLORS
                colormap = {}
                for j, val in enumerate(color_values):
                    colormap[val] = default_colors[j % len(default_colors)]

            fig, annotations = _facet_grid_color_categorical(
                df, x, y, facet_row, facet_col, color_name, colormap,
                num_of_rows, num_of_cols, facet_row_labels, facet_col_labels,
                trace_type, flipped_rows, flipped_cols, show_boxes,
                subplot_spacing, marker_color, kwargs_trace, kwargs_marker)
        else:
            # numerical coloring: resolve the colorscale to use
            if isinstance(colormap, list):
                colorscale_list = colormap
                clrs.validate_colorscale(colorscale_list)
            elif isinstance(colormap, str):
                if colormap not in clrs.PLOTLY_SCALES:
                    raise exceptions.PlotlyError(
                        "If 'colormap' is a string, it must be the name "
                        "of a Plotly Colorscale. The available colorscale "
                        "names are {}".format(clrs.PLOTLY_SCALES.keys()))
                colorscale_list = clrs.PLOTLY_SCALES[colormap]
            else:
                colorscale_list = clrs.PLOTLY_SCALES['Reds']

            fig, annotations = _facet_grid_color_numerical(
                df, x, y, facet_row, facet_col, color_name,
                colorscale_list, num_of_rows, num_of_cols,
                facet_row_labels, facet_col_labels, trace_type,
                flipped_rows, flipped_cols, show_boxes, subplot_spacing,
                marker_color, kwargs_trace, kwargs_marker)
    else:
        fig, annotations = _facet_grid(
            df, x, y, facet_row, facet_col, num_of_rows, num_of_cols,
            facet_row_labels, facet_col_labels, trace_type, flipped_rows,
            flipped_cols, show_boxes, subplot_spacing, marker_color,
            kwargs_trace, kwargs_marker)

    if not height:
        height = max(600, 100 * num_of_rows)
    if not width:
        width = max(600, 100 * num_of_cols)

    fig['layout'].update(height=height,
                         width=width,
                         title='',
                         paper_bgcolor='rgb(251, 251, 251)')
    if ggplot2:
        fig['layout'].update(plot_bgcolor=PLOT_BGCOLOR,
                             paper_bgcolor='rgb(255, 255, 255)',
                             hovermode='closest')

    # axis titles
    annotations.append(_axis_title_annotation(x, 'x'))
    annotations.append(_axis_title_annotation(y, 'y'))

    # legend
    fig['layout']['showlegend'] = show_legend
    fig['layout']['legend']['bgcolor'] = LEGEND_COLOR
    fig['layout']['legend']['borderwidth'] = LEGEND_BORDER_WIDTH
    fig['layout']['legend']['x'] = 1.05
    fig['layout']['legend']['y'] = 1
    fig['layout']['legend']['yanchor'] = 'top'

    # show_legend is only ever True when color_name is set
    if show_legend and ggplot2:
        annotations.append(_legend_annotation(color_name))
        fig['layout']['margin']['r'] = 150

    # assign annotations to figure
    fig['layout']['annotations'] = annotations

    # add shaded boxes behind axis titles
    if show_boxes and ggplot2:
        _add_shapes_to_fig(fig, ANNOT_RECT_COLOR, flipped_rows, flipped_cols)

    # all xaxis and yaxis labels
    axis_labels = {'x': [], 'y': []}
    for key in fig['layout']:
        if 'xaxis' in key:
            axis_labels['x'].append(key)
        elif 'yaxis' in key:
            axis_labels['y'].append(key)

    # If a string column holds at least one value that parses as an integer
    # (e.g. '2010'), force categorical axes so the strings are kept as labels.
    string_number_in_data = False
    for var in (x, y):
        if not var or string_number_in_data:
            continue
        if isinstance(df[var].iloc[0], str):
            for item in df[var]:
                try:
                    int(item)
                except ValueError:
                    continue
                string_number_in_data = True
                break

    if string_number_in_data:
        for axis_names in axis_labels.values():
            for axis_name in axis_names:
                fig['layout'][axis_name]['type'] = 'category'

    # 'scales' was validated above, so the lookup cannot fail
    fixed_axes = {
        'fixed': ['x', 'y'],
        'free_x': ['y'],
        'free_y': ['x'],
        'free': [],
    }[scales]
    custom_dticks = {'x': dtick_x, 'y': dtick_y}

    # fixed ranges
    for x_y in fixed_axes:
        min_ranges = []
        max_ranges = []
        for trace in fig['data']:
            if trace[x_y] is not None and len(trace[x_y]) > 0:
                min_ranges.append(min(trace[x_y]))
                max_ranges.append(max(trace[x_y]))
        min_ranges = [value for value in min_ranges if value is not None]
        max_ranges = [value for value in max_ranges if value is not None]

        # no trace carries data on this axis: there is no range to fix
        if not min_ranges or not max_ranges:
            continue

        min_range = min(min_ranges)
        max_range = max(max_ranges)

        range_are_numbers = (isinstance(min_range, Number)
                             and isinstance(max_range, Number))

        if range_are_numbers:
            min_range = math.floor(min_range)
            max_range = math.ceil(max_range)

            # Widen the frame by 5% of the data span on each side. The
            # padding is computed once so that both sides grow equally.
            padding = 0.05 * (max_range - min_range)
            min_range -= padding
            max_range += padding

            if custom_dticks[x_y]:
                dtick = custom_dticks[x_y]
            else:
                # NOTE(review): evaluates to 0 when the padded span is
                # smaller than MAX_TICKS_PER_AXIS - confirm that plotly
                # handles dtick=0 acceptably.
                dtick = math.floor(
                    (max_range - min_range) / MAX_TICKS_PER_AXIS)
        else:
            dtick = 1

        for axis_title in axis_labels[x_y]:
            axis = fig['layout'][axis_title]
            axis['dtick'] = dtick
            axis['ticklen'] = 4 if ggplot2 else 0
            axis['zeroline'] = False
            if ggplot2:
                axis['tickwidth'] = 1
                axis['gridcolor'] = GRID_COLOR
                # NOTE(review): the original first stored GRID_WIDTH here and
                # then immediately overwrote it with 2; the effective value
                # is kept.
                axis['gridwidth'] = 2
                axis['tickfont'] = {
                    'color': TICK_COLOR,
                    'size': 10
                }

            # insert ranges into fig
            if range_are_numbers:
                axis['range'] = [min_range, max_range]

    return fig
Ejemplo n.º 2
0
def create_gantt(
    df,
    colors=None,
    index_col=None,
    show_colorbar=False,
    reverse_colors=False,
    title="Gantt Chart",
    bar_width=0.2,
    showgrid_x=False,
    showgrid_y=False,
    height=600,
    width=None,
    tasks=None,
    task_names=None,
    data=None,
    group_tasks=False,
    show_hover_fill=True,
):
    """
    Returns figure for a gantt chart

    :param (array|list) df: input data for gantt chart. Must be either a
        a dataframe or a list. If dataframe, the columns must include
        'Task', 'Start' and 'Finish'. Other columns can be included and
        used for indexing. If a list, its elements must be dictionaries
        with the same required column headers: 'Task', 'Start' and
        'Finish'.
    :param (str|list|dict|tuple) colors: either a plotly scale name, an
        rgb or hex color, a color tuple or a list of colors. An rgb color
        is of the form 'rgb(x, y, z)' where x, y, z belong to the interval
        [0, 255] and a color tuple is a tuple of the form (a, b, c) where
        a, b and c belong to [0, 1]. If colors is a list, it must
        contain the valid color types aforementioned as its members.
        If a dictionary, all values of the indexing column must be keys in
        colors.
    :param (str|float) index_col: the column header (if df is a data
        frame) that will function as the indexing column. If df is a list,
        index_col must be one of the keys in all the items of df.
    :param (bool) show_colorbar: determines if colorbar will be visible.
        Only applies if values in the index column are numeric.
    :param (bool) show_hover_fill: enables/disables the hovertext for the
        filled area of the chart.
    :param (bool) reverse_colors: reverses the order of selected colors.
        Only acts when the value is exactly True, and has no effect when
        colors is a dictionary (a value-to-color mapping has no order to
        reverse).
    :param (str) title: the title of the chart
    :param (float) bar_width: the width of the horizontal bars in the plot
    :param (bool) showgrid_x: show/hide the x-axis grid
    :param (bool) showgrid_y: show/hide the y-axis grid
    :param (float) height: the height of the chart
    :param (float) width: the width of the chart
    :param tasks: currently ignored; the chart builders are always called
        with tasks=None.
    :param task_names: currently ignored; the chart builders are always
        called with task_names=None.
    :param data: currently ignored; the chart builders are always called
        with data=None.
    :param (bool) group_tasks: passed through unchanged to the chart
        builder. Presumably controls whether tasks sharing a name are drawn
        on one row - confirm against the builder's implementation.

    :raises (PlotlyError): if index_col is not a key of the validated chart
        data, or if colors is a dictionary but no index_col was given.

    Example 1: Simple Gantt Chart

    >>> from plotly.figure_factory import create_gantt

    >>> # Make data for chart
    >>> df = [dict(Task="Job A", Start='2009-01-01', Finish='2009-02-28'),
    ...       dict(Task="Job B", Start='2009-03-05', Finish='2009-04-15'),
    ...       dict(Task="Job C", Start='2009-02-20', Finish='2009-05-30')]

    >>> # Create a figure
    >>> fig = create_gantt(df)
    >>> fig.show()


    Example 2: Index by Column with Numerical Entries

    >>> from plotly.figure_factory import create_gantt

    >>> # Make data for chart
    >>> df = [dict(Task="Job A", Start='2009-01-01',
    ...            Finish='2009-02-28', Complete=10),
    ...       dict(Task="Job B", Start='2009-03-05',
    ...            Finish='2009-04-15', Complete=60),
    ...       dict(Task="Job C", Start='2009-02-20',
    ...            Finish='2009-05-30', Complete=95)]

    >>> # Create a figure with Plotly colorscale
    >>> fig = create_gantt(df, colors='Blues', index_col='Complete',
    ...                    show_colorbar=True, bar_width=0.5,
    ...                    showgrid_x=True, showgrid_y=True)
    >>> fig.show()


    Example 3: Index by Column with String Entries

    >>> from plotly.figure_factory import create_gantt

    >>> # Make data for chart
    >>> df = [dict(Task="Job A", Start='2009-01-01',
    ...            Finish='2009-02-28', Resource='Apple'),
    ...       dict(Task="Job B", Start='2009-03-05',
    ...            Finish='2009-04-15', Resource='Grape'),
    ...       dict(Task="Job C", Start='2009-02-20',
    ...            Finish='2009-05-30', Resource='Banana')]

    >>> # Create a figure with Plotly colorscale
    >>> fig = create_gantt(df, colors=['rgb(200, 50, 25)', (1, 0, 1), '#6c4774'],
    ...                    index_col='Resource', reverse_colors=True,
    ...                    show_colorbar=True)
    >>> fig.show()


    Example 4: Use a dictionary for colors

    >>> from plotly.figure_factory import create_gantt
    >>> # Make data for chart
    >>> df = [dict(Task="Job A", Start='2009-01-01',
    ...            Finish='2009-02-28', Resource='Apple'),
    ...       dict(Task="Job B", Start='2009-03-05',
    ...            Finish='2009-04-15', Resource='Grape'),
    ...       dict(Task="Job C", Start='2009-02-20',
    ...            Finish='2009-05-30', Resource='Banana')]

    >>> # Make a dictionary of colors
    >>> colors = {'Apple': 'rgb(255, 0, 0)',
    ...           'Grape': 'rgb(170, 14, 200)',
    ...           'Banana': (1, 1, 0.2)}

    >>> # Create a figure with Plotly colorscale
    >>> fig = create_gantt(df, colors=colors, index_col='Resource',
    ...                    show_colorbar=True)

    >>> fig.show()

    Example 5: Use a pandas dataframe

    >>> from plotly.figure_factory import create_gantt
    >>> import pandas as pd

    >>> # Make data as a dataframe
    >>> df = pd.DataFrame([['Run', '2010-01-01', '2011-02-02', 10],
    ...                    ['Fast', '2011-01-01', '2012-06-05', 55],
    ...                    ['Eat', '2012-01-05', '2013-07-05', 94]],
    ...                   columns=['Task', 'Start', 'Finish', 'Complete'])

    >>> # Create a figure with Plotly colorscale
    >>> fig = create_gantt(df, colors='Blues', index_col='Complete',
    ...                    show_colorbar=True, bar_width=0.5,
    ...                    showgrid_x=True, showgrid_y=True)
    >>> fig.show()
    """
    # validate gantt input data
    chart = validate_gantt(df)

    if index_col:
        if index_col not in chart[0]:
            raise exceptions.PlotlyError(
                "In order to use an indexing column and assign colors to "
                "the values of the index, you must choose an actual "
                "column name in the dataframe or key if a list of "
                "dictionaries is being used.")

        # validate gantt index column
        index_list = [dictionary[index_col] for dictionary in chart]
        utils.validate_index(index_list)

    # Validate colors
    if isinstance(colors, dict):
        colors = clrs.validate_colors_dict(colors, "rgb")
    else:
        colors = clrs.validate_colors(colors, "rgb")

    # A dictionary has no reverse(); reversing only makes sense for the
    # ordered sequence produced for non-dict colors.
    if reverse_colors is True and not isinstance(colors, dict):
        colors.reverse()

    if not index_col:
        if isinstance(colors, dict):
            raise exceptions.PlotlyError(
                "Error. You have set colors to a dictionary but have not "
                "picked an index. An index is required if you are "
                "assigning colors to particular values in a dictioanry.")
        return gantt(
            chart,
            colors,
            title,
            bar_width,
            showgrid_x,
            showgrid_y,
            height,
            width,
            tasks=None,
            task_names=None,
            data=None,
            group_tasks=group_tasks,
            show_hover_fill=show_hover_fill,
            show_colorbar=show_colorbar,
        )

    # An index column was given: a dictionary maps index values to colors
    # directly, anything else is handled by the colorscale builder.
    if isinstance(colors, dict):
        return gantt_dict(
            chart,
            colors,
            title,
            index_col,
            show_colorbar,
            bar_width,
            showgrid_x,
            showgrid_y,
            height,
            width,
            tasks=None,
            task_names=None,
            data=None,
            group_tasks=group_tasks,
            show_hover_fill=show_hover_fill,
        )

    return gantt_colorscale(
        chart,
        colors,
        title,
        index_col,
        show_colorbar,
        bar_width,
        showgrid_x,
        showgrid_y,
        height,
        width,
        tasks=None,
        task_names=None,
        data=None,
        group_tasks=group_tasks,
        show_hover_fill=show_hover_fill,
    )
Ejemplo n.º 3
0
def create_violin(
    data,
    data_header=None,
    group_header=None,
    colors=None,
    use_colorscale=False,
    group_stats=None,
    rugplot=True,
    sort=False,
    height=450,
    width=600,
    title="Violin and Rug Plot",
):
    """
    **deprecated**, use instead the plotly.graph_objects trace
    :class:`plotly.graph_objects.Violin`.

    Returns a figure for a violin plot. Without ``group_header`` a single
    violin is drawn; with ``group_header`` the work is delegated to one of
    the grouped helpers depending on ``colors`` and ``use_colorscale``.

    :param (list|array) data: accepts either a list of numerical values,
        a list of dictionaries all with identical keys and at least one
        column of numeric values, or a pandas dataframe with at least one
        column of numbers.
    :param (str) data_header: the header of the data column to be used
        from an inputted pandas dataframe. Not applicable if 'data' is
        a list of numeric values.
    :param (str) group_header: applicable if grouping data by a variable.
        'group_header' must be set to the name of the grouping variable.
    :param (str|tuple|list|dict) colors: either a plotly scale name,
        an rgb or hex color, a color tuple, a list of colors or a
        dictionary. An rgb color is of the form 'rgb(x, y, z)' where
        x, y and z belong to the interval [0, 255] and a color tuple is a
        tuple of the form (a, b, c) where a, b and c belong to [0, 1].
        If colors is a list, it must contain valid color types as its
        members.
    :param (bool) use_colorscale: only applicable if grouping by another
        variable. Will implement a colorscale based on the first 2 colors
        of param colors. This means colors must be a list with at least 2
        colors in it (Plotly colorscales are accepted since they map to a
        list of two rgb colors). Default = False
    :param (dict) group_stats: a dictionary where each key is a unique
        value from the group_header column in data. Each value must be a
        number and will be used to color the violin plots if a colorscale
        is being used.
    :param (bool) rugplot: determines if a rugplot is drawn on the violin
        plot. Default = True
    :param (bool) sort: determines if violins are sorted
        alphabetically (True) or by input order (False). Default = False
    :param (float) height: the height of the violin plot.
    :param (float) width: the width of the violin plot.
    :param (str) title: the title of the violin plot.

    :raises: (PlotlyError) if ``data`` is an empty list or a list with
        non-numeric members (ungrouped case); if ``data_header`` is missing
        for dataframe input; if ``group_header`` is given but ``data`` is
        not a pandas DataFrame (including when pandas is unavailable); or,
        with ``use_colorscale``, if ``colors`` is a dictionary, has fewer
        than 2 colors, or ``group_stats`` is not a dictionary.

    Example 1: Single Violin Plot

    >>> from plotly.figure_factory import create_violin
    >>> import plotly.graph_objs as graph_objects

    >>> import numpy as np
    >>> from scipy import stats

    >>> # create list of random values
    >>> data_list = np.random.randn(100)

    >>> # create violin fig
    >>> fig = create_violin(data_list, colors='#604d9e')

    >>> # plot
    >>> fig.show()

    Example 2: Multiple Violin Plots with Qualitative Coloring

    >>> from plotly.figure_factory import create_violin
    >>> import plotly.graph_objs as graph_objects

    >>> import numpy as np
    >>> import pandas as pd
    >>> from scipy import stats

    >>> # create dataframe
    >>> np.random.seed(619517)
    >>> Nr=250
    >>> y = np.random.randn(Nr)
    >>> gr = np.random.choice(list("ABCDE"), Nr)
    >>> norm_params=[(0, 1.2), (0.7, 1), (-0.5, 1.4), (0.3, 1), (0.8, 0.9)]

    >>> for i, letter in enumerate("ABCDE"):
    ...     y[gr == letter] *= norm_params[i][1] + norm_params[i][0]
    >>> df = pd.DataFrame(dict(Score=y, Group=gr))

    >>> # create violin fig
    >>> fig = create_violin(df, data_header='Score', group_header='Group',
    ...                    sort=True, height=600, width=1000)

    >>> # plot
    >>> fig.show()

    Example 3: Violin Plots with Colorscale

    >>> from plotly.figure_factory import create_violin
    >>> import plotly.graph_objs as graph_objects

    >>> import numpy as np
    >>> import pandas as pd
    >>> from scipy import stats

    >>> # create dataframe
    >>> np.random.seed(619517)
    >>> Nr=250
    >>> y = np.random.randn(Nr)
    >>> gr = np.random.choice(list("ABCDE"), Nr)
    >>> norm_params=[(0, 1.2), (0.7, 1), (-0.5, 1.4), (0.3, 1), (0.8, 0.9)]

    >>> for i, letter in enumerate("ABCDE"):
    ...     y[gr == letter] *= norm_params[i][1] + norm_params[i][0]
    >>> df = pd.DataFrame(dict(Score=y, Group=gr))

    >>> # define header params
    >>> data_header = 'Score'
    >>> group_header = 'Group'

    >>> # make groupby object with pandas
    >>> group_stats = {}
    >>> groupby_data = df.groupby(group_header)

    >>> for group in "ABCDE":
    ...     data_from_group = groupby_data.get_group(group)[data_header]
    ...     # take a stat of the grouped data
    ...     stat = np.median(data_from_group)
    ...     # add to dictionary
    ...     group_stats[group] = stat

    >>> # create violin fig
    >>> fig = create_violin(df, data_header='Score', group_header='Group',
    ...                     height=600, width=1000, use_colorscale=True,
    ...                     group_stats=group_stats)

    >>> # plot
    >>> fig.show()
    """

    # Validate colors; dictionaries go through the dict-specific validator.
    if isinstance(colors, dict):
        valid_colors = clrs.validate_colors_dict(colors, "rgb")
    else:
        valid_colors = clrs.validate_colors(colors, "rgb")

    # ------------------------------------------------------------------
    # Ungrouped case: a single violin.
    # ------------------------------------------------------------------
    if group_header is None:
        if isinstance(data, list):
            if not data:
                raise exceptions.PlotlyError("If data is a list, it must be "
                                             "nonempty and contain either "
                                             "numbers or dictionaries.")

            if not all(isinstance(element, Number) for element in data):
                raise exceptions.PlotlyError("If data is a list, it must "
                                             "contain only numbers.")

        # `pd` may be falsy when pandas is not available, hence the guard.
        if pd and isinstance(data, pd.core.frame.DataFrame):
            if data_header is None:
                raise exceptions.PlotlyError("data_header must be the "
                                             "column name with the "
                                             "desired numeric data for "
                                             "the violin plot.")

            data = data[data_header].values.tolist()

        # NOTE(review): indexing with [0] assumes `valid_colors` is a list
        # here; a colors dict without a group header looks unsupported —
        # confirm.
        plot_data, plot_xrange = violinplot(data,
                                            fillcolor=valid_colors[0],
                                            rugplot=rugplot)

        layout = graph_objs.Layout(
            title=title,
            autosize=False,
            font=graph_objs.layout.Font(size=11),
            height=height,
            showlegend=False,
            width=width,
            xaxis=make_XAxis("", plot_xrange),
            yaxis=make_YAxis(""),
            hovermode="closest",
        )
        layout["yaxis"].update(
            dict(showline=False, showticklabels=False, ticks=""))

        return graph_objs.Figure(data=plot_data, layout=layout)

    # ------------------------------------------------------------------
    # Grouped case: requires a pandas DataFrame.
    # ------------------------------------------------------------------
    # Guard on `pd` first (as the ungrouped branch does) so that a missing
    # pandas yields the intended PlotlyError rather than an AttributeError
    # from `pd.core`.
    if not pd or not isinstance(data, pd.core.frame.DataFrame):
        raise exceptions.PlotlyError("Error. You must use a pandas "
                                     "DataFrame if you are using a "
                                     "group header.")

    if data_header is None:
        raise exceptions.PlotlyError("data_header must be the column "
                                     "name with the desired numeric "
                                     "data for the violin plot.")

    # All grouped helpers take the same positional arguments.
    helper_args = (
        data,
        data_header,
        group_header,
        valid_colors,
        use_colorscale,
        group_stats,
        rugplot,
        sort,
        height,
        width,
        title,
    )

    if use_colorscale is False:
        if isinstance(valid_colors, dict):
            # the colors dict choice is validated inside the helper
            return violin_dict(*helper_args)
        return violin_no_colorscale(*helper_args)

    # Colorscale requested: colors must be a list of >= 2 colors and
    # group_stats must map groups to numbers.
    if isinstance(valid_colors, dict):
        raise exceptions.PlotlyError("The colors param cannot be "
                                     "a dictionary if you are "
                                     "using a colorscale.")

    if len(valid_colors) < 2:
        raise exceptions.PlotlyError("colors must be a list with "
                                     "at least 2 colors. A "
                                     "Plotly scale is allowed.")

    if not isinstance(group_stats, dict):
        raise exceptions.PlotlyError("Your group_stats param "
                                     "must be a dictionary.")

    return violin_colorscale(*helper_args)
Ejemplo n.º 4
0
def create_gantt(df, colors=None, index_col=None, show_colorbar=False,
                 reverse_colors=False, title='Gantt Chart', bar_width=0.2,
                 showgrid_x=False, showgrid_y=False, height=600, width=900,
                 tasks=None, task_names=None, data=None, group_tasks=False):
    """
    Returns figure for a gantt chart

    :param (array|list) df: input data for gantt chart. Must be either a
        dataframe or a list. If dataframe, the columns must include
        'Task', 'Start' and 'Finish'. Other columns can be included and
        used for indexing. If a list, its elements must be dictionaries
        with the same required column headers: 'Task', 'Start' and
        'Finish'.
    :param (str|list|dict|tuple) colors: either a plotly scale name, an
        rgb or hex color, a color tuple or a list of colors. An rgb color
        is of the form 'rgb(x, y, z)' where x, y, z belong to the interval
        [0, 255] and a color tuple is a tuple of the form (a, b, c) where
        a, b and c belong to [0, 1]. If colors is a list, it must
        contain the valid color types aforementioned as its members.
        If a dictionary, all values of the indexing column must be keys in
        colors.
    :param (str|float) index_col: the column header (if df is a data
        frame) that will function as the indexing column. If df is a list,
        index_col must be one of the keys in all the items of df.
    :param (bool) show_colorbar: determines if colorbar will be visible.
        Only applies if values in the index column are numeric.
    :param (bool) reverse_colors: reverses the order of selected colors.
        Has no effect when colors is a dictionary, since a dictionary
        assigns colors by key rather than by position.
    :param (str) title: the title of the chart
    :param (float) bar_width: the width of the horizontal bars in the plot
    :param (bool) showgrid_x: show/hide the x-axis grid
    :param (bool) showgrid_y: show/hide the y-axis grid
    :param (float) height: the height of the chart
    :param (float) width: the width of the chart
    :param tasks: accepted but not used; the plotting helpers are always
        called with ``tasks=None``.
    :param task_names: accepted but not used; the plotting helpers are
        always called with ``task_names=None``.
    :param data: accepted but not used; the plotting helpers are always
        called with ``data=None``.
    :param (bool) group_tasks: forwarded unchanged to the plotting helper.

    :raises: (PlotlyError) if ``index_col`` is not a key of the first
        validated chart entry, or if ``colors`` is a dictionary and no
        ``index_col`` was given.

    Example 1: Simple Gantt Chart
    ```
    import plotly.plotly as py
    from plotly.figure_factory import create_gantt

    # Make data for chart
    df = [dict(Task="Job A", Start='2009-01-01', Finish='2009-02-28'),
          dict(Task="Job B", Start='2009-03-05', Finish='2009-04-15'),
          dict(Task="Job C", Start='2009-02-20', Finish='2009-05-30')]

    # Create a figure
    fig = create_gantt(df)

    # Plot the data
    py.iplot(fig, filename='Simple Gantt Chart', world_readable=True)
    ```

    Example 2: Index by Column with Numerical Entries
    ```
    import plotly.plotly as py
    from plotly.figure_factory import create_gantt

    # Make data for chart
    df = [dict(Task="Job A", Start='2009-01-01',
               Finish='2009-02-28', Complete=10),
          dict(Task="Job B", Start='2009-03-05',
               Finish='2009-04-15', Complete=60),
          dict(Task="Job C", Start='2009-02-20',
               Finish='2009-05-30', Complete=95)]

    # Create a figure with Plotly colorscale
    fig = create_gantt(df, colors='Blues', index_col='Complete',
                       show_colorbar=True, bar_width=0.5,
                       showgrid_x=True, showgrid_y=True)

    # Plot the data
    py.iplot(fig, filename='Numerical Entries', world_readable=True)
    ```

    Example 3: Index by Column with String Entries
    ```
    import plotly.plotly as py
    from plotly.figure_factory import create_gantt

    # Make data for chart
    df = [dict(Task="Job A", Start='2009-01-01',
               Finish='2009-02-28', Resource='Apple'),
          dict(Task="Job B", Start='2009-03-05',
               Finish='2009-04-15', Resource='Grape'),
          dict(Task="Job C", Start='2009-02-20',
               Finish='2009-05-30', Resource='Banana')]

    # Create a figure with a list of colors
    fig = create_gantt(df, colors=['rgb(200, 50, 25)', (1, 0, 1), '#6c4774'],
                       index_col='Resource', reverse_colors=True,
                       show_colorbar=True)

    # Plot the data
    py.iplot(fig, filename='String Entries', world_readable=True)
    ```

    Example 4: Use a dictionary for colors
    ```
    import plotly.plotly as py
    from plotly.figure_factory import create_gantt

    # Make data for chart
    df = [dict(Task="Job A", Start='2009-01-01',
               Finish='2009-02-28', Resource='Apple'),
          dict(Task="Job B", Start='2009-03-05',
               Finish='2009-04-15', Resource='Grape'),
          dict(Task="Job C", Start='2009-02-20',
               Finish='2009-05-30', Resource='Banana')]

    # Make a dictionary of colors
    colors = {'Apple': 'rgb(255, 0, 0)',
              'Grape': 'rgb(170, 14, 200)',
              'Banana': (1, 1, 0.2)}

    # Create a figure with the color dictionary
    fig = create_gantt(df, colors=colors, index_col='Resource',
                       show_colorbar=True)

    # Plot the data
    py.iplot(fig, filename='dictionary colors', world_readable=True)
    ```

    Example 5: Use a pandas dataframe
    ```
    import plotly.plotly as py
    from plotly.figure_factory import create_gantt

    import pandas as pd

    # Make data as a dataframe
    df = pd.DataFrame([['Run', '2010-01-01', '2011-02-02', 10],
                       ['Fast', '2011-01-01', '2012-06-05', 55],
                       ['Eat', '2012-01-05', '2013-07-05', 94]],
                      columns=['Task', 'Start', 'Finish', 'Complete'])

    # Create a figure with Plotly colorscale
    fig = create_gantt(df, colors='Blues', index_col='Complete',
                       show_colorbar=True, bar_width=0.5,
                       showgrid_x=True, showgrid_y=True)

    # Plot the data
    py.iplot(fig, filename='data with dataframe', world_readable=True)
    ```
    """
    # validate gantt input data
    chart = validate_gantt(df)

    if index_col:
        # Only the first entry is inspected for the key; the collection
        # below raises KeyError if a later entry lacks it.
        if index_col not in chart[0]:
            raise exceptions.PlotlyError(
                "In order to use an indexing column and assign colors to "
                "the values of the index, you must choose an actual "
                "column name in the dataframe or key if a list of "
                "dictionaries is being used.")

        # validate gantt index column
        utils.validate_index([entry[index_col] for entry in chart])

    # Validate colors
    if isinstance(colors, dict):
        colors = clrs.validate_colors_dict(colors, 'rgb')
    else:
        colors = clrs.validate_colors(colors, 'rgb')

    colors_by_key = isinstance(colors, dict)

    # A dict has no positional order to reverse (and no ``.reverse()``),
    # so reversal only applies to color lists. A reversed copy is built
    # instead of reversing in place so that the object handed back by the
    # validator is left untouched (it may be shared — not visible here).
    if reverse_colors is True and not colors_by_key:
        colors = list(reversed(colors))

    if not index_col:
        if colors_by_key:
            raise exceptions.PlotlyError(
                "Error. You have set colors to a dictionary but have not "
                "picked an index. An index is required if you are "
                "assigning colors to particular values in a dictioanry."
            )
        return gantt(
            chart, colors, title, bar_width, showgrid_x, showgrid_y,
            height, width, tasks=None, task_names=None, data=None,
            group_tasks=group_tasks
        )

    # Indexed chart: a color list is treated as a colorscale, a dict as an
    # explicit value -> color mapping.
    indexed_helper = gantt_dict if colors_by_key else gantt_colorscale
    return indexed_helper(
        chart, colors, title, index_col, show_colorbar, bar_width,
        showgrid_x, showgrid_y, height, width,
        tasks=None, task_names=None, data=None, group_tasks=group_tasks
    )
Ejemplo n.º 5
0
def create_scatterplotmatrix(df, index=None, endpts=None, diag='scatter',
                             height=500, width=500, size=6,
                             title='Scatterplot Matrix', colormap=None,
                             colormap_type='cat', dataframe=None,
                             headers=None, index_vals=None, **kwargs):
    """
    Returns data for a scatterplot matrix.

    :param (array) df: array of the data with column headers
    :param (str) index: name of the index column in data array
    :param (list|tuple) endpts: takes an increasing sequence of numbers
        that defines intervals on the real line. They are used to group
        the entries in an index of numbers into their corresponding
        interval and therefore can be treated as categorical data
    :param (str) diag: sets the chart type for the main diagonal plots.
        The options are 'scatter', 'histogram' and 'box'.
    :param (int|float) height: sets the height of the chart
    :param (int|float) width: sets the width of the chart
    :param (float) size: sets the marker size (in px)
    :param (str) title: the title label of the scatterplot matrix
    :param (str|tuple|list|dict) colormap: either a plotly scale name,
        an rgb or hex color, a color tuple, a list of colors or a
        dictionary. An rgb color is of the form 'rgb(x, y, z)' where
        x, y and z belong to the interval [0, 255] and a color tuple is a
        tuple of the form (a, b, c) where a, b and c belong to [0, 1].
        If colormap is a list, it must contain valid color types as its
        members.
        If colormap is a dictionary, all the string entries in
        the index column must be a key in colormap. In this case, the
        colormap_type is forced to 'cat' or categorical
    :param (str) colormap_type: determines how colormap is interpreted.
        Valid choices are 'seq' (sequential) and 'cat' (categorical). If
        'seq' is selected, only the first two colors in colormap will be
        considered (when colormap is a list) and the index values will be
        linearly interpolated between those two colors. This option is
        forced if all index values are numeric.
        If 'cat' is selected, a color from colormap will be assigned to
        each category from index, including the intervals if endpts is
        being used
    :param (list) dataframe: internal accumulator; the column values of
        df are appended to it. Defaults to a fresh empty list.
    :param (list) headers: internal accumulator; the column names of df
        (excluding the index column) are appended to it. Defaults to a
        fresh empty list.
    :param (list) index_vals: internal; replaced by the values of the
        index column when index is given.
    :param (dict) **kwargs: a dictionary of scatterplot arguments
        The only forbidden parameters are 'size', 'color' and
        'colorscale' in 'marker'

    :raises: (PlotlyError) if colormap is a string that is neither an
        rgb/hex color nor a Plotly colorscale name, if index is not a
        column of df, or if colormap is a dictionary that does not
        contain every value of the index column as a key.

    Example 1: Vanilla Scatterplot Matrix
    ```
    import plotly.plotly as py
    from plotly.graph_objs import graph_objs
    from plotly.figure_factory import create_scatterplotmatrix

    import numpy as np
    import pandas as pd

    # Create dataframe
    df = pd.DataFrame(np.random.randn(10, 2),
                    columns=['Column 1', 'Column 2'])

    # Create scatterplot matrix
    fig = create_scatterplotmatrix(df)

    # Plot
    py.iplot(fig, filename='Vanilla Scatterplot Matrix')
    ```

    Example 2: Indexing a Column
    ```
    import plotly.plotly as py
    from plotly.graph_objs import graph_objs
    from plotly.figure_factory import create_scatterplotmatrix

    import numpy as np
    import pandas as pd

    # Create dataframe with index
    df = pd.DataFrame(np.random.randn(10, 2),
                       columns=['A', 'B'])

    # Add another column of strings to the dataframe
    df['Fruit'] = pd.Series(['apple', 'apple', 'grape', 'apple', 'apple',
                             'grape', 'pear', 'pear', 'apple', 'pear'])

    # Create scatterplot matrix
    fig = create_scatterplotmatrix(df, index='Fruit', size=10)

    # Plot
    py.iplot(fig, filename = 'Scatterplot Matrix with Index')
    ```

    Example 3: Styling the Diagonal Subplots
    ```
    import plotly.plotly as py
    from plotly.graph_objs import graph_objs
    from plotly.figure_factory import create_scatterplotmatrix

    import numpy as np
    import pandas as pd

    # Create dataframe with index
    df = pd.DataFrame(np.random.randn(10, 4),
                       columns=['A', 'B', 'C', 'D'])

    # Add another column of strings to the dataframe
    df['Fruit'] = pd.Series(['apple', 'apple', 'grape', 'apple', 'apple',
                             'grape', 'pear', 'pear', 'apple', 'pear'])

    # Create scatterplot matrix
    fig = create_scatterplotmatrix(df, diag='box', index='Fruit', height=1000,
                                   width=1000)

    # Plot
    py.iplot(fig, filename = 'Scatterplot Matrix - Diagonal Styling')
    ```

    Example 4: Use a Theme to Style the Subplots
    ```
    import plotly.plotly as py
    from plotly.graph_objs import graph_objs
    from plotly.figure_factory import create_scatterplotmatrix

    import numpy as np
    import pandas as pd

    # Create dataframe with random data
    df = pd.DataFrame(np.random.randn(100, 3),
                       columns=['A', 'B', 'C'])

    # Create scatterplot matrix using a built-in
    # Plotly palette scale and indexing column 'A'
    fig = create_scatterplotmatrix(df, diag='histogram', index='A',
                                   colormap='Blues', height=800, width=800)

    # Plot
    py.iplot(fig, filename = 'Scatterplot Matrix - Colormap Theme')
    ```

    Example 5: Example 4 with Interval Factoring
    ```
    import plotly.plotly as py
    from plotly.graph_objs import graph_objs
    from plotly.figure_factory import create_scatterplotmatrix

    import numpy as np
    import pandas as pd

    # Create dataframe with random data
    df = pd.DataFrame(np.random.randn(100, 3),
                       columns=['A', 'B', 'C'])

    # Create scatterplot matrix using a list of colors
    # and endpoints at -1, 0 and 1
    fig = create_scatterplotmatrix(df, diag='histogram', index='A',
                                   colormap=['rgb(140, 255, 50)',
                                             'rgb(170, 60, 115)', '#6c4774',
                                             (0.5, 0.1, 0.8)],
                                   endpts=[-1, 0, 1], height=800, width=800)

    # Plot
    py.iplot(fig, filename = 'Scatterplot Matrix - Intervals')
    ```

    Example 6: Using the colormap as a Dictionary
    ```
    import plotly.plotly as py
    from plotly.graph_objs import graph_objs
    from plotly.figure_factory import create_scatterplotmatrix

    import numpy as np
    import pandas as pd
    import random

    # Create dataframe with random data
    df = pd.DataFrame(np.random.randn(100, 3),
                       columns=['Column A',
                                'Column B',
                                'Column C'])

    # Add new color column to dataframe
    new_column = []
    strange_colors = ['turquoise', 'limegreen', 'goldenrod']

    for j in range(100):
        new_column.append(random.choice(strange_colors))
    df['Colors'] = pd.Series(new_column, index=df.index)

    # Create scatterplot matrix using a dictionary of hex color values
    # which correspond to actual color names in 'Colors' column
    fig = create_scatterplotmatrix(
        df, diag='box', index='Colors',
        colormap= dict(
            turquoise = '#00F5FF',
            limegreen = '#32CD32',
            goldenrod = '#DAA520'
        ),
        colormap_type='cat',
        height=800, width=800
    )

    # Plot
    py.iplot(fig, filename = 'Scatterplot Matrix - colormap dictionary ')
    ```
    """
    # TODO: protected until #282
    # None defaults are replaced by fresh lists so that nothing is shared
    # between calls.
    if dataframe is None:
        dataframe = []
    if headers is None:
        headers = []
    if index_vals is None:
        index_vals = []

    validate_scatterplotmatrix(df, index, diag, colormap_type, **kwargs)

    # Validate colormap
    if isinstance(colormap, dict):
        colormap = clrs.validate_colors_dict(colormap, 'rgb')
    elif (isinstance(colormap, six.string_types)
          and 'rgb' not in colormap and '#' not in colormap):
        # A plain string that is not an rgb/hex color must name a scale.
        if colormap not in clrs.PLOTLY_SCALES:
            raise exceptions.PlotlyError(
                "If 'colormap' is a string, it must be the name "
                "of a Plotly Colorscale. The available colorscale "
                "names are {}".format(clrs.PLOTLY_SCALES.keys())
            )
        # TODO change below to allow the correct Plotly colorscale
        scale_colors = clrs.colorscale_to_colors(clrs.PLOTLY_SCALES[colormap])
        # keep only first and last item - fix later
        colormap = clrs.validate_colors(
            [scale_colors[0], scale_colors[-1]], 'rgb')
    else:
        colormap = clrs.validate_colors(colormap, 'rgb')

    if not index:
        # No index column: every column of df is plotted.
        headers.extend(name for name in df)
        dataframe.extend(df[name].values.tolist() for name in headers)
        # Check for same data-type in df columns
        utils.validate_dataframe(dataframe)
        return scatterplot(dataframe, headers, diag, size, height, width,
                           title, **kwargs)

    # Validate index selection
    if index not in df:
        raise exceptions.PlotlyError("Make sure you set the index "
                                     "input variable to one of the "
                                     "column names of your "
                                     "dataframe.")
    index_vals = df[index].values.tolist()
    # Every column except the index column is plotted.
    headers.extend(name for name in df if name != index)
    dataframe.extend(df[name].values.tolist() for name in headers)

    # check for same data-type in each df column
    utils.validate_dataframe(dataframe)
    utils.validate_index(index_vals)

    if isinstance(colormap, dict):
        # Every index value must have a color assigned. The check is done
        # once (not once per colormap key), so it also runs when the
        # dictionary is empty.
        if not all(value in colormap for value in index_vals):
            raise exceptions.PlotlyError("If colormap is a "
                                         "dictionary, all the "
                                         "names in the index "
                                         "must be keys.")
        return scatterplot_dict(
            dataframe, headers, diag, size, height, width, title,
            index, index_vals, endpts, colormap, colormap_type,
            **kwargs
        )

    return scatterplot_theme(
        dataframe, headers, diag, size, height, width, title,
        index, index_vals, endpts, colormap, colormap_type,
        **kwargs
    )
Ejemplo n.º 6
0
def create_facet_grid(df,
                      x=None,
                      y=None,
                      facet_row=None,
                      facet_col=None,
                      color_name=None,
                      colormap=None,
                      color_is_cat=False,
                      facet_row_labels=None,
                      facet_col_labels=None,
                      height=None,
                      width=None,
                      trace_type="scatter",
                      scales="fixed",
                      dtick_x=None,
                      dtick_y=None,
                      show_boxes=True,
                      ggplot2=False,
                      binsize=1,
                      **kwargs):
    """
    Returns figure for facet grid; **this function is deprecated**, since
    plotly.express functions should be used instead, for example

    >>> import plotly.express as px
    >>> tips = px.data.tips()
    >>> fig = px.scatter(tips,
    ...     x='total_bill',
    ...     y='tip',
    ...     facet_row='sex',
    ...     facet_col='smoker',
    ...     color='size')


    :param (pd.DataFrame) df: the dataframe of columns for the facet grid.
    :param (str) x: the name of the dataframe column for the x axis data.
    :param (str) y: the name of the dataframe column for the y axis data.
    :param (str) facet_row: the name of the dataframe column that is used to
        facet the grid into row panels.
    :param (str) facet_col: the name of the dataframe column that is used to
        facet the grid into column panels.
    :param (str) color_name: the name of your dataframe column that will
        function as the colormap variable.
    :param (str|list|dict) colormap: the param that determines how the
        color_name column colors the data. If the dataframe contains numeric
        data, then a dictionary of colors will group the data categorically
        while a Plotly Colorscale name or a custom colorscale will treat it
        numerically. To learn more about colors and types of colormap, run
        `help(plotly.colors)`.
    :param (bool) color_is_cat: determines whether a numerical column for the
        colormap will be treated as categorical (True) or sequential (False).
            Default = False.
    :param (str|dict) facet_row_labels: set to either 'name' or a dictionary
        of all the unique values in the faceting row mapped to some text to
        show up in the label annotations. If None, labeling works like usual.
    :param (str|dict) facet_col_labels: set to either 'name' or a dictionary
        of all the unique values in the faceting column mapped to some text
        to show up in the label annotations. If None, labeling works like
        usual.
    :param (int) height: the height of the facet grid figure. If not set,
        max(600, 100 * number of rows) is used.
    :param (int) width: the width of the facet grid figure. If not set,
        max(600, 100 * number of columns) is used.
    :param (str) trace_type: decides the type of plot to appear in the
        facet grid. The options are 'scatter', 'scattergl', 'histogram',
        'bar', and 'box'.
        Default = 'scatter'.
    :param (str) scales: determines if axes have fixed ranges or not. Valid
        settings are 'fixed' (all axes fixed), 'free_x' (x axis free only),
        'free_y' (y axis free only) or 'free' (both axes free). For any
        trace_type other than 'scatter' and 'scattergl' this is forced to
        'free'.
    :param (float) dtick_x: determines the distance between each tick on the
        x-axis. Default is None which means dtick_x is set automatically.
    :param (float) dtick_y: determines the distance between each tick on the
        y-axis. Default is None which means dtick_y is set automatically.
    :param (bool) show_boxes: draws grey boxes behind the facet titles.
    :param (bool) ggplot2: draws the facet grid in the style of `ggplot2`. See
        http://ggplot2.tidyverse.org/reference/facet_grid.html for reference.
        When True the marker color is always set to black.
        Default = False
    :param (int) binsize: groups all data into bins of a given length.
        NOTE: this argument is not referenced in the body of this function;
        it is kept in the signature for backward compatibility.
    :param (dict) kwargs: a dictionary of scatterplot arguments. A 'marker'
        entry is split off and handled separately from the other trace
        arguments; a 'marker' dictionary passed by the caller is not
        modified.

    :return: the facet grid figure, with size, legend, annotations, axis
        ticks and (for fixed scales) axis ranges set on its layout.
    :raises (ImportError): if pandas is not installed.
    :raises (PlotlyError): if df is not a pandas DataFrame, if a required or
        named column is missing, if 'scales' or 'trace_type' is invalid, if a
        label or colormap dictionary misses a value of its column, if a
        colormap string is not a Plotly Colorscale name, or if the color
        column holds values that are neither strings nor numbers while
        color_is_cat is False.

    Example 1: One Way Faceting

    >>> import plotly.figure_factory as ff
    >>> import pandas as pd
    >>> mpg = pd.read_table('https://raw.githubusercontent.com/plotly/datasets/master/mpg_2017.txt')

    >>> fig = ff.create_facet_grid(
    ...     mpg,
    ...     x='displ',
    ...     y='cty',
    ...     facet_col='cyl',
    ... )
    >>> fig.show()

    Example 2: Two Way Faceting

    >>> import plotly.figure_factory as ff

    >>> import pandas as pd

    >>> mpg = pd.read_table('https://raw.githubusercontent.com/plotly/datasets/master/mpg_2017.txt')

    >>> fig = ff.create_facet_grid(
    ...     mpg,
    ...     x='displ',
    ...     y='cty',
    ...     facet_row='drv',
    ...     facet_col='cyl',
    ... )
    >>> fig.show()

    Example 3: Categorical Coloring

    >>> import plotly.figure_factory as ff
    >>> import pandas as pd
    >>> mtcars = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/mtcars.csv')
    >>> mtcars.cyl = mtcars.cyl.astype(str)
    >>> fig = ff.create_facet_grid(
    ...     mtcars,
    ...     x='mpg',
    ...     y='wt',
    ...     facet_col='cyl',
    ...     color_name='cyl',
    ...     color_is_cat=True,
    ... )
    >>> fig.show()


    """
    if not pd:
        raise ImportError(
            "'pandas' must be installed for this figure_factory.")

    if not isinstance(df, pd.DataFrame):
        raise exceptions.PlotlyError("You must input a pandas DataFrame.")

    # make sure all columns are of homogenous datatype
    utils.validate_dataframe(df)

    is_scatter = trace_type in ["scatter", "scattergl"]
    if is_scatter and (not x or not y):
        raise exceptions.PlotlyError(
            "You need to input 'x' and 'y' if you are you are using a "
            "trace_type of 'scatter' or 'scattergl'.")

    # every column name that was supplied must exist in the dataframe
    for key in [x, y, facet_row, facet_col, color_name]:
        if key is not None:
            try:
                df[key]
            except KeyError:
                raise exceptions.PlotlyError(
                    "x, y, facet_row, facet_col and color_name must be keys "
                    "in your dataframe.")

    # autoscale histogram bars (and every other non-scatter trace type)
    if not is_scatter:
        scales = "free"

    # validate scales
    if scales not in ["fixed", "free_x", "free_y", "free"]:
        raise exceptions.PlotlyError(
            "'scales' must be set to 'fixed', 'free_x', 'free_y' and 'free'.")

    if trace_type not in VALID_TRACE_TYPES:
        raise exceptions.PlotlyError(
            "'trace_type' must be in {}".format(VALID_TRACE_TYPES))

    subplot_spacing = 0.06 if trace_type == "histogram" else 0.015

    # Separate the marker settings from the remaining trace kwargs. A marker
    # dict is shallow-copied so that removing 'color' and filling in the
    # defaults below does not mutate the dictionary owned by the caller.
    kwargs_marker = kwargs.pop("marker", {})
    if isinstance(kwargs_marker, dict):
        kwargs_marker = dict(kwargs_marker)
    marker_color = kwargs_marker.pop("color", None)
    kwargs_trace = kwargs

    if "size" not in kwargs_marker:
        kwargs_marker["size"] = 5 if ggplot2 else 8

    # Default trace opacity for the non-ggplot2 style. An opacity given by
    # the caller, either on the marker or on the trace, is left untouched.
    if (not ggplot2 and "opacity" not in kwargs_marker
            and "opacity" not in kwargs_trace):
        kwargs_trace["opacity"] = 0.6

    if "line" not in kwargs_marker:
        if ggplot2:
            kwargs_marker["line"] = {}
        else:
            kwargs_marker["line"] = {"color": "darkgrey", "width": 1}

    # default marker color; the ggplot2 style always uses black
    if ggplot2:
        marker_color = "rgb(0, 0, 0)"
    elif not marker_color:
        marker_color = "rgb(31, 119, 180)"

    # grid dimensions and validation of custom facet labels
    num_of_rows = 1
    num_of_cols = 1
    flipped_rows = False
    flipped_cols = False
    if facet_row:
        row_values = df[facet_row].unique()
        num_of_rows = len(row_values)
        flipped_rows = _is_flipped(num_of_rows)
        if isinstance(facet_row_labels, dict):
            for key in row_values:
                if key not in facet_row_labels:
                    raise exceptions.PlotlyError(
                        CUSTOM_LABEL_ERROR.format(row_values.tolist()))
    if facet_col:
        col_values = df[facet_col].unique()
        num_of_cols = len(col_values)
        flipped_cols = _is_flipped(num_of_cols)
        if isinstance(facet_col_labels, dict):
            for key in col_values:
                if key not in facet_col_labels:
                    raise exceptions.PlotlyError(
                        CUSTOM_LABEL_ERROR.format(col_values.tolist()))

    # trailing positional arguments shared by all facet grid helpers
    grid_args = (
        num_of_rows,
        num_of_cols,
        facet_row_labels,
        facet_col_labels,
        trace_type,
        flipped_rows,
        flipped_cols,
        show_boxes,
        subplot_spacing,
        marker_color,
        kwargs_trace,
        kwargs_marker,
    )

    show_legend = False
    if color_name:
        # the kind of coloring is decided from the first value of the column
        first_color = df[color_name].iloc[0]
        color_is_number = isinstance(first_color, Number)
        colormap_is_dict = isinstance(colormap, dict)

        if (isinstance(first_color, str) or color_is_cat
                or (color_is_number and colormap_is_dict)):
            # categorical coloring: one legend entry per distinct value
            show_legend = True
            color_values = df[color_name].unique()
            if colormap_is_dict:
                clrs.validate_colors_dict(colormap, "rgb")

                for val in color_values:
                    if val not in colormap:
                        raise exceptions.PlotlyError(
                            "If using 'colormap' as a dictionary, make sure "
                            "all the values of the colormap column are in "
                            "the keys of your dictionary.")
            else:
                # use default plotly colors for dictionary, cycling through
                # them when there are more values than colors
                default_colors = clrs.DEFAULT_PLOTLY_COLORS
                colormap = {}
                for j, val in enumerate(color_values):
                    colormap[val] = default_colors[j % len(default_colors)]
            fig, annotations = _facet_grid_color_categorical(
                df, x, y, facet_row, facet_col, color_name, colormap,
                *grid_args)

        elif color_is_number:
            # sequential coloring: resolve the colorscale, then build the grid
            if isinstance(colormap, list):
                colorscale_list = colormap
                clrs.validate_colorscale(colorscale_list)
            elif isinstance(colormap, str):
                if colormap not in clrs.PLOTLY_SCALES.keys():
                    raise exceptions.PlotlyError(
                        "If 'colormap' is a string, it must be the name "
                        "of a Plotly Colorscale. The available colorscale "
                        "names are {}".format(clrs.PLOTLY_SCALES.keys()))
                colorscale_list = clrs.PLOTLY_SCALES[colormap]
            else:
                colorscale_list = clrs.PLOTLY_SCALES["Reds"]
            fig, annotations = _facet_grid_color_numerical(
                df, x, y, facet_row, facet_col, color_name, colorscale_list,
                *grid_args)

        else:
            # Without this branch no figure would be built and the code
            # below would fail with an UnboundLocalError on 'fig'.
            raise exceptions.PlotlyError(
                "The first value of the 'color_name' column must be a "
                "string or a number unless 'color_is_cat' is True.")

    else:
        fig, annotations = _facet_grid(
            df, x, y, facet_row, facet_col, *grid_args)

    if not height:
        height = max(600, 100 * num_of_rows)
    if not width:
        width = max(600, 100 * num_of_cols)

    fig["layout"].update(height=height,
                         width=width,
                         title="",
                         paper_bgcolor="rgb(251, 251, 251)")
    if ggplot2:
        fig["layout"].update(
            plot_bgcolor=PLOT_BGCOLOR,
            paper_bgcolor="rgb(255, 255, 255)",
            hovermode="closest",
        )

    # axis titles
    x_title_annot = _axis_title_annotation(x, "x")
    y_title_annot = _axis_title_annotation(y, "y")

    # annotations
    annotations.append(x_title_annot)
    annotations.append(y_title_annot)

    # legend
    fig["layout"]["showlegend"] = show_legend
    fig["layout"]["legend"]["bgcolor"] = LEGEND_COLOR
    fig["layout"]["legend"]["borderwidth"] = LEGEND_BORDER_WIDTH
    fig["layout"]["legend"]["x"] = 1.05
    fig["layout"]["legend"]["y"] = 1
    fig["layout"]["legend"]["yanchor"] = "top"

    if show_legend and ggplot2:
        if color_name:
            annotations.append(_legend_annotation(color_name))
        # widen the right margin of the ggplot2-styled figure
        fig["layout"]["margin"]["r"] = 150

    # assign annotations to figure
    fig["layout"]["annotations"] = annotations

    # add shaded boxes behind axis titles
    if show_boxes and ggplot2:
        _add_shapes_to_fig(fig, ANNOT_RECT_COLOR, flipped_rows, flipped_cols)

    # all xaxis and yaxis labels
    axis_labels = {"x": [], "y": []}
    for key in fig["layout"]:
        if "xaxis" in key:
            axis_labels["x"].append(key)
        elif "yaxis" in key:
            axis_labels["y"].append(key)

    # A string column containing at least one value that int() can parse
    # switches every axis to the 'category' type.
    string_number_in_data = False
    for var in [v for v in [x, y] if v]:
        if isinstance(df[var].tolist()[0], str):
            for item in df[var]:
                try:
                    int(item)
                    string_number_in_data = True
                except ValueError:
                    pass

    if string_number_in_data:
        for x_y in axis_labels.keys():
            for axis_name in axis_labels[x_y]:
                fig["layout"][axis_name]["type"] = "category"

    # 'scales' was validated above, so the lookup cannot fail
    fixed_axes = {
        "fixed": ["x", "y"],
        "free_x": ["y"],
        "free_y": ["x"],
        "free": [],
    }[scales]

    # fixed ranges
    for x_y in fixed_axes:
        min_ranges = []
        max_ranges = []
        for trace in fig["data"]:
            if trace[x_y] is not None and len(trace[x_y]) > 0:
                min_ranges.append(min(trace[x_y]))
                max_ranges.append(max(trace[x_y]))
        min_ranges = [v for v in min_ranges if v is not None]
        max_ranges = [v for v in max_ranges if v is not None]

        min_range = min(min_ranges)
        max_range = max(max_ranges)

        range_are_numbers = isinstance(min_range, Number) and isinstance(
            max_range, Number)

        if range_are_numbers:
            min_range = math.floor(min_range)
            max_range = math.ceil(max_range)

            # Widen the frame by about 5% on each side. NOTE: the upper
            # padding is computed after the lower bound has already been
            # moved, so it is slightly larger than the lower padding. This
            # is kept as-is because it determines the emitted axis ranges.
            min_range -= 0.05 * (max_range - min_range)
            max_range += 0.05 * (max_range - min_range)

            # a tick distance given by the caller wins over the automatic one
            user_dtick = dtick_x if x_y == "x" else dtick_y
            if user_dtick:
                dtick = user_dtick
            else:
                dtick = math.floor(
                    (max_range - min_range) / MAX_TICKS_PER_AXIS)
        else:
            dtick = 1

        for axis_title in axis_labels[x_y]:
            fig["layout"][axis_title]["dtick"] = dtick
            fig["layout"][axis_title]["ticklen"] = 0
            fig["layout"][axis_title]["zeroline"] = False
            if ggplot2:
                fig["layout"][axis_title]["tickwidth"] = 1
                fig["layout"][axis_title]["ticklen"] = 4
                fig["layout"][axis_title]["gridcolor"] = GRID_COLOR
                # The grid width has always ended up as 2 here: an earlier
                # assignment of GRID_WIDTH was immediately overwritten.
                fig["layout"][axis_title]["gridwidth"] = 2
                fig["layout"][axis_title]["tickfont"] = {
                    "color": TICK_COLOR,
                    "size": 10,
                }

        # insert ranges into fig
        if range_are_numbers:
            axis_prefix = "{}axis".format(x_y)
            for key in fig["layout"]:
                if axis_prefix in key:
                    fig["layout"][key]["range"] = [min_range, max_range]

    return fig
Ejemplo n.º 7
0
def create_facet_grid(df, x=None, y=None, facet_row=None, facet_col=None,
                      color_name=None, colormap=None, color_is_cat=False,
                      facet_row_labels=None, facet_col_labels=None,
                      height=None, width=None, trace_type='scatter',
                      scales='fixed', dtick_x=None, dtick_y=None,
                      show_boxes=True, ggplot2=False, binsize=1, **kwargs):
    """
    Returns figure for facet grid.

    :param (pd.DataFrame) df: the dataframe of columns for the facet grid.
    :param (str) x: the name of the dataframe column for the x axis data.
    :param (str) y: the name of the dataframe column for the y axis data.
    :param (str) facet_row: the name of the dataframe column that is used to
        facet the grid into row panels.
    :param (str) facet_col: the name of the dataframe column that is used to
        facet the grid into column panels.
    :param (str) color_name: the name of your dataframe column that will
        function as the colormap variable.
    :param (str|list|dict) colormap: the param that determines how the
        color_name column colors the data. If the dataframe contains numeric
        data, then a dictionary of colors will group the data categorically
        while a Plotly Colorscale name or a custom colorscale will treat it
        numerically. To learn more about colors and types of colormap, run
        `help(plotly.colors)`.
    :param (bool) color_is_cat: determines whether a numerical column for the
        colormap will be treated as categorical (True) or sequential (False).
            Default = False.
    :param (str|dict) facet_row_labels: set to either 'name' or a dictionary
        of all the unique values in the faceting row mapped to some text to
        show up in the label annotations. If None, labeling works like usual.
    :param (str|dict) facet_col_labels: set to either 'name' or a dictionary
        of all the values in the faceting row mapped to some text to show up
        in the label annotations. If None, labeling works like usual.
    :param (int) height: the height of the facet grid figure.
    :param (int) width: the width of the facet grid figure.
    :param (str) trace_type: decides the type of plot to appear in the
        facet grid. The options are 'scatter', 'scattergl', 'histogram',
        'bar', and 'box'.
        Default = 'scatter'.
    :param (str) scales: determines if axes have fixed ranges or not. Valid
        settings are 'fixed' (all axes fixed), 'free_x' (x axis free only),
        'free_y' (y axis free only) or 'free' (both axes free).
    :param (float) dtick_x: determines the distance between each tick on the
        x-axis. Default is None which means dtick_x is set automatically.
    :param (float) dtick_y: determines the distance between each tick on the
        y-axis. Default is None which means dtick_y is set automatically.
    :param (bool) show_boxes: draws grey boxes behind the facet titles.
    :param (bool) ggplot2: draws the facet grid in the style of `ggplot2`. See
        http://ggplot2.tidyverse.org/reference/facet_grid.html for reference.
        Default = False
    :param (int) binsize: groups all data into bins of a given length.
    :param (dict) kwargs: a dictionary of scatterplot arguments.

    Examples 1: One Way Faceting
    ```
    import plotly.plotly as py
    import plotly.figure_factory as ff

    import pandas as pd

    mpg = pd.read_table('https://raw.githubusercontent.com/plotly/datasets/master/mpg_2017.txt')

    fig = ff.create_facet_grid(
        mpg,
        x='displ',
        y='cty',
        facet_col='cyl',
    )
    py.iplot(fig, filename='facet_grid_mpg_one_way_facet')
    ```

    Example 2: Two Way Faceting
    ```
    import plotly.plotly as py
    import plotly.figure_factory as ff

    import pandas as pd

    mpg = pd.read_table('https://raw.githubusercontent.com/plotly/datasets/master/mpg_2017.txt')

    fig = ff.create_facet_grid(
        mpg,
        x='displ',
        y='cty',
        facet_row='drv',
        facet_col='cyl',
    )
    py.iplot(fig, filename='facet_grid_mpg_two_way_facet')
    ```

    Example 3: Categorical Coloring
    ```
    import plotly.plotly as py
    import plotly.figure_factory as ff

    import pandas as pd

    mpg = pd.read_table('https://raw.githubusercontent.com/plotly/datasets/master/mpg_2017.txt')

    fig = ff.create_facet_grid(
        mtcars,
        x='mpg',
        y='wt',
        facet_col='cyl',
        color_name='cyl',
        color_is_cat=True,
    )
    py.iplot(fig, filename='facet_grid_mpg_default_colors')
    ```

    Example 4: Sequential Coloring
    ```
    import plotly.plotly as py
    import plotly.figure_factory as ff

    import pandas as pd

    tips = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/tips.csv')

    fig = ff.create_facet_grid(
        tips,
        x='total_bill',
        y='tip',
        facet_row='sex',
        facet_col='smoker',
        color_name='size',
        colormap='Viridis',
    )
    py.iplot(fig, filename='facet_grid_tips_sequential_colors')
    ```

    Example 5: Custom labels
    ```
    import plotly.plotly as py
    import plotly.figure_factory as ff

    import pandas as pd

    mtcars = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/mtcars.csv')

    fig = ff.create_facet_grid(
        mtcars,
        x='wt',
        y='mpg',
        facet_col='cyl',
        facet_col_labels={4: "$\\alpha$", 6: '$\\beta$', 8: '$\sqrt[y]{x}$'},
    )

    py.iplot(fig, filename='facet_grid_mtcars_custom_labels')
    ```

    Example 6: Other Trace Type
    ```
    import plotly.plotly as py
    import plotly.figure_factory as ff

    import pandas as pd

    mtcars = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/mtcars.csv')

    fig = ff.create_facet_grid(
        mtcars,
        x='wt',
        facet_col='cyl',
        trace_type='histogram',
    )

    py.iplot(fig, filename='facet_grid_mtcars_other_trace_type')
    ```
    """
    if not pd:
        raise ImportError(
            "'pandas' must be installed for this figure_factory."
        )

    if not isinstance(df, pd.DataFrame):
        raise exceptions.PlotlyError(
            "You must input a pandas DataFrame."
        )

    # make sure all columns are of homogenous datatype
    utils.validate_dataframe(df)

    if trace_type in ['scatter', 'scattergl']:
        if not x or not y:
            raise exceptions.PlotlyError(
                "You need to input 'x' and 'y' if you are you are using a "
                "trace_type of 'scatter' or 'scattergl'."
            )

    for key in [x, y, facet_row, facet_col, color_name]:
        if key is not None:
            try:
                df[key]
            except KeyError:
                raise exceptions.PlotlyError(
                    "x, y, facet_row, facet_col and color_name must be keys "
                    "in your dataframe."
                )
    # autoscale histogram bars
    if trace_type not in ['scatter', 'scattergl']:
        scales = 'free'

    # validate scales
    if scales not in ['fixed', 'free_x', 'free_y', 'free']:
        raise exceptions.PlotlyError(
            "'scales' must be set to 'fixed', 'free_x', 'free_y' and 'free'."
        )

    if trace_type not in VALID_TRACE_TYPES:
        raise exceptions.PlotlyError(
            "'trace_type' must be in {}".format(VALID_TRACE_TYPES)
        )

    if trace_type == 'histogram':
        SUBPLOT_SPACING = 0.06
    else:
        SUBPLOT_SPACING = 0.015

    # seperate kwargs for marker and else
    if 'marker' in kwargs:
        kwargs_marker = kwargs['marker']
    else:
        kwargs_marker = {}
    marker_color = kwargs_marker.pop('color', None)
    kwargs.pop('marker', None)
    kwargs_trace = kwargs

    if 'size' not in kwargs_marker:
        if ggplot2:
            kwargs_marker['size'] = 5
        else:
            kwargs_marker['size'] = 8

    if 'opacity' not in kwargs_marker:
        if not ggplot2:
            kwargs_trace['opacity'] = 0.6

    if 'line' not in kwargs_marker:
        if not ggplot2:
            kwargs_marker['line'] = {'color': 'darkgrey', 'width': 1}
        else:
            kwargs_marker['line'] = {}

    # default marker size
    if not ggplot2:
        if not marker_color:
            marker_color = 'rgb(31, 119, 180)'
    else:
        marker_color = 'rgb(0, 0, 0)'

    num_of_rows = 1
    num_of_cols = 1
    flipped_rows = False
    flipped_cols = False
    if facet_row:
        num_of_rows = len(df[facet_row].unique())
        flipped_rows = _is_flipped(num_of_rows)
        if isinstance(facet_row_labels, dict):
            for key in df[facet_row].unique():
                if key not in facet_row_labels.keys():
                    unique_keys = df[facet_row].unique().tolist()
                    raise exceptions.PlotlyError(
                        CUSTOM_LABEL_ERROR.format(unique_keys)
                    )
    if facet_col:
        num_of_cols = len(df[facet_col].unique())
        flipped_cols = _is_flipped(num_of_cols)
        if isinstance(facet_col_labels, dict):
            for key in df[facet_col].unique():
                if key not in facet_col_labels.keys():
                    unique_keys = df[facet_col].unique().tolist()
                    raise exceptions.PlotlyError(
                        CUSTOM_LABEL_ERROR.format(unique_keys)
                    )
    show_legend = False
    if color_name:
        if isinstance(df[color_name].iloc[0], str) or color_is_cat:
            show_legend = True
            if isinstance(colormap, dict):
                clrs.validate_colors_dict(colormap, 'rgb')

                for val in df[color_name].unique():
                    if val not in colormap.keys():
                        raise exceptions.PlotlyError(
                            "If using 'colormap' as a dictionary, make sure "
                            "all the values of the colormap column are in "
                            "the keys of your dictionary."
                        )
            else:
                # use default plotly colors for dictionary
                default_colors = clrs.DEFAULT_PLOTLY_COLORS
                colormap = {}
                j = 0
                for val in df[color_name].unique():
                    if j >= len(default_colors):
                        j = 0
                    colormap[val] = default_colors[j]
                    j += 1
            fig, annotations = _facet_grid_color_categorical(
                df, x, y, facet_row, facet_col, color_name, colormap,
                num_of_rows, num_of_cols, facet_row_labels, facet_col_labels,
                trace_type, flipped_rows, flipped_cols, show_boxes,
                SUBPLOT_SPACING, marker_color, kwargs_trace, kwargs_marker
            )

        elif isinstance(df[color_name].iloc[0], Number):
            if isinstance(colormap, dict):
                show_legend = True
                clrs.validate_colors_dict(colormap, 'rgb')

                for val in df[color_name].unique():
                    if val not in colormap.keys():
                        raise exceptions.PlotlyError(
                            "If using 'colormap' as a dictionary, make sure "
                            "all the values of the colormap column are in "
                            "the keys of your dictionary."
                        )
                fig, annotations = _facet_grid_color_categorical(
                    df, x, y, facet_row, facet_col, color_name, colormap,
                    num_of_rows, num_of_cols, facet_row_labels,
                    facet_col_labels, trace_type, flipped_rows,
                    flipped_cols, show_boxes, SUBPLOT_SPACING, marker_color,
                    kwargs_trace, kwargs_marker
                )

            elif isinstance(colormap, list):
                colorscale_list = colormap
                clrs.validate_colorscale(colorscale_list)

                fig, annotations = _facet_grid_color_numerical(
                    df, x, y, facet_row, facet_col, color_name,
                    colorscale_list, num_of_rows, num_of_cols,
                    facet_row_labels, facet_col_labels, trace_type,
                    flipped_rows, flipped_cols, show_boxes, SUBPLOT_SPACING,
                    marker_color, kwargs_trace, kwargs_marker
                )
            elif isinstance(colormap, str):
                if colormap in clrs.PLOTLY_SCALES.keys():
                    colorscale_list = clrs.PLOTLY_SCALES[colormap]
                else:
                    raise exceptions.PlotlyError(
                        "If 'colormap' is a string, it must be the name "
                        "of a Plotly Colorscale. The available colorscale "
                        "names are {}".format(clrs.PLOTLY_SCALES.keys())
                    )
                fig, annotations = _facet_grid_color_numerical(
                    df, x, y, facet_row, facet_col, color_name,
                    colorscale_list, num_of_rows, num_of_cols,
                    facet_row_labels, facet_col_labels, trace_type,
                    flipped_rows, flipped_cols, show_boxes, SUBPLOT_SPACING,
                    marker_color, kwargs_trace, kwargs_marker
                )
            else:
                colorscale_list = clrs.PLOTLY_SCALES['Reds']
                fig, annotations = _facet_grid_color_numerical(
                    df, x, y, facet_row, facet_col, color_name,
                    colorscale_list, num_of_rows, num_of_cols,
                    facet_row_labels, facet_col_labels, trace_type,
                    flipped_rows, flipped_cols, show_boxes, SUBPLOT_SPACING,
                    marker_color, kwargs_trace, kwargs_marker
                )

    else:
        fig, annotations = _facet_grid(
            df, x, y, facet_row, facet_col, num_of_rows, num_of_cols,
            facet_row_labels, facet_col_labels, trace_type, flipped_rows,
            flipped_cols, show_boxes, SUBPLOT_SPACING, marker_color,
            kwargs_trace, kwargs_marker
        )

    if not height:
        height = max(600, 100 * num_of_rows)
    if not width:
        width = max(600, 100 * num_of_cols)

    fig['layout'].update(height=height, width=width, title='',
                         paper_bgcolor='rgb(251, 251, 251)')
    if ggplot2:
        fig['layout'].update(plot_bgcolor=PLOT_BGCOLOR,
                             paper_bgcolor='rgb(255, 255, 255)',
                             hovermode='closest')

    # axis titles
    x_title_annot = _axis_title_annotation(x, 'x')
    y_title_annot = _axis_title_annotation(y, 'y')

    # annotations
    annotations.append(x_title_annot)
    annotations.append(y_title_annot)

    # legend
    fig['layout']['showlegend'] = show_legend
    fig['layout']['legend']['bgcolor'] = LEGEND_COLOR
    fig['layout']['legend']['borderwidth'] = LEGEND_BORDER_WIDTH
    fig['layout']['legend']['x'] = 1.05
    fig['layout']['legend']['y'] = 1
    fig['layout']['legend']['yanchor'] = 'top'

    if show_legend:
        fig['layout']['showlegend'] = show_legend
        if ggplot2:
            if color_name:
                legend_annot = _legend_annotation(color_name)
                annotations.append(legend_annot)
            fig['layout']['margin']['r'] = 150

    # assign annotations to figure
    fig['layout']['annotations'] = annotations

    # add shaded boxes behind axis titles
    if show_boxes and ggplot2:
        _add_shapes_to_fig(fig, ANNOT_RECT_COLOR, flipped_rows, flipped_cols)

    # all xaxis and yaxis labels
    axis_labels = {'x': [], 'y': []}
    for key in fig['layout']:
        if 'xaxis' in key:
            axis_labels['x'].append(key)
        elif 'yaxis' in key:
            axis_labels['y'].append(key)

    string_number_in_data = False
    for var in [v for v in [x, y] if v]:
        if isinstance(df[var].tolist()[0], str):
            for item in df[var]:
                try:
                    int(item)
                    string_number_in_data = True
                except ValueError:
                    pass

    if string_number_in_data:
        for x_y in axis_labels.keys():
            for axis_name in axis_labels[x_y]:
                fig['layout'][axis_name]['type'] = 'category'

    if scales == 'fixed':
        fixed_axes = ['x', 'y']
    elif scales == 'free_x':
        fixed_axes = ['y']
    elif scales == 'free_y':
        fixed_axes = ['x']
    elif scales == 'free':
        fixed_axes = []

    # fixed ranges
    for x_y in fixed_axes:
        min_ranges = []
        max_ranges = []
        for trace in fig['data']:
            if trace[x_y] is not None and len(trace[x_y]) > 0:
                min_ranges.append(min(trace[x_y]))
                max_ranges.append(max(trace[x_y]))
        while None in min_ranges:
            min_ranges.remove(None)
        while None in max_ranges:
            max_ranges.remove(None)

        min_range = min(min_ranges)
        max_range = max(max_ranges)

        range_are_numbers = (isinstance(min_range, Number) and
                             isinstance(max_range, Number))

        if range_are_numbers:
            min_range = math.floor(min_range)
            max_range = math.ceil(max_range)

            # extend widen frame by 5% on each side
            min_range -= 0.05 * (max_range - min_range)
            max_range += 0.05 * (max_range - min_range)

            if x_y == 'x':
                if dtick_x:
                    dtick = dtick_x
                else:
                    dtick = math.floor(
                        (max_range - min_range) / MAX_TICKS_PER_AXIS
                    )
            elif x_y == 'y':
                if dtick_y:
                    dtick = dtick_y
                else:
                    dtick = math.floor(
                        (max_range - min_range) / MAX_TICKS_PER_AXIS
                    )
        else:
            dtick = 1

        for axis_title in axis_labels[x_y]:
            fig['layout'][axis_title]['dtick'] = dtick
            fig['layout'][axis_title]['ticklen'] = 0
            fig['layout'][axis_title]['zeroline'] = False
            if ggplot2:
                fig['layout'][axis_title]['tickwidth'] = 1
                fig['layout'][axis_title]['ticklen'] = 4
                fig['layout'][axis_title]['gridwidth'] = GRID_WIDTH

                fig['layout'][axis_title]['gridcolor'] = GRID_COLOR
                fig['layout'][axis_title]['gridwidth'] = 2
                fig['layout'][axis_title]['tickfont'] = {
                    'color': TICK_COLOR, 'size': 10
                }

        # insert ranges into fig
        if x_y in fixed_axes:
            for key in fig['layout']:
                if '{}axis'.format(x_y) in key and range_are_numbers:
                    fig['layout'][key]['range'] = [min_range, max_range]

    return fig
Ejemplo n.º 8
0
def create_violin(data, data_header=None, group_header=None, colors=None,
                  use_colorscale=False, group_stats=None, rugplot=True,
                  sort=False, height=450, width=600,
                  title='Violin and Rug Plot'):
    """
    Returns figure for a violin plot

    When `group_header` is None a single violin is drawn from `data`;
    otherwise one violin is drawn per group and the work is delegated to
    `violin_dict`, `violin_no_colorscale` or `violin_colorscale` depending
    on `colors` and `use_colorscale`.

    :param (list|array) data: accepts either a list of numerical values,
        a list of dictionaries all with identical keys and at least one
        column of numeric values, or a pandas dataframe with at least one
        column of numbers.
    :param (str) data_header: the header of the data column to be used
        from an inputted pandas dataframe. Not applicable if 'data' is
        a list of numeric values.
    :param (str) group_header: applicable if grouping data by a variable.
        'group_header' must be set to the name of the grouping variable.
    :param (str|tuple|list|dict) colors: either a plotly scale name,
        an rgb or hex color, a color tuple, a list of colors or a
        dictionary. An rgb color is of the form 'rgb(x, y, z)' where
        x, y and z belong to the interval [0, 255] and a color tuple is a
        tuple of the form (a, b, c) where a, b and c belong to [0, 1].
        If colors is a list, it must contain valid color types as its
        members.
    :param (bool) use_colorscale: only applicable if grouping by another
        variable. Will implement a colorscale based on the first 2 colors
        of param colors. This means colors must be a list with at least 2
        colors in it (Plotly colorscales are accepted since they map to a
        list of two rgb colors). Default = False
    :param (dict) group_stats: a dictionary where each key is a unique
        value from the group_header column in data. Each value must be a
        number and will be used to color the violin plots if a colorscale
        is being used.
    :param (bool) rugplot: determines if a rugplot is drawn on violin plot.
        Default = True
    :param (bool) sort: determines if violins are sorted
        alphabetically (True) or by input order (False). Default = False
    :param (float) height: the height of the violin plot.
    :param (float) width: the width of the violin plot.
    :param (str) title: the title of the violin plot.

    :raises: (PlotlyError) if `data` is an empty list or a list holding
        non-numbers (ungrouped case), if `data_header` is missing for a
        dataframe, if `group_header` is given but `data` is not a pandas
        dataframe, or if `use_colorscale` is set while `colors` is a
        dictionary, has fewer than 2 colors, or `group_stats` is not a
        dictionary.

    Example 1: Single Violin Plot
    ```
    import plotly.plotly as py
    from plotly.figure_factory import create_violin
    from plotly.graph_objs import graph_objs

    import numpy as np
    from scipy import stats

    # create array of random values
    data_list = np.random.randn(100)

    # create violin fig
    fig = create_violin(data_list, colors='#604d9e')

    # plot
    py.iplot(fig, filename='Violin Plot')
    ```

    Example 2: Multiple Violin Plots with Qualitative Coloring
    ```
    import plotly.plotly as py
    from plotly.figure_factory import create_violin
    from plotly.graph_objs import graph_objs

    import numpy as np
    import pandas as pd
    from scipy import stats

    # create dataframe
    np.random.seed(619517)
    Nr=250
    y = np.random.randn(Nr)
    gr = np.random.choice(list("ABCDE"), Nr)
    norm_params=[(0, 1.2), (0.7, 1), (-0.5, 1.4), (0.3, 1), (0.8, 0.9)]

    for i, letter in enumerate("ABCDE"):
        y[gr == letter] *=norm_params[i][1]+ norm_params[i][0]
    df = pd.DataFrame(dict(Score=y, Group=gr))

    # create violin fig
    fig = create_violin(df, data_header='Score', group_header='Group',
                        sort=True, height=600, width=1000)

    # plot
    py.iplot(fig, filename='Violin Plot with Coloring')
    ```

    Example 3: Violin Plots with Colorscale
    ```
    import plotly.plotly as py
    from plotly.figure_factory import create_violin
    from plotly.graph_objs import graph_objs

    import numpy as np
    import pandas as pd
    from scipy import stats

    # create dataframe
    np.random.seed(619517)
    Nr=250
    y = np.random.randn(Nr)
    gr = np.random.choice(list("ABCDE"), Nr)
    norm_params=[(0, 1.2), (0.7, 1), (-0.5, 1.4), (0.3, 1), (0.8, 0.9)]

    for i, letter in enumerate("ABCDE"):
        y[gr == letter] *=norm_params[i][1]+ norm_params[i][0]
    df = pd.DataFrame(dict(Score=y, Group=gr))

    # define header params
    data_header = 'Score'
    group_header = 'Group'

    # make groupby object with pandas
    group_stats = {}
    groupby_data = df.groupby([group_header])

    for group in "ABCDE":
        data_from_group = groupby_data.get_group(group)[data_header]
        # take a stat of the grouped data
        stat = np.median(data_from_group)
        # add to dictionary
        group_stats[group] = stat

    # create violin fig
    fig = create_violin(df, data_header='Score', group_header='Group',
                        height=600, width=1000, use_colorscale=True,
                        group_stats=group_stats)

    # plot
    py.iplot(fig, filename='Violin Plot with Colorscale')
    ```
    """

    # Validate colors
    if isinstance(colors, dict):
        valid_colors = clrs.validate_colors_dict(colors, 'rgb')
    else:
        valid_colors = clrs.validate_colors(colors, 'rgb')

    # `pd` may be falsy when pandas is not installed, so guard every
    # attribute access on it.
    data_is_dataframe = bool(
        pd and isinstance(data, pd.core.frame.DataFrame)
    )

    # ---- single violin: no grouping variable -------------------------
    if group_header is None:
        if isinstance(data, list):
            if not data:
                raise exceptions.PlotlyError("If data is a list, it must be "
                                             "nonempty and contain either "
                                             "numbers or dictionaries.")

            if not all(isinstance(element, Number) for element in data):
                raise exceptions.PlotlyError("If data is a list, it must "
                                             "contain only numbers.")

        if data_is_dataframe:
            if data_header is None:
                raise exceptions.PlotlyError("data_header must be the "
                                             "column name with the "
                                             "desired numeric data for "
                                             "the violin plot.")

            data = data[data_header].values.tolist()

        # call the plotting functions
        # NOTE(review): valid_colors[0] assumes a sequence; a dict `colors`
        # without a group_header is not handled here -- confirm intent.
        plot_data, plot_xrange = violinplot(data, fillcolor=valid_colors[0],
                                            rugplot=rugplot)

        layout = graph_objs.Layout(
            title=title,
            autosize=False,
            font=graph_objs.layout.Font(size=11),
            height=height,
            showlegend=False,
            width=width,
            xaxis=make_XAxis('', plot_xrange),
            yaxis=make_YAxis(''),
            hovermode='closest'
        )
        layout['yaxis'].update(dict(showline=False,
                                    showticklabels=False,
                                    ticks=''))

        return graph_objs.Figure(data=plot_data, layout=layout)

    # ---- grouped violins: a dataframe is mandatory --------------------
    if not data_is_dataframe:
        raise exceptions.PlotlyError("Error. You must use a pandas "
                                     "DataFrame if you are using a "
                                     "group header.")

    if data_header is None:
        raise exceptions.PlotlyError("data_header must be the column "
                                     "name with the desired numeric "
                                     "data for the violin plot.")

    # Identity check kept on purpose: only the literal False selects the
    # non-colorscale path, exactly as before.
    if use_colorscale is False:
        if isinstance(valid_colors, dict):
            build_figure = violin_dict
        else:
            build_figure = violin_no_colorscale
    else:
        if isinstance(valid_colors, dict):
            raise exceptions.PlotlyError("The colors param cannot be "
                                         "a dictionary if you are "
                                         "using a colorscale.")

        if len(valid_colors) < 2:
            raise exceptions.PlotlyError("colors must be a list with "
                                         "at least 2 colors. A "
                                         "Plotly scale is allowed.")

        if not isinstance(group_stats, dict):
            raise exceptions.PlotlyError("Your group_stats param "
                                         "must be a dictionary.")

        build_figure = violin_colorscale

    return build_figure(
        data, data_header, group_header, valid_colors,
        use_colorscale, group_stats, rugplot, sort,
        height, width, title
    )
Ejemplo n.º 9
0
def create_project_gantt(
    df,
    colors,
    title="Gantt Chart",
    height=None,
    width=None,
    bar_width=0.2,
    showgrid_x=False,
    showgrid_y=False,
    task_length=27,
    index_col='Resource',
    showlegend=True
):
    """
    Returns figure for a Gantt chart with a darker "percent done" overlay.

    Every task becomes three traces: a filled rectangle, a pair of
    invisible hover markers at its start and end, and a darker rectangle
    covering the completed share of the task.

    :param df: the task data; it is passed through `validate_gantt` and
        the result is read as a sequence of rows with the keys 'Task',
        'Start', 'Finish', 'Percent', 'Description' and the index column.
        NOTE(review): the overlay parses the bar's x values with the
        format '%Y-%m-%d', so 'Start'/'Finish' are presumably strings in
        that format -- confirm against `_get_corner_points`.
    :param (dict) colors: maps every value of the index column to a color;
        validated with `clrs.validate_colors_dict(colors, "rgb")`.
    :param (str) title: the title of the chart.
    :param (float) height: the height of the chart.
    :param (float) width: the width of the chart.
    :param (float) bar_width: half of the vertical extent of a bar; the
        bar of task number i spans i - bar_width to i + bar_width.
    :param (bool) showgrid_x: show/hide the x-axis grid.
    :param (bool) showgrid_y: show/hide the y-axis grid.
    :param (int) task_length: task names longer than this are cut at the
        last space found at or before this position (or exactly at this
        position when there is none) and suffixed with '...'.
    :param (str) index_col: the row key whose values select the bar color
        and name the legend entries. A falsy value falls back to
        'Resource'. Default = 'Resource'
    :param (bool) showlegend: if True, the first bar of every index value
        gets a legend entry; otherwise no trace is listed in the legend.

    :raises: (PlotlyError) if `index_col` is not a key of the first row,
        or if a row's index value has no entry in `colors`.
    """
    # validate gantt input data
    chart = validate_gantt(df)

    if index_col:
        if index_col not in chart[0]:
            raise exceptions.PlotlyError(
                "In order to use an indexing column and assign colors to "
                "the values of the index, you must choose an actual "
                "column name in the dataframe or key if a list of "
                "dictionaries is being used."
            )

        # validate gantt index column
        utils.validate_index([row[index_col] for row in chart])

    # Column that drives both the color lookup and the legend grouping.
    # Reading it from one place keeps validation, color and legend name in
    # agreement when a non-default index_col is used.
    group_col = index_col or "Resource"

    # Validate colors
    color_by_group = clrs.validate_colors_dict(colors, "rgb")

    tasks = []
    task_names = []

    # Generate list of tasks (and the unique, possibly shortened, names)
    for row in chart:
        full_name = row["Task"]
        if len(full_name) > task_length:
            # break at the last space within the allowed length, if any
            str_break = task_length
            for i in range(task_length, 0, -1):
                if full_name[i] == ' ':
                    str_break = i
                    break
            name = full_name[0:str_break] + '...'
        else:
            name = full_name

        task = dict(
            x0=row["Start"],
            x1=row["Finish"],
            resource=row[group_col],
            percent=row["Percent"],
            name=name,
            # fall back to the full task name when no description is given
            description=row['Description'] or full_name,
        )
        tasks.append(task)

        # Make sure the index value has an associated color
        if task['resource'] not in color_by_group:
            raise exceptions.PlotlyError(
                "If you are using colors as a dictionary, all of its "
                "keys must be all the values in the index column."
            )

        if name not in task_names:
            task_names.append(name)

    fill_traces = []     # one filled rectangle per task
    marker_traces = []   # invisible start/end markers per task
    groups_in_legend = []

    for position, task in enumerate(tasks):
        # Separate task bars by index
        y0 = position - bar_width
        y1 = position + bar_width
        task["y0"] = y0
        task["y1"] = y1

        color = color_by_group[task["resource"]]

        # only the first bar of each group is listed in the legend
        group = task['resource']
        first_of_group = group not in groups_in_legend and showlegend
        if first_of_group:
            groups_in_legend.append(group)

        xs, ys = _get_corner_points(task["x0"], y0, task["x1"], y1)

        # NOTE(review): 'taskname' and 'percent' are bookkeeping keys that
        # stay on the trace dicts handed to go.Figure; confirm the plotly
        # version in use accepts them.
        fill_traces.append({
            "x": list(xs),
            "y": list(ys),
            "mode": "none",
            "fill": "toself",
            "taskname": task["name"],
            "percent": task['percent'],
            "hoverinfo": "text",
            "legendgroup": color,
            "fillcolor": color,
            "text": task['description'],
            "name": group if first_of_group else None,
            "showlegend": bool(first_of_group),
        })

        # dummy markers for showing start and end of interval
        marker_traces.append({
            "x": [task["x0"], task["x1"]],
            "y": [position, position],
            "mode": "markers",
            "text": [task["description"], task["description"]],
            "marker": dict(color=color, size=1, opacity=0),
            "name": "",
            "showlegend": False,
            "legendgroup": color,
        })

    layout = dict(
        title=title,
        showlegend=True,
        height=height,
        width=width,
        shapes=[],
        hovermode="closest",
        yaxis=dict(
            showgrid=showgrid_y,
            ticktext=task_names,
            tickvals=list(range(len(task_names))),
            range=[-1, len(task_names) + 1],
            autorange=False,
            zeroline=False,
        ),
        xaxis=dict(
            showgrid=showgrid_x,
            zeroline=False,
            rangeselector=dict(
                buttons=[
                    dict(count=7, label="1w", step="day", stepmode="backward"),
                    dict(count=1, label="1m", step="month", stepmode="backward"),
                    dict(count=6, label="6m", step="month", stepmode="backward"),
                    dict(count=1, label="YTD", step="year", stepmode="todate"),
                    dict(count=1, label="1y", step="year", stepmode="backward"),
                    dict(step="all"),
                ]
            ),
            type="date",
        ),
    )

    data = fill_traces + marker_traces

    # Build the "percent done" overlay: a darker copy of every bar whose
    # right edge is pulled back according to the task's percent value.
    percent_data = []
    # raw string: '\(' is not a valid escape in a normal string literal
    rgb_pattern = re.compile(
        r'rgb\((?P<r>[0-9]+), (?P<g>[0-9]+), (?P<b>[0-9]+)\)'
    )
    for item in fill_traces:
        entry = copy.deepcopy(item)
        match = rgb_pattern.match(entry['fillcolor'])
        if match:
            # darken each channel by 35%
            darker = [
                int(max(0, floor(float(channel) - (float(channel) * 0.35))))
                for channel in (
                    int(match.group('r')),
                    int(match.group('g')),
                    int(match.group('b')),
                )
            ]
            entry['fillcolor'] = 'rgb({r}, {g}, {b})'.format(
                r=darker[0], g=darker[1], b=darker[2]
            )
        else:
            entry['fillcolor'] = '#000000'
        entry['name'] = ''
        entry['showlegend'] = False

        start = datetime.strptime(entry['x'][0], '%Y-%m-%d')
        end = datetime.strptime(entry['x'][1], '%Y-%m-%d')
        elapsed = int((end - start).total_seconds() * item['percent'])
        # keep whole days and whole hours only
        days, remainder = divmod(elapsed, 86400)  # seconds per day
        hours = remainder // 3600  # seconds per hour
        end_date = str(adjust_end_date(start + timedelta(days=days, hours=hours)))

        # presumably points 1 and 2 are the right-hand corners returned by
        # _get_corner_points -- confirm against that helper
        entry['x'][1] = end_date
        entry['x'][2] = end_date
        percent_data.append(entry)
    data += percent_data

    fig = go.Figure(data=data, layout=layout)
    return fig
Ejemplo n.º 10
0
def create_facet_grid(df,
                      x=None,
                      y=None,
                      facet_row=None,
                      facet_col=None,
                      color_name=None,
                      colormap=None,
                      color_is_cat=False,
                      facet_row_labels=None,
                      facet_col_labels=None,
                      height=None,
                      width=None,
                      trace_type='scatter',
                      hide_x_labels=False,
                      hide_y_labels=False,
                      scales='fixed',
                      dtick_x=None,
                      dtick_y=None,
                      text_name=None,
                      show_boxes=True,
                      ggplot2=False,
                      binsize=1,
                      jitter=0,
                      **kwargs):
    """
    Returns figure for facet grid.
    :param (pd.DataFrame) df: the dataframe of columns for the facet grid.
    :param (str) x: the name of the dataframe column for the x axis data.
    :param (str) y: the name of the dataframe column for the y axis data.
    :param (str) facet_row: the name of the dataframe column that is used to
        facet the grid into row panels.
    :param (str) facet_col: the name of the dataframe column that is used to
        facet the grid into column panels.
    :param (str) color_name: the name of your dataframe column that will
        function as the colormap variable.
    :param (str|list|dict) colormap: the param that determines how the
        color_name column colors the data. If the dataframe contains numeric
        data, then a dictionary of colors will group the data categorically
        while a Plotly Colorscale name or a custom colorscale will treat it
        numerically. To learn more about colors and types of colormap, run
        `help(plotly.colors)`.
    :param (bool) color_is_cat: determines whether a numerical column for the
        colormap will be treated as categorical (True) or sequential (False).
            Default = False.
    :param (str|dict) facet_row_labels: set to either 'name' or a dictionary
        of all the unique values in the faceting row mapped to some text to
        show up in the label annotations. If None, labeling works like usual.
    :param (str|dict) facet_col_labels: set to either 'name' or a dictionary
        of all the values in the faceting row mapped to some text to show up
        in the label annotations. If None, labeling works like usual.
    :param (int) height: the height of the facet grid figure.
    :param (int) width: the width of the facet grid figure.
    :param (str) trace_type: decides the type of plot to appear in the
        facet grid. The options are 'scatter', 'scattergl', 'histogram',
        'bar', and 'box'.
        Default = 'scatter'.
    :param (str) scales: determines if axes have fixed ranges or not. Valid
        settings are 'fixed' (all axes fixed), 'free_x' (x axis free only),
        'free_y' (y axis free only) or 'free' (both axes free).
    :param (float) dtick_x: determines the distance between each tick on the
        x-axis. Default is None which means dtick_x is set automatically.
    :param (float) dtick_y: determines the distance between each tick on the
        y-axis. Default is None which means dtick_y is set automatically.
    :param (bool) show_boxes: draws grey boxes behind the facet titles.
    :param (bool) ggplot2: draws the facet grid in the style of `ggplot2`. See
        http://ggplot2.tidyverse.org/reference/facet_grid.html for reference.
        Default = False
    :param (int) binsize: groups all data into bins of a given length.
    :param (int) jitter: Amount to offset an individual categorical x-axis
        datapoint.  The higher the number, the more extreme the jitter
        Default: 0 (no jitter)
    :param (dict) kwargs: a dictionary of scatterplot arguments.
    Examples 1: One Way Faceting
    ```
    import plotly.plotly as py
    import plotly.figure_factory as ff
    import pandas as pd
    mpg = pd.read_table('https://raw.githubusercontent.com/plotly/datasets/master/mpg_2017.txt')
    fig = ff.create_facet_grid(
        mpg,
        x='displ',
        y='cty',
        facet_col='cyl',
    )
    py.iplot(fig, filename='facet_grid_mpg_one_way_facet')
    ```
    Example 2: Two Way Faceting
    ```
    import plotly.plotly as py
    import plotly.figure_factory as ff
    import pandas as pd
    mpg = pd.read_table('https://raw.githubusercontent.com/plotly/datasets/master/mpg_2017.txt')
    fig = ff.create_facet_grid(
        mpg,
        x='displ',
        y='cty',
        facet_row='drv',
        facet_col='cyl',
    )
    py.iplot(fig, filename='facet_grid_mpg_two_way_facet')
    ```
    Example 3: Categorical Coloring
    ```
    import plotly.plotly as py
    import plotly.figure_factory as ff
    import pandas as pd
    mpg = pd.read_table('https://raw.githubusercontent.com/plotly/datasets/master/mpg_2017.txt')
    fig = ff.create_facet_grid(
        mtcars,
        x='mpg',
        y='wt',
        facet_col='cyl',
        color_name='cyl',
        color_is_cat=True,
    )
    py.iplot(fig, filename='facet_grid_mpg_default_colors')
    ```
    Example 4: Sequential Coloring
    ```
    import plotly.plotly as py
    import plotly.figure_factory as ff
    import pandas as pd
    tips = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/tips.csv')
    fig = ff.create_facet_grid(
        tips,
        x='total_bill',
        y='tip',
        facet_row='sex',
        facet_col='smoker',
        color_name='size',
        colormap='Viridis',
    )
    py.iplot(fig, filename='facet_grid_tips_sequential_colors')
    ```
    Example 5: Custom labels
    ```
    import plotly.plotly as py
    import plotly.figure_factory as ff
    import pandas as pd
    mtcars = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/mtcars.csv')
    fig = ff.create_facet_grid(
        mtcars,
        x='wt',
        y='mpg',
        facet_col='cyl',
        facet_col_labels={4: "$\\alpha$", 6: '$\\beta$', 8: '$\sqrt[y]{x}$'},
    )
    py.iplot(fig, filename='facet_grid_mtcars_custom_labels')
    ```
    Example 6: Other Trace Type
    ```
    import plotly.plotly as py
    import plotly.figure_factory as ff
    import pandas as pd
    mtcars = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/mtcars.csv')
    fig = ff.create_facet_grid(
        mtcars,
        x='wt',
        facet_col='cyl',
        trace_type='histogram',
    )
    py.iplot(fig, filename='facet_grid_mtcars_other_trace_type')
    ```
    """

    if not pd:
        raise exceptions.ImportError(
            "'pandas' must be installed for this figure_factory.")

    if not isinstance(df, pd.DataFrame):
        raise exceptions.PlotlyError("You must input a pandas DataFrame.")

    # make sure all columns are of homogenous datatype
    utils.validate_dataframe(df)

    # the tsne_dynamic trace type is an alias for scatter
    if trace_type == 'tsne_dynamic':
        trace_type = 'scatter'

    if PLOT_LOGGING:
        print("DEBUG: trace_type is: {0}".format(trace_type), file=sys.stderr)

    if trace_type in ['scatter', 'scattergl']:
        if not x or not y:
            raise exceptions.PlotlyError(
                "You need to input 'x' and 'y' if you are you are using a "
                "trace_type of 'scatter' or 'scattergl'.")

    for key in [x, y, facet_row, facet_col, color_name]:
        if key is not None:
            try:
                df[key]
            except KeyError:
                raise exceptions.PlotlyError(
                    "x, y, facet_row, facet_col and color_name must be keys "
                    "in your dataframe.")

    if trace_type not in VALID_TRACE_TYPES:
        raise exceptions.PlotlyError(
            "'trace_type' must be in {}".format(VALID_TRACE_TYPES))

    if trace_type == 'histogram' or trace_type == 'line':
        SUBPLOT_SPACING = 0.06
    else:
        SUBPLOT_SPACING = 0.015

    # seperate kwargs for marker and else
    if 'marker' in kwargs:
        kwargs_marker = kwargs['marker']
    else:
        kwargs_marker = {}
    marker_color = kwargs_marker.pop('color', None)
    kwargs.pop('marker', None)
    kwargs_trace = kwargs

    if 'size' not in kwargs_marker:
        kwargs_marker['size'] = 3

    # Bar plots do not accept size markers
    if trace_type == 'bar':
        kwargs_marker.pop('size', None)

    if 'opacity' not in kwargs_marker:
        kwargs_trace['opacity'] = 0.6

    # if 'line' not in kwargs_marker:
    # kwargs_marker['line'] = {'color': 'darkgrey', 'width': 1}

    # default marker color (ggplot2 mode always uses black)
    if not ggplot2:
        if not marker_color:
            marker_color = '#401362'
    else:
        marker_color = 'rgb(0, 0, 0)'

    num_of_rows = 1
    num_of_cols = 1
    flipped_rows = False
    flipped_cols = False
    if facet_row:
        num_of_rows = len(df[facet_row].unique())
        flipped_rows = _is_flipped(num_of_rows)
        if isinstance(facet_row_labels, dict):
            for key in df[facet_row].unique():
                if key not in facet_row_labels.keys():
                    unique_keys = df[facet_row].unique().tolist()
                    raise exceptions.PlotlyError(
                        CUSTOM_LABEL_ERROR.format(unique_keys))
    if facet_col:
        num_of_cols = len(df[facet_col].unique())
        flipped_cols = _is_flipped(num_of_cols)
        if isinstance(facet_col_labels, dict):
            for key in df[facet_col].unique():
                if key not in facet_col_labels.keys():
                    unique_keys = df[facet_col].unique().tolist()
                    raise exceptions.PlotlyError(
                        CUSTOM_LABEL_ERROR.format(unique_keys))

    # Set up some args to pass to _gear_facet_grid function
    show_legend = False
    colormapping = None
    color_type = None  # None, 'categorical', or 'numerical'

    # If there is a color label, use either the categorical or numerical facet grid
    if color_name:
        if isinstance(colormap, dict):
            show_legend = True
            color_type = "categorical"

            clrs.validate_colors_dict(colormap, 'rgb')

            for val in df[color_name].unique():
                if val not in colormap.keys():
                    raise exceptions.PlotlyError(
                        "If using 'colormap' as a dictionary, make sure "
                        "all the values of the colormap column are in "
                        "the keys of your dictionary.")

            colormapping = colormap
            if PLOT_LOGGING:
                print("DEBUG: Color type is 'categorical' with colormap dict",
                      file=sys.stderr)

        elif isinstance(colormap, list):
            color_type = "numerical"
            colormapping = colormap
            clrs.validate_colorscale(colormapping)
            if PLOT_LOGGING:
                print("DEBUG: Color type is 'numerical' from colormap list",
                      file=sys.stderr)
        elif isinstance(colormap, str):
            color_type = "numerical"
            if colormap in clrs.PLOTLY_SCALES.keys():
                colormapping = clrs.PLOTLY_SCALES[colormap]
            else:
                raise exceptions.PlotlyError(
                    "If 'colormap' is a string, it must be the name "
                    "of a Plotly Colorscale. The available colorscale "
                    "names are {}".format(clrs.PLOTLY_SCALES.keys()))
            if PLOT_LOGGING:
                print("DEBUG: Color type is 'numerical' from colormap string",
                      file=sys.stderr)
        else:
            if isinstance(df[color_name].iloc[0], str) or color_is_cat:
                color_type = "categorical"
                # use default plotly colors for dictionary
                default_colors = clrs.DEFAULT_PLOTLY_COLORS
                colormap = {}
                j = 0
                for val in df[color_name].unique():
                    if j >= len(default_colors):
                        j = 0
                    colormap[val] = default_colors[j]
                    j += 1
                colormapping = colormap
            else:
                color_type = "numerical"
                colormapping = [[0, 'rgb(218, 183, 193)'],
                                [0.35, 'rgb(194, 137, 166)'],
                                [0.5, 'rgb(169, 98, 151)'],
                                [0.6, 'rgb(145, 66, 143)'],
                                [0.7, 'rgb(105, 39, 122)'],
                                [1, 'rgb(63, 19, 98)']]
                if PLOT_LOGGING:
                    print("DEBUG: Color type is 'numerical' with no colormap",
                          file=sys.stderr)
    else:
        if PLOT_LOGGING:
            print("DEBUG: Color type is 'None'", file=sys.stderr)

    fig, annotations = _gear_facet_grid(
        df, x, y, facet_row, facet_col, color_name, colormapping, color_type,
        num_of_rows, num_of_cols, facet_row_labels, facet_col_labels,
        trace_type, flipped_rows, flipped_cols, SUBPLOT_SPACING, marker_color,
        text_name, jitter, kwargs_trace, kwargs_marker)

    ### General layout adjustments
    fig['layout'].update(title='', paper_bgcolor=PAPER_BGCOLOR)
    fig['layout']['hovermode'] = "closest"
    # Default "plotly" theme produces gray plot backgrounds
    fig['layout']['template'] = "none"

    # axis titles
    x_title_annot = _axis_title_annotation('', 'x')
    y_title_annot = _axis_title_annotation('', 'y')

    # annotations
    annotations.append(x_title_annot)
    annotations.append(y_title_annot)

    # all xaxis and yaxis labels
    axis_labels = {'x': [], 'y': []}
    for key in fig['layout']:
        if 'xaxis' in key:
            axis_labels['x'].append(key)
        elif 'yaxis' in key:
            axis_labels['y'].append(key)

    string_number_in_data = False
    for var in [v for v in [x, y] if v]:
        if isinstance(df[var].tolist()[0], str):
            for item in df[var]:
                try:
                    int(item)
                    string_number_in_data = True
                except ValueError:
                    pass

    # Iterate through the 'x' and 'y' axes
    for x_y in axis_labels.keys():
        # Iterate through all faceted axes
        for axis_name in axis_labels[x_y]:
            # Common to both x and y
            if string_number_in_data:
                fig['layout'][axis_name]['type'] = 'category'
            fig['layout'][axis_name]['showgrid'] = False
            fig['layout'][axis_name]['automargin'] = True
            fig['layout'][axis_name]['zeroline'] = False
            # Specific axis only
            if x_y == 'x':
                if hide_x_labels:
                    #TODO: test with 'visible' attribute instead of 'showticklabels'
                    fig['layout'][axis_name]['showticklabels'] = False

                # Uniformity of tick angles if facet groupings are present
                if facet_col:
                    fig['layout'][axis_name]['tickangle'] = 270
            elif x_y == 'y':
                fig['layout'][axis_name]['hoverformat'] = '.2f'
                if hide_y_labels:
                    fig['layout'][axis_name]['showticklabels'] = False

    fig['layout']['autosize'] = True

    # legend
    fig['layout']['showlegend'] = show_legend
    fig['layout']['legend']['bgcolor'] = LEGEND_COLOR
    fig['layout']['legend']['borderwidth'] = LEGEND_BORDER_WIDTH
    fig['layout']['legend']['x'] = 1.05
    fig['layout']['legend']['y'] = 1
    fig['layout']['legend']['yanchor'] = 'top'

    # Colorbar adjustments
    if color_type == "numerical":
        fig['layout']['coloraxis'] = {
            "colorscale":
            colormapping,  # Defines the range of colors for a numerical color group
            "colorbar": {
                'x': 1.15
            },
            "showscale": True,
        }

    # Violin plot settings
    if trace_type == 'violin':
        if color_name:
            fig['layout']['violinmode'] = 'group'
        else:
            fig['layout']['violinmode'] = 'overlay'

    # assign annotations to figure
    fig['layout']['annotations'] = annotations

    # force free scales for every trace type other than scatter, line and
    # scattergl (e.g. so histogram bars are autoscaled)
    if trace_type not in ['scatter', 'line', 'scattergl']:
        scales = 'free'

    # validate scales
    if scales not in ['fixed', 'free_x', 'free_y', 'free']:
        raise exceptions.PlotlyError(
            "'scales' must be set to 'fixed', 'free_x', 'free_y' and 'free'.")
    fixed_axes = None
    if scales == 'fixed':
        fixed_axes = ['x', 'y']
    elif scales == 'free_x':
        fixed_axes = ['y']
    elif scales == 'free_y':
        fixed_axes = ['x']
    elif scales == 'free':
        fixed_axes = []
    else:
        raise (
            "Invalid scale type provided.  Must be 'fixed', 'free_x', 'free_y', or 'free'"
        )

    # SAdkins - Removed checks for None and length and sparse matrix check
    # since recent edits should have all traces populated with data
    if len(fig['data']):
        # fixed ranges
        for x_y in fixed_axes:
            min_range = min(chain(*(trace[x_y] for trace in fig['data'])))
            max_range = max(chain(*(trace[x_y] for trace in fig['data'])))
            range_are_numbers = (isinstance(min_range, Number)
                                 and isinstance(max_range, Number))
            if PLOT_LOGGING:
                print("DEBUG: On axis:{0} min_range:{1} max_range:{2}".format(
                    x_y, min_range, max_range),
                      file=sys.stderr)

            user_dtick = None
            if x_y == 'x':
                user_dtick = dtick_x
            elif x_y == 'y':
                user_dtick = dtick_y

            dtick, min_range, max_range = _calculate_dtick(
                min_range, max_range, range_are_numbers, user_dtick)

            # For the given axis dimension set tick attributes
            for axis_title in axis_labels[x_y]:
                fig['layout'][axis_title]['dtick'] = dtick
                fig['layout'][axis_title]['ticklen'] = 0
                if range_are_numbers:
                    fig['layout'][axis_title]['range'] = [min_range, max_range]

    else:
        if PLOT_LOGGING:
            print("DEBUG: No trace data for current plot")

    return fig