def create_facet_grid(df, x=None, y=None, facet_row=None, facet_col=None,
                      color_name=None, colormap=None, color_is_cat=False,
                      facet_row_labels=None, facet_col_labels=None,
                      height=None, width=None, trace_type='scatter',
                      scales='fixed', dtick_x=None, dtick_y=None,
                      show_boxes=True, ggplot2=False, binsize=1, **kwargs):
    r"""
    Returns figure for facet grid.

    :param (pd.DataFrame) df: the dataframe of columns for the facet grid.
    :param (str) x: the name of the dataframe column for the x axis data.
    :param (str) y: the name of the dataframe column for the y axis data.
    :param (str) facet_row: the name of the dataframe column that is used to
        facet the grid into row panels.
    :param (str) facet_col: the name of the dataframe column that is used to
        facet the grid into column panels.
    :param (str) color_name: the name of your dataframe column that will
        function as the colormap variable.
    :param (str|list|dict) colormap: the param that determines how the
        color_name column colors the data. If the dataframe contains numeric
        data, then a dictionary of colors will group the data categorically
        while a Plotly Colorscale name or a custom colorscale will treat it
        numerically. To learn more about colors and types of colormap, run
        `help(plotly.colors)`.
    :param (bool) color_is_cat: determines whether a numerical column for the
        colormap will be treated as categorical (True) or sequential (False).
        Default = False.
    :param (str|dict) facet_row_labels: set to either 'name' or a dictionary
        of all the unique values in the faceting row mapped to some text to
        show up in the label annotations. If None, labeling works like usual.
    :param (str|dict) facet_col_labels: set to either 'name' or a dictionary
        of all the unique values in the faceting column mapped to some text
        to show up in the label annotations. If None, labeling works like
        usual.
    :param (int) height: the height of the facet grid figure. Defaults to
        max(600, 100 * number of rows).
    :param (int) width: the width of the facet grid figure. Defaults to
        max(600, 100 * number of columns).
    :param (str) trace_type: decides the type of plot to appear in the
        facet grid. The options are 'scatter', 'scattergl', 'histogram',
        'bar', and 'box'.
        Default = 'scatter'.
    :param (str) scales: determines if axes have fixed ranges or not. Valid
        settings are 'fixed' (all axes fixed), 'free_x' (x axis free only),
        'free_y' (y axis free only) or 'free' (both axes free). Any
        trace_type other than 'scatter'/'scattergl' forces 'free'.
    :param (float) dtick_x: determines the distance between each tick on the
        x-axis. Default is None which means dtick_x is set automatically.
    :param (float) dtick_y: determines the distance between each tick on the
        y-axis. Default is None which means dtick_y is set automatically.
    :param (bool) show_boxes: draws grey boxes behind the facet titles.
    :param (bool) ggplot2: draws the facet grid in the style of `ggplot2`. See
        http://ggplot2.tidyverse.org/reference/facet_grid.html for reference.
        Default = False
    :param (int) binsize: groups all data into bins of a given length.
        (Not referenced by the body of this function.)
    :param (dict) kwargs: a dictionary of scatterplot arguments. A 'marker'
        entry is split off and handled separately; a plain-dict 'marker' is
        copied, so the caller's dictionary is left untouched.

    :raises (ImportError): if pandas is not available.
    :raises (PlotlyError): if df is not a pandas DataFrame, if x/y are
        missing for scatter traces, if a column name is not in df, if
        'scales' or 'trace_type' is invalid, if custom facet labels or a
        dictionary colormap do not cover every unique value, if a string
        colormap is not a Plotly colorscale name, or if the color column is
        neither string nor numeric while color_is_cat is False.
    :rtype: the figure produced by the facet grid helpers with layout,
        annotations, legend and axis settings applied.

    Examples 1: One Way Faceting
    ```
    import plotly.plotly as py
    import plotly.figure_factory as ff

    import pandas as pd

    mpg = pd.read_table('https://raw.githubusercontent.com/plotly/datasets/master/mpg_2017.txt')

    fig = ff.create_facet_grid(
        mpg,
        x='displ',
        y='cty',
        facet_col='cyl',
    )
    py.iplot(fig, filename='facet_grid_mpg_one_way_facet')
    ```

    Example 2: Two Way Faceting
    ```
    import plotly.plotly as py
    import plotly.figure_factory as ff

    import pandas as pd

    mpg = pd.read_table('https://raw.githubusercontent.com/plotly/datasets/master/mpg_2017.txt')

    fig = ff.create_facet_grid(
        mpg,
        x='displ',
        y='cty',
        facet_row='drv',
        facet_col='cyl',
    )
    py.iplot(fig, filename='facet_grid_mpg_two_way_facet')
    ```

    Example 3: Categorical Coloring
    ```
    import plotly.plotly as py
    import plotly.figure_factory as ff

    import pandas as pd

    mtcars = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/mtcars.csv')

    fig = ff.create_facet_grid(
        mtcars,
        x='mpg',
        y='wt',
        facet_col='cyl',
        color_name='cyl',
        color_is_cat=True,
    )
    py.iplot(fig, filename='facet_grid_mpg_default_colors')
    ```

    Example 4: Sequential Coloring
    ```
    import plotly.plotly as py
    import plotly.figure_factory as ff

    import pandas as pd

    tips = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/tips.csv')

    fig = ff.create_facet_grid(
        tips,
        x='total_bill',
        y='tip',
        facet_row='sex',
        facet_col='smoker',
        color_name='size',
        colormap='Viridis',
    )
    py.iplot(fig, filename='facet_grid_tips_sequential_colors')
    ```

    Example 5: Custom labels
    ```
    import plotly.plotly as py
    import plotly.figure_factory as ff

    import pandas as pd

    mtcars = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/mtcars.csv')

    fig = ff.create_facet_grid(
        mtcars,
        x='wt',
        y='mpg',
        facet_col='cyl',
        facet_col_labels={4: "$\\alpha$", 6: '$\\beta$', 8: '$\\sqrt[y]{x}$'},
    )

    py.iplot(fig, filename='facet_grid_mtcars_custom_labels')
    ```

    Example 6: Other Trace Type
    ```
    import plotly.plotly as py
    import plotly.figure_factory as ff

    import pandas as pd

    mtcars = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/mtcars.csv')

    fig = ff.create_facet_grid(
        mtcars,
        x='wt',
        facet_col='cyl',
        trace_type='histogram',
    )

    py.iplot(fig, filename='facet_grid_mtcars_other_trace_type')
    ```
    """
    if not pd:
        raise ImportError(
            "'pandas' must be installed for this figure_factory.")

    if not isinstance(df, pd.DataFrame):
        raise exceptions.PlotlyError("You must input a pandas DataFrame.")

    # make sure all columns are of homogenous datatype
    utils.validate_dataframe(df)

    if trace_type in ['scatter', 'scattergl']:
        if not x or not y:
            raise exceptions.PlotlyError(
                "You need to input 'x' and 'y' if you are you are using a "
                "trace_type of 'scatter' or 'scattergl'.")

    for key in [x, y, facet_row, facet_col, color_name]:
        if key is not None:
            try:
                df[key]
            except KeyError:
                raise exceptions.PlotlyError(
                    "x, y, facet_row, facet_col and color_name must be keys "
                    "in your dataframe.")

    # autoscale histogram bars
    if trace_type not in ['scatter', 'scattergl']:
        scales = 'free'

    # validate scales
    if scales not in ['fixed', 'free_x', 'free_y', 'free']:
        raise exceptions.PlotlyError(
            "'scales' must be set to 'fixed', 'free_x', 'free_y' and 'free'.")

    if trace_type not in VALID_TRACE_TYPES:
        raise exceptions.PlotlyError(
            "'trace_type' must be in {}".format(VALID_TRACE_TYPES))

    subplot_spacing = 0.06 if trace_type == 'histogram' else 0.015

    # separate kwargs for marker and everything else; a plain dict is copied
    # so that the defaults added below never leak into the caller's object
    marker_arg = kwargs.pop('marker', {})
    if isinstance(marker_arg, dict):
        kwargs_marker = dict(marker_arg)
    else:
        kwargs_marker = marker_arg
    marker_color = kwargs_marker.pop('color', None)
    kwargs_trace = kwargs

    if 'size' not in kwargs_marker:
        kwargs_marker['size'] = 5 if ggplot2 else 8

    # default trace opacity, without overriding one the caller passed in
    if 'opacity' not in kwargs_marker and not ggplot2:
        kwargs_trace.setdefault('opacity', 0.6)

    if 'line' not in kwargs_marker:
        if ggplot2:
            kwargs_marker['line'] = {}
        else:
            kwargs_marker['line'] = {'color': 'darkgrey', 'width': 1}

    # default marker color; the ggplot2 style always uses black markers
    if ggplot2:
        marker_color = 'rgb(0, 0, 0)'
    elif not marker_color:
        marker_color = 'rgb(31, 119, 180)'

    num_of_rows = 1
    num_of_cols = 1
    flipped_rows = False
    flipped_cols = False

    if facet_row:
        row_values = df[facet_row].unique()
        num_of_rows = len(row_values)
        flipped_rows = _is_flipped(num_of_rows)
        if isinstance(facet_row_labels, dict):
            for key in row_values:
                if key not in facet_row_labels:
                    raise exceptions.PlotlyError(
                        CUSTOM_LABEL_ERROR.format(row_values.tolist()))

    if facet_col:
        col_values = df[facet_col].unique()
        num_of_cols = len(col_values)
        flipped_cols = _is_flipped(num_of_cols)
        if isinstance(facet_col_labels, dict):
            for key in col_values:
                if key not in facet_col_labels:
                    raise exceptions.PlotlyError(
                        CUSTOM_LABEL_ERROR.format(col_values.tolist()))

    # positional arguments shared by every grid-building helper
    grid_args = (num_of_rows, num_of_cols, facet_row_labels,
                 facet_col_labels, trace_type, flipped_rows, flipped_cols,
                 show_boxes, subplot_spacing, marker_color, kwargs_trace,
                 kwargs_marker)

    show_legend = False
    if color_name:
        first_value = df[color_name].iloc[0]
        is_categorical = isinstance(first_value, str) or color_is_cat
        if not is_categorical and not isinstance(first_value, Number):
            # without this guard no figure would be built and the code
            # below would fail on an unbound 'fig'
            raise exceptions.PlotlyError(
                "The values of the 'color_name' column must be strings or "
                "numbers; set 'color_is_cat' to True to treat other types "
                "categorically.")

        if is_categorical or isinstance(colormap, dict):
            # a dictionary colormap groups even numeric data categorically
            show_legend = True
            if isinstance(colormap, dict):
                utils.validate_colors_dict(colormap, 'rgb')
                for val in df[color_name].unique():
                    if val not in colormap:
                        raise exceptions.PlotlyError(
                            "If using 'colormap' as a dictionary, make sure "
                            "all the values of the colormap column are in "
                            "the keys of your dictionary.")
            else:
                # use default plotly colors, cycling when they run out
                default_colors = utils.DEFAULT_PLOTLY_COLORS
                colormap = {}
                for j, val in enumerate(df[color_name].unique()):
                    colormap[val] = default_colors[j % len(default_colors)]

            fig, annotations = _facet_grid_color_categorical(
                df, x, y, facet_row, facet_col, color_name, colormap,
                *grid_args)
        else:
            if isinstance(colormap, list):
                colorscale_list = colormap
                utils.validate_colorscale(colorscale_list)
            elif isinstance(colormap, str):
                if colormap not in colors.PLOTLY_SCALES:
                    raise exceptions.PlotlyError(
                        "If 'colormap' is a string, it must be the name "
                        "of a Plotly Colorscale. The available colorscale "
                        "names are {}".format(colors.PLOTLY_SCALES.keys()))
                colorscale_list = colors.PLOTLY_SCALES[colormap]
            else:
                colorscale_list = colors.PLOTLY_SCALES['Reds']

            fig, annotations = _facet_grid_color_numerical(
                df, x, y, facet_row, facet_col, color_name, colorscale_list,
                *grid_args)
    else:
        fig, annotations = _facet_grid(
            df, x, y, facet_row, facet_col, *grid_args)

    if not height:
        height = max(600, 100 * num_of_rows)
    if not width:
        width = max(600, 100 * num_of_cols)

    fig['layout'].update(height=height, width=width, title='',
                         paper_bgcolor='rgb(251, 251, 251)')
    if ggplot2:
        fig['layout'].update(plot_bgcolor=PLOT_BGCOLOR,
                             paper_bgcolor='rgb(255, 255, 255)',
                             hovermode='closest')

    # axis titles
    annotations.append(_axis_title_annotation(x, 'x'))
    annotations.append(_axis_title_annotation(y, 'y'))

    # legend
    fig['layout']['showlegend'] = show_legend
    fig['layout']['legend']['bgcolor'] = LEGEND_COLOR
    fig['layout']['legend']['borderwidth'] = LEGEND_BORDER_WIDTH
    fig['layout']['legend']['x'] = 1.05
    fig['layout']['legend']['y'] = 1
    fig['layout']['legend']['yanchor'] = 'top'

    if show_legend and ggplot2:
        if color_name:
            annotations.append(_legend_annotation(color_name))
        fig['layout']['margin']['r'] = 150

    # assign annotations to figure
    fig['layout']['annotations'] = annotations

    # add shaded boxes behind axis titles
    if show_boxes and ggplot2:
        _add_shapes_to_fig(fig, ANNOT_RECT_COLOR, flipped_rows, flipped_cols)

    # all xaxis and yaxis labels
    axis_labels = {'x': [], 'y': []}
    for key in fig['layout']:
        if 'xaxis' in key:
            axis_labels['x'].append(key)
        elif 'yaxis' in key:
            axis_labels['y'].append(key)

    # string columns holding integer-like text must be drawn on category
    # axes
    string_number_in_data = False
    for var in [v for v in [x, y] if v]:
        if isinstance(df[var].tolist()[0], str):
            for item in df[var]:
                try:
                    int(item)
                    string_number_in_data = True
                except ValueError:
                    pass

    if string_number_in_data:
        for x_y in axis_labels.keys():
            for axis_name in axis_labels[x_y]:
                fig['layout'][axis_name]['type'] = 'category'

    # 'scales' was validated above, so the lookup cannot fail
    fixed_axes = {
        'fixed': ['x', 'y'],
        'free_x': ['y'],
        'free_y': ['x'],
        'free': [],
    }[scales]

    # fixed ranges
    for x_y in fixed_axes:
        min_ranges = []
        max_ranges = []
        for trace in fig['data']:
            if trace[x_y] is not None and len(trace[x_y]) > 0:
                min_ranges.append(min(trace[x_y]))
                max_ranges.append(max(trace[x_y]))
        min_ranges = [val for val in min_ranges if val is not None]
        max_ranges = [val for val in max_ranges if val is not None]

        min_range = min(min_ranges)
        max_range = max(max_ranges)

        range_are_numbers = (isinstance(min_range, Number) and
                             isinstance(max_range, Number))

        if range_are_numbers:
            min_range = math.floor(min_range)
            max_range = math.ceil(max_range)

            # widen frame by 5% of the original span on each side; the
            # padding is computed once so both sides grow by the same amount
            padding = 0.05 * (max_range - min_range)
            min_range -= padding
            max_range += padding

            user_dtick = dtick_x if x_y == 'x' else dtick_y
            if user_dtick:
                dtick = user_dtick
            else:
                dtick = math.floor(
                    (max_range - min_range) / MAX_TICKS_PER_AXIS)
        else:
            dtick = 1

        for axis_title in axis_labels[x_y]:
            axis = fig['layout'][axis_title]
            axis['dtick'] = dtick
            axis['ticklen'] = 4 if ggplot2 else 0
            axis['zeroline'] = False
            if ggplot2:
                axis['tickwidth'] = 1
                axis['gridcolor'] = GRID_COLOR
                # NOTE(review): the effective grid width is the literal 2,
                # not GRID_WIDTH -- confirm which one is intended
                axis['gridwidth'] = 2
                axis['tickfont'] = {'color': TICK_COLOR, 'size': 10}

        # insert ranges into fig
        if range_are_numbers:
            axis_key = '{}axis'.format(x_y)
            for key in fig['layout']:
                if axis_key in key:
                    fig['layout'][key]['range'] = [min_range, max_range]

    return fig
def create_scatterplotmatrix(df, index=None, endpts=None, diag='scatter',
                             height=500, width=500, size=6,
                             title='Scatterplot Matrix', colormap=None,
                             colormap_type='cat', dataframe=None,
                             headers=None, index_vals=None, **kwargs):
    """
    Returns data for a scatterplot matrix.

    :param (array) df: array of the data with column headers
    :param (str) index: name of the index column in data array
    :param (list|tuple) endpts: takes an increasing sequence of numbers
        that defines intervals on the real line. They are used to group
        the entries in an index of numbers into their corresponding
        interval and therefore can be treated as categorical data
    :param (str) diag: sets the chart type for the main diagonal plots.
        The options are 'scatter', 'histogram' and 'box'.
    :param (int|float) height: sets the height of the chart
    :param (int|float) width: sets the width of the chart
    :param (float) size: sets the marker size (in px)
    :param (str) title: the title label of the scatterplot matrix
    :param (str|tuple|list|dict) colormap: either a plotly scale name,
        an rgb or hex color, a color tuple, a list of colors or a
        dictionary. An rgb color is of the form 'rgb(x, y, z)' where
        x, y and z belong to the interval [0, 255] and a color tuple is a
        tuple of the form (a, b, c) where a, b and c belong to [0, 1].
        If colormap is a list, it must contain valid color types as its
        members.
        If colormap is a dictionary, all the string entries in
        the index column must be a key in colormap. In this case, the
        colormap_type is forced to 'cat' or categorical
    :param (str) colormap_type: determines how colormap is interpreted.
        Valid choices are 'seq' (sequential) and 'cat' (categorical). If
        'seq' is selected, only the first two colors in colormap will be
        considered (when colormap is a list) and the index values will be
        linearly interpolated between those two colors. This option is
        forced if all index values are numeric.
        If 'cat' is selected, a color from colormap will be assigned to
        each category from index, including the intervals if endpts is
        being used
    :param (dict) **kwargs: a dictionary of scatterplot arguments
        The only forbidden parameters are 'size', 'color' and
        'colorscale' in 'marker'

    :raises (PlotlyError): if a string colormap (not an rgb/hex color) is
        not the name of a Plotly colorscale, if index is not a column of
        df, or if colormap is a dictionary that does not contain every
        value of the index column as a key.
    :rtype: the figure built by scatterplot (no index),
        scatterplot_dict (dictionary colormap) or scatterplot_theme.

    Example 1: Vanilla Scatterplot Matrix
    ```
    import plotly.plotly as py
    from plotly.graph_objs import graph_objs
    from plotly.figure_factory import create_scatterplotmatrix

    import numpy as np
    import pandas as pd

    # Create dataframe
    df = pd.DataFrame(np.random.randn(10, 2),
                      columns=['Column 1', 'Column 2'])

    # Create scatterplot matrix
    fig = create_scatterplotmatrix(df)

    # Plot
    py.iplot(fig, filename='Vanilla Scatterplot Matrix')
    ```

    Example 2: Indexing a Column
    ```
    import plotly.plotly as py
    from plotly.graph_objs import graph_objs
    from plotly.figure_factory import create_scatterplotmatrix

    import numpy as np
    import pandas as pd

    # Create dataframe with index
    df = pd.DataFrame(np.random.randn(10, 2),
                      columns=['A', 'B'])

    # Add another column of strings to the dataframe
    df['Fruit'] = pd.Series(['apple', 'apple', 'grape', 'apple', 'apple',
                             'grape', 'pear', 'pear', 'apple', 'pear'])

    # Create scatterplot matrix
    fig = create_scatterplotmatrix(df, index='Fruit', size=10)

    # Plot
    py.iplot(fig, filename = 'Scatterplot Matrix with Index')
    ```

    Example 3: Styling the Diagonal Subplots
    ```
    import plotly.plotly as py
    from plotly.graph_objs import graph_objs
    from plotly.figure_factory import create_scatterplotmatrix

    import numpy as np
    import pandas as pd

    # Create dataframe with index
    df = pd.DataFrame(np.random.randn(10, 4),
                      columns=['A', 'B', 'C', 'D'])

    # Add another column of strings to the dataframe
    df['Fruit'] = pd.Series(['apple', 'apple', 'grape', 'apple', 'apple',
                             'grape', 'pear', 'pear', 'apple', 'pear'])

    # Create scatterplot matrix
    fig = create_scatterplotmatrix(df, diag='box', index='Fruit',
                                   height=1000, width=1000)

    # Plot
    py.iplot(fig, filename = 'Scatterplot Matrix - Diagonal Styling')
    ```

    Example 4: Use a Theme to Style the Subplots
    ```
    import plotly.plotly as py
    from plotly.graph_objs import graph_objs
    from plotly.figure_factory import create_scatterplotmatrix

    import numpy as np
    import pandas as pd

    # Create dataframe with random data
    df = pd.DataFrame(np.random.randn(100, 3),
                      columns=['A', 'B', 'C'])

    # Create scatterplot matrix using a built-in
    # Plotly palette scale and indexing column 'A'
    fig = create_scatterplotmatrix(df, diag='histogram',
                                   index='A', colormap='Blues',
                                   height=800, width=800)

    # Plot
    py.iplot(fig, filename = 'Scatterplot Matrix - Colormap Theme')
    ```

    Example 5: Example 4 with Interval Factoring
    ```
    import plotly.plotly as py
    from plotly.graph_objs import graph_objs
    from plotly.figure_factory import create_scatterplotmatrix

    import numpy as np
    import pandas as pd

    # Create dataframe with random data
    df = pd.DataFrame(np.random.randn(100, 3),
                      columns=['A', 'B', 'C'])

    # Create scatterplot matrix using a list of 2 rgb tuples
    # and endpoints at -1, 0 and 1
    fig = create_scatterplotmatrix(df, diag='histogram', index='A',
                                   colormap=['rgb(140, 255, 50)',
                                             'rgb(170, 60, 115)',
                                             '#6c4774',
                                             (0.5, 0.1, 0.8)],
                                   endpts=[-1, 0, 1],
                                   height=800, width=800)

    # Plot
    py.iplot(fig, filename = 'Scatterplot Matrix - Intervals')
    ```

    Example 6: Using the colormap as a Dictionary
    ```
    import plotly.plotly as py
    from plotly.graph_objs import graph_objs
    from plotly.figure_factory import create_scatterplotmatrix

    import numpy as np
    import pandas as pd
    import random

    # Create dataframe with random data
    df = pd.DataFrame(np.random.randn(100, 3),
                      columns=['Column A',
                               'Column B',
                               'Column C'])

    # Add new color column to dataframe
    new_column = []
    strange_colors = ['turquoise', 'limegreen', 'goldenrod']

    for j in range(100):
        new_column.append(random.choice(strange_colors))
    df['Colors'] = pd.Series(new_column, index=df.index)

    # Create scatterplot matrix using a dictionary of hex color values
    # which correspond to actual color names in 'Colors' column
    fig = create_scatterplotmatrix(
        df, diag='box', index='Colors',
        colormap= dict(
            turquoise = '#00F5FF',
            limegreen = '#32CD32',
            goldenrod = '#DAA520'
        ),
        colormap_type='cat',
        height=800, width=800
    )

    # Plot
    py.iplot(fig, filename = 'Scatterplot Matrix - colormap dictionary ')
    ```
    """
    # TODO: protected until #282
    if dataframe is None:
        dataframe = []
    if headers is None:
        headers = []
    if index_vals is None:
        index_vals = []

    validate_scatterplotmatrix(df, index, diag, colormap_type, **kwargs)

    # Validate colormap
    if isinstance(colormap, dict):
        colormap = clrs.validate_colors_dict(colormap, 'rgb')
    elif (isinstance(colormap, six.string_types) and
          'rgb' not in colormap and '#' not in colormap):
        # a string that is neither an rgb nor a hex color must name a scale
        if colormap not in clrs.PLOTLY_SCALES.keys():
            raise exceptions.PlotlyError(
                "If 'colormap' is a string, it must be the name "
                "of a Plotly Colorscale. The available colorscale "
                "names are {}".format(clrs.PLOTLY_SCALES.keys())
            )
        # TODO change below to allow the correct Plotly colorscale
        colormap = clrs.colorscale_to_colors(clrs.PLOTLY_SCALES[colormap])
        # keep only first and last item - fix later
        colormap = [colormap[0]] + [colormap[-1]]
        colormap = clrs.validate_colors(colormap, 'rgb')
    else:
        colormap = clrs.validate_colors(colormap, 'rgb')

    if not index:
        headers.extend(name for name in df)
        for name in headers:
            dataframe.append(df[name].values.tolist())
        # Check for same data-type in df columns
        utils.validate_dataframe(dataframe)
        return scatterplot(dataframe, headers, diag, size,
                           height, width, title, **kwargs)

    # Validate index selection
    if index not in df:
        raise exceptions.PlotlyError("Make sure you set the index "
                                     "input variable to one of the "
                                     "column names of your "
                                     "dataframe.")
    index_vals = df[index].values.tolist()
    headers.extend(name for name in df if name != index)
    for name in headers:
        dataframe.append(df[name].values.tolist())

    # check for same data-type in each df column
    utils.validate_dataframe(dataframe)
    utils.validate_index(index_vals)

    if isinstance(colormap, dict):
        # every index value needs a color; checked once, so an empty
        # dictionary is rejected too instead of slipping through
        if not all(value in colormap for value in index_vals):
            raise exceptions.PlotlyError("If colormap is a "
                                         "dictionary, all the "
                                         "names in the index "
                                         "must be keys.")
        return scatterplot_dict(
            dataframe, headers, diag, size, height, width, title,
            index, index_vals, endpts, colormap, colormap_type,
            **kwargs
        )

    return scatterplot_theme(
        dataframe, headers, diag, size, height, width, title,
        index, index_vals, endpts, colormap, colormap_type,
        **kwargs
    )
def create_facet_grid(
    df,
    x=None,
    y=None,
    facet_row=None,
    facet_col=None,
    color_name=None,
    colormap=None,
    color_is_cat=False,
    facet_row_labels=None,
    facet_col_labels=None,
    height=None,
    width=None,
    trace_type="scatter",
    scales="fixed",
    dtick_x=None,
    dtick_y=None,
    show_boxes=True,
    ggplot2=False,
    binsize=1,
    **kwargs
):
    """
    Returns figure for facet grid; **this function is deprecated**, since
    plotly.express functions should be used instead, for example

    >>> import plotly.express as px
    >>> tips = px.data.tips()
    >>> fig = px.scatter(tips,
    ...     x='total_bill',
    ...     y='tip',
    ...     facet_row='sex',
    ...     facet_col='smoker',
    ...     color='size')

    :param (pd.DataFrame) df: the dataframe of columns for the facet grid.
    :param (str) x: the name of the dataframe column for the x axis data.
    :param (str) y: the name of the dataframe column for the y axis data.
    :param (str) facet_row: the name of the dataframe column that is used to
        facet the grid into row panels.
    :param (str) facet_col: the name of the dataframe column that is used to
        facet the grid into column panels.
    :param (str) color_name: the name of your dataframe column that will
        function as the colormap variable.
    :param (str|list|dict) colormap: the param that determines how the
        color_name column colors the data. If the dataframe contains numeric
        data, then a dictionary of colors will group the data categorically
        while a Plotly Colorscale name or a custom colorscale will treat it
        numerically. To learn more about colors and types of colormap, run
        `help(plotly.colors)`.
    :param (bool) color_is_cat: determines whether a numerical column for the
        colormap will be treated as categorical (True) or sequential (False).
        Default = False.
    :param (str|dict) facet_row_labels: set to either 'name' or a dictionary
        of all the unique values in the faceting row mapped to some text to
        show up in the label annotations. If None, labeling works like usual.
    :param (str|dict) facet_col_labels: set to either 'name' or a dictionary
        of all the unique values in the faceting column mapped to some text
        to show up in the label annotations. If None, labeling works like
        usual.
    :param (int) height: the height of the facet grid figure. Defaults to
        max(600, 100 * number of rows).
    :param (int) width: the width of the facet grid figure. Defaults to
        max(600, 100 * number of columns).
    :param (str) trace_type: decides the type of plot to appear in the
        facet grid. The options are 'scatter', 'scattergl', 'histogram',
        'bar', and 'box'.
        Default = 'scatter'.
    :param (str) scales: determines if axes have fixed ranges or not. Valid
        settings are 'fixed' (all axes fixed), 'free_x' (x axis free only),
        'free_y' (y axis free only) or 'free' (both axes free). Any
        trace_type other than 'scatter'/'scattergl' forces 'free'.
    :param (float) dtick_x: determines the distance between each tick on the
        x-axis. Default is None which means dtick_x is set automatically.
    :param (float) dtick_y: determines the distance between each tick on the
        y-axis. Default is None which means dtick_y is set automatically.
    :param (bool) show_boxes: draws grey boxes behind the facet titles.
    :param (bool) ggplot2: draws the facet grid in the style of `ggplot2`. See
        http://ggplot2.tidyverse.org/reference/facet_grid.html for reference.
        Default = False
    :param (int) binsize: groups all data into bins of a given length.
        (Not referenced by the body of this function.)
    :param (dict) kwargs: a dictionary of scatterplot arguments. A 'marker'
        entry is split off and handled separately; a plain-dict 'marker' is
        copied, so the caller's dictionary is left untouched.

    :raises (ImportError): if pandas is not available.
    :raises (PlotlyError): if df is not a pandas DataFrame, if x/y are
        missing for scatter traces, if a column name is not in df, if
        'scales' or 'trace_type' is invalid, if custom facet labels or a
        dictionary colormap do not cover every unique value, if a string
        colormap is not a Plotly colorscale name, or if the color column is
        neither string nor numeric while color_is_cat is False.
    :rtype: the figure produced by the facet grid helpers with layout,
        annotations, legend and axis settings applied.

    Examples 1: One Way Faceting

    >>> import plotly.figure_factory as ff
    >>> import pandas as pd
    >>> mpg = pd.read_table('https://raw.githubusercontent.com/plotly/datasets/master/mpg_2017.txt')

    >>> fig = ff.create_facet_grid(
    ...     mpg,
    ...     x='displ',
    ...     y='cty',
    ...     facet_col='cyl',
    ... )
    >>> fig.show()

    Example 2: Two Way Faceting

    >>> import plotly.figure_factory as ff

    >>> import pandas as pd

    >>> mpg = pd.read_table('https://raw.githubusercontent.com/plotly/datasets/master/mpg_2017.txt')

    >>> fig = ff.create_facet_grid(
    ...     mpg,
    ...     x='displ',
    ...     y='cty',
    ...     facet_row='drv',
    ...     facet_col='cyl',
    ... )
    >>> fig.show()

    Example 3: Categorical Coloring

    >>> import plotly.figure_factory as ff
    >>> import pandas as pd
    >>> mtcars = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/mtcars.csv')
    >>> mtcars.cyl = mtcars.cyl.astype(str)
    >>> fig = ff.create_facet_grid(
    ...     mtcars,
    ...     x='mpg',
    ...     y='wt',
    ...     facet_col='cyl',
    ...     color_name='cyl',
    ...     color_is_cat=True,
    ... )
    >>> fig.show()
    """
    if not pd:
        raise ImportError("'pandas' must be installed for this figure_factory.")

    if not isinstance(df, pd.DataFrame):
        raise exceptions.PlotlyError("You must input a pandas DataFrame.")

    # make sure all columns are of homogenous datatype
    utils.validate_dataframe(df)

    if trace_type in ["scatter", "scattergl"]:
        if not x or not y:
            raise exceptions.PlotlyError(
                "You need to input 'x' and 'y' if you are you are using a "
                "trace_type of 'scatter' or 'scattergl'."
            )

    for key in [x, y, facet_row, facet_col, color_name]:
        if key is not None:
            try:
                df[key]
            except KeyError:
                raise exceptions.PlotlyError(
                    "x, y, facet_row, facet_col and color_name must be keys "
                    "in your dataframe."
                )

    # autoscale histogram bars
    if trace_type not in ["scatter", "scattergl"]:
        scales = "free"

    # validate scales
    if scales not in ["fixed", "free_x", "free_y", "free"]:
        raise exceptions.PlotlyError(
            "'scales' must be set to 'fixed', 'free_x', 'free_y' and 'free'."
        )

    if trace_type not in VALID_TRACE_TYPES:
        raise exceptions.PlotlyError(
            "'trace_type' must be in {}".format(VALID_TRACE_TYPES)
        )

    subplot_spacing = 0.06 if trace_type == "histogram" else 0.015

    # separate kwargs for marker and everything else; a plain dict is copied
    # so that the defaults added below never leak into the caller's object
    marker_arg = kwargs.pop("marker", {})
    if isinstance(marker_arg, dict):
        kwargs_marker = dict(marker_arg)
    else:
        kwargs_marker = marker_arg
    marker_color = kwargs_marker.pop("color", None)
    kwargs_trace = kwargs

    if "size" not in kwargs_marker:
        kwargs_marker["size"] = 5 if ggplot2 else 8

    # default trace opacity, without overriding one the caller passed in
    if "opacity" not in kwargs_marker and not ggplot2:
        kwargs_trace.setdefault("opacity", 0.6)

    if "line" not in kwargs_marker:
        if ggplot2:
            kwargs_marker["line"] = {}
        else:
            kwargs_marker["line"] = {"color": "darkgrey", "width": 1}

    # default marker color; the ggplot2 style always uses black markers
    if ggplot2:
        marker_color = "rgb(0, 0, 0)"
    elif not marker_color:
        marker_color = "rgb(31, 119, 180)"

    num_of_rows = 1
    num_of_cols = 1
    flipped_rows = False
    flipped_cols = False

    if facet_row:
        row_values = df[facet_row].unique()
        num_of_rows = len(row_values)
        flipped_rows = _is_flipped(num_of_rows)
        if isinstance(facet_row_labels, dict):
            for key in row_values:
                if key not in facet_row_labels:
                    raise exceptions.PlotlyError(
                        CUSTOM_LABEL_ERROR.format(row_values.tolist())
                    )

    if facet_col:
        col_values = df[facet_col].unique()
        num_of_cols = len(col_values)
        flipped_cols = _is_flipped(num_of_cols)
        if isinstance(facet_col_labels, dict):
            for key in col_values:
                if key not in facet_col_labels:
                    raise exceptions.PlotlyError(
                        CUSTOM_LABEL_ERROR.format(col_values.tolist())
                    )

    # positional arguments shared by every grid-building helper
    grid_args = (
        num_of_rows,
        num_of_cols,
        facet_row_labels,
        facet_col_labels,
        trace_type,
        flipped_rows,
        flipped_cols,
        show_boxes,
        subplot_spacing,
        marker_color,
        kwargs_trace,
        kwargs_marker,
    )

    show_legend = False
    if color_name:
        first_value = df[color_name].iloc[0]
        is_categorical = isinstance(first_value, str) or color_is_cat
        if not is_categorical and not isinstance(first_value, Number):
            # without this guard no figure would be built and the code
            # below would fail on an unbound 'fig'
            raise exceptions.PlotlyError(
                "The values of the 'color_name' column must be strings or "
                "numbers; set 'color_is_cat' to True to treat other types "
                "categorically."
            )

        if is_categorical or isinstance(colormap, dict):
            # a dictionary colormap groups even numeric data categorically
            show_legend = True
            if isinstance(colormap, dict):
                clrs.validate_colors_dict(colormap, "rgb")
                for val in df[color_name].unique():
                    if val not in colormap:
                        raise exceptions.PlotlyError(
                            "If using 'colormap' as a dictionary, make sure "
                            "all the values of the colormap column are in "
                            "the keys of your dictionary."
                        )
            else:
                # use default plotly colors, cycling when they run out
                default_colors = clrs.DEFAULT_PLOTLY_COLORS
                colormap = {}
                for j, val in enumerate(df[color_name].unique()):
                    colormap[val] = default_colors[j % len(default_colors)]

            fig, annotations = _facet_grid_color_categorical(
                df, x, y, facet_row, facet_col, color_name, colormap, *grid_args
            )
        else:
            if isinstance(colormap, list):
                colorscale_list = colormap
                clrs.validate_colorscale(colorscale_list)
            elif isinstance(colormap, str):
                if colormap not in clrs.PLOTLY_SCALES:
                    raise exceptions.PlotlyError(
                        "If 'colormap' is a string, it must be the name "
                        "of a Plotly Colorscale. The available colorscale "
                        "names are {}".format(clrs.PLOTLY_SCALES.keys())
                    )
                colorscale_list = clrs.PLOTLY_SCALES[colormap]
            else:
                colorscale_list = clrs.PLOTLY_SCALES["Reds"]

            fig, annotations = _facet_grid_color_numerical(
                df,
                x,
                y,
                facet_row,
                facet_col,
                color_name,
                colorscale_list,
                *grid_args
            )
    else:
        fig, annotations = _facet_grid(df, x, y, facet_row, facet_col, *grid_args)

    if not height:
        height = max(600, 100 * num_of_rows)
    if not width:
        width = max(600, 100 * num_of_cols)

    fig["layout"].update(
        height=height, width=width, title="", paper_bgcolor="rgb(251, 251, 251)"
    )
    if ggplot2:
        fig["layout"].update(
            plot_bgcolor=PLOT_BGCOLOR,
            paper_bgcolor="rgb(255, 255, 255)",
            hovermode="closest",
        )

    # axis titles
    annotations.append(_axis_title_annotation(x, "x"))
    annotations.append(_axis_title_annotation(y, "y"))

    # legend
    fig["layout"]["showlegend"] = show_legend
    fig["layout"]["legend"]["bgcolor"] = LEGEND_COLOR
    fig["layout"]["legend"]["borderwidth"] = LEGEND_BORDER_WIDTH
    fig["layout"]["legend"]["x"] = 1.05
    fig["layout"]["legend"]["y"] = 1
    fig["layout"]["legend"]["yanchor"] = "top"

    if show_legend and ggplot2:
        if color_name:
            annotations.append(_legend_annotation(color_name))
        fig["layout"]["margin"]["r"] = 150

    # assign annotations to figure
    fig["layout"]["annotations"] = annotations

    # add shaded boxes behind axis titles
    if show_boxes and ggplot2:
        _add_shapes_to_fig(fig, ANNOT_RECT_COLOR, flipped_rows, flipped_cols)

    # all xaxis and yaxis labels
    axis_labels = {"x": [], "y": []}
    for key in fig["layout"]:
        if "xaxis" in key:
            axis_labels["x"].append(key)
        elif "yaxis" in key:
            axis_labels["y"].append(key)

    # string columns holding integer-like text must be drawn on category
    # axes
    string_number_in_data = False
    for var in [v for v in [x, y] if v]:
        if isinstance(df[var].tolist()[0], str):
            for item in df[var]:
                try:
                    int(item)
                    string_number_in_data = True
                except ValueError:
                    pass

    if string_number_in_data:
        for x_y in axis_labels.keys():
            for axis_name in axis_labels[x_y]:
                fig["layout"][axis_name]["type"] = "category"

    # 'scales' was validated above, so the lookup cannot fail
    fixed_axes = {
        "fixed": ["x", "y"],
        "free_x": ["y"],
        "free_y": ["x"],
        "free": [],
    }[scales]

    # fixed ranges
    for x_y in fixed_axes:
        min_ranges = []
        max_ranges = []
        for trace in fig["data"]:
            if trace[x_y] is not None and len(trace[x_y]) > 0:
                min_ranges.append(min(trace[x_y]))
                max_ranges.append(max(trace[x_y]))
        min_ranges = [val for val in min_ranges if val is not None]
        max_ranges = [val for val in max_ranges if val is not None]

        min_range = min(min_ranges)
        max_range = max(max_ranges)

        range_are_numbers = isinstance(min_range, Number) and isinstance(
            max_range, Number
        )

        if range_are_numbers:
            min_range = math.floor(min_range)
            max_range = math.ceil(max_range)

            # widen frame by 5% of the original span on each side; the
            # padding is computed once so both sides grow by the same amount
            padding = 0.05 * (max_range - min_range)
            min_range -= padding
            max_range += padding

            user_dtick = dtick_x if x_y == "x" else dtick_y
            if user_dtick:
                dtick = user_dtick
            else:
                dtick = math.floor((max_range - min_range) / MAX_TICKS_PER_AXIS)
        else:
            dtick = 1

        for axis_title in axis_labels[x_y]:
            axis = fig["layout"][axis_title]
            axis["dtick"] = dtick
            axis["ticklen"] = 4 if ggplot2 else 0
            axis["zeroline"] = False
            if ggplot2:
                axis["tickwidth"] = 1
                axis["gridcolor"] = GRID_COLOR
                # NOTE(review): the effective grid width is the literal 2,
                # not GRID_WIDTH -- confirm which one is intended
                axis["gridwidth"] = 2
                axis["tickfont"] = {"color": TICK_COLOR, "size": 10}

        # insert ranges into fig
        if range_are_numbers:
            axis_key = "{}axis".format(x_y)
            for key in fig["layout"]:
                if axis_key in key:
                    fig["layout"][key]["range"] = [min_range, max_range]

    return fig
def create_facet_grid(df, x=None, y=None, facet_row=None, facet_col=None,
                      color_name=None, colormap=None, color_is_cat=False,
                      facet_row_labels=None, facet_col_labels=None,
                      height=None, width=None, trace_type='scatter',
                      scales='fixed', dtick_x=None, dtick_y=None,
                      show_boxes=True, ggplot2=False, binsize=1, **kwargs):
    """
    Returns figure for facet grid.

    :param (pd.DataFrame) df: the dataframe of columns for the facet grid.
    :param (str) x: the name of the dataframe column for the x axis data.
    :param (str) y: the name of the dataframe column for the y axis data.
    :param (str) facet_row: the name of the dataframe column that is used to
        facet the grid into row panels.
    :param (str) facet_col: the name of the dataframe column that is used to
        facet the grid into column panels.
    :param (str) color_name: the name of your dataframe column that will
        function as the colormap variable.
    :param (str|list|dict) colormap: the param that determines how the
        color_name column colors the data. If the dataframe contains numeric
        data, then a dictionary of colors will group the data categorically
        while a Plotly Colorscale name or a custom colorscale will treat it
        numerically. To learn more about colors and types of colormap, run
        `help(plotly.colors)`.
    :param (bool) color_is_cat: determines whether a numerical column for the
        colormap will be treated as categorical (True) or sequential (False).
        Default = False.
    :param (str|dict) facet_row_labels: set to either 'name' or a dictionary
        of all the unique values in the faceting row mapped to some text to
        show up in the label annotations. If None, labeling works like usual.
    :param (str|dict) facet_col_labels: set to either 'name' or a dictionary
        of all the unique values in the faceting column mapped to some text
        to show up in the label annotations. If None, labeling works like
        usual.
    :param (int) height: the height of the facet grid figure. When falsy,
        max(600, 100 * number of row panels) is used.
    :param (int) width: the width of the facet grid figure. When falsy,
        max(600, 100 * number of column panels) is used.
    :param (str) trace_type: decides the type of plot to appear in the
        facet grid. The options are 'scatter', 'scattergl', 'histogram',
        'bar', and 'box'.
        Default = 'scatter'.
    :param (str) scales: determines if axes have fixed ranges or not. Valid
        settings are 'fixed' (all axes fixed), 'free_x' (x axis free only),
        'free_y' (y axis free only) or 'free' (both axes free). Forced to
        'free' for every trace_type other than 'scatter' and 'scattergl'.
    :param (float) dtick_x: determines the distance between each tick on the
        x-axis. Default is None which means dtick_x is set automatically.
    :param (float) dtick_y: determines the distance between each tick on the
        y-axis. Default is None which means dtick_y is set automatically.
    :param (bool) show_boxes: draws grey boxes behind the facet titles.
    :param (bool) ggplot2: draws the facet grid in the style of `ggplot2`. See
        http://ggplot2.tidyverse.org/reference/facet_grid.html for reference.
        Default = False
    :param (int) binsize: groups all data into bins of a given length.
        NOTE(review): not referenced anywhere in this function's body.
    :param (dict) kwargs: a dictionary of scatterplot arguments. A 'marker'
        entry is split off and handled separately from the other trace
        arguments; the caller's marker dict is not modified.

    :raises (PlotlyError): if `df` is not a DataFrame, a referenced column is
        missing, `scales`/`trace_type` is invalid, a label or colormap
        dictionary does not cover every value, or the `color_name` column
        holds values that are neither strings nor numbers while
        `color_is_cat` is False.

    Example 1: One Way Faceting
    ```
    import plotly.plotly as py
    import plotly.figure_factory as ff

    import pandas as pd

    mpg = pd.read_table('https://raw.githubusercontent.com/plotly/datasets/master/mpg_2017.txt')

    fig = ff.create_facet_grid(
        mpg,
        x='displ',
        y='cty',
        facet_col='cyl',
    )
    py.iplot(fig, filename='facet_grid_mpg_one_way_facet')
    ```

    Example 2: Two Way Faceting
    ```
    import plotly.plotly as py
    import plotly.figure_factory as ff

    import pandas as pd

    mpg = pd.read_table('https://raw.githubusercontent.com/plotly/datasets/master/mpg_2017.txt')

    fig = ff.create_facet_grid(
        mpg,
        x='displ',
        y='cty',
        facet_row='drv',
        facet_col='cyl',
    )
    py.iplot(fig, filename='facet_grid_mpg_two_way_facet')
    ```

    Example 3: Categorical Coloring
    ```
    import plotly.plotly as py
    import plotly.figure_factory as ff

    import pandas as pd

    mtcars = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/mtcars.csv')

    fig = ff.create_facet_grid(
        mtcars,
        x='mpg',
        y='wt',
        facet_col='cyl',
        color_name='cyl',
        color_is_cat=True,
    )
    py.iplot(fig, filename='facet_grid_mpg_default_colors')
    ```

    Example 4: Sequential Coloring
    ```
    import plotly.plotly as py
    import plotly.figure_factory as ff

    import pandas as pd

    tips = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/tips.csv')

    fig = ff.create_facet_grid(
        tips,
        x='total_bill',
        y='tip',
        facet_row='sex',
        facet_col='smoker',
        color_name='size',
        colormap='Viridis',
    )
    py.iplot(fig, filename='facet_grid_tips_sequential_colors')
    ```

    Example 5: Custom labels
    ```
    import plotly.plotly as py
    import plotly.figure_factory as ff

    import pandas as pd

    mtcars = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/mtcars.csv')

    fig = ff.create_facet_grid(
        mtcars,
        x='wt',
        y='mpg',
        facet_col='cyl',
        facet_col_labels={4: "$\\alpha$", 6: '$\\beta$', 8: '$\\sqrt[y]{x}$'},
    )

    py.iplot(fig, filename='facet_grid_mtcars_custom_labels')
    ```

    Example 6: Other Trace Type
    ```
    import plotly.plotly as py
    import plotly.figure_factory as ff

    import pandas as pd

    mtcars = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/mtcars.csv')

    fig = ff.create_facet_grid(
        mtcars,
        x='wt',
        facet_col='cyl',
        trace_type='histogram',
    )

    py.iplot(fig, filename='facet_grid_mtcars_other_trace_type')
    ```
    """
    if not pd:
        raise exceptions.ImportError(
            "'pandas' must be installed for this figure_factory."
        )

    if not isinstance(df, pd.DataFrame):
        raise exceptions.PlotlyError(
            "You must input a pandas DataFrame."
        )

    # make sure all columns are of homogenous datatype
    utils.validate_dataframe(df)

    if trace_type in ['scatter', 'scattergl']:
        if not x or not y:
            raise exceptions.PlotlyError(
                "You need to input 'x' and 'y' if you are you are using a "
                "trace_type of 'scatter' or 'scattergl'."
            )

    for key in [x, y, facet_row, facet_col, color_name]:
        if key is not None:
            try:
                df[key]
            except KeyError:
                raise exceptions.PlotlyError(
                    "x, y, facet_row, facet_col and color_name must be keys "
                    "in your dataframe."
                )

    # autoscale histogram bars: only scatter-like traces honour `scales`
    if trace_type not in ['scatter', 'scattergl']:
        scales = 'free'

    # validate scales
    if scales not in ['fixed', 'free_x', 'free_y', 'free']:
        raise exceptions.PlotlyError(
            "'scales' must be set to 'fixed', 'free_x', 'free_y' and 'free'."
        )

    if trace_type not in VALID_TRACE_TYPES:
        raise exceptions.PlotlyError(
            "'trace_type' must be in {}".format(VALID_TRACE_TYPES)
        )

    subplot_spacing = 0.06 if trace_type == 'histogram' else 0.015

    # separate kwargs for marker and everything else
    kwargs_marker = kwargs.pop('marker', {})
    if isinstance(kwargs_marker, dict):
        # Work on a copy so the defaults injected below (and the 'color'
        # pop) do not leak back into the dict the caller passed in.
        kwargs_marker = dict(kwargs_marker)
    marker_color = kwargs_marker.pop('color', None)
    kwargs_trace = kwargs

    if 'size' not in kwargs_marker:
        kwargs_marker['size'] = 5 if ggplot2 else 8

    if 'opacity' not in kwargs_marker and not ggplot2:
        # setdefault: an explicit trace-level opacity from the caller wins
        kwargs_trace.setdefault('opacity', 0.6)

    if 'line' not in kwargs_marker:
        if not ggplot2:
            kwargs_marker['line'] = {'color': 'darkgrey', 'width': 1}
        else:
            kwargs_marker['line'] = {}

    # default marker color; the ggplot2 style always uses black markers
    if not ggplot2:
        if not marker_color:
            marker_color = 'rgb(31, 119, 180)'
    else:
        marker_color = 'rgb(0, 0, 0)'

    num_of_rows = 1
    num_of_cols = 1
    flipped_rows = False
    flipped_cols = False

    if facet_row:
        row_values = df[facet_row].unique()
        num_of_rows = len(row_values)
        flipped_rows = _is_flipped(num_of_rows)
        if isinstance(facet_row_labels, dict):
            for key in row_values:
                if key not in facet_row_labels:
                    raise exceptions.PlotlyError(
                        CUSTOM_LABEL_ERROR.format(row_values.tolist())
                    )

    if facet_col:
        col_values = df[facet_col].unique()
        num_of_cols = len(col_values)
        flipped_cols = _is_flipped(num_of_cols)
        if isinstance(facet_col_labels, dict):
            for key in col_values:
                if key not in facet_col_labels:
                    raise exceptions.PlotlyError(
                        CUSTOM_LABEL_ERROR.format(col_values.tolist())
                    )

    # positional arguments shared by all three grid builders
    grid_args = (
        num_of_rows, num_of_cols, facet_row_labels, facet_col_labels,
        trace_type, flipped_rows, flipped_cols, show_boxes,
        subplot_spacing, marker_color, kwargs_trace, kwargs_marker,
    )

    def _validate_colormap_dict(color_dict):
        # every value of the color column needs an entry in the dictionary
        utils.validate_colors_dict(color_dict, 'rgb')
        for val in df[color_name].unique():
            if val not in color_dict:
                raise exceptions.PlotlyError(
                    "If using 'colormap' as a dictionary, make sure "
                    "all the values of the colormap column are in "
                    "the keys of your dictionary."
                )

    show_legend = False
    if color_name:
        first_color_value = df[color_name].iloc[0]
        categorical = False
        colorscale_list = None

        if isinstance(first_color_value, str) or color_is_cat:
            categorical = True
            if isinstance(colormap, dict):
                _validate_colormap_dict(colormap)
            else:
                # use default plotly colors for dictionary, cycling through
                # the palette when there are more values than colors
                default_colors = utils.DEFAULT_PLOTLY_COLORS
                colormap = {
                    val: default_colors[idx % len(default_colors)]
                    for idx, val in enumerate(df[color_name].unique())
                }
        elif isinstance(first_color_value, Number):
            if isinstance(colormap, dict):
                categorical = True
                _validate_colormap_dict(colormap)
            elif isinstance(colormap, list):
                colorscale_list = colormap
                utils.validate_colorscale(colorscale_list)
            elif isinstance(colormap, str):
                if colormap in colors.PLOTLY_SCALES.keys():
                    colorscale_list = colors.PLOTLY_SCALES[colormap]
                else:
                    raise exceptions.PlotlyError(
                        "If 'colormap' is a string, it must be the name "
                        "of a Plotly Colorscale. The available colorscale "
                        "names are {}".format(colors.PLOTLY_SCALES.keys())
                    )
            else:
                colorscale_list = colors.PLOTLY_SCALES['Reds']
        else:
            # Previously no branch handled this case and `fig` was left
            # unbound, surfacing later as an UnboundLocalError.
            raise exceptions.PlotlyError(
                "The values in the 'color_name' column must be strings or "
                "numbers unless 'color_is_cat' is set to True."
            )

        if categorical:
            show_legend = True
            fig = _facet_grid_color_categorical(
                df, x, y, facet_row, facet_col, color_name, colormap,
                *grid_args
            )
        else:
            fig = _facet_grid_color_numerical(
                df, x, y, facet_row, facet_col, color_name, colorscale_list,
                *grid_args
            )
    else:
        fig = _facet_grid(df, x, y, facet_row, facet_col, *grid_args)

    if not height:
        height = max(600, 100 * num_of_rows)
    if not width:
        width = max(600, 100 * num_of_cols)

    fig['layout'].update(height=height, width=width, title='',
                         paper_bgcolor='rgb(251, 251, 251)')
    if ggplot2:
        fig['layout'].update(plot_bgcolor=PLOT_BGCOLOR,
                             paper_bgcolor='rgb(255, 255, 255)',
                             hovermode='closest')

    # axis titles
    x_title_annot = _axis_title_annotation(x, 'x')
    y_title_annot = _axis_title_annotation(y, 'y')
    fig['layout']['annotations'].append(x_title_annot)
    fig['layout']['annotations'].append(y_title_annot)

    # legend
    fig['layout']['showlegend'] = show_legend
    fig['layout']['legend']['bgcolor'] = LEGEND_COLOR
    fig['layout']['legend']['borderwidth'] = LEGEND_BORDER_WIDTH
    fig['layout']['legend']['x'] = 1.05
    fig['layout']['legend']['y'] = 1
    fig['layout']['legend']['yanchor'] = 'top'

    if show_legend and ggplot2:
        if color_name:
            legend_annot = _legend_annotation(color_name)
            fig['layout']['annotations'].append(legend_annot)
        # widen the right margin to leave room for the legend
        fig['layout']['margin']['r'] = 150

    # add shaded boxes behind axis titles
    if show_boxes and ggplot2:
        _add_shapes_to_fig(fig, ANNOT_RECT_COLOR, flipped_rows, flipped_cols)

    # all xaxis and yaxis labels
    axis_labels = {'x': [], 'y': []}
    for key in fig['layout']:
        if 'xaxis' in key:
            axis_labels['x'].append(key)
        elif 'yaxis' in key:
            axis_labels['y'].append(key)

    def _parses_as_int(item):
        try:
            int(item)
        except ValueError:
            return False
        return True

    # Strings that look like integers (e.g. '4', '6') are forced onto
    # categorical axes in every panel.
    string_number_in_data = any(
        isinstance(df[var].tolist()[0], str)
        and any(_parses_as_int(item) for item in df[var])
        for var in (x, y) if var
    )

    if string_number_in_data:
        for x_y in axis_labels:
            for axis_name in axis_labels[x_y]:
                fig['layout'][axis_name]['type'] = 'category'

    # `scales` was validated above, so the lookup cannot miss
    fixed_axes = {
        'fixed': ['x', 'y'],
        'free_x': ['y'],
        'free_y': ['x'],
        'free': [],
    }[scales]

    # fixed ranges
    for x_y in fixed_axes:
        min_ranges = []
        max_ranges = []
        for trace in fig['data']:
            values = trace[x_y]
            if values is not None and len(values) > 0:
                min_ranges.append(min(values))
                max_ranges.append(max(values))
        min_ranges = [low for low in min_ranges if low is not None]
        max_ranges = [high for high in max_ranges if high is not None]

        # With no usable data on this axis there is no range to fix; fall
        # through to the non-numeric handling instead of calling min([]).
        range_are_numbers = False
        if min_ranges and max_ranges:
            min_range = min(min_ranges)
            max_range = max(max_ranges)
            range_are_numbers = (isinstance(min_range, Number) and
                                 isinstance(max_range, Number))

        if range_are_numbers:
            min_range = math.floor(min_range)
            max_range = math.ceil(max_range)

            # Widen the frame by 5% of the original span on each side. The
            # padding is computed once so both sides grow by the same amount.
            padding = 0.05 * (max_range - min_range)
            min_range -= padding
            max_range += padding

            user_dtick = dtick_x if x_y == 'x' else dtick_y
            if user_dtick:
                dtick = user_dtick
            else:
                # NOTE(review): this floors to 0 when the widened span is
                # smaller than MAX_TICKS_PER_AXIS.
                dtick = math.floor(
                    (max_range - min_range) / MAX_TICKS_PER_AXIS
                )
        else:
            dtick = 1

        for axis_title in axis_labels[x_y]:
            axis = fig['layout'][axis_title]
            axis['dtick'] = dtick
            axis['ticklen'] = 0
            axis['zeroline'] = False
            if ggplot2:
                axis['tickwidth'] = 1
                axis['ticklen'] = 4
                axis['gridcolor'] = GRID_COLOR
                # NOTE(review): the original stored GRID_WIDTH here and then
                # immediately overwrote it with 2; only the effective value
                # is kept.
                axis['gridwidth'] = 2
                axis['tickfont'] = {
                    'color': TICK_COLOR, 'size': 10
                }

            # insert ranges into fig
            if range_are_numbers:
                axis['range'] = [min_range, max_range]

    return fig
def create_facet_grid(df, x=None, y=None, facet_row=None, facet_col=None,
                      color_name=None, colormap=None, color_is_cat=False,
                      facet_row_labels=None, facet_col_labels=None,
                      height=None, width=None, trace_type='scatter',
                      hide_x_labels=False, hide_y_labels=False,
                      scales='fixed', dtick_x=None, dtick_y=None,
                      text_name=None, show_boxes=True, ggplot2=False,
                      binsize=1, jitter=0, **kwargs):
    """
    Returns figure for facet grid.

    :param (pd.DataFrame) df: the dataframe of columns for the facet grid.
    :param (str) x: the name of the dataframe column for the x axis data.
    :param (str) y: the name of the dataframe column for the y axis data.
    :param (str) facet_row: the name of the dataframe column that is used to
        facet the grid into row panels.
    :param (str) facet_col: the name of the dataframe column that is used to
        facet the grid into column panels.
    :param (str) color_name: the name of your dataframe column that will
        function as the colormap variable.
    :param (str|list|dict) colormap: the param that determines how the
        color_name column colors the data. If the dataframe contains numeric
        data, then a dictionary of colors will group the data categorically
        while a Plotly Colorscale name or a custom colorscale will treat it
        numerically. To learn more about colors and types of colormap, run
        `help(plotly.colors)`.
    :param (bool) color_is_cat: determines whether a numerical column for the
        colormap will be treated as categorical (True) or sequential (False).
        Only consulted when `colormap` is not a dict, list or str.
        Default = False.
    :param (str|dict) facet_row_labels: set to either 'name' or a dictionary
        of all the unique values in the faceting row mapped to some text to
        show up in the label annotations. If None, labeling works like usual.
    :param (str|dict) facet_col_labels: set to either 'name' or a dictionary
        of all the unique values in the faceting column mapped to some text
        to show up in the label annotations. If None, labeling works like
        usual.
    :param (int) height: the height of the facet grid figure.
        NOTE(review): not referenced in this function's body; the layout is
        set to autosize.
    :param (int) width: the width of the facet grid figure.
        NOTE(review): not referenced in this function's body; the layout is
        set to autosize.
    :param (str) trace_type: decides the type of plot to appear in the
        facet grid. Must be one of VALID_TRACE_TYPES; 'tsne_dynamic' is
        accepted as an alias for 'scatter'.
        Default = 'scatter'.
    :param (bool) hide_x_labels: if True, tick labels are hidden on every
        x axis. Default = False.
    :param (bool) hide_y_labels: if True, tick labels are hidden on every
        y axis. Default = False.
    :param (str) scales: determines if axes have fixed ranges or not. Valid
        settings are 'fixed' (all axes fixed), 'free_x' (x axis free only),
        'free_y' (y axis free only) or 'free' (both axes free). Forced to
        'free' for every trace_type other than 'scatter', 'line' and
        'scattergl'.
    :param (float) dtick_x: determines the distance between each tick on the
        x-axis. Default is None which means dtick_x is set automatically.
    :param (float) dtick_y: determines the distance between each tick on the
        y-axis. Default is None which means dtick_y is set automatically.
    :param text_name: forwarded unchanged to `_gear_facet_grid`.
        NOTE(review): presumably the dataframe column used for hover text -
        confirm against `_gear_facet_grid`.
    :param (bool) show_boxes: draws grey boxes behind the facet titles.
        NOTE(review): not referenced in this function's body.
    :param (bool) ggplot2: when True the marker color is forced to black;
        no other ggplot2 styling is applied in this function's body.
        Default = False
    :param (int) binsize: groups all data into bins of a given length.
        NOTE(review): not referenced in this function's body.
    :param (int) jitter: Amount to offset an individual categorical x-axis
        datapoint. The higher the number, the more extreme the jitter
        Default: 0 (no jitter)
    :param (dict) kwargs: a dictionary of scatterplot arguments. A 'marker'
        entry is split off and handled separately from the other trace
        arguments; the caller's marker dict is not modified.

    :raises (PlotlyError): if `df` is not a DataFrame, a referenced column is
        missing, `scales`/`trace_type` is invalid, or a label or colormap
        dictionary does not cover every value.

    Example 1: One Way Faceting
    ```
    import plotly.plotly as py
    import plotly.figure_factory as ff

    import pandas as pd

    mpg = pd.read_table('https://raw.githubusercontent.com/plotly/datasets/master/mpg_2017.txt')

    fig = ff.create_facet_grid(
        mpg,
        x='displ',
        y='cty',
        facet_col='cyl',
    )
    py.iplot(fig, filename='facet_grid_mpg_one_way_facet')
    ```

    Example 2: Two Way Faceting
    ```
    import plotly.plotly as py
    import plotly.figure_factory as ff

    import pandas as pd

    mpg = pd.read_table('https://raw.githubusercontent.com/plotly/datasets/master/mpg_2017.txt')

    fig = ff.create_facet_grid(
        mpg,
        x='displ',
        y='cty',
        facet_row='drv',
        facet_col='cyl',
    )
    py.iplot(fig, filename='facet_grid_mpg_two_way_facet')
    ```

    Example 3: Categorical Coloring
    ```
    import plotly.plotly as py
    import plotly.figure_factory as ff

    import pandas as pd

    mtcars = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/mtcars.csv')

    fig = ff.create_facet_grid(
        mtcars,
        x='mpg',
        y='wt',
        facet_col='cyl',
        color_name='cyl',
        color_is_cat=True,
    )
    py.iplot(fig, filename='facet_grid_mpg_default_colors')
    ```

    Example 4: Sequential Coloring
    ```
    import plotly.plotly as py
    import plotly.figure_factory as ff

    import pandas as pd

    tips = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/tips.csv')

    fig = ff.create_facet_grid(
        tips,
        x='total_bill',
        y='tip',
        facet_row='sex',
        facet_col='smoker',
        color_name='size',
        colormap='Viridis',
    )
    py.iplot(fig, filename='facet_grid_tips_sequential_colors')
    ```

    Example 5: Custom labels
    ```
    import plotly.plotly as py
    import plotly.figure_factory as ff

    import pandas as pd

    mtcars = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/mtcars.csv')

    fig = ff.create_facet_grid(
        mtcars,
        x='wt',
        y='mpg',
        facet_col='cyl',
        facet_col_labels={4: "$\\alpha$", 6: '$\\beta$', 8: '$\\sqrt[y]{x}$'},
    )

    py.iplot(fig, filename='facet_grid_mtcars_custom_labels')
    ```

    Example 6: Other Trace Type
    ```
    import plotly.plotly as py
    import plotly.figure_factory as ff

    import pandas as pd

    mtcars = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/mtcars.csv')

    fig = ff.create_facet_grid(
        mtcars,
        x='wt',
        facet_col='cyl',
        trace_type='histogram',
    )

    py.iplot(fig, filename='facet_grid_mtcars_other_trace_type')
    ```
    """
    def _debug(message):
        # all diagnostics go to stderr and only when PLOT_LOGGING is enabled
        if PLOT_LOGGING:
            print(message, file=sys.stderr)

    if not pd:
        raise exceptions.ImportError(
            "'pandas' must be installed for this figure_factory.")

    if not isinstance(df, pd.DataFrame):
        raise exceptions.PlotlyError("You must input a pandas DataFrame.")

    # make sure all columns are of homogenous datatype
    utils.validate_dataframe(df)

    # the tsne_dynamic trace type is an alias for scatter
    if trace_type == 'tsne_dynamic':
        trace_type = 'scatter'

    _debug("DEBUG: trace_type is: {0}".format(trace_type))

    if trace_type in ['scatter', 'scattergl']:
        if not x or not y:
            raise exceptions.PlotlyError(
                "You need to input 'x' and 'y' if you are you are using a "
                "trace_type of 'scatter' or 'scattergl'.")

    for key in [x, y, facet_row, facet_col, color_name]:
        if key is not None:
            try:
                df[key]
            except KeyError:
                raise exceptions.PlotlyError(
                    "x, y, facet_row, facet_col and color_name must be keys "
                    "in your dataframe.")

    if trace_type not in VALID_TRACE_TYPES:
        raise exceptions.PlotlyError(
            "'trace_type' must be in {}".format(VALID_TRACE_TYPES))

    subplot_spacing = 0.06 if trace_type in ('histogram', 'line') else 0.015

    # separate kwargs for marker and everything else
    kwargs_marker = kwargs.pop('marker', {})
    if isinstance(kwargs_marker, dict):
        # Work on a copy so the defaults injected below (and the 'color'
        # pop) do not leak back into the dict the caller passed in.
        kwargs_marker = dict(kwargs_marker)
    marker_color = kwargs_marker.pop('color', None)
    kwargs_trace = kwargs

    if 'size' not in kwargs_marker:
        kwargs_marker['size'] = 3

    # Bar plots do not accept size markers
    if trace_type == 'bar':
        kwargs_marker.pop('size', None)

    if 'opacity' not in kwargs_marker:
        # setdefault: an explicit trace-level opacity from the caller wins
        kwargs_trace.setdefault('opacity', 0.6)

    # default marker color; the ggplot2 style always uses black markers
    if not ggplot2:
        if not marker_color:
            marker_color = '#401362'
    else:
        marker_color = 'rgb(0, 0, 0)'

    num_of_rows = 1
    num_of_cols = 1
    flipped_rows = False
    flipped_cols = False

    if facet_row:
        row_values = df[facet_row].unique()
        num_of_rows = len(row_values)
        flipped_rows = _is_flipped(num_of_rows)
        if isinstance(facet_row_labels, dict):
            for key in row_values:
                if key not in facet_row_labels:
                    raise exceptions.PlotlyError(
                        CUSTOM_LABEL_ERROR.format(row_values.tolist()))

    if facet_col:
        col_values = df[facet_col].unique()
        num_of_cols = len(col_values)
        flipped_cols = _is_flipped(num_of_cols)
        if isinstance(facet_col_labels, dict):
            for key in col_values:
                if key not in facet_col_labels:
                    raise exceptions.PlotlyError(
                        CUSTOM_LABEL_ERROR.format(col_values.tolist()))

    # Set up some args to pass to _gear_facet_grid function
    show_legend = False
    colormapping = None
    color_type = None   # None, 'categorical', or 'numerical'

    # If there is a color label, pick the categorical or numerical facet
    # grid; the type of `colormap` takes precedence over the column's dtype.
    if color_name:
        if isinstance(colormap, dict):
            show_legend = True
            color_type = "categorical"
            clrs.validate_colors_dict(colormap, 'rgb')
            for val in df[color_name].unique():
                if val not in colormap:
                    raise exceptions.PlotlyError(
                        "If using 'colormap' as a dictionary, make sure "
                        "all the values of the colormap column are in "
                        "the keys of your dictionary.")
            colormapping = colormap
            _debug("DEBUG: Color type is 'categorical' with colormap dict")
        elif isinstance(colormap, list):
            color_type = "numerical"
            colormapping = colormap
            clrs.validate_colorscale(colormapping)
            _debug("DEBUG: Color type is 'numerical' from colormap list")
        elif isinstance(colormap, str):
            color_type = "numerical"
            if colormap in clrs.PLOTLY_SCALES.keys():
                colormapping = clrs.PLOTLY_SCALES[colormap]
            else:
                raise exceptions.PlotlyError(
                    "If 'colormap' is a string, it must be the name "
                    "of a Plotly Colorscale. The available colorscale "
                    "names are {}".format(clrs.PLOTLY_SCALES.keys()))
            _debug("DEBUG: Color type is 'numerical' from colormap string")
        elif isinstance(df[color_name].iloc[0], str) or color_is_cat:
            color_type = "categorical"
            # use default plotly colors for dictionary, cycling through the
            # palette when there are more values than colors
            default_colors = clrs.DEFAULT_PLOTLY_COLORS
            colormap = {
                val: default_colors[idx % len(default_colors)]
                for idx, val in enumerate(df[color_name].unique())
            }
            colormapping = colormap
        else:
            color_type = "numerical"
            colormapping = [[0, 'rgb(218, 183, 193)'],
                            [0.35, 'rgb(194, 137, 166)'],
                            [0.5, 'rgb(169, 98, 151)'],
                            [0.6, 'rgb(145, 66, 143)'],
                            [0.7, 'rgb(105, 39, 122)'],
                            [1, 'rgb(63, 19, 98)']]
            _debug("DEBUG: Color type is 'numerical' with no colormap")
    else:
        _debug("DEBUG: Color type is 'None'")

    fig, annotations = _gear_facet_grid(
        df, x, y, facet_row, facet_col, color_name, colormapping, color_type,
        num_of_rows, num_of_cols, facet_row_labels, facet_col_labels,
        trace_type, flipped_rows, flipped_cols, subplot_spacing,
        marker_color, text_name, jitter, kwargs_trace, kwargs_marker)

    # General layout adjustments
    fig['layout'].update(title='', paper_bgcolor=PAPER_BGCOLOR)
    fig['layout']['hovermode'] = "closest"
    # Default "plotly" theme produces gray plot backgrounds
    fig['layout']['template'] = "none"

    # axis titles (deliberately blank in this variant)
    x_title_annot = _axis_title_annotation('', 'x')
    y_title_annot = _axis_title_annotation('', 'y')

    # annotations
    annotations.append(x_title_annot)
    annotations.append(y_title_annot)

    # all xaxis and yaxis labels
    axis_labels = {'x': [], 'y': []}
    for key in fig['layout']:
        if 'xaxis' in key:
            axis_labels['x'].append(key)
        elif 'yaxis' in key:
            axis_labels['y'].append(key)

    def _parses_as_int(item):
        try:
            int(item)
        except ValueError:
            return False
        return True

    # Strings that look like integers (e.g. '4', '6') are forced onto
    # categorical axes in every panel.
    string_number_in_data = any(
        isinstance(df[var].tolist()[0], str)
        and any(_parses_as_int(item) for item in df[var])
        for var in (x, y) if var
    )

    # Iterate through the 'x' and 'y' axis groups
    for x_y in axis_labels:
        # Iterate through all faceted axes
        for axis_name in axis_labels[x_y]:
            axis = fig['layout'][axis_name]

            # Common to both x and y
            if string_number_in_data:
                axis['type'] = 'category'
            axis['showgrid'] = False
            axis['automargin'] = True
            axis['zeroline'] = False

            # Specific axis only
            if x_y == 'x':
                if hide_x_labels:
                    # TODO: test with 'visible' attribute instead of
                    # 'showticklabels'
                    axis['showticklabels'] = False
                # Uniformity of tick angles if facet groupings are present
                if facet_col:
                    axis['tickangle'] = 270
            elif x_y == 'y':
                axis['hoverformat'] = '.2f'
                if hide_y_labels:
                    axis['showticklabels'] = False

    fig['layout']['autosize'] = True

    # legend
    fig['layout']['showlegend'] = show_legend
    fig['layout']['legend']['bgcolor'] = LEGEND_COLOR
    fig['layout']['legend']['borderwidth'] = LEGEND_BORDER_WIDTH
    fig['layout']['legend']['x'] = 1.05
    fig['layout']['legend']['y'] = 1
    fig['layout']['legend']['yanchor'] = 'top'

    # Colorbar adjustments
    if color_type == "numerical":
        fig['layout']['coloraxis'] = {
            # Defines the range of colors for a numerical color group
            "colorscale": colormapping,
            "colorbar": {
                'x': 1.15
            },
            "showscale": True,
        }

    # Violin plot settings
    if trace_type == 'violin':
        fig['layout']['violinmode'] = 'group' if color_name else 'overlay'

    # assign annotations to figure
    fig['layout']['annotations'] = annotations

    # autoscale histogram bars: only scatter-like traces honour `scales`
    if trace_type not in ['scatter', 'line', 'scattergl']:
        scales = 'free'

    # validate scales
    if scales not in ['fixed', 'free_x', 'free_y', 'free']:
        raise exceptions.PlotlyError(
            "'scales' must be set to 'fixed', 'free_x', 'free_y' and 'free'.")

    # `scales` was validated just above, so the lookup cannot miss. (The
    # original fell through to `raise "<str>"`, which is itself a TypeError
    # in Python 3 and was unreachable.)
    fixed_axes = {
        'fixed': ['x', 'y'],
        'free_x': ['y'],
        'free_y': ['x'],
        'free': [],
    }[scales]

    if len(fig['data']):
        # fixed ranges
        for x_y in fixed_axes:
            # Gather the axis data once; traces without data on this axis
            # are skipped rather than crashing the chain.
            values = list(chain.from_iterable(
                trace[x_y] for trace in fig['data']
                if trace[x_y] is not None))
            if not values:
                _debug("DEBUG: No data on axis:{0}".format(x_y))
                continue

            min_range = min(values)
            max_range = max(values)

            range_are_numbers = (isinstance(min_range, Number)
                                 and isinstance(max_range, Number))

            _debug("DEBUG: On axis:{0} min_range:{1} max_range:{2}".format(
                x_y, min_range, max_range))

            user_dtick = dtick_x if x_y == 'x' else dtick_y

            dtick, min_range, max_range = _calculate_dtick(
                min_range, max_range, range_are_numbers, user_dtick)

            # For the given axis dimension set tick attributes
            for axis_title in axis_labels[x_y]:
                fig['layout'][axis_title]['dtick'] = dtick
                fig['layout'][axis_title]['ticklen'] = 0

                if range_are_numbers:
                    fig['layout'][axis_title]['range'] = [
                        min_range, max_range]
    else:
        _debug("DEBUG: No trace data for current plot")

    return fig