def test_default_dendrogram(self):
    """Compare the default dendrogram of a fixed 4x4 matrix to a reference.

    The three traces and the layout returned by
    ``tls.FigureFactory.create_dendrogram`` are checked one by one against
    a hand-written expected figure.
    """
    X = np.array([[1, 2, 3, 4],
                  [1, 1, 3, 4],
                  [1, 2, 1, 4],
                  [1, 2, 3, 1]])
    dendro = tls.FigureFactory.create_dendrogram(X=X)

    # One (x, y, marker colour) triple per expected link, in trace order.
    link_specs = [
        ([25., 25., 35., 35.],
         [0., 1., 1., 0.],
         'rgb(61,153,112)'),
        ([15., 15., 30., 30.],
         [0., 2.23606798, 2.23606798, 1.],
         'rgb(61,153,112)'),
        ([5., 5., 22.5, 22.5],
         [0., 3.60555128, 3.60555128, 2.23606798],
         'rgb(0,116,217)'),
    ]
    expected_data = go.Data([
        go.Scatter(x=np.array(xs),
                   y=np.array(ys),
                   marker=go.Marker(color=colour),
                   mode='lines',
                   xaxis='x',
                   yaxis='y')
        for xs, ys, colour in link_specs
    ])

    expected_xaxis = go.XAxis(mirror='allticks',
                              rangemode='tozero',
                              showgrid=False,
                              showline=True,
                              showticklabels=True,
                              tickmode='array',
                              ticks='outside',
                              ticktext=np.array(['3', '2', '0', '1']),
                              tickvals=[5.0, 15.0, 25.0, 35.0],
                              type='linear',
                              zeroline=False)
    expected_yaxis = go.YAxis(mirror='allticks',
                              rangemode='tozero',
                              showgrid=False,
                              showline=True,
                              showticklabels=True,
                              ticks='outside',
                              type='linear',
                              zeroline=False)
    expected_layout = go.Layout(autosize=False,
                                height='100%',
                                hovermode='closest',
                                showlegend=False,
                                width='100%',
                                xaxis=expected_xaxis,
                                yaxis=expected_yaxis)
    expected_dendro = go.Figure(data=expected_data, layout=expected_layout)

    self.assertEqual(len(dendro['data']), 3)

    # Comparing trace by trace gives clearer failures when debugging.
    for trace_index in range(3):
        self.assert_dict_equal(dendro['data'][trace_index],
                               expected_dendro['data'][trace_index])

    self.assert_dict_equal(dendro['layout'], expected_dendro['layout'])
def __init__(self, X, orientation='bottom', labels=None, colorscale=None,
             width="100%", height="100%", xaxis='xaxis', yaxis='yaxis',
             distfun=None,
             linkagefun=lambda x: sch.linkage(x, 'complete'),
             hovertext=None):
    """Build the dendrogram traces and layout for ``X``.

    :param X: data forwarded to ``self.get_dendrogram_traces``.
    :param (str) orientation: 'bottom', 'left', 'right' or anything else;
        it only decides the sign stored for each axis in ``self.sign``.
    :param labels: stored on ``self.labels`` before the traces are built,
        then replaced by the ordered labels returned by
        ``get_dendrogram_traces``.
    :param colorscale: forwarded to ``get_dendrogram_traces``.
    :param width: forwarded to ``self.set_figure_layout``.
    :param height: forwarded to ``self.set_figure_layout``.
    :param (str) xaxis: key used for the x axis in ``self.sign`` and
        ``self.layout``.
    :param (str) yaxis: key used for the y axis in ``self.sign`` and
        ``self.layout``.
    :param distfun: distance function forwarded to
        ``get_dendrogram_traces``; defaults to ``scs.distance.pdist``.
    :param linkagefun: linkage function forwarded to
        ``get_dendrogram_traces``; defaults to complete linkage via
        ``sch.linkage``.
    :param hovertext: forwarded to ``get_dendrogram_traces``.
    """
    self.orientation = orientation
    self.labels = labels
    self.xaxis = xaxis
    self.yaxis = yaxis
    self.data = []
    self.leaves = []

    # +1 / -1 multipliers per axis, decided once from the orientation.
    self.sign = {
        self.xaxis: 1 if self.orientation in ['left', 'bottom'] else -1,
        self.yaxis: 1 if self.orientation in ['right', 'bottom'] else -1,
    }
    self.layout = {self.xaxis: {}, self.yaxis: {}}

    if distfun is None:
        distfun = scs.distance.pdist

    (dd_traces, xvals, yvals,
     ordered_labels, leaves) = self.get_dendrogram_traces(
        X, colorscale, distfun, linkagefun, hovertext)

    self.labels = ordered_labels
    self.leaves = leaves

    # Distinct x positions whose y value is exactly zero, in ascending
    # order.  A set removes duplicates without rescanning a list for every
    # point.
    self.zero_vals = sorted({
        x_val
        for x_val, y_val in zip(xvals.flatten(), yvals.flatten())
        if y_val == 0.0
    })

    self.layout = self.set_figure_layout(width, height)
    self.data = graph_objs.Data(dd_traces)
def __init__(
    self,
    X,
    orientation='bottom',
    labels=None,
    colorscale=None,
    width="100%",
    height="100%",
    xaxis='xaxis',
    yaxis='yaxis',
    method='complete',
    metric='euclidean',
):
    """Build the dendrogram traces and layout for ``X``.

    ``method`` and ``metric`` are only stored on the instance here; the
    traces come from ``self.get_dendrogram_traces(X, colorscale)`` and the
    layout from ``self.set_figure_layout(width, height)``.
    """
    self.orientation = orientation
    self.labels = labels
    self.xaxis = xaxis
    self.yaxis = yaxis
    self.method = method
    self.metric = metric
    self.data = []
    self.leaves = []
    self.layout = {self.xaxis: {}, self.yaxis: {}}

    # Axis direction multipliers derived from the orientation.
    self.sign = {}
    if self.orientation in ['left', 'bottom']:
        self.sign[self.xaxis] = 1
    else:
        self.sign[self.xaxis] = -1
    if self.orientation in ['right', 'bottom']:
        self.sign[self.yaxis] = 1
    else:
        self.sign[self.yaxis] = -1

    trace_info = self.get_dendrogram_traces(X, colorscale)
    dd_traces, xvals, yvals, ordered_labels, leaves = trace_info

    self.labels = ordered_labels
    self.leaves = leaves

    # Sorted, de-duplicated x positions where the y value is exactly zero.
    on_baseline = yvals == 0.0
    self.zero_vals = np.unique(xvals[on_baseline])

    self.layout = self.set_figure_layout(width, height)
    self.data = graph_objs.Data(dd_traces)
def create_violin(data, data_header=None, group_header=None, colors=None,
                  use_colorscale=False, group_stats=None, rugplot=True,
                  sort=False, height=450, width=600,
                  title='Violin and Rug Plot'):
    """
    Returns figure for a violin plot

    :param (list|array) data: accepts either a list of numerical values,
        a list of dictionaries all with identical keys and at least one
        column of numeric values, or a pandas dataframe with at least one
        column of numbers.
    :param (str) data_header: the header of the data column to be used
        from an inputted pandas dataframe. Not applicable if 'data' is
        a list of numeric values.
    :param (str) group_header: applicable if grouping data by a variable.
        'group_header' must be set to the name of the grouping variable.
    :param (str|tuple|list|dict) colors: either a plotly scale name,
        an rgb or hex color, a color tuple, a list of colors or a
        dictionary. An rgb color is of the form 'rgb(x, y, z)' where
        x, y and z belong to the interval [0, 255] and a color tuple is a
        tuple of the form (a, b, c) where a, b and c belong to [0, 1].
        If colors is a list, it must contain valid color types as its
        members.
    :param (bool) use_colorscale: only applicable if grouping by another
        variable. Will implement a colorscale based on the first 2 colors
        of param colors. This means colors must be a list with at least 2
        colors in it (Plotly colorscales are accepted since they map to a
        list of two rgb colors). Default = False
    :param (dict) group_stats: a dictionary where each key is a unique
        value from the group_header column in data. Each value must be a
        number and will be used to color the violin plots if a colorscale
        is being used.
    :param (bool) rugplot: determines if a rugplot is drawn on the violin
        plot. Default = True
    :param (bool) sort: determines if violins are sorted
        alphabetically (True) or by input order (False). Default = False
    :param (float) height: the height of the violin plot.
    :param (float) width: the width of the violin plot.
    :param (str) title: the title of the violin plot.

    :raises: (PlotlyError) if 'data' is an empty list or a list holding
        non-numbers while no group_header is given, if a dataframe is
        passed without 'data_header', if 'group_header' is given but
        'data' is not a pandas DataFrame (including when pandas is not
        available), or if the colorscale options are inconsistent
        (colors is a dict, fewer than 2 colors, or group_stats is not a
        dict).

    Example 1: Single Violin Plot
    ```
    import plotly.plotly as py
    from plotly.figure_factory import create_violin
    from plotly.graph_objs import graph_objs

    import numpy as np
    from scipy import stats

    # create list of random values
    data_list = np.random.randn(100).tolist()

    # create violin fig
    fig = create_violin(data_list, colors='#604d9e')

    # plot
    py.iplot(fig, filename='Violin Plot')
    ```

    Example 2: Multiple Violin Plots with Qualitative Coloring
    ```
    import plotly.plotly as py
    from plotly.figure_factory import create_violin
    from plotly.graph_objs import graph_objs

    import numpy as np
    import pandas as pd
    from scipy import stats

    # create dataframe
    np.random.seed(619517)
    Nr=250
    y = np.random.randn(Nr)
    gr = np.random.choice(list("ABCDE"), Nr)
    norm_params=[(0, 1.2), (0.7, 1), (-0.5, 1.4), (0.3, 1), (0.8, 0.9)]

    for i, letter in enumerate("ABCDE"):
        y[gr == letter] *= norm_params[i][1] + norm_params[i][0]
    df = pd.DataFrame(dict(Score=y, Group=gr))

    # create violin fig
    fig = create_violin(df, data_header='Score', group_header='Group',
                        sort=True, height=600, width=1000)

    # plot
    py.iplot(fig, filename='Violin Plot with Coloring')
    ```

    Example 3: Violin Plots with Colorscale
    ```
    import plotly.plotly as py
    from plotly.figure_factory import create_violin
    from plotly.graph_objs import graph_objs

    import numpy as np
    import pandas as pd
    from scipy import stats

    # create dataframe
    np.random.seed(619517)
    Nr=250
    y = np.random.randn(Nr)
    gr = np.random.choice(list("ABCDE"), Nr)
    norm_params=[(0, 1.2), (0.7, 1), (-0.5, 1.4), (0.3, 1), (0.8, 0.9)]

    for i, letter in enumerate("ABCDE"):
        y[gr == letter] *= norm_params[i][1] + norm_params[i][0]
    df = pd.DataFrame(dict(Score=y, Group=gr))

    # define header params
    data_header = 'Score'
    group_header = 'Group'

    # make groupby object with pandas
    group_stats = {}
    groupby_data = df.groupby([group_header])

    for group in "ABCDE":
        data_from_group = groupby_data.get_group(group)[data_header]
        # take a stat of the grouped data
        stat = np.median(data_from_group)
        # add to dictionary
        group_stats[group] = stat

    # create violin fig
    fig = create_violin(df, data_header='Score', group_header='Group',
                        height=600, width=1000, use_colorscale=True,
                        group_stats=group_stats)

    # plot
    py.iplot(fig, filename='Violin Plot with Colorscale')
    ```
    """
    # Validate colors
    if isinstance(colors, dict):
        valid_colors = utils.validate_colors_dict(colors, 'rgb')
    else:
        valid_colors = utils.validate_colors(colors, 'rgb')

    # ------------------------------------------------------------------
    # Ungrouped data: a single violin
    # ------------------------------------------------------------------
    if group_header is None:
        if isinstance(data, list):
            if not data:
                raise exceptions.PlotlyError("If data is a list, it must be "
                                             "nonempty and contain either "
                                             "numbers or dictionaries.")

            if not all(isinstance(element, Number) for element in data):
                raise exceptions.PlotlyError("If data is a list, it must "
                                             "contain only numbers.")

        # `pd` is falsy when pandas could not be imported, so guard on it
        # before touching pd.core.
        if pd and isinstance(data, pd.core.frame.DataFrame):
            if data_header is None:
                raise exceptions.PlotlyError("data_header must be the "
                                             "column name with the "
                                             "desired numeric data for "
                                             "the violin plot.")

            data = data[data_header].values.tolist()

        # call the plotting functions
        plot_data, plot_xrange = violinplot(data, fillcolor=valid_colors[0],
                                            rugplot=rugplot)

        layout = graph_objs.Layout(
            title=title,
            autosize=True,
            font=graph_objs.Font(size=11),
            height=height,
            showlegend=False,
            width=width,
            xaxis=make_XAxis('', plot_xrange),
            yaxis=make_YAxis(''),
            hovermode='closest'
        )
        layout['yaxis'].update(dict(showline=False,
                                    showticklabels=False,
                                    ticks=''))

        return graph_objs.Figure(data=graph_objs.Data(plot_data),
                                 layout=layout)

    # ------------------------------------------------------------------
    # Grouped data: one violin per group, DataFrame input required
    # ------------------------------------------------------------------
    # Same `pd` guard as above: without pandas nothing can be a DataFrame,
    # so report the documented PlotlyError instead of failing on
    # attribute access of a missing module.
    if not (pd and isinstance(data, pd.core.frame.DataFrame)):
        raise exceptions.PlotlyError("Error. You must use a pandas "
                                     "DataFrame if you are using a "
                                     "group header.")

    if data_header is None:
        raise exceptions.PlotlyError("data_header must be the column "
                                     "name with the desired numeric "
                                     "data for the violin plot.")

    if use_colorscale is False:
        if isinstance(valid_colors, dict):
            # validate colors dict choice below
            return violin_dict(
                data, data_header, group_header, valid_colors,
                use_colorscale, group_stats, rugplot, sort,
                height, width, title
            )
        return violin_no_colorscale(
            data, data_header, group_header, valid_colors,
            use_colorscale, group_stats, rugplot, sort,
            height, width, title
        )

    # Colorscale requested: colors must be a sequence of >= 2 entries and
    # group_stats must supply the per-group values.
    if isinstance(valid_colors, dict):
        raise exceptions.PlotlyError("The colors param cannot be "
                                     "a dictionary if you are "
                                     "using a colorscale.")

    if len(valid_colors) < 2:
        raise exceptions.PlotlyError("colors must be a list with "
                                     "at least 2 colors. A "
                                     "Plotly scale is allowed.")

    if not isinstance(group_stats, dict):
        raise exceptions.PlotlyError("Your group_stats param "
                                     "must be a dictionary.")

    return violin_colorscale(
        data, data_header, group_header, valid_colors,
        use_colorscale, group_stats, rugplot, sort, height,
        width, title
    )
def trisurf(x, y, z, simplices, show_colorbar, edges_color, scale,
            colormap=None, color_func=None, plot_edges=False,
            x_edge=None, y_edge=None, z_edge=None, facecolor=None):
    """
    Refer to FigureFactory.create_trisurf() for docstring

    Notes on what this helper does with its arguments:

    * ``color_func`` may be None (faces are colored by the mean z of their
      vertices), a list/array with one entry per simplex, or a callable
      taking ``(x, y, z)`` of a vertex.  A list/array passed by the caller
      is never modified; hex strings and color tuples in it are converted
      to 'rgb(...)' labels in a private copy.
    * Returns a ``graph_objs.Data`` holding the Mesh3d trace, followed by
      the edge-lines trace when ``plot_edges`` is not False, followed by a
      colorbar trace when face values are numeric and ``show_colorbar`` is
      True.

    :raises: (ImportError) if numpy is not available.
    :raises: (ValueError) if a list/array ``color_func`` does not have one
        entry per simplex, or if only some of x_edge/y_edge/z_edge are
        None.
    :raises: (PlotlyError) if the edge coordinate lists end up with
        different lengths.
    """
    # numpy import check
    if not np:
        raise ImportError("FigureFactory._trisurf() requires "
                          "numpy imported.")
    points3D = np.vstack((x, y, z)).T
    simplices = np.atleast_2d(simplices)

    # vertices of the surface triangles
    tri_vertices = points3D[simplices]

    # Define colors for the triangle faces
    if color_func is None:
        # mean values of z-coordinates of triangle vertices
        mean_dists = tri_vertices[:, :, 2].mean(-1)
    elif isinstance(color_func, (list, np.ndarray)):
        # Pre-computed list / array of values to map onto color
        if len(color_func) != len(simplices):
            raise ValueError("If color_func is a list/array, it must "
                             "be the same length as simplices.")

        # Convert hex strings and color tuples to 'rgb(...)' labels.  The
        # results go into a new list: writing them back into the input
        # would mutate the caller's object and, for fixed-width numpy
        # string arrays, truncate the longer 'rgb(...)' text.
        face_values = []
        for value in color_func:
            if isinstance(value, str):
                if '#' in value:
                    value = colors.label_rgb(colors.hex_to_rgb(value))
            elif isinstance(value, tuple):
                value = colors.label_rgb(colors.convert_to_RGB_255(value))
            face_values.append(value)

        mean_dists = np.asarray(face_values)
    else:
        # apply user inputted function to calculate
        # custom coloring for triangle vertices
        mean_dists = np.asarray([
            np.mean([color_func(vertex[0], vertex[1], vertex[2])
                     for vertex in triangle])
            for triangle in tri_vertices
        ])

    # String face values are already colors; numeric ones are mapped
    # through the colormap.
    mean_dists_are_numbers = not isinstance(mean_dists[0], str)
    if not mean_dists_are_numbers:
        facecolor = mean_dists
    else:
        min_mean_dists = np.min(mean_dists)
        max_mean_dists = np.max(mean_dists)

        if facecolor is None:
            facecolor = [
                map_face2color(face_value, colormap, scale,
                               min_mean_dists, max_mean_dists)
                for face_value in mean_dists
            ]

    # Make sure facecolor is an array so output is consistent across Pythons
    facecolor = np.asarray(facecolor)
    ii, jj, kk = simplices.T

    triangles = graph_objs.Mesh3d(x=x, y=y, z=z, facecolor=facecolor,
                                  i=ii, j=jj, k=kk, name='')

    with_colorbar = mean_dists_are_numbers and show_colorbar is True
    if with_colorbar:
        # make a colorscale from the colors
        colorscale = colors.make_colorscale(colormap, scale)
        colorscale = colors.convert_colorscale_to_rgb(colorscale)

        # A tiny marker trace whose only job is to display the color scale.
        colorbar = graph_objs.Scatter3d(
            x=x[:1],
            y=y[:1],
            z=z[:1],
            mode='markers',
            marker=dict(
                size=0.1,
                color=[min_mean_dists, max_mean_dists],
                colorscale=colorscale,
                showscale=True),
            hoverinfo='None',
            showlegend=False
        )

    # the triangle sides are not plotted
    if plot_edges is False:
        if with_colorbar:
            return graph_objs.Data([triangles, colorbar])
        return graph_objs.Data([triangles])

    # define the lists x_edge, y_edge and z_edge, of x, y, resp z
    # coordinates of edge end points for each triangle
    # None separates data corresponding to two consecutive triangles
    is_none = [edge is None for edge in [x_edge, y_edge, z_edge]]
    if any(is_none):
        if not all(is_none):
            raise ValueError("If any (x_edge, y_edge, z_edge) is None, "
                             "all must be None")
        x_edge = []
        y_edge = []
        z_edge = []

    # Pull indices we care about, then add a None column to separate tris
    ixs_triangles = [0, 1, 2, 0]
    pull_edges = tri_vertices[:, ixs_triangles, :]
    x_edge_pull = np.hstack(
        [pull_edges[:, :, 0], np.tile(None, [pull_edges.shape[0], 1])])
    y_edge_pull = np.hstack(
        [pull_edges[:, :, 1], np.tile(None, [pull_edges.shape[0], 1])])
    z_edge_pull = np.hstack(
        [pull_edges[:, :, 2], np.tile(None, [pull_edges.shape[0], 1])])

    # Now unravel the edges into a 1-d vector for plotting
    x_edge = np.hstack([x_edge, x_edge_pull.reshape([1, -1])[0]])
    y_edge = np.hstack([y_edge, y_edge_pull.reshape([1, -1])[0]])
    z_edge = np.hstack([z_edge, z_edge_pull.reshape([1, -1])[0]])

    if not (len(x_edge) == len(y_edge) == len(z_edge)):
        raise exceptions.PlotlyError("The lengths of x_edge, y_edge and "
                                     "z_edge are not the same.")

    # define the lines for plotting
    lines = graph_objs.Scatter3d(
        x=x_edge, y=y_edge, z=z_edge, mode='lines',
        line=graph_objs.Line(color=edges_color, width=1.5),
        showlegend=False
    )

    if with_colorbar:
        return graph_objs.Data([triangles, lines, colorbar])
    return graph_objs.Data([triangles, lines])
def test_dendrogram_colorscale(self):
    """Check that a custom colorscale is applied to the dendrogram traces.

    A greyscale palette is passed to
    ``tls.FigureFactory.create_dendrogram`` and the three resulting traces
    are compared against hand-written expectations.
    """
    X = np.array([[1, 2, 3, 4],
                  [1, 1, 3, 4],
                  [1, 2, 1, 4],
                  [1, 2, 3, 1]])
    greyscale = [
        'rgb(0,0,0)',  # black
        'rgb(105,105,105)',  # dim grey
        'rgb(128,128,128)',  # grey
        'rgb(169,169,169)',  # dark grey
        'rgb(192,192,192)',  # silver
        'rgb(211,211,211)',  # light grey
        'rgb(220,220,220)',  # gainsboro
        'rgb(245,245,245)',  # white smoke
    ]

    dendro = tls.FigureFactory.create_dendrogram(X, colorscale=greyscale)

    expected_dendro = go.Figure(
        data=go.Data([
            go.Scatter(x=np.array([25., 25., 35., 35.]),
                       y=np.array([0., 1., 1., 0.]),
                       marker=go.Marker(color='rgb(128,128,128)'),
                       mode='lines',
                       xaxis='x',
                       yaxis='y'),
            go.Scatter(x=np.array([15., 15., 30., 30.]),
                       y=np.array([0., 2.23606798, 2.23606798, 1.]),
                       marker=go.Marker(color='rgb(128,128,128)'),
                       mode='lines',
                       xaxis='x',
                       yaxis='y'),
            go.Scatter(x=np.array([5., 5., 22.5, 22.5]),
                       y=np.array([0., 3.60555128, 3.60555128, 2.23606798]),
                       marker=go.Marker(color='rgb(0,0,0)'),
                       mode='lines',
                       xaxis='x',
                       yaxis='y')
        ]),
        layout=go.Layout(autosize=False,
                         height='100%',
                         hovermode='closest',
                         showlegend=False,
                         width='100%',
                         xaxis=go.XAxis(mirror='allticks',
                                        rangemode='tozero',
                                        showgrid=False,
                                        showline=True,
                                        showticklabels=True,
                                        tickmode='array',
                                        ticks='outside',
                                        ticktext=np.array(
                                            ['3', '2', '0', '1']),
                                        tickvals=[5.0, 15.0, 25.0, 35.0],
                                        type='linear',
                                        zeroline=False),
                         yaxis=go.YAxis(mirror='allticks',
                                        rangemode='tozero',
                                        showgrid=False,
                                        showline=True,
                                        showticklabels=True,
                                        ticks='outside',
                                        type='linear',
                                        zeroline=False)))

    self.assertEqual(len(dendro['data']), 3)

    # Comparing trace by trace is clearer when debugging tests.
    # NOTE(review): the expected layout above is built but never compared
    # in this test; only the traces are asserted.
    self.assert_dict_equal(dendro['data'][0], expected_dendro['data'][0])
    self.assert_dict_equal(dendro['data'][1], expected_dendro['data'][1])
    self.assert_dict_equal(dendro['data'][2], expected_dendro['data'][2])
def test_dendrogram_random_matrix(self):
    """Check a labelled dendrogram built from a random 5x5 matrix.

    The matrix comes from ``np.random.rand`` with no fixed seed, so the
    coordinate arrays are popped from the traces and only checked for
    being pairwise different; the remaining trace and layout entries are
    compared against fixed expectations.
    """
    # create a random uncorrelated matrix
    X = np.random.rand(5, 5)
    # variable 2 is correlated with all the other variables
    X[2, :] = sum(X, 0)

    names = ['Jack', 'Oxana', 'John', 'Chelsea', 'Mark']
    dendro = tls.FigureFactory.create_dendrogram(X, labels=names)

    trace_colours = [
        'rgb(61,153,112)',
        'rgb(61,153,112)',
        'rgb(61,153,112)',
        'rgb(0,116,217)',
    ]
    expected_data = go.Data([
        go.Scatter(marker=go.Marker(color=colour),
                   mode='lines',
                   xaxis='x',
                   yaxis='y')
        for colour in trace_colours
    ])
    expected_xaxis = go.XAxis(mirror='allticks',
                              rangemode='tozero',
                              showgrid=False,
                              showline=True,
                              showticklabels=True,
                              tickmode='array',
                              ticks='outside',
                              tickvals=[5.0, 15.0, 25.0, 35.0, 45.0],
                              type='linear',
                              zeroline=False)
    expected_yaxis = go.YAxis(mirror='allticks',
                              rangemode='tozero',
                              showgrid=False,
                              showline=True,
                              showticklabels=True,
                              ticks='outside',
                              type='linear',
                              zeroline=False)
    expected_layout = go.Layout(autosize=False,
                                height='100%',
                                hovermode='closest',
                                showlegend=False,
                                width='100%',
                                xaxis=expected_xaxis,
                                yaxis=expected_yaxis)
    expected_dendro = go.Figure(data=expected_data, layout=expected_layout)

    trace_count = 4
    self.assertEqual(len(dendro['data']), trace_count)

    # it's random, so we can only check that the values aren't equal
    y_vals = [dendro['data'][k].pop('y') for k in range(trace_count)]
    for i, first in enumerate(y_vals):
        for j, second in enumerate(y_vals):
            if i == j:
                continue
            self.assertFalse(np.allclose(first, second))

    x_vals = [dendro['data'][k].pop('x') for k in range(trace_count)]
    for i, first in enumerate(x_vals):
        for j, second in enumerate(x_vals):
            if i == j:
                continue
            self.assertFalse(np.allclose(first, second))

    # we also need to check the ticktext manually
    xaxis_ticktext = dendro['layout']['xaxis'].pop('ticktext')
    self.assertEqual(xaxis_ticktext[0], 'John')

    # Comparing trace by trace is clearer when debugging tests.
    for k in range(trace_count):
        self.assert_dict_equal(dendro['data'][k],
                               expected_dendro['data'][k])

    self.assert_dict_equal(dendro['layout'], expected_dendro['layout'])