コード例 #1
0
    def test_default_dendrogram(self):
        X = np.array([[1, 2, 3, 4], [1, 1, 3, 4], [1, 2, 1, 4], [1, 2, 3, 1]])
        dendro = tls.FigureFactory.create_dendrogram(X=X)

        expected_dendro = go.Figure(
            data=go.Data([
                go.Scatter(x=np.array([25., 25., 35., 35.]),
                           y=np.array([0., 1., 1., 0.]),
                           marker=go.Marker(color='rgb(61,153,112)'),
                           mode='lines',
                           xaxis='x',
                           yaxis='y'),
                go.Scatter(x=np.array([15., 15., 30., 30.]),
                           y=np.array([0., 2.23606798, 2.23606798, 1.]),
                           marker=go.Marker(color='rgb(61,153,112)'),
                           mode='lines',
                           xaxis='x',
                           yaxis='y'),
                go.Scatter(x=np.array([5., 5., 22.5, 22.5]),
                           y=np.array([0., 3.60555128, 3.60555128,
                                       2.23606798]),
                           marker=go.Marker(color='rgb(0,116,217)'),
                           mode='lines',
                           xaxis='x',
                           yaxis='y')
            ]),
            layout=go.Layout(autosize=False,
                             height='100%',
                             hovermode='closest',
                             showlegend=False,
                             width='100%',
                             xaxis=go.XAxis(mirror='allticks',
                                            rangemode='tozero',
                                            showgrid=False,
                                            showline=True,
                                            showticklabels=True,
                                            tickmode='array',
                                            ticks='outside',
                                            ticktext=np.array(
                                                ['3', '2', '0', '1']),
                                            tickvals=[5.0, 15.0, 25.0, 35.0],
                                            type='linear',
                                            zeroline=False),
                             yaxis=go.YAxis(mirror='allticks',
                                            rangemode='tozero',
                                            showgrid=False,
                                            showline=True,
                                            showticklabels=True,
                                            ticks='outside',
                                            type='linear',
                                            zeroline=False)))

        self.assertEqual(len(dendro['data']), 3)

        # this is actually a bit clearer when debugging tests.
        self.assert_dict_equal(dendro['data'][0], expected_dendro['data'][0])
        self.assert_dict_equal(dendro['data'][1], expected_dendro['data'][1])
        self.assert_dict_equal(dendro['data'][2], expected_dendro['data'][2])

        self.assert_dict_equal(dendro['layout'], expected_dendro['layout'])
    def __init__(self,
                 X,
                 orientation='bottom',
                 labels=None,
                 colorscale=None,
                 width="100%",
                 height="100%",
                 xaxis='xaxis',
                 yaxis='yaxis',
                 distfun=None,
                 linkagefun=lambda x: sch.linkage(x, 'complete'),
                 hovertext=None):
        self.orientation = orientation
        self.labels = labels
        self.xaxis = xaxis
        self.yaxis = yaxis
        self.data = []
        self.leaves = []
        self.sign = {self.xaxis: 1, self.yaxis: 1}
        self.layout = {self.xaxis: {}, self.yaxis: {}}

        if self.orientation in ['left', 'bottom']:
            self.sign[self.xaxis] = 1
        else:
            self.sign[self.xaxis] = -1

        if self.orientation in ['right', 'bottom']:
            self.sign[self.yaxis] = 1
        else:
            self.sign[self.yaxis] = -1

        if distfun is None:
            distfun = scs.distance.pdist

        (dd_traces, xvals, yvals, ordered_labels,
         leaves) = self.get_dendrogram_traces(X, colorscale, distfun,
                                              linkagefun, hovertext)

        self.labels = ordered_labels
        self.leaves = leaves
        yvals_flat = yvals.flatten()
        xvals_flat = xvals.flatten()

        self.zero_vals = []

        for i in range(len(yvals_flat)):
            if yvals_flat[i] == 0.0 and xvals_flat[i] not in self.zero_vals:
                self.zero_vals.append(xvals_flat[i])

        self.zero_vals.sort()

        self.layout = self.set_figure_layout(width, height)
        self.data = graph_objs.Data(dd_traces)
コード例 #3
0
ファイル: dendrogram.py プロジェクト: metaspace2020/metaspace
    def __init__(
        self,
        X,
        orientation='bottom',
        labels=None,
        colorscale=None,
        width="100%",
        height="100%",
        xaxis='xaxis',
        yaxis='yaxis',
        method='complete',
        metric='euclidean',
    ):
        self.orientation = orientation
        self.labels = labels
        self.xaxis = xaxis
        self.yaxis = yaxis
        self.method = method
        self.metric = metric
        self.data = []
        self.leaves = []
        self.layout = {self.xaxis: {}, self.yaxis: {}}

        self.sign = {}
        self.sign[self.xaxis] = 1 if self.orientation in ['left', 'bottom'
                                                          ] else -1
        self.sign[self.yaxis] = 1 if self.orientation in ['right', 'bottom'
                                                          ] else -1

        (dd_traces, xvals, yvals, ordered_labels,
         leaves) = self.get_dendrogram_traces(X, colorscale)

        self.labels = ordered_labels
        self.leaves = leaves

        self.zero_vals = np.unique(xvals[yvals == 0.0])

        self.layout = self.set_figure_layout(width, height)
        self.data = graph_objs.Data(dd_traces)
コード例 #4
0
def create_violin(data, data_header=None, group_header=None, colors=None,
                  use_colorscale=False, group_stats=None, rugplot=True,
                  sort=False, height=450, width=600,
                  title='Violin and Rug Plot'):
    """
    Returns figure for a violin plot

    :param (list|array) data: accepts either a list of numerical values,
        a list of dictionaries all with identical keys and at least one
        column of numeric values, or a pandas dataframe with at least one
        column of numbers.
    :param (str) data_header: the header of the data column to be used
        from an inputted pandas dataframe. Not applicable if 'data' is
        a list of numeric values.
    :param (str) group_header: applicable if grouping data by a variable.
        'group_header' must be set to the name of the grouping variable.
    :param (str|tuple|list|dict) colors: either a plotly scale name,
        an rgb or hex color, a color tuple, a list of colors or a
        dictionary. An rgb color is of the form 'rgb(x, y, z)' where
        x, y and z belong to the interval [0, 255] and a color tuple is a
        tuple of the form (a, b, c) where a, b and c belong to [0, 1].
        If colors is a list, it must contain valid color types as its
        members.
    :param (bool) use_colorscale: only applicable if grouping by another
        variable. Will implement a colorscale based on the first 2 colors
        of param colors. This means colors must be a list with at least 2
        colors in it (Plotly colorscales are accepted since they map to a
        list of two rgb colors). Default = False
    :param (dict) group_stats: a dictioanry where each key is a unique
        value from the group_header column in data. Each value must be a
        number and will be used to color the violin plots if a colorscale
        is being used.
    :param (bool) rugplot: determines if a rugplot is draw on violin plot.
        Default = True
    :param (bool) sort: determines if violins are sorted
        alphabetically (True) or by input order (False). Default = False
    :param (float) height: the height of the violin plot.
    :param (float) width: the width of the violin plot.
    :param (str) title: the title of the violin plot.

    Example 1: Single Violin Plot
    ```
    import plotly.plotly as py
    from plotly.figure_factory import create_violin
    from plotly.graph_objs import graph_objs

    import numpy as np
    from scipy import stats

    # create list of random values
    data_list = np.random.randn(100)
    data_list.tolist()

    # create violin fig
    fig = create_violin(data_list, colors='#604d9e')

    # plot
    py.iplot(fig, filename='Violin Plot')
    ```

    Example 2: Multiple Violin Plots with Qualitative Coloring
    ```
    import plotly.plotly as py
    from plotly.figure_factory import create_violin
    from plotly.graph_objs import graph_objs

    import numpy as np
    import pandas as pd
    from scipy import stats

    # create dataframe
    np.random.seed(619517)
    Nr=250
    y = np.random.randn(Nr)
    gr = np.random.choice(list("ABCDE"), Nr)
    norm_params=[(0, 1.2), (0.7, 1), (-0.5, 1.4), (0.3, 1), (0.8, 0.9)]

    for i, letter in enumerate("ABCDE"):
        y[gr == letter] *=norm_params[i][1]+ norm_params[i][0]
    df = pd.DataFrame(dict(Score=y, Group=gr))

    # create violin fig
    fig = create_violin(df, data_header='Score', group_header='Group',
                        sort=True, height=600, width=1000)

    # plot
    py.iplot(fig, filename='Violin Plot with Coloring')
    ```

    Example 3: Violin Plots with Colorscale
    ```
    import plotly.plotly as py
    from plotly.figure_factory import create_violin
    from plotly.graph_objs import graph_objs

    import numpy as np
    import pandas as pd
    from scipy import stats

    # create dataframe
    np.random.seed(619517)
    Nr=250
    y = np.random.randn(Nr)
    gr = np.random.choice(list("ABCDE"), Nr)
    norm_params=[(0, 1.2), (0.7, 1), (-0.5, 1.4), (0.3, 1), (0.8, 0.9)]

    for i, letter in enumerate("ABCDE"):
        y[gr == letter] *=norm_params[i][1]+ norm_params[i][0]
    df = pd.DataFrame(dict(Score=y, Group=gr))

    # define header params
    data_header = 'Score'
    group_header = 'Group'

    # make groupby object with pandas
    group_stats = {}
    groupby_data = df.groupby([group_header])

    for group in "ABCDE":
        data_from_group = groupby_data.get_group(group)[data_header]
        # take a stat of the grouped data
        stat = np.median(data_from_group)
        # add to dictionary
        group_stats[group] = stat

    # create violin fig
    fig = create_violin(df, data_header='Score', group_header='Group',
                        height=600, width=1000, use_colorscale=True,
                        group_stats=group_stats)

    # plot
    py.iplot(fig, filename='Violin Plot with Colorscale')
    ```
    """

    # Validate colors
    if isinstance(colors, dict):
        valid_colors = utils.validate_colors_dict(colors, 'rgb')
    else:
        valid_colors = utils.validate_colors(colors, 'rgb')

    # validate data and choose plot type
    if group_header is None:
        if isinstance(data, list):
            if len(data) <= 0:
                raise exceptions.PlotlyError("If data is a list, it must be "
                                             "nonempty and contain either "
                                             "numbers or dictionaries.")

            if not all(isinstance(element, Number) for element in data):
                raise exceptions.PlotlyError("If data is a list, it must "
                                             "contain only numbers.")

        if pd and isinstance(data, pd.core.frame.DataFrame):
            if data_header is None:
                raise exceptions.PlotlyError("data_header must be the "
                                             "column name with the "
                                             "desired numeric data for "
                                             "the violin plot.")

            data = data[data_header].values.tolist()

        # call the plotting functions
        plot_data, plot_xrange = violinplot(data, fillcolor=valid_colors[0],
                                            rugplot=rugplot)

        layout = graph_objs.Layout(
            title=title,
            autosize=True,
            font=graph_objs.Font(size=11),
            height=height,
            showlegend=False,
            width=width,
            xaxis=make_XAxis('', plot_xrange),
            yaxis=make_YAxis(''),
            hovermode='closest'
        )
        layout['yaxis'].update(dict(showline=False,
                                    showticklabels=False,
                                    ticks=''))

        fig = graph_objs.Figure(data=graph_objs.Data(plot_data),
                                layout=layout)

        return fig

    else:
        if not isinstance(data, pd.core.frame.DataFrame):
            raise exceptions.PlotlyError("Error. You must use a pandas "
                                         "DataFrame if you are using a "
                                         "group header.")

        if data_header is None:
            raise exceptions.PlotlyError("data_header must be the column "
                                         "name with the desired numeric "
                                         "data for the violin plot.")

        if use_colorscale is False:
            if isinstance(valid_colors, dict):
                # validate colors dict choice below
                fig = violin_dict(
                    data, data_header, group_header, valid_colors,
                    use_colorscale, group_stats, rugplot, sort,
                    height, width, title
                )
                return fig
            else:
                fig = violin_no_colorscale(
                    data, data_header, group_header, valid_colors,
                    use_colorscale, group_stats, rugplot, sort,
                    height, width, title
                )
                return fig
        else:
            if isinstance(valid_colors, dict):
                raise exceptions.PlotlyError("The colors param cannot be "
                                             "a dictionary if you are "
                                             "using a colorscale.")

            if len(valid_colors) < 2:
                raise exceptions.PlotlyError("colors must be a list with "
                                             "at least 2 colors. A "
                                             "Plotly scale is allowed.")

            if not isinstance(group_stats, dict):
                raise exceptions.PlotlyError("Your group_stats param "
                                             "must be a dictionary.")

            fig = violin_colorscale(
                data, data_header, group_header, valid_colors,
                use_colorscale, group_stats, rugplot, sort, height,
                width, title
            )
            return fig
def trisurf(x,
            y,
            z,
            simplices,
            show_colorbar,
            edges_color,
            scale,
            colormap=None,
            color_func=None,
            plot_edges=False,
            x_edge=None,
            y_edge=None,
            z_edge=None,
            facecolor=None):
    """
    Refer to FigureFactory.create_trisurf() for docstring
    """
    # numpy import check
    if not np:
        raise ImportError("FigureFactory._trisurf() requires "
                          "numpy imported.")
    points3D = np.vstack((x, y, z)).T
    simplices = np.atleast_2d(simplices)

    # vertices of the surface triangles
    tri_vertices = points3D[simplices]

    # Define colors for the triangle faces
    if color_func is None:
        # mean values of z-coordinates of triangle vertices
        mean_dists = tri_vertices[:, :, 2].mean(-1)
    elif isinstance(color_func, (list, np.ndarray)):
        # Pre-computed list / array of values to map onto color
        if len(color_func) != len(simplices):
            raise ValueError("If color_func is a list/array, it must "
                             "be the same length as simplices.")

        # convert all colors in color_func to rgb
        for index in range(len(color_func)):
            if isinstance(color_func[index], str):
                if '#' in color_func[index]:
                    foo = colors.hex_to_rgb(color_func[index])
                    color_func[index] = colors.label_rgb(foo)

            if isinstance(color_func[index], tuple):
                foo = colors.convert_to_RGB_255(color_func[index])
                color_func[index] = colors.label_rgb(foo)

        mean_dists = np.asarray(color_func)
    else:
        # apply user inputted function to calculate
        # custom coloring for triangle vertices
        mean_dists = []
        for triangle in tri_vertices:
            dists = []
            for vertex in triangle:
                dist = color_func(vertex[0], vertex[1], vertex[2])
                dists.append(dist)
            mean_dists.append(np.mean(dists))
        mean_dists = np.asarray(mean_dists)

    # Check if facecolors are already strings and can be skipped
    if isinstance(mean_dists[0], str):
        facecolor = mean_dists
    else:
        min_mean_dists = np.min(mean_dists)
        max_mean_dists = np.max(mean_dists)

        if facecolor is None:
            facecolor = []
        for index in range(len(mean_dists)):
            color = map_face2color(mean_dists[index], colormap, scale,
                                   min_mean_dists, max_mean_dists)
            facecolor.append(color)

    # Make sure facecolor is a list so output is consistent across Pythons
    facecolor = np.asarray(facecolor)
    ii, jj, kk = simplices.T

    triangles = graph_objs.Mesh3d(x=x,
                                  y=y,
                                  z=z,
                                  facecolor=facecolor,
                                  i=ii,
                                  j=jj,
                                  k=kk,
                                  name='')

    mean_dists_are_numbers = not isinstance(mean_dists[0], str)

    if mean_dists_are_numbers and show_colorbar is True:
        # make a colorscale from the colors
        colorscale = colors.make_colorscale(colormap, scale)
        colorscale = colors.convert_colorscale_to_rgb(colorscale)

        colorbar = graph_objs.Scatter3d(
            x=x[:1],
            y=y[:1],
            z=z[:1],
            mode='markers',
            marker=dict(size=0.1,
                        color=[min_mean_dists, max_mean_dists],
                        colorscale=colorscale,
                        showscale=True),
            hoverinfo='None',
            showlegend=False)

    # the triangle sides are not plotted
    if plot_edges is False:
        if mean_dists_are_numbers and show_colorbar is True:
            return graph_objs.Data([triangles, colorbar])
        else:
            return graph_objs.Data([triangles])

    # define the lists x_edge, y_edge and z_edge, of x, y, resp z
    # coordinates of edge end points for each triangle
    # None separates data corresponding to two consecutive triangles
    is_none = [ii is None for ii in [x_edge, y_edge, z_edge]]
    if any(is_none):
        if not all(is_none):
            raise ValueError("If any (x_edge, y_edge, z_edge) is None, "
                             "all must be None")
        else:
            x_edge = []
            y_edge = []
            z_edge = []

    # Pull indices we care about, then add a None column to separate tris
    ixs_triangles = [0, 1, 2, 0]
    pull_edges = tri_vertices[:, ixs_triangles, :]
    x_edge_pull = np.hstack(
        [pull_edges[:, :, 0],
         np.tile(None, [pull_edges.shape[0], 1])])
    y_edge_pull = np.hstack(
        [pull_edges[:, :, 1],
         np.tile(None, [pull_edges.shape[0], 1])])
    z_edge_pull = np.hstack(
        [pull_edges[:, :, 2],
         np.tile(None, [pull_edges.shape[0], 1])])

    # Now unravel the edges into a 1-d vector for plotting
    x_edge = np.hstack([x_edge, x_edge_pull.reshape([1, -1])[0]])
    y_edge = np.hstack([y_edge, y_edge_pull.reshape([1, -1])[0]])
    z_edge = np.hstack([z_edge, z_edge_pull.reshape([1, -1])[0]])

    if not (len(x_edge) == len(y_edge) == len(z_edge)):
        raise exceptions.PlotlyError("The lengths of x_edge, y_edge and "
                                     "z_edge are not the same.")

    # define the lines for plotting
    lines = graph_objs.Scatter3d(x=x_edge,
                                 y=y_edge,
                                 z=z_edge,
                                 mode='lines',
                                 line=graph_objs.Line(color=edges_color,
                                                      width=1.5),
                                 showlegend=False)

    if mean_dists_are_numbers and show_colorbar is True:
        return graph_objs.Data([triangles, lines, colorbar])
    else:
        return graph_objs.Data([triangles, lines])
コード例 #6
0
    def test_dendrogram_colorscale(self):
        X = np.array([[1, 2, 3, 4], [1, 1, 3, 4], [1, 2, 1, 4], [1, 2, 3, 1]])
        greyscale = [
            'rgb(0,0,0)',  # black
            'rgb(05,105,105)',  # dim grey
            'rgb(128,128,128)',  # grey
            'rgb(169,169,169)',  # dark grey
            'rgb(192,192,192)',  # silver
            'rgb(211,211,211)',  # light grey
            'rgb(220,220,220)',  # gainsboro
            'rgb(245,245,245)'
        ]  # white smoke

        dendro = tls.FigureFactory.create_dendrogram(X, colorscale=greyscale)

        expected_dendro = go.Figure(
            data=go.Data([
                go.Scatter(x=np.array([25., 25., 35., 35.]),
                           y=np.array([0., 1., 1., 0.]),
                           marker=go.Marker(color='rgb(128,128,128)'),
                           mode='lines',
                           xaxis='x',
                           yaxis='y'),
                go.Scatter(x=np.array([15., 15., 30., 30.]),
                           y=np.array([0., 2.23606798, 2.23606798, 1.]),
                           marker=go.Marker(color='rgb(128,128,128)'),
                           mode='lines',
                           xaxis='x',
                           yaxis='y'),
                go.Scatter(x=np.array([5., 5., 22.5, 22.5]),
                           y=np.array([0., 3.60555128, 3.60555128,
                                       2.23606798]),
                           marker=go.Marker(color='rgb(0,0,0)'),
                           mode='lines',
                           xaxis='x',
                           yaxis='y')
            ]),
            layout=go.Layout(autosize=False,
                             height='100%',
                             hovermode='closest',
                             showlegend=False,
                             width='100%',
                             xaxis=go.XAxis(mirror='allticks',
                                            rangemode='tozero',
                                            showgrid=False,
                                            showline=True,
                                            showticklabels=True,
                                            tickmode='array',
                                            ticks='outside',
                                            ticktext=np.array(
                                                ['3', '2', '0', '1']),
                                            tickvals=[5.0, 15.0, 25.0, 35.0],
                                            type='linear',
                                            zeroline=False),
                             yaxis=go.YAxis(mirror='allticks',
                                            rangemode='tozero',
                                            showgrid=False,
                                            showline=True,
                                            showticklabels=True,
                                            ticks='outside',
                                            type='linear',
                                            zeroline=False)))

        self.assertEqual(len(dendro['data']), 3)

        # this is actually a bit clearer when debugging tests.
        self.assert_dict_equal(dendro['data'][0], expected_dendro['data'][0])
        self.assert_dict_equal(dendro['data'][1], expected_dendro['data'][1])
        self.assert_dict_equal(dendro['data'][2], expected_dendro['data'][2])
コード例 #7
0
    def test_dendrogram_random_matrix(self):

        # create a random uncorrelated matrix
        X = np.random.rand(5, 5)

        # variable 2 is correlated with all the other variables
        X[2, :] = sum(X, 0)

        names = ['Jack', 'Oxana', 'John', 'Chelsea', 'Mark']
        dendro = tls.FigureFactory.create_dendrogram(X, labels=names)

        expected_dendro = go.Figure(
            data=go.Data([
                go.Scatter(marker=go.Marker(color='rgb(61,153,112)'),
                           mode='lines',
                           xaxis='x',
                           yaxis='y'),
                go.Scatter(marker=go.Marker(color='rgb(61,153,112)'),
                           mode='lines',
                           xaxis='x',
                           yaxis='y'),
                go.Scatter(marker=go.Marker(color='rgb(61,153,112)'),
                           mode='lines',
                           xaxis='x',
                           yaxis='y'),
                go.Scatter(marker=go.Marker(color='rgb(0,116,217)'),
                           mode='lines',
                           xaxis='x',
                           yaxis='y')
            ]),
            layout=go.Layout(autosize=False,
                             height='100%',
                             hovermode='closest',
                             showlegend=False,
                             width='100%',
                             xaxis=go.XAxis(
                                 mirror='allticks',
                                 rangemode='tozero',
                                 showgrid=False,
                                 showline=True,
                                 showticklabels=True,
                                 tickmode='array',
                                 ticks='outside',
                                 tickvals=[5.0, 15.0, 25.0, 35.0, 45.0],
                                 type='linear',
                                 zeroline=False),
                             yaxis=go.YAxis(mirror='allticks',
                                            rangemode='tozero',
                                            showgrid=False,
                                            showline=True,
                                            showticklabels=True,
                                            ticks='outside',
                                            type='linear',
                                            zeroline=False)))

        self.assertEqual(len(dendro['data']), 4)

        # it's random, so we can only check that the values aren't equal
        y_vals = [
            dendro['data'][0].pop('y'), dendro['data'][1].pop('y'),
            dendro['data'][2].pop('y'), dendro['data'][3].pop('y')
        ]
        for i in range(len(y_vals)):
            for j in range(len(y_vals)):
                if i != j:
                    self.assertFalse(np.allclose(y_vals[i], y_vals[j]))

        x_vals = [
            dendro['data'][0].pop('x'), dendro['data'][1].pop('x'),
            dendro['data'][2].pop('x'), dendro['data'][3].pop('x')
        ]
        for i in range(len(x_vals)):
            for j in range(len(x_vals)):
                if i != j:
                    self.assertFalse(np.allclose(x_vals[i], x_vals[j]))

        # we also need to check the ticktext manually
        xaxis_ticktext = dendro['layout']['xaxis'].pop('ticktext')
        self.assertEqual(xaxis_ticktext[0], 'John')

        # this is actually a bit clearer when debugging tests.
        self.assert_dict_equal(dendro['data'][0], expected_dendro['data'][0])
        self.assert_dict_equal(dendro['data'][1], expected_dendro['data'][1])
        self.assert_dict_equal(dendro['data'][2], expected_dendro['data'][2])
        self.assert_dict_equal(dendro['data'][3], expected_dendro['data'][3])

        self.assert_dict_equal(dendro['layout'], expected_dendro['layout'])