def test_analysis_layout_build(self, small_df, populated_config):
     """
     Build a two-row ``Scatter`` layout and check the shape of the widget tree.

     ``populated_config`` is only requested as a fixture; the body never reads it.
     """
     source = DataSource(small_df, None)
     root = AnalysisLayout([["Scatter"], ["Scatter"]], 400, source).build()
     assert isinstance(root, widgets.VBox)

     kids = root.children
     # 2 plot rows + 2 extra children for HTML(css) and the selection types
     # control (standard, additive, subtractive).
     assert len(kids) == 4

     # The children at index 1 and 2 are expected to be rows holding one widget each.
     plot_rows = (kids[1], kids[2])
     for row in plot_rows:
         assert isinstance(row, widgets.HBox)
     for row in plot_rows:
         assert len(row.children) == 1
    def test_build_row_height_list(self, small_df, populated_config):
        """
        Check that a list of row heights is handed to the widget class row by row.

        The ``Scatter`` registry entry is replaced by a mock class so that the
        arguments of each instantiation can be inspected after ``build()``.
        """
        row_heights = [300, 200]
        # One expected instantiation per row; only the fifth positional argument
        # (the height) is pinned, everything else may be anything.
        expected_calls = [call(ANY, ANY, ANY, ANY, 300), call(ANY, ANY, ANY, ANY, 200)]

        source = DataSource(small_df, None)

        # Instance returned by the mocked class; its build() yields an empty HBox.
        scatter_instance = Mock()
        scatter_instance.build = Mock(return_value=widgets.HBox())
        scatter_cls = Mock(return_value=scatter_instance)

        WidgetClassRegistry().registry["Scatter"] = scatter_cls

        AnalysisLayout([["Scatter"], ["Scatter"]], row_heights, source).build()

        scatter_cls.assert_has_calls(expected_calls, any_order=False)
 def test_change_to_std(self, small_df):
     """Assert that selecting "std" on the widget yields ``SelectionType.STANDARD``."""
     source = DataSource(small_df, None)
     layout = AnalysisLayout([["Scatter"], ["Scatter"]], 400, source)
     # "add" is set first so that the following change to "std" triggers observe.
     for choice in ("add", "std"):
         layout.selection_type_widget.value = choice
     assert layout.data_source.selection_type == SelectionType.STANDARD
예제 #4
0
    def __init__(
        self,
        data: typing.Union[DataFrame, DataSource],
        layout: typing.Union[str, typing.List[typing.List[str]]] = "default",
        categorical_columns: typing.Union[typing.List[str], None] = None,
        row_height: typing.Union[int, typing.List[int]] = 400,
        sample: typing.Union[float, int, None] = None,
        select_color: typing.Union[str, typing.Tuple[int, int, int]] = "#323EEC",
        deselect_color: typing.Union[str, typing.Tuple[int, int, int]] = "#8A8C93",
        alpha: float = 0.75,
        seed: typing.Union[int, None] = None,
    ):
        """
        Validate the arguments, store the colour settings in :class:`Config` and create the layout.

        ``categorical_columns``, ``sample`` and ``seed`` are only used when ``data`` is a DataFrame;
        a :class:`DataSource` that is passed in is used as it is.

        :param data: A pandas.DataFrame object or a :class:`DataSource`.
        :param layout: Layout specification name or explicit definition of widget names in rows.
            Defaults to 'default'.
        :param categorical_columns: If given, specifies which columns are to be interpreted as categorical.
            Those columns have to include all columns of the DataFrame
            which have type `object`, `str`, `bool` or `category`.
            This means it can only add columns which do not have the aforementioned types.
            Defaults to None.
        :param row_height: Height in pixels each row should have. If given an integer, each row has the height
            specified by that value, if given a list of integers, each value in the list specifies the height of
            the corresponding row.
            Defaults to 400.
        :param sample: Int or float value specifying if the DataFrame should be sub-sampled.
            When an int is given, the DataFrame will be limited to that number of rows given by the value.
            When a float is given, the DataFrame will include the fraction of rows given by the value.
            Defaults to None.
        :param select_color: RGB tuple or hex color specifying the color display selected data points.
            Values in the tuple have to be between 0 and 255 inclusive or a hex string that converts to
            such RGB values.
            Defaults to '#323EEC'.
        :param deselect_color: RGB tuple or hex color specifying the color display deselected data points.
            Values in the tuple have to be between 0 and 255 inclusive or a hex string that converts to
            such RGB values.
            Defaults to '#8A8C93'.
        :param alpha: Opacity of data points when applicable ranging from 0.0 to 1.0 inclusive. Defaults to 0.75.
        :param seed: Random seed used for sampling the data.
            Values can be any integer between 0 and 2**32 - 1 inclusive or None.
            Defaults to None.
        """
        super().__init__()

        validate.validate_data(data)
        validate.validate_alpha(alpha)
        validate.validate_color(select_color)
        validate.validate_color(deselect_color)

        # Colours are always stored as RGB tuples; hex strings are converted first.
        if isinstance(select_color, str):
            self.select_color: typing.Tuple[int, int, int] = hex_to_rgb(select_color)
        elif isinstance(select_color, tuple):
            self.select_color: typing.Tuple[int, int, int] = select_color

        if isinstance(deselect_color, str):
            self.deselect_color: typing.Tuple[int, int, int] = hex_to_rgb(
                deselect_color
            )
        elif isinstance(deselect_color, tuple):
            self.deselect_color: typing.Tuple[int, int, int] = deselect_color

        self.alpha = alpha
        # Two-stop scale: 0 maps to the deselect colour, 1 to the select colour.
        self.color_scale = [
            [0, "rgb(%d,%d,%d)" % self.deselect_color],
            [1, "rgb(%d,%d,%d)" % self.select_color],
        ]

        # NOTE(review): the Config object is not kept on self, so it presumably
        # shares its state with other Config instances - confirm.
        config = Config()
        config["alpha"] = self.alpha
        config["select_color"] = self.select_color
        config["deselect_color"] = self.deselect_color
        config["color_scale"] = self.color_scale

        if isinstance(data, DataFrame):
            self.data_source = DataSource(
                df=data,
                categorical_columns=categorical_columns,
                sample=sample,
                seed=seed,
            )
        elif isinstance(data, DataSource):
            self.data_source = data

        self.layout = AnalysisLayout(
            layout=layout, row_height=row_height, data_source=self.data_source
        )

        if self.data_source.few_num_cols and len(self._check_numerical_plots()) != 0:
            warnings.warn(
                "The passed DataFrame only has %d NUMERICAL column, which is insufficient for some plots "
                "like Parallel Coordinates. These plots will not be displayed."
                % len(self.data_source.numerical_columns)
            )

        if self.data_source.few_cat_cols and len(self._check_categorical_plots()) != 0:
            # The message is about categorical columns, so their count is reported.
            # NOTE(review): assumes DataSource exposes `categorical_columns`
            # next to `numerical_columns` - confirm.
            warnings.warn(
                "The passed DataFrame only has %d CATEGORICAL column, which is insufficient for some plots "
                "like Parallel Categories. These plots will not be displayed."
                % len(self.data_source.categorical_columns)
            )
예제 #5
0
class VisualAnalysis:
    """
    Generate plots that support linked-brushing from a pandas `DataFrame` and display them in Jupyter notebooks.
    """

    def __init__(
        self,
        data: typing.Union[DataFrame, DataSource],
        layout: typing.Union[str, typing.List[typing.List[str]]] = "default",
        categorical_columns: typing.Union[typing.List[str], None] = None,
        row_height: typing.Union[int, typing.List[int]] = 400,
        sample: typing.Union[float, int, None] = None,
        select_color: typing.Union[str, typing.Tuple[int, int, int]] = "#323EEC",
        deselect_color: typing.Union[str, typing.Tuple[int, int, int]] = "#8A8C93",
        alpha: float = 0.75,
        seed: typing.Union[int, None] = None,
    ):
        """
        Validate the arguments, store the colour settings in :class:`Config` and create the layout.

        ``categorical_columns``, ``sample`` and ``seed`` are only used when ``data`` is a DataFrame;
        a :class:`DataSource` that is passed in is used as it is.

        :param data: A pandas.DataFrame object or a :class:`DataSource`.
        :param layout: Layout specification name or explicit definition of widget names in rows.
            Defaults to 'default'.
        :param categorical_columns: If given, specifies which columns are to be interpreted as categorical.
            Those columns have to include all columns of the DataFrame
            which have type `object`, `str`, `bool` or `category`.
            This means it can only add columns which do not have the aforementioned types.
            Defaults to None.
        :param row_height: Height in pixels each row should have. If given an integer, each row has the height
            specified by that value, if given a list of integers, each value in the list specifies the height of
            the corresponding row.
            Defaults to 400.
        :param sample: Int or float value specifying if the DataFrame should be sub-sampled.
            When an int is given, the DataFrame will be limited to that number of rows given by the value.
            When a float is given, the DataFrame will include the fraction of rows given by the value.
            Defaults to None.
        :param select_color: RGB tuple or hex color specifying the color display selected data points.
            Values in the tuple have to be between 0 and 255 inclusive or a hex string that converts to
            such RGB values.
            Defaults to '#323EEC'.
        :param deselect_color: RGB tuple or hex color specifying the color display deselected data points.
            Values in the tuple have to be between 0 and 255 inclusive or a hex string that converts to
            such RGB values.
            Defaults to '#8A8C93'.
        :param alpha: Opacity of data points when applicable ranging from 0.0 to 1.0 inclusive. Defaults to 0.75.
        :param seed: Random seed used for sampling the data.
            Values can be any integer between 0 and 2**32 - 1 inclusive or None.
            Defaults to None.
        """
        super().__init__()

        validate.validate_data(data)
        validate.validate_alpha(alpha)
        validate.validate_color(select_color)
        validate.validate_color(deselect_color)

        # Colours are always stored as RGB tuples; hex strings are converted first.
        if isinstance(select_color, str):
            self.select_color: typing.Tuple[int, int, int] = hex_to_rgb(select_color)
        elif isinstance(select_color, tuple):
            self.select_color: typing.Tuple[int, int, int] = select_color

        if isinstance(deselect_color, str):
            self.deselect_color: typing.Tuple[int, int, int] = hex_to_rgb(
                deselect_color
            )
        elif isinstance(deselect_color, tuple):
            self.deselect_color: typing.Tuple[int, int, int] = deselect_color

        self.alpha = alpha
        # Two-stop scale: 0 maps to the deselect colour, 1 to the select colour.
        self.color_scale = [
            [0, "rgb(%d,%d,%d)" % self.deselect_color],
            [1, "rgb(%d,%d,%d)" % self.select_color],
        ]

        # NOTE(review): the Config object is not kept on self, so it presumably
        # shares its state with other Config instances - confirm.
        config = Config()
        config["alpha"] = self.alpha
        config["select_color"] = self.select_color
        config["deselect_color"] = self.deselect_color
        config["color_scale"] = self.color_scale

        if isinstance(data, DataFrame):
            self.data_source = DataSource(
                df=data,
                categorical_columns=categorical_columns,
                sample=sample,
                seed=seed,
            )
        elif isinstance(data, DataSource):
            self.data_source = data

        self.layout = AnalysisLayout(
            layout=layout, row_height=row_height, data_source=self.data_source
        )

        if self.data_source.few_num_cols and len(self._check_numerical_plots()) != 0:
            warnings.warn(
                "The passed DataFrame only has %d NUMERICAL column, which is insufficient for some plots "
                "like Parallel Coordinates. These plots will not be displayed."
                % len(self.data_source.numerical_columns)
            )

        if self.data_source.few_cat_cols and len(self._check_categorical_plots()) != 0:
            # The message is about categorical columns, so their count is reported.
            # NOTE(review): assumes DataSource exposes `categorical_columns`
            # next to `numerical_columns` - confirm.
            warnings.warn(
                "The passed DataFrame only has %d CATEGORICAL column, which is insufficient for some plots "
                "like Parallel Categories. These plots will not be displayed."
                % len(self.data_source.categorical_columns)
            )

    def _ipython_display_(self):
        """
        Builds the layout and calls :func:`IPython.display.display`

        :return:
        """
        # Imported lazily so that importing this module does not pull in the
        # notebook stack. `IPython.display` is the public location of `display`.
        from IPython.display import display
        from ipywidgets import widgets

        root_widget: widgets.Widget = self.layout.build()
        # noinspection PyTypeChecker
        display(root_widget)

    def _find_plots(self, candidates: typing.Set[str]) -> typing.List[str]:
        """
        Collects the widget names of the layout specification that are contained in ``candidates``.

        :param candidates: Widget names to look for.
        :return: List of the distinct names from ``candidates`` that occur in the layout. Empty list otherwise.
        """
        return list(
            {el for row in self.layout.layout_spec for el in row if el in candidates}
        )

    def _check_numerical_plots(self) -> typing.List[str]:
        """
        Checks if the layout contains widgets that can only display numerical data.

        :return: List of widgets in the layout that are strictly numerical. Empty list otherwise.
        """
        return self._find_plots({"ParallelCoordinates"})

    def _check_categorical_plots(self) -> typing.List[str]:
        """
        Checks if the layout contains widgets that can only display categorical data.

        :return: List of widgets in the layout that are strictly categorical. Empty list otherwise.
        """
        return self._find_plots({"ParallelCategories"})

    @staticmethod
    def widgets():
        """

        :return: All the widget names that are available as input to a layout.
        """
        return WidgetClassRegistry().widget_list
 def test_type_error(self, small_df):
     """Expect ``TypeError`` when the row height is given as the string "400px"."""
     bad_row_height = "400px"
     with pytest.raises(TypeError):
         AnalysisLayout("default", bad_row_height, DataSource(small_df))
 def test_int_negative(self, small_df):
     """Expect ``ValueError`` when the row height is the negative integer -200."""
     negative_height = -200
     with pytest.raises(ValueError):
         AnalysisLayout("default", negative_height, DataSource(small_df))
 def test_int_normal(self, small_df):
     """Construct the default layout with an integer row height; passes if nothing is raised."""
     source = DataSource(small_df)
     AnalysisLayout("default", 400, source)
 def test_analysis_layout_incorrect_widget_name(self, small_df):
     """Expect ``ValueError`` for a layout that names the widget "asdfasdf"."""
     bogus_layout = [["asdfasdf"]]
     with pytest.raises(ValueError):
         AnalysisLayout(bogus_layout, 400, DataSource(small_df, None))
 def test_analysis_layout_wrong_layout_type_error(self, small_df):
     """Expect ``TypeError`` when the layout is passed as a set."""
     layout_as_set = {"A", "B"}
     with pytest.raises(TypeError):
         AnalysisLayout(layout_as_set, 400, DataSource(small_df, None))
 def test_analysis_layout_wrong_predefined_layout_error(self, small_df):
     """Expect ``ValueError`` for the layout name "some_unknown_layout"."""
     unknown_name = "some_unknown_layout"
     with pytest.raises(ValueError):
         AnalysisLayout(unknown_name, 400, DataSource(small_df, None))
def test_analysis_layout_one_num_col_no_warning(small_df):
    """
    Construct a single-``Scatter`` layout and assert that the resulting object is truthy.

    NOTE(review): the name mentions "no warning", but the body does not inspect
    warnings - confirm whether that check is intended.
    """
    source = DataSource(small_df, None)
    layout = AnalysisLayout([["Scatter"]], 400, source)
    assert layout
 def test_change_to_sub(self, small_df):
     """Assert that selecting "sub" on the widget yields ``SelectionType.SUBTRACTIVE``."""
     source = DataSource(small_df, None)
     layout = AnalysisLayout([["Scatter"], ["Scatter"]], 400, source)

     layout.selection_type_widget.value = "sub"

     expected = SelectionType.SUBTRACTIVE
     assert layout.data_source.selection_type == expected
 def test_std_standard(self, small_df):
     """Assert that a freshly created layout leaves the selection type at ``STANDARD``."""
     source = DataSource(small_df, None)
     layout = AnalysisLayout([["Scatter"], ["Scatter"]], 400, source)

     expected = SelectionType.STANDARD
     assert layout.data_source.selection_type == expected
 def test_list_negative(self, small_df):
     """Expect ``ValueError`` when the row height list contains a negative value."""
     two_rows = [["Scatter"], ["Scatter"]]
     heights = [200, -300]
     with pytest.raises(ValueError):
         AnalysisLayout(two_rows, heights, DataSource(small_df))
 def test_list_normal(self, small_df):
     """Construct a two-row layout with one height per row; passes if nothing is raised."""
     two_rows = [["Scatter"], ["Scatter"]]
     heights = [200, 300]
     source = DataSource(small_df)
     AnalysisLayout(two_rows, heights, source)