def test_fill_value():
    """Check that short rows are padded with ``fill_value`` and long rows cut.

    Exercises append, extend, insert, item assignment, slice assignment and
    ``add_column`` on a three-column table created with ``fill_value="?"``.
    """
    table = TabularData(["col-1", "col-2", "col-3"], fill_value="?")

    # A single short row is padded up to the number of columns.
    table.append(["foo"])
    assert list(table) == [["foo", "?", "?"]]

    # Rows shorter than the header are padded; the six-item row is truncated.
    new_rows = [
        ["bar"],
        ["foobar", "foobar2"],
        ["f", "fo", "foo", "foob", "fooba", "foobar"],
    ]
    table.extend(new_rows)
    after_extend = [
        ["foo", "?", "?"],
        ["bar", "?", "?"],
        ["foobar", "foobar2", "?"],
        ["f", "fo", "foo"],
    ]
    assert list(table) == after_extend

    table.insert(1, ["lorem"])
    assert table[1] == ["lorem", "?", "?"]

    table[0] = ["lorem", "ipsum"]
    assert table[0] == ["lorem", "ipsum", "?"]

    table[1:2] = [["f", "fo"]]
    assert table[1:2] == [["f", "fo", "?"]]

    # A freshly added column is filled with the fill value for existing rows.
    table.add_column("col-4")
    assert table.keys() == ["col-1", "col-2", "col-3", "col-4"]
    assert table[0][3] == "?"
def experiments_table(
    all_experiments,
    headers,
    metric_headers,
    metric_names,
    param_headers,
    param_names,
    sort_by=None,
    sort_order=None,
    precision=DEFAULT_PRECISION,
    fill_value=FILL_VALUE,
    iso=False,
) -> "TabularData":
    """Assemble a ``TabularData`` from the collected experiments.

    The table columns are ``headers`` followed by ``metric_headers`` and
    ``param_headers``. For every ``(base_rev, experiments)`` pair in
    ``all_experiments`` the rows produced by ``_collect_rows`` are appended
    to the table, which is then returned.
    """
    from funcy import lconcat

    from dvc.compare import TabularData

    columns = lconcat(headers, metric_headers, param_headers)
    table = TabularData(columns, fill_value=fill_value)

    for baseline, baseline_experiments in all_experiments.items():
        table.extend(
            _collect_rows(
                baseline,
                baseline_experiments,
                metric_names,
                param_names,
                sort_by=sort_by,
                sort_order=sort_order,
                precision=precision,
                fill_value=fill_value,
                iso=iso,
            )
        )

    return table
def experiments_table(
    all_experiments,
    metric_headers,
    metric_names,
    param_headers,
    param_names,
    sort_by=None,
    sort_order=None,
    precision=DEFAULT_PRECISION,
) -> "TabularData":
    """Assemble a ``TabularData`` from the collected experiments.

    The table starts with a fixed set of leading columns, followed by
    ``metric_headers`` and ``param_headers``, and uses ``FILL_VALUE`` as its
    fill value. For every ``(base_rev, experiments)`` pair in
    ``all_experiments`` the rows produced by ``_collect_rows`` are appended.
    """
    from funcy import lconcat

    from dvc.compare import TabularData

    fixed_columns = ["Experiment", "rev", "queued", "typ", "Created", "parent"]
    columns = lconcat(fixed_columns, metric_headers, param_headers)
    table = TabularData(columns, fill_value=FILL_VALUE)

    for baseline, baseline_experiments in all_experiments.items():
        table.extend(
            _collect_rows(
                baseline,
                baseline_experiments,
                metric_names,
                param_names,
                sort_by=sort_by,
                sort_order=sort_order,
                precision=precision,
            )
        )

    return table
def test_list_operations():
    """Walk a table through the list-style operations it supports.

    Covers append, extend, insert, indexing, slicing, deletion, item and
    slice assignment, ``shape``/``len`` and the final CSV rendering.
    """
    table = TabularData(["col1", "col2", "col3"])

    table.append(["1", "2", "3"])
    assert list(table) == [["1", "2", "3"]]

    table.extend((["11", "12", "13"], ["21", "22", "23"]))
    three_rows = [
        ["1", "2", "3"],
        ["11", "12", "13"],
        ["21", "22", "23"],
    ]
    assert list(table) == three_rows

    table.insert(1, ["01", "02", "03"])
    four_rows = [
        ["1", "2", "3"],
        ["01", "02", "03"],
        ["11", "12", "13"],
        ["21", "22", "23"],
    ]
    assert list(table) == four_rows

    # shape is reported as (columns, rows); len() counts rows.
    assert table.shape == (3, 4)
    assert len(table) == 4

    # Integer, open-ended slice and reversed-slice access.
    assert table[1] == ["01", "02", "03"]
    assert table[1:] == [
        ["01", "02", "03"],
        ["11", "12", "13"],
        ["21", "22", "23"],
    ]
    assert table[::-1] == [
        ["21", "22", "23"],
        ["11", "12", "13"],
        ["01", "02", "03"],
        ["1", "2", "3"],
    ]

    # Removing the inserted row brings back the three-row state.
    del table[1]
    assert list(table) == three_rows
    assert table.shape == (3, 3)

    table[1:3] = [["51", "52", "53"], ["61", "62", "63"]]
    assert list(table) == [
        ["1", "2", "3"],
        ["51", "52", "53"],
        ["61", "62", "63"],
    ]

    table[1] = ["41", "42", "43"]
    assert table[1] == ["41", "42", "43"]

    del table[1:3]
    assert table.shape == (3, 1)
    assert table.to_csv() == "col1,col2,col3\r\n1,2,3\r\n"
def test_write_parallel_coordinates(tmp_dir):
    """Check that the written HTML embeds the renderer's scripts and div."""
    table = TabularData(["categorical", "scalar"])
    table.extend([["foo", "0.1"], ["bar", "2"]])
    renderer = ParallelCoordinatesRenderer(table)

    page = write(tmp_dir, renderers=[renderer]).read_text()

    assert ParallelCoordinatesRenderer.SCRIPTS in page

    # The plot container is the DIV template filled with the renderer's JSON.
    expected_div = ParallelCoordinatesRenderer.DIV.format(
        id="plot_experiments",
        partial=renderer.as_json(),
    )
    assert expected_div in page
def test_dict_like_interfaces():
    """Check ``keys``, ``items`` and ``as_dict`` on a two-column table."""
    table = TabularData(["col-1", "col-2"])
    table.extend([["foo", "bar"], ["foobar", "foobar"]])

    assert table.keys() == ["col-1", "col-2"]

    # items() pairs each column name with that column's values.
    by_column = {
        "col-1": ["foo", "foobar"],
        "col-2": ["bar", "foobar"],
    }
    assert dict(table.items()) == by_column

    # as_dict() yields one mapping per row, optionally limited to some columns.
    by_row = [
        {"col-1": "foo", "col-2": "bar"},
        {"col-1": "foobar", "col-2": "foobar"},
    ]
    assert table.as_dict() == by_row
    assert table.as_dict(["col-1"]) == [{"col-1": "foo"}, {"col-1": "foobar"}]
def test_drop_duplicates_subset(axis, subset, expected):
    """Check ``drop_duplicates`` restricted to ``subset`` along ``axis``."""
    table = TabularData(["col-1", "col-2", "col-3", "col-4"])
    table.extend(
        [
            ["foo", "foo", "foo", "bar"],
            ["foo", "bar", "foo", "bar"],
            ["foo", "bar", "foobar", "bar"],
        ]
    )

    # Sanity check: the rows are stored unchanged before de-duplication.
    initial_rows = [
        ["foo", "foo", "foo", "bar"],
        ["foo", "bar", "foo", "bar"],
        ["foo", "bar", "foobar", "bar"],
    ]
    assert list(table) == initial_rows

    table.drop_duplicates(axis, subset=subset)
    assert list(table) == expected
def test_color_by_scalar():
    """Check the ``line`` settings produced when colouring by a scalar column."""
    table = TabularData(["categorical", "scalar"])
    table.extend([["foo", "0.1"], ["bar", "2"]])

    renderer = ParallelCoordinatesRenderer(table, color_by="scalar")
    payload = json.loads(renderer.as_json())

    assert expected_format(payload)

    expected_line = {
        "color": [0.1, 2.0],
        "showscale": True,
        "colorbar": {"title": "scalar"},
    }
    assert payload["data"][0]["line"] == expected_line
def test_drop_duplicates(axis, expected):
    """Check ``drop_duplicates`` along ``axis`` on a table with padded rows."""
    table = TabularData(["col-1", "col-2", "col-3"])
    table.extend(
        [
            ["foo"],
            ["foo", "foo"],
            ["foo", "foo"],
            ["foo", "bar", "foobar"],
        ]
    )

    # Short rows come back padded with empty strings.
    padded_rows = [
        ["foo", "", ""],
        ["foo", "foo", ""],
        ["foo", "foo", ""],
        ["foo", "bar", "foobar"],
    ]
    assert list(table) == padded_rows

    table.drop_duplicates(axis)
    assert list(table) == expected
def test_to_parallel_coordinates(tmp_dir, mocker):
    """Check what ``to_parallel_coordinates`` passes to the renderer and writer.

    Both ``dvc.render.html.write`` and the renderer class are patched, so
    only the call arguments are verified.
    """
    (tmp_dir / "foo").mkdir()

    table = TabularData(["categorical", "scalar"])
    table.extend([["foo", "0.1"], ["bar", "2"]])

    write_mock = mocker.patch("dvc.render.html.write")
    renderer_cls_mock = mocker.patch(
        "dvc.render.plotly.ParallelCoordinatesRenderer"
    )
    renderer_instance = renderer_cls_mock.return_value

    table.to_parallel_coordinates(output_path="foo")

    renderer_cls_mock.assert_called_with(table, None, table._fill_value)
    write_mock.assert_called_with("foo", renderers=[renderer_instance])
def test_drop_duplicates(axis, expected, ignore_empty):
    """Check ``drop_duplicates`` along ``axis`` with the given ``ignore_empty``."""
    table = TabularData(["col-1", "col-2", "col-3"], fill_value="-")
    table.extend(
        [
            ["foo"],
            ["foo", "foo"],
            ["foo", "foo"],
            ["foo", "bar", "foobar"],
        ]
    )

    # Short rows come back padded with the "-" fill value.
    padded_rows = [
        ["foo", "-", "-"],
        ["foo", "foo", "-"],
        ["foo", "foo", "-"],
        ["foo", "bar", "foobar"],
    ]
    assert list(table) == padded_rows

    table.drop_duplicates(axis, ignore_empty=ignore_empty)
    assert list(table) == expected
def test_fill_value():
    """Check how cells equal to the renderer's ``fill_value`` are rendered.

    In the categorical column the fill value is labelled "Missing"; in the
    scalar column it becomes ``None``.
    """
    table = TabularData(["categorical", "scalar"])
    table.extend([["foo", "-"], ["-", "2"]])

    renderer = ParallelCoordinatesRenderer(table, fill_value="-")
    payload = json.loads(renderer.as_json())

    assert expected_format(payload)

    dimensions = payload["data"][0]["dimensions"]
    assert dimensions[0] == {
        "label": "categorical",
        "values": [0, 1],
        "tickvals": [0, 1],
        "ticktext": ["foo", "Missing"],
    }
    assert dimensions[1] == {
        "label": "scalar",
        "values": [None, 2.0],
    }
def test_mixed_columns():
    """Check rendering of a table with one categorical and one scalar column."""
    table = TabularData(["categorical", "scalar"])
    table.extend([["foo", "0.1"], ["bar", "2"]])

    payload = json.loads(ParallelCoordinatesRenderer(table).as_json())

    assert expected_format(payload)

    dimensions = payload["data"][0]["dimensions"]

    # The categorical column carries tick values and tick labels.
    assert dimensions[0] == {
        "label": "categorical",
        "values": [1, 0],
        "tickvals": [1, 0],
        "ticktext": ["foo", "bar"],
    }
    # The scalar column only carries numeric values.
    assert dimensions[1] == {
        "label": "scalar",
        "values": [0.1, 2.0],
    }
def test_drop_duplicates_ignore_empty():
    """Check ``drop_duplicates("cols", ignore_empty=False)`` on padded rows.

    With these rows only the first column is removed; the columns that
    contain the "-" fill value are kept.
    """
    table = TabularData(["col-1", "col-2", "col-3"], fill_value="-")
    table.extend(
        [
            ["foo"],
            ["foo", "foo"],
            ["foo", "foo"],
            ["foo", "bar", "foobar"],
        ]
    )

    padded_rows = [
        ["foo", "-", "-"],
        ["foo", "foo", "-"],
        ["foo", "foo", "-"],
        ["foo", "bar", "foobar"],
    ]
    assert list(table) == padded_rows

    table.drop_duplicates("cols", ignore_empty=False)

    remaining = [
        ["-", "-"],
        ["foo", "-"],
        ["foo", "-"],
        ["bar", "foobar"],
    ]
    assert list(table) == remaining
def test_scalar_columns():
    """Check that numeric strings become numbers and empty cells ``None``."""
    table = TabularData(["col-1", "col-2", "col-3"])
    table.extend([["0.1", "1", ""], ["2", "0.2", "0"]])

    payload = json.loads(ParallelCoordinatesRenderer(table).as_json())

    assert expected_format(payload)

    dimensions = payload["data"][0]["dimensions"]
    assert dimensions[0] == {
        "label": "col-1",
        "values": [0.1, 2.0],
    }
    assert dimensions[1] == {
        "label": "col-2",
        "values": [1.0, 0.2],
    }
    # The empty cell in the third column is rendered as None.
    assert dimensions[2] == {
        "label": "col-3",
        "values": [None, 0],
    }
def test_categorical_columns():
    """Check the encoding of categorical columns, including empty cells.

    Empty cells in a categorical column are labelled "Missing".
    """
    table = TabularData(["col-1", "col-2"])
    table.extend([["foo", ""], ["bar", "foobar"], ["foo", ""]])

    payload = json.loads(ParallelCoordinatesRenderer(table).as_json())

    assert expected_format(payload)

    dimensions = payload["data"][0]["dimensions"]
    assert dimensions[0] == {
        "label": "col-1",
        "values": [1, 0, 1],
        "tickvals": [1, 0, 1],
        "ticktext": ["foo", "bar", "foo"],
    }
    assert dimensions[1] == {
        "label": "col-2",
        "values": [1, 0, 1],
        "tickvals": [1, 0, 1],
        "ticktext": ["Missing", "foobar", "Missing"],
    }
def test_drop_duplicates_rich_text():
    """Check ``drop_duplicates("cols")`` when a cell holds rich text.

    The first row mixes a ``None`` cell with a ``ui.rich_text("-")`` cell in
    a table whose fill value is "-"; afterwards only one column remains.
    """
    from dvc.ui import ui

    table = TabularData(["col-1", "col-2", "col-3"], fill_value="-")
    table.extend(
        [
            ["foo", None, ui.rich_text("-")],
            ["foo", "foo"],
            ["foo", "foo"],
            ["foo", "bar", "foobar"],
        ]
    )

    # None is replaced by the fill value; the rich-text cell is kept as is.
    assert list(table) == [
        ["foo", "-", ui.rich_text("-")],
        ["foo", "foo", "-"],
        ["foo", "foo", "-"],
        ["foo", "bar", "foobar"],
    ]

    table.drop_duplicates("cols")

    single_column = [["-"], ["foo"], ["foo"], ["bar"]]
    assert list(table) == single_column
def test_dropna(axis, how, data, expected):
    """Check ``dropna(axis, how)`` on a three-column table filled with ``data``."""
    column_names = ["col-1", "col-2", "col-3"]
    table = TabularData(column_names)
    table.extend(data)

    table.dropna(axis, how)

    assert list(table) == expected
def test_dropna_subset(axis, expected):
    """Check ``dropna`` along ``axis`` restricted to ``col-1`` and ``col-2``."""
    table = TabularData(["col-1", "col-2", "col-3"])

    # Rows of increasing length, so the later columns have missing cells.
    rows_of_growing_length = [
        ["foo"],
        ["foo", "bar"],
        ["foo", "bar", "foobar"],
    ]
    table.extend(rows_of_growing_length)

    table.dropna(axis, subset=["col-1", "col-2"])

    assert list(table) == expected