Beispiel #1
0
    def test_normal_default_key(self):
        """as_dict() keys the rows by "table", or by ``default_key`` when one is given."""
        column_names = ["a", "b"]

        # Table name is None: the built-in fallback key is used.
        unnamed = TableData(None, column_names, [])
        assert unnamed.as_dict() == {"table": []}

        # Table name is an empty string: the caller-supplied key is used.
        blank_named = TableData("", column_names, [])
        assert blank_named.as_dict(default_key="dummy") == {"dummy": []}
Beispiel #2
0
    def test_normal_excel(self):
        """Load an Excel workbook through a mocked URL and check each loaded table."""
        url = "https://github.com/thombashi/valid/test/data/validdata.xlsx"
        fixture_path = os.path.join(os.path.dirname(__file__),
                                    "data/validdata.xlsx")

        # Register the local fixture bytes as the body of the mocked GET.
        with open(fixture_path, "rb") as fixture:
            workbook_bytes = fixture.read()
        responses.add(
            responses.GET,
            url,
            body=workbook_bytes,
            content_type="application/octet-stream",
            status=200,
        )

        sheet1 = TableData(
            "testsheet1",
            ["a1", "b1", "c1"],
            [
                ["aa1", "ab1", "ac1"],
                [1.0, 1.1, "a"],
                [2.0, 2.2, "bb"],
                [3.0, 3.3, "cc"],
            ],
        )
        sheet3 = TableData(
            "testsheet3",
            ["a3", "b3", "c3"],
            [
                ["aa3", "ab3", "ac3"],
                [4.0, 1.1, "a"],
                [5.0, "", "bb"],
                [6.0, 3.3, ""],
            ],
        )
        expected_tables = [sheet1, sheet3]

        loader = ptr.TableUrlLoader(url)
        assert loader.format_name == "excel"

        # Every table the loader yields must be one of the expected sheets.
        for loaded in loader.load():
            assert loaded.in_tabledata_list(expected_tables)
Beispiel #3
0
class Test_TableData_equals:
    """Equality checks between a dict-row table and a list-row table."""

    # Both tables carry the same name, headers and cell values; only the row
    # representation differs (dicts on the left, lists on the right).
    __LHS = TableData(
        "tablename",
        ["a", "b"],
        [{"a": 1, "b": 2}, {"a": 11, "b": 12}],
    )
    __RHS = TableData("tablename", ["a", "b"], [[1, 2], [11, 12]])

    @pytest.mark.parametrize(
        ["lhs", "rhs", "cmp_by_dp", "expected"],
        [
            [__LHS, __RHS, True, True],
            [__LHS, __RHS, False, False],
        ],
    )
    def test_normal(self, lhs, rhs, cmp_by_dp, expected):
        """Check equals(), the ==/!= operators and in_tabledata_list()."""
        no_rows = TableData("tablename", ["a", "b"], None)

        # equals(): result depends on cmp_by_dp; a row-less table never matches.
        assert lhs.equals(rhs, cmp_by_dp=cmp_by_dp) == expected
        assert lhs.equals(no_rows, cmp_by_dp=cmp_by_dp) is False
        assert no_rows.equals(rhs, cmp_by_dp=cmp_by_dp) is False

        # The operators report the two tables as different in both cases.
        assert (lhs == rhs) is False
        assert (lhs != rhs) is True

        # in_tabledata_list(): membership follows the same comparison mode.
        assert lhs.in_tabledata_list(
            [rhs, no_rows], cmp_by_dp=cmp_by_dp) == expected
        assert lhs.in_tabledata_list([lhs, no_rows], cmp_by_dp=cmp_by_dp)
        assert lhs.in_tabledata_list([rhs, lhs, no_rows], cmp_by_dp=cmp_by_dp)
        assert no_rows.in_tabledata_list(
            [rhs, lhs], cmp_by_dp=cmp_by_dp) is False
Beispiel #4
0
    def test_normal_multi_sheet(self, tmpdir):
        """Dump two tables with each writer class and compare what is read back."""
        for writer_class in table_writer_class_list:
            workbook_path = str(tmpdir.join("test.xlsx"))
            first_table = TableData(
                table_name="first",
                header_list=["ha1", "hb1", "hc1"],
                row_list=[[1.0, 2.0, 3.0], [11.0, 12.0, 13.0]],
            )
            second_table = TableData(
                table_name="second",
                header_list=["ha2", "hb2", "hc2"],
                row_list=[[11.0, 12.0, 13.0], [1.0, 2.0, 3.0]],
            )
            written_tables = [first_table, second_table]

            writer = writer_class()
            for table in written_tables:
                writer.from_tabledata(table)
                # The writer is closed explicitly once, after the last dump.
                writer.dump(workbook_path, close_after_write=False)
            writer.close()

            loaded_tables = ExcelTableFileLoader(workbook_path).load()
            for written, loaded in zip(written_tables, loaded_tables):
                assert written == loaded
    def test_normal_multi_table(self, tmpdir):
        """Dump two tables into one SQLite file and check both are read back."""
        db_path = str(tmpdir.join("test.sqlite"))
        first_table = TableData(
            table_name="first",
            header_list=["ha1", "hb1", "hc1"],
            row_list=[[1.0, 2.0, 3.0], [11.0, 12.0, 13.0]],
        )
        second_table = TableData(
            table_name="second",
            header_list=["ha2", "hb2", "hc2"],
            row_list=[[11.0, 12.0, 13.0], [1.0, 2.0, 3.0]],
        )
        written_tables = [first_table, second_table]

        writer = ptw.SqliteTableWriter()
        for table in written_tables:
            writer.from_tabledata(table)
            # The writer is closed explicitly once, after the last dump.
            writer.dump(db_path, close_after_write=False)
        writer.close()

        # Track how many pairs were compared so an empty load cannot pass.
        compared = 0
        loaded_tables = SqliteFileLoader(db_path).load()
        for compared, (written, loaded) in enumerate(
                zip(written_tables, loaded_tables), start=1):
            assert written == loaded

        assert compared == 2
Beispiel #6
0
    def test_normal_excel(self, tmpdir):
        """Write an xlsx file, load it by path, and check each loaded table."""
        file_path = "/tmp/valid/test/data/validdata.xlsx"
        workbook_path = Path(str(tmpdir.join(file_path)))
        workbook_path.parent.makedirs_p()

        sheet1 = TableData(
            "testsheet1",
            ["a1", "b1", "c1"],
            [
                ["aa1", "ab1", "ac1"],
                [1.0, 1.1, "a"],
                [2.0, 2.2, "bb"],
                [3.0, 3.3, 'cc"dd"'],
            ],
        )
        sheet3 = TableData(
            "testsheet3",
            ["a3", "b3", "c3"],
            [
                ["aa3", "ab3", "ac3"],
                [4.0, 1.1, "a"],
                [5.0, "", "bb"],
                [6.0, 3.3, ""],
            ],
        )
        tabledata_list = [sheet1, sheet3]

        writer = ExcelXlsxTableWriter()
        writer.open(workbook_path)
        for source_table in tabledata_list:
            writer.from_tabledata(source_table)
        # NOTE(review): write_table() runs once, after the loop - confirm
        # whether every table is meant to be written or only the last one.
        writer.write_table()
        writer.close()

        loader = ptr.TableFileLoader(workbook_path)
        assert loader.format_name == "excel"

        for loaded in loader.load():
            print(dump_tabledata(loaded))

            assert loaded in tabledata_list
Beispiel #7
0
    def test_normal_multi_table(self, con_a0, con_b0):
        """Run the "file" sub-command on two databases and check the merged output."""
        out_db_path = "test.sqlite"
        runner = CliRunner()

        with runner.isolated_filesystem():
            cli_args = [
                "-o",
                out_db_path,
                "file",
                con_a0.database_path,
                con_b0.database_path,
            ]
            result = runner.invoke(cmd, cli_args)
            print_traceback(result)
            assert result.exit_code == ExitCode.SUCCESS

            table_a = TableData(
                TEST_TABLE_NAME_A, ["attr_a", "attr_b"], [[1, 2], [3, 4]])
            table_b = TableData(
                TEST_TABLE_NAME_B, ["ba", "bb"], [[101, 102], [103, 104]])
            expected_list = [table_a, table_b]

            for loaded in SqliteFileLoader(out_db_path).load():
                # The table named by SourceInfo is not part of the comparison.
                if loaded.table_name == SourceInfo.get_table_name():
                    continue

                print("[actual]\n{}".format(loaded))
                for record in loaded.value_matrix:
                    print("  {}".format(record))

                assert loaded in expected_list
Beispiel #8
0
    def test_normal_add_primary_key_column(self, tmpdir):
        """Check the extra key column: named "id" by default, or as ``primary_key``."""
        db_file = tmpdir.join("tmp.db")
        con = SimpleSQLite(str(db_file), "w")

        # Case 1: no primary_key given, so the added column is expected as "id".
        table_name = "table1"
        con.create_table_from_data_matrix(
            table_name=table_name,
            attr_names=["AA", "BB"],
            data_matrix=[["a", 11], ["bb", 12]],
            add_primary_key_column=True,
        )
        expected_default = TableData(
            table_name=table_name,
            headers=["id", "AA", "BB"],
            rows=[[1, "a", 11], [2, "bb", 12]],
        )
        assert con.select_as_tabledata(table_name) == expected_default
        schema = con.schema_extractor.fetch_table_schema(table_name)
        assert schema.primary_key == "id"

        # Case 2: an explicit primary_key names the added column.
        table_name = "table2"
        con.create_table_from_data_matrix(
            table_name=table_name,
            attr_names=["AA", "BB"],
            data_matrix=[["a", 11], ["bb", 12]],
            primary_key="pkey",
            add_primary_key_column=True,
        )
        expected_named = TableData(
            table_name=table_name,
            headers=["pkey", "AA", "BB"],
            rows=[[1, "a", 11], [2, "bb", 12]],
        )
        assert con.select_as_tabledata(table_name) == expected_named
        schema = con.schema_extractor.fetch_table_schema(table_name)
        assert schema.primary_key == "pkey"
Beispiel #9
0
    def test_normal_unmatch(self, table_name, headers, rows, pattern,
                            is_invert_match, is_re_match, expected):
        """filter_column() with the given match options yields the expected table."""
        filter_options = {
            "patterns": pattern,
            "is_invert_match": is_invert_match,
            "is_re_match": is_re_match,
        }
        source_table = TableData(table_name, headers, rows)
        filtered = source_table.filter_column(**filter_options)

        assert filtered == expected
Beispiel #10
0
class Test_TableData_transpose:
    """transpose() turns the rows of a table into its columns."""

    @pytest.mark.parametrize(
        ["value", "expected"],
        [
            [
                # Two rows of three values ...
                TableData(
                    "tablename",
                    ["a", "b"],
                    [[1, 2, 3], [1, 2, 3]],
                ),
                # ... become three rows of two values.
                TableData(
                    "tablename",
                    ["a", "b"],
                    [[1, 1], [2, 2], [3, 3]],
                ),
            ],
        ],
    )
    def test_normal(self, value, expected):
        """The transposed table compares equal to the expected table."""
        transposed = value.transpose()
        assert transposed == expected
Beispiel #11
0
    def test_normal(self, table_name, header_list, row_list):
        """as_dataframe() equals a DataFrame built directly from the same rows."""
        source_table = TableData(table_name, header_list, row_list)

        # Reference frame: column labels are only set when headers are given.
        reference_frame = pandas.DataFrame(row_list)
        if typepy.is_not_empty_sequence(header_list):
            reference_frame.columns = header_list

        print("lhs: {}".format(source_table.as_dataframe()))
        print("rhs: {}".format(reference_frame))

        assert source_table.as_dataframe().equals(reference_frame)
Beispiel #12
0
    def test_normal(self):
        """from_dataframe() builds a table that equals() the hand-written one."""
        source_frame = pandas.DataFrame(
            [
                [0, 0.1, "a"],
                [1, 1.1, "bb"],
                [2, 2.2, "ccc"],
            ],
            columns=["id", "value", "name"],
        )
        # The float column is written as Decimal values in the expected table.
        expected = TableData(
            "tablename",
            ["id", "value", "name"],
            [
                [0, Decimal("0.1"), "a"],
                [1, Decimal("1.1"), "bb"],
                [2, Decimal("2.2"), "ccc"],
            ],
        )

        converted = TableData.from_dataframe(source_frame, "tablename")
        assert converted.equals(expected)
Beispiel #13
0
    def test_normal_pattern_match(self, table_name, headers, rows, pattern,
                                  is_invert_match, expected):
        """filter_column() with regex patterns combined by PatternMatch.AND.

        The table is built from the parametrized name, headers and rows, then
        filtered with ``is_re_match=True``; the filtered table must equal
        ``expected``.
        """
        tabledata = TableData(table_name, headers, rows)
        actual = tabledata.filter_column(
            patterns=pattern,
            is_invert_match=is_invert_match,
            is_re_match=True,
            pattern_match=PatternMatch.AND,
        )

        # Dump the filtered result (not the unfiltered input) so the diagnostic
        # output shows the same two values the assertion compares.
        dumps_results(expected=expected, actual=actual)

        assert actual == expected
class Test_SQLiteTableDataSanitizer_dup_col_handler:
    """Duplicate-header handling of SQLiteTableDataSanitizer.normalize()."""

    @pytest.mark.parametrize(
        ["table_name", "headers", "dup_col_handler", "expected"],
        [
            [
                "all attrs are duplicated",
                ["A", "A", "A", "A", "A"],
                "rename",
                TableData("all_attrs_are_duplicated",
                          ["A", "A_1", "A_2", "A_3", "A_4"], []),
            ],
            [
                "recursively duplicated attrs",
                ["A", "A", "A_1", "A_1", "A_2", "A_1_1", "A_1_1"],
                # Both cases expect renamed headers, so both use the "rename"
                # handler (this slot is the handler, not the table name).
                "rename",
                TableData(
                    "recursively_duplicated_attrs",
                    ["A", "A_3", "A_1", "A_1_2", "A_2", "A_1_1", "A_1_1_1"],
                    [],
                ),
            ],
        ],
    )
    def test_normal_(self, table_name, headers, dup_col_handler, expected):
        """Normalizing a table with duplicated headers yields the expected table."""
        new_tabledata = SQLiteTableDataSanitizer(
            TableData(table_name, headers, []),
            dup_col_handler=dup_col_handler).normalize()

        # pytablewriter is optional here: it is only used for diagnostic
        # output, so only the import itself is guarded.
        try:
            from pytablewriter import dumps_tabledata
        except ImportError:
            pass
        else:
            print_test_result(expected=dumps_tabledata(expected),
                              actual=dumps_tabledata(new_tabledata))

        assert new_tabledata.equals(expected)

    @pytest.mark.parametrize(
        ["table_name", "headers", "expected"],
        [
            ["duplicate columns", ["a", "a"], ValueError],
            ["duplicate columns", ["AA", "b", "AA"], ValueError],
        ],
    )
    def test_exception(self, table_name, headers, expected):
        """With the "error" handler, duplicated headers raise the expected error."""
        with pytest.raises(expected):
            SQLiteTableDataSanitizer(TableData(table_name, headers, []),
                                     dup_col_handler="error").normalize()
Beispiel #15
0
    def test_normal(self, lhs, rhs, cmp_by_dp, expected):
        """Check equals(), the ==/!= operators and in_tabledata_list()."""
        no_rows = TableData("tablename", ["a", "b"], None)

        # equals(): result depends on cmp_by_dp; a row-less table never matches.
        assert lhs.equals(rhs, cmp_by_dp=cmp_by_dp) == expected
        assert lhs.equals(no_rows, cmp_by_dp=cmp_by_dp) is False
        assert no_rows.equals(rhs, cmp_by_dp=cmp_by_dp) is False

        # The operators report the two tables as different in both cases.
        assert (lhs == rhs) is False
        assert (lhs != rhs) is True

        # in_tabledata_list(): membership follows the same comparison mode.
        assert lhs.in_tabledata_list(
            [rhs, no_rows], cmp_by_dp=cmp_by_dp) == expected
        assert lhs.in_tabledata_list([lhs, no_rows], cmp_by_dp=cmp_by_dp)
        assert lhs.in_tabledata_list([rhs, lhs, no_rows], cmp_by_dp=cmp_by_dp)
        assert no_rows.in_tabledata_list(
            [rhs, lhs], cmp_by_dp=cmp_by_dp) is False
    def test_normal_escape_html_tag_from_tabledata(self, capsys):
        """With is_escape_html_tag set, tags and quotes in cells are written escaped."""
        writer = table_writer_class()
        source_table = TableData(
            table_name="",
            header_list=["no", "text"],
            row_list=[
                [1, "<caption>Table 'formatting for Jupyter Notebook.</caption>"],
            ],
        )
        writer.from_tabledata(source_table)
        writer.is_escape_html_tag = True
        writer.write_table()

        expected = dedent("""\
            |no |                                   text                                    |
            |--:|---------------------------------------------------------------------------|
            |  1|&lt;caption&gt;Table &#x27;formatting for Jupyter Notebook.&lt;/caption&gt;|

            """)

        captured_out, captured_err = capsys.readouterr()
        print_test_result(
            expected=expected, actual=captured_out, error=captured_err)

        assert captured_out == expected
Beispiel #17
0
    def generate(self, provider_list, rows, table_name=None, headers=None):
        """Build a table of fake values, one column per provider.

        Args:
            provider_list (list):
                Provider names; each one is looked up on the fake-data
                generator and called once per row.
            rows (int):
                Number of rows to generate.
            table_name:
                Passed through as the name of the returned table.
            headers (list):
                Header names. When falsy, ``provider_list`` is used instead.

        Returns:
            tabledata.TableData: Generated fake tabular data.

        Raises:
            ValueError: If ``rows`` is negative.
        """

        self.__validate_provider(provider_list)

        if rows < 0:
            raise ValueError("invalid rows")

        column_names = headers or provider_list

        value_matrix = []
        for _row in range(rows):
            record = [
                getattr(self.__fake, provider_name)()
                for provider_name in provider_list
            ]
            value_matrix.append(record)

        return TableData(table_name, column_names, value_matrix)
    def test_normal_margin_2(self, capsys):
        """Writing with ``margin = 2`` produces the expected padded table text."""
        writer = table_writer_class()
        margin_table = TableData(
            table_name="margin 2",
            header_list=header_list,
            row_list=value_matrix,
        )
        writer.from_tabledata(margin_table)
        writer.margin = 2
        writer.write_table()

        expected = dedent("""\
            .. table:: margin 2

                +-----+---------+-------+-------+--------+
                |  a  |    b    |   c   |  dd   |   e    |
                +=====+=========+=======+=======+========+
                |  1  |  123.1  |  a    |  1.0  |     1  |
                +-----+---------+-------+-------+--------+
                |  2  |    2.2  |  bb   |  2.2  |   2.2  |
                +-----+---------+-------+-------+--------+
                |  3  |    3.3  |  ccc  |  3.0  |  cccc  |
                +-----+---------+-------+-------+--------+

            """)

        captured_out, captured_err = capsys.readouterr()
        print_test_result(
            expected=expected, actual=captured_out, error=captured_err)

        assert captured_out == expected
Beispiel #19
0
    def tabledata(self):
        """
        Build a table from this object's table name, headers and value matrix.

        :return: Table data.
        :rtype: tabledata.TableData
        """

        name = self.table_name
        headers = self.header_list
        matrix = self.value_matrix
        return TableData(name, headers, matrix)
Beispiel #20
0
    def test_normal_single_tabledata(self, capsys):
        """A single table is written as a titled table with the expected text."""
        writer = table_writer_class()
        mapping_rows = [
            ["csv", "CsvTableFileLoader"],
            ["excel", "ExcelTableFileLoader"],
            ["html", "HtmlTableFileLoader"],
            ["markdown", "MarkdownTableFileLoader"],
            ["mediawiki", "MediaWikiTableFileLoader"],
            ["json", "JsonTableFileLoader"],
            ["Long Format Name", "Loader"],
        ]
        mapping_table = TableData(
            "loader_mapping", ["Name", "Loader"], mapping_rows)
        writer.from_tabledata(mapping_table)
        writer.write_table()

        expected = dedent("""\
            # loader_mapping
            |      Name      |         Loader         |
            |----------------|------------------------|
            |csv             |CsvTableFileLoader      |
            |excel           |ExcelTableFileLoader    |
            |html            |HtmlTableFileLoader     |
            |markdown        |MarkdownTableFileLoader |
            |mediawiki       |MediaWikiTableFileLoader|
            |json            |JsonTableFileLoader     |
            |Long Format Name|Loader                  |
            """)

        captured_out, captured_err = capsys.readouterr()
        print_test_result(
            expected=expected, actual=captured_out, error=captured_err)

        assert captured_out == expected
Beispiel #21
0
    def to_table_data(self):
        """Yield the source data as a single ``TableData``.

        When the loader supplies no headers, the first source row is used as
        the header row and the remaining rows are the data; otherwise the
        loader's headers are used and every source row is data. The loader's
        table count is incremented before the table is yielded.

        Yields:
            TableData: The table, named by the loader's ``make_table_name()``
            and created with the loader's ``quoting_flags``.

        Raises:
            InvalidDataError: If a header taken from the first row satisfies
                ``typepy.is_null_string``, or if there are no data rows.
        """
        if typepy.is_empty_sequence(self._loader.header_list):
            header_list = self._source_data[0]

            if any(typepy.is_null_string(header) for header in header_list):
                # Adjacent string literals are joined without a separator, so
                # each sentence carries its own trailing space.
                raise InvalidDataError(
                    "the first line includes empty string item. "
                    "all of the items should contain header name. "
                    "actual={}".format(header_list))

            data_matrix = self._source_data[1:]
        else:
            header_list = self._loader.header_list
            data_matrix = self._source_data

        if not data_matrix:
            raise InvalidDataError(
                "data row must be greater or equal than one")

        self._loader.inc_table_count()

        yield TableData(self._loader.make_table_name(),
                        header_list,
                        data_matrix,
                        quoting_flags=self._loader.quoting_flags)
Beispiel #22
0
    def test_normal_csv(self):
        """Load CSV text and compare the loaded table with the expected one."""
        csv_text = dedent(
            """\
            "attr_a","attr_b","attr_c"
            1,4,"a"
            2,2.1,"bb"
            3,120.9,"ccc"
            """
        )

        # The non-integer numbers are written as strings in the expected rows.
        expected_table = TableData(
            "csv1",
            ["attr_a", "attr_b", "attr_c"],
            [
                [1, 4, "a"],
                [2, "2.1", "bb"],
                [3, "120.9", "ccc"],
            ],
        )
        expected_list = [expected_table]

        loader = ptr.TableTextLoader(csv_text, format_name="csv")
        assert loader.format_name == "csv"

        for loaded, wanted in zip(loader.load(), expected_list):
            print(dumps_tabledata(wanted))
            print(dumps_tabledata(loaded))

            assert loaded.equals(wanted)
Beispiel #23
0
    def normalize_table(self, table_data: TableData, dup_col_handler=None) -> TableData:
        """Sanitize a table for SQLite and optionally replace symbols in its headers.

        :param table_data: Table to normalize.
        :param dup_col_handler:
            Handler passed to ``SQLiteTableDataSanitizer``;
            ``DEFAULT_DUP_COL_HANDLER`` is used when ``None``.
        :return:
            The sanitized table. When ``self._symbol_replace_value`` is not
            ``None``, a new table whose headers have unprintable characters
            and symbols replaced.
        """
        from pathvalidate import replace_symbol, replace_unprintable_char
        from simplesqlite import SQLiteTableDataSanitizer

        handler = DEFAULT_DUP_COL_HANDLER if dup_col_handler is None else dup_col_handler

        sanitizer = SQLiteTableDataSanitizer(
            table_data,
            dup_col_handler=handler,
            is_type_inference=self._is_type_inference,
            max_workers=self._max_workers,
        )
        sanitized = sanitizer.normalize()

        if self._symbol_replace_value is None:
            return sanitized

        def _replace_header_symbols(header):
            # Replace unprintable characters first, then symbols.
            return replace_symbol(
                replace_unprintable_char(header),
                self._symbol_replace_value,
                is_replace_consecutive_chars=True,
                is_strip=True,
            )

        return TableData(
            sanitized.table_name,
            [_replace_header_symbols(header) for header in sanitized.headers],
            sanitized.rows,
            dp_extractor=sanitized.dp_extractor,
            # Type hints are read from the input table, not the sanitized one.
            type_hints=table_data.dp_extractor.column_type_hints,
        )
Beispiel #24
0
    def test_normal(self, table_name, headers, rows, expected):
        """Each row from as_tuple() matches the corresponding expected tuple."""
        source_table = TableData(table_name, headers, rows)

        for actual_row, expected_row in zip(source_table.as_tuple(), expected):
            print(f"lhs: {actual_row}", file=sys.stderr)
            print(f"rhs: {expected_row}", file=sys.stderr)

            assert tuple(actual_row) == expected_row
Beispiel #25
0
    def test_normal_json(self):
        """Load JSON text whose records have different keys and compare the table."""
        json_text = dedent(
            """\
            [
                {"attr_a": 1},
                {"attr_b": 2.1, "attr_c": "bb"}
            ]"""
        )

        expected_table = TableData(
            "json1",
            ["attr_a", "attr_b", "attr_c"],
            [
                {"attr_a": 1},
                {"attr_b": 2.1, "attr_c": "bb"},
            ],
        )
        expected_list = [expected_table]

        loader = ptr.TableTextLoader(json_text, format_name="json")
        assert loader.format_name == "json"

        for loaded, wanted in zip(loader.load(), expected_list):
            print(dumps_tabledata(wanted))
            print(dumps_tabledata(loaded))

            assert loaded.equals(wanted)
Beispiel #26
0
    def test_normal_csv(self, url, format_name):
        """Load CSV served from a mocked URL and check each loaded table."""
        csv_body = dedent("""\
                "attr_a","attr_b","attr_c"
                1,4,"a"
                2,2.1,"bb"
                3,120.9,"ccc"
                """)
        responses.add(
            responses.GET,
            url,
            body=csv_body,
            content_type="text/plain; charset=utf-8",
            status=200,
        )

        # The non-integer numbers are written as strings in the expected rows.
        expected_table = TableData(
            "csv1",
            ["attr_a", "attr_b", "attr_c"],
            [
                [1, 4, "a"],
                [2, "2.1", "bb"],
                [3, "120.9", "ccc"],
            ],
        )
        expected_list = [expected_table]

        loader = ptr.TableUrlLoader(url, format_name)
        assert loader.format_name == "csv"

        for loaded in loader.load():
            assert loaded.in_tabledata_list(expected_list)
Beispiel #27
0
    def test_normal_json(self, tmpdir, file_path, format_name):
        """Write a JSON file under tmpdir, load it by path and compare the table."""
        json_path = Path(str(tmpdir.join(file_path)))
        json_path.parent.makedirs_p()

        json_text = dedent(
            """\
                [
                    {"attr_a": 1},
                    {"attr_b": 2.1, "attr_c": "bb"}
                ]"""
        )
        with open(json_path, "w") as json_file:
            json_file.write(json_text)

        expected_table = TableData(
            "validdata",
            ["attr_a", "attr_b", "attr_c"],
            [
                {"attr_a": 1},
                {"attr_b": 2.1, "attr_c": "bb"},
            ],
        )
        expected_list = [expected_table]

        loader = ptr.TableFileLoader(json_path, format_name=format_name)
        assert loader.format_name == "json"

        for loaded, wanted in zip(loader.load(), expected_list):
            assert loaded.equals(wanted)
Beispiel #28
0
    def test_normal_csv(self, tmpdir, file_path, format_name):
        """Write a CSV file under tmpdir, load it by path and compare the table."""
        # The symbol-stripped path is used both as file name and as table name.
        filename = pv.replace_symbol(file_path, "")
        extension = Path(file_path).ext
        csv_path = Path(six.text_type(tmpdir.join(filename + extension)))
        csv_path.parent.makedirs_p()

        csv_text = dedent(
            """\
                "attr_a","attr_b","attr_c"
                1,4,"a"
                2,2.1,"bb"
                3,120.9,"ccc"
                """
        )
        with open(csv_path, "w") as csv_file:
            csv_file.write(csv_text)

        expected_table = TableData(
            filename,
            ["attr_a", "attr_b", "attr_c"],
            [
                [1, 4, "a"],
                [2, "2.1", "bb"],
                [3, "120.9", "ccc"],
            ],
        )
        expected_list = [expected_table]

        loader = ptr.TableFileLoader(csv_path, format_name=format_name)
        assert loader.format_name == "csv"

        for loaded, wanted in zip(loader.load(), expected_list):
            print(dump_tabledata(wanted))
            print(dump_tabledata(loaded))

            assert loaded.equals(wanted)
Beispiel #29
0
    def __parse_html(self, table):
        """Extract headers and data rows from an HTML table element.

        Rows containing at least one ``td`` become data rows (the stripped text
        of every ``td``/``th`` cell). Among rows without any ``td``, the first
        one that has ``th`` cells supplies the headers; the others are skipped.

        :param table: Element exposing ``find_all``; its ``tr`` rows are read.
        :return: Table built from the extracted headers and data rows.
        :raises ValueError: If no data row was found.
        """
        headers = []
        body_rows = []

        self.__parse_tag_id(table)

        table_rows = table.find_all("tr")
        cell_tag_pattern = re.compile("td|th")

        for tr in table_rows:
            if not typepy.is_empty_sequence(tr.find_all("td")):
                # Data row: collect both td and th cells, in document order.
                body_rows.append(
                    [cell.get_text().strip() for cell in tr.find_all(cell_tag_pattern)]
                )
                continue

            # No td cells in this row; only the first header row is used.
            if typepy.is_not_empty_sequence(headers):
                continue

            th_cells = tr.find_all("th")
            if typepy.is_empty_sequence(th_cells):
                continue

            headers = [th.text.strip() for th in th_cells]

        if typepy.is_empty_sequence(body_rows):
            raise ValueError("data matrix is empty")

        self._loader.inc_table_count()

        return TableData(
            self._make_table_name(),
            headers,
            body_rows,
            dp_extractor=self._loader.dp_extractor,
        )
Beispiel #30
0
class Test_SimpleSQLite_select_as_tabledata:
    """select_as_tabledata() with per-column type hints."""

    @pytest.mark.parametrize(
        ["value", "type_hints", "expected"],
        [[
            TableData(
                "typehints",
                ["aa", "ab", "ac"],
                [[1, 4, "10"], [2, 2.1, "11"], [3, 120.9, "12"]],
            ),
            {
                "aa": typepy.String,
                # Keyed by an existing column name; the third column ("ac") is
                # the one expected back as integers.
                "ac": typepy.Integer
            },
            [["1", 4, 10], ["2", Decimal("2.1"), 11],
             ["3", Decimal("120.9"), 12]],
        ]],
    )
    def test_normal(self, tmpdir, value, type_hints, expected):
        """Store a table, select it back with type hints and compare the values.

        The first column is hinted as String and the third as Integer, so the
        expected matrix holds strings in column one and integers in column
        three.
        """
        p_db = tmpdir.join("tmp.db")

        con = SimpleSQLite(str(p_db), "w")
        con.create_table_from_tabledata(value)

        assert con.fetch_table_names() == [value.table_name]
        assert con.fetch_attr_names(value.table_name) == value.headers

        actual = con.select_as_tabledata(columns=value.headers,
                                         table_name=value.table_name,
                                         type_hints=type_hints)
        assert actual.value_matrix == expected