Example #1
    def test_normal_json(self, tmpdir, file_path, format_name):
        p_file_path = Path(str(tmpdir.join(file_path)))
        p_file_path.parent.makedirs_p()

        with open(p_file_path, "w") as f:
            f.write(
                dedent(
                    """\
                [
                    {"attr_a": 1},
                    {"attr_b": 2.1, "attr_c": "bb"}
                ]"""
                )
            )

        expected_list = [
            TableData(
                "validdata",
                ["attr_a", "attr_b", "attr_c"],
                [{"attr_a": 1}, {"attr_b": 2.1, "attr_c": "bb"}],
            )
        ]
        loader = ptr.TableFileLoader(p_file_path, format_name=format_name)

        assert loader.format_name == "json"

        for table_data, expected in zip(loader.load(), expected_list):
            assert table_data.equals(expected)
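The parametrization that feeds file_path and format_name into this test is not shown above. Stripped of the pytest harness, the same loading pattern can be written as a short standalone script; this is a minimal sketch, with sample.json and its contents being illustrative stand-ins rather than project test data:

import json

import pytablereader as ptr

# Write a small JSON table to disk (illustrative data only).
with open("sample.json", "w") as f:
    json.dump([{"attr_a": 1}, {"attr_b": 2.1, "attr_c": "bb"}], f)

# format_name can be omitted; the loader then infers "json" from the file extension.
loader = ptr.TableFileLoader("sample.json")
assert loader.format_name == "json"

for table_data in loader.load():
    # Each yielded item is a TableData holding the merged headers and rows.
    print(table_data)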
Example #2
    def test_normal_csv(self, tmpdir, file_path, format_name):
        filename = pv.replace_symbol(file_path, "")
        p_file_path = Path(six.text_type(tmpdir.join(filename + Path(file_path).ext)))
        p_file_path.parent.makedirs_p()

        with open(p_file_path, "w") as f:
            f.write(
                dedent(
                    """\
                "attr_a","attr_b","attr_c"
                1,4,"a"
                2,2.1,"bb"
                3,120.9,"ccc"
                """
                )
            )

        expected_list = [
            TableData(
                filename,
                ["attr_a", "attr_b", "attr_c"],
                [[1, 4, "a"], [2, "2.1", "bb"], [3, "120.9", "ccc"]],
            )
        ]
        loader = ptr.TableFileLoader(p_file_path, format_name=format_name)

        assert loader.format_name == "csv"

        for tabledata, expected in zip(loader.load(), expected_list):
            print(dump_tabledata(expected))
            print(dump_tabledata(tabledata))

            assert tabledata.equals(expected)
Example #3
    def test_smoke(self, tmpdir, filename):
        p = tmpdir.join("tmp.db")
        con = SimpleSQLite(str(p), "w")

        test_data_file_path = os.path.join(
            os.path.dirname(__file__), "data", filename)
        loader = ptr.TableFileLoader(test_data_file_path)

        success_count = 0

        for tabledata in loader.load():
            if tabledata.is_empty():
                continue

            print(ptw.dump_tabledata(tabledata))

            try:
                con.create_table_from_tabledata(
                    ptr.SQLiteTableDataSanitizer(tabledata).sanitize())
                success_count += 1
            except ValueError as e:
                print(e)

        con.commit()

        assert success_count > 0
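Reduced to a plain script, the load-and-store flow of this smoke test looks as follows. A rough sketch assuming a local file named input.csv and a recent API; older pytablereader releases name the sanitizer method sanitize() instead of normalize():

import pytablereader as ptr
from simplesqlite import SimpleSQLite

con = SimpleSQLite("out.sqlite", "w")
loader = ptr.TableFileLoader("input.csv")  # input.csv is an assumed local file

for table_data in loader.load():
    if table_data.is_empty():
        continue

    # Rename the table/columns so they are valid SQLite identifiers.
    sanitized = ptr.SQLiteTableDataSanitizer(table_data).normalize()
    con.create_table_from_tabledata(sanitized)

con.commit()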
Example #4
    def test_normal_json(self, tmpdir, file_path, format_name):
        p_file_path = Path(str(tmpdir.join(file_path)))
        p_file_path.parent.makedirs_p()

        with open(p_file_path, "w") as f:
            f.write('''[
        {"attr_a": 1},
        {"attr_b": 2.1, "attr_c": "bb"}
    ]''')

        expected_list = [
            ptr.TableData("validdata_json1", ["attr_a", "attr_b", "attr_c"], [
                {
                    'attr_a': 1
                },
                {
                    'attr_b': 2.1,
                    'attr_c': 'bb'
                },
            ])
        ]

        loader = ptr.TableFileLoader(p_file_path, format_name)

        assert loader.format_name == "json"

        for tabledata, expected in zip(loader.load(), expected_list):
            assert tabledata == expected
Example #5
    def test_normal_excel(self, tmpdir):
        file_path = "/tmp/valid/test/data/validdata.xlsx"
        p_file_path = Path(str(tmpdir.join(file_path)))
        p_file_path.parent.makedirs_p()

        tabledata_list = [
            TableData(
                "testsheet1",
                ["a1", "b1", "c1"],
                [["aa1", "ab1", "ac1"], [1.0, 1.1, "a"], [2.0, 2.2, "bb"], [3.0, 3.3, 'cc"dd"']],
            ),
            TableData(
                "testsheet3",
                ["a3", "b3", "c3"],
                [["aa3", "ab3", "ac3"], [4.0, 1.1, "a"], [5.0, "", "bb"], [6.0, 3.3, ""]],
            ),
        ]

        writer = ExcelXlsxTableWriter()
        writer.open(p_file_path)
        for tabledata in tabledata_list:
            writer.from_tabledata(tabledata)
            writer.write_table()  # write each TableData to its own worksheet
        writer.close()

        loader = ptr.TableFileLoader(p_file_path)

        assert loader.format_name == "excel"

        for tabledata in loader.load():
            print(dump_tabledata(tabledata))

            assert tabledata in tabledata_list
Example #6
    def test_normal_ssv(self, tmpdir):
        p_file_path = Path(six.text_type(tmpdir.join("testdata.txt")))
        p_file_path.parent.makedirs_p()

        with open(p_file_path, "w") as f:
            f.write(
                dedent(
                    """\
                USER       PID %CPU %MEM    VSZ   RSS TTY      STAT START   TIME COMMAND
                root         1  0.0  0.4  77664  8784 ?        Ss   May11   0:02 /sbin/init
                root         2  0.0  0.0      0     0 ?        S    May11   0:00 [kthreadd]
                root         4  0.0  0.0      0     0 ?        I<   May11   0:00 [kworker/0:0H]
                root         6  0.0  0.0      0     0 ?        I<   May11   0:00 [mm_percpu_wq]
                root         7  0.0  0.0      0     0 ?        S    May11   0:01 [ksoftirqd/0]
                """
                )
            )

        expected_list = [
            TableData(
                "testdata",
                [
                    "USER",
                    "PID",
                    "%CPU",
                    "%MEM",
                    "VSZ",
                    "RSS",
                    "TTY",
                    "STAT",
                    "START",
                    "TIME",
                    "COMMAND",
                ],
                [
                    ["root", 1, 0, 0.4, 77664, 8784, "?", "Ss", "May11", "0:02", "/sbin/init"],
                    ["root", 2, 0, 0, 0, 0, "?", "S", "May11", "0:00", "[kthreadd]"],
                    ["root", 4, 0, 0, 0, 0, "?", "I<", "May11", "0:00", "[kworker/0:0H]"],
                    ["root", 6, 0, 0, 0, 0, "?", "I<", "May11", "0:00", "[mm_percpu_wq]"],
                    ["root", 7, 0, 0, 0, 0, "?", "S", "May11", "0:01", "[ksoftirqd/0]"],
                ],
            )
        ]
        loader = ptr.TableFileLoader(p_file_path, format_name="ssv")

        assert loader.format_name == "csv"

        for tabledata, expected in zip(loader.load(), expected_list):
            print(dump_tabledata(expected))
            print(dump_tabledata(tabledata))

            assert tabledata.equals(expected)
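Passing format_name="ssv" selects whitespace-splitting that is handled by the CSV loader family, which is why the test asserts loader.format_name == "csv". The same behavior in a standalone sketch, using a temporary file and made-up two-column data:

import tempfile

import pytablereader as ptr

# Space-separated table with a header row (illustrative content only).
ssv_text = "NAME  VALUE\nfoo   1\nbar   2\n"

with tempfile.NamedTemporaryFile("w", suffix=".txt", delete=False) as f:
    f.write(ssv_text)
    ssv_path = f.name

loader = ptr.TableFileLoader(ssv_path, format_name="ssv")
assert loader.format_name == "csv"  # "ssv" is routed to the CSV loader

for table_data in loader.load():
    print(table_data)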
Example #7
    def test_normal(self, tmpdir, file_path, format_name, expected):
        test_file_path = Path(six.text_type(tmpdir.join(
            Path(MultiByteStrDecoder(file_path).unicode_str))))
        test_file_path.parent.makedirs_p()

        with open(test_file_path, "w") as f:
            f.write('''{}''')

        loader = ptr.TableFileLoader(test_file_path, format_name=format_name)
        expected_loader = expected("")

        assert loader.source_type == expected_loader.source_type
        assert loader.format_name == expected_loader.format_name
Example #8
    def test_smoke(self, tmpdir, filename):
        test_data_file_path = os.path.join(os.path.dirname(__file__), "data", filename)
        loader = ptr.TableFileLoader(test_data_file_path)

        success_count = 0

        for tabledata in loader.load():
            if tabledata.is_empty():
                continue

            assert len(dumps_tabledata(tabledata)) > 10

            success_count += 1

        assert success_count > 0
Example #9
    def test_normal_excel(self, tmpdir):
        file_path = '/tmp/valid/test/data/validdata.xlsx'
        p_file_path = Path(str(tmpdir.join(file_path)))
        p_file_path.parent.makedirs_p()

        tabledata_list = [
            TableData(
                table_name='testsheet1',
                header_list=['a1', 'b1', 'c1'],
                row_list=[
                    ['aa1', 'ab1', 'ac1'],
                    [1.0, 1.1, 'a'],
                    [2.0, 2.2, 'bb'],
                    [3.0, 3.3, 'cc"dd"'],
                ]),
            TableData(
                table_name='testsheet3',
                header_list=['a3', 'b3', 'c3'],
                row_list=[
                    ['aa3', 'ab3', 'ac3'],
                    [4.0, 1.1, 'a'],
                    [5.0, '', 'bb'],
                    [6.0, 3.3, ''],
                ]),
        ]

        writer = ptw.ExcelXlsxTableWriter()
        writer.open(p_file_path)
        for tabledata in tabledata_list:
            writer.from_tabledata(tabledata)
            writer.write_table()  # write each TableData to its own worksheet
        writer.close()

        loader = ptr.TableFileLoader(p_file_path)

        assert loader.format_name == "excel"

        for tabledata in loader.load():
            print(ptw.dump_tabledata(tabledata))

            assert tabledata in tabledata_list
Example #10
    def test_smoke(self, tmpdir, filename):
        try:
            import pytablereader as ptr
        except ImportError:
            pytest.skip("requires pytablereader")

        p = tmpdir.join("tmp.db")
        con = SimpleSQLite(str(p), "w")

        test_data_file_path = os.path.join(os.path.dirname(__file__), "data",
                                           filename)
        loader = ptr.TableFileLoader(test_data_file_path)

        success_count = 0

        for table_data in loader.load():
            if table_data.is_empty():
                continue

            try:
                from pytablewriter import dumps_tabledata

                print(dumps_tabledata(table_data))
            except ImportError:
                pass

            try:
                con.create_table_from_tabledata(
                    SQLiteTableDataSanitizer(table_data).normalize())
                success_count += 1
            except ValueError as e:
                print(e)

        con.commit()

        assert success_count > 0
Example #11
def file(ctx, files, output_path):
    """
    Convert tabular data within CSV/Excel/HTML/JSON/LTSV/Markdown/SQLite/TSV
    file(s) to a SQLite database file.
    """

    if typepy.is_empty_sequence(files):
        sys.exit(ExitCode.NO_INPUT)

    con = create_database(ctx, output_path)
    verbosity_level = ctx.obj.get(Context.VERBOSITY_LEVEL)
    schema_extractor = get_schema_extractor(con, verbosity_level)
    result_counter = ResultCounter()
    logger = make_logger("{:s} file".format(PROGRAM_NAME),
                         ctx.obj[Context.LOG_LEVEL])
    table_creator = TableCreator(logger=logger, dst_con=con)

    for file_path in files:
        file_path = path.Path(file_path)

        if not file_path.isfile():
            logger.error(u"file not found: {}".format(file_path))
            result_counter.inc_fail()
            continue

        if file_path == output_path:
            logger.warn(
                u"skip a file which has the same path as the output file ({})".
                format(file_path))
            continue

        logger.debug(u"converting '{}'".format(file_path))

        try:
            loader = ptr.TableFileLoader(file_path)
        except ptr.InvalidFilePathError as e:
            logger.debug(e)
            result_counter.inc_fail()
            continue
        except ptr.LoaderNotFoundError:
            logger.debug(
                u"loader not found that coincide with '{}'".format(file_path))
            result_counter.inc_fail()
            continue

        try:
            for tabledata in loader.load():
                logger.debug(u"loaded tabledata: {}".format(
                    six.text_type(tabledata)))

                sqlite_tabledata = ptr.SQLiteTableDataSanitizer(
                    tabledata).sanitize()

                try:
                    table_creator.create(sqlite_tabledata,
                                         ctx.obj.get(Context.INDEX_LIST))
                    result_counter.inc_success()
                except (ValueError, IOError) as e:
                    logger.debug(u"path={}, message={}".format(file_path, e))
                    result_counter.inc_fail()
                    continue

                logger.info(
                    get_success_message(
                        verbosity_level, file_path,
                        schema_extractor.get_table_schema_text(
                            sqlite_tabledata.table_name).strip()))
        except ptr.OpenError as e:
            logger.error(u"open error: file={}, message='{}'".format(
                file_path, str(e)))
            result_counter.inc_fail()
        except ptr.ValidationError as e:
            logger.error(u"invalid {} data format: path={}, message={}".format(
                _get_format_type_from_path(file_path), file_path, str(e)))
            result_counter.inc_fail()
        except ptr.InvalidDataError as e:
            logger.error(u"invalid {} data: path={}, message={}".format(
                _get_format_type_from_path(file_path), file_path, str(e)))
            result_counter.inc_fail()

    write_completion_message(logger, output_path, result_counter)

    sys.exit(result_counter.get_return_code())
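Most of the command body above is one recurring pattern: build the loader, iterate load(), and map pytablereader's exceptions to log-and-continue handling. Below is a stripped-down sketch of that skeleton with logging and counters replaced by prints; the exception classes follow the ones used in these examples, and their exact names (InvalidDataError vs. DataError, sanitize() vs. normalize()) vary between pytablereader versions:

import pytablereader as ptr


def convert_file(file_path, con):
    # con is assumed to be a SimpleSQLite connection opened in write mode.
    try:
        loader = ptr.TableFileLoader(file_path)
    except (ptr.InvalidFilePathError, ptr.LoaderNotFoundError) as e:
        print("cannot create a loader for {}: {}".format(file_path, e))
        return

    try:
        for table_data in loader.load():
            # normalize() on recent releases; older ones call this sanitize().
            sanitized = ptr.SQLiteTableDataSanitizer(table_data).normalize()
            con.create_table_from_tabledata(sanitized)
    except ptr.OpenError as e:
        print("open error: file={}, message={}".format(file_path, e))
    except ptr.ValidationError as e:
        print("invalid data format: path={}, message={}".format(file_path, e))
    except ptr.DataError as e:
        print("invalid data: path={}, message={}".format(file_path, e))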
Example #12
    def test_exception(self, value, format_name, expected):
        with pytest.raises(expected):
            ptr.TableFileLoader(value, format_name=format_name)
Example #13
def file(ctx, files, output_path):
    """
    Convert tabular data within CSV/Excel/HTML/JSON/LTSV/Markdown/TSV file(s)
    to a SQLite database file.
    """

    if dataproperty.is_empty_sequence(files):
        sys.exit(ExitCode.NO_INPUT)

    con = create_database(ctx, output_path)
    verbosity_level = ctx.obj.get(Context.VERBOSITY_LEVEL)
    extractor = get_schema_extractor(con, verbosity_level)
    result_counter = ResultCounter()

    logger = logbook.Logger("sqlitebiter file")
    _setup_logger_from_context(logger, ctx.obj[Context.LOG_LEVEL])

    for file_path in files:
        file_path = path.Path(file_path)
        if not file_path.isfile():
            logger.debug(u"file not found: {}".format(file_path))
            result_counter.inc_fail()
            continue

        logger.debug(u"converting '{}'".format(file_path))

        try:
            loader = ptr.TableFileLoader(file_path)
        except ptr.InvalidFilePathError as e:
            logger.debug(e)
            result_counter.inc_fail()
            continue
        except ptr.LoaderNotFoundError:
            logger.debug(
                u"loader not found that coincide with '{}'".format(file_path))
            result_counter.inc_fail()
            continue

        try:
            for tabledata in loader.load():
                sqlite_tabledata = ptr.SQLiteTableDataSanitizer(
                    tabledata).sanitize()

                try:
                    con.create_table_from_tabledata(sqlite_tabledata)
                    result_counter.inc_success()
                except (ValueError, IOError) as e:
                    logger.debug(u"path={}, message={}".format(file_path, e))
                    result_counter.inc_fail()
                    continue

                log_message = get_success_log_format(verbosity_level).format(
                    file_path,
                    extractor.get_table_schema_text(
                        sqlite_tabledata.table_name).strip())
                logger.info(log_message)
        except ptr.OpenError as e:
            logger.error(u"open error: file={}, message='{}'".format(
                file_path, str(e)))
            result_counter.inc_fail()
        except ptr.ValidationError as e:
            logger.error(u"invalid {} data format: path={}, message={}".format(
                _get_format_type_from_path(file_path), file_path, str(e)))
            result_counter.inc_fail()
        except ptr.InvalidDataError as e:
            logger.error(u"invalid {} data: path={}, message={}".format(
                _get_format_type_from_path(file_path), file_path, str(e)))
            result_counter.inc_fail()

    write_completion_message(logger, output_path, result_counter)

    sys.exit(result_counter.get_return_code())
Example #14
def file(ctx, files, format_name, output_path, encoding):
    """
    Convert tabular data within
    CSV/Excel/HTML/JSON/Jupyter Notebook/LTSV/Markdown/Mediawiki/SQLite/SSV/TSV
    file(s) to a SQLite database file.
    """

    from ._ipynb_converter import is_ipynb_file_path, load_ipynb_file, convert_nb

    if typepy.is_empty_sequence(files):
        sys.exit(ExitCode.NO_INPUT)

    con = create_database(ctx, output_path)
    verbosity_level = ctx.obj.get(Context.VERBOSITY_LEVEL)
    schema_extractor = get_schema_extractor(con, verbosity_level)
    result_counter = ResultCounter()
    logger = make_logger("{:s} file".format(PROGRAM_NAME),
                         ctx.obj[Context.LOG_LEVEL])
    table_creator = TableCreator(logger=logger, dst_con=con)

    for file_path in files:
        file_path = path.Path(file_path)

        if not file_path.isfile():
            logger.error(u"file not found: {}".format(file_path))
            result_counter.inc_fail()
            continue

        if file_path == output_path:
            logger.warn(
                u"skip a file which has the same path as the output file ({})".
                format(file_path))
            continue

        logger.debug(u"converting '{}'".format(file_path))
        convert_count = result_counter.total_count

        if format_name in IPYNB_FORMAT_NAME_LIST or is_ipynb_file_path(
                file_path):
            convert_nb(logger,
                       con,
                       result_counter,
                       nb=load_ipynb_file(file_path, encoding=encoding))
            for table_name in con.get_table_name_list():
                logger.info(
                    get_success_message(
                        verbosity_level, file_path,
                        schema_extractor.get_table_schema_text(table_name)))
                result_counter.inc_success()
            if result_counter.total_count == convert_count:
                logger.warn(table_not_found_msg_format.format(file_path))
            continue

        try:
            loader = ptr.TableFileLoader(file_path,
                                         format_name=format_name,
                                         encoding=encoding)
        except ptr.InvalidFilePathError as e:
            logger.debug(msgfy.to_debug_message(e))
            result_counter.inc_fail()
            continue
        except ptr.LoaderNotFoundError:
            logger.debug(
                u"loader not found that coincide with '{}'".format(file_path))
            result_counter.inc_fail()
            continue

        try:
            for table_data in loader.load():
                logger.debug(u"loaded tabledata: {}".format(
                    six.text_type(table_data)))

                sqlite_tabledata = SQLiteTableDataSanitizer(
                    table_data).normalize()

                try:
                    table_creator.create(sqlite_tabledata,
                                         ctx.obj.get(Context.INDEX_LIST))
                    result_counter.inc_success()
                except (ValueError, IOError) as e:
                    logger.debug(u"exception={:s}, path={}, message={}".format(
                        type(e).__name__, file_path, e))
                    result_counter.inc_fail()
                    continue

                logger.info(
                    get_success_message(
                        verbosity_level, file_path,
                        schema_extractor.get_table_schema_text(
                            sqlite_tabledata.table_name)))
        except ptr.OpenError as e:
            logger.error(u"{:s}: open error: file={}, message='{}'".format(
                e.__class__.__name__, file_path, str(e)))
            result_counter.inc_fail()
        except ptr.ValidationError as e:
            if loader.format_name == "json":
                dict_converter = DictConverter(logger,
                                               table_creator,
                                               result_counter,
                                               schema_extractor,
                                               verbosity_level,
                                               source=file_path,
                                               index_list=ctx.obj.get(
                                                   Context.INDEX_LIST))

                try:
                    dict_converter.to_sqlite_table(loader.loader.load_dict(),
                                                   [])
                except AttributeError:
                    pass
                else:
                    continue

            logger.error(
                u"{:s}: invalid {} data format: path={}, message={}".format(
                    e.__class__.__name__,
                    _get_format_type_from_path(file_path), file_path, str(e)))
            result_counter.inc_fail()
        except ptr.DataError as e:
            logger.error(u"{:s}: invalid {} data: path={}, message={}".format(
                e.__class__.__name__, _get_format_type_from_path(file_path),
                file_path, str(e)))
            result_counter.inc_fail()

        if result_counter.total_count == convert_count:
            logger.warn(table_not_found_msg_format.format(file_path))

    write_completion_message(logger, output_path, result_counter)

    sys.exit(result_counter.get_return_code())
Example #15
    def __convert(self, file_path, source_info_record_base):
        logger = self._logger
        result_counter = self._result_counter

        try:
            loader = ptr.TableFileLoader(
                file_path,
                format_name=self._format_name,
                encoding=self._encoding,
                type_hint_rules=TYPE_HINT_FROM_HEADER_RULES
                if self._is_type_hint_header else None,
            )
        except ptr.InvalidFilePathError as e:
            logger.debug(msgfy.to_debug_message(e))
            result_counter.inc_fail()
            return
        except ptr.LoaderNotFoundError:
            logger.warn("not supported file format: ext={}, path={}".format(
                file_path.ext, file_path))
            result_counter.inc_fail()
            return

        source_info_record_base.format_name = loader.format_name

        try:
            for table_data in loader.load():
                logger.debug("loaded tabledata: {}".format(
                    six.text_type(table_data)))

                sqlite_tabledata = self.normalize_table(table_data)

                try:
                    self._table_creator.create(
                        sqlite_tabledata,
                        self._index_list,
                        source_info=source_info_record_base)
                except (ValueError, IOError) as e:
                    logger.debug("exception={:s}, path={}, message={}".format(
                        type(e).__name__, file_path, e))
                    result_counter.inc_fail()
                    return

                record = deepcopy(source_info_record_base)
                record.dst_table = sqlite_tabledata.table_name
                SourceInfo.insert(record)
        except ptr.OpenError as e:
            logger.error("{:s}: open error: file={}, message='{}'".format(
                e.__class__.__name__, file_path, str(e)))
            result_counter.inc_fail()
        except ptr.ValidationError as e:
            if loader.format_name == "json":
                for table_name in self._convert_complex_json(
                        loader.loader, source_info_record_base):
                    record = deepcopy(source_info_record_base)
                    record.dst_table = table_name
                    SourceInfo.insert(record)
            else:
                logger.error(
                    "{:s}: invalid {} data format: path={}, message={}".format(
                        e.__class__.__name__,
                        _get_format_type_from_path(file_path),
                        file_path,
                        str(e),
                    ))
                result_counter.inc_fail()
        except ptr.DataError as e:
            logger.error("{:s}: invalid {} data: path={}, message={}".format(
                e.__class__.__name__, _get_format_type_from_path(file_path),
                file_path, str(e)))
            result_counter.inc_fail()