def test_normal_dup_col_csv_file(self):
        """Convert a CSV file with duplicated column names and inspect the schema dump.

        Invokes the CLI ``file`` subcommand inside ``runner.isolated_filesystem()``,
        requires a successful exit code, hands the expected and actual text
        schema dumps to ``print_test_result``, and asserts only that the dump is
        longer than 100 characters (``expected`` is not compared directly).
        """
        runner = CliRunner()
        db_path = "test_dup_col.sqlite"
        expected = dedent(
            """\
            _source_info_ (source_id, dir_name, base_name, format, dst_table, size, mtime)
            dup_col (A, A_2, A_1)"""
        )

        with runner.isolated_filesystem():
            # The input file is created inside the isolated filesystem.
            cli_args = ["-o", db_path, "file", dup_col_csv_file()]
            invoke_result = runner.invoke(cmd, cli_args)
            print_traceback(invoke_result)
            assert invoke_result.exit_code == ExitCode.SUCCESS

            schema_extractor = SQLiteSchemaExtractor(db_path)
            dump_options = {"output_format": "text", "verbosity_level": 1}
            actual_dump = schema_extractor.dumps(**dump_options)
            print_test_result(expected=expected, actual=actual_dump)

            # Dump again for the size check, as the original test does.
            dump_length = len(schema_extractor.dumps(**dump_options))
            assert dump_length > 100
    def test_normal_dup_col_csv_file(self):
        """Check that a CSV file with duplicated column names converts successfully.

        The CLI ``file`` subcommand is invoked inside
        ``runner.isolated_filesystem()``; the test requires a successful exit
        code and a text schema dump longer than 100 characters. ``expected`` is
        only passed to ``print_test_result`` and is not asserted against.
        """
        expected = dedent("""\
            _source_info_ (source_id, dir_name, base_name, format, dst_table, size, mtime)
            dup_col (A, A_2, A_1)""")
        db_path = "test_dup_col.sqlite"
        runner = CliRunner()

        with runner.isolated_filesystem():
            # Build the argument list here so the input file is created inside
            # the isolated filesystem.
            argv = ["-o", db_path, "file", dup_col_csv_file()]
            outcome = runner.invoke(cmd, argv)
            print_traceback(outcome)
            assert outcome.exit_code == ExitCode.SUCCESS

            extractor = SQLiteSchemaExtractor(db_path)
            dump_kwargs = {"output_format": "text", "verbosity_level": 1}
            schema_text = extractor.dumps(**dump_kwargs)
            print_test_result(expected=expected, actual=schema_text)

            # A second dump is taken for the length check, as in the original.
            assert len(extractor.dumps(**dump_kwargs)) > 100
Example #3
0
    def test_normal_index(self, file_creator, index_list, expected):
        """Convert a file with ``--index`` and compare one table's schema dump.

        ``file_creator`` is called inside ``runner.isolated_filesystem()`` to
        obtain the input path, ``index_list`` is passed as the ``--index``
        option value, and ``expected`` is the schema dump expected for the
        ``valid_csv_3_1`` table.
        """
        runner = CliRunner()
        db_path = "test_index.sqlite"

        with runner.isolated_filesystem():
            file_path = file_creator()
            cli_args = ["-o", db_path, "--index", index_list, "file", file_path]
            invoke_result = runner.invoke(cmd, cli_args)
            print_traceback(invoke_result)
            assert invoke_result.exit_code == ExitCode.SUCCESS

            schema_extractor = SQLiteSchemaExtractor(db_path)

            # The full dump is handed to print_test_result; the assertion
            # below compares only the valid_csv_3_1 table schema.
            full_dump = schema_extractor.dumps()
            print_test_result(expected=expected, actual=full_dump)

            table_schema = schema_extractor.fetch_table_schema("valid_csv_3_1")
            assert table_schema.dumps() == expected
Example #4
0
class TableConverter(object):
    """Shared state and helpers for converting table data into a SQLite database.

    The instance keeps the logger, the destination connection and the
    conversion options, and wires up a schema extractor, a result counter, a
    result logger and a table creator around that connection. It provides
    helpers for looking up source ids, normalizing table data, converting
    notebook / JSON inputs and logging a completion summary.
    """

    def __init__(
        self,
        logger,
        con,
        symbol_replace_value,
        index_list,
        verbosity_level,
        format_name=None,
        encoding=None,
    ):
        """Store the conversion options and build the helper objects.

        :param logger: Logger used for all messages emitted by this class.
        :param con: Destination database connection.
        :param symbol_replace_value:
            Replacement value for symbols in header names;
            ``None`` disables the replacement (see ``normalize_table``).
        :param index_list: Forwarded to ``DictConverter`` as ``index_list``.
        :param verbosity_level:
            Passed to the result logger and table creator, and used to
            choose the schema dump parameters.
        :param format_name: Stored as-is; not used by the methods shown here.
        :param encoding: Stored as-is; not used by the methods shown here.
        """
        self._logger = logger
        self._con = con
        self._symbol_replace_value = symbol_replace_value
        self._index_list = index_list
        self._verbosity_level = verbosity_level
        self._format_name = format_name
        self._encoding = encoding

        self._schema_extractor = schema_extractor = SQLiteSchemaExtractor(con)
        self._result_counter = result_counter = ResultCounter()
        self._result_logger = result_logger = ResultLogger(
            logger, schema_extractor, result_counter, verbosity_level
        )
        self._table_creator = TableCreator(
            logger=logger,
            dst_con=con,
            result_logger=result_logger,
            verbosity_level=verbosity_level,
        )

        # Bind the source-information model to the destination connection.
        SourceInfo.attach(con, is_hidden=True)
        SourceInfo.create()

    def _fetch_source_id(self, source_info):
        """Fetch the ``source_id`` of the stored record matching ``source_info``.

        ``base_name`` and ``format_name`` are always part of the condition;
        ``dir_name`` is added when truthy, ``size`` and ``mtime`` when they are
        not ``None``.
        """
        conditions = [
            Where("base_name", source_info.base_name),
            Where("format_name", source_info.format_name),
        ]

        if source_info.dir_name:
            conditions.append(Where("dir_name", source_info.dir_name))
        if source_info.size is not None:
            conditions.append(Where("size", source_info.size))
        if source_info.mtime is not None:
            conditions.append(Where("mtime", source_info.mtime))

        con = self._con
        return con.fetch_value(
            select=Attr("source_id"),
            table_name=SourceInfo.get_table_name(),
            where=And(conditions),
        )

    def _fetch_next_source_id(self):
        """Return the largest stored ``source_id`` plus one, or 1 if none is stored."""
        con = self._con
        max_source_id = con.fetch_value(
            select="MAX({})".format("source_id"),
            table_name=SourceInfo.get_table_name(),
        )

        return 1 if max_source_id is None else max_source_id + 1

    def get_return_code(self):
        """Return the return code reported by the result counter."""
        counter = self._result_counter
        return counter.get_return_code()

    def get_success_count(self):
        """Return the success count held by the result counter."""
        counter = self._result_counter
        return counter.success_count

    def normalize_table(self, table_data, dup_col_handler=None):
        """Sanitize ``table_data`` and optionally replace symbols in its headers.

        :param table_data: Table data to normalize.
        :param dup_col_handler:
            Duplicate-column handler passed to ``SQLiteTableDataSanitizer``;
            ``DEFAULT_DUP_COL_HANDLER`` is used when ``None``.
        :return:
            The sanitized table data. When a symbol replacement value is
            configured, a new ``TableData`` whose headers have unprintable
            characters and symbols replaced.
        """
        from tabledata import TableData
        from pathvalidate import replace_symbol, replace_unprintable_char
        from simplesqlite import SQLiteTableDataSanitizer

        if dup_col_handler is None:
            dup_col_handler = DEFAULT_DUP_COL_HANDLER

        sanitizer = SQLiteTableDataSanitizer(table_data, dup_col_handler=dup_col_handler)
        sanitized = sanitizer.normalize()

        replacement = self._symbol_replace_value
        if replacement is None:
            return sanitized

        def _clean_header(header):
            # Replace unprintable characters first, then the remaining symbols.
            return replace_symbol(
                replace_unprintable_char(header),
                replacement,
                is_replace_consecutive_chars=True,
                is_strip=True,
            )

        return TableData(
            table_name=sanitized.table_name,
            header_list=[_clean_header(header) for header in sanitized.header_list],
            row_list=sanitized.row_list,
            dp_extractor=sanitized.dp_extractor,
        )

    def write_completion_message(self):
        """Log a summary of the conversion results and the database path.

        When at least one conversion succeeded, the database path is logged at
        info level and the database schema at debug level; otherwise only the
        database path is logged, at debug level.
        """
        logger = self._logger
        counter = self._result_counter

        logger.debug("----- {:s} completed -----".format(PROGRAM_NAME))

        distinct_source_count = self._con.fetch_value(
            select="COUNT(DISTINCT({}))".format("source_id"),
            table_name=SourceInfo.get_table_name(),
        )
        summary = ["source={}".format(bright(distinct_source_count))]

        # Only non-zero counters are reported.
        if self.get_success_count() > 0:
            summary.append(green("success={}".format(bright(self.get_success_count()))))
        if counter.fail_count > 0:
            summary.append(red("fail={}".format(bright(counter.fail_count))))
        if counter.skip_count > 0:
            summary.append(yellow("skip={}".format(bright(counter.skip_count))))
        if counter.created_table_count > 0:
            summary.append("created-table={}".format(bright(counter.created_table_count)))

        logger.info("converted results: {}".format(", ".join(summary)))

        relative_db_path = Path(self._con.database_path).relpath()
        database_path_msg = "database path: {:s}".format(bright(relative_db_path))

        if not (self.get_success_count() > 0):
            logger.debug(database_path_msg)
            return

        output_format, verbosity_level = self.__get_dump_param()
        logger.info(database_path_msg)

        try:
            from textwrap import indent as indent_text
        except ImportError:
            # for Python 2 compatibility: leave the text unindented
            def indent_text(value, _):
                return value

        schema_dump = self._schema_extractor.dumps(
            output_format=output_format, verbosity_level=verbosity_level
        )
        logger.debug("----- database schema -----\n{}".format(indent_text(schema_dump, "    ")))

    def _convert_nb(self, nb, source_info):
        """Convert ``nb`` via ``convert_nb`` and return the created table set.

        Returns ``None`` (after logging a warning) when the success count did
        not change during the conversion.
        """
        previous_success_count = self._result_counter.success_count
        created_table_set = convert_nb(
            logger=self._logger,
            source_info=source_info,
            con=self._con,
            result_logger=self._result_logger,
            nb=nb,
        )

        if self._result_counter.success_count != previous_success_count:
            return created_table_set

        self._logger.warn(TABLE_NOT_FOUND_MSG_FORMAT.format(source_info.base_name))
        return None

    def _convert_complex_json(self, json_loader, source_info):
        """Convert the dict loaded by ``json_loader`` with a ``DictConverter``.

        :return: The converter's ``converted_table_name_set``.
        """
        from .._dict_converter import DictConverter

        converter = DictConverter(
            self._logger,
            self._table_creator,
            source_info=source_info,
            index_list=self._index_list,
        )

        try:
            converter.to_sqlite_table(json_loader.load_dict(), [])
        except AttributeError:
            # An AttributeError raised while loading/converting is ignored;
            # whatever was converted so far is still returned.
            pass

        return converter.converted_table_name_set

    def __get_dump_param(self):
        """Return the ``(output_format, verbosity_level)`` pair for the schema dump.

        :raises ValueError:
            If pytablewriter is not importable and the verbosity level is
            below zero.
        """
        try:
            import pytablewriter  # noqa: W0611
        except ImportError:
            has_pytablewriter = False
        else:
            has_pytablewriter = True

        if has_pytablewriter:
            return ("rst_simple_table", self._verbosity_level)

        # Without pytablewriter only the text format is used.
        if self._verbosity_level >= 1:
            return ("text", MAX_VERBOSITY_LEVEL)

        if self._verbosity_level == 0:
            return ("text", 1)

        raise ValueError("invalid verbosity_level: {}".format(self._verbosity_level))