Ejemplo n.º 1
0
    def __init__(self, logger, dst_con, result_logger, verbosity_level):
        """Keep the given collaborators and build a schema extractor.

        :param logger: Logger object stored on the instance.
        :param dst_con: Destination connection; it is stored and also handed
            to ``SQLiteSchemaExtractor``.
        :param result_logger: Result logger stored on the instance.
        :param verbosity_level: Verbosity level stored on the instance.
        """
        # Destination database and the extractor that reads its schema.
        self.__dst_con = dst_con
        self.__schema_extractor = SQLiteSchemaExtractor(dst_con)

        # Logging related state.
        self.__logger = logger
        self.__result_logger = result_logger
        self.__verbosity_level = verbosity_level
Ejemplo n.º 2
0
    def __init__(
        self,
        logger,
        con,
        symbol_replace_value,
        index_list,
        verbosity_level,
        format_name=None,
        encoding=None,
    ):
        """Store the conversion settings and wire up the helper objects.

        :param logger: Logger shared with the result logger and table creator.
        :param con: Connection used for the schema extractor, the table
            creator and ``SourceInfo.attach``.
        :param symbol_replace_value: Stored as ``_symbol_replace_value``.
        :param index_list: Stored as ``_index_list``.
        :param verbosity_level: Stored and forwarded to the helper objects.
        :param format_name: Optional; stored as ``_format_name``.
        :param encoding: Optional; stored as ``_encoding``.
        """
        # Plain settings.
        self._symbol_replace_value = symbol_replace_value
        self._index_list = index_list
        self._verbosity_level = verbosity_level
        self._format_name = format_name
        self._encoding = encoding

        # Shared collaborators.
        self._logger = logger
        self._con = con

        schema_extractor = SQLiteSchemaExtractor(con)
        result_counter = ResultCounter()
        result_logger = ResultLogger(logger, schema_extractor, result_counter, verbosity_level)

        self._schema_extractor = schema_extractor
        self._result_counter = result_counter
        self._result_logger = result_logger
        self._table_creator = TableCreator(
            logger=logger,
            dst_con=con,
            result_logger=result_logger,
            verbosity_level=verbosity_level,
        )

        # Attach the source-info table to the connection, then create it.
        SourceInfo.attach(con, is_hidden=True)
        SourceInfo.create()
Ejemplo n.º 3
0
    def test_normal_index(self, file_creator, index_list, expected):
        """Run the ``file`` command with ``--index`` and check the table schema.

        The command must exit with ``ExitCode.SUCCESS`` and the dumped schema
        of the ``valid_csv_3_1`` table must equal ``expected``.
        """
        output_db = "test_index.sqlite"
        cli_runner = CliRunner()

        with cli_runner.isolated_filesystem():
            # The input file is created inside the isolated directory.
            cli_args = ["-o", output_db, "--index", index_list, "file", file_creator()]
            result = cli_runner.invoke(cmd, cli_args)
            print_traceback(result)
            assert result.exit_code == ExitCode.SUCCESS

            table_schema = SQLiteSchemaExtractor(output_db).fetch_table_schema("valid_csv_3_1")
            actual = table_schema.dumps()
            print_test_result(expected=expected, actual=actual)
            assert actual == expected
    def test_normal_index(self, file_creator, index_list, expected):
        """Run the ``file`` command with ``--index`` and check the table schema.

        The command must exit with ``ExitCode.SUCCESS`` and the dumped schema
        of the ``valid_csv_3_1`` table must equal ``expected``.
        """
        output_db = "test_index.sqlite"
        cli_runner = CliRunner()

        with cli_runner.isolated_filesystem():
            # The input file is created inside the isolated directory.
            cli_args = ["-o", output_db, "--index", index_list, "file", file_creator()]
            result = cli_runner.invoke(cmd, cli_args)
            print_traceback(result)
            assert result.exit_code == ExitCode.SUCCESS

            table_schema = SQLiteSchemaExtractor(output_db).fetch_table_schema("valid_csv_3_1")
            actual = table_schema.dumps()
            print_test_result(expected=expected, actual=actual)
            assert actual == expected
Ejemplo n.º 5
0
    def test_normal_type_hints(self, tmpdir):
        """Write a two-column table with one type hint and check the schema.

        Only one hint (``ptw.String``) is supplied for the two headers; the
        test expects column ``a`` to be ``TEXT`` and column ``b`` to be
        ``INTEGER`` in the resulting database schema.
        """

        def column(field, type_name):
            # Expected schema entry; every attribute except the field name
            # and the type is the same for both columns.
            return OrderedDict(
                [
                    ("Field", field),
                    ("Index", False),
                    ("Type", type_name),
                    ("Null", "YES"),
                    ("Key", ""),
                    ("Default", "NULL"),
                    ("Extra", ""),
                ]
            )

        db_path = str(tmpdir.join("test.sqlite"))

        writer = ptw.SqliteTableWriter()
        writer.open(db_path)
        writer.table_name = "hoge"
        writer.headers = ["a", "b"]
        writer.value_matrix = [[1, 2], [11, 12]]
        writer.type_hints = [ptw.String]
        writer.write_table()
        writer.close()

        actual_schema = SQLiteSchemaExtractor(db_path).fetch_database_schema_as_dict()
        expected_columns = [column("a", "TEXT"), column("b", "INTEGER")]

        assert actual_schema[writer.table_name] == expected_columns
Ejemplo n.º 6
0
    def test_normal_symbols_attr(self):
        """Convert a CSV file with ``--replace-symbol _`` and check the header names.

        The command must succeed and the text dump of the ``symbols_attr``
        table (verbosity level 1) must equal the expected one-line schema.
        """
        output_db = "test_symbols_attr.sqlite"
        cli_runner = CliRunner()
        expected = dedent("symbols_attr (A1_A, B2B, C3_C)")

        with cli_runner.isolated_filesystem():
            cli_args = ["-o", output_db, "--replace-symbol", "_", "file", symbols_attr_csv_file()]
            result = cli_runner.invoke(cmd, cli_args)
            print_traceback(result)
            assert result.exit_code == ExitCode.SUCCESS

            dump_options = {"output_format": "text", "verbosity_level": 1}
            table_schema = SQLiteSchemaExtractor(output_db).fetch_table_schema("symbols_attr")
            print_test_result(expected=expected, actual=table_schema.dumps(**dump_options))
            assert table_schema.dumps(**dump_options) == expected
Ejemplo n.º 7
0
    def test_normal_dup_col_csv_file(self):
        """Convert a CSV file with duplicated column names.

        The command must succeed. ``expected`` is only printed next to the
        actual dump; the assertion itself checks nothing more than that the
        dumped schema text is longer than 100 characters.
        """
        output_db = "test_dup_col.sqlite"
        cli_runner = CliRunner()
        expected = dedent(
            """\
            _source_info_ (source_id, dir_name, base_name, format, dst_table, size, mtime)
            dup_col (A, A_2, A_1)"""
        )

        with cli_runner.isolated_filesystem():
            cli_args = ["-o", output_db, "file", dup_col_csv_file()]
            result = cli_runner.invoke(cmd, cli_args)
            print_traceback(result)
            assert result.exit_code == ExitCode.SUCCESS

            dump_options = {"output_format": "text", "verbosity_level": 1}
            schema_extractor = SQLiteSchemaExtractor(output_db)
            print_test_result(expected=expected, actual=schema_extractor.dumps(**dump_options))
            assert len(schema_extractor.dumps(**dump_options)) > 100
    def test_normal_symbols_attr(self):
        """Convert a CSV file with ``--replace-symbol _`` and check the header names.

        The command must succeed and the text dump of the ``symbols_attr``
        table (verbosity level 1) must equal the expected one-line schema.
        """
        output_db = "test_symbols_attr.sqlite"
        cli_runner = CliRunner()
        expected = dedent("symbols_attr (A1_A, B2B, C3_C)")

        with cli_runner.isolated_filesystem():
            cli_args = ["-o", output_db, "--replace-symbol", "_", "file", symbols_attr_csv_file()]
            result = cli_runner.invoke(cmd, cli_args)
            print_traceback(result)
            assert result.exit_code == ExitCode.SUCCESS

            dump_options = {"output_format": "text", "verbosity_level": 1}
            table_schema = SQLiteSchemaExtractor(output_db).fetch_table_schema("symbols_attr")
            print_test_result(expected=expected, actual=table_schema.dumps(**dump_options))
            assert table_schema.dumps(**dump_options) == expected
    def test_normal_dup_col_csv_file(self):
        """Convert a CSV file with duplicated column names.

        The command must succeed. ``expected`` is only printed next to the
        actual dump; the assertion itself checks nothing more than that the
        dumped schema text is longer than 100 characters.
        """
        output_db = "test_dup_col.sqlite"
        cli_runner = CliRunner()
        expected = dedent(
            """\
            _source_info_ (source_id, dir_name, base_name, format, dst_table, size, mtime)
            dup_col (A, A_2, A_1)"""
        )

        with cli_runner.isolated_filesystem():
            cli_args = ["-o", output_db, "file", dup_col_csv_file()]
            result = cli_runner.invoke(cmd, cli_args)
            print_traceback(result)
            assert result.exit_code == ExitCode.SUCCESS

            dump_options = {"output_format": "text", "verbosity_level": 1}
            schema_extractor = SQLiteSchemaExtractor(output_db)
            print_test_result(expected=expected, actual=schema_extractor.dumps(**dump_options))
            assert len(schema_extractor.dumps(**dump_options)) > 100
Ejemplo n.º 10
0
    def __require_rename_table(self, src_con, src_table_name):
        """Tell whether the source table clashes with a destination table.

        :param src_con: Connection that holds the source table.
        :param src_table_name: Name looked up in both databases.
        :return: ``False`` when the destination has no table with that name;
            otherwise ``True`` only if the two table schemas, compared as
            dicts, differ.
        """
        if self.__dst_con.has_table(src_table_name):
            dst_schema = self.__schema_extractor.fetch_table_schema(src_table_name).as_dict()
            src_schema = (
                SQLiteSchemaExtractor(src_con).fetch_table_schema(src_table_name).as_dict()
            )
            return dst_schema != src_schema

        return False
Ejemplo n.º 11
0
class TableCreator(object):
    """Write table data into a destination database.

    Incoming data is first loaded into an in-memory database and then either
    copied under a new name or appended to the destination, depending on
    whether a same-named destination table has a different schema.
    """

    def __init__(self, logger, dst_con, result_logger, verbosity_level):
        """Keep the given collaborators and build a schema extractor.

        :param logger: Logger used for debug messages.
        :param dst_con: Destination connection; also handed to
            ``SQLiteSchemaExtractor``.
        :param result_logger: Object whose ``logging_success`` is called
            after a table has been written.
        :param verbosity_level: Passed to ``source_info.get_name``.
        """
        self.__dst_con = dst_con
        self.__schema_extractor = SQLiteSchemaExtractor(dst_con)

        self.__logger = logger
        self.__result_logger = result_logger
        self.__verbosity_level = verbosity_level

    def create(self, table_data, index_list, source_info):
        """Store ``table_data`` in the destination database.

        :param table_data: Table data loaded into an in-memory database first.
        :param index_list: Passed to ``create_index_list`` for the
            destination table.
        :param source_info: Object providing ``get_name`` for the success log.
        """
        con_mem = simplesqlite.connect_memdb()
        con_mem.create_table_from_tabledata(table_data)

        need_rename = self.__require_rename_table(con_mem, table_data.table_name)
        # The name actually used in the in-memory database is the source name.
        src_table_name = con_mem.fetch_table_name_list()[0]

        if not need_rename:
            dst_table_name = src_table_name
            # Must be evaluated before appending.
            is_create_table = not self.__dst_con.has_table(dst_table_name)
            simplesqlite.append_table(
                src_con=con_mem, dst_con=self.__dst_con, table_name=dst_table_name
            )
        else:
            dst_table_name = self.__make_unique_table_name(src_table_name)
            rename_message = "rename table from '{}' to '{}'".format(
                src_table_name, dst_table_name
            )
            self.__logger.debug(rename_message)

            is_create_table = True
            simplesqlite.copy_table(
                src_con=con_mem,
                dst_con=self.__dst_con,
                src_table_name=src_table_name,
                dst_table_name=dst_table_name,
            )

        self.__dst_con.create_index_list(dst_table_name, index_list)

        source_name = source_info.get_name(self.__verbosity_level)
        self.__result_logger.logging_success(source_name, dst_table_name, is_create_table)

    def __require_rename_table(self, src_con, src_table_name):
        """Tell whether the source table clashes with a destination table.

        :return: ``False`` when the destination has no table with that name;
            otherwise ``True`` only if the two table schemas, compared as
            dicts, differ.
        """
        if self.__dst_con.has_table(src_table_name):
            dst_schema = self.__schema_extractor.fetch_table_schema(src_table_name).as_dict()
            src_schema = (
                SQLiteSchemaExtractor(src_con).fetch_table_schema(src_table_name).as_dict()
            )
            return dst_schema != src_schema

        return False

    def __make_unique_table_name(self, table_name_base):
        """Return a table name that is not yet used in the destination.

        :param table_name_base: Preferred name; returned unchanged if free.
        :return: ``table_name_base`` or the first free ``<base>_<n>`` with
            ``n`` counting up from 1.
        """
        taken_names = self.__dst_con.fetch_table_name_list()
        if table_name_base not in taken_names:
            return table_name_base

        suffix_id = 1
        candidate = "{:s}_{:d}".format(table_name_base, suffix_id)
        while candidate in taken_names:
            suffix_id += 1
            candidate = "{:s}_{:d}".format(table_name_base, suffix_id)

        return candidate
Ejemplo n.º 12
0
class TableConverter(object):
    """Base helper that converts table data into a SQLite database.

    It owns the result counter/logger and the table creator, and offers
    helpers for source-id lookup, header normalization and the final
    summary log.
    """

    def __init__(
        self,
        logger,
        con,
        symbol_replace_value,
        index_list,
        verbosity_level,
        format_name=None,
        encoding=None,
    ):
        """Store the conversion settings and wire up the helper objects.

        :param logger: Logger shared with the result logger and table creator.
        :param con: Connection used for the schema extractor, the table
            creator and ``SourceInfo.attach``.
        :param symbol_replace_value: Replacement used for symbols in header
            names; ``None`` disables the replacement in ``normalize_table``.
        :param index_list: Stored as ``_index_list``.
        :param verbosity_level: Stored and forwarded to the helper objects.
        :param format_name: Optional; stored as ``_format_name``.
        :param encoding: Optional; stored as ``_encoding``.
        """
        # Plain settings.
        self._symbol_replace_value = symbol_replace_value
        self._index_list = index_list
        self._verbosity_level = verbosity_level
        self._format_name = format_name
        self._encoding = encoding

        # Shared collaborators.
        self._logger = logger
        self._con = con

        schema_extractor = SQLiteSchemaExtractor(con)
        result_counter = ResultCounter()
        result_logger = ResultLogger(logger, schema_extractor, result_counter, verbosity_level)

        self._schema_extractor = schema_extractor
        self._result_counter = result_counter
        self._result_logger = result_logger
        self._table_creator = TableCreator(
            logger=logger,
            dst_con=con,
            result_logger=result_logger,
            verbosity_level=verbosity_level,
        )

        # Attach the source-info table to the connection, then create it.
        SourceInfo.attach(con, is_hidden=True)
        SourceInfo.create()

    def _fetch_source_id(self, source_info):
        """Look up the ``source_id`` stored for ``source_info``.

        ``base_name`` and ``format_name`` are always part of the condition;
        ``dir_name`` is added when truthy, ``size`` and ``mtime`` when they
        are not ``None``.
        """
        conditions = [
            Where("base_name", source_info.base_name),
            Where("format_name", source_info.format_name),
        ]

        if source_info.dir_name:
            conditions.append(Where("dir_name", source_info.dir_name))
        if source_info.size is not None:
            conditions.append(Where("size", source_info.size))
        if source_info.mtime is not None:
            conditions.append(Where("mtime", source_info.mtime))

        source_table = SourceInfo.get_table_name()
        return self._con.fetch_value(
            select=Attr("source_id"), table_name=source_table, where=And(conditions)
        )

    def _fetch_next_source_id(self):
        """Return the largest stored ``source_id`` plus one, or 1 if none."""
        max_source_id = self._con.fetch_value(
            select="MAX({})".format("source_id"), table_name=SourceInfo.get_table_name()
        )

        return 1 if max_source_id is None else max_source_id + 1

    def get_return_code(self):
        """Return the return code reported by the result counter."""
        return self._result_counter.get_return_code()

    def get_success_count(self):
        """Return the success count held by the result counter."""
        return self._result_counter.success_count

    def normalize_table(self, table_data, dup_col_handler=None):
        """Sanitize ``table_data`` and optionally replace symbols in headers.

        :param table_data: Table data passed to ``SQLiteTableDataSanitizer``.
        :param dup_col_handler: Handler for duplicated column names;
            ``DEFAULT_DUP_COL_HANDLER`` is used when ``None``.
        :return: The sanitized table data; when a symbol replacement value is
            configured, a new ``TableData`` with rewritten header names.
        """
        from tabledata import TableData
        from pathvalidate import replace_symbol, replace_unprintable_char
        from simplesqlite import SQLiteTableDataSanitizer

        handler = DEFAULT_DUP_COL_HANDLER if dup_col_handler is None else dup_col_handler
        sanitized = SQLiteTableDataSanitizer(table_data, dup_col_handler=handler).normalize()

        replacement = self._symbol_replace_value
        if replacement is None:
            return sanitized

        def clean_header(header):
            # Unprintable characters are handled first, then symbols.
            return replace_symbol(
                replace_unprintable_char(header),
                replacement,
                is_replace_consecutive_chars=True,
                is_strip=True,
            )

        return TableData(
            table_name=sanitized.table_name,
            header_list=[clean_header(header) for header in sanitized.header_list],
            row_list=sanitized.row_list,
            dp_extractor=sanitized.dp_extractor,
        )

    def write_completion_message(self):
        """Log the conversion summary, the database path and the schema.

        Counters that are zero are left out of the summary. The database
        path is logged at info level and followed by a schema dump at debug
        level when at least one conversion succeeded; otherwise the path is
        logged at debug level only.
        """
        logger = self._logger
        counter = self._result_counter

        logger.debug("----- {:s} completed -----".format(PROGRAM_NAME))

        source_count = self._con.fetch_value(
            select="COUNT(DISTINCT({}))".format("source_id"),
            table_name=SourceInfo.get_table_name(),
        )
        summary = ["source={}".format(bright(source_count))]

        success_count = self.get_success_count()
        if success_count > 0:
            summary.append(green("success={}".format(bright(success_count))))

        fail_count = counter.fail_count
        if fail_count > 0:
            summary.append(red("fail={}".format(bright(fail_count))))

        skip_count = counter.skip_count
        if skip_count > 0:
            summary.append(yellow("skip={}".format(bright(skip_count))))

        created_table_count = counter.created_table_count
        if created_table_count > 0:
            summary.append("created-table={}".format(bright(created_table_count)))

        logger.info("converted results: {}".format(", ".join(summary)))

        relative_db_path = Path(self._con.database_path).relpath()
        database_path_msg = "database path: {:s}".format(bright(relative_db_path))

        if success_count <= 0:
            logger.debug(database_path_msg)
            return

        # Resolve the dump parameters before logging the path.
        output_format, verbosity_level = self.__get_dump_param()
        logger.info(database_path_msg)

        try:
            from textwrap import indent
        except ImportError:
            # for Python 2 compatibility
            def indent(value, _):
                return value

        schema_text = self._schema_extractor.dumps(
            output_format=output_format, verbosity_level=verbosity_level
        )
        logger.debug("----- database schema -----\n{}".format(indent(schema_text, "    ")))

    def _convert_nb(self, nb, source_info):
        """Convert a notebook via ``convert_nb``.

        :return: The value returned by ``convert_nb`` when the success count
            changed during the call; otherwise ``None`` after a warning has
            been logged.
        """
        count_before = self._result_counter.success_count
        created_table_set = convert_nb(
            logger=self._logger,
            source_info=source_info,
            con=self._con,
            result_logger=self._result_logger,
            nb=nb,
        )

        if self._result_counter.success_count != count_before:
            return created_table_set

        self._logger.warn(TABLE_NOT_FOUND_MSG_FORMAT.format(source_info.base_name))
        return None

    def _convert_complex_json(self, json_loader, source_info):
        """Convert dict-shaped JSON through ``DictConverter``.

        :return: ``converted_table_name_set`` of the converter, also when the
            conversion stopped early with an ``AttributeError``.
        """
        from .._dict_converter import DictConverter

        dict_converter = DictConverter(
            self._logger,
            self._table_creator,
            source_info=source_info,
            index_list=self._index_list,
        )

        try:
            loaded = json_loader.load_dict()
            dict_converter.to_sqlite_table(loaded, [])
        except AttributeError:
            # NOTE(review): the error is swallowed on purpose as far as this
            # code shows; whatever was converted so far is still returned.
            pass

        return dict_converter.converted_table_name_set

    def __get_dump_param(self):
        """Choose output format and verbosity level for the schema dump.

        :return: ``("rst_simple_table", verbosity_level)`` when
            ``pytablewriter`` can be imported; otherwise a ``"text"`` format
            with ``MAX_VERBOSITY_LEVEL`` for levels >= 1 or ``1`` for level 0.
        :raises ValueError: If ``pytablewriter`` is missing and the verbosity
            level is neither 0 nor >= 1.
        """
        try:
            import pytablewriter  # noqa: W0611
        except ImportError:
            has_pytablewriter = False
        else:
            has_pytablewriter = True

        level = self._verbosity_level

        if has_pytablewriter:
            return ("rst_simple_table", level)

        if level >= 1:
            return ("text", MAX_VERBOSITY_LEVEL)

        if level == 0:
            return ("text", 1)

        raise ValueError("invalid verbosity_level: {}".format(level))