def test_normal_json(self, tmpdir, file_path, format_name):
    """Write a two-record JSON array under tmpdir and verify the loaded TableData."""
    target = Path(str(tmpdir.join(file_path)))
    target.parent.makedirs_p()

    json_body = dedent(
        """\
        [
            {"attr_a": 1},
            {"attr_b": 2.1, "attr_c": "bb"}
        ]"""
    )
    with open(target, "w") as f:
        f.write(json_body)

    expected_tables = [
        TableData(
            "validdata",
            ["attr_a", "attr_b", "attr_c"],
            [{"attr_a": 1}, {"attr_b": 2.1, "attr_c": "bb"}],
        )
    ]

    loader = ptr.TableFileLoader(target, format_name=format_name)
    assert loader.format_name == "json"

    for actual, expected in zip(loader.load(), expected_tables):
        assert actual.equals(expected)
def test_normal_csv(self, tmpdir, file_path, format_name):
    """Write a small CSV under tmpdir and verify the loader parses it as expected."""
    # Table name is derived from the source path with symbols stripped out.
    sanitized_name = pv.replace_symbol(file_path, "")
    csv_path = Path(six.text_type(tmpdir.join(sanitized_name + Path(file_path).ext)))
    csv_path.parent.makedirs_p()

    csv_text = dedent(
        """\
        "attr_a","attr_b","attr_c"
        1,4,"a"
        2,2.1,"bb"
        3,120.9,"ccc"
        """
    )
    with open(csv_path, "w") as f:
        f.write(csv_text)

    expected_tables = [
        TableData(
            sanitized_name,
            ["attr_a", "attr_b", "attr_c"],
            [[1, 4, "a"], [2, "2.1", "bb"], [3, "120.9", "ccc"]],
        )
    ]

    loader = ptr.TableFileLoader(csv_path, format_name=format_name)
    assert loader.format_name == "csv"

    for actual, expected in zip(loader.load(), expected_tables):
        print(dump_tabledata(expected))
        print(dump_tabledata(actual))
        assert actual.equals(expected)
def test_smoke(self, tmpdir, filename):
    """Smoke test: at least one table from the bundled data file lands in SQLite."""
    db_path = tmpdir.join("tmp.db")
    con = SimpleSQLite(str(db_path), "w")
    data_path = os.path.join(os.path.dirname(__file__), "data", filename)

    created = 0
    for table in ptr.TableFileLoader(data_path).load():
        if table.is_empty():
            continue

        print(ptw.dump_tabledata(table))

        try:
            con.create_table_from_tabledata(
                ptr.SQLiteTableDataSanitizer(table).sanitize())
            created += 1
        except ValueError as e:
            # Some tables may be unconvertible; only one success is required.
            print(e)

    con.commit()

    assert created > 0
def test_normal_json(self, tmpdir, file_path, format_name):
    """Write a JSON array and verify loader output equals the expected TableData."""
    json_path = Path(str(tmpdir.join(file_path)))
    json_path.parent.makedirs_p()

    with open(json_path, "w") as f:
        f.write('''[
            {"attr_a": 1},
            {"attr_b": 2.1, "attr_c": "bb"}
        ]''')

    expected_tables = [
        ptr.TableData(
            "validdata_json1",
            ["attr_a", "attr_b", "attr_c"],
            [
                {"attr_a": 1},
                {"attr_b": 2.1, "attr_c": "bb"},
            ],
        )
    ]

    loader = ptr.TableFileLoader(json_path, format_name)
    assert loader.format_name == "json"

    for actual, expected in zip(loader.load(), expected_tables):
        assert actual == expected
def test_normal_excel(self, tmpdir):
    """Write two sheets with ExcelXlsxTableWriter, then reload them via TableFileLoader."""
    file_path = "/tmp/valid/test/data/validdata.xlsx"
    xlsx_path = Path(str(tmpdir.join(file_path)))
    xlsx_path.parent.makedirs_p()

    sheets = [
        TableData(
            "testsheet1",
            ["a1", "b1", "c1"],
            [["aa1", "ab1", "ac1"], [1.0, 1.1, "a"], [2.0, 2.2, "bb"], [3.0, 3.3, 'cc"dd"']],
        ),
        TableData(
            "testsheet3",
            ["a3", "b3", "c3"],
            [["aa3", "ab3", "ac3"], [4.0, 1.1, "a"], [5.0, "", "bb"], [6.0, 3.3, ""]],
        ),
    ]

    writer = ExcelXlsxTableWriter()
    writer.open(xlsx_path)
    for sheet in sheets:
        writer.from_tabledata(sheet)
        writer.write_table()
    writer.close()

    loader = ptr.TableFileLoader(xlsx_path)
    assert loader.format_name == "excel"

    # Sheet order on reload is not asserted; membership is enough.
    for loaded in loader.load():
        print(dump_tabledata(loaded))
        assert loaded in sheets
def test_normal_ssv(self, tmpdir):
    """Parse space-separated ``ps aux``-style output; loader reports the CSV family name."""
    ssv_path = Path(six.text_type(tmpdir.join("testdata.txt")))
    ssv_path.parent.makedirs_p()

    ssv_text = dedent(
        """\
        USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
        root 1 0.0 0.4 77664 8784 ? Ss May11 0:02 /sbin/init
        root 2 0.0 0.0 0 0 ? S May11 0:00 [kthreadd]
        root 4 0.0 0.0 0 0 ? I< May11 0:00 [kworker/0:0H]
        root 6 0.0 0.0 0 0 ? I< May11 0:00 [mm_percpu_wq]
        root 7 0.0 0.0 0 0 ? S May11 0:01 [ksoftirqd/0]
        """
    )
    with open(ssv_path, "w") as f:
        f.write(ssv_text)

    header = [
        "USER", "PID", "%CPU", "%MEM", "VSZ", "RSS",
        "TTY", "STAT", "START", "TIME", "COMMAND",
    ]
    rows = [
        ["root", 1, 0, 0.4, 77664, 8784, "?", "Ss", "May11", "0:02", "/sbin/init"],
        ["root", 2, 0, 0, 0, 0, "?", "S", "May11", "0:00", "[kthreadd]"],
        ["root", 4, 0, 0, 0, 0, "?", "I<", "May11", "0:00", "[kworker/0:0H]"],
        ["root", 6, 0, 0, 0, 0, "?", "I<", "May11", "0:00", "[mm_percpu_wq]"],
        ["root", 7, 0, 0, 0, 0, "?", "S", "May11", "0:01", "[ksoftirqd/0]"],
    ]
    expected_tables = [TableData("testdata", header, rows)]

    loader = ptr.TableFileLoader(ssv_path, format_name="ssv")
    assert loader.format_name == "csv"

    for actual, expected in zip(loader.load(), expected_tables):
        print(dump_tabledata(expected))
        print(dump_tabledata(actual))
        assert actual.equals(expected)
def test_normal(self, tmpdir, file_path, format_name, expected):
    """Loader built from a (possibly multi-byte) path matches the expected loader class."""
    target = Path(six.text_type(tmpdir.join(
        Path(MultiByteStrDecoder(file_path).unicode_str))))
    target.parent.makedirs_p()

    with open(target, "w") as f:
        f.write("{}")

    loader = ptr.TableFileLoader(target, format_name=format_name)
    reference = expected("")

    assert loader.source_type == reference.source_type
    assert loader.format_name == reference.format_name
def test_smoke(self, tmpdir, filename):
    """Every non-empty table in the data file dumps to a non-trivial string."""
    data_path = os.path.join(os.path.dirname(__file__), "data", filename)

    loaded = 0
    for table in ptr.TableFileLoader(data_path).load():
        if table.is_empty():
            continue
        assert len(dumps_tabledata(table)) > 10
        loaded += 1

    assert loaded > 0
def test_normal_excel(self, tmpdir):
    """Write two Excel sheets and verify each reloaded table is one of the originals."""
    file_path = '/tmp/valid/test/data/validdata.xlsx'
    xlsx_path = Path(str(tmpdir.join(file_path)))
    xlsx_path.parent.makedirs_p()

    sheets = [
        TableData(
            table_name='testsheet1',
            header_list=['a1', 'b1', 'c1'],
            row_list=[
                ['aa1', 'ab1', 'ac1'],
                [1.0, 1.1, 'a'],
                [2.0, 2.2, 'bb'],
                [3.0, 3.3, 'cc"dd"'],
            ]),
        TableData(
            table_name='testsheet3',
            header_list=['a3', 'b3', 'c3'],
            row_list=[
                ['aa3', 'ab3', 'ac3'],
                [4.0, 1.1, 'a'],
                [5.0, '', 'bb'],
                [6.0, 3.3, ''],
            ]),
    ]

    writer = ptw.ExcelXlsxTableWriter()
    writer.open(xlsx_path)
    for sheet in sheets:
        writer.from_tabledata(sheet)
        writer.write_table()
    writer.close()

    loader = ptr.TableFileLoader(xlsx_path)
    assert loader.format_name == "excel"

    for loaded in loader.load():
        print(ptw.dump_tabledata(loaded))
        assert loaded in sheets
def test_smoke(self, tmpdir, filename):
    """Smoke test with optional dependencies: skip if pytablereader is absent."""
    try:
        import pytablereader as ptr
    except ImportError:
        pytest.skip("requires pytablereader")

    db_path = tmpdir.join("tmp.db")
    con = SimpleSQLite(str(db_path), "w")
    data_path = os.path.join(os.path.dirname(__file__), "data", filename)

    created = 0
    for table in ptr.TableFileLoader(data_path).load():
        if table.is_empty():
            continue

        # Dumping is diagnostic only; tolerate a missing pytablewriter.
        try:
            from pytablewriter import dumps_tabledata
            print(dumps_tabledata(table))
        except ImportError:
            pass

        try:
            con.create_table_from_tabledata(
                SQLiteTableDataSanitizer(table).normalize())
            created += 1
        except ValueError as e:
            print(e)

    con.commit()

    assert created > 0
def file(ctx, files, output_path):
    """
    Convert tabular data within CSV/Excel/HTML/JSON/LTSV/Markdown/SQLite/TSV
    file(s) to a SQLite database file.
    """

    # No input files: exit immediately with the dedicated exit code.
    if typepy.is_empty_sequence(files):
        sys.exit(ExitCode.NO_INPUT)

    con = create_database(ctx, output_path)
    verbosity_level = ctx.obj.get(Context.VERBOSITY_LEVEL)
    schema_extractor = get_schema_extractor(con, verbosity_level)
    result_counter = ResultCounter()  # tallies per-table success/failure
    logger = make_logger("{:s} file".format(PROGRAM_NAME), ctx.obj[Context.LOG_LEVEL])
    table_creator = TableCreator(logger=logger, dst_con=con)

    for file_path in files:
        file_path = path.Path(file_path)
        if not file_path.isfile():
            logger.error(u"file not found: {}".format(file_path))
            result_counter.inc_fail()
            continue

        # Never read the database file we are writing into.
        if file_path == output_path:
            logger.warn(
                u"skip a file which has the same path as the output file ({})".
                format(file_path))
            continue

        logger.debug(u"converting '{}'".format(file_path))

        try:
            loader = ptr.TableFileLoader(file_path)
        except ptr.InvalidFilePathError as e:
            logger.debug(e)
            result_counter.inc_fail()
            continue
        except ptr.LoaderNotFoundError:
            logger.debug(
                u"loader not found that coincide with '{}'".format(file_path))
            result_counter.inc_fail()
            continue

        try:
            # One source file may yield multiple tables (e.g. Excel sheets).
            for tabledata in loader.load():
                logger.debug(u"loaded tabledata: {}".format(
                    six.text_type(tabledata)))

                # Normalize table/column names to be valid for SQLite.
                sqlite_tabledata = ptr.SQLiteTableDataSanitizer(
                    tabledata).sanitize()

                try:
                    table_creator.create(sqlite_tabledata,
                                         ctx.obj.get(Context.INDEX_LIST))
                    result_counter.inc_success()
                except (ValueError, IOError) as e:
                    logger.debug(u"path={}, message={}".format(file_path, e))
                    result_counter.inc_fail()
                    continue

                logger.info(
                    get_success_message(
                        verbosity_level, file_path,
                        schema_extractor.get_table_schema_text(
                            sqlite_tabledata.table_name).strip()))
        except ptr.OpenError as e:
            logger.error(u"open error: file={}, message='{}'".format(
                file_path, str(e)))
            result_counter.inc_fail()
        except ptr.ValidationError as e:
            logger.error(u"invalid {} data format: path={}, message={}".format(
                _get_format_type_from_path(file_path), file_path, str(e)))
            result_counter.inc_fail()
        except ptr.InvalidDataError as e:
            logger.error(u"invalid {} data: path={}, message={}".format(
                _get_format_type_from_path(file_path), file_path, str(e)))
            result_counter.inc_fail()

    write_completion_message(logger, output_path, result_counter)

    # Exit code reflects the overall success/failure tally.
    sys.exit(result_counter.get_return_code())
def test_exception(self, value, format_name, expected):
    """Constructing a TableFileLoader from an invalid source raises ``expected``."""
    with pytest.raises(expected):
        loader_class = ptr.TableFileLoader
        loader_class(value, format_name=format_name)
def file(ctx, files, output_path):
    """
    Convert tabular data within CSV/Excel/HTML/JSON/LTSV/Markdown/TSV
    file(s) to a SQLite database file.
    """

    # No input files: exit immediately with the dedicated exit code.
    if dataproperty.is_empty_sequence(files):
        sys.exit(ExitCode.NO_INPUT)

    con = create_database(ctx, output_path)
    verbosity_level = ctx.obj.get(Context.VERBOSITY_LEVEL)
    extractor = get_schema_extractor(con, verbosity_level)
    result_counter = ResultCounter()  # tallies per-table success/failure

    logger = logbook.Logger("sqlitebiter file")
    _setup_logger_from_context(logger, ctx.obj[Context.LOG_LEVEL])

    for file_path in files:
        file_path = path.Path(file_path)
        if not file_path.isfile():
            logger.debug(u"file not found: {}".format(file_path))
            result_counter.inc_fail()
            continue

        logger.debug(u"converting '{}'".format(file_path))

        try:
            loader = ptr.TableFileLoader(file_path)
        except ptr.InvalidFilePathError as e:
            logger.debug(e)
            result_counter.inc_fail()
            continue
        except ptr.LoaderNotFoundError:
            logger.debug(
                u"loader not found that coincide with '{}'".format(file_path))
            result_counter.inc_fail()
            continue

        try:
            # One source file may yield multiple tables (e.g. Excel sheets).
            for tabledata in loader.load():
                # Normalize table/column names to be valid for SQLite.
                sqlite_tabledata = ptr.SQLiteTableDataSanitizer(
                    tabledata).sanitize()

                try:
                    con.create_table_from_tabledata(sqlite_tabledata)
                    result_counter.inc_success()
                except (ValueError, IOError) as e:
                    logger.debug(u"path={}, message={}".format(file_path, e))
                    result_counter.inc_fail()
                    continue

                log_message = get_success_log_format(verbosity_level).format(
                    file_path,
                    extractor.get_table_schema_text(
                        sqlite_tabledata.table_name).strip())
                logger.info(log_message)
        except ptr.OpenError as e:
            logger.error(u"open error: file={}, message='{}'".format(
                file_path, str(e)))
            result_counter.inc_fail()
        except ptr.ValidationError as e:
            logger.error(u"invalid {} data format: path={}, message={}".format(
                _get_format_type_from_path(file_path), file_path, str(e)))
            result_counter.inc_fail()
        except ptr.InvalidDataError as e:
            logger.error(u"invalid {} data: path={}, message={}".format(
                _get_format_type_from_path(file_path), file_path, str(e)))
            result_counter.inc_fail()

    write_completion_message(logger, output_path, result_counter)

    # Exit code reflects the overall success/failure tally.
    sys.exit(result_counter.get_return_code())
def file(ctx, files, format_name, output_path, encoding):
    """
    Convert tabular data within
    CSV/Excel/HTML/JSON/Jupyter Notebook/LTSV/Markdown/Mediawiki/SQLite/SSV/TSV
    file(s) to a SQLite database file.
    """

    from ._ipynb_converter import is_ipynb_file_path, load_ipynb_file, convert_nb

    # No input files: exit immediately with the dedicated exit code.
    if typepy.is_empty_sequence(files):
        sys.exit(ExitCode.NO_INPUT)

    con = create_database(ctx, output_path)
    verbosity_level = ctx.obj.get(Context.VERBOSITY_LEVEL)
    schema_extractor = get_schema_extractor(con, verbosity_level)
    result_counter = ResultCounter()  # tallies per-table success/failure
    logger = make_logger("{:s} file".format(PROGRAM_NAME),
                         ctx.obj[Context.LOG_LEVEL])
    table_creator = TableCreator(logger=logger, dst_con=con)

    for file_path in files:
        file_path = path.Path(file_path)
        if not file_path.isfile():
            logger.error(u"file not found: {}".format(file_path))
            result_counter.inc_fail()
            continue

        # Never read the database file we are writing into.
        if file_path == output_path:
            logger.warn(
                u"skip a file which has the same path as the output file ({})".
                format(file_path))
            continue

        logger.debug(u"converting '{}'".format(file_path))
        # Snapshot the counter so we can detect "no table converted" per file.
        convert_count = result_counter.total_count

        if format_name in IPYNB_FORMAT_NAME_LIST or is_ipynb_file_path(
                file_path):
            # Jupyter Notebook files take a dedicated conversion path.
            convert_nb(logger,
                       con,
                       result_counter,
                       nb=load_ipynb_file(file_path, encoding=encoding))

            for table_name in con.get_table_name_list():
                logger.info(
                    get_success_message(
                        verbosity_level, file_path,
                        schema_extractor.get_table_schema_text(table_name)))
                result_counter.inc_success()

            if result_counter.total_count == convert_count:
                # BUGFIX: the message was previously built and discarded
                # without being logged; emit it like the generic path below.
                logger.warn(table_not_found_msg_format.format(file_path))

            continue

        try:
            loader = ptr.TableFileLoader(file_path,
                                         format_name=format_name,
                                         encoding=encoding)
        except ptr.InvalidFilePathError as e:
            logger.debug(msgfy.to_debug_message(e))
            result_counter.inc_fail()
            continue
        except ptr.LoaderNotFoundError:
            logger.debug(
                u"loader not found that coincide with '{}'".format(file_path))
            result_counter.inc_fail()
            continue

        try:
            # One source file may yield multiple tables (e.g. Excel sheets).
            for table_data in loader.load():
                logger.debug(u"loaded tabledata: {}".format(
                    six.text_type(table_data)))

                # Normalize table/column names to be valid for SQLite.
                sqlite_tabledata = SQLiteTableDataSanitizer(
                    table_data).normalize()

                try:
                    table_creator.create(sqlite_tabledata,
                                         ctx.obj.get(Context.INDEX_LIST))
                    result_counter.inc_success()
                except (ValueError, IOError) as e:
                    logger.debug(u"exception={:s}, path={}, message={}".format(
                        type(e).__name__, file_path, e))
                    result_counter.inc_fail()
                    continue

                logger.info(
                    get_success_message(
                        verbosity_level, file_path,
                        schema_extractor.get_table_schema_text(
                            sqlite_tabledata.table_name)))
        except ptr.OpenError as e:
            logger.error(u"{:s}: open error: file={}, message='{}'".format(
                e.__class__.__name__, file_path, str(e)))
            result_counter.inc_fail()
        except ptr.ValidationError as e:
            if loader.format_name == "json":
                # Complex/nested JSON fails flat-table validation; retry via
                # the dict converter before reporting an error.
                dict_converter = DictConverter(logger,
                                               table_creator,
                                               result_counter,
                                               schema_extractor,
                                               verbosity_level,
                                               source=file_path,
                                               index_list=ctx.obj.get(
                                                   Context.INDEX_LIST))

                try:
                    dict_converter.to_sqlite_table(loader.loader.load_dict(), [])
                except AttributeError:
                    pass
                else:
                    continue

            logger.error(
                u"{:s}: invalid {} data format: path={}, message={}".format(
                    e.__class__.__name__,
                    _get_format_type_from_path(file_path), file_path, str(e)))
            result_counter.inc_fail()
        except ptr.DataError as e:
            logger.error(u"{:s}: invalid {} data: path={}, message={}".format(
                e.__class__.__name__, _get_format_type_from_path(file_path),
                file_path, str(e)))
            result_counter.inc_fail()

        if result_counter.total_count == convert_count:
            logger.warn(table_not_found_msg_format.format(file_path))

    write_completion_message(logger, output_path, result_counter)

    # Exit code reflects the overall success/failure tally.
    sys.exit(result_counter.get_return_code())
def __convert(self, file_path, source_info_record_base):
    # Convert one source file into SQLite tables, recording a SourceInfo row
    # per created table and tallying success/failure in the shared counter.
    logger = self._logger
    result_counter = self._result_counter

    try:
        loader = ptr.TableFileLoader(
            file_path,
            format_name=self._format_name,
            encoding=self._encoding,
            # Header-based type hints are opt-in via the instance flag.
            type_hint_rules=TYPE_HINT_FROM_HEADER_RULES
            if self._is_type_hint_header else None,
        )
    except ptr.InvalidFilePathError as e:
        logger.debug(msgfy.to_debug_message(e))
        result_counter.inc_fail()
        return
    except ptr.LoaderNotFoundError:
        logger.warn("not supported file format: ext={}, path={}".format(
            file_path.ext, file_path))
        result_counter.inc_fail()
        return

    source_info_record_base.format_name = loader.format_name

    try:
        # One source file may yield multiple tables (e.g. Excel sheets).
        for table_data in loader.load():
            logger.debug("loaded tabledata: {}".format(
                six.text_type(table_data)))

            sqlite_tabledata = self.normalize_table(table_data)

            try:
                self._table_creator.create(
                    sqlite_tabledata,
                    self._index_list,
                    source_info=source_info_record_base)
            except (ValueError, IOError) as e:
                logger.debug("exception={:s}, path={}, message={}".format(
                    type(e).__name__, file_path, e))
                result_counter.inc_fail()
                return

            # Record which destination table this source produced.
            record = deepcopy(source_info_record_base)
            record.dst_table = sqlite_tabledata.table_name
            SourceInfo.insert(record)
    except ptr.OpenError as e:
        logger.error("{:s}: open error: file={}, message='{}'".format(
            e.__class__.__name__, file_path, str(e)))
        result_counter.inc_fail()
    except ptr.ValidationError as e:
        if loader.format_name == "json":
            # Complex/nested JSON fails flat-table validation; fall back to
            # the complex-JSON converter and record each resulting table.
            for table_name in self._convert_complex_json(
                    loader.loader, source_info_record_base):
                record = deepcopy(source_info_record_base)
                record.dst_table = table_name
                SourceInfo.insert(record)
        else:
            logger.error(
                "{:s}: invalid {} data format: path={}, message={}".format(
                    e.__class__.__name__,
                    _get_format_type_from_path(file_path),
                    file_path,
                    str(e),
                ))
            result_counter.inc_fail()
    except ptr.DataError as e:
        logger.error("{:s}: invalid {} data: path={}, message={}".format(
            e.__class__.__name__, _get_format_type_from_path(file_path),
            file_path, str(e)))
        result_counter.inc_fail()