def __parse_html(self, table):
    """Extract a TableData instance from a BeautifulSoup ``<table>`` element.

    Header names are taken from the first row that contains ``<th>`` cells;
    every row that contains ``<td>`` cells becomes a data row.

    :raises ValueError: If no data rows were found in the table.
    """
    header_list = []
    data_matrix = []

    self.__parse_tag_id(table)
    row_list = table.find_all("tr")
    re_table_val = re.compile("td|th")  # matches both data and header cells

    for row in row_list:
        td_list = row.find_all("td")
        if typepy.is_empty_sequence(td_list):
            # no data cells in this row: treat it as a potential header row
            if typepy.is_not_empty_sequence(header_list):
                continue  # header already extracted; ignore further header rows

            th_list = row.find_all("th")
            if typepy.is_empty_sequence(th_list):
                continue  # neither <td> nor <th>: nothing to extract

            header_list = [row.text.strip() for row in th_list]
            continue

        # data row: collect text from both <td> and <th> cells in order
        data_matrix.append(
            [value.get_text().strip() for value in row.find_all(re_table_val)]
        )

    if typepy.is_empty_sequence(data_matrix):
        raise ValueError("data matrix is empty")

    self._loader.inc_table_count()

    return TableData(
        self._make_table_name(),
        header_list,
        data_matrix,
        dp_extractor=self._loader.dp_extractor,
    )
def asdict(self):
    """
    :return: Table data as a |dict| instance.
    :rtype: dict
    """
    extractor = dp.DataPropertyExtractor()
    extractor.strip_str = '"'
    extractor.float_type = float

    dict_body = []
    for value_list in self.value_matrix:
        if typepy.is_empty_sequence(value_list):
            continue

        # pair each header with its converted cell value, dropping Nones
        pairs = [
            (header, extractor.to_dataproperty(value).data)
            for header, value in zip(self.header_list, value_list)
            if value is not None
        ]
        if typepy.is_empty_sequence(pairs):
            continue

        dict_body.append(dict(pairs))

    return {self.table_name: dict_body}
def url(ctx, url, format_name, encoding, proxy):
    """
    Scrape tabular data from a URL and convert data to a SQLite database file.
    """
    if typepy.is_empty_sequence(url):
        sys.exit(ExitCode.NO_INPUT)

    logger = make_logger("{:s} url".format(PROGRAM_NAME), ctx.obj[Context.LOG_LEVEL])

    # fall back to the encoding stored in the application config file
    if typepy.is_empty_sequence(encoding):
        encoding = app_config_manager.load().get(ConfigKey.DEFAULT_ENCODING)
        logger.debug("use default encoding: {}".format(encoding))

    # fall back to the proxy server stored in the application config file
    if typepy.is_null_string(proxy):
        proxy = app_config_manager.load().get(ConfigKey.PROXY_SERVER)

    con, is_create_db = create_database(ctx.obj[Context.OUTPUT_PATH], ctx.obj[Context.DUP_DATABASE])
    converter = UrlConverter(
        logger=logger,
        con=con,
        symbol_replace_value=ctx.obj[Context.SYMBOL_REPLACE_VALUE],
        index_list=ctx.obj.get(Context.INDEX_LIST),
        verbosity_level=ctx.obj.get(Context.VERBOSITY_LEVEL),
        format_name=format_name,
        encoding=encoding,
        proxy=proxy,
    )

    converter.convert(url)

    sys.exit(finalize(con, converter, is_create_db))
def url(ctx, url, format_name, encoding, proxy):
    """
    Scrape tabular data from a URL and convert data to a SQLite database file.
    """
    if typepy.is_empty_sequence(url):
        sys.exit(ExitCode.NO_INPUT)

    initialize_log_handler(ctx.obj[Context.LOG_LEVEL])
    logger = make_logger("{:s} url".format(PROGRAM_NAME), ctx.obj[Context.LOG_LEVEL])

    # a missing/corrupt app config file is tolerated; fall back to defaults
    try:
        app_configs = app_config_mgr.load()
    except ValueError as e:
        logger.debug(msgfy.to_debug_message(e))
        app_configs = {}

    # fall back to the encoding stored in the application config file
    if typepy.is_empty_sequence(encoding):
        encoding = app_configs.get(ConfigKey.DEFAULT_ENCODING)
        logger.debug("use default encoding: {}".format(encoding))

    # fall back to the proxy server stored in the application config file
    if typepy.is_null_string(proxy):
        proxy = app_configs.get(ConfigKey.PROXY_SERVER)

    convert_configs = load_convert_config(logger, ctx.obj[Context.CONVERT_CONFIG], subcommand="url")

    con, is_create_db = create_database(ctx.obj[Context.OUTPUT_PATH], ctx.obj[Context.DUP_DATABASE])
    converter = UrlConverter(
        logger=logger,
        con=con,
        symbol_replace_value=ctx.obj[Context.SYMBOL_REPLACE_VALUE],
        add_pri_key_name=ctx.obj[Context.ADD_PRIMARY_KEY_NAME],
        convert_configs=convert_configs,
        index_list=ctx.obj.get(Context.INDEX_LIST),
        is_type_inference=ctx.obj[Context.TYPE_INFERENCE],
        is_type_hint_header=ctx.obj[Context.TYPE_HINT_HEADER],
        verbosity_level=ctx.obj.get(Context.VERBOSITY_LEVEL),
        format_name=format_name,
        encoding=encoding,
        proxy=proxy,
    )

    converter.convert(url)

    sys.exit(finalize(con, converter, is_create_db))
def __create_table_from_tabledata(self, tabledata, index_attr_list=None):
    """Create a table from a TableData instance and insert its rows.

    :param tabledata: Table name/headers/rows to materialize.
    :param index_attr_list: Attribute names to create indices for (optional).
    :raises ValueError: If the value matrix of ``tabledata`` is empty.
    """
    self.validate_access_permission(["w", "a"])
    validate_table_name(tabledata.table_name)
    logger.debug(
        "__create_table_from_tabledata: table={}, headers={}".format(
            tabledata.table_name, tabledata.header_list))

    attr_name_list = self.__sanitize_attr_name_list(tabledata.header_list)
    try:
        self.__validate_attr_name_list(attr_name_list)
    except pathvalidate.ReservedNameError:
        # NOTE(review): reserved attribute names are deliberately tolerated
        # here -- presumably sanitization already made them usable; confirm
        pass

    if typepy.is_empty_sequence(tabledata.value_matrix):
        raise ValueError("input data is null: '{} ({})'".format(
            tabledata.table_name, ", ".join(attr_name_list)))
    self.__verify_value_matrix(attr_name_list, tabledata.value_matrix)

    self.create_table(
        tabledata.table_name,
        self.__get_attr_desc_list(attr_name_list, tabledata.value_matrix))
    self.insert_many(tabledata.table_name, tabledata.value_matrix)
    if typepy.is_not_empty_sequence(index_attr_list):
        self.create_index_list(
            tabledata.table_name,
            self.__sanitize_attr_name_list(index_attr_list))
    self.commit()
def _to_dp_list(self, data_list, type_hint=None, strip_str=None, strict_type_mapping=None):
    """Convert a sequence of values into DataProperty objects.

    When no explicit ``type_hint`` is given, the most frequently observed
    type so far is tried as the hint for each subsequent value.
    """
    from collections import Counter
    from typepy import StrictLevel

    if is_empty_sequence(data_list):
        return []

    observed_types = Counter()
    converted = []
    for data in data_list:
        candidate_hint = type_hint
        if type_hint is None and observed_types:
            most_common_type, _count = observed_types.most_common(1)[0]
            # only use the inferred hint when this value strictly matches it
            if most_common_type(data, strict_level=StrictLevel.MAX).is_type():
                candidate_hint = most_common_type

        dataprop = self.__to_dp(
            data=data,
            type_hint=candidate_hint,
            strip_str=strip_str,
            strict_type_mapping=strict_type_mapping,
        )
        observed_types[dataprop.type_class] += 1
        converted.append(dataprop)

    return converted
def __strip_empty_col(self):
    """Drop leading all-empty columns from ``self.__all_values``.

    The matrix is loaded into an in-memory SQLite table, columns are scanned
    left to right, and everything from the first column that contains data
    onward is kept.

    :raises ValueError: If no column contains any data.
    """
    from simplesqlite import connect_memdb
    from simplesqlite.query import Attr, AttrList

    con = connect_memdb()

    tmp_table_name = "tmp"
    headers = ["a{:d}".format(i) for i in range(len(self.__all_values[0]))]
    con.create_table_from_data_matrix(tmp_table_name, headers, self.__all_values)

    for col_idx, header in enumerate(headers):
        result = con.select(select=Attr(header), table_name=tmp_table_name)
        if any(typepy.is_not_null_string(record[0]) for record in result.fetchall()):
            break
    else:
        # BUG FIX: the original fell through with col_idx stuck at the last
        # column when every column was empty, silently keeping one empty
        # column and making the emptiness check below unreachable.  Raise
        # here instead, which is what that check clearly intended.
        raise ValueError("the table contains no non-empty column")

    strip_headers = headers[col_idx:]

    result = con.select(select=AttrList(strip_headers), table_name=tmp_table_name)
    self.__all_values = result.fetchall()
def to_dp_list(self, values):
    """Convert *values* into a list of DataProperty instances."""
    if not is_empty_sequence(values):
        self.__update_dp_converter()
        return self._to_dp_list(values, strip_str=self.strip_str_value)

    return []
def _write_header(self):
    """Write each header name into its own cell of the first header row."""
    skip = not self.is_write_header or typepy.is_empty_sequence(self.header_list)
    if skip:
        return

    for col, value in enumerate(self.header_list):
        self.stream.write(self.first_header_row, col, value)
def _preprocess_table_dp(self):
    """Build the DataProperty matrix for the table body (runs once).

    When headers are missing and default headers are enabled, header names
    are generated from column indices (presumably A, B, C, ... given the
    helper name -- confirm). If value conversion fails, the matrix falls
    back to empty rather than aborting the write.
    """
    if self._is_complete_table_dp_preprocess:
        return  # already preprocessed; this method is idempotent

    self._logger.logger.debug("_preprocess_table_dp")

    if typepy.is_empty_sequence(self.header_list) and self._use_default_header:
        self.header_list = [
            convert_idx_to_alphabet(col_idx)
            for col_idx in range(len(self.__value_matrix_org[0]))
        ]

    try:
        self._table_value_dp_matrix = self._dp_extractor.to_dp_matrix(
            to_value_matrix(self.header_list, self.__value_matrix_org)
        )
    except TypeError as e:
        # degrade to an empty body instead of failing the whole write
        self._logger.logger.debug(msgfy.to_error_message(e))
        self._table_value_dp_matrix = []

    self._column_dp_list = self._dp_extractor.to_column_dp_list(
        self._table_value_dp_matrix, self._column_dp_list
    )

    self._is_complete_table_dp_preprocess = True
def write_table(self):
    """
    |write_table| with `Labeled Tab-separated Values (LTSV)
    <http://ltsv.org/>`__ format. Invalid characters in labels/data are
    removed.

    :raises pytablewriter.EmptyHeaderError: If the |headers| is empty.
    :Example: :ref:`example-ltsv-table-writer`
    """
    with self._logger:
        self._verify_property()
        self._preprocess()

        for values in self._table_value_matrix:
            items = []
            for header_name, value in zip(self.headers, values):
                if not typepy.is_not_null_string(value):
                    continue
                items.append(
                    "{:s}:{}".format(
                        pathvalidate.sanitize_ltsv_label(header_name), value))

            if items:
                self._write_line("\t".join(items))
def write_table(self):
    """
    |write_table| with `Labeled Tab-separated Values (LTSV)
    <http://ltsv.org/>`__ format. Invalid characters in labels/data are
    removed.

    :raises pytablewriter.EmptyHeaderError: If the |header_list| is empty.
    :Example: :ref:`example-ltsv-table-writer`
    """
    with self._logger:
        self._verify_property()
        self._preprocess()

        for value_list in self._table_value_matrix:
            fields = []
            for header_name, value in zip(self.header_list, value_list):
                if not typepy.is_not_null_string(value):
                    continue
                fields.append(
                    "{:s}:{}".format(
                        pathvalidate.sanitize_ltsv_label(header_name), value))

            if fields:
                self._write_line("\t".join(fields))
def _write_row(self, row: int, values: Sequence[str]) -> None:
    """Write one table row, interleaving column delimiters between values."""
    if typepy.is_empty_sequence(values):
        return

    dp_list = self._column_dp_list

    # delimiters: left border, one between each adjacent column pair, right border
    delimiters = [
        self.__to_column_delimiter(row, None, dp_list[0], self.char_left_side_row)
    ]
    for col_idx in range(len(dp_list) - 1):
        delimiters.append(
            self.__to_column_delimiter(
                row, dp_list[col_idx], dp_list[col_idx + 1], self.column_delimiter
            )
        )
    delimiters.append(
        self.__to_column_delimiter(row, dp_list[-1], None, self.char_right_side_row)
    )

    # interleave: delimiter, value, delimiter, value, ..., delimiter
    interleaved = [""] * (len(delimiters) + len(values))
    interleaved[::2] = delimiters
    interleaved[1::2] = list(values)
    self._write_line("".join(chain.from_iterable(interleaved)))
def is_empty_header(self):
    """
    :return: |True| if :py:attr:`.header_list` has no header names.
    :rtype: bool
    """
    return bool(typepy.is_empty_sequence(self.header_list))
def __create_table_from_tabledata(
        self, tabledata, index_attr_list=None):
    """Create a table from a TableData instance and insert its rows.

    :param tabledata: Table name/headers/rows to materialize.
    :param index_attr_list: Attribute names to create indices for (optional).
    :raises ValueError: If the value matrix of ``tabledata`` is empty.
    """
    self.validate_access_permission(["w", "a"])
    validate_table_name(tabledata.table_name)
    logger.debug(
        "__create_table_from_tabledata: table={}, headers={}".format(
            tabledata.table_name, tabledata.header_list))

    attr_name_list = self.__sanitize_attr_name_list(tabledata.header_list)
    try:
        self.__validate_attr_name_list(attr_name_list)
    except pathvalidate.ReservedNameError:
        # NOTE(review): reserved attribute names are deliberately tolerated
        # here -- presumably sanitization already made them usable; confirm
        pass

    if typepy.is_empty_sequence(tabledata.value_matrix):
        raise ValueError("input data is null: '{} ({})'".format(
            tabledata.table_name, ", ".join(attr_name_list)))
    self.__verify_value_matrix(attr_name_list, tabledata.value_matrix)

    self.create_table(
        tabledata.table_name,
        self.__get_attr_desc_list(
            attr_name_list, tabledata.value_matrix))
    self.insert_many(tabledata.table_name, tabledata.value_matrix)
    if typepy.is_not_empty_sequence(index_attr_list):
        self.create_index_list(
            tabledata.table_name,
            self.__sanitize_attr_name_list(index_attr_list))
    self.commit()
def _validate_empty_header(self):
    """
    :raises pytablewriter.EmptyHeaderError:
        If the |header_list| is empty.
    """
    if not typepy.is_empty_sequence(self.header_list):
        return

    raise EmptyHeaderError("header_list expected to have one or more header names")
def _to_data_matrix(self):
    """Parse the LTSV input stream into a list of per-row OrderedDicts.

    Each tab-separated item is ``label:value``. LTSV values may themselves
    contain ":" (URLs, timestamps), so items are split on the *first* colon
    only -- splitting on every colon corrupted such values.

    :raises DataError: If an item contains no ":" separator.
    :raises InvalidHeaderNameError: If a label has invalid characters.
    """
    from collections import OrderedDict

    data_matrix = []
    for row_idx, row in enumerate(self._ltsv_input_stream):
        if typepy.is_empty_sequence(row):
            continue

        ltsv_record = OrderedDict()
        for col_idx, ltsv_item in enumerate(row.strip().split("\t")):
            try:
                # BUG FIX: split only at the first colon; values may contain ":"
                label, value = ltsv_item.split(":", 1)
            except ValueError:
                raise DataError(
                    "invalid lstv item found: line={}, col={}, item='{}'".
                    format(row_idx, col_idx, ltsv_item))
            label = label.strip('"')

            try:
                pv.validate_ltsv_label(label)
            except (pv.NullNameError, pv.InvalidCharError):
                raise InvalidHeaderNameError(
                    "invalid label found (acceptable chars are [0-9A-Za-z_.-]): "
                    "line={}, col={}, label='{}'".format(
                        row_idx, col_idx, label))

            ltsv_record[label] = value

        data_matrix.append(ltsv_record)

    # using generator to prepare for future enhancement to support
    # iterative load.
    yield data_matrix
def to_table_data(self):
    """Yield a TableData built from the loaded source rows.

    When the loader supplies no headers, the first source row is used as
    the header row and must not contain empty items.
    """
    if typepy.is_empty_sequence(self._loader.header_list):
        header_list = self._source_data[0]

        if any(typepy.is_null_string(header) for header in header_list):
            raise InvalidDataError(
                "the first line includes empty string item."
                "all of the items should contain header name."
                "actual={}".format(header_list))

        data_matrix = self._source_data[1:]
    else:
        header_list = self._loader.header_list
        data_matrix = self._source_data

    if not data_matrix:
        raise InvalidDataError(
            "data row must be greater or equal than one")

    self._loader.inc_table_count()

    yield TableData(
        self._loader.make_table_name(), header_list, data_matrix,
        quoting_flags=self._loader.quoting_flags)
def to_dp_list(self, values: Sequence) -> List[DataProperty]:
    """Convert *values* into a list of DataProperty instances."""
    if not is_empty_sequence(values):
        self.__update_dp_converter()
        return self._to_dp_list(values)

    return []
def write_table(self, **kwargs) -> None:
    """
    |write_table| with `Labeled Tab-separated Values (LTSV)
    <http://ltsv.org/>`__ format. Invalid characters in labels/data are
    removed.

    :Example: :ref:`example-ltsv-table-writer`
    """
    with self._logger:
        self._verify_property()
        self._preprocess()

        for record_values in self._table_value_matrix:
            fields = []
            for header_name, value in zip(self.headers, record_values):
                if not typepy.is_not_null_string(value):
                    continue
                fields.append(
                    f"{pathvalidate.sanitize_ltsv_label(header_name):s}:{value}")

            if fields:
                self._write_line("\t".join(fields))
def file(ctx, files, format_name, encoding):
    """
    Convert tabular data within
    CSV/Excel/HTML/JSON/Jupyter Notebook/LDJSON/LTSV/Markdown/Mediawiki/SQLite/SSV/TSV
    file(s) to a SQLite database file.
    """
    if typepy.is_empty_sequence(files):
        sys.exit(ExitCode.NO_INPUT)

    logger = make_logger("{:s} file".format(PROGRAM_NAME), ctx.obj[Context.LOG_LEVEL])

    con, is_create_db = create_database(ctx.obj[Context.OUTPUT_PATH], ctx.obj[Context.DUP_DATABASE])
    converter = FileConverter(
        logger=logger,
        con=con,
        symbol_replace_value=ctx.obj[Context.SYMBOL_REPLACE_VALUE],
        index_list=ctx.obj.get(Context.INDEX_LIST),
        verbosity_level=ctx.obj.get(Context.VERBOSITY_LEVEL),
        format_name=format_name,
        encoding=encoding,
    )

    # convert each input file into its own table(s) in the same database
    for file_path in files:
        converter.convert(file_path)

    sys.exit(finalize(con, converter, is_create_db))
def _verify_property(self):
    """Validate the writer state before rendering a table."""
    self._verify_table_name()
    self._verify_stream()

    has_no_data = all([
        typepy.is_empty_sequence(self.header_list),
        typepy.is_empty_sequence(self.value_matrix),
        typepy.is_empty_sequence(self._table_value_dp_matrix),
    ])
    if has_no_data:
        raise EmptyTableDataError()

    self._verify_header()

    try:
        self._verify_value_matrix()
    except EmptyValueError:
        # a table with headers but no rows is still writable
        pass
def _to_dp_list(self, data_list, type_hint=None, strip_str=None, strict_level_map=None):
    """Convert a sequence of values into DataProperty objects.

    When no explicit ``type_hint`` is given, the most frequently observed
    type so far is tried as the hint for each subsequent value.
    """
    if is_empty_sequence(data_list):
        return []

    observed_types = Counter()
    converted = []
    for data in data_list:
        candidate_hint = type_hint
        if type_hint is None and observed_types:
            most_common_type, _count = observed_types.most_common(1)[0]
            # only use the inferred hint when this value strictly matches it
            if most_common_type(data, strict_level=StrictLevel.MAX).is_type():
                candidate_hint = most_common_type

        dataprop = self.__to_dp(
            data=data,
            type_hint=candidate_hint,
            strip_str=strip_str,
            strict_level_map=strict_level_map,
        )
        observed_types[dataprop.type_class] += 1
        converted.append(dataprop)

    return converted
def as_tuple(self):
    """
    :return: Rows of the table, one |namedtuple| per non-empty record.
    :rtype: list of |namedtuple|

    :Sample Code:
        .. code:: python

            from tabledata import TableData
            records = TableData(
                "sample", ["a", "b"], [[1, 2], [3.3, 4.4]]
            ).as_tuple()
            for record in records:
                print(record)

    :Output:
        .. code-block:: none

            Row(a=1, b=2)
            Row(a=Decimal('3.3'), b=Decimal('4.4'))
    """
    Row = namedtuple("Row", self.headers)

    for value_dp_list in self.value_dp_matrix:
        if typepy.is_empty_sequence(value_dp_list):
            continue

        yield Row(*(value_dp.data for value_dp in value_dp_list))
def __strip_empty_col(self):
    """Drop leading all-empty columns from ``self.__all_values``.

    The matrix is loaded into an in-memory SQLite table, columns are scanned
    left to right, and everything from the first column that contains data
    onward is kept.

    :raises ValueError: If no column contains any data.
    """
    from simplesqlite import connect_sqlite_db_mem
    from simplesqlite.sqlquery import SqlQuery

    con = connect_sqlite_db_mem()

    tmp_table_name = "tmp"
    header_list = [
        "a{:d}".format(i) for i in range(len(self.__all_values[0]))
    ]
    con.create_table_from_data_matrix(table_name=tmp_table_name,
                                      attr_name_list=header_list,
                                      data_matrix=self.__all_values)
    for col_idx, header in enumerate(header_list):
        result = con.select(select=SqlQuery.to_attr_str(header),
                            table_name=tmp_table_name)
        if any(typepy.is_not_null_string(record[0])
               for record in result.fetchall()):
            break
    else:
        # BUG FIX: the original fell through with col_idx stuck at the last
        # column when every column was empty, silently keeping one empty
        # column and making the emptiness check below unreachable.  Raise
        # here instead, which is what that check clearly intended.
        raise ValueError("the table contains no non-empty column")

    strip_header_list = header_list[col_idx:]

    result = con.select(select=",".join(
        SqlQuery.to_attr_str_list(strip_header_list)),
        table_name=tmp_table_name)
    self.__all_values = result.fetchall()
def to_dp_list(self, value_list):
    """Convert *value_list* into a list of DataProperty instances."""
    if not is_empty_sequence(value_list):
        self.__update_dp_converter()
        return self._to_dp_list(value_list, strip_str=self.strip_str_value)

    return []
def _write_row(self, value_list):
    """Write one row: values joined by the column delimiter and wrapped by
    the row side characters."""
    if typepy.is_empty_sequence(value_list):
        return

    line = "{}{}{}".format(
        self.char_left_side_row,
        self.column_delimiter.join(value_list),
        self.char_right_side_row,
    )
    self._write_line(line)
def is_empty_record(self):
    """
    :return: |True| if :py:attr:`.value_matrix` contains no rows.
    :rtype: bool
    """
    return bool(typepy.is_empty_sequence(self.value_matrix))
def to_table_data(self):
    """Yield a TableData built from the loaded source rows.

    When the loader supplies no headers, the first source row is used as
    the header row and must not contain empty items.
    """
    if typepy.is_empty_sequence(self._loader.headers):
        headers = self._source_data[0]

        if any(typepy.is_null_string(header) for header in headers):
            raise DataError("the first line includes empty string item."
                            "all of the items should contain header name."
                            "actual={}".format(headers))

        data_matrix = self._source_data[1:]
    else:
        headers = self._loader.headers
        data_matrix = self._source_data

    if not data_matrix:
        raise DataError("data row must be greater or equal than one")

    self._loader.inc_table_count()

    yield TableData(
        self._loader.make_table_name(),
        headers,
        data_matrix,
        dp_extractor=self._loader.dp_extractor,
        type_hints=self._extract_type_hints(headers),
    )
def _validate_empty_header(self) -> None:
    """
    Raises:
        ValueError: If the |headers| is empty.
    """
    if not typepy.is_empty_sequence(self.headers):
        return

    raise ValueError("headers expected to have one or more header names")
def _write_header(self) -> None:
    """Write the header row; raise when headers are required but missing."""
    if self.is_write_header:
        if typepy.is_empty_sequence(self._table_headers):
            raise ValueError("header is empty")
        self._write_row(HEADER_ROW, self._table_headers)
def _write_header(self):
    """Write the header row; raise when headers are required but missing."""
    if self.is_write_header:
        if typepy.is_empty_sequence(self._table_header_list):
            raise EmptyHeaderError("header is empty")
        self._write_row(self._table_header_list)
def url(ctx, url, format_name, encoding, proxy):
    """
    Scrape tabular data from a URL and convert data to a SQLite database file.
    """
    if typepy.is_empty_sequence(url):
        sys.exit(ExitCode.NO_INPUT)

    initialize_log_handler(ctx.obj[Context.LOG_LEVEL])
    logger = make_logger("{:s} url".format(PROGRAM_NAME), ctx.obj[Context.LOG_LEVEL])

    # a missing/corrupt app config file is tolerated; fall back to defaults
    try:
        app_configs = app_config_mgr.load()
    except ValueError as e:
        logger.debug(msgfy.to_debug_message(e))
        app_configs = {}

    # fall back to the encoding stored in the application config file
    if typepy.is_empty_sequence(encoding):
        encoding = app_configs.get(ConfigKey.DEFAULT_ENCODING)
        logger.debug("use default encoding: {}".format(encoding))

    # fall back to the proxy server stored in the application config file
    if typepy.is_null_string(proxy):
        proxy = app_configs.get(ConfigKey.PROXY_SERVER)

    convert_configs = load_convert_config(logger, ctx.obj[Context.CONVERT_CONFIG], subcommand="url")

    con, is_create_db = create_database(ctx.obj[Context.OUTPUT_PATH], ctx.obj[Context.DUP_DATABASE])
    converter = UrlConverter(
        logger=logger,
        con=con,
        symbol_replace_value=ctx.obj[Context.SYMBOL_REPLACE_VALUE],
        add_pri_key_name=ctx.obj[Context.ADD_PRIMARY_KEY_NAME],
        convert_configs=convert_configs,
        index_list=ctx.obj.get(Context.INDEX_LIST),
        is_type_inference=ctx.obj[Context.TYPE_INFERENCE],
        is_type_hint_header=ctx.obj[Context.TYPE_HINT_HEADER],
        verbosity_level=ctx.obj.get(Context.VERBOSITY_LEVEL),
        format_name=format_name,
        encoding=encoding,
        proxy=proxy,
    )

    converter.convert(url)

    sys.exit(finalize(con, converter, is_create_db))
def url(ctx, url, format_name, output_path, encoding, proxy):
    """
    Scrape tabular data from a URL and convert data to a SQLite database file.
    """
    if typepy.is_empty_sequence(url):
        sys.exit(ExitCode.NO_INPUT)

    con = create_database(ctx, output_path)
    verbosity_level = ctx.obj.get(Context.VERBOSITY_LEVEL)
    schema_extractor = get_schema_extractor(con, verbosity_level)
    result_counter = ResultCounter()
    logger = make_logger("{:s} url".format(PROGRAM_NAME), ctx.obj[Context.LOG_LEVEL])

    # fall back to the proxy server stored in the application config file
    if typepy.is_null_string(proxy):
        proxy = app_config_manager.load().get(ConfigKey.PROXY_SERVER)
    proxies = {
        "http": proxy,
        "https": proxy,
    }

    try:
        loader = create_url_loader(logger, url, format_name, encoding, proxies)
    except ptr.LoaderNotFoundError as e:
        try:
            # retry with the HTML loader when no loader matches the format
            loader = create_url_loader(logger, url, "html", encoding, proxies)
        except ptr.LoaderNotFoundError as e:
            logger.error(e)
            sys.exit(ExitCode.FAILED_LOADER_NOT_FOUND)

    try:
        for tabledata in loader.load():
            sqlite_tabledata = ptr.SQLiteTableDataSanitizer(
                tabledata).sanitize()

            try:
                TableCreator(dst_con=con, tabledata=sqlite_tabledata).create()
                result_counter.inc_success()
            except (ValueError) as e:
                # a single bad table must not abort the remaining tables
                logger.debug(u"url={}, message={}".format(url, str(e)))
                result_counter.inc_fail()
                continue

            logger.info(
                get_success_message(
                    verbosity_level, url,
                    schema_extractor.get_table_schema_text(
                        sqlite_tabledata.table_name).strip()))
    except ptr.InvalidDataError as e:
        logger.error(u"invalid data: url={}, message={}".format(url, str(e)))
        result_counter.inc_fail()

    write_completion_message(logger, output_path, result_counter)

    sys.exit(result_counter.get_return_code())
def __to_record_list(self, record_list):
    """Convert each matrix row to a record; pass the rows through untouched
    when there are no headers to map against."""
    if typepy.is_empty_sequence(self.header_list):
        return record_list

    return [self.__to_record(rec) for rec in record_list]
def __init__(self, table, attrs):
    """Store a validated table name and a non-empty attribute list.

    :raises TypeError: If ``attrs`` is not an AttrList instance.
    :raises ValueError: If ``attrs`` is empty.
    """
    validate_table_name(table)

    if not isinstance(attrs, AttrList):
        raise TypeError("attr must be a AttrList class instance: actual={}".format(type(attrs)))

    if typepy.is_empty_sequence(attrs):
        raise ValueError("empty attributes")

    self.__table = table
    self.__attrs = attrs
def file(ctx, files, recursive, pattern, exclude, follow_symlinks, format_name, encoding):
    """
    Convert tabular data within
    CSV/Excel/HTML/JSON/Jupyter Notebook/LDJSON/LTSV/Markdown/Mediawiki/SQLite/SSV/TSV
    file(s) or named pipes to a SQLite database file.
    """
    initialize_log_handler(ctx.obj[Context.LOG_LEVEL])
    logger = make_logger("{:s} file".format(PROGRAM_NAME), ctx.obj[Context.LOG_LEVEL])

    if typepy.is_empty_sequence(files):
        logger.error("require at least one file specification.\n\n{}".format(ctx.get_help()))
        sys.exit(ExitCode.NO_INPUT)

    convert_configs = load_convert_config(
        logger, ctx.obj[Context.CONVERT_CONFIG], subcommand="file"
    )

    con, is_create_db = create_database(ctx.obj[Context.OUTPUT_PATH], ctx.obj[Context.DUP_DATABASE])
    converter = FileConverter(
        logger=logger,
        con=con,
        symbol_replace_value=ctx.obj[Context.SYMBOL_REPLACE_VALUE],
        add_pri_key_name=ctx.obj[Context.ADD_PRIMARY_KEY_NAME],
        convert_configs=convert_configs,
        index_list=ctx.obj.get(Context.INDEX_LIST),
        is_type_inference=ctx.obj[Context.TYPE_INFERENCE],
        is_type_hint_header=ctx.obj[Context.TYPE_HINT_HEADER],
        verbosity_level=ctx.obj.get(Context.VERBOSITY_LEVEL),
        format_name=format_name,
        encoding=encoding,
        exclude_pattern=exclude,
        follow_symlinks=follow_symlinks,
    )

    for file_path in files:
        dir_path_obj = path.Path(file_path)

        # skip directory symlinks unless the user opted in to following them
        if not follow_symlinks and dir_path_obj.islink() and dir_path_obj.isdir():
            logger.debug(
                "skip symlink to a directory: {} -> {}".format(
                    dir_path_obj, dir_path_obj.readlink()
                )
            )
            continue

        if recursive and dir_path_obj.isdir():
            # walk the directory tree, converting files that match the pattern
            for file_path_obj in dir_path_obj.walkfiles(pattern):
                converter.convert(file_path_obj)
        else:
            converter.convert(file_path)

    sys.exit(finalize(con, converter, is_create_db))
def has_attr_list(self, table_name, attr_name_list):
    """
    :param str table_name: Table name that attributes exists.
    :param str attr_name_list: Attribute names to tested.
    :return: |True| if the table has all of the attributes.
    :rtype: bool
    :raises simplesqlite.TableNotFoundError:
        |raises_verify_table_existence|
    """
    if typepy.is_empty_sequence(attr_name_list):
        return False

    return all(
        self.has_attr(table_name, attr_name)
        for attr_name in attr_name_list
    )
def _write_header(self):
    """Write styled header cells; fill the remaining header rows with blanks."""
    if not self.is_write_header or typepy.is_empty_sequence(self.headers):
        return

    fmt_props = self.format_table.get(self.TableFormat.HEADER, self.default_format)
    cell_format = self.__add_format(fmt_props)

    self.stream.write_row(
        row=self.first_header_row, col=0, data=self.headers, cell_format=cell_format
    )

    blank_row = [""] * len(self.headers)
    for row in range(self.first_header_row, self.last_header_row):
        self.stream.write_row(row=row, col=0, data=blank_row, cell_format=cell_format)
def _write_header(self):
    """Build the <thead> section of the HTML table from the header names."""
    tags = _get_tags_module()

    if not self.is_write_header:
        return
    if typepy.is_empty_sequence(self.headers):
        raise EmptyHeaderError("headers is empty")

    row_tag = tags.tr()
    for header in self.headers:
        row_tag += tags.th(MultiByteStrDecoder(header).unicode_str)

    head_tag = tags.thead()
    head_tag += row_tag
    self._table_tag += head_tag
def insert_many(self, table_name, insert_record_list):
    """
    Send an INSERT query with multiple records to the database.

    :param str table_name: Table name of executing the query.
    :param insert_record_list: Records to be inserted.
    :type insert_record_list: |dict|/|namedtuple|/|list|/|tuple|
    :raises IOError: |raises_write_permission|
    :raises simplesqlite.NullDatabaseConnectionError:
        |raises_check_connection|
    :raises simplesqlite.TableNotFoundError:
        |raises_verify_table_existence|
    :raises simplesqlite.OperationalError: |raises_operational_error|

    :Example: :ref:`example-insert-records`

    .. seealso:: :py:meth:`.sqlquery.SqlQuery.make_insert`
    """
    self.validate_access_permission(["w", "a"])
    self.verify_table_existence(table_name)
    logger.debug("insert {} records".format(
        len(insert_record_list) if insert_record_list else 0))

    # inserting nothing is a no-op, not an error
    if typepy.is_empty_sequence(insert_record_list):
        return

    record_list = RecordConvertor.to_record_list(
        self.get_attr_name_list(table_name), insert_record_list)
    # build the placeholder query from the first record's shape
    query = SqlQuery.make_insert(table_name, record_list[0])

    try:
        self.connection.executemany(query, record_list)
    except sqlite3.OperationalError as e:
        # enrich the error with caller location, query, and a data sample
        caller = logging.getLogger().findCaller()
        file_path, line_no, func_name = caller[:3]
        raise OperationalError(
            "{:s}({:d}) {:s}: failed to execute query:\n".format(
                file_path, line_no, func_name) + " query={}\n".format(query) +
            " msg='{}'\n".format(str(e)) +
            " db={}\n".format(self.database_path) +
            " records={}\n".format(record_list[:2]))
def _normalize_headers(self):
    """Return SQLite-safe, de-duplicated attribute names for the table.

    When the source table has no headers, default names are generated from
    column indices. Duplicate names are renamed with ``_1``, ``_2``, ...
    suffixes unless the duplicate-column handler is set to ``"error"``.

    :raises DataError: If both the headers and the data body are empty.
    :raises ValueError: If duplicates exist and the handler is ``"error"``.
    """
    if typepy.is_empty_sequence(self._tabledata.headers):
        try:
            return [
                self.__get_default_header(col_idx)
                for col_idx in range(len(self._tabledata.rows[0]))
            ]
        except IndexError:
            # rows[0] does not exist either: no way to infer column count
            raise DataError("header list and data body are empty")

    attr_name_list = AttrList.sanitize(
        super(SQLiteTableDataSanitizer, self)._normalize_headers()
    )

    try:
        for attr_name in attr_name_list:
            validate_sqlite_attr_name(attr_name)
    except ReservedNameError:
        # NOTE(review): reserved names pass through unchanged -- presumably
        # handled downstream (e.g. by quoting); confirm
        pass

    # duplicated attribute name handling ---
    for key, count in Counter(attr_name_list).most_common():
        if count <= 1:
            continue

        if self.__dup_col_handler == "error":
            raise ValueError("duplicate column name: {}".format(key))

        # rename duplicate headers
        rename_target_idx_list = [i for i, attr in enumerate(attr_name_list) if attr == key][1:]
        suffix_count = 0
        for rename_target_idx in rename_target_idx_list:
            while True:
                # keep bumping the suffix until the candidate is unused
                suffix_count += 1
                attr_name_candidate = "{:s}_{:d}".format(key, suffix_count)
                if attr_name_candidate in attr_name_list:
                    continue

                attr_name_list[rename_target_idx] = attr_name_candidate
                break

    return attr_name_list
def create_index_list(self, table_name, attr_name_list):
    """
    :param str table_name: Table name that exists attribute.
    :param list attr_name_list:
        List of attribute names to create indices.
        Ignore attributes that are not existing in the table.

    .. seealso:: :py:meth:`.create_index`
    """
    self.validate_access_permission(["w", "a"])

    if typepy.is_empty_sequence(attr_name_list):
        return

    # only index attributes that actually exist in the table
    existing_attrs = set(self.get_attr_name_list(table_name))
    for attribute in existing_attrs.intersection(attr_name_list):
        self.create_index(table_name, attribute)
def make_insert(cls, table, insert_tuple):
    """
    Make INSERT query.

    :param str table: Table name of executing the query.
    :param list/tuple insert_tuple: Insertion data.
    :return: Query of SQLite.
    :rtype: str
    :raises ValueError: If ``insert_tuple`` is empty |list|/|tuple|.
    :raises simplesqlite.InvalidTableNameError:
        |raises_validate_table_name|
    """
    validate_table_name(table)
    table = cls.to_table_str(table)

    if typepy.is_empty_sequence(insert_tuple):
        raise ValueError("empty insert list/tuple")

    # one "?" placeholder per value to be bound
    placeholders = ",".join("?" for _i in insert_tuple)

    return "INSERT INTO {:s} VALUES ({:s})".format(table, placeholders)
def __validate_attr_name_list(attr_name_list):
    """Validate that the attribute-name list is non-empty and that every
    name is a legal SQLite attribute name."""
    if typepy.is_empty_sequence(attr_name_list):
        raise InvalidAttributeNameError("attribute name list is empty")

    for name in attr_name_list:
        pathvalidate.validate_sqlite_attr_name(name)
def _write_header(self):
    """Write each header name into its own cell of the first header row."""
    skip = not self.is_write_header or typepy.is_empty_sequence(self.headers)
    if skip:
        return

    for col, value in enumerate(self.headers):
        self.stream.write(self.first_header_row, col, value)
def _write_header(self):
    """Skip header writing entirely when there are no headers; otherwise
    defer to the base-class implementation."""
    if typepy.is_empty_sequence(self.headers):
        return

    super(CsvTableWriter, self)._write_header()
def test_normal(self, con):
    # a fresh connection is expected to report an empty profile list
    profiles = con.get_profile()

    assert typepy.is_empty_sequence(profiles)
def __validate_stats_body(self, body_line_list):
    """Raise ParseError when the statistics body contains no lines."""
    if not typepy.is_empty_sequence(body_line_list):
        return

    raise ParseError(reason=ParseErrorReason.EMPTY_STATISTICS)
def _validate_headers(self):
    """Validate that headers exist and that each one is individually valid."""
    if typepy.is_empty_sequence(self._tabledata.headers):
        raise ValueError("attribute name list is empty")

    for header in self._tabledata.headers:
        self._validate_header(header)