def test_smoke(self, tmpdir, filename):
    """Load a file from the ``data`` directory and store its tables in SQLite.

    A writable database is created under ``tmpdir``. Every non-empty table
    loaded from ``filename`` is dumped to stdout, sanitized and written to
    the database. The test passes when at least one table was created.
    """
    db_path = tmpdir.join("tmp.db")
    con = SimpleSQLite(str(db_path), "w")

    # Test data lives in the "data" directory next to this module.
    data_dir = os.path.join(os.path.dirname(__file__), "data")
    loader = ptr.TableFileLoader(os.path.join(data_dir, filename))

    created_table_count = 0

    for table in loader.load():
        if not table.is_empty():
            print(ptw.dump_tabledata(table))

            try:
                sanitized_table = ptr.SQLiteTableDataSanitizer(
                    table).sanitize()
                con.create_table_from_tabledata(sanitized_table)
            except ValueError as error:
                # A table that cannot be converted is reported, not fatal.
                print(error)
            else:
                created_table_count += 1

    con.commit()

    assert created_table_count > 0
def gs(ctx, credentials, title, output_path):
    """
    Convert a spreadsheet in Google Sheets to a SQLite database file.

    CREDENTIALS: OAuth2 Google credentials file.
    TITLE: Title of the Google Sheets to convert.
    """
    # NOTE(review): the docstring above is kept verbatim; it is presumably
    # rendered as the command's help text by the CLI framework -- confirm.
    #
    # Parameters:
    #   ctx          -- command context; ``ctx.obj`` is read for the verbosity
    #                   level, the log level and the index list.
    #   credentials  -- assigned to the loader's ``source`` attribute.
    #   title        -- assigned to the loader's ``title`` attribute.
    #   output_path  -- passed to ``create_database`` and to the completion
    #                   message.
    #
    # The function never returns normally: it always ends in ``sys.exit``
    # with the return code computed by the result counter.

    con = create_database(ctx, output_path)
    verbosity_level = ctx.obj.get(Context.VERBOSITY_LEVEL)
    schema_extractor = get_schema_extractor(con, verbosity_level)
    result_counter = ResultCounter()
    logger = make_logger(
        "{:s} gs".format(PROGRAM_NAME), ctx.obj[Context.LOG_LEVEL])
    table_creator = TableCreator(logger=logger, dst_con=con)

    loader = ptr.GoogleSheetsTableLoader()
    loader.source = credentials
    loader.title = title

    # Disabled fallback to a credentials path stored in the app configuration:
    # if typepy.is_null_string(loader.source):
    #     loader.source = app_config_manager.load().get(
    #         ConfigKey.GS_CREDENTIALS_FILE_PATH)

    try:
        for tabledata in loader.load():
            logger.debug(u"loaded tabledata: {}".format(
                six.text_type(tabledata)))

            sqlite_tabledata = ptr.SQLiteTableDataSanitizer(
                tabledata).sanitize()

            try:
                table_creator.create(
                    sqlite_tabledata, ctx.obj.get(Context.INDEX_LIST))
                # Count the created table; without this the counter would
                # only ever see failures (matches the ``file`` command).
                result_counter.inc_success()
            except (ptr.ValidationError, ptr.InvalidDataError):
                result_counter.inc_fail()
                # Do not report success for a table that was not created.
                continue

            # Look the schema up by the sanitized name: that is the table
            # which was actually handed to the table creator.
            logger.info(
                get_success_message(
                    verbosity_level, "google sheets",
                    schema_extractor.get_table_schema_text(
                        sqlite_tabledata.table_name).strip()))
    except ptr.OpenError as e:
        logger.error(e)
        result_counter.inc_fail()
    except AttributeError:
        logger.error(u"invalid credentials data: path={}".format(credentials))
        result_counter.inc_fail()
    except (ptr.ValidationError, ptr.InvalidDataError) as e:
        logger.error(u"invalid credentials data: path={}, message={}".format(
            credentials, str(e)))
        result_counter.inc_fail()

    write_completion_message(logger, output_path, result_counter)

    sys.exit(result_counter.get_return_code())
def url(ctx, url, format_name, output_path, encoding, proxy):
    """
    Scrape tabular data from a URL and convert data to a SQLite database file.
    """
    # NOTE(review): the docstring above is kept verbatim; it is presumably
    # rendered as the command's help text by the CLI framework -- confirm.
    #
    # Flow: build a URL loader (retrying with the "html" format when no loader
    # is found for ``format_name``), create one SQLite table per loaded table,
    # then exit with the return code computed by the result counter.

    if typepy.is_empty_sequence(url):
        sys.exit(ExitCode.NO_INPUT)

    con = create_database(ctx, output_path)
    verbosity_level = ctx.obj.get(Context.VERBOSITY_LEVEL)
    schema_extractor = get_schema_extractor(con, verbosity_level)
    result_counter = ResultCounter()
    logger = make_logger(
        "{:s} url".format(PROGRAM_NAME), ctx.obj[Context.LOG_LEVEL])

    # No proxy given: use the proxy server stored in the app configuration.
    if typepy.is_null_string(proxy):
        proxy = app_config_manager.load().get(ConfigKey.PROXY_SERVER)

    proxies = dict.fromkeys(("http", "https"), proxy)

    # Try the requested format first, then fall back to HTML.
    for candidate_format in (format_name, "html"):
        try:
            loader = create_url_loader(
                logger, url, candidate_format, encoding, proxies)
        except ptr.LoaderNotFoundError as error:
            not_found_error = error
        else:
            break
    else:
        # Both attempts failed; report the error raised by the fallback.
        logger.error(not_found_error)
        sys.exit(ExitCode.FAILED_LOADER_NOT_FOUND)

    try:
        for tabledata in loader.load():
            sqlite_tabledata = ptr.SQLiteTableDataSanitizer(
                tabledata).sanitize()

            try:
                TableCreator(dst_con=con, tabledata=sqlite_tabledata).create()
                result_counter.inc_success()
            except ValueError as error:
                logger.debug(u"url={}, message={}".format(url, str(error)))
                result_counter.inc_fail()
            else:
                schema_text = schema_extractor.get_table_schema_text(
                    sqlite_tabledata.table_name).strip()
                logger.info(
                    get_success_message(verbosity_level, url, schema_text))
    except ptr.InvalidDataError as error:
        logger.error(
            u"invalid data: url={}, message={}".format(url, str(error)))
        result_counter.inc_fail()

    write_completion_message(logger, output_path, result_counter)

    sys.exit(result_counter.get_return_code())
def file(ctx, files, output_path):
    """
    Convert tabular data within
    CSV/Excel/HTML/JSON/LTSV/Markdown/SQLite/TSV file(s)
    to a SQLite database file.
    """
    # NOTE(review): the docstring above is kept verbatim; it is presumably
    # rendered as the command's help text by the CLI framework -- confirm.
    #
    # Flow: for every input path, build a file loader, create one SQLite table
    # per loaded table and record success/failure in the result counter. The
    # function always ends in ``sys.exit`` with the counter's return code.

    if typepy.is_empty_sequence(files):
        sys.exit(ExitCode.NO_INPUT)

    con = create_database(ctx, output_path)
    verbosity_level = ctx.obj.get(Context.VERBOSITY_LEVEL)
    schema_extractor = get_schema_extractor(con, verbosity_level)
    result_counter = ResultCounter()
    logger = make_logger(
        "{:s} file".format(PROGRAM_NAME), ctx.obj[Context.LOG_LEVEL])
    table_creator = TableCreator(logger=logger, dst_con=con)

    for raw_path in files:
        source_path = path.Path(raw_path)

        if not source_path.isfile():
            logger.error(u"file not found: {}".format(source_path))
            result_counter.inc_fail()
            continue

        # Never read the database that is being written.
        if source_path == output_path:
            skip_message = (
                u"skip a file which has the same path as the output file ({})")
            logger.warn(skip_message.format(source_path))
            continue

        logger.debug(u"converting '{}'".format(source_path))

        try:
            loader = ptr.TableFileLoader(source_path)
        except ptr.InvalidFilePathError as error:
            logger.debug(error)
            result_counter.inc_fail()
            continue
        except ptr.LoaderNotFoundError:
            logger.debug(
                u"loader not found that coincide with '{}'".format(
                    source_path))
            result_counter.inc_fail()
            continue

        try:
            for tabledata in loader.load():
                logger.debug(u"loaded tabledata: {}".format(
                    six.text_type(tabledata)))

                sqlite_tabledata = ptr.SQLiteTableDataSanitizer(
                    tabledata).sanitize()

                try:
                    table_creator.create(
                        sqlite_tabledata, ctx.obj.get(Context.INDEX_LIST))
                    result_counter.inc_success()
                except (ValueError, IOError) as error:
                    logger.debug(
                        u"path={}, message={}".format(source_path, error))
                    result_counter.inc_fail()
                else:
                    schema_text = schema_extractor.get_table_schema_text(
                        sqlite_tabledata.table_name).strip()
                    logger.info(
                        get_success_message(
                            verbosity_level, source_path, schema_text))
        except ptr.OpenError as error:
            logger.error(u"open error: file={}, message='{}'".format(
                source_path, str(error)))
            result_counter.inc_fail()
        except ptr.ValidationError as error:
            format_type = _get_format_type_from_path(source_path)
            logger.error(u"invalid {} data format: path={}, message={}".format(
                format_type, source_path, str(error)))
            result_counter.inc_fail()
        except ptr.InvalidDataError as error:
            format_type = _get_format_type_from_path(source_path)
            logger.error(u"invalid {} data: path={}, message={}".format(
                format_type, source_path, str(error)))
            result_counter.inc_fail()

    write_completion_message(logger, output_path, result_counter)

    sys.exit(result_counter.get_return_code())
def url(ctx, url, format_name, output_path, encoding, proxy):
    """
    Fetch data from a URL and convert data to a SQLite database file.
    """
    # NOTE(review): the docstring above is kept verbatim; it is presumably
    # rendered as the command's help text by the CLI framework -- confirm.
    #
    # Parameters:
    #   ctx          -- command context; ``ctx.obj`` is read for the verbosity
    #                   level and the log level.
    #   url          -- source URL handed to ``ptr.TableUrlLoader``.
    #   format_name  -- format tried first; "html" is the fallback.
    #   output_path  -- passed to ``create_database`` and to the completion
    #                   message.
    #   encoding     -- forwarded to the loader.
    #   proxy        -- used for both "http" and "https" when non-empty.
    #
    # Exit codes: NO_INPUT for an empty URL, FAILED_LOADER_NOT_FOUND when no
    # loader matches, FAILED_HTTP on an HTTP error, otherwise the return code
    # computed by the result counter.

    if dataproperty.is_empty_sequence(url):
        sys.exit(ExitCode.NO_INPUT)

    con = create_database(ctx, output_path)
    verbosity_level = ctx.obj.get(Context.VERBOSITY_LEVEL)
    extractor = get_schema_extractor(con, verbosity_level)
    result_counter = ResultCounter()

    logger = logbook.Logger("sqlitebiter url")
    _setup_logger_from_context(logger, ctx.obj[Context.LOG_LEVEL])

    proxies = {}
    if dataproperty.is_not_empty_string(proxy):
        proxies = {
            "http": proxy,
            "https": proxy,
        }

    try:
        loader = ptr.TableUrlLoader(
            url, format_name, encoding=encoding, proxies=proxies)
    except ptr.LoaderNotFoundError as not_found_error:
        # No loader for the requested format: retry as HTML.
        try:
            loader = ptr.TableUrlLoader(
                url, "html", encoding=encoding, proxies=proxies)
        except ptr.LoaderNotFoundError:
            # Report the error raised for the originally requested format.
            logger.error(not_found_error)
            sys.exit(ExitCode.FAILED_LOADER_NOT_FOUND)
        except ptr.HTTPError as http_error:
            # An HTTP failure during the fallback is still an HTTP failure:
            # log the real error and use the same exit code as the primary
            # attempt instead of reporting "loader not found".
            logger.error(http_error)
            sys.exit(ExitCode.FAILED_HTTP)
    except ptr.HTTPError as http_error:
        logger.error(http_error)
        sys.exit(ExitCode.FAILED_HTTP)

    try:
        for tabledata in loader.load():
            sqlite_tabledata = ptr.SQLiteTableDataSanitizer(
                tabledata).sanitize()

            try:
                con.create_table_from_tabledata(sqlite_tabledata)
                result_counter.inc_success()
            except ValueError as e:
                logger.debug(u"url={}, message={}".format(url, str(e)))
                result_counter.inc_fail()
                continue

            log_message = get_success_log_format(verbosity_level).format(
                url,
                extractor.get_table_schema_text(
                    sqlite_tabledata.table_name).strip())
            logger.info(log_message)
    except ptr.InvalidDataError as e:
        logger.error(u"invalid data: url={}, message={}".format(url, str(e)))
        result_counter.inc_fail()

    write_completion_message(logger, output_path, result_counter)

    sys.exit(result_counter.get_return_code())
def file(ctx, files, output_path):
    """
    Convert tabular data within CSV/Excel/HTML/JSON/LTSV/Markdown/TSV file(s)
    to a SQLite database file.
    """
    # NOTE(review): the docstring above is kept verbatim; it is presumably
    # rendered as the command's help text by the CLI framework -- confirm.
    #
    # Flow: for every input path, build a file loader, create one SQLite table
    # per loaded table and record success/failure in the result counter. The
    # function always ends in ``sys.exit`` with the counter's return code.

    if dataproperty.is_empty_sequence(files):
        sys.exit(ExitCode.NO_INPUT)

    con = create_database(ctx, output_path)
    verbosity_level = ctx.obj.get(Context.VERBOSITY_LEVEL)
    extractor = get_schema_extractor(con, verbosity_level)
    result_counter = ResultCounter()

    logger = logbook.Logger("sqlitebiter file")
    _setup_logger_from_context(logger, ctx.obj[Context.LOG_LEVEL])

    for raw_path in files:
        source_path = path.Path(raw_path)

        if not source_path.isfile():
            logger.debug(u"file not found: {}".format(source_path))
            result_counter.inc_fail()
            continue

        logger.debug(u"converting '{}'".format(source_path))

        try:
            loader = ptr.TableFileLoader(source_path)
        except ptr.InvalidFilePathError as error:
            logger.debug(error)
            result_counter.inc_fail()
            continue
        except ptr.LoaderNotFoundError:
            logger.debug(
                u"loader not found that coincide with '{}'".format(
                    source_path))
            result_counter.inc_fail()
            continue

        try:
            for tabledata in loader.load():
                sqlite_tabledata = ptr.SQLiteTableDataSanitizer(
                    tabledata).sanitize()

                try:
                    con.create_table_from_tabledata(sqlite_tabledata)
                    result_counter.inc_success()
                except (ValueError, IOError) as error:
                    logger.debug(
                        u"path={}, message={}".format(source_path, error))
                    result_counter.inc_fail()
                else:
                    # Fetch the format before the schema text, as before.
                    log_format = get_success_log_format(verbosity_level)
                    schema_text = extractor.get_table_schema_text(
                        sqlite_tabledata.table_name).strip()
                    logger.info(log_format.format(source_path, schema_text))
        except ptr.OpenError as error:
            logger.error(u"open error: file={}, message='{}'".format(
                source_path, str(error)))
            result_counter.inc_fail()
        except ptr.ValidationError as error:
            format_type = _get_format_type_from_path(source_path)
            logger.error(u"invalid {} data format: path={}, message={}".format(
                format_type, source_path, str(error)))
            result_counter.inc_fail()
        except ptr.InvalidDataError as error:
            format_type = _get_format_type_from_path(source_path)
            logger.error(u"invalid {} data: path={}, message={}".format(
                format_type, source_path, str(error)))
            result_counter.inc_fail()

    write_completion_message(logger, output_path, result_counter)

    sys.exit(result_counter.get_return_code())