# Tail of the loading routine: `hive`, `stmt`, `cluster` and `target_db` are
# bound earlier, outside this excerpt.
hive.execute(stmt)

# Invalidate metadata and compute stats through an Impala cursor opened on
# the target database.
with cluster.impala.cursor(db_name=target_db) as impala_cursor:
    impala_cursor.invalidate_metadata()
    impala_cursor.compute_stats()

LOG.info("Done loading nested TPCH data")


if __name__ == "__main__":
    # Command-line entry point: build the parser, parse the arguments,
    # configure logging, create the cluster handle and publish the settings
    # as module-level names.
    parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)

    # Shared option groups supplied by cli_options, registered in this order.
    # --cm-host and similar args are added by the cluster group.
    for _add_options in (
        cli_options.add_logging_options,
        cli_options.add_cluster_options,
        cli_options.add_kerberos_options,
        cli_options.add_ssl_options,
    ):
        _add_options(parser)

    # Script-specific options; -c and -p are both short forms of --chunks.
    for _flags, _settings in (
        (("-s", "--source-db"), {"default": "tpch_parquet"}),
        (("-t", "--target-db"), {"default": "tpch_nested_parquet"}),
        (("-c", "-p", "--chunks"), {"type": int, "default": 1}),
    ):
        parser.add_argument(*_flags, **_settings)

    args = parser.parse_args()
    cli_options.configure_logging(
        args.log_level, debug_log_file=args.debug_log_file
    )

    cluster = cli_options.create_cluster(args)
    source_db, target_db = args.source_db, args.target_db
    chunks = args.chunks
# Tail of the loading routine: `impala` and `stmt` are bound earlier, outside
# this excerpt.
impala.execute(stmt)

# Invalidate metadata table by table, in this fixed order, then compute stats.
for nested_table in ("customer", "part", "region", "supplier"):
    impala.invalidate_metadata(table_name=nested_table)
impala.compute_stats()

LOG.info("Done loading nested TPCH data")


if __name__ == "__main__":
    # Command-line entry point: build the parser, parse the arguments,
    # configure logging, create the cluster handle and publish the settings
    # as module-level names.
    parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)

    # Shared option groups supplied by cli_options, registered in this order.
    # --cm-host and similar args are added by the cluster group.
    for _add_options in (
        cli_options.add_logging_options,
        cli_options.add_cluster_options,
        cli_options.add_kerberos_options,
        cli_options.add_ssl_options,
    ):
        _add_options(parser)

    # Script-specific options; -c and -p are both short forms of --chunks.
    # --table-format is "<file format>/<compression>" and can be "orc/def".
    for _flags, _settings in (
        (("-s", "--source-db"), {"default": "tpch_parquet"}),
        (("-t", "--target-db"), {"default": "tpch_nested_parquet"}),
        (("-f", "--table-format"), {"default": "parquet/none"}),
        (("-c", "-p", "--chunks"), {"type": int, "default": 1}),
    ):
        parser.add_argument(*_flags, **_settings)

    args = parser.parse_args()
    cli_options.configure_logging(
        args.log_level, debug_log_file=args.debug_log_file
    )

    cluster = cli_options.create_cluster(args)
    source_db, target_db = args.source_db, args.target_db
    # Exactly one "/" is expected; any other shape makes this unpacking raise.
    file_format, compression_value = args.table_format.split("/")