if __name__ == '__main__': from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser from tests.comparison import cli_options parser = ArgumentParser( usage='usage: \n' ' %(prog)s [options] [populate]\n\n' ' Create and populate database(s). The Impala database will always be \n' ' included. Postgres is optional. The other databases are not supported.\n\n' ' %(prog)s [options] migrate\n\n' ' Migrate an Impala database to another database type. The destination \n' ' database will be dropped and recreated.', formatter_class=ArgumentDefaultsHelpFormatter) cli_options.add_logging_options(parser) cli_options.add_cluster_options(parser) cli_options.add_db_name_option(parser) cli_options.add_connection_option_groups(parser) group = parser.add_argument_group('Database Population Options') group.add_argument( '--randomization-seed', default=1, type=int, help= 'The randomization will be initialized with this seed. Using the same seed ' 'will produce the same results across runs.') cli_options.add_storage_format_options(group) group.add_argument( '--create-data-files', default=False,
def get_test_file_path(workload):
    """Return the path of the Kudu SQL template for the given workload.

    The path is built as
    ``$IMPALA_HOME/testdata/datasets/<workload>/<workload>_kudu_template.sql``.
    The file is not opened or checked for existence here.

    Args:
        workload: Name of the workload; used both as the dataset directory
            name and as the prefix of the template file name.

    Returns:
        The joined path to the template file as a string.

    Raises:
        Exception: If the IMPALA_HOME environment variable is not set.
    """
    # Environment values are always strings, so None can only mean "unset".
    impala_home = os.environ.get("IMPALA_HOME")
    if impala_home is None:
        raise Exception("IMPALA_HOME must be set")

    template_name = "%s_kudu_template.sql" % (workload)
    datasets_dir = os.path.join(impala_home, "testdata", "datasets")
    return os.path.join(datasets_dir, workload, template_name)
continue LOG.info("Executing: {0}".format(stmt)) hive.execute(stmt) with cluster.impala.cursor(db_name=target_db) as impala: impala.invalidate_metadata() impala.compute_stats() LOG.info("Done loading nested TPCH data") if __name__ == "__main__": parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter) cli_options.add_logging_options(parser) cli_options.add_cluster_options( parser) # --cm-host and similar args added here cli_options.add_kerberos_options(parser) cli_options.add_ssl_options(parser) parser.add_argument("-s", "--source-db", default="tpch_parquet") parser.add_argument("-t", "--target-db", default="tpch_nested_parquet") parser.add_argument("-c", "-p", "--chunks", type=int, default=1) args = parser.parse_args() cli_options.configure_logging(args.log_level, debug_log_file=args.debug_log_file) cluster = cli_options.create_cluster(args) source_db = args.source_db target_db = args.target_db
LOG.info("Executing: {0}".format(stmt)) impala.execute(stmt) impala.invalidate_metadata(table_name="customer") impala.invalidate_metadata(table_name="part") impala.invalidate_metadata(table_name="region") impala.invalidate_metadata(table_name="supplier") impala.compute_stats() LOG.info("Done loading nested TPCH data") if __name__ == "__main__": parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter) cli_options.add_logging_options(parser) cli_options.add_cluster_options(parser) # --cm-host and similar args added here cli_options.add_kerberos_options(parser) cli_options.add_ssl_options(parser) parser.add_argument("-s", "--source-db", default="tpch_parquet") parser.add_argument("-t", "--target-db", default="tpch_nested_parquet") parser.add_argument("-f", "--table-format", default="parquet/none") # can be "orc/def" parser.add_argument("-c", "-p", "--chunks", type=int, default=1) args = parser.parse_args() cli_options.configure_logging(args.log_level, debug_log_file=args.debug_log_file) cluster = cli_options.create_cluster(args) source_db = args.source_db target_db = args.target_db