if command not in ('populate', 'migrate'): raise Exception( 'Command must either be "populate" or "migrate" but was "%s"' % command) if command == 'migrate' and \ not any((args.use_mysql, args.use_postgresql, args.use_oracle)): raise Exception( 'At least one destination database must be chosen with ' '--use-<database type>') cli_options.configure_logging(args.log_level, debug_log_file=args.debug_log_file) seed(args.randomization_seed) cluster = cli_options.create_cluster(args) populator = DbPopulator( db_connection.HIVE if args.use_hive else db_connection.IMPALA) if command == 'populate': populator.randomization_seed = args.randomization_seed populator.cluster = cluster populator.db_name = args.db_name populator.min_col_count = args.min_column_count populator.max_col_count = args.max_column_count populator.min_row_count = args.min_row_count populator.max_row_count = args.max_row_count populator.allowed_storage_formats = args.storage_file_formats.split( ',') if args.use_hive:
# Command-line setup and entry sequence for the Kudu loader script.
# `parser`, `cli_options`, `clean_data` and `load_data` are defined outside
# this block.

# Options contributed by the shared cli_options helpers. Presumably these add
# the `log_level` / `debug_log_file` attributes read below -- confirm against
# cli_options.
cli_options.add_logging_options(parser)
cli_options.add_cluster_options(parser)

# Options specific to this script.
parser.add_argument(
    "-s", "--source-db", required=True,
    help="Source DB to load data from.")
parser.add_argument(
    "-t", "--target-db", required=True,
    help="Target DB to load data to.")
parser.add_argument(
    "-w", "--workload", choices=['tpch', 'tpcds'], required=True)
parser.add_argument(
    "--kudu_master", required=True,
    help="Address or host name of Kudu master")
# TODO: Automatically set #buckets as a function of cluster nodes and/or
# scale
# NOTE: no `type=` is given, so the value stays a string (default "9").
parser.add_argument(
    "-b", "--buckets", default="9",
    help="Number of buckets to partition Kudu tables (only for hash-based).")
parser.add_argument(
    "-v", "--verbose", action='store_true',
    help="Print the executed statements.")
parser.add_argument(
    "--clean", action='store_true',
    help="Drop all tables in the specified target database.")
args = parser.parse_args()

cli_options.configure_logging(
    args.log_level, debug_log_file=args.debug_log_file)
cluster = cli_options.create_cluster(args)

# Copy the parsed options into plain names. Presumably clean_data() and
# load_data() read these as module-level globals, since neither is passed any
# arguments -- verify against their definitions.
source_db = args.source_db
target_db = args.target_db
buckets = args.buckets
kudu_master = args.kudu_master
workload = args.workload
verbose = args.verbose

# Optionally clean the target first; the load itself always runs.
if args.clean:
    clean_data()
load_data()