# Script entry point: builds the command-line parser for the "populate" and
# "migrate" modes described in the usage text below.
if __name__ == '__main__':
    # argparse and cli_options are imported only when run as a script.
    from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser

    from tests.comparison import cli_options

    # ArgumentDefaultsHelpFormatter makes --help show each option's default.
    # NOTE(review): argparse prepends its own "usage: " prefix to the usage
    # text, so the literal 'usage: ' below probably shows up twice in --help
    # output -- confirm whether that is intended.
    parser = ArgumentParser(
        usage='usage: \n'
        '  %(prog)s [options] [populate]\n\n'
        '     Create and populate database(s). The Impala database will always be \n'
        '     included. Postgres is optional. The other databases are not supported.\n\n'
        '  %(prog)s [options] migrate\n\n'
        '     Migrate an Impala database to another database type. The destination \n'
        '     database will be dropped and recreated.',
        formatter_class=ArgumentDefaultsHelpFormatter)
    # Option groups shared with other tools, registered by cli_options helpers.
    cli_options.add_logging_options(parser)
    cli_options.add_cluster_options(parser)
    cli_options.add_db_name_option(parser)
    cli_options.add_connection_option_groups(parser)

    # Options specific to database population get their own help section.
    group = parser.add_argument_group('Database Population Options')
    # Integer seed, default 1, so repeated runs generate the same data.
    group.add_argument(
        '--randomization-seed',
        default=1,
        type=int,
        help=
        'The randomization will be initialized with this seed. Using the same seed '
        'will produce the same results across runs.')
    # Storage-format options are added to the same population group.
    cli_options.add_storage_format_options(group)
    group.add_argument(
        '--create-data-files',
        default=False,
      impala.execute(query_str)

def get_test_file_path(workload):
  """Return the path of the Kudu SQL template file for a workload.

  The path has the form
  $IMPALA_HOME/testdata/datasets/<workload>/<workload>_kudu_template.sql.

  Args:
    workload: Name of the workload; used both as the dataset directory name
      and as the prefix of the template file name.

  Returns:
    The template path as a string. The file is not checked for existence.

  Raises:
    Exception: If IMPALA_HOME is not present in the environment.
  """
  impala_home = os.environ.get("IMPALA_HOME")
  # Environment values are always strings, so None can only mean "not set".
  if impala_home is None:
    raise Exception("IMPALA_HOME must be set")
  template_name = "%s_kudu_template.sql" % (workload)
  path_parts = (impala_home, "testdata", "datasets", workload, template_name)
  return os.path.join(*path_parts)

# Script entry point: declares the command-line options (source/target DB,
# workload, Kudu master, bucket count, verbosity).
if __name__ == "__main__":
  # argparse and cli_options are imported only when run as a script.
  from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser
  import tests.comparison.cli_options as cli_options

  # ArgumentDefaultsHelpFormatter makes --help show each option's default.
  parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
  # Logging and cluster options shared with other tools.
  cli_options.add_logging_options(parser)
  cli_options.add_cluster_options(parser)
  # Source and target databases are both mandatory.
  parser.add_argument("-s", "--source-db", required=True,
      help="Source DB to load data from.")
  parser.add_argument("-t", "--target-db", required=True,
      help="Target DB to load data to.")
  # Only the tpch and tpcds workloads are accepted.
  parser.add_argument("-w", "--workload", choices=['tpch', 'tpcds'],
      required=True)
  # NOTE(review): this flag uses an underscore while the other long options
  # use hyphens; argparse exposes it as args.kudu_master either way.
  parser.add_argument("--kudu_master", required=True,
      help="Address or host name of Kudu master")
  # TODO: Automatically set #buckets as a function of cluster nodes and/or
  # scale
  # The default is the string "9" and no type= is given, so the value stays a
  # string -- presumably it is substituted into SQL text; verify against use.
  parser.add_argument("-b", "--buckets", default="9",
      help="Number of buckets to partition Kudu tables (only for hash-based).")
  parser.add_argument("-v", "--verbose", action='store_true',
      help="Print the executed statements.")
  parser.add_argument("--clean", action='store_true',
Exemple #3
0
                continue
            LOG.info("Executing: {0}".format(stmt))
            hive.execute(stmt)

    with cluster.impala.cursor(db_name=target_db) as impala:
        impala.invalidate_metadata()
        impala.compute_stats()

    LOG.info("Done loading nested TPCH data")


# Script entry point: parses the command line, configures logging and creates
# the cluster object before publishing the chosen database names.
if __name__ == "__main__":

    # ArgumentDefaultsHelpFormatter makes --help show each option's default.
    parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
    # Option groups shared with other tools, registered by cli_options helpers.
    cli_options.add_logging_options(parser)
    cli_options.add_cluster_options(
        parser)  # --cm-host and similar args added here
    cli_options.add_kerberos_options(parser)
    cli_options.add_ssl_options(parser)

    # Script-specific options. The chunk count is an int (default 1) and can
    # be given as -c, -p or --chunks.
    parser.add_argument("-s", "--source-db", default="tpch_parquet")
    parser.add_argument("-t", "--target-db", default="tpch_nested_parquet")
    parser.add_argument("-c", "-p", "--chunks", type=int, default=1)

    args = parser.parse_args()

    # log_level and debug_log_file are not declared in this block; presumably
    # they come from add_logging_options() -- confirm in cli_options.
    cli_options.configure_logging(args.log_level,
                                  debug_log_file=args.debug_log_file)

    # Module-level names; presumably read by the loading code that follows
    # this block (not visible here).
    cluster = cli_options.create_cluster(args)
    source_db = args.source_db
    target_db = args.target_db
      LOG.info("Executing: {0}".format(stmt))
      impala.execute(stmt)

    impala.invalidate_metadata(table_name="customer")
    impala.invalidate_metadata(table_name="part")
    impala.invalidate_metadata(table_name="region")
    impala.invalidate_metadata(table_name="supplier")
    impala.compute_stats()

  LOG.info("Done loading nested TPCH data")

# Script entry point: parses the command line, configures logging and creates
# the cluster object before publishing the chosen database names.
if __name__ == "__main__":

  # ArgumentDefaultsHelpFormatter makes --help show each option's default.
  parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
  # Option groups shared with other tools, registered by cli_options helpers.
  cli_options.add_logging_options(parser)
  cli_options.add_cluster_options(parser)  # --cm-host and similar args added here
  cli_options.add_kerberos_options(parser)
  cli_options.add_ssl_options(parser)

  # Script-specific options. The chunk count is an int (default 1) and can be
  # given as -c, -p or --chunks.
  parser.add_argument("-s", "--source-db", default="tpch_parquet")
  parser.add_argument("-t", "--target-db", default="tpch_nested_parquet")
  parser.add_argument("-f", "--table-format", default="parquet/none")  # can be "orc/def"
  parser.add_argument("-c", "-p", "--chunks", type=int, default=1)

  args = parser.parse_args()

  # log_level and debug_log_file are not declared in this block; presumably
  # they come from add_logging_options() -- confirm in cli_options.
  cli_options.configure_logging(args.log_level, debug_log_file=args.debug_log_file)

  # Module-level names; presumably read by the loading code that follows this
  # block (not visible here).
  cluster = cli_options.create_cluster(args)
  source_db = args.source_db
  target_db = args.target_db