예제 #1
0
            hive.execute(stmt)

    with cluster.impala.cursor(db_name=target_db) as impala:
        impala.invalidate_metadata()
        impala.compute_stats()

    LOG.info("Done loading nested TPCH data")


if __name__ == "__main__":
    # Command-line entry point: assemble the parser from the shared
    # cli_options groups plus this script's own flags, then bind the parsed
    # settings to module-level names (the loading code earlier in the file
    # reads cluster and target_db as globals).

    parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)

    # Shared option groups; the cluster group adds --cm-host and similar args.
    cli_options.add_logging_options(parser)
    cli_options.add_cluster_options(parser)
    cli_options.add_kerberos_options(parser)
    cli_options.add_ssl_options(parser)

    # Options specific to this script.
    parser.add_argument("-s", "--source-db", default="tpch_parquet")
    parser.add_argument("-t", "--target-db", default="tpch_nested_parquet")
    parser.add_argument("-c", "-p", "--chunks", type=int, default=1)

    args = parser.parse_args()
    cli_options.configure_logging(
        args.log_level, debug_log_file=args.debug_log_file)

    cluster = cli_options.create_cluster(args)
    source_db, target_db, chunks = args.source_db, args.target_db, args.chunks
예제 #2
0
      impala.execute(stmt)

    impala.invalidate_metadata(table_name="customer")
    impala.invalidate_metadata(table_name="part")
    impala.invalidate_metadata(table_name="region")
    impala.invalidate_metadata(table_name="supplier")
    impala.compute_stats()

  LOG.info("Done loading nested TPCH data")

if __name__ == "__main__":
  # Command-line entry point: build the parser from the shared cli_options
  # groups plus this script's own flags, validate the parsed values, configure
  # logging, and bind the settings to module-level names.

  parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
  cli_options.add_logging_options(parser)
  cli_options.add_cluster_options(parser)  # --cm-host and similar args added here
  cli_options.add_kerberos_options(parser)
  cli_options.add_ssl_options(parser)

  parser.add_argument("-s", "--source-db", default="tpch_parquet")
  parser.add_argument("-t", "--target-db", default="tpch_nested_parquet")
  parser.add_argument("-f", "--table-format", default="parquet/none")  # can be "orc/def"
  parser.add_argument("-c", "-p", "--chunks", type=int, default=1)

  args = parser.parse_args()

  # --table-format must be exactly "<file_format>/<compression>". Check it right
  # after parsing so a malformed value is reported as a normal usage error
  # (parser.error prints the usage text and exits with status 2) instead of an
  # unpacking traceback raised after the cluster object has been created.
  try:
    file_format, compression_value = args.table_format.split("/")
  except ValueError:
    parser.error(
        "--table-format must look like <file_format>/<compression> "
        "(e.g. parquet/none), got %r" % (args.table_format,))

  cli_options.configure_logging(args.log_level, debug_log_file=args.debug_log_file)

  cluster = cli_options.create_cluster(args)
  source_db = args.source_db
  target_db = args.target_db