Esempio n. 1
0
def __delete_data_process(bq_client, dataset, condition, table_name_list):
    """Rebuild each listed table from a filtered copy staged in a temp dataset.

    Steps, in order:
      1. Create a temp dataset via ``__create_temp_dataset``.
      2. For every table, launch a job through ``__query_out_remain_data``
         that writes into the temp dataset, calling ``utils.thruhold_jobs``
         after each package of tables has been launched.
      3. Wait for all launched jobs with ``utils.wait_all_job_finish``.
      4. Delete the origin tables (confirmation prompt skipped).
      5. Copy the temp tables back into ``dataset`` under the same names
         (confirmation prompt skipped).

    Nothing here validates the temp tables before the originals are deleted
    (see the TODO below), and this function does not remove the temp dataset.

    Args:
        bq_client: client object handed to every helper call.
        dataset: name of the dataset holding the tables to rewrite.
        condition: forwarded unchanged to ``__query_out_remain_data``.
            NOTE(review): presumably the row filter for the query; confirm
            whether it selects rows to keep or rows to drop.
        table_name_list: names of the tables to process.
    """
    # Staging area for the filtered copies.
    temp_dataset_name = __create_temp_dataset(bq_client)

    # Maps each launched job id to the table it is producing.
    jobs_by_id = {}

    print("Create tables with data remain.")
    for batch in utils.get_table_packages(table_name_list):
        for name in batch:
            launched_job = __query_out_remain_data(
                bq_client, name, condition, dataset, temp_dataset_name)
            jobs_by_id[launched_job] = name

        # Called once per package, after its jobs have been submitted.
        utils.thruhold_jobs(bq_client)

    print("Wait for all temp tables are created.")
    utils.wait_all_job_finish(bq_client, list(jobs_by_id))

    # TODO: validate the temp data

    print("Delete origin tables.")
    table_delete_handler._delete_table_list(
        bq_client, dataset, table_name_list, ignore_confirm=True)

    # Restore the tables from the staged copies, keeping the same names.
    print("Copy the remaining data.")
    table_copy_handler._copy_table_list(
        bq_client, temp_dataset_name, dataset, table_name_list,
        ignore_confirm=True)
Esempio n. 2
0
def _copy_table_list(
        bq_client,
        org_dataset,
        dest_dataset,
        table_name_list,
        ignore_confirm=False):
  """Copy every named table from ``org_dataset`` into ``dest_dataset``.

  Unless ``ignore_confirm`` is true, the table names and both dataset names
  are printed and the user is prompted on stdin; answering "n"/"no" returns
  without copying anything, and any answer other than y/yes/n/no re-prompts.

  Tables are processed in the packages produced by
  ``utils.get_table_packages``; ``utils.thruhold_jobs`` is called before
  each package is launched. Each table keeps its name in the destination.

  Args:
    bq_client: client object handed to the helper calls.
    org_dataset: name of the dataset to copy from.
    dest_dataset: name of the dataset to copy into.
    table_name_list: names of the tables to copy.
    ignore_confirm: when true, skip the interactive confirmation.
  """
  if not ignore_confirm:
    # Show the user exactly what is about to happen.
    print("")
    for listed_name in table_name_list:
      print(listed_name)
    print("")
    print("The [%d] tables above is going to be copied."
          % len(table_name_list))
    print("From dataset: [%s]" % org_dataset)
    print("To   dataset: [%s]" % dest_dataset)
    print("")
    print("Is it ok? [y/N]")

    # Keep asking until the answer is a recognised yes or no.
    answer = raw_input().lower()
    while answer not in ('yes', 'y'):
      if answer in ('no', 'n'):
        return
      print("Please enter [y or n]")
      answer = raw_input().lower()

  for batch in utils.get_table_packages(table_name_list):
    # Check running jobs before launching the next package.
    utils.thruhold_jobs(bq_client)

    for name in batch:
      # Source and destination table share the same name.
      __copy_table(bq_client, org_dataset, dest_dataset, name, name)