def main():
    import json

    from google.cloud.bigquery import Client

    tables = {}
    with open("/vol/required_tables.txt") as rt:
        table_names = rt.read().split()
        bq_client = Client()
        for table_name in table_names:
            # Names may be "project.dataset.table" or just "dataset.table".
            split_table_name = table_name.split(".")
            if len(split_table_name) == 3:
                dataset_ref = bq_client.dataset(split_table_name[1],
                                                project=split_table_name[0])
            else:
                dataset_ref = bq_client.dataset(split_table_name[0])
            table_ref = dataset_ref.table(split_table_name[-1])
            table = bq_client.get_table(table_ref)
            tables[table_name] = [
                field.to_api_repr() for field in table.schema
            ]
            if table_name.endswith("*"):
                # Wildcard tables also expose the _TABLE_SUFFIX pseudo-column.
                tables[table_name].append({
                    "name": "_TABLE_SUFFIX",
                    "type": "STRING",
                    "mode": "REQUIRED"
                })
    with open("/vol/schema.json", mode="w") as schema:
        schema.write(json.dumps(tables))
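A brief read-back sketch (not part of the original example): the file written above maps each table name from required_tables.txt to a list of SchemaField API representations, so it can be loaded straight back with json:

import json

with open("/vol/schema.json") as schema_file:
    schemas = json.load(schema_file)

for table_name, fields in schemas.items():
    # Each entry is a dict such as {"name": ..., "type": ..., "mode": ...}.
    print(table_name, [field["name"] for field in fields])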
Example #2
def _recreate_table(client: bigquery.Client, table_id: str, schema: list) -> None:
    logger.info(f'Recreating {table_id} table...')
    dataset_ref = client.dataset(DATASET_ID)
    table_ref = dataset_ref.table(table_id)
    table = bigquery.Table(table_ref, schema=schema)
    client.delete_table(table_ref, not_found_ok=True)
    client.create_table(table)
def main():
    args = get_sysargs(sys.argv[1:])
    exporter = Exporter()
    jinjafy = Jinjafy('../data/queries/marreco/datajet/')

    client = Client()
    dataset = client.dataset(args.dataset)
    table = dataset.table(args.table)

    for day in range(args.days_init, args.days_end - 1, -1):
        print('processing day: ', day)
        for idx, file_ in enumerate(['productview.sql',
                                     'search.sql',
                                     'purchase.sql']):
                    
            query = build_query(jinjafy,
                                file_,
                                {'dataset': '40663402',
                                 'days_interval': day,
                                 'days_interval_end': day})

            exporter.bq_to_gcs(client,
                               query,
                               {'destination': table,
                                'maximum_bytes_billed': 1000000000000,
                                'write_disposition': 'WRITE_TRUNCATE'},
                               {'uri': args.uri.format(day=day, idx=idx),
                                'table': table,
                                'compression': 'GZIP',
                                'destination_format': 'NEWLINE_DELIMITED_JSON'})
Example #4
        def create_view_task(ds, **kwargs):

            template_context = kwargs.copy()
            template_context['ds'] = ds
            template_context['params'] = environment

            client = Client()

            dest_table_name = '{task}'.format(task=task)
            dest_table_ref = client.dataset(
                dataset_name,
                project=destination_dataset_project_id).table(dest_table_name)
            table = Table(dest_table_ref)

            sql_path = os.path.join(
                dags_folder,
                'resources/stages/enrich/sqls/{task}.sql'.format(task=task))
            sql_template = read_file(sql_path)
            sql = kwargs['task'].render_template('', sql_template,
                                                 template_context)
            table.view_query = sql

            description_path = os.path.join(
                dags_folder,
                'resources/stages/enrich/descriptions/{task}.txt'.format(
                    task=task))
            table.description = read_file(description_path)
            logging.info('Creating view: ' + json.dumps(table.to_api_repr()))

            try:
                table = client.create_table(table)
            except Conflict:
                # https://cloud.google.com/bigquery/docs/managing-views
                table = client.update_table(table, ['view_query'])
            assert table.table_id == dest_table_name
Example #5
def delete_views_or_table(client: bigquery.Client, view_or_table_name: str,
                          dataset: str):
    LOGGER.debug("delete_views_or_tables: %s", view_or_table_name)
    dataset_ref = client.dataset(dataset)
    table_ref = dataset_ref.table(view_or_table_name)
    client.delete_table(table_ref)
    LOGGER.info("deleted view or table: %s", view_or_table_name)
def update_or_create_table_from_csv(
    client: bigquery.Client,
    table_name: str,
    source_file: str,
    dataset: str,
    source_schema_file: str,
):
    LOGGER.debug("update_or_create_table_from_csv: %s=%s", table_name,
                 [source_file])
    dataset_ref = client.dataset(dataset)
    table_ref = dataset_ref.table(table_name)

    job_config = LoadJobConfig()
    job_config.source_format = "CSV"
    job_config.skip_leading_rows = 1
    if Path(source_schema_file).exists():
        job_config.schema = get_table_schema(source_schema_file)
    else:
        job_config.autodetect = True
    job_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE

    with open(source_file, "rb") as source_fp:
        load_job = client.load_table_from_file(source_fp,
                                               destination=table_ref,
                                               job_config=job_config)

    # wait for job to complete
    load_job.result()

    LOGGER.info("updated config table: %s", table_ref.table_id)
Example #7
        def create_view_task():

            client = Client()

            dest_table_name = '{task}'.format(task=task)
            dest_table_ref = client.dataset(
                dataset_name,
                project=destination_dataset_project_id).table(dest_table_name)
            table = Table(dest_table_ref)

            sql_path = os.path.join(
                dags_folder,
                'resources/stages/enrich/sqls/{task}.sql'.format(task=task))
            sql = read_file(sql_path, environment)
            table.view_query = sql

            description_path = os.path.join(
                dags_folder,
                'resources/stages/enrich/descriptions/{task}.txt'.format(
                    task=task))
            table.description = read_file(description_path)
            logging.info('Creating view: ' + json.dumps(table.to_api_repr()))

            try:
                table = client.create_table(table)
            except Conflict:
                # https://cloud.google.com/bigquery/docs/managing-views
                table = client.update_table(table, ['view_query'])
            assert table.table_id == dest_table_name
Example #8
        def load_task():
            client = Client()
            job_config = LoadJobConfig()
            schema_path = os.path.join(
                dags_folder,
                'resources/stages/raw/schemas/{task}.json'.format(task=task))
            job_config.schema = read_bigquery_schema_from_file(schema_path)
            job_config.source_format = SourceFormat.CSV if file_format == 'csv' else SourceFormat.NEWLINE_DELIMITED_JSON
            if file_format == 'csv':
                job_config.skip_leading_rows = 1
            job_config.write_disposition = 'WRITE_TRUNCATE'
            job_config.allow_quoted_newlines = allow_quoted_newlines
            job_config.ignore_unknown_values = True

            export_location_uri = 'gs://{bucket}/export'.format(
                bucket=output_bucket)
            uri = '{export_location_uri}/{task}/*.{file_format}'.format(
                export_location_uri=export_location_uri,
                task=task,
                file_format=file_format)
            table_ref = client.dataset(dataset_name_raw).table(task)
            load_job = client.load_table_from_uri(uri,
                                                  table_ref,
                                                  job_config=job_config)
            submit_bigquery_job(load_job, job_config)
            assert load_job.state == 'DONE'
Example #9
def does_bigquery_table_exist(client: bigquery.Client, dataset_name: str,
                              table_name: str):
    dataset_ref = client.dataset(dataset_name)
    table_ref = dataset_ref.table(table_name)

    try:
        client.get_table(table_ref)
        return True
    except NotFound:
        return False
def dataset(bq: bigquery.Client, dataset_id: str):
    """Context manager for creating and deleting the BigQuery dataset for a test."""
    try:
        bq.get_dataset(dataset_id)
    except NotFound:
        bq.create_dataset(dataset_id)
    try:
        yield bq.dataset(dataset_id)
    finally:
        bq.delete_dataset(dataset_id, delete_contents=True)
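A minimal usage sketch, assuming the generator above is meant to be wrapped with contextlib.contextmanager (the decorator itself is not shown in the snippet); the dataset, table, and field names below are placeholders:

from contextlib import contextmanager

from google.cloud import bigquery

managed_dataset = contextmanager(dataset)

def test_something_with_temporary_dataset():
    bq = bigquery.Client()
    with managed_dataset(bq, "my_test_dataset") as dataset_ref:
        # The dataset exists inside this block and is deleted (with its
        # contents) when the block exits, even if the test fails.
        table_ref = dataset_ref.table("my_test_table")
        bq.create_table(bigquery.Table(table_ref, schema=[
            bigquery.SchemaField("id", "STRING"),
        ]))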
Example #11
def get_temporary_dataset(client: bigquery.Client):
    """Get a cached reference to the dataset used for server-assigned destinations."""
    global temporary_dataset
    if temporary_dataset is None:
        # look up the dataset used for query results without a destination
        dry_run = bigquery.QueryJobConfig(dry_run=True)
        destination = client.query("SELECT NULL", dry_run).destination
        temporary_dataset = client.dataset(destination.dataset_id,
                                           destination.project)
    return temporary_dataset
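The snippet relies on a module-level temporary_dataset cache that is not shown (presumably initialised to None). A hedged usage sketch that directs a query's results into a table of that cached dataset; the table name is a placeholder:

temporary_dataset = None  # module-level cache assumed by the function above

client = bigquery.Client()
scratch_table = get_temporary_dataset(client).table("scratch_results")
job_config = bigquery.QueryJobConfig(destination=scratch_table)
client.query("SELECT 1 AS x", job_config=job_config).result()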
def get_bq_dataset(
    client: bigquery.Client,
    dataset_id: str,
    project_id: str = None,
) -> bigquery.Dataset:
    # If `project_id is None` then the default project of `client` will be used.
    dataset_ref = client.dataset(
        dataset_id, project=project_id)  # type: bigquery.DatasetReference

    # API request
    return client.get_dataset(dataset_ref)  # type: bigquery.Dataset
Example #13
def does_table_exist(bigquery_client: bigquery.Client, table: str, dataset: str = 'analytics') -> bool:
    """Check if given table from given Dataset exists in BigQuery, return True if so."""
    try:
        table_reference = bigquery_client.dataset(dataset).table(table)
        is_table = bigquery_client.get_table(table_reference)
        if is_table:
            logging.info('Table "{}" in Dataset "{}" already exists in BigQuery.'.format(table, dataset))
            return True
    except NotFound as error:
        logging.warning('Table "{}" does not exist in BigQuery Dataset "{}". Ref: {}.'.format(table, dataset, error))
        return False
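A short usage sketch for the check above; the table and dataset names are placeholders:

client = bigquery.Client()
if not does_table_exist(client, table='events', dataset='analytics'):
    # The table is missing: create it first (see the create_table example
    # further down), or fail fast before attempting any load.
    raise RuntimeError('Table "events" was not found in dataset "analytics"')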
def get_bq_table(
    client: bigquery.Client,
    dataset_id: str,
    table_id: str,
    project_id: str = None,
) -> bigquery.Table:
    # If `project_id is None` then the default project of `client` will be used.
    table_ref = client.dataset(dataset_id, project=project_id).table(
        table_id)  # type: bigquery.TableReference  # noqa: E501

    # API request
    return client.get_table(table_ref)  # type: bigquery.Table
Example #15
def get_bigquery_table():
    log = logging.getLogger('get_bigquery_table')

    log.info("get client")
    # Create a client, and get the table that we are interested in:
    bigquery_client = BQClient(project="pocketreplacement")
    log.info("get dataset")
    dataset_name = 'RedditData'
    dataset = bigquery_client.dataset(dataset_name)
    log.info("get table")
    # Legacy (pre-0.28) google-cloud-bigquery API: Dataset.table() returns a
    # Table object whose metadata is fetched with reload().
    table = dataset.table("subset")
    table.reload()
    return table
def loadjob_with_thread(project_name, dataset_name, table_name,
                        num_thread, sleep_time=None):
    tlist = []
    for _ in range(num_thread):
        client = Client(project_name)
        dataset_ref = Dataset(client.dataset(dataset_name))
        tw = ThreadWrapper(loadjob_infinite,
                           [client, dataset_ref, table_name, sleep_time])
        tw.start()
        tlist.append(tw)

    while len(tlist) > 0:
        tlist = [t for t in tlist if t.is_alive()]
        time.sleep(1)
Example #17
def delete_table(bq_client: bigquery.Client, dataset_id: str,
                 table_name: str) -> None:
    """Deletes a specified table in BigQuery.

    Args:
        bq_client: bigquery.Client object.
        dataset_id: String holding the ID of the dataset.
        table_name: Name of the table to delete.

    Returns:
        None; the table is deleted in BigQuery.
    """
    dataset_ref = bq_client.dataset(dataset_id)
    table_ref = dataset_ref.table(table_name)
    bq_client.delete_table(table=table_ref)
Example #18
def update_or_create_view(client: bigquery.Client, view_name: str,
                          view_query: str, dataset: str):
    LOGGER.debug("update_view: %s=%s", view_name, [view_query])
    dataset_ref = client.dataset(dataset)
    view_ref = dataset_ref.table(view_name)
    view = bigquery.Table(view_ref)
    view.view_query = view_query

    query_job = client.query(get_create_or_replace_view_query(view))
    query_job.result()  # wait for query job to finish

    updated_view = client.get_table(view)
    LOGGER.info("updated or replaced view: %s", updated_view.full_table_id)
    LOGGER.debug("view schema (%s): %s", updated_view.full_table_id,
                 updated_view.schema)
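get_create_or_replace_view_query is not shown in the snippet; a plausible sketch of such a helper, built only from the standard CREATE OR REPLACE VIEW DDL and the Table properties used above:

def get_create_or_replace_view_query(view: bigquery.Table) -> str:
    # Sketch only: emit a CREATE OR REPLACE VIEW statement for the given view.
    return 'CREATE OR REPLACE VIEW `{project}.{dataset}.{table}` AS\n{query}'.format(
        project=view.project,
        dataset=view.dataset_id,
        table=view.table_id,
        query=view.view_query)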
def load_bigquery_table_via_bq_apis(bq_client: bigquery.Client, dataset_id,
                                    table_name, imported_data_info, src_uris):
    """
    Load tables using BigQuery Load jobs, using the same configuration as BQ DTS ImportedDataInfo
    :return:
    """
    # https://googlecloudplatform.github.io/google-cloud-python/latest/_modules/google/cloud/bigquery/client.html#Client.load_table_from_uri
    # Step 1 - Translate required fields for BigQuery Python SDK
    tgt_tabledef = imported_data_info['table_defs'][0]

    # Step 2 - Create target table if it doesn't exist
    dataset_ref = bq_client.dataset(dataset_id)
    table_ref = dataset_ref.table(table_name)
    try:
        bq_client.get_table(table_ref)
    except exceptions.NotFound:
        # Step 2a - Attach schema
        tgt_schema = RPCRecordSchema_to_GCloudSchema(tgt_tabledef['schema'])
        tgt_table = bigquery.Table(table_ref, schema=tgt_schema)

        # Step 2b - Attach description
        tgt_table.description = imported_data_info[
            'destination_table_description']

        # Step 2c - Conditionally set partitioning type
        if '$' in table_name:
            tgt_table.partitioning_type = 'DAY'
            tgt_table._properties['tableReference'][
                'tableId'], _, _ = table_name.partition('$')

        # Step 2d - Create BigQuery table
        bq_client.create_table(tgt_table)

    # Step 3a - Create BigQuery Load Job ID
    current_datetime = datetime.datetime.utcnow().isoformat()
    raw_job_id = f'{table_name}_{current_datetime}'
    clean_job_id = BQ_JOB_ID_MATCHER.sub('___', raw_job_id)

    # Step 3b - Create BigQuery Job Config
    job_config = DTSTableDefinition_to_BQLoadJobConfig(tgt_tabledef)

    # Step 4 - Execute BigQuery Load Job using Python SDK
    load_job = bq_client.load_table_from_uri(source_uris=src_uris,
                                             destination=table_ref,
                                             job_id=clean_job_id,
                                             job_config=job_config)

    return load_job
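The function above returns the LoadJob without waiting for it; a hedged sketch of how a caller might block on it (argument names are placeholders):

load_job = load_bigquery_table_via_bq_apis(
    bq_client, 'my_dataset', 'my_table', imported_data_info, src_uris)
load_job.result()  # blocks until the load finishes, raising on failure
assert load_job.state == 'DONE'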
Example #20
def list_tables(client: bigquery.Client, dataset_id: str):
    """
    Lists the tables in project:dataset

    Args:
        client: BQ API client
        dataset_id: dataset to be inspected

    Returns:
        list

    Examples:
        list_tables(client, 'my_dataset')
    """
    dataset_ref = client.dataset(dataset_id)
    return [t.table_id for t in client.list_tables(dataset_ref)]
def _main():
    args = _parse_args()

    _client = Client(args.project)
    _dataset_ref = Dataset(_client.dataset(args.dataset))
    _table = args.table

    if args.cmd == 'create-table':
        create_table(_client, _dataset_ref, _table, args.partitioned)
    elif args.cmd == 'drop-table':
        drop_table(_client, _dataset_ref, _table)
    elif args.cmd == 'loadjob-one':
        loadjob_one(_client, _dataset_ref, _table)
    elif args.cmd == 'loadjob-with-thread':
        loadjob_with_thread(args.project, args.dataset, _table, args.num_thread,
                            args.sleep)
Example #22
def delete_dataset(client: bigquery.Client, dataset_id: str, delete_contents: bool = True):
    """
    Deletes dataset from the current project

    Args:
        client: BQ API client
        dataset_id: dataset to be deleted
        delete_contents: whether to also delete the dataset's tables (default is True)

    Returns:
        None; the dataset is deleted.

    """
    # TODO: ADD CHECK OF EXISTENCE
    dataset_ref = client.dataset(dataset_id)
    client.delete_dataset(dataset_ref, delete_contents)
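One way to address the TODO above, following the NotFound pattern used in the earlier examples (a sketch, not the original author's implementation):

from google.cloud.exceptions import NotFound

def delete_dataset_if_exists(client: bigquery.Client, dataset_id: str,
                             delete_contents: bool = True):
    """Variant of delete_dataset that checks for existence first (sketch)."""
    dataset_ref = client.dataset(dataset_id)
    try:
        client.get_dataset(dataset_ref)
    except NotFound:
        return
    client.delete_dataset(dataset_ref, delete_contents)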
def create_bq_dataset(
    client: bigquery.Client,
    dataset_id: str,
    dataset_description: str = None,
) -> bigquery.Dataset:
    """
    Create empty dataset.
    """
    # TODO: validate 'dataset_id'.
    #   > Dataset IDs must be alphanumeric (plus underscores) and must be at most 1024 chars long.

    # Note: the two-step construction (a DatasetReference wrapped in a Dataset) is not intuitive.
    dataset = bigquery.Dataset(
        client.dataset(dataset_id))  # type: bigquery.Dataset
    dataset.description = dataset_description

    # API request
    return client.create_dataset(dataset)  # type: bigquery.Dataset
Example #24
def create_dataset(client: bigquery.Client, dataset_id: str, location: str = 'EU',
                   description: str = "Creation date: {}".format(datetime.datetime.now())):
    """
    Creates a dataset with the reference project_id:dataset_id

    Args:
        client: BQ API client
        dataset_id: dataset to be created
        location: location of the dataset (default is Europe for legal reasons)
        description: description of the dataset (default is the date of creation)

    Returns:

    """
    # TODO: ADD CHECK OF EXISTENCE
    dataset_ref = client.dataset(dataset_id)
    dataset = bigquery.Dataset(dataset_ref)
    dataset.location = location
    dataset.description = description
    client.create_dataset(dataset)
Example #25
def delete_table(client: bigquery.Client, dataset_id: str, table_id: str):
    """
    Deletes the specified table in the given project:dataset

    Args:
        client: BQ API client
        dataset_id: destination dataset
        table_id: table to be deleted

    Returns:

    Examples:
        delete_table(client, 'my_dataset', 'my_table')
    """
    dataset_ref = client.dataset(dataset_id=dataset_id)
    tables_list = [t.table_id for t in list(client.list_tables(dataset_ref))]

    if table_id not in tables_list:
        print("THIS TABLE DOES NOT EXIST IN {}:{}".format(client.project, dataset_id))
    else:
        table_ref = dataset_ref.table(table_id)
        client.delete_table(table_ref)
Example #26
def create_table(client: bigquery.Client, dataset_id: str, table_id: str, schema: list):
    """
    Creates a table according to the given schema in the specified project:dataset

    Args:
        client: BQ API client
        dataset_id: destination dataset
        table_id: table to be created
        schema: schema of the table to be created

    Returns:

    Examples:
        create_table(client, 'my_dataset', 'my_table', my_schema)
    """
    dataset_ref = client.dataset(dataset_id=dataset_id)
    tables_list = [t.table_id for t in list(client.list_tables(dataset_ref))]

    if table_id in tables_list:
        print("THIS TABLE ALREADY EXISTS IN {}:{}".format(client.project, dataset_id))
    else:
        table_ref = dataset_ref.table(table_id)
        client.create_table(bigquery.Table(table_ref, schema))
Example #27
def materialize_view(  # pylint: disable=too-many-arguments, too-many-locals
    client: bigquery.Client,
    source_view_name: str,
    destination_table_name: str,
    project: str,
    source_dataset: str,
    destination_dataset: str,
) -> 'MaterializeViewResult':
    query = get_select_all_from_query(source_view_name,
                                      project=project,
                                      dataset=source_dataset)
    LOGGER.info("materializing view: %s.%s -> %s.%s", source_dataset,
                source_view_name, destination_dataset, destination_table_name)
    LOGGER.debug("materialize_view: %s=%s", destination_table_name, [query])

    start = time.perf_counter()
    dataset_ref = client.dataset(destination_dataset)
    destination_table_ref = dataset_ref.table(destination_table_name)

    job_config = QueryJobConfig()
    job_config.destination = destination_table_ref
    job_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE

    query_job = client.query(query, job_config=job_config)
    # getting the result will make sure that the query ran successfully
    result: bigquery.table.RowIterator = query_job.result()
    duration = time.perf_counter() - start
    total_bytes_processed = query_job.total_bytes_processed
    LOGGER.info(
        'materialized view: %s.%s, total rows: %s, %s bytes processed, took: %.3fs',
        source_dataset, source_view_name, result.total_rows,
        total_bytes_processed, duration)
    if LOGGER.isEnabledFor(logging.DEBUG):
        sample_result = list(islice(result, 3))
        LOGGER.debug("sample_result: %s", sample_result)
    return MaterializeViewResult(total_bytes_processed=total_bytes_processed,
                                 total_rows=result.total_rows)
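MaterializeViewResult is not defined in the snippet; a minimal compatible container, assuming it only carries the two fields returned above:

from typing import NamedTuple, Optional

class MaterializeViewResult(NamedTuple):
    # Assumed shape: only the fields returned by materialize_view above.
    total_bytes_processed: Optional[int]
    total_rows: int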
Example #28
        def enrich_task():
            client = Client()

            # Need to use a temporary table because bq query sets field modes to NULLABLE and descriptions to null
            # when writeDisposition is WRITE_TRUNCATE

            # Create a temporary table
            temp_table_name = '{task}_{milliseconds}'.format(
                task=task, milliseconds=int(round(time.time() * 1000)))
            temp_table_ref = client.dataset(dataset_name_temp).table(
                temp_table_name)
            table = Table(temp_table_ref)

            description_path = os.path.join(
                dags_folder,
                'resources/stages/enrich/descriptions/{task}.txt'.format(
                    task=task))
            table.description = read_file(description_path)
            if time_partitioning_field is not None:
                table.time_partitioning = TimePartitioning(
                    field=time_partitioning_field)
            logging.info('Creating table: ' + json.dumps(table.to_api_repr()))

            schema_path = os.path.join(
                dags_folder,
                'resources/stages/enrich/schemas/{task}.json'.format(
                    task=task))
            schema = read_bigquery_schema_from_file(schema_path)
            table.schema = schema

            table = client.create_table(table)
            assert table.table_id == temp_table_name

            # Query from raw to temporary table
            query_job_config = QueryJobConfig()
            # Finishes faster, query limit for concurrent interactive queries is 50
            query_job_config.priority = QueryPriority.INTERACTIVE
            query_job_config.destination = temp_table_ref
            sql_path = os.path.join(
                dags_folder,
                'resources/stages/enrich/sqls/{task}.sql'.format(task=task))
            sql = read_file(sql_path, environment)
            query_job = client.query(sql,
                                     location='US',
                                     job_config=query_job_config)
            submit_bigquery_job(query_job, query_job_config)
            assert query_job.state == 'DONE'

            # Copy temporary table to destination
            copy_job_config = CopyJobConfig()
            copy_job_config.write_disposition = 'WRITE_TRUNCATE'

            dest_table_name = '{task}'.format(task=task)
            dest_table_ref = client.dataset(
                dataset_name,
                project=destination_dataset_project_id).table(dest_table_name)
            copy_job = client.copy_table(temp_table_ref,
                                         dest_table_ref,
                                         location='US',
                                         job_config=copy_job_config)
            submit_bigquery_job(copy_job, copy_job_config)
            assert copy_job.state == 'DONE'

            # Delete temp table
            client.delete_table(temp_table_ref)
Example #29
def get_bq_view_query(client: bigquery.Client, view_name: str, dataset: str):
    dataset_ref = client.dataset(dataset)
    view_ref = dataset_ref.table(view_name)
    view = client.get_table(view_ref)
    return view.view_query
Example #30
        def enrich_task(ds, **kwargs):
            template_context = kwargs.copy()
            template_context['ds'] = ds
            template_context['params'] = environment

            client = Client()

            # Need to use a temporary table because bq query sets field modes to NULLABLE and descriptions to null
            # when writeDisposition is WRITE_TRUNCATE

            # Create a temporary table
            temp_table_name = '{task}_{milliseconds}'.format(
                task=task, milliseconds=int(round(time.time() * 1000)))
            temp_table_ref = client.dataset(dataset_name_temp).table(
                temp_table_name)
            table = Table(temp_table_ref)

            description_path = os.path.join(
                dags_folder,
                'resources/stages/enrich/descriptions/{task}.txt'.format(
                    task=task))
            table.description = read_file(description_path)
            table.time_partitioning = TimePartitioning(
                field=time_partitioning_field)
            logging.info('Creating table: ' + json.dumps(table.to_api_repr()))

            schema_path = os.path.join(
                dags_folder,
                'resources/stages/enrich/schemas/{task}.json'.format(
                    task=task))
            schema = read_bigquery_schema_from_file(schema_path)
            table.schema = schema

            table = client.create_table(table)
            assert table.table_id == temp_table_name

            # Query from raw to temporary table
            query_job_config = QueryJobConfig()
            # Finishes faster, query limit for concurrent interactive queries is 50
            query_job_config.priority = QueryPriority.INTERACTIVE
            query_job_config.destination = temp_table_ref

            sql_path = os.path.join(
                dags_folder,
                'resources/stages/enrich/sqls/{task}.sql'.format(task=task))
            sql_template = read_file(sql_path)
            sql = kwargs['task'].render_template('', sql_template,
                                                 template_context)
            print('Enrichment sql:')
            print(sql)

            query_job = client.query(sql,
                                     location='US',
                                     job_config=query_job_config)
            submit_bigquery_job(query_job, query_job_config)
            assert query_job.state == 'DONE'

            if load_all_partitions:
                # Copy temporary table to destination
                copy_job_config = CopyJobConfig()
                copy_job_config.write_disposition = 'WRITE_TRUNCATE'

                dest_table_name = '{task}'.format(task=task)
                dest_table_ref = client.dataset(
                    dataset_name,
                    project=destination_dataset_project_id).table(
                        dest_table_name)
                copy_job = client.copy_table(temp_table_ref,
                                             dest_table_ref,
                                             location='US',
                                             job_config=copy_job_config)
                submit_bigquery_job(copy_job, copy_job_config)
                assert copy_job.state == 'DONE'
            else:
                # Merge
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement
                merge_job_config = QueryJobConfig()
                # Finishes faster, query limit for concurrent interactive queries is 50
                merge_job_config.priority = QueryPriority.INTERACTIVE

                merge_sql_path = os.path.join(
                    dags_folder,
                    'resources/stages/enrich/sqls/merge_{task}.sql'.format(
                        task=task))
                merge_sql_template = read_file(merge_sql_path)
                template_context['params']['source_table'] = temp_table_name
                merge_sql = kwargs['task'].render_template(
                    '', merge_sql_template, template_context)
                print('Merge sql:')
                print(merge_sql)
                merge_job = client.query(merge_sql,
                                         location='US',
                                         job_config=merge_job_config)
                submit_bigquery_job(merge_job, merge_job_config)
                assert merge_job.state == 'DONE'

            # Delete temp table
            client.delete_table(temp_table_ref)