import datetime

from google.cloud import bigquery


def get_config_details(dataset_name, table_name, etl_project_id):
    """Get the configuration details from the control table."""
    incr_query = ConnectBqCtlTable.get_incr_query(dataset_name, table_name,
                                                  etl_project_id)
    query_job = incr_query.format(dataset_name, table_name, etl_project_id)
    # Run the query once and load the result set into a dataframe.
    param_dataframe = ExecutePipeline.client.query(query_job).to_dataframe()
    batch_name = dataset_name
    table_name = param_dataframe['table_name'].values[0]
    query = param_dataframe['query'].values[0]
    gs_path = param_dataframe['gs_path'].values[0]
    gcs_folder = param_dataframe['gcs_folder'].values[0]
    gcs_file_name = param_dataframe['gcs_file_name'].values[0]
    strg_project_id = param_dataframe['strg_project_id'].values[0]
    bq_project_id = param_dataframe['bq_project_id'].values[0]
    final_schema = param_dataframe['final_schema'].values[0]
    stg_schema = param_dataframe['stg_schema'].values[0]
    prune_min = int(param_dataframe['prune_minutes'].values[0])
    bq_stg_table = param_dataframe['bq_stg_table'].values[0]
    pipeline_run_id = param_dataframe['pipeline_run_id'].values[0]
    last_extract_date = datetime.datetime.strptime(
        str(param_dataframe['last_extract_date'].values[0]),
        '%Y-%m-%dT%H:%M:%S.%f000')
    start_date = last_extract_date.strftime('%Y%m%d')
    end_date = datetime.date.today().strftime('%Y%m%d')
    return (batch_name, table_name, query, start_date, end_date, gs_path,
            gcs_folder, gcs_file_name, last_extract_date, bq_stg_table,
            strg_project_id, bq_project_id, final_schema, stg_schema,
            prune_min, pipeline_run_id)

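# Hedged usage sketch (illustrative, not part of the original module): shows
# how the configuration tuple returned above might be unpacked by a caller.
# The dataset, table and project IDs are placeholder assumptions.
(batch_name, table_name, query, start_date, end_date, gs_path, gcs_folder,
 gcs_file_name, last_extract_date, bq_stg_table, strg_project_id,
 bq_project_id, final_schema, stg_schema, prune_min,
 pipeline_run_id) = get_config_details('example_dataset', 'example_table',
                                       'example-etl-project')
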
def load_job_run_tbl(etl_project_id):
    """Loading the final control table."""
    merge_query = ConnectBqCtlTable.get_batch_merge_query(etl_project_id)
    query_job = merge_query.format(etl_project_id, ExecuteBatch.batch_id)
    job = ExecuteBatch.client.query(query_job)
    job.result()

def load_hist_update_pipeline(self):
    param = self.extract_update_param()
    upd_query = ConnectBqCtlTable.get_agg_update_query(self.project_id,
                                                       self.dataset)
    client = bigquery.Client()
    for job_name, pipeline_id in param[
            ['job_name', 'pipeline_id']].itertuples(index=False):
        job = upd_query.format(pipeline_id, self.project_id)
        job_load = client.query(job)
        job_load.result()
        print("updated history table for pipeline_id %s" % pipeline_id)

def load_hist_job_run_tbl(etl_project_id):
    """Loading the history control table based on the given parameters."""
    insert_query = ConnectBqCtlTable.get_batch_insert_query(etl_project_id)
    query_job = insert_query.format(etl_project_id,
                                    ExecuteBatch.batch_id)  # need to check
    job = ExecuteBatch.client.query(query_job)
    job.result()

def load_hist_job_run_tbl_upd(etl_project_id, state):
    """Loading the history control table based on the given parameters."""
    update_query = ConnectBqCtlTable.get_batch_update_query(etl_project_id)
    query_job = update_query.format(etl_project_id, ExecuteBatch.batch_id,
                                    state)
    job = ExecuteBatch.client.query(query_job)
    job.result()

def bq_load_final(batch_name, table, bq_project_id):
    """Load data into the BigQuery final table via the target merge query."""
    final_target_merge_query = ConnectBqCtlTable.get_final_target_merge_query(
        batch_name, table, bq_project_id)
    client = bigquery.Client()
    query_job = final_target_merge_query.format(bq_project_id)
    job = client.query(query_job)
    job.result()

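# Hedged usage sketch (illustrative): the batch, table and project values are
# placeholder assumptions; the call simply executes the templated MERGE
# returned by ConnectBqCtlTable.get_final_target_merge_query.
bq_load_final('example_batch', 'example_table', 'example-bq-project')
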
def load_job_run_tbl(job_id, batch_name, table_name, etl_project_id):
    """Loading the final control table."""
    merge_query = ConnectBqCtlTable.get_merge_query(batch_name, table_name,
                                                    etl_project_id)
    query_job = merge_query.format(etl_project_id, batch_name, table_name)
    job = ExecutePipeline.client.query(query_job)
    job.result()

def load_hist_job_run_tbl(job_id, batch_name, table_name, table,
                          batch_start_time, prune_min, pipeline_run_id,
                          etl_project_id, batch_run_id):
    """Loading the history control table based on the given parameters."""
    insert_query = ConnectBqCtlTable.get_insert_query(batch_name, table_name,
                                                      etl_project_id)
    query_job = insert_query.format(etl_project_id, job_id, batch_name, table,
                                    batch_start_time, prune_min,
                                    pipeline_run_id, batch_run_id)
    job = ExecutePipeline.client.query(query_job)
    job.result()

def load_hist_pipeline_job(self):
    params = self.extract_ins_param()
    query = ConnectBqCtlTable.get_agg_insert_query(self.project_id,
                                                   self.dataset)
    client = bigquery.Client()
    for (job_name, table_name, pipeline_id, prune_min,
         job_name_id) in params[['job_name', 'table_name', 'pipeline_id',
                                 'prune_min',
                                 'job_name_id']].itertuples(index=False):
        job = query.format(pipeline_id, job_name_id, table_name,
                           self.project_id, self.batch_run_id)
        job_load = client.query(job)
        job_load.result()
        print("inserted history row for pipeline_id %s" % pipeline_id)

def load_hist_job_run_tbl_upd(job_id, batch_name, table_name, state,
                              last_extract_date, batch_start_time,
                              batch_end_time, etl_project_id, batch_run_id):
    """Loading the history control table based on the given parameters."""
    update_query = ConnectBqCtlTable.get_update_query(batch_name, table_name,
                                                      etl_project_id)
    query_job = update_query.format(etl_project_id, batch_name, table_name,
                                    last_extract_date, batch_start_time,
                                    batch_end_time, state, batch_run_id)
    job = ExecutePipeline.client.query(query_job)
    job.result()

def load_hist_job_run_tbl(etl_project_id):
    """Loading the history control table based on the given parameters."""
    insert_query = ConnectBqCtlTable.get_batch_insert_query(etl_project_id)
    query_job = insert_query.format(etl_project_id,
                                    ExecuteBatch.batch_id)  # need to check
    job = ExecuteBatch.client.query(query_job)
    job.result()
    # Look up the latest batch_run_id; the project, dataset and batch name
    # below are hardcoded in this query.
    query = ("select max(batch_run_id) as batch_run_id, batch_name "
             "from `analytics-plp-uat.PLP_BQ_CTL_METADATA.PLP_BQ_CTL_BATCH_RUN_H` "
             "where batch_name = 'BCM_DCIM' group by batch_name")
    job = ExecuteBatch.client.query(query)
    results = job.result()
    for row in results:
        print("batch_name : ", row.batch_name)
        print("batch_run_id : ", row.batch_run_id)

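# Hedged orchestration sketch (illustrative, not part of the original module):
# one possible per-table flow stitched together from the pipeline-level helpers
# above (the variants of load_hist_job_run_tbl / load_hist_job_run_tbl_upd that
# take job_id and batch_name). The job_id format, the 'SUCCESS' state value and
# the timestamps are placeholder assumptions.
def run_table_load(dataset_name, table_name, etl_project_id, batch_run_id):
    (batch_name, table, query, start_date, end_date, gs_path, gcs_folder,
     gcs_file_name, last_extract_date, bq_stg_table, strg_project_id,
     bq_project_id, final_schema, stg_schema, prune_min,
     pipeline_run_id) = get_config_details(dataset_name, table_name,
                                           etl_project_id)
    batch_start_time = datetime.datetime.now()
    job_id = '{}_{}'.format(batch_name, table)  # placeholder job identifier
    load_hist_job_run_tbl(job_id, batch_name, table_name, table,
                          batch_start_time, prune_min, pipeline_run_id,
                          etl_project_id, batch_run_id)
    bq_load_final(batch_name, table, bq_project_id)
    load_hist_job_run_tbl_upd(job_id, batch_name, table_name, 'SUCCESS',
                              last_extract_date, batch_start_time,
                              datetime.datetime.now(), etl_project_id,
                              batch_run_id)
    load_job_run_tbl(job_id, batch_name, table_name, etl_project_id)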