from logging import Logger
from typing import Iterator, List, Optional, Union

from google.cloud.bigquery.table import Row

from airflow.providers.google.cloud.hooks.bigquery import BigQueryHook


def bigquery_get_data(
    logger: Logger,
    dataset_id: str,
    table_id: str,
    big_query_hook: BigQueryHook,
    batch_size: int,
    selected_fields: Optional[Union[List[str], str]],
) -> Iterator:
    logger.info('Fetching Data from:')
    logger.info('Dataset: %s ; Table: %s', dataset_id, table_id)

    i = 0
    while True:
        # Page through the table, batch_size rows at a time.
        rows: List[Row] = big_query_hook.list_rows(
            dataset_id=dataset_id,
            table_id=table_id,
            max_results=batch_size,
            selected_fields=selected_fields,
            start_index=i * batch_size,
        )

        if len(rows) == 0:
            logger.info('Job Finished')
            return

        logger.info('Total Extracted rows: %s', len(rows) + i * batch_size)

        # Yield one batch of row-value tuples per iteration.
        yield [row.values() for row in rows]

        i += 1
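# A minimal consumption sketch, assuming the default "google_cloud_default"
# connection; the dataset and table names below are placeholders, not from the
# source. It shows that each yielded batch is a list of row-value tuples, so
# memory use stays bounded by batch_size.
def _example_consume_batches() -> None:
    import logging

    hook = BigQueryHook(gcp_conn_id='google_cloud_default')
    for batch in bigquery_get_data(
        logger=logging.getLogger(__name__),
        dataset_id='my_dataset',   # placeholder dataset
        table_id='my_table',       # placeholder table
        big_query_hook=hook,
        batch_size=1000,
        selected_fields=None,
    ):
        for row_values in batch:
            print(row_values)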
def _bq_get_data(self):
    self.log.info('Fetching Data from:')
    self.log.info('Dataset: %s ; Table: %s', self.dataset_id, self.table_id)

    hook = BigQueryHook(
        gcp_conn_id=self.gcp_conn_id,
        delegate_to=self.delegate_to,
        location=self.location,
        impersonation_chain=self.impersonation_chain,
    )

    i = 0
    while True:
        # Page through the table, batch_size rows at a time.
        rows = hook.list_rows(
            dataset_id=self.dataset_id,
            table_id=self.table_id,
            max_results=self.batch_size,
            selected_fields=self.selected_fields,
            start_index=i * self.batch_size,
        )

        if len(rows) == 0:
            self.log.info('Job Finished')
            return

        self.log.info('Total Extracted rows: %s', len(rows) + i * self.batch_size)

        # list_rows returns google.cloud.bigquery Row objects (not the REST
        # 'f'/'v' payload), so build each batch directly from the row values.
        table_data = [list(row.values()) for row in rows]

        yield table_data
        i += 1
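# A hedged sketch of how an operator's execute() might consume the generator
# above, assuming a MySQL destination; MySqlHook, self.mysql_conn_id, and
# self.mysql_table are illustrative assumptions and are not shown in the
# source.
def execute(self, context):
    from airflow.providers.mysql.hooks.mysql import MySqlHook

    mysql_hook = MySqlHook(mysql_conn_id=self.mysql_conn_id)
    for batch in self._bq_get_data():
        # insert_rows is the standard DbApiHook bulk-insert helper.
        mysql_hook.insert_rows(table=self.mysql_table, rows=batch)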