# Imports this snippet needs (pandas and google-cloud-bigquery).
from pandas import DataFrame, Series, concat
from google.api_core.exceptions import BadRequest
from google.cloud.bigquery import (
    LoadJobConfig,
    TableReference,
    TimePartitioning,
    TimePartitioningType,
    WriteDisposition,
)


def process_response_rows_for_bigquery(self, rows: list, table_reference: TableReference):
    # Flatten the nested 'dimensions' dicts into top-level columns.
    rows_dataframe = DataFrame.from_records(rows)
    rows_dataframe = concat(
        [rows_dataframe, rows_dataframe['dimensions'].apply(Series)],
        axis=1, join='inner')
    rows_dataframe = rows_dataframe.drop(['dimensions'], axis=1)
    # Day partitioning expects a DATE value, so strip the time component.
    rows_dataframe['date'] = rows_dataframe['date'].apply(lambda x: x.date())

    # Append to a day-partitioned table, deriving the schema from the columns.
    job_config = LoadJobConfig()
    job_config.write_disposition = WriteDisposition.WRITE_APPEND
    job_config.time_partitioning = TimePartitioning(
        type_=TimePartitioningType.DAY, field='date')
    job_config.schema = [
        self._get_schema_for_field(column)
        for column in list(rows_dataframe.columns.values)
    ]

    try:
        load_job = self.bigquery.client.load_table_from_dataframe(
            rows_dataframe, table_reference, job_config=job_config)
        load_job.result()
    except BadRequest as error:
        print(error.errors)
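# A minimal sketch of what the _get_schema_for_field helper used above could
# look like; the helper is not shown in the original code, so the field-type
# mapping here is an assumption for illustration only.
from google.cloud.bigquery import SchemaField

_FIELD_TYPES = {'date': 'DATE', 'clicks': 'INTEGER', 'cost': 'FLOAT'}  # hypothetical mapping


def _get_schema_for_field(self, column: str) -> SchemaField:
    # Fall back to a nullable STRING for any column not in the mapping.
    return SchemaField(column, _FIELD_TYPES.get(column, 'STRING'), mode='NULLABLE')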
# Imports this snippet needs; build_schema is a helper from the surrounding
# project (it turns a JSON schema into a list of BigQuery SchemaFields).
from google.api_core import exceptions as google_exceptions
from google.cloud import bigquery
from google.cloud.bigquery import LoadJobConfig, SourceFormat, WriteDisposition


def _load_to_bq(self, client, dataset, table_name, table_schema, table_config,
                key_props, metadata_columns, truncate, rows):
    logger = self.logger
    partition_field = table_config.get("partition_field", None)
    cluster_fields = table_config.get("cluster_fields", None)
    force_fields = table_config.get("force_fields", {})

    # Build the BigQuery schema for this stream.
    schema = build_schema(table_schema, key_properties=key_props,
                          add_metadata=metadata_columns,
                          force_fields=force_fields)

    load_config = LoadJobConfig()
    load_config.ignore_unknown_values = True
    load_config.schema = schema
    if partition_field:
        load_config.time_partitioning = bigquery.table.TimePartitioning(
            type_=bigquery.table.TimePartitioningType.DAY,
            field=partition_field)
    if cluster_fields:
        load_config.clustering_fields = cluster_fields
    load_config.source_format = SourceFormat.NEWLINE_DELIMITED_JSON

    if truncate:
        logger.info(f"Load {table_name} by FULL_TABLE (truncate)")
        load_config.write_disposition = WriteDisposition.WRITE_TRUNCATE
    else:
        logger.info(f"Appending to {table_name}")
        load_config.write_disposition = WriteDisposition.WRITE_APPEND

    logger.info("loading {} to BigQuery".format(table_name))

    load_job = None
    try:
        # rewind=True seeks the file-like `rows` back to the start before upload.
        load_job = client.load_table_from_file(rows, dataset.table(table_name),
                                               job_config=load_config,
                                               rewind=True)
        logger.info("loading job {}".format(load_job.job_id))
        job = load_job.result()
        logger.info(job._properties)
        return job
    except google_exceptions.BadRequest as err:
        logger.error("failed to load table {} from file: {}".format(
            table_name, str(err)))
        if load_job and load_job.errors:
            # Summarise the job-level errors into the raised exception.
            reason = err.errors[0]["reason"]
            messages = [f"{job_error['message']}" for job_error in load_job.errors]
            logger.error("reason: {reason}, errors:\n{e}".format(
                reason=reason, e="\n".join(messages)))
            err.message = f"reason: {reason}, errors: {';'.join(messages)}"
        raise err
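# Hypothetical usage sketch for _load_to_bq above: `rows` must be a file-like
# object containing newline-delimited JSON, because the job config uses
# SourceFormat.NEWLINE_DELIMITED_JSON. The client and dataset names below are
# placeholders, not taken from the original code.
import io
import json

from google.cloud import bigquery

client = bigquery.Client()  # uses application-default credentials
dataset = bigquery.DatasetReference(client.project, "analytics")  # hypothetical dataset

buffer = io.BytesIO()
for record in ({"id": 1, "name": "a"}, {"id": 2, "name": "b"}):
    buffer.write(json.dumps(record).encode("utf-8") + b"\n")

# The buffer would be passed as `rows`; rewind=True in the loader seeks it
# back to the start, so the caller does not need to reset the write position.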
def _process_data_for_bigquery(self, data: DataFrame,
                               output_tablereference: TableReference):
    # Append the DataFrame to a table partitioned by day on its 'date' column.
    job_config = LoadJobConfig()
    job_config.write_disposition = WriteDisposition.WRITE_APPEND
    job_config.time_partitioning = TimePartitioning(
        type_=TimePartitioningType.DAY, field='date')

    try:
        load_job = self.bigquery.client.load_table_from_dataframe(
            data, output_tablereference, job_config=job_config)
        load_job.result()
    except BadRequest as error:
        print(error.errors)