Python LoadJobConfig.time_partitioning 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: google.cloud.bigquery

클래스/타입: LoadJobConfig

메소드/함수: time_partitioning

hotexamples.com에서의 예제들: 3

Python LoadJobConfig.time_partitioning - 3개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 Python의 google.cloud.bigquery.LoadJobConfig.time_partitioning에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

자주 사용되는 메소드들

보기 숨기기

LoadJobConfig(30)

schema(21)

source_format(14)

write_disposition(13)

skip_leading_rows(11)

allow_quoted_newlines(5)

encoding(3)

field_delimiter(3)

ignore_unknown_values(3)

max_bad_records(3)

quote_character(3)

time_partitioning(3)

create_disposition(2)

autodetect(2)

get(1)

clustering_fields(1)

schema_update_options(1)

to_api_repr(1)

labels(1)

예제 #1

파일 보기

    def process_response_rows_for_bigquery(self, rows: list,
                                           table_reference: TableReference):
        rows_dataframe = DataFrame.from_records(rows)

        rows_dataframe = concat(
            [rows_dataframe, rows_dataframe['dimensions'].apply(Series)],
            axis=1,
            join='inner')
        rows_dataframe = rows_dataframe.drop(['dimensions'], axis=1)
        rows_dataframe['date'] = rows_dataframe['date'].apply(
            lambda x: x.date())

        job_config = LoadJobConfig()
        job_config.write_disposition = WriteDisposition.WRITE_APPEND
        job_config.time_partitioning = TimePartitioning(
            type_=TimePartitioningType.DAY, field='date')
        job_config.schema = [
            self._get_schema_for_field(column)
            for column in list(rows_dataframe.columns.values)
        ]

        try:
            load_job = self.bigquery.client.load_table_from_dataframe(
                rows_dataframe, table_reference, job_config=job_config)

            load_job.result()
        except BadRequest as error:
            print(error.errors)

예제 #2

파일 보기

파일: processhandler.py 프로젝트: taikonauten/target-bigquery

    def _load_to_bq(self, client, dataset, table_name, table_schema,
                    table_config, key_props, metadata_columns, truncate, rows):
        logger = self.logger
        partition_field = table_config.get("partition_field", None)
        cluster_fields = table_config.get("cluster_fields", None)
        force_fields = table_config.get("force_fields", {})

        schema = build_schema(table_schema,
                              key_properties=key_props,
                              add_metadata=metadata_columns,
                              force_fields=force_fields)
        load_config = LoadJobConfig()
        load_config.ignore_unknown_values = True
        load_config.schema = schema
        if partition_field:
            load_config.time_partitioning = bigquery.table.TimePartitioning(
                type_=bigquery.table.TimePartitioningType.DAY,
                field=partition_field)

        if cluster_fields:
            load_config.clustering_fields = cluster_fields

        load_config.source_format = SourceFormat.NEWLINE_DELIMITED_JSON

        if truncate:
            logger.info(f"Load {table_name} by FULL_TABLE (truncate)")
            load_config.write_disposition = WriteDisposition.WRITE_TRUNCATE
        else:
            logger.info(f"Appending to {table_name}")
            load_config.write_disposition = WriteDisposition.WRITE_APPEND

        logger.info("loading {} to BigQuery".format(table_name))

        load_job = None
        try:
            load_job = client.load_table_from_file(rows,
                                                   dataset.table(table_name),
                                                   job_config=load_config,
                                                   rewind=True)
            logger.info("loading job {}".format(load_job.job_id))
            job = load_job.result()
            logger.info(job._properties)

            return job

        except google_exceptions.BadRequest as err:
            logger.error("failed to load table {} from file: {}".format(
                table_name, str(err)))
            if load_job and load_job.errors:
                reason = err.errors[0]["reason"]
                messages = [f"{err['message']}" for err in load_job.errors]
                logger.error("reason: {reason}, errors:\n{e}".format(
                    reason=reason, e="\n".join(messages)))
                err.message = f"reason: {reason}, errors: {';'.join(messages)}"

            raise err

예제 #3

파일 보기

    def _process_data_for_bigquery(self, data: DataFrame,
                                   output_tablereference: TableReference):
        job_config = LoadJobConfig()
        job_config.write_disposition = WriteDisposition.WRITE_APPEND
        job_config.time_partitioning = TimePartitioning(
            type_=TimePartitioningType.DAY, field='date')

        try:
            load_job = self.bigquery.client.load_table_from_dataframe(
                data, output_tablereference, job_config=job_config)

            load_job.result()
        except BadRequest as error:
            print(error.errors)