Example #1
def _make_load_job_config(source_format,  # type: str
                          write_disposition,  # type: str
                          schema=None,  # type: Optional[List[SchemaField]]
                          skip_leading_row=False,  # type: bool
                          ):
    """
    Makes and returns a LoadJobConfig according to the passed-in parameters.

    Args:
        source_format: Should be a recognized BigQuery source format. See
            https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.sourceFormat
        write_disposition: Should be a recognized BigQuery write disposition. See
            https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.writeDisposition
        schema: A list of SchemaFields. If unset, BigQuery will try to infer a schema.
        skip_leading_row: If True, the first row of the loaded file will be skipped.
    """
    job_config = LoadJobConfig()
    job_config.source_format = source_format
    job_config.write_disposition = write_disposition
    if schema:
        job_config.schema = schema
    else:
        job_config.autodetect = True
    if skip_leading_row:
        job_config.skip_leading_rows = 1
    return job_config
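As a hedged illustration only (the client, dataset, table, and file names below are placeholders, not part of the example), the helper above might be wired into a load call like this:

from google.cloud import bigquery

client = bigquery.Client()
table_ref = client.dataset("my_dataset").table("my_table")  # placeholder names
job_config = _make_load_job_config("CSV", "WRITE_TRUNCATE", skip_leading_row=True)
with open("data.csv", "rb") as source_fp:  # placeholder path
    client.load_table_from_file(source_fp, table_ref, job_config=job_config).result()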
Example #2
def update_or_create_table_from_csv(
    client: bigquery.Client,
    table_name: str,
    source_file: str,
    dataset: str,
    source_schema_file: str,
):
    LOGGER.debug("update_or_create_table_from_csv: %s=%s", table_name,
                 [source_file])
    dataset_ref = client.dataset(dataset)
    table_ref = dataset_ref.table(table_name)

    job_config = LoadJobConfig()
    job_config.source_format = "CSV"
    job_config.skip_leading_rows = 1
    if Path(source_schema_file).exists():
        job_config.schema = get_table_schema(source_schema_file)
    else:
        job_config.autodetect = True
    job_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE

    with open(source_file, "rb") as source_fp:
        load_job = client.load_table_from_file(source_fp,
                                               destination=table_ref,
                                               job_config=job_config)

    # wait for job to complete
    load_job.result()

    LOGGER.info("updated config table: %s", table_ref.table_id)
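The example above relies on a get_table_schema helper that is not shown. A minimal sketch of such a helper, assuming the schema file is a JSON array in BigQuery's standard schema format (objects with "name", "type", and optionally "mode"), could be:

import json

from google.cloud.bigquery import SchemaField

def get_table_schema(schema_file):
    # Sketch only; the original helper's implementation may differ.
    # Parse each JSON field definition into a SchemaField.
    with open(schema_file) as fp:
        return [SchemaField.from_api_repr(field) for field in json.load(fp)]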
Example #3
    def import_csv(self,
                   bucket_name: str,
                   bucket_path: str,
                   dataset: str,
                   table: str,
                   sep: str = "\t") -> bool:
        logging.info(
            f"DataWarehouse.import_csv {bucket_path} to {dataset}.{table} ...")
        client = self._get_client()

        config = LoadJobConfig()
        config.autodetect = True
        config.field_delimiter = sep

        bucket_url = f"gs://{self.config.lake_path}/{bucket_path}"

        load_job = client.load_table_from_uri(bucket_url,
                                              f"{dataset}.{table}",
                                              job_config=config)
        result = load_job.result()

        logging.info(
            f"DataWarehouse.import_csv {bucket_path} to {dataset}.{table} Complete!"
        )

        return True
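Note that the config above relies on CSV being the default source format for a load job and does not skip a header row. A hedged variant (not from the original source) that makes those settings explicit:

from google.cloud.bigquery import LoadJobConfig, SourceFormat

config = LoadJobConfig()
config.source_format = SourceFormat.CSV  # explicit rather than relying on the server default
config.autodetect = True
config.field_delimiter = "\t"
config.skip_leading_rows = 1  # assumption: the exported files carry a header row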
Example #4
    def test_begin_w_autodetect(self):
        from google.cloud.bigquery.job import LoadJobConfig

        path = "/projects/{}/jobs".format(self.PROJECT)
        resource = self._make_resource()
        resource["configuration"]["load"]["autodetect"] = True
        # Ensure None for missing server-set props
        del resource["statistics"]["creationTime"]
        del resource["etag"]
        del resource["selfLink"]
        del resource["user_email"]
        conn = _make_connection(resource)
        client = _make_client(project=self.PROJECT, connection=conn)
        config = LoadJobConfig()
        config.autodetect = True
        job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF,
                             client, config)
        with mock.patch(
                "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
        ) as final_attributes:
            job._begin()

        final_attributes.assert_called_with({"path": path}, client, job)

        sent = {
            "jobReference": {
                "projectId": self.PROJECT,
                "jobId": self.JOB_ID
            },
            "configuration": {
                "load": {
                    "sourceUris": [self.SOURCE1],
                    "destinationTable": {
                        "projectId": self.PROJECT,
                        "datasetId": self.DS_ID,
                        "tableId": self.TABLE_ID,
                    },
                    "autodetect": True,
                }
            },
        }
        conn.api_request.assert_called_once_with(method="POST",
                                                 path=path,
                                                 data=sent,
                                                 timeout=None)
        self._verifyResourceProperties(job, resource)
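The test above checks that autodetect=True ends up under configuration.load in the POST body. The same mapping can be inspected locally through the config's to_api_repr() method; a quick illustrative check:

from google.cloud.bigquery.job import LoadJobConfig

config = LoadJobConfig()
config.autodetect = True
# Expected to show the property nested under the "load" key,
# along the lines of {'load': {'autodetect': True}}.
print(config.to_api_repr())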
Example #5
    def _execute(self):
        client = self._get_client()
        source_uris = self._get_source_uris()

        job_config = LoadJobConfig()
        if self._params['import_json']:
            job_config.source_format = 'NEWLINE_DELIMITED_JSON'
        else:
            try:
                job_config.skip_leading_rows = self._params['rows_to_skip']
            except KeyError:
                job_config.skip_leading_rows = 0
        job_config.autodetect = self._params['autodetect']
        if not job_config.autodetect:
            job_config.allow_jagged_rows = True
            job_config.allow_quoted_newlines = True
            job_config.ignore_unknown_values = True
            if self._params['schema']:
                job_config.schema = self._parse_bq_json_schema(
                    self._params['schema'])
        if self._params['csv_null_marker']:
            job_config.null_marker = self._params['csv_null_marker']
        try:
            job_config.max_bad_records = self._params['errors_to_allow']
        except KeyError:
            job_config.max_bad_records = 0
        if self._params['overwrite']:
            job_config.write_disposition = 'WRITE_TRUNCATE'
        else:
            job_config.write_disposition = 'WRITE_APPEND'
        if self._params['dont_create']:
            job_config.create_disposition = 'CREATE_NEVER'
        else:
            job_config.create_disposition = 'CREATE_IF_NEEDED'

        job = client.load_table_from_uri(source_uris,
                                         self._get_full_table_name(),
                                         job_id_prefix=self._get_prefix(),
                                         job_config=job_config)
        self._wait(job)
        self.log_info('Finished successfully')
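For reference, _execute reads the following keys from self._params; the dict below is illustrative only, with placeholder values not taken from the original source:

params = {
    "import_json": False,    # True selects NEWLINE_DELIMITED_JSON as the source format
    "rows_to_skip": 1,       # optional key; treated as 0 when missing
    "autodetect": True,      # when False, the explicit CSV settings and schema apply
    "schema": None,          # JSON schema passed to _parse_bq_json_schema when set
    "csv_null_marker": "",   # falsy value leaves null_marker unset
    "errors_to_allow": 0,    # optional key; treated as 0 when missing
    "overwrite": True,       # True -> WRITE_TRUNCATE, False -> WRITE_APPEND
    "dont_create": False,    # True -> CREATE_NEVER, False -> CREATE_IF_NEEDED
}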
Example #6
    def update_or_create_table_from_csv(self, table_name, table_file, schema_file):
        dataset_ref = self.client.dataset(self.dataset_id)
        table_ref = dataset_ref.table(table_name)

        job_config = LoadJobConfig()
        job_config.source_format = "CSV"
        job_config.skip_leading_rows = 1
        if Path(schema_file).exists():
            job_config.schema = self.get_table_schema(schema_file)
        else:
            job_config.autodetect = True
        job_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE

        with open(table_file, "rb") as source_fp:
            load_job = self.client.load_table_from_file(
                source_fp,
                destination=table_ref,
                job_config=job_config
            )
            
        load_job.result()
        logging.info("tabela [%s] criada com sucesso", table_ref.table_id)