def load_async(self, source, mode='create', source_format='csv', csv_options=None,
               ignore_unknown_values=False, max_bad_records=0):
  """Starts importing a table from GCS and returns a Job for the load.

  Args:
    source: the URL of the source object(s). Can include a wildcard '*' at the end of the item
        name. Can be a single source or a list.
    mode: one of 'create', 'append', or 'overwrite'. 'append' or 'overwrite' will fail if the
        table does not already exist, while 'create' will fail if it does. The default is
        'create'. If 'create' the schema will be inferred if necessary.
    source_format: the format of the data, 'csv' or 'json'; default 'csv'.
    csv_options: if source format is 'csv', additional options as a CSVOptions object.
    ignore_unknown_values: If True, accept rows that contain values that do not match the schema;
        the unknown values are ignored (default False).
    max_bad_records: the maximum number of bad records that are allowed (and ignored) before
        returning an 'invalid' error in the Job result (default 0).
  Returns:
    A Job object for the import if it was started successfully or None if not.
  Raises:
    Exception if the load job failed to be started or invalid arguments were supplied.
  """
  # Convert the friendly format names into the identifiers the BigQuery API expects.
  if source_format == 'csv':
    source_format = 'CSV'
  elif source_format == 'json':
    source_format = 'NEWLINE_DELIMITED_JSON'
  else:
    raise Exception("Invalid source format %s" % source_format)

  if mode not in ('create', 'append', 'overwrite'):
    raise Exception("Invalid mode %s" % mode)

  if csv_options is None:
    csv_options = _csv_options.CSVOptions()  # Fall back to default CSV options.

  # Any failure from the API call propagates to the caller unchanged; a
  # try/except that merely re-raised added nothing and has been removed.
  response = self._api.jobs_insert_load(
      source, self._name_parts,
      append=(mode == 'append'),
      overwrite=(mode == 'overwrite'),
      create=(mode == 'create'),
      source_format=source_format,
      field_delimiter=csv_options.delimiter,
      allow_jagged_rows=csv_options.allow_jagged_rows,
      allow_quoted_newlines=csv_options.allow_quoted_newlines,
      encoding=csv_options.encoding.upper(),
      ignore_unknown_values=ignore_unknown_values,
      max_bad_records=max_bad_records,
      quote=csv_options.quote,
      skip_leading_rows=csv_options.skip_leading_rows)
  return self._init_job_from_response(response)
def from_storage(source, source_format='csv', csv_options=None, ignore_unknown_values=False,
                 max_bad_records=0, compressed=False, schema=None):
  """Create an external table for a GCS object.

  Args:
    source: the URL of the source object(s). Can include a wildcard '*' at the end of the item
        name. Can be a single source or a list.
    source_format: the format of the data, 'csv' or 'json'; default 'csv'.
    csv_options: For CSV files, the options such as quote character and delimiter.
    ignore_unknown_values: If True, accept rows that contain values that do not match the schema;
        the unknown values are ignored (default False).
    max_bad_records: The maximum number of bad records that are allowed (and ignored) before
        returning an 'invalid' error in the Job result (default 0).
    compressed: whether the data is GZ compressed or not (default False). Note that compressed
        data can be used as a federated table but cannot be loaded into a BQ Table.
    schema: the schema of the data. This is required for this table to be used as a federated
        table or to be loaded using a Table object that itself has no schema (default None).
  Returns:
    A FederatedTable object describing the external data source.
  Raises:
    Exception if the source format is invalid, or if CSV options are supplied for a
    JSON table.
  """
  result = FederatedTable()
  # Do some sanity checking and convert some params from friendly form to the form used by BQ.
  if source_format == 'csv':
    result._bq_source_format = 'CSV'
    if csv_options is None:
      csv_options = _csv_options.CSVOptions()  # use defaults
  elif source_format == 'json':
    # CSV-specific options make no sense for newline-delimited JSON input.
    if csv_options:
      raise Exception('CSV options are not supported for JSON tables')
    result._bq_source_format = 'NEWLINE_DELIMITED_JSON'
  else:
    raise Exception("Invalid source format %s" % source_format)
  # Normalize a single source URL into a one-element list.
  result._source = source if isinstance(source, list) else [source]
  result._source_format = source_format
  result._csv_options = csv_options
  result._ignore_unknown_values = ignore_unknown_values
  result._max_bad_records = max_bad_records
  result._compressed = compressed
  result._schema = schema
  return result