コード例 #1
0
    def load_async(self,
                   source,
                   mode='create',
                   source_format='csv',
                   csv_options=None,
                   ignore_unknown_values=False,
                   max_bad_records=0):
        """Start loading data from GCS into this table without waiting for completion.

        The arguments are validated, translated to the names used by the load API and
        handed to self._api.jobs_insert_load together with self._name_parts. The API
        response is then converted with self._init_job_from_response.

        Args:
          source: the URL of the source object(s). Can include a wildcard '*' at the end
              of the item name. Can be a single source or a list.
          mode: one of 'create', 'append', or 'overwrite'. 'append' or 'overwrite' will
              fail if the table does not already exist, while 'create' will fail if it
              does. The default is 'create'. If 'create' the schema will be inferred if
              necessary.
          source_format: the format of the data, 'csv' or 'json'; default 'csv'.
          csv_options: if source format is 'csv', additional options as a CSVOptions
              object. When None, a default CSVOptions() is used. Note that the CSV
              settings are forwarded to the API call whatever the source format is.
          ignore_unknown_values: If True, accept rows that contain values that do not
              match the schema; the unknown values are ignored (default False).
          max_bad_records: the maximum number of bad records that are allowed (and
              ignored) before returning an 'invalid' error in the Job result
              (default 0).

        Returns:
          Whatever self._init_job_from_response produces for the API response; this is
          documented as a Job object for the load if it was started successfully or
          None if not.

        Raises:
          Exception: if source_format or mode is not one of the accepted values. Any
              exception raised by the underlying API call propagates unchanged.
        """
        # Translate the friendly format name into the identifier passed to the API.
        if source_format == 'csv':
            source_format = 'CSV'
        elif source_format == 'json':
            source_format = 'NEWLINE_DELIMITED_JSON'
        else:
            raise Exception("Invalid source format %s" % source_format)

        if mode not in ('create', 'append', 'overwrite'):
            raise Exception("Invalid mode %s" % mode)

        if csv_options is None:
            csv_options = _csv_options.CSVOptions()

        # No try/except around the call: catching an exception only to re-raise it
        # adds nothing, so API failures are simply allowed to propagate to the caller.
        response = self._api.jobs_insert_load(
            source,
            self._name_parts,
            append=(mode == 'append'),
            overwrite=(mode == 'overwrite'),
            create=(mode == 'create'),
            source_format=source_format,
            field_delimiter=csv_options.delimiter,
            allow_jagged_rows=csv_options.allow_jagged_rows,
            allow_quoted_newlines=csv_options.allow_quoted_newlines,
            encoding=csv_options.encoding.upper(),
            ignore_unknown_values=ignore_unknown_values,
            max_bad_records=max_bad_records,
            quote=csv_options.quote,
            skip_leading_rows=csv_options.skip_leading_rows)
        return self._init_job_from_response(response)
コード例 #2
0
    def from_storage(source,
                     source_format='csv',
                     csv_options=None,
                     ignore_unknown_values=False,
                     max_bad_records=0,
                     compressed=False,
                     schema=None):
        """Build a FederatedTable describing one or more GCS objects.

        Args:
          source: the URL of the source object(s). Can include a wildcard '*' at the end
              of the item name. Can be a single source or a list.
          source_format: the format of the data, 'csv' or 'json'; default 'csv'.
          csv_options: for CSV files, the options such as quote character and delimiter.
              Defaults to CSVOptions() for 'csv'; must not be supplied for 'json'.
          ignore_unknown_values: If True, accept rows that contain values that do not
              match the schema; the unknown values are ignored (default False).
          max_bad_records: the maximum number of bad records that are allowed (and
              ignored) before returning an 'invalid' error in the Job result
              (default 0).
          compressed: whether the data is GZ compressed or not (default False). Note
              that compressed data can be used as a federated table but cannot be
              loaded into a BQ Table.
          schema: the schema of the data. This is required for this table to be used as
              a federated table or to be loaded using a Table object that itself has no
              schema (default None).

        Returns:
          A new FederatedTable whose private fields hold the supplied settings.

        Raises:
          Exception: if source_format is neither 'csv' nor 'json', or if csv_options is
              given together with the 'json' format.
        """
        table = FederatedTable()

        # Sanity-check the arguments and convert the friendly format name into the
        # form used by BQ.
        if source_format == 'csv':
            bq_format = 'CSV'
            if csv_options is None:
                # Fall back to the default CSV settings.
                csv_options = _csv_options.CSVOptions()
        elif source_format == 'json':
            if csv_options:
                raise Exception('CSV options are not support for JSON tables')
            bq_format = 'NEWLINE_DELIMITED_JSON'
        else:
            raise Exception("Invalid source format %s" % source_format)

        # Sources are always stored as a list, even when a single URL was given.
        sources = source
        if not isinstance(sources, list):
            sources = [sources]

        table._bq_source_format = bq_format
        table._source = sources
        table._source_format = source_format
        table._csv_options = csv_options
        table._ignore_unknown_values = ignore_unknown_values
        table._max_bad_records = max_bad_records
        table._compressed = compressed
        table._schema = schema
        return table