Example #1
    def table_get(cls):
        """
        Fetch the BigQuery table that this class maps to.

        Returns:
            (google.cloud.bigquery.table.Table):  The table this class maps to.
        Raises:
            (google.api_core.exceptions.NotFound): If the table does not exist.
        """
        client = DatabaseContext.get_session().connection().connection._client
        table_ref = _get_table_ref(cls.__table__.name, client)
        table = client.get_table(table_ref)
        return table
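
A minimal usage sketch (not from the source): SensorReading stands in for any hypothetical model class that mixes in the method above, and the NotFound handling mirrors the documented Raises clause.

    from google.api_core import exceptions as gcloud_exceptions

    try:
        table = SensorReading.table_get()
        print(table.table_id, [field.name for field in table.schema])
    except gcloud_exceptions.NotFound:
        # The backing BigQuery table has not been created yet.
        print('Table does not exist')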
Example #2
    def create_load_job(cls, instances):
        """
        Load instances through a load job.
        The job is asynchronous but this function will wait for the job to complete.
        https://cloud.google.com/bigquery/quotas#load_jobs
        Limited to 1000 per table per day.
        Maximum row size limit is 100MB.
        https://cloud.google.com/bigquery/docs/loading-data-cloud-storage-json

        Args:
            instances (List[BigQueryCRUDMixin]):  Instances of cls.
                These will be appended to the table; duplicates will be added.
                Table metadata is eventually consistent.  This means that if you've
                recently created this table or changed the schema, this method may
                incorrectly report no errors.
        """
        if not all(type(inst) == cls for inst in instances):
            raise BigQueryOrmError(
                'Got invalid class in {}\'s create method'.format(cls))

        instances_json_str = '\n'.join(
            [instance.serialize_as_json() for instance in instances])
        json_bytes_file = six.BytesIO(instances_json_str.encode('utf-8'))

        client = DatabaseContext.get_session().connection().connection._client
        table_ref = _get_table_ref(cls.__table__.name, client)

        job_config = bigquery_job.LoadJobConfig(
            autodetect=False,
            create_disposition=bigquery_job.CreateDisposition.CREATE_NEVER,
            ignore_unknown_values=False,
            source_format=bigquery_job.SourceFormat.NEWLINE_DELIMITED_JSON,
            write_disposition=bigquery_job.WriteDisposition.WRITE_APPEND)
        load_job = client.load_table_from_file(file_obj=json_bytes_file,
                                               destination=table_ref,
                                               job_config=job_config)

        try:
            load_job.result()
        except Exception as e:
            raise exceptions.DatabaseError('{}\n{}\n{}\n{}'.format(
                load_job.errors,
                '{}({})'.format(type(e), e),
                load_job.error_result,
                'This error may have occurred because a column'
                ' default value could not be created locally.  Only'
                ' scalar defaults or python callables are supported.',
            ))

        if load_job.error_result or load_job.errors:
            raise exceptions.DatabaseError('{}\n{}'.format(
                load_job.errors, load_job.error_result))
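
A hedged usage sketch for the batch load path above; SensorReading and its constructor arguments are hypothetical and not defined in these snippets.

    readings = [SensorReading(device_id='a1', value=0.5),
                SensorReading(device_id='a1', value=0.7)]

    # Blocks until the load job finishes; raises DatabaseError if the job
    # reports any errors.
    SensorReading.create_load_job(readings)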
Example #3
    def _create_streaming(cls, instances):
        """
        Insert instances through the BigQuery streaming API (client.insert_rows).
        """
        client = DatabaseContext.get_session().connection().connection._client
        table_ref = _get_table_ref(cls.__table__.name, client)
        table = client.get_table(table_ref)

        # https://cloud.google.com/bigquery/quotas#streaming_inserts
        empty_row = {field.name: None for field in table.schema}
        seq_of_parameters = [inst.serialize_as_dict() for inst in instances]
        seq_of_parameters = [
            dict(empty_row, **params) for params in seq_of_parameters
        ]
        errors = client.insert_rows(table, seq_of_parameters)
        if len(errors) > 0:
            raise exceptions.DatabaseError(errors)
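
The dict(empty_row, **params) merge above guarantees that every schema field is present in each streamed row, defaulting to None for anything the instance did not set. A standalone illustration with made-up field names:

    empty_row = {'device_id': None, 'value': None, 'recorded_at': None}
    params = {'device_id': 'a1', 'value': 0.5}

    row = dict(empty_row, **params)
    # row == {'device_id': 'a1', 'value': 0.5, 'recorded_at': None}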
Example #4
    def serialize_as_dict(self, excluded_keys=None):
        """
        Returns this object as a dictionary.
        Default values are filled in where possible and bind_processor is
        called on each value.
        (e.g. bind_processor will turn geometries into geojson strings)

        Args:
            excluded_keys (Iterable[str]):  A list of keys to exclude.
        Returns:
            (Dict[str, Any]):  Returns the dict representation of this object
                with values populated by their defaults if available.
        """
        if excluded_keys is None:
            excluded_keys = set()
        else:
            excluded_keys = set(excluded_keys)
        attr_names = JsonSerializableOrmMixin.get_entity_loaded_property_names_to_columns(
            self)

        json_out = {}
        for property_name, columns in attr_names.items():
            if property_name in excluded_keys:
                continue

            if len(columns) > 1:
                raise ValueError(
                    'serialize_as_dict does not support composite types.')
            column = columns[0]

            key = column.key
            value = getattr(self, property_name)

            if value is None:
                if column.default is not None:
                    default_arg = column.default.arg
                    if column.default.is_callable:
                        value = default_arg(None)
                    elif column.default.is_scalar:
                        value = default_arg

            if value is not None:
                bind_processor = column.type.bind_processor(
                    dialect=DatabaseContext.get_session().bind.dialect)
                if bind_processor is not None:
                    value = bind_processor(value)

            json_out[key] = value

        return json_out
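
A short, hedged usage sketch; SensorReading and its columns are hypothetical stand-ins for a model using this mixin.

    reading = SensorReading(device_id='a1', value=None)

    # Columns left as None fall back to their scalar or callable defaults,
    # and each value is run through its column type's bind_processor.
    as_dict = reading.serialize_as_dict(excluded_keys=['recorded_at'])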
Example #5
    def create_from_query(cls, query, flatten_results=True):
        """
        Load instances through a query job.
        The job is asynchronous but this function will wait for the job to complete.
        See https://cloud.google.com/bigquery/docs/writing-results
        Note that this method must compile the sql query to a string.
        It does so using sqlalchemy_query.statement.compile(compile_kwargs={"literal_binds": True}).
        This will fail for certain queries and should not be used for queries which depend on untrusted input.
        See https://docs.sqlalchemy.org/en/13/faq/sqlexpressions.html for more information.

        Args:
            query (BigQueryQuery):  A query object whose results are
                to be appended to the table.
            flatten_results (Optional[bool]): If True, will flatten the query results.
                Defaults to True.
        """
        client = DatabaseContext.get_session().connection().connection._client
        table_ref = _get_table_ref(cls.__table__.name, client)

        job_config = bigquery_job.QueryJobConfig(
            destination=table_ref,
            create_disposition=bigquery_job.CreateDisposition.CREATE_NEVER,
            write_disposition=bigquery_job.WriteDisposition.WRITE_APPEND,
            flatten_results=flatten_results,
            allow_large_results=not flatten_results,
        )

        dialect = DatabaseContext.get_engine().dialect
        compiled_sql = query.sqlalchemy_query.statement.compile(
            dialect=dialect, compile_kwargs={
                'literal_binds': True,
            })
        raw_sql = str(compiled_sql)

        query_job = client.query(raw_sql, job_config=job_config)

        try:
            query_job.result()
        except Exception as e:
            raise exceptions.DatabaseError('{}\n{}\n{}'.format(
                query_job.errors,
                '{}({})'.format(type(e), e),
                query_job.error_result,
            ))

        if query_job.error_result or query_job.errors:
            raise exceptions.DatabaseError('{}\n{}'.format(
                query_job.errors, query_job.error_result))
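
A hedged sketch of appending query results into the table. StagingReading is a hypothetical sibling model; the query it builds comes from the query() classmethod shown in Example #7.

    # The query is compiled with literal binds, so it must not contain
    # untrusted input (see the docstring above).
    source_query = StagingReading.query()
    SensorReading.create_from_query(source_query, flatten_results=True)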
Example #6
    def query_empty(cls, *args, **kwargs):
        """
        https://docs.sqlalchemy.org/en/latest/orm/query.html#sqlalchemy.orm.query.Query

        Selects no columns by default.

        Args:
            *args (Union[Column, BigQueryModel]):
                Columns or classes matching what the sql statement is expected to return
                (e.g. what it selects).
            **kwargs (Any):  Passed to sqlalchemy.orm.query
        Returns:
            (ReadOnlyBigQueryQuery):  A query object that wraps sqlalchemy.orm.Query.
        """
        return ReadOnlyBigQueryQuery(DatabaseContext.get_session().query(
            *args, **kwargs))
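
A hedged usage sketch of selecting individual columns. SensorReading is hypothetical, and .all() is assumed to be proxied to the wrapped sqlalchemy Query.

    # Select only two columns rather than full SensorReading rows.
    rows = SensorReading.query_empty(SensorReading.device_id,
                                     SensorReading.value).all()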
Example #7
    def query(cls, *args, **kwargs):
        """
        https://docs.sqlalchemy.org/en/latest/orm/query.html#sqlalchemy.orm.query.Query

        Selects all columns of the class and any additional
        ORM objects requested through arguments.

        Args:
            *args (Union[Column, BigQueryModel]):
                Columns or classes matching what the sql statement is expected to return
                (e.g. what it selects).
            **kwargs (Any):  Passed to sqlalchemy.orm.query
        Returns:
            (ReadOnlyBigQueryQuery):  A query object that wraps sqlalchemy.orm.Query.
        """
        return ReadOnlyBigQueryQuery(DatabaseContext.get_session().query(
            cls, *args, **kwargs))
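
A final hedged sketch: query() selects full instances of the class, again assuming the ReadOnlyBigQueryQuery wrapper proxies .filter() and .all() to the underlying sqlalchemy Query.

    readings = (SensorReading.query()
                .filter(SensorReading.value > 0.9)
                .all())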