Example #1
    def setUp(self):
        self.cache = RelationsCache()
        inputs = [
            ('foo', 'table1'),
            ('bar', 'table2'),
            ('foo', 'table3'),
            ('foo', 'table4'),
            ('bar', 'table3'),
        ]
        self.inputs = [make_relation(s, i) for s, i in inputs]
        for relation in self.inputs:
            self.cache.add(relation)

        # foo.table3 references foo.table1
        # (create view table3 as (select * from table1...))
        self.cache.add_link(make_relation('foo', 'table1'),
                            make_relation('foo', 'table3'))
        # bar.table3 references foo.table3
        # (create view bar.table3 as (select * from foo.table3...))
        self.cache.add_link(make_relation('foo', 'table3'),
                            make_relation('bar', 'table3'))

        # foo.table4 also references foo.table1
        self.cache.add_link(make_relation('foo', 'table1'),
                            make_relation('foo', 'table4'))
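
These tests assume a `make_relation` fixture that is not shown. A minimal stand-in is enough to follow them; this sketch is hypothetical (the real fixture builds dbt Relation objects) and models only the two fields the examples read:

from collections import namedtuple

# Hypothetical stand-in for the make_relation fixture used above; the real
# tests build dbt Relation objects, but these two fields are all the cache
# examples touch.
Relation = namedtuple('Relation', ['schema', 'identifier'])

def make_relation(schema, identifier):
    return Relation(schema=schema, identifier=identifier)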
Example #2
    def setUp(self):
        self.cache = RelationsCache()
        inputs = [
            ('dbt', 'foo', 'table1'),
            ('dbt', 'foo', 'table3'),
            ('dbt', 'foo', 'table4'),
            ('dbt', 'bar', 'table2'),
            ('dbt', 'bar', 'table3'),
            ('dbt_2', 'foo', 'table1'),
            ('dbt_2', 'foo', 'table2'),
        ]
        self.inputs = [make_relation(d, s, i) for d, s, i in inputs]
        for relation in self.inputs:
            self.cache.add(relation)

        # dbt.foo.table3 references dbt.foo.table1
        # (create view dbt.foo.table3 as (select * from dbt.foo.table1...))
        self.cache.add_link(make_relation('dbt', 'foo', 'table1'),
                            make_relation('dbt', 'foo', 'table3'))
        # dbt.bar.table3 references dbt.foo.table3
        # (create view dbt.bar.table3 as (select * from dbt.foo.table3...))
        self.cache.add_link(make_relation('dbt', 'foo', 'table3'),
                            make_relation('dbt', 'bar', 'table3'))

        # dbt.foo.table4 also references dbt.foo.table1
        self.cache.add_link(make_relation('dbt', 'foo', 'table1'),
                            make_relation('dbt', 'foo', 'table4'))

        # and dbt_2.foo.table1 references dbt.foo.table1
        self.cache.add_link(
            make_relation('dbt', 'foo', 'table1'),
            make_relation('dbt_2', 'foo', 'table1'),
        )
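
get_relations slices the cache by (database, schema). With the inputs above, the buckets look like this (a plain-data sketch; the counts match TestComplexCache.test_get_relations later on this page):

# The seven cached relations, bucketed the way get_relations slices them.
buckets = {
    ('dbt', 'foo'): {'table1', 'table3', 'table4'},
    ('dbt', 'bar'): {'table2', 'table3'},
    ('dbt_2', 'foo'): {'table1', 'table2'},
}
assert sum(len(idents) for idents in buckets.values()) == 7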
Example #3
class TestCache(TestCase):
    def setUp(self):
        self.cache = RelationsCache()

    def assert_relations_state(self, database, schema, identifiers):
        relations = self.cache.get_relations(database, schema)
        for identifier, expect in identifiers.items():
            found = any(
                (r.identifier == identifier and
                 r.schema == schema and
                 r.database == database)
                for r in relations
            )
            msg = '{}.{}.{} was{} found in the cache!'.format(
                database, schema, identifier, '' if found else ' not')
            self.assertEqual(expect, found, msg)

    def assert_relations_exist(self, database, schema, *identifiers):
        self.assert_relations_state(database, schema,
                                    {k: True
                                     for k in identifiers})

    def assert_relations_do_not_exist(self, database, schema, *identifiers):
        self.assert_relations_state(database, schema,
                                    {k: False
                                     for k in identifiers})
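
    # Hypothetical usage of the helpers above, as an extra test method;
    # the make_relation fixture and the relation names are illustrative.
    def test_assert_helpers_usage(self):
        self.cache.add(make_relation('dbt', 'foo', 'table1'))
        self.assert_relations_exist('dbt', 'foo', 'table1')
        self.assert_relations_do_not_exist('dbt', 'foo', 'table9')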
Example #4
    def setUp(self):
        self.cache = RelationsCache()
        self._sleep = True

        # add a bunch of cache entries
        for ident in 'abcdef':
            self.cache.add(make_relation('dbt', 'schema', ident))
        # 'b' references 'a'
        self.cache.add_link(make_relation('dbt', 'schema', 'a'),
                            make_relation('dbt', 'schema', 'b'))
        # and 'c' references 'b'
        self.cache.add_link(make_relation('dbt', 'schema', 'b'),
                            make_relation('dbt', 'schema', 'c'))
        # and 'd' references 'b'
        self.cache.add_link(make_relation('dbt', 'schema', 'b'),
                            make_relation('dbt', 'schema', 'd'))
        # and 'e' references 'a'
        self.cache.add_link(make_relation('dbt', 'schema', 'a'),
                            make_relation('dbt', 'schema', 'e'))
        # and 'f' references 'd'
        self.cache.add_link(make_relation('dbt', 'schema', 'd'),
                            make_relation('dbt', 'schema', 'f'))
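
The links above form a small tree: 'a' has children 'b' and 'e', 'b' has 'c' and 'd', and 'd' has 'f'. A drop removes the dropped node plus everything beneath it; a standalone sketch of that set:

# parent -> children, matching the add_link calls above
deps = {'a': ['b', 'e'], 'b': ['c', 'd'], 'd': ['f']}

def cascade(root):
    """Return root plus every transitive dependent -- what a drop removes."""
    removed = {root}
    for child in deps.get(root, ()):
        removed |= cascade(child)
    return removed

# dropping 'b' takes out 'c', 'd' and 'f', leaving only 'a' and 'e'
assert cascade('b') == {'b', 'c', 'd', 'f'}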
Example #5
    def setUp(self):
        self.cache = RelationsCache()
        inputs = [
            ('dbt', 'foo', 'table1'),
            ('dbt', 'foo', 'table3'),
            ('dbt', 'foo', 'table4'),
            ('dbt', 'bar', 'table2'),
            ('dbt', 'bar', 'table3'),
            ('dbt_2', 'foo', 'table1'),
            ('dbt_2', 'foo', 'table2'),
        ]
        self.inputs = [make_relation(d, s, i) for d, s, i in inputs]
        for relation in self.inputs:
            self.cache.add(relation)

        # dbt.foo.table3 references dbt.foo.table1
        # (create view dbt.foo.table3 as (select * from dbt.foo.table1...))
        self.cache.add_link(
            make_relation('dbt', 'foo', 'table1'),
            make_relation('dbt', 'foo', 'table3')
        )
        # dbt.bar.table3 references dbt.foo.table3
        # (create view dbt.bar.table3 as (select * from dbt.foo.table3...))
        self.cache.add_link(
            make_relation('dbt', 'foo', 'table3'),
            make_relation('dbt', 'bar', 'table3')
        )

        # dbt.foo.table4 also references dbt.foo.table1
        self.cache.add_link(
            make_relation('dbt', 'foo', 'table1'),
            make_relation('dbt', 'foo', 'table4')
        )

        # and dbt_2.foo.table1 references dbt.foo.table1
        self.cache.add_link(
            make_relation('dbt', 'foo', 'table1'),
            make_relation('dbt_2', 'foo', 'table1'),
        )
Example #6
class TestCache(TestCase):
    def setUp(self):
        self.cache = RelationsCache()

    def assert_relations_state(self, database, schema, identifiers):
        relations = self.cache.get_relations(database, schema)
        for identifier, expect in identifiers.items():
            found = any(
                (r.identifier == identifier and
                 r.schema == schema and
                 r.database == database)
                for r in relations
            )
            msg = '{}.{}.{} was{} found in the cache!'.format(
                database, schema, identifier, '' if found else ' not'
            )
            self.assertEqual(expect, found, msg)

    def assert_relations_exist(self, database, schema, *identifiers):
        self.assert_relations_state(database, schema, {k: True for k in identifiers})

    def assert_relations_do_not_exist(self, database, schema, *identifiers):
        self.assert_relations_state(database, schema, {k: False for k in identifiers})
Example #7
class TestComplexCache(TestCase):
    def setUp(self):
        self.cache = RelationsCache()
        inputs = [
            ('dbt', 'foo', 'table1'),
            ('dbt', 'foo', 'table3'),
            ('dbt', 'foo', 'table4'),
            ('dbt', 'bar', 'table2'),
            ('dbt', 'bar', 'table3'),
            ('dbt_2', 'foo', 'table1'),
            ('dbt_2', 'foo', 'table2'),
        ]
        self.inputs = [make_relation(d, s, i) for d, s, i in inputs]
        for relation in self.inputs:
            self.cache.add(relation)

        # dbt.foo.table3 references dbt.foo.table1
        # (create view dbt.foo.table3 as (select * from dbt.foo.table1...))
        self.cache.add_link(make_relation('dbt', 'foo', 'table1'),
                            make_relation('dbt', 'foo', 'table3'))
        # dbt.bar.table3 references dbt.foo.table3
        # (create view dbt.bar.table3 as (select * from dbt.foo.table3...))
        self.cache.add_link(make_relation('dbt', 'foo', 'table3'),
                            make_relation('dbt', 'bar', 'table3'))

        # dbt.foo.table4 also references dbt.foo.table1
        self.cache.add_link(make_relation('dbt', 'foo', 'table1'),
                            make_relation('dbt', 'foo', 'table4'))

        # and dbt_2.foo.table1 references dbt.foo.table1
        self.cache.add_link(
            make_relation('dbt', 'foo', 'table1'),
            make_relation('dbt_2', 'foo', 'table1'),
        )

    def test_get_relations(self):
        self.assertEqual(len(self.cache.get_relations('dbt', 'foo')), 3)
        self.assertEqual(len(self.cache.get_relations('dbt', 'bar')), 2)
        self.assertEqual(len(self.cache.get_relations('dbt_2', 'foo')), 2)
        self.assertEqual(len(self.cache.relations), 7)

    def test_drop_one(self):
        # dropping dbt.bar.table2 should only drop itself
        self.cache.drop(make_relation('dbt', 'bar', 'table2'))
        self.assertEqual(len(self.cache.get_relations('dbt', 'foo')), 3)
        self.assertEqual(len(self.cache.get_relations('dbt', 'bar')), 1)
        self.assertEqual(len(self.cache.get_relations('dbt_2', 'foo')), 2)
        self.assertEqual(len(self.cache.relations), 6)

    def test_drop_many(self):
        # dropping dbt.foo.table1 should drop everything but dbt.bar.table2 and
        # dbt_2.foo.table2
        self.cache.drop(make_relation('dbt', 'foo', 'table1'))
        self.assertEqual(len(self.cache.get_relations('dbt', 'foo')), 0)
        self.assertEqual(len(self.cache.get_relations('dbt', 'bar')), 1)
        self.assertEqual(len(self.cache.get_relations('dbt_2', 'foo')), 1)
        self.assertEqual(len(self.cache.relations), 2)

    def test_rename_root(self):
        self.cache.rename(make_relation('dbt', 'foo', 'table1'),
                          make_relation('dbt', 'bar', 'table1'))
        retrieved = self.cache.relations[('dbt', 'bar', 'table1')].inner
        self.assertEqual(retrieved.schema, 'bar')
        self.assertEqual(retrieved.identifier, 'table1')
        self.assertEqual(len(self.cache.get_relations('dbt', 'foo')), 2)
        self.assertEqual(len(self.cache.get_relations('dbt', 'bar')), 3)
        self.assertEqual(len(self.cache.get_relations('dbt_2', 'foo')), 2)
        self.assertEqual(len(self.cache.relations), 7)

        # make sure drops still cascade from the renamed table
        self.cache.drop(make_relation('dbt', 'bar', 'table1'))
        self.assertEqual(len(self.cache.get_relations('dbt', 'foo')), 0)
        self.assertEqual(len(self.cache.get_relations('dbt', 'bar')), 1)
        self.assertEqual(len(self.cache.get_relations('dbt_2', 'foo')), 1)
        self.assertEqual(len(self.cache.relations), 2)

    def test_rename_branch(self):
        self.cache.rename(make_relation('dbt', 'foo', 'table3'),
                          make_relation('dbt', 'foo', 'table2'))
        self.assertEqual(len(self.cache.get_relations('dbt', 'foo')), 3)
        self.assertEqual(len(self.cache.get_relations('dbt', 'bar')), 2)
        self.assertEqual(len(self.cache.get_relations('dbt_2', 'foo')), 2)

        # make sure drops still cascade through the renamed table
        self.cache.drop(make_relation('dbt', 'foo', 'table1'))
        self.assertEqual(len(self.cache.get_relations('dbt', 'foo')), 0)
        self.assertEqual(len(self.cache.get_relations('dbt', 'bar')), 1)
        self.assertEqual(len(self.cache.get_relations('dbt_2', 'foo')), 1)
        self.assertEqual(len(self.cache.relations), 2)
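
test_drop_many is the same cascade over three-part keys: everything reachable from dbt.foo.table1 goes, five of the seven relations. A standalone sketch under that link graph:

# referenced -> dependents, matching the add_link calls in setUp
links = {
    ('dbt', 'foo', 'table1'): [('dbt', 'foo', 'table3'),
                               ('dbt', 'foo', 'table4'),
                               ('dbt_2', 'foo', 'table1')],
    ('dbt', 'foo', 'table3'): [('dbt', 'bar', 'table3')],
}

def dropped(root):
    out = {root}
    for dep in links.get(root, ()):
        out |= dropped(dep)
    return out

# five relations cascade away; dbt.bar.table2 and dbt_2.foo.table2 survive
assert len(dropped(('dbt', 'foo', 'table1'))) == 5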
Example #8
class BaseAdapter(object):
    """The BaseAdapter provides an abstract base class for adapters.

    Adapters must implement the following methods and macros. Some of the
    methods can be safely overridden as a noop, where it makes sense
    (transactions on databases that don't support them, for instance). Those
    methods are marked with a (passable) in their docstrings. Check docstrings
    for type information, etc.

    To implement a macro, implement "${adapter_type}__${macro_name}" in the
    adapter's internal project.

    Methods:
        - exception_handler
        - date_function
        - list_schemas
        - drop_relation
        - truncate_relation
        - rename_relation
        - get_columns_in_relation
        - expand_column_types
        - list_relations_without_caching
        - is_cancelable
        - create_schema
        - drop_schema
        - quote
        - convert_text_type
        - convert_number_type
        - convert_boolean_type
        - convert_datetime_type
        - convert_date_type
        - convert_time_type

    Macros:
        - get_catalog
    """
    requires = {}

    Relation = BaseRelation
    Column = Column
    # This should be an implementation of BaseConnectionManager
    ConnectionManager = None

    # A set of clobber config fields accepted by this adapter
    # for use in materializations
    AdapterSpecificConfigs = frozenset()

    def __init__(self, config):
        self.config = config
        self.cache = RelationsCache()
        self.connections = self.ConnectionManager(config)
        self._internal_manifest_lazy = None

    ###
    # Methods that pass through to the connection manager
    ###
    def acquire_connection(self, name):
        return self.connections.get(name)

    def release_connection(self, name):
        return self.connections.release(name)

    def cleanup_connections(self):
        return self.connections.cleanup_all()

    def clear_transaction(self, conn_name='master'):
        return self.connections.clear_transaction(conn_name)

    def commit_if_has_connection(self, name):
        return self.connections.commit_if_has_connection(name)

    @available
    def execute(self, sql, model_name=None, auto_begin=False, fetch=False):
        """Execute the given SQL. This is a thin wrapper around
        ConnectionManager.execute.

        :param str sql: The sql to execute.
        :param Optional[str] model_name: The model name to use for the
            connection.
        :param bool auto_begin: If set, and dbt is not currently inside a
            transaction, automatically begin one.
        :param bool fetch: If set, fetch results.
        :return: A tuple of the status and the results (empty if fetch=False).
        :rtype: Tuple[str, agate.Table]
        """
        return self.connections.execute(
            sql=sql,
            name=model_name,
            auto_begin=auto_begin,
            fetch=fetch
        )

    ###
    # Methods that should never be overridden
    ###
    @classmethod
    def type(cls):
        """Get the type of this adapter. Types must be class-unique and
        consistent.

        :return: The type name
        :rtype: str
        """
        return cls.ConnectionManager.TYPE

    @property
    def _internal_manifest(self):
        if self._internal_manifest_lazy is None:
            manifest = GraphLoader.load_internal(self.config)
            self._internal_manifest_lazy = manifest
        return self._internal_manifest_lazy

    def check_internal_manifest(self):
        """Return the internal manifest (used for executing macros) if it's
        been initialized, otherwise return None.
        """
        return self._internal_manifest_lazy

    ###
    # Caching methods
    ###
    def _schema_is_cached(self, database, schema, model_name=None):
        """Check if the schema is cached, and by default logs if it is not."""
        if dbt.flags.USE_CACHE is False:
            return False
        elif (database, schema) not in self.cache:
            logger.debug(
                'On "{}": cache miss for schema "{}.{}", this is inefficient'
                .format(model_name or '<None>', database, schema)
            )
            return False
        else:
            return True

    @classmethod
    def _relations_filter_table(cls, table, schemas):
        """Filter the table as appropriate for relations table entries.
        Subclasses can override this to change filtering rules on a per-adapter
        basis.
        """
        return table.where(_relations_filter_schemas(schemas))

    def _get_cache_schemas(self, manifest, exec_only=False):
        """Get a mapping of each node's "information_schema" relations to a
        set of all schemas expected in that information_schema.

        There may be keys that are technically duplicates on the database side,
        for example all of '"foo"', 'foo', '"FOO"' and 'FOO' could coexist as
        databases, and values could overlap as appropriate. All values are
        lowercase strings.
        """
        info_schema_name_map = SchemaSearchMap()
        for node in manifest.nodes.values():
            if exec_only and node.resource_type not in NodeType.executable():
                continue
            relation = self.Relation.create_from(self.config, node)
            info_schema_name_map.add(relation)
        # result is a map whose keys are information_schema Relations without
        # identifiers that have appropriate database prefixes, and whose values
        # are sets of lowercase schema names that are valid members of those
        # databases
        return info_schema_name_map

    def _relations_cache_for_schemas(self, manifest):
        """Populate the relations cache for the given schemas. Returns an
        iteratble of the schemas populated, as strings.
        """
        if not dbt.flags.USE_CACHE:
            return

        info_schema_name_map = self._get_cache_schemas(manifest,
                                                       exec_only=True)
        for db, schema in info_schema_name_map.search():
            for relation in self.list_relations_without_caching(db, schema):
                self.cache.add(relation)

        # it's possible that there were no relations in some schemas. We want
        # to insert the schemas we query into the cache's `.schemas` attribute
        # so we can check it later
        self.cache.update_schemas(info_schema_name_map.schemas_searched())

    def set_relations_cache(self, manifest, clear=False):
        """Run a query that gets a populated cache of the relations in the
        database and set the cache on this adapter.
        """
        if not dbt.flags.USE_CACHE:
            return

        with self.cache.lock:
            if clear:
                self.cache.clear()
            self._relations_cache_for_schemas(manifest)

    def cache_new_relation(self, relation, model_name=None):
        """Cache a new relation in dbt. It will show up in `list relations`."""
        if relation is None:
            dbt.exceptions.raise_compiler_error(
                'Attempted to cache a null relation for {}'.format(model_name)
            )
        if dbt.flags.USE_CACHE:
            self.cache.add(relation)
        # so jinja doesn't render things
        return ''

    ###
    # Abstract methods for database-specific values, attributes, and types
    ###
    @abstractclassmethod
    def date_function(cls):
        """Get the date function used by this adapter's database.

        :return: The date function
        :rtype: str
        """
        raise dbt.exceptions.NotImplementedException(
            '`date_function` is not implemented for this adapter!')

    @abstractclassmethod
    def is_cancelable(cls):
        raise dbt.exceptions.NotImplementedException(
            '`is_cancelable` is not implemented for this adapter!'
        )

    ###
    # Abstract methods about schemas
    ###
    @abc.abstractmethod
    def list_schemas(self, database, model_name=None):
        """Get a list of existing schemas.

        :param str database: The name of the database to list under.
        :param Optional[str] model_name: The name of the connection to query as
        :return: All schemas that currently exist in the database
        :rtype: List[str]
        """
        raise dbt.exceptions.NotImplementedException(
            '`list_schemas` is not implemented for this adapter!'
        )

    def check_schema_exists(self, database, schema, model_name=None):
        """Check if a schema exists.

        The default implementation of this is potentially unnecessarily slow,
        and adapters should implement it if there is an optimized path (and
        there probably is)
        """
        search = (
            s.lower() for s in
            self.list_schemas(database=database, model_name=model_name)
        )
        return schema.lower() in search

    ###
    # Abstract methods about relations
    ###
    @abc.abstractmethod
    @available
    def drop_relation(self, relation, model_name=None):
        """Drop the given relation.

        *Implementors must call self.cache.drop() to preserve cache state!*

        :param self.Relation relation: The relation to drop
        :param Optional[str] model_name: The name of the model to use for the
            connection.
        """
        raise dbt.exceptions.NotImplementedException(
            '`drop_relation` is not implemented for this adapter!'
        )

    @abc.abstractmethod
    @available
    def truncate_relation(self, relation, model_name=None):
        """Truncate the given relation.

        :param self.Relation relation: The relation to truncate
        :param Optional[str] model_name: The name of the model to use for the
            connection."""
        raise dbt.exceptions.NotImplementedException(
            '`truncate_relation` is not implemented for this adapter!'
        )

    @abc.abstractmethod
    @available
    def rename_relation(self, from_relation, to_relation, model_name=None):
        """Rename the relation from from_relation to to_relation.

        Implementors must call self.cache.rename() to preserve cache state.

        :param self.Relation from_relation: The original relation name
        :param self.Relation to_relation: The new relation name
        :param Optional[str] model_name: The name of the model to use for the
            connection.
        """
        raise dbt.exceptions.NotImplementedException(
            '`rename_relation` is not implemented for this adapter!'
        )

    @abc.abstractmethod
    @available
    def get_columns_in_relation(self, relation, model_name=None):
        """Get a list of the columns in the given Relation.

        :param self.Relation relation: The relation to query for.
        :param Optional[str] model_name: The name of the model to use for the
            connection.
        :return: Information about all columns in the given relation.
        :rtype: List[self.Column]
        """
        raise dbt.exceptions.NotImplementedException(
            '`get_columns_in_relation` is not implemented for this adapter!'
        )

    @available_deprecated('get_columns_in_relation')
    def get_columns_in_table(self, schema, identifier, model_name=None):
        """DEPRECATED: Get a list of the columns in the given table."""
        relation = self.Relation.create(
            database=self.config.credentials.database,
            schema=schema,
            identifier=identifier,
            quote_policy=self.config.quoting
        )
        return self.get_columns_in_relation(relation, model_name=model_name)

    @abc.abstractmethod
    def expand_column_types(self, goal, current, model_name=None):
        """Expand the current table's types to match the goal table. (passable)

        :param self.Relation goal: A relation that currently exists in the
            database with columns of the desired types.
        :param self.Relation current: A relation that currently exists in the
            database with columns of unspecified types.
        :param Optional[str] model_name: The name of the model to use for the
            connection.
        """
        raise dbt.exceptions.NotImplementedException(
            '`expand_target_column_types` is not implemented for this adapter!'
        )

    @abc.abstractmethod
    def list_relations_without_caching(self, information_schema, schema,
                                       model_name=None):
        """List relations in the given schema, bypassing the cache.

        This is used as the underlying behavior to fill the cache.

        :param Relation information_schema: The information schema to list
            relations from.
        :param str schema: The name of the schema to list relations from.
        :param Optional[str] model_name: The name of the model to use for the
            connection.
        :return: The relations in schema
        :rtype: List[self.Relation]
        """
        raise dbt.exceptions.NotImplementedException(
            '`list_relations_without_caching` is not implemented for this '
            'adapter!'
        )

    ###
    # Provided methods about relations
    ###
    @available
    def get_missing_columns(self, from_relation, to_relation, model_name=None):
        """Returns dict of {column:type} for columns in from_table that are
        missing from to_relation
        """
        if not isinstance(from_relation, self.Relation):
            dbt.exceptions.invalid_type_error(
                method_name='get_missing_columns',
                arg_name='from_relation',
                got_value=from_relation,
                expected_type=self.Relation)

        if not isinstance(to_relation, self.Relation):
            dbt.exceptions.invalid_type_error(
                method_name='get_missing_columns',
                arg_name='to_relation',
                got_value=to_relation,
                expected_type=self.Relation)

        from_columns = {
            col.name: col for col in
            self.get_columns_in_relation(from_relation, model_name=model_name)
        }

        to_columns = {
            col.name: col for col in
            self.get_columns_in_relation(to_relation, model_name=model_name)
        }

        missing_columns = set(from_columns.keys()) - set(to_columns.keys())

        return [
            col for (col_name, col) in from_columns.items()
            if col_name in missing_columns
        ]

    @available
    def expand_target_column_types(self, temp_table, to_relation,
                                   model_name=None):
        if not isinstance(to_relation, self.Relation):
            dbt.exceptions.invalid_type_error(
                method_name='expand_target_column_types',
                arg_name='to_relation',
                got_value=to_relation,
                expected_type=self.Relation)

        goal = self.Relation.create(
            database=None,
            schema=None,
            identifier=temp_table,
            type='table',
            quote_policy=self.config.quoting
        )
        self.expand_column_types(goal, to_relation, model_name)

    def list_relations(self, database, schema, model_name=None):
        if self._schema_is_cached(database, schema, model_name):
            return self.cache.get_relations(database, schema)

        information_schema = self.Relation.create(
            database=database,
            schema=schema,
            model_name='').information_schema()

        # we can't build the relations cache because we don't have a
        # manifest so we can't run any operations.
        relations = self.list_relations_without_caching(
            information_schema, schema, model_name=model_name
        )

        logger.debug('with schema={}, model_name={}, relations={}'
                     .format(schema, model_name, relations))
        return relations

    def _make_match_kwargs(self, database, schema, identifier):
        quoting = self.config.quoting
        if identifier is not None and quoting['identifier'] is False:
            identifier = identifier.lower()

        if schema is not None and quoting['schema'] is False:
            schema = schema.lower()

        if database is not None and quoting['database'] is False:
            database = database.lower()

        return filter_null_values({
            'database': database,
            'identifier': identifier,
            'schema': schema,
        })

    def _make_match(self, relations_list, database, schema, identifier):

        matches = []

        search = self._make_match_kwargs(database, schema, identifier)

        for relation in relations_list:
            if relation.matches(**search):
                matches.append(relation)

        return matches

    @available
    def get_relation(self, database, schema, identifier, model_name=None):
        relations_list = self.list_relations(database, schema, model_name)

        matches = self._make_match(relations_list, database, schema,
                                   identifier)

        if len(matches) > 1:
            kwargs = {
                'identifier': identifier,
                'schema': schema,
                'database': database,
            }
            dbt.exceptions.get_relation_returned_multiple_results(
                kwargs, matches
            )

        elif matches:
            return matches[0]

        return None

    @available_deprecated('get_relation')
    def already_exists(self, schema, name, model_name=None):
        """DEPRECATED: Return if a model already exists in the database"""
        database = self.config.credentials.database
        relation = self.get_relation(database, schema, name,
                                     model_name=model_name)
        return relation is not None

    ###
    # ODBC FUNCTIONS -- these should not need to change for every adapter,
    #                   although some adapters may override them
    ###
    @abc.abstractmethod
    @available
    def create_schema(self, database, schema, model_name=None):
        """Create the given schema if it does not exist.

        :param str schema: The schema name to create.
        :param Optional[str] model_name: The name of the model to use for the
            connection.
        """
        raise dbt.exceptions.NotImplementedException(
            '`create_schema` is not implemented for this adapter!'
        )

    @abc.abstractmethod
    def drop_schema(self, database, schema, model_name=None):
        """Drop the given schema (and everything in it) if it exists.

        :param str schema: The schema name to drop.
        :param Optional[str] model_name: The name of the model to use for the
            connection.
        """
        raise dbt.exceptions.NotImplementedException(
            '`drop_schema` is not implemented for this adapter!'
        )

    @available_raw
    @abstractclassmethod
    def quote(cls, identifier):
        """Quote the given identifier, as appropriate for the database.

        :param str identifier: The identifier to quote
        :return: The quoted identifier
        :rtype: str
        """
        raise dbt.exceptions.NotImplementedException(
            '`quote` is not implemented for this adapter!'
        )

    @available
    def quote_as_configured(self, identifier, quote_key, model_name=None):
        """Quote or do not quote the given identifer as configured in the
        project config for the quote key.

        The quote key should be one of 'database' (on bigquery, 'profile'),
        'identifier', or 'schema', or it will be treated as if you set `True`.
        """
        default = self.Relation.DEFAULTS['quote_policy'].get(quote_key)
        if self.config.quoting.get(quote_key, default):
            return self.quote(identifier)
        else:
            return identifier

    ###
    # Conversions: These must be implemented by concrete implementations, for
    # converting agate types into their sql equivalents.
    ###
    @abstractclassmethod
    def convert_text_type(cls, agate_table, col_idx):
        """Return the type in the database that best maps to the agate.Text
        type for the given agate table and column index.

        :param agate.Table agate_table: The table
        :param int col_idx: The index into the agate table for the column.
        :return: The name of the type in the database
        :rtype: str
        """
        raise dbt.exceptions.NotImplementedException(
            '`convert_text_type` is not implemented for this adapter!')

    @abstractclassmethod
    def convert_number_type(cls, agate_table, col_idx):
        """Return the type in the database that best maps to the agate.Number
        type for the given agate table and column index.

        :param agate.Table agate_table: The table
        :param int col_idx: The index into the agate table for the column.
        :return: The name of the type in the database
        :rtype: str
        """
        raise dbt.exceptions.NotImplementedException(
            '`convert_number_type` is not implemented for this adapter!')

    @abstractclassmethod
    def convert_boolean_type(cls, agate_table, col_idx):
        """Return the type in the database that best maps to the agate.Boolean
        type for the given agate table and column index.

        :param agate.Table agate_table: The table
        :param int col_idx: The index into the agate table for the column.
        :return: The name of the type in the database
        :rtype: str
        """
        raise dbt.exceptions.NotImplementedException(
            '`convert_boolean_type` is not implemented for this adapter!')

    @abstractclassmethod
    def convert_datetime_type(cls, agate_table, col_idx):
        """Return the type in the database that best maps to the agate.DateTime
        type for the given agate table and column index.

        :param agate.Table agate_table: The table
        :param int col_idx: The index into the agate table for the column.
        :return: The name of the type in the database
        :rtype: str
        """
        raise dbt.exceptions.NotImplementedException(
            '`convert_datetime_type` is not implemented for this adapter!')

    @abstractclassmethod
    def convert_date_type(cls, agate_table, col_idx):
        """Return the type in the database that best maps to the agate.Date
        type for the given agate table and column index.

        :param agate.Table agate_table: The table
        :param int col_idx: The index into the agate table for the column.
        :return: The name of the type in the database
        :rtype: str
        """
        raise dbt.exceptions.NotImplementedException(
            '`convert_date_type` is not implemented for this adapter!')

    @abstractclassmethod
    def convert_time_type(cls, agate_table, col_idx):
        """Return the type in the database that best maps to the
        agate.TimeDelta type for the given agate table and column index.

        :param agate.Table agate_table: The table
        :param int col_idx: The index into the agate table for the column.
        :return: The name of the type in the database
        :rtype: str
        """
        raise dbt.exceptions.NotImplementedException(
            '`convert_time_type` is not implemented for this adapter!')

    @available_raw
    @classmethod
    def convert_type(cls, agate_table, col_idx):
        return cls.convert_agate_type(agate_table, col_idx)

    @classmethod
    def convert_agate_type(cls, agate_table, col_idx):
        agate_type = agate_table.column_types[col_idx]
        conversions = [
            (agate.Text, cls.convert_text_type),
            (agate.Number, cls.convert_number_type),
            (agate.Boolean, cls.convert_boolean_type),
            (agate.DateTime, cls.convert_datetime_type),
            (agate.Date, cls.convert_date_type),
            (agate.TimeDelta, cls.convert_time_type),
        ]
        for agate_cls, func in conversions:
            if isinstance(agate_type, agate_cls):
                return func(agate_table, col_idx)

    ###
    # Operations involving the manifest
    ###
    def execute_macro(self, macro_name, manifest=None, project=None,
                      context_override=None, kwargs=None, release=False,
                      connection_name=None):
        """Look macro_name up in the manifest and execute its results.

        :param str macro_name: The name of the macro to execute.
        :param Optional[Manifest] manifest: The manifest to use for generating
            the base macro execution context. If none is provided, use the
            internal manifest.
        :param Optional[str] project: The name of the project to search in, or
            None for the first match.
        :param Optional[dict] context_override: An optional dict to update()
            the macro execution context.
        :param Optional[dict] kwargs: An optional dict of keyword args used to
            pass to the macro.
        :param bool release: If True, release the connection after executing.
        :param Optional[str] connection_name: The connection name to use, or
            use the macro name.

        Return an AttrDict with three attributes: 'table', 'data', and
            'status'. 'table' is an agate.Table.
        """
        if kwargs is None:
            kwargs = {}
        if context_override is None:
            context_override = {}
        if connection_name is None:
            connection_name = macro_name

        if manifest is None:
            manifest = self._internal_manifest

        macro = manifest.find_macro_by_name(macro_name, project)
        if macro is None:
            if project is None:
                package_name = 'any package'
            else:
                package_name = 'the "{}" package'.format(project)

            # The import of dbt.context.runtime below shadows 'dbt'
            import dbt.exceptions
            raise dbt.exceptions.RuntimeException(
                'dbt could not find a macro with the name "{}" in {}'
                .format(macro_name, package_name)
            )

        # This causes a reference cycle, as dbt.context.runtime.generate()
        # ends up calling get_adapter, so the import has to be here.
        import dbt.context.runtime
        macro_context = dbt.context.runtime.generate_macro(
            macro,
            self.config,
            manifest,
            connection_name
        )
        macro_context.update(context_override)

        macro_function = macro.generator(macro_context)

        try:
            result = macro_function(**kwargs)
        finally:
            if release:
                self.release_connection(connection_name)
        return result

    @classmethod
    def _catalog_filter_table(cls, table, manifest):
        """Filter the table as appropriate for catalog entries. Subclasses can
        override this to change filtering rules on a per-adapter basis.
        """
        return table.where(_catalog_filter_schemas(manifest))

    def get_catalog(self, manifest):
        """Get the catalog for this manifest by running the get catalog macro.
        Returns an agate.Table of catalog information.
        """
        information_schemas = list(self._get_cache_schemas(manifest).keys())
        # make it a list so macros can index into it.
        kwargs = {'information_schemas': information_schemas}
        table = self.execute_macro(GET_CATALOG_MACRO_NAME,
                                   kwargs=kwargs,
                                   release=True)

        results = self._catalog_filter_table(table, manifest)
        return results

    def cancel_open_connections(self):
        """Cancel all open connections."""
        return self.connections.cancel_open()

    def calculate_freshness(self, source, loaded_at_field, manifest=None,
                            connection_name=None):
        """Calculate the freshness of sources in dbt, and return it"""
        # in the future `source` will be a Relation instead of a string
        kwargs = {
            'source': source,
            'loaded_at_field': loaded_at_field
        }

        # run the macro
        table = self.execute_macro(
            FRESHNESS_MACRO_NAME,
            kwargs=kwargs,
            release=True,
            manifest=manifest,
            connection_name=connection_name
        )
        # now we have a 1-row table of the maximum `loaded_at_field` value and
        # the current time according to the db.
        if len(table) != 1 or len(table[0]) != 2:
            dbt.exceptions.raise_compiler_error(
                'Got an invalid result from "{}" macro: {}'.format(
                    FRESHNESS_MACRO_NAME, [tuple(r) for r in table]
                )
            )

        max_loaded_at = _utc(table[0][0], source, loaded_at_field)
        snapshotted_at = _utc(table[0][1], source, loaded_at_field)

        age = (snapshotted_at - max_loaded_at).total_seconds()
        return {
            'max_loaded_at': max_loaded_at,
            'snapshotted_at': snapshotted_at,
            'age': age,
        }
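
convert_agate_type ties the convert_* hooks together with a first-match isinstance table. The same pattern reduced to a standalone sketch, with plain Python types standing in for the agate ones and illustrative SQL type names:

import datetime

# First-match dispatch, mirroring convert_agate_type: walk the (type, handler)
# pairs in order and return the first isinstance hit, else fall through to None.
CONVERSIONS = [
    (bool, lambda: 'boolean'),  # bool before int: bool is an int subclass
    (int, lambda: 'integer'),
    (float, lambda: 'float8'),
    (datetime.datetime, lambda: 'timestamp without time zone'),
    (str, lambda: 'text'),
]

def sql_type_for(value):
    for py_type, handler in CONVERSIONS:
        if isinstance(value, py_type):
            return handler()
    return None

assert sql_type_for(True) == 'boolean'
assert sql_type_for(3.14) == 'float8'
assert sql_type_for(b'raw') is None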
Example #9
class TestLikeDbt(TestCase):
    def setUp(self):
        self.cache = RelationsCache()
        self._sleep = True

        # add a bunch of cache entries
        for ident in 'abcdef':
            self.cache.add(make_relation('dbt', 'schema', ident))
        # 'b' references 'a'
        self.cache.add_link(make_relation('dbt', 'schema', 'a'),
                            make_relation('dbt', 'schema', 'b'))
        # and 'c' references 'b'
        self.cache.add_link(make_relation('dbt', 'schema', 'b'),
                            make_relation('dbt', 'schema', 'c'))
        # and 'd' references 'b'
        self.cache.add_link(make_relation('dbt', 'schema', 'b'),
                            make_relation('dbt', 'schema', 'd'))
        # and 'e' references 'a'
        self.cache.add_link(make_relation('dbt', 'schema', 'a'),
                            make_relation('dbt', 'schema', 'e'))
        # and 'f' references 'd'
        self.cache.add_link(make_relation('dbt', 'schema', 'd'),
                            make_relation('dbt', 'schema', 'f'))
        # so drop propagation goes (a -> (b -> (c (d -> f))) e)

    def assert_has_relations(self, expected):
        current = set(r.identifier
                      for r in self.cache.get_relations('dbt', 'schema'))
        self.assertEqual(current, expected)

    def test_drop_inner(self):
        self.assert_has_relations(set('abcdef'))
        self.cache.drop(make_relation('dbt', 'schema', 'b'))
        self.assert_has_relations({'a', 'e'})

    def test_rename_and_drop(self):
        self.assert_has_relations(set('abcdef'))
        # drop the backup/tmp
        self.cache.drop(make_relation('dbt', 'schema', 'b__backup'))
        self.cache.drop(make_relation('dbt', 'schema', 'b__tmp'))
        self.assert_has_relations(set('abcdef'))
        # create a new b__tmp
        self.cache.add(make_relation(
            'dbt',
            'schema',
            'b__tmp',
        ))
        self.assert_has_relations(set('abcdef') | {'b__tmp'})
        # rename b -> b__backup
        self.cache.rename(make_relation('dbt', 'schema', 'b'),
                          make_relation('dbt', 'schema', 'b__backup'))
        self.assert_has_relations(set('acdef') | {'b__tmp', 'b__backup'})
        # rename temp to b
        self.cache.rename(make_relation('dbt', 'schema', 'b__tmp'),
                          make_relation('dbt', 'schema', 'b'))
        self.assert_has_relations(set('abcdef') | {'b__backup'})

        # drop backup, everything that used to depend on b should be gone, but
        # b itself should still exist
        self.cache.drop(make_relation('dbt', 'schema', 'b__backup'))
        self.assert_has_relations(set('abe'))
        relation = self.cache.relations[('dbt', 'schema', 'a')]
        self.assertEqual(len(relation.referenced_by), 1)

    def _rand_sleep(self):
        if not self._sleep:
            return
        time.sleep(random.random() * 0.1)

    def _target(self, ident):
        self._rand_sleep()
        self.cache.rename(make_relation('dbt', 'schema', ident),
                          make_relation('dbt', 'schema', ident + '__backup'))
        self._rand_sleep()
        self.cache.add(make_relation('dbt', 'schema', ident + '__tmp'))
        self._rand_sleep()
        self.cache.rename(make_relation('dbt', 'schema', ident + '__tmp'),
                          make_relation('dbt', 'schema', ident))
        self._rand_sleep()
        self.cache.drop(make_relation('dbt', 'schema', ident + '__backup'))
        return ident, self.cache.get_relations('dbt', 'schema')

    def test_threaded(self):
        # add three more short subchains for threads to test on
        for ident in 'ghijklmno':
            obj = make_mock_relationship('test_db', 'schema', ident)
            self.cache.add(make_relation('dbt', 'schema', ident))

        self.cache.add_link(make_relation('dbt', 'schema', 'a'),
                            make_relation('dbt', 'schema', 'g'))
        self.cache.add_link(make_relation('dbt', 'schema', 'g'),
                            make_relation('dbt', 'schema', 'h'))
        self.cache.add_link(make_relation('dbt', 'schema', 'h'),
                            make_relation('dbt', 'schema', 'i'))

        self.cache.add_link(make_relation('dbt', 'schema', 'a'),
                            make_relation('dbt', 'schema', 'j'))
        self.cache.add_link(make_relation('dbt', 'schema', 'j'),
                            make_relation('dbt', 'schema', 'k'))
        self.cache.add_link(make_relation('dbt', 'schema', 'k'),
                            make_relation('dbt', 'schema', 'l'))

        self.cache.add_link(make_relation('dbt', 'schema', 'a'),
                            make_relation('dbt', 'schema', 'm'))
        self.cache.add_link(make_relation('dbt', 'schema', 'm'),
                            make_relation('dbt', 'schema', 'n'))
        self.cache.add_link(make_relation('dbt', 'schema', 'n'),
                            make_relation('dbt', 'schema', 'o'))

        pool = ThreadPool(4)
        results = list(pool.imap_unordered(self._target, ('b', 'g', 'j', 'm')))
        pool.close()
        pool.join()
        # at a minimum, we expect each table to "see" itself, its parent ('a'),
        # and the unrelated table ('e')
        min_expect = {
            'b': {'a', 'b', 'e'},
            'g': {'a', 'g', 'e'},
            'j': {'a', 'j', 'e'},
            'm': {'a', 'm', 'e'},
        }

        for ident, relations in results:
            seen = set(r.identifier for r in relations)
            self.assertTrue(min_expect[ident].issubset(seen))

        self.assert_has_relations(set('abgjme'))

    def test_threaded_repeated(self):
        for _ in range(10):
            self.setUp()
            self._sleep = False
            self.test_threaded()
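
The _target worker walks the swap sequence these tests model (the pattern dbt materializations use to replace a relation): park the live relation as a backup, build a tmp, promote the tmp, then drop the backup. As a plain step list:

def swap_steps(ident):
    # (operation, subject) pairs in the order _target performs them
    return [
        ('rename', (ident, ident + '__backup')),  # park the live relation
        ('add', ident + '__tmp'),                 # build the replacement
        ('rename', (ident + '__tmp', ident)),     # promote it
        ('drop', ident + '__backup'),             # discard the old version
    ]

assert [op for op, _ in swap_steps('b')] == ['rename', 'add', 'rename', 'drop']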
Example #10
class TestLikeDbt(TestCase):
    def setUp(self):
        self.cache = RelationsCache()
        self._sleep = True

        # add a bunch of cache entries
        for ident in 'abcdef':
            self.cache.add(make_relation('dbt', 'schema', ident))
        # 'b' references 'a'
        self.cache.add_link(make_relation('dbt', 'schema', 'a'),
                            make_relation('dbt', 'schema', 'b'))
        # and 'c' references 'b'
        self.cache.add_link(make_relation('dbt', 'schema', 'b'),
                            make_relation('dbt', 'schema', 'c'))
        # and 'd' references 'b'
        self.cache.add_link(make_relation('dbt', 'schema', 'b'),
                            make_relation('dbt', 'schema', 'd'))
        # and 'e' references 'a'
        self.cache.add_link(make_relation('dbt', 'schema', 'a'),
                            make_relation('dbt', 'schema', 'e'))
        # and 'f' references 'd'
        self.cache.add_link(make_relation('dbt', 'schema', 'd'),
                            make_relation('dbt', 'schema', 'f'))
        # so drop propagation goes (a -> (b -> (c (d -> f))) e)

    def assert_has_relations(self, expected):
        current = set(r.identifier for r in self.cache.get_relations('dbt', 'schema'))
        self.assertEqual(current, expected)

    def test_drop_inner(self):
        self.assert_has_relations(set('abcdef'))
        self.cache.drop(make_relation('dbt', 'schema', 'b'))
        self.assert_has_relations({'a', 'e'})

    def test_rename_and_drop(self):
        self.assert_has_relations(set('abcdef'))
        # drop the backup/tmp
        self.cache.drop(make_relation('dbt', 'schema', 'b__backup'))
        self.cache.drop(make_relation('dbt', 'schema', 'b__tmp'))
        self.assert_has_relations(set('abcdef'))
        # create a new b__tmp
        self.cache.add(make_relation('dbt', 'schema', 'b__tmp'))
        self.assert_has_relations(set('abcdef') | {'b__tmp'})
        # rename b -> b__backup
        self.cache.rename(make_relation('dbt', 'schema', 'b'),
                          make_relation('dbt', 'schema', 'b__backup'))
        self.assert_has_relations(set('acdef') | {'b__tmp', 'b__backup'})
        # rename temp to b
        self.cache.rename(make_relation('dbt', 'schema', 'b__tmp'),
                          make_relation('dbt', 'schema', 'b'))
        self.assert_has_relations(set('abcdef') | {'b__backup'})

        # drop backup, everything that used to depend on b should be gone, but
        # b itself should still exist
        self.cache.drop(make_relation('dbt', 'schema', 'b__backup'))
        self.assert_has_relations(set('abe'))
        relation = self.cache.relations[('dbt', 'schema', 'a')]
        self.assertEqual(len(relation.referenced_by), 1)

    def _rand_sleep(self):
        if not self._sleep:
            return
        time.sleep(random.random() * 0.1)

    def _target(self, ident):
        self._rand_sleep()
        self.cache.rename(make_relation('dbt', 'schema', ident),
                          make_relation('dbt', 'schema', ident + '__backup'))
        self._rand_sleep()
        self.cache.add(make_relation('dbt', 'schema', ident + '__tmp'))
        self._rand_sleep()
        self.cache.rename(make_relation('dbt', 'schema', ident + '__tmp'),
                          make_relation('dbt', 'schema', ident))
        self._rand_sleep()
        self.cache.drop(make_relation('dbt', 'schema', ident + '__backup'))
        return ident, self.cache.get_relations('dbt', 'schema')

    def test_threaded(self):
        # add three more short subchains for threads to test on
        for ident in 'ghijklmno':
            obj = make_mock_relationship('test_db', 'schema', ident)
            self.cache.add(make_relation('dbt', 'schema', ident))

        self.cache.add_link(make_relation('dbt', 'schema', 'a'),
                            make_relation('dbt', 'schema', 'g'))
        self.cache.add_link(make_relation('dbt', 'schema', 'g'),
                            make_relation('dbt', 'schema', 'h'))
        self.cache.add_link(make_relation('dbt', 'schema', 'h'),
                            make_relation('dbt', 'schema', 'i'))

        self.cache.add_link(make_relation('dbt', 'schema', 'a'),
                            make_relation('dbt', 'schema', 'j'))
        self.cache.add_link(make_relation('dbt', 'schema', 'j'),
                            make_relation('dbt', 'schema', 'k'))
        self.cache.add_link(make_relation('dbt', 'schema', 'k'),
                            make_relation('dbt', 'schema', 'l'))

        self.cache.add_link(make_relation('dbt', 'schema', 'a'),
                            make_relation('dbt', 'schema', 'm'))
        self.cache.add_link(make_relation('dbt', 'schema', 'm'),
                            make_relation('dbt', 'schema', 'n'))
        self.cache.add_link(make_relation('dbt', 'schema', 'n'),
                            make_relation('dbt', 'schema', 'o'))

        pool = ThreadPool(4)
        results = list(pool.imap_unordered(self._target, ('b', 'g', 'j', 'm')))
        pool.close()
        pool.join()
        # at a minimum, we expect each table to "see" itself, its parent ('a'),
        # and the unrelated table ('e')
        min_expect = {
            'b': {'a', 'b', 'e'},
            'g': {'a', 'g', 'e'},
            'j': {'a', 'j', 'e'},
            'm': {'a', 'm', 'e'},
        }

        for ident, relations in results:
            seen = set(r.identifier for r in relations)
            self.assertTrue(min_expect[ident].issubset(seen))

        self.assert_has_relations(set('abgjme'))

    def test_threaded_repeated(self):
        for _ in range(10):
            self.setUp()
            self._sleep = False
            self.test_threaded()
Example #11
class TestComplexCache(TestCase):
    def setUp(self):
        self.cache = RelationsCache()
        inputs = [
            ('dbt', 'foo', 'table1'),
            ('dbt', 'foo', 'table3'),
            ('dbt', 'foo', 'table4'),
            ('dbt', 'bar', 'table2'),
            ('dbt', 'bar', 'table3'),
            ('dbt_2', 'foo', 'table1'),
            ('dbt_2', 'foo', 'table2'),
        ]
        self.inputs = [make_relation(d, s, i) for d, s, i in inputs]
        for relation in self.inputs:
            self.cache.add(relation)

        # dbt.foo.table3 references dbt.foo.table1
        # (create view dbt.foo.table3 as (select * from dbt.foo.table1...))
        self.cache.add_link(
            make_relation('dbt', 'foo', 'table1'),
            make_relation('dbt', 'foo', 'table3')
        )
        # dbt.bar.table3 references dbt.foo.table3
        # (create view dbt.bar.table3 as (select * from dbt.foo.table3...))
        self.cache.add_link(
            make_relation('dbt', 'foo', 'table3'),
            make_relation('dbt', 'bar', 'table3')
        )

        # dbt.foo.table4 also references dbt.foo.table1
        self.cache.add_link(
            make_relation('dbt', 'foo', 'table1'),
            make_relation('dbt', 'foo', 'table4')
        )

        # and dbt_2.foo.table1 references dbt.foo.table1
        self.cache.add_link(
            make_relation('dbt', 'foo', 'table1'),
            make_relation('dbt_2', 'foo', 'table1'),
        )

    def test_get_relations(self):
        self.assertEqual(len(self.cache.get_relations('dbt', 'foo')), 3)
        self.assertEqual(len(self.cache.get_relations('dbt', 'bar')), 2)
        self.assertEqual(len(self.cache.get_relations('dbt_2', 'foo')), 2)
        self.assertEqual(len(self.cache.relations), 7)

    def test_drop_one(self):
        # dropping dbt.bar.table2 should only drop itself
        self.cache.drop(make_relation('dbt', 'bar', 'table2'))
        self.assertEqual(len(self.cache.get_relations('dbt', 'foo')), 3)
        self.assertEqual(len(self.cache.get_relations('dbt', 'bar')), 1)
        self.assertEqual(len(self.cache.get_relations('dbt_2', 'foo')), 2)
        self.assertEqual(len(self.cache.relations), 6)

    def test_drop_many(self):
        # dropping dbt.foo.table1 should drop everything but dbt.bar.table2 and
        # dbt_2.foo.table2
        self.cache.drop(make_relation('dbt', 'foo', 'table1'))
        self.assertEqual(len(self.cache.get_relations('dbt', 'foo')), 0)
        self.assertEqual(len(self.cache.get_relations('dbt', 'bar')), 1)
        self.assertEqual(len(self.cache.get_relations('dbt_2', 'foo')), 1)
        self.assertEqual(len(self.cache.relations), 2)

    def test_rename_root(self):
        self.cache.rename(make_relation('dbt', 'foo', 'table1'),
                          make_relation('dbt', 'bar', 'table1'))
        retrieved = self.cache.relations[('dbt', 'bar', 'table1')].inner
        self.assertEqual(retrieved.schema, 'bar')
        self.assertEqual(retrieved.identifier, 'table1')
        self.assertEqual(len(self.cache.get_relations('dbt', 'foo')), 2)
        self.assertEqual(len(self.cache.get_relations('dbt', 'bar')), 3)
        self.assertEqual(len(self.cache.get_relations('dbt_2', 'foo')), 2)
        self.assertEqual(len(self.cache.relations), 7)

        # make sure drops still cascade from the renamed table
        self.cache.drop(make_relation('dbt', 'bar', 'table1'))
        self.assertEqual(len(self.cache.get_relations('dbt', 'foo')), 0)
        self.assertEqual(len(self.cache.get_relations('dbt', 'bar')), 1)
        self.assertEqual(len(self.cache.get_relations('dbt_2', 'foo')), 1)
        self.assertEqual(len(self.cache.relations), 2)

    def test_rename_branch(self):
        self.cache.rename(make_relation('dbt', 'foo', 'table3'),
                          make_relation('dbt', 'foo', 'table2'))
        self.assertEqual(len(self.cache.get_relations('dbt', 'foo')), 3)
        self.assertEqual(len(self.cache.get_relations('dbt', 'bar')), 2)
        self.assertEqual(len(self.cache.get_relations('dbt_2', 'foo')), 2)

        # make sure drops still cascade through the renamed table
        self.cache.drop(make_relation('dbt', 'foo', 'table1'))
        self.assertEqual(len(self.cache.get_relations('dbt', 'foo')), 0)
        self.assertEqual(len(self.cache.get_relations('dbt', 'bar')), 1)
        self.assertEqual(len(self.cache.get_relations('dbt_2', 'foo')), 1)
        self.assertEqual(len(self.cache.relations), 2)
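
To make the cascade semantics exercised above explicit, here is a minimal hedged sketch (it assumes the same `make_relation` test helper and `RelationsCache` API used throughout these examples): dropping a relation also drops everything reachable from it through `add_link`.

# Sketch only, not part of the original test suite.
cache = RelationsCache()
parent = make_relation('dbt', 'foo', 'table1')
child = make_relation('dbt', 'foo', 'table3')
cache.add(parent)
cache.add(child)
# record that child is a view over parent
cache.add_link(parent, child)
# dropping the parent cascades to the child via the recorded link
cache.drop(parent)
assert len(cache.relations) == 0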
Example No. 13
def setUp(self):
    self.cache = RelationsCache()
Example No. 14
class TestCache(TestCase):
    def setUp(self):
        self.cache = RelationsCache()

    def test_empty(self):
        self.assertEqual(len(self.cache.relations), 0)
        relations = self.cache.get_relations('test')
        self.assertEqual(len(relations), 0)

    def test_bad_drop(self):
        self.cache.drop(make_relation('foo', 'bar'))

    def test_bad_link(self):
        self.cache.add(make_relation('schema', 'foo'))
        # src does not exist
        with self.assertRaises(dbt.exceptions.InternalException):
            self.cache.add_link(make_relation('schema', 'bar'),
                                make_relation('schema', 'foo'))

        # dst does not exist
        with self.assertRaises(dbt.exceptions.InternalException):
            self.cache.add_link(make_relation('schema', 'foo'),
                                make_relation('schema', 'bar'))

    def test_bad_rename(self):
        # foo does not exist - should be ignored
        self.cache.rename(make_relation('schema', 'foo'),
                          make_relation('schema', 'bar'))

        self.cache.add(make_relation('schema', 'foo'))
        self.cache.add(make_relation('schema', 'bar'))
        # bar exists
        with self.assertRaises(dbt.exceptions.InternalException):
            self.cache.rename(make_relation('schema', 'foo'),
                              make_relation('schema', 'bar'))

    def test_get_relations(self):
        relation = make_relation('foo', 'bar')
        self.cache.add(relation)
        self.assertEqual(len(self.cache.relations), 1)

        relations = self.cache.get_relations('foo')
        self.assertEqual(len(relations), 1)
        self.assertIs(relations[0], relation)

        relations = self.cache.get_relations('FOO')
        self.assertEqual(len(relations), 1)
        self.assertIs(relations[0], relation)

    def test_add(self):
        rel = make_relation('foo', 'bar')
        self.cache.add(rel)

        relations = self.cache.get_relations('foo')
        self.assertEqual(len(relations), 1)
        self.assertIs(relations[0], rel)

        # add a new relation with same name
        self.cache.add(make_relation('foo', 'bar'))
        self.assertEqual(len(self.cache.relations), 1)
        self.assertEqual(self.cache.schemas, {'foo'})

        relations = self.cache.get_relations('foo')
        self.assertEqual(len(relations), 1)
        self.assertIs(relations[0], rel)

        self.cache.add(make_relation('FOO', 'baz'))
        self.assertEqual(len(self.cache.relations), 2)

        relations = self.cache.get_relations('foo')
        self.assertEqual(len(relations), 2)

        self.assertEqual(self.cache.schemas, {'foo'})
        self.assertIsNot(self.cache.relations[('foo', 'bar')].inner, None)
        self.assertIsNot(self.cache.relations[('foo', 'baz')].inner, None)

    def test_rename(self):
        self.cache.add(make_relation('foo', 'bar'))
        self.assertIsNot(self.cache.relations[('foo', 'bar')].inner, None)
        self.cache.rename(make_relation('foo', 'bar'),
                          make_relation('foo', 'baz'))

        relations = self.cache.get_relations('foo')
        self.assertEqual(len(relations), 1)
        self.assertEqual(relations[0].schema, 'foo')
        self.assertEqual(relations[0].identifier, 'baz')

        relation = self.cache.relations[('foo', 'baz')]
        self.assertEqual(relation.inner.schema, 'foo')
        self.assertEqual(relation.inner.identifier, 'baz')
        self.assertEqual(relation.schema, 'foo')
        self.assertEqual(relation.identifier, 'baz')

        with self.assertRaises(KeyError):
            self.cache.relations[('foo', 'bar')]
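
A compact sketch of the rename behavior verified above (again assuming the two-argument schema/identifier `make_relation` helper from this file): the cache is re-keyed on rename, so the old key disappears and lookups must use the new one.

# Sketch only, not part of the original test suite.
cache = RelationsCache()
cache.add(make_relation('foo', 'bar'))
cache.rename(make_relation('foo', 'bar'), make_relation('foo', 'baz'))
assert ('foo', 'baz') in cache.relations      # re-keyed under the new name
assert ('foo', 'bar') not in cache.relations  # old key removed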
Example No. 15
def __init__(self, config):
    self.config = config
    self.cache = RelationsCache()
Example No. 16
def setUp(self):
    self.cache = RelationsCache()
Example No. 17
class BaseAdapter(object):
    """The BaseAdapter provides an abstract base class for adapters.

    Adapters must implement the following methods and macros. Some of the
    methods can be safely overridden as a noop, where it makes sense
    (transactions on databases that don't support them, for instance). Those
    methods are marked with a (passable) in their docstrings. Check docstrings
    for type information, etc.

    To implement a macro, implement "${adapter_type}__${macro_name}". in the
    adapter's internal project.

    Methods:
        - exception_handler
        - date_function
        - list_schemas
        - drop_relation
        - truncate_relation
        - rename_relation
        - get_columns_in_relation
        - expand_column_types
        - list_relations_without_caching
        - is_cancelable
        - create_schema
        - drop_schema
        - quote
        - convert_text_type
        - convert_number_type
        - convert_boolean_type
        - convert_datetime_type
        - convert_date_type
        - convert_time_type

    Macros:
        - get_catalog
    """
    requires = {}

    Relation = BaseRelation
    Column = Column
    # This should be an implementation of BaseConnectionManager
    ConnectionManager = None

    # A set of clobber config fields accepted by this adapter
    # for use in materializations
    AdapterSpecificConfigs = frozenset()

    def __init__(self, config):
        self.config = config
        self.cache = RelationsCache()
        self.connections = self.ConnectionManager(config)
        self._internal_manifest_lazy = None

    ###
    # Methods that pass through to the connection manager
    ###
    def acquire_connection(self, name=None):
        return self.connections.set_connection_name(name)

    def release_connection(self):
        return self.connections.release()

    def cleanup_connections(self):
        return self.connections.cleanup_all()

    def clear_transaction(self):
        self.connections.clear_transaction()

    def commit_if_has_connection(self):
        return self.connections.commit_if_has_connection()

    def nice_connection_name(self):
        conn = self.connections.get_thread_connection()
        if conn is None or conn.name is None:
            return '<None>'
        return conn.name

    @contextmanager
    def connection_named(self, name):
        try:
            yield self.acquire_connection(name)
        finally:
            self.release_connection()

    @available.parse(
        lambda *a, **k: ('', dbt.clients.agate_helper.empty_table()))
    def execute(self, sql, auto_begin=False, fetch=False):
        """Execute the given SQL. This is a thin wrapper around
        ConnectionManager.execute.

        :param str sql: The sql to execute.
        :param bool auto_begin: If set, and dbt is not currently inside a
            transaction, automatically begin one.
        :param bool fetch: If set, fetch results.
        :return: A tuple of the status and the results (empty if fetch=False).
        :rtype: Tuple[str, agate.Table]
        """
        return self.connections.execute(sql=sql,
                                        auto_begin=auto_begin,
                                        fetch=fetch)

    ###
    # Methods that should never be overridden
    ###
    @classmethod
    def type(cls):
        """Get the type of this adapter. Types must be class-unique and
        consistent.

        :return: The type name
        :rtype: str
        """
        return cls.ConnectionManager.TYPE

    @property
    def _internal_manifest(self):
        if self._internal_manifest_lazy is None:
            manifest = GraphLoader.load_internal(self.config)
            self._internal_manifest_lazy = manifest
        return self._internal_manifest_lazy

    def check_internal_manifest(self):
        """Return the internal manifest (used for executing macros) if it's
        been initialized, otherwise return None.
        """
        return self._internal_manifest_lazy

    ###
    # Caching methods
    ###
    def _schema_is_cached(self, database, schema):
        """Check if the schema is cached, and by default logs if it is not."""

        if dbt.flags.USE_CACHE is False:
            return False
        elif (database, schema) not in self.cache:
            logger.debug(
                'On "{}": cache miss for schema "{}.{}", this is inefficient'.
                format(self.nice_connection_name(), database, schema))
            return False
        else:
            return True

    @classmethod
    def _relations_filter_table(cls, table, schemas):
        """Filter the table as appropriate for relations table entries.
        Subclasses can override this to change filtering rules on a per-adapter
        basis.
        """
        return table.where(_relations_filter_schemas(schemas))

    def _get_cache_schemas(self, manifest, exec_only=False):
        """Get a mapping of each node's "information_schema" relations to a
        set of all schemas expected in that information_schema.

        There may be keys that are technically duplicates on the database side,
        for example all of '"foo"', 'foo', '"FOO"' and 'FOO' could coexist as
        databases, and values could overlap as appropriate. All values are
        lowercase strings.
        """
        info_schema_name_map = SchemaSearchMap()
        for node in manifest.nodes.values():
            if exec_only and node.resource_type not in NodeType.executable():
                continue
            relation = self.Relation.create_from(self.config, node)
            info_schema_name_map.add(relation)
        # result is a map whose keys are information_schema Relations without
        # identifiers that have appropriate database prefixes, and whose values
        # are sets of lowercase schema names that are valid members of those
        # databases
        return info_schema_name_map

    def _relations_cache_for_schemas(self, manifest):
        """Populate the relations cache for the given schemas. Returns an
        iterable of the schemas populated, as strings.
        """
        if not dbt.flags.USE_CACHE:
            return

        info_schema_name_map = self._get_cache_schemas(manifest,
                                                       exec_only=True)
        for db, schema in info_schema_name_map.search():
            for relation in self.list_relations_without_caching(db, schema):
                self.cache.add(relation)

        # it's possible that there were no relations in some schemas. We want
        # to insert the schemas we query into the cache's `.schemas` attribute
        # so we can check it later
        self.cache.update_schemas(info_schema_name_map.schemas_searched())

    def set_relations_cache(self, manifest, clear=False):
        """Run a query that gets a populated cache of the relations in the
        database and set the cache on this adapter.
        """
        if not dbt.flags.USE_CACHE:
            return

        with self.cache.lock:
            if clear:
                self.cache.clear()
            self._relations_cache_for_schemas(manifest)

    def cache_new_relation(self, relation):
        """Cache a new relation in dbt. It will show up in `list relations`."""
        if relation is None:
            name = self.nice_connection_name()
            dbt.exceptions.raise_compiler_error(
                'Attempted to cache a null relation for {}'.format(name))
        if dbt.flags.USE_CACHE:
            self.cache.add(relation)
        # so jinja doesn't render things
        return ''

    ###
    # Abstract methods for database-specific values, attributes, and types
    ###
    @abstractclassmethod
    def date_function(cls):
        """Get the date function used by this adapter's database.

        :return: The date function
        :rtype: str
        """
        raise dbt.exceptions.NotImplementedException(
            '`date_function` is not implemented for this adapter!')

    @abstractclassmethod
    def is_cancelable(cls):
        raise dbt.exceptions.NotImplementedException(
            '`is_cancelable` is not implemented for this adapter!')

    ###
    # Abstract methods about schemas
    ###
    @abc.abstractmethod
    def list_schemas(self, database):
        """Get a list of existing schemas.

        :param str database: The name of the database to list under.
        :return: All schemas that currently exist in the database
        :rtype: List[str]
        """
        raise dbt.exceptions.NotImplementedException(
            '`list_schemas` is not implemented for this adapter!')

    @available.parse(lambda *a, **k: False)
    def check_schema_exists(self, database, schema):
        """Check if a schema exists.

        The default implementation of this is potentially unnecessarily slow,
        and adapters should implement it if there is an optimized path (and
        there probably is)
        """
        search = (s.lower() for s in self.list_schemas(database=database))
        return schema.lower() in search

    ###
    # Abstract methods about relations
    ###
    @abc.abstractmethod
    @available.parse_none
    def drop_relation(self, relation):
        """Drop the given relation.

        *Implementors must call self.cache.drop() to preserve cache state!*

        :param self.Relation relation: The relation to drop
        """
        raise dbt.exceptions.NotImplementedException(
            '`drop_relation` is not implemented for this adapter!')

    @abc.abstractmethod
    @available.parse_none
    def truncate_relation(self, relation):
        """Truncate the given relation.

        :param self.Relation relation: The relation to truncate
        """
        raise dbt.exceptions.NotImplementedException(
            '`truncate_relation` is not implemented for this adapter!')

    @abc.abstractmethod
    @available.parse_none
    def rename_relation(self, from_relation, to_relation):
        """Rename the relation from from_relation to to_relation.

        Implementors must call self.cache.rename() to preserve cache state.

        :param self.Relation from_relation: The original relation name
        :param self.Relation to_relation: The new relation name
        """
        raise dbt.exceptions.NotImplementedException(
            '`rename_relation` is not implemented for this adapter!')

    @abc.abstractmethod
    @available.parse_list
    def get_columns_in_relation(self, relation):
        """Get a list of the columns in the given Relation.

        :param self.Relation relation: The relation to query for.
        :return: Information about all columns in the given relation.
        :rtype: List[self.Column]
        """
        raise dbt.exceptions.NotImplementedException(
            '`get_columns_in_relation` is not implemented for this adapter!')

    @available.deprecated('get_columns_in_relation', lambda *a, **k: [])
    def get_columns_in_table(self, schema, identifier):
        """DEPRECATED: Get a list of the columns in the given table."""
        relation = self.Relation.create(
            database=self.config.credentials.database,
            schema=schema,
            identifier=identifier,
            quote_policy=self.config.quoting)
        return self.get_columns_in_relation(relation)

    @abc.abstractmethod
    def expand_column_types(self, goal, current):
        """Expand the current table's types to match the goal table. (passable)

        :param self.Relation goal: A relation that currently exists in the
            database with columns of the desired types.
        :param self.Relation current: A relation that currently exists in the
            database with columns of unspecified types.
        """
        raise dbt.exceptions.NotImplementedException(
            '`expand_target_column_types` is not implemented for this adapter!'
        )

    @abc.abstractmethod
    def list_relations_without_caching(self, information_schema, schema):
        """List relations in the given schema, bypassing the cache.

        This is used as the underlying behavior to fill the cache.

        :param Relation information_schema: The information schema to list
            relations from.
        :param str schema: The name of the schema to list relations from.
        :return: The relations in schema
        :rtype: List[self.Relation]
        """
        raise dbt.exceptions.NotImplementedException(
            '`list_relations_without_caching` is not implemented for this '
            'adapter!')

    ###
    # Provided methods about relations
    ###
    @available.parse_list
    def get_missing_columns(self, from_relation, to_relation):
        """Returns a list of Columns in from_relation that are missing from
        to_relation.

        :param Relation from_relation: The relation that might have extra
            columns
        :param Relation to_relation: The relation that might have columns
            missing
        :return: The columns in from_relation that are missing from to_relation
        :rtype: List[self.Column]
        """
        if not isinstance(from_relation, self.Relation):
            dbt.exceptions.invalid_type_error(
                method_name='get_missing_columns',
                arg_name='from_relation',
                got_value=from_relation,
                expected_type=self.Relation)

        if not isinstance(to_relation, self.Relation):
            dbt.exceptions.invalid_type_error(
                method_name='get_missing_columns',
                arg_name='to_relation',
                got_value=to_relation,
                expected_type=self.Relation)

        from_columns = {
            col.name: col
            for col in self.get_columns_in_relation(from_relation)
        }

        to_columns = {
            col.name: col
            for col in self.get_columns_in_relation(to_relation)
        }

        missing_columns = set(from_columns.keys()) - set(to_columns.keys())

        return [
            col for (col_name, col) in from_columns.items()
            if col_name in missing_columns
        ]

    @available.parse_none
    def valid_snapshot_target(self, relation):
        """Ensure that the target relation is valid, by making sure it has the
        expected columns.

        :param Relation relation: The relation to check
        :raises dbt.exceptions.CompilationException: If the columns are
            incorrect.
        """
        if not isinstance(relation, self.Relation):
            dbt.exceptions.invalid_type_error(
                method_name='valid_snapshot_target',
                arg_name='relation',
                got_value=relation,
                expected_type=self.Relation)

        columns = self.get_columns_in_relation(relation)
        names = set(c.name.lower() for c in columns)
        expanded_keys = ('scd_id', 'valid_from', 'valid_to')
        extra = []
        missing = []
        for legacy in expanded_keys:
            desired = 'dbt_' + legacy
            if desired not in names:
                missing.append(desired)
                if legacy in names:
                    extra.append(legacy)

        if missing:
            if extra:
                msg = ('Snapshot target has ("{}") but not ("{}") - is it an '
                       'unmigrated previous version archive?'.format(
                           '", "'.join(extra), '", "'.join(missing)))
            else:
                msg = ('Snapshot target is not a snapshot table (missing "{}")'
                       .format('", "'.join(missing)))
            dbt.exceptions.raise_compiler_error(msg)

    @available.parse_none
    def expand_target_column_types(self, from_relation, to_relation):
        if not isinstance(from_relation, self.Relation):
            dbt.exceptions.invalid_type_error(
                method_name='expand_target_column_types',
                arg_name='from_relation',
                got_value=from_relation,
                expected_type=self.Relation)

        if not isinstance(to_relation, self.Relation):
            dbt.exceptions.invalid_type_error(
                method_name='expand_target_column_types',
                arg_name='to_relation',
                got_value=to_relation,
                expected_type=self.Relation)

        self.expand_column_types(from_relation, to_relation)

    def list_relations(self, database, schema):
        if self._schema_is_cached(database, schema):
            return self.cache.get_relations(database, schema)

        information_schema = self.Relation.create(
            database=database,
            schema=schema,
            model_name='',
            quote_policy=self.config.quoting).information_schema()

        # we can't build the relations cache because we don't have a
        # manifest so we can't run any operations.
        relations = self.list_relations_without_caching(
            information_schema, schema)

        logger.debug('with database={}, schema={}, relations={}'.format(
            database, schema, relations))
        return relations

    def _make_match_kwargs(self, database, schema, identifier):
        quoting = self.config.quoting
        if identifier is not None and quoting['identifier'] is False:
            identifier = identifier.lower()

        if schema is not None and quoting['schema'] is False:
            schema = schema.lower()

        if database is not None and quoting['database'] is False:
            database = database.lower()

        return filter_null_values({
            'database': database,
            'identifier': identifier,
            'schema': schema,
        })

    def _make_match(self, relations_list, database, schema, identifier):

        matches = []

        search = self._make_match_kwargs(database, schema, identifier)

        for relation in relations_list:
            if relation.matches(**search):
                matches.append(relation)

        return matches

    @available.parse_none
    def get_relation(self, database, schema, identifier):
        relations_list = self.list_relations(database, schema)

        matches = self._make_match(relations_list, database, schema,
                                   identifier)

        if len(matches) > 1:
            kwargs = {
                'identifier': identifier,
                'schema': schema,
                'database': database,
            }
            dbt.exceptions.get_relation_returned_multiple_results(
                kwargs, matches)

        elif matches:
            return matches[0]

        return None

    @available.deprecated('get_relation', lambda *a, **k: False)
    def already_exists(self, schema, name):
        """DEPRECATED: Return if a model already exists in the database"""
        database = self.config.credentials.database
        relation = self.get_relation(database, schema, name)
        return relation is not None

    ###
    # ODBC FUNCTIONS -- these should not need to change for every adapter,
    #                   although some adapters may override them
    ###
    @abc.abstractmethod
    @available.parse_none
    def create_schema(self, database, schema):
        """Create the given schema if it does not exist.

        :param str database: The name of the database to create the schema in.
        :param str schema: The schema name to create.
        """
        raise dbt.exceptions.NotImplementedException(
            '`create_schema` is not implemented for this adapter!')

    @abc.abstractmethod
    def drop_schema(self, database, schema):
        """Drop the given schema (and everything in it) if it exists.

        :param str database: The name of the database the schema lives in.
        :param str schema: The schema name to drop.
        """
        raise dbt.exceptions.NotImplementedException(
            '`drop_schema` is not implemented for this adapter!')

    @available
    @abstractclassmethod
    def quote(cls, identifier):
        """Quote the given identifier, as appropriate for the database.

        :param str identifier: The identifier to quote
        :return: The quoted identifier
        :rtype: str
        """
        raise dbt.exceptions.NotImplementedException(
            '`quote` is not implemented for this adapter!')

    @available
    def quote_as_configured(self, identifier, quote_key):
        """Quote or do not quote the given identifer as configured in the
        project config for the quote key.

        The quote key should be one of 'database' (on bigquery, 'profile'),
        'identifier', or 'schema', or it will be treated as if you set `True`.
        """
        default = self.Relation.DEFAULTS['quote_policy'].get(quote_key)
        if self.config.quoting.get(quote_key, default):
            return self.quote(identifier)
        else:
            return identifier
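
    # Illustrative note (not in the original source): with a quoting
    # config of {'identifier': True, 'schema': False},
    # quote_as_configured('My_Table', 'identifier') goes through
    # self.quote(), while quote_as_configured('my_schema', 'schema')
    # is returned unchanged.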

    ###
    # Conversions: These must be implemented by concrete implementations, for
    # converting agate types into their sql equivalents.
    ###
    @abstractclassmethod
    def convert_text_type(cls, agate_table, col_idx):
        """Return the type in the database that best maps to the agate.Text
        type for the given agate table and column index.

        :param agate.Table agate_table: The table
        :param int col_idx: The index into the agate table for the column.
        :return: The name of the type in the database
        :rtype: str
        """
        raise dbt.exceptions.NotImplementedException(
            '`convert_text_type` is not implemented for this adapter!')

    @abstractclassmethod
    def convert_number_type(cls, agate_table, col_idx):
        """Return the type in the database that best maps to the agate.Number
        type for the given agate table and column index.

        :param agate.Table agate_table: The table
        :param int col_idx: The index into the agate table for the column.
        :return: The name of the type in the database
        :rtype: str
        """
        raise dbt.exceptions.NotImplementedException(
            '`convert_number_type` is not implemented for this adapter!')

    @abstractclassmethod
    def convert_boolean_type(cls, agate_table, col_idx):
        """Return the type in the database that best maps to the agate.Boolean
        type for the given agate table and column index.

        :param agate.Table agate_table: The table
        :param int col_idx: The index into the agate table for the column.
        :return: The name of the type in the database
        :rtype: str
        """
        raise dbt.exceptions.NotImplementedException(
            '`convert_boolean_type` is not implemented for this adapter!')

    @abstractclassmethod
    def convert_datetime_type(cls, agate_table, col_idx):
        """Return the type in the database that best maps to the agate.DateTime
        type for the given agate table and column index.

        :param agate.Table agate_table: The table
        :param int col_idx: The index into the agate table for the column.
        :return: The name of the type in the database
        :rtype: str
        """
        raise dbt.exceptions.NotImplementedException(
            '`convert_datetime_type` is not implemented for this adapter!')

    @abstractclassmethod
    def convert_date_type(cls, agate_table, col_idx):
        """Return the type in the database that best maps to the agate.Date
        type for the given agate table and column index.

        :param agate.Table agate_table: The table
        :param int col_idx: The index into the agate table for the column.
        :return: The name of the type in the database
        :rtype: str
        """
        raise dbt.exceptions.NotImplementedException(
            '`convert_date_type` is not implemented for this adapter!')

    @abstractclassmethod
    def convert_time_type(cls, agate_table, col_idx):
        """Return the type in the database that best maps to the
        agate.TimeDelta type for the given agate table and column index.

        :param agate.Table agate_table: The table
        :param int col_idx: The index into the agate table for the column.
        :return: The name of the type in the database
        :rtype: str
        """
        raise dbt.exceptions.NotImplementedException(
            '`convert_time_type` is not implemented for this adapter!')

    @available
    @classmethod
    def convert_type(cls, agate_table, col_idx):
        return cls.convert_agate_type(agate_table, col_idx)

    @classmethod
    def convert_agate_type(cls, agate_table, col_idx):
        agate_type = agate_table.column_types[col_idx]
        conversions = [
            (agate.Text, cls.convert_text_type),
            (agate.Number, cls.convert_number_type),
            (agate.Boolean, cls.convert_boolean_type),
            (agate.DateTime, cls.convert_datetime_type),
            (agate.Date, cls.convert_date_type),
            (agate.TimeDelta, cls.convert_time_type),
        ]
        for agate_cls, func in conversions:
            if isinstance(agate_type, agate_cls):
                return func(agate_table, col_idx)
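
    # Illustrative note (not in the original source): for a column whose
    # agate type is agate.Text, the loop above dispatches to
    # cls.convert_text_type(agate_table, col_idx); if no entry matches,
    # the method falls through and returns None.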

    ###
    # Operations involving the manifest
    ###
    def execute_macro(self,
                      macro_name,
                      manifest=None,
                      project=None,
                      context_override=None,
                      kwargs=None,
                      release=False):
        """Look macro_name up in the manifest and execute its results.

        :param str macro_name: The name of the macro to execute.
        :param Optional[Manifest] manifest: The manifest to use for generating
            the base macro execution context. If none is provided, use the
            internal manifest.
        :param Optional[str] project: The name of the project to search in, or
            None for the first match.
        :param Optional[dict] context_override: An optional dict to update()
            the macro execution context.
        :param Optional[dict] kwargs: An optional dict of keyword args used to
            pass to the macro.
        :param bool release: If True, release the connection after executing.

        Return an AttrDict with three attributes: 'table', 'data', and
            'status'. 'table' is an agate.Table.
        """
        if kwargs is None:
            kwargs = {}
        if context_override is None:
            context_override = {}

        if manifest is None:
            manifest = self._internal_manifest

        macro = manifest.find_macro_by_name(macro_name, project)
        if macro is None:
            if project is None:
                package_name = 'any package'
            else:
                package_name = 'the "{}" package'.format(project)

            # The import of dbt.context.operation below shadows 'dbt'
            import dbt.exceptions
            raise dbt.exceptions.RuntimeException(
                'dbt could not find a macro with the name "{}" in {}'.format(
                    macro_name, package_name))
        # This causes a reference cycle, as dbt.context.operation.generate()
        # ends up calling get_adapter, so the import has to be here.
        import dbt.context.operation
        macro_context = dbt.context.operation.generate(macro, self.config,
                                                       manifest)
        macro_context.update(context_override)

        macro_function = macro.generator(macro_context)

        try:
            result = macro_function(**kwargs)
        finally:
            if release:
                self.release_connection()
        return result
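
    # Hedged usage sketch (the macro name and kwargs below are
    # hypothetical, chosen only to illustrate the call shape):
    #
    #     adapter.execute_macro(
    #         'create_schema',
    #         kwargs={'database_name': 'analytics', 'schema_name': 'dbt'},
    #         release=True,
    #     )
    #
    # With manifest=None the macro is looked up in the internal manifest,
    # rendered with the generated context, and the connection is released
    # in the `finally` block because release=True.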

    @classmethod
    def _catalog_filter_table(cls, table, manifest):
        """Filter the table as appropriate for catalog entries. Subclasses can
        override this to change filtering rules on a per-adapter basis.
        """
        return table.where(_catalog_filter_schemas(manifest))

    def get_catalog(self, manifest):
        """Get the catalog for this manifest by running the get catalog macro.
        Returns an agate.Table of catalog information.
        """
        information_schemas = list(self._get_cache_schemas(manifest).keys())
        # make it a list so macros can index into it.
        kwargs = {'information_schemas': information_schemas}
        table = self.execute_macro(GET_CATALOG_MACRO_NAME,
                                   kwargs=kwargs,
                                   release=True)

        results = self._catalog_filter_table(table, manifest)
        return results

    def cancel_open_connections(self):
        """Cancel all open connections."""
        return self.connections.cancel_open()

    def calculate_freshness(self, source, loaded_at_field, manifest=None):
        """Calculate the freshness of sources in dbt, and return it"""
        # in the future `source` will be a Relation instead of a string
        kwargs = {'source': source, 'loaded_at_field': loaded_at_field}

        # run the macro
        table = self.execute_macro(FRESHNESS_MACRO_NAME,
                                   kwargs=kwargs,
                                   release=True,
                                   manifest=manifest)
        # now we have a 1-row table of the maximum `loaded_at_field` value and
        # the current time according to the db.
        if len(table) != 1 or len(table[0]) != 2:
            dbt.exceptions.raise_compiler_error(
                'Got an invalid result from "{}" macro: {}'.format(
                    FRESHNESS_MACRO_NAME, [tuple(r) for r in table]))

        max_loaded_at = _utc(table[0][0], source, loaded_at_field)
        snapshotted_at = _utc(table[0][1], source, loaded_at_field)

        age = (snapshotted_at - max_loaded_at).total_seconds()
        return {
            'max_loaded_at': max_loaded_at,
            'snapshotted_at': snapshotted_at,
            'age': age,
        }
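
To make the contract above concrete, here is a minimal hedged sketch of what a subclass must fill in. Every name below is a placeholder, not a real adapter; the abstract methods elided in the trailing comment would follow the same pattern.

# Sketch only: a skeletal adapter showing which members a concrete
# subclass supplies. SketchConnectionManager is a hypothetical stand-in.
class SketchConnectionManager(object):
    TYPE = 'sketch'

    def __init__(self, config):
        self.config = config


class SketchAdapter(BaseAdapter):
    ConnectionManager = SketchConnectionManager

    @classmethod
    def date_function(cls):
        return 'now()'

    @classmethod
    def is_cancelable(cls):
        return False

    def list_schemas(self, database):
        # a real adapter would query the database here
        return []

    # drop_relation, truncate_relation, rename_relation,
    # get_columns_in_relation, expand_column_types,
    # list_relations_without_caching, create_schema, drop_schema, quote,
    # and the convert_*_type classmethods would be implemented the same
    # way, calling self.cache.drop()/self.cache.rename() where the
    # docstrings above require it.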
Example No. 18
class BaseAdapter(metaclass=AdapterMeta):
    """The BaseAdapter provides an abstract base class for adapters.

    Adapters must implement the following methods and macros. Some of the
    methods can be safely overridden as a noop, where it makes sense
    (transactions on databases that don't support them, for instance). Those
    methods are marked with a (passable) in their docstrings. Check docstrings
    for type information, etc.

    To implement a macro, implement "${adapter_type}__${macro_name}". in the
    adapter's internal project.

    Methods:
        - exception_handler
        - date_function
        - list_schemas
        - drop_relation
        - truncate_relation
        - rename_relation
        - get_columns_in_relation
        - expand_column_types
        - list_relations_without_caching
        - is_cancelable
        - create_schema
        - drop_schema
        - quote
        - convert_text_type
        - convert_number_type
        - convert_boolean_type
        - convert_datetime_type
        - convert_date_type
        - convert_time_type

    Macros:
        - get_catalog
    """
    Relation: Type[BaseRelation] = BaseRelation
    Column: Type[BaseColumn] = BaseColumn
    ConnectionManager: Type[BaseConnectionManager]

    # A set of clobber config fields accepted by this adapter
    # for use in materializations
    AdapterSpecificConfigs: Type[AdapterConfig] = AdapterConfig

    def __init__(self, config):
        self.config = config
        self.cache = RelationsCache()
        self.connections = self.ConnectionManager(config)
        self._internal_manifest_lazy: Optional[Manifest] = None

    ###
    # Methods that pass through to the connection manager
    ###
    def acquire_connection(self, name=None) -> Connection:
        return self.connections.set_connection_name(name)

    def release_connection(self) -> None:
        self.connections.release()

    def cleanup_connections(self) -> None:
        self.connections.cleanup_all()

    def clear_transaction(self) -> None:
        self.connections.clear_transaction()

    def commit_if_has_connection(self) -> None:
        self.connections.commit_if_has_connection()

    def nice_connection_name(self) -> str:
        conn = self.connections.get_if_exists()
        if conn is None or conn.name is None:
            return '<None>'
        return conn.name

    @contextmanager
    def connection_named(
            self,
            name: str,
            node: Optional[CompileResultNode] = None) -> Iterator[None]:
        try:
            if self.connections.query_header is not None:
                self.connections.query_header.set(name, node)
            self.acquire_connection(name)
            yield
        finally:
            self.release_connection()
            if self.connections.query_header is not None:
                self.connections.query_header.reset()

    @contextmanager
    def connection_for(self, node: CompileResultNode) -> Iterator[None]:
        with self.connection_named(node.unique_id, node):
            yield

    @available.parse(lambda *a, **k: ('', empty_table()))
    def execute(self,
                sql: str,
                auto_begin: bool = False,
                fetch: bool = False) -> Tuple[str, agate.Table]:
        """Execute the given SQL. This is a thin wrapper around
        ConnectionManager.execute.

        :param str sql: The sql to execute.
        :param bool auto_begin: If set, and dbt is not currently inside a
            transaction, automatically begin one.
        :param bool fetch: If set, fetch results.
        :return: A tuple of the status and the results (empty if fetch=False).
        :rtype: Tuple[str, agate.Table]
        """
        return self.connections.execute(sql=sql,
                                        auto_begin=auto_begin,
                                        fetch=fetch)

    ###
    # Methods that should never be overridden
    ###
    @classmethod
    def type(cls) -> str:
        """Get the type of this adapter. Types must be class-unique and
        consistent.

        :return: The type name
        :rtype: str
        """
        return cls.ConnectionManager.TYPE

    @property
    def _internal_manifest(self) -> Manifest:
        if self._internal_manifest_lazy is None:
            return self.load_internal_manifest()
        return self._internal_manifest_lazy

    def check_internal_manifest(self) -> Optional[Manifest]:
        """Return the internal manifest (used for executing macros) if it's
        been initialized, otherwise return None.
        """
        return self._internal_manifest_lazy

    def load_internal_manifest(self) -> Manifest:
        if self._internal_manifest_lazy is None:
            # avoid a circular import
            from dbt.parser.manifest import load_internal_manifest
            manifest = load_internal_manifest(self.config)
            self._internal_manifest_lazy = manifest
        return self._internal_manifest_lazy

    ###
    # Caching methods
    ###
    def _schema_is_cached(self, database: Optional[str], schema: str) -> bool:
        """Check if the schema is cached, and by default logs if it is not."""

        if dbt.flags.USE_CACHE is False:
            return False
        elif (database, schema) not in self.cache:
            logger.debug(
                'On "{}": cache miss for schema "{}.{}", this is inefficient'.
                format(self.nice_connection_name(), database, schema))
            return False
        else:
            return True

    def _get_cache_schemas(self, manifest: Manifest) -> Set[BaseRelation]:
        """Get the set of schema relations that the cache logic needs to
        populate. This means only executable nodes are included.
        """
        # the cache only cares about executable nodes
        return {
            self.Relation.create_from(self.config, node).without_identifier()
            for node in manifest.nodes.values()
            if node.resource_type in NodeType.executable()
        }

    def _get_catalog_schemas(self, manifest: Manifest) -> SchemaSearchMap:
        """Get a mapping of each node's "information_schema" relations to a
        set of all schemas expected in that information_schema.

        There may be keys that are technically duplicates on the database side,
        for example all of '"foo"', 'foo', '"FOO"' and 'FOO' could coexist as
        databases, and values could overlap as appropriate. All values are
        lowercase strings.
        """
        info_schema_name_map = SchemaSearchMap()
        nodes: Iterator[CompileResultNode] = chain(
            manifest.nodes.values(),
            manifest.sources.values(),
        )
        for node in nodes:
            relation = self.Relation.create_from(self.config, node)
            info_schema_name_map.add(relation)
        # result is a map whose keys are information_schema Relations without
        # identifiers that have appropriate database prefixes, and whose values
        # are sets of lowercase schema names that are valid members of those
        # databases
        return info_schema_name_map

    def _list_relations_get_connection(
            self, schema_relation: BaseRelation) -> List[BaseRelation]:
        name = f'list_{schema_relation.database}_{schema_relation.schema}'
        with self.connection_named(name):
            return self.list_relations_without_caching(schema_relation)

    def _relations_cache_for_schemas(self, manifest: Manifest) -> None:
        """Populate the relations cache for the given schemas. Returns an
        iterable of the schemas populated, as strings.
        """
        if not dbt.flags.USE_CACHE:
            return

        cache_schemas = self._get_cache_schemas(manifest)
        with executor(self.config) as tpe:
            futures: List[Future[List[BaseRelation]]] = [
                tpe.submit(self._list_relations_get_connection, cache_schema)
                for cache_schema in cache_schemas
            ]
            for future in as_completed(futures):
                # if we can't read the relations we need to just raise anyway,
                # so just call future.result() and let that raise on failure
                for relation in future.result():
                    self.cache.add(relation)

        # it's possible that there were no relations in some schemas. We want
        # to insert the schemas we query into the cache's `.schemas` attribute
        # so we can check it later
        cache_update: Set[Tuple[Optional[str], Optional[str]]] = set()
        for relation in cache_schemas:
            cache_update.add((relation.database, relation.schema))
        self.cache.update_schemas(cache_update)

    def set_relations_cache(self,
                            manifest: Manifest,
                            clear: bool = False) -> None:
        """Run a query that gets a populated cache of the relations in the
        database and set the cache on this adapter.
        """
        if not dbt.flags.USE_CACHE:
            return

        with self.cache.lock:
            if clear:
                self.cache.clear()
            self._relations_cache_for_schemas(manifest)

    @available
    def cache_added(self, relation: Optional[BaseRelation]) -> str:
        """Cache a new relation in dbt. It will show up in `list relations`."""
        if relation is None:
            name = self.nice_connection_name()
            raise_compiler_error(
                'Attempted to cache a null relation for {}'.format(name))
        if dbt.flags.USE_CACHE:
            self.cache.add(relation)
        # so jinja doesn't render things
        return ''

    @available
    def cache_dropped(self, relation: Optional[BaseRelation]) -> str:
        """Drop a relation in dbt. It will no longer show up in
        `list relations`, and any bound views will be dropped from the cache
        """
        if relation is None:
            name = self.nice_connection_name()
            raise_compiler_error(
                'Attempted to drop a null relation for {}'.format(name))
        if dbt.flags.USE_CACHE:
            self.cache.drop(relation)
        return ''

    @available
    def cache_renamed(
        self,
        from_relation: Optional[BaseRelation],
        to_relation: Optional[BaseRelation],
    ) -> str:
        """Rename a relation in dbt. It will show up with a new name in
        `list_relations`, but bound views will remain bound.
        """
        if from_relation is None or to_relation is None:
            name = self.nice_connection_name()
            src_name = _relation_name(from_relation)
            dst_name = _relation_name(to_relation)
            raise_compiler_error('Attempted to rename {} to {} for {}'.format(
                src_name, dst_name, name))

        if dbt.flags.USE_CACHE:
            self.cache.rename(from_relation, to_relation)
        return ''

    ###
    # Abstract methods for database-specific values, attributes, and types
    ###
    @abc.abstractclassmethod
    def date_function(cls) -> str:
        """Get the date function used by this adapter's database."""
        raise NotImplementedException(
            '`date_function` is not implemented for this adapter!')

    @abc.abstractclassmethod
    def is_cancelable(cls) -> bool:
        raise NotImplementedException(
            '`is_cancelable` is not implemented for this adapter!')

    ###
    # Abstract methods about schemas
    ###
    @abc.abstractmethod
    def list_schemas(self, database: str) -> List[str]:
        """Get a list of existing schemas in database"""
        raise NotImplementedException(
            '`list_schemas` is not implemented for this adapter!')

    @available.parse(lambda *a, **k: False)
    def check_schema_exists(self, database: str, schema: str) -> bool:
        """Check if a schema exists.

        The default implementation of this is potentially unnecessarily slow,
        and adapters should implement it if there is an optimized path (and
        there probably is)
        """
        search = (s.lower() for s in self.list_schemas(database=database))
        return schema.lower() in search

    ###
    # Abstract methods about relations
    ###
    @abc.abstractmethod
    @available.parse_none
    def drop_relation(self, relation: BaseRelation) -> None:
        """Drop the given relation.

        *Implementors must call self.cache.drop() to preserve cache state!*
        """
        raise NotImplementedException(
            '`drop_relation` is not implemented for this adapter!')

    @abc.abstractmethod
    @available.parse_none
    def truncate_relation(self, relation: BaseRelation) -> None:
        """Truncate the given relation."""
        raise NotImplementedException(
            '`truncate_relation` is not implemented for this adapter!')

    @abc.abstractmethod
    @available.parse_none
    def rename_relation(self, from_relation: BaseRelation,
                        to_relation: BaseRelation) -> None:
        """Rename the relation from from_relation to to_relation.

        Implementors must call self.cache.rename() to preserve cache state.
        """
        raise NotImplementedException(
            '`rename_relation` is not implemented for this adapter!')

    @abc.abstractmethod
    @available.parse_list
    def get_columns_in_relation(self,
                                relation: BaseRelation) -> List[BaseColumn]:
        """Get a list of the columns in the given Relation."""
        raise NotImplementedException(
            '`get_columns_in_relation` is not implemented for this adapter!')

    @available.deprecated('get_columns_in_relation', lambda *a, **k: [])
    def get_columns_in_table(self, schema: str,
                             identifier: str) -> List[BaseColumn]:
        """DEPRECATED: Get a list of the columns in the given table."""
        relation = self.Relation.create(
            database=self.config.credentials.database,
            schema=schema,
            identifier=identifier,
            quote_policy=self.config.quoting)
        return self.get_columns_in_relation(relation)

    @abc.abstractmethod
    def expand_column_types(self, goal: BaseRelation,
                            current: BaseRelation) -> None:
        """Expand the current table's types to match the goal table. (passable)

        :param self.Relation goal: A relation that currently exists in the
            database with columns of the desired types.
        :param self.Relation current: A relation that currently exists in the
            database with columns of unspecified types.
        """
        raise NotImplementedException(
            '`expand_target_column_types` is not implemented for this adapter!'
        )

    @abc.abstractmethod
    def list_relations_without_caching(
            self, schema_relation: BaseRelation) -> List[BaseRelation]:
        """List relations in the given schema, bypassing the cache.

        This is used as the underlying behavior to fill the cache.

        :param schema_relation: A relation containing the database and schema
            as appropriate for the underlying data warehouse
        :return: The relations in schema
        :rtype: List[self.Relation]
        """
        raise NotImplementedException(
            '`list_relations_without_caching` is not implemented for this '
            'adapter!')

    ###
    # Provided methods about relations
    ###
    @available.parse_list
    def get_missing_columns(self, from_relation: BaseRelation,
                            to_relation: BaseRelation) -> List[BaseColumn]:
        """Returns a list of Columns in from_relation that are missing from
        to_relation.
        """
        if not isinstance(from_relation, self.Relation):
            invalid_type_error(method_name='get_missing_columns',
                               arg_name='from_relation',
                               got_value=from_relation,
                               expected_type=self.Relation)

        if not isinstance(to_relation, self.Relation):
            invalid_type_error(method_name='get_missing_columns',
                               arg_name='to_relation',
                               got_value=to_relation,
                               expected_type=self.Relation)

        from_columns = {
            col.name: col
            for col in self.get_columns_in_relation(from_relation)
        }

        to_columns = {
            col.name: col
            for col in self.get_columns_in_relation(to_relation)
        }

        missing_columns = set(from_columns.keys()) - set(to_columns.keys())

        return [
            col for (col_name, col) in from_columns.items()
            if col_name in missing_columns
        ]

    @available.parse_none
    def valid_snapshot_target(self, relation: BaseRelation) -> None:
        """Ensure that the target relation is valid, by making sure it has the
        expected columns.

        :param Relation relation: The relation to check
        :raises CompilationException: If the columns are
            incorrect.
        """
        if not isinstance(relation, self.Relation):
            invalid_type_error(method_name='valid_snapshot_target',
                               arg_name='relation',
                               got_value=relation,
                               expected_type=self.Relation)

        columns = self.get_columns_in_relation(relation)
        names = set(c.name.lower() for c in columns)
        expanded_keys = ('scd_id', 'valid_from', 'valid_to')
        extra = []
        missing = []
        for legacy in expanded_keys:
            desired = 'dbt_' + legacy
            if desired not in names:
                missing.append(desired)
                if legacy in names:
                    extra.append(legacy)

        if missing:
            if extra:
                msg = ('Snapshot target has ("{}") but not ("{}") - is it an '
                       'unmigrated previous version archive?'.format(
                           '", "'.join(extra), '", "'.join(missing)))
            else:
                msg = ('Snapshot target is not a snapshot table (missing "{}")'
                       .format('", "'.join(missing)))
            raise_compiler_error(msg)

    @available.parse_none
    def expand_target_column_types(self, from_relation: BaseRelation,
                                   to_relation: BaseRelation) -> None:
        if not isinstance(from_relation, self.Relation):
            invalid_type_error(method_name='expand_target_column_types',
                               arg_name='from_relation',
                               got_value=from_relation,
                               expected_type=self.Relation)

        if not isinstance(to_relation, self.Relation):
            invalid_type_error(method_name='expand_target_column_types',
                               arg_name='to_relation',
                               got_value=to_relation,
                               expected_type=self.Relation)

        self.expand_column_types(from_relation, to_relation)

    def list_relations(self, database: Optional[str],
                       schema: str) -> List[BaseRelation]:
        if self._schema_is_cached(database, schema):
            return self.cache.get_relations(database, schema)

        schema_relation = self.Relation.create(
            database=database,
            schema=schema,
            identifier='',
            quote_policy=self.config.quoting).without_identifier()

        # we can't build the relations cache because we don't have a
        # manifest so we can't run any operations.
        relations = self.list_relations_without_caching(schema_relation)

        logger.debug('with database={}, schema={}, relations={}'.format(
            database, schema, relations))
        return relations

    def _make_match_kwargs(self, database: str, schema: str,
                           identifier: str) -> Dict[str, str]:
        quoting = self.config.quoting
        if identifier is not None and quoting['identifier'] is False:
            identifier = identifier.lower()

        if schema is not None and quoting['schema'] is False:
            schema = schema.lower()

        if database is not None and quoting['database'] is False:
            database = database.lower()

        return filter_null_values({
            'database': database,
            'identifier': identifier,
            'schema': schema,
        })

    def _make_match(
        self,
        relations_list: List[BaseRelation],
        database: str,
        schema: str,
        identifier: str,
    ) -> List[BaseRelation]:

        matches = []

        search = self._make_match_kwargs(database, schema, identifier)

        for relation in relations_list:
            if relation.matches(**search):
                matches.append(relation)

        return matches

    @available.parse_none
    def get_relation(self, database: str, schema: str,
                     identifier: str) -> Optional[BaseRelation]:
        relations_list = self.list_relations(database, schema)

        matches = self._make_match(relations_list, database, schema,
                                   identifier)

        if len(matches) > 1:
            kwargs = {
                'identifier': identifier,
                'schema': schema,
                'database': database,
            }
            get_relation_returned_multiple_results(kwargs, matches)

        elif matches:
            return matches[0]

        return None

    @available.deprecated('get_relation', lambda *a, **k: False)
    def already_exists(self, schema: str, name: str) -> bool:
        """DEPRECATED: Return if a model already exists in the database"""
        database = self.config.credentials.database
        relation = self.get_relation(database, schema, name)
        return relation is not None

    ###
    # SCHEMA AND QUOTING FUNCTIONS -- several of these are abstract and must
    #                                 be implemented by every adapter
    ###
    @abc.abstractmethod
    @available.parse_none
    def create_schema(self, relation: BaseRelation):
        """Create the given schema if it does not exist."""
        raise NotImplementedException(
            '`create_schema` is not implemented for this adapter!')

    @abc.abstractmethod
    @available.parse_none
    def drop_schema(self, relation: BaseRelation):
        """Drop the given schema (and everything in it) if it exists."""
        raise NotImplementedException(
            '`drop_schema` is not implemented for this adapter!')

    @available
    @abc.abstractclassmethod
    def quote(cls, identifier: str) -> str:
        """Quote the given identifier, as appropriate for the database."""
        raise NotImplementedException(
            '`quote` is not implemented for this adapter!')

    @available
    def quote_as_configured(self, identifier: str, quote_key: str) -> str:
        """Quote or do not quote the given identifer as configured in the
        project config for the quote key.

        The quote key should be one of 'database' (on bigquery, 'profile'),
        'identifier', or 'schema', or it will be treated as if you set `True`.
        """
        try:
            key = ComponentName(quote_key)
        except ValueError:
            return identifier

        default = self.Relation.get_default_quote_policy().get_part(key)
        if self.config.quoting.get(key, default):
            return self.quote(identifier)
        else:
            return identifier
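
    # Behavior sketch, assuming a project config that sets quoting for
    # 'schema' to false and leaves 'identifier' at its default:
    #
    #     adapter.quote_as_configured('my_schema', 'schema')      # unquoted
    #     adapter.quote_as_configured('my_table', 'identifier')   # quoted
    #     adapter.quote_as_configured('anything', 'not-a-key')    # unchanged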

    @available
    def quote_seed_column(self, column: str,
                          quote_config: Optional[bool]) -> str:
        # this is the default for now
        quote_columns: bool = False
        if isinstance(quote_config, bool):
            quote_columns = quote_config
        elif quote_config is None:
            deprecations.warn('column-quoting-unset')
        else:
            raise_compiler_error(
                f'The seed configuration value of "quote_columns" has an '
                f'invalid type {type(quote_config)}')

        if quote_columns:
            return self.quote(column)
        else:
            return column
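
    # Behavior sketch for the three accepted configurations:
    #
    #     adapter.quote_seed_column('id', True)    # returns quoted "id"
    #     adapter.quote_seed_column('id', False)   # returns 'id' unchanged
    #     adapter.quote_seed_column('id', None)    # warns, returns 'id'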

    ###
    # Conversions: These must be implemented by concrete implementations, for
    # converting agate types into their sql equivalents.
    ###
    @abc.abstractclassmethod
    def convert_text_type(cls, agate_table: agate.Table, col_idx: int) -> str:
        """Return the type in the database that best maps to the agate.Text
        type for the given agate table and column index.

        :param agate_table: The table
        :param col_idx: The index into the agate table for the column.
        :return: The name of the type in the database
        """
        raise NotImplementedException(
            '`convert_text_type` is not implemented for this adapter!')

    @abc.abstractclassmethod
    def convert_number_type(cls, agate_table: agate.Table,
                            col_idx: int) -> str:
        """Return the type in the database that best maps to the agate.Number
        type for the given agate table and column index.

        :param agate_table: The table
        :param col_idx: The index into the agate table for the column.
        :return: The name of the type in the database
        """
        raise NotImplementedException(
            '`convert_number_type` is not implemented for this adapter!')

    @abc.abstractclassmethod
    def convert_boolean_type(cls, agate_table: agate.Table,
                             col_idx: int) -> str:
        """Return the type in the database that best maps to the agate.Boolean
        type for the given agate table and column index.

        :param agate_table: The table
        :param col_idx: The index into the agate table for the column.
        :return: The name of the type in the database
        """
        raise NotImplementedException(
            '`convert_boolean_type` is not implemented for this adapter!')

    @abc.abstractclassmethod
    def convert_datetime_type(cls, agate_table: agate.Table,
                              col_idx: int) -> str:
        """Return the type in the database that best maps to the agate.DateTime
        type for the given agate table and column index.

        :param agate_table: The table
        :param col_idx: The index into the agate table for the column.
        :return: The name of the type in the database
        """
        raise NotImplementedException(
            '`convert_datetime_type` is not implemented for this adapter!')

    @abc.abstractclassmethod
    def convert_date_type(cls, agate_table: agate.Table, col_idx: int) -> str:
        """Return the type in the database that best maps to the agate.Date
        type for the given agate table and column index.

        :param agate_table: The table
        :param col_idx: The index into the agate table for the column.
        :return: The name of the type in the database
        """
        raise NotImplementedException(
            '`convert_date_type` is not implemented for this adapter!')

    @abc.abstractclassmethod
    def convert_time_type(cls, agate_table: agate.Table, col_idx: int) -> str:
        """Return the type in the database that best maps to the
        agate.TimeDelta type for the given agate table and column index.

        :param agate_table: The table
        :param col_idx: The index into the agate table for the column.
        :return: The name of the type in the database
        """
        raise NotImplementedException(
            '`convert_time_type` is not implemented for this adapter!')

    @available
    @classmethod
    def convert_type(cls, agate_table: agate.Table,
                     col_idx: int) -> Optional[str]:
        return cls.convert_agate_type(agate_table, col_idx)

    @classmethod
    def convert_agate_type(cls, agate_table: agate.Table,
                           col_idx: int) -> Optional[str]:
        agate_type: Type = agate_table.column_types[col_idx]
        conversions: List[Tuple[Type, Callable[..., str]]] = [
            (agate.Text, cls.convert_text_type),
            (agate.Number, cls.convert_number_type),
            (agate.Boolean, cls.convert_boolean_type),
            (agate.DateTime, cls.convert_datetime_type),
            (agate.Date, cls.convert_date_type),
            (agate.TimeDelta, cls.convert_time_type),
        ]
        for agate_cls, func in conversions:
            if isinstance(agate_type, agate_cls):
                return func(agate_table, col_idx)

        return None
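
    # Dispatch sketch (assumes a concrete subclass, since the convert_*_type
    # hooks above are abstract):
    #
    #     import agate
    #     table = agate.Table.from_object([{'n': 1, 's': 'x'}])
    #     MyAdapter.convert_type(table, 0)  # -> convert_number_type result
    #     MyAdapter.convert_type(table, 1)  # -> convert_text_type result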

    ###
    # Operations involving the manifest
    ###
    def execute_macro(
        self,
        macro_name: str,
        manifest: Optional[Manifest] = None,
        project: Optional[str] = None,
        context_override: Optional[Dict[str, Any]] = None,
        kwargs: Optional[Dict[str, Any]] = None,
        release: bool = False,
        text_only_columns: Optional[Iterable[str]] = None,
    ) -> agate.Table:
        """Look macro_name up in the manifest and execute its results.

        :param macro_name: The name of the macro to execute.
        :param manifest: The manifest to use for generating the base macro
            execution context. If none is provided, use the internal manifest.
        :param project: The name of the project to search in, or None for the
            first match.
        :param context_override: An optional dict to update() the macro
            execution context.
        :param kwargs: An optional dict of keyword args used to pass to the
            macro.
        :param release: If True, release the connection after executing.
        :param text_only_columns: An optional iterable of column names to
            treat as text-only in the result table.
        """
        if kwargs is None:
            kwargs = {}
        if context_override is None:
            context_override = {}

        if manifest is None:
            manifest = self._internal_manifest

        macro = manifest.find_macro_by_name(macro_name,
                                            self.config.project_name, project)
        if macro is None:
            if project is None:
                package_name = 'any package'
            else:
                package_name = 'the "{}" package'.format(project)

            raise RuntimeException(
                'dbt could not find a macro with the name "{}" in {}'.format(
                    macro_name, package_name))
        # This causes a reference cycle, as generate_runtime_macro()
        # ends up calling get_adapter, so the import has to be here.
        from dbt.context.providers import generate_runtime_macro
        macro_context = generate_runtime_macro(macro=macro,
                                               config=self.config,
                                               manifest=manifest,
                                               package_name=project)
        macro_context.update(context_override)

        macro_function = MacroGenerator(macro, macro_context)

        with self.connections.exception_handler(f'macro {macro_name}'):
            try:
                result = macro_function(**kwargs)
            finally:
                if release:
                    self.release_connection()
        return result
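
    # Hedged usage sketch; 'my_macro' is an assumed macro name that must
    # exist in the manifest for the lookup to succeed:
    #
    #     table = adapter.execute_macro(
    #         'my_macro',
    #         kwargs={'schema': 'staging'},
    #         release=True,
    #     )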

    @classmethod
    def _catalog_filter_table(cls, table: agate.Table,
                              manifest: Manifest) -> agate.Table:
        """Filter the table as appropriate for catalog entries. Subclasses can
        override this to change filtering rules on a per-adapter basis.
        """
        # force database + schema to be strings
        table = table_from_rows(
            table.rows,
            table.column_names,
            text_only_columns=['table_database', 'table_schema', 'table_name'])
        return table.where(_catalog_filter_schemas(manifest))

    def _get_one_catalog(
        self,
        information_schema: InformationSchema,
        schemas: Set[str],
        manifest: Manifest,
    ) -> agate.Table:

        name = '.'.join(
            [str(information_schema.database), 'information_schema'])

        with self.connection_named(name):
            kwargs = {
                'information_schema': information_schema,
                'schemas': schemas
            }
            table = self.execute_macro(
                GET_CATALOG_MACRO_NAME,
                kwargs=kwargs,
                release=True,
                # pass in the full manifest so we get any local project
                # overrides
                manifest=manifest,
            )

        results = self._catalog_filter_table(table, manifest)
        return results

    def get_catalog(self,
                    manifest: Manifest) -> Tuple[agate.Table, List[Exception]]:
        schema_map = self._get_catalog_schemas(manifest)

        with executor(self.config) as tpe:
            futures: List[Future[agate.Table]] = [
                tpe.submit(self._get_one_catalog, info, schemas, manifest)
                for info, schemas in schema_map.items() if len(schemas) > 0
            ]
            catalogs, exceptions = catch_as_completed(futures)

        return catalogs, exceptions

    def cancel_open_connections(self):
        """Cancel all open connections."""
        return self.connections.cancel_open()

    def calculate_freshness(
            self,
            source: BaseRelation,
            loaded_at_field: str,
            filter: Optional[str],
            manifest: Optional[Manifest] = None) -> Dict[str, Any]:
        """Calculate the freshness of sources in dbt, and return it"""
        kwargs: Dict[str, Any] = {
            'source': source,
            'loaded_at_field': loaded_at_field,
            'filter': filter,
        }

        # run the macro
        table = self.execute_macro(FRESHNESS_MACRO_NAME,
                                   kwargs=kwargs,
                                   release=True,
                                   manifest=manifest)
        # now we have a 1-row table of the maximum `loaded_at_field` value and
        # the current time according to the db.
        if len(table) != 1 or len(table[0]) != 2:
            raise_compiler_error(
                'Got an invalid result from "{}" macro: {}'.format(
                    FRESHNESS_MACRO_NAME, [tuple(r) for r in table]))
        if table[0][0] is None:
            # no records in the table, so really the max_loaded_at was
            # infinitely long ago. Just call it midnight UTC on January 1 of
            # year 1.
            max_loaded_at = datetime(1, 1, 1, 0, 0, 0, tzinfo=pytz.UTC)
        else:
            max_loaded_at = _utc(table[0][0], source, loaded_at_field)

        snapshotted_at = _utc(table[0][1], source, loaded_at_field)
        age = (snapshotted_at - max_loaded_at).total_seconds()
        return {
            'max_loaded_at': max_loaded_at,
            'snapshotted_at': snapshotted_at,
            'age': age,
        }
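
    # Consumption sketch (the one-hour threshold is an assumption, not a
    # dbt default):
    #
    #     result = adapter.calculate_freshness(source, 'loaded_at', None)
    #     if result['age'] > 3600:
    #         ...  # no new records loaded in over an hour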

    def pre_model_hook(self, config: Mapping[str, Any]) -> Any:
        """A hook for running some operation before the model materialization
        runs. The hook can assume it has a connection available.

        The only parameter is a configuration dictionary (the same one
        available in the materialization context). It should be considered
        read-only.

        The pre-model hook may return anything as a context, which will be
        passed to the post-model hook.
        """
        pass

    def post_model_hook(self, config: Mapping[str, Any], context: Any) -> None:
        """A hook for running some operation after the model materialization
        runs. The hook can assume it has a connection available.

        The first parameter is a configuration dictionary (the same one
        available in the materialization context). It should be considered
        read-only.

        The second parameter is the value returned by pre_model_hook.
        """
        pass
Ejemplo n.º 19
0
    def __init__(self, config):
        self.config = config
        self.cache = RelationsCache()
        self.connections = self.ConnectionManager(config)
        self._macro_manifest_lazy: Optional[MacroManifest] = None
Ejemplo n.º 20
0
class DefaultAdapter(object):
    requires = {}

    config_functions = [
        "get_columns_in_table",
        "get_missing_columns",
        "expand_target_column_types",
        "create_schema",
        "quote_as_configured",
        "cache_new_relation",

        # deprecated -- use versions that take relations instead
        "already_exists",
        "query_for_existing",
        "rename",
        "drop",
        "truncate",

        # just deprecated. going away in a future release
        "quote_schema_and_table",

        # versions of adapter functions that take / return Relations
        "get_relation",
        "drop_relation",
        "rename_relation",
        "truncate_relation",

        # formerly profile functions
        "execute",
        "add_query",
    ]

    raw_functions = [
        "get_status",
        "get_result_from_cursor",
        "quote",
        "convert_type",
    ]
    Relation = DefaultRelation
    Column = Column

    def __init__(self, config):
        self.config = config
        self.cache = RelationsCache()

    ###
    # ADAPTER-SPECIFIC FUNCTIONS -- each of these must be overridden in
    #                               every adapter
    ###
    @contextmanager
    def exception_handler(self, sql, model_name=None, connection_name=None):
        raise dbt.exceptions.NotImplementedException(
            '`exception_handler` is not implemented for this adapter!')

    @classmethod
    def type(cls):
        raise dbt.exceptions.NotImplementedException(
            '`type` is not implemented for this adapter!')

    @classmethod
    def date_function(cls):
        raise dbt.exceptions.NotImplementedException(
            '`date_function` is not implemented for this adapter!')

    @classmethod
    def get_status(cls, cursor):
        raise dbt.exceptions.NotImplementedException(
            '`get_status` is not implemented for this adapter!')

    def alter_column_type(self,
                          schema,
                          table,
                          column_name,
                          new_column_type,
                          model_name=None):
        raise dbt.exceptions.NotImplementedException(
            '`alter_column_type` is not implemented for this adapter!')

    def query_for_existing(self, schemas, model_name=None):
        if not isinstance(schemas, (list, tuple)):
            schemas = [schemas]

        all_relations = []

        for schema in schemas:
            all_relations.extend(self.list_relations(schema, model_name))

        return {
            relation.identifier: relation.type
            for relation in all_relations
        }

    def get_existing_schemas(self, model_name=None):
        raise dbt.exceptions.NotImplementedException(
            '`get_existing_schemas` is not implemented for this adapter!')

    def check_schema_exists(self, schema):
        raise dbt.exceptions.NotImplementedException(
            '`check_schema_exists` is not implemented for this adapter!')

    def cancel_connection(self, connection):
        raise dbt.exceptions.NotImplementedException(
            '`cancel_connection` is not implemented for this adapter!')

    ###
    # FUNCTIONS THAT SHOULD BE ABSTRACT
    ###
    def cache_new_relation(self, relation, model_name=None):
        """Cache a new relation in dbt. It will show up in `list relations`."""
        if relation is None:
            dbt.exceptions.raise_compiler_error(
                'Attempted to cache a null relation for {}'.format(model_name))
        if dbt.flags.USE_CACHE:
            self.cache.add(relation)
        # so jinja doesn't render things
        return ''

    @classmethod
    def get_result_from_cursor(cls, cursor):
        data = []
        column_names = []

        if cursor.description is not None:
            column_names = [col[0] for col in cursor.description]
            raw_results = cursor.fetchall()
            data = [dict(zip(column_names, row)) for row in raw_results]

        return dbt.clients.agate_helper.table_from_data(data, column_names)

    def drop(self, schema, relation, relation_type, model_name=None):
        identifier = relation
        relation = self.Relation.create(schema=schema,
                                        identifier=identifier,
                                        type=relation_type,
                                        quote_policy=self.config.quoting)

        return self.drop_relation(relation, model_name)

    def drop_relation(self, relation, model_name=None):
        if dbt.flags.USE_CACHE:
            self.cache.drop(relation)
        if relation.type is None:
            dbt.exceptions.raise_compiler_error(
                'Tried to drop relation {}, but its type is null.'.format(
                    relation))

        sql = 'drop {} if exists {} cascade'.format(relation.type, relation)

        connection, cursor = self.add_query(sql, model_name, auto_begin=False)

    def truncate(self, schema, table, model_name=None):
        relation = self.Relation.create(schema=schema,
                                        identifier=table,
                                        type='table',
                                        quote_policy=self.config.quoting)

        return self.truncate_relation(relation, model_name)

    def truncate_relation(self, relation, model_name=None):
        sql = 'truncate table {}'.format(relation)

        connection, cursor = self.add_query(sql, model_name)

    def rename(self, schema, from_name, to_name, model_name=None):
        quote_policy = self.config.quoting
        from_relation = self.Relation.create(schema=schema,
                                             identifier=from_name,
                                             quote_policy=quote_policy)
        to_relation = self.Relation.create(identifier=to_name,
                                           quote_policy=quote_policy)
        return self.rename_relation(from_relation=from_relation,
                                    to_relation=to_relation,
                                    model_name=model_name)

    def rename_relation(self, from_relation, to_relation, model_name=None):
        if dbt.flags.USE_CACHE:
            self.cache.rename(from_relation, to_relation)
        sql = 'alter {} {} rename to {}'.format(
            to_relation.type, from_relation, to_relation.include(schema=False))
        connection, cursor = self.add_query(sql, model_name)
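
    # Rendered-SQL sketch (names assumed; to_relation must carry a type,
    # e.g. 'table', for the statement to be valid):
    #
    #     alter table "foo"."bar" rename to "baz"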

    @classmethod
    def is_cancelable(cls):
        return True

    def get_missing_columns(self,
                            from_schema,
                            from_table,
                            to_schema,
                            to_table,
                            model_name=None):
        """Returns dict of {column:type} for columns in from_table that are
        missing from to_table"""
        from_columns = {
            col.name: col
            for col in self.get_columns_in_table(
                from_schema, from_table, model_name=model_name)
        }
        to_columns = {
            col.name: col
            for col in self.get_columns_in_table(
                to_schema, to_table, model_name=model_name)
        }

        missing_columns = set(from_columns.keys()) - set(to_columns.keys())

        return [
            col for (col_name, col) in from_columns.items()
            if col_name in missing_columns
        ]
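
    # Illustrative example: if from_table has columns (id, name, created_at)
    # and to_table has (id, name), this returns just the created_at Column.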

    @classmethod
    def _get_columns_in_table_sql(cls, schema_name, table_name, database):
        schema_filter = '1=1'
        if schema_name is not None:
            schema_filter = "table_schema = '{}'".format(schema_name)

        db_prefix = '' if database is None else '{}.'.format(database)

        sql = """
        select
            column_name,
            data_type,
            character_maximum_length,
            numeric_precision || ',' || numeric_scale as numeric_size

        from {db_prefix}information_schema.columns
        where table_name = '{table_name}'
          and {schema_filter}
        order by ordinal_position
        """.format(db_prefix=db_prefix,
                   table_name=table_name,
                   schema_filter=schema_filter).strip()

        return sql
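
    # Rendered example for schema_name='foo', table_name='bar',
    # database=None:
    #
    #     select
    #         column_name,
    #         data_type,
    #         character_maximum_length,
    #         numeric_precision || ',' || numeric_scale as numeric_size
    #     from information_schema.columns
    #     where table_name = 'bar'
    #       and table_schema = 'foo'
    #     order by ordinal_position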

    def get_columns_in_table(self,
                             schema_name,
                             table_name,
                             database=None,
                             model_name=None):
        sql = self._get_columns_in_table_sql(schema_name, table_name, database)
        connection, cursor = self.add_query(sql, model_name)

        data = cursor.fetchall()
        columns = []

        for row in data:
            name, data_type, char_size, numeric_size = row
            column = self.Column(name, data_type, char_size, numeric_size)
            columns.append(column)

        return columns

    @classmethod
    def _table_columns_to_dict(cls, columns):
        return {col.name: col for col in columns}

    def expand_target_column_types(self,
                                   temp_table,
                                   to_schema,
                                   to_table,
                                   model_name=None):

        reference_columns = self._table_columns_to_dict(
            self.get_columns_in_table(None, temp_table, model_name=model_name))

        target_columns = self._table_columns_to_dict(
            self.get_columns_in_table(to_schema,
                                      to_table,
                                      model_name=model_name))

        for column_name, reference_column in reference_columns.items():
            target_column = target_columns.get(column_name)

            if target_column is not None and \
               target_column.can_expand_to(reference_column):
                col_string_size = reference_column.string_size()
                new_type = self.Column.string_type(col_string_size)
                logger.debug("Changing col type from %s to %s in table %s.%s",
                             target_column.data_type, new_type, to_schema,
                             to_table)

                self.alter_column_type(to_schema, to_table, column_name,
                                       new_type, model_name)

    ###
    # RELATIONS
    ###
    def _schema_is_cached(self,
                          schema,
                          model_name=None,
                          debug_on_missing=True):
        """Check if the schema is cached, and by default logs if it is not."""
        if dbt.flags.USE_CACHE is False:
            return False
        elif schema not in self.cache:
            if debug_on_missing:
                logger.debug(
                    'On "{}": cache miss for schema "{}", this is inefficient'.
                    format(model_name or '<None>', schema))
            return False
        else:
            return True

    def _list_relations(self, schema, model_name=None):
        raise dbt.exceptions.NotImplementedException(
            '`_list_relations` is not implemented for this adapter!')

    def list_relations(self, schema, model_name=None):
        if self._schema_is_cached(schema, model_name):
            return self.cache.get_relations(schema)

        # we can't build the relations cache because we don't have a
        # manifest so we can't run any operations.
        relations = self._list_relations(schema, model_name=model_name)

        logger.debug('with schema={}, model_name={}, relations={}'.format(
            schema, model_name, relations))
        return relations

    def _make_match_kwargs(self, schema, identifier):
        quoting = self.config.quoting
        if identifier is not None and quoting['identifier'] is False:
            identifier = identifier.lower()

        if schema is not None and quoting['schema'] is False:
            schema = schema.lower()

        return filter_null_values({'identifier': identifier, 'schema': schema})

    def _make_match(self, relations_list, schema, identifier):

        matches = []

        search = self._make_match_kwargs(schema, identifier)

        for relation in relations_list:
            if relation.matches(**search):
                matches.append(relation)

        return matches

    def get_relation(self, schema, identifier, model_name=None):
        relations_list = self.list_relations(schema, model_name)

        matches = self._make_match(relations_list, schema, identifier)

        if len(matches) > 1:
            dbt.exceptions.get_relation_returned_multiple_results(
                {
                    'identifier': identifier,
                    'schema': schema
                }, matches)

        elif matches:
            return matches[0]

        return None

    ###
    # SANE ANSI SQL DEFAULTS
    ###
    def get_create_schema_sql(self, schema):
        schema = self.quote_as_configured(schema, 'schema')

        return ('create schema if not exists {schema}'.format(schema=schema))

    def get_drop_schema_sql(self, schema):
        schema = self.quote_as_configured(schema, 'schema')

        return ('drop schema if exists {schema} cascade'.format(schema=schema))

    ###
    # ODBC FUNCTIONS -- these should not need to change for every adapter,
    #                   although some adapters may override them
    ###
    def get_default_schema(self):
        return self.config.credentials.schema

    def get_connection(self, name=None, recache_if_missing=True):
        global connections_in_use

        if name is None:
            # if a name isn't specified, we'll re-use a single handle
            # named 'master'
            name = 'master'

        if connections_in_use.get(name):
            return connections_in_use.get(name)

        if not recache_if_missing:
            raise dbt.exceptions.InternalException(
                'Tried to get a connection "{}" which does not exist '
                '(recache_if_missing is off).'.format(name))

        logger.debug('Acquiring new {} connection "{}".'.format(
            self.type(), name))

        connection = self.acquire_connection(name)
        connections_in_use[name] = connection

        return self.get_connection(name)

    def cancel_open_connections(self):
        global connections_in_use

        for name, connection in connections_in_use.items():
            if name == 'master':
                continue

            self.cancel_connection(connection)
            yield name

    @classmethod
    def total_connections_allocated(cls):
        global connections_in_use, connections_available

        return len(connections_in_use) + len(connections_available)

    def acquire_connection(self, name):
        global connections_available, lock

        # we add a magic number, 2, because there are overhead connections:
        # one for pre- and post-run hooks and other misc operations that
        # occur before the run starts, and one for integration tests.
        max_connections = self.config.threads + 2

        with lock:
            num_allocated = self.total_connections_allocated()

            if len(connections_available) > 0:
                logger.debug('Re-using an available connection from the pool.')
                to_return = connections_available.pop()
                to_return.name = name
                return to_return

            elif num_allocated >= max_connections:
                raise dbt.exceptions.InternalException(
                    'Tried to request a new connection "{}" but '
                    'the maximum number of connections are already '
                    'allocated!'.format(name))

            logger.debug(
                'Opening a new connection ({} currently allocated)'.format(
                    num_allocated))

            result = Connection(type=self.type(),
                                name=name,
                                state='init',
                                transaction_open=False,
                                handle=None,
                                credentials=self.config.credentials)

            return self.open_connection(result)
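
    # Worked example of the budget above: with config.threads = 4 the cap is
    # 4 + 2 = 6 connections; a seventh concurrent acquire with an empty pool
    # raises InternalException.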

    def release_connection(self, name):
        global connections_in_use, connections_available, lock

        with lock:

            if name not in connections_in_use:
                return

            to_release = self.get_connection(name, recache_if_missing=False)

            if to_release.state == 'open':

                if to_release.transaction_open is True:
                    self.rollback(to_release)

                to_release.name = None
                connections_available.append(to_release)
            else:
                self.close(to_release)

            del connections_in_use[name]

    @classmethod
    def cleanup_connections(cls):
        global connections_in_use, connections_available, lock

        with lock:
            for name, connection in connections_in_use.items():
                if connection.state != 'closed':
                    logger.debug("Connection '{}' was left open.".format(name))
                else:
                    logger.debug(
                        "Connection '{}' was properly closed.".format(name))

            conns_in_use = list(connections_in_use.values())
            for conn in conns_in_use + connections_available:
                cls.close(conn)

            # garbage collect these connections
            connections_in_use = {}
            connections_available = []

    def reload(self, connection):
        return self.get_connection(connection.name)

    def add_begin_query(self, name):
        return self.add_query('BEGIN', name, auto_begin=False)

    def add_commit_query(self, name):
        return self.add_query('COMMIT', name, auto_begin=False)

    def begin(self, name):
        global connections_in_use
        connection = self.get_connection(name)

        if dbt.flags.STRICT_MODE:
            assert isinstance(connection, Connection)

        if connection.transaction_open is True:
            raise dbt.exceptions.InternalException(
                'Tried to begin a new transaction on connection "{}", but '
                'it already had one open!'.format(connection.name))

        self.add_begin_query(name)

        connection.transaction_open = True
        connections_in_use[name] = connection

        return connection

    def commit_if_has_connection(self, name):
        global connections_in_use

        if name is None:
            name = 'master'

        if connections_in_use.get(name) is None:
            return

        connection = self.get_connection(name, False)

        return self.commit(connection)

    def commit(self, connection):
        global connections_in_use

        if dbt.flags.STRICT_MODE:
            assert isinstance(connection, Connection)

        connection = self.reload(connection)

        if connection.transaction_open is False:
            raise dbt.exceptions.InternalException(
                'Tried to commit transaction on connection "{}", but '
                'it does not have one open!'.format(connection.name))

        logger.debug('On {}: COMMIT'.format(connection.name))
        self.add_commit_query(connection.name)

        connection.transaction_open = False
        connections_in_use[connection.name] = connection

        return connection

    def rollback(self, connection):
        if dbt.flags.STRICT_MODE:
            assert isinstance(connection, Connection)

        connection = self.reload(connection)

        if connection.transaction_open is False:
            raise dbt.exceptions.InternalException(
                'Tried to rollback transaction on connection "{}", but '
                'it does not have one open!'.format(connection.name))

        logger.debug('On {}: ROLLBACK'.format(connection.name))
        connection.handle.rollback()

        connection.transaction_open = False
        connections_in_use[connection.name] = connection

        return connection

    @classmethod
    def close(cls, connection):
        if dbt.flags.STRICT_MODE:
            assert isinstance(connection, Connection)

        # On Windows, sometimes connection handles don't have a close() attr.
        if hasattr(connection.handle, 'close'):
            connection.handle.close()

        connection.state = 'closed'

        return connection

    def add_query(self,
                  sql,
                  model_name=None,
                  auto_begin=True,
                  bindings=None,
                  abridge_sql_log=False):
        connection = self.get_connection(model_name)
        connection_name = connection.name

        if auto_begin and connection.transaction_open is False:
            self.begin(connection_name)

        logger.debug('Using {} connection "{}".'.format(
            self.type(), connection_name))

        with self.exception_handler(sql, model_name, connection_name):
            if abridge_sql_log:
                logger.debug('On %s: %s....', connection_name, sql[0:512])
            else:
                logger.debug('On %s: %s', connection_name, sql)
            pre = time.time()

            cursor = connection.handle.cursor()
            cursor.execute(sql, bindings)

            logger.debug("SQL status: %s in %0.2f seconds",
                         self.get_status(cursor), (time.time() - pre))

            return connection, cursor

    def clear_transaction(self, conn_name='master'):
        conn = self.begin(conn_name)
        self.commit(conn)
        return conn_name

    def execute_one(self, sql, model_name=None, auto_begin=False):
        self.get_connection(model_name)

        return self.add_query(sql, model_name, auto_begin)

    def execute_and_fetch(self, sql, model_name=None, auto_begin=False):
        _, cursor = self.execute_one(sql, model_name, auto_begin)

        status = self.get_status(cursor)
        table = self.get_result_from_cursor(cursor)
        return status, table

    def execute(self, sql, model_name=None, auto_begin=False, fetch=False):
        if fetch:
            return self.execute_and_fetch(sql, model_name, auto_begin)
        else:
            _, cursor = self.execute_one(sql, model_name, auto_begin)
            status = self.get_status(cursor)
            return status, dbt.clients.agate_helper.empty_table()

    def execute_all(self, sqls, model_name=None):
        connection = self.get_connection(model_name)

        if len(sqls) == 0:
            return connection

        for i, sql in enumerate(sqls):
            connection, _ = self.add_query(sql, model_name)

        return connection

    def create_schema(self, schema, model_name=None):
        logger.debug('Creating schema "%s".', schema)
        sql = self.get_create_schema_sql(schema)
        res = self.add_query(sql, model_name)

        self.commit_if_has_connection(model_name)

        return res

    def drop_schema(self, schema, model_name=None):
        logger.debug('Dropping schema "%s".', schema)
        sql = self.get_drop_schema_sql(schema)
        return self.add_query(sql, model_name)

    def already_exists(self, schema, table, model_name=None):
        relation = self.get_relation(schema=schema, identifier=table)
        return relation is not None

    @classmethod
    def quote(cls, identifier):
        return '"{}"'.format(identifier)
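
    # e.g. cls.quote('events') returns '"events"' -- ANSI double quoting;
    # adapters targeting other databases may override this.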

    def quote_as_configured(self, identifier, quote_key, model_name=None):
        """Quote or do not quote the given identifer as configured in the
        project config for the quote key.

        The quote key should be one of 'database' (on bigquery, 'profile'),
        'identifier', or 'schema', or it will be treated as if you set `True`.
        """
        default = self.Relation.DEFAULTS['quote_policy'].get(quote_key)
        if self.config.quoting.get(quote_key, default):
            return self.quote(identifier)
        else:
            return identifier

    @classmethod
    def convert_text_type(cls, agate_table, col_idx):
        raise dbt.exceptions.NotImplementedException(
            '`convert_text_type` is not implemented for this adapter!')

    @classmethod
    def convert_number_type(cls, agate_table, col_idx):
        raise dbt.exceptions.NotImplementedException(
            '`convert_number_type` is not implemented for this adapter!')

    @classmethod
    def convert_boolean_type(cls, agate_table, col_idx):
        raise dbt.exceptions.NotImplementedException(
            '`convert_boolean_type` is not implemented for this adapter!')

    @classmethod
    def convert_datetime_type(cls, agate_table, col_idx):
        raise dbt.exceptions.NotImplementedException(
            '`convert_datetime_type` is not implemented for this adapter!')

    @classmethod
    def convert_date_type(cls, agate_table, col_idx):
        raise dbt.exceptions.NotImplementedException(
            '`convert_date_type` is not implemented for this adapter!')

    @classmethod
    def convert_time_type(cls, agate_table, col_idx):
        raise dbt.exceptions.NotImplementedException(
            '`convert_time_type` is not implemented for this adapter!')

    @classmethod
    def convert_type(cls, agate_table, col_idx):
        return cls.convert_agate_type(agate_table, col_idx)

    @classmethod
    def convert_agate_type(cls, agate_table, col_idx):
        agate_type = agate_table.column_types[col_idx]
        conversions = [
            (agate.Text, cls.convert_text_type),
            (agate.Number, cls.convert_number_type),
            (agate.Boolean, cls.convert_boolean_type),
            (agate.DateTime, cls.convert_datetime_type),
            (agate.Date, cls.convert_date_type),
            (agate.TimeDelta, cls.convert_time_type),
        ]
        for agate_cls, func in conversions:
            if isinstance(agate_type, agate_cls):
                return func(agate_table, col_idx)

    ###
    # Operations involving the manifest
    ###
    def run_operation(self, manifest, operation_name):
        """Look the operation identified by operation_name up in the manifest
        and run it.

        Return an an AttrDict with three attributes: 'table', 'data', and
            'status'. 'table' is an agate.Table.
        """
        operation = manifest.find_operation_by_name(operation_name, 'dbt')

        # This causes a reference cycle, as dbt.context.runtime.generate()
        # ends up calling get_adapter, so the import has to be here.
        import dbt.context.runtime
        context = dbt.context.runtime.generate(
            operation,
            self.config,
            manifest,
        )

        result = operation.generator(context)()
        return result

    ###
    # Abstract methods involving the manifest
    ###
    @classmethod
    def _catalog_filter_table(cls, table, manifest):
        return table.where(_catalog_filter_schemas(manifest))

    def get_catalog(self, manifest):
        try:
            table = self.run_operation(manifest, GET_CATALOG_OPERATION_NAME)
        finally:
            self.release_connection(GET_CATALOG_OPERATION_NAME)

        results = self._catalog_filter_table(table, manifest)
        return results

    @classmethod
    def _relations_filter_table(cls, table, schemas):
        return table.where(_relations_filter_schemas(schemas))

    def _link_cached_relations(self, manifest, schemas):
        """This method has to exist because BigQueryAdapter and SnowflakeAdapter
        inherit from the PostgresAdapter, so they need something to override
        in order to disable linking.
        """
        pass

    def _relations_cache_for_schemas(self, manifest, schemas=None):
        if not dbt.flags.USE_CACHE:
            return

        if schemas is None:
            schemas = manifest.get_used_schemas()

        relations = []
        # add all relations
        for schema in schemas:
            # bypass the cache, of course!
            for relation in self._list_relations(schema):
                self.cache.add(relation)
        self._link_cached_relations(manifest, schemas)
        # it's possible that there were no relations in some schemas. We want
        # to insert the schemas we query into the cache's `.schemas` attribute
        # so we can check it later
        self.cache.update_schemas(schemas)

    def set_relations_cache(self, manifest, clear=False):
        """Run a query that gets a populated cache of the relations in the
        database and set the cache on this adapter.
        """
        if not dbt.flags.USE_CACHE:
            return

        with self.cache.lock:
            if clear:
                self.cache.clear()
            self._relations_cache_for_schemas(manifest)
Ejemplo n.º 21
0
    def __init__(self, config):
        self.config = config
        self.cache = RelationsCache()
        self.connections = self.ConnectionManager(config)
        self._internal_manifest_lazy = None