Exemple #1
0
    def run_sql(self, query, fetch='None', kwargs=None, connection_name=None):
        """Execute ``query`` on a test connection and optionally fetch rows.

        The query is first passed through ``self.transform_sql``. BigQuery and
        Presto adapter types are delegated to their dedicated runners; every
        other adapter type executes the statement on a cursor obtained from
        the acquired connection and commits it.

        :param query: SQL text; a blank or whitespace-only string is a no-op.
        :param fetch: ``'one'`` returns ``cursor.fetchone()``, ``'all'``
            returns ``cursor.fetchall()``; any other value returns ``None``.
        :param kwargs: forwarded to ``self.transform_sql``.
        :param connection_name: connection to use; ``'__test'`` when omitted.
        :raises BaseException: whatever the cursor raised, after a rollback.
        """
        if query.strip() == "":
            return None

        name = '__test' if connection_name is None else connection_name
        sql = self.transform_sql(query, kwargs=kwargs)

        adapter_type = self.adapter_type
        if adapter_type == 'bigquery':
            return self.run_sql_bigquery(sql, fetch)
        if adapter_type == 'presto':
            return self.run_sql_presto(sql, fetch, name)

        conn = self.adapter.acquire_connection(name)
        with conn.handle.cursor() as cursor:
            logger.debug(
                'test connection "{}" executing: {}'.format(name, sql))
            try:
                cursor.execute(sql)
                conn.handle.commit()
                if fetch == 'one':
                    result = cursor.fetchone()
                elif fetch == 'all':
                    result = cursor.fetchall()
                else:
                    result = None
                return result
            except BaseException as e:
                # undo the failed statement, then surface the original query
                # and error on stdout before re-raising
                conn.handle.rollback()
                print(query)
                print(e)
                raise e
            finally:
                # the transaction is finished whether it committed or not
                conn.transaction_open = False
Exemple #2
0
    def expand_column_types(self, goal, current, model_name=None):
        """Widen string columns of ``current`` to match those of ``goal``.

        Columns are matched by name. For every column present in both
        relations where the ``current`` column reports it ``can_expand_to``
        the ``goal`` column, the column type in ``current`` is altered to a
        string type of the ``goal`` column's size.

        :param goal: relation whose column sizes are the reference.
        :param current: relation whose columns may be altered.
        :param model_name: connection name forwarded to the adapter calls;
            when ``None`` the ``'master'`` connection is released at the end.
        """
        def columns_by_name(relation):
            # index a relation's columns by their name
            return {
                col.name: col for col in
                self.get_columns_in_relation(relation, model_name=model_name)
            }

        wanted = columns_by_name(goal)
        existing = columns_by_name(current)

        for column_name, wanted_column in wanted.items():
            existing_column = existing.get(column_name)
            if existing_column is None:
                continue
            if not existing_column.can_expand_to(wanted_column):
                continue

            new_type = self.Column.string_type(wanted_column.string_size())
            logger.debug("Changing col type from %s to %s in table %s",
                         existing_column.data_type, new_type, current)
            self.alter_column_type(current, column_name, new_type,
                                   model_name=model_name)

        if model_name is None:
            self.release_connection('master')
    def handle_error(cls, error, message, sql):
        """Log a failed statement and raise it as a ``DatabaseException``.

        :param error: exception object exposing an ``errors`` iterable of
            mappings that each carry a ``'message'`` entry.
        :param message: log template containing a ``{sql}`` placeholder.
        :param sql: statement text substituted into ``message``.
        :raises dbt.exceptions.DatabaseException: always; its text is the
            individual error messages joined by newlines.
        """
        logger.debug(message.format(sql=sql))
        logger.debug(error)

        messages = []
        for item in error.errors:
            messages.append(item['message'])

        raise dbt.exceptions.DatabaseException("\n".join(messages))
Exemple #4
0
def clone_and_checkout(repo, cwd, dirname=None, remove_git_dir=False,
                       branch=None):
    """Clone ``repo`` under ``cwd`` (or reuse an existing clone) and check
    out ``branch``.

    The target directory name is parsed out of git's stderr: either from the
    "already exists" failure message or from the "Cloning into" progress
    line. Any other clone failure is re-raised unchanged.

    :return: the directory name (relative to ``cwd``) reported by git.
    """
    already_cloned = None
    try:
        _, err = clone(repo, cwd, dirname=dirname,
                       remove_git_dir=remove_git_dir)
    except dbt.exceptions.CommandResultError as exc:
        err = exc.stderr.decode('utf-8')
        already_cloned = re.match(
            "fatal: destination path '(.+)' already exists", err)
        if not already_cloned:  # something else is wrong, raise it
            raise

    if already_cloned:
        directory = already_cloned.group(1)
        logger.debug('Updating existing dependency %s.', directory)
    else:
        # on the success path ``err`` is still the undecoded stderr of clone
        cloning_line = re.match("Cloning into '(.+)'", err.decode('utf-8'))
        directory = cloning_line.group(1)
        logger.debug('Pulling new dependency %s.', directory)

    full_path = os.path.join(cwd, directory)
    start_sha = get_current_sha(full_path)
    checkout(full_path, repo, branch)
    end_sha = get_current_sha(full_path)

    if not already_cloned:
        logger.debug('  Checked out at %s.', end_sha[:7])
    elif start_sha == end_sha:
        logger.debug('  Already at %s, nothing to do.', start_sha[:7])
    else:
        logger.debug('  Updated checkout from %s to %s.',
                     start_sha[:7], end_sha[:7])
    return directory
Exemple #5
0
    def patch_nodes(self, patches):
        """Apply ``patches`` (keyed by node name) to the model nodes.

        The ``patches`` dict is consumed: every patch that matches a model is
        popped from it. Whatever remains afterwards refers to models that
        were not found, and is reported at debug level.
        """
        # Patches only carry a node *name* and there is no name -> node
        # mapping, so scan the nodes once and pop matching patches rather
        # than doing one full node scan per patch.
        models = (
            node for node in self.nodes.values()
            if node.resource_type == NodeType.Model
        )
        for model in models:
            patch = patches.pop(model.name, None)
            if patch:
                model.patch(patch)

        # Patches are not nodes, so the usual target_not_found warning can't
        # be used for the leftovers.
        for leftover in patches.values():
            logger.debug((
                'WARNING: Found documentation for model "{}" which was '
                'not found or is disabled').format(leftover.name)
            )
Exemple #6
0
    def render_profile(cls, raw_profile, profile_name, target_override,
                       cli_vars):
        """Resolve the target name for a profile and render that target.

        Rendering happens in two steps so that (1) the target name itself may
        come from environment/cli variables and (2) unresolved variables in
        targets that are *not* selected never cause an error: only the target
        name is rendered first, and then just the selected profile data.

        :return: ``(target_name, rendered_profile_data)``.
        """
        renderer = ConfigRenderer(cli_vars=cli_vars)

        target_name = target_override
        if target_name is None:
            if 'target' in raw_profile:
                # the target came from yaml, so it may need rendering
                target_name = renderer.render_value(raw_profile['target'])
            else:
                target_name = 'default'
                logger.debug(
                    "target not specified in profile '{}', using '{}'"
                    .format(profile_name, target_name)
                )

        selected = cls._get_profile_data(
            raw_profile, profile_name, target_name
        )
        return target_name, renderer.render_profile_data(selected)
    def open(cls, connection):
        """Open ``connection`` by attaching a BigQuery client handle to it.

        If default credentials are missing, the gcloud default-credentials
        setup is run and client creation is retried once. Any other failure
        marks the connection as failed and is reported as a connection error.

        :param connection: connection object with ``state``, ``credentials``
            and ``handle`` attributes.
        :return: the same connection, with ``state == 'open'``.
        :raises dbt.exceptions.FailedToConnectException: when the first
            attempt to create the client fails for any reason other than
            missing default credentials.
        """
        if connection.state == 'open':
            logger.debug('Connection is already open, skipping open.')
            return connection

        try:
            handle = cls.get_bigquery_client(connection.credentials)

        except google.auth.exceptions.DefaultCredentialsError:
            logger.info("Please log into GCP to continue")
            dbt.clients.gcloud.setup_default_credentials()

            # errors from this retry propagate as-is: sibling ``except``
            # clauses do not cover exceptions raised inside a handler
            handle = cls.get_bigquery_client(connection.credentials)

        except Exception as e:
            # A stray bare ``raise`` used to sit here, which made the
            # logging, the state reset and the exception wrapping below
            # unreachable.
            logger.debug("Got an error when attempting to create a bigquery "
                         "client: '{}'".format(e))

            connection.handle = None
            connection.state = 'fail'

            raise dbt.exceptions.FailedToConnectException(str(e))

        connection.handle = handle
        connection.state = 'open'
        return connection
Exemple #8
0
def gcloud_installed():
    """Return ``True`` if ``gcloud --version`` could be run via ``run_cmd``.

    An ``OSError`` from ``run_cmd`` is logged at debug level and reported as
    ``False``; other exceptions propagate.
    """
    try:
        run_cmd('.', ['gcloud', '--version'])
    except OSError as e:
        logger.debug(e)
        return False
    else:
        return True
Exemple #9
0
def initialize_tracking(cookie_dir):
    """Set the module-level ``active_user`` for ``cookie_dir``.

    If initializing the user raises, the failure is logged with its
    traceback and ``active_user`` is replaced by ``User(None)``.
    """
    global active_user

    user = User(cookie_dir)
    # publish the user before initializing it, as the global is expected to
    # be set while ``initialize`` runs
    active_user = user
    try:
        user.initialize()
    except Exception:
        logger.debug('Got an exception trying to initialize tracking',
                     exc_info=True)
        active_user = User(None)
Exemple #10
0
 def drop_schema(self, database, schema, model_name=None):
     """Drop ``schema`` in ``database`` by running the drop-schema macro.

     Both names are quoted with ``quote_as_configured`` before being handed
     to the macro; ``model_name`` is used as the connection name.
     """
     logger.debug('Dropping schema "%s"."%s".', database, schema)
     quoted_database = self.quote_as_configured(database, 'database')
     quoted_schema = self.quote_as_configured(schema, 'schema')
     self.execute_macro(
         DROP_SCHEMA_MACRO_NAME,
         kwargs={
             'database_name': quoted_database,
             'schema_name': quoted_schema,
         },
         connection_name=model_name,
     )
Exemple #11
0
    def compile_node(self, node, manifest, extra_context=None):
        """Compile ``node`` into a ``CompiledNode`` with rendered SQL.

        The node's ``raw_sql`` is rendered with the generated runtime context
        (updated with ``extra_context``), CTEs are injected via
        ``prepend_ctes``, and ``wrapped_sql`` is then set according to the
        node's resource type.

        :param node: parsed node to compile.
        :param manifest: manifest passed to context generation and to
            ``prepend_ctes``.
        :param extra_context: optional mapping merged over the generated
            context before rendering.
        :return: the node returned by ``prepend_ctes``.
        """
        overrides = {} if extra_context is None else extra_context

        logger.debug("Compiling {}".format(node.get('unique_id')))

        node_dict = node.to_dict()
        node_dict.update(
            compiled=False,
            compiled_sql=None,
            extra_ctes_injected=False,
            extra_ctes=[],
            injected_sql=None,
        )
        compiled_node = CompiledNode(**node_dict)

        context = dbt.context.runtime.generate(
            compiled_node, self.config, manifest)
        context.update(overrides)

        raw_sql = node.get('raw_sql')
        compiled_node.compiled_sql = dbt.clients.jinja.get_rendered(
            raw_sql, context, node)
        compiled_node.compiled = True

        injected_node, _ = prepend_ctes(compiled_node, manifest)

        if injected_node.resource_type in {NodeType.Test, NodeType.Operation}:
            is_data_test = (
                'data' in injected_node.tags and
                is_type(injected_node, NodeType.Test)
            )
            if is_data_test:
                # data tests get wrapped in count(*)
                # TODO : move this somewhere more reasonable
                injected_node.wrapped_sql = (
                    "select count(*) from (\n{test_sql}\n) sbq"
                ).format(test_sql=injected_node.injected_sql)
            else:
                # everything else in this group is used unwrapped
                injected_node.wrapped_sql = injected_node.injected_sql
            return injected_node

        # Archives are handled automagically elsewhere (it would be nicer to
        # generate their SQL at the parser level) and ephemeral models are
        # likewise left alone; both keep whatever wrapped_sql they have.
        leave_untouched = (
            is_type(injected_node, NodeType.Archive) or
            (is_type(injected_node, NodeType.Model) and
             get_materialization(injected_node) == 'ephemeral')
        )
        if not leave_untouched:
            injected_node.wrapped_sql = None

        return injected_node
Exemple #12
0
    def parse_macro_file(self, macro_file_path, macro_file_contents, root_path,
                         package_name, resource_type, tags=None, context=None):
        """Parse a macro file into ``ParsedMacro`` nodes keyed by unique id.

        Only jinja macros whose name starts with ``dbt.utils.MACRO_PREFIX``
        are considered, and only when ``resource_type`` is
        ``NodeType.Macro``.

        :param macro_file_path: path of the file, used for both ``path`` and
            ``original_file_path`` of the resulting nodes.
        :param macro_file_contents: jinja source of the file.
        :param root_path: root path recorded on the nodes.
        :param package_name: package the macros belong to.
        :param resource_type: resource type to collect.
        :param tags: tags recorded on every node; defaults to an empty list.
        :param context: accepted for signature compatibility; not used here.
        :return: dict mapping unique id to ``ParsedMacro``.
        :raises dbt.exceptions.CompilationException: if the jinja source does
            not parse; the exception's ``node`` is set to the unparsed macro.
        """
        logger.debug("Parsing {}".format(macro_file_path))

        to_return = {}

        if tags is None:
            tags = []

        # change these to actual kwargs
        base_node = UnparsedMacro(
            path=macro_file_path,
            original_file_path=macro_file_path,
            package_name=package_name,
            raw_sql=macro_file_contents,
            root_path=root_path,
        )

        try:
            ast = dbt.clients.jinja.parse(macro_file_contents)
        except dbt.exceptions.CompilationException as e:
            e.node = base_node
            raise e

        for macro_node in ast.find_all(jinja2.nodes.Macro):
            macro_name = macro_node.name

            # A macro without the prefix has no derivable node name. Skip it
            # explicitly so ``name`` is never unbound or left over from a
            # previous iteration.
            if not macro_name.startswith(dbt.utils.MACRO_PREFIX):
                continue

            if NodeType.Macro != resource_type:
                continue

            name = macro_name.replace(dbt.utils.MACRO_PREFIX, '')
            unique_id = self.get_path(resource_type, package_name, name)

            merged = dbt.utils.deep_merge(
                base_node.serialize(),
                {
                    'name': name,
                    'unique_id': unique_id,
                    'tags': tags,
                    'resource_type': resource_type,
                    'depends_on': {'macros': []},
                })

            to_return[unique_id] = ParsedMacro(**merged)

        return to_return
Exemple #13
0
def track(user, *args, **kwargs):
    """Send a structured tracking event unless ``user`` opted out.

    Positional and keyword arguments are forwarded to
    ``tracker.track_struct_event``. Failures while sending are logged at
    debug level and otherwise ignored.
    """
    if user.do_not_track:
        return

    logger.debug("Sending event: {}".format(kwargs))
    try:
        tracker.track_struct_event(*args, **kwargs)
    except Exception:
        logger.debug(
            "An error was encountered while trying to send an event"
        )
Exemple #14
0
 def _schema_is_cached(self, database, schema, model_name=None):
     """Return whether ``(database, schema)`` is present in ``self.cache``.

     Always ``False`` when ``dbt.flags.USE_CACHE`` is ``False``. A cache miss
     is logged at debug level, labelled with ``model_name``.
     """
     if dbt.flags.USE_CACHE is False:
         return False

     if (database, schema) in self.cache:
         return True

     logger.debug(
         'On "{}": cache miss for schema "{}.{}", this is inefficient'
         .format(model_name or '<None>', database, schema)
     )
     return False
Exemple #15
0
 def create_schema(self, database, schema, model_name=None):
     """Create ``schema`` in ``database`` by running the create-schema macro.

     The macro runs on the ``model_name`` connection (``'master'`` when not
     given), which is then committed via ``commit_if_has_connection``.
     """
     logger.debug('Creating schema "%s"."%s".', database, schema)
     connection_name = 'master' if model_name is None else model_name

     quoted_database = self.quote_as_configured(database, 'database')
     quoted_schema = self.quote_as_configured(schema, 'schema')
     self.execute_macro(
         CREATE_SCHEMA_MACRO_NAME,
         kwargs={
             'database_name': quoted_database,
             'schema_name': quoted_schema,
         },
         connection_name=connection_name,
     )
     self.commit_if_has_connection(connection_name)
Exemple #16
0
    def cancel(self, connection):
        """Terminate the backend process that serves ``connection``.

        The backend pid is read from the connection's handle and passed to
        ``pg_terminate_backend`` in a query issued on the ``'master'``
        connection. The result row is logged at debug level.
        """
        name = connection.name
        pid = connection.handle.get_backend_pid()

        logger.debug("Cancelling query '{}' ({})".format(name, pid))

        terminate_sql = "select pg_terminate_backend({})".format(pid)
        _, cursor = self.add_query(terminate_sql, 'master')
        outcome = cursor.fetchone()

        logger.debug("Cancel query '{}': {}".format(name, outcome))
Exemple #17
0
def rmdir(path):
    """
    Remove the directory tree at ``path``.

    On Windows an error handler is installed that retries with different
    permissions; without it, removing directories (e.g. ones cloned via git)
    can make rmtree fail with a PermissionError.
    """
    logger.debug("DEBUG** Window rmdir sys.platform: {}".format(sys.platform))
    onerror = _windows_rmdir_readonly if sys.platform == 'win32' else None
    return shutil.rmtree(path, onerror=onerror)
Exemple #18
0
    def get_columns_in_relation(self, relation, model_name=None):
        """Return the dbt columns of the BigQuery table behind ``relation``.

        A ``ValueError`` or a BigQuery ``NotFound`` raised while looking the
        table up or converting its columns is logged at debug level and
        yields an empty list.
        """
        try:
            bq_table = self.connections.get_bq_table(
                database=relation.database,
                schema=relation.schema,
                identifier=relation.table_name,
                conn_name=model_name
            )
            columns = self._get_dbt_columns_from_bq_table(bq_table)
        except (ValueError, google.cloud.exceptions.NotFound) as e:
            logger.debug("get_columns_in_relation error: {}".format(e))
            columns = []

        return columns
Exemple #19
0
    def create_schema(self, schema, model_name=None):
        """Create the dataset for ``schema`` unless it already exists.

        The existence check is a ``get_dataset`` call on the client; only a
        ``NotFound`` response triggers creation, which runs inside the
        adapter's exception handler.
        """
        logger.debug('Creating schema "%s".', schema)

        client = self.get_connection(model_name).handle
        dataset = self.get_dataset(schema, model_name)

        # Emulate 'create schema if not exists ...'
        try:
            client.get_dataset(dataset)
        except google.api_core.exceptions.NotFound:
            with self.exception_handler('create dataset', model_name):
                client.create_dataset(dataset)
Exemple #20
0
    def add_query(cls,
                  profile,
                  sql,
                  model_name=None,
                  auto_begin=True,
                  bindings=None,
                  abridge_sql_log=False):
        """Execute ``sql`` on the connection for ``model_name``.

        :param profile: profile used to look up the connection.
        :param sql: statement to execute.
        :param model_name: name used to look up the connection.
        :param auto_begin: begin a transaction first when the connection
            reports ``transaction_open`` as ``False``.
        :param bindings: passed as second argument to ``cursor.execute``.
        :param abridge_sql_log: log only the first 512 characters of ``sql``.
        :return: ``(connection, cursor)``.
        """
        connection = cls.get_connection(profile, model_name)
        connection_name = connection.get('name')

        if auto_begin and connection['transaction_open'] is False:
            cls.begin(profile, connection_name)

        logger.debug('Using {} connection "{}".'.format(
            cls.type(), connection_name))

        with cls.exception_handler(profile, sql, model_name, connection_name):
            if abridge_sql_log:
                log_template, logged_sql = 'On %s: %s....', sql[0:512]
            else:
                log_template, logged_sql = 'On %s: %s', sql
            logger.debug(log_template, connection_name, logged_sql)

            started = time.time()

            cursor = connection.get('handle').cursor()
            cursor.execute(sql, bindings)

            status = cls.get_status(cursor)
            elapsed = time.time() - started
            logger.debug("SQL status: %s in %0.2f seconds", status, elapsed)

            return connection, cursor
Exemple #21
0
def run_cmd(cwd, cmd, env=None):
    """Run ``cmd`` in ``cwd`` and return its captured ``(stdout, stderr)``.

    :param cwd: working directory for the child process.
    :param cmd: command and arguments as a list of strings.
    :param env: extra environment variables, layered over a copy of
        ``os.environ``; ``None`` leaves the environment untouched.
    :raises dbt.exceptions.CommandError: if ``cmd`` is empty.
    :raises dbt.exceptions.CommandResultError: on a non-zero exit status.
    """
    logger.debug('Executing "{}"'.format(' '.join(cmd)))
    if len(cmd) == 0:
        raise dbt.exceptions.CommandError(cwd, cmd)

    # Passing ``env`` straight to Popen would replace the environment
    # entirely, which has exciting consequences on Windows, so merge it into
    # a copy of the current environment instead.
    if env is None:
        full_env = None
    else:
        full_env = os.environ.copy()
        full_env.update(env)

    try:
        proc = subprocess.Popen(
            cmd,
            cwd=cwd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            env=full_env)
        out, err = proc.communicate()
    except OSError as exc:
        _interpret_oserror(exc, cwd, cmd)

    logger.debug('STDOUT: "{}"'.format(out))
    logger.debug('STDERR: "{}"'.format(err))

    exit_code = proc.returncode
    if exit_code != 0:
        logger.debug('command return code={}'.format(exit_code))
        raise dbt.exceptions.CommandResultError(cwd, cmd, exit_code,
                                                out, err)

    return out, err
Exemple #22
0
    def cancel(self, connection):
        """Abort the session that ``connection`` is running in.

        The handle's ``session_id`` is passed to ``system$abort_session`` in
        a query issued through ``add_query``. The result row is logged at
        debug level.
        """
        name = connection.name
        sid = connection.handle.session_id

        logger.debug("Cancelling query '{}' ({})".format(name, sid))

        abort_sql = 'select system$abort_session({})'.format(sid)
        _, cursor = self.add_query(abort_sql)
        outcome = cursor.fetchone()

        logger.debug("Cancel query '{}': {}".format(name, outcome))
Exemple #23
0
def invalid_source_fail_unless_test(node, target_name, target_table_name,
                                    disabled):
    """Report a source that could not be resolved for ``node``.

    For test nodes the problem is only logged (when the source is disabled)
    or passed to ``warn_or_error``; for every other resource type
    ``source_target_not_found`` is called.
    """
    if node.resource_type != NodeType.Test:
        dbt.exceptions.source_target_not_found(node,
                                               target_name,
                                               target_table_name,
                                               disabled=disabled)
        return

    msg = dbt.exceptions.get_source_not_found_or_disabled_msg(
        node, target_name, target_table_name, disabled)
    if disabled:
        logger.debug(f'WARNING: {msg}')
    else:
        dbt.exceptions.warn_or_error(msg, log_fmt='WARNING: {}')
Exemple #24
0
    def exception_handler(cls,
                          profile,
                          sql,
                          model_name=None,
                          connection_name=None):
        """Generator that translates errors raised at its ``yield`` point.

        ``psycopg2.DatabaseError`` becomes a dbt ``DatabaseException`` and
        any other ``Exception`` becomes a dbt ``RuntimeException``; in both
        cases the connection named ``connection_name`` is released first.

        :param sql: statement text, used only for logging.
        """
        try:
            yield

        except psycopg2.DatabaseError as e:
            logger.debug('Postgres error: {}'.format(str(e)))

            try:
                # attempt to release the connection
                cls.release_connection(profile, connection_name)
            except psycopg2.Error:
                # a failed release must not hide the original database error
                logger.debug("Failed to release connection!")

            error_text = dbt.compat.to_string(e).strip()
            raise dbt.exceptions.DatabaseException(error_text)

        except Exception as e:
            logger.debug("Error running SQL: %s", sql)
            logger.debug("Rolling back transaction.")
            cls.release_connection(profile, connection_name)
            raise dbt.exceptions.RuntimeException(e)
    def exception_handler(self, sql):
        """Generator that translates errors raised at its ``yield`` point.

        * Snowflake ``ProgrammingError`` for an empty statement is logged and
          swallowed.
        * ``ProgrammingError`` about a missing current database releases the
          connection and raises ``FailedToConnectException``.
        * Any other ``ProgrammingError`` releases the connection and raises
          ``DatabaseException``.
        * Any other ``Exception`` releases the connection; dbt
          ``RuntimeException`` instances are re-raised untouched, everything
          else is wrapped in a ``RuntimeException``.

        :param sql: statement text, used only for logging.
        """
        try:
            yield
        except snowflake.connector.errors.ProgrammingError as e:
            msg = dbt.compat.to_string(e)

            logger.debug('Snowflake error: {}'.format(msg))

            if 'Empty SQL statement' in msg:
                logger.debug("got empty sql statement, moving on")
            elif 'This session does not have a current database' in msg:
                self.release()
                raise dbt.exceptions.FailedToConnectException(
                    ('{}\n\nThis error sometimes occurs when invalid '
                     'credentials are provided, or when your default role '
                     'does not have access to use the specified database. '
                     'Please double check your profile and try again.')
                    .format(msg))
            else:
                self.release()
                raise dbt.exceptions.DatabaseException(msg)
        except Exception as e:
            logger.debug("Error running SQL: %s", sql)
            logger.debug("Rolling back transaction.")
            self.release()
            if isinstance(e, dbt.exceptions.RuntimeException):
                # during a sql query, an internal to dbt exception was raised.
                # this sounds a lot like a signal handler and probably has
                # useful information, so raise it without modification.
                raise
            # Arbitrary exceptions usually have no ``msg`` attribute; reading
            # it unconditionally would replace the real error with an
            # AttributeError. Fall back to the exception's string form.
            if hasattr(e, 'msg'):
                message = e.msg
            else:
                message = dbt.compat.to_string(e)
            raise dbt.exceptions.RuntimeException(message)
Exemple #26
0
    def open(cls, connection):
        """Open ``connection`` through pyodbc using its credentials.

        A connection string is assembled from the credentials' driver, host,
        port, UID and PWD (plus ``additional_parameters`` when set) and
        handed to ``pyodbc.connect`` with autocommit enabled.

        :param connection: connection object with ``state``, ``credentials``
            and ``handle`` attributes.
        :return: the same connection, with ``state == 'open'``.
        :raises dbt.exceptions.FailedToConnectException: if pyodbc reports an
            error; the connection is marked ``'fail'`` and its handle cleared.
        """

        if connection.state == 'open':
            logger.debug('Connection is already open, skipping open.')
            return connection

        credentials = connection.credentials

        try:
            con_str = ["ConnectionType=Direct", "AuthenticationType=Plain"]
            con_str.append(f"Driver={{{credentials.driver}}}")
            con_str.append(f"HOST={credentials.host}")
            con_str.append(f"PORT={credentials.port}")
            con_str.append(f"UID={credentials.UID}")
            con_str.append(f"PWD={credentials.PWD}")
            if credentials.additional_parameters:
                con_str.append(f"{credentials.additional_parameters}")
            con_str_concat = ';'.join(con_str)

            # Never write the password to the log: the PWD entry is masked in
            # the logged copy of the connection string.
            # NOTE(review): additional_parameters is logged verbatim; confirm
            # it cannot carry secrets.
            loggable = ';'.join(
                'PWD=***' if part.startswith('PWD=') else part
                for part in con_str
            )
            logger.debug(f'Using connection string: {loggable}')

            handle = pyodbc.connect(con_str_concat, autocommit=True)

            connection.state = 'open'
            connection.handle = handle
            logger.debug(f'Connected to db: {credentials.database}')

        except pyodbc.Error as e:
            logger.debug(f"Could not connect to db: {e}")

            connection.handle = None
            connection.state = 'fail'

            raise dbt.exceptions.FailedToConnectException(str(e))

        return connection
Exemple #27
0
    def drop_schema(cls, profile, project_cfg, schema, model_name=None):
        """Delete the dataset for ``schema`` together with its tables.

        Nothing happens when ``check_schema_exists`` reports the schema as
        absent. Table removal and dataset deletion run inside the adapter's
        exception handler.
        """
        logger.debug('Dropping schema "%s".', schema)

        schema_exists = cls.check_schema_exists(
            profile, project_cfg, schema, model_name)
        if not schema_exists:
            return

        client = cls.get_connection(profile).get('handle')
        dataset = cls.get_dataset(profile, project_cfg, schema, model_name)

        with cls.exception_handler(profile, 'drop dataset', model_name):
            cls.drop_tables_in_schema(profile, project_cfg, dataset)
            client.delete_dataset(dataset)
Exemple #28
0
    def create_schema(cls, profile, project_cfg, schema, model_name=None):
        """Create the dataset for ``schema`` unless it already exists.

        The existence check is a ``get_dataset`` call on the client; only a
        ``NotFound`` response triggers creation, which runs inside the
        adapter's exception handler.
        """
        logger.debug('Creating schema "%s".', schema)

        client = cls.get_connection(profile, model_name).get('handle')
        dataset = cls.get_dataset(profile, project_cfg, schema, model_name)

        # Emulate 'create schema if not exists ...'
        try:
            client.get_dataset(dataset)
        except google.api_core.exceptions.NotFound:
            with cls.exception_handler(profile, 'create dataset', model_name):
                client.create_dataset(dataset)
Exemple #29
0
    def exception_handler(cls, profile, sql, model_name=None,
                          connection_name='master'):
        """Generator that translates errors raised at its ``yield`` point.

        * Snowflake ``ProgrammingError`` for an empty statement is logged and
          swallowed.
        * ``ProgrammingError`` about a missing current database releases the
          connection and raises ``FailedToConnectException``.
        * Any other ``ProgrammingError`` releases the connection and raises
          ``DatabaseException``.
        * Any other ``Exception`` releases the connection and is wrapped in a
          dbt ``RuntimeException``.

        :param profile: profile used to look up and release the connection.
        :param sql: statement text, used only for logging.
        :param connection_name: connection to release on failure.
        """
        # The returned connection is not used here; the lookup is kept
        # because it ran before the guarded block in the original code.
        # NOTE(review): confirm whether this call is still needed.
        cls.get_connection(profile, connection_name)

        try:
            yield
        except snowflake.connector.errors.ProgrammingError as e:
            msg = dbt.compat.to_string(e)

            logger.debug('Snowflake error: {}'.format(msg))

            if 'Empty SQL statement' in msg:
                logger.debug("got empty sql statement, moving on")
            elif 'This session does not have a current database' in msg:
                cls.release_connection(profile, connection_name)
                raise dbt.exceptions.FailedToConnectException(
                    ('{}\n\nThis error sometimes occurs when invalid '
                     'credentials are provided, or when your default role '
                     'does not have access to use the specified database. '
                     'Please double check your profile and try again.')
                    .format(msg))
            else:
                cls.release_connection(profile, connection_name)
                raise dbt.exceptions.DatabaseException(msg)
        except Exception as e:
            logger.debug("Error running SQL: %s", sql)
            logger.debug("Rolling back transaction.")
            cls.release_connection(profile, connection_name)
            # Arbitrary exceptions usually have no ``msg`` attribute; reading
            # it unconditionally would replace the real error with an
            # AttributeError. Fall back to the exception's string form.
            if hasattr(e, 'msg'):
                message = e.msg
            else:
                message = dbt.compat.to_string(e)
            raise dbt.exceptions.RuntimeException(message)
    def add_query(self,
                  sql,
                  auto_begin=True,
                  bindings=None,
                  abridge_sql_log=False):
        """Execute ``sql`` on the current thread's connection.

        :param sql: statement to execute.
        :param auto_begin: begin a transaction first when the connection
            reports ``transaction_open`` as ``False``.
        :param bindings: query parameters; omitted from the ``execute`` call
            entirely when ``None``.
        :param abridge_sql_log: log only the first 512 characters of ``sql``.
        :return: ``(connection, cursor)``.
        """

        connection = self.get_thread_connection()

        if auto_begin and connection.transaction_open is False:
            self.begin()

        logger.debug('Using {} connection "{}".'.format(
            self.TYPE, connection.name))

        with self.exception_handler(sql):
            if abridge_sql_log:
                log_line = "On {}: {}....".format(connection.name,
                                                  sql[0:512])
            else:
                log_line = "On {}: {}".format(connection.name, sql)
            logger.debug(log_line)

            started = time.time()

            cursor = connection.handle.cursor()

            # pyodbc does not handle a None type binding!
            execute_args = (sql,) if bindings is None else (sql, bindings)
            cursor.execute(*execute_args)

            response = self.get_response(cursor)
            elapsed = time.time() - started
            logger.debug("SQL status: {} in {:0.2f} seconds".format(
                response, elapsed))

            return connection, cursor
Exemple #31
0
    def _safe_release_connection(self):
        """Release the connection named after this runner's node.

        :return: ``None`` on success; if releasing raised, the exception is
            logged with its traceback and its string form is returned.
        """
        node_name = self.node.name
        failure = None
        try:
            self.adapter.release_connection(node_name)
        except Exception as exc:
            logger.debug(
                'Error releasing connection for node {}: {!s}\n{}'.format(
                    node_name, exc, traceback.format_exc()))
            failure = dbt.compat.to_string(exc)

        return failure
    def exception_handler(self, sql):
        """Generator that translates errors raised at its ``yield`` point.

        ``pyodbc.DatabaseError`` becomes a dbt ``DatabaseException`` (chained
        to the original). Any other ``Exception`` releases the connection;
        dbt ``RuntimeException`` instances are re-raised untouched and
        everything else is wrapped in a ``RuntimeException``.

        :param sql: statement text, used only for logging.
        """
        try:
            yield

        except pyodbc.DatabaseError as e:
            logger.debug("Database error: {}".format(str(e)))

            try:
                # attempt to release the connection
                self.release()
            except pyodbc.Error:
                # a failed release must not hide the original database error
                logger.debug("Failed to release connection!")

            error_text = str(e).strip()
            raise dbt.exceptions.DatabaseException(error_text) from e

        except Exception as e:
            logger.debug(f"Error running SQL: {sql}")
            logger.debug("Rolling back transaction.")
            self.release()
            if isinstance(e, dbt.exceptions.RuntimeException):
                # An exception internal to dbt was raised during the query.
                # It most likely comes from something like a signal handler
                # and carries useful information, so pass it on unmodified.
                raise

            raise dbt.exceptions.RuntimeException(e)
Exemple #33
0
    def compile_node(self, node, manifest):
        """Compile ``node`` into a ``CompiledNode`` with rendered SQL.

        The node's ``raw_sql`` is rendered with the generated runtime
        context, CTEs are injected via ``prepend_ctes``, and ``wrapped_sql``
        is then set according to the node's resource type.

        :param node: parsed node to compile.
        :param manifest: manifest passed to context generation and to
            ``prepend_ctes``.
        :return: the node returned by ``prepend_ctes``.
        """
        logger.debug("Compiling {}".format(node.get('unique_id')))

        node_dict = node.to_dict()
        node_dict.update(
            compiled=False,
            compiled_sql=None,
            extra_ctes_injected=False,
            extra_ctes=[],
            injected_sql=None,
        )
        compiled_node = CompiledNode(**node_dict)

        context = dbt.context.runtime.generate(compiled_node, self.project,
                                               manifest)

        raw_sql = node.get('raw_sql')
        compiled_node.compiled_sql = dbt.clients.jinja.get_rendered(
            raw_sql, context, node)
        compiled_node.compiled = True

        injected_node, _ = prepend_ctes(compiled_node, manifest)

        wrappable = {NodeType.Test, NodeType.Analysis, NodeType.Operation}
        if injected_node.resource_type in wrappable:
            is_data_test = (
                'data' in injected_node.tags and
                is_type(injected_node, NodeType.Test)
            )
            if is_data_test:
                # data tests get wrapped in count(*)
                # TODO : move this somewhere more reasonable
                injected_node.wrapped_sql = (
                    "select count(*) from (\n{test_sql}\n) sbq"
                ).format(test_sql=injected_node.injected_sql)
            else:
                # schema tests and analyses are used unwrapped
                injected_node.wrapped_sql = injected_node.injected_sql
            return injected_node

        # Archives are handled automagically elsewhere (it would be nicer to
        # generate their SQL at the parser level) and ephemeral models are
        # likewise left alone; both keep whatever wrapped_sql they have.
        leave_untouched = (
            is_type(injected_node, NodeType.Archive) or
            (is_type(injected_node, NodeType.Model)
             and get_materialization(injected_node) == 'ephemeral')
        )
        if not leave_untouched:
            injected_node.wrapped_sql = None

        return injected_node
Exemple #34
0
def run_cmd(
    cwd: str, cmd: List[str], env: Optional[Dict[str, Any]] = None
) -> Tuple[bytes, bytes]:
    """Run ``cmd`` in ``cwd`` and return its captured ``(stdout, stderr)``.

    :param cwd: working directory for the child process.
    :param cmd: the command and its arguments; must not be empty.
    :param env: extra environment variables, layered on top of a copy of
        ``os.environ``. When None, the child inherits the environment.
    :raises dbt.exceptions.CommandError: if ``cmd`` is empty.
    :raises dbt.exceptions.CommandResultError: if the process exits with a
        non-zero return code.
    """
    logger.debug('Executing "{}"'.format(' '.join(cmd)))
    if not cmd:
        raise dbt.exceptions.CommandError(cwd, cmd)

    # Passing env to Popen replaces the environment entirely, which has
    # exciting consequences on Windows, so merge into a copy instead.
    if env is None:
        merged_env = None
    else:
        merged_env = os.environ.copy()
        merged_env.update(env)

    try:
        process = subprocess.Popen(
            cmd,
            cwd=cwd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            env=merged_env,
        )
        stdout, stderr = process.communicate()
    except OSError as exc:
        # NOTE(review): presumably this helper always raises; otherwise the
        # names used below would be unbound. Confirm against its definition.
        _interpret_oserror(exc, cwd, cmd)

    logger.debug('STDOUT: "{!s}"'.format(stdout))
    logger.debug('STDERR: "{!s}"'.format(stderr))

    exit_code = process.returncode
    if exit_code != 0:
        logger.debug('command return code={}'.format(exit_code))
        raise dbt.exceptions.CommandResultError(cwd, cmd, exit_code,
                                                stdout, stderr)

    return stdout, stderr
Exemple #35
0
    def cancel_connection(cls, profile, connection):
        """Abort the session behind ``connection`` with a query on 'master'.

        :param profile: passed through to ``cls.add_query``.
        :param connection: mapping with a 'handle' entry (exposing
            ``session_id``) and an optional 'name' entry used for logging.
        """
        session_id = connection['handle'].session_id
        conn_name = connection.get('name')

        abort_sql = 'select system$abort_session({})'.format(session_id)

        logger.debug(
            "Cancelling query '{}' ({})".format(conn_name, session_id))

        # Only the cursor of the (connection, cursor) pair is needed here.
        cursor = cls.add_query(profile, abort_sql, 'master')[1]
        result = cursor.fetchone()

        logger.debug("Cancel query '{}': {}".format(conn_name, result))
Exemple #36
0
        def __deepcopy__(self, memo):
            """Log where the undefined variable was hit, then raise.

            Nothing is copied: a debug message naming the variable, the node
            and its source path is logged, and a compiler error is raised
            via ``dbt.exceptions.raise_compiler_error``.
            """
            node = self.node
            source_path = os.path.join(node.get('root_path'),
                                       node.get('original_file_path'))

            message = (
                'dbt encountered an undefined variable, "{}" in node {}.{} '
                '(source path: {})'
            ).format(self.name, node.get('package_name'), node.get('name'),
                     source_path)
            logger.debug(message)

            # match jinja's message
            undefined_msg = "{!r} is undefined".format(self.name)
            dbt.exceptions.raise_compiler_error(undefined_msg, node=self.node)
Exemple #37
0
    def add_query(self,
                  sql: str,
                  auto_begin: bool = True,
                  bindings: Optional[Any] = {},
                  abridge_sql_log: bool = False) -> Tuple[Connection, Any]:
        """Execute ``sql`` on the current thread's connection.

        :param sql: the statement to execute.
        :param auto_begin: when true and no transaction is open, call
            ``self.begin()`` first.
        :param bindings: passed straight through to ``cursor.execute``.
            NOTE(review): the shared ``{}`` default is never mutated in this
            method; it is kept because drivers may treat ``{}`` and ``None``
            differently - confirm before changing it.
        :param abridge_sql_log: log only the first 512 characters of ``sql``
            in the 'On <connection>' message.
        :return: the ``(connection, cursor)`` pair after execution.
        """
        logger.debug(sql)
        conn = self.get_thread_connection()
        if auto_begin and conn.transaction_open is False:
            self.begin()

        logger.debug(
            'Using {} connection "{}".'.format(self.TYPE, conn.name))

        with self.exception_handler(sql):
            log_sql = '{}...'.format(sql[:512]) if abridge_sql_log else sql
            logger.debug(
                'On {connection_name}: {sql}',
                connection_name=conn.name,
                sql=log_sql,
            )

            started_at = time.time()
            cursor = conn.handle.cursor()
            cursor.execute(sql, bindings)

            # Fetch the status first so the elapsed time covers it too.
            status = self.get_status(cursor)
            elapsed = time.time() - started_at
            logger.debug("SQL status: {status} in {elapsed:0.2f} seconds",
                         status=status,
                         elapsed=elapsed)

            return conn, cursor
Exemple #38
0
    def add_query(self, sql, name=None, auto_begin=True, bindings=None,
                  abridge_sql_log=False):
        """Execute ``sql`` on the connection looked up by ``name``.

        :param sql: the statement to execute.
        :param name: connection name handed to ``self.get``.
        :param auto_begin: when true and no transaction is open, begin one
            on that connection first.
        :param bindings: passed through to ``cursor.execute``.
        :param abridge_sql_log: log only the first 512 characters of ``sql``.
        :return: the ``(connection, cursor)`` pair after execution.
        """
        conn = self.get(name)
        conn_name = conn.name

        if auto_begin and conn.transaction_open is False:
            self.begin(conn_name)

        logger.debug(
            'Using {} connection "{}".'.format(self.TYPE, conn_name))

        with self.exception_handler(sql, conn_name):
            if abridge_sql_log:
                log_args = ('On %s: %s....', conn_name, sql[0:512])
            else:
                log_args = ('On %s: %s', conn_name, sql)
            logger.debug(*log_args)

            started_at = time.time()
            cursor = conn.handle.cursor()
            cursor.execute(sql, bindings)

            # Fetch the status first so the elapsed time covers it too.
            status = self.get_status(cursor)
            elapsed = time.time() - started_at
            logger.debug("SQL status: %s in %0.2f seconds", status, elapsed)

            return conn, cursor
    def add_query(self, sql, auto_begin=True, bindings=None,
                  abridge_sql_log=False):
        """Execute ``sql`` on the current thread's connection.

        :param sql: the statement to execute.
        :param auto_begin: when true and no transaction is open, call
            ``self.begin()`` first.
        :param bindings: query parameters; truthy values are converted to a
            tuple before being handed to ``cursor.execute``.
        :param abridge_sql_log: log only the first 512 characters of ``sql``.
        :return: the ``(connection, cursor)`` pair after execution.
        """
        conn = self.get_thread_connection()

        # The sqlserver connector is more strict than, eg., psycopg2 - which
        # allows any iterable thing to be passed as a binding.
        if bindings:
            bindings = tuple(bindings)

        if auto_begin and conn.transaction_open is False:
            self.begin()

        logger.debug(
            'Using {} connection "{}".'.format(self.TYPE, conn.name))

        with self.exception_handler(sql):
            if abridge_sql_log:
                log_args = ('On %s: %s....', conn.name, sql[0:512])
            else:
                log_args = ('On %s: %s', conn.name, sql)
            logger.debug(*log_args)

            started_at = time.time()
            cursor = conn.handle.cursor()
            cursor.execute(sql, bindings)

            # Fetch the status first so the elapsed time covers it too.
            status = self.get_status(cursor)
            elapsed = time.time() - started_at
            logger.debug("SQL status: %s in %0.2f seconds", status, elapsed)

            return conn, cursor
Exemple #40
0
    def get_catalog(self, manifest):
        """Build a catalog table with one row per column of each relation.

        Every (database, schema) pair reported by
        ``manifest.get_used_schemas()`` is listed, and each relation's
        columns are turned into catalog rows. The comment and owner fields
        are always None.

        :param manifest: provides the schemas to inspect.
        :return: the result of ``agate_helper.table_from_data``.
        """
        used_schemas = manifest.get_used_schemas()

        column_names = (
            'table_database',
            'table_schema',
            'table_name',
            'table_type',
            'table_comment',
            'table_owner',
            'column_name',
            'column_index',
            'column_type',
            'column_comment',
        )
        # Pseudo-column names that do not describe a real column.
        pseudo_columns = ('# Partition Information', '# col_name')

        rows = []
        for database_name, schema_name in used_schemas:
            for relation in self.list_relations(database_name, schema_name):
                logger.debug("Getting table schema for relation {}".format(
                    relation))  # noqa
                table_columns = self.get_columns_in_relation(relation)
                rel_type = self.get_relation_type(relation)

                # The index keeps counting across skipped entries.
                for column_index, column in enumerate(table_columns):
                    # Skip pseudocolumns and columns with no type.
                    if column.name in pseudo_columns or column.dtype is None:
                        continue

                    rows.append({
                        'table_database': relation.database,
                        'table_schema': relation.schema,
                        'table_name': relation.name,
                        'table_type': rel_type,
                        'table_comment': None,
                        'table_owner': None,
                        'column_name': column.name,
                        'column_index': column_index,
                        'column_type': column.data_type,
                        'column_comment': None,
                    })

        return dbt.clients.agate_helper.table_from_data(rows, column_names)
Exemple #41
0
    def raw_execute(self, sql, name=None, fetch=False):
        """Submit ``sql`` as a standard-SQL query job and wait for it.

        :param sql: the query text.
        :param name: connection name handed to ``self.get``.
        :param fetch: not used in this method body.
        :return: ``(query_job, iterator)`` where ``iterator`` is the value
            of ``query_job.result()``.
        """
        conn = self.get(name)
        bq_client = conn.handle

        logger.debug('On %s: %s', name, sql)

        config = google.cloud.bigquery.QueryJobConfig()
        config.use_legacy_sql = False
        job = bq_client.query(sql, config)

        # result() blocks until the query has completed
        with self.exception_handler(sql, conn.name):
            rows = job.result()

        return job, rows
Exemple #42
0
    def get_credentials(cls, credentials):
        """Return the credentials to connect with, based on their method.

        :param credentials: object exposing a ``method`` attribute.
        :return: ``credentials`` unchanged for the 'database' method (or no
            method), or the result of
            ``cls.get_tmp_iam_cluster_credentials`` for 'iam'.
        :raises dbt.exceptions.FailedToConnectException: for any other
            method value.
        """
        auth_method = credentials.method

        # A missing 'method' is accepted for backwards compatibility.
        if auth_method == 'database' or auth_method is None:
            logger.debug("Connecting to Redshift using 'database' credentials")
            return credentials

        if auth_method == 'iam':
            logger.debug("Connecting to Redshift using 'IAM' credentials")
            return cls.get_tmp_iam_cluster_credentials(credentials)

        error = "Invalid 'method' in profile: '{}'".format(auth_method)
        raise dbt.exceptions.FailedToConnectException(error)
Exemple #43
0
        def __deepcopy__(self, memo):
            """Report the undefined variable instead of copying it.

            A debug line records the variable name, the node's package and
            name, and the node's source path; then
            ``dbt.exceptions.raise_compiler_error`` is called.
            """
            root_path = self.node.get('root_path')
            file_path = self.node.get('original_file_path')
            full_path = os.path.join(root_path, file_path)

            template = (
                'dbt encountered an undefined variable, "{}" in node {}.{} '
                '(source path: {})'
            )
            logger.debug(template.format(
                self.name,
                self.node.get('package_name'),
                self.node.get('name'),
                full_path,
            ))

            # match jinja's message
            dbt.exceptions.raise_compiler_error(
                "{!r} is undefined".format(self.name), node=self.node)
    def _safe_release_connection(self):
        """Release this node's connection without letting errors escape.

        :return: None on success; otherwise the exception converted to a
            string, after it has been logged with its traceback.
        """
        name = self.node.name
        try:
            self.adapter.release_connection(name)
        except Exception as exc:
            details = 'Error releasing connection for node {}: {!s}\n{}'
            logger.debug(details.format(name, exc, traceback.format_exc()))
            return dbt.compat.to_string(exc)
        else:
            return None
Exemple #45
0
    def get_credentials(cls, credentials):
        """Pick the credentials to use according to ``credentials.method``.

        :param credentials: object exposing a ``method`` attribute.
        :return: the same ``credentials`` for 'database' or a missing
            method; temporary IAM cluster credentials for 'iam'.
        :raises dbt.exceptions.FailedToConnectException: if the method is
            anything else.
        """
        method = credentials.method
        is_database = method == 'database' or method is None

        # Support missing 'method' for backwards compatibility
        if is_database:
            logger.debug("Connecting to Redshift using 'database' credentials")
            return credentials

        if method != 'iam':
            raise dbt.exceptions.FailedToConnectException(
                "Invalid 'method' in profile: '{}'".format(method))

        logger.debug("Connecting to Redshift using 'IAM' credentials")
        return cls.get_tmp_iam_cluster_credentials(credentials)
Exemple #46
0
    def load(self, internal_manifest: Optional[Manifest] = None):
        """Load macros, run the macro hook, then parse every project.

        :param internal_manifest: forwarded to ``self._load_macros``.
        """
        cached_results = self.read_parse_results()
        if cached_results is not None:
            logger.debug('Got an acceptable cached parse result')

        self._load_macros(cached_results, internal_manifest=internal_manifest)

        # A macros-only manifest is what provides the context below.
        macros_only = Manifest.from_macros(
            macros=self.results.macros,
            files=self.results.files,
        )
        self.macro_hook(macros_only)

        # Parse the projects one at a time.
        for single_project in self.all_projects.values():
            self.parse_project(single_project, macros_only, cached_results)
Exemple #47
0
    def get_catalog(self, manifest: Manifest) -> agate.Table:
        """Collect the columns of every relation into an agate table.

        Each column object returned by ``get_columns_in_relation`` is
        represented by its instance ``__dict__``.

        :param manifest: provides the (database, schema) pairs to inspect.
        :return: ``agate.Table.from_object`` applied to the collected dicts.
        """
        column_rows = []
        for database_name, schema_name in manifest.get_used_schemas():
            for relation in self.list_relations(database_name, schema_name):
                logger.debug("Getting table schema for relation {}", relation)
                column_rows.extend([
                    column.__dict__
                    for column in self.get_columns_in_relation(relation)
                ])
        return agate.Table.from_object(column_rows)
Exemple #48
0
    def raw_execute(cls, profile, sql, model_name=None, fetch=False, **kwargs):
        """Submit ``sql`` as a standard-SQL query job and wait for it.

        :param profile: used to look up the connection and handed to the
            exception handler.
        :param sql: the query text.
        :param model_name: connection name; also used in the log line.
        :param fetch: not used in this method body.
        :param kwargs: accepted but not used in this method body.
        :return: ``(query_job, iterator)`` where ``iterator`` is the value
            of ``query_job.result()``.
        """
        bq_client = cls.get_connection(profile, model_name).get('handle')

        logger.debug('On %s: %s', model_name, sql)

        config = google.cloud.bigquery.QueryJobConfig()
        config.use_legacy_sql = False
        job = bq_client.query(sql, config)

        # result() blocks until the query has completed
        with cls.exception_handler(profile, sql, model_name):
            rows = job.result()

        return job, rows
Exemple #49
0
 def _schema_is_cached(self,
                       schema,
                       model_name=None,
                       debug_on_missing=True):
     """Return True if ``schema`` is in the cache, logging misses by default.

     Always returns False when ``dbt.flags.USE_CACHE`` is False.
     """
     if dbt.flags.USE_CACHE is False:
         return False

     if schema in self.cache:
         return True

     if debug_on_missing:
         miss_message = (
             'On "{}": cache miss for schema "{}", this is inefficient'.
             format(model_name or '<None>', schema))
         logger.debug(miss_message)
     return False
Exemple #50
0
    def exception_handler(self, sql, connection_name='master'):
        """Translate errors raised inside the managed block.

        Presumably used as a context manager (the decorator is outside this
        view). Exceptions with no args are re-raised unchanged; otherwise a
        ``dbt.exceptions.RuntimeException`` is raised, carrying the thrift
        status error message when the first arg has a ``status`` attribute,
        or ``str(exc)`` when it does not.

        :param sql: the SQL being run, used only for logging.
        :param connection_name: not used in this method body.
        """
        try:
            yield
        except Exception as exc:
            logger.debug("Error while running:\n{}".format(sql))
            logger.debug(exc)
            if not exc.args:
                raise

            first_arg = exc.args[0]
            if hasattr(first_arg, 'status'):
                error_text = first_arg.status.errorMessage
            else:
                error_text = str(exc)
            raise dbt.exceptions.RuntimeException(error_text)
Exemple #51
0
def _initialize_downloads():
    """Pick the downloads directory, create it, and record cleanup intent.

    Sets the module globals ``DOWNLOADS_PATH`` and ``REMOVE_DOWNLOADS``. A
    path that is already set is kept; otherwise the ``DBT_DOWNLOADS_DIR``
    environment variable is tried, then a fresh temporary directory.
    """
    global DOWNLOADS_PATH, REMOVE_DOWNLOADS

    path_unset = DOWNLOADS_PATH is None
    if path_unset:
        # The user might have set an environment variable: use that
        # directory and do not remove it when finished.
        DOWNLOADS_PATH = os.environ.get('DBT_DOWNLOADS_DIR')
        REMOVE_DOWNLOADS = False

    if DOWNLOADS_PATH is None:
        # A per-run temp directory is removed at the end of successful runs.
        DOWNLOADS_PATH = tempfile.mkdtemp(prefix='dbt-downloads-')
        REMOVE_DOWNLOADS = True

    dbt.clients.system.make_directory(DOWNLOADS_PATH)
    logger.debug("Set downloads directory='{}'".format(DOWNLOADS_PATH))
Exemple #52
0
def invalid_ref_fail_unless_test(node, target_model_name,
                                 target_model_package, disabled):
    """Report an unresolved ref, more leniently when the node is a test.

    Non-test nodes go to ``dbt.exceptions.ref_target_not_found``. For test
    nodes a message is built and either logged at debug level (when
    ``disabled`` is truthy) or passed to ``dbt.exceptions.warn_or_error``.
    """
    if node.get('resource_type') != NodeType.Test:
        dbt.exceptions.ref_target_not_found(
            node,
            target_model_name,
            target_model_package)
        return

    message = invalid_ref_test_message(node, target_model_name,
                                       target_model_package, disabled)
    report = logger.debug if disabled else dbt.exceptions.warn_or_error
    report(message)
Exemple #53
0
    def exception_handler(self, sql):
        """Release the connection and normalise errors from the managed block.

        Presumably used as a context manager (the decorator is outside this
        view). On any exception the SQL is logged and ``self.release()`` is
        called; dbt ``RuntimeException`` instances are re-raised as-is and
        anything else is wrapped in a new ``RuntimeException``.

        :param sql: the SQL being run, used only for logging.
        """
        try:
            yield

        except Exception as exc:
            logger.debug("Error running SQL: %s", sql)
            logger.debug("Rolling back transaction.")
            self.release()

            if not isinstance(exc, dbt.exceptions.RuntimeException):
                raise dbt.exceptions.RuntimeException(exc)

            # during a sql query, an internal to dbt exception was raised.
            # this sounds a lot like a signal handler and probably has
            # useful information, so raise it without modification.
            raise
Exemple #54
0
def compile_node(adapter, config, node, manifest, extra_context, write=True):
    """Compile ``node``, inject runtime config, and optionally write its SQL.

    :param adapter: forwarded to ``_inject_runtime_config``.
    :param config: used to build the ``Compiler`` and for ``target_path``.
    :param node: the node to compile.
    :param manifest: forwarded to the compiler.
    :param extra_context: forwarded to the compiler and the config injection.
    :param write: when true and the node is writable, the injected SQL is
        written out and the node's ``build_path`` is set to the written path.
    :return: the compiled node.
    """
    compiled = Compiler(config).compile_node(node, manifest, extra_context)
    compiled = _inject_runtime_config(adapter, compiled, extra_context)

    if not (write and _is_writable(compiled)):
        return compiled

    logger.debug(
        'Writing injected SQL for node "{}"'.format(compiled.unique_id))
    compiled.build_path = dbt.writer.write_node(
        compiled, config.target_path, 'compiled', compiled.injected_sql)

    return compiled
Exemple #55
0
    def _materialize_as_view(self, model):
        """Create a view from the model's injected SQL.

        :param model: mapping with 'database', 'schema', 'name', 'alias' and
            'injected_sql' entries.
        :return: the literal string "CREATE VIEW".
        """
        database = model.get('database')
        schema = model.get('schema')
        name = model.get('name')
        alias = model.get('alias')
        sql = model.get('injected_sql')

        logger.debug("Model SQL ({}):\n{}".format(name, sql))

        # The view is named after the alias; the connection after the model.
        view_args = dict(
            database=database,
            schema=schema,
            table_name=alias,
            conn_name=name,
            sql=sql,
        )
        self.connections.create_view(**view_args)
        return "CREATE VIEW"
Exemple #56
0
    def exception_handler(self, sql, connection_name='master'):
        """Route errors from the managed block to the right handler.

        Presumably used as a context manager (the decorator is outside this
        view). ``BadRequest`` and ``Forbidden`` go to ``self.handle_error``
        with a message template; any other exception is logged and wrapped
        in ``dbt.exceptions.RuntimeException``.

        :param sql: the SQL being run, used in messages.
        :param connection_name: not used in this method body.
        """
        try:
            yield

        except google.cloud.exceptions.BadRequest as bad_request:
            self.handle_error(
                bad_request, "Bad request while running:\n{sql}", sql)

        except google.cloud.exceptions.Forbidden as forbidden:
            self.handle_error(
                forbidden, "Access denied while running:\n{sql}", sql)

        except Exception as exc:
            logger.debug("Unhandled error while running:\n{}".format(sql))
            logger.debug(exc)
            error_text = dbt.compat.to_string(exc)
            raise dbt.exceptions.RuntimeException(error_text)
Exemple #57
0
def _checkout(cwd, repo, branch):
    """Fetch ``branch`` from origin and hard-reset the checkout to it.

    :param cwd: directory of the git checkout.
    :param repo: not used in this function body.
    :param branch: branch or tag name to check out.
    :return: the ``(out, err)`` pair from the final ``git reset``.
    """
    logger.debug('  Checking out branch {}.'.format(branch))

    for git_args in (
        ['remote', 'set-branches', 'origin', branch],
        ['fetch', '--tags', '--depth', '1', 'origin', branch],
    ):
        run_cmd(cwd, ['git'] + git_args)

    # Prefer tags to branches if one exists
    spec_template = 'tags/{}' if branch in list_tags(cwd) else 'origin/{}'
    spec = spec_template.format(branch)

    stdout, stderr = run_cmd(cwd, ['git', 'reset', '--hard', spec],
                             env={'LC_ALL': 'C'})
    return stdout, stderr
Exemple #58
0
    def cleanup_all(self):
        """Log the state of in-use connections, close all, and reset.

        Runs entirely under ``self.lock``.
        """
        with self.lock:
            for conn_name, in_use_conn in self.in_use.items():
                if in_use_conn.state == 'closed':
                    template = "Connection '{}' was properly closed."
                else:
                    template = "Connection '{}' was left open."
                logger.debug(template.format(conn_name))

            # Snapshot both collections before closing anything.
            every_connection = list(self.in_use.values()) + self.available
            for each_conn in every_connection:
                self.close(each_conn)

            # garbage collect these connections
            self.in_use.clear()
            self._set_initial_connections()
Exemple #59
0
    def list_relations(self, database, schema, model_name=None):
        """List the relations in ``schema``, using the cache when possible.

        :param database: database used for the cache lookup and for building
            the information schema relation.
        :param schema: schema whose relations are listed.
        :param model_name: forwarded to the cache check and to the uncached
            listing.
        :return: cached relations if the schema is cached, otherwise the
            result of ``list_relations_without_caching``.
        """
        if self._schema_is_cached(database, schema, model_name):
            return self.cache.get_relations(database, schema)

        placeholder = self.Relation.create(
            database=database, schema=schema, model_name='')
        info_schema = placeholder.information_schema()

        # we can't build the relations cache because we don't have a
        # manifest so we can't run any operations.
        found = self.list_relations_without_caching(
            info_schema, schema, model_name=model_name)

        summary = 'with schema={}, model_name={}, relations={}'.format(
            schema, model_name, found)
        logger.debug(summary)
        return found
Exemple #60
0
    def alter_table_add_columns(self, relation, columns, model_name=None):
        """Append ``columns`` to the schema of the table behind ``relation``.

        The current table is fetched, the new columns are converted with
        ``column_to_bq_schema`` and appended to its schema, and the table is
        updated with only the 'schema' field.

        :param relation: exposes ``database``, ``schema`` and ``identifier``
            for building the table reference.
        :param columns: the columns to add.
        :param model_name: connection name handed to ``self.connections.get``.
        """
        # The original message had a stray, unbalanced double quote after
        # the table placeholder; the table name is now quoted on both sides.
        logger.debug('Adding columns ({}) to table "{}".'.format(
                     columns, relation))

        conn = self.connections.get(model_name)
        client = conn.handle

        table_ref = self.connections.table_ref(relation.database,
                                               relation.schema,
                                               relation.identifier, conn)
        table = client.get_table(table_ref)

        new_columns = [column_to_bq_schema(col) for col in columns]
        new_schema = table.schema + new_columns

        new_table = google.cloud.bigquery.Table(table_ref, schema=new_schema)
        client.update_table(new_table, ['schema'])