def run_sql(self, query, fetch='None', kwargs=None, connection_name=None):
    if connection_name is None:
        connection_name = '__test'

    if query.strip() == "":
        return

    sql = self.transform_sql(query, kwargs=kwargs)
    if self.adapter_type == 'bigquery':
        return self.run_sql_bigquery(sql, fetch)
    elif self.adapter_type == 'presto':
        return self.run_sql_presto(sql, fetch, connection_name)

    conn = self.adapter.acquire_connection(connection_name)
    with conn.handle.cursor() as cursor:
        logger.debug('test connection "{}" executing: {}'.format(
            connection_name, sql))
        try:
            cursor.execute(sql)
            conn.handle.commit()
            if fetch == 'one':
                return cursor.fetchone()
            elif fetch == 'all':
                return cursor.fetchall()
            else:
                return
        except BaseException as e:
            conn.handle.rollback()
            print(query)
            print(e)
            raise e
        finally:
            conn.transaction_open = False
def expand_column_types(self, goal, current, model_name=None):
    reference_columns = {
        c.name: c for c in
        self.get_columns_in_relation(goal, model_name=model_name)
    }
    target_columns = {
        c.name: c for c in
        self.get_columns_in_relation(current, model_name=model_name)
    }

    for column_name, reference_column in reference_columns.items():
        target_column = target_columns.get(column_name)

        if target_column is not None and \
                target_column.can_expand_to(reference_column):
            col_string_size = reference_column.string_size()
            new_type = self.Column.string_type(col_string_size)
            logger.debug("Changing col type from %s to %s in table %s",
                         target_column.data_type, new_type, current)

            self.alter_column_type(current, column_name, new_type,
                                   model_name=model_name)

    if model_name is None:
        self.release_connection('master')
def handle_error(cls, error, message, sql):
    logger.debug(message.format(sql=sql))
    logger.debug(error)
    error_msg = "\n".join(
        [item['message'] for item in error.errors])
    raise dbt.exceptions.DatabaseException(error_msg)
def clone_and_checkout(repo, cwd, dirname=None, remove_git_dir=False,
                       branch=None):
    exists = None
    try:
        _, err = clone(repo, cwd, dirname=dirname,
                       remove_git_dir=remove_git_dir)
    except dbt.exceptions.CommandResultError as exc:
        err = exc.stderr.decode('utf-8')
        exists = re.match("fatal: destination path '(.+)' already exists",
                          err)
        if not exists:  # something else is wrong, raise it
            raise

    directory = None
    start_sha = None
    if exists:
        directory = exists.group(1)
        logger.debug('Updating existing dependency %s.', directory)
    else:
        matches = re.match("Cloning into '(.+)'", err.decode('utf-8'))
        directory = matches.group(1)
        logger.debug('Pulling new dependency %s.', directory)

    full_path = os.path.join(cwd, directory)
    start_sha = get_current_sha(full_path)
    checkout(full_path, repo, branch)
    end_sha = get_current_sha(full_path)

    if exists:
        if start_sha == end_sha:
            logger.debug('  Already at %s, nothing to do.', start_sha[:7])
        else:
            logger.debug('  Updated checkout from %s to %s.',
                         start_sha[:7], end_sha[:7])
    else:
        logger.debug('  Checked out at %s.', end_sha[:7])
    return directory
def patch_nodes(self, patches):
    """Patch nodes with the given dict of patches. Note that this consumes
    the input!
    """
    # because we don't have any mapping from node _names_ to nodes, and we
    # only have the node name in the patch, we have to iterate over all the
    # nodes looking for matching names. We could use _find_by_name if we
    # were ok with doing an O(n*m) search (one nodes scan per patch)
    for node in self.nodes.values():
        if node.resource_type != NodeType.Model:
            continue
        patch = patches.pop(node.name, None)
        if not patch:
            continue
        node.patch(patch)

    # log debug-level warning about nodes we couldn't find
    if patches:
        for patch in patches.values():
            # since patches aren't nodes, we can't use the existing
            # target_not_found warning
            logger.debug((
                'WARNING: Found documentation for model "{}" which was '
                'not found or is disabled').format(patch.name)
            )
def render_profile(cls, raw_profile, profile_name, target_override,
                   cli_vars):
    """This is a containment zone for the hateful way we're rendering
    profiles.
    """
    renderer = ConfigRenderer(cli_vars=cli_vars)

    # rendering profiles is a bit complex. Two constraints cause trouble:
    # 1) users should be able to use environment/cli variables to specify
    #    the target in their profile.
    # 2) Missing environment/cli variables in profiles/targets that don't
    #    end up getting selected should not cause errors.
    # so first we'll just render the target name, then we use that rendered
    # name to extract a profile that we can render.
    if target_override is not None:
        target_name = target_override
    elif 'target' in raw_profile:
        # render the target if it was parsed from yaml
        target_name = renderer.render_value(raw_profile['target'])
    else:
        target_name = 'default'
        logger.debug(
            "target not specified in profile '{}', using '{}'"
            .format(profile_name, target_name)
        )

    raw_profile_data = cls._get_profile_data(
        raw_profile, profile_name, target_name
    )

    profile_data = renderer.render_profile_data(raw_profile_data)
    return target_name, profile_data
def open(cls, connection):
    if connection.state == 'open':
        logger.debug('Connection is already open, skipping open.')
        return connection

    try:
        handle = cls.get_bigquery_client(connection.credentials)

    except google.auth.exceptions.DefaultCredentialsError as e:
        logger.info("Please log into GCP to continue")
        dbt.clients.gcloud.setup_default_credentials()

        handle = cls.get_bigquery_client(connection.credentials)

    except Exception as e:
        logger.debug("Got an error when attempting to create a bigquery "
                     "client: '{}'".format(e))

        connection.handle = None
        connection.state = 'fail'

        raise dbt.exceptions.FailedToConnectException(str(e))

    connection.handle = handle
    connection.state = 'open'
    return connection
def gcloud_installed():
    try:
        run_cmd('.', ['gcloud', '--version'])
        return True
    except OSError as e:
        logger.debug(e)
        return False
def initialize_tracking(cookie_dir):
    global active_user
    active_user = User(cookie_dir)
    try:
        active_user.initialize()
    except Exception:
        logger.debug('Got an exception trying to initialize tracking',
                     exc_info=True)
        active_user = User(None)
def drop_schema(self, database, schema, model_name=None):
    logger.debug('Dropping schema "%s"."%s".', database, schema)

    kwargs = {
        'database_name': self.quote_as_configured(database, 'database'),
        'schema_name': self.quote_as_configured(schema, 'schema'),
    }
    self.execute_macro(DROP_SCHEMA_MACRO_NAME,
                       kwargs=kwargs,
                       connection_name=model_name)
def compile_node(self, node, manifest, extra_context=None):
    if extra_context is None:
        extra_context = {}

    logger.debug("Compiling {}".format(node.get('unique_id')))

    data = node.to_dict()
    data.update({
        'compiled': False,
        'compiled_sql': None,
        'extra_ctes_injected': False,
        'extra_ctes': [],
        'injected_sql': None,
    })
    compiled_node = CompiledNode(**data)

    context = dbt.context.runtime.generate(
        compiled_node, self.config, manifest)
    context.update(extra_context)

    compiled_node.compiled_sql = dbt.clients.jinja.get_rendered(
        node.get('raw_sql'),
        context,
        node)

    compiled_node.compiled = True

    injected_node, _ = prepend_ctes(compiled_node, manifest)

    should_wrap = {NodeType.Test, NodeType.Operation}
    if injected_node.resource_type in should_wrap:
        # data tests get wrapped in count(*)
        # TODO : move this somewhere more reasonable
        if 'data' in injected_node.tags and \
                is_type(injected_node, NodeType.Test):
            injected_node.wrapped_sql = (
                "select count(*) from (\n{test_sql}\n) sbq").format(
                    test_sql=injected_node.injected_sql)
        else:
            # don't wrap schema tests or analyses.
            injected_node.wrapped_sql = injected_node.injected_sql

    elif is_type(injected_node, NodeType.Archive):
        # unfortunately we do everything automagically for
        # archives. in the future it'd be nice to generate
        # the SQL at the parser level.
        pass

    elif (is_type(injected_node, NodeType.Model) and
          get_materialization(injected_node) == 'ephemeral'):
        pass

    else:
        injected_node.wrapped_sql = None

    return injected_node
def parse_macro_file(self, macro_file_path, macro_file_contents, root_path,
                     package_name, resource_type, tags=None, context=None):

    logger.debug("Parsing {}".format(macro_file_path))

    to_return = {}

    if tags is None:
        tags = []

    context = {}

    # change these to actual kwargs
    base_node = UnparsedMacro(
        path=macro_file_path,
        original_file_path=macro_file_path,
        package_name=package_name,
        raw_sql=macro_file_contents,
        root_path=root_path,
    )

    try:
        ast = dbt.clients.jinja.parse(macro_file_contents)
    except dbt.exceptions.CompilationException as e:
        e.node = base_node
        raise e

    for macro_node in ast.find_all(jinja2.nodes.Macro):
        macro_name = macro_node.name

        node_type = None
        if macro_name.startswith(dbt.utils.MACRO_PREFIX):
            node_type = NodeType.Macro
            name = macro_name.replace(dbt.utils.MACRO_PREFIX, '')

        if node_type != resource_type:
            continue

        unique_id = self.get_path(resource_type, package_name, name)

        merged = dbt.utils.deep_merge(
            base_node.serialize(),
            {
                'name': name,
                'unique_id': unique_id,
                'tags': tags,
                'resource_type': resource_type,
                'depends_on': {'macros': []},
            })

        new_node = ParsedMacro(**merged)

        to_return[unique_id] = new_node

    return to_return
def track(user, *args, **kwargs):
    if user.do_not_track:
        return
    else:
        logger.debug("Sending event: {}".format(kwargs))
        try:
            tracker.track_struct_event(*args, **kwargs)
        except Exception:
            logger.debug(
                "An error was encountered while trying to send an event"
            )
def _schema_is_cached(self, database, schema, model_name=None):
    """Check if the schema is cached, and by default logs if it is not."""

    if dbt.flags.USE_CACHE is False:
        return False
    elif (database, schema) not in self.cache:
        logger.debug(
            'On "{}": cache miss for schema "{}.{}", this is inefficient'
            .format(model_name or '<None>', database, schema)
        )
        return False
    else:
        return True
def create_schema(self, database, schema, model_name=None):
    logger.debug('Creating schema "%s"."%s".', database, schema)
    if model_name is None:
        model_name = 'master'

    kwargs = {
        'database_name': self.quote_as_configured(database, 'database'),
        'schema_name': self.quote_as_configured(schema, 'schema'),
    }
    self.execute_macro(CREATE_SCHEMA_MACRO_NAME,
                       kwargs=kwargs,
                       connection_name=model_name)
    self.commit_if_has_connection(model_name)
def cancel(self, connection):
    connection_name = connection.name
    pid = connection.handle.get_backend_pid()

    sql = "select pg_terminate_backend({})".format(pid)

    logger.debug("Cancelling query '{}' ({})".format(connection_name, pid))

    _, cursor = self.add_query(sql, 'master')
    res = cursor.fetchone()

    logger.debug("Cancel query '{}': {}".format(connection_name, res))
def rmdir(path):
    """
    Recursively deletes a directory. Includes an error handler to retry with
    different permissions on Windows. Otherwise, removing directories (eg.
    cloned via git) can cause rmtree to throw a PermissionError exception
    """
    logger.debug("DEBUG** Window rmdir sys.platform: {}".format(sys.platform))
    if sys.platform == 'win32':
        onerror = _windows_rmdir_readonly
    else:
        onerror = None

    return shutil.rmtree(path, onerror=onerror)
def get_columns_in_relation(self, relation, model_name=None):
    try:
        table = self.connections.get_bq_table(
            database=relation.database,
            schema=relation.schema,
            identifier=relation.table_name,
            conn_name=model_name
        )
        return self._get_dbt_columns_from_bq_table(table)

    except (ValueError, google.cloud.exceptions.NotFound) as e:
        logger.debug("get_columns_in_relation error: {}".format(e))
        return []
def create_schema(self, schema, model_name=None):
    logger.debug('Creating schema "%s".', schema)

    conn = self.get_connection(model_name)
    client = conn.handle

    dataset = self.get_dataset(schema, model_name)

    # Emulate 'create schema if not exists ...'
    try:
        client.get_dataset(dataset)
    except google.api_core.exceptions.NotFound:
        with self.exception_handler('create dataset', model_name):
            client.create_dataset(dataset)
def add_query(cls, profile, sql, model_name=None, auto_begin=True,
              bindings=None, abridge_sql_log=False):
    connection = cls.get_connection(profile, model_name)
    connection_name = connection.get('name')

    if auto_begin and connection['transaction_open'] is False:
        cls.begin(profile, connection_name)

    logger.debug('Using {} connection "{}".'.format(
        cls.type(), connection_name))

    with cls.exception_handler(profile, sql, model_name, connection_name):
        if abridge_sql_log:
            logger.debug('On %s: %s....', connection_name, sql[0:512])
        else:
            logger.debug('On %s: %s', connection_name, sql)
        pre = time.time()

        cursor = connection.get('handle').cursor()
        cursor.execute(sql, bindings)

        logger.debug("SQL status: %s in %0.2f seconds",
                     cls.get_status(cursor), (time.time() - pre))

        return connection, cursor
def run_cmd(cwd, cmd, env=None):
    logger.debug('Executing "{}"'.format(' '.join(cmd)))
    if len(cmd) == 0:
        raise dbt.exceptions.CommandError(cwd, cmd)

    # the env argument replaces the environment entirely, which has exciting
    # consequences on Windows! Do an update instead.
    full_env = env
    if env is not None:
        full_env = os.environ.copy()
        full_env.update(env)

    try:
        proc = subprocess.Popen(
            cmd,
            cwd=cwd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            env=full_env)

        out, err = proc.communicate()
    except OSError as exc:
        _interpret_oserror(exc, cwd, cmd)

    logger.debug('STDOUT: "{}"'.format(out))
    logger.debug('STDERR: "{}"'.format(err))

    if proc.returncode != 0:
        logger.debug('command return code={}'.format(proc.returncode))
        raise dbt.exceptions.CommandResultError(cwd, cmd, proc.returncode,
                                                out, err)

    return out, err
def cancel(self, connection):
    handle = connection.handle
    sid = handle.session_id

    connection_name = connection.name

    sql = 'select system$abort_session({})'.format(sid)

    logger.debug("Cancelling query '{}' ({})".format(connection_name, sid))

    _, cursor = self.add_query(sql)
    res = cursor.fetchone()

    logger.debug("Cancel query '{}': {}".format(connection_name, res))
def invalid_source_fail_unless_test(node, target_name, target_table_name,
                                    disabled):
    if node.resource_type == NodeType.Test:
        msg = dbt.exceptions.get_source_not_found_or_disabled_msg(
            node, target_name, target_table_name, disabled)
        if disabled:
            logger.debug(f'WARNING: {msg}')
        else:
            dbt.exceptions.warn_or_error(msg, log_fmt='WARNING: {}')
    else:
        dbt.exceptions.source_target_not_found(node, target_name,
                                               target_table_name,
                                               disabled=disabled)
def exception_handler(cls, profile, sql, model_name=None,
                      connection_name=None):
    try:
        yield
    except psycopg2.DatabaseError as e:
        logger.debug('Postgres error: {}'.format(str(e)))

        try:
            # attempt to release the connection
            cls.release_connection(profile, connection_name)
        except psycopg2.Error:
            logger.debug("Failed to release connection!")
            pass

        raise dbt.exceptions.DatabaseException(
            dbt.compat.to_string(e).strip())

    except Exception as e:
        logger.debug("Error running SQL: %s", sql)
        logger.debug("Rolling back transaction.")
        cls.release_connection(profile, connection_name)
        raise dbt.exceptions.RuntimeException(e)
def exception_handler(self, sql):
    try:
        yield
    except snowflake.connector.errors.ProgrammingError as e:
        msg = dbt.compat.to_string(e)

        logger.debug('Snowflake error: {}'.format(msg))

        if 'Empty SQL statement' in msg:
            logger.debug("got empty sql statement, moving on")
        elif 'This session does not have a current database' in msg:
            self.release()
            raise dbt.exceptions.FailedToConnectException(
                ('{}\n\nThis error sometimes occurs when invalid '
                 'credentials are provided, or when your default role '
                 'does not have access to use the specified database. '
                 'Please double check your profile and try again.')
                .format(msg))
        else:
            self.release()
            raise dbt.exceptions.DatabaseException(msg)
    except Exception as e:
        logger.debug("Error running SQL: %s", sql)
        logger.debug("Rolling back transaction.")
        self.release()
        if isinstance(e, dbt.exceptions.RuntimeException):
            # during a sql query, an internal to dbt exception was raised.
            # this sounds a lot like a signal handler and probably has
            # useful information, so raise it without modification.
            raise
        raise dbt.exceptions.RuntimeException(e.msg)
def open(cls, connection):
    if connection.state == 'open':
        logger.debug('Connection is already open, skipping open.')
        return connection

    credentials = connection.credentials

    try:
        con_str = ["ConnectionType=Direct", "AuthenticationType=Plain"]
        con_str.append(f"Driver={{{credentials.driver}}}")
        con_str.append(f"HOST={credentials.host}")
        con_str.append(f"PORT={credentials.port}")
        con_str.append(f"UID={credentials.UID}")
        con_str.append(f"PWD={credentials.PWD}")
        if credentials.additional_parameters:
            con_str.append(f"{credentials.additional_parameters}")
        con_str_concat = ';'.join(con_str)
        logger.debug(f'Using connection string: {con_str_concat}')

        handle = pyodbc.connect(con_str_concat, autocommit=True)

        connection.state = 'open'
        connection.handle = handle
        logger.debug(f'Connected to db: {credentials.database}')

    except pyodbc.Error as e:
        logger.debug(f"Could not connect to db: {e}")
        connection.handle = None
        connection.state = 'fail'
        raise dbt.exceptions.FailedToConnectException(str(e))

    return connection
def drop_schema(cls, profile, project_cfg, schema, model_name=None):
    logger.debug('Dropping schema "%s".', schema)

    if not cls.check_schema_exists(profile, project_cfg,
                                   schema, model_name):
        return

    conn = cls.get_connection(profile)
    client = conn.get('handle')

    dataset = cls.get_dataset(profile, project_cfg, schema, model_name)
    with cls.exception_handler(profile, 'drop dataset', model_name):
        cls.drop_tables_in_schema(profile, project_cfg, dataset)
        client.delete_dataset(dataset)
def create_schema(cls, profile, project_cfg, schema, model_name=None):
    logger.debug('Creating schema "%s".', schema)

    conn = cls.get_connection(profile, model_name)
    client = conn.get('handle')

    dataset = cls.get_dataset(profile, project_cfg, schema, model_name)

    # Emulate 'create schema if not exists ...'
    try:
        client.get_dataset(dataset)
    except google.api_core.exceptions.NotFound:
        with cls.exception_handler(profile, 'create dataset', model_name):
            client.create_dataset(dataset)
def exception_handler(cls, profile, sql, model_name=None,
                      connection_name='master'):
    connection = cls.get_connection(profile, connection_name)

    try:
        yield
    except snowflake.connector.errors.ProgrammingError as e:
        msg = dbt.compat.to_string(e)

        logger.debug('Snowflake error: {}'.format(msg))

        if 'Empty SQL statement' in msg:
            logger.debug("got empty sql statement, moving on")
        elif 'This session does not have a current database' in msg:
            cls.release_connection(profile, connection_name)
            raise dbt.exceptions.FailedToConnectException(
                ('{}\n\nThis error sometimes occurs when invalid '
                 'credentials are provided, or when your default role '
                 'does not have access to use the specified database. '
                 'Please double check your profile and try again.')
                .format(msg))
        else:
            cls.release_connection(profile, connection_name)
            raise dbt.exceptions.DatabaseException(msg)
    except Exception as e:
        logger.debug("Error running SQL: %s", sql)
        logger.debug("Rolling back transaction.")
        cls.release_connection(profile, connection_name)
        raise dbt.exceptions.RuntimeException(e.msg)
def add_query(self, sql, auto_begin=True, bindings=None,
              abridge_sql_log=False):
    connection = self.get_thread_connection()

    if auto_begin and connection.transaction_open is False:
        self.begin()

    logger.debug('Using {} connection "{}".'.format(
        self.TYPE, connection.name))

    with self.exception_handler(sql):
        if abridge_sql_log:
            logger.debug("On {}: {}....".format(
                connection.name, sql[0:512]))
        else:
            logger.debug("On {}: {}".format(connection.name, sql))
        pre = time.time()

        cursor = connection.handle.cursor()

        # pyodbc does not handle a None type binding!
        if bindings is None:
            cursor.execute(sql)
        else:
            cursor.execute(sql, bindings)

        logger.debug("SQL status: {} in {:0.2f} seconds".format(
            self.get_response(cursor), (time.time() - pre)))

        return connection, cursor
def _safe_release_connection(self): """Try to release a connection. If an exception is hit, log and return the error string. """ node_name = self.node.name try: self.adapter.release_connection(node_name) except Exception as exc: logger.debug( 'Error releasing connection for node {}: {!s}\n{}'.format( node_name, exc, traceback.format_exc())) return dbt.compat.to_string(exc) return None
def exception_handler(self, sql):
    try:
        yield
    except pyodbc.DatabaseError as e:
        logger.debug("Database error: {}".format(str(e)))

        try:
            # attempt to release the connection
            self.release()
        except pyodbc.Error:
            logger.debug("Failed to release connection!")
            pass

        raise dbt.exceptions.DatabaseException(str(e).strip()) from e

    except Exception as e:
        logger.debug(f"Error running SQL: {sql}")
        logger.debug("Rolling back transaction.")
        self.release()
        if isinstance(e, dbt.exceptions.RuntimeException):
            # during a sql query, an internal to dbt exception was raised.
            # this sounds a lot like a signal handler and probably has
            # useful information, so raise it without modification.
            raise
        raise dbt.exceptions.RuntimeException(e)
def compile_node(self, node, manifest):
    logger.debug("Compiling {}".format(node.get('unique_id')))

    data = node.to_dict()
    data.update({
        'compiled': False,
        'compiled_sql': None,
        'extra_ctes_injected': False,
        'extra_ctes': [],
        'injected_sql': None,
    })
    compiled_node = CompiledNode(**data)

    context = dbt.context.runtime.generate(
        compiled_node, self.project, manifest)

    compiled_node.compiled_sql = dbt.clients.jinja.get_rendered(
        node.get('raw_sql'),
        context,
        node)

    compiled_node.compiled = True

    injected_node, _ = prepend_ctes(compiled_node, manifest)

    should_wrap = {NodeType.Test, NodeType.Analysis, NodeType.Operation}
    if injected_node.resource_type in should_wrap:
        # data tests get wrapped in count(*)
        # TODO : move this somewhere more reasonable
        if 'data' in injected_node.tags and \
                is_type(injected_node, NodeType.Test):
            injected_node.wrapped_sql = (
                "select count(*) from (\n{test_sql}\n) sbq").format(
                    test_sql=injected_node.injected_sql)
        else:
            # don't wrap schema tests or analyses.
            injected_node.wrapped_sql = injected_node.injected_sql

    elif is_type(injected_node, NodeType.Archive):
        # unfortunately we do everything automagically for
        # archives. in the future it'd be nice to generate
        # the SQL at the parser level.
        pass

    elif (is_type(injected_node, NodeType.Model) and
          get_materialization(injected_node) == 'ephemeral'):
        pass

    else:
        injected_node.wrapped_sql = None

    return injected_node
def run_cmd(
    cwd: str, cmd: List[str], env: Optional[Dict[str, Any]] = None
) -> Tuple[bytes, bytes]:
    logger.debug('Executing "{}"'.format(' '.join(cmd)))
    if len(cmd) == 0:
        raise dbt.exceptions.CommandError(cwd, cmd)

    # the env argument replaces the environment entirely, which has exciting
    # consequences on Windows! Do an update instead.
    full_env = env
    if env is not None:
        full_env = os.environ.copy()
        full_env.update(env)

    try:
        proc = subprocess.Popen(
            cmd,
            cwd=cwd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            env=full_env)

        out, err = proc.communicate()
    except OSError as exc:
        _interpret_oserror(exc, cwd, cmd)

    logger.debug('STDOUT: "{!s}"'.format(out))
    logger.debug('STDERR: "{!s}"'.format(err))

    if proc.returncode != 0:
        logger.debug('command return code={}'.format(proc.returncode))
        raise dbt.exceptions.CommandResultError(cwd, cmd, proc.returncode,
                                                out, err)

    return out, err
def cancel_connection(cls, profile, connection):
    handle = connection['handle']
    sid = handle.session_id

    connection_name = connection.get('name')

    sql = 'select system$abort_session({})'.format(sid)

    logger.debug("Cancelling query '{}' ({})".format(connection_name, sid))

    _, cursor = cls.add_query(profile, sql, 'master')
    res = cursor.fetchone()

    logger.debug("Cancel query '{}': {}".format(connection_name, res))
def __deepcopy__(self, memo): path = os.path.join(self.node.get('root_path'), self.node.get('original_file_path')) logger.debug( 'dbt encountered an undefined variable, "{}" in node {}.{} ' '(source path: {})'.format(self.name, self.node.get('package_name'), self.node.get('name'), path)) # match jinja's message dbt.exceptions.raise_compiler_error("{!r} is undefined".format( self.name), node=self.node)
def add_query(self, sql: str, auto_begin: bool = True,
              bindings: Optional[Any] = {},
              abridge_sql_log: bool = False) -> Tuple[Connection, Any]:

    logger.debug(sql)

    connection = self.get_thread_connection()

    if auto_begin and connection.transaction_open is False:
        self.begin()

    logger.debug('Using {} connection "{}".'.format(
        self.TYPE, connection.name))

    with self.exception_handler(sql):
        if abridge_sql_log:
            log_sql = '{}...'.format(sql[:512])
        else:
            log_sql = sql

        logger.debug(
            'On {connection_name}: {sql}',
            connection_name=connection.name,
            sql=log_sql,
        )
        pre = time.time()

        cursor = connection.handle.cursor()
        cursor.execute(sql, bindings)

        logger.debug(
            "SQL status: {status} in {elapsed:0.2f} seconds",
            status=self.get_status(cursor),
            elapsed=(time.time() - pre)
        )

        return connection, cursor
def add_query(self, sql, name=None, auto_begin=True, bindings=None,
              abridge_sql_log=False):
    connection = self.get(name)
    connection_name = connection.name

    if auto_begin and connection.transaction_open is False:
        self.begin(connection_name)

    logger.debug('Using {} connection "{}".'
                 .format(self.TYPE, connection_name))

    with self.exception_handler(sql, connection_name):
        if abridge_sql_log:
            logger.debug('On %s: %s....', connection_name, sql[0:512])
        else:
            logger.debug('On %s: %s', connection_name, sql)
        pre = time.time()

        cursor = connection.handle.cursor()
        cursor.execute(sql, bindings)

        logger.debug("SQL status: %s in %0.2f seconds",
                     self.get_status(cursor), (time.time() - pre))

        return connection, cursor
def add_query(self, sql, auto_begin=True, bindings=None,
              abridge_sql_log=False):
    connection = self.get_thread_connection()

    if bindings:
        # The sqlserver connector is more strict than, eg., psycopg2 -
        # which allows any iterable thing to be passed as a binding.
        bindings = tuple(bindings)

    if auto_begin and connection.transaction_open is False:
        self.begin()

    logger.debug('Using {} connection "{}".'
                 .format(self.TYPE, connection.name))

    with self.exception_handler(sql):
        if abridge_sql_log:
            logger.debug('On %s: %s....', connection.name, sql[0:512])
        else:
            logger.debug('On %s: %s', connection.name, sql)
        pre = time.time()

        cursor = connection.handle.cursor()
        cursor.execute(sql, bindings)

        logger.debug("SQL status: %s in %0.2f seconds",
                     self.get_status(cursor), (time.time() - pre))

        return connection, cursor
def get_catalog(self, manifest):
    schemas = manifest.get_used_schemas()

    column_names = (
        'table_database',
        'table_schema',
        'table_name',
        'table_type',
        'table_comment',
        'table_owner',
        'column_name',
        'column_index',
        'column_type',
        'column_comment',
    )

    columns = []
    for (database_name, schema_name) in schemas:
        relations = self.list_relations(database_name, schema_name)
        for relation in relations:
            logger.debug("Getting table schema for relation {}".format(
                relation))  # noqa
            table_columns = self.get_columns_in_relation(relation)
            rel_type = self.get_relation_type(relation)

            for column_index, column in enumerate(table_columns):
                # Fixes for pseudocolumns with no type
                if column.name in ('# Partition Information', '# col_name'):
                    continue
                elif column.dtype is None:
                    continue

                column_data = (
                    relation.database,
                    relation.schema,
                    relation.name,
                    rel_type,
                    None,
                    None,
                    column.name,
                    column_index,
                    column.data_type,
                    None,
                )
                column_dict = dict(zip(column_names, column_data))
                columns.append(column_dict)

    return dbt.clients.agate_helper.table_from_data(columns, column_names)
def raw_execute(self, sql, name=None, fetch=False):
    conn = self.get(name)
    client = conn.handle

    logger.debug('On %s: %s', name, sql)

    job_config = google.cloud.bigquery.QueryJobConfig()
    job_config.use_legacy_sql = False
    query_job = client.query(sql, job_config)

    # this blocks until the query has completed
    with self.exception_handler(sql, conn.name):
        iterator = query_job.result()

    return query_job, iterator
def get_credentials(cls, credentials):
    method = credentials.method

    # Support missing 'method' for backwards compatibility
    if method == 'database' or method is None:
        logger.debug("Connecting to Redshift using 'database' credentials")
        return credentials

    elif method == 'iam':
        logger.debug("Connecting to Redshift using 'IAM' credentials")
        return cls.get_tmp_iam_cluster_credentials(credentials)

    else:
        raise dbt.exceptions.FailedToConnectException(
            "Invalid 'method' in profile: '{}'".format(method))
def __deepcopy__(self, memo): path = os.path.join(self.node.get('root_path'), self.node.get('original_file_path')) logger.debug( 'dbt encountered an undefined variable, "{}" in node {}.{} ' '(source path: {})' .format(self.name, self.node.get('package_name'), self.node.get('name'), path)) # match jinja's message dbt.exceptions.raise_compiler_error( "{!r} is undefined".format(self.name), node=self.node )
def _safe_release_connection(self): """Try to release a connection. If an exception is hit, log and return the error string. """ node_name = self.node.name try: self.adapter.release_connection(node_name) except Exception as exc: logger.debug( 'Error releasing connection for node {}: {!s}\n{}' .format(node_name, exc, traceback.format_exc()) ) return dbt.compat.to_string(exc) return None
def load(self, internal_manifest: Optional[Manifest] = None):
    old_results = self.read_parse_results()
    if old_results is not None:
        logger.debug('Got an acceptable cached parse result')

    self._load_macros(old_results, internal_manifest=internal_manifest)

    # make a manifest with just the macros to get the context
    macro_manifest = Manifest.from_macros(
        macros=self.results.macros,
        files=self.results.files
    )
    self.macro_hook(macro_manifest)

    for project in self.all_projects.values():
        # parse a single project
        self.parse_project(project, macro_manifest, old_results)
def get_catalog(self, manifest: Manifest) -> agate.Table:
    schemas = manifest.get_used_schemas()

    def to_dict(d: any) -> Dict:
        return d.__dict__

    columns = []
    for (database_name, schema_name) in schemas:
        relations = self.list_relations(database_name, schema_name)
        for relation in relations:
            logger.debug("Getting table schema for relation {}", relation)
            columns += list(
                map(to_dict, self.get_columns_in_relation(relation))
            )

    return agate.Table.from_object(columns)
def raw_execute(cls, profile, sql, model_name=None, fetch=False, **kwargs):
    conn = cls.get_connection(profile, model_name)
    client = conn.get('handle')

    logger.debug('On %s: %s', model_name, sql)

    job_config = google.cloud.bigquery.QueryJobConfig()
    job_config.use_legacy_sql = False
    query_job = client.query(sql, job_config)

    # this blocks until the query has completed
    with cls.exception_handler(profile, sql, model_name):
        iterator = query_job.result()

    return query_job, iterator
def _schema_is_cached(self, schema, model_name=None,
                      debug_on_missing=True):
    """Check if the schema is cached, and by default logs if it is not."""
    if dbt.flags.USE_CACHE is False:
        return False
    elif schema not in self.cache:
        if debug_on_missing:
            logger.debug(
                'On "{}": cache miss for schema "{}", this is inefficient'
                .format(model_name or '<None>', schema))
        return False
    else:
        return True
def exception_handler(self, sql, connection_name='master'): try: yield except Exception as exc: logger.debug("Error while running:\n{}".format(sql)) logger.debug(exc) if len(exc.args) == 0: raise thrift_resp = exc.args[0] if hasattr(thrift_resp, 'status'): msg = thrift_resp.status.errorMessage raise dbt.exceptions.RuntimeException(msg) else: raise dbt.exceptions.RuntimeException(str(exc))
def _initialize_downloads():
    global DOWNLOADS_PATH, REMOVE_DOWNLOADS
    # the user might have set an environment variable. Set it to None, and do
    # not remove it when finished.
    if DOWNLOADS_PATH is None:
        DOWNLOADS_PATH = os.environ.get('DBT_DOWNLOADS_DIR', None)
        REMOVE_DOWNLOADS = False

    # if we are making a per-run temp directory, remove it at the end of
    # successful runs
    if DOWNLOADS_PATH is None:
        DOWNLOADS_PATH = tempfile.mkdtemp(prefix='dbt-downloads-')
        REMOVE_DOWNLOADS = True

    dbt.clients.system.make_directory(DOWNLOADS_PATH)
    logger.debug("Set downloads directory='{}'".format(DOWNLOADS_PATH))
def invalid_ref_fail_unless_test(node, target_model_name,
                                 target_model_package, disabled):
    if node.get('resource_type') == NodeType.Test:
        msg = invalid_ref_test_message(node, target_model_name,
                                       target_model_package, disabled)
        if disabled:
            logger.debug(msg)
        else:
            dbt.exceptions.warn_or_error(msg)

    else:
        dbt.exceptions.ref_target_not_found(
            node,
            target_model_name,
            target_model_package)
def exception_handler(self, sql): try: yield except Exception as e: logger.debug("Error running SQL: %s", sql) logger.debug("Rolling back transaction.") self.release() if isinstance(e, dbt.exceptions.RuntimeException): # during a sql query, an internal to dbt exception was raised. # this sounds a lot like a signal handler and probably has # useful information, so raise it without modification. raise raise dbt.exceptions.RuntimeException(e)
def compile_node(adapter, config, node, manifest, extra_context, write=True):
    compiler = Compiler(config)
    node = compiler.compile_node(node, manifest, extra_context)
    node = _inject_runtime_config(adapter, node, extra_context)

    if write and _is_writable(node):
        logger.debug('Writing injected SQL for node "{}"'.format(
            node.unique_id))

        written_path = dbt.writer.write_node(
            node,
            config.target_path,
            'compiled',
            node.injected_sql)

        node.build_path = written_path

    return node
def _materialize_as_view(self, model):
    model_database = model.get('database')
    model_schema = model.get('schema')
    model_name = model.get('name')
    model_alias = model.get('alias')
    model_sql = model.get('injected_sql')

    logger.debug("Model SQL ({}):\n{}".format(model_name, model_sql))
    self.connections.create_view(
        database=model_database,
        schema=model_schema,
        table_name=model_alias,
        conn_name=model_name,
        sql=model_sql
    )
    return "CREATE VIEW"
def exception_handler(self, sql, connection_name='master'):
    try:
        yield
    except google.cloud.exceptions.BadRequest as e:
        message = "Bad request while running:\n{sql}"
        self.handle_error(e, message, sql)

    except google.cloud.exceptions.Forbidden as e:
        message = "Access denied while running:\n{sql}"
        self.handle_error(e, message, sql)

    except Exception as e:
        logger.debug("Unhandled error while running:\n{}".format(sql))
        logger.debug(e)
        raise dbt.exceptions.RuntimeException(dbt.compat.to_string(e))
def _checkout(cwd, repo, branch):
    logger.debug('  Checking out branch {}.'.format(branch))

    run_cmd(cwd, ['git', 'remote', 'set-branches', 'origin', branch])
    run_cmd(cwd, ['git', 'fetch', '--tags', '--depth', '1', 'origin', branch])

    tags = list_tags(cwd)

    # Prefer tags to branches if one exists
    if branch in tags:
        spec = 'tags/{}'.format(branch)
    else:
        spec = 'origin/{}'.format(branch)

    out, err = run_cmd(cwd, ['git', 'reset', '--hard', spec],
                       env={'LC_ALL': 'C'})
    return out, err
def cleanup_all(self):
    with self.lock:
        for name, connection in self.in_use.items():
            if connection.state != 'closed':
                logger.debug("Connection '{}' was left open."
                             .format(name))
            else:
                logger.debug("Connection '{}' was properly closed."
                             .format(name))

        conns_in_use = list(self.in_use.values())
        for conn in conns_in_use + self.available:
            self.close(conn)

        # garbage collect these connections
        self.in_use.clear()
        self._set_initial_connections()
def list_relations(self, database, schema, model_name=None):
    if self._schema_is_cached(database, schema, model_name):
        return self.cache.get_relations(database, schema)

    information_schema = self.Relation.create(
        database=database,
        schema=schema,
        model_name='').information_schema()

    # we can't build the relations cache because we don't have a
    # manifest so we can't run any operations.
    relations = self.list_relations_without_caching(
        information_schema, schema, model_name=model_name
    )

    logger.debug('with schema={}, model_name={}, relations={}'
                 .format(schema, model_name, relations))
    return relations
def alter_table_add_columns(self, relation, columns, model_name=None):
    logger.debug('Adding columns ({}) to table {}.'.format(
        columns, relation))

    conn = self.connections.get(model_name)
    client = conn.handle

    table_ref = self.connections.table_ref(relation.database,
                                           relation.schema,
                                           relation.identifier, conn)
    table = client.get_table(table_ref)

    new_columns = [column_to_bq_schema(col) for col in columns]
    new_schema = table.schema + new_columns

    new_table = google.cloud.bigquery.Table(table_ref, schema=new_schema)
    client.update_table(new_table, ['schema'])