def do_ref(*args):
    """Resolve a ``ref(...)`` call to the SQL name of the referenced model.

    Accepts ``ref(name)`` or ``ref(package, name)``. ``model``,
    ``all_models`` and ``schema`` are free names taken from the
    surrounding scope.

    Returns the ``__dbt__CTE__<name>`` placeholder when the target is
    ephemeral, otherwise the quoted ``"schema"."name"`` string.
    """
    arg_count = len(args)
    if arg_count == 1:
        target_model_package, target_model_name = None, args[0]
    elif arg_count == 2:
        target_model_package, target_model_name = args
    else:
        target_model_package = target_model_name = None
        dbt.exceptions.ref_invalid_args(model, args)

    target_model = dbt.utils.find_model_by_name(
        all_models, target_model_name, target_model_package)
    if target_model is None:
        dbt.exceptions.ref_target_not_found(
            model, target_model_name, target_model_package)

    # the referenced node has to be among this model's declared dependencies
    target_model_id = target_model.get('unique_id')
    declared_nodes = model.get('depends_on', {}).get('nodes')
    if target_model_id not in declared_nodes:
        dbt.exceptions.ref_bad_context(
            model, target_model_name, target_model_package)

    relation_name = target_model.get('name')
    if get_materialization(target_model) != 'ephemeral':
        return '"{}"."{}"'.format(schema, relation_name)

    # register the ephemeral model as an extra CTE of the current model
    model['extra_ctes'][target_model_id] = None
    return '__dbt__CTE__{}'.format(relation_name)
def do_ref(*args):
    """Resolve a ``ref(...)`` call to a relation object for the target model.

    Accepts ``ref(name)`` or ``ref(package, name)``. ``model``,
    ``manifest``, ``current_project``, ``adapter`` and ``profile`` are free
    names taken from the surrounding scope.

    Returns an unquoted CTE relation for ephemeral targets, otherwise the
    relation built by ``adapter.Relation.create_from_node``.
    """
    arg_count = len(args)
    if arg_count == 1:
        target_model_package, target_model_name = None, args[0]
    elif arg_count == 2:
        target_model_package, target_model_name = args
    else:
        target_model_package = target_model_name = None
        dbt.exceptions.ref_invalid_args(model, args)

    target_model = dbt.parser.ParserUtils.resolve_ref(
        manifest,
        target_model_name,
        target_model_package,
        current_project,
        model.get('package_name'))
    if target_model is None:
        dbt.exceptions.ref_target_not_found(
            model, target_model_name, target_model_package)

    # the referenced node has to be among this model's declared dependencies
    target_model_id = target_model.get('unique_id')
    declared_nodes = model.get('depends_on', {}).get('nodes')
    if target_model_id not in declared_nodes:
        dbt.exceptions.ref_bad_context(
            model, target_model_name, target_model_package)

    if get_materialization(target_model) != 'ephemeral':
        return adapter.Relation.create_from_node(profile, target_model)

    # ephemeral targets are registered as a CTE on the current model
    model.set_cte(target_model_id, None)
    cte_relation = adapter.Relation.create(
        type=adapter.Relation.CTE,
        identifier=add_ephemeral_model_prefix(target_model_name))
    return cte_relation.quote(identifier=False)
def materialization_not_available(model, adapter_type):
    """Report that the model's materialization is unavailable for an adapter.

    Builds the message from the model's materialization and
    ``adapter_type`` and hands it to ``raise_compiler_error`` together
    with the model.
    """
    message = "Materialization '{}' is not available for {}!".format(
        get_materialization(model), adapter_type)
    raise_compiler_error(message, model)
def get_ancestor_ephemeral_nodes(self, flat_graph, linked_graph,
                                 selected_nodes):
    """Return the set of ephemeral model ids among the selection's ancestors.

    Each selected node present in ``flat_graph['nodes']`` becomes a
    ``+<name>`` selector; the ids returned by the graph selector are then
    filtered down to models whose materialization is ``'ephemeral'``.
    Ids missing from ``flat_graph['nodes']`` are ignored.
    """
    node_names = {}
    for node in selected_nodes:
        if node in flat_graph['nodes']:
            node_names[node] = flat_graph['nodes'].get(node).get('name')

    include_spec = []
    for node in selected_nodes:
        if node in node_names:
            include_spec.append('+{}'.format(node_names[node]))

    all_ancestors = dbt.graph.selector.select_nodes(
        self.project, linked_graph, include_spec, [])

    ephemeral_ancestors = set()
    for ancestor in all_ancestors:
        if ancestor in flat_graph['nodes']:
            ancestor_node = flat_graph['nodes'][ancestor]
            is_model = ancestor_node.get('resource_type') == NodeType.Model
            is_ephemeral = get_materialization(ancestor_node) == 'ephemeral'
            if is_model and is_ephemeral:
                ephemeral_ancestors.add(ancestor)

    return ephemeral_ancestors
def wrap(model, project, context, injected_graph):
    """Assemble the wrapping options for ``model`` and pass them to ``do_wrap``.

    The options carry the materialization, schema, dist/sort qualifiers,
    pre/post hooks, the model's injected SQL, the dbt flags and a
    ``DatabaseWrapper``, plus whatever the wrapper's
    ``get_context_functions()`` contributes.
    """
    adapter = get_adapter(project.run_environment())
    target_schema = context['env'].get('schema', 'public')

    # both qualifiers are empty strings when the config does not set them
    dist = get_dist_qualifier(model, project)
    sort = get_sort_qualifier(model, project)

    before_hooks = get_hooks(model, context, 'pre-hook')
    after_hooks = get_hooks(model, context, 'post-hook')

    sql = model['injected_sql']

    run_profile = project.run_environment()
    wrapper = DatabaseWrapper(model, adapter, run_profile)

    wrap_opts = dict(
        materialization=get_materialization(model),
        model=model,
        schema=target_schema,
        dist=dist,
        sort=sort,
        pre_hooks=before_hooks,
        post_hooks=after_hooks,
        sql=sql,
        flags=dbt.flags,
        adapter=wrapper,
    )
    wrap_opts.update(wrapper.get_context_functions())

    return do_wrap(model, wrap_opts, injected_graph, context, project)
def materialization_not_available(model, adapter_type):
    """Report that the model's materialization is unavailable for an adapter.

    The formatted message and the model are handed to
    ``raise_compiler_error``.
    """
    # kept as a function-level import, as in the original
    from dbt.utils import get_materialization  # noqa

    template = "Materialization '{}' is not available for {}!"
    raise_compiler_error(
        template.format(get_materialization(model), adapter_type),
        model)
def print_model_start_line(model, schema_name, index, total):
    """Print the ``START`` progress line for a model about to run.

    The line names the model's materialization, schema and name and is
    emitted through ``print_fancy_output_line`` with the ``'RUN'`` status.
    """
    template = "START {model_type} model {schema}.{relation}"
    start_line = template.format(
        model_type=get_materialization(model),
        schema=schema_name,
        relation=model.get('name'))
    print_fancy_output_line(start_line, 'RUN', index, total)
def materialization_not_available(model, adapter_type):
    """Report that the model's materialization is unavailable for an adapter.

    Formats the error text and passes it, with the model, to
    ``raise_compiler_error``.
    """
    # kept as a function-level import, as in the original
    from dbt.utils import get_materialization  # noqa

    unavailable = get_materialization(model)
    error_text = "Materialization '{}' is not available for {}!".format(
        unavailable, adapter_type)
    raise_compiler_error(error_text, model)
def compile_node(self, node, manifest, extra_context=None):
    """Render a node's raw SQL, inject its CTEs and set ``wrapped_sql``.

    ``extra_context`` entries, when given, are merged into the rendering
    context. Returns the node produced by ``prepend_ctes``.
    """
    logger.debug("Compiling {}".format(node.get('unique_id')))

    data = node.to_dict()
    data.update(
        compiled=False,
        compiled_sql=None,
        extra_ctes_injected=False,
        extra_ctes=[],
        injected_sql=None,
    )
    compiled_node = CompiledNode(**data)

    context = dbt.context.runtime.generate(
        compiled_node, self.config, manifest)
    context.update({} if extra_context is None else extra_context)

    compiled_node.compiled_sql = dbt.clients.jinja.get_rendered(
        node.get('raw_sql'), context, node)
    compiled_node.compiled = True

    injected_node, _ = prepend_ctes(compiled_node, manifest)

    wrapped_types = {NodeType.Test, NodeType.Operation}
    if injected_node.resource_type in wrapped_types:
        # data tests get wrapped in count(*)
        # TODO : move this somewhere more reasonable
        is_data_test = ('data' in injected_node.tags and
                        is_type(injected_node, NodeType.Test))
        if is_data_test:
            template = "select count(*) from (\n{test_sql}\n) sbq"
            injected_node.wrapped_sql = template.format(
                test_sql=injected_node.injected_sql)
        else:
            # schema tests and analyses are not wrapped
            injected_node.wrapped_sql = injected_node.injected_sql
        return injected_node

    # Archives are handled automagically elsewhere (it would be nicer to
    # generate their SQL at the parser level); ephemeral models are also
    # left untouched. Everything else gets wrapped_sql reset to None.
    left_untouched = (
        is_type(injected_node, NodeType.Archive) or
        (is_type(injected_node, NodeType.Model) and
         get_materialization(injected_node) == 'ephemeral'))
    if not left_untouched:
        injected_node.wrapped_sql = None

    return injected_node
def missing_materialization(model, adapter_type):
    """Report that no materialization macro was found for the model.

    The message lists the adapter types that were searched: always
    ``'default'``, plus ``adapter_type`` when it is something else. The
    message and the model are passed to ``raise_compiler_error``.
    """
    materialization = get_materialization(model)

    if adapter_type == 'default':
        valid_types = "'default'"
    else:
        valid_types = "'default' and '{}'".format(adapter_type)

    message = (
        "No materialization '{}' was found for adapter {}! (searched types {})"
        .format(materialization, adapter_type, valid_types))
    raise_compiler_error(message, model)
def compile_node(self, node, flat_graph):
    """Render a node's raw SQL, inject its CTEs and set ``wrapped_sql``.

    Works on a copy of ``node`` and returns the node produced by
    ``prepend_ctes``.
    """
    logger.debug("Compiling {}".format(node.get('unique_id')))

    compiled_node = node.copy()
    compiled_node.update(
        compiled=False,
        compiled_sql=None,
        extra_ctes_injected=False,
        extra_ctes=OrderedDict(),
        injected_sql=None,
    )

    context = dbt.context.runtime.generate(
        compiled_node, self.project.cfg, flat_graph)

    compiled_node['compiled_sql'] = dbt.clients.jinja.get_rendered(
        node.get('raw_sql'), context, node)
    compiled_node['compiled'] = True

    injected_node, _ = prepend_ctes(compiled_node, flat_graph)

    wrapped_types = [NodeType.Test, NodeType.Analysis, NodeType.Operation]
    if compiled_node.get('resource_type') in wrapped_types:
        # data tests get wrapped in count(*)
        # TODO : move this somewhere more reasonable
        is_data_test = ('data' in injected_node['tags'] and
                        is_type(injected_node, NodeType.Test))
        if is_data_test:
            template = "select count(*) from (\n{test_sql}\n) sbq"
            injected_node['wrapped_sql'] = template.format(
                test_sql=injected_node['injected_sql'])
        else:
            # schema tests and analyses are not wrapped
            injected_node['wrapped_sql'] = injected_node.get('injected_sql')
        return injected_node

    # Archives are handled automagically elsewhere (it would be nicer to
    # generate their SQL at the parser level); ephemeral models are also
    # left untouched. Everything else gets wrapped_sql reset to None.
    left_untouched = (
        is_type(injected_node, NodeType.Archive) or
        (is_type(injected_node, NodeType.Model) and
         get_materialization(injected_node) == 'ephemeral'))
    if not left_untouched:
        injected_node['wrapped_sql'] = None

    return injected_node
def print_model_result_line(result, schema_name, index, total):
    """Print the result line for a finished model run.

    The info text and status come from
    ``get_printable_result(result, 'created', 'creating')``; the line also
    carries the run's execution time.
    """
    node = result.node
    info, status = get_printable_result(result, 'created', 'creating')

    template = "{info} {model_type} model {schema}.{relation}"
    result_line = template.format(
        info=info,
        model_type=get_materialization(node),
        schema=schema_name,
        relation=node.get('name'))

    print_fancy_output_line(
        result_line, status, index, total, result.execution_time)
def missing_materialization(model, adapter_type):
    """Report that no materialization macro was found for the model.

    The message lists the adapter types that were searched: always
    ``'default'``, plus ``adapter_type`` when it is something else. The
    message and the model are passed to ``raise_compiler_error``.
    """
    # kept as a function-level import, as in the original
    from dbt.utils import get_materialization  # noqa

    materialization = get_materialization(model)

    searched = "'default'"
    if adapter_type != 'default':
        searched = "'default' and '{}'".format(adapter_type)

    template = (
        "No materialization '{}' was found for adapter {}! (searched types {})"
    )
    raise_compiler_error(
        template.format(materialization, adapter_type, searched),
        model)
def get_ancestor_ephemeral_nodes(self, flat_graph, linked_graph,
                                 selected_nodes):
    """Return the set of ephemeral model ids among the selection's ancestors.

    Each selected node found in the flat graph becomes a ``+<name>``
    selector; the ids returned by the graph selector are then filtered to
    models whose materialization is ``'ephemeral'``.

    Selected nodes or ancestors that are absent from the flat graph are
    skipped instead of raising ``AttributeError``/``KeyError``.
    """
    nodes = flat_graph.get('nodes', {})

    include_spec = [
        '+{}'.format(nodes[node].get('name'))
        for node in selected_nodes
        if node in nodes
    ]

    all_ancestors = dbt.graph.selector.select_nodes(
        self.project, linked_graph, include_spec, [])

    return {
        ancestor
        for ancestor in all_ancestors
        if ancestor in nodes and
        nodes[ancestor].get('resource_type') == NodeType.Model and
        get_materialization(nodes[ancestor]) == 'ephemeral'
    }
def get_wrapping_macro(model, macros):
    """Return the parsed materialization macro that wraps ``model``.

    Only the ``incremental``, ``table`` and ``view`` materializations are
    mapped; any other value raises ``KeyError`` from the lookup. A macro
    id missing from ``macros`` is reported via
    ``dbt.exceptions.macro_not_found``.
    """
    uid_by_materialization = dict([
        ('incremental',
         get_dbt_materialization_macro_uid('create_incremental')),
        ('table', get_dbt_materialization_macro_uid('create_table')),
        ('view', get_dbt_materialization_macro_uid('create_view')),
    ])

    uid = uid_by_materialization[get_materialization(model)]

    found = macros.get(uid)
    if found is None:
        dbt.exceptions.macro_not_found(model, uid)

    return macros[uid]['parsed_macro']
def get_counts(flat_nodes):
    """Summarise nodes as a ``"<count> <type>s, ..."`` string.

    Nodes are grouped by resource type; models are further split by
    materialization (the key becomes ``"<materialization> <type>"``).
    """
    counts = {}
    for node in flat_nodes:
        label = node.get('resource_type')
        if label == NodeType.Model:
            label = '{} {}'.format(get_materialization(node), label)
        counts[label] = counts.get(label, 0) + 1

    return ", ".join(
        "{} {}s".format(count, label) for label, count in counts.items())
def print_model_result_line(result, schema_name, index, total):
    """Print the result line for a finished model run.

    The line starts with ``ERROR creating`` when ``result.errored`` is
    truthy and ``OK created`` otherwise, and is printed with the result's
    status and execution time.
    """
    node = result.node
    outcome = 'ERROR creating' if result.errored else 'OK created'

    template = "{info} {model_type} model {schema}.{relation}"
    result_line = template.format(
        info=outcome,
        model_type=get_materialization(node),
        schema=schema_name,
        relation=node.get('name'))

    print_fancy_output_line(
        result_line, result.status, index, total, result.execution_time)
def get_counts(flat_nodes):
    """Summarise nodes as a ``"<count> <type>s, ..."`` string.

    Nodes are grouped by resource type; models are further split by
    materialization (the key becomes ``"<materialization> <type>"``).
    """
    tally = {}
    for node in flat_nodes:
        kind = node.get('resource_type')
        if kind == NodeType.Model:
            kind = '{} {}'.format(get_materialization(node), kind)

        if kind in tally:
            tally[kind] += 1
        else:
            tally[kind] = 1

    parts = []
    for kind, count in tally.items():
        parts.append("{} {}s".format(count, kind))
    return ", ".join(parts)
def print_counts(flat_nodes):
    """Print a ``Running <count> <type>s, ...`` banner for the given nodes.

    Nodes are grouped by resource type; models are further split by
    materialization.
    """
    counts = {}
    for node in flat_nodes:
        label = node.get('resource_type')
        if label == NodeType.Model:
            label = '{} {}'.format(get_materialization(node), label)
        counts[label] = counts.get(label, 0) + 1

    stat_line = ", ".join(
        "{} {}s".format(count, label) for label, count in counts.items())

    logger.info("")
    print_timestamped_line("Running {}".format(stat_line))
    print_timestamped_line("")
def print_results_line(results, execution_time):
    """Print the ``Finished running ... in N.NNs.`` summary for a run.

    Results are grouped by their node's resource type; models are further
    split by materialization.
    """
    stats = {}
    for result in results:
        node = result.node
        label = node.get('resource_type')
        if label == NodeType.Model:
            label = '{} {}'.format(get_materialization(node), label)
        stats[label] = stats.get(label, 0) + 1

    stat_line = ", ".join(
        "{} {}s".format(count, label) for label, count in stats.items())

    summary = "Finished running {stat_line} in {execution_time:0.2f}s.".format(
        stat_line=stat_line, execution_time=execution_time)

    print_timestamped_line("")
    print_timestamped_line(summary)
def get_counts(flat_nodes) -> str:
    """Summarise nodes as a comma-separated list of pluralized counts.

    Nodes are grouped by ``resource_type``; models are further split by
    materialization and operations are counted under ``'hook'``. Each
    entry is rendered with ``dbt.utils.pluralize``.
    """
    counts: Dict[str, int] = {}

    for node in flat_nodes:
        label = node.resource_type
        if label == NodeType.Model:
            label = '{} {}'.format(get_materialization(node), label)
        elif label == NodeType.Operation:
            label = 'hook'
        counts[label] = counts.get(label, 0) + 1

    return ", ".join(
        dbt.utils.pluralize(count, label) for label, count in counts.items())
def get_dist_qualifier(model, project):
    """Return the adapter's dist qualifier for ``model``, or ``''``.

    An empty string is returned when the config has no ``dist`` entry or
    when the model is not materialized as a table or incremental. A
    non-string dist key is reported through ``compiler_error``.
    """
    config = model.get('config', {})

    applies = ('dist' in config and
               get_materialization(model) in ('table', 'incremental'))
    if not applies:
        return ''

    dist_key = config.get('dist')
    if not isinstance(dist_key, basestring):
        compiler_error(
            model,
            "The provided distkey '{}' is not valid!".format(dist_key))

    normalized_key = dist_key.strip().lower()

    return get_adapter(project.run_environment()).dist_qualifier(
        normalized_key)
def get_sort_qualifier(model, project):
    """Return the adapter's sort qualifier for ``model``, or ``''``.

    An empty string is returned when the config has no ``sort`` entry
    (including when the model has no ``config`` at all) or when the model
    is not materialized as a table or incremental. ``sort_type`` defaults
    to ``'compound'``; a non-string value is reported through
    ``compiler_error``.
    """
    model_config = model.get('config', {})

    # Use the defaulted config here: indexing model['config'] directly
    # raised KeyError for models without a config section.
    if 'sort' not in model_config:
        return ''

    if get_materialization(model) not in ('table', 'incremental'):
        return ''

    sort_keys = model_config.get('sort')
    sort_type = model_config.get('sort_type', 'compound')

    if not isinstance(sort_type, basestring):
        compiler_error(
            model,
            "The provided sort_type '{}' is not valid!".format(sort_type))

    sort_type = sort_type.strip().lower()

    adapter = get_adapter(project.run_environment())
    return adapter.sort_qualifier(sort_type, sort_keys)
def track_model_run(index, num_nodes, run_model_result):
    """Send a tracking event describing one model run.

    The payload combines the active user's invocation id, the node's
    position in the run, and the result's timing, status, skip/error
    state, materialization and hashes.
    """
    node = run_model_result.node

    payload = {
        "invocation_id": dbt.tracking.active_user.invocation_id,
        "index": index,
        "total": num_nodes,
        "execution_time": run_model_result.execution_time,
        "run_status": run_model_result.status,
        "run_skipped": run_model_result.skip,
        "run_error": run_model_result.error,
        "model_materialization": get_materialization(node),
        "model_id": get_hash(node),
        "hashed_contents": get_hashed_contents(node),
    }

    dbt.tracking.track_model_run(payload)
def validate_incremental(node):
    """Check that an incremental model defines ``sql_where``.

    Only nodes that are models with the ``'incremental'`` materialization
    are checked; a missing ``sql_where`` config value is reported through
    ``dbt.exceptions.missing_sql_where``.
    """
    if node.get('resource_type') != NodeType.Model:
        return

    if get_materialization(node) != 'incremental':
        return

    sql_where = node.get('config', {}).get('sql_where')
    if sql_where is None:
        dbt.exceptions.missing_sql_where(node)
def safe_execute_node(self, data):
    """Compile and run one node, turning known failures into a result.

    ``data`` is the tuple ``(node, flat_graph, existing, schema_name,
    node_index, num_nodes)``. Ephemeral nodes are compiled but neither
    executed nor printed.

    Returns a ``RunModelResult``. Compilation errors and database/runtime
    errors become ``ERROR`` results, an aborted-transaction error becomes
    a ``SKIP`` result, and any other exception is logged and re-raised.
    The node's connection is released in every case where an adapter was
    obtained.
    """
    node, flat_graph, existing, schema_name, node_index, num_nodes = data

    start_time = time.time()

    error = None
    status = None
    is_ephemeral = (get_materialization(node) == 'ephemeral')

    # Bound before the ``try`` so that ``finally`` can tell whether an
    # adapter was ever obtained. Previously a failure before these were
    # assigned made ``finally`` raise UnboundLocalError, hiding the real
    # error.
    profile = None
    adapter = None

    try:
        if not is_ephemeral:
            print_start_line(node, schema_name, node_index, num_nodes)

        profile = self.project.run_environment()
        adapter = get_adapter(profile)

        compiler = dbt.compilation.Compiler(self.project)
        node = compiler.compile_node(node, flat_graph)

        if not is_ephemeral:
            node, status = self.execute_node(node, flat_graph, existing,
                                             profile, adapter)

    except dbt.exceptions.CompilationException as e:
        return RunModelResult(node, error=str(e), status='ERROR')

    except (RuntimeError,
            dbt.exceptions.ProgrammingException,
            psycopg2.ProgrammingError,
            psycopg2.InternalError) as e:
        error = "Error executing {filepath}\n{error}".format(
            filepath=node.get('build_path'),
            error=str(e).strip())
        status = "ERROR"
        logger.debug(error)

        # an aborted transaction is reported as a skip, not an error
        if isinstance(e, psycopg2.InternalError) and \
           ABORTED_TRANSACTION_STRING == e.diag.message_primary:
            return RunModelResult(
                node,
                error='{}\n'.format(ABORTED_TRANSACTION_STRING),
                status="SKIP")

    except dbt.exceptions.InternalException as e:
        error = ("Internal error executing {filepath}\n\n{error}"
                 "\n\nThis is an error in dbt. Please try again. If "
                 "the error persists, open an issue at "
                 "https://github.com/fishtown-analytics/dbt").format(
                     filepath=node.get('build_path'),
                     error=str(e).strip())
        status = "ERROR"

    except Exception as e:
        error = (
            "Unhandled error while executing {filepath}\n{error}".format(
                filepath=node.get('build_path'),
                error=str(e).strip()))
        logger.debug(error)
        # bare raise keeps the original traceback intact
        raise

    finally:
        if adapter is not None:
            adapter.release_connection(profile, node.get('name'))

    execution_time = time.time() - start_time

    result = RunModelResult(node,
                            error=error,
                            status=status,
                            execution_time=execution_time)

    if not is_ephemeral:
        print_result_line(result, schema_name, node_index, num_nodes)

    return result
def is_ephemeral_model(self, node):
    """Return True when ``node`` is a model materialized as ephemeral."""
    checks = (
        node.get('resource_type') == NodeType.Model,
        get_materialization(node) == 'ephemeral',
    )
    return checks[0] and checks[1]
def execute_model(profile, model, existing):
    """Build ``model`` in the database and swap it into place.

    ``existing`` maps relation names to their type. Three phases run in
    order: clear out whatever is in the way (truncate or drop), execute
    the model, then drop the old relation and rename the temporary one.
    In non-destructive mode existing relations are never dropped or
    renamed.

    Returns the value of ``adapter.execute_model``, or ``None`` when the
    model was not executed.
    """
    adapter = get_adapter(profile)
    schema = adapter.get_default_schema(profile)

    model_name = model.get('name')
    materialization = get_materialization(model)
    non_destructive = dbt.flags.NON_DESTRUCTIVE

    if non_destructive:
        # in non-destructive mode only the already existing relation is
        # considered, so there is no temporary name.
        tmp_name = model_name
    else:
        tmp_name = '{}__dbt_tmp'.format(model_name)

    result = None

    # TRUNCATE / DROP
    if non_destructive:
        # tables are truncated rather than dropped; nothing else is touched.
        if materialization == 'table' and existing.get(tmp_name) == 'table':
            adapter.truncate(profile=profile,
                             table=tmp_name,
                             model_name=model_name)
    elif (materialization != 'incremental' and
          existing.get(tmp_name) is not None):
        # non-incremental leftovers are dropped ...
        adapter.drop(profile=profile,
                     relation=tmp_name,
                     relation_type=existing.get(tmp_name),
                     model_name=model_name)

        # ... and the list of what exists is refreshed
        existing = adapter.query_for_existing(profile,
                                              schema,
                                              model_name=model_name)

    # EXECUTE
    # existing views are not recreated in non-destructive mode (no DDL is
    # run for them); they repopulate on their own.
    view_left_alone = (materialization == 'view' and
                       non_destructive and
                       model_name in existing)
    if (not view_left_alone and
            is_enabled(model) and
            materialization != 'ephemeral'):
        result = adapter.execute_model(profile, model)

    # DROP OLD RELATION AND RENAME
    # skipped entirely in non-destructive mode: tables were truncated and
    # repopulated, views were not modified.
    if not non_destructive and materialization in ['table', 'view']:
        if existing.get(model_name) is not None:
            adapter.drop(profile=profile,
                         relation=model_name,
                         relation_type=existing.get(model_name),
                         model_name=model_name)

        adapter.rename(profile=profile,
                       from_name=tmp_name,
                       to_name=model_name,
                       model_name=model_name)

    return result
def execute_nodes(self, flat_graph, node_dependency_list, on_failure,
                  should_run_hooks=False):
    """Run every node, one dependency level at a time, on a worker pool.

    ``node_dependency_list`` is a list of node lists; each inner list is
    dispatched to the pool only after the previous one has finished.
    Nodes flagged ``skip`` are reported and recorded as skipped results
    without being run. ``on_failure`` is called with the node of every
    errored result. When ``should_run_hooks`` is true the project's
    ``on-run-start`` and ``on-run-end`` hooks run inside their own
    begin/commit pair.

    Returns the list of ``RunModelResult`` objects, or ``[]`` when there
    is nothing to run.
    """
    profile = self.project.run_environment()
    adapter = get_adapter(profile)
    master_connection = adapter.get_connection(profile)
    schema_name = adapter.get_default_schema(profile)

    flat_nodes = list(itertools.chain.from_iterable(node_dependency_list))

    if not flat_nodes:
        logger.info("WARNING: Nothing to do. Try checking your model "
                    "configs and model specification args")
        return []

    num_threads = self.threads
    logger.info("Concurrency: {} threads (target='{}')".format(
        num_threads, self.project.get_target().get('name')))

    master_connection = adapter.begin(profile)
    existing = adapter.query_for_existing(profile, schema_name)
    master_connection = adapter.commit(master_connection)

    # 1-based display index for every non-ephemeral node; ephemeral nodes
    # get no index and do not count towards the total.
    node_id_to_index_map = {}
    i = 1
    for node in flat_nodes:
        if get_materialization(node) != 'ephemeral':
            node_id_to_index_map[node.get('unique_id')] = i
            i += 1

    num_nodes = len(node_id_to_index_map)

    pool = ThreadPool(num_threads)

    print_counts(flat_nodes)

    start_time = time.time()

    if should_run_hooks:
        master_connection = adapter.begin(profile)
        run_hooks(self.project.get_target(),
                  self.project.cfg.get('on-run-start', []),
                  self.node_context({}),
                  'on-run-start hooks')
        master_connection = adapter.commit(master_connection)

    def get_idx(node):
        return node_id_to_index_map.get(node.get('unique_id'))

    node_results = []
    for node_list in node_dependency_list:
        # the enumerate index used here before was never read and shadowed
        # the counter ``i`` above, so iterate the nodes directly
        for node in node_list:
            if node.get('skip'):
                print_skip_line(node, schema_name, node.get('name'),
                                get_idx(node), num_nodes)
                node_results.append(RunModelResult(node, skip=True))

        nodes_to_execute = [
            node for node in node_list if not node.get('skip')
        ]

        args_list = [
            (node, flat_graph, existing, schema_name,
             get_idx(node), num_nodes,)
            for node in nodes_to_execute
        ]

        for result in pool.imap_unordered(self.safe_execute_node,
                                          args_list):
            node_results.append(result)

            # propagate so that CTEs get injected properly
            flat_graph['nodes'][result.node.get('unique_id')] = result.node

            index = get_idx(result.node)
            track_model_run(index, num_nodes, result)

            if result.errored:
                on_failure(result.node)
                logger.info(result.error)

    pool.close()
    pool.join()

    if should_run_hooks:
        # keep the connection returned by begin(), mirroring the
        # on-run-start path, so commit() is not given a stale handle
        master_connection = adapter.begin(profile)
        run_hooks(self.project.get_target(),
                  self.project.cfg.get('on-run-end', []),
                  self.node_context({}),
                  'on-run-end hooks')
        master_connection = adapter.commit(master_connection)

    execution_time = time.time() - start_time

    print_results_line(node_results, execution_time)

    return node_results
def execute_nodes(self, flat_graph, node_dependency_list, on_failure,
                  should_run_hooks=False, should_execute=True):
    """Run (or only compile) every node, one dependency level at a time.

    ``node_dependency_list`` is a list of node lists; each inner list is
    dispatched to the worker pool only after the previous one has
    finished. Nodes flagged ``skip`` are reported and recorded as skipped
    results. With ``should_execute`` false, nodes go through
    ``safe_compile_node`` and no run banner, tracking event or summary is
    produced. ``on_failure`` is called with the node of every errored
    result.

    On ``KeyboardInterrupt`` the pool is terminated, open connections are
    cancelled (each one is reported) and the interrupt is re-raised.

    Returns the list of ``RunModelResult`` objects, or ``[]`` when there
    is nothing to run.
    """
    profile = self.project.run_environment()
    adapter = get_adapter(profile)
    master_connection = adapter.get_connection(profile)
    schema_name = adapter.get_default_schema(profile)

    flat_nodes = list(itertools.chain.from_iterable(node_dependency_list))

    if not flat_nodes:
        logger.info("WARNING: Nothing to do. Try checking your model "
                    "configs and model specification args")
        return []

    num_threads = self.threads
    logger.info("Concurrency: {} threads (target='{}')".format(
        num_threads, self.project.get_target().get('name')))

    master_connection = adapter.begin(profile)
    existing = adapter.query_for_existing(profile, schema_name)
    master_connection = adapter.commit(master_connection)

    # 1-based display index for every non-ephemeral node; ephemeral nodes
    # get no index and do not count towards the total.
    node_id_to_index_map = {}
    i = 1
    for node in flat_nodes:
        if get_materialization(node) != 'ephemeral':
            node_id_to_index_map[node.get('unique_id')] = i
            i += 1

    num_nodes = len(node_id_to_index_map)

    pool = ThreadPool(num_threads)

    if should_execute:
        stat_line = dbt.ui.printer.get_counts(flat_nodes)
        full_line = "Running {}".format(stat_line)

        logger.info("")
        dbt.ui.printer.print_timestamped_line(full_line)
        dbt.ui.printer.print_timestamped_line("")

    start_time = time.time()

    if should_run_hooks:
        self.run_hooks(profile, flat_graph, dbt.utils.RunHookType.Start)

    def get_idx(node):
        return node_id_to_index_map.get(node.get('unique_id'))

    if should_execute:
        action = self.safe_execute_node
    else:
        action = self.safe_compile_node

    node_results = []
    for node_list in node_dependency_list:
        # the enumerate index used here before was never read and shadowed
        # the counter ``i`` above, so iterate the nodes directly
        for node in node_list:
            if node.get('skip'):
                node_name = node.get('name')
                dbt.ui.printer.print_skip_line(node, schema_name, node_name,
                                               get_idx(node), num_nodes)
                node_results.append(RunModelResult(node, skip=True))

        nodes_to_execute = [
            node for node in node_list if not node.get('skip')
        ]

        try:
            args_list = [
                {
                    'node': node,
                    'flat_graph': flat_graph,
                    'existing': existing,
                    'schema_name': schema_name,
                    'node_index': get_idx(node),
                    'num_nodes': num_nodes
                }
                for node in nodes_to_execute
            ]

            for result in pool.imap_unordered(action, args_list):
                node_results.append(result)

                # propagate so that CTEs get injected properly
                node_id = result.node.get('unique_id')
                flat_graph['nodes'][node_id] = result.node

                index = get_idx(result.node)
                if should_execute:
                    track_model_run(index, num_nodes, result)

                if result.errored:
                    on_failure(result.node)
                    logger.info(result.error)

        except KeyboardInterrupt:
            pool.close()
            pool.terminate()

            profile = self.project.run_environment()
            adapter = get_adapter(profile)

            for conn_name in adapter.cancel_open_connections(profile):
                dbt.ui.printer.print_cancel_line(conn_name, schema_name)

            pool.join()
            raise

    pool.close()
    pool.join()

    if should_run_hooks:
        self.run_hooks(profile, flat_graph, dbt.utils.RunHookType.End)

    execution_time = time.time() - start_time

    if should_execute:
        print_results_line(node_results, execution_time)

    return node_results
def safe_execute_node(self, data):
    """Compile and run one node, turning known failures into a result.

    ``data`` is a dict with the keys ``node``, ``flat_graph``,
    ``existing``, ``schema_name``, ``node_index`` and ``num_nodes``.
    Ephemeral nodes are compiled but neither executed nor printed.

    Returns a ``RunModelResult``. Compilation errors and database/runtime
    errors become ``ERROR`` results, an aborted-transaction error becomes
    a ``SKIP`` result, and any other exception is logged and re-raised.
    The node's connection is released in every case where an adapter was
    obtained.
    """
    node = data['node']
    flat_graph = data['flat_graph']
    existing = data['existing']
    schema_name = data['schema_name']
    node_index = data['node_index']
    num_nodes = data['num_nodes']

    start_time = time.time()

    error = None
    status = None
    is_ephemeral = (get_materialization(node) == 'ephemeral')

    # Bound before the ``try`` so that ``finally`` can tell whether an
    # adapter was ever obtained. Previously a failure before these were
    # assigned made ``finally`` raise UnboundLocalError, hiding the real
    # error.
    profile = None
    adapter = None

    try:
        if not is_ephemeral:
            print_start_line(node, schema_name, node_index, num_nodes)

        profile = self.project.run_environment()
        adapter = get_adapter(profile)

        node = self.compile_node(node, flat_graph)

        if not is_ephemeral:
            node, status = self.execute_node(node, flat_graph, existing,
                                             profile, adapter)

    except dbt.exceptions.CompilationException as e:
        return RunModelResult(node, error=str(e), status='ERROR')

    except (RuntimeError,
            dbt.exceptions.ProgrammingException,
            psycopg2.ProgrammingError,
            psycopg2.InternalError) as e:
        prefix = "Error executing {}\n".format(node.get('build_path'))
        error = "{}{}".format(dbt.ui.printer.red(prefix), str(e).strip())
        status = "ERROR"
        logger.debug(error)

        # an aborted transaction is reported as a skip, not an error
        if isinstance(e, psycopg2.InternalError) and \
           ABORTED_TRANSACTION_STRING == e.diag.message_primary:
            return RunModelResult(
                node,
                error='{}\n'.format(ABORTED_TRANSACTION_STRING),
                status="SKIP")

    except dbt.exceptions.InternalException as e:
        build_path = node.get('build_path')
        prefix = 'Internal error executing {}'.format(build_path)
        error = "{prefix}\n{error}\n\n{note}".format(
            prefix=dbt.ui.printer.red(prefix),
            error=str(e).strip(),
            note=INTERNAL_ERROR_STRING)
        logger.debug(error)
        status = "ERROR"

    except Exception as e:
        prefix = "Unhandled error while executing {filepath}".format(
            filepath=node.get('build_path'))
        error = "{prefix}\n{error}".format(
            prefix=dbt.ui.printer.red(prefix),
            error=str(e).strip())
        logger.debug(error)
        # bare raise keeps the original traceback intact
        raise

    finally:
        if adapter is not None:
            adapter.release_connection(profile, node.get('name'))

    execution_time = time.time() - start_time

    result = RunModelResult(node,
                            error=error,
                            status=status,
                            execution_time=execution_time)

    if not is_ephemeral:
        print_result_line(result, schema_name, node_index, num_nodes)

    return result
def create_relation(self, target_model, name):
    """Build the relation that a reference to ``target_model`` resolves to.

    Ephemeral models go through ``create_ephemeral_relation`` (which
    receives ``name``); every other materialization is built with
    ``self.Relation.create_from`` using this object's config.
    """
    if get_materialization(target_model) != 'ephemeral':
        return self.Relation.create_from(self.config, target_model)

    return self.create_ephemeral_relation(target_model, name)