def load_manifest(config):
    """Load the manifest for ``config`` and write it under the target path.

    Whatever the adapter's ``check_internal_manifest()`` returns is passed to
    the loader as ``internal_manifest`` so that internal macros do not have to
    be parsed a second time.

    Returns the loaded manifest.
    """
    adapter = get_adapter(config)
    preloaded = adapter.check_internal_manifest()

    loaded = GraphLoader.load_all(config, internal_manifest=preloaded)

    destination = os.path.join(config.target_path, MANIFEST_FILE_NAME)
    loaded.write(destination)
    return loaded
def _connection_result(self):
    """Run a trivial query through the adapter and report the outcome.

    On any exception a formatted help message is appended to
    ``self.messages`` and the red ``'ERROR'`` label is returned; otherwise
    the green OK label is returned.
    """
    probe = get_adapter(self.profile)
    try:
        probe.execute('select 1 as id')
    except Exception as exc:
        self.messages.append(
            COULD_NOT_CONNECT_MESSAGE.format(
                err=str(exc),
                url=ProfileConfigDocs,
            )
        )
        return red('ERROR')
    else:
        return green('OK connection ok')
def get_runner(self, node):
    """Build a runner instance for ``node``.

    Non-ephemeral nodes bump ``self.run_count`` and are handed the new count
    together with ``self.num_nodes``; ephemeral models are handed ``0, 0``
    and leave the counter untouched.
    """
    adapter = get_adapter(self.config)

    if not node.is_ephemeral_model:
        self.run_count += 1
        position, total = self.run_count, self.num_nodes
    else:
        position, total = 0, 0

    runner_cls = self.get_runner_type()
    return runner_cls(self.config, adapter, node, position, total)
def tearDown(self):
    """Clean up files, schemas and adapter connections after a test.

    The currently registered adapter is fetched and, when it differs from
    the one stored on the test case, its connections are cleaned up as well.
    If the test case never stored an adapter, the current one is adopted.
    """
    self._clean_files()

    # get any current run adapter and clean up its connections before we
    # reset them. It'll probably be different from ours because
    # handle_and_check() calls reset_adapters().
    adapter = get_adapter(self.config)

    # The attribute check has to come first: reading self.adapter before
    # confirming it exists raises AttributeError and makes the fallback
    # assignment unreachable.
    if not hasattr(self, 'adapter'):
        self.adapter = adapter
    elif adapter is not self.adapter:
        adapter.cleanup_connections()

    self._drop_schemas()
    self.adapter.cleanup_connections()
    reset_adapters()
def run(self):
    """Load the manifest and execute the requested macro via the adapter.

    The macro's package and name come from ``self._get_macro_parts()`` and
    its keyword arguments from ``self._get_kwargs()``. Returns whatever
    ``adapter.execute_macro`` returns.
    """
    loaded_manifest = GraphLoader.load_all(self.config)
    adapter = get_adapter(self.config)

    package_name, macro_name = self._get_macro_parts()
    macro_kwargs = self._get_kwargs()

    return adapter.execute_macro(
        macro_name,
        project=package_name,
        kwargs=macro_kwargs,
        manifest=loaded_manifest,
        connection_name="macro_{}".format(macro_name),
    )
def execute_nodes(self):
    """Run the node queue on a thread pool and return ``self.node_results``.

    On ``KeyboardInterrupt`` the pool is closed and terminated. If the
    adapter supports cancellation, open connections are cancelled, the pool
    is joined and the end-of-run messages are printed; otherwise a warning
    is printed. The interrupt is re-raised in both cases.
    """
    thread_count = self.config.threads
    target = self.config.target_name

    banner_template = "Concurrency: {} threads (target='{}')"
    banner = banner_template.format(thread_count, target)
    dbt.ui.printer.print_timestamped_line(banner)
    dbt.ui.printer.print_timestamped_line("")

    workers = ThreadPool(thread_count)
    try:
        self.run_queue(workers)
    except KeyboardInterrupt:
        workers.close()
        workers.terminate()

        adapter = get_adapter(self.config)
        if adapter.is_cancelable():
            for conn_name in adapter.cancel_open_connections():
                dbt.ui.printer.print_cancel_line(conn_name)

            workers.join()

            dbt.ui.printer.print_run_end_messages(self.node_results,
                                                  early_exit=True)
        else:
            warning = ("The {} adapter does not support query "
                       "cancellation. Some queries may still be "
                       "running!".format(adapter.type()))
            warning_color = dbt.ui.printer.COLOR_FG_YELLOW
            dbt.ui.printer.print_timestamped_line(warning, warning_color)

        # propagate the interrupt to the caller either way
        raise

    workers.close()
    workers.join()
    return self.node_results
def run(self):
    """Optionally compile, then build the catalog and write it as JSON.

    When ``self.args.compile`` is set and any compile result carries an
    error, only ``{'compile_results': ...}`` is returned. Otherwise the docs
    index file is copied into the target path, the adapter's catalog is
    fetched, converted to a nested dict, written to the catalog file, and
    returned with ``compile_results`` added.
    """
    compile_results = None
    if self.args.compile:
        compile_results = super(GenerateTask, self).run()
        compile_failed = any(r.error is not None for r in compile_results)
        if compile_failed:
            dbt.ui.printer.print_timestamped_line(
                'compile failed, cannot generate docs'
            )
            return {'compile_results': compile_results}

    index_destination = os.path.join(self.config.target_path, 'index.html')
    shutil.copyfile(DOCS_INDEX_FILE_PATH, index_destination)

    manifest = self._get_manifest()
    adapter = get_adapter(self.config)

    dbt.ui.printer.print_timestamped_line("Building catalog")
    catalog_table = adapter.get_catalog(manifest)

    # one dict per row, keyed by the table's column names
    flat_rows = [
        dict(zip(catalog_table.column_names, row))
        for row in catalog_table
    ]
    nested_results = unflatten(flat_rows)

    results = {
        'nodes': incorporate_catalog_unique_ids(nested_results, manifest),
        'generated_at': dbt.utils.timestring(),
    }

    path = os.path.join(self.config.target_path, CATALOG_FILENAME)
    write_json(path, results)

    dbt.ui.printer.print_timestamped_line(
        'Catalog written to {}'.format(os.path.abspath(path))
    )

    # compile_results is added only after serialization, so it is not part
    # of the file on disk; it exists to make interpret_results happy.
    results['compile_results'] = compile_results
    return results
def __init__(
    self,
    model,
    config: RuntimeConfig,
    manifest: Manifest,
    provider: Provider,
    context_config: Optional[ContextConfigType],
) -> None:
    """Set up the context for ``model`` using the given provider.

    Raises ``InternalException`` when ``provider`` is ``None``. The adapter
    is looked up from ``self.config`` and wrapped with the provider's
    ``DatabaseWrapper``.
    """
    if provider is None:
        message = "Invalid provider given to context: {}".format(provider)
        raise InternalException(message)

    # mypy appeasement - we know it'll be a RuntimeConfig
    self.config: RuntimeConfig

    super().__init__(config, manifest, model.package_name)

    self.sql_results: Dict[str, AttrDict] = {}
    self.model: Union[ParsedMacro, NonSourceNode] = model
    self.context_config: Optional[ContextConfigType] = context_config
    self.provider: Provider = provider

    adapter = get_adapter(self.config)
    self.adapter = adapter
    self.db_wrapper = self.provider.DatabaseWrapper(self.adapter)
def execute_with_hooks(self, selected_uids: AbstractSet[str]):
    """Execute the nodes between the before/after hooks and build a result.

    The adapter's connections are always cleaned up, even when a hook or
    the execution raises. ``elapsed`` covers ``before_run`` through
    ``after_run``.
    """
    adapter = get_adapter(self.config)
    try:
        self.before_hooks(adapter)
        started = time.time()
        self.before_run(adapter, selected_uids)
        node_results = self.execute_nodes()
        self.after_run(adapter, node_results)
        elapsed = time.time() - started
        self.after_hooks(adapter, node_results, elapsed)
    finally:
        adapter.cleanup_connections()

    return self.get_result(
        results=node_results,
        elapsed_time=elapsed,
        generated_at=datetime.utcnow(),
    )
def tearDown(self):
    """Remove the files written for the test and drop its schema.

    Deletes the profiles file and ``dbt_project.yml``, removes (or, if that
    fails, renames) the ``dbt_modules`` directory, drops the test schema and
    closes the connection handle and adapter connections.
    """
    os.remove(DBT_PROFILES)
    os.remove("dbt_project.yml")

    # quick fix for windows bug that prevents us from deleting dbt_modules
    try:
        if os.path.exists('dbt_modules'):
            shutil.rmtree('dbt_modules')
    except OSError:
        # Only filesystem failures are handled here; a bare ``except`` would
        # also swallow KeyboardInterrupt and SystemExit.
        os.rename("dbt_modules", "dbt_modules-{}".format(time.time()))

    self.adapter = get_adapter(self._profile)

    self._drop_schema()

    # hack for BQ -- TODO
    if hasattr(self.handle, 'close'):
        self.handle.close()

    self.adapter.cleanup_connections()
def handle_request(self) -> RemoteExecutionResult:
    """Execute the request's node on a worker thread and return the result.

    A ``KeyboardInterrupt`` is converted into ``RPCKilledException``; before
    raising, open connections are cancelled and the worker joined when the
    adapter supports cancellation, otherwise a debug message is logged.
    """
    # we could get a ctrl+c at any time, including during parsing.
    worker = None
    started = datetime.utcnow()
    try:
        node = self._get_exec_node()

        selected_uids = [node.unique_id]
        self.runtime_cleanup(selected_uids)

        finished = threading.Event()
        worker = threading.Thread(
            target=self._in_thread,
            args=(node, finished),
        )
        worker.start()
        finished.wait()
    except KeyboardInterrupt:
        adapter = get_adapter(self.config)  # type: ignore
        if not adapter.is_cancelable():
            msg = ("The {} adapter does not support query "
                   "cancellation. Some queries may still be "
                   "running!".format(adapter.type()))
            logger.debug(msg)
        else:
            for conn_name in adapter.cancel_open_connections():
                logger.debug('canceled query {}'.format(conn_name))
            # the worker may not exist yet if the interrupt came early
            if worker:
                worker.join()

        raise RPCKilledException(signal.SIGINT)

    self._raise_set_error()

    ended = datetime.utcnow()
    elapsed = (ended - started).total_seconds()
    return self.get_result(
        results=self.node_results,
        elapsed_time=elapsed,
        generated_at=ended,
    )
def get_compiler_context(self, model, flat_graph):
    """Assemble the template context used to compile ``model``.

    Starts from the project's context, adds the built-in entries (``ref``,
    ``config``, ``this``, ``var``, ``target``, ``adapter``, ``flags`` and
    the wrapper's context functions), then registers every macro from
    ``flat_graph`` under its package name. Macros from the model's own
    package or the global project are also added at the top level.
    """
    context = self.project.context()
    profile = self.project.run_environment()
    adapter = get_adapter(profile)

    wrapper = dbt.wrapper.DatabaseWrapper(model, adapter, profile)

    # built-ins
    context['ref'] = self.__ref(context, model, flat_graph)
    context['config'] = self.__model_config(model)
    context['this'] = This(
        context['env']['schema'],
        dbt.utils.model_immediate_name(model, dbt.flags.NON_DESTRUCTIVE),
        model.get('name'),
    )
    context['var'] = Var(model, context=context)
    context['target'] = self.project.get_target()
    context['adapter'] = wrapper
    context['flags'] = dbt.flags

    context.update(wrapper.get_context_functions())

    # these two are stored as literal template expressions
    context['run_started_at'] = '{{ run_started_at }}'
    context['invocation_id'] = '{{ invocation_id }}'
    context['sql_now'] = adapter.date_function()

    for _unique_id, macro in flat_graph.get('macros').items():
        package_name = macro.get('package_name')
        macro_map = {macro.get('name'): macro.get('parsed_macro')}

        # make sure a per-package namespace exists before filling it
        if context.get(package_name) is None:
            context[package_name] = {}
        context[package_name].update(macro_map)

        if (package_name == model.get('package_name') or
                package_name == dbt.include.GLOBAL_PROJECT_NAME):
            context.update(macro_map)

    return context
def load_config(self):
    """Build a runtime config and adapter for the test, then reset schemas.

    The profile and project have already been written at this point. The
    adapter's connections are cleaned up right away so a different
    connection handle is used and the test does not look into an incomplete
    transaction.
    """
    cli_kwargs = dict(
        profile=None,
        profile_dir=DBT_CONFIG_DIR,
        target=None,
    )

    runtime_config = RuntimeConfig.from_args(TestArgs(cli_kwargs))

    fresh_adapter = get_adapter(runtime_config)
    fresh_adapter.cleanup_connections()

    self.adapter_type = fresh_adapter.type()
    self.adapter = fresh_adapter
    self.config = runtime_config

    self._drop_schemas()
    self._create_schemas()
def load_config(self):
    """Create the runtime config and adapter for the test and reset schemas.

    Runs after the profile and project files have been written. Connections
    are cleaned up immediately so that a different connection handle is
    used and an incomplete transaction is never observed.
    """
    args = TestArgs(dict(
        profile=None,
        profile_dir=DBT_CONFIG_DIR,
        target=None,
    ))
    loaded_config = RuntimeConfig.from_args(args)

    test_adapter = get_adapter(loaded_config)
    test_adapter.cleanup_connections()

    self.adapter_type = test_adapter.type()
    self.adapter = test_adapter
    self.config = loaded_config

    self._drop_schemas()
    self._create_schemas()
def get_sort_qualifier(model, project):
    """Return the adapter's sort qualifier for ``model``, or ``''``.

    An empty string is returned when the model has no ``sort`` config or
    when its materialization is neither ``table`` nor ``incremental``.
    A non-string ``sort_type`` is reported through ``compiler_error``.
    """
    # Use one view of the config throughout: indexing model['config']
    # directly raised KeyError for models without a config entry even though
    # a default had just been supplied via .get().
    model_config = model.get('config') or {}

    if 'sort' not in model_config:
        return ''

    if get_materialization(model) not in ('table', 'incremental'):
        return ''

    sort_keys = model_config.get('sort')
    sort_type = model_config.get('sort_type', 'compound')

    if not isinstance(sort_type, basestring):
        compiler_error(
            model,
            "The provided sort_type '{}' is not valid!".format(sort_type))

    sort_type = sort_type.strip().lower()

    adapter = get_adapter(project.run_environment())
    return adapter.sort_qualifier(sort_type, sort_keys)
def tearDown(self):
    """Release adapter connections, drop schemas and remove the test root.

    The adapter is registered and fetched again; if it is not the adapter
    stored on the test case its connections are cleaned up too. A failure
    to delete the test root directory is logged rather than raised.
    """
    # get any current run adapter and clean up its connections before we
    # reset them. It'll probably be different from ours because
    # handle_and_check() calls reset_adapters().
    register_adapter(self.config)
    adapter = get_adapter(self.config)

    # Check for the attribute before reading it; the reverse order raises
    # AttributeError and makes the fallback assignment unreachable.
    if not hasattr(self, 'adapter'):
        self.adapter = adapter
    elif adapter is not self.adapter:
        adapter.cleanup_connections()

    self._drop_schemas()

    self.adapter.cleanup_connections()
    reset_adapters()
    os.chdir(INITIAL_ROOT)
    try:
        shutil.rmtree(self.test_root_dir)
    except EnvironmentError:
        logger.exception('Could not clean up after test - {} not removable'
                         .format(self.test_root_dir))
def run(self):
    """
    Run dbt for the query, based on the graph.

    Returns ``[]`` without touching the hooks when there are no flattened
    nodes. Otherwise the Runner's before/after hooks wrap
    ``execute_nodes()``, connections are always cleaned up, and an
    ``ExecutionResult`` is written before the node results are returned.
    """
    adapter = get_adapter(self.config)

    nodes = self._flattened_nodes
    if len(nodes) == 0:
        logger.info("WARNING: Nothing to do. Try checking your model "
                    "configs and model specification args")
        return []

    runner = self.Runner
    if runner.print_header:
        stat_line = dbt.ui.printer.get_counts(nodes)
        logger.info("")
        dbt.ui.printer.print_timestamped_line(stat_line)
        dbt.ui.printer.print_timestamped_line("")
    else:
        logger.info("")

    try:
        runner.before_hooks(self.config, adapter, self.manifest)
        started = time.time()
        runner.before_run(self.config, adapter, self.manifest)
        res = self.execute_nodes()
        runner.after_run(self.config, adapter, res, self.manifest)
        elapsed = time.time() - started
        runner.after_hooks(self.config, adapter, res, self.manifest,
                           elapsed)
    finally:
        adapter.cleanup_connections()

    self.write_results(ExecutionResult(
        results=res,
        elapsed_time=elapsed,
        generated_at=dbt.utils.timestring(),
    ))

    return res
def execute(self, model):
    """Run a compiled test query and return its single scalar result.

    Raises ``RuntimeError`` when the query does not return exactly one row
    containing exactly one column.
    """
    profile = self.project.run_environment()
    adapter = get_adapter(profile)

    _, cursor = adapter.execute_one(
        profile, model.compiled_contents, model.name)

    # close the cursor even if fetching the rows fails
    try:
        rows = cursor.fetchall()
    finally:
        cursor.close()

    # An empty result previously fell through to rows[0] and surfaced as an
    # IndexError; report it with the same message as too many rows.
    if len(rows) != 1:
        raise RuntimeError(
            "Bad test {name}: Returned {num_rows} rows instead of 1".
            format(name=model.name, num_rows=len(rows)))

    row = rows[0]
    if len(row) != 1:
        raise RuntimeError(
            "Bad test {name}: Returned {num_cols} cols instead of 1".
            format(name=model.name, num_cols=len(row)))

    return row[0]
def tearDown(self):
    """Remove the files written for the test and drop its schema.

    Deletes the profiles file and ``dbt_project.yml``, removes (or, if that
    fails, renames) the ``dbt_modules`` directory, then drops the unique
    schema through the adapter for bigquery or through raw SQL otherwise.
    """
    os.remove(DBT_PROFILES)
    os.remove("dbt_project.yml")

    # quick fix for windows bug that prevents us from deleting dbt_modules
    try:
        if os.path.exists('dbt_modules'):
            shutil.rmtree('dbt_modules')
    except OSError:
        # Only filesystem failures are handled here; a bare ``except`` would
        # also swallow KeyboardInterrupt and SystemExit.
        os.rename("dbt_modules", "dbt_modules-{}".format(time.time()))

    if self.adapter_type == 'bigquery':
        adapter = get_adapter(self.profile)
        adapter.drop_schema(self.profile, self.unique_schema(), '__test')
    else:
        self.run_sql('DROP SCHEMA IF EXISTS "{}" CASCADE'.format(
            self.unique_schema()))
        self.handle.close()

    # hack for BQ -- TODO
    # NOTE(review): on the non-bigquery path this closes the handle a second
    # time; confirm the handle tolerates a repeated close.
    if hasattr(self.handle, 'close'):
        self.handle.close()
def _compile_ancestors(self, unique_id: str):
    """Compile the ephemeral ancestors of ``unique_id`` in dependency order.

    Each ancestor is compiled with ``write=False`` and the compiled node
    replaces the parsed one in ``self.manifest.nodes``.

    Raises ``InternalException`` when the linker or manifest is not set.
    """
    # this just gets a transitive closure of the nodes. We could build a
    # special GraphQueue around this, but we do them all in the main thread
    # so we only care about preserving dependency order anyway
    missing_state = self.linker is None or self.manifest is None
    if missing_state:
        raise InternalException(
            'linker and manifest not set in _compile_ancestors')

    ordered_ids = self.linker.sorted_ephemeral_ancestors(
        self.manifest, unique_id,
    )

    # We're just compiling, so we don't need to use a graph queue
    adapter = get_adapter(self.config)  # type: ignore
    for ancestor_id in ordered_ids:
        # for each node, compile it + overwrite it
        parsed = self.manifest.expect(ancestor_id)
        compiled = compile_node(adapter, self.config, parsed,
                                self.manifest, {}, write=False)
        self.manifest.nodes[ancestor_id] = compiled
def run(self):
    """Optionally compile, then build the catalog and write it as JSON.

    When ``self.args.compile`` is set and any compile result errored, only
    ``{'compile_results': ...}`` is returned. Otherwise the docs index file
    is copied into the project's target path, the adapter's catalog is
    fetched, nested, written to the catalog file, and returned with
    ``compile_results`` added.
    """
    compile_results = None
    if self.args.compile:
        compile_results = super(GenerateTask, self).run()
        compile_failed = any(r.errored for r in compile_results)
        if compile_failed:
            dbt.ui.printer.print_timestamped_line(
                'compile failed, cannot generate docs')
            return {'compile_results': compile_results}

    index_destination = os.path.join(self.project['target-path'],
                                     'index.html')
    shutil.copyfile(DOCS_INDEX_FILE_PATH, index_destination)

    manifest = self._get_manifest()
    profile = self.project.run_environment()
    adapter = get_adapter(profile)

    dbt.ui.printer.print_timestamped_line("Building catalog")
    catalog_table = adapter.get_catalog(profile, self.project.cfg, manifest)

    # one dict per row, keyed by the table's column names
    flat_rows = [
        dict(zip(catalog_table.column_names, row))
        for row in catalog_table
    ]
    nested_results = unflatten(flat_rows)

    results = {
        'nodes': incorporate_catalog_unique_ids(nested_results, manifest),
        'generated_at': dbt.utils.timestring(),
    }

    path = os.path.join(self.project['target-path'], CATALOG_FILENAME)
    write_json(path, results)

    dbt.ui.printer.print_timestamped_line('Catalog written to {}'.format(
        os.path.abspath(path)))

    # compile_results is added only after serialization, so it is not part
    # of the file on disk; it exists to make interpret_results happy.
    results['compile_results'] = compile_results
    return results
def get_full_manifest(
    config: RuntimeConfig,
    *,
    reset: bool = False,
) -> Manifest:
    """Load the full manifest, reusing the adapter's macro manifest.

    The adapter's macro manifest is always loaded here (and passed on to
    ``load_manifest``) so internal macros are not parsed a second time and
    the manifest is attached to the adapter for methods that need it.

    With ``reset=True`` the config's dependencies and the adapter's macro
    manifest are cleared first.
    """
    adapter = get_adapter(config)  # type: ignore

    if reset:
        config.clear_dependencies()
        adapter.clear_macro_manifest()

    macro_manifest: Manifest = adapter.load_macro_manifest()
    query_header_hook = adapter.connections.set_query_header

    return load_manifest(config, macro_manifest, query_header_hook)
def execute(self, model):
    """Materialize ``model`` through the adapter and return its status.

    Steps, in order: clear the temporary relation (truncate for tables in
    non-destructive mode, drop otherwise), execute the model, drop the
    final relation unless it is a table in non-destructive mode, rename the
    temporary relation when the model asks for it, then commit.
    """
    profile = self.project.run_environment()
    adapter = get_adapter(profile)

    if model.tmp_drop_type is not None:
        if model.materialization == 'table' and self.is_non_destructive():
            adapter.truncate(
                profile=profile,
                table=model.tmp_name,
                model_name=model.name,
            )
        else:
            adapter.drop(
                profile=profile,
                relation=model.tmp_name,
                relation_type=model.tmp_drop_type,
                model_name=model.name,
            )

    status = adapter.execute_model(profile=profile, model=model)

    if model.final_drop_type is not None:
        # a table in non-destructive mode was just truncated and refilled,
        # so there is nothing to drop in that case
        if not (model.materialization == 'table'
                and self.is_non_destructive()):
            adapter.drop(
                profile=profile,
                relation=model.name,
                relation_type=model.final_drop_type,
                model_name=model.name,
            )

    if model.should_rename(self.project.args):
        adapter.rename(
            profile=profile,
            from_name=model.tmp_name,
            to_name=model.name,
            model_name=model.name,
        )

    adapter.commit(profile=profile)

    return status
def use_profile(self, adapter_type):
    """Write the profiles file for ``adapter_type`` and prepare the schema.

    The default profile for the adapter type is merged with
    ``self.profile_config``, dumped to the profiles file, and the
    ``default2`` output of the ``test`` profile is used to obtain an
    adapter and a connection handle. The unique schema is then dropped and
    recreated.
    """
    self.adapter_type = adapter_type

    # defaults for this adapter type first, then the test's overrides
    merged_config = {}
    merged_config.update(self.get_profile(adapter_type))
    merged_config.update(self.profile_config)

    if not os.path.exists(DBT_CONFIG_DIR):
        os.makedirs(DBT_CONFIG_DIR)

    with open(DBT_PROFILES, 'w') as profiles_file:
        yaml.safe_dump(merged_config, profiles_file, default_flow_style=True)

    outputs = merged_config.get('test').get('outputs')
    profile = outputs.get('default2')

    adapter = get_adapter(profile)
    self.adapter = adapter

    # it's important to use a different connection handle here so
    # we don't look into an incomplete transaction
    connection = adapter.acquire_connection(profile, '__test')
    self.handle = connection.get('handle')
    self.adapter_type = profile.get('type')
    self._profile_config = merged_config
    self._profile = profile

    if self.adapter_type != 'bigquery':
        quoted_schema = self.adapter.quote(self.unique_schema())
        self.run_sql('DROP SCHEMA IF EXISTS {} CASCADE'.format(quoted_schema))
        self.run_sql('CREATE SCHEMA {}'.format(quoted_schema))
    else:
        adapter.drop_schema(profile, self.project,
                            self.unique_schema(), '__test')
        adapter.create_schema(profile, self.project,
                              self.unique_schema(), '__test')
def __init__(self, project, target_path, args):
    """Store the run settings and build the initial runtime context.

    The thread count comes from ``args.threads`` when given, otherwise from
    the profile (default ``1``). The relations already present in the
    adapter's default schema are cached on ``self.existing_models``, and
    ``self.context`` exposes adapter lookups bound to the profile.
    """
    self.project = project
    self.target_path = target_path
    self.args = args

    profile = self.project.run_environment()

    # TODO validate the number of threads
    requested_threads = self.args.threads
    if requested_threads is not None:
        self.threads = requested_threads
    else:
        self.threads = profile.get('threads', 1)

    adapter = get_adapter(profile)
    default_schema = adapter.get_default_schema(profile)
    self.existing_models = adapter.query_for_existing(profile,
                                                      default_schema)

    # The helpers below close over ``adapter`` and ``profile`` so callers
    # only supply schema and table names.
    def call_get_columns_in_table(schema_name, table_name):
        return adapter.get_columns_in_table(
            profile, schema_name, table_name)

    def call_get_missing_columns(from_schema, from_table,
                                 to_schema, to_table):
        return adapter.get_missing_columns(
            profile, from_schema, from_table, to_schema, to_table)

    def call_table_exists(schema, table):
        return adapter.table_exists(profile, schema, table)

    self.context = {
        "run_started_at": datetime.now(),
        "invocation_id": dbt.tracking.active_user.invocation_id,
        "get_columns_in_table": call_get_columns_in_table,
        "get_missing_columns": call_get_missing_columns,
        "already_exists": call_table_exists,
    }
def run(self):
    """
    Run dbt for the query, based on the graph.

    Returns ``[]`` when there are no flattened nodes. Otherwise the
    before/after hooks wrap ``execute_nodes()``, connections are always
    cleaned up, the result object is written to ``self.result_path()`` and
    the node results are returned.
    """
    self._runtime_initialize()
    adapter = get_adapter(self.config)

    nodes = self._flattened_nodes
    if len(nodes) == 0:
        logger.info("WARNING: Nothing to do. Try checking your model "
                    "configs and model specification args")
        return []

    logger.info("")

    selected_uids = frozenset(node.unique_id for node in nodes)
    try:
        self.before_hooks(adapter)
        started = time.time()
        self.before_run(adapter, selected_uids)
        res = self.execute_nodes()
        self.after_run(adapter, res)
        elapsed = time.time() - started
        self.after_hooks(adapter, res, elapsed)
    finally:
        adapter.cleanup_connections()

    run_result = self.get_result(
        results=res,
        elapsed_time=elapsed,
        generated_at=dbt.utils.timestring(),
    )
    run_result.write(self.result_path())

    self.task_end_messages(res)
    return res
def _get_exec_node(self):
    """Parse the request's SQL (and any macros) into a node and compile.

    Macros extracted from the request are parsed and merged into the
    manifest's macros, the SQL is parsed into an RPC node, its refs are
    added to the manifest, and the graph is recompiled with ``write=False``.

    Returns the parsed RPC node. Raises ``InternalException`` when the
    manifest is not set.
    """
    if self.manifest is None:
        raise InternalException(
            'manifest not set in _get_exec_node'
        )

    macro_overrides = {}
    # The macros come from the request payload; the earlier read of
    # self.args.macros was overwritten here before ever being used.
    sql, macros = self._extract_request_data(self.args.sql)

    if macros:
        macro_parser = RPCMacroParser(self.config, self.manifest)
        for node in macro_parser.parse_remote(macros):
            macro_overrides[node.unique_id] = node
        self.manifest.macros.update(macro_overrides)

    rpc_parser = RPCCallParser(
        project=self.config,
        manifest=self.manifest,
        root_project=self.config,
    )
    rpc_node = rpc_parser.parse_remote(sql, self.args.name)
    add_new_refs(
        manifest=self.manifest,
        config=self.config,
        node=rpc_node,
        macros=macro_overrides
    )

    # don't write our new, weird manifest!
    adapter = get_adapter(self.config)
    compiler = adapter.get_compiler()
    self.graph = compiler.compile(self.manifest, write=False)
    # previously, this compiled the ancestors, but they are compiled at
    # runtime now.
    return rpc_node
def run_from_graph(self, Selector, Runner, query):
    """Compile the project, select nodes for ``query`` and run them.

    Returns ``[]`` when the selection flattens to no nodes. Otherwise the
    Runner's before/after hooks wrap ``execute_nodes`` and the adapter's
    connections are always cleaned up before the results are returned.
    """
    flat_graph, linker = self.compile(self.project)

    selector = Selector(linker, flat_graph)
    dep_list = selector.as_node_list(selector.select(query))

    profile = self.project.run_environment()
    adapter = get_adapter(profile)

    flat_nodes = dbt.utils.flatten_nodes(dep_list)
    if len(flat_nodes) == 0:
        logger.info("WARNING: Nothing to do. Try checking your model "
                    "configs and model specification args")
        return []

    if Runner.print_header:
        stat_line = dbt.ui.printer.get_counts(flat_nodes)
        logger.info("")
        dbt.ui.printer.print_timestamped_line(stat_line)
        dbt.ui.printer.print_timestamped_line("")
    else:
        logger.info("")

    try:
        Runner.before_hooks(self.project, adapter, flat_graph)
        started = time.time()
        Runner.before_run(self.project, adapter, flat_graph)
        res = self.execute_nodes(linker, Runner, flat_graph, dep_list)
        Runner.after_run(self.project, adapter, res, flat_graph)
        elapsed = time.time() - started
        Runner.after_hooks(self.project, adapter, res, flat_graph, elapsed)
    finally:
        adapter.cleanup_connections()

    return res
def run(self):
    """Optionally compile, then build the catalog and write it to disk.

    When ``self.args.compile`` is set and any compile result carries an
    error, an empty ``CatalogResults`` is returned. Otherwise the docs
    index file is copied into the target path, the catalog is fetched
    inside a named connection, and the catalog results and the manifest
    are written out.
    """
    compile_results = None
    if self.args.compile:
        compile_results = CompileTask.run(self)
        compile_failed = any(r.error is not None for r in compile_results)
        if compile_failed:
            dbt.ui.printer.print_timestamped_line(
                'compile failed, cannot generate docs')
            return CatalogResults({}, datetime.utcnow(), compile_results)

    index_destination = os.path.join(self.config.target_path, 'index.html')
    shutil.copyfile(DOCS_INDEX_FILE_PATH, index_destination)

    adapter = get_adapter(self.config)
    with adapter.connection_named('generate_catalog'):
        dbt.ui.printer.print_timestamped_line("Building catalog")
        catalog_table = adapter.get_catalog(self.manifest)

    # one dict per row, with each value passed through _coerce_decimal
    catalog_data: List[PrimitiveDict] = [
        dict(zip(catalog_table.column_names, map(_coerce_decimal, row)))
        for row in catalog_table
    ]
    catalog = Catalog(catalog_data)

    results = self.get_catalog_results(
        nodes=catalog.make_unique_id_map(self.manifest),
        generated_at=datetime.utcnow(),
        compile_results=compile_results,
    )

    path = os.path.join(self.config.target_path, CATALOG_FILENAME)
    results.write(path)
    write_manifest(self.config, self.manifest)

    dbt.ui.printer.print_timestamped_line('Catalog written to {}'.format(
        os.path.abspath(path)))
    return results
def do_ref(*args):
    """Resolve a ``ref(...)`` call into a relation for the target model.

    Accepts ``(name,)`` or ``(package, name)``; any other arity is reported
    through ``ref_invalid_args``. The target must resolve and must be
    listed in the calling model's ``depends_on`` nodes. Ephemeral targets
    are registered in the calling model's ``extra_ctes``.

    Relies on ``model``, ``flat_graph``, ``current_project`` and
    ``profile`` from the enclosing scope.
    """
    target_model_name = None
    target_model_package = None

    arg_count = len(args)
    if arg_count == 2:
        target_model_package, target_model_name = args
    elif arg_count == 1:
        target_model_name = args[0]
    else:
        dbt.exceptions.ref_invalid_args(model, args)

    target_model = dbt.parser.resolve_ref(
        flat_graph,
        target_model_name,
        target_model_package,
        current_project,
        model.get('package_name'))

    if target_model is None:
        dbt.exceptions.ref_target_not_found(
            model, target_model_name, target_model_package)

    target_model_id = target_model.get('unique_id')

    declared_dependencies = model.get('depends_on', {}).get('nodes')
    if target_model_id not in declared_dependencies:
        dbt.exceptions.ref_bad_context(
            model, target_model_name, target_model_package)

    if dbt.utils.get_materialization(target_model) == 'ephemeral':
        model['extra_ctes'][target_model_id] = None

    return dbt.utils.Relation(profile, get_adapter(profile), target_model)
def use_profile(self, adapter_type):
    """Write the profiles file for ``adapter_type`` and recreate the schema.

    The default profile for the adapter type is merged with
    ``self.profile_config`` and dumped to the profiles file. The
    ``default2`` output of the ``test`` profile is used to acquire a
    connection handle, after which ``self.schema`` is dropped and created.
    """
    # defaults for this adapter type first, then the test's overrides
    merged_config = {}
    merged_config.update(self.get_profile(adapter_type))
    merged_config.update(self.profile_config)

    if not os.path.exists(DBT_CONFIG_DIR):
        os.makedirs(DBT_CONFIG_DIR)

    with open(DBT_PROFILES, 'w') as profiles_file:
        yaml.safe_dump(merged_config, profiles_file, default_flow_style=True)

    outputs = merged_config.get('test').get('outputs')
    profile = outputs.get('default2')
    adapter = get_adapter(profile)

    # it's important to use a different connection handle here so
    # we don't look into an incomplete transaction
    self.handle = adapter.acquire_connection(profile).get('handle')
    self.adapter_type = profile.get('type')

    drop_statement = 'DROP SCHEMA IF EXISTS "{}" CASCADE'.format(self.schema)
    self.run_sql(drop_statement)
    create_statement = 'CREATE SCHEMA "{}"'.format(self.schema)
    self.run_sql(create_statement)
def generate_base(model, model_dict, project_cfg, manifest, source_config,
                  provider):
    """Generate the common aspects of the config dict.

    The target profile is looked up in ``project_cfg['outputs']`` by the
    configured target name; a copy with ``'pass'`` removed and ``'name'``
    added is exposed as both ``env`` and ``target``. The remaining entries
    are merged in through ``dbt.utils.merge`` and ``this`` is added last.

    Raises ``dbt.exceptions.InternalException`` when ``provider`` is None.
    Returns the assembled context dict.
    """
    if provider is None:
        raise dbt.exceptions.InternalException(
            "Invalid provider given to context: {}".format(provider))

    target_name = project_cfg.get('target')
    profile = project_cfg.get('outputs').get(target_name)
    # work on a copy so removing 'pass' does not alter the profile itself
    target = profile.copy()
    target.pop('pass', None)
    target['name'] = target_name
    adapter = get_adapter(profile)

    context = {'env': target}
    schema = profile.get('schema', 'public')

    # both hook entries are exposed in the context as None here
    pre_hooks = None
    post_hooks = None

    relation_type = create_relation(adapter.Relation,
                                    project_cfg.get('quoting'))

    db_wrapper = DatabaseWrapper(model_dict,
                                 create_adapter(adapter, relation_type),
                                 profile,
                                 project_cfg)

    context = dbt.utils.merge(context, {
        "adapter": db_wrapper,
        "api": {
            "Relation": relation_type,
            "Column": adapter.Column,
        },
        "column": adapter.Column,
        "config": provider.Config(model_dict, source_config),
        "env_var": _env_var,
        "exceptions": dbt.exceptions,
        "execute": provider.execute,
        "flags": dbt.flags,
        # TODO: Do we have to leave this in?
        "graph": manifest.to_flat_graph(),
        "log": log,
        "model": model_dict,
        "modules": {
            "pytz": pytz,
            "datetime": datetime
        },
        "post_hooks": post_hooks,
        "pre_hooks": pre_hooks,
        "ref": provider.ref(db_wrapper, model, project_cfg,
                            profile, manifest),
        "return": _return,
        "schema": schema,
        "sql": None,
        "sql_now": adapter.date_function(),
        "fromjson": fromjson,
        "tojson": tojson,
        "target": target,
        "try_or_compiler_error": try_or_compiler_error(model)
    })

    # Operations do not represent database relations, so there should be no
    # 'this' variable in the context for operations. The Operation branch
    # below should be removed in a future release. The fake relation below
    # mirrors the historical implementation, without causing errors around
    # the missing 'alias' attribute for operations
    #
    # https://github.com/fishtown-analytics/dbt/issues/878
    if model.resource_type == NodeType.Operation:
        # NOTE(review): this indexes target['schema'] directly, while
        # `schema` above falls back to 'public' - confirm that operations
        # always have a schema in the profile.
        this = db_wrapper.adapter.Relation.create(
            schema=target['schema'],
            identifier=model.name
        )
    else:
        this = get_this_relation(db_wrapper, project_cfg, profile, model_dict)

    context["this"] = this
    return context
def compile_manifest(self):
    """Compile ``self.manifest`` and store the result on ``self.graph``.

    The compiler is obtained from the adapter for ``self.config``.
    """
    compiler = get_adapter(self.config).get_compiler()
    compiled_graph = compiler.compile(self.manifest)
    self.graph = compiled_graph
def execute_nodes(self, linker, Runner, manifest, node_dependency_list):
    """Run the nodes level by level on a thread pool and collect results.

    Each entry of ``node_dependency_list`` is a list of nodes whose runners
    are dispatched together through ``pool.imap_unordered``. Results of
    non-ephemeral nodes are collected; every result's node is written back
    into ``manifest.nodes``; when a result errored, the runners of its
    dependents are told to skip.

    On ``KeyboardInterrupt`` the pool is closed and terminated, open
    connections are cancelled when the adapter supports it, and the
    interrupt is re-raised.

    Returns the list of collected node results.
    """
    profile = self.project.run_environment()
    adapter = get_adapter(profile)

    num_threads = self.threads
    target_name = self.project.get_target().get('name')

    text = "Concurrency: {} threads (target='{}')"
    concurrency_line = text.format(num_threads, target_name)
    dbt.ui.printer.print_timestamped_line(concurrency_line)
    dbt.ui.printer.print_timestamped_line("")

    # NOTE(review): `schemas` is assigned but never read in this function;
    # confirm whether the call is only kept for its side effects.
    schemas = list(Runner.get_model_schemas(manifest))
    node_runners = self.get_runners(Runner, adapter, node_dependency_list)

    pool = ThreadPool(num_threads)
    node_results = []
    for node_list in node_dependency_list:
        runners = self.get_relevant_runners(node_runners, node_list)

        # one argument dict per runner for this level
        args_list = []
        for runner in runners:
            args_list.append({'manifest': manifest, 'runner': runner})

        try:
            for result in pool.imap_unordered(self.call_runner, args_list):
                if not Runner.is_ephemeral_model(result.node):
                    node_results.append(result)

                # store the returned node back into the manifest
                node = CompileResultNode(**result.node)
                node_id = node.unique_id
                manifest.nodes[node_id] = node

                if result.errored:
                    # skip everything that depends on the failed node
                    for dep_node_id in self.get_dependent(linker, node_id):
                        runner = node_runners.get(dep_node_id)
                        if runner:
                            runner.do_skip()

        except KeyboardInterrupt:
            pool.close()
            pool.terminate()

            # the profile and adapter are looked up again for cancellation
            profile = self.project.run_environment()
            adapter = get_adapter(profile)

            if not adapter.is_cancelable():
                msg = ("The {} adapter does not support query "
                       "cancellation. Some queries may still be "
                       "running!".format(adapter.type()))

                yellow = dbt.ui.printer.COLOR_FG_YELLOW
                dbt.ui.printer.print_timestamped_line(msg, yellow)
                raise

            for conn_name in adapter.cancel_open_connections(profile):
                dbt.ui.printer.print_cancel_line(conn_name)

            dbt.ui.printer.print_run_end_messages(node_results,
                                                  early_exit=True)

            pool.join()
            raise

    pool.close()
    pool.join()

    return node_results
def safe_execute_node(self, data):
    """Compile and execute one node, turning known failures into results.

    ``data`` must provide ``node``, ``flat_graph``, ``existing``,
    ``schema_name``, ``node_index`` and ``num_nodes``. Ephemeral nodes are
    compiled but not executed, and no start/result lines are printed for
    them.

    Compilation errors return an ``ERROR`` result immediately. Runtime and
    database errors are recorded on the result, except an aborted
    transaction, which yields a ``SKIP`` result. Any other exception is
    logged and re-raised. The node's connection is released in every case.
    """
    node = data['node']
    flat_graph = data['flat_graph']
    existing = data['existing']
    schema_name = data['schema_name']
    node_index = data['node_index']
    num_nodes = data['num_nodes']

    start_time = time.time()

    error = None
    status = None
    is_ephemeral = (get_materialization(node) == 'ephemeral')

    # Resolve these before entering the try block: the finally clause needs
    # both, and binding them inside the try meant an early failure surfaced
    # as a NameError from the cleanup instead of the real error.
    profile = self.project.run_environment()
    adapter = get_adapter(profile)

    try:
        if not is_ephemeral:
            print_start_line(node, schema_name, node_index, num_nodes)

        node = self.compile_node(node, flat_graph)

        if not is_ephemeral:
            node, status = self.execute_node(node, flat_graph, existing,
                                             profile, adapter)

    except dbt.exceptions.CompilationException as e:
        return RunModelResult(node, error=str(e), status='ERROR')

    except (RuntimeError,
            dbt.exceptions.ProgrammingException,
            psycopg2.ProgrammingError,
            psycopg2.InternalError) as e:
        prefix = "Error executing {}\n".format(node.get('build_path'))
        error = "{}{}".format(dbt.ui.printer.red(prefix), str(e).strip())
        status = "ERROR"
        logger.debug(error)
        # isinstance (rather than an exact type comparison) so subclasses
        # of InternalError are recognised as aborted transactions as well.
        if isinstance(e, psycopg2.InternalError) and \
           ABORTED_TRANSACTION_STRING == e.diag.message_primary:
            return RunModelResult(
                node,
                error='{}\n'.format(ABORTED_TRANSACTION_STRING),
                status="SKIP")

    except dbt.exceptions.InternalException as e:
        build_path = node.get('build_path')
        prefix = 'Internal error executing {}'.format(build_path)

        error = "{prefix}\n{error}\n\n{note}".format(
            prefix=dbt.ui.printer.red(prefix),
            error=str(e).strip(),
            note=INTERNAL_ERROR_STRING)
        logger.debug(error)
        status = "ERROR"

    except Exception as e:
        prefix = "Unhandled error while executing {filepath}".format(
            filepath=node.get('build_path'))

        error = "{prefix}\n{error}".format(
            prefix=dbt.ui.printer.red(prefix),
            error=str(e).strip())

        logger.debug(error)
        # bare raise keeps the original traceback
        raise

    finally:
        adapter.release_connection(profile, node.get('name'))

    execution_time = time.time() - start_time

    result = RunModelResult(node,
                            error=error,
                            status=status,
                            execution_time=execution_time)

    if not is_ephemeral:
        print_result_line(result, schema_name, node_index, num_nodes)

    return result
def run(self) -> CatalogResults:
    """Optionally compile, then build the catalog and write it to disk.

    With ``self.args.compile`` set, compilation runs first and an empty
    ``CatalogResults`` is returned if any result carries an error; without
    it the full manifest is loaded instead. The docs index file and every
    existing asset path are copied into the target path, the catalog is
    fetched inside a named connection, and the results are written out.
    Exceptions reported by the adapter are stringified into ``errors`` and
    logged.

    Raises ``InternalException`` when no manifest is available.
    """
    compile_results = None
    if self.args.compile:
        compile_results = CompileTask.run(self)
        compile_failed = any(r.error is not None for r in compile_results)
        if compile_failed:
            print_timestamped_line('compile failed, cannot generate docs')
            return CatalogResults(
                nodes={},
                sources={},
                generated_at=datetime.utcnow(),
                errors=None,
                _compile_results=compile_results,
            )
    else:
        self.manifest = get_full_manifest(self.config)

    target_dir_index = os.path.join(self.config.target_path, 'index.html')
    shutil.copyfile(DOCS_INDEX_FILE_PATH, target_dir_index)

    for asset_path in self.config.asset_paths:
        to_asset_path = os.path.join(self.config.target_path, asset_path)
        # replace any stale copy before copying the asset directory again
        if os.path.exists(to_asset_path):
            shutil.rmtree(to_asset_path)
        if os.path.exists(asset_path):
            shutil.copytree(asset_path, to_asset_path)

    if self.manifest is None:
        raise InternalException('self.manifest was None in run!')

    adapter = get_adapter(self.config)
    with adapter.connection_named('generate_catalog'):
        print_timestamped_line("Building catalog")
        catalog_table, exceptions = adapter.get_catalog(self.manifest)

    # one dict per row, with each value passed through _coerce_decimal
    catalog_data: List[PrimitiveDict] = [
        dict(zip(catalog_table.column_names, map(_coerce_decimal, row)))
        for row in catalog_table
    ]
    catalog = Catalog(catalog_data)

    errors: Optional[List[str]] = (
        [str(e) for e in exceptions] if exceptions else None
    )

    nodes, sources = catalog.make_unique_id_map(self.manifest)
    results = self.get_catalog_results(
        nodes=nodes,
        sources=sources,
        generated_at=datetime.utcnow(),
        compile_results=compile_results,
        errors=errors,
    )

    path = os.path.join(self.config.target_path, CATALOG_FILENAME)
    results.write(path)
    if self.args.compile:
        self.write_manifest()

    if exceptions:
        failure_count = len(exceptions)
        # multiplying 's' by a bool gives '' for exactly one failure
        plural_suffix = (len(exceptions) != 1) * 's'
        logger.error(
            'dbt encountered {} failure{} while writing the catalog'.
            format(failure_count, plural_suffix))

    print_timestamped_line('Catalog written to {}'.format(
        os.path.abspath(path)))
    return results
def execute_model(profile, model, existing):
    """Materialize ``model`` through the adapter for ``profile``.

    ``existing`` maps relation names to their relation type. The work is
    done in three phases: clear out (truncate or drop) whatever occupies
    the working relation name, run the model, then swap the freshly built
    relation into place. With ``dbt.flags.NON_DESTRUCTIVE`` set, nothing is
    dropped or renamed and the model's own name is used as the working name.

    Returns whatever ``adapter.execute_model`` returned, or ``None`` when
    the model was not executed.
    """
    adapter = get_adapter(profile)
    schema = adapter.get_default_schema(profile)

    non_destructive = dbt.flags.NON_DESTRUCTIVE
    model_name = model.get('name')
    materialization = get_materialization(model)

    # In non-destructive mode only the already existing relation is
    # considered; otherwise the model is built under a temporary name.
    if non_destructive:
        tmp_name = model_name
    else:
        tmp_name = '{}__dbt_tmp'.format(model_name)

    result = None

    # Phase 1: truncate / drop
    if non_destructive:
        # Existing relations are never dropped here; tables are truncated
        # instead so that they can be repopulated.
        if materialization == 'table' and existing.get(tmp_name) == 'table':
            adapter.truncate(profile=profile,
                             table=tmp_name,
                             model_name=model_name)
    elif (materialization != 'incremental' and
          existing.get(tmp_name) is not None):
        # Anything non-incremental occupying the working name is dropped...
        adapter.drop(profile=profile,
                     relation=tmp_name,
                     relation_type=existing.get(tmp_name),
                     model_name=model_name)

        # ...and the view of what exists is refreshed afterwards.
        existing = adapter.query_for_existing(profile, schema,
                                              model_name=model_name)

    # Phase 2: execute
    # An existing view is left alone in non-destructive mode: it does not
    # need to be recreated, and no DDL is run for it.
    keep_existing_view = (materialization == 'view' and
                          non_destructive and
                          model_name in existing)
    if not keep_existing_view:
        if is_enabled(model) and materialization != 'ephemeral':
            result = adapter.execute_model(profile, model)

    # Phase 3: drop the old relation and rename the new one into place
    # (skipped entirely in non-destructive mode).
    if not non_destructive and materialization in ['table', 'view']:
        previous_type = existing.get(model_name)
        if previous_type is not None:
            adapter.drop(profile=profile,
                         relation=model_name,
                         relation_type=previous_type,
                         model_name=model_name)

        adapter.rename(profile=profile,
                       from_name=tmp_name,
                       to_name=model_name,
                       model_name=model_name)

    return result
def execute_nodes(self, flat_graph, node_dependency_list, on_failure,
                  should_run_hooks=False, should_execute=True):
    """Run (or only compile) every node, one dependency batch at a time.

    ``node_dependency_list`` is an iterable of node lists. The lists are
    processed in order, and the nodes within one list are dispatched to a
    thread pool of ``self.threads`` workers. Nodes flagged ``skip`` are
    reported as skipped without being dispatched.

    ``on_failure`` is called with the node of every result that errored.
    ``should_run_hooks`` triggers the start and end run hooks.
    ``should_execute`` selects ``self.safe_execute_node`` over
    ``self.safe_compile_node`` and enables progress and summary output.

    Returns the list of ``RunModelResult`` objects collected (an empty list
    when there are no nodes). On ``KeyboardInterrupt`` the pool is shut
    down, open connections are cancelled, and the interrupt is re-raised.
    Any other exception terminates the pool before propagating.
    """
    profile = self.project.run_environment()
    adapter = get_adapter(profile)
    adapter.get_connection(profile)
    schema_name = adapter.get_default_schema(profile)

    flat_nodes = list(itertools.chain.from_iterable(node_dependency_list))

    if not flat_nodes:
        logger.info("WARNING: Nothing to do. Try checking your model "
                    "configs and model specification args")
        return []

    num_threads = self.threads
    logger.info("Concurrency: {} threads (target='{}')".format(
        num_threads, self.project.get_target().get('name')))

    master_connection = adapter.begin(profile)
    existing = adapter.query_for_existing(profile, schema_name)
    adapter.commit(master_connection)

    # Non-ephemeral nodes are numbered from 1 in flattened order; the
    # numbers are used for progress output and run tracking.
    countable_nodes = (
        candidate for candidate in flat_nodes
        if get_materialization(candidate) != 'ephemeral'
    )
    node_id_to_index_map = {
        candidate.get('unique_id'): position
        for position, candidate in enumerate(countable_nodes, start=1)
    }
    num_nodes = len(node_id_to_index_map)

    pool = ThreadPool(num_threads)

    if should_execute:
        stat_line = dbt.ui.printer.get_counts(flat_nodes)
        full_line = "Running {}".format(stat_line)

        logger.info("")
        dbt.ui.printer.print_timestamped_line(full_line)
        dbt.ui.printer.print_timestamped_line("")

    start_time = time.time()

    if should_run_hooks:
        self.run_hooks(profile, flat_graph, dbt.utils.RunHookType.Start)

    def get_idx(node):
        return node_id_to_index_map.get(node.get('unique_id'))

    if should_execute:
        action = self.safe_execute_node
    else:
        action = self.safe_compile_node

    node_results = []
    for node_list in node_dependency_list:
        for skipped in node_list:
            if not skipped.get('skip'):
                continue
            dbt.ui.printer.print_skip_line(skipped, schema_name,
                                           skipped.get('name'),
                                           get_idx(skipped), num_nodes)
            node_results.append(RunModelResult(skipped, skip=True))

        nodes_to_execute = [
            candidate for candidate in node_list
            if not candidate.get('skip')
        ]

        try:
            args_list = [
                {
                    'node': candidate,
                    'flat_graph': flat_graph,
                    'existing': existing,
                    'schema_name': schema_name,
                    'node_index': get_idx(candidate),
                    'num_nodes': num_nodes,
                }
                for candidate in nodes_to_execute
            ]

            for result in pool.imap_unordered(action, args_list):
                node_results.append(result)

                # propagate so that CTEs get injected properly
                node_id = result.node.get('unique_id')
                flat_graph['nodes'][node_id] = result.node

                index = get_idx(result.node)
                if should_execute:
                    track_model_run(index, num_nodes, result)

                if result.errored:
                    on_failure(result.node)
                    logger.info(result.error)

        except KeyboardInterrupt:
            pool.close()
            pool.terminate()

            for conn_name in adapter.cancel_open_connections(profile):
                dbt.ui.printer.print_cancel_line(conn_name, schema_name)

            pool.join()
            raise

        except Exception:
            # Stop the workers instead of leaving the pool running while
            # the error propagates to the caller.
            pool.terminate()
            raise

    pool.close()
    pool.join()

    if should_run_hooks:
        self.run_hooks(profile, flat_graph, dbt.utils.RunHookType.End)

    execution_time = time.time() - start_time

    if should_execute:
        print_results_line(node_results, execution_time)

    return node_results