Example #1
def load_manifest(config):
    # performance trick: if the adapter has a manifest loaded, use that to
    # avoid parsing internal macros twice.
    internal_manifest = get_adapter(config).check_internal_manifest()
    manifest = GraphLoader.load_all(config,
                                    internal_manifest=internal_manifest)

    manifest.write(os.path.join(config.target_path, MANIFEST_FILE_NAME))
    return manifest
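These snippets share one entry point: get_adapter(config) returns the adapter instance that dbt's adapter factory has cached for a config. A minimal sketch of the surrounding lifecycle, assuming a loaded RuntimeConfig named config (register_adapter and reset_adapters also appear in the test examples below):

    from dbt.adapters.factory import get_adapter, register_adapter, reset_adapters

    register_adapter(config)           # build and cache an adapter for this config
    adapter = get_adapter(config)      # look up the cached instance
    try:
        pass                           # compile nodes, run queries, build catalogs...
    finally:
        adapter.cleanup_connections()  # always release open connections
        reset_adapters()               # clear the cache (tests do this between runs)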
Example #2
    def _connection_result(self):
        adapter = get_adapter(self.profile)
        try:
            adapter.execute('select 1 as id')
        except Exception as exc:
            self.messages.append(COULD_NOT_CONNECT_MESSAGE.format(
                err=str(exc),
                url=ProfileConfigDocs
            ))
            return red('ERROR')
        return green('OK connection ok')
Example #3
    def get_runner(self, node):
        adapter = get_adapter(self.config)

        if node.is_ephemeral_model:
            run_count = 0
            num_nodes = 0
        else:
            self.run_count += 1
            run_count = self.run_count
            num_nodes = self.num_nodes

        cls = self.get_runner_type()
        return cls(self.config, adapter, node, run_count, num_nodes)
Example #4
    def tearDown(self):
        self._clean_files()

        # get any current run adapter and clean up its connections before we
        # reset them. It'll probably be different from ours because
        # handle_and_check() calls reset_adapters().
        adapter = get_adapter(self.config)
        # adopt the run adapter if we never created one; check this before
        # comparing, so the attribute access below cannot fail
        if not hasattr(self, 'adapter'):
            self.adapter = adapter
        if adapter is not self.adapter:
            adapter.cleanup_connections()

        self._drop_schemas()

        self.adapter.cleanup_connections()
        reset_adapters()
Example #5
    def run(self):
        manifest = GraphLoader.load_all(self.config)
        adapter = get_adapter(self.config)

        package_name, macro_name = self._get_macro_parts()
        macro_kwargs = self._get_kwargs()

        res = adapter.execute_macro(
            macro_name,
            project=package_name,
            kwargs=macro_kwargs,
            manifest=manifest,
            connection_name="macro_{}".format(macro_name)
        )

        return res
Example #6
    def execute_nodes(self):
        num_threads = self.config.threads
        target_name = self.config.target_name

        text = "Concurrency: {} threads (target='{}')"
        concurrency_line = text.format(num_threads, target_name)
        dbt.ui.printer.print_timestamped_line(concurrency_line)
        dbt.ui.printer.print_timestamped_line("")

        pool = ThreadPool(num_threads)
        try:
            self.run_queue(pool)

        except KeyboardInterrupt:
            pool.close()
            pool.terminate()

            adapter = get_adapter(self.config)

            if not adapter.is_cancelable():
                msg = ("The {} adapter does not support query "
                       "cancellation. Some queries may still be "
                       "running!".format(adapter.type()))

                yellow = dbt.ui.printer.COLOR_FG_YELLOW
                dbt.ui.printer.print_timestamped_line(msg, yellow)
                raise

            for conn_name in adapter.cancel_open_connections():
                dbt.ui.printer.print_cancel_line(conn_name)

            pool.join()

            dbt.ui.printer.print_run_end_messages(self.node_results,
                                                  early_exit=True)

            raise

        pool.close()
        pool.join()

        return self.node_results
Example #7
    def run(self):
        compile_results = None
        if self.args.compile:
            compile_results = super(GenerateTask, self).run()
            if any(r.error is not None for r in compile_results):
                dbt.ui.printer.print_timestamped_line(
                    'compile failed, cannot generate docs'
                )
                return {'compile_results': compile_results}

        shutil.copyfile(
            DOCS_INDEX_FILE_PATH,
            os.path.join(self.config.target_path, 'index.html'))

        manifest = self._get_manifest()
        adapter = get_adapter(self.config)

        dbt.ui.printer.print_timestamped_line("Building catalog")
        results = adapter.get_catalog(manifest)

        results = [
            dict(zip(results.column_names, row))
            for row in results
        ]

        nested_results = unflatten(results)
        results = {
            'nodes': incorporate_catalog_unique_ids(nested_results, manifest),
            'generated_at': dbt.utils.timestring(),
        }

        path = os.path.join(self.config.target_path, CATALOG_FILENAME)
        write_json(path, results)

        dbt.ui.printer.print_timestamped_line(
            'Catalog written to {}'.format(os.path.abspath(path))
        )
        # now that we've serialized the data we can add compile_results in to
        # make interpret_results happy.
        results['compile_results'] = compile_results

        return results
Example #8
    def __init__(
        self,
        model,
        config: RuntimeConfig,
        manifest: Manifest,
        provider: Provider,
        context_config: Optional[ContextConfigType],
    ) -> None:
        if provider is None:
            raise InternalException(
                f"Invalid provider given to context: {provider}")
        # mypy appeasement - we know it'll be a RuntimeConfig
        self.config: RuntimeConfig
        super().__init__(config, manifest, model.package_name)
        self.sql_results: Dict[str, AttrDict] = {}
        self.model: Union[ParsedMacro, NonSourceNode] = model
        self.context_config: Optional[ContextConfigType] = context_config
        self.provider: Provider = provider
        self.adapter = get_adapter(self.config)
        self.db_wrapper = self.provider.DatabaseWrapper(self.adapter)
Example #9
    def execute_with_hooks(self, selected_uids: AbstractSet[str]):
        adapter = get_adapter(self.config)
        try:
            self.before_hooks(adapter)
            started = time.time()
            self.before_run(adapter, selected_uids)
            res = self.execute_nodes()
            self.after_run(adapter, res)
            elapsed = time.time() - started
            self.after_hooks(adapter, res, elapsed)

        finally:
            adapter.cleanup_connections()

        result = self.get_result(
            results=res,
            elapsed_time=elapsed,
            generated_at=datetime.utcnow()
        )
        return result
Example #10
    def tearDown(self):
        os.remove(DBT_PROFILES)
        os.remove("dbt_project.yml")

        # quick fix for windows bug that prevents us from deleting dbt_modules
        try:
            if os.path.exists('dbt_modules'):
                shutil.rmtree('dbt_modules')
        except OSError:
            os.rename("dbt_modules", "dbt_modules-{}".format(time.time()))

        self.adapter = get_adapter(self._profile)

        self._drop_schema()

        # hack for BQ -- TODO
        if hasattr(self.handle, 'close'):
            self.handle.close()

        self.adapter.cleanup_connections()
Example #11
    def handle_request(self) -> RemoteExecutionResult:
        # we could get a ctrl+c at any time, including during parsing.
        thread = None
        started = datetime.utcnow()
        try:
            node = self._get_exec_node()

            selected_uids = [node.unique_id]
            self.runtime_cleanup(selected_uids)

            thread_done = threading.Event()
            thread = threading.Thread(target=self._in_thread,
                                      args=(node, thread_done))
            thread.start()
            thread_done.wait()
        except KeyboardInterrupt:
            adapter = get_adapter(self.config)  # type: ignore
            if adapter.is_cancelable():
                for conn_name in adapter.cancel_open_connections():
                    logger.debug('canceled query {}'.format(conn_name))
                if thread:
                    thread.join()
            else:
                msg = ("The {} adapter does not support query "
                       "cancellation. Some queries may still be "
                       "running!".format(adapter.type()))

                logger.debug(msg)

            raise RPCKilledException(signal.SIGINT)

        self._raise_set_error()

        ended = datetime.utcnow()
        elapsed = (ended - started).total_seconds()
        return self.get_result(
            results=self.node_results,
            elapsed_time=elapsed,
            generated_at=ended,
        )
Example #12
    def get_compiler_context(self, model, flat_graph):
        context = self.project.context()
        profile = self.project.run_environment()
        adapter = get_adapter(profile)

        wrapper = dbt.wrapper.DatabaseWrapper(model, adapter, profile)

        # built-ins
        context['ref'] = self.__ref(context, model, flat_graph)
        context['config'] = self.__model_config(model)
        context['this'] = This(
            context['env']['schema'],
            dbt.utils.model_immediate_name(model, dbt.flags.NON_DESTRUCTIVE),
            model.get('name'))
        context['var'] = Var(model, context=context)
        context['target'] = self.project.get_target()
        context['adapter'] = wrapper
        context['flags'] = dbt.flags

        context.update(wrapper.get_context_functions())

        context['run_started_at'] = '{{ run_started_at }}'
        context['invocation_id'] = '{{ invocation_id }}'
        context['sql_now'] = adapter.date_function()

        for unique_id, macro in flat_graph.get('macros').items():
            package_name = macro.get('package_name')

            macro_map = {macro.get('name'): macro.get('parsed_macro')}

            if context.get(package_name) is None:
                context[package_name] = {}

            context[package_name].update(macro_map)

            if (package_name == model.get('package_name')
                    or package_name == dbt.include.GLOBAL_PROJECT_NAME):
                context.update(macro_map)

        return context
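The macro loop above registers each parsed macro in two places; a short sketch of the resulting lookups, using hypothetical names:

    # after the loop, a macro 'my_macro' from package 'my_pkg' is reachable as:
    context['my_pkg']['my_macro']  # always, namespaced under its package
    context['my_macro']            # only if 'my_pkg' is the model's own package
                                   # or the global dbt project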
Example #13
File: base.py Project: ParthRaj22/dbt-1
    def load_config(self):
        # we've written our profile and project. Now we want to instantiate a
        # fresh adapter for the tests.
        # it's important to use a different connection handle here so
        # we don't look into an incomplete transaction
        kwargs = {
            'profile': None,
            'profile_dir': DBT_CONFIG_DIR,
            'target': None,
        }

        config = RuntimeConfig.from_args(TestArgs(kwargs))

        adapter = get_adapter(config)
        adapter.cleanup_connections()
        self.adapter_type = adapter.type()
        self.adapter = adapter
        self.config = config

        self._drop_schemas()
        self._create_schemas()
Example #14
def get_sort_qualifier(model, project):
    model_config = model.get('config', {})

    if 'sort' not in model_config:
        return ''

    if get_materialization(model) not in ('table', 'incremental'):
        return ''

    sort_keys = model_config.get('sort')
    sort_type = model_config.get('sort_type', 'compound')

    if not isinstance(sort_type, basestring):
        compiler_error(
            model,
            "The provided sort_type '{}' is not valid!".format(sort_type))

    sort_type = sort_type.strip().lower()

    adapter = get_adapter(project.run_environment())
    return adapter.sort_qualifier(sort_type, sort_keys)
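For reference, a sketch of the model dict this function reads; the keys come from the code above, the values are hypothetical:

    model = {
        'config': {
            'materialized': 'table',       # must be 'table' or 'incremental'
            'sort': ['created_at', 'id'],  # passed through as sort_keys
            'sort_type': 'compound',       # stripped, lower-cased, must be a string
        },
    }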
Example #15
    def tearDown(self):
        # get any current run adapter and clean up its connections before we
        # reset them. It'll probably be different from ours because
        # handle_and_check() calls reset_adapters().
        register_adapter(self.config)
        adapter = get_adapter(self.config)
        # adopt the run adapter if we never created one; check this before
        # comparing, so the attribute access below cannot fail
        if not hasattr(self, 'adapter'):
            self.adapter = adapter
        if adapter is not self.adapter:
            adapter.cleanup_connections()

        self._drop_schemas()

        self.adapter.cleanup_connections()
        reset_adapters()
        os.chdir(INITIAL_ROOT)
        try:
            shutil.rmtree(self.test_root_dir)
        except EnvironmentError:
            logger.exception('Could not clean up after test - {} not removable'
                             .format(self.test_root_dir))
Example #16
File: runner.py Project: massmutual/dbt
    def run(self):
        """
        Run dbt for the query, based on the graph.
        """
        adapter = get_adapter(self.config)

        if len(self._flattened_nodes) == 0:
            logger.info("WARNING: Nothing to do. Try checking your model "
                        "configs and model specification args")
            return []
        elif self.Runner.print_header:
            stat_line = dbt.ui.printer.get_counts(self._flattened_nodes)
            logger.info("")
            dbt.ui.printer.print_timestamped_line(stat_line)
            dbt.ui.printer.print_timestamped_line("")
        else:
            logger.info("")

        try:
            self.Runner.before_hooks(self.config, adapter, self.manifest)
            started = time.time()
            self.Runner.before_run(self.config, adapter, self.manifest)
            res = self.execute_nodes()
            self.Runner.after_run(self.config, adapter, res, self.manifest)
            elapsed = time.time() - started
            self.Runner.after_hooks(self.config, adapter, res, self.manifest,
                                    elapsed)

        finally:
            adapter.cleanup_connections()

        result = ExecutionResult(
            results=res,
            elapsed_time=elapsed,
            generated_at=dbt.utils.timestring(),
        )
        self.write_results(result)

        return res
Example #17
    def execute(self, model):
        profile = self.project.run_environment()
        adapter = get_adapter(profile)

        _, cursor = adapter.execute_one(profile, model.compiled_contents,
                                        model.name)
        rows = cursor.fetchall()

        cursor.close()

        if len(rows) > 1:
            raise RuntimeError(
                "Bad test {name}: Returned {num_rows} rows instead of 1".
                format(name=model.name, num_rows=len(rows)))

        row = rows[0]
        if len(row) > 1:
            raise RuntimeError(
                "Bad test {name}: Returned {num_cols} cols instead of 1".
                format(name=model.name, num_cols=len(row)))

        return row[0]
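This runner assumes the compiled test SQL yields exactly one row with one column; a hypothetical example of a query that satisfies that contract:

    # hypothetical compiled schema test: a single failure count
    test_sql = "select count(*) as errors from my_schema.my_model where id is null"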
Example #18
File: base.py Project: ridhoq/dbt
    def tearDown(self):
        os.remove(DBT_PROFILES)
        os.remove("dbt_project.yml")

        # quick fix for windows bug that prevents us from deleting dbt_modules
        try:
            if os.path.exists('dbt_modules'):
                shutil.rmtree('dbt_modules')
        except OSError:
            os.rename("dbt_modules", "dbt_modules-{}".format(time.time()))

        if self.adapter_type == 'bigquery':
            adapter = get_adapter(self.profile)
            adapter.drop_schema(self.profile, self.unique_schema(), '__test')
        else:
            self.run_sql('DROP SCHEMA IF EXISTS "{}" CASCADE'.format(
                self.unique_schema()))
            self.handle.close()

        # hack for BQ -- TODO
        if hasattr(self.handle, 'close'):
            self.handle.close()
Example #19
    def _compile_ancestors(self, unique_id: str):
        # this just gets a transitive closure of the nodes. We could build a
        # special GraphQueue around this, but we do them all in the main thread
        # so we only care about preserving dependency order anyway
        if self.linker is None or self.manifest is None:
            raise InternalException(
                'linker and manifest not set in _compile_ancestors')
        sorted_ancestors = self.linker.sorted_ephemeral_ancestors(
            self.manifest,
            unique_id,
        )
        # We're just compiling, so we don't need to use a graph queue
        adapter = get_adapter(self.config)  # type: ignore

        for unique_id in sorted_ancestors:
            # for each node, compile it + overwrite it
            parsed = self.manifest.expect(unique_id)
            self.manifest.nodes[unique_id] = compile_node(adapter,
                                                          self.config,
                                                          parsed,
                                                          self.manifest, {},
                                                          write=False)
Example #20
    def run(self):
        compile_results = None
        if self.args.compile:
            compile_results = super(GenerateTask, self).run()
            if any(r.errored for r in compile_results):
                dbt.ui.printer.print_timestamped_line(
                    'compile failed, cannot generate docs')
                return {'compile_results': compile_results}

        shutil.copyfile(
            DOCS_INDEX_FILE_PATH,
            os.path.join(self.project['target-path'], 'index.html'))

        manifest = self._get_manifest()
        profile = self.project.run_environment()
        adapter = get_adapter(profile)

        dbt.ui.printer.print_timestamped_line("Building catalog")
        results = adapter.get_catalog(profile, self.project.cfg, manifest)

        results = [dict(zip(results.column_names, row)) for row in results]

        nested_results = unflatten(results)
        results = {
            'nodes': incorporate_catalog_unique_ids(nested_results, manifest),
            'generated_at': dbt.utils.timestring(),
        }

        path = os.path.join(self.project['target-path'], CATALOG_FILENAME)
        write_json(path, results)

        dbt.ui.printer.print_timestamped_line('Catalog written to {}'.format(
            os.path.abspath(path)))
        # now that we've serialized the data we can add compile_results in to
        # make interpret_results happy.
        results['compile_results'] = compile_results

        return results
Example #21
def get_full_manifest(
    config: RuntimeConfig,
    *,
    reset: bool = False,
) -> Manifest:
    """Load the full manifest, using the adapter's internal manifest if it
    exists to skip parsing internal (dbt + plugins) macros a second time.

    Also, make sure that we force-load the adapter's manifest, so it gets
    attached to the adapter for any methods that need it.
    """
    adapter = get_adapter(config)  # type: ignore
    if reset:
        config.clear_dependencies()
        adapter.clear_macro_manifest()

    internal: Manifest = adapter.load_macro_manifest()

    return load_manifest(
        config,
        internal,
        adapter.connections.set_query_header,
    )
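A short usage sketch, assuming a loaded RuntimeConfig named config:

    manifest = get_full_manifest(config)              # reuse the adapter's macro manifest
    manifest = get_full_manifest(config, reset=True)  # clear caches and re-parse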
Example #22
    def execute(self, model):
        profile = self.project.run_environment()
        adapter = get_adapter(profile)

        if model.tmp_drop_type is not None:
            if model.materialization == 'table' and self.is_non_destructive():
                adapter.truncate(profile=profile,
                                 table=model.tmp_name,
                                 model_name=model.name)
            else:
                adapter.drop(profile=profile,
                             relation=model.tmp_name,
                             relation_type=model.tmp_drop_type,
                             model_name=model.name)

        status = adapter.execute_model(profile=profile, model=model)

        if model.final_drop_type is not None:
            if model.materialization == 'table' and self.is_non_destructive():
                # we just inserted into this recently truncated table...
                # do nothing here
                pass
            else:
                adapter.drop(profile=profile,
                             relation=model.name,
                             relation_type=model.final_drop_type,
                             model_name=model.name)

        if model.should_rename(self.project.args):
            adapter.rename(profile=profile,
                           from_name=model.tmp_name,
                           to_name=model.name,
                           model_name=model.name)

        adapter.commit(profile=profile)

        return status
Example #23
    def use_profile(self, adapter_type):
        self.adapter_type = adapter_type

        profile_config = {}
        default_profile_config = self.get_profile(adapter_type)

        profile_config.update(default_profile_config)
        profile_config.update(self.profile_config)

        if not os.path.exists(DBT_CONFIG_DIR):
            os.makedirs(DBT_CONFIG_DIR)

        with open(DBT_PROFILES, 'w') as f:
            yaml.safe_dump(profile_config, f, default_flow_style=True)

        profile = profile_config.get('test').get('outputs').get('default2')
        adapter = get_adapter(profile)

        self.adapter = adapter

        # it's important to use a different connection handle here so
        # we don't look into an incomplete transaction
        connection = adapter.acquire_connection(profile, '__test')
        self.handle = connection.get('handle')
        self.adapter_type = profile.get('type')
        self._profile_config = profile_config
        self._profile = profile

        if self.adapter_type == 'bigquery':
            adapter.drop_schema(profile, self.project, self.unique_schema(),
                                '__test')
            adapter.create_schema(profile, self.project, self.unique_schema(),
                                  '__test')
        else:
            schema = self.adapter.quote(self.unique_schema())
            self.run_sql('DROP SCHEMA IF EXISTS {} CASCADE'.format(schema))
            self.run_sql('CREATE SCHEMA {}'.format(schema))
Example #24
    def __init__(self, project, target_path, args):
        self.project = project
        self.target_path = target_path
        self.args = args

        profile = self.project.run_environment()

        # TODO validate the number of threads
        if self.args.threads is None:
            self.threads = profile.get('threads', 1)
        else:
            self.threads = self.args.threads

        adapter = get_adapter(profile)
        schema_name = adapter.get_default_schema(profile)

        self.existing_models = adapter.query_for_existing(profile, schema_name)

        def call_get_columns_in_table(schema_name, table_name):
            return adapter.get_columns_in_table(profile, schema_name,
                                                table_name)

        def call_get_missing_columns(from_schema, from_table, to_schema,
                                     to_table):
            return adapter.get_missing_columns(profile, from_schema,
                                               from_table, to_schema, to_table)

        def call_table_exists(schema, table):
            return adapter.table_exists(profile, schema, table)

        self.context = {
            "run_started_at": datetime.now(),
            "invocation_id": dbt.tracking.active_user.invocation_id,
            "get_columns_in_table": call_get_columns_in_table,
            "get_missing_columns": call_get_missing_columns,
            "already_exists": call_table_exists,
        }
Example #25
    def run(self):
        """
        Run dbt for the query, based on the graph.
        """
        self._runtime_initialize()
        adapter = get_adapter(self.config)

        if len(self._flattened_nodes) == 0:
            logger.info("WARNING: Nothing to do. Try checking your model "
                        "configs and model specification args")
            return []
        else:
            logger.info("")

        selected_uids = frozenset(n.unique_id for n in self._flattened_nodes)
        try:
            self.before_hooks(adapter)
            started = time.time()
            self.before_run(adapter, selected_uids)
            res = self.execute_nodes()
            self.after_run(adapter, res)
            elapsed = time.time() - started
            self.after_hooks(adapter, res, elapsed)

        finally:
            adapter.cleanup_connections()

        result = self.get_result(
            results=res,
            elapsed_time=elapsed,
            generated_at=dbt.utils.timestring()
        )
        result.write(self.result_path())

        self.task_end_messages(res)
        return res
Example #26
    def _get_exec_node(self):
        if self.manifest is None:
            raise InternalException(
                'manifest not set in _get_exec_node'
            )

        macro_overrides = {}
        sql, macros = self._extract_request_data(self.args.sql)

        if macros:
            macro_parser = RPCMacroParser(self.config, self.manifest)
            for node in macro_parser.parse_remote(macros):
                macro_overrides[node.unique_id] = node

        self.manifest.macros.update(macro_overrides)
        rpc_parser = RPCCallParser(
            project=self.config,
            manifest=self.manifest,
            root_project=self.config,
        )
        rpc_node = rpc_parser.parse_remote(sql, self.args.name)
        add_new_refs(
            manifest=self.manifest,
            config=self.config,
            node=rpc_node,
            macros=macro_overrides
        )

        # don't write our new, weird manifest!
        adapter = get_adapter(self.config)
        compiler = adapter.get_compiler()
        self.graph = compiler.compile(self.manifest, write=False)
        # previously, this compiled the ancestors, but they are compiled at
        # runtime now.
        return rpc_node
Example #27
File: runner.py Project: ridhoq/dbt
    def run_from_graph(self, Selector, Runner, query):
        flat_graph, linker = self.compile(self.project)

        selector = Selector(linker, flat_graph)
        selected_nodes = selector.select(query)
        dep_list = selector.as_node_list(selected_nodes)

        profile = self.project.run_environment()
        adapter = get_adapter(profile)

        flat_nodes = dbt.utils.flatten_nodes(dep_list)
        if len(flat_nodes) == 0:
            logger.info("WARNING: Nothing to do. Try checking your model "
                        "configs and model specification args")
            return []
        elif Runner.print_header:
            stat_line = dbt.ui.printer.get_counts(flat_nodes)
            logger.info("")
            dbt.ui.printer.print_timestamped_line(stat_line)
            dbt.ui.printer.print_timestamped_line("")
        else:
            logger.info("")

        try:
            Runner.before_hooks(self.project, adapter, flat_graph)
            started = time.time()
            Runner.before_run(self.project, adapter, flat_graph)
            res = self.execute_nodes(linker, Runner, flat_graph, dep_list)
            Runner.after_run(self.project, adapter, res, flat_graph)
            elapsed = time.time() - started
            Runner.after_hooks(self.project, adapter, res, flat_graph, elapsed)

        finally:
            adapter.cleanup_connections()

        return res
Example #28
File: generate.py Project: convoyinc/dbt
    def run(self):
        compile_results = None
        if self.args.compile:
            compile_results = CompileTask.run(self)
            if any(r.error is not None for r in compile_results):
                dbt.ui.printer.print_timestamped_line(
                    'compile failed, cannot generate docs')
                return CatalogResults({}, datetime.utcnow(), compile_results)

        shutil.copyfile(DOCS_INDEX_FILE_PATH,
                        os.path.join(self.config.target_path, 'index.html'))

        adapter = get_adapter(self.config)
        with adapter.connection_named('generate_catalog'):
            dbt.ui.printer.print_timestamped_line("Building catalog")
            catalog_table = adapter.get_catalog(self.manifest)

        catalog_data: List[PrimitiveDict] = [
            dict(zip(catalog_table.column_names, map(_coerce_decimal, row)))
            for row in catalog_table
        ]

        catalog = Catalog(catalog_data)
        results = self.get_catalog_results(
            nodes=catalog.make_unique_id_map(self.manifest),
            generated_at=datetime.utcnow(),
            compile_results=compile_results,
        )

        path = os.path.join(self.config.target_path, CATALOG_FILENAME)
        results.write(path)
        write_manifest(self.config, self.manifest)

        dbt.ui.printer.print_timestamped_line('Catalog written to {}'.format(
            os.path.abspath(path)))
        return results
Example #29
    def do_ref(*args):
        target_model_name = None
        target_model_package = None

        if len(args) == 1:
            target_model_name = args[0]
        elif len(args) == 2:
            target_model_package, target_model_name = args
        else:
            dbt.exceptions.ref_invalid_args(model, args)

        target_model = dbt.parser.resolve_ref(
            flat_graph,
            target_model_name,
            target_model_package,
            current_project,
            model.get('package_name'))

        if target_model is None:
            dbt.exceptions.ref_target_not_found(
                model,
                target_model_name,
                target_model_package)

        target_model_id = target_model.get('unique_id')

        if target_model_id not in model.get('depends_on', {}).get('nodes', []):
            dbt.exceptions.ref_bad_context(model,
                                           target_model_name,
                                           target_model_package)

        if dbt.utils.get_materialization(target_model) == 'ephemeral':
            model['extra_ctes'][target_model_id] = None

        adapter = get_adapter(profile)
        return dbt.utils.Relation(profile, adapter, target_model)
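The closure above is bound into the template context as ref (compare Example #12), so the two supported call shapes are:

    ref('my_model')                    # one arg: resolve in the current package scope
    ref('other_package', 'my_model')   # two args: resolve in an explicit package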
Example #30
    def use_profile(self, adapter_type):
        profile_config = {}
        default_profile_config = self.get_profile(adapter_type)

        profile_config.update(default_profile_config)
        profile_config.update(self.profile_config)

        if not os.path.exists(DBT_CONFIG_DIR):
            os.makedirs(DBT_CONFIG_DIR)

        with open(DBT_PROFILES, 'w') as f:
            yaml.safe_dump(profile_config, f, default_flow_style=True)

        profile = profile_config.get('test').get('outputs').get('default2')
        adapter = get_adapter(profile)

        # it's important to use a different connection handle here so
        # we don't look into an incomplete transaction
        connection = adapter.acquire_connection(profile)
        self.handle = connection.get('handle')
        self.adapter_type = profile.get('type')

        self.run_sql('DROP SCHEMA IF EXISTS "{}" CASCADE'.format(self.schema))
        self.run_sql('CREATE SCHEMA "{}"'.format(self.schema))
Example #31
File: common.py Project: vishalbelsare/dbt
def generate_base(model, model_dict, project_cfg, manifest, source_config,
                  provider):
    """Generate the common aspects of the config dict."""
    if provider is None:
        raise dbt.exceptions.InternalException(
            "Invalid provider given to context: {}".format(provider))

    target_name = project_cfg.get('target')
    profile = project_cfg.get('outputs').get(target_name)
    target = profile.copy()
    target.pop('pass', None)
    target['name'] = target_name
    adapter = get_adapter(profile)

    context = {'env': target}
    schema = profile.get('schema', 'public')

    pre_hooks = None
    post_hooks = None

    relation_type = create_relation(adapter.Relation,
                                    project_cfg.get('quoting'))

    db_wrapper = DatabaseWrapper(model_dict,
                                 create_adapter(adapter, relation_type),
                                 profile,
                                 project_cfg)
    context = dbt.utils.merge(context, {
        "adapter": db_wrapper,
        "api": {
            "Relation": relation_type,
            "Column": adapter.Column,
        },
        "column": adapter.Column,
        "config": provider.Config(model_dict, source_config),
        "env_var": _env_var,
        "exceptions": dbt.exceptions,
        "execute": provider.execute,
        "flags": dbt.flags,
        # TODO: Do we have to leave this in?
        "graph": manifest.to_flat_graph(),
        "log": log,
        "model": model_dict,
        "modules": {
            "pytz": pytz,
            "datetime": datetime
        },
        "post_hooks": post_hooks,
        "pre_hooks": pre_hooks,
        "ref": provider.ref(db_wrapper, model, project_cfg,
                            profile, manifest),
        "return": _return,
        "schema": schema,
        "sql": None,
        "sql_now": adapter.date_function(),
        "fromjson": fromjson,
        "tojson": tojson,
        "target": target,
        "try_or_compiler_error": try_or_compiler_error(model)
    })

    # Operations do not represent database relations, so there should be no
    # 'this' variable in the context for operations. The Operation branch
    # below should be removed in a future release. The fake relation below
    # mirrors the historical implementation, without causing errors around
    # the missing 'alias' attribute for operations
    #
    # https://github.com/fishtown-analytics/dbt/issues/878
    if model.resource_type == NodeType.Operation:
        this = db_wrapper.adapter.Relation.create(
                schema=target['schema'],
                identifier=model.name
        )
    else:
        this = get_this_relation(db_wrapper, project_cfg, profile, model_dict)

    context["this"] = this
    return context
Example #32
    def compile_manifest(self):
        adapter = get_adapter(self.config)
        compiler = adapter.get_compiler()
        self.graph = compiler.compile(self.manifest)
Example #33
    def execute_nodes(self, linker, Runner, manifest, node_dependency_list):
        profile = self.project.run_environment()
        adapter = get_adapter(profile)

        num_threads = self.threads
        target_name = self.project.get_target().get('name')

        text = "Concurrency: {} threads (target='{}')"
        concurrency_line = text.format(num_threads, target_name)
        dbt.ui.printer.print_timestamped_line(concurrency_line)
        dbt.ui.printer.print_timestamped_line("")

        schemas = list(Runner.get_model_schemas(manifest))
        node_runners = self.get_runners(Runner, adapter, node_dependency_list)

        pool = ThreadPool(num_threads)
        node_results = []
        for node_list in node_dependency_list:
            runners = self.get_relevant_runners(node_runners, node_list)

            args_list = []
            for runner in runners:
                args_list.append({'manifest': manifest, 'runner': runner})

            try:
                for result in pool.imap_unordered(self.call_runner, args_list):
                    if not Runner.is_ephemeral_model(result.node):
                        node_results.append(result)

                    node = CompileResultNode(**result.node)
                    node_id = node.unique_id
                    manifest.nodes[node_id] = node

                    if result.errored:
                        for dep_node_id in self.get_dependent(linker, node_id):
                            runner = node_runners.get(dep_node_id)
                            if runner:
                                runner.do_skip()

            except KeyboardInterrupt:
                pool.close()
                pool.terminate()

                profile = self.project.run_environment()
                adapter = get_adapter(profile)

                if not adapter.is_cancelable():
                    msg = ("The {} adapter does not support query "
                           "cancellation. Some queries may still be "
                           "running!".format(adapter.type()))

                    yellow = dbt.ui.printer.COLOR_FG_YELLOW
                    dbt.ui.printer.print_timestamped_line(msg, yellow)
                    raise

                for conn_name in adapter.cancel_open_connections(profile):
                    dbt.ui.printer.print_cancel_line(conn_name)

                dbt.ui.printer.print_run_end_messages(node_results,
                                                      early_exit=True)

                pool.join()
                raise

        pool.close()
        pool.join()

        return node_results
Example #34
File: runner.py Project: bellhops/dbt
    def safe_execute_node(self, data):
        node = data['node']
        flat_graph = data['flat_graph']
        existing = data['existing']
        schema_name = data['schema_name']
        node_index = data['node_index']
        num_nodes = data['num_nodes']

        start_time = time.time()

        error = None
        status = None
        is_ephemeral = (get_materialization(node) == 'ephemeral')

        # acquire the profile and adapter before entering the try block, so the
        # finally clause below can always release the connection
        profile = self.project.run_environment()
        adapter = get_adapter(profile)

        try:
            if not is_ephemeral:
                print_start_line(node, schema_name, node_index, num_nodes)

            node = self.compile_node(node, flat_graph)

            if not is_ephemeral:
                node, status = self.execute_node(node, flat_graph, existing,
                                                 profile, adapter)

        except dbt.exceptions.CompilationException as e:
            return RunModelResult(node, error=str(e), status='ERROR')

        except (RuntimeError, dbt.exceptions.ProgrammingException,
                psycopg2.ProgrammingError, psycopg2.InternalError) as e:

            prefix = "Error executing {}\n".format(node.get('build_path'))
            error = "{}{}".format(dbt.ui.printer.red(prefix), str(e).strip())

            status = "ERROR"
            logger.debug(error)
            if type(e) == psycopg2.InternalError and \
               ABORTED_TRANSACTION_STRING == e.diag.message_primary:
                return RunModelResult(
                    node,
                    error='{}\n'.format(ABORTED_TRANSACTION_STRING),
                    status="SKIP")

        except dbt.exceptions.InternalException as e:

            build_path = node.get('build_path')
            prefix = 'Internal error executing {}'.format(build_path)

            error = "{prefix}\n{error}\n\n{note}".format(
                prefix=dbt.ui.printer.red(prefix),
                error=str(e).strip(),
                note=INTERNAL_ERROR_STRING)
            logger.debug(error)

            status = "ERROR"

        except Exception as e:

            prefix = "Unhandled error while executing {filepath}".format(
                filepath=node.get('build_path'))

            error = "{prefix}\n{error}".format(
                prefix=dbt.ui.printer.red(prefix), error=str(e).strip())

            logger.debug(error)

            raise  # re-raise with the original traceback

        finally:
            adapter.release_connection(profile, node.get('name'))

        execution_time = time.time() - start_time

        result = RunModelResult(node,
                                error=error,
                                status=status,
                                execution_time=execution_time)

        if not is_ephemeral:
            print_result_line(result, schema_name, node_index, num_nodes)

        return result
Example #35
    def run(self) -> CatalogResults:
        compile_results = None
        if self.args.compile:
            compile_results = CompileTask.run(self)
            if any(r.error is not None for r in compile_results):
                print_timestamped_line('compile failed, cannot generate docs')
                return CatalogResults(nodes={},
                                      sources={},
                                      generated_at=datetime.utcnow(),
                                      errors=None,
                                      _compile_results=compile_results)
        else:
            self.manifest = get_full_manifest(self.config)

        shutil.copyfile(DOCS_INDEX_FILE_PATH,
                        os.path.join(self.config.target_path, 'index.html'))

        for asset_path in self.config.asset_paths:
            to_asset_path = os.path.join(self.config.target_path, asset_path)

            if os.path.exists(to_asset_path):
                shutil.rmtree(to_asset_path)

            if os.path.exists(asset_path):
                shutil.copytree(asset_path, to_asset_path)

        if self.manifest is None:
            raise InternalException('self.manifest was None in run!')

        adapter = get_adapter(self.config)
        with adapter.connection_named('generate_catalog'):
            print_timestamped_line("Building catalog")
            catalog_table, exceptions = adapter.get_catalog(self.manifest)

        catalog_data: List[PrimitiveDict] = [
            dict(zip(catalog_table.column_names, map(_coerce_decimal, row)))
            for row in catalog_table
        ]

        catalog = Catalog(catalog_data)

        errors: Optional[List[str]] = None
        if exceptions:
            errors = [str(e) for e in exceptions]

        nodes, sources = catalog.make_unique_id_map(self.manifest)
        results = self.get_catalog_results(
            nodes=nodes,
            sources=sources,
            generated_at=datetime.utcnow(),
            compile_results=compile_results,
            errors=errors,
        )

        path = os.path.join(self.config.target_path, CATALOG_FILENAME)
        results.write(path)
        if self.args.compile:
            self.write_manifest()

        if exceptions:
            logger.error(
                'dbt encountered {} failure{} while writing the catalog'.
                format(len(exceptions), (len(exceptions) != 1) * 's'))

        print_timestamped_line('Catalog written to {}'.format(
            os.path.abspath(path)))

        return results
Example #36
File: runner.py Project: bellhops/dbt
def execute_model(profile, model, existing):
    adapter = get_adapter(profile)
    schema = adapter.get_default_schema(profile)

    tmp_name = '{}__dbt_tmp'.format(model.get('name'))

    if dbt.flags.NON_DESTRUCTIVE:
        # for non destructive mode, we only look at the already existing table.
        tmp_name = model.get('name')

    result = None

    # TRUNCATE / DROP
    if get_materialization(model) == 'table' and \
       dbt.flags.NON_DESTRUCTIVE and \
       existing.get(tmp_name) == 'table':
        # tables get truncated instead of dropped in non-destructive mode.
        adapter.truncate(profile=profile,
                         table=tmp_name,
                         model_name=model.get('name'))

    elif dbt.flags.NON_DESTRUCTIVE:
        # never drop existing relations in non destructive mode.
        pass

    elif (get_materialization(model) != 'incremental'
          and existing.get(tmp_name) is not None):
        # otherwise, for non-incremental things, drop them with IF EXISTS
        adapter.drop(profile=profile,
                     relation=tmp_name,
                     relation_type=existing.get(tmp_name),
                     model_name=model.get('name'))

        # and update the list of what exists
        existing = adapter.query_for_existing(profile,
                                              schema,
                                              model_name=model.get('name'))

    # EXECUTE
    if get_materialization(model) == 'view' and dbt.flags.NON_DESTRUCTIVE and \
       model.get('name') in existing:
        # views don't need to be recreated in non destructive mode since they
        # will repopulate automatically. note that we won't run DDL for these
        # views either.
        pass
    elif is_enabled(model) and get_materialization(model) != 'ephemeral':
        result = adapter.execute_model(profile, model)

    # DROP OLD RELATION AND RENAME
    if dbt.flags.NON_DESTRUCTIVE:
        # in non-destructive mode, we truncate and repopulate tables, and
        # don't modify views.
        pass
    elif get_materialization(model) in ['table', 'view']:
        # otherwise, drop tables and views, and rename tmp tables/views to
        # their new names
        if existing.get(model.get('name')) is not None:
            adapter.drop(profile=profile,
                         relation=model.get('name'),
                         relation_type=existing.get(model.get('name')),
                         model_name=model.get('name'))

        adapter.rename(profile=profile,
                       from_name=tmp_name,
                       to_name=model.get('name'),
                       model_name=model.get('name'))

    return result
Example #37
File: runner.py Project: bellhops/dbt
    def execute_nodes(self,
                      flat_graph,
                      node_dependency_list,
                      on_failure,
                      should_run_hooks=False,
                      should_execute=True):
        profile = self.project.run_environment()
        adapter = get_adapter(profile)
        master_connection = adapter.get_connection(profile)
        schema_name = adapter.get_default_schema(profile)

        flat_nodes = list(itertools.chain.from_iterable(node_dependency_list))

        if len(flat_nodes) == 0:
            logger.info("WARNING: Nothing to do. Try checking your model "
                        "configs and model specification args")
            return []

        num_threads = self.threads
        logger.info("Concurrency: {} threads (target='{}')".format(
            num_threads,
            self.project.get_target().get('name')))

        master_connection = adapter.begin(profile)
        existing = adapter.query_for_existing(profile, schema_name)
        master_connection = adapter.commit(master_connection)

        node_id_to_index_map = {}
        i = 1

        for node in flat_nodes:
            if get_materialization(node) != 'ephemeral':
                node_id_to_index_map[node.get('unique_id')] = i
                i += 1

        num_nodes = len(node_id_to_index_map)

        pool = ThreadPool(num_threads)

        if should_execute:
            stat_line = dbt.ui.printer.get_counts(flat_nodes)
            full_line = "Running {}".format(stat_line)

            logger.info("")
            dbt.ui.printer.print_timestamped_line(full_line)
            dbt.ui.printer.print_timestamped_line("")

        start_time = time.time()

        if should_run_hooks:
            self.run_hooks(profile, flat_graph, dbt.utils.RunHookType.Start)

        def get_idx(node):
            return node_id_to_index_map.get(node.get('unique_id'))

        node_results = []

        for node_list in node_dependency_list:
            for node in [node for node in node_list if node.get('skip')]:
                node_name = node.get('name')
                dbt.ui.printer.print_skip_line(node, schema_name, node_name,
                                               get_idx(node), num_nodes)

                node_result = RunModelResult(node, skip=True)
                node_results.append(node_result)

            nodes_to_execute = [
                node for node in node_list if not node.get('skip')
            ]

            if should_execute:
                action = self.safe_execute_node
            else:
                action = self.safe_compile_node

            try:
                args_list = []
                for node in nodes_to_execute:
                    args_list.append({
                        'node': node,
                        'flat_graph': flat_graph,
                        'existing': existing,
                        'schema_name': schema_name,
                        'node_index': get_idx(node),
                        'num_nodes': num_nodes
                    })

                for result in pool.imap_unordered(action, args_list):
                    node_results.append(result)

                    # propagate so that CTEs get injected properly
                    node_id = result.node.get('unique_id')
                    flat_graph['nodes'][node_id] = result.node

                    index = get_idx(result.node)
                    if should_execute:
                        track_model_run(index, num_nodes, result)

                    if result.errored:
                        on_failure(result.node)
                        logger.info(result.error)

            except KeyboardInterrupt:
                pool.close()
                pool.terminate()

                profile = self.project.run_environment()
                adapter = get_adapter(profile)

                for conn_name in adapter.cancel_open_connections(profile):
                    dbt.ui.printer.print_cancel_line(conn_name, schema_name)

                pool.join()
                raise

        pool.close()
        pool.join()

        if should_run_hooks:
            self.run_hooks(profile, flat_graph, dbt.utils.RunHookType.End)

        execution_time = time.time() - start_time

        if should_execute:
            print_results_line(node_results, execution_time)

        return node_results