Example #1
    def patch_nodes(self, patches):
        """Patch nodes with the given dict of patches. Note that this consumes
        the input!
        """
        # because we don't have any mapping from node _names_ to nodes, and we
        # only have the node name in the patch, we have to iterate over all the
        # nodes looking for matching names. We could use _find_by_name if we
        # were ok with doing an O(n*m) search (one nodes scan per patch)
        for node in self.nodes.values():
            if node.resource_type != NodeType.Model:
                continue
            patch = patches.pop(node.name, None)
            if not patch:
                continue
            node.patch(patch)

        # log debug-level warning about nodes we couldn't find
        if patches:
            for patch in patches.values():
                # since patches aren't nodes, we can't use the existing
                # target_not_found warning
                logger.debug((
                    'WARNING: Found documentation for model "{}" which was '
                    'not found or is disabled').format(patch.name)
                )
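The comment above explains the tradeoff: with no name-to-node index, patching is one full scan of the nodes rather than a lookup per patch. For comparison, here is a minimal sketch (hypothetical, not dbt code) of the indexed variant, building the mapping once so each patch becomes an O(1) dictionary lookup:

def patch_nodes_indexed(nodes, patches):
    # index model nodes by name once; assumes model names are unique
    by_name = {
        node.name: node for node in nodes.values()
        if node.resource_type == NodeType.Model
    }
    # consume each matching patch with a lookup instead of a scan per patch
    for name in list(patches):
        node = by_name.get(name)
        if node is not None:
            node.patch(patches.pop(name))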
Example #2
    def render_profile(cls, raw_profile, profile_name, target_override,
                       cli_vars):
        """This is a containment zone for the hateful way we're rendering
        profiles.
        """
        renderer = ConfigRenderer(cli_vars=cli_vars)

        # rendering profiles is a bit complex. Two constraints cause trouble:
        # 1) users should be able to use environment/cli variables to specify
        #    the target in their profile.
        # 2) Missing environment/cli variables in profiles/targets that don't
        #    end up getting selected should not cause errors.
        # so first we'll just render the target name, then we use that rendered
        # name to extract a profile that we can render.
        if target_override is not None:
            target_name = target_override
        elif 'target' in raw_profile:
            # render the target if it was parsed from yaml
            target_name = renderer.render_value(raw_profile['target'])
        else:
            target_name = 'default'
            logger.debug(
                "target not specified in profile '{}', using '{}'"
                .format(profile_name, target_name)
            )

        raw_profile_data = cls._get_profile_data(
            raw_profile, profile_name, target_name
        )

        profile_data = renderer.render_profile_data(raw_profile_data)
        return target_name, profile_data
Example #3
def gcloud_installed():
    try:
        run_cmd('.', ['gcloud', '--version'])
        return True
    except OSError as e:
        logger.debug(e)
        return False
Example #4
    def handle_error(cls, error, message, sql):
        logger.debug(message.format(sql=sql))
        logger.debug(error)
        error_msg = "\n".join(
            [item['message'] for item in error.errors])

        raise dbt.exceptions.DatabaseException(error_msg)
Example #5
    def run_sql(self, query, fetch='None', kwargs=None, connection_name=None):
        if connection_name is None:
            connection_name = '__test'

        if query.strip() == "":
            return

        sql = self.transform_sql(query, kwargs=kwargs)
        if self.adapter_type == 'bigquery':
            return self.run_sql_bigquery(sql, fetch)
        elif self.adapter_type == 'presto':
            return self.run_sql_presto(sql, fetch, connection_name)

        conn = self.adapter.acquire_connection(connection_name)
        with conn.handle.cursor() as cursor:
            logger.debug('test connection "{}" executing: {}'
                         .format(connection_name, sql))
            try:
                cursor.execute(sql)
                conn.handle.commit()
                if fetch == 'one':
                    return cursor.fetchone()
                elif fetch == 'all':
                    return cursor.fetchall()
                else:
                    return
            except BaseException as e:
                conn.handle.rollback()
                print(query)
                print(e)
                raise e
            finally:
                conn.transaction_open = False
Example #6
    def run_dbt_and_check(self, args=None):
        if args is None:
            args = ["run"]

        args = ["--strict"] + args
        logger.info("Invoking dbt with {}".format(args))
        return dbt.handle_and_check(args)
Example #7
def clone_and_checkout(repo, cwd, dirname=None, remove_git_dir=False,
                       branch=None):
    exists = None
    try:
        _, err = clone(repo, cwd, dirname=dirname,
                       remove_git_dir=remove_git_dir)
    except dbt.exceptions.CommandResultError as exc:
        err = exc.stderr.decode('utf-8')
        exists = re.match("fatal: destination path '(.+)' already exists", err)
        if not exists:  # something else is wrong, raise it
            raise

    directory = None
    start_sha = None
    if exists:
        directory = exists.group(1)
        logger.debug('Updating existing dependency %s.', directory)
    else:
        matches = re.match("Cloning into '(.+)'", err.decode('utf-8'))
        directory = matches.group(1)
        logger.debug('Pulling new dependency %s.', directory)
    full_path = os.path.join(cwd, directory)
    start_sha = get_current_sha(full_path)
    checkout(full_path, repo, branch)
    end_sha = get_current_sha(full_path)
    if exists:
        if start_sha == end_sha:
            logger.debug('  Already at %s, nothing to do.', start_sha[:7])
        else:
            logger.debug('  Updated checkout from %s to %s.',
                         start_sha[:7], end_sha[:7])
    else:
        logger.debug('  Checked out at %s.', end_sha[:7])
    return directory
Example #8
    def expand_column_types(self, goal, current, model_name=None):
        reference_columns = {
            c.name: c for c in
            self.get_columns_in_relation(goal, model_name=model_name)
        }

        target_columns = {
            c.name: c for c
            in self.get_columns_in_relation(current, model_name=model_name)
        }

        for column_name, reference_column in reference_columns.items():
            target_column = target_columns.get(column_name)

            if target_column is not None and \
               target_column.can_expand_to(reference_column):
                col_string_size = reference_column.string_size()
                new_type = self.Column.string_type(col_string_size)
                logger.debug("Changing col type from %s to %s in table %s",
                             target_column.data_type, new_type, current)

                self.alter_column_type(current, column_name, new_type,
                                       model_name=model_name)

        if model_name is None:
            self.release_connection('master')
Example #9
    def find_schema_yml(cls, package_name, root_dir, relative_dirs):
        """This is common to both v1 and v2 - look through the relative_dirs
        under root_dir for .yml files yield pairs of filepath and loaded yaml
        contents.
        """
        extension = "[!.#~]*.yml"

        file_matches = dbt.clients.system.find_matching(
            root_dir,
            relative_dirs,
            extension)

        for file_match in file_matches:
            file_contents = dbt.clients.system.load_file_contents(
                file_match.get('absolute_path'), strip=False)
            test_path = file_match.get('relative_path', '')

            original_file_path = os.path.join(file_match.get('searched_path'),
                                              test_path)

            try:
                test_yml = dbt.clients.yaml_helper.load_yaml_text(
                    file_contents
                )
            except dbt.exceptions.ValidationException as e:
                test_yml = None
                logger.info("Error reading {}:{} - Skipping\n{}".format(
                            package_name, test_path, e))

            if test_yml is None:
                continue

            yield original_file_path, test_yml
Example #10
def warn_or_error(msg, node=None, log_fmt=None):
    if dbt.flags.WARN_ERROR:
        raise_compiler_error(msg, node)
    else:
        if log_fmt is not None:
            msg = log_fmt.format(msg)
        logger.warning(msg)
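For illustration, how the flag changes behavior (the call and message here are made up):

# with dbt.flags.WARN_ERROR unset, this logs a warning:
warn_or_error('model "a" depends on a disabled model', log_fmt='WARNING: {}')
# with dbt.flags.WARN_ERROR set (e.g. via dbt's --warn-error option), the
# same call raises a compiler error instead of logging.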
Example #11
    def path_info(self):
        open_cmd = dbt.clients.system.open_dir_cmd()

        message = PROFILE_DIR_MESSAGE.format(
            open_cmd=open_cmd,
            profiles_dir=self.profiles_dir
        )

        logger.info(message)
Example #12
def initialize_tracking(cookie_dir):
    global active_user
    active_user = User(cookie_dir)
    try:
        active_user.initialize()
    except Exception:
        logger.debug('Got an exception trying to initialize tracking',
                     exc_info=True)
        active_user = User(None)
Example #13
    def drop_schema(self, database, schema, model_name=None):
        logger.debug('Dropping schema "%s"."%s".', database, schema)
        kwargs = {
            'database_name': self.quote_as_configured(database, 'database'),
            'schema_name': self.quote_as_configured(schema, 'schema'),
        }
        self.execute_macro(DROP_SCHEMA_MACRO_NAME,
                           kwargs=kwargs,
                           connection_name=model_name)
Example #14
    def compile_node(self, node, manifest, extra_context=None):
        if extra_context is None:
            extra_context = {}

        logger.debug("Compiling {}".format(node.get('unique_id')))

        data = node.to_dict()
        data.update({
            'compiled': False,
            'compiled_sql': None,
            'extra_ctes_injected': False,
            'extra_ctes': [],
            'injected_sql': None,
        })
        compiled_node = CompiledNode(**data)

        context = dbt.context.runtime.generate(
            compiled_node, self.config, manifest)
        context.update(extra_context)

        compiled_node.compiled_sql = dbt.clients.jinja.get_rendered(
            node.get('raw_sql'),
            context,
            node)

        compiled_node.compiled = True

        injected_node, _ = prepend_ctes(compiled_node, manifest)

        should_wrap = {NodeType.Test, NodeType.Operation}
        if injected_node.resource_type in should_wrap:
            # data tests get wrapped in count(*)
            # TODO : move this somewhere more reasonable
            if 'data' in injected_node.tags and \
               is_type(injected_node, NodeType.Test):
                injected_node.wrapped_sql = (
                    "select count(*) from (\n{test_sql}\n) sbq").format(
                        test_sql=injected_node.injected_sql)
            else:
                # don't wrap schema tests or analyses.
                injected_node.wrapped_sql = injected_node.injected_sql

        elif is_type(injected_node, NodeType.Archive):
            # unfortunately we do everything automagically for
            # archives. in the future it'd be nice to generate
            # the SQL at the parser level.
            pass

        elif (is_type(injected_node, NodeType.Model) and
              get_materialization(injected_node) == 'ephemeral'):
            pass

        else:
            injected_node.wrapped_sql = None

        return injected_node
Example #15
def print_end_of_run_summary(num_errors, early_exit=False):
    if early_exit:
        message = yellow('Exited because of keyboard interrupt.')
    elif num_errors > 0:
        message = red('Completed with {} errors:'.format(num_errors))
    else:
        message = green('Completed successfully')

    logger.info('')
    logger.info('{}'.format(message))
Example #16
    def parse_macro_file(self, macro_file_path, macro_file_contents, root_path,
                         package_name, resource_type, tags=None, context=None):

        logger.debug("Parsing {}".format(macro_file_path))

        to_return = {}

        if tags is None:
            tags = []

        # change these to actual kwargs
        base_node = UnparsedMacro(
            path=macro_file_path,
            original_file_path=macro_file_path,
            package_name=package_name,
            raw_sql=macro_file_contents,
            root_path=root_path,
        )

        try:
            ast = dbt.clients.jinja.parse(macro_file_contents)
        except dbt.exceptions.CompilationException as e:
            e.node = base_node
            raise e

        for macro_node in ast.find_all(jinja2.nodes.Macro):
            macro_name = macro_node.name

            node_type = None
            if macro_name.startswith(dbt.utils.MACRO_PREFIX):
                node_type = NodeType.Macro
                name = macro_name.replace(dbt.utils.MACRO_PREFIX, '')

            if node_type != resource_type:
                continue

            unique_id = self.get_path(resource_type, package_name, name)

            merged = dbt.utils.deep_merge(
                base_node.serialize(),
                {
                    'name': name,
                    'unique_id': unique_id,
                    'tags': tags,
                    'resource_type': resource_type,
                    'depends_on': {'macros': []},
                })

            new_node = ParsedMacro(**merged)

            to_return[unique_id] = new_node

        return to_return
Example #17
def track(user, *args, **kwargs):
    if user.do_not_track:
        return
    else:
        logger.debug("Sending event: {}".format(kwargs))
        try:
            tracker.track_struct_event(*args, **kwargs)
        except Exception:
            logger.debug(
                "An error was encountered while trying to send an event"
            )
Example #18
    def create_schema(self, database, schema, model_name=None):
        logger.debug('Creating schema "%s"."%s".', database, schema)
        if model_name is None:
            model_name = 'master'
        kwargs = {
            'database_name': self.quote_as_configured(database, 'database'),
            'schema_name': self.quote_as_configured(schema, 'schema'),
        }
        self.execute_macro(CREATE_SCHEMA_MACRO_NAME,
                           kwargs=kwargs,
                           connection_name=model_name)
        self.commit_if_has_connection(model_name)
Example #19
    def cancel(self, connection):
        connection_name = connection.name
        pid = connection.handle.get_backend_pid()

        sql = "select pg_terminate_backend({})".format(pid)

        logger.debug("Cancelling query '{}' ({})".format(connection_name, pid))

        _, cursor = self.add_query(sql, 'master')
        res = cursor.fetchone()

        logger.debug("Cancel query '{}': {}".format(connection_name, res))
Example #20
    def _schema_is_cached(self, database, schema, model_name=None):
        """Check if the schema is cached, and by default logs if it is not."""
        if dbt.flags.USE_CACHE is False:
            return False
        elif (database, schema) not in self.cache:
            logger.debug(
                'On "{}": cache miss for schema "{}.{}", this is inefficient'
                .format(model_name or '<None>', database, schema)
            )
            return False
        else:
            return True
Example #21
    def get_columns_in_relation(self, relation, model_name=None):
        try:
            table = self.connections.get_bq_table(
                database=relation.database,
                schema=relation.schema,
                identifier=relation.table_name,
                conn_name=model_name
            )
            return self._get_dbt_columns_from_bq_table(table)

        except (ValueError, google.cloud.exceptions.NotFound) as e:
            logger.debug("get_columns_in_relation error: {}".format(e))
            return []
Example #22
def rmdir(path):
    """
    Recursively deletes a directory. Includes an error handler to retry with
    different permissions on Windows. Otherwise, removing directories (e.g.
    cloned via git) can cause rmtree to throw a PermissionError exception
    """
    logger.debug("DEBUG** Window rmdir sys.platform: {}".format(sys.platform))
    if sys.platform == 'win32':
        onerror = _windows_rmdir_readonly
    else:
        onerror = None

    return shutil.rmtree(path, onerror=onerror)
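The _windows_rmdir_readonly helper is not shown in this example. The usual shape of such an rmtree onerror hook (a sketch of the standard recipe, not necessarily dbt's exact helper) clears the read-only bit and retries the failed operation:

import os
import stat

def _windows_rmdir_readonly(func, path, excinfo):
    # files under .git are often read-only on Windows; make the path
    # writable, then retry the failed operation (os.unlink, os.rmdir, ...)
    os.chmod(path, stat.S_IWRITE)
    func(path)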
Example #23
def run_cmd(cwd, cmd, env=None):
    logger.debug('Executing "{}"'.format(' '.join(cmd)))
    if len(cmd) == 0:
        raise dbt.exceptions.CommandError(cwd, cmd)

    # the env argument replaces the environment entirely, which has exciting
    # consequences on Windows! Do an update instead.
    full_env = env
    if env is not None:
        full_env = os.environ.copy()
        full_env.update(env)

    try:
        proc = subprocess.Popen(
            cmd,
            cwd=cwd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            env=full_env)

        out, err = proc.communicate()
    except OSError as exc:
        _interpret_oserror(exc, cwd, cmd)

    logger.debug('STDOUT: "{}"'.format(out))
    logger.debug('STDERR: "{}"'.format(err))

    if proc.returncode != 0:
        logger.debug('command return code={}'.format(proc.returncode))
        raise dbt.exceptions.CommandResultError(cwd, cmd, proc.returncode,
                                                out, err)

    return out, err
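An illustrative call (the command and environment variable are arbitrary). Because of the merge above, the child process still inherits PATH, HOME, and the rest of os.environ alongside the override:

out, err = run_cmd('.', ['git', 'status'], env={'GIT_TERMINAL_PROMPT': '0'})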
Example #24
def parse_cli_vars(var_string):
    try:
        cli_vars = yaml_helper.load_yaml_text(var_string)
        var_type = type(cli_vars)
        if var_type == dict:
            return cli_vars
        else:
            type_name = var_type.__name__
            dbt.exceptions.raise_compiler_error(
                "The --vars argument must be a YAML dictionary, but was "
                "of type '{}'".format(type_name))
    except dbt.exceptions.ValidationException:
        logger.error(
            "The YAML provided in the --vars argument is not valid.\n")
        raise
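For illustration, the inline-YAML form this accepts (values follow from standard YAML parsing):

parse_cli_vars('{key: value, count: 2}')   # -> {'key': 'value', 'count': 2}
parse_cli_vars('[1, 2, 3]')   # not a dict -> raises a compiler error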
Example #25
    def raw_execute(self, sql, name=None, fetch=False):
        conn = self.get(name)
        client = conn.handle

        logger.debug('On %s: %s', name, sql)

        job_config = google.cloud.bigquery.QueryJobConfig()
        job_config.use_legacy_sql = False
        query_job = client.query(sql, job_config)

        # this blocks until the query has completed
        with self.exception_handler(sql, conn.name):
            iterator = query_job.result()

        return query_job, iterator
Example #26
    def _safe_release_connection(self):
        """Try to release a connection. If an exception is hit, log and return
        the error string.
        """
        node_name = self.node.name
        try:
            self.adapter.release_connection(node_name)
        except Exception as exc:
            logger.debug(
                'Error releasing connection for node {}: {!s}\n{}'
                .format(node_name, exc, traceback.format_exc())
            )
            return dbt.compat.to_string(exc)

        return None
Example #27
        def __deepcopy__(self, memo):
            path = os.path.join(self.node.get('root_path'),
                                self.node.get('original_file_path'))

            logger.debug(
                'dbt encountered an undefined variable, "{}" in node {}.{} '
                '(source path: {})'
                .format(self.name, self.node.get('package_name'),
                        self.node.get('name'), path))

            # match jinja's message
            dbt.exceptions.raise_compiler_error(
                "{!r} is undefined".format(self.name),
                node=self.node
            )
Example #28
    def run_dbt(self, args=None, expect_pass=True, strict=True):
        if args is None:
            args = ["run"]

        if strict:
            args = ["--strict"] + args
        args.append('--log-cache-events')
        logger.info("Invoking dbt with {}".format(args))

        res, success = dbt.handle_and_check(args)
        self.assertEqual(
            success, expect_pass,
            "dbt exit state did not match expected")

        return res
Example #29
    def get_credentials(cls, credentials):
        method = credentials.method

        # Support missing 'method' for backwards compatibility
        if method == 'database' or method is None:
            logger.debug("Connecting to Redshift using 'database' credentials")
            return credentials

        elif method == 'iam':
            logger.debug("Connecting to Redshift using 'IAM' credentials")
            return cls.get_tmp_iam_cluster_credentials(credentials)

        else:
            raise dbt.exceptions.FailedToConnectException(
                    "Invalid 'method' in profile: '{}'".format(method))
Example #30
def print_run_status_line(results):
    stats = {
        'error': 0,
        'skip': 0,
        'pass': 0,
        'total': 0,
    }

    for r in results:
        result_type = interpret_run_result(r)
        stats[result_type] += 1
        stats['total'] += 1

    stats_line = "\nDone. PASS={pass} ERROR={error} SKIP={skip} TOTAL={total}"
    logger.info(stats_line.format(**stats))
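interpret_run_result is not shown here. A hypothetical sketch with the shape the loop expects (the real classifier may inspect different result attributes):

def interpret_run_result(result):
    # map one run result onto the 'error'/'skip'/'pass' buckets
    if getattr(result, 'errored', False) or getattr(result, 'failed', False):
        return 'error'
    if getattr(result, 'skipped', False):
        return 'skip'
    return 'pass'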
Example #31
    def diag(self):
        # if we got here, a 'dbt_project.yml' does exist, but we have not tried
        # to parse it.
        project_profile = None
        try:
            project = dbt.config.Project.from_current_directory()
            project_profile = project.profile_name
        except dbt.config.DbtConfigError as exc:
            project = 'ERROR loading project: {!s}'.format(exc)

        # log the profile we decided on as well, if it's available.
        try:
            profile = dbt.config.Profile.from_args(self.args, project_profile)
        except dbt.config.DbtConfigError as exc:
            profile = 'ERROR loading profile: {!s}'.format(exc)

        logger.info("args: {}".format(self.args))
        logger.info("")
        logger.info("project:\n{!s}".format(project))
        logger.info("")
        logger.info("profile:\n{!s}".format(profile))
Example #32
def print_compile_stats(stats):
    names = {
        NodeType.Model: 'model',
        NodeType.Test: 'test',
        NodeType.Snapshot: 'snapshot',
        NodeType.Analysis: 'analysis',
        NodeType.Macro: 'macro',
        NodeType.Operation: 'operation',
        NodeType.Seed: 'seed file',
        NodeType.Source: 'source',
    }

    results = {k: 0 for k in names.keys()}
    results.update(stats)

    stat_line = ", ".join([
        dbt.utils.pluralize(ct, names.get(t)) for t, ct in results.items()
        if t in names
    ])

    logger.info("Found {}".format(stat_line))
Example #33
File: test.py Project: igrayson/dbt
    def run(self):
        runner = RunManager(self.project, self.project['target-path'],
                            self.args)

        include = self.args.models
        exclude = self.args.exclude

        test_types = [self.args.data, self.args.schema]

        if all(test_types) or not any(test_types):
            results = runner.run_tests(include, exclude, set())
        elif self.args.data:
            results = runner.run_tests(include, exclude, {'data'})
        elif self.args.schema:
            results = runner.run_tests(include, exclude, {'schema'})
        else:
            raise RuntimeError("unexpected")

        logger.info(dbt.utils.get_run_status_line(results))

        return results
Example #34
    def _iterate_selected_nodes(self):
        selector = self.get_node_selector()
        spec = self.get_selection_spec()
        nodes = sorted(selector.get_selected(spec))
        if not nodes:
            logger.warning('No nodes selected!')
            return
        if self.manifest is None:
            raise InternalException(
                'manifest is None in _iterate_selected_nodes')
        for node in nodes:
            if node in self.manifest.nodes:
                yield self.manifest.nodes[node]
            elif node in self.manifest.sources:
                yield self.manifest.sources[node]
            elif node in self.manifest.exposures:
                yield self.manifest.exposures[node]
            else:
                raise RuntimeException(
                    f'Got an unexpected result from node selection: '
                    f'"{node}" Expected a source or a node!')
Example #35
File: impl.py Project: pravilla/dbt
    def get_columns_in_table(self,
                             schema_name,
                             table_name,
                             database=None,
                             model_name=None):

        # BigQuery does not have databases -- the database parameter is here
        # for consistency with the base implementation

        conn = self.get_connection(model_name)
        client = conn.handle

        try:
            dataset_ref = client.dataset(schema_name)
            table_ref = dataset_ref.table(table_name)
            table = client.get_table(table_ref)
            return self.get_dbt_columns_from_bq_table(table)

        except (ValueError, google.cloud.exceptions.NotFound) as e:
            logger.debug("get_columns_in_table error: {}".format(e))
            return []
Example #36
def invalid_source_fail_unless_test(
    node, target_name, target_table_name, disabled
):
    if node.resource_type == NodeType.Test:
        msg = get_source_not_found_or_disabled_msg(
            node, target_name, target_table_name, disabled
        )
        if disabled:
            logger.debug(warning_tag(msg))
        else:
            warn_or_error(
                msg,
                log_fmt=warning_tag('{}')
            )
    else:
        source_target_not_found(
            node,
            target_name,
            target_table_name,
            disabled=disabled
        )
Example #37
    def _checkout(self, project):
        """Performs a shallow clone of the repository into the downloads
        directory. This function can be called repeatedly. If the project has
        already been checked out at this version, it will be a no-op. Returns
        the path to the checked out directory."""
        if len(self.version) != 1:
            dbt.exceptions.raise_dependency_error(
                'Cannot checkout repository until the version is pinned.')
        try:
            dir_ = dbt.clients.git.clone_and_checkout(
                self.git, DOWNLOADS_PATH, branch=self.version[0],
                dirname=self._checkout_name)
        except dbt.exceptions.ExecutableError as exc:
            if exc.cmd and exc.cmd[0] == 'git':
                logger.error(
                    'Make sure git is installed on your machine. More '
                    'information: '
                    'https://docs.getdbt.com/docs/package-management'
                )
            raise
        return os.path.join(DOWNLOADS_PATH, dir_)
Example #38
    def get_credentials(cls, credentials):
        method = credentials.method

        # Support missing 'method' for backwards compatibility
        if method == 'database' or method is None:
            logger.debug("Connecting to Redshift using 'database' credentials")
            # this requirement is really annoying to encode into json schema,
            # so validate it here
            if credentials.password is None:
                raise dbt.exceptions.FailedToConnectException(
                    "'password' field is required for 'database' credentials"
                )
            return credentials

        elif method == 'iam':
            logger.debug("Connecting to Redshift using 'IAM' credentials")
            return cls.get_tmp_iam_cluster_credentials(credentials)

        else:
            raise dbt.exceptions.FailedToConnectException(
                "Invalid 'method' in profile: '{}'".format(method))
Example #39
    def parse_file(self, block: FileBlock):
        # mark the file as seen, even if there are no macros in it
        self.results.get_file(block.file)
        source_file = block.file

        original_file_path = source_file.path.original_file_path

        logger.debug("Parsing {}".format(original_file_path))

        # this is really only used for error messages
        base_node = UnparsedMacro(
            path=original_file_path,
            original_file_path=original_file_path,
            package_name=self.project.project_name,
            raw_sql=source_file.contents,
            root_path=self.project.project_root,
            resource_type=NodeType.Macro,
        )

        for node in self.parse_unparsed_macros(base_node):
            self.results.add_macro(block.file, node)
Example #40
    def run(self):
        project_dir = self.args.project_name

        profiles_dir = dbt.config.PROFILES_DIR
        profiles_file = os.path.join(profiles_dir, 'profiles.yml')

        self.create_profiles_dir(profiles_dir)
        self.create_profiles_file(profiles_file)

        msg = "Creating dbt configuration folder at {}"
        logger.info(msg.format(profiles_dir))

        if os.path.exists(project_dir):
            raise RuntimeError("directory {} already exists!".format(
                project_dir
            ))

        self.clone_starter_repo(project_dir)

        addendum = self.get_addendum(project_dir, profiles_dir)
        logger.info(addendum)
Example #41
    def run_dbt(self,
                args=None,
                expect_pass=True,
                strict=True,
                clear_adapters=True):
        # clear the adapter cache
        if clear_adapters:
            reset_adapters()
        if args is None:
            args = ["run"]

        if strict:
            args = ["--strict"] + args
        args.append('--log-cache-events')
        logger.info("Invoking dbt with {}".format(args))

        res, success = dbt.handle_and_check(args)
        self.assertEqual(success, expect_pass,
                         "dbt exit state did not match expected")

        return res
Example #42
    def tearDown(self):
        # get any current run adapter and clean up its connections before we
        # reset them. It'll probably be different from ours because
        # handle_and_check() calls reset_adapters().
        register_adapter(self.config)
        adapter = get_adapter(self.config)
        # make sure self.adapter exists before comparing against it (it may
        # be missing if setUp failed partway through)
        if not hasattr(self, 'adapter'):
            self.adapter = adapter
        if adapter is not self.adapter:
            adapter.cleanup_connections()

        self._drop_schemas()

        self.adapter.cleanup_connections()
        reset_adapters()
        os.chdir(INITIAL_ROOT)
        try:
            shutil.rmtree(self.test_root_dir)
        except EnvironmentError:
            logger.exception('Could not clean up after test - {} not removable'
                             .format(self.test_root_dir))
Example #43
    def open(cls, connection):
        if connection.state == 'open':
            logger.debug('Connection is already open, skipping open.')
            return connection

        credentials = connection.credentials
        # if credentials.method == 'kerberos':
        #     auth = KerberosAuthentication()
        # else:
        #     auth = prestodb.constants.DEFAULT_AUTH

        hive_conn = hive.connect(
            host=credentials.host,
            port=credentials.get('port', 10000),
            username=credentials.get('username', ''),  # default: getuser()
            password=credentials.get('pass', ''),
            # schema=credentials.schema,
            auth='LDAP')
        connection.state = 'open'
        connection.handle = ConnectionWrapper(hive_conn)
        return connection
Example #44
    def exception_handler(cls,
                          profile,
                          sql,
                          model_name=None,
                          connection_name=None):
        connection = cls.get_connection(profile, connection_name)
        schema = connection.get('credentials', {}).get('schema')

        try:
            yield
        except psycopg2.ProgrammingError as e:
            logger.debug('Postgres error: {}'.format(str(e)))

            cls.rollback(connection)
            error_data = {
                "model": model_name,
                "schema": schema,
                "user": connection.get('credentials', {}).get('user')
            }

            if 'must be owner of relation' in e.diag.message_primary:
                raise RuntimeError(
                    RELATION_NOT_OWNER_MESSAGE.format(**error_data))
            elif "permission denied for" in e.diag.message_primary:
                raise RuntimeError(
                    RELATION_PERMISSION_DENIED_MESSAGE.format(**error_data))
            else:
                raise e
        except Exception as e:
            logger.debug("Error running SQL: %s", sql)
            logger.debug("Rolling back transaction.")
            cls.rollback(connection)
            raise e
Example #45
def dependency_projects(project):
    module_paths = [
        GLOBAL_DBT_MODULES_PATH,
        os.path.join(project['project-root'], project['modules-path'])
    ]

    for module_path in module_paths:
        logger.debug("Loading dependency project from {}".format(module_path))

        for obj in os.listdir(module_path):
            full_obj = os.path.join(module_path, obj)

            if not os.path.isdir(full_obj) or obj.startswith('__'):
                # exclude non-dirs and dirs that start with __
                # the latter could be something like __pycache__
                # for the global dbt modules dir
                continue

            try:
                yield dbt.project.read_project(
                    os.path.join(full_obj, 'dbt_project.yml'),
                    project.profiles_dir,
                    profile_to_load=project.profile_to_load,
                    args=project.args)
            except dbt.project.DbtProjectError as e:
                logger.info(
                    "Error reading dependency project at {}".format(full_obj))
                logger.info(str(e))
Example #46
    def add_query(self,
                  sql: str,
                  auto_begin: bool = True,
                  bindings: Optional[Any] = None,
                  abridge_sql_log: bool = False) -> Tuple[Connection, Any]:
        connection = self.get_thread_connection()
        if auto_begin and connection.transaction_open is False:
            self.begin()

        logger.debug('Using {} connection "{}".'.format(
            self.TYPE, connection.name))

        with self.exception_handler(sql):
            if abridge_sql_log:
                log_sql = '{}...'.format(sql[:512])
            else:
                log_sql = sql

            logger.debug(f'On {connection.name}: {log_sql}')
            pre = time.time()

            cursor = connection.handle.cursor()

            # some drivers fail if bindings are passed when the query
            # has no placeholders
            if bindings:
                cursor.execute(sql, bindings)
            else:
                cursor.execute(sql)

            logger.debug(
                f"SQL status: {self.get_response(cursor)} in {time.time() - pre:0.2f} seconds"
            )

            return connection, cursor
Example #47
    def open(cls, connection):
        if connection.state == 'open':
            logger.debug('Connection is already open, skipping open.')
            return connection

        try:
            handle = cls.get_bigquery_client(connection.credentials)

        except google.auth.exceptions.DefaultCredentialsError:
            logger.info("Please log into GCP to continue")
            gcloud.setup_default_credentials()

            handle = cls.get_bigquery_client(connection.credentials)

        except Exception as e:
            logger.debug("Got an error when attempting to create a bigquery "
                         "client: '{}'".format(e))

            connection.handle = None
            connection.state = 'fail'

            raise FailedToConnectException(str(e))

        connection.handle = handle
        connection.state = 'open'
        return connection
Example #48
    def run(self):
        os.chdir(self.config.target_path)

        port = self.args.port

        shutil.copyfile(DOCS_INDEX_FILE_PATH, 'index.html')

        logger.info("Serving docs at 0.0.0.0:{}".format(port))
        logger.info(
            "To access from your browser, navigate to:  http://localhost:{}"
            .format(port)
        )
        logger.info("Press Ctrl+C to exit.\n\n")

        # mypy doesn't think SimpleHTTPRequestHandler is ok here, but it is
        httpd = TCPServer(  # type: ignore
            ('0.0.0.0', port),
            SimpleHTTPRequestHandler  # type: ignore
        )  # type: ignore

        if self.args.open_browser:
            try:
                webbrowser.open_new_tab(f'http://127.0.0.1:{port}')
            except webbrowser.Error:
                pass

        try:
            httpd.serve_forever()  # blocks
        finally:
            httpd.shutdown()
            httpd.server_close()

        return None
Example #49
    def exception_handler(self, sql):
        try:
            yield

        except cx_Oracle.DatabaseError as e:
            logger.info('Oracle error: {}'.format(str(e)))

            try:
                # attempt to release the connection
                self.release()
            except cx_Oracle.Error:
                logger.info("Failed to release connection!")

            raise dbt.exceptions.DatabaseException(str(e).strip()) from e

        except Exception as e:
            logger.info("Rolling back transaction.")
            self.release()
            if isinstance(e, dbt.exceptions.RuntimeException):
                # during a sql query, an internal to dbt exception was raised.
                # this sounds a lot like a signal handler and probably has
                # useful information, so raise it without modification.
                raise e

            raise dbt.exceptions.RuntimeException(e) from e
Example #50
    def add_query(self,
                  sql: str,
                  auto_begin: bool = True,
                  bindings: Optional[Any] = None,
                  abridge_sql_log: bool = False) -> Tuple[Connection, Any]:
        connection = self.get_thread_connection()
        if auto_begin and connection.transaction_open is False:
            self.begin()

        logger.debug('Using {} connection "{}".'.format(
            self.TYPE, connection.name))

        with self.exception_handler(sql):
            if abridge_sql_log:
                log_sql = '{}...'.format(sql[:512])
            else:
                log_sql = sql

            logger.debug(
                'On {connection_name}: {sql}',
                connection_name=connection.name,
                sql=log_sql,
            )
            pre = time.time()

            cursor = connection.handle.cursor()
            # avoid passing bindings for queries without placeholders,
            # which some drivers reject (mirrors the add_query variant above)
            if bindings:
                cursor.execute(sql, bindings)
            else:
                cursor.execute(sql)
            connection.handle.commit()
            logger.debug("SQL status: {status} in {elapsed:0.2f} seconds",
                         status=self.get_status(cursor),
                         elapsed=(time.time() - pre))

            return connection, cursor
Example #51
    def run_from_graph(self, Selector, Runner, query):
        flat_graph, linker = self.compile(self.project)

        selector = Selector(linker, flat_graph)
        selected_nodes = selector.select(query)
        dep_list = selector.as_node_list(selected_nodes)

        profile = self.project.run_environment()
        adapter = get_adapter(profile)

        flat_nodes = dbt.utils.flatten_nodes(dep_list)
        if len(flat_nodes) == 0:
            logger.info("WARNING: Nothing to do. Try checking your model "
                        "configs and model specification args")
            return []
        elif Runner.print_header:
            stat_line = dbt.ui.printer.get_counts(flat_nodes)
            logger.info("")
            dbt.ui.printer.print_timestamped_line(stat_line)
            dbt.ui.printer.print_timestamped_line("")
        else:
            logger.info("")

        try:
            Runner.before_run(self.project, adapter, flat_graph)
            started = time.time()
            res = self.execute_nodes(linker, Runner, flat_graph, dep_list)
            elapsed = time.time() - started
            Runner.after_run(self.project, adapter, res, flat_graph, elapsed)

        finally:
            adapter.cleanup_connections()

        return res
Example #52
    def run_forever(self):
        host = self.args.host
        port = self.args.port
        addr = (host, port)

        display_host = host
        if host == '0.0.0.0':
            display_host = 'localhost'

        logger.info('Serving RPC server at {}:{}, pid={}'.format(
            *addr, os.getpid()))

        logger.info('Supported methods: {}'.format(
            sorted(self.task_manager.methods())))

        logger.info('Send requests to http://{}:{}/jsonrpc'.format(
            display_host, port))

        app = DispatcherMiddleware(self.handle_request, {
            '/jsonrpc': self.handle_jsonrpc_request,
        })

        # we have to run in threaded mode if we want to share subprocess
        # handles, which is the easiest way to implement `kill` (it makes
        # `ps` easier as well). The alternative involves tracking
        # metadata+state in a multiprocessing.Manager, adds polling of the
        # manager to the request task handler, and in general gets messy
        # fast.
        run_simple(
            host,
            port,
            app,
            threaded=not self.task_manager.single_threaded(),
        )
Example #53
def run_from_args(parsed):
    log_cache_events(getattr(parsed, 'log_cache_events', False))
    flags.set_from_args(parsed)

    parsed.cls.pre_init_hook(parsed)
    # we can now use the logger for stdout

    logger.info("Running with dbt{}".format(dbt.version.installed))

    # this will convert DbtConfigErrors into RuntimeExceptions
    task = parsed.cls.from_args(args=parsed)
    logger.debug("running dbt with arguments {parsed}", parsed=str(parsed))

    log_path = None
    if task.config is not None:
        log_path = getattr(task.config, 'log_path', None)
    # we can finally set the file logger up
    log_manager.set_path(log_path)
    if dbt.tracking.active_user is not None:  # mypy appeasement, always true
        logger.debug("Tracking: {}".format(dbt.tracking.active_user.state()))

    results = None

    with track_run(task):
        results = task.run()

    return task, results
Example #54
    def run(self):
        dbt.clients.system.make_directory(self.project['modules-path'])
        dbt.clients.system.make_directory(DOWNLOADS_PATH)

        packages = _read_packages(self.project)
        if not packages:
            logger.info('Warning: No packages were found in packages.yml')
            return

        pending_deps = PackageListing.create(packages)
        final_deps = PackageListing.create([])

        while pending_deps:
            sub_deps = PackageListing.create([])
            for name, package in pending_deps.items():
                final_deps.incorporate(package)
                final_deps[name].resolve_version()
                target_metadata = final_deps[name].fetch_metadata(self.project)
                sub_deps.incorporate_from_yaml(_read_packages(target_metadata))
            pending_deps = sub_deps

        self._check_for_duplicate_project_names(final_deps)

        for _, package in final_deps.items():
            logger.info('Installing %s', package)
            package.install(self.project)
            logger.info('  Installed from %s\n', package.nice_version_name())

            self.track_package_install(
                package_name=package.name,
                source_type=package.source_type(),
                version=package.version_name())
Example #55
    def run(self):
        """
        This function takes all the paths in the target file
        and cleans the project paths that are not protected.
        """
        for path in self.config.clean_targets:
            logger.info("Checking {}/*".format(path))
            if not self.__is_protected_path(path):
                shutil.rmtree(path, True)
                logger.info(" Cleaned {}/*".format(path))
            else:
                logger.info("ERROR: not cleaning {}/* because it is "
                            "protected".format(path))
        logger.info("Finished cleaning all paths.")
Example #56
    def try_create_schema(self):
        profile = self.project.run_environment()
        adapter = get_adapter(profile)

        schema_name = adapter.get_default_schema(profile)
        model_name = None

        connection = adapter.begin(profile)
        schema_exists = adapter.check_schema_exists(profile, schema_name)
        adapter.commit(connection)

        if schema_exists:
            logger.debug('schema {} already exists -- '
                         'not creating'.format(schema_name))
            return

        try:
            connection = adapter.begin(profile)
            adapter.create_schema(profile, schema_name)
            adapter.commit(connection)

        except (dbt.exceptions.FailedToConnectException,
                psycopg2.OperationalError) as e:
            logger.info("ERROR: Could not connect to the target database. Try "
                        "`dbt debug` for more information.")
            logger.info(str(e))
            raise
Example #57
    def run(self):
        dbt.clients.system.make_directory(self.config.modules_path)
        _initialize_downloads()

        packages = self.config.packages.packages
        if not packages:
            logger.info('Warning: No packages were found in packages.yml')
            return

        pending_deps = PackageListing.create(packages)
        final_deps = PackageListing.create([])

        while pending_deps:
            sub_deps = PackageListing.create([])
            for name, package in pending_deps.items():
                final_deps.incorporate(package)
                final_deps[name].resolve_version()
                target_config = final_deps[name].fetch_metadata(self.config)
                sub_deps.incorporate_from_yaml(target_config.packages)
            pending_deps = sub_deps

        self._check_for_duplicate_project_names(final_deps)

        for _, package in final_deps.items():
            logger.info('Installing %s', package)
            package.install(self.config)
            logger.info('  Installed from %s\n', package.nice_version_name())

            self.track_package_install(package_name=package.name,
                                       source_type=package.source_type(),
                                       version=package.version_name())

        if REMOVE_DOWNLOADS:
            dbt.clients.system.rmtree(DOWNLOADS_PATH)
Example #58
    def run(self):
        os.chdir(self.config.target_path)

        port = self.args.port

        shutil.copyfile(DOCS_INDEX_FILE_PATH, 'index.html')

        logger.info("Serving docs at 0.0.0.0:{}".format(port))
        logger.info(
            "To access from your browser, navigate to http://localhost:{}.".
            format(port))
        logger.info("Press Ctrl+C to exit.\n\n")

        httpd = TCPServer(('0.0.0.0', port), SimpleHTTPRequestHandler)

        try:
            webbrowser.open_new_tab('http://127.0.0.1:{}'.format(port))
        except webbrowser.Error:
            pass

        try:
            httpd.serve_forever()  # blocks
        finally:
            httpd.shutdown()
            httpd.server_close()

        return None
Example #59
    def open_connection(cls, connection):
        if connection.get('state') == 'open':
            logger.debug('Connection is already open, skipping open.')
            return connection

        result = connection.copy()
        credentials = connection.get('credentials', {})

        try:
            handle = cls.get_bigquery_client(credentials)

        except google.auth.exceptions.DefaultCredentialsError:
            logger.info("Please log into GCP to continue")
            dbt.clients.gcloud.setup_default_credentials()

            handle = cls.get_bigquery_client(credentials)

        except Exception as e:
            logger.debug("Got an error when attempting to create a bigquery "
                         "client: '{}'".format(e))

            result['handle'] = None
            result['state'] = 'fail'

            raise dbt.exceptions.FailedToConnectException(str(e))

        result['handle'] = handle
        result['state'] = 'open'
        return result
Example #60
    def patch_nodes(
        self, patches: MutableMapping[str, ParsedNodePatch]
    ) -> None:
        """Patch nodes with the given dict of patches. Note that this consumes
        the input!
        This relies on the fact that all nodes have unique _name_ fields, not
        just unique unique_id fields.
        """
        # because we don't have any mapping from node _names_ to nodes, and we
        # only have the node name in the patch, we have to iterate over all the
        # nodes looking for matching names. We could use a NameSearcher if we
        # were ok with doing an O(n*m) search (one nodes scan per patch)
        for node in self.nodes.values():
            patch = patches.pop(node.name, None)
            if not patch:
                continue

            expected_key = node.resource_type.pluralize()
            if expected_key != patch.yaml_key:
                if patch.yaml_key == 'models':
                    deprecations.warn(
                        'models-key-mismatch',
                        patch=patch, node=node, expected_key=expected_key
                    )
                else:
                    raise_invalid_patch(
                        node, patch.yaml_key, patch.original_file_path
                    )

            node.patch(patch)

        # log debug-level warning about nodes we couldn't find
        if patches:
            for patch in patches.values():
                # since patches aren't nodes, we can't use the existing
                # target_not_found warning
                logger.debug((
                    'WARNING: Found documentation for resource "{}" which was '
                    'not found or is disabled').format(patch.name)
                )