Ejemplo n.º 1
0
 def search(self, included_nodes: Set[UniqueId],
            selector: str) -> Iterator[UniqueId]:
     """Yield the unique IDs of nodes whose resource type matches.

     ``selector`` must name a valid NodeType; anything else raises
     RuntimeException (when the generator is first iterated).
     """
     try:
         wanted = NodeType(selector)
     except ValueError as exc:
         raise RuntimeException(
             f'Invalid resource_type selector "{selector}"') from exc
     yield from (
         unique_id
         for unique_id, parsed in self.parsed_nodes(included_nodes)
         if parsed.resource_type == wanted
     )
Ejemplo n.º 2
0
 def _get_warehouse(self) -> str:
     _, table = self.execute(
         'select current_warehouse() as warehouse',
         fetch=True
     )
     if len(table) == 0 or len(table[0]) == 0:
         # can this happen?
         raise RuntimeException(
             'Could not get current warehouse: no results'
         )
     return str(table[0][0])
Ejemplo n.º 3
0
 def __init__(self,
              args,
              config,
              tasks: Optional[List[Type[RemoteMethod]]] = None) -> None:
     """Initialize the RPC server task.

     :param args: parsed command-line arguments, forwarded to the base class
     :param config: runtime configuration, forwarded to the base class
     :param tasks: optional list of RPC task classes, wrapped in TaskTypes
     :raises RuntimeException: on Windows, where the server cannot run
     """
     # signal.SIGHUP does not exist on Windows, so refuse to start there.
     if os.name == 'nt':
         raise RuntimeException(
             'The dbt RPC server is not supported on windows')
     super().__init__(args, config)
     self.task_manager = TaskManager(self.args, self.config,
                                     TaskTypes(tasks))
     # NOTE(review): _sighup_handler presumably reloads server state on
     # SIGHUP (the conventional daemon "reload" signal) - confirm.
     signal.signal(signal.SIGHUP, self._sighup_handler)
Ejemplo n.º 4
0
def _match_to_int(match: Dict[str, str], key: str) -> Optional[int]:
    raw = match.get(key)
    # turn the empty string into None, too.
    if not raw:
        return None
    try:
        return int(raw)
    except ValueError as exc:
        raise RuntimeException(
            f'Invalid node spec - could not handle parent depth {raw}'
        ) from exc
Ejemplo n.º 5
0
 def prepend_ctes(self, prepended_ctes: List[InjectedCTE]):
     """Inject ``prepended_ctes`` into this node's compiled SQL.

     Records the CTEs on the node, rebuilds ``injected_sql``, and
     re-validates the node's serialized form.

     :raises RuntimeException: if the node has no compiled SQL yet.
     """
     # Check the precondition BEFORE mutating any state, so a failed
     # call cannot leave the node half-updated (previously the two
     # extra_ctes fields were set before this guard).
     if self.compiled_sql is None:
         raise RuntimeException('Cannot prepend ctes to an unparsed node',
                                self)
     self.extra_ctes_injected = True
     self.extra_ctes = prepended_ctes
     self.injected_sql = _inject_ctes_into_sql(
         self.compiled_sql,
         prepended_ctes,
     )
     self.validate(self.to_dict())
Ejemplo n.º 6
0
 def __post_init__(self):
     # some core things set database='', which we should ignore.
     if self.database and self.database != self.schema:
         raise RuntimeException(
             f'Error while parsing relation {self.name}: \n'
             f'    identifier: {self.identifier} \n'
             f'    schema: {self.schema} \n'
             f'    database: {self.database} \n'
             f'On Spark, database should not be set. Use the schema '
             f'config to set a custom schema/database for this relation.'
         )
Ejemplo n.º 7
0
 def string_add_sql(
     self, add_to: str, value: str, location='append',
 ) -> str:
     """Return a SQL expression concatenating ``value`` onto ``add_to``.

     ``location`` picks the side: 'append' (default) or 'prepend'.
     Any other location raises RuntimeException.
     """
     if location not in ('append', 'prepend'):
         raise RuntimeException(
             f'Got an unexpected location value of "{location}"'
         )
     if location == 'prepend':
         return f"'{value}' || {add_to}"
     return f"{add_to} || '{value}'"
Ejemplo n.º 8
0
 def exception_handler(self, sql: str):
     """Context-manager body translating driver errors into dbt errors.

     sqlite3.DatabaseError becomes DatabaseException; any other error
     becomes RuntimeException. The connection is released in both cases.
     """
     try:
         yield
     except sqlite3.DatabaseError as e:
         self.release()
         logger.debug("sqlite3 error: {}".format(str(e)))
         # chain the original exception so the full traceback survives
         raise DatabaseException(str(e)) from e
     except Exception as e:
         logger.debug("Error running SQL: {}".format(sql))
         logger.debug("Rolling back transaction.")
         self.release()
         raise RuntimeException(str(e)) from e
Ejemplo n.º 9
0
    def list_schemas(self, database: str) -> List[str]:
        """Return the names of all schemas in ``database``.

        :raises RuntimeException: when the underlying macro fails with a
            database error.
        """
        try:
            results = self.execute_macro(LIST_SCHEMAS_MACRO_NAME,
                                         kwargs={'database': database})
        except DatabaseException as exc:
            msg = (f'Database error while listing schemas in database '
                   f'"{database}"\n{exc}')
            # chain the original exception to preserve the full traceback
            raise RuntimeException(msg) from exc
        # this uses 'show terse schemas in database', and the column name we
        # want is 'name'

        return [row['name'] for row in results]
Ejemplo n.º 10
0
    def __init__(self, args, config):
        """Validate arguments and prepare the snapshot output directory.

        :param args: parsed command-line arguments
        :param config: project configuration
        :raises RuntimeException: when --from-archive was not supplied.
        """
        if not args.from_archive:
            raise RuntimeException('The --from-archive parameter is required!')
        # --apply implies both writing files and migrating the database
        if args.apply:
            args.write_files = True
            args.migrate_database = True
        # modernized from the legacy super(MigrationTask, self) form;
        # behavior is identical on Python 3
        super().__init__(args, config)

        self.snapshot_root = os.path.normpath(self.config.snapshot_paths[0])
        system.make_directory(self.snapshot_root)

        # accumulators; empty at construction time
        self.backups_made = []
        self.snapshots_written = []
Ejemplo n.º 11
0
    def create_profiles_file(self, profiles_file, sample_adapter):
        """Copy the adapter's sample profiles.yml to ``profiles_file``.

        Returns True when a new file was written, False when the target
        already existed (it is never overwritten). Raises
        RuntimeException when the adapter ships no sample profile.
        """
        # Line below raises an exception if the specified adapter is not found
        load_plugin(sample_adapter)
        include_root = get_include_paths(sample_adapter)[0]
        sample_path = include_root / 'sample_profiles.yml'

        if not sample_path.exists():
            raise RuntimeException(f'No sample profile for {sample_adapter}')

        if os.path.exists(profiles_file):
            # never clobber an existing profiles file
            return False

        shutil.copyfile(sample_path, profiles_file)
        return True
Ejemplo n.º 12
0
    def exception_handler(self, sql: str) -> ContextManager:
        """Context-manager body wrapping SQL execution.

        On any error: log, roll back the open transaction, then re-raise.
        RuntimeException instances pass through unmodified; anything else
        is wrapped in a RuntimeException chained to the original error.
        """
        try:
            yield
        except Exception as e:
            logger.error("Error running SQL: {}".format(sql))
            logger.error("Rolling back transaction.")
            self.rollback_if_open()

            if isinstance(e, RuntimeException):
                # during a sql query, an internal to dbt exception was raised.
                # this sounds a lot like a signal handler and probably has
                # useful information, so raise it without modification.
                raise

            raise RuntimeException(e) from e
Ejemplo n.º 13
0
def run_from_args(parsed):
    """Build and run the task described by the parsed CLI arguments.

    Returns a (task, results) tuple.

    :raises RuntimeException: when no dbt project can be located, or the
        task cannot be constructed from the arguments.
    """
    task = None
    cfg = None

    if parsed.which in ('init', 'debug'):
        # bypass looking for a project file if we're running `dbt init` or
        # `dbt debug`
        task = parsed.cls(args=parsed)
    else:
        nearest_project_dir = get_nearest_project_dir()
        if nearest_project_dir is None:
            raise RuntimeException(
                "fatal: Not a dbt project (or any of the parent directories). "
                "Missing dbt_project.yml file")

        # run from the project root so relative paths resolve consistently
        os.chdir(nearest_project_dir)

        res = invoke_dbt(parsed)
        if res is None:
            raise RuntimeException("Could not run dbt")
        else:
            task, cfg = res

    # cfg stays None on the init/debug path; log_path then stays None too
    log_path = None

    if cfg is not None:
        log_path = cfg.log_path

    initialize_logger(parsed.debug, log_path)
    logger.debug("Tracking: {}".format(dbt.tracking.active_user.state()))

    dbt.tracking.track_invocation_start(config=cfg, args=parsed)

    results = run_from_task(task, cfg, parsed)

    return task, results
Ejemplo n.º 14
0
    def search(self, included_nodes: Set[UniqueId],
               selector: str) -> Iterator[UniqueId]:
        """Yield the IDs of test nodes matching ``selector``.

        ``selector`` must be 'schema' or 'data'; any other value raises
        RuntimeException (when the generator is first iterated).
        """
        wanted_types: Tuple[Type, ...]
        if selector == 'schema':
            wanted_types = (ParsedSchemaTestNode, CompiledSchemaTestNode)
        elif selector == 'data':
            wanted_types = (ParsedDataTestNode, CompiledDataTestNode)
        else:
            raise RuntimeException(
                f'Invalid test type selector {selector}: expected "data" or '
                '"schema"')

        yield from (
            unique_id
            for unique_id, parsed in self.parsed_nodes(included_nodes)
            if isinstance(parsed, wanted_types)
        )
Ejemplo n.º 15
0
    def from_description(cls, name: str, raw_data_type: str) -> 'Column':
        """Build a Column from a description such as ``varchar(32)``.

        ``raw_data_type`` is a type name optionally followed by a
        parenthesized size: one integer means a character size, two
        integers mean numeric precision and scale.

        :raises RuntimeException: when the string cannot be parsed, or a
            size component is not an integer.
        """
        def _size_to_int(part: str) -> int:
            # shared conversion + error message for every size component
            # (previously three copy-pasted try/except blocks)
            try:
                return int(part)
            except ValueError:
                raise RuntimeException(
                    f'Could not interpret data_type "{raw_data_type}": '
                    f'could not convert "{part}" to an integer')

        match = re.match(r'([^(]+)(\([^)]+\))?', raw_data_type)
        if match is None:
            raise RuntimeException(
                f'Could not interpret data type "{raw_data_type}"')
        data_type, size_info = match.groups()
        char_size = None
        numeric_precision = None
        numeric_scale = None
        if size_info is not None:
            # strip out the parentheses, then split on the comma
            parts = size_info[1:-1].split(',')
            if len(parts) == 1:
                char_size = _size_to_int(parts[0])
            elif len(parts) == 2:
                numeric_precision = _size_to_int(parts[0])
                numeric_scale = _size_to_int(parts[1])

        return cls(name, data_type, char_size, numeric_precision,
                   numeric_scale)
Ejemplo n.º 16
0
    def _model_prepend_ctes(
            self, model: NonSourceCompiledNode,
            prepended_ctes: List[InjectedCTE]) -> NonSourceCompiledNode:
        if model.compiled_sql is None:
            raise RuntimeException('Cannot prepend ctes to an unparsed node',
                                   model)
        injected_sql = self._inject_ctes_into_sql(
            model.compiled_sql,
            prepended_ctes,
        )

        model.extra_ctes_injected = True
        model.extra_ctes = prepended_ctes
        model.injected_sql = injected_sql
        model.validate(model.to_dict())
        return model
Ejemplo n.º 17
0
    def __call__(self, *args, **kwargs):
        if len(args) == 1 and len(kwargs) == 0:
            opts = args[0]
        elif len(args) == 0 and len(kwargs) > 0:
            opts = kwargs
        else:
            raise_compiler_error("Invalid inline model config", self.model)

        opts = self._transform_config(opts)

        # it's ok to have a parse context with no context config, but you must
        # not call it!
        if self.context_config is None:
            raise RuntimeException(
                'At parse time, did not receive a context config')
        self.context_config.update_in_model_config(opts)
        return ''
Ejemplo n.º 18
0
 def _iterate_selected_nodes(self):
     nodes = sorted(self.select_nodes())
     if not nodes:
         logger.warning('No nodes selected!')
         return
     if self.manifest is None:
         raise InternalException(
             'manifest is None in _iterate_selected_nodes')
     for node in nodes:
         if node in self.manifest.nodes:
             yield self.manifest.nodes[node]
         elif node in self.manifest.sources:
             yield self.manifest.sources[node]
         else:
             raise RuntimeException(
                 f'Got an unexpected result from node selection: "{node}"'
                 f'Expected a source or a node!')
Ejemplo n.º 19
0
    def add_query(self,
                  sql,
                  auto_begin=True,
                  bindings=None,
                  abridge_sql_log=False):
        """Split ``sql`` into statements and execute each non-empty one.

        Returns the (connection, cursor) of the last statement actually
        run.

        :raises RuntimeException: when every statement was empty or
            comment-only, since there is then no cursor to return.
        """

        connection = None
        cursor = None

        if bindings:
            # The snowflake connector is more strict than, eg., psycopg2 -
            # which allows any iterable thing to be passed as a binding.
            bindings = tuple(bindings)

        queries = self._split_queries(sql)

        for individual_query in queries:
            # hack -- after the last ';', remove comments and don't run
            # empty queries. this avoids using exceptions as flow control,
            # and also allows us to return the status of the last cursor
            without_comments = re.sub(re.compile('^.*(--.*)$', re.MULTILINE),
                                      '', individual_query).strip()

            if without_comments == "":
                continue

            connection, cursor = super().add_query(
                individual_query,
                auto_begin,
                bindings=bindings,
                abridge_sql_log=abridge_sql_log)

        if cursor is None:
            # nothing ran: look up the connection name purely to build a
            # helpful error message
            conn = self.get_thread_connection()
            if conn is None or conn.name is None:
                conn_name = '<None>'
            else:
                conn_name = conn.name

            raise RuntimeException(
                "Tried to run an empty query on model '{}'. If you are "
                "conditionally running\nsql, eg. in a model hook, make "
                "sure your `else` clause contains valid sql!\n\n"
                "Provided SQL:\n{}".format(conn_name, sql))

        return connection, cursor
Ejemplo n.º 20
0
    def __setitem__(self, key, value):
        """Merge ``value`` into the mapping, reconciling column types.

        New keys are stored directly. A stored _NullMarker is replaced by
        any incoming value; an incoming _NullMarker never replaces a
        stored value. Two concrete values of different types raise
        RuntimeException.
        """
        if key not in self:
            super().__setitem__(key, value)
            return

        existing_type = self[key]
        if isinstance(existing_type, _NullMarker):
            # a concrete value always overwrites a null placeholder
            super().__setitem__(key, value)
            return
        if isinstance(value, _NullMarker):
            # keep the concrete value we already have
            return
        if not isinstance(value, type(existing_type)):
            # actual type mismatch!
            raise RuntimeException(
                f'Tables contain columns with the same names ({key}), '
                f'but different types ({value} vs {existing_type})'
            )
Ejemplo n.º 21
0
    def __init__(self, node_spec: str):
        """Parse a raw node-spec string into selection flags and a value.

        Handles, in order: the children-and-ancestors prefix ("@" per the
        error message below), the parents suffix with optional depth, the
        children suffix with optional depth, and finally an optional
        "<type>:" selector prefix on what remains.

        :raises RuntimeException: when the "@" prefix is combined with
            the children ("+") suffix.
        """
        self.raw = node_spec
        self.select_children = False
        self.select_children_max_depth = None
        self.select_parents = False
        self.select_parents_max_depth = None
        self.select_childrens_parents = False

        if node_spec.startswith(SELECTOR_CHILDREN_AND_ANCESTORS):
            self.select_childrens_parents = True
            # strip the one-character prefix before the regex matching
            node_spec = node_spec[1:]

        matches = re.match(SELECTOR_PARENTS, node_spec)
        if matches:
            self.select_parents = True
            if matches['depth']:
                self.select_parents_max_depth = int(matches['depth'])
            node_spec = matches['node']

        matches = re.match(SELECTOR_CHILDREN, node_spec)
        if matches:
            self.select_children = True
            if matches['depth']:
                self.select_children_max_depth = int(matches['depth'])
            node_spec = matches['node']

        if self.select_children and self.select_childrens_parents:
            raise RuntimeException(
                'Invalid node spec {} - "@" prefix and "+" suffix are '
                'incompatible'.format(self.raw)
            )

        if SELECTOR_DELIMITER in node_spec:
            selector_parts = node_spec.split(SELECTOR_DELIMITER, 1)
            selector_type, self.selector_value = selector_parts
            self.selector_type = SELECTOR_FILTERS(selector_type)
        else:
            self.selector_value = node_spec
            # if the selector type has an OS path separator in it, it can't
            # really be a valid file name, so assume it's a path.
            if _probably_path(node_spec):
                self.selector_type = SELECTOR_FILTERS.PATH
            else:
                self.selector_type = SELECTOR_FILTERS.FQN
Ejemplo n.º 22
0
    def from_dict(cls, raw: Any, dct: Dict[str, Any]) -> 'SelectionCriteria':
        """Build a SelectionCriteria from a parsed node-spec dict.

        :param raw: the original spec string, kept for error messages
        :param dct: the parsed fields of the spec
        :raises RuntimeException: when ``dct`` lacks the 'value' key.
        """
        if 'value' not in dct:
            raise RuntimeException(
                f'Invalid node spec "{raw}" - no search value!')
        method_name, method_arguments = cls.parse_method(dct)

        return cls(
            raw=raw,
            method=method_name,
            method_arguments=method_arguments,
            value=dct['value'],
            childrens_parents=bool(dct.get('childrens_parents')),
            parents=bool(dct.get('parents')),
            parents_depth=_match_to_int(dct, 'parents_depth'),
            children=bool(dct.get('children')),
            children_depth=_match_to_int(dct, 'children_depth'),
        )
Ejemplo n.º 23
0
    def search(self, included_nodes: Set[UniqueId],
               selector: str) -> Iterator[UniqueId]:
        """Yield exposure node IDs matching ``selector``.

        ``selector`` is either a bare exposure name or
        'package.exposure_name'; either side may equal the SELECTOR_GLOB
        wildcard. More than one dot is an error.
        """
        parts = selector.split('.')
        if len(parts) == 1:
            target_package, target_name = SELECTOR_GLOB, parts[0]
        elif len(parts) == 2:
            target_package, target_name = parts
        else:
            msg = (
                'Invalid exposure selector value "{}". Exposures must be of '
                'the form ${{exposure_name}} or '
                '${{exposure_package.exposure_name}}').format(selector)
            raise RuntimeException(msg)

        for unique_id, exposure in self.exposure_nodes(included_nodes):
            package_ok = target_package in (exposure.package_name,
                                            SELECTOR_GLOB)
            name_ok = target_name in (exposure.name, SELECTOR_GLOB)
            if package_ok and name_ok:
                yield unique_id
Ejemplo n.º 24
0
    def add_query(self,
                  sql,
                  model_name=None,
                  auto_begin=True,
                  bindings=None,
                  abridge_sql_log=False):
        """Split ``sql`` into statements and execute each non-empty one.

        Returns the (connection, cursor) of the last statement actually
        run.

        :raises RuntimeException: when every statement was empty or
            comment-only, since there is then no cursor to return.
        """
        connection = None
        cursor = None

        # TODO: is this sufficient? Largely copy+pasted from snowflake, so
        # there's some common behavior here we can maybe factor out into the
        # SQLAdapter?
        queries = [q.rstrip(';') for q in sqlparse.split(sql)]

        for individual_query in queries:
            # hack -- after the last ';', remove comments and don't run
            # empty queries. this avoids using exceptions as flow control,
            # and also allows us to return the status of the last cursor
            without_comments = re.sub(re.compile('^.*(--.*)$', re.MULTILINE),
                                      '', individual_query).strip()

            if without_comments == "":
                continue

            parent = super(HiveConnectionManager, self)
            connection, cursor = parent.add_query(individual_query, model_name,
                                                  auto_begin, bindings,
                                                  abridge_sql_log)

        if cursor is None:
            raise RuntimeException(
                "Tried to run an empty query on model '{}'. If you are "
                "conditionally running\nsql, eg. in a model hook, make "
                "sure your `else` clause contains valid sql!\n\n"
                "Provided SQL:\n{}".format(model_name, sql))

        # bug fix: this return was previously nested inside the `if` above,
        # after the raise -- unreachable, so the method always returned None
        # on success (the snowflake version returns at function level).
        return connection, cursor
Ejemplo n.º 25
0
 def compile(self, manifest):
     """Freshness "compile" step: source nodes need no compilation.

     Returns the node unchanged.

     :raises RuntimeException: if somehow invoked on a non-source node.
     """
     if self.node.resource_type != NodeType.Source:
         # should be unreachable...
         # (typo fix: the message previously read 'fresnhess runner')
         raise RuntimeException('freshness runner: got a non-Source')
     # we don't do anything interesting when we compile a source node
     return self.node
Ejemplo n.º 26
0
 def __post_init__(self):
     if self.children and self.childrens_parents:
         raise RuntimeException(
             f'Invalid node spec {self.raw} - "@" prefix and "+" suffix '
             'are incompatible')
Ejemplo n.º 27
0
 def render(self):
     """Render the relation name, rejecting impossible include policies.

     A spark relation may include its schema or its database in the
     rendered name, but never both at once.
     """
     both_included = (self.include_policy.database
                      and self.include_policy.schema)
     if both_included:
         raise RuntimeException(
             'Got a spark relation with schema and database set to '
             'include, but only one can be set')
     return super().render()
Ejemplo n.º 28
0
 def get_selector(self, name: str) -> SelectionSpec:
     if name not in self.selectors:
         raise RuntimeException(
             f'Could not find selector named {name}, expected one of '
             f'{list(self.selectors)}')
     return self.selectors[name]
Ejemplo n.º 29
0
 def on_skip(self):
     """Always raises: freshness nodes are never expected to be skipped."""
     raise RuntimeException('Freshness: nodes cannot be skipped!')
Ejemplo n.º 30
0
    def __getattribute__(self, name):
        """Attribute-access guard for a profile-less configuration.

        Accessing any of the profile-derived attributes raises
        RuntimeException; every other lookup is delegated to Profile.
        """
        disallowed = ('profile_name', 'target_name', 'threads')
        if name in disallowed:
            raise RuntimeException(
                f'Error: disallowed attribute "{name}" - no profile!')

        return Profile.__getattribute__(self, name)