Beispiel #1
0
    def __init__(self,
                 warehouse_yml_dict: dict,
                 warehouse_yml_path: str = 'warehouse-dict'):
        super().__init__(description=warehouse_yml_path)

        if isinstance(warehouse_yml_dict, dict):
            self._push_context(object=warehouse_yml_dict,
                               name=self.description)

            self.warehouse_yml = WarehouseYml()
            self.warehouse_yml.name = self.get_str_required(KEY_NAME)

            EnvVars.load_env_vars(self.warehouse_yml.name)

            connection_dict = self.get_dict_required(KEY_CONNECTION)
            if connection_dict:
                self._push_context(object=connection_dict, name=KEY_CONNECTION)
                self.warehouse_yml.dialect = Dialect.create(self)
                if self.warehouse_yml.dialect:
                    self.warehouse_yml.dialect.validate_connection()
                self._pop_context()

            soda_account_dict = self.get_dict_optional(KEY_SODA_ACCOUNT)
            if soda_account_dict:
                self._push_context(object=soda_account_dict,
                                   name=KEY_SODA_ACCOUNT)
                self.warehouse_yml.soda_host = self.get_str_optional(
                    SODA_KEY_HOST, 'cloud.soda.io')
                self.warehouse_yml.soda_port = self.get_int_optional(
                    SODA_KEY_PORT, 443)
                self.warehouse_yml.soda_protocol = self.get_str_optional(
                    SODA_KEY_PROTOCOL, 'https')
                self.warehouse_yml.soda_api_key_id = self.get_str_required_env(
                    SODA_KEY_API_KEY_ID)
                self.warehouse_yml.soda_api_key_secret = self.get_str_required_env(
                    SODA_KEY_API_KEY_SECRET)
                self._pop_context()

            ingest_dict = self.get_dict_optional(KEY_INGEST)
            if ingest_dict:
                self._push_context(object=ingest_dict, name=KEY_INGEST)
                self.warehouse_yml.dbt_cloud_api_token = self.get_str_optional(
                    DBT_CLOUD_KEY_API_TOKEN)
                self._pop_context()

            self.check_invalid_keys(VALID_WAREHOUSE_KEYS)

        else:
            self.error('No warehouse configuration provided')
Beispiel #2
0
 def __get_valid_condition(self, column_metadata: ColumnMetadata,
                           validity: Validity, dialect: Dialect):
     qualified_column_name = dialect.qualify_column_name(
         column_metadata.name, column_metadata.data_type)
     if validity is None:
         return '', True
     validity_clauses = []
     if validity.format is not None and self.is_text:
         format_regex = Validity.FORMATS.get(validity.format)
         qualified_regex = dialect.qualify_regex(format_regex)
         validity_clauses.append(
             dialect.sql_expr_regexp_like(qualified_column_name,
                                          qualified_regex))
     if validity.regex is not None and self.is_text:
         qualified_regex = dialect.qualify_regex(validity.regex)
         validity_clauses.append(
             dialect.sql_expr_regexp_like(qualified_column_name,
                                          qualified_regex))
     if validity.values is not None:
         valid_values_sql = dialect.sql_expr_list(column_metadata,
                                                  validity.values)
         validity_clauses.append(
             dialect.sql_expr_in(qualified_column_name, valid_values_sql))
     if validity.min_length is not None and self.is_text:
         validity_clauses.append(
             f'{dialect.sql_expr_length(qualified_column_name)} >= {validity.min_length}'
         )
     if validity.max_length is not None and self.is_text:
         validity_clauses.append(
             f'{dialect.sql_expr_length(qualified_column_name)} <= {validity.max_length}'
         )
     if validity.min is not None and self.is_number:
         validity_clauses.append(
             f'{qualified_column_name} >= {validity.min}')
     if validity.max is not None and self.is_number:
         validity_clauses.append(
             f'{qualified_column_name} <= {validity.max}')
     if validity.min is not None and self.is_text:
         validity_clauses.append(
             f'{dialect.sql_expr_cast_text_to_number(qualified_column_name, self.validity_format)} >= {validity.min}'
         )
     if validity.max is not None and self.is_text:
         validity_clauses.append(
             f'{dialect.sql_expr_cast_text_to_number(qualified_column_name, self.validity_format)} <= {validity.max}'
         )
     if len(validity_clauses) != 0:
         return '(' + ' AND '.join(validity_clauses) + ')', len(
             validity_clauses) == 0
     else:
         return '', True
Beispiel #3
0
 def __get_missing_condition(cls, column_metadata: ColumnMetadata, missing: Missing, dialect: Dialect):
     qualified_column_name = dialect.qualify_column_name(column_metadata.name)
     validity_clauses = [f'{qualified_column_name} IS NULL']
     if missing:
         if missing.values:
             sql_expr_missing_values = dialect.sql_expr_list(column_metadata, missing.values)
             validity_clauses.append(f'{qualified_column_name} IN {sql_expr_missing_values}')
         if missing.format:
             format_regex = Missing.FORMATS.get(missing.format)
             qualified_regex = dialect.qualify_regex(format_regex)
             validity_clauses.append(dialect.sql_expr_regexp_like(qualified_column_name, qualified_regex))
         if missing.regex:
             qualified_regex = dialect.qualify_regex(missing.regex)
             validity_clauses.append(dialect.sql_expr_regexp_like(qualified_column_name, qualified_regex))
     return " OR ".join(validity_clauses), len(validity_clauses) == 1
Beispiel #4
0
 def __get_valid_condition(cls, column_metadata: ColumnMetadata,
                           validity: Validity, dialect: Dialect):
     qualified_column_name = dialect.qualify_column_name(
         column_metadata.name)
     if validity is None:
         return '', True
     validity_clauses = []
     if validity.format:
         format_regex = Validity.FORMATS.get(validity.format)
         qualified_regex = dialect.qualify_regex(format_regex)
         validity_clauses.append(
             dialect.sql_expr_regexp_like(qualified_column_name,
                                          qualified_regex))
     if validity.regex:
         qualified_regex = dialect.qualify_regex(validity.regex)
         validity_clauses.append(
             dialect.sql_expr_regexp_like(qualified_column_name,
                                          qualified_regex))
     if validity.values:
         valid_values_sql = dialect.literal_list(validity.values)
         validity_clauses.append(
             dialect.sql_expr_in(qualified_column_name, valid_values_sql))
     if validity.min_length:
         validity_clauses.append(
             f'{dialect.sql_expr_length(qualified_column_name)} >= {validity.min_length}'
         )
     if validity.max_length:
         validity_clauses.append(
             f'{dialect.sql_expr_length(qualified_column_name)} <= {validity.max_length}'
         )
     if validity.min:
         validity_clauses.append(
             f'{qualified_column_name} >= {validity.min}')
     if validity.max:
         validity_clauses.append(
             f'{qualified_column_name} <= {validity.max}')
     return '(' + ' AND '.join(validity_clauses) + ')', len(
         validity_clauses) == 0
Beispiel #5
0
def create(warehouse_type: str, file: Optional[str], warehouse: Optional[str],
           database: Optional[str], username: Optional[str],
           password: Optional[str]):
    """
    Creates a new warehouse.yml file and prepares credentials in your ~/.soda/env_vars.yml
    Nothing will be overwritten or removed, only added if it does not exist yet.

    WAREHOUSE_TYPE is one of {postgres, snowflake, redshift, bigquery, athena}
    """
    try:
        """
        Creates a warehouse.yml file
        """
        logging.info(f"Soda CLI version {SODA_SQL_VERSION}")
        file_system = FileSystemSingleton.INSTANCE

        # if not warehouse:
        #     warehouse_dir_parent, warehouse_dir_name = file_system.split(warehouse_dir)
        #     warehouse = warehouse_dir_name if warehouse_dir_name != '.' else warehouse_type

        from sodasql.scan.dialect import ALL_WAREHOUSE_TYPES, Dialect
        dialect = Dialect.create_for_warehouse_type(warehouse_type)
        if not dialect:
            logging.info(
                f"Invalid warehouse type {warehouse_type}, use one of {str(ALL_WAREHOUSE_TYPES)}"
            )
            sys.exit(1)

        configuration_params = {}
        if isinstance(database, str):
            configuration_params['database'] = database
        if isinstance(username, str):
            configuration_params['username'] = username
        if isinstance(password, str):
            configuration_params['password'] = password
        connection_properties = dialect.default_connection_properties(
            configuration_params)
        warehouse_env_vars_dict = dialect.default_env_vars(
            configuration_params)

        if file_system.file_exists(file):
            logging.info(f"Warehouse file {file} already exists")
        else:
            logging.info(f"Creating warehouse YAML file {file} ...")
            file_system.mkdirs(file_system.dirname(file))

            if not warehouse:
                warehouse = warehouse_type

            warehouse_dict = {
                'name': warehouse,
                'connection': connection_properties
            }
            warehouse_yml_str = yaml.dump(warehouse_dict,
                                          default_flow_style=False,
                                          sort_keys=False)
            file_system.file_write_from_str(file, warehouse_yml_str)

        dot_soda_dir = file_system.join(file_system.user_home_dir(), '.soda')
        if not file_system.file_exists(dot_soda_dir):
            file_system.mkdirs(dot_soda_dir)

        env_vars_file = file_system.join(dot_soda_dir, 'env_vars.yml')
        env_vars_yml_str = ''
        env_vars_file_exists = file_system.file_exists(env_vars_file)
        if env_vars_file_exists:
            env_vars_yml_str = file_system.file_read_as_str(env_vars_file)
            existing_env_vars_yml_dict = yaml.load(env_vars_yml_str,
                                                   Loader=yaml.SafeLoader)
            if isinstance(existing_env_vars_yml_dict,
                          dict) and warehouse in existing_env_vars_yml_dict:
                logging.info(
                    f"Warehouse section {warehouse} already exists in {env_vars_file}.  Skipping..."
                )
                warehouse_env_vars_dict = None

        if warehouse_env_vars_dict:
            warehouse_env_vars_dict = {warehouse: warehouse_env_vars_dict}

            if len(env_vars_yml_str) > 0:
                env_vars_yml_str += '\n'

            env_vars_yml_str += yaml.dump(warehouse_env_vars_dict,
                                          default_flow_style=False,
                                          sort_keys=False)

            if env_vars_file_exists:
                logging.info(
                    f"Adding env vars for {warehouse} to {env_vars_file}")
            else:
                logging.info(
                    f"Creating {env_vars_file} with example env vars in section {warehouse}"
                )

            file_system.file_write_from_str(env_vars_file, env_vars_yml_str)

        logging.info(f"Review warehouse.yml by running command")
        logging.info(f"  cat {file}")
        if warehouse_env_vars_dict:
            logging.info(
                f"Review section {warehouse} in ~/.soda/env_vars.yml by running command"
            )
            logging.info(f"  cat ~/.soda/env_vars.yml")
        logging.info(f"Then run the soda analyze command")
    except Exception as e:
        logging.exception(f'Exception: {str(e)}')
        sys.exit(1)
class TestSqlExpressions(TestCase):

    dialect = Dialect('test')

    def test_string(self):
        self.assertEqual(
            "'hello'",
            self.dialect.sql_expression({
                'type': 'string',
                'value': 'hello'
            }))

    def test_number(self):
        self.assertEqual(
            "93", self.dialect.sql_expression({
                'type': 'number',
                'value': 93
            }))

        self.assertEqual(
            "93.11",
            self.dialect.sql_expression({
                'type': 'number',
                'value': 93.11
            }))

    def test_column_value(self):
        self.assertEqual(
            "col_name",
            self.dialect.sql_expression({
                'type': 'columnValue',
                'columnName': 'col_name'
            }))

    def test_collection(self):
        self.assertEqual(
            "('a','b',3,4)",
            self.dialect.sql_expression({
                'type': 'collection',
                'value': ['a', 'b', 3, 4]
            }))

    def test_equals(self):
        self.assertEqual(
            "name = 't'",
            self.dialect.sql_expression({
                'type': 'equals',
                'left': {
                    'type': 'columnValue',
                    'columnName': 'name'
                },
                'right': {
                    'type': 'string',
                    'value': 't'
                }
            }))

    def test_less_than(self):
        self.assertEqual(
            "name < 3",
            self.dialect.sql_expression({
                'type': 'lessThan',
                'left': {
                    'type': 'columnValue',
                    'columnName': 'name'
                },
                'right': {
                    'type': 'number',
                    'value': 3
                }
            }))

    def test_less_than_or_equals(self):
        self.assertEqual(
            "name <= 3",
            self.dialect.sql_expression({
                'type': 'lessThanOrEqual',
                'left': {
                    'type': 'columnValue',
                    'columnName': 'name'
                },
                'right': {
                    'type': 'number',
                    'value': 3
                }
            }))

    def test_greater_than(self):
        self.assertEqual(
            "name > 3",
            self.dialect.sql_expression({
                'type': 'greaterThan',
                'left': {
                    'type': 'columnValue',
                    'columnName': 'name'
                },
                'right': {
                    'type': 'number',
                    'value': 3
                }
            }))

    def test_greater_than_or_equals(self):
        self.assertEqual(
            "name >= 3",
            self.dialect.sql_expression({
                'type': 'greaterThanOrEqual',
                'left': {
                    'type': 'columnValue',
                    'columnName': 'name'
                },
                'right': {
                    'type': 'number',
                    'value': 3
                }
            }))

    def test_between(self):
        self.assertEqual(
            "3 <= name AND name < 44",
            self.dialect.sql_expression({
                'type': 'between',
                'gte': 3,
                'value': {
                    'type': 'columnValue',
                    'columnName': 'name'
                },
                'lt': 44
            }))

        self.assertEqual(
            "3 < name",
            self.dialect.sql_expression({
                'type': 'between',
                'gt': 3,
                'value': {
                    'type': 'columnValue',
                    'columnName': 'name'
                }
            }))

        self.assertEqual(
            "name <= 44",
            self.dialect.sql_expression({
                'type': 'between',
                'value': {
                    'type': 'columnValue',
                    'columnName': 'name'
                },
                'lte': 44
            }))

    def test_in(self):
        self.assertEqual(
            "name IN ('a','b','c')",
            self.dialect.sql_expression({
                'type': 'in',
                'left': {
                    'type': 'columnValue',
                    'columnName': 'name'
                },
                'right': {
                    'type': 'collection',
                    'value': ['a', 'b', 'c']
                }
            }))

    def test_contains(self):
        self.assertEqual(
            "name LIKE '%t%'",
            self.dialect.sql_expression({
                'type': 'contains',
                'left': {
                    'type': 'columnValue',
                    'columnName': 'name'
                },
                'right': {
                    'type': 'string',
                    'value': 't'
                }
            }))

    def test_starts_with(self):
        self.assertEqual(
            "name LIKE 't%'",
            self.dialect.sql_expression({
                'type': 'startsWith',
                'left': {
                    'type': 'columnValue',
                    'columnName': 'name'
                },
                'right': {
                    'type': 'string',
                    'value': 't'
                }
            }))

    def test_ends_with(self):
        self.assertEqual(
            "name LIKE '%t'",
            self.dialect.sql_expression({
                'type': 'endsWith',
                'left': {
                    'type': 'columnValue',
                    'columnName': 'name'
                },
                'right': {
                    'type': 'string',
                    'value': 't'
                }
            }))

    def test_not(self):
        self.assertEqual(
            "NOT (name = 't')",
            self.dialect.sql_expression({
                'type': 'not',
                'expression': {
                    'type': 'equals',
                    'left': {
                        'type': 'columnValue',
                        'columnName': 'name'
                    },
                    'right': {
                        'type': 'string',
                        'value': 't'
                    }
                }
            }))

    def test_and(self):
        self.assertEqual(
            "(name = 't') AND (size = 3)",
            self.dialect.sql_expression({
                'type':
                'and',
                'andExpressions': [{
                    'type': 'equals',
                    'left': {
                        'type': 'columnValue',
                        'columnName': 'name'
                    },
                    'right': {
                        'type': 'string',
                        'value': 't'
                    }
                }, {
                    'type': 'equals',
                    'left': {
                        'type': 'columnValue',
                        'columnName': 'size'
                    },
                    'right': {
                        'type': 'number',
                        'value': 3
                    }
                }]
            }))

    def test_or(self):
        self.assertEqual(
            "(name = 't') OR (size = 3)",
            self.dialect.sql_expression({
                'type':
                'or',
                'orExpressions': [{
                    'type': 'equals',
                    'left': {
                        'type': 'columnValue',
                        'columnName': 'name'
                    },
                    'right': {
                        'type': 'string',
                        'value': 't'
                    }
                }, {
                    'type': 'equals',
                    'left': {
                        'type': 'columnValue',
                        'columnName': 'size'
                    },
                    'right': {
                        'type': 'number',
                        'value': 3
                    }
                }]
            }))
 def __init__(self, warehouse_connection_dict: dict):
     super().__init__('warehouse')
     self._push_context(warehouse_connection_dict, 'connection')
     self.dialect = Dialect.create(self)
Beispiel #8
0
 def obtain_datasource_hash(dialect: Dialect):
     return dialect.generate_hash_safe()
def create_dialect(target):
    return Dialect.create(DialectParser({KEY_WAREHOUSE_TYPE: target}))