Esempio n. 1
0
    def tests(env: MasonEnvironment, config: Config, op: Operator):
        """Run ``op`` for one successful and two failing parameter strings.

        Each case builds ``OperatorParameters`` from a parameter string,
        validates it against ``config``, runs the result in ``env`` and
        compares ``with_status()`` with the expected ``(payload, status)``.
        """
        # (parameter string, expected payload, expected status code)
        cases = [
            # valid delete
            ("table_name:good_table,database_name:good_database",
             {'Info': ['Table good_table successfully deleted.']}, 200),
            # database DNE
            ("table_name:bad_table,database_name:bad_database",
             {'Errors': ['Database bad_database not found.']}, 400),
            # table DNE
            ("table_name:bad_table,database_name:good_database",
             {'Errors': ['Table bad_table not found.']}, 400),
        ]

        for parameter_string, payload, status in cases:
            params = OperatorParameters(parameter_string=parameter_string)
            result = op.validate(config, params).run(env, Response())
            assert result.with_status() == (payload, status)
Esempio n. 2
0
    def tests(env: MasonEnvironment, config: Config, op: Operator):
        """Run ``op`` for a regular table and for one flagged as refreshing.

        The expected ``with_status()`` value is produced by ``table.refresh``,
        called with ``False`` for the first case and ``True`` for the second.
        """
        # (parameter string, flag handed to table.refresh)
        scenarios = (
            # valid refresh
            ("table_name:catalog_poc_data,database_name:crawler-poc", False),
            # already refreshing
            ("table_name:catalog_poc_data_refreshing,database_name:crawler-poc",
             True),
        )

        for parameter_string, refreshing in scenarios:
            params = OperatorParameters(parameter_string=parameter_string)
            outcome = op.validate(config, params).run(env, Response())
            assert outcome.with_status() == table.refresh(refreshing)
Esempio n. 3
0
    def parse_param_dict(
        self, param_dict: dict
    ) -> Tuple[List[WorkflowParameter], List[InvalidParameter], Optional[str],
               Optional[str], bool]:
        """Split a workflow parameter dict into per-step parameters and settings.

        ``param_dict`` is checked with ``object_from_json_schema`` against
        ``/parameters/workflow_schema.json``.  The keys ``schedule``,
        ``schedule_name`` and ``strict`` are read as workflow-level settings;
        every other key is treated as a step id whose value carries a
        ``config_id`` and a ``parameters`` mapping.

        Returns a tuple ``(valid, invalid, schedule, schedule_name, strict)``.
        ``strict`` is ``True`` unless the validated dict holds a boolean
        ``strict`` entry.  When the input is not a dict, or validation does
        not return a dict, a single ``InvalidParameter`` is reported and the
        settings keep their defaults (``None``, ``None``, ``True``).
        """
        reserved_keys = ("schedule", "schedule_name", "strict")
        valid: List[WorkflowParameter] = []
        invalid: List[InvalidParameter] = []

        if not isinstance(param_dict, dict):
            invalid.append(
                InvalidParameter(
                    f"Parameters do not conform to specified schema in parameters/workflow_schema.json.  Must be of form step_id: key:value.  {param_dict}"
                ))
            return valid, invalid, None, None, True

        validated = object_from_json_schema(
            param_dict, from_root("/parameters/workflow_schema.json"), dict)
        # TODO: Use typistry for this
        # parameters = validate_dict(TypedDict(param_dict, "workflow_parameters"))
        if not isinstance(validated, dict):
            # Anything other than a dict is expected to expose a ``reason``.
            invalid.append(
                InvalidParameter(f"Invalid parameters: {validated.reason}"))
            return valid, invalid, None, None, True

        # From here on the dict is taken to match the schema definition.
        schedule: Optional[str] = validated.get("schedule")
        schedule_name: Optional[str] = validated.get("schedule_name")
        strict_value: Optional[Any] = validated.get("strict")
        strict: bool = strict_value if isinstance(strict_value, bool) else True

        for step_id, step in validated.items():
            if step_id in reserved_keys:
                continue
            config_id: str = str(step["config_id"])
            step_parameters: Dict[str, Dict[str, Any]] = step["parameters"]
            valid_step, invalid_step = parse_dict(
                step_parameters, from_root("/parameters/schema.json"))
            # Valid and invalid entries of a step travel together in one
            # OperatorParameters object.
            operator_parameters = OperatorParameters()
            operator_parameters.parameters = valid_step
            operator_parameters.invalid = invalid_step
            valid.append(
                WorkflowParameter(step_id, config_id, operator_parameters))

        return valid, invalid, schedule, schedule_name, strict
Esempio n. 4
0
    def tests(env: MasonEnvironment, config: Config, op: Operator):
        """Run ``op`` for an existing and a missing database.

        The expected ``with_status()`` value is ``table.index`` called with the
        metastore client name; the missing-database case additionally passes
        ``False``.
        """
        # (parameter string, extra positional arguments for table.index)
        scenarios = (
            # Database Exists
            ("database_name:crawler-poc", ()),
            # Database DNE
            ("database_name:bad-database", (False,)),
        )

        for parameter_string, extra_args in scenarios:
            params = OperatorParameters(parameter_string=parameter_string)
            validated = op.validate(config, params)
            outcome = validated.run(env, Response())
            assert outcome.with_status() == table.index(
                config.metastore().client.name(), *extra_args)
Esempio n. 5
0
    def tests(env: MasonEnvironment, config: Config, op: Operator):
        """Run ``op`` for an existing and a non-existent schedule name.

        Each case validates a parameter string against ``config``, runs the
        result in ``env`` and compares ``with_status()`` with the expected
        ``(payload, status)`` pair.
        """
        # (parameter string, expected payload, expected status code)
        cases = [
            # valid delete
            ("schedule_name:good_schedule",
             {'Info': ['Schedule good_schedule successfully deleted.']}, 200),
            # dne
            ("schedule_name:bad_schedule",
             {'Errors': ["Crawler entry with name bad_schedule does not exist"]},
             400),
        ]

        for parameter_string, payload, status in cases:
            params = OperatorParameters(parameter_string=parameter_string)
            result = op.validate(config, params).run(env, Response())
            assert result.with_status() == (payload, status)
Esempio n. 6
0
    def tests(env: MasonEnvironment, config: Config, op: Operator):
        """Check the type of job object produced for two parameter strings.

        The first string uses ``format:boogo`` and must yield an
        ``InvalidJob``; the second uses ``format:csv`` and must yield an
        ``ExecutedJob``.  The job is read from the ``object`` attribute of the
        run result.
        """
        # (parameter string, expected type of result.object)
        cases = [
            ("database_name:mason-sample-data,table_name:tests/in/csv/,format:boogo,output_path:mason-sample-data/tests/out/csv/",
             InvalidJob),
            ("database_name:mason-sample-data,table_name:tests/in/csv/,format:csv,output_path:good_output_path",
             ExecutedJob),
        ]

        for parameter_string, expected_type in cases:
            params = OperatorParameters(parameter_string=parameter_string)
            result = op.validate(config, params).run(env, Response())
            assert isinstance(result.object, expected_type)
Esempio n. 7
0
    def tests(env: MasonEnvironment, config: Config, op: Operator):
        """Run a query through ``op`` for an allowed and a denied database.

        Expected results are keyed by ``config.id`` (``"1"`` and ``"4"``); the
        entry for the active configuration is compared with ``with_status()``.
        """
        query = "SELECT * from $table limit 3"
        output_path = from_root("/.tmp/")
        exported = f'Table succesfully formatted as parquet and exported to {output_path}'

        def run_against(database_name: str):
            # Only the database name differs between the two scenarios.
            parameter_string = (
                f"query_string:{query},database_name:{database_name},"
                f"table_name:good_table,output_path:{output_path}")
            params = OperatorParameters(parameter_string=parameter_string)
            return op.validate(config, params).run(env, Response())

        # valid query
        outcome = run_against("good_database")
        info_by_config = {
            "1": [
                'Running Query "SELECT * from $table limit 3"',
                'Running Athena query.  query_id: test',
                'Running job id=test',
            ],
            "4": [exported],
        }
        expected = {'Info': info_by_config[config.id]}
        assert outcome.with_status() == (expected, 200)

        # bad permissions
        outcome = run_against("access_denied")
        status_by_config = {
            "1": ({
                'Errors': [
                    'Job errored: Access denied for credentials.  Ensure associated user or role has permission to CreateNamedQuery on athena'
                ],
                'Info': ['Running Query "SELECT * from $table limit 3"'],
            }, 403),
            "4": ({'Info': [exported]}, 200),
        }
        assert outcome.with_status() == status_by_config[config.id]
Esempio n. 8
0
    def test_bad_parameter_strings(self):
        """Malformed parameter strings must produce an empty ``parameters`` list."""
        malformed = ("test", "test,", "test:", "test:,")

        for parameter_string in malformed:
            parsed = OperatorParameters(parameter_string)
            assert parsed.parameters == []
Esempio n. 9
0
    def tests(env: MasonEnvironment, config: Config, op: Operator):
        """Run ``op`` for a missing database, a bad storage path and a valid path.

        The two failing cases are compared through ``with_status()``.  For the
        valid path only the ``Info`` messages of ``formatted()`` are checked,
        after passing each message through ``clean_string`` and ``clean_uuid``.
        """
        def run(parameter_string: str):
            params = OperatorParameters(parameter_string=parameter_string)
            return op.validate(config, params).run(env, Response())

        # database DNE
        missing_database = run(
            "database_name:bad-database,storage_path:crawler-poc/catalog_poc_data"
        )
        assert missing_database.with_status() == ({
            'Errors':
            ['Job errored: Metastore database bad-database not found'],
            'Info': ['Table inferred: catalog_poc_data']
        }, 404)

        # bad path
        bad_path = run(
            "database_name:crawler-poc,storage_path:crawler-poc/bad-table")
        assert bad_path.with_status() == ({
            'Errors': [
                'No keys at s3://crawler-poc/bad-table',
                'Job errored: Invalid Tables: No keys at s3://crawler-poc/bad-table'
            ]
        }, 404)

        # valid path
        valid_path = run(
            "database_name:crawler-poc,storage_path:crawler-poc/catalog_poc_data,output_path:crawler-poc/athena/"
        )
        infos = [
            clean_uuid(clean_string(message))
            for message in valid_path.formatted()["Info"]
        ]
        expect = [
            'Tableinferred:catalog_poc_data',
            'RunningAthenaquery.query_id:test_id', 'Runningjobid=test_id'
        ]
        assert infos == expect
Esempio n. 10
0
    def test_good_parameter_strings(self):
        """Check ``OperatorParameters(...).to_dict()`` for well-formed strings.

        ``good_tests`` maps each parameter string to the list of single-entry
        dicts that ``to_dict()`` is expected to return for it.
        """
        good_tests = {
            "param:value": [{"param": "value"}],
            "param_test-value.with.dots/and/slash:value-test_value.with.dots/and/slash": [
                {"param_test-value.with.dots/and/slash": "value-test_value.with.dots/and/slash"}
            ],
            "param_test-value=with.equals:value-test_value=with.equals": [
                {"param_test-value=with.equals": "value-test_value=with.equals"}
            ],
            "param1:value,param2:value": [{"param1": "value"}, {"param2": "value"}],
            # A repeated name is expected to keep only the later value.
            "param1:value,param1:value2": [{"param1": "value2"}],
            # Backslashes are spelled ``\\`` so the literal holds one backslash
            # without relying on the invalid escapes ``\,`` and ``\:``.
            "testwith\\,inthemiddle:result,param2:andanother\\:inthemiddle": [
                {'inthemiddle': 'result'}, {'param2': 'andanother\\'}
            ],
            "test with space: on both sides": [{'test with space': ' on both sides'}],
        }

        for param_string, result in good_tests.items():
            assert OperatorParameters(param_string).to_dict() == result
Esempio n. 11
0
    def test_parameter_validation(self):
        """Validate parameter strings against required/optional operator specs.

        Each entry of ``tests`` maps a parameter string to five lists, in
        order: required names, optional names, expected validated values,
        expected optional values and expected parsed values.
        """
        tests: Dict[str, List[List[str]]] = {
            "param:value": [["param"], [], ["value"], [], ["value"]],
            "param:value,other_param:stuff": [
                ["other_param"], ["param"], ["stuff"], ["value"],
                ["value", "stuff"]
            ],
        }

        for param_string, results in tests.items():
            required, optional, want_validated, want_optional, want_parsed = results
            input_param = OperatorParameters(param_string)
            op = Operator("cmd", "subcmd",
                          {"required": required, "optional": optional}, [])
            validated = op.parameters.validate(input_param)

            assert isinstance(validated, ValidatedParameters)
            assert [p.value for p in validated.validated_parameters] == want_validated
            assert [p.value for p in validated.optional_parameters] == want_optional
            assert [p.value for p in validated.parsed_parameters] == want_parsed
0
    def tests(env: MasonEnvironment, config: Config, op: Operator):
        """Run ``op`` for an existing table, a missing database and a missing table.

        The expected ``with_status()`` value is ``table.get`` called with the
        metastore client name and the scenario number (1, 2 or 3).
        """
        # (parameter string, scenario number handed to table.get)
        scenarios = (
            # Database and table Exist
            ("database_name:crawler-poc,table_name:catalog_poc_data", 1),
            # Database DNE
            ("database_name:bad-database,table_name:catalog_poc_data", 2),
            # Table DNE
            ("database_name:crawler-poc,table_name:bad-table", 3),
        )

        for parameter_string, scenario in scenarios:
            params = OperatorParameters(parameter_string=parameter_string)
            outcome = op.validate(config, params).run(env, Response())
            assert outcome.with_status() == table.get(
                config.metastore().client.name(), scenario)
Esempio n. 13
0
    def get_parameters(
        self, type: str, parameter_string: Optional[str],
        parameter_path: Optional[str], parameter_dict: Optional[dict]
    ) -> Union[Parameters, MalformedResource]:
        """Build the parameters object matching the resource ``type``.

        Workflow types get ``WorkflowParameters`` (path and dict only),
        operator types get ``OperatorParameters`` (string, path and dict).
        Config types and any other type yield a ``MalformedResource`` whose
        message names the unsupported type.
        """
        if self.type_workflow(type):
            return WorkflowParameters(parameter_path, parameter_dict)

        if self.type_operator(type):
            return OperatorParameters(parameter_string, parameter_path,
                                      parameter_dict)

        if self.type_config(type):
            return MalformedResource(
                message=f"Config type not supported: {type}")

        return MalformedResource(message=f"Type not supported: {type}")
Esempio n. 14
0
    def tests(env: MasonEnvironment, config: Config, op: Operator):
        """Run ``op`` for a known and an unknown job id.

        Expected payloads are keyed by the execution client name (``spark`` or
        ``athena``); the entry for ``config.execution().client.name()`` is
        paired with the expected status and compared with ``with_status()``.
        """
        def run(parameter_string: str):
            params = OperatorParameters(parameter_string=parameter_string)
            return op.validate(config, params).run(env, Response())

        # valid job_id
        # TODO: consolidate these
        expected_good = {
            'spark': {
                'Data': [{
                    'Logs': ['<LOG_DATA>']
                }]
            },
            'athena': {
                'Data': [{
                    'ResultSetMetadata': {
                        'ColumnInfo': [{
                            'CaseSensitive': True,
                            'CatalogName': 'hive',
                            'Label': 'widget',
                            'Name': 'widget',
                            'Nullable': 'UNKNOWN',
                            'Precision': 2147483647,
                            'Scale': 0,
                            'SchemaName': '',
                            'TableName': '',
                            'Type': 'varchar'
                        }]
                    },
                    'Rows': [{
                        'Data': [{
                            'VarCharValue': 'widget'
                        }]
                    }]
                }],
                'Info': ['Job Status: SUCCEEDED']
            },
        }

        good = run("job_id:good_job_id")
        assert good.with_status() == (
            expected_good[config.execution().client.name()], 200)

        # invalid job_id
        expected_bad = {
            'spark': {
                'Errors': [
                    'Error from server (NotFound): pods "bad_job_id-driver" not found'
                ]
            },
            'athena': {
                'Errors': [
                    'QueryExecution bad_job_id was not found',
                    'Job errored: Invalid Job: QueryExecution bad_job_id was not found'
                ]
            }
        }

        bad = run("job_id:bad_job_id")
        assert bad.with_status() == (
            expected_bad[config.execution().client.name()], 400)
Esempio n. 15
0
 def test_bad_from_path(self):
     """Loading ``bad_params.yaml`` must report the schema-mismatch reason first."""
     expected_reason = "Parameters do not conform to specified schema in parameters/schema.json.  Must be of form key:value"
     bad_path = from_root("/test/support/parameters/bad_params.yaml")
     loaded = OperatorParameters(parameter_path=bad_path)
     first_invalid = loaded.invalid[0]
     assert first_invalid.reason == expected_reason
Esempio n. 16
0
 def test_from_path(self):
     """Loading ``good_params.yaml`` must yield the two expected values in order."""
     good_path = from_root("/test/support/parameters/good_params.yaml")
     loaded = OperatorParameters(parameter_path=good_path)
     values = [parameter.value for parameter in loaded.parameters]
     assert values == ["test_value", "test_value_2"]
Esempio n. 17
0
 def test_no_parameters(self):
     """An argument-less ``OperatorParameters`` has no invalid and no parsed entries."""
     empty = OperatorParameters()
     assert empty.invalid == []
     assert empty.parameters == []
Esempio n. 18
0
 def validate(self, input_parameters: OperatorParameters) -> ValidatedParameters:
     """Validate ``input_parameters`` against this object's required/optional lists.

     Delegates to ``input_parameters.validate`` with ``self.required`` and
     ``self.optional`` and returns its result unchanged.
     """
     validated = input_parameters.validate(self.required, self.optional)
     return validated