def tests(env: MasonEnvironment, config: Config, op: Operator):
    """Run the operator for three table-delete scenarios and check each response.

    Covers a successful delete, a database that does not exist, and a table
    that does not exist inside an existing database.

    Args:
        env: Environment handed to the validated operator's ``run``.
        config: Configuration the operator is validated against.
        op: Operator under test.
    """

    def run_with(parameter_string: str):
        # Every scenario follows the same validate -> run pipeline with a fresh Response.
        params = OperatorParameters(parameter_string=parameter_string)
        return op.validate(config, params).run(env, Response())

    # Valid delete
    deleted = run_with("table_name:good_table,database_name:good_database")
    assert deleted.with_status() == (
        {'Info': ['Table good_table successfully deleted.']},
        200,
    )

    # Database does not exist
    missing_database = run_with("table_name:bad_table,database_name:bad_database")
    assert missing_database.with_status() == (
        {'Errors': ['Database bad_database not found.']},
        400,
    )

    # Table does not exist
    missing_table = run_with("table_name:bad_table,database_name:good_database")
    assert missing_table.with_status() == (
        {'Errors': ['Table bad_table not found.']},
        400,
    )
def tests(env: MasonEnvironment, config: Config, op: Operator):
    """Run the operator for a table refresh and for a table already refreshing.

    Each response's ``with_status()`` is compared with ``table.refresh(...)``,
    called with ``False`` for the first scenario and ``True`` for the second.
    """

    def run_with(parameter_string: str):
        request = OperatorParameters(parameter_string=parameter_string)
        return op.validate(config, request).run(env, Response())

    # Valid refresh
    refreshed = run_with("table_name:catalog_poc_data,database_name:crawler-poc")
    assert refreshed.with_status() == table.refresh(False)

    # Already refreshing
    in_progress = run_with(
        "table_name:catalog_poc_data_refreshing,database_name:crawler-poc"
    )
    assert in_progress.with_status() == table.refresh(True)
def parse_param_dict(
    self, param_dict: dict
) -> Tuple[List[WorkflowParameter], List[InvalidParameter], Optional[str],
           Optional[str], bool]:
    """Split a workflow parameter dict into per-step parameters and settings.

    The dict is checked against ``/parameters/workflow_schema.json``.  The
    keys ``schedule``, ``schedule_name`` and ``strict`` are read as workflow
    settings; every other key is treated as a step id whose value supplies
    ``config_id`` and ``parameters``.

    Returns:
        A tuple ``(valid, invalid, schedule, schedule_name, strict)``.
        ``strict`` is ``True`` unless the validated dict holds a boolean
        under ``"strict"``.  When ``param_dict`` is not a dict, or schema
        validation does not return a dict, ``valid`` is empty, ``invalid``
        holds a single entry and the settings keep their defaults
        (``None``, ``None``, ``True``).
    """
    steps: List[WorkflowParameter] = []
    problems: List[InvalidParameter] = []

    if not isinstance(param_dict, dict):
        problems.append(
            InvalidParameter(
                f"Parameters do not conform to specified schema in parameters/workflow_schema.json. Must be of form step_id: key:value. {param_dict}"
            ))
        return steps, problems, None, None, True

    # TODO: Use typistry for this, e.g.
    # validate_dict(TypedDict(param_dict, "workflow_parameters"))
    checked = object_from_json_schema(
        param_dict, from_root("/parameters/workflow_schema.json"), dict)

    if not isinstance(checked, dict):
        # Anything other than a dict is read as a failure object exposing `.reason`.
        problems.append(
            InvalidParameter(f"Invalid parameters: {checked.reason}"))
        return steps, problems, None, None, True

    # From here on the dict is known to match the schema definition.
    schedule: Optional[str] = checked.get("schedule")
    schedule_name: Optional[str] = checked.get("schedule_name")
    strict_setting: Optional[Any] = checked.get("strict")
    strict: bool = strict_setting if isinstance(strict_setting, bool) else True

    workflow_settings = ("schedule", "schedule_name", "strict")
    for step_id, step in checked.items():
        if step_id in workflow_settings:
            continue

        config_id: str = str(step["config_id"])
        raw_parameters: Dict[str, Dict[str, Any]] = step["parameters"]
        accepted, rejected = parse_dict(
            raw_parameters, from_root("/parameters/schema.json"))

        step_parameters = OperatorParameters()
        step_parameters.parameters = accepted
        step_parameters.invalid = rejected
        steps.append(WorkflowParameter(step_id, config_id, step_parameters))

    return steps, problems, schedule, schedule_name, strict
def tests(env: MasonEnvironment, config: Config, op: Operator):
    """Run the operator for an existing and a non-existent database.

    Responses are compared with ``table.index(...)``, which receives the
    metastore client name from ``config`` (plus ``False`` for the missing
    database scenario).
    """

    def metastore_name():
        return config.metastore().client.name()

    # Database exists
    existing_request = OperatorParameters(
        parameter_string="database_name:crawler-poc")
    validated = op.validate(config, existing_request)
    found = validated.run(env, Response())
    assert found.with_status() == table.index(metastore_name())

    # Database does not exist
    missing_request = OperatorParameters(
        parameter_string="database_name:bad-database")
    not_found = op.validate(config, missing_request).run(env, Response())
    assert not_found.with_status() == table.index(metastore_name(), False)
def tests(env: MasonEnvironment, config: Config, op: Operator):
    """Run the operator for an existing and a non-existent schedule name.

    Args:
        env: Environment handed to the validated operator's ``run``.
        config: Configuration the operator is validated against.
        op: Operator under test.
    """

    def run_with(parameter_string: str):
        params = OperatorParameters(parameter_string=parameter_string)
        return op.validate(config, params).run(env, Response())

    # Valid delete
    deleted = run_with("schedule_name:good_schedule")
    assert deleted.with_status() == (
        {'Info': ['Schedule good_schedule successfully deleted.']},
        200,
    )

    # Schedule does not exist
    missing = run_with("schedule_name:bad_schedule")
    assert missing.with_status() == (
        {'Errors': ["Crawler entry with name bad_schedule does not exist"]},
        400,
    )
def tests(env: MasonEnvironment, config: Config, op: Operator):
    """Run the operator with format ``boogo`` and with format ``csv``.

    The first response's ``object`` must be an ``InvalidJob`` and the
    second's an ``ExecutedJob``.

    Args:
        env: Environment handed to the validated operator's ``run``.
        config: Configuration the operator is validated against.
        op: Operator under test.
    """

    def run_with(parameter_string: str):
        params = OperatorParameters(parameter_string=parameter_string)
        return op.validate(config, params).run(env, Response())

    # format:boogo -> expected to yield an invalid job
    rejected = run_with(
        "database_name:mason-sample-data,table_name:tests/in/csv/,"
        "format:boogo,output_path:mason-sample-data/tests/out/csv/"
    )
    invalid_job = rejected.object
    assert isinstance(invalid_job, InvalidJob)

    # format:csv -> expected to yield an executed job
    accepted = run_with(
        "database_name:mason-sample-data,table_name:tests/in/csv/,"
        "format:csv,output_path:good_output_path"
    )
    executed_job = accepted.object
    assert isinstance(executed_job, ExecutedJob)
def tests(env: MasonEnvironment, config: Config, op: Operator):
    """Run a query through the operator for a good and an access-denied database.

    The expected response is looked up by ``config.id``; expectations are
    provided for ids ``"1"`` and ``"4"``.
    """
    query = "SELECT * from $table limit 3"
    output_path = from_root("/.tmp/")

    def run_against(database_name: str):
        parameter_string = (
            f"query_string:{query},database_name:{database_name},"
            f"table_name:good_table,output_path:{output_path}"
        )
        request = OperatorParameters(parameter_string=parameter_string)
        return op.validate(config, request).run(env, Response())

    exported_message = (
        f'Table succesfully formatted as parquet and exported to {output_path}'
    )

    # Valid query
    succeeded = run_against("good_database")
    info_by_config = {
        "1": [
            'Running Query "SELECT * from $table limit 3"',
            'Running Athena query. query_id: test',
            'Running job id=test',
        ],
        "4": [exported_message],
    }
    expected_success = ({'Info': info_by_config[config.id]}, 200)
    assert succeeded.with_status() == expected_success

    # Bad permissions
    denied = run_against("access_denied")
    outcome_by_config = {
        "1": (
            {
                'Errors': [
                    'Job errored: Access denied for credentials. Ensure associated user or role has permission to CreateNamedQuery on athena'
                ],
                'Info': ['Running Query "SELECT * from $table limit 3"'],
            },
            403,
        ),
        "4": ({'Info': [exported_message]}, 200),
    }
    assert denied.with_status() == outcome_by_config[config.id]
def test_bad_parameter_strings(self):
    """Check that each malformed parameter string parses to no parameters."""
    malformed_strings = ("test", "test,", "test:", "test:,")
    for candidate in malformed_strings:
        parsed = OperatorParameters(candidate)
        assert parsed.parameters == []
def tests(env: MasonEnvironment, config: Config, op: Operator):
    """Run the operator for a missing database, a bad storage path and a valid path.

    The two failure scenarios are checked through ``with_status()``.  For the
    valid path only the ``"Info"`` messages are checked, after passing each
    through ``clean_string`` and ``clean_uuid``.

    Args:
        env: Environment handed to the validated operator's ``run``.
        config: Configuration the operator is validated against.
        op: Operator under test.
    """

    def run_with(parameter_string: str):
        params = OperatorParameters(parameter_string=parameter_string)
        return op.validate(config, params).run(env, Response())

    # Database does not exist
    missing_database = run_with(
        "database_name:bad-database,storage_path:crawler-poc/catalog_poc_data"
    )
    assert missing_database.with_status() == (
        {
            'Errors': ['Job errored: Metastore database bad-database not found'],
            'Info': ['Table inferred: catalog_poc_data'],
        },
        404,
    )

    # Bad path
    bad_path = run_with(
        "database_name:crawler-poc,storage_path:crawler-poc/bad-table"
    )
    assert bad_path.with_status() == (
        {
            'Errors': [
                'No keys at s3://crawler-poc/bad-table',
                'Job errored: Invalid Tables: No keys at s3://crawler-poc/bad-table',
            ]
        },
        404,
    )

    # Valid path
    inferred = run_with(
        "database_name:crawler-poc,storage_path:crawler-poc/catalog_poc_data,"
        "output_path:crawler-poc/athena/"
    )
    # Messages are normalised with the project's clean_string / clean_uuid
    # helpers before being compared with the fixed expectations below.
    infos = [
        clean_uuid(clean_string(message))
        for message in inferred.formatted()["Info"]
    ]
    expect = [
        'Tableinferred:catalog_poc_data',
        'RunningAthenaquery.query_id:test_id',
        'Runningjobid=test_id',
    ]
    assert infos == expect
def test_good_parameter_strings(self):
    """Check that well-formed parameter strings parse to the expected dicts.

    Each key of ``good_tests`` is a raw parameter string and each value is the
    list of single-entry dicts that ``OperatorParameters(...).to_dict()`` is
    expected to return for it.
    """
    good_tests = {
        "param:value": [{"param": "value"}],
        "param_test-value.with.dots/and/slash:value-test_value.with.dots/and/slash": [
            {"param_test-value.with.dots/and/slash": "value-test_value.with.dots/and/slash"}
        ],
        "param_test-value=with.equals:value-test_value=with.equals": [
            {"param_test-value=with.equals": "value-test_value=with.equals"}
        ],
        "param1:value,param2:value": [{"param1": "value"}, {"param2": "value"}],
        "param1:value,param1:value2": [{"param1": "value2"}],
        # Backslashes are written as "\\" so the literal holds a single
        # backslash without relying on the invalid escapes "\," and "\:".
        "testwith\\,inthemiddle:result,param2:andanother\\:inthemiddle": [
            {'inthemiddle': 'result'},
            {'param2': 'andanother\\'},
        ],
        "test with space: on both sides": [{'test with space': ' on both sides'}],
    }

    for param_string, result in good_tests.items():
        assert OperatorParameters(param_string).to_dict() == result
def test_parameter_validation(self):
    """Validate parsed parameter strings against required/optional specs.

    Each case maps a parameter string to five lists, in order: required
    names, optional names, expected validated values, expected optional
    values and expected parsed values.
    """
    cases: Dict[str, List[List[str]]] = {
        "param:value": [["param"], [], ["value"], [], ["value"]],
        "param:value,other_param:stuff": [
            ["other_param"],
            ["param"],
            ["stuff"],
            ["value"],
            ["value", "stuff"],
        ],
    }

    for param_string, expected in cases.items():
        required, optional, want_validated, want_optional, want_parsed = expected

        input_param = OperatorParameters(param_string)
        spec = {"required": required, "optional": optional}
        op = Operator("cmd", "subcmd", spec, [])
        outcome = op.parameters.validate(input_param)

        assert isinstance(outcome, ValidatedParameters)
        assert [p.value for p in outcome.validated_parameters] == want_validated
        assert [p.value for p in outcome.optional_parameters] == want_optional
        assert [p.value for p in outcome.parsed_parameters] == want_parsed
def tests(env: MasonEnvironment, config: Config, op: Operator):
    """Run the operator for three database/table combinations.

    Responses are compared with ``table.get(client_name, n)`` where ``n`` is
    1 (database and table exist), 2 (database missing) or 3 (table missing).
    """

    def run_with(parameter_string: str):
        request = OperatorParameters(parameter_string=parameter_string)
        return op.validate(config, request).run(env, Response())

    def expected(scenario: int):
        return table.get(config.metastore().client.name(), scenario)

    # Database and table exist
    found = run_with("database_name:crawler-poc,table_name:catalog_poc_data")
    assert found.with_status() == expected(1)

    # Database does not exist
    no_database = run_with(
        "database_name:bad-database,table_name:catalog_poc_data")
    assert no_database.with_status() == expected(2)

    # Table does not exist
    no_table = run_with("database_name:crawler-poc,table_name:bad-table")
    assert no_table.with_status() == expected(3)
def get_parameters(
    self,
    type: str,
    parameter_string: Optional[str],
    parameter_path: Optional[str],
    parameter_dict: Optional[dict],
) -> Union[Parameters, MalformedResource]:
    """Build the parameters object that matches the resource ``type``.

    Workflow types yield ``WorkflowParameters`` (built from the path and
    dict); operator types yield ``OperatorParameters`` (built from the string,
    path and dict).  Config types and any other type yield a
    ``MalformedResource`` whose message names the offending type.
    """
    if self.type_workflow(type):
        return WorkflowParameters(parameter_path, parameter_dict)

    if self.type_operator(type):
        return OperatorParameters(
            parameter_string, parameter_path, parameter_dict)

    # Neither remaining case produces parameters; only the message differs.
    if self.type_config(type):
        reason = f"Config type not supported: {type}"
    else:
        reason = f"Type not supported: {type}"
    return MalformedResource(message=reason)
def tests(env: MasonEnvironment, config: Config, op: Operator):
    """Run the operator for a good and a bad job id and check each response.

    Expected payloads are keyed by the execution client name taken from
    ``config``; expectations are provided for ``spark`` and ``athena``.

    Args:
        env: Environment handed to the validated operator's ``run``.
        config: Configuration the operator is validated against.
        op: Operator under test.
    """

    def run_with(parameter_string: str):
        params = OperatorParameters(parameter_string=parameter_string)
        return op.validate(config, params).run(env, Response())

    # Valid job_id
    # TODO: consolidate these
    expected_success = {
        'spark': {
            'Data': [{'Logs': ['<LOG_DATA>']}]
        },
        'athena': {
            'Data': [{
                'ResultSetMetadata': {
                    'ColumnInfo': [{
                        'CaseSensitive': True,
                        'CatalogName': 'hive',
                        'Label': 'widget',
                        'Name': 'widget',
                        'Nullable': 'UNKNOWN',
                        'Precision': 2147483647,
                        'Scale': 0,
                        'SchemaName': '',
                        'TableName': '',
                        'Type': 'varchar',
                    }]
                },
                'Rows': [{
                    'Data': [{'VarCharValue': 'widget'}]
                }],
            }],
            'Info': ['Job Status: SUCCEEDED'],
        },
    }
    good = run_with("job_id:good_job_id")
    assert good.with_status() == (
        expected_success[config.execution().client.name()],
        200,
    )

    # Invalid job_id
    bad = run_with("job_id:bad_job_id")
    expected_failure = {
        'spark': {
            'Errors': [
                'Error from server (NotFound): pods "bad_job_id-driver" not found'
            ]
        },
        'athena': {
            'Errors': [
                'QueryExecution bad_job_id was not found',
                'Job errored: Invalid Job: QueryExecution bad_job_id was not found',
            ]
        },
    }
    assert bad.with_status() == (
        expected_failure[config.execution().client.name()],
        400,
    )
def test_bad_from_path(self):
    """Check the first invalid-parameter reason for ``bad_params.yaml``."""
    bad_params_file = from_root("/test/support/parameters/bad_params.yaml")
    loaded = OperatorParameters(parameter_path=bad_params_file)
    expected_reason = "Parameters do not conform to specified schema in parameters/schema.json. Must be of form key:value"
    first_problem = loaded.invalid[0]
    assert first_problem.reason == expected_reason
def test_from_path(self):
    """Check the parameter values loaded from ``good_params.yaml``."""
    good_params_file = from_root("/test/support/parameters/good_params.yaml")
    loaded = OperatorParameters(parameter_path=good_params_file)
    values = [parameter.value for parameter in loaded.parameters]
    assert values == ["test_value", "test_value_2"]
def test_no_parameters(self):
    """Check that ``OperatorParameters()`` built with no arguments is empty."""
    empty = OperatorParameters()
    invalid_entries = empty.invalid
    assert invalid_entries == []
    parsed_entries = empty.parameters
    assert parsed_entries == []
def validate(self, input_parameters: OperatorParameters) -> ValidatedParameters:
    """Validate ``input_parameters`` against this object's parameter specs.

    Delegates to ``input_parameters.validate``, passing ``self.required`` and
    ``self.optional``, and returns whatever that call returns.
    """
    check = input_parameters.validate
    return check(self.required, self.optional)