Beispiel #1
0
def test_get():
    """Check the ``table get`` operator for present and missing targets.

    Each scenario validates and runs the operator with one parameter string,
    then compares ``with_status()`` of the response against
    ``table.get(<metastore client name>, n)`` for n = 1, 2, 3.
    """

    def tests(env: MasonEnvironment, config: Config, op: Operator):
        # (parameter string, second argument handed to table.get)
        scenarios = (
            # database and table exist
            ("database_name:crawler-poc,table_name:catalog_poc_data", 1),
            # database does not exist
            ("database_name:bad-database,table_name:catalog_poc_data", 2),
            # table does not exist
            ("database_name:crawler-poc,table_name:bad-table", 3),
        )
        for parameter_string, expectation_id in scenarios:
            params = OperatorParameters(parameter_string=parameter_string)
            response = op.validate(config, params).run(env, Response())
            assert response.with_status() == table.get(
                config.metastore().client.name(), expectation_id)

    # NOTE(review): ["1", "2"] is presumably the list of config ids to run
    # against -- confirm in run_tests.
    run_tests("table", "get", True, "fatal", ["1", "2"], tests)
Beispiel #2
0
def test_delete():
    """Check the ``table delete`` operator.

    Covers a successful delete (200 with an ``Info`` message), a missing
    database and a missing table (both 400 with an ``Errors`` message).
    """

    def tests(env: MasonEnvironment, config: Config, op: Operator):
        # valid delete
        params = OperatorParameters(
            parameter_string="table_name:good_table,database_name:good_database")
        deleted = op.validate(config, params).run(env, Response())
        assert deleted.with_status() == ({
            'Info': ['Table good_table successfully deleted.']
        }, 200)

        # database does not exist
        params = OperatorParameters(
            parameter_string="table_name:bad_table,database_name:bad_database")
        missing_database = op.validate(config, params).run(env, Response())
        assert missing_database.with_status() == ({
            'Errors': ['Database bad_database not found.']
        }, 400)

        # table does not exist
        params = OperatorParameters(
            parameter_string="table_name:bad_table,database_name:good_database")
        missing_table = op.validate(config, params).run(env, Response())
        assert missing_table.with_status() == ({
            'Errors': ['Table bad_table not found.']
        }, 400)

    # NOTE(review): ["1"] is presumably the list of config ids to run
    # against -- confirm in run_tests.
    run_tests("table", "delete", True, "fatal", ["1"], tests)
Beispiel #3
0
def test_post():
    """Check the ``table infer`` workflow, both directly and through ``run``.

    The two parameter files are expected to yield ``expects.post(False)`` and
    ``expects.post(True)`` respectively; the first file is then replayed
    through the ``run`` entry point and must match ``expects.post(False)``.
    """
    first_parameters = "/test/support/parameters/table_infer_parameters_1.yaml"
    second_parameters = "/test/support/parameters/table_infer_parameters_2.yaml"

    def tests(env: MasonEnvironment, config: Config, wf: Workflow):
        def execute(parameter_file: str):
            # Validate the workflow with the given parameter file and run it.
            params = WorkflowParameters(
                parameter_path=from_root(parameter_file))
            return wf.validate(env, config, params).run(env, Response())

        # DNE
        missing = execute(first_parameters)
        assert missing.with_status() == expects.post(False)

        # Exists
        present = execute(second_parameters)
        assert present.with_status() == expects.post(True)

        # API
        body, code = run(
            "workflow",
            wf.namespace,
            wf.command,
            param_file=from_root(first_parameters),
            config_id=config.id,
            env=env,
            log_level="fatal",
        )
        assert (body, code) == expects.post(False)

    run_tests("table", "infer", True, "fatal", ["1"], tests, workflow=True)
Beispiel #4
0
def test_query():
    """Check the ``table query`` operator for configs "1" and "4".

    Two scenarios are run per config: a valid query and a query against the
    ``access_denied`` database. Expected responses are keyed by ``config.id``.
    The ``.tmp`` output directory is removed afterwards even when an
    assertion fails, so a failing run does not leave artifacts behind.
    """
    output_path = from_root("/.tmp/")

    def tests(env: MasonEnvironment, config: Config, op: Operator):
        query = "SELECT * from $table limit 3"
        # Spelling ("succesfully") is kept as-is: it has to equal the message
        # in the response, which is produced outside this test.
        exported = f'Table succesfully formatted as parquet and exported to {output_path}'

        def execute(database_name: str):
            # Validate and run the operator against the given database.
            params = OperatorParameters(
                parameter_string=
                f"query_string:{query},database_name:{database_name},table_name:good_table,output_path:{output_path}"
            )
            return op.validate(config, params).run(env, Response())

        # valid query
        result = execute("good_database")
        expected_info = {
            "1": [
                'Running Query "SELECT * from $table limit 3"',
                'Running Athena query.  query_id: test', 'Running job id=test'
            ],
            "4": [exported],
        }
        assert result.with_status() == ({'Info': expected_info[config.id]}, 200)

        # bad permissions
        result = execute("access_denied")
        expected_denied = {
            "1": ({
                'Errors': [
                    'Job errored: Access denied for credentials.  Ensure associated user or role has permission to CreateNamedQuery on athena'
                ],
                'Info': ['Running Query "SELECT * from $table limit 3"']
            }, 403),
            # Config "4" is expected to succeed for this database as well.
            "4": ({
                'Info': [exported]
            }, 200),
        }
        assert result.with_status() == expected_denied[config.id]

    try:
        run_tests("table", "query", True, "fatal", ["1", "4"], tests)
    finally:
        # Clean up whatever was written to the output directory, pass or fail.
        if path.exists(output_path):
            shutil.rmtree(output_path)
Beispiel #5
0
def test_refresh():
    """Check the ``table refresh`` operator.

    A plain refresh must match ``table.refresh(False)``; refreshing the
    ``catalog_poc_data_refreshing`` table must match ``table.refresh(True)``.
    """

    def tests(env: MasonEnvironment, config: Config, op: Operator):
        # (table name, argument handed to table.refresh)
        scenarios = (
            # valid refresh
            ("catalog_poc_data", False),
            # already refreshing
            ("catalog_poc_data_refreshing", True),
        )
        for table_name, refreshing in scenarios:
            params = OperatorParameters(
                parameter_string=
                "table_name:" + table_name + ",database_name:crawler-poc")
            response = op.validate(config, params).run(env, Response())
            assert response.with_status() == table.refresh(refreshing)

    run_tests("table", "refresh", True, "fatal", ["1"], tests)
Beispiel #6
0
def test_infer():
    """Check the ``table infer`` operator.

    Covers a missing metastore database (404), a storage path with no keys
    (404), and a valid path whose ``Info`` messages are compared after being
    normalised with ``clean_string`` and ``clean_uuid``.
    """

    def tests(env: MasonEnvironment, config: Config, op: Operator):
        # database does not exist
        params = OperatorParameters(
            parameter_string=
            "database_name:bad-database,storage_path:crawler-poc/catalog_poc_data"
        )
        missing_database = op.validate(config, params).run(env, Response())
        assert missing_database.with_status() == ({
            'Errors':
            ['Job errored: Metastore database bad-database not found'],
            'Info': ['Table inferred: catalog_poc_data']
        }, 404)

        # bad path
        params = OperatorParameters(
            parameter_string=
            "database_name:crawler-poc,storage_path:crawler-poc/bad-table")
        bad_path = op.validate(config, params).run(env, Response())
        assert bad_path.with_status() == ({
            'Errors': [
                'No keys at s3://crawler-poc/bad-table',
                'Job errored: Invalid Tables: No keys at s3://crawler-poc/bad-table'
            ]
        }, 404)

        # valid path
        params = OperatorParameters(
            parameter_string=
            "database_name:crawler-poc,storage_path:crawler-poc/catalog_poc_data,output_path:crawler-poc/athena/"
        )
        inferred = op.validate(config, params).run(env, Response())

        # Normalise each message before comparing; the expected strings below
        # are written in that normalised form.
        infos = [
            clean_uuid(clean_string(message))
            for message in inferred.formatted()["Info"]
        ]
        expect = [
            'Tableinferred:catalog_poc_data',
            'RunningAthenaquery.query_id:test_id', 'Runningjobid=test_id'
        ]
        assert infos == expect

    run_tests("table", "infer", True, "fatal", ["3"], tests)
Beispiel #7
0
def test_delete():
    """Check the ``schedule delete`` operator.

    Deleting ``good_schedule`` must return 200 with an ``Info`` message;
    deleting ``bad_schedule`` must return 400 with an ``Errors`` message.
    """

    def tests(env: MasonEnvironment, config: Config, op: Operator):
        # valid delete
        params = OperatorParameters(
            parameter_string="schedule_name:good_schedule")
        deleted = op.validate(config, params).run(env, Response())
        assert deleted.with_status() == ({
            'Info': ['Schedule good_schedule successfully deleted.']
        }, 200)

        # schedule does not exist
        params = OperatorParameters(
            parameter_string="schedule_name:bad_schedule")
        missing = op.validate(config, params).run(env, Response())
        assert missing.with_status() == ({
            'Errors': ["Crawler entry with name bad_schedule does not exist"]
        }, 400)

    run_tests("schedule", "delete", True, "fatal", ["1"], tests)
Beispiel #8
0
def test_index():
    """Check the ``table list`` operator for an existing and a missing database.

    Responses are compared against ``table.index(<metastore client name>)``
    and ``table.index(<metastore client name>, False)`` respectively.
    """

    def tests(env: MasonEnvironment, config: Config, op: Operator):
        def execute(database_name: str):
            # Validate and run the operator for the given database.
            params = OperatorParameters(
                parameter_string="database_name:" + database_name)
            validated = op.validate(config, params)
            return validated.run(env, Response())

        # Database exists
        found = execute("crawler-poc")
        assert found.with_status() == table.index(
            config.metastore().client.name())

        # Database does not exist
        missing = execute("bad-database")
        assert missing.with_status() == table.index(
            config.metastore().client.name(), False)

    run_tests("table", "list", True, "fatal", ["1", "2"], tests)
Beispiel #9
0
def test_format():
    """Check the ``table format`` operator.

    An unsupported format (``boogo``) must yield an ``InvalidJob`` as the
    response object; ``csv`` must yield an ``ExecutedJob``.
    """

    # Environment variables are loaded from the .env file one level above the
    # project root, overriding any already set.
    load_dotenv(from_root("/../.env"), override=True)

    def tests(env: MasonEnvironment, config: Config, op: Operator):
        # unsupported format
        params = OperatorParameters(
            parameter_string=
            "database_name:mason-sample-data,table_name:tests/in/csv/,format:boogo,output_path:mason-sample-data/tests/out/csv/"
        )
        rejected = op.validate(config, params).run(env, Response())
        assert isinstance(rejected.object, InvalidJob)

        # supported format
        params = OperatorParameters(
            parameter_string=
            "database_name:mason-sample-data,table_name:tests/in/csv/,format:csv,output_path:good_output_path"
        )
        executed = op.validate(config, params).run(env, Response())
        assert isinstance(executed.object, ExecutedJob)

    run_tests("table", "format", True, "fatal", ["4"], tests)
Beispiel #10
0
def test_get():
    """Check the ``job get`` operator for a known and an unknown job id.

    Expected responses are keyed by the execution client name (``spark`` or
    ``athena``); a known job id must return 200 and an unknown one 400.
    """

    def tests(env: MasonEnvironment, config: Config, op: Operator):
        # valid job_id
        params = OperatorParameters(parameter_string="job_id:good_job_id")
        # TODO: consolidate these
        expected_found = {
            'spark': {
                'Data': [{
                    'Logs': ['<LOG_DATA>']
                }]
            },
            'athena': {
                'Data': [{
                    'ResultSetMetadata': {
                        'ColumnInfo': [{
                            'CaseSensitive': True,
                            'CatalogName': 'hive',
                            'Label': 'widget',
                            'Name': 'widget',
                            'Nullable': 'UNKNOWN',
                            'Precision': 2147483647,
                            'Scale': 0,
                            'SchemaName': '',
                            'TableName': '',
                            'Type': 'varchar'
                        }]
                    },
                    'Rows': [{
                        'Data': [{
                            'VarCharValue': 'widget'
                        }]
                    }]
                }],
                'Info': ['Job Status: SUCCEEDED']
            },
        }

        found = op.validate(config, params).run(env, Response())
        assert found.with_status() == (
            expected_found[config.execution().client.name()], 200)

        # invalid job_id
        params = OperatorParameters(parameter_string="job_id:bad_job_id")
        missing = op.validate(config, params).run(env, Response())

        expected_missing = {
            'spark': {
                'Errors': [
                    'Error from server (NotFound): pods "bad_job_id-driver" not found'
                ]
            },
            'athena': {
                'Errors': [
                    'QueryExecution bad_job_id was not found',
                    'Job errored: Invalid Job: QueryExecution bad_job_id was not found'
                ]
            }
        }

        assert missing.with_status() == (
            expected_missing[config.execution().client.name()], 400)

    run_tests("job", "get", True, "fatal", ["1", "2"], tests)