Code example #1
def test_lakeadmin_2_image_with_apps(kube: TestClient) -> None:
    lake_admin_job_image_with_apps = LAKE_ADMIN_JOB
    lake_admin_job_image_with_apps["metadata"]["generateName"] = "test-orbit-job-lake-admin-image-with-apps-"
    lake_admin_job_image_with_apps["spec"]["tasks"] = [{
        "notebookName": "2-Image_with_apps.ipynb",
        "sourcePath": "shared/samples/notebooks/M-Admin",
        "targetPath": "shared/regression/notebooks/M-Admin",
        "params": {}
    }]

    logger.info(lake_admin_job_image_with_apps)
    lakeadmin = OrbitJobCustomApiObject(lake_admin_job_image_with_apps)
    lakeadmin.create(namespace="lake-admin")
    # Wait until the OrbitJob resource is created
    lakeadmin.wait_until_ready(timeout=120)
    # Wait for the job to complete; its final status determines pass/fail
    lakeadmin.wait_until_job_completes(timeout=7200, interval=30)
    current_status = lakeadmin.get_status().get("orbitJobOperator").get("jobStatus")
    logger.info(f"current_status={current_status}")
    # Cleanup
    lakeadmin.delete()
    assert current_status == JOB_COMPLETION_STATUS
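
The test above fills in a shared LAKE_ADMIN_JOB template and relies on module-level names that are not shown here (logger, JOB_COMPLETION_STATUS, JOB_FAILED_STATUS). The sketch below illustrates, under assumptions, what those pieces might look like; the literal status values and the template's podSetting are placeholders, not the suite's actual definitions. Because the test assigns into the template in place, copying it first (for example with copy.deepcopy) would keep one test's edits from leaking into the next.

import logging

logger = logging.getLogger(__name__)

# Terminal job states reported by the Orbit job operator (assumed values).
JOB_COMPLETION_STATUS = "Complete"
JOB_FAILED_STATUS = "Failed"

# Shared OrbitJob manifest that individual tests specialize before submitting.
LAKE_ADMIN_JOB = {
    "apiVersion": "orbit.aws/v1",
    "kind": "OrbitJob",
    "metadata": {},
    "spec": {
        "taskType": "jupyter",
        "compute": {
            "nodeType": "ec2",
            "container": {"concurrentProcesses": 1},
            "podSetting": "orbit-runner-support-small",  # placeholder pod setting
        },
        "tasks": [],
    },
}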
Code example #2
def test_lakecreator_extractor(zip_file, kube: TestClient):
    # Extract Zip Files in Parallel
    bucketName = lake_creator_config.get("bucketName")
    extractedFolder = lake_creator_config.get("extractedFolder")
    schemas = get_schemas(bucketName, 'landing/cms/schema/')

    # for key in s3.list_objects_v2(Bucket=bucketName, Prefix=sourceFolder)['Contents']:
    file = zip_file['Key']
    schema = get_schema(schemas, file)
    s3_data_folder = os.path.join(extractedFolder, schema[0] if schema[0] else "")

    notebook_to_run = {
        "apiVersion": "orbit.aws/v1",
        "kind": "OrbitJob",
        "metadata": {
            "generateName": "test-orbit-job-lake-creator-"
        },
        "spec": {
            "taskType": "jupyter",
            "compute": {
                "nodeType": "ec2",
                "container": {
                    "concurrentProcesses": 1
                },
                "podSetting": "orbit-runner-support-small"
            },
            "tasks": [{
                "notebookName": "Example-2-Extract-Files.ipynb",
                "sourcePath": "/home/jovyan/shared/samples/notebooks/A-LakeCreator",
                "targetPath": "/home/jovyan/shared/regression/notebooks/A-LakeCreator",
                "params": {
                    "bucketName": bucketName,
                    "zipFileName": file,
                    "targetFolder": s3_data_folder,
                    "use_subdirs": "False" if schema[0] else "True"
                }
            }]
        }
    }

    logger.info(notebook_to_run)
    lakecreator = OrbitJobCustomApiObject(notebook_to_run)
    lakecreator.create(namespace="lake-creator")
    # Wait until the OrbitJob resource is created
    lakecreator.wait_until_ready(timeout=60)
    # Wait for the job to complete; its final status determines pass/fail
    lakecreator.wait_until_job_completes(timeout=600)
    current_status = lakecreator.get_status().get("orbitJobOperator").get("jobStatus")
    logger.info(f"current_status={current_status}")
    # Cleanup
    lakecreator.delete()
    assert current_status == JOB_COMPLETION_STATUS
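
The zip_file argument is a parametrized fixture, and the commented-out loop in the original suggests it is derived from an s3.list_objects_v2 listing. One possible way to produce that parametrization is sketched below; the bucket name, prefix, and choice of the pytest_generate_tests hook are assumptions, not the suite's actual conftest.

import boto3

def _list_zip_objects(bucket: str, prefix: str) -> list:
    # One test run per zip archive found under the landing prefix.
    s3 = boto3.client("s3")
    contents = s3.list_objects_v2(Bucket=bucket, Prefix=prefix).get("Contents", [])
    return [obj for obj in contents if obj["Key"].endswith(".zip")]

def pytest_generate_tests(metafunc):
    if "zip_file" in metafunc.fixturenames:
        # Placeholder bucket/prefix; the real suite would read these from
        # lake_creator_config, as the test above does.
        zips = _list_zip_objects("example-lake-bucket", "landing/cms/")
        metafunc.parametrize("zip_file", zips, ids=[z["Key"] for z in zips])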
Code example #3
def test_lakecreator_lf(kube: TestClient):
    notebook_to_run = {
        "apiVersion": "orbit.aws/v1",
        "kind": "OrbitJob",
        "metadata": {
            "generateName": "test-orbit-job-lake-creator-"
        },
        "spec": {
            "taskType": "jupyter",
            "compute": {
                "nodeType": "ec2",
                "container": {
                    "concurrentProcesses": 1
                },
                "podSetting": "orbit-runner-support-large"
            },
            "tasks": [{
                "notebookName": "Example-4-LakeFormation-Secured-DB.ipynb",
                "sourcePath": "/home/jovyan/shared/samples/notebooks/A-LakeCreator",
                "targetPath": "/home/jovyan/shared/regression/notebooks/A-LakeCreator"
            }]
        }
    }

    logger.info(f"notebook_to_run={notebook_to_run}")

    lakecreator = OrbitJobCustomApiObject(notebook_to_run)
    lakecreator.create(namespace="lake-creator")
    # Wait until the OrbitJob resource is created
    lakecreator.wait_until_ready(timeout=60)
    # Wait for the job to complete; its final status determines pass/fail
    lakecreator.wait_until_job_completes(timeout=1800)
    current_status = lakecreator.get_status().get("orbitJobOperator").get("jobStatus")
    logger.info(f"current_status={current_status}")
    # Cleanup
    lakecreator.delete()
    assert current_status == JOB_COMPLETION_STATUS
Code example #4
def test_lakecreator_glue_table_creator(datafile, kube: TestClient):
    region = workspace.get("region")
    bucket_name = lake_creator_config.get("bucketName")
    database_name = lake_creator_config.get("database_name")
    schemas = get_schemas(bucket_name, 'landing/cms/schema/')

    p = Path(datafile).parent
    logger.info(f"Path={p}")
    schema = get_schema(schemas, datafile)
    from datetime import datetime
    datetimestring = datetime.now().strftime("%m%d%Y%H%M%S%f")

    notebook_to_run = {
        "apiVersion": "orbit.aws/v1",
        "kind": "OrbitJob",
        "metadata": {
            "generateName": "test-orbit-job-lake-creator-"
        },
        "spec": {
            "taskType": "jupyter",
            "compute": {
                "nodeType": "ec2",
                "container": {
                    "concurrentProcesses": 1
                },
                "podSetting": "orbit-runner-support-small",
                "env": [{
                    "name": "AWS_ORBIT_S3_BUCKET",
                    "value": bucket_name
                }]
            },
            "tasks": [{
                "notebookName": "Example-3-Load-Database-Athena.ipynb",
                "sourcePath": "/home/jovyan/shared/samples/notebooks/A-LakeCreator",
                "targetPath": "/home/jovyan/shared/regression/notebooks/A-LakeCreator",
                "targetPrefix": f"unsecured-{datetimestring}",
                "params": {
                    "source_bucket_name": bucket_name,
                    "target_bucket_name": bucket_name,
                    "database_name": database_name,
                    "schema_dir": "landing/cms/schema",
                    "file_path": str(p),
                    "region": region
                }
            }]
        }
    }

    logger.info(notebook_to_run)

    lakecreator = OrbitJobCustomApiObject(notebook_to_run)
    lakecreator.create(namespace="lake-creator")
    # Wait until the OrbitJob resource is created
    lakecreator.wait_until_ready(timeout=60)
    # Wait for the job to complete; its final status determines pass/fail
    lakecreator.wait_until_job_completes(timeout=1200)
    current_status = lakecreator.get_status().get("orbitJobOperator").get("jobStatus")
    logger.info(f"current_status={current_status}")
    # Cleanup
    lakecreator.delete()
    assert current_status == JOB_COMPLETION_STATUS
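
Both the extractor and table-creator tests lean on get_schemas and get_schema helpers that are not shown. The sketch below is one hypothetical reading of them, consistent with how schema[0] is used above (an empty first element meaning "no matching schema"); the actual helpers in the suite may return richer objects.

import os
import boto3

def get_schemas(bucket_name: str, schema_prefix: str) -> list:
    # Derive a schema (table) name from each schema file key under the prefix,
    # e.g. "landing/cms/schema/beneficiary.json" -> "beneficiary".
    s3 = boto3.client("s3")
    contents = s3.list_objects_v2(Bucket=bucket_name, Prefix=schema_prefix).get("Contents", [])
    return [os.path.splitext(os.path.basename(obj["Key"]))[0] for obj in contents]

def get_schema(schemas: list, file_key: str) -> list:
    # Match a data file to its schema by name prefix; the tests above only
    # look at schema[0], treating an empty string as "no schema found".
    base = os.path.basename(file_key).lower()
    matches = [s for s in schemas if s and base.startswith(s.lower())]
    return matches if matches else [""]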
Code example #5
def test_lakeuser_notebooks_xfail(kube: TestClient) -> None:
    notebook_to_run = {
        "name": "Example-90-Failure-Behavior.ipynb",
        "folder": "B-DataAnalyst"
    }
    podsetting_name = "orbit-runner-support-small"
    body = {
        "apiVersion": "orbit.aws/v1",
        "kind": "OrbitJob",
        "metadata": {
            "generateName": "test-orbit-job-lake-user-"
        },
        "spec": {
            "taskType": "jupyter",
            "compute": {
                 "nodeType": "ec2",
                 "container": {
                     "concurrentProcesses": 1
                 },
                 "podSetting": podsetting_name
            },
            "tasks": [{
                "notebookName": notebook_to_run['name'],
                "sourcePath": f"shared/samples/notebooks/{notebook_to_run['folder']}",
                "targetPath": f"shared/regression/notebooks/{notebook_to_run['folder']}",
                "params": {}
            }]
        }
    }

    logger.info(body)
    lakeuser = OrbitJobCustomApiObject(body)
    lakeuser.create(namespace="lake-user")
    # Wait until the OrbitJob resource is created
    lakeuser.wait_until_ready(timeout=120)
    # Wait for the job to finish; this notebook is expected to fail, so the test asserts the failed status
    lakeuser.wait_until_job_completes(timeout=1200)
    current_status = lakeuser.get_status().get("orbitJobOperator").get("jobStatus")
    logger.info(f"current_status={current_status}")
    # Cleanup
    lakeuser.delete()
    assert current_status == JOB_FAILED_STATUS
Code example #6
def test_lakeuser_notebooks(notebook_to_run, kube: TestClient) -> None:
    logger.info(f"notebook_to_run={notebook_to_run}")
    notebook_file_name = notebook_to_run['name'].split(".")[0]
    podsetting_name = (
        "orbit-runner-support-xlarge"
        if notebook_file_name in lake_creator_list_of_files["sagemaker_notebooks_list"]
        else "orbit-runner-support-large"
    )
    body = {
        "apiVersion": "orbit.aws/v1",
        "kind": "OrbitJob",
        "metadata": {
            "generateName": "test-orbit-job-lake-user-"
        },
        "spec": {
            "taskType": "jupyter",
            "compute": {
                 "nodeType": "ec2",
                 "container": {
                     "concurrentProcesses": 1
                 },
                 "podSetting": podsetting_name
            },
            "tasks": [{
                "notebookName": notebook_to_run['name'],
                "sourcePath": f"shared/samples/notebooks/{notebook_to_run['folder']}",
                "targetPath": f"shared/regression/notebooks/{notebook_to_run['folder']}",
                "params": {}
            }]
        }
    }

    logger.info(body)
    lakeuser = OrbitJobCustomApiObject(body)
    lakeuser.create(namespace="lake-user")
    # Wait until the OrbitJob resource is created
    lakeuser.wait_until_ready(timeout=120)
    # Wait for the job to complete; its final status determines pass/fail
    lakeuser.wait_until_job_completes(timeout=7200)
    current_status = lakeuser.get_status().get("orbitJobOperator").get("jobStatus")
    logger.info(f"current_status={current_status}")
    # Cleanup
    lakeuser.delete()
    assert current_status == JOB_COMPLETION_STATUS
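
Every test above repeats the same create / wait_until_ready / wait_until_job_completes / get_status / delete / assert sequence, varying only the body, namespace, timeouts, and expected status. A possible shared helper is sketched below; it reuses only the OrbitJobCustomApiObject calls already present in the tests, and the try/finally is an addition so the custom resource is deleted even if a wait times out.

def run_orbit_job(body: dict, namespace: str, expected_status: str,
                  ready_timeout: int = 120, completion_timeout: int = 1200) -> None:
    job = OrbitJobCustomApiObject(body)
    job.create(namespace=namespace)
    try:
        # Wait until the OrbitJob resource is created, then until it finishes.
        job.wait_until_ready(timeout=ready_timeout)
        job.wait_until_job_completes(timeout=completion_timeout)
        current_status = job.get_status().get("orbitJobOperator").get("jobStatus")
        logger.info(f"current_status={current_status}")
    finally:
        # Always clean up the custom resource, even if a wait raises.
        job.delete()
    assert current_status == expected_status

With such a helper, test_lakeuser_notebooks would reduce to building its body and calling run_orbit_job(body, "lake-user", JOB_COMPLETION_STATUS, completion_timeout=7200), while the expected-failure test in code example #5 would pass JOB_FAILED_STATUS instead.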