def main():
    """Run the pq_flattener AWS Glue job for a single date partition.

    Expects exactly one CLI argument (a date formatted YYYY-mm-dd) and reads
    the job configuration from the PQ_FLATTENER_* environment variables.
    Exits with status 1 on incorrect usage.
    """
    # Require exactly one positional argument; the original `== 1` check let
    # extra arguments through silently.
    if len(sys.argv) != 2:
        print(f"Usage: {sys.argv[0]} YYYY-mm-dd")
        sys.exit(1)

    date = sys.argv[1]

    # Each lookup raises KeyError if the variable is missing, which is the
    # desired fail-fast behaviour for required configuration.
    job_bucket = os.environ["PQ_FLATTENER_GLUE_JOB_BUCKET"]
    iam_role = os.environ["PQ_FLATTENER_JOB_IAM_ROLE"]

    source_path = os.environ["PQ_FLATTENER_SOURCE_PATH"]
    dest_path = os.environ["PQ_FLATTENER_DEST_PATH"]

    job = GlueJob(
        "v1/glue_jobs/pq_flattener",
        bucket=job_bucket,
        job_role=iam_role,
        job_arguments={
            "--date": date,
            "--s3_source": source_path,
            "--s3_dest": dest_path,
        },
    )

    # Plain string: the original used an f-string with no placeholders.
    job.job_name = "pq_flattener"

    # Run job on AWS Glue
    print(f'Starting job "{job.job_name}"...')

    try:
        job.run_job()
        job.wait_for_completion()
    finally:
        # Always remove the uploaded job resources, even if the run fails.
        job.cleanup()
Code example #2
0
def main(job_role):
    """Zip the gluejobutils package and run its Glue unit-test job.

    :param job_role: IAM role name the Glue job assumes while running.
    """
    package_name = 'gluejobutils'
    to_path = f'test/glue_test/glue_py_resources/{package_name}.zip'

    # Package members shipped to Glue as a py_resources zip.
    members = [
        '__init__.py',
        'datatypes.py',
        'dates.py',
        's3.py',
        'utils.py',
        'dea_record_datetimes.py',
        'data/data_type_conversion.json',
    ]
    # Context manager guarantees the archive is closed even if a write
    # fails; the original leaked the handle on any exception.
    with zipfile.ZipFile(to_path, "w") as zf:
        for member in members:
            zf.write(os.path.join(package_name, member))

    g = GlueJob('test/glue_test/',
                bucket='alpha-gluejobutils',
                job_role=job_role)
    g.job_name = 'gluejobutils_unit_test'
    g.run_job()
Code example #3
0
File: test_tests.py  Project: cyberhobbes/etl_manager
    def test_db_value_properties(self):
        """GlueJob setters round-trip their values; bucket rejects s3:// URIs."""
        job = GlueJob(
            'example/glue_jobs/simple_etl_job/',
            bucket='alpha-everyone',
            job_role='alpha_user_isichei',
            job_arguments={'--test_arg': 'this is a test'},
        )

        job.job_name = 'changed_job_name'
        self.assertEqual(job.job_name, 'changed_job_name')

        job.bucket = "new-bucket"
        self.assertEqual(job.bucket, "new-bucket")
        # Bucket must be a bare bucket name, not a full s3:// URI.
        with self.assertRaises(ValueError):
            job.bucket = "s3://new-bucket"

        job.job_role = 'alpha_new_user'
        self.assertEqual(job.job_role, 'alpha_new_user')

        job.job_arguments = {"--new_args": "something"}
        self.assertEqual(job.job_arguments["--new_args"], "something")
Code example #4
0
    def test_db_value_properties(self):
        """Each GlueJob property setter stores its value; bucket validates input."""
        glue_job = GlueJob(
            "example/glue_jobs/simple_etl_job/",
            bucket="alpha-everyone",
            job_role="alpha_user_isichei",
            job_arguments={"--test_arg": "this is a test"},
        )

        # job_name is freely reassignable after construction.
        glue_job.job_name = "changed_job_name"
        self.assertEqual(glue_job.job_name, "changed_job_name")

        # bucket accepts a plain name but raises on an s3:// URI.
        glue_job.bucket = "new-bucket"
        self.assertEqual(glue_job.bucket, "new-bucket")
        with self.assertRaises(ValueError):
            glue_job.bucket = "s3://new-bucket"

        # job_role round-trips.
        glue_job.job_role = "alpha_new_user"
        self.assertEqual(glue_job.job_role, "alpha_new_user")

        # job_arguments may be replaced wholesale.
        glue_job.job_arguments = {"--new_args": "something"}
        self.assertEqual(glue_job.job_arguments["--new_args"], "something")
def main(job_role):
    """Zip gluejobutils, run its Glue unit-test job, and clean up on success.

    :param job_role: IAM role name the Glue job assumes while running.
    """
    package_name = "gluejobutils"
    to_path = f"test/glue_test/glue_py_resources/{package_name}.zip"

    # Package members shipped to Glue as a py_resources zip.
    members = [
        "__init__.py",
        "datatypes.py",
        "s3.py",
        "utils.py",
        "record_datetimes.py",
        "df_transforms.py",
        "data/data_type_conversion.json",
    ]
    # Context manager guarantees the archive is closed even if a write
    # fails; the original leaked the handle on any exception.
    with zipfile.ZipFile(to_path, "w") as zf:
        for member in members:
            zf.write(os.path.join(package_name, member))

    g = GlueJob("test/glue_test/",
                bucket="alpha-gluejobutils",
                job_role=job_role)
    g.job_name = "gluejobutils_unit_test"
    g.run_job()

    g.wait_for_completion(True)
    # Only delete the job resources when the run succeeded, so a failed
    # run can still be inspected in the Glue console.
    if g.job_run_state == "SUCCEEDED":
        print("cleaning up job...")
        g.cleanup()
Code example #6
0
def main(job_role):
    """Zip gluejobutils, run its Glue unit-test job, and clean up on success.

    :param job_role: IAM role name the Glue job assumes while running.
    """
    package_name = 'gluejobutils'
    to_path = f'test/glue_test/glue_py_resources/{package_name}.zip'

    # Package members shipped to Glue as a py_resources zip.
    members = [
        '__init__.py',
        'datatypes.py',
        's3.py',
        'utils.py',
        'record_datetimes.py',
        'df_transforms.py',
        'data/data_type_conversion.json',
    ]
    # Context manager guarantees the archive is closed even if a write
    # fails; the original leaked the handle on any exception.
    with zipfile.ZipFile(to_path, "w") as zf:
        for member in members:
            zf.write(os.path.join(package_name, member))

    g = GlueJob('test/glue_test/',
                bucket='alpha-gluejobutils',
                job_role=job_role)
    g.job_name = 'gluejobutils_unit_test'
    g.run_job()

    g.wait_for_completion(True)
    # Only delete the job resources when the run succeeded, so a failed
    # run can still be inspected in the Glue console.
    if g.job_run_state == 'SUCCEEDED':
        print("cleaning up job...")
        g.cleanup()
Code example #7
0
# NOTE(review): the matching `try` is above this excerpt (presumably reading
# the role name from argv or the environment). The bare `except:` catches
# everything, including SystemExit/KeyboardInterrupt — consider narrowing to
# `except Exception:` once the full context is visible.
except:
    raise Exception("You must provide a role name")

bucket = 'alpha-data-linking'

# Build the record-linking Glue job. The special job arguments enable the
# Spark UI and continuous CloudWatch logging; `--conf` adds the graphframes
# package (built for Spark 2.3 / Scala 2.11) to the Spark session.
job = GlueJob(
    'match/',
    bucket=bucket,
    job_role=ROLE,
    job_arguments={
        "--test_arg": 'some_string',
        "--conf":
        'spark.jars.packages=graphframes:graphframes:0.6.0-spark2.3-s_2.11',
        '--enable-spark-ui': 'true',
        '--spark-event-logs-path':
        's3://alpha-data-linking/glue_test_delete/logsdelete',
        '--enable-continuous-cloudwatch-log': 'true'
    })

job.job_name = '1m_p_50_e_6'
# NOTE(review): `_job_definition` is a private GlueJob method — printed here
# for debugging the generated job spec.
print(job._job_definition())

# Number of Glue DPUs allocated to the run.
job.allocated_capacity = 2

try:
    job.run_job()
    job.wait_for_completion()
finally:
    # Cleanup intentionally disabled so the job's resources can be inspected
    # after the run.
    pass
    # job.cleanup()
Code example #8
0
from etl_manager.etl import GlueJob
import os

# IAM role the Glue job assumes while it runs.
ROLE = 'name_of_iam_role_for_glue_job_to_assume'

# Bucket that receives the job's Python file and any jar dependencies.
bucket = 's3_bucket_where_job_python_file_and_jar_will_go'

# Standard AWS Glue special job parameters: Spark UI, continuous CloudWatch
# logging, and job metrics are all switched on.
logging_args = {
    '--enable-spark-ui': 'true',
    '--spark-event-logs-path': 's3://path_to_logs',
    '--enable-continuous-cloudwatch-log': 'true',
    '--enable-metrics': '',
}

job = GlueJob('glue_job/', bucket=bucket, job_role=ROLE,
              job_arguments=logging_args)

job.job_name = 'my_job_name'

# Number of Glue DPUs allocated to the run.
job.allocated_capacity = 4

try:
    job.run_job()
    job.wait_for_completion()
finally:
    # Remove the uploaded job resources whether or not the run succeeded.
    job.cleanup()