def main():
    """Run the ``pq_flattener`` AWS Glue job for a single date.

    The date is taken from the first command-line argument and the job
    configuration from environment variables. The job is started, waited
    on, and cleaned up afterwards whether or not it succeeded.

    Environment variables read:
        PQ_FLATTENER_GLUE_JOB_BUCKET: passed to ``GlueJob`` as ``bucket``.
        PQ_FLATTENER_JOB_IAM_ROLE: passed to ``GlueJob`` as ``job_role``.
        PQ_FLATTENER_SOURCE_PATH: sent as the ``--s3_source`` job argument.
        PQ_FLATTENER_DEST_PATH: sent as the ``--s3_dest`` job argument.

    Raises:
        SystemExit: with status 1 when the date argument is missing or
            cannot be parsed as ``YYYY-mm-dd``.
        KeyError: when one of the environment variables above is unset.
    """
    # Imported locally so the check below does not depend on the
    # file-level imports.
    from datetime import datetime

    usage = f"Usage: {sys.argv[0]} YYYY-mm-dd"
    if len(sys.argv) < 2:
        print(usage)
        sys.exit(1)

    date = sys.argv[1]

    # Fail fast on a malformed date instead of starting a Glue job that
    # can only fail later.
    try:
        datetime.strptime(date, "%Y-%m-%d")
    except ValueError:
        print(f"Invalid date {date!r}.")
        print(usage)
        sys.exit(1)

    job_bucket = os.environ["PQ_FLATTENER_GLUE_JOB_BUCKET"]
    iam_role = os.environ["PQ_FLATTENER_JOB_IAM_ROLE"]

    source_path = os.environ["PQ_FLATTENER_SOURCE_PATH"]
    dest_path = os.environ["PQ_FLATTENER_DEST_PATH"]

    job = GlueJob(
        "v1/glue_jobs/pq_flattener",
        bucket=job_bucket,
        job_role=iam_role,
        job_arguments={
            "--date": date,
            "--s3_source": source_path,
            "--s3_dest": dest_path,
        },
    )

    job.job_name = "pq_flattener"

    # Run job on AWS Glue
    print(f'Starting job "{job.job_name}"...')

    try:
        job.run_job()
        job.wait_for_completion()
    finally:
        # Clean up even when the run or the wait raised.
        job.cleanup()
Esempio n. 2
0
def main(job_role):
    """Zip the ``gluejobutils`` package files and start the unit-test job.

    Writes the listed package files into
    ``test/glue_test/glue_py_resources/gluejobutils.zip`` (paths are
    relative to the current working directory), then builds a ``GlueJob``
    for ``test/glue_test/`` in the ``alpha-gluejobutils`` bucket, names it
    ``gluejobutils_unit_test`` and runs it.

    Args:
        job_role: passed to ``GlueJob`` as ``job_role``.

    Raises:
        FileNotFoundError: if the output directory or one of the package
            files does not exist.
    """
    package_name = 'gluejobutils'
    to_path = f'test/glue_test/glue_py_resources/{package_name}.zip'
    package_files = (
        '__init__.py',
        'datatypes.py',
        'dates.py',
        's3.py',
        'utils.py',
        'dea_record_datetimes.py',
        'data/data_type_conversion.json',
    )

    # The context manager closes the archive even when a file is missing,
    # so no open handle is left behind on error.
    with zipfile.ZipFile(to_path, "w") as zf:
        for file_name in package_files:
            zf.write(os.path.join(package_name, file_name))

    g = GlueJob('test/glue_test/',
                bucket='alpha-gluejobutils',
                job_role=job_role)
    g.job_name = 'gluejobutils_unit_test'
    g.run_job()
Esempio n. 3
0
def main():
    """Run the example Glue job and clean it up after a successful run.

    Reads ``IAM_ROLE``, ``GITHUB_TAG`` and ``SNAPSHOT_DATE`` from the
    environment, starts the job built from ``glue_jobs/example_job/``,
    waits for it verbosely, and calls ``cleanup()`` only when the final
    run state is ``SUCCEEDED``.

    Raises:
        KeyError: if one of the environment variables above is unset.
    """
    role = os.environ["IAM_ROLE"]
    tag = os.environ["GITHUB_TAG"]
    snapshot = os.environ["SNAPSHOT_DATE"]

    # NOTE(review): job_bucket is not assigned in this function; presumably
    # it is a module-level name defined elsewhere - confirm.
    job = GlueJob(
        "glue_jobs/example_job/",
        bucket=job_bucket,
        job_role=role,
        # Job parameters for this specific glue job
        job_arguments={
            "--github_tag": tag,
            "--snapshot_date": snapshot,
        },
    )

    print(f'Starting job "{job.job_name}"...')
    job.run_job()
    job.wait_for_completion(verbose=True)

    # cleanup() is skipped for any state other than SUCCEEDED.
    if job.job_run_state != 'SUCCEEDED':
        return

    print('Job successful - cleaning up')
    job.cleanup()
def main(job_role):
    """Zip the ``gluejobutils`` package files and run the unit-test job.

    Writes the listed package files into
    ``test/glue_test/glue_py_resources/gluejobutils.zip`` (paths are
    relative to the current working directory), then builds a ``GlueJob``
    for ``test/glue_test/`` in the ``alpha-gluejobutils`` bucket, names it
    ``gluejobutils_unit_test``, runs it and waits for it to finish. The
    job is cleaned up only when its run state is ``SUCCEEDED``.

    Args:
        job_role: passed to ``GlueJob`` as ``job_role``.

    Raises:
        FileNotFoundError: if the output directory or one of the package
            files does not exist.
    """
    package_name = "gluejobutils"
    to_path = f"test/glue_test/glue_py_resources/{package_name}.zip"
    package_files = (
        "__init__.py",
        "datatypes.py",
        "s3.py",
        "utils.py",
        "record_datetimes.py",
        "df_transforms.py",
        "data/data_type_conversion.json",
    )

    # The context manager closes the archive even when a file is missing,
    # so no open handle is left behind on error.
    with zipfile.ZipFile(to_path, "w") as zf:
        for file_name in package_files:
            zf.write(os.path.join(package_name, file_name))

    g = GlueJob("test/glue_test/",
                bucket="alpha-gluejobutils",
                job_role=job_role)
    g.job_name = "gluejobutils_unit_test"
    g.run_job()

    # NOTE(review): the positional True is presumably the verbose flag -
    # confirm against GlueJob.wait_for_completion.
    g.wait_for_completion(True)
    if g.job_run_state == "SUCCEEDED":
        print("cleaning up job...")
        g.cleanup()
Esempio n. 5
0
def main(job_role):
    """Zip the ``gluejobutils`` package files and run the unit-test job.

    Writes the listed package files into
    ``test/glue_test/glue_py_resources/gluejobutils.zip`` (paths are
    relative to the current working directory), then builds a ``GlueJob``
    for ``test/glue_test/`` in the ``alpha-gluejobutils`` bucket, names it
    ``gluejobutils_unit_test``, runs it and waits for it to finish. The
    job is cleaned up only when its run state is ``SUCCEEDED``.

    Args:
        job_role: passed to ``GlueJob`` as ``job_role``.

    Raises:
        FileNotFoundError: if the output directory or one of the package
            files does not exist.
    """
    package_name = 'gluejobutils'
    to_path = f'test/glue_test/glue_py_resources/{package_name}.zip'
    package_files = (
        '__init__.py',
        'datatypes.py',
        's3.py',
        'utils.py',
        'record_datetimes.py',
        'df_transforms.py',
        'data/data_type_conversion.json',
    )

    # The context manager closes the archive even when a file is missing,
    # so no open handle is left behind on error.
    with zipfile.ZipFile(to_path, "w") as zf:
        for file_name in package_files:
            zf.write(os.path.join(package_name, file_name))

    g = GlueJob('test/glue_test/',
                bucket='alpha-gluejobutils',
                job_role=job_role)
    g.job_name = 'gluejobutils_unit_test'
    g.run_job()

    # NOTE(review): the positional True is presumably the verbose flag -
    # confirm against GlueJob.wait_for_completion.
    g.wait_for_completion(True)
    if g.job_run_state == 'SUCCEEDED':
        print("cleaning up job...")
        g.cleanup()
Esempio n. 6
0
except:
    raise Exception("You must provide a role name")

# Bucket handed to GlueJob below.
bucket = "alpha-data-linking"

# ROLE is not assigned in this part of the script; it has to be defined
# before this point.
job = GlueJob(
    "match/",
    bucket=bucket,
    job_role=ROLE,
    job_arguments={
        "--test_arg": "some_string",
        "--conf": "spark.jars.packages=graphframes:graphframes:0.6.0-spark2.3-s_2.11",
        "--enable-spark-ui": "true",
        "--spark-event-logs-path": "s3://alpha-data-linking/glue_test_delete/logsdelete",
        "--enable-continuous-cloudwatch-log": "true",
    },
)

job.job_name = "1m_p_50_e_6"

# The definition is printed before allocated_capacity is changed below.
print(job._job_definition())

job.allocated_capacity = 2

# NOTE(review): job.cleanup() is not called after the run (it is disabled
# in this script) - confirm the job resources are meant to be left in place.
job.run_job()
job.wait_for_completion()