import os
import sys

# Assumed import path: GlueJob is the deployment helper from the etl_manager package
from etl_manager.etl import GlueJob


def main():
    if len(sys.argv) == 1:
        print(f"Usage: {sys.argv[0]} YYYY-mm-dd")
        sys.exit(1)
    date = sys.argv[1]

    # Deployment settings are read from the environment
    job_bucket = os.environ["PQ_FLATTENER_GLUE_JOB_BUCKET"]
    iam_role = os.environ["PQ_FLATTENER_JOB_IAM_ROLE"]
    source_path = os.environ["PQ_FLATTENER_SOURCE_PATH"]
    dest_path = os.environ["PQ_FLATTENER_DEST_PATH"]

    job = GlueJob(
        "v1/glue_jobs/pq_flattener",
        bucket=job_bucket,
        job_role=iam_role,
        job_arguments={
            "--date": date,
            "--s3_source": source_path,
            "--s3_dest": dest_path,
        },
    )
    job.job_name = "pq_flattener"

    # Run job on AWS Glue, always cleaning up the job resources afterwards
    print(f'Starting job "{job.job_name}"...')
    try:
        job.run_job()
        job.wait_for_completion()
    finally:
        job.cleanup()
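# A minimal sketch (assumed, not the real job script) of how
# v1/glue_jobs/pq_flattener/job.py might read the job_arguments passed above.
# AWS Glue's getResolvedOptions helper strips the leading "--" from the names.
import sys

from awsglue.utils import getResolvedOptions

args = getResolvedOptions(sys.argv, ["date", "s3_source", "s3_dest"])
print(f"Flattening {args['s3_source']} -> {args['s3_dest']} for {args['date']}")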
import os
import sys
import zipfile

# Assumed import path for the GlueJob deployment helper
from etl_manager.etl import GlueJob


def main(job_role):
    # Zip the gluejobutils package so it can be shipped to Glue as a
    # python resource alongside the test job
    package_name = 'gluejobutils'
    to_path = f'test/glue_test/glue_py_resources/{package_name}.zip'
    package_files = [
        '__init__.py',
        'datatypes.py',
        'dates.py',
        's3.py',
        'utils.py',
        'dea_record_datetimes.py',
        'data/data_type_conversion.json',
    ]
    with zipfile.ZipFile(to_path, 'w') as zf:
        for filename in package_files:
            zf.write(os.path.join(package_name, filename))

    g = GlueJob('test/glue_test/', bucket='alpha-gluejobutils', job_role=job_role)
    g.job_name = 'gluejobutils_unit_test'
    g.run_job()


if __name__ == '__main__':
    # Assumed invocation: pass the IAM role name on the command line
    main(sys.argv[1])
import os

# Assumed import path for the GlueJob deployment helper
from etl_manager.etl import GlueJob


def main():
    iam_role = os.environ["IAM_ROLE"]
    github_tag = os.environ["GITHUB_TAG"]
    snapshot_date = os.environ["SNAPSHOT_DATE"]
    # The original never defined job_bucket; reading it from the environment
    # here is an assumption (the variable name JOB_BUCKET is hypothetical)
    job_bucket = os.environ["JOB_BUCKET"]

    # Get job parameters for specific glue job
    job_args = {"--github_tag": github_tag, "--snapshot_date": snapshot_date}
    job = GlueJob(
        "glue_jobs/example_job/",
        bucket=job_bucket,
        job_role=iam_role,
        job_arguments=job_args,
    )

    print(f'Starting job "{job.job_name}"...')
    job.run_job()
    job.wait_for_completion(verbose=True)

    if job.job_run_state == 'SUCCEEDED':
        print('Job successful - cleaning up')
        job.cleanup()
def main(job_role): package_name = "gluejobutils" to_path = f"test/glue_test/glue_py_resources/{package_name}.zip" zf = zipfile.ZipFile(to_path, "w") zf.write(os.path.join(package_name, "__init__.py")) zf.write(os.path.join(package_name, "datatypes.py")) zf.write(os.path.join(package_name, "s3.py")) zf.write(os.path.join(package_name, "utils.py")) zf.write(os.path.join(package_name, "record_datetimes.py")) zf.write(os.path.join(package_name, "df_transforms.py")) zf.write(os.path.join(package_name, "data/data_type_conversion.json")) zf.close() g = GlueJob("test/glue_test/", bucket="alpha-gluejobutils", job_role=job_role) g.job_name = "gluejobutils_unit_test" g.run_job() g.wait_for_completion(True) if g.job_run_state == "SUCCEEDED": print("cleaning up job...") g.cleanup()
import sys

# Assumed import path for the GlueJob deployment helper
from etl_manager.etl import GlueJob

# The original fragment began at a bare `except:`; the try block reading the
# role name from the command line is reconstructed here and is an assumption
try:
    ROLE = sys.argv[1]
except IndexError:
    raise Exception("You must provide a role name")

bucket = 'alpha-data-linking'

job = GlueJob(
    'match/',
    bucket=bucket,
    job_role=ROLE,
    job_arguments={
        "--test_arg": 'some_string',
        # Pull in the graphframes package and surface the Spark UI logs
        "--conf": 'spark.jars.packages=graphframes:graphframes:0.6.0-spark2.3-s_2.11',
        '--enable-spark-ui': 'true',
        '--spark-event-logs-path': 's3://alpha-data-linking/glue_test_delete/logsdelete',
        '--enable-continuous-cloudwatch-log': 'true',
    },
)

job.job_name = '1m_p_50_e_6'
print(job._job_definition())
job.allocated_capacity = 2

try:
    job.run_job()
    job.wait_for_completion()
finally:
    pass  # job.cleanup() disabled, leaving the job definition in place
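# A hypothetical sketch of the kind of script the 'match/' job folder might
# hold: the graphframes package requested via --conf above supports
# connected-components clustering for record linkage. The data, checkpoint
# path and column names here are illustrative, not taken from the real job.
import sys

from awsglue.utils import getResolvedOptions
from graphframes import GraphFrame
from pyspark.sql import SparkSession

args = getResolvedOptions(sys.argv, ["test_arg"])

spark = SparkSession.builder.appName("match").getOrCreate()
# connectedComponents requires a checkpoint directory (this path is assumed)
spark.sparkContext.setCheckpointDir("s3://alpha-data-linking/glue_test_delete/checkpoints")

# Vertices: one row per record. Edges: candidate links between record pairs.
vertices = spark.createDataFrame([(1,), (2,), (3,)], ["id"])
edges = spark.createDataFrame([(1, 2)], ["src", "dst"])

components = GraphFrame(vertices, edges).connectedComponents()
components.show()  # each record labelled with the cluster it belongs to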