def main():
    """Run the pq_flattener AWS Glue job for a given snapshot date.

    Expects the date (YYYY-mm-dd) as the single CLI argument and reads
    job configuration from the PQ_FLATTENER_* environment variables.
    Exits with status 1 when no date argument is supplied.
    """
    if len(sys.argv) == 1:
        print(f"Usage: {sys.argv[0]} YYYY-mm-dd")
        sys.exit(1)
    date = sys.argv[1]

    # Raises KeyError immediately if any required setting is missing.
    job_bucket = os.environ["PQ_FLATTENER_GLUE_JOB_BUCKET"]
    iam_role = os.environ["PQ_FLATTENER_JOB_IAM_ROLE"]
    source_path = os.environ["PQ_FLATTENER_SOURCE_PATH"]
    dest_path = os.environ["PQ_FLATTENER_DEST_PATH"]

    job = GlueJob(
        "v1/glue_jobs/pq_flattener",
        bucket=job_bucket,
        job_role=iam_role,
        job_arguments={
            "--date": date,
            "--s3_source": source_path,
            "--s3_dest": dest_path,
        },
    )
    # Plain string: the original used an f-string with no placeholders.
    job.job_name = "pq_flattener"

    # Run job on AWS Glue; always clean up job resources, even on failure.
    print(f'Starting job "{job.job_name}"...')
    try:
        job.run_job()
        job.wait_for_completion()
    finally:
        job.cleanup()
def main():
    """Run the example Glue job, configured entirely from environment variables."""
    # BUG FIX: the original referenced ``job_bucket`` without ever defining it,
    # which raises NameError at runtime. Read it from the environment like the
    # other settings. NOTE(review): confirm the variable name ("JOB_BUCKET")
    # against the deployment configuration.
    job_bucket = os.environ["JOB_BUCKET"]
    iam_role = os.environ["IAM_ROLE"]
    github_tag = os.environ["GITHUB_TAG"]
    snapshot_date = os.environ["SNAPSHOT_DATE"]

    # Get job parameters for specific glue job
    job_args = {"--github_tag": github_tag, "--snapshot_date": snapshot_date}
    job = GlueJob(
        # Plain string: the original f-string had no placeholders.
        "glue_jobs/example_job/",
        bucket=job_bucket,
        job_role=iam_role,
        job_arguments=job_args,
    )

    print(f'Starting job "{job.job_name}"...')
    job.run_job()
    job.wait_for_completion(verbose=True)

    # Clean up only on success so a failed run can still be inspected.
    if job.job_run_state == 'SUCCEEDED':
        print('Job successful - cleaning up')
        job.cleanup()
def main(job_role):
    """Bundle gluejobutils into a zip resource and run the unit-test Glue job.

    Parameters
    ----------
    job_role : str
        IAM role name the Glue job runs under.
    """
    package_name = "gluejobutils"
    to_path = f"test/glue_test/glue_py_resources/{package_name}.zip"

    # Package members shipped to Glue as a py resource.
    members = [
        "__init__.py",
        "datatypes.py",
        "s3.py",
        "utils.py",
        "record_datetimes.py",
        "df_transforms.py",
        "data/data_type_conversion.json",
    ]
    # Context manager guarantees the archive is closed (and flushed) even if
    # one of the writes raises — the original leaked the handle on error.
    with zipfile.ZipFile(to_path, "w") as zf:
        for member in members:
            zf.write(os.path.join(package_name, member))

    g = GlueJob("test/glue_test/", bucket="alpha-gluejobutils", job_role=job_role)
    g.job_name = "gluejobutils_unit_test"
    g.run_job()
    g.wait_for_completion(True)

    # Clean up only on success so a failed run's resources can be inspected.
    if g.job_run_state == "SUCCEEDED":
        print("cleaning up job...")
        g.cleanup()
def main(job_role):
    """Zip the gluejobutils package and execute its unit-test Glue job.

    Parameters
    ----------
    job_role : str
        IAM role name the Glue job runs under.
    """
    package_name = 'gluejobutils'
    to_path = f'test/glue_test/glue_py_resources/{package_name}.zip'

    # Files that make up the package payload uploaded to Glue.
    members = (
        '__init__.py',
        'datatypes.py',
        's3.py',
        'utils.py',
        'record_datetimes.py',
        'df_transforms.py',
        'data/data_type_conversion.json',
    )
    # ``with`` closes the archive even if a write fails — the original left
    # the file handle open on error.
    with zipfile.ZipFile(to_path, 'w') as zf:
        for member in members:
            zf.write(os.path.join(package_name, member))

    g = GlueJob('test/glue_test/', bucket='alpha-gluejobutils', job_role=job_role)
    g.job_name = 'gluejobutils_unit_test'
    g.run_job()
    g.wait_for_completion(True)

    # Resources are kept on failure for debugging; cleaned up only on success.
    if g.job_run_state == 'SUCCEEDED':
        print("cleaning up job...")
        g.cleanup()
from etl_manager.etl import GlueJob

# Arguments passed straight through to the Glue job script.
job_arguments = {
    '--test_arg': 'this is a test',
    '--enable-metrics': '',
}

job = GlueJob(
    'gluejob/',
    bucket='alpha-mojap-curated-open-data',
    job_role='airflow_osrm_scraper',
    job_arguments=job_arguments,
)
job.allocated_capacity = 4

try:
    job.run_job()
    job.wait_for_completion()
finally:
    # Always remove job resources, even when the run fails.
    job.cleanup()