import os

# DagPebbles is the project's pipeline-state helper, assumed imported elsewhere.

def begin_pipeline(**kwargs):
    print("begin_pipeline:")
    # Read the target bucket from the environment and the per-run
    # parameters from the dag_run conf supplied at trigger time.
    s3_bucket = os.environ.get("S3_BUCKET", "")
    folder_path = kwargs['dag_run'].conf.get('folder_path')
    s3_file = kwargs['dag_run'].conf.get('s3_file')
    uuid_mapping_file = kwargs['dag_run'].conf.get('uuid_mapping_file')
    biobank_file = kwargs['dag_run'].conf.get('biobank_file')
    mapping_file = kwargs['dag_run'].conf.get('mapping_file')

    # Resolve the S3 download key and persist the run's state.
    dp = DagPebbles()
    download_key = dp.get_download_key(s3_bucket, folder_path, s3_file)
    pipeline_state_args = {
        "s3_bucket": s3_bucket,
        "folder_path": folder_path,
        "s3_file": s3_file,
        "uuid_mapping_file": uuid_mapping_file,
        "biobank_file": biobank_file,
        "mapping_file": mapping_file,
        "download_key": download_key,
    }
    dp.save_pipeline_state(**pipeline_state_args)

    # Publish the values over XCom so downstream tasks can read them.
    kwargs["ti"].xcom_push(key="folder_path", value=folder_path)
    kwargs["ti"].xcom_push(key="s3_file", value=s3_file)
    kwargs["ti"].xcom_push(key="download_key", value=download_key)
    kwargs["ti"].xcom_push(key="uuid_mapping_file", value=uuid_mapping_file)
    kwargs["ti"].xcom_push(key="biobank_file", value=biobank_file)
    kwargs["ti"].xcom_push(key="mapping_file", value=mapping_file)
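# A minimal sketch (not from the original file) of how this callable is
# typically registered with the DAG. Under Airflow 1.x, provide_context=True
# is what makes dag_run and ti available in **kwargs; the variable name
# t_pipeline_begin matches the dependency chain used further below.
from airflow.operators.python_operator import PythonOperator

t_pipeline_begin = PythonOperator(
    task_id='begin_pipeline',
    python_callable=begin_pipeline,
    provide_context=True,  # exposes dag_run.conf and ti in kwargs
    dag=dag,
)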
def begin_pipeline(**kwargs):
    print("begin_pipeline:")
    # Variant that reads its parameters from the saved pipeline state
    # rather than from the dag_run conf.
    dp = DagPebbles()
    pipeline = dp.get_current_pipeline()
    s3_bucket = os.environ.get("S3_BUCKET", "")
    folder_path = pipeline['log_file_path']
    s3_file = pipeline['log_file_name']
    download_key = dp.get_download_key(s3_bucket, folder_path, s3_file)

    # Publish the resolved values over XCom for downstream tasks.
    kwargs["ti"].xcom_push(key="folder_path", value=folder_path)
    kwargs["ti"].xcom_push(key="s3_file", value=s3_file)
    kwargs["ti"].xcom_push(key="download_key", value=download_key)
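# Sketch of the consuming side, assuming a downstream task in the same DAG
# run: values pushed above can be pulled back by key. The task_id
# 'begin_pipeline' and this callable's name are illustrative assumptions.
def report_download_target(**kwargs):
    ti = kwargs["ti"]
    download_key = ti.xcom_pull(key="download_key", task_ids="begin_pipeline")
    s3_file = ti.xcom_pull(key="s3_file", task_ids="begin_pipeline")
    print("downloading", download_key, "->", s3_file)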
    dag=dag,
)

t_pipeline_begin >> t_check_pipeline
t_check_pipeline >> t_pipeline_check_skipped >> t_end_pipeline
t_check_pipeline >> t_pipeline_check_passed

try:
    dp = DagPebbles()
    pipeline = dp.get_current_pipeline()
    s3_bucket = os.environ.get("S3_BUCKET", "")
    folder_path = pipeline['log_file_path']
    s3_file = pipeline['log_file_name']
    download_key = dp.get_download_key(s3_bucket, folder_path, s3_file)

    # Download the pipeline log file first; the trailing "N" flag is
    # passed through to the helper script unchanged.
    download_log_file_cmd = (
        "/opt/bitnami/airflow/airflow-data/scripts/download_s3_file.sh "
        + download_key + " " + s3_file + " N"
    )
    t_download_log_file = BashOperator(
        task_id='download_log_file',
        bash_command=download_log_file_cmd,
        dag=dag,
    )
    t_pipeline_check_passed >> t_download_log_file

    # Fan out one download task per file listed in the pipeline log;
    # if there is nothing to download, go straight to the end task.
    files = dp.get_files(log_file_id=None, type='download')
    if files is None or len(files) == 0:
        t_download_log_file >> t_end_pipeline
    else:
        for index, file in enumerate(files):
            s3_bucket = os.environ.get("S3_BUCKET", "")
            folder_path = pipeline['log_file_path']
            s3_file = file
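            # The original loop body is truncated at this point. A plausible
            # continuation, mirroring the download_log_file task above; the
            # 'download_file_<index>' task_id pattern and the dependency
            # chaining are assumptions, not the original code.
            download_key = dp.get_download_key(s3_bucket, folder_path, s3_file)
            download_file_cmd = (
                "/opt/bitnami/airflow/airflow-data/scripts/download_s3_file.sh "
                + download_key + " " + s3_file + " N"
            )
            t_download_file = BashOperator(
                task_id='download_file_' + str(index),
                bash_command=download_file_cmd,
                dag=dag,
            )
            t_download_log_file >> t_download_file >> t_end_pipeline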