def begin_pipeline(**kwargs):
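    """Read run parameters from dag_run.conf, persist them as pipeline state,
    and push them to XCom for downstream tasks."""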
    print("begin_pipeline:")
    s3_bucket = os.environ.get("S3_BUCKET", "")
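    # The DAG run is expected to be triggered with a conf payload shaped roughly
    # like the following (illustrative values only):
    #   {
    #       "folder_path": "incoming/2024-01-01",
    #       "s3_file": "export.zip",
    #       "uuid_mapping_file": "uuid_mapping.csv",
    #       "biobank_file": "biobank.csv",
    #       "mapping_file": "mapping.csv",
    #   }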
    folder_path = kwargs['dag_run'].conf.get('folder_path')
    s3_file = kwargs['dag_run'].conf.get('s3_file')
    uuid_mapping_file = kwargs['dag_run'].conf.get('uuid_mapping_file')
    biobank_file = kwargs['dag_run'].conf.get('biobank_file')
    mapping_file = kwargs['dag_run'].conf.get('mapping_file')

    dp = DagPebbles()
    # Resolve the S3 key of the incoming file and persist the run parameters
    # so downstream tasks can look them up.
    download_key = dp.get_download_key(s3_bucket, folder_path, s3_file)
    pipeline_state_args = {
        "s3_bucket": s3_bucket,
        "folder_path": folder_path,
        "s3_file": s3_file,
        "uuid_mapping_file": uuid_mapping_file,
        "biobank_file": biobank_file,
        "mapping_file": mapping_file,
        "download_key": download_key
    }
    dp.save_pipeline_state(**pipeline_state_args)

    kwargs["ti"].xcom_push(key="folder_path", value=folder_path)
    kwargs["ti"].xcom_push(key="s3_file", value=s3_file)
    kwargs["ti"].xcom_push(key="download_key", value=download_key)
    kwargs["ti"].xcom_push(key="uuid_mapping_file", value=uuid_mapping_file)
    kwargs["ti"].xcom_push(key="biobank_file", value=biobank_file)
    kwargs["ti"].xcom_push(key="mapping_file", value=mapping_file)


def begin_pipeline(**kwargs):
    """Look up the current pipeline via DagPebbles and push the log file's
    folder, name, and S3 download key to XCom for downstream tasks."""
    print("begin_pipeline:")
    dp = DagPebbles()
    pipeline = dp.get_current_pipeline()
    s3_bucket = os.environ.get("S3_BUCKET", "")
    folder_path = pipeline['log_file_path']
    s3_file = pipeline['log_file_name']
    download_key = dp.get_download_key(s3_bucket, folder_path, s3_file)
    kwargs["ti"].xcom_push(key="folder_path", value=folder_path)
    kwargs["ti"].xcom_push(key="s3_file", value=s3_file)
    kwargs["ti"].xcom_push(key="download_key", value=download_key)
    # NOTE: task_id and provide_context below are assumed values.
    t_pipeline_begin = PythonOperator(
        task_id="begin_pipeline",
        python_callable=begin_pipeline,
        provide_context=True,
        dag=dag,
    )
    
    
    # Gate the pipeline: after the check task, either skip straight to the end
    # or continue with the main flow.
    t_pipeline_begin >> t_check_pipeline
    t_check_pipeline >> t_pipeline_check_skipped >> t_end_pipeline
    t_check_pipeline >> t_pipeline_check_passed

    
    try:
        dp = DagPebbles()
        pipeline = dp.get_current_pipeline()
        s3_bucket = os.environ.get("S3_BUCKET", "")
        folder_path = pipeline['log_file_path']
        s3_file = pipeline['log_file_name']
        download_key = dp.get_download_key(s3_bucket, folder_path, s3_file)
        # Shell command that fetches the pipeline's log file from S3.
        download_log_file_cmd = (
            "/opt/bitnami/airflow/airflow-data/scripts/download_s3_file.sh "
            + download_key + " " + s3_file + " N"
        )
        t_download_log_file = BashOperator(
            task_id='download_log_file',
            bash_command=download_log_file_cmd,
            dag=dag,
        )
        t_pipeline_check_passed >> t_download_log_file
        
        # If there is nothing to download, go straight to the end of the
        # pipeline; otherwise handle each file to be downloaded.
        files = dp.get_files(log_file_id=None, type='download')
        if not files:
            t_download_log_file >> t_end_pipeline
        else:
            for index, file in enumerate(files):
                s3_bucket = os.environ.get("S3_BUCKET", "")
                folder_path = pipeline['log_file_path']
                s3_file = file