def begin_pipeline(**kwargs):
    print("begin_pipeline:")
    s3_bucket = os.environ.get("S3_BUCKET", "")
    folder_path = kwargs['dag_run'].conf.get('folder_path')
    s3_file = kwargs['dag_run'].conf.get('s3_file')
    uuid_mapping_file = kwargs['dag_run'].conf.get('uuid_mapping_file')
    biobank_file = kwargs['dag_run'].conf.get('biobank_file')
    mapping_file = kwargs['dag_run'].conf.get('mapping_file')

    dp = DagPebbles()
    download_key = dp.get_download_key(s3_bucket, folder_path, s3_file)
    pipeline_state_args = {
        "s3_bucket": s3_bucket,
        "folder_path": folder_path,
        "s3_file": s3_file,
        "uuid_mapping_file": uuid_mapping_file,
        "biobank_file": biobank_file,
        "mapping_file": mapping_file,
        "download_key": download_key
    }
    dp.save_pipeline_state(**pipeline_state_args)

    kwargs["ti"].xcom_push(key="folder_path", value=folder_path)
    kwargs["ti"].xcom_push(key="s3_file", value=s3_file)
    kwargs["ti"].xcom_push(key="download_key", value=download_key)
    kwargs["ti"].xcom_push(key="uuid_mapping_file", value=uuid_mapping_file)
    kwargs["ti"].xcom_push(key="biobank_file", value=biobank_file)
    kwargs["ti"].xcom_push(key="mapping_file", value=mapping_file)
def validate_log_file(**kwargs):
    print("validate_log_file:")
    dp = DagPebbles()
    if dp.validate_pipeline_log(kwargs["ti"].xcom_pull(key='download_key')):
        return "pipeline_log_validation_passed"
    else:
        return "pipeline_log_validation_failed"
def pipeline_enable_check(**kwargs):
    dp = DagPebbles()
    if dp.pipeline_enable_check('DECRYPT_FILES'):
        kwargs["ti"].xcom_push(key="S3_BUCKET", value=os.environ.get("S3_BUCKET",""))
        kwargs["ti"].xcom_push(key="SKIP_DECRYPT_FILES", value="N")
        return "pipeline_check_passed"
    else:
        return "pipeline_check_skipped" 
def pipeline_enable_check(**kwargs):
    dp = DagPebbles()
    if dp.pipeline_enable_check('DATA_PIPELINE_INITIATOR'):
        # TODO: get these skip flags from the DB
        kwargs["ti"].xcom_push(key="SKIP_DOWNLOAD_LOG_FILE", value='N')
        kwargs["ti"].xcom_push(key="SKIP_DECRYPT_LOG_FILE", value='N')
        return "pipeline_check_passed"
    else:
        return "pipeline_check_skipped"
def begin_pipeline(**kwargs):
    dp = DagPebbles()
    pipeline = dp.get_current_pipeline()
    print(pipeline)
    packed_dir = os.environ.get("BCH_HPDS_INTERNAL")
    kwargs["ti"].xcom_push(key='packed_dir', value=packed_dir)
    hpds_encrypted_file_name = dp.get_hpds_packed_file_name() + ".encrypted"
    kwargs["ti"].xcom_push(key='hpds_encrypted_file_name', value=hpds_encrypted_file_name)
    hpds_encrypted_file = packed_dir + '/' + hpds_encrypted_file_name
    kwargs["ti"].xcom_push(key='hpds_encrypted_file', value=hpds_encrypted_file)
def begin_pipeline(**kwargs):
    print("begin_pipeline:")
    dp = DagPebbles()
    pipeline = dp.get_current_pipeline() 
    s3_bucket = os.environ.get("S3_BUCKET","")
    folder_path = pipeline['log_file_path']  
    s3_file = pipeline['log_file_name'] 
    download_key = dp.get_download_key(s3_bucket, folder_path, s3_file) 
    kwargs["ti"].xcom_push(key="folder_path", value=folder_path)
    kwargs["ti"].xcom_push(key="s3_file", value=s3_file)   
    kwargs["ti"].xcom_push(key="download_key", value=download_key) 
Example #7
def begin_pipeline(**kwargs):
    dp = DagPebbles()
    pipeline = dp.get_current_pipeline()
    print(pipeline)
    current_time = datetime.now()
    packed_file_name = "hpds_phenotype_" + os.environ.get(
        "BCH_PIC_SURE_HPDS_ALS_TAG") + "_" + current_time.strftime(
            '%m_%d_%Y_%H_%M_%S') + "_" + os.environ.get(
                "BCH_PIC_SURE_HPDS_ETL_TAG") + ".tar.gz"
    packed_dir = os.environ.get("BCH_HPDS_INTERNAL")
    kwargs["ti"].xcom_push(key='packed_file_name', value=packed_file_name)
    kwargs["ti"].xcom_push(key='packed_dir', value=packed_dir)
    dp.save_hpds_package_file_name(packed_file_name)
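For reference, a standalone sketch of the file name this produces, using hypothetical tag values in place of the environment variables:

from datetime import datetime

als_tag = "als_v1"   # placeholder for BCH_PIC_SURE_HPDS_ALS_TAG
etl_tag = "etl_v2"   # placeholder for BCH_PIC_SURE_HPDS_ETL_TAG
ts = datetime(2021, 1, 1, 12, 30, 0).strftime('%m_%d_%Y_%H_%M_%S')
print("hpds_phenotype_" + als_tag + "_" + ts + "_" + etl_tag + ".tar.gz")
# hpds_phenotype_als_v1_01_01_2021_12_30_00_etl_v2.tar.gz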
Example #8
def stage_custom_dmp_files(**kwargs):
    print("stage_custom_dmp_files:")
    dp = DagPebbles()
    custom_log_file = kwargs['dag_run'].conf.get('custom_log_file')
    custom_dmp_file = kwargs['dag_run'].conf.get('custom_dmp_file')
    pipeline_args = {
        "custom_log_file": custom_log_file,
        "custom_dmp_file": custom_dmp_file
    }
    dp.save_pipeline_state(**pipeline_args)
    dp.stage_custom_dmp_files(**pipeline_args)
Example #9
def stage_uuid_mapping_file(**kwargs):
    print("stage_uuid_mapping_file:")
    dp = DagPebbles()
    dp.stage_uuid_mapping_file(log_file_id=None)
Example #10
def stage_biobank_file(**kwargs):
    print("stage_biobank_file:")
    dp = DagPebbles()
    dp.stage_biobank_file(log_file_id=None)
 
t_end = PythonOperator(
    task_id="end",
    python_callable=end,
    provide_context=True,
    trigger_rule="none_failed",
    dag=dag,
)


t_pipeline_begin >> t_check_pipeline
t_check_pipeline >> t_pipeline_check_skipped >> t_end_pipeline
t_check_pipeline >> t_pipeline_check_passed

try:
    dp = DagPebbles()
    pipeline = dp.get_current_pipeline()
    s3_file = pipeline['log_file_name']
    target_log_file = pipeline['log_file_name'].replace(".encrypted", "")
    decrypt_log_file_cmd = "/opt/bitnami/airflow/airflow-data/scripts/decrypt_s3_file.sh  " + s3_file + " {{ ti.xcom_pull(key='SKIP_DECRYPT_FILES')}} "
    t_decrypt_log_file = BashOperator(
        task_id='decrypt_log_file',
        bash_command=decrypt_log_file_cmd,
        dag=dag)
    t_pipeline_check_passed >> t_decrypt_log_file

    files = dp.get_files(log_file_id=None, type='decrypt')

    if files is None or len(files) == 0:
        t_decrypt_log_file >> t_end_pipeline
    else:
def load_data(**kwargs):
    print("load_data:")
    dp = DagPebbles()
    dp.load_data(log_file_id=None)
Example #13
def pipeline_enable_check(**kwargs):
    dp = DagPebbles()
    if dp.pipeline_enable_check('CONCEPT_DIM_MAPPING'):
        return "pipeline_check_passed"
    else:
        return "pipeline_check_skipped"
Example #14
def notify(**kwargs):
    dp = DagPebbles()
    print("notify")
 
t_end = PythonOperator(
    task_id="end",
    python_callable=end,
    provide_context=True,
    trigger_rule="none_failed",
    dag=dag,
)


t_pipeline_begin >> t_check_pipeline
t_check_pipeline >> t_pipeline_check_skipped >> t_end_pipeline
t_check_pipeline >> t_pipeline_check_passed

try:
    dp = DagPebbles()
    pipeline = dp.get_current_pipeline()
    s3_file = pipeline['log_file_name']
    s3_file = DATA_LOCATION + "/" + s3_file
    transfer_log_file_cmd = "perl  /opt/bitnami/airflow/airflow-data/scripts/transfer_file_rds.pl   " + s3_file + "   {{ ti.xcom_pull(key='SKIP_TRANSFER_FILES')}}"

    print("transfer_log_file_cmd: ")
    print(transfer_log_file_cmd)
    t_transfer_log_file = BashOperator(
        task_id='transfer_log_file',
        bash_command=transfer_log_file_cmd,
        dag=dag)

    t_pipeline_check_passed >> t_transfer_log_file

    files = dp.get_files(log_file_id=None, type='transfer')
def stage_dmp_files1(**kwargs):
    print("stage_dmp_files1:")
    dp = DagPebbles()
    dp.stage_dmp_files1(log_file_id=None)
Example #17
def clean_hpds_source_data(**kwargs):
    dp = DagPebbles()
    dp.clean_hpds_source_data()
Example #18
def recreate_hpds_source_data(**kwargs):
    dp = DagPebbles()
    dp.recreate_bch_hpds_data()
Example #19
def begin_pipeline(**kwargs):
    dp = DagPebbles()
    pipeline = dp.get_current_pipeline()
    print(pipeline)
Example #20
def pipeline_enable_check(**kwargs):
    dp = DagPebbles()
    if dp.pipeline_enable_check('STAGE_CUSTOM_DMP_FILES'):
        return "pipeline_check_passed"
    else:
        return "pipeline_check_skipped"
def save_pipeline_log(**kwargs):
    print("save_pipeline_log:")
    dp = DagPebbles()
    dp.save_pipeline_log(kwargs["ti"].xcom_pull(key='folder_path'),
                         kwargs["ti"].xcom_pull(key='s3_file'))
Example #22
def cleanup(**kwargs):
    dp = DagPebbles()
    print("cleanup")
def pipeline_enable_check(**kwargs):
    dp = DagPebbles()
    if dp.pipeline_enable_check('DATA_LOAD'):
        return "pipeline_check_passed"
    else:
        return "pipeline_check_skipped"
Example #24
def end(**kwargs):
    dp = DagPebbles()
    print("end")

t_end = PythonOperator(
    task_id="end",
    python_callable=end,
    provide_context=True,
    trigger_rule="none_failed",
    dag=dag,
)


t_pipeline_begin >> t_check_pipeline
t_check_pipeline >> t_pipeline_check_skipped >> t_end_pipeline
t_check_pipeline >> t_pipeline_check_passed

try:
    dp = DagPebbles()
    pipeline = dp.get_current_pipeline()
    s3_bucket = os.environ.get("S3_BUCKET", "")
    folder_path = pipeline['log_file_path']
    s3_file = pipeline['log_file_name']
    download_key = dp.get_download_key(s3_bucket, folder_path, s3_file)
    download_log_file_cmd = "/opt/bitnami/airflow/airflow-data/scripts/download_s3_file.sh  " + download_key + " " + s3_file + " " + "N"
    t_download_log_file = BashOperator(
        task_id='download_log_file',
        bash_command=download_log_file_cmd,
        dag=dag)
    t_pipeline_check_passed >> t_download_log_file

    files = dp.get_files(log_file_id=None, type='download')
    if files is None or len(files) == 0:
        t_download_log_file >> t_end_pipeline