Exemple #1
0
def make_dag(name, default_args):
    """Build a daily-scheduled DAG with three tasks chained one after another.

    Args:
        name: Passed to ``DAG`` as its first positional argument.
        default_args: Forwarded unchanged to ``DAG`` as ``default_args``.

    Returns:
        The constructed ``DAG`` after the three ``Task.add_*`` helpers have
        been called on it and the two dependencies have been set.
    """
    pipeline = DAG(name, default_args=default_args, schedule_interval='@daily')

    # Each helper receives the DAG; presumably it registers a task whose id
    # matches the strings used below -- confirm against the Task class.
    Task.add_run_cralwer(pipeline)
    Task.add_run_model_generator(pipeline)
    Task.add_stop_start_flask_api(pipeline)

    # Pairs are passed to set_dependency in this order. The 'cralwer'
    # spelling mirrors the Task helper name and must stay in sync with it.
    for first, second in (
        ('run_cralwer', 'run_model_generator'),
        ('run_model_generator', 'stop_start_flask_api'),
    ):
        pipeline.set_dependency(first, second)

    return pipeline
Exemple #2
0
             bash_command='shovel_jump.sh',
             dag=dag)
# Create one shovel_jump.sh BashOperator per task id. The instances are not
# bound to names; they are only handed `dag` via the dag= argument.
for _task_id in (
    'reports_raw_cleanup',
    'autoclaved_tarlz4_s3_sync',
    'autoclaved_jsonl_s3_sync',
):
    BashOperator(task_id=_task_id,
                 bash_command='shovel_jump.sh',
                 pool='datacollector_disk_io',
                 dag=dag)

# Task-id pairs handed to dag.set_dependency, applied in the listed order.
# reports_raw_cleanup -> reports_tgz_cleanup is NOT a dependency as
# reports_raw_cleanup uses only index file.
for _first, _second in (
    ('reports_raw_sensor', 'canning'),
    ('reports_raw_sensor', 'tar_reports_raw'),
    ('canning', 'tar_reports_raw'),
    ('tar_reports_raw', 'reports_tgz_s3_sync'),
    ('reports_tgz_s3_sync', 'reports_tgz_s3_ls'),
    ('reports_tgz_s3_sync', 'reports_tgz_cleanup'),  # can't cleanup unless synced
    ('reports_tgz_s3_ls', 'reports_tgz_cleanup'),  # data dependency
    ('canning', 'canned_s3_sync'),
):
    dag.set_dependency(_first, _second)
Exemple #3
0
             dag=dag)
# Create one shovel_jump.sh BashOperator per task id. The instances are not
# bound to names; they are only handed `dag` via the dag= argument.
for _task_id in (
    'reports_raw_cleanup',
    'sanitised_s3_ls',
    'sanitised_check',
    'sanitised_cleanup',
):
    BashOperator(task_id=_task_id,
                 bash_command='shovel_jump.sh',
                 pool='datacollector_disk_io',
                 dag=dag)

# Task-id pairs handed to dag.set_dependency, applied in the listed order.
# Several ids used here (e.g. 'canning', 'autoclaving') are not created in
# this section.
for _first, _second in (
    ('canning', 'autoclaving'),
    ('autoclaving', 'simhash_text'),
    ('autoclaving', 'meta_pg'),
    ('reports_raw_s3_ls', 'reports_raw_cleanup'),
    ('canning', 'reports_raw_cleanup'),
    ('autoclaving', 'sanitised_check'),
    ('autoclaving', 'sanitised_cleanup'),
    ('sanitised_s3_ls', 'sanitised_cleanup'),
    ('sanitised_check', 'sanitised_cleanup'),
):
    dag.set_dependency(_first, _second)
# Create one shovel_jump.sh BashOperator per task id, in this order. The
# instances are not bound to names; they are only handed `dag` via dag=.
for _task_id in (
    'canning',
    'tar_reports_raw',
    'reports_tgz_s3_sync',
    'reports_tgz_s3_ls',
    'reports_tgz_cleanup',
    'canned_s3_sync',
    'canned_s3_ls',
    'canned_cleanup',
    'autoclaving',
    'meta_pg',
    'meta_wal_flush',
    'reports_raw_cleanup',
    'autoclaved_tarlz4_s3_sync',
    'autoclaved_jsonl_s3_sync',
):
    BashOperator(
        task_id=_task_id,
        bash_command='shovel_jump.sh',
        pool='datacollector_disk_io',
        dag=dag,
    )

# Task-id pairs handed to dag.set_dependency, applied in the listed order.
# 'reports_raw_sensor' is not created in this section.
# reports_raw_cleanup -> reports_tgz_cleanup is NOT a dependency as
# reports_raw_cleanup uses only index file.
for _first, _second in (
    ('reports_raw_sensor', 'canning'),
    ('reports_raw_sensor', 'tar_reports_raw'),
    ('canning', 'tar_reports_raw'),
    ('tar_reports_raw', 'reports_tgz_s3_sync'),
    ('reports_tgz_s3_sync', 'reports_tgz_s3_ls'),
    ('reports_tgz_s3_sync', 'reports_tgz_cleanup'),  # can't cleanup unless synced
    ('reports_tgz_s3_ls', 'reports_tgz_cleanup'),  # data dependency
    ('canning', 'canned_s3_sync'),
    ('canned_s3_sync', 'canned_s3_ls'),
):
    dag.set_dependency(_first, _second)