}

with DAG('fix_s3_recording_url_pipeline',
         default_args=default_args,
         schedule_interval='*/10 * * * *',
         catchup=False) as dag:

    t1 = BashOperator(
        task_id='login_aws',
        bash_command=
        '$(aws ecr get-login --region eu-west-1 --no-include-email)')

    t2 = DockerOperator(
        task_id='fix_s3_recording_url_pipeline',
        auto_remove=True,
        image=IMAGE_NAME,
        api_version='auto',
        command=COMMAND,
        docker_url='unix://var/run/docker.sock',
        network_mode='host',
        environment={
            'DATABASE_HOST': DATABASE_HOST,
            'ELASTICSEARCH_URL': ELASTICSEARCH_URL,
            'DYNAMODB_HOST': DYNAMODB_HOST,
        },
        volumes=[LOG_DIRECTORY, BOTO_CREDENTIAL],
        force_pull=True,
    )

    t2.set_upstream(t1)
Example #2
    data = json.load(g)

def mapping(task_dict, dag1):
    # Leaf node: no child task definitions under 'ttl'.
    if not task_dict['ttl']:
        t1 = DockerOperator(
            task_id=task_dict['task_id'],
            image=task_dict['image'],
            command=eval(task_dict['command']),
            xcom_push=bool(task_dict['xcom_push']),
            dag=dag1)
        return [t1]
    else:
        # Build the child tasks first, then wire them upstream of this task.
        children = []
        for task in task_dict['ttl']:
            children.extend(mapping(task, dag1))
        t = DockerOperator(
            task_id=task_dict['task_id'],
            image=task_dict['image'],
            command=task_dict['command'],
            xcom_push=bool(task_dict['xcom_push']),
            dag=dag1)
        for child in children:
            t.set_upstream(child)
        return [t]

return1 = mapping(data, dag)
print(return1)
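The nested structure that mapping() consumes is not shown above. A minimal sketch of what json.load(g) would need to return, using the field names the function reads ('task_id', 'image', 'command', 'xcom_push', 'ttl'); the concrete values are invented for illustration:

# Hypothetical input for mapping(); field names come from the code above,
# values are made up.
data = {
    'task_id': 'root',
    'image': 'useful1',
    'command': 'run 1 2',           # non-leaf commands are used as-is
    'xcom_push': True,
    'ttl': [                        # child task definitions
        {'task_id': 'child_a', 'image': 'useful1',
         'command': "'run 3'",      # leaf commands are passed through eval()
         'xcom_push': False, 'ttl': []},
        {'task_id': 'child_b', 'image': 'useful1',
         'command': "'run 4'",
         'xcom_push': False, 'ttl': []},
    ],
}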
Example #3
fda_linker_task = SubDagOperator(
    dag=dag,
    subdag=fda_dap(parent_dag_name='fda',
                   child_dag_name='linker',
                   start_date=dag.start_date,
                   schedule_interval=dag.schedule_interval),
    task_id='linker',
)

remove_unknown_documentcloud_docs_task = DockerOperator(
    task_id='remove_unknown_documentcloud_docs',
    dag=dag,
    image='opentrials/processors:latest',
    force_pull=True,
    api_version='1.23',
    environment={
        'WAREHOUSE_URL': helpers.get_postgres_uri('warehouse_db'),
        'DATABASE_URL': helpers.get_postgres_uri('api_db'),
        'EXPLORERDB_URL': helpers.get_postgres_uri('explorer_db'),
        'LOGGING_URL': Variable.get('LOGGING_URL'),
        'DOCUMENTCLOUD_USERNAME': Variable.get('DOCUMENTCLOUD_USERNAME'),
        'DOCUMENTCLOUD_PASSWORD': Variable.get('DOCUMENTCLOUD_PASSWORD'),
        'DOCUMENTCLOUD_PROJECT': Variable.get('DOCUMENTCLOUD_PROJECT'),
        'FERNET_KEY': os.environ['FERNET_KEY'],
    },
    command='make start remove_unknown_documentcloud_docs')

remove_unknown_documentcloud_docs_task.set_upstream(fda_linker_task)
fda_linker_task.set_upstream(fda_dap_task)
Example #4
    **kwargs,
)

load_black_scholes = DockerOperator(
    task_id='load_black_scholes',
    command='python finance/data/td_ameritrade/black_scholes/load.py',
    **kwargs,
)

end_time = BashOperator(
    task_id='end_pipeline',
    bash_command='date',
    dag=dag,
)

scrape_options.set_upstream(start_time)
load_options.set_upstream(scrape_options)
table_creator_options.set_upstream(load_options)

scrape_quotes.set_upstream(scrape_options)
load_quotes.set_upstream(scrape_quotes)
table_creator_quotes.set_upstream(load_quotes)
table_creator_stocks.set_upstream(table_creator_quotes)

report_black_scholes.set_upstream(table_creator_options)
report_black_scholes.set_upstream(table_creator_stocks)
load_black_scholes.set_upstream(report_black_scholes)

report_options.set_upstream(table_creator_options)
report_options.set_upstream(table_creator_stocks)
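The shared kwargs dict that these operators expand is cut off at the top of this example. Judging from the other DockerOperator snippets on this page (same py-dw-stocks image, volume mount and bridge network), it presumably looks roughly like the sketch below; treat the exact values as an assumption:

# Assumed shape of the shared DockerOperator arguments (not shown in the
# original snippet); values are borrowed from neighbouring examples.
kwargs = {
    'image': 'py-dw-stocks',
    'auto_remove': True,
    'volumes': ['/media/nautilus/fun-times-in-python:/usr/src/app'],
    'network_mode': 'bridge',
    'dag': dag,
}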
Example #5
        'WAREHOUSE_URL': helpers.get_postgres_uri('warehouse_db'),
        'LOGGING_URL': Variable.get('LOGGING_URL'),
        'PYTHON_ENV': Variable.get('ENV'),
    },
    command='make start fda_dap'
)

processor_task = DockerOperator(
    task_id='fda_dap_processor',
    dag=dag,
    image='okibot/processors:latest',
    force_pull=True,
    environment={
        'WAREHOUSE_URL': helpers.get_postgres_uri('warehouse_db'),
        'DATABASE_URL': helpers.get_postgres_uri('api_db'),
        'EXPLORERDB_URL': helpers.get_postgres_uri('explorer_db'),
        'LOGGING_URL': Variable.get('LOGGING_URL'),
        'AWS_ACCESS_KEY_ID': Variable.get('AWS_ACCESS_KEY_ID'),
        'AWS_SECRET_ACCESS_KEY': Variable.get('AWS_SECRET_ACCESS_KEY'),
        'AWS_S3_BUCKET': Variable.get('AWS_S3_BUCKET'),
        'AWS_S3_REGION': Variable.get('AWS_S3_REGION'),
        'AWS_S3_CUSTOM_DOMAIN': Variable.get('AWS_S3_CUSTOM_DOMAIN'),
        'DOCUMENTCLOUD_USERNAME': Variable.get('DOCUMENTCLOUD_USERNAME'),
        'DOCUMENTCLOUD_PASSWORD': Variable.get('DOCUMENTCLOUD_PASSWORD'),
        'DOCUMENTCLOUD_PROJECT': Variable.get('DOCUMENTCLOUD_PROJECT'),
    },
    command='make start fda_dap'
)

processor_task.set_upstream(collector_task)
Example #6
    task_id='end_pipeline',
    bash_command='date',
    dag=dag)

tasks = {}
command_prefix = 'python finance/data/'
command_suffix = '/sql.py'
jobs = ['fred', 'internals', 'td_ameritrade', 'yahoo']
for job in jobs:
    tasks.update({job: command_prefix + job + command_suffix})

prior_task = ''
for task in tasks:
    task_id = 'create_tables_' + task
    dock_task = DockerOperator(
        task_id=task_id,
        image='py-dw-stocks',
        auto_remove=True,
        command=tasks.get(task),
        volumes=['/media/nautilus/fun-times-in-python:/usr/src/app'],
        network_mode='bridge',
        dag=dag
        )
    if prior_task:
        dock_task.set_upstream(prior_task)
    else:
        dock_task.set_upstream(start_time)
    prior_task = dock_task

end_time.set_upstream(dock_task)
Example #7
)

scrape_equities = DockerOperator(
    task_id='scrape_td_equities',
    command='python finance/data/td_ameritrade/equities/scrape.py',
    **kwargs,
)

load_equities = DockerOperator(
    task_id='load_td_equities',
    command='python finance/data/td_ameritrade/equities/load.py',
    **kwargs,
)

table_creator_equities = DockerOperator(
    task_id='update_td_equities_table',
    command='python finance/data/td_ameritrade/equities/sql.py',
    **kwargs,
)

end_time = BashOperator(
    task_id='end_pipeline',
    bash_command='date',
    dag=dag,
)

scrape_equities.set_upstream(start_time)
load_equities.set_upstream(scrape_equities)
table_creator_equities.set_upstream(load_equities)
end_time.set_upstream(table_creator_equities)
Example #8
                    image='useful1',
                    command='5 6',
                    xcom_push=True,
                    dag=dag)

t2 = PythonOperator(
    task_id='print_task',
    python_callable=print11,
    provide_context=True,
    xcom_push=True,
    templates_dict={'a3': "{{ ti.xcom_pull(task_ids='divide1_conv_on')}}"},
    dag=dag)

t3 = DockerOperator(task_id='docker2',
                    image='useful1',
                    command="{{ ti.xcom_pull(task_ids='print_task')}}" + ' 10',
                    xcom_push=True,
                    dag=dag)

t4 = PythonOperator(
    task_id='print_task2',
    python_callable=print11,
    provide_context=True,
    xcom_push=True,
    templates_dict={'a3': "{{ ti.xcom_pull(task_ids='docker2')}}"},
    dag=dag)

t2.set_upstream(t1)
t3.set_upstream(t2)
t4.set_upstream(t3)
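The print11 callable used by both PythonOperators is not included in the snippet. A minimal sketch, assuming it simply echoes and returns the rendered XCom value so the next DockerOperator's templated command can pull it:

# Hypothetical stand-in for print11 (the real body is not shown above).
# With provide_context=True the rendered templates arrive via templates_dict,
# and the return value is pushed to XCom for the downstream task to pull.
def print11(templates_dict=None, **context):
    value = templates_dict['a3']
    print(value)
    return value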
Example #9
    task_id='task_1',
    bash_command=
    'echo "Starting executor Task 1 | Passed Conf : {{ dag_run.conf["json_executor_task"] }}"',
    dag=dag)

executor = DockerOperator(
    task_id='executor',
    image='openjdk:8-jre-alpine',
    api_version='auto',
    auto_remove=True,
    volumes=[
        '/usr/local/airflow/artifacts:/usr/local/airflow/artifacts',
        '/var/run/docker.sock:/var/run/docker.sock'
    ],
    docker_url="unix://var/run/docker.sock",
    network_mode="bridge",
    environment={
        'VPC_EXECUTOR_TASK': '{{ dag_run.conf["json_executor_task"] }}'
    },
    command=
    'java -cp /usr/local/airflow/artifacts/jar-with-dependencies.jar <class>',
    dag=dag)

t2 = BashOperator(
    task_id='task_2',
    bash_command='echo "Finishing executor Task 2 | Execution Time : {{ ts }}"',
    dag=dag)

executor.set_upstream(t1)
executor.set_downstream(t2)
Example #10
    # 'pool': 'backfill',
    # 'priority_weight': 10,
    # 'end_date': datetime(2016, 1, 1),
}

dag = DAG(
    dag_id='yahoo_stocks',
    default_args=args,
    schedule_interval=None,
)

start_time = BashOperator(task_id='start_pipeline',
                          bash_command='date',
                          dag=dag)

task = DockerOperator(task_id='scrape_yahoo_stocks',
                      image='py-dw-stocks',
                      auto_remove=True,
                      command='python finance/data/yahoo/sql.py',
                      volumes=[
                          '/media/nautilus/fun-times-in-python:/usr/src/app',
                          '/media/nautilus/raw_files:/mnt'
                      ],
                      network_mode='bridge',
                      dag=dag)

end_time = BashOperator(task_id='end_pipeline', bash_command='date', dag=dag)

task.set_upstream(start_time)
end_time.set_upstream(task)
Example #11
                    image='useful2',
                    command='divide ' + str(ON_SALES) + ' ' + str(ON_CALLS),
                    xcom_push=True,
                    dag=dag)

t2 = DockerOperator(task_id='divide1_conv_off',
                    image='useful2',
                    command='divide ' + str(OFF_SALES) + ' ' + str(OFF_CALLS),
                    xcom_push=True,
                    dag=dag)

t3 = DockerOperator(task_id='numerator',
                    image='useful2',
                    command='subtract ' +
                    "{{ti.xcom_pull(task_ids='divide1_conv_on')}}" + ' ' +
                    "{{ti.xcom_pull(task_ids='divide1_conv_off')}}",
                    xcom_push=True,
                    dag=dag)

t4 = DockerOperator(task_id='divide_results',
                    image='useful2',
                    command='divide ' +
                    "{{ti.xcom_pull(task_ids='numerator')}}" + ' ' +
                    "{{ti.xcom_pull(task_ids='divide1_conv_off')}}",
                    xcom_push=True,
                    dag=dag)

t3.set_upstream(t1)
t3.set_upstream(t2)
t4.set_upstream(t3)
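The useful2 image itself is not part of the example. For this XCom chain to work, its entrypoint only has to print its result, because xcom_push=True makes the DockerOperator push the last line of container output. A hypothetical entrypoint:

# Hypothetical entrypoint for the 'useful2' image (an assumption, not from
# the original example): prints the result so DockerOperator(xcom_push=True)
# captures it as the task's XCom value.
import sys

op, a, b = sys.argv[1], float(sys.argv[2]), float(sys.argv[3])
if op == 'divide':
    print(a / b)
elif op == 'subtract':
    print(a - b)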
Example #12
        stask = {'json_executor_task': task}
        print(stask)
        json_task = json.dumps(stask)
        print(json_task)
        trigger_dag(dag_id="Executor",
                    run_id=run_id,
                    conf=json_task,
                    execution_date=execution_date,
                    replace_microseconds=False)

planner = DockerOperator(
    task_id='planner',
    image='openjdk:8-jre-alpine',
    api_version='auto',
    auto_remove=False,
    volumes=[
        '/usr/local/airflow/artifacts:/usr/local/airflow/artifacts',
        '/var/run/docker.sock:/var/run/docker.sock'
    ],
    docker_url="unix://var/run/docker.sock",
    network_mode="bridge",
    command='java -cp /usr/local/airflow/artifacts/jar-with-dependencies.jar <class> {{ ts }}',
    xcom_push=True,
    xcom_all=True,
    dag=dag)

end_task = PythonOperator(
    task_id='queue_executor_tasks',
    python_callable=schedule_executor,
    dag=dag)

planner.set_upstream(start_task)
end_task.set_upstream(planner)
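Only the inner trigger_dag() call of schedule_executor survives at the top of this example. A rough sketch of the surrounding callable, assuming the planner's XCom (pushed with xcom_all=True) is a list of output lines with one executor task per line; everything outside the trigger_dag() call is an assumption, and it additionally assumes provide_context=True is set on the PythonOperator, which the snippet above does not show:

import json

from airflow.api.common.experimental.trigger_dag import trigger_dag
from airflow.utils import timezone


def schedule_executor(**context):
    # Trigger one run of the Executor DAG per line of the planner's output.
    tasks = context['ti'].xcom_pull(task_ids='planner') or []
    for i, task in enumerate(tasks):
        execution_date = timezone.utcnow()
        run_id = 'executor_{}_{}'.format(i, execution_date.isoformat())
        stask = {'json_executor_task': task}
        json_task = json.dumps(stask)
        trigger_dag(dag_id="Executor",
                    run_id=run_id,
                    conf=json_task,
                    execution_date=execution_date,
                    replace_microseconds=False)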