# Airflow DAG for the 'takeda' source: collect, process, then merge trial
# identifiers, once a month.
import datetime

from airflow.models import DAG
from airflow.operators.latest_only_operator import LatestOnlyOperator

import utils.helpers as helpers

args = {
    'owner': 'airflow',
    'depends_on_past': False,
    # The start date must be a fixed point in time. A moving value such as
    # utcnow() is re-evaluated every time the scheduler parses this file, so
    # the first '@monthly' interval never closes and no run is ever created.
    'start_date': datetime.datetime(2017, 4, 1),
    'retries': 1,
}

dag = DAG(
    dag_id='takeda',
    default_args=args,
    max_active_runs=1,
    schedule_interval='@monthly',
)

# With a fixed start date in the past the scheduler creates one run per
# elapsed month; this gate keeps the real work to the most recent run only.
latest_only_task = LatestOnlyOperator(
    task_id='latest_only',
    dag=dag,
)

collector_task = helpers.create_collector_task(
    name='takeda_collector',
    dag=dag,
)

processor_task = helpers.create_processor_task(
    name='takeda_processor',
    dag=dag,
)

merge_trials_identifiers_task = helpers.create_processor_task(
    name='merge_trials_identifiers',
    dag=dag,
)

# latest_only -> collector -> processor -> merge_trials_identifiers
collector_task.set_upstream(latest_only_task)
processor_task.set_upstream(collector_task)
merge_trials_identifiers_task.set_upstream(processor_task)
# Airflow DAG for the 'icdcm' source: a collector task followed by a
# processor task, scheduled monthly with at most one active run.
from datetime import datetime

from airflow.models import DAG

import utils.helpers as helpers

# Defaults applied to every task in the DAG.
args = dict(
    owner='airflow',
    depends_on_past=False,
    start_date=datetime(2016, 12, 1),
    retries=1,
)

dag = DAG(
    dag_id='icdcm',
    default_args=args,
    max_active_runs=1,
    schedule_interval='@monthly',
)

collector_task = helpers.create_collector_task(name='icdcm', dag=dag)
processor_task = helpers.create_processor_task(name='icdcm', dag=dag)

# The processor runs only after the collector has finished.
collector_task.set_downstream(processor_task)
# NOTE(review): this chunk begins inside a call that is opened above the
# visible source (presumably `dag = DAG(`) -- the leading arguments are kept
# exactly as found.
    dag_id='actrn',
    default_args=args,
    max_active_runs=1,  # never more than one active run of this DAG
    schedule_interval='@monthly'
)

# Gate task: every other task in this DAG is wired downstream of it.
# NOTE(review): LatestOnlyOperator presumably skips downstream work for runs
# that are not the most recent one -- confirm against the Airflow docs.
latest_only_task = LatestOnlyOperator(
    task_id='latest_only',
    dag=dag,
)

# The collector is launched with an explicit command line.
# NOTE(review): 2001-01-01 looks like the earliest date to collect -- confirm.
collector_task = helpers.create_collector_task(
    name='actrn',
    dag=dag,
    command='make start actrn 2001-01-01'
)

processor_task = helpers.create_processor_task(
    name='actrn',
    dag=dag
)

# Final step: a task built by create_trigger_subdag_task for the
# 'merge_identifiers_and_reindex' DAG id.
merge_identifiers_and_reindex_task = helpers.create_trigger_subdag_task(
    trigger_dag_id='merge_identifiers_and_reindex',
    dag=dag
)

# Execution order:
# latest_only -> collector -> processor -> merge_identifiers_and_reindex
collector_task.set_upstream(latest_only_task)
processor_task.set_upstream(collector_task)
merge_identifiers_and_reindex_task.set_upstream(processor_task)
# Airflow DAG for the 'hra' source: collector -> processor -> linker,
# scheduled monthly with at most one active run.
import utils.helpers as helpers

# Defaults applied to every task in the DAG.
args = dict(
    owner='airflow',
    depends_on_past=False,
    start_date=datetime(2016, 12, 1),
    retries=1,
)

dag = airflow.models.DAG(
    dag_id='hra',
    default_args=args,
    max_active_runs=1,
    schedule_interval='@monthly',
)

# Each environment entry is read from the Airflow Variable of the same name
# and handed to the collector under that name.
collector_task = helpers.create_collector_task(
    name='hra',
    dag=dag,
    environment={
        variable_name: airflow.models.Variable.get(variable_name)
        for variable_name in ('HRA_ENV', 'HRA_URL', 'HRA_USER', 'HRA_PASS')
    },
)

processor_task = helpers.create_processor_task(name='hra', dag=dag)
hra_linker_task = helpers.create_processor_task(name='hra_linker', dag=dag)

# collector -> processor -> hra_linker
collector_task.set_downstream(processor_task)
processor_task.set_downstream(hra_linker_task)
# NOTE(review): this chunk begins inside a call that is opened above the
# visible source -- the leading argument is kept exactly as found.
    dag=dag,
)

# Transfer task for the clinicaltrials.gov search export.
# NOTE(review): HTTPToS3Transfer is defined outside this chunk; presumably it
# downloads `url` (with `url_params`) and writes the response to `s3_url`.
save_nct_xml_to_s3_task = HTTPToS3Transfer(
    task_id='save_nct_xml_to_s3',
    dag=dag,
    url='https://clinicaltrials.gov/search',
    url_params={
        'resultsxml': 'True',
        'rcv_s': '01/01/2001',
        # Template: reformats `end_date` from YYYY-MM-DD to DD/MM/YYYY.
        'rcv_e': '{{ macros.ds_format(end_date, "%Y-%m-%d", "%d/%m/%Y") }}',
    },
    s3_conn_id='datastore_s3',
    # Same location as NCT_DATA_URL, with the http:// scheme swapped for s3://.
    s3_url=NCT_DATA_URL.replace('http://', 's3://'),
)

# The collector command receives NCT_DATA_URL as its argument.
collector_task = helpers.create_collector_task(
    name='nct',
    dag=dag,
    command='make start nct {url}'.format(url=NCT_DATA_URL))

processor_task = helpers.create_processor_task(name='nct', dag=dag)

# Final step: a task built by create_trigger_subdag_task for the
# 'merge_identifiers_and_reindex' DAG id.
merge_identifiers_and_reindex_task = helpers.create_trigger_subdag_task(
    trigger_dag_id='merge_identifiers_and_reindex',
    dag=dag)

# Execution order: latest_only -> save_nct_xml_to_s3 -> collector
# -> processor -> merge_identifiers_and_reindex
save_nct_xml_to_s3_task.set_upstream(latest_only_task)
collector_task.set_upstream(save_nct_xml_to_s3_task)
processor_task.set_upstream(collector_task)
merge_identifiers_and_reindex_task.set_upstream(processor_task)
# NOTE(review): this chunk begins on the closing brace of a dict that is
# opened above the visible source (presumably `args = {`).
}

# Monthly 'pubmed' DAG, limited to one active run at a time.
dag = DAG(dag_id='pubmed',
          default_args=args,
          max_active_runs=1,
          schedule_interval='@monthly')

# Gate task: every other task in this DAG is wired downstream of it.
latest_only_task = LatestOnlyOperator(
    task_id='latest_only',
    dag=dag,
)

# The collector is launched with an explicit command line.
# NOTE(review): the two dates look like a deliberately wide from/to range
# meaning "everything" -- confirm against the collector.
collector_task = helpers.create_collector_task(
    name='pubmed',
    dag=dag,
    command='make start pubmed 1900-01-01 2100-01-01')

# Three processor tasks, run one after another (see wiring below).
unregistered_trials_task = helpers.create_processor_task(
    name='pubmed_unregistered_trials',
    dag=dag)

trials_remover_task = helpers.create_processor_task(name='trial_remover',
                                                    dag=dag)

pubmed_publications_task = helpers.create_processor_task(
    name='pubmed_publications',
    dag=dag)

# Final step: a task built by create_trigger_subdag_task for the
# 'merge_identifiers_and_reindex' DAG id.
merge_identifiers_and_reindex_task = helpers.create_trigger_subdag_task(
    trigger_dag_id='merge_identifiers_and_reindex',
    dag=dag)

# Execution order: latest_only -> collector -> pubmed_unregistered_trials
# -> trial_remover -> pubmed_publications -> merge_identifiers_and_reindex
collector_task.set_upstream(latest_only_task)
unregistered_trials_task.set_upstream(collector_task)
trials_remover_task.set_upstream(unregistered_trials_task)
pubmed_publications_task.set_upstream(trials_remover_task)
merge_identifiers_and_reindex_task.set_upstream(pubmed_publications_task)
# Airflow DAG for the 'isrctn' source: latest-only gate, collector,
# processor, then the merge-identifiers-and-reindex trigger. Monthly, with
# at most one active run.
import utils.helpers as helpers

# Defaults applied to every task in the DAG.
args = dict(
    owner='airflow',
    depends_on_past=False,
    start_date=datetime.datetime(2017, 4, 1),
    retries=1,
)

dag = DAG(
    dag_id='isrctn',
    default_args=args,
    max_active_runs=1,
    schedule_interval='@monthly',
)

latest_only_task = LatestOnlyOperator(task_id='latest_only', dag=dag)

collector_task = helpers.create_collector_task(
    name='isrctn',
    dag=dag,
    command='make start isrctn 2001-01-01',
)

processor_task = helpers.create_processor_task(name='isrctn', dag=dag)

merge_identifiers_and_reindex_task = helpers.create_trigger_subdag_task(
    trigger_dag_id='merge_identifiers_and_reindex',
    dag=dag,
)

# latest_only -> collector -> processor -> merge_identifiers_and_reindex
latest_only_task.set_downstream(collector_task)
collector_task.set_downstream(processor_task)
processor_task.set_downstream(merge_identifiers_and_reindex_task)
# Airflow DAG for the 'cochrane_reviews' source: latest-only gate, collector,
# processor. Monthly, with at most one active run.

# Defaults applied to every task in the DAG.
args = dict(
    owner='airflow',
    depends_on_past=False,
    start_date=datetime(2016, 12, 1),
    retries=1,
)

dag = airflow.models.DAG(
    dag_id='cochrane_reviews',
    default_args=args,
    max_active_runs=1,
    schedule_interval='@monthly',
)

latest_only_task = LatestOnlyOperator(task_id='latest_only', dag=dag)

# The archive URL is read from the Airflow Variable of the same name and
# passed to the collector through its environment.
collector_task = helpers.create_collector_task(
    name='cochrane_reviews',
    dag=dag,
    environment=dict(
        COCHRANE_ARCHIVE_URL=airflow.models.Variable.get(
            'COCHRANE_ARCHIVE_URL'
        ),
    ),
)

processor_task = helpers.create_processor_task(
    name='cochrane_reviews',
    dag=dag,
)

# latest_only -> collector -> processor
latest_only_task.set_downstream(collector_task)
collector_task.set_downstream(processor_task)
# Airflow DAG for 'data_contributions': a latest-only gate followed by one
# processor task. Daily, with at most one active run.
from airflow.models import DAG
from airflow.operators.latest_only_operator import LatestOnlyOperator

import utils.helpers as helpers

# Defaults applied to every task: one retry, ten minutes after a failure.
args = dict(
    owner='airflow',
    depends_on_past=False,
    start_date=datetime.datetime(2017, 3, 1),
    retries=1,
    retry_delay=datetime.timedelta(minutes=10),
)

dag = DAG(dag_id='data_contributions',
          default_args=args,
          max_active_runs=1,
          schedule_interval='@daily')

latest_only_task = LatestOnlyOperator(task_id='latest_only', dag=dag)

data_contributions_processor_task = helpers.create_processor_task(
    name='data_contributions',
    dag=dag,
)

# latest_only -> data_contributions processor
latest_only_task.set_downstream(data_contributions_processor_task)
# Airflow DAG for the 'pfizer' source: latest-only gate, collector,
# processor. Monthly, with at most one active run.
import datetime

from airflow.models import DAG
from airflow.operators.latest_only_operator import LatestOnlyOperator

import utils.helpers as helpers

# Defaults applied to every task in the DAG.
args = dict(
    owner='airflow',
    depends_on_past=False,
    start_date=datetime.datetime(2017, 4, 1),
    retries=1,
)

dag = DAG(
    dag_id='pfizer',
    default_args=args,
    max_active_runs=1,
    schedule_interval='@monthly',
)

latest_only_task = LatestOnlyOperator(task_id='latest_only', dag=dag)

collector_task = helpers.create_collector_task(
    name='pfizer_collector',
    dag=dag,
)

processor_task = helpers.create_processor_task(
    name='pfizer_processor',
    dag=dag,
)

# latest_only -> collector -> processor
latest_only_task.set_downstream(collector_task)
collector_task.set_downstream(processor_task)
# NOTE(review): this chunk begins inside a dict literal that is opened above
# the visible source (presumably `args = {`) -- the leading entries are kept
# exactly as found.
    'depends_on_past': False,
    'start_date': datetime.datetime(2017, 1, 1),
    'retries': 1,
    'retry_delay': datetime.timedelta(minutes=10),  # wait between retries
}

# schedule_interval=None: this DAG has no schedule of its own.
# NOTE(review): presumably it is only started manually or by a trigger.
dag = DAG(dag_id='run_all_processors',
          default_args=args,
          max_active_runs=1,
          schedule_interval=None)

# Gate task: every processor below is wired downstream of it.
latest_only = LatestOnlyOperator(
    task_id='latest_only',
    dag=dag,
)

# Join task: every processor below is wired upstream of it.
merge_identifiers_and_reindex = helpers.create_trigger_subdag_task(
    trigger_dag_id='merge_identifiers_and_reindex',
    dag=dag)

# Names of the processors to run; one processor task is created per entry.
PROCESSORS = [
    'nct',
    'euctr',
    'hra',
    'ictrp',
    'isrctn',
]

# Fan-out / fan-in: latest_only -> <each processor> -> merge_identifiers_and_reindex.
# The processors have no dependencies on one another.
for processor in PROCESSORS:
    processor_task = helpers.create_processor_task(name=processor, dag=dag)
    processor_task.set_upstream(latest_only)
    processor_task.set_downstream(merge_identifiers_and_reindex)
# Monthly 'takeda' DAG, limited to one active run at a time.
dag = DAG(dag_id='takeda',
          default_args=args,
          max_active_runs=1,
          schedule_interval='@monthly')

# Gate task: every other task in this DAG is wired downstream of it.
latest_only_task = LatestOnlyOperator(task_id='latest_only', dag=dag)

collector_task = helpers.create_collector_task(name='takeda_collector',
                                               dag=dag)
processor_task = helpers.create_processor_task(name='takeda_processor',
                                               dag=dag)

# Final step: a task built by create_trigger_subdag_task for the
# 'merge_identifiers_and_reindex' DAG id.
merge_identifiers_and_reindex_task = helpers.create_trigger_subdag_task(
    trigger_dag_id='merge_identifiers_and_reindex',
    dag=dag,
)

# latest_only -> collector -> processor -> merge_identifiers_and_reindex
latest_only_task.set_downstream(collector_task)
collector_task.set_downstream(processor_task)
processor_task.set_downstream(merge_identifiers_and_reindex_task)
# Airflow DAG for the 'euctr' source: latest-only gate, collector, processor,
# then the merge-identifiers-and-reindex trigger. Weekly, with at most one
# active run.
import utils.helpers as helpers

# Defaults applied to every task in the DAG.
args = dict(
    owner='airflow',
    depends_on_past=False,
    start_date=datetime(2016, 12, 1),
    retries=1,
)

dag = DAG(
    dag_id='euctr',
    default_args=args,
    max_active_runs=1,
    schedule_interval='@weekly',
)

latest_only_task = LatestOnlyOperator(task_id='latest_only', dag=dag)

collector_task = helpers.create_collector_task(
    name='euctr',
    dag=dag,
    command='make start euctr 2001-01-01',
)

processor_task = helpers.create_processor_task(name='euctr', dag=dag)

merge_identifiers_and_reindex_task = helpers.create_trigger_subdag_task(
    trigger_dag_id='merge_identifiers_and_reindex',
    dag=dag,
)

# latest_only -> collector -> processor -> merge_identifiers_and_reindex
latest_only_task.set_downstream(collector_task)
collector_task.set_downstream(processor_task)
processor_task.set_downstream(merge_identifiers_and_reindex_task)
# Airflow DAG for the 'pubmed' source: collect, process, then link, once a
# month.
import datetime

from airflow.models import DAG
from airflow.operators.latest_only_operator import LatestOnlyOperator

import utils.helpers as helpers

args = {
    'owner': 'airflow',
    'depends_on_past': False,
    # The start date must be a fixed point in time. A moving value such as
    # utcnow() is re-evaluated every time the scheduler parses this file, so
    # the first '@monthly' interval never closes and no run is ever created.
    'start_date': datetime.datetime(2017, 4, 1),
    'retries': 1,
}

dag = DAG(
    dag_id='pubmed',
    default_args=args,
    max_active_runs=1,
    schedule_interval='@monthly',
)

# With a fixed start date in the past the scheduler creates one run per
# elapsed month; this gate keeps the real work to the most recent run only.
latest_only_task = LatestOnlyOperator(
    task_id='latest_only',
    dag=dag,
)

# The collector is launched with an explicit command line.
# NOTE(review): the two dates look like a deliberately wide from/to range
# meaning "everything" -- confirm against the collector.
collector_task = helpers.create_collector_task(
    name='pubmed',
    dag=dag,
    command='make start pubmed 1900-01-01 2100-01-01',
)

processor_task = helpers.create_processor_task(name='pubmed', dag=dag)

pubmed_linker_task = helpers.create_processor_task(
    name='pubmed_linker',
    dag=dag,
)

# latest_only -> collector -> processor -> pubmed_linker
collector_task.set_upstream(latest_only_task)
processor_task.set_upstream(collector_task)
pubmed_linker_task.set_upstream(processor_task)