# ACTRN (Australian New Zealand Clinical Trials Registry) ingestion DAG.
# Runs monthly; the LatestOnlyOperator gate skips backfill intervals so
# only the most recent schedule actually collects data.
dag = DAG(
    dag_id='actrn',
    default_args=args,
    max_active_runs=1,
    schedule_interval='@monthly'
)

latest_only_task = LatestOnlyOperator(
    task_id='latest_only',
    dag=dag,
)

# Collector scrapes the registry from 2001-01-01 onwards.
collector_task = helpers.create_collector_task(
    name='actrn',
    dag=dag,
    command='make start actrn 2001-01-01'
)

processor_task = helpers.create_processor_task(
    name='actrn',
    dag=dag
)

merge_identifiers_and_reindex_task = helpers.create_trigger_subdag_task(
    trigger_dag_id='merge_identifiers_and_reindex',
    dag=dag
)

# latest_only -> collect -> process -> merge identifiers and reindex.
collector_task.set_upstream(latest_only_task)
processor_task.set_upstream(collector_task)
# FIX: the merge/reindex trigger task was created but never ordered after
# processing, so it ran unsequenced. Wire it the same way the sibling
# registry DAGs (nct, ictrp) do.
merge_identifiers_and_reindex_task.set_upstream(processor_task)
import datetime

from airflow.models import DAG

import utils.helpers as helpers

# Takeda trial registry ingestion DAG: collect -> process -> merge
# trial identifiers, scheduled monthly.
args = {
    'owner': 'airflow',
    'depends_on_past': False,
    # FIX: a dynamic start_date (datetime.utcnow()) is an Airflow
    # anti-pattern — it moves on every DAG parse, so the scheduler may
    # never consider an interval complete. Pin to the fixed date used by
    # the sibling registry DAGs (gsk, euctr, icdcm).
    'start_date': datetime.datetime(2016, 12, 1),
    'retries': 1,
}

dag = DAG(dag_id='takeda', default_args=args, max_active_runs=1,
          schedule_interval='@monthly')

collector_task = helpers.create_collector_task(name='takeda_collector', dag=dag)
processor_task = helpers.create_processor_task(name='takeda_processor', dag=dag)
merge_trials_identifiers_task = helpers.create_processor_task(
    name='merge_trials_identifiers', dag=dag)

processor_task.set_upstream(collector_task)
merge_trials_identifiers_task.set_upstream(processor_task)
# NOTE(review): this chunk begins mid-statement — the `dag=dag, )` below is
# the tail of an operator constructed above this view (presumably the
# `latest_only_task = LatestOnlyOperator(...)` referenced further down).
    dag=dag,
)

# Snapshot the full ClinicalTrials.gov results XML into S3 before collecting.
save_nct_xml_to_s3_task = HTTPToS3Transfer(
    task_id='save_nct_xml_to_s3',
    dag=dag,
    url='https://clinicaltrials.gov/search',
    url_params={
        'resultsxml': 'True',
        # Received-date window: fixed lower bound, upper bound rendered
        # from the run's end_date at template time.
        # NOTE(review): rcv_e is formatted as DD/MM/YYYY; confirm the
        # endpoint does not expect MM/DD/YYYY — rcv_s's 01/01/2001 is
        # ambiguous either way.
        'rcv_s': '01/01/2001',
        'rcv_e': '{{ macros.ds_format(end_date, "%Y-%m-%d", "%d/%m/%Y") }}',
    },
    s3_conn_id='datastore_s3',
    # Mirror the public HTTP location under the same key on S3.
    s3_url=NCT_DATA_URL.replace('http://', 's3://'),
)

collector_task = helpers.create_collector_task(
    name='nct',
    dag=dag,
    command='make start nct {url}'.format(url=NCT_DATA_URL))

processor_task = helpers.create_processor_task(name='nct', dag=dag)

merge_identifiers_and_reindex_task = helpers.create_trigger_subdag_task(
    trigger_dag_id='merge_identifiers_and_reindex', dag=dag)

# latest_only -> save XML to S3 -> collect -> process -> merge/reindex.
save_nct_xml_to_s3_task.set_upstream(latest_only_task)
collector_task.set_upstream(save_nct_xml_to_s3_task)
processor_task.set_upstream(collector_task)
merge_identifiers_and_reindex_task.set_upstream(processor_task)
from datetime import datetime

from airflow.models import DAG

import utils.helpers as helpers

# ICD-CM vocabulary ingestion: a single monthly collect -> process chain.
args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': datetime(2016, 12, 1),
    'retries': 1,
}

dag = DAG(dag_id='icdcm', default_args=args, max_active_runs=1,
          schedule_interval='@monthly')

collector_task = helpers.create_collector_task(name='icdcm', dag=dag)
processor_task = helpers.create_processor_task(name='icdcm', dag=dag)

processor_task.set_upstream(collector_task)
from datetime import datetime

from airflow.models import DAG

import utils.helpers as helpers

# GSK clinical study register ingestion, run monthly:
# collect from 2001-01-01 -> process -> merge trial identifiers.
args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': datetime(2016, 12, 1),
    'retries': 1,
}

dag = DAG(
    dag_id='gsk',
    default_args=args,
    max_active_runs=1,
    schedule_interval='@monthly',
)

collector_task = helpers.create_collector_task(
    name='gsk',
    dag=dag,
    command='make start gsk 2001-01-01',
)
processor_task = helpers.create_processor_task(
    name='gsk',
    dag=dag,
)
merge_trials_identifiers_task = helpers.create_processor_task(
    name='merge_trials_identifiers',
    dag=dag,
)

processor_task.set_upstream(collector_task)
merge_trials_identifiers_task.set_upstream(processor_task)
from datetime import datetime

from airflow.models import DAG

import utils.helpers as helpers

# EU Clinical Trials Register ingestion, run monthly:
# collect from 2001-01-01 -> process -> merge trial identifiers.
args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': datetime(2016, 12, 1),
    'retries': 1,
}

dag = DAG(
    dag_id='euctr',
    default_args=args,
    max_active_runs=1,
    schedule_interval='@monthly',
)

collector_task = helpers.create_collector_task(
    name='euctr',
    dag=dag,
    command='make start euctr 2001-01-01',
)
processor_task = helpers.create_processor_task(
    name='euctr',
    dag=dag,
)
merge_trials_identifiers_task = helpers.create_processor_task(
    name='merge_trials_identifiers',
    dag=dag,
)

processor_task.set_upstream(collector_task)
merge_trials_identifiers_task.set_upstream(processor_task)
import datetime

from airflow.models import DAG

import utils.helpers as helpers

# ISRCTN registry ingestion DAG: collect -> process -> merge trial
# identifiers, scheduled monthly.
args = {
    'owner': 'airflow',
    'depends_on_past': False,
    # FIX: datetime.utcnow() as start_date is an Airflow anti-pattern —
    # it shifts on every DAG parse, making scheduling unreliable. Pin to
    # the fixed date used by the sibling registry DAGs (gsk, euctr).
    'start_date': datetime.datetime(2016, 12, 1),
    'retries': 1,
}

dag = DAG(dag_id='isrctn', default_args=args, max_active_runs=1,
          schedule_interval='@monthly')

# Collector scrapes the registry from 2001-01-01 onwards.
collector_task = helpers.create_collector_task(
    name='isrctn', dag=dag, command='make start isrctn 2001-01-01')
processor_task = helpers.create_processor_task(name='isrctn', dag=dag)
merge_trials_identifiers_task = helpers.create_processor_task(
    name='merge_trials_identifiers', dag=dag)

processor_task.set_upstream(collector_task)
merge_trials_identifiers_task.set_upstream(processor_task)
# NOTE(review): this chunk starts mid-dict — the entries below close the
# `args` default-arguments mapping opened above this view.
    'start_date': datetime(2016, 12, 1),
    'retries': 1,
}

dag = airflow.models.DAG(dag_id='ictrp', default_args=args,
                         max_active_runs=1, schedule_interval='@monthly')

latest_only_task = LatestOnlyOperator(
    task_id='latest_only',
    dag=dag,
)

# WHO ICTRP credentials are injected into the collector's environment
# from Airflow Variables.
collector_task = helpers.create_collector_task(
    name='ictrp',
    dag=dag,
    environment={
        'ICTRP_USER': airflow.models.Variable.get('ICTRP_USER'),
        'ICTRP_PASS': airflow.models.Variable.get('ICTRP_PASS'),
    })

processor_task = helpers.create_processor_task(name='ictrp', dag=dag)

merge_identifiers_and_reindex_task = helpers.create_trigger_subdag_task(
    trigger_dag_id='merge_identifiers_and_reindex', dag=dag)

# latest_only -> collect -> process -> merge identifiers and reindex.
collector_task.set_upstream(latest_only_task)
processor_task.set_upstream(collector_task)
merge_identifiers_and_reindex_task.set_upstream(processor_task)
# JPRN (Japan Primary Registries Network) ingestion DAG. Runs monthly;
# the LatestOnlyOperator gate skips backfill intervals.
dag = DAG(
    dag_id='jprn',
    default_args=args,
    max_active_runs=1,
    schedule_interval='@monthly'
)

latest_only_task = LatestOnlyOperator(
    task_id='latest_only',
    dag=dag,
)

# Collector walks registry record ids 1 through 100000.
collector_task = helpers.create_collector_task(
    name='jprn',
    dag=dag,
    command='make start jprn 1 100000'
)

processor_task = helpers.create_processor_task(
    name='jprn',
    dag=dag
)

merge_identifiers_and_reindex_task = helpers.create_trigger_subdag_task(
    trigger_dag_id='merge_identifiers_and_reindex',
    dag=dag
)

# latest_only -> collect -> process -> merge identifiers and reindex.
collector_task.set_upstream(latest_only_task)
processor_task.set_upstream(collector_task)
# FIX: the merge/reindex trigger task was created but never ordered after
# processing, so it ran unsequenced. Wire it the same way the sibling
# registry DAGs (nct, ictrp) do.
merge_identifiers_and_reindex_task.set_upstream(processor_task)
import utils.helpers as helpers

# HRA (Health Research Authority) ingestion, run monthly:
# collect -> process -> link HRA records to trials.
args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': datetime(2016, 12, 1),
    'retries': 1,
}

dag = airflow.models.DAG(
    dag_id='hra',
    default_args=args,
    max_active_runs=1,
    schedule_interval='@monthly',
)

# Collector configuration/credentials come from Airflow Variables.
_HRA_VARIABLES = ('HRA_ENV', 'HRA_URL', 'HRA_USER', 'HRA_PASS')
collector_task = helpers.create_collector_task(
    name='hra',
    dag=dag,
    environment={key: airflow.models.Variable.get(key)
                 for key in _HRA_VARIABLES})

processor_task = helpers.create_processor_task(name='hra', dag=dag)
hra_linker_task = helpers.create_processor_task(name='hra_linker', dag=dag)

processor_task.set_upstream(collector_task)
hra_linker_task.set_upstream(processor_task)
# NOTE(review): this chunk starts mid-dict — the entries below close the
# `args` default-arguments mapping opened above this view.
    'depends_on_past': False,
    'start_date': datetime.datetime(2017, 4, 1),
    'retries': 1,
}

dag = DAG(dag_id='pubmed', default_args=args, max_active_runs=1,
          schedule_interval='@monthly')

latest_only_task = LatestOnlyOperator(
    task_id='latest_only',
    dag=dag,
)

# Collect the entire PubMed date range in a single pass.
collector_task = helpers.create_collector_task(
    name='pubmed',
    dag=dag,
    command='make start pubmed 1900-01-01 2100-01-01')

unregistered_trials_task = helpers.create_processor_task(
    name='pubmed_unregistered_trials', dag=dag)

trials_remover_task = helpers.create_processor_task(name='trial_remover',
                                                    dag=dag)

pubmed_publications_task = helpers.create_processor_task(
    name='pubmed_publications', dag=dag)

merge_identifiers_and_reindex_task = helpers.create_trigger_subdag_task(
    trigger_dag_id='merge_identifiers_and_reindex', dag=dag)

collector_task.set_upstream(latest_only_task)
unregistered_trials_task.set_upstream(collector_task)
# NOTE(review): trial_remover, pubmed_publications and the merge/reindex
# trigger are created but not wired within this view — the remaining
# set_upstream calls are presumably below this chunk; confirm before
# assuming these tasks are orphaned.
# Cochrane systematic reviews ingestion, run monthly on the latest
# interval only: latest_only -> collect -> process.
args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': datetime(2016, 12, 1),
    'retries': 1,
}

dag = airflow.models.DAG(
    dag_id='cochrane_reviews',
    default_args=args,
    max_active_runs=1,
    schedule_interval='@monthly',
)

latest_only_task = LatestOnlyOperator(task_id='latest_only', dag=dag)

# The archive location is supplied to the collector via an Airflow Variable.
collector_task = helpers.create_collector_task(
    name='cochrane_reviews',
    dag=dag,
    environment={
        'COCHRANE_ARCHIVE_URL':
            airflow.models.Variable.get('COCHRANE_ARCHIVE_URL'),
    },
)

processor_task = helpers.create_processor_task(
    name='cochrane_reviews',
    dag=dag,
)

collector_task.set_upstream(latest_only_task)
processor_task.set_upstream(collector_task)
import datetime

from airflow.models import DAG
from airflow.operators.latest_only_operator import LatestOnlyOperator

import utils.helpers as helpers

# Pfizer trial registry ingestion, run monthly on the latest interval
# only: latest_only -> collect -> process.
args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': datetime.datetime(2017, 4, 1),
    'retries': 1,
}

dag = DAG(
    dag_id='pfizer',
    default_args=args,
    max_active_runs=1,
    schedule_interval='@monthly',
)

latest_only_task = LatestOnlyOperator(task_id='latest_only', dag=dag)
collector_task = helpers.create_collector_task(name='pfizer_collector', dag=dag)
processor_task = helpers.create_processor_task(name='pfizer_processor', dag=dag)

collector_task.set_upstream(latest_only_task)
processor_task.set_upstream(collector_task)