Ejemplo n.º 1
0
import datetime
from airflow.models import DAG
import utils.helpers as helpers

# Default arguments handed to every task attached to the DAG below.
args = {
    'owner': 'airflow',
    'depends_on_past': False,
    # The start date must be a fixed point in time. A moving value such as
    # utcnow() is re-evaluated on every parse of this file, so the first
    # schedule interval never closes and the scheduler never triggers a run.
    # The date chosen here mirrors the other monthly DAGs (e.g. icdcm, hra).
    # NOTE(review): with a start date in the past the scheduler may create
    # runs for intervals that have already elapsed -- confirm this is fine.
    'start_date': datetime.datetime(2016, 12, 1),
    'retries': 1,
}

# Monthly 'takeda' DAG; at most one DAG run may be active at a time.
dag = DAG(dag_id='takeda',
          default_args=args,
          max_active_runs=1,
          schedule_interval='@monthly')

collector_task = helpers.create_collector_task(name='takeda_collector',
                                               dag=dag)

processor_task = helpers.create_processor_task(name='takeda_processor',
                                               dag=dag)

merge_trials_identifiers_task = helpers.create_processor_task(
    name='merge_trials_identifiers', dag=dag)

# Execution order: collector -> processor -> merge_trials_identifiers.
processor_task.set_upstream(collector_task)
merge_trials_identifiers_task.set_upstream(processor_task)
Ejemplo n.º 2
0
from datetime import datetime
from airflow.models import DAG
import utils.helpers as helpers

# Settings inherited by every task attached to the DAG below.
args = dict(
    owner='airflow',
    depends_on_past=False,
    start_date=datetime(2016, 12, 1),
    retries=1,
)

# Monthly 'icdcm' DAG; at most one DAG run may be active at a time.
dag = DAG(dag_id='icdcm',
          default_args=args,
          max_active_runs=1,
          schedule_interval='@monthly')

collector_task = helpers.create_collector_task(name='icdcm', dag=dag)
processor_task = helpers.create_processor_task(name='icdcm', dag=dag)

# The processor may only start once the collector has finished.
collector_task.set_downstream(processor_task)
Ejemplo n.º 3
0
    dag_id='actrn',
    default_args=args,
    max_active_runs=1,
    schedule_interval='@monthly'
)

# Gate task placed at the head of the pipeline.
latest_only_task = LatestOnlyOperator(task_id='latest_only', dag=dag)

collector_task = helpers.create_collector_task(
    name='actrn', dag=dag, command='make start actrn 2001-01-01')

processor_task = helpers.create_processor_task(name='actrn', dag=dag)

merge_identifiers_and_reindex_task = helpers.create_trigger_subdag_task(
    trigger_dag_id='merge_identifiers_and_reindex', dag=dag)

# Execution order:
#   latest_only -> collector -> processor -> merge_identifiers_and_reindex
latest_only_task.set_downstream(collector_task)
collector_task.set_downstream(processor_task)
processor_task.set_downstream(merge_identifiers_and_reindex_task)
Ejemplo n.º 4
0
import utils.helpers as helpers

# Settings inherited by every task attached to the DAG below.
args = dict(
    owner='airflow',
    depends_on_past=False,
    start_date=datetime(2016, 12, 1),
    retries=1,
)

# Monthly 'hra' DAG; at most one DAG run may be active at a time.
dag = airflow.models.DAG(
    dag_id='hra',
    default_args=args,
    max_active_runs=1,
    schedule_interval='@monthly',
)

# Each HRA_* environment entry is filled from the Airflow Variable of the
# same name; the lookups happen when this module is parsed.
collector_task = helpers.create_collector_task(
    name='hra',
    dag=dag,
    environment={
        key: airflow.models.Variable.get(key)
        for key in ('HRA_ENV', 'HRA_URL', 'HRA_USER', 'HRA_PASS')
    },
)

processor_task = helpers.create_processor_task(
    name='hra',
    dag=dag,
)

hra_linker_task = helpers.create_processor_task(
    name='hra_linker',
    dag=dag,
)

# Execution order: collector -> processor -> hra_linker.
collector_task.set_downstream(processor_task)
processor_task.set_downstream(hra_linker_task)
Ejemplo n.º 5
0
    dag=dag,
)

# Presumably fetches the clinicaltrials.gov search results and stores them in
# S3 (inferred from the operator name -- verify against HTTPToS3Transfer).
# The target s3_url is NCT_DATA_URL with 'http://' swapped for 's3://'.
save_nct_xml_to_s3_task = HTTPToS3Transfer(
    task_id='save_nct_xml_to_s3',
    dag=dag,
    url='https://clinicaltrials.gov/search',
    url_params=dict(
        resultsxml='True',
        rcv_s='01/01/2001',
        rcv_e='{{ macros.ds_format(end_date, "%Y-%m-%d", "%d/%m/%Y") }}',
    ),
    s3_conn_id='datastore_s3',
    s3_url=NCT_DATA_URL.replace('http://', 's3://'),
)

# The collector command is given NCT_DATA_URL as its argument.
collector_task = helpers.create_collector_task(
    name='nct',
    dag=dag,
    command='make start nct {url}'.format(url=NCT_DATA_URL),
)

processor_task = helpers.create_processor_task(
    name='nct',
    dag=dag,
)

merge_identifiers_and_reindex_task = helpers.create_trigger_subdag_task(
    trigger_dag_id='merge_identifiers_and_reindex',
    dag=dag,
)

# Execution order:
#   latest_only -> save_nct_xml_to_s3 -> collector -> processor
#   -> merge_identifiers_and_reindex
latest_only_task.set_downstream(save_nct_xml_to_s3_task)
save_nct_xml_to_s3_task.set_downstream(collector_task)
collector_task.set_downstream(processor_task)
processor_task.set_downstream(merge_identifiers_and_reindex_task)
Ejemplo n.º 6
0
}

# Monthly 'pubmed' DAG; at most one DAG run may be active at a time.
dag = DAG(
    dag_id='pubmed',
    default_args=args,
    max_active_runs=1,
    schedule_interval='@monthly',
)

# Gate task placed at the head of the pipeline.
latest_only_task = LatestOnlyOperator(task_id='latest_only', dag=dag)

collector_task = helpers.create_collector_task(
    name='pubmed',
    dag=dag,
    command='make start pubmed 1900-01-01 2100-01-01',
)

unregistered_trials_task = helpers.create_processor_task(
    name='pubmed_unregistered_trials',
    dag=dag,
)

trials_remover_task = helpers.create_processor_task(
    name='trial_remover',
    dag=dag,
)

pubmed_publications_task = helpers.create_processor_task(
    name='pubmed_publications',
    dag=dag,
)

merge_identifiers_and_reindex_task = helpers.create_trigger_subdag_task(
    trigger_dag_id='merge_identifiers_and_reindex',
    dag=dag,
)

# Execution order:
#   latest_only -> collector -> pubmed_unregistered_trials -> trial_remover
#   -> pubmed_publications -> merge_identifiers_and_reindex
latest_only_task.set_downstream(collector_task)
collector_task.set_downstream(unregistered_trials_task)
unregistered_trials_task.set_downstream(trials_remover_task)
trials_remover_task.set_downstream(pubmed_publications_task)
pubmed_publications_task.set_downstream(merge_identifiers_and_reindex_task)
Ejemplo n.º 7
0
import utils.helpers as helpers

# Settings inherited by every task attached to the DAG below.
args = dict(
    owner='airflow',
    depends_on_past=False,
    start_date=datetime.datetime(2017, 4, 1),
    retries=1,
)

# Monthly 'isrctn' DAG; at most one DAG run may be active at a time.
dag = DAG(
    dag_id='isrctn',
    default_args=args,
    max_active_runs=1,
    schedule_interval='@monthly',
)

# Gate task placed at the head of the pipeline.
latest_only_task = LatestOnlyOperator(task_id='latest_only', dag=dag)

collector_task = helpers.create_collector_task(
    name='isrctn',
    dag=dag,
    command='make start isrctn 2001-01-01',
)

processor_task = helpers.create_processor_task(
    name='isrctn',
    dag=dag,
)

merge_identifiers_and_reindex_task = helpers.create_trigger_subdag_task(
    trigger_dag_id='merge_identifiers_and_reindex',
    dag=dag,
)

# Execution order:
#   latest_only -> collector -> processor -> merge_identifiers_and_reindex
latest_only_task.set_downstream(collector_task)
collector_task.set_downstream(processor_task)
processor_task.set_downstream(merge_identifiers_and_reindex_task)
# Settings inherited by every task attached to the DAG below.
args = dict(
    owner='airflow',
    depends_on_past=False,
    start_date=datetime(2016, 12, 1),
    retries=1,
)

# Monthly 'cochrane_reviews' DAG; at most one DAG run may be active at a time.
dag = airflow.models.DAG(
    dag_id='cochrane_reviews',
    default_args=args,
    max_active_runs=1,
    schedule_interval='@monthly',
)

# Gate task placed at the head of the pipeline.
latest_only_task = LatestOnlyOperator(task_id='latest_only', dag=dag)

# COCHRANE_ARCHIVE_URL is read from the Airflow Variable of the same name
# when this module is parsed and passed to the collector's environment.
collector_task = helpers.create_collector_task(
    name='cochrane_reviews',
    dag=dag,
    environment=dict(
        COCHRANE_ARCHIVE_URL=airflow.models.Variable.get(
            'COCHRANE_ARCHIVE_URL'),
    ),
)

processor_task = helpers.create_processor_task(
    name='cochrane_reviews',
    dag=dag,
)

# Execution order: latest_only -> collector -> processor.
latest_only_task.set_downstream(collector_task)
collector_task.set_downstream(processor_task)
from airflow.models import DAG
from airflow.operators.latest_only_operator import LatestOnlyOperator
import utils.helpers as helpers

# Settings inherited by every task attached to the DAG below; a failed task
# is retried once, ten minutes after the failure.
args = dict(
    owner='airflow',
    depends_on_past=False,
    start_date=datetime.datetime(2017, 3, 1),
    retries=1,
    retry_delay=datetime.timedelta(minutes=10),
)

# Daily 'data_contributions' DAG; at most one DAG run may be active at a time.
dag = DAG(dag_id='data_contributions',
          default_args=args,
          max_active_runs=1,
          schedule_interval='@daily')

# Gate task placed at the head of the pipeline.
latest_only_task = LatestOnlyOperator(task_id='latest_only', dag=dag)

data_contributions_processor_task = helpers.create_processor_task(
    name='data_contributions', dag=dag)

# Execution order: latest_only -> data_contributions processor.
latest_only_task.set_downstream(data_contributions_processor_task)
Ejemplo n.º 10
0
import datetime
from airflow.models import DAG
from airflow.operators.latest_only_operator import LatestOnlyOperator
import utils.helpers as helpers

# Settings inherited by every task attached to the DAG below.
args = dict(
    owner='airflow',
    depends_on_past=False,
    start_date=datetime.datetime(2017, 4, 1),
    retries=1,
)

# Monthly 'pfizer' DAG; at most one DAG run may be active at a time.
dag = DAG(
    dag_id='pfizer',
    default_args=args,
    max_active_runs=1,
    schedule_interval='@monthly',
)

# Gate task placed at the head of the pipeline.
latest_only_task = LatestOnlyOperator(task_id='latest_only', dag=dag)

collector_task = helpers.create_collector_task(
    name='pfizer_collector',
    dag=dag,
)

processor_task = helpers.create_processor_task(
    name='pfizer_processor',
    dag=dag,
)

# Execution order: latest_only -> collector -> processor.
latest_only_task.set_downstream(collector_task)
collector_task.set_downstream(processor_task)
    'depends_on_past': False,
    'start_date': datetime.datetime(2017, 1, 1),
    'retries': 1,
    'retry_delay': datetime.timedelta(minutes=10),
}

# 'run_all_processors' DAG with no schedule interval (schedule_interval=None);
# at most one DAG run may be active at a time.
dag = DAG(
    dag_id='run_all_processors',
    default_args=args,
    max_active_runs=1,
    schedule_interval=None,
)

# Gate task placed at the head of the pipeline.
latest_only = LatestOnlyOperator(task_id='latest_only', dag=dag)

merge_identifiers_and_reindex = helpers.create_trigger_subdag_task(
    trigger_dag_id='merge_identifiers_and_reindex',
    dag=dag,
)

# Names of the processors to run between the gate and the final merge.
PROCESSORS = ['nct', 'euctr', 'hra', 'ictrp', 'isrctn']

# Fan out: each processor follows latest_only and precedes the merge task.
for processor in PROCESSORS:
    processor_task = helpers.create_processor_task(name=processor, dag=dag)
    latest_only.set_downstream(processor_task)
    merge_identifiers_and_reindex.set_upstream(processor_task)
Ejemplo n.º 12
0
# Monthly 'takeda' DAG; at most one DAG run may be active at a time.
dag = DAG(dag_id='takeda',
          default_args=args,
          max_active_runs=1,
          schedule_interval='@monthly')

# Gate task placed at the head of the pipeline.
latest_only_task = LatestOnlyOperator(task_id='latest_only', dag=dag)

collector_task = helpers.create_collector_task(name='takeda_collector',
                                               dag=dag)

processor_task = helpers.create_processor_task(name='takeda_processor',
                                               dag=dag)

merge_identifiers_and_reindex_task = helpers.create_trigger_subdag_task(
    trigger_dag_id='merge_identifiers_and_reindex', dag=dag)

# Execution order:
#   latest_only -> collector -> processor -> merge_identifiers_and_reindex
latest_only_task.set_downstream(collector_task)
collector_task.set_downstream(processor_task)
processor_task.set_downstream(merge_identifiers_and_reindex_task)
Ejemplo n.º 13
0
import utils.helpers as helpers

# Settings inherited by every task attached to the DAG below.
args = dict(
    owner='airflow',
    depends_on_past=False,
    start_date=datetime(2016, 12, 1),
    retries=1,
)

# Weekly 'euctr' DAG; at most one DAG run may be active at a time.
dag = DAG(
    dag_id='euctr',
    default_args=args,
    max_active_runs=1,
    schedule_interval='@weekly',
)

# Gate task placed at the head of the pipeline.
latest_only_task = LatestOnlyOperator(task_id='latest_only', dag=dag)

collector_task = helpers.create_collector_task(
    name='euctr',
    dag=dag,
    command='make start euctr 2001-01-01',
)

processor_task = helpers.create_processor_task(
    name='euctr',
    dag=dag,
)

merge_identifiers_and_reindex_task = helpers.create_trigger_subdag_task(
    trigger_dag_id='merge_identifiers_and_reindex',
    dag=dag,
)

# Execution order:
#   latest_only -> collector -> processor -> merge_identifiers_and_reindex
latest_only_task.set_downstream(collector_task)
collector_task.set_downstream(processor_task)
processor_task.set_downstream(merge_identifiers_and_reindex_task)
Ejemplo n.º 14
0
import datetime
from airflow.models import DAG
import utils.helpers as helpers

# Default arguments handed to every task attached to the DAG below.
args = {
    'owner': 'airflow',
    'depends_on_past': False,
    # The start date must be a fixed point in time. A moving value such as
    # utcnow() is re-evaluated on every parse of this file, so the first
    # schedule interval never closes and the scheduler never triggers a run.
    # The date chosen here mirrors the other monthly DAGs (e.g. icdcm, hra).
    # NOTE(review): with a start date in the past the scheduler may create
    # runs for intervals that have already elapsed -- confirm this is fine.
    'start_date': datetime.datetime(2016, 12, 1),
    'retries': 1,
}

# Monthly 'pubmed' DAG; at most one DAG run may be active at a time.
dag = DAG(dag_id='pubmed',
          default_args=args,
          max_active_runs=1,
          schedule_interval='@monthly')

collector_task = helpers.create_collector_task(
    name='pubmed', dag=dag, command='make start pubmed 1900-01-01 2100-01-01')

processor_task = helpers.create_processor_task(name='pubmed', dag=dag)

pubmed_linker_task = helpers.create_processor_task(name='pubmed_linker',
                                                   dag=dag)

# Execution order: collector -> processor -> pubmed_linker.
processor_task.set_upstream(collector_task)
pubmed_linker_task.set_upstream(processor_task)