Esempio n. 1
0
        # rsync_task = BashOperator(
        #     task_id='rsync',
        #     bash_command=as_user(rsync_command, USER),
        #     params={'klustadir': KLUSTA_DIR,
        #             'mansortdir': MANSORT_DIR,
        #             'mansorthost': MANSORT_HOST},
        #     dag=dag)

        # Completion notice: mails the address(es) in default_args['email']
        # with a subject line that names this DAG.
        email_me = EmailOperator(
            task_id='email_me',
            to=default_args['email'],
            subject='%s is complete' % dag_id,
            html_content='You may now manually sort on NIAO',
            dag=dag)

        # Task ordering established below:
        #   make_klusta_dir -> make_kwd -> phy -> clear_phy
        #                                  phy -> make_kwik_bak_dir -> mv_kwik_bak
        make_kwd_task.set_upstream(make_klusta_dir_task)
        phy_task.set_upstream(make_kwd_task)
        #merge_events_task.set_upstream(phy_task)
        clear_phy_task.set_upstream(phy_task)
        make_kwik_bak_dir_task.set_upstream(phy_task)
        mv_kwik_bak_task.set_upstream(make_kwik_bak_dir_task)
        # The mansort/rsync steps are disabled (commented out), so email_me is
        # attached directly to the two remaining leaf tasks instead.
        # make_mansort_dir_task.set_upstream(phy_task)
        # rsync_task.set_upstream(clear_phy_task)
        # rsync_task.set_upstream(mv_kwik_bak_task)
        # rsync_task.set_upstream(make_mansort_dir_task)
        # email_me.set_upstream(rsync_task)
        email_me.set_upstream(mv_kwik_bak_task)
        email_me.set_upstream(clear_phy_task)

        # Bind the DAG under its dag_id in this module's globals (presumably so
        # that dynamically built DAGs are discoverable at module level -- confirm).
        globals()[dag_id] = dag
Esempio n. 2
0
# DAG 'vs', scheduled '@once': one virtualScreening.py run per protein file for
# chem1.pdb, each paired with a success e-mail task.
dag = DAG(
    dag_id='vs',
    schedule_interval='@once',
    default_args=default_args,
)

# --- chem1.pdb against prot1.pdb ---
chem1_pdb_prot1_pdb = BashOperator(
    dag=dag,
    task_id='chem1_pdb_prot1_pdb',
    bash_command="(cd /working-directory; virtualScreening.py -l chem1.pdb -o result -p prot1.pdb) ",
)

chem1_pdb_prot1_pdb_success_mail = EmailOperator(
    dag=dag,
    task_id="chem1_pdb_prot1_pdb_success_mail",
    to=[u'*****@*****.**'],
    subject="chem1_pdb_prot1_pdb success",
    html_content="chem1_pdb_prot1_pdb success",
)

# The success mail is downstream of the screening run.
chem1_pdb_prot1_pdb.set_downstream(chem1_pdb_prot1_pdb_success_mail)

# --- chem1.pdb against prot2.pdb ---
chem1_pdb_prot2_pdb = BashOperator(
    dag=dag,
    task_id='chem1_pdb_prot2_pdb',
    bash_command="(cd /working-directory; virtualScreening.py -l chem1.pdb -o result -p prot2.pdb) ",
)

chem1_pdb_prot2_pdb_success_mail = EmailOperator(
    dag=dag,
    task_id="chem1_pdb_prot2_pdb_success_mail",
    to=[u'*****@*****.**'],
    subject="chem1_pdb_prot2_pdb success",
    html_content="chem1_pdb_prot2_pdb success",
)
                
# DAG object for the Monary example; built from the shared default_args and
# the params mapping.
dag = DAG(dag_id="connect_to_monary_and_email_operator", default_args=default_args, params=params)


def connect_to_monary_and_email_operator(ds, **kwargs):
    """Print the summed ``pop`` per ``state`` obtained through Monary.

    Runs a ``$group`` aggregation (arguments ``"zips"``, ``"data"``) that sums
    ``pop`` for every ``state`` and prints the outcome as a list of
    ``"<state>: <total>"`` strings.

    Args:
        ds: Supplied by the caller; not used by this function.
        **kwargs: Any further keyword arguments supplied by the caller;
            not used by this function.

    Returns:
        None. The formatted result is only printed.
    """
    pipeline = [{"$group": {"_id": "$state", "totPop": {"$sum": "$pop"}}}]
    # Use the connection as a context manager so it is closed even if the
    # aggregation raises; previously the connection was never released.
    with Monary() as m:
        states, population = m.aggregate(
            "zips", "data", pipeline, ["_id", "totPop"], ["string:2", "int64"]
        )
    # State codes come back as bytes; decode them before formatting.
    strs = [x.decode("utf-8") for x in states]
    result = ["%s: %d" % (state, pop) for state, pop in zip(strs, population)]
    print(result)


# Task that runs the Monary aggregation callable.
run_this = PythonOperator(
    dag=dag,
    task_id="connect_to_monary_and_email_operator",
    python_callable=connect_to_monary_and_email_operator,
    provide_context=True,
)

# E-mail task; its body is a template that calls params['foo'] with
# execution_date.
send_email_notification_flow_successful = EmailOperator(
    dag=dag,
    task_id="send_email_notification_flow_successful",
    to="*****@*****.**",
    subject="custom email from airflow",
    html_content="{{ params['foo'](execution_date) }}",
    params=params,
)

# The e-mail task is downstream of the aggregation task.
run_this.set_downstream(send_email_notification_flow_successful)
Esempio n. 4
0
def _first_sunday_branch():
    """Choose which branch task to follow.

    Returns:
        'source_count' when today is a Sunday (weekday() == 6) within the
        first seven days of the month, otherwise 'ignore_not_sunday'.
    """
    # Read the clock once so both checks are made against the same date.
    today = datetime.now()
    if today.day <= 7 and today.weekday() == 6:
        return 'source_count'
    return 'ignore_not_sunday'


# Branch point: follows either 'source_count' or 'ignore_not_sunday'.
branching = BranchPythonOperator(task_id='branching',
                                 python_callable=_first_sunday_branch,
                                 dag=dag)
branching.set_upstream(run_this_first)

# Success notification. The date in the subject is computed when this file is
# parsed, not when the task runs.
esucc = EmailOperator(task_id='email_success_' + dag.dag_id,
                      to=email_addr,
                      subject=dag.dag_id + ' [success] on ' +
                      datetime.now().strftime('%Y-%m-%d'),
                      html_content='Congratulation!',
                      trigger_rule='all_success',
                      dag=dag)

# NOTE(review): the first shell statement is a bare path rather than a `cd`;
# confirm that is what is intended. The trailing space in the command is kept
# as-is.
source_count = BashOperator(
    task_id='source_count',
    bash_command='/disk1/source_data_count; ./daily_table_count.sh > out.log ',
    dag=dag)

source_count.set_upstream(branching)
esucc.set_upstream(source_count)

# No-op branch taken when it is not the first Sunday of the month.
ignore_not_sunday = DummyOperator(task_id='ignore_not_sunday', dag=dag)
ignore_not_sunday.set_upstream(branching)

# Exactly one of the two branches is skipped on every run, so a join that
# requires 'all_success' could never fire. 'one_success' lets the join run
# once whichever branch was followed has succeeded.
join = DummyOperator(task_id='join', trigger_rule='one_success', dag=dag)
join << ignore_not_sunday
join << esucc
Esempio n. 5
0
                    'mansortdir': MANSORT_DIR},
            dag=dag)

        # Completion notice by e-mail: sent to default_args['email'] with a
        # subject line that names this DAG.
        email_me = EmailOperator(
            task_id='email_me',
            to=default_args['email'],
            subject='%s is complete' % dag_id,
            html_content='You may now manually sort on NIAO',
            dag=dag)

        # Same completion notice posted to the '#ephys' Slack channel.
        slack_it = SlackAPIPostOperator(
            task_id='slack_it',
            token=SLACK_TOKEN,
            text='%s is complete' % dag_id,
            channel='#ephys',
            dag=dag)

        # Task ordering established below:
        #   make_klusta_dir -> make_kwd -> phy -> clear_phy -> rsync
        #                                  phy -> make_kwik_bak_dir -> mv_kwik_bak -> rsync
        #   rsync -> email_me, slack_it
        make_kwd_task.set_upstream(make_klusta_dir_task)
        phy_task.set_upstream(make_kwd_task)
        #merge_events_task.set_upstream(phy_task)
        clear_phy_task.set_upstream(phy_task)
        make_kwik_bak_dir_task.set_upstream(phy_task)
        mv_kwik_bak_task.set_upstream(make_kwik_bak_dir_task)
        #rsync_task.set_upstream(merge_events_task)
        rsync_task.set_upstream(clear_phy_task)
        rsync_task.set_upstream(mv_kwik_bak_task)
        email_me.set_upstream(rsync_task)
        slack_it.set_upstream(rsync_task)

        # Bind the DAG under its dag_id in this module's globals (presumably so
        # that dynamically built DAGs are discoverable at module level -- confirm).
        globals()[dag_id] = dag
Esempio n. 6
0
            params={'rasterdir': RASTER_DIR},
            on_success_callback=lambda c: set_perms(c['params']['rasterdir'],
                                                    default_args['owner']),
            dag=dag)

        # Runs make_raster_cmd with PATH set to ANACONDA_PATH; the three
        # directory/path values are handed to the command through params.
        make_raster_task = BashOperator(task_id='make_rasters',
                                        bash_command=make_raster_cmd,
                                        env={'PATH': ANACONDA_PATH},
                                        params={
                                            'postphydir': POSTPHY_DIR,
                                            'ecanalysispath': ECANALYSIS_PATH,
                                            'rasterdir': RASTER_DIR
                                        },
                                        dag=dag)

        ############ Report Completion
        # Mails default_args['email'] with a subject line that names this DAG.
        email_me = EmailOperator(task_id='email_me',
                                 to=default_args['email'],
                                 subject='%s is merged' % dag_id,
                                 html_content='You may commence analysis.',
                                 dag=dag)

        # Task ordering established below:
        #   make_postphy_dir -> rsync -> merge_events -> kwik2pandas
        #   kwik2pandas -> email_me
        #   kwik2pandas -> make_raster_dir -> make_rasters
        # email_me hangs off kwik2pandas, not off the raster tasks.
        rsync_task.set_upstream(make_postphy_dir_task)
        merge_events_task.set_upstream(rsync_task)
        kwik2pandas_task.set_upstream(merge_events_task)
        email_me.set_upstream(kwik2pandas_task)
        make_raster_dir_task.set_upstream(kwik2pandas_task)
        make_raster_task.set_upstream(make_raster_dir_task)

        # Bind the DAG under its dag_id in this module's globals (presumably so
        # that dynamically built DAGs are discoverable at module level -- confirm).
        globals()[dag_id] = dag
Esempio n. 7
0
]
# Tables that get an extra "send to BI" step after their import.
# Currently none; the previous selection is kept for reference:
# bitables = ['hardware', 'hardwareios']
bitables = []

for table in tables:
    # Import step for this table. '{{ ds }}' and '{{ tomorrow_ds }}' are
    # inserted verbatim into the command string as the begin/end arguments.
    imp = BashOperator(
        dag=dag,
        task_id='import_' + table,
        bash_command=(
            '/disk1/bdl/etl/ETL/imp_mongo_doc_with_date_input.sh {table} {begin} {end} > /disk1/bdl/etl/ETL/log/{table}.log '
        ).format(table=table, begin='{{ ds }}', end='{{ tomorrow_ds }}'),
    )

    if table not in bitables:
        # Import-only table: esucc is placed directly downstream of the import.
        imp.set_downstream(esucc)
        continue

    # BI table: import -> send to BI -> esucc.
    bimp = BashOperator(
        dag=dag,
        task_id='send_2_bi_' + table,
        bash_command=(
            '/disk1/bdl/etl/ETL/send_bi_impala_with_date_input.sh {table} {begin} {end}  > /disk1/bdl/etl/ETL/log/BI/{table}.log '
        ).format(table=table, begin='{{ ds }}', end='{{ tomorrow_ds }}'),
    )
    imp.set_downstream(bimp)
    bimp.set_downstream(esucc)

# The software import uses its own script and is also upstream of esucc.
imp_software = BashOperator(
    dag=dag,
    task_id='import_software',
    bash_command='/disk1/bdl/etl/ETL/imp_software_doc_with_date_input.sh {{ ds }} {{ tomorrow_ds }} > /disk1/bdl/etl/ETL/log/software.log ',
)
imp_software.set_downstream(esucc)
Esempio n. 8
0
# DAG object for the Monary example; built from the shared default_args and
# the params mapping.
dag = DAG(dag_id='connect_to_monary_and_email_operator',
          default_args=default_args,
          params=params)


def connect_to_monary_and_email_operator(ds, **kwargs):
    """Print the summed ``pop`` per ``state`` obtained through Monary.

    Runs a ``$group`` aggregation (arguments ``"zips"``, ``"data"``) that sums
    ``pop`` for every ``state`` and prints the outcome as a list of
    ``"<state>: <total>"`` strings.

    Args:
        ds: Supplied by the caller; not used by this function.
        **kwargs: Any further keyword arguments supplied by the caller;
            not used by this function.

    Returns:
        None. The formatted result is only printed.
    """
    pipeline = [{"$group": {"_id": "$state", "totPop": {"$sum": "$pop"}}}]
    # Use the connection as a context manager so it is closed even if the
    # aggregation raises; previously the connection was never released.
    with Monary() as m:
        states, population = m.aggregate("zips", "data", pipeline,
                                         ["_id", "totPop"],
                                         ["string:2", "int64"])
    # State codes come back as bytes; decode them before formatting.
    strs = [x.decode("utf-8") for x in states]
    result = ["%s: %d" % (state, pop)
              for state, pop in zip(strs, population)]
    print(result)


# Task that runs the Monary aggregation callable.
run_this = PythonOperator(
    dag=dag,
    task_id='connect_to_monary_and_email_operator',
    python_callable=connect_to_monary_and_email_operator,
    provide_context=True,
)

# E-mail task; its body is a template that calls params['foo'] with
# execution_date.
send_email_notification_flow_successful = EmailOperator(
    dag=dag,
    task_id='send_email_notification_flow_successful',
    to="*****@*****.**",
    subject='custom email from airflow',
    html_content="{{ params['foo'](execution_date) }}",
    params=params,
)

# The e-mail task is downstream of the aggregation task.
run_this.set_downstream(send_email_notification_flow_successful)
# Step 1: run search_twitter with the query '#python' passed via params.
simple_search = PythonOperator(
    dag=dag,
    task_id='search_twitter',
    python_callable=search_twitter,
    provide_context=True,
    params={'query': '#python'},
)

# Step 2: run csv_to_sqlite.
move_tweets_to_sqlite = PythonOperator(
    dag=dag,
    task_id='csv_to_sqlite',
    python_callable=csv_to_sqlite,
    provide_context=True,
)

# Step 3: run identify_popular_links.
id_popular = PythonOperator(
    dag=dag,
    task_id='identify_popular_links',
    python_callable=identify_popular_links,
    provide_context=True,
)

# Step 4: e-mail latest_links.txt from RAW_TWEET_DIR as an attachment.
email_links = EmailOperator(
    dag=dag,
    task_id='email_best_links',
    to='*****@*****.**',
    subject='Latest popular links',
    html_content='Check out the latest!!',
    files=['{}/latest_links.txt'.format(RAW_TWEET_DIR)],
)

# Linear chain: search -> csv_to_sqlite -> identify_popular_links -> e-mail.
move_tweets_to_sqlite.set_upstream(simple_search)
move_tweets_to_sqlite.set_downstream(id_popular)
id_popular.set_downstream(email_links)
Esempio n. 10
0
                                      'mansortdir': MANSORT_DIR
                                  },
                                  dag=dag)

        # Completion notice by e-mail: sent to default_args['email'] with a
        # subject line that names this DAG.
        email_me = EmailOperator(
            task_id='email_me',
            to=default_args['email'],
            subject='%s is complete' % dag_id,
            html_content='You may now manually sort on NIAO',
            dag=dag)

        # Same completion notice posted to the '#ephys' Slack channel.
        slack_it = SlackAPIPostOperator(task_id='slack_it',
                                        token=SLACK_TOKEN,
                                        text='%s is complete' % dag_id,
                                        channel='#ephys',
                                        dag=dag)

        # Task ordering established below:
        #   make_klusta_dir -> make_kwd -> phy -> clear_phy -> rsync
        #                                  phy -> make_kwik_bak_dir -> mv_kwik_bak -> rsync
        #   rsync -> email_me, slack_it
        make_kwd_task.set_upstream(make_klusta_dir_task)
        phy_task.set_upstream(make_kwd_task)
        #merge_events_task.set_upstream(phy_task)
        clear_phy_task.set_upstream(phy_task)
        make_kwik_bak_dir_task.set_upstream(phy_task)
        mv_kwik_bak_task.set_upstream(make_kwik_bak_dir_task)
        #rsync_task.set_upstream(merge_events_task)
        rsync_task.set_upstream(clear_phy_task)
        rsync_task.set_upstream(mv_kwik_bak_task)
        email_me.set_upstream(rsync_task)
        slack_it.set_upstream(rsync_task)

        # Bind the DAG under its dag_id in this module's globals (presumably so
        # that dynamically built DAGs are discoverable at module level -- confirm).
        globals()[dag_id] = dag
Esempio n. 11
0
        #     task_id='rsync',
        #     bash_command=as_user(rsync_command, USER),
        #     params={'klustadir': KLUSTA_DIR,
        #             'mansortdir': MANSORT_DIR,
        #             'mansorthost': MANSORT_HOST},
        #     dag=dag)

        # Completion notice: mails the address(es) in default_args['email']
        # with a subject line that names this DAG.
        email_me = EmailOperator(
            task_id='email_me',
            to=default_args['email'],
            subject='%s is complete' % dag_id,
            html_content='You may now manually sort on NIAO',
            dag=dag)


        # Task ordering established below:
        #   make_klusta_dir -> make_kwd -> phy -> clear_phy
        #                                  phy -> make_kwik_bak_dir -> mv_kwik_bak
        make_kwd_task.set_upstream(make_klusta_dir_task)
        phy_task.set_upstream(make_kwd_task)
        #merge_events_task.set_upstream(phy_task)
        clear_phy_task.set_upstream(phy_task)
        make_kwik_bak_dir_task.set_upstream(phy_task)
        mv_kwik_bak_task.set_upstream(make_kwik_bak_dir_task)
        # The mansort/rsync steps are disabled (commented out), so email_me is
        # attached directly to the two remaining leaf tasks instead.
        # make_mansort_dir_task.set_upstream(phy_task)
        # rsync_task.set_upstream(clear_phy_task)
        # rsync_task.set_upstream(mv_kwik_bak_task)
        # rsync_task.set_upstream(make_mansort_dir_task)
        # email_me.set_upstream(rsync_task)
        email_me.set_upstream(mv_kwik_bak_task)
        email_me.set_upstream(clear_phy_task)

        # Bind the DAG under its dag_id in this module's globals (presumably so
        # that dynamically built DAGs are discoverable at module level -- confirm).
        globals()[dag_id] = dag
Esempio n. 12
0
    'owner': 'airflow',
    'start_date': datetime.now() - timedelta(seconds=10),
    'retries': 0
}

# NOTE(review): start_date is derived from datetime.now() each time this file
# is executed, so it is a moving value -- confirm that this is intended.
dag = DAG(
    'Sales_Nov',
    start_date=datetime.now() - timedelta(seconds=10),
    default_args=default_args,
)

# File 1: landing placeholder followed by a "started" e-mail.
op1 = DummyOperator(dag=dag, task_id='File1_landing')
t1 = EmailOperator(
    dag=dag,
    task_id='Processing_File_1',
    to='*****@*****.**',
    subject="Airflow_report",
    html_content="File 1 started",
)

# File 2: landing placeholder followed by a "started" e-mail.
op2 = DummyOperator(dag=dag, task_id='File2_landing')
t2 = EmailOperator(
    dag=dag,
    task_id='Processing_File_2',
    to='*****@*****.**',
    subject="Airflow_report",
    html_content="File 2 started",
)

# Placeholders for the aggregation and final push stages.
op3 = DummyOperator(dag=dag, task_id='Aggregating')
op4 = DummyOperator(dag=dag, task_id='Final_Table_Push')

# Both file pipelines feed 'Aggregating', which feeds 'Final_Table_Push'.
op1.set_downstream(t1)
op2.set_downstream(t2)
op3.set_upstream([t1, t2])
op3.set_downstream(op4)
Esempio n. 13
0
            params={'rasterdir': RASTER_DIR},
            on_success_callback = lambda c: set_perms(c['params']['rasterdir'],default_args['owner']), 
            dag=dag)

        # Runs make_raster_cmd with PATH set to ANACONDA_PATH; the three
        # directory/path values are handed to the command through params.
        make_raster_task = BashOperator(
            task_id='make_rasters',
            bash_command=make_raster_cmd,
            env={'PATH': ANACONDA_PATH},
            params={'postphydir': POSTPHY_DIR,
                    'ecanalysispath': ECANALYSIS_PATH,
                    'rasterdir': RASTER_DIR},
            dag=dag)

        ############ Report Completion
        # Mails default_args['email'] with a subject line that names this DAG.
        email_me = EmailOperator(
            task_id='email_me',
            to=default_args['email'],
            subject='%s is merged' % dag_id,
            html_content='You may commence analysis.',
            dag=dag)


        # Task ordering established below:
        #   make_postphy_dir -> rsync -> merge_events -> kwik2pandas
        #   kwik2pandas -> email_me
        #   kwik2pandas -> make_raster_dir -> make_rasters
        # email_me hangs off kwik2pandas, not off the raster tasks.
        rsync_task.set_upstream(make_postphy_dir_task)
        merge_events_task.set_upstream(rsync_task)
        kwik2pandas_task.set_upstream(merge_events_task)
        email_me.set_upstream(kwik2pandas_task)
        make_raster_dir_task.set_upstream(kwik2pandas_task)
        make_raster_task.set_upstream(make_raster_dir_task)

        # Bind the DAG under its dag_id in this module's globals (presumably so
        # that dynamically built DAGs are discoverable at module level -- confirm).
        globals()[dag_id] = dag
Esempio n. 14
0
        wrtr.writerow(['url', 'count'])
        wrtr.writerows(cntr.most_common(5))


# Step 1: run search_twitter with the query '#python' passed via params.
simple_search = PythonOperator(
    dag=dag,
    task_id='search_twitter',
    python_callable=search_twitter,
    provide_context=True,
    params={'query': '#python'},
)

# Step 2: run csv_to_sqlite.
move_tweets_to_sqlite = PythonOperator(
    dag=dag,
    task_id='csv_to_sqlite',
    python_callable=csv_to_sqlite,
    provide_context=True,
)

# Step 3: run identify_popular_links.
id_popular = PythonOperator(
    dag=dag,
    task_id='identify_popular_links',
    python_callable=identify_popular_links,
    provide_context=True,
)

# Step 4: e-mail latest_links.txt from RAW_TWEET_DIR as an attachment.
email_links = EmailOperator(
    dag=dag,
    task_id='email_best_links',
    to='*****@*****.**',
    subject='Latest popular links',
    html_content='Check out the latest!!',
    files=['{}/latest_links.txt'.format(RAW_TWEET_DIR)],
)

# Linear chain: search -> csv_to_sqlite -> identify_popular_links -> e-mail.
move_tweets_to_sqlite.set_upstream(simple_search)
move_tweets_to_sqlite.set_downstream(id_popular)
id_popular.set_downstream(email_links)