def _run_as_operator(self, fn, **kwargs):
    task = PythonVirtualenvOperator(
        python_callable=fn,
        task_id='task',
        dag=self.dag,
        **kwargs)
    task.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)
def test_dill_warning(self):
    def f():
        pass

    with self.assertRaises(AirflowException):
        PythonVirtualenvOperator(python_callable=f,
                                 task_id='task',
                                 dag=self.dag,
                                 use_dill=True,
                                 system_site_packages=False)
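For reference, a minimal sketch of a configuration that this dill check accepts: with system_site_packages=False, use_dill=True is only valid when dill is also listed in requirements so it gets installed into the virtualenv. The task_id, callable, and dag object below are placeholders, not part of the original test.

from airflow.operators.python_operator import PythonVirtualenvOperator

def f():
    pass

# dill must be installed into the virtualenv itself when the system
# site-packages are not available, hence requirements=['dill'].
task_with_dill = PythonVirtualenvOperator(python_callable=f,
                                          task_id='task_with_dill',
                                          dag=dag,  # placeholder DAG object
                                          use_dill=True,
                                          system_site_packages=False,
                                          requirements=['dill'])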
def test_provide_context(self):
    def fn():
        pass

    task = PythonVirtualenvOperator(
        python_callable=fn,
        python_version=sys.version_info[0],
        task_id='task',
        dag=self.dag,
        provide_context=True,
    )
    self.assertTrue(task.provide_context)
def test_config_context(self):
    """
    This test ensures we can use dag_run from the context to access
    the configuration at run time that's being passed from the UI,
    CLI, and REST API.
    """
    self.dag.create_dagrun(
        run_id='manual__' + DEFAULT_DATE.isoformat(),
        execution_date=DEFAULT_DATE,
        start_date=DEFAULT_DATE,
        state=State.RUNNING,
        external_trigger=False,
    )

    def pass_function(**kwargs):
        kwargs['dag_run'].conf

    t = PythonVirtualenvOperator(task_id='config_dag_run',
                                 dag=self.dag,
                                 provide_context=True,
                                 python_callable=pass_function)
    t.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)
        table=table,
        s3_bucket=s3_bucket,
        csv_file=csv_file_name,
        access_key=credentials.access_key,
        secret_key=credentials.secret_key,
        region=region)
    postgres_hook.run(copy_query)
    return True

try:
    api_to_s3 = PythonVirtualenvOperator(
        task_id="task_api_to_s3",
        python_callable=APItoS3,
        requirements=["pandas", "pytz", "boto3"],
        system_site_packages=True,
        dag=dag,
    )
    s3_to_staging = PythonVirtualenvOperator(
        task_id="task_s3_to_staging",
        python_callable=s3ToRedshift,
        requirements=["pandas"],
        system_site_packages=True,
        dag=dag,
    )
    upsert = PostgresOperator(task_id='redshift-upsert',
                              sql='queries/analytics/sku_online_status.sql',
                              postgres_conn_id='redshift',
                              dag=dag)
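The fragment above opens midway through formatting a copy_query string that is not shown. A hypothetical template it might be filling in, given purely as an illustration of a Redshift COPY from S3 with key-based credentials (not the project's actual SQL):

# Hypothetical template only; the original copy_query is not in the snippet.
copy_query = """
    COPY {table}
    FROM 's3://{s3_bucket}/{csv_file}'
    ACCESS_KEY_ID '{access_key}'
    SECRET_ACCESS_KEY '{secret_key}'
    REGION '{region}'
    CSV
    IGNOREHEADER 1;
""".format(
    table=table,
    s3_bucket=s3_bucket,
    csv_file=csv_file_name,
    access_key=credentials.access_key,
    secret_key=credentials.secret_key,
    region=region)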
run_this >> task
# [END howto_operator_python_kwargs]

def callable_virtualenv():
    """
    Example function that will be performed in a virtual environment.

    Importing inside the function ensures that it will not attempt to
    import the library before it is installed.
    """
    from colorama import Fore, Back, Style
    from time import sleep

    print(Fore.RED + 'some red text')
    print(Back.GREEN + 'and with a green background')
    print(Style.DIM + 'and in dim text')
    print(Style.RESET_ALL)
    for _ in range(10):
        print(Style.DIM + 'Please wait...', flush=True)
        sleep(10)
    print('Finished')

virtualenv_task = PythonVirtualenvOperator(
    task_id="virtualenv_python",
    python_callable=callable_virtualenv,
    requirements=["colorama==0.4.0"],
    system_site_packages=False,
    dag=dag,
)
from airflow import DAG
from airflow.operators.python_operator import PythonVirtualenvOperator
from loader import ScrapyRunner
from datetime import datetime

default_args = {
    'start_date': datetime(2019, 1, 1)
}

with DAG(dag_id='marketmind_dag',
         schedule_interval='0 0 * * *',
         default_args=default_args,
         catchup=False) as dag:
    scrapy_runner = ScrapyRunner(spider_module='us_exchange')
    # Pass the callable itself (do not call it), give requirements as a
    # list of pip specifiers, and route extra arguments through op_kwargs
    # rather than directly to the operator.
    t1 = PythonVirtualenvOperator(task_id='us_exchange',
                                  python_version='3.6',
                                  requirements=['scrapy==2.0.1'],
                                  python_callable=scrapy_runner.run_process,
                                  op_kwargs={'spider_name': 'us_exchange'})
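Because PythonVirtualenvOperator serializes the callable and re-imports it in a separate virtualenv process, a plain module-level wrapper is usually more robust than a bound method. A minimal sketch under that assumption; run_spider is a name introduced here, and the ScrapyRunner signature is taken from the snippet above:

def run_spider(spider_name):
    # Hypothetical wrapper (not in the original DAG): the import stays
    # inside the function so it resolves in the virtualenv process.
    from loader import ScrapyRunner

    ScrapyRunner(spider_module=spider_name).run_process()

The task above could then use python_callable=run_spider together with op_kwargs={'spider_name': 'us_exchange'}.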
    dag=dag,
    python_callable=staging_items_to_redshift,
)
staging_rides_to_redshift_task = PythonOperator(
    task_id='s3_staging_rides_to_redshift',
    dag=dag,
    python_callable=staging_rides_to_redshift,
)
rides_receipts_to_s3_task = PythonVirtualenvOperator(
    task_id='rides_receipts_to_s3',
    python_callable=processing_rides_receipts,
    requirements=[
        "fsspec == 0.8.7", "s3fs == 0.5.2", "bs4==0.0.1", "eml-parser==1.14.4"
    ],
    system_site_packages=True,
    op_kwargs={
        'rides': " {{ ti.xcom_pull(task_ids='start_UBER_receipts_processing', key='uber_rides') }}"
    },
    dag=dag)
eats_receipts_to_s3_task = PythonVirtualenvOperator(
    task_id='eats_receipts_to_s3',
    python_callable=processing_eats_receipts,
    requirements=[
        "fsspec == 0.8.7", "s3fs == 0.5.2", "bs4==0.0.1", "eml-parser==1.14.4"
    ],
    system_site_packages=True,
    op_kwargs={
        'eats':
    python_callable=check_csv_load,
    trigger_rule='all_done',
    dag=dag)

CheckCalculateProbability = BranchPythonOperator(
    task_id='CheckCalculateProbability',
    python_callable=check_calculate_probability,
    trigger_rule='all_done',
    dag=dag)

# python3 /home/airflow/gcs/dags/test.py
CallDividendApi = PythonVirtualenvOperator(
    task_id='CallDividendApi',
    python_callable=call_dividend_api,
    requirements=[
        'sendgrid==6.4.8', 'apache-airflow', 'psycopg2-binary',
        'google-cloud-bigquery', 'google-cloud-bigquery-storage', 'pandas',
        'pyarrow', 'datetime', 'pandas_gbq', 'tqdm', 'google-cloud-storage',
        'fsspec', 'gcsfs'
    ],
    python_version='3',
    trigger_rule='all_done',
    dag=dag)

CsvLoad = PythonVirtualenvOperator(
    task_id='CsvLoad',
    python_callable=csv_load,
    requirements=[
        'sendgrid==6.4.8', 'apache-airflow', 'psycopg2-binary',
        'google-cloud-bigquery', 'google-cloud-bigquery-storage', 'pandas',
        'pyarrow', 'datetime', 'pandas_gbq', 'tqdm', 'google-cloud-storage',
        'fsspec', 'gcsfs'
    ],
    python_version='3',
"start_date": datetime(2019, 8, 1), "schedule_interval": "30 22 * * *", "retries": 3, # "depends_on_past": True, "catchup": False } # Tasks # 1. File Watcher # def watch_for_file(): # 2. File Transform # 3. Load DB # 4. Trigger_email dag = DAG("load_dxb_proj", start_date=default_args['start_date'], default_args=default_args, catchup=False) with dag: check_for_file = FileSensor(filepath="home/ash/landing/del.out", fs_conn_id='fs_default', task_id="s1") pythonvenv = PythonVirtualenvOperator( task_id="pythonvenv", python_callable=run_myprog, requirements=['elasticsearch==6.3.1', 'pandas==0.23.4'], python_version='3.6') check_for_file >> pythonvenv
    'retry_delay': timedelta(minutes=5),
    'catchup': False
}

dag = DAG('example_dag_python',
          schedule_interval=timedelta(minutes=5),
          default_args=default_args)

def test_func(**kwargs):
    print("HELLO")

def test_func_two():
    import sys
    print(sys.version)
    print("hi")

t1 = PythonOperator(task_id='test_task',
                    python_callable=test_func,
                    provide_context=True,
                    dag=dag)
t2 = PythonVirtualenvOperator(task_id='test_two',
                              python_version='2',
                              python_callable=test_func_two,
                              dag=dag)

t1 >> t2
    python_callable=check_predict_profile,
    trigger_rule='all_done',
    dag=dag)

CheckUpdateWarehouse = BranchPythonOperator(
    task_id='CheckUpdateWarehouse',
    python_callable=check_update_warehouse,
    trigger_rule='all_done',
    dag=dag)

# python3 /home/airflow/gcs/dags/test.py
TrainModel = PythonVirtualenvOperator(
    task_id='TrainModel',
    python_callable=train_model_and_store,
    requirements=[
        'sendgrid==6.4.8', 'apache-airflow', 'psycopg2-binary',
        'google-cloud-bigquery', 'google-cloud-bigquery-storage', 'pandas',
        'pyarrow', 'datetime', 'pandas_gbq', 'tqdm', 'google-cloud-storage',
        'fsspec', 'sklearn', 'gcsfs', 'cloudstorage'
    ],
    python_version='3',
    trigger_rule='all_done',
    dag=dag)

PredictProfile = PythonVirtualenvOperator(
    task_id='PredictProfile',
    python_callable=predict_profile,
    requirements=[
        'sendgrid==6.4.8', 'apache-airflow', 'psycopg2-binary',
        'google-cloud-bigquery', 'google-cloud-bigquery-storage', 'pandas',
        'pyarrow', 'datetime', 'pandas_gbq', 'tqdm', 'google-cloud-storage',
        'fsspec', 'sklearn', 'gcsfs', 'cloudstorage'
    ],
    python_version='3',
def callable_virtualenv():
    """
    Example function that will be performed in a virtual environment.

    Importing inside the function ensures that it will not attempt to
    import the library before it is installed.
    """
    import nbschedule
    from nbschedule import worker, operator

    # import pip
    # installed_packages = pip.get_installed_distributions()
    # installed_packages_list = sorted(["%s==%s" % (i.key, i.version)
    #                                   for i in installed_packages])
    # print(installed_packages_list)

virtualenv_task = PythonVirtualenvOperator(
    task_id="virtualenv_python",
    python_callable=callable_virtualenv,
    requirements=[
        "colorama==0.4.0",
        "scir",
        "git+https://github.com/as-sher/papermill.git",
        "git+https://github.com/as-sher/nbschedule.git"
    ],
    system_site_packages=False,
    dag=dag,
)
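The commented-out pip.get_installed_distributions() call no longer exists in current pip releases. A small sketch of an equivalent check that could run inside the callable, using pkg_resources (bundled with setuptools) to list what was actually installed into the virtualenv; the function name is introduced here for illustration:

def list_installed_packages():
    # Runs inside the virtualenv, so it reports the packages pip installed
    # from the operator's `requirements` list.
    import pkg_resources

    installed = sorted(
        "%s==%s" % (dist.project_name, dist.version)
        for dist in pkg_resources.working_set
    )
    print(installed)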
def test_lambda(self):
    with self.assertRaises(AirflowException):
        PythonVirtualenvOperator(python_callable=lambda x: 4,
                                 task_id='task',
                                 dag=self.dag)
    filename = csv_buf.getvalue()
    s3_hook.load_string(filename, key, bucket, replace=True)
    return True

try:
    customer_retention = PostgresOperator(
        task_id='redshift-customer_retention',
        sql='queries/analytics/customer_retention.sql',
        postgres_conn_id='redshift',
        dag=dag)
    soh_to_s3 = PythonVirtualenvOperator(
        task_id="task_api_to_s3",
        python_callable=SOHDailyToS3,
        requirements=["pandas"],
        system_site_packages=True,
        dag=dag,
    )
    customer_repeat_rate = PostgresOperator(
        task_id='redshift-customer_repeat_rate',
        sql='queries/analytics/customer_repeat_rate.sql',
        postgres_conn_id='redshift',
        dag=dag)

    start = DummyOperator(task_id='start', dag=dag)
    end = DummyOperator(task_id='end', dag=dag)

    start >> [customer_retention, soh_to_s3] >> customer_repeat_rate >> end
                'emp_code': df['user_id'][line],
                'index': line
            })

        DOMAIN = "http://10.0.1.49/b/v1"
        ADD_ATT = DOMAIN + "/attendance/add"
        json_data = {
            'attendances': attendances,
            'tz': 'Asia/Kuwait',
            'name': dt_string,
            'db': 'Boutiquaat_Test',
            'login': '******',
            'pswd': 'admin',
        }
        print(json_data, "PPPPPPPPPPPPPP")
        response = requests.post(ADD_ATT, json=json_data)
        print('__________ Response : ')
        pprint(response.json())
    except Exception as e:
        raise AirflowException(e)

virtualenv_task = PythonVirtualenvOperator(
    task_id="Odoo_Bioumetric",
    python_callable=update_finger_data,
    requirements=["pandas"],
    system_site_packages=True,
    dag=dag,
)