    def _run_as_operator(self, fn, **kwargs):
        # Helper: wrap the callable in a PythonVirtualenvOperator bound to
        # the test DAG and execute it once over a single schedule interval.
        task = PythonVirtualenvOperator(
            python_callable=fn,
            task_id='task',
            dag=self.dag,
            **kwargs)
        task.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)
    def test_dill_warning(self):
        def f():
            pass

        # use_dill without system site packages and without 'dill' in
        # requirements cannot work, so construction must fail fast.
        with self.assertRaises(AirflowException):
            PythonVirtualenvOperator(python_callable=f,
                                     task_id='task',
                                     dag=self.dag,
                                     use_dill=True,
                                     system_site_packages=False)
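For contrast, a minimal sketch of the configuration that the guard above permits, using the _run_as_operator helper; the test name and body are illustrative, assuming that listing 'dill' in requirements satisfies the check:

    def test_dill_in_requirements(self):
        def f():
            pass

        # With 'dill' installed inside the venv, use_dill is allowed
        # even without system site packages (hypothetical test).
        self._run_as_operator(f,
                              use_dill=True,
                              requirements=['dill'],
                              system_site_packages=False)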
Example #4
    def test_provide_context(self):
        def fn():
            pass

        task = PythonVirtualenvOperator(
            python_callable=fn,
            python_version=sys.version_info[0],
            task_id='task',
            dag=self.dag,
            provide_context=True,
        )
        self.assertTrue(task.provide_context)
Example #5
    def test_config_context(self):
        """
        This test ensures we can use dag_run from the context
        to access the configuration at run time that's being
        passed from the UI, CLI, and REST API.
        """
        self.dag.create_dagrun(
            run_id='manual__' + DEFAULT_DATE.isoformat(),
            execution_date=DEFAULT_DATE,
            start_date=DEFAULT_DATE,
            state=State.RUNNING,
            external_trigger=False,
        )

        def pass_function(**kwargs):
            kwargs['dag_run'].conf

        t = PythonVirtualenvOperator(task_id='config_dag_run',
                                     dag=self.dag,
                                     provide_context=True,
                                     python_callable=pass_function)
        t.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)
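For reference, a hedged sketch of a callable that actually consumes that run-time configuration; the 'date' key and the trigger command in the comment are illustrative:

def read_conf(**kwargs):
    # dag_run.conf carries the JSON supplied at trigger time, e.g.
    #   airflow trigger_dag -c '{"date": "2020-01-01"}' <dag_id>
    conf = kwargs['dag_run'].conf or {}
    print(conf.get('date'))  # 'date' is a hypothetical key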
Example #6
                   table=table,
                   s3_bucket=s3_bucket,
                   csv_file=csv_file_name,
                   access_key=credentials.access_key,
                   secret_key=credentials.secret_key,
                   region=region)
    # Execute the COPY statement assembled above against Redshift.
    postgres_hook.run(copy_query)
    return True


api_to_s3 = PythonVirtualenvOperator(
    task_id="task_api_to_s3",
    python_callable=APItoS3,
    requirements=["pandas", "pytz", "boto3"],
    system_site_packages=True,
    dag=dag,
)
s3_to_staging = PythonVirtualenvOperator(
    task_id="task_s3_to_staging",
    python_callable=s3ToRedshift,
    requirements=["pandas"],
    system_site_packages=True,
    dag=dag,
)
upsert = PostgresOperator(task_id='redshift-upsert',
                          sql='queries/analytics/sku_online_status.sql',
                          postgres_conn_id='redshift',
                          dag=dag)
Example #7
    run_this >> task
# [END howto_operator_python_kwargs]


def callable_virtualenv():
    """
    Example function that will be performed in a virtual environment.

    Importing at the function level ensures that it will not attempt to import the
    library before it is installed.
    """
    from colorama import Fore, Back, Style
    from time import sleep
    print(Fore.RED + 'some red text')
    print(Back.GREEN + 'and with a green background')
    print(Style.DIM + 'and in dim text')
    print(Style.RESET_ALL)
    for _ in range(10):
        print(Style.DIM + 'Please wait...', flush=True)
        sleep(10)
    print('Finished')


virtualenv_task = PythonVirtualenvOperator(
    task_id="virtualenv_python",
    python_callable=callable_virtualenv,
    requirements=["colorama==0.4.0"],
    system_site_packages=False,
    dag=dag,
)
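The callable runs in a fresh interpreter inside the venv, so it cannot see module globals; values must travel through op_args/op_kwargs, which the operator serializes alongside the function. A minimal sketch of that route (greet and its argument are illustrative):

def greet(name):
    # Only the serialized arguments are visible here, not module globals.
    print('Hello, %s!' % name)


greet_task = PythonVirtualenvOperator(
    task_id='greet_in_venv',
    python_callable=greet,
    op_kwargs={'name': 'world'},
    dag=dag,
)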
Example #8
from airflow import DAG
from airflow.operators.python_operator import PythonVirtualenvOperator
from loader import ScrapyRunner


from datetime import datetime

default_args = {
    'start_date': datetime(2019, 1, 1)
}

with DAG(dag_id='marketmind_dag', schedule_interval='0 0 * * *', default_args=default_args, catchup=False) as dag:

    scrapy_runner = ScrapyRunner(spider_module='us_exchange')

    t1 = PythonVirtualenvOperator(task_id='us_exchange',
                                  python_version='3.6',
                                  requirements=['scrapy==2.0.1'],
                                  python_callable=scrapy_runner.run_process,
                                  op_kwargs={'spider_name': 'us_exchange'})
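Even with the requirements list and the uncalled callable fixed, a bound method of a locally constructed object is fragile here, because the operator re-executes the callable's source in a separate interpreter. A safer shape is a self-contained module-level wrapper; a hedged sketch, assuming run_process takes the spider name:

def run_us_exchange_spider(spider_name):
    # Everything the venv copy of this function needs must be imported
    # and constructed inside the body.
    from loader import ScrapyRunner
    runner = ScrapyRunner(spider_module='us_exchange')
    runner.run_process(spider_name)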
Example #9

# Head of this operator reconstructed by analogy with the rides task below.
staging_items_to_redshift_task = PythonOperator(
    task_id='s3_staging_items_to_redshift',
    dag=dag,
    python_callable=staging_items_to_redshift,
)

staging_rides_to_redshift_task = PythonOperator(
    task_id='s3_staging_rides_to_redshift',
    dag=dag,
    python_callable=staging_rides_to_redshift,
)

rides_receipts_to_s3_task = PythonVirtualenvOperator(
    task_id='rides_receipts_to_s3',
    python_callable=processing_rides_receipts,
    requirements=[
        "fsspec==0.8.7", "s3fs==0.5.2", "bs4==0.0.1", "eml-parser==1.14.4"
    ],
    system_site_packages=True,
    op_kwargs={
        'rides':
        " {{ ti.xcom_pull(task_ids='start_UBER_receipts_processing', key='uber_rides') }}"
    },
    dag=dag)

eats_receipts_to_s3_task = PythonVirtualenvOperator(
    task_id='eats_receipts_to_s3',
    python_callable=processing_eats_receipts,
    requirements=[
        "fsspec==0.8.7", "s3fs==0.5.2", "bs4==0.0.1", "eml-parser==1.14.4"
    ],
    system_site_packages=True,
    op_kwargs={
        'eats':
        # Value reconstructed by analogy with the rides task above (assumption).
        " {{ ti.xcom_pull(task_ids='start_UBER_receipts_processing', key='uber_eats') }}"
    },
    dag=dag)

Example #10

# Head of this operator reconstructed from the pattern of the branch
# operators below (assumption).
CheckCsvLoad = BranchPythonOperator(
    task_id='CheckCsvLoad',
    python_callable=check_csv_load,
    trigger_rule='all_done',
    dag=dag)

CheckCalculateProbability = BranchPythonOperator(
    task_id='CheckCalculateProbability',
    python_callable=check_calculate_probability,
    trigger_rule='all_done',
    dag=dag)
# python3 /home/airflow/gcs/dags/test.py
CallDividendApi = PythonVirtualenvOperator(
    task_id='CallDividendApi',
    python_callable=call_dividend_api,
    requirements=[
        'sendgrid==6.4.8', 'apache-airflow', 'psycopg2-binary',
        'google-cloud-bigquery', 'google-cloud-bigquery-storage', 'pandas',
        'pyarrow', 'datetime', 'pandas_gbq', 'tqdm', 'google-cloud-storage',
        'fsspec', 'gcsfs'
    ],
    python_version='3',
    trigger_rule='all_done',
    dag=dag)
CsvLoad = PythonVirtualenvOperator(
    task_id='CsvLoad',
    python_callable=csv_load,
    requirements=[
        'sendgrid==6.4.8', 'apache-airflow', 'psycopg2-binary',
        'google-cloud-bigquery', 'google-cloud-bigquery-storage', 'pandas',
        'pyarrow', 'datetime', 'pandas_gbq', 'tqdm', 'google-cloud-storage',
        'fsspec', 'gcsfs'
    ],
    python_version='3',
    # Closing arguments reconstructed to mirror CallDividendApi above.
    trigger_rule='all_done',
    dag=dag)
Example #11
    "start_date": datetime(2019, 8, 1),
    "schedule_interval": "30 22 * * *",
    "retries": 3,
    # "depends_on_past": True,
    "catchup": False
}

# Tasks
# 1.  File Watcher
# def watch_for_file():
# 2.  File Transform
# 3.  Load DB
# 4.  Trigger_email

dag = DAG("load_dxb_proj",
          start_date=default_args['start_date'],
          schedule_interval=default_args['schedule_interval'],
          default_args=default_args,
          catchup=False)

with dag:
    check_for_file = FileSensor(filepath="home/ash/landing/del.out",
                                fs_conn_id='fs_default',
                                task_id="s1")
    pythonvenv = PythonVirtualenvOperator(
        task_id="pythonvenv",
        python_callable=run_myprog,
        requirements=['elasticsearch==6.3.1', 'pandas==0.23.4'],
        python_version='3.6')

    check_for_file >> pythonvenv
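run_myprog is defined elsewhere in the source file; since it executes inside the freshly built virtualenv, its imports belong inside the body. A hypothetical sketch consistent with the pinned requirements (the file path reuses the sensor's target; the Elasticsearch cluster location is an assumption):

def run_myprog():
    # Imported here, after the venv has installed the pins above.
    import pandas as pd
    from elasticsearch import Elasticsearch

    df = pd.read_csv("home/ash/landing/del.out")
    es = Elasticsearch()  # defaults to localhost:9200 (assumption)
    print(len(df), es.info())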
Example #12
    'retry_delay': timedelta(minutes=5),
    'catchup': False
}

dag = DAG('example_dag_python',
          schedule_interval=timedelta(minutes=5),
          default_args=default_args)


def test_func(**kwargs):
    print("HELLO")


def test_func_two():
    import sys
    print(sys.version)
    print("hi")


t1 = PythonOperator(task_id='test_task',
                    python_callable=test_func,
                    provide_context=True,
                    dag=dag)

t2 = PythonVirtualenvOperator(task_id='test_two',
                              python_version='2',
                              python_callable=test_func_two,
                              dag=dag)

t1 >> t2
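With provide_context=True (Airflow 1.x), the scheduler passes the task context into the callable as keyword arguments, which is why test_func accepts **kwargs even though it ignores them. A short sketch of actually using the context; 'ds' is the standard execution-date key:

def print_execution_date(**kwargs):
    # 'ds' holds the execution date as a YYYY-MM-DD string.
    print('running for', kwargs['ds'])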
Example #13
# Head of this operator reconstructed from the sibling branch operator below.
CheckPredictProfile = BranchPythonOperator(
    task_id='CheckPredictProfile',
    python_callable=check_predict_profile,
    trigger_rule='all_done',
    dag=dag)

CheckUpdateWarehouse = BranchPythonOperator(
    task_id='CheckUpdateWarehouse',
    python_callable=check_update_warehouse,
    trigger_rule='all_done',
    dag=dag)
# python3 /home/airflow/gcs/dags/test.py
TrainModel = PythonVirtualenvOperator(
    task_id='TrainModel',
    python_callable=train_model_and_store,
    requirements=[
        'sendgrid==6.4.8', 'apache-airflow', 'psycopg2-binary',
        'google-cloud-bigquery', 'google-cloud-bigquery-storage', 'pandas',
        'pyarrow', 'datetime', 'pandas_gbq', 'tqdm', 'google-cloud-storage',
        'fsspec', 'sklearn', 'gcsfs', 'cloudstorage'
    ],
    python_version='3',
    trigger_rule='all_done',
    dag=dag)
PredictProfile = PythonVirtualenvOperator(
    task_id='PredictProfile',
    python_callable=predict_profile,
    requirements=[
        'sendgrid==6.4.8', 'apache-airflow', 'psycopg2-binary',
        'google-cloud-bigquery', 'google-cloud-bigquery-storage', 'pandas',
        'pyarrow', 'datetime', 'pandas_gbq', 'tqdm', 'google-cloud-storage',
        'fsspec', 'sklearn', 'gcsfs', 'cloudstorage'
    ],
    python_version='3',
    # Closing arguments reconstructed to mirror TrainModel above.
    trigger_rule='all_done',
    dag=dag)
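The two BranchPythonOperator callables above must return the task_id of the branch to follow; that contract comes from Airflow, while the condition below is purely illustrative:

def check_update_warehouse():
    # A branch callable returns the task_id of the downstream task to run.
    warehouse_is_stale = True  # hypothetical check
    return 'TrainModel' if warehouse_is_stale else 'PredictProfile'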
Example #14
def callable_virtualenv():
    """
    Example function that will be performed in a virtual environment.
    Importing at the function level ensures that it will not attempt to import the
    library before it is installed.
    """
    import nbschedule
    from nbschedule import worker, operator

    # import pip
    # installed_packages = pip.get_installed_distributions()
    # installed_packages_list = sorted("%s==%s" % (i.key, i.version)
    #                                  for i in installed_packages)
    # print(installed_packages_list)


virtualenv_task = PythonVirtualenvOperator(
    task_id="virtualenv_python",
    python_callable=callable_virtualenv,
    requirements=[
        "colorama==0.4.0",
        "scir",
        "git+https://github.com/as-sher/papermill.git",
        "git+https://github.com/as-sher/nbschedule.git"
    ],
    system_site_packages=False,
    dag=dag,
)
Example #15

    def test_lambda(self):
        # The operator serializes the callable's source, so only named
        # functions are supported and lambdas are rejected outright.
        with self.assertRaises(AirflowException):
            PythonVirtualenvOperator(python_callable=lambda x: 4,
                                     task_id='task',
                                     dag=self.dag)
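The accepted shape is a named, module-level function; a minimal sketch of the equivalent callable (return_four is illustrative):

def return_four(x):
    # Named functions can be source-serialized into the venv script.
    return 4

PythonVirtualenvOperator(python_callable=return_four,
                         task_id='task',
                         dag=self.dag)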
Example #16
    filename = csv_buf.getvalue()
    # Upload the in-memory CSV to S3, overwriting any existing object.
    s3_hook.load_string(filename, key, bucket, replace=True)
    return True


customer_retention = PostgresOperator(
    task_id='redshift-customer_retention',
    sql='queries/analytics/customer_retention.sql',
    postgres_conn_id='redshift',
    dag=dag)

soh_to_s3 = PythonVirtualenvOperator(
    task_id="task_soh_to_s3",  # original read "task_api_to_s3", apparently a copy-paste slip
    python_callable=SOHDailyToS3,
    requirements=["pandas"],
    system_site_packages=True,
    dag=dag,
)

customer_repeat_rate = PostgresOperator(
    task_id='redshift-customer_repeat_rate',
    sql='queries/analytics/customer_repeat_rate.sql',
    postgres_conn_id='redshift',
    dag=dag)

start = DummyOperator(task_id='start', dag=dag)

end = DummyOperator(task_id='end', dag=dag)

start >> [customer_retention, soh_to_s3] >> customer_repeat_rate >> end
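The truncated helper at the top of this example evidently renders a DataFrame into an in-memory CSV buffer before handing it to S3Hook.load_string. A hedged sketch of that pattern; the function name, connection id, and arguments are illustrative:

import io

import pandas as pd
from airflow.hooks.S3_hook import S3Hook


def df_to_s3(df, key, bucket):
    # Serialize to an in-memory buffer; no temp file needed.
    csv_buf = io.StringIO()
    df.to_csv(csv_buf, index=False)
    s3_hook = S3Hook(aws_conn_id='aws_default')
    s3_hook.load_string(csv_buf.getvalue(), key, bucket, replace=True)
    return True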
Example #17
                'emp_code': df['user_id'][line],
                'index': line
            })
        DOMAIN = "http://10.0.1.49/b/v1"
        ADD_ATT = DOMAIN + "/attendance/add"
        json_data = {
            'attendances': attendances,
            'tz': 'Asia/Kuwait',
            'name': dt_string,
            'db': 'Boutiquaat_Test',
            'login': '******',
            'pswd': 'admin',
        }

        print('Request payload:', json_data)
        response = requests.post(ADD_ATT, json=json_data)
        print('Response:')
        pprint(response.json())

    except Exception as e:
        raise AirflowException(e)


virtualenv_task = PythonVirtualenvOperator(
    task_id="Odoo_Bioumetric",
    python_callable=update_finger_data,
    requirements=["pandas"],
    system_site_packages=True,
    dag=dag,
)
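Since update_finger_data already converts any failure into an AirflowException, a small hedged refinement is to bound the request time and surface HTTP errors explicitly so Airflow's retries can kick in; the helper and its 30-second timeout are illustrative:

import requests
from airflow.exceptions import AirflowException


def post_attendances(url, payload):
    # Fail on timeouts and HTTP 4xx/5xx instead of silently printing.
    try:
        response = requests.post(url, json=payload, timeout=30)
        response.raise_for_status()
        return response.json()
    except Exception as e:
        raise AirflowException(e)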