    def _run_as_operator(self, fn, **kwargs):
        task = PythonVirtualenvOperator(
            python_callable=fn,
            task_id='task',
            dag=self.dag,
            **kwargs)
        task.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)
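For context, a hedged sketch of how a helper like this is typically invoked from a test; the callable body and the requirement name are illustrative:

    def test_with_requirements(self):
        def f():
            # importable only because it is listed in requirements
            import funcsigs  # noqa: F401

        self._run_as_operator(f, requirements=['funcsigs'])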
Code example #3
    def test_dill_warning(self):
        def f():
            pass

        with self.assertRaises(AirflowException):
            PythonVirtualenvOperator(python_callable=f,
                                     task_id='task',
                                     dag=self.dag,
                                     use_dill=True,
                                     system_site_packages=False)
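By contrast, a hedged sketch of a configuration that should not raise: when system_site_packages is False, use_dill=True also requires 'dill' in the requirements list so it can be installed into the virtualenv.

    def test_dill_in_requirements(self):
        def f():
            pass

        PythonVirtualenvOperator(python_callable=f,
                                 task_id='task',
                                 dag=self.dag,
                                 use_dill=True,
                                 system_site_packages=False,
                                 requirements=['dill'])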
Code example #4
    def test_provide_context(self):
        def fn():
            pass

        task = PythonVirtualenvOperator(
            python_callable=fn,
            python_version=sys.version_info[0],
            task_id='task',
            dag=self.dag,
            provide_context=True,
        )
        self.assertTrue(task.provide_context)
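A minimal sketch of a callable that actually consumes the context once provide_context=True; the key used here is an assumption, and only serializable context entries survive the trip into the virtualenv:

    def fn(**kwargs):
        # 'ds' is the execution date as a plain string, so it serializes cleanly.
        print(kwargs['ds'])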
Code example #5
    def test_config_context(self):
        """
        This test ensures we can use dag_run from the context
        to access the configuration at run time that's being
        passed from the UI, CLI, and REST API.
        """
        self.dag.create_dagrun(
            run_id='manual__' + DEFAULT_DATE.isoformat(),
            execution_date=DEFAULT_DATE,
            start_date=DEFAULT_DATE,
            state=State.RUNNING,
            external_trigger=False,
        )

        def pass_function(**kwargs):
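            # Merely accessing the attribute must not raise.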
            kwargs['dag_run'].conf

        t = PythonVirtualenvOperator(task_id='config_dag_run',
                                     dag=self.dag,
                                     provide_context=True,
                                     python_callable=pass_function)
        t.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)
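The other half of this pattern, as a hedged sketch: the dict supplied at trigger time (for example, airflow trigger_dag -c '{"key": "value"}' <dag_id> on the 1.10-era CLI) is exactly what kwargs['dag_run'].conf returns inside the callable; the key name is illustrative:

    def read_conf(**kwargs):
        conf = kwargs['dag_run'].conf or {}
        print(conf.get('key'))  # 'value' when triggered with the conf above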
Code example #6
                   table=table,
                   s3_bucket=s3_bucket,
                   csv_file=csv_file_name,
                   access_key=credentials.access_key,
                   secret_key=credentials.secret_key,
                   region=region)
    postgres_hook.run(copy_query)
    return True


try:

    api_to_s3 = PythonVirtualenvOperator(
        task_id="task_api_to_s3",
        python_callable=APItoS3,
        requirements=["pandas", "pytz", "boto3"],
        system_site_packages=True,
        dag=dag,
    )
    s3_to_staging = PythonVirtualenvOperator(
        task_id="task_s3_to_staging",
        python_callable=s3ToRedshift,
        requirements=["pandas"],
        system_site_packages=True,
        dag=dag,
    )
    upsert = PostgresOperator(task_id='redshift-upsert',
                              sql='queries/analytics/sku_online_status.sql',
                              postgres_conn_id='redshift',
                              dag=dag)
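The .format() call at the top of this excerpt fills a Redshift COPY statement. The template itself is not shown in the source, but a hedged sketch under standard Redshift COPY syntax might look like:

    copy_query_template = """
        COPY {table}
        FROM 's3://{s3_bucket}/{csv_file}'
        CREDENTIALS 'aws_access_key_id={access_key};aws_secret_access_key={secret_key}'
        REGION '{region}'
        CSV IGNOREHEADER 1;
    """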
Code example #7
    run_this >> task
# [END howto_operator_python_kwargs]


def callable_virtualenv():
    """
    Example function that will be performed in a virtual environment.

    Importing inside the function (rather than at module level) ensures that
    the library is not imported before it is installed in the virtualenv.
    """
    from colorama import Fore, Back, Style
    from time import sleep
    print(Fore.RED + 'some red text')
    print(Back.GREEN + 'and with a green background')
    print(Style.DIM + 'and in dim text')
    print(Style.RESET_ALL)
    for _ in range(10):
        print(Style.DIM + 'Please wait...', flush=True)
        sleep(10)
    print('Finished')


virtualenv_task = PythonVirtualenvOperator(
    task_id="virtualenv_python",
    python_callable=callable_virtualenv,
    requirements=["colorama==0.4.0"],
    system_site_packages=False,
    dag=dag,
)
Code example #8
from airflow import DAG
from airflow.operators.python_operator import PythonVirtualenvOperator
from loader import ScrapyRunner


from datetime import datetime

default_args = {
    'start_date': datetime(2019, 1, 1)
}

with DAG(dag_id='marketmind_dag', schedule_interval='0 0 * * *', default_args=default_args, catchup=False) as dag:

    def run_spider(spider_name):
        # The callable's source is serialized and executed in a fresh
        # virtualenv interpreter, so it must be self-contained: import
        # and construct everything inside the function body.
        from loader import ScrapyRunner
        ScrapyRunner(spider_module=spider_name).run_process()

    t1 = PythonVirtualenvOperator(task_id='us_exchange',
                                  python_version='3.6',
                                  requirements=['scrapy==2.0.1'],  # must be a list of pip specs
                                  python_callable=run_spider,  # pass the function, do not call it
                                  op_kwargs={'spider_name': 'us_exchange'})
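Routing the spider name through op_kwargs, rather than closing over an object built at DAG-parse time, is what keeps the callable serializable for the virtualenv.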
Code example #9
    dag=dag,
    python_callable=staging_items_to_redshift,
)

staging_rides_to_redshift_task = PythonOperator(
    task_id='s3_staging_rides_to_redshift',
    dag=dag,
    python_callable=staging_rides_to_redshift,
)

rides_receipts_to_s3_task = PythonVirtualenvOperator(
    task_id='rides_receipts_to_s3',
    python_callable=processing_rides_receipts,
    requirements=[
        "fsspec == 0.8.7", "s3fs == 0.5.2", "bs4==0.0.1", "eml-parser==1.14.4"
    ],
    system_site_packages=True,
    op_kwargs={
        'rides':
        " {{ ti.xcom_pull(task_ids='start_UBER_receipts_processing', key='uber_rides') }}"
    },
    dag=dag)

eats_receipts_to_s3_task = PythonVirtualenvOperator(
    task_id='eats_receipts_to_s3',
    python_callable=processing_eats_receipts,
    requirements=[
        "fsspec == 0.8.7", "s3fs == 0.5.2", "bs4==0.0.1", "eml-parser==1.14.4"
    ],
    system_site_packages=True,
    op_kwargs={
        'eats':
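A hedged sketch of the receiving side of the templated op_kwargs above; the callable name comes from the source, while the parsing is an assumption (in Airflow 1.10, Jinja renders the xcom_pull result into op_kwargs as a plain string):

    def processing_rides_receipts(rides):
        import ast
        # Recover the list if the pulled XCom value round-trips as a
        # Python literal; otherwise treat `rides` as an opaque string.
        rides_list = ast.literal_eval(rides.strip())
        print(rides_list)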
Code example #10
                                    python_callable=check_csv_load,
                                    trigger_rule='all_done',
                                    dag=dag)

CheckCalculateProbability = BranchPythonOperator(
    task_id='CheckCalculateProbability',
    python_callable=check_calculate_probability,
    trigger_rule='all_done',
    dag=dag)
#python3 /home/airflow/gcs/dags/test.py
CallDividendApi = PythonVirtualenvOperator(
    task_id='CallDividendApi',
    python_callable=call_dividend_api,
    requirements=[
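        # NOTE: 'datetime' is in the standard library; pip resolves this name
        # to the unrelated DateTime package, which is likely unintended.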
        'sendgrid==6.4.8', 'apache-airflow', 'psycopg2-binary',
        'google-cloud-bigquery', 'google-cloud-bigquery-storage', 'pandas',
        'pyarrow', 'datetime', 'pandas_gbq', 'tqdm', 'google-cloud-storage',
        'fsspec', 'gcsfs'
    ],
    python_version='3',
    trigger_rule='all_done',
    dag=dag)
CsvLoad = PythonVirtualenvOperator(
    task_id='CsvLoad',
    python_callable=csv_load,
    requirements=[
        'sendgrid==6.4.8', 'apache-airflow', 'psycopg2-binary',
        'google-cloud-bigquery', 'google-cloud-bigquery-storage', 'pandas',
        'pyarrow', 'datetime', 'pandas_gbq', 'tqdm', 'google-cloud-storage',
        'fsspec', 'gcsfs'
    ],
    python_version='3',
Code example #11
    "start_date": datetime(2019, 8, 1),
    "schedule_interval": "30 22 * * *",
    "retries": 3,
    # "depends_on_past": True,
    "catchup": False
}

# Tasks
# 1.  File Watcher
# def watch_for_file():
# 2.  File Transform
# 3.  Load DB
# 4.  Trigger_email

dag = DAG("load_dxb_proj",
          start_date=default_args['start_date'],
          default_args=default_args,
          catchup=False)

with dag:
    check_for_file = FileSensor(filepath="home/ash/landing/del.out",
                                fs_conn_id='fs_default',
                                task_id="s1")
    pythonvenv = PythonVirtualenvOperator(
        task_id="pythonvenv",
        python_callable=run_myprog,
        requirements=['elasticsearch==6.3.1', 'pandas==0.23.4'],
        python_version='3.6')

    check_for_file >> pythonvenv
Code example #12
    'retry_delay': timedelta(minutes=5),
    # NOTE: catchup is a DAG argument, not a task-level default; it was
    # ignored here and is passed to the DAG below instead.
}

dag = DAG('example_dag_python',
          schedule_interval=timedelta(minutes=5),
          default_args=default_args,
          catchup=False)


def test_func(**kwargs):
    print("HELLO")


def test_func_two():
    import sys
    print(sys.version)
    print("hi")


t1 = PythonOperator(task_id='test_task',
                    python_callable=test_func,
                    provide_context=True,
                    dag=dag)

t2 = PythonVirtualenvOperator(task_id='test_two',
                              python_version='2',
                              python_callable=test_func_two,
                              dag=dag)

t1 >> t2
Code example #13
    python_callable=check_predict_profile,
    trigger_rule='all_done',
    dag=dag)

CheckUpdateWarehouse = BranchPythonOperator(
    task_id='CheckUpdateWarehouse',
    python_callable=check_update_warehouse,
    trigger_rule='all_done',
    dag=dag)
#python3 /home/airflow/gcs/dags/test.py
TrainModel = PythonVirtualenvOperator(
    task_id='TrainModel',
    python_callable=train_model_and_store,
    requirements=[
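        # NOTE: 'datetime' resolves to the unrelated DateTime package (the
        # stdlib module needs no install), and 'sklearn' is a deprecated
        # alias for scikit-learn; prefer the real package names.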
        'sendgrid==6.4.8', 'apache-airflow', 'psycopg2-binary',
        'google-cloud-bigquery', 'google-cloud-bigquery-storage', 'pandas',
        'pyarrow', 'datetime', 'pandas_gbq', 'tqdm', 'google-cloud-storage',
        'fsspec', 'sklearn', 'gcsfs', 'cloudstorage'
    ],
    python_version='3',
    trigger_rule='all_done',
    dag=dag)
PredictProfile = PythonVirtualenvOperator(
    task_id='PredictProfile',
    python_callable=predict_profile,
    requirements=[
        'sendgrid==6.4.8', 'apache-airflow', 'psycopg2-binary',
        'google-cloud-bigquery', 'google-cloud-bigquery-storage', 'pandas',
        'pyarrow', 'datetime', 'pandas_gbq', 'tqdm', 'google-cloud-storage',
        'fsspec', 'sklearn', 'gcsfs', 'cloudstorage'
    ],
    python_version='3',
Code example #14
File: sample_dag.py  Project: as-sher/nbschedule
def callable_virtualenv():
    """
    Example function that will be performed in a virtual environment.
    Importing inside the function (rather than at module level) ensures that
    the library is not imported before it is installed in the virtualenv.
    """
    import nbschedule
    from nbschedule import worker, operator


    # import pip
    # installed_packages = pip.get_installed_distributions()
    # installed_packages_list = sorted("%s==%s" % (i.key, i.version)
    #                                  for i in installed_packages)
    # print(installed_packages_list)


virtualenv_task = PythonVirtualenvOperator(
    task_id="virtualenv_python",
    python_callable=callable_virtualenv,
    requirements=[
        "colorama==0.4.0",
        "scir",
        "git+https://github.com/as-sher/papermill.git",
        "git+https://github.com/as-sher/nbschedule.git"
    ],
    system_site_packages=False,
    dag=dag,
)
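Note that pip-style VCS requirements such as the git+https entries above install fine inside the virtualenv, provided git is available on the worker.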
Code example #15
    def test_lambda(self):
        with self.assertRaises(AirflowException):
            PythonVirtualenvOperator(python_callable=lambda x: 4,
                                     task_id='task',
                                     dag=self.dag)
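Lambdas are rejected because the operator serializes the callable's source into a script for the virtualenv; a hedged sketch of the working alternative is simply a named function:

    def four(x):
        return 4

    task = PythonVirtualenvOperator(python_callable=four,
                                    task_id='task',
                                    dag=self.dag)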
Code example #16
    filename = csv_buf.getvalue()
    s3_hook.load_string(filename, key, bucket, replace=True)
    return True


try:
    customer_retention = PostgresOperator(
        task_id='redshift-customer_retention',
        sql='queries/analytics/customer_retention.sql',
        postgres_conn_id='redshift',
        dag=dag)

    soh_to_s3 = PythonVirtualenvOperator(
        task_id="task_soh_to_s3",  # was "task_api_to_s3", which belongs to a different task
        python_callable=SOHDailyToS3,
        requirements=["pandas"],
        system_site_packages=True,
        dag=dag,
    )

    customer_repeat_rate = PostgresOperator(
        task_id='redshift-customer_repeat_rate',
        sql='queries/analytics/customer_repeat_rate.sql',
        postgres_conn_id='redshift',
        dag=dag)

    start = DummyOperator(task_id='start', dag=dag)

    end = DummyOperator(task_id='end', dag=dag)

    start >> [customer_retention, soh_to_s3] >> customer_repeat_rate >> end
Code example #17
                'emp_code': df['user_id'][line],
                'index': line
            })
        DOMAIN = "http://10.0.1.49/b/v1"
        ADD_ATT = DOMAIN + "/attendance/add"
        json_data = {
            'attendances': attendances,
            'tz': 'Asia/Kuwait',
            'name': dt_string,
            'db': 'Boutiquaat_Test',
            'login': '******',
            'pswd': 'admin',
        }

        print('Request payload:', json_data)
        response = requests.post(ADD_ATT, json=json_data)
        print('__________ Response : ')
        pprint(response.json())

    except Exception as e:
        raise AirflowException(e)


virtualenv_task = PythonVirtualenvOperator(
    task_id="Odoo_Biometric",  # typo fixed from "Odoo_Bioumetric"
    python_callable=update_finger_data,
    requirements=["pandas"],
    system_site_packages=True,
    dag=dag,
)