Example #1
def build_email(**context):

    message = ""

    for key, value in context.items():
        try:
            message = message + "\n\n" + key + ": " + str(value)
        except Exception:
            message = message + "\n\n" + key + ": N/A"

    log_dag_name = os.environ['AIRFLOW_CTX_DAG_ID']
    log_task_name = os.environ['AIRFLOW_CTX_TASK_ID']
    log_time = os.environ['AIRFLOW_CTX_EXECUTION_DATE']
    log = '/home/pchoix/airflow/logs/' + log_dag_name + '/' + log_task_name + '/' + log_time + '/' + '1.log'
    print("log path: " + log)
    file1 = open(log, "r")

    me = os.path.realpath(__file__)
    print("me: " + me)
    file2 = open(me, "r")

    email_op = EmailOperator(
        task_id='send_email',
        to="*****@*****.**",
        subject="Test Email With Log Attachment using EmailOperator",
        html_content=message,
        files=[file1.name, file2.name])

    email_op.execute(context)

    file1.close()
    file2.close()
Example #2
 def _run_as_operator(self, **kwargs):
     task = EmailOperator(
         to='*****@*****.**',
         subject='Test Run',
         html_content='The quick brown fox jumps over the lazy dog',
         task_id='task',
         dag=self.dag,
         **kwargs)
     task.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)
Example #4
def build_email(**context):
    with open('/tmp/cc_report.xlsx', mode='r') as file:
        email_op = EmailOperator(
            task_id='send_email',
            to=['*****@*****.**', '*****@*****.**', '*****@*****.**'],
            subject="CC report",
            html_content='Hello, <br/>',
            files=[file.name],
        )
        email_op.execute(context)
Example #5
def build_email(**context):
    with NamedTemporaryFile(mode='w+', suffix=".txt") as file:
        file.write("Hello World")
        file.flush()  # flush so the attachment is not empty when the email is sent

        email_op = EmailOperator(
            task_id='send_email',
            to="*****@*****.**",
            subject="Test Email Please Ignore",
            html_content=None,
            files=[file.name],
        )
        email_op.execute(context)
Example #6
def error_email(context):
    """
    Define the callback to post on Slack if a failure is detected in the Workflow
    :return: operator.execute
    """
    error_email = EmailOperator(
        task_id="error_email",
        trigger_rule=TriggerRule.ONE_FAILED,
        to=['*****@*****.**'],
        subject="af_advt_bdt_publish_domestic_sdk_adv_rank error",
        html_content="af_advt_bdt_publish_domestic_sdk_adv_rank error",
    )
    return error_email.execute(context=context)
Example #7
def read_data(**kwargs):
    cluster = cl(['10.103.5.51', '10.103.5.52', '10.103.5.53'])
    session = cluster.connect('darbiz')

    conn = PostgresHook(postgres_conn_id='pgConn_pg').get_conn()
    cur = conn.cursor()

    rows = session.execute("SELECT * from darbiz.forte_express_loan_requests where created_on>='2020-09-20' allow filtering")
    cur.execute("select distinct owner profile_id, uid order_id, pay_title from dar_group.bazar_orders1 where created_on>=now()-interval '24' hour")
    res = cur.fetchall()
    for user_row in rows:
        d = json.loads(user_row.loan_request)
        id0 = user_row.profile_id
        id1 = user_row.order_id
        id2 = user_row.created_on
        pp = d['person_info']['financing_info']['period'] if 'period' in d['person_info']['financing_info'] else None
        lh = datetime.now() - timedelta(hours = 12)
        if id2>=lh:
            for a,b,c in res:
                ll=c.split()
                if id1==b:
                    if pp!=int(ll[2]):
                        email = EmailOperator(
                            task_id='send_email',
                            to=['*****@*****.**','*****@*****.**'],
                            subject='Error in Fortemarket',
                            html_content='Error in order_id: {} created at: {}, profile_id: {}, months in request: {}, months in orders: {}\n'
                                .format(a, id2, b, pp, ll[2])
                            )
                        email.execute(context=kwargs)

                        t3 = SlackWebhookOperator(
                            task_id='send_slack_notification',
                            http_conn_id='slack_connection',
                            message='Error in order_id: {} created at: {}, profile_id: {}, months in request: {}, months in orders: {}\n'
                                .format(a, id2, b, pp, ll[2]),
                            # files = '/tmp/BPM_report.xlsx',
                            channel='#reports',
                            dag=dag
                        )
                        t3.execute(context=kwargs)
                else:
                    continue
        else:
            continue
        # lt = d['person_info']['financing_info']['loan_type'] if 'loan_type' in d['person_info']['financing_info'] else None

    cur.close()
    conn.close()
Example #8
    def _get_test_dag(self):
        with DAG(dag_id='test_dag', default_args=DEFAULT_DAG_ARGS) as dag:
            op1 = SparkSubmitOperator(task_id='op1')
            op2 = EmrAddStepsOperator(task_id='op2', job_flow_id='foo')
            op3 = S3ListOperator(task_id='op3', bucket='foo')
            op4 = EmrCreateJobFlowOperator(task_id='op4')
            op5 = TriggerDagRunOperator(task_id='op5', trigger_dag_id='foo')
            op6 = FileToWasbOperator(task_id='op6',
                                     container_name='foo',
                                     blob_name='foo',
                                     file_path='foo')
            op7 = EmailOperator(task_id='op7',
                                subject='foo',
                                to='foo',
                                html_content='foo')
            op8 = S3CopyObjectOperator(task_id='op8',
                                       dest_bucket_key='foo',
                                       source_bucket_key='foo')
            op9 = BranchPythonOperator(task_id='op9', python_callable=print)
            op10 = PythonOperator(task_id='op10', python_callable=range)

            op1 >> [op2, op3, op4]
            op2 >> [op5, op6]
            op6 >> [op7, op8, op9]
            op3 >> [op7, op8]
            op8 >> [op9, op10]

        return dag
Example #9
def on_failure(config) -> BaseOperator:
    return EmailOperator(mime_charset='utf-8',
                         to=config.email,
                         subject='Largest Orders Daily Report Failed',
                         html_content='This is urgent!',
                         task_id='email_notification',
                         trigger_rule='one_failed')
def get_grupo_dados(dag, previous_task, next_task, dados):

    for dado in dados:

        extracao = SimpleHttpOperator(
            task_id='Extracao_de_dados_{}'.format(dado),
            endpoint='url...',
            method='GET',
            trigger_rule="all_success",
            dag=dag)

        email_erro = EmailOperator(
            task_id='Email_Erro_{}'.format(dado),
            to='*****@*****.**',
            subject='Airflow Alert Error',
            html_content='Error while capturing {}'.format(dado),
            dag=dag,
            trigger_rule="all_failed",
            default_args={
                'email': ['*****@*****.**'],
                'email_on_failure': True,
                'email_on_retry': True,
                'retries': 2,
                'retry_delay': timedelta(minutes=5)
            })

        salvar_base_raw = BranchPythonOperator(
            task_id='Salvar_DB_Raw_{}'.format(dado),
            python_callable=salva_dados_db_raw,
            trigger_rule="all_success",
            dag=dag)

        stop_falha = BranchPythonOperator(
            task_id='Stop_erro_extracao_{}'.format(dado),
            python_callable=salva_dados_db_raw,
            trigger_rule="dummy",
            dag=dag)

        transformacao = BranchPythonOperator(
            task_id='Transformacao_dados_{}'.format(dado),
            python_callable=transforma_dados,
            trigger_rule="one_success",
            dag=dag)

        salvar_base_staging = BranchPythonOperator(
            task_id='Salvar_DB_Staging_{}'.format(dado),
            python_callable=salva_dados_db_staging,
            trigger_rule="all_success",
            dag=dag)

        # defining the flow
        previous_task >> extracao
        extracao >> email_erro
        extracao >> salvar_base_raw
        email_erro >> stop_falha
        stop_falha >> transformacao
        salvar_base_raw >> transformacao
        transformacao >> salvar_base_staging
        salvar_base_staging >> next_task
Example #11
def Email_0(config) -> BaseOperator:
    content = '''
    
    '''

    return EmailOperator(mime_charset="utf-8",
                         to="",
                         subject="''",
                         html_content=content,
                         task_id="Email_0",
                         trigger_rule="one_failed")
Example #12
def Test_Email(config) -> BaseOperator:
    content = '''
    Test
    '''

    return EmailOperator(mime_charset="utf-8",
                         to="Test",
                         subject="'Test'",
                         html_content=content,
                         task_id="Test_Email",
                         trigger_rule="all_success")
Example #13
def Email_0(config) -> BaseOperator:
    content = '''
    Yay
    '''

    return EmailOperator(mime_charset="utf-8",
                         to="*****@*****.**",
                         subject="'Success'",
                         html_content=content,
                         task_id="Email_0",
                         trigger_rule="one_success")
Example #14
def Emailhasanewname(config) -> BaseOperator:
    content = '''
    Delta
    '''

    return EmailOperator(mime_charset="utf-8",
                         to="*****@*****.**",
                         subject="'Delta'",
                         html_content=content,
                         task_id="Emailhasanewname",
                         trigger_rule="all_success")
Example #15
def Email_1(config) -> BaseOperator:
    content = '''
    Test email
    '''

    return EmailOperator(mime_charset="utf-8",
                         to="*****@*****.**",
                         subject="'Delta'",
                         html_content=content,
                         task_id="Email_1",
                         trigger_rule="all_success")
Example #16
def get_spy_data(**context):

    dates = []
    closings = []
    print('hello bollinger bands')
    r = requests.get(
        "https://www.alphavantage.co/query?function=TIME_SERIES_DAILY_ADJUSTED&symbol=SPY&apikey="
        + API_KEY)
    data_dict = r.json()['Time Series (Daily)']

    # Parse dict to capture desired info into lists
    for k, v in data_dict.items():
        dates.append(k)
        closings.append(v['5. adjusted close'])

    df = {'Date': dates, 'AdjClose': closings}

    stock_data = work_up_data(df)

    email = EmailOperator(task_id="email_task",
                          to="*****@*****.**",
                          subject='',
                          html_content='',
                          dag=context.get("dag"))

    # Send email based on conditions
    if stock_data.iloc[-1]['buy'] == 1:
        print("BUY!!!!")
        email.subject = "BUT SOME SPY STOCK!"
        email.html_content = "Bollinger bands say to buy some SPY"
        email.execute(context=context)

    if stock_data.iloc[-1]['sell'] == 1:
        print("SELL!!!!")
        email.subject = "SELL SOME SPY STOCK!"
        email.html_content = "Bollinger bands say to sell SPY"
        email.execute(context=context)

    # Populate db
    stock_data.to_csv('/tmp/spy_data.csv')
Example #17
def report_failure(context):
    """Send custom email alerts."""
    subject = 'Airflow critical: {ti}'.format(**context)
    html_content = """
         Exception:<br>{exception}<br>
         Execution Date: {execution_date}<br>
         Log: <a href="{ti.log_url}">Link</a><br>
         Host: {ti.hostname}<br>
         Log file: {ti.log_filepath}<br>
         Mark success: <a href="{ti.mark_success_url}">Link</a><br>
    """.format(**context)
    """Send custom email alerts."""
    return EmailOperator(
        task_id='report_email_failure',
        to='*****@*****.**',
        subject=subject,
        html_content=html_content,
    ).execute(context)
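Example #17 defines report_failure as a failure callback that formats fields from the task context. A minimal sketch of how such a callback is typically registered (the DAG id, owner and start date below are assumptions, not taken from the source) is to pass it as on_failure_callback in the DAG's default_args:

from datetime import datetime
from airflow import DAG

# Hypothetical DAG wiring for the callback above; names and dates are placeholders.
default_args = {
    'owner': 'airflow',
    'start_date': datetime(2021, 1, 1),
    'on_failure_callback': report_failure,  # Airflow calls this with the failed task's context
}
dag = DAG('report_failure_example', default_args=default_args, schedule_interval='@daily')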
Example #18
def report_notify_email(report, email_template_location, **context):
    """
    For the given report, sends a notification email in the format given
    in the email_template

    :param report: report being notified on
    :type report: Report

    :param email_template_location: location of html template to use for status
    :type email_template_location: str

    """
    ri = ReportInstance(context["dag_run"])

    updated_time = ri.updated
    timezone = pendulum.timezone(conf.get("core", "default_timezone"))
    updated_time = updated_time.replace(tzinfo=timezone)
    passed = ri.passed
    status = get_status(passed)
    details_link = get_details_link()

    with open(email_template_location) as file:
        send_email = EmailOperator(
            task_id="custom_email_notification",
            to=report.subscribers,
            subject="[{{status}}] {{title}}",
            html_content=file.read(),
        )
        params = {
            "passed": passed,
            "status": status,
            "updated": updated_time,
            "title": report.report_title,
            "details_link": details_link,
        }
        send_email.render_template_fields(
            context=params, jinja_env=context["dag"].get_template_env())
        logging.info(f'Sending "{send_email.subject}" email...')
        send_email.execute(context)
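A minimal usage sketch for report_notify_email (the report object, template path and task id below are assumptions, not from the source): since the function expects the report and template location up front and the Airflow context as keyword arguments, it can be bound with functools.partial and run from a PythonOperator.

from functools import partial
from airflow.operators.python_operator import PythonOperator

# Hypothetical wiring; `report` and the template file must be available at DAG-parse time.
notify = PythonOperator(
    task_id='report_notify_email',
    python_callable=partial(report_notify_email, report, '/path/to/email_template.html'),
    provide_context=True,  # Airflow 1.x; in 2.x the context is injected automatically
    dag=dag,
)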

def predict_img():
    img = cv2.imread(self.input().path, cv2.IMREAD_GRAYSCALE)
    circles = cv2.HoughCircles(img,
                               cv2.HOUGH_GRADIENT,
                               dp=2,
                               minDist=15,
                               param1=100,
                               param2=70)
    label = "lemon" if circles is not None else "banana"
    with open(prediction, "w") as out:
        json.dump({"class": label}, out)


predict = PythonOperator(
    task_id='predict',
    python_callable=predict_img,
    dag=dag,
)

notify = EmailOperator(
    task_id='notify',
    to='*****@*****.**',
    subject='Your daily prediction has finished!',
    html_content='Gratz!',
    dag=dag,
)

# connect our pipeline steps
file_sensor >> preprocess >> predict >> notify
def build_load_dag(dag_id,
                   output_bucket,
                   destination_dataset_project_id,
                   chain='ethereum',
                   notification_emails=None,
                   load_start_date=datetime(2018, 7, 1),
                   schedule_interval='0 0 * * *',
                   load_all_partitions=True):
    # The following datasets must be created in BigQuery:
    # - crypto_{chain}_raw
    # - crypto_{chain}_temp
    # - crypto_{chain}

    dataset_name = f'crypto_{chain}'
    dataset_name_raw = f'crypto_{chain}_raw'
    dataset_name_temp = f'crypto_{chain}_temp'

    if not destination_dataset_project_id:
        raise ValueError('destination_dataset_project_id is required')

    environment = {
        'dataset_name': dataset_name,
        'dataset_name_raw': dataset_name_raw,
        'dataset_name_temp': dataset_name_temp,
        'destination_dataset_project_id': destination_dataset_project_id,
        'load_all_partitions': load_all_partitions
    }

    def read_bigquery_schema_from_file(filepath):
        result = []
        file_content = read_file(filepath)
        json_content = json.loads(file_content)
        for field in json_content:
            result.append(
                bigquery.SchemaField(name=field.get('name'),
                                     field_type=field.get('type', 'STRING'),
                                     mode=field.get('mode', 'NULLABLE'),
                                     description=field.get('description')))
        return result

    def read_file(filepath):
        with open(filepath) as file_handle:
            content = file_handle.read()
            return content

    def submit_bigquery_job(job, configuration):
        try:
            logging.info('Creating a job: ' +
                         json.dumps(configuration.to_api_repr()))
            result = job.result()
            logging.info(result)
            assert job.errors is None or len(job.errors) == 0
            return result
        except Exception:
            logging.info(job.errors)
            raise

    default_dag_args = {
        'depends_on_past': False,
        'start_date': load_start_date,
        'email_on_failure': True,
        'email_on_retry': False,
        'retries': 5,
        'retry_delay': timedelta(minutes=5)
    }

    if notification_emails and len(notification_emails) > 0:
        default_dag_args['email'] = [
            email.strip() for email in notification_emails.split(',')
        ]

    # Define a DAG (directed acyclic graph) of tasks.
    dag = models.DAG(dag_id,
                     catchup=False,
                     schedule_interval=schedule_interval,
                     default_args=default_dag_args)

    dags_folder = os.environ.get('DAGS_FOLDER', '/home/airflow/gcs/dags')

    def add_load_tasks(task, file_format, allow_quoted_newlines=False):
        wait_sensor = GoogleCloudStorageObjectSensor(
            task_id='wait_latest_{task}'.format(task=task),
            timeout=60 * 60,
            poke_interval=60,
            bucket=output_bucket,
            object='export/{task}/block_date={datestamp}/{task}.{file_format}'.
            format(task=task, datestamp='{{ds}}', file_format=file_format),
            dag=dag)

        def load_task():
            client = bigquery.Client()
            job_config = bigquery.LoadJobConfig()
            schema_path = os.path.join(
                dags_folder,
                'resources/stages/raw/schemas/{task}.json'.format(task=task))
            job_config.schema = read_bigquery_schema_from_file(schema_path)
            job_config.source_format = bigquery.SourceFormat.CSV if file_format == 'csv' else bigquery.SourceFormat.NEWLINE_DELIMITED_JSON
            if file_format == 'csv':
                job_config.skip_leading_rows = 1
            job_config.write_disposition = 'WRITE_TRUNCATE'
            job_config.allow_quoted_newlines = allow_quoted_newlines
            job_config.ignore_unknown_values = True

            export_location_uri = 'gs://{bucket}/export'.format(
                bucket=output_bucket)
            uri = '{export_location_uri}/{task}/*.{file_format}'.format(
                export_location_uri=export_location_uri,
                task=task,
                file_format=file_format)
            table_ref = client.dataset(dataset_name_raw).table(task)
            load_job = client.load_table_from_uri(uri,
                                                  table_ref,
                                                  job_config=job_config)
            submit_bigquery_job(load_job, job_config)
            assert load_job.state == 'DONE'

        load_operator = PythonOperator(task_id='load_{task}'.format(task=task),
                                       python_callable=load_task,
                                       execution_timeout=timedelta(minutes=30),
                                       dag=dag)

        wait_sensor >> load_operator
        return load_operator

    def add_enrich_tasks(task,
                         time_partitioning_field='block_timestamp',
                         dependencies=None,
                         always_load_all_partitions=False):
        def enrich_task(ds, **kwargs):
            template_context = kwargs.copy()
            template_context['ds'] = ds
            template_context['params'] = environment

            client = bigquery.Client()

            # Need to use a temporary table because bq query sets field modes to NULLABLE and descriptions to null
            # when writeDisposition is WRITE_TRUNCATE

            # Create a temporary table
            temp_table_name = '{task}_{milliseconds}'.format(
                task=task, milliseconds=int(round(time.time() * 1000)))
            temp_table_ref = client.dataset(dataset_name_temp).table(
                temp_table_name)

            schema_path = os.path.join(
                dags_folder,
                'resources/stages/enrich/schemas/{task}.json'.format(
                    task=task))
            schema = read_bigquery_schema_from_file(schema_path)
            table = bigquery.Table(temp_table_ref, schema=schema)

            description_path = os.path.join(
                dags_folder,
                'resources/stages/enrich/descriptions/{task}.txt'.format(
                    task=task))
            table.description = read_file(description_path)
            if time_partitioning_field is not None:
                table.time_partitioning = TimePartitioning(
                    field=time_partitioning_field)
            logging.info('Creating table: ' + json.dumps(table.to_api_repr()))
            table = client.create_table(table)
            assert table.table_id == temp_table_name

            # Query from raw to temporary table
            query_job_config = bigquery.QueryJobConfig()
            # Finishes faster, query limit for concurrent interactive queries is 50
            query_job_config.priority = bigquery.QueryPriority.INTERACTIVE
            query_job_config.destination = temp_table_ref

            sql_path = os.path.join(
                dags_folder,
                'resources/stages/enrich/sqls/{task}.sql'.format(task=task))
            sql_template = read_file(sql_path)
            sql = kwargs['task'].render_template('', sql_template,
                                                 template_context)
            print('Enrichment sql:')
            print(sql)

            query_job = client.query(sql,
                                     location='US',
                                     job_config=query_job_config)
            submit_bigquery_job(query_job, query_job_config)
            assert query_job.state == 'DONE'

            if load_all_partitions or always_load_all_partitions:
                # Copy temporary table to destination
                copy_job_config = bigquery.CopyJobConfig()
                copy_job_config.write_disposition = 'WRITE_TRUNCATE'
                dest_table_name = '{task}'.format(task=task)
                dest_table_ref = client.dataset(
                    dataset_name,
                    project=destination_dataset_project_id).table(
                        dest_table_name)
                copy_job = client.copy_table(temp_table_ref,
                                             dest_table_ref,
                                             location='US',
                                             job_config=copy_job_config)
                submit_bigquery_job(copy_job, copy_job_config)
                assert copy_job.state == 'DONE'
            else:
                # Merge
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement
                merge_job_config = bigquery.QueryJobConfig()
                # Finishes faster, query limit for concurrent interactive queries is 50
                merge_job_config.priority = bigquery.QueryPriority.INTERACTIVE

                merge_sql_path = os.path.join(
                    dags_folder,
                    'resources/stages/enrich/sqls/merge/merge_{task}.sql'.
                    format(task=task))
                merge_sql_template = read_file(merge_sql_path)

                merge_template_context = template_context.copy()
                merge_template_context['params'][
                    'source_table'] = temp_table_name
                merge_template_context['params'][
                    'destination_dataset_project_id'] = destination_dataset_project_id
                merge_template_context['params'][
                    'destination_dataset_name'] = dataset_name
                merge_sql = kwargs['task'].render_template(
                    '', merge_sql_template, merge_template_context)
                print('Merge sql:')
                print(merge_sql)
                merge_job = client.query(merge_sql,
                                         location='US',
                                         job_config=merge_job_config)
                submit_bigquery_job(merge_job, merge_job_config)
                assert merge_job.state == 'DONE'

            # Delete temp table
            client.delete_table(temp_table_ref)

        enrich_operator = PythonOperator(
            task_id='enrich_{task}'.format(task=task),
            python_callable=enrich_task,
            provide_context=True,
            execution_timeout=timedelta(minutes=60),
            dag=dag)

        if dependencies is not None and len(dependencies) > 0:
            for dependency in dependencies:
                dependency >> enrich_operator
        return enrich_operator

    def add_verify_tasks(task, dependencies=None):
        # The queries in verify/sqls will fail when the condition is not met
        # Have to use this trick since the Python 2 version of BigQueryCheckOperator doesn't support standard SQL
        # and legacy SQL can't be used to query partitioned tables.
        sql_path = os.path.join(
            dags_folder,
            'resources/stages/verify/sqls/{task}.sql'.format(task=task))
        sql = read_file(sql_path)
        verify_task = BigQueryOperator(
            task_id='verify_{task}'.format(task=task),
            bql=sql,
            params=environment,
            use_legacy_sql=False,
            dag=dag)
        if dependencies is not None and len(dependencies) > 0:
            for dependency in dependencies:
                dependency >> verify_task
        return verify_task

    load_blocks_task = add_load_tasks('blocks', 'csv')
    load_transactions_task = add_load_tasks('transactions', 'csv')
    load_receipts_task = add_load_tasks('receipts', 'csv')
    load_logs_task = add_load_tasks('logs', 'json')
    load_contracts_task = add_load_tasks('contracts', 'json')
    load_tokens_task = add_load_tasks('tokens',
                                      'csv',
                                      allow_quoted_newlines=True)
    load_token_transfers_task = add_load_tasks('token_transfers', 'csv')
    load_traces_task = add_load_tasks('traces', 'csv')

    enrich_blocks_task = add_enrich_tasks('blocks',
                                          time_partitioning_field='timestamp',
                                          dependencies=[load_blocks_task])
    enrich_transactions_task = add_enrich_tasks('transactions',
                                                dependencies=[
                                                    load_blocks_task,
                                                    load_transactions_task,
                                                    load_receipts_task
                                                ])
    enrich_logs_task = add_enrich_tasks(
        'logs', dependencies=[load_blocks_task, load_logs_task])
    enrich_token_transfers_task = add_enrich_tasks(
        'token_transfers',
        dependencies=[load_blocks_task, load_token_transfers_task])
    enrich_traces_task = add_enrich_tasks(
        'traces', dependencies=[load_blocks_task, load_traces_task])
    enrich_contracts_task = add_enrich_tasks(
        'contracts', dependencies=[load_blocks_task, load_contracts_task])
    enrich_tokens_task = add_enrich_tasks(
        'tokens', dependencies=[load_blocks_task, load_tokens_task])

    calculate_balances_task = add_enrich_tasks('balances',
                                               dependencies=[
                                                   enrich_blocks_task,
                                                   enrich_transactions_task,
                                                   enrich_traces_task
                                               ],
                                               time_partitioning_field=None,
                                               always_load_all_partitions=True)

    verify_blocks_count_task = add_verify_tasks('blocks_count',
                                                [enrich_blocks_task])
    verify_blocks_have_latest_task = add_verify_tasks('blocks_have_latest',
                                                      [enrich_blocks_task])
    verify_transactions_count_task = add_verify_tasks(
        'transactions_count', [enrich_blocks_task, enrich_transactions_task])
    verify_transactions_have_latest_task = add_verify_tasks(
        'transactions_have_latest', [enrich_transactions_task])
    verify_logs_have_latest_task = add_verify_tasks('logs_have_latest',
                                                    [enrich_logs_task])
    verify_token_transfers_have_latest_task = add_verify_tasks(
        'token_transfers_have_latest', [enrich_token_transfers_task])
    verify_traces_blocks_count_task = add_verify_tasks(
        'traces_blocks_count', [enrich_blocks_task, enrich_traces_task])
    verify_traces_transactions_count_task = add_verify_tasks(
        'traces_transactions_count',
        [enrich_transactions_task, enrich_traces_task])
    verify_traces_contracts_count_task = add_verify_tasks(
        'traces_contracts_count',
        [enrich_transactions_task, enrich_traces_task, enrich_contracts_task])

    if notification_emails and len(notification_emails) > 0:
        send_email_task = EmailOperator(
            task_id='send_email',
            to=[email.strip() for email in notification_emails.split(',')],
            subject='Ethereum ETL Airflow Load DAG Succeeded',
            html_content='Ethereum ETL Airflow Load DAG Succeeded - {}'.format(
                chain),
            dag=dag)
        verify_blocks_count_task >> send_email_task
        verify_blocks_have_latest_task >> send_email_task
        verify_transactions_count_task >> send_email_task
        verify_transactions_have_latest_task >> send_email_task
        verify_logs_have_latest_task >> send_email_task
        verify_token_transfers_have_latest_task >> send_email_task
        verify_traces_blocks_count_task >> send_email_task
        verify_traces_transactions_count_task >> send_email_task
        verify_traces_contracts_count_task >> send_email_task
        enrich_tokens_task >> send_email_task
        calculate_balances_task >> send_email_task

    return dag
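The comment inside add_verify_tasks above notes that each query under resources/stages/verify/sqls is written so that the query itself fails when its condition is not met. A hypothetical sketch of such a check (illustrative only, not taken from the source repository) could use BigQuery standard SQL's ERROR() function:

# Hypothetical verify SQL, shown here as a Python string; the BigQueryOperator task fails
# because ERROR() aborts the query whenever no blocks were loaded for the run date.
verify_blocks_count_sql = """
SELECT IF(
    (SELECT COUNT(*)
     FROM `{{params.destination_dataset_project_id}}.{{params.dataset_name}}.blocks`
     WHERE DATE(timestamp) = '{{ds}}') > 0,
    1,
    ERROR('verify_blocks_count failed: no blocks for {{ds}}'))
"""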
Example #21
                         bash_command='rm -f /home/repl/*.tmp',
                         dag=dag)

python_task = PythonOperator(task_id='run_processing',
                             python_callable=process_data,
                             provide_context=True,
                             dag=dag)

email_subject = """
  Email report for {{ params.department }} on {{ ds_nodash }}
"""

email_report_task = EmailOperator(
    task_id='email_report_task',
    to='*****@*****.**',
    subject=email_subject,
    html_content='',
    params={'department': 'Data subscription services'},
    dag=dag)

no_email_task = DummyOperator(task_id='no_email_task', dag=dag)


def check_weekend(**kwargs):
    dt = datetime.strptime(kwargs['execution_date'], "%Y-%m-%d")
    # If dt.weekday() is 0-4, it's Monday - Friday. If 5 or 6, it's Sat / Sun.
    if (dt.weekday() < 5):
        return 'email_report_task'
    else:
        return 'no_email_task'
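check_weekend returns a task_id, so it is meant to feed a BranchPythonOperator that picks between email_report_task and no_email_task. A minimal wiring sketch (the branch task_id below is an assumption, not from the source):

from airflow.operators.python_operator import BranchPythonOperator

# Hypothetical branch wiring for the two paths defined above: weekdays send the report,
# weekends fall through to the dummy task.
branch_task = BranchPythonOperator(task_id='check_weekend_branch',
                                   python_callable=check_weekend,
                                   provide_context=True,
                                   dag=dag)

python_task >> branch_task >> [email_report_task, no_email_task]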
    for file in files:
        with open(file, "rb") as f:
            try:
                data = f.read()
                bucket_path = "soumil/{}".format(file)
                aws_helper.put_files(Key=bucket_path, Body=data)
                print("File : {} Uploaded ".format(file))
            except Exception as e:
                print("Failed to upload File :{} ".format(e))


def trigger_glue(**context):
    pass


with DAG(dag_id="project", schedule_interval="@once", default_args=default_args, catchup=False) as dag:

    crawl_files = PythonOperator(task_id="crawl_files",python_callable=crawl_files,provide_context=True,)
    upload_s3 = PythonOperator(task_id="upload_s3",python_callable=upload_s3,provide_context=True,)
    trigger_glue = PythonOperator(task_id="trigger_glue",python_callable=trigger_glue,provide_context=True,)

    email = EmailOperator(
        task_id='send_email',
        to='XXXXXXXXXXXXXXX',
        subject='Airflow Alert',
        html_content=""" <h3>ETL Pipeline complete </h3> """,
    )

crawl_files >> upload_s3 >> trigger_glue >> email

    return html

##########################################################################
# Airflow code to define the DAG for the weekly usage updates
#########################################################################

default_args = {
    'owner': 'fiddler-analytics',
    'depends_on_past': False,
    'start_date': datetime(2019, 8, 8),
    'email': ['*****@*****.**',
              '*****@*****.**',
              '*****@*****.**'],
    'email_on_failure': True,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=5)
}

dag = DAG('shir-connect-usage',
          default_args=default_args,
          schedule_interval='0 12 * * 5')

send_email = EmailOperator(task_id='usage-statistics-email',
                           to=['*****@*****.**',
                               '*****@*****.**',
                               '*****@*****.**'],
                           subject='Shir Connect Usage Statistics',
                           html_content=build_email_content(),
                           dag=dag)
Example #24
from datetime import datetime
from airflow import DAG

from airflow.operators.email_operator import EmailOperator

default_args = {
    'owner': 'owner',
    'depends_on_past': False,
    'start_date': datetime(2020, 10, 13),
    'email': ['*****@*****.**'],
    'email_on_failure': True
}

dag = DAG('email_send',
          schedule_interval='@hourly',
          default_args=default_args,
          start_date=datetime(2020, 10, 13))

with dag:
    send_email = EmailOperator(task_id='send_email',
                               to='*****@*****.**',
                               subject='Airflow Alert',
                               html_content='This is trial mail',
                               dag=dag)
Example #25
    'jars': '/Users/ravimuthyala/AirflowSparkTestCode/postgresql-42.2.12.jar',
    'application_args':
    ["/Users/ravimuthyala/AirflowSparkTestCode/receipts.csv"],
    'driver_memory': '1g',
    'executor_cores': 1,
    'num_executors': 1,
    'executor_memory': '1g'
}

spark_submit_operator = SparkSubmitOperator(task_id='Spark_Scala_Submit_Job',
                                            dag=dag,
                                            **spark_config)

emailNotify = EmailOperator(task_id='email_notification',
                            to='*****@*****.**',
                            subject='Spark Submit Job Alert',
                            html_content='Airflow Spark Submit Job Done',
                            dag=dag)

t1Failed = EmailOperator(dag=dag,
                         trigger_rule=TriggerRule.ONE_FAILED,
                         task_id="SparkJobFailed",
                         to=["*****@*****.**"],
                         subject="Spark job Failed",
                         html_content='<h3>Spark job has failed</h3>')

python_operator.set_downstream(spark_submit_operator)
spark_submit_operator.set_downstream(emailNotify)
t1Failed.set_upstream([spark_submit_operator])

if __name__ == '__main__':
    # TASK III: Write filtered questions to S3 Bucket
    Task_III = PythonOperator(task_id="write_questions_to_s3",
                              python_callable=write_questions_to_s3)

    # TASK IV: Render HTML template
    Task_IV = PythonOperator(task_id="render_template",
                             python_callable=render_template,
                             provide_context=True)

    # TASK V: Send email notification
    Task_V = EmailOperator(
        task_id="send_email",
        provide_context=True,
        to="*****@*****.**",
        subject=
        "Top questions with tag 'pandas' on {{ ds }}",  # uses jinja template ('ds' is date)
        html_content=
        "{{ task_instance.xcom_pull(task_ids='render_template', key='html_content') }}"
    )

# execute pipeline tasks in series
Task_I >> Task_II >> Task_III >> Task_IV >> Task_V

# _______________________________PYTHON FUNCS__________________________________#


def call_stack_overflow_api() -> dict:
    """ Get first 100 questions created two days ago sorted by user votes """

    stack_overflow_question_url = Variable.get("STACK_OVERFLOW_QUESTION_URL")
)

#
# A bad command we need failing
#
run_fail = BashOperator(
    task_id="trigger_failure",
    bash_command="invalidcommand",
    env=environment_vars,
    dag=dag,
)

#
# Send an email when done
#
email_task = EmailOperator(
    to=environment_vars.get("EMAIL_RECIPIENT", ""),
    task_id="email_admin",
    subject="Templated Subject: start_date {{ ds }}",
    mime_charset="utf-8",
    params={"content1": "random"},
    html_content=
    "Templated Content: content1 - {{ params.content1 }}  task_key - {{ task_instance_key_str }} test_mode - {{ test_mode }} task_owner - {{ task.owner}} hostname - {{ ti.hostname }}",
    dag=dag,
)

run_success >> run_fail >> email_task

if __name__ == "__main__":
    dag.cli()
    ssh_hook=sshHook,
    dag=dag)

hive = SSHExecuteOperator(
    task_id="comment_import",
    bash_command=
    '(bash {path}/xianyu_itemcomment_import.sh {lastday} {last_update_date})'.
    format(path=path,
           lastday=get_lastday(),
           last_update_date=get_last_update_date()),
    ssh_hook=sshHook,
    dag=dag)

email_update = EmailOperator(task_id='xianyu_itemcomment_update_email',
                             to=['*****@*****.**'],
                             subject='xianyu itemcomment workflow',
                             html_content='[ xianyu data updated!!! ]',
                             dag=dag)
email_update_not = EmailOperator(task_id='xianyu_itemcomment_update_not_email',
                                 to=['*****@*****.**'],
                                 subject='xianyu itemcomment workflow',
                                 html_content='[ xianyu data updating!!! ]',
                                 dag=dag)
branching = BranchPythonOperator(task_id='check_attach',
                                 python_callable=lambda: check_attach(),
                                 dag=dag)

passover = DummyOperator(task_id='pass', dag=dag)
update = DummyOperator(task_id='update', dag=dag)

chain(branching, passover, email_update_not)