def build_email(**context):
    message = ""
    for key, value in context.items():
        try:
            message = message + "\n\n" + key + ": " + str(value)
        except Exception:
            message = message + "\n\n" + key + ": N/A"

    log_dag_name = os.environ['AIRFLOW_CTX_DAG_ID']
    log_task_name = os.environ['AIRFLOW_CTX_TASK_ID']
    log_time = os.environ['AIRFLOW_CTX_EXECUTION_DATE']
    log = '/home/pchoix/airflow/logs/' + log_dag_name + '/' + log_task_name + '/' + log_time + '/' + '1.log'
    print("log path: " + log)
    file1 = open(log, "r")

    me = os.path.realpath(__file__)
    print("me: " + me)
    file2 = open(me, "r")

    email_op = EmailOperator(
        task_id='send_email',
        to="*****@*****.**",
        subject="Test Email With Log Attachment using EmailOperator",
        html_content=message,
        files=[file1.name, file2.name],
    )
    email_op.execute(context)

    file1.close()
    file2.close()
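A minimal sketch of how a callable like build_email above is typically wired into a DAG so that it receives the task context as keyword arguments; the dag object and the task_id shown here are assumptions, using the Airflow 1.x PythonOperator API:

from airflow.operators.python_operator import PythonOperator

# Sketch only: `dag` is assumed to be defined elsewhere in the file.
send_log_email = PythonOperator(
    task_id='build_and_send_email',   # illustrative task id
    python_callable=build_email,
    provide_context=True,             # Airflow 1.x: pass the task context as **kwargs
    dag=dag,
)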
def _run_as_operator(self, **kwargs):
    task = EmailOperator(
        to='*****@*****.**',
        subject='Test Run',
        html_content='The quick brown fox jumps over the lazy dog',
        task_id='task',
        dag=self.dag,
        **kwargs)
    task.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)
def build_email(**context):
    # Open in binary mode: .xlsx is not a text file (only file.name is used below).
    with open('/tmp/cc_report.xlsx', mode='rb') as file:
        email_op = EmailOperator(
            task_id='send_email',
            to=['*****@*****.**', '*****@*****.**', '*****@*****.**'],
            subject="CC report",
            html_content='Hello, <br/>',
            files=[file.name],
        )
        email_op.execute(context)
def build_email(**context):
    with NamedTemporaryFile(mode='w+', suffix=".txt") as file:
        file.write("Hello World")
        file.flush()  # ensure the content is on disk before the operator attaches the file
        email_op = EmailOperator(
            task_id='send_email',
            to="*****@*****.**",
            subject="Test Email Please Ignore",
            html_content=None,
            files=[file.name],
        )
        email_op.execute(context)
def error_email(context):
    """
    Callback that sends a notification email if a failure is detected in the workflow.

    :return: operator.execute
    """
    error_email = EmailOperator(
        task_id="error_email",
        trigger_rule=TriggerRule.ONE_FAILED,
        to=['*****@*****.**'],
        subject="af_advt_bdt_publish_domestic_sdk_adv_rank error",
        html_content="af_advt_bdt_publish_domestic_sdk_adv_rank error",
    )
    return error_email.execute(context=context)
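A hedged sketch of how a callback like error_email is usually registered so that Airflow calls it with the failing task's context; on_failure_callback is a standard BaseOperator argument, while the task and bash_command shown here are placeholders:

from airflow.operators.bash_operator import BashOperator

# Sketch: attach the callback per task (or via default_args) so it fires on failure.
publish_task = BashOperator(
    task_id='publish_domestic_sdk_adv_rank',   # illustrative task id
    bash_command='echo publish',
    on_failure_callback=error_email,           # Airflow passes the failing task's context
    dag=dag,
)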
def read_data(**kwargs):
    cluster = cl(['10.103.5.51', '10.103.5.52', '10.103.5.53'])
    session = cluster.connect('darbiz')
    conn = PostgresHook(postgres_conn_id='pgConn_pg').get_conn()
    cur = conn.cursor()

    rows = session.execute(
        "SELECT * from darbiz.forte_express_loan_requests where created_on>='2020-09-20' allow filtering")
    cur.execute(
        "select distinct owner profile_id, uid order_id, pay_title from dar_group.bazar_orders1 "
        "where created_on>=now()-interval '24' hour")
    res = cur.fetchall()

    for user_row in rows:
        d = json.loads(user_row.loan_request)
        id0 = user_row.profile_id
        id1 = user_row.order_id
        id2 = user_row.created_on
        pp = d['person_info']['financing_info']['period'] if 'period' in d['person_info']['financing_info'] else None
        lh = datetime.now() - timedelta(hours=12)
        if id2 >= lh:
            for a, b, c in res:
                ll = c.split()
                if id1 == b:
                    if pp != int(ll[2]):
                        email = EmailOperator(
                            task_id='send_email',
                            to=['*****@*****.**', '*****@*****.**'],
                            subject='Ошибка в Fortemarket',  # "Error in Fortemarket"
                            html_content='Error in order_id: {} created at: {}, profile_id: {}, '
                                         'months in request: {}, months in orders: {}\n'
                                         .format(a, id2, b, pp, ll[2])
                        )
                        email.execute(context=kwargs)
                        t3 = SlackWebhookOperator(
                            task_id='send_slack_notification',
                            http_conn_id='slack_connection',
                            message='Error in order_id: {} created at: {}, profile_id: {}, '
                                    'months in request: {}, months in orders: {}\n'
                                    .format(a, id2, b, pp, ll[2]),
                            # files = '/tmp/BPM_report.xlsx',
                            channel='#reports',
                            dag=dag
                        )
                        t3.execute(context=kwargs)
                else:
                    continue
        else:
            continue
        # lt = d['person_info']['financing_info']['loan_type'] if 'loan_type' in d['person_info']['financing_info'] else None

    cur.close()
    conn.close()
def _get_test_dag(self):
    with DAG(dag_id='test_dag', default_args=DEFAULT_DAG_ARGS) as dag:
        op1 = SparkSubmitOperator(task_id='op1')
        op2 = EmrAddStepsOperator(task_id='op2', job_flow_id='foo')
        op3 = S3ListOperator(task_id='op3', bucket='foo')
        op4 = EmrCreateJobFlowOperator(task_id='op4')
        op5 = TriggerDagRunOperator(task_id='op5', trigger_dag_id='foo')
        op6 = FileToWasbOperator(task_id='op6', container_name='foo', blob_name='foo', file_path='foo')
        op7 = EmailOperator(task_id='op7', subject='foo', to='foo', html_content='foo')
        op8 = S3CopyObjectOperator(task_id='op8', dest_bucket_key='foo', source_bucket_key='foo')
        op9 = BranchPythonOperator(task_id='op9', python_callable=print)
        op10 = PythonOperator(task_id='op10', python_callable=range)

        op1 >> [op2, op3, op4]
        op2 >> [op5, op6]
        op6 >> [op7, op8, op9]
        op3 >> [op7, op8]
        op8 >> [op9, op10]

    return dag
def on_failure(config) -> BaseOperator:
    return EmailOperator(mime_charset='utf-8',
                         to=config.email,
                         subject='Largest Orders Daily Report Failed',
                         html_content='This is urgent!',
                         task_id='email_notification',
                         trigger_rule='one_failed')
def get_grupo_dados(dag, previous_task, next_task, dados):
    for dado in dados:
        extracao = SimpleHttpOperator(
            task_id='Extracao_de_dados_{}'.format(dado),
            endpoint='url...',
            method='GET',
            trigger_rule="all_success",
            dag=dag)

        email_erro = EmailOperator(
            task_id='Email_Erro_{}'.format(dado),
            to='*****@*****.**',
            subject='Airflow Alert Erro',
            html_content='Erro ao realizar captura de {}'.format(dado),
            dag=dag,
            trigger_rule="all_failed",
            default_args={
                'email': ['*****@*****.**'],
                'email_on_failure': True,
                'email_on_retry': True,
                'retries': 2,
                'retry_delay': timedelta(minutes=5)
            })

        salvar_base_raw = BranchPythonOperator(
            task_id='Salvar_DB_Raw_{}'.format(dado),
            python_callable=salva_dados_db_raw,
            trigger_rule="all_success",
            dag=dag)

        stop_falha = BranchPythonOperator(
            task_id='Stop_erro_extracao_{}'.format(dado),
            python_callable=salva_dados_db_raw,
            trigger_rule="dummy",
            dag=dag)

        transformacao = BranchPythonOperator(
            task_id='Transformacao_dados_{}'.format(dado),
            python_callable=transforma_dados,
            trigger_rule="one_success",
            dag=dag)

        salvar_base_staging = BranchPythonOperator(
            task_id='Salvar_DB_Staging_{}'.format(dado),
            python_callable=salva_dados_db_staging,
            trigger_rule="all_success",
            dag=dag)

        # define the flow
        previous_task >> extracao
        extracao >> email_erro
        extracao >> salvar_base_raw
        email_erro >> stop_falha
        stop_falha >> transformacao
        salvar_base_raw >> transformacao
        transformacao >> salvar_base_staging
        salvar_base_staging >> next_task
def Email_0(config) -> BaseOperator:
    content = '''
    '''
    return EmailOperator(mime_charset="utf-8",
                         to="",
                         subject="''",
                         html_content=content,
                         task_id="Email_0",
                         trigger_rule="one_failed")
def Test_Email(config) -> BaseOperator:
    content = '''
    Test
    '''
    return EmailOperator(mime_charset="utf-8",
                         to="Test",
                         subject="'Test'",
                         html_content=content,
                         task_id="Test_Email",
                         trigger_rule="all_success")
def Email_0(config) -> BaseOperator:
    content = '''
    Yay
    '''
    return EmailOperator(mime_charset="utf-8",
                         to="*****@*****.**",
                         subject="'Success'",
                         html_content=content,
                         task_id="Email_0",
                         trigger_rule="one_success")
def Emailhasanewname(config) -> BaseOperator:
    content = '''
    Delta
    '''
    return EmailOperator(mime_charset="utf-8",
                         to="*****@*****.**",
                         subject="'Delta'",
                         html_content=content,
                         task_id="Emailhasanewname",
                         trigger_rule="all_success")
def Email_1(config) -> BaseOperator:
    content = '''
    Test email
    '''
    return EmailOperator(mime_charset="utf-8",
                         to="*****@*****.**",
                         subject="'Delta'",
                         html_content=content,
                         task_id="Email_1",
                         trigger_rule="all_success")
def get_spy_data(**context):
    dates = []
    closings = []
    print('hello bollinger bands')
    r = requests.get(
        "https://www.alphavantage.co/query?function=TIME_SERIES_DAILY_ADJUSTED&symbol=SPY&apikey=" + API_KEY)
    data_dict = r.json()['Time Series (Daily)']

    # Parse dict to capture desired info into lists
    for k, v in data_dict.items():
        dates.append(k)
        closings.append(v['5. adjusted close'])

    df = {'Date': dates, 'AdjClose': closings}
    stock_data = work_up_data(df)

    email = EmailOperator(task_id="email_task",
                          to="*****@*****.**",
                          subject='',
                          html_content='',
                          dag=context.get("dag"))

    # Send email based on conditions
    if stock_data.iloc[-1]['buy'] == 1:
        print("BUY!!!!")
        email.subject = "BUY SOME SPY STOCK!"
        email.html_content = "Bollinger bands say to buy some SPY"
        email.execute(context=context)
    if stock_data.iloc[-1]['sell'] == 1:
        print("SELL!!!!")
        email.subject = "SELL SOME SPY STOCK!"
        email.html_content = "Bollinger bands say to sell SPY"
        email.execute(context=context)

    # Populate db
    stock_data.to_csv('/tmp/spy_data.csv')
def report_failure(context):
    """Send custom email alerts."""
    subject = 'Airflow critical: {ti}'.format(**context)
    html_content = """
    Exception:<br>{exception}<br>
    Execution Date: {execution_date}<br>
    Log: <a href="{ti.log_url}">Link</a><br>
    Host: {ti.hostname}<br>
    Log file: {ti.log_filepath}<br>
    Mark success: <a href="{ti.mark_success_url}">Link</a><br>
    """.format(**context)
    return EmailOperator(
        task_id='report_email_failure',
        to='*****@*****.**',
        subject=subject,
        html_content=html_content,
    ).execute(context)
def report_notify_email(report, email_template_location, **context):
    """
    For the given report, sends a notification email in the format given in the email template.

    :param report: report being notified on
    :type report: Report
    :param email_template_location: location of the html template to use for the status email
    :type email_template_location: str
    """
    ri = ReportInstance(context["dag_run"])

    updated_time = ri.updated
    timezone = pendulum.timezone(conf.get("core", "default_timezone"))
    updated_time = updated_time.replace(tzinfo=timezone)

    passed = ri.passed
    status = get_status(passed)
    details_link = get_details_link()

    with open(email_template_location) as file:
        send_email = EmailOperator(
            task_id="custom_email_notification",
            to=report.subscribers,
            subject="[{{status}}] {{title}}",
            html_content=file.read(),
        )
        params = {
            "passed": passed,
            "status": status,
            "updated": updated_time,
            "title": report.report_title,
            "details_link": details_link,
        }
        send_email.render_template_fields(
            context=params, jinja_env=context["dag"].get_template_env())
        logging.info(f'Sending "{send_email.subject}" email...')
        send_email.execute(context)
def predict_img():
    # `self.input().path` and `prediction` come from the surrounding pipeline definition
    img = cv2.imread(self.input().path, cv2.IMREAD_GRAYSCALE)
    circles = cv2.HoughCircles(img, cv2.HOUGH_GRADIENT, dp=2, minDist=15,
                               param1=100, param2=70)
    label = "lemon" if circles is not None else "banana"
    with open(prediction, "w") as out:
        json.dump({"class": label}, out)


predict = PythonOperator(
    task_id='predict',
    python_callable=predict_img,
    dag=dag,
)

notify = EmailOperator(
    task_id='notify',
    to='*****@*****.**',
    subject='Your daily prediction has finished!',
    html_content='Gratz!',
    dag=dag,
)

# connect our pipeline steps
file_sensor >> preprocess >> predict >> notify
def build_load_dag(dag_id,
                   output_bucket,
                   destination_dataset_project_id,
                   chain='ethereum',
                   notification_emails=None,
                   load_start_date=datetime(2018, 7, 1),
                   schedule_interval='0 0 * * *',
                   load_all_partitions=True):
    # The following datasets must be created in BigQuery:
    # - crypto_{chain}_raw
    # - crypto_{chain}_temp
    # - crypto_{chain}

    dataset_name = f'crypto_{chain}'
    dataset_name_raw = f'crypto_{chain}_raw'
    dataset_name_temp = f'crypto_{chain}_temp'

    if not destination_dataset_project_id:
        raise ValueError('destination_dataset_project_id is required')

    environment = {
        'dataset_name': dataset_name,
        'dataset_name_raw': dataset_name_raw,
        'dataset_name_temp': dataset_name_temp,
        'destination_dataset_project_id': destination_dataset_project_id,
        'load_all_partitions': load_all_partitions
    }

    def read_bigquery_schema_from_file(filepath):
        result = []
        file_content = read_file(filepath)
        json_content = json.loads(file_content)
        for field in json_content:
            result.append(
                bigquery.SchemaField(name=field.get('name'),
                                     field_type=field.get('type', 'STRING'),
                                     mode=field.get('mode', 'NULLABLE'),
                                     description=field.get('description')))
        return result

    def read_file(filepath):
        with open(filepath) as file_handle:
            content = file_handle.read()
            return content

    def submit_bigquery_job(job, configuration):
        try:
            logging.info('Creating a job: ' + json.dumps(configuration.to_api_repr()))
            result = job.result()
            logging.info(result)
            assert job.errors is None or len(job.errors) == 0
            return result
        except Exception:
            logging.info(job.errors)
            raise

    default_dag_args = {
        'depends_on_past': False,
        'start_date': load_start_date,
        'email_on_failure': True,
        'email_on_retry': False,
        'retries': 5,
        'retry_delay': timedelta(minutes=5)
    }

    if notification_emails and len(notification_emails) > 0:
        default_dag_args['email'] = [
            email.strip() for email in notification_emails.split(',')
        ]

    # Define a DAG (directed acyclic graph) of tasks.
    dag = models.DAG(dag_id,
                     catchup=False,
                     schedule_interval=schedule_interval,
                     default_args=default_dag_args)

    dags_folder = os.environ.get('DAGS_FOLDER', '/home/airflow/gcs/dags')

    def add_load_tasks(task, file_format, allow_quoted_newlines=False):
        wait_sensor = GoogleCloudStorageObjectSensor(
            task_id='wait_latest_{task}'.format(task=task),
            timeout=60 * 60,
            poke_interval=60,
            bucket=output_bucket,
            object='export/{task}/block_date={datestamp}/{task}.{file_format}'.format(
                task=task, datestamp='{{ds}}', file_format=file_format),
            dag=dag)

        def load_task():
            client = bigquery.Client()
            job_config = bigquery.LoadJobConfig()
            schema_path = os.path.join(
                dags_folder,
                'resources/stages/raw/schemas/{task}.json'.format(task=task))
            job_config.schema = read_bigquery_schema_from_file(schema_path)
            job_config.source_format = bigquery.SourceFormat.CSV if file_format == 'csv' \
                else bigquery.SourceFormat.NEWLINE_DELIMITED_JSON
            if file_format == 'csv':
                job_config.skip_leading_rows = 1
            job_config.write_disposition = 'WRITE_TRUNCATE'
            job_config.allow_quoted_newlines = allow_quoted_newlines
            job_config.ignore_unknown_values = True

            export_location_uri = 'gs://{bucket}/export'.format(bucket=output_bucket)
            uri = '{export_location_uri}/{task}/*.{file_format}'.format(
                export_location_uri=export_location_uri,
                task=task,
                file_format=file_format)
            table_ref = client.dataset(dataset_name_raw).table(task)
            load_job = client.load_table_from_uri(uri, table_ref, job_config=job_config)
            submit_bigquery_job(load_job, job_config)
            assert load_job.state == 'DONE'

        load_operator = PythonOperator(task_id='load_{task}'.format(task=task),
                                       python_callable=load_task,
                                       execution_timeout=timedelta(minutes=30),
                                       dag=dag)

        wait_sensor >> load_operator
        return load_operator

    def add_enrich_tasks(task,
                         time_partitioning_field='block_timestamp',
                         dependencies=None,
                         always_load_all_partitions=False):
        def enrich_task(ds, **kwargs):
            template_context = kwargs.copy()
            template_context['ds'] = ds
            template_context['params'] = environment

            client = bigquery.Client()

            # Need to use a temporary table because bq query sets field modes to NULLABLE
            # and descriptions to null when writeDisposition is WRITE_TRUNCATE

            # Create a temporary table
            temp_table_name = '{task}_{milliseconds}'.format(
                task=task, milliseconds=int(round(time.time() * 1000)))
            temp_table_ref = client.dataset(dataset_name_temp).table(temp_table_name)

            schema_path = os.path.join(
                dags_folder,
                'resources/stages/enrich/schemas/{task}.json'.format(task=task))
            schema = read_bigquery_schema_from_file(schema_path)
            table = bigquery.Table(temp_table_ref, schema=schema)

            description_path = os.path.join(
                dags_folder,
                'resources/stages/enrich/descriptions/{task}.txt'.format(task=task))
            table.description = read_file(description_path)
            if time_partitioning_field is not None:
                table.time_partitioning = TimePartitioning(field=time_partitioning_field)
            logging.info('Creating table: ' + json.dumps(table.to_api_repr()))
            table = client.create_table(table)
            assert table.table_id == temp_table_name

            # Query from raw to temporary table
            query_job_config = bigquery.QueryJobConfig()
            # Finishes faster, query limit for concurrent interactive queries is 50
            query_job_config.priority = bigquery.QueryPriority.INTERACTIVE
            query_job_config.destination = temp_table_ref

            sql_path = os.path.join(
                dags_folder,
                'resources/stages/enrich/sqls/{task}.sql'.format(task=task))
            sql_template = read_file(sql_path)
            sql = kwargs['task'].render_template('', sql_template, template_context)
            print('Enrichment sql:')
            print(sql)

            query_job = client.query(sql, location='US', job_config=query_job_config)
            submit_bigquery_job(query_job, query_job_config)
            assert query_job.state == 'DONE'

            if load_all_partitions or always_load_all_partitions:
                # Copy temporary table to destination
                copy_job_config = bigquery.CopyJobConfig()
                copy_job_config.write_disposition = 'WRITE_TRUNCATE'

                dest_table_name = '{task}'.format(task=task)
                dest_table_ref = client.dataset(
                    dataset_name,
                    project=destination_dataset_project_id).table(dest_table_name)
                copy_job = client.copy_table(temp_table_ref,
                                             dest_table_ref,
                                             location='US',
                                             job_config=copy_job_config)
                submit_bigquery_job(copy_job, copy_job_config)
                assert copy_job.state == 'DONE'
            else:
                # Merge
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement
                merge_job_config = bigquery.QueryJobConfig()
                # Finishes faster, query limit for concurrent interactive queries is 50
                merge_job_config.priority = bigquery.QueryPriority.INTERACTIVE

                merge_sql_path = os.path.join(
                    dags_folder,
                    'resources/stages/enrich/sqls/merge/merge_{task}.sql'.format(task=task))
                merge_sql_template = read_file(merge_sql_path)

                merge_template_context = template_context.copy()
                merge_template_context['params']['source_table'] = temp_table_name
                merge_template_context['params'][
                    'destination_dataset_project_id'] = destination_dataset_project_id
                merge_template_context['params']['destination_dataset_name'] = dataset_name
                merge_sql = kwargs['task'].render_template(
                    '', merge_sql_template, merge_template_context)
                print('Merge sql:')
                print(merge_sql)
                merge_job = client.query(merge_sql,
                                         location='US',
                                         job_config=merge_job_config)
                submit_bigquery_job(merge_job, merge_job_config)
                assert merge_job.state == 'DONE'

            # Delete temp table
            client.delete_table(temp_table_ref)

        enrich_operator = PythonOperator(
            task_id='enrich_{task}'.format(task=task),
            python_callable=enrich_task,
            provide_context=True,
            execution_timeout=timedelta(minutes=60),
            dag=dag)

        if dependencies is not None and len(dependencies) > 0:
            for dependency in dependencies:
                dependency >> enrich_operator
        return enrich_operator

    def add_verify_tasks(task, dependencies=None):
        # The queries in verify/sqls will fail when the condition is not met
        # Have to use this trick since the Python 2 version of BigQueryCheckOperator
        # doesn't support standard SQL and legacy SQL can't be used to query
        # partitioned tables.
        sql_path = os.path.join(
            dags_folder,
            'resources/stages/verify/sqls/{task}.sql'.format(task=task))
        sql = read_file(sql_path)
        verify_task = BigQueryOperator(task_id='verify_{task}'.format(task=task),
                                       bql=sql,
                                       params=environment,
                                       use_legacy_sql=False,
                                       dag=dag)
        if dependencies is not None and len(dependencies) > 0:
            for dependency in dependencies:
                dependency >> verify_task
        return verify_task

    load_blocks_task = add_load_tasks('blocks', 'csv')
    load_transactions_task = add_load_tasks('transactions', 'csv')
    load_receipts_task = add_load_tasks('receipts', 'csv')
    load_logs_task = add_load_tasks('logs', 'json')
    load_contracts_task = add_load_tasks('contracts', 'json')
    load_tokens_task = add_load_tasks('tokens', 'csv', allow_quoted_newlines=True)
    load_token_transfers_task = add_load_tasks('token_transfers', 'csv')
    load_traces_task = add_load_tasks('traces', 'csv')

    enrich_blocks_task = add_enrich_tasks('blocks',
                                          time_partitioning_field='timestamp',
                                          dependencies=[load_blocks_task])
    enrich_transactions_task = add_enrich_tasks('transactions',
                                                dependencies=[
                                                    load_blocks_task,
                                                    load_transactions_task,
                                                    load_receipts_task
                                                ])
    enrich_logs_task = add_enrich_tasks(
        'logs', dependencies=[load_blocks_task, load_logs_task])
    enrich_token_transfers_task = add_enrich_tasks(
        'token_transfers', dependencies=[load_blocks_task, load_token_transfers_task])
    enrich_traces_task = add_enrich_tasks(
        'traces', dependencies=[load_blocks_task, load_traces_task])
    enrich_contracts_task = add_enrich_tasks(
        'contracts', dependencies=[load_blocks_task, load_contracts_task])
    enrich_tokens_task = add_enrich_tasks(
        'tokens', dependencies=[load_blocks_task, load_tokens_task])

    calculate_balances_task = add_enrich_tasks('balances',
                                               dependencies=[
                                                   enrich_blocks_task,
                                                   enrich_transactions_task,
                                                   enrich_traces_task
                                               ],
                                               time_partitioning_field=None,
                                               always_load_all_partitions=True)

    verify_blocks_count_task = add_verify_tasks('blocks_count', [enrich_blocks_task])
    verify_blocks_have_latest_task = add_verify_tasks('blocks_have_latest',
                                                      [enrich_blocks_task])
    verify_transactions_count_task = add_verify_tasks(
        'transactions_count', [enrich_blocks_task, enrich_transactions_task])
    verify_transactions_have_latest_task = add_verify_tasks(
        'transactions_have_latest', [enrich_transactions_task])
    verify_logs_have_latest_task = add_verify_tasks('logs_have_latest',
                                                    [enrich_logs_task])
    verify_token_transfers_have_latest_task = add_verify_tasks(
        'token_transfers_have_latest', [enrich_token_transfers_task])
    verify_traces_blocks_count_task = add_verify_tasks(
        'traces_blocks_count', [enrich_blocks_task, enrich_traces_task])
    verify_traces_transactions_count_task = add_verify_tasks(
        'traces_transactions_count', [enrich_transactions_task, enrich_traces_task])
    verify_traces_contracts_count_task = add_verify_tasks(
        'traces_contracts_count',
        [enrich_transactions_task, enrich_traces_task, enrich_contracts_task])

    if notification_emails and len(notification_emails) > 0:
        send_email_task = EmailOperator(
            task_id='send_email',
            to=[email.strip() for email in notification_emails.split(',')],
            subject='Ethereum ETL Airflow Load DAG Succeeded',
            html_content='Ethereum ETL Airflow Load DAG Succeeded - {}'.format(chain),
            dag=dag)
        verify_blocks_count_task >> send_email_task
        verify_blocks_have_latest_task >> send_email_task
        verify_transactions_count_task >> send_email_task
        verify_transactions_have_latest_task >> send_email_task
        verify_logs_have_latest_task >> send_email_task
        verify_token_transfers_have_latest_task >> send_email_task
        verify_traces_blocks_count_task >> send_email_task
        verify_traces_transactions_count_task >> send_email_task
        verify_traces_contracts_count_task >> send_email_task
        enrich_tokens_task >> send_email_task
        calculate_balances_task >> send_email_task

    return dag
    bash_command='rm -f /home/repl/*.tmp',
    dag=dag)

python_task = PythonOperator(task_id='run_processing',
                             python_callable=process_data,
                             provide_context=True,
                             dag=dag)

email_subject = """
  Email report for {{ params.department }} on {{ ds_nodash }}
"""

email_report_task = EmailOperator(task_id='email_report_task',
                                  to='*****@*****.**',
                                  subject=email_subject,
                                  html_content='',
                                  params={'department': 'Data subscription services'},
                                  dag=dag)

no_email_task = DummyOperator(task_id='no_email_task', dag=dag)


def check_weekend(**kwargs):
    dt = datetime.strptime(kwargs['execution_date'], "%Y-%m-%d")
    # If dt.weekday() is 0-4, it's Monday - Friday. If 5 or 6, it's Sat / Sun.
    if dt.weekday() < 5:
        return 'email_report_task'
    else:
        return 'no_email_task'
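Since check_weekend above returns a task_id, it is presumably driven by a BranchPythonOperator; a sketch of that wiring using the operators already defined in this snippet (the branch task_id and the import path are assumptions, Airflow 1.x style):

from airflow.operators.python_operator import BranchPythonOperator

# Sketch: branch to the email report on weekdays, otherwise to the no-op task.
branch_task = BranchPythonOperator(task_id='check_weekend',
                                   python_callable=check_weekend,
                                   provide_context=True,
                                   dag=dag)

python_task >> branch_task >> [email_report_task, no_email_task]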
for file in files:
    with open(file, "rb") as f:
        try:
            data = f.read()
            bucket_path = "soumil/{}".format(file)
            aws_helper.put_files(Key=bucket_path, Body=data)
            print("File : {} Uploaded ".format(file))
        except Exception as e:
            print("Failed to upload File :{} ".format(e))


def trigger_glue(**context):
    pass


with DAG(dag_id="project",
         schedule_interval="@once",
         default_args=default_args,
         catchup=False) as dag:

    crawl_files = PythonOperator(task_id="crawl_files",
                                 python_callable=crawl_files,
                                 provide_context=True)
    upload_s3 = PythonOperator(task_id="upload_s3",
                               python_callable=upload_s3,
                               provide_context=True)
    trigger_glue = PythonOperator(task_id="trigger_glue",
                                  python_callable=trigger_glue,
                                  provide_context=True)
    email = EmailOperator(
        task_id='send_email',
        to='XXXXXXXXXXXXXXX',
        subject='Airflow Alert',
        html_content="""<h3>ETL Pipeline complete</h3>""",
    )

    crawl_files >> upload_s3 >> trigger_glue >> email
    return html


##########################################################################
# Airflow code to define the DAG for the weekly usage updates
##########################################################################
default_args = {
    'owner': 'fiddler-analytics',
    'depends_on_past': False,
    'start_date': datetime(2019, 8, 8),
    'email': ['*****@*****.**', '*****@*****.**', '*****@*****.**'],
    'email_on_failure': True,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=5)
}

dag = DAG('shir-connect-usage',
          default_args=default_args,
          schedule_interval='0 12 * * 5')

send_email = EmailOperator(task_id='usage-statistics-email',
                           to=['*****@*****.**', '*****@*****.**', '*****@*****.**'],
                           subject='Shir Connect Usage Statistics',
                           html_content=build_email_content(),
                           dag=dag)
from datetime import datetime

from airflow import DAG
from airflow.operators.email_operator import EmailOperator

default_args = {
    'owner': 'owner',
    'depends_on_past': False,
    'start_date': datetime(2020, 10, 13),
    'email': ['*****@*****.**'],
    'email_on_failure': True
}

dag = DAG('email_send',
          schedule_interval='@hourly',
          default_args=default_args,
          start_date=datetime(2020, 10, 13))

with dag:
    send_email = EmailOperator(task_id='send_email',
                               to='*****@*****.**',
                               subject='Airflow Alert',
                               html_content='This is trial mail',
                               dag=dag)
    'jars': '/Users/ravimuthyala/AirflowSparkTestCode/postgresql-42.2.12.jar',
    'application_args': ["/Users/ravimuthyala/AirflowSparkTestCode/receipts.csv"],
    'driver_memory': '1g',
    'executor_cores': 1,
    'num_executors': 1,
    'executor_memory': '1g'
}

spark_submit_operator = SparkSubmitOperator(task_id='Spark_Scala_Submit_Job',
                                            dag=dag,
                                            **spark_config)

emailNotify = EmailOperator(task_id='email_notification',
                            to='*****@*****.**',
                            subject='Spark Submit Job Alert',
                            html_content='Airflow Spark Submit Job Done',
                            dag=dag)

t1Failed = EmailOperator(dag=dag,
                         trigger_rule=TriggerRule.ONE_FAILED,
                         task_id="SparkJobFailed",
                         to=["*****@*****.**"],
                         subject="Spark job Failed",
                         html_content='<h3>Spark job has failed</h3>')

python_operator.set_downstream(spark_submit_operator)
spark_submit_operator.set_downstream(emailNotify)
t1Failed.set_upstream([spark_submit_operator])

if __name__ == '__main__':
# TASK III: Write filtered questions to S3 Bucket
Task_III = PythonOperator(task_id="write_questions_to_s3",
                          python_callable=write_questions_to_s3)

# TASK IV: Render HTML template
Task_IV = PythonOperator(task_id="render_template",
                         python_callable=render_template,
                         provide_context=True)

# TASK V: Send email notification
Task_V = EmailOperator(
    task_id="send_email",
    provide_context=True,
    to="*****@*****.**",
    subject="Top questions with tag 'pandas' on {{ ds }}",  # uses jinja template ('ds' is date)
    html_content="{{ task_instance.xcom_pull(task_ids='render_template', key='html_content') }}"
)

# execute pipeline tasks in series
Task_I >> Task_II >> Task_III >> Task_IV >> Task_V


# _______________________________PYTHON FUNCS__________________________________#

def call_stack_overflow_api() -> dict:
    """ Get first 100 questions created two days ago sorted by user votes """
    stack_overflow_question_url = Variable.get("STACK_OVERFLOW_QUESTION_URL")
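Task_V above pulls html_content from XCom, which implies render_template pushes a value under that key; a hedged sketch of what that push might look like (only the key name is taken from the snippet, the HTML body here is purely illustrative):

def render_template(**context):
    # Sketch: build the HTML body and push it for the EmailOperator to pull via xcom_pull.
    html = "<p>Top 'pandas' questions for {}</p>".format(context['ds'])
    context['task_instance'].xcom_push(key='html_content', value=html)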
)

#
# A bad command we need failing
#
run_fail = BashOperator(
    task_id="trigger_failure",
    bash_command="invalidcommand",
    env=environment_vars,
    dag=dag,
)

#
# Send an email when done
#
email_task = EmailOperator(
    to=environment_vars.get("EMAIL_RECIPIENT", ""),
    task_id="email_admin",
    subject="Templated Subject: start_date {{ ds }}",
    mime_charset="utf-8",
    params={"content1": "random"},
    html_content="Templated Content: content1 - {{ params.content1 }} task_key - {{ task_instance_key_str }} test_mode - {{ test_mode }} task_owner - {{ task.owner}} hostname - {{ ti.hostname }}",
    dag=dag,
)

run_success >> run_fail >> email_task

if __name__ == "__main__":
    dag.cli()
    ssh_hook=sshHook,
    dag=dag)

hive = SSHExecuteOperator(
    task_id="comment_import",
    bash_command='(bash {path}/xianyu_itemcomment_import.sh {lastday} {last_update_date})'.format(
        path=path,
        lastday=get_lastday(),
        last_update_date=get_last_update_date()),
    ssh_hook=sshHook,
    dag=dag)

email_update = EmailOperator(task_id='xianyu_itemcomment_update_email',
                             to=['*****@*****.**'],
                             subject='xianyu itemcomment workflow',
                             html_content='[ xianyu data updated!!! ]',
                             dag=dag)

email_update_not = EmailOperator(task_id='xianyu_itemcomment_update_not_email',
                                 to=['*****@*****.**'],
                                 subject='xianyu itemcomment workflow',
                                 html_content='[ xianyu data updating!!! ]',
                                 dag=dag)

branching = BranchPythonOperator(task_id='check_attach',
                                 python_callable=lambda: check_attach(),
                                 dag=dag)

passover = DummyOperator(task_id='pass', dag=dag)
update = DummyOperator(task_id='update', dag=dag)

chain(branching, passover, email_update_not)