# Airflow 1.x import paths for the two transfer operators used below.
from airflow.contrib.operators.mysql_to_gcs import MySqlToGoogleCloudStorageOperator
from airflow.contrib.operators.gcs_to_bq import GoogleCloudStorageToBigQueryOperator

for connection in sakila_connections:
    for table in sakila_tables:
        # Dump the table to GCS as newline-delimited JSON, tagging each row
        # with its source connection. The {} in filename is the chunk-number
        # placeholder the operator fills in when a large extract is split
        # across multiple files.
        extract = MySqlToGoogleCloudStorageOperator(
            task_id="extract_mysql_%s_%s" % (connection, table),
            mysql_conn_id=connection,
            google_cloud_storage_conn_id='gcp_test',
            sql="SELECT *, '%s' as source FROM sakila.%s" % (connection, table),
            bucket='ghen-airflow',
            filename="%s/%s/%s{}.json" % (connection, table, table),
            schema_filename="%s/schemas/%s.json" % (connection, table),
            dag=dag)

        # Load every chunk for this table into BigQuery, replacing the
        # destination table on each run.
        load = GoogleCloudStorageToBigQueryOperator(
            task_id="load_bq_%s_%s" % (connection, table),
            bigquery_conn_id='gcp_test',
            google_cloud_storage_conn_id='gcp_test',
            bucket='ghen-airflow',
            destination_project_dataset_table="spark-test-173322.%s.%s" % (connection, table),
            source_objects=["%s/%s/%s*.json" % (connection, table, table)],
            schema_object="%s/schemas/%s.json" % (connection, table),
            source_format='NEWLINE_DELIMITED_JSON',
            create_disposition='CREATE_IF_NEEDED',
            write_disposition='WRITE_TRUNCATE',
            project_id='spark-test-173322',
            dag=dag)

        load.set_upstream(extract)
        slack_notify.set_upstream(load)
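The loop above assumes a surrounding DAG, the sakila_connections and sakila_tables lists, and a fan-in slack_notify task. A minimal sketch of that context; the DAG id, connection ids, table list, schedule, and Slack details are illustrative placeholders, not part of the original:

from datetime import datetime

from airflow import DAG
from airflow.operators.slack_operator import SlackAPIPostOperator

dag = DAG(
    'sakila_to_bigquery',             # hypothetical DAG id
    start_date=datetime(2017, 1, 1),
    schedule_interval='@daily')

# One Airflow connection per source MySQL replica (placeholder ids).
sakila_connections = ['sakila_1', 'sakila_2']
sakila_tables = ['actor', 'film', 'rental']

# Single notification task; every load task in the loop is wired upstream of it.
slack_notify = SlackAPIPostOperator(
    task_id='slack_notify',
    token='XXX',
    channel='#etl',
    text='Sakila extract/load finished for all connections and tables.',
    dag=dag)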
from airflow.operators.dummy_operator import DummyOperator

# No-op landing task for the "nothing changed" branch.
nothing_to_update_op = DummyOperator(
    task_id='nothing_to_update',
    dag=dag
)

# Both change-detection tasks feed the two branch checks.
check_job_posting_to_be_updated_op.set_downstream(check_to_remove_op)
check_job_posting_to_be_updated_op.set_downstream(check_to_update_op)
check_work_experience_to_be_updated_op.set_downstream(check_to_remove_op)
check_work_experience_to_be_updated_op.set_downstream(check_to_update_op)

# Each check either triggers real work or falls through to a no-op.
update_scores_branch_op.set_upstream(check_to_update_op)
remove_scores_op.set_upstream(check_to_remove_op)
nothing_to_remove_op.set_upstream(check_to_remove_op)
nothing_to_update_op.set_upstream(check_to_update_op)
notify_processing_completion_op.set_upstream(nothing_to_remove_op)
notify_processing_completion_op.set_upstream(nothing_to_update_op)

# The update branch fans out to three feature computations, which all
# feed the similarity computation before the scores are written.
update_scores_branch_op.set_downstream(compute_title_feature_op)
update_scores_branch_op.set_downstream(compute_skill_feature_op)
update_scores_branch_op.set_downstream(compute_description_feature_op)
compute_similarity_op.set_upstream(compute_title_feature_op)
compute_similarity_op.set_upstream(compute_skill_feature_op)
compute_similarity_op.set_upstream(compute_description_feature_op)
compute_similarity_op.set_downstream(update_scores_op)

# Every terminal task fans back into the completion notification.
notify_processing_completion_op.set_upstream(update_scores_op)
notify_processing_completion_op.set_upstream(remove_scores_op)
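For reference, the same graph in bitshift notation, which makes the fan-out/fan-in shape easier to read. This is equivalent to the set_upstream/set_downstream calls above; list operands assume Airflow 1.10 or later:

for check in (check_job_posting_to_be_updated_op,
              check_work_experience_to_be_updated_op):
    check >> [check_to_remove_op, check_to_update_op]

check_to_update_op >> [update_scores_branch_op, nothing_to_update_op]
check_to_remove_op >> [remove_scores_op, nothing_to_remove_op]

update_scores_branch_op >> [compute_title_feature_op,
                            compute_skill_feature_op,
                            compute_description_feature_op]
[compute_title_feature_op,
 compute_skill_feature_op,
 compute_description_feature_op] >> compute_similarity_op
compute_similarity_op >> update_scores_op

[update_scores_op, remove_scores_op,
 nothing_to_update_op, nothing_to_remove_op] >> notify_processing_completion_op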
from datetime import datetime

import pytz
from airflow.operators.slack_operator import SlackAPIPostOperator
from airflow.operators.subdag_operator import SubDagOperator

# The opening of this call is truncated in the original; the variable and
# task names are inferred from the dependency wiring below (the message
# announces a restarted cluster).
send_slack_cluster_restarted_ok = SlackAPIPostOperator(
    task_id='send_slack_cluster_restarted_ok',  # inferred task id
    channel=slack_channel,
    username='******',
    text='Cluster has been *restarted!*\n'
         "It's all fine, move forward with your ETLs and Crawlers!\n"
         'Message datetime: {{ params.curr_date }}',
    params={'curr_date': str(datetime.now(pytz.timezone('America/Sao_Paulo')))},
    dag=dag
)

run_etl_crawler_cluster_up = SubDagOperator(
    subdag=sub_dag('check_cluster_slack', 'crawler_dag_cluster_up',
                   dag.schedule_interval),
    task_id='crawler_dag_cluster_up',
    dag=dag,
)

run_etl_crawler_cluster_restarted = SubDagOperator(
    subdag=sub_dag('check_cluster_slack', 'crawler_dag_cluster_restarted',
                   dag.schedule_interval),
    task_id='crawler_dag_cluster_restarted',
    dag=dag,
)

# branch1 decides whether the cluster is already up; branch2 checks the
# outcome of the restart attempt before the crawlers run.
branch1.set_upstream(check_cluster)
send_slack_cluster_ok.set_upstream(branch1)
send_slack_cluster_start.set_upstream(branch1)
start_cluster.set_upstream(send_slack_cluster_start)
branch2.set_upstream(start_cluster)
send_slack_cluster_down.set_upstream(branch2)
send_slack_cluster_restarted_ok.set_upstream(branch2)
run_etl_crawler_cluster_up.set_upstream(send_slack_cluster_ok)
run_etl_crawler_cluster_restarted.set_upstream(send_slack_cluster_restarted_ok)
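Both SubDagOperators assume a sub_dag factory that builds the crawler DAG under the parent's namespace. A minimal sketch of such a factory, assuming the parent DAG is named check_cluster_slack as in the calls above; the inner task and start date are illustrative placeholders:

from datetime import datetime

from airflow import DAG
from airflow.operators.dummy_operator import DummyOperator

def sub_dag(parent_dag_name, child_dag_name, schedule_interval):
    # A SubDAG's id must be '<parent_dag_id>.<subdag_task_id>' for the
    # scheduler to associate it with its SubDagOperator.
    dag = DAG(
        dag_id='%s.%s' % (parent_dag_name, child_dag_name),
        schedule_interval=schedule_interval,
        start_date=datetime(2017, 1, 1))
    # Placeholder for the real ETL/crawler tasks.
    DummyOperator(task_id='run_crawlers', dag=dag)
    return dag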
""" create_cluster = PythonOperator(task_id='create_databricks_cluster', dag=dag, python_callable=create_databricks_cluster) create_cluster_notify = SlackAPIPostOperator( task_id='create_cluster_notify', username='******', token='XXX', channel='#databricks_jobs', text= ":databricks: Databricks Cluster Created with ID: {{ task_instance.xcom_pull(task_ids='create_databricks_cluster') }}", dag=dag) create_cluster_notify.set_upstream(create_cluster) train_model = ECSOperator( task_id="train_model", task_definition='trainmodelriskassessment', cluster='TalendECS', aws_conn_id='aws_default', overrides={ 'containerOverrides': [ { 'name': "trainmodelriskassessment", 'command': [ "--context_param DATABRICKS_ENDPOINT=XXX", "--context_param DATABRICKS_TOKEN=XXX", "--context_param DATABRICKS_CLUSTER_ID={{ task_instance.xcom_pull(task_ids='create_databricks_cluster') }}"