Beispiel #1
0
        },
        {
            'name': 'retention',
            'type': 'INTEGER',
            'mode': 'NULLABLE'
        },
    ],
    create_disposition='CREATE_IF_NEEDED',
    write_disposition='WRITE_APPEND',
    dag=dag)

# =================
# == tasks flow ===
# =================

# Cluster creation gates: each create task waits on the two tasks listed.
# Cluster 2 is only created after cluster 1 has been deleted.
create_cluster_1 << [push_unique_cluster_name, sensor_task]
create_cluster_2 << [push_unique_cluster_name, delete_cluster_1]

# Pipeline on cluster 1: unique users -> aggregation -> BigQuery load -> teardown.
create_cluster_1 >> calc_unique_users >> calc_agg >> bq_load_agg >> delete_cluster_1
# The user load branches off calc_unique_users and runs alongside calc_agg.
calc_unique_users >> bq_load_user

# Pipeline on cluster 2: day-1 retention -> BigQuery load -> teardown.
create_cluster_2 >> calc_retention_day1 >> bq_load_retention >> delete_cluster_2
Beispiel #2
0
      main_class='com.cohort.process.RetentionProcess',
      region='us-west1',
      job_name=dag_name + 'bike_share_retention_d7',
      cluster_name='{{ ti.xcom_pull(key="cluster_name", task_ids="push-cluster-name") }}'
      + '4',
      execution_timeout=timedelta(minutes=180),
      arguments=args)

  # Sensor task on object 'bike/unique-user/_SUCCESS' in bucket 'jiuzhangsuanfa'.
  # poke_interval/timeout are presumably seconds (Airflow sensor convention),
  # i.e. poll every 30 s and give up after 2700 s = 45 min -- TODO confirm.
  # NOTE(review): no dependency involving this sensor is set in the lines
  # visible here; verify it is wired into the DAG elsewhere.
  unique_user_sensor = GoogleCloudStorageObjectSensor(
      task_id='unique_user_sensor',
      poke_interval=30,
      timeout=2700,
      bucket='jiuzhangsuanfa',
      object='bike/unique-user/_SUCCESS',
  )

  # Cluster 1: unique-user job, then the aggregator, then cluster teardown.
  (dataproc_create_cluster_1
   >> unique_user
   >> bike_share_aggregator
   >> dataproc_destroy_cluster_1)

  # Clusters 2 and 3: one retention job each, framed by create/destroy.
  dataproc_create_cluster_2 >> bike_share_retention_d1 >> dataproc_destroy_cluster_2
  dataproc_create_cluster_3 >> bike_share_retention_d3 >> dataproc_destroy_cluster_3

  # Cluster 4: only the create -> job edge is set here.
  # NOTE(review): the matching teardown edge is not visible in this span --
  # confirm it is set further down.
  dataproc_create_cluster_4 >> bike_share_retention_d7