コード例 #1
0
)

# Sub-DAG task that hands the MainSummaryView jar to `moz_dataproc_jar_runner`.
# NOTE(review): `moz_dataproc_jar_runner` is defined outside this excerpt; it
# presumably builds a sub-DAG that creates a Dataproc cluster, runs the jar and
# tears the cluster down -- confirm against the helper.
main_summary_dataproc = SubDagOperator(
    task_id="main_summary_dataproc",
    dag=dag,
    subdag=moz_dataproc_jar_runner(
        # --- sub-DAG identity and credentials ---
        parent_dag_name="main_summary",
        dag_name="main_summary_dataproc",
        job_name="main_summary_view_{{ds_nodash}}",
        default_args=default_args,
        gcp_conn_id="google_cloud_airflow_dataproc",
        service_account="*****@*****.**",
        # --- cluster settings ---
        cluster_name="main-summary-{{ds}}",
        image_version="1.3",
        worker_machine_type="n1-standard-8",
        num_preemptible_workers=40,
        optional_components=[],
        install_component_gateway=False,
        init_actions_uris=[],
        # --- job definition ---
        main_class="com.mozilla.telemetry.views.MainSummaryView",
        jar_urls=[
            # Adjacent literals are concatenated into a single URL.
            "https://s3-us-west-2.amazonaws.com/net-mozaws-data-us-west-2-ops-ci-artifacts"
            "/mozilla/telemetry-batch-view/master/telemetry-batch-view.jar",
        ],
        jar_args=[
            # Same placeholder for --from and --to: a single-day range.
            "--from={{ds_nodash}}",
            "--to={{ds_nodash}}",
            "--bucket=" + main_summary_dataproc_bucket,
            "--export-path=" + main_ping_bigquery_export_prefix,
        ],
    ),
)
コード例 #2
0
 # Sub-DAG passed as the `subdag` keyword of an enclosing call that is not
 # visible in this excerpt. It hands the AddonRecommender jar and its "train"
 # arguments to `moz_dataproc_jar_runner`.
 subdag=moz_dataproc_jar_runner(
     parent_dag_name=dag.dag_id,
     dag_name="addon_recommender",
     job_name="Train_the_Collaborative_Addon_Recommender",
     main_class="com.mozilla.telemetry.ml.AddonRecommender",
     jar_urls=[
         # Adjacent literals are concatenated into a single URL.
         "https://s3-us-west-2.amazonaws.com/net-mozaws-data-us-west-2-ops-ci-artifacts"
         "/mozilla/telemetry-batch-view/master/telemetry-batch-view.jar",
     ],
     jar_args=[
         "train",
         "--runDate={{ds_nodash}}",
         "--inputTable=gs://moz-fx-data-derived-datasets-parquet/clients_daily/v6",
         "--privateBucket=s3a://telemetry-parquet",
         "--publicBucket=s3a://telemetry-public-analysis-2",
     ],
     cluster_name="addon-recommender-{{ds_nodash}}",
     image_version="1.3",
     worker_machine_type="n1-standard-8",
     num_workers=20,
     optional_components=[],
     install_component_gateway=False,
     init_actions_uris=[],
     aws_conn_id=taar_aws_conn_id,
     gcp_conn_id=taar_gcpdataproc_conn_id,
     # Copy the shared defaults, then override owner/email for this sub-DAG.
     # Later keys win, so any "owner"/"email" in `default_args` is replaced;
     # `default_args` itself is not mutated.
     default_args={
         **default_args,
         "owner": "*****@*****.**",
         "email": ["*****@*****.**", "*****@*****.**", "*****@*****.**"],
     },
 ),
コード例 #3
0
# Sub-DAG task that hands the AddonRecommender jar and its "train" arguments
# to `moz_dataproc_jar_runner`.
# NOTE(review): `moz_dataproc_jar_runner` is defined outside this excerpt;
# confirm its cluster lifecycle behaviour against the helper.
taar_collaborative_recommender = SubDagOperator(
    task_id="addon_recommender",
    dag=dag,
    subdag=moz_dataproc_jar_runner(
        # --- sub-DAG identity and connections ---
        parent_dag_name=dag.dag_id,
        dag_name="addon_recommender",
        job_name="Train_the_Collaborative_Addon_Recommender",
        default_args=default_args,
        aws_conn_id=taar_aws_conn_id,
        gcp_conn_id=taar_gcpdataproc_conn_id,
        # --- cluster settings ---
        cluster_name="addon-recommender-{{ds_nodash}}",
        image_version="1.3",
        worker_machine_type="n1-standard-8",
        num_workers=20,
        optional_components=[],
        install_component_gateway=False,
        init_actions_uris=[],
        # --- job definition ---
        main_class="com.mozilla.telemetry.ml.AddonRecommender",
        jar_urls=[
            # The jar is still fetched from S3; artifacts should move to GCS
            # eventually. A GCS bucket used for testing lives in the
            # `cfr-personalization-experiment` project:
            #   'gs://taar_models/tmp/telemetry-batch-view-1.2.jar'
            "https://s3-us-west-2.amazonaws.com/net-mozaws-data-us-west-2-ops-ci-artifacts"
            "/mozilla/telemetry-batch-view/main/telemetry-batch-view.jar",
        ],
        jar_args=[
            "train",
            "--runDate={{ds_nodash}}",
            "--inputTable=gs://moz-fx-data-derived-datasets-parquet/clients_daily/v6",
            f"--privateBucket=gs://{TAAR_ETL_MODEL_STORAGE_BUCKET}",
            f"--checkpointDir=gs://{TAAR_ETL_STORAGE_BUCKET}/spark-checkpoints",
        ],
    ),
)