Beispiel #1
0
        additional_metadata={"PIP_PACKAGES": "click==7.1.2 bcrypt==3.1.7"},
        python_driver_code="gs://{}/jobs/adjust_import.py".format(
            params.artifact_bucket),
        py_args=[
            "--bcrypt",
            "--salt",
            "$2a$10$ZfglUfcbmTyaBbAQ7SL9OO",
            "--project",
            project,
            "--input_table",
            "tmp.adjust_fennec_release",
            "--output_table",
            "firefox_android_release_external.adjust_install_time_v1",
            "--bucket",
            params.storage_bucket,
        ],
        gcp_conn_id=params.conn_id,
        service_account=params.client_email,
        artifact_bucket=params.artifact_bucket,
        storage_bucket=params.storage_bucket,
        default_args=subdag_args,
    ),
)

# In the development environment the job artifacts must be staged into the
# sandbox buckets before either Adjust import task may run, so make the
# copy task an upstream dependency of both.
if params.is_dev:
    copy_to_dev = copy_artifacts_dev(
        dag,
        params.project_id,
        params.artifact_bucket,
        params.storage_bucket,
    )
    # `a.set_downstream(b)` is equivalent to `a >> b` in Airflow.
    copy_to_dev.set_downstream(fenix_beta_adjust_import)
    copy_to_dev.set_downstream(fennec_adjust_import)
Beispiel #2
0
        task_id="export_main_avro",
        cmds=["bash"],
        command=[
            "bin/export-avro.sh",
            "moz-fx-data-shared-prod",
            "moz-fx-data-shared-prod:analysis",
            "gs://moz-fx-data-derived-datasets-parquet-tmp/avro/mozaggregator/prerelease",
            "main_v4",
            "'nightly', 'beta'",
            "{{ ds }}",
        ],
        docker_image="mozilla/python_mozaggregator:latest",
        dag=dag,
    ).set_downstream(prerelease_telemetry_aggregate_view_dataproc)

    # Clean up the temporary Avro export from GCS after the aggregate
    # view job that consumes it has finished.
    delete_main_avro = GoogleCloudStorageDeleteOperator(
        task_id="delete_main_avro",
        bucket_name="moz-fx-data-derived-datasets-parquet-tmp",
        prefix="avro/mozaggregator/prerelease/moz-fx-data-shared-prod/{{ ds_nodash }}/main_v4",
        google_cloud_storage_conn_id=gcp_conn.gcp_conn_id,
        dag=dag,
    )
    # `x >> y` is equivalent to `y.set_upstream(x)` in Airflow.
    prerelease_telemetry_aggregate_view_dataproc >> delete_main_avro

# When running in the development environment, stage the job artifacts into
# the sandbox buckets before the aggregate-view task runs.
if is_dev:
    copy_to_dev = copy_artifacts_dev(
        dag,
        project_id,
        artifact_bucket,
        storage_bucket,
    )
    # `a >> b` is equivalent to `a.set_downstream(b)` in Airflow.
    copy_to_dev >> prerelease_telemetry_aggregate_view_dataproc