Example #1
        parser.add_value_provider_argument(
            "--user_project_id",
            dest="user_project_id",
            help=("User's CloudML project id. It is not the project id of the "
                  "Dataflow job. The logs are sent to the user's project in "
                  "Stackdriver, with the job id as their label."))

        parser.add_value_provider_argument(
            "--user_job_id",
            dest="user_job_id",
            help=("User's CloudML job id. It is not the job id of the Dataflow "
                  "job. The logs are sent to the user's project in Stackdriver, "
                  "with the job id as their label."))


if __name__ == "__main__":
    logging.getLogger().setLevel(logging.INFO)
    dataflow_pipeline_options = PipelineOptions()
    logging.info("Dataflow option: %s",
                 dataflow_pipeline_options.get_all_options())
    # Create the pipeline
    p = beam.Pipeline(options=dataflow_pipeline_options)
    # Create a dict of aggregators.
    aggregator_dict = aggregators.CreateAggregatorsDict()
    # Actually start the pipeline
    result = batch_prediction_pipeline.run(
        p, dataflow_pipeline_options.view_as(BatchPredictionOptions),
        aggregator_dict)
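
Both flags above are registered with add_value_provider_argument rather than
add_argument, so they become Beam ValueProviders whose values can be supplied
as late as template execution time. A minimal sketch of how such an option is
consumed inside a pipeline, assuming a hypothetical options class and DoFn
(_UserOptions and _TagWithJobId are illustrative names, not part of the code
above):

import apache_beam as beam
from apache_beam.options.pipeline_options import PipelineOptions


class _UserOptions(PipelineOptions):
    # Hypothetical options class; mirrors the flag registration above.
    @classmethod
    def _add_argparse_args(cls, parser):
        parser.add_value_provider_argument("--user_job_id", dest="user_job_id")


class _TagWithJobId(beam.DoFn):
    # Hypothetical DoFn showing deferred access to a ValueProvider.
    def __init__(self, user_job_id):
        self._user_job_id = user_job_id  # a ValueProvider, not a plain string

    def process(self, element):
        # The concrete value is only available at run time, via .get().
        yield (self._user_job_id.get(), element)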
Example #2
        "--user_job_id",
        dest="user_job_id",
        help=(
            "User's CloudML job id. It is not the job id of the Dataflow job. "
            "The logs are sent to the user's project, with the job id as their "
            "label."))

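    # Split the user-facing flags from everything else; the unrecognized
    # remainder is handed to Beam as pipeline options.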
    known_args, pipeline_args = parser.parse_known_args(sys.argv[1:])
    pipeline_options = PipelineOptions(flags=pipeline_args)

    return known_args, pipeline_options


if __name__ == "__main__":
    logging.getLogger().setLevel(logging.INFO)
    dataflow_args, dataflow_pipeline_options = _parse_args()
    logging.info("Dataflow option: %s",
                 dataflow_pipeline_options.get_all_options())
    # Create the pipeline
    p = beam.Pipeline(options=dataflow_pipeline_options)
    # Create a dict of aggregators.
    aggregator_dict = aggregators.CreateAggregatorsDict()
    # Create a cloud logging client.
    cloud_logger = None
    if dataflow_args.user_project_id and dataflow_args.user_job_id:
        cloud_logger = cloud_logging_client.MLCloudLoggingClient.create(
            dataflow_args.user_project_id, dataflow_args.user_job_id, LOG_NAME,
            "jsonPayload")
    # Actually start the pipeline
    batch_prediction_pipeline.run(p, dataflow_args, aggregator_dict,
                                  cloud_logger)
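
Neither snippet blocks on pipeline completion. A caller that needs the final
job state could wait on the returned result; a minimal sketch, assuming
batch_prediction_pipeline.run returns the standard Beam PipelineResult (as the
result assignment in Example #1 suggests):

# Block until the Dataflow job reaches a terminal state, then log it.
result = batch_prediction_pipeline.run(
    p, dataflow_args, aggregator_dict, cloud_logger)
result.wait_until_finish()
logging.info("Pipeline finished with state: %s", result.state)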