def run():
    """Define a beam pipeline."""

    BeamDagRunner().run(
        pipeline.create_pipeline(
            pipeline_name=configs.PIPELINE_NAME,
            pipeline_root=PIPELINE_ROOT,
            gcp_project=configs.GOOGLE_CLOUD_PROJECT,
            gcs_bucket=configs.GCS_BUCKET_NAME,
            tcga_betas_query=configs.TCGA_BETAS_QUERY,
            tcga_betas_output_schema=configs.TCGA_BETAS_OUTPUT_SCHEMA,
            tcga_betas_output_table_name=configs.TCGA_BETAS_OUTPUT_TABLE,
            cpg_sites_list_query=configs.CPG_SITES_LIST_QUERY,
            cpg_sites_list_output_schema=configs.CPG_SITES_OUTPUT_SCHEMA,
            cpg_sites_list_output_table_name=configs.CPG_SITES_OUTPUT_TABLE,
            pivot_query=configs.PIVOT_DATASET_QUERY,
            pivot_output_table=configs.PIVOT_OUTPUT_TABLE,
            final_dataset_query=configs.TRAIN_QUERY,
            preprocessing_fn=configs.PREPROCESSING_FN,
            run_fn=configs.RUN_FN,
            train_args=trainer_pb2.TrainArgs(num_steps=configs.TRAIN_NUM_STEPS),
            eval_args=trainer_pb2.EvalArgs(num_steps=configs.EVAL_NUM_STEPS),
            eval_accuracy_threshold=configs.EVAL_ACCURACY_THRESHOLD,
            serving_model_dir=SERVING_MODEL_DIR,
            # GCP-related config for BigQuery with Beam DirectRunner.
            beam_pipeline_args=configs.BIG_QUERY_WITH_DIRECT_RUNNER_BEAM_PIPELINE_ARGS,
            metadata_connection_config=metadata.sqlite_metadata_connection_config(
                METADATA_PATH)))
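For reference, a minimal sketch of the imports and path constants this Beam-runner snippet (and the similar ones below) assume. Module names follow the standard TFX template layout, where `pipeline` is the template-local package containing `configs.py` and `pipeline.py`; the concrete paths are illustrative assumptions, not taken from the original project.

import os

from tfx.orchestration import metadata
from tfx.orchestration.beam.beam_dag_runner import BeamDagRunner
from tfx.proto import trainer_pb2

from pipeline import configs   # template-local configs.py (assumed)
from pipeline import pipeline  # template-local pipeline.py defining create_pipeline() (assumed)

# Illustrative output locations; real projects usually derive these from configs.
OUTPUT_DIR = '.'
PIPELINE_ROOT = os.path.join(OUTPUT_DIR, 'tfx_pipeline_output', configs.PIPELINE_NAME)
METADATA_PATH = os.path.join(OUTPUT_DIR, 'tfx_metadata', configs.PIPELINE_NAME, 'metadata.db')
SERVING_MODEL_DIR = os.path.join(PIPELINE_ROOT, 'serving_model')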
Example #2
def run():
    metadata_config = kubeflow_dag_runner.get_default_kubeflow_metadata_config()
    tfx_image = os.environ.get('KUBEFLOW_TFX_IMAGE', None)
    runner_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
        kubeflow_metadata_config=metadata_config, tfx_image=tfx_image)

    os.environ[kubeflow_dag_runner.SDK_ENV_LABEL] = 'tfx-template'

    kubeflow_dag_runner.KubeflowDagRunner(config=runner_config).run(
        pipeline.create_pipeline(
            pipeline_name=config.PIPELINE_NAME,
            pipeline_root=pipeline_config.PIPELINE_ROOT_GCS,
            data_path=pipeline_config.DATA_PATH_KUBEFLOW,
            preprocessing_fn=config.PREPROCESSING_FN,
            run_fn=config.RUN_FN,
            train_args=trainer_pb2.TrainArgs(num_steps=config.TRAIN_NUM_STEPS),
            eval_args=trainer_pb2.EvalArgs(num_steps=config.EVAL_NUM_STEPS),
            eval_accuracy_threshold=config.EVAL_ACCURACY_THRESHOLD,
            serving_model_dir=pipeline_config.SERVING_MODEL_DIR_GCS,
            query=config.BIG_QUERY_QUERY,
            beam_pipeline_args=(
                config.BIG_QUERY_WITH_DIRECT_RUNNER_BEAM_PIPELINE_ARGS),
            # beam_pipeline_args=config.DATAFLOW_BEAM_PIPELINE_ARGS,
            # ai_platform_training_args=config.GCP_AI_PLATFORM_TRAINING_ARGS,
            # ai_platform_serving_args=config.GCP_AI_PLATFORM_SERVING_ARGS
        ))
def run(metadata_file: Optional[Text] = None):
    """Define a kubeflow pipeline."""

    # Metadata config. The default values work with the installation of
    # KF Pipelines using Kubeflow. If installing KF Pipelines using the
    # lightweight deployment option, you may need to override the defaults.
    # If you use Kubeflow, metadata will be written to a MySQL database inside
    # the Kubeflow cluster.
    metadata_config = kubeflow_dag_runner.get_default_kubeflow_metadata_config()

    # This pipeline automatically injects the Kubeflow TFX image if the
    # environment variable 'KUBEFLOW_TFX_IMAGE' is defined. Currently, the tfx
    # cli tool exports the environment variable to pass to the pipelines.
    # TODO(b/157598477) Find a better way to pass parameters from CLI handler to
    # pipeline DSL file, instead of using environment vars.
    metadata = get_metadata(metadata_file)
    system_config = get_config(metadata, "system_configurations")
    model_config = get_config(metadata, "model_configurations")
    # tfx_image = system_config.get("TFX_IMAGE", None)
    tfx_image = os.environ.get("KUBEFLOW_TFX_IMAGE", None)
    logging.info(f"Current tfx image used: {tfx_image}")

    runner_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
        kubeflow_metadata_config=metadata_config,
        tfx_image=tfx_image,
        #pipeline_operator_funcs=([set_memory_request_and_limits(
        #    system_config["memory_request"], system_config["memory_limit"])]),
    )
    pod_labels = kubeflow_dag_runner.get_default_pod_labels()
    pod_labels.update({
        telemetry_utils.LABEL_KFP_SDK_ENV:
        metadata["pipeline_name"] + "_" + metadata["pipeline_version"]
    })

    kubeflow_dag_runner.KubeflowDagRunner(
        config=runner_config, pod_labels_to_attach=pod_labels
    ).run(
        pipeline.create_pipeline(
            pipeline_name=metadata["pipeline_name"] + "_" +
            metadata["pipeline_version"],
            pipeline_root=system_config["PIPELINE_ROOT"],
            query=model_config["query_script_path"],
            preprocessing_fn=system_config["preprocessing_fn"],
            run_fn=system_config["run_fn"],
            train_args=trainer_pb2.TrainArgs(splits=["train"], num_steps=100),
            eval_args=trainer_pb2.EvalArgs(splits=["train"], num_steps=50),
            model_serve_dir=system_config["MODEL_SERVE_DIR"],
            beam_pipeline_args=system_config["DATAFLOW_BEAM_PIPELINE_ARGS"],
            ai_platform_training_args=system_config[
                "GCP_AI_PLATFORM_TRAINING_ARGS"]
            if system_config["enable_gpc_ai_platform_training"] else None,
            # (Optional) Uncomment below to use Cloud AI Platform.
            # ai_platform_serving_args=system_config["GCP_AI_PLATFORM_SERVING_ARGS"],
            enable_cache=system_config["enable_cache"],
            system_config=system_config,  # passing config parameters downstream
            model_config=model_config,  # passing model parameters downstream
        ))
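This variant loads its settings through `get_metadata` and `get_config`, which are not shown in the snippet. A plausible minimal implementation, assuming the metadata file is YAML with top-level `system_configurations` and `model_configurations` sections (the file format and helper signatures are assumptions):

from typing import Any, Dict, Optional, Text

import yaml  # PyYAML, assumed to be available


def get_metadata(metadata_file: Optional[Text] = None) -> Dict[str, Any]:
    """Loads pipeline metadata/configuration from a YAML file (assumed format)."""
    with open(metadata_file or "metadata.yaml", "r") as f:
        return yaml.safe_load(f)


def get_config(metadata: Dict[str, Any], section: str) -> Dict[str, Any]:
    """Returns one named configuration section from the loaded metadata."""
    return metadata[section]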
Example #4
def create_pipeline_api(name):
    args = reqparse.RequestParser(). \
        add_argument("description", type=str, required=True). \
        add_argument("processors", type=dict, required=True, action="append"). \
        add_argument("encoder", type=dict, required=True). \
        parse_args()
    args = from_view_dict(args)
    args['name'] = name
    return create_pipeline(**args)
Example #5
def run():
    """Define a kubeflow pipeline."""

    # Metadata config. The default values work with the installation of
    # KF Pipelines using Kubeflow. If installing KF Pipelines using the
    # lightweight deployment option, you may need to override the defaults.
    # If you use Kubeflow, metadata will be written to a MySQL database inside
    # the Kubeflow cluster.
    metadata_config = kubeflow_dag_runner.get_default_kubeflow_metadata_config()

    # This pipeline automatically injects the Kubeflow TFX image if the
    # environment variable 'KUBEFLOW_TFX_IMAGE' is defined. Currently, the tfx
    # cli tool exports the environment variable to pass to the pipelines.
    # TODO(b/157598477) Find a better way to pass parameters from CLI handler to
    # pipeline DSL file, instead of using environment vars.
    # tfx_image = os.environ.get('KUBEFLOW_TFX_IMAGE', None)
    tfx_image = 'gcr.io/gcp-nyc/tfx-pipeline'

    runner_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
        kubeflow_metadata_config=metadata_config, tfx_image=tfx_image)
    pod_labels = kubeflow_dag_runner.get_default_pod_labels()
    pod_labels.update({telemetry_utils.LABEL_KFP_SDK_ENV: 'tfx-template'})
    kubeflow_dag_runner.KubeflowDagRunner(
        config=runner_config, pod_labels_to_attach=pod_labels
    ).run(
        pipeline.create_pipeline(
            pipeline_name=configs.PIPELINE_NAME,
            pipeline_root=PIPELINE_ROOT,
            gcp_project=configs.GOOGLE_CLOUD_PROJECT,
            gcs_bucket=configs.GCS_BUCKET_NAME,
            tcga_betas_query=configs.TCGA_BETAS_QUERY,
            tcga_betas_output_schema=configs.TCGA_BETAS_OUTPUT_SCHEMA,
            tcga_betas_output_table_name=configs.TCGA_BETAS_OUTPUT_TABLE,
            cpg_sites_list_query=configs.CPG_SITES_LIST_QUERY,
            cpg_sites_list_output_schema=configs.CPG_SITES_OUTPUT_SCHEMA,
            cpg_sites_list_output_table_name=configs.CPG_SITES_OUTPUT_TABLE,
            pivot_query=configs.PIVOT_DATASET_QUERY,
            pivot_output_table=configs.PIVOT_OUTPUT_TABLE,
            final_dataset_query=configs.TRAIN_QUERY,
            preprocessing_fn=configs.PREPROCESSING_FN,
            run_fn=configs.RUN_FN,
            train_args=trainer_pb2.TrainArgs(
                num_steps=configs.TRAIN_NUM_STEPS),
            eval_args=trainer_pb2.EvalArgs(num_steps=configs.EVAL_NUM_STEPS),
            eval_accuracy_threshold=configs.EVAL_ACCURACY_THRESHOLD,
            serving_model_dir=SERVING_MODEL_DIR,
            beam_pipeline_args=(
                configs.BIG_QUERY_WITH_DIRECT_RUNNER_BEAM_PIPELINE_ARGS),
            # TODO(step 8): (Optional) Uncomment below to use Dataflow.
            # beam_pipeline_args=configs.DATAFLOW_BEAM_PIPELINE_ARGS,
            # TODO(step 9): (Optional) Uncomment below to use Cloud AI Platform.
            # ai_platform_training_args=configs.GCP_AI_PLATFORM_TRAINING_ARGS,
            # TODO(step 9): (Optional) Uncomment below to use Cloud AI Platform.
            # ai_platform_serving_args=configs.GCP_AI_PLATFORM_SERVING_ARGS,
        ))
Example #6
def run():
    LocalDagRunner().run(
        pipeline.create_pipeline(
            pipeline_name="fishing-classifier",
            data_path="data",
            outputs_path="outputs",
            output_model_path="outputs/model",
            train_args=trainer_pb2.TrainArgs(num_steps=100),
            eval_args=trainer_pb2.EvalArgs(num_steps=15),
            eval_accuracy_threshold=0.6,
            metadata_connection_config=sqlite_metadata_connection_config(
                "outputs/metadata.db"),
        ))
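A sketch of the imports this local-runner snippet assumes; `pipeline` is the project-local module that defines `create_pipeline` (assumed from the snippet, not from the original source):

from tfx.orchestration.local.local_dag_runner import LocalDagRunner
from tfx.orchestration.metadata import sqlite_metadata_connection_config
from tfx.proto import trainer_pb2

import pipeline  # project-local module defining create_pipeline() (assumed)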
Example #7
def run():
    """Define a kubeflow pipeline."""

    # Metadata config. The default values work with the installation of
    # KF Pipelines using Kubeflow. If installing KF Pipelines using the
    # lightweight deployment option, you may need to override the defaults.
    # If you use Kubeflow, metadata will be written to a MySQL database inside
    # the Kubeflow cluster.
    metadata_config = kubeflow_dag_runner.get_default_kubeflow_metadata_config()

    # This pipeline automatically injects the Kubeflow TFX image if the
    # environment variable 'KUBEFLOW_TFX_IMAGE' is defined. Currently, the tfx
    # cli tool exports the environment variable to pass to the pipelines.
    # TODO(b/157598477) Find a better way to pass parameters from CLI handler to
    # pipeline DSL file, instead of using environment vars.
    tfx_image = os.environ.get('KUBEFLOW_TFX_IMAGE', None)

    runner_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
        kubeflow_metadata_config=metadata_config, tfx_image=tfx_image)
    pod_labels = kubeflow_dag_runner.get_default_pod_labels()
    pod_labels.update({telemetry_utils.LABEL_KFP_SDK_ENV: 'tfx-template'})
    kubeflow_dag_runner.KubeflowDagRunner(
        config=runner_config, pod_labels_to_attach=pod_labels
    ).run(
        pipeline.create_pipeline(
            pipeline_name=conf['kfp']['pipeline_name'],
            pipeline_root=conf['pipeline_root_dir'],
            data_path=conf['train_data'],
            # TODO(step 7): (Optional) Uncomment below to use BigQueryExampleGen.
            # query=configs.BIG_QUERY_QUERY,
            module_file='pjm_trainer.py',
            #   preprocessing_fn=configs.PREPROCESSING_FN,
            #   run_fn=configs.RUN_FN,
            train_args=trainer_pb2.TrainArgs(
                num_steps=configs.TRAIN_NUM_STEPS),
            eval_args=trainer_pb2.EvalArgs(num_steps=configs.EVAL_NUM_STEPS),
            eval_accuracy_threshold=configs.EVAL_ACCURACY_THRESHOLD,
            serving_model_dir=conf['serving_model_dir'],
            # TODO(step 7): (Optional) Uncomment below to provide GCP-related
            #               config for BigQuery with Beam DirectRunner.
            # beam_pipeline_args=configs
            # .BIG_QUERY_WITH_DIRECT_RUNNER_BEAM_PIPELINE_ARGS,
            # TODO(step 8): (Optional) Uncomment below to use Dataflow.
            # beam_pipeline_args=configs.DATAFLOW_BEAM_PIPELINE_ARGS,
            # TODO(step 9): (Optional) Uncomment below to use Cloud AI Platform.
            # ai_platform_training_args=configs.GCP_AI_PLATFORM_TRAINING_ARGS,
            # TODO(step 9): (Optional) Uncomment below to use Cloud AI Platform.
            # ai_platform_serving_args=configs.GCP_AI_PLATFORM_SERVING_ARGS,
        ))
Example #8
def run():
    BeamDagRunner().run(
        pipeline.create_pipeline(
            pipeline_name=config.PIPELINE_NAME,
            pipeline_root=pipeline_config.PIPELINE_ROOT,
            data_path=pipeline_config.DATA_PATH,
            preprocessing_fn=config.PREPROCESSING_FN,
            run_fn=config.RUN_FN,
            train_args=trainer_pb2.TrainArgs(num_steps=config.TRAIN_NUM_STEPS),
            eval_args=trainer_pb2.EvalArgs(num_steps=config.EVAL_NUM_STEPS),
            eval_accuracy_threshold=config.EVAL_ACCURACY_THRESHOLD,
            serving_model_dir=pipeline_config.SERVING_MODEL_DIR,
            # query=config.BIG_QUERY_QUERY,
            # beam_pipeline_args=config.BIG_QUERY_WITH_DIRECT_RUNNER_BEAM_PIPELINE_ARGS,
            metadata_connection_config=metadata.sqlite_metadata_connection_config(
                pipeline_config.METADATA_PATH)))
Example #9
def create_pipeline_api(name):
    args = reqparse.RequestParser(). \
        add_argument("description", type=str, required=True). \
        add_argument("processors", type=dict, action="append"). \
        add_argument("encoder", type=dict, required=True). \
        parse_args()
    args = from_view_dict(args)
    args['name'] = name
    if not args["processors"]:
        args["processors"] = []
    if "name" not in args['encoder'] or "instance" not in args["encoder"]:
        raise RequestError("name or instance not in encoder", "")
    for processor in args['processors']:
        if "name" not in processor or "instance" not in processor:
            raise RequestError(
                f"name or instance not in processor <{processor}>", "")
    return create_pipeline(**args)
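The handler above depends on Flask-RESTful's `reqparse` and on project-specific helpers (`create_pipeline`, `from_view_dict`, `RequestError`). A hypothetical way to expose it over HTTP; the route path and app setup are assumptions, not taken from the original project:

from flask import Flask

app = Flask(__name__)


@app.route("/v1/pipeline/<name>", methods=["POST"])  # route path is assumed
def create_pipeline_endpoint(name):
    # How the returned pipeline object is serialized is left to the real project.
    return create_pipeline_api(name)


if __name__ == "__main__":
    app.run(port=5000)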
def run():
    """Define a pipeline to be executed using Kubeflow V2 runner."""
    # TODO(b/157598477) Find a better way to pass parameters from CLI handler to
    # pipeline DSL file, instead of using environment vars.
    tfx_image = os.environ.get(labels.TFX_IMAGE_ENV)
    project_id = os.environ.get(labels.GCP_PROJECT_ID_ENV)
    api_key = os.environ.get(labels.API_KEY_ENV)

    runner_config = kubeflow_v2_dag_runner.KubeflowV2DagRunnerConfig(
        project_id=project_id,
        display_name="tfx-kubeflow-v2-pipeline-{}".format(
            configs.PIPELINE_NAME),
        default_image=tfx_image,
    )

    dsl_pipeline = pipeline.create_pipeline(
        pipeline_name=configs.PIPELINE_NAME,
        pipeline_root=_PIPELINE_ROOT,
        data_path=_DATA_PATH,
        # TODO(step 7): (Optional) Uncomment here to use BigQueryExampleGen.
        # query=configs.BIG_QUERY_QUERY,
        preprocessing_fn=configs.PREPROCESSING_FN,
        run_fn=configs.RUN_FN,
        train_args=trainer_pb2.TrainArgs(num_steps=configs.TRAIN_NUM_STEPS),
        eval_args=trainer_pb2.EvalArgs(num_steps=configs.EVAL_NUM_STEPS),
        eval_accuracy_threshold=configs.EVAL_ACCURACY_THRESHOLD,
        serving_model_dir=_SERVING_MODEL_DIR,
        # TODO(step 7): (Optional) Uncomment below to provide GCP-related
        #               config for BigQuery with Beam DirectRunner.
        # beam_pipeline_args=configs.
        # BIG_QUERY_WITH_DIRECT_RUNNER_BEAM_PIPELINE_ARGS,
        # TODO(step 8): (Optional) Uncomment below to use Dataflow.
        # beam_pipeline_args=configs.DATAFLOW_BEAM_PIPELINE_ARGS,
        # TODO(step 9): (Optional) Uncomment below to use Cloud AI Platform.
        # ai_platform_training_args=configs.GCP_AI_PLATFORM_TRAINING_ARGS,
        # TODO(step 9): (Optional) Uncomment below to use Cloud AI Platform.
        # ai_platform_serving_args=configs.GCP_AI_PLATFORM_SERVING_ARGS,
    )

    runner = kubeflow_v2_dag_runner.KubeflowV2DagRunner(config=runner_config)

    if os.environ.get(labels.RUN_FLAG_ENV, False):
        # Only trigger the execution when invoked by 'run' command.
        runner.run(pipeline=dsl_pipeline, api_key=api_key)
    else:
        runner.compile(pipeline=dsl_pipeline, write_out=True)
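A sketch of the imports and module-level constants the Kubeflow V2 runner snippet above assumes. The module paths reflect the experimental Kubeflow V2 support in older TFX releases and may differ in newer ones; the GCS locations are placeholders, and `pipeline`/`configs` are the template-local modules:

import os

from tfx.orchestration.kubeflow.v2 import kubeflow_v2_dag_runner
from tfx.proto import trainer_pb2
from tfx.tools.cli import labels

from pipeline import configs   # template-local configs.py (assumed)
from pipeline import pipeline  # template-local pipeline.py (assumed)

# Placeholder GCS locations; real values come from the project's configuration.
_PIPELINE_ROOT = 'gs://<bucket>/tfx_pipeline_output/' + configs.PIPELINE_NAME
_DATA_PATH = 'gs://<bucket>/data/'
_SERVING_MODEL_DIR = os.path.join(_PIPELINE_ROOT, 'serving_model')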
def run():
    """Define a beam pipeline."""

    BeamDagRunner().run(
        pipeline.create_pipeline(
            pipeline_name=configs.PIPELINE_NAME,
            pipeline_root=PIPELINE_ROOT,
            data_path=DATA_PATH,
            preprocessing_fn=configs.PREPROCESSING_FN,
            run_fn=configs.RUN_FN,
            train_args=trainer_pb2.TrainArgs(
                num_steps=configs.TRAIN_NUM_STEPS),
            eval_args=trainer_pb2.EvalArgs(num_steps=configs.EVAL_NUM_STEPS),
            eval_accuracy_threshold=configs.EVAL_ACCURACY_THRESHOLD,
            serving_model_dir=SERVING_MODEL_DIR,
            metadata_connection_config=metadata.sqlite_metadata_connection_config(
                METADATA_PATH),
        ))
Example #12
def run():
    """Define a kubeflow pipeline."""

    # Metadata config. The default values work with the installation of
    # KF Pipelines using Kubeflow. If installing KF Pipelines using the
    # lightweight deployment option, you may need to override the defaults.
    # If you use Kubeflow, metadata will be written to a MySQL database inside
    # the Kubeflow cluster.
    metadata_config = kubeflow_dag_runner.get_default_kubeflow_metadata_config()

    # This pipeline automatically injects the Kubeflow TFX image if the
    # environment variable 'KUBEFLOW_TFX_IMAGE' is defined. Currently, the tfx
    # cli tool exports the environment variable to pass to the pipelines.
    # TODO(b/157598477) Find a better way to pass parameters from CLI handler to
    # pipeline DSL file, instead of using environment vars.
    tfx_image = os.environ.get('KUBEFLOW_TFX_IMAGE', None)

    runner_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
        kubeflow_metadata_config=metadata_config, tfx_image=tfx_image)
    pod_labels = kubeflow_dag_runner.get_default_pod_labels()
    pod_labels.update({telemetry_utils.LABEL_KFP_SDK_ENV: 'tfx-template'})
    kubeflow_dag_runner.KubeflowDagRunner(
        config=runner_config, pod_labels_to_attach=pod_labels
    ).run(
        pipeline.create_pipeline(
            pipeline_name=configs.PIPELINE_NAME,
            pipeline_root=PIPELINE_ROOT,
            data_path=DATA_PATH,
            # NOTE: Use `query` instead of `data_path` to use BigQueryExampleGen.
            # query=configs.BIG_QUERY_QUERY,
            preprocessing_fn=configs.PREPROCESSING_FN,
            run_fn=configs.RUN_FN,
            train_args=trainer_pb2.TrainArgs(
                num_steps=configs.TRAIN_NUM_STEPS),
            eval_args=trainer_pb2.EvalArgs(num_steps=configs.EVAL_NUM_STEPS),
            eval_accuracy_threshold=configs.EVAL_ACCURACY_THRESHOLD,
            serving_model_dir=SERVING_MODEL_DIR,
            # NOTE: Provide GCP configs to use BigQuery with Beam DirectRunner.
            # beam_pipeline_args=configs.
            # BIG_QUERY_WITH_DIRECT_RUNNER_BEAM_PIPELINE_ARGS,
        ))
Example #13
def run():
    """Define a beam pipeline."""

    BeamDagRunner().run(
        pipeline.create_pipeline(
            pipeline_name=configs.PIPELINE_NAME,
            pipeline_root=PIPELINE_ROOT,
            data_path=DATA_PATH,
            # TODO(step 7): (Optional) Uncomment here to use BigQueryExampleGen.
            # query=configs.BIG_QUERY_QUERY,
            preprocessing_fn=configs.PREPROCESSING_FN,
            run_fn=configs.RUN_FN,
            train_args=trainer_pb2.TrainArgs(
                num_steps=configs.TRAIN_NUM_STEPS),
            eval_args=trainer_pb2.EvalArgs(num_steps=configs.EVAL_NUM_STEPS),
            eval_accuracy_threshold=configs.EVAL_ACCURACY_THRESHOLD,
            serving_model_dir=SERVING_MODEL_DIR,
            # TODO(step 7): (Optional) Uncomment below to provide GCP-related
            #               config for BigQuery with Beam DirectRunner.
            # beam_pipeline_args=configs.
            # BIG_QUERY_WITH_DIRECT_RUNNER_BEAM_PIPELINE_ARGS,
            metadata_connection_config=metadata.sqlite_metadata_connection_config(
                METADATA_PATH)))
def run():
    """Define a kubeflow pipeline."""

    # Metadata config. The default values work with the installation of
    # KF Pipelines using Kubeflow. If installing KF Pipelines using the
    # lightweight deployment option, you may need to override the defaults.
    # If you use Kubeflow, metadata will be written to a MySQL database inside
    # the Kubeflow cluster.
    metadata_config = kubeflow_dag_runner.get_default_kubeflow_metadata_config()

    # This pipeline automatically injects the Kubeflow TFX image if the
    # environment variable 'KUBEFLOW_TFX_IMAGE' is defined.
    tfx_image = os.environ.get('KUBEFLOW_TFX_IMAGE', None)

    runner_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
        kubeflow_metadata_config=metadata_config, tfx_image=tfx_image)
    pod_labels = kubeflow_dag_runner.get_default_pod_labels()
    pod_labels.update({telemetry_utils.LABEL_KFP_SDK_ENV: 'tfx-template'})
    kubeflow_dag_runner.KubeflowDagRunner(
        config=runner_config, pod_labels_to_attach=pod_labels
    ).run(
        pipeline.create_pipeline(
            pipeline_name=configs.PIPELINE_NAME,
            pipeline_root=PIPELINE_ROOT,
            query=configs.BIG_QUERY_QUERY,
            run_fn=configs.RUN_FN,
            train_args=trainer_pb2.TrainArgs(
                num_steps=configs.TRAIN_NUM_STEPS),
            eval_args=trainer_pb2.EvalArgs(num_steps=configs.EVAL_NUM_STEPS),
            serving_model_dir=SERVING_MODEL_DIR,
            beam_pipeline_args=(
                configs.BIG_QUERY_WITH_DIRECT_RUNNER_BEAM_PIPELINE_ARGS),
            ai_platform_training_args=configs.GCP_AI_PLATFORM_TRAINING_ARGS,
            ai_platform_serving_args=configs.GCP_AI_PLATFORM_SERVING_ARGS,
        ))
Example #15
    parser.add_argument("--log-level", type=str, default=os.environ.get("LOGLEVEL", "INFO").upper())
    parser.add_argument("--sagemaker-project-id", type=str, required=True)
    parser.add_argument("--sagemaker-project-name", type=str, required=True)
    parser.add_argument("--pipeline-description", type=str, default="automated ingestion from s3 to feature store")
    parser.add_argument("--pipeline-name-prefix", type=str, default="s3-fs-ingest-pipeline")
    parser.add_argument("--dw-flow-url", type=str, required=True)
    parser.add_argument("--dw-flow-output-name", type=str, required=True)
    parser.add_argument("--s3-data-prefix", type=str, required=True)
    parser.add_argument("--feature-group-name", type=str, required=True)
    parser.add_argument("--execution-role", type=str, default="")

    args, _ = parser.parse_known_args()

    # Configure logging to output the line number and message
    log_format = "%(levelname)s: [%(filename)s:%(lineno)s] %(message)s"
    logging.basicConfig(format=log_format, level=args.log_level)

    pipeline = create_pipeline(
            pipeline_name=f"{args.pipeline_name_prefix}-{args.sagemaker_project_id}",
            pipeline_description=args.pipeline_description,
            project_id=args.sagemaker_project_id,
            project_name=args.sagemaker_project_name,
            data_wrangler_flow_s3_url=args.dw_flow_url,
            flow_output_name=args.dw_flow_output_name,
            input_data_s3_url=f"s3://{args.s3_data_prefix}",
            feature_group_name=args.feature_group_name,
            execution_role=args.execution_role,
    )

    logger.info(f"pipeline created:")
    logger.info(f"{json.dumps(json.loads(pipeline.definition()), indent=2, sort_keys=True)}")
Example #16
def main():
    df = get_data()
    pipeline = create_pipeline()
    # data = pipeline.fit_transform(df)
    print(df)
Example #17
def wrapper(*args, **kwargs):
    create_pipeline(name=name, processors=processors, encoder=encoder)
    func(*args, **kwargs)
    delete_pipeline(name)
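The fragment above is the inner `wrapper` of a decorator that creates a pipeline before calling the wrapped function and deletes it afterwards. A self-contained sketch of the full decorator, with `try`/`finally` so the pipeline is removed even if the wrapped call raises; the decorator name and the cleanup-on-error behaviour are assumptions:

import functools


def with_pipeline(name, processors, encoder):
    """Hypothetical decorator built around the wrapper fragment above."""
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            create_pipeline(name=name, processors=processors, encoder=encoder)
            try:
                return func(*args, **kwargs)
            finally:
                # Clean up the pipeline even if the wrapped call fails.
                delete_pipeline(name)
        return wrapper
    return decorator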