Ejemplo n.º 1
0
import kfp
import kfp.components as comp
import kfp.dsl as dsl
from os import path
import json
import yaml

# Component store used below to look components up by name.
cs = comp.ComponentStore()

# Parent of the directory that contains this file; registered as a local
# search location for the store.
component_path = path.join(path.dirname(__file__), '..')

# Must run before the cs.load_component() calls below so they can see this path.
cs.local_search_paths.append(component_path)
# Training component definition fetched over HTTP from the kubeflow/pipelines
# repository at ref 1.0.0 (components/gcp/ml_engine/train).
caip_train_op = comp.load_component_from_url(
                    'https://raw.githubusercontent.com/kubeflow/pipelines/1.0.0/'
                                'components/gcp/ml_engine/train/component.yaml')
#pre_process_op = cs.load_component('preProcess')
# Components resolved by name through the store configured above.
param_comp = cs.load_component('get_tuned_params')
preprocess_op = cs.load_component('preprocess')

# Config parameters
PROJECT_ID = 'pytorch-tpu-nfs'
REGION = 'us-central1'
FAIRSEQ_IMAGE = 'gcr.io/pytorch-tpu-nfs/fairseq-lm-train'
# NOTE(review): despite the "_json" suffix this names a YAML file, and the path
# is relative to the current working directory rather than to this file.
training_input_json = './config.yaml'
# Parse the YAML document, take its top-level 'trainingInput' entry and
# re-serialize that entry as a JSON string.
with open(training_input_json) as f:
    training_input = json.dumps(yaml.safe_load(f)['trainingInput'])

pipeline_args = {
    'project_id': PROJECT_ID,
    'region': REGION,
    'args': json.dumps([
import kfp
from kfp import components

# Store that resolves component names against the components/ directory of the
# kubeflow/pipelines repository, pinned to one commit hash.
component_store = components.ComponentStore(url_search_prefixes=['https://raw.githubusercontent.com/kubeflow/pipelines/af3eaf64e87313795cad1add9bfd9fa1e86af6de/components/'])

# Dataset source component.
chicago_taxi_dataset_op = component_store.load_component(name='datasets/Chicago_Taxi_Trips')
# Converter components under _converters/ApacheParquet: the "from_*" entries
# are bound to "convert_<format>_to_apache_parquet_op" names and the "to_*"
# entries to "convert_apache_parquet_to_<format>_op" names.
convert_csv_to_apache_parquet_op = component_store.load_component(name='_converters/ApacheParquet/from_CSV')
convert_tsv_to_apache_parquet_op = component_store.load_component(name='_converters/ApacheParquet/from_TSV')
convert_apache_parquet_to_csv_op = component_store.load_component(name='_converters/ApacheParquet/to_CSV')
convert_apache_parquet_to_tsv_op = component_store.load_component(name='_converters/ApacheParquet/to_TSV')
convert_apache_parquet_to_apache_arrow_feather_op = component_store.load_component(name='_converters/ApacheParquet/to_ApacheArrowFeather')
convert_apache_arrow_feather_to_apache_parquet_op = component_store.load_component(name='_converters/ApacheParquet/from_ApacheArrowFeather')


def parquet_pipeline():
    """Chain the Chicago Taxi dataset op into the Parquet converter ops.

    The dataset op is invoked twice with the same row filter, column list
    and row limit: once with its default format and once with
    ``format='tsv'``. The first result is converted to Parquet, and that
    Parquet output is then passed to both the Parquet-to-CSV and the
    Parquet-to-Feather converters. Takes no arguments and returns ``None``;
    its only effect is calling the module-level ops in this order.
    """
    # Query settings shared by both dataset requests.
    shared_query = dict(
        # Trips whose start timestamp falls within January 2019.
        where='trip_start_timestamp >= "2019-01-01" AND trip_start_timestamp < "2019-02-01"',
        select='tips,trip_seconds,trip_miles,pickup_community_area,dropoff_community_area,fare,tolls,extras,trip_total',
        limit=10000,
    )

    csv_task = chicago_taxi_dataset_op(**shared_query)
    csv_table = csv_task.output

    tsv_task = chicago_taxi_dataset_op(format='tsv', **shared_query)
    # Not consumed anywhere in this function body.
    tsv_table = tsv_task.output

    # CSV -> Parquet, then fan the Parquet output out to two converters.
    to_parquet_task = convert_csv_to_apache_parquet_op(csv_table)
    parquet_table = to_parquet_task.output

    round_trip_csv = convert_apache_parquet_to_csv_op(parquet_table).output
    feather_table = convert_apache_parquet_to_apache_arrow_feather_op(parquet_table).output
Ejemplo n.º 3
0
import kfp
from kfp import components

# Store that resolves component names against the components/ directory of the
# kubeflow/pipelines repository, pinned to one commit hash.
component_store = components.ComponentStore(url_search_prefixes=[
    'https://raw.githubusercontent.com/kubeflow/pipelines/0d7d6f41c92bdc05c2825232afe2b47e5cb6c4b3/components/'
])

# Dataset source component.
chicago_taxi_dataset_op = component_store.load_component(
    name='datasets/Chicago_Taxi_Trips')
# Converter components under _converters/ApacheParquet: the "from_*" entries
# are bound to "convert_<format>_to_apache_parquet_op" names and the "to_*"
# entry to a "convert_apache_parquet_to_<format>_op" name.
convert_csv_to_apache_parquet_op = component_store.load_component(
    name='_converters/ApacheParquet/from_CSV')
convert_tsv_to_apache_parquet_op = component_store.load_component(
    name='_converters/ApacheParquet/from_TSV')
convert_apache_parquet_to_apache_arrow_feather_op = component_store.load_component(
    name='_converters/ApacheParquet/to_ApacheArrowFeather')
convert_apache_arrow_feather_to_apache_parquet_op = component_store.load_component(
    name='_converters/ApacheParquet/from_ApacheArrowFeather')


def parquet_pipeline():
    csv = chicago_taxi_dataset_op(
        where=
        'trip_start_timestamp >= "2019-01-01" AND trip_start_timestamp < "2019-02-01"',
        select=
        'tips,trip_seconds,trip_miles,pickup_community_area,dropoff_community_area,fare,tolls,extras,trip_total',
        limit=10000,
    ).output

    tsv = chicago_taxi_dataset_op(
        where=
        'trip_start_timestamp >= "2019-01-01" AND trip_start_timestamp < "2019-02-01"',