Code example #1
class StaticSubWorkflowCaller(object):
    outer_a = Input(Types.Integer,
                    default=5,
                    help="Input for inner workflow")
    identity_wf_execution = IdentityWorkflow(a=outer_a)
    wf_output = Output(identity_wf_execution.outputs.task_output,
                       sdk_type=Types.Integer)
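These class-style workflows use the legacy flytekit (0.x) SDK; the excerpts omit the imports and the @workflow_class decorator that such definitions rely on. A minimal sketch of that scaffolding (an assumption based on the 0.x API, not shown in the source):

from flytekit.sdk.types import Types
from flytekit.sdk.workflow import workflow_class, Input, Output

@workflow_class
class StaticSubWorkflowCaller(object):
    ...  # inputs, nodes, and outputs exactly as above

The same imports and decorator are presumed for the other class-style examples in this section.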
Code example #2
def workflow_builder(wf_params, task_input_num, decider, out):
    wf_params.logging.info(
        "Running inner task... yielding a code generated sub workflow")

    input_a = Input(Types.Integer, help="Tell me something")
    if decider:
        node1 = inverse_inner_task(num=input_a)
    else:
        node1 = inner_task(num=input_a)

    MyUnregisteredWorkflow = workflow(
        inputs={'a': input_a},
        outputs={
            'ooo': Output(node1.outputs.out,
                          sdk_type=Types.Integer,
                          help='This is an integer output')
        },
        nodes={'node_one': node1},
    )

    # This is an unfortunate setting that will hopefully not be necessary in the future.
    setattr(MyUnregisteredWorkflow, 'auto_assign_name', manual_assign_name)
    MyUnregisteredWorkflow._platform_valid_name = 'unregistered'

    unregistered_workflow_execution = MyUnregisteredWorkflow(a=task_input_num)

    yield unregistered_workflow_execution
    out.set(unregistered_workflow_execution.outputs.ooo)
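The (wf_params, ..., out) signature and the yield above suggest workflow_builder is registered as a flytekit dynamic task. A hedged sketch of the decorators it presumably carries (assuming the legacy 0.x SDK; inner_task and inverse_inner_task are defined elsewhere):

from flytekit.sdk.tasks import dynamic_task, inputs, outputs
from flytekit.sdk.types import Types

@inputs(task_input_num=Types.Integer, decider=Types.Boolean)
@outputs(out=Types.Integer)
@dynamic_task
def workflow_builder(wf_params, task_input_num, decider, out):
    ...  # body as shown above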
Code example #3
class PrestoWorkflow(object):
    ds = Input(Types.String, required=True, help="Test string with no default")
    # routing_group = Input(Types.String, required=True, help="Test string with no default")

    p_task = presto_task(ds=ds, rg="etl")

    output_a = Output(p_task.outputs.results, sdk_type=schema)
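Both Presto examples reference a presto_task and a schema defined elsewhere in their source files. A hedged sketch of what they might look like, assuming the legacy SdkPrestoTask helper (the query, column names, and routing-group template are placeholders; the variant in code example #7 takes length instead of ds):

from flytekit.common.tasks.presto_task import SdkPrestoTask
from flytekit.sdk.tasks import inputs
from flytekit.sdk.types import Types

# Placeholder schema; the real column layout is not shown in the source.
schema = Types.Schema([("a", Types.String), ("b", Types.Integer)])

presto_task = SdkPrestoTask(
    task_inputs=inputs(ds=Types.String, rg=Types.String),
    statement="SELECT * FROM some_catalog.some_table WHERE ds = '{{ .Inputs.ds }}' LIMIT 10",
    output_schema=schema,
    routing_group="{{ .Inputs.rg }}",
)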
Code example #4
class InverterDynamicWorkflow(object):
    input_a = Input(Types.Integer, default=5, help="Input for inner workflow")
    inverter_input = Input(Types.Boolean,
                           default=False,
                           help="Should invert or not")
    lp_task = workflow_builder(task_input_num=input_a, decider=inverter_input)
    wf_output = Output(lp_task.outputs.out, sdk_type=Types.Integer)
Code example #5
File: nested.py  Project: ybubnov/flytekit
class Child(object):
    input_1 = Input(Types.Integer)
    input_2 = Input(Types.Integer, default=5, help="Not required.")
    a = add_one(a=input_1)
    b = add_one(a=input_2)
    c = add_one(a=100)
    output = Output(c.outputs.b, sdk_type=Types.Integer)
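A hedged sketch of the add_one task that Child calls (an assumed implementation; note the output is named b, which is why the workflow output reads c.outputs.b):

from flytekit.sdk.tasks import inputs, outputs, python_task
from flytekit.sdk.types import Types

@inputs(a=Types.Integer)
@outputs(b=Types.Integer)
@python_task
def add_one(wf_params, a, b):
    b.set(a + 1)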
Code example #6
class RawContainerWorkflow(object):
    val1 = Input(Types.Integer)
    val2 = Input(Types.Integer)
    sq1 = square(val=val1)
    sq2 = square(val=val2)
    sm = sum(x=sq1.outputs.out, y=sq2.outputs.out)
    sum_of_squares = Output(sm.outputs.out, sdk_type=Types.Integer)
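The square and sum nodes here are raw-container tasks. A hedged sketch of how square might be declared with the legacy SdkRawContainerTask (the image and shell command are assumptions):

from flytekit.common.tasks.raw_container import SdkRawContainerTask
from flytekit.sdk.types import Types

square = SdkRawContainerTask(
    input_data_dir="/var/inputs",
    output_data_dir="/var/outputs",
    inputs={"val": Types.Integer},
    outputs={"out": Types.Integer},
    image="alpine",
    # Square the input in shell and write the result where flyte expects outputs.
    command=["sh", "-c", "echo $(( {{.Inputs.val}} * {{.Inputs.val}} )) | tee /var/outputs/out"],
)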
Code example #7
class PrestoWorkflow(object):
    length = Input(Types.Integer, required=True, help="Int between 1 and 26")
    routing_group = Input(Types.String,
                          required=True,
                          help="Test string with no default")
    p_task = presto_task(length=length, rg=routing_group)
    output_a = Output(p_task.outputs.results, sdk_type=schema)
Code example #8
File: generic.py  Project: oleng/katacoda
class GenericDemoWorkflow(object):
    a = Input(Types.Generic, default={}, help="Input for inner workflow")
    generic_type_example = generic_type_task(custom=a)
    generic_json = generic_to_json(
        replicated=generic_type_example.outputs.replicated)
    counts = Output(generic_type_example.outputs.counts,
                    sdk_type=Types.Generic)
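A hedged sketch of generic_type_task (an assumed body; Types.Generic values are handled as plain dicts in the legacy SDK):

from flytekit.sdk.tasks import inputs, outputs, python_task
from flytekit.sdk.types import Types

@inputs(custom=Types.Generic)
@outputs(replicated=Types.Generic, counts=Types.Generic)
@python_task
def generic_type_task(wf_params, custom, replicated, counts):
    # Assumed behavior: echo the input back twice and report a simple count.
    replicated.set({"first": custom, "second": custom})
    counts.set({"num_keys": len(custom)})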
Code example #9
class ClassifierEvaluateWorkflow:
    available_streams_mpblobs = Input([Types.MultiPartBlob], required=True)
    available_streams_names = Input([Types.String], required=True)
    validation_data_ratio = Input(Types.Float,
                                  default=DEFAULT_VALIDATION_DATA_RATIO)
    streams_metadata_path = Input(Types.String, required=True)
    model = Input(Types.Blob, default=None)
    evaluation_config_json = Input(
        Types.Generic,
        default=ujson.loads(open(DEFAULT_EVALUATION_CONFIG_FILE).read()))

    fetch_model_task = fetch_model(model=model)

    rearrange_data_task = rearrange_data(
        available_streams_mpblobs=available_streams_mpblobs,
        available_streams_names=available_streams_names,
        training_validation_config_json=evaluation_config_json,
        streams_metadata_path=streams_metadata_path,
        validation_data_ratio=validation_data_ratio,
    )

    evaluate_on_datasets_task = evaluate_on_datasets(
        model=fetch_model_task.outputs.model_blob,
        evaluation_clean_mpblob=rearrange_data_task.outputs.validation_clean_mpblob,
        evaluation_dirty_mpblob=rearrange_data_task.outputs.validation_dirty_mpblob,
    )

    analyze_task = analyze_prediction_results(
        ground_truths=evaluate_on_datasets_task.outputs.ground_truths_out,
        predictions=evaluate_on_datasets_task.outputs.predictions_out,
    )

    predict = generate_predictions(
        ground_truths=evaluate_on_datasets_task.outputs.ground_truths_out,
        probabilities=evaluate_on_datasets_task.outputs.predictions_out)

    analyze_results_blobs = Output(analyze_task.outputs.result_blobs,
                                   sdk_type=[Types.Blob])
    analyze_results_files_names = Output(
        analyze_task.outputs.result_files_names, sdk_type=[Types.String])
    ground_truths = Output(evaluate_on_datasets_task.outputs.ground_truths_out,
                           sdk_type=[Types.Integer])
    predictions = Output(predict.outputs.predictions, sdk_type=[Types.Integer])
Code example #10
def nested_dynamic_wf_task(wf_params, task_input_num, out):
    wf_params.logging.info(
        "Running inner task... yielding a code generated sub workflow")

    # Inner workflow
    input_a = Input(Types.Integer, help="Tell me something")
    node1 = sq_sub_task(in1=input_a)

    MyUnregisteredWorkflowInner = workflow(
        inputs={"a": input_a},
        outputs={
            "ooo": Output(node1.outputs.out1,
                          sdk_type=Types.Integer,
                          help="This is an integer output")
        },
        nodes={"node_one": node1},
    )

    setattr(MyUnregisteredWorkflowInner, "auto_assign_name",
            manual_assign_name)
    MyUnregisteredWorkflowInner._platform_valid_name = "unregistered"

    # Output workflow
    input_a = Input(Types.Integer, help="Tell me something")
    node1 = MyUnregisteredWorkflowInner(a=task_input_num)

    MyUnregisteredWorkflowOuter = workflow(
        inputs={"a": input_a},
        outputs={
            "ooo": Output(node1.outputs.ooo,
                          sdk_type=Types.Integer,
                          help="This is an integer output")
        },
        nodes={"node_one": node1},
    )

    setattr(MyUnregisteredWorkflowOuter, "auto_assign_name",
            manual_assign_name)
    MyUnregisteredWorkflowOuter._platform_valid_name = "unregistered"

    unregistered_workflow_execution = MyUnregisteredWorkflowOuter(
        a=task_input_num)
    out.set(unregistered_workflow_execution.outputs.ooo)
Code example #11
class SimpleWorkflow(object):
    triggered_date = Input(Types.Datetime)
    print1a = add_one_and_print(value_to_print=3)
    print1b = add_one_and_print(value_to_print=101)
    print2 = sum_non_none(
        values_to_print=[print1a.outputs.out, print1b.outputs.out])
    print3 = add_one_and_print(value_to_print=print2.outputs.out)
    print4 = add_one_and_print(value_to_print=print3.outputs.out)
    final_value = Output(print4.outputs.out, sdk_type=Types.Integer)
Code example #12
class MNISTTest(object):
    no_cuda = Input(Types.Boolean,
                    default=False,
                    help="disables CUDA training")
    batch_size = Input(Types.Integer,
                       default=64,
                       help='input batch size for training (default: 64)')
    test_batch_size = Input(
        Types.Integer,
        default=1000,
        help='input batch size for testing (default: 1000)')
    epochs = Input(Types.Integer,
                   default=1,
                   help='number of epochs to train (default: 1)')
    learning_rate = Input(Types.Float,
                          default=0.01,
                          help='learning rate (default: 0.01)')
    sgd_momentum = Input(Types.Float,
                         default=0.5,
                         help='SGD momentum (default: 0.5)')
    seed = Input(Types.Integer, default=1, help='random seed (default: 1)')
    log_interval = Input(
        Types.Integer,
        default=10,
        help='how many batches to wait before logging training status')
    dir = Input(Types.String,
                default='logs',
                help='directory where summary logs are stored')

    mnist_result = mnist_pytorch_job(no_cuda=no_cuda,
                                     batch_size=batch_size,
                                     test_batch_size=test_batch_size,
                                     epochs=epochs,
                                     learning_rate=learning_rate,
                                     sgd_momentum=sgd_momentum,
                                     seed=seed,
                                     log_interval=log_interval,
                                     dir=dir)

    accuracies = Output(mnist_result.outputs.epoch_accuracies,
                        sdk_type=[Types.Float])
    model = Output(mnist_result.outputs.model_state, sdk_type=Types.Blob)
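mnist_pytorch_job is a distributed-training task. A hedged sketch of its declaration (assumptions: the legacy flytekit PyTorch plugin exposes a pytorch_task decorator with a workers_count argument; the training body is elided):

from flytekit.sdk.tasks import inputs, outputs, pytorch_task
from flytekit.sdk.types import Types

@inputs(no_cuda=Types.Boolean, batch_size=Types.Integer,
        test_batch_size=Types.Integer, epochs=Types.Integer,
        learning_rate=Types.Float, sgd_momentum=Types.Float,
        seed=Types.Integer, log_interval=Types.Integer, dir=Types.String)
@outputs(epoch_accuracies=[Types.Float], model_state=Types.Blob)
@pytorch_task(workers_count=2)
def mnist_pytorch_job(wf_params, no_cuda, batch_size, test_batch_size, epochs,
                      learning_rate, sgd_momentum, seed, log_interval, dir,
                      epoch_accuracies, model_state):
    # Run the usual MNIST training loop here, then publish the results:
    # epoch_accuracies.set(list_of_per_epoch_floats)
    # model_state.set(path_to_saved_state_dict)
    pass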
Code example #13
class BackfillWorkflow(object):
    """
    So if FailingWorkflow Fails, we can resurrect and backfill the FailingWorkflow, using the BackfillWorkflow.
    The Backfill workflow just has one step
    """
    in_image = Input(Types.Blob, required=True)
    angle = Input(Types.Float, default=180.0)

    rotate_task = rotate(image=in_image, angle=angle, fail=False)

    out_image = Output(rotate_task.outputs.out_image, sdk_type=Types.Blob)
Code example #14
class HousePricePredictionModelTrainer(object):
    """
    This pipeline trains an XGBoost model, generates synthetic data, and runs predictions against a test dataset.
    """

    loc = Input(Types.String, help="Location for where to train the model.")
    seed = Input(Types.Integer, default=7, help="Seed to use for splitting.")
    num_houses = Input(Types.Integer,
                       default=1000,
                       help="Number of houses to generate data for")

    # the actual algorithm
    split = generate_and_split_data(loc=loc,
                                    number_of_houses=num_houses,
                                    seed=seed)
    fit_task = fit(train=split.outputs.train)
    predicted = predict(model_ser=fit_task.outputs.model,
                        test=split.outputs.test)

    # Outputs: joblib-serialized model and accuracy of the model
    model = Output(fit_task.outputs.model, sdk_type=Types.Blob)
    accuracy = Output(predicted.outputs.accuracy, sdk_type=Types.Float)
Code example #15
class DiabetesXGBoostModelTrainer(object):
    """
    This pipeline trains an XGBoost model for any given dataset that matches the schema specified in
    https://github.com/jbrownlee/Datasets/blob/master/pima-indians-diabetes.names.
    """

    # Inputs: the dataset, the fraction of the dataset to split out for validation, and the seed used to perform the split
    dataset = Input(
        Types.CSV,
        default=Types.CSV.create_at_known_location(
            "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"
        ),
        help=
        "A CSV File that matches the format https://github.com/jbrownlee/Datasets/blob/master/pima-indians-diabetes.names"
    )

    test_split_ratio = Input(Types.Float,
                             default=0.33,
                             help="Fraction of the dataset to hold out as the test split")
    seed = Input(Types.Integer, default=7, help="Seed to use for splitting.")

    # the actual algorithm
    split = get_traintest_splitdatabase(dataset=dataset,
                                        seed=seed,
                                        test_split_ratio=test_split_ratio)
    fit_task = fit(x=split.outputs.x_train,
                   y=split.outputs.y_train,
                   hyperparams=XGBoostModelHyperparams(max_depth=4).to_dict())
    predicted = predict(model_ser=fit_task.outputs.model,
                        x=split.outputs.x_test)
    score_task = metrics(predictions=predicted.outputs.predictions,
                         y=split.outputs.y_test)

    # Outputs: joblib-serialized model and accuracy of the model
    model = Output(fit_task.outputs.model, sdk_type=Types.Blob)
    accuracy = Output(score_task.outputs.accuracy, sdk_type=Types.Float)
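XGBoostModelHyperparams is not shown in the excerpt; a hedged sketch of a container with the to_dict() helper the workflow relies on (field names beyond max_depth are assumptions):

from dataclasses import asdict, dataclass

@dataclass
class XGBoostModelHyperparams:
    # Assumed fields and defaults; only max_depth appears in the source.
    max_depth: int = 3
    learning_rate: float = 0.1
    n_estimators: int = 100
    objective: str = "binary:logistic"

    def to_dict(self):
        return asdict(self)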
Code example #16
class StructuredSagemakerXGBoostHPO(object):
    # Input parameters
    static_hyperparameters = Input(
        Types.Generic,
        help=
        "A list of the static hyperparameters to pass to the training jobs.",
        default=example_hyperparams,
    )
    train_data = Input(
        Types.Schema(),
        help=
        "A Columnar schema that contains all the features used for training.",
    )
    train_target = Input(
        Types.Schema(),
        help=
        "A Columnar schema that contains all the labeled results for train_data.",
    )

    validation_data = Input(
        Types.Schema(),
        help=
        "A Columnar schema that contains all the features used for validation.",
    )
    validation_target = Input(
        Types.Schema(),
        help=
        "A Columnar schema that contains all the labeled results for validation_data.",
    )

    sagemaker_transform = convert_to_sagemaker_csv(x_train=train_data,
                                                   y_train=train_target,
                                                   x_test=validation_data,
                                                   y_test=validation_target)

    # Node definitions
    train_node = xgtrainer_task(
        static_hyperparameters=static_hyperparameters,
        train=sagemaker_transform.outputs.train,
        validation=sagemaker_transform.outputs.validation,
    )

    untar = untar_xgboost(model_tar=train_node.outputs.model)

    # Outputs
    model = Output(untar.outputs.model, sdk_type=Types.Blob)
Code example #17
File: flyte_compiler.py  Project: sd2k/dagster
    def __call__(self, module="__main__"):
        """
        Creates an SdkWorkflow from a dagster pipeline, then adds the nodes as attrs on the module
        this function is invoked from. The user must provide the module name manually.
        This is required because flytekit runs dir() on the module that the resultant container
        registers, in order to discover the DAG structure.
        """

        self.execution_plan = create_execution_plan(self.pipeline,
                                                    run_config=self.run_config)

        self.build_flyte_sdk_workflow()
        nodes = {}
        for name, node in self.get_sdk_tasks():
            setattr(sys.modules[module], name, node)
            nodes[name] = node(**self.inputs[name],
                               **self.source_handle_inputs(
                                   name, nodes)).assign_id_and_return(name)

        _inputs = [
            _input.rename_and_return_reference(name)
            for key in self.inputs.keys()
            for name, _input in self.inputs[key].items()
        ]

        # Currently, we create an Output for every solid's output. A user may only want outputs for
        # solids at the highest topological level, or for solids whose output is not used elsewhere,
        # but they may also want to persist outputs from other levels. Therefore, it is simplest to
        # create an Output for every solid's output.
        _outputs = [
            Output(getattr(nodes[key].outputs, name),
                   sdk_type=flyte_type).rename_and_return_reference(
                       "{}_{}".format(key, name))
            for key in self.outputs.keys()
            for name, flyte_type in self.outputs[key].items()
        ]

        return SdkWorkflow(
            inputs=sorted(_inputs, key=lambda x: x.name),
            outputs=sorted(_outputs, key=lambda x: x.name),
            nodes=sorted(nodes.values(), key=lambda x: x.id),
        )
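A hedged usage sketch of this __call__; the source shows only the method, so the class name and constructor arguments below are hypothetical:

# FlyteCompiler, my_pipeline, and my_run_config are hypothetical names.
compiler = FlyteCompiler(pipeline=my_pipeline, run_config=my_run_config)
sdk_workflow = compiler(module=__name__)  # attaches nodes to this module, returns an SdkWorkflow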
Code example #18
File: sample.py  Project: kumare3/awsflyteplugins
class DemoWorkflow(object):
    # Input parameters
    static_hyperparameters = Input(
        Types.Generic,
        help=
        "A list of the static hyperparameters to pass to the training jobs.",
    )
    train_data = Input(Types.MultiPartCSV,
                       help="S3 path to a flat directory of CSV files.")
    validation_data = Input(Types.MultiPartCSV,
                            help="S3 path to a flat directory of CSV files.")

    # Node definitions
    train_node = xgtrainer_task(
        static_hyperparameters=example_hyperparams,
        train=train_data,
        validation=validation_data,
    )

    # Outputs
    trained_model = Output(train_node.outputs.model, sdk_type=Types.Blob)
Code example #19
class FailingWorkflow(object):
    """
    This workflow is  two step workflow,
    Step 1: scale an image
    Step 2: Rotate an image
    NOTE: This is not an efficient workflow as one image - scaling and rotation can be done with one OPEN CV call. But this example exists only for a demo

    Step 2: in this case will always fail as it is hard-coded to indicate fail=True
    """
    in_image = Input(
        Types.Blob,
        default=Types.Blob.create_at_known_location(
            "https://miro.medium.com/max/1400/1*qL8UYfaStcEo_YVPrA4cbA.png"))
    angle = Input(Types.Float, default=180.0)
    scale = Input(Types.Integer, default=2)

    scale_task = tasks.scale(image=in_image, scale_factor=scale)
    rotate_task = tasks.rotate(image=scale_task.outputs.out_image,
                               angle=angle,
                               fail=True)

    out_image = Output(rotate_task.outputs.out_image, sdk_type=Types.Blob)
Code example #20
class DemoWorkflow(object):
    # Input parameters
    train_data = Input(Types.MultiPartCSV,
                       help="s3 path to a flat directory of CSV files.")
    validation_data = Input(Types.MultiPartCSV,
                            help="s3 path to a flat directory of CSV files.")

    # Node definitions
    train_node = xgtrainer_task(
        static_hyperparameters={
            "eval_metric": "auc",
            "num_round": "100",
            "objective": "binary:logistic",
            "rate_drop": "0.3",
            "tweedie_variance_power": "1.4",
        },
        train=train_data,
        validation=validation_data,
    )

    # Outputs
    trained_model = Output(train_node.outputs.model, sdk_type=Types.Blob)
Code example #21
class DynamicLaunchPlanCaller(object):
    outer_a = Input(Types.Integer, default=5, help="Input for inner workflow")
    lp_task = lp_yield_task(num=outer_a)
    wf_output = Output(lp_task.outputs.out, sdk_type=Types.Integer)
Code example #22
class StaticLaunchPlanCaller(object):
    outer_a = Input(Types.Integer, default=5, help="Input for inner workflow")
    identity_lp_execution = id_lp(a=outer_a)
    wf_output = Output(identity_lp_execution.outputs.task_output,
                       sdk_type=Types.Integer)
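id_lp is a launch plan over the IdentityWorkflow shown in code example #27. A hedged sketch of how it could be produced, assuming the legacy SDK's create_launch_plan helper:

# Assumes IdentityWorkflow is the @workflow_class from code example #27.
id_lp = IdentityWorkflow.create_launch_plan()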
Code example #23
class EdgeDetectorWf(object):
    image_input = Input(Types.String, required=True, help="Image to run edge detection on")
    run_edge_detection = edge_detection_canny(image_location=image_input)
    edges = Output(run_edge_detection.outputs.parsed_image,
                   sdk_type=Types.Blob)
Code example #24
class TimeDemoWorkflow(object):
    dt = Input(Types.Datetime, help="Input time")
    duration = Input(Types.Timedelta, help="Input timedelta")
    time_example = time_task(dt=dt, duration=duration)
    new_time = Output(time_example.outputs.new_time, sdk_type=Types.Datetime)
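A hedged sketch of time_task (an assumed implementation: add the timedelta to the datetime and emit the result):

from flytekit.sdk.tasks import inputs, outputs, python_task
from flytekit.sdk.types import Types

@inputs(dt=Types.Datetime, duration=Types.Timedelta)
@outputs(new_time=Types.Datetime)
@python_task
def time_task(wf_params, dt, duration, new_time):
    new_time.set(dt + duration)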
Code example #25
class DynamicSubWorkflowCaller(object):
    outer_a = Input(Types.Integer, default=5, help="Input for inner workflow")
    sub_wf_task = sub_wf_yield_task(num=outer_a)
    wf_output = Output(sub_wf_task.outputs.out, sdk_type=Types.Integer)
Code example #26
class SimpleWorkflow(object):
    input_1 = Input(Types.Integer)
    input_2 = Input(Types.Integer, default=5, help='Not required.')
    a = add_one(a=input_1)
    output = Output(a.outputs.b, sdk_type=Types.Integer)
Code example #27
class IdentityWorkflow(object):
    a = Input(Types.Integer, default=5, help="Input for inner workflow")
    odd_nums_task = inner_task(num=a)
    task_output = Output(odd_nums_task.outputs.out, sdk_type=Types.Integer)
Code example #28
class EdgeDetector(object):
    script = Input(Types.Blob)
    image = Input(Types.Blob)
    edge_task = edges(script=script, image=image)
    out = Output(edge_task.outputs.edges, sdk_type=Types.Blob)
Code example #29
class Parent(object):
    input_1 = Input(Types.Integer)
    child1 = child_lp(input_1=input_1)
    child2 = child_lp(input_1=input_1, input_2=10)
    final_sum = sum(a=child1.outputs.output, b=child2.outputs.output)
    output = Output(final_sum.outputs.c, sdk_type=Types.Integer)
Code example #30
class SimpleDynamicSubworkflow(object):
    input_a = Input(Types.Integer, default=5, help="Input for inner workflow")
    lp_task = dynamic_wf_task(task_input_num=input_a)
    wf_output = Output(lp_task.outputs.out, sdk_type=Types.Integer)