)
step_5.add_argument(name='inputs',
                    argument_type=ArgumentType.CONTAINER,
                    data_reference='steps.4.produce')
step_5.add_argument(name='reference',
                    argument_type=ArgumentType.CONTAINER,
                    data_reference='steps.1.produce')
step_5.add_output('produce')
pipeline_description.add_step(step_5)

# Final Output
pipeline_description.add_output(name='output predictions',
                                data_reference='steps.5.produce')

# Output json pipeline, named after the pipeline's own id.
import json

blob = pipeline_description.to_json()
# Parse the id out of the serialized JSON instead of slicing a fixed
# character range (the old blob[8:44] silently breaks if key order or
# spacing of the serialization ever changes).
pipeline_id = json.loads(blob)['id']
filename = pipeline_id + '.json'
with open(filename, 'w') as outfile:
    outfile.write(blob)

# Output dataset metafile (dataset name from command line argument).
if len(sys.argv) < 2:
    raise SystemExit('usage: <script> <dataset-name>')
dataset = sys.argv[1]
metafile = pipeline_id + '.meta'
meta = {
    'problem': f'{dataset}_problem',
    'full_inputs': [f'{dataset}_dataset'],
    'train_inputs': [f'{dataset}_dataset_TRAIN'],
    'test_inputs': [f'{dataset}_dataset_TEST'],
    'score_inputs': [f'{dataset}_dataset_SCORE'],
}
with open(metafile, 'w') as outfile:
    # json.dump guarantees well-formed JSON; the previous hand-built
    # string concatenation was one missing comma away from corruption.
    json.dump(meta, outfile)
# Example #2
# 0
pipeline_description.add_step(step_5)

# Step 6: Distil fairness post-processing primitive.
step_6 = PrimitiveStep(primitive=index.get_primitive(
    'd3m.primitives.data_augmentation.data_conversion.FairnessPostProcessing'))

# Both container arguments are wired to the output of step 5.
previous_output = 'steps.5.produce'
for argument_name in ('inputs', 'outputs'):
    step_6.add_argument(name=argument_name,
                        argument_type=ArgumentType.CONTAINER,
                        data_reference=previous_output)
step_6.add_output('produce')

# Fairness configuration for the post-processing algorithm.
fairness_hyperparams = (
    ('algorithm', 'Calibrated_Equality_of_Odds'),
    ('protected_attribute_cols', [3]),
    ('favorable_label', 0.),
)
for hp_name, hp_value in fairness_hyperparams:
    step_6.add_hyperparameter(name=hp_name,
                              argument_type=ArgumentType.VALUE,
                              data=hp_value)
pipeline_description.add_step(step_6)

# Final Output: expose the fairness-adjusted predictions of step 6.
pipeline_description.add_output(name='output predictions',
                                data_reference='steps.6.produce')

# Output to JSON
serialized = pipeline_description.to_json()
with open('pipeline.json', 'w') as outfile:
    outfile.write(serialized)
# Example #3
# 0
def build_pipeline(pipepline_info, pipepline_mapping, stdout=None):
    """Assemble a d3m ``Pipeline`` from ordered primitive descriptions.

    Parameters
    ----------
    pipepline_info : iterable
        Primitive descriptions; each item exposes ``python_path``,
        ``hyperparameter`` (dict) and ``ancestors`` (dict).  The sentinel
        paths ``'HEAD'`` and ``'ENDING'`` mark the pipeline start and end.
        (NOTE(review): the ``pipepline`` misspelling is kept because
        callers may pass these arguments by keyword.)
    pipepline_mapping : dict
        Maps ancestor ids to 1-based step numbers; converted here to the
        0-based ``steps.N.produce`` references d3m expects.
    stdout : file-like, optional
        When given, all diagnostic ``print`` output is redirected there
        for the duration of the call.

    Side effects: writes the serialized pipeline to
    ``example_pipeline.json`` and prints the JSON.
    """
    default_stdout = sys.stdout
    if stdout is not None:
        sys.stdout = stdout

    # try/finally guarantees sys.stdout is restored even when pipeline
    # construction raises (the original leaked the redirection on error).
    try:
        # Creating pipeline
        pipeline_description = Pipeline()
        pipeline_description.add_input(name='inputs')

        for primitive_info in pipepline_info:
            print(primitive_info.python_path)
            print(primitive_info.hyperparameter)
            print(primitive_info.ancestors)

            if primitive_info.python_path == 'HEAD':
                # HEAD only carries the dataset location; it adds no step.
                dataset_fullname = primitive_info.hyperparameter['dataset_folder']
                print(dataset_fullname)
                continue

            if primitive_info.python_path == 'ENDING':
                # ENDING names the step whose output is the pipeline result.
                ancestors = primitive_info.ancestors
                end_step_num = pipepline_mapping[ancestors['inputs']] - 1
                pipeline_description.add_output(
                    name='output predictions',
                    data_reference='steps.' + str(end_step_num) + '.produce')
                continue

            primitive = index.get_primitive(primitive_info.python_path)
            step = PrimitiveStep(primitive=primitive)

            hyperparameters = primitive_info.hyperparameter
            ancestors = primitive_info.ancestors

            # Wire the step's container arguments either to ancestor steps,
            # or (first real step, ancestors['inputs'] == 0) to the
            # pipeline-level input.
            if ancestors['inputs'] != 0:
                for ances_key, ances_value in ancestors.items():
                    step_num = pipepline_mapping[ances_value] - 1
                    print(ances_key, ances_value, step_num)
                    step.add_argument(
                        name=ances_key,
                        argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.' + str(step_num) + '.produce')
            else:
                step.add_argument(name='inputs',
                                  argument_type=ArgumentType.CONTAINER,
                                  data_reference='inputs.0')

            # Forward every hyperparameter verbatim as a VALUE.
            for hyper, hyper_value in hyperparameters.items():
                step.add_hyperparameter(name=hyper,
                                        argument_type=ArgumentType.VALUE,
                                        data=hyper_value)

            step.add_output('produce')
            pipeline_description.add_step(step)

        # Output to json
        data = pipeline_description.to_json()
        with open('example_pipeline.json', 'w') as f:
            f.write(data)
            print(data)
    finally:
        sys.stdout.flush()
        sys.stdout = default_stdout
# Example #4
# 0
step_4.add_hyperparameter(name='return_result',
                          argument_type=ArgumentType.VALUE,
                          data='new')
step_4.add_argument(name='inputs',
                    argument_type=ArgumentType.CONTAINER,
                    data_reference=attributes)
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Step 5: construct the final predictions frame.
predictions_primitive = index.get_primitive(
    'd3m.primitives.data_transformation.construct_predictions.Common')
step_5 = PrimitiveStep(primitive=predictions_primitive)
step_arguments = (('inputs', 'steps.4.produce'),
                  ('reference', 'steps.1.produce'))
for argument_name, reference in step_arguments:
    step_5.add_argument(name=argument_name,
                        argument_type=ArgumentType.CONTAINER,
                        data_reference=reference)
step_5.add_output('produce')
pipeline_description.add_step(step_5)

# Final Output: step 5's predictions become the pipeline result.
pipeline_description.add_output(name='output predictions',
                                data_reference='steps.5.produce')

# Output to json: save to disk and echo for inspection.
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
    f.write(data)
    print(data)