)
step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.4.produce')
step_5.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_5.add_output('produce')
pipeline_description.add_step(step_5)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.5.produce')

# Output json pipeline
blob = pipeline_description.to_json()
# The serialized JSON starts with '{"id": "<uuid>"...', so the slice 8:44 picks out
# the pipeline's UUID, which is reused as the file name.
filename = blob[8:44] + '.json'
with open(filename, 'w') as outfile:
    outfile.write(blob)

# Output dataset metafile (dataset name taken from the command-line argument)
metafile = blob[8:44] + '.meta'
dataset = sys.argv[1]
with open(metafile, 'w') as outfile:
    outfile.write('{')
    outfile.write(f'"problem": "{dataset}_problem",')
    outfile.write(f'"full_inputs": ["{dataset}_dataset"],')
    outfile.write(f'"train_inputs": ["{dataset}_dataset_TRAIN"],')
    outfile.write(f'"test_inputs": ["{dataset}_dataset_TEST"],')
    outfile.write(f'"score_inputs": ["{dataset}_dataset_SCORE"]')
    outfile.write('}')
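The metafile above is assembled by string concatenation, which is easy to break (a missing comma yields invalid JSON). A minimal sketch of an equivalent write using the standard-library json module, assuming the same dataset naming convention, would be:

import json

# Hypothetical alternative to the hand-written string concatenation above.
meta = {
    "problem": f"{dataset}_problem",
    "full_inputs": [f"{dataset}_dataset"],
    "train_inputs": [f"{dataset}_dataset_TRAIN"],
    "test_inputs": [f"{dataset}_dataset_TEST"],
    "score_inputs": [f"{dataset}_dataset_SCORE"],
}
with open(metafile, 'w') as outfile:
    json.dump(meta, outfile, indent=2)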
pipeline_description.add_step(step_5)

# Step 6: Fairness post-processing (Distil)
step_6 = PrimitiveStep(primitive=index.get_primitive(
    'd3m.primitives.data_augmentation.data_conversion.FairnessPostProcessing'))
step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.5.produce')
step_6.add_argument(name='outputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.5.produce')
step_6.add_output('produce')
step_6.add_hyperparameter(name='algorithm', argument_type=ArgumentType.VALUE, data='Calibrated_Equality_of_Odds')
step_6.add_hyperparameter(name='protected_attribute_cols', argument_type=ArgumentType.VALUE, data=[3])
step_6.add_hyperparameter(name='favorable_label', argument_type=ArgumentType.VALUE, data=0.)
pipeline_description.add_step(step_6)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.6.produce')

# Output to JSON
with open('pipeline.json', 'w') as outfile:
    outfile.write(pipeline_description.to_json())
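Once pipeline.json has been written, a quick round-trip check can confirm that the serialized pipeline loads back cleanly. A minimal sketch, assuming the d3m core package is installed:

from d3m.metadata.pipeline import Pipeline

# Re-load the serialized pipeline and print its id as a sanity check.
with open('pipeline.json', 'r') as infile:
    reloaded = Pipeline.from_json(infile.read())
print(reloaded.id)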
import sys

from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep


def build_pipeline(pipepline_info, pipepline_mapping, stdout=None):
    # Optionally redirect all diagnostic prints to a caller-supplied stream.
    default_stdout = sys.stdout
    if stdout is not None:
        sys.stdout = stdout

    # Create the pipeline and declare its single input.
    pipeline_description = Pipeline()
    pipeline_description.add_input(name='inputs')

    for primitive_info in pipepline_info:
        print(primitive_info.python_path)
        print(primitive_info.hyperparameter)
        print(primitive_info.ancestors)

        if primitive_info.python_path == 'HEAD':
            # The HEAD node only carries the dataset folder; it adds no step.
            dataset_fullname = primitive_info.hyperparameter['dataset_folder']
            print(dataset_fullname)
            continue
        elif primitive_info.python_path == 'ENDING':
            # The ENDING node marks which step produces the final predictions.
            ancestors = primitive_info.ancestors
            end_step_num = pipepline_mapping[ancestors['inputs']] - 1
            pipeline_description.add_output(name='output predictions',
                                            data_reference='steps.' + str(end_step_num) + '.produce')
        else:
            primitive = index.get_primitive(primitive_info.python_path)
            step = PrimitiveStep(primitive=primitive)
            hyperparameters = primitive_info.hyperparameter
            ancestors = primitive_info.ancestors

            # Add arguments: wire each ancestor's output into this step, or fall
            # back to the pipeline input when there is no ancestor step.
            if ancestors['inputs'] != 0:
                for ances_key in ancestors.keys():
                    print(ances_key, ancestors[ances_key], pipepline_mapping[ancestors[ances_key]] - 1)
                    step_num = pipepline_mapping[ancestors[ances_key]] - 1
                    step.add_argument(name=ances_key, argument_type=ArgumentType.CONTAINER,
                                      data_reference='steps.' + str(step_num) + '.produce')
            else:
                step.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                                  data_reference='inputs.0')

            # Add hyperparameters.
            for hyper in hyperparameters.keys():
                hyper_value = hyperparameters[hyper]
                step.add_hyperparameter(name=hyper, argument_type=ArgumentType.VALUE, data=hyper_value)

            step.add_output('produce')
            pipeline_description.add_step(step)

    # Output to json
    data = pipeline_description.to_json()
    with open('example_pipeline.json', 'w') as f:
        f.write(data)
    print(data)

    # yaml = pipeline_description.to_yaml()
    # with open('example_pipeline.yml', 'w') as f:
    #     f.write(yaml)
    # print(yaml)

    sys.stdout.flush()
    sys.stdout = default_stdout
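build_pipeline expects each element of pipepline_info to expose python_path, hyperparameter, and ancestors attributes, with pipepline_mapping translating an ancestor's node id to its 1-based step position. A minimal, hypothetical invocation sketch (the PrimitiveInfo namedtuple, the node ids, and the primitive path are illustrative, not part of the original code; it assumes the d3m common primitives are installed):

from collections import namedtuple

# Hypothetical lightweight container matching the attributes build_pipeline reads.
PrimitiveInfo = namedtuple('PrimitiveInfo', ['python_path', 'hyperparameter', 'ancestors'])

pipeline_info = [
    # HEAD node: carries only the dataset folder name.
    PrimitiveInfo('HEAD', {'dataset_folder': '185_baseball'}, {}),
    # Node id 2: first real step, fed directly from the pipeline input.
    PrimitiveInfo('d3m.primitives.data_transformation.dataset_to_dataframe.Common',
                  {}, {'inputs': 0}),
    # ENDING node: final predictions come from node id 2.
    PrimitiveInfo('ENDING', {}, {'inputs': 2}),
]
# Node id 2 is the first step added to the pipeline (1-based position 1).
pipeline_mapping = {2: 1}

build_pipeline(pipeline_info, pipeline_mapping)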
step_4.add_hyperparameter(name='return_result', argument_type=ArgumentType.VALUE, data='new')
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Step 5: Predictions
step_5 = PrimitiveStep(primitive=index.get_primitive(
    'd3m.primitives.data_transformation.construct_predictions.Common'))
step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.4.produce')
step_5.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_5.add_output('produce')
pipeline_description.add_step(step_5)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.5.produce')

# Output to json
data = pipeline_description.to_json()
with open('example_pipeline.json', 'w') as f:
    f.write(data)
print(data)