def bind_primitive_IO(self, primitive: PrimitiveStep, templateIO):
    """Bind the data references in ``templateIO`` to ``primitive``'s arguments.

    A single reference is bound to the conventional "inputs" argument.
    Otherwise the argument names are recovered from the primitive's own
    metadata — ``set_training_data`` arguments first, then any additional
    ``produce`` arguments — and the references are bound positionally.
    """
    if len(templateIO) == 1:
        primitive.add_argument(
            name="inputs",
            argument_type=metadata_base.ArgumentType.CONTAINER,
            data_reference=templateIO[0])
        return

    arguments_train = primitive.primitive.metadata.query(
    )['primitive_code']['instance_methods']['set_training_data']['arguments']
    arguments_produce = primitive.primitive.metadata.query(
    )['primitive_code']['instance_methods']['produce']['arguments']

    # Training arguments first (metadata order), then produce-only
    # arguments, skipping the runtime control parameters.
    ordered_names = list(arguments_train)
    seen = set(ordered_names)
    for candidate in arguments_produce:
        if candidate not in seen and candidate not in ('timeout', 'iterations'):
            ordered_names.append(candidate)
            seen.add(candidate)

    for position, arg_name in enumerate(ordered_names):
        primitive.add_argument(
            name=arg_name,
            argument_type=metadata_base.ArgumentType.CONTAINER,
            data_reference=templateIO[position])
def create_pipeline_json(self, prim_dict):
    """
    Generate pipeline.json
    """
    pipeline_description = Pipeline(
        pipeline_id=self.id,  # previously: self.id + "_" + str(self.rank)
        context=Context.EVALUATION,
        name="Pipeline for evaluation")

    for ip in self.inputs:
        pipeline_description.add_input(name=ip['name'])

    for step_idx in range(self.num_steps()):
        prim = prim_dict[self.primitives[step_idx]]
        # Identify the primitive by its full metadata fingerprint.
        description = {
            'id': prim.id,
            'version': prim.primitive_class.version,
            'python_path': prim.primitive_class.python_path,
            'name': prim.primitive_class.name,
            'digest': prim.primitive_class.digest,
        }
        step = PrimitiveStep(primitive_description=description)

        for arg_name, value in self.primitives_arguments[step_idx].items():
            origin = value['origin']  # kept: read for parity with stored argument records
            step.add_argument(name=arg_name,
                              argument_type=ArgumentType.CONTAINER,
                              data_reference=value['data'])
        step.add_output(output_id=prim.primitive_class.produce_methods[0])

        step_hyperparams = self.hyperparams[step_idx]
        if step_hyperparams is not None:
            for hp_name, hp_value in step_hyperparams.items():
                step.add_hyperparameter(name=hp_name,
                                        argument_type=ArgumentType.VALUE,
                                        data=hp_value)
        pipeline_description.add_step(step)

    for op in self.outputs:
        pipeline_description.add_output(data_reference=op[2], name=op[3])

    self.pipeline_description = pipeline_description
def set_loss(pipeline_description):
    """Append the categorical cross-entropy loss step to the pipeline.

    Records the step's index in the module-level ``LOSS_SETUP_IDX`` so later
    configuration can target the loss step.
    """
    global LOSS_SETUP_IDX
    # The step is appended next, so its index is the current step count.
    LOSS_SETUP_IDX = len(pipeline_description.steps)
    loss_metadata = (d3m.primitives.loss_function
                     .categorical_crossentropy.KerasWrap.metadata.query())
    pipeline_description.add_step(
        PrimitiveStep(primitive_description=loss_metadata))
def _new_pipeline(pipeline, hyperparams=None):
    """Clone ``pipeline``, overriding step hyperparameters from ``hyperparams``.

    ``hyperparams`` maps step-id strings to {name: value} overrides; original
    hyperparameters are carried over unless overridden.  The clone gets fresh
    ``cv_scores``/``score`` bookkeeping attributes.
    """
    overrides = to_dicts(hyperparams) if hyperparams else dict()

    clone = Pipeline(context=Context.TESTING)
    for input_ in pipeline.inputs:
        clone.add_input(name=input_['name'])

    for step_id, old_step in enumerate(pipeline.steps):
        step = PrimitiveStep(primitive=old_step.primitive)

        for name, argument in old_step.arguments.items():
            step.add_argument(name=name,
                              argument_type=argument['type'],
                              data_reference=argument['data'])
        for output in old_step.outputs:
            step.add_output(output)

        step_overrides = overrides.get(str(step_id), dict())
        # Copy original hyperparameters unless overridden...
        for name, hyperparam in old_step.hyperparams.items():
            if name not in step_overrides:
                step.add_hyperparameter(name=name,
                                        argument_type=ArgumentType.VALUE,
                                        data=hyperparam['data'])
        # ...then apply every override.
        for name, value in step_overrides.items():
            step.add_hyperparameter(name=name,
                                    argument_type=ArgumentType.VALUE,
                                    data=value)
        clone.add_step(step)

    for output in pipeline.outputs:
        clone.add_output(name=output['name'],
                         data_reference=output['data'])

    clone.cv_scores = list()
    clone.score = None
    return clone
def bind_primitive_IO(self, primitive: PrimitiveStep, templateIO):
    """Bind up to two data references ("inputs", then optionally "outputs")
    to ``primitive``; more than two references is rejected.
    """
    if templateIO:
        primitive.add_argument(
            name="inputs",
            argument_type=metadata_base.ArgumentType.CONTAINER,
            data_reference=templateIO[0])
    if len(templateIO) > 1:
        training_args = primitive.primitive.metadata.query()['primitive_code'][
            'instance_methods']['set_training_data']['arguments']
        # Some primitives (e.g. GreedyImputer) require "outputs", while others
        # do not (e.g. MeanImputer).
        if "outputs" in training_args:
            primitive.add_argument(
                name="outputs",
                argument_type=metadata_base.ArgumentType.CONTAINER,
                data_reference=templateIO[1])
    # Checked last to preserve the original side-effect ordering.
    if len(templateIO) > 2:
        raise exceptions.InvalidArgumentValueError(
            "Should be less than 3 arguments!")
def set_prediction(pipeline_description):
    """Append a construct_predictions step consuming the last step's output.

    The "reference" argument points back at the dataframe produced by the
    step indexed by the module-level ``IP_STEP``.
    """
    last_output = f"steps.{len(pipeline_description.steps) - 1}.produce"
    pred = PrimitiveStep(
        primitive_description=d3m.primitives.data_transformation
        .construct_predictions.Common.metadata.query())
    pred.add_argument(name='inputs',
                      argument_type=ArgumentType.CONTAINER,
                      data_reference=last_output)
    pred.add_argument(name='reference',
                      argument_type=ArgumentType.CONTAINER,
                      data_reference='steps.{}.produce'.format(IP_STEP))
    pred.add_output('produce')
    pipeline_description.add_step(pred)
def generate_only():
    """Build the single-table DFS regression pipeline, run d3m fit-score once
    to produce a pipeline_run file, and return the shell command that re-runs
    the pipeline from that file.
    """
    pipeline_description = Pipeline()
    pipeline_description.add_input(name='inputs')

    def append_step(python_path, arguments):
        # arguments: ordered mapping of argument name -> data reference.
        step = PrimitiveStep(primitive=index.get_primitive(python_path))
        for arg_name, data_ref in arguments.items():
            step.add_argument(name=arg_name,
                              argument_type=ArgumentType.CONTAINER,
                              data_reference=data_ref)
        step.add_output('produce')
        pipeline_description.add_step(step)

    # Step 0: dataset_to_dataframe
    append_step('d3m.primitives.data_transformation.dataset_to_dataframe.Common',
                {'inputs': 'inputs.0'})
    # Step 1: profiler (column role annotation)
    append_step('d3m.primitives.schema_discovery.profiler.Common',
                {'inputs': 'steps.0.produce'})
    # Step 2: column_parser
    append_step('d3m.primitives.data_transformation.column_parser.Common',
                {'inputs': 'steps.1.produce'})
    # Step 3: DFS Single Table
    append_step('d3m.primitives.feature_construction.deep_feature_synthesis.SingleTableFeaturization',
                {'inputs': 'steps.2.produce'})
    # Step 4: learn model
    append_step('d3m.primitives.regression.xgboost_gbtree.Common',
                {'inputs': 'steps.3.produce', 'outputs': 'steps.2.produce'})
    # Step 5: construct output
    append_step('d3m.primitives.data_transformation.construct_predictions.Common',
                {'inputs': 'steps.4.produce', 'reference': 'steps.2.produce'})

    # Final Output
    pipeline_description.add_output(name='output predictions',
                                    data_reference='steps.5.produce')

    # Generate .yml file for the pipeline
    import featuretools_ta1
    from pipeline_tests.utils import generate_pipeline
    dataset_name = 'LL1_retail_sales_total_MIN_METADATA'
    dataset_path = '/featuretools_ta1/datasets/seed_datasets_current'
    primitive_name = 'd3m.primitives.feature_construction.deep_feature_synthesis.SingleTableFeaturization'
    version = featuretools_ta1.__version__
    # Test name is the script's basename with its 5-char prefix stripped.
    test_name = os.path.splitext(os.path.basename(__file__))[0][5:]
    yml, pipeline_run_file = generate_pipeline(
        primitive_name=primitive_name,
        pipeline_description=pipeline_description,
        dataset_name=dataset_name,
        test_name=test_name)

    # fit-score command
    fs_cmd = 'python3 -m d3m runtime -d /featuretools_ta1/datasets/ fit-score -p {}'.format(
        yml)
    fs_cmd += ' -r {}/{}/{}_problem/problemDoc.json'.format(
        dataset_path, dataset_name, dataset_name)
    fs_cmd += ' -i {}/{}/TRAIN/dataset_TRAIN/datasetDoc.json'.format(
        dataset_path, dataset_name)
    fs_cmd += ' -t {}/{}/TEST/dataset_TEST/datasetDoc.json'.format(
        dataset_path, dataset_name)
    fs_cmd += ' -a {}/{}/SCORE/dataset_SCORE/datasetDoc.json'.format(
        dataset_path, dataset_name)
    fs_cmd += ' -O {}'.format(pipeline_run_file)

    # Run pipeline to save pipeline_run file
    os.system(fs_cmd)

    # Create and return command for running from pipeline_run file:
    pipeline_run_cmd = 'python3 -m d3m --pipelines-path /featuretools_ta1/MIT_FeatureLabs/{}/{}/pipelines/'.format(
        primitive_name, version)
    pipeline_run_cmd += ' runtime -d /featuretools_ta1/datasets/ fit-score'
    pipeline_run_cmd += ' -u {}'.format(pipeline_run_file)
    return pipeline_run_cmd
def build_pipeline(pipepline_info, pipepline_mapping, stdout=None):
    """Assemble a d3m Pipeline from primitive-info records and write it to
    'example_pipeline.json'.

    Parameters
    ----------
    pipepline_info : iterable of objects with ``python_path``,
        ``hyperparameter`` and ``ancestors`` attributes.  The sentinel
        python_path values 'HEAD' and 'ENDING' mark the pipeline's input
        record and output record respectively.
    pipepline_mapping : mapping from ancestor id to 1-based step number.
    stdout : optional stream; while building, ``sys.stdout`` is redirected
        there so the progress prints can be captured.

    BUG FIX: the original redirected ``sys.stdout`` and only restored it on
    the success path — any exception while building left the interpreter's
    stdout pointing at the redirect.  The body is now wrapped in
    try/finally so the caller's stdout is always restored.
    """
    default_stdout = sys.stdout
    if stdout is not None:
        sys.stdout = stdout
    try:
        # Creating pipeline
        pipeline_description = Pipeline()
        pipeline_description.add_input(name='inputs')
        for primitive_info in pipepline_info:
            print(primitive_info.python_path)
            print(primitive_info.hyperparameter)
            print(primitive_info.ancestors)
            if primitive_info.python_path == 'HEAD':
                # Input record: only announces the dataset folder.
                dataset_fullname = primitive_info.hyperparameter['dataset_folder']
                print(dataset_fullname)
                continue
            elif primitive_info.python_path == 'ENDING':
                # Output record: wire the pipeline output to its ancestor step.
                ancestors = primitive_info.ancestors
                end_step_num = pipepline_mapping[ancestors['inputs']] - 1
                pipeline_description.add_output(
                    name='output predictions',
                    data_reference='steps.' + str(end_step_num) + '.produce')
            else:
                primitive = index.get_primitive(primitive_info.python_path)
                step = PrimitiveStep(primitive=primitive)
                hyperparameters = primitive_info.hyperparameter
                ancestors = primitive_info.ancestors
                # Wire arguments: either upstream step outputs or, when the
                # ancestor id is 0, the pipeline input itself.
                if ancestors['inputs'] != 0:
                    for ances_key in ancestors.keys():
                        print(ances_key, ancestors[ances_key],
                              pipepline_mapping[ancestors[ances_key]] - 1)
                        step_num = pipepline_mapping[ancestors[ances_key]] - 1
                        step.add_argument(
                            name=ances_key,
                            argument_type=ArgumentType.CONTAINER,
                            data_reference='steps.' + str(step_num) + '.produce')
                else:
                    step.add_argument(name='inputs',
                                      argument_type=ArgumentType.CONTAINER,
                                      data_reference='inputs.0')
                # Attach hyperparameters as literal values.
                for hyper in hyperparameters.keys():
                    hyper_value = hyperparameters[hyper]
                    step.add_hyperparameter(name=hyper,
                                            argument_type=ArgumentType.VALUE,
                                            data=hyper_value)
                step.add_output('produce')
                pipeline_description.add_step(step)

        # Output to json
        data = pipeline_description.to_json()
        with open('example_pipeline.json', 'w') as f:
            f.write(data)
        print(data)
        sys.stdout.flush()
    finally:
        # Always hand stdout back to the caller, even on failure.
        sys.stdout = default_stdout
from d3m import index
from d3m.metadata.base import ArgumentType, Context
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
import sys

# Build the pipeline object and declare its single input.
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Alternatives considered for the profiling step:
#   d3m.primitives.data_transformation.column_parser.Common
#   d3m.primitives.data_cleaning.column_type_profiler.Simon

# Step 0: dataset_to_dataframe
step_0 = PrimitiveStep(primitive=index.get_primitive(
    'd3m.primitives.data_transformation.dataset_to_dataframe.Common'))
step_0.add_argument(name='inputs',
                    argument_type=ArgumentType.CONTAINER,
                    data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: Column profiler
step_1 = PrimitiveStep(primitive=index.get_primitive(
    'd3m.primitives.schema_discovery.profiler.Common'))
step_1.add_argument(name='inputs',
                    argument_type=ArgumentType.CONTAINER,
                    data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: DISTIL/NK pca feature selection
def __init__(self, ):
    """Construct the fixed three-step Duke text-summarization pipeline:
    dataset_to_dataframe -> column_parser -> Duke.
    """
    pipeline_description = Pipeline()
    pipeline_description.add_input(name="inputs")

    # (primitive python_path, data reference bound to its "inputs")
    step_specs = [
        ("d3m.primitives.data_transformation.dataset_to_dataframe.Common",
         "inputs.0"),
        ("d3m.primitives.data_transformation.column_parser.Common",
         "steps.0.produce"),
        ("d3m.primitives.data_cleaning.text_summarization.Duke",
         "steps.1.produce"),
    ]
    for python_path, data_reference in step_specs:
        step = PrimitiveStep(primitive=index.get_primitive(python_path))
        step.add_argument(name="inputs",
                          argument_type=ArgumentType.CONTAINER,
                          data_reference=data_reference)
        step.add_output("produce")
        pipeline_description.add_step(step)

    # Final Output
    pipeline_description.add_output(name="output predictions",
                                    data_reference="steps.2.produce")

    self.pipeline = pipeline_description
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep

# Pipeline sketch:
#   dataset_to_dataframe -> column_parser
#     -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
#   extract_columns_by_semantic_types(targets) feeds the estimator's outputs.

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe (TODS flavour)
primitive_0 = index.get_primitive(
    'd3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs',
                    argument_type=ArgumentType.CONTAINER,
                    data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: Column Parser
primitive_1 = index.get_primitive(
    'd3m.primitives.data_transformation.column_parser.Common')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs',
                    argument_type=ArgumentType.CONTAINER,
                    data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)
from d3m import index from d3m.metadata.base import ArgumentType from d3m.metadata.pipeline import Pipeline, PrimitiveStep # Creating pipeline pipeline_description = Pipeline() pipeline_description.add_input(name="inputs") # Step 1: Ts formatter step_0 = PrimitiveStep(primitive=index.get_primitive( "d3m.primitives.data_preprocessing.data_cleaning.DistilTimeSeriesFormatter" )) step_0.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER, data_reference="inputs.0") step_0.add_output("produce") pipeline_description.add_step(step_0) # Step 2: DS to DF on formatted ts DS step_1 = PrimitiveStep(primitive=index.get_primitive( "d3m.primitives.data_transformation.dataset_to_dataframe.Common")) step_1.add_argument( name="inputs", argument_type=ArgumentType.CONTAINER, data_reference="steps.0.produce", ) step_1.add_output("produce") pipeline_description.add_step(step_1) # Step 3: DS to DF on input DS step_2 = PrimitiveStep(primitive=index.get_primitive(
from d3m import index
from d3m.metadata.base import ArgumentType, Context
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
import sys

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: time-series formatter
step_0 = PrimitiveStep(primitive=index.get_primitive(
    "d3m.primitives.data_preprocessing.data_cleaning.DistilTimeSeriesFormatter"
))
step_0.add_argument(name="inputs",
                    argument_type=ArgumentType.CONTAINER,
                    data_reference="inputs.0")
step_0.add_output("produce")
pipeline_description.add_step(step_0)

# Step 1: dataset_to_dataframe on the formatted dataset
step_1 = PrimitiveStep(primitive=index.get_primitive(
    'd3m.primitives.data_transformation.dataset_to_dataframe.Common'))
step_1.add_argument(name='inputs',
                    argument_type=ArgumentType.CONTAINER,
                    data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: Grouping Field Compose
step_2 = PrimitiveStep(primitive=index.get_primitive(
    "d3m.primitives.data_transformation.grouping_field_compose.Common"))
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep

# Pipeline sketch:
#   dataset_to_dataframe -> column_parser
#     -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
#   extract_columns_by_semantic_types(targets) feeds the estimator's outputs.

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe (TODS flavour)
primitive_0 = index.get_primitive(
    'd3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs',
                    argument_type=ArgumentType.CONTAINER,
                    data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive(
    'd3m.primitives.data_transformation.column_parser.Common'))
step_1.add_argument(name='inputs',
                    argument_type=ArgumentType.CONTAINER,
                    data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
def __init__(self, algorithm: str = 'PseudoLabel'):
    """Assemble the TabularSemiSupervised classification pipeline.

    Graph: dataset_to_dataframe -> profiler -> column_parser -> imputer ->
    attribute extraction (narrowed to integer/float columns) plus target
    extraction -> iterative labeling -> construct_predictions.

    algorithm: value of the TabularSemiSupervised "algorithm" hyperparameter.
    """
    pipeline_description = Pipeline()
    pipeline_description.add_input(name="inputs")

    def make_step(python_path, **data_refs):
        # Build a step for python_path and bind each keyword as a
        # CONTAINER argument (argument name -> data reference).
        step = PrimitiveStep(primitive=index.get_primitive(python_path))
        for arg_name, ref in data_refs.items():
            step.add_argument(name=arg_name,
                              argument_type=ArgumentType.CONTAINER,
                              data_reference=ref)
        return step

    # Step 0: DS to DF on input DS
    step = make_step(
        "d3m.primitives.data_transformation.dataset_to_dataframe.Common",
        inputs="inputs.0")
    step.add_output("produce")
    pipeline_description.add_step(step)

    # Step 1: Simple Profiler Column Role Annotation
    step = make_step("d3m.primitives.schema_discovery.profiler.Common",
                     inputs="steps.0.produce")
    step.add_output("produce")
    pipeline_description.add_step(step)

    # Step 2: column parser on input DF
    step = make_step("d3m.primitives.data_transformation.column_parser.Common",
                     inputs="steps.1.produce")
    step.add_output("produce")
    step.add_hyperparameter(
        name="parse_semantic_types",
        argument_type=ArgumentType.VALUE,
        data=[
            "http://schema.org/Boolean",
            "http://schema.org/Integer",
            "http://schema.org/Float",
            "https://metadata.datadrivendiscovery.org/types/FloatVector",
            "http://schema.org/DateTime",
        ],
    )
    pipeline_description.add_step(step)

    # Step 3: imputer
    step = make_step("d3m.primitives.data_cleaning.imputer.SKlearn",
                     inputs="steps.2.produce")
    step.add_output("produce")
    step.add_hyperparameter(name="return_result",
                            argument_type=ArgumentType.VALUE,
                            data="replace")
    step.add_hyperparameter(name="use_semantic_types",
                            argument_type=ArgumentType.VALUE,
                            data=True)
    pipeline_description.add_step(step)

    # Step 4: keep attribute columns only
    step = make_step(
        "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common",
        inputs="steps.3.produce")
    step.add_hyperparameter(
        name="semantic_types",
        argument_type=ArgumentType.VALUE,
        data=["https://metadata.datadrivendiscovery.org/types/Attribute"],
    )
    step.add_output("produce")
    pipeline_description.add_step(step)

    # Step 5: narrow to integer/float attributes
    step = make_step(
        "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common",
        inputs="steps.4.produce")
    step.add_hyperparameter(
        name="semantic_types",
        argument_type=ArgumentType.VALUE,
        data=["http://schema.org/Integer", "http://schema.org/Float"],
    )
    step.add_output("produce")
    pipeline_description.add_step(step)

    # Step 6: extract target columns (from the profiled frame, pre-imputation)
    step = make_step(
        "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common",
        inputs="steps.1.produce")
    step.add_hyperparameter(
        name="semantic_types",
        argument_type=ArgumentType.VALUE,
        data=[
            "https://metadata.datadrivendiscovery.org/types/Target",
        ],
    )
    step.add_output("produce")
    pipeline_description.add_step(step)

    # Step 7: Tabular Semi Supervised learner
    step = make_step(
        "d3m.primitives.semisupervised_classification.iterative_labeling.TabularSemiSupervised",
        inputs="steps.5.produce",
        outputs="steps.6.produce")
    step.add_hyperparameter(name="algorithm",
                            argument_type=ArgumentType.VALUE,
                            data=algorithm)
    step.add_output("produce")
    pipeline_description.add_step(step)

    # Step 8: construct predictions
    step = make_step(
        "d3m.primitives.data_transformation.construct_predictions.Common",
        inputs="steps.7.produce",
        reference="steps.1.produce")
    step.add_output("produce")
    pipeline_description.add_step(step)

    # Final Output
    pipeline_description.add_output(name="output predictions",
                                    data_reference="steps.8.produce")
    self.pipeline = pipeline_description
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# Pipeline sketch:
#   dataset_to_dataframe -> column_parser
#     -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
#   extract_columns_by_semantic_types(targets) feeds the estimator's outputs.

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe (TODS flavour)
primitive_0 = index.get_primitive(
    'd3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs',
                    argument_type=ArgumentType.CONTAINER,
                    data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser
primitive_1 = index.get_primitive(
    'd3m.primitives.data_transformation.column_parser.Common')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs',
                    argument_type=ArgumentType.CONTAINER,
                    data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)
from d3m import index
from d3m.metadata.base import ArgumentType, Context
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
import sys

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: denormalize the multi-resource dataset
step_0 = PrimitiveStep(primitive=index.get_primitive(
    'd3m.primitives.data_transformation.denormalize.Common'))
step_0.add_argument(name='inputs',
                    argument_type=ArgumentType.CONTAINER,
                    data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: dataset_to_dataframe, pinned to the learningData resource
step_1 = PrimitiveStep(primitive=index.get_primitive(
    'd3m.primitives.data_transformation.dataset_to_dataframe.Common'))
step_1.add_argument(name='inputs',
                    argument_type=ArgumentType.CONTAINER,
                    data_reference='steps.0.produce')
step_1.add_hyperparameter(name='dataframe_resource',
                          argument_type=ArgumentType.VALUE,
                          data='learningData')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: DISTIL/NK VAR primitive
from d3m import index
from d3m.metadata.base import ArgumentType, Context
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
import sys

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name="inputs")

# Step 0: dataset_to_dataframe
step_0 = PrimitiveStep(primitive=index.get_primitive(
    "d3m.primitives.data_transformation.dataset_to_dataframe.Common"))
step_0.add_argument(name="inputs",
                    argument_type=ArgumentType.CONTAINER,
                    data_reference="inputs.0")
step_0.add_output("produce")
pipeline_description.add_step(step_0)

# Step 1: Simple Profiler Column Role Annotation
step_1 = PrimitiveStep(primitive=index.get_primitive(
    "d3m.primitives.schema_discovery.profiler.Common"))
step_1.add_argument(name="inputs",
                    argument_type=ArgumentType.CONTAINER,
                    data_reference="steps.0.produce")
step_1.add_output("produce")
pipeline_description.add_step(step_1)
from d3m import index from d3m.metadata.base import ArgumentType, Context from d3m.metadata.pipeline import Pipeline, PrimitiveStep # Creating pipeline pipeline_description = Pipeline() pipeline_description.add_input(name='inputs') # Step 0: Denormalize primitive step_0 = PrimitiveStep(primitive=index.get_primitive( 'd3m.primitives.data_transformation.denormalize.Common')) step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') step_0.add_output('produce') pipeline_description.add_step(step_0) # Step 1: dataset_to_dataframe step_1 = PrimitiveStep(primitive=index.get_primitive( 'd3m.primitives.data_transformation.dataset_to_dataframe.Common')) step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') step_1.add_hyperparameter(name='dataframe_resource', argument_type=ArgumentType.VALUE, data='learningData') step_1.add_output('produce') pipeline_description.add_step(step_1) # Step 2: DISTIL/NK data cleaning step_2 = PrimitiveStep(primitive=index.get_primitive(
def _gen_pipeline(self):
    """Build the Euclidean nomination pipeline: three dataframe views of the
    input dataset (default resource, resource '1', resource '2') feeding the
    EuclideanNomination primitive.
    """
    pipeline = meta_pipeline.Pipeline()
    pipeline.add_input(name='inputs')

    d2d_path = 'd3m.primitives.data_transformation.dataset_to_dataframe.Common'

    # Step 0: dataset_to_dataframe on the default resource
    step_0 = PrimitiveStep(primitive=index.get_primitive(d2d_path))
    step_0.add_argument(name='inputs',
                        argument_type=ArgumentType.CONTAINER,
                        data_reference='inputs.0')
    step_0.add_output('produce')
    pipeline.add_step(step_0)

    # Steps 1 and 2: the same conversion pinned to resources '1' and '2'
    for resource in ('1', '2'):
        step = PrimitiveStep(primitive=index.get_primitive(d2d_path))
        step.add_argument(name='inputs',
                          argument_type=ArgumentType.CONTAINER,
                          data_reference='inputs.0')
        step.add_hyperparameter(name='dataframe_resource',
                                argument_type=ArgumentType.VALUE,
                                data=resource)
        step.add_output('produce')
        pipeline.add_step(step)

    # Step 3: nominate matches between the two resource views
    step_3 = meta_pipeline.PrimitiveStep(
        primitive_description=EuclideanNomination.metadata.query())
    step_3.add_argument(name='inputs_1',
                        argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.1.produce')
    step_3.add_argument(name='inputs_2',
                        argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.2.produce')
    step_3.add_argument(name='reference',
                        argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.0.produce')
    step_3.add_output('produce')
    pipeline.add_step(step_3)

    # Adding output step to the pipeline
    pipeline.add_output(name='Predictions', data_reference='steps.3.produce')

    return pipeline
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name="inputs")

# Step 0: DS to DF on input DS
step_0 = PrimitiveStep(primitive=index.get_primitive(
    "d3m.primitives.data_transformation.dataset_to_dataframe.Common"))
step_0.add_argument(name="inputs",
                    argument_type=ArgumentType.CONTAINER,
                    data_reference="inputs.0")
step_0.add_output("produce")
pipeline_description.add_step(step_0)

# Step 1: Simple Profiler Column Role Annotation
step_1 = PrimitiveStep(primitive=index.get_primitive(
    "d3m.primitives.schema_discovery.profiler.Common"))
step_1.add_argument(name="inputs",
                    argument_type=ArgumentType.CONTAINER,
                    data_reference="steps.0.produce")
step_1.add_output("produce")
pipeline_description.add_step(step_1)

# Step 2: column parser on the profiled dataframe
step_2 = PrimitiveStep(primitive=index.get_primitive(
    "d3m.primitives.data_transformation.column_parser.Common"))
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe (TODS flavour)
step_0 = PrimitiveStep(primitive=index.get_primitive(
    'd3m.primitives.tods.data_processing.dataset_to_dataframe'))
step_0.add_argument(name='inputs',
                    argument_type=ArgumentType.CONTAINER,
                    data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive(
    'd3m.primitives.data_transformation.column_parser.Common'))
step_1.add_argument(name='inputs',
                    argument_type=ArgumentType.CONTAINER,
                    data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: BKFilter, selecting columns 2 and 3 via semantic types
step_2 = PrimitiveStep(primitive=index.get_primitive(
    'd3m.primitives.tods.feature_analysis.bk_filter'))
step_2.add_hyperparameter(name='use_semantic_types',
                          argument_type=ArgumentType.VALUE,
                          data=True)
step_2.add_hyperparameter(name='use_columns',
                          argument_type=ArgumentType.VALUE,
                          data=(2, 3))
step_2.add_argument(name='inputs',
                    argument_type=ArgumentType.CONTAINER,
                    data_reference='steps.1.produce')
step_2.add_output('produce')
pipeline_description.add_step(step_2)
def __init__(
    self,
    epochs: int = 5000,
    attention_lstm: bool = True,
):
    """Assemble the LSTM-FCN time-series classification pipeline.

    Graph: time_series_formatter -> dataset_to_dataframe (formatted) and
    dataset_to_dataframe (raw) -> column_parser -> target extraction ->
    LSTM_FCN -> construct_predictions.

    epochs: training epochs for the LSTM_FCN primitive.
    attention_lstm: whether the LSTM_FCN uses attention.
    """
    pipeline_description = Pipeline()
    pipeline_description.add_input(name="inputs")

    def make_step(python_path, **data_refs):
        # Build a step for python_path and bind each keyword as a
        # CONTAINER argument (argument name -> data reference).
        step = PrimitiveStep(primitive=index.get_primitive(python_path))
        for arg_name, ref in data_refs.items():
            step.add_argument(name=arg_name,
                              argument_type=ArgumentType.CONTAINER,
                              data_reference=ref)
        return step

    # Step 0: Ts formatter
    step = make_step(
        "d3m.primitives.data_transformation.time_series_formatter.DistilTimeSeriesFormatter",
        inputs="inputs.0")
    step.add_output("produce")
    pipeline_description.add_step(step)

    # Step 1: DS to DF on formatted ts DS
    step = make_step(
        "d3m.primitives.data_transformation.dataset_to_dataframe.Common",
        inputs="steps.0.produce")
    step.add_output("produce")
    pipeline_description.add_step(step)

    # Step 2: DS to DF on input DS
    step = make_step(
        "d3m.primitives.data_transformation.dataset_to_dataframe.Common",
        inputs="inputs.0")
    step.add_output("produce")
    pipeline_description.add_step(step)

    # Step 3: column parser on input DF
    step = make_step("d3m.primitives.data_transformation.column_parser.Common",
                     inputs="steps.2.produce")
    step.add_output("produce")
    step.add_hyperparameter(
        name="parse_semantic_types",
        argument_type=ArgumentType.VALUE,
        data=[
            "http://schema.org/Boolean",
            "http://schema.org/Integer",
            "http://schema.org/Float",
            "https://metadata.datadrivendiscovery.org/types/FloatVector",
        ],
    )
    pipeline_description.add_step(step)

    # Step 4: extract target columns
    step = make_step(
        "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common",
        inputs="steps.3.produce")
    step.add_hyperparameter(
        name="semantic_types",
        argument_type=ArgumentType.VALUE,
        data=[
            "https://metadata.datadrivendiscovery.org/types/Target",
        ],
    )
    step.add_output("produce")
    pipeline_description.add_step(step)

    # Step 5: LSTM FCN classifier
    step = make_step(
        "d3m.primitives.time_series_classification.convolutional_neural_net.LSTM_FCN",
        inputs="steps.1.produce",
        outputs="steps.4.produce")
    step.add_hyperparameter(name="epochs",
                            argument_type=ArgumentType.VALUE,
                            data=epochs)
    step.add_hyperparameter(name="attention_lstm",
                            argument_type=ArgumentType.VALUE,
                            data=attention_lstm)
    step.add_output("produce")
    pipeline_description.add_step(step)

    # Step 6: construct predictions
    step = make_step(
        "d3m.primitives.data_transformation.construct_predictions.Common",
        inputs="steps.5.produce",
        reference="steps.2.produce")
    step.add_output("produce")
    pipeline_description.add_step(step)

    # Final Output
    pipeline_description.add_output(
        name="output predictions", data_reference="steps.6.produce"
    )

    self.pipeline = pipeline_description
def load_pipeline_architecture(self, pipeline_architecture_dict):
    """Load a pipeline architecture description and return a d3m Pipeline.

    Args:
        pipeline_architecture_dict: iterable of stage dicts. Each stage must
            carry "primitive" (a primitive class, or its python-path string,
            which is resolved via get_primitive_with_name), "stage_name", and
            "input" (either PipelineWrapper.PIPELINE_INPUT or the stage_name
            of an earlier stage). Optional keys: "hyperparameters" (name ->
            value) and "arguments" (argument name -> producing stage_name).

    Returns:
        A d3m Pipeline whose single output is the last stage's produce output.

    Raises:
        ValueError: if the architecture contains no stages.
        KeyError: if a stage references an input stage that was not defined
            earlier in the list (stages must be topologically ordered).
    """
    pipeline_description = Pipeline(context=Context.TESTING)
    pipeline_description.add_input(name='inputs')

    # Maps a stage's user-facing name to the d3m data reference of its output,
    # so later stages can wire themselves to earlier ones.
    stage_name_to_reference_name = {}
    last_output_reference = None

    for stage_dict in pipeline_architecture_dict:
        # Extract stage attributes; resolve string primitive names lazily.
        primitive = stage_dict["primitive"]
        if isinstance(primitive, str):  # isinstance, not type(...)==str
            primitive = get_primitive_with_name(primitive)
        cur_stage_name = stage_dict["stage_name"]
        input_stage = stage_dict["input"]

        # Create primitive step wired to either the pipeline input or a
        # previously-created stage's output.
        step = PrimitiveStep(primitive_description=primitive.metadata.query())
        data_reference = (
            "inputs.0"
            if input_stage == PipelineWrapper.PIPELINE_INPUT
            else stage_name_to_reference_name[input_stage]
        )
        step.add_argument(name="inputs",
                          argument_type=ArgumentType.CONTAINER,
                          data_reference=data_reference)

        if "hyperparameters" in stage_dict:
            for k, v in stage_dict["hyperparameters"].items():
                step.add_hyperparameter(name=k,
                                        argument_type=ArgumentType.VALUE,
                                        data=v)

        # Extra container arguments (e.g. "outputs") reference other stages
        # by name and are translated to data references here.
        if "arguments" in stage_dict:
            for k, v in stage_dict["arguments"].items():
                step.add_argument(name=k,
                                  argument_type=ArgumentType.CONTAINER,
                                  data_reference=stage_name_to_reference_name[v])

        step.add_output("produce")
        pipeline_description.add_step(step)

        # Update accounting: remember this stage's output reference; the last
        # one seen becomes the pipeline output.
        reference_name = next(iter(step.get_output_data_references()))
        stage_name_to_reference_name[cur_stage_name] = reference_name
        last_output_reference = reference_name

    if last_output_reference is None:
        raise ValueError("pipeline_architecture_dict contains no stages")

    # Output is output of the last step
    pipeline_description.add_output(name="output",
                                    data_reference=last_output_reference)
    return pipeline_description
from d3m import index  # FIX: index.get_primitive is used below but was never imported
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

import numpy as np

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
#                            extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe — pull the main dataframe out of the dataset.
primitive_0 = index.get_primitive(
    'd3m.primitives.data_transformation.dataset_to_dataframe.Common')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs',
                    argument_type=ArgumentType.CONTAINER,
                    data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser — convert raw string columns to typed values.
primitive_1 = index.get_primitive(
    'd3m.primitives.data_transformation.column_parser.Common')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs',
                    argument_type=ArgumentType.CONTAINER,
                    data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)
def __init__(self):
    """Build a classification pipeline that geocodes a location column
    (Goat_forward) before training an XGBoost classifier.

    Steps: denormalize -> dataset_to_dataframe -> geocoding -> column_parser
    -> xgboost -> construct_predictions.
    """
    pipeline_description = Pipeline()
    pipeline_description.add_input(name="inputs")

    # Step 0: Denormalize primitive — collapse multi-resource dataset.
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.denormalize.Common"))
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="inputs.0",
    )
    step.add_output("produce")
    pipeline_description.add_step(step)

    # Step 1: DS to DF on input DS.
    # NOTE(review): reads "inputs.0", not the denormalized "steps.0.produce" —
    # presumably intentional, but confirm step 0's output is meant to be unused.
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.dataset_to_dataframe.Common"))
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="inputs.0",
    )
    step.add_output("produce")
    pipeline_description.add_step(step)

    # Step 2: Goat forward — geocode column index 1 (presumably the address
    # column; verify against the dataset schema), caching up to 2000 lookups.
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_cleaning.geocoding.Goat_forward"))
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.1.produce",
    )
    step.add_hyperparameter(name="target_columns",
                            argument_type=ArgumentType.VALUE,
                            data=[1])
    step.add_hyperparameter(name="cache_size",
                            argument_type=ArgumentType.VALUE,
                            data=2000)
    step.add_output("produce")
    pipeline_description.add_step(step)

    # Step 3: column parser on the geocoded DF.
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.column_parser.Common"))
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.2.produce",
    )
    step.add_output("produce")
    pipeline_description.add_step(step)

    # Step 4: XG Boost — same parsed frame is passed as both inputs and
    # outputs; the primitive selects target columns by semantic type itself.
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.classification.xgboost_gbtree.Common"))
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.3.produce",
    )
    step.add_argument(
        name="outputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.3.produce",
    )
    step.add_output("produce")
    step.add_hyperparameter(name="return_result",
                            argument_type=ArgumentType.VALUE,
                            data="replace")
    pipeline_description.add_step(step)

    # Step 5: construct predictions — re-attach the d3m index using the
    # original dataframe (step 1) as the reference.
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.construct_predictions.Common"))
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.4.produce",
    )
    step.add_argument(
        name="reference",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.1.produce",
    )
    step.add_output("produce")
    pipeline_description.add_step(step)

    # Final Output
    pipeline_description.add_output(name="output predictions",
                                    data_reference="steps.5.produce")

    self.pipeline = pipeline_description
def __init__(self):
    """Build a classification pipeline that profiles column types with Simon
    before training an XGBoost classifier.

    Steps: dataset_to_dataframe -> Simon profiler -> column_parser
    -> xgboost -> construct_predictions.
    """
    # Table of (primitive python path, container arguments, hyperparameters).
    # Steps are appended in order, so step N's output is "steps.N.produce".
    specs = (
        ("d3m.primitives.data_transformation.dataset_to_dataframe.Common",
         {"inputs": "inputs.0"},
         {}),
        ("d3m.primitives.data_cleaning.column_type_profiler.Simon",
         {"inputs": "steps.0.produce"},
         {}),
        ("d3m.primitives.data_transformation.column_parser.Common",
         {"inputs": "steps.1.produce"},
         {}),
        # Same parsed frame serves as both inputs and outputs; the primitive
        # picks target columns by semantic type on its own.
        ("d3m.primitives.classification.xgboost_gbtree.Common",
         {"inputs": "steps.2.produce", "outputs": "steps.2.produce"},
         {"add_index_columns": True}),
        ("d3m.primitives.data_transformation.construct_predictions.Common",
         {"inputs": "steps.3.produce", "reference": "steps.0.produce"},
         {}),
    )

    pipeline = Pipeline()
    pipeline.add_input(name="inputs")
    for path, container_args, hp_map in specs:
        step = PrimitiveStep(primitive=index.get_primitive(path))
        for arg_name, data_ref in container_args.items():
            step.add_argument(name=arg_name,
                              argument_type=ArgumentType.CONTAINER,
                              data_reference=data_ref)
        for hp_name, hp_value in hp_map.items():
            step.add_hyperparameter(name=hp_name,
                                    argument_type=ArgumentType.VALUE,
                                    data=hp_value)
        step.add_output("produce")
        pipeline.add_step(step)

    # Final output is the constructed predictions frame (last step).
    pipeline.add_output(name="output predictions",
                        data_reference="steps.4.produce")
    self.pipeline = pipeline
from d3m import index
from d3m.metadata.base import ArgumentType, Context
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
import sys

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: Denormalize primitive — collapse multi-resource dataset into one.
step_0 = PrimitiveStep(primitive=index.get_primitive(
    'd3m.primitives.data_transformation.denormalize.Common'))
step_0.add_argument(name='inputs',
                    argument_type=ArgumentType.CONTAINER,
                    data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: DISTIL/NK Storc primitive — Sloth k-means time-series clustering
# with 10 clusters over long-format data.
step_1 = PrimitiveStep(
    primitive=index.get_primitive('d3m.primitives.clustering.k_means.Sloth'))
step_1.add_argument(name='inputs',
                    argument_type=ArgumentType.CONTAINER,
                    data_reference='steps.0.produce')
step_1.add_hyperparameter(name='nclusters',
                          argument_type=ArgumentType.VALUE,
                          data=10)
step_1.add_hyperparameter(name='long_format',
                          argument_type=ArgumentType.VALUE,
                          data=True)
step_1.add_output('produce')
# NOTE(review): step_1 is never added to pipeline_description in this span —
# presumably pipeline_description.add_step(step_1) and the pipeline output
# follow further down in the original file; confirm.
def create_pipeline(metric: str) -> Pipeline:
    """Build the BERT pair-classification (QA) pipeline.

    Steps: denormalize (0) -> dataset_to_dataframe (1) -> column_parser (2)
    -> extract attributes (3) / extract targets (4) -> BERT pair
    classification (5) -> construct_predictions (6).

    Args:
        metric: evaluation metric forwarded to the BERT primitive.

    Returns:
        The assembled d3m Pipeline with a single "output" data reference.
    """
    ref = 'steps.{}.produce'.format

    qa_pipeline = Pipeline(context=PipelineContext.TESTING)
    qa_pipeline.add_input(name='inputs')

    # Step 0: denormalize so that we have a single dataframe in the dataset.
    step = PrimitiveStep(
        primitive_description=DenormalizePrimitive.metadata.query())
    step.add_argument(name='inputs',
                      argument_type=ArgumentType.CONTAINER,
                      data_reference='inputs.0')
    step.add_output('produce')
    qa_pipeline.add_step(step)

    # Step 1: extract the dataframe from the dataset.
    step = PrimitiveStep(
        primitive_description=DatasetToDataFramePrimitive.metadata.query())
    step.add_argument(name='inputs',
                      argument_type=ArgumentType.CONTAINER,
                      data_reference=ref(0))
    step.add_output('produce')
    qa_pipeline.add_step(step)

    # Step 2: parse raw column strings into typed values.
    parse_step = 2
    step = PrimitiveStep(
        primitive_description=ColumnParserPrimitive.metadata.query())
    step.add_argument(name='inputs',
                      argument_type=ArgumentType.CONTAINER,
                      data_reference=ref(1))
    step.add_hyperparameter(name='parse_semantic_types',
                            argument_type=ArgumentType.VALUE,
                            data=('http://schema.org/Boolean',
                                  'http://schema.org/Integer',
                                  'http://schema.org/Float',
                                  'https://metadata.datadrivendiscovery.org/types/FloatVector'))
    step.add_output('produce')
    qa_pipeline.add_step(step)

    # Steps 3 and 4: extract attributes, then targets, from the parsed frame.
    attributes_step, target_step = 3, 4
    for semantic_types in (
            ('https://metadata.datadrivendiscovery.org/types/Attribute', ),
            ('https://metadata.datadrivendiscovery.org/types/Target',
             'https://metadata.datadrivendiscovery.org/types/TrueTarget')):
        step = PrimitiveStep(
            primitive_description=ExtractColumnsBySemanticTypesPrimitive.
            metadata.query())
        step.add_argument(name='inputs',
                          argument_type=ArgumentType.CONTAINER,
                          data_reference=ref(parse_step))
        step.add_hyperparameter(name='semantic_types',
                                argument_type=ArgumentType.VALUE,
                                data=semantic_types)
        step.add_output('produce')
        qa_pipeline.add_step(step)

    # Step 5: BERT pair classification over (attributes, targets).
    # doc_col_0 / doc_col_1 select the two text columns forming each pair.
    step = PrimitiveStep(
        primitive_description=BertPairClassificationPrimitive.metadata.query())
    step.add_argument(name='inputs',
                      argument_type=ArgumentType.CONTAINER,
                      data_reference=ref(attributes_step))
    step.add_argument(name='outputs',
                      argument_type=ArgumentType.CONTAINER,
                      data_reference=ref(target_step))
    step.add_hyperparameter(name='metric',
                            argument_type=ArgumentType.VALUE,
                            data=metric)
    step.add_hyperparameter(name='doc_col_0',
                            argument_type=ArgumentType.VALUE,
                            data=1)
    step.add_hyperparameter(name='doc_col_1',
                            argument_type=ArgumentType.VALUE,
                            data=3)
    step.add_output('produce')
    qa_pipeline.add_step(step)

    # Step 6: convert predictions to the expected output format, using the
    # parsed frame as the index reference.
    step = PrimitiveStep(
        primitive_description=ConstructPredictionsPrimitive.metadata.query())
    step.add_argument(name='inputs',
                      argument_type=ArgumentType.CONTAINER,
                      data_reference=ref(5))
    step.add_argument(name='reference',
                      argument_type=ArgumentType.CONTAINER,
                      data_reference=ref(parse_step))
    step.add_output('produce')
    qa_pipeline.add_step(step)

    # Pipeline output is the constructed-predictions frame.
    qa_pipeline.add_output(name='output', data_reference=ref(6))

    return qa_pipeline