def create_pipeline_json(self, prim_dict):
    """Build a d3m ``Pipeline`` description of this object and store it.

    Recreates the pipeline's inputs, steps (arguments and hyperparameters)
    and outputs from this object's state, looking up each primitive in
    ``prim_dict``, and stores the result on ``self.pipeline_description``.

    :param prim_dict: mapping from the keys in ``self.primitives`` to
        primitive wrapper objects exposing ``id`` and ``primitive_class``
        metadata (version, python_path, name, digest, produce_methods).
    """
    name = "Pipeline for evaluation"
    pipeline_id = self.id #+ "_" + str(self.rank)
    pipeline_description = Pipeline(pipeline_id=pipeline_id, context=Context.EVALUATION, name=name)
    # Recreate the pipeline-level inputs.
    for ip in self.inputs:
        pipeline_description.add_input(name=ip['name'])
    num = self.num_steps()
    for i in range(num):
        p = prim_dict[self.primitives[i]]
        # Describe each primitive by its metadata dict rather than a live instance.
        pdesc = {}
        pdesc['id'] = p.id
        pdesc['version'] = p.primitive_class.version
        pdesc['python_path'] = p.primitive_class.python_path
        pdesc['name'] = p.primitive_class.name
        pdesc['digest'] = p.primitive_class.digest
        step = PrimitiveStep(primitive_description=pdesc)
        # Wire up this step's container arguments.
        for name, value in self.primitives_arguments[i].items():
            origin = value['origin']  # NOTE(review): read but unused here
            argument_type = ArgumentType.CONTAINER
            step.add_argument(name=name, argument_type=argument_type, data_reference=value['data'])
        # Expose the primitive's first produce method as the step output.
        step.add_output(output_id=p.primitive_class.produce_methods[0])
        # Re-apply any tuned hyperparameters as literal values.
        if self.hyperparams[i] is not None:
            for name, value in self.hyperparams[i].items():
                step.add_hyperparameter(name=name, argument_type=ArgumentType.VALUE, data=value)
        pipeline_description.add_step(step)
    # Recreate the pipeline-level outputs; op is indexed positionally —
    # presumably (…, …, data_reference, name). TODO(review): confirm with producer.
    for op in self.outputs:
        pipeline_description.add_output(data_reference=op[2], name=op[3])
    self.pipeline_description = pipeline_description
def create_pipeline(metric: str) -> Pipeline:
    """Build a two-step Kanine classification pipeline.

    ``metric`` is accepted for interface parity with sibling builders but
    is not used by this pipeline.
    """
    pipeline = Pipeline(context=PipelineContext.TESTING)
    pipeline.add_input(name='inputs')

    # Step 0: denormalize so the dataset collapses into a single dataframe.
    denorm = PrimitiveStep(primitive_description=DenormalizePrimitive.metadata.query())
    denorm.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                        data_reference='inputs.0')
    denorm.add_output('produce')
    pipeline.add_step(denorm)

    # Step 1: Kanine classification; inputs and outputs both come from step 0.
    kanine = PrimitiveStep(primitive_description=Kanine.metadata.query())
    kanine.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.0.produce')
    kanine.add_argument(name='outputs', argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.0.produce')
    kanine.add_output('produce')
    pipeline.add_step(kanine)

    # The pipeline output is the classifier's predictions.
    pipeline.add_output(name='output', data_reference='steps.1.produce')
    return pipeline
def create_pipeline(metric: str) -> Pipeline:
    """Build a two-step vertex-classification pipeline.

    ``metric`` is accepted for interface parity with sibling builders but
    is not used by this pipeline.
    """
    pipeline = Pipeline(context=PipelineContext.TESTING)
    pipeline.add_input(name='inputs')

    # Step 0: parse the input dataset into graph form.
    parser = PrimitiveStep(primitive_description=VertexClassificationParser.metadata.query())
    parser.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                        data_reference='inputs.0')
    parser.add_output('produce')
    pipeline.add_step(parser)

    # Step 1: classify vertices. jvm_memory is set to 0.6 — presumably a
    # memory fraction; confirm against the primitive's hyperparameter docs.
    classifier = PrimitiveStep(primitive_description=VertexClassification.metadata.query())
    classifier.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                            data_reference='steps.0.produce')
    classifier.add_hyperparameter('jvm_memory', ArgumentType.VALUE, 0.6)
    classifier.add_output('produce')
    pipeline.add_step(classifier)

    # The pipeline output is the classifier's predictions.
    pipeline.add_output(name='output', data_reference='steps.1.produce')
    return pipeline
def _add_primitive_to_pipeline(pipeline_description, primitive, resolver, attributes=None, targets=None, dataframe_step=None):
    """Append *primitive* as a new step of *pipeline_description*.

    Two wiring modes:
    - ``dataframe_step is None``: supervised wiring — ``inputs`` comes from
      *attributes* and ``outputs`` from *targets*.
    - otherwise: chain wiring — ``inputs`` comes from the most recently added
      step and ``reference`` from *dataframe_step*.
    """
    step = PrimitiveStep(primitive=primitive, resolver=resolver)
    if dataframe_step is not None:
        # Chain off the last step already present in the pipeline.
        prev_idx = len(pipeline_description.steps) - 1
        step.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                          data_reference=pipeline_utils.int_to_step(prev_idx))
        step.add_argument(name='reference', argument_type=ArgumentType.CONTAINER,
                          data_reference=dataframe_step)
    else:
        step.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                          data_reference=attributes)
        step.add_argument(name='outputs', argument_type=ArgumentType.CONTAINER,
                          data_reference=targets)
    step.add_output('produce')
    pipeline_description.add_step(step)
def add_primitive_to_pipeline(self, primitive, attributes, hyperparameters=None, targets=None, produce_collection=False):
    """Append *primitive* as a new step of ``self.pipeline`` and return the step.

    :param primitive: the primitive to wrap in a PrimitiveStep.
    :param attributes: data reference string for ``inputs``, or an object
        convertible via ``self.get_output_str``.
    :param hyperparameters: optional iterable of ``(name, argument_type, data)``
        triples applied to the step.
    :param targets: optional data reference (or convertible object) wired to
        the ``outputs`` argument when truthy.
    :param produce_collection: when True, also expose the
        ``produce_collection`` output.
    :return: the PrimitiveStep that was added.
    """
    # FIX: the original used a mutable default argument (hyperparameters=[]);
    # use None as the default and normalize here.
    if hyperparameters is None:
        hyperparameters = []
    inputs_ref = attributes if isinstance(attributes, str) else self.get_output_str(attributes)
    step = PrimitiveStep(primitive=primitive, resolver=self.resolver)
    step.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                      data_reference=inputs_ref)
    # Apply tuned hyperparameters, each given as (name, argument_type, data).
    for name, argument_type, data in hyperparameters:
        step.add_hyperparameter(name=name, argument_type=argument_type, data=data)
    if targets:
        outputs_ref = targets if isinstance(targets, str) else self.get_output_str(targets)
        step.add_argument(name='outputs', argument_type=ArgumentType.CONTAINER,
                          data_reference=outputs_ref)
    step.add_output('produce')
    if produce_collection:
        step.add_output('produce_collection')
    self.pipeline.add_step(step)
    return step
def set_prediction(pipeline_description):
    """Append a construct_predictions step that formats the output of the
    most recently added step, using the step at module-level ``IP_STEP`` as
    the reference dataframe."""
    last_idx = len(pipeline_description.steps) - 1
    pred = PrimitiveStep(
        primitive_description=d3m.primitives.data_transformation.
        construct_predictions.Common.metadata.query())
    pred.add_argument(name='inputs',
                      argument_type=ArgumentType.CONTAINER,
                      data_reference='steps.{}.produce'.format(last_idx))
    pred.add_argument(name='reference',
                      argument_type=ArgumentType.CONTAINER,
                      data_reference=f'steps.{IP_STEP}.produce')
    pred.add_output('produce')
    pipeline_description.add_step(pred)
def __init__(self):
    """Build a fixed time-series clustering pipeline (Sloth KMeans, 3 clusters)
    and store it on ``self.pipeline``."""
    pipeline = Pipeline()
    pipeline.add_input(name="inputs")

    def _make_step(python_path, data_ref):
        # Helper: a PrimitiveStep wired to a single 'inputs' container argument.
        s = PrimitiveStep(primitive=index.get_primitive(python_path))
        s.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER,
                       data_reference=data_ref)
        return s

    # Step 0: time-series formatter.
    step = _make_step(
        "d3m.primitives.data_transformation.time_series_formatter.DistilTimeSeriesFormatter",
        "inputs.0")
    step.add_output("produce")
    pipeline.add_step(step)

    # Step 1: dataset -> dataframe on the formatted time-series dataset.
    step = _make_step(
        "d3m.primitives.data_transformation.dataset_to_dataframe.Common",
        "steps.0.produce")
    step.add_output("produce")
    pipeline.add_step(step)

    # Step 2: grouping-field compose.
    step = _make_step(
        "d3m.primitives.data_transformation.grouping_field_compose.Common",
        "steps.1.produce")
    step.add_output("produce")
    pipeline.add_step(step)

    # Step 3: Sloth KMeans clustering with 3 clusters.
    step = _make_step("d3m.primitives.clustering.k_means.Sloth",
                      "steps.2.produce")
    step.add_hyperparameter(name="nclusters",
                            argument_type=ArgumentType.VALUE,
                            data=3)
    step.add_output("produce")
    pipeline.add_step(step)

    # Final pipeline output: the cluster predictions.
    pipeline.add_output(name="output predictions",
                        data_reference="steps.3.produce")
    self.pipeline = pipeline
def set_learner(pipeline_description, batch_size=BATCH_SIZE):
    """Append a KerasWrap learner step and a TAMU batching step.

    Relies on module-level step-index constants (LOSS_SETUP_IDX, IP_STEP,
    OP_STEP, READER_STEP) that must already be valid indices into
    ``pipeline_description.steps`` — TODO(review): confirm with callers.

    :param pipeline_description: pipeline to extend in place.
    :param batch_size: batch size passed to the batching primitive.
    """
    # Index the learner will occupy once added; the batching step below
    # references it by this index, so it must be computed before add_step.
    learner_idx = len(pipeline_description.steps)
    step = PrimitiveStep(primitive_description=d3m.primitives.learner.model.
                         KerasWrap.metadata.query())
    # PRIMITIVE-typed hyperparameters reference other steps by index,
    # not literal values.
    step.add_hyperparameter(name='loss',
                            argument_type=ArgumentType.PRIMITIVE,
                            data=LOSS_SETUP_IDX)
    step.add_hyperparameter(name='model_type',
                            argument_type=ArgumentType.VALUE,
                            data='classification')
    # The network's last layer is the step immediately preceding the learner.
    step.add_hyperparameter(name='network_last_layer',
                            argument_type=ArgumentType.PRIMITIVE,
                            data=learner_idx - 1)
    step.add_hyperparameter(name='return_result',
                            argument_type=ArgumentType.VALUE,
                            data='replace')
    lr = 0.0001
    # Start from the default Adam optimizer hyperparameters and override
    # only the learning rate.
    adam_hypers = d3m.primitives.learner.model.KerasWrap.metadata.get_hyperparams(
    ).defaults(path='optimizer.Adam')
    adam_hypers = adam_hypers.replace({'lr': lr})
    step.add_hyperparameter(name='optimizer',
                            argument_type=ArgumentType.VALUE,
                            data=adam_hypers)
    pipeline_description.add_step(step)
    # Batching step: streams data into the learner in random batches.
    bz_loader = PrimitiveStep(primitive_description=d3m.primitives.
                              data_wrangling.batching.TAMU.metadata.query())
    bz_loader.add_argument(name='inputs',
                           argument_type=ArgumentType.CONTAINER,
                           data_reference=f'steps.{IP_STEP}.produce')
    bz_loader.add_argument(name='outputs',
                           argument_type=ArgumentType.CONTAINER,
                           data_reference='steps.{}.produce'.format(OP_STEP))
    bz_loader.add_hyperparameter(name='primitive_reader',
                                 argument_type=ArgumentType.PRIMITIVE,
                                 data=READER_STEP)
    bz_loader.add_hyperparameter(name='primitive_learner',
                                 argument_type=ArgumentType.PRIMITIVE,
                                 data=learner_idx)
    bz_loader.add_hyperparameter(name='batch_size',
                                 argument_type=ArgumentType.VALUE,
                                 data=batch_size)
    bz_loader.add_hyperparameter(name='sampling_method',
                                 argument_type=ArgumentType.VALUE,
                                 data='random')
    bz_loader.add_output('produce')
    pipeline_description.add_step(bz_loader)
def build_demo_pipeline():
    """Assemble the demo pipeline: Featuretools DFS -> imputer -> random
    forest -> construct predictions. Returns the Pipeline."""
    pipeline = Pipeline(context=Context.TESTING)
    pipeline.add_input(name='inputs')

    # Step 0: deep feature synthesis over the raw input.
    dfs = PrimitiveStep(primitive_description=Featuretools.metadata.query())
    dfs.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                     data_reference='inputs.0')
    dfs.add_output('produce')
    pipeline.add_step(dfs)

    # Step 1: impute missing values.
    imputer = PrimitiveStep(primitive_description=SKlearnImputer.metadata.query())
    imputer.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                         data_reference='steps.0.produce')
    imputer.add_output('produce')
    pipeline.add_step(imputer)

    # Step 2: random-forest classifier driven by semantic types, keeping the
    # index columns so predictions can be joined back.
    rfc = PrimitiveStep(primitive_description=SKlearnRFC.metadata.query())
    rfc.add_hyperparameter(name='use_semantic_types',
                           argument_type=ArgumentType.VALUE, data=True)
    rfc.add_hyperparameter(name='add_index_columns',
                           argument_type=ArgumentType.VALUE, data=True)
    rfc.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                     data_reference='steps.1.produce')
    rfc.add_argument(name='outputs', argument_type=ArgumentType.CONTAINER,
                     data_reference='steps.1.produce')
    rfc.add_output('produce')
    pipeline.add_step(rfc)

    # Step 3: format predictions into the expected output structure.
    construct = PrimitiveStep(primitive_description=DataFrameCommon.metadata.query())
    construct.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                           data_reference='steps.2.produce')
    construct.add_argument(name='reference', argument_type=ArgumentType.CONTAINER,
                           data_reference='steps.2.produce')
    construct.add_output('produce')
    pipeline.add_step(construct)

    pipeline.add_output(name='output predictions',
                        data_reference='steps.3.produce')
    return pipeline
def _new_pipeline(pipeline, hyperparams=None):
    """Clone *pipeline* into a fresh Pipeline, optionally overriding step
    hyperparameters.

    *hyperparams*, when given, is converted with ``to_dicts`` and is expected
    to map step indices (as strings) to {name: value} overrides. Overrides
    replace the originals; untouched hyperparameters are copied as VALUEs.
    The clone gets empty ``cv_scores`` and a ``score`` of None.
    """
    overrides = to_dicts(hyperparams) if hyperparams else {}
    clone = Pipeline(context=Context.TESTING)
    for pipeline_input in pipeline.inputs:
        clone.add_input(name=pipeline_input['name'])
    for idx, source_step in enumerate(pipeline.steps):
        step = PrimitiveStep(primitive=source_step.primitive)
        # Copy arguments verbatim.
        for arg_name, arg in source_step.arguments.items():
            step.add_argument(name=arg_name,
                              argument_type=arg['type'],
                              data_reference=arg['data'])
        for out in source_step.outputs:
            step.add_output(out)
        step_overrides = overrides.get(str(idx), {})
        # Keep original hyperparameters that are not being overridden...
        for hp_name, hp in source_step.hyperparams.items():
            if hp_name not in step_overrides:
                step.add_hyperparameter(name=hp_name,
                                        argument_type=ArgumentType.VALUE,
                                        data=hp['data'])
        # ...then apply the overrides.
        for hp_name, hp_value in step_overrides.items():
            step.add_hyperparameter(name=hp_name,
                                    argument_type=ArgumentType.VALUE,
                                    data=hp_value)
        clone.add_step(step)
    for out in pipeline.outputs:
        clone.add_output(name=out['name'], data_reference=out['data'])
    # Bookkeeping attributes used by the surrounding search code.
    clone.cv_scores = []
    clone.score = None
    return clone
def load_pipeline_architecture(self, pipeline_architecture_dict):
    """Load a pipeline architecture description and return a d3m Pipeline.

    Each entry of *pipeline_architecture_dict* describes one stage with keys
    ``primitive`` (class or python-path string), ``stage_name``, ``input``
    (either ``PipelineWrapper.PIPELINE_INPUT`` or a previous stage name), and
    optional ``hyperparameters`` / ``arguments`` dicts.

    :return: the assembled Pipeline; its single output is the last stage's
        'produce' output.
    """
    pipeline_description = Pipeline(context=Context.TESTING)
    pipeline_description.add_input(name='inputs')

    # Map each stage's name to the data reference of its output so later
    # stages can refer to earlier ones by name.
    stage_name_to_reference_name = {}
    last_reference_name = None
    for stage_dict in pipeline_architecture_dict:
        primitive = stage_dict["primitive"]
        # FIX: use isinstance for the type check instead of type(...) == str.
        if isinstance(primitive, str):
            primitive = get_primitive_with_name(primitive)
        cur_stage_name = stage_dict["stage_name"]
        input_stage = stage_dict["input"]

        # Create the primitive step and wire its primary input.
        step = PrimitiveStep(primitive_description=primitive.metadata.query())
        data_reference = ("inputs.0"
                          if input_stage == PipelineWrapper.PIPELINE_INPUT
                          else stage_name_to_reference_name[input_stage])
        step.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER,
                          data_reference=data_reference)
        # Optional literal hyperparameters.
        for k, v in stage_dict.get("hyperparameters", {}).items():
            step.add_hyperparameter(name=k, argument_type=ArgumentType.VALUE,
                                    data=v)
        # Optional extra container arguments referencing earlier stages.
        for k, v in stage_dict.get("arguments", {}).items():
            step.add_argument(name=k, argument_type=ArgumentType.CONTAINER,
                              data_reference=stage_name_to_reference_name[v])
        step.add_output("produce")
        pipeline_description.add_step(step)

        # Record this stage's output reference for downstream stages.
        reference_name = next(iter(step.get_output_data_references()))
        stage_name_to_reference_name[cur_stage_name] = reference_name
        last_reference_name = reference_name

    # Output is the output of the last stage.
    pipeline_description.add_output(name="output",
                                    data_reference=last_reference_name)
    return pipeline_description
def add_classifier(pipeline_description, dataset_to_dataframe_step, attributes, targets):
    """Append a logistic-regression step plus a construct_predictions step,
    and register the pipeline's 'output predictions' output."""
    # Classifier: logistic regression trained on attributes vs targets.
    classifier = PrimitiveStep(primitive=SKLogisticRegression)
    classifier.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                            data_reference=attributes)
    classifier.add_argument(name='outputs', argument_type=ArgumentType.CONTAINER,
                            data_reference=targets)
    classifier.add_output('produce')
    pipeline_description.add_step(classifier)

    # Wrap raw predictions into the expected output format, referencing the
    # original dataframe step for index/structure.
    predictions = PrimitiveStep(
        primitive=index.get_primitive('d3m.primitives.data_transformation.construct_predictions.Common'))
    predictions.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                             data_reference=pipeline_utils.int_to_step(classifier.index))
    predictions.add_argument(name='reference', argument_type=ArgumentType.CONTAINER,
                             data_reference=dataset_to_dataframe_step)
    predictions.add_output('produce')
    pipeline_description.add_step(predictions)

    pipeline_description.add_output(
        name='output predictions',
        data_reference=pipeline_utils.int_to_step(predictions.index))
def create_pipeline(metric: str) -> Pipeline:
    """Build a time-series forecasting pipeline around the Parrot ARIMA
    primitive.

    ``metric`` is accepted for interface parity with sibling builders but is
    not used by this pipeline.

    :return: Pipeline whose 'output' is the ARIMA forecast.
    """
    # FIX: removed unused locals (previous_step, input_val) and the dead
    # commented-out ConstructPredictions block.
    tsf_pipeline = Pipeline(context=PipelineContext.TESTING)
    tsf_pipeline.add_input(name='inputs')

    # step 0 - extract a dataframe from the dataset
    step = PrimitiveStep(
        primitive_description=DatasetToDataFramePrimitive.metadata.query())
    step.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                      data_reference='inputs.0')
    step.add_output('produce')
    tsf_pipeline.add_step(step)

    # step 1 - parse columns, restricted to the semantic types ARIMA needs
    step = PrimitiveStep(
        primitive_description=ColumnParserPrimitive.metadata.query())
    step.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                      data_reference='steps.0.produce')
    step.add_output('produce')
    semantic_types = (
        'http://schema.org/Boolean', 'http://schema.org/Integer',
        'http://schema.org/Float',
        'https://metadata.datadrivendiscovery.org/types/FloatVector')
    step.add_hyperparameter('parse_semantic_types', ArgumentType.VALUE,
                            semantic_types)
    tsf_pipeline.add_step(step)

    # step 2 - Parrot ARIMA forecaster (seasonal differencing 11, 21 periods)
    step = PrimitiveStep(primitive_description=Parrot.metadata.query())
    step.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                      data_reference='steps.1.produce')
    step.add_argument(name='outputs', argument_type=ArgumentType.CONTAINER,
                      data_reference='steps.1.produce')
    step.add_hyperparameter(name='seasonal_differencing',
                            argument_type=ArgumentType.VALUE, data=11)
    step.add_hyperparameter(name='n_periods',
                            argument_type=ArgumentType.VALUE, data=21)
    step.add_output('produce')
    tsf_pipeline.add_step(step)

    # Adding output step to the pipeline
    tsf_pipeline.add_output(name='output', data_reference='steps.2.produce')
    return tsf_pipeline
def create_pipeline(metric: str) -> Pipeline:
    """Build a two-step link-prediction pipeline over a single graph.

    :param metric: scoring metric forwarded to the link-prediction primitive.
    """
    pipeline = Pipeline(context=PipelineContext.TESTING)
    pipeline.add_input(name='inputs')

    # Step 0: load the graph; exposes both the graph ('produce') and the
    # targets ('produce_target').
    loader = PrimitiveStep(primitive_description=DistilSingleGraphLoaderPrimitive.metadata.query())
    loader.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                        data_reference='inputs.0')
    loader.add_output('produce')
    loader.add_output('produce_target')
    pipeline.add_step(loader)

    # Step 1: predict links, trained against the loader's target output.
    predictor = PrimitiveStep(primitive_description=DistilLinkPredictionPrimitive.metadata.query())
    predictor.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                           data_reference='steps.0.produce')
    predictor.add_argument(name='outputs', argument_type=ArgumentType.CONTAINER,
                           data_reference='steps.0.produce_target')
    predictor.add_hyperparameter('metric', ArgumentType.VALUE, metric)
    predictor.add_output('produce')
    pipeline.add_step(predictor)

    pipeline.add_output(name='output', data_reference='steps.1.produce')
    return pipeline
def __init__(self, epochs: int = 10, n_steps: int = 20):
    """Build an object-detection pipeline around the RetinaNet primitive and
    store it on ``self.pipeline``.

    :param epochs: training epochs passed as the 'n_epochs' hyperparameter.
    :param n_steps: steps per epoch passed as the 'n_steps' hyperparameter.
    """
    pipeline = Pipeline()
    pipeline.add_input(name="inputs")

    # Step 0: denormalize the dataset.
    denorm = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.denormalize.Common"))
    denorm.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER,
                        data_reference="inputs.0")
    denorm.add_output("produce")
    pipeline.add_step(denorm)

    # Step 1: dataset -> dataframe.
    to_df = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.dataset_to_dataframe.Common"))
    to_df.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER,
                       data_reference="steps.0.produce")
    to_df.add_output("produce")
    pipeline.add_step(to_df)

    # Step 2: RetinaNet detector; weights are cached under /scratch_dir/.
    detector = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.object_detection.retina_net.ObjectDetectionRN"))
    detector.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER,
                          data_reference="steps.1.produce")
    detector.add_argument(name="outputs", argument_type=ArgumentType.CONTAINER,
                          data_reference="steps.1.produce")
    detector.add_hyperparameter(name="n_epochs",
                                argument_type=ArgumentType.VALUE, data=epochs)
    detector.add_hyperparameter(name="n_steps",
                                argument_type=ArgumentType.VALUE, data=n_steps)
    detector.add_hyperparameter(name="weights_path",
                                argument_type=ArgumentType.VALUE,
                                data="/scratch_dir/")
    detector.add_output("produce")
    pipeline.add_step(detector)

    pipeline.add_output(name="output predictions",
                        data_reference="steps.2.produce")
    self.pipeline = pipeline
def _gen_pipeline(self):
    """Assemble the Euclidean-nomination pipeline: three dataframe views of
    the same dataset input feed a nomination step."""
    pipeline = meta_pipeline.Pipeline()
    pipeline.add_input(name='inputs')

    def _dataframe_step(resource=None):
        # Helper: dataset_to_dataframe over 'inputs.0', optionally selecting
        # a specific dataframe resource.
        s = PrimitiveStep(primitive=index.get_primitive(
            'd3m.primitives.data_transformation.dataset_to_dataframe.Common'))
        s.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                       data_reference='inputs.0')
        if resource is not None:
            s.add_hyperparameter(name='dataframe_resource',
                                 argument_type=ArgumentType.VALUE,
                                 data=resource)
        s.add_output('produce')
        return s

    # Steps 0-2: default resource, then resources '1' and '2'.
    pipeline.add_step(_dataframe_step())
    pipeline.add_step(_dataframe_step('1'))
    pipeline.add_step(_dataframe_step('2'))

    # Step 3: nominate matches between the two resource dataframes, using the
    # default dataframe as reference.
    nominate = meta_pipeline.PrimitiveStep(
        primitive_description=EuclideanNomination.metadata.query())
    nominate.add_argument(name='inputs_1', argument_type=ArgumentType.CONTAINER,
                          data_reference='steps.1.produce')
    nominate.add_argument(name='inputs_2', argument_type=ArgumentType.CONTAINER,
                          data_reference='steps.2.produce')
    nominate.add_argument(name='reference', argument_type=ArgumentType.CONTAINER,
                          data_reference='steps.0.produce')
    nominate.add_output('produce')
    pipeline.add_step(nominate)

    pipeline.add_output(name='Predictions', data_reference='steps.3.produce')
    return pipeline
def community_detection(resolver=None):
    """Build a three-step community-detection pipeline.

    :param resolver: primitive resolver; defaults to a BlackListResolver.
    :return: the assembled Pipeline (output name 'Result').
    """
    # FIX: removed dead trailing assignments (last_step, attributes, targets)
    # that were computed but never used or returned.
    if resolver is None:
        resolver = custom_resolver.BlackListResolver()
    pipeline_description = Pipeline(context=PipelineContext.TESTING)
    pipeline_description.add_input(name='inputs')
    start_step = "inputs.0"

    # Step 0: parse the input dataset into graph form.
    step_0 = PrimitiveStep(primitive_description=d3m.primitives.sri.graph.
                           CommunityDetectionParser.metadata.query(),
                           resolver=resolver)
    step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                        data_reference=start_step)
    step_0.add_output('produce')
    pipeline_description.add_step(step_0)

    # Step 1: PSL community detection; jvm_memory=0.5 — presumably a memory
    # fraction for the JVM; confirm with the primitive's docs.
    step_1 = PrimitiveStep(primitive_description=d3m.primitives.sri.psl.
                           CommunityDetection.metadata.query(),
                           resolver=resolver)
    step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.0.produce')
    step_1.add_hyperparameter(name='jvm_memory',
                              argument_type=ArgumentType.VALUE, data=0.5)
    step_1.add_output('produce')
    pipeline_description.add_step(step_1)

    # Step 2: drop the first column of the result.
    step_2 = PrimitiveStep(primitive_description=d3m.primitives.data.
                           RemoveColumns.metadata.query(),
                           resolver=resolver)
    step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.1.produce')
    step_2.add_hyperparameter(name='columns',
                              argument_type=ArgumentType.VALUE, data=[0])
    step_2.add_output('produce')
    pipeline_description.add_step(step_2)

    pipeline_description.add_output(name='Result',
                                    data_reference='steps.2.produce')
    return pipeline_description
def create_pipeline(metric: str) -> Pipeline:
    """Build a Vector Auto Regression (VAR) forecasting pipeline.

    ``metric`` is accepted for interface parity with sibling builders but is
    not used by this pipeline.

    :return: Pipeline whose 'output' is the VAR forecast.
    """
    # FIX: removed unused locals (previous_step, input_val).
    var_pipeline = Pipeline(context=PipelineContext.TESTING)
    var_pipeline.add_input(name='inputs')

    # step 0 - extract a dataframe from the dataset
    step = PrimitiveStep(
        primitive_description=DatasetToDataFramePrimitive.metadata.query())
    step.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                      data_reference='inputs.0')
    step.add_output('produce')
    var_pipeline.add_step(step)

    # step 1 - parse columns, restricted to the semantic types VAR needs
    step = PrimitiveStep(
        primitive_description=ColumnParserPrimitive.metadata.query())
    step.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                      data_reference='steps.0.produce')
    step.add_output('produce')
    semantic_types = (
        'http://schema.org/Boolean', 'http://schema.org/Integer',
        'http://schema.org/Float',
        'https://metadata.datadrivendiscovery.org/types/FloatVector')
    step.add_hyperparameter('parse_semantic_types', ArgumentType.VALUE,
                            semantic_types)
    var_pipeline.add_step(step)

    # step 2 - Vector Auto Regression
    step = PrimitiveStep(primitive_description=VAR.metadata.query())
    step.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                      data_reference='steps.1.produce')
    step.add_argument(name='outputs', argument_type=ArgumentType.CONTAINER,
                      data_reference='steps.1.produce')
    step.add_output('produce')
    var_pipeline.add_step(step)

    # Adding output step to the pipeline
    var_pipeline.add_output(name='output', data_reference='steps.2.produce')
    return var_pipeline
def create_pipeline(metric: str) -> Pipeline:
    """Build a two-step seeded graph-matching pipeline.

    :param metric: scoring metric forwarded to the graph-matching primitive.
    """
    pipeline = Pipeline(context=PipelineContext.TESTING)
    pipeline.add_input(name='inputs')

    # Step 0: load the graphs; exposes the graphs ('produce') and the
    # seed targets ('produce_target').
    loader = PrimitiveStep(
        primitive_description=DistilGraphLoaderPrimitive.metadata.query())
    loader.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                        data_reference='inputs.0')
    loader.add_output('produce')
    loader.add_output('produce_target')
    pipeline.add_step(loader)

    # Step 1: match the graphs using the seeded correspondences.
    matcher = PrimitiveStep(
        primitive_description=DistilSeededGraphMatchingPrimitive.metadata.query())
    matcher.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                         data_reference='steps.0.produce')
    matcher.add_argument(name='outputs', argument_type=ArgumentType.CONTAINER,
                         data_reference='steps.0.produce_target')
    matcher.add_hyperparameter('metric', ArgumentType.VALUE, metric)
    matcher.add_output('produce')
    pipeline.add_step(matcher)

    # A ConstructPredictions formatting step was deliberately left out here;
    # the matcher's raw predictions are the pipeline output.
    pipeline.add_output(name='output', data_reference='steps.1.produce')
    return pipeline
def __init__(self):
    """Build a text-summarization pipeline around the Duke primitive and
    store it on ``self.pipeline``."""
    pipeline = Pipeline()
    pipeline.add_input(name="inputs")

    # Step 0: dataset -> dataframe.
    to_df = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.dataset_to_dataframe.Common"))
    to_df.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER,
                       data_reference="inputs.0")
    to_df.add_output("produce")
    pipeline.add_step(to_df)

    # Step 1: parse column types.
    parser = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.column_parser.Common"))
    parser.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER,
                        data_reference="steps.0.produce")
    parser.add_output("produce")
    pipeline.add_step(parser)

    # Step 2: Duke text summarization.
    duke = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_cleaning.text_summarization.Duke"))
    duke.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER,
                      data_reference="steps.1.produce")
    duke.add_output("produce")
    pipeline.add_step(duke)

    pipeline.add_output(name="output predictions",
                        data_reference="steps.2.produce")
    self.pipeline = pipeline
from d3m import index
from d3m.metadata.base import ArgumentType, Context
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
import sys

# Script that assembles a denormalize -> Sloth KMeans clustering pipeline.
# NOTE(review): this fragment appears truncated — step_1 is never added to
# the pipeline and no pipeline output is declared; confirm against the
# remainder of the original file.

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: Denormalize primitive
step_0 = PrimitiveStep(primitive=index.get_primitive(
    'd3m.primitives.data_transformation.denormalize.Common'))
step_0.add_argument(name='inputs',
                    argument_type=ArgumentType.CONTAINER,
                    data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: DISTIL/NK Storc primitive
# Sloth KMeans clustering over the denormalized data, 10 clusters,
# long-format input enabled.
step_1 = PrimitiveStep(
    primitive=index.get_primitive('d3m.primitives.clustering.k_means.Sloth'))
step_1.add_argument(name='inputs',
                    argument_type=ArgumentType.CONTAINER,
                    data_reference='steps.0.produce')
step_1.add_hyperparameter(name='nclusters',
                          argument_type=ArgumentType.VALUE,
                          data=10)
step_1.add_hyperparameter(name='long_format',
                          argument_type=ArgumentType.VALUE,
                          data=True)
step_1.add_output('produce')
def generate_only():
    """Build a single-table DFS regression pipeline, run a d3m fit-score on
    it via the shell, and return the command for re-running from the saved
    pipeline-run file.

    Side effects: writes a .yml pipeline file and a pipeline_run file (via
    ``generate_pipeline``) and invokes ``python3 -m d3m`` through
    ``os.system``.  Relies on hard-coded container paths under
    /featuretools_ta1/ — only works inside that environment.

    :return: shell command string that replays the run from the
        pipeline_run file.
    """
    # Creating pipeline
    pipeline_description = Pipeline()
    pipeline_description.add_input(name='inputs')
    # Step 0: dataset_to_dataframe
    step_0 = PrimitiveStep(primitive=index.get_primitive(
        'd3m.primitives.data_transformation.dataset_to_dataframe.Common'))
    step_0.add_argument(name='inputs',
                        argument_type=ArgumentType.CONTAINER,
                        data_reference='inputs.0')
    step_0.add_output('produce')
    pipeline_description.add_step(step_0)
    # Step 1: schema profiler (infers column semantic types)
    step_1 = PrimitiveStep(primitive=index.get_primitive(
        'd3m.primitives.schema_discovery.profiler.Common'))
    step_1.add_argument(name='inputs',
                        argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.0.produce')
    step_1.add_output('produce')
    pipeline_description.add_step(step_1)
    # Step 2: column_parser
    step_2 = PrimitiveStep(primitive=index.get_primitive(
        'd3m.primitives.data_transformation.column_parser.Common'))
    step_2.add_argument(name='inputs',
                        argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.1.produce')
    step_2.add_output('produce')
    pipeline_description.add_step(step_2)
    # Step 3: DFS Single Table (deep feature synthesis)
    step_3 = PrimitiveStep(primitive=index.get_primitive(
        'd3m.primitives.feature_construction.deep_feature_synthesis.SingleTableFeaturization'
    ))
    step_3.add_argument(name='inputs',
                        argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.2.produce')
    step_3.add_output('produce')
    pipeline_description.add_step(step_3)
    # Step 4: learn model — XGBoost regression on the synthesized features,
    # with targets taken from the parsed (pre-DFS) dataframe.
    step_4 = PrimitiveStep(primitive=index.get_primitive(
        'd3m.primitives.regression.xgboost_gbtree.Common'))
    step_4.add_argument(name='inputs',
                        argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.3.produce')
    step_4.add_argument(name='outputs',
                        argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.2.produce')
    step_4.add_output('produce')
    pipeline_description.add_step(step_4)
    # Step 5: construct output predictions in the expected format
    step_5 = PrimitiveStep(primitive=index.get_primitive(
        'd3m.primitives.data_transformation.construct_predictions.Common'))
    step_5.add_argument(name='inputs',
                        argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.4.produce')
    step_5.add_argument(name='reference',
                        argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.2.produce')
    step_5.add_output('produce')
    pipeline_description.add_step(step_5)
    # Final Output
    pipeline_description.add_output(name='output predictions',
                                    data_reference='steps.5.produce')
    # Generate .yml file for the pipeline
    import featuretools_ta1
    from pipeline_tests.utils import generate_pipeline
    dataset_name = 'LL1_retail_sales_total_MIN_METADATA'
    dataset_path = '/featuretools_ta1/datasets/seed_datasets_current'
    primitive_name = 'd3m.primitives.feature_construction.deep_feature_synthesis.SingleTableFeaturization'
    version = featuretools_ta1.__version__
    # Derive the test name from this file's basename, dropping its first
    # five characters (presumably a 'test_' prefix — TODO confirm).
    test_name = os.path.splitext(os.path.basename(__file__))[0][5:]
    yml, pipeline_run_file = generate_pipeline(
        primitive_name=primitive_name,
        pipeline_description=pipeline_description,
        dataset_name=dataset_name,
        test_name=test_name)
    # fit-score command
    fs_cmd = 'python3 -m d3m runtime -d /featuretools_ta1/datasets/ fit-score -p {}'.format(
        yml)
    fs_cmd += ' -r {}/{}/{}_problem/problemDoc.json'.format(
        dataset_path, dataset_name, dataset_name)
    fs_cmd += ' -i {}/{}/TRAIN/dataset_TRAIN/datasetDoc.json'.format(
        dataset_path, dataset_name)
    fs_cmd += ' -t {}/{}/TEST/dataset_TEST/datasetDoc.json'.format(
        dataset_path, dataset_name)
    fs_cmd += ' -a {}/{}/SCORE/dataset_SCORE/datasetDoc.json'.format(
        dataset_path, dataset_name)
    fs_cmd += ' -O {}'.format(pipeline_run_file)
    # Run pipeline to save pipeline_run file
    os.system(fs_cmd)
    # Create and return command for running from pipeline_run file:
    pipeline_run_cmd = 'python3 -m d3m --pipelines-path /featuretools_ta1/MIT_FeatureLabs/{}/{}/pipelines/'.format(
        primitive_name, version)
    pipeline_run_cmd += ' runtime -d /featuretools_ta1/datasets/ fit-score'
    pipeline_run_cmd += ' -u {}'.format(pipeline_run_file)
    return pipeline_run_cmd
def __init__(self):
    """Assemble a fixed classification pipeline and store it on
    ``self.pipeline``: dataset_to_dataframe -> Simon column-type
    profiler -> column parser -> XGBoost GBTree classifier ->
    construct_predictions.
    """
    pipeline = Pipeline()
    pipeline.add_input(name="inputs")

    def append_step(python_path, arguments, hyperparams=()):
        # Build one PrimitiveStep, wire its container arguments, expose
        # its 'produce' output and register it on the pipeline.
        step = PrimitiveStep(primitive=index.get_primitive(python_path))
        for arg_name, data_ref in arguments:
            step.add_argument(name=arg_name,
                              argument_type=ArgumentType.CONTAINER,
                              data_reference=data_ref)
        for hp_name, hp_value in hyperparams:
            step.add_hyperparameter(name=hp_name,
                                    argument_type=ArgumentType.VALUE,
                                    data=hp_value)
        step.add_output("produce")
        pipeline.add_step(step)

    # Step 0: dataset -> dataframe on the input dataset.
    append_step(
        "d3m.primitives.data_transformation.dataset_to_dataframe.Common",
        [("inputs", "inputs.0")])
    # Step 1: Simon column-type profiling.
    append_step(
        "d3m.primitives.data_cleaning.column_type_profiler.Simon",
        [("inputs", "steps.0.produce")])
    # Step 2: parse columns of the profiled dataframe.
    append_step(
        "d3m.primitives.data_transformation.column_parser.Common",
        [("inputs", "steps.1.produce")])
    # Step 3: XGBoost gradient-boosted-tree classifier; targets are taken
    # from the same parsed frame, index columns are kept in the output.
    append_step(
        "d3m.primitives.classification.xgboost_gbtree.Common",
        [("inputs", "steps.2.produce"), ("outputs", "steps.2.produce")],
        hyperparams=[("add_index_columns", True)])
    # Step 4: attach predictions back to the original row index.
    append_step(
        "d3m.primitives.data_transformation.construct_predictions.Common",
        [("inputs", "steps.3.produce"), ("reference", "steps.0.produce")])

    # Final Output
    pipeline.add_output(name="output predictions",
                        data_reference="steps.4.produce")
    self.pipeline = pipeline
def build_pipeline(pipepline_info, pipepline_mapping, stdout=None):
    """Assemble a d3m Pipeline from a sequence of primitive descriptions
    and write it to ``example_pipeline.json``.

    Args:
        pipepline_info: iterable of primitive-info objects, each with
            ``python_path``, ``hyperparameter`` (dict) and ``ancestors``
            (dict). An entry whose python_path is 'HEAD' carries the
            dataset folder; 'ENDING' marks the step whose output becomes
            the pipeline output. (Parameter names keep their historical
            spelling for caller compatibility.)
        pipepline_mapping: dict mapping ancestor ids to 1-based step
            numbers.
        stdout: optional stream; when given, all diagnostic printing is
            redirected to it for the duration of the call.

    Side effects: writes 'example_pipeline.json' in the working directory.
    """
    default_stdout = sys.stdout
    if stdout is not None:
        sys.stdout = stdout
    # try/finally guarantees sys.stdout is restored even if pipeline
    # construction or file output raises (previously an exception left
    # the process with a hijacked stdout).
    try:
        # Creating pipeline
        pipeline_description = Pipeline()
        pipeline_description.add_input(name='inputs')

        for primitive_info in pipepline_info:
            print(primitive_info.python_path)
            print(primitive_info.hyperparameter)
            print(primitive_info.ancestors)
            if primitive_info.python_path == 'HEAD':
                # Sentinel entry: only records the dataset folder.
                dataset_fullname = primitive_info.hyperparameter['dataset_folder']
                print(dataset_fullname)
                continue
            elif primitive_info.python_path == 'ENDING':
                # Sentinel entry: wire the pipeline output to the step
                # named by its 'inputs' ancestor (mapping is 1-based).
                ancestors = primitive_info.ancestors
                end_step_num = pipepline_mapping[ancestors['inputs']] - 1
                pipeline_description.add_output(
                    name='output predictions',
                    data_reference='steps.' + str(end_step_num) + '.produce')
            else:
                primitive = index.get_primitive(primitive_info.python_path)
                step = PrimitiveStep(primitive=primitive)
                hyperparameters = primitive_info.hyperparameter
                ancestors = primitive_info.ancestors
                # Wire arguments: ancestors == {'inputs': 0} means the raw
                # pipeline input, otherwise each ancestor maps to the
                # produce output of an earlier step.
                if ancestors['inputs'] != 0:
                    for ances_key, ances_val in ancestors.items():
                        print(ances_key, ances_val,
                              pipepline_mapping[ances_val] - 1)
                        step_num = pipepline_mapping[ances_val] - 1
                        step.add_argument(
                            name=ances_key,
                            argument_type=ArgumentType.CONTAINER,
                            data_reference='steps.' + str(step_num) + '.produce')
                else:
                    step.add_argument(name='inputs',
                                      argument_type=ArgumentType.CONTAINER,
                                      data_reference='inputs.0')
                # Apply all declared hyperparameters as fixed values.
                for hyper, hyper_value in hyperparameters.items():
                    step.add_hyperparameter(name=hyper,
                                            argument_type=ArgumentType.VALUE,
                                            data=hyper_value)
                step.add_output('produce')
                pipeline_description.add_step(step)

        # Output to json
        data = pipeline_description.to_json()
        with open('example_pipeline.json', 'w') as f:
            f.write(data)
        print(data)
    finally:
        sys.stdout.flush()
        sys.stdout = default_stdout
def __init__(
    self,
    epochs: int = 5000,
    attention_lstm: bool = True,
):
    """Build a time-series classification pipeline around LSTM_FCN and
    store it on ``self.pipeline``.

    Args:
        epochs: number of training epochs passed to the LSTM_FCN primitive.
        attention_lstm: whether the LSTM_FCN primitive uses its attention
            variant.
    """
    pipeline = Pipeline()
    pipeline.add_input(name="inputs")

    def append_step(python_path, arguments, hyperparams=()):
        # Create a PrimitiveStep, attach container arguments and fixed
        # hyperparameters, expose 'produce', and add it to the pipeline.
        step = PrimitiveStep(primitive=index.get_primitive(python_path))
        for arg_name, data_ref in arguments:
            step.add_argument(name=arg_name,
                              argument_type=ArgumentType.CONTAINER,
                              data_reference=data_ref)
        for hp_name, hp_value in hyperparams:
            step.add_hyperparameter(name=hp_name,
                                    argument_type=ArgumentType.VALUE,
                                    data=hp_value)
        step.add_output("produce")
        pipeline.add_step(step)

    # Step 0: reformat the raw time-series dataset.
    append_step(
        "d3m.primitives.data_transformation.time_series_formatter.DistilTimeSeriesFormatter",
        [("inputs", "inputs.0")])
    # Step 1: dataframe view of the formatted time-series dataset.
    append_step(
        "d3m.primitives.data_transformation.dataset_to_dataframe.Common",
        [("inputs", "steps.0.produce")])
    # Step 2: dataframe view of the ORIGINAL input dataset (kept for
    # target extraction and prediction reconstruction).
    append_step(
        "d3m.primitives.data_transformation.dataset_to_dataframe.Common",
        [("inputs", "inputs.0")])
    # Step 3: parse columns of the original dataframe into typed values.
    append_step(
        "d3m.primitives.data_transformation.column_parser.Common",
        [("inputs", "steps.2.produce")],
        hyperparams=[(
            "parse_semantic_types",
            [
                "http://schema.org/Boolean",
                "http://schema.org/Integer",
                "http://schema.org/Float",
                "https://metadata.datadrivendiscovery.org/types/FloatVector",
            ],
        )])
    # Step 4: pull out the target column(s).
    append_step(
        "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common",
        [("inputs", "steps.3.produce")],
        hyperparams=[(
            "semantic_types",
            [
                "https://metadata.datadrivendiscovery.org/types/Target",
            ],
        )])
    # Step 5: LSTM_FCN classifier — series from step 1, targets from step 4.
    append_step(
        "d3m.primitives.time_series_classification.convolutional_neural_net.LSTM_FCN",
        [("inputs", "steps.1.produce"), ("outputs", "steps.4.produce")],
        hyperparams=[("epochs", epochs), ("attention_lstm", attention_lstm)])
    # Step 6: attach predictions to the original dataframe's index.
    append_step(
        "d3m.primitives.data_transformation.construct_predictions.Common",
        [("inputs", "steps.5.produce"), ("reference", "steps.2.produce")])

    # Final Output
    pipeline.add_output(
        name="output predictions", data_reference="steps.6.produce"
    )
    self.pipeline = pipeline
# Script fragment: builds a d3m pipeline description at module level.
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name="inputs")

# Step 0: DS to DF on input DS (dataset -> dataframe)
step_0 = PrimitiveStep(primitive=index.get_primitive(
    "d3m.primitives.data_transformation.dataset_to_dataframe.Common"))
step_0.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER,
                    data_reference="inputs.0")
step_0.add_output("produce")
pipeline_description.add_step(step_0)

# Step 1: Simple Profiler Column Role Annotation
step_1 = PrimitiveStep(primitive=index.get_primitive(
    "d3m.primitives.schema_discovery.profiler.Common"))
step_1.add_argument(
    name="inputs",
    argument_type=ArgumentType.CONTAINER,
    data_reference="steps.0.produce",
)
step_1.add_output("produce")
pipeline_description.add_step(step_1)

# Step 2: column parser on input DF
# NOTE(review): step_2 is constructed here but its arguments/output and
# registration on the pipeline are not in this chunk — presumably they
# follow below; verify in the full file.
step_2 = PrimitiveStep(primitive=index.get_primitive(
    "d3m.primitives.data_transformation.column_parser.Common"))
# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest # extract_columns_by_semantic_types(targets) -> ^ # Creating pipeline pipeline_description = Pipeline() pipeline_description.add_input(name='inputs') # Step 0: dataset_to_dataframe primitive_0 = index.get_primitive( 'd3m.primitives.tods.data_processing.dataset_to_dataframe') step_0 = PrimitiveStep(primitive=primitive_0) step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0') step_0.add_output('produce') pipeline_description.add_step(step_0) # Step 1: Column Parser primitive_1 = index.get_primitive( 'd3m.primitives.data_transformation.column_parser.Common') step_1 = PrimitiveStep(primitive=primitive_1) step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce') step_1.add_output('produce') pipeline_description.add_step(step_1) # Step 2: Discrete Cosine Transform primitive_2 = index.get_primitive( 'd3m.primitives.tods.feature_analysis.discrete_cosine_transform')
def create_pipeline(metric: str) -> Pipeline:
    """Build the BERT pair-classification (question answering) pipeline.

    Layout:
        denormalize -> dataset_to_dataframe -> column_parser
          -> extract(attributes) --+
          -> extract(targets) -----+-> BertPairClassification
          -> construct_predictions

    Args:
        metric: evaluation metric forwarded to the Bert primitive.

    Returns:
        The assembled Pipeline with a single 'output' sink.
    """
    input_val = 'steps.{}.produce'

    # create the basic pipeline
    qa_pipeline = Pipeline(context=PipelineContext.TESTING)
    qa_pipeline.add_input(name='inputs')

    next_index = [0]  # mutable cell so append() can hand out step indices

    def append(primitive_cls, arguments, hyperparams=()):
        # Add one step built from a primitive class; returns its index.
        step = PrimitiveStep(
            primitive_description=primitive_cls.metadata.query())
        for arg_name, data_ref in arguments:
            step.add_argument(name=arg_name,
                              argument_type=ArgumentType.CONTAINER,
                              data_reference=data_ref)
        step.add_output('produce')
        for hp_name, hp_value in hyperparams:
            step.add_hyperparameter(hp_name, ArgumentType.VALUE, hp_value)
        qa_pipeline.add_step(step)
        idx = next_index[0]
        next_index[0] += 1
        return idx

    # Denormalize so that we have a single dataframe in the dataset.
    denorm_step = append(DenormalizePrimitive, [('inputs', 'inputs.0')])

    # Extract dataframe from dataset.
    df_step = append(DatasetToDataFramePrimitive,
                     [('inputs', input_val.format(denorm_step))])

    # Parse columns into typed values.
    parse_step = append(
        ColumnParserPrimitive,
        [('inputs', input_val.format(df_step))],
        hyperparams=[('parse_semantic_types', (
            'http://schema.org/Boolean',
            'http://schema.org/Integer',
            'http://schema.org/Float',
            'https://metadata.datadrivendiscovery.org/types/FloatVector'))])

    # Extract attributes.
    attributes_step = append(
        ExtractColumnsBySemanticTypesPrimitive,
        [('inputs', input_val.format(parse_step))],
        hyperparams=[('semantic_types',
                      ('https://metadata.datadrivendiscovery.org/types/Attribute', ))])

    # Extract targets.
    target_step = append(
        ExtractColumnsBySemanticTypesPrimitive,
        [('inputs', input_val.format(parse_step))],
        hyperparams=[('semantic_types', (
            'https://metadata.datadrivendiscovery.org/types/Target',
            'https://metadata.datadrivendiscovery.org/types/TrueTarget'))])

    # Bert pair classification model; doc_col_* pick the two text columns.
    bert_step = append(
        BertPairClassificationPrimitive,
        [('inputs', input_val.format(attributes_step)),
         ('outputs', input_val.format(target_step))],
        hyperparams=[('metric', metric),
                     ('doc_col_0', 1),
                     ('doc_col_1', 3)])

    # Convert predictions to the expected output format.
    final_step = append(
        ConstructPredictionsPrimitive,
        [('inputs', input_val.format(bert_step)),
         ('reference', input_val.format(parse_step))])

    # Adding output step to the pipeline.
    qa_pipeline.add_output(name='output',
                           data_reference=input_val.format(final_step))

    return qa_pipeline
def __init__(self):
    """Build a classification pipeline that geocodes a column with
    Goat_forward before fitting an XGBoost GBTree classifier, and store
    it on ``self.pipeline``:
    denormalize -> dataset_to_dataframe -> Goat_forward geocoding ->
    column parser -> XGBoost -> construct_predictions.
    """
    pipeline_description = Pipeline()
    pipeline_description.add_input(name="inputs")

    # Denormalize primitive: flatten the dataset into a single table.
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.denormalize.Common"))
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="inputs.0",
    )
    step.add_output("produce")
    pipeline_description.add_step(step)

    # DS to DF on the DENORMALIZED DS.
    # Fix: read "steps.0.produce" instead of the raw "inputs.0" — before,
    # step 0's output was never consumed, making the denormalize step
    # dead code.
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.dataset_to_dataframe.Common"))
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.0.produce",
    )
    step.add_output("produce")
    pipeline_description.add_step(step)

    # Goat forward geocoding on column index 1.
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_cleaning.geocoding.Goat_forward"))
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.1.produce",
    )
    step.add_hyperparameter(name="target_columns",
                            argument_type=ArgumentType.VALUE,
                            data=[1])
    step.add_hyperparameter(name="cache_size",
                            argument_type=ArgumentType.VALUE,
                            data=2000)
    step.add_output("produce")
    pipeline_description.add_step(step)

    # Column parser on the geocoded DF.
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.column_parser.Common"))
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.2.produce",
    )
    step.add_output("produce")
    pipeline_description.add_step(step)

    # XGBoost classifier; targets come from the same parsed frame, and
    # predicted columns replace their sources in the output.
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.classification.xgboost_gbtree.Common"))
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.3.produce",
    )
    step.add_argument(
        name="outputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.3.produce",
    )
    step.add_output("produce")
    step.add_hyperparameter(name="return_result",
                            argument_type=ArgumentType.VALUE,
                            data="replace")
    pipeline_description.add_step(step)

    # Construct predictions against the dataframe of step 1.
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.construct_predictions.Common"))
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.4.produce",
    )
    step.add_argument(
        name="reference",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.1.produce",
    )
    step.add_output("produce")
    pipeline_description.add_step(step)

    # Final Output
    pipeline_description.add_output(name="output predictions",
                                    data_reference="steps.5.produce")
    self.pipeline = pipeline_description
from d3m.metadata.pipeline import Pipeline, PrimitiveStep import sys # Creating pipeline pipeline_description = Pipeline() pipeline_description.add_input(name="inputs") # Step 0: dataset_to_dataframe step_0 = PrimitiveStep(primitive=index.get_primitive( "d3m.primitives.data_transformation.dataset_to_dataframe.Common")) step_0.add_argument( name="inputs", argument_type=ArgumentType.CONTAINER, data_reference="inputs.0", ) step_0.add_output("produce") pipeline_description.add_step(step_0) # Step 1: Simple Profiler Column Role Annotation step_1 = PrimitiveStep(primitive=index.get_primitive( "d3m.primitives.schema_discovery.profiler.Common")) step_1.add_argument( name="inputs", argument_type=ArgumentType.CONTAINER, data_reference="steps.0.produce", ) step_1.add_output("produce") pipeline_description.add_step(step_1) # Step 2 column parser -> labeled semantic types to data types step_2 = PrimitiveStep(primitive=index.get_primitive(