def create_pipeline(metric: str) -> Pipeline:
    """Assemble the two-step vertex-nomination pipeline.

    Step 0 loads a single graph (exposing both the data and its target
    column); step 1 runs Distil link prediction over it, configured with
    *metric* as its scoring hyperparameter.
    """
    pipeline = Pipeline(context=PipelineContext.TESTING)
    pipeline.add_input(name='inputs')

    # Step 0: load the graph; expose both 'produce' and 'produce_target'.
    loader = PrimitiveStep(
        primitive_description=DistilSingleGraphLoaderPrimitive.metadata.query())
    loader.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                        data_reference='inputs.0')
    loader.add_output('produce')
    loader.add_output('produce_target')
    pipeline.add_step(loader)

    # Step 1: link prediction, fitted against the loader's target output.
    predictor = PrimitiveStep(
        primitive_description=DistilLinkPredictionPrimitive.metadata.query())
    predictor.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                           data_reference='steps.0.produce')
    predictor.add_argument(name='outputs', argument_type=ArgumentType.CONTAINER,
                           data_reference='steps.0.produce_target')
    predictor.add_hyperparameter('metric', ArgumentType.VALUE, metric)
    predictor.add_output('produce')
    pipeline.add_step(predictor)

    # The pipeline's result is the predictor's raw produce output.
    pipeline.add_output(name='output', data_reference='steps.1.produce')
    return pipeline
def create_pipeline(metric: str) -> Pipeline:
    """Assemble the Kanine classification pipeline.

    Flow: Denormalize -> Kanine classifier.  NOTE: *metric* is accepted for
    signature parity with the other pipeline builders but no step here
    consumes it.
    """
    pipeline = Pipeline(context=PipelineContext.TESTING)
    pipeline.add_input(name='inputs')

    # Step 0: denormalize, collapsing the dataset into a single dataframe.
    denormalize = PrimitiveStep(
        primitive_description=DenormalizePrimitive.metadata.query())
    denormalize.add_argument(name='inputs',
                             argument_type=ArgumentType.CONTAINER,
                             data_reference='inputs.0')
    denormalize.add_output('produce')
    pipeline.add_step(denormalize)

    # Step 1: Kanine classification; both inputs and outputs come from the
    # denormalized dataframe.
    classifier = PrimitiveStep(primitive_description=Kanine.metadata.query())
    classifier.add_argument(name='inputs',
                            argument_type=ArgumentType.CONTAINER,
                            data_reference='steps.0.produce')
    classifier.add_argument(name='outputs',
                            argument_type=ArgumentType.CONTAINER,
                            data_reference='steps.0.produce')
    classifier.add_output('produce')
    pipeline.add_step(classifier)

    pipeline.add_output(name='output', data_reference='steps.1.produce')
    return pipeline
def create_pipeline(metric: str) -> Pipeline:
    """Build the Parrot ARIMA time-series forecasting pipeline.

    Flow: DatasetToDataFrame -> ColumnParser -> Parrot ARIMA; the ARIMA
    step's raw predictions form the pipeline output.

    Note: ``metric`` is accepted for interface parity with the other
    pipeline builders but no step here consumes it.

    Cleanup: removed the unused ``previous_step`` / ``input_val`` locals and
    a commented-out ConstructPredictions step that was dead code.
    """
    tsf_pipeline = Pipeline(context=PipelineContext.TESTING)
    tsf_pipeline.add_input(name='inputs')

    # step 0 - Extract dataframe from dataset
    step = PrimitiveStep(
        primitive_description=DatasetToDataFramePrimitive.metadata.query())
    step.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                      data_reference='inputs.0')
    step.add_output('produce')
    tsf_pipeline.add_step(step)

    # step 1 - Parse columns into typed values for the listed semantic types.
    step = PrimitiveStep(
        primitive_description=ColumnParserPrimitive.metadata.query())
    step.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                      data_reference='steps.0.produce')
    step.add_output('produce')
    semantic_types = (
        'http://schema.org/Boolean', 'http://schema.org/Integer',
        'http://schema.org/Float',
        'https://metadata.datadrivendiscovery.org/types/FloatVector')
    step.add_hyperparameter('parse_semantic_types', ArgumentType.VALUE,
                            semantic_types)
    tsf_pipeline.add_step(step)

    # step 2 - Parrot ARIMA; seasonal_differencing=11 and n_periods=21 are
    # carried over unchanged from the original configuration.
    step = PrimitiveStep(primitive_description=Parrot.metadata.query())
    step.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                      data_reference='steps.1.produce')
    step.add_argument(name='outputs', argument_type=ArgumentType.CONTAINER,
                      data_reference='steps.1.produce')
    step.add_hyperparameter(name='seasonal_differencing',
                            argument_type=ArgumentType.VALUE, data=11)
    step.add_hyperparameter(name='n_periods',
                            argument_type=ArgumentType.VALUE, data=21)
    step.add_output('produce')
    tsf_pipeline.add_step(step)

    # Adding output step to the pipeline
    tsf_pipeline.add_output(name='output', data_reference='steps.2.produce')
    return tsf_pipeline
def create_pipeline(metric: str) -> Pipeline:
    """Assemble the two-step vertex-classification pipeline.

    Step 0 parses the graph input; step 1 classifies vertices.  NOTE:
    *metric* is accepted for signature parity with the other builders but is
    not consumed by any step.
    """
    pipeline = Pipeline(context=PipelineContext.TESTING)
    pipeline.add_input(name='inputs')

    # Step 0: parse the raw graph input.
    parser = PrimitiveStep(
        primitive_description=VertexClassificationParser.metadata.query())
    parser.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                        data_reference='inputs.0')
    parser.add_output('produce')
    pipeline.add_step(parser)

    # Step 1: classify vertices; jvm_memory=0.6 matches the original
    # configuration (presumably a JVM heap fraction — confirm with primitive docs).
    classifier = PrimitiveStep(
        primitive_description=VertexClassification.metadata.query())
    classifier.add_argument(name='inputs',
                            argument_type=ArgumentType.CONTAINER,
                            data_reference='steps.0.produce')
    classifier.add_hyperparameter('jvm_memory', ArgumentType.VALUE, 0.6)
    classifier.add_output('produce')
    pipeline.add_step(classifier)

    pipeline.add_output(name='output', data_reference='steps.1.produce')
    return pipeline
def create_pipeline_json(self, prim_dict):
    """Build a d3m ``Pipeline`` description for this pipeline object.

    Translates this object's step/argument/hyperparameter bookkeeping into a
    ``Pipeline`` and stores it on ``self.pipeline_description``.

    :param prim_dict: mapping from this object's primitive keys to primitive
        wrapper objects exposing ``id`` and ``primitive_class`` metadata.
    """
    name = "Pipeline for evaluation"
    pipeline_id = self.id #+ "_" + str(self.rank)
    pipeline_description = Pipeline(pipeline_id=pipeline_id, context=Context.EVALUATION, name=name)
    for ip in self.inputs:
        pipeline_description.add_input(name=ip['name'])
    num = self.num_steps()
    for i in range(num):
        # Build the primitive-description dict expected by PrimitiveStep.
        p = prim_dict[self.primitives[i]]
        pdesc = {}
        pdesc['id'] = p.id
        pdesc['version'] = p.primitive_class.version
        pdesc['python_path'] = p.primitive_class.python_path
        pdesc['name'] = p.primitive_class.name
        pdesc['digest'] = p.primitive_class.digest
        step = PrimitiveStep(primitive_description=pdesc)
        for name, value in self.primitives_arguments[i].items():
            # NOTE(review): 'origin' is read but not used below — every
            # argument is added as a CONTAINER regardless; confirm intent.
            origin = value['origin']
            argument_type = ArgumentType.CONTAINER
            step.add_argument(name=name, argument_type=argument_type, data_reference=value['data'])
        # Expose the primitive's first produce method as the step output.
        step.add_output(output_id=p.primitive_class.produce_methods[0])
        if self.hyperparams[i] is not None:
            for name, value in self.hyperparams[i].items():
                step.add_hyperparameter(name=name, argument_type=ArgumentType.VALUE, data=value)
        pipeline_description.add_step(step)
    for op in self.outputs:
        # presumably op[2] is the data reference and op[3] the output name —
        # verify against where self.outputs is populated.
        pipeline_description.add_output(data_reference=op[2], name=op[3])
    self.pipeline_description = pipeline_description
def __init__(self):
    """Construct the fixed Sloth k-means time-series clustering pipeline.

    Chain: time-series formatter -> dataset-to-dataframe -> grouping-field
    compose -> Sloth k-means (3 clusters).  The assembled pipeline is stored
    on ``self.pipeline``.
    """
    pipeline = Pipeline()
    pipeline.add_input(name="inputs")

    # The first three steps form a linear chain, each feeding the next.
    chain = (
        "d3m.primitives.data_transformation.time_series_formatter.DistilTimeSeriesFormatter",
        "d3m.primitives.data_transformation.dataset_to_dataframe.Common",
        "d3m.primitives.data_transformation.grouping_field_compose.Common",
    )
    data_ref = "inputs.0"
    for idx, path in enumerate(chain):
        step = PrimitiveStep(primitive=index.get_primitive(path))
        step.add_argument(name="inputs",
                          argument_type=ArgumentType.CONTAINER,
                          data_reference=data_ref)
        step.add_output("produce")
        pipeline.add_step(step)
        data_ref = "steps.{}.produce".format(idx)

    # Final step: Sloth k-means with a fixed cluster count of 3.
    kmeans = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.clustering.k_means.Sloth"))
    kmeans.add_argument(name="inputs",
                        argument_type=ArgumentType.CONTAINER,
                        data_reference=data_ref)
    kmeans.add_hyperparameter(name="nclusters",
                              argument_type=ArgumentType.VALUE, data=3)
    kmeans.add_output("produce")
    pipeline.add_step(kmeans)

    pipeline.add_output(name="output predictions",
                        data_reference="steps.3.produce")
    self.pipeline = pipeline
def build_demo_pipeline():
    """Build the demo pipeline: Featuretools DFS -> imputer -> random forest
    -> prediction construction.

    Returns the assembled d3m ``Pipeline``.
    """
    pipeline = Pipeline(context=Context.TESTING)
    pipeline.add_input(name='inputs')

    # Step 0: deep feature synthesis over the raw input dataset.
    dfs = PrimitiveStep(primitive_description=Featuretools.metadata.query())
    dfs.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                     data_reference='inputs.0')
    dfs.add_output('produce')
    pipeline.add_step(dfs)

    # Step 1: impute missing values in the synthesized features.
    imputer = PrimitiveStep(
        primitive_description=SKlearnImputer.metadata.query())
    imputer.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                         data_reference='steps.0.produce')
    imputer.add_output('produce')
    pipeline.add_step(imputer)

    # Step 2: random-forest classifier; semantic types drive column
    # selection and the index columns are carried through.
    forest = PrimitiveStep(primitive_description=SKlearnRFC.metadata.query())
    forest.add_hyperparameter(name='use_semantic_types',
                              argument_type=ArgumentType.VALUE, data=True)
    forest.add_hyperparameter(name='add_index_columns',
                              argument_type=ArgumentType.VALUE, data=True)
    forest.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.1.produce')
    forest.add_argument(name='outputs', argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.1.produce')
    forest.add_output('produce')
    pipeline.add_step(forest)

    # Step 3: format the classifier output as predictions.
    predictions = PrimitiveStep(
        primitive_description=DataFrameCommon.metadata.query())
    predictions.add_argument(name='inputs',
                             argument_type=ArgumentType.CONTAINER,
                             data_reference='steps.2.produce')
    predictions.add_argument(name='reference',
                             argument_type=ArgumentType.CONTAINER,
                             data_reference='steps.2.produce')
    predictions.add_output('produce')
    pipeline.add_step(predictions)

    pipeline.add_output(name='output predictions',
                        data_reference='steps.3.produce')
    return pipeline
def __init__(self, epochs: int = 10, n_steps: int = 20):
    """Construct the RetinaNet object-detection pipeline.

    Chain: denormalize -> dataset-to-dataframe -> RetinaNet.  The assembled
    pipeline is stored on ``self.pipeline``.

    :param epochs: forwarded to the RetinaNet ``n_epochs`` hyperparameter.
    :param n_steps: forwarded to the RetinaNet ``n_steps`` hyperparameter.
    """
    pipeline = Pipeline()
    pipeline.add_input(name="inputs")

    # Step 0: denormalize the input dataset.
    denorm = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.denormalize.Common"))
    denorm.add_argument(name="inputs",
                        argument_type=ArgumentType.CONTAINER,
                        data_reference="inputs.0")
    denorm.add_output("produce")
    pipeline.add_step(denorm)

    # Step 1: extract a dataframe from the denormalized dataset.
    to_df = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.dataset_to_dataframe.Common"))
    to_df.add_argument(name="inputs",
                       argument_type=ArgumentType.CONTAINER,
                       data_reference="steps.0.produce")
    to_df.add_output("produce")
    pipeline.add_step(to_df)

    # Step 2: RetinaNet detector; weights are cached under /scratch_dir/.
    detector = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.object_detection.retina_net.ObjectDetectionRN"))
    detector.add_argument(name="inputs",
                          argument_type=ArgumentType.CONTAINER,
                          data_reference="steps.1.produce")
    detector.add_argument(name="outputs",
                          argument_type=ArgumentType.CONTAINER,
                          data_reference="steps.1.produce")
    detector.add_hyperparameter(name="n_epochs",
                                argument_type=ArgumentType.VALUE, data=epochs)
    detector.add_hyperparameter(name="n_steps",
                                argument_type=ArgumentType.VALUE, data=n_steps)
    detector.add_hyperparameter(name="weights_path",
                                argument_type=ArgumentType.VALUE,
                                data="/scratch_dir/")
    detector.add_output("produce")
    pipeline.add_step(detector)

    pipeline.add_output(name="output predictions",
                        data_reference="steps.2.produce")
    self.pipeline = pipeline
def community_detection(resolver=None):
    """Build the SRI community-detection pipeline.

    Chain: CommunityDetectionParser -> PSL CommunityDetection ->
    RemoveColumns (drops column 0).

    :param resolver: primitive resolver; defaults to a BlackListResolver.
    :return: the assembled d3m ``Pipeline``.

    Cleanup: removed the dead trailing locals (``last_step``,
    ``attributes``, ``targets``) that were computed via
    ``pipelines.int_to_step`` but never used or returned.
    """
    if resolver is None:
        resolver = custom_resolver.BlackListResolver()

    pipeline_description = Pipeline(context=PipelineContext.TESTING)
    pipeline_description.add_input(name='inputs')
    start_step = "inputs.0"

    # Step 0: parse the community-detection problem input.
    step_0 = PrimitiveStep(
        primitive_description=d3m.primitives.sri.graph.
        CommunityDetectionParser.metadata.query(),
        resolver=resolver)
    step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                        data_reference=start_step)
    step_0.add_output('produce')
    pipeline_description.add_step(step_0)

    # Step 1: PSL community detection; jvm_memory=0.5 carried over from the
    # original configuration.
    step_1 = PrimitiveStep(
        primitive_description=d3m.primitives.sri.psl.
        CommunityDetection.metadata.query(),
        resolver=resolver)
    step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.0.produce')
    step_1.add_hyperparameter(name='jvm_memory',
                              argument_type=ArgumentType.VALUE, data=0.5)
    step_1.add_output('produce')
    pipeline_description.add_step(step_1)

    # Step 2: drop the first column of the detection output.
    step_2 = PrimitiveStep(
        primitive_description=d3m.primitives.data.
        RemoveColumns.metadata.query(),
        resolver=resolver)
    step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.1.produce')
    step_2.add_hyperparameter(name='columns',
                              argument_type=ArgumentType.VALUE, data=[0])
    step_2.add_output('produce')
    pipeline_description.add_step(step_2)

    pipeline_description.add_output(name='Result',
                                    data_reference='steps.2.produce')
    return pipeline_description
def create_pipeline(metric: str) -> Pipeline:
    """Build the Vector Auto-Regression (VAR) forecasting pipeline.

    Flow: DatasetToDataFrame -> ColumnParser -> VAR; the VAR step's raw
    predictions form the pipeline output.

    Note: ``metric`` is accepted for interface parity with the other
    pipeline builders but no step here consumes it.

    Cleanup: removed the unused ``previous_step`` / ``input_val`` locals.
    """
    var_pipeline = Pipeline(context=PipelineContext.TESTING)
    var_pipeline.add_input(name='inputs')

    # step 0 - Extract dataframe from dataset
    step = PrimitiveStep(
        primitive_description=DatasetToDataFramePrimitive.metadata.query())
    step.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                      data_reference='inputs.0')
    step.add_output('produce')
    var_pipeline.add_step(step)

    # step 1 - Parse columns into typed values for the listed semantic types.
    step = PrimitiveStep(
        primitive_description=ColumnParserPrimitive.metadata.query())
    step.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                      data_reference='steps.0.produce')
    step.add_output('produce')
    semantic_types = (
        'http://schema.org/Boolean', 'http://schema.org/Integer',
        'http://schema.org/Float',
        'https://metadata.datadrivendiscovery.org/types/FloatVector')
    step.add_hyperparameter('parse_semantic_types', ArgumentType.VALUE,
                            semantic_types)
    var_pipeline.add_step(step)

    # step 2 - Vector Auto Regression over the parsed dataframe.
    step = PrimitiveStep(primitive_description=VAR.metadata.query())
    step.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                      data_reference='steps.1.produce')
    step.add_argument(name='outputs', argument_type=ArgumentType.CONTAINER,
                      data_reference='steps.1.produce')
    step.add_output('produce')
    var_pipeline.add_step(step)

    # Adding output step to the pipeline
    var_pipeline.add_output(name='output', data_reference='steps.2.produce')
    return var_pipeline
def _new_pipeline(pipeline, hyperparams=None):
    """Clone *pipeline* into a fresh ``Pipeline``, applying hyperparameter
    overrides.

    :param pipeline: template pipeline whose inputs, steps and outputs are
        copied.
    :param hyperparams: optional overrides, keyed by step index (as str)
        then hyperparameter name; converted via ``to_dicts``.
    :return: the clone, with ``cv_scores``/``score`` bookkeeping attached.
    """
    overrides = to_dicts(hyperparams) if hyperparams else dict()

    clone = Pipeline(context=Context.TESTING)
    for pipeline_input in pipeline.inputs:
        clone.add_input(name=pipeline_input['name'])

    for step_index, template_step in enumerate(pipeline.steps):
        step = PrimitiveStep(primitive=template_step.primitive)

        for arg_name, argument in template_step.arguments.items():
            step.add_argument(name=arg_name,
                              argument_type=argument['type'],
                              data_reference=argument['data'])

        for produced in template_step.outputs:
            step.add_output(produced)

        step_overrides = overrides.get(str(step_index), dict())
        # Keep the template's hyperparameters except where overridden, then
        # apply every override value.
        for hp_name, hyperparam in template_step.hyperparams.items():
            if hp_name not in step_overrides:
                step.add_hyperparameter(name=hp_name,
                                        argument_type=ArgumentType.VALUE,
                                        data=hyperparam['data'])
        for hp_name, hp_value in step_overrides.items():
            step.add_hyperparameter(name=hp_name,
                                    argument_type=ArgumentType.VALUE,
                                    data=hp_value)

        clone.add_step(step)

    for pipeline_output in pipeline.outputs:
        clone.add_output(name=pipeline_output['name'],
                         data_reference=pipeline_output['data'])

    # Scoring bookkeeping the callers expect on the returned object.
    clone.cv_scores = list()
    clone.score = None
    return clone
def keras2pipeline(keras_model, batch_size=32):
    """Convert a Keras model into a d3m ``Pipeline`` description.

    Prepends data/loss setup steps, translates each (non-omitted) Keras
    layer into a d3m step via ``step_function``, then appends learner and
    prediction steps.

    :param keras_model: the Keras model to translate.
    :param batch_size: forwarded to the learner setup step.
    :return: the assembled pipeline description.
    """
    # Creating pipeline
    from tensorflow.python.keras.activations import softmax
    pipeline_description = Pipeline()
    pipeline_description.add_input(name='inputs')
    set_data(pipeline_description)
    set_loss(pipeline_description)
    # Steps added so far shift every layer-step index by this offset.
    offset = len(pipeline_description.steps)
    previous_layer_ids = get_previous_layer_ids(keras_model)
    layers = keras_model.layers
    step_id = 0
    # Maps a Keras layer id to the (pre-offset) step index it produced.
    layer_to_step_id = {}
    total_layer_num = len(layers)
    for i, layer in enumerate(layers):
        cls_name = get_layer_class_name(layer)
        if cls_name in OMIT_LAYERS:
            continue
        layer_id = get_layer_id(layer)
        if len(previous_layer_ids[layer_id]) > 0:
            # Rewire predecessors to their step indices, shifted by offset.
            layer.previous_layer_ids = tuple(
                layer_to_step_id[i] + offset for i in previous_layer_ids[layer_id]
            )
        else:
            layer.previous_layer_ids = [None]
        # Since JPL does not support Softmax Layer, we add the workaround to make use of softmax
        if i == total_layer_num - 2 and cls_name == 'Dense':
            layer.activation = softmax
        d3m_step = step_function[cls_name](step_id, layer)
        pipeline_description.add_step(d3m_step)
        layer_to_step_id[layer_id] = step_id
        step_id += 1
    set_learner(pipeline_description, batch_size)
    set_prediction(pipeline_description)
    # The overall output is whatever the final (prediction) step produces.
    pipeline_description.add_output(
        name='output predictions',
        data_reference=f"steps.{len(pipeline_description.steps) - 1}.produce")
    return pipeline_description
def load_pipeline_architecture(self, pipeline_architecture_dict):
    """
    Loads pipeline architecture dictionary and returns a d3m Pipeline object.

    Each entry describes one stage: its primitive (object or importable
    name), a unique ``stage_name``, the ``input`` stage it consumes, and
    optional ``hyperparameters`` / extra ``arguments``.  Stages must appear
    after every stage they reference.  The last stage becomes the pipeline
    output.
    """
    pipeline_description = Pipeline(context=Context.TESTING)
    pipeline_description.add_input(name='inputs')
    # For each corresponding stage in the dictionary create a step
    steps = []
    # Maps a stage name to the data reference of the step it produced.
    stage_name_to_reference_name = {}
    for stage_dict in pipeline_architecture_dict:
        # Extract stage attributes
        primitive = stage_dict["primitive"]
        if type(primitive) == str:
            primitive = get_primitive_with_name(primitive)
        cur_stage_name = stage_dict["stage_name"]
        input_stage = stage_dict["input"]
        # Create primitive step; the sentinel PIPELINE_INPUT wires the stage
        # to the raw pipeline input instead of a previous stage.
        step = PrimitiveStep(primitive_description=primitive.metadata.query())
        data_reference = "inputs.0" if input_stage == PipelineWrapper.PIPELINE_INPUT else stage_name_to_reference_name[input_stage]
        step.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER, data_reference=data_reference)
        if "hyperparameters" in stage_dict:
            for k,v in stage_dict["hyperparameters"].items():
                step.add_hyperparameter(name=k, argument_type=ArgumentType.VALUE, data=v)
        if "arguments" in stage_dict:
            # Extra named arguments reference other stages by stage name.
            for k,v in stage_dict["arguments"].items():
                step.add_argument(name=k, argument_type=ArgumentType.CONTAINER, data_reference=stage_name_to_reference_name[v])
        step.add_output("produce")
        pipeline_description.add_step(step)
        reference_name = next(iter(step.get_output_data_references()))
        # Update accounting
        stage_name_to_reference_name[cur_stage_name] = reference_name
        steps.append(step)
    # Output is output of the last step
    last_output_reference = next(iter(steps[-1].get_output_data_references()))
    pipeline_description.add_output(name="output", data_reference=last_output_reference)
    return pipeline_description
def create_pipeline(metric: str) -> Pipeline:
    """Assemble the seeded graph-matching pipeline.

    Step 0 loads the graphs (exposing data and target outputs); step 1 runs
    Distil seeded graph matching, configured with *metric*.
    """
    pipeline = Pipeline(context=PipelineContext.TESTING)
    pipeline.add_input(name='inputs')

    # Step 0: load the graphs; expose both 'produce' and 'produce_target'.
    loader = PrimitiveStep(
        primitive_description=DistilGraphLoaderPrimitive.metadata.query())
    loader.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                        data_reference='inputs.0')
    loader.add_output('produce')
    loader.add_output('produce_target')
    pipeline.add_step(loader)

    # Step 1: seeded graph matching fitted against the loader's targets.
    matcher = PrimitiveStep(
        primitive_description=DistilSeededGraphMatchingPrimitive.metadata.
        query())
    matcher.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                         data_reference='steps.0.produce')
    matcher.add_argument(name='outputs', argument_type=ArgumentType.CONTAINER,
                         data_reference='steps.0.produce_target')
    matcher.add_hyperparameter('metric', ArgumentType.VALUE, metric)
    matcher.add_output('produce')
    pipeline.add_step(matcher)

    # A ConstructPredictions formatting step was deliberately omitted here;
    # the matcher's raw produce output is returned directly.
    pipeline.add_output(name='output', data_reference='steps.1.produce')
    return pipeline
def __init__(self, ):
    """Construct the Duke text-summarization pipeline.

    Chain: dataset-to-dataframe -> column parser -> Duke.  The assembled
    pipeline is stored on ``self.pipeline``.
    """
    pipeline = Pipeline()
    pipeline.add_input(name="inputs")

    # The three steps form a straight chain, each consuming the previous
    # step's produce output.
    chain = (
        "d3m.primitives.data_transformation.dataset_to_dataframe.Common",
        "d3m.primitives.data_transformation.column_parser.Common",
        "d3m.primitives.data_cleaning.text_summarization.Duke",
    )
    data_ref = "inputs.0"
    for idx, path in enumerate(chain):
        step = PrimitiveStep(primitive=index.get_primitive(path))
        step.add_argument(name="inputs",
                          argument_type=ArgumentType.CONTAINER,
                          data_reference=data_ref)
        step.add_output("produce")
        pipeline.add_step(step)
        data_ref = "steps.{}.produce".format(idx)

    pipeline.add_output(name="output predictions",
                        data_reference="steps.2.produce")
    self.pipeline = pipeline
def __init__(self, metadata, main_resource, data_types, loaded_primitives, problem=None, start_resource='inputs.0'):
    """Initialize pipeline-builder state and kick off pipeline generation.

    :param metadata: dataset metadata consulted during generation.
    :param main_resource: the dataset's main resource identifier.
    :param data_types: data types present in the dataset.
    :param loaded_primitives: primitives available to the builder.
    :param problem: optional problem description.
    :param start_resource: data reference the first step reads from.
    """
    self.metadata = metadata
    self.main_resource = main_resource
    self.data_types = data_types
    self.loaded_primitives = loaded_primitives
    self.start_resource = start_resource
    self.problem = problem
    # Creating pipeline
    pipeline_description = Pipeline(context=Context.TESTING)
    pipeline_description.add_input(name='inputs')
    self.pipeline = pipeline_description
    # Step handles filled in by _generate_pipeline (dataset-to-dataframe,
    # attribute and target extraction steps — presumably; confirm there).
    self.d2d_step = None
    self.attr_step = None
    self.targ_step = None
    self._generate_pipeline()
def __init__(
    self,
    epochs: int = 5000,
    attention_lstm: bool = True,
):
    """Construct the LSTM-FCN time-series classification pipeline.

    Two parallel branches: the formatted time-series branch (steps 0-1)
    feeds the classifier's inputs, while the raw-dataset branch (steps 2-4:
    dataframe, column parsing, target extraction) supplies the training
    targets and the reference for prediction construction.  The assembled
    pipeline is stored on ``self.pipeline``.

    :param epochs: forwarded to the LSTM-FCN ``epochs`` hyperparameter.
    :param attention_lstm: forwarded to the ``attention_lstm`` hyperparameter.
    """
    pipeline_description = Pipeline()
    pipeline_description.add_input(name="inputs")
    # Ts formatter
    step = PrimitiveStep(
        primitive=index.get_primitive(
            "d3m.primitives.data_transformation.time_series_formatter.DistilTimeSeriesFormatter"
        )
    )
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="inputs.0",
    )
    step.add_output("produce")
    pipeline_description.add_step(step)
    # DS to DF on formatted ts DS
    step = PrimitiveStep(
        primitive=index.get_primitive(
            "d3m.primitives.data_transformation.dataset_to_dataframe.Common"
        )
    )
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.0.produce",
    )
    step.add_output("produce")
    pipeline_description.add_step(step)
    # DS to DF on input DS — intentionally reads "inputs.0" again to keep an
    # unformatted copy of the dataset for target extraction and reference.
    step = PrimitiveStep(
        primitive=index.get_primitive(
            "d3m.primitives.data_transformation.dataset_to_dataframe.Common"
        )
    )
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="inputs.0",
    )
    step.add_output("produce")
    pipeline_description.add_step(step)
    # column parser on input DF
    step = PrimitiveStep(
        primitive=index.get_primitive(
            "d3m.primitives.data_transformation.column_parser.Common"
        )
    )
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.2.produce",
    )
    step.add_output("produce")
    step.add_hyperparameter(
        name="parse_semantic_types",
        argument_type=ArgumentType.VALUE,
        data=[
            "http://schema.org/Boolean",
            "http://schema.org/Integer",
            "http://schema.org/Float",
            "https://metadata.datadrivendiscovery.org/types/FloatVector",
        ],
    )
    pipeline_description.add_step(step)
    # parse target semantic types
    step = PrimitiveStep(
        primitive=index.get_primitive(
            "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common"
        )
    )
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.3.produce",
    )
    step.add_hyperparameter(
        name="semantic_types",
        argument_type=ArgumentType.VALUE,
        data=[
            "https://metadata.datadrivendiscovery.org/types/Target",
        ],
    )
    step.add_output("produce")
    pipeline_description.add_step(step)
    # LSTM FCN: inputs from the formatted branch, targets from the raw one.
    step = PrimitiveStep(
        primitive=index.get_primitive(
            "d3m.primitives.time_series_classification.convolutional_neural_net.LSTM_FCN"
        )
    )
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.1.produce",
    )
    step.add_argument(
        name="outputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.4.produce",
    )
    step.add_hyperparameter(
        name="epochs", argument_type=ArgumentType.VALUE, data=epochs
    )
    step.add_hyperparameter(
        name="attention_lstm", argument_type=ArgumentType.VALUE, data=attention_lstm
    )
    step.add_output("produce")
    pipeline_description.add_step(step)
    # construct predictions, referencing the raw-dataset dataframe (step 2)
    step = PrimitiveStep(
        primitive=index.get_primitive(
            "d3m.primitives.data_transformation.construct_predictions.Common"
        )
    )
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.5.produce",
    )
    step.add_argument(
        name="reference",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.2.produce",
    )
    step.add_output("produce")
    pipeline_description.add_step(step)
    # Final Output
    pipeline_description.add_output(
        name="output predictions", data_reference="steps.6.produce"
    )
    self.pipeline = pipeline_description
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep

# Script that assembles a pipeline: dataframe extraction, profiling, and
# (at least) column parsing.
# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name="inputs")

# Step 0: DS to DF on input DS
step_0 = PrimitiveStep(primitive=index.get_primitive(
    "d3m.primitives.data_transformation.dataset_to_dataframe.Common"))
step_0.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER,
                    data_reference="inputs.0")
step_0.add_output("produce")
pipeline_description.add_step(step_0)

# Step 1: Simple Profiler Column Role Annotation
step_1 = PrimitiveStep(primitive=index.get_primitive(
    "d3m.primitives.schema_discovery.profiler.Common"))
step_1.add_argument(
    name="inputs",
    argument_type=ArgumentType.CONTAINER,
    data_reference="steps.0.produce",
)
step_1.add_output("produce")
pipeline_description.add_step(step_1)

# Step 2: column parser on input DF
# NOTE(review): step_2 is created but not configured or added to the
# pipeline within this chunk — its setup presumably continues below.
step_2 = PrimitiveStep(primitive=index.get_primitive(
    "d3m.primitives.data_transformation.column_parser.Common"))
def create_pipeline(metric: str) -> Pipeline:
    """Assemble the BERT question-answer pair-classification pipeline.

    Flow: Denormalize -> DatasetToDataFrame -> ColumnParser; attribute and
    target columns extracted from the parsed frame feed the BERT pair
    classifier, whose output is formatted by ConstructPredictions.

    :param metric: scoring metric forwarded to the BERT classifier.
    """
    pipeline = Pipeline(context=PipelineContext.TESTING)
    pipeline.add_input(name='inputs')

    # Step 0: denormalize so the dataset holds a single dataframe.
    denorm = PrimitiveStep(
        primitive_description=DenormalizePrimitive.metadata.query())
    denorm.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                        data_reference='inputs.0')
    denorm.add_output('produce')
    pipeline.add_step(denorm)

    # Step 1: extract a dataframe from the denormalized dataset.
    to_df = PrimitiveStep(
        primitive_description=DatasetToDataFramePrimitive.metadata.query())
    to_df.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                       data_reference='steps.0.produce')
    to_df.add_output('produce')
    pipeline.add_step(to_df)

    # Step 2: parse columns into typed values for the listed semantic types.
    parser = PrimitiveStep(
        primitive_description=ColumnParserPrimitive.metadata.query())
    parser.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.1.produce')
    parser.add_output('produce')
    semantic_types = (
        'http://schema.org/Boolean', 'http://schema.org/Integer',
        'http://schema.org/Float',
        'https://metadata.datadrivendiscovery.org/types/FloatVector')
    parser.add_hyperparameter('parse_semantic_types', ArgumentType.VALUE,
                              semantic_types)
    pipeline.add_step(parser)

    # Step 3: pull the attribute columns out of the parsed frame.
    attributes = PrimitiveStep(
        primitive_description=ExtractColumnsBySemanticTypesPrimitive.metadata.
        query())
    attributes.add_argument(name='inputs',
                            argument_type=ArgumentType.CONTAINER,
                            data_reference='steps.2.produce')
    attributes.add_output('produce')
    attributes.add_hyperparameter(
        'semantic_types', ArgumentType.VALUE,
        ('https://metadata.datadrivendiscovery.org/types/Attribute', ))
    pipeline.add_step(attributes)

    # Step 4: pull the target columns out of the parsed frame.
    targets = PrimitiveStep(
        primitive_description=ExtractColumnsBySemanticTypesPrimitive.metadata.
        query())
    targets.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                         data_reference='steps.2.produce')
    targets.add_output('produce')
    target_types = (
        'https://metadata.datadrivendiscovery.org/types/Target',
        'https://metadata.datadrivendiscovery.org/types/TrueTarget')
    targets.add_hyperparameter('semantic_types', ArgumentType.VALUE,
                               target_types)
    pipeline.add_step(targets)

    # Step 5: BERT pair classification over document columns 1 and 3.
    bert = PrimitiveStep(
        primitive_description=BertPairClassificationPrimitive.metadata.query())
    bert.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                      data_reference='steps.3.produce')
    bert.add_argument(name='outputs', argument_type=ArgumentType.CONTAINER,
                      data_reference='steps.4.produce')
    bert.add_output('produce')
    bert.add_hyperparameter('metric', ArgumentType.VALUE, metric)
    bert.add_hyperparameter('doc_col_0', ArgumentType.VALUE, 1)
    bert.add_hyperparameter('doc_col_1', ArgumentType.VALUE, 3)
    pipeline.add_step(bert)

    # Step 6: format predictions against the parsed reference frame.
    predictions = PrimitiveStep(
        primitive_description=ConstructPredictionsPrimitive.metadata.query())
    predictions.add_argument(name='inputs',
                             argument_type=ArgumentType.CONTAINER,
                             data_reference='steps.5.produce')
    predictions.add_argument(name='reference',
                             argument_type=ArgumentType.CONTAINER,
                             data_reference='steps.2.produce')
    predictions.add_output('produce')
    pipeline.add_step(predictions)

    pipeline.add_output(name='output', data_reference='steps.6.produce')
    return pipeline
def __init__(self):
    """Build a pipeline that geocodes a text column (Goat forward) and
    classifies with XGBoost.

    Flow: denormalize -> dataframe -> geocode -> parse columns ->
    XGBoost -> construct predictions. The result is stored on
    ``self.pipeline``.
    """
    pipeline_description = Pipeline()
    pipeline_description.add_input(name="inputs")

    # Step 0: denormalize so related resources end up in one dataset
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.denormalize.Common"))
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="inputs.0",
    )
    step.add_output("produce")
    pipeline_description.add_step(step)

    # Step 1: dataset to dataframe.
    # BUG FIX: previously read "inputs.0", which silently discarded the
    # Denormalize step above (its output was never consumed); it must
    # chain from steps.0.produce.
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.dataset_to_dataframe.Common"))
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.0.produce",
    )
    step.add_output("produce")
    pipeline_description.add_step(step)

    # Step 2: Goat forward geocoding on column 1, with a result cache
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_cleaning.geocoding.Goat_forward"))
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.1.produce",
    )
    step.add_hyperparameter(name="target_columns",
                            argument_type=ArgumentType.VALUE,
                            data=[1])
    step.add_hyperparameter(name="cache_size",
                            argument_type=ArgumentType.VALUE,
                            data=2000)
    step.add_output("produce")
    pipeline_description.add_step(step)

    # Step 3: parse column values into native types
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.column_parser.Common"))
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.2.produce",
    )
    step.add_output("produce")
    pipeline_description.add_step(step)

    # Step 4: XGBoost classifier; targets come from the same parsed frame
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.classification.xgboost_gbtree.Common"))
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.3.produce",
    )
    step.add_argument(
        name="outputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.3.produce",
    )
    step.add_output("produce")
    step.add_hyperparameter(name="return_result",
                            argument_type=ArgumentType.VALUE,
                            data="replace")
    pipeline_description.add_step(step)

    # Step 5: construct predictions against the pre-geocode dataframe
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.construct_predictions.Common"))
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.4.produce",
    )
    step.add_argument(
        name="reference",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.1.produce",
    )
    step.add_output("produce")
    pipeline_description.add_step(step)

    # Final Output
    pipeline_description.add_output(name="output predictions",
                                    data_reference="steps.5.produce")
    self.pipeline = pipeline_description
def __init__(self, algorithm: str = 'PseudoLabel'):
    # Build a semi-supervised tabular classification pipeline and store
    # it on self.pipeline.
    #
    # Step indices matter: each add_step call fixes the index used by
    # later "steps.N.produce" references.
    #   0 dataset->dataframe          1 profiler (column roles)
    #   2 column parser               3 imputer
    #   4 extract Attribute columns   5 extract Integer/Float (from 4)
    #   6 extract Target columns (from the profiled frame, step 1)
    #   7 TabularSemiSupervised (inputs=5, outputs=6)
    #   8 construct predictions (reference=1)
    #
    # algorithm: forwarded to the TabularSemiSupervised primitive's
    # 'algorithm' hyperparameter.
    pipeline_description = Pipeline()
    pipeline_description.add_input(name="inputs")
    # DS to DF on input DS
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.dataset_to_dataframe.Common"))
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="inputs.0",
    )
    step.add_output("produce")
    pipeline_description.add_step(step)
    # Simple Profiler Column Role Annotation
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.schema_discovery.profiler.Common"))
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.0.produce",
    )
    step.add_output("produce")
    pipeline_description.add_step(step)
    # column parser on input DF
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.column_parser.Common"))
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.1.produce",
    )
    step.add_output("produce")
    step.add_hyperparameter(
        name="parse_semantic_types",
        argument_type=ArgumentType.VALUE,
        data=[
            "http://schema.org/Boolean",
            "http://schema.org/Integer",
            "http://schema.org/Float",
            "https://metadata.datadrivendiscovery.org/types/FloatVector",
            "http://schema.org/DateTime",
        ],
    )
    pipeline_description.add_step(step)
    # imputer (replaces columns in place, driven by semantic types)
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_cleaning.imputer.SKlearn"))
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.2.produce",
    )
    step.add_output("produce")
    step.add_hyperparameter(name="return_result",
                            argument_type=ArgumentType.VALUE,
                            data="replace")
    step.add_hyperparameter(name="use_semantic_types",
                            argument_type=ArgumentType.VALUE,
                            data=True)
    pipeline_description.add_step(step)
    # parse attribute semantic types
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common"
    ))
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.3.produce",
    )
    step.add_hyperparameter(
        name="semantic_types",
        argument_type=ArgumentType.VALUE,
        data=["https://metadata.datadrivendiscovery.org/types/Attribute"],
    )
    step.add_output("produce")
    pipeline_description.add_step(step)
    # parse integer/float attribute semantic types (narrows step 4's
    # attribute set down to numeric columns)
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common"
    ))
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.4.produce",
    )
    step.add_hyperparameter(
        name="semantic_types",
        argument_type=ArgumentType.VALUE,
        data=["http://schema.org/Integer", "http://schema.org/Float"],
    )
    step.add_output("produce")
    pipeline_description.add_step(step)
    # parse target semantic types (taken from the profiled frame, step 1,
    # i.e. before column parsing and imputation)
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common"
    ))
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.1.produce",
    )
    step.add_hyperparameter(
        name="semantic_types",
        argument_type=ArgumentType.VALUE,
        data=[
            "https://metadata.datadrivendiscovery.org/types/Target",
        ],
    )
    step.add_output("produce")
    pipeline_description.add_step(step)
    # Tabular Semi Supervised learner
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.semisupervised_classification.iterative_labeling.TabularSemiSupervised"
    ))
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.5.produce",
    )
    step.add_argument(
        name="outputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.6.produce",
    )
    step.add_hyperparameter(name="algorithm",
                            argument_type=ArgumentType.VALUE,
                            data=algorithm)
    step.add_output("produce")
    pipeline_description.add_step(step)
    # construct predictions
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.construct_predictions.Common"))
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.7.produce",
    )
    step.add_argument(
        name="reference",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.1.produce",
    )
    step.add_output("produce")
    pipeline_description.add_step(step)
    # Final Output
    pipeline_description.add_output(name="output predictions",
                                    data_reference="steps.8.produce")
    self.pipeline = pipeline_description
def __init__(self):
    # Build a classification pipeline and store it on self.pipeline.
    #   0 dataset->dataframe     1 profiler (column roles)
    #   2 column parser          3 PcaFeatures feature selection
    #   4 extract targets        5 XGBoost (inputs=3, outputs=4)
    #   6 construct predictions (reference=0)
    pipeline_description = Pipeline()
    pipeline_description.add_input(name="inputs")
    # DS to DF on input DS
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.dataset_to_dataframe.Common"))
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="inputs.0",
    )
    step.add_output("produce")
    pipeline_description.add_step(step)
    # Simple Profiler Column Role Annotation
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.schema_discovery.profiler.Common"))
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.0.produce",
    )
    step.add_output("produce")
    pipeline_description.add_step(step)
    # column parser on input DF
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.column_parser.Common"))
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.1.produce",
    )
    step.add_output("produce")
    pipeline_description.add_step(step)
    # PcaFeatures feature selection ('outputs' is fed the same parsed
    # frame, which still contains the target column)
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.feature_selection.pca_features.Pcafeatures"))
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.2.produce",
    )
    step.add_argument(
        name="outputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.2.produce",
    )
    step.add_output("produce")
    pipeline_description.add_step(step)
    # parse target semantic types
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common"
    ))
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.2.produce",
    )
    step.add_hyperparameter(
        name="semantic_types",
        argument_type=ArgumentType.VALUE,
        data=[
            "https://metadata.datadrivendiscovery.org/types/Target",
        ],
    )
    # NOTE(review): add_index_columns is unusual for the extract-columns
    # primitive — confirm it is an accepted hyperparameter here.
    step.add_hyperparameter(name="add_index_columns",
                            argument_type=ArgumentType.VALUE,
                            data=True)
    step.add_output("produce")
    pipeline_description.add_step(step)
    # XGBoost
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.classification.xgboost_gbtree.Common"))
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.3.produce",
    )
    step.add_argument(
        name="outputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.4.produce",
    )
    step.add_output("produce")
    step.add_hyperparameter(name="add_index_columns",
                            argument_type=ArgumentType.VALUE,
                            data=True)
    pipeline_description.add_step(step)
    # construct predictions
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.construct_predictions.Common"))
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.5.produce",
    )
    step.add_argument(
        name="reference",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.0.produce",
    )
    step.add_output("produce")
    pipeline_description.add_step(step)
    # Final Output
    pipeline_description.add_output(name="output predictions",
                                    data_reference="steps.6.produce")
    self.pipeline = pipeline_description
def generate_only():
    # Build the MultiTableFeaturization test pipeline, run `d3m runtime
    # fit-score` on it (writing a pipeline_run file as a side effect via
    # os.system), and return a shell command that replays the run from
    # that pipeline_run file.
    # Creating pipeline
    pipeline_description = Pipeline()
    pipeline_description.add_input(name='inputs')
    # Step 0: Parse columns (column_parser mapped over every resource in
    # the dataset; fit_primitive='no' since the parser needs no fitting)
    step_0 = PrimitiveStep(primitive=index.get_primitive(
        'd3m.primitives.operator.dataset_map.DataFrameCommon'))
    step_0.add_argument(name='inputs',
                        argument_type=ArgumentType.CONTAINER,
                        data_reference='inputs.0')
    step_0.add_hyperparameter(name='primitive',
                              argument_type=ArgumentType.VALUE,
                              data=column_parser.Common)
    step_0.add_hyperparameter(name='resources',
                              argument_type=ArgumentType.VALUE,
                              data='all')
    step_0.add_hyperparameter(name='fit_primitive',
                              argument_type=ArgumentType.VALUE,
                              data='no')
    step_0.add_output('produce')
    pipeline_description.add_step(step_0)
    # Step 1: MultiTableFeaturization (deep feature synthesis across
    # the dataset's related tables)
    step_1 = PrimitiveStep(primitive=index.get_primitive(
        'd3m.primitives.feature_construction.deep_feature_synthesis.MultiTableFeaturization'
    ))
    step_1.add_argument(name='inputs',
                        argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.0.produce')
    step_1.add_output('produce')
    pipeline_description.add_step(step_1)
    # Step 2: imputer
    step_2 = PrimitiveStep(primitive=index.get_primitive(
        'd3m.primitives.data_cleaning.imputer.SKlearn'))
    step_2.add_argument(name='inputs',
                        argument_type=ArgumentType.CONTAINER,
                        data_reference="steps.1.produce")
    step_2.add_hyperparameter(name='use_semantic_types',
                              argument_type=ArgumentType.VALUE,
                              data=True)
    step_2.add_output('produce')
    pipeline_description.add_step(step_2)
    # Step 3: learn model (targets come from the featurized frame, step 1)
    step_3 = PrimitiveStep(primitive=index.get_primitive(
        'd3m.primitives.regression.xgboost_gbtree.Common'))
    step_3.add_argument(name='inputs',
                        argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.2.produce')
    step_3.add_argument(name='outputs',
                        argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.1.produce')
    step_3.add_output('produce')
    pipeline_description.add_step(step_3)
    # step 4: construct output
    step_4 = PrimitiveStep(primitive=index.get_primitive(
        'd3m.primitives.data_transformation.construct_predictions.Common'))
    step_4.add_argument(name='inputs',
                        argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.3.produce')
    step_4.add_argument(name='reference',
                        argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.1.produce')
    step_4.add_output('produce')
    pipeline_description.add_step(step_4)
    # Final Output
    pipeline_description.add_output(name='output predictions',
                                    data_reference='steps.4.produce')
    # Generate .yml file for the pipeline
    import featuretools_ta1
    from pipeline_tests.utils import generate_pipeline
    dataset_name = 'uu3_world_development_indicators'
    dataset_path = '/featuretools_ta1/datasets/seed_datasets_current'
    primitive_name = 'd3m.primitives.feature_construction.deep_feature_synthesis.MultiTableFeaturization'
    version = featuretools_ta1.__version__
    # test name is this file's basename with the leading 'test_' stripped
    test_name = os.path.splitext(os.path.basename(__file__))[0][5:]
    yml, pipeline_run_file = generate_pipeline(
        primitive_name=primitive_name,
        pipeline_description=pipeline_description,
        dataset_name=dataset_name,
        test_name=test_name)
    # fit-score command
    fs_cmd = 'python3 -m d3m runtime -d /featuretools_ta1/datasets/ fit-score -p {}'.format(
        yml)
    fs_cmd += ' -r {}/{}/{}_problem/problemDoc.json'.format(
        dataset_path, dataset_name, dataset_name)
    fs_cmd += ' -i {}/{}/TRAIN/dataset_TRAIN/datasetDoc.json'.format(
        dataset_path, dataset_name)
    fs_cmd += ' -t {}/{}/TEST/dataset_TEST/datasetDoc.json'.format(
        dataset_path, dataset_name)
    fs_cmd += ' -a {}/{}/SCORE/dataset_SCORE/datasetDoc.json'.format(
        dataset_path, dataset_name)
    fs_cmd += ' -O {}'.format(pipeline_run_file)
    # Run pipeline to save pipeline_run file
    os.system(fs_cmd)
    # Create and return command for running from pipeline_run file:
    pipeline_run_cmd = 'python3 -m d3m --pipelines-path /featuretools_ta1/MIT_FeatureLabs/{}/{}/pipelines/'.format(
        primitive_name, version)
    pipeline_run_cmd += ' runtime -d /featuretools_ta1/datasets/ fit-score'
    pipeline_run_cmd += ' -u {}'.format(pipeline_run_file)
    return pipeline_run_cmd
def __init__(
    self,
    interpretable: bool = True,
    epochs: int = 10,
    steps_per_epoch: int = 50,
    prediction_length: int = 30,
    num_context_lengths: int = 1,
    num_estimators: int = 2,
    #quantiles: List[float] = [0.1, 0.9],
    group_compose: bool = False,
    confidence_intervals: bool = False,
    output_mean: bool = True,
    weights_dir: str = '/scratch_dir/nbeats'
):
    # Build an NBEATS time-series forecasting pipeline and store it on
    # self.pipeline.
    #
    # Two layout-affecting flags shift the step indices:
    #   group_compose        - inserts a grouping_field_compose step after
    #                          the column parser, bumping every later index
    #                          by one (hence the paired data_ref ternaries).
    #   confidence_intervals - outputs the forecaster's
    #                          produce_confidence_intervals directly,
    #                          skipping construct_predictions.
    # Remaining keyword arguments are forwarded verbatim to the NBEATS
    # primitive's hyperparameters of the same names.
    pipeline_description = Pipeline()
    pipeline_description.add_input(name="inputs")
    # DS to DF on input DS
    step = PrimitiveStep(
        primitive=index.get_primitive(
            "d3m.primitives.data_transformation.dataset_to_dataframe.Common"
        )
    )
    step.add_argument(
        name="inputs", argument_type=ArgumentType.CONTAINER, data_reference="inputs.0"
    )
    step.add_output("produce")
    pipeline_description.add_step(step)
    # Simple Profiler Column Role Annotation
    step = PrimitiveStep(
        primitive=index.get_primitive("d3m.primitives.schema_discovery.profiler.Common")
    )
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.0.produce",
    )
    step.add_output("produce")
    pipeline_description.add_step(step)
    # column parser on input DF
    step = PrimitiveStep(
        primitive=index.get_primitive(
            "d3m.primitives.data_transformation.column_parser.Common"
        )
    )
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.1.produce",
    )
    step.add_output("produce")
    step.add_hyperparameter(
        name="parse_semantic_types",
        argument_type=ArgumentType.VALUE,
        data=[
            "http://schema.org/Boolean",
            "http://schema.org/Integer",
            "http://schema.org/Float",
            "https://metadata.datadrivendiscovery.org/types/FloatVector",
            "http://schema.org/DateTime",
        ],
    )
    pipeline_description.add_step(step)
    # group compose (optional; shifts all later step indices by one)
    if group_compose:
        step = PrimitiveStep(
            primitive=index.get_primitive(
                "d3m.primitives.data_transformation.grouping_field_compose.Common"
            )
        )
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.2.produce",
        )
        step.add_output("produce")
        pipeline_description.add_step(step)
    # parse attribute semantic types (GroupingKey kept alongside Attribute)
    step = PrimitiveStep(
        primitive=index.get_primitive(
            "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common"
        )
    )
    data_ref = "steps.3.produce" if group_compose else "steps.2.produce"
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference=data_ref,
    )
    step.add_hyperparameter(
        name="semantic_types",
        argument_type=ArgumentType.VALUE,
        data=[
            "https://metadata.datadrivendiscovery.org/types/Attribute",
            'https://metadata.datadrivendiscovery.org/types/GroupingKey'
        ],
    )
    step.add_output("produce")
    pipeline_description.add_step(step)
    # parse target semantic types
    step = PrimitiveStep(
        primitive=index.get_primitive(
            "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common"
        )
    )
    data_ref = "steps.3.produce" if group_compose else "steps.2.produce"
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference=data_ref,
    )
    step.add_hyperparameter(
        name="semantic_types",
        argument_type=ArgumentType.VALUE,
        data=[
            "https://metadata.datadrivendiscovery.org/types/Target",
            "https://metadata.datadrivendiscovery.org/types/TrueTarget",
            "https://metadata.datadrivendiscovery.org/types/SuggestedTarget",
        ],
    )
    step.add_output("produce")
    pipeline_description.add_step(step)
    # forecasting primitive (NBEATS)
    step = PrimitiveStep(
        primitive=index.get_primitive(
            "d3m.primitives.time_series_forecasting.feed_forward_neural_net.NBEATS"
        )
    )
    data_ref = "steps.4.produce" if group_compose else "steps.3.produce"
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference=data_ref,
    )
    data_ref = "steps.5.produce" if group_compose else "steps.4.produce"
    step.add_argument(
        name="outputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference=data_ref,
    )
    step.add_hyperparameter(
        name="interpretable",
        argument_type=ArgumentType.VALUE,
        data=interpretable,
    )
    step.add_hyperparameter(
        name="num_estimators",
        argument_type=ArgumentType.VALUE,
        data=num_estimators,
    )
    step.add_hyperparameter(
        name="epochs",
        argument_type=ArgumentType.VALUE,
        data=epochs,
    )
    step.add_hyperparameter(
        name="steps_per_epoch",
        argument_type=ArgumentType.VALUE,
        data=steps_per_epoch,
    )
    step.add_hyperparameter(
        name="prediction_length",
        argument_type=ArgumentType.VALUE,
        data=prediction_length,
    )
    step.add_hyperparameter(
        name="num_context_lengths",
        argument_type=ArgumentType.VALUE,
        data=num_context_lengths,
    )
    # step.add_hyperparameter(
    #     name="quantiles",
    #     argument_type=ArgumentType.VALUE,
    #     data=quantiles,
    # )
    step.add_hyperparameter(
        name="weights_dir",
        argument_type=ArgumentType.VALUE,
        data=weights_dir,
    )
    step.add_hyperparameter(
        name="output_mean",
        argument_type=ArgumentType.VALUE,
        data=output_mean,
    )
    if confidence_intervals:
        # expose the interval output directly; no construct_predictions
        step.add_output("produce_confidence_intervals")
        pipeline_description.add_step(step)
        data_ref = "steps.6.produce_confidence_intervals" if group_compose else "steps.5.produce_confidence_intervals"
        pipeline_description.add_output(
            name="output predictions", data_reference=data_ref
        )
    else:
        step.add_output("produce")
        pipeline_description.add_step(step)
        # construct predictions
        step = PrimitiveStep(
            primitive=index.get_primitive(
                "d3m.primitives.data_transformation.construct_predictions.Common"
            )
        )
        data_ref = "steps.6.produce" if group_compose else "steps.5.produce"
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference=data_ref,
        )
        step.add_argument(
            name="reference",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.1.produce",
        )
        step.add_output("produce")
        pipeline_description.add_step(step)
        data_ref = "steps.7.produce" if group_compose else "steps.6.produce"
        pipeline_description.add_output(
            name="output predictions", data_reference=data_ref
        )
    self.pipeline = pipeline_description
def generate_only():
    """Build the SingleTableFeaturization test pipeline, run `d3m runtime
    fit-score` on it (which writes a pipeline_run file), and return a
    shell command that replays the run from that pipeline_run file."""
    pipeline_description = Pipeline()
    pipeline_description.add_input(name='inputs')

    # step 0: dataset -> dataframe
    step = PrimitiveStep(primitive=index.get_primitive(
        'd3m.primitives.data_transformation.dataset_to_dataframe.Common'))
    step.add_argument(name='inputs',
                      argument_type=ArgumentType.CONTAINER,
                      data_reference='inputs.0')
    step.add_output('produce')
    pipeline_description.add_step(step)

    # step 1: profiler annotates column roles
    step = PrimitiveStep(primitive=index.get_primitive(
        'd3m.primitives.schema_discovery.profiler.Common'))
    step.add_argument(name='inputs',
                      argument_type=ArgumentType.CONTAINER,
                      data_reference='steps.0.produce')
    step.add_output('produce')
    pipeline_description.add_step(step)

    # step 2: parse column values into native types
    step = PrimitiveStep(primitive=index.get_primitive(
        'd3m.primitives.data_transformation.column_parser.Common'))
    step.add_argument(name='inputs',
                      argument_type=ArgumentType.CONTAINER,
                      data_reference='steps.1.produce')
    step.add_output('produce')
    pipeline_description.add_step(step)

    # step 3: deep feature synthesis on the single table
    step = PrimitiveStep(primitive=index.get_primitive(
        'd3m.primitives.feature_construction.deep_feature_synthesis.SingleTableFeaturization'
    ))
    step.add_argument(name='inputs',
                      argument_type=ArgumentType.CONTAINER,
                      data_reference='steps.2.produce')
    step.add_output('produce')
    pipeline_description.add_step(step)

    # step 4: XGBoost regressor; targets come from the parsed frame
    step = PrimitiveStep(primitive=index.get_primitive(
        'd3m.primitives.regression.xgboost_gbtree.Common'))
    step.add_argument(name='inputs',
                      argument_type=ArgumentType.CONTAINER,
                      data_reference='steps.3.produce')
    step.add_argument(name='outputs',
                      argument_type=ArgumentType.CONTAINER,
                      data_reference='steps.2.produce')
    step.add_output('produce')
    pipeline_description.add_step(step)

    # step 5: assemble the prediction frame
    step = PrimitiveStep(primitive=index.get_primitive(
        'd3m.primitives.data_transformation.construct_predictions.Common'))
    step.add_argument(name='inputs',
                      argument_type=ArgumentType.CONTAINER,
                      data_reference='steps.4.produce')
    step.add_argument(name='reference',
                      argument_type=ArgumentType.CONTAINER,
                      data_reference='steps.2.produce')
    step.add_output('produce')
    pipeline_description.add_step(step)

    pipeline_description.add_output(name='output predictions',
                                    data_reference='steps.5.produce')

    # Emit the .yml description for this pipeline
    import featuretools_ta1
    from pipeline_tests.utils import generate_pipeline

    dataset_name = 'LL1_retail_sales_total_MIN_METADATA'
    dataset_path = '/featuretools_ta1/datasets/seed_datasets_current'
    primitive_name = 'd3m.primitives.feature_construction.deep_feature_synthesis.SingleTableFeaturization'
    version = featuretools_ta1.__version__
    # this test's name: the file basename minus its 'test_' prefix
    base_name = os.path.splitext(os.path.basename(__file__))[0]
    test_name = base_name[5:]
    yml, pipeline_run_file = generate_pipeline(
        primitive_name=primitive_name,
        pipeline_description=pipeline_description,
        dataset_name=dataset_name,
        test_name=test_name)

    # fit-score command, assembled piecewise
    fs_cmd = 'python3 -m d3m runtime -d /featuretools_ta1/datasets/ fit-score'
    fs_cmd += ' -p {}'.format(yml)
    fs_cmd += ' -r {0}/{1}/{1}_problem/problemDoc.json'.format(
        dataset_path, dataset_name)
    fs_cmd += ' -i {0}/{1}/TRAIN/dataset_TRAIN/datasetDoc.json'.format(
        dataset_path, dataset_name)
    fs_cmd += ' -t {0}/{1}/TEST/dataset_TEST/datasetDoc.json'.format(
        dataset_path, dataset_name)
    fs_cmd += ' -a {0}/{1}/SCORE/dataset_SCORE/datasetDoc.json'.format(
        dataset_path, dataset_name)
    fs_cmd += ' -O {}'.format(pipeline_run_file)

    # run the pipeline so the pipeline_run file gets written
    os.system(fs_cmd)

    # command that re-runs everything from the recorded pipeline_run
    pipeline_run_cmd = (
        'python3 -m d3m --pipelines-path '
        '/featuretools_ta1/MIT_FeatureLabs/{}/{}/pipelines/'.format(
            primitive_name, version))
    pipeline_run_cmd += ' runtime -d /featuretools_ta1/datasets/ fit-score'
    pipeline_run_cmd += ' -u {}'.format(pipeline_run_file)
    return pipeline_run_cmd
def build_pipeline(pipepline_info, pipepline_mapping, stdout=None):
    """Construct a d3m Pipeline from a list of primitive descriptions.

    pipepline_info: iterable of primitive-info objects carrying
        ``python_path``, ``hyperparameter`` (dict) and ``ancestors``
        (mapping of argument name -> ancestor id). The sentinel paths
        'HEAD' and 'ENDING' mark the dataset entry and the pipeline
        output respectively.
    pipepline_mapping: maps ancestor ids to 1-based step numbers.
    stdout: optional stream; while building, all prints are redirected
        to it and it is restored afterwards, even on error.

    Side effects: writes the pipeline JSON to 'example_pipeline.json'
    and prints debug information to (possibly redirected) stdout.
    """
    default_stdout = sys.stdout
    if stdout is not None:
        sys.stdout = stdout
    try:
        # Creating pipeline
        pipeline_description = Pipeline()
        pipeline_description.add_input(name='inputs')
        for primitive_info in pipepline_info:
            print(primitive_info.python_path)
            print(primitive_info.hyperparameter)
            print(primitive_info.ancestors)
            if primitive_info.python_path == 'HEAD':
                # HEAD carries the dataset folder, not a real primitive
                dataset_fullname = primitive_info.hyperparameter['dataset_folder']
                print(dataset_fullname)
                continue
            elif primitive_info.python_path == 'ENDING':
                # ENDING marks which step's output becomes the pipeline output
                ancestors = primitive_info.ancestors
                end_step_num = pipepline_mapping[ancestors['inputs']] - 1
                pipeline_description.add_output(
                    name='output predictions',
                    data_reference='steps.' + str(end_step_num) + '.produce')
            else:
                primitive = index.get_primitive(primitive_info.python_path)
                step = PrimitiveStep(primitive=primitive)
                hyperparameters = primitive_info.hyperparameter
                ancestors = primitive_info.ancestors
                # Wire arguments: ancestor id 0 means "pipeline input";
                # otherwise map each ancestor id to its producing step.
                if ancestors['inputs'] != 0:
                    for ances_key in ancestors.keys():
                        print(ances_key, ancestors[ances_key],
                              pipepline_mapping[ancestors[ances_key]] - 1)
                        step_num = pipepline_mapping[ancestors[ances_key]] - 1
                        step.add_argument(
                            name=ances_key,
                            argument_type=ArgumentType.CONTAINER,
                            data_reference='steps.' + str(step_num) + '.produce')
                else:
                    step.add_argument(name='inputs',
                                      argument_type=ArgumentType.CONTAINER,
                                      data_reference='inputs.0')
                # Forward all hyperparameters verbatim
                for hyper, hyper_value in hyperparameters.items():
                    step.add_hyperparameter(name=hyper,
                                            argument_type=ArgumentType.VALUE,
                                            data=hyper_value)
                step.add_output('produce')
                pipeline_description.add_step(step)
        # Output to json
        data = pipeline_description.to_json()
        with open('example_pipeline.json', 'w') as f:
            f.write(data)
        print(data)
        sys.stdout.flush()
    finally:
        # BUG FIX: restore stdout even if pipeline construction raises,
        # so a failure does not leave the process printing elsewhere.
        sys.stdout = default_stdout
def __init__(self):
    """Pipeline: dataframe extraction -> Simon column type profiling ->
    column parsing -> XGBoost classification -> prediction construction.
    The assembled pipeline is stored on self.pipeline."""
    pipeline = Pipeline()
    pipeline.add_input(name='inputs')

    # step 0: dataset -> dataframe
    to_df = PrimitiveStep(primitive=index.get_primitive(
        'd3m.primitives.data_transformation.dataset_to_dataframe.Common'))
    to_df.add_argument(name='inputs',
                       argument_type=ArgumentType.CONTAINER,
                       data_reference='inputs.0')
    to_df.add_output('produce')
    pipeline.add_step(to_df)

    # step 1: Simon column type profiler
    simon = PrimitiveStep(primitive=index.get_primitive(
        'd3m.primitives.data_cleaning.column_type_profiler.Simon'))
    simon.add_argument(name='inputs',
                       argument_type=ArgumentType.CONTAINER,
                       data_reference='steps.0.produce')
    simon.add_output('produce')
    pipeline.add_step(simon)

    # step 2: parse column values into native types
    parser = PrimitiveStep(primitive=index.get_primitive(
        'd3m.primitives.data_transformation.column_parser.Common'))
    parser.add_argument(name='inputs',
                        argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.1.produce')
    parser.add_output('produce')
    pipeline.add_step(parser)

    # step 3: XGBoost classifier; targets come from the same parsed frame
    model = PrimitiveStep(primitive=index.get_primitive(
        'd3m.primitives.classification.xgboost_gbtree.Common'))
    model.add_argument(name='inputs',
                       argument_type=ArgumentType.CONTAINER,
                       data_reference='steps.2.produce')
    model.add_argument(name='outputs',
                       argument_type=ArgumentType.CONTAINER,
                       data_reference='steps.2.produce')
    model.add_output('produce')
    model.add_hyperparameter(name='add_index_columns',
                             argument_type=ArgumentType.VALUE,
                             data=True)
    pipeline.add_step(model)

    # step 4: assemble predictions against the raw dataframe
    preds = PrimitiveStep(primitive=index.get_primitive(
        'd3m.primitives.data_transformation.construct_predictions.Common'))
    preds.add_argument(name='inputs',
                       argument_type=ArgumentType.CONTAINER,
                       data_reference='steps.3.produce')
    preds.add_argument(name='reference',
                       argument_type=ArgumentType.CONTAINER,
                       data_reference='steps.0.produce')
    preds.add_output('produce')
    pipeline.add_step(preds)

    pipeline.add_output(name='output predictions',
                        data_reference='steps.4.produce')
    self.pipeline = pipeline
def image_regress_pipeline(resolver=None) -> Pipeline:
    """Build an image-regression pipeline.

    Flow: denormalize -> dataset-to-dataframe -> {extract target columns;
    dataframe-to-tensor -> VGG16 features -> PCA} -> random forest
    regressor -> output predictions.

    resolver: optional primitive resolver; defaults to the project's
        BlackListResolver when not supplied.
    Returns the assembled Pipeline.
    """
    if resolver is None:
        resolver = custom_resolver.BlackListResolver()

    # Creating Pipeline
    pipeline_description = Pipeline(context='PRETRAINING')
    pipeline_description.add_input(name='inputs')
    start_step = "inputs.0"

    # Step 0: Denormalize
    step_0 = PrimitiveStep(primitive_description=d3m.primitives.dsbox.
                           Denormalize.metadata.query(),
                           resolver=resolver)
    step_0.add_argument(name='inputs',
                        argument_type=ArgumentType.CONTAINER,
                        data_reference=start_step)
    step_0.add_output('produce')
    pipeline_description.add_step(step_0)

    # Step 1: DatasetToDataFrame
    step_1 = PrimitiveStep(primitive_description=d3m.primitives.datasets.
                           DatasetToDataFrame.metadata.query(),
                           resolver=resolver)
    step_1.add_argument(name='inputs',
                        argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.0.produce')
    add_hyperparameters(step_1, d3m.primitives.data.DataFrameToList)
    step_1.add_output('produce')
    pipeline_description.add_step(step_1)

    # Step 2: extract the target columns from the dataframe
    step_2 = PrimitiveStep(primitive_description=d3m.primitives.data.
                           ExtractColumnsBySemanticTypes.metadata.query(),
                           resolver=resolver)
    step_2.add_argument(name='inputs',
                        argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.1.produce')
    step_2.add_hyperparameter(
        name='semantic_types',
        argument_type=ArgumentType.VALUE,
        data=[
            "https://metadata.datadrivendiscovery.org/types/Target",
            "https://metadata.datadrivendiscovery.org/types/SuggestedTarget"
        ])
    step_2.add_output('produce')
    pipeline_description.add_step(step_2)

    # Step 3: dataframe to image tensor
    step_3 = PrimitiveStep(primitive_description=d3m.primitives.dsbox.
                           DataFrameToTensor.metadata.query(),
                           resolver=resolver)
    step_3.add_argument(name='inputs',
                        argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.1.produce')
    step_3.add_output('produce')
    pipeline_description.add_step(step_3)

    # Step 4: VGG16 feature extractor
    step_4 = PrimitiveStep(primitive_description=d3m.primitives.dsbox.
                           Vgg16ImageFeature.metadata.query(),
                           resolver=resolver)
    step_4.add_argument(name='inputs',
                        argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.3.produce')
    step_4.add_output('produce')
    pipeline_description.add_step(step_4)

    # Step 5: apply PCA to the extracted features
    step_5 = PrimitiveStep(primitive_description=d3m.primitives.sklearn_wrap.
                           SKPCA.metadata.query(),
                           resolver=resolver)
    step_5.add_argument(name='inputs',
                        argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.4.produce')
    step_5.add_output('produce')
    pipeline_description.add_step(step_5)

    # Step 6: random forest regressor (PCA features vs. extracted targets)
    step_6 = PrimitiveStep(primitive_description=d3m.primitives.sklearn_wrap.
                           SKRandomForestRegressor.metadata.query(),
                           resolver=resolver)
    step_6.add_argument(name='inputs',
                        argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.5.produce')
    step_6.add_argument(name='outputs',
                        argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.2.produce')
    step_6.add_output('produce')
    pipeline_description.add_step(step_6)

    # Output Generated
    pipeline_description.add_output(name='output predictions',
                                    data_reference='steps.6.produce')
    # Removed dead code: last_step/attributes/targets locals were
    # computed via pipelines.int_to_step but never used.
    return pipeline_description
from d3m import index
from d3m.metadata.base import ArgumentType, Context
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
import sys

# Script fragment that builds a time-series clustering pipeline:
# denormalize the dataset, then run the Sloth k-means primitive.
# NOTE(review): no output step is added here — presumably the pipeline
# is finished (steps added, output declared) further down the original
# file; confirm before reusing this fragment standalone.

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: Denormalize primitive
step_0 = PrimitiveStep(primitive=index.get_primitive(
    'd3m.primitives.data_transformation.denormalize.Common'))
step_0.add_argument(name='inputs',
                    argument_type=ArgumentType.CONTAINER,
                    data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: DISTIL/NK Storc primitive (Sloth k-means clustering, 10 clusters,
# expecting long-format time series)
step_1 = PrimitiveStep(
    primitive=index.get_primitive('d3m.primitives.clustering.k_means.Sloth'))
step_1.add_argument(name='inputs',
                    argument_type=ArgumentType.CONTAINER,
                    data_reference='steps.0.produce')
step_1.add_hyperparameter(name='nclusters',
                          argument_type=ArgumentType.VALUE,
                          data=10)
step_1.add_hyperparameter(name='long_format',
                          argument_type=ArgumentType.VALUE,
                          data=True)
step_1.add_output('produce')
def __init__(self):
    """Assemble the fixed 9-step TSNE + ensemble-forest pipeline.

    Wiring: dataset->dataframe (0), profiler (1), column parser (2),
    imputer (3), TSNE embedding (4), attribute extraction from the
    embedding (5), target extraction from the pre-embedding frame (6),
    random-forest learner (7), construct_predictions (8).  The finished
    Pipeline object is stored on ``self.pipeline``.
    """
    pipeline = Pipeline()
    pipeline.add_input(name="inputs")

    def _append_step(primitive_path, arguments, hyperparams=()):
        # Build one step: CONTAINER arguments, VALUE hyperparameters,
        # a single 'produce' output, then register it on the pipeline.
        step = PrimitiveStep(primitive=index.get_primitive(primitive_path))
        for arg_name, reference in arguments:
            step.add_argument(
                name=arg_name,
                argument_type=ArgumentType.CONTAINER,
                data_reference=reference,
            )
        for hp_name, hp_value in hyperparams:
            step.add_hyperparameter(
                name=hp_name,
                argument_type=ArgumentType.VALUE,
                data=hp_value,
            )
        step.add_output("produce")
        pipeline.add_step(step)

    # step 0: dataset -> single dataframe
    _append_step(
        "d3m.primitives.data_transformation.dataset_to_dataframe.Common",
        [("inputs", "inputs.0")],
    )
    # step 1: annotate column roles / semantic types
    _append_step(
        "d3m.primitives.schema_discovery.profiler.Common",
        [("inputs", "steps.0.produce")],
    )
    # step 2: parse the listed semantic types into concrete column values
    _append_step(
        "d3m.primitives.data_transformation.column_parser.Common",
        [("inputs", "steps.1.produce")],
        [
            (
                "parse_semantic_types",
                [
                    "http://schema.org/Boolean",
                    "http://schema.org/Integer",
                    "http://schema.org/Float",
                    "https://metadata.datadrivendiscovery.org/types/FloatVector",
                    "http://schema.org/DateTime",
                ],
            ),
        ],
    )
    # step 3: impute missing values, replacing columns in place
    _append_step(
        "d3m.primitives.data_cleaning.imputer.SKlearn",
        [("inputs", "steps.2.produce")],
        [("return_result", "replace"), ("use_semantic_types", True)],
    )
    # step 4: 3-component TSNE embedding of the imputed frame
    _append_step(
        "d3m.primitives.dimensionality_reduction.t_distributed_stochastic_neighbor_embedding.Tsne",
        [("inputs", "steps.3.produce")],
        [("n_components", 3)],
    )
    # step 5: attributes are taken from the TSNE output
    _append_step(
        "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common",
        [("inputs", "steps.4.produce")],
        [("semantic_types",
          ["https://metadata.datadrivendiscovery.org/types/Attribute"])],
    )
    # step 6: targets are taken from the pre-embedding frame (steps.3)
    _append_step(
        "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common",
        [("inputs", "steps.3.produce")],
        [("semantic_types",
          ["https://metadata.datadrivendiscovery.org/types/Target"])],
    )
    # step 7: random-forest learner over the embedded attributes
    _append_step(
        "d3m.primitives.learner.random_forest.DistilEnsembleForest",
        [("inputs", "steps.5.produce"), ("outputs", "steps.6.produce")],
    )
    # step 8: construct the final predictions frame, referencing the
    # step-0 dataframe
    _append_step(
        "d3m.primitives.data_transformation.construct_predictions.Common",
        [("inputs", "steps.7.produce"), ("reference", "steps.0.produce")],
    )

    pipeline.add_output(name="output predictions",
                        data_reference="steps.8.produce")
    self.pipeline = pipeline