Example #1
def create_pipeline(metric: str) -> Pipeline:

    # create the basic pipeline
    vertex_nomination_pipeline = Pipeline(context=PipelineContext.TESTING)
    vertex_nomination_pipeline.add_input(name='inputs')

    # step 0 - extract the graphs
    step = PrimitiveStep(primitive_description=DistilSingleGraphLoaderPrimitive.metadata.query())
    step.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
    step.add_output('produce')
    step.add_output('produce_target')
    vertex_nomination_pipeline.add_step(step)

    # step 1 - predict links
    step = PrimitiveStep(primitive_description=DistilLinkPredictionPrimitive.metadata.query())
    step.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
    step.add_argument(name='outputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce_target')
    step.add_hyperparameter('metric', ArgumentType.VALUE, metric)
    step.add_output('produce')
    vertex_nomination_pipeline.add_step(step)

    # Adding output step to the pipeline
    vertex_nomination_pipeline.add_output(name='output', data_reference='steps.1.produce')

    return vertex_nomination_pipeline
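
The snippets on this page omit their import headers. A typical preamble, drawn from the d3m core package (exact module paths vary slightly across d3m releases; older versions expose PipelineContext from d3m.metadata.pipeline where newer ones use Context from d3m.metadata.base):

# Common imports assumed by the pipeline-building examples below.
from d3m import index
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata.base import ArgumentType, Context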
Example #2
def create_pipeline(metric: str) -> Pipeline:

    # create the basic pipeline
    vertex_classification_pipeline = Pipeline(context=PipelineContext.TESTING)
    vertex_classification_pipeline.add_input(name='inputs')

    # step 0 - extract the graphs
    step = PrimitiveStep(
        primitive_description=VertexClassificationParser.metadata.query())
    step.add_argument(name='inputs',
                      argument_type=ArgumentType.CONTAINER,
                      data_reference='inputs.0')
    step.add_output('produce')
    vertex_classification_pipeline.add_step(step)

    # step 1 - classify
    step = PrimitiveStep(
        primitive_description=VertexClassification.metadata.query())
    step.add_argument(name='inputs',
                      argument_type=ArgumentType.CONTAINER,
                      data_reference='steps.0.produce')
    step.add_hyperparameter('jvm_memory', ArgumentType.VALUE, 0.6)
    step.add_output('produce')
    vertex_classification_pipeline.add_step(step)

    # Adding output step to the pipeline
    vertex_classification_pipeline.add_output(name='output',
                                              data_reference='steps.1.produce')

    return vertex_classification_pipeline
Example #3
def create_pipeline(metric: str) -> Pipeline:

    # create the basic pipeline
    kanine_pipeline = Pipeline(context=PipelineContext.TESTING)
    kanine_pipeline.add_input(name='inputs')

    # Denormalize so that we have a single dataframe in the dataset
    step = PrimitiveStep(
        primitive_description=DenormalizePrimitive.metadata.query())
    step.add_argument(name='inputs',
                      argument_type=ArgumentType.CONTAINER,
                      data_reference='inputs.0')
    step.add_output('produce')
    kanine_pipeline.add_step(step)

    # step 1 - kanine classification
    step = PrimitiveStep(primitive_description=Kanine.metadata.query())
    step.add_argument(name='inputs',
                      argument_type=ArgumentType.CONTAINER,
                      data_reference='steps.0.produce')
    step.add_argument(name='outputs',
                      argument_type=ArgumentType.CONTAINER,
                      data_reference='steps.0.produce')
    step.add_output('produce')
    kanine_pipeline.add_step(step)

    # Adding output step to the pipeline
    kanine_pipeline.add_output(name='output', data_reference='steps.1.produce')

    return kanine_pipeline
Example #4
def create_pipeline(metric: str) -> Pipeline:

    # create the basic pipeline
    tsf_pipeline = Pipeline(context=PipelineContext.TESTING)
    tsf_pipeline.add_input(name='inputs')

    # step 0 - Extract dataframe from dataset
    step = PrimitiveStep(
        primitive_description=DatasetToDataFramePrimitive.metadata.query())
    step.add_argument(name='inputs',
                      argument_type=ArgumentType.CONTAINER,
                      data_reference='inputs.0')
    step.add_output('produce')
    tsf_pipeline.add_step(step)

    # step 1 - Parse columns.
    step = PrimitiveStep(
        primitive_description=ColumnParserPrimitive.metadata.query())
    step.add_argument(name='inputs',
                      argument_type=ArgumentType.CONTAINER,
                      data_reference='steps.0.produce')
    step.add_output('produce')
    semantic_types = (
        'http://schema.org/Boolean', 'http://schema.org/Integer',
        'http://schema.org/Float',
        'https://metadata.datadrivendiscovery.org/types/FloatVector')
    step.add_hyperparameter('parse_semantic_types', ArgumentType.VALUE,
                            semantic_types)
    tsf_pipeline.add_step(step)

    # step 2 - Parrot ARIMA
    step = PrimitiveStep(primitive_description=Parrot.metadata.query())
    step.add_argument(name='inputs',
                      argument_type=ArgumentType.CONTAINER,
                      data_reference='steps.1.produce')
    step.add_argument(name='outputs',
                      argument_type=ArgumentType.CONTAINER,
                      data_reference='steps.1.produce')
    step.add_hyperparameter(name='seasonal_differencing',
                            argument_type=ArgumentType.VALUE,
                            data=11)
    step.add_hyperparameter(name='n_periods',
                            argument_type=ArgumentType.VALUE,
                            data=21)
    step.add_output('produce')
    tsf_pipeline.add_step(step)

    # step 3 - convert predictions to expected format
    # step = PrimitiveStep(primitive_description=ConstructPredictionsPrimitive.metadata.query())
    # step.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
    # step.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
    # step.add_output('produce')
    # tsf_pipeline.add_step(step)

    # Adding output step to the pipeline
    tsf_pipeline.add_output(name='output', data_reference='steps.2.produce')

    return tsf_pipeline
Example #5
    def create_pipeline_json(self, prim_dict):
        """
        Generate pipeline.json
        """
        name = "Pipeline for evaluation"
        pipeline_id = self.id #+ "_" + str(self.rank)
        pipeline_description = Pipeline(pipeline_id=pipeline_id, context=Context.EVALUATION, name=name)
        for ip in self.inputs:
            pipeline_description.add_input(name=ip['name'])

        num = self.num_steps()
        for i in range(num):
            p = prim_dict[self.primitives[i]]
            pdesc = {
                'id': p.id,
                'version': p.primitive_class.version,
                'python_path': p.primitive_class.python_path,
                'name': p.primitive_class.name,
                'digest': p.primitive_class.digest,
            }
            step = PrimitiveStep(primitive_description=pdesc)

            for name, value in self.primitives_arguments[i].items():
                step.add_argument(name=name, argument_type=ArgumentType.CONTAINER, data_reference=value['data'])
            step.add_output(output_id=p.primitive_class.produce_methods[0])
            if self.hyperparams[i] is not None:
                for name, value in self.hyperparams[i].items():
                    step.add_hyperparameter(name=name, argument_type=ArgumentType.VALUE, data=value)
            pipeline_description.add_step(step)

        for op in self.outputs:
            pipeline_description.add_output(data_reference=op[2], name=op[3])

        self.pipeline_description = pipeline_description
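
Once built, the stored description can be serialized with the standard d3m helpers. A minimal usage sketch (obj stands in for an instance of the surrounding class, prim_dict for its primitive lookup table):

# Hypothetical usage: generate the description, then write pipeline.json.
obj.create_pipeline_json(prim_dict)
with open('pipeline.json', 'w') as f:
    f.write(obj.pipeline_description.to_json())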
Example #6
    def __init__(self):

        pipeline_description = Pipeline()
        pipeline_description.add_input(name="inputs")

        # Ts formatter
        step = PrimitiveStep(primitive=index.get_primitive(
            "d3m.primitives.data_transformation.time_series_formatter.DistilTimeSeriesFormatter"
        ))
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="inputs.0",
        )
        step.add_output("produce")
        pipeline_description.add_step(step)

        # DS to DF on formatted ts DS
        step = PrimitiveStep(primitive=index.get_primitive(
            "d3m.primitives.data_transformation.dataset_to_dataframe.Common"))
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.0.produce",
        )
        step.add_output("produce")
        pipeline_description.add_step(step)

        # Grouping Field Compose
        step = PrimitiveStep(primitive=index.get_primitive(
            "d3m.primitives.data_transformation.grouping_field_compose.Common")
                             )
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.1.produce",
        )
        step.add_output("produce")
        pipeline_description.add_step(step)

        # Storc primitive -> KMeans
        step = PrimitiveStep(primitive=index.get_primitive(
            "d3m.primitives.clustering.k_means.Sloth"))
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.2.produce",
        )
        step.add_hyperparameter(name="nclusters",
                                argument_type=ArgumentType.VALUE,
                                data=3)
        step.add_output("produce")
        pipeline_description.add_step(step)

        # Final Output
        pipeline_description.add_output(name="output predictions",
                                        data_reference="steps.3.produce")

        self.pipeline = pipeline_description
Example #7
def build_demo_pipeline():

    # Creating pipeline
    pipeline = Pipeline(context=Context.TESTING)
    pipeline.add_input(name='inputs')

    # Step 0: DFS
    step_0 = PrimitiveStep(primitive_description=Featuretools.metadata.query())
    step_0.add_argument(name='inputs',
                        argument_type=ArgumentType.CONTAINER,
                        data_reference='inputs.0')
    step_0.add_output('produce')
    pipeline.add_step(step_0)

    # Step 1: SKlearnImputer
    step_1 = PrimitiveStep(
        primitive_description=SKlearnImputer.metadata.query())
    step_1.add_argument(name='inputs',
                        argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.0.produce')
    step_1.add_output('produce')
    pipeline.add_step(step_1)

    # Step 2: SKlearnRFC
    step_2 = PrimitiveStep(primitive_description=SKlearnRFC.metadata.query())
    step_2.add_hyperparameter(name='use_semantic_types',
                              argument_type=ArgumentType.VALUE,
                              data=True)
    step_2.add_hyperparameter(name='add_index_columns',
                              argument_type=ArgumentType.VALUE,
                              data=True)
    step_2.add_argument(name='inputs',
                        argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.1.produce')
    step_2.add_argument(name='outputs',
                        argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.1.produce')
    step_2.add_output('produce')
    pipeline.add_step(step_2)

    # Step 3: ConstructPredictions
    step_3 = PrimitiveStep(
        primitive_description=DataFrameCommon.metadata.query())
    step_3.add_argument(name='inputs',
                        argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.2.produce')
    step_3.add_argument(name='reference',
                        argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.2.produce')
    step_3.add_output('produce')
    pipeline.add_step(step_3)

    # Final Output
    pipeline.add_output(name='output predictions',
                        data_reference='steps.3.produce')

    return pipeline
Example #8
    def __init__(self, epochs: int = 10, n_steps: int = 20):

        pipeline_description = Pipeline()
        pipeline_description.add_input(name="inputs")

        # Denormalize primitive
        step = PrimitiveStep(primitive=index.get_primitive(
            "d3m.primitives.data_transformation.denormalize.Common"))
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="inputs.0",
        )
        step.add_output("produce")
        pipeline_description.add_step(step)

        # DS to DF on input DS
        step = PrimitiveStep(primitive=index.get_primitive(
            "d3m.primitives.data_transformation.dataset_to_dataframe.Common"))
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.0.produce",
        )
        step.add_output("produce")
        pipeline_description.add_step(step)

        # RetinaNet primitive
        step = PrimitiveStep(primitive=index.get_primitive(
            "d3m.primitives.object_detection.retina_net.ObjectDetectionRN"))
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.1.produce",
        )
        step.add_argument(
            name="outputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.1.produce",
        )
        step.add_hyperparameter(name="n_epochs",
                                argument_type=ArgumentType.VALUE,
                                data=epochs)
        step.add_hyperparameter(name="n_steps",
                                argument_type=ArgumentType.VALUE,
                                data=n_steps)
        step.add_hyperparameter(name="weights_path",
                                argument_type=ArgumentType.VALUE,
                                data="/scratch_dir/")
        step.add_output("produce")
        pipeline_description.add_step(step)

        # Final Output
        pipeline_description.add_output(name="output predictions",
                                        data_reference="steps.2.produce")

        self.pipeline = pipeline_description
Example #9
def community_detection(resolver=None):
    if resolver is None:
        resolver = custom_resolver.BlackListResolver()

    # Creating Pipeline
    pipeline_description = Pipeline(context=PipelineContext.TESTING)
    pipeline_description.add_input(name='inputs')
    start_step = "inputs.0"

    # Step 0
    step_0 = PrimitiveStep(primitive_description=d3m.primitives.sri.graph.
                           CommunityDetectionParser.metadata.query(),
                           resolver=resolver)
    step_0.add_argument(name='inputs',
                        argument_type=ArgumentType.CONTAINER,
                        data_reference=start_step)
    step_0.add_output('produce')
    pipeline_description.add_step(step_0)

    # Step 1
    step_1 = PrimitiveStep(primitive_description=d3m.primitives.sri.psl.
                           CommunityDetection.metadata.query(),
                           resolver=resolver)
    step_1.add_argument(name='inputs',
                        argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.0.produce')
    step_1.add_hyperparameter(name='jvm_memory',
                              argument_type=ArgumentType.VALUE,
                              data=0.5)
    step_1.add_output('produce')
    pipeline_description.add_step(step_1)

    # Step 2: RemoveColumns
    step_2 = PrimitiveStep(primitive_description=d3m.primitives.data.
                           RemoveColumns.metadata.query(),
                           resolver=resolver)
    step_2.add_argument(name='inputs',
                        argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.1.produce')
    step_2.add_hyperparameter(name='columns',
                              argument_type=ArgumentType.VALUE,
                              data=[0])
    step_2.add_output('produce')
    pipeline_description.add_step(step_2)

    pipeline_description.add_output(name='Result',
                                    data_reference='steps.2.produce')

    return pipeline_description
Example #10
def create_pipeline(metric: str) -> Pipeline:

    # create the basic pipeline
    var_pipeline = Pipeline(context=PipelineContext.TESTING)
    var_pipeline.add_input(name='inputs')

    # step 0 - Extract dataframe from dataset
    step = PrimitiveStep(
        primitive_description=DatasetToDataFramePrimitive.metadata.query())
    step.add_argument(name='inputs',
                      argument_type=ArgumentType.CONTAINER,
                      data_reference='inputs.0')
    step.add_output('produce')
    var_pipeline.add_step(step)

    # step 1 - Parse columns.
    step = PrimitiveStep(
        primitive_description=ColumnParserPrimitive.metadata.query())
    step.add_argument(name='inputs',
                      argument_type=ArgumentType.CONTAINER,
                      data_reference='steps.0.produce')
    step.add_output('produce')
    semantic_types = (
        'http://schema.org/Boolean', 'http://schema.org/Integer',
        'http://schema.org/Float',
        'https://metadata.datadrivendiscovery.org/types/FloatVector')
    step.add_hyperparameter('parse_semantic_types', ArgumentType.VALUE,
                            semantic_types)
    var_pipeline.add_step(step)

    # step 2 - Vector Auto Regression
    step = PrimitiveStep(primitive_description=VAR.metadata.query())
    step.add_argument(name='inputs',
                      argument_type=ArgumentType.CONTAINER,
                      data_reference='steps.1.produce')
    step.add_argument(name='outputs',
                      argument_type=ArgumentType.CONTAINER,
                      data_reference='steps.1.produce')
    step.add_output('produce')
    var_pipeline.add_step(step)

    # Adding output step to the pipeline
    var_pipeline.add_output(name='output', data_reference='steps.2.produce')

    return var_pipeline
Example #11
    def _new_pipeline(pipeline, hyperparams=None):
        hyperparams = to_dicts(hyperparams) if hyperparams else dict()

        new_pipeline = Pipeline(context=Context.TESTING)
        for input_ in pipeline.inputs:
            new_pipeline.add_input(name=input_['name'])

        for step_id, old_step in enumerate(pipeline.steps):
            new_step = PrimitiveStep(primitive=old_step.primitive)
            for name, argument in old_step.arguments.items():
                new_step.add_argument(
                    name=name,
                    argument_type=argument['type'],
                    data_reference=argument['data']
                )
            for output in old_step.outputs:
                new_step.add_output(output)

            new_hyperparams = hyperparams.get(str(step_id), dict())
            for name, hyperparam in old_step.hyperparams.items():
                if name not in new_hyperparams:
                    new_step.add_hyperparameter(
                        name=name,
                        argument_type=ArgumentType.VALUE,
                        data=hyperparam['data']
                    )

            for name, value in new_hyperparams.items():
                new_step.add_hyperparameter(
                    name=name,
                    argument_type=ArgumentType.VALUE,
                    data=value
                )

            new_pipeline.add_step(new_step)

        for output in pipeline.outputs:
            new_pipeline.add_output(
                name=output['name'],
                data_reference=output['data']
            )

        new_pipeline.cv_scores = list()
        new_pipeline.score = None

        return new_pipeline
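
The hyperparams override is keyed by step index (stringified after to_dicts normalization), mapping hyperparameter names to new values; anything not overridden is copied from the old step. A minimal sketch, assuming to_dicts passes plain nested dicts through unchanged:

# Hypothetical override: replace n_estimators on step 2, copy everything else.
tuned = _new_pipeline(pipeline, hyperparams={'2': {'n_estimators': 200}})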
Example #12
    def load_pipeline_architecture(self, pipeline_architecture_dict):
        """
        Loads pipeline architecture dictionary and returns a d3m Pipeline object.

        Return pipeline 
        """

        pipeline_description = Pipeline(context=Context.TESTING)
        pipeline_description.add_input(name='inputs')

        # For each corresponding stage in the dictionary create a step
        steps = []
        stage_name_to_reference_name = {}
        for stage_dict in pipeline_architecture_dict:
            
            # Extract stage attributes
            primitive = stage_dict["primitive"]
            if isinstance(primitive, str):
                primitive = get_primitive_with_name(primitive)
            cur_stage_name = stage_dict["stage_name"]
            input_stage = stage_dict["input"]
            
            # Create primitive step
            step = PrimitiveStep(primitive_description=primitive.metadata.query())
            data_reference = "inputs.0" if input_stage == PipelineWrapper.PIPELINE_INPUT else stage_name_to_reference_name[input_stage]            
            step.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER, data_reference=data_reference)
            if "hyperparameters" in stage_dict:
                for k,v in stage_dict["hyperparameters"].items():
                    step.add_hyperparameter(name=k, argument_type=ArgumentType.VALUE, data=v)
            if "arguments" in stage_dict:
                for k,v in stage_dict["arguments"].items():
                    step.add_argument(name=k, argument_type=ArgumentType.CONTAINER, data_reference=stage_name_to_reference_name[v])
            step.add_output("produce")
            pipeline_description.add_step(step)
            reference_name = next(iter(step.get_output_data_references()))

            # Update accounting
            stage_name_to_reference_name[cur_stage_name] = reference_name
            steps.append(step)

        # The pipeline output is the output of the last step
        last_output_reference = next(iter(steps[-1].get_output_data_references()))
        pipeline_description.add_output(name="output", data_reference=last_output_reference)

        return pipeline_description
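
For reference, a hypothetical two-stage architecture in the shape this loader expects (the stage names are illustrative; the primitive paths are standard d3m ones, and PipelineWrapper.PIPELINE_INPUT is the sentinel checked above):

# Hypothetical input for load_pipeline_architecture.
# wrapper: an instance of the surrounding PipelineWrapper class.
architecture = [
    {
        "stage_name": "to_dataframe",
        "primitive": "d3m.primitives.data_transformation.dataset_to_dataframe.Common",
        "input": PipelineWrapper.PIPELINE_INPUT,
    },
    {
        "stage_name": "parse_columns",
        "primitive": "d3m.primitives.data_transformation.column_parser.Common",
        "input": "to_dataframe",
        "hyperparameters": {},  # optional, as is "arguments"
    },
]
pipeline = wrapper.load_pipeline_architecture(architecture)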
Example #13
def keras2pipeline(keras_model, batch_size=32):
    # Creating pipeline
    from tensorflow.python.keras.activations import softmax
    pipeline_description = Pipeline()

    pipeline_description.add_input(name='inputs')

    set_data(pipeline_description)
    set_loss(pipeline_description)

    offset = len(pipeline_description.steps)

    previous_layer_ids = get_previous_layer_ids(keras_model)

    layers = keras_model.layers

    step_id = 0
    layer_to_step_id = {}

    total_layer_num = len(layers)
    for i, layer in enumerate(layers):
        cls_name = get_layer_class_name(layer)
        if cls_name in OMIT_LAYERS:
            continue
        layer_id = get_layer_id(layer)
        if len(previous_layer_ids[layer_id]) > 0:
            layer.previous_layer_ids = tuple(
                layer_to_step_id[i] + offset for i in previous_layer_ids[layer_id]
            )
        else:
            layer.previous_layer_ids = [None]
        # JPL's runtime lacks a standalone Softmax layer, so as a workaround we set softmax as the final Dense layer's activation
        if i == total_layer_num - 2 and cls_name == 'Dense':
            layer.activation = softmax
        d3m_step = step_function[cls_name](step_id, layer)
        pipeline_description.add_step(d3m_step)
        layer_to_step_id[layer_id] = step_id
        step_id += 1

    set_learner(pipeline_description, batch_size)
    set_prediction(pipeline_description)
    pipeline_description.add_output(
        name='output predictions', data_reference=f"steps.{len(pipeline_description.steps) - 1}.produce")

    return pipeline_description
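
A minimal usage sketch (the Sequential model is illustrative; keras2pipeline walks its layers and emits one d3m step per supported layer, skipping those in OMIT_LAYERS):

# Hypothetical usage of keras2pipeline on a small Keras model.
from tensorflow.keras import Sequential, layers

model = Sequential([
    layers.Dense(64, activation='relu', input_shape=(10,)),
    layers.Dense(3),
])
pipeline = keras2pipeline(model, batch_size=64)
print(pipeline.to_json())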
Example #14
def create_pipeline(metric: str) -> Pipeline:

    # create the basic pipeline
    graph_matching_pipeline = Pipeline(context=PipelineContext.TESTING)
    graph_matching_pipeline.add_input(name='inputs')

    # step 0 - extract the graphs
    step = PrimitiveStep(
        primitive_description=DistilGraphLoaderPrimitive.metadata.query())
    step.add_argument(name='inputs',
                      argument_type=ArgumentType.CONTAINER,
                      data_reference='inputs.0')
    step.add_output('produce')
    step.add_output('produce_target')
    graph_matching_pipeline.add_step(step)

    # step 1 - match the graphs that have been seeded
    step = PrimitiveStep(
        primitive_description=DistilSeededGraphMatchingPrimitive.metadata.
        query())
    step.add_argument(name='inputs',
                      argument_type=ArgumentType.CONTAINER,
                      data_reference='steps.0.produce')
    step.add_argument(name='outputs',
                      argument_type=ArgumentType.CONTAINER,
                      data_reference='steps.0.produce_target')
    step.add_hyperparameter('metric', ArgumentType.VALUE, metric)
    step.add_output('produce')
    graph_matching_pipeline.add_step(step)

    # convert predictions to expected format
    #step = PrimitiveStep(primitive_description=ConstructPredictionsPrimitive.metadata.query())
    #step.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
    #step.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce_target')
    #step.add_output('produce')
    #step.add_hyperparameter('use_columns', ArgumentType.VALUE, [0, 1])
    #graph_matching_pipeline.add_step(step)

    # Adding output step to the pipeline
    graph_matching_pipeline.add_output(name='output',
                                       data_reference='steps.1.produce')

    return graph_matching_pipeline
Example #15
    def __init__(self):

        pipeline_description = Pipeline()
        pipeline_description.add_input(name="inputs")

        # DS to DF on input DS
        step = PrimitiveStep(primitive=index.get_primitive(
            "d3m.primitives.data_transformation.dataset_to_dataframe.Common"))
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="inputs.0",
        )
        step.add_output("produce")
        pipeline_description.add_step(step)

        # column parser
        step = PrimitiveStep(primitive=index.get_primitive(
            "d3m.primitives.data_transformation.column_parser.Common"))
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.0.produce",
        )
        step.add_output("produce")
        pipeline_description.add_step(step)

        # Duke primitive
        step = PrimitiveStep(primitive=index.get_primitive(
            "d3m.primitives.data_cleaning.text_summarization.Duke"))
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.1.produce",
        )
        step.add_output("produce")
        pipeline_description.add_step(step)

        # Final Output
        pipeline_description.add_output(name="output predictions",
                                        data_reference="steps.2.produce")

        self.pipeline = pipeline_description
Example #16
step_3 = PrimitiveStep(primitive=primitive_3)
step_3.add_hyperparameter(name='window_size',
                          argument_type=ArgumentType.VALUE,
                          data=4)
step_3.add_hyperparameter(name='use_semantic_types',
                          argument_type=ArgumentType.VALUE,
                          data=True)
step_3.add_hyperparameter(
    name='use_columns', argument_type=ArgumentType.VALUE,
    data=(5, 6))  # Note: multi-dimensional values are not handled correctly here
step_3.add_hyperparameter(name='return_result',
                          argument_type=ArgumentType.VALUE,
                          data='append')
step_3.add_argument(name='inputs',
                    argument_type=ArgumentType.CONTAINER,
                    data_reference='steps.2.produce')
step_3.add_output('produce')
pipeline_description.add_step(step_3)

# Final Output
pipeline_description.add_output(name='output',
                                data_reference='steps.3.produce')

# Output to YAML
yaml = pipeline_description.to_yaml()
with open('pipeline.yml', 'w') as f:
    f.write(yaml)

# Alternatively, output JSON
# data = pipeline_description.to_json()
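
To load a serialized pipeline back, the d3m Pipeline class provides matching constructors, for example:

# Read the pipeline description back from the YAML written above.
from d3m.metadata.pipeline import Pipeline
with open('pipeline.yml') as f:
    restored = Pipeline.from_yaml(f)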
Example #17
def create_pipeline(metric: str) -> Pipeline:
    previous_step = 0
    input_val = 'steps.{}.produce'

    # create the basic pipeline
    qa_pipeline = Pipeline(context=PipelineContext.TESTING)
    qa_pipeline.add_input(name='inputs')

    # Denormalize so that we have a single dataframe in the dataset
    step = PrimitiveStep(
        primitive_description=DenormalizePrimitive.metadata.query())
    step.add_argument(name='inputs',
                      argument_type=ArgumentType.CONTAINER,
                      data_reference='inputs.0')
    step.add_output('produce')
    qa_pipeline.add_step(step)

    # Extract dataframe from dataset
    step = PrimitiveStep(
        primitive_description=DatasetToDataFramePrimitive.metadata.query())
    step.add_argument(name='inputs',
                      argument_type=ArgumentType.CONTAINER,
                      data_reference=input_val.format(previous_step))
    step.add_output('produce')
    qa_pipeline.add_step(step)
    previous_step += 1

    # Parse columns.
    step = PrimitiveStep(
        primitive_description=ColumnParserPrimitive.metadata.query())
    step.add_argument(name='inputs',
                      argument_type=ArgumentType.CONTAINER,
                      data_reference=input_val.format(previous_step))
    step.add_output('produce')
    semantic_types = (
        'http://schema.org/Boolean', 'http://schema.org/Integer',
        'http://schema.org/Float',
        'https://metadata.datadrivendiscovery.org/types/FloatVector')
    step.add_hyperparameter('parse_semantic_types', ArgumentType.VALUE,
                            semantic_types)
    qa_pipeline.add_step(step)
    previous_step += 1
    parse_step = previous_step

    # Extract attributes
    step = PrimitiveStep(
        primitive_description=ExtractColumnsBySemanticTypesPrimitive.metadata.
        query())
    step.add_argument(name='inputs',
                      argument_type=ArgumentType.CONTAINER,
                      data_reference=input_val.format(parse_step))
    step.add_output('produce')
    step.add_hyperparameter(
        'semantic_types', ArgumentType.VALUE,
        ('https://metadata.datadrivendiscovery.org/types/Attribute', ))
    qa_pipeline.add_step(step)
    previous_step += 1
    attributes_step = previous_step

    # Extract targets
    step = PrimitiveStep(
        primitive_description=ExtractColumnsBySemanticTypesPrimitive.metadata.
        query())
    step.add_argument(name='inputs',
                      argument_type=ArgumentType.CONTAINER,
                      data_reference=input_val.format(parse_step))
    step.add_output('produce')
    target_types = (
        'https://metadata.datadrivendiscovery.org/types/Target',
        'https://metadata.datadrivendiscovery.org/types/TrueTarget')
    step.add_hyperparameter('semantic_types', ArgumentType.VALUE, target_types)
    qa_pipeline.add_step(step)
    previous_step += 1
    target_step = previous_step

    # Generates a bert pair classification model.
    step = PrimitiveStep(
        primitive_description=BertPairClassificationPrimitive.metadata.query())
    step.add_argument(name='inputs',
                      argument_type=ArgumentType.CONTAINER,
                      data_reference=input_val.format(attributes_step))
    step.add_argument(name='outputs',
                      argument_type=ArgumentType.CONTAINER,
                      data_reference=input_val.format(target_step))
    step.add_output('produce')
    step.add_hyperparameter('metric', ArgumentType.VALUE, metric)
    step.add_hyperparameter('doc_col_0', ArgumentType.VALUE, 1)
    step.add_hyperparameter('doc_col_1', ArgumentType.VALUE, 3)
    qa_pipeline.add_step(step)
    previous_step += 1

    # convert predictions to expected format
    step = PrimitiveStep(
        primitive_description=ConstructPredictionsPrimitive.metadata.query())
    step.add_argument(name='inputs',
                      argument_type=ArgumentType.CONTAINER,
                      data_reference=input_val.format(previous_step))
    step.add_argument(name='reference',
                      argument_type=ArgumentType.CONTAINER,
                      data_reference=input_val.format(parse_step))
    step.add_output('produce')
    qa_pipeline.add_step(step)
    previous_step += 1

    # Adding output step to the pipeline
    qa_pipeline.add_output(name='output',
                           data_reference=input_val.format(previous_step))

    return qa_pipeline
Example #18
    def __init__(self):

        pipeline_description = Pipeline()
        pipeline_description.add_input(name="inputs")

        # Denormalize primitive
        step = PrimitiveStep(primitive=index.get_primitive(
            "d3m.primitives.data_transformation.denormalize.Common"))
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="inputs.0",
        )
        step.add_output("produce")
        pipeline_description.add_step(step)

        # DS to DF on input DS
        step = PrimitiveStep(primitive=index.get_primitive(
            "d3m.primitives.data_transformation.dataset_to_dataframe.Common"))
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="inputs.0",
        )
        step.add_output("produce")
        pipeline_description.add_step(step)

        # Goat forward
        step = PrimitiveStep(primitive=index.get_primitive(
            "d3m.primitives.data_cleaning.geocoding.Goat_forward"))
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.1.produce",
        )
        step.add_hyperparameter(name="target_columns",
                                argument_type=ArgumentType.VALUE,
                                data=[1])
        step.add_hyperparameter(name="cache_size",
                                argument_type=ArgumentType.VALUE,
                                data=2000)
        step.add_output("produce")
        pipeline_description.add_step(step)

        # column parser on input DF
        step = PrimitiveStep(primitive=index.get_primitive(
            "d3m.primitives.data_transformation.column_parser.Common"))
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.2.produce",
        )
        step.add_output("produce")
        pipeline_description.add_step(step)

        # XG Boost
        step = PrimitiveStep(primitive=index.get_primitive(
            "d3m.primitives.classification.xgboost_gbtree.Common"))
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.3.produce",
        )
        step.add_argument(
            name="outputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.3.produce",
        )
        step.add_output("produce")
        step.add_hyperparameter(name="return_result",
                                argument_type=ArgumentType.VALUE,
                                data="replace")
        pipeline_description.add_step(step)

        # construct predictions
        step = PrimitiveStep(primitive=index.get_primitive(
            "d3m.primitives.data_transformation.construct_predictions.Common"))
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.4.produce",
        )
        step.add_argument(
            name="reference",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.1.produce",
        )
        step.add_output("produce")
        pipeline_description.add_step(step)

        # Final Output
        pipeline_description.add_output(name="output predictions",
                                        data_reference="steps.5.produce")

        self.pipeline = pipeline_description
Example #19
)
step_6.add_output("produce")
pipeline_description.add_step(step_6)

# Step 7: Random forest
step_7 = PrimitiveStep(primitive=index.get_primitive(
    "d3m.primitives.learner.random_forest.DistilEnsembleForest"))
step_7.add_argument(
    name="inputs",
    argument_type=ArgumentType.CONTAINER,
    data_reference="steps.4.produce",
)
step_7.add_argument(
    name="outputs",
    argument_type=ArgumentType.CONTAINER,
    data_reference="steps.5.produce",
)
step_7.add_output("produce_shap_values")
pipeline_description.add_step(step_7)

# Final Output
pipeline_description.add_output(name="output",
                                data_reference="steps.7.produce_shap_values")

# Output json pipeline
blob = pipeline_description.to_json()
filename = 'pipeline.json'
# filename = blob[8:44] + '.json'
with open(filename, "w") as outfile:
    outfile.write(blob)
Example #20
)
step_5.add_output("produce")
pipeline_description.add_step(step_5)

# Step 6: construct predictions
step_6 = PrimitiveStep(primitive=index.get_primitive(
    "d3m.primitives.data_transformation.construct_predictions.Common"))
step_6.add_argument(
    name="inputs",
    argument_type=ArgumentType.CONTAINER,
    data_reference="steps.5.produce",
)
step_6.add_argument(
    name="reference",
    argument_type=ArgumentType.CONTAINER,
    data_reference="steps.2.produce",
)
step_6.add_output("produce")
pipeline_description.add_step(step_6)

# Final Output
pipeline_description.add_output(name="output predictions",
                                data_reference="steps.6.produce")

# Output json pipeline
blob = pipeline_description.to_json()
filename = blob[8:44] + ".json"
# filename = 'pipeline.json'
with open(filename, "w") as outfile:
    outfile.write(blob)
Example #21
    def __init__(self, algorithm: str = 'PseudoLabel'):

        pipeline_description = Pipeline()
        pipeline_description.add_input(name="inputs")

        # DS to DF on input DS
        step = PrimitiveStep(primitive=index.get_primitive(
            "d3m.primitives.data_transformation.dataset_to_dataframe.Common"))
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="inputs.0",
        )
        step.add_output("produce")
        pipeline_description.add_step(step)

        # Simple Profiler Column Role Annotation
        step = PrimitiveStep(primitive=index.get_primitive(
            "d3m.primitives.schema_discovery.profiler.Common"))
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.0.produce",
        )
        step.add_output("produce")
        pipeline_description.add_step(step)

        # column parser on input DF
        step = PrimitiveStep(primitive=index.get_primitive(
            "d3m.primitives.data_transformation.column_parser.Common"))
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.1.produce",
        )
        step.add_output("produce")
        step.add_hyperparameter(
            name="parse_semantic_types",
            argument_type=ArgumentType.VALUE,
            data=[
                "http://schema.org/Boolean",
                "http://schema.org/Integer",
                "http://schema.org/Float",
                "https://metadata.datadrivendiscovery.org/types/FloatVector",
                "http://schema.org/DateTime",
            ],
        )
        pipeline_description.add_step(step)

        # imputer
        step = PrimitiveStep(primitive=index.get_primitive(
            "d3m.primitives.data_cleaning.imputer.SKlearn"))
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.2.produce",
        )
        step.add_output("produce")
        step.add_hyperparameter(name="return_result",
                                argument_type=ArgumentType.VALUE,
                                data="replace")
        step.add_hyperparameter(name="use_semantic_types",
                                argument_type=ArgumentType.VALUE,
                                data=True)
        pipeline_description.add_step(step)

        # parse attribute semantic types
        step = PrimitiveStep(primitive=index.get_primitive(
            "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common"
        ))
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.3.produce",
        )
        step.add_hyperparameter(
            name="semantic_types",
            argument_type=ArgumentType.VALUE,
            data=["https://metadata.datadrivendiscovery.org/types/Attribute"],
        )
        step.add_output("produce")
        pipeline_description.add_step(step)

        # parse integer/float attribute semantic types
        step = PrimitiveStep(primitive=index.get_primitive(
            "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common"
        ))
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.4.produce",
        )
        step.add_hyperparameter(
            name="semantic_types",
            argument_type=ArgumentType.VALUE,
            data=["http://schema.org/Integer", "http://schema.org/Float"],
        )
        step.add_output("produce")
        pipeline_description.add_step(step)

        # parse target semantic types
        step = PrimitiveStep(primitive=index.get_primitive(
            "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common"
        ))
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.1.produce",
        )
        step.add_hyperparameter(
            name="semantic_types",
            argument_type=ArgumentType.VALUE,
            data=[
                "https://metadata.datadrivendiscovery.org/types/Target",
            ],
        )
        step.add_output("produce")
        pipeline_description.add_step(step)

        # Tabular Semi Supervised
        step = PrimitiveStep(primitive=index.get_primitive(
            "d3m.primitives.semisupervised_classification.iterative_labeling.TabularSemiSupervised"
        ))
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.5.produce",
        )
        step.add_argument(
            name="outputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.6.produce",
        )
        step.add_hyperparameter(name="algorithm",
                                argument_type=ArgumentType.VALUE,
                                data=algorithm)
        step.add_output("produce")
        pipeline_description.add_step(step)

        # construct predictions
        step = PrimitiveStep(primitive=index.get_primitive(
            "d3m.primitives.data_transformation.construct_predictions.Common"))
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.7.produce",
        )
        step.add_argument(
            name="reference",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.1.produce",
        )
        step.add_output("produce")
        pipeline_description.add_step(step)

        # Final Output
        pipeline_description.add_output(name="output predictions",
                                        data_reference="steps.8.produce")

        self.pipeline = pipeline_description
Example #22
def generate_only():
    # Creating pipeline
    pipeline_description = Pipeline()
    pipeline_description.add_input(name='inputs')

    # Step 0: dataset_to_dataframe
    step_0 = PrimitiveStep(primitive=index.get_primitive(
        'd3m.primitives.data_transformation.dataset_to_dataframe.Common'))
    step_0.add_argument(name='inputs',
                        argument_type=ArgumentType.CONTAINER,
                        data_reference='inputs.0')
    step_0.add_output('produce')
    pipeline_description.add_step(step_0)

    # Step 1: profiler
    step_1 = PrimitiveStep(primitive=index.get_primitive(
        'd3m.primitives.schema_discovery.profiler.Common'))
    step_1.add_argument(name='inputs',
                        argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.0.produce')
    step_1.add_output('produce')
    pipeline_description.add_step(step_1)

    # Step 2: column_parser
    step_2 = PrimitiveStep(primitive=index.get_primitive(
        'd3m.primitives.data_transformation.column_parser.Common'))
    step_2.add_argument(name='inputs',
                        argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.1.produce')
    step_2.add_output('produce')
    pipeline_description.add_step(step_2)

    # Step 3: DFS Single Table
    step_3 = PrimitiveStep(primitive=index.get_primitive(
        'd3m.primitives.feature_construction.deep_feature_synthesis.SingleTableFeaturization'
    ))
    step_3.add_argument(name='inputs',
                        argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.2.produce')
    step_3.add_output('produce')
    pipeline_description.add_step(step_3)

    # Step 4: learn model
    step_4 = PrimitiveStep(primitive=index.get_primitive(
        'd3m.primitives.regression.xgboost_gbtree.Common'))
    step_4.add_argument(name='inputs',
                        argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.3.produce')
    step_4.add_argument(name='outputs',
                        argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.2.produce')
    step_4.add_output('produce')
    pipeline_description.add_step(step_4)

    # Step 5: construct output
    step_5 = PrimitiveStep(primitive=index.get_primitive(
        'd3m.primitives.data_transformation.construct_predictions.Common'))
    step_5.add_argument(name='inputs',
                        argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.4.produce')
    step_5.add_argument(name='reference',
                        argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.2.produce')
    step_5.add_output('produce')
    pipeline_description.add_step(step_5)

    # Final Output
    pipeline_description.add_output(name='output predictions',
                                    data_reference='steps.5.produce')

    # Generate .yml file for the pipeline
    import featuretools_ta1
    from pipeline_tests.utils import generate_pipeline
    dataset_name = 'LL1_retail_sales_total_MIN_METADATA'
    dataset_path = '/featuretools_ta1/datasets/seed_datasets_current'
    primitive_name = 'd3m.primitives.feature_construction.deep_feature_synthesis.SingleTableFeaturization'
    version = featuretools_ta1.__version__
    test_name = os.path.splitext(os.path.basename(__file__))[0][5:]
    yml, pipeline_run_file = generate_pipeline(
        primitive_name=primitive_name,
        pipeline_description=pipeline_description,
        dataset_name=dataset_name,
        test_name=test_name)

    # fit-score command
    fs_cmd = 'python3 -m d3m runtime -d /featuretools_ta1/datasets/ fit-score -p {}'.format(
        yml)
    fs_cmd += ' -r {}/{}/{}_problem/problemDoc.json'.format(
        dataset_path, dataset_name, dataset_name)
    fs_cmd += ' -i {}/{}/TRAIN/dataset_TRAIN/datasetDoc.json'.format(
        dataset_path, dataset_name)
    fs_cmd += ' -t {}/{}/TEST/dataset_TEST/datasetDoc.json'.format(
        dataset_path, dataset_name)
    fs_cmd += ' -a {}/{}/SCORE/dataset_SCORE/datasetDoc.json'.format(
        dataset_path, dataset_name)
    fs_cmd += ' -O {}'.format(pipeline_run_file)

    # Run pipeline to save pipeline_run file
    os.system(fs_cmd)

    # Create and return command for running from pipeline_run file:
    pipeline_run_cmd = 'python3 -m d3m --pipelines-path /featuretools_ta1/MIT_FeatureLabs/{}/{}/pipelines/'.format(
        primitive_name, version)
    pipeline_run_cmd += ' runtime -d /featuretools_ta1/datasets/ fit-score'
    pipeline_run_cmd += ' -u {}'.format(pipeline_run_file)

    return pipeline_run_cmd
Example #23
def build_pipeline(pipeline_info, pipeline_mapping, stdout=None):

    default_stdout = sys.stdout
    if stdout is not None:
        sys.stdout = stdout

    # Creating pipeline
    pipeline_description = Pipeline()
    pipeline_description.add_input(name='inputs')

    for primitive_info in pipeline_info:
        print(primitive_info.python_path)
        print(primitive_info.hyperparameter)
        print(primitive_info.ancestors)

        if primitive_info.python_path == 'HEAD':
            dataset_fullname = primitive_info.hyperparameter['dataset_folder']
            print(dataset_fullname)
            continue

        elif primitive_info.python_path == 'ENDING':

            ancestors = primitive_info.ancestors
            end_step_num = pipeline_mapping[ancestors['inputs']] - 1
            pipeline_description.add_output(name='output predictions', data_reference='steps.' + str(end_step_num) + '.produce')

        else:
            # print(primitive_info.python_path)
            primitive = index.get_primitive(primitive_info.python_path)
            step = PrimitiveStep(primitive=primitive)

            hyperparameters = primitive_info.hyperparameter
            ancestors = primitive_info.ancestors

            # add arguments
            # print(ancestors)

            if ancestors['inputs'] != 0:
                for ances_key in ancestors.keys():
                    print(ances_key, ancestors[ances_key], pipeline_mapping[ancestors[ances_key]] - 1)

                    step_num = pipeline_mapping[ancestors[ances_key]] - 1
                    step.add_argument(name=ances_key, argument_type=ArgumentType.CONTAINER, data_reference='steps.' + str(step_num) + '.produce')

            else:
                step.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')

            # add hyperparameters
            for hyper in hyperparameters.keys():
                # print(hyper, hyperparameters[hyper], type(hyperparameters[hyper]))

                hyper_value = hyperparameters[hyper]

                step.add_hyperparameter(name=hyper, argument_type=ArgumentType.VALUE, data=hyper_value)

            step.add_output('produce')
            pipeline_description.add_step(step)

            # print('\n')

    # Output to json
    data = pipeline_description.to_json()
    with open('example_pipeline.json', 'w') as f:
        f.write(data)
        print(data)

    # yaml = pipeline_description.to_yaml()
    # with open('example_pipeline.yml', 'w') as f:
    #     f.write(yaml)
    # print(yaml)

    sys.stdout.flush()
    sys.stdout = default_stdout
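
Each primitive_info record carries three attributes read by the loop above (python_path, hyperparameter, ancestors), with the sentinels HEAD and ENDING bracketing the real steps. A hypothetical minimal input, using a namedtuple as a stand-in for the real record type:

# Hypothetical primitive_info records matching what build_pipeline reads.
from collections import namedtuple

PrimitiveInfo = namedtuple('PrimitiveInfo', 'python_path hyperparameter ancestors')

head = PrimitiveInfo('HEAD', {'dataset_folder': 'my_dataset'}, {})
to_df = PrimitiveInfo(
    'd3m.primitives.data_transformation.dataset_to_dataframe.Common',
    {}, {'inputs': 0})
end = PrimitiveInfo('ENDING', {}, {'inputs': 1})

# The mapping translates ancestor ids to 1-based step numbers.
build_pipeline([head, to_df, end], {1: 1})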
Example #24
)
step_4.add_output("produce")
pipeline_description.add_step(step_4)

# Step 5: forecasting primitive
step_5 = PrimitiveStep(primitive=index.get_primitive(
    "d3m.primitives.time_series_forecasting.vector_autoregression.VAR"))
step_5.add_argument(
    name="inputs",
    argument_type=ArgumentType.CONTAINER,
    data_reference="steps.3.produce",
)
step_5.add_argument(
    name="outputs",
    argument_type=ArgumentType.CONTAINER,
    data_reference="steps.4.produce",
)
step_5.add_output("produce_weights")
pipeline_description.add_step(step_5)

# Final Output
pipeline_description.add_output(name="aggregated regression coefficients",
                                data_reference="steps.5.produce_weights")

# Output json pipeline
blob = pipeline_description.to_json()
#filename = blob[8:44] + ".json"
filename = "pipeline_ci_var_weights_value.json"
with open(filename, "w") as outfile:
    outfile.write(blob)
Example #25
    def __init__(
        self,
        epochs: int = 5000,
        attention_lstm: bool = True,
    ):

        pipeline_description = Pipeline()
        pipeline_description.add_input(name="inputs")

        # Ts formatter
        step = PrimitiveStep(
            primitive=index.get_primitive(
                "d3m.primitives.data_transformation.time_series_formatter.DistilTimeSeriesFormatter"
            )
        )
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="inputs.0",
        )
        step.add_output("produce")
        pipeline_description.add_step(step)

        # DS to DF on formatted ts DS
        step = PrimitiveStep(
            primitive=index.get_primitive(
                "d3m.primitives.data_transformation.dataset_to_dataframe.Common"
            )
        )
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.0.produce",
        )
        step.add_output("produce")
        pipeline_description.add_step(step)

        # DS to DF on input DS
        step = PrimitiveStep(
            primitive=index.get_primitive(
                "d3m.primitives.data_transformation.dataset_to_dataframe.Common"
            )
        )
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="inputs.0",
        )
        step.add_output("produce")
        pipeline_description.add_step(step)

        # column parser on input DF
        step = PrimitiveStep(
            primitive=index.get_primitive(
                "d3m.primitives.data_transformation.column_parser.Common"
            )
        )
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.2.produce",
        )
        step.add_output("produce")
        step.add_hyperparameter(
            name="parse_semantic_types",
            argument_type=ArgumentType.VALUE,
            data=[
                "http://schema.org/Boolean",
                "http://schema.org/Integer",
                "http://schema.org/Float",
                "https://metadata.datadrivendiscovery.org/types/FloatVector",
            ],
        )
        pipeline_description.add_step(step)

        # parse target semantic types
        step = PrimitiveStep(
            primitive=index.get_primitive(
                "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common"
            )
        )
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.3.produce",
        )
        step.add_hyperparameter(
            name="semantic_types",
            argument_type=ArgumentType.VALUE,
            data=[
                "https://metadata.datadrivendiscovery.org/types/Target",
            ],
        )
        step.add_output("produce")
        pipeline_description.add_step(step)

        # LSTM FCN
        step = PrimitiveStep(
            primitive=index.get_primitive(
                "d3m.primitives.time_series_classification.convolutional_neural_net.LSTM_FCN"
            )
        )
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.1.produce",
        )
        step.add_argument(
            name="outputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.4.produce",
        )
        step.add_hyperparameter(
            name="epochs", argument_type=ArgumentType.VALUE, data=epochs
        )
        step.add_hyperparameter(
            name="attention_lstm", argument_type=ArgumentType.VALUE, data=attention_lstm
        )
        step.add_output("produce")
        pipeline_description.add_step(step)

        # construct predictions
        step = PrimitiveStep(
            primitive=index.get_primitive(
                "d3m.primitives.data_transformation.construct_predictions.Common"
            )
        )
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.5.produce",
        )
        step.add_argument(
            name="reference",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.2.produce",
        )
        step.add_output("produce")
        pipeline_description.add_step(step)

        # Final Output
        pipeline_description.add_output(
            name="output predictions", data_reference="steps.6.produce"
        )

        self.pipeline = pipeline_description
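Once assembled, the pipeline is an ordinary d3m Pipeline object, so it can be serialized for the reference runtime. A minimal sketch, assuming an instance of the class above is in scope as `clf`; the output file name is hypothetical:

# Sketch: write the pipeline description to disk as JSON
# (Pipeline.to_json() is part of the d3m API; `clf` and the
# file name are assumptions, not part of the source snippet)
with open("lstm_fcn_pipeline.json", "w") as outfile:
    outfile.write(clf.pipeline.to_json())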
Example #26
    def __init__(
        self,
        interpretable: bool = True,
        epochs: int = 10,
        steps_per_epoch: int = 50,
        prediction_length: int = 30,
        num_context_lengths: int = 1,
        num_estimators: int = 2,
        # quantiles: List[float] = [0.1, 0.9],
        group_compose: bool = False,
        confidence_intervals: bool = False,
        output_mean: bool = True,
        weights_dir: str = '/scratch_dir/nbeats',
    ):

        pipeline_description = Pipeline()
        pipeline_description.add_input(name="inputs")

        # DS to DF on input DS
        step = PrimitiveStep(
            primitive=index.get_primitive(
                "d3m.primitives.data_transformation.dataset_to_dataframe.Common"
            )
        )
        step.add_argument(
            name="inputs", argument_type=ArgumentType.CONTAINER, data_reference="inputs.0"
        )
        step.add_output("produce")
        pipeline_description.add_step(step)

        # Simple Profiler Column Role Annotation
        step = PrimitiveStep(
            primitive=index.get_primitive("d3m.primitives.schema_discovery.profiler.Common")
        )
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.0.produce",
        )
        step.add_output("produce")
        pipeline_description.add_step(step)

        # column parser on input DF
        step = PrimitiveStep(
            primitive=index.get_primitive(
                "d3m.primitives.data_transformation.column_parser.Common"
            )
        )
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.1.produce",
        )
        step.add_output("produce")
        step.add_hyperparameter(
            name="parse_semantic_types",
            argument_type=ArgumentType.VALUE,
            data=[
                "http://schema.org/Boolean",
                "http://schema.org/Integer",
                "http://schema.org/Float",
                "https://metadata.datadrivendiscovery.org/types/FloatVector",
                "http://schema.org/DateTime",
            ],
        )
        pipeline_description.add_step(step)

        # group compose
        if group_compose:
            step = PrimitiveStep(
                primitive=index.get_primitive(
                    "d3m.primitives.data_transformation.grouping_field_compose.Common"
                )
            )
            step.add_argument(
                name="inputs",
                argument_type=ArgumentType.CONTAINER,
                data_reference="steps.2.produce",
            )
            step.add_output("produce")
            pipeline_description.add_step(step)

        # extract attribute columns by semantic type
        step = PrimitiveStep(
            primitive=index.get_primitive(
                "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common"
            )
        )
        data_ref = "steps.3.produce" if group_compose else "steps.2.produce"
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference=data_ref,
        )
        step.add_hyperparameter(
            name="semantic_types",
            argument_type=ArgumentType.VALUE,
            data=[
                "https://metadata.datadrivendiscovery.org/types/Attribute",
                "https://metadata.datadrivendiscovery.org/types/GroupingKey",
            ],
        )
        step.add_output("produce")
        pipeline_description.add_step(step)

        # extract target columns by semantic type
        step = PrimitiveStep(
            primitive=index.get_primitive(
                "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common"
            )
        )
        data_ref = "steps.3.produce" if group_compose else "steps.2.produce"
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference=data_ref,
        )
        step.add_hyperparameter(
            name="semantic_types",
            argument_type=ArgumentType.VALUE,
            data=[
                "https://metadata.datadrivendiscovery.org/types/Target",
                "https://metadata.datadrivendiscovery.org/types/TrueTarget",
                "https://metadata.datadrivendiscovery.org/types/SuggestedTarget",
            ],
        )
        step.add_output("produce")
        pipeline_description.add_step(step)

        # forecasting primitive
        step = PrimitiveStep(
            primitive=index.get_primitive(
                "d3m.primitives.time_series_forecasting.feed_forward_neural_net.NBEATS"
            )
        )
        data_ref = "steps.4.produce" if group_compose else "steps.3.produce"
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference=data_ref,
        )
        data_ref = "steps.5.produce" if group_compose else "steps.4.produce"
        step.add_argument(
            name="outputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference=data_ref,
        )
        step.add_hyperparameter(
            name="interpretable",
            argument_type=ArgumentType.VALUE,
            data=interpretable,
        )
        step.add_hyperparameter(
            name="num_estimators",
            argument_type=ArgumentType.VALUE,
            data=num_estimators,
        )
        step.add_hyperparameter(
            name="epochs",
            argument_type=ArgumentType.VALUE,
            data=epochs,
        )
        step.add_hyperparameter(
            name="steps_per_epoch",
            argument_type=ArgumentType.VALUE,
            data=steps_per_epoch,
        )
        step.add_hyperparameter(
            name="prediction_length",
            argument_type=ArgumentType.VALUE,
            data=prediction_length,
        )
        step.add_hyperparameter(
            name="num_context_lengths",
            argument_type=ArgumentType.VALUE,
            data=num_context_lengths,
        )
        # step.add_hyperparameter(
        #     name="quantiles",
        #     argument_type=ArgumentType.VALUE,
        #     data=quantiles,
        # )
        step.add_hyperparameter(
            name="weights_dir",
            argument_type=ArgumentType.VALUE,
            data=weights_dir,
        )
        step.add_hyperparameter(
            name="output_mean",
            argument_type=ArgumentType.VALUE,
            data=output_mean,
        )
        if confidence_intervals:
            step.add_output("produce_confidence_intervals")
            pipeline_description.add_step(step)
            
            data_ref = "steps.6.produce_confidence_intervals" if group_compose else "steps.5.produce_confidence_intervals"
            pipeline_description.add_output(
                name="output predictions", data_reference=data_ref
            )

        else:
            step.add_output("produce")
            pipeline_description.add_step(step)

            # construct predictions
            step = PrimitiveStep(
                primitive=index.get_primitive(
                    "d3m.primitives.data_transformation.construct_predictions.Common"
                )
            )
            data_ref = "steps.6.produce" if group_compose else "steps.5.produce"
            step.add_argument(
                name="inputs",
                argument_type=ArgumentType.CONTAINER,
                data_reference=data_ref,
            )
            step.add_argument(
                name="reference",
                argument_type=ArgumentType.CONTAINER,
                data_reference="steps.1.produce",
            )
            step.add_output("produce")
            pipeline_description.add_step(step)

            data_ref = "steps.7.produce" if group_compose else "steps.6.produce"
            pipeline_description.add_output(
                name="output predictions", data_reference=data_ref
            )

        self.pipeline = pipeline_description
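Because the grouping step is optional, every downstream data reference has to shift by one when it is present. A small sketch (not from the source) of the indexing rule that the conditional data_ref assignments above implement:

def shifted_ref(base_index: int, group_compose: bool) -> str:
    # the optional grouping step occupies index 3, pushing all later steps up by one
    return "steps.{}.produce".format(base_index + 1 if group_compose else base_index)

assert shifted_ref(5, False) == "steps.5.produce"  # NBEATS step without grouping
assert shifted_ref(5, True) == "steps.6.produce"   # NBEATS step with grouping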
Example #27
    def __init__(self):

        pipeline_description = Pipeline()
        pipeline_description.add_input(name="inputs")

        # DS to DF on input DS
        step = PrimitiveStep(primitive=index.get_primitive(
            "d3m.primitives.data_transformation.dataset_to_dataframe.Common"))
        step.add_argument(name="inputs",
                          argument_type=ArgumentType.CONTAINER,
                          data_reference="inputs.0")
        step.add_output("produce")
        pipeline_description.add_step(step)

        # Simon
        step = PrimitiveStep(primitive=index.get_primitive(
            "d3m.primitives.data_cleaning.column_type_profiler.Simon"))
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.0.produce",
        )
        step.add_output("produce")
        pipeline_description.add_step(step)

        # column parser on input DF
        step = PrimitiveStep(primitive=index.get_primitive(
            "d3m.primitives.data_transformation.column_parser.Common"))
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.1.produce",
        )
        step.add_output("produce")
        pipeline_description.add_step(step)

        # XGBoost
        step = PrimitiveStep(primitive=index.get_primitive(
            "d3m.primitives.classification.xgboost_gbtree.Common"))
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.2.produce",
        )
        step.add_argument(
            name="outputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.2.produce",
        )
        step.add_output("produce")
        step.add_hyperparameter(
            name="add_index_columns",
            argument_type=ArgumentType.VALUE,
            data=True,
        )
        pipeline_description.add_step(step)

        # construct predictions
        step = PrimitiveStep(primitive=index.get_primitive(
            "d3m.primitives.data_transformation.construct_predictions.Common"))
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.3.produce",
        )
        step.add_argument(
            name="reference",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.0.produce",
        )
        step.add_output("produce")
        pipeline_description.add_step(step)

        # Final Output
        pipeline_description.add_output(name="output predictions",
                                        data_reference="steps.4.produce")

        self.pipeline = pipeline_description
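A quick way to sanity-check the wiring of a pipeline like this is to dump its JSON structure and list the steps in order. A sketch assuming `pipeline_description` is still in scope; `to_json_structure()` is part of the d3m Pipeline API:

# Sketch: print each step's primitive path to verify the step indices used above
structure = pipeline_description.to_json_structure()
for i, s in enumerate(structure["steps"]):
    print(i, s["primitive"]["python_path"])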
Example #28
def generate_only():
    # Creating pipeline
    pipeline_description = Pipeline()
    pipeline_description.add_input(name='inputs')

    # Step 0: Parse columns
    step_0 = PrimitiveStep(primitive=index.get_primitive(
        'd3m.primitives.operator.dataset_map.DataFrameCommon'))
    step_0.add_argument(name='inputs',
                        argument_type=ArgumentType.CONTAINER,
                        data_reference='inputs.0')
    step_0.add_hyperparameter(name='primitive',
                              argument_type=ArgumentType.VALUE,
                              data=column_parser.Common)
    step_0.add_hyperparameter(name='resources',
                              argument_type=ArgumentType.VALUE,
                              data='all')
    step_0.add_hyperparameter(name='fit_primitive',
                              argument_type=ArgumentType.VALUE,
                              data='no')
    step_0.add_output('produce')
    pipeline_description.add_step(step_0)

    # Step 1: MultiTableFeaturization
    step_1 = PrimitiveStep(primitive=index.get_primitive(
        'd3m.primitives.feature_construction.deep_feature_synthesis.MultiTableFeaturization'
    ))
    step_1.add_argument(name='inputs',
                        argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.0.produce')
    step_1.add_output('produce')
    pipeline_description.add_step(step_1)

    # Step 2: imputer
    step_2 = PrimitiveStep(primitive=index.get_primitive(
        'd3m.primitives.data_cleaning.imputer.SKlearn'))
    step_2.add_argument(name='inputs',
                        argument_type=ArgumentType.CONTAINER,
                        data_reference="steps.1.produce")
    step_2.add_hyperparameter(name='use_semantic_types',
                              argument_type=ArgumentType.VALUE,
                              data=True)
    step_2.add_output('produce')
    pipeline_description.add_step(step_2)

    # Step 3: learn model
    step_3 = PrimitiveStep(primitive=index.get_primitive(
        'd3m.primitives.regression.xgboost_gbtree.Common'))
    step_3.add_argument(name='inputs',
                        argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.2.produce')
    step_3.add_argument(name='outputs',
                        argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.1.produce')
    step_3.add_output('produce')
    pipeline_description.add_step(step_3)

    # step 4: construct output
    step_4 = PrimitiveStep(primitive=index.get_primitive(
        'd3m.primitives.data_transformation.construct_predictions.Common'))
    step_4.add_argument(name='inputs',
                        argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.3.produce')
    step_4.add_argument(name='reference',
                        argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.1.produce')
    step_4.add_output('produce')
    pipeline_description.add_step(step_4)

    # Final Output
    pipeline_description.add_output(name='output predictions',
                                    data_reference='steps.4.produce')

    # Generate .yml file for the pipeline
    import os

    import featuretools_ta1
    from pipeline_tests.utils import generate_pipeline

    dataset_name = 'uu3_world_development_indicators'
    dataset_path = '/featuretools_ta1/datasets/seed_datasets_current'
    primitive_name = 'd3m.primitives.feature_construction.deep_feature_synthesis.MultiTableFeaturization'
    version = featuretools_ta1.__version__
    test_name = os.path.splitext(os.path.basename(__file__))[0][5:]
    yml, pipeline_run_file = generate_pipeline(
        primitive_name=primitive_name,
        pipeline_description=pipeline_description,
        dataset_name=dataset_name,
        test_name=test_name)

    # fit-score command
    fs_cmd = 'python3 -m d3m runtime -d /featuretools_ta1/datasets/ fit-score -p {}'.format(
        yml)
    fs_cmd += ' -r {}/{}/{}_problem/problemDoc.json'.format(
        dataset_path, dataset_name, dataset_name)
    fs_cmd += ' -i {}/{}/TRAIN/dataset_TRAIN/datasetDoc.json'.format(
        dataset_path, dataset_name)
    fs_cmd += ' -t {}/{}/TEST/dataset_TEST/datasetDoc.json'.format(
        dataset_path, dataset_name)
    fs_cmd += ' -a {}/{}/SCORE/dataset_SCORE/datasetDoc.json'.format(
        dataset_path, dataset_name)
    fs_cmd += ' -O {}'.format(pipeline_run_file)

    # Run pipeline to save pipeline_run file
    os.system(fs_cmd)

    # Create and return command for running from pipeline_run file:
    pipeline_run_cmd = 'python3 -m d3m --pipelines-path /featuretools_ta1/MIT_FeatureLabs/{}/{}/pipelines/'.format(
        primitive_name, version)
    pipeline_run_cmd += ' runtime -d /featuretools_ta1/datasets/ fit-score'
    pipeline_run_cmd += ' -u {}'.format(pipeline_run_file)

    return pipeline_run_cmd
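A hypothetical driver for generate_only(): the function runs fit-score once as a side effect and returns the command that replays the recorded pipeline_run file.

if __name__ == '__main__':
    # hypothetical usage: run the generated pipeline and print the replay command
    replay_cmd = generate_only()
    print(replay_cmd)  # e.g. os.system(replay_cmd) to re-run from the pipeline_run file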
Example #29
# Step 5: construct output
step_5 = PrimitiveStep(primitive=index.get_primitive(
    'd3m.primitives.data_transformation.construct_predictions.DataFrameCommon')
                       )
step_5.add_argument(name='inputs',
                    argument_type=ArgumentType.CONTAINER,
                    data_reference='steps.4.produce')
step_5.add_argument(name='reference',
                    argument_type=ArgumentType.CONTAINER,
                    data_reference='steps.1.produce')
step_5.add_output('produce')
pipeline_description.add_step(step_5)

# Final Output
pipeline_description.add_output(name='output predictions',
                                data_reference='steps.5.produce')

# Output json pipeline
blob = pipeline_description.to_json()
# blob begins with '{"id": "<36-char uuid>', so [8:44] slices out the pipeline id
filename = blob[8:44] + '.json'
with open(filename, 'w') as outfile:
    outfile.write(blob)

# output dataset metafile (from command line argument)
metafile = blob[8:44] + '.meta'
dataset = sys.argv[1]
with open(metafile, 'w') as outfile:
    outfile.write('{')
    outfile.write(f'"problem": "{dataset}_problem",')
    outfile.write(f'"full_inputs": ["{dataset}_dataset"],')
    outfile.write(f'"train_inputs": ["{dataset}_dataset_TRAIN"],')
    # remaining entries assumed, following the standard D3M .meta layout
    outfile.write(f'"test_inputs": ["{dataset}_dataset_TEST"],')
    outfile.write(f'"score_inputs": ["{dataset}_dataset_SCORE"]')
    outfile.write('}')
Example #30
    def __init__(self):

        pipeline_description = Pipeline()
        pipeline_description.add_input(name="inputs")

        # DS to DF on input DS
        step = PrimitiveStep(primitive=index.get_primitive(
            "d3m.primitives.data_transformation.dataset_to_dataframe.Common"))
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="inputs.0",
        )
        step.add_output("produce")
        pipeline_description.add_step(step)

        # Simple Profiler Column Role Annotation
        step = PrimitiveStep(primitive=index.get_primitive(
            "d3m.primitives.schema_discovery.profiler.Common"))
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.0.produce",
        )
        step.add_output("produce")
        pipeline_description.add_step(step)

        # column parser on input DF
        step = PrimitiveStep(primitive=index.get_primitive(
            "d3m.primitives.data_transformation.column_parser.Common"))
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.1.produce",
        )
        step.add_output("produce")
        pipeline_description.add_step(step)

        # PcaFeatures
        step = PrimitiveStep(primitive=index.get_primitive(
            "d3m.primitives.feature_selection.pca_features.Pcafeatures"))
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.2.produce",
        )
        step.add_argument(
            name="outputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.2.produce",
        )
        step.add_output("produce")
        pipeline_description.add_step(step)

        # extract target columns by semantic type
        step = PrimitiveStep(primitive=index.get_primitive(
            "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common"
        ))
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.2.produce",
        )
        step.add_hyperparameter(
            name="semantic_types",
            argument_type=ArgumentType.VALUE,
            data=[
                "https://metadata.datadrivendiscovery.org/types/Target",
            ],
        )
        step.add_hyperparameter(name="add_index_columns",
                                argument_type=ArgumentType.VALUE,
                                data=True)
        step.add_output("produce")
        pipeline_description.add_step(step)

        # XGBoost
        step = PrimitiveStep(primitive=index.get_primitive(
            "d3m.primitives.classification.xgboost_gbtree.Common"))
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.3.produce",
        )
        step.add_argument(
            name="outputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.4.produce",
        )
        step.add_output("produce")
        step.add_hyperparameter(name="add_index_columns",
                                argument_type=ArgumentType.VALUE,
                                data=True)
        pipeline_description.add_step(step)

        # construct predictions
        step = PrimitiveStep(primitive=index.get_primitive(
            "d3m.primitives.data_transformation.construct_predictions.Common"))
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.5.produce",
        )
        step.add_argument(
            name="reference",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.0.produce",
        )
        step.add_output("produce")
        pipeline_description.add_step(step)

        # Final Output
        pipeline_description.add_output(name="output predictions",
                                        data_reference="steps.6.produce")

        self.pipeline = pipeline_description
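A minimal sketch of fitting and producing with the d3m reference runtime, assuming the pipeline built above is in scope as `pipeline_description`; the dataset URI is hypothetical:

from d3m import container
from d3m.metadata import base as metadata_base
from d3m.runtime import Runtime

# hypothetical dataset location
dataset = container.dataset.Dataset.load("file:///datasets/example/datasetDoc.json")

runtime = Runtime(pipeline=pipeline_description, context=metadata_base.Context.TESTING)
runtime.fit(inputs=[dataset])
predictions = runtime.produce(inputs=[dataset]).values["outputs.0"]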