def __init__(self,
             sample_size: int = 1000,
             all_layer_epochs: int = 100,
             top_layer_epochs: int = 100):
        """Assemble a D3M image-classification pipeline built around Gator.

        Step graph: denormalize -> dataset sample -> dataset-to-dataframe ->
        column parser -> attribute / target extraction -> Gator
        (InceptionV3-feature classifier) -> construct predictions.

        :param sample_size: rows retained by the dataset-sample step
        :param all_layer_epochs: epochs passed to Gator as ``all_layer_epochs``
        :param top_layer_epochs: epochs passed to Gator as ``top_layer_epochs``
        """
        pipeline_description = Pipeline()
        pipeline_description.add_input(name='inputs')

        # Step 0: denormalize the input dataset.
        denorm = PrimitiveStep(primitive=index.get_primitive(
            'd3m.primitives.data_transformation.denormalize.Common'))
        denorm.add_argument(name='inputs',
                            argument_type=ArgumentType.CONTAINER,
                            data_reference='inputs.0')
        denorm.add_output('produce')
        pipeline_description.add_step(denorm)

        # Step 1: subsample the dataset to keep computation time down.
        sampler = PrimitiveStep(primitive=index.get_primitive(
            'd3m.primitives.data_preprocessing.dataset_sample.Common'))
        sampler.add_argument(name='inputs',
                             argument_type=ArgumentType.CONTAINER,
                             data_reference='steps.0.produce')
        sampler.add_hyperparameter(name='sample_size',
                                   argument_type=ArgumentType.VALUE,
                                   data=sample_size)
        sampler.add_output('produce')
        pipeline_description.add_step(sampler)

        # Step 2: convert the sampled dataset to a dataframe.
        to_df = PrimitiveStep(primitive=index.get_primitive(
            'd3m.primitives.data_transformation.dataset_to_dataframe.Common'))
        to_df.add_argument(name='inputs',
                           argument_type=ArgumentType.CONTAINER,
                           data_reference='steps.1.produce')
        to_df.add_hyperparameter(name='dataframe_resource',
                                 argument_type=ArgumentType.VALUE,
                                 data='learningData')
        to_df.add_output('produce')
        pipeline_description.add_step(to_df)

        # Step 3: parse the typed columns of the dataframe.
        parser = PrimitiveStep(primitive=index.get_primitive(
            'd3m.primitives.data_transformation.column_parser.Common'))
        parser.add_argument(name='inputs',
                            argument_type=ArgumentType.CONTAINER,
                            data_reference='steps.2.produce')
        parser.add_output('produce')
        parser.add_hyperparameter(
            name='parse_semantic_types',
            argument_type=ArgumentType.VALUE,
            data=[
                'http://schema.org/Boolean',
                'http://schema.org/Integer',
                'http://schema.org/Float',
                'https://metadata.datadrivendiscovery.org/types/FloatVector',
            ])
        pipeline_description.add_step(parser)

        # Step 4: select attribute columns.
        attrs = PrimitiveStep(primitive=index.get_primitive(
            'd3m.primitives.data_transformation.extract_columns_by_semantic_types.Common'))
        attrs.add_argument(name='inputs',
                           argument_type=ArgumentType.CONTAINER,
                           data_reference='steps.3.produce')
        attrs.add_hyperparameter(
            name='semantic_types',
            argument_type=ArgumentType.VALUE,
            data=[
                'https://metadata.datadrivendiscovery.org/types/Attribute',
            ])
        attrs.add_output('produce')
        pipeline_description.add_step(attrs)

        # Step 5: select target columns.
        targets = PrimitiveStep(primitive=index.get_primitive(
            'd3m.primitives.data_transformation.extract_columns_by_semantic_types.Common'))
        targets.add_argument(name='inputs',
                             argument_type=ArgumentType.CONTAINER,
                             data_reference='steps.3.produce')
        targets.add_hyperparameter(
            name='semantic_types',
            argument_type=ArgumentType.VALUE,
            data=[
                'https://metadata.datadrivendiscovery.org/types/Target',
            ])
        targets.add_output('produce')
        pipeline_description.add_step(targets)

        # Step 6: Gator classifier (attributes in, targets out).
        gator = PrimitiveStep(primitive=index.get_primitive(
            'd3m.primitives.classification.inceptionV3_image_feature.Gator'))
        gator.add_argument(name='inputs',
                           argument_type=ArgumentType.CONTAINER,
                           data_reference='steps.4.produce')
        gator.add_argument(name='outputs',
                           argument_type=ArgumentType.CONTAINER,
                           data_reference='steps.5.produce')
        gator.add_hyperparameter(name='unfreeze_proportions',
                                 argument_type=ArgumentType.VALUE,
                                 data=[0.5])
        gator.add_hyperparameter(name='top_layer_epochs',
                                 argument_type=ArgumentType.VALUE,
                                 data=top_layer_epochs)
        gator.add_hyperparameter(name='all_layer_epochs',
                                 argument_type=ArgumentType.VALUE,
                                 data=all_layer_epochs)
        gator.add_hyperparameter(name='weights_filepath',
                                 argument_type=ArgumentType.VALUE,
                                 data='/scratch_dir/model_weights.h5')
        gator.add_output('produce')
        pipeline_description.add_step(gator)

        # Step 7: assemble the prediction dataframe.
        preds = PrimitiveStep(primitive=index.get_primitive(
            'd3m.primitives.data_transformation.construct_predictions.Common'))
        preds.add_argument(name='inputs',
                           argument_type=ArgumentType.CONTAINER,
                           data_reference='steps.6.produce')
        preds.add_argument(name='reference',
                           argument_type=ArgumentType.CONTAINER,
                           data_reference='steps.2.produce')
        preds.add_output('produce')
        pipeline_description.add_step(preds)

        # Expose step 7's output as the pipeline result.
        pipeline_description.add_output(name='output predictions',
                                        data_reference='steps.7.produce')

        self.pipeline = pipeline_description
    def __init__(self):

        pipeline_description = Pipeline()
        pipeline_description.add_input(name="inputs")

        # Denormalize primitive
        step = PrimitiveStep(primitive=index.get_primitive(
            "d3m.primitives.data_transformation.denormalize.Common"))
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="inputs.0",
        )
        step.add_output("produce")
        pipeline_description.add_step(step)

        # DS to DF on input DS
        step = PrimitiveStep(primitive=index.get_primitive(
            "d3m.primitives.data_transformation.dataset_to_dataframe.Common"))
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="inputs.0",
        )
        step.add_output("produce")
        pipeline_description.add_step(step)

        # Goat forward
        step = PrimitiveStep(primitive=index.get_primitive(
            "d3m.primitives.data_cleaning.geocoding.Goat_forward"))
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.1.produce",
        )
        step.add_hyperparameter(name="target_columns",
                                argument_type=ArgumentType.VALUE,
                                data=[1])
        step.add_hyperparameter(name="cache_size",
                                argument_type=ArgumentType.VALUE,
                                data=2000)
        step.add_output("produce")
        pipeline_description.add_step(step)

        # column parser on input DF
        step = PrimitiveStep(primitive=index.get_primitive(
            "d3m.primitives.data_transformation.column_parser.Common"))
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.2.produce",
        )
        step.add_output("produce")
        pipeline_description.add_step(step)

        # XG Boost
        step = PrimitiveStep(primitive=index.get_primitive(
            "d3m.primitives.classification.xgboost_gbtree.Common"))
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.3.produce",
        )
        step.add_argument(
            name="outputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.3.produce",
        )
        step.add_output("produce")
        step.add_hyperparameter(name="return_result",
                                argument_type=ArgumentType.VALUE,
                                data="replace")
        pipeline_description.add_step(step)

        # construct predictions
        step = PrimitiveStep(primitive=index.get_primitive(
            "d3m.primitives.data_transformation.construct_predictions.Common"))
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.4.produce",
        )
        step.add_argument(
            name="reference",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.1.produce",
        )
        step.add_output("produce")
        pipeline_description.add_step(step)

        # Final Output
        pipeline_description.add_output(name="output predictions",
                                        data_reference="steps.5.produce")

        self.pipeline = pipeline_description
# Beispiel #3
# 0
def generate_only():
    """Build a single-table DFS regression pipeline, run `d3m runtime
    fit-score` on it to record a pipeline_run file, and return a shell
    command that re-runs the pipeline from that file.
    """
    # Assemble the pipeline.
    pipeline_description = Pipeline()
    pipeline_description.add_input(name='inputs')

    # Step 0: dataset -> dataframe.
    step_0 = PrimitiveStep(primitive=index.get_primitive(
        'd3m.primitives.data_transformation.dataset_to_dataframe.Common'))
    step_0.add_argument(name='inputs',
                        argument_type=ArgumentType.CONTAINER,
                        data_reference='inputs.0')
    step_0.add_output('produce')
    pipeline_description.add_step(step_0)

    # Step 1: profiler (annotates column roles; the original comment calling
    # this "column_parser" was wrong).
    step_1 = PrimitiveStep(primitive=index.get_primitive(
        'd3m.primitives.schema_discovery.profiler.Common'))
    step_1.add_argument(name='inputs',
                        argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.0.produce')
    step_1.add_output('produce')
    pipeline_description.add_step(step_1)

    # Step 2: column parser.
    step_2 = PrimitiveStep(primitive=index.get_primitive(
        'd3m.primitives.data_transformation.column_parser.Common'))
    step_2.add_argument(name='inputs',
                        argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.1.produce')
    step_2.add_output('produce')
    pipeline_description.add_step(step_2)

    # Step 3: single-table deep feature synthesis.
    step_3 = PrimitiveStep(primitive=index.get_primitive(
        'd3m.primitives.feature_construction.deep_feature_synthesis.SingleTableFeaturization'))
    step_3.add_argument(name='inputs',
                        argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.2.produce')
    step_3.add_output('produce')
    pipeline_description.add_step(step_3)

    # Step 4: XGBoost regressor (features from step 3, targets from step 2).
    step_4 = PrimitiveStep(primitive=index.get_primitive(
        'd3m.primitives.regression.xgboost_gbtree.Common'))
    step_4.add_argument(name='inputs',
                        argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.3.produce')
    step_4.add_argument(name='outputs',
                        argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.2.produce')
    step_4.add_output('produce')
    pipeline_description.add_step(step_4)

    # Step 5: assemble the prediction dataframe.
    step_5 = PrimitiveStep(primitive=index.get_primitive(
        'd3m.primitives.data_transformation.construct_predictions.Common'))
    step_5.add_argument(name='inputs',
                        argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.4.produce')
    step_5.add_argument(name='reference',
                        argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.2.produce')
    step_5.add_output('produce')
    pipeline_description.add_step(step_5)

    # Expose step 5's output as the pipeline result.
    pipeline_description.add_output(name='output predictions',
                                    data_reference='steps.5.produce')

    # Generate the .yml file for the pipeline.
    import featuretools_ta1
    from pipeline_tests.utils import generate_pipeline

    primitive_name = 'd3m.primitives.feature_construction.deep_feature_synthesis.SingleTableFeaturization'
    dataset_name = '196_autoMpg_MIN_METADATA'
    dataset_path = '/featuretools_ta1/datasets/seed_datasets_current'
    version = featuretools_ta1.__version__
    # Test name is the script's basename with its 5-character prefix dropped.
    test_name = os.path.splitext(os.path.basename(__file__))[0][5:]
    yml, pipeline_run_file = generate_pipeline(
        primitive_name=primitive_name,
        pipeline_description=pipeline_description,
        dataset_name=dataset_name,
        test_name=test_name)

    # Build the fit-score command.
    fs_cmd = ''.join([
        'python3 -m d3m runtime -d /featuretools_ta1/datasets/ fit-score -p {}'.format(yml),
        ' -r {}/{}/{}_problem/problemDoc.json'.format(dataset_path, dataset_name, dataset_name),
        ' -i {}/{}/TRAIN/dataset_TRAIN/datasetDoc.json'.format(dataset_path, dataset_name),
        ' -t {}/{}/TEST/dataset_TEST/datasetDoc.json'.format(dataset_path, dataset_name),
        ' -a {}/{}/SCORE/dataset_SCORE/datasetDoc.json'.format(dataset_path, dataset_name),
        ' -O {}'.format(pipeline_run_file),
    ])

    # Run the pipeline so the pipeline_run file is written.
    os.system(fs_cmd)

    # Return a command that replays the run from the pipeline_run file.
    pipeline_run_cmd = ''.join([
        'python3 -m d3m --pipelines-path /featuretools_ta1/MIT_FeatureLabs/{}/{}/pipelines/'.format(primitive_name, version),
        ' runtime -d /featuretools_ta1/datasets/ fit-score',
        ' -u {}'.format(pipeline_run_file),
    ])

    return pipeline_run_cmd
    def __init__(self):
        """Assemble a D3M classification pipeline with PCA-based feature
        selection ahead of an XGBoost (gbtree) classifier.

        Step graph: dataset-to-dataframe -> profiler -> column parser ->
        PCA feature selection -> target extraction -> XGBoost ->
        construct predictions.
        """
        pipeline_description = Pipeline()
        pipeline_description.add_input(name='inputs')

        # Step 0: convert the input dataset to a dataframe.
        to_df = PrimitiveStep(primitive=index.get_primitive(
            'd3m.primitives.data_transformation.dataset_to_dataframe.Common'))
        to_df.add_argument(name='inputs',
                           argument_type=ArgumentType.CONTAINER,
                           data_reference='inputs.0')
        to_df.add_output('produce')
        pipeline_description.add_step(to_df)

        # Step 1: profiler annotates column roles.
        profiler = PrimitiveStep(primitive=index.get_primitive(
            'd3m.primitives.schema_discovery.profiler.Common'))
        profiler.add_argument(name='inputs',
                              argument_type=ArgumentType.CONTAINER,
                              data_reference='steps.0.produce')
        profiler.add_output('produce')
        pipeline_description.add_step(profiler)

        # Step 2: parse the typed columns of the profiled dataframe.
        parser = PrimitiveStep(primitive=index.get_primitive(
            'd3m.primitives.data_transformation.column_parser.Common'))
        parser.add_argument(name='inputs',
                            argument_type=ArgumentType.CONTAINER,
                            data_reference='steps.1.produce')
        parser.add_output('produce')
        pipeline_description.add_step(parser)

        # Step 3: PCA feature selection over the parsed dataframe.
        pca = PrimitiveStep(primitive=index.get_primitive(
            'd3m.primitives.feature_selection.pca_features.Pcafeatures'))
        pca.add_argument(name='inputs',
                         argument_type=ArgumentType.CONTAINER,
                         data_reference='steps.2.produce')
        pca.add_argument(name='outputs',
                         argument_type=ArgumentType.CONTAINER,
                         data_reference='steps.2.produce')
        pca.add_output('produce')
        pipeline_description.add_step(pca)

        # Step 4: extract target columns, keeping index columns.
        targets = PrimitiveStep(primitive=index.get_primitive(
            'd3m.primitives.data_transformation.extract_columns_by_semantic_types.Common'))
        targets.add_argument(name='inputs',
                             argument_type=ArgumentType.CONTAINER,
                             data_reference='steps.2.produce')
        targets.add_hyperparameter(
            name='semantic_types',
            argument_type=ArgumentType.VALUE,
            data=[
                'https://metadata.datadrivendiscovery.org/types/Target',
            ])
        targets.add_hyperparameter(name='add_index_columns',
                                   argument_type=ArgumentType.VALUE,
                                   data=True)
        targets.add_output('produce')
        pipeline_description.add_step(targets)

        # Step 5: XGBoost classifier (selected features in, targets out).
        xgb = PrimitiveStep(primitive=index.get_primitive(
            'd3m.primitives.classification.xgboost_gbtree.Common'))
        xgb.add_argument(name='inputs',
                         argument_type=ArgumentType.CONTAINER,
                         data_reference='steps.3.produce')
        xgb.add_argument(name='outputs',
                         argument_type=ArgumentType.CONTAINER,
                         data_reference='steps.4.produce')
        xgb.add_output('produce')
        xgb.add_hyperparameter(name='add_index_columns',
                               argument_type=ArgumentType.VALUE,
                               data=True)
        pipeline_description.add_step(xgb)

        # Step 6: assemble the prediction dataframe.
        preds = PrimitiveStep(primitive=index.get_primitive(
            'd3m.primitives.data_transformation.construct_predictions.Common'))
        preds.add_argument(name='inputs',
                           argument_type=ArgumentType.CONTAINER,
                           data_reference='steps.5.produce')
        preds.add_argument(name='reference',
                           argument_type=ArgumentType.CONTAINER,
                           data_reference='steps.0.produce')
        preds.add_output('produce')
        pipeline_description.add_step(preds)

        # Expose step 6's output as the pipeline result.
        pipeline_description.add_output(name='output predictions',
                                        data_reference='steps.6.produce')

        self.pipeline = pipeline_description
# Beispiel #5
# 0
    def _gen_pipeline(self):
        """Build a pipeline that runs PartialProcrustes followed by
        EuclideanNomination over three dataframe views of the input dataset.

        Steps 0-2 load the default view plus dataframe resources '1' and '2'
        from the same input; step 3 applies PartialProcrustes to the two
        resource views, and step 4 nominates matches with EuclideanNomination.
        """
        pipeline = meta_pipeline.Pipeline()
        pipeline.add_input(name='inputs')

        # Step 0: default dataframe view of the dataset.
        base_df = PrimitiveStep(primitive=index.get_primitive(
            'd3m.primitives.data_transformation.dataset_to_dataframe.Common'))
        base_df.add_argument(name='inputs',
                             argument_type=ArgumentType.CONTAINER,
                             data_reference='inputs.0')
        base_df.add_output('produce')
        pipeline.add_step(base_df)

        # Step 1: dataframe view of resource '1'.
        df_one = PrimitiveStep(primitive=index.get_primitive(
            'd3m.primitives.data_transformation.dataset_to_dataframe.Common'))
        df_one.add_argument(name='inputs',
                            argument_type=ArgumentType.CONTAINER,
                            data_reference='inputs.0')
        df_one.add_hyperparameter(name='dataframe_resource',
                                  argument_type=ArgumentType.VALUE,
                                  data='1')
        df_one.add_output('produce')
        pipeline.add_step(df_one)

        # Step 2: dataframe view of resource '2'.
        df_two = PrimitiveStep(primitive=index.get_primitive(
            'd3m.primitives.data_transformation.dataset_to_dataframe.Common'))
        df_two.add_argument(name='inputs',
                            argument_type=ArgumentType.CONTAINER,
                            data_reference='inputs.0')
        df_two.add_hyperparameter(name='dataframe_resource',
                                  argument_type=ArgumentType.VALUE,
                                  data='2')
        df_two.add_output('produce')
        pipeline.add_step(df_two)

        # Step 3: PartialProcrustes over the two resource views.
        procrustes = meta_pipeline.PrimitiveStep(
            primitive_description=PartialProcrustes.metadata.query())
        procrustes.add_argument(name='inputs_1',
                                argument_type=ArgumentType.CONTAINER,
                                data_reference='steps.1.produce')
        procrustes.add_argument(name='inputs_2',
                                argument_type=ArgumentType.CONTAINER,
                                data_reference='steps.2.produce')
        procrustes.add_argument(name='reference',
                                argument_type=ArgumentType.CONTAINER,
                                data_reference='steps.0.produce')

        procrustes.add_output('produce')
        pipeline.add_step(procrustes)

        # Step 4: EuclideanNomination against the transformed view.
        nomination = meta_pipeline.PrimitiveStep(
            primitive_description=EuclideanNomination.metadata.query())
        nomination.add_argument(name='inputs_1',
                                argument_type=ArgumentType.CONTAINER,
                                data_reference='steps.1.produce')
        nomination.add_argument(name='inputs_2',
                                argument_type=ArgumentType.CONTAINER,
                                data_reference='steps.3.produce')
        nomination.add_argument(name='reference',
                                argument_type=ArgumentType.CONTAINER,
                                data_reference='steps.0.produce')

        nomination.add_output('produce')
        pipeline.add_step(nomination)

        # Expose step 4's nominations as the pipeline result.
        pipeline.add_output(name='Predictions',
                            data_reference='steps.4.produce')

        return pipeline
# Beispiel #6
# 0
    def __init__(
        self, 
        interpretable: bool = True,
        epochs: int = 10,
        steps_per_epoch: int = 50,
        prediction_length: int = 30,
        num_context_lengths: int = 1,
        num_estimators: int = 2,
        #quantiles: List[float] = [0.1, 0.9],
        group_compose: bool = False,
        confidence_intervals: bool = False,
        output_mean: bool = True,
        weights_dir: str = '/scratch_dir/nbeats'
    ):
        """Build a D3M time-series forecasting pipeline around NBEATS.

        Step graph: dataset-to-dataframe -> profiler -> column parser ->
        [grouping-field compose, only when ``group_compose``] -> attribute
        extraction -> target extraction -> NBEATS forecaster ->
        [construct predictions, only when not ``confidence_intervals``].

        Because the grouping step is optional, every downstream ``data_ref``
        is shifted one step later when ``group_compose`` is True.

        :param interpretable: forwarded to NBEATS as the ``interpretable``
            hyperparameter
        :param epochs: forwarded to NBEATS as ``epochs``
        :param steps_per_epoch: forwarded to NBEATS as ``steps_per_epoch``
        :param prediction_length: forwarded to NBEATS as ``prediction_length``
        :param num_context_lengths: forwarded to NBEATS as
            ``num_context_lengths``
        :param num_estimators: forwarded to NBEATS as ``num_estimators``
        :param group_compose: when True, insert a grouping-field-compose step
            after the column parser
        :param confidence_intervals: when True, expose the forecaster's
            ``produce_confidence_intervals`` output directly and skip the
            construct-predictions step
        :param output_mean: forwarded to NBEATS as ``output_mean``
        :param weights_dir: forwarded to NBEATS as ``weights_dir``
        """
        pipeline_description = Pipeline()
        pipeline_description.add_input(name="inputs")

        # Step 0: DS to DF on input DS
        step = PrimitiveStep(
            primitive=index.get_primitive(
                "d3m.primitives.data_transformation.dataset_to_dataframe.Common"
            )
        )
        step.add_argument(
            name="inputs", argument_type=ArgumentType.CONTAINER, data_reference="inputs.0"
        )
        step.add_output("produce")
        pipeline_description.add_step(step)

        # Step 1: Simple Profiler Column Role Annotation
        step = PrimitiveStep(
            primitive=index.get_primitive("d3m.primitives.schema_discovery.profiler.Common")
        )
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.0.produce",
        )
        step.add_output("produce")
        pipeline_description.add_step(step)

        # Step 2: column parser on input DF (DateTime included for time series)
        step = PrimitiveStep(
            primitive=index.get_primitive(
                "d3m.primitives.data_transformation.column_parser.Common"
            )
        )
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.1.produce",
        )
        step.add_output("produce")
        step.add_hyperparameter(
            name="parse_semantic_types",
            argument_type=ArgumentType.VALUE,
            data=[
                "http://schema.org/Boolean",
                "http://schema.org/Integer",
                "http://schema.org/Float",
                "https://metadata.datadrivendiscovery.org/types/FloatVector",
                "http://schema.org/DateTime",
            ],
        )
        pipeline_description.add_step(step)

        # Optional step 3: group compose — shifts all later step indices by one.
        if group_compose:
            step = PrimitiveStep(
                primitive=index.get_primitive(
                    "d3m.primitives.data_transformation.grouping_field_compose.Common"
                )
            )
            step.add_argument(
                name="inputs",
                argument_type=ArgumentType.CONTAINER,
                data_reference="steps.2.produce",
            )
            step.add_output("produce")
            pipeline_description.add_step(step)

        # parse attribute semantic types (GroupingKey kept as an attribute)
        step = PrimitiveStep(
            primitive=index.get_primitive(
                "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common"
            )
        )
        data_ref = "steps.3.produce" if group_compose else "steps.2.produce"
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference=data_ref,
        )
        step.add_hyperparameter(
            name="semantic_types",
            argument_type=ArgumentType.VALUE,
            data=[
                "https://metadata.datadrivendiscovery.org/types/Attribute",
                'https://metadata.datadrivendiscovery.org/types/GroupingKey'
            ],
        )
        step.add_output("produce")
        pipeline_description.add_step(step)

        # parse target semantic types
        step = PrimitiveStep(
            primitive=index.get_primitive(
                "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common"
            )
        )
        data_ref = "steps.3.produce" if group_compose else "steps.2.produce"
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference=data_ref,
        )
        step.add_hyperparameter(
            name="semantic_types",
            argument_type=ArgumentType.VALUE,
            data=[
                "https://metadata.datadrivendiscovery.org/types/Target",
                "https://metadata.datadrivendiscovery.org/types/TrueTarget",
                "https://metadata.datadrivendiscovery.org/types/SuggestedTarget",
            ],
        )
        step.add_output("produce")
        pipeline_description.add_step(step)

        # NBEATS forecasting primitive (attributes in, targets out)
        step = PrimitiveStep(
            primitive=index.get_primitive(
                "d3m.primitives.time_series_forecasting.feed_forward_neural_net.NBEATS"
            )
        )
        data_ref = "steps.4.produce" if group_compose else "steps.3.produce"
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference=data_ref,
        )
        data_ref = "steps.5.produce" if group_compose else "steps.4.produce"
        step.add_argument(
            name="outputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference=data_ref,
        )
        step.add_hyperparameter(
            name="interpretable",
            argument_type=ArgumentType.VALUE,
            data=interpretable,
        )
        step.add_hyperparameter(
            name="num_estimators",
            argument_type=ArgumentType.VALUE,
            data=num_estimators,
        )
        step.add_hyperparameter(
            name="epochs",
            argument_type=ArgumentType.VALUE,
            data=epochs,
        )
        step.add_hyperparameter(
            name="steps_per_epoch",
            argument_type=ArgumentType.VALUE,
            data=steps_per_epoch,
        )
        step.add_hyperparameter(
            name="prediction_length",
            argument_type=ArgumentType.VALUE,
            data=prediction_length,
        )
        step.add_hyperparameter(
            name="num_context_lengths",
            argument_type=ArgumentType.VALUE,
            data=num_context_lengths,
        )
        # quantiles hyperparameter intentionally disabled (see disabled
        # constructor parameter above):
        # step.add_hyperparameter(
        #     name="quantiles",
        #     argument_type=ArgumentType.VALUE,
        #     data=quantiles,
        # )
        step.add_hyperparameter(
            name="weights_dir",
            argument_type=ArgumentType.VALUE,
            data=weights_dir,
        )
        step.add_hyperparameter(
            name="output_mean",
            argument_type=ArgumentType.VALUE,
            data=output_mean,
        )
        if confidence_intervals:
            # Use the forecaster's confidence-interval output directly as the
            # pipeline result; no construct-predictions step is added.
            step.add_output("produce_confidence_intervals")
            pipeline_description.add_step(step)
            
            data_ref = "steps.6.produce_confidence_intervals" if group_compose else "steps.5.produce_confidence_intervals"
            pipeline_description.add_output(
                name="output predictions", data_reference=data_ref
            )

        else:
            step.add_output("produce")
            pipeline_description.add_step(step)

            # construct predictions, referencing the profiled dataframe
            step = PrimitiveStep(
                primitive=index.get_primitive(
                    "d3m.primitives.data_transformation.construct_predictions.Common"
                )
            )
            data_ref = "steps.6.produce" if group_compose else "steps.5.produce"
            step.add_argument(
                name="inputs",
                argument_type=ArgumentType.CONTAINER,
                data_reference=data_ref,
            )
            step.add_argument(
                name="reference",
                argument_type=ArgumentType.CONTAINER,
                data_reference="steps.1.produce",
            )
            step.add_output("produce")
            pipeline_description.add_step(step)

            data_ref = "steps.7.produce" if group_compose else "steps.6.produce"
            pipeline_description.add_output(
                name="output predictions", data_reference=data_ref
            )

        self.pipeline = pipeline_description
# Beispiel #7
# 0
def generate_only():
    """Build a multi-table DFS demo pipeline, run it once with the D3M
    runtime to write a pipeline_run file, and return a shell command that
    replays the run from that file.
    """
    pipeline_description = Pipeline()
    pipeline_description.add_input(name='inputs')

    # Step 0: parse columns on every resource of the input dataset.
    parse_step = PrimitiveStep(primitive=index.get_primitive(
        'd3m.primitives.operator.dataset_map.DataFrameCommon'))
    parse_step.add_argument(name='inputs',
                            argument_type=ArgumentType.CONTAINER,
                            data_reference='inputs.0')
    parse_step.add_hyperparameter(name='primitive',
                                  argument_type=ArgumentType.VALUE,
                                  data=column_parser.Common)
    parse_step.add_hyperparameter(name='resources',
                                  argument_type=ArgumentType.VALUE,
                                  data='all')
    parse_step.add_hyperparameter(name='fit_primitive',
                                  argument_type=ArgumentType.VALUE,
                                  data='no')
    parse_step.add_output('produce')
    pipeline_description.add_step(parse_step)

    # Step 1: multi-table deep feature synthesis.
    dfs_step = PrimitiveStep(primitive=index.get_primitive(
        'd3m.primitives.feature_construction.deep_feature_synthesis.MultiTableFeaturization'
    ))
    dfs_step.add_argument(name='inputs',
                          argument_type=ArgumentType.CONTAINER,
                          data_reference='steps.0.produce')
    dfs_step.add_output('produce')
    pipeline_description.add_step(dfs_step)

    # Step 2: impute missing values (semantic-type aware).
    impute_step = PrimitiveStep(primitive=index.get_primitive(
        'd3m.primitives.data_cleaning.imputer.SKlearn'))
    impute_step.add_argument(name='inputs',
                             argument_type=ArgumentType.CONTAINER,
                             data_reference="steps.1.produce")
    impute_step.add_hyperparameter(name='use_semantic_types',
                                   argument_type=ArgumentType.VALUE,
                                   data=True)
    impute_step.add_output('produce')
    pipeline_description.add_step(impute_step)

    # Step 3: fit a gradient-boosted tree regressor.
    model_step = PrimitiveStep(primitive=index.get_primitive(
        'd3m.primitives.regression.xgboost_gbtree.Common'))
    model_step.add_argument(name='inputs',
                            argument_type=ArgumentType.CONTAINER,
                            data_reference='steps.2.produce')
    model_step.add_argument(name='outputs',
                            argument_type=ArgumentType.CONTAINER,
                            data_reference='steps.1.produce')
    model_step.add_output('produce')
    pipeline_description.add_step(model_step)

    # Step 4: construct the predictions dataframe.
    predictions_step = PrimitiveStep(primitive=index.get_primitive(
        'd3m.primitives.data_transformation.construct_predictions.Common'))
    predictions_step.add_argument(name='inputs',
                                  argument_type=ArgumentType.CONTAINER,
                                  data_reference='steps.3.produce')
    predictions_step.add_argument(name='reference',
                                  argument_type=ArgumentType.CONTAINER,
                                  data_reference='steps.1.produce')
    predictions_step.add_output('produce')
    pipeline_description.add_step(predictions_step)

    # Expose the predictions as the pipeline output.
    pipeline_description.add_output(name='output predictions',
                                    data_reference='steps.4.produce')

    # Write the pipeline out as a .yml file for the test harness.
    import featuretools_ta1
    from pipeline_tests.utils import generate_pipeline

    dataset_name = 'uu3_world_development_indicators'
    dataset_path = '/featuretools_ta1/datasets/seed_datasets_current'
    primitive_name = 'd3m.primitives.feature_construction.deep_feature_synthesis.MultiTableFeaturization'
    version = featuretools_ta1.__version__
    test_name = os.path.splitext(os.path.basename(__file__))[0][5:]
    yml, pipeline_run_file = generate_pipeline(
        primitive_name=primitive_name,
        pipeline_description=pipeline_description,
        dataset_name=dataset_name,
        test_name=test_name)

    # Assemble the D3M runtime fit-score command in one expression.
    fs_cmd = (
        'python3 -m d3m runtime -d /featuretools_ta1/datasets/ fit-score -p {}'.format(yml)
        + ' -r {}/{}/{}_problem/problemDoc.json'.format(dataset_path, dataset_name, dataset_name)
        + ' -i {}/{}/TRAIN/dataset_TRAIN/datasetDoc.json'.format(dataset_path, dataset_name)
        + ' -t {}/{}/TEST/dataset_TEST/datasetDoc.json'.format(dataset_path, dataset_name)
        + ' -a {}/{}/SCORE/dataset_SCORE/datasetDoc.json'.format(dataset_path, dataset_name)
        + ' -O {}'.format(pipeline_run_file)
    )

    # Execute once so the pipeline_run file gets written.
    os.system(fs_cmd)

    # Build and return the command that replays from the pipeline_run file.
    pipeline_run_cmd = (
        'python3 -m d3m --pipelines-path /featuretools_ta1/MIT_FeatureLabs/{}/{}/pipelines/'.format(
            primitive_name, version)
        + ' runtime -d /featuretools_ta1/datasets/ fit-score'
        + ' -u {}'.format(pipeline_run_file)
    )

    return pipeline_run_cmd
def collaborative_filtering_link_prediction():
    """Build the SRI collaborative-filtering pipeline.

    Parses the input as a collaborative-filtering graph, transforms it,
    predicts links (ratings) with PSL, and formats the predictions.
    """
    pipeline_description = Pipeline(context='PRETRAINING')
    pipeline_description.add_input(name='inputs')

    # Step 0: parse the input dataset into a collaborative-filtering graph.
    parser_step = PrimitiveStep(primitive_description=d3m.primitives.sri.graph.
                                CollaborativeFilteringParser.metadata.query())
    parser_step.add_argument(name='inputs',
                             argument_type=ArgumentType.CONTAINER,
                             data_reference='inputs.0')
    parser_step.add_output('produce')
    pipeline_description.add_step(parser_step)

    # Step 1: transform the parsed graph for the downstream PSL model.
    transform_step = PrimitiveStep(primitive_description=d3m.primitives.sri.graph.
                                   GraphTransformer.metadata.query())
    transform_step.add_argument(name='inputs',
                                argument_type=ArgumentType.CONTAINER,
                                data_reference='steps.0.produce')
    transform_step.add_output('produce')
    pipeline_description.add_step(transform_step)

    # Step 2: PSL link prediction over the transformed graph.
    predict_step = PrimitiveStep(primitive_description=d3m.primitives.sri.psl.
                                 LinkPrediction.metadata.query())
    predict_step.add_argument(name='inputs',
                              argument_type=ArgumentType.CONTAINER,
                              data_reference='steps.1.produce')
    predict_step.add_hyperparameter(name='prediction_column',
                                    argument_type=ArgumentType.VALUE,
                                    data="rating")
    predict_step.add_hyperparameter(name='truth_threshold',
                                    argument_type=ArgumentType.VALUE,
                                    data=1e-07)
    predict_step.add_hyperparameter(name="jvm_memory",
                                    argument_type=ArgumentType.VALUE,
                                    data=0.5)
    predict_step.add_output('produce')
    pipeline_description.add_step(predict_step)

    # Step 3: construct predictions, keeping only columns 0 and 1.
    construct_step = PrimitiveStep(primitive_description=d3m.primitives.data.
                                   ConstructPredictions.metadata.query())
    construct_step.add_argument(name='inputs',
                                argument_type=ArgumentType.CONTAINER,
                                data_reference='steps.2.produce')
    construct_step.add_argument(name='reference',
                                argument_type=ArgumentType.CONTAINER,
                                data_reference='steps.2.produce')
    construct_step.add_hyperparameter(name='use_columns',
                                      argument_type=ArgumentType.VALUE,
                                      data=[0, 1])
    construct_step.add_output('produce')
    pipeline_description.add_step(construct_step)

    # Step 4: drop the leading column from the constructed predictions.
    remove_step = PrimitiveStep(primitive_description=d3m.primitives.data.
                                RemoveColumns.metadata.query())
    remove_step.add_argument(name='inputs',
                             argument_type=ArgumentType.CONTAINER,
                             data_reference='steps.3.produce')
    remove_step.add_hyperparameter(name='columns',
                                   argument_type=ArgumentType.VALUE,
                                   data=[0])
    remove_step.add_output('produce')
    pipeline_description.add_step(remove_step)

    pipeline_description.add_output(name='Result',
                                    data_reference='steps.4.produce')

    return pipeline_description
# --- Beispiel #9 (Example #9) ---
    def __init__(self,
                 weights_filepath: str = '/scratch_dir/model_weights.pth',
                 explain_all_classes: bool = False,
                 all_confidences: bool = False,
                 epochs: int = 25):
        """Build a satellite-image classification pipeline on ``self.pipeline``.

        Step chain: denormalize -> dataframe -> satellite image loader ->
        column parser -> extract image columns / extract target columns ->
        pretrained remote-sensing featurizer -> MLP classifier ->
        construct predictions. Step numbers below match the
        ``steps.N.produce`` data references.

        Args:
            weights_filepath: Path the MLP classifier uses for its model
                weights (``weights_filepath`` hyperparameter).
            explain_all_classes: Forwarded to the MLP classifier's
                ``explain_all_classes`` hyperparameter.
            all_confidences: Forwarded to the MLP classifier's
                ``all_confidences`` hyperparameter.
            epochs: Training epochs for the MLP classifier.
        """

        pipeline_description = Pipeline()
        pipeline_description.add_input(name="inputs")

        # Step 0: denormalize the input dataset.
        step = PrimitiveStep(primitive=index.get_primitive(
            "d3m.primitives.data_transformation.denormalize.Common"))
        step.add_argument(name="inputs",
                          argument_type=ArgumentType.CONTAINER,
                          data_reference="inputs.0")
        step.add_output("produce")
        pipeline_description.add_step(step)

        # Step 1: dataset -> dataframe on the denormalized dataset.
        step = PrimitiveStep(primitive=index.get_primitive(
            "d3m.primitives.data_transformation.dataset_to_dataframe.Common"))
        step.add_argument(name="inputs",
                          argument_type=ArgumentType.CONTAINER,
                          data_reference="steps.0.produce")
        step.add_output("produce")
        pipeline_description.add_step(step)

        # Step 2: load satellite images in place of their file references.
        step = PrimitiveStep(primitive=index.get_primitive(
            "d3m.primitives.data_transformation.satellite_image_loader.DistilSatelliteImageLoader"
        ))
        step.add_argument(name="inputs",
                          argument_type=ArgumentType.CONTAINER,
                          data_reference="steps.1.produce")
        step.add_hyperparameter(name="return_result",
                                argument_type=ArgumentType.VALUE,
                                data="replace")
        step.add_output("produce")
        pipeline_description.add_step(step)

        # Step 3: parse numeric and float-vector columns.
        step = PrimitiveStep(primitive=index.get_primitive(
            "d3m.primitives.data_transformation.column_parser.DistilColumnParser"
        ))
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.2.produce",
        )
        step.add_output("produce")
        step.add_hyperparameter(
            name="parsing_semantics",
            argument_type=ArgumentType.VALUE,
            data=[
                "http://schema.org/Integer",
                "http://schema.org/Float",
                "https://metadata.datadrivendiscovery.org/types/FloatVector",
            ],
        )
        pipeline_description.add_step(step)

        # Step 4: extract the image columns from the parsed dataframe.
        step = PrimitiveStep(primitive=index.get_primitive(
            "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common"
        ))
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.3.produce",
        )
        step.add_output("produce")
        step.add_hyperparameter(
            name="semantic_types",
            argument_type=ArgumentType.VALUE,
            data=[
                "http://schema.org/ImageObject",
            ],
        )
        pipeline_description.add_step(step)

        # Step 5: extract the target columns from the parsed dataframe.
        step = PrimitiveStep(primitive=index.get_primitive(
            "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common"
        ))
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.3.produce",
        )
        step.add_output("produce")
        step.add_hyperparameter(
            name="semantic_types",
            argument_type=ArgumentType.VALUE,
            data=[
                "https://metadata.datadrivendiscovery.org/types/Target",
                "https://metadata.datadrivendiscovery.org/types/TrueTarget"
            ],
        )
        pipeline_description.add_step(step)

        # Step 6: featurize images with the pretrained remote-sensing model
        # (pool_features=False keeps the unpooled feature maps).
        step = PrimitiveStep(primitive=index.get_primitive(
            "d3m.primitives.remote_sensing.remote_sensing_pretrained.RemoteSensingPretrained"
        ))
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.4.produce",
        )
        step.add_output("produce")
        step.add_hyperparameter(name="pool_features",
                                argument_type=ArgumentType.VALUE,
                                data=False)
        pipeline_description.add_step(step)

        # Step 7: MLP classifier — features from step 6, targets from step 5.
        step = PrimitiveStep(primitive=index.get_primitive(
            "d3m.primitives.remote_sensing.mlp.MlpClassifier"))
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.6.produce",
        )
        step.add_argument(
            name="outputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.5.produce",
        )
        step.add_output("produce")
        step.add_hyperparameter(name="weights_filepath",
                                argument_type=ArgumentType.VALUE,
                                data=weights_filepath)
        step.add_hyperparameter(name="explain_all_classes",
                                argument_type=ArgumentType.VALUE,
                                data=explain_all_classes)
        step.add_hyperparameter(name="all_confidences",
                                argument_type=ArgumentType.VALUE,
                                data=all_confidences)
        step.add_hyperparameter(name="epochs",
                                argument_type=ArgumentType.VALUE,
                                data=epochs)
        pipeline_description.add_step(step)

        # Step 8: construct predictions, referencing the image-loader
        # dataframe (step 2) and keeping only columns 0 and 1.
        step = PrimitiveStep(primitive=index.get_primitive(
            "d3m.primitives.data_transformation.construct_predictions.Common"))
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.7.produce",
        )
        step.add_argument(
            name="reference",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.2.produce",
        )
        step.add_output("produce")
        step.add_hyperparameter(name="use_columns",
                                argument_type=ArgumentType.VALUE,
                                data=[0, 1])
        pipeline_description.add_step(step)

        pipeline_description.add_output(name="output predictions",
                                        data_reference="steps.8.produce")

        self.pipeline = pipeline_description
# --- Beispiel #10 (Example #10) ---
def build_demo_pipeline():
    """Build a minimal demo pipeline: DFS features, imputation, random
    forest classification, and prediction construction.
    """
    demo = Pipeline(context=Context.TESTING)
    demo.add_input(name='inputs')

    # Step 0: deep feature synthesis.
    dfs = PrimitiveStep(primitive_description=Featuretools.metadata.query())
    dfs.add_argument(name='inputs',
                     argument_type=ArgumentType.CONTAINER,
                     data_reference='inputs.0')
    dfs.add_output('produce')
    demo.add_step(dfs)

    # Step 1: impute missing values.
    imputer = PrimitiveStep(
        primitive_description=SKlearnImputer.metadata.query())
    imputer.add_argument(name='inputs',
                         argument_type=ArgumentType.CONTAINER,
                         data_reference='steps.0.produce')
    imputer.add_output('produce')
    demo.add_step(imputer)

    # Step 2: random forest (semantic-type aware, index columns kept).
    forest = PrimitiveStep(primitive_description=SKlearnRFC.metadata.query())
    forest.add_hyperparameter(name='use_semantic_types',
                              argument_type=ArgumentType.VALUE,
                              data=True)
    forest.add_hyperparameter(name='add_index_columns',
                              argument_type=ArgumentType.VALUE,
                              data=True)
    forest.add_argument(name='inputs',
                        argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.1.produce')
    forest.add_argument(name='outputs',
                        argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.1.produce')
    forest.add_output('produce')
    demo.add_step(forest)

    # Step 3: construct the predictions dataframe.
    construct = PrimitiveStep(
        primitive_description=DataFrameCommon.metadata.query())
    construct.add_argument(name='inputs',
                           argument_type=ArgumentType.CONTAINER,
                           data_reference='steps.2.produce')
    construct.add_argument(name='reference',
                           argument_type=ArgumentType.CONTAINER,
                           data_reference='steps.2.produce')
    construct.add_output('produce')
    demo.add_step(construct)

    # Expose the predictions as the pipeline output.
    demo.add_output(name='output predictions',
                    data_reference='steps.3.produce')

    return demo
# --- Beispiel #11 (Example #11) ---
def set_data(pipeline_description):
    """Append shared data-loading steps to ``pipeline_description``.

    Adds denormalize, dataset-to-dataframe, target extraction,
    semantic-type replacement, and an image-reader step, then records the
    relevant step indices in the module globals ``IP_STEP`` (dataframe
    step), ``OP_STEP`` (replaced-targets step) and ``READER_STEP``
    (image-reader step).
    """
    global IP_STEP, OP_STEP, READER_STEP

    # Step: denormalize the input dataset.
    denorm_step_idx = 0
    step = PrimitiveStep(
        primitive_description=d3m.primitives.data_transformation.denormalize.
        Common.metadata.query())
    step.add_argument(name='inputs',
                      argument_type=ArgumentType.CONTAINER,
                      data_reference='inputs.0')
    step.add_output('produce')
    pipeline_description.add_step(step)

    # Step: dataset -> dataframe (index captured for IP_STEP).
    dataset_to_dataframe_step_idx = len(pipeline_description.steps)
    step = PrimitiveStep(
        primitive_description=d3m.primitives.data_transformation.
        dataset_to_dataframe.Common.metadata.query())
    step.add_argument(
        name='inputs',
        argument_type=ArgumentType.CONTAINER,
        data_reference='steps.{}.produce'.format(denorm_step_idx))
    step.add_output('produce')
    pipeline_description.add_step(step)

    # Step: extract the TrueTarget columns.
    extract_step_idx = len(pipeline_description.steps)
    extract_targets = PrimitiveStep(
        d3m.primitives.data_transformation.extract_columns_by_semantic_types.
        Common.metadata.query())
    extract_targets.add_argument(name='inputs',
                                 argument_type=ArgumentType.CONTAINER,
                                 data_reference='steps.{}.produce'.format(
                                     dataset_to_dataframe_step_idx))
    extract_targets.add_hyperparameter(
        name='semantic_types',
        argument_type=ArgumentType.VALUE,
        data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
    extract_targets.add_output('produce')
    pipeline_description.add_step(extract_targets)

    # Step: replace semantic types on the extracted targets.
    # Needed for CIFAR-10.
    replace_step_idx = len(pipeline_description.steps)
    replace_semantic = PrimitiveStep(
        d3m.primitives.data_transformation.replace_semantic_types.Common.
        metadata.query())
    replace_semantic.add_argument(
        name='inputs',
        argument_type=ArgumentType.CONTAINER,
        data_reference=f'steps.{extract_step_idx}.produce')
    replace_semantic.add_hyperparameter(
        name='to_semantic_types',
        argument_type=ArgumentType.VALUE,
        data=[
            'https://metadata.datadrivendiscovery.org/types/SuggestedTarget',
            'https://metadata.datadrivendiscovery.org/types/TrueTarget'
        ])
    replace_semantic.add_hyperparameter(
        name='from_semantic_types',
        argument_type=ArgumentType.VALUE,
        data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
    replace_semantic.add_output('produce')
    pipeline_description.add_step(replace_semantic)

    # Step: image reader.
    # NOTE(review): this step gets no 'inputs' argument and no output
    # here, unlike every other step — presumably the caller wires it up
    # later via the exported READER_STEP index; confirm against callers.
    reader_step_idx = len(pipeline_description.steps)
    reader = PrimitiveStep(
        primitive_description=d3m.primitives.data_preprocessing.image_reader.
        Common.metadata.query())
    reader.add_hyperparameter(name='return_result',
                              argument_type=ArgumentType.VALUE,
                              data='new')
    pipeline_description.add_step(reader)

    # Publish the step indices other code uses to connect to this prefix.
    IP_STEP, OP_STEP, READER_STEP = dataset_to_dataframe_step_idx, replace_step_idx, reader_step_idx
# --- Beispiel #12 (Example #12) ---
    def _to_pipeline(self, binding, sequence) -> Pipeline:
        """Materialize a D3M ``Pipeline`` from a step binding.

        Args:
            binding: Mapping from step name to its configuration: the
                ``primitive`` name, ``inputs`` (names of earlier steps or
                ``"template_input"``), ``hyperparameters``, and optionally
                ``runtime``.
            sequence: Step names in execution order.

        Returns:
            The assembled ``Pipeline`` whose output is the ``produce``
            data reference of the last template step.

        Raises:
            exceptions.InvalidArgumentValueError: If a step names a
                primitive that is not in ``self.primitive``.
        """
        # Empty pipeline with the single, general dataset input.
        pipeline = Pipeline(name=self.template['name'] + ":" +
                            str(id(binding)),
                            description=self.description_info)
        templateinput = pipeline.add_input("input dataset")

        # Data references produced so far, keyed by step name, so later
        # steps can consume earlier outputs.
        outputs = {}
        outputs["template_input"] = templateinput

        # Add each bound step to the pipeline; wire IO and hyperparameters.
        for i, step in enumerate(sequence):
            self.step_number[step] = i
            primitive_name = binding[step]["primitive"]
            if primitive_name not in self.primitive:
                # Single formatted message: passing two positional args to
                # an exception renders the message as a tuple.
                raise exceptions.InvalidArgumentValueError(
                    "Error, can't find the primitive : {}".format(
                        primitive_name))

            primitive_desc = dict(
                d3m_index.get_primitive(primitive_name).metadata.query())
            primitive_step = PrimitiveStep(primitive_desc)

            # D3M v2019.1.21 removed primitive descriptions, so smuggle
            # the "runtime" settings onto the step object instead.
            if "runtime" in binding[step]:
                primitive_step.__dict__['_dsbox_runtime'] = binding[step][
                    "runtime"]

            hyper = binding[step]["hyperparameters"]
            for hyperName in hyper:
                primitive_step.add_hyperparameter(
                    # argument_type should be fixed type not the type of the data!!
                    name=hyperName,
                    argument_type=self.argmentsmapper["value"],
                    data=hyper[hyperName])

            # The common construct_predictions primitive also needs a
            # reference back to the original dataframe.
            if self.need_add_reference and primitive_name == 'd3m.primitives.data_transformation.construct_predictions.DataFrameCommon':
                primitive_step.add_argument(
                    "reference", metadata_base.ArgumentType.CONTAINER,
                    "steps.0.produce")

            # Resolve this step's inputs from previously produced outputs;
            # a list-valued parameter maps to a list-valued argument.
            step_parameters = binding[step]["inputs"]
            step_arguments = []
            for parameter in step_parameters:
                if isinstance(parameter, list):
                    argument = [outputs[subparam] for subparam in parameter]
                else:
                    argument = outputs[parameter]
                step_arguments.append(argument)
            self.bind_primitive_IO(primitive_step, step_arguments)
            pipeline.add_step(primitive_step)
            primitive_step.add_output("produce")
            outputs[step] = f'steps.{primitive_step.index}.produce'

        # Final pipeline output: the prediction of the last template step.
        general_output = outputs[self.template["steps"][-1]["name"]]
        pipeline.add_output(general_output, "predictions of input dataset")

        return pipeline
# --- Beispiel #13 (Example #13) ---
def create_pipeline(metric: str,
                    cat_mode: str = 'one_hot',
                    max_one_hot: int = 16,
                    scale: bool = False) -> Pipeline:
    """Build an image pipeline: denormalize, load and featurize images
    with a transfer-learning model, and fit an ensemble forest.

    Args:
        metric: Metric name passed to the ensemble forest primitive.
        cat_mode: Not used by this pipeline (kept for interface parity).
        max_one_hot: Not used by this pipeline (kept for interface parity).
        scale: Not used by this pipeline (kept for interface parity).

    Returns:
        The assembled ``Pipeline`` with formatted predictions as output.
    """
    image_pipeline = Pipeline(context=PipelineContext.TESTING)
    image_pipeline.add_input(name='inputs')

    # Step 0: denormalize the dataframe (injects semantic type information).
    denorm_step = PrimitiveStep(
        primitive_description=DenormalizePrimitive.metadata.query())
    denorm_step.add_argument(name='inputs',
                             argument_type=ArgumentType.CONTAINER,
                             data_reference='inputs.0')
    denorm_step.add_output('produce')
    image_pipeline.add_step(denorm_step)

    # Step 1: extract the dataframe from the dataset.
    to_df_step = PrimitiveStep(
        primitive_description=DatasetToDataFramePrimitive.metadata.query())
    to_df_step.add_argument(name='inputs',
                            argument_type=ArgumentType.CONTAINER,
                            data_reference='steps.0.produce')
    to_df_step.add_output('produce')
    image_pipeline.add_step(to_df_step)

    # Step 2: read images, replacing the path column with image data.
    reader_step = PrimitiveStep(
        primitive_description=DataFrameImageReaderPrimitive.metadata.query())
    reader_step.add_argument(name='inputs',
                             argument_type=ArgumentType.CONTAINER,
                             data_reference='steps.1.produce')
    reader_step.add_output('produce')
    reader_step.add_hyperparameter(name='use_columns',
                                   argument_type=ArgumentType.VALUE,
                                   data=[0, 1])
    reader_step.add_hyperparameter(name='return_result',
                                   argument_type=ArgumentType.VALUE,
                                   data='replace')
    image_pipeline.add_step(reader_step)

    # Step 3: parse typed columns.
    parser_step = PrimitiveStep(
        primitive_description=ColumnParserPrimitive.metadata.query())
    parser_step.add_argument(name='inputs',
                             argument_type=ArgumentType.CONTAINER,
                             data_reference='steps.2.produce')
    parser_step.add_output('produce')
    parse_types = ('http://schema.org/Boolean', 'http://schema.org/Integer',
                   'http://schema.org/Float',
                   'https://metadata.datadrivendiscovery.org/types/FloatVector')
    parser_step.add_hyperparameter(name='parse_semantic_types',
                                   argument_type=ArgumentType.VALUE,
                                   data=parse_types)
    image_pipeline.add_step(parser_step)

    # Step 4: featurize images via the transfer-learning primitive.
    featurize_step = PrimitiveStep(
        primitive_description=ImageTransferPrimitive.metadata.query())
    featurize_step.add_argument(name='inputs',
                                argument_type=ArgumentType.CONTAINER,
                                data_reference='steps.3.produce')
    featurize_step.add_output('produce')
    image_pipeline.add_step(featurize_step)

    # Step 5: extract the target columns.
    targets_step = PrimitiveStep(
        primitive_description=ExtractColumnsBySemanticTypesPrimitive.metadata.query())
    targets_step.add_argument(name='inputs',
                              argument_type=ArgumentType.CONTAINER,
                              data_reference='steps.3.produce')
    targets_step.add_output('produce')
    target_types = ('https://metadata.datadrivendiscovery.org/types/Target',
                    'https://metadata.datadrivendiscovery.org/types/TrueTarget')
    targets_step.add_hyperparameter(name='semantic_types',
                                    argument_type=ArgumentType.VALUE,
                                    data=target_types)
    image_pipeline.add_step(targets_step)

    # Step 6: fit a random forest ensemble on features vs. targets.
    forest_step = PrimitiveStep(
        primitive_description=EnsembleForestPrimitive.metadata.query())
    forest_step.add_argument(name='inputs',
                             argument_type=ArgumentType.CONTAINER,
                             data_reference='steps.4.produce')
    forest_step.add_argument(name='outputs',
                             argument_type=ArgumentType.CONTAINER,
                             data_reference='steps.5.produce')
    forest_step.add_output('produce')
    forest_step.add_hyperparameter(name='metric',
                                   argument_type=ArgumentType.VALUE,
                                   data=metric)
    image_pipeline.add_step(forest_step)

    # Step 7: convert predictions to the expected output format.
    predictions_step = PrimitiveStep(
        primitive_description=ConstructPredictionsPrimitive.metadata.query())
    predictions_step.add_argument(name='inputs',
                                  argument_type=ArgumentType.CONTAINER,
                                  data_reference='steps.6.produce')
    predictions_step.add_argument(name='reference',
                                  argument_type=ArgumentType.CONTAINER,
                                  data_reference='steps.1.produce')
    predictions_step.add_output('produce')
    predictions_step.add_hyperparameter(name='use_columns',
                                        argument_type=ArgumentType.VALUE,
                                        data=[0, 1])
    image_pipeline.add_step(predictions_step)

    # Attach the pipeline output.
    image_pipeline.add_output(name='output', data_reference='steps.7.produce')

    return image_pipeline
    def __init__(
        self,
        annotations: List[int] = None,
        gem_p: int = 1,
        dataset: str = "LL1_bigearth_landuse_detection",
    ):

        pipeline_description = Pipeline()
        pipeline_description.add_input(name="inputs")
        if annotations is None:
            pipeline_description.add_input(name="annotations")

        # Denormalize
        step = PrimitiveStep(
            primitive=index.get_primitive(
                "d3m.primitives.data_transformation.denormalize.Common"
            )
        )
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="inputs.0",
        )
        step.add_output("produce")
        pipeline_description.add_step(step)

        # DS to DF on input DS
        step = PrimitiveStep(
            primitive=index.get_primitive(
                "d3m.primitives.data_transformation.dataset_to_dataframe.Common"
            )
        )
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.0.produce",
        )
        step.add_output("produce")
        pipeline_description.add_step(step)

        # Satellite Image Loader
        step = PrimitiveStep(
            primitive=index.get_primitive(
                "d3m.primitives.data_transformation.satellite_image_loader.DistilSatelliteImageLoader"
            )
        )
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.1.produce",
        )
        step.add_hyperparameter(
            name="return_result", argument_type=ArgumentType.VALUE, data="replace"
        )
        step.add_output("produce")
        pipeline_description.add_step(step)

        # Distil column parser
        step = PrimitiveStep(
            primitive=index.get_primitive(
                "d3m.primitives.data_transformation.column_parser.DistilColumnParser"
            )
        )
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.2.produce",
        )
        step.add_output("produce")
        step.add_hyperparameter(
            name="parsing_semantics",
            argument_type=ArgumentType.VALUE,
            data=[
                "http://schema.org/Integer",
                "http://schema.org/Float",
                "https://metadata.datadrivendiscovery.org/types/FloatVector",
            ],
        )
        pipeline_description.add_step(step)

        # parse image semantic types
        # TODO test without index
        step = PrimitiveStep(
            primitive=index.get_primitive(
                "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common"
            )
        )
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.3.produce",
        )
        step.add_output("produce")
        step.add_hyperparameter(
            name="semantic_types",
            argument_type=ArgumentType.VALUE,
            data=[
                "http://schema.org/ImageObject",
                "https://metadata.datadrivendiscovery.org/types/PrimaryMultiKey",
            ],
        )
        pipeline_description.add_step(step)

        # remote sensing pretrained
        step = PrimitiveStep(
            primitive=index.get_primitive(
                "d3m.primitives.remote_sensing.remote_sensing_pretrained.RemoteSensingPretrained"
            )
        )
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.4.produce",
        )
        step.add_output("produce")
        pipeline_description.add_step(step)

        # DS to DF on annotations DS
        if annotations is None:
            step = PrimitiveStep(
                primitive=index.get_primitive(
                    "d3m.primitives.data_transformation.dataset_to_dataframe.Common"
                )
            )
            step.add_argument(
                name="inputs",
                argument_type=ArgumentType.CONTAINER,
                data_reference="inputs.1",
            )
            step.add_output("produce")
            pipeline_description.add_step(step)

        # image retrieval primitive
        step = PrimitiveStep(
            primitive=index.get_primitive(
                "d3m.primitives.similarity_modeling.iterative_labeling.ImageRetrieval"
            )
        )
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.5.produce",
        )
        if annotations is not None:
            step.add_argument(
                name="outputs",
                argument_type=ArgumentType.VALUE,
                data=annotations,
            )
        else:
            step.add_argument(
                name="outputs",
                argument_type=ArgumentType.CONTAINER,
                data_reference="steps.6.produce",
            )
        step.add_output("produce")
        step.add_hyperparameter(
            name="gem_p", argument_type=ArgumentType.VALUE, data=gem_p
        )
        pipeline_description.add_step(step)

        if annotations is not None:
            pipeline_description.add_output(
                name="output ranking", data_reference="steps.6.produce"
            )
        else:
            pipeline_description.add_output(
                name="output ranking", data_reference="steps.7.produce"
            )

        self.pipeline = pipeline_description
        self.dataset = dataset
        self.annotations = annotations
    def __init__(self):
        """Build a D3M pipeline that embeds tabular features with TSNE and
        classifies the embedding with a Distil ensemble forest.

        Step graph (step index -> primitive, input step):
            0: dataset_to_dataframe           (pipeline input)
            1: schema profiler                (step 0) - annotates column roles
            2: column parser                  (step 1) - semantic types -> dtypes
            3: SKlearn imputer                (step 2) - fills missing values
            4: TSNE, 3 components             (step 3)
            5: extract Attribute columns      (step 4) - the TSNE features
            6: extract Target columns         (step 3) - targets from the
               imputed frame, NOT the TSNE output
            7: DistilEnsembleForest           (inputs=step 5, outputs=step 6)
            8: construct_predictions          (step 7, reference=step 0)

        The finished pipeline is stored on ``self.pipeline``; its single
        output is step 8's predictions frame.
        """

        pipeline_description = Pipeline()
        pipeline_description.add_input(name="inputs")

        # DS to DF on input DS
        step = PrimitiveStep(
            primitive=index.get_primitive(
                "d3m.primitives.data_transformation.dataset_to_dataframe.Common"
            )
        )
        step.add_argument(
            name="inputs", argument_type=ArgumentType.CONTAINER, data_reference="inputs.0"
        )
        step.add_output("produce")
        pipeline_description.add_step(step)

        # Simple Profiler Column Role Annotation
        step = PrimitiveStep(
            primitive=index.get_primitive("d3m.primitives.schema_discovery.profiler.Common")
        )
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.0.produce",
        )
        step.add_output("produce")
        pipeline_description.add_step(step)

        # column parser on input DF
        step = PrimitiveStep(
            primitive=index.get_primitive(
                "d3m.primitives.data_transformation.column_parser.Common"
            )
        )
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.1.produce",
        )
        step.add_output("produce")
        step.add_hyperparameter(
            name="parse_semantic_types",
            argument_type=ArgumentType.VALUE,
            data=[
                "http://schema.org/Boolean",
                "http://schema.org/Integer",
                "http://schema.org/Float",
                "https://metadata.datadrivendiscovery.org/types/FloatVector",
                "http://schema.org/DateTime",
            ],
        )
        pipeline_description.add_step(step)

        # imputer
        # use_semantic_types=True so the imputer selects columns by their
        # annotated roles; return_result="replace" keeps the frame shape.
        step = PrimitiveStep(
            primitive=index.get_primitive("d3m.primitives.data_cleaning.imputer.SKlearn")
        )
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.2.produce",
        )
        step.add_output("produce")
        step.add_hyperparameter(
            name="return_result", argument_type=ArgumentType.VALUE, data="replace"
        )
        step.add_hyperparameter(
            name="use_semantic_types", argument_type=ArgumentType.VALUE, data=True
        )
        pipeline_description.add_step(step)

        # TSNE
        step = PrimitiveStep(
            primitive=index.get_primitive(
                'd3m.primitives.dimensionality_reduction.t_distributed_stochastic_neighbor_embedding.Tsne'
            )
        )
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.3.produce",
        )
        step.add_hyperparameter(
            name='n_components', argument_type=ArgumentType.VALUE, data=3
        )
        step.add_output("produce")
        pipeline_description.add_step(step)

        # parse attribute semantic types
        step = PrimitiveStep(
            primitive=index.get_primitive(
                "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common"
            )
        )
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.4.produce",
        )
        step.add_hyperparameter(
            name="semantic_types",
            argument_type=ArgumentType.VALUE,
            data=["https://metadata.datadrivendiscovery.org/types/Attribute"],
        )
        step.add_output("produce")
        pipeline_description.add_step(step)

        # parse target semantic types
        # NOTE: targets come from the imputer output (steps.3), not from the
        # TSNE embedding, which only carries attribute columns.
        step = PrimitiveStep(
            primitive=index.get_primitive(
                "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common"
            )
        )
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.3.produce",
        )
        step.add_hyperparameter(
            name="semantic_types",
            argument_type=ArgumentType.VALUE,
            data=[
                "https://metadata.datadrivendiscovery.org/types/Target",
            ],
        )
        step.add_output("produce")
        pipeline_description.add_step(step)

        # R Forest
        step = PrimitiveStep(
            primitive=index.get_primitive(
                'd3m.primitives.learner.random_forest.DistilEnsembleForest'
            )
        )
        step.add_argument(
            name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.5.produce'
        )
        step.add_argument(
            name='outputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.6.produce'
        )
        step.add_output('produce')
        pipeline_description.add_step(step)

        # construct predictions
        step = PrimitiveStep(
            primitive=index.get_primitive(
                "d3m.primitives.data_transformation.construct_predictions.Common"
            )
        )
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.7.produce",
        )
        step.add_argument(
            name="reference",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.0.produce",
        )
        step.add_output("produce")
        pipeline_description.add_step(step)

        # Final Output
        pipeline_description.add_output(
            name="output predictions", data_reference="steps.8.produce"
        )

        self.pipeline = pipeline_description
    def __init__(self):
        """Construct a simple tabular classification pipeline.

        The pipeline converts the input dataset to a dataframe, profiles
        column types with Simon, parses columns into concrete dtypes, fits
        an XGBoost gradient-boosted-tree classifier, and finally rebuilds a
        D3M predictions frame.  The result is stored on ``self.pipeline``.
        """
        pipeline = Pipeline()
        pipeline.add_input(name="inputs")

        def _primitive_step(python_path):
            # Small local factory so each step declaration stays compact.
            return PrimitiveStep(primitive=index.get_primitive(python_path))

        # Step 0: dataset -> dataframe.
        to_df = _primitive_step("d3m.primitives.data_transformation.dataset_to_dataframe.Common")
        to_df.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER, data_reference="inputs.0")
        to_df.add_output("produce")
        pipeline.add_step(to_df)

        # Step 1: Simon column type profiling.
        profiler = _primitive_step("d3m.primitives.data_cleaning.column_type_profiler.Simon")
        profiler.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER, data_reference="steps.0.produce")
        profiler.add_output("produce")
        pipeline.add_step(profiler)

        # Step 2: parse annotated columns into concrete types.
        parser = _primitive_step("d3m.primitives.data_transformation.column_parser.Common")
        parser.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER, data_reference="steps.1.produce")
        parser.add_output("produce")
        pipeline.add_step(parser)

        # Step 3: XGBoost classifier; the parsed frame supplies both the
        # features ("inputs") and the targets ("outputs").
        classifier = _primitive_step("d3m.primitives.classification.xgboost_gbtree.Common")
        classifier.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER, data_reference="steps.2.produce")
        classifier.add_argument(name="outputs", argument_type=ArgumentType.CONTAINER, data_reference="steps.2.produce")
        classifier.add_output("produce")
        classifier.add_hyperparameter(name="add_index_columns", argument_type=ArgumentType.VALUE, data=True)
        pipeline.add_step(classifier)

        # Step 4: reattach the d3m index to the raw predictions.
        predictions = _primitive_step("d3m.primitives.data_transformation.construct_predictions.Common")
        predictions.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER, data_reference="steps.3.produce")
        predictions.add_argument(name="reference", argument_type=ArgumentType.CONTAINER, data_reference="steps.0.produce")
        predictions.add_output("produce")
        pipeline.add_step(predictions)

        # Final output: the constructed predictions frame.
        pipeline.add_output(name="output predictions", data_reference="steps.4.produce")

        self.pipeline = pipeline
from d3m import index
from d3m.metadata.base import ArgumentType, Context
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
import sys

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: Denormalize primitive -> put all resources in one dataframe
step_0 = PrimitiveStep(primitive=index.get_primitive(
    'd3m.primitives.data_transformation.denormalize.Common'))
step_0.add_argument(name='inputs',
                    argument_type=ArgumentType.CONTAINER,
                    data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: dataset_to_dataframe
step_1 = PrimitiveStep(primitive=index.get_primitive(
    'd3m.primitives.data_transformation.dataset_to_dataframe.Common'))
step_1.add_argument(name='inputs',
                    argument_type=ArgumentType.CONTAINER,
                    data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2 column parser -> labeled semantic types to data types
step_2 = PrimitiveStep(primitive=index.get_primitive(
    'd3m.primitives.data_transformation.column_parser.Common'))
step_2.add_argument(name='inputs',
Beispiel #18
0
def generate_only():
    """Build a time-series classification pipeline, run a d3m fit-score on
    it, and return a command that replays the run from its pipeline_run file.

    Pipeline shape: denormalize -> dataframe -> (target extraction + numeric
    cast) and (single-table DFS features + random-projection time-series
    features), horizontally concatenated and fed to a random forest.

    Side effects:
        - writes a pipeline .yml and pipeline_run file via
          ``generate_pipeline``
        - shells out to ``python3 -m d3m runtime ... fit-score`` via
          ``os.system``

    Returns:
        str: a shell command that re-runs fit-score from the recorded
        pipeline_run file.

    NOTE(review): relies on module-level imports not visible in this chunk
    (``os``, ``index``, ``Pipeline``, ``PrimitiveStep``, ``ArgumentType``)
    and on hard-coded container paths under /featuretools_ta1 — confirm the
    surrounding file provides them.
    """
    # Creating pipeline
    pipeline_description = Pipeline()
    pipeline_description.add_input(name='inputs')

    # Step 0 - Denormalize
    step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.denormalize.Common'))
    step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
    step_0.add_output('produce')
    pipeline_description.add_step(step_0)

    # Step 1 - Transform to dataframe
    step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.dataset_to_dataframe.Common'))
    step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
    step_1.add_output('produce')
    pipeline_description.add_step(step_1)

    # Step 2 - Extract target
    step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common'))
    step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference="steps.1.produce")
    step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE, data=["https://metadata.datadrivendiscovery.org/types/TrueTarget"])
    step_2.add_output('produce')
    pipeline_description.add_step(step_2)

    # Step 3 - Transform
    step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.to_numeric.DSBOX'))
    step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
    step_3.add_hyperparameter(name='drop_non_numeric_columns', argument_type=ArgumentType.VALUE, data=False)
    step_3.add_output('produce')
    pipeline_description.add_step(step_3)

    # Step 4 - Single table featurization
    step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.feature_construction.deep_feature_synthesis.SingleTableFeaturization'))
    step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
    step_4.add_output('produce')
    pipeline_description.add_step(step_4)

    # Step 5 - Time series to list
    step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_preprocessing.time_series_to_list.DSBOX'))
    step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
    step_5.add_output('produce')
    pipeline_description.add_step(step_5)

    # Step 6 - Time series featurization
    step_6 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.feature_extraction.random_projection_timeseries_featurization.DSBOX'))
    step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.5.produce')
    step_6.add_hyperparameter(name='generate_metadata', argument_type=ArgumentType.VALUE, data=True)
    step_6.add_output('produce')
    pipeline_description.add_step(step_6)

    # Step 7 - Concat singletable features with time series features
    step_7 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.horizontal_concat.DataFrameCommon'))
    step_7.add_argument(name='left', argument_type=ArgumentType.CONTAINER, data_reference='steps.6.produce')
    step_7.add_argument(name='right', argument_type=ArgumentType.CONTAINER, data_reference='steps.4.produce')
    step_7.add_output('produce')
    pipeline_description.add_step(step_7)

    # Step 8 - Classification
    step_8 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.classification.random_forest.SKlearn'))
    step_8.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.7.produce')
    step_8.add_argument(name='outputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.3.produce')
    step_8.add_hyperparameter(name='add_index_columns', argument_type=ArgumentType.VALUE, data=True)
    step_8.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
    step_8.add_output('produce')
    pipeline_description.add_step(step_8)

    # Final Output
    pipeline_description.add_output(name='output predictions', data_reference='steps.8.produce')

    # Generate .yml file for the pipeline
    import featuretools_ta1
    from pipeline_tests.utils import generate_pipeline

    dataset_name = 'LL1_50words_MIN_METADATA'
    dataset_path = '/featuretools_ta1/datasets/seed_datasets_current'
    primitive_name = 'd3m.primitives.feature_construction.deep_feature_synthesis.SingleTableFeaturization'
    version = featuretools_ta1.__version__
    # test name: this file's basename with its leading 5-char prefix dropped
    test_name = os.path.splitext(os.path.basename(__file__))[0][5:]
    yml, pipeline_run_file = generate_pipeline(primitive_name=primitive_name,
                                               pipeline_description=pipeline_description,
                                               dataset_name=dataset_name,
                                               test_name=test_name)

    # fit-score command
    fs_cmd = 'python3 -m d3m runtime -d /featuretools_ta1/datasets/ fit-score -p {}'.format(yml)
    fs_cmd += ' -r {}/{}/{}_problem/problemDoc.json'.format(dataset_path, dataset_name, dataset_name)
    fs_cmd += ' -i {}/{}/TRAIN/dataset_TRAIN/datasetDoc.json'.format(dataset_path, dataset_name)
    fs_cmd += ' -t {}/{}/TEST/dataset_TEST/datasetDoc.json'.format(dataset_path, dataset_name)
    fs_cmd += ' -a {}/{}/SCORE/dataset_SCORE/datasetDoc.json'.format(dataset_path, dataset_name)
    fs_cmd += ' -O {}'.format(pipeline_run_file)

    # Run pipeline to save pipeline_run file
    os.system(fs_cmd)

    # Create and return command for running from pipeline_run file:
    pipeline_run_cmd = 'python3 -m d3m --pipelines-path /featuretools_ta1/MIT_FeatureLabs/{}/{}/pipelines/'.format(primitive_name, version)
    pipeline_run_cmd += ' runtime -d /featuretools_ta1/datasets/ fit-score'
    pipeline_run_cmd += ' -u {}'.format(pipeline_run_file)

    return pipeline_run_cmd
    def __init__(
        self,
        epochs: int = 5000,
        attention_lstm: bool = True,
    ):
        """Build a time-series classification pipeline around LSTM_FCN.

        Step graph (step index -> primitive, input step):
            0: DistilTimeSeriesFormatter      (pipeline input)
            1: dataset_to_dataframe           (step 0) - formatted series
            2: dataset_to_dataframe           (pipeline input) - raw dataset,
               used for target extraction and the predictions reference
            3: column parser                  (step 2)
            4: extract Target columns         (step 3)
            5: LSTM_FCN                       (inputs=step 1, outputs=step 4)
            6: construct_predictions          (step 5, reference=step 2)

        Args:
            epochs: number of training epochs for the LSTM_FCN primitive.
            attention_lstm: whether the LSTM_FCN primitive uses its
                attention variant.

        The finished pipeline is stored on ``self.pipeline``.
        """

        pipeline_description = Pipeline()
        pipeline_description.add_input(name="inputs")

        # Ts formatter
        step = PrimitiveStep(primitive=index.get_primitive(
            "d3m.primitives.data_transformation.time_series_formatter.DistilTimeSeriesFormatter"
        ))
        step.add_argument(name="inputs",
                          argument_type=ArgumentType.CONTAINER,
                          data_reference="inputs.0")
        step.add_output("produce")
        pipeline_description.add_step(step)

        # DS to DF on formatted ts DS
        step = PrimitiveStep(primitive=index.get_primitive(
            "d3m.primitives.data_transformation.dataset_to_dataframe.Common"))
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.0.produce",
        )
        step.add_output("produce")
        pipeline_description.add_step(step)

        # DS to DF on input DS
        step = PrimitiveStep(primitive=index.get_primitive(
            "d3m.primitives.data_transformation.dataset_to_dataframe.Common"))
        step.add_argument(name="inputs",
                          argument_type=ArgumentType.CONTAINER,
                          data_reference="inputs.0")
        step.add_output("produce")
        pipeline_description.add_step(step)

        # column parser on input DF
        step = PrimitiveStep(primitive=index.get_primitive(
            "d3m.primitives.data_transformation.column_parser.Common"))
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.2.produce",
        )
        step.add_output("produce")
        step.add_hyperparameter(
            name="parse_semantic_types",
            argument_type=ArgumentType.VALUE,
            data=[
                "http://schema.org/Boolean",
                "http://schema.org/Integer",
                "http://schema.org/Float",
                "https://metadata.datadrivendiscovery.org/types/FloatVector",
            ],
        )
        pipeline_description.add_step(step)

        # parse target semantic types
        step = PrimitiveStep(primitive=index.get_primitive(
            "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common"
        ))
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.3.produce",
        )
        step.add_hyperparameter(
            name="semantic_types",
            argument_type=ArgumentType.VALUE,
            data=[
                "https://metadata.datadrivendiscovery.org/types/Target",
            ],
        )
        step.add_output("produce")
        pipeline_description.add_step(step)

        # LSTM FCN
        # Features come from the formatted series (step 1); targets from the
        # raw, parsed dataset (step 4).
        step = PrimitiveStep(primitive=index.get_primitive(
            "d3m.primitives.time_series_classification.convolutional_neural_net.LSTM_FCN"
        ))
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.1.produce",
        )
        step.add_argument(
            name="outputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.4.produce",
        )
        step.add_hyperparameter(name='epochs',
                                argument_type=ArgumentType.VALUE,
                                data=epochs)
        step.add_hyperparameter(name='attention_lstm',
                                argument_type=ArgumentType.VALUE,
                                data=attention_lstm)
        step.add_output("produce")
        pipeline_description.add_step(step)

        # construct predictions
        step = PrimitiveStep(primitive=index.get_primitive(
            "d3m.primitives.data_transformation.construct_predictions.Common"))
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.5.produce",
        )
        step.add_argument(
            name="reference",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.2.produce",
        )
        step.add_output("produce")
        pipeline_description.add_step(step)

        # Final Output
        pipeline_description.add_output(name="output predictions",
                                        data_reference="steps.6.produce")

        self.pipeline = pipeline_description
Beispiel #20
0
    def __init__(self):
        """Build a text classification pipeline using Sent2Vec embeddings.

        Step graph (step index -> primitive, input step):
            0: denormalize                    (pipeline input)
            1: dataset_to_dataframe           (step 0)
            2: text reader                    (step 1) - loads raw text files
            3: schema profiler                (step 2) - annotates column roles
            4: column parser                  (step 3)
            5: extract Attribute columns      (step 4) - the text columns
            6: extract Target columns         (step 4)
            7: Sent2Vec                       (step 5) - sentence embeddings
            8: DistilEnsembleForest           (inputs=step 7, outputs=step 6)
            9: construct_predictions          (step 8, reference=step 1)

        The finished pipeline is stored on ``self.pipeline``; its single
        output is step 9's predictions frame.
        """

        pipeline_description = Pipeline()
        pipeline_description.add_input(name="inputs")

        # Denormalize primitive
        step = PrimitiveStep(primitive=index.get_primitive(
            "d3m.primitives.data_transformation.denormalize.Common"))
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="inputs.0",
        )
        step.add_output("produce")
        pipeline_description.add_step(step)

        # DS to DF on input DS
        step = PrimitiveStep(primitive=index.get_primitive(
            "d3m.primitives.data_transformation.dataset_to_dataframe.Common"))
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.0.produce",
        )
        step.add_output("produce")
        pipeline_description.add_step(step)

        # Text reader
        step = PrimitiveStep(primitive=index.get_primitive(
            "d3m.primitives.data_transformation.text_reader.Common"))
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.1.produce",
        )
        step.add_output("produce")
        pipeline_description.add_step(step)

        # Simple Profiler Column Role Annotation
        step = PrimitiveStep(primitive=index.get_primitive(
            "d3m.primitives.schema_discovery.profiler.Common"))
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.2.produce",
        )
        step.add_output("produce")
        pipeline_description.add_step(step)

        # column parser
        step = PrimitiveStep(primitive=index.get_primitive(
            "d3m.primitives.data_transformation.column_parser.Common"))
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.3.produce",
        )
        step.add_output("produce")
        # FIX: pass the semantic types as a list (was a tuple) for
        # consistency with every other pipeline in this file and with the
        # Set-typed hyperparameter convention used by the common primitives.
        step.add_hyperparameter(
            name="parse_semantic_types",
            argument_type=ArgumentType.VALUE,
            data=[
                "http://schema.org/Boolean",
                "http://schema.org/Integer",
                "http://schema.org/Float",
                "https://metadata.datadrivendiscovery.org/types/FloatVector",
            ],
        )
        pipeline_description.add_step(step)

        # parse attribute semantic types
        step = PrimitiveStep(primitive=index.get_primitive(
            "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common"
        ))
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.4.produce",
        )
        step.add_hyperparameter(
            name="semantic_types",
            argument_type=ArgumentType.VALUE,
            data=["https://metadata.datadrivendiscovery.org/types/Attribute"],
        )
        step.add_output("produce")
        pipeline_description.add_step(step)

        # parse target semantic types
        step = PrimitiveStep(primitive=index.get_primitive(
            "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common"
        ))
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.4.produce",
        )
        step.add_hyperparameter(
            name="semantic_types",
            argument_type=ArgumentType.VALUE,
            data=[
                "https://metadata.datadrivendiscovery.org/types/Target",
            ],
        )
        step.add_output("produce")
        pipeline_description.add_step(step)

        # Sent2Vec primitive: embed the extracted text attributes
        step = PrimitiveStep(primitive=index.get_primitive(
            "d3m.primitives.feature_extraction.nk_sent2vec.Sent2Vec"))
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.5.produce",
        )
        step.add_output("produce")
        pipeline_description.add_step(step)

        # R Forest: embeddings (step 7) as features, targets from step 6
        step = PrimitiveStep(primitive=index.get_primitive(
            "d3m.primitives.learner.random_forest.DistilEnsembleForest"))
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.7.produce",
        )
        step.add_argument(
            name="outputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.6.produce",
        )
        step.add_output("produce")
        pipeline_description.add_step(step)

        # construct predictions
        step = PrimitiveStep(primitive=index.get_primitive(
            "d3m.primitives.data_transformation.construct_predictions.Common"))
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.8.produce",
        )
        step.add_argument(
            name="reference",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.1.produce",
        )
        step.add_output("produce")
        pipeline_description.add_step(step)

        # Final Output
        pipeline_description.add_output(name="output predictions",
                                        data_reference="steps.9.produce")

        self.pipeline = pipeline_description
from d3m import index
from d3m.metadata.base import ArgumentType, Context
from d3m.metadata.pipeline import Pipeline, PrimitiveStep

# Build a single-step time-series classification pipeline and dump it to JSON.
pipeline_description = Pipeline(context=Context.TESTING)
pipeline_description.add_input(name='inputs')

# Step 0: Kanine k-NN time-series classifier. The raw dataset feeds both
# the 'inputs' and 'outputs' arguments of the primitive.
step_0 = PrimitiveStep(primitive=index.get_primitive(
    'd3m.primitives.time_series_classification.k_neighbors.Kanine'))
for argument_name in ('inputs', 'outputs'):
    step_0.add_argument(name=argument_name,
                        argument_type=ArgumentType.CONTAINER,
                        data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Expose the classifier's predictions as the pipeline output.
pipeline_description.add_output(name='output predictions',
                                data_reference='steps.0.produce')

# Serialize the pipeline description to disk.
with open('pipeline.json', 'w') as outfile:
    outfile.write(pipeline_description.to_json())
Beispiel #22
0
    def __init__(
        self,
        binary_labels,
        weights_filepath: str = "scratch_dir/model_weights.pth",
        epochs_frozen: int = 20,
        epochs_unfrozen: int = 100,
    ):
        """Assemble a satellite-image segmentation pipeline.

        Layout: denormalize -> dataset_to_dataframe -> satellite image
        loader -> Distil column parser -> extract image columns ->
        ImageSegmentation CNN. The finished description is stored on
        ``self.pipeline``.

        Parameters
        ----------
        binary_labels:
            Segmentation targets passed to the CNN primitive as its
            ``outputs`` argument (as an ``ArgumentType.VALUE``).
        weights_filepath:
            Path where the segmentation model persists its weights.
        epochs_frozen:
            Training epochs with the backbone frozen.
        epochs_unfrozen:
            Training epochs with the backbone unfrozen.
        """

        def _container_step(primitive_path, input_ref):
            # Create a step whose 'inputs' argument references an
            # upstream container output.
            new_step = PrimitiveStep(
                primitive=index.get_primitive(primitive_path))
            new_step.add_argument(
                name="inputs",
                argument_type=ArgumentType.CONTAINER,
                data_reference=input_ref,
            )
            return new_step

        pipeline = Pipeline()
        pipeline.add_input(name="inputs")

        # Step 0: flatten the multi-resource dataset.
        step = _container_step(
            "d3m.primitives.data_transformation.denormalize.Common",
            "inputs.0")
        step.add_output("produce")
        pipeline.add_step(step)

        # Step 1: convert the dataset into a dataframe.
        step = _container_step(
            "d3m.primitives.data_transformation.dataset_to_dataframe.Common",
            "steps.0.produce")
        step.add_output("produce")
        pipeline.add_step(step)

        # Step 2: load satellite imagery, replacing the file-name column.
        step = _container_step(
            "d3m.primitives.data_transformation.satellite_image_loader.DistilSatelliteImageLoader",
            "steps.1.produce")
        step.add_hyperparameter(name="return_result",
                                argument_type=ArgumentType.VALUE,
                                data="replace")
        step.add_output("produce")
        pipeline.add_step(step)

        # Step 3: parse numeric and float-vector columns.
        step = _container_step(
            "d3m.primitives.data_transformation.column_parser.DistilColumnParser",
            "steps.2.produce")
        step.add_output("produce")
        step.add_hyperparameter(
            name="parsing_semantics",
            argument_type=ArgumentType.VALUE,
            data=[
                "http://schema.org/Integer",
                "http://schema.org/Float",
                "https://metadata.datadrivendiscovery.org/types/FloatVector",
            ],
        )
        pipeline.add_step(step)

        # Step 4: keep only the image columns.
        step = _container_step(
            "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common",
            "steps.3.produce")
        step.add_output("produce")
        step.add_hyperparameter(
            name="semantic_types",
            argument_type=ArgumentType.VALUE,
            data=[
                "http://schema.org/ImageObject",
            ],
        )
        pipeline.add_step(step)

        # Step 5: CNN segmentation over the extracted image columns.
        step = _container_step(
            "d3m.primitives.remote_sensing.convolutional_neural_net.ImageSegmentation",
            "steps.4.produce")
        step.add_argument(
            name="outputs",
            argument_type=ArgumentType.VALUE,
            data=binary_labels,
        )
        step.add_output("produce")
        step.add_hyperparameter(
            name="weights_filepath",
            argument_type=ArgumentType.VALUE,
            data=weights_filepath,
        )
        step.add_hyperparameter(name="epochs_frozen",
                                argument_type=ArgumentType.VALUE,
                                data=epochs_frozen)
        step.add_hyperparameter(name="epochs_unfrozen",
                                argument_type=ArgumentType.VALUE,
                                data=epochs_unfrozen)
        pipeline.add_step(step)

        # Segmentation predictions are the pipeline output.
        pipeline.add_output(name="output predictions",
                            data_reference="steps.5.produce")

        self.pipeline = pipeline
Beispiel #23
0
    def __init__(self, algorithm: str = 'PseudoLabel'):
        """Assemble a tabular semi-supervised classification pipeline.

        Layout: dataset_to_dataframe -> profiler -> column_parser ->
        SKlearn imputer -> extract attributes -> extract numeric
        attributes; targets are pulled from the parsed frame in
        parallel; TabularSemiSupervised learns on both, and
        construct_predictions formats the result. The finished
        description is stored on ``self.pipeline``.

        Parameters
        ----------
        algorithm:
            Semi-supervised strategy forwarded to the
            TabularSemiSupervised primitive (default ``'PseudoLabel'``).
        """

        def _container_step(primitive_path, input_ref):
            # Create a step whose 'inputs' argument references an
            # upstream container output.
            new_step = PrimitiveStep(
                primitive=index.get_primitive(primitive_path))
            new_step.add_argument(
                name="inputs",
                argument_type=ArgumentType.CONTAINER,
                data_reference=input_ref,
            )
            return new_step

        pipeline = Pipeline()
        pipeline.add_input(name="inputs")

        # Step 0: dataset -> dataframe.
        step = _container_step(
            "d3m.primitives.data_transformation.dataset_to_dataframe.Common",
            "inputs.0")
        step.add_output("produce")
        pipeline.add_step(step)

        # Step 1: annotate column roles with the simple profiler.
        step = _container_step(
            "d3m.primitives.schema_discovery.profiler.Common",
            "steps.0.produce")
        step.add_output("produce")
        pipeline.add_step(step)

        # Step 2: parse primitive column types.
        step = _container_step(
            "d3m.primitives.data_transformation.column_parser.Common",
            "steps.1.produce")
        step.add_output("produce")
        step.add_hyperparameter(
            name="parse_semantic_types",
            argument_type=ArgumentType.VALUE,
            data=[
                "http://schema.org/Boolean",
                "http://schema.org/Integer",
                "http://schema.org/Float",
                "https://metadata.datadrivendiscovery.org/types/FloatVector",
                "http://schema.org/DateTime",
            ],
        )
        pipeline.add_step(step)

        # Step 3: impute missing values in place.
        step = _container_step(
            "d3m.primitives.data_cleaning.imputer.SKlearn",
            "steps.2.produce")
        step.add_output("produce")
        step.add_hyperparameter(name="return_result",
                                argument_type=ArgumentType.VALUE,
                                data="replace")
        step.add_hyperparameter(name="use_semantic_types",
                                argument_type=ArgumentType.VALUE,
                                data=True)
        pipeline.add_step(step)

        # Step 4: keep attribute columns only.
        step = _container_step(
            "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common",
            "steps.3.produce")
        step.add_hyperparameter(
            name="semantic_types",
            argument_type=ArgumentType.VALUE,
            data=["https://metadata.datadrivendiscovery.org/types/Attribute"],
        )
        step.add_output("produce")
        pipeline.add_step(step)

        # Step 5: narrow to integer / float attributes.
        step = _container_step(
            "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common",
            "steps.4.produce")
        step.add_hyperparameter(
            name="semantic_types",
            argument_type=ArgumentType.VALUE,
            data=["http://schema.org/Integer", "http://schema.org/Float"],
        )
        step.add_output("produce")
        pipeline.add_step(step)

        # Step 6: pull target columns from the parsed frame (step 1).
        step = _container_step(
            "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common",
            "steps.1.produce")
        step.add_hyperparameter(
            name="semantic_types",
            argument_type=ArgumentType.VALUE,
            data=[
                "https://metadata.datadrivendiscovery.org/types/Target",
            ],
        )
        step.add_output("produce")
        pipeline.add_step(step)

        # Step 7: semi-supervised learner over features (5) and targets (6).
        step = _container_step(
            "d3m.primitives.semisupervised_classification.iterative_labeling.TabularSemiSupervised",
            "steps.5.produce")
        step.add_argument(
            name="outputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.6.produce",
        )
        step.add_hyperparameter(name="algorithm",
                                argument_type=ArgumentType.VALUE,
                                data=algorithm)
        step.add_output("produce")
        pipeline.add_step(step)

        # Step 8: format predictions against the parsed reference frame.
        step = _container_step(
            "d3m.primitives.data_transformation.construct_predictions.Common",
            "steps.7.produce")
        step.add_argument(
            name="reference",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.1.produce",
        )
        step.add_output("produce")
        pipeline.add_step(step)

        # Final output: constructed predictions.
        pipeline.add_output(name="output predictions",
                            data_reference="steps.8.produce")

        self.pipeline = pipeline
Beispiel #24
0
    def __init__(self):
        """Assemble an rffeatures + XGBoost classification pipeline.

        Layout: dataset_to_dataframe -> profiler -> column_parser ->
        rffeatures feature selection -> extract targets -> XGBoost
        (gbtree) classifier -> construct_predictions. The finished
        description is stored on ``self.pipeline``.
        """

        def _container_step(primitive_path, input_ref):
            # Create a step whose 'inputs' argument references an
            # upstream container output.
            new_step = PrimitiveStep(
                primitive=index.get_primitive(primitive_path))
            new_step.add_argument(
                name="inputs",
                argument_type=ArgumentType.CONTAINER,
                data_reference=input_ref,
            )
            return new_step

        pipeline = Pipeline()
        pipeline.add_input(name="inputs")

        # Step 0: dataset -> dataframe.
        step = _container_step(
            "d3m.primitives.data_transformation.dataset_to_dataframe.Common",
            "inputs.0")
        step.add_output("produce")
        pipeline.add_step(step)

        # Step 1: annotate column roles with the simple profiler.
        step = _container_step(
            "d3m.primitives.schema_discovery.profiler.Common",
            "steps.0.produce")
        step.add_output("produce")
        pipeline.add_step(step)

        # Step 2: parse column types.
        step = _container_step(
            "d3m.primitives.data_transformation.column_parser.Common",
            "steps.1.produce")
        step.add_output("produce")
        pipeline.add_step(step)

        # Step 3: random-forest feature selection; the parsed frame
        # serves as both the feature and the target source.
        step = _container_step(
            "d3m.primitives.feature_selection.rffeatures.Rffeatures",
            "steps.2.produce")
        step.add_argument(
            name="outputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.2.produce",
        )
        step.add_hyperparameter(
            name="only_numeric_cols",
            argument_type=ArgumentType.VALUE,
            data=True,
        )
        step.add_hyperparameter(
            name="proportion_of_features",
            argument_type=ArgumentType.VALUE,
            data=1.0,
        )
        step.add_output("produce")
        pipeline.add_step(step)

        # Step 4: target columns (index columns kept for joining later).
        step = _container_step(
            "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common",
            "steps.2.produce")
        step.add_hyperparameter(
            name="semantic_types",
            argument_type=ArgumentType.VALUE,
            data=[
                "https://metadata.datadrivendiscovery.org/types/Target",
            ],
        )
        step.add_hyperparameter(
            name="add_index_columns",
            argument_type=ArgumentType.VALUE,
            data=True,
        )
        step.add_output("produce")
        pipeline.add_step(step)

        # Step 5: XGBoost over selected features (3) and targets (4).
        step = _container_step(
            "d3m.primitives.classification.xgboost_gbtree.Common",
            "steps.3.produce")
        step.add_argument(
            name="outputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.4.produce",
        )
        step.add_output("produce")
        step.add_hyperparameter(
            name="add_index_columns",
            argument_type=ArgumentType.VALUE,
            data=True,
        )
        pipeline.add_step(step)

        # Step 6: format predictions against the original dataframe.
        step = _container_step(
            "d3m.primitives.data_transformation.construct_predictions.Common",
            "steps.5.produce")
        step.add_argument(
            name="reference",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.0.produce",
        )
        step.add_output("produce")
        pipeline.add_step(step)

        # Final output: constructed predictions.
        pipeline.add_output(
            name="output predictions", data_reference="steps.6.produce"
        )

        self.pipeline = pipeline
Beispiel #25
0
from d3m import index
from d3m.metadata.base import ArgumentType, Context
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
import sys

# Assemble a two-step (dataframe conversion + profiler) pipeline skeleton.
pipeline_description = Pipeline()
pipeline_description.add_input(name="inputs")

# Step 0: dataset -> dataframe.
step_0 = PrimitiveStep(primitive=index.get_primitive(
    "d3m.primitives.data_transformation.dataset_to_dataframe.Common"))
step_0.add_argument(name="inputs",
                    argument_type=ArgumentType.CONTAINER,
                    data_reference="inputs.0")
step_0.add_output("produce")
pipeline_description.add_step(step_0)

# Step 1: annotate column roles with the simple profiler.
step_1 = PrimitiveStep(primitive=index.get_primitive(
    "d3m.primitives.schema_discovery.profiler.Common"))
step_1.add_argument(name="inputs",
                    argument_type=ArgumentType.CONTAINER,
                    data_reference="steps.0.produce")
step_1.add_output("produce")
pipeline_description.add_step(step_1)
Beispiel #26
0
from d3m.metadata import hyperparams
import copy

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
#                                             extract_columns_by_semantic_types(targets)    ->            ^

# Assemble the pipeline skeleton.
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: TODS dataset -> dataframe.
primitive_0 = index.get_primitive(
    'd3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(
    name='inputs',
    argument_type=ArgumentType.CONTAINER,
    data_reference='inputs.0',
)
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: parse column types.
primitive_1 = index.get_primitive(
    'd3m.primitives.data_transformation.column_parser.Common')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(
    name='inputs',
    argument_type=ArgumentType.CONTAINER,
    data_reference='steps.0.produce',
)
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
def create_pipeline(metric: str) -> Pipeline:
    """Create a question-answering pipeline around BERT pair classification.

    Layout: denormalize -> dataset_to_dataframe -> column_parser ->
    extract(attributes) + extract(targets) -> BERT pair classifier ->
    construct_predictions.

    Parameters
    ----------
    metric:
        Scoring metric forwarded to the BERT primitive as a
        hyperparameter.

    Returns
    -------
    Pipeline
        The assembled (untrained) pipeline description.
    """
    qa_pipeline = Pipeline(context=PipelineContext.TESTING)
    qa_pipeline.add_input(name='inputs')

    # Step 0: collapse the multi-resource dataset into one resource.
    step = PrimitiveStep(
        primitive_description=DenormalizePrimitive.metadata.query())
    step.add_argument(name='inputs',
                      argument_type=ArgumentType.CONTAINER,
                      data_reference='inputs.0')
    step.add_output('produce')
    qa_pipeline.add_step(step)

    # Step 1: dataset -> dataframe.
    step = PrimitiveStep(
        primitive_description=DatasetToDataFramePrimitive.metadata.query())
    step.add_argument(name='inputs',
                      argument_type=ArgumentType.CONTAINER,
                      data_reference='steps.0.produce')
    step.add_output('produce')
    qa_pipeline.add_step(step)

    # Step 2: parse boolean / numeric / float-vector columns.
    step = PrimitiveStep(
        primitive_description=ColumnParserPrimitive.metadata.query())
    step.add_argument(name='inputs',
                      argument_type=ArgumentType.CONTAINER,
                      data_reference='steps.1.produce')
    step.add_output('produce')
    step.add_hyperparameter(
        'parse_semantic_types', ArgumentType.VALUE,
        ('http://schema.org/Boolean', 'http://schema.org/Integer',
         'http://schema.org/Float',
         'https://metadata.datadrivendiscovery.org/types/FloatVector'))
    qa_pipeline.add_step(step)

    # Step 3: attribute columns from the parsed frame (step 2).
    step = PrimitiveStep(
        primitive_description=ExtractColumnsBySemanticTypesPrimitive.metadata.
        query())
    step.add_argument(name='inputs',
                      argument_type=ArgumentType.CONTAINER,
                      data_reference='steps.2.produce')
    step.add_output('produce')
    step.add_hyperparameter(
        'semantic_types', ArgumentType.VALUE,
        ('https://metadata.datadrivendiscovery.org/types/Attribute', ))
    qa_pipeline.add_step(step)

    # Step 4: target columns from the parsed frame (step 2).
    step = PrimitiveStep(
        primitive_description=ExtractColumnsBySemanticTypesPrimitive.metadata.
        query())
    step.add_argument(name='inputs',
                      argument_type=ArgumentType.CONTAINER,
                      data_reference='steps.2.produce')
    step.add_output('produce')
    step.add_hyperparameter(
        'semantic_types', ArgumentType.VALUE,
        ('https://metadata.datadrivendiscovery.org/types/Target',
         'https://metadata.datadrivendiscovery.org/types/TrueTarget'))
    qa_pipeline.add_step(step)

    # Step 5: BERT pair classifier over attributes (3) and targets (4).
    step = PrimitiveStep(
        primitive_description=BertPairClassificationPrimitive.metadata.query())
    step.add_argument(name='inputs',
                      argument_type=ArgumentType.CONTAINER,
                      data_reference='steps.3.produce')
    step.add_argument(name='outputs',
                      argument_type=ArgumentType.CONTAINER,
                      data_reference='steps.4.produce')
    step.add_output('produce')
    step.add_hyperparameter('metric', ArgumentType.VALUE, metric)
    # Column indices of the two documents that form each pair —
    # presumably fixed by the upstream dataset layout; confirm if reused.
    step.add_hyperparameter('doc_col_0', ArgumentType.VALUE, 1)
    step.add_hyperparameter('doc_col_1', ArgumentType.VALUE, 3)
    qa_pipeline.add_step(step)

    # Step 6: render predictions against the parsed reference frame.
    step = PrimitiveStep(
        primitive_description=ConstructPredictionsPrimitive.metadata.query())
    step.add_argument(name='inputs',
                      argument_type=ArgumentType.CONTAINER,
                      data_reference='steps.5.produce')
    step.add_argument(name='reference',
                      argument_type=ArgumentType.CONTAINER,
                      data_reference='steps.2.produce')
    step.add_output('produce')
    qa_pipeline.add_step(step)

    # Expose the constructed predictions as the pipeline output.
    qa_pipeline.add_output(name='output', data_reference='steps.6.produce')

    return qa_pipeline
Beispiel #28
0
def image_regress_pipeline(resolver=None) -> Pipeline:
    """Build an image-regression pipeline (VGG16 features -> PCA -> RF).

    Layout: denormalize -> dataset_to_dataframe; targets are extracted in
    parallel; dataframe_to_tensor -> VGG16 feature extraction -> PCA ->
    random-forest regressor.

    Parameters
    ----------
    resolver:
        Primitive resolver to use; when ``None`` a fresh
        ``custom_resolver.BlackListResolver`` is created.

    Returns
    -------
    Pipeline
        The assembled pipeline description, with the regressor's
        ``produce`` output exposed as ``output predictions``.
    """
    if resolver is None:
        resolver = custom_resolver.BlackListResolver()
    # Creating Pipeline
    pipeline_description = Pipeline(context='PRETRAINING')
    pipeline_description.add_input(name='inputs')

    start_step = "inputs.0"

    # Step 0: Denormalize the input dataset.
    step_0 = PrimitiveStep(primitive_description=d3m.primitives.dsbox.
                           Denormalize.metadata.query(),
                           resolver=resolver)
    step_0.add_argument(name='inputs',
                        argument_type=ArgumentType.CONTAINER,
                        data_reference=start_step)
    step_0.add_output('produce')
    pipeline_description.add_step(step_0)

    # Step 1: DatasetToDataFrame.
    step_1 = PrimitiveStep(primitive_description=d3m.primitives.datasets.
                           DatasetToDataFrame.metadata.query(),
                           resolver=resolver)
    step_1.add_argument(name='inputs',
                        argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.0.produce')
    # NOTE(review): hyperparameters are copied from DataFrameToList onto
    # the DatasetToDataFrame step — looks intentional, but worth confirming.
    add_hyperparameters(step_1, d3m.primitives.data.DataFrameToList)
    step_1.add_output('produce')
    pipeline_description.add_step(step_1)

    # Step 2: extract the target (and suggested-target) columns.
    step_2 = PrimitiveStep(primitive_description=d3m.primitives.data.
                           ExtractColumnsBySemanticTypes.metadata.query(),
                           resolver=resolver)
    step_2.add_argument(name='inputs',
                        argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.1.produce')
    step_2.add_hyperparameter(
        name='semantic_types',
        argument_type=ArgumentType.VALUE,
        data=[
            "https://metadata.datadrivendiscovery.org/types/Target",
            "https://metadata.datadrivendiscovery.org/types/SuggestedTarget"
        ])
    step_2.add_output('produce')
    pipeline_description.add_step(step_2)

    # Step 3: dataframe -> tensor for the CNN feature extractor.
    step_3 = PrimitiveStep(primitive_description=d3m.primitives.dsbox.
                           DataFrameToTensor.metadata.query(),
                           resolver=resolver)
    step_3.add_argument(name='inputs',
                        argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.1.produce')
    step_3.add_output('produce')
    pipeline_description.add_step(step_3)

    # Step 4: VGG16 feature extraction.
    step_4 = PrimitiveStep(primitive_description=d3m.primitives.dsbox.
                           Vgg16ImageFeature.metadata.query(),
                           resolver=resolver)
    step_4.add_argument(name='inputs',
                        argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.3.produce')
    step_4.add_output('produce')
    pipeline_description.add_step(step_4)

    # Step 5: PCA over the extracted features.
    step_5 = PrimitiveStep(primitive_description=d3m.primitives.sklearn_wrap.
                           SKPCA.metadata.query(),
                           resolver=resolver)
    step_5.add_argument(name='inputs',
                        argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.4.produce')
    step_5.add_output('produce')
    pipeline_description.add_step(step_5)

    # Step 6: random-forest regressor over PCA features (5) and targets (2).
    step_6 = PrimitiveStep(primitive_description=d3m.primitives.sklearn_wrap.
                           SKRandomForestRegressor.metadata.query(),
                           resolver=resolver)
    step_6.add_argument(name='inputs',
                        argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.5.produce')
    step_6.add_argument(name='outputs',
                        argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.2.produce')
    step_6.add_output('produce')
    pipeline_description.add_step(step_6)

    # Expose the regressor's predictions as the pipeline output.
    pipeline_description.add_output(name='output predictions',
                                    data_reference='steps.6.produce')

    # Dead code removed: last_step / attributes / targets were computed
    # via pipelines.int_to_step but never used.
    return pipeline_description
Beispiel #29
0
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
from d3m.metadata import hyperparams

# Intended pipeline layout for this example:
# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
#                                             extract_columns_by_semantic_types(targets)    ->            ^
#
# NOTE(review): this fragment references ``index`` and ``ArgumentType`` which
# are imported elsewhere in the file, and it appears truncated after the
# "Standardization" comment below.

# Creating pipeline: declare one input slot named 'inputs'.
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: convert the input Dataset container into a DataFrame (TODS variant).
primitive_0 = index.get_primitive(
    'd3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs',
                    argument_type=ArgumentType.CONTAINER,
                    data_reference='inputs.0')  # the pipeline's first input
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser — parses string columns into typed values,
# consuming the DataFrame produced by step 0.
primitive_1 = index.get_primitive(
    'd3m.primitives.data_transformation.column_parser.Common')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs',
                    argument_type=ArgumentType.CONTAINER,
                    data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: Standardization (code for this step is truncated in the fragment)
Beispiel #30
0
    def __init__(self):
        """Assemble a fixed D3M pipeline that exposes SHAP values.

        The step chain is::

            dataset_to_dataframe -> profiler -> column_parser -> imputer
                -> extract(attributes) --+
                -> extract(targets) -----+-> text encoder -> ensemble forest
                                             (produce_shap_values)

        The finished ``Pipeline`` object is stored on ``self.pipeline``.
        """
        pipeline = Pipeline()
        pipeline.add_input(name="inputs")

        def append_step(path, inputs_ref, outputs_ref=None,
                        hparams=None, output_name="produce"):
            # Build one PrimitiveStep for the primitive at ``path``, wire its
            # container argument(s) and VALUE hyper-parameters in declaration
            # order, expose ``output_name``, and append it to the pipeline.
            node = PrimitiveStep(primitive=index.get_primitive(path))
            node.add_argument(name="inputs",
                              argument_type=ArgumentType.CONTAINER,
                              data_reference=inputs_ref)
            if outputs_ref is not None:
                node.add_argument(name="outputs",
                                  argument_type=ArgumentType.CONTAINER,
                                  data_reference=outputs_ref)
            for hp_name, hp_value in (hparams or {}).items():
                node.add_hyperparameter(name=hp_name,
                                        argument_type=ArgumentType.VALUE,
                                        data=hp_value)
            node.add_output(output_name)
            pipeline.add_step(node)

        # Step 0: Dataset container -> DataFrame.
        append_step(
            "d3m.primitives.data_transformation.dataset_to_dataframe.Common",
            "inputs.0")

        # Step 1: simple profiler annotates column roles.
        append_step("d3m.primitives.schema_discovery.profiler.Common",
                    "steps.0.produce")

        # Step 2: column parser — labeled semantic types to data types.
        append_step("d3m.primitives.data_transformation.column_parser.Common",
                    "steps.1.produce")

        # Step 3: imputer — replaces null values in place, selecting columns
        # via semantic types.
        append_step("d3m.primitives.data_cleaning.imputer.SKlearn",
                    "steps.2.produce",
                    hparams={"return_result": "replace",
                             "use_semantic_types": True})

        # Step 4: extract the feature (attribute) columns.
        append_step(
            "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common",
            "steps.3.produce",
            hparams={"semantic_types": (
                "https://metadata.datadrivendiscovery.org/types/Attribute", )})

        # Step 5: extract the target columns.
        append_step(
            "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common",
            "steps.3.produce",
            hparams={"semantic_types": (
                "https://metadata.datadrivendiscovery.org/types/Target",
                "https://metadata.datadrivendiscovery.org/types/TrueTarget")})

        # Step 6: text encoder fitted against the targets.
        append_step(
            "d3m.primitives.data_transformation.encoder.DistilTextEncoder",
            "steps.4.produce",
            outputs_ref="steps.5.produce",
            hparams={"metric": "accuracy"})

        # Step 7: ensemble forest; we expose its SHAP-values produce method
        # rather than plain predictions.
        append_step(
            "d3m.primitives.learner.random_forest.DistilEnsembleForest",
            "steps.6.produce",
            outputs_ref="steps.5.produce",
            output_name="produce_shap_values")

        # Final pipeline output: the SHAP values from step 7.
        pipeline.add_output(name="output",
                            data_reference="steps.7.produce_shap_values")

        self.pipeline = pipeline