def __init__(self, sample_size: int = 1000, all_layer_epochs: int = 100,
             top_layer_epochs: int = 100):
    pipeline_description = Pipeline()
    pipeline_description.add_input(name="inputs")

    # Denormalize primitive
    step = PrimitiveStep(primitive=index.get_primitive(
        'd3m.primitives.data_transformation.denormalize.Common'))
    step.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                      data_reference='inputs.0')
    step.add_output('produce')
    pipeline_description.add_step(step)

    # Dataset sample primitive to reduce computation time
    step = PrimitiveStep(primitive=index.get_primitive(
        'd3m.primitives.data_preprocessing.dataset_sample.Common'))
    step.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                      data_reference='steps.0.produce')
    step.add_hyperparameter(name='sample_size',
                            argument_type=ArgumentType.VALUE,
                            data=sample_size)
    step.add_output('produce')
    pipeline_description.add_step(step)

    # DS to DF
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.dataset_to_dataframe.Common"))
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.1.produce",
    )
    step.add_hyperparameter(name='dataframe_resource',
                            argument_type=ArgumentType.VALUE,
                            data='learningData')
    step.add_output("produce")
    pipeline_description.add_step(step)

    # column parser on input DF
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.column_parser.Common"))
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.2.produce",
    )
    step.add_output("produce")
    step.add_hyperparameter(
        name="parse_semantic_types",
        argument_type=ArgumentType.VALUE,
        data=[
            "http://schema.org/Boolean",
            "http://schema.org/Integer",
            "http://schema.org/Float",
            "https://metadata.datadrivendiscovery.org/types/FloatVector",
        ],
    )
    pipeline_description.add_step(step)

    # parse attribute semantic types
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common"
    ))
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.3.produce",
    )
    step.add_hyperparameter(
        name="semantic_types",
        argument_type=ArgumentType.VALUE,
        data=[
            "https://metadata.datadrivendiscovery.org/types/Attribute",
        ],
    )
    step.add_output("produce")
    pipeline_description.add_step(step)

    # parse target semantic types
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common"
    ))
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.3.produce",
    )
    step.add_hyperparameter(
        name="semantic_types",
        argument_type=ArgumentType.VALUE,
        data=[
            "https://metadata.datadrivendiscovery.org/types/Target",
        ],
    )
    step.add_output("produce")
    pipeline_description.add_step(step)

    # Gator
    step = PrimitiveStep(primitive=index.get_primitive(
        'd3m.primitives.classification.inceptionV3_image_feature.Gator'))
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.4.produce",
    )
    step.add_argument(
        name="outputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.5.produce",
    )
    step.add_hyperparameter(name='unfreeze_proportions',
                            argument_type=ArgumentType.VALUE, data=[0.5])
    step.add_hyperparameter(name='top_layer_epochs',
                            argument_type=ArgumentType.VALUE,
                            data=top_layer_epochs)
    step.add_hyperparameter(name='all_layer_epochs',
                            argument_type=ArgumentType.VALUE,
                            data=all_layer_epochs)
    step.add_hyperparameter(name='weights_filepath',
                            argument_type=ArgumentType.VALUE,
                            data='/scratch_dir/model_weights.h5')
    step.add_output("produce")
    pipeline_description.add_step(step)

    # construct predictions
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.construct_predictions.Common"))
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.6.produce",
    )
    step.add_argument(
        name="reference",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.2.produce",
    )
    step.add_output("produce")
    pipeline_description.add_step(step)

    # Final Output
    pipeline_description.add_output(name="output predictions",
                                    data_reference="steps.7.produce")

    self.pipeline = pipeline_description
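# Serialization sketch (hedged: the wrapper class name below is an assumption
# for illustration; Pipeline.to_json() is the standard d3m API):
#
#   pipe = GatorPipeline(sample_size=1000, top_layer_epochs=100)
#   with open('gator_pipeline.json', 'w') as f:
#       f.write(pipe.pipeline.to_json())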
def __init__(self):
    pipeline_description = Pipeline()
    pipeline_description.add_input(name="inputs")

    # Denormalize primitive
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.denormalize.Common"))
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="inputs.0",
    )
    step.add_output("produce")
    pipeline_description.add_step(step)

    # DS to DF on input DS
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.dataset_to_dataframe.Common"))
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="inputs.0",
    )
    step.add_output("produce")
    pipeline_description.add_step(step)

    # Goat forward
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_cleaning.geocoding.Goat_forward"))
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.1.produce",
    )
    step.add_hyperparameter(name="target_columns",
                            argument_type=ArgumentType.VALUE, data=[1])
    step.add_hyperparameter(name="cache_size",
                            argument_type=ArgumentType.VALUE, data=2000)
    step.add_output("produce")
    pipeline_description.add_step(step)

    # column parser on input DF
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.column_parser.Common"))
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.2.produce",
    )
    step.add_output("produce")
    pipeline_description.add_step(step)

    # XG Boost
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.classification.xgboost_gbtree.Common"))
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.3.produce",
    )
    step.add_argument(
        name="outputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.3.produce",
    )
    step.add_output("produce")
    step.add_hyperparameter(name="return_result",
                            argument_type=ArgumentType.VALUE, data="replace")
    pipeline_description.add_step(step)

    # construct predictions
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.construct_predictions.Common"))
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.4.produce",
    )
    step.add_argument(
        name="reference",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.1.produce",
    )
    step.add_output("produce")
    pipeline_description.add_step(step)

    # Final Output
    pipeline_description.add_output(name="output predictions",
                                    data_reference="steps.5.produce")

    self.pipeline = pipeline_description
def generate_only():
    # Creating pipeline
    pipeline_description = Pipeline()
    pipeline_description.add_input(name='inputs')

    # Step 0: dataset_to_dataframe
    step_0 = PrimitiveStep(primitive=index.get_primitive(
        'd3m.primitives.data_transformation.dataset_to_dataframe.Common'))
    step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                        data_reference='inputs.0')
    step_0.add_output('produce')
    pipeline_description.add_step(step_0)

    # Step 1: profiler
    step_1 = PrimitiveStep(primitive=index.get_primitive(
        'd3m.primitives.schema_discovery.profiler.Common'))
    step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.0.produce')
    step_1.add_output('produce')
    pipeline_description.add_step(step_1)

    # Step 2: column_parser
    step_2 = PrimitiveStep(primitive=index.get_primitive(
        'd3m.primitives.data_transformation.column_parser.Common'))
    step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.1.produce')
    step_2.add_output('produce')
    pipeline_description.add_step(step_2)

    # Step 3: DFS Single Table
    step_3 = PrimitiveStep(primitive=index.get_primitive(
        'd3m.primitives.feature_construction.deep_feature_synthesis.SingleTableFeaturization'))
    step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.2.produce')
    step_3.add_output('produce')
    pipeline_description.add_step(step_3)

    # Step 4: learn model
    step_4 = PrimitiveStep(primitive=index.get_primitive(
        'd3m.primitives.regression.xgboost_gbtree.Common'))
    step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.3.produce')
    step_4.add_argument(name='outputs', argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.2.produce')
    step_4.add_output('produce')
    pipeline_description.add_step(step_4)

    # Step 5: construct output
    step_5 = PrimitiveStep(primitive=index.get_primitive(
        'd3m.primitives.data_transformation.construct_predictions.Common'))
    step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.4.produce')
    step_5.add_argument(name='reference', argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.2.produce')
    step_5.add_output('produce')
    pipeline_description.add_step(step_5)

    # Final Output
    pipeline_description.add_output(name='output predictions',
                                    data_reference='steps.5.produce')

    # Generate .yml file for the pipeline
    import os  # needed below for path handling and running the command
    import featuretools_ta1
    from pipeline_tests.utils import generate_pipeline

    dataset_name = '196_autoMpg_MIN_METADATA'
    dataset_path = '/featuretools_ta1/datasets/seed_datasets_current'
    primitive_name = 'd3m.primitives.feature_construction.deep_feature_synthesis.SingleTableFeaturization'
    version = featuretools_ta1.__version__
    test_name = os.path.splitext(os.path.basename(__file__))[0][5:]
    yml, pipeline_run_file = generate_pipeline(primitive_name=primitive_name,
                                               pipeline_description=pipeline_description,
                                               dataset_name=dataset_name,
                                               test_name=test_name)

    # fit-score command
    fs_cmd = 'python3 -m d3m runtime -d /featuretools_ta1/datasets/ fit-score -p {}'.format(yml)
    fs_cmd += ' -r {}/{}/{}_problem/problemDoc.json'.format(dataset_path, dataset_name, dataset_name)
    fs_cmd += ' -i {}/{}/TRAIN/dataset_TRAIN/datasetDoc.json'.format(dataset_path, dataset_name)
    fs_cmd += ' -t {}/{}/TEST/dataset_TEST/datasetDoc.json'.format(dataset_path, dataset_name)
    fs_cmd += ' -a {}/{}/SCORE/dataset_SCORE/datasetDoc.json'.format(dataset_path, dataset_name)
    fs_cmd += ' -O {}'.format(pipeline_run_file)

    # Run pipeline to save pipeline_run file
    os.system(fs_cmd)

    # Create and return command for running from pipeline_run file:
    pipeline_run_cmd = 'python3 -m d3m --pipelines-path /featuretools_ta1/MIT_FeatureLabs/{}/{}/pipelines/'.format(primitive_name, version)
    pipeline_run_cmd += ' runtime -d /featuretools_ta1/datasets/ fit-score'
    pipeline_run_cmd += ' -u {}'.format(pipeline_run_file)
    return pipeline_run_cmd
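# Usage sketch: generate_only() runs the fit-score command as a side effect
# (saving a pipeline_run file) and returns a shell command that replays it:
#
#   rerun_cmd = generate_only()
#   os.system(rerun_cmd)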
def __init__(self):
    pipeline_description = Pipeline()
    pipeline_description.add_input(name="inputs")

    # DS to DF on input DS
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.dataset_to_dataframe.Common"))
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="inputs.0",
    )
    step.add_output("produce")
    pipeline_description.add_step(step)

    # Simple Profiler Column Role Annotation
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.schema_discovery.profiler.Common"))
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.0.produce",
    )
    step.add_output("produce")
    pipeline_description.add_step(step)

    # column parser on input DF
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.column_parser.Common"))
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.1.produce",
    )
    step.add_output("produce")
    pipeline_description.add_step(step)

    # PcaFeatures
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.feature_selection.pca_features.Pcafeatures"))
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.2.produce",
    )
    step.add_argument(
        name="outputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.2.produce",
    )
    step.add_output("produce")
    pipeline_description.add_step(step)

    # parse target semantic types
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common"
    ))
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.2.produce",
    )
    step.add_hyperparameter(
        name="semantic_types",
        argument_type=ArgumentType.VALUE,
        data=[
            "https://metadata.datadrivendiscovery.org/types/Target",
        ],
    )
    step.add_hyperparameter(name="add_index_columns",
                            argument_type=ArgumentType.VALUE, data=True)
    step.add_output("produce")
    pipeline_description.add_step(step)

    # XGBoost
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.classification.xgboost_gbtree.Common"))
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.3.produce",
    )
    step.add_argument(
        name="outputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.4.produce",
    )
    step.add_output("produce")
    step.add_hyperparameter(name="add_index_columns",
                            argument_type=ArgumentType.VALUE, data=True)
    pipeline_description.add_step(step)

    # construct predictions
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.construct_predictions.Common"))
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.5.produce",
    )
    step.add_argument(
        name="reference",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.0.produce",
    )
    step.add_output("produce")
    pipeline_description.add_step(step)

    # Final Output
    pipeline_description.add_output(name="output predictions",
                                    data_reference="steps.6.produce")

    self.pipeline = pipeline_description
def _gen_pipeline(self):
    pipeline = meta_pipeline.Pipeline()
    pipeline.add_input(name='inputs')

    # Step 0: dataset_to_dataframe
    step_0 = PrimitiveStep(primitive=index.get_primitive(
        'd3m.primitives.data_transformation.dataset_to_dataframe.Common'))
    step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                        data_reference='inputs.0')
    step_0.add_output('produce')
    pipeline.add_step(step_0)

    # Step 1: dataset_to_dataframe
    step_1 = PrimitiveStep(primitive=index.get_primitive(
        'd3m.primitives.data_transformation.dataset_to_dataframe.Common'))
    step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                        data_reference='inputs.0')
    step_1.add_hyperparameter(name='dataframe_resource',
                              argument_type=ArgumentType.VALUE, data='1')
    step_1.add_output('produce')
    pipeline.add_step(step_1)

    # Step 2: dataset_to_dataframe
    step_2 = PrimitiveStep(primitive=index.get_primitive(
        'd3m.primitives.data_transformation.dataset_to_dataframe.Common'))
    step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                        data_reference='inputs.0')
    step_2.add_hyperparameter(name='dataframe_resource',
                              argument_type=ArgumentType.VALUE, data='2')
    step_2.add_output('produce')
    pipeline.add_step(step_2)

    # Step 3
    step_3 = meta_pipeline.PrimitiveStep(
        primitive_description=PartialProcrustes.metadata.query())
    step_3.add_argument(name='inputs_1', argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.1.produce')
    step_3.add_argument(name='inputs_2', argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.2.produce')
    step_3.add_argument(name='reference', argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.0.produce')
    step_3.add_output('produce')
    pipeline.add_step(step_3)

    # Step 4
    step_4 = meta_pipeline.PrimitiveStep(
        primitive_description=EuclideanNomination.metadata.query())
    step_4.add_argument(name='inputs_1', argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.1.produce')
    step_4.add_argument(name='inputs_2', argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.3.produce')
    step_4.add_argument(name='reference', argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.0.produce')
    step_4.add_output('produce')
    pipeline.add_step(step_4)

    # Adding output step to the pipeline
    pipeline.add_output(name='Predictions', data_reference='steps.4.produce')

    return pipeline
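# Minimal fit sketch (assumptions: the d3m reference runtime API and an
# already-loaded `dataset` container):
#
#   from d3m.runtime import Runtime
#   runtime = Runtime(pipeline=self._gen_pipeline(), context=Context.TESTING)
#   result = runtime.fit(inputs=[dataset])
#   result.check_success()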
def __init__(
    self,
    interpretable: bool = True,
    epochs: int = 10,
    steps_per_epoch: int = 50,
    prediction_length: int = 30,
    num_context_lengths: int = 1,
    num_estimators: int = 2,
    # quantiles: List[float] = [0.1, 0.9],
    group_compose: bool = False,
    confidence_intervals: bool = False,
    output_mean: bool = True,
    weights_dir: str = '/scratch_dir/nbeats'
):
    pipeline_description = Pipeline()
    pipeline_description.add_input(name="inputs")

    # DS to DF on input DS
    step = PrimitiveStep(
        primitive=index.get_primitive(
            "d3m.primitives.data_transformation.dataset_to_dataframe.Common"
        )
    )
    step.add_argument(
        name="inputs", argument_type=ArgumentType.CONTAINER, data_reference="inputs.0"
    )
    step.add_output("produce")
    pipeline_description.add_step(step)

    # Simple Profiler Column Role Annotation
    step = PrimitiveStep(
        primitive=index.get_primitive("d3m.primitives.schema_discovery.profiler.Common")
    )
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.0.produce",
    )
    step.add_output("produce")
    pipeline_description.add_step(step)

    # column parser on input DF
    step = PrimitiveStep(
        primitive=index.get_primitive(
            "d3m.primitives.data_transformation.column_parser.Common"
        )
    )
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.1.produce",
    )
    step.add_output("produce")
    step.add_hyperparameter(
        name="parse_semantic_types",
        argument_type=ArgumentType.VALUE,
        data=[
            "http://schema.org/Boolean",
            "http://schema.org/Integer",
            "http://schema.org/Float",
            "https://metadata.datadrivendiscovery.org/types/FloatVector",
            "http://schema.org/DateTime",
        ],
    )
    pipeline_description.add_step(step)

    # group compose
    if group_compose:
        step = PrimitiveStep(
            primitive=index.get_primitive(
                "d3m.primitives.data_transformation.grouping_field_compose.Common"
            )
        )
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.2.produce",
        )
        step.add_output("produce")
        pipeline_description.add_step(step)

    # parse attribute semantic types
    step = PrimitiveStep(
        primitive=index.get_primitive(
            "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common"
        )
    )
    data_ref = "steps.3.produce" if group_compose else "steps.2.produce"
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference=data_ref,
    )
    step.add_hyperparameter(
        name="semantic_types",
        argument_type=ArgumentType.VALUE,
        data=[
            "https://metadata.datadrivendiscovery.org/types/Attribute",
            'https://metadata.datadrivendiscovery.org/types/GroupingKey'
        ],
    )
    step.add_output("produce")
    pipeline_description.add_step(step)

    # parse target semantic types
    step = PrimitiveStep(
        primitive=index.get_primitive(
            "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common"
        )
    )
    data_ref = "steps.3.produce" if group_compose else "steps.2.produce"
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference=data_ref,
    )
    step.add_hyperparameter(
        name="semantic_types",
        argument_type=ArgumentType.VALUE,
        data=[
            "https://metadata.datadrivendiscovery.org/types/Target",
            "https://metadata.datadrivendiscovery.org/types/TrueTarget",
            "https://metadata.datadrivendiscovery.org/types/SuggestedTarget",
        ],
    )
    step.add_output("produce")
    pipeline_description.add_step(step)

    # forecasting primitive
    step = PrimitiveStep(
        primitive=index.get_primitive(
            "d3m.primitives.time_series_forecasting.feed_forward_neural_net.NBEATS"
        )
    )
    data_ref = "steps.4.produce" if group_compose else "steps.3.produce"
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference=data_ref,
    )
    data_ref = "steps.5.produce" if group_compose else "steps.4.produce"
    step.add_argument(
        name="outputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference=data_ref,
    )
    step.add_hyperparameter(
        name="interpretable",
        argument_type=ArgumentType.VALUE,
        data=interpretable,
    )
    step.add_hyperparameter(
        name="num_estimators",
        argument_type=ArgumentType.VALUE,
        data=num_estimators,
    )
    step.add_hyperparameter(
        name="epochs",
        argument_type=ArgumentType.VALUE,
        data=epochs,
    )
    step.add_hyperparameter(
        name="steps_per_epoch",
        argument_type=ArgumentType.VALUE,
        data=steps_per_epoch,
    )
    step.add_hyperparameter(
        name="prediction_length",
        argument_type=ArgumentType.VALUE,
        data=prediction_length,
    )
    step.add_hyperparameter(
        name="num_context_lengths",
        argument_type=ArgumentType.VALUE,
        data=num_context_lengths,
    )
    # step.add_hyperparameter(
    #     name="quantiles",
    #     argument_type=ArgumentType.VALUE,
    #     data=quantiles,
    # )
    step.add_hyperparameter(
        name="weights_dir",
        argument_type=ArgumentType.VALUE,
        data=weights_dir,
    )
    step.add_hyperparameter(
        name="output_mean",
        argument_type=ArgumentType.VALUE,
        data=output_mean,
    )

    if confidence_intervals:
        step.add_output("produce_confidence_intervals")
        pipeline_description.add_step(step)
        data_ref = ("steps.6.produce_confidence_intervals"
                    if group_compose
                    else "steps.5.produce_confidence_intervals")
        pipeline_description.add_output(
            name="output predictions", data_reference=data_ref
        )
    else:
        step.add_output("produce")
        pipeline_description.add_step(step)

        # construct predictions
        step = PrimitiveStep(
            primitive=index.get_primitive(
                "d3m.primitives.data_transformation.construct_predictions.Common"
            )
        )
        data_ref = "steps.6.produce" if group_compose else "steps.5.produce"
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference=data_ref,
        )
        step.add_argument(
            name="reference",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.1.produce",
        )
        step.add_output("produce")
        pipeline_description.add_step(step)

        data_ref = "steps.7.produce" if group_compose else "steps.6.produce"
        pipeline_description.add_output(
            name="output predictions", data_reference=data_ref
        )

    self.pipeline = pipeline_description
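# Note on the data_ref arithmetic in __init__ above: when group_compose is
# True, the optional grouping_field_compose step is inserted after the column
# parser and shifts every downstream step index by one, which is why each
# reference chooses between "steps.N.produce" and "steps.N+1.produce".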
def generate_only():
    # Creating pipeline
    pipeline_description = Pipeline()
    pipeline_description.add_input(name='inputs')

    # Step 0: Parse columns
    step_0 = PrimitiveStep(primitive=index.get_primitive(
        'd3m.primitives.operator.dataset_map.DataFrameCommon'))
    step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                        data_reference='inputs.0')
    # `column_parser` here is assumed to be the primitive module imported from
    # the d3m primitive namespace (d3m.primitives.data_transformation).
    step_0.add_hyperparameter(name='primitive',
                              argument_type=ArgumentType.VALUE,
                              data=column_parser.Common)
    step_0.add_hyperparameter(name='resources',
                              argument_type=ArgumentType.VALUE, data='all')
    step_0.add_hyperparameter(name='fit_primitive',
                              argument_type=ArgumentType.VALUE, data='no')
    step_0.add_output('produce')
    pipeline_description.add_step(step_0)

    # Step 1: MultiTableFeaturization
    step_1 = PrimitiveStep(primitive=index.get_primitive(
        'd3m.primitives.feature_construction.deep_feature_synthesis.MultiTableFeaturization'
    ))
    step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.0.produce')
    step_1.add_output('produce')
    pipeline_description.add_step(step_1)

    # Step 2: imputer
    step_2 = PrimitiveStep(primitive=index.get_primitive(
        'd3m.primitives.data_cleaning.imputer.SKlearn'))
    step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                        data_reference="steps.1.produce")
    step_2.add_hyperparameter(name='use_semantic_types',
                              argument_type=ArgumentType.VALUE, data=True)
    step_2.add_output('produce')
    pipeline_description.add_step(step_2)

    # Step 3: learn model
    step_3 = PrimitiveStep(primitive=index.get_primitive(
        'd3m.primitives.regression.xgboost_gbtree.Common'))
    step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.2.produce')
    step_3.add_argument(name='outputs', argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.1.produce')
    step_3.add_output('produce')
    pipeline_description.add_step(step_3)

    # Step 4: construct output
    step_4 = PrimitiveStep(primitive=index.get_primitive(
        'd3m.primitives.data_transformation.construct_predictions.Common'))
    step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.3.produce')
    step_4.add_argument(name='reference', argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.1.produce')
    step_4.add_output('produce')
    pipeline_description.add_step(step_4)

    # Final Output
    pipeline_description.add_output(name='output predictions',
                                    data_reference='steps.4.produce')

    # Generate .yml file for the pipeline
    import os  # needed below for path handling and running the command
    import featuretools_ta1
    from pipeline_tests.utils import generate_pipeline

    dataset_name = 'uu3_world_development_indicators'
    dataset_path = '/featuretools_ta1/datasets/seed_datasets_current'
    primitive_name = 'd3m.primitives.feature_construction.deep_feature_synthesis.MultiTableFeaturization'
    version = featuretools_ta1.__version__
    test_name = os.path.splitext(os.path.basename(__file__))[0][5:]
    yml, pipeline_run_file = generate_pipeline(
        primitive_name=primitive_name,
        pipeline_description=pipeline_description,
        dataset_name=dataset_name,
        test_name=test_name)

    # fit-score command
    fs_cmd = 'python3 -m d3m runtime -d /featuretools_ta1/datasets/ fit-score -p {}'.format(yml)
    fs_cmd += ' -r {}/{}/{}_problem/problemDoc.json'.format(dataset_path, dataset_name, dataset_name)
    fs_cmd += ' -i {}/{}/TRAIN/dataset_TRAIN/datasetDoc.json'.format(dataset_path, dataset_name)
    fs_cmd += ' -t {}/{}/TEST/dataset_TEST/datasetDoc.json'.format(dataset_path, dataset_name)
    fs_cmd += ' -a {}/{}/SCORE/dataset_SCORE/datasetDoc.json'.format(dataset_path, dataset_name)
    fs_cmd += ' -O {}'.format(pipeline_run_file)

    # Run pipeline to save pipeline_run file
    os.system(fs_cmd)

    # Create and return command for running from pipeline_run file:
    pipeline_run_cmd = 'python3 -m d3m --pipelines-path /featuretools_ta1/MIT_FeatureLabs/{}/{}/pipelines/'.format(
        primitive_name, version)
    pipeline_run_cmd += ' runtime -d /featuretools_ta1/datasets/ fit-score'
    pipeline_run_cmd += ' -u {}'.format(pipeline_run_file)
    return pipeline_run_cmd
def collaborative_filtering_link_prediction():
    # Creating Pipeline
    pipeline_description = Pipeline(context='PRETRAINING')
    pipeline_description.add_input(name='inputs')

    # Step 0: CollaborativeFilteringParser
    step_0 = PrimitiveStep(
        primitive_description=d3m.primitives.sri.graph.CollaborativeFilteringParser.metadata.query())
    step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                        data_reference='inputs.0')
    step_0.add_output('produce')
    pipeline_description.add_step(step_0)

    # Step 1: Apply GraphTransformer
    step_1 = PrimitiveStep(
        primitive_description=d3m.primitives.sri.graph.GraphTransformer.metadata.query())
    step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.0.produce')
    step_1.add_output('produce')
    pipeline_description.add_step(step_1)

    # Step 2: Apply LinkPrediction
    step_2 = PrimitiveStep(
        primitive_description=d3m.primitives.sri.psl.LinkPrediction.metadata.query())
    step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.1.produce')
    step_2.add_hyperparameter(name='prediction_column',
                              argument_type=ArgumentType.VALUE, data="rating")
    step_2.add_hyperparameter(name='truth_threshold',
                              argument_type=ArgumentType.VALUE, data=1e-07)
    step_2.add_hyperparameter(name="jvm_memory",
                              argument_type=ArgumentType.VALUE, data=0.5)
    step_2.add_output('produce')
    pipeline_description.add_step(step_2)

    # Step 3: ConstructPredictions
    step_3 = PrimitiveStep(
        primitive_description=d3m.primitives.data.ConstructPredictions.metadata.query())
    step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.2.produce')
    step_3.add_argument(name='reference', argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.2.produce')
    step_3.add_hyperparameter(name='use_columns',
                              argument_type=ArgumentType.VALUE, data=[0, 1])
    step_3.add_output('produce')
    pipeline_description.add_step(step_3)

    # Step 4: RemoveColumns
    step_4 = PrimitiveStep(
        primitive_description=d3m.primitives.data.RemoveColumns.metadata.query())
    step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.3.produce')
    step_4.add_hyperparameter(name='columns',
                              argument_type=ArgumentType.VALUE, data=[0])
    step_4.add_output('produce')
    pipeline_description.add_step(step_4)

    pipeline_description.add_output(name='Result',
                                    data_reference='steps.4.produce')

    return pipeline_description
def __init__(self, weights_filepath: str = '/scratch_dir/model_weights.pth',
             explain_all_classes: bool = False,
             all_confidences: bool = False,
             epochs: int = 25):
    pipeline_description = Pipeline()
    pipeline_description.add_input(name="inputs")

    # Denormalize
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.denormalize.Common"))
    step.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER,
                      data_reference="inputs.0")
    step.add_output("produce")
    pipeline_description.add_step(step)

    # DS to DF on input DS
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.dataset_to_dataframe.Common"))
    step.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER,
                      data_reference="steps.0.produce")
    step.add_output("produce")
    pipeline_description.add_step(step)

    # Satellite Image Loader
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.satellite_image_loader.DistilSatelliteImageLoader"
    ))
    step.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER,
                      data_reference="steps.1.produce")
    step.add_hyperparameter(name="return_result",
                            argument_type=ArgumentType.VALUE, data="replace")
    step.add_output("produce")
    pipeline_description.add_step(step)

    # Distil column parser
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.column_parser.DistilColumnParser"
    ))
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.2.produce",
    )
    step.add_output("produce")
    step.add_hyperparameter(
        name="parsing_semantics",
        argument_type=ArgumentType.VALUE,
        data=[
            "http://schema.org/Integer",
            "http://schema.org/Float",
            "https://metadata.datadrivendiscovery.org/types/FloatVector",
        ],
    )
    pipeline_description.add_step(step)

    # parse image semantic types
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common"
    ))
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.3.produce",
    )
    step.add_output("produce")
    step.add_hyperparameter(
        name="semantic_types",
        argument_type=ArgumentType.VALUE,
        data=[
            "http://schema.org/ImageObject",
        ],
    )
    pipeline_description.add_step(step)

    # parse target semantic types
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common"
    ))
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.3.produce",
    )
    step.add_output("produce")
    step.add_hyperparameter(
        name="semantic_types",
        argument_type=ArgumentType.VALUE,
        data=[
            "https://metadata.datadrivendiscovery.org/types/Target",
            "https://metadata.datadrivendiscovery.org/types/TrueTarget"
        ],
    )
    pipeline_description.add_step(step)

    # remote sensing pretrained
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.remote_sensing.remote_sensing_pretrained.RemoteSensingPretrained"
    ))
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.4.produce",
    )
    step.add_output("produce")
    step.add_hyperparameter(name="pool_features",
                            argument_type=ArgumentType.VALUE, data=False)
    pipeline_description.add_step(step)

    # mlp classifier
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.remote_sensing.mlp.MlpClassifier"))
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.6.produce",
    )
    step.add_argument(
        name="outputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.5.produce",
    )
    step.add_output("produce")
    step.add_hyperparameter(name="weights_filepath",
                            argument_type=ArgumentType.VALUE,
                            data=weights_filepath)
    step.add_hyperparameter(name="explain_all_classes",
                            argument_type=ArgumentType.VALUE,
                            data=explain_all_classes)
    step.add_hyperparameter(name="all_confidences",
                            argument_type=ArgumentType.VALUE,
                            data=all_confidences)
    step.add_hyperparameter(name="epochs",
                            argument_type=ArgumentType.VALUE, data=epochs)
    pipeline_description.add_step(step)

    # construct predictions
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.construct_predictions.Common"))
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.7.produce",
    )
    step.add_argument(
        name="reference",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.2.produce",
    )
    step.add_output("produce")
    step.add_hyperparameter(name="use_columns",
                            argument_type=ArgumentType.VALUE, data=[0, 1])
    pipeline_description.add_step(step)

    pipeline_description.add_output(name="output predictions",
                                    data_reference="steps.8.produce")

    self.pipeline = pipeline_description
def build_demo_pipeline():
    # Creating pipeline
    pipeline = Pipeline(context=Context.TESTING)
    pipeline.add_input(name='inputs')

    # Step 0: DFS
    step_0 = PrimitiveStep(primitive_description=Featuretools.metadata.query())
    step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                        data_reference='inputs.0')
    step_0.add_output('produce')
    pipeline.add_step(step_0)

    # Step 1: SKlearnImputer
    step_1 = PrimitiveStep(
        primitive_description=SKlearnImputer.metadata.query())
    step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.0.produce')
    step_1.add_output('produce')
    pipeline.add_step(step_1)

    # Step 2: SKlearnRFC
    step_2 = PrimitiveStep(primitive_description=SKlearnRFC.metadata.query())
    step_2.add_hyperparameter(name='use_semantic_types',
                              argument_type=ArgumentType.VALUE, data=True)
    step_2.add_hyperparameter(name='add_index_columns',
                              argument_type=ArgumentType.VALUE, data=True)
    step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.1.produce')
    step_2.add_argument(name='outputs', argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.1.produce')
    step_2.add_output('produce')
    pipeline.add_step(step_2)

    # Step 3: ConstructPredictions
    step_3 = PrimitiveStep(
        primitive_description=DataFrameCommon.metadata.query())
    step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.2.produce')
    step_3.add_argument(name='reference', argument_type=ArgumentType.CONTAINER,
                        data_reference='steps.2.produce')
    step_3.add_output('produce')
    pipeline.add_step(step_3)

    # Final Output
    pipeline.add_output(name='output predictions',
                        data_reference='steps.3.produce')

    return pipeline
def set_data(pipeline_description):
    global IP_STEP, OP_STEP, READER_STEP

    # denormalize
    denorm_step_idx = 0
    step = PrimitiveStep(
        primitive_description=d3m.primitives.data_transformation.denormalize.Common.metadata.query())
    step.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                      data_reference='inputs.0')
    step.add_output('produce')
    pipeline_description.add_step(step)

    # dataset_to_dataframe
    dataset_to_dataframe_step_idx = len(pipeline_description.steps)
    step = PrimitiveStep(
        primitive_description=d3m.primitives.data_transformation.dataset_to_dataframe.Common.metadata.query())
    step.add_argument(
        name='inputs',
        argument_type=ArgumentType.CONTAINER,
        data_reference='steps.{}.produce'.format(denorm_step_idx))
    step.add_output('produce')
    pipeline_description.add_step(step)

    # extract targets
    extract_step_idx = len(pipeline_description.steps)
    extract_targets = PrimitiveStep(
        d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common.metadata.query())
    extract_targets.add_argument(
        name='inputs',
        argument_type=ArgumentType.CONTAINER,
        data_reference='steps.{}.produce'.format(dataset_to_dataframe_step_idx))
    extract_targets.add_hyperparameter(
        name='semantic_types',
        argument_type=ArgumentType.VALUE,
        data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
    extract_targets.add_output('produce')
    pipeline_description.add_step(extract_targets)

    # replace semantic types
    # Needed for CIFAR-10
    replace_step_idx = len(pipeline_description.steps)
    replace_semantic = PrimitiveStep(
        d3m.primitives.data_transformation.replace_semantic_types.Common.metadata.query())
    replace_semantic.add_argument(
        name='inputs',
        argument_type=ArgumentType.CONTAINER,
        data_reference=f'steps.{extract_step_idx}.produce')
    replace_semantic.add_hyperparameter(
        name='to_semantic_types',
        argument_type=ArgumentType.VALUE,
        data=[
            'https://metadata.datadrivendiscovery.org/types/SuggestedTarget',
            'https://metadata.datadrivendiscovery.org/types/TrueTarget'
        ])
    replace_semantic.add_hyperparameter(
        name='from_semantic_types',
        argument_type=ArgumentType.VALUE,
        data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
    replace_semantic.add_output('produce')
    pipeline_description.add_step(replace_semantic)

    # image reader
    reader_step_idx = len(pipeline_description.steps)
    reader = PrimitiveStep(
        primitive_description=d3m.primitives.data_preprocessing.image_reader.Common.metadata.query())
    reader.add_hyperparameter(name='return_result',
                              argument_type=ArgumentType.VALUE, data='new')
    pipeline_description.add_step(reader)

    IP_STEP, OP_STEP, READER_STEP = (dataset_to_dataframe_step_idx,
                                     replace_step_idx, reader_step_idx)
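# Note (assumption based on the globals this helper sets): set_data only
# registers the image reader and its return_result hyperparameter; the
# reader's inputs and outputs appear to be wired up later by the caller via
# the exported READER_STEP index.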
def _to_pipeline(self, binding, sequence) -> Pipeline:
    """
    Convert a step binding and step sequence into a d3m Pipeline.

    Args:
        binding: mapping from step name to its primitive name,
            hyperparameters, inputs and (optionally) runtime information.
        sequence: ordered list of step names to add to the pipeline.

    Returns:
        The assembled Pipeline.
    """
    # define an empty pipeline with the general dataset input primitive
    # generate empty pipeline with i/o/s/u =[]
    # pprint(binding)
    # print(sequence)
    # print("[INFO] list:", list(map(str, metadata_base.Context)))
    pipeline = Pipeline(name=self.template['name'] + ":" + str(id(binding)),
                        description=self.description_info)
    templateinput = pipeline.add_input("input dataset")

    # save temporary output for another step to take as input
    outputs = {}
    outputs["template_input"] = templateinput

    # iterate through steps in the given binding and add each step to the
    # pipeline. The IO and hyperparameter are also handled here.
    for i, step in enumerate(sequence):
        self.step_number[step] = i
        # primitive_step = PrimitiveStep(self.primitive[binding[step][
        #     "primitive"]].metadata.query())
        primitive_name = binding[step]["primitive"]
        if primitive_name in self.primitive:
            primitive_desc = dict(
                d3m_index.get_primitive(primitive_name).metadata.query())
            primitive_step = PrimitiveStep(primitive_desc)
            # D3M version v2019.1.21 removes primitive description. Need
            # another way to pass "runtime"
            if "runtime" in binding[step]:
                # primitive_desc["runtime"] = binding[step]["runtime"]
                primitive_step.__dict__['_dsbox_runtime'] = binding[step][
                    "runtime"]
                # print('==== ', primitive_step._dsbox_runtime)
        else:
            raise exceptions.InvalidArgumentValueError(
                "Error, can't find the primitive : ", primitive_name)

        if binding[step]["hyperparameters"] != {}:
            hyper = binding[step]["hyperparameters"]
            for hyperName in hyper:
                primitive_step.add_hyperparameter(
                    # argument_type should be fixed type not the type of the data!!
                    name=hyperName,
                    argument_type=self.argmentsmapper["value"],
                    data=hyper[hyperName])

        if self.need_add_reference and primitive_name == 'd3m.primitives.data_transformation.construct_predictions.DataFrameCommon':
            primitive_step.add_argument("reference",
                                        metadata_base.ArgumentType.CONTAINER,
                                        "steps.0.produce")

        # first we need to extract the types of the primitive's input and
        # the generator's output type.
        # then we need to compare those and in case we have different
        # types, add the intermediate type caster in the pipeline
        # print(outputs)
        step_parameters = binding[step]["inputs"]
        step_arguments = []
        for parameter in step_parameters:
            if type(parameter) is list:
                argument = [outputs[subparam] for subparam in parameter]
            else:
                argument = outputs[parameter]
            step_arguments.append(argument)
        self.bind_primitive_IO(primitive_step, step_arguments)
        pipeline.add_step(primitive_step)
        # pre v2019.1.21
        # outputs[step] = primitive_step.add_output("produce")
        primitive_step.add_output("produce")
        outputs[step] = f'steps.{primitive_step.index}.produce'
    # END FOR

    # Add final output as the prediction of target attribute
    general_output = outputs[self.template["steps"][-1]["name"]]
    # print(general_output)
    pipeline.add_output(general_output, "predictions of input dataset")

    return pipeline
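# Shape of `binding` assumed by _to_pipeline (a hedged reconstruction from the
# accesses above, not an authoritative schema):
#
#   binding = {
#       "<step name>": {
#           "primitive": "d3m.primitives....",            # primitive path
#           "hyperparameters": {"<name>": <value>, ...},  # may be empty
#           "inputs": ["template_input" or "<earlier step name>", ...],
#           "runtime": {...},                             # optional
#       },
#       ...
#   }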
def create_pipeline(metric: str,
                    cat_mode: str = 'one_hot',
                    max_one_hot: int = 16,
                    scale: bool = False) -> Pipeline:
    # create the basic pipeline
    image_pipeline = Pipeline(context=PipelineContext.TESTING)
    image_pipeline.add_input(name='inputs')

    # step 0 - denormalize dataframe (N.B.: injects semantic type information)
    step = PrimitiveStep(primitive_description=DenormalizePrimitive.metadata.query())
    step.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                      data_reference='inputs.0')
    step.add_output('produce')
    image_pipeline.add_step(step)

    # step 1 - extract dataframe from dataset
    step = PrimitiveStep(primitive_description=DatasetToDataFramePrimitive.metadata.query())
    step.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                      data_reference='steps.0.produce')
    step.add_output('produce')
    image_pipeline.add_step(step)

    # step 2 - read images
    step = PrimitiveStep(primitive_description=DataFrameImageReaderPrimitive.metadata.query())
    step.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                      data_reference='steps.1.produce')
    step.add_output('produce')
    step.add_hyperparameter('use_columns', ArgumentType.VALUE, [0, 1])
    step.add_hyperparameter('return_result', ArgumentType.VALUE, 'replace')
    image_pipeline.add_step(step)

    # step 3 - parse columns
    step = PrimitiveStep(primitive_description=ColumnParserPrimitive.metadata.query())
    step.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                      data_reference='steps.2.produce')
    step.add_output('produce')
    semantic_types = ('http://schema.org/Boolean',
                      'http://schema.org/Integer',
                      'http://schema.org/Float',
                      'https://metadata.datadrivendiscovery.org/types/FloatVector')
    step.add_hyperparameter('parse_semantic_types', ArgumentType.VALUE,
                            semantic_types)
    image_pipeline.add_step(step)

    # step 4 - featurize images
    step = PrimitiveStep(primitive_description=ImageTransferPrimitive.metadata.query())
    step.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                      data_reference='steps.3.produce')
    step.add_output('produce')
    image_pipeline.add_step(step)

    # step 5 - extract targets
    step = PrimitiveStep(primitive_description=ExtractColumnsBySemanticTypesPrimitive.metadata.query())
    step.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                      data_reference='steps.3.produce')
    step.add_output('produce')
    target_types = ('https://metadata.datadrivendiscovery.org/types/Target',
                    'https://metadata.datadrivendiscovery.org/types/TrueTarget')
    step.add_hyperparameter('semantic_types', ArgumentType.VALUE, target_types)
    image_pipeline.add_step(step)

    # step 6 - generate a random forest ensemble model
    step = PrimitiveStep(primitive_description=EnsembleForestPrimitive.metadata.query())
    step.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                      data_reference='steps.4.produce')
    step.add_argument(name='outputs', argument_type=ArgumentType.CONTAINER,
                      data_reference='steps.5.produce')
    step.add_output('produce')
    step.add_hyperparameter('metric', ArgumentType.VALUE, metric)
    image_pipeline.add_step(step)

    # step 7 - convert predictions to expected format
    step = PrimitiveStep(primitive_description=ConstructPredictionsPrimitive.metadata.query())
    step.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                      data_reference='steps.6.produce')
    step.add_argument(name='reference', argument_type=ArgumentType.CONTAINER,
                      data_reference='steps.1.produce')
    step.add_output('produce')
    step.add_hyperparameter('use_columns', ArgumentType.VALUE, [0, 1])
    image_pipeline.add_step(step)

    # Adding output step to the pipeline
    image_pipeline.add_output(name='output', data_reference='steps.7.produce')

    return image_pipeline
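# Usage sketch (hedged): `metric` is the only hyperparameter forwarded to the
# ensemble step, so a minimal invocation might look like:
#
#   pipeline = create_pipeline(metric='f1Macro')
#   print(pipeline.to_json())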
def __init__(
    self,
    annotations: List[int] = None,
    gem_p: int = 1,
    dataset: str = "LL1_bigearth_landuse_detection",
):
    pipeline_description = Pipeline()
    pipeline_description.add_input(name="inputs")
    if annotations is None:
        pipeline_description.add_input(name="annotations")

    # Denormalize
    step = PrimitiveStep(
        primitive=index.get_primitive(
            "d3m.primitives.data_transformation.denormalize.Common"
        )
    )
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="inputs.0",
    )
    step.add_output("produce")
    pipeline_description.add_step(step)

    # DS to DF on input DS
    step = PrimitiveStep(
        primitive=index.get_primitive(
            "d3m.primitives.data_transformation.dataset_to_dataframe.Common"
        )
    )
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.0.produce",
    )
    step.add_output("produce")
    pipeline_description.add_step(step)

    # Satellite Image Loader
    step = PrimitiveStep(
        primitive=index.get_primitive(
            "d3m.primitives.data_transformation.satellite_image_loader.DistilSatelliteImageLoader"
        )
    )
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.1.produce",
    )
    step.add_hyperparameter(
        name="return_result", argument_type=ArgumentType.VALUE, data="replace"
    )
    step.add_output("produce")
    pipeline_description.add_step(step)

    # Distil column parser
    step = PrimitiveStep(
        primitive=index.get_primitive(
            "d3m.primitives.data_transformation.column_parser.DistilColumnParser"
        )
    )
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.2.produce",
    )
    step.add_output("produce")
    step.add_hyperparameter(
        name="parsing_semantics",
        argument_type=ArgumentType.VALUE,
        data=[
            "http://schema.org/Integer",
            "http://schema.org/Float",
            "https://metadata.datadrivendiscovery.org/types/FloatVector",
        ],
    )
    pipeline_description.add_step(step)

    # parse image semantic types
    # TODO test without index
    step = PrimitiveStep(
        primitive=index.get_primitive(
            "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common"
        )
    )
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.3.produce",
    )
    step.add_output("produce")
    step.add_hyperparameter(
        name="semantic_types",
        argument_type=ArgumentType.VALUE,
        data=[
            "http://schema.org/ImageObject",
            "https://metadata.datadrivendiscovery.org/types/PrimaryMultiKey",
        ],
    )
    pipeline_description.add_step(step)

    # remote sensing pretrained
    step = PrimitiveStep(
        primitive=index.get_primitive(
            "d3m.primitives.remote_sensing.remote_sensing_pretrained.RemoteSensingPretrained"
        )
    )
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.4.produce",
    )
    step.add_output("produce")
    pipeline_description.add_step(step)

    # DS to DF on annotations DS
    if annotations is None:
        step = PrimitiveStep(
            primitive=index.get_primitive(
                "d3m.primitives.data_transformation.dataset_to_dataframe.Common"
            )
        )
        step.add_argument(
            name="inputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="inputs.1",
        )
        step.add_output("produce")
        pipeline_description.add_step(step)

    # image retrieval primitive
    step = PrimitiveStep(
        primitive=index.get_primitive(
            "d3m.primitives.similarity_modeling.iterative_labeling.ImageRetrieval"
        )
    )
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.5.produce",
    )
    if annotations is not None:
        step.add_argument(
            name="outputs",
            argument_type=ArgumentType.VALUE,
            data=annotations,
        )
    else:
        step.add_argument(
            name="outputs",
            argument_type=ArgumentType.CONTAINER,
            data_reference="steps.6.produce",
        )
    step.add_output("produce")
    step.add_hyperparameter(
        name="gem_p", argument_type=ArgumentType.VALUE, data=gem_p
    )
    pipeline_description.add_step(step)

    if annotations is not None:
        pipeline_description.add_output(
            name="output ranking", data_reference="steps.6.produce"
        )
    else:
        pipeline_description.add_output(
            name="output ranking", data_reference="steps.7.produce"
        )

    self.pipeline = pipeline_description
    self.dataset = dataset
    self.annotations = annotations
def __init__(self):
    pipeline_description = Pipeline()
    pipeline_description.add_input(name="inputs")

    # DS to DF on input DS
    step = PrimitiveStep(
        primitive=index.get_primitive(
            "d3m.primitives.data_transformation.dataset_to_dataframe.Common"
        )
    )
    step.add_argument(
        name="inputs", argument_type=ArgumentType.CONTAINER, data_reference="inputs.0"
    )
    step.add_output("produce")
    pipeline_description.add_step(step)

    # Simple Profiler Column Role Annotation
    step = PrimitiveStep(
        primitive=index.get_primitive("d3m.primitives.schema_discovery.profiler.Common")
    )
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.0.produce",
    )
    step.add_output("produce")
    pipeline_description.add_step(step)

    # column parser on input DF
    step = PrimitiveStep(
        primitive=index.get_primitive(
            "d3m.primitives.data_transformation.column_parser.Common"
        )
    )
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.1.produce",
    )
    step.add_output("produce")
    step.add_hyperparameter(
        name="parse_semantic_types",
        argument_type=ArgumentType.VALUE,
        data=[
            "http://schema.org/Boolean",
            "http://schema.org/Integer",
            "http://schema.org/Float",
            "https://metadata.datadrivendiscovery.org/types/FloatVector",
            "http://schema.org/DateTime",
        ],
    )
    pipeline_description.add_step(step)

    # imputer
    step = PrimitiveStep(
        primitive=index.get_primitive("d3m.primitives.data_cleaning.imputer.SKlearn")
    )
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.2.produce",
    )
    step.add_output("produce")
    step.add_hyperparameter(
        name="return_result", argument_type=ArgumentType.VALUE, data="replace"
    )
    step.add_hyperparameter(
        name="use_semantic_types", argument_type=ArgumentType.VALUE, data=True
    )
    pipeline_description.add_step(step)

    # TSNE
    step = PrimitiveStep(
        primitive=index.get_primitive(
            'd3m.primitives.dimensionality_reduction.t_distributed_stochastic_neighbor_embedding.Tsne'
        )
    )
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.3.produce",
    )
    step.add_hyperparameter(
        name='n_components', argument_type=ArgumentType.VALUE, data=3
    )
    step.add_output("produce")
    pipeline_description.add_step(step)

    # parse attribute semantic types
    step = PrimitiveStep(
        primitive=index.get_primitive(
            "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common"
        )
    )
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.4.produce",
    )
    step.add_hyperparameter(
        name="semantic_types",
        argument_type=ArgumentType.VALUE,
        data=["https://metadata.datadrivendiscovery.org/types/Attribute"],
    )
    step.add_output("produce")
    pipeline_description.add_step(step)

    # parse target semantic types
    step = PrimitiveStep(
        primitive=index.get_primitive(
            "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common"
        )
    )
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.3.produce",
    )
    step.add_hyperparameter(
        name="semantic_types",
        argument_type=ArgumentType.VALUE,
        data=[
            "https://metadata.datadrivendiscovery.org/types/Target",
        ],
    )
    step.add_output("produce")
    pipeline_description.add_step(step)

    # R Forest
    step = PrimitiveStep(
        primitive=index.get_primitive(
            'd3m.primitives.learner.random_forest.DistilEnsembleForest'
        )
    )
    step.add_argument(
        name='inputs',
        argument_type=ArgumentType.CONTAINER,
        data_reference='steps.5.produce'
    )
    step.add_argument(
        name='outputs',
        argument_type=ArgumentType.CONTAINER,
        data_reference='steps.6.produce'
    )
    step.add_output('produce')
    pipeline_description.add_step(step)

    # construct predictions
    step = PrimitiveStep(
        primitive=index.get_primitive(
            "d3m.primitives.data_transformation.construct_predictions.Common"
        )
    )
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.7.produce",
    )
    step.add_argument(
        name="reference",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.0.produce",
    )
    step.add_output("produce")
    pipeline_description.add_step(step)

    # Final Output
    pipeline_description.add_output(
        name="output predictions", data_reference="steps.8.produce"
    )

    self.pipeline = pipeline_description
def __init__(self):
    pipeline_description = Pipeline()
    pipeline_description.add_input(name="inputs")

    # DS to DF on input DS
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.dataset_to_dataframe.Common"))
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="inputs.0",
    )
    step.add_output("produce")
    pipeline_description.add_step(step)

    # Simon
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_cleaning.column_type_profiler.Simon"))
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.0.produce",
    )
    step.add_output("produce")
    pipeline_description.add_step(step)

    # column parser on input DF
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.column_parser.Common"))
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.1.produce",
    )
    step.add_output("produce")
    pipeline_description.add_step(step)

    # XG Boost
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.classification.xgboost_gbtree.Common"))
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.2.produce",
    )
    step.add_argument(
        name="outputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.2.produce",
    )
    step.add_output("produce")
    step.add_hyperparameter(name="add_index_columns",
                            argument_type=ArgumentType.VALUE, data=True)
    pipeline_description.add_step(step)

    # construct predictions
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.construct_predictions.Common"))
    step.add_argument(
        name="inputs",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.3.produce",
    )
    step.add_argument(
        name="reference",
        argument_type=ArgumentType.CONTAINER,
        data_reference="steps.0.produce",
    )
    step.add_output("produce")
    pipeline_description.add_step(step)

    # Final Output
    pipeline_description.add_output(name="output predictions",
                                    data_reference="steps.4.produce")

    self.pipeline = pipeline_description
from d3m import index
from d3m.metadata.base import ArgumentType, Context
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
import sys

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: Denormalize primitive -> put all resources in one dataframe
step_0 = PrimitiveStep(primitive=index.get_primitive(
    'd3m.primitives.data_transformation.denormalize.Common'))
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                    data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: dataset_to_dataframe
step_1 = PrimitiveStep(primitive=index.get_primitive(
    'd3m.primitives.data_transformation.dataset_to_dataframe.Common'))
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER,
                    data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: column parser -> labeled semantic types to data types
step_2 = PrimitiveStep(primitive=index.get_primitive(
    'd3m.primitives.data_transformation.column_parser.Common'))
step_2.add_argument(name='inputs',
def generate_only():
    # Creating pipeline
    pipeline_description = Pipeline()
    pipeline_description.add_input(name='inputs')

    # Step 0 - Denormalize
    step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.denormalize.Common'))
    step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
    step_0.add_output('produce')
    pipeline_description.add_step(step_0)

    # Step 1 - Transform to dataframe
    step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.dataset_to_dataframe.Common'))
    step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
    step_1.add_output('produce')
    pipeline_description.add_step(step_1)

    # Step 2 - Extract target
    step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common'))
    step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
    step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
                              data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
    step_2.add_output('produce')
    pipeline_description.add_step(step_2)

    # Step 3 - Transform target to numeric
    step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.to_numeric.DSBOX'))
    step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
    step_3.add_hyperparameter(name='drop_non_numeric_columns', argument_type=ArgumentType.VALUE, data=False)
    step_3.add_output('produce')
    pipeline_description.add_step(step_3)

    # Step 4 - Single table featurization
    step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.feature_construction.deep_feature_synthesis.SingleTableFeaturization'))
    step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
    step_4.add_output('produce')
    pipeline_description.add_step(step_4)

    # Step 5 - Time series to list
    step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_preprocessing.time_series_to_list.DSBOX'))
    step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
    step_5.add_output('produce')
    pipeline_description.add_step(step_5)

    # Step 6 - Time series featurization
    step_6 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.feature_extraction.random_projection_timeseries_featurization.DSBOX'))
    step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.5.produce')
    step_6.add_hyperparameter(name='generate_metadata', argument_type=ArgumentType.VALUE, data=True)
    step_6.add_output('produce')
    pipeline_description.add_step(step_6)

    # Step 7 - Concat single-table features with time series features
    step_7 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.data_transformation.horizontal_concat.DataFrameCommon'))
    step_7.add_argument(name='left', argument_type=ArgumentType.CONTAINER, data_reference='steps.6.produce')
    step_7.add_argument(name='right', argument_type=ArgumentType.CONTAINER, data_reference='steps.4.produce')
    step_7.add_output('produce')
    pipeline_description.add_step(step_7)

    # Step 8 - Classification
    step_8 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.classification.random_forest.SKlearn'))
    step_8.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.7.produce')
    step_8.add_argument(name='outputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.3.produce')
    step_8.add_hyperparameter(name='add_index_columns', argument_type=ArgumentType.VALUE, data=True)
    step_8.add_hyperparameter(name='use_semantic_types', argument_type=ArgumentType.VALUE, data=True)
    step_8.add_output('produce')
    pipeline_description.add_step(step_8)

    # Final Output
    pipeline_description.add_output(name='output predictions', data_reference='steps.8.produce')

    # Generate .yml file for the pipeline
    import os
    import featuretools_ta1
    from pipeline_tests.utils import generate_pipeline
    dataset_name = 'LL1_50words_MIN_METADATA'
    dataset_path = '/featuretools_ta1/datasets/seed_datasets_current'
    primitive_name = 'd3m.primitives.feature_construction.deep_feature_synthesis.SingleTableFeaturization'
    version = featuretools_ta1.__version__
    test_name = os.path.splitext(os.path.basename(__file__))[0][5:]
    yml, pipeline_run_file = generate_pipeline(primitive_name=primitive_name,
                                               pipeline_description=pipeline_description,
                                               dataset_name=dataset_name,
                                               test_name=test_name)

    # fit-score command
    fs_cmd = 'python3 -m d3m runtime -d /featuretools_ta1/datasets/ fit-score -p {}'.format(yml)
    fs_cmd += ' -r {}/{}/{}_problem/problemDoc.json'.format(dataset_path, dataset_name, dataset_name)
    fs_cmd += ' -i {}/{}/TRAIN/dataset_TRAIN/datasetDoc.json'.format(dataset_path, dataset_name)
    fs_cmd += ' -t {}/{}/TEST/dataset_TEST/datasetDoc.json'.format(dataset_path, dataset_name)
    fs_cmd += ' -a {}/{}/SCORE/dataset_SCORE/datasetDoc.json'.format(dataset_path, dataset_name)
    fs_cmd += ' -O {}'.format(pipeline_run_file)

    # Run pipeline to save pipeline_run file
    os.system(fs_cmd)

    # Create and return command for running from pipeline_run file:
    pipeline_run_cmd = 'python3 -m d3m --pipelines-path /featuretools_ta1/MIT_FeatureLabs/{}/{}/pipelines/'.format(primitive_name, version)
    pipeline_run_cmd += ' runtime -d /featuretools_ta1/datasets/ fit-score'
    pipeline_run_cmd += ' -u {}'.format(pipeline_run_file)
    return pipeline_run_cmd
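# Usage sketch (an assumed driver, not part of the original test file):
# generate_only() runs fit-score once as a side effect and returns a shell
# command that replays the run from the saved pipeline_run file, so a test
# driver only needs to execute its return value.
if __name__ == '__main__':
    import os
    rerun_cmd = generate_only()
    os.system(rerun_cmd)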
def __init__(
    self,
    epochs: int = 5000,
    attention_lstm: bool = True,
):
    pipeline_description = Pipeline()
    pipeline_description.add_input(name="inputs")

    # Time series formatter
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.time_series_formatter.DistilTimeSeriesFormatter"))
    step.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER, data_reference="inputs.0")
    step.add_output("produce")
    pipeline_description.add_step(step)

    # DS to DF on formatted ts DS
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.dataset_to_dataframe.Common"))
    step.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER, data_reference="steps.0.produce")
    step.add_output("produce")
    pipeline_description.add_step(step)

    # DS to DF on input DS
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.dataset_to_dataframe.Common"))
    step.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER, data_reference="inputs.0")
    step.add_output("produce")
    pipeline_description.add_step(step)

    # column parser on input DF
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.column_parser.Common"))
    step.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER, data_reference="steps.2.produce")
    step.add_output("produce")
    step.add_hyperparameter(
        name="parse_semantic_types",
        argument_type=ArgumentType.VALUE,
        data=[
            "http://schema.org/Boolean",
            "http://schema.org/Integer",
            "http://schema.org/Float",
            "https://metadata.datadrivendiscovery.org/types/FloatVector",
        ],
    )
    pipeline_description.add_step(step)

    # parse target semantic types
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common"))
    step.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER, data_reference="steps.3.produce")
    step.add_hyperparameter(
        name="semantic_types",
        argument_type=ArgumentType.VALUE,
        data=["https://metadata.datadrivendiscovery.org/types/Target"],
    )
    step.add_output("produce")
    pipeline_description.add_step(step)

    # LSTM FCN
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.time_series_classification.convolutional_neural_net.LSTM_FCN"))
    step.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER, data_reference="steps.1.produce")
    step.add_argument(name="outputs", argument_type=ArgumentType.CONTAINER, data_reference="steps.4.produce")
    step.add_hyperparameter(name="epochs", argument_type=ArgumentType.VALUE, data=epochs)
    step.add_hyperparameter(name="attention_lstm", argument_type=ArgumentType.VALUE, data=attention_lstm)
    step.add_output("produce")
    pipeline_description.add_step(step)

    # construct predictions
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.construct_predictions.Common"))
    step.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER, data_reference="steps.5.produce")
    step.add_argument(name="reference", argument_type=ArgumentType.CONTAINER, data_reference="steps.2.produce")
    step.add_output("produce")
    pipeline_description.add_step(step)

    # Final Output
    pipeline_description.add_output(name="output predictions", data_reference="steps.6.produce")

    self.pipeline = pipeline_description
def __init__(self):
    pipeline_description = Pipeline()
    pipeline_description.add_input(name="inputs")

    # Denormalize primitive
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.denormalize.Common"))
    step.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER, data_reference="inputs.0")
    step.add_output("produce")
    pipeline_description.add_step(step)

    # DS to DF on input DS
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.dataset_to_dataframe.Common"))
    step.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER, data_reference="steps.0.produce")
    step.add_output("produce")
    pipeline_description.add_step(step)

    # Text reader
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.text_reader.Common"))
    step.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER, data_reference="steps.1.produce")
    step.add_output("produce")
    pipeline_description.add_step(step)

    # Simple Profiler Column Role Annotation
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.schema_discovery.profiler.Common"))
    step.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER, data_reference="steps.2.produce")
    step.add_output("produce")
    pipeline_description.add_step(step)

    # column parser
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.column_parser.Common"))
    step.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER, data_reference="steps.3.produce")
    step.add_output("produce")
    step.add_hyperparameter(
        name="parse_semantic_types",
        argument_type=ArgumentType.VALUE,
        data=(
            "http://schema.org/Boolean",
            "http://schema.org/Integer",
            "http://schema.org/Float",
            "https://metadata.datadrivendiscovery.org/types/FloatVector",
        ),
    )
    pipeline_description.add_step(step)

    # parse attribute semantic types
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common"))
    step.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER, data_reference="steps.4.produce")
    step.add_hyperparameter(
        name="semantic_types",
        argument_type=ArgumentType.VALUE,
        data=["https://metadata.datadrivendiscovery.org/types/Attribute"],
    )
    step.add_output("produce")
    pipeline_description.add_step(step)

    # parse target semantic types
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common"))
    step.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER, data_reference="steps.4.produce")
    step.add_hyperparameter(
        name="semantic_types",
        argument_type=ArgumentType.VALUE,
        data=["https://metadata.datadrivendiscovery.org/types/Target"],
    )
    step.add_output("produce")
    pipeline_description.add_step(step)

    # Sent2Vec primitive
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.feature_extraction.nk_sent2vec.Sent2Vec"))
    step.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER, data_reference="steps.5.produce")
    step.add_output("produce")
    pipeline_description.add_step(step)

    # R Forest
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.learner.random_forest.DistilEnsembleForest"))
    step.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER, data_reference="steps.7.produce")
    step.add_argument(name="outputs", argument_type=ArgumentType.CONTAINER, data_reference="steps.6.produce")
    step.add_output("produce")
    pipeline_description.add_step(step)

    # construct predictions
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.construct_predictions.Common"))
    step.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER, data_reference="steps.8.produce")
    step.add_argument(name="reference", argument_type=ArgumentType.CONTAINER, data_reference="steps.1.produce")
    step.add_output("produce")
    pipeline_description.add_step(step)

    # Final Output
    pipeline_description.add_output(name="output predictions", data_reference="steps.9.produce")

    self.pipeline = pipeline_description
from d3m import index
from d3m.metadata.base import ArgumentType, Context
from d3m.metadata.pipeline import Pipeline, PrimitiveStep

# Creating pipeline
pipeline_description = Pipeline(context=Context.TESTING)
pipeline_description.add_input(name='inputs')

# Step 0: Kanine time series classification primitive
step_0 = PrimitiveStep(primitive=index.get_primitive(
    'd3m.primitives.time_series_classification.k_neighbors.Kanine'))
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_argument(name='outputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.0.produce')

# Output to JSON
with open('pipeline.json', 'w') as outfile:
    outfile.write(pipeline_description.to_json())
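# Round-trip sketch (illustrative, not part of the original script): the JSON
# written above can be rehydrated with d3m's Pipeline.from_json, which is a
# cheap way to validate the file before handing it to the runtime.
with open('pipeline.json', 'r') as infile:
    loaded_pipeline = Pipeline.from_json(infile.read())
assert loaded_pipeline.id == pipeline_description.id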
def __init__(
    self,
    binary_labels,
    weights_filepath: str = "scratch_dir/model_weights.pth",
    epochs_frozen: int = 20,
    epochs_unfrozen: int = 100,
):
    pipeline_description = Pipeline()
    pipeline_description.add_input(name="inputs")

    # Denormalize
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.denormalize.Common"))
    step.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER, data_reference="inputs.0")
    step.add_output("produce")
    pipeline_description.add_step(step)

    # DS to DF on input DS
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.dataset_to_dataframe.Common"))
    step.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER, data_reference="steps.0.produce")
    step.add_output("produce")
    pipeline_description.add_step(step)

    # Satellite Image Loader
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.satellite_image_loader.DistilSatelliteImageLoader"))
    step.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER, data_reference="steps.1.produce")
    step.add_hyperparameter(name="return_result", argument_type=ArgumentType.VALUE, data="replace")
    step.add_output("produce")
    pipeline_description.add_step(step)

    # Distil column parser
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.column_parser.DistilColumnParser"))
    step.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER, data_reference="steps.2.produce")
    step.add_output("produce")
    step.add_hyperparameter(
        name="parsing_semantics",
        argument_type=ArgumentType.VALUE,
        data=[
            "http://schema.org/Integer",
            "http://schema.org/Float",
            "https://metadata.datadrivendiscovery.org/types/FloatVector",
        ],
    )
    pipeline_description.add_step(step)

    # parse image semantic types
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common"))
    step.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER, data_reference="steps.3.produce")
    step.add_output("produce")
    step.add_hyperparameter(
        name="semantic_types",
        argument_type=ArgumentType.VALUE,
        data=["http://schema.org/ImageObject"],
    )
    pipeline_description.add_step(step)

    # image segmentation
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.remote_sensing.convolutional_neural_net.ImageSegmentation"))
    step.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER, data_reference="steps.4.produce")
    step.add_argument(name="outputs", argument_type=ArgumentType.VALUE, data=binary_labels)
    step.add_output("produce")
    step.add_hyperparameter(name="weights_filepath", argument_type=ArgumentType.VALUE, data=weights_filepath)
    step.add_hyperparameter(name="epochs_frozen", argument_type=ArgumentType.VALUE, data=epochs_frozen)
    step.add_hyperparameter(name="epochs_unfrozen", argument_type=ArgumentType.VALUE, data=epochs_unfrozen)
    pipeline_description.add_step(step)

    pipeline_description.add_output(name="output predictions", data_reference="steps.5.produce")

    self.pipeline = pipeline_description
def __init__(self, algorithm: str = 'PseudoLabel'):
    pipeline_description = Pipeline()
    pipeline_description.add_input(name="inputs")

    # DS to DF on input DS
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.dataset_to_dataframe.Common"))
    step.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER, data_reference="inputs.0")
    step.add_output("produce")
    pipeline_description.add_step(step)

    # Simple Profiler Column Role Annotation
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.schema_discovery.profiler.Common"))
    step.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER, data_reference="steps.0.produce")
    step.add_output("produce")
    pipeline_description.add_step(step)

    # column parser on input DF
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.column_parser.Common"))
    step.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER, data_reference="steps.1.produce")
    step.add_output("produce")
    step.add_hyperparameter(
        name="parse_semantic_types",
        argument_type=ArgumentType.VALUE,
        data=[
            "http://schema.org/Boolean",
            "http://schema.org/Integer",
            "http://schema.org/Float",
            "https://metadata.datadrivendiscovery.org/types/FloatVector",
            "http://schema.org/DateTime",
        ],
    )
    pipeline_description.add_step(step)

    # imputer
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_cleaning.imputer.SKlearn"))
    step.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER, data_reference="steps.2.produce")
    step.add_output("produce")
    step.add_hyperparameter(name="return_result", argument_type=ArgumentType.VALUE, data="replace")
    step.add_hyperparameter(name="use_semantic_types", argument_type=ArgumentType.VALUE, data=True)
    pipeline_description.add_step(step)

    # parse attribute semantic types
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common"))
    step.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER, data_reference="steps.3.produce")
    step.add_hyperparameter(
        name="semantic_types",
        argument_type=ArgumentType.VALUE,
        data=["https://metadata.datadrivendiscovery.org/types/Attribute"],
    )
    step.add_output("produce")
    pipeline_description.add_step(step)

    # parse integer/float attribute semantic types
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common"))
    step.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER, data_reference="steps.4.produce")
    step.add_hyperparameter(
        name="semantic_types",
        argument_type=ArgumentType.VALUE,
        data=["http://schema.org/Integer", "http://schema.org/Float"],
    )
    step.add_output("produce")
    pipeline_description.add_step(step)

    # parse target semantic types
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common"))
    step.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER, data_reference="steps.1.produce")
    step.add_hyperparameter(
        name="semantic_types",
        argument_type=ArgumentType.VALUE,
        data=["https://metadata.datadrivendiscovery.org/types/Target"],
    )
    step.add_output("produce")
    pipeline_description.add_step(step)

    # Tabular Semi Supervised
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.semisupervised_classification.iterative_labeling.TabularSemiSupervised"))
    step.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER, data_reference="steps.5.produce")
    step.add_argument(name="outputs", argument_type=ArgumentType.CONTAINER, data_reference="steps.6.produce")
    step.add_hyperparameter(name="algorithm", argument_type=ArgumentType.VALUE, data=algorithm)
    step.add_output("produce")
    pipeline_description.add_step(step)

    # construct predictions
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.construct_predictions.Common"))
    step.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER, data_reference="steps.7.produce")
    step.add_argument(name="reference", argument_type=ArgumentType.CONTAINER, data_reference="steps.1.produce")
    step.add_output("produce")
    pipeline_description.add_step(step)

    # Final Output
    pipeline_description.add_output(name="output predictions", data_reference="steps.8.produce")

    self.pipeline = pipeline_description
def __init__(self):
    pipeline_description = Pipeline()
    pipeline_description.add_input(name="inputs")

    # DS to DF on input DS
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.dataset_to_dataframe.Common"))
    step.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER, data_reference="inputs.0")
    step.add_output("produce")
    pipeline_description.add_step(step)

    # Simple Profiler Column Role Annotation
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.schema_discovery.profiler.Common"))
    step.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER, data_reference="steps.0.produce")
    step.add_output("produce")
    pipeline_description.add_step(step)

    # column parser on input DF
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.column_parser.Common"))
    step.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER, data_reference="steps.1.produce")
    step.add_output("produce")
    pipeline_description.add_step(step)

    # # imputer
    # step = PrimitiveStep(primitive=index.get_primitive(
    #     "d3m.primitives.data_cleaning.imputer.SKlearn"))
    # step.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER, data_reference="steps.2.produce")
    # step.add_output("produce")
    # step.add_hyperparameter(name="return_result", argument_type=ArgumentType.VALUE, data="replace")
    # step.add_hyperparameter(name="use_semantic_types", argument_type=ArgumentType.VALUE, data=True)
    # pipeline_description.add_step(step)

    # Rffeatures
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.feature_selection.rffeatures.Rffeatures"))
    step.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER, data_reference="steps.2.produce")
    step.add_argument(name="outputs", argument_type=ArgumentType.CONTAINER, data_reference="steps.2.produce")
    step.add_hyperparameter(name="only_numeric_cols", argument_type=ArgumentType.VALUE, data=True)
    step.add_hyperparameter(name="proportion_of_features", argument_type=ArgumentType.VALUE, data=1.0)
    step.add_output("produce")
    pipeline_description.add_step(step)

    # parse target semantic types
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common"))
    step.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER, data_reference="steps.2.produce")
    step.add_hyperparameter(
        name="semantic_types",
        argument_type=ArgumentType.VALUE,
        data=["https://metadata.datadrivendiscovery.org/types/Target"],
    )
    step.add_hyperparameter(name="add_index_columns", argument_type=ArgumentType.VALUE, data=True)
    step.add_output("produce")
    pipeline_description.add_step(step)

    # XGBoost
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.classification.xgboost_gbtree.Common"))
    step.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER, data_reference="steps.3.produce")
    step.add_argument(name="outputs", argument_type=ArgumentType.CONTAINER, data_reference="steps.4.produce")
    step.add_output("produce")
    step.add_hyperparameter(name="add_index_columns", argument_type=ArgumentType.VALUE, data=True)
    pipeline_description.add_step(step)

    # # R Forest
    # step = PrimitiveStep(primitive=index.get_primitive(
    #     "d3m.primitives.classification.random_forest.SKlearn"))
    # step.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER, data_reference="steps.3.produce")
    # step.add_argument(name="outputs", argument_type=ArgumentType.CONTAINER, data_reference="steps.3.produce")
    # step.add_output("produce")
    # step.add_hyperparameter(name="add_index_columns", argument_type=ArgumentType.VALUE, data=True)
    # step.add_hyperparameter(name="use_semantic_types", argument_type=ArgumentType.VALUE, data=True)
    # pipeline_description.add_step(step)

    # construct predictions
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.construct_predictions.Common"))
    step.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER, data_reference="steps.5.produce")
    step.add_argument(name="reference", argument_type=ArgumentType.CONTAINER, data_reference="steps.0.produce")
    step.add_output("produce")
    pipeline_description.add_step(step)

    # Final Output
    pipeline_description.add_output(name="output predictions", data_reference="steps.6.produce")

    self.pipeline = pipeline_description
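# Usage sketch (the class name `RffeaturesXGBoostPipeline` is a placeholder;
# the wrapper class to which this __init__ belongs is not named in this
# fragment). Each of these wrappers leaves the assembled pipeline on
# self.pipeline, so the standard D3M JSON description is one call away:
#
#     pipeline_json = RffeaturesXGBoostPipeline().pipeline.to_json()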
from d3m import index
from d3m.metadata.base import ArgumentType, Context
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
import sys

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name="inputs")

# Step 0: dataset_to_dataframe
step_0 = PrimitiveStep(primitive=index.get_primitive(
    "d3m.primitives.data_transformation.dataset_to_dataframe.Common"))
step_0.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER, data_reference="inputs.0")
step_0.add_output("produce")
pipeline_description.add_step(step_0)

# Step 1: Simple Profiler Column Role Annotation
step_1 = PrimitiveStep(primitive=index.get_primitive(
    "d3m.primitives.schema_discovery.profiler.Common"))
step_1.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER, data_reference="steps.0.produce")
step_1.add_output("produce")
pipeline_description.add_step(step_1)
from d3m import index
from d3m.metadata import hyperparams
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep
import copy

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
#                            extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.data_transformation.column_parser.Common')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
def create_pipeline(metric: str) -> Pipeline:
    previous_step = 0
    input_val = 'steps.{}.produce'

    # create the basic pipeline
    qa_pipeline = Pipeline(context=PipelineContext.TESTING)
    qa_pipeline.add_input(name='inputs')

    # Denormalize so that we have a single dataframe in the dataset
    step = PrimitiveStep(primitive_description=DenormalizePrimitive.metadata.query())
    step.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
    step.add_output('produce')
    qa_pipeline.add_step(step)

    # Extract dataframe from dataset
    step = PrimitiveStep(primitive_description=DatasetToDataFramePrimitive.metadata.query())
    step.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=input_val.format(previous_step))
    step.add_output('produce')
    qa_pipeline.add_step(step)
    previous_step += 1

    # Parse columns.
    step = PrimitiveStep(primitive_description=ColumnParserPrimitive.metadata.query())
    step.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=input_val.format(previous_step))
    step.add_output('produce')
    semantic_types = ('http://schema.org/Boolean', 'http://schema.org/Integer', 'http://schema.org/Float',
                      'https://metadata.datadrivendiscovery.org/types/FloatVector')
    step.add_hyperparameter('parse_semantic_types', ArgumentType.VALUE, semantic_types)
    qa_pipeline.add_step(step)
    previous_step += 1
    parse_step = previous_step

    # Extract attributes
    step = PrimitiveStep(primitive_description=ExtractColumnsBySemanticTypesPrimitive.metadata.query())
    step.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=input_val.format(parse_step))
    step.add_output('produce')
    step.add_hyperparameter('semantic_types', ArgumentType.VALUE,
                            ('https://metadata.datadrivendiscovery.org/types/Attribute',))
    qa_pipeline.add_step(step)
    previous_step += 1
    attributes_step = previous_step

    # Extract targets
    step = PrimitiveStep(primitive_description=ExtractColumnsBySemanticTypesPrimitive.metadata.query())
    step.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=input_val.format(parse_step))
    step.add_output('produce')
    target_types = ('https://metadata.datadrivendiscovery.org/types/Target',
                    'https://metadata.datadrivendiscovery.org/types/TrueTarget')
    step.add_hyperparameter('semantic_types', ArgumentType.VALUE, target_types)
    qa_pipeline.add_step(step)
    previous_step += 1
    target_step = previous_step

    # Generate a BERT pair classification model.
    step = PrimitiveStep(primitive_description=BertPairClassificationPrimitive.metadata.query())
    step.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=input_val.format(attributes_step))
    step.add_argument(name='outputs', argument_type=ArgumentType.CONTAINER, data_reference=input_val.format(target_step))
    step.add_output('produce')
    step.add_hyperparameter('metric', ArgumentType.VALUE, metric)
    step.add_hyperparameter('doc_col_0', ArgumentType.VALUE, 1)
    step.add_hyperparameter('doc_col_1', ArgumentType.VALUE, 3)
    qa_pipeline.add_step(step)
    previous_step += 1

    # convert predictions to expected format
    step = PrimitiveStep(primitive_description=ConstructPredictionsPrimitive.metadata.query())
    step.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=input_val.format(previous_step))
    step.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference=input_val.format(parse_step))
    step.add_output('produce')
    qa_pipeline.add_step(step)
    previous_step += 1

    # Adding output step to the pipeline
    qa_pipeline.add_output(name='output', data_reference=input_val.format(previous_step))

    return qa_pipeline
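# Usage sketch (the metric value below is illustrative; any value accepted by
# the BertPairClassification primitive's 'metric' hyperparameter works the
# same way):
#
#     qa_pipeline = create_pipeline('accuracy')
#     print(qa_pipeline.to_json())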
def image_regress_pipeline(resolver=None) -> Pipeline:
    if resolver is None:
        resolver = custom_resolver.BlackListResolver()

    # Creating Pipeline
    pipeline_description = Pipeline(context='PRETRAINING')
    pipeline_description.add_input(name='inputs')
    start_step = "inputs.0"

    # Step 0: Denormalize
    step_0 = PrimitiveStep(primitive_description=d3m.primitives.dsbox.Denormalize.metadata.query(), resolver=resolver)
    step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=start_step)
    step_0.add_output('produce')
    pipeline_description.add_step(step_0)

    # Step 1: DatasetToDataFrame
    step_1 = PrimitiveStep(primitive_description=d3m.primitives.datasets.DatasetToDataFrame.metadata.query(), resolver=resolver)
    step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
    add_hyperparameters(step_1, d3m.primitives.data.DataFrameToList)
    step_1.add_output('produce')
    pipeline_description.add_step(step_1)

    # Step 2: Extract target column
    step_2 = PrimitiveStep(primitive_description=d3m.primitives.data.ExtractColumnsBySemanticTypes.metadata.query(), resolver=resolver)
    step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
    step_2.add_hyperparameter(
        name='semantic_types',
        argument_type=ArgumentType.VALUE,
        data=[
            "https://metadata.datadrivendiscovery.org/types/Target",
            "https://metadata.datadrivendiscovery.org/types/SuggestedTarget",
        ])
    step_2.add_output('produce')
    pipeline_description.add_step(step_2)

    # Step 3: DataFrame to tensor
    step_3 = PrimitiveStep(primitive_description=d3m.primitives.dsbox.DataFrameToTensor.metadata.query(), resolver=resolver)
    step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
    step_3.add_output('produce')
    pipeline_description.add_step(step_3)

    # Step 4: VGG16 feature extractor
    step_4 = PrimitiveStep(primitive_description=d3m.primitives.dsbox.Vgg16ImageFeature.metadata.query(), resolver=resolver)
    step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.3.produce')
    step_4.add_output('produce')
    pipeline_description.add_step(step_4)

    # Step 5: Apply PCA to the generated features
    step_5 = PrimitiveStep(primitive_description=d3m.primitives.sklearn_wrap.SKPCA.metadata.query(), resolver=resolver)
    step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.4.produce')
    step_5.add_output('produce')
    pipeline_description.add_step(step_5)

    # Step 6: Apply random forest regressor
    step_6 = PrimitiveStep(primitive_description=d3m.primitives.sklearn_wrap.SKRandomForestRegressor.metadata.query(), resolver=resolver)
    step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.5.produce')
    step_6.add_argument(name='outputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.2.produce')
    step_6.add_output('produce')
    pipeline_description.add_step(step_6)

    # Output Generated
    pipeline_description.add_output(name='output predictions', data_reference='steps.6.produce')

    last_step = len(pipeline_description.steps) - 1
    attributes = pipelines.int_to_step(last_step - 1)
    targets = pipelines.int_to_step(last_step)

    return pipeline_description
from d3m import index
from d3m.metadata import hyperparams
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep

# -> dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> imputer -> random_forest
#                            extract_columns_by_semantic_types(targets) -> ^

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
primitive_0 = index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe')
step_0 = PrimitiveStep(primitive=primitive_0)
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser
primitive_1 = index.get_primitive('d3m.primitives.data_transformation.column_parser.Common')
step_1 = PrimitiveStep(primitive=primitive_1)
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: Standardization
def __init__(self):
    pipeline_description = Pipeline()
    pipeline_description.add_input(name="inputs")

    # dataset_to_dataframe
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.dataset_to_dataframe.Common"))
    step.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER, data_reference="inputs.0")
    step.add_output("produce")
    pipeline_description.add_step(step)

    # Simple Profiler Column Role Annotation
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.schema_discovery.profiler.Common"))
    step.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER, data_reference="steps.0.produce")
    step.add_output("produce")
    pipeline_description.add_step(step)

    # column parser -> labeled semantic types to data types
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.column_parser.Common"))
    step.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER, data_reference="steps.1.produce")
    step.add_output("produce")
    pipeline_description.add_step(step)

    # imputer -> imputes null values based on mean of column
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_cleaning.imputer.SKlearn"))
    step.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER, data_reference="steps.2.produce")
    step.add_hyperparameter(name="return_result", argument_type=ArgumentType.VALUE, data="replace")
    step.add_hyperparameter(name="use_semantic_types", argument_type=ArgumentType.VALUE, data=True)
    step.add_output("produce")
    pipeline_description.add_step(step)

    # extract feature columns
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common"))
    step.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER, data_reference="steps.3.produce")
    step.add_hyperparameter(
        name="semantic_types",
        argument_type=ArgumentType.VALUE,
        data=("https://metadata.datadrivendiscovery.org/types/Attribute",),
    )
    step.add_output("produce")
    pipeline_description.add_step(step)

    # extract target columns
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.extract_columns_by_semantic_types.Common"))
    step.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER, data_reference="steps.3.produce")
    step.add_hyperparameter(
        name="semantic_types",
        argument_type=ArgumentType.VALUE,
        data=("https://metadata.datadrivendiscovery.org/types/Target",
              "https://metadata.datadrivendiscovery.org/types/TrueTarget"),
    )
    step.add_output("produce")
    pipeline_description.add_step(step)

    # Text encoder
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.data_transformation.encoder.DistilTextEncoder"))
    step.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER, data_reference="steps.4.produce")
    step.add_argument(name="outputs", argument_type=ArgumentType.CONTAINER, data_reference="steps.5.produce")
    step.add_hyperparameter(name="metric", argument_type=ArgumentType.VALUE, data="accuracy")
    step.add_output("produce")
    pipeline_description.add_step(step)

    # Random forest SHAP values
    step = PrimitiveStep(primitive=index.get_primitive(
        "d3m.primitives.learner.random_forest.DistilEnsembleForest"))
    step.add_argument(name="inputs", argument_type=ArgumentType.CONTAINER, data_reference="steps.6.produce")
    step.add_argument(name="outputs", argument_type=ArgumentType.CONTAINER, data_reference="steps.5.produce")
    step.add_output("produce_shap_values")
    pipeline_description.add_step(step)

    # Final Output
    pipeline_description.add_output(name="output", data_reference="steps.7.produce_shap_values")

    self.pipeline = pipeline_description
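# Note on the output above: unlike the other pipelines in this file, the final
# data reference points at "produce_shap_values" rather than "produce", so the
# pipeline's output is the forest's SHAP-value explanations instead of
# predictions. A minimal retrieval sketch (assuming a loaded Dataset `dataset`
# and this wrapper instantiated as `wrapper`):
#
#     from d3m.runtime import Runtime
#     runtime = Runtime(pipeline=wrapper.pipeline, context=Context.TESTING)
#     runtime.fit(inputs=[dataset])
#     shap_df = runtime.produce(inputs=[dataset]).values['outputs.0']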