def setUpClass(cls):
    """Register the primitives under test so later index lookups resolve."""
    primitives_by_path = {
        'd3m.primitives.evaluation.train_score_dataset_split.Common': TrainScoreDatasetSplitPrimitive,
    }

    # Suppress any logging or stdout output produced during registration.
    with utils.silence():
        for python_path, primitive_class in primitives_by_path.items():
            index.register_primitive(python_path, primitive_class)
def __getattr__(self, item: str) -> typing.Any:
    """
    Called when no real attribute named "item" exists on this module object
    (so not an existing method, an already loaded primitive, or an already
    imported submodule).

    If the full dotted path ``<module name>.<item>`` matches a registered
    entry point, the primitive is loaded here and attached to the module as a
    real attribute via "register_primitive". Otherwise an ``AttributeError``
    is raised and Python's importing logic tries to import a submodule instead.
    """

    # Importing here to prevent import cycle.
    from d3m import index

    # NOTE(review): "self" appears to be a module object (it has "__name__") —
    # presumably this method lives on a ModuleType subclass; confirm at the
    # class definition.
    item_path = self.__name__.split('.') + [item]

    # NOTE(review): "entry_points()" yields objects with pkg_resources-style
    # ".require()"/".resolve()" methods — presumably a pkg_resources-based
    # iterator, not importlib.metadata; confirm where "entry_points" is defined.
    for entry_point in entry_points():
        entry_point_name = ['d3m', 'primitives'] + entry_point.name.split('.')

        # We assume for the last segment to be a class, so the full path has to match
        # for path to look like it is pointing to a primitive's class.
        if item_path == entry_point_name:
            primitive = None
            try:
                logger.debug("Loading entry point '%(entry_point_name)s'.", {'entry_point_name': entry_point.name})
                # "require" checks the distribution's declared requirements;
                # "resolve" imports and returns the primitive class.
                entry_point.require()
                primitive = entry_point.resolve()
            except pkg_resources.ResolutionError as error:
                logger.warning("While loading primitive '%(entry_point_name)s', an error has been detected: %(error)s", {'entry_point_name': entry_point.name, 'error': error})
                logger.warning("Attempting to load primitive '%(entry_point_name)s' without checking requirements.", {'entry_point_name': entry_point.name})

            # There was an error, so we try again without checking requirements.
            if primitive is None:
                primitive = entry_point.resolve()

            try:
                # We set the sentinel so that when during registration attribute with name "name"
                # is accessed this method is not called again (because a real attribute already
                # exists) but the sentinel is returned.
                setattr(self, item, index._SENTINEL)
                index.register_primitive('.'.join(entry_point_name), primitive)
            except Exception:
                # Roll back the sentinel only if registration did not replace
                # it with the real primitive, then propagate the error.
                if getattr(self, item) is index._SENTINEL:
                    delattr(self, item)
                raise

            # Calling "register_primitive" should set a real attribute on this module object.
            assert getattr(self, item) is primitive

            return primitive

    raise AttributeError('module \'{name}\' has no attribute \'{item}\''.format(name=self.__name__, item=item))
def test_validate(self):
    # Hide any logging or stdout output produced while running this test.
    with utils.silence():
        index.register_primitive(
            'd3m.primitives.data_transformation.column_parser.Common',
            ColumnParserPrimitive,
        )

        primitive = index.get_primitive_by_id(
            'd510cb7a-1782-4f51-b44c-58f0236e47c7',
        )
        description = primitive.metadata.to_json_structure()
        pipeline_run.validate_primitive(description)
def setUpClass(cls):
    """Register the test primitives used by the cases in this class."""
    test_primitives = (
        ('d3m.primitives.regression.monomial.Test', MonomialPrimitive),
        ('d3m.primitives.data_generation.random.Test', RandomPrimitive),
        ('d3m.primitives.operator.sum.Test', SumPrimitive),
        ('d3m.primitives.operator.increment.Test', IncrementPrimitive),
    )

    # Suppress any logging or stdout output produced during registration.
    with d3m_utils.silence():
        for python_path, primitive_class in test_primitives:
            index.register_primitive(python_path, primitive_class)
def test_register(self):
    FooBarPrimitive = create_primitive(
        'e2fc24f8-5b32-4759-be5b-8126a42522a3',
        'd3m.primitives.foo.bar.FooBarPrimitive',
    )

    # Capture log records while redirecting stdout/stderr into logging, so
    # nothing leaks to the console during registration.
    with self.assertLogs(level=logging.DEBUG) as cm:
        with utils.redirect_to_logging():
            index.register_primitive(
                'd3m.primitives.foo.bar.FooBarPrimitive',
                FooBarPrimitive,
            )

            # Emit at least one record, otherwise "assertLogs" can fail.
            logging.getLogger().debug("Start test.")

    index.get_primitive('d3m.primitives.foo.bar.FooBarPrimitive')
def setUpClass(cls):
    """Register the metafeature extractor and resolve the dataset/pipeline paths."""
    index.register_primitive(
        MetafeatureExtractor.metadata.query()['python_path'],
        MetafeatureExtractor,
    )

    cls.classification_dataset_util = utils.D3MDatasetUtil(
        config.DATASETS_DIR, '185_baseball_MIN_METADATA')
    cls.classification_pipeline_paths = [
        os.path.join(PIPELINES_DIR, filename)
        for filename in CLASSIFICATION_PIPELINE_FILENAMES
    ]

    cls.regression_dataset_util = utils.D3MDatasetUtil(
        config.DATASETS_DIR, '196_autoMpg_MIN_METADATA')
    cls.regression_pipeline_paths = [
        os.path.join(PIPELINES_DIR, filename)
        for filename in REGRESSION_PIPELINE_FILENAMES
    ]
def generate_profiler_pipeline(task_type, random_id=False):
    """Build a D3M pipeline exercising the BYU semantic profiler.

    Steps: dataset -> dataframe -> BYU profiler -> column parser ->
    attribute/target extraction -> SKlearn imputer -> random forest
    (classifier or regressor per ``task_type``) -> constructed predictions.

    :param task_type: ``'classification'`` or ``'regression'``.
    :param random_id: when True, use a random pipeline id instead of the
        fixed, task-specific one.
    :returns: the assembled ``pipeline_module.Pipeline``.
    :raises ValueError: if ``task_type`` is not a supported value.
    """
    if random_id:
        pipeline_id = str(uuid.uuid4())
    elif task_type == 'classification':
        pipeline_id = 'f4ebb9c9-ef15-491d-9a39-595c20f3e78e'
    elif task_type == 'regression':
        pipeline_id = '9f5f6042-6582-494a-bc4b-92c7797a6614'
    else:
        raise ValueError('Invalid task_type: {}'.format(task_type))

    # Make sure the profiler primitive is resolvable by its python path.
    d3m_index.register_primitive(
        SemanticProfilerPrimitive.metadata.query()['python_path'],
        SemanticProfilerPrimitive
    )

    pipeline = pipeline_module.Pipeline(pipeline_id)
    pipeline.add_input(name='inputs')
    step_counter = 0

    def add_step(python_path, arguments, hyperparams=None):
        # Append one primitive step (hyperparameters first, then container
        # arguments, then a single 'produce' output) and return the data
        # reference of that output.
        nonlocal step_counter
        step = pipeline_module.PrimitiveStep(
            primitive=d3m_index.get_primitive(python_path)
        )
        for name, data in (hyperparams or {}).items():
            step.add_hyperparameter(
                name=name,
                argument_type=metadata_base.ArgumentType.VALUE,
                data=data
            )
        for name, data_reference in arguments.items():
            step.add_argument(
                name=name,
                argument_type=metadata_base.ArgumentType.CONTAINER,
                data_reference=data_reference
            )
        step.add_output('produce')
        pipeline.add_step(step)
        output_reference = 'steps.{}.produce'.format(step_counter)
        step_counter += 1
        return output_reference

    raw_data_data_reference = add_step(
        'd3m.primitives.data_transformation.dataset_to_dataframe.Common',
        {'inputs': 'inputs.0'}
    )
    profiled_data_reference = add_step(
        'd3m.primitives.schema_discovery.profiler.BYU',
        {'inputs': raw_data_data_reference}
    )
    parsed_data_data_reference = add_step(
        'd3m.primitives.data_transformation.column_parser.Common',
        {'inputs': profiled_data_reference}
    )
    raw_attributes_data_reference = add_step(
        'd3m.primitives.data_transformation.extract_columns_by_semantic_types.Common',
        {'inputs': parsed_data_data_reference},
        hyperparams={'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute']}
    )
    true_targets_data_reference = add_step(
        'd3m.primitives.data_transformation.extract_columns_by_semantic_types.Common',
        {'inputs': parsed_data_data_reference},
        hyperparams={'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TrueTarget']}
    )
    imputed_attributes_data_reference = add_step(
        'd3m.primitives.data_cleaning.imputer.SKlearn',
        {'inputs': raw_attributes_data_reference},
        hyperparams={'use_semantic_types': True}
    )

    if task_type == 'regression':
        estimator_path = 'd3m.primitives.regression.random_forest.SKlearn'
    elif task_type == 'classification':
        estimator_path = 'd3m.primitives.classification.random_forest.SKlearn'
    else:
        # Unreachable: task_type was validated above; kept as a safeguard.
        raise ValueError('Invalid task_type: {}'.format(task_type))
    predictions_data_reference = add_step(
        estimator_path,
        {
            'inputs': imputed_attributes_data_reference,
            'outputs': true_targets_data_reference,
        },
        hyperparams={'use_semantic_types': True}
    )

    output_data_reference = add_step(
        'd3m.primitives.data_transformation.construct_predictions.Common',
        {
            'inputs': predictions_data_reference,
            'reference': raw_data_data_reference,
        }
    )

    pipeline.add_output(
        name='predictions', data_reference=output_data_reference
    )

    return pipeline
def generate_imputer_pipeline(task_type, random_id=False):
    """Build a D3M pipeline exercising the BYU random-sampling imputer.

    Steps: dataset -> dataframe -> Common profiler -> column parser ->
    attribute/target extraction -> BYU random-sampling imputer -> random
    forest (classifier or regressor per ``task_type``) -> constructed
    predictions.

    :param task_type: ``'classification'`` or ``'regression'``.
    :param random_id: when True, use a random pipeline id instead of the
        fixed, task-specific one.
    :returns: the assembled ``pipeline_module.Pipeline``.
    :raises ValueError: if ``task_type`` is not a supported value.
    """
    if random_id:
        pipeline_id = str(uuid.uuid4())
    elif task_type == 'classification':
        pipeline_id = '168d3fbf-a3fe-456a-93a3-d2720ef8cb42'
    elif task_type == 'regression':
        pipeline_id = 'faeb3eb9-648f-4059-b067-791ebff47bc4'
    else:
        raise ValueError('Invalid task_type: {}'.format(task_type))

    # Make sure the imputer primitive is resolvable by its python path.
    d3m_index.register_primitive(
        RandomSamplingImputer.metadata.query()['python_path'],
        RandomSamplingImputer
    )

    pipeline = pipeline_module.Pipeline(pipeline_id)
    pipeline.add_input(name='inputs')
    step_counter = 0

    def add_step(python_path, arguments, hyperparams=None):
        # Append one primitive step (hyperparameters first, then container
        # arguments, then a single 'produce' output) and return the data
        # reference of that output.
        nonlocal step_counter
        step = pipeline_module.PrimitiveStep(
            primitive=d3m_index.get_primitive(python_path)
        )
        for name, data in (hyperparams or {}).items():
            step.add_hyperparameter(
                name=name,
                argument_type=metadata_base.ArgumentType.VALUE,
                data=data
            )
        for name, data_reference in arguments.items():
            step.add_argument(
                name=name,
                argument_type=metadata_base.ArgumentType.CONTAINER,
                data_reference=data_reference
            )
        step.add_output('produce')
        pipeline.add_step(step)
        output_reference = 'steps.{}.produce'.format(step_counter)
        step_counter += 1
        return output_reference

    raw_data_data_reference = add_step(
        'd3m.primitives.data_transformation.dataset_to_dataframe.Common',
        {'inputs': 'inputs.0'}
    )
    profiled_data_reference = add_step(
        'd3m.primitives.schema_discovery.profiler.Common',
        {'inputs': raw_data_data_reference}
    )
    parsed_data_data_reference = add_step(
        'd3m.primitives.data_transformation.column_parser.Common',
        {'inputs': profiled_data_reference}
    )
    raw_attributes_data_reference = add_step(
        'd3m.primitives.data_transformation.extract_columns_by_semantic_types.Common',
        {'inputs': parsed_data_data_reference},
        hyperparams={'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute']}
    )
    true_targets_data_reference = add_step(
        'd3m.primitives.data_transformation.extract_columns_by_semantic_types.Common',
        {'inputs': parsed_data_data_reference},
        hyperparams={'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TrueTarget']}
    )
    imputed_attributes_data_reference = add_step(
        'd3m.primitives.data_preprocessing.random_sampling_imputer.BYU',
        {'inputs': raw_attributes_data_reference}
    )

    if task_type == 'regression':
        estimator_path = 'd3m.primitives.regression.random_forest.SKlearn'
    elif task_type == 'classification':
        estimator_path = 'd3m.primitives.classification.random_forest.SKlearn'
    else:
        # Unreachable: task_type was validated above; kept as a safeguard.
        raise ValueError('Invalid task_type: {}'.format(task_type))
    predictions_data_reference = add_step(
        estimator_path,
        {
            'inputs': imputed_attributes_data_reference,
            'outputs': true_targets_data_reference,
        },
        hyperparams={'use_semantic_types': True}
    )

    output_data_reference = add_step(
        'd3m.primitives.data_transformation.construct_predictions.Common',
        {
            'inputs': predictions_data_reference,
            'reference': raw_data_data_reference,
        }
    )

    pipeline.add_output(
        name='predictions', data_reference=output_data_reference
    )

    return pipeline
def generate_metafeature_pipeline(task_type, random_id=False):
    """Build a D3M pipeline exercising the BYU metafeature extractor.

    Steps: dataset -> dataframe -> Common profiler -> column parser ->
    BYU metafeature extractor (side branch) -> attribute/target extraction
    -> SKlearn imputer -> random forest (classifier or regressor per
    ``task_type``) -> constructed predictions.

    :param task_type: ``'classification'`` or ``'regression'``.
    :param random_id: when True, use a random pipeline id instead of the
        fixed, task-specific one.
    :returns: the assembled ``pipeline_module.Pipeline``.
    :raises ValueError: if ``task_type`` is not a supported value.
    """
    if random_id:
        pipeline_id = str(uuid.uuid4())
    elif task_type == 'classification':
        pipeline_id = 'baa68a80-3a7d-472d-8d4f-54918cc1bd8f'
    elif task_type == 'regression':
        pipeline_id = '28e413f9-6085-4e34-b2c2-a5182a322a4b'
    else:
        raise ValueError('Invalid task_type: {}'.format(task_type))

    # Make sure the metafeature primitive is resolvable by its python path.
    d3m_index.register_primitive(
        MetafeatureExtractor.metadata.query()['python_path'],
        MetafeatureExtractor
    )

    pipeline = pipeline_module.Pipeline(pipeline_id)
    pipeline.add_input(name='inputs')
    step_counter = 0

    def add_step(python_path, arguments, hyperparams=None):
        # Append one primitive step (hyperparameters first, then container
        # arguments, then a single 'produce' output) and return the data
        # reference of that output.
        nonlocal step_counter
        step = pipeline_module.PrimitiveStep(
            primitive=d3m_index.get_primitive(python_path)
        )
        for name, data in (hyperparams or {}).items():
            step.add_hyperparameter(
                name=name,
                argument_type=metadata_base.ArgumentType.VALUE,
                data=data
            )
        for name, data_reference in arguments.items():
            step.add_argument(
                name=name,
                argument_type=metadata_base.ArgumentType.CONTAINER,
                data_reference=data_reference
            )
        step.add_output('produce')
        pipeline.add_step(step)
        output_reference = 'steps.{}.produce'.format(step_counter)
        step_counter += 1
        return output_reference

    raw_data_data_reference = add_step(
        'd3m.primitives.data_transformation.dataset_to_dataframe.Common',
        {'inputs': 'inputs.0'}
    )
    profiled_data_reference = add_step(
        'd3m.primitives.schema_discovery.profiler.Common',
        {'inputs': raw_data_data_reference}
    )
    parsed_data_data_reference = add_step(
        'd3m.primitives.data_transformation.column_parser.Common',
        {'inputs': profiled_data_reference}
    )
    # The metafeature extractor's output is not consumed by any later step;
    # the step is included only so it runs over the parsed data. (The
    # original bound its data reference to an unused local.)
    add_step(
        'd3m.primitives.metalearning.metafeature_extractor.BYU',
        {'inputs': parsed_data_data_reference}
    )
    raw_attributes_data_reference = add_step(
        'd3m.primitives.data_transformation.extract_columns_by_semantic_types.Common',
        {'inputs': parsed_data_data_reference},
        hyperparams={'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute']}
    )
    true_targets_data_reference = add_step(
        'd3m.primitives.data_transformation.extract_columns_by_semantic_types.Common',
        {'inputs': parsed_data_data_reference},
        hyperparams={'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TrueTarget']}
    )
    imputed_attributes_data_reference = add_step(
        'd3m.primitives.data_cleaning.imputer.SKlearn',
        {'inputs': raw_attributes_data_reference},
        hyperparams={'use_semantic_types': True}
    )

    if task_type == 'regression':
        estimator_path = 'd3m.primitives.regression.random_forest.SKlearn'
    elif task_type == 'classification':
        estimator_path = 'd3m.primitives.classification.random_forest.SKlearn'
    else:
        # Unreachable: task_type was validated above; kept as a safeguard.
        raise ValueError('Invalid task_type: {}'.format(task_type))
    predictions_data_reference = add_step(
        estimator_path,
        {
            'inputs': imputed_attributes_data_reference,
            'outputs': true_targets_data_reference,
        },
        hyperparams={'use_semantic_types': True}
    )

    output_data_reference = add_step(
        'd3m.primitives.data_transformation.construct_predictions.Common',
        {
            'inputs': predictions_data_reference,
            'reference': raw_data_data_reference,
        }
    )

    pipeline.add_output(
        name='predictions', data_reference=output_data_reference
    )

    return pipeline