Esempio n. 1
0
    def determine_filters(name='', version=None, strict=True, **kwargs):
        '''
        Stateless helper that works out which filters identify an existing
        raw dataset persistable.

        Returns: database class, filter dictionary

        :param name: persistable name, always part of the filter
        :param version: when given, name + version are the only filters used
        :param strict: when truthy, a dummy object is instantiated from the
        registry and its hash is added to the filters; otherwise only name and
        registered name are used
        :param registered_name: (read from kwargs) Class name registered in SimpleML
        '''
        # An explicit version short-circuits everything else
        if version is not None:
            return BaseRawDataset, {'name': name, 'version': version}

        class_key = kwargs.get('registered_name')
        lookup = {'name': name, 'registered_name': class_key}

        if strict:
            # Build a throwaway instance purely to obtain the hash to search for.
            # kwargs still contains registered_name here and is forwarded as is.
            # NOTE(review): nothing loads data before _hash() is called in this
            # branch -- confirm the hash does not depend on loaded data.
            dataset_cls = SIMPLEML_REGISTRY.get(class_key)
            candidate = dataset_cls(name=name, **kwargs)
            lookup['hash_'] = candidate._hash()

        return BaseRawDataset, lookup
Esempio n. 2
0
 def retrieve_from_registry(registered_name):
     '''
     Stateless lookup of a class definition in the global registry.

     :param registered_name: key to look up in SIMPLEML_REGISTRY
     :return: the registry entry stored under that key
     :raises TrainingError: when the registry returns None for the key
     '''
     registered_class = SIMPLEML_REGISTRY.get(registered_name)
     if registered_class is not None:
         return registered_class

     # Nothing registered under that name
     message = 'Referenced class unregistered: {}'.format(registered_name)
     raise TrainingError(message)
Esempio n. 3
0
    def create_new(registered_name, **kwargs):
        '''
        Stateless factory: instantiate the registered class, build its
        dataframe, save it and hand it back.

        :param registered_name: Class name registered in SimpleML
        :param kwargs: forwarded untouched to the class constructor
        :return: the newly created and saved object
        '''
        dataset_cls = SIMPLEML_REGISTRY.get(registered_name)
        dataset = dataset_cls(**kwargs)

        # Build first, then persist
        dataset.build_dataframe()
        dataset.save()

        return dataset
Esempio n. 4
0
    def test_register_on_import(self):
        '''
        Defining a class that uses MetaRegistry as its metaclass should make
        it available in SIMPLEML_REGISTRY under its class name.
        '''
        class_name = 'BLAHBLAHTESTCLASS'

        def import_new_class():
            # The class statement only executes when this helper is called,
            # so the "before" assertion below sees the untouched registry
            class BLAHBLAHTESTCLASS(with_metaclass(MetaRegistry, object)):
                __abstract__ = True

            return BLAHBLAHTESTCLASS

        # Precondition: name is not known yet
        self.assertNotIn(class_name, SIMPLEML_REGISTRY.registry)

        # Act: define the class
        fake_class = import_new_class()

        # Postcondition: name is present and resolves to the new class
        self.assertIn(class_name, SIMPLEML_REGISTRY.registry)
        self.assertEqual(fake_class, SIMPLEML_REGISTRY.get(class_name))
Esempio n. 5
0
    def determine_filters(cls, name='', version=None, strict=True, **kwargs):
        '''
        Stateless helper that works out which filters identify an existing
        processed dataset persistable.

        Returns: database class, filter dictionary

        :param name: persistable name, always part of the filter
        :param version: when given, name + version are the only filters used
        :param strict: when truthy, a dummy object is created, given the
        pipeline, built, and its hash is used as a filter; otherwise the
        pipeline id is used instead of the hash
        :param registered_name: (required in kwargs when version is None)
        Class name registered in SimpleML
        :param dataset_pipeline: (optional, in kwargs) pipeline object
        :param dataset_pipeline_kwargs: (optional, in kwargs) arguments for
        cls.retrieve_pipeline, only consumed when no pipeline object is passed
        '''
        # An explicit version short-circuits everything else
        if version is not None:
            return BaseProcessedDataset, {'name': name, 'version': version}

        class_key = kwargs.pop('registered_name')

        # Prefer a pipeline object passed in directly; otherwise retrieve it
        # through the dependency reference
        pipeline = kwargs.pop('dataset_pipeline', None)
        if pipeline is None:
            pipeline_kwargs = kwargs.pop('dataset_pipeline_kwargs', {})
            pipeline = cls.retrieve_pipeline(**pipeline_kwargs)

        lookup = {'name': name, 'registered_name': class_key}

        if not strict:
            # Assume combo of name, class, and pipeline will be unique
            lookup['pipeline_id'] = pipeline.id
            return BaseProcessedDataset, lookup

        # Strict: build a dummy object to obtain the hash to look for
        dataset_cls = SIMPLEML_REGISTRY.get(class_key)
        candidate = dataset_cls(name=name, **kwargs)
        candidate.add_pipeline(pipeline)
        candidate.build_dataframe()
        lookup['hash_'] = candidate._hash()

        return BaseProcessedDataset, lookup
Esempio n. 6
0
    def determine_filters(cls,
                          name=None,
                          model_id=None,
                          strict=False,
                          **kwargs):
        '''
        Stateless helper that works out which filters identify an existing
        metric persistable.

        Returns: database class, filter dictionary

        :param name: metric name; together with model_id it is sufficient on
        its own to form the filter
        :param model_id: id of the model the metric belongs to
        :param strict: whether to score the dummy metric before hashing it.
        In theory identical inputs and classes give deterministically identical
        outputs (up to random iter), so scoring is not required to compare
        :param registered_name: (required in kwargs on the hash path)
        Class name registered in SimpleML
        :param model: (optional, in kwargs) model object
        :param model_kwargs: (optional, in kwargs) arguments for
        cls.retrieve_model, only consumed when no model object is passed
        '''
        # Can't rely on a default name because metrics are hard coded to
        # reflect dataset split + class, so both values must be supplied
        if name is not None and model_id is not None:
            return BaseMetric, {'name': name, 'model_id': model_id}

        # Prefer a model object passed in directly; otherwise retrieve it
        # through the dependency reference
        model = kwargs.pop('model', None)
        if model is None:
            model_kwargs = kwargs.pop('model_kwargs', {})
            model = cls.retrieve_model(**model_kwargs)

        # Dummy object whose hash is what we search for
        class_key = kwargs.pop('registered_name')
        metric_cls = SIMPLEML_REGISTRY.get(class_key)
        candidate = metric_cls(name=name, **kwargs)
        candidate.add_model(model)
        if strict:
            candidate.score()

        # Use the name as reported by the object, not the argument
        lookup = {
            'name': candidate.name,
            'registered_name': class_key,
            'hash_': candidate._hash(),
        }
        return BaseMetric, lookup
Esempio n. 7
0
    def create_new(cls, registered_name, model=None, **kwargs):
        '''
        Stateless factory: instantiate the registered metric class, attach the
        model, score, save and hand the object back.

        :param registered_name: Class name registered in SimpleML
        :param model: model object; when None it is fetched with
        cls.retrieve_model using the optional 'model_kwargs' entry of kwargs
        :param kwargs: remaining entries are forwarded to the class constructor
        :return: the newly created and saved metric
        '''
        if model is None:
            # Fall back to the dependency reference
            model_kwargs = kwargs.pop('model_kwargs', {})
            model = cls.retrieve_model(**model_kwargs)

        metric_cls = SIMPLEML_REGISTRY.get(registered_name)
        metric = metric_cls(**kwargs)
        metric.add_model(model)
        metric.score()
        metric.save()

        return metric
Esempio n. 8
0
    def create_new(cls, registered_name, dataset=None, **kwargs):
        '''
        Stateless factory: instantiate the registered pipeline class, attach
        the dataset, fit, save and hand the object back.

        :param registered_name: Class name registered in SimpleML
        :param dataset: dataset object; when None it is fetched with
        cls.retrieve_dataset using the optional 'dataset_kwargs' entry of kwargs
        :param kwargs: remaining entries are forwarded to the class constructor
        :return: the newly created and saved pipeline
        '''
        if dataset is None:
            # Fall back to the dependency reference
            dataset_kwargs = kwargs.pop('dataset_kwargs', {})
            dataset = cls.retrieve_dataset(**dataset_kwargs)

        pipeline_cls = SIMPLEML_REGISTRY.get(registered_name)
        pipeline = pipeline_cls(**kwargs)
        pipeline.add_dataset(dataset)
        pipeline.fit()
        pipeline.save()

        return pipeline
Esempio n. 9
0
    def create_new(cls, registered_name, pipeline=None, **kwargs):
        '''
        Stateless factory: instantiate the registered model class, attach the
        pipeline, fit, save and hand the object back.

        :param registered_name: Class name registered in SimpleML
        :param pipeline: pipeline object; when None it is fetched with
        cls.retrieve_pipeline using the optional 'pipeline_kwargs' entry of kwargs
        :param kwargs: remaining entries are forwarded to the class constructor
        :return: the newly created and saved model
        '''
        if pipeline is None:
            # Fall back to the dependency reference
            pipeline_kwargs = kwargs.pop('pipeline_kwargs', {})
            pipeline = cls.retrieve_pipeline(**pipeline_kwargs)

        model_cls = SIMPLEML_REGISTRY.get(registered_name)
        model = model_cls(**kwargs)
        model.add_pipeline(pipeline)
        model.fit()
        model.save()

        return model
Esempio n. 10
0
    def determine_filters(cls, name='', version=None, strict=False, **kwargs):
        '''
        Stateless helper that works out which filters identify an existing
        model persistable.

        Returns: database class, filter dictionary

        :param name: persistable name, always part of the filter
        :param version: when given, name + version are the only filters used
        :param strict: whether to fit the dummy model before hashing it.
        In theory identical inputs and classes give deterministically identical
        outputs (up to random iter), so fitting is not required to compare
        :param registered_name: (required in kwargs when version is None)
        Class name registered in SimpleML
        :param pipeline: (optional, in kwargs) pipeline object
        :param pipeline_kwargs: (optional, in kwargs) arguments for
        cls.retrieve_pipeline, only consumed when no pipeline object is passed
        '''
        # An explicit version short-circuits everything else
        if version is not None:
            return BaseModel, {'name': name, 'version': version}

        # Prefer a pipeline object passed in directly; otherwise retrieve it
        # through the dependency reference
        pipeline = kwargs.pop('pipeline', None)
        if pipeline is None:
            pipeline_kwargs = kwargs.pop('pipeline_kwargs', {})
            pipeline = cls.retrieve_pipeline(**pipeline_kwargs)

        # Dummy object whose hash is what we search for
        class_key = kwargs.pop('registered_name')
        model_cls = SIMPLEML_REGISTRY.get(class_key)
        candidate = model_cls(name=name, **kwargs)
        candidate.add_pipeline(pipeline)
        if strict:
            candidate.fit()

        lookup = {
            'name': name,
            'registered_name': class_key,
            'hash_': candidate._hash(),
        }
        return BaseModel, lookup
Esempio n. 11
0
 def _load_class(self):
     '''
     Look up this object's registered_name in the global registry of
     imported class names and return whatever the registry holds for it.
     '''
     registry_key = self.registered_name
     return SIMPLEML_REGISTRY.get(registry_key)