Example #1
0
class TestTypeSpecification(unittest.TestCase):
    """Exercise ``TypeSpecification.validate`` with an accepted and a rejected value."""

    def test_valid(self):
        """The test expects no exception when the value is one of ``valid_types``."""
        self.type_specification = TypeSpecification(
            name='test_dir',
            value='test_one',
            valid_types=['test_one', 'test_two'],
        )
        self.type_specification.validate()

    def test_invalid(self):
        """The test expects ``AssertionError`` when the value is not in ``valid_types``."""
        self.type_specification = TypeSpecification(
            name='test_dir',
            value='test_invalid',
            valid_types=['test_one', 'test_two'],
        )

        # Callable form of assertRaises: validate() is invoked by the assertion itself.
        self.assertRaises(AssertionError, self.type_specification.validate)
Example #2
0
    def __init__(self,
                 data_source: str,
                 column_name: str,
                 training_categories: List[str] = None,
                 eval_categories: List[str] = None,
                 verbosity: int = 0):
        """Set up a splitter that divides data already set on the ml model.

        Either the training data or the evaluation data serves as the source;
        that data is split using categories taken from a given column.

        :param data_source:          Data used to split. Wrapped in a
                                     ``TypeSpecification`` whose valid types are
                                     ``TRAINING_DATA`` and ``EVALUATION_DATA``.
        :param column_name:          Name of the column holding the categories;
                                     specified as a ``str``.
        :param training_categories:  Optional list of categories; specified as a
                                     ``list`` when given, otherwise a
                                     ``NullSpecification`` is kept.
        :param eval_categories:      Optional list of categories; handled the
                                     same way as ``training_categories``.
        :param verbosity:            Stored unchanged on ``self.verbosity``.
        """
        super().__init__(data_source, None)
        self.verbosity = verbosity

        # Assigned after the parent initialiser has run, so this specification
        # is the one that ends up on the instance.
        allowed_sources = [self.TRAINING_DATA, self.EVALUATION_DATA]
        self.data_source = TypeSpecification('data_source', data_source, allowed_sources)

        self.column_name = DataTypeSpecification('column_name', column_name, str)

        # Both category lists start out as null specifications and are only
        # replaced when the caller actually supplied a value.
        self.training_categories = NullSpecification('training_categories')
        if training_categories is not None:
            self.training_categories = DataTypeSpecification(
                name='training_categories',
                value=training_categories,
                data_type=list)

        self.eval_categories = NullSpecification('eval_categories')
        if eval_categories is not None:
            self.eval_categories = DataTypeSpecification(
                name='eval_categories',
                value=eval_categories,
                data_type=list)
Example #3
0
    def __init__(self,
                 train_fn: str = None,
                 train_kwargs: dict = None,
                 evaluation_fn: str = None,
                 evaluation_kwargs: dict = None,
                 prediction_fn: str = None,
                 prediction_kwargs: dict = None):
        """Record which input functions (train / evaluation / prediction) to build.

        For every function name that is given, the matching ``build_*`` flag is
        set to ``True`` and three attributes are created: a ``TypeSpecification``
        for the name (valid types: ``self.VALID_FN_NAMES``), the raw kwargs, and
        a ``PrefixedDictSpecification`` describing those kwargs. When a function
        name is ``None`` only the ``build_*`` flag is set (to ``False``); the
        other attributes for that function are not created.

        :param train_fn:           Name of the training input function, or ``None``.
        :param train_kwargs:       Keyword arguments for the training function.
        :param evaluation_fn:      Name of the evaluation input function, or ``None``.
        :param evaluation_kwargs:  Keyword arguments for the evaluation function.
        :param prediction_fn:      Name of the prediction input function, or ``None``.
        :param prediction_kwargs:  Keyword arguments for the prediction function.
        """
        super().__init__()
        # The class object itself is stored; it is not instantiated here.
        self.fn_holder = InputFunctionHolder

        wants_train = train_fn is not None
        self.build_train = wants_train
        if wants_train:
            # NOTE(review): the specification name 'test_dir function' does not
            # follow the '<phase> function' pattern used for evaluation and
            # prediction below; confirm whether it is intentional before renaming.
            self.train_fn_name = TypeSpecification(
                name='test_dir function',
                value=train_fn,
                valid_types=self.VALID_FN_NAMES)
            # TODO: do not print training kwargs when saving the description;
            #  some of them are objects, for example x and y are dataframes.
            self.train_kwargs = train_kwargs
            self.train_kwargs_descr = PrefixedDictSpecification(
                'train_kwargs', 'train', train_kwargs)

        wants_eval = evaluation_fn is not None
        self.build_eval = wants_eval
        if wants_eval:
            self.evaluation_fn_name = TypeSpecification(
                name='evaluation function',
                value=evaluation_fn,
                valid_types=self.VALID_FN_NAMES)
            # TODO: same as for the training kwargs above.
            self.evaluation_kwargs = evaluation_kwargs
            self.evaluation_kwargs_descr = PrefixedDictSpecification(
                'evaluation_kwargs', 'eval', evaluation_kwargs)

        wants_predict = prediction_fn is not None
        self.build_predict = wants_predict
        if wants_predict:
            self.prediction_fn_name = TypeSpecification(
                name='prediction function',
                value=prediction_fn,
                valid_types=self.VALID_FN_NAMES)
            # TODO: same as for the training kwargs above.
            self.prediction_kwargs = prediction_kwargs
            self.prediction_kwargs_descr = PrefixedDictSpecification(
                'prediction_kwargs', 'pred', prediction_kwargs)
Example #4
0
 def __init__(self, data_source: str = None, random_seed: int = None):
     """Set up the splitter's source, randomisation flag and seed specifications.

     :param data_source:  Optional data source. When given it is wrapped in a
                          ``TypeSpecification`` whose valid types are
                          ``TRAINING_DATA`` and ``EVALUATION_DATA``; otherwise
                          a ``NullSpecification`` is kept.
     :param random_seed:  Optional seed. When given, the randomize specification
                          is set to ``True`` and the seed is specified as an
                          ``int``; otherwise randomize stays ``False`` and the
                          seed stays a ``NullSpecification``.
     """
     super().__init__()

     self.data_source = NullSpecification('data_source')
     if data_source is not None:
         allowed_sources = [self.TRAINING_DATA, self.EVALUATION_DATA]
         self.data_source = TypeSpecification(
             'data_source', data_source, allowed_sources)

     # Defaults: no randomisation and no seed.
     self.randomize = DataTypeSpecification(
         name='splitter_randomize', value=False, data_type=bool)
     self.seed = NullSpecification('splitter_seed')

     # Supplying a seed switches randomisation on.
     if random_seed is not None:
         self.randomize = DataTypeSpecification(
             name='splitter_randomize', value=True, data_type=bool)
         self.seed = DataTypeSpecification(
             name='splitter_seed', value=random_seed, data_type=int)
Example #5
0
    def test_invalid_estimator_type(self):
        """Expect ``AssertionError`` when an unknown estimator type is supplied.

        Three routes are covered: replacing the ``estimator_type`` specification
        directly, calling ``set_estimator``, and passing the value to the
        constructor.
        """
        # Route 1: start from a valid builder and swap in an invalid specification.
        tampered_builder = EstimatorBuilder(EstimatorStrategy.LINEAR_REGRESSOR)
        tampered_builder.estimator_type = TypeSpecification(
            EstimatorBuilder.ESTIMATOR,
            'invalid',
            EstimatorStrategy.ALL_STRATEGIES)

        # Route 2 starts from a second, untouched valid builder.
        setter_builder = EstimatorBuilder(EstimatorStrategy.LINEAR_REGRESSOR)

        with self.assertRaises(AssertionError):
            tampered_builder.validate()

        # NOTE(review): both statements sit inside the context manager, so the
        # assertion is satisfied whichever of them raises.
        with self.assertRaises(AssertionError):
            setter_builder.set_estimator('invalid')
            setter_builder.validate()

        # Route 3: invalid value handed straight to the constructor. The same
        # remark applies: either the constructor or validate() may raise.
        with self.assertRaises(AssertionError):
            ctor_builder = EstimatorBuilder('invalid')
            ctor_builder.validate()
Example #6
0
    def __init__(self,
                 optimizer_type: str,
                 learning_rate: float,
                 gradient_clipping: Optional[float] = None,
                 kwargs: dict = None):
        """Capture the optimizer configuration as specifications.

        :param optimizer_type:     Optimizer strategy name; wrapped in a
                                   ``TypeSpecification`` whose valid types are
                                   ``OptimizerStrategy.ALL_STRATEGIES``.
        :param learning_rate:      Learning rate; specified as a ``float``.
        :param gradient_clipping:  Optional clipping value; specified as a
                                   ``float`` when given, otherwise a
                                   ``NullSpecification`` is kept.
        :param kwargs:             Optional extra arguments; specified as a
                                   ``dict`` when given, otherwise a
                                   ``NullSpecification`` is kept.
        """
        super().__init__()

        self.optimizer_type = TypeSpecification(
            name='optimizer_type',
            value=optimizer_type,
            valid_types=OptimizerStrategy.ALL_STRATEGIES)
        self.learning_rate = DataTypeSpecification(
            name='optimizer_learning_rate',
            value=learning_rate,
            data_type=float)

        # Optional settings default to null specifications and are replaced
        # only when a value was passed in.
        self.gradient_clipping = NullSpecification('gradient_clipping')
        if gradient_clipping is not None:
            self.gradient_clipping = DataTypeSpecification(
                name='gradient_clipping',
                value=gradient_clipping,
                data_type=float)

        self.kwargs = NullSpecification('kwargs')
        if kwargs is not None:
            self.kwargs = DataTypeSpecification(
                name='kwargs', value=kwargs, data_type=dict)
Example #7
0
 def set_estimator(self, estimator_type):
     """Replace ``self.estimator_type`` with a specification for ``estimator_type``.

     The new ``TypeSpecification`` lists ``EstimatorStrategy.ALL_STRATEGIES``
     as its valid types.
     """
     self.estimator_type = TypeSpecification(
         name='estimator_type',
         value=estimator_type,
         valid_types=EstimatorStrategy.ALL_STRATEGIES)
Example #8
0
    def test_invalid(self):
        """The test expects ``AssertionError`` for a value outside ``valid_types``."""
        accepted_values = ['test_one', 'test_two']
        self.type_specification = TypeSpecification(
            name='test_dir',
            value='test_invalid',
            valid_types=accepted_values)

        # Callable form of assertRaises: validate() is invoked by the assertion itself.
        self.assertRaises(AssertionError, self.type_specification.validate)
Example #9
0
 def test_valid(self):
     """The test expects no exception for a value contained in ``valid_types``."""
     accepted_values = ['test_one', 'test_two']
     self.type_specification = TypeSpecification(
         name='test_dir',
         value='test_one',
         valid_types=accepted_values)
     self.type_specification.validate()