class TestTypeSpecification(unittest.TestCase):
    """Unit tests for TypeSpecification.validate()."""

    def test_valid(self):
        # A value that appears in the list of valid types must pass validation.
        self.type_specification = TypeSpecification(
            'test_dir', 'test_one', ['test_one', 'test_two'])
        self.type_specification.validate()

    def test_invalid(self):
        # A value absent from the valid types must trip the assertion.
        self.type_specification = TypeSpecification(
            'test_dir', 'test_invalid', ['test_one', 'test_two'])
        with self.assertRaises(AssertionError):
            self.type_specification.validate()
def __init__(self, data_source: str, column_name: str,
             training_categories: Optional[List[str]] = None,
             eval_categories: Optional[List[str]] = None,
             verbosity: int = 0):
    """Splits data already set on the ml model, using either the training
    data or evaluation data as source. The respective data is split using
    categories from a given column.

    :param data_source: Data used to split; validated against
        self.TRAINING_DATA / self.EVALUATION_DATA.
    :param column_name: Name of the column whose categories drive the split.
    :param training_categories: Categories assigned to the training split,
        or None to leave unspecified.
    :param eval_categories: Categories assigned to the evaluation split,
        or None to leave unspecified.
    :param verbosity: Verbosity level; 0 is silent.
    """
    super().__init__(data_source, None)
    self.verbosity = verbosity
    self.data_source = TypeSpecification(
        name='data_source',
        value=data_source,
        valid_types=[self.TRAINING_DATA, self.EVALUATION_DATA])
    self.column_name = DataTypeSpecification(
        name='column_name', value=column_name, data_type=str)
    # Each category list defaults to a null spec and is replaced with a
    # typed spec only when a value was actually supplied.
    self.training_categories = NullSpecification('training_categories')
    if training_categories is not None:
        self.training_categories = DataTypeSpecification(
            'training_categories', training_categories, list)
    self.eval_categories = NullSpecification('eval_categories')
    if eval_categories is not None:
        self.eval_categories = DataTypeSpecification(
            'eval_categories', eval_categories, list)
def __init__(self, train_fn: str = None, train_kwargs: dict = None,
             evaluation_fn: str = None, evaluation_kwargs: dict = None,
             prediction_fn: str = None, prediction_kwargs: dict = None):
    """Configure optional train / evaluation / prediction input functions.

    Each fn name is validated against self.VALID_FN_NAMES; the matching
    ``build_*`` flag records whether that function was requested at all.

    :param train_fn: Name of the training input function, or None to skip.
    :param train_kwargs: Keyword arguments passed to the training function.
    :param evaluation_fn: Name of the evaluation input function, or None.
    :param evaluation_kwargs: Keyword arguments for the evaluation function.
    :param prediction_fn: Name of the prediction input function, or None.
    :param prediction_kwargs: Keyword arguments for the prediction function.
    """
    super().__init__()
    self.fn_holder = InputFunctionHolder
    self.build_train = False
    if train_fn is not None:
        self.build_train = True
        # Fixed spec name: was 'test_dir function' (copy-paste from the test
        # fixtures); renamed to match the 'evaluation function' /
        # 'prediction function' naming of the sibling specs below.
        self.train_fn_name = TypeSpecification(
            'train function', train_fn, self.VALID_FN_NAMES)
        # todo: do not print training kwargs, when saving description, some of them are objects,
        # for example x and y are dataframes.
        self.train_kwargs = train_kwargs
        self.train_kwargs_descr = PrefixedDictSpecification(
            'train_kwargs', 'train', train_kwargs)
    self.build_eval = False
    if evaluation_fn is not None:
        self.build_eval = True
        self.evaluation_fn_name = TypeSpecification(
            'evaluation function', evaluation_fn, self.VALID_FN_NAMES)
        # todo: idem.
        self.evaluation_kwargs = evaluation_kwargs
        self.evaluation_kwargs_descr = PrefixedDictSpecification(
            'evaluation_kwargs', 'eval', evaluation_kwargs)
    self.build_predict = False
    if prediction_fn is not None:
        self.build_predict = True
        self.prediction_fn_name = TypeSpecification(
            'prediction function', prediction_fn, self.VALID_FN_NAMES)
        # todo: idem.
        self.prediction_kwargs = prediction_kwargs
        self.prediction_kwargs_descr = PrefixedDictSpecification(
            'prediction_kwargs', 'pred', prediction_kwargs)
def __init__(self, data_source: str = None, random_seed: int = None):
    """Configure an optional data source and optional seeded randomization.

    :param data_source: Source to split (training or evaluation data), or
        None to leave the source unspecified.
    :param random_seed: When given, enables randomization with this seed.
    """
    super().__init__()
    if data_source is None:
        self.data_source = NullSpecification('data_source')
    else:
        self.data_source = TypeSpecification(
            name='data_source',
            value=data_source,
            valid_types=[self.TRAINING_DATA, self.EVALUATION_DATA])
    if random_seed is None:
        # No seed: randomization stays off and the seed stays a null spec.
        self.randomize = DataTypeSpecification('splitter_randomize', False, bool)
        self.seed = NullSpecification('splitter_seed')
    else:
        self.randomize = DataTypeSpecification('splitter_randomize', True, bool)
        self.seed = DataTypeSpecification('splitter_seed', random_seed, int)
def test_invalid_estimator_type(self):
    """An invalid estimator type must fail validation wherever it is set."""
    # Overwriting the spec directly with an invalid value fails validate().
    invalid_estimator_builder = EstimatorBuilder(EstimatorStrategy.LINEAR_REGRESSOR)
    invalid_estimator_builder.estimator_type = TypeSpecification(
        name=EstimatorBuilder.ESTIMATOR,
        value='invalid',
        valid_types=EstimatorStrategy.ALL_STRATEGIES)
    valid_estimator_builder = EstimatorBuilder(EstimatorStrategy.LINEAR_REGRESSOR)
    with self.assertRaises(AssertionError):
        invalid_estimator_builder.validate()
    # Setting an invalid value through the setter raises as well, and the
    # builder stays valid afterwards.
    with self.assertRaises(AssertionError):
        valid_estimator_builder.set_estimator('invalid')
    valid_estimator_builder.validate()
    # Constructing with an invalid value also produces a failing builder.
    with self.assertRaises(AssertionError):
        builder = EstimatorBuilder('invalid')
        builder.validate()
def __init__(self, optimizer_type: str, learning_rate: float,
             gradient_clipping: Optional[float] = None, kwargs: dict = None):
    """Configure an optimizer specification.

    :param optimizer_type: Optimizer strategy name, validated against
        OptimizerStrategy.ALL_STRATEGIES.
    :param learning_rate: Learning rate for the optimizer.
    :param gradient_clipping: Optional gradient clipping value.
    :param kwargs: Optional extra keyword arguments for the optimizer.
    """
    super().__init__()
    self.optimizer_type = TypeSpecification(
        'optimizer_type', optimizer_type, OptimizerStrategy.ALL_STRATEGIES)
    self.learning_rate = DataTypeSpecification(
        'optimizer_learning_rate', learning_rate, float)
    if gradient_clipping is None:
        self.gradient_clipping = NullSpecification('gradient_clipping')
    else:
        self.gradient_clipping = DataTypeSpecification(
            'gradient_clipping', gradient_clipping, float)
    if kwargs is None:
        self.kwargs = NullSpecification('kwargs')
    else:
        self.kwargs = DataTypeSpecification('kwargs', kwargs, dict)
def set_estimator(self, estimator_type):
    """Replace the estimator type spec, validated against all strategies."""
    self.estimator_type = TypeSpecification(
        name='estimator_type',
        value=estimator_type,
        valid_types=EstimatorStrategy.ALL_STRATEGIES)
def test_invalid(self):
    """A value absent from the valid types must trip the assertion."""
    self.type_specification = TypeSpecification(
        'test_dir', 'test_invalid', ['test_one', 'test_two'])
    with self.assertRaises(AssertionError):
        self.type_specification.validate()
def test_valid(self):
    """A value that appears in the list of valid types passes validation."""
    self.type_specification = TypeSpecification(
        'test_dir', 'test_one', ['test_one', 'test_two'])
    self.type_specification.validate()