def test_fixed_effect_lr_lbfgs_model_creation(self):
    """Factory should build a FixedEffectLRModelLBFGS for fixed-effect LR training."""
    training_params = setup_fake_base_training_params(
        training_stage=constants.FIXED_EFFECT,
        model_type=constants.LOGISTIC_REGRESSION)
    model = ModelFactory.get_model(
        base_training_params=training_params,
        raw_model_params=self.model_params)
    # The fixed-effect LR stage must be wired to the LBFGS model implementation.
    self.assertIsInstance(model, FixedEffectLRModelLBFGS)
def test_random_effect_driver_wiring(self):
    """Factory should build a RandomEffectDriver for the random-effect stage."""
    training_params = setup_fake_base_training_params(constants.RANDOM_EFFECT)
    driver = DriverFactory.get_driver(
        base_training_params=training_params,
        raw_model_params=self.model_params)
    # The random-effect stage must map to the random-effect driver class.
    self.assertIsInstance(driver, RandomEffectDriver)
def _get_params(paths, max_iters):
    """
    Get the various parameter for model initialization.
    :param paths: An AllPaths namedtuple.
    :param max_iters: Maximum number of l-BFGS iterations, passed through as a CLI flag.
    :return: Three different parameter sets (base training params, schema params,
        and the raw model params as a flat argv-style list).
    """
    base_training_params = setup_fake_base_training_params(
        training_stage=constants.FIXED_EFFECT)
    base_training_params.training_output_dir = paths.training_score_path
    base_training_params.validation_output_dir = paths.validation_score_path
    schema_params = setup_fake_schema_params()
    # Flat ['--flag', 'value', ...] list, as an argument parser would receive it.
    raw_model_params = [
        '--' + constants.FEATURE_BAGS, 'global',
        '--' + constants.TRAIN_DATA_PATH, paths.training_data_path,
        '--' + constants.VALIDATION_DATA_PATH, paths.validation_data_path,
        '--' + constants.METADATA_FILE, paths.metadata_file,
        '--' + constants.FEATURE_FILE, paths.feature_file,
        '--' + constants.NUM_OF_LBFGS_ITERATIONS, f"{max_iters}",
        '--' + constants.MODEL_OUTPUT_DIR, paths.model_output_dir,
        '--' + constants.COPY_TO_LOCAL, 'False',
        '--' + constants.BATCH_SIZE, '16',
        '--' + constants.L2_REG_WEIGHT, f"{_L2_REG_WEIGHT}",
        "--" + constants.REGULARIZE_BIAS, 'True',
        "--" + constants.DELAYED_EXIT_IN_SECONDS, '1'
    ]
    return base_training_params, schema_params, raw_model_params
def test_fixed_effect_driver_wiring(self):
    """Factory should build a FixedEffectDriver for the fixed-effect stage."""
    training_params = setup_fake_base_training_params(constants.FIXED_EFFECT)
    driver = DriverFactory.get_driver(
        base_training_params=training_params,
        raw_model_params=self.model_params)
    # The fixed-effect stage must map to the fixed-effect driver class.
    self.assertIsInstance(driver, FixedEffectDriver)
def get_raw_params(self, partition_entity='memberId', num_of_lbfgs_iterations=None):
    """Assemble argv-style raw params for a random-effect training run.

    :param partition_entity: Entity column used to partition random effects.
    :param num_of_lbfgs_iterations: Optional l-BFGS iteration cap; only added
        to the argv list when truthy.
    :return: Tuple of (base training params, flat raw param list).
    """
    base_training_params = setup_fake_base_training_params(
        training_stage=constants.RANDOM_EFFECT)
    base_training_params.batch_size = 2
    # Start from the base params flattened to argv form, then append extras.
    raw_params = list(base_training_params.__to_argv__())
    raw_params += setup_fake_raw_model_params(
        training_stage=constants.RANDOM_EFFECT)
    raw_params += ['--' + constants.MODEL_IDS_DIR, test_dataset_path]
    raw_params += ['--' + constants.FEATURE_FILE,
                   os.path.join(test_dataset_path, fake_feature_file)]
    raw_params += ['--' + constants.PARTITION_ENTITY, partition_entity]
    raw_params += ['--' + constants.LABEL, 'response']
    raw_params += ['--' + constants.L2_REG_WEIGHT, '0.1']
    if num_of_lbfgs_iterations:
        raw_params += ['--' + constants.NUM_OF_LBFGS_ITERATIONS,
                       f'{num_of_lbfgs_iterations}']
    return base_training_params, raw_params
def get_raw_params(self, partition_entity='memberId', num_of_lbfgs_iterations=None,
                   intercept_only=False):
    """Assemble argv-style raw params for a random-effect training run.

    :param partition_entity: Entity column used to partition random effects.
    :param num_of_lbfgs_iterations: Optional l-BFGS iteration cap; only added
        to the argv list when truthy.
    :param intercept_only: When True, strip the feature-bag flag and its value
        so the model trains with an intercept only.
    :return: Tuple of (base training params, flat raw param list).
    """
    base_training_params = setup_fake_base_training_params(
        training_stage=constants.RANDOM_EFFECT)
    base_training_params.batch_size = 2
    # Start from the base params flattened to argv form, then append extras.
    raw_params = list(base_training_params.__to_argv__())
    raw_params += setup_fake_raw_model_params(
        training_stage=constants.RANDOM_EFFECT)
    raw_params += ['--' + constants.MODEL_IDS_DIR, test_dataset_path]
    raw_params += ['--' + constants.FEATURE_FILE,
                   os.path.join(test_dataset_path, fake_feature_file)]
    raw_params += ['--' + constants.PARTITION_ENTITY, partition_entity]
    raw_params += ['--' + constants.LABEL_COLUMN_NAME, 'response']
    raw_params += ['--' + constants.L2_REG_WEIGHT, '0.1']
    if num_of_lbfgs_iterations:
        raw_params += ['--' + constants.NUM_OF_LBFGS_ITERATIONS,
                       f'{num_of_lbfgs_iterations}']
    if intercept_only:
        # Remove the feature-bag flag together with its value.
        feature_bag_flag = f'--{constants.FEATURE_BAG}'
        idx = raw_params.index(feature_bag_flag)
        del raw_params[idx:idx + 2]
        assert (feature_bag_flag not in raw_params)
        assert ('per_member' not in raw_params)
    return base_training_params, raw_params
def _get_params(paths, max_iters, intercept_only):
    """
    Build the parameter sets used to initialize a fixed-effect model.

    :param paths: An AllPaths namedtuple.
    :param max_iters: Maximum l-BFGS iterations.
    :param intercept_only: Whether the model has intercept only, no other features.
    :return: Tuple of (base training params, schema params, raw model params).
    """
    base_training_params = setup_fake_base_training_params(
        training_stage=constants.FIXED_EFFECT)
    base_training_params.training_score_dir = paths.training_score_dir
    base_training_params.validation_score_dir = paths.validation_score_dir
    schema_params = setup_fake_schema_params()
    # Flat ['--flag', 'value', ...] list, as an argument parser would receive it.
    raw_model_params = [
        '--' + constants.TRAINING_DATA_DIR, paths.training_data_dir,
        '--' + constants.VALIDATION_DATA_DIR, paths.validation_data_dir,
        '--' + constants.METADATA_FILE, paths.metadata_file,
        '--' + constants.NUM_OF_LBFGS_ITERATIONS, f"{max_iters}",
        '--' + constants.OUTPUT_MODEL_DIR, paths.output_model_dir,
        '--' + constants.COPY_TO_LOCAL, 'False',
        '--' + constants.BATCH_SIZE, '16',
        '--' + constants.L2_REG_WEIGHT, f"{_L2_REG_WEIGHT}",
        "--" + constants.REGULARIZE_BIAS, 'True',
        "--" + constants.DELAYED_EXIT_IN_SECONDS, '1'
    ]
    if not intercept_only:
        # Feature bag and feature file are only meaningful with real features.
        raw_model_params += [
            '--' + constants.FEATURE_BAG, 'global',
            '--' + constants.FEATURE_FILE, paths.feature_file
        ]
    return base_training_params, schema_params, raw_model_params
def test_random_effect_custom_logistic_regression_model_creation(self):
    """Factory should build a RandomEffectLRLBFGSModel for random-effect LR training."""
    training_params = setup_fake_base_training_params(
        training_stage=constants.RANDOM_EFFECT,
        model_type=constants.LOGISTIC_REGRESSION)
    model = ModelFactory.get_model(
        base_training_params=training_params,
        raw_model_params=self.model_params)
    # The random-effect LR stage must be wired to the LBFGS model implementation.
    self.assertIsInstance(model, RandomEffectLRLBFGSModel)
def setUp(self):
    """Install a fake distributed-TF worker config and fresh fake params per test."""
    # Pretend to be worker 0 of a 5-worker cluster.
    self.task_type = "worker"
    self.worker_index = 0
    self.num_workers = 5
    set_fake_tf_config(task_type=self.task_type,
                       worker_index=self.worker_index)
    # Fresh parameter fixtures for every test method.
    self.params = setup_fake_base_training_params()
    self.model_params = setup_fake_raw_model_params()
def _get_params(paths, max_iters, intercept_only,
                has_validation_data_dir=True,
                disable_fixed_effect_scoring_after_training=False,
                has_intercept=True,
                model_type=constants.LOGISTIC_REGRESSION,
                fixed_effect_variance_mode=None,
                l2_reg_weight=_L2_REG_WEIGHT):
    """
    Build the parameter sets used to initialize a fixed-effect model.

    :param paths: An AllPaths namedtuple.
    :param max_iters: Maximum l-BFGS iterations.
    :param intercept_only: Whether the model has intercept only, no other features.
    :param has_validation_data_dir: Whether to use validation data.
    :param disable_fixed_effect_scoring_after_training: Whether to disable scoring.
    :param has_intercept: Whether to include an intercept in the model.
    :param model_type: The type of linear model to use (e.g. "linear_regression",
        "logistic_regression", etc.).
    :param fixed_effect_variance_mode: Fixed effect variance mode; supports
        None, "FULL" and "SIMPLE".
    :param l2_reg_weight: L2 regularization weight.
    :return: Tuple of (base training params, schema params, raw model params).
    """
    base_training_params = setup_fake_base_training_params(
        training_stage=constants.FIXED_EFFECT, model_type=model_type)
    base_training_params.training_score_dir = paths.training_score_dir
    base_training_params.validation_score_dir = paths.validation_score_dir
    schema_params = setup_fake_schema_params()
    # Always-present flags first; optional flags are appended below in the
    # same order the original construction used.
    raw_model_params = [
        '--' + constants.TRAINING_DATA_DIR, paths.training_data_dir,
        '--' + constants.METADATA_FILE, paths.metadata_file,
        '--' + constants.NUM_OF_LBFGS_ITERATIONS, f"{max_iters}",
        '--' + constants.OUTPUT_MODEL_DIR, paths.output_model_dir,
        '--' + constants.COPY_TO_LOCAL, 'False',
        '--' + constants.BATCH_SIZE, '16',
        '--' + constants.L2_REG_WEIGHT, f"{l2_reg_weight}",
        "--" + constants.REGULARIZE_BIAS, 'True',
        "--" + constants.DELAYED_EXIT_IN_SECONDS, '1'
    ]
    if has_validation_data_dir:
        raw_model_params += ['--' + constants.VALIDATION_DATA_DIR,
                             paths.validation_data_dir]
    if disable_fixed_effect_scoring_after_training:
        raw_model_params += ['--disable_fixed_effect_scoring_after_training',
                             'True']
    if not intercept_only:
        # Feature bag and feature file are only meaningful with real features.
        raw_model_params += ['--' + constants.FEATURE_BAG, 'global',
                             '--' + constants.FEATURE_FILE, paths.feature_file]
    if has_intercept:
        raw_model_params += ['--has_intercept', 'True']
    else:
        # Without an intercept there is no bias term to regularize.
        raw_model_params += ['--has_intercept', 'False',
                             '--regularize_bias', 'False']
    if fixed_effect_variance_mode is not None:
        raw_model_params += ['--fixed_effect_variance_mode',
                             fixed_effect_variance_mode]
    return base_training_params, schema_params, raw_model_params
def get_raw_params(self, partition_entity='memberId'):
    """Assemble argv-style raw params for a random-effect training run.

    :param partition_entity: Entity column used to partition random effects.
    :return: Tuple of (base training params mapping, flat raw param list).
    """
    base_training_params = setup_fake_base_training_params(
        training_stage=constants.RANDOM_EFFECT)
    base_training_params[constants.BATCH_SIZE] = 2
    # Flatten the key/value params into ['--key', 'value', ...] argv form.
    raw_params = []
    for key in base_training_params:
        raw_params.append('--' + key)
        raw_params.append(str(base_training_params[key]))
    raw_params += setup_fake_raw_model_params(
        training_stage=constants.RANDOM_EFFECT)
    raw_params += ['--' + constants.MODEL_IDS_DIR, test_dataset_path]
    raw_params += ['--' + constants.FEATURE_FILE,
                   os.path.join(test_dataset_path, fake_feature_file)]
    raw_params += ['--' + constants.PARTITION_ENTITY, partition_entity]
    raw_params += ['--' + constants.LABEL, 'response']
    raw_params += ['--' + constants.L2_REG_WEIGHT, '0.1']
    return base_training_params, raw_params
def _get_params(self):
    """Build the parameter sets used to initialize a fixed-effect model.

    :return: Tuple of (base training params, schema params, raw model params).
    """
    base_training_params = setup_fake_base_training_params(
        training_stage=constants.FIXED_EFFECT)
    base_training_params[constants.TRAINING_OUTPUT_DIR] = self.training_score_dir
    base_training_params[constants.VALIDATION_OUTPUT_DIR] = self.validation_score_dir
    schema_params = setup_fake_schema_params()
    # Flat ['--flag', 'value', ...] list, as an argument parser would receive it.
    raw_model_params = [
        '--' + constants.FEATURE_BAGS, 'global',
        '--' + constants.TRAIN_DATA_PATH, self.train_data_path,
        '--' + constants.VALIDATION_DATA_PATH, self.validation_data_path,
        '--' + constants.METADATA_FILE, self.metadata_file,
        '--' + constants.FEATURE_FILE, self.feature_file,
        '--' + constants.NUM_OF_LBFGS_ITERATIONS, '1',
        '--' + constants.MODEL_OUTPUT_DIR, self.model_output_dir,
        '--' + constants.COPY_TO_LOCAL, 'False',
        # Batch size > number of samples so no data shuffles between batches.
        '--' + constants.BATCH_SIZE, '64',
        '--' + constants.L2_REG_WEIGHT, '0.01',
        "--" + constants.REGULARIZE_BIAS, 'True'
    ]
    return base_training_params, schema_params, raw_model_params
def setUp(self):
    """Create fresh fake parameter fixtures before each test."""
    # Raw model params and base training params are independent fixtures.
    self.model_params = setup_fake_raw_model_params()
    self.params = setup_fake_base_training_params()