Example #1
 def test_fixed_effect_lr_lbfgs_model_creation(self):
     fe_model = ModelFactory.get_model(
         base_training_params=setup_fake_base_training_params(training_stage=constants.FIXED_EFFECT,
                                                              model_type=constants.LOGISTIC_REGRESSION),
         raw_model_params=self.model_params)
     # Assert the type of model
     self.assertIsInstance(fe_model, FixedEffectLRModelLBFGS)
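For context, the factory under test appears to dispatch on the training stage and model type carried by the base training params (compare this test with Example #8 below). The sketch that follows is a hypothetical illustration of that dispatch pattern, with assumed constructor signatures; it is not gdmix's actual ModelFactory.

class FakeModelFactory:
    """Hypothetical factory illustrating the dispatch these tests exercise."""

    @staticmethod
    def get_model(base_training_params, raw_model_params):
        # Dispatch on (training_stage, model_type); both live on the params object.
        stage = base_training_params.training_stage
        model_type = base_training_params.model_type
        if stage == constants.FIXED_EFFECT and model_type == constants.LOGISTIC_REGRESSION:
            return FixedEffectLRModelLBFGS(raw_model_params, base_training_params)
        if stage == constants.RANDOM_EFFECT and model_type == constants.LOGISTIC_REGRESSION:
            return RandomEffectLRLBFGSModel(raw_model_params, base_training_params)
        raise ValueError(f"Unsupported combination: {stage!r}, {model_type!r}")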
Example #2
 def test_random_effect_driver_wiring(self):
     re_driver = DriverFactory.get_driver(
         base_training_params=setup_fake_base_training_params(
             constants.RANDOM_EFFECT),
         raw_model_params=self.model_params)
     # Assert the type of driver
     self.assertIsInstance(re_driver, RandomEffectDriver)
Example #3
def _get_params(paths, max_iters):
    """
    Get the various parameters for model initialization.
    :param paths: An AllPaths namedtuple.
    :param max_iters: maximum number of L-BFGS iterations.
    :return: Three different parameter sets.
    """
    base_training_params = setup_fake_base_training_params(
        training_stage=constants.FIXED_EFFECT)
    base_training_params.training_output_dir = paths.training_score_path
    base_training_params.validation_output_dir = paths.validation_score_path

    schema_params = setup_fake_schema_params()

    raw_model_params = [
        '--' + constants.FEATURE_BAGS, 'global',
        '--' + constants.TRAIN_DATA_PATH, paths.training_data_path,
        '--' + constants.VALIDATION_DATA_PATH, paths.validation_data_path,
        '--' + constants.METADATA_FILE, paths.metadata_file,
        '--' + constants.FEATURE_FILE, paths.feature_file,
        '--' + constants.NUM_OF_LBFGS_ITERATIONS, f"{max_iters}",
        '--' + constants.MODEL_OUTPUT_DIR, paths.model_output_dir,
        '--' + constants.COPY_TO_LOCAL, 'False',
        '--' + constants.BATCH_SIZE, '16',
        '--' + constants.L2_REG_WEIGHT, f"{_L2_REG_WEIGHT}",
        '--' + constants.REGULARIZE_BIAS, 'True',
        '--' + constants.DELAYED_EXIT_IN_SECONDS, '1'
    ]
    return base_training_params, schema_params, raw_model_params
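The returned raw_model_params is a flat, argv-style list that alternates flags and values. A parser downstream presumably consumes it along these lines; the flag spellings and defaults below are illustrative assumptions (taken to match the constants' string values), not gdmix's actual parser definitions.

import argparse

def parse_raw_model_params(raw_model_params):
    # Illustrative parser; the real flag definitions live in the trainer's param classes.
    parser = argparse.ArgumentParser()
    parser.add_argument('--feature_bags')
    parser.add_argument('--num_of_lbfgs_iterations', type=int, default=100)
    parser.add_argument('--l2_reg_weight', type=float, default=0.0)
    parser.add_argument('--batch_size', type=int, default=16)
    # parse_known_args tolerates the flags this sketch does not declare.
    known_args, _unknown = parser.parse_known_args(raw_model_params)
    return known_args

For instance, parse_raw_model_params(raw_model_params).l2_reg_weight would come back as a float.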
Example #4
 def test_fixed_effect_driver_wiring(self):
     fe_driver = DriverFactory.get_driver(
         base_training_params=setup_fake_base_training_params(
             constants.FIXED_EFFECT),
         raw_model_params=self.model_params)
     # Assert the type of driver
     self.assertIsInstance(fe_driver, FixedEffectDriver)
Example #5
 def get_raw_params(self,
                    partition_entity='memberId',
                    num_of_lbfgs_iterations=None):
     base_training_params = setup_fake_base_training_params(
         training_stage=constants.RANDOM_EFFECT)
     base_training_params.batch_size = 2
     # flatten the params
     raw_params = list(base_training_params.__to_argv__())
     model_params = setup_fake_raw_model_params(
         training_stage=constants.RANDOM_EFFECT)
     raw_params.extend(model_params)
     raw_params.extend(['--' + constants.MODEL_IDS_DIR, test_dataset_path])
     raw_params.extend([
         '--' + constants.FEATURE_FILE,
         os.path.join(test_dataset_path, fake_feature_file)
     ])
     raw_params.extend(
         ['--' + constants.PARTITION_ENTITY, partition_entity])
     raw_params.extend(['--' + constants.LABEL, 'response'])
     raw_params.extend(['--' + constants.L2_REG_WEIGHT, '0.1'])
     if num_of_lbfgs_iterations:
         raw_params.extend([
             '--' + constants.NUM_OF_LBFGS_ITERATIONS,
             f'{num_of_lbfgs_iterations}'
         ])
     return base_training_params, raw_params
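The __to_argv__() call above serializes the params object back into the same flag/value list shape. A hypothetical equivalent, assuming attribute names map one-to-one onto flag names:

def to_argv(params):
    # Turn each non-None attribute back into a '--name value' pair.
    argv = []
    for name, value in vars(params).items():
        if value is not None:
            argv.extend(['--' + name, str(value)])
    return argv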
Example #6
 def get_raw_params(self,
                    partition_entity='memberId',
                    num_of_lbfgs_iterations=None,
                    intercept_only=False):
     base_training_params = setup_fake_base_training_params(
         training_stage=constants.RANDOM_EFFECT)
     base_training_params.batch_size = 2
     # flatten the params
     raw_params = list(base_training_params.__to_argv__())
     model_params = setup_fake_raw_model_params(
         training_stage=constants.RANDOM_EFFECT)
     raw_params.extend(model_params)
     raw_params.extend(['--' + constants.MODEL_IDS_DIR, test_dataset_path])
     raw_params.extend([
         '--' + constants.FEATURE_FILE,
         os.path.join(test_dataset_path, fake_feature_file)
     ])
     raw_params.extend(
         ['--' + constants.PARTITION_ENTITY, partition_entity])
     raw_params.extend(['--' + constants.LABEL_COLUMN_NAME, 'response'])
     raw_params.extend(['--' + constants.L2_REG_WEIGHT, '0.1'])
     if num_of_lbfgs_iterations:
         raw_params.extend([
             '--' + constants.NUM_OF_LBFGS_ITERATIONS,
             f'{num_of_lbfgs_iterations}'
         ])
     if intercept_only:
         # Remove the feature bag flag and the value that follows it;
         # neither the flag nor its 'per_member' value should remain.
         feature_bag_index = raw_params.index(f'--{constants.FEATURE_BAG}')
         raw_params.pop(feature_bag_index)
         raw_params.pop(feature_bag_index)
         assert f'--{constants.FEATURE_BAG}' not in raw_params
         assert 'per_member' not in raw_params
     return base_training_params, raw_params
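The two consecutive pop() calls at the same index drop the flag and the value right after it. A generic helper with the same effect, shown only to make the idiom explicit:

def remove_flag_and_value(argv, flag):
    # index() raises ValueError if the flag is absent, mirroring the behavior above.
    i = argv.index(flag)
    del argv[i:i + 2]  # the flag and its value sit in adjacent slots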
Example #7
def _get_params(paths, max_iters, intercept_only):
    """
    Get the various parameters for model initialization.
    :param paths: An AllPaths namedtuple.
    :param max_iters: maximum number of L-BFGS iterations.
    :param intercept_only: whether the model has an intercept only, with no other features.
    :return: Three different parameter sets.
    """
    base_training_params = setup_fake_base_training_params(
        training_stage=constants.FIXED_EFFECT)
    base_training_params.training_score_dir = paths.training_score_dir
    base_training_params.validation_score_dir = paths.validation_score_dir

    schema_params = setup_fake_schema_params()

    raw_model_params = [
        '--' + constants.TRAINING_DATA_DIR, paths.training_data_dir,
        '--' + constants.VALIDATION_DATA_DIR, paths.validation_data_dir,
        '--' + constants.METADATA_FILE, paths.metadata_file,
        '--' + constants.NUM_OF_LBFGS_ITERATIONS, f"{max_iters}",
        '--' + constants.OUTPUT_MODEL_DIR, paths.output_model_dir,
        '--' + constants.COPY_TO_LOCAL, 'False',
        '--' + constants.BATCH_SIZE, '16',
        '--' + constants.L2_REG_WEIGHT, f"{_L2_REG_WEIGHT}",
        '--' + constants.REGULARIZE_BIAS, 'True',
        '--' + constants.DELAYED_EXIT_IN_SECONDS, '1'
    ]
    if not intercept_only:
        raw_model_params.extend([
            '--' + constants.FEATURE_BAG, 'global',
            '--' + constants.FEATURE_FILE, paths.feature_file
        ])
    return base_training_params, schema_params, raw_model_params
Example #8
 def test_random_effect_custom_logistic_regression_model_creation(self):
     re_model = ModelFactory.get_model(
         base_training_params=setup_fake_base_training_params(
             training_stage=constants.RANDOM_EFFECT,
             model_type=constants.LOGISTIC_REGRESSION),
         raw_model_params=self.model_params)
     self.assertIsInstance(re_model, RandomEffectLRLBFGSModel)
Example #9
 def setUp(self):
     self.task_type = "worker"
     self.worker_index = 0
     self.num_workers = 5
     set_fake_tf_config(task_type=self.task_type,
                        worker_index=self.worker_index)
     self.params = setup_fake_base_training_params()
     self.model_params = setup_fake_raw_model_params()
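set_fake_tf_config presumably populates the TF_CONFIG environment variable that TensorFlow's distributed runtime reads. A minimal sketch of such a helper, with invented localhost addresses; the project's real test utility may differ:

import json
import os

def set_fake_tf_config(task_type, worker_index, num_workers=5):
    # TF_CONFIG is a JSON blob holding the cluster spec and this process's task.
    tf_config = {
        'cluster': {'worker': ['localhost:%d' % (2222 + i) for i in range(num_workers)]},
        'task': {'type': task_type, 'index': worker_index},
    }
    os.environ['TF_CONFIG'] = json.dumps(tf_config)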
Example #10
def _get_params(paths, max_iters, intercept_only, has_validation_data_dir=True,
                disable_fixed_effect_scoring_after_training=False, has_intercept=True,
                model_type=constants.LOGISTIC_REGRESSION,
                fixed_effect_variance_mode=None, l2_reg_weight=_L2_REG_WEIGHT):
    """
    Get the various parameters for model initialization.
    :param paths: An AllPaths namedtuple.
    :param max_iters: maximum number of L-BFGS iterations.
    :param intercept_only: whether the model has an intercept only, with no other features.
    :param has_validation_data_dir: whether to use validation data.
    :param disable_fixed_effect_scoring_after_training: whether to disable scoring after training.
    :param has_intercept: whether to include an intercept in the model.
    :param model_type: the type of linear model to use (e.g., "linear_regression", "logistic_regression").
    :param fixed_effect_variance_mode: fixed effect variance mode; one of None, "FULL", or "SIMPLE".
    :param l2_reg_weight: L2 regularization weight.
    :return: Three different parameter sets.
    """
    base_training_params = setup_fake_base_training_params(training_stage=constants.FIXED_EFFECT,
                                                           model_type=model_type)
    base_training_params.training_score_dir = paths.training_score_dir
    base_training_params.validation_score_dir = paths.validation_score_dir

    schema_params = setup_fake_schema_params()

    raw_model_params = ['--' + constants.TRAINING_DATA_DIR, paths.training_data_dir,
                        '--' + constants.METADATA_FILE, paths.metadata_file,
                        '--' + constants.NUM_OF_LBFGS_ITERATIONS, f"{max_iters}",
                        '--' + constants.OUTPUT_MODEL_DIR, paths.output_model_dir,
                        '--' + constants.COPY_TO_LOCAL, 'False',
                        '--' + constants.BATCH_SIZE, '16',
                        '--' + constants.L2_REG_WEIGHT, f"{l2_reg_weight}",
                        "--" + constants.REGULARIZE_BIAS, 'True',
                        "--" + constants.DELAYED_EXIT_IN_SECONDS, '1']

    if has_validation_data_dir:
        raw_model_params.extend(['--' + constants.VALIDATION_DATA_DIR, paths.validation_data_dir])

    if disable_fixed_effect_scoring_after_training:
        raw_model_params.extend(['--disable_fixed_effect_scoring_after_training', 'True'])

    if not intercept_only:
        raw_model_params.extend(['--' + constants.FEATURE_BAG, 'global',
                                 '--' + constants.FEATURE_FILE, paths.feature_file])
    if has_intercept:
        raw_model_params.extend(['--has_intercept', 'True'])
    else:
        raw_model_params.extend(['--has_intercept', 'False', '--regularize_bias', 'False'])

    if fixed_effect_variance_mode is not None:
        raw_model_params.extend(['--fixed_effect_variance_mode', fixed_effect_variance_mode])

    return base_training_params, schema_params, raw_model_params
Example #11
 def get_raw_params(self, partition_entity='memberId'):
     base_training_params = setup_fake_base_training_params(
         training_stage=constants.RANDOM_EFFECT)
     base_training_params[constants.BATCH_SIZE] = 2
     # flatten the params
     raw_params = [
         x for key in base_training_params
         for x in ['--' + key, str(base_training_params[key])]
     ]
     model_params = setup_fake_raw_model_params(
         training_stage=constants.RANDOM_EFFECT)
     raw_params.extend(model_params)
     raw_params.extend(['--' + constants.MODEL_IDS_DIR, test_dataset_path])
     raw_params.extend([
         '--' + constants.FEATURE_FILE,
         os.path.join(test_dataset_path, fake_feature_file)
     ])
     raw_params.extend(
         ['--' + constants.PARTITION_ENTITY, partition_entity])
     raw_params.extend(['--' + constants.LABEL, 'response'])
     raw_params.extend(['--' + constants.L2_REG_WEIGHT, '0.1'])
     return base_training_params, raw_params
Example #12
    def _get_params(self):
        base_training_params = setup_fake_base_training_params(
            training_stage=constants.FIXED_EFFECT)
        base_training_params[
            constants.TRAINING_OUTPUT_DIR] = self.training_score_dir
        base_training_params[
            constants.VALIDATION_OUTPUT_DIR] = self.validation_score_dir

        schema_params = setup_fake_schema_params()

        raw_model_params = [
            '--' + constants.FEATURE_BAGS,
            'global',
            '--' + constants.TRAIN_DATA_PATH,
            self.train_data_path,
            '--' + constants.VALIDATION_DATA_PATH,
            self.validation_data_path,
            '--' + constants.METADATA_FILE,
            self.metadata_file,
            '--' + constants.FEATURE_FILE,
            self.feature_file,
            '--' + constants.NUM_OF_LBFGS_ITERATIONS,
            '1',
            '--' + constants.MODEL_OUTPUT_DIR,
            self.model_output_dir,
            '--' + constants.COPY_TO_LOCAL,
            'False',
            # Batch size > number samples to make sure
            # there is no shuffling of data among batches
            '--' + constants.BATCH_SIZE,
            '64',
            '--' + constants.L2_REG_WEIGHT,
            '0.01',
            "--" + constants.REGULARIZE_BIAS,
            'True'
        ]
        return base_training_params, schema_params, raw_model_params
Example #13
 def setUp(self):
     self.params = setup_fake_base_training_params()
     self.model_params = setup_fake_raw_model_params()