예제 #1
0
    def test_create_deepnull_prediction(self, target_is_binary):
        """Checks the frames returned by `create_deepnull_prediction`.

        Builds a 1000-row design frame, runs 3-fold prediction with a
        one-epoch (32, 16) MLP configuration, and verifies that:

        * the frame passed in is left unmodified,
        * the returned frame is the input plus one `label_deepnull` column,
        * the third returned frame has exactly the expected four columns.

        Args:
          target_is_binary: Forwarded both to the data factory and to the
            function under test.
        """
        num_rows = 1000
        design_df = _create_df(target_is_binary=target_is_binary,
                               size=num_rows)
        # Identifier columns plus a string column that the covariate list
        # below never references.
        design_df['FID'] = np.arange(num_rows)
        design_df['IID'] = np.arange(num_rows)
        design_df['unused_str_column'] = np.random.choice(
            list('abcdefg'), num_rows)

        # The function receives a deep copy so `design_df` can serve as the
        # untouched reference in the assertions below.
        input_df = design_df.copy(deep=True)

        # Override network size and training schedule for this test.
        full_config = config.get_config(config.DEEPNULL)
        full_config.model_config.mlp_units = (32, 16)
        full_config.training_config.num_epochs = 1
        full_config.training_config.batch_size = 32

        call_kwargs = dict(
            input_df=input_df,
            target='label',
            target_is_binary=target_is_binary,
            covariates=['cov1', 'cov2'],
            full_config=full_config,
            prediction_column='label_deepnull',
            num_folds=3,
            seed=5,
            verbosity=0,
        )
        with tempfile.TemporaryDirectory() as scratch_dir:
            predicted_df, _, perf_df = train_eval.create_deepnull_prediction(
                logdir=scratch_dir, **call_kwargs)

        # The input must not have been mutated by the call.
        pd.testing.assert_frame_equal(design_df, input_df)

        # The output is the original columns plus the prediction column.
        expected_output_columns = [*design_df.columns, 'label_deepnull']
        self.assertCountEqual(predicted_df.columns, expected_output_columns)
        pd.testing.assert_frame_equal(design_df,
                                      predicted_df[design_df.columns])

        expected_perf_columns = [
            'IID',
            'label',
            'label_deepnull',
            'label_deepnull_eval_fold',
        ]
        self.assertCountEqual(perf_df.columns, expected_perf_columns)
예제 #2
0
 def test_deepnull_model_compiles(self, cls):
     """Asserts that constructing `cls` yields a `model_lib._DeepNull`.

     Args:
       cls: Model class to instantiate with the DeepNull configuration.
     """
     deepnull_config = config.get_config(config.DEEPNULL)
     constructor_kwargs = dict(
         target='target',
         covariates=['cov1', 'cov2'],
         full_config=deepnull_config,
         fold_ix=0,
     )
     instance = cls(**constructor_kwargs)
     self.assertIsInstance(instance, model_lib._DeepNull)
예제 #3
0
 def test_get_model(self, config_name, binary, expected):
     """Asserts that `model_lib.get_model` returns an instance of `expected`.

     Args:
       config_name: Name passed to `config.get_config` to build the config.
       binary: Value forwarded as `target_is_binary`.
       expected: Class the returned model must be an instance of.
     """
     # Imported locally so this method does not depend on a module-level
     # import that may not exist in this file.
     import tempfile

     full_config = config.get_config(config_name)
     # Use a throwaway directory instead of a hard-coded '/tmp': it works on
     # platforms without that path and removes anything written to `logdir`
     # when the test finishes.
     with tempfile.TemporaryDirectory() as tmpdir:
         actual = model_lib.get_model(target='target',
                                      target_is_binary=binary,
                                      covariates=['cov1', 'cov2'],
                                      full_config=full_config,
                                      fold_ix=0,
                                      logdir=tmpdir,
                                      seed=1)
         self.assertIsInstance(actual, expected)
예제 #4
0
    def test_xgboost_model_fit_and_predict(self, cls, metric, target,
                                           expected):
        """Fits an XGBoost-configured model and checks its predictions.

        Args:
          cls: Model class to instantiate.
          metric: Callable invoked as `metric(true_values, predictions)`.
          target: Name of the target column in the test data.
          expected: Metric value the predictions are expected to achieve.
        """
        prediction_column = 'xgboost_prediction'
        train_df, eval_df = _create_test_data()

        xgboost_config = config.get_config(config.XGBOOST)
        model = cls(target=target,
                    covariates=['cov1', 'cov2'],
                    full_config=xgboost_config)
        model.fit(train_df=train_df, eval_df=eval_df, verbosity=0)

        predictions_df = model.predict(df=eval_df,
                                       prediction_column=prediction_column)
        observed_metric = metric(eval_df[target],
                                 predictions_df[prediction_column])

        # One row per evaluation example; an ID column plus the prediction.
        self.assertEqual(predictions_df.shape, (200, 2))
        self.assertEqual(list(predictions_df.columns),
                         ['IID', prediction_column])
        self.assertAlmostEqual(observed_metric, expected)
예제 #5
0
def main(argv: Sequence[str]) -> None:
    """Loads the input file, trains DeepNull predictions, and writes them out.

    All settings are read from command-line flags. When no model config flag
    is supplied, the 'deepnull' configuration is used.

    Args:
      argv: Positional command-line arguments; only the program name is
        accepted.

    Raises:
      app.UsageError: If more than one positional argument is present.
    """
    if len(argv) > 1:
        raise app.UsageError('Too many command-line arguments.')

    seed = _SEED.value
    tf.random.set_seed(seed)

    input_path = _INPUT_TSV.value
    missing_value = _MISSING_VALUE.value
    logging.info('Loading data from %s', input_path)
    input_df, binary_col_map = data.load_plink_or_bolt_file(
        path_or_buf=input_path, missing_value=missing_value)

    # Fall back to the 'deepnull' config when none was given by flag.
    full_config = FLAGS.model_config
    if full_config is None:
        full_config = config.get_config('deepnull')

    target = _TARGET.value
    logging.info('Training DeepNull model on %s with model %s', target,
                 full_config)

    # Level 2 is printing once per epoch during training.
    verbosity = 2 if _VERBOSE.value else 0
    final_df, eval_metrics, _ = train_eval.create_deepnull_prediction(
        input_df=input_df,
        target=target,
        target_is_binary=target in binary_col_map,
        covariates=_COVARIATES.value,
        full_config=full_config,
        prediction_column=_PREDS_COL.value,
        num_folds=_NUM_FOLDS.value,
        seed=seed,
        logdir=_LOGDIR.value,
        verbosity=verbosity,
    )

    performance_ok = metrics.acceptable_model_performance(eval_metrics)
    if not performance_ok:
        logging.warning(
            'WARNING: data folds have substantially different performance. Consider'
            ' retraining model with a different seed.')

    output_path = _OUTPUT_TSV.value
    logging.info('Writing trained results to %s', output_path)
    data.write_plink_or_bolt_file(
        input_df=final_df,
        path_or_buf=output_path,
        binary_column_mapping=binary_col_map,
        missing_value=missing_value,
        cast_ints=True,
    )
예제 #6
0
    def test_deepnull_model_fit_and_predict(self, cls, metric, target,
                                            expected):
        """Fits a DeepNull-configured model and checks its predictions.

        Args:
          cls: Model class to instantiate.
          metric: Callable invoked as `metric(true_values, predictions)`.
          target: Name of the target column in the test data.
          expected: Metric value the predictions are expected to achieve.
        """
        prediction_column = 'deepnull_prediction'
        train_df, eval_df = _create_test_data()

        # Override network size and training schedule for this test.
        deepnull_config = config.get_config(config.DEEPNULL)
        deepnull_config.model_config.mlp_units = (32, 16)
        deepnull_config.training_config.num_epochs = 2
        deepnull_config.training_config.batch_size = 200
        model = cls(target=target,
                    covariates=['cov1', 'cov2'],
                    full_config=deepnull_config,
                    fold_ix=0)

        # Seed TensorFlow after construction and before fitting, as the
        # expected metric values depend on this exact ordering.
        tf.random.set_seed(42)
        model.fit(train_df=train_df, eval_df=eval_df, verbosity=0)

        predictions_df = model.predict(df=eval_df,
                                       prediction_column=prediction_column)
        observed_metric = metric(eval_df[target],
                                 predictions_df[prediction_column])

        # One row per evaluation example; an ID column plus the prediction.
        self.assertEqual(predictions_df.shape, (200, 2))
        self.assertEqual(list(predictions_df.columns),
                         ['IID', prediction_column])
        self.assertAlmostEqual(observed_metric, expected)
 def test_unsupported_model(self):
     """Asserts that an unknown config name raises a `ValueError`."""
     bad = 'unsupported_model'
     expected_message = f'Config "{bad}" is not a supported model'
     with self.assertRaisesRegex(ValueError, expected_message):
         config.get_config(bad)
 def test_supported_model(self):
     """Asserts the DeepNull config reports `config.DEEPNULL` as its type."""
     model_type = config.get_config(config.DEEPNULL).model_type
     self.assertEqual(model_type, config.DEEPNULL)