def test_sum(self):
    """Check that a weighted sum of two normalized models evaluates like the fitted model.

    The fitted model is normalized twice (once with 'test_file', once with
    'train_file'), the two results are combined by the ``model-sum`` CLI
    mode, and the evaluation of the combined model on 'test_file' is
    diffed against the evaluation of the fitted model.
    """
    adult = Dataset('adult')
    base_model = self.fit('Logloss', adult)
    first_normalized = self.normalize_model(base_model, adult, 'test_file')
    second_normalized = self.normalize_model(base_model, adult, 'train_file')
    scale_a, bias_a = self.get_scale_bias(first_normalized)
    scale_b, bias_b = self.get_scale_bias(second_normalized)

    # The weights solve the 2x2 linear system
    #     weight_a * scale_a + weight_b * scale_b == 1
    #     weight_a * bias_a  + weight_b * bias_b  == 0
    # NOTE(review): presumably each normalized model equals
    # scale * base_model + bias, so the weighted sum reproduces base_model
    # -- confirm against get_scale_bias / normalize_model.
    determinant = scale_a * bias_b - scale_b * bias_a
    if determinant:
        weight_a = bias_b / determinant
        weight_b = -bias_a / determinant
    else:
        # Degenerate system: fall back to equal weights.
        weight_a = weight_b = 0.5

    summed_model = get_test_output_path('model_sum{}.bin')
    sum_command = [CATBOOST_PATH, 'model-sum']
    weighted_models = (
        (first_normalized, weight_a),
        (second_normalized, weight_b),
    )
    for model_path, weight in weighted_models:
        sum_command += ['--model-with-weight', '{}={}'.format(model_path, weight)]
    sum_command += ['--output-path', summed_model]
    yc.execute(sum_command)

    expected_eval = self.eval_model(base_model, adult, 'test_file')
    actual_eval = self.eval_model(summed_model, adult, 'test_file')
    # NOTE(review): 1e-8 is presumably the allowed per-value difference --
    # confirm against get_limited_precision_dsv_diff_tool.
    compare_command = get_limited_precision_dsv_diff_tool(1e-8) + [expected_eval, actual_eval]
    yc.execute(compare_command)
def test_normalize_idempotent(self):
    """Check that normalizing an already-normalized model leaves its output unchanged.

    The fitted model is normalized with 'test_file' and 'train_file', the
    result is normalized again with the same arguments, and the two
    evaluations on 'test_file' are diffed with a limit of 0.
    """
    adult = Dataset('adult')
    pool_names = ('test_file', 'train_file')

    fitted = self.fit('Logloss', adult)
    normalized_once = self.normalize_model(fitted, adult, *pool_names)
    normalized_twice = self.normalize_model(normalized_once, adult, *pool_names)

    eval_after_one_pass = self.eval_model(normalized_once, adult, 'test_file')
    eval_after_two_passes = self.eval_model(normalized_twice, adult, 'test_file')

    # NOTE(review): a limit of 0 presumably demands an exact match --
    # confirm against get_limited_precision_dsv_diff_tool.
    compare_command = get_limited_precision_dsv_diff_tool(0)
    compare_command = compare_command + [eval_after_one_pass, eval_after_two_passes]
    yc.execute(compare_command)
def diff_tool(threshold=None):
    """Return the limited-precision DSV diff tool built for ``threshold``.

    Thin wrapper that forwards ``threshold`` unchanged and always passes
    ``True`` as the helper's second positional argument.
    """
    # NOTE(review): the second argument is presumably a "file has a header"
    # flag -- confirm against get_limited_precision_dsv_diff_tool.
    command = get_limited_precision_dsv_diff_tool(threshold, True)
    return command